From 79290687bf93542246d63721a5f2d840000f3e38 Mon Sep 17 00:00:00 2001 From: Yeuoly Date: Thu, 26 Jun 2025 16:47:51 +0800 Subject: [PATCH] refactor: improve type handling in structured output parsing - Changed the type of `structured_output` from `dict` to `Mapping` for better type flexibility. - Introduced a new variable `parsed` to hold the result of JSON validation, enhancing clarity. - Updated error handling to use a temporary variable `temp_parsed` for JSON repair, ensuring correct type casting before assignment. --- .../output_parser/structured_output.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/api/core/llm_generator/output_parser/structured_output.py b/api/core/llm_generator/output_parser/structured_output.py index 21e056b9b8..6dc5ce9fd8 100644 --- a/api/core/llm_generator/output_parser/structured_output.py +++ b/api/core/llm_generator/output_parser/structured_output.py @@ -1,7 +1,7 @@ import json from collections.abc import Generator, Mapping, Sequence from copy import deepcopy -from typing import Any, Literal, Optional, overload +from typing import Any, Literal, Optional, cast, overload import json_repair from pydantic import TypeAdapter, ValidationError @@ -233,7 +233,8 @@ def _handle_prompt_based_schema( def _parse_structured_output(result_text: str) -> Mapping[str, Any]: - structured_output: dict[str, Any] = {} + structured_output: Mapping[str, Any] = {} + parsed: Mapping[str, Any] = {} try: parsed = TypeAdapter(Mapping).validate_json(result_text) if not isinstance(parsed, dict): @@ -241,14 +242,14 @@ def _parse_structured_output(result_text: str) -> Mapping[str, Any]: structured_output = parsed except ValidationError as e: # if the result_text is not a valid json, try to repair it - parsed = json_repair.loads(result_text) - if not isinstance(parsed, dict): + temp_parsed = json_repair.loads(result_text) + if not isinstance(temp_parsed, dict): # handle reasoning model like deepseek-r1 got '\n\n\n' prefix - if isinstance(parsed, list): - parsed = next((item for item in parsed if isinstance(item, dict)), {}) + if isinstance(temp_parsed, list): + parsed = next((item for item in temp_parsed if isinstance(item, dict)), {}) else: raise OutputParserError(f"Failed to parse structured output: {result_text}") - structured_output = parsed + structured_output = cast(dict, parsed) return structured_output