From 0e3f902992004a8849db6b8692fdc41035e8c99a Mon Sep 17 00:00:00 2001
From: Yeuoly
Date: Thu, 26 Jun 2025 17:12:07 +0800
Subject: [PATCH] refactor: improve error handling in structured output parser

- Updated the error handling logic to use a temporary variable for JSON repair, ensuring correct type casting before assignment.
- Enhanced clarity by refining the parsing process and maintaining the existing functionality.
---
Review note (not part of the commit message): in the list-handling branch the
generator must iterate `temp_parsed`, the json_repair result that was just
checked with isinstance(..., list). Iterating `parsed` reads the try-block
variable, which is presumably unbound when the strict parse raised
ValidationError (the assignment is outside this hunk -- confirm).

 .../output_parser/structured_output.py        |  6 ++---
 .../workflow/nodes/test_llm.py                | 26 ++-----------------
 2 files changed, 5 insertions(+), 27 deletions(-)

diff --git a/api/core/llm_generator/output_parser/structured_output.py b/api/core/llm_generator/output_parser/structured_output.py
index 6246a9ce8e..a4d8ff598c 100644
--- a/api/core/llm_generator/output_parser/structured_output.py
+++ b/api/core/llm_generator/output_parser/structured_output.py
@@ -244,16 +244,16 @@ def _parse_structured_output(result_text: str) -> Mapping[str, Any]:
         if not isinstance(parsed, dict):
             raise OutputParserError(f"Failed to parse structured output: {result_text}")
         structured_output = parsed
-    except ValidationError as e:
+    except ValidationError:
         # if the result_text is not a valid json, try to repair it
         temp_parsed = json_repair.loads(result_text)
         if not isinstance(temp_parsed, dict):
             # handle reasoning model like deepseek-r1 got '\n\n\n' prefix
             if isinstance(temp_parsed, list):
-                parsed = next((item for item in temp_parsed if isinstance(item, dict)), {})
+                temp_parsed = next((item for item in temp_parsed if isinstance(item, dict)), {})
             else:
                 raise OutputParserError(f"Failed to parse structured output: {result_text}")
-        structured_output = cast(dict, parsed)
+        structured_output = cast(dict, temp_parsed)
 
     return structured_output
 
diff --git a/api/tests/integration_tests/workflow/nodes/test_llm.py b/api/tests/integration_tests/workflow/nodes/test_llm.py
index a3b2fdc376..389d1071f3 100644
--- a/api/tests/integration_tests/workflow/nodes/test_llm.py
+++ b/api/tests/integration_tests/workflow/nodes/test_llm.py
@@ -9,6 +9,7 @@ from unittest.mock import MagicMock, patch
 import pytest
 
 from core.app.entities.app_invoke_entities import InvokeFrom
+from core.llm_generator.output_parser.structured_output import _parse_structured_output
 from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
 from core.model_runtime.entities.message_entities import AssistantPromptMessage
 from core.workflow.entities.variable_pool import VariablePool
@@ -277,29 +278,6 @@ def test_execute_llm_with_jinja2(flask_req_ctx, setup_code_executor_mock):
 
 
 def test_extract_json():
-    node = init_llm_node(
-        config={
-            "id": "llm",
-            "data": {
-                "title": "123",
-                "type": "llm",
-                "model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
-                "prompt_config": {
-                    "structured_output": {
-                        "enabled": True,
-                        "schema": {
-                            "type": "object",
-                            "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
-                        },
-                    }
-                },
-                "prompt_template": [{"role": "user", "text": "{{#sys.query#}}"}],
-                "memory": None,
-                "context": {"enabled": False},
-                "vision": {"enabled": False},
-            },
-        },
-    )
     llm_texts = [
         '\n\n{"name": "test", "age": 123',  # resoning model (deepseek-r1)
         '{"name":"test","age":123}',  # json schema model (gpt-4o)
@@ -308,4 +286,4 @@ def test_extract_json():
         '{"name":"test",age:123}',  # without quotes (qwen-2.5-0.5b)
     ]
     result = {"name": "test", "age": 123}
-    assert all(node._parse_structured_output(item) == result for item in llm_texts)
+    assert all(_parse_structured_output(item) == result for item in llm_texts)