refactor: improve error handling in structured output parser

- Updated the error handling logic to use a temporary variable for the JSON repair result, ensuring correct type casting before assignment.
- Enhanced clarity by refining the parsing process while maintaining the existing functionality.
11 months ago · 0e3f902992
parent 655cfd25cf
commit 0e3f902992
2 changed files with 5 additions and 27 deletions
--- a/api/core/llm_generator/output_parser/structured_output.py
+++ b/api/core/llm_generator/output_parser/structured_output.py
@ -244,16 +244,16 @@ def _parse_structured_output(result_text: str) -> Mapping[str, Any]:
        if not isinstance(parsed, dict):
            raise OutputParserError(f"Failed to parse structured output: {result_text}")
        structured_output = parsed
-    except ValidationError as e:
+    except ValidationError:
        # if the result_text is not a valid json, try to repair it
        temp_parsed = json_repair.loads(result_text)
        if not isinstance(temp_parsed, dict):
            # handle reasoning model like deepseek-r1 got '<think>\n\n</think>\n' prefix
            if isinstance(temp_parsed, list):
-                parsed = next((item for item in temp_parsed if isinstance(item, dict)), {})
+                temp_parsed = next((item for item in temp_parsed if isinstance(item, dict)), {})
            else:
                raise OutputParserError(f"Failed to parse structured output: {result_text}")
-        structured_output = cast(dict, parsed)
+        structured_output = cast(dict, temp_parsed)
    return structured_output


--- a/api/tests/integration_tests/workflow/nodes/test_llm.py
+++ b/api/tests/integration_tests/workflow/nodes/test_llm.py
@ -9,6 +9,7 @@ from unittest.mock import MagicMock, patch
 import pytest

 from core.app.entities.app_invoke_entities import InvokeFrom
+from core.llm_generator.output_parser.structured_output import _parse_structured_output
 from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
 from core.model_runtime.entities.message_entities import AssistantPromptMessage
 from core.workflow.entities.variable_pool import VariablePool
@ -277,29 +278,6 @@ def test_execute_llm_with_jinja2(flask_req_ctx, setup_code_executor_mock):


 def test_extract_json():
-    node = init_llm_node(
-        config={
-            "id": "llm",
-            "data": {
-                "title": "123",
-                "type": "llm",
-                "model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
-                "prompt_config": {
-                    "structured_output": {
-                        "enabled": True,
-                        "schema": {
-                            "type": "object",
-                            "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
-                        },
-                    }
-                },
-                "prompt_template": [{"role": "user", "text": "{{#sys.query#}}"}],
-                "memory": None,
-                "context": {"enabled": False},
-                "vision": {"enabled": False},
-            },
-        },
-    )
    llm_texts = [
        '<think>\n\n</think>{"name": "test", "age": 123',  # resoning model (deepseek-r1)
        '{"name":"test","age":123}',  # json schema model (gpt-4o)
@ -308,4 +286,4 @@ def test_extract_json():
        '{"name":"test",age:123}',  # without quotes (qwen-2.5-0.5b)
    ]
    result = {"name": "test", "age": 123}
-    assert all(node._parse_structured_output(item) == result for item in llm_texts)
+    assert all(_parse_structured_output(item) == result for item in llm_texts)