refactor: improve error handling in structured output parser

- Updated the error handling logic to use a temporary variable for JSON repair, ensuring correct type casting before assignment.
- Enhanced clarity by refining the parsing process and maintaining the existing functionality.
pull/21565/head
Yeuoly 11 months ago
parent 655cfd25cf
commit 0e3f902992

@ -244,16 +244,16 @@ def _parse_structured_output(result_text: str) -> Mapping[str, Any]:
if not isinstance(parsed, dict):
raise OutputParserError(f"Failed to parse structured output: {result_text}")
structured_output = parsed
except ValidationError as e:
except ValidationError:
# if the result_text is not a valid json, try to repair it
temp_parsed = json_repair.loads(result_text)
if not isinstance(temp_parsed, dict):
# handle reasoning model like deepseek-r1 got '<think>\n\n</think>\n' prefix
if isinstance(temp_parsed, list):
parsed = next((item for item in temp_parsed if isinstance(item, dict)), {})
temp_parsed = next((item for item in parsed if isinstance(item, dict)), {})
else:
raise OutputParserError(f"Failed to parse structured output: {result_text}")
structured_output = cast(dict, parsed)
structured_output = cast(dict, temp_parsed)
return structured_output

@ -9,6 +9,7 @@ from unittest.mock import MagicMock, patch
import pytest
from core.app.entities.app_invoke_entities import InvokeFrom
from core.llm_generator.output_parser.structured_output import _parse_structured_output
from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage
from core.workflow.entities.variable_pool import VariablePool
@ -277,29 +278,6 @@ def test_execute_llm_with_jinja2(flask_req_ctx, setup_code_executor_mock):
def test_extract_json():
node = init_llm_node(
config={
"id": "llm",
"data": {
"title": "123",
"type": "llm",
"model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
"prompt_config": {
"structured_output": {
"enabled": True,
"schema": {
"type": "object",
"properties": {"name": {"type": "string"}, "age": {"type": "number"}},
},
}
},
"prompt_template": [{"role": "user", "text": "{{#sys.query#}}"}],
"memory": None,
"context": {"enabled": False},
"vision": {"enabled": False},
},
},
)
llm_texts = [
'<think>\n\n</think>{"name": "test", "age": 123', # resoning model (deepseek-r1)
'{"name":"test","age":123}', # json schema model (gpt-4o)
@ -308,4 +286,4 @@ def test_extract_json():
'{"name":"test",age:123}', # without quotes (qwen-2.5-0.5b)
]
result = {"name": "test", "age": 123}
assert all(node._parse_structured_output(item) == result for item in llm_texts)
assert all(_parse_structured_output(item) == result for item in llm_texts)

Loading…
Cancel
Save