From e5719de7849da2a62b756baf2e987ce6bbd449ce Mon Sep 17 00:00:00 2001
From: Yeuoly <admin@srmxy.cn>
Date: Fri, 27 Jun 2025 14:15:39 +0800
Subject: [PATCH] refactor: enhance structured output handling in LLM generator

- Introduced `LLMResultChunkWithStructuredOutput` to encapsulate structured output within result chunks, improving data organization.
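- Updated the `invoke_llm_with_structured_output` function so that, in streaming mode, every upstream chunk is re-emitted as an `LLMResultChunkWithStructuredOutput` and one final chunk (with an empty-content delta) carries the parsed `structured_output`; the workflow LLM node now forwards these chunks instead of handling a bare `LLMStructuredOutput` event.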
- Modified the `LLMStructuredOutput` class so that `structured_output` is optional and defaults to `None`, allowing intermediate chunks to be emitted before the structured output has been parsed.
- Added a new request model `RequestInvokeLLMWithStructuredOutput`, which extends `RequestInvokeLLM` with a `structured_output_schema` field (a JSON schema dict, empty by default), to facilitate structured output requests.
---
 .../output_parser/structured_output.py        | 44 +++++++++++++++----
 .../model_runtime/entities/llm_entities.py    |  8 +++-
 api/core/plugin/backwards_invocation/model.py |  6 ++-
 api/core/plugin/entities/request.py           | 10 +++++
 api/core/workflow/nodes/llm/node.py           | 12 +++--
 5 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/api/core/llm_generator/output_parser/structured_output.py b/api/core/llm_generator/output_parser/structured_output.py
index ae829eeb48..33f30afc0b 100644
--- a/api/core/llm_generator/output_parser/structured_output.py
+++ b/api/core/llm_generator/output_parser/structured_output.py
@@ -12,10 +12,16 @@ from core.model_runtime.callbacks.base_callback import Callback
 from core.model_runtime.entities.llm_entities import (
     LLMResult,
     LLMResultChunk,
+    LLMResultChunkDelta,
+    LLMResultChunkWithStructuredOutput,
     LLMResultWithStructuredOutput,
-    LLMStructuredOutput,
 )
-from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool, SystemPromptMessage
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+)
 from core.model_runtime.entities.model_entities import AIModelEntity, ParameterRule
 from core.workflow.utils.structured_output.entities import ResponseFormat, SpecialModelType
 from core.workflow.utils.structured_output.prompt import STRUCTURED_OUTPUT_PROMPT
@@ -34,7 +40,7 @@ def invoke_llm_with_structured_output(
     stream: Literal[True] = True,
     user: Optional[str] = None,
     callbacks: Optional[list[Callback]] = None,
-) -> Generator[LLMResultChunk | LLMStructuredOutput, None, None]: ...
+) -> Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
 
 
 @overload
@@ -66,7 +72,7 @@ def invoke_llm_with_structured_output(
     stream: bool = True,
     user: Optional[str] = None,
     callbacks: Optional[list[Callback]] = None,
-) -> LLMResultWithStructuredOutput | Generator[LLMResultChunk | LLMStructuredOutput, None, None]: ...
+) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
 
 
 def invoke_llm_with_structured_output(
@@ -81,7 +87,7 @@ def invoke_llm_with_structured_output(
     stream: bool = True,
     user: Optional[str] = None,
     callbacks: Optional[list[Callback]] = None,
-) -> LLMResultWithStructuredOutput | Generator[LLMResultChunk | LLMStructuredOutput, None, None]:
+) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]:
     """
     Invoke large language model with structured output
     1. This method invokes model_instance.invoke_llm with json_schema
@@ -143,14 +149,36 @@ def invoke_llm_with_structured_output(
         )
     else:
 
-        def generator() -> Generator[LLMStructuredOutput, None, None]:
+        def generator() -> Generator[LLMResultChunkWithStructuredOutput, None, None]:
             result_text = ""
+            prompt_messages = []
+            system_fingerprint = None
             for event in llm_result:
                 if isinstance(event, LLMResultChunk):
                     if isinstance(event.delta.message.content, str):
                         result_text += event.delta.message.content
-
-            yield LLMStructuredOutput(structured_output=_parse_structured_output(result_text))
+                        prompt_messages = event.prompt_messages
+                        system_fingerprint = event.system_fingerprint
+
+                yield LLMResultChunkWithStructuredOutput(
+                    model=model_schema.model,
+                    prompt_messages=prompt_messages,
+                    system_fingerprint=system_fingerprint,
+                    delta=event.delta,
+                )
+
+            yield LLMResultChunkWithStructuredOutput(
+                structured_output=_parse_structured_output(result_text),
+                model=model_schema.model,
+                prompt_messages=prompt_messages,
+                system_fingerprint=system_fingerprint,
+                delta=LLMResultChunkDelta(
+                    index=0,
+                    message=AssistantPromptMessage(content=""),
+                    usage=None,
+                    finish_reason=None,
+                ),
+            )
 
         return generator()
 
diff --git a/api/core/model_runtime/entities/llm_entities.py b/api/core/model_runtime/entities/llm_entities.py
index 94c256fb7d..e52b0eba55 100644
--- a/api/core/model_runtime/entities/llm_entities.py
+++ b/api/core/model_runtime/entities/llm_entities.py
@@ -106,7 +106,7 @@ class LLMStructuredOutput(BaseModel):
     Model class for llm structured output.
     """
 
-    structured_output: Mapping[str, Any]
+    structured_output: Optional[Mapping[str, Any]] = None
 
 
 class LLMResultWithStructuredOutput(LLMResult, LLMStructuredOutput):
@@ -137,6 +137,12 @@ class LLMResultChunk(BaseModel):
     delta: LLMResultChunkDelta
 
 
+class LLMResultChunkWithStructuredOutput(LLMResultChunk, LLMStructuredOutput):
+    """
+    Model class for llm result chunk with structured output.
+    """
+
+
 class NumTokensResult(PriceInfo):
     """
     Model class for number of tokens result.
diff --git a/api/core/plugin/backwards_invocation/model.py b/api/core/plugin/backwards_invocation/model.py
index 072644e53b..9428d198a9 100644
--- a/api/core/plugin/backwards_invocation/model.py
+++ b/api/core/plugin/backwards_invocation/model.py
@@ -3,7 +3,11 @@ from binascii import hexlify, unhexlify
 from collections.abc import Generator
 
 from core.model_manager import ModelManager
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import (
+    LLMResult,
+    LLMResultChunk,
+    LLMResultChunkDelta,
+)
 from core.model_runtime.entities.message_entities import (
     PromptMessage,
     SystemPromptMessage,
diff --git a/api/core/plugin/entities/request.py b/api/core/plugin/entities/request.py
index 1692020ec8..f9c81ed4d5 100644
--- a/api/core/plugin/entities/request.py
+++ b/api/core/plugin/entities/request.py
@@ -82,6 +82,16 @@ class RequestInvokeLLM(BaseRequestInvokeModel):
         return v
 
 
+class RequestInvokeLLMWithStructuredOutput(RequestInvokeLLM):
+    """
+    Request to invoke LLM with structured output
+    """
+
+    structured_output_schema: dict[str, Any] = Field(
+        default_factory=dict, description="The schema of the structured output in JSON schema format"
+    )
+
+
 class RequestInvokeTextEmbedding(BaseRequestInvokeModel):
     """
     Request to invoke text embedding
diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py
index 0036e22f21..b5225ce548 100644
--- a/api/core/workflow/nodes/llm/node.py
+++ b/api/core/workflow/nodes/llm/node.py
@@ -18,7 +18,13 @@ from core.model_runtime.entities import (
     PromptMessageContentType,
     TextPromptMessageContent,
 )
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMStructuredOutput, LLMUsage
+from core.model_runtime.entities.llm_entities import (
+    LLMResult,
+    LLMResultChunk,
+    LLMResultChunkWithStructuredOutput,
+    LLMStructuredOutput,
+    LLMUsage,
+)
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,
     PromptMessageContentUnionTypes,
@@ -344,6 +350,8 @@ class LLMNode(BaseNode[LLMNodeData]):
         # Consume the invoke result and handle generator exception
         try:
             for result in invoke_result:
+                if isinstance(result, LLMResultChunkWithStructuredOutput):
+                    yield result
                 if isinstance(result, LLMResultChunk):
                     contents = result.delta.message.content
                     for text_part in self._save_multimodal_output_and_convert_result_to_markdown(contents):
@@ -363,8 +371,6 @@ class LLMNode(BaseNode[LLMNodeData]):
                         usage = result.delta.usage
                     if finish_reason is None and result.delta.finish_reason:
                         finish_reason = result.delta.finish_reason
-                elif isinstance(result, LLMStructuredOutput):
-                    yield result
         except OutputParserError as e:
             raise LLMNodeError(f"Failed to parse structured output: {e}")