From c55285bacedbef6c659bc372eefa9ba65d5f9d29 Mon Sep 17 00:00:00 2001
From: "liuchangsheng@wisdomidata.com" <liuchangsheng@wisdomidata.com>
Date: Tue, 10 Jun 2025 10:47:22 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=90=88=E5=B9=B6?=
 =?UTF-8?q?=E5=90=8E=E7=AB=AF=E4=BB=A3=E7=A0=81-=20=E4=BF=AE=E5=A4=8D?=
 =?UTF-8?q?=E5=90=88=E5=B9=B6BUG?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/core/rag/datasource/retrieval_service.py | 8 ++++----
 api/core/workflow/nodes/vanna/vanna_node.py  | 4 ++--
 api/pyproject.toml                           | 7 +++++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py
index 93864abbce..fc499713f7 100644
--- a/api/core/rag/datasource/retrieval_service.py
+++ b/api/core/rag/datasource/retrieval_service.py
@@ -405,6 +405,9 @@ class RetrievalService:
                     record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks")  # type: ignore
                     record["score"] = segment_child_map[record["segment"].id]["max_score"]
 
+            # For high-scoring segments, automatically stitch in the neighboring segments
+            cls.append_next_segments(records=records,dataset_documents=dataset_documents)
+
             result = []
             for record in records:
                 # Extract segment
@@ -422,8 +425,6 @@ class RetrievalService:
                     if score_value is not None and isinstance(score_value, int | float | str)
                     else None
                 )
-            cls.append_next_segments(records=records,dataset_documents=dataset_documents)
-
                 # Create RetrievalSegments object
                 retrieval_segment = RetrievalSegments(segment=segment, child_chunks=child_chunks, score=score)
                 result.append(retrieval_segment)
@@ -435,7 +436,6 @@ class RetrievalService:
 
     @classmethod
     def append_next_segments(cls, records: list[dict], dataset_documents : dict):
-        # import pdb; pdb.set_trace()
         def filter_record(record):
             document_id = record["segment"].document_id
             if document_id in dataset_documents:
@@ -521,4 +521,4 @@ class RetrievalService:
                 if document_segment.position == this_positions + 1:
                     next_segment = document_segment
                     break
-        return next_segment
\ No newline at end of file
+        return next_segment
diff --git a/api/core/workflow/nodes/vanna/vanna_node.py b/api/core/workflow/nodes/vanna/vanna_node.py
index 56f6e9a320..64d2bdc2aa 100644
--- a/api/core/workflow/nodes/vanna/vanna_node.py
+++ b/api/core/workflow/nodes/vanna/vanna_node.py
@@ -8,7 +8,7 @@ from core.workflow.entities.node_entities import NodeRunResult
 from core.workflow.nodes.enums import NodeType
 from core.workflow.nodes.llm import LLMNode
 from extensions.utils.vanna_text2sql import VannaServer
-from models.workflow import WorkflowNodeExecutionStatus
+from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 
 from .entities import VannaNodeData
 
@@ -89,4 +89,4 @@ class VannaNode(LLMNode):
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
             outputs={"output": sql}
         )
-    
+
diff --git a/api/pyproject.toml b/api/pyproject.toml
index f497274da1..9d41ea502f 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -2,8 +2,12 @@
 name = "dify-api"
 dynamic = ["version"]
 requires-python = ">=3.11,<3.13"
-
 dependencies = [
+    "vanna[milvus,mysql,ollama,postgres]>=0.7.9,<0.8.0",
+    "pymilvus[model]>=2.5.8,<2.6.0",
+    "pillow>=11.2.1,<12.0.0",
+    "pymupdf>=1.25.5,<1.26.0",
+    "kaleido==0.2.1",
     "authlib==1.3.1",
     "azure-identity==1.16.1",
     "beautifulsoup4==4.12.2",
@@ -190,7 +194,6 @@ vdb = [
     "oracledb==3.0.0",
     "pgvecto-rs[sqlalchemy]~=0.2.1",
     "pgvector==0.2.5",
-    "pymilvus~=2.5.0",
     "pymochow==1.3.1",
     "pyobvector~=0.1.6",
     "qdrant-client==1.9.0",