From a1d20085e659e816e70a2ff7c39e04fbf48292fb Mon Sep 17 00:00:00 2001
From: Chenming C <43266446+chen622@users.noreply.github.com>
Date: Thu, 17 Apr 2025 10:10:27 +0800
Subject: [PATCH 1/4] fix: change the method of update_dataset api in document
 (#18197)

---
 .../datasets/template/template.en.mdx         | 69 ++++++++++++++++---
 .../datasets/template/template.zh.mdx         | 69 ++++++++++++++++---
 2 files changed, 122 insertions(+), 16 deletions(-)
diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx
index 357b66a96f..54e08b45d8 100644
--- a/web/app/(commonLayout)/datasets/template/template.en.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.en.mdx
@@ -557,7 +557,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
 
 <Heading
   url='/datasets/{dataset_id}'
-  method='POST'
+  method='PATCH'
   title='Update knowledge base'
   name='#update_dataset'
 />
@@ -585,8 +585,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
       <Property name='embedding_model' type='string' key='embedding_model'>
         Specified embedding model, corresponding to the model field(Optional)
       </Property>
-      <Property name='retrieval_model' type='string' key='retrieval_model'>
-        Specified retrieval model, corresponding to the model field(Optional)
+      <Property name='retrieval_model' type='object' key='retrieval_model'>
+        Retrieval model (optional, if not filled, it will be recalled according to the default method)
+        - <code>search_method</code> (text) Search method: One of the following four keywords is required
+          - <code>keyword_search</code> Keyword search
+          - <code>semantic_search</code> Semantic search
+          - <code>full_text_search</code> Full-text search
+          - <code>hybrid_search</code> Hybrid search
+        - <code>reranking_enable</code> (bool) Whether to enable reranking (optional); pass a value if the search mode is semantic_search or hybrid_search
+        - <code>reranking_model</code> (object) Rerank model configuration, required if reranking is enabled
+            - <code>reranking_provider_name</code> (string) Rerank model provider
+            - <code>reranking_model_name</code> (string) Rerank model name
+        - <code>weights</code> (float) Semantic search weight setting in hybrid search mode
+        - <code>top_k</code> (integer) Number of results to return (optional)
+        - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
+        - <code>score_threshold</code> (float) Score threshold
       </Property>
       <Property name='partial_member_list' type='array' key='partial_member_list'>
         Partial member list(Optional)
@@ -596,16 +609,56 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
   <Col sticky>
     <CodeGroup
       title="Request"
-      tag="POST"
+      tag="PATCH"
       label="/datasets/{dataset_id}"
-      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `}
+      targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
+        "name": "Test Knowledge Base", 
+        "indexing_technique": "high_quality", 
+        "permission": "only_me", 
+        "embedding_model_provider": "zhipuai", 
+        "embedding_model": "embedding-3", 
+        "retrieval_model": {
+          "search_method": "keyword_search",
+          "reranking_enable": false,
+          "reranking_mode": null,
+          "reranking_model": {
+              "reranking_provider_name": "",
+              "reranking_model_name": ""
+          },
+          "weights": null,
+          "top_k": 1,
+          "score_threshold_enabled": false,
+          "score_threshold": null
+        }, 
+        "partial_member_list": []
+      }'
+    `}
     >
     ```bash {{ title: 'cURL' }}
-    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \
+    curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \
     --header 'Authorization: Bearer {api_key}' \
     --header 'Content-Type: application/json' \
-    --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",\
-      "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}'
+    --data-raw '{
+      "name": "Test Knowledge Base", 
+      "indexing_technique": "high_quality", 
+      "permission": "only_me", 
+      "embedding_model_provider": "zhipuai", 
+      "embedding_model": "embedding-3", 
+      "retrieval_model": {
+        "search_method": "keyword_search",
+        "reranking_enable": false,
+        "reranking_mode": null,
+        "reranking_model": {
+            "reranking_provider_name": "",
+            "reranking_model_name": ""
+        },
+        "weights": null,
+        "top_k": 1,
+        "score_threshold_enabled": false,
+        "score_threshold": null
+      }, 
+      "partial_member_list": []
+    }'
     ```
     </CodeGroup>
     <CodeGroup title="Response">
diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx
index fb8f728b61..b435a9bb67 100644
--- a/web/app/(commonLayout)/datasets/template/template.zh.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx
@@ -557,7 +557,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
 
 <Heading
   url='/datasets/{dataset_id}'
-  method='POST'
+  method='PATCH'
   title='修改知识库详情'
   name='#update_dataset'
 />
@@ -589,8 +589,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
       <Property name='embedding_model' type='string' key='embedding_model'>
         嵌入模型（选填）
       </Property>
-      <Property name='retrieval_model' type='string' key='retrieval_model'>
-        检索模型（选填）
+      <Property name='retrieval_model' type='object' key='retrieval_model'>
+        检索参数（选填，如不填，按照默认方式召回）
+        - <code>search_method</code> (text) 检索方法：以下四个关键字之一，必填
+          - <code>keyword_search</code> 关键字检索
+          - <code>semantic_search</code> 语义检索
+          - <code>full_text_search</code> 全文检索
+          - <code>hybrid_search</code> 混合检索
+        - <code>reranking_enable</code> (bool) 是否启用 Reranking，非必填，如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
+        - <code>reranking_model</code> (object) Rerank 模型配置，非必填，如果启用了 reranking 则传值
+            - <code>reranking_provider_name</code> (string) Rerank 模型提供商
+            - <code>reranking_model_name</code> (string) Rerank 模型名称
+        - <code>weights</code> (float) 混合检索模式下语义检索的权重设置
+        - <code>top_k</code> (integer) 返回结果数量，非必填
+        - <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
+        - <code>score_threshold</code> (float) Score 阈值
       </Property>
       <Property name='partial_member_list' type='array' key='partial_member_list'>
         部分团队成员 ID 列表（选填）
@@ -600,16 +613,56 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
   <Col sticky>
     <CodeGroup
       title="Request"
-      tag="POST"
+      tag="PATCH"
       label="/datasets/{dataset_id}"
-      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `}
+      targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
+        "name": "Test Knowledge Base", 
+        "indexing_technique": "high_quality", 
+        "permission": "only_me", 
+        "embedding_model_provider": "zhipuai", 
+        "embedding_model": "embedding-3", 
+        "retrieval_model": {
+          "search_method": "keyword_search",
+          "reranking_enable": false,
+          "reranking_mode": null,
+          "reranking_model": {
+              "reranking_provider_name": "",
+              "reranking_model_name": ""
+          },
+          "weights": null,
+          "top_k": 1,
+          "score_threshold_enabled": false,
+          "score_threshold": null
+        }, 
+        "partial_member_list": []
+      }'
+    `}
     >
     ```bash {{ title: 'cURL' }}
-    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \
+    curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \
     --header 'Authorization: Bearer {api_key}' \
     --header 'Content-Type: application/json' \
-    --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",\
-      "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}'
+    --data-raw '{
+      "name": "Test Knowledge Base", 
+      "indexing_technique": "high_quality", 
+      "permission": "only_me", 
+      "embedding_model_provider": "zhipuai", 
+      "embedding_model": "embedding-3", 
+      "retrieval_model": {
+        "search_method": "keyword_search",
+        "reranking_enable": false,
+        "reranking_mode": null,
+        "reranking_model": {
+            "reranking_provider_name": "",
+            "reranking_model_name": ""
+        },
+        "weights": null,
+        "top_k": 1,
+        "score_threshold_enabled": false,
+        "score_threshold": null
+      }, 
+      "partial_member_list": []
+    }'
     ```
     </CodeGroup>
     <CodeGroup title="Response">

From e8d98e3d8907105c524f045c360d7115edc238b7 Mon Sep 17 00:00:00 2001
From: Rain Wang <rainwzp@icloud.com>
Date: Thu, 17 Apr 2025 10:38:56 +0800
Subject: [PATCH 2/4] Add analyzer_params config for milvus vectordb (#18180)

---
 api/.env.example                              |  1 +
 api/configs/middleware/vdb/milvus_config.py   |  5 ++++
 .../datasource/vdb/milvus/milvus_vector.py    | 24 ++++++++++++-------
 docker/.env.example                           |  1 +
 docker/docker-compose.yaml                    |  1 +
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/api/.env.example b/api/.env.example
index af95a4fe2d..502461f658 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -165,6 +165,7 @@ MILVUS_URI=http://127.0.0.1:19530
 MILVUS_TOKEN=
 MILVUS_USER=root
 MILVUS_PASSWORD=Milvus
+MILVUS_ANALYZER_PARAMS=
 
 # MyScale configuration
 MYSCALE_HOST=127.0.0.1
diff --git a/api/configs/middleware/vdb/milvus_config.py b/api/configs/middleware/vdb/milvus_config.py
index ebdf8857b9..d398ef5bd8 100644
--- a/api/configs/middleware/vdb/milvus_config.py
+++ b/api/configs/middleware/vdb/milvus_config.py
@@ -39,3 +39,8 @@ class MilvusConfig(BaseSettings):
         "older versions",
         default=True,
     )
+
+    MILVUS_ANALYZER_PARAMS: Optional[str] = Field(
+        description='Milvus text analyzer parameters, e.g., {"type": "chinese"} for Chinese segmentation support.',
+        default=None,
+    )
diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py
index 7a3319f4a6..100bcb198c 100644
--- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py
+++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py
@@ -32,6 +32,7 @@ class MilvusConfig(BaseModel):
     batch_size: int = 100  # Batch size for operations
     database: str = "default"  # Database name
     enable_hybrid_search: bool = False  # Flag to enable hybrid search
+    analyzer_params: Optional[str] = None  # Analyzer params
 
     @model_validator(mode="before")
     @classmethod
@@ -58,6 +59,7 @@ class MilvusConfig(BaseModel):
             "user": self.user,
             "password": self.password,
             "db_name": self.database,
+            "analyzer_params": self.analyzer_params,
         }
 
 
@@ -300,14 +302,19 @@ class MilvusVector(BaseVector):
 
                 # Create the text field, enable_analyzer will be set True to support milvus automatically
                 # transfer text to sparse_vector, reference: https://milvus.io/docs/full-text-search.md
-                fields.append(
-                    FieldSchema(
-                        Field.CONTENT_KEY.value,
-                        DataType.VARCHAR,
-                        max_length=65_535,
-                        enable_analyzer=self._hybrid_search_enabled,
-                    )
-                )
+                content_field_kwargs: dict[str, Any] = {
+                    "max_length": 65_535,
+                    "enable_analyzer": self._hybrid_search_enabled,
+                }
+                if (
+                    self._hybrid_search_enabled
+                    and self._client_config.analyzer_params is not None
+                    and self._client_config.analyzer_params.strip()
+                ):
+                    content_field_kwargs["analyzer_params"] = self._client_config.analyzer_params
+
+                fields.append(FieldSchema(Field.CONTENT_KEY.value, DataType.VARCHAR, **content_field_kwargs))
+
                 # Create the primary key field
                 fields.append(FieldSchema(Field.PRIMARY_KEY.value, DataType.INT64, is_primary=True, auto_id=True))
                 # Create the vector field, supports binary or float vectors
@@ -383,5 +390,6 @@ class MilvusVectorFactory(AbstractVectorFactory):
                 password=dify_config.MILVUS_PASSWORD or "",
                 database=dify_config.MILVUS_DATABASE or "",
                 enable_hybrid_search=dify_config.MILVUS_ENABLE_HYBRID_SEARCH or False,
+                analyzer_params=dify_config.MILVUS_ANALYZER_PARAMS or "",
             ),
         )
diff --git a/docker/.env.example b/docker/.env.example
index e49e8fee89..9b372dcec9 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -410,6 +410,7 @@ MILVUS_TOKEN=
 MILVUS_USER=
 MILVUS_PASSWORD=
 MILVUS_ENABLE_HYBRID_SEARCH=False
+MILVUS_ANALYZER_PARAMS=
 
 # MyScale configuration, only available when VECTOR_STORE is `myscale`
 # For multi-language support, please set MYSCALE_FTS_PARAMS with referring to:
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 25b0c56561..172cbe2d2f 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -142,6 +142,7 @@ x-shared-env: &shared-api-worker-env
   MILVUS_USER: ${MILVUS_USER:-}
   MILVUS_PASSWORD: ${MILVUS_PASSWORD:-}
   MILVUS_ENABLE_HYBRID_SEARCH: ${MILVUS_ENABLE_HYBRID_SEARCH:-False}
+  MILVUS_ANALYZER_PARAMS: ${MILVUS_ANALYZER_PARAMS:-}
   MYSCALE_HOST: ${MYSCALE_HOST:-myscale}
   MYSCALE_PORT: ${MYSCALE_PORT:-8123}
   MYSCALE_USER: ${MYSCALE_USER:-default}

From 6d66e3f680b849cfb718e7dd73bdbd4916ce4194 Mon Sep 17 00:00:00 2001
From: Novice <857526207@qq.com>
Date: Thu, 17 Apr 2025 10:41:56 +0800
Subject: [PATCH 3/4] fix(follow_ups): handle empty LLM responses in context
 (#18237)

---
 api/core/memory/token_buffer_memory.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py
index 003a0c85b1..3c90dd22a2 100644
--- a/api/core/memory/token_buffer_memory.py
+++ b/api/core/memory/token_buffer_memory.py
@@ -44,6 +44,7 @@ class TokenBufferMemory:
                 Message.created_at,
                 Message.workflow_run_id,
                 Message.parent_message_id,
+                Message.answer_tokens,
             )
             .filter(
                 Message.conversation_id == self.conversation.id,
@@ -63,7 +64,7 @@ class TokenBufferMemory:
         thread_messages = extract_thread_messages(messages)
 
         # for newly created message, its answer is temporarily empty, we don't need to add it to memory
-        if thread_messages and not thread_messages[0].answer:
+        if thread_messages and not thread_messages[0].answer and thread_messages[0].answer_tokens == 0:
             thread_messages.pop(0)
 
         messages = list(reversed(thread_messages))

From 9d139fa30677821588fc03f360576a50bd5ad13d Mon Sep 17 00:00:00 2001
From: Joel <iamjoel007@gmail.com>
Date: Thu, 17 Apr 2025 11:22:06 +0800
Subject: [PATCH 4/4] fix: Could not load the logo of workflow as Tool in Agent
 Node (#18243)

---
 .../workflow/nodes/agent/components/tool-icon.tsx          | 6 ++++--
 web/app/components/workflow/nodes/agent/node.tsx           | 7 ++++---
 web/app/components/workflow/nodes/agent/panel.tsx          | 1 -
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/web/app/components/workflow/nodes/agent/components/tool-icon.tsx b/web/app/components/workflow/nodes/agent/components/tool-icon.tsx
index 4ac789f22e..b94258855a 100644
--- a/web/app/components/workflow/nodes/agent/components/tool-icon.tsx
+++ b/web/app/components/workflow/nodes/agent/components/tool-icon.tsx
@@ -10,6 +10,7 @@ import { Group } from '@/app/components/base/icons/src/vender/other'
 type Status = 'not-installed' | 'not-authorized' | undefined
 
 export type ToolIconProps = {
+  id: string
   providerName: string
 }
 
@@ -29,10 +30,11 @@ export const ToolIcon = memo(({ providerName }: ToolIconProps) => {
   const author = providerNameParts[0]
   const name = providerNameParts[1]
   const icon = useMemo(() => {
+    if (!isDataReady) return ''
     if (currentProvider) return currentProvider.icon as string
     const iconFromMarketPlace = getIconFromMarketPlace(`${author}/${name}`)
     return iconFromMarketPlace
-  }, [author, currentProvider, name])
+  }, [author, currentProvider, name, isDataReady])
   const status: Status = useMemo(() => {
     if (!isDataReady) return undefined
     if (!currentProvider) return 'not-installed'
@@ -60,7 +62,7 @@ export const ToolIcon = memo(({ providerName }: ToolIconProps) => {
       )}
       ref={containerRef}
     >
-      {!iconFetchError
+      {(!iconFetchError && isDataReady)
 
         ? <img
           src={icon}
diff --git a/web/app/components/workflow/nodes/agent/node.tsx b/web/app/components/workflow/nodes/agent/node.tsx
index adfba65f35..57ad2a0b81 100644
--- a/web/app/components/workflow/nodes/agent/node.tsx
+++ b/web/app/components/workflow/nodes/agent/node.tsx
@@ -39,12 +39,13 @@ const AgentNode: FC<NodeProps<AgentNodeType>> = (props) => {
 
   const tools = useMemo(() => {
     const tools: Array<ToolIconProps> = []
-    currentStrategy?.parameters.forEach((param) => {
+    currentStrategy?.parameters.forEach((param, i) => {
       if (param.type === FormTypeEnum.toolSelector) {
         const field = param.name
         const value = inputs.agent_parameters?.[field]?.value
         if (value) {
           tools.push({
+            id: `${param.name}-${i}`,
             providerName: value.provider_name as any,
           })
         }
@@ -55,6 +56,7 @@ const AgentNode: FC<NodeProps<AgentNodeType>> = (props) => {
         if (value) {
           (value as unknown as any[]).forEach((item) => {
             tools.push({
+              id: `${param.name}-${i}`,
               providerName: item.provider_name,
             })
           })
@@ -102,8 +104,7 @@ const AgentNode: FC<NodeProps<AgentNodeType>> = (props) => {
       {t('workflow.nodes.agent.toolbox')}
     </GroupLabel>}>
       <div className='grid grid-cols-10 gap-0.5'>
-        {/* eslint-disable-next-line sonarjs/no-uniq-key */}
-        {tools.map(tool => <ToolIcon {...tool} key={Math.random()} />)}
+        {tools.map(tool => <ToolIcon {...tool} key={tool.id} />)}
       </div>
     </Group>}
   </div>
diff --git a/web/app/components/workflow/nodes/agent/panel.tsx b/web/app/components/workflow/nodes/agent/panel.tsx
index 6a80728d91..19be60cb51 100644
--- a/web/app/components/workflow/nodes/agent/panel.tsx
+++ b/web/app/components/workflow/nodes/agent/panel.tsx
@@ -54,7 +54,6 @@ const AgentPanel: FC<NodePanelProps<AgentNodeType>> = (props) => {
     outputSchema,
     handleMemoryChange,
   } = useConfig(props.id, props.data)
-  console.log('currentStrategy', currentStrategy)
   const { t } = useTranslation()
   const nodeInfo = useMemo(() => {
     if (!runResult)