From a1d20085e659e816e70a2ff7c39e04fbf48292fb Mon Sep 17 00:00:00 2001 From: Chenming C <43266446+chen622@users.noreply.github.com> Date: Thu, 17 Apr 2025 10:10:27 +0800 Subject: [PATCH 1/4] fix: change the method of update_dataset api in document (#18197) --- .../datasets/template/template.en.mdx | 69 ++++++++++++++++--- .../datasets/template/template.zh.mdx | 69 ++++++++++++++++--- 2 files changed, 122 insertions(+), 16 deletions(-) diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index 357b66a96f..54e08b45d8 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -557,7 +557,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi @@ -585,8 +585,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi Specified embedding model, corresponding to the model field(Optional) - - Specified retrieval model, corresponding to the model field(Optional) + + Retrieval model (optional, if not filled, it will be recalled according to the default method) + - search_method (text) Search method: One of the following four keywords is required + - keyword_search Keyword search + - semantic_search Semantic search + - full_text_search Full-text search + - hybrid_search Hybrid search + - reranking_enable (bool) Whether to enable reranking (optional; pass it when the search method is semantic_search or hybrid_search) + - reranking_model (object) Rerank model configuration, required if reranking is enabled + - reranking_provider_name (string) Rerank model provider + - reranking_model_name (string) Rerank model name + - weights (float) Semantic search weight setting in hybrid search mode + - top_k (integer) Number of results to return (optional) + - score_threshold_enabled (bool) Whether to enable score threshold + - score_threshold (float) Score threshold Partial member 
list(Optional) @@ -596,16 +609,56 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \ + curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ - --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",\ - "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' + --data-raw '{ + "name": "Test Knowledge Base", + "indexing_technique": "high_quality", + "permission": "only_me", + "embedding_model_provider": "zhipuai", + "embedding_model": "embedding-3", + "retrieval_model": { + "search_method": "keyword_search", + "reranking_enable": false, + "reranking_mode": null, + "reranking_model": { + "reranking_provider_name": "", + "reranking_model_name": "" + }, + "weights": null, + "top_k": 1, + "score_threshold_enabled": false, + "score_threshold": null + }, + "partial_member_list": [] + }' ``` diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index fb8f728b61..b435a9bb67 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -557,7 +557,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi @@ -589,8 +589,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi 嵌入模型(选填) - - 检索模型(选填) + + 检索参数(选填,如不填,按照默认方式召回) + - search_method (text) 检索方法:以下四个关键字之一,必填 + - keyword_search 关键字检索 + - semantic_search 语义检索 + - full_text_search 全文检索 + - hybrid_search 混合检索 + - reranking_enable (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值 + - reranking_model (object) Rerank 模型配置,非必填,如果启用了 
reranking 则传值 + - reranking_provider_name (string) Rerank 模型提供商 + - reranking_model_name (string) Rerank 模型名称 + - weights (float) 混合检索模式下语义检索的权重设置 + - top_k (integer) 返回结果数量,非必填 + - score_threshold_enabled (bool) 是否开启 score 阈值 + - score_threshold (float) Score 阈值 部分团队成员 ID 列表(选填) @@ -600,16 +613,56 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \ + curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ - --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",\ - "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' + --data-raw '{ + "name": "Test Knowledge Base", + "indexing_technique": "high_quality", + "permission": "only_me", + "embedding_model_provider": "zhipuai", + "embedding_model": "embedding-3", + "retrieval_model": { + "search_method": "keyword_search", + "reranking_enable": false, + "reranking_mode": null, + "reranking_model": { + "reranking_provider_name": "", + "reranking_model_name": "" + }, + "weights": null, + "top_k": 1, + "score_threshold_enabled": false, + "score_threshold": null + }, + "partial_member_list": [] + }' ``` From e8d98e3d8907105c524f045c360d7115edc238b7 Mon Sep 17 00:00:00 2001 From: Rain Wang Date: Thu, 17 Apr 2025 10:38:56 +0800 Subject: [PATCH 2/4] Add analyzer_params config for milvus vectordb (#18180) --- api/.env.example | 1 + api/configs/middleware/vdb/milvus_config.py | 5 ++++ .../datasource/vdb/milvus/milvus_vector.py | 24 ++++++++++++------- docker/.env.example | 1 + docker/docker-compose.yaml | 1 + 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/api/.env.example b/api/.env.example index af95a4fe2d..502461f658 100644 --- a/api/.env.example 
+++ b/api/.env.example @@ -165,6 +165,7 @@ MILVUS_URI=http://127.0.0.1:19530 MILVUS_TOKEN= MILVUS_USER=root MILVUS_PASSWORD=Milvus +MILVUS_ANALYZER_PARAMS= # MyScale configuration MYSCALE_HOST=127.0.0.1 diff --git a/api/configs/middleware/vdb/milvus_config.py b/api/configs/middleware/vdb/milvus_config.py index ebdf8857b9..d398ef5bd8 100644 --- a/api/configs/middleware/vdb/milvus_config.py +++ b/api/configs/middleware/vdb/milvus_config.py @@ -39,3 +39,8 @@ class MilvusConfig(BaseSettings): "older versions", default=True, ) + + MILVUS_ANALYZER_PARAMS: Optional[str] = Field( + description='Milvus text analyzer parameters, e.g., {"type": "chinese"} for Chinese segmentation support.', + default=None, + ) diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index 7a3319f4a6..100bcb198c 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -32,6 +32,7 @@ class MilvusConfig(BaseModel): batch_size: int = 100 # Batch size for operations database: str = "default" # Database name enable_hybrid_search: bool = False # Flag to enable hybrid search + analyzer_params: Optional[str] = None # Analyzer params @model_validator(mode="before") @classmethod @@ -58,6 +59,7 @@ class MilvusConfig(BaseModel): "user": self.user, "password": self.password, "db_name": self.database, + "analyzer_params": self.analyzer_params, } @@ -300,14 +302,19 @@ class MilvusVector(BaseVector): # Create the text field, enable_analyzer will be set True to support milvus automatically # transfer text to sparse_vector, reference: https://milvus.io/docs/full-text-search.md - fields.append( - FieldSchema( - Field.CONTENT_KEY.value, - DataType.VARCHAR, - max_length=65_535, - enable_analyzer=self._hybrid_search_enabled, - ) - ) + content_field_kwargs: dict[str, Any] = { + "max_length": 65_535, + "enable_analyzer": self._hybrid_search_enabled, + } + if ( + 
self._hybrid_search_enabled + and self._client_config.analyzer_params is not None + and self._client_config.analyzer_params.strip() + ): + content_field_kwargs["analyzer_params"] = self._client_config.analyzer_params + + fields.append(FieldSchema(Field.CONTENT_KEY.value, DataType.VARCHAR, **content_field_kwargs)) + # Create the primary key field fields.append(FieldSchema(Field.PRIMARY_KEY.value, DataType.INT64, is_primary=True, auto_id=True)) # Create the vector field, supports binary or float vectors @@ -383,5 +390,6 @@ class MilvusVectorFactory(AbstractVectorFactory): password=dify_config.MILVUS_PASSWORD or "", database=dify_config.MILVUS_DATABASE or "", enable_hybrid_search=dify_config.MILVUS_ENABLE_HYBRID_SEARCH or False, + analyzer_params=dify_config.MILVUS_ANALYZER_PARAMS or "", ), ) diff --git a/docker/.env.example b/docker/.env.example index e49e8fee89..9b372dcec9 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -410,6 +410,7 @@ MILVUS_TOKEN= MILVUS_USER= MILVUS_PASSWORD= MILVUS_ENABLE_HYBRID_SEARCH=False +MILVUS_ANALYZER_PARAMS= # MyScale configuration, only available when VECTOR_STORE is `myscale` # For multi-language support, please set MYSCALE_FTS_PARAMS with referring to: diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 25b0c56561..172cbe2d2f 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -142,6 +142,7 @@ x-shared-env: &shared-api-worker-env MILVUS_USER: ${MILVUS_USER:-} MILVUS_PASSWORD: ${MILVUS_PASSWORD:-} MILVUS_ENABLE_HYBRID_SEARCH: ${MILVUS_ENABLE_HYBRID_SEARCH:-False} + MILVUS_ANALYZER_PARAMS: ${MILVUS_ANALYZER_PARAMS:-} MYSCALE_HOST: ${MYSCALE_HOST:-myscale} MYSCALE_PORT: ${MYSCALE_PORT:-8123} MYSCALE_USER: ${MYSCALE_USER:-default} From 6d66e3f680b849cfb718e7dd73bdbd4916ce4194 Mon Sep 17 00:00:00 2001 From: Novice <857526207@qq.com> Date: Thu, 17 Apr 2025 10:41:56 +0800 Subject: [PATCH 3/4] fix(follow_ups): handle empty LLM responses in context (#18237) --- 
api/core/memory/token_buffer_memory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py index 003a0c85b1..3c90dd22a2 100644 --- a/api/core/memory/token_buffer_memory.py +++ b/api/core/memory/token_buffer_memory.py @@ -44,6 +44,7 @@ class TokenBufferMemory: Message.created_at, Message.workflow_run_id, Message.parent_message_id, + Message.answer_tokens, ) .filter( Message.conversation_id == self.conversation.id, @@ -63,7 +64,7 @@ class TokenBufferMemory: thread_messages = extract_thread_messages(messages) # for newly created message, its answer is temporarily empty, we don't need to add it to memory - if thread_messages and not thread_messages[0].answer: + if thread_messages and not thread_messages[0].answer and thread_messages[0].answer_tokens == 0: thread_messages.pop(0) messages = list(reversed(thread_messages)) From 9d139fa30677821588fc03f360576a50bd5ad13d Mon Sep 17 00:00:00 2001 From: Joel Date: Thu, 17 Apr 2025 11:22:06 +0800 Subject: [PATCH 4/4] fix: Could not load the logo of workflow as Tool in Agent Node (#18243) --- .../workflow/nodes/agent/components/tool-icon.tsx | 6 ++++-- web/app/components/workflow/nodes/agent/node.tsx | 7 ++++--- web/app/components/workflow/nodes/agent/panel.tsx | 1 - 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/web/app/components/workflow/nodes/agent/components/tool-icon.tsx b/web/app/components/workflow/nodes/agent/components/tool-icon.tsx index 4ac789f22e..b94258855a 100644 --- a/web/app/components/workflow/nodes/agent/components/tool-icon.tsx +++ b/web/app/components/workflow/nodes/agent/components/tool-icon.tsx @@ -10,6 +10,7 @@ import { Group } from '@/app/components/base/icons/src/vender/other' type Status = 'not-installed' | 'not-authorized' | undefined export type ToolIconProps = { + id: string providerName: string } @@ -29,10 +30,11 @@ export const ToolIcon = memo(({ providerName }: ToolIconProps) => { const 
author = providerNameParts[0] const name = providerNameParts[1] const icon = useMemo(() => { + if (!isDataReady) return '' if (currentProvider) return currentProvider.icon as string const iconFromMarketPlace = getIconFromMarketPlace(`${author}/${name}`) return iconFromMarketPlace - }, [author, currentProvider, name]) + }, [author, currentProvider, name, isDataReady]) const status: Status = useMemo(() => { if (!isDataReady) return undefined if (!currentProvider) return 'not-installed' @@ -60,7 +62,7 @@ export const ToolIcon = memo(({ providerName }: ToolIconProps) => { )} ref={containerRef} > - {!iconFetchError + {(!iconFetchError && isDataReady) ? > = (props) => { const tools = useMemo(() => { const tools: Array = [] - currentStrategy?.parameters.forEach((param) => { + currentStrategy?.parameters.forEach((param, i) => { if (param.type === FormTypeEnum.toolSelector) { const field = param.name const value = inputs.agent_parameters?.[field]?.value if (value) { tools.push({ + id: `${param.name}-${i}`, providerName: value.provider_name as any, }) } @@ -55,6 +56,7 @@ const AgentNode: FC> = (props) => { if (value) { (value as unknown as any[]).forEach((item) => { tools.push({ + id: `${param.name}-${i}`, providerName: item.provider_name, }) }) @@ -102,8 +104,7 @@ const AgentNode: FC> = (props) => { {t('workflow.nodes.agent.toolbox')} }>
- {/* eslint-disable-next-line sonarjs/no-uniq-key */} - {tools.map(tool => )} + {tools.map(tool => )}
} diff --git a/web/app/components/workflow/nodes/agent/panel.tsx b/web/app/components/workflow/nodes/agent/panel.tsx index 6a80728d91..19be60cb51 100644 --- a/web/app/components/workflow/nodes/agent/panel.tsx +++ b/web/app/components/workflow/nodes/agent/panel.tsx @@ -54,7 +54,6 @@ const AgentPanel: FC> = (props) => { outputSchema, handleMemoryChange, } = useConfig(props.id, props.data) - console.log('currentStrategy', currentStrategy) const { t } = useTranslation() const nodeInfo = useMemo(() => { if (!runResult)