Merge branch 'langgenius:main' into add-document-status-update

pull/18235/head
GuanMu 1 year ago committed by GitHub
commit 1839b1b0c3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -165,6 +165,7 @@ MILVUS_URI=http://127.0.0.1:19530
MILVUS_TOKEN= MILVUS_TOKEN=
MILVUS_USER=root MILVUS_USER=root
MILVUS_PASSWORD=Milvus MILVUS_PASSWORD=Milvus
MILVUS_ANALYZER_PARAMS=
# MyScale configuration # MyScale configuration
MYSCALE_HOST=127.0.0.1 MYSCALE_HOST=127.0.0.1

@ -39,3 +39,8 @@ class MilvusConfig(BaseSettings):
"older versions", "older versions",
default=True, default=True,
) )
MILVUS_ANALYZER_PARAMS: Optional[str] = Field(
description='Milvus text analyzer parameters, e.g., {"type": "chinese"} for Chinese segmentation support.',
default=None,
)

@ -44,6 +44,7 @@ class TokenBufferMemory:
Message.created_at, Message.created_at,
Message.workflow_run_id, Message.workflow_run_id,
Message.parent_message_id, Message.parent_message_id,
Message.answer_tokens,
) )
.filter( .filter(
Message.conversation_id == self.conversation.id, Message.conversation_id == self.conversation.id,
@ -63,7 +64,7 @@ class TokenBufferMemory:
thread_messages = extract_thread_messages(messages) thread_messages = extract_thread_messages(messages)
# for newly created message, its answer is temporarily empty, we don't need to add it to memory # for newly created message, its answer is temporarily empty, we don't need to add it to memory
if thread_messages and not thread_messages[0].answer: if thread_messages and not thread_messages[0].answer and thread_messages[0].answer_tokens == 0:
thread_messages.pop(0) thread_messages.pop(0)
messages = list(reversed(thread_messages)) messages = list(reversed(thread_messages))

@ -32,6 +32,7 @@ class MilvusConfig(BaseModel):
batch_size: int = 100 # Batch size for operations batch_size: int = 100 # Batch size for operations
database: str = "default" # Database name database: str = "default" # Database name
enable_hybrid_search: bool = False # Flag to enable hybrid search enable_hybrid_search: bool = False # Flag to enable hybrid search
analyzer_params: Optional[str] = None # Analyzer params
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
@ -58,6 +59,7 @@ class MilvusConfig(BaseModel):
"user": self.user, "user": self.user,
"password": self.password, "password": self.password,
"db_name": self.database, "db_name": self.database,
"analyzer_params": self.analyzer_params,
} }
@ -300,14 +302,19 @@ class MilvusVector(BaseVector):
# Create the text field, enable_analyzer will be set True to support milvus automatically # Create the text field, enable_analyzer will be set True to support milvus automatically
# transfer text to sparse_vector, reference: https://milvus.io/docs/full-text-search.md # transfer text to sparse_vector, reference: https://milvus.io/docs/full-text-search.md
fields.append( content_field_kwargs: dict[str, Any] = {
FieldSchema( "max_length": 65_535,
Field.CONTENT_KEY.value, "enable_analyzer": self._hybrid_search_enabled,
DataType.VARCHAR, }
max_length=65_535, if (
enable_analyzer=self._hybrid_search_enabled, self._hybrid_search_enabled
) and self._client_config.analyzer_params is not None
) and self._client_config.analyzer_params.strip()
):
content_field_kwargs["analyzer_params"] = self._client_config.analyzer_params
fields.append(FieldSchema(Field.CONTENT_KEY.value, DataType.VARCHAR, **content_field_kwargs))
# Create the primary key field # Create the primary key field
fields.append(FieldSchema(Field.PRIMARY_KEY.value, DataType.INT64, is_primary=True, auto_id=True)) fields.append(FieldSchema(Field.PRIMARY_KEY.value, DataType.INT64, is_primary=True, auto_id=True))
# Create the vector field, supports binary or float vectors # Create the vector field, supports binary or float vectors
@ -383,5 +390,6 @@ class MilvusVectorFactory(AbstractVectorFactory):
password=dify_config.MILVUS_PASSWORD or "", password=dify_config.MILVUS_PASSWORD or "",
database=dify_config.MILVUS_DATABASE or "", database=dify_config.MILVUS_DATABASE or "",
enable_hybrid_search=dify_config.MILVUS_ENABLE_HYBRID_SEARCH or False, enable_hybrid_search=dify_config.MILVUS_ENABLE_HYBRID_SEARCH or False,
analyzer_params=dify_config.MILVUS_ANALYZER_PARAMS or "",
), ),
) )

@ -410,6 +410,7 @@ MILVUS_TOKEN=
MILVUS_USER= MILVUS_USER=
MILVUS_PASSWORD= MILVUS_PASSWORD=
MILVUS_ENABLE_HYBRID_SEARCH=False MILVUS_ENABLE_HYBRID_SEARCH=False
MILVUS_ANALYZER_PARAMS=
# MyScale configuration, only available when VECTOR_STORE is `myscale` # MyScale configuration, only available when VECTOR_STORE is `myscale`
# For multi-language support, please set MYSCALE_FTS_PARAMS with referring to: # For multi-language support, please set MYSCALE_FTS_PARAMS with referring to:

@ -142,6 +142,7 @@ x-shared-env: &shared-api-worker-env
MILVUS_USER: ${MILVUS_USER:-} MILVUS_USER: ${MILVUS_USER:-}
MILVUS_PASSWORD: ${MILVUS_PASSWORD:-} MILVUS_PASSWORD: ${MILVUS_PASSWORD:-}
MILVUS_ENABLE_HYBRID_SEARCH: ${MILVUS_ENABLE_HYBRID_SEARCH:-False} MILVUS_ENABLE_HYBRID_SEARCH: ${MILVUS_ENABLE_HYBRID_SEARCH:-False}
MILVUS_ANALYZER_PARAMS: ${MILVUS_ANALYZER_PARAMS:-}
MYSCALE_HOST: ${MYSCALE_HOST:-myscale} MYSCALE_HOST: ${MYSCALE_HOST:-myscale}
MYSCALE_PORT: ${MYSCALE_PORT:-8123} MYSCALE_PORT: ${MYSCALE_PORT:-8123}
MYSCALE_USER: ${MYSCALE_USER:-default} MYSCALE_USER: ${MYSCALE_USER:-default}

@ -557,7 +557,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Heading <Heading
url='/datasets/{dataset_id}' url='/datasets/{dataset_id}'
method='POST' method='PATCH'
title='Update knowledge base' title='Update knowledge base'
name='#update_dataset' name='#update_dataset'
/> />
@ -585,8 +585,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Property name='embedding_model' type='string' key='embedding_model'> <Property name='embedding_model' type='string' key='embedding_model'>
Specified embedding model, corresponding to the model field(Optional) Specified embedding model, corresponding to the model field(Optional)
</Property> </Property>
<Property name='retrieval_model' type='string' key='retrieval_model'> <Property name='retrieval_model' type='object' key='retrieval_model'>
Specified retrieval model, corresponding to the model field(Optional) Retrieval model (optional, if not filled, it will be recalled according to the default method)
- <code>search_method</code> (text) Search method: One of the following four keywords is required
- <code>keyword_search</code> Keyword search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>hybrid_search</code> Hybrid search
- <code>reranking_enable</code> (bool) Whether to enable reranking (optional); provide it when the search method is semantic_search or hybrid_search
- <code>reranking_model</code> (object) Rerank model configuration, required if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (float) Semantic search weight setting in hybrid search mode
- <code>top_k</code> (integer) Number of results to return (optional)
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (float) Score threshold
</Property> </Property>
<Property name='partial_member_list' type='array' key='partial_member_list'> <Property name='partial_member_list' type='array' key='partial_member_list'>
Partial member list(Optional) Partial member list(Optional)
@ -596,16 +609,56 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Col sticky> <Col sticky>
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="PATCH"
label="/datasets/{dataset_id}" label="/datasets/{dataset_id}"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `} targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
"name": "Test Knowledge Base",
"indexing_technique": "high_quality",
"permission": "only_me",
"embedding_model_provider": "zhipuai",
"embedding_model": "embedding-3",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
},
"partial_member_list": []
}'
`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \ curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",\ --data-raw '{
"embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' "name": "Test Knowledge Base",
"indexing_technique": "high_quality",
"permission": "only_me",
"embedding_model_provider": "zhipuai",
"embedding_model": "embedding-3",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
},
"partial_member_list": []
}'
``` ```
</CodeGroup> </CodeGroup>
<CodeGroup title="Response"> <CodeGroup title="Response">

@ -557,7 +557,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Heading <Heading
url='/datasets/{dataset_id}' url='/datasets/{dataset_id}'
method='POST' method='PATCH'
title='修改知识库详情' title='修改知识库详情'
name='#update_dataset' name='#update_dataset'
/> />
@ -589,8 +589,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Property name='embedding_model' type='string' key='embedding_model'> <Property name='embedding_model' type='string' key='embedding_model'>
嵌入模型(选填) 嵌入模型(选填)
</Property> </Property>
<Property name='retrieval_model' type='string' key='retrieval_model'> <Property name='retrieval_model' type='object' key='retrieval_model'>
检索模型(选填) 检索参数(选填,如不填,按照默认方式召回)
- <code>search_method</code> (text) 检索方法:以下四个关键字之一,必填
- <code>keyword_search</code> 关键字检索
- <code>semantic_search</code> 语义检索
- <code>full_text_search</code> 全文检索
- <code>hybrid_search</code> 混合检索
- <code>reranking_enable</code> (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
- <code>reranking_model</code> (object) Rerank 模型配置,非必填,如果启用了 reranking 则传值
- <code>reranking_provider_name</code> (string) Rerank 模型提供商
- <code>reranking_model_name</code> (string) Rerank 模型名称
- <code>weights</code> (float) 混合检索模式下语义检索的权重设置
- <code>top_k</code> (integer) 返回结果数量,非必填
- <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
- <code>score_threshold</code> (float) Score 阈值
</Property> </Property>
<Property name='partial_member_list' type='array' key='partial_member_list'> <Property name='partial_member_list' type='array' key='partial_member_list'>
部分团队成员 ID 列表(选填) 部分团队成员 ID 列表(选填)
@ -600,16 +613,56 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Col sticky> <Col sticky>
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="PATCH"
label="/datasets/{dataset_id}" label="/datasets/{dataset_id}"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `} targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
"name": "Test Knowledge Base",
"indexing_technique": "high_quality",
"permission": "only_me",
"embedding_model_provider": "zhipuai",
"embedding_model": "embedding-3",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
},
"partial_member_list": []
}'
`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \ curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",\ --data-raw '{
"embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' "name": "Test Knowledge Base",
"indexing_technique": "high_quality",
"permission": "only_me",
"embedding_model_provider": "zhipuai",
"embedding_model": "embedding-3",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
},
"partial_member_list": []
}'
``` ```
</CodeGroup> </CodeGroup>
<CodeGroup title="Response"> <CodeGroup title="Response">

@ -10,6 +10,7 @@ import { Group } from '@/app/components/base/icons/src/vender/other'
type Status = 'not-installed' | 'not-authorized' | undefined type Status = 'not-installed' | 'not-authorized' | undefined
export type ToolIconProps = { export type ToolIconProps = {
id: string
providerName: string providerName: string
} }
@ -29,10 +30,11 @@ export const ToolIcon = memo(({ providerName }: ToolIconProps) => {
const author = providerNameParts[0] const author = providerNameParts[0]
const name = providerNameParts[1] const name = providerNameParts[1]
const icon = useMemo(() => { const icon = useMemo(() => {
if (!isDataReady) return ''
if (currentProvider) return currentProvider.icon as string if (currentProvider) return currentProvider.icon as string
const iconFromMarketPlace = getIconFromMarketPlace(`${author}/${name}`) const iconFromMarketPlace = getIconFromMarketPlace(`${author}/${name}`)
return iconFromMarketPlace return iconFromMarketPlace
}, [author, currentProvider, name]) }, [author, currentProvider, name, isDataReady])
const status: Status = useMemo(() => { const status: Status = useMemo(() => {
if (!isDataReady) return undefined if (!isDataReady) return undefined
if (!currentProvider) return 'not-installed' if (!currentProvider) return 'not-installed'
@ -60,7 +62,7 @@ export const ToolIcon = memo(({ providerName }: ToolIconProps) => {
)} )}
ref={containerRef} ref={containerRef}
> >
{!iconFetchError {(!iconFetchError && isDataReady)
? <img ? <img
src={icon} src={icon}

@ -39,12 +39,13 @@ const AgentNode: FC<NodeProps<AgentNodeType>> = (props) => {
const tools = useMemo(() => { const tools = useMemo(() => {
const tools: Array<ToolIconProps> = [] const tools: Array<ToolIconProps> = []
currentStrategy?.parameters.forEach((param) => { currentStrategy?.parameters.forEach((param, i) => {
if (param.type === FormTypeEnum.toolSelector) { if (param.type === FormTypeEnum.toolSelector) {
const field = param.name const field = param.name
const value = inputs.agent_parameters?.[field]?.value const value = inputs.agent_parameters?.[field]?.value
if (value) { if (value) {
tools.push({ tools.push({
id: `${param.name}-${i}`,
providerName: value.provider_name as any, providerName: value.provider_name as any,
}) })
} }
@ -55,6 +56,7 @@ const AgentNode: FC<NodeProps<AgentNodeType>> = (props) => {
if (value) { if (value) {
(value as unknown as any[]).forEach((item) => { (value as unknown as any[]).forEach((item) => {
tools.push({ tools.push({
id: `${param.name}-${i}`,
providerName: item.provider_name, providerName: item.provider_name,
}) })
}) })
@ -102,8 +104,7 @@ const AgentNode: FC<NodeProps<AgentNodeType>> = (props) => {
{t('workflow.nodes.agent.toolbox')} {t('workflow.nodes.agent.toolbox')}
</GroupLabel>}> </GroupLabel>}>
<div className='grid grid-cols-10 gap-0.5'> <div className='grid grid-cols-10 gap-0.5'>
{/* eslint-disable-next-line sonarjs/no-uniq-key */} {tools.map(tool => <ToolIcon {...tool} key={tool.id} />)}
{tools.map(tool => <ToolIcon {...tool} key={Math.random()} />)}
</div> </div>
</Group>} </Group>}
</div> </div>

@ -54,7 +54,6 @@ const AgentPanel: FC<NodePanelProps<AgentNodeType>> = (props) => {
outputSchema, outputSchema,
handleMemoryChange, handleMemoryChange,
} = useConfig(props.id, props.data) } = useConfig(props.id, props.data)
console.log('currentStrategy', currentStrategy)
const { t } = useTranslation() const { t } = useTranslation()
const nodeInfo = useMemo(() => { const nodeInfo = useMemo(() => {
if (!runResult) if (!runResult)

Loading…
Cancel
Save