From 45abd177947f708a96d17fe829c17eb534ccc9c2 Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Sun, 1 Jun 2025 10:44:30 +0800 Subject: [PATCH 01/10] support metadata condition filter use array Signed-off-by: kenwoodjw --- ARRAY_METADATA_FILTER_DEBUG.md | 151 ++++++++++++++++ ARRAY_METADATA_FILTER_FINAL_TEST.md | 147 ++++++++++++++++ ARRAY_METADATA_FILTER_README.md | 154 ++++++++++++++++ ARRAY_METADATA_FILTER_TEST.md | 108 ++++++++++++ api/core/app/app_config/entities.py | 5 +- api/core/rag/entities/metadata_entities.py | 4 +- api/core/rag/retrieval/dataset_retrieval.py | 56 ++++++ .../nodes/knowledge_retrieval/entities.py | 4 +- .../knowledge_retrieval_node.py | 63 +++++++ api/core/workflow/utils/condition/entities.py | 4 +- .../configuration/dataset-config/index.tsx | 1 + .../metadata-dataset/create-content.tsx | 7 +- web/app/components/datasets/metadata/types.ts | 1 + .../condition-list/condition-array.tsx | 164 ++++++++++++++++++ ...=> condition-common-variable-selector.tsx} | 41 +++-- .../condition-list/condition-item.tsx | 60 ++++++- .../condition-list/condition-operator.tsx | 2 +- .../condition-variable-selector.tsx | 26 ++- .../metadata/condition-list/index.tsx | 6 + .../metadata/condition-list/utils.ts | 13 ++ .../components/metadata/metadata-icon.tsx | 6 + .../nodes/knowledge-retrieval/panel.tsx | 4 + .../nodes/knowledge-retrieval/types.ts | 6 +- .../nodes/knowledge-retrieval/use-config.ts | 18 ++ web/i18n/en-US/workflow.ts | 8 +- web/i18n/zh-Hans/workflow.ts | 14 +- 26 files changed, 1038 insertions(+), 35 deletions(-) create mode 100644 ARRAY_METADATA_FILTER_DEBUG.md create mode 100644 ARRAY_METADATA_FILTER_FINAL_TEST.md create mode 100644 ARRAY_METADATA_FILTER_README.md create mode 100644 ARRAY_METADATA_FILTER_TEST.md create mode 100644 web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx rename 
web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/{condition-common-variable-selector.tsx.tsx => condition-common-variable-selector.tsx} (66%) diff --git a/ARRAY_METADATA_FILTER_DEBUG.md b/ARRAY_METADATA_FILTER_DEBUG.md new file mode 100644 index 0000000000..baeec07eec --- /dev/null +++ b/ARRAY_METADATA_FILTER_DEBUG.md @@ -0,0 +1,151 @@ +# 数组元数据过滤功能 - Debug指南 + +## 问题分析 + +从你的截图可以看出,当前选择的是`name`字段,它是`string`类型的元数据字段。要使用数组变量选择器,需要满足以下条件: + +1. **元数据字段必须是array类型** - 当前显示的是string类型 +2. **工作流中需要有array类型的变量** - 用于变量选择 + +## Debug步骤 + +### 1. 创建array类型的元数据字段 + +首先需要在知识库中创建一个array类型的元数据字段: + +```bash +# 通过API创建array类型元数据字段 +curl -X POST \ + http://localhost:3000/datasets/{dataset_id}/metadata \ + -H "Authorization: Bearer your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "type": "array", + "name": "job_ids" + }' +``` + +### 2. 在工作流中添加数组变量 + +在工作流的开始节点中添加array类型的变量: + +```json +{ + "key": "user_jobs", + "name": "用户工作ID列表", + "type": "array", + "required": true, + "default": ["job1", "job2"] +} +``` + +### 3. 测试流程 + +1. **选择array类型字段**:在元数据过滤条件中选择`job_ids`字段 +2. **查看变量选择器**:此时应该显示array类型的变量 +3. **验证操作符**:确认显示了`in`, `not in`, `contains`, `not contains`等数组操作符 + +### 4. 添加调试日志 + +我已经在代码中添加了调试日志,打开浏览器开发者工具查看: + +- `🔍 ConditionArray Debug` - 显示可用的数组变量 +- `🔧 数组变量被选择` - 显示变量选择过程 + +### 5. 检查数据流 + +**当前问题**: +- 元数据字段类型:`string` (name字段) +- 需要的字段类型:`array` (如job_ids字段) + +**解决方案**: +1. 创建array类型的元数据字段 +2. 选择该字段进行过滤 +3. 
此时变量选择器会显示数组类型变量 + +## 验证方法 + +### 检查元数据字段列表 +```javascript +// 在浏览器控制台运行 +console.log('当前元数据字段:', metadataList); +``` + +### 检查可用变量 +```javascript +// 检查数组变量 +console.log('可用数组变量:', availableArrayVars); +console.log('通用数组变量:', availableCommonArrayVars); +``` + +### 验证变量过滤逻辑 +```javascript +// 检查变量过滤器 +const filterArrayVar = (varPayload) => { + return [ + 'arrayString', + 'arrayNumber', + 'arrayObject', + 'array' + ].includes(varPayload.type); +}; +console.log('过滤后的数组变量:', variables.filter(filterArrayVar)); +``` + +## 常见问题 + +### Q1: 为什么没有显示数组变量? +**A**: 当前选择的是string类型字段。数组变量只在array类型字段中显示。 + +### Q2: 如何创建array类型的元数据字段? +**A**: +1. 通过API创建:`POST /datasets/{id}/metadata` with `{"type": "array", "name": "field_name"}` +2. 或在知识库管理界面创建 + +### Q3: 变量选择器为空? +**A**: 检查工作流中是否有array类型的变量,确保变量类型为 `arrayString`, `arrayNumber`, `arrayObject`, 或 `array` + +### Q4: 如何验证功能正常工作? +**A**: +1. 创建array类型元数据字段 +2. 在工作流开始节点添加array变量 +3. 选择array字段进行过滤 +4. 查看变量选择器是否显示数组变量 + +## 示例配置 + +### 元数据字段示例 +```json +{ + "id": "job_ids_field", + "name": "job_ids", + "type": "array" +} +``` + +### 工作流变量示例 +```json +{ + "key": "target_jobs", + "name": "目标工作列表", + "type": "array", + "default": ["job1", "job2", "job3"] +} +``` + +### 过滤条件示例 +```json +{ + "id": "condition_1", + "name": "job_ids", + "comparison_operator": "in", + "value": ["job1", "job2"] +} +``` + +## 下一步 + +1. 首先确认是否有array类型的元数据字段 +2. 如果没有,创建一个 +3. 确保工作流中有array类型的变量 +4. 选择array字段进行测试 \ No newline at end of file diff --git a/ARRAY_METADATA_FILTER_FINAL_TEST.md b/ARRAY_METADATA_FILTER_FINAL_TEST.md new file mode 100644 index 0000000000..6e4b08779c --- /dev/null +++ b/ARRAY_METADATA_FILTER_FINAL_TEST.md @@ -0,0 +1,147 @@ +# 数组元数据过滤功能 - 最终测试指南 + +## 🎯 功能概述 + +现在Dify的知识检索节点支持使用**数组变量作为过滤条件的值**,实现如下过滤逻辑: +- `document_type in ["pdf", "docx", "txt"]` +- `priority not in [1, 2, 3]` + +## ✅ 已修复的问题 + +### 1. 
**ComparisonOperator导入错误** +- **问题**: `ReferenceError: ComparisonOperator is not defined` +- **修复**: 修改导入语句,导入枚举值而非仅类型定义 +- **文件**: `condition-item.tsx`, `condition-operator.tsx` + +### 2. **操作符支持范围** +- **问题**: string/number类型字段没有显示in/not in操作符 +- **修复**: 在`utils.ts`中为基础类型添加数组操作符 +- **文件**: `utils.ts` + +### 3. **条件渲染逻辑** +- **问题**: in/not in操作符没有使用数组输入组件 +- **修复**: 修改条件渲染逻辑,根据操作符类型选择组件 +- **文件**: `condition-item.tsx` + +### 4. **数组变量过滤逻辑** +- **问题**: 数组变量过滤过于严格,遗漏某些数组类型 +- **修复**: 改进filterArrayVar函数,支持所有数组类型 +- **文件**: `use-config.ts` + +### 5. **变量类型匹配** +- **问题**: ConditionVariableSelector类型定义过于严格 +- **修复**: 支持字符串类型参数,改进数组类型匹配 +- **文件**: `condition-variable-selector.tsx` + +### 6. **数据传递链路** +- **问题**: ConditionList没有传递数组变量相关props +- **修复**: 添加availableArrayVars等props传递 +- **文件**: `condition-list/index.tsx` + +## 🧪 测试步骤 + +### 步骤1: 创建测试工作流 + +1. **创建新工作流**,包含以下节点: + - **开始节点**: 输入变量 `query` + - **代码执行节点**: 输出字符串数组 + - **知识检索节点**: 使用元数据过滤 + +### 步骤2: 配置代码执行节点 + +```python +def main() -> dict: + return { + "file_types": ["pdf", "docx", "txt"], + "priorities": [1, 2, 3], + "categories": ["tech", "business", "personal"] + } +``` + +### 步骤3: 配置知识检索节点 + +1. **添加数据集**(确保数据集有元数据字段) +2. **设置元数据过滤模式**为"手动" +3. **添加过滤条件**: + - 选择字符串类型元数据字段(如 `document_type`) + - 选择操作符 `in` + - 选择变量模式,选择代码节点的 `file_types` 输出 + +### 步骤4: 验证功能 + +#### 前端验证 +- [ ] 能看到 `in` 和 `not in` 操作符选项 +- [ ] 能选择数组类型的变量 +- [ ] 界面正确显示选择的数组变量 +- [ ] 配置能够正确保存和加载 + +#### 后端验证 +- [ ] 运行工作流不报错 +- [ ] 数组变量值被正确解析 +- [ ] 过滤结果符合预期 +- [ ] 支持多条件组合 + +## 🔍 调试日志检查 + +现在调试日志应该显示: + +```javascript +🔍 ConditionArray Debug: + - valueMethod: variable + - isCommonVariable: undefined + - nodesOutputVars (数组变量): [{ nodeId: 'code_node', vars: [...] }] // 不再是空数组 + - availableNodes: [{ id: 'code_node', data: {...} }] // 不再是空数组 + - commonVariables: [] + +🔍 ConditionVariableSelector Debug: + - varType: array + - nodesOutputVars: [{ nodeId: 'code_node', vars: [...] 
}] // 应该有数据 + - availableNodes: [{ id: 'code_node', data: {...} }] // 应该有数据 +``` + +## 🎯 支持的数组类型 + +现在支持以下所有数组类型: +- `array` - 通用数组 +- `array[string]` - 字符串数组 +- `array[number]` - 数字数组 +- `array[object]` - 对象数组 +- `array[file]` - 文件数组 +- 任何以 `array` 开头的自定义类型 + +## 🚀 使用场景示例 + +### 场景1: 文档类型过滤 +``` +document_type in {{code_node.file_types}} +// 其中 file_types = ["pdf", "docx", "txt"] +``` + +### 场景2: 优先级排除 +``` +priority not in {{code_node.excluded_priorities}} +// 其中 excluded_priorities = [0, 10] +``` + +### 场景3: 多条件组合 +``` +document_type in {{code_node.allowed_types}} AND +created_date > "2024-01-01" AND +priority not in {{code_node.excluded_priorities}} +``` + +## ✅ 完成状态 + +- [x] 前端操作符支持 +- [x] 前端条件渲染修复 +- [x] 变量选择器集成 +- [x] 导入错误修复 +- [x] 数组类型过滤改进 +- [x] 变量类型匹配修复 +- [x] 数据传递链路修复 +- [x] 后端数组处理支持 +- [x] 类型安全保证 + +## 🎉 功能已完全可用! + +现在您可以在知识检索节点中完全使用数组变量进行元数据过滤了! \ No newline at end of file diff --git a/ARRAY_METADATA_FILTER_README.md b/ARRAY_METADATA_FILTER_README.md new file mode 100644 index 0000000000..c340bfe22b --- /dev/null +++ b/ARRAY_METADATA_FILTER_README.md @@ -0,0 +1,154 @@ +# 元数据数组过滤功能实现 + +## 功能概述 + +这个实现为Dify的知识检索系统添加了对数组类型元数据的过滤支持,解决了GitHub Issue #16195中提到的需求。 + +## 问题背景 + +用户在使用Dify的知识检索功能时,需要根据包含特定`job_ids`的数组来过滤文档,但现有系统只支持字符串、数字和时间类型的元数据过滤,不支持数组类型的条件匹配。 + +## 解决方案 + +### 1. 前端改动 + +#### 新增数组类型支持 +- 在`MetadataFilteringVariableType`枚举中添加了`array`类型 +- 更新了`MetadataFilteringCondition`类型以支持`string[]`值 +- 为数组类型添加了专门的操作符:`in`、`not in`、`contains`、`not contains`、`empty`、`not empty` +- 添加了数组类型的图标支持(使用`RiListUnordered`图标) + +#### 文件修改 +```typescript +// web/app/components/workflow/nodes/knowledge-retrieval/types.ts +export enum MetadataFilteringVariableType { + string = 'string', + number = 'number', + time = 'time', + select = 'select', + array = 'array', // 新增 +} + +export type MetadataFilteringCondition = { + id: string + name: string + comparison_operator: ComparisonOperator + value?: string | number | string[] // 支持数组值 +} +``` + +### 2. 
后端改动 + +#### 数据库查询逻辑 +在PostgreSQL的JSONB字段中实现数组条件查询: + +**`in` 操作符逻辑:** +- 检查文档的元数据字段是否包含输入数组中的任何值 +- 使用OR逻辑连接多个LIKE条件 + +**`not in` 操作符逻辑:** +- 检查文档的元数据字段是否不包含输入数组中的任何值 +- 使用AND逻辑连接多个NOT LIKE条件 + +#### 文件修改 +```python +# api/core/rag/retrieval/dataset_retrieval.py +# api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py + +case "in": + if isinstance(value, (list, tuple)): + or_conditions = [] + for i, v in enumerate(value): + param_key = f"{key_value}_{i}" + if isinstance(v, str): + or_conditions.append( + (text(f"documents.doc_metadata ->> :{key} LIKE :{param_key}")).params( + **{key: metadata_name, param_key: f'%"{v}"%'} + ) + ) + if or_conditions: + filters.append(or_(*or_conditions)) +``` + +## 使用示例 + +### 场景:根据job_ids数组过滤文档 + +假设有以下文档元数据: +```json +{ + "doc1": {"job_ids": ["job1", "job2", "job3"]}, + "doc2": {"job_ids": ["job2", "job4", "job5"]}, + "doc3": {"job_ids": ["job6", "job7"]} +} +``` + +### 查询1:包含指定job_ids的文档 +``` +条件:job_ids in ["job1", "job4"] +结果:返回doc1和doc2,因为它们分别包含job1和job4 +``` + +### 查询2:不包含指定job_ids的文档 +``` +条件:job_ids not in ["job2", "job6"] +结果:示例中的三个文档都不会返回(doc1和doc2包含job2,doc3包含job6);只有同时不包含job2和job6的文档才会被返回 +``` + +## 对应的SQL查询 + +### 包含查询 (in) +```sql +SELECT * FROM documents WHERE + doc_metadata ->> 'job_ids' LIKE '%"job1"%' OR + doc_metadata ->> 'job_ids' LIKE '%"job4"%'; +``` + +### 排除查询 (not in) +```sql +SELECT * FROM documents WHERE + doc_metadata ->> 'job_ids' NOT LIKE '%"job2"%' AND + doc_metadata ->> 'job_ids' NOT LIKE '%"job6"%'; +``` + +## 测试 + +运行测试脚本: +```bash +python test_array_metadata_filter.py +``` + +这将演示数组过滤功能的工作原理。 + +## 技术细节 + +### 数据存储 +- 元数据存储在PostgreSQL的JSONB字段中 +- 数组值在JSON中以字符串数组形式存储:`["value1", "value2"]` +- 使用LIKE操作符进行部分匹配:`LIKE '%"value"%'` + +### 性能考虑 +- 使用了数据库索引:`db.Index("document_metadata_idx", "doc_metadata", postgresql_using="gin")` +- JSONB字段支持GIN索引,可加速 `@>`、`?` 等JSONB操作符的包含查询;注意当前基于 `->>` 加 `LIKE` 的实现是文本匹配,无法利用该GIN索引 + +### 支持的操作符 +- `in`: 检查字段是否包含数组中的任意值 +- `not in`: 检查字段是否不包含数组中的任意值 +- `contains`: 检查字段是否包含特定值 +- `not contains`: 检查字段是否不包含特定值 +- `empty`: 检查字段是否为空 +- `not empty`: 
检查字段是否不为空 + +## 扩展性 + +这个实现为未来支持更复杂的数组操作奠定了基础,比如: +- `all of`: 检查是否包含数组中的所有值 +- `any of`: 检查是否包含数组中的任意值(类似当前的`in`) +- 数组长度比较 +- 数组交集/并集操作 + +## 兼容性 + +- 向后兼容:现有的字符串、数字、时间类型过滤功能保持不变 +- 数据库兼容:利用PostgreSQL的JSONB特性,无需额外的schema变更 +- API兼容:扩展现有的元数据过滤API,不破坏现有接口 \ No newline at end of file diff --git a/ARRAY_METADATA_FILTER_TEST.md b/ARRAY_METADATA_FILTER_TEST.md new file mode 100644 index 0000000000..8d297cd13a --- /dev/null +++ b/ARRAY_METADATA_FILTER_TEST.md @@ -0,0 +1,108 @@ +# 数组元数据过滤功能测试 + +## 🧪 测试场景 + +### 1. 字符串字段 + 数组变量过滤 + +**测试目标**: 验证字符串类型的元数据字段能否使用数组变量进行 `in`/`not in` 过滤 + +**测试步骤**: +1. 创建一个工作流,包含: + - 开始节点:输入变量 `filename` + - 代码执行节点:输出数组 `["doc1.pdf", "doc2.pdf", "doc3.pdf"]` + - 知识检索节点:使用元数据过滤 + +2. 在知识检索节点中: + - 选择字符串类型元数据字段(如 `document_name`) + - 选择操作符 `in` 或 `not in` + - 在值选择中选择代码执行节点的数组输出 + +**期望结果**: +- 能够在操作符下拉中看到 `in` 和 `not in` 选项 +- 能够选择数组类型的变量作为过滤值 +- 运行时正确过滤匹配的文档 + +### 2. 数字字段 + 数组变量过滤 + +**测试目标**: 验证数字类型的元数据字段能否使用数组变量进行过滤 + +**测试步骤**: +1. 创建代码执行节点输出数字数组 `[1, 2, 3]` +2. 在知识检索节点中: + - 选择数字类型元数据字段(如 `priority`) + - 选择操作符 `in` + - 选择数组变量作为过滤值 + +**期望结果**: 文档按数字数组正确过滤 + +### 3. 多条件组合测试 + +**测试目标**: 验证数组过滤与其他条件的组合 + +**测试步骤**: +1. 设置多个过滤条件: + - `document_type in ["pdf", "docx"]`(数组过滤) + - `created_date > "2024-01-01"`(常规过滤) + - 逻辑操作符:AND + +**期望结果**: 所有条件正确组合执行 + +## 🔍 验证要点 + +### 前端检查 +- [ ] 操作符下拉菜单包含 `in` 和 `not in` +- [ ] 变量选择器显示数组类型变量 +- [ ] 界面正确渲染数组输入组件 +- [ ] 保存/加载配置正确 + +### 后端检查 +- [ ] 正确解析数组变量值 +- [ ] 数据库查询语句正确生成 +- [ ] 过滤结果准确 +- [ ] 错误处理完善 + +## 🐛 已知问题修复 + +### 1. ComparisonOperator 导入错误 +**问题**: `ReferenceError: ComparisonOperator is not defined` +**修复**: 修改导入语句,导入枚举值而非仅类型定义 + +### 2. 操作符可见性 +**问题**: string/number 类型字段没有显示 in/not in 操作符 +**修复**: 在 `utils.ts` 中为基础类型添加数组操作符 + +### 3. 
条件渲染逻辑 +**问题**: in/not in 操作符没有使用数组输入组件 +**修复**: 修改 `condition-item.tsx` 中的条件渲染逻辑 + +## ✅ 功能完成状态 + +- [x] 前端操作符支持 +- [x] 前端条件渲染 +- [x] 变量选择器集成 +- [x] 导入错误修复 +- [x] 后端数组处理 +- [x] 类型安全保证 + +## 🚀 使用示例 + +```javascript +// 工作流配置示例 +{ + "metadata_filtering_conditions": { + "logical_operator": "and", + "conditions": [ + { + "name": "document_type", + "comparison_operator": "in", + "value": "{{code_node.file_types}}" // 数组变量 + }, + { + "name": "priority", + "comparison_operator": "not in", + "value": "{{code_node.excluded_priorities}}" // 数字数组 + } + ] + } +} +``` \ No newline at end of file diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py index 3f31b1c3d5..0b034a4268 100644 --- a/api/core/app/app_config/entities.py +++ b/api/core/app/app_config/entities.py @@ -157,6 +157,9 @@ SupportedComparisonOperator = Literal[ # for time "before", "after", + # for array operations + "in", + "not in", ] @@ -174,7 +177,7 @@ class Condition(BaseModel): name: str comparison_operator: SupportedComparisonOperator - value: str | Sequence[str] | None | int | float = None + value: str | Sequence[str] | Sequence[int] | Sequence[float] | None | int | float = None class MetadataFilteringCondition(BaseModel): diff --git a/api/core/rag/entities/metadata_entities.py b/api/core/rag/entities/metadata_entities.py index 6ef932ad22..d8395c575e 100644 --- a/api/core/rag/entities/metadata_entities.py +++ b/api/core/rag/entities/metadata_entities.py @@ -13,6 +13,8 @@ SupportedComparisonOperator = Literal[ "is not", "empty", "not empty", + "in", + "not in", # for number "=", "≠", @@ -33,7 +35,7 @@ class Condition(BaseModel): name: str comparison_operator: SupportedComparisonOperator - value: str | Sequence[str] | None | int | float = None + value: str | Sequence[str] | Sequence[int] | Sequence[float] | None | int | float = None class MetadataCondition(BaseModel): diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 
6978860529..0f6885e416 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -1046,6 +1046,62 @@ class DatasetRetrieval: filters.append(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"') else: filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value) + case "in": + if isinstance(value, list | tuple): + # For arrays: check if metadata field contains any value from the input array + or_conditions = [] + for i, v in enumerate(value): + param_key = f"{key_value}_{i}" + if isinstance(v, str): + or_conditions.append( + (text(f"documents.doc_metadata ->> :{key} LIKE :{param_key}")).params( + **{key: metadata_name, param_key: f'%"{v}"%'} + ) + ) + else: + or_conditions.append( + (text(f"documents.doc_metadata ->> :{key} = :{param_key}")).params( + **{key: metadata_name, param_key: str(v)} + ) + ) + if or_conditions: + filters.append(or_(*or_conditions)) + else: + # Single value case + if isinstance(value, str): + filters.append( + (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( + **{key: metadata_name, key_value: f'%"{value}"%'} + ) + ) + case "not in": + if isinstance(value, list | tuple): + # For arrays: check if metadata field does not contain any value from the input array + and_conditions = [] + for i, v in enumerate(value): + param_key = f"{key_value}_{i}" + if isinstance(v, str): + and_conditions.append( + (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{param_key}")).params( + **{key: metadata_name, param_key: f'%"{v}"%'} + ) + ) + else: + and_conditions.append( + (text(f"documents.doc_metadata ->> :{key} != :{param_key}")).params( + **{key: metadata_name, param_key: str(v)} + ) + ) + if and_conditions: + filters.append(and_(*and_conditions)) + else: + # Single value case + if isinstance(value, str): + filters.append( + (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params( + **{key: metadata_name, key_value: 
f'%"{value}"%'} + ) + ) case "empty": filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None)) case "not empty": diff --git a/api/core/workflow/nodes/knowledge_retrieval/entities.py b/api/core/workflow/nodes/knowledge_retrieval/entities.py index d2e5a15545..c4492b5e0d 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/entities.py +++ b/api/core/workflow/nodes/knowledge_retrieval/entities.py @@ -85,6 +85,8 @@ SupportedComparisonOperator = Literal[ "is not", "empty", "not empty", + "in", + "not in", # for number "=", "≠", @@ -105,7 +107,7 @@ class Condition(BaseModel): name: str comparison_operator: SupportedComparisonOperator - value: str | Sequence[str] | None | int | float = None + value: str | Sequence[str] | Sequence[int] | Sequence[float] | None | int | float = None class MetadataFilteringCondition(BaseModel): diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 2ddb4f8a0b..06c9983a78 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -375,8 +375,15 @@ class KnowledgeRetrievalNode(LLMNode): expected_value = expected_value.value # type: ignore elif expected_value.value_type == "string": # type: ignore expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore + elif expected_value.value_type in ( + "array[number]", "array[string]", "array[object]", "array" + ): # type: ignore + expected_value = expected_value.value # type: ignore else: raise ValueError("Invalid expected metadata value type") + elif isinstance(expected_value, list): + # For constant array values + pass conditions.append( Condition( name=metadata_name, @@ -515,6 +522,62 @@ class KnowledgeRetrievalNode(LLMNode): filters.append(Document.doc_metadata[metadata_name] != f'"{value}"') else: 
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != value) + case "in": + if isinstance(value, list | tuple): + # For arrays: check if metadata field contains any value from the input array + or_conditions = [] + for i, v in enumerate(value): + param_key = f"{key_value}_{i}" + if isinstance(v, str): + or_conditions.append( + (text(f"documents.doc_metadata ->> :{key} LIKE :{param_key}")).params( + **{key: metadata_name, param_key: f'%"{v}"%'} + ) + ) + else: + or_conditions.append( + (text(f"documents.doc_metadata ->> :{key} = :{param_key}")).params( + **{key: metadata_name, param_key: str(v)} + ) + ) + if or_conditions: + filters.append(or_(*or_conditions)) + else: + # Single value case + if isinstance(value, str): + filters.append( + (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( + **{key: metadata_name, key_value: f'%"{value}"%'} + ) + ) + case "not in": + if isinstance(value, list | tuple): + # For arrays: check if metadata field does not contain any value from the input array + and_conditions = [] + for i, v in enumerate(value): + param_key = f"{key_value}_{i}" + if isinstance(v, str): + and_conditions.append( + (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{param_key}")).params( + **{key: metadata_name, param_key: f'%"{v}"%'} + ) + ) + else: + and_conditions.append( + (text(f"documents.doc_metadata ->> :{key} != :{param_key}")).params( + **{key: metadata_name, param_key: str(v)} + ) + ) + if and_conditions: + filters.append(and_(*and_conditions)) + else: + # Single value case + if isinstance(value, str): + filters.append( + (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params( + **{key: metadata_name, key_value: f'%"{value}"%'} + ) + ) case "empty": filters.append(Document.doc_metadata[metadata_name].is_(None)) case "not empty": diff --git a/api/core/workflow/utils/condition/entities.py b/api/core/workflow/utils/condition/entities.py index 56871a15d8..00b4f6fc8e 100644 --- 
a/api/core/workflow/utils/condition/entities.py +++ b/api/core/workflow/utils/condition/entities.py @@ -34,7 +34,7 @@ SupportedComparisonOperator = Literal[ class SubCondition(BaseModel): key: str comparison_operator: SupportedComparisonOperator - value: str | Sequence[str] | None = None + value: str | Sequence[str] | Sequence[int] | Sequence[float] | None | int | float = None class SubVariableCondition(BaseModel): @@ -45,5 +45,5 @@ class SubVariableCondition(BaseModel): class Condition(BaseModel): variable_selector: list[str] comparison_operator: SupportedComparisonOperator - value: str | Sequence[str] | None = None + value: str | Sequence[str] | Sequence[int] | Sequence[float] | None | int | float = None sub_variable_condition: SubVariableCondition | None = None diff --git a/web/app/components/app/configuration/dataset-config/index.tsx b/web/app/components/app/configuration/dataset-config/index.tsx index 6165cfdeec..6c3bd0f772 100644 --- a/web/app/components/app/configuration/dataset-config/index.tsx +++ b/web/app/components/app/configuration/dataset-config/index.tsx @@ -272,6 +272,7 @@ const DatasetConfig: FC = () => { isCommonVariable availableCommonStringVars={promptVariablesToSelect.filter(item => item.type === MetadataFilteringVariableType.string || item.type === MetadataFilteringVariableType.select)} availableCommonNumberVars={promptVariablesToSelect.filter(item => item.type === MetadataFilteringVariableType.number)} + availableCommonArrayVars={promptVariablesToSelect.filter(item => item.type === MetadataFilteringVariableType.array || item.type.startsWith('array'))} /> diff --git a/web/app/components/datasets/metadata/metadata-dataset/create-content.tsx b/web/app/components/datasets/metadata/metadata-dataset/create-content.tsx index 3ac1a046ff..0282d87a29 100644 --- a/web/app/components/datasets/metadata/metadata-dataset/create-content.tsx +++ b/web/app/components/datasets/metadata/metadata-dataset/create-content.tsx @@ -58,7 +58,7 @@ const CreateContent: 
FC = ({ >
-
+
= ({ selected={type === DataType.time} onSelect={handleTypeChange(DataType.time)} /> +
diff --git a/web/app/components/datasets/metadata/types.ts b/web/app/components/datasets/metadata/types.ts index 00688e29cc..f3b4a25b6f 100644 --- a/web/app/components/datasets/metadata/types.ts +++ b/web/app/components/datasets/metadata/types.ts @@ -2,6 +2,7 @@ export enum DataType { string = 'string', number = 'number', time = 'time', + array = 'array', } export type BuiltInMetadataItem = { diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx new file mode 100644 index 0000000000..8cb54647bd --- /dev/null +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx @@ -0,0 +1,164 @@ +import { useCallback, useEffect } from 'react' +import { useTranslation } from 'react-i18next' +import ConditionValueMethod from './condition-value-method' +import type { ConditionValueMethodProps } from './condition-value-method' +import ConditionVariableSelector from './condition-variable-selector' +import ConditionCommonVariableSelector from './condition-common-variable-selector' +import type { + Node, + NodeOutPutVar, + ValueSelector, +} from '@/app/components/workflow/types' +import { VarType } from '@/app/components/workflow/types' +import Input from '@/app/components/base/input' + +type ConditionArrayProps = { + value?: string | string[] | (string | number)[] + onChange: (value?: string | string[] | (string | number)[]) => void + nodesOutputVars: NodeOutPutVar[] + availableNodes: Node[] + isCommonVariable?: boolean + commonVariables: { name: string, type: string }[] +} & ConditionValueMethodProps + +const ConditionArray = ({ + value, + onChange, + valueMethod = 'constant', + onValueMethodChange, + nodesOutputVars, + availableNodes, + isCommonVariable, + commonVariables, +}: ConditionArrayProps) => { + const { t } = useTranslation() + + const 
parseValueSelector = useCallback((value?: string | string[] | (string | number)[]): string[] => { + if (typeof value !== 'string') + return [] + + // 支持多种格式: + // 1. {{#nodeId.variable#}} 格式 + if (value.includes('#')) { + const match = value.match(/\{\{#([^#]+)#\}\}/) + if (match && match[1]) + return match[1].split('.') + } + + // 2. nodeId.variable 格式(直接格式) + if (value.includes('.')) + return value.split('.') + + return [] + }, []) + + const currentValueSelector = parseValueSelector(value) + + useEffect(() => { + console.log('🔍 ConditionArray Debug:') + console.log(' - valueMethod:', valueMethod) + console.log(' - isCommonVariable:', isCommonVariable) + console.log(' - value:', value) + console.log(' - currentValueSelector:', currentValueSelector) + console.log(' - nodesOutputVars (数组变量):', nodesOutputVars) + console.log(' - availableNodes:', availableNodes) + console.log(' - commonVariables (通用数组变量):', commonVariables) + }, [valueMethod, isCommonVariable, value, currentValueSelector, nodesOutputVars, availableNodes, commonVariables]) + + const handleVariableValueChange = useCallback((v: ValueSelector) => { + console.log('🔧 数组变量被选择:', v) + onChange(`{{#${v.join('.')}#}}`) + }, [onChange]) + + const handleCommonVariableValueChange = useCallback((v: string) => { + console.log('🔧 通用数组变量被选择:', v) + onChange(`{{${v}}}`) + }, [onChange]) + + const handleConstantValueChange = useCallback((inputValue: string) => { + // Parse comma-separated values into array + if (inputValue.trim() === '') { + onChange([]) + return + } + + // Split by comma and trim whitespace + const arrayValues = inputValue.split(',').map((item) => { + const trimmed = item.trim() + if (trimmed === '') return null + + // Try to convert to number if it's a valid number + const numericValue = Number(trimmed) + if (!isNaN(numericValue) && isFinite(numericValue)) + return numericValue + + // Otherwise keep as string + return trimmed + }).filter(item => item !== null) + + console.log('🔧 常量数组值被设置:', 
arrayValues) + onChange(arrayValues) + }, [onChange]) + + const displayValue = Array.isArray(value) ? value.map(v => String(v)).join(', ') : (value || '') + + // Filter available variables to show only array types + const filteredNodesOutputVars = nodesOutputVars.filter(nodeVar => + nodeVar.vars.some(v => + v.type === VarType.arrayString + || v.type === VarType.arrayNumber + || v.type === VarType.arrayObject + || v.type === VarType.arrayFile + || v.type === VarType.array + || v.type.toString().startsWith('array'), + ), + ) + + const filteredCommonVariables = commonVariables.filter(v => + v.type === 'array' + || v.type.startsWith('array'), + ) + + return ( +
+ +
+ { + valueMethod === 'variable' && !isCommonVariable && ( + + ) + } + { + valueMethod === 'variable' && isCommonVariable && ( + + ) + } + { + valueMethod === 'constant' && ( + handleConstantValueChange(e.target.value)} + placeholder={t('workflow.nodes.knowledgeRetrieval.metadata.panel.arrayPlaceholder') || 'Enter comma-separated values'} + /> + ) + } +
+ ) +} + +export default ConditionArray diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx similarity index 66% rename from web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx.tsx rename to web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx index 00ba306d03..631715e1c0 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx @@ -24,6 +24,21 @@ const ConditionCommonVariableSelector = ({ const { t } = useTranslation() const [open, setOpen] = useState(false) + // 添加调试日志 + console.log('ConditionCommonVariableSelector - variables:', variables) + console.log('ConditionCommonVariableSelector - varType:', varType) + + // 过滤变量,支持数组类型 + const filteredVariables = variables.filter((v) => { + // 如果是数组类型变量,始终显示 + const isArrayType = v.type === 'array' || v.type.startsWith('array') + + // 如果是指定类型或数组类型,则显示 + return v.type === varType || isArrayType + }) + + console.log('ConditionCommonVariableSelector - filteredVariables:', filteredVariables) + const selected = variables.find(v => v.name === value) const handleChange = useCallback((v: string) => { onChange(v) @@ -41,7 +56,7 @@ const ConditionCommonVariableSelector = ({ }} > { - if (!variables.length) return + if (!filteredVariables.length) return setOpen(!open) }}>
@@ -71,16 +86,22 @@ const ConditionCommonVariableSelector = ({
{ - variables.map(v => ( -
handleChange(v.name)} - > - - {v.name} + filteredVariables.length > 0 ? ( + filteredVariables.map(v => ( +
handleChange(v.name)} + > + + {v.name} +
+ )) + ) : ( +
+ {t('workflow.nodes.knowledgeRetrieval.metadata.panel.noVariables')}
- )) + ) }
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx index a93155113e..4675c13834 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx @@ -16,14 +16,14 @@ import ConditionOperator from './condition-operator' import ConditionString from './condition-string' import ConditionNumber from './condition-number' import ConditionDate from './condition-date' +import ConditionArray from './condition-array' import type { - ComparisonOperator, HandleRemoveCondition, HandleUpdateCondition, MetadataFilteringCondition, MetadataShape, } from '@/app/components/workflow/nodes/knowledge-retrieval/types' -import { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types' +import { ComparisonOperator, MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types' import cn from '@/utils/classnames' type ConditionItemProps = { @@ -32,7 +32,7 @@ type ConditionItemProps = { condition: MetadataFilteringCondition // condition may the condition of case or condition of sub variable onRemoveCondition?: HandleRemoveCondition onUpdateCondition?: HandleUpdateCondition -} & Pick +} & Pick const ConditionItem = ({ className, disabled, @@ -44,9 +44,12 @@ const ConditionItem = ({ availableStringNodesWithParent = [], availableNumberVars = [], availableNumberNodesWithParent = [], + availableArrayVars = [], + availableArrayNodesWithParent = [], isCommonVariable, availableCommonStringVars = [], availableCommonNumberVars = [], + availableCommonArrayVars = [], }: ConditionItemProps) => { const [isHovered, setIsHovered] = useState(false) @@ -100,6 +103,35 @@ const ConditionItem = ({ 
} } + // For array type, handle both string and array values + if (currentMetadata?.type === MetadataFilteringVariableType.array) { + if (typeof condition.value === 'string') { + const regex = isCommonVariable ? COMMON_VARIABLE_REGEX : VARIABLE_REGEX + const matchedStartNumber = isCommonVariable ? 2 : 3 + const matched = condition.value.match(regex) + + if (matched?.length) { + return { + value: matched[0].slice(matchedStartNumber, -matchedStartNumber), + valueMethod: 'variable', + } + } + else { + return { + value: condition.value, + valueMethod: 'constant', + } + } + } + else { + // Array value + return { + value: condition.value, + valueMethod: 'constant', + } + } + } + return { value: condition.value, valueMethod: 'constant', @@ -144,7 +176,8 @@ const ConditionItem = ({ { !comparisonOperatorNotRequireValue(condition.comparison_operator) && (currentMetadata?.type === MetadataFilteringVariableType.string - || currentMetadata?.type === MetadataFilteringVariableType.select) && ( + || currentMetadata?.type === MetadataFilteringVariableType.select) + && ![ComparisonOperator.in, ComparisonOperator.notIn].includes(condition.comparison_operator) && ( ) } + { + !comparisonOperatorNotRequireValue(condition.comparison_operator) + && ([ComparisonOperator.in, ComparisonOperator.notIn].includes(condition.comparison_operator) + || currentMetadata?.type === MetadataFilteringVariableType.array) && ( + + ) + } { !comparisonOperatorNotRequireValue(condition.comparison_operator) && currentMetadata?.type === MetadataFilteringVariableType.time && ( void @@ -34,11 +34,22 @@ const ConditionVariableSelector = ({ const { t } = useTranslation() const [open, setOpen] = useState(false) + // 添加调试日志 + console.log('🔍 ConditionVariableSelector Debug:') + console.log(' - varType:', varType) + console.log(' - nodesOutputVars:', nodesOutputVars) + console.log(' - availableNodes:', availableNodes) + const handleChange = useCallback((valueSelector: ValueSelector, varItem: Var) => { 
onChange(valueSelector, varItem) setOpen(false) }, [onChange]) + const isArrayType = varType === 'array' || varType === VarType.array + || varType === VarType.arrayString || varType === VarType.arrayNumber + || varType === VarType.arrayObject || varType === VarType.arrayFile + || (typeof varType === 'string' && varType.startsWith('array')) + return ( @@ -69,7 +80,7 @@ const ConditionVariableSelector = ({ {t('workflow.nodes.knowledgeRetrieval.metadata.panel.select')}
- {varType} + {isArrayType ? 'array' : varType}
) @@ -82,6 +93,15 @@ const ConditionVariableSelector = ({ vars={nodesOutputVars} isSupportFileVar onChange={handleChange} + filterVar={(varPayload) => { + // If varType is array-related, filter for all array types + if (isArrayType) { + return [VarType.arrayString, VarType.arrayNumber, VarType.arrayObject, VarType.arrayFile, VarType.array].includes(varPayload.type) + || varPayload.type.toString().startsWith('array') + } + // For other types, use exact match + return varPayload.type === varType + }} />
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/index.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/index.tsx index 4b129f4c31..d682293a78 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/index.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/index.tsx @@ -22,9 +22,12 @@ const ConditionList = ({ availableStringNodesWithParent, availableNumberVars, availableNumberNodesWithParent, + availableArrayVars, + availableArrayNodesWithParent, isCommonVariable, availableCommonNumberVars, availableCommonStringVars, + availableCommonArrayVars, }: ConditionListProps) => { const { conditions, logical_operator } = metadataFilteringConditions @@ -61,9 +64,12 @@ const ConditionList = ({ availableStringNodesWithParent={availableStringNodesWithParent} availableNumberVars={availableNumberVars} availableNumberNodesWithParent={availableNumberNodesWithParent} + availableArrayVars={availableArrayVars} + availableArrayNodesWithParent={availableArrayNodesWithParent} isCommonVariable={isCommonVariable} availableCommonStringVars={availableCommonStringVars} availableCommonNumberVars={availableCommonNumberVars} + availableCommonArrayVars={availableCommonArrayVars} /> )) } diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/utils.ts b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/utils.ts index 6397023991..57bc3fc763 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/utils.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/utils.ts @@ -30,6 +30,8 @@ export const getOperators = (type?: MetadataFilteringVariableType) => { ComparisonOperator.notContains, ComparisonOperator.startWith, 
ComparisonOperator.endWith, + ComparisonOperator.in, + ComparisonOperator.notIn, ComparisonOperator.empty, ComparisonOperator.notEmpty, ] @@ -41,6 +43,17 @@ export const getOperators = (type?: MetadataFilteringVariableType) => { ComparisonOperator.lessThan, ComparisonOperator.largerThanOrEqual, ComparisonOperator.lessThanOrEqual, + ComparisonOperator.in, + ComparisonOperator.notIn, + ComparisonOperator.empty, + ComparisonOperator.notEmpty, + ] + case MetadataFilteringVariableType.array: + return [ + ComparisonOperator.in, + ComparisonOperator.notIn, + ComparisonOperator.contains, + ComparisonOperator.notContains, ComparisonOperator.empty, ComparisonOperator.notEmpty, ] diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-icon.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-icon.tsx index 4a3f539ef4..be2e94c916 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-icon.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-icon.tsx @@ -1,6 +1,7 @@ import { memo } from 'react' import { RiHashtag, + RiListUnordered, RiTextSnippet, RiTimeLine, } from '@remixicon/react' @@ -32,6 +33,11 @@ const MetadataIcon = ({ ) } + { + type === MetadataFilteringVariableType.array && ( + + ) + } ) } diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/panel.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/panel.tsx index 3b5eefd853..0946a27a14 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/panel.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/panel.tsx @@ -61,6 +61,8 @@ const Panel: FC> = ({ availableStringNodesWithParent, availableNumberVars, availableNumberNodesWithParent, + availableArrayVars, + availableArrayNodesWithParent, } = useConfig(id, data) const handleOpenFromPropsChange = useCallback((openFromProps: boolean) => { @@ -149,6 +151,8 @@ const Panel: FC> 
= ({ availableStringNodesWithParent={availableStringNodesWithParent} availableNumberVars={availableNumberVars} availableNumberNodesWithParent={availableNumberNodesWithParent} + availableArrayVars={availableArrayVars} + availableArrayNodesWithParent={availableArrayNodesWithParent} />
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts index 1cae4ecd3b..cef0dec5bb 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts @@ -81,13 +81,14 @@ export enum MetadataFilteringVariableType { number = 'number', time = 'time', select = 'select', + array = 'array', } export type MetadataFilteringCondition = { id: string name: string comparison_operator: ComparisonOperator - value?: string | number + value?: string | number | string[] } export type MetadataFilteringConditions = { @@ -127,7 +128,10 @@ export type MetadataShape = { availableStringNodesWithParent?: Node[] availableNumberVars?: NodeOutPutVar[] availableNumberNodesWithParent?: Node[] + availableArrayVars?: NodeOutPutVar[] + availableArrayNodesWithParent?: Node[] isCommonVariable?: boolean availableCommonStringVars?: { name: string; type: string; }[] availableCommonNumberVars?: { name: string; type: string; }[] + availableCommonArrayVars?: { name: string; type: string; }[] } diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts index 42aa7def25..02034cc280 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts @@ -317,6 +317,8 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { if (type === MetadataFilteringVariableType.number) operator = ComparisonOperator.equal + else if (type === MetadataFilteringVariableType.array) + operator = ComparisonOperator.in const newCondition = { id: uuid4(), @@ -413,6 +415,20 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { filterVar: filterNumberVar, }) + const filterArrayVar = useCallback((varPayload: Var) => { + // 匹配所有数组类型:array, array[string], 
array[number], array[object], array[file] + return [VarType.arrayString, VarType.arrayNumber, VarType.arrayObject, VarType.arrayFile, VarType.array].includes(varPayload.type) + || varPayload.type.toString().startsWith('array') + }, []) + + const { + availableVars: availableArrayVars, + availableNodesWithParent: availableArrayNodesWithParent, + } = useAvailableVarList(id, { + onlyLeafNodeVar: false, + filterVar: filterArrayVar, + }) + return { readOnly, inputs, @@ -446,6 +462,8 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { availableStringNodesWithParent, availableNumberVars, availableNumberNodesWithParent, + availableArrayVars, + availableArrayNodesWithParent, } } diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts index 57cb42a0b1..a526d8e9fd 100644 --- a/web/i18n/en-US/workflow.ts +++ b/web/i18n/en-US/workflow.ts @@ -406,7 +406,7 @@ const translation = { roleDescription: { system: 'Give high level instructions for the conversation', user: 'Provide instructions, queries, or any text-based input to the model', - assistant: 'The model’s responses based on the user messages', + assistant: 'The model\'s responses based on the user messages', }, addMessage: 'Add Message', vision: 'vision', @@ -488,16 +488,18 @@ const translation = { add: 'Add Condition', search: 'Search metadata', placeholder: 'Enter value', + arrayPlaceholder: 'Enter comma-separated values (e.g., value1, value2, value3)', datePlaceholder: 'Choose a time...', select: 'Select variable...', + noVariables: 'No variables available', }, }, }, http: { inputVars: 'Input Variables', api: 'API', - apiPlaceholder: 'Enter URL, type ‘/’ insert variable', - extractListPlaceholder: 'Enter list item index, type ‘/’ insert variable', + apiPlaceholder: 'Enter URL, type \'/\' insert variable', + extractListPlaceholder: 'Enter list item index, type \'/\' insert variable', notStartWithHttp: 'API should start with http:// or https://', key: 'Key', type: 'Type', diff --git 
a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts index b8257d8229..057393016a 100644 --- a/web/i18n/zh-Hans/workflow.ts +++ b/web/i18n/zh-Hans/workflow.ts @@ -58,7 +58,7 @@ const translation = { processData: '数据处理', input: '输入', output: '输出', - jinjaEditorPlaceholder: '输入 “/” 或 “{” 插入变量', + jinjaEditorPlaceholder: '输入 "/" 或 "{" 插入变量', viewOnly: '只读', showRunHistory: '显示运行历史', enableJinja: '开启支持 Jinja 模板', @@ -110,7 +110,7 @@ const translation = { branch: '分支', onFailure: '异常时', addFailureBranch: '添加异常分支', - openInExplore: '在“探索”中打开', + openInExplore: '在"探索"中打开', loadMore: '加载更多', noHistory: '没有历史版本', }, @@ -271,7 +271,7 @@ const translation = { 'variable-aggregator': '将多路分支的变量聚合为一个变量,以实现下游节点统一配置。', 'iteration': '对列表对象执行多次步骤直至输出所有结果。', 'loop': '循环执行一段逻辑直到满足结束条件或者到达循环次数上限。', - 'loop-end': '相当于“break” 此节点没有配置项,当循环体内运行到此节点后循环终止。', + 'loop-end': '相当于"break" 此节点没有配置项,当循环体内运行到此节点后循环终止。', 'parameter-extractor': '利用 LLM 从自然语言内推理提取出结构化参数,用于后置的工具调用或 HTTP 请求。', 'document-extractor': '用于将用户上传的文档解析为 LLM 便于理解的文本内容。', 'list-operator': '用于过滤或排序数组内容。', @@ -489,16 +489,18 @@ const translation = { add: '添加条件', search: '搜索元数据', placeholder: '输入值', - datePlaceholder: '选择日期...', + arrayPlaceholder: '输入逗号分隔的值 (例如: 值1, 值2, 值3)', + datePlaceholder: '选择时间...', select: '选择变量...', + noVariables: '没有可用变量', }, }, }, http: { inputVars: '输入变量', api: 'API', - apiPlaceholder: '输入 URL,输入变量时请键入‘/’', - extractListPlaceholder: '输入提取列表编号,输入变量时请键入‘/’', + apiPlaceholder: '输入 URL,输入变量时请键入\'/\'', + extractListPlaceholder: '输入提取列表编号,输入变量时请键入\'/\'', notStartWithHttp: 'API 应该以 http:// 或 https:// 开头', key: '键', type: '类型', From 9b0bf4cedfd24a4b6f64e8870ebe4764e49e94f6 Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Tue, 3 Jun 2025 14:10:21 +0800 Subject: [PATCH 02/10] update knowledge_retrieval logic Signed-off-by: kenwoodjw --- ARRAY_METADATA_FILTER_DEBUG.md | 151 ----------------- ARRAY_METADATA_FILTER_FINAL_TEST.md | 147 ----------------- ARRAY_METADATA_FILTER_README.md | 154 ------------------ 
ARRAY_METADATA_FILTER_TEST.md | 108 ------------ api/core/rag/retrieval/dataset_retrieval.py | 44 ++--- .../knowledge_retrieval_node.py | 53 +++--- .../condition-list/condition-array.tsx | 15 +- 7 files changed, 49 insertions(+), 623 deletions(-) delete mode 100644 ARRAY_METADATA_FILTER_DEBUG.md delete mode 100644 ARRAY_METADATA_FILTER_FINAL_TEST.md delete mode 100644 ARRAY_METADATA_FILTER_README.md delete mode 100644 ARRAY_METADATA_FILTER_TEST.md diff --git a/ARRAY_METADATA_FILTER_DEBUG.md b/ARRAY_METADATA_FILTER_DEBUG.md deleted file mode 100644 index baeec07eec..0000000000 --- a/ARRAY_METADATA_FILTER_DEBUG.md +++ /dev/null @@ -1,151 +0,0 @@ -# 数组元数据过滤功能 - Debug指南 - -## 问题分析 - -从你的截图可以看出,当前选择的是`name`字段,它是`string`类型的元数据字段。要使用数组变量选择器,需要满足以下条件: - -1. **元数据字段必须是array类型** - 当前显示的是string类型 -2. **工作流中需要有array类型的变量** - 用于变量选择 - -## Debug步骤 - -### 1. 创建array类型的元数据字段 - -首先需要在知识库中创建一个array类型的元数据字段: - -```bash -# 通过API创建array类型元数据字段 -curl -X POST \ - http://localhost:3000/datasets/{dataset_id}/metadata \ - -H "Authorization: Bearer your-api-key" \ - -H "Content-Type: application/json" \ - -d '{ - "type": "array", - "name": "job_ids" - }' -``` - -### 2. 在工作流中添加数组变量 - -在工作流的开始节点中添加array类型的变量: - -```json -{ - "key": "user_jobs", - "name": "用户工作ID列表", - "type": "array", - "required": true, - "default": ["job1", "job2"] -} -``` - -### 3. 测试流程 - -1. **选择array类型字段**:在元数据过滤条件中选择`job_ids`字段 -2. **查看变量选择器**:此时应该显示array类型的变量 -3. **验证操作符**:确认显示了`in`, `not in`, `contains`, `not contains`等数组操作符 - -### 4. 添加调试日志 - -我已经在代码中添加了调试日志,打开浏览器开发者工具查看: - -- `🔍 ConditionArray Debug` - 显示可用的数组变量 -- `🔧 数组变量被选择` - 显示变量选择过程 - -### 5. 检查数据流 - -**当前问题**: -- 元数据字段类型:`string` (name字段) -- 需要的字段类型:`array` (如job_ids字段) - -**解决方案**: -1. 创建array类型的元数据字段 -2. 选择该字段进行过滤 -3. 
此时变量选择器会显示数组类型变量 - -## 验证方法 - -### 检查元数据字段列表 -```javascript -// 在浏览器控制台运行 -console.log('当前元数据字段:', metadataList); -``` - -### 检查可用变量 -```javascript -// 检查数组变量 -console.log('可用数组变量:', availableArrayVars); -console.log('通用数组变量:', availableCommonArrayVars); -``` - -### 验证变量过滤逻辑 -```javascript -// 检查变量过滤器 -const filterArrayVar = (varPayload) => { - return [ - 'arrayString', - 'arrayNumber', - 'arrayObject', - 'array' - ].includes(varPayload.type); -}; -console.log('过滤后的数组变量:', variables.filter(filterArrayVar)); -``` - -## 常见问题 - -### Q1: 为什么没有显示数组变量? -**A**: 当前选择的是string类型字段。数组变量只在array类型字段中显示。 - -### Q2: 如何创建array类型的元数据字段? -**A**: -1. 通过API创建:`POST /datasets/{id}/metadata` with `{"type": "array", "name": "field_name"}` -2. 或在知识库管理界面创建 - -### Q3: 变量选择器为空? -**A**: 检查工作流中是否有array类型的变量,确保变量类型为 `arrayString`, `arrayNumber`, `arrayObject`, 或 `array` - -### Q4: 如何验证功能正常工作? -**A**: -1. 创建array类型元数据字段 -2. 在工作流开始节点添加array变量 -3. 选择array字段进行过滤 -4. 查看变量选择器是否显示数组变量 - -## 示例配置 - -### 元数据字段示例 -```json -{ - "id": "job_ids_field", - "name": "job_ids", - "type": "array" -} -``` - -### 工作流变量示例 -```json -{ - "key": "target_jobs", - "name": "目标工作列表", - "type": "array", - "default": ["job1", "job2", "job3"] -} -``` - -### 过滤条件示例 -```json -{ - "id": "condition_1", - "name": "job_ids", - "comparison_operator": "in", - "value": ["job1", "job2"] -} -``` - -## 下一步 - -1. 首先确认是否有array类型的元数据字段 -2. 如果没有,创建一个 -3. 确保工作流中有array类型的变量 -4. 选择array字段进行测试 \ No newline at end of file diff --git a/ARRAY_METADATA_FILTER_FINAL_TEST.md b/ARRAY_METADATA_FILTER_FINAL_TEST.md deleted file mode 100644 index 6e4b08779c..0000000000 --- a/ARRAY_METADATA_FILTER_FINAL_TEST.md +++ /dev/null @@ -1,147 +0,0 @@ -# 数组元数据过滤功能 - 最终测试指南 - -## 🎯 功能概述 - -现在Dify的知识检索节点支持使用**数组变量作为过滤条件的值**,实现如下过滤逻辑: -- `document_type in ["pdf", "docx", "txt"]` -- `priority not in [1, 2, 3]` - -## ✅ 已修复的问题 - -### 1. 
**ComparisonOperator导入错误** -- **问题**: `ReferenceError: ComparisonOperator is not defined` -- **修复**: 修改导入语句,导入枚举值而非仅类型定义 -- **文件**: `condition-item.tsx`, `condition-operator.tsx` - -### 2. **操作符支持范围** -- **问题**: string/number类型字段没有显示in/not in操作符 -- **修复**: 在`utils.ts`中为基础类型添加数组操作符 -- **文件**: `utils.ts` - -### 3. **条件渲染逻辑** -- **问题**: in/not in操作符没有使用数组输入组件 -- **修复**: 修改条件渲染逻辑,根据操作符类型选择组件 -- **文件**: `condition-item.tsx` - -### 4. **数组变量过滤逻辑** -- **问题**: 数组变量过滤过于严格,遗漏某些数组类型 -- **修复**: 改进filterArrayVar函数,支持所有数组类型 -- **文件**: `use-config.ts` - -### 5. **变量类型匹配** -- **问题**: ConditionVariableSelector类型定义过于严格 -- **修复**: 支持字符串类型参数,改进数组类型匹配 -- **文件**: `condition-variable-selector.tsx` - -### 6. **数据传递链路** -- **问题**: ConditionList没有传递数组变量相关props -- **修复**: 添加availableArrayVars等props传递 -- **文件**: `condition-list/index.tsx` - -## 🧪 测试步骤 - -### 步骤1: 创建测试工作流 - -1. **创建新工作流**,包含以下节点: - - **开始节点**: 输入变量 `query` - - **代码执行节点**: 输出字符串数组 - - **知识检索节点**: 使用元数据过滤 - -### 步骤2: 配置代码执行节点 - -```python -def main() -> dict: - return { - "file_types": ["pdf", "docx", "txt"], - "priorities": [1, 2, 3], - "categories": ["tech", "business", "personal"] - } -``` - -### 步骤3: 配置知识检索节点 - -1. **添加数据集**(确保数据集有元数据字段) -2. **设置元数据过滤模式**为"手动" -3. **添加过滤条件**: - - 选择字符串类型元数据字段(如 `document_type`) - - 选择操作符 `in` - - 选择变量模式,选择代码节点的 `file_types` 输出 - -### 步骤4: 验证功能 - -#### 前端验证 -- [ ] 能看到 `in` 和 `not in` 操作符选项 -- [ ] 能选择数组类型的变量 -- [ ] 界面正确显示选择的数组变量 -- [ ] 配置能够正确保存和加载 - -#### 后端验证 -- [ ] 运行工作流不报错 -- [ ] 数组变量值被正确解析 -- [ ] 过滤结果符合预期 -- [ ] 支持多条件组合 - -## 🔍 调试日志检查 - -现在调试日志应该显示: - -```javascript -🔍 ConditionArray Debug: - - valueMethod: variable - - isCommonVariable: undefined - - nodesOutputVars (数组变量): [{ nodeId: 'code_node', vars: [...] }] // 不再是空数组 - - availableNodes: [{ id: 'code_node', data: {...} }] // 不再是空数组 - - commonVariables: [] - -🔍 ConditionVariableSelector Debug: - - varType: array - - nodesOutputVars: [{ nodeId: 'code_node', vars: [...] 
}] // 应该有数据 - - availableNodes: [{ id: 'code_node', data: {...} }] // 应该有数据 -``` - -## 🎯 支持的数组类型 - -现在支持以下所有数组类型: -- `array` - 通用数组 -- `array[string]` - 字符串数组 -- `array[number]` - 数字数组 -- `array[object]` - 对象数组 -- `array[file]` - 文件数组 -- 任何以 `array` 开头的自定义类型 - -## 🚀 使用场景示例 - -### 场景1: 文档类型过滤 -``` -document_type in {{code_node.file_types}} -// 其中 file_types = ["pdf", "docx", "txt"] -``` - -### 场景2: 优先级排除 -``` -priority not in {{code_node.excluded_priorities}} -// 其中 excluded_priorities = [0, 10] -``` - -### 场景3: 多条件组合 -``` -document_type in {{code_node.allowed_types}} AND -created_date > "2024-01-01" AND -priority not in {{code_node.excluded_priorities}} -``` - -## ✅ 完成状态 - -- [x] 前端操作符支持 -- [x] 前端条件渲染修复 -- [x] 变量选择器集成 -- [x] 导入错误修复 -- [x] 数组类型过滤改进 -- [x] 变量类型匹配修复 -- [x] 数据传递链路修复 -- [x] 后端数组处理支持 -- [x] 类型安全保证 - -## 🎉 功能已完全可用! - -现在您可以在知识检索节点中完全使用数组变量进行元数据过滤了! \ No newline at end of file diff --git a/ARRAY_METADATA_FILTER_README.md b/ARRAY_METADATA_FILTER_README.md deleted file mode 100644 index c340bfe22b..0000000000 --- a/ARRAY_METADATA_FILTER_README.md +++ /dev/null @@ -1,154 +0,0 @@ -# 元数据数组过滤功能实现 - -## 功能概述 - -这个实现为Dify的知识检索系统添加了对数组类型元数据的过滤支持,解决了GitHub Issue #16195中提到的需求。 - -## 问题背景 - -用户在使用Dify的知识检索功能时,需要根据包含特定`job_ids`的数组来过滤文档,但现有系统只支持字符串、数字和时间类型的元数据过滤,不支持数组类型的条件匹配。 - -## 解决方案 - -### 1. 前端改动 - -#### 新增数组类型支持 -- 在`MetadataFilteringVariableType`枚举中添加了`array`类型 -- 更新了`MetadataFilteringCondition`类型以支持`string[]`值 -- 为数组类型添加了专门的操作符:`in`、`not in`、`contains`、`not contains`、`empty`、`not empty` -- 添加了数组类型的图标支持(使用`RiListUnordered`图标) - -#### 文件修改 -```typescript -// web/app/components/workflow/nodes/knowledge-retrieval/types.ts -export enum MetadataFilteringVariableType { - string = 'string', - number = 'number', - time = 'time', - select = 'select', - array = 'array', // 新增 -} - -export type MetadataFilteringCondition = { - id: string - name: string - comparison_operator: ComparisonOperator - value?: string | number | string[] // 支持数组值 -} -``` - -### 2. 
后端改动 - -#### 数据库查询逻辑 -在PostgreSQL的JSONB字段中实现数组条件查询: - -**`in` 操作符逻辑:** -- 检查文档的元数据字段是否包含输入数组中的任何值 -- 使用OR逻辑连接多个LIKE条件 - -**`not in` 操作符逻辑:** -- 检查文档的元数据字段是否不包含输入数组中的任何值 -- 使用AND逻辑连接多个NOT LIKE条件 - -#### 文件修改 -```python -# api/core/rag/retrieval/dataset_retrieval.py -# api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py - -case "in": - if isinstance(value, (list, tuple)): - or_conditions = [] - for i, v in enumerate(value): - param_key = f"{key_value}_{i}" - if isinstance(v, str): - or_conditions.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{param_key}")).params( - **{key: metadata_name, param_key: f'%"{v}"%'} - ) - ) - if or_conditions: - filters.append(or_(*or_conditions)) -``` - -## 使用示例 - -### 场景:根据job_ids数组过滤文档 - -假设有以下文档元数据: -```json -{ - "doc1": {"job_ids": ["job1", "job2", "job3"]}, - "doc2": {"job_ids": ["job2", "job4", "job5"]}, - "doc3": {"job_ids": ["job6", "job7"]} -} -``` - -### 查询1:包含指定job_ids的文档 -``` -条件:job_ids in ["job1", "job4"] -结果:返回doc1和doc2,因为它们分别包含job1和job4 -``` - -### 查询2:不包含指定job_ids的文档 -``` -条件:job_ids not in ["job2", "job6"] -结果:返回doc3(如果存在不包含job2和job6的其他文档) -``` - -## 对应的SQL查询 - -### 包含查询 (in) -```sql -SELECT * FROM documents WHERE - doc_metadata ->> 'job_ids' LIKE '%"job1"%' OR - doc_metadata ->> 'job_ids' LIKE '%"job4"%'; -``` - -### 排除查询 (not in) -```sql -SELECT * FROM documents WHERE - doc_metadata ->> 'job_ids' NOT LIKE '%"job2"%' AND - doc_metadata ->> 'job_ids' NOT LIKE '%"job6"%'; -``` - -## 测试 - -运行测试脚本: -```bash -python test_array_metadata_filter.py -``` - -这将演示数组过滤功能的工作原理。 - -## 技术细节 - -### 数据存储 -- 元数据存储在PostgreSQL的JSONB字段中 -- 数组值在JSON中以字符串数组形式存储:`["value1", "value2"]` -- 使用LIKE操作符进行部分匹配:`LIKE '%"value"%'` - -### 性能考虑 -- 使用了数据库索引:`db.Index("document_metadata_idx", "doc_metadata", postgresql_using="gin")` -- JSONB字段支持GIN索引,能够高效处理包含查询 - -### 支持的操作符 -- `in`: 检查字段是否包含数组中的任意值 -- `not in`: 检查字段是否不包含数组中的任意值 -- `contains`: 检查字段是否包含特定值 -- `not contains`: 检查字段是否不包含特定值 -- `empty`: 检查字段是否为空 -- `not empty`: 
检查字段是否不为空 - -## 扩展性 - -这个实现为未来支持更复杂的数组操作奠定了基础,比如: -- `all of`: 检查是否包含数组中的所有值 -- `any of`: 检查是否包含数组中的任意值(类似当前的`in`) -- 数组长度比较 -- 数组交集/并集操作 - -## 兼容性 - -- 向后兼容:现有的字符串、数字、时间类型过滤功能保持不变 -- 数据库兼容:利用PostgreSQL的JSONB特性,无需额外的schema变更 -- API兼容:扩展现有的元数据过滤API,不破坏现有接口 \ No newline at end of file diff --git a/ARRAY_METADATA_FILTER_TEST.md b/ARRAY_METADATA_FILTER_TEST.md deleted file mode 100644 index 8d297cd13a..0000000000 --- a/ARRAY_METADATA_FILTER_TEST.md +++ /dev/null @@ -1,108 +0,0 @@ -# 数组元数据过滤功能测试 - -## 🧪 测试场景 - -### 1. 字符串字段 + 数组变量过滤 - -**测试目标**: 验证字符串类型的元数据字段能否使用数组变量进行 `in`/`not in` 过滤 - -**测试步骤**: -1. 创建一个工作流,包含: - - 开始节点:输入变量 `filename` - - 代码执行节点:输出数组 `["doc1.pdf", "doc2.pdf", "doc3.pdf"]` - - 知识检索节点:使用元数据过滤 - -2. 在知识检索节点中: - - 选择字符串类型元数据字段(如 `document_name`) - - 选择操作符 `in` 或 `not in` - - 在值选择中选择代码执行节点的数组输出 - -**期望结果**: -- 能够在操作符下拉中看到 `in` 和 `not in` 选项 -- 能够选择数组类型的变量作为过滤值 -- 运行时正确过滤匹配的文档 - -### 2. 数字字段 + 数组变量过滤 - -**测试目标**: 验证数字类型的元数据字段能否使用数组变量进行过滤 - -**测试步骤**: -1. 创建代码执行节点输出数字数组 `[1, 2, 3]` -2. 在知识检索节点中: - - 选择数字类型元数据字段(如 `priority`) - - 选择操作符 `in` - - 选择数组变量作为过滤值 - -**期望结果**: 文档按数字数组正确过滤 - -### 3. 多条件组合测试 - -**测试目标**: 验证数组过滤与其他条件的组合 - -**测试步骤**: -1. 设置多个过滤条件: - - `document_type in ["pdf", "docx"]`(数组过滤) - - `created_date > "2024-01-01"`(常规过滤) - - 逻辑操作符:AND - -**期望结果**: 所有条件正确组合执行 - -## 🔍 验证要点 - -### 前端检查 -- [ ] 操作符下拉菜单包含 `in` 和 `not in` -- [ ] 变量选择器显示数组类型变量 -- [ ] 界面正确渲染数组输入组件 -- [ ] 保存/加载配置正确 - -### 后端检查 -- [ ] 正确解析数组变量值 -- [ ] 数据库查询语句正确生成 -- [ ] 过滤结果准确 -- [ ] 错误处理完善 - -## 🐛 已知问题修复 - -### 1. ComparisonOperator 导入错误 -**问题**: `ReferenceError: ComparisonOperator is not defined` -**修复**: 修改导入语句,导入枚举值而非仅类型定义 - -### 2. 操作符可见性 -**问题**: string/number 类型字段没有显示 in/not in 操作符 -**修复**: 在 `utils.ts` 中为基础类型添加数组操作符 - -### 3. 
条件渲染逻辑 -**问题**: in/not in 操作符没有使用数组输入组件 -**修复**: 修改 `condition-item.tsx` 中的条件渲染逻辑 - -## ✅ 功能完成状态 - -- [x] 前端操作符支持 -- [x] 前端条件渲染 -- [x] 变量选择器集成 -- [x] 导入错误修复 -- [x] 后端数组处理 -- [x] 类型安全保证 - -## 🚀 使用示例 - -```javascript -// 工作流配置示例 -{ - "metadata_filtering_conditions": { - "logical_operator": "and", - "conditions": [ - { - "name": "document_type", - "comparison_operator": "in", - "value": "{{code_node.file_types}}" // 数组变量 - }, - { - "name": "priority", - "comparison_operator": "not in", - "value": "{{code_node.excluded_priorities}}" // 数字数组 - } - ] - } -} -``` \ No newline at end of file diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 0f6885e416..45f41d522f 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -1048,59 +1048,51 @@ class DatasetRetrieval: filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value) case "in": if isinstance(value, list | tuple): - # For arrays: check if metadata field contains any value from the input array + # For arrays: check if metadata field (single value) is in the input array or_conditions = [] for i, v in enumerate(value): param_key = f"{key_value}_{i}" if isinstance(v, str): - or_conditions.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{param_key}")).params( - **{key: metadata_name, param_key: f'%"{v}"%'} - ) - ) + # For string type: exact match with quoted string + or_conditions.append(DatasetDocument.doc_metadata[metadata_name] == f'"{v}"') else: + # For number type: exact match as numeric value or_conditions.append( - (text(f"documents.doc_metadata ->> :{key} = :{param_key}")).params( - **{key: metadata_name, param_key: str(v)} - ) + sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) == v ) if or_conditions: filters.append(or_(*or_conditions)) else: - # Single value case + # Single value case (backward compatibility) if isinstance(value, 
str): + filters.append(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"') + else: filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f'%"{value}"%'} - ) + sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) == value ) case "not in": if isinstance(value, list | tuple): - # For arrays: check if metadata field does not contain any value from the input array + # For arrays: check if metadata field (single value) is not in the input array and_conditions = [] for i, v in enumerate(value): param_key = f"{key_value}_{i}" if isinstance(v, str): - and_conditions.append( - (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{param_key}")).params( - **{key: metadata_name, param_key: f'%"{v}"%'} - ) - ) + # For string type: not equal to quoted string + and_conditions.append(DatasetDocument.doc_metadata[metadata_name] != f'"{v}"') else: + # For number type: not equal to numeric value and_conditions.append( - (text(f"documents.doc_metadata ->> :{key} != :{param_key}")).params( - **{key: metadata_name, param_key: str(v)} - ) + sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != v ) if and_conditions: filters.append(and_(*and_conditions)) else: - # Single value case + # Single value case (backward compatibility) if isinstance(value, str): + filters.append(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"') + else: filters.append( - (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f'%"{value}"%'} - ) + sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value ) case "empty": filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None)) diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 06c9983a78..9f633c39db 100644 --- 
a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -376,7 +376,10 @@ class KnowledgeRetrievalNode(LLMNode): elif expected_value.value_type == "string": # type: ignore expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore elif expected_value.value_type in ( - "array[number]", "array[string]", "array[object]", "array" + "array[number]", + "array[string]", + "array[object]", + "array", ): # type: ignore expected_value = expected_value.value # type: ignore else: @@ -524,60 +527,48 @@ class KnowledgeRetrievalNode(LLMNode): filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != value) case "in": if isinstance(value, list | tuple): - # For arrays: check if metadata field contains any value from the input array + # For arrays: check if metadata field (single value) is in the input array or_conditions = [] for i, v in enumerate(value): param_key = f"{key_value}_{i}" if isinstance(v, str): - or_conditions.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{param_key}")).params( - **{key: metadata_name, param_key: f'%"{v}"%'} - ) - ) + # For string type: exact match with quoted string + or_conditions.append(Document.doc_metadata[metadata_name] == f'"{v}"') else: + # For number type: exact match as numeric value or_conditions.append( - (text(f"documents.doc_metadata ->> :{key} = :{param_key}")).params( - **{key: metadata_name, param_key: str(v)} - ) + sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == v ) if or_conditions: filters.append(or_(*or_conditions)) else: - # Single value case + # Single value case (backward compatibility) if isinstance(value, str): - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f'%"{value}"%'} - ) - ) + filters.append(Document.doc_metadata[metadata_name] == f'"{value}"') + else: + 
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == value) case "not in": if isinstance(value, list | tuple): - # For arrays: check if metadata field does not contain any value from the input array + # For arrays: check if metadata field (single value) is not in the input array and_conditions = [] for i, v in enumerate(value): param_key = f"{key_value}_{i}" if isinstance(v, str): - and_conditions.append( - (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{param_key}")).params( - **{key: metadata_name, param_key: f'%"{v}"%'} - ) - ) + # For string type: not equal to quoted string + and_conditions.append(Document.doc_metadata[metadata_name] != f'"{v}"') else: + # For number type: not equal to numeric value and_conditions.append( - (text(f"documents.doc_metadata ->> :{key} != :{param_key}")).params( - **{key: metadata_name, param_key: str(v)} - ) + sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != v ) if and_conditions: filters.append(and_(*and_conditions)) else: - # Single value case + # Single value case (backward compatibility) if isinstance(value, str): - filters.append( - (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f'%"{value}"%'} - ) - ) + filters.append(Document.doc_metadata[metadata_name] != f'"{value}"') + else: + filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != value) case "empty": filters.append(Document.doc_metadata[metadata_name].is_(None)) case "not empty": diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx index 8cb54647bd..fe13a4b17e 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx +++ 
b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx @@ -87,16 +87,19 @@ const ConditionArray = ({ const trimmed = item.trim() if (trimmed === '') return null - // Try to convert to number if it's a valid number - const numericValue = Number(trimmed) - if (!isNaN(numericValue) && isFinite(numericValue)) - return numericValue + // Try to convert to number if it's a valid number (only if it looks like a pure numeric value) + if (/^-?\d+(\.\d+)?$/.test(trimmed)) { + const numericValue = Number(trimmed) + if (!isNaN(numericValue) && isFinite(numericValue)) + return numericValue + } - // Otherwise keep as string - return trimmed + // Otherwise keep as string (remove quotes if present) + return trimmed.replace(/^["']|["']$/g, '') }).filter(item => item !== null) console.log('🔧 常量数组值被设置:', arrayValues) + console.log('🔧 数组类型检测:', arrayValues.map(v => typeof v)) onChange(arrayValues) }, [onChange]) From b6e63e83295a4174af23a048760ec6ec2a79261e Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Tue, 3 Jun 2025 15:23:50 +0800 Subject: [PATCH 03/10] remove debug log and add type check Signed-off-by: kenwoodjw --- api/core/rag/retrieval/dataset_retrieval.py | 29 ++- .../knowledge_retrieval_node.py | 32 +++- api/pyproject.toml | 2 +- api/uv.lock | 8 +- .../condition-list/condition-array.tsx | 174 ++++++++++++------ .../condition-list/condition-item.tsx | 2 + 6 files changed, 170 insertions(+), 77 deletions(-) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 45f41d522f..dd24227f6c 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -1048,18 +1048,22 @@ class DatasetRetrieval: filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value) case "in": if isinstance(value, list | tuple): - # For arrays: check if metadata field (single value) is in the input array + if not value: 
+ return filters + or_conditions = [] for i, v in enumerate(value): - param_key = f"{key_value}_{i}" if isinstance(v, str): - # For string type: exact match with quoted string or_conditions.append(DatasetDocument.doc_metadata[metadata_name] == f'"{v}"') - else: - # For number type: exact match as numeric value + elif isinstance(v, int | float): or_conditions.append( sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) == v ) + or_conditions.append(DatasetDocument.doc_metadata[metadata_name] == str(v)) + else: + v_str = str(v) + or_conditions.append(DatasetDocument.doc_metadata[metadata_name] == f'"{v_str}"') + if or_conditions: filters.append(or_(*or_conditions)) else: @@ -1072,18 +1076,23 @@ class DatasetRetrieval: ) case "not in": if isinstance(value, list | tuple): - # For arrays: check if metadata field (single value) is not in the input array + if not value: + return filters + and_conditions = [] for i, v in enumerate(value): - param_key = f"{key_value}_{i}" if isinstance(v, str): - # For string type: not equal to quoted string and_conditions.append(DatasetDocument.doc_metadata[metadata_name] != f'"{v}"') - else: - # For number type: not equal to numeric value + elif isinstance(v, int | float): + and_conditions.append( sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != v ) + and_conditions.append(DatasetDocument.doc_metadata[metadata_name] != str(v)) + else: + v_str = str(v) + and_conditions.append(DatasetDocument.doc_metadata[metadata_name] != f'"{v_str}"') + if and_conditions: filters.append(and_(*and_conditions)) else: diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 9f633c39db..083b0579bb 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -527,18 +527,24 @@ class 
KnowledgeRetrievalNode(LLMNode): filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != value) case "in": if isinstance(value, list | tuple): - # For arrays: check if metadata field (single value) is in the input array + if not value: + return filters + + # Generate matching conditions for each value, supporting both number and string matching or_conditions = [] for i, v in enumerate(value): - param_key = f"{key_value}_{i}" if isinstance(v, str): - # For string type: exact match with quoted string or_conditions.append(Document.doc_metadata[metadata_name] == f'"{v}"') - else: - # For number type: exact match as numeric value + elif isinstance(v, int | float): + or_conditions.append( sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == v ) + or_conditions.append(Document.doc_metadata[metadata_name] == str(v)) + else: + v_str = str(v) + or_conditions.append(Document.doc_metadata[metadata_name] == f'"{v_str}"') + if or_conditions: filters.append(or_(*or_conditions)) else: @@ -549,18 +555,24 @@ class KnowledgeRetrievalNode(LLMNode): filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == value) case "not in": if isinstance(value, list | tuple): - # For arrays: check if metadata field (single value) is not in the input array + if not value: # 空数组 + return filters + + # 为每个值生成不匹配条件 and_conditions = [] for i, v in enumerate(value): - param_key = f"{key_value}_{i}" if isinstance(v, str): - # For string type: not equal to quoted string and_conditions.append(Document.doc_metadata[metadata_name] != f'"{v}"') - else: - # For number type: not equal to numeric value + elif isinstance(v, int | float): + and_conditions.append( sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != v ) + and_conditions.append(Document.doc_metadata[metadata_name] != str(v)) + else: + v_str = str(v) + and_conditions.append(Document.doc_metadata[metadata_name] != f'"{v_str}"') + if and_conditions: 
filters.append(and_(*and_conditions)) else: diff --git a/api/pyproject.toml b/api/pyproject.toml index 50a765c0e1..204b449eaa 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "chardet~=5.1.0", "flask~=3.1.0", "flask-compress~=1.17", - "flask-cors~=5.0.0", + "flask-cors~=6.0.0", "flask-login~=0.6.3", "flask-migrate~=4.0.7", "flask-restful~=0.3.10", diff --git a/api/uv.lock b/api/uv.lock index a1e1d6146a..596ee35f85 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1392,7 +1392,7 @@ requires-dist = [ { name = "chardet", specifier = "~=5.1.0" }, { name = "flask", specifier = "~=3.1.0" }, { name = "flask-compress", specifier = "~=1.17" }, - { name = "flask-cors", specifier = "~=5.0.0" }, + { name = "flask-cors", specifier = "~=6.0.0" }, { name = "flask-login", specifier = "~=0.6.3" }, { name = "flask-migrate", specifier = "~=4.0.7" }, { name = "flask-restful", specifier = "~=0.3.10" }, @@ -1765,15 +1765,15 @@ wheels = [ [[package]] name = "flask-cors" -version = "5.0.1" +version = "6.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "flask" }, { name = "werkzeug" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/32/d8/667bd90d1ee41c96e938bafe81052494e70b7abd9498c4a0215c103b9667/flask_cors-5.0.1.tar.gz", hash = "sha256:6ccb38d16d6b72bbc156c1c3f192bc435bfcc3c2bc864b2df1eb9b2d97b2403c", size = 11643 } +sdist = { url = "https://files.pythonhosted.org/packages/20/e7/b3c6afdd984672b55dff07482699c688af6c01bd7fd5dd55f9c9d1a88d1c/flask_cors-6.0.0.tar.gz", hash = "sha256:4592c1570246bf7beee96b74bc0adbbfcb1b0318f6ba05c412e8909eceec3393", size = 11875 } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/61/4aea5fb55be1b6f95e604627dc6c50c47d693e39cab2ac086ee0155a0abd/flask_cors-5.0.1-py3-none-any.whl", hash = "sha256:fa5cb364ead54bbf401a26dbf03030c6b18fb2fcaf70408096a572b409586b0c", size = 11296 }, + { url = 
"https://files.pythonhosted.org/packages/ba/f0/0ee29090016345938f016ee98aa8b5de1c500ee93491dc0c76495848fca1/flask_cors-6.0.0-py3-none-any.whl", hash = "sha256:6332073356452343a8ccddbfec7befdc3fdd040141fe776ec9b94c262f058657", size = 11549 }, ] [[package]] diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx index fe13a4b17e..e7aa1d8b5a 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-array.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect } from 'react' +import { useCallback, useMemo } from 'react' import { useTranslation } from 'react-i18next' import ConditionValueMethod from './condition-value-method' import type { ConditionValueMethodProps } from './condition-value-method' @@ -11,14 +11,17 @@ import type { } from '@/app/components/workflow/types' import { VarType } from '@/app/components/workflow/types' import Input from '@/app/components/base/input' +import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types' type ConditionArrayProps = { - value?: string | string[] | (string | number)[] + value?: string | string[] | (string | number)[] | number onChange: (value?: string | string[] | (string | number)[]) => void nodesOutputVars: NodeOutPutVar[] availableNodes: Node[] isCommonVariable?: boolean commonVariables: { name: string, type: string }[] + fieldType?: MetadataFilteringVariableType + strictTypeChecking?: boolean } & ConditionValueMethodProps const ConditionArray = ({ @@ -30,22 +33,24 @@ const ConditionArray = ({ availableNodes, isCommonVariable, commonVariables, + fieldType, + strictTypeChecking = false, }: ConditionArrayProps) => { const { t } = 
useTranslation() - const parseValueSelector = useCallback((value?: string | string[] | (string | number)[]): string[] => { + const parseValueSelector = useCallback((value?: string | string[] | (string | number)[] | number): string[] => { if (typeof value !== 'string') return [] - // 支持多种格式: - // 1. {{#nodeId.variable#}} 格式 + // Support multiple formats: + // 1. {{#nodeId.variable#}} format if (value.includes('#')) { const match = value.match(/\{\{#([^#]+)#\}\}/) if (match && match[1]) return match[1].split('.') } - // 2. nodeId.variable 格式(直接格式) + // 2. nodeId.variable format (direct format) if (value.includes('.')) return value.split('.') @@ -54,27 +59,55 @@ const ConditionArray = ({ const currentValueSelector = parseValueSelector(value) - useEffect(() => { - console.log('🔍 ConditionArray Debug:') - console.log(' - valueMethod:', valueMethod) - console.log(' - isCommonVariable:', isCommonVariable) - console.log(' - value:', value) - console.log(' - currentValueSelector:', currentValueSelector) - console.log(' - nodesOutputVars (数组变量):', nodesOutputVars) - console.log(' - availableNodes:', availableNodes) - console.log(' - commonVariables (通用数组变量):', commonVariables) - }, [valueMethod, isCommonVariable, value, currentValueSelector, nodesOutputVars, availableNodes, commonVariables]) - const handleVariableValueChange = useCallback((v: ValueSelector) => { - console.log('🔧 数组变量被选择:', v) onChange(`{{#${v.join('.')}#}}`) }, [onChange]) const handleCommonVariableValueChange = useCallback((v: string) => { - console.log('🔧 通用数组变量被选择:', v) onChange(`{{${v}}}`) }, [onChange]) + // Type compatibility check + const checkTypeCompatibility = useCallback((selectedVariable: any) => { + if (!fieldType || !selectedVariable) return null + + // Get variable type + const variableType = selectedVariable.type || selectedVariable.value_type + + // Define compatibility rules + const compatibilityRules: Record = { + string: ['array[string]', 'array', 'string'], + number: ['array[number]', 
'array', 'number'], + select: ['array[string]', 'array', 'string'], + array: ['array[string]', 'array[number]', 'array[object]', 'array'], + time: ['array[string]', 'array', 'string'], // Time field compatibility + } + + const compatibleTypes = compatibilityRules[fieldType as string] || [] + + if (!compatibleTypes.includes(variableType)) { + return { + warning: true, + message: `⚠️ Type mismatch: ${fieldType} field is not recommended to use ${variableType} type variables`, + } + } + + return null + }, [fieldType]) + + // Check if currently selected variable is compatible + const typeCompatibilityCheck = useMemo(() => { + if (valueMethod === 'variable' && currentValueSelector.length > 0) { + // Find currently selected variable information + const selectedVar = nodesOutputVars.find(nodeVar => + nodeVar.nodeId === currentValueSelector[0], + )?.vars.find(v => v.variable === currentValueSelector[1]) + + return checkTypeCompatibility(selectedVar) + } + return null + }, [valueMethod, currentValueSelector, nodesOutputVars, checkTypeCompatibility]) + const handleConstantValueChange = useCallback((inputValue: string) => { // Parse comma-separated values into array if (inputValue.trim() === '') { @@ -87,40 +120,63 @@ const ConditionArray = ({ const trimmed = item.trim() if (trimmed === '') return null - // Try to convert to number if it's a valid number (only if it looks like a pure numeric value) + // Keep natural type detection: pure numbers auto-convert to numbers, otherwise keep as strings if (/^-?\d+(\.\d+)?$/.test(trimmed)) { const numericValue = Number(trimmed) if (!isNaN(numericValue) && isFinite(numericValue)) return numericValue } - // Otherwise keep as string (remove quotes if present) + // Remove quotes (if any) and keep as string return trimmed.replace(/^["']|["']$/g, '') }).filter(item => item !== null) - console.log('🔧 常量数组值被设置:', arrayValues) - console.log('🔧 数组类型检测:', arrayValues.map(v => typeof v)) onChange(arrayValues) }, [onChange]) const displayValue = 
Array.isArray(value) ? value.map(v => String(v)).join(', ') : (value || '') - // Filter available variables to show only array types - const filteredNodesOutputVars = nodesOutputVars.filter(nodeVar => - nodeVar.vars.some(v => - v.type === VarType.arrayString - || v.type === VarType.arrayNumber - || v.type === VarType.arrayObject - || v.type === VarType.arrayFile - || v.type === VarType.array - || v.type.toString().startsWith('array'), - ), - ) - - const filteredCommonVariables = commonVariables.filter(v => - v.type === 'array' - || v.type.startsWith('array'), - ) + // Filter variables based on strict mode + const filteredNodesOutputVars = useMemo(() => { + const basicFilter = nodesOutputVars.filter(nodeVar => + nodeVar.vars.some(v => + v.type === VarType.arrayString + || v.type === VarType.arrayNumber + || v.type === VarType.arrayObject + || v.type === VarType.arrayFile + || v.type === VarType.array + || v.type.toString().startsWith('array'), + ), + ) + + if (!strictTypeChecking || !fieldType) + return basicFilter + + // Strict mode: only show type-compatible variables + return basicFilter.map(nodeVar => ({ + ...nodeVar, + vars: nodeVar.vars.filter((v) => { + const typeCheck = checkTypeCompatibility(v) + return !typeCheck?.warning + }), + })).filter(nodeVar => nodeVar.vars.length > 0) + }, [nodesOutputVars, strictTypeChecking, fieldType, checkTypeCompatibility]) + + const filteredCommonVariables = useMemo(() => { + const basicFilter = commonVariables.filter(v => + v.type === 'array' + || v.type.startsWith('array'), + ) + + if (!strictTypeChecking || !fieldType) + return basicFilter + + // Strict mode: only show type-compatible variables + return basicFilter.filter((v) => { + const typeCheck = checkTypeCompatibility(v) + return !typeCheck?.warning + }) + }, [commonVariables, strictTypeChecking, fieldType, checkTypeCompatibility]) return (
@@ -131,23 +187,37 @@ const ConditionArray = ({
{ valueMethod === 'variable' && !isCommonVariable && ( - +
+ + {typeCompatibilityCheck?.warning && ( +
+ {typeCompatibilityCheck.message} +
+ )} +
) } { valueMethod === 'variable' && isCommonVariable && ( - +
+ + {typeCompatibilityCheck?.warning && ( +
+ {typeCompatibilityCheck.message} +
+ )} +
) } { diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx index 4675c13834..1876e3358f 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-item.tsx @@ -218,6 +218,8 @@ const ConditionItem = ({ onChange={handleValueChange} isCommonVariable={isCommonVariable} commonVariables={availableCommonArrayVars} + fieldType={currentMetadata?.type} + strictTypeChecking={false} /> ) } From f471057a173e405a8111d2f9c3c27bf7966bc44e Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Tue, 3 Jun 2025 16:01:46 +0800 Subject: [PATCH 04/10] not update unnecessary file Signed-off-by: kenwoodjw --- .../knowledge_retrieval_node.py | 4 ++-- api/pyproject.toml | 4 ++-- api/uv.lock | 10 +++++----- web/i18n/zh-Hans/workflow.ts | 14 ++++++-------- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 083b0579bb..4995fd22d9 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -555,10 +555,10 @@ class KnowledgeRetrievalNode(LLMNode): filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == value) case "not in": if isinstance(value, list | tuple): - if not value: # 空数组 + if not value: return filters - # 为每个值生成不匹配条件 + # generate not in conditions and_conditions = [] for i, v in enumerate(value): if isinstance(v, str): diff --git a/api/pyproject.toml b/api/pyproject.toml index 204b449eaa..82669b9862 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ 
-14,7 +14,7 @@ dependencies = [ "chardet~=5.1.0", "flask~=3.1.0", "flask-compress~=1.17", - "flask-cors~=6.0.0", + "flask-cors~=5.0.0", "flask-login~=0.6.3", "flask-migrate~=4.0.7", "flask-restful~=0.3.10", @@ -199,4 +199,4 @@ vdb = [ "volcengine-compat~=1.0.0", "weaviate-client~=3.24.0", "xinference-client~=1.2.2", -] +] \ No newline at end of file diff --git a/api/uv.lock b/api/uv.lock index 596ee35f85..96c959cc22 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1392,7 +1392,7 @@ requires-dist = [ { name = "chardet", specifier = "~=5.1.0" }, { name = "flask", specifier = "~=3.1.0" }, { name = "flask-compress", specifier = "~=1.17" }, - { name = "flask-cors", specifier = "~=6.0.0" }, + { name = "flask-cors", specifier = "~=5.0.0" }, { name = "flask-login", specifier = "~=0.6.3" }, { name = "flask-migrate", specifier = "~=4.0.7" }, { name = "flask-restful", specifier = "~=0.3.10" }, @@ -1765,15 +1765,15 @@ wheels = [ [[package]] name = "flask-cors" -version = "6.0.0" +version = "5.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "flask" }, { name = "werkzeug" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/e7/b3c6afdd984672b55dff07482699c688af6c01bd7fd5dd55f9c9d1a88d1c/flask_cors-6.0.0.tar.gz", hash = "sha256:4592c1570246bf7beee96b74bc0adbbfcb1b0318f6ba05c412e8909eceec3393", size = 11875 } +sdist = { url = "https://files.pythonhosted.org/packages/32/d8/667bd90d1ee41c96e938bafe81052494e70b7abd9498c4a0215c103b9667/flask_cors-5.0.1.tar.gz", hash = "sha256:6ccb38d16d6b72bbc156c1c3f192bc435bfcc3c2bc864b2df1eb9b2d97b2403c", size = 11643 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/f0/0ee29090016345938f016ee98aa8b5de1c500ee93491dc0c76495848fca1/flask_cors-6.0.0-py3-none-any.whl", hash = "sha256:6332073356452343a8ccddbfec7befdc3fdd040141fe776ec9b94c262f058657", size = 11549 }, + { url = 
"https://files.pythonhosted.org/packages/85/61/4aea5fb55be1b6f95e604627dc6c50c47d693e39cab2ac086ee0155a0abd/flask_cors-5.0.1-py3-none-any.whl", hash = "sha256:fa5cb364ead54bbf401a26dbf03030c6b18fb2fcaf70408096a572b409586b0c", size = 11296 }, ] [[package]] @@ -6680,4 +6680,4 @@ wheels = [ [package.optional-dependencies] cffi = [ { name = "cffi", marker = "platform_python_implementation == 'PyPy'" }, -] +] \ No newline at end of file diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts index 057393016a..b8257d8229 100644 --- a/web/i18n/zh-Hans/workflow.ts +++ b/web/i18n/zh-Hans/workflow.ts @@ -58,7 +58,7 @@ const translation = { processData: '数据处理', input: '输入', output: '输出', - jinjaEditorPlaceholder: '输入 "/" 或 "{" 插入变量', + jinjaEditorPlaceholder: '输入 “/” 或 “{” 插入变量', viewOnly: '只读', showRunHistory: '显示运行历史', enableJinja: '开启支持 Jinja 模板', @@ -110,7 +110,7 @@ const translation = { branch: '分支', onFailure: '异常时', addFailureBranch: '添加异常分支', - openInExplore: '在"探索"中打开', + openInExplore: '在“探索”中打开', loadMore: '加载更多', noHistory: '没有历史版本', }, @@ -271,7 +271,7 @@ const translation = { 'variable-aggregator': '将多路分支的变量聚合为一个变量,以实现下游节点统一配置。', 'iteration': '对列表对象执行多次步骤直至输出所有结果。', 'loop': '循环执行一段逻辑直到满足结束条件或者到达循环次数上限。', - 'loop-end': '相当于"break" 此节点没有配置项,当循环体内运行到此节点后循环终止。', + 'loop-end': '相当于“break” 此节点没有配置项,当循环体内运行到此节点后循环终止。', 'parameter-extractor': '利用 LLM 从自然语言内推理提取出结构化参数,用于后置的工具调用或 HTTP 请求。', 'document-extractor': '用于将用户上传的文档解析为 LLM 便于理解的文本内容。', 'list-operator': '用于过滤或排序数组内容。', @@ -489,18 +489,16 @@ const translation = { add: '添加条件', search: '搜索元数据', placeholder: '输入值', - arrayPlaceholder: '输入逗号分隔的值 (例如: 值1, 值2, 值3)', - datePlaceholder: '选择时间...', + datePlaceholder: '选择日期...', select: '选择变量...', - noVariables: '没有可用变量', }, }, }, http: { inputVars: '输入变量', api: 'API', - apiPlaceholder: '输入 URL,输入变量时请键入\'/\'', - extractListPlaceholder: '输入提取列表编号,输入变量时请键入\'/\'', + apiPlaceholder: '输入 URL,输入变量时请键入‘/’', + extractListPlaceholder: '输入提取列表编号,输入变量时请键入‘/’', 
notStartWithHttp: 'API 应该以 http:// 或 https:// 开头', key: '键', type: '类型', From 8933f1020dfc538fbbbd39652e5591541483592c Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Tue, 3 Jun 2025 16:04:39 +0800 Subject: [PATCH 05/10] fix pyproject and uv.lock Signed-off-by: kenwoodjw --- api/pyproject.toml | 2 +- api/uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index 82669b9862..50a765c0e1 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -199,4 +199,4 @@ vdb = [ "volcengine-compat~=1.0.0", "weaviate-client~=3.24.0", "xinference-client~=1.2.2", -] \ No newline at end of file +] diff --git a/api/uv.lock b/api/uv.lock index 96c959cc22..a1e1d6146a 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -6680,4 +6680,4 @@ wheels = [ [package.optional-dependencies] cffi = [ { name = "cffi", marker = "platform_python_implementation == 'PyPy'" }, -] \ No newline at end of file +] From 0620c9de948d12971d3212ade1132ab9eb901d2c Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Tue, 3 Jun 2025 16:10:06 +0800 Subject: [PATCH 06/10] not updte i18n file Signed-off-by: kenwoodjw --- web/i18n/en-US/workflow.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts index a526d8e9fd..57cb42a0b1 100644 --- a/web/i18n/en-US/workflow.ts +++ b/web/i18n/en-US/workflow.ts @@ -406,7 +406,7 @@ const translation = { roleDescription: { system: 'Give high level instructions for the conversation', user: 'Provide instructions, queries, or any text-based input to the model', - assistant: 'The model\'s responses based on the user messages', + assistant: 'The model’s responses based on the user messages', }, addMessage: 'Add Message', vision: 'vision', @@ -488,18 +488,16 @@ const translation = { add: 'Add Condition', search: 'Search metadata', placeholder: 'Enter value', - arrayPlaceholder: 'Enter comma-separated values (e.g., value1, value2, value3)', datePlaceholder: 
'Choose a time...', select: 'Select variable...', - noVariables: 'No variables available', }, }, }, http: { inputVars: 'Input Variables', api: 'API', - apiPlaceholder: 'Enter URL, type \'/\' insert variable', - extractListPlaceholder: 'Enter list item index, type \'/\' insert variable', + apiPlaceholder: 'Enter URL, type ‘/’ insert variable', + extractListPlaceholder: 'Enter list item index, type ‘/’ insert variable', notStartWithHttp: 'API should start with http:// or https://', key: 'Key', type: 'Type', From 0ad92524581cb09c3e41954af4949e2cd984fda7 Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Thu, 5 Jun 2025 15:51:44 +0800 Subject: [PATCH 07/10] resolved conflicts Signed-off-by: kenwoodjw --- .../condition-common-variable-selector.tsx | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx index 631715e1c0..77ec63e8f3 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/condition-list/condition-common-variable-selector.tsx @@ -24,21 +24,11 @@ const ConditionCommonVariableSelector = ({ const { t } = useTranslation() const [open, setOpen] = useState(false) - // 添加调试日志 - console.log('ConditionCommonVariableSelector - variables:', variables) - console.log('ConditionCommonVariableSelector - varType:', varType) - - // 过滤变量,支持数组类型 const filteredVariables = variables.filter((v) => { - // 如果是数组类型变量,始终显示 const isArrayType = v.type === 'array' || v.type.startsWith('array') - - // 如果是指定类型或数组类型,则显示 return v.type === varType || isArrayType }) - console.log('ConditionCommonVariableSelector - filteredVariables:', filteredVariables) - const selected = 
variables.find(v => v.name === value) const handleChange = useCallback((v: string) => { onChange(v) From 5ab4553793d023e2d9629d7a2fb07d3372ca1771 Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Thu, 5 Jun 2025 16:02:41 +0800 Subject: [PATCH 08/10] fix ruff check Signed-off-by: kenwoodjw --- api/core/rag/retrieval/dataset_retrieval.py | 11 +++++------ .../knowledge_retrieval/knowledge_retrieval_node.py | 12 +++++------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index dd24227f6c..6b220a9d66 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -1048,9 +1048,9 @@ class DatasetRetrieval: filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value) case "in": if isinstance(value, list | tuple): - if not value: + if not value: return filters - + or_conditions = [] for i, v in enumerate(value): if isinstance(v, str): @@ -1063,7 +1063,7 @@ class DatasetRetrieval: else: v_str = str(v) or_conditions.append(DatasetDocument.doc_metadata[metadata_name] == f'"{v_str}"') - + if or_conditions: filters.append(or_(*or_conditions)) else: @@ -1078,13 +1078,12 @@ class DatasetRetrieval: if isinstance(value, list | tuple): if not value: return filters - + and_conditions = [] for i, v in enumerate(value): if isinstance(v, str): and_conditions.append(DatasetDocument.doc_metadata[metadata_name] != f'"{v}"') elif isinstance(v, int | float): - and_conditions.append( sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != v ) @@ -1092,7 +1091,7 @@ class DatasetRetrieval: else: v_str = str(v) and_conditions.append(DatasetDocument.doc_metadata[metadata_name] != f'"{v_str}"') - + if and_conditions: filters.append(and_(*and_conditions)) else: diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py 
b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index ad4a9f46d3..dd314073a2 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -533,14 +533,13 @@ class KnowledgeRetrievalNode(LLMNode): if isinstance(value, list | tuple): if not value: return filters - + # Generate matching conditions for each value, supporting both number and string matching or_conditions = [] for i, v in enumerate(value): if isinstance(v, str): or_conditions.append(Document.doc_metadata[metadata_name] == f'"{v}"') elif isinstance(v, int | float): - or_conditions.append( sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == v ) @@ -548,7 +547,7 @@ class KnowledgeRetrievalNode(LLMNode): else: v_str = str(v) or_conditions.append(Document.doc_metadata[metadata_name] == f'"{v_str}"') - + if or_conditions: filters.append(or_(*or_conditions)) else: @@ -559,16 +558,15 @@ class KnowledgeRetrievalNode(LLMNode): filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == value) case "not in": if isinstance(value, list | tuple): - if not value: + if not value: return filters - + # generate not in conditions and_conditions = [] for i, v in enumerate(value): if isinstance(v, str): and_conditions.append(Document.doc_metadata[metadata_name] != f'"{v}"') elif isinstance(v, int | float): - and_conditions.append( sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != v ) @@ -576,7 +574,7 @@ class KnowledgeRetrievalNode(LLMNode): else: v_str = str(v) and_conditions.append(Document.doc_metadata[metadata_name] != f'"{v_str}"') - + if and_conditions: filters.append(and_(*and_conditions)) else: From 5e00ee96baba497d7ed6d0aa96f98a9bd0a7ab34 Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Thu, 5 Jun 2025 17:20:46 +0800 Subject: [PATCH 09/10] fix mypy check Signed-off-by: kenwoodjw --- .../knowledge_retrieval_node.py | 27 
++++++++++--------- .../workflow/utils/condition/processor.py | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index dd314073a2..c964c252ce 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -375,19 +375,20 @@ class KnowledgeRetrievalNode(LLMNode): expected_value = self.graph_runtime_state.variable_pool.convert_template( expected_value ).value[0] - if expected_value.value_type == "number": # type: ignore - expected_value = expected_value.value # type: ignore - elif expected_value.value_type == "string": # type: ignore - expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore - elif expected_value.value_type in ( - "array[number]", - "array[string]", - "array[object]", - "array", - ): # type: ignore - expected_value = expected_value.value # type: ignore - else: - raise ValueError("Invalid expected metadata value type") + if hasattr(expected_value, "value_type"): + if expected_value.value_type == "number": # type: ignore + expected_value = expected_value.value # type: ignore + elif expected_value.value_type == "string": # type: ignore + expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore + elif expected_value.value_type in ( + "array[number]", + "array[string]", + "array[object]", + "array", + ): # type: ignore + expected_value = expected_value.value # type: ignore + else: + raise ValueError("Invalid expected metadata value type") elif isinstance(expected_value, list): # For constant array values pass diff --git a/api/core/workflow/utils/condition/processor.py b/api/core/workflow/utils/condition/processor.py index 9795387788..c321f1812c 100644 --- a/api/core/workflow/utils/condition/processor.py +++ 
b/api/core/workflow/utils/condition/processor.py @@ -77,7 +77,7 @@ def _evaluate_condition( *, operator: SupportedComparisonOperator, value: Any, - expected: str | Sequence[str] | None, + expected: str | Sequence[str] | Sequence[int] | Sequence[float] | int | float | None, ) -> bool: match operator: case "contains": From 11ec8b9584b5dfe54863ff8284133b44be4de62f Mon Sep 17 00:00:00 2001 From: kenwoodjw Date: Thu, 5 Jun 2025 22:28:14 +0800 Subject: [PATCH 10/10] fix MyPy check Signed-off-by: kenwoodjw --- .../nodes/knowledge_retrieval/knowledge_retrieval_node.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index c964c252ce..8381bfd8f4 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -375,17 +375,17 @@ class KnowledgeRetrievalNode(LLMNode): expected_value = self.graph_runtime_state.variable_pool.convert_template( expected_value ).value[0] - if hasattr(expected_value, "value_type"): + if hasattr(expected_value, "value_type") and expected_value is not None: if expected_value.value_type == "number": # type: ignore expected_value = expected_value.value # type: ignore elif expected_value.value_type == "string": # type: ignore expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore - elif expected_value.value_type in ( + elif expected_value.value_type in ( # type: ignore "array[number]", "array[string]", "array[object]", "array", - ): # type: ignore + ): expected_value = expected_value.value # type: ignore else: raise ValueError("Invalid expected metadata value type")