diff --git a/api/core/workflow/nodes/knowledge_retrieval/entities.py b/api/core/workflow/nodes/knowledge_retrieval/entities.py
index d2e5a15545..53aaadc7f2 100644
--- a/api/core/workflow/nodes/knowledge_retrieval/entities.py
+++ b/api/core/workflow/nodes/knowledge_retrieval/entities.py
@@ -117,6 +117,18 @@ class MetadataFilteringCondition(BaseModel):
     conditions: Optional[list[Condition]] = Field(default=None, deprecated=True)
 
 
+class MetadataFilteringComplexSubCondition(BaseModel):
+    """One filter group: leaf ``conditions`` plus nested ``sub_conditions``, joined by ``logical_operator``."""
+    logical_operator: Optional[Literal["and", "or"]] = "and"
+    conditions: Optional[list[Condition]] = None
+    sub_conditions: Optional[list["MetadataFilteringComplexSubCondition"]] = None
+
+
+class MetadataFilteringComplexCondition(BaseModel):
+    """Complex Metadata Filtering Condition: top-level groups joined by ``logical_operator``."""
+    logical_operator: Optional[Literal["and", "or"]] = "and"
+    conditions: Optional[list[MetadataFilteringComplexSubCondition]] = None
+
 class KnowledgeRetrievalNodeData(BaseNodeData):
     """
     Knowledge retrieval Node Data.
@@ -128,7 +140,8 @@ class KnowledgeRetrievalNodeData(BaseNodeData):
     retrieval_mode: Literal["single", "multiple"]
     multiple_retrieval_config: Optional[MultipleRetrievalConfig] = None
     single_retrieval_config: Optional[SingleRetrievalConfig] = None
-    metadata_filtering_mode: Optional[Literal["disabled", "automatic", "manual"]] = "disabled"
+    metadata_filtering_mode: Optional[Literal["disabled", "automatic", "manual", "complex_conditions"]] = "disabled"
     metadata_model_config: Optional[ModelConfig] = None
     metadata_filtering_conditions: Optional[MetadataFilteringCondition] = None
+    metadata_filtering_complex_conditions: Optional[MetadataFilteringComplexCondition] = None
     vision: VisionConfig = Field(default_factory=VisionConfig)
diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
index 07a711cc4e..ab5e414668 100644
--- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
+++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
@@ -44,7 +44,12 @@ from models.dataset import Dataset, DatasetMetadata, Document, RateLimitLog
 from models.workflow import WorkflowNodeExecutionStatus
 from services.feature_service import FeatureService
 
-from .entities import KnowledgeRetrievalNodeData, ModelConfig
+from .entities import (
+    KnowledgeRetrievalNodeData,
+    MetadataFilteringComplexCondition,
+    MetadataFilteringComplexSubCondition,
+    ModelConfig,
+)
 from .exc import (
     InvalidModelTypeError,
     KnowledgeRetrievalNodeError,
@@ -315,7 +320,75 @@ class KnowledgeRetrievalNode(LLMNode):
             for position, item in enumerate(retrieval_resource_list, start=1):
                 item["metadata"]["position"] = position
         return retrieval_resource_list
 
+    def _recursive_metadata_filter(
+        self, metadata_filtering_complex_conditions: MetadataFilteringComplexSubCondition, filters
+    ):
+        """
+        Append the filter for one condition group, nested groups included, to ``filters``.
+
+        The group's own ``conditions`` and its ``sub_conditions`` are folded into a single
+        expression using the group's ``logical_operator``, so each call adds at most one
+        entry and the caller only has to combine one entry per top-level group.
+
+        :param metadata_filtering_complex_conditions: the group to translate
+        :param filters: list the combined expression is appended to (mutated in place)
+        :return: the same ``filters`` list
+        """
+        group_filter = self._build_metadata_group_filter(metadata_filtering_complex_conditions)
+        if group_filter is not None:
+            filters.append(group_filter)
+        return filters
+
+    def _build_metadata_group_filter(self, group: MetadataFilteringComplexSubCondition):
+        """
+        Build one expression for ``group`` and everything nested in it.
+
+        :return: the combined expression, or None when the group yields no filter
+        """
+        parts: list = []
+        for sequence, condition in enumerate(group.conditions or []):
+            expected_value = condition.value
+            if expected_value is None and condition.comparison_operator not in ("empty", "not empty"):
+                # Nothing to compare against; only the emptiness operators work without a value.
+                continue
+            if isinstance(expected_value, str):
+                expected_value = self._resolve_metadata_expected_value(expected_value)
+            # NOTE(review): ``sequence`` restarts at 0 in every group. If
+            # _process_metadata_filter_func derives names from it, two groups filtering on
+            # the same metadata name could clash - confirm against that helper.
+            parts = self._process_metadata_filter_func(
+                sequence,
+                condition.comparison_operator,
+                condition.name,
+                expected_value,
+                parts,
+            )
+        for sub_condition in group.sub_conditions or []:
+            # Build each nested group separately so it stays a single operand of this
+            # group's operator instead of spilling its members into the parent list.
+            sub_filter = self._build_metadata_group_filter(sub_condition)
+            if sub_filter is not None:
+                parts.append(sub_filter)
+        if not parts:
+            return None
+        return and_(*parts) if group.logical_operator == "and" else or_(*parts)
+
+    def _resolve_metadata_expected_value(self, template: str):
+        """
+        Render a string value through the variable pool.
+
+        :return: the number for a number segment, or the text with runs of CR/LF/tab
+            replaced by a space and stripped for a string segment
+        :raises ValueError: if the first rendered segment is neither a number nor a string
+        """
+        segment = self.graph_runtime_state.variable_pool.convert_template(template).value[0]
+        if segment.value_type == "number":
+            return segment.value
+        if segment.value_type == "string":
+            return re.sub(r"[\r\n\t]+", " ", segment.text).strip()
+        raise ValueError("Invalid expected metadata value type")
+
     def _get_metadata_filter_condition(
         self, dataset_ids: list, query: str, node_data: KnowledgeRetrievalNodeData
     ) -> tuple[Optional[dict[str, list[str]]], Optional[MetadataCondition]]:
@@ -329,6 +402,29 @@ class KnowledgeRetrievalNode(LLMNode):
         metadata_condition = None
         if node_data.metadata_filtering_mode == "disabled":
             return None, None
+        elif node_data.metadata_filtering_mode == "complex_conditions":
+            # todo: do not support external_knowledge_retrieval
+            complex_conditions: Optional[MetadataFilteringComplexCondition] = (
+                node_data.metadata_filtering_complex_conditions
+            )
+            if complex_conditions:
+                # ``conditions`` is optional, so treat a missing list as "no groups".
+                for condition in complex_conditions.conditions or []:
+                    filters = self._recursive_metadata_filter(condition, filters)
+                if filters:
+                    if complex_conditions.logical_operator == "and":
+                        document_query = document_query.filter(and_(*filters))
+                    else:
+                        document_query = document_query.filter(or_(*filters))
+                # NOTE(review): the signature promises Optional[MetadataCondition] but this is
+                # a MetadataFilteringComplexCondition - confirm what the callers do with it.
+                metadata_condition = complex_conditions  # type: ignore
+            documents = document_query.all()
+            # group by dataset_id
+            metadata_filter_document_ids = defaultdict(list) if documents else None  # type: ignore
+            for document in documents:
+                metadata_filter_document_ids[document.dataset_id].append(document.id)  # type: ignore
+            return metadata_filter_document_ids, metadata_condition
         elif node_data.metadata_filtering_mode == "automatic":
             automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data)
             if automatic_metadata_filters:
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-filter/metadata-filter-selector.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-filter/metadata-filter-selector.tsx
index 7183e685f4..4bc0f868e8 100644
--- a/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-filter/metadata-filter-selector.tsx
+++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/metadata/metadata-filter/metadata-filter-selector.tsx
@@ -38,6 +38,11 @@ const MetadataFilterSelector = ({
       value: t('workflow.nodes.knowledgeRetrieval.metadata.options.manual.title'),
       desc: t('workflow.nodes.knowledgeRetrieval.metadata.options.manual.subTitle'),
     },
+    {
+      key: MetadataFilteringModeEnum.complexConditions,
+      value: t('workflow.nodes.knowledgeRetrieval.metadata.options.complexConditions.title'),
+      desc: t('workflow.nodes.knowledgeRetrieval.metadata.options.complexConditions.subTitle'),
+    },
   ]
 
   const selectedOption = options.find(option => option.key === value)!
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts
index 1cae4ecd3b..b19ddaddfb 100644
--- a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts
+++ b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts
@@ -74,6 +74,7 @@ export enum MetadataFilteringModeEnum {
   disabled = 'disabled',
   automatic = 'automatic',
   manual = 'manual',
+  complexConditions = 'complex_conditions',
 }
 
 export enum MetadataFilteringVariableType {
diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts
index 939229caeb..3041e94bcc 100644
--- a/web/i18n/en-US/workflow.ts
+++ b/web/i18n/en-US/workflow.ts
@@ -448,6 +448,10 @@ const translation = {
             title: 'Manual',
             subTitle: 'Manually add metadata filtering conditions',
           },
+          complexConditions: {
+            title: 'Complex Conditions',
+            subTitle: 'Manually add complex metadata filtering conditions',
+          },
         },
         panel: {
           title: 'Metadata Filter Conditions',
diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts
index 26d847f306..03abd7ae32 100644
--- a/web/i18n/zh-Hans/workflow.ts
+++ b/web/i18n/zh-Hans/workflow.ts
@@ -449,6 +449,10 @@ const translation = {
             title: '手动',
             subTitle: '手动添加元数据过滤条件',
           },
+          complexConditions: {
+            title: '手动多重条件',
+            subTitle: '手动添加元数据多重过滤条件',
+          },
         },
         panel: {
           title: '元数据过滤条件',