From e246fd48eeee11cd4783231533826db48c438d81 Mon Sep 17 00:00:00 2001 From: Dongyu Li <544104925@qq.com> Date: Fri, 6 Jun 2025 11:17:45 +0800 Subject: [PATCH] feat(kb_index): When the knowledge base is in high-quality indexing mode, the economical (keyword) index is not created. --- .../processor/paragraph_index_processor.py | 2 ++ api/services/vector_service.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index dca84b9041..9b90bd2bb3 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -76,6 +76,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor): if dataset.indexing_technique == "high_quality": vector = Vector(dataset) vector.create(documents) + with_keywords = False if with_keywords: keywords_list = kwargs.get("keywords_list") keyword = Keyword(dataset) @@ -91,6 +92,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor): vector.delete_by_ids(node_ids) else: vector.delete() + with_keywords = False if with_keywords: keyword = Keyword(dataset) if node_ids: diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 19e37f4ee3..9165139193 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -97,16 +97,16 @@ class VectorService: vector = Vector(dataset=dataset) vector.delete_by_ids([segment.index_node_id]) vector.add_texts([document], duplicate_check=True) - - # update keyword index - keyword = Keyword(dataset) - keyword.delete_by_ids([segment.index_node_id]) - - # save keyword index - if keywords and len(keywords) > 0: - keyword.add_texts([document], keywords_list=[keywords]) else: - keyword.add_texts([document]) + # update keyword index + keyword = Keyword(dataset) + keyword.delete_by_ids([segment.index_node_id]) + + # save keyword 
index + if keywords and len(keywords) > 0: + keyword.add_texts([document], keywords_list=[keywords]) + else: + keyword.add_texts([document]) @classmethod def generate_child_chunks(