From d4a09805a31dc3013ecc9ed05fc1143023060c7f Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:08:25 +0800 Subject: [PATCH] improve preview document tokenizer (#13328) --- .../index_processor/processor/parent_child_index_processor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/core/rag/index_processor/processor/parent_child_index_processor.py b/api/core/rag/index_processor/processor/parent_child_index_processor.py index 3140122081..894b85339a 100644 --- a/api/core/rag/index_processor/processor/parent_child_index_processor.py +++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py @@ -47,6 +47,8 @@ class ParentChildIndexProcessor(BaseIndexProcessor): embedding_model_instance=kwargs.get("embedding_model_instance"), ) for document in documents: + if kwargs.get("preview") and len(all_documents) >= 10: + return all_documents # document clean document_text = CleanProcessor.clean(document.page_content, process_rule) document.page_content = document_text