|
|
|
@ -22,7 +22,9 @@ class VectorService:
|
|
|
|
|
|
|
|
|
|
|
|
for segment in segments:
|
|
|
|
for segment in segments:
|
|
|
|
if doc_form == IndexType.PARENT_CHILD_INDEX:
|
|
|
|
if doc_form == IndexType.PARENT_CHILD_INDEX:
|
|
|
|
document = DatasetDocument.query.filter_by(id=segment.document_id).first()
|
|
|
|
document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first()
|
|
|
|
|
|
|
|
if not document:
|
|
|
|
|
|
|
|
continue
|
|
|
|
# get the process rule
|
|
|
|
# get the process rule
|
|
|
|
processing_rule = (
|
|
|
|
processing_rule = (
|
|
|
|
db.session.query(DatasetProcessRule)
|
|
|
|
db.session.query(DatasetProcessRule)
|
|
|
|
@ -52,7 +54,7 @@ class VectorService:
|
|
|
|
raise ValueError("The knowledge base index technique is not high quality!")
|
|
|
|
raise ValueError("The knowledge base index technique is not high quality!")
|
|
|
|
cls.generate_child_chunks(segment, document, dataset, embedding_model_instance, processing_rule, False)
|
|
|
|
cls.generate_child_chunks(segment, document, dataset, embedding_model_instance, processing_rule, False)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
document = Document(
|
|
|
|
document = Document( # type: ignore
|
|
|
|
page_content=segment.content,
|
|
|
|
page_content=segment.content,
|
|
|
|
metadata={
|
|
|
|
metadata={
|
|
|
|
"doc_id": segment.index_node_id,
|
|
|
|
"doc_id": segment.index_node_id,
|
|
|
|
@ -64,7 +66,7 @@ class VectorService:
|
|
|
|
documents.append(document)
|
|
|
|
documents.append(document)
|
|
|
|
if len(documents) > 0:
|
|
|
|
if len(documents) > 0:
|
|
|
|
index_processor = IndexProcessorFactory(doc_form).init_index_processor()
|
|
|
|
index_processor = IndexProcessorFactory(doc_form).init_index_processor()
|
|
|
|
index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list)
|
|
|
|
index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list) # type: ignore
|
|
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
@classmethod
|
|
|
|
def update_segment_vector(cls, keywords: Optional[list[str]], segment: DocumentSegment, dataset: Dataset):
|
|
|
|
def update_segment_vector(cls, keywords: Optional[list[str]], segment: DocumentSegment, dataset: Dataset):
|
|
|
|
|