|
|
|
@ -405,6 +405,9 @@ class RetrievalService:
|
|
|
|
record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore
|
|
|
|
record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore
|
|
|
|
record["score"] = segment_child_map[record["segment"].id]["max_score"]
|
|
|
|
record["score"] = segment_child_map[record["segment"].id]["max_score"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# For high-scoring segments, automatically append the adjacent segments
|
|
|
|
|
|
|
|
cls.append_next_segments(records=records,dataset_documents=dataset_documents)
|
|
|
|
|
|
|
|
|
|
|
|
result = []
|
|
|
|
result = []
|
|
|
|
for record in records:
|
|
|
|
for record in records:
|
|
|
|
# Extract segment
|
|
|
|
# Extract segment
|
|
|
|
@ -422,8 +425,6 @@ class RetrievalService:
|
|
|
|
if score_value is not None and isinstance(score_value, int | float | str)
|
|
|
|
if score_value is not None and isinstance(score_value, int | float | str)
|
|
|
|
else None
|
|
|
|
else None
|
|
|
|
)
|
|
|
|
)
|
|
|
|
cls.append_next_segments(records=records,dataset_documents=dataset_documents)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create RetrievalSegments object
|
|
|
|
# Create RetrievalSegments object
|
|
|
|
retrieval_segment = RetrievalSegments(segment=segment, child_chunks=child_chunks, score=score)
|
|
|
|
retrieval_segment = RetrievalSegments(segment=segment, child_chunks=child_chunks, score=score)
|
|
|
|
result.append(retrieval_segment)
|
|
|
|
result.append(retrieval_segment)
|
|
|
|
@ -435,7 +436,6 @@ class RetrievalService:
|
|
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
@classmethod
|
|
|
|
def append_next_segments(cls, records: list[dict], dataset_documents : dict):
|
|
|
|
def append_next_segments(cls, records: list[dict], dataset_documents : dict):
|
|
|
|
# import pdb; pdb.set_trace()
|
|
|
|
|
|
|
|
def filter_record(record):
|
|
|
|
def filter_record(record):
|
|
|
|
document_id = record["segment"].document_id
|
|
|
|
document_id = record["segment"].document_id
|
|
|
|
if document_id in dataset_documents:
|
|
|
|
if document_id in dataset_documents:
|
|
|
|
@ -521,4 +521,4 @@ class RetrievalService:
|
|
|
|
if document_segment.position == this_positions + 1:
|
|
|
|
if document_segment.position == this_positions + 1:
|
|
|
|
next_segment = document_segment
|
|
|
|
next_segment = document_segment
|
|
|
|
break
|
|
|
|
break
|
|
|
|
return next_segment
|
|
|
|
return next_segment
|
|
|
|
|