From af570b1e37a0f8382fd7878137e8ed0f3530930b Mon Sep 17 00:00:00 2001 From: "liuchangsheng@wisdomidata.com" Date: Mon, 16 Jun 2025 10:14:15 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90Dify=E3=80=91=20=E5=8E=BB=E6=8E=89?= =?UTF-8?q?=E6=89=93=E5=8D=B0=E8=AF=AD=E5=8F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/extensions/utils/search_tool.py | 3 --- api/services/ext/dataset_ext_service.py | 4 ---- 2 files changed, 7 deletions(-) diff --git a/api/extensions/utils/search_tool.py b/api/extensions/utils/search_tool.py index 8ace63f483..0ed73b67c0 100644 --- a/api/extensions/utils/search_tool.py +++ b/api/extensions/utils/search_tool.py @@ -64,7 +64,6 @@ def get_text_index_score(text_indexs: list[TextIndex],search_texts: list[str]): return 100 - deduct_points def get_full_search_text_max_score(search_texts: list[str], target_text: str) -> (int, list[TextIndex]): - import pdb; pdb.set_trace() # 1. 建立 source 中每个字符的索引映射 # pos_map = defaultdict(list) text_index_groups:list[list[TextIndex]] = [] @@ -74,7 +73,6 @@ def get_full_search_text_max_score(search_texts: list[str], target_text: str) -> # pos_map[search_text].extend(text_indexs) text_index_groups.append(text_indexs) - import pdb; pdb.set_trace() # groups:list[list[TextIndex]] = [] max_score = -100000 max_index_list:list[TextIndex] @@ -105,7 +103,6 @@ def get_full_search_text_max_score(search_texts: list[str], target_text: str) -> # texts.append(text) # print("--------------------------") # print("".join(texts)) - import pdb; pdb.set_trace() return (max_score,max_index_list) if __name__ == "__main__": diff --git a/api/services/ext/dataset_ext_service.py b/api/services/ext/dataset_ext_service.py index 4e7f412d67..a322e1345a 100644 --- a/api/services/ext/dataset_ext_service.py +++ b/api/services/ext/dataset_ext_service.py @@ -226,7 +226,6 @@ class DocumentExtService: tenant_id : str, query_text: str, file_ids: str) -> list[dict]: - import pdb; pdb.set_trace() if not file_ids: return [] @@ -243,7 +242,6 @@ class DocumentExtService: segments_rows = DocumentExtService.filter_rows_by_file_ids(segments_rows, file_ids) # 过滤文件ID document_rows = DocumentExtService.filter_rows_by_file_ids(document_rows, file_ids) - import pdb; pdb.set_trace() # 计算分值高的数据 segment_datas = DocumentExtService.get_full_search_segments_by_score( keywords=keywords, @@ -307,7 +305,6 @@ class DocumentExtService: def get_keywords(query_text: str) -> Keywords: # 分词器分词关键词 keyword_texts = list(jieba.cut(query_text)) - import pdb; pdb.set_trace() # 判断关键词的长度 jieba.analyse.set_stop_words("services/ext/stopwords.txt") # def get_text(): @@ -423,7 +420,6 @@ class DocumentExtService: # 按照分值排序 max_score_segments = sorted(max_score_segments, key=lambda x: x['score'], reverse=True) - import pdb; pdb.set_trace() return max_score_segments def filter_rows_by_file_ids(search_datas: list[Row], file_ids: str) -> list[Row]: