【Dify】 去掉调试语句（移除残留的 `import pdb; pdb.set_trace()` 断点）

pull/22121/head
liuchangsheng@wisdomidata.com 11 months ago
parent 8048e83979
commit af570b1e37

@ -64,7 +64,6 @@ def get_text_index_score(text_indexs: list[TextIndex],search_texts: list[str]):
return 100 - deduct_points return 100 - deduct_points
def get_full_search_text_max_score(search_texts: list[str], target_text: str) -> (int, list[TextIndex]): def get_full_search_text_max_score(search_texts: list[str], target_text: str) -> (int, list[TextIndex]):
import pdb; pdb.set_trace()
# 1. 建立 source 中每个字符的索引映射 # 1. 建立 source 中每个字符的索引映射
# pos_map = defaultdict(list) # pos_map = defaultdict(list)
text_index_groups:list[list[TextIndex]] = [] text_index_groups:list[list[TextIndex]] = []
@ -74,7 +73,6 @@ def get_full_search_text_max_score(search_texts: list[str], target_text: str) ->
# pos_map[search_text].extend(text_indexs) # pos_map[search_text].extend(text_indexs)
text_index_groups.append(text_indexs) text_index_groups.append(text_indexs)
import pdb; pdb.set_trace()
# groups:list[list[TextIndex]] = [] # groups:list[list[TextIndex]] = []
max_score = -100000 max_score = -100000
max_index_list:list[TextIndex] max_index_list:list[TextIndex]
@ -105,7 +103,6 @@ def get_full_search_text_max_score(search_texts: list[str], target_text: str) ->
# texts.append(text) # texts.append(text)
# print("--------------------------") # print("--------------------------")
# print("".join(texts)) # print("".join(texts))
import pdb; pdb.set_trace()
return (max_score,max_index_list) return (max_score,max_index_list)
if __name__ == "__main__": if __name__ == "__main__":

@ -226,7 +226,6 @@ class DocumentExtService:
tenant_id : str, tenant_id : str,
query_text: str, query_text: str,
file_ids: str) -> list[dict]: file_ids: str) -> list[dict]:
import pdb; pdb.set_trace()
if not file_ids: if not file_ids:
return [] return []
@ -243,7 +242,6 @@ class DocumentExtService:
segments_rows = DocumentExtService.filter_rows_by_file_ids(segments_rows, file_ids) segments_rows = DocumentExtService.filter_rows_by_file_ids(segments_rows, file_ids)
# 过滤文件ID # 过滤文件ID
document_rows = DocumentExtService.filter_rows_by_file_ids(document_rows, file_ids) document_rows = DocumentExtService.filter_rows_by_file_ids(document_rows, file_ids)
import pdb; pdb.set_trace()
# 计算分值高的数据 # 计算分值高的数据
segment_datas = DocumentExtService.get_full_search_segments_by_score( segment_datas = DocumentExtService.get_full_search_segments_by_score(
keywords=keywords, keywords=keywords,
@ -307,7 +305,6 @@ class DocumentExtService:
def get_keywords(query_text: str) -> Keywords: def get_keywords(query_text: str) -> Keywords:
# 分词器分词关键词 # 分词器分词关键词
keyword_texts = list(jieba.cut(query_text)) keyword_texts = list(jieba.cut(query_text))
import pdb; pdb.set_trace()
# 判断关键词的长度 # 判断关键词的长度
jieba.analyse.set_stop_words("services/ext/stopwords.txt") jieba.analyse.set_stop_words("services/ext/stopwords.txt")
# def get_text(): # def get_text():
@ -423,7 +420,6 @@ class DocumentExtService:
# 按照分值排序 # 按照分值排序
max_score_segments = sorted(max_score_segments, key=lambda x: x['score'], reverse=True) max_score_segments = sorted(max_score_segments, key=lambda x: x['score'], reverse=True)
import pdb; pdb.set_trace()
return max_score_segments return max_score_segments
def filter_rows_by_file_ids(search_datas: list[Row], file_ids: str) -> list[Row]: def filter_rows_by_file_ids(search_datas: list[Row], file_ids: str) -> list[Row]:

Loading…
Cancel
Save