【Dify】 增加全文检索的接口- 查询文件ID - 完善

pull/22121/head
liuchangsheng@wisdomidata.com 11 months ago
parent 559397ffee
commit 53964c60c1

@ -95,16 +95,18 @@ class FullSearchTextApi(Resource):
help='List of names' help='List of names'
) )
parser.add_argument("query_text", type=str, required=True, location="json") parser.add_argument("query_text", type=str, required=True, location="json")
parser.add_argument("file_ids", type=str, required=True, location="json")
args = parser.parse_args() args = parser.parse_args()
dataset_names = args.dataset_names dataset_names = args.dataset_names
query_text = args.query_text query_text = args.query_text
file_ids = args.file_ids
current_user = flask_login.current_user current_user = flask_login.current_user
tenant = current_user.current_tenant tenant = current_user.current_tenant
search_datas = DocumentExtService.get_full_search_data( search_datas = DocumentExtService.get_full_search_data(
dataset_names=dataset_names, dataset_names=dataset_names,
tenant_id=tenant.id, tenant_id=tenant.id,
query_text=query_text query_text=query_text,
file_ids=file_ids
) )
return search_datas return search_datas

@ -202,7 +202,13 @@ class DocumentExtService:
return next_segment return next_segment
def get_full_search_data(dataset_names: list[str], tenant_id : str, query_text: str): def get_full_search_data(dataset_names: list[str],
tenant_id : str,
query_text: str,
file_ids: str) -> list[dict]:
if not file_ids:
return []
datasets = db.session.query(Dataset).filter(Dataset.name.in_(dataset_names),Dataset.tenant_id == tenant_id).all() datasets = db.session.query(Dataset).filter(Dataset.name.in_(dataset_names),Dataset.tenant_id == tenant_id).all()
dataset_ids = [dataset.id for dataset in datasets] dataset_ids = [dataset.id for dataset in datasets]
@ -214,11 +220,12 @@ class DocumentExtService:
search_data = { search_data = {
"title": segment.document_name, "title": segment.document_name,
"content": segment.segment_content, "content": segment.segment_content,
"doc_metadata": segment.metadata, "doc_metadata": segment.doc_metadata,
"query": query_text "query": query_text
} }
search_datas.append(search_data) search_datas.append(search_data)
search_datas = DocumentExtService.filter_by_file_ids(search_datas=search_datas, file_ids=file_ids)
return search_datas return search_datas
def get_full_search_segments(dataset_ids: list[str], query_text: str): def get_full_search_segments(dataset_ids: list[str], query_text: str):
@ -271,3 +278,10 @@ class DocumentExtService:
else: else:
fetch_segments.append(segment_list[1]) fetch_segments.append(segment_list[1])
return fetch_segments return fetch_segments
def filter_by_file_ids(search_datas: list[dict], file_ids: str) -> list[dict]:
    """Keep only the search results that belong to one of the requested files.

    Args:
        search_datas: Search result dicts. Each may carry a ``doc_metadata``
            mapping holding a ``file_id`` entry; results without one are
            dropped.
        file_ids: Comma-separated file IDs, e.g. ``"id1,id2"``. Whitespace
            around each ID and empty entries are ignored.

    Returns:
        The items of ``search_datas`` (original order preserved) whose
        ``doc_metadata["file_id"]`` equals one of the requested IDs. Returns
        an empty list when ``file_ids`` contains no usable ID.
    """
    # Match against the parsed IDs, not the raw string: ``x in "a,b"`` is a
    # substring test, so "ab" would wrongly match "abc" and "" would match
    # everything.
    wanted_ids = {part.strip() for part in file_ids.split(",") if part.strip()}
    if not wanted_ids:
        return []

    matched = []
    for item in search_datas:
        # ``doc_metadata`` may be absent or explicitly None; treat both as empty.
        metadata = item.get("doc_metadata") or {}
        file_id = metadata.get("file_id")
        # Compare as text so a numeric file_id still matches its string form.
        if file_id is not None and str(file_id) in wanted_ids:
            matched.append(item)
    return matched

Loading…
Cancel
Save