|
|
|
|
@ -7,6 +7,7 @@ import time
|
|
|
|
|
import uuid
|
|
|
|
|
from typing import Optional, List, cast
|
|
|
|
|
|
|
|
|
|
from flask import current_app, Flask
|
|
|
|
|
from flask_login import current_user
|
|
|
|
|
from langchain.schema import Document
|
|
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
|
|
|
|
|
@ -522,7 +523,8 @@ class IndexingRunner:
|
|
|
|
|
sub_documents = all_documents[i:i + 10]
|
|
|
|
|
for doc in sub_documents:
|
|
|
|
|
document_format_thread = threading.Thread(target=self.format_qa_document, kwargs={
|
|
|
|
|
'tenant_id': tenant_id, 'document_node': doc, 'all_qa_documents': all_qa_documents})
|
|
|
|
|
'flask_app': current_app._get_current_object(), 'tenant_id': tenant_id, 'document_node': doc,
|
|
|
|
|
'all_qa_documents': all_qa_documents})
|
|
|
|
|
threads.append(document_format_thread)
|
|
|
|
|
document_format_thread.start()
|
|
|
|
|
for thread in threads:
|
|
|
|
|
@ -530,10 +532,11 @@ class IndexingRunner:
|
|
|
|
|
return all_qa_documents
|
|
|
|
|
return all_documents
|
|
|
|
|
|
|
|
|
|
def format_qa_document(self, tenant_id: str, document_node, all_qa_documents):
|
|
|
|
|
def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents):
|
|
|
|
|
format_documents = []
|
|
|
|
|
if document_node.page_content is None or not document_node.page_content.strip():
|
|
|
|
|
return
|
|
|
|
|
with flask_app.app_context():
|
|
|
|
|
try:
|
|
|
|
|
# qa model document
|
|
|
|
|
response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content)
|
|
|
|
|
|