add re_segment document param check

pull/114/head
Jyong 3 years ago
parent a41495703e
commit ffa8e4ccd1

@ -207,8 +207,8 @@ class DatasetDocumentListApi(Resource):
parser = reqparse.RequestParser() parser = reqparse.RequestParser()
parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False,
location='json') location='json')
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json') parser.add_argument('data_source', type=dict, required=False, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json') parser.add_argument('process_rule', type=dict, required=False, location='json')
parser.add_argument('duplicate', type=bool, nullable=False, location='json') parser.add_argument('duplicate', type=bool, nullable=False, location='json')
parser.add_argument('original_document_id', type=str, required=False, location='json') parser.add_argument('original_document_id', type=str, required=False, location='json')
args = parser.parse_args() args = parser.parse_args()
@ -245,8 +245,8 @@ class DatasetInitApi(Resource):
parser = reqparse.RequestParser() parser = reqparse.RequestParser()
parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, required=True, parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, required=True,
nullable=False, location='json') nullable=False, location='json')
parser.add_argument('data_source', type=dict, required=False, location='json') parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=False, location='json') parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
args = parser.parse_args() args = parser.parse_args()
# validate args # validate args

@ -18,8 +18,8 @@ from services.errors.account import NoPermissionError
from services.errors.dataset import DatasetNameDuplicateError from services.errors.dataset import DatasetNameDuplicateError
from services.errors.document import DocumentIndexingError from services.errors.document import DocumentIndexingError
from services.errors.file import FileNotExistsError from services.errors.file import FileNotExistsError
from tasks import document_indexing_update_task
from tasks.document_indexing_task import document_indexing_task from tasks.document_indexing_task import document_indexing_task
from tasks.document_indexing_update_task import document_indexing_update_task
class DatasetService: class DatasetService:
@ -358,8 +358,9 @@ class DocumentService:
if dataset.indexing_technique == 'high_quality': if dataset.indexing_technique == 'high_quality':
IndexBuilder.get_default_service_context(dataset.tenant_id) IndexBuilder.get_default_service_context(dataset.tenant_id)
if document_data["original_document_id"]: if 'original_document_id' in document_data and document_data["original_document_id"]:
DocumentService.update_document_with_dataset_id(dataset, document_data, account) document = DocumentService.update_document_with_dataset_id(dataset, document_data, account)
else:
# save process rule # save process rule
if not dataset_process_rule: if not dataset_process_rule:
process_rule = document_data["process_rule"] process_rule = document_data["process_rule"]
@ -419,7 +420,6 @@ class DocumentService:
# trigger async task # trigger async task
document_indexing_task.delay(document.dataset_id, document.id) document_indexing_task.delay(document.dataset_id, document.id)
return document return document
@staticmethod @staticmethod
@ -430,7 +430,7 @@ class DocumentService:
if document.display_status != 'available': if document.display_status != 'available':
raise ValueError("Document is not available") raise ValueError("Document is not available")
# save process rule # save process rule
if 'process_rule' in document_data or document_data['process_rule']: if 'process_rule' in document_data and document_data['process_rule']:
process_rule = document_data["process_rule"] process_rule = document_data["process_rule"]
if process_rule["mode"] == "custom": if process_rule["mode"] == "custom":
dataset_process_rule = DatasetProcessRule( dataset_process_rule = DatasetProcessRule(
@ -450,7 +450,7 @@ class DocumentService:
db.session.commit() db.session.commit()
document.dataset_process_rule_id = dataset_process_rule.id document.dataset_process_rule_id = dataset_process_rule.id
# update document data source # update document data source
if 'data_source' in document_data or document_data['data_source']: if 'data_source' in document_data and document_data['data_source']:
file_name = '' file_name = ''
data_source_info = {} data_source_info = {}
if document_data["data_source"]["type"] == "upload_file": if document_data["data_source"]["type"] == "upload_file":
@ -517,13 +517,13 @@ class DocumentService:
DocumentService.data_source_args_validate(args) DocumentService.data_source_args_validate(args)
DocumentService.process_rule_args_validate(args) DocumentService.process_rule_args_validate(args)
else: else:
if ('data_source' not in args or not args['data_source']) and ( if ('data_source' not in args and not args['data_source'])\
'process_rule' not in args or not args['process_rule']): and ('process_rule' not in args and not args['process_rule']):
raise ValueError("Data source or Process rule is required") raise ValueError("Data source or Process rule is required")
else: else:
if 'data_source' in args or args['data_source']: if 'data_source' in args and args['data_source']:
DocumentService.data_source_args_validate(args) DocumentService.data_source_args_validate(args)
elif 'process_rule' in args or args['process_rule']: if 'process_rule' in args and args['process_rule']:
DocumentService.process_rule_args_validate(args) DocumentService.process_rule_args_validate(args)
@classmethod @classmethod

@ -17,7 +17,7 @@ from models.dataset import Document, Dataset, DocumentSegment
@shared_task @shared_task
def document_indexing_update_task(dataset_id: str, document_id: str): def document_indexing_update_task(dataset_id: str, document_id: str):
""" """
Async process document Async update document
:param dataset_id: :param dataset_id:
:param document_id: :param document_id:
@ -65,7 +65,6 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
click.style('Cleaned document when document update data source or process rule: {} latency: {}'.format(document_id, end_at - start_at), fg='green')) click.style('Cleaned document when document update data source or process rule: {} latency: {}'.format(document_id, end_at - start_at), fg='green'))
except Exception: except Exception:
logging.exception("Cleaned document when document update data source or process rule failed") logging.exception("Cleaned document when document update data source or process rule failed")
# start document re_segment
try: try:
indexing_runner = IndexingRunner() indexing_runner = IndexingRunner()
indexing_runner.run(document) indexing_runner.run(document)

Loading…
Cancel
Save