add external knowledge
parent
bbb6fcc4f0
commit
517cdb2ca4
@ -0,0 +1,213 @@
|
|||||||
|
import flask_restful
|
||||||
|
from flask import request
|
||||||
|
from flask_login import current_user
|
||||||
|
from flask_restful import Resource, marshal, marshal_with, reqparse
|
||||||
|
from werkzeug.exceptions import Forbidden, NotFound
|
||||||
|
|
||||||
|
import services
|
||||||
|
from configs import dify_config
|
||||||
|
from controllers.console import api
|
||||||
|
from controllers.console.apikey import api_key_fields, api_key_list
|
||||||
|
from controllers.console.app.error import ProviderNotInitializeError
|
||||||
|
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
|
||||||
|
from controllers.console.setup import setup_required
|
||||||
|
from controllers.console.wraps import account_initialization_required
|
||||||
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||||
|
from core.indexing_runner import IndexingRunner
|
||||||
|
from core.model_runtime.entities.model_entities import ModelType
|
||||||
|
from core.provider_manager import ProviderManager
|
||||||
|
from core.rag.datasource.vdb.vector_type import VectorType
|
||||||
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
|
from extensions.ext_database import db
|
||||||
|
from fields.app_fields import related_app_list
|
||||||
|
from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields
|
||||||
|
from fields.document_fields import document_status_fields
|
||||||
|
from libs.login import login_required
|
||||||
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
from models.model import ApiToken, UploadFile
|
||||||
|
from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
|
||||||
|
from services.external_knowledge_service import ExternalDatasetService
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_name(name):
|
||||||
|
if not name or len(name) < 1 or len(name) > 100:
|
||||||
|
raise ValueError('Name must be between 1 to 100 characters.')
|
||||||
|
return name
|
||||||
|
|
||||||
|
def _validate_description_length(description):
|
||||||
|
if len(description) > 400:
|
||||||
|
raise ValueError('Description cannot exceed 400 characters.')
|
||||||
|
return description
|
||||||
|
|
||||||
|
class ExternalApiTemplateListApi(Resource):
|
||||||
|
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def get(self):
|
||||||
|
page = request.args.get('page', default=1, type=int)
|
||||||
|
limit = request.args.get('limit', default=20, type=int)
|
||||||
|
search = request.args.get('keyword', default=None, type=str)
|
||||||
|
|
||||||
|
api_templates, total = ExternalDatasetService.get_external_api_templates(
|
||||||
|
page,
|
||||||
|
limit,
|
||||||
|
current_user.current_tenant_id,
|
||||||
|
search
|
||||||
|
)
|
||||||
|
response = {
|
||||||
|
'data': [item.to_dict() for item in api_templates],
|
||||||
|
'has_more': len(api_templates) == limit,
|
||||||
|
'limit': limit,
|
||||||
|
'total': total,
|
||||||
|
'page': page
|
||||||
|
}
|
||||||
|
return response, 200
|
||||||
|
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def post(self):
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('name', nullable=False, required=True,
|
||||||
|
help='type is required. Name must be between 1 to 100 characters.',
|
||||||
|
type=_validate_name)
|
||||||
|
parser.add_argument('settings', type=list, location='json',
|
||||||
|
nullable=False,
|
||||||
|
required=True, )
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
ExternalDatasetService.validate_api_list(args['settings'])
|
||||||
|
|
||||||
|
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
|
||||||
|
if not current_user.is_dataset_editor:
|
||||||
|
raise Forbidden()
|
||||||
|
|
||||||
|
try:
|
||||||
|
api_template = ExternalDatasetService.create_api_template(
|
||||||
|
tenant_id=current_user.current_tenant_id,
|
||||||
|
user_id=current_user.id,
|
||||||
|
args=args
|
||||||
|
)
|
||||||
|
except services.errors.dataset.DatasetNameDuplicateError:
|
||||||
|
raise DatasetNameDuplicateError()
|
||||||
|
|
||||||
|
return api_template.to_dict(), 201
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalApiTemplateApi(Resource):
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def get(self, api_template_id):
|
||||||
|
api_template_id = str(api_template_id)
|
||||||
|
api_template = ExternalDatasetService.get_api_template(api_template_id)
|
||||||
|
if api_template is None:
|
||||||
|
raise NotFound("API template not found.")
|
||||||
|
|
||||||
|
return api_template.to_dict(), 200
|
||||||
|
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def patch(self, api_template_id):
|
||||||
|
api_template_id = str(api_template_id)
|
||||||
|
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('name', nullable=False, required=True,
|
||||||
|
help='type is required. Name must be between 1 to 100 characters.',
|
||||||
|
type=_validate_name)
|
||||||
|
parser.add_argument('settings', type=list, location='json',
|
||||||
|
nullable=False,
|
||||||
|
required=True, )
|
||||||
|
args = parser.parse_args()
|
||||||
|
ExternalDatasetService.validate_api_list(args['settings'])
|
||||||
|
|
||||||
|
api_template = ExternalDatasetService.update_api_template(
|
||||||
|
tenant_id=current_user.current_tenant_id,
|
||||||
|
user_id=current_user.id,
|
||||||
|
api_template_id=api_template_id,
|
||||||
|
args=args
|
||||||
|
)
|
||||||
|
|
||||||
|
return api_template.to_dict(), 200
|
||||||
|
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def delete(self, api_template_id):
|
||||||
|
api_template_id = str(api_template_id)
|
||||||
|
|
||||||
|
# The role of the current user in the ta table must be admin, owner, or editor
|
||||||
|
if not current_user.is_editor or current_user.is_dataset_operator:
|
||||||
|
raise Forbidden()
|
||||||
|
|
||||||
|
ExternalDatasetService.delete_api_template(current_user.current_tenant_id, api_template_id)
|
||||||
|
return {'result': 'success'}, 204
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalApiUseCheckApi(Resource):
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def get(self, api_template_id):
|
||||||
|
api_template_id = str(api_template_id)
|
||||||
|
|
||||||
|
external_api_template_is_using = ExternalDatasetService.external_api_template_use_check(api_template_id)
|
||||||
|
return {'is_using': external_api_template_is_using}, 200
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalDatasetInitApi(Resource):
|
||||||
|
|
||||||
|
@setup_required
|
||||||
|
@login_required
|
||||||
|
@account_initialization_required
|
||||||
|
def post(self):
|
||||||
|
# The role of the current user in the ta table must be admin, owner, or editor
|
||||||
|
if not current_user.is_editor:
|
||||||
|
raise Forbidden()
|
||||||
|
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('api_template_id', type=str, required=True, nullable=True, location='json')
|
||||||
|
parser.add_argument('name', nullable=False, required=True,
|
||||||
|
help='name is required. Name must be between 1 to 100 characters.',
|
||||||
|
type=_validate_name)
|
||||||
|
parser.add_argument('description', type=str, required=True, nullable=True, location='json')
|
||||||
|
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
|
||||||
|
parser.add_argument('process_parameter', type=dict, required=True, nullable=True, location='json')
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
|
||||||
|
if not current_user.is_dataset_editor:
|
||||||
|
raise Forbidden()
|
||||||
|
|
||||||
|
# validate args
|
||||||
|
ExternalDatasetService.document_create_args_validate(
|
||||||
|
current_user.current_tenant_id,
|
||||||
|
args['api_template_id'],
|
||||||
|
args['process_parameter']
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
dataset, documents, batch = ExternalDatasetService.init_external_dataset(
|
||||||
|
tenant_id=current_user.current_tenant_id,
|
||||||
|
user_id=current_user.id,
|
||||||
|
args=args,
|
||||||
|
)
|
||||||
|
except Exception as ex:
|
||||||
|
raise ProviderNotInitializeError(ex.description)
|
||||||
|
response = {
|
||||||
|
'dataset': dataset,
|
||||||
|
'documents': documents,
|
||||||
|
'batch': batch
|
||||||
|
}
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
api.add_resource(ExternalApiTemplateListApi, '/datasets/external-api-template')
|
||||||
|
api.add_resource(ExternalApiTemplateApi, '/datasets/external-api-template/<uuid:api_template_id>')
|
||||||
|
api.add_resource(ExternalApiUseCheckApi, '/datasets/external-api-template/<uuid:api_template_id>/use-check')
|
||||||
|
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
from flask_restful import fields
|
||||||
|
|
||||||
|
from libs.helper import TimestampField
|
||||||
|
|
||||||
|
api_template_query_detail_fields = {
|
||||||
|
"id": fields.String,
|
||||||
|
"name": fields.String,
|
||||||
|
"setting": fields.String,
|
||||||
|
"created_by": fields.String,
|
||||||
|
"created_at": TimestampField,
|
||||||
|
}
|
||||||
@ -0,0 +1,176 @@
|
|||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from extensions.ext_database import db
|
||||||
|
from extensions.ext_redis import redis_client
|
||||||
|
from libs import helper
|
||||||
|
from models.account import Account, TenantAccountRole
|
||||||
|
from models.dataset import (
|
||||||
|
AppDatasetJoin,
|
||||||
|
Dataset,
|
||||||
|
DatasetCollectionBinding,
|
||||||
|
DatasetPermission,
|
||||||
|
DatasetProcessRule,
|
||||||
|
DatasetQuery,
|
||||||
|
Document,
|
||||||
|
DocumentSegment, ExternalApiTemplates, ExternalKnowledgeBindings,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalDatasetService:
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_external_api_templates(page, per_page, tenant_id, search=None) -> tuple[list[ExternalApiTemplates], int]:
|
||||||
|
query = ExternalApiTemplates.query.filter(ExternalApiTemplates.tenant_id == tenant_id).order_by(
|
||||||
|
ExternalApiTemplates.created_at.desc()
|
||||||
|
)
|
||||||
|
if search:
|
||||||
|
query = query.filter(ExternalApiTemplates.name.ilike(f'%{search}%'))
|
||||||
|
|
||||||
|
api_templates = query.paginate(
|
||||||
|
page=page,
|
||||||
|
per_page=per_page,
|
||||||
|
max_per_page=100,
|
||||||
|
error_out=False
|
||||||
|
)
|
||||||
|
|
||||||
|
return api_templates.items, api_templates.total
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def validate_api_list(cls, api_settings: list[dict]):
|
||||||
|
if not api_settings:
|
||||||
|
raise ValueError('api list is empty')
|
||||||
|
for api_settings_dict in api_settings:
|
||||||
|
if not api_settings_dict.get('method'):
|
||||||
|
raise ValueError('api name is required')
|
||||||
|
|
||||||
|
if not api_settings_dict.get('url'):
|
||||||
|
raise ValueError('api url is required')
|
||||||
|
|
||||||
|
if api_settings_dict.get('authorization'):
|
||||||
|
if not api_settings_dict.get('authorization').get('type'):
|
||||||
|
raise ValueError('authorization type is required')
|
||||||
|
if api_settings_dict.get('authorization').get('type') == 'bearer':
|
||||||
|
if not api_settings_dict.get('authorization').get('api_key'):
|
||||||
|
raise ValueError('authorization token is required')
|
||||||
|
if api_settings_dict.get('authorization').get('type') == 'custom':
|
||||||
|
if not api_settings_dict.get('authorization').get('header'):
|
||||||
|
raise ValueError('authorization header is required')
|
||||||
|
|
||||||
|
if api_settings_dict.get('method') in ['create', 'update']:
|
||||||
|
if not api_settings_dict.get('callback_setting'):
|
||||||
|
raise ValueError('callback_setting is required for create and update method')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def create_api_template(tenant_id: str, user_id: str, args: dict) -> ExternalApiTemplates:
|
||||||
|
api_template = ExternalApiTemplates(
|
||||||
|
tenant_id=tenant_id,
|
||||||
|
created_by=user_id,
|
||||||
|
updated_by=user_id,
|
||||||
|
name=args.get('name'),
|
||||||
|
settings=json.dumps(args.get('settings'), ensure_ascii=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
db.session.add(api_template)
|
||||||
|
db.session.commit()
|
||||||
|
return api_template
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_api_template(api_template_id: str) -> ExternalApiTemplates:
|
||||||
|
return ExternalApiTemplates.query.filter_by(
|
||||||
|
id=api_template_id
|
||||||
|
).first()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def update_api_template(tenant_id, user_id, api_template_id, args) -> ExternalApiTemplates:
|
||||||
|
api_template = ExternalApiTemplates.query.filter_by(
|
||||||
|
id=api_template_id,
|
||||||
|
tenant_id=tenant_id
|
||||||
|
).first()
|
||||||
|
if api_template is None:
|
||||||
|
raise ValueError('api template not found')
|
||||||
|
|
||||||
|
api_template.name = args.get('name')
|
||||||
|
api_template.settings = json.dumps(args.get('settings'), ensure_ascii=False)
|
||||||
|
api_template.updated_by = user_id
|
||||||
|
api_template.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
return api_template
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def delete_api_template(tenant_id: str, api_template_id: str):
|
||||||
|
api_template = ExternalApiTemplates.query.filter_by(
|
||||||
|
id=api_template_id,
|
||||||
|
tenant_id=tenant_id
|
||||||
|
).first()
|
||||||
|
if api_template is None:
|
||||||
|
raise ValueError('api template not found')
|
||||||
|
|
||||||
|
db.session.delete(api_template)
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def external_api_template_use_check(api_template_id: str) -> bool:
|
||||||
|
count = ExternalKnowledgeBindings.query.filter_by(
|
||||||
|
external_api_template_id=api_template_id
|
||||||
|
).count()
|
||||||
|
if count > 0:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def document_create_args_validate(tenant_id: str, api_template_id: str, process_parameter: dict):
|
||||||
|
api_template = ExternalApiTemplates.query.filter_by(
|
||||||
|
id=api_template_id,
|
||||||
|
tenant_id=tenant_id
|
||||||
|
).first()
|
||||||
|
if api_template is None:
|
||||||
|
raise ValueError('api template not found')
|
||||||
|
settings = json.loads(api_template.settings)
|
||||||
|
for settings in settings:
|
||||||
|
if settings.get('method') == 'create':
|
||||||
|
parameters = settings.get('parameters')
|
||||||
|
for parameter in parameters:
|
||||||
|
if parameter.get('required') and not process_parameter.get(parameter.get('name')):
|
||||||
|
raise ValueError(f'{parameter.get("name")} is required')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def init_external_dataset(tenant_id: str, user_id: str, args: dict):
|
||||||
|
api_template_id = args.get('api_template_id')
|
||||||
|
data_source = args.get('data_source')
|
||||||
|
process_parameter = args.get('process_parameter')
|
||||||
|
api_template = ExternalApiTemplates.query.filter_by(
|
||||||
|
id=api_template_id,
|
||||||
|
tenant_id=tenant_id
|
||||||
|
).first()
|
||||||
|
if api_template is None:
|
||||||
|
raise ValueError('api template not found')
|
||||||
|
settings = json.loads(api_template.settings)
|
||||||
|
for settings in settings:
|
||||||
|
if settings.get('method') == 'create':
|
||||||
|
|
||||||
|
ExternalDatasetService.process_external_api(api_template_id, data_source, process_parameter)
|
||||||
|
break
|
||||||
|
# save dataset
|
||||||
|
dataset = Dataset(
|
||||||
|
tenant_id=tenant_id,
|
||||||
|
name=args.get('name'),
|
||||||
|
description=args.get('description', ''),
|
||||||
|
provider='external',
|
||||||
|
created_by=user_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
db.session.add(dataset)
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
external_document_indexing_task.delay(dataset.id, api_template_id, data_source, process_parameter)
|
||||||
|
|
||||||
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def process_external_api(api_template_id: str, data_source: dict, process_parameter: dict):
|
||||||
|
pass
|
||||||
|
|
||||||
@ -0,0 +1,62 @@
|
|||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
|
import click
|
||||||
|
from celery import shared_task
|
||||||
|
|
||||||
|
from configs import dify_config
|
||||||
|
from core.indexing_runner import DocumentIsPausedException, IndexingRunner
|
||||||
|
from extensions.ext_database import db
|
||||||
|
from models.dataset import Dataset, Document, ExternalApiTemplates
|
||||||
|
from models.model import UploadFile
|
||||||
|
from services.feature_service import FeatureService
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task(queue='dataset')
|
||||||
|
def external_document_indexing_task(dataset_id: str, api_template_id: str, data_source: dict, process_parameter: dict):
|
||||||
|
"""
|
||||||
|
Async process document
|
||||||
|
:param dataset_id:
|
||||||
|
:param api_template_id:
|
||||||
|
:param data_source:
|
||||||
|
:param process_parameter:
|
||||||
|
Usage: external_document_indexing_task.delay(dataset_id, document_id)
|
||||||
|
"""
|
||||||
|
documents = []
|
||||||
|
start_at = time.perf_counter()
|
||||||
|
|
||||||
|
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
|
||||||
|
if not dataset:
|
||||||
|
logging.info(click.style('Processed external dataset: {} failed, dataset not exit.'.format(dataset_id), fg='red'))
|
||||||
|
return
|
||||||
|
|
||||||
|
# get external api template
|
||||||
|
api_template = db.session.query(ExternalApiTemplates).filter(
|
||||||
|
ExternalApiTemplates.id == api_template_id,
|
||||||
|
ExternalApiTemplates.tenant_id == dataset.tenant_id
|
||||||
|
).first()
|
||||||
|
|
||||||
|
if not api_template:
|
||||||
|
logging.info(click.style('Processed external dataset: {} failed, api template: {} not exit.'.format(dataset_id, api_template_id), fg='red'))
|
||||||
|
return
|
||||||
|
file_resource = []
|
||||||
|
if data_source["type"] == "upload_file":
|
||||||
|
upload_file_list = data_source["info_list"]['file_info_list']['file_ids']
|
||||||
|
for file_id in upload_file_list:
|
||||||
|
file = db.session.query(UploadFile).filter(
|
||||||
|
UploadFile.tenant_id == dataset.tenant_id,
|
||||||
|
UploadFile.id == file_id
|
||||||
|
).first()
|
||||||
|
if file:
|
||||||
|
file_resource.append(file)
|
||||||
|
try:
|
||||||
|
# assemble headers
|
||||||
|
headers = self._assembling_headers()
|
||||||
|
|
||||||
|
# do http request
|
||||||
|
response = self._do_http_request(headers)
|
||||||
|
except DocumentIsPausedException as ex:
|
||||||
|
logging.info(click.style(str(ex), fg='yellow'))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
Loading…
Reference in New Issue