feat: added dataset recall testing API (#9300)
parent
5c7b1358d4
commit
8501af298f
@ -1,88 +1,24 @@
|
|||||||
import logging
|
from flask_restful import Resource
|
||||||
|
|
||||||
from flask_login import current_user
|
|
||||||
from flask_restful import Resource, marshal, reqparse
|
|
||||||
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
|
|
||||||
|
|
||||||
import services
|
|
||||||
from controllers.console import api
|
from controllers.console import api
|
||||||
from controllers.console.app.error import (
|
from controllers.console.datasets.hit_testing_base import DatasetsHitTestingBase
|
||||||
CompletionRequestError,
|
|
||||||
ProviderModelCurrentlyNotSupportError,
|
|
||||||
ProviderNotInitializeError,
|
|
||||||
ProviderQuotaExceededError,
|
|
||||||
)
|
|
||||||
from controllers.console.datasets.error import DatasetNotInitializedError
|
|
||||||
from controllers.console.setup import setup_required
|
from controllers.console.setup import setup_required
|
||||||
from controllers.console.wraps import account_initialization_required
|
from controllers.console.wraps import account_initialization_required
|
||||||
from core.errors.error import (
|
|
||||||
LLMBadRequestError,
|
|
||||||
ModelCurrentlyNotSupportError,
|
|
||||||
ProviderTokenNotInitError,
|
|
||||||
QuotaExceededError,
|
|
||||||
)
|
|
||||||
from core.model_runtime.errors.invoke import InvokeError
|
|
||||||
from fields.hit_testing_fields import hit_testing_record_fields
|
|
||||||
from libs.login import login_required
|
from libs.login import login_required
|
||||||
from services.dataset_service import DatasetService
|
|
||||||
from services.hit_testing_service import HitTestingService
|
|
||||||
|
|
||||||
|
|
||||||
class HitTestingApi(Resource, DatasetsHitTestingBase):
    """Console resource that runs a hit test against a single dataset.

    The individual steps (dataset lookup and permission check, argument
    parsing, argument validation, retrieval) are inherited from
    ``DatasetsHitTestingBase``; this class only sequences them.
    """

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, dataset_id):
        """Handle a hit-testing request for ``dataset_id``.

        Returns whatever ``perform_hit_testing`` returns for the validated
        dataset and the parsed request arguments.
        """
        # The route passes the id in as a non-string value; the lookup helper
        # is annotated to take a string, so convert it up front.
        target_dataset = self.get_and_validate_dataset(str(dataset_id))

        # Parse first, then validate, exactly in that order.
        request_args = self.parse_args()
        self.hit_testing_args_check(request_args)

        return self.perform_hit_testing(target_dataset, request_args)
|
|
||||||
|
|
||||||
|
|
||||||
# Bind HitTestingApi to the dataset-scoped hit-testing URL on the console API.
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
|
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
|
||||||
|
|||||||
@ -0,0 +1,85 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from flask_login import current_user
|
||||||
|
from flask_restful import marshal, reqparse
|
||||||
|
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
|
||||||
|
|
||||||
|
import services.dataset_service
|
||||||
|
from controllers.console.app.error import (
|
||||||
|
CompletionRequestError,
|
||||||
|
ProviderModelCurrentlyNotSupportError,
|
||||||
|
ProviderNotInitializeError,
|
||||||
|
ProviderQuotaExceededError,
|
||||||
|
)
|
||||||
|
from controllers.console.datasets.error import DatasetNotInitializedError
|
||||||
|
from core.errors.error import (
|
||||||
|
LLMBadRequestError,
|
||||||
|
ModelCurrentlyNotSupportError,
|
||||||
|
ProviderTokenNotInitError,
|
||||||
|
QuotaExceededError,
|
||||||
|
)
|
||||||
|
from core.model_runtime.errors.invoke import InvokeError
|
||||||
|
from fields.hit_testing_fields import hit_testing_record_fields
|
||||||
|
from services.dataset_service import DatasetService
|
||||||
|
from services.hit_testing_service import HitTestingService
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetsHitTestingBase:
    """Shared building blocks for dataset hit-testing endpoints.

    Every step of a hit-testing request is exposed as a static method so that
    resource classes can inherit from this class and chain the steps:
    dataset lookup and permission check, request-argument parsing, argument
    validation, and the retrieval itself.
    """

    @staticmethod
    def get_and_validate_dataset(dataset_id: str):
        """Load a dataset and check that the current user may access it.

        Args:
            dataset_id: Identifier handed to ``DatasetService.get_dataset``.

        Returns:
            The dataset object returned by ``DatasetService.get_dataset``.

        Raises:
            NotFound: If the service returns ``None`` for ``dataset_id``.
            Forbidden: If the permission check raises ``NoPermissionError``;
                the original error message is reused.
        """
        dataset = DatasetService.get_dataset(dataset_id)
        if dataset is None:
            raise NotFound("Dataset not found.")

        # NOTE(review): ``services.errors`` is reached through the ``services``
        # package although only ``services.dataset_service`` is imported in
        # this file; presumably that import chain loads ``services.errors`` -
        # confirm.
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            # Chain explicitly so the permission error is kept as the cause.
            raise Forbidden(str(e)) from e

        return dataset

    @staticmethod
    def hit_testing_args_check(args):
        """Validate parsed request arguments.

        Delegates to ``HitTestingService.hit_testing_args_check``; any
        exception raised there propagates unchanged.
        """
        HitTestingService.hit_testing_args_check(args)

    @staticmethod
    def parse_args():
        """Parse the JSON body of the current request.

        Returns:
            The result of ``RequestParser.parse_args()`` for three JSON
            arguments: ``query`` (str), ``retrieval_model`` (dict, optional)
            and ``external_retrieval_model`` (dict, optional).
        """
        parser = reqparse.RequestParser()

        parser.add_argument("query", type=str, location="json")
        parser.add_argument("retrieval_model", type=dict, required=False, location="json")
        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        return parser.parse_args()

    @staticmethod
    def perform_hit_testing(dataset, args, limit: int = 10):
        """Run the retrieval and translate service errors into API errors.

        Args:
            dataset: Dataset object to query (as returned by
                ``get_and_validate_dataset``).
            args: Parsed arguments; the keys ``query``, ``retrieval_model``
                and ``external_retrieval_model`` are read.
            limit: Value forwarded as ``limit`` to
                ``HitTestingService.retrieve``. Defaults to 10, the value
                that was previously hard-coded.

        Returns:
            A dict with the ``query`` from the service response and the
            ``records`` marshalled with ``hit_testing_record_fields``.

        Raises:
            DatasetNotInitializedError: On ``IndexNotInitializedError``.
            ProviderNotInitializeError: On ``ProviderTokenNotInitError`` or
                ``LLMBadRequestError``.
            ProviderQuotaExceededError: On ``QuotaExceededError``.
            ProviderModelCurrentlyNotSupportError: On
                ``ModelCurrentlyNotSupportError``.
            CompletionRequestError: On ``InvokeError``.
            ValueError: On any other ``ValueError`` (message preserved).
            InternalServerError: On any other exception, after logging it.
        """
        try:
            response = HitTestingService.retrieve(
                dataset=dataset,
                query=args["query"],
                account=current_user,
                retrieval_model=args["retrieval_model"],
                external_retrieval_model=args["external_retrieval_model"],
                limit=limit,
            )
            return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
        except services.errors.index.IndexNotInitializedError as e:
            raise DatasetNotInitializedError() from e
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description) from ex
        except QuotaExceededError as e:
            raise ProviderQuotaExceededError() from e
        except ModelCurrentlyNotSupportError as e:
            raise ProviderModelCurrentlyNotSupportError() from e
        except LLMBadRequestError as e:
            raise ProviderNotInitializeError(
                "No Embedding Model or Reranking Model available. Please configure a valid provider "
                "in the Settings -> Model Provider."
            ) from e
        except InvokeError as e:
            raise CompletionRequestError(e.description) from e
        except ValueError as e:
            # Handled before the broad clause below so that a ValueError is
            # re-raised as a plain ValueError with the same message instead
            # of being converted into an InternalServerError.
            raise ValueError(str(e)) from e
        except Exception as e:
            # Last-resort boundary: record the traceback, then surface a 500.
            logging.exception("Hit testing failed.")
            raise InternalServerError(str(e)) from e
|
||||||
@ -0,0 +1,17 @@
|
|||||||
|
from controllers.console.datasets.hit_testing_base import DatasetsHitTestingBase
|
||||||
|
from controllers.service_api import api
|
||||||
|
from controllers.service_api.wraps import DatasetApiResource
|
||||||
|
|
||||||
|
|
||||||
|
class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
    """Service-API resource that runs a hit test against a single dataset.

    All steps come from ``DatasetsHitTestingBase``; this class only wires
    them together for the service API.
    """

    def post(self, tenant_id, dataset_id):
        """Handle a hit-testing request for ``dataset_id``.

        ``tenant_id`` is part of the handler signature but is not read in
        this method. Returns the result of ``perform_hit_testing``.
        """
        # Convert the id to a string before looking the dataset up.
        target_dataset = self.get_and_validate_dataset(str(dataset_id))

        # Parse the request body, then validate what was parsed.
        request_args = self.parse_args()
        self.hit_testing_args_check(request_args)

        return self.perform_hit_testing(target_dataset, request_args)
|
||||||
|
|
||||||
|
|
||||||
|
# Bind HitTestingApi to the dataset-scoped hit-testing URL on the service API.
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
|
||||||
Loading…
Reference in New Issue