feat: remove unused codes
parent
196bfeaaf4
commit
8236373498
Binary file not shown.
@ -1,120 +0,0 @@
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from core.helper.module_import_helper import get_subclasses_from_module, import_module_from_source
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
|
||||
from core.model_runtime.entities.provider_entities import ProviderEntity
|
||||
from core.model_runtime.model_providers.__base.ai_model import AIModel
|
||||
from core.tools.utils.yaml_utils import load_yaml_file
|
||||
|
||||
|
||||
class ModelProvider(ABC):
|
||||
provider_schema: Optional[ProviderEntity] = None
|
||||
model_instance_map: dict[str, AIModel] = {}
|
||||
|
||||
@abstractmethod
|
||||
def validate_provider_credentials(self, credentials: dict) -> None:
|
||||
"""
|
||||
Validate provider credentials
|
||||
You can choose any validate_credentials method of model type or implement validate method by yourself,
|
||||
such as: get model list api
|
||||
|
||||
if validate failed, raise exception
|
||||
|
||||
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def get_provider_schema(self) -> ProviderEntity:
|
||||
"""
|
||||
Get provider schema
|
||||
|
||||
:return: provider schema
|
||||
"""
|
||||
if self.provider_schema:
|
||||
return self.provider_schema
|
||||
|
||||
# get dirname of the current path
|
||||
provider_name = self.__class__.__module__.split(".")[-1]
|
||||
|
||||
# get the path of the model_provider classes
|
||||
base_path = os.path.abspath(__file__)
|
||||
current_path = os.path.join(os.path.dirname(os.path.dirname(base_path)), provider_name)
|
||||
|
||||
# read provider schema from yaml file
|
||||
yaml_path = os.path.join(current_path, f"{provider_name}.yaml")
|
||||
yaml_data = load_yaml_file(yaml_path)
|
||||
|
||||
try:
|
||||
# yaml_data to entity
|
||||
provider_schema = ProviderEntity(**yaml_data)
|
||||
except Exception as e:
|
||||
raise Exception(f"Invalid provider schema for {provider_name}: {str(e)}")
|
||||
|
||||
# cache schema
|
||||
self.provider_schema = provider_schema
|
||||
|
||||
return provider_schema
|
||||
|
||||
def models(self, model_type: ModelType) -> list[AIModelEntity]:
|
||||
"""
|
||||
Get all models for given model type
|
||||
|
||||
:param model_type: model type defined in `ModelType`
|
||||
:return: list of models
|
||||
"""
|
||||
provider_schema = self.get_provider_schema()
|
||||
if model_type not in provider_schema.supported_model_types:
|
||||
return []
|
||||
|
||||
# get model instance of the model type
|
||||
model_instance = self.get_model_instance(model_type)
|
||||
|
||||
# get predefined models (predefined_models)
|
||||
models = model_instance.predefined_models()
|
||||
|
||||
# return models
|
||||
return models
|
||||
|
||||
def get_model_instance(self, model_type: ModelType) -> AIModel:
|
||||
"""
|
||||
Get model instance
|
||||
|
||||
:param model_type: model type defined in `ModelType`
|
||||
:return:
|
||||
"""
|
||||
# get dirname of the current path
|
||||
provider_name = self.__class__.__module__.split(".")[-1]
|
||||
|
||||
if f"{provider_name}.{model_type.value}" in self.model_instance_map:
|
||||
return self.model_instance_map[f"{provider_name}.{model_type.value}"]
|
||||
|
||||
# get the path of the model type classes
|
||||
base_path = os.path.abspath(__file__)
|
||||
model_type_name = model_type.value.replace("-", "_")
|
||||
model_type_path = os.path.join(os.path.dirname(os.path.dirname(base_path)), provider_name, model_type_name)
|
||||
model_type_py_path = os.path.join(model_type_path, f"{model_type_name}.py")
|
||||
|
||||
if not os.path.isdir(model_type_path) or not os.path.exists(model_type_py_path):
|
||||
raise Exception(f"Invalid model type {model_type} for provider {provider_name}")
|
||||
|
||||
# Dynamic loading {model_type_name}.py file and find the subclass of AIModel
|
||||
parent_module = ".".join(self.__class__.__module__.split(".")[:-1])
|
||||
mod = import_module_from_source(
|
||||
module_name=f"{parent_module}.{model_type_name}.{model_type_name}", py_file_path=model_type_py_path
|
||||
)
|
||||
model_class = next(
|
||||
filter(
|
||||
lambda x: x.__module__ == mod.__name__ and not x.__abstractmethods__,
|
||||
get_subclasses_from_module(mod, AIModel),
|
||||
),
|
||||
None,
|
||||
)
|
||||
if not model_class:
|
||||
raise Exception(f"Missing AIModel Class for model type {model_type} in {model_type_py_path}")
|
||||
|
||||
model_instance_map = model_class()
|
||||
self.model_instance_map[f"{provider_name}.{model_type.value}"] = model_instance_map
|
||||
|
||||
return model_instance_map
|
||||
@ -1,54 +0,0 @@
|
||||
from abc import abstractmethod
|
||||
from typing import IO, Optional
|
||||
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.model_providers.__base.ai_model import AIModel
|
||||
|
||||
|
||||
class Text2ImageModel(AIModel):
|
||||
"""
|
||||
Model class for text2img model.
|
||||
"""
|
||||
|
||||
model_type: ModelType = ModelType.TEXT2IMG
|
||||
|
||||
# pydantic configs
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
def invoke(
|
||||
self, model: str, credentials: dict, prompt: str, model_parameters: dict, user: Optional[str] = None
|
||||
) -> list[IO[bytes]]:
|
||||
"""
|
||||
Invoke Text2Image model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt: prompt for image generation
|
||||
:param model_parameters: model parameters
|
||||
:param user: unique user id
|
||||
|
||||
:return: image bytes
|
||||
"""
|
||||
try:
|
||||
return self._invoke(model, credentials, prompt, model_parameters, user)
|
||||
except Exception as e:
|
||||
raise self._transform_invoke_error(e)
|
||||
|
||||
@abstractmethod
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, prompt: str, model_parameters: dict, user: Optional[str] = None
|
||||
) -> list[IO[bytes]]:
|
||||
"""
|
||||
Invoke Text2Image model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt: prompt for image generation
|
||||
:param model_parameters: model parameters
|
||||
:param user: unique user id
|
||||
|
||||
:return: image bytes
|
||||
"""
|
||||
raise NotImplementedError
|
||||
@ -1,34 +1,9 @@
|
||||
from os.path import abspath, dirname, join
|
||||
from threading import Lock
|
||||
from typing import Any
|
||||
|
||||
from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer
|
||||
|
||||
_tokenizer = None
|
||||
_lock = Lock()
|
||||
import tiktoken
|
||||
|
||||
|
||||
class GPT2Tokenizer:
|
||||
@staticmethod
|
||||
def _get_num_tokens_by_gpt2(text: str) -> int:
|
||||
"""
|
||||
use gpt2 tokenizer to get num tokens
|
||||
"""
|
||||
_tokenizer = GPT2Tokenizer.get_encoder()
|
||||
tokens = _tokenizer.encode(text, verbose=False)
|
||||
return len(tokens)
|
||||
|
||||
@staticmethod
|
||||
def get_num_tokens(text: str) -> int:
|
||||
return GPT2Tokenizer._get_num_tokens_by_gpt2(text)
|
||||
|
||||
@staticmethod
|
||||
def get_encoder() -> Any:
|
||||
global _tokenizer, _lock
|
||||
with _lock:
|
||||
if _tokenizer is None:
|
||||
base_path = abspath(__file__)
|
||||
gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
|
||||
_tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
|
||||
|
||||
return _tokenizer
|
||||
encoding = tiktoken.encoding_for_model("gpt2")
|
||||
tiktoken_vec = encoding.encode(text)
|
||||
return len(tiktoken_vec)
|
||||
|
||||
@ -1,191 +0,0 @@
|
||||
import base64
|
||||
import copy
|
||||
import time
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import tiktoken
|
||||
from openai import AzureOpenAI
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.azure_openai._common import _CommonAzureOpenAI
|
||||
from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_BASE_MODELS, AzureBaseModel
|
||||
|
||||
|
||||
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
base_model_name = credentials["base_model_name"]
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = AzureOpenAI(**credentials_kwargs)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if user:
|
||||
extra_model_kwargs["user"] = user
|
||||
|
||||
extra_model_kwargs["encoding_format"] = "base64"
|
||||
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
embeddings: list[list[float]] = [[] for _ in range(len(texts))]
|
||||
tokens = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
try:
|
||||
enc = tiktoken.encoding_for_model(base_model_name)
|
||||
except KeyError:
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
token = enc.encode(text)
|
||||
for j in range(0, len(token), context_size):
|
||||
tokens += [token[j : j + context_size]]
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(tokens), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, client=client, texts=tokens[i : i + max_chunks], extra_model_kwargs=extra_model_kwargs
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
||||
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
||||
for i in range(len(indices)):
|
||||
results[indices[i]].append(batched_embeddings[i])
|
||||
num_tokens_in_batch[indices[i]].append(len(tokens[i]))
|
||||
|
||||
for i in range(len(texts)):
|
||||
_result = results[i]
|
||||
if len(_result) == 0:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, client=client, texts="", extra_model_kwargs=extra_model_kwargs
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
average = embeddings_batch[0]
|
||||
else:
|
||||
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
||||
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=base_model_name)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
|
||||
try:
|
||||
enc = tiktoken.encoding_for_model(credentials["base_model_name"])
|
||||
except KeyError:
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
# calculate the number of tokens in the encoded text
|
||||
tokenized_text = enc.encode(text)
|
||||
total_num_tokens += len(tokenized_text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
if "openai_api_base" not in credentials:
|
||||
raise CredentialsValidateFailedError("Azure OpenAI API Base Endpoint is required")
|
||||
|
||||
if "openai_api_key" not in credentials:
|
||||
raise CredentialsValidateFailedError("Azure OpenAI API key is required")
|
||||
|
||||
if "base_model_name" not in credentials:
|
||||
raise CredentialsValidateFailedError("Base Model Name is required")
|
||||
|
||||
if not self._get_ai_model_entity(credentials["base_model_name"], model):
|
||||
raise CredentialsValidateFailedError(f'Base Model Name {credentials["base_model_name"]} is invalid')
|
||||
|
||||
try:
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = AzureOpenAI(**credentials_kwargs)
|
||||
|
||||
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
|
||||
ai_model_entity = self._get_ai_model_entity(credentials["base_model_name"], model)
|
||||
return ai_model_entity.entity
|
||||
|
||||
@staticmethod
|
||||
def _embedding_invoke(
|
||||
model: str, client: AzureOpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
|
||||
) -> tuple[list[list[float]], int]:
|
||||
response = client.embeddings.create(
|
||||
input=texts,
|
||||
model=model,
|
||||
**extra_model_kwargs,
|
||||
)
|
||||
|
||||
if "encoding_format" in extra_model_kwargs and extra_model_kwargs["encoding_format"] == "base64":
|
||||
# decode base64 embedding
|
||||
return (
|
||||
[list(np.frombuffer(base64.b64decode(data.embedding), dtype="float32")) for data in response.data],
|
||||
response.usage.total_tokens,
|
||||
)
|
||||
|
||||
return [data.embedding for data in response.data], response.usage.total_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@staticmethod
|
||||
def _get_ai_model_entity(base_model_name: str, model: str) -> AzureBaseModel:
|
||||
for ai_model_entity in EMBEDDING_BASE_MODELS:
|
||||
if ai_model_entity.base_model_name == base_model_name:
|
||||
ai_model_entity_copy = copy.deepcopy(ai_model_entity)
|
||||
ai_model_entity_copy.entity.model = model
|
||||
ai_model_entity_copy.entity.label.en_US = model
|
||||
ai_model_entity_copy.entity.label.zh_Hans = model
|
||||
return ai_model_entity_copy
|
||||
|
||||
return None
|
||||
@ -1,207 +0,0 @@
|
||||
import time
|
||||
from json import dumps
|
||||
from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import BaichuanTokenizer
|
||||
from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import (
|
||||
BadRequestError,
|
||||
InsufficientAccountBalanceError,
|
||||
InternalServerError,
|
||||
InvalidAPIKeyError,
|
||||
InvalidAuthenticationError,
|
||||
RateLimitReachedError,
|
||||
)
|
||||
|
||||
|
||||
class BaichuanTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for BaiChuan text embedding model.
|
||||
"""
|
||||
|
||||
api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
if model != "baichuan-text-embedding":
|
||||
raise ValueError("Invalid model name")
|
||||
if not api_key:
|
||||
raise CredentialsValidateFailedError("api_key is required")
|
||||
|
||||
# split into chunks of batch size 16
|
||||
chunks = []
|
||||
for i in range(0, len(texts), 16):
|
||||
chunks.append(texts[i : i + 16])
|
||||
|
||||
embeddings = []
|
||||
token_usage = 0
|
||||
|
||||
for chunk in chunks:
|
||||
# embedding chunk
|
||||
chunk_embeddings, chunk_usage = self.embedding(model=model, api_key=api_key, texts=chunk, user=user)
|
||||
|
||||
embeddings.extend(chunk_embeddings)
|
||||
token_usage += chunk_usage
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model,
|
||||
embeddings=embeddings,
|
||||
usage=self._calc_response_usage(model=model, credentials=credentials, tokens=token_usage),
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def embedding(
|
||||
self, model: str, api_key, texts: list[str], user: Optional[str] = None
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Embed given texts
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:return: embeddings result
|
||||
"""
|
||||
url = self.api_base
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
|
||||
data = {"model": "Baichuan-Text-Embedding", "input": texts}
|
||||
|
||||
try:
|
||||
response = post(url, headers=headers, data=dumps(data))
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
try:
|
||||
resp = response.json()
|
||||
# try to parse error message
|
||||
err = resp["error"]["code"]
|
||||
msg = resp["error"]["message"]
|
||||
except Exception as e:
|
||||
raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
if err == "invalid_api_key":
|
||||
raise InvalidAPIKeyError(msg)
|
||||
elif err == "insufficient_quota":
|
||||
raise InsufficientAccountBalanceError(msg)
|
||||
elif err == "invalid_authentication":
|
||||
raise InvalidAuthenticationError(msg)
|
||||
elif err and "rate" in err:
|
||||
raise RateLimitReachedError(msg)
|
||||
elif err and "internal" in err:
|
||||
raise InternalServerError(msg)
|
||||
elif err == "api_key_empty":
|
||||
raise InvalidAPIKeyError(msg)
|
||||
else:
|
||||
raise InternalServerError(f"Unknown error: {err} with message: {msg}")
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
embeddings = resp["data"]
|
||||
usage = resp["usage"]
|
||||
except Exception as e:
|
||||
raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
return [data["embedding"] for data in embeddings], usage["total_tokens"]
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use BaichuanTokenizer to get num tokens
|
||||
num_tokens += BaichuanTokenizer._get_num_tokens(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvalidAPIKeyError:
|
||||
raise CredentialsValidateFailedError("Invalid api key")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [],
|
||||
InvokeServerUnavailableError: [InternalServerError],
|
||||
InvokeRateLimitError: [RateLimitReachedError],
|
||||
InvokeAuthorizationError: [
|
||||
InvalidAuthenticationError,
|
||||
InsufficientAccountBalanceError,
|
||||
InvalidAPIKeyError,
|
||||
],
|
||||
InvokeBadRequestError: [BadRequestError, KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,223 +0,0 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import cohere
|
||||
import numpy as np
|
||||
from cohere.core import RequestOptions
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
|
||||
class CohereTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Cohere text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# get model properties
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
embeddings: list[list[float]] = [[] for _ in range(len(texts))]
|
||||
tokens = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
tokenize_response = self._tokenize(model=model, credentials=credentials, text=text)
|
||||
|
||||
for j in range(0, len(tokenize_response), context_size):
|
||||
tokens += [tokenize_response[j : j + context_size]]
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(tokens), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
# call embedding model
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, credentials=credentials, texts=["".join(token) for token in tokens[i : i + max_chunks]]
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
||||
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
||||
for i in range(len(indices)):
|
||||
results[indices[i]].append(batched_embeddings[i])
|
||||
num_tokens_in_batch[indices[i]].append(len(tokens[i]))
|
||||
|
||||
for i in range(len(texts)):
|
||||
_result = results[i]
|
||||
if len(_result) == 0:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, credentials=credentials, texts=[" "]
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
average = embeddings_batch[0]
|
||||
else:
|
||||
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
||||
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
|
||||
full_text = " ".join(texts)
|
||||
|
||||
try:
|
||||
response = self._tokenize(model=model, credentials=credentials, text=full_text)
|
||||
except Exception as e:
|
||||
raise self._transform_invoke_error(e)
|
||||
|
||||
return len(response)
|
||||
|
||||
def _tokenize(self, model: str, credentials: dict, text: str) -> list[str]:
|
||||
"""
|
||||
Tokenize text
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param text: text to tokenize
|
||||
:return:
|
||||
"""
|
||||
if not text:
|
||||
return []
|
||||
|
||||
# initialize client
|
||||
client = cohere.Client(credentials.get("api_key"), base_url=credentials.get("base_url"))
|
||||
|
||||
response = client.tokenize(text=text, model=model, offline=False, request_options=RequestOptions(max_retries=0))
|
||||
|
||||
return response.token_strings
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Invoke embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
# initialize client
|
||||
client = cohere.Client(credentials.get("api_key"), base_url=credentials.get("base_url"))
|
||||
|
||||
# call embedding model
|
||||
response = client.embed(
|
||||
texts=texts,
|
||||
model=model,
|
||||
input_type="search_document" if len(texts) > 1 else "search_query",
|
||||
request_options=RequestOptions(max_retries=1),
|
||||
)
|
||||
|
||||
return response.embeddings, int(response.meta.billed_units.input_tokens)
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [cohere.errors.service_unavailable_error.ServiceUnavailableError],
|
||||
InvokeServerUnavailableError: [cohere.errors.internal_server_error.InternalServerError],
|
||||
InvokeRateLimitError: [cohere.errors.too_many_requests_error.TooManyRequestsError],
|
||||
InvokeAuthorizationError: [
|
||||
cohere.errors.unauthorized_error.UnauthorizedError,
|
||||
cohere.errors.forbidden_error.ForbiddenError,
|
||||
],
|
||||
InvokeBadRequestError: [
|
||||
cohere.core.api_error.ApiError,
|
||||
cohere.errors.bad_request_error.BadRequestError,
|
||||
cohere.errors.not_found_error.NotFoundError,
|
||||
],
|
||||
}
|
||||
@ -1,30 +0,0 @@
|
||||
provider: fireworks
|
||||
label:
|
||||
zh_Hans: Fireworks AI
|
||||
en_US: Fireworks AI
|
||||
icon_small:
|
||||
en_US: icon_s_en.svg
|
||||
icon_large:
|
||||
en_US: icon_l_en.svg
|
||||
background: "#FCFDFF"
|
||||
help:
|
||||
title:
|
||||
en_US: Get your API Key from Fireworks AI
|
||||
zh_Hans: 从 Fireworks AI 获取 API Key
|
||||
url:
|
||||
en_US: https://fireworks.ai/account/api-keys
|
||||
supported_model_types:
|
||||
- llm
|
||||
- text-embedding
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: fireworks_api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
@ -1,46 +0,0 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Vision Instruct
|
||||
en_US: Llama 3.2 11B Vision Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.2'
|
||||
output: '0.2'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,46 +0,0 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-1b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 1B Instruct
|
||||
en_US: Llama 3.2 1B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,46 +0,0 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Instruct
|
||||
en_US: Llama 3.2 3B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,46 +0,0 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Vision Instruct
|
||||
en_US: Llama 3.2 90B Vision Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.9'
|
||||
output: '0.9'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,12 +0,0 @@
|
||||
model: WhereIsAI/UAE-Large-V1
|
||||
label:
|
||||
zh_Hans: UAE-Large-V1
|
||||
en_US: UAE-Large-V1
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 512
|
||||
max_chunks: 1
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
||||
@ -1,12 +0,0 @@
|
||||
model: thenlper/gte-base
|
||||
label:
|
||||
zh_Hans: GTE-base
|
||||
en_US: GTE-base
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 512
|
||||
max_chunks: 1
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
||||
@ -1,12 +0,0 @@
|
||||
model: thenlper/gte-large
|
||||
label:
|
||||
zh_Hans: GTE-large
|
||||
en_US: GTE-large
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 512
|
||||
max_chunks: 1
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
||||
@ -1,12 +0,0 @@
|
||||
model: nomic-ai/nomic-embed-text-v1.5
|
||||
label:
|
||||
zh_Hans: nomic-embed-text-v1.5
|
||||
en_US: nomic-embed-text-v1.5
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 8192
|
||||
max_chunks: 16
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
||||
@ -1,12 +0,0 @@
|
||||
model: nomic-ai/nomic-embed-text-v1
|
||||
label:
|
||||
zh_Hans: nomic-embed-text-v1
|
||||
en_US: nomic-embed-text-v1
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 8192
|
||||
max_chunks: 16
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
||||
@ -1,151 +0,0 @@
|
||||
import time
|
||||
from collections.abc import Mapping
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
from openai import OpenAI
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
|
||||
|
||||
|
||||
class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Fireworks text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if user:
|
||||
extra_model_kwargs["user"] = user
|
||||
|
||||
extra_model_kwargs["encoding_format"] = "float"
|
||||
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
# TODO: Optimize for better token estimation and chunking
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=inputs[i : i + max_chunks],
|
||||
extra_model_kwargs=extra_model_kwargs,
|
||||
)
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: Mapping) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(
|
||||
self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Invoke embedding model
|
||||
:param model: model name
|
||||
:param client: model client
|
||||
:param texts: texts to embed
|
||||
:param extra_model_kwargs: extra model kwargs
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
|
||||
return [data.embedding for data in response.data], response.usage.total_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
|
||||
)
|
||||
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,76 +0,0 @@
|
||||
provider: fishaudio
|
||||
label:
|
||||
en_US: Fish Audio
|
||||
description:
|
||||
en_US: Models provided by Fish Audio, currently only support TTS.
|
||||
zh_Hans: Fish Audio 提供的模型,目前仅支持 TTS。
|
||||
icon_small:
|
||||
en_US: fishaudio_s_en.svg
|
||||
icon_large:
|
||||
en_US: fishaudio_l_en.svg
|
||||
background: "#E5E7EB"
|
||||
help:
|
||||
title:
|
||||
en_US: Get your API key from Fish Audio
|
||||
zh_Hans: 从 Fish Audio 获取你的 API Key
|
||||
url:
|
||||
en_US: https://fish.audio/go-api/
|
||||
supported_model_types:
|
||||
- tts
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
- variable: api_base
|
||||
label:
|
||||
en_US: API URL
|
||||
type: text-input
|
||||
required: false
|
||||
default: https://api.fish.audio
|
||||
placeholder:
|
||||
en_US: Enter your API URL
|
||||
zh_Hans: 在此输入您的 API URL
|
||||
- variable: use_public_models
|
||||
label:
|
||||
en_US: Use Public Models
|
||||
type: select
|
||||
required: false
|
||||
default: "false"
|
||||
placeholder:
|
||||
en_US: Toggle to use public models
|
||||
zh_Hans: 切换以使用公共模型
|
||||
options:
|
||||
- value: "true"
|
||||
label:
|
||||
en_US: Allow Public Models
|
||||
zh_Hans: 使用公共模型
|
||||
- value: "false"
|
||||
label:
|
||||
en_US: Private Models Only
|
||||
zh_Hans: 仅使用私有模型
|
||||
- variable: latency
|
||||
label:
|
||||
en_US: Latency
|
||||
type: select
|
||||
required: false
|
||||
default: "normal"
|
||||
placeholder:
|
||||
en_US: Toggle to choice latency
|
||||
zh_Hans: 切换以调整延迟
|
||||
options:
|
||||
- value: "balanced"
|
||||
label:
|
||||
en_US: Low (may affect quality)
|
||||
zh_Hans: 低延迟 (可能降低质量)
|
||||
- value: "normal"
|
||||
label:
|
||||
en_US: Normal
|
||||
zh_Hans: 标准
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-flash-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-flash-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-flash-8b-exp-0924
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 8B 0924
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-flash
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-pro-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-pro-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,48 +0,0 @@
|
||||
model: gemini-1.5-pro
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,25 +0,0 @@
|
||||
model: llama-3.2-11b-text-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Text (Preview)
|
||||
en_US: Llama 3.2 11B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,25 +0,0 @@
|
||||
model: llama-3.2-1b-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 1B Text (Preview)
|
||||
en_US: Llama 3.2 1B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,25 +0,0 @@
|
||||
model: llama-3.2-3b-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Text (Preview)
|
||||
en_US: Llama 3.2 3B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,25 +0,0 @@
|
||||
model: llama-3.2-90b-text-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Text (Preview)
|
||||
en_US: Llama 3.2 90B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,189 +0,0 @@
|
||||
import json
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
from huggingface_hub import HfApi, InferenceClient
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.huggingface_hub._common import _CommonHuggingfaceHub
|
||||
|
||||
HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/"
|
||||
|
||||
|
||||
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
|
||||
|
||||
execute_model = model
|
||||
|
||||
if credentials["huggingfacehub_api_type"] == "inference_endpoints":
|
||||
execute_model = credentials["huggingfacehub_endpoint_url"]
|
||||
|
||||
output = client.post(
|
||||
json={"inputs": texts, "options": {"wait_for_model": False, "use_cache": False}}, model=execute_model
|
||||
)
|
||||
|
||||
embeddings = json.loads(output.decode())
|
||||
|
||||
tokens = self.get_num_tokens(model, credentials, texts)
|
||||
usage = self._calc_response_usage(model, credentials, tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=self._mean_pooling(embeddings), usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
try:
|
||||
if "huggingfacehub_api_type" not in credentials:
|
||||
raise CredentialsValidateFailedError("Huggingface Hub Endpoint Type must be provided.")
|
||||
|
||||
if "huggingfacehub_api_token" not in credentials:
|
||||
raise CredentialsValidateFailedError("Huggingface Hub API Token must be provided.")
|
||||
|
||||
if credentials["huggingfacehub_api_type"] == "inference_endpoints":
|
||||
if "huggingface_namespace" not in credentials:
|
||||
raise CredentialsValidateFailedError(
|
||||
"Huggingface Hub User Name / Organization Name must be provided."
|
||||
)
|
||||
|
||||
if "huggingfacehub_endpoint_url" not in credentials:
|
||||
raise CredentialsValidateFailedError("Huggingface Hub Endpoint URL must be provided.")
|
||||
|
||||
if "task_type" not in credentials:
|
||||
raise CredentialsValidateFailedError("Huggingface Hub Task Type must be provided.")
|
||||
|
||||
if credentials["task_type"] != "feature-extraction":
|
||||
raise CredentialsValidateFailedError("Huggingface Hub Task Type is invalid.")
|
||||
|
||||
self._check_endpoint_url_model_repository_name(credentials, model)
|
||||
|
||||
model = credentials["huggingfacehub_endpoint_url"]
|
||||
|
||||
elif credentials["huggingfacehub_api_type"] == "hosted_inference_api":
|
||||
self._check_hosted_model_task_type(credentials["huggingfacehub_api_token"], model)
|
||||
else:
|
||||
raise CredentialsValidateFailedError("Huggingface Hub Endpoint Type is invalid.")
|
||||
|
||||
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
|
||||
client.feature_extraction(text="hello world", model=model)
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model_properties={"context_size": 10000, "max_chunks": 1},
|
||||
)
|
||||
return entity
|
||||
|
||||
# https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
|
||||
# Returned values are a list of floats, or a list[list[floats]]
|
||||
# (depending on if you sent a string or a list of string,
|
||||
# and if the automatic reduction, usually mean_pooling for instance was applied for you or not.
|
||||
# This should be explained on the model's README.)
|
||||
@staticmethod
|
||||
def _mean_pooling(embeddings: list) -> list[float]:
|
||||
# If automatic reduction by giving model, no need to mean_pooling.
|
||||
# For example one: List[List[float]]
|
||||
if not isinstance(embeddings[0][0], list):
|
||||
return embeddings
|
||||
|
||||
# For example two: List[List[List[float]]], need to mean_pooling.
|
||||
sentence_embeddings = [np.mean(embedding[0], axis=0).tolist() for embedding in embeddings]
|
||||
return sentence_embeddings
|
||||
|
||||
@staticmethod
|
||||
def _check_hosted_model_task_type(huggingfacehub_api_token: str, model_name: str) -> None:
|
||||
hf_api = HfApi(token=huggingfacehub_api_token)
|
||||
model_info = hf_api.model_info(repo_id=model_name)
|
||||
|
||||
try:
|
||||
if not model_info:
|
||||
raise ValueError(f"Model {model_name} not found.")
|
||||
|
||||
if "inference" in model_info.cardData and not model_info.cardData["inference"]:
|
||||
raise ValueError(f"Inference API has been turned off for this model {model_name}.")
|
||||
|
||||
valid_tasks = "feature-extraction"
|
||||
if model_info.pipeline_tag not in valid_tasks:
|
||||
raise ValueError(f"Model {model_name} is not a valid task, must be one of {valid_tasks}.")
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(f"{str(e)}")
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@staticmethod
|
||||
def _check_endpoint_url_model_repository_name(credentials: dict, model_name: str):
|
||||
try:
|
||||
url = f'{HUGGINGFACE_ENDPOINT_API}{credentials["huggingface_namespace"]}'
|
||||
headers = {
|
||||
"Authorization": f'Bearer {credentials["huggingfacehub_api_token"]}',
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
response = requests.get(url=url, headers=headers)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise ValueError("User Name or Organization Name is invalid.")
|
||||
|
||||
model_repository_name = ""
|
||||
|
||||
for item in response.json().get("items", []):
|
||||
if item.get("status", {}).get("url") == credentials["huggingfacehub_endpoint_url"]:
|
||||
model_repository_name = item.get("model", {}).get("repository")
|
||||
break
|
||||
|
||||
if model_repository_name != model_name:
|
||||
raise ValueError(
|
||||
f"Model Name {model_name} is invalid. Please check it on the inference endpoints console."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(str(e))
|
||||
@ -1,209 +0,0 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiHelper
|
||||
|
||||
|
||||
class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Text Embedding Inference text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
credentials should be like:
|
||||
{
|
||||
'server_url': 'server url',
|
||||
'model_uid': 'model uid',
|
||||
}
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
server_url = credentials["server_url"]
|
||||
|
||||
server_url = server_url.removesuffix("/")
|
||||
|
||||
# get model properties
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
# get tokenized results from TEI
|
||||
batched_tokenize_result = TeiHelper.invoke_tokenize(server_url, texts)
|
||||
|
||||
for i, (text, tokenize_result) in enumerate(zip(texts, batched_tokenize_result)):
|
||||
# Check if the number of tokens is larger than the context size
|
||||
num_tokens = len(tokenize_result)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
# Find the best cutoff point
|
||||
pre_special_token_count = 0
|
||||
for token in tokenize_result:
|
||||
if token["special"]:
|
||||
pre_special_token_count += 1
|
||||
else:
|
||||
break
|
||||
rest_special_token_count = (
|
||||
len([token for token in tokenize_result if token["special"]]) - pre_special_token_count
|
||||
)
|
||||
|
||||
# Calculate the cutoff point, leave 20 extra space to avoid exceeding the limit
|
||||
token_cutoff = context_size - rest_special_token_count - 20
|
||||
|
||||
# Find the cutoff index
|
||||
cutpoint_token = tokenize_result[token_cutoff]
|
||||
cutoff = cutpoint_token["start"]
|
||||
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
|
||||
try:
|
||||
used_tokens = 0
|
||||
for i in _iter:
|
||||
iter_texts = inputs[i : i + max_chunks]
|
||||
results = TeiHelper.invoke_embeddings(server_url, iter_texts)
|
||||
embeddings = results["data"]
|
||||
embeddings = [embedding["embedding"] for embedding in embeddings]
|
||||
batched_embeddings.extend(embeddings)
|
||||
|
||||
usage = results["usage"]
|
||||
used_tokens += usage["total_tokens"]
|
||||
except RuntimeError as e:
|
||||
raise InvokeServerUnavailableError(str(e))
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
result = TextEmbeddingResult(model=model, embeddings=batched_embeddings, usage=usage)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
server_url = credentials["server_url"]
|
||||
|
||||
server_url = server_url.removesuffix("/")
|
||||
|
||||
batch_tokens = TeiHelper.invoke_tokenize(server_url, texts)
|
||||
num_tokens = sum(len(tokens) for tokens in batch_tokens)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
server_url = credentials["server_url"]
|
||||
extra_args = TeiHelper.get_tei_extra_parameter(server_url, model)
|
||||
print(extra_args)
|
||||
if extra_args.model_type != "embedding":
|
||||
raise CredentialsValidateFailedError("Current model is not a embedding model")
|
||||
|
||||
credentials["context_size"] = extra_args.max_input_length
|
||||
credentials["max_chunks"] = extra_args.max_client_batch_size
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
|
||||
"""
|
||||
used to define customizable model schema
|
||||
"""
|
||||
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model_properties={
|
||||
ModelPropertyKey.MAX_CHUNKS: int(credentials.get("max_chunks", 1)),
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 512)),
|
||||
},
|
||||
parameter_rules=[],
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,169 +0,0 @@
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from tencentcloud.common import credential
|
||||
from tencentcloud.common.exception import TencentCloudSDKException
|
||||
from tencentcloud.common.profile.client_profile import ClientProfile
|
||||
from tencentcloud.common.profile.http_profile import HttpProfile
|
||||
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HunyuanTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Hunyuan text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
if model != "hunyuan-embedding":
|
||||
raise ValueError("Invalid model name")
|
||||
|
||||
client = self._setup_hunyuan_client(credentials)
|
||||
|
||||
embeddings = []
|
||||
token_usage = 0
|
||||
|
||||
for input in texts:
|
||||
request = models.GetEmbeddingRequest()
|
||||
params = {"Input": input}
|
||||
request.from_json_string(json.dumps(params))
|
||||
response = client.GetEmbedding(request)
|
||||
usage = response.Usage.TotalTokens
|
||||
|
||||
embeddings.extend([data.Embedding for data in response.Data])
|
||||
token_usage += usage
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model,
|
||||
embeddings=embeddings,
|
||||
usage=self._calc_response_usage(model=model, credentials=credentials, tokens=token_usage),
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate credentials
|
||||
"""
|
||||
try:
|
||||
client = self._setup_hunyuan_client(credentials)
|
||||
|
||||
req = models.ChatCompletionsRequest()
|
||||
params = {
|
||||
"Model": model,
|
||||
"Messages": [{"Role": "user", "Content": "hello"}],
|
||||
"TopP": 1,
|
||||
"Temperature": 0,
|
||||
"Stream": False,
|
||||
}
|
||||
req.from_json_string(json.dumps(params))
|
||||
client.ChatCompletions(req)
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
|
||||
|
||||
def _setup_hunyuan_client(self, credentials):
|
||||
secret_id = credentials["secret_id"]
|
||||
secret_key = credentials["secret_key"]
|
||||
cred = credential.Credential(secret_id, secret_key)
|
||||
httpProfile = HttpProfile()
|
||||
httpProfile.endpoint = "hunyuan.tencentcloudapi.com"
|
||||
clientProfile = ClientProfile()
|
||||
clientProfile.httpProfile = httpProfile
|
||||
client = hunyuan_client.HunyuanClient(cred, "", clientProfile)
|
||||
return client
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeError: [TencentCloudSDKException],
|
||||
}
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
# client = self._setup_hunyuan_client(credentials)
|
||||
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
# use client.GetTokenCount to get num tokens
|
||||
# request = models.GetTokenCountRequest()
|
||||
# params = {
|
||||
# "Prompt": text
|
||||
# }
|
||||
# request.from_json_string(json.dumps(params))
|
||||
# response = client.GetTokenCount(request)
|
||||
# num_tokens += response.TokenCount
|
||||
|
||||
return num_tokens
|
||||
@ -1,69 +0,0 @@
|
||||
provider: jina
|
||||
label:
|
||||
en_US: Jina AI
|
||||
description:
|
||||
en_US: Embedding and Rerank Model Supported
|
||||
icon_small:
|
||||
en_US: icon_s_en.svg
|
||||
icon_large:
|
||||
en_US: icon_l_en.svg
|
||||
background: "#EFFDFD"
|
||||
help:
|
||||
title:
|
||||
en_US: Get your API key from Jina AI
|
||||
zh_Hans: 从 Jina AI 获取 API Key
|
||||
url:
|
||||
en_US: https://jina.ai/
|
||||
supported_model_types:
|
||||
- text-embedding
|
||||
- rerank
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
- customizable-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
model_credential_schema:
|
||||
model:
|
||||
label:
|
||||
en_US: Model Name
|
||||
zh_Hans: 模型名称
|
||||
placeholder:
|
||||
en_US: Enter your model name
|
||||
zh_Hans: 输入模型名称
|
||||
credential_form_schemas:
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
- variable: base_url
|
||||
label:
|
||||
zh_Hans: 服务器 URL
|
||||
en_US: Base URL
|
||||
type: text-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: Base URL, e.g. https://api.jina.ai/v1
|
||||
en_US: Base URL, e.g. https://api.jina.ai/v1
|
||||
default: 'https://api.jina.ai/v1'
|
||||
- variable: context_size
|
||||
label:
|
||||
zh_Hans: 上下文大小
|
||||
en_US: Context size
|
||||
placeholder:
|
||||
zh_Hans: 输入上下文大小
|
||||
en_US: Enter context size
|
||||
required: false
|
||||
type: text-input
|
||||
default: '8192'
|
||||
@ -1,199 +0,0 @@
|
||||
import time
|
||||
from json import JSONDecodeError, dumps
|
||||
from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.jina.text_embedding.jina_tokenizer import JinaTokenizer
|
||||
|
||||
|
||||
class JinaTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Jina text embedding model.
|
||||
"""
|
||||
|
||||
api_base: str = "https://api.jina.ai/v1"
|
||||
|
||||
def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
|
||||
"""
|
||||
Parse model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return: parsed credentials
|
||||
"""
|
||||
|
||||
def transform_jina_input_text(model, text):
|
||||
if model == "jina-clip-v1":
|
||||
return {"text": text}
|
||||
return text
|
||||
|
||||
data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
|
||||
|
||||
# model specific parameters
|
||||
if model == "jina-embeddings-v3":
|
||||
# set `task` type according to input type for the best performance
|
||||
data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
|
||||
|
||||
return data
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
if not api_key:
|
||||
raise CredentialsValidateFailedError("api_key is required")
|
||||
|
||||
base_url = credentials.get("base_url", self.api_base)
|
||||
base_url = base_url.removesuffix("/")
|
||||
|
||||
url = base_url + "/embeddings"
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
|
||||
data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
|
||||
|
||||
try:
|
||||
response = post(url, headers=headers, data=dumps(data))
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
try:
|
||||
resp = response.json()
|
||||
msg = resp["detail"]
|
||||
if response.status_code == 401:
|
||||
raise InvokeAuthorizationError(msg)
|
||||
elif response.status_code == 429:
|
||||
raise InvokeRateLimitError(msg)
|
||||
elif response.status_code == 500:
|
||||
raise InvokeServerUnavailableError(msg)
|
||||
else:
|
||||
raise InvokeBadRequestError(msg)
|
||||
except JSONDecodeError as e:
|
||||
raise InvokeServerUnavailableError(
|
||||
f"Failed to convert response to json: {e} with text: {response.text}"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
embeddings = resp["data"]
|
||||
usage = resp["usage"]
|
||||
except Exception as e:
|
||||
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use JinaTokenizer to get num tokens
|
||||
num_tokens += JinaTokenizer.get_num_tokens(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size"))},
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,189 +0,0 @@
|
||||
import time
|
||||
from json import JSONDecodeError, dumps
|
||||
from typing import Optional
|
||||
|
||||
from requests import post
|
||||
from yarl import URL
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
|
||||
class LocalAITextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for LocalAI text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
if len(texts) != 1:
|
||||
raise InvokeBadRequestError("Only one text is supported")
|
||||
|
||||
server_url = credentials["server_url"]
|
||||
model_name = model
|
||||
if not server_url:
|
||||
raise CredentialsValidateFailedError("server_url is required")
|
||||
if not model_name:
|
||||
raise CredentialsValidateFailedError("model_name is required")
|
||||
|
||||
url = server_url
|
||||
headers = {"Authorization": "Bearer 123", "Content-Type": "application/json"}
|
||||
|
||||
data = {"model": model_name, "input": texts[0]}
|
||||
|
||||
try:
|
||||
response = post(str(URL(url) / "embeddings"), headers=headers, data=dumps(data), timeout=10)
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
try:
|
||||
resp = response.json()
|
||||
code = resp["error"]["code"]
|
||||
msg = resp["error"]["message"]
|
||||
if code == 500:
|
||||
raise InvokeServerUnavailableError(msg)
|
||||
|
||||
if response.status_code == 401:
|
||||
raise InvokeAuthorizationError(msg)
|
||||
elif response.status_code == 429:
|
||||
raise InvokeRateLimitError(msg)
|
||||
elif response.status_code == 500:
|
||||
raise InvokeServerUnavailableError(msg)
|
||||
else:
|
||||
raise InvokeError(msg)
|
||||
except JSONDecodeError as e:
|
||||
raise InvokeServerUnavailableError(
|
||||
f"Failed to convert response to json: {e} with text: {response.text}"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
embeddings = resp["data"]
|
||||
usage = resp["usage"]
|
||||
except Exception as e:
|
||||
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use GPT2Tokenizer to get num tokens
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def _get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
|
||||
"""
|
||||
Get customizable model schema
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return: model schema
|
||||
"""
|
||||
return AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(zh_Hans=model, en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
features=[],
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512")),
|
||||
ModelPropertyKey.MAX_CHUNKS: 1,
|
||||
},
|
||||
parameter_rules=[],
|
||||
)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvokeAuthorizationError:
|
||||
raise CredentialsValidateFailedError("Invalid credentials")
|
||||
except InvokeConnectionError as e:
|
||||
raise CredentialsValidateFailedError(f"Invalid credentials: {e}")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,184 +0,0 @@
|
||||
import time
|
||||
from json import dumps
|
||||
from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.minimax.llm.errors import (
|
||||
BadRequestError,
|
||||
InsufficientAccountBalanceError,
|
||||
InternalServerError,
|
||||
InvalidAPIKeyError,
|
||||
InvalidAuthenticationError,
|
||||
RateLimitReachedError,
|
||||
)
|
||||
|
||||
|
||||
class MinimaxTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Minimax text embedding model.
|
||||
"""
|
||||
|
||||
api_base: str = "https://api.minimax.chat/v1/embeddings"
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["minimax_api_key"]
|
||||
group_id = credentials["minimax_group_id"]
|
||||
if model != "embo-01":
|
||||
raise ValueError("Invalid model name")
|
||||
if not api_key:
|
||||
raise CredentialsValidateFailedError("api_key is required")
|
||||
url = f"{self.api_base}?GroupId={group_id}"
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
|
||||
data = {"model": "embo-01", "texts": texts, "type": "db"}
|
||||
|
||||
try:
|
||||
response = post(url, headers=headers, data=dumps(data))
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
raise InvokeServerUnavailableError(response.text)
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
# check if there is an error
|
||||
if resp["base_resp"]["status_code"] != 0:
|
||||
code = resp["base_resp"]["status_code"]
|
||||
msg = resp["base_resp"]["status_msg"]
|
||||
self._handle_error(code, msg)
|
||||
|
||||
embeddings = resp["vectors"]
|
||||
total_tokens = resp["total_tokens"]
|
||||
except InvalidAuthenticationError:
|
||||
raise InvalidAPIKeyError("Invalid api key")
|
||||
except KeyError as e:
|
||||
raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=total_tokens)
|
||||
|
||||
result = TextEmbeddingResult(model=model, embeddings=embeddings, usage=usage)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use MinimaxTokenizer to get num tokens
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvalidAPIKeyError:
|
||||
raise CredentialsValidateFailedError("Invalid api key")
|
||||
|
||||
def _handle_error(self, code: int, msg: str):
|
||||
if code in {1000, 1001}:
|
||||
raise InternalServerError(msg)
|
||||
elif code == 1002:
|
||||
raise RateLimitReachedError(msg)
|
||||
elif code == 1004:
|
||||
raise InvalidAuthenticationError(msg)
|
||||
elif code == 1008:
|
||||
raise InsufficientAccountBalanceError(msg)
|
||||
elif code == 2013:
|
||||
raise BadRequestError(msg)
|
||||
else:
|
||||
raise InternalServerError(msg)
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [],
|
||||
InvokeServerUnavailableError: [InternalServerError],
|
||||
InvokeRateLimitError: [RateLimitReachedError],
|
||||
InvokeAuthorizationError: [
|
||||
InvalidAuthenticationError,
|
||||
InsufficientAccountBalanceError,
|
||||
InvalidAPIKeyError,
|
||||
],
|
||||
InvokeBadRequestError: [BadRequestError, KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,170 +0,0 @@
|
||||
import time
|
||||
from json import JSONDecodeError, dumps
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
|
||||
class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for MixedBread text embedding model.
|
||||
"""
|
||||
|
||||
api_base: str = "https://api.mixedbread.ai/v1"
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
if not api_key:
|
||||
raise CredentialsValidateFailedError("api_key is required")
|
||||
|
||||
base_url = credentials.get("base_url", self.api_base)
|
||||
base_url = base_url.removesuffix("/")
|
||||
|
||||
url = base_url + "/embeddings"
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
|
||||
data = {"model": model, "input": texts}
|
||||
|
||||
try:
|
||||
response = requests.post(url, headers=headers, data=dumps(data))
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
try:
|
||||
resp = response.json()
|
||||
msg = resp["detail"]
|
||||
if response.status_code == 401:
|
||||
raise InvokeAuthorizationError(msg)
|
||||
elif response.status_code == 429:
|
||||
raise InvokeRateLimitError(msg)
|
||||
elif response.status_code == 500:
|
||||
raise InvokeServerUnavailableError(msg)
|
||||
else:
|
||||
raise InvokeBadRequestError(msg)
|
||||
except JSONDecodeError as e:
|
||||
raise InvokeServerUnavailableError(
|
||||
f"Failed to convert response to json: {e} with text: {response.text}"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
embeddings = resp["data"]
|
||||
usage = resp["usage"]
|
||||
except Exception as e:
|
||||
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,165 +0,0 @@
|
||||
import time
|
||||
from functools import wraps
|
||||
from typing import Optional
|
||||
|
||||
from nomic import embed
|
||||
from nomic import login as nomic_login
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import (
|
||||
EmbeddingUsage,
|
||||
TextEmbeddingResult,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import (
|
||||
TextEmbeddingModel,
|
||||
)
|
||||
from core.model_runtime.model_providers.nomic._common import _CommonNomic
|
||||
|
||||
|
||||
def nomic_login_required(func):
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
if not kwargs.get("credentials"):
|
||||
raise ValueError("missing credentials parameters")
|
||||
credentials = kwargs.get("credentials")
|
||||
if "nomic_api_key" not in credentials:
|
||||
raise ValueError("missing nomic_api_key in credentials parameters")
|
||||
# nomic login
|
||||
nomic_login(credentials["nomic_api_key"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for nomic text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
embeddings, prompt_tokens, total_tokens = self.embed_text(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
texts=texts,
|
||||
)
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(
|
||||
model=model, credentials=credentials, tokens=prompt_tokens, total_tokens=total_tokens
|
||||
)
|
||||
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# call embedding model
|
||||
self.embed_text(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@nomic_login_required
|
||||
def embed_text(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int, int]:
|
||||
"""Call out to Nomic's embedding endpoint.
|
||||
|
||||
Args:
|
||||
model: The model to use for embedding.
|
||||
texts: The list of texts to embed.
|
||||
|
||||
Returns:
|
||||
List of embeddings, one for each text, and tokens usage.
|
||||
"""
|
||||
embeddings: list[list[float]] = []
|
||||
prompt_tokens = 0
|
||||
total_tokens = 0
|
||||
|
||||
response = embed.text(
|
||||
model=model,
|
||||
texts=texts,
|
||||
)
|
||||
|
||||
if not (response and "embeddings" in response):
|
||||
raise ValueError("Embedding data is missing in the response.")
|
||||
|
||||
if not (response and "usage" in response):
|
||||
raise ValueError("Response usage is missing.")
|
||||
|
||||
if "prompt_tokens" not in response["usage"]:
|
||||
raise ValueError("Response usage does not contain prompt tokens.")
|
||||
|
||||
if "total_tokens" not in response["usage"]:
|
||||
raise ValueError("Response usage does not contain total tokens.")
|
||||
|
||||
embeddings = [list(map(float, e)) for e in response["embeddings"]]
|
||||
total_tokens = response["usage"]["total_tokens"]
|
||||
prompt_tokens = response["usage"]["prompt_tokens"]
|
||||
return embeddings, prompt_tokens, total_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: prompt tokens
|
||||
:param total_tokens: total tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.INPUT,
|
||||
tokens=tokens,
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=total_tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,158 +0,0 @@
|
||||
import time
|
||||
from json import JSONDecodeError, dumps
|
||||
from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
|
||||
class NvidiaTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Nvidia text embedding model.
|
||||
"""
|
||||
|
||||
api_base: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings"
|
||||
models: list[str] = ["NV-Embed-QA"]
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
if model not in self.models:
|
||||
raise InvokeBadRequestError("Invalid model name")
|
||||
if not api_key:
|
||||
raise CredentialsValidateFailedError("api_key is required")
|
||||
url = self.api_base
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
|
||||
data = {"model": model, "input": texts[0], "input_type": "query"}
|
||||
|
||||
try:
|
||||
response = post(url, headers=headers, data=dumps(data))
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
try:
|
||||
resp = response.json()
|
||||
msg = resp["detail"]
|
||||
if response.status_code == 401:
|
||||
raise InvokeAuthorizationError(msg)
|
||||
elif response.status_code == 429:
|
||||
raise InvokeRateLimitError(msg)
|
||||
elif response.status_code == 500:
|
||||
raise InvokeServerUnavailableError(msg)
|
||||
else:
|
||||
raise InvokeError(msg)
|
||||
except JSONDecodeError as e:
|
||||
raise InvokeServerUnavailableError(
|
||||
f"Failed to convert response to json: {e} with text: {response.text}"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = response.json()
|
||||
embeddings = resp["data"]
|
||||
usage = resp["usage"]
|
||||
except Exception as e:
|
||||
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use JinaTokenizer to get num tokens
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvokeAuthorizationError:
|
||||
raise CredentialsValidateFailedError("Invalid api key")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,224 +0,0 @@
|
||||
import base64
|
||||
import copy
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import oci
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
request_template = {
|
||||
"compartmentId": "",
|
||||
"servingMode": {"modelId": "cohere.embed-english-light-v3.0", "servingType": "ON_DEMAND"},
|
||||
"truncate": "NONE",
|
||||
"inputs": [""],
|
||||
}
|
||||
oci_config_template = {
|
||||
"user": "",
|
||||
"fingerprint": "",
|
||||
"tenancy": "",
|
||||
"region": "",
|
||||
"compartment_id": "",
|
||||
"key_content": "",
|
||||
}
|
||||
|
||||
|
||||
class OCITextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Cohere text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# get model properties
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
# call embedding model
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, credentials=credentials, texts=inputs[i : i + max_chunks]
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def get_num_characters(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
characters = 0
|
||||
for text in texts:
|
||||
characters += len(text)
|
||||
return characters
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Invoke embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
|
||||
# oci
|
||||
# initialize client
|
||||
oci_config = copy.deepcopy(oci_config_template)
|
||||
if "oci_config_content" in credentials:
|
||||
oci_config_content = base64.b64decode(credentials.get("oci_config_content")).decode("utf-8")
|
||||
config_items = oci_config_content.split("/")
|
||||
if len(config_items) != 5:
|
||||
raise CredentialsValidateFailedError(
|
||||
"oci_config_content should be base64.b64encode("
|
||||
"'user_ocid/fingerprint/tenancy_ocid/region/compartment_ocid'.encode('utf-8'))"
|
||||
)
|
||||
oci_config["user"] = config_items[0]
|
||||
oci_config["fingerprint"] = config_items[1]
|
||||
oci_config["tenancy"] = config_items[2]
|
||||
oci_config["region"] = config_items[3]
|
||||
oci_config["compartment_id"] = config_items[4]
|
||||
else:
|
||||
raise CredentialsValidateFailedError("need to set oci_config_content in credentials ")
|
||||
if "oci_key_content" in credentials:
|
||||
oci_key_content = base64.b64decode(credentials.get("oci_key_content")).decode("utf-8")
|
||||
oci_config["key_content"] = oci_key_content.encode(encoding="utf-8")
|
||||
else:
|
||||
raise CredentialsValidateFailedError("need to set oci_config_content in credentials ")
|
||||
# oci_config = oci.config.from_file('~/.oci/config', credentials.get('oci_api_profile'))
|
||||
compartment_id = oci_config["compartment_id"]
|
||||
client = oci.generative_ai_inference.GenerativeAiInferenceClient(config=oci_config)
|
||||
# call embedding model
|
||||
request_args = copy.deepcopy(request_template)
|
||||
request_args["compartmentId"] = compartment_id
|
||||
request_args["servingMode"]["modelId"] = model
|
||||
request_args["inputs"] = texts
|
||||
response = client.embed_text(request_args)
|
||||
return response.data.embeddings, self.get_num_characters(model=model, credentials=credentials, texts=texts)
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
@ -1,211 +0,0 @@
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from decimal import Decimal
|
||||
from typing import Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
FetchFrom,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
PriceConfig,
|
||||
PriceType,
|
||||
)
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OllamaEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for an Ollama text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
# Prepare headers and payload for the request
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
endpoint_url = credentials.get("base_url")
|
||||
if not endpoint_url.endswith("/"):
|
||||
endpoint_url += "/"
|
||||
|
||||
endpoint_url = urljoin(endpoint_url, "api/embed")
|
||||
|
||||
# get model properties
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
|
||||
inputs = []
|
||||
used_tokens = 0
|
||||
|
||||
for text in texts:
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
|
||||
# Prepare the payload for the request
|
||||
payload = {"input": inputs, "model": model, "options": {"use_mmap": True}}
|
||||
|
||||
# Make the request to the Ollama API
|
||||
response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
|
||||
|
||||
response.raise_for_status() # Raise an exception for HTTP errors
|
||||
response_data = response.json()
|
||||
|
||||
# Extract embeddings and used tokens from the response
|
||||
embeddings = response_data["embeddings"]
|
||||
embedding_used_tokens = self.get_num_tokens(model, credentials, inputs)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Approximate number of tokens for given messages using GPT2 tokenizer
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvokeError as ex:
|
||||
raise CredentialsValidateFailedError(f"An error occurred during credentials validation: {ex.description}")
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(f"An error occurred during credentials validation: {str(ex)}")
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size")),
|
||||
ModelPropertyKey.MAX_CHUNKS: 1,
|
||||
},
|
||||
parameter_rules=[],
|
||||
pricing=PriceConfig(
|
||||
input=Decimal(credentials.get("input_price", 0)),
|
||||
unit=Decimal(credentials.get("unit", 0)),
|
||||
currency=credentials.get("currency", "USD"),
|
||||
),
|
||||
)
|
||||
|
||||
return entity
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeAuthorizationError: [
|
||||
requests.exceptions.InvalidHeader, # Missing or Invalid API Key
|
||||
],
|
||||
InvokeBadRequestError: [
|
||||
requests.exceptions.HTTPError, # Invalid Endpoint URL or model name
|
||||
requests.exceptions.InvalidURL, # Misconfigured request or other API error
|
||||
],
|
||||
InvokeRateLimitError: [
|
||||
requests.exceptions.RetryError # Too many requests sent in a short period of time
|
||||
],
|
||||
InvokeServerUnavailableError: [
|
||||
requests.exceptions.ConnectionError, # Engine Overloaded
|
||||
requests.exceptions.HTTPError, # Server Error
|
||||
],
|
||||
InvokeConnectionError: [
|
||||
requests.exceptions.ConnectTimeout, # Timeout
|
||||
requests.exceptions.ReadTimeout, # Timeout
|
||||
],
|
||||
}
|
||||
@ -1,203 +0,0 @@
|
||||
import base64
|
||||
import time
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import tiktoken
|
||||
from openai import OpenAI
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.openai._common import _CommonOpenAI
|
||||
|
||||
|
||||
class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for OpenAI text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
# init model client
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if user:
|
||||
extra_model_kwargs["user"] = user
|
||||
|
||||
extra_model_kwargs["encoding_format"] = "base64"
|
||||
|
||||
# get model properties
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
embeddings: list[list[float]] = [[] for _ in range(len(texts))]
|
||||
tokens = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
try:
|
||||
enc = tiktoken.encoding_for_model(model)
|
||||
except KeyError:
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
token = enc.encode(text)
|
||||
for j in range(0, len(token), context_size):
|
||||
tokens += [token[j : j + context_size]]
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(tokens), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
# call embedding model
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, client=client, texts=tokens[i : i + max_chunks], extra_model_kwargs=extra_model_kwargs
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
||||
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
||||
for i in range(len(indices)):
|
||||
results[indices[i]].append(batched_embeddings[i])
|
||||
num_tokens_in_batch[indices[i]].append(len(tokens[i]))
|
||||
|
||||
for i in range(len(texts)):
|
||||
_result = results[i]
|
||||
if len(_result) == 0:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model, client=client, texts="", extra_model_kwargs=extra_model_kwargs
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
average = embeddings_batch[0]
|
||||
else:
|
||||
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
||||
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
|
||||
try:
|
||||
enc = tiktoken.encoding_for_model(model)
|
||||
except KeyError:
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
# calculate the number of tokens in the encoded text
|
||||
tokenized_text = enc.encode(text)
|
||||
total_num_tokens += len(tokenized_text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(
|
||||
self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Invoke embedding model
|
||||
|
||||
:param model: model name
|
||||
:param client: model client
|
||||
:param texts: texts to embed
|
||||
:param extra_model_kwargs: extra model kwargs
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
# call embedding model
|
||||
response = client.embeddings.create(
|
||||
input=texts,
|
||||
model=model,
|
||||
**extra_model_kwargs,
|
||||
)
|
||||
|
||||
if "encoding_format" in extra_model_kwargs and extra_model_kwargs["encoding_format"] == "base64":
|
||||
# decode base64 embedding
|
||||
return (
|
||||
[list(np.frombuffer(base64.b64decode(data.embedding), dtype="float32")) for data in response.data],
|
||||
response.usage.total_tokens,
|
||||
)
|
||||
|
||||
return [data.embedding for data in response.data], response.usage.total_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,217 +0,0 @@
|
||||
import json
|
||||
import time
|
||||
from decimal import Decimal
|
||||
from typing import Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
FetchFrom,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
PriceConfig,
|
||||
PriceType,
|
||||
)
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOaiApiCompat
|
||||
|
||||
|
||||
class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for an OpenAI API-compatible text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
# Prepare headers and payload for the request
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
api_key = credentials.get("api_key")
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
|
||||
endpoint_url = credentials.get("endpoint_url")
|
||||
if not endpoint_url.endswith("/"):
|
||||
endpoint_url += "/"
|
||||
|
||||
endpoint_url = urljoin(endpoint_url, "embeddings")
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if user:
|
||||
extra_model_kwargs["user"] = user
|
||||
|
||||
extra_model_kwargs["encoding_format"] = "float"
|
||||
|
||||
# get model properties
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
# TODO: Optimize for better token estimation and chunking
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
# Prepare the payload for the request
|
||||
payload = {"input": inputs[i : i + max_chunks], "model": model, **extra_model_kwargs}
|
||||
|
||||
# Make the request to the OpenAI API
|
||||
response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
|
||||
|
||||
response.raise_for_status() # Raise an exception for HTTP errors
|
||||
response_data = response.json()
|
||||
|
||||
# Extract embeddings and used tokens from the response
|
||||
embeddings_batch = [data["embedding"] for data in response_data["data"]]
|
||||
embedding_used_tokens = response_data["usage"]["total_tokens"]
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Approximate number of tokens for given messages using GPT2 tokenizer
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
api_key = credentials.get("api_key")
|
||||
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
|
||||
endpoint_url = credentials.get("endpoint_url")
|
||||
if not endpoint_url.endswith("/"):
|
||||
endpoint_url += "/"
|
||||
|
||||
endpoint_url = urljoin(endpoint_url, "embeddings")
|
||||
|
||||
payload = {"input": "ping", "model": model}
|
||||
|
||||
response = requests.post(url=endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300))
|
||||
|
||||
if response.status_code != 200:
|
||||
raise CredentialsValidateFailedError(
|
||||
f"Credentials validation failed with status code {response.status_code}"
|
||||
)
|
||||
|
||||
try:
|
||||
json_result = response.json()
|
||||
except json.JSONDecodeError as e:
|
||||
raise CredentialsValidateFailedError("Credentials validation failed: JSON decode error")
|
||||
|
||||
if "model" not in json_result:
|
||||
raise CredentialsValidateFailedError("Credentials validation failed: invalid response")
|
||||
except CredentialsValidateFailedError:
|
||||
raise
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size")),
|
||||
ModelPropertyKey.MAX_CHUNKS: 1,
|
||||
},
|
||||
parameter_rules=[],
|
||||
pricing=PriceConfig(
|
||||
input=Decimal(credentials.get("input_price", 0)),
|
||||
unit=Decimal(credentials.get("unit", 0)),
|
||||
currency=credentials.get("currency", "USD"),
|
||||
),
|
||||
)
|
||||
|
||||
return entity
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,155 +0,0 @@
|
||||
import time
|
||||
from json import dumps
|
||||
from typing import Optional
|
||||
|
||||
from requests import post
|
||||
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
|
||||
class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for OpenLLM text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
server_url = credentials["server_url"]
|
||||
if not server_url:
|
||||
raise CredentialsValidateFailedError("server_url is required")
|
||||
|
||||
headers = {"Content-Type": "application/json", "accept": "application/json"}
|
||||
|
||||
url = f"{server_url}/v1/embeddings"
|
||||
|
||||
data = texts
|
||||
try:
|
||||
response = post(url, headers=headers, data=dumps(data))
|
||||
except (ConnectionError, InvalidSchema, MissingSchema) as e:
|
||||
# cloud not connect to the server
|
||||
raise InvokeAuthorizationError(f"Invalid server URL: {e}")
|
||||
except Exception as e:
|
||||
raise InvokeConnectionError(str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
if response.status_code == 400:
|
||||
raise InvokeBadRequestError(response.text)
|
||||
elif response.status_code == 404:
|
||||
raise InvokeAuthorizationError(response.text)
|
||||
elif response.status_code == 500:
|
||||
raise InvokeServerUnavailableError(response.text)
|
||||
|
||||
try:
|
||||
resp = response.json()[0]
|
||||
embeddings = resp["embeddings"]
|
||||
total_tokens = resp["num_tokens"]
|
||||
except KeyError as e:
|
||||
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=total_tokens)
|
||||
|
||||
result = TextEmbeddingResult(model=model, embeddings=embeddings, usage=usage)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use GPT2Tokenizer to get num tokens
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvokeAuthorizationError:
|
||||
raise CredentialsValidateFailedError("Invalid server_url")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,152 +0,0 @@
|
||||
import json
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from replicate import Client as ReplicateClient
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.replicate._common import _CommonReplicate
|
||||
|
||||
|
||||
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
|
||||
|
||||
if "model_version" in credentials:
|
||||
model_version = credentials["model_version"]
|
||||
else:
|
||||
model_info = client.models.get(model)
|
||||
model_version = model_info.latest_version.id
|
||||
|
||||
replicate_model_version = f"{model}:{model_version}"
|
||||
|
||||
text_input_key = self._get_text_input_key(model, model_version, client)
|
||||
|
||||
embeddings = self._generate_embeddings_by_text_input_key(client, replicate_model_version, text_input_key, texts)
|
||||
|
||||
tokens = self.get_num_tokens(model, credentials, texts)
|
||||
usage = self._calc_response_usage(model, credentials, tokens)
|
||||
|
||||
return TextEmbeddingResult(model=model, embeddings=embeddings, usage=usage)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
if "replicate_api_token" not in credentials:
|
||||
raise CredentialsValidateFailedError("Replicate Access Token must be provided.")
|
||||
|
||||
try:
|
||||
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
|
||||
|
||||
if "model_version" in credentials:
|
||||
model_version = credentials["model_version"]
|
||||
else:
|
||||
model_info = client.models.get(model)
|
||||
model_version = model_info.latest_version.id
|
||||
|
||||
replicate_model_version = f"{model}:{model_version}"
|
||||
|
||||
text_input_key = self._get_text_input_key(model, model_version, client)
|
||||
|
||||
self._generate_embeddings_by_text_input_key(
|
||||
client, replicate_model_version, text_input_key, ["Hello worlds!"]
|
||||
)
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(str(e))
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model_properties={"context_size": 4096, "max_chunks": 1},
|
||||
)
|
||||
return entity
|
||||
|
||||
@staticmethod
|
||||
def _get_text_input_key(model: str, model_version: str, client: ReplicateClient) -> str:
|
||||
model_info = client.models.get(model)
|
||||
model_info_version = model_info.versions.get(model_version)
|
||||
|
||||
# sort through the openapi schema to get the name of text, texts or inputs
|
||||
input_properties = sorted(
|
||||
model_info_version.openapi_schema["components"]["schemas"]["Input"]["properties"].items(),
|
||||
key=lambda item: item[1].get("x-order", 0),
|
||||
)
|
||||
|
||||
for input_property in input_properties:
|
||||
if input_property[0] in {"text", "texts", "inputs"}:
|
||||
text_input_key = input_property[0]
|
||||
return text_input_key
|
||||
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _generate_embeddings_by_text_input_key(
|
||||
client: ReplicateClient, replicate_model_version: str, text_input_key: str, texts: list[str]
|
||||
) -> list[list[float]]:
|
||||
if text_input_key in {"text", "inputs"}:
|
||||
embeddings = []
|
||||
for text in texts:
|
||||
result = client.run(replicate_model_version, input={text_input_key: text})
|
||||
embeddings.append(result[0].get("embedding"))
|
||||
|
||||
return [list(map(float, e)) for e in embeddings]
|
||||
elif "texts" == text_input_key:
|
||||
result = client.run(
|
||||
replicate_model_version,
|
||||
input={
|
||||
"texts": json.dumps(texts),
|
||||
"batch_size": 4,
|
||||
"convert_to_numpy": False,
|
||||
"normalize_embeddings": True,
|
||||
},
|
||||
)
|
||||
return result
|
||||
else:
|
||||
raise ValueError(f"embeddings input key is invalid: {text_input_key}")
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,463 +0,0 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from collections.abc import Generator, Iterator
|
||||
from typing import Any, Optional, Union, cast
|
||||
|
||||
# from openai.types.chat import ChatCompletion, ChatCompletionChunk
|
||||
import boto3
|
||||
from sagemaker import Predictor, serializers
|
||||
from sagemaker.session import Session
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
ImagePromptMessageContent,
|
||||
PromptMessage,
|
||||
PromptMessageContent,
|
||||
PromptMessageContentType,
|
||||
PromptMessageTool,
|
||||
SystemPromptMessage,
|
||||
ToolPromptMessage,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
FetchFrom,
|
||||
I18nObject,
|
||||
ModelFeature,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
ParameterRule,
|
||||
ParameterType,
|
||||
)
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def inference(predictor, messages: list[dict[str, Any]], params: dict[str, Any], stop: list, stream=False):
|
||||
"""
|
||||
params:
|
||||
predictor : Sagemaker Predictor
|
||||
messages (List[Dict[str,Any]]): message list。
|
||||
messages = [
|
||||
{"role": "system", "content":"please answer in Chinese"},
|
||||
{"role": "user", "content": "who are you? what are you doing?"},
|
||||
]
|
||||
params (Dict[str,Any]): model parameters for LLM。
|
||||
stream (bool): False by default。
|
||||
|
||||
response:
|
||||
result of inference if stream is False
|
||||
Iterator of Chunks if stream is True
|
||||
"""
|
||||
payload = {
|
||||
"model": params.get("model_name"),
|
||||
"stop": stop,
|
||||
"messages": messages,
|
||||
"stream": stream,
|
||||
"max_tokens": params.get("max_new_tokens", params.get("max_tokens", 2048)),
|
||||
"temperature": params.get("temperature", 0.1),
|
||||
"top_p": params.get("top_p", 0.9),
|
||||
}
|
||||
|
||||
if not stream:
|
||||
response = predictor.predict(payload)
|
||||
return response
|
||||
else:
|
||||
response_stream = predictor.predict_stream(payload)
|
||||
return response_stream
|
||||
|
||||
|
||||
class SageMakerLargeLanguageModel(LargeLanguageModel):
|
||||
"""
|
||||
Model class for Cohere large language model.
|
||||
"""
|
||||
|
||||
sagemaker_session: Any = None
|
||||
predictor: Any = None
|
||||
sagemaker_endpoint: str = None
|
||||
|
||||
def _handle_chat_generate_response(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
tools: list[PromptMessageTool],
|
||||
resp: bytes,
|
||||
) -> LLMResult:
|
||||
"""
|
||||
handle normal chat generate response
|
||||
"""
|
||||
resp_obj = json.loads(resp.decode("utf-8"))
|
||||
resp_str = resp_obj.get("choices")[0].get("message").get("content")
|
||||
|
||||
if len(resp_str) == 0:
|
||||
raise InvokeServerUnavailableError("Empty response")
|
||||
|
||||
assistant_prompt_message = AssistantPromptMessage(content=resp_str, tool_calls=[])
|
||||
|
||||
prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
|
||||
completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=tools)
|
||||
|
||||
usage = self._calc_response_usage(
|
||||
model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
|
||||
)
|
||||
|
||||
response = LLMResult(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
system_fingerprint=None,
|
||||
usage=usage,
|
||||
message=assistant_prompt_message,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _handle_chat_stream_response(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
tools: list[PromptMessageTool],
|
||||
resp: Iterator[bytes],
|
||||
) -> Generator:
|
||||
"""
|
||||
handle stream chat generate response
|
||||
"""
|
||||
full_response = ""
|
||||
buffer = ""
|
||||
for chunk_bytes in resp:
|
||||
buffer += chunk_bytes.decode("utf-8")
|
||||
last_idx = 0
|
||||
for match in re.finditer(r"^data:\s*(.+?)(\n\n)", buffer):
|
||||
try:
|
||||
data = json.loads(match.group(1).strip())
|
||||
last_idx = match.span()[1]
|
||||
|
||||
if "content" in data["choices"][0]["delta"]:
|
||||
chunk_content = data["choices"][0]["delta"]["content"]
|
||||
assistant_prompt_message = AssistantPromptMessage(content=chunk_content, tool_calls=[])
|
||||
|
||||
if data["choices"][0]["finish_reason"] is not None:
|
||||
temp_assistant_prompt_message = AssistantPromptMessage(content=full_response, tool_calls=[])
|
||||
prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
|
||||
completion_tokens = self._num_tokens_from_messages(
|
||||
messages=[temp_assistant_prompt_message], tools=[]
|
||||
)
|
||||
usage = self._calc_response_usage(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
)
|
||||
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
system_fingerprint=None,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=0,
|
||||
message=assistant_prompt_message,
|
||||
finish_reason=data["choices"][0]["finish_reason"],
|
||||
usage=usage,
|
||||
),
|
||||
)
|
||||
else:
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
system_fingerprint=None,
|
||||
delta=LLMResultChunkDelta(index=0, message=assistant_prompt_message),
|
||||
)
|
||||
|
||||
full_response += chunk_content
|
||||
except (json.JSONDecodeError, KeyError, IndexError) as e:
|
||||
logger.info("json parse exception, content: {}".format(match.group(1).strip()))
|
||||
pass
|
||||
|
||||
buffer = buffer[last_idx:]
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param tools: tools for tool calling
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
if not self.sagemaker_session:
|
||||
access_key = credentials.get("aws_access_key_id")
|
||||
secret_key = credentials.get("aws_secret_access_key")
|
||||
aws_region = credentials.get("aws_region")
|
||||
boto_session = None
|
||||
if aws_region:
|
||||
if access_key and secret_key:
|
||||
boto_session = boto3.Session(
|
||||
aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=aws_region
|
||||
)
|
||||
else:
|
||||
boto_session = boto3.Session(region_name=aws_region)
|
||||
else:
|
||||
boto_session = boto3.Session()
|
||||
|
||||
sagemaker_client = boto_session.client("sagemaker")
|
||||
self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
|
||||
|
||||
if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
|
||||
self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
|
||||
self.predictor = Predictor(
|
||||
endpoint_name=self.sagemaker_endpoint,
|
||||
sagemaker_session=self.sagemaker_session,
|
||||
serializer=serializers.JSONSerializer(),
|
||||
)
|
||||
|
||||
messages: list[dict[str, Any]] = [{"role": p.role.value, "content": p.content} for p in prompt_messages]
|
||||
response = inference(
|
||||
predictor=self.predictor, messages=messages, params=model_parameters, stop=stop, stream=stream
|
||||
)
|
||||
|
||||
if stream:
|
||||
if tools and len(tools) > 0:
|
||||
raise InvokeBadRequestError(f"{model}'s tool calls does not support stream mode")
|
||||
|
||||
return self._handle_chat_stream_response(
|
||||
model=model, credentials=credentials, prompt_messages=prompt_messages, tools=tools, resp=response
|
||||
)
|
||||
return self._handle_chat_generate_response(
|
||||
model=model, credentials=credentials, prompt_messages=prompt_messages, tools=tools, resp=response
|
||||
)
|
||||
|
||||
def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
|
||||
"""
|
||||
Convert PromptMessage to dict for OpenAI Compatibility API
|
||||
"""
|
||||
if isinstance(message, UserPromptMessage):
|
||||
message = cast(UserPromptMessage, message)
|
||||
if isinstance(message.content, str):
|
||||
message_dict = {"role": "user", "content": message.content}
|
||||
else:
|
||||
sub_messages = []
|
||||
for message_content in message.content:
|
||||
if message_content.type == PromptMessageContentType.TEXT:
|
||||
message_content = cast(PromptMessageContent, message_content)
|
||||
sub_message_dict = {"type": "text", "text": message_content.data}
|
||||
sub_messages.append(sub_message_dict)
|
||||
elif message_content.type == PromptMessageContentType.IMAGE:
|
||||
message_content = cast(ImagePromptMessageContent, message_content)
|
||||
sub_message_dict = {
|
||||
"type": "image_url",
|
||||
"image_url": {"url": message_content.data, "detail": message_content.detail.value},
|
||||
}
|
||||
sub_messages.append(sub_message_dict)
|
||||
message_dict = {"role": "user", "content": sub_messages}
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
message = cast(AssistantPromptMessage, message)
|
||||
message_dict = {"role": "assistant", "content": message.content}
|
||||
if message.tool_calls and len(message.tool_calls) > 0:
|
||||
message_dict["function_call"] = {
|
||||
"name": message.tool_calls[0].function.name,
|
||||
"arguments": message.tool_calls[0].function.arguments,
|
||||
}
|
||||
elif isinstance(message, SystemPromptMessage):
|
||||
message = cast(SystemPromptMessage, message)
|
||||
message_dict = {"role": "system", "content": message.content}
|
||||
elif isinstance(message, ToolPromptMessage):
|
||||
message = cast(ToolPromptMessage, message)
|
||||
message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content}
|
||||
else:
|
||||
raise ValueError(f"Unknown message type {type(message)}")
|
||||
|
||||
return message_dict
|
||||
|
||||
def _num_tokens_from_messages(
|
||||
self, messages: list[PromptMessage], tools: list[PromptMessageTool], is_completion_model: bool = False
|
||||
) -> int:
|
||||
def tokens(text: str):
|
||||
return self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if is_completion_model:
|
||||
return sum(tokens(str(message.content)) for message in messages)
|
||||
|
||||
tokens_per_message = 3
|
||||
tokens_per_name = 1
|
||||
|
||||
num_tokens = 0
|
||||
messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages]
|
||||
for message in messages_dict:
|
||||
num_tokens += tokens_per_message
|
||||
for key, value in message.items():
|
||||
if isinstance(value, list):
|
||||
text = ""
|
||||
for item in value:
|
||||
if isinstance(item, dict) and item["type"] == "text":
|
||||
text += item["text"]
|
||||
|
||||
value = text
|
||||
|
||||
if key == "tool_calls":
|
||||
for tool_call in value:
|
||||
for t_key, t_value in tool_call.items():
|
||||
num_tokens += tokens(t_key)
|
||||
if t_key == "function":
|
||||
for f_key, f_value in t_value.items():
|
||||
num_tokens += tokens(f_key)
|
||||
num_tokens += tokens(f_value)
|
||||
else:
|
||||
num_tokens += tokens(t_key)
|
||||
num_tokens += tokens(t_value)
|
||||
if key == "function_call":
|
||||
for t_key, t_value in value.items():
|
||||
num_tokens += tokens(t_key)
|
||||
if t_key == "function":
|
||||
for f_key, f_value in t_value.items():
|
||||
num_tokens += tokens(f_key)
|
||||
num_tokens += tokens(f_value)
|
||||
else:
|
||||
num_tokens += tokens(t_key)
|
||||
num_tokens += tokens(t_value)
|
||||
else:
|
||||
num_tokens += tokens(str(value))
|
||||
|
||||
if key == "name":
|
||||
num_tokens += tokens_per_name
|
||||
num_tokens += 3
|
||||
|
||||
if tools:
|
||||
num_tokens += self._num_tokens_for_tools(tools)
|
||||
|
||||
return num_tokens
|
||||
|
||||
def get_num_tokens(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param tools: tools for tool calling
|
||||
:return:
|
||||
"""
|
||||
# get model mode
|
||||
try:
|
||||
return self._num_tokens_from_messages(prompt_messages, tools)
|
||||
except Exception as e:
|
||||
raise self._transform_invoke_error(e)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# get model mode
|
||||
pass
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [InvokeBadRequestError, KeyError, ValueError],
|
||||
}
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
|
||||
"""
|
||||
used to define customizable model schema
|
||||
"""
|
||||
rules = [
|
||||
ParameterRule(
|
||||
name="temperature",
|
||||
type=ParameterType.FLOAT,
|
||||
use_template="temperature",
|
||||
label=I18nObject(zh_Hans="温度", en_US="Temperature"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="top_p",
|
||||
type=ParameterType.FLOAT,
|
||||
use_template="top_p",
|
||||
label=I18nObject(zh_Hans="Top P", en_US="Top P"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="max_tokens",
|
||||
type=ParameterType.INT,
|
||||
use_template="max_tokens",
|
||||
min=1,
|
||||
max=credentials.get("context_length", 2048),
|
||||
default=512,
|
||||
label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
|
||||
),
|
||||
]
|
||||
|
||||
completion_type = LLMMode.value_of(credentials["mode"]).value
|
||||
|
||||
features = []
|
||||
|
||||
support_function_call = credentials.get("support_function_call", False)
|
||||
if support_function_call:
|
||||
features.append(ModelFeature.TOOL_CALL)
|
||||
|
||||
support_vision = credentials.get("support_vision", False)
|
||||
if support_vision:
|
||||
features.append(ModelFeature.VISION)
|
||||
|
||||
context_length = credentials.get("context_length", 2048)
|
||||
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.LLM,
|
||||
features=features,
|
||||
model_properties={ModelPropertyKey.MODE: completion_type, ModelPropertyKey.CONTEXT_SIZE: context_length},
|
||||
parameter_rules=rules,
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,200 +0,0 @@
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
import boto3
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
|
||||
BATCH_SIZE = 20
|
||||
CONTEXT_SIZE = 8192
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def batch_generator(generator, batch_size):
|
||||
while True:
|
||||
batch = list(itertools.islice(generator, batch_size))
|
||||
if not batch:
|
||||
break
|
||||
yield batch
|
||||
|
||||
|
||||
class SageMakerEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Cohere text embedding model.
|
||||
"""
|
||||
|
||||
sagemaker_client: Any = None
|
||||
|
||||
def _sagemaker_embedding(self, sm_client, endpoint_name, content_list: list[str]):
|
||||
response_model = sm_client.invoke_endpoint(
|
||||
EndpointName=endpoint_name,
|
||||
Body=json.dumps({"inputs": content_list, "parameters": {}, "is_query": False, "instruction": ""}),
|
||||
ContentType="application/json",
|
||||
)
|
||||
json_str = response_model["Body"].read().decode("utf8")
|
||||
json_obj = json.loads(json_str)
|
||||
embeddings = json_obj["embeddings"]
|
||||
return embeddings
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# get model properties
|
||||
try:
|
||||
line = 1
|
||||
if not self.sagemaker_client:
|
||||
access_key = credentials.get("aws_access_key_id")
|
||||
secret_key = credentials.get("aws_secret_access_key")
|
||||
aws_region = credentials.get("aws_region")
|
||||
if aws_region:
|
||||
if access_key and secret_key:
|
||||
self.sagemaker_client = boto3.client(
|
||||
"sagemaker-runtime",
|
||||
aws_access_key_id=access_key,
|
||||
aws_secret_access_key=secret_key,
|
||||
region_name=aws_region,
|
||||
)
|
||||
else:
|
||||
self.sagemaker_client = boto3.client("sagemaker-runtime", region_name=aws_region)
|
||||
else:
|
||||
self.sagemaker_client = boto3.client("sagemaker-runtime")
|
||||
|
||||
line = 2
|
||||
sagemaker_endpoint = credentials.get("sagemaker_endpoint")
|
||||
|
||||
line = 3
|
||||
truncated_texts = [item[:CONTEXT_SIZE] for item in texts]
|
||||
|
||||
batches = batch_generator((text for text in truncated_texts), batch_size=BATCH_SIZE)
|
||||
all_embeddings = []
|
||||
|
||||
line = 4
|
||||
for batch in batches:
|
||||
embeddings = self._sagemaker_embedding(self.sagemaker_client, sagemaker_endpoint, batch)
|
||||
all_embeddings.extend(embeddings)
|
||||
|
||||
line = 5
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
tokens=0, # It's not SAAS API, usage is meaningless
|
||||
)
|
||||
line = 6
|
||||
|
||||
return TextEmbeddingResult(embeddings=all_embeddings, usage=usage, model=model)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Exception {e}, line : {line}")
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return 0
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
print("validate_credentials ok....")
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
|
||||
"""
|
||||
used to define customizable model schema
|
||||
"""
|
||||
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: CONTEXT_SIZE,
|
||||
ModelPropertyKey.MAX_CHUNKS: BATCH_SIZE,
|
||||
},
|
||||
parameter_rules=[],
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,28 +0,0 @@
|
||||
- Qwen/Qwen2.5-72B-Instruct
|
||||
- Qwen/Qwen2.5-32B-Instruct
|
||||
- Qwen/Qwen2.5-14B-Instruct
|
||||
- Qwen/Qwen2.5-7B-Instruct
|
||||
- Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
- Qwen/Qwen2.5-Math-72B-Instruct
|
||||
- Qwen/Qwen2-72B-Instruct
|
||||
- Qwen/Qwen2-57B-A14B-Instruct
|
||||
- Qwen/Qwen2-7B-Instruct
|
||||
- Qwen/Qwen2-1.5B-Instruct
|
||||
- deepseek-ai/DeepSeek-V2.5
|
||||
- deepseek-ai/DeepSeek-V2-Chat
|
||||
- deepseek-ai/DeepSeek-Coder-V2-Instruct
|
||||
- THUDM/glm-4-9b-chat
|
||||
- 01-ai/Yi-1.5-34B-Chat-16K
|
||||
- 01-ai/Yi-1.5-9B-Chat-16K
|
||||
- 01-ai/Yi-1.5-6B-Chat
|
||||
- internlm/internlm2_5-20b-chat
|
||||
- internlm/internlm2_5-7b-chat
|
||||
- meta-llama/Meta-Llama-3.1-405B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
- meta-llama/Meta-Llama-3-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3-8B-Instruct
|
||||
- google/gemma-2-27b-it
|
||||
- google/gemma-2-9b-it
|
||||
- mistralai/Mistral-7B-Instruct-v0.2
|
||||
- mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
@ -1,30 +0,0 @@
|
||||
model: internlm/internlm2_5-20b-chat
|
||||
label:
|
||||
en_US: internlm/internlm2_5-20b-chat
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
pricing:
|
||||
input: '1'
|
||||
output: '1'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
@ -1,46 +0,0 @@
|
||||
from typing import Optional
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
|
||||
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
|
||||
OAICompatEmbeddingModel,
|
||||
)
|
||||
|
||||
|
||||
class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
|
||||
"""
|
||||
Model class for Siliconflow text embedding model.
|
||||
"""
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
self._add_custom_parameters(credentials)
|
||||
super().validate_credentials(model, credentials)
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
self._add_custom_parameters(credentials)
|
||||
return super()._invoke(model, credentials, texts, user)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
self._add_custom_parameters(credentials)
|
||||
return super().get_num_tokens(model, credentials, texts)
|
||||
|
||||
@classmethod
|
||||
def _add_custom_parameters(cls, credentials: dict) -> None:
|
||||
credentials["endpoint_url"] = "https://api.siliconflow.cn/v1"
|
||||
@ -1,309 +0,0 @@
|
||||
import threading
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
PromptMessage,
|
||||
PromptMessageTool,
|
||||
SystemPromptMessage,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
|
||||
from ._client import SparkLLMClient
|
||||
|
||||
|
||||
class SparkLargeLanguageModel(LargeLanguageModel):
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param tools: tools for tool calling
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
# invoke model
|
||||
return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)
|
||||
|
||||
def get_num_tokens(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param tools: tools for tool calling
|
||||
:return:
|
||||
"""
|
||||
prompt = self._convert_messages_to_prompt(prompt_messages)
|
||||
|
||||
return self._get_num_tokens_by_gpt2(prompt)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self._generate(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
prompt_messages=[
|
||||
UserPromptMessage(content="ping"),
|
||||
],
|
||||
model_parameters={
|
||||
"temperature": 0.5,
|
||||
},
|
||||
stream=False,
|
||||
)
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _generate(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
extra_model_kwargs = {}
|
||||
if stop:
|
||||
extra_model_kwargs["stop_sequences"] = stop
|
||||
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
|
||||
client = SparkLLMClient(
|
||||
model=model,
|
||||
**credentials_kwargs,
|
||||
)
|
||||
|
||||
thread = threading.Thread(
|
||||
target=client.run,
|
||||
args=(
|
||||
[
|
||||
{"role": prompt_message.role.value, "content": prompt_message.content}
|
||||
for prompt_message in prompt_messages
|
||||
],
|
||||
user,
|
||||
model_parameters,
|
||||
stream,
|
||||
),
|
||||
)
|
||||
thread.start()
|
||||
|
||||
if stream:
|
||||
return self._handle_generate_stream_response(thread, model, credentials, client, prompt_messages)
|
||||
|
||||
return self._handle_generate_response(thread, model, credentials, client, prompt_messages)
|
||||
|
||||
def _handle_generate_response(
|
||||
self,
|
||||
thread: threading.Thread,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
client: SparkLLMClient,
|
||||
prompt_messages: list[PromptMessage],
|
||||
) -> LLMResult:
|
||||
"""
|
||||
Handle llm response
|
||||
|
||||
:param model: model name
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response
|
||||
"""
|
||||
completion = ""
|
||||
|
||||
for content in client.subscribe():
|
||||
if isinstance(content, dict):
|
||||
delta = content["data"]
|
||||
else:
|
||||
delta = content
|
||||
|
||||
completion += delta
|
||||
|
||||
thread.join()
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(content=completion)
|
||||
|
||||
# calculate num tokens
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
# transform response
|
||||
result = LLMResult(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
message=assistant_prompt_message,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def _handle_generate_stream_response(
|
||||
self,
|
||||
thread: threading.Thread,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
client: SparkLLMClient,
|
||||
prompt_messages: list[PromptMessage],
|
||||
) -> Generator:
|
||||
"""
|
||||
Handle llm stream response
|
||||
|
||||
:param thread: thread
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response chunk generator result
|
||||
"""
|
||||
completion = ""
|
||||
for index, content in enumerate(client.subscribe()):
|
||||
if isinstance(content, dict):
|
||||
delta = content["data"]
|
||||
else:
|
||||
delta = content
|
||||
completion += delta
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=delta or "",
|
||||
)
|
||||
temp_assistant_prompt_message = AssistantPromptMessage(
|
||||
content=completion,
|
||||
)
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(index=index, message=assistant_prompt_message, usage=usage),
|
||||
)
|
||||
|
||||
thread.join()
|
||||
|
||||
def _to_credential_kwargs(self, credentials: dict) -> dict:
|
||||
"""
|
||||
Transform credentials to kwargs for model instance
|
||||
|
||||
:param credentials:
|
||||
:return:
|
||||
"""
|
||||
credentials_kwargs = {
|
||||
"app_id": credentials["app_id"],
|
||||
"api_secret": credentials["api_secret"],
|
||||
"api_key": credentials["api_key"],
|
||||
}
|
||||
|
||||
return credentials_kwargs
|
||||
|
||||
def _convert_one_message_to_text(self, message: PromptMessage) -> str:
|
||||
"""
|
||||
Convert a single message to a string.
|
||||
|
||||
:param message: PromptMessage to convert.
|
||||
:return: String representation of the message.
|
||||
"""
|
||||
human_prompt = "\n\nHuman:"
|
||||
ai_prompt = "\n\nAssistant:"
|
||||
content = message.content
|
||||
|
||||
if isinstance(message, UserPromptMessage):
|
||||
message_text = f"{human_prompt} {content}"
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
message_text = f"{ai_prompt} {content}"
|
||||
elif isinstance(message, SystemPromptMessage):
|
||||
message_text = content
|
||||
else:
|
||||
raise ValueError(f"Got unknown type {message}")
|
||||
|
||||
return message_text
|
||||
|
||||
def _convert_messages_to_prompt(self, messages: list[PromptMessage]) -> str:
|
||||
"""
|
||||
Format a list of messages into a full prompt for the Anthropic model
|
||||
|
||||
:param messages: List of PromptMessage to combine.
|
||||
:return: Combined string with necessary human_prompt and ai_prompt tags.
|
||||
"""
|
||||
messages = messages.copy() # don't mutate the original list
|
||||
|
||||
text = "".join(self._convert_one_message_to_text(message) for message in messages)
|
||||
|
||||
# trim off the trailing ' ' that might come from the "Assistant: "
|
||||
return text.rstrip()
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [],
|
||||
InvokeServerUnavailableError: [],
|
||||
InvokeRateLimitError: [],
|
||||
InvokeAuthorizationError: [],
|
||||
InvokeBadRequestError: [],
|
||||
}
|
||||
@ -1,177 +0,0 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import dashscope
|
||||
import numpy as np
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import (
|
||||
EmbeddingUsage,
|
||||
TextEmbeddingResult,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import (
|
||||
TextEmbeddingModel,
|
||||
)
|
||||
from core.model_runtime.model_providers.tongyi._common import _CommonTongyi
|
||||
|
||||
|
||||
class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Tongyi text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
embeddings_batch, embedding_used_tokens = self.embed_documents(
|
||||
credentials_kwargs=credentials_kwargs,
|
||||
model=model,
|
||||
texts=inputs[i : i + max_chunks],
|
||||
)
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
total_num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
|
||||
# call embedding model
|
||||
self.embed_documents(credentials_kwargs=credentials_kwargs, model=model, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@staticmethod
|
||||
def embed_documents(credentials_kwargs: dict, model: str, texts: list[str]) -> tuple[list[list[float]], int]:
|
||||
"""Call out to Tongyi's embedding endpoint.
|
||||
|
||||
Args:
|
||||
credentials_kwargs: The credentials to use for the call.
|
||||
model: The model to use for embedding.
|
||||
texts: The list of texts to embed.
|
||||
|
||||
Returns:
|
||||
List of embeddings, one for each text, and tokens usage.
|
||||
"""
|
||||
embeddings = []
|
||||
embedding_used_tokens = 0
|
||||
for text in texts:
|
||||
response = dashscope.TextEmbedding.call(
|
||||
api_key=credentials_kwargs["dashscope_api_key"],
|
||||
model=model,
|
||||
input=text,
|
||||
text_type="document",
|
||||
)
|
||||
if response.output and "embeddings" in response.output and response.output["embeddings"]:
|
||||
data = response.output["embeddings"][0]
|
||||
if "embedding" in data:
|
||||
embeddings.append(data["embedding"])
|
||||
else:
|
||||
raise ValueError("Embedding data is missing in the response.")
|
||||
else:
|
||||
raise ValueError("Response output is missing or does not contain embeddings.")
|
||||
|
||||
if response.usage and "total_tokens" in response.usage:
|
||||
embedding_used_tokens += response.usage["total_tokens"]
|
||||
else:
|
||||
raise ValueError("Response usage is missing or does not contain total tokens.")
|
||||
|
||||
return [list(map(float, e)) for e in embeddings], embedding_used_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.INPUT,
|
||||
tokens=tokens,
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,197 +0,0 @@
|
||||
import base64
|
||||
import time
|
||||
from collections.abc import Mapping
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
from openai import OpenAI
|
||||
from tokenizers import Tokenizer
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.upstage._common import _CommonUpstage
|
||||
|
||||
|
||||
class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Upstage text embedding model.
|
||||
"""
|
||||
|
||||
def _get_tokenizer(self) -> Tokenizer:
|
||||
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: str | None = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if user:
|
||||
extra_model_kwargs["user"] = user
|
||||
extra_model_kwargs["encoding_format"] = "base64"
|
||||
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
embeddings: list[list[float]] = [[] for _ in range(len(texts))]
|
||||
tokens = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
tokenizer = self._get_tokenizer()
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
token = tokenizer.encode(text, add_special_tokens=False).tokens
|
||||
for j in range(0, len(token), context_size):
|
||||
tokens += [token[j : j + context_size]]
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(tokens), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=tokens[i : i + max_chunks],
|
||||
extra_model_kwargs=extra_model_kwargs,
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
||||
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
||||
|
||||
for i in range(len(indices)):
|
||||
results[indices[i]].append(batched_embeddings[i])
|
||||
num_tokens_in_batch[indices[i]].append(len(tokens[i]))
|
||||
|
||||
for i in range(len(texts)):
|
||||
_result = results[i]
|
||||
if len(_result) == 0:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=[texts[i]],
|
||||
extra_model_kwargs=extra_model_kwargs,
|
||||
)
|
||||
used_tokens += embedding_used_tokens
|
||||
average = embeddings_batch[0]
|
||||
else:
|
||||
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
||||
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
tokenizer = self._get_tokenizer()
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
|
||||
tokenizer = self._get_tokenizer()
|
||||
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
# calculate the number of tokens in the encoded text
|
||||
tokenized_text = tokenizer.encode(text)
|
||||
total_num_tokens += len(tokenized_text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: Mapping) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(
|
||||
self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Invoke embedding model
|
||||
:param model: model name
|
||||
:param client: model client
|
||||
:param texts: texts to embed
|
||||
:param extra_model_kwargs: extra model kwargs
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
|
||||
|
||||
if "encoding_format" in extra_model_kwargs and extra_model_kwargs["encoding_format"] == "base64":
|
||||
return (
|
||||
[
|
||||
list(np.frombuffer(base64.b64decode(embedding.embedding), dtype=np.float32))
|
||||
for embedding in response.data
|
||||
],
|
||||
response.usage.total_tokens,
|
||||
)
|
||||
|
||||
return [data.embedding for data in response.data], response.usage.total_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
|
||||
)
|
||||
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,37 +0,0 @@
|
||||
model: gemini-1.5-flash-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,37 +0,0 @@
|
||||
model: gemini-1.5-flash-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,37 +0,0 @@
|
||||
model: gemini-1.5-pro-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,37 +0,0 @@
|
||||
model: gemini-1.5-pro-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,37 +0,0 @@
|
||||
model: gemini-flash-experimental
|
||||
label:
|
||||
en_US: Gemini Flash Experimental
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,37 +0,0 @@
|
||||
model: gemini-pro-experimental
|
||||
label:
|
||||
en_US: Gemini Pro Experimental
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -1,733 +0,0 @@
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union, cast
|
||||
|
||||
import google.auth.transport.requests
|
||||
import vertexai.generative_models as glm
|
||||
from anthropic import AnthropicVertex, Stream
|
||||
from anthropic.types import (
|
||||
ContentBlockDeltaEvent,
|
||||
Message,
|
||||
MessageDeltaEvent,
|
||||
MessageStartEvent,
|
||||
MessageStopEvent,
|
||||
MessageStreamEvent,
|
||||
)
|
||||
from google.api_core import exceptions
|
||||
from google.cloud import aiplatform
|
||||
from google.oauth2 import service_account
|
||||
from PIL import Image
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
ImagePromptMessageContent,
|
||||
PromptMessage,
|
||||
PromptMessageContentType,
|
||||
PromptMessageTool,
|
||||
SystemPromptMessage,
|
||||
TextPromptMessageContent,
|
||||
ToolPromptMessage,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VertexAiLargeLanguageModel(LargeLanguageModel):
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param tools: tools for tool calling
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
# invoke anthropic models via anthropic official SDK
|
||||
if "claude" in model:
|
||||
return self._generate_anthropic(model, credentials, prompt_messages, model_parameters, stop, stream, user)
|
||||
# invoke Gemini model
|
||||
return self._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
|
||||
|
||||
def _generate_anthropic(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke Anthropic large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
# use Anthropic official SDK references
|
||||
# - https://github.com/anthropics/anthropic-sdk-python
|
||||
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
|
||||
project_id = credentials["vertex_project_id"]
|
||||
SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
|
||||
token = ""
|
||||
|
||||
# get access token from service account credential
|
||||
if service_account_info:
|
||||
credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
|
||||
request = google.auth.transport.requests.Request()
|
||||
credentials.refresh(request)
|
||||
token = credentials.token
|
||||
|
||||
# Vertex AI Anthropic Claude3 Opus model available in us-east5 region, Sonnet and Haiku available
|
||||
# in us-central1 region
|
||||
if "opus" in model or "claude-3-5-sonnet" in model:
|
||||
location = "us-east5"
|
||||
else:
|
||||
location = "us-central1"
|
||||
|
||||
# use access token to authenticate
|
||||
if token:
|
||||
client = AnthropicVertex(region=location, project_id=project_id, access_token=token)
|
||||
# When access token is empty, try to use the Google Cloud VM's built-in service account
|
||||
# or the GOOGLE_APPLICATION_CREDENTIALS environment variable
|
||||
else:
|
||||
client = AnthropicVertex(
|
||||
region=location,
|
||||
project_id=project_id,
|
||||
)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if stop:
|
||||
extra_model_kwargs["stop_sequences"] = stop
|
||||
|
||||
system, prompt_message_dicts = self._convert_claude_prompt_messages(prompt_messages)
|
||||
|
||||
if system:
|
||||
extra_model_kwargs["system"] = system
|
||||
|
||||
response = client.messages.create(
|
||||
model=model, messages=prompt_message_dicts, stream=stream, **model_parameters, **extra_model_kwargs
|
||||
)
|
||||
|
||||
if stream:
|
||||
return self._handle_claude_stream_response(model, credentials, response, prompt_messages)
|
||||
|
||||
return self._handle_claude_response(model, credentials, response, prompt_messages)
|
||||
|
||||
def _handle_claude_response(
|
||||
self, model: str, credentials: dict, response: Message, prompt_messages: list[PromptMessage]
|
||||
) -> LLMResult:
|
||||
"""
|
||||
Handle llm chat response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: full response chunk generator result
|
||||
"""
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(content=response.content[0].text)
|
||||
|
||||
# calculate num tokens
|
||||
if response.usage:
|
||||
# transform usage
|
||||
prompt_tokens = response.usage.input_tokens
|
||||
completion_tokens = response.usage.output_tokens
|
||||
else:
|
||||
# calculate num tokens
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
# transform response
|
||||
response = LLMResult(
|
||||
model=response.model, prompt_messages=prompt_messages, message=assistant_prompt_message, usage=usage
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _handle_claude_stream_response(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
response: Stream[MessageStreamEvent],
|
||||
prompt_messages: list[PromptMessage],
|
||||
) -> Generator:
|
||||
"""
|
||||
Handle llm chat stream response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
|
||||
try:
|
||||
full_assistant_content = ""
|
||||
return_model = None
|
||||
input_tokens = 0
|
||||
output_tokens = 0
|
||||
finish_reason = None
|
||||
index = 0
|
||||
|
||||
for chunk in response:
|
||||
if isinstance(chunk, MessageStartEvent):
|
||||
return_model = chunk.message.model
|
||||
input_tokens = chunk.message.usage.input_tokens
|
||||
elif isinstance(chunk, MessageDeltaEvent):
|
||||
output_tokens = chunk.usage.output_tokens
|
||||
finish_reason = chunk.delta.stop_reason
|
||||
elif isinstance(chunk, MessageStopEvent):
|
||||
usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens)
|
||||
yield LLMResultChunk(
|
||||
model=return_model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index + 1,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
),
|
||||
)
|
||||
elif isinstance(chunk, ContentBlockDeltaEvent):
|
||||
chunk_text = chunk.delta.text or ""
|
||||
full_assistant_content += chunk_text
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=chunk_text or "",
|
||||
)
|
||||
index = chunk.index
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index,
|
||||
message=assistant_prompt_message,
|
||||
),
|
||||
)
|
||||
except Exception as ex:
|
||||
raise InvokeError(str(ex))
|
||||
|
||||
def _calc_claude_response_usage(
|
||||
self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int
|
||||
) -> LLMUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_tokens: prompt tokens
|
||||
:param completion_tokens: completion tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get prompt price info
|
||||
prompt_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.INPUT,
|
||||
tokens=prompt_tokens,
|
||||
)
|
||||
|
||||
# get completion price info
|
||||
completion_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.OUTPUT, tokens=completion_tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = LLMUsage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
prompt_unit_price=prompt_price_info.unit_price,
|
||||
prompt_price_unit=prompt_price_info.unit,
|
||||
prompt_price=prompt_price_info.total_amount,
|
||||
completion_tokens=completion_tokens,
|
||||
completion_unit_price=completion_price_info.unit_price,
|
||||
completion_price_unit=completion_price_info.unit,
|
||||
completion_price=completion_price_info.total_amount,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
total_price=prompt_price_info.total_amount + completion_price_info.total_amount,
|
||||
currency=prompt_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def _convert_claude_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
|
||||
"""
|
||||
Convert prompt messages to dict list and system
|
||||
"""
|
||||
|
||||
system = ""
|
||||
first_loop = True
|
||||
for message in prompt_messages:
|
||||
if isinstance(message, SystemPromptMessage):
|
||||
message.content = message.content.strip()
|
||||
if first_loop:
|
||||
system = message.content
|
||||
first_loop = False
|
||||
else:
|
||||
system += "\n"
|
||||
system += message.content
|
||||
|
||||
prompt_message_dicts = []
|
||||
for message in prompt_messages:
|
||||
if not isinstance(message, SystemPromptMessage):
|
||||
prompt_message_dicts.append(self._convert_claude_prompt_message_to_dict(message))
|
||||
|
||||
return system, prompt_message_dicts
|
||||
|
||||
def _convert_claude_prompt_message_to_dict(self, message: PromptMessage) -> dict:
|
||||
"""
|
||||
Convert PromptMessage to dict
|
||||
"""
|
||||
if isinstance(message, UserPromptMessage):
|
||||
message = cast(UserPromptMessage, message)
|
||||
if isinstance(message.content, str):
|
||||
message_dict = {"role": "user", "content": message.content}
|
||||
else:
|
||||
sub_messages = []
|
||||
for message_content in message.content:
|
||||
if message_content.type == PromptMessageContentType.TEXT:
|
||||
message_content = cast(TextPromptMessageContent, message_content)
|
||||
sub_message_dict = {"type": "text", "text": message_content.data}
|
||||
sub_messages.append(sub_message_dict)
|
||||
elif message_content.type == PromptMessageContentType.IMAGE:
|
||||
message_content = cast(ImagePromptMessageContent, message_content)
|
||||
if not message_content.data.startswith("data:"):
|
||||
# fetch image data from url
|
||||
try:
|
||||
image_content = requests.get(message_content.data).content
|
||||
with Image.open(io.BytesIO(image_content)) as img:
|
||||
mime_type = f"image/{img.format.lower()}"
|
||||
base64_data = base64.b64encode(image_content).decode("utf-8")
|
||||
except Exception as ex:
|
||||
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
|
||||
else:
|
||||
data_split = message_content.data.split(";base64,")
|
||||
mime_type = data_split[0].replace("data:", "")
|
||||
base64_data = data_split[1]
|
||||
|
||||
if mime_type not in {"image/jpeg", "image/png", "image/gif", "image/webp"}:
|
||||
raise ValueError(
|
||||
f"Unsupported image type {mime_type}, "
|
||||
f"only support image/jpeg, image/png, image/gif, and image/webp"
|
||||
)
|
||||
|
||||
sub_message_dict = {
|
||||
"type": "image",
|
||||
"source": {"type": "base64", "media_type": mime_type, "data": base64_data},
|
||||
}
|
||||
sub_messages.append(sub_message_dict)
|
||||
|
||||
message_dict = {"role": "user", "content": sub_messages}
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
message = cast(AssistantPromptMessage, message)
|
||||
message_dict = {"role": "assistant", "content": message.content}
|
||||
elif isinstance(message, SystemPromptMessage):
|
||||
message = cast(SystemPromptMessage, message)
|
||||
message_dict = {"role": "system", "content": message.content}
|
||||
else:
|
||||
raise ValueError(f"Got unknown type {message}")
|
||||
|
||||
return message_dict
|
||||
|
||||
def get_num_tokens(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param tools: tools for tool calling
|
||||
:return:md = gml.GenerativeModel(model)
|
||||
"""
|
||||
prompt = self._convert_messages_to_prompt(prompt_messages)
|
||||
|
||||
return self._get_num_tokens_by_gpt2(prompt)
|
||||
|
||||
def _convert_messages_to_prompt(self, messages: list[PromptMessage]) -> str:
|
||||
"""
|
||||
Format a list of messages into a full prompt for the Google model
|
||||
|
||||
:param messages: List of PromptMessage to combine.
|
||||
:return: Combined string with necessary human_prompt and ai_prompt tags.
|
||||
"""
|
||||
messages = messages.copy() # don't mutate the original list
|
||||
|
||||
text = "".join(self._convert_one_message_to_text(message) for message in messages)
|
||||
|
||||
return text.rstrip()
|
||||
|
||||
def _convert_tools_to_glm_tool(self, tools: list[PromptMessageTool]) -> glm.Tool:
|
||||
"""
|
||||
Convert tool messages to glm tools
|
||||
|
||||
:param tools: tool messages
|
||||
:return: glm tools
|
||||
"""
|
||||
return glm.Tool(
|
||||
function_declarations=[
|
||||
glm.FunctionDeclaration(
|
||||
name=tool.name,
|
||||
parameters=glm.Schema(
|
||||
type=glm.Type.OBJECT,
|
||||
properties={
|
||||
key: {
|
||||
"type_": value.get("type", "string").upper(),
|
||||
"description": value.get("description", ""),
|
||||
"enum": value.get("enum", []),
|
||||
}
|
||||
for key, value in tool.parameters.get("properties", {}).items()
|
||||
},
|
||||
required=tool.parameters.get("required", []),
|
||||
),
|
||||
)
|
||||
for tool in tools
|
||||
]
|
||||
)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
|
||||
try:
|
||||
ping_message = SystemPromptMessage(content="ping")
|
||||
self._generate(model, credentials, [ping_message], {"max_tokens_to_sample": 5})
|
||||
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _generate(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials kwargs
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
config_kwargs = model_parameters.copy()
|
||||
config_kwargs["max_output_tokens"] = config_kwargs.pop("max_tokens_to_sample", None)
|
||||
|
||||
if stop:
|
||||
config_kwargs["stop_sequences"] = stop
|
||||
|
||||
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
|
||||
project_id = credentials["vertex_project_id"]
|
||||
location = credentials["vertex_location"]
|
||||
if service_account_info:
|
||||
service_accountSA = service_account.Credentials.from_service_account_info(service_account_info)
|
||||
aiplatform.init(credentials=service_accountSA, project=project_id, location=location)
|
||||
else:
|
||||
aiplatform.init(project=project_id, location=location)
|
||||
|
||||
history = []
|
||||
system_instruction = ""
|
||||
# hack for gemini-pro-vision, which currently does not support multi-turn chat
|
||||
if model == "gemini-1.0-pro-vision-001":
|
||||
last_msg = prompt_messages[-1]
|
||||
content = self._format_message_to_glm_content(last_msg)
|
||||
history.append(content)
|
||||
else:
|
||||
for msg in prompt_messages:
|
||||
if isinstance(msg, SystemPromptMessage):
|
||||
system_instruction = msg.content
|
||||
else:
|
||||
content = self._format_message_to_glm_content(msg)
|
||||
if history and history[-1].role == content.role:
|
||||
history[-1].parts.extend(content.parts)
|
||||
else:
|
||||
history.append(content)
|
||||
|
||||
google_model = glm.GenerativeModel(model_name=model, system_instruction=system_instruction)
|
||||
|
||||
response = google_model.generate_content(
|
||||
contents=history,
|
||||
generation_config=glm.GenerationConfig(**config_kwargs),
|
||||
stream=stream,
|
||||
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
|
||||
)
|
||||
|
||||
if stream:
|
||||
return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
|
||||
|
||||
return self._handle_generate_response(model, credentials, response, prompt_messages)
|
||||
|
||||
def _handle_generate_response(
|
||||
self, model: str, credentials: dict, response: glm.GenerationResponse, prompt_messages: list[PromptMessage]
|
||||
) -> LLMResult:
|
||||
"""
|
||||
Handle llm response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response
|
||||
"""
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(content=response.candidates[0].content.parts[0].text)
|
||||
|
||||
# calculate num tokens
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
# transform response
|
||||
result = LLMResult(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
message=assistant_prompt_message,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def _handle_generate_stream_response(
|
||||
self, model: str, credentials: dict, response: glm.GenerationResponse, prompt_messages: list[PromptMessage]
|
||||
) -> Generator:
|
||||
"""
|
||||
Handle llm stream response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response chunk generator result
|
||||
"""
|
||||
index = -1
|
||||
for chunk in response:
|
||||
for part in chunk.candidates[0].content.parts:
|
||||
assistant_prompt_message = AssistantPromptMessage(content="")
|
||||
|
||||
if part.text:
|
||||
assistant_prompt_message.content += part.text
|
||||
|
||||
if part.function_call:
|
||||
assistant_prompt_message.tool_calls = [
|
||||
AssistantPromptMessage.ToolCall(
|
||||
id=part.function_call.name,
|
||||
type="function",
|
||||
function=AssistantPromptMessage.ToolCall.ToolCallFunction(
|
||||
name=part.function_call.name,
|
||||
arguments=json.dumps(dict(part.function_call.args.items())),
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
index += 1
|
||||
|
||||
if not hasattr(chunk, "finish_reason") or not chunk.finish_reason:
|
||||
# transform assistant message to prompt message
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(index=index, message=assistant_prompt_message),
|
||||
)
|
||||
else:
|
||||
# calculate num tokens
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index,
|
||||
message=assistant_prompt_message,
|
||||
finish_reason=chunk.candidates[0].finish_reason,
|
||||
usage=usage,
|
||||
),
|
||||
)
|
||||
|
||||
def _convert_one_message_to_text(self, message: PromptMessage) -> str:
|
||||
"""
|
||||
Convert a single message to a string.
|
||||
|
||||
:param message: PromptMessage to convert.
|
||||
:return: String representation of the message.
|
||||
"""
|
||||
human_prompt = "\n\nuser:"
|
||||
ai_prompt = "\n\nmodel:"
|
||||
|
||||
content = message.content
|
||||
if isinstance(content, list):
|
||||
content = "".join(c.data for c in content if c.type != PromptMessageContentType.IMAGE)
|
||||
|
||||
if isinstance(message, UserPromptMessage):
|
||||
message_text = f"{human_prompt} {content}"
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
message_text = f"{ai_prompt} {content}"
|
||||
elif isinstance(message, SystemPromptMessage | ToolPromptMessage):
|
||||
message_text = f"{human_prompt} {content}"
|
||||
else:
|
||||
raise ValueError(f"Got unknown type {message}")
|
||||
|
||||
return message_text
|
||||
|
||||
def _format_message_to_glm_content(self, message: PromptMessage) -> glm.Content:
|
||||
"""
|
||||
Format a single message into glm.Content for Google API
|
||||
|
||||
:param message: one PromptMessage
|
||||
:return: glm Content representation of message
|
||||
"""
|
||||
if isinstance(message, UserPromptMessage):
|
||||
glm_content = glm.Content(role="user", parts=[])
|
||||
|
||||
if isinstance(message.content, str):
|
||||
glm_content = glm.Content(role="user", parts=[glm.Part.from_text(message.content)])
|
||||
else:
|
||||
parts = []
|
||||
for c in message.content:
|
||||
if c.type == PromptMessageContentType.TEXT:
|
||||
parts.append(glm.Part.from_text(c.data))
|
||||
else:
|
||||
metadata, data = c.data.split(",", 1)
|
||||
mime_type = metadata.split(";", 1)[0].split(":")[1]
|
||||
parts.append(glm.Part.from_data(mime_type=mime_type, data=data))
|
||||
glm_content = glm.Content(role="user", parts=parts)
|
||||
return glm_content
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
if message.content:
|
||||
glm_content = glm.Content(role="model", parts=[glm.Part.from_text(message.content)])
|
||||
if message.tool_calls:
|
||||
glm_content = glm.Content(
|
||||
role="model",
|
||||
parts=[
|
||||
glm.Part.from_function_response(
|
||||
glm.FunctionCall(
|
||||
name=message.tool_calls[0].function.name,
|
||||
args=json.loads(message.tool_calls[0].function.arguments),
|
||||
)
|
||||
)
|
||||
],
|
||||
)
|
||||
return glm_content
|
||||
elif isinstance(message, ToolPromptMessage):
|
||||
glm_content = glm.Content(
|
||||
role="function",
|
||||
parts=[
|
||||
glm.Part(
|
||||
function_response=glm.FunctionResponse(
|
||||
name=message.name, response={"response": message.content}
|
||||
)
|
||||
)
|
||||
],
|
||||
)
|
||||
return glm_content
|
||||
else:
|
||||
raise ValueError(f"Got unknown type {message}")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the ermd = gml.GenerativeModel(model) error type thrown to the caller
|
||||
The value is the md = gml.GenerativeModel(model) error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke emd = gml.GenerativeModel(model) error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [exceptions.RetryError],
|
||||
InvokeServerUnavailableError: [
|
||||
exceptions.ServiceUnavailable,
|
||||
exceptions.InternalServerError,
|
||||
exceptions.BadGateway,
|
||||
exceptions.GatewayTimeout,
|
||||
exceptions.DeadlineExceeded,
|
||||
],
|
||||
InvokeRateLimitError: [exceptions.ResourceExhausted, exceptions.TooManyRequests],
|
||||
InvokeAuthorizationError: [
|
||||
exceptions.Unauthenticated,
|
||||
exceptions.PermissionDenied,
|
||||
exceptions.Unauthenticated,
|
||||
exceptions.Forbidden,
|
||||
],
|
||||
InvokeBadRequestError: [
|
||||
exceptions.BadRequest,
|
||||
exceptions.InvalidArgument,
|
||||
exceptions.FailedPrecondition,
|
||||
exceptions.OutOfRange,
|
||||
exceptions.NotFound,
|
||||
exceptions.MethodNotAllowed,
|
||||
exceptions.Conflict,
|
||||
exceptions.AlreadyExists,
|
||||
exceptions.Aborted,
|
||||
exceptions.LengthRequired,
|
||||
exceptions.PreconditionFailed,
|
||||
exceptions.RequestRangeNotSatisfiable,
|
||||
exceptions.Cancelled,
|
||||
],
|
||||
}
|
||||
@ -1,187 +0,0 @@
|
||||
import base64
|
||||
import json
|
||||
import time
|
||||
from decimal import Decimal
|
||||
from typing import Optional
|
||||
|
||||
import tiktoken
|
||||
from google.cloud import aiplatform
|
||||
from google.oauth2 import service_account
|
||||
from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
FetchFrom,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
PriceConfig,
|
||||
PriceType,
|
||||
)
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.vertex_ai._common import _CommonVertexAi
|
||||
|
||||
|
||||
class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Vertex AI text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
|
||||
project_id = credentials["vertex_project_id"]
|
||||
location = credentials["vertex_location"]
|
||||
if service_account_info:
|
||||
service_accountSA = service_account.Credentials.from_service_account_info(service_account_info)
|
||||
aiplatform.init(credentials=service_accountSA, project=project_id, location=location)
|
||||
else:
|
||||
aiplatform.init(project=project_id, location=location)
|
||||
|
||||
client = VertexTextEmbeddingModel.from_pretrained(model)
|
||||
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(client=client, texts=texts)
|
||||
|
||||
# calc usage
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=embedding_used_tokens)
|
||||
|
||||
return TextEmbeddingResult(embeddings=embeddings_batch, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
|
||||
try:
|
||||
enc = tiktoken.encoding_for_model(model)
|
||||
except KeyError:
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
# calculate the number of tokens in the encoded text
|
||||
tokenized_text = enc.encode(text)
|
||||
total_num_tokens += len(tokenized_text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
|
||||
project_id = credentials["vertex_project_id"]
|
||||
location = credentials["vertex_location"]
|
||||
if service_account_info:
|
||||
service_accountSA = service_account.Credentials.from_service_account_info(service_account_info)
|
||||
aiplatform.init(credentials=service_accountSA, project=project_id, location=location)
|
||||
else:
|
||||
aiplatform.init(project=project_id, location=location)
|
||||
|
||||
client = VertexTextEmbeddingModel.from_pretrained(model)
|
||||
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, client=client, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(self, client: VertexTextEmbeddingModel, texts: list[str]) -> [list[float], int]: # type: ignore
|
||||
"""
|
||||
Invoke embedding model
|
||||
|
||||
:param model: model name
|
||||
:param client: model client
|
||||
:param texts: texts to embed
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
response = client.get_embeddings(texts)
|
||||
|
||||
embeddings = []
|
||||
token_usage = 0
|
||||
|
||||
for i in range(len(response)):
|
||||
embeddings.append(response[i].values)
|
||||
token_usage += int(response[i].statistics.token_count)
|
||||
|
||||
return embeddings, token_usage
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size")),
|
||||
ModelPropertyKey.MAX_CHUNKS: 1,
|
||||
},
|
||||
parameter_rules=[],
|
||||
pricing=PriceConfig(
|
||||
input=Decimal(credentials.get("input_price", 0)),
|
||||
unit=Decimal(credentials.get("unit", 0)),
|
||||
currency=credentials.get("currency", "USD"),
|
||||
),
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,198 +0,0 @@
|
||||
import time
|
||||
from decimal import Decimal
|
||||
from typing import Optional
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
FetchFrom,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
PriceConfig,
|
||||
PriceType,
|
||||
)
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.volcengine_maas.client import ArkClientV3
|
||||
from core.model_runtime.model_providers.volcengine_maas.legacy.client import MaaSClient
|
||||
from core.model_runtime.model_providers.volcengine_maas.legacy.errors import (
|
||||
AuthErrors,
|
||||
BadRequestErrors,
|
||||
ConnectionErrors,
|
||||
MaasError,
|
||||
RateLimitErrors,
|
||||
ServerUnavailableErrors,
|
||||
)
|
||||
from core.model_runtime.model_providers.volcengine_maas.text_embedding.models import get_model_config
|
||||
|
||||
|
||||
class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for VolcengineMaaS text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
if ArkClientV3.is_legacy(credentials):
|
||||
return self._generate_v2(model, credentials, texts, user)
|
||||
|
||||
return self._generate_v3(model, credentials, texts, user)
|
||||
|
||||
def _generate_v2(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
) -> TextEmbeddingResult:
|
||||
client = MaaSClient.from_credential(credentials)
|
||||
resp = MaaSClient.wrap_exception(lambda: client.embeddings(texts))
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=resp["usage"]["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(model=model, embeddings=[v["embedding"] for v in resp["data"]], usage=usage)
|
||||
|
||||
return result
|
||||
|
||||
def _generate_v3(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
) -> TextEmbeddingResult:
|
||||
client = ArkClientV3.from_credentials(credentials)
|
||||
resp = client.embeddings(texts)
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=resp.usage.total_tokens)
|
||||
|
||||
result = TextEmbeddingResult(model=model, embeddings=[v.embedding for v in resp.data], usage=usage)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use GPT2Tokenizer to get num tokens
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
if ArkClientV3.is_legacy(credentials):
|
||||
return self._validate_credentials_v2(model, credentials)
|
||||
return self._validate_credentials_v3(model, credentials)
|
||||
|
||||
def _validate_credentials_v2(self, model: str, credentials: dict) -> None:
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except MaasError as e:
|
||||
raise CredentialsValidateFailedError(e.message)
|
||||
|
||||
def _validate_credentials_v3(self, model: str, credentials: dict) -> None:
|
||||
try:
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(e)
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: ConnectionErrors.values(),
|
||||
InvokeServerUnavailableError: ServerUnavailableErrors.values(),
|
||||
InvokeRateLimitError: RateLimitErrors.values(),
|
||||
InvokeAuthorizationError: AuthErrors.values(),
|
||||
InvokeBadRequestError: BadRequestErrors.values(),
|
||||
}
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
"""
|
||||
generate custom model entities from credentials
|
||||
"""
|
||||
model_config = get_model_config(credentials)
|
||||
model_properties = {
|
||||
ModelPropertyKey.CONTEXT_SIZE: model_config.properties.context_size,
|
||||
ModelPropertyKey.MAX_CHUNKS: model_config.properties.max_chunks,
|
||||
}
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties=model_properties,
|
||||
parameter_rules=[],
|
||||
pricing=PriceConfig(
|
||||
input=Decimal(credentials.get("input_price", 0)),
|
||||
unit=Decimal(credentials.get("unit", 0)),
|
||||
currency=credentials.get("currency", "USD"),
|
||||
),
|
||||
)
|
||||
|
||||
return entity
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,187 +0,0 @@
|
||||
import time
|
||||
from abc import abstractmethod
|
||||
from collections.abc import Mapping
|
||||
from json import dumps
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
from requests import Response, post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import InvokeError
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.wenxin._common import BaiduAccessToken, _CommonWenxin
|
||||
from core.model_runtime.model_providers.wenxin.wenxin_errors import (
|
||||
BadRequestError,
|
||||
InternalServerError,
|
||||
invoke_error_mapping,
|
||||
)
|
||||
|
||||
|
||||
class TextEmbedding:
|
||||
@abstractmethod
|
||||
def embed_documents(self, model: str, texts: list[str], user: str) -> (list[list[float]], int, int):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class WenxinTextEmbedding(_CommonWenxin, TextEmbedding):
|
||||
def embed_documents(self, model: str, texts: list[str], user: str) -> (list[list[float]], int, int):
|
||||
access_token = self._get_access_token()
|
||||
url = f"{self.api_bases[model]}?access_token={access_token}"
|
||||
body = self._build_embed_request_body(model, texts, user)
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
resp = post(url, data=dumps(body), headers=headers)
|
||||
if resp.status_code != 200:
|
||||
raise InternalServerError(f"Failed to invoke ernie bot: {resp.text}")
|
||||
return self._handle_embed_response(model, resp)
|
||||
|
||||
def _build_embed_request_body(self, model: str, texts: list[str], user: str) -> dict[str, Any]:
|
||||
if len(texts) == 0:
|
||||
raise BadRequestError("The number of texts should not be zero.")
|
||||
body = {
|
||||
"input": texts,
|
||||
"user_id": user,
|
||||
}
|
||||
return body
|
||||
|
||||
def _handle_embed_response(self, model: str, response: Response) -> (list[list[float]], int, int):
|
||||
data = response.json()
|
||||
if "error_code" in data:
|
||||
code = data["error_code"]
|
||||
msg = data["error_msg"]
|
||||
# raise error
|
||||
self._handle_error(code, msg)
|
||||
|
||||
embeddings = [v["embedding"] for v in data["data"]]
|
||||
_usage = data["usage"]
|
||||
tokens = _usage["prompt_tokens"]
|
||||
total_tokens = _usage["total_tokens"]
|
||||
|
||||
return embeddings, tokens, total_tokens
|
||||
|
||||
|
||||
class WenxinTextEmbeddingModel(TextEmbeddingModel):
|
||||
def _create_text_embedding(self, api_key: str, secret_key: str) -> TextEmbedding:
|
||||
return WenxinTextEmbedding(api_key, secret_key)
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
api_key = credentials["api_key"]
|
||||
secret_key = credentials["secret_key"]
|
||||
embedding: TextEmbedding = self._create_text_embedding(api_key, secret_key)
|
||||
user = user or "ErnieBotDefault"
|
||||
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
used_total_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
for i in _iter:
|
||||
embeddings_batch, _used_tokens, _total_used_tokens = embedding.embed_documents(
|
||||
model, inputs[i : i + max_chunks], user
|
||||
)
|
||||
used_tokens += _used_tokens
|
||||
used_total_tokens += _total_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
usage = self._calc_response_usage(model, credentials, used_tokens, used_total_tokens)
|
||||
return TextEmbeddingResult(
|
||||
model=model,
|
||||
embeddings=batched_embeddings,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
total_num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: Mapping) -> None:
|
||||
api_key = credentials["api_key"]
|
||||
secret_key = credentials["secret_key"]
|
||||
try:
|
||||
BaiduAccessToken.get_access_token(api_key, secret_key)
|
||||
except Exception as e:
|
||||
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return invoke_error_mapping()
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=total_tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
@ -1,204 +0,0 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.xinference.xinference_helper import XinferenceHelper
|
||||
|
||||
|
||||
class XinferenceTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Xinference text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
credentials should be like:
|
||||
{
|
||||
'server_url': 'server url',
|
||||
'model_uid': 'model uid',
|
||||
}
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
server_url = credentials["server_url"]
|
||||
model_uid = credentials["model_uid"]
|
||||
api_key = credentials.get("api_key")
|
||||
server_url = server_url.removesuffix("/")
|
||||
auth_headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||
|
||||
try:
|
||||
handle = RESTfulEmbeddingModelHandle(model_uid, server_url, auth_headers)
|
||||
embeddings = handle.create_embedding(input=texts)
|
||||
except RuntimeError as e:
|
||||
raise InvokeServerUnavailableError(str(e))
|
||||
|
||||
"""
|
||||
for convenience, the response json is like:
|
||||
class Embedding(TypedDict):
|
||||
object: Literal["list"]
|
||||
model: str
|
||||
data: List[EmbeddingData]
|
||||
usage: EmbeddingUsage
|
||||
class EmbeddingUsage(TypedDict):
|
||||
prompt_tokens: int
|
||||
total_tokens: int
|
||||
class EmbeddingData(TypedDict):
|
||||
index: int
|
||||
object: str
|
||||
embedding: List[float]
|
||||
"""
|
||||
|
||||
usage = embeddings["usage"]
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
|
||||
|
||||
result = TextEmbeddingResult(
|
||||
model=model, embeddings=[embedding["embedding"] for embedding in embeddings["data"]], usage=usage
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
num_tokens = 0
|
||||
for text in texts:
|
||||
# use GPT2Tokenizer to get num tokens
|
||||
num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
return num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
if "/" in credentials["model_uid"] or "?" in credentials["model_uid"] or "#" in credentials["model_uid"]:
|
||||
raise CredentialsValidateFailedError("model_uid should not contain /, ?, or #")
|
||||
|
||||
server_url = credentials["server_url"]
|
||||
model_uid = credentials["model_uid"]
|
||||
api_key = credentials.get("api_key")
|
||||
extra_args = XinferenceHelper.get_xinference_extra_parameter(
|
||||
server_url=server_url,
|
||||
model_uid=model_uid,
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
if extra_args.max_tokens:
|
||||
credentials["max_tokens"] = extra_args.max_tokens
|
||||
server_url = server_url.removesuffix("/")
|
||||
|
||||
client = Client(
|
||||
base_url=server_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
try:
|
||||
handle = client.get_model(model_uid=model_uid)
|
||||
except RuntimeError as e:
|
||||
raise InvokeAuthorizationError(e)
|
||||
|
||||
if not isinstance(handle, RESTfulEmbeddingModelHandle):
|
||||
raise InvokeBadRequestError(
|
||||
"please check model type, the model you want to invoke is not a text embedding model"
|
||||
)
|
||||
|
||||
self._invoke(model=model, credentials=credentials, texts=["ping"])
|
||||
except InvokeAuthorizationError as e:
|
||||
raise CredentialsValidateFailedError(f"Failed to validate credentials for model {model}: {e}")
|
||||
except RuntimeError as e:
|
||||
raise CredentialsValidateFailedError(e)
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
return {
|
||||
InvokeConnectionError: [InvokeConnectionError],
|
||||
InvokeServerUnavailableError: [InvokeServerUnavailableError],
|
||||
InvokeRateLimitError: [InvokeRateLimitError],
|
||||
InvokeAuthorizationError: [InvokeAuthorizationError],
|
||||
InvokeBadRequestError: [KeyError],
|
||||
}
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
|
||||
"""
|
||||
used to define customizable model schema
|
||||
"""
|
||||
|
||||
entity = AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model_properties={
|
||||
ModelPropertyKey.MAX_CHUNKS: 1,
|
||||
ModelPropertyKey.CONTEXT_SIZE: "max_tokens" in credentials and credentials["max_tokens"] or 512,
|
||||
},
|
||||
parameter_rules=[],
|
||||
)
|
||||
|
||||
return entity
|
||||
@ -1,357 +0,0 @@
|
||||
import hashlib
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import site
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unicodedata
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote
|
||||
|
||||
import chardet
|
||||
import cloudscraper
|
||||
from bs4 import BeautifulSoup, CData, Comment, NavigableString
|
||||
from regex import regex
|
||||
|
||||
from core.helper import ssrf_proxy
|
||||
from core.rag.extractor import extract_processor
|
||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||
|
||||
FULL_TEMPLATE = """
|
||||
TITLE: {title}
|
||||
AUTHORS: {authors}
|
||||
PUBLISH DATE: {publish_date}
|
||||
TOP_IMAGE_URL: {top_image}
|
||||
TEXT:
|
||||
|
||||
{text}
|
||||
"""
|
||||
|
||||
|
||||
def page_result(text: str, cursor: int, max_length: int) -> str:
|
||||
"""Page through `text` and return a substring of `max_length` characters starting from `cursor`."""
|
||||
return text[cursor : cursor + max_length]
|
||||
|
||||
|
||||
def get_url(url: str, user_agent: str | None = None) -> str:
|
||||
"""Fetch URL and return the contents as a string."""
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
|
||||
" Chrome/91.0.4472.124 Safari/537.36"
|
||||
}
|
||||
if user_agent:
|
||||
headers["User-Agent"] = user_agent
|
||||
|
||||
main_content_type = None
|
||||
supported_content_types = extract_processor.SUPPORT_URL_CONTENT_TYPES + ["text/html"]
|
||||
response = ssrf_proxy.head(url, headers=headers, follow_redirects=True, timeout=(5, 10))
|
||||
|
||||
if response.status_code == 200:
|
||||
# check content-type
|
||||
content_type = response.headers.get("Content-Type")
|
||||
if content_type:
|
||||
main_content_type = response.headers.get("Content-Type").split(";")[0].strip()
|
||||
else:
|
||||
content_disposition = response.headers.get("Content-Disposition", "")
|
||||
filename_match = re.search(r'filename="([^"]+)"', content_disposition)
|
||||
if filename_match:
|
||||
filename = unquote(filename_match.group(1))
|
||||
extension = re.search(r"\.(\w+)$", filename)
|
||||
if extension:
|
||||
main_content_type = mimetypes.guess_type(filename)[0]
|
||||
|
||||
if main_content_type not in supported_content_types:
|
||||
return "Unsupported content-type [{}] of URL.".format(main_content_type)
|
||||
|
||||
if main_content_type in extract_processor.SUPPORT_URL_CONTENT_TYPES:
|
||||
return ExtractProcessor.load_from_url(url, return_text=True)
|
||||
|
||||
response = ssrf_proxy.get(url, headers=headers, follow_redirects=True, timeout=(120, 300))
|
||||
elif response.status_code == 403:
|
||||
scraper = cloudscraper.create_scraper()
|
||||
scraper.perform_request = ssrf_proxy.make_request
|
||||
response = scraper.get(url, headers=headers, follow_redirects=True, timeout=(120, 300))
|
||||
|
||||
if response.status_code != 200:
|
||||
return "URL returned status code {}.".format(response.status_code)
|
||||
|
||||
# Detect encoding using chardet
|
||||
detected_encoding = chardet.detect(response.content)
|
||||
encoding = detected_encoding["encoding"]
|
||||
if encoding:
|
||||
try:
|
||||
content = response.content.decode(encoding)
|
||||
except (UnicodeDecodeError, TypeError):
|
||||
content = response.text
|
||||
else:
|
||||
content = response.text
|
||||
|
||||
a = extract_using_readabilipy(content)
|
||||
|
||||
if not a["plain_text"] or not a["plain_text"].strip():
|
||||
return ""
|
||||
|
||||
res = FULL_TEMPLATE.format(
|
||||
title=a["title"],
|
||||
authors=a["byline"],
|
||||
publish_date=a["date"],
|
||||
top_image="",
|
||||
text=a["plain_text"] or "",
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def extract_using_readabilipy(html):
|
||||
with tempfile.NamedTemporaryFile(delete=False, mode="w+") as f_html:
|
||||
f_html.write(html)
|
||||
f_html.close()
|
||||
html_path = f_html.name
|
||||
|
||||
# Call Mozilla's Readability.js Readability.parse() function via node, writing output to a temporary file
|
||||
article_json_path = html_path + ".json"
|
||||
jsdir = os.path.join(find_module_path("readabilipy"), "javascript")
|
||||
with chdir(jsdir):
|
||||
subprocess.check_call(["node", "ExtractArticle.js", "-i", html_path, "-o", article_json_path])
|
||||
|
||||
# Read output of call to Readability.parse() from JSON file and return as Python dictionary
|
||||
input_json = json.loads(Path(article_json_path).read_text(encoding="utf-8"))
|
||||
|
||||
# Deleting files after processing
|
||||
os.unlink(article_json_path)
|
||||
os.unlink(html_path)
|
||||
|
||||
article_json = {
|
||||
"title": None,
|
||||
"byline": None,
|
||||
"date": None,
|
||||
"content": None,
|
||||
"plain_content": None,
|
||||
"plain_text": None,
|
||||
}
|
||||
# Populate article fields from readability fields where present
|
||||
if input_json:
|
||||
if input_json.get("title"):
|
||||
article_json["title"] = input_json["title"]
|
||||
if input_json.get("byline"):
|
||||
article_json["byline"] = input_json["byline"]
|
||||
if input_json.get("date"):
|
||||
article_json["date"] = input_json["date"]
|
||||
if input_json.get("content"):
|
||||
article_json["content"] = input_json["content"]
|
||||
article_json["plain_content"] = plain_content(article_json["content"], False, False)
|
||||
article_json["plain_text"] = extract_text_blocks_as_plain_text(article_json["plain_content"])
|
||||
if input_json.get("textContent"):
|
||||
article_json["plain_text"] = input_json["textContent"]
|
||||
article_json["plain_text"] = re.sub(r"\n\s*\n", "\n", article_json["plain_text"])
|
||||
|
||||
return article_json
|
||||
|
||||
|
||||
def find_module_path(module_name):
|
||||
for package_path in site.getsitepackages():
|
||||
potential_path = os.path.join(package_path, module_name)
|
||||
if os.path.exists(potential_path):
|
||||
return potential_path
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@contextmanager
|
||||
def chdir(path):
|
||||
"""Change directory in context and return to original on exit"""
|
||||
# From https://stackoverflow.com/a/37996581, couldn't find a built-in
|
||||
original_path = os.getcwd()
|
||||
os.chdir(path)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
os.chdir(original_path)
|
||||
|
||||
|
||||
def extract_text_blocks_as_plain_text(paragraph_html):
|
||||
# Load article as DOM
|
||||
soup = BeautifulSoup(paragraph_html, "html.parser")
|
||||
# Select all lists
|
||||
list_elements = soup.find_all(["ul", "ol"])
|
||||
# Prefix text in all list items with "* " and make lists paragraphs
|
||||
for list_element in list_elements:
|
||||
plain_items = "".join(
|
||||
list(filter(None, [plain_text_leaf_node(li)["text"] for li in list_element.find_all("li")]))
|
||||
)
|
||||
list_element.string = plain_items
|
||||
list_element.name = "p"
|
||||
# Select all text blocks
|
||||
text_blocks = [s.parent for s in soup.find_all(string=True)]
|
||||
text_blocks = [plain_text_leaf_node(block) for block in text_blocks]
|
||||
# Drop empty paragraphs
|
||||
text_blocks = list(filter(lambda p: p["text"] is not None, text_blocks))
|
||||
return text_blocks
|
||||
|
||||
|
||||
def plain_text_leaf_node(element):
|
||||
# Extract all text, stripped of any child HTML elements and normalize it
|
||||
plain_text = normalize_text(element.get_text())
|
||||
if plain_text != "" and element.name == "li":
|
||||
plain_text = "* {}, ".format(plain_text)
|
||||
if plain_text == "":
|
||||
plain_text = None
|
||||
if "data-node-index" in element.attrs:
|
||||
plain = {"node_index": element["data-node-index"], "text": plain_text}
|
||||
else:
|
||||
plain = {"text": plain_text}
|
||||
return plain
|
||||
|
||||
|
||||
def plain_content(readability_content, content_digests, node_indexes):
|
||||
# Load article as DOM
|
||||
soup = BeautifulSoup(readability_content, "html.parser")
|
||||
# Make all elements plain
|
||||
elements = plain_elements(soup.contents, content_digests, node_indexes)
|
||||
if node_indexes:
|
||||
# Add node index attributes to nodes
|
||||
elements = [add_node_indexes(element) for element in elements]
|
||||
# Replace article contents with plain elements
|
||||
soup.contents = elements
|
||||
return str(soup)
|
||||
|
||||
|
||||
def plain_elements(elements, content_digests, node_indexes):
|
||||
# Get plain content versions of all elements
|
||||
elements = [plain_element(element, content_digests, node_indexes) for element in elements]
|
||||
if content_digests:
|
||||
# Add content digest attribute to nodes
|
||||
elements = [add_content_digest(element) for element in elements]
|
||||
return elements
|
||||
|
||||
|
||||
def plain_element(element, content_digests, node_indexes):
|
||||
# For lists, we make each item plain text
|
||||
if is_leaf(element):
|
||||
# For leaf node elements, extract the text content, discarding any HTML tags
|
||||
# 1. Get element contents as text
|
||||
plain_text = element.get_text()
|
||||
# 2. Normalize the extracted text string to a canonical representation
|
||||
plain_text = normalize_text(plain_text)
|
||||
# 3. Update element content to be plain text
|
||||
element.string = plain_text
|
||||
elif is_text(element):
|
||||
if is_non_printing(element):
|
||||
# The simplified HTML may have come from Readability.js so might
|
||||
# have non-printing text (e.g. Comment or CData). In this case, we
|
||||
# keep the structure, but ensure that the string is empty.
|
||||
element = type(element)("")
|
||||
else:
|
||||
plain_text = element.string
|
||||
plain_text = normalize_text(plain_text)
|
||||
element = type(element)(plain_text)
|
||||
else:
|
||||
# If not a leaf node or leaf type call recursively on child nodes, replacing
|
||||
element.contents = plain_elements(element.contents, content_digests, node_indexes)
|
||||
return element
|
||||
|
||||
|
||||
def add_node_indexes(element, node_index="0"):
|
||||
# Can't add attributes to string types
|
||||
if is_text(element):
|
||||
return element
|
||||
# Add index to current element
|
||||
element["data-node-index"] = node_index
|
||||
# Add index to child elements
|
||||
for local_idx, child in enumerate([c for c in element.contents if not is_text(c)], start=1):
|
||||
# Can't add attributes to leaf string types
|
||||
child_index = "{stem}.{local}".format(stem=node_index, local=local_idx)
|
||||
add_node_indexes(child, node_index=child_index)
|
||||
return element
|
||||
|
||||
|
||||
def normalize_text(text):
|
||||
"""Normalize unicode and whitespace."""
|
||||
# Normalize unicode first to try and standardize whitespace characters as much as possible before normalizing them
|
||||
text = strip_control_characters(text)
|
||||
text = normalize_unicode(text)
|
||||
text = normalize_whitespace(text)
|
||||
return text
|
||||
|
||||
|
||||
def strip_control_characters(text):
|
||||
"""Strip out unicode control characters which might break the parsing."""
|
||||
# Unicode control characters
|
||||
# [Cc]: Other, Control [includes new lines]
|
||||
# [Cf]: Other, Format
|
||||
# [Cn]: Other, Not Assigned
|
||||
# [Co]: Other, Private Use
|
||||
# [Cs]: Other, Surrogate
|
||||
control_chars = {"Cc", "Cf", "Cn", "Co", "Cs"}
|
||||
retained_chars = ["\t", "\n", "\r", "\f"]
|
||||
|
||||
# Remove non-printing control characters
|
||||
return "".join(
|
||||
[
|
||||
"" if (unicodedata.category(char) in control_chars) and (char not in retained_chars) else char
|
||||
for char in text
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def normalize_unicode(text):
|
||||
"""Normalize unicode such that things that are visually equivalent map to the same unicode string where possible."""
|
||||
normal_form = "NFKC"
|
||||
text = unicodedata.normalize(normal_form, text)
|
||||
return text
|
||||
|
||||
|
||||
def normalize_whitespace(text):
|
||||
"""Replace runs of whitespace characters with a single space as this is what happens when HTML text is displayed."""
|
||||
text = regex.sub(r"\s+", " ", text)
|
||||
# Remove leading and trailing whitespace
|
||||
text = text.strip()
|
||||
return text
|
||||
|
||||
|
||||
def is_leaf(element):
|
||||
return element.name in {"p", "li"}
|
||||
|
||||
|
||||
def is_text(element):
|
||||
return isinstance(element, NavigableString)
|
||||
|
||||
|
||||
def is_non_printing(element):
|
||||
return any(isinstance(element, _e) for _e in [Comment, CData])
|
||||
|
||||
|
||||
def add_content_digest(element):
|
||||
if not is_text(element):
|
||||
element["data-content-digest"] = content_digest(element)
|
||||
return element
|
||||
|
||||
|
||||
def content_digest(element):
|
||||
if is_text(element):
|
||||
# Hash
|
||||
trimmed_string = element.string.strip()
|
||||
if trimmed_string == "":
|
||||
digest = ""
|
||||
else:
|
||||
digest = hashlib.sha256(trimmed_string.encode("utf-8")).hexdigest()
|
||||
else:
|
||||
contents = element.contents
|
||||
num_contents = len(contents)
|
||||
if num_contents == 0:
|
||||
# No hash when no child elements exist
|
||||
digest = ""
|
||||
elif num_contents == 1:
|
||||
# If single child, use digest of child
|
||||
digest = content_digest(contents[0])
|
||||
else:
|
||||
# Build content digest from the "non-empty" digests of child nodes
|
||||
digest = hashlib.sha256()
|
||||
child_digests = list(filter(lambda x: x != "", [content_digest(content) for content in contents]))
|
||||
for child in child_digests:
|
||||
digest.update(child.encode("utf-8"))
|
||||
digest = digest.hexdigest()
|
||||
return digest
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,98 +0,0 @@
|
||||
import os
|
||||
from collections.abc import Iterable
|
||||
from typing import Any, Literal, Union
|
||||
|
||||
import anthropic
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from anthropic import Anthropic, Stream
|
||||
from anthropic.resources import Messages
|
||||
from anthropic.types import (
|
||||
ContentBlock,
|
||||
ContentBlockDeltaEvent,
|
||||
Message,
|
||||
MessageDeltaEvent,
|
||||
MessageDeltaUsage,
|
||||
MessageParam,
|
||||
MessageStartEvent,
|
||||
MessageStopEvent,
|
||||
MessageStreamEvent,
|
||||
TextDelta,
|
||||
Usage,
|
||||
)
|
||||
from anthropic.types.message_delta_event import Delta
|
||||
|
||||
MOCK = os.getenv("MOCK_SWITCH", "false") == "true"
|
||||
|
||||
|
||||
class MockAnthropicClass:
|
||||
@staticmethod
|
||||
def mocked_anthropic_chat_create_sync(model: str) -> Message:
|
||||
return Message(
|
||||
id="msg-123",
|
||||
type="message",
|
||||
role="assistant",
|
||||
content=[ContentBlock(text="hello, I'm a chatbot from anthropic", type="text")],
|
||||
model=model,
|
||||
stop_reason="stop_sequence",
|
||||
usage=Usage(input_tokens=1, output_tokens=1),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def mocked_anthropic_chat_create_stream(model: str) -> Stream[MessageStreamEvent]:
|
||||
full_response_text = "hello, I'm a chatbot from anthropic"
|
||||
|
||||
yield MessageStartEvent(
|
||||
type="message_start",
|
||||
message=Message(
|
||||
id="msg-123",
|
||||
content=[],
|
||||
role="assistant",
|
||||
model=model,
|
||||
stop_reason=None,
|
||||
type="message",
|
||||
usage=Usage(input_tokens=1, output_tokens=1),
|
||||
),
|
||||
)
|
||||
|
||||
index = 0
|
||||
for i in range(0, len(full_response_text)):
|
||||
yield ContentBlockDeltaEvent(
|
||||
type="content_block_delta", delta=TextDelta(text=full_response_text[i], type="text_delta"), index=index
|
||||
)
|
||||
|
||||
index += 1
|
||||
|
||||
yield MessageDeltaEvent(
|
||||
type="message_delta", delta=Delta(stop_reason="stop_sequence"), usage=MessageDeltaUsage(output_tokens=1)
|
||||
)
|
||||
|
||||
yield MessageStopEvent(type="message_stop")
|
||||
|
||||
def mocked_anthropic(
|
||||
self: Messages,
|
||||
*,
|
||||
max_tokens: int,
|
||||
messages: Iterable[MessageParam],
|
||||
model: str,
|
||||
stream: Literal[True],
|
||||
**kwargs: Any,
|
||||
) -> Union[Message, Stream[MessageStreamEvent]]:
|
||||
if len(self._client.api_key) < 18:
|
||||
raise anthropic.AuthenticationError("Invalid API key")
|
||||
|
||||
if stream:
|
||||
return MockAnthropicClass.mocked_anthropic_chat_create_stream(model=model)
|
||||
else:
|
||||
return MockAnthropicClass.mocked_anthropic_chat_create_sync(model=model)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_anthropic_mock(request, monkeypatch: MonkeyPatch):
|
||||
if MOCK:
|
||||
monkeypatch.setattr(Messages, "create", MockAnthropicClass.mocked_anthropic)
|
||||
|
||||
yield
|
||||
|
||||
if MOCK:
|
||||
monkeypatch.undo()
|
||||
@ -1,82 +0,0 @@
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from typing import Literal
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
|
||||
|
||||
def mock_get(*args, **kwargs):
|
||||
if kwargs.get("headers", {}).get("Authorization") != "Bearer test":
|
||||
raise httpx.HTTPStatusError(
|
||||
"Invalid API key",
|
||||
request=httpx.Request("GET", ""),
|
||||
response=httpx.Response(401),
|
||||
)
|
||||
|
||||
return httpx.Response(
|
||||
200,
|
||||
json={
|
||||
"items": [
|
||||
{"title": "Model 1", "_id": "model1"},
|
||||
{"title": "Model 2", "_id": "model2"},
|
||||
]
|
||||
},
|
||||
request=httpx.Request("GET", ""),
|
||||
)
|
||||
|
||||
|
||||
def mock_stream(*args, **kwargs):
|
||||
class MockStreamResponse:
|
||||
def __init__(self):
|
||||
self.status_code = 200
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
def iter_bytes(self):
|
||||
yield b"Mocked audio data"
|
||||
|
||||
return MockStreamResponse()
|
||||
|
||||
|
||||
def mock_fishaudio(
|
||||
monkeypatch: MonkeyPatch,
|
||||
methods: list[Literal["list-models", "tts"]],
|
||||
) -> Callable[[], None]:
|
||||
"""
|
||||
mock fishaudio module
|
||||
|
||||
:param monkeypatch: pytest monkeypatch fixture
|
||||
:return: unpatch function
|
||||
"""
|
||||
|
||||
def unpatch() -> None:
|
||||
monkeypatch.undo()
|
||||
|
||||
if "list-models" in methods:
|
||||
monkeypatch.setattr(httpx, "get", mock_get)
|
||||
|
||||
if "tts" in methods:
|
||||
monkeypatch.setattr(httpx, "stream", mock_stream)
|
||||
|
||||
return unpatch
|
||||
|
||||
|
||||
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_fishaudio_mock(request, monkeypatch):
|
||||
methods = request.param if hasattr(request, "param") else []
|
||||
if MOCK:
|
||||
unpatch = mock_fishaudio(monkeypatch, methods=methods)
|
||||
|
||||
yield
|
||||
|
||||
if MOCK:
|
||||
unpatch()
|
||||
@ -1,116 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
import google.generativeai.types.generation_types as generation_config_types
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from google.ai import generativelanguage as glm
|
||||
from google.ai.generativelanguage_v1beta.types import content as gag_content
|
||||
from google.generativeai import GenerativeModel
|
||||
from google.generativeai.client import _ClientManager, configure
|
||||
from google.generativeai.types import GenerateContentResponse, content_types, safety_types
|
||||
from google.generativeai.types.generation_types import BaseGenerateContentResponse
|
||||
|
||||
current_api_key = ""
|
||||
|
||||
|
||||
class MockGoogleResponseClass:
|
||||
_done = False
|
||||
|
||||
def __iter__(self):
|
||||
full_response_text = "it's google!"
|
||||
|
||||
for i in range(0, len(full_response_text) + 1, 1):
|
||||
if i == len(full_response_text):
|
||||
self._done = True
|
||||
yield GenerateContentResponse(
|
||||
done=True, iterator=None, result=glm.GenerateContentResponse({}), chunks=[]
|
||||
)
|
||||
else:
|
||||
yield GenerateContentResponse(
|
||||
done=False, iterator=None, result=glm.GenerateContentResponse({}), chunks=[]
|
||||
)
|
||||
|
||||
|
||||
class MockGoogleResponseCandidateClass:
|
||||
finish_reason = "stop"
|
||||
|
||||
@property
|
||||
def content(self) -> gag_content.Content:
|
||||
return gag_content.Content(parts=[gag_content.Part(text="it's google!")])
|
||||
|
||||
|
||||
class MockGoogleClass:
|
||||
@staticmethod
|
||||
def generate_content_sync() -> GenerateContentResponse:
|
||||
return GenerateContentResponse(done=True, iterator=None, result=glm.GenerateContentResponse({}), chunks=[])
|
||||
|
||||
@staticmethod
|
||||
def generate_content_stream() -> Generator[GenerateContentResponse, None, None]:
|
||||
return MockGoogleResponseClass()
|
||||
|
||||
def generate_content(
|
||||
self: GenerativeModel,
|
||||
contents: content_types.ContentsType,
|
||||
*,
|
||||
generation_config: generation_config_types.GenerationConfigType | None = None,
|
||||
safety_settings: safety_types.SafetySettingOptions | None = None,
|
||||
stream: bool = False,
|
||||
**kwargs,
|
||||
) -> GenerateContentResponse:
|
||||
global current_api_key
|
||||
|
||||
if len(current_api_key) < 16:
|
||||
raise Exception("Invalid API key")
|
||||
|
||||
if stream:
|
||||
return MockGoogleClass.generate_content_stream()
|
||||
|
||||
return MockGoogleClass.generate_content_sync()
|
||||
|
||||
@property
|
||||
def generative_response_text(self) -> str:
|
||||
return "it's google!"
|
||||
|
||||
@property
|
||||
def generative_response_candidates(self) -> list[MockGoogleResponseCandidateClass]:
|
||||
return [MockGoogleResponseCandidateClass()]
|
||||
|
||||
def make_client(self: _ClientManager, name: str):
|
||||
global current_api_key
|
||||
|
||||
if name.endswith("_async"):
|
||||
name = name.split("_")[0]
|
||||
cls = getattr(glm, name.title() + "ServiceAsyncClient")
|
||||
else:
|
||||
cls = getattr(glm, name.title() + "ServiceClient")
|
||||
|
||||
# Attempt to configure using defaults.
|
||||
if not self.client_config:
|
||||
configure()
|
||||
|
||||
client_options = self.client_config.get("client_options", None)
|
||||
if client_options:
|
||||
current_api_key = client_options.api_key
|
||||
|
||||
def nop(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
original_init = cls.__init__
|
||||
cls.__init__ = nop
|
||||
client: glm.GenerativeServiceClient = cls(**self.client_config)
|
||||
cls.__init__ = original_init
|
||||
|
||||
if not self.default_metadata:
|
||||
return client
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_google_mock(request, monkeypatch: MonkeyPatch):
|
||||
monkeypatch.setattr(BaseGenerateContentResponse, "text", MockGoogleClass.generative_response_text)
|
||||
monkeypatch.setattr(BaseGenerateContentResponse, "candidates", MockGoogleClass.generative_response_candidates)
|
||||
monkeypatch.setattr(GenerativeModel, "generate_content", MockGoogleClass.generate_content)
|
||||
monkeypatch.setattr(_ClientManager, "make_client", MockGoogleClass.make_client)
|
||||
|
||||
yield
|
||||
|
||||
monkeypatch.undo()
|
||||
@ -1,20 +0,0 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from huggingface_hub import InferenceClient
|
||||
|
||||
from tests.integration_tests.model_runtime.__mock.huggingface_chat import MockHuggingfaceChatClass
|
||||
|
||||
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_huggingface_mock(request, monkeypatch: MonkeyPatch):
|
||||
if MOCK:
|
||||
monkeypatch.setattr(InferenceClient, "text_generation", MockHuggingfaceChatClass.text_generation)
|
||||
|
||||
yield
|
||||
|
||||
if MOCK:
|
||||
monkeypatch.undo()
|
||||
@ -1,56 +0,0 @@
|
||||
import re
|
||||
from collections.abc import Generator
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from huggingface_hub import InferenceClient
|
||||
from huggingface_hub.inference._text_generation import (
|
||||
Details,
|
||||
StreamDetails,
|
||||
TextGenerationResponse,
|
||||
TextGenerationStreamResponse,
|
||||
Token,
|
||||
)
|
||||
from huggingface_hub.utils import BadRequestError
|
||||
|
||||
|
||||
class MockHuggingfaceChatClass:
|
||||
@staticmethod
|
||||
def generate_create_sync(model: str) -> TextGenerationResponse:
|
||||
response = TextGenerationResponse(
|
||||
generated_text="You can call me Miku Miku o~e~o~",
|
||||
details=Details(
|
||||
finish_reason="length",
|
||||
generated_tokens=6,
|
||||
tokens=[Token(id=0, text="You", logprob=0.0, special=False) for i in range(0, 6)],
|
||||
),
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
@staticmethod
|
||||
def generate_create_stream(model: str) -> Generator[TextGenerationStreamResponse, None, None]:
|
||||
full_text = "You can call me Miku Miku o~e~o~"
|
||||
|
||||
for i in range(0, len(full_text)):
|
||||
response = TextGenerationStreamResponse(
|
||||
token=Token(id=i, text=full_text[i], logprob=0.0, special=False),
|
||||
)
|
||||
response.generated_text = full_text[i]
|
||||
response.details = StreamDetails(finish_reason="stop_sequence", generated_tokens=1)
|
||||
|
||||
yield response
|
||||
|
||||
def text_generation(
|
||||
self: InferenceClient, prompt: str, *, stream: Literal[False] = ..., model: Optional[str] = None, **kwargs: Any
|
||||
) -> Union[TextGenerationResponse, Generator[TextGenerationStreamResponse, None, None]]:
|
||||
# check if key is valid
|
||||
if not re.match(r"Bearer\shf\-[a-zA-Z0-9]{16,}", self.headers["authorization"]):
|
||||
raise BadRequestError("Invalid API key")
|
||||
|
||||
if model is None:
|
||||
raise BadRequestError("Invalid model")
|
||||
|
||||
if stream:
|
||||
return MockHuggingfaceChatClass.generate_create_stream(model)
|
||||
return MockHuggingfaceChatClass.generate_create_sync(model)
|
||||
@ -1,94 +0,0 @@
|
||||
from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiModelExtraParameter
|
||||
|
||||
|
||||
class MockTEIClass:
|
||||
@staticmethod
|
||||
def get_tei_extra_parameter(server_url: str, model_name: str) -> TeiModelExtraParameter:
|
||||
# During mock, we don't have a real server to query, so we just return a dummy value
|
||||
if "rerank" in model_name:
|
||||
model_type = "reranker"
|
||||
else:
|
||||
model_type = "embedding"
|
||||
|
||||
return TeiModelExtraParameter(model_type=model_type, max_input_length=512, max_client_batch_size=1)
|
||||
|
||||
@staticmethod
|
||||
def invoke_tokenize(server_url: str, texts: list[str]) -> list[list[dict]]:
|
||||
# Use space as token separator, and split the text into tokens
|
||||
tokenized_texts = []
|
||||
for text in texts:
|
||||
tokens = text.split(" ")
|
||||
current_index = 0
|
||||
tokenized_text = []
|
||||
for idx, token in enumerate(tokens):
|
||||
s_token = {
|
||||
"id": idx,
|
||||
"text": token,
|
||||
"special": False,
|
||||
"start": current_index,
|
||||
"stop": current_index + len(token),
|
||||
}
|
||||
current_index += len(token) + 1
|
||||
tokenized_text.append(s_token)
|
||||
tokenized_texts.append(tokenized_text)
|
||||
return tokenized_texts
|
||||
|
||||
@staticmethod
|
||||
def invoke_embeddings(server_url: str, texts: list[str]) -> dict:
|
||||
# {
|
||||
# "object": "list",
|
||||
# "data": [
|
||||
# {
|
||||
# "object": "embedding",
|
||||
# "embedding": [...],
|
||||
# "index": 0
|
||||
# }
|
||||
# ],
|
||||
# "model": "MODEL_NAME",
|
||||
# "usage": {
|
||||
# "prompt_tokens": 3,
|
||||
# "total_tokens": 3
|
||||
# }
|
||||
# }
|
||||
embeddings = []
|
||||
for idx in range(len(texts)):
|
||||
embedding = [0.1] * 768
|
||||
embeddings.append(
|
||||
{
|
||||
"object": "embedding",
|
||||
"embedding": embedding,
|
||||
"index": idx,
|
||||
}
|
||||
)
|
||||
return {
|
||||
"object": "list",
|
||||
"data": embeddings,
|
||||
"model": "MODEL_NAME",
|
||||
"usage": {
|
||||
"prompt_tokens": sum(len(text.split(" ")) for text in texts),
|
||||
"total_tokens": sum(len(text.split(" ")) for text in texts),
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def invoke_rerank(server_url: str, query: str, texts: list[str]) -> list[dict]:
|
||||
# Example response:
|
||||
# [
|
||||
# {
|
||||
# "index": 0,
|
||||
# "text": "Deep Learning is ...",
|
||||
# "score": 0.9950755
|
||||
# }
|
||||
# ]
|
||||
reranked_docs = []
|
||||
for idx, text in enumerate(texts):
|
||||
reranked_docs.append(
|
||||
{
|
||||
"index": idx,
|
||||
"text": text,
|
||||
"score": 0.9,
|
||||
}
|
||||
)
|
||||
# For mock, only return the first document
|
||||
break
|
||||
return reranked_docs
|
||||
@ -1,59 +0,0 @@
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from typing import Any, Literal, Union
|
||||
|
||||
import pytest
|
||||
|
||||
# import monkeypatch
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from nomic import embed
|
||||
|
||||
|
||||
def create_embedding(texts: list[str], model: str, **kwargs: Any) -> dict:
|
||||
texts_len = len(texts)
|
||||
|
||||
foo_embedding_sample = 0.123456
|
||||
|
||||
combined = {
|
||||
"embeddings": [[foo_embedding_sample for _ in range(768)] for _ in range(texts_len)],
|
||||
"usage": {"prompt_tokens": texts_len, "total_tokens": texts_len},
|
||||
"model": model,
|
||||
"inference_mode": "remote",
|
||||
}
|
||||
|
||||
return combined
|
||||
|
||||
|
||||
def mock_nomic(
|
||||
monkeypatch: MonkeyPatch,
|
||||
methods: list[Literal["text_embedding"]],
|
||||
) -> Callable[[], None]:
|
||||
"""
|
||||
mock nomic module
|
||||
|
||||
:param monkeypatch: pytest monkeypatch fixture
|
||||
:return: unpatch function
|
||||
"""
|
||||
|
||||
def unpatch() -> None:
|
||||
monkeypatch.undo()
|
||||
|
||||
if "text_embedding" in methods:
|
||||
monkeypatch.setattr(embed, "text", create_embedding)
|
||||
|
||||
return unpatch
|
||||
|
||||
|
||||
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_nomic_mock(request, monkeypatch):
|
||||
methods = request.param if hasattr(request, "param") else []
|
||||
if MOCK:
|
||||
unpatch = mock_nomic(monkeypatch, methods=methods)
|
||||
|
||||
yield
|
||||
|
||||
if MOCK:
|
||||
unpatch()
|
||||
@ -1,269 +0,0 @@
|
||||
import re
|
||||
from collections.abc import Generator
|
||||
from json import dumps, loads
|
||||
from time import time
|
||||
|
||||
# import monkeypatch
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
from openai import AzureOpenAI, OpenAI
|
||||
from openai._types import NOT_GIVEN, NotGiven
|
||||
from openai.resources.chat.completions import Completions
|
||||
from openai.types import Completion as CompletionMessage
|
||||
from openai.types.chat import (
|
||||
ChatCompletion,
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionMessageParam,
|
||||
ChatCompletionMessageToolCall,
|
||||
ChatCompletionToolChoiceOptionParam,
|
||||
ChatCompletionToolParam,
|
||||
completion_create_params,
|
||||
)
|
||||
from openai.types.chat.chat_completion import ChatCompletion as _ChatCompletion
|
||||
from openai.types.chat.chat_completion import Choice as _ChatCompletionChoice
|
||||
from openai.types.chat.chat_completion_chunk import (
|
||||
Choice,
|
||||
ChoiceDelta,
|
||||
ChoiceDeltaFunctionCall,
|
||||
ChoiceDeltaToolCall,
|
||||
ChoiceDeltaToolCallFunction,
|
||||
)
|
||||
from openai.types.chat.chat_completion_message import ChatCompletionMessage, FunctionCall
|
||||
from openai.types.chat.chat_completion_message_tool_call import Function
|
||||
from openai.types.completion_usage import CompletionUsage
|
||||
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
|
||||
|
||||
class MockChatClass:
|
||||
@staticmethod
|
||||
def generate_function_call(
|
||||
functions: list[completion_create_params.Function] | NotGiven = NOT_GIVEN,
|
||||
) -> Optional[FunctionCall]:
|
||||
if not functions or len(functions) == 0:
|
||||
return None
|
||||
function: completion_create_params.Function = functions[0]
|
||||
function_name = function["name"]
|
||||
function_description = function["description"]
|
||||
function_parameters = function["parameters"]
|
||||
function_parameters_type = function_parameters["type"]
|
||||
if function_parameters_type != "object":
|
||||
return None
|
||||
function_parameters_properties = function_parameters["properties"]
|
||||
function_parameters_required = function_parameters["required"]
|
||||
parameters = {}
|
||||
for parameter_name, parameter in function_parameters_properties.items():
|
||||
if parameter_name not in function_parameters_required:
|
||||
continue
|
||||
parameter_type = parameter["type"]
|
||||
if parameter_type == "string":
|
||||
if "enum" in parameter:
|
||||
if len(parameter["enum"]) == 0:
|
||||
continue
|
||||
parameters[parameter_name] = parameter["enum"][0]
|
||||
else:
|
||||
parameters[parameter_name] = "kawaii"
|
||||
elif parameter_type == "integer":
|
||||
parameters[parameter_name] = 114514
|
||||
elif parameter_type == "number":
|
||||
parameters[parameter_name] = 1919810.0
|
||||
elif parameter_type == "boolean":
|
||||
parameters[parameter_name] = True
|
||||
|
||||
return FunctionCall(name=function_name, arguments=dumps(parameters))
|
||||
|
||||
@staticmethod
|
||||
def generate_tool_calls(tools=NOT_GIVEN) -> Optional[list[ChatCompletionMessageToolCall]]:
|
||||
list_tool_calls = []
|
||||
if not tools or len(tools) == 0:
|
||||
return None
|
||||
tool = tools[0]
|
||||
|
||||
if "type" in tools and tools["type"] != "function":
|
||||
return None
|
||||
|
||||
function = tool["function"]
|
||||
|
||||
function_call = MockChatClass.generate_function_call(functions=[function])
|
||||
if function_call is None:
|
||||
return None
|
||||
|
||||
list_tool_calls.append(
|
||||
ChatCompletionMessageToolCall(
|
||||
id="sakurajima-mai",
|
||||
function=Function(
|
||||
name=function_call.name,
|
||||
arguments=function_call.arguments,
|
||||
),
|
||||
type="function",
|
||||
)
|
||||
)
|
||||
|
||||
return list_tool_calls
|
||||
|
||||
@staticmethod
|
||||
def mocked_openai_chat_create_sync(
|
||||
model: str,
|
||||
functions: list[completion_create_params.Function] | NotGiven = NOT_GIVEN,
|
||||
tools: list[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
||||
) -> CompletionMessage:
|
||||
tool_calls = []
|
||||
function_call = MockChatClass.generate_function_call(functions=functions)
|
||||
if not function_call:
|
||||
tool_calls = MockChatClass.generate_tool_calls(tools=tools)
|
||||
|
||||
return _ChatCompletion(
|
||||
id="cmpl-3QJQa5jXJ5Z5X",
|
||||
choices=[
|
||||
_ChatCompletionChoice(
|
||||
finish_reason="content_filter",
|
||||
index=0,
|
||||
message=ChatCompletionMessage(
|
||||
content="elaina", role="assistant", function_call=function_call, tool_calls=tool_calls
|
||||
),
|
||||
)
|
||||
],
|
||||
created=int(time()),
|
||||
model=model,
|
||||
object="chat.completion",
|
||||
system_fingerprint="",
|
||||
usage=CompletionUsage(
|
||||
prompt_tokens=2,
|
||||
completion_tokens=1,
|
||||
total_tokens=3,
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def mocked_openai_chat_create_stream(
|
||||
model: str,
|
||||
functions: list[completion_create_params.Function] | NotGiven = NOT_GIVEN,
|
||||
tools: list[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
||||
) -> Generator[ChatCompletionChunk, None, None]:
|
||||
tool_calls = []
|
||||
function_call = MockChatClass.generate_function_call(functions=functions)
|
||||
if not function_call:
|
||||
tool_calls = MockChatClass.generate_tool_calls(tools=tools)
|
||||
|
||||
full_text = "Hello, world!\n\n```python\nprint('Hello, world!')\n```"
|
||||
for i in range(0, len(full_text) + 1):
|
||||
if i == len(full_text):
|
||||
yield ChatCompletionChunk(
|
||||
id="cmpl-3QJQa5jXJ5Z5X",
|
||||
choices=[
|
||||
Choice(
|
||||
delta=ChoiceDelta(
|
||||
content="",
|
||||
function_call=ChoiceDeltaFunctionCall(
|
||||
name=function_call.name,
|
||||
arguments=function_call.arguments,
|
||||
)
|
||||
if function_call
|
||||
else None,
|
||||
role="assistant",
|
||||
tool_calls=[
|
||||
ChoiceDeltaToolCall(
|
||||
index=0,
|
||||
id="misaka-mikoto",
|
||||
function=ChoiceDeltaToolCallFunction(
|
||||
name=tool_calls[0].function.name,
|
||||
arguments=tool_calls[0].function.arguments,
|
||||
),
|
||||
type="function",
|
||||
)
|
||||
]
|
||||
if tool_calls and len(tool_calls) > 0
|
||||
else None,
|
||||
),
|
||||
finish_reason="function_call",
|
||||
index=0,
|
||||
)
|
||||
],
|
||||
created=int(time()),
|
||||
model=model,
|
||||
object="chat.completion.chunk",
|
||||
system_fingerprint="",
|
||||
usage=CompletionUsage(
|
||||
prompt_tokens=2,
|
||||
completion_tokens=17,
|
||||
total_tokens=19,
|
||||
),
|
||||
)
|
||||
else:
|
||||
yield ChatCompletionChunk(
|
||||
id="cmpl-3QJQa5jXJ5Z5X",
|
||||
choices=[
|
||||
Choice(
|
||||
delta=ChoiceDelta(
|
||||
content=full_text[i],
|
||||
role="assistant",
|
||||
),
|
||||
finish_reason="content_filter",
|
||||
index=0,
|
||||
)
|
||||
],
|
||||
created=int(time()),
|
||||
model=model,
|
||||
object="chat.completion.chunk",
|
||||
system_fingerprint="",
|
||||
)
|
||||
|
||||
def chat_create(
|
||||
self: Completions,
|
||||
*,
|
||||
messages: list[ChatCompletionMessageParam],
|
||||
model: Union[
|
||||
str,
|
||||
Literal[
|
||||
"gpt-4-1106-preview",
|
||||
"gpt-4-vision-preview",
|
||||
"gpt-4",
|
||||
"gpt-4-0314",
|
||||
"gpt-4-0613",
|
||||
"gpt-4-32k",
|
||||
"gpt-4-32k-0314",
|
||||
"gpt-4-32k-0613",
|
||||
"gpt-3.5-turbo-1106",
|
||||
"gpt-3.5-turbo",
|
||||
"gpt-3.5-turbo-16k",
|
||||
"gpt-3.5-turbo-0301",
|
||||
"gpt-3.5-turbo-0613",
|
||||
"gpt-3.5-turbo-16k-0613",
|
||||
],
|
||||
],
|
||||
functions: list[completion_create_params.Function] | NotGiven = NOT_GIVEN,
|
||||
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
|
||||
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
|
||||
tools: list[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
||||
**kwargs: Any,
|
||||
):
|
||||
openai_models = [
|
||||
"gpt-4-1106-preview",
|
||||
"gpt-4-vision-preview",
|
||||
"gpt-4",
|
||||
"gpt-4-0314",
|
||||
"gpt-4-0613",
|
||||
"gpt-4-32k",
|
||||
"gpt-4-32k-0314",
|
||||
"gpt-4-32k-0613",
|
||||
"gpt-3.5-turbo-1106",
|
||||
"gpt-3.5-turbo",
|
||||
"gpt-3.5-turbo-16k",
|
||||
"gpt-3.5-turbo-0301",
|
||||
"gpt-3.5-turbo-0613",
|
||||
"gpt-3.5-turbo-16k-0613",
|
||||
]
|
||||
azure_openai_models = ["gpt35", "gpt-4v", "gpt-35-turbo"]
|
||||
if not re.match(r"^(https?):\/\/[^\s\/$.?#].[^\s]*$", str(self._client.base_url)):
|
||||
raise InvokeAuthorizationError("Invalid base url")
|
||||
if model in openai_models + azure_openai_models:
|
||||
if not re.match(r"sk-[a-zA-Z0-9]{24,}$", self._client.api_key) and type(self._client) == OpenAI:
|
||||
# sometime, provider use OpenAI compatible API will not have api key or have different api key format
|
||||
# so we only check if model is in openai_models
|
||||
raise InvokeAuthorizationError("Invalid api key")
|
||||
if len(self._client.api_key) < 18 and type(self._client) == AzureOpenAI:
|
||||
raise InvokeAuthorizationError("Invalid api key")
|
||||
if stream:
|
||||
return MockChatClass.mocked_openai_chat_create_stream(model=model, functions=functions, tools=tools)
|
||||
|
||||
return MockChatClass.mocked_openai_chat_create_sync(model=model, functions=functions, tools=tools)
|
||||
@ -1,130 +0,0 @@
|
||||
import re
|
||||
from collections.abc import Generator
|
||||
from time import time
|
||||
|
||||
# import monkeypatch
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
from openai import AzureOpenAI, BadRequestError, OpenAI
|
||||
from openai._types import NOT_GIVEN, NotGiven
|
||||
from openai.resources.completions import Completions
|
||||
from openai.types import Completion as CompletionMessage
|
||||
from openai.types.completion import CompletionChoice
|
||||
from openai.types.completion_usage import CompletionUsage
|
||||
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
|
||||
|
||||
class MockCompletionsClass:
|
||||
@staticmethod
|
||||
def mocked_openai_completion_create_sync(model: str) -> CompletionMessage:
|
||||
return CompletionMessage(
|
||||
id="cmpl-3QJQa5jXJ5Z5X",
|
||||
object="text_completion",
|
||||
created=int(time()),
|
||||
model=model,
|
||||
system_fingerprint="",
|
||||
choices=[
|
||||
CompletionChoice(
|
||||
text="mock",
|
||||
index=0,
|
||||
logprobs=None,
|
||||
finish_reason="stop",
|
||||
)
|
||||
],
|
||||
usage=CompletionUsage(
|
||||
prompt_tokens=2,
|
||||
completion_tokens=1,
|
||||
total_tokens=3,
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def mocked_openai_completion_create_stream(model: str) -> Generator[CompletionMessage, None, None]:
|
||||
full_text = "Hello, world!\n\n```python\nprint('Hello, world!')\n```"
|
||||
for i in range(0, len(full_text) + 1):
|
||||
if i == len(full_text):
|
||||
yield CompletionMessage(
|
||||
id="cmpl-3QJQa5jXJ5Z5X",
|
||||
object="text_completion",
|
||||
created=int(time()),
|
||||
model=model,
|
||||
system_fingerprint="",
|
||||
choices=[
|
||||
CompletionChoice(
|
||||
text="",
|
||||
index=0,
|
||||
logprobs=None,
|
||||
finish_reason="stop",
|
||||
)
|
||||
],
|
||||
usage=CompletionUsage(
|
||||
prompt_tokens=2,
|
||||
completion_tokens=17,
|
||||
total_tokens=19,
|
||||
),
|
||||
)
|
||||
else:
|
||||
yield CompletionMessage(
|
||||
id="cmpl-3QJQa5jXJ5Z5X",
|
||||
object="text_completion",
|
||||
created=int(time()),
|
||||
model=model,
|
||||
system_fingerprint="",
|
||||
choices=[
|
||||
CompletionChoice(text=full_text[i], index=0, logprobs=None, finish_reason="content_filter")
|
||||
],
|
||||
)
|
||||
|
||||
def completion_create(
|
||||
self: Completions,
|
||||
*,
|
||||
model: Union[
|
||||
str,
|
||||
Literal[
|
||||
"babbage-002",
|
||||
"davinci-002",
|
||||
"gpt-3.5-turbo-instruct",
|
||||
"text-davinci-003",
|
||||
"text-davinci-002",
|
||||
"text-davinci-001",
|
||||
"code-davinci-002",
|
||||
"text-curie-001",
|
||||
"text-babbage-001",
|
||||
"text-ada-001",
|
||||
],
|
||||
],
|
||||
prompt: Union[str, list[str], list[int], list[list[int]], None],
|
||||
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
|
||||
**kwargs: Any,
|
||||
):
|
||||
openai_models = [
|
||||
"babbage-002",
|
||||
"davinci-002",
|
||||
"gpt-3.5-turbo-instruct",
|
||||
"text-davinci-003",
|
||||
"text-davinci-002",
|
||||
"text-davinci-001",
|
||||
"code-davinci-002",
|
||||
"text-curie-001",
|
||||
"text-babbage-001",
|
||||
"text-ada-001",
|
||||
]
|
||||
azure_openai_models = ["gpt-35-turbo-instruct"]
|
||||
|
||||
if not re.match(r"^(https?):\/\/[^\s\/$.?#].[^\s]*$", str(self._client.base_url)):
|
||||
raise InvokeAuthorizationError("Invalid base url")
|
||||
if model in openai_models + azure_openai_models:
|
||||
if not re.match(r"sk-[a-zA-Z0-9]{24,}$", self._client.api_key) and type(self._client) == OpenAI:
|
||||
# sometime, provider use OpenAI compatible API will not have api key or have different api key format
|
||||
# so we only check if model is in openai_models
|
||||
raise InvokeAuthorizationError("Invalid api key")
|
||||
if len(self._client.api_key) < 18 and type(self._client) == AzureOpenAI:
|
||||
raise InvokeAuthorizationError("Invalid api key")
|
||||
|
||||
if not prompt:
|
||||
raise BadRequestError("Invalid prompt")
|
||||
if stream:
|
||||
return MockCompletionsClass.mocked_openai_completion_create_stream(model=model)
|
||||
|
||||
return MockCompletionsClass.mocked_openai_completion_create_sync(model=model)
|
||||
File diff suppressed because one or more lines are too long
@ -1,23 +0,0 @@
|
||||
from time import time
|
||||
|
||||
from openai.resources.models import Models
|
||||
from openai.types.model import Model
|
||||
|
||||
|
||||
class MockModelClass:
|
||||
"""
|
||||
mock class for openai.models.Models
|
||||
"""
|
||||
|
||||
def list(
|
||||
self,
|
||||
**kwargs,
|
||||
) -> list[Model]:
|
||||
return [
|
||||
Model(
|
||||
id="ft:gpt-3.5-turbo-0613:personal::8GYJLPDQ",
|
||||
created=int(time()),
|
||||
object="model",
|
||||
owned_by="organization:org-123",
|
||||
)
|
||||
]
|
||||
@ -1,29 +0,0 @@
|
||||
import re
|
||||
from typing import Any, Literal, Union
|
||||
|
||||
from openai._types import NOT_GIVEN, FileTypes, NotGiven
|
||||
from openai.resources.audio.transcriptions import Transcriptions
|
||||
from openai.types.audio.transcription import Transcription
|
||||
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
|
||||
|
||||
class MockSpeech2TextClass:
|
||||
def speech2text_create(
|
||||
self: Transcriptions,
|
||||
*,
|
||||
file: FileTypes,
|
||||
model: Union[str, Literal["whisper-1"]],
|
||||
language: str | NotGiven = NOT_GIVEN,
|
||||
prompt: str | NotGiven = NOT_GIVEN,
|
||||
response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN,
|
||||
temperature: float | NotGiven = NOT_GIVEN,
|
||||
**kwargs: Any,
|
||||
) -> Transcription:
|
||||
if not re.match(r"^(https?):\/\/[^\s\/$.?#].[^\s]*$", str(self._client.base_url)):
|
||||
raise InvokeAuthorizationError("Invalid base url")
|
||||
|
||||
if len(self._client.api_key) < 18:
|
||||
raise InvokeAuthorizationError("Invalid API key")
|
||||
|
||||
return Transcription(text="1, 2, 3, 4, 5, 6, 7, 8, 9, 10")
|
||||
@ -1,170 +0,0 @@
|
||||
import os
|
||||
import re
|
||||
from typing import Union
|
||||
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from requests import Response
|
||||
from requests.exceptions import ConnectionError
|
||||
from requests.sessions import Session
|
||||
from xinference_client.client.restful.restful_client import (
|
||||
Client,
|
||||
RESTfulChatModelHandle,
|
||||
RESTfulEmbeddingModelHandle,
|
||||
RESTfulGenerateModelHandle,
|
||||
RESTfulRerankModelHandle,
|
||||
)
|
||||
from xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage
|
||||
|
||||
|
||||
class MockXinferenceClass:
|
||||
def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
|
||||
if not re.match(r"https?:\/\/[^\s\/$.?#].[^\s]*$", self.base_url):
|
||||
raise RuntimeError("404 Not Found")
|
||||
|
||||
if "generate" == model_uid:
|
||||
return RESTfulGenerateModelHandle(model_uid, base_url=self.base_url, auth_headers={})
|
||||
if "chat" == model_uid:
|
||||
return RESTfulChatModelHandle(model_uid, base_url=self.base_url, auth_headers={})
|
||||
if "embedding" == model_uid:
|
||||
return RESTfulEmbeddingModelHandle(model_uid, base_url=self.base_url, auth_headers={})
|
||||
if "rerank" == model_uid:
|
||||
return RESTfulRerankModelHandle(model_uid, base_url=self.base_url, auth_headers={})
|
||||
raise RuntimeError("404 Not Found")
|
||||
|
||||
def get(self: Session, url: str, **kwargs):
|
||||
response = Response()
|
||||
if "v1/models/" in url:
|
||||
# get model uid
|
||||
model_uid = url.split("/")[-1] or ""
|
||||
if not re.match(
|
||||
r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}", model_uid
|
||||
) and model_uid not in {"generate", "chat", "embedding", "rerank"}:
|
||||
response.status_code = 404
|
||||
response._content = b"{}"
|
||||
return response
|
||||
|
||||
# check if url is valid
|
||||
if not re.match(r"^(https?):\/\/[^\s\/$.?#].[^\s]*$", url):
|
||||
response.status_code = 404
|
||||
response._content = b"{}"
|
||||
return response
|
||||
|
||||
if model_uid in {"generate", "chat"}:
|
||||
response.status_code = 200
|
||||
response._content = b"""{
|
||||
"model_type": "LLM",
|
||||
"address": "127.0.0.1:43877",
|
||||
"accelerators": [
|
||||
"0",
|
||||
"1"
|
||||
],
|
||||
"model_name": "chatglm3-6b",
|
||||
"model_lang": [
|
||||
"en"
|
||||
],
|
||||
"model_ability": [
|
||||
"generate",
|
||||
"chat"
|
||||
],
|
||||
"model_description": "latest chatglm3",
|
||||
"model_format": "pytorch",
|
||||
"model_size_in_billions": 7,
|
||||
"quantization": "none",
|
||||
"model_hub": "huggingface",
|
||||
"revision": null,
|
||||
"context_length": 2048,
|
||||
"replica": 1
|
||||
}"""
|
||||
return response
|
||||
|
||||
elif model_uid == "embedding":
|
||||
response.status_code = 200
|
||||
response._content = b"""{
|
||||
"model_type": "embedding",
|
||||
"address": "127.0.0.1:43877",
|
||||
"accelerators": [
|
||||
"0",
|
||||
"1"
|
||||
],
|
||||
"model_name": "bge",
|
||||
"model_lang": [
|
||||
"en"
|
||||
],
|
||||
"revision": null,
|
||||
"max_tokens": 512
|
||||
}"""
|
||||
return response
|
||||
|
||||
elif "v1/cluster/auth" in url:
|
||||
response.status_code = 200
|
||||
response._content = b"""{
|
||||
"auth": true
|
||||
}"""
|
||||
return response
|
||||
|
||||
def _check_cluster_authenticated(self):
|
||||
self._cluster_authed = True
|
||||
|
||||
def rerank(
|
||||
self: RESTfulRerankModelHandle, documents: list[str], query: str, top_n: int, return_documents: bool
|
||||
) -> dict:
|
||||
# check if self._model_uid is a valid uuid
|
||||
if (
|
||||
not re.match(r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}", self._model_uid)
|
||||
and self._model_uid != "rerank"
|
||||
):
|
||||
raise RuntimeError("404 Not Found")
|
||||
|
||||
if not re.match(r"^(https?):\/\/[^\s\/$.?#].[^\s]*$", self._base_url):
|
||||
raise RuntimeError("404 Not Found")
|
||||
|
||||
if top_n is None:
|
||||
top_n = 1
|
||||
|
||||
return {
|
||||
"results": [
|
||||
{"index": i, "document": doc, "relevance_score": 0.9} for i, doc in enumerate(documents[:top_n])
|
||||
]
|
||||
}
|
||||
|
||||
def create_embedding(self: RESTfulGenerateModelHandle, input: Union[str, list[str]], **kwargs) -> dict:
|
||||
# check if self._model_uid is a valid uuid
|
||||
if (
|
||||
not re.match(r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}", self._model_uid)
|
||||
and self._model_uid != "embedding"
|
||||
):
|
||||
raise RuntimeError("404 Not Found")
|
||||
|
||||
if isinstance(input, str):
|
||||
input = [input]
|
||||
ipt_len = len(input)
|
||||
|
||||
embedding = Embedding(
|
||||
object="list",
|
||||
model=self._model_uid,
|
||||
data=[
|
||||
EmbeddingData(index=i, object="embedding", embedding=[1919.810 for _ in range(768)])
|
||||
for i in range(ipt_len)
|
||||
],
|
||||
usage=EmbeddingUsage(prompt_tokens=ipt_len, total_tokens=ipt_len),
|
||||
)
|
||||
|
||||
return embedding
|
||||
|
||||
|
||||
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_xinference_mock(request, monkeypatch: MonkeyPatch):
|
||||
if MOCK:
|
||||
monkeypatch.setattr(Client, "get_model", MockXinferenceClass.get_chat_model)
|
||||
monkeypatch.setattr(Client, "_check_cluster_authenticated", MockXinferenceClass._check_cluster_authenticated)
|
||||
monkeypatch.setattr(Session, "get", MockXinferenceClass.get)
|
||||
monkeypatch.setattr(RESTfulEmbeddingModelHandle, "create_embedding", MockXinferenceClass.create_embedding)
|
||||
monkeypatch.setattr(RESTfulRerankModelHandle, "rerank", MockXinferenceClass.rerank)
|
||||
yield
|
||||
|
||||
if MOCK:
|
||||
monkeypatch.undo()
|
||||
@ -1,92 +0,0 @@
|
||||
import os
|
||||
from collections.abc import Generator
|
||||
|
||||
import pytest
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.message_entities import AssistantPromptMessage, SystemPromptMessage, UserPromptMessage
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.anthropic.llm.llm import AnthropicLargeLanguageModel
|
||||
from tests.integration_tests.model_runtime.__mock.anthropic import setup_anthropic_mock
|
||||
|
||||
|
||||
@pytest.mark.parametrize("setup_anthropic_mock", [["none"]], indirect=True)
|
||||
def test_validate_credentials(setup_anthropic_mock):
|
||||
model = AnthropicLargeLanguageModel()
|
||||
|
||||
with pytest.raises(CredentialsValidateFailedError):
|
||||
model.validate_credentials(model="claude-instant-1.2", credentials={"anthropic_api_key": "invalid_key"})
|
||||
|
||||
model.validate_credentials(
|
||||
model="claude-instant-1.2", credentials={"anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY")}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("setup_anthropic_mock", [["none"]], indirect=True)
|
||||
def test_invoke_model(setup_anthropic_mock):
|
||||
model = AnthropicLargeLanguageModel()
|
||||
|
||||
response = model.invoke(
|
||||
model="claude-instant-1.2",
|
||||
credentials={
|
||||
"anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY"),
|
||||
"anthropic_api_url": os.environ.get("ANTHROPIC_API_URL"),
|
||||
},
|
||||
prompt_messages=[
|
||||
SystemPromptMessage(
|
||||
content="You are a helpful AI assistant.",
|
||||
),
|
||||
UserPromptMessage(content="Hello World!"),
|
||||
],
|
||||
model_parameters={"temperature": 0.0, "top_p": 1.0, "max_tokens": 10},
|
||||
stop=["How"],
|
||||
stream=False,
|
||||
user="abc-123",
|
||||
)
|
||||
|
||||
assert isinstance(response, LLMResult)
|
||||
assert len(response.message.content) > 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("setup_anthropic_mock", [["none"]], indirect=True)
|
||||
def test_invoke_stream_model(setup_anthropic_mock):
|
||||
model = AnthropicLargeLanguageModel()
|
||||
|
||||
response = model.invoke(
|
||||
model="claude-instant-1.2",
|
||||
credentials={"anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY")},
|
||||
prompt_messages=[
|
||||
SystemPromptMessage(
|
||||
content="You are a helpful AI assistant.",
|
||||
),
|
||||
UserPromptMessage(content="Hello World!"),
|
||||
],
|
||||
model_parameters={"temperature": 0.0, "max_tokens": 100},
|
||||
stream=True,
|
||||
user="abc-123",
|
||||
)
|
||||
|
||||
assert isinstance(response, Generator)
|
||||
|
||||
for chunk in response:
|
||||
assert isinstance(chunk, LLMResultChunk)
|
||||
assert isinstance(chunk.delta, LLMResultChunkDelta)
|
||||
assert isinstance(chunk.delta.message, AssistantPromptMessage)
|
||||
assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
|
||||
|
||||
|
||||
def test_get_num_tokens():
|
||||
model = AnthropicLargeLanguageModel()
|
||||
|
||||
num_tokens = model.get_num_tokens(
|
||||
model="claude-instant-1.2",
|
||||
credentials={"anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY")},
|
||||
prompt_messages=[
|
||||
SystemPromptMessage(
|
||||
content="You are a helpful AI assistant.",
|
||||
),
|
||||
UserPromptMessage(content="Hello World!"),
|
||||
],
|
||||
)
|
||||
|
||||
assert num_tokens == 18
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue