Merge branch 'feat/parent-child-retrieval' of github.com:langgenius/dify into feat/parent-child-retrieval
commit
493ec06e95
@ -0,0 +1,51 @@
|
||||
from enum import StrEnum
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class OpenDALScheme(StrEnum):
|
||||
FS = "fs"
|
||||
S3 = "s3"
|
||||
|
||||
|
||||
class OpenDALStorageConfig(BaseSettings):
    """Settings for the OpenDAL storage backend (scheme selection plus per-scheme options)."""

    # Scheme selector; defaults to the local filesystem scheme.
    STORAGE_OPENDAL_SCHEME: str = Field(
        description="OpenDAL scheme.",
        default=OpenDALScheme.FS.value,
    )

    # ---- Local filesystem ("fs") options ----
    OPENDAL_FS_ROOT: str = Field(
        description="Root path for local storage.",
        default="storage",
    )

    # ---- S3 ("s3") options ----
    OPENDAL_S3_ROOT: str = Field(
        description="Root path for S3 storage.",
        default="/",
    )
    OPENDAL_S3_BUCKET: str = Field(
        description="S3 bucket name.",
        default="",
    )
    OPENDAL_S3_ENDPOINT: str = Field(
        description="S3 endpoint URL.",
        default="https://s3.amazonaws.com",
    )
    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
        description="S3 access key ID.",
        default="",
    )
    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
        description="S3 secret access key.",
        default="",
    )
    OPENDAL_S3_REGION: str = Field(
        description="S3 region.",
        default="",
    )
    # Only "aws:kms" or the empty string (the default) are accepted values.
    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
        description="S3 server-side encryption.",
        default="",
    )
|
||||
@ -0,0 +1,17 @@
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from .apollo import ApolloSettingsSourceInfo
|
||||
from .base import RemoteSettingsSource
|
||||
from .enums import RemoteSettingsSourceName
|
||||
|
||||
|
||||
class RemoteSettingsSourceConfig(ApolloSettingsSourceInfo):
    """Selects the remote settings source; inherits the Apollo connection fields."""

    # Empty string (the default) means no remote source name has been set.
    REMOTE_SETTINGS_SOURCE_NAME: RemoteSettingsSourceName | str = Field(
        default="",
        description="name of remote config source",
    )


# Public names re-exported by this package.
__all__ = ["RemoteSettingsSource", "RemoteSettingsSourceConfig", "RemoteSettingsSourceName"]
|
||||
@ -0,0 +1,55 @@
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic.fields import FieldInfo
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
from configs.remote_settings_sources.base import RemoteSettingsSource
|
||||
|
||||
from .client import ApolloClient
|
||||
|
||||
|
||||
class ApolloSettingsSourceInfo(BaseSettings):
    """
    Connection settings for the Apollo remote settings source

    All fields are optional and default to ``None``.
    """

    APOLLO_APP_ID: Optional[str] = Field(
        default=None,
        description="apollo app_id",
    )

    APOLLO_CLUSTER: Optional[str] = Field(
        default=None,
        description="apollo cluster",
    )

    APOLLO_CONFIG_URL: Optional[str] = Field(
        default=None,
        description="apollo config url",
    )

    APOLLO_NAMESPACE: Optional[str] = Field(
        default=None,
        description="apollo namespace",
    )
|
||||
|
||||
|
||||
class ApolloSettingsSource(RemoteSettingsSource):
    """Remote settings source that answers field lookups from one Apollo namespace."""

    def __init__(self, configs: Mapping[str, Any]):
        """Build the Apollo client and load the namespace's configuration once.

        :param configs: mapping that must contain ``APOLLO_APP_ID``, ``APOLLO_CLUSTER``,
            ``APOLLO_CONFIG_URL`` and ``APOLLO_NAMESPACE``
        """
        app_id = configs["APOLLO_APP_ID"]
        cluster = configs["APOLLO_CLUSTER"]
        config_url = configs["APOLLO_CONFIG_URL"]
        namespace = configs["APOLLO_NAMESPACE"]

        # Long polling is switched off; the values are read once below.
        self.client = ApolloClient(
            app_id=app_id,
            cluster=cluster,
            config_url=config_url,
            start_hot_update=False,
            _notification_map={namespace: -1},
        )
        self.namespace = namespace
        self.remote_configs = self.client.get_all_dicts(namespace)

    def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
        """Return ``(value, field_name, False)`` for ``field_name``.

        The value is ``None`` when the key is absent from the loaded configuration.

        :raises ValueError: if the loaded configuration is not a dict
        """
        remote = self.remote_configs
        if isinstance(remote, dict):
            return remote.get(field_name), field_name, False
        raise ValueError(f"remote configs is not dict, but {type(remote)}")
|
||||
@ -0,0 +1,303 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from .python_3x import http_request, makedirs_wrapper
|
||||
from .utils import (
|
||||
CONFIGURATIONS,
|
||||
NAMESPACE_NAME,
|
||||
NOTIFICATION_ID,
|
||||
get_value_from_dict,
|
||||
init_ip,
|
||||
no_key_cache_key,
|
||||
signature,
|
||||
url_encode_wrapper,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ApolloClient:
    """Client for an Apollo configuration server.

    Namespace configurations are fetched over HTTP, kept in an in-memory cache
    and mirrored to files under ``~/.dify/config/remote-settings/apollo/cache/``.

    Every cache entry (in memory and on disk) has the shape
    ``{CONFIGURATIONS: {key: value, ...}}`` with an optional ``NOTIFICATION_ID``
    entry recorded by the long-poll path.
    """

    def __init__(
        self,
        config_url,
        app_id,
        cluster="default",
        secret="",
        start_hot_update=True,
        change_listener=None,
        _notification_map=None,
    ):
        """Initialise state, ensure the cache directory exists and start background threads.

        :param config_url: base URL of the Apollo config service
        :param app_id: Apollo application id
        :param cluster: Apollo cluster name
        :param secret: access-key secret; when empty, requests are sent unsigned
        :param start_hot_update: start the long-poll listener thread when true
        :param change_listener: optional callable ``(action, namespace, key, value)``
            where action is "add", "delete" or "update"
        :param _notification_map: namespaces refreshed by the heartbeat thread;
            defaults to ``{"application": -1}``
        """
        # Core routing parameters
        self.config_url = config_url
        self.cluster = cluster
        self.app_id = app_id

        # Non-core parameters
        self.ip = init_ip()
        self.secret = secret

        # Private control variables
        self._cycle_time = 5
        self._stopping = False
        self._cache = {}
        self._no_key = {}
        self._hash = {}
        self._pull_timeout = 75
        self._cache_file_path = os.path.expanduser("~") + "/.dify/config/remote-settings/apollo/cache/"
        self._long_poll_thread = None
        self._change_listener = change_listener  # "add" "delete" "update"
        if _notification_map is None:
            _notification_map = {"application": -1}
        self._notification_map = _notification_map
        self.last_release_key = None

        # Private startup method
        self._path_checker()
        if start_hot_update:
            self._start_hot_update()

        # The heartbeat thread is started regardless of start_hot_update.
        heartbeat = threading.Thread(target=self._heart_beat)
        heartbeat.daemon = True
        heartbeat.start()

    def get_json_from_net(self, namespace="application"):
        """Fetch ``namespace`` from the server.

        :return: ``{CONFIGURATIONS: {...}}`` on HTTP 200 with a non-empty body;
            ``None`` on any other status, an empty body, or an exception (which is logged)
        """
        url = "{}/configs/{}/{}/{}?releaseKey={}&ip={}".format(
            self.config_url, self.app_id, self.cluster, namespace, "", self.ip
        )
        try:
            code, body = http_request(url, timeout=3, headers=self._sign_headers(url))
            if code != 200:
                return None
            if not body:
                logger.error(f"get_json_from_net load configs failed, body is {body}")
                return None
            data = json.loads(body)
            return {CONFIGURATIONS: data["configurations"]}
        except Exception:
            logger.exception("an error occurred in get_json_from_net")
            return None

    def get_value(self, key, default_val=None, namespace="application"):
        """Look up ``key`` in ``namespace``: memory cache, then network, then file cache.

        :return: the configured value, or ``default_val`` when the key is found
            nowhere or an exception occurs (the exception is logged)
        """
        try:
            # read memory configuration
            namespace_cache = self._cache.get(namespace)
            val = get_value_from_dict(namespace_cache, key)
            if val is not None:
                return val

            # A previous full miss for this key short-circuits further lookups.
            no_key = no_key_cache_key(namespace, key)
            if no_key in self._no_key:
                return default_val

            # read the network configuration
            namespace_data = self.get_json_from_net(namespace)
            val = get_value_from_dict(namespace_data, key)
            if val is not None:
                self._update_cache_and_file(namespace_data, namespace)
                return val

            # read the file configuration
            namespace_cache = self._get_local_cache(namespace)
            val = get_value_from_dict(namespace_cache, key)
            if val is not None:
                self._update_cache_and_file(namespace_cache, namespace)
                return val

            # Nothing found anywhere: remember the miss and return the default.
            self._set_local_cache_none(namespace, key)
            return default_val
        except Exception:
            logger.exception("get_value has error, [key is %s], [namespace is %s]", key, namespace)
            return default_val

    def _set_local_cache_none(self, namespace, key):
        """Record that ``key`` is missing from ``namespace``.

        The caller's default value is deliberately not stored, so two calls with
        different defaults each get their own default back.
        """
        no_key = no_key_cache_key(namespace, key)
        self._no_key[no_key] = key

    def _start_hot_update(self):
        """Start the long-poll listener as a daemon thread."""
        self._long_poll_thread = threading.Thread(target=self._listener)
        # Daemon thread: it does not keep the process alive once the main thread exits.
        self._long_poll_thread.daemon = True
        self._long_poll_thread.start()

    def stop(self):
        """Ask the background loops to finish after their current iteration."""
        self._stopping = True
        logger.info("Stopping listener...")

    def _call_listener(self, namespace, old_kv, new_kv):
        """Report the differences between ``old_kv`` and ``new_kv`` to the change listener.

        Emits "delete" and "update" events while walking the old keys, then "add"
        events while walking the new keys. Ordinary exceptions raised by the
        listener are logged and stop the notification pass;
        ``KeyboardInterrupt``/``SystemExit`` are allowed to propagate.
        """
        if self._change_listener is None:
            return
        if old_kv is None:
            old_kv = {}
        if new_kv is None:
            new_kv = {}
        try:
            for key in old_kv:
                new_value = new_kv.get(key)
                old_value = old_kv.get(key)
                if new_value is None:
                    # No new value means the key was deleted.
                    self._change_listener("delete", namespace, key, old_value)
                    continue
                if new_value != old_value:
                    self._change_listener("update", namespace, key, new_value)
            for key in new_kv:
                if old_kv.get(key) is None:
                    self._change_listener("add", namespace, key, new_kv.get(key))
        except Exception as e:
            logger.warning(str(e))

    def _path_checker(self):
        """Create the cache directory when it does not exist yet."""
        if not os.path.isdir(self._cache_file_path):
            makedirs_wrapper(self._cache_file_path)

    def _update_cache_and_file(self, namespace_data, namespace="application"):
        """Store ``namespace_data`` in memory and, when its content changed, on disk."""
        # update the local cache
        self._cache[namespace] = namespace_data
        # update the file cache only when the serialized content differs
        new_string = json.dumps(namespace_data)
        new_hash = hashlib.md5(new_string.encode("utf-8")).hexdigest()
        if self._hash.get(namespace) == new_hash:
            return
        file_path = Path(self._cache_file_path) / f"{self.app_id}_configuration_{namespace}.txt"
        file_path.write_text(new_string, encoding="utf-8")
        self._hash[namespace] = new_hash

    def _get_local_cache(self, namespace="application"):
        """Load the file cache for ``namespace``; return ``{}`` when no file exists."""
        cache_file = Path(self._cache_file_path) / f"{self.app_id}_configuration_{namespace}.txt"
        if cache_file.is_file():
            # Read the whole file with the same encoding the writer uses.
            return json.loads(cache_file.read_text(encoding="utf-8"))
        return {}

    def _long_poll(self):
        """Ask the server which cached namespaces changed and refresh those."""
        # Iterate over a snapshot: the heartbeat thread may add cache entries meanwhile.
        notifications = [
            {NAMESPACE_NAME: name, NOTIFICATION_ID: namespace_data.get(NOTIFICATION_ID, -1)}
            for name, namespace_data in list(self._cache.items())
        ]
        try:
            # nothing cached yet, so there is nothing to poll for
            if len(notifications) == 0:
                return
            url = "{}/notifications/v2".format(self.config_url)
            params = {
                "appId": self.app_id,
                "cluster": self.cluster,
                "notifications": json.dumps(notifications, ensure_ascii=False),
            }
            param_str = url_encode_wrapper(params)
            url = url + "?" + param_str
            code, body = http_request(url, self._pull_timeout, headers=self._sign_headers(url))
            http_code = code
            if http_code == 304:
                logger.debug("No change, loop...")
                return
            if http_code == 200:
                if not body:
                    logger.error(f"_long_poll load configs failed,body is {body}")
                    return
                data = json.loads(body)
                for entry in data:
                    namespace = entry[NAMESPACE_NAME]
                    n_id = entry[NOTIFICATION_ID]
                    logger.info("%s has changes: notificationId=%d", namespace, n_id)
                    self._get_net_and_set_local(namespace, n_id, call_change=True)
                return
            else:
                logger.warning("Sleep...")
        except Exception as e:
            logger.warning(str(e))

    def _get_net_and_set_local(self, namespace, n_id, call_change=False):
        """Refetch ``namespace``, store it with notification id ``n_id`` and notify the listener."""
        namespace_data = self.get_json_from_net(namespace)
        if not namespace_data:
            return
        namespace_data[NOTIFICATION_ID] = n_id
        old_namespace = self._cache.get(namespace)
        self._update_cache_and_file(namespace_data, namespace)
        if self._change_listener is not None and call_change and old_namespace:
            old_kv = old_namespace.get(CONFIGURATIONS)
            new_kv = namespace_data.get(CONFIGURATIONS)
            self._call_listener(namespace, old_kv, new_kv)

    def _listener(self):
        """Long-poll loop; runs until ``stop()`` is called."""
        logger.info("start long_poll")
        while not self._stopping:
            self._long_poll()
            time.sleep(self._cycle_time)
        logger.info("stopped, long_poll")

    def _sign_headers(self, url):
        """Return the signature headers for ``url``, or ``{}`` when no secret is set."""
        headers = {}
        if self.secret == "":
            return headers
        # The signed part is everything after the config service base URL.
        uri = url[len(self.config_url) :]
        time_unix_now = str(int(round(time.time() * 1000)))
        headers["Authorization"] = "Apollo " + self.app_id + ":" + signature(time_unix_now, uri, self.secret)
        headers["Timestamp"] = time_unix_now
        return headers

    def _heart_beat(self):
        """Refresh every namespace in the notification map, then sleep, until stopped."""
        while not self._stopping:
            for namespace in self._notification_map:
                self._do_heart_beat(namespace)
            time.sleep(60 * 10)  # 10 minutes

    def _do_heart_beat(self, namespace):
        """Fetch ``namespace`` and update the caches when its release key changed.

        Always returns ``None``; exceptions are logged.
        """
        url = "{}/configs/{}/{}/{}?ip={}".format(self.config_url, self.app_id, self.cluster, namespace, self.ip)
        try:
            code, body = http_request(url, timeout=3, headers=self._sign_headers(url))
            if code != 200:
                return None
            if not body:
                logger.error(f"_do_heart_beat load configs failed,body is {body}")
                return None
            data = json.loads(body)
            if self.last_release_key == data["releaseKey"]:
                return None
            self.last_release_key = data["releaseKey"]
            # Store the same wrapped shape that get_value and the long-poll path
            # read back, keeping any notification id already recorded.
            namespace_data = {CONFIGURATIONS: data["configurations"]}
            cached = self._cache.get(namespace)
            if cached and NOTIFICATION_ID in cached:
                namespace_data[NOTIFICATION_ID] = cached[NOTIFICATION_ID]
            self._update_cache_and_file(namespace_data, namespace)
        except Exception:
            logger.exception("an error occurred in _do_heart_beat")
        return None

    def get_all_dicts(self, namespace):
        """Return the flat ``{key: value}`` configuration of ``namespace``.

        Served from the memory cache when present, otherwise fetched from the
        server and cached when non-empty.

        :return: the configuration dict, or ``None`` when nothing is cached and
            the fetch failed
        """
        cached = self._cache.get(namespace)
        if cached is not None:
            return cached.get(CONFIGURATIONS)
        net_namespace_data = self.get_json_from_net(namespace)
        if not net_namespace_data:
            return None
        configurations = net_namespace_data.get(CONFIGURATIONS)
        if configurations:
            self._update_cache_and_file(net_namespace_data, namespace)
        return configurations
|
||||
@ -0,0 +1,41 @@
|
||||
import logging
|
||||
import os
|
||||
import ssl
|
||||
import urllib.request
|
||||
from urllib import parse
|
||||
from urllib.error import HTTPError
|
||||
|
||||
# SECURITY NOTE(review): this context turns off hostname checking and
# certificate verification, and install_opener() below makes it the
# process-wide default for urllib.request.urlopen().
ssl_context = ssl.create_default_context()
ssl_context.set_ciphers("HIGH:!DH:!aNULL")
# check_hostname has to be cleared before verify_mode may be set to CERT_NONE.
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

# Build an opener around the custom SSL context and install it globally.
opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
urllib.request.install_opener(opener)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def http_request(url, timeout, headers=None):
    """Send a GET request and return ``(status_code, body_text)``.

    :param url: URL to open
    :param timeout: timeout in seconds passed to ``urlopen``
    :param headers: optional mapping of request headers; ``None`` means no extra headers
    :return: ``(code, body)`` with the body decoded as UTF-8, or ``(304, None)``
        when the server answers 304
    :raises HTTPError: for any HTTP error status other than 304 (logged first)
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    request = urllib.request.Request(url, headers=headers or {})
    try:
        # The context manager closes the response once the body has been read.
        with urllib.request.urlopen(request, timeout=timeout) as res:
            body = res.read().decode("utf-8")
            return res.code, body
    except HTTPError as e:
        if e.code == 304:
            logger.warning("http_request error,code is 304, maybe you should check secret")
            return 304, None
        logger.warning("http_request error,code is %d, msg is %s", e.code, e.msg)
        raise
|
||||
|
||||
|
||||
def url_encode(params):
    """Encode ``params`` as a URL query string via ``urllib.parse.urlencode``."""
    query_string = parse.urlencode(params)
    return query_string
|
||||
|
||||
|
||||
def makedirs_wrapper(path):
    """Create directory ``path`` and any missing parents; an existing directory is not an error."""
    os.makedirs(name=path, exist_ok=True)
|
||||
@ -0,0 +1,51 @@
|
||||
import hashlib
|
||||
import socket
|
||||
|
||||
from .python_3x import url_encode
|
||||
|
||||
# Dictionary keys shared by the Apollo client and these helpers.
CONFIGURATIONS = "configurations"
NOTIFICATION_ID = "notificationId"
NAMESPACE_NAME = "namespaceName"
|
||||
|
||||
|
||||
def signature(timestamp, uri, secret):
    """Sign a request: Base64 of the HMAC-SHA1 over timestamp, a newline and uri, keyed by secret.

    All three arguments are strings; the result is a string.
    """
    import base64
    import hmac

    payload = "\n".join((timestamp, uri)).encode()
    mac = hmac.new(secret.encode(), payload, hashlib.sha1)
    return base64.b64encode(mac.digest()).decode()
|
||||
|
||||
|
||||
def url_encode_wrapper(params):
    """Delegate to ``url_encode`` and return its result unchanged."""
    encoded = url_encode(params)
    return encoded
|
||||
|
||||
|
||||
def no_key_cache_key(namespace, key):
    """Build the missing-key cache key: namespace, then the namespace's length, then key."""
    return f"{namespace}{len(namespace)}{key}"
|
||||
|
||||
|
||||
def get_value_from_dict(namespace_cache, key):
    """Return the value stored for ``key`` under the cache's CONFIGURATIONS entry.

    Returns ``None`` when the cache is empty or ``None``, when it has no
    CONFIGURATIONS entry, or when ``key`` is not present there.
    """
    if not namespace_cache:
        return None
    kv_data = namespace_cache.get(CONFIGURATIONS)
    if kv_data is None or key not in kv_data:
        return None
    return kv_data[key]
|
||||
|
||||
|
||||
def init_ip():
    """Return the local IP address used for outbound traffic, or ``""`` if it cannot be determined.

    A UDP socket is connected to 8.8.8.8:53 and the socket's own address is read back.
    """
    try:
        # The with-block closes the socket on every path.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 53))
            return s.getsockname()[0]
    except OSError:
        # No usable route or socket: fall back to the empty default instead of
        # letting the error escape to the caller.
        return ""
|
||||
@ -0,0 +1,15 @@
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from pydantic.fields import FieldInfo
|
||||
|
||||
|
||||
class RemoteSettingsSource:
    """Base class for remote settings sources; subclasses override ``get_field_value``."""

    def __init__(self, configs: Mapping[str, Any]):
        """Accept the configuration mapping; the base class does nothing with it."""

    def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
        """Return a ``(value, key, flag)`` tuple for ``field_name``; must be overridden."""
        raise NotImplementedError

    def prepare_field_value(self, field_name: str, field: FieldInfo, value: Any, value_is_complex: bool) -> Any:
        """Return ``value`` unchanged."""
        return value
|
||||
@ -0,0 +1,5 @@
|
||||
from enum import StrEnum
|
||||
|
||||
|
||||
class RemoteSettingsSourceName(StrEnum):
|
||||
APOLLO = "apollo"
|
||||
@ -0,0 +1,21 @@
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
|
||||
|
||||
def get_bedrock_client(service_name, credentials=None):
    """Create a boto3 client for ``service_name``.

    :param service_name: boto3 service name, e.g. "bedrock-agent-runtime"
    :param credentials: optional mapping with ``aws_region``, ``aws_access_key_id``
        and ``aws_secret_access_key``; missing or empty entries are treated as not provided
    :return: the boto3 client
    """
    # The signature allows credentials=None, so never subscript it directly.
    credentials = credentials or {}
    region_name = credentials.get("aws_region")
    # Without an explicit region an empty Config is used and boto3 resolves the region itself.
    client_config = Config(region_name=region_name) if region_name else Config()

    client_kwargs = {"service_name": service_name, "config": client_config}
    aws_access_key_id = credentials.get("aws_access_key_id")
    aws_secret_access_key = credentials.get("aws_secret_access_key")
    if aws_access_key_id and aws_secret_access_key:
        # use aksk to call bedrock
        client_kwargs["aws_access_key_id"] = aws_access_key_id
        client_kwargs["aws_secret_access_key"] = aws_secret_access_key
    # otherwise: use iam without aksk to call

    return boto3.client(**client_kwargs)
|
||||
@ -0,0 +1,2 @@
|
||||
- amazon.rerank-v1
|
||||
- cohere.rerank-v3-5
|
||||
@ -0,0 +1,4 @@
|
||||
model: amazon.rerank-v1:0
|
||||
model_type: rerank
|
||||
model_properties:
|
||||
context_size: 5120
|
||||
@ -0,0 +1,4 @@
|
||||
model: cohere.rerank-v3-5:0
|
||||
model_type: rerank
|
||||
model_properties:
|
||||
context_size: 5120
|
||||
@ -0,0 +1,139 @@
|
||||
from typing import Optional
|
||||
|
||||
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
InvokeAuthorizationError,
|
||||
InvokeBadRequestError,
|
||||
InvokeConnectionError,
|
||||
InvokeError,
|
||||
InvokeRateLimitError,
|
||||
InvokeServerUnavailableError,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
|
||||
from core.model_runtime.model_providers.bedrock.get_bedrock_client import get_bedrock_client
|
||||
|
||||
|
||||
class BedrockRerankModel(RerankModel):
    """
    Model class for rerank models invoked through Amazon Bedrock.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n
        :param user: unique user id
        :return: rerank result
        """
        if len(docs) == 0:
            return RerankResult(model=model, docs=docs)

        # initialize client
        bedrock_runtime = get_bedrock_client("bedrock-agent-runtime", credentials)
        queries = [{"type": "TEXT", "textQuery": {"text": query}}]
        text_sources = [
            {
                "type": "INLINE",
                "inlineDocumentSource": {
                    "type": "TEXT",
                    "textDocument": {
                        "text": text,
                    },
                },
            }
            for text in docs
        ]

        region = credentials["aws_region"]
        model_package_arn = f"arn:aws:bedrock:{region}::foundation-model/{model}"
        bedrock_reranking_configuration = {
            "modelConfiguration": {
                "modelArn": model_package_arn,
            },
        }
        # Send numberOfResults only when the caller set top_n; a None value is not
        # a valid integer parameter. It is capped at the number of documents.
        if top_n is not None:
            bedrock_reranking_configuration["numberOfResults"] = min(top_n, len(text_sources))
        reranking_configuration = {
            "type": "BEDROCK_RERANKING_MODEL",
            "bedrockRerankingConfiguration": bedrock_reranking_configuration,
        }
        response = bedrock_runtime.rerank(
            queries=queries, sources=text_sources, rerankingConfiguration=reranking_configuration
        )

        rerank_documents = []
        for result in response["results"]:
            # format document
            index = result["index"]
            rerank_document = RerankDocument(
                index=index,
                text=docs[index],
                score=result["relevanceScore"],
            )

            # score threshold check
            if score_threshold is not None and rerank_document.score < score_threshold:
                continue
            rerank_documents.append(rerank_document)

        return RerankResult(model=model, docs=rerank_documents)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials by running one small rerank request

        :param model: model name
        :param credentials: model credentials
        :return:
        :raises CredentialsValidateFailedError: if the request fails for any reason
        """
        try:
            self.invoke(
                model=model,
                credentials=credentials,
                query="What is the capital of the United States?",
                docs=[
                    "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
                    "Census, Carson City had a population of 55,274.",
                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
                    "are a political division controlled by the United States. Its capital is Saipan.",
                ],
                score_threshold=0.8,
            )
        except Exception as ex:
            # Chain the original exception so its traceback is kept.
            raise CredentialsValidateFailedError(str(ex)) from ex

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        The key is the error type thrown to the caller
        The value is the error type thrown by the model,
        which needs to be converted into a unified error type for the caller.

        :return: Invoke error mapping
        """
        # No model-specific exception types are mapped yet.
        return {
            InvokeConnectionError: [],
            InvokeServerUnavailableError: [],
            InvokeRateLimitError: [],
            InvokeAuthorizationError: [],
            InvokeBadRequestError: [],
        }
|
||||
@ -0,0 +1,4 @@
|
||||
model: rerank-v3.5
|
||||
model_type: rerank
|
||||
model_properties:
|
||||
context_size: 5120
|
||||
@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-exp
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Exp
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -0,0 +1,38 @@
|
||||
model: gemini-exp-1206
|
||||
label:
|
||||
en_US: Gemini exp 1206
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -0,0 +1,25 @@
|
||||
model: gemma-7b-it
|
||||
label:
|
||||
zh_Hans: Gemma 7B Instruction Tuned
|
||||
en_US: Gemma 7B Instruction Tuned
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -0,0 +1,25 @@
|
||||
model: gemma2-9b-it
|
||||
label:
|
||||
zh_Hans: Gemma 2 9B Instruction Tuned
|
||||
en_US: Gemma 2 9B Instruction Tuned
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -0,0 +1,25 @@
|
||||
model: llama-3.3-70b-specdec
|
||||
label:
|
||||
zh_Hans: Llama 3.3 70B Specdec
|
||||
en_US: Llama 3.3 70B Specdec
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32768
|
||||
pricing:
|
||||
input: "0.05"
|
||||
output: "0.1"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@ -0,0 +1,25 @@
|
||||
model: llama-3.3-70b-versatile
|
||||
label:
|
||||
zh_Hans: Llama 3.3 70B Versatile
|
||||
en_US: Llama 3.3 70B Versatile
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32768
|
||||
pricing:
|
||||
input: "0.05"
|
||||
output: "0.1"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@ -0,0 +1,25 @@
|
||||
model: llama3-groq-70b-8192-tool-use-preview
|
||||
label:
|
||||
zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
|
||||
en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.08'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -0,0 +1,52 @@
|
||||
model: pixtral-large-2411
|
||||
label:
|
||||
zh_Hans: pixtral-large-2411
|
||||
en_US: pixtral-large-2411
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.7
|
||||
min: 0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
default: 1
|
||||
min: 0
|
||||
max: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: safe_prompt
|
||||
default: false
|
||||
type: boolean
|
||||
help:
|
||||
en_US: Whether to inject a safety prompt before all conversations.
|
||||
zh_Hans: 是否开启提示词审查
|
||||
label:
|
||||
en_US: SafePrompt
|
||||
zh_Hans: 提示词审查
|
||||
- name: random_seed
|
||||
type: int
|
||||
help:
|
||||
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
|
||||
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
|
||||
label:
|
||||
en_US: RandomSeed
|
||||
zh_Hans: 随机数种子
|
||||
default: 0
|
||||
min: 0
|
||||
max: 2147483647
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@ -0,0 +1,52 @@
|
||||
model: pixtral-large-latest
|
||||
label:
|
||||
zh_Hans: pixtral-large-latest
|
||||
en_US: pixtral-large-latest
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.7
|
||||
min: 0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
default: 1
|
||||
min: 0
|
||||
max: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: safe_prompt
|
||||
default: false
|
||||
type: boolean
|
||||
help:
|
||||
en_US: Whether to inject a safety prompt before all conversations.
|
||||
zh_Hans: 是否开启提示词审查
|
||||
label:
|
||||
en_US: SafePrompt
|
||||
zh_Hans: 提示词审查
|
||||
- name: random_seed
|
||||
type: int
|
||||
help:
|
||||
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
|
||||
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
|
||||
label:
|
||||
en_US: RandomSeed
|
||||
zh_Hans: 随机数种子
|
||||
default: 0
|
||||
min: 0
|
||||
max: 2147483647
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@ -0,0 +1,53 @@
|
||||
model: meta-llama/Llama-3.3-70B-Instruct
|
||||
label:
|
||||
en_US: meta-llama/Llama-3.3-70B-Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: Specifies the format that the model must output.
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '4.13'
|
||||
output: '4.13'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
@ -0,0 +1,53 @@
|
||||
model: Qwen/QwQ-32B-Preview
|
||||
label:
|
||||
en_US: Qwen/QwQ-32B-Preview
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: Specifies the format that the model must output.
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '1.26'
|
||||
output: '1.26'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-exp
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Exp
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue