add param to set query timeout when do vector/full-text search

pull/18613/head
jiangzhijie 1 year ago
parent cf464d252d
commit e581c8b70a

@ -297,6 +297,7 @@ LINDORM_URL=http://ld-*******************-proxy-search-pub.lindorm.aliyuncs.com:
LINDORM_USERNAME=admin
LINDORM_PASSWORD=admin
USING_UGC_INDEX=False
QUERY_TIMEOUT=1
# OceanBase Vector configuration
OCEANBASE_VECTOR_HOST=127.0.0.1

@ -32,3 +32,8 @@ class LindormConfig(BaseSettings):
description="Using UGC index will store the same type of Index in a single index but can retrieve separately.",
default=False,
)
QUERY_TIMEOUT: Optional[float] = Field(
description="The lindorm search request timeout (s)",
default=2.0
)

@ -32,6 +32,7 @@ class LindormVectorStoreConfig(BaseModel):
username: Optional[str] = None
password: Optional[str] = None
using_ugc: Optional[bool] = False
request_timeout: Optional[float] = 1.0 # timeout units: s
@model_validator(mode="before")
@classmethod
@ -81,12 +82,12 @@ class LindormVectorStore(BaseVector):
self._client.indices.refresh(index=self._collection_name)
def add_texts(
self,
documents: list[Document],
embeddings: list[list[float]],
batch_size: int = 64,
timeout: int = 60,
**kwargs,
self,
documents: list[Document],
embeddings: list[list[float]],
batch_size: int = 64,
timeout: int = 60,
**kwargs,
):
logger.info(f"Total documents to add: {len(documents)}")
uuids = self._get_uuids(documents)
@ -251,7 +252,7 @@ class LindormVectorStore(BaseVector):
query = default_vector_search_query(query_vector=query_vector, k=top_k, filters=filters, **kwargs)
try:
params = {}
params = {"timeout": self._client_config.request_timeout}
if self._using_ugc:
params["routing"] = self._routing
response = self._client.search(index=self._collection_name, body=query, params=params)
@ -304,8 +305,8 @@ class LindormVectorStore(BaseVector):
routing=routing,
routing_field=self._routing_field,
)
response = self._client.search(index=self._collection_name, body=full_text_query)
params = {"timeout": self._client_config.request_timeout}
response = self._client.search(index=self._collection_name, body=full_text_query, params=params)
docs = []
for hit in response["hits"]["hits"]:
docs.append(
@ -444,17 +445,17 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dic
def default_text_search_query(
query_text: str,
k: int = 4,
text_field: str = Field.CONTENT_KEY.value,
must: Optional[list[dict]] = None,
must_not: Optional[list[dict]] = None,
should: Optional[list[dict]] = None,
minimum_should_match: int = 0,
filters: Optional[list[dict]] = None,
routing: Optional[str] = None,
routing_field: Optional[str] = None,
**kwargs,
query_text: str,
k: int = 4,
text_field: str = Field.CONTENT_KEY.value,
must: Optional[list[dict]] = None,
must_not: Optional[list[dict]] = None,
should: Optional[list[dict]] = None,
minimum_should_match: int = 0,
filters: Optional[list[dict]] = None,
routing: Optional[str] = None,
routing_field: Optional[str] = None,
**kwargs,
) -> dict:
query_clause: dict[str, Any] = {}
if routing is not None:
@ -501,17 +502,17 @@ def default_text_search_query(
def default_vector_search_query(
query_vector: list[float],
k: int = 4,
min_score: str = "0.0",
ef_search: Optional[str] = None, # only for hnsw
nprobe: Optional[str] = None, # "2000"
reorder_factor: Optional[str] = None, # "20"
client_refactor: Optional[str] = None, # "true"
vector_field: str = Field.VECTOR.value,
filters: Optional[list[dict]] = None,
filter_type: Optional[str] = None,
**kwargs,
query_vector: list[float],
k: int = 4,
min_score: str = "0.0",
ef_search: Optional[str] = None, # only for hnsw
nprobe: Optional[str] = None, # "2000"
reorder_factor: Optional[str] = None, # "20"
client_refactor: Optional[str] = None, # "true"
vector_field: str = Field.VECTOR.value,
filters: Optional[list[dict]] = None,
filter_type: Optional[str] = None,
**kwargs,
) -> dict:
if filters is not None:
filter_type = "pre_filter" if filter_type is None else filter_type
@ -554,6 +555,7 @@ class LindormVectorStoreFactory(AbstractVectorFactory):
username=dify_config.LINDORM_USERNAME,
password=dify_config.LINDORM_PASSWORD,
using_ugc=dify_config.USING_UGC_INDEX,
request_timeout=dify_config.QUERY_TIMEOUT
)
using_ugc = dify_config.USING_UGC_INDEX
if using_ugc is None:

@ -553,6 +553,7 @@ VIKINGDB_SOCKET_TIMEOUT=30
LINDORM_URL=http://lindorm:30070
LINDORM_USERNAME=lindorm
LINDORM_PASSWORD=lindorm
QUERY_TIMEOUT=1
# OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase`
OCEANBASE_VECTOR_HOST=oceanbase

Loading…
Cancel
Save