pull/22616/merge
LZC6244 10 months ago committed by GitHub
commit db1b7644d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -28,152 +28,151 @@ class AbstractVectorFactory(ABC):
index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}} index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}}
return index_struct_dict return index_struct_dict
VECTOR_FACTORY_CLS: type[AbstractVectorFactory]
class Vector: match dify_config.VECTOR_STORE:
def __init__(self, dataset: Dataset, attributes: Optional[list] = None):
    """Bind the facade to ``dataset`` and build its vector processor.

    Args:
        dataset: Dataset whose vectors this instance manages.
        attributes: Metadata field names handed to the vector factory.
            When ``None``, the four document-identity fields below are used.
    """
    chosen_attributes = (
        ["doc_id", "dataset_id", "document_id", "doc_hash"] if attributes is None else attributes
    )

    # Assignment order is kept as-is: the helper calls below run against
    # whatever state has already been stored on ``self``.
    self._dataset = dataset
    self._embeddings = self._get_embeddings()
    self._attributes = chosen_attributes
    self._vector_processor = self._init_vector()
def _init_vector(self) -> BaseVector:
    """Pick the vector store type for this dataset and build its processor.

    Resolution order:
      1. the ``"type"`` recorded in the dataset's ``index_struct_dict``, if any;
      2. ``VectorType.TIDB_ON_QDRANT`` when whitelisting is enabled and the
         dataset's tenant has a ``"vector_db"`` whitelist row;
      3. the globally configured ``dify_config.VECTOR_STORE``.

    Returns:
        The object produced by the selected factory's ``init_vector``.

    Raises:
        ValueError: If no vector store type could be resolved.
    """
    configured_type = dify_config.VECTOR_STORE
    index_struct = self._dataset.index_struct_dict

    if index_struct:
        resolved_type = index_struct["type"]
    elif dify_config.VECTOR_STORE_WHITELIST_ENABLE:
        whitelist_query = db.session.query(Whitelist).where(
            Whitelist.tenant_id == self._dataset.tenant_id,
            Whitelist.category == "vector_db",
        )
        resolved_type = VectorType.TIDB_ON_QDRANT if whitelist_query.one_or_none() else configured_type
    else:
        resolved_type = configured_type

    if not resolved_type:
        raise ValueError("Vector store must be specified.")

    factory = self.get_vector_factory(resolved_type)()
    return factory.init_vector(self._dataset, self._attributes, self._embeddings)
@staticmethod
def get_vector_factory(vector_type: str) -> type[AbstractVectorFactory]:
match vector_type:
case VectorType.CHROMA: case VectorType.CHROMA:
from core.rag.datasource.vdb.chroma.chroma_vector import ChromaVectorFactory from core.rag.datasource.vdb.chroma.chroma_vector import ChromaVectorFactory
return ChromaVectorFactory VECTOR_FACTORY_CLS = ChromaVectorFactory
case VectorType.MILVUS: case VectorType.MILVUS:
from core.rag.datasource.vdb.milvus.milvus_vector import MilvusVectorFactory from core.rag.datasource.vdb.milvus.milvus_vector import MilvusVectorFactory
return MilvusVectorFactory VECTOR_FACTORY_CLS = MilvusVectorFactory
case VectorType.MYSCALE: case VectorType.MYSCALE:
from core.rag.datasource.vdb.myscale.myscale_vector import MyScaleVectorFactory from core.rag.datasource.vdb.myscale.myscale_vector import MyScaleVectorFactory
return MyScaleVectorFactory VECTOR_FACTORY_CLS = MyScaleVectorFactory
case VectorType.PGVECTOR: case VectorType.PGVECTOR:
from core.rag.datasource.vdb.pgvector.pgvector import PGVectorFactory from core.rag.datasource.vdb.pgvector.pgvector import PGVectorFactory
return PGVectorFactory VECTOR_FACTORY_CLS = PGVectorFactory
case VectorType.VASTBASE: case VectorType.VASTBASE:
from core.rag.datasource.vdb.pyvastbase.vastbase_vector import VastbaseVectorFactory from core.rag.datasource.vdb.pyvastbase.vastbase_vector import VastbaseVectorFactory
return VastbaseVectorFactory VECTOR_FACTORY_CLS = VastbaseVectorFactory
case VectorType.PGVECTO_RS: case VectorType.PGVECTO_RS:
from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRSFactory from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRSFactory
return PGVectoRSFactory VECTOR_FACTORY_CLS = PGVectoRSFactory
case VectorType.QDRANT: case VectorType.QDRANT:
from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantVectorFactory from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantVectorFactory
return QdrantVectorFactory VECTOR_FACTORY_CLS = QdrantVectorFactory
case VectorType.RELYT: case VectorType.RELYT:
from core.rag.datasource.vdb.relyt.relyt_vector import RelytVectorFactory from core.rag.datasource.vdb.relyt.relyt_vector import RelytVectorFactory
return RelytVectorFactory VECTOR_FACTORY_CLS = RelytVectorFactory
case VectorType.ELASTICSEARCH: case VectorType.ELASTICSEARCH:
from core.rag.datasource.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory from core.rag.datasource.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory
return ElasticSearchVectorFactory VECTOR_FACTORY_CLS = ElasticSearchVectorFactory
case VectorType.ELASTICSEARCH_JA: case VectorType.ELASTICSEARCH_JA:
from core.rag.datasource.vdb.elasticsearch.elasticsearch_ja_vector import ( from core.rag.datasource.vdb.elasticsearch.elasticsearch_ja_vector import (
ElasticSearchJaVectorFactory, ElasticSearchJaVectorFactory,
) )
return ElasticSearchJaVectorFactory VECTOR_FACTORY_CLS = ElasticSearchJaVectorFactory
case VectorType.TIDB_VECTOR: case VectorType.TIDB_VECTOR:
from core.rag.datasource.vdb.tidb_vector.tidb_vector import TiDBVectorFactory from core.rag.datasource.vdb.tidb_vector.tidb_vector import TiDBVectorFactory
return TiDBVectorFactory VECTOR_FACTORY_CLS = TiDBVectorFactory
case VectorType.WEAVIATE: case VectorType.WEAVIATE:
from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateVectorFactory from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateVectorFactory
return WeaviateVectorFactory VECTOR_FACTORY_CLS = WeaviateVectorFactory
case VectorType.TENCENT: case VectorType.TENCENT:
from core.rag.datasource.vdb.tencent.tencent_vector import TencentVectorFactory from core.rag.datasource.vdb.tencent.tencent_vector import TencentVectorFactory
return TencentVectorFactory VECTOR_FACTORY_CLS = TencentVectorFactory
case VectorType.ORACLE: case VectorType.ORACLE:
from core.rag.datasource.vdb.oracle.oraclevector import OracleVectorFactory from core.rag.datasource.vdb.oracle.oraclevector import OracleVectorFactory
return OracleVectorFactory VECTOR_FACTORY_CLS = OracleVectorFactory
case VectorType.OPENSEARCH: case VectorType.OPENSEARCH:
from core.rag.datasource.vdb.opensearch.opensearch_vector import OpenSearchVectorFactory from core.rag.datasource.vdb.opensearch.opensearch_vector import OpenSearchVectorFactory
return OpenSearchVectorFactory VECTOR_FACTORY_CLS = OpenSearchVectorFactory
case VectorType.ANALYTICDB: case VectorType.ANALYTICDB:
from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
return AnalyticdbVectorFactory VECTOR_FACTORY_CLS = AnalyticdbVectorFactory
case VectorType.COUCHBASE: case VectorType.COUCHBASE:
from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory
return CouchbaseVectorFactory VECTOR_FACTORY_CLS = CouchbaseVectorFactory
case VectorType.BAIDU: case VectorType.BAIDU:
from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory
return BaiduVectorFactory VECTOR_FACTORY_CLS = BaiduVectorFactory
case VectorType.VIKINGDB: case VectorType.VIKINGDB:
from core.rag.datasource.vdb.vikingdb.vikingdb_vector import VikingDBVectorFactory from core.rag.datasource.vdb.vikingdb.vikingdb_vector import VikingDBVectorFactory
return VikingDBVectorFactory VECTOR_FACTORY_CLS = VikingDBVectorFactory
case VectorType.UPSTASH: case VectorType.UPSTASH:
from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVectorFactory from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVectorFactory
return UpstashVectorFactory VECTOR_FACTORY_CLS = UpstashVectorFactory
case VectorType.TIDB_ON_QDRANT: case VectorType.TIDB_ON_QDRANT:
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import TidbOnQdrantVectorFactory from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import TidbOnQdrantVectorFactory
return TidbOnQdrantVectorFactory VECTOR_FACTORY_CLS = TidbOnQdrantVectorFactory
case VectorType.LINDORM: case VectorType.LINDORM:
from core.rag.datasource.vdb.lindorm.lindorm_vector import LindormVectorStoreFactory from core.rag.datasource.vdb.lindorm.lindorm_vector import LindormVectorStoreFactory
return LindormVectorStoreFactory VECTOR_FACTORY_CLS = LindormVectorStoreFactory
case VectorType.OCEANBASE: case VectorType.OCEANBASE:
from core.rag.datasource.vdb.oceanbase.oceanbase_vector import OceanBaseVectorFactory from core.rag.datasource.vdb.oceanbase.oceanbase_vector import OceanBaseVectorFactory
return OceanBaseVectorFactory VECTOR_FACTORY_CLS = OceanBaseVectorFactory
case VectorType.OPENGAUSS: case VectorType.OPENGAUSS:
from core.rag.datasource.vdb.opengauss.opengauss import OpenGaussFactory from core.rag.datasource.vdb.opengauss.opengauss import OpenGaussFactory
return OpenGaussFactory VECTOR_FACTORY_CLS = OpenGaussFactory
case VectorType.TABLESTORE: case VectorType.TABLESTORE:
from core.rag.datasource.vdb.tablestore.tablestore_vector import TableStoreVectorFactory from core.rag.datasource.vdb.tablestore.tablestore_vector import TableStoreVectorFactory
return TableStoreVectorFactory VECTOR_FACTORY_CLS = TableStoreVectorFactory
case VectorType.HUAWEI_CLOUD: case VectorType.HUAWEI_CLOUD:
from core.rag.datasource.vdb.huawei.huawei_cloud_vector import HuaweiCloudVectorFactory from core.rag.datasource.vdb.huawei.huawei_cloud_vector import HuaweiCloudVectorFactory
return HuaweiCloudVectorFactory VECTOR_FACTORY_CLS = HuaweiCloudVectorFactory
case VectorType.MATRIXONE: case VectorType.MATRIXONE:
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory
return MatrixoneVectorFactory VECTOR_FACTORY_CLS = MatrixoneVectorFactory
case _: case _:
raise ValueError(f"Vector store {vector_type} is not supported.") raise ValueError(f"Vector store {dify_config.VECTOR_STORE} is not supported.")
class Vector:
def __init__(self, dataset: Dataset, attributes: Optional[list] = None):
    """Bind the facade to ``dataset`` and build its vector processor.

    Args:
        dataset: Dataset whose vectors this instance manages.
        attributes: Metadata field names handed to the vector factory.
            When ``None``, the four document-identity fields below are used.
    """
    chosen_attributes = (
        ["doc_id", "dataset_id", "document_id", "doc_hash"] if attributes is None else attributes
    )

    # Assignment order is kept as-is: the helper calls below run against
    # whatever state has already been stored on ``self``.
    self._dataset = dataset
    self._embeddings = self._get_embeddings()
    self._attributes = chosen_attributes
    self._vector_processor = self._init_vector()
def _init_vector(self) -> BaseVector:
    """Resolve this dataset's vector store type and build its processor.

    The store type starts as ``dify_config.VECTOR_STORE``. A ``"type"``
    recorded in the dataset's ``index_struct_dict`` overrides it. Otherwise,
    when whitelisting is enabled and the tenant has a ``"vector_db"``
    whitelist row, ``VectorType.TIDB_ON_QDRANT`` is used.

    Returns:
        The object produced by the selected factory's ``init_vector``.

    Raises:
        ValueError: If no vector store type could be resolved.
    """
    vector_type = dify_config.VECTOR_STORE

    if self._dataset.index_struct_dict:
        vector_type = self._dataset.index_struct_dict["type"]
    else:
        if dify_config.VECTOR_STORE_WHITELIST_ENABLE:
            whitelist = (
                db.session.query(Whitelist)
                .where(Whitelist.tenant_id == self._dataset.tenant_id, Whitelist.category == "vector_db")
                .one_or_none()
            )
            if whitelist:
                vector_type = VectorType.TIDB_ON_QDRANT

    if not vector_type:
        raise ValueError("Vector store must be specified.")

    # Look the factory up from the type resolved above. A factory bound once
    # from the global setting would ignore the per-dataset and whitelist
    # overrides and send the dataset to the wrong backend.
    vector_factory_cls = self.get_vector_factory(vector_type)
    return vector_factory_cls().init_vector(self._dataset, self._attributes, self._embeddings)
def create(self, texts: Optional[list] = None, **kwargs): def create(self, texts: Optional[list] = None, **kwargs):
if texts: if texts:

Loading…
Cancel
Save