From c334af00f7bec49554a9bb34965308e1b52daa87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=90=B1=E5=B3=B6=20=E5=85=8B=E8=8B=B1?= Date: Wed, 14 May 2025 16:43:56 +0900 Subject: [PATCH] Fix: PGVector table index creation issue --- .../rag/datasource/vdb/pgvector/pgvector.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 366a21c381..dfa94468af 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -1,6 +1,7 @@ import json import logging import uuid +import hashlib from contextlib import contextmanager from typing import Any @@ -61,12 +62,12 @@ CREATE TABLE IF NOT EXISTS {table_name} ( """ SQL_CREATE_INDEX = """ -CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} +CREATE INDEX IF NOT EXISTS hnsw_idx_{index_hash} ON {table_name} USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); """ SQL_CREATE_INDEX_PG_BIGM = """ -CREATE INDEX IF NOT EXISTS bigm_idx ON {table_name} +CREATE INDEX IF NOT EXISTS bigm_idx_{index_hash} ON {table_name} USING gin (text gin_bigm_ops); """ @@ -76,6 +77,7 @@ class PGVector(BaseVector): super().__init__(collection_name) self.pool = self._create_connection_pool(config) self.table_name = f"embedding_{collection_name}" + self.index_hash = hashlib.md5(self.table_name.encode()).hexdigest()[:8] self.pg_bigm = config.pg_bigm def get_type(self) -> str: @@ -256,10 +258,15 @@ class PGVector(BaseVector): # PG hnsw index only support 2000 dimension or less # ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing if dimension <= 2000: - cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name)) + cur.execute(SQL_CREATE_INDEX.format( + table_name=self.table_name, + index_hash=self.index_hash + )) if self.pg_bigm: - cur.execute("CREATE EXTENSION IF NOT EXISTS pg_bigm") - cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name)) + cur.execute(SQL_CREATE_INDEX_PG_BIGM.format( + table_name=self.table_name, + index_hash=self.index_hash + )) redis_client.set(collection_exist_cache_key, 1, ex=3600)