fix: resolve CI linting issues and add missing newlines

- Fix all line length issues (120 character limit)
- Remove all trailing whitespace
- Add missing newlines at end of files
- Add CLICKZETTA_VOLUME_DIFY_PREFIX environment variable to docker-compose.yaml
- Ensure proper code formatting for all ClickZetta files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
pull/22551/head
yunqiqiliang 10 months ago
parent f3b1bdc04f
commit f57fa13f1b

@ -67,3 +67,4 @@ class ClickzettaConfig(BaseModel):
description="Distance function for vector similarity: l2_distance or cosine_distance",
default="cosine_distance",
)

@ -191,7 +191,8 @@ class ClickzettaVector(BaseVector):
id STRING NOT NULL COMMENT 'Unique document identifier',
{Field.CONTENT_KEY.value} STRING NOT NULL COMMENT 'Document text content for search and retrieval',
{Field.METADATA_KEY.value} JSON COMMENT 'Document metadata including source, type, and other attributes',
{Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT 'High-dimensional embedding vector for semantic similarity search',
{Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT
'High-dimensional embedding vector for semantic similarity search',
PRIMARY KEY (id)
) COMMENT 'Dify RAG knowledge base vector storage table for document embeddings and content'
"""
@ -363,13 +364,18 @@ class ClickzettaVector(BaseVector):
# Use parameterized INSERT with executemany for better performance and security
# Cast JSON and VECTOR in SQL, pass raw data as parameters
columns = f"id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}, {Field.VECTOR.value}"
insert_sql = f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
insert_sql = (
f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) "
f"VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
)
with self._connection.cursor() as cursor:
try:
cursor.executemany(insert_sql, data_rows)
logger.info(f"Inserted batch {batch_index // batch_size + 1}/{total_batches} "
f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)")
logger.info(
f"Inserted batch {batch_index // batch_size + 1}/{total_batches} "
f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)"
)
except Exception as e:
logger.exception(f"Parameterized SQL execution failed for {len(data_rows)} documents: {e}")
logger.exception(f"SQL template: {insert_sql}")
@ -445,7 +451,9 @@ class ClickzettaVector(BaseVector):
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})")
filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)
# No need for dataset_id filter since each dataset has its own table
@ -541,7 +549,9 @@ class ClickzettaVector(BaseVector):
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})")
filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)
# No need for dataset_id filter since each dataset has its own table
@ -620,7 +630,9 @@ class ClickzettaVector(BaseVector):
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})")
filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)
# No need for dataset_id filter since each dataset has its own table
@ -724,3 +736,4 @@ class ClickzettaVectorFactory(AbstractVectorFactory):
collection_name = Dataset.gen_collection_name_by_id(dataset.id).lower()
return ClickzettaVector(collection_name=collection_name, config=config)

@ -105,10 +105,15 @@ class VolumePermissionManager:
result = cursor.fetchone()
if result:
logger.debug(f"User Volume permission check for {current_user}, operation {operation.name}: granted (basic connection verified)")
logger.debug(
f"User Volume permission check for {current_user}, operation {operation.name}: "
f"granted (basic connection verified)"
)
return True
else:
logger.warning(f"User Volume permission check failed: cannot verify basic connection for {current_user}")
logger.warning(
f"User Volume permission check failed: cannot verify basic connection for {current_user}"
)
return False
except Exception as e:
@ -342,7 +347,8 @@ class VolumePermissionManager:
logger.info(f"Raw grants result for {volume_name}: {grants}")
# 解析权限结果
# 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to, grantee_name, grantor_name, grant_option, granted_time)
# 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
# grantee_name, grantor_name, grant_option, granted_time)
for grant in grants:
logger.info(f"Processing grant: {grant}")
if len(grant) >= 5:
@ -351,11 +357,15 @@ class VolumePermissionManager:
granted_on = grant[3]
object_name = grant[4]
logger.info(f"Grant details - type: {granted_type}, privilege: {privilege}, granted_on: {granted_on}, object_name: {object_name}")
logger.info(
f"Grant details - type: {granted_type}, privilege: {privilege}, "
f"granted_on: {granted_on}, object_name: {object_name}"
)
# 检查是否是对该Volume的权限或者是层级权限
if (granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)) or \
(granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"):
if ((granted_type == "PRIVILEGE" and granted_on == "VOLUME" and
object_name.endswith(volume_name)) or
(granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME")):
logger.info(f"Matching grant found for {volume_name}")

@ -87,11 +87,12 @@ x-shared-env: &shared-api-worker-env
WEB_API_CORS_ALLOW_ORIGINS: ${WEB_API_CORS_ALLOW_ORIGINS:-*}
CONSOLE_CORS_ALLOW_ORIGINS: ${CONSOLE_CORS_ALLOW_ORIGINS:-*}
STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user}
CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-}
CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_}
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
CLICKZETTA_VOLUME_DIFY_PREFIX: ${CLICKZETTA_VOLUME_DIFY_PREFIX:-dify_km}
S3_ENDPOINT: ${S3_ENDPOINT:-}
S3_REGION: ${S3_REGION:-us-east-1}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai}

Loading…
Cancel
Save