fix: resolve CI linting issues and add missing newlines

- Fix all line-length violations (120-character limit)
- Remove all trailing whitespace
- Add missing newlines at end of files
- Add CLICKZETTA_VOLUME_DIFY_PREFIX environment variable to docker-compose.yaml
- Ensure proper code formatting for all ClickZetta files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
pull/22551/head
yunqiqiliang 10 months ago
parent f3b1bdc04f
commit f57fa13f1b

@ -67,3 +67,4 @@ class ClickzettaConfig(BaseModel):
description="Distance function for vector similarity: l2_distance or cosine_distance", description="Distance function for vector similarity: l2_distance or cosine_distance",
default="cosine_distance", default="cosine_distance",
) )

@ -191,7 +191,8 @@ class ClickzettaVector(BaseVector):
id STRING NOT NULL COMMENT 'Unique document identifier', id STRING NOT NULL COMMENT 'Unique document identifier',
{Field.CONTENT_KEY.value} STRING NOT NULL COMMENT 'Document text content for search and retrieval', {Field.CONTENT_KEY.value} STRING NOT NULL COMMENT 'Document text content for search and retrieval',
{Field.METADATA_KEY.value} JSON COMMENT 'Document metadata including source, type, and other attributes', {Field.METADATA_KEY.value} JSON COMMENT 'Document metadata including source, type, and other attributes',
{Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT 'High-dimensional embedding vector for semantic similarity search', {Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT
'High-dimensional embedding vector for semantic similarity search',
PRIMARY KEY (id) PRIMARY KEY (id)
) COMMENT 'Dify RAG knowledge base vector storage table for document embeddings and content' ) COMMENT 'Dify RAG knowledge base vector storage table for document embeddings and content'
""" """
@ -363,13 +364,18 @@ class ClickzettaVector(BaseVector):
# Use parameterized INSERT with executemany for better performance and security # Use parameterized INSERT with executemany for better performance and security
# Cast JSON and VECTOR in SQL, pass raw data as parameters # Cast JSON and VECTOR in SQL, pass raw data as parameters
columns = f"id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}, {Field.VECTOR.value}" columns = f"id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}, {Field.VECTOR.value}"
insert_sql = f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))" insert_sql = (
f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) "
f"VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
)
with self._connection.cursor() as cursor: with self._connection.cursor() as cursor:
try: try:
cursor.executemany(insert_sql, data_rows) cursor.executemany(insert_sql, data_rows)
logger.info(f"Inserted batch {batch_index // batch_size + 1}/{total_batches} " logger.info(
f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)") f"Inserted batch {batch_index // batch_size + 1}/{total_batches} "
f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)"
)
except Exception as e: except Exception as e:
logger.exception(f"Parameterized SQL execution failed for {len(data_rows)} documents: {e}") logger.exception(f"Parameterized SQL execution failed for {len(data_rows)} documents: {e}")
logger.exception(f"SQL template: {insert_sql}") logger.exception(f"SQL template: {insert_sql}")
@ -445,7 +451,9 @@ class ClickzettaVector(BaseVector):
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter] safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids) doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility # Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})") filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)
# No need for dataset_id filter since each dataset has its own table # No need for dataset_id filter since each dataset has its own table
@ -541,7 +549,9 @@ class ClickzettaVector(BaseVector):
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter] safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids) doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility # Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})") filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)
# No need for dataset_id filter since each dataset has its own table # No need for dataset_id filter since each dataset has its own table
@ -620,7 +630,9 @@ class ClickzettaVector(BaseVector):
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter] safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids) doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility # Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})") filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)
# No need for dataset_id filter since each dataset has its own table # No need for dataset_id filter since each dataset has its own table
@ -724,3 +736,4 @@ class ClickzettaVectorFactory(AbstractVectorFactory):
collection_name = Dataset.gen_collection_name_by_id(dataset.id).lower() collection_name = Dataset.gen_collection_name_by_id(dataset.id).lower()
return ClickzettaVector(collection_name=collection_name, config=config) return ClickzettaVector(collection_name=collection_name, config=config)

@ -105,10 +105,15 @@ class VolumePermissionManager:
result = cursor.fetchone() result = cursor.fetchone()
if result: if result:
logger.debug(f"User Volume permission check for {current_user}, operation {operation.name}: granted (basic connection verified)") logger.debug(
f"User Volume permission check for {current_user}, operation {operation.name}: "
f"granted (basic connection verified)"
)
return True return True
else: else:
logger.warning(f"User Volume permission check failed: cannot verify basic connection for {current_user}") logger.warning(
f"User Volume permission check failed: cannot verify basic connection for {current_user}"
)
return False return False
except Exception as e: except Exception as e:
@ -342,7 +347,8 @@ class VolumePermissionManager:
logger.info(f"Raw grants result for {volume_name}: {grants}") logger.info(f"Raw grants result for {volume_name}: {grants}")
# 解析权限结果 # 解析权限结果
# 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to, grantee_name, grantor_name, grant_option, granted_time) # 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
# grantee_name, grantor_name, grant_option, granted_time)
for grant in grants: for grant in grants:
logger.info(f"Processing grant: {grant}") logger.info(f"Processing grant: {grant}")
if len(grant) >= 5: if len(grant) >= 5:
@ -351,11 +357,15 @@ class VolumePermissionManager:
granted_on = grant[3] granted_on = grant[3]
object_name = grant[4] object_name = grant[4]
logger.info(f"Grant details - type: {granted_type}, privilege: {privilege}, granted_on: {granted_on}, object_name: {object_name}") logger.info(
f"Grant details - type: {granted_type}, privilege: {privilege}, "
f"granted_on: {granted_on}, object_name: {object_name}"
)
# 检查是否是对该Volume的权限或者是层级权限 # 检查是否是对该Volume的权限或者是层级权限
if (granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)) or \ if ((granted_type == "PRIVILEGE" and granted_on == "VOLUME" and
(granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"): object_name.endswith(volume_name)) or
(granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME")):
logger.info(f"Matching grant found for {volume_name}") logger.info(f"Matching grant found for {volume_name}")

@ -87,11 +87,12 @@ x-shared-env: &shared-api-worker-env
WEB_API_CORS_ALLOW_ORIGINS: ${WEB_API_CORS_ALLOW_ORIGINS:-*} WEB_API_CORS_ALLOW_ORIGINS: ${WEB_API_CORS_ALLOW_ORIGINS:-*}
CONSOLE_CORS_ALLOW_ORIGINS: ${CONSOLE_CORS_ALLOW_ORIGINS:-*} CONSOLE_CORS_ALLOW_ORIGINS: ${CONSOLE_CORS_ALLOW_ORIGINS:-*}
STORAGE_TYPE: ${STORAGE_TYPE:-opendal} STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user} CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user}
CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-} CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-}
CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_} CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_}
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs} CLICKZETTA_VOLUME_DIFY_PREFIX: ${CLICKZETTA_VOLUME_DIFY_PREFIX:-dify_km}
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
S3_ENDPOINT: ${S3_ENDPOINT:-} S3_ENDPOINT: ${S3_ENDPOINT:-}
S3_REGION: ${S3_REGION:-us-east-1} S3_REGION: ${S3_REGION:-us-east-1}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai} S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai}

Loading…
Cancel
Save