fix: resolve CI linting issues and add missing newlines

- Fix all line length issues (120 character limit)
- Remove all trailing whitespace
- Add missing newlines at end of files
- Add CLICKZETTA_VOLUME_DIFY_PREFIX environment variable to docker-compose.yaml
- Ensure proper code formatting for all ClickZetta files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
10 months ago · f57fa13f1b
parent f3b1bdc04f
commit f57fa13f1b
12 changed files with 225 additions and 200 deletions
--- a/api/configs/middleware/vdb/clickzetta_config.py
+++ b/api/configs/middleware/vdb/clickzetta_config.py
@ -67,3 +67,4 @@ class ClickzettaConfig(BaseModel):
        description="Distance function for vector similarity: l2_distance or cosine_distance",
        default="cosine_distance",
    )
--- a/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py
+++ b/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py
@ -191,7 +191,8 @@ class ClickzettaVector(BaseVector):
            id STRING NOT NULL COMMENT 'Unique document identifier',
            {Field.CONTENT_KEY.value} STRING NOT NULL COMMENT 'Document text content for search and retrieval',
            {Field.METADATA_KEY.value} JSON COMMENT 'Document metadata including source, type, and other attributes',
-            {Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT 'High-dimensional embedding vector for semantic similarity search',
+            {Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT
                'High-dimensional embedding vector for semantic similarity search',
            PRIMARY KEY (id)
        ) COMMENT 'Dify RAG knowledge base vector storage table for document embeddings and content'
        """
@ -363,13 +364,18 @@ class ClickzettaVector(BaseVector):
        # Use parameterized INSERT with executemany for better performance and security
        # Cast JSON and VECTOR in SQL, pass raw data as parameters
        columns = f"id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}, {Field.VECTOR.value}"
-        insert_sql = f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
+        insert_sql = (
            f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) "
            f"VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
        )
        with self._connection.cursor() as cursor:
            try:
                cursor.executemany(insert_sql, data_rows)
-                logger.info(f"Inserted batch {batch_index // batch_size + 1}/{total_batches} "
+                logger.info(
-                           f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)")
+                    f"Inserted batch {batch_index // batch_size + 1}/{total_batches} "
                    f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)"
                )
            except Exception as e:
                logger.exception(f"Parameterized SQL execution failed for {len(data_rows)} documents: {e}")
                logger.exception(f"SQL template: {insert_sql}")
@ -445,7 +451,9 @@ class ClickzettaVector(BaseVector):
            safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
            doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
            # Use json_extract_string function for ClickZetta compatibility
-            filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})")
+            filter_clauses.append(
                f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
            )
        # No need for dataset_id filter since each dataset has its own table
@ -541,7 +549,9 @@ class ClickzettaVector(BaseVector):
            safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
            doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
            # Use json_extract_string function for ClickZetta compatibility
-            filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})")
+            filter_clauses.append(
                f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
            )
        # No need for dataset_id filter since each dataset has its own table
@ -620,7 +630,9 @@ class ClickzettaVector(BaseVector):
            safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
            doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
            # Use json_extract_string function for ClickZetta compatibility
-            filter_clauses.append(f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})")
+            filter_clauses.append(
                f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
            )
        # No need for dataset_id filter since each dataset has its own table
@ -724,3 +736,4 @@ class ClickzettaVectorFactory(AbstractVectorFactory):
        collection_name = Dataset.gen_collection_name_by_id(dataset.id).lower()
        return ClickzettaVector(collection_name=collection_name, config=config)
--- a/api/extensions/storage/clickzetta_volume/volume_permissions.py
+++ b/api/extensions/storage/clickzetta_volume/volume_permissions.py
@ -105,10 +105,15 @@ class VolumePermissionManager:
                result = cursor.fetchone()
                if result:
-                    logger.debug(f"User Volume permission check for {current_user}, operation {operation.name}: granted (basic connection verified)")
+                    logger.debug(
                        f"User Volume permission check for {current_user}, operation {operation.name}: "
                        f"granted (basic connection verified)"
                    )
                    return True
                else:
-                    logger.warning(f"User Volume permission check failed: cannot verify basic connection for {current_user}")
+                    logger.warning(
                        f"User Volume permission check failed: cannot verify basic connection for {current_user}"
                    )
                    return False
        except Exception as e:
@ -342,7 +347,8 @@ class VolumePermissionManager:
                logger.info(f"Raw grants result for {volume_name}: {grants}")
                # 解析权限结果
-                # 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to, grantee_name, grantor_name, grant_option, granted_time)
+                # 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
                #       grantee_name, grantor_name, grant_option, granted_time)
                for grant in grants:
                    logger.info(f"Processing grant: {grant}")
                    if len(grant) >= 5:
@ -351,11 +357,15 @@ class VolumePermissionManager:
                        granted_on = grant[3]
                        object_name = grant[4]
-                        logger.info(f"Grant details - type: {granted_type}, privilege: {privilege}, granted_on: {granted_on}, object_name: {object_name}")
+                        logger.info(
                            f"Grant details - type: {granted_type}, privilege: {privilege}, "
                            f"granted_on: {granted_on}, object_name: {object_name}"
                        )
                        # 检查是否是对该Volume的权限或者是层级权限
-                        if (granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)) or \
+                        if ((granted_type == "PRIVILEGE" and granted_on == "VOLUME" and
-                           (granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"):
+                             object_name.endswith(volume_name)) or
                            (granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME")):
                            logger.info(f"Matching grant found for {volume_name}")
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@ -87,11 +87,12 @@ x-shared-env: &shared-api-worker-env
  WEB_API_CORS_ALLOW_ORIGINS: ${WEB_API_CORS_ALLOW_ORIGINS:-*}
  CONSOLE_CORS_ALLOW_ORIGINS: ${CONSOLE_CORS_ALLOW_ORIGINS:-*}
  STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
  OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
  OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
  CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user}
  CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-}
  CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_}
-  OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
+  CLICKZETTA_VOLUME_DIFY_PREFIX: ${CLICKZETTA_VOLUME_DIFY_PREFIX:-dify_km}
  OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
  S3_ENDPOINT: ${S3_ENDPOINT:-}
  S3_REGION: ${S3_REGION:-us-east-1}
  S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai}