fix: drop dead code phase2 unused class (#22042)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
parent 3587bd4040
commit d2933c2bfe
@@ -1,52 +0,0 @@
import base64
import hashlib
import hmac
import os
import time

from pydantic import BaseModel, Field

from configs import dify_config


class SignedUrlParams(BaseModel):
    sign_key: str = Field(..., description="The sign key")
    timestamp: str = Field(..., description="Timestamp")
    nonce: str = Field(..., description="Nonce")
    sign: str = Field(..., description="Signature")


class UrlSigner:
    @classmethod
    def get_signed_url(cls, url: str, sign_key: str, prefix: str) -> str:
        signed_url_params = cls.get_signed_url_params(sign_key, prefix)
        return (
            f"{url}?timestamp={signed_url_params.timestamp}"
            f"&nonce={signed_url_params.nonce}&sign={signed_url_params.sign}"
        )

    @classmethod
    def get_signed_url_params(cls, sign_key: str, prefix: str) -> SignedUrlParams:
        timestamp = str(int(time.time()))
        nonce = os.urandom(16).hex()
        sign = cls._sign(sign_key, timestamp, nonce, prefix)

        return SignedUrlParams(sign_key=sign_key, timestamp=timestamp, nonce=nonce, sign=sign)

    @classmethod
    def verify(cls, sign_key: str, timestamp: str, nonce: str, sign: str, prefix: str) -> bool:
        recalculated_sign = cls._sign(sign_key, timestamp, nonce, prefix)

        return sign == recalculated_sign

    @classmethod
    def _sign(cls, sign_key: str, timestamp: str, nonce: str, prefix: str) -> str:
        if not dify_config.SECRET_KEY:
            raise Exception("SECRET_KEY is not set")

        data_to_sign = f"{prefix}|{sign_key}|{timestamp}|{nonce}"
        secret_key = dify_config.SECRET_KEY.encode()
        sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
        encoded_sign = base64.urlsafe_b64encode(sign).decode()

        return encoded_sign
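For context, the removed signer was self-contained. A minimal usage sketch, with hypothetical values throughout, and assuming dify_config.SECRET_KEY is set:

signed = UrlSigner.get_signed_url(
    url="https://example.com/files/preview",  # hypothetical endpoint
    sign_key="file-key",
    prefix="file-preview",
)
# The receiving side would pull timestamp/nonce/sign off the query string and
# round-trip them through verify():
# UrlSigner.verify(sign_key="file-key", timestamp=ts, nonce=nonce, sign=sig, prefix="file-preview")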
@@ -1,12 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        from unstructured.cleaners.core import clean_extra_whitespace

        # Returns "ITEM 1A: RISK FACTORS"
        return clean_extra_whitespace(content)
@@ -1,15 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredGroupBrokenParagraphsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        import re

        from unstructured.cleaners.core import group_broken_paragraphs

        para_split_re = re.compile(r"(\s*\n\s*){3}")

        return group_broken_paragraphs(content, paragraph_split=para_split_re)
@@ -1,12 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        from unstructured.cleaners.core import clean_non_ascii_chars

        # Returns "This text contains non-ascii characters!"
        return clean_non_ascii_chars(content)
@@ -1,12 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """Replaces unicode quote characters, such as the \x91 character in a string."""

        from unstructured.cleaners.core import replace_unicode_quotes

        return replace_unicode_quotes(content)
@@ -1,11 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredTranslateTextCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        from unstructured.cleaners.translate import translate_text

        return translate_text(content)
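All five removed cleaners implement the same one-method BaseCleaner interface, so they compose by chaining. A hypothetical sketch, assuming the unstructured package is installed:

# Hypothetical pipeline; each clean() takes a str and returns a str.
cleaners = [
    UnstructuredGroupBrokenParagraphsCleaner(),
    UnstructuredTranslateTextCleaner(),
]
text = "ITEM 1A:\n\n\n\nRISK FACTORS"
for cleaner in cleaners:
    text = cleaner.clean(text)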
@@ -1,17 +0,0 @@
from typing import Optional

from pydantic import BaseModel


class ClusterEntity(BaseModel):
    """
    Model Config Entity.
    """

    name: str
    cluster_id: str
    displayName: str
    region: str
    spendingLimit: Optional[int] = 1000
    version: str
    createdBy: str
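A hypothetical instantiation of the removed entity; spendingLimit falls back to its declared default of 1000 when omitted:

cluster = ClusterEntity(
    name="demo",  # hypothetical values throughout
    cluster_id="c-123",
    displayName="Demo Cluster",
    region="us-east-1",
    version="1.0",
    createdBy="admin",
)
assert cluster.spendingLimit == 1000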
@@ -1,47 +0,0 @@
import logging

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document

logger = logging.getLogger(__name__)


class UnstructuredPDFExtractor(BaseExtractor):
    """Load pdf files.


    Args:
        file_path: Path to the file to load.

        api_url: Unstructured API URL

        api_key: Unstructured API Key
    """

    def __init__(self, file_path: str, api_url: str, api_key: str):
        """Initialize with file path."""
        self._file_path = file_path
        self._api_url = api_url
        self._api_key = api_key

    def extract(self) -> list[Document]:
        if self._api_url:
            from unstructured.partition.api import partition_via_api

            elements = partition_via_api(
                filename=self._file_path, api_url=self._api_url, api_key=self._api_key, strategy="auto"
            )
        else:
            from unstructured.partition.pdf import partition_pdf

            elements = partition_pdf(filename=self._file_path, strategy="auto")

        from unstructured.chunking.title import chunk_by_title

        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
        documents = []
        for chunk in chunks:
            text = chunk.text.strip()
            documents.append(Document(page_content=text))

        return documents
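A hedged usage sketch of the removed extractor: a falsy api_url takes the local partition_pdf branch (which needs the unstructured PDF extras installed), while a non-empty one would go through partition_via_api. The path below is hypothetical:

extractor = UnstructuredPDFExtractor(file_path="/tmp/report.pdf", api_url="", api_key="")
documents = extractor.extract()  # list[Document], chunked by title at up to 2000 chars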
@@ -1,34 +0,0 @@
import logging

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document

logger = logging.getLogger(__name__)


class UnstructuredTextExtractor(BaseExtractor):
    """Load msg files.


    Args:
        file_path: Path to the file to load.
    """

    def __init__(self, file_path: str, api_url: str):
        """Initialize with file path."""
        self._file_path = file_path
        self._api_url = api_url

    def extract(self) -> list[Document]:
        from unstructured.partition.text import partition_text

        elements = partition_text(filename=self._file_path)
        from unstructured.chunking.title import chunk_by_title

        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
        documents = []
        for chunk in chunks:
            text = chunk.text.strip()
            documents.append(Document(page_content=text))

        return documents
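The text extractor mirrored the PDF one, except that the stored api_url was never read by extract() — one symptom of this being dead code. Hypothetical usage:

documents = UnstructuredTextExtractor(file_path="/tmp/notes.txt", api_url="").extract()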
@@ -1,79 +0,0 @@
from typing import Optional

from pydantic import BaseModel

from core.app.entities.app_invoke_entities import InvokeFrom
from core.workflow.nodes.base import BaseIterationState, BaseLoopState, BaseNode
from models.enums import UserFrom
from models.workflow import Workflow, WorkflowType

from .node_entities import NodeRunResult
from .variable_pool import VariablePool


class WorkflowNodeAndResult:
    node: BaseNode
    result: Optional[NodeRunResult] = None

    def __init__(self, node: BaseNode, result: Optional[NodeRunResult] = None):
        self.node = node
        self.result = result


class WorkflowRunState:
    tenant_id: str
    app_id: str
    workflow_id: str
    workflow_type: WorkflowType
    user_id: str
    user_from: UserFrom
    invoke_from: InvokeFrom

    workflow_call_depth: int

    start_at: float
    variable_pool: VariablePool

    total_tokens: int = 0

    workflow_nodes_and_results: list[WorkflowNodeAndResult]

    class NodeRun(BaseModel):
        node_id: str
        iteration_node_id: str
        loop_node_id: str

    workflow_node_runs: list[NodeRun]
    workflow_node_steps: int

    current_iteration_state: Optional[BaseIterationState]
    current_loop_state: Optional[BaseLoopState]

    def __init__(
        self,
        workflow: Workflow,
        start_at: float,
        variable_pool: VariablePool,
        user_id: str,
        user_from: UserFrom,
        invoke_from: InvokeFrom,
        workflow_call_depth: int,
    ):
        self.workflow_id = workflow.id
        self.tenant_id = workflow.tenant_id
        self.app_id = workflow.app_id
        self.workflow_type = WorkflowType.value_of(workflow.type)
        self.user_id = user_id
        self.user_from = user_from
        self.invoke_from = invoke_from
        self.workflow_call_depth = workflow_call_depth

        self.start_at = start_at
        self.variable_pool = variable_pool

        self.total_tokens = 0

        self.workflow_node_steps = 1
        self.workflow_node_runs = []
        self.current_iteration_state = None
        self.current_loop_state = None
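Constructing the removed state object required live Workflow and VariablePool instances, so any example is necessarily a heavily hedged sketch with placeholder objects (the enum members below are assumptions, not confirmed by this diff):

state = WorkflowRunState(
    workflow=workflow,            # hypothetical models.workflow.Workflow row
    start_at=time.time(),
    variable_pool=variable_pool,  # hypothetical VariablePool instance
    user_id="user-1",
    user_from=UserFrom.ACCOUNT,       # assumed enum member
    invoke_from=InvokeFrom.DEBUGGER,  # assumed enum member
    workflow_call_depth=0,
)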
@@ -1,11 +0,0 @@
import json

from pydantic import BaseModel


class PydanticModelEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, BaseModel):
            return o.model_dump()
        else:
            super().default(o)
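A usage sketch for the removed encoder: json.dumps only consults default() for objects it cannot serialize natively, so BaseModel instances are routed through model_dump(), and anything else still raises TypeError via super().default(). The Point model is hypothetical:

class Point(BaseModel):
    x: int
    y: int

json.dumps({"p": Point(x=1, y=2)}, cls=PydanticModelEncoder)  # -> '{"p": {"x": 1, "y": 2}}'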