|
|
|
|
@ -1,60 +1,22 @@
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
import re
|
|
|
|
|
import time
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
from collections.abc import Mapping, Sequence
|
|
|
|
|
from typing import Any, Optional, cast
|
|
|
|
|
from typing import Any, cast
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import Integer, and_, func, or_, text
|
|
|
|
|
from sqlalchemy import cast as sqlalchemy_cast
|
|
|
|
|
|
|
|
|
|
from core.app.app_config.entities import DatasetRetrieveConfigEntity
|
|
|
|
|
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
|
|
|
|
|
from core.entities.agent_entities import PlanningStrategy
|
|
|
|
|
from core.entities.model_entities import ModelStatus
|
|
|
|
|
from core.model_manager import ModelInstance, ModelManager
|
|
|
|
|
from core.model_runtime.entities.message_entities import PromptMessageRole
|
|
|
|
|
from core.model_runtime.entities.model_entities import ModelFeature, ModelType
|
|
|
|
|
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
|
|
|
|
from core.prompt.simple_prompt_transform import ModelMode
|
|
|
|
|
from core.rag.datasource.retrieval_service import RetrievalService
|
|
|
|
|
from core.rag.entities.metadata_entities import Condition, MetadataCondition
|
|
|
|
|
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
|
|
|
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
|
|
|
|
from core.variables import StringSegment
|
|
|
|
|
from core.variables.segments import ObjectSegment
|
|
|
|
|
from core.workflow.entities.node_entities import NodeRunResult
|
|
|
|
|
from core.workflow.nodes.enums import NodeType
|
|
|
|
|
from core.workflow.nodes.event.event import ModelInvokeCompletedEvent
|
|
|
|
|
from core.workflow.nodes.knowledge_retrieval.template_prompts import (
|
|
|
|
|
METADATA_FILTER_ASSISTANT_PROMPT_1,
|
|
|
|
|
METADATA_FILTER_ASSISTANT_PROMPT_2,
|
|
|
|
|
METADATA_FILTER_COMPLETION_PROMPT,
|
|
|
|
|
METADATA_FILTER_SYSTEM_PROMPT,
|
|
|
|
|
METADATA_FILTER_USER_PROMPT_1,
|
|
|
|
|
METADATA_FILTER_USER_PROMPT_3,
|
|
|
|
|
)
|
|
|
|
|
from core.workflow.nodes.llm.entities import LLMNodeChatModelMessage, LLMNodeCompletionModelPromptTemplate
|
|
|
|
|
from core.workflow.nodes.llm.node import LLMNode
|
|
|
|
|
from core.workflow.nodes.question_classifier.template_prompts import QUESTION_CLASSIFIER_USER_PROMPT_2
|
|
|
|
|
from extensions.ext_database import db
|
|
|
|
|
from extensions.ext_redis import redis_client
|
|
|
|
|
from libs.json_in_md_parser import parse_and_check_json_markdown
|
|
|
|
|
from models.dataset import Dataset, DatasetMetadata, Document, RateLimitLog
|
|
|
|
|
from models.dataset import Dataset, Document, RateLimitLog
|
|
|
|
|
from models.workflow import WorkflowNodeExecutionStatus
|
|
|
|
|
from services.dataset_service import DatasetService, DocumentService
|
|
|
|
|
from services.dataset_service import DocumentService
|
|
|
|
|
from services.feature_service import FeatureService
|
|
|
|
|
|
|
|
|
|
from .entities import KnowledgeIndexNodeData, KnowledgeRetrievalNodeData, ModelConfig
|
|
|
|
|
from .entities import KnowledgeIndexNodeData
|
|
|
|
|
from .exc import (
|
|
|
|
|
InvalidModelTypeError,
|
|
|
|
|
KnowledgeIndexNodeError,
|
|
|
|
|
KnowledgeRetrievalNodeError,
|
|
|
|
|
ModelCredentialsNotInitializedError,
|
|
|
|
|
ModelNotExistError,
|
|
|
|
|
ModelNotSupportedError,
|
|
|
|
|
ModelQuotaExceededError,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
@ -138,16 +100,15 @@ class KnowledgeIndexNode(LLMNode):
|
|
|
|
|
error_type=type(e).__name__,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _invoke_knowledge_index(self, node_data: KnowledgeIndexNodeData, document_id: str, chunks: list[any]) -> Any:
|
|
|
|
|
def _invoke_knowledge_index(self, node_data: KnowledgeIndexNodeData, chunks: list[Any]) -> Any:
|
|
|
|
|
dataset = Dataset.query.filter_by(id=node_data.dataset_id).first()
|
|
|
|
|
if not dataset:
|
|
|
|
|
raise KnowledgeIndexNodeError(f"Dataset {node_data.dataset_id} not found.")
|
|
|
|
|
|
|
|
|
|
document = Document.query.filter_by(id=document_id).first()
|
|
|
|
|
|
|
|
|
|
document = Document.query.filter_by(id=node_data.document_id).first()
|
|
|
|
|
if not document:
|
|
|
|
|
raise KnowledgeIndexNodeError(f"Document {document_id} not found.")
|
|
|
|
|
|
|
|
|
|
raise KnowledgeIndexNodeError(f"Document {node_data.document_id} not found.")
|
|
|
|
|
|
|
|
|
|
DocumentService.invoke_knowledge_index(
|
|
|
|
|
dataset=dataset,
|
|
|
|
|
document=document,
|
|
|
|
|
@ -156,5 +117,12 @@ class KnowledgeIndexNode(LLMNode):
|
|
|
|
|
index_method=node_data.index_method,
|
|
|
|
|
retrieval_setting=node_data.retrieval_setting,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"dataset_id": dataset.id,
|
|
|
|
|
"dataset_name": dataset.name,
|
|
|
|
|
"document_id": document.id,
|
|
|
|
|
"document_name": document.name,
|
|
|
|
|
"created_at": document.created_at,
|
|
|
|
|
"display_status": document.indexing_status,
|
|
|
|
|
}
|
|
|
|
|
|