diff --git a/api/core/callback_handler/index_tool_callback_handler.py b/api/core/callback_handler/index_tool_callback_handler.py index fd818d9a27..13c22213c4 100644 --- a/api/core/callback_handler/index_tool_callback_handler.py +++ b/api/core/callback_handler/index_tool_callback_handler.py @@ -1,3 +1,5 @@ +import logging + from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.queue_entities import QueueRetrieverResourcesEvent @@ -7,6 +9,8 @@ from extensions.ext_database import db from models.dataset import ChildChunk, DatasetQuery, DocumentSegment from models.dataset import Document as DatasetDocument +_logger = logging.getLogger(__name__) + class DatasetIndexToolCallbackHandler: """Callback handler for dataset tool.""" @@ -42,12 +46,13 @@ class DatasetIndexToolCallbackHandler: """Handle tool end.""" for document in documents: if document.metadata is not None: - dataset_document = ( - db.session.query(DatasetDocument) - .filter(DatasetDocument.id == document.metadata["document_id"]) - .first() - ) + document_id = document.metadata["document_id"] + dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first() if not dataset_document: + _logger.warning( + "Expected DatasetDocument record to exist, but none was found, document_id=%s", + document_id, + ) continue if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: child_chunk = ( diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index b213b154e7..f41f5264c7 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -1,4 +1,5 @@ import datetime +import logging import time import click @@ -20,6 +21,8 @@ from models.model import ( from models.web import SavedMessage from services.feature_service import FeatureService +_logger = logging.getLogger(__name__) + @app.celery.task(queue="dataset") def clean_messages(): @@ -48,6 +51,11 @@ def clean_messages(): plan_sandbox_clean_message_day = message.created_at app = db.session.query(App).filter_by(id=message.app_id).first() if not app: + _logger.warning( + "Expected App record to exist, but none was found, app_id=%s, message_id=%s", + message.app_id, + message.id, + ) continue features_cache_key = f"features:{app.tenant_id}" plan_cache = redis_client.get(features_cache_key) diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 696bcd2667..18d10cc528 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -1,3 +1,4 @@ +import logging from typing import Optional from core.model_manager import ModelInstance, ModelManager @@ -12,6 +13,8 @@ from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegm from models.dataset import Document as DatasetDocument from services.entities.knowledge_entities.knowledge_entities import ParentMode +_logger = logging.getLogger(__name__) + class VectorService: @classmethod @@ -24,6 +27,11 @@ class VectorService: if doc_form == IndexType.PARENT_CHILD_INDEX: document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first() if not document: + _logger.warning( + "Expected DatasetDocument record to exist, but none was found, document_id=%s, segment_id=%s", + segment.document_id, + segment.id, + ) continue # get the process rule processing_rule = (