diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 5bb1773d4c..e5976f4c9a 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -672,8 +672,7 @@ class IndexingRunner: if extra_update_params: update_params.update(extra_update_params) - # type: ignore - db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params) + db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params) # type: ignore db.session.commit() @staticmethod diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index 83734602f2..81a0810e28 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -331,10 +331,10 @@ class NotionExtractor(BaseExtractor): last_edited_time = self.get_notion_last_edited_time() data_source_info = document_model.data_source_info_dict data_source_info["last_edited_time"] = last_edited_time - # type: ignore + db.session.query(DocumentModel).filter_by(id=document_model.id).update( {DocumentModel.data_source_info: json.dumps(data_source_info)} - ) + ) # type: ignore db.session.commit() def get_notion_last_edited_time(self) -> str: diff --git a/api/models/dataset.py b/api/models/dataset.py index 2e11944c50..c88a5d5ef6 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -660,10 +660,10 @@ class DocumentSegment(Base): dataset_id = mapped_column(StringUUID, nullable=False) document_id = mapped_column(StringUUID, nullable=False) position: Mapped[int] - content = mapped_column(db.Text, nullable=False) - answer = mapped_column(db.Text, nullable=True) - word_count = mapped_column(db.Integer, nullable=False) - tokens = mapped_column(db.Integer, nullable=False) + content: Mapped[str] + answer: Mapped[Optional[str]] + word_count: Mapped[int] + tokens: Mapped[int] # indexing fields keywords = mapped_column(db.JSON, nullable=True) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 9176ea71a2..b6db4812ba 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -214,11 +214,9 @@ class DatasetService: dataset.created_by = account.id dataset.updated_by = account.id dataset.tenant_id = tenant_id - # type: ignore - dataset.embedding_model_provider = embedding_model.provider if embedding_model else None - # type: ignore - dataset.embedding_model = embedding_model.model if embedding_model else None - dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None + dataset.embedding_model_provider = embedding_model.provider if embedding_model else None # type: ignore + dataset.embedding_model = embedding_model.model if embedding_model else None # type: ignore + dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None # type: ignore dataset.permission = permission or DatasetPermissionEnum.ONLY_ME dataset.provider = provider db.session.add(dataset) @@ -1541,9 +1539,10 @@ class DocumentService: db.session.add(document) db.session.commit() # update document segment - # type: ignore - update_params = {DocumentSegment.status: "re_segment"} - db.session.query(DocumentSegment).filter_by(document_id=document.id).update(update_params) + + db.session.query(DocumentSegment).filter_by(document_id=document.id).update( + {DocumentSegment.status: "re_segment"} + ) # type: ignore db.session.commit() # trigger async task document_indexing_update_task.delay(document.dataset_id, document.id) @@ -2228,7 +2227,7 @@ class SegmentService: # calc embedding use tokens if document.doc_form == "qa_model": segment.answer = args.answer - tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] + tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore else: tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0] segment.content = content