ignore more

pull/22644/head
Asuka Minato 10 months ago
parent 3387dc1695
commit e66325d309

@@ -672,8 +672,7 @@ class IndexingRunner:
if extra_update_params: if extra_update_params:
update_params.update(extra_update_params) update_params.update(extra_update_params)
# type: ignore db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params) # type: ignore
db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params)
db.session.commit() db.session.commit()
@staticmethod @staticmethod

@@ -331,10 +331,10 @@ class NotionExtractor(BaseExtractor):
last_edited_time = self.get_notion_last_edited_time() last_edited_time = self.get_notion_last_edited_time()
data_source_info = document_model.data_source_info_dict data_source_info = document_model.data_source_info_dict
data_source_info["last_edited_time"] = last_edited_time data_source_info["last_edited_time"] = last_edited_time
# type: ignore
db.session.query(DocumentModel).filter_by(id=document_model.id).update( db.session.query(DocumentModel).filter_by(id=document_model.id).update(
{DocumentModel.data_source_info: json.dumps(data_source_info)} {DocumentModel.data_source_info: json.dumps(data_source_info)}
) ) # type: ignore
db.session.commit() db.session.commit()
def get_notion_last_edited_time(self) -> str: def get_notion_last_edited_time(self) -> str:

@@ -660,10 +660,10 @@ class DocumentSegment(Base):
dataset_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False)
document_id = mapped_column(StringUUID, nullable=False) document_id = mapped_column(StringUUID, nullable=False)
position: Mapped[int] position: Mapped[int]
content = mapped_column(db.Text, nullable=False) content: Mapped[str]
answer = mapped_column(db.Text, nullable=True) answer: Mapped[Optional[str]]
word_count = mapped_column(db.Integer, nullable=False) word_count: Mapped[int]
tokens = mapped_column(db.Integer, nullable=False) tokens: Mapped[int]
# indexing fields # indexing fields
keywords = mapped_column(db.JSON, nullable=True) keywords = mapped_column(db.JSON, nullable=True)

@@ -214,11 +214,9 @@ class DatasetService:
dataset.created_by = account.id dataset.created_by = account.id
dataset.updated_by = account.id dataset.updated_by = account.id
dataset.tenant_id = tenant_id dataset.tenant_id = tenant_id
# type: ignore dataset.embedding_model_provider = embedding_model.provider if embedding_model else None # type: ignore
dataset.embedding_model_provider = embedding_model.provider if embedding_model else None dataset.embedding_model = embedding_model.model if embedding_model else None # type: ignore
# type: ignore dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None # type: ignore
dataset.embedding_model = embedding_model.model if embedding_model else None
dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None
dataset.permission = permission or DatasetPermissionEnum.ONLY_ME dataset.permission = permission or DatasetPermissionEnum.ONLY_ME
dataset.provider = provider dataset.provider = provider
db.session.add(dataset) db.session.add(dataset)
@@ -1541,9 +1539,10 @@ class DocumentService:
db.session.add(document) db.session.add(document)
db.session.commit() db.session.commit()
# update document segment # update document segment
# type: ignore
update_params = {DocumentSegment.status: "re_segment"} db.session.query(DocumentSegment).filter_by(document_id=document.id).update(
db.session.query(DocumentSegment).filter_by(document_id=document.id).update(update_params) {DocumentSegment.status: "re_segment"}
) # type: ignore
db.session.commit() db.session.commit()
# trigger async task # trigger async task
document_indexing_update_task.delay(document.dataset_id, document.id) document_indexing_update_task.delay(document.dataset_id, document.id)
@@ -2228,7 +2227,7 @@ class SegmentService:
# calc embedding use tokens # calc embedding use tokens
if document.doc_form == "qa_model": if document.doc_form == "qa_model":
segment.answer = args.answer segment.answer = args.answer
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore
else: else:
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0] tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
segment.content = content segment.content = content

Loading…
Cancel
Save