ignore more

pull/22644/head
Asuka Minato 9 months ago
parent 3387dc1695
commit e66325d309

@ -672,8 +672,7 @@ class IndexingRunner:
if extra_update_params:
update_params.update(extra_update_params)
# type: ignore
db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params)
db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params) # type: ignore
db.session.commit()
@staticmethod

@ -331,10 +331,10 @@ class NotionExtractor(BaseExtractor):
last_edited_time = self.get_notion_last_edited_time()
data_source_info = document_model.data_source_info_dict
data_source_info["last_edited_time"] = last_edited_time
# type: ignore
db.session.query(DocumentModel).filter_by(id=document_model.id).update(
{DocumentModel.data_source_info: json.dumps(data_source_info)}
)
) # type: ignore
db.session.commit()
def get_notion_last_edited_time(self) -> str:

@ -660,10 +660,10 @@ class DocumentSegment(Base):
dataset_id = mapped_column(StringUUID, nullable=False)
document_id = mapped_column(StringUUID, nullable=False)
position: Mapped[int]
content = mapped_column(db.Text, nullable=False)
answer = mapped_column(db.Text, nullable=True)
word_count = mapped_column(db.Integer, nullable=False)
tokens = mapped_column(db.Integer, nullable=False)
content: Mapped[str]
answer: Mapped[Optional[str]]
word_count: Mapped[int]
tokens: Mapped[int]
# indexing fields
keywords = mapped_column(db.JSON, nullable=True)

@ -214,11 +214,9 @@ class DatasetService:
dataset.created_by = account.id
dataset.updated_by = account.id
dataset.tenant_id = tenant_id
# type: ignore
dataset.embedding_model_provider = embedding_model.provider if embedding_model else None
# type: ignore
dataset.embedding_model = embedding_model.model if embedding_model else None
dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None
dataset.embedding_model_provider = embedding_model.provider if embedding_model else None # type: ignore
dataset.embedding_model = embedding_model.model if embedding_model else None # type: ignore
dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None # type: ignore
dataset.permission = permission or DatasetPermissionEnum.ONLY_ME
dataset.provider = provider
db.session.add(dataset)
@ -1541,9 +1539,10 @@ class DocumentService:
db.session.add(document)
db.session.commit()
# update document segment
# type: ignore
update_params = {DocumentSegment.status: "re_segment"}
db.session.query(DocumentSegment).filter_by(document_id=document.id).update(update_params)
db.session.query(DocumentSegment).filter_by(document_id=document.id).update(
{DocumentSegment.status: "re_segment"}
) # type: ignore
db.session.commit()
# trigger async task
document_indexing_update_task.delay(document.dataset_id, document.id)
@ -2228,7 +2227,7 @@ class SegmentService:
# calc embedding use tokens
if document.doc_form == "qa_model":
segment.answer = args.answer
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0]
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore
else:
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
segment.content = content

Loading…
Cancel
Save