From ca7ffb186e0202f9a2ffb5ae57a4773e081f5e7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AD=8F=E6=81=92?= Date: Thu, 29 May 2025 09:15:27 +0800 Subject: [PATCH] Upgrade 1.4.1, change the CreatorUserRole class of models.enum --- api/core/rag/extractor/pdf_extractor.py | 6 +++--- api/pyproject.toml | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/api/core/rag/extractor/pdf_extractor.py b/api/core/rag/extractor/pdf_extractor.py index 0b519fabc1..c4a8da58a3 100644 --- a/api/core/rag/extractor/pdf_extractor.py +++ b/api/core/rag/extractor/pdf_extractor.py @@ -15,7 +15,7 @@ from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document from extensions.ext_database import db from extensions.ext_storage import storage -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.model import UploadFile class PdfExtractor(BaseExtractor): @@ -121,7 +121,7 @@ class PdfExtractor(BaseExtractor): extension=image_ext, mime_type=mime_type or "", created_by=self._user_id, - created_by_role=CreatedByRole.ACCOUNT, + created_by_role=CreatorUserRole.ACCOUNT, created_at=datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None), used=True, used_by=self._user_id, @@ -159,4 +159,4 @@ class PdfExtractor(BaseExtractor): "has_tables": bool(tables) } - yield Document(page_content=content, metadata=metadata) \ No newline at end of file + yield Document(page_content=content, metadata=metadata) diff --git a/api/pyproject.toml b/api/pyproject.toml index 1c6adb6587..0cb91fec69 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -83,6 +83,8 @@ dependencies = [ "weave~=0.51.0", "yarl~=1.18.3", "webvtt-py~=0.5.1", + "pdfplumber~=0.11.6", + "pytesseract~=0.3.13" ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group.