diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index e2faae2f4d..4f3bfdf24e 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -11,7 +11,7 @@ import docx import pandas as pd import pypandoc # type: ignore import pypdfium2 # type: ignore -import webvtt +import webvtt # type: ignore import yaml # type: ignore from docx.document import Document from docx.oxml.table import CT_Tbl @@ -468,6 +468,7 @@ def _extract_text_from_msg(file_content: bytes) -> str: except Exception as e: raise TextExtractionError(f"Failed to extract text from MSG: {str(e)}") from e + def _extract_text_from_vtt(vtt_bytes: bytes) -> str: text = _extract_text_from_plain_text(vtt_bytes)