fix: lint

pull/20193/head
crazywoola 1 year ago
parent b7e349df5d
commit cb7b1c3310

@ -366,7 +366,7 @@ def _extract_text_from_excel(file_content: bytes) -> str:
df = excel_file.parse(sheet_name=sheet_name) df = excel_file.parse(sheet_name=sheet_name)
df.dropna(how="all", inplace=True) df.dropna(how="all", inplace=True)
# Create Markdown table two times to separate tables with a newline # Create Markdown table two times to separate tables with a newline
markdown_table += df.to_markdown(index=False, floatfmt='') + "\n\n" markdown_table += df.to_markdown(index=False, floatfmt="") + "\n\n"
except Exception as e: except Exception as e:
continue continue
return markdown_table return markdown_table

@ -10,9 +10,9 @@ from core.workflow.entities.node_entities import NodeRunResult
from core.workflow.nodes.document_extractor import DocumentExtractorNode, DocumentExtractorNodeData from core.workflow.nodes.document_extractor import DocumentExtractorNode, DocumentExtractorNodeData
from core.workflow.nodes.document_extractor.node import ( from core.workflow.nodes.document_extractor.node import (
_extract_text_from_docx, _extract_text_from_docx,
_extract_text_from_excel,
_extract_text_from_pdf, _extract_text_from_pdf,
_extract_text_from_plain_text, _extract_text_from_plain_text,
_extract_text_from_excel
) )
from core.workflow.nodes.enums import NodeType from core.workflow.nodes.enums import NodeType
from models.workflow import WorkflowNodeExecutionStatus from models.workflow import WorkflowNodeExecutionStatus
@ -184,6 +184,7 @@ def test_extract_text_from_docx(mock_document):
def test_node_type(document_extractor_node): def test_node_type(document_extractor_node):
assert document_extractor_node._node_type == NodeType.DOCUMENT_EXTRACTOR assert document_extractor_node._node_type == NodeType.DOCUMENT_EXTRACTOR
@patch("pandas.ExcelFile") @patch("pandas.ExcelFile")
def test_extract_text_from_excel_single_sheet(mock_excel_file): def test_extract_text_from_excel_single_sheet(mock_excel_file):
"""Test extracting text from Excel file with single sheet.""" """Test extracting text from Excel file with single sheet."""
@ -205,7 +206,7 @@ def test_extract_text_from_excel_single_sheet(mock_excel_file):
assert result == expected assert result == expected
mock_excel_file.assert_called_once() mock_excel_file.assert_called_once()
mock_df.dropna.assert_called_once_with(how="all", inplace=True) mock_df.dropna.assert_called_once_with(how="all", inplace=True)
mock_df.to_markdown.assert_called_once_with(index=False, floatfmt='') mock_df.to_markdown.assert_called_once_with(index=False, floatfmt="")
@patch("pandas.ExcelFile") @patch("pandas.ExcelFile")
@ -229,8 +230,10 @@ def test_extract_text_from_excel_multiple_sheets(mock_excel_file):
file_content = b"fake_excel_content_multiple_sheets" file_content = b"fake_excel_content_multiple_sheets"
result = _extract_text_from_excel(file_content) result = _extract_text_from_excel(file_content)
expected = ("| Product | Price |\n|---------|-------|\n| Apple | 1.50 |\n\n" expected = (
"| City | Population |\n|------|------------|\n| NYC | 8000000 |\n\n") "| Product | Price |\n|---------|-------|\n| Apple | 1.50 |\n\n"
"| City | Population |\n|------|------------|\n| NYC | 8000000 |\n\n"
)
assert result == expected assert result == expected
assert mock_excel_instance.parse.call_count == 2 assert mock_excel_instance.parse.call_count == 2
@ -354,7 +357,7 @@ def test_extract_text_from_excel_markdown_formatting(mock_excel_file):
result = _extract_text_from_excel(file_content) result = _extract_text_from_excel(file_content)
# Verify to_markdown was called with correct parameters # Verify to_markdown was called with correct parameters
mock_df.to_markdown.assert_called_once_with(index=False, floatfmt='') mock_df.to_markdown.assert_called_once_with(index=False, floatfmt="")
expected = "| Float | Int |\n|-------|-----|\n| 123456.78 | 42 |\n\n" expected = "| Float | Int |\n|-------|-----|\n| 123456.78 | 42 |\n\n"
assert result == expected assert result == expected

Loading…
Cancel
Save