Enhance file reading functionality to support XLSX files by converting them to CSV format, while maintaining automatic encoding detection for CSV files.

pull/21891/head
ytqh 1 year ago
parent fd430f281b
commit f55fdf380f

@@ -81,13 +81,46 @@ class AnswersSummaryAnalysisApi(Resource):
) )
def _read_file_with_encoding_detection(self, file_id: str) -> Tuple[Optional[str], Optional[str]]: def _read_file_with_encoding_detection(self, file_id: str) -> Tuple[Optional[str], Optional[str]]:
"""Read file content with automatic encoding detection.""" """Read file content with automatic encoding detection.
Supports both CSV and XLSX files, converting XLSX to CSV text format.
"""
try: try:
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first() upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
if not upload_file:
return None, None
# Get the file content from storage # Get the file content from storage
file_content = storage.load_once(upload_file.key) file_content = storage.load_once(upload_file.key)
# Check if the file is Excel (.xlsx) based on filename or mime type
file_extension = upload_file.name.split('.')[-1].lower() if upload_file.name else ''
mime_type = upload_file.mime_type if upload_file.mime_type else ''
is_excel = (
file_extension == 'xlsx'
or mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
)
if is_excel:
# Process Excel file
import io
import pandas as pd
# Load Excel data
excel_data = io.BytesIO(file_content)
try:
# Read all sheets, default to first sheet
df = pd.read_excel(excel_data, engine='openpyxl')
# Convert DataFrame to CSV string
csv_content = df.to_csv(index=False)
return csv_content, 'utf-8'
except Exception as e:
print(f"Error converting Excel file: {str(e)}")
return None, None
else:
# Process CSV file with encoding detection
# Detect the encoding # Detect the encoding
detection = chardet.detect(file_content) detection = chardet.detect(file_content)
encoding = detection.get('encoding', 'utf-8') encoding = detection.get('encoding', 'utf-8')

Loading…
Cancel
Save