add summary analysis tools for exam
parent
27827e8876
commit
a6ca634e35
@ -0,0 +1,188 @@
|
||||
import io
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import chardet
|
||||
from controllers.inner_tools import api
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from flask import jsonify, request
|
||||
from flask_restful import Resource # type: ignore
|
||||
from models.model import UploadFile
|
||||
from models.workflow import WorkflowRun
|
||||
|
||||
|
||||
class AnswersSummaryAnalysisApi(Resource):
|
||||
def post(self):
|
||||
"""Analyze answers and provide summary statistics by category.
|
||||
|
||||
This endpoint takes a file_id of an answer sheet and a JSON payload of problem categories.
|
||||
It reads the file, parses answers, and calculates success rates by category.
|
||||
"""
|
||||
# Parse request arguments
|
||||
if not request.is_json:
|
||||
return {"error": "Request must be JSON"}, 400
|
||||
|
||||
data = request.get_json()
|
||||
categories = data.get('categories')
|
||||
workflow_run_id = data.get('workflow_run_id')
|
||||
|
||||
# read the arg of this workflow run
|
||||
workflow_run = WorkflowRun.query.filter_by(id=workflow_run_id).first()
|
||||
if not workflow_run:
|
||||
return {"error": "workflow_run not found"}, 400
|
||||
|
||||
workflow_run_args = workflow_run.inputs
|
||||
if not workflow_run_args:
|
||||
return {"error": "workflow_run_args not found"}, 400
|
||||
|
||||
# get the file_id from the workflow_run_args
|
||||
try:
|
||||
args_json = json.loads(workflow_run_args)
|
||||
user_answers_file_id = args_json.get('user_answers').get('related_id')
|
||||
except json.JSONDecodeError:
|
||||
return {"error": "workflow_run_args must be a valid JSON string"}, 400
|
||||
|
||||
if not user_answers_file_id:
|
||||
return {"error": "file_id is required"}, 400
|
||||
if not categories:
|
||||
return {"error": "categories is required"}, 400
|
||||
|
||||
# Read the file content with encoding detection
|
||||
file_content, detected_encoding = self._read_file_with_encoding_detection(user_answers_file_id)
|
||||
if not file_content:
|
||||
return {"error": "Failed to read file or file not found"}, 404
|
||||
|
||||
# Parse the answers
|
||||
parsed_answers = self._parse_answers(file_content)
|
||||
if not parsed_answers:
|
||||
return {"error": "Failed to parse answers from file"}, 400
|
||||
|
||||
# Calculate category statistics
|
||||
summary_analysis = self._calculate_category_statistics(parsed_answers, categories)
|
||||
|
||||
# Return the response
|
||||
return jsonify({'user_answers': parsed_answers, 'summary_analysis': summary_analysis})
|
||||
|
||||
def _read_file_with_encoding_detection(self, file_id: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""Read file content with automatic encoding detection."""
|
||||
try:
|
||||
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
|
||||
|
||||
# Get the file content from storage
|
||||
file_content = storage.load_once(upload_file.key)
|
||||
|
||||
# Detect the encoding
|
||||
detection = chardet.detect(file_content)
|
||||
encoding = detection.get('encoding', 'utf-8')
|
||||
|
||||
# Try multiple encodings if needed
|
||||
encodings_to_try = [encoding, 'utf-8', 'gbk', 'gb2312', 'iso-8859-1', 'latin-1']
|
||||
decoded_content = None
|
||||
detected_encoding = None
|
||||
|
||||
for enc in encodings_to_try:
|
||||
try:
|
||||
decoded_content = file_content.decode(enc)
|
||||
detected_encoding = enc
|
||||
break
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
|
||||
return decoded_content, detected_encoding
|
||||
except Exception as e:
|
||||
print(f"Error reading file: {str(e)}")
|
||||
return None, None
|
||||
|
||||
def _parse_answers(self, file_content: str) -> List[Dict[str, Any]]:
|
||||
"""Parse answers from the file content.
|
||||
|
||||
Expected format is CSV with the following structure:
|
||||
- First column: Student ID (准考证号)
|
||||
- Second column: Name (姓名)
|
||||
- Third column: Score (得分)
|
||||
- Remaining columns: Answers to questions (1, 2, 3, etc.)
|
||||
"""
|
||||
try:
|
||||
import csv
|
||||
from io import StringIO
|
||||
|
||||
# Create a CSV reader from the string content
|
||||
csv_file = StringIO(file_content)
|
||||
csv_reader = csv.reader(csv_file)
|
||||
|
||||
# Get the header row
|
||||
header = next(csv_reader, None)
|
||||
if not header:
|
||||
return []
|
||||
|
||||
result = []
|
||||
for row in csv_reader:
|
||||
if not row or len(row) < 4: # Skip empty rows or rows with insufficient data
|
||||
continue
|
||||
|
||||
# Extract student ID and name
|
||||
student_id = row[0].strip()
|
||||
name = row[1].strip()
|
||||
|
||||
# Extract answers (skip ID, name, and score columns)
|
||||
answers = [ans.strip() for ans in row[3:]]
|
||||
|
||||
result.append({'user_name': name, 'code': student_id, 'answers': answers})
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
# Log the exception for debugging
|
||||
print(f"Error parsing answers: {str(e)}")
|
||||
return []
|
||||
|
||||
def _calculate_category_statistics(
|
||||
self, parsed_answers: List[Dict[str, Any]], categories: List[Dict[str, Any]]
|
||||
) -> Dict[str, float]:
|
||||
"""Calculate statistics by category.
|
||||
|
||||
For demonstration, we're assuming:
|
||||
- Correct answers are predetermined or defined in the system
|
||||
- We're calculating the percentage of correct answers per category
|
||||
"""
|
||||
# Simplified example: assume we have correct answers defined
|
||||
# In a real system, these would come from a database or predefined source
|
||||
# For now, we'll just count non-empty answers
|
||||
|
||||
summary = {}
|
||||
|
||||
# For each category in the list
|
||||
for category in categories:
|
||||
category_name = category.get('name', '')
|
||||
question_numbers = category.get('items', [])
|
||||
|
||||
total_answers = 0
|
||||
valid_answers = 0
|
||||
|
||||
for answer_data in parsed_answers:
|
||||
answers = answer_data.get('answers', [])
|
||||
|
||||
# Check each question in this category
|
||||
for q_num in question_numbers:
|
||||
try:
|
||||
# Convert to 0-based index
|
||||
idx = int(q_num) - 1
|
||||
if idx < 0 or idx >= len(answers):
|
||||
continue
|
||||
|
||||
total_answers += 1
|
||||
# Count non-empty and non-placeholder answers as valid
|
||||
if answers[idx] and answers[idx] not in ['#', '?', '-']:
|
||||
valid_answers += 1
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
|
||||
# Calculate percentage
|
||||
rate = valid_answers / total_answers if total_answers > 0 else 0
|
||||
summary[category_name] = round(rate, 2)
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
# Add API endpoint
|
||||
api.add_resource(AnswersSummaryAnalysisApi, '/answers-summary-analysis')
|
||||
Loading…
Reference in New Issue