add summary analysis tools for exam

1 year ago · a6ca634e35
parent 27827e8876
commit a6ca634e35
2 changed files with 189 additions and 1 deletions
--- a/api/controllers/inner_tools/__init__.py
+++ b/api/controllers/inner_tools/__init__.py
@ -4,4 +4,4 @@ from libs.external_api import ExternalApi
 bp = Blueprint("inner_tools", __name__, url_prefix="/inner_tools")
 api = ExternalApi(bp)

-from . import markdown_to_pdf
+from . import answers_summary_analysis, markdown_to_pdf
--- a/api/controllers/inner_tools/answers_summary_analysis.py
+++ b/api/controllers/inner_tools/answers_summary_analysis.py
@ -0,0 +1,188 @@
+import csv
+import io
+import json
+import logging
+from typing import Any, Dict, List, Optional, Tuple
+
+import chardet
+from controllers.inner_tools import api
+from extensions.ext_database import db
+from extensions.ext_storage import storage
+from flask import jsonify, request
+from flask_restful import Resource  # type: ignore
+from models.model import UploadFile
+from models.workflow import WorkflowRun
+
+
+class AnswersSummaryAnalysisApi(Resource):
+    """Inner-tools endpoint that summarises an uploaded exam answer sheet by category."""
+
+    # Cell values that mean "no answer given"; such cells are counted but not as answered.
+    _PLACEHOLDER_ANSWERS = ('#', '?', '-')
+    # Tried in order after the chardet guess. latin-1 maps every byte, so it always decodes.
+    _FALLBACK_ENCODINGS = ('utf-8', 'gbk', 'gb2312', 'iso-8859-1', 'latin-1')
+
+    _logger = logging.getLogger(__name__)
+
+    def post(self):
+        """Summarise the answer sheet attached to a workflow run.
+
+        Expects a JSON object with:
+
+        - ``workflow_run_id``: id of the workflow run whose ``user_answers`` input
+          references the uploaded answer sheet through its ``related_id``.
+        - ``categories``: list of objects, each with a ``name`` and an ``items`` list
+          of 1-based question numbers.
+
+        Returns:
+            A JSON response with ``user_answers`` (the parsed rows) and
+            ``summary_analysis`` (answered rate per category name). Validation
+            failures return ``({"error": ...}, 400)``; an unreadable or missing
+            file returns ``({"error": ...}, 404)``.
+        """
+        # NOTE(review): no authentication or tenant check is visible in this handler;
+        # confirm the inner_tools blueprint is protected elsewhere.
+        if not request.is_json:
+            return {"error": "Request must be JSON"}, 400
+
+        data = request.get_json()
+        if not isinstance(data, dict):
+            # Valid JSON that is not an object (a list, null, ...) has no fields to read.
+            return {"error": "Request body must be a JSON object"}, 400
+
+        categories = data.get('categories')
+        workflow_run_id = data.get('workflow_run_id')
+        if not workflow_run_id:
+            # Without an id there is nothing to look up; skip the database round trip.
+            return {"error": "workflow_run not found"}, 400
+
+        # Load the workflow run to recover the inputs it was started with.
+        workflow_run = db.session.query(WorkflowRun).filter(WorkflowRun.id == workflow_run_id).first()
+        if not workflow_run:
+            return {"error": "workflow_run not found"}, 400
+
+        workflow_run_args = workflow_run.inputs
+        if not workflow_run_args:
+            return {"error": "workflow_run_args not found"}, 400
+
+        try:
+            user_answers_file_id = self._extract_answers_file_id(workflow_run_args)
+        except ValueError:
+            return {"error": "workflow_run_args must be a valid JSON string"}, 400
+
+        if not user_answers_file_id:
+            return {"error": "file_id is required"}, 400
+        if not categories:
+            return {"error": "categories is required"}, 400
+        if not isinstance(categories, list) or not all(isinstance(item, dict) for item in categories):
+            return {"error": "categories must be a list of objects"}, 400
+
+        file_content, _encoding = self._read_file_with_encoding_detection(user_answers_file_id)
+        if not file_content:
+            return {"error": "Failed to read file or file not found"}, 404
+
+        parsed_answers = self._parse_answers(file_content)
+        if not parsed_answers:
+            return {"error": "Failed to parse answers from file"}, 400
+
+        summary_analysis = self._calculate_category_statistics(parsed_answers, categories)
+        return jsonify({'user_answers': parsed_answers, 'summary_analysis': summary_analysis})
+
+    @staticmethod
+    def _extract_answers_file_id(raw_inputs: Any) -> Optional[str]:
+        """Return the ``related_id`` of the ``user_answers`` workflow input.
+
+        Args:
+            raw_inputs: The workflow run inputs, either as a JSON string or as an
+                already decoded object.
+
+        Returns:
+            The ``related_id`` value, or ``None`` when ``user_answers`` is absent or
+            is not an object.
+
+        Raises:
+            ValueError: If ``raw_inputs`` is not valid JSON or does not decode to a
+                JSON object (``json.JSONDecodeError`` is a ``ValueError``).
+        """
+        if isinstance(raw_inputs, (str, bytes, bytearray)):
+            inputs = json.loads(raw_inputs)
+        else:
+            inputs = raw_inputs
+        if not isinstance(inputs, dict):
+            raise ValueError("workflow run inputs are not a JSON object")
+
+        user_answers = inputs.get('user_answers')
+        if not isinstance(user_answers, dict):
+            return None
+        return user_answers.get('related_id')
+
+    def _read_file_with_encoding_detection(self, file_id: str) -> Tuple[Optional[str], Optional[str]]:
+        """Load an uploaded file from storage and decode it to text.
+
+        The encoding guessed by chardet is tried first, then each entry of
+        ``_FALLBACK_ENCODINGS`` in order.
+
+        Args:
+            file_id: Id of the ``UploadFile`` row that holds the storage key.
+
+        Returns:
+            ``(text, encoding_used)``, or ``(None, None)`` when the upload does not
+            exist or loading/decoding fails.
+        """
+        try:
+            upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
+            if upload_file is None:
+                return None, None
+
+            file_content = storage.load_once(upload_file.key)
+
+            # chardet reports {'encoding': None} when it cannot decide, so a dict
+            # default would never apply; filter the guess explicitly instead.
+            guessed = (chardet.detect(file_content) or {}).get('encoding')
+            candidates = ([guessed] if guessed else []) + list(self._FALLBACK_ENCODINGS)
+
+            for enc in candidates:
+                try:
+                    return file_content.decode(enc), enc
+                except (UnicodeDecodeError, LookupError):
+                    # LookupError: chardet named a codec this Python does not know.
+                    continue
+
+            return None, None
+        except Exception:
+            # Best-effort boundary: any database/storage failure is reported to the
+            # caller as "file not readable" rather than surfacing as a 500.
+            self._logger.exception("Error reading file %s", file_id)
+            return None, None
+
+    def _parse_answers(self, file_content: str) -> List[Dict[str, Any]]:
+        """Parse answer-sheet CSV text into one record per student.
+
+        Expected CSV layout (the header row is read and discarded):
+
+        - First column: student ID
+        - Second column: name
+        - Third column: score (ignored)
+        - Remaining columns: answers to questions 1, 2, 3, ...
+
+        Rows with fewer than four columns are skipped.
+
+        Returns:
+            A list of ``{'user_name', 'code', 'answers'}`` dicts with whitespace
+            stripped from every value, or ``[]`` when the header is missing/empty
+            or the CSV cannot be parsed.
+        """
+        try:
+            # newline='' leaves line endings untouched so the csv module can handle
+            # \r, \n and \r\n (including inside quoted fields) itself.
+            csv_reader = csv.reader(io.StringIO(file_content, newline=''))
+
+            header = next(csv_reader, None)
+            if not header:
+                return []
+
+            return [
+                {
+                    'user_name': row[1].strip(),
+                    'code': row[0].strip(),
+                    # Skip the ID, name and score columns.
+                    'answers': [ans.strip() for ans in row[3:]],
+                }
+                for row in csv_reader
+                if len(row) >= 4
+            ]
+        except (csv.Error, TypeError):
+            self._logger.exception("Error parsing answers")
+            return []
+
+    def _calculate_category_statistics(
+        self, parsed_answers: List[Dict[str, Any]], categories: List[Dict[str, Any]]
+    ) -> Dict[str, float]:
+        """Compute, per category, the fraction of questions that were answered.
+
+        No answer key is consulted: an answer counts as valid when it is non-empty
+        and not one of the placeholder marks in ``_PLACEHOLDER_ANSWERS``. Question
+        numbers that are not integers, are below 1, or fall beyond a student's
+        answer list are ignored for that student.
+
+        Args:
+            parsed_answers: Records as produced by ``_parse_answers``.
+            categories: Dicts with a ``name`` and an ``items`` list of 1-based
+                question numbers.
+
+        Returns:
+            Mapping of category name to a rate in ``[0, 1]`` rounded to two
+            decimals; ``0.0`` when no answers were counted for the category.
+        """
+        summary: Dict[str, float] = {}
+
+        for category in categories:
+            category_name = category.get('name', '')
+
+            # Convert the 1-based question numbers to 0-based indices once per
+            # category instead of once per student.
+            indices = []
+            for q_num in category.get('items') or []:
+                try:
+                    idx = int(q_num) - 1
+                except (TypeError, ValueError, OverflowError):
+                    continue
+                if idx >= 0:
+                    indices.append(idx)
+
+            total_answers = 0
+            valid_answers = 0
+            for answer_data in parsed_answers:
+                answers = answer_data.get('answers', [])
+                for idx in indices:
+                    if idx >= len(answers):
+                        continue
+                    total_answers += 1
+                    if answers[idx] and answers[idx] not in self._PLACEHOLDER_ANSWERS:
+                        valid_answers += 1
+
+            summary[category_name] = round(valid_answers / total_answers, 2) if total_answers else 0.0
+
+        return summary
+
+
+# Register the resource on the inner_tools API so that requests to
+# '/answers-summary-analysis' (relative to the blueprint's URL prefix) are
+# dispatched to AnswersSummaryAnalysisApi; only POST is implemented.
+api.add_resource(AnswersSummaryAnalysisApi, '/answers-summary-analysis')