import json import os import requests from concurrent.futures import ThreadPoolExecutor, as_completed from jsonschema import validate, ValidationError import uuid from flask import current_app, url_for from werkzeug.utils import secure_filename import sqlite3 # Import sqlite3 import sys # Ensure current directory is in Python path for local imports sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from database import get_db_connection from utils import create_a4_pdf_from_images # JSON v3.0 Schema for validation JSON_V3_SCHEMA = { "type": "object", "properties": { "version": {"type": "string", "const": "3.0"}, "source": {"type": "string"}, "test_name": {"type": "string"}, "test_id": {"type": "string"}, "test_mapping_id": {"type": "string"}, "metadata": {"type": "object"}, "config": { "type": "object", "properties": { "statuses_to_include": {"type": "array", "items": {"type": "string"}}, "layout": { "type": "object", "properties": { "images_per_page": {"type": "integer"}, "orientation": {"type": "string"} }, "required": ["images_per_page", "orientation"] } }, "required": ["statuses_to_include", "layout"] }, "questions": { "type": "array", "items": { "type": "object", "properties": { "question_number": {"type": "string"}, "image_url": {"type": "string", "format": "uri"}, "status": {"type": "string"}, "marked_solution": {"type": "string"}, "correct_solution": {"type": "string"}, "subject": {"type": "string"}, "chapter": {"type": "string"}, "topic": {"type": "string"}, "time_taken": {"type": "integer"} }, "required": ["question_number", "image_url", "status", "marked_solution", "correct_solution", "subject", "time_taken"] } }, "view": {"type": "boolean"} }, "required": ["version", "source", "test_name", "test_id", "test_mapping_id", "config", "questions", "view"] } class JSONProcessorV3: def __init__(self, data=None): self.data = data def validate(self): """Validates the JSON data against the v3.0 schema.""" try: validate(instance=self.data, schema=JSON_V3_SCHEMA) return True except ValidationError as e: raise ValueError(f"Schema validation failed: {e.message}") def download_image_from_url(self, url, save_path, timeout=30): """Downloads an image from a URL and saves it to a path.""" try: response = requests.get(url, timeout=timeout) response.raise_for_status() with open(save_path, 'wb') as f: f.write(response.content) return save_path except requests.exceptions.RequestException as e: print(f"Error downloading image from {url}: {e}") # Keep print for tests if current_app: current_app.logger.error(f"Error downloading image from {url}: {e}") return None def download_images_parallel(self, questions, output_dir, session_id, max_workers=10): """Downloads all images in parallel and returns a map of question number to local path.""" image_paths = {} with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_question = { executor.submit( self.download_image_from_url, q['image_url'], os.path.join(output_dir, f"{session_id}_q_{q['question_number']}.png") ): q for q in questions if q.get('image_url') } for future in as_completed(future_to_question): question = future_to_question[future] url = question['image_url'] try: path = future.result() if path: image_paths[question['question_number']] = path current_app.logger.info(f"Successfully downloaded image from {url}") else: current_app.logger.error(f"Failed to download image from {url}") except Exception as e: current_app.logger.error(f"Error processing image for question {question.get('question_number')} from {url}: {e}") return image_paths def process(self, user_id=1): # Default user_id for now, replace with actual user """Main processing logic for the v3.0 payload, including DB insertion and PDF generation.""" if not self.data: raise ValueError("No data provided to process.") current_app.logger.info("Starting processing of JSON v3.0 payload.") current_app.logger.info(f"Test Name: {self.data.get('test_name')}") current_app.logger.info(f"Test ID: {self.data.get('test_id')}") current_app.logger.info(f"Metadata: {self.data.get('metadata')}") if not self.validate(): raise ValueError("Schema validation failed.") conn = get_db_connection() try: test_name = self.data['test_name'] test_id = self.data['test_id'] test_mapping_id = self.data['test_mapping_id'] questions_payload = self.data['questions'] view_mode = self.data.get('view', False) metadata = json.dumps(self.data.get('metadata', {})) # Store metadata as JSON string config = self.data.get('config', {}) layout = config.get('layout', {}) images_per_page = layout.get('images_per_page', 4) orientation = layout.get('orientation', 'portrait') session_id = str(uuid.uuid4()) original_filename = f"{test_name}.json" # Name of the JSON file that was uploaded conn.execute( 'INSERT INTO sessions (id, original_filename, user_id, test_id, test_mapping_id, source, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)', (session_id, original_filename, user_id, test_id, test_mapping_id, self.data.get('source', 'manual'), metadata) ) processed_folder = current_app.config.get('PROCESSED_FOLDER', 'processed') os.makedirs(processed_folder, exist_ok=True) current_app.logger.info(f"Downloading images for test {test_id} to {processed_folder}") image_path_map = self.download_images_parallel(questions_payload, processed_folder, session_id) image_records = [] question_records = [] for i, q_data in enumerate(questions_payload): question_number = q_data['question_number'] # Check if image was downloaded processed_filename = None local_image_path = image_path_map.get(question_number) if local_image_path: processed_filename = os.path.basename(local_image_path) # Insert into images table image_insert_result = conn.execute( 'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)', (session_id, i + 1, q_data.get('image_url', ''), f"Question {question_number}", processed_filename, 'cropped' if processed_filename else 'original_url_only') ) image_id = image_insert_result.lastrowid # Insert into questions table question_records.append(( session_id, image_id, question_number, q_data['status'], q_data['marked_solution'], q_data['correct_solution'], q_data.get('subject'), q_data.get('chapter'), q_data.get('topic'), q_data.get('time_taken') )) conn.executemany( 'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution, subject, chapter, topic, time_taken) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', question_records ) conn.commit() response_data = { "status": "success", "message": "JSON v3.0 processed successfully." } if view_mode: query = "SELECT q.*, i.processed_filename FROM questions q JOIN images i ON q.image_id = i.id WHERE q.session_id = ? ORDER BY i.id" all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()] if not all_questions: conn.rollback() raise ValueError('No questions found for PDF generation.') pdf_output_folder = current_app.config.get('OUTPUT_FOLDER', 'output') os.makedirs(pdf_output_folder, exist_ok=True) pdf_filename = f"{secure_filename(test_name)}_{session_id[:8]}.pdf" create_a4_pdf_from_images( image_info=all_questions, base_folder=processed_folder, output_filename=pdf_filename, images_per_page=images_per_page, output_folder=pdf_output_folder, orientation=orientation ) conn.execute( 'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)', (session_id, pdf_filename, test_name, test_mapping_id, 'Generated automatically via JSON v3.0 upload.', original_filename, user_id) ) conn.commit() response_data['view_url'] = url_for('main.view_pdf', filename=pdf_filename, _external=True) response_data['message'] = "PDF auto-generated and saved." else: response_data['edit_url'] = url_for('main.question_entry_v2', session_id=session_id, test_name=test_name, _external=True) response_data['message'] = "Session created for manual review." return response_data except ValueError as e: if conn: conn.rollback() current_app.logger.error(f"JSON v3.0 processing error: {e}") raise # Re-raise to be caught by the endpoint except sqlite3.Error as e: if conn: conn.rollback() current_app.logger.error(f"Database error during JSON v3.0 processing: {e}") raise ValueError(f"Database error: {e}") except Exception as e: if conn: conn.rollback() current_app.logger.error(f"Unhandled error during JSON v3.0 processing: {e}") raise ValueError(f"An unexpected error occurred: {e}") finally: if conn: conn.close()