Report-Generator / database.py
Jaimodiji's picture
Upload folder using huggingface_hub
92a22cd verified
import os
import sqlite3
from datetime import datetime, timedelta
from flask import current_app
from utils import get_db_connection
def setup_database():
"""Initializes the database and creates/updates tables as needed."""
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
username TEXT NOT NULL UNIQUE,
email TEXT NOT NULL UNIQUE,
password_hash TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
# Create sessions table
cursor.execute("""
CREATE TABLE IF NOT EXISTS sessions (
id TEXT PRIMARY KEY,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
original_filename TEXT,
persist INTEGER DEFAULT 0,
name TEXT,
user_id INTEGER,
session_type TEXT DEFAULT 'standard'
);
""")
# Create images table
cursor.execute("""
CREATE TABLE IF NOT EXISTS images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
image_index INTEGER NOT NULL,
filename TEXT NOT NULL,
original_name TEXT NOT NULL,
processed_filename TEXT,
image_type TEXT DEFAULT 'original',
box_id TEXT,
FOREIGN KEY (session_id) REFERENCES sessions (id)
);
""")
# Create questions table
cursor.execute("""
CREATE TABLE IF NOT EXISTS questions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
image_id INTEGER NOT NULL,
question_number TEXT,
subject TEXT,
status TEXT,
marked_solution TEXT,
actual_solution TEXT,
time_taken TEXT,
tags TEXT,
FOREIGN KEY (session_id) REFERENCES sessions (id),
FOREIGN KEY (image_id) REFERENCES images (id)
);
""")
# Create folders table
cursor.execute("""
CREATE TABLE IF NOT EXISTS folders (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
parent_id INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (parent_id) REFERENCES folders (id) ON DELETE CASCADE
);
""")
# Create generated_pdfs table
cursor.execute("""
CREATE TABLE IF NOT EXISTS generated_pdfs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
filename TEXT NOT NULL,
subject TEXT NOT NULL,
tags TEXT,
notes TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
source_filename TEXT,
folder_id INTEGER,
persist INTEGER DEFAULT 0,
FOREIGN KEY (session_id) REFERENCES sessions (id),
FOREIGN KEY (folder_id) REFERENCES folders (id) ON DELETE SET NULL
);
""")
# Create neetprep_questions table
cursor.execute("""
CREATE TABLE IF NOT EXISTS neetprep_questions (
id TEXT PRIMARY KEY,
question_text TEXT,
options TEXT,
correct_answer_index INTEGER,
level TEXT,
topic TEXT,
subject TEXT,
last_fetched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
# Create neetprep_processed_attempts table
cursor.execute("""
CREATE TABLE IF NOT EXISTS neetprep_processed_attempts (
attempt_id TEXT PRIMARY KEY,
processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
# Create subjective_folders table
cursor.execute("""
CREATE TABLE IF NOT EXISTS subjective_folders (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
parent_id INTEGER,
user_id INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (parent_id) REFERENCES subjective_folders (id) ON DELETE CASCADE,
FOREIGN KEY (user_id) REFERENCES users (id)
);
""")
# Create subjective_questions table
cursor.execute("""
CREATE TABLE IF NOT EXISTS subjective_questions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
question_topic TEXT NOT NULL,
question_html TEXT NOT NULL,
question_number_within_topic TEXT,
folder_id INTEGER,
topic_order INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users (id),
FOREIGN KEY (folder_id) REFERENCES subjective_folders (id) ON DELETE SET NULL
);
""")
# Create drive_sources table
cursor.execute("""
CREATE TABLE IF NOT EXISTS drive_sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
url TEXT NOT NULL,
local_path TEXT,
last_synced TIMESTAMP,
user_id INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users (id)
);
""")
# Create pdf_access_history table
cursor.execute("""
CREATE TABLE IF NOT EXISTS pdf_access_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
file_id TEXT NOT NULL,
filename TEXT NOT NULL,
source_type TEXT DEFAULT 'drive_api',
opened_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users (id)
);
""")
# Create qtab_folders table for question table organization
cursor.execute("""
CREATE TABLE IF NOT EXISTS qtab_folders (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
parent_id INTEGER,
user_id INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (parent_id) REFERENCES qtab_folders (id) ON DELETE CASCADE,
FOREIGN KEY (user_id) REFERENCES users (id)
);
""")
# Create qtab_images table for question-answer extraction
cursor.execute("""
CREATE TABLE IF NOT EXISTS qtab_images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
folder_id INTEGER,
filename TEXT NOT NULL,
original_name TEXT NOT NULL,
result_json TEXT,
status TEXT DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users (id),
FOREIGN KEY (folder_id) REFERENCES qtab_folders (id) ON DELETE SET NULL
);
""")
# --- Migrations ---
try:
cursor.execute("SELECT topic_order FROM subjective_questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE subjective_questions ADD COLUMN topic_order INTEGER DEFAULT 0")
try:
cursor.execute("SELECT folder_id FROM subjective_questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE subjective_questions ADD COLUMN folder_id INTEGER REFERENCES subjective_folders(id) ON DELETE SET NULL")
try:
cursor.execute("SELECT tags FROM questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE questions ADD COLUMN tags TEXT")
try:
cursor.execute("SELECT tags FROM questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE questions ADD COLUMN tags TEXT")
try:
cursor.execute("SELECT image_type FROM images LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE images ADD COLUMN image_type TEXT DEFAULT 'original'")
try:
cursor.execute("SELECT original_filename FROM sessions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE sessions ADD COLUMN original_filename TEXT")
try:
cursor.execute("SELECT persist FROM sessions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE sessions ADD COLUMN persist INTEGER DEFAULT 0")
try:
cursor.execute("SELECT name FROM sessions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE sessions ADD COLUMN name TEXT")
try:
cursor.execute("SELECT persist FROM generated_pdfs LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE generated_pdfs ADD COLUMN persist INTEGER DEFAULT 0")
try:
cursor.execute("SELECT folder_id FROM generated_pdfs LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE generated_pdfs ADD COLUMN folder_id INTEGER REFERENCES folders(id) ON DELETE SET NULL")
try:
cursor.execute("SELECT question_text FROM questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE questions ADD COLUMN question_text TEXT")
try:
cursor.execute("SELECT chapter FROM questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE questions ADD COLUMN chapter TEXT")
# --- Multi-user Migrations ---
try:
cursor.execute("SELECT user_id FROM sessions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE sessions ADD COLUMN user_id INTEGER REFERENCES users(id)")
try:
cursor.execute("SELECT user_id FROM generated_pdfs LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE generated_pdfs ADD COLUMN user_id INTEGER REFERENCES users(id)")
try:
cursor.execute("SELECT user_id FROM folders LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE folders ADD COLUMN user_id INTEGER REFERENCES users(id)")
try:
cursor.execute("SELECT neetprep_enabled FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN neetprep_enabled INTEGER DEFAULT 1")
try:
cursor.execute("SELECT dpi FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN dpi INTEGER DEFAULT 100")
try:
cursor.execute("SELECT color_rm_dpi FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN color_rm_dpi INTEGER DEFAULT 200")
try:
cursor.execute("SELECT box_id FROM images LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE images ADD COLUMN box_id TEXT")
try:
cursor.execute("SELECT session_type FROM sessions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE sessions ADD COLUMN session_type TEXT DEFAULT 'standard'")
try:
cursor.execute("SELECT v2_default FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN v2_default INTEGER DEFAULT 0")
try:
cursor.execute("SELECT magnifier_enabled FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN magnifier_enabled INTEGER DEFAULT 1")
try:
cursor.execute("SELECT source_type FROM drive_sources LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE drive_sources ADD COLUMN source_type TEXT DEFAULT 'folder'")
try:
cursor.execute("SELECT google_token FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN google_token TEXT")
try:
cursor.execute("SELECT question_json FROM subjective_questions LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE subjective_questions ADD COLUMN question_json TEXT")
try:
cursor.execute("SELECT classifier_model FROM users LIMIT 1")
except sqlite3.OperationalError:
cursor.execute("ALTER TABLE users ADD COLUMN classifier_model TEXT DEFAULT 'gemini'")
conn.commit()
conn.close()
def cleanup_old_data():
"""Removes sessions, files, and PDFs older than 1 day, unless persisted."""
print("Running cleanup of old data...")
conn = get_db_connection()
cutoff = datetime.now() - timedelta(days=1)
old_sessions = conn.execute('SELECT id FROM sessions WHERE created_at < ? AND persist = 0', (cutoff,)).fetchall()
for session in old_sessions:
session_id = session['id']
print(f"Deleting old session: {session_id}")
images_to_delete = conn.execute('SELECT filename, processed_filename FROM images WHERE session_id = ?', (session_id,)).fetchall()
for img in images_to_delete:
if img['filename']:
try: os.remove(os.path.join(current_app.config['UPLOAD_FOLDER'], img['filename']))
except OSError: pass
if img['processed_filename']:
try: os.remove(os.path.join(current_app.config['PROCESSED_FOLDER'], img['processed_filename']))
except OSError: pass
conn.execute('DELETE FROM questions WHERE session_id = ?', (session_id,))
conn.execute('DELETE FROM images WHERE session_id = ?', (session_id,))
conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,))
old_pdfs = conn.execute('SELECT id, filename FROM generated_pdfs WHERE created_at < ? AND persist = 0', (cutoff,)).fetchall()
for pdf in old_pdfs:
pdf_id, pdf_filename = pdf['id'], pdf['filename']
print(f"Deleting old generated PDF: {pdf_filename}")
try:
os.remove(os.path.join(current_app.config['OUTPUT_FOLDER'], pdf_filename))
except OSError:
pass
conn.execute('DELETE FROM generated_pdfs WHERE id = ?', (pdf_id,))
db_filenames = {row['filename'] for row in conn.execute('SELECT filename FROM generated_pdfs').fetchall()}
for filename in os.listdir(current_app.config['OUTPUT_FOLDER']):
if filename not in db_filenames:
file_path = os.path.join(current_app.config['OUTPUT_FOLDER'], filename)
file_mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
if file_mtime < cutoff:
print(f"Deleting old, orphaned PDF: {filename}")
try:
os.remove(file_path)
except OSError:
pass
conn.commit()
conn.close()
print("Cleanup finished.")
def get_folder_tree(user_id=None):
conn = get_db_connection()
if user_id:
folders = conn.execute('SELECT id, name, parent_id FROM folders WHERE user_id = ? ORDER BY name', (user_id,)).fetchall()
else:
# Fallback for old behavior or admin views
folders = conn.execute('SELECT id, name, parent_id FROM folders ORDER BY name').fetchall()
conn.close()
folder_map = {f['id']: dict(f) for f in folders}
tree = []
for folder_id, folder in folder_map.items():
if folder['parent_id']:
parent = folder_map.get(folder['parent_id'])
if parent:
if 'children' not in parent:
parent['children'] = []
parent['children'].append(folder)
else:
tree.append(folder)
return tree
def get_subjective_folder_tree(user_id):
conn = get_db_connection()
folders = conn.execute('SELECT id, name, parent_id FROM subjective_folders WHERE user_id = ? ORDER BY name', (user_id,)).fetchall()
conn.close()
folder_map = {f['id']: dict(f) for f in folders}
tree = []
for folder_id, folder in folder_map.items():
if folder['parent_id']:
parent = folder_map.get(folder['parent_id'])
if parent:
if 'children' not in parent:
parent['children'] = []
parent['children'].append(folder)
else:
tree.append(folder)
return tree
def get_qtab_folder_tree(user_id):
conn = get_db_connection()
folders = conn.execute('SELECT id, name, parent_id FROM qtab_folders WHERE user_id = ? ORDER BY name', (user_id,)).fetchall()
conn.close()
folder_map = {f['id']: dict(f) for f in folders}
tree = []
for folder_id, folder in folder_map.items():
if folder['parent_id']:
parent = folder_map.get(folder['parent_id'])
if parent:
if 'children' not in parent:
parent['children'] = []
parent['children'].append(folder)
else:
tree.append(folder)
return tree
def get_all_descendant_folder_ids(conn, folder_id, user_id=None):
"""Recursively gets all descendant folder IDs for a given folder, scoped to a user."""
if user_id:
children = conn.execute('SELECT id FROM folders WHERE parent_id = ? AND user_id = ?', (folder_id, user_id)).fetchall()
else:
children = conn.execute('SELECT id FROM folders WHERE parent_id = ?', (folder_id,)).fetchall()
folder_ids = [f['id'] for f in children]
for child_id in folder_ids:
folder_ids.extend(get_all_descendant_folder_ids(conn, child_id, user_id))
return folder_ids