"""Routes of the ``main`` blueprint.

Serves the landing page, the CSV upload form that creates ``PaperMetadata``
rows, the schedule/volume settings form, and the static papers, logs and
about pages.
"""
import codecs
from io import StringIO

import pandas as pd
from flask import (
    Blueprint,
    current_app,
    flash,
    redirect,
    render_template,
    request,
    url_for,
)

from .db import db
from .models import PaperMetadata, ScheduleConfig, VolumeConfig

bp = Blueprint('main', __name__)

# Columns an uploaded CSV must contain before any row is imported.
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}

# Bounds enforced by the schedule/volume form handlers.
_HOURS_PER_DAY = 24
_MAX_HOUR_WEIGHT = 5
_MAX_VOLUME = 1000

_APP_TITLE = "PaperScraper"


def _none_if_missing(value):
    """Return ``None`` for pandas missing markers (NaN/NaT/None), else *value*.

    pandas represents empty CSV cells as float NaN; handing that to the model
    would store a NaN instead of a proper NULL.
    """
    return None if pd.isna(value) else value


def _parse_date(value):
    """Convert a CSV cell to a ``date``, or ``None`` if empty or unparseable."""
    if pd.isna(value):
        return None
    try:
        return pd.to_datetime(value).date()
    except Exception:
        # Deliberately best-effort: the column is optional, so a malformed
        # date must not abort the whole upload.
        return None


def _row_to_metadata(row):
    """Build an unsaved ``PaperMetadata`` from one DataFrame row."""
    return PaperMetadata(
        title=_none_if_missing(row['title']),
        doi=_none_if_missing(row['doi']),
        alt_id=_none_if_missing(row.get('alternative_id')),
        issn=_none_if_missing(row['issn']),
        type=_none_if_missing(row.get('type')),
        language=_none_if_missing(row.get('language')),
        published_online=_parse_date(row.get('published_online')),
        status=None,
        file_path=None,
        error_msg=None,
    )


def _update_volume(form):
    """Validate ``total_volume`` from *form* and persist it.

    Raises:
        ValueError: if the value is not a number in the range (0, 1000].
    """
    new_volume = float(form.get('total_volume', 0))
    # Chained comparison also rejects NaN, which `x <= 0 or x > max` lets through.
    if not 0 < new_volume <= _MAX_VOLUME:
        raise ValueError(
            f"Volume must be greater than 0 and at most {_MAX_VOLUME}")

    volume_config = VolumeConfig.query.first()
    if volume_config is None:
        db.session.add(VolumeConfig(volume=new_volume))
    else:
        volume_config.volume = new_volume
    db.session.commit()


def _parse_hour_weights(form):
    """Return the 24 hourly weights from *form*, index == hour.

    Raises:
        ValueError: if an ``hour_N`` field is missing, is not a number, or is
            outside the range [0, 5].
    """
    weights = []
    for hour in range(_HOURS_PER_DAY):
        key = f"hour_{hour}"
        if key not in form:
            raise ValueError(f"Missing data for hour {hour}")
        try:
            weight = float(form.get(key, 0))
        except ValueError:
            raise ValueError(
                f"Invalid weight value for hour {hour}") from None
        # Checked outside the try so this message is not replaced by the
        # generic "Invalid weight value" one; the chained form rejects NaN.
        if not 0 <= weight <= _MAX_HOUR_WEIGHT:
            raise ValueError(
                f"Weight for hour {hour} must be between 0 and "
                f"{_MAX_HOUR_WEIGHT}")
        weights.append(weight)
    return weights


def _update_schedule(form):
    """Validate all hourly weights in *form*, then persist them in one commit.

    Nothing is written unless every hour validates.

    Raises:
        ValueError: propagated from ``_parse_hour_weights``.
    """
    weights = _parse_hour_weights(form)
    for hour, weight in enumerate(weights):
        config = ScheduleConfig.query.get(hour)
        if config:
            config.weight = weight
        else:
            db.session.add(ScheduleConfig(hour=hour, weight=weight))
    db.session.commit()


@bp.route("/")
def index():
    """Render the landing page."""
    return render_template("index.html")


@bp.route('/upload', methods=['GET', 'POST'])
def upload():
    """Show the upload form (GET) or import an uploaded CSV (POST).

    On POST the file is decoded as UTF-8, parsed with the submitted delimiter
    (default ``,``), checked against ``REQUIRED_COLUMNS`` and every row is
    stored as a ``PaperMetadata``. Any failure re-renders the form with an
    ``error`` message; success re-renders it with a ``success`` message.
    """
    if request.method != 'POST':
        return render_template('upload.html')

    uploaded = request.files.get('file')
    delimiter = request.form.get('delimiter', ',')

    if not uploaded:
        return render_template('upload.html', error="No file selected.")

    try:
        # utf-8-sig decodes plain UTF-8 identically and drops a leading
        # byte-order mark if the file has one.
        content = ''.join(codecs.iterdecode(uploaded.stream, 'utf-8-sig'))
        df = pd.read_csv(StringIO(content), delimiter=delimiter)
    except Exception as e:
        return render_template(
            'upload.html', error=f"Failed to read CSV file: {e}")

    missing = REQUIRED_COLUMNS - set(df.columns)
    if missing:
        # Sorted so the message is stable across requests.
        return render_template(
            'upload.html',
            error=f"Missing required columns: {', '.join(sorted(missing))}")

    db.session.add_all([_row_to_metadata(row) for _, row in df.iterrows()])

    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        return render_template(
            'upload.html', error=f"Failed to save data to database: {e}")

    return render_template(
        'upload.html', success="File uploaded and validated successfully!")


@bp.route("/papers")
def papers():
    """Render the papers page."""
    return render_template("papers.html", app_title=_APP_TITLE)


@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
    """Show and update the hourly schedule weights and the total volume.

    A POST containing ``total_volume`` updates the volume; any other POST is
    treated as a schedule update and must carry ``hour_0`` .. ``hour_23``.
    Validation failures are flashed and the session is rolled back. The page
    is always re-rendered with the currently stored values.
    """
    if request.method == "POST":
        if 'total_volume' in request.form:
            try:
                _update_volume(request.form)
                flash("Volume updated successfully!", "success")
            except ValueError as e:
                db.session.rollback()
                flash(f"Error updating volume: {e}", "error")
        else:
            try:
                _update_schedule(request.form)
                flash("Schedule updated successfully!", "success")
            except ValueError as e:
                db.session.rollback()
                flash(f"Error updating schedule: {e}", "error")

    weights_by_hour = {
        sc.hour: sc.weight
        for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
    }
    volume_config = VolumeConfig.query.first()
    # No volume row exists until one is saved; pass None instead of crashing
    # with AttributeError on a fresh database.
    current_volume = volume_config.volume if volume_config is not None else None
    return render_template(
        "schedule.html",
        schedule=weights_by_hour,
        volume=current_volume,
        app_title=_APP_TITLE,
    )


@bp.route("/logs")
def logs():
    """Render the logs page."""
    return render_template("logs.html", app_title=_APP_TITLE)


@bp.route("/about")
def about():
    """Render the about page."""
    return render_template("about.html", app_title=_APP_TITLE)