"""Flask views for the PaperScraper UI.

Covers CSV upload of paper metadata, a filterable/sortable paper list with
CSV export, a per-paper detail partial, and the schedule/volume settings page.
"""

import codecs
import csv
import datetime
import io
from io import StringIO

import pandas as pd
from flask import (
    Blueprint,
    current_app,
    flash,
    jsonify,
    redirect,
    render_template,
    request,
    send_file,
    url_for,
)
from sqlalchemy import asc, desc
from sqlalchemy import inspect as sa_inspect

from .db import db
from .models import PaperMetadata, ScheduleConfig, VolumeConfig

bp = Blueprint('main', __name__)

# Columns an uploaded CSV must contain before any row is imported.
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}


@bp.route("/")
def index():
    """Render the landing page."""
    return render_template("index.html")


def _none_if_missing(value):
    """Return None for pandas missing values (NaN/NaT/None), else the value."""
    return None if pd.isna(value) else value


def _parse_published_date(value):
    """Convert a CSV cell to a ``date``; return None if missing or unparsable."""
    if pd.isna(value):
        return None
    try:
        return pd.to_datetime(value).date()
    except Exception:
        # Best effort: an unparsable optional date must not abort the upload.
        return None


@bp.route('/upload', methods=['GET', 'POST'])
def upload():
    """Show the upload form (GET) or import paper metadata from a CSV (POST).

    The POST expects a UTF-8 CSV in the ``file`` field and an optional
    ``delimiter`` field (default ``,``). Every row becomes a ``PaperMetadata``
    record with status ``"New"``; all rows are committed in one transaction.
    Any failure re-renders the form with an ``error`` message.
    """
    if request.method != 'POST':
        return render_template('upload.html')

    file = request.files.get('file')
    # An empty delimiter field falls back to a comma instead of failing.
    delimiter = request.form.get('delimiter') or ','

    if not file:
        return render_template('upload.html', error="No file selected.")

    try:
        content = ''.join(codecs.iterdecode(file.stream, 'utf-8'))
        df = pd.read_csv(StringIO(content), delimiter=delimiter)
    except Exception as e:
        return render_template('upload.html', error=f"Failed to read CSV file: {e}")

    missing = REQUIRED_COLUMNS - set(df.columns)
    if missing:
        # Sorted so the message is deterministic.
        return render_template(
            'upload.html',
            error=f"Missing required columns: {', '.join(sorted(missing))}",
        )

    # Empty CSV cells arrive as NaN; store them as NULL rather than a float.
    papers = [
        PaperMetadata(
            title=_none_if_missing(row['title']),
            doi=_none_if_missing(row['doi']),
            alt_id=_none_if_missing(row.get('alternative_id')),
            issn=_none_if_missing(row['issn']),
            type=_none_if_missing(row.get('type')),
            language=_none_if_missing(row.get('language')),
            published_online=_parse_published_date(row.get('published_online')),
            status="New",
            file_path=None,
            error_msg=None,
        )
        for _, row in df.iterrows()
    ]
    db.session.add_all(papers)

    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        return render_template('upload.html', error=f"Failed to save data to database: {e}")

    return render_template('upload.html', success="File uploaded and validated successfully!")


def _parse_iso_date(value):
    """Parse a ``YYYY-MM-DD`` string; return None when absent or malformed."""
    try:
        return datetime.datetime.strptime(value, '%Y-%m-%d')
    except (ValueError, TypeError):
        return None


def _filtered_papers_query(args):
    """Build the paper query for the filter and sort parameters in ``args``.

    Recognised keys: ``status``, ``created_from``, ``created_to``,
    ``updated_from``, ``updated_to`` (dates as ``YYYY-MM-DD``), ``sort_by``
    and ``sort_dir``. Unparsable dates are ignored.

    Returns:
        ``(query, sort_by, sort_dir)`` where ``sort_by``/``sort_dir`` are the
        values actually applied after validation.
    """
    query = PaperMetadata.query

    status = args.get('status')
    if status:
        query = query.filter(PaperMetadata.status == status)

    # NOTE(review): the *_to bounds are parsed as midnight, so records later on
    # the end day are excluded — confirm whether an inclusive day is intended.
    created_from = _parse_iso_date(args.get('created_from'))
    if created_from is not None:
        query = query.filter(PaperMetadata.created_at >= created_from)
    created_to = _parse_iso_date(args.get('created_to'))
    if created_to is not None:
        query = query.filter(PaperMetadata.created_at <= created_to)
    updated_from = _parse_iso_date(args.get('updated_from'))
    if updated_from is not None:
        query = query.filter(PaperMetadata.updated_at >= updated_from)
    updated_to = _parse_iso_date(args.get('updated_to'))
    if updated_to is not None:
        query = query.filter(PaperMetadata.updated_at <= updated_to)

    # Only mapped columns may be sorted on: a bare getattr() would accept any
    # attribute name from the query string (e.g. "query") and crash order_by.
    sort_by = args.get('sort_by', 'created_at')
    if sort_by not in sa_inspect(PaperMetadata).column_attrs.keys():
        sort_by = 'created_at'
    sort_dir = 'desc' if args.get('sort_dir', 'desc') == 'desc' else 'asc'
    direction = desc if sort_dir == 'desc' else asc
    query = query.order_by(direction(getattr(PaperMetadata, sort_by)))

    return query, sort_by, sort_dir


@bp.route('/papers')
def list_papers():
    """Render a paginated, filterable, sortable list of papers.

    Shows 50 papers per page (``page`` query parameter) and passes overall
    statistics to the template: the unfiltered total and a count per status.
    """
    page = request.args.get('page', 1, type=int)
    per_page = 50

    query, sort_by, sort_dir = _filtered_papers_query(request.args)

    pagination = query.paginate(page=page, per_page=per_page, error_out=False)

    # Statistics are computed over all papers, not just the filtered subset.
    total_papers = PaperMetadata.query.count()
    status_rows = (
        db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
        .group_by(PaperMetadata.status)
        .all()
    )
    status_counts = {name: count for name, count in status_rows}

    return render_template(
        'papers.html',
        papers=pagination.items,
        pagination=pagination,
        total_papers=total_papers,
        status_counts=status_counts,
        sort_by=sort_by,
        sort_dir=sort_dir,
    )


@bp.route('/papers/export')
def export_papers():
    """Download the papers matching the current filters as ``papers.csv``.

    Accepts the same filter and sort query parameters as ``list_papers`` so
    the export matches what the list view shows (without pagination).
    """
    query, _, _ = _filtered_papers_query(request.args)

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(['ID', 'Title', 'Journal', 'DOI', 'ISSN', 'Status', 'Created At', 'Updated At'])
    for paper in query:
        writer.writerow([
            paper.id,
            paper.title,
            # Falls back to an empty cell when the model has no journal attribute.
            getattr(paper, 'journal', ''),
            paper.doi,
            paper.issn,
            paper.status,
            paper.created_at,
            paper.updated_at,
        ])

    return send_file(
        io.BytesIO(output.getvalue().encode('utf-8')),
        mimetype='text/csv',
        as_attachment=True,
        download_name='papers.csv',
    )


# The route needs a URL variable for the view's ``paper_id`` argument; without
# it Flask calls the view with no arguments and it fails with TypeError.
# NOTE(review): assumes an integer primary key — confirm against the model.
@bp.route('/papers/<int:paper_id>/detail')
def paper_detail(paper_id):
    """Render the detail modal partial for one paper, or 404 if not found."""
    paper = PaperMetadata.query.get_or_404(paper_id)
    return render_template('partials/paper_detail_modal.html', paper=paper)


def _update_volume(form):
    """Validate ``total_volume`` from the form and persist it, flashing the result."""
    try:
        new_volume = float(form.get('total_volume', 0))
        # Chained comparison also rejects NaN, which passes `<= 0 or > 1000`.
        if not (0 < new_volume <= 1000):
            raise ValueError("Volume must be between 1 and 1000")

        volume_config = VolumeConfig.query.first()
        if volume_config is None:
            db.session.add(VolumeConfig(volume=new_volume))
        else:
            volume_config.volume = new_volume

        db.session.commit()
        flash("Volume updated successfully!", "success")
    except ValueError as e:
        db.session.rollback()
        flash(f"Error updating volume: {str(e)}", "error")


def _parse_schedule_weights(form):
    """Return ``{hour: weight}`` for all 24 hours, or raise ValueError.

    Each ``hour_<n>`` field must be present, numeric and within 0..5.
    """
    weights = {}
    for hour in range(24):
        key = f"hour_{hour}"
        if key not in form:
            raise ValueError(f"Missing data for hour {hour}")
        try:
            weight = float(form.get(key, 0))
        except ValueError:
            raise ValueError(f"Invalid weight value for hour {hour}") from None
        # Checked outside the try so the range message is not replaced by the
        # generic "Invalid weight value" one; the chained form also rejects NaN.
        if not (0 <= weight <= 5):
            raise ValueError(f"Weight for hour {hour} must be between 0 and 5")
        weights[hour] = weight
    return weights


def _update_schedule(form):
    """Validate all hourly weights, then upsert them, flashing the result."""
    try:
        # Validate everything first so a bad field leaves the table untouched.
        weights = _parse_schedule_weights(form)
        for hour, weight in weights.items():
            config = ScheduleConfig.query.get(hour)
            if config:
                config.weight = weight
            else:
                db.session.add(ScheduleConfig(hour=hour, weight=weight))

        db.session.commit()
        flash("Schedule updated successfully!", "success")
    except ValueError as e:
        db.session.rollback()
        flash(f"Error updating schedule: {str(e)}", "error")


@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
    """Show the schedule page and handle volume or hourly-weight updates.

    A POST containing ``total_volume`` updates the volume; any other POST is
    treated as a full 24-hour weight update. Validation errors are flashed and
    the page is re-rendered with the stored values.
    """
    if request.method == "POST":
        if 'total_volume' in request.form:
            _update_volume(request.form)
        else:
            _update_schedule(request.form)

    weights_by_hour = {
        sc.hour: sc.weight
        for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
    }
    volume = VolumeConfig.query.first()
    return render_template(
        "schedule.html",
        schedule=weights_by_hour,
        # No VolumeConfig row may exist yet; pass None rather than crashing.
        volume=volume.volume if volume is not None else None,
        app_title="PaperScraper",
    )


@bp.route("/logs")
def logs():
    """Render the logs page."""
    return render_template("logs.html", app_title="PaperScraper")


@bp.route("/about")
def about():
    """Render the about page."""
    return render_template("about.html", app_title="PaperScraper")