2025-04-01 21:13:55 +02:00

151 lines
5.2 KiB
Python

from flask import Blueprint, render_template, current_app, request, flash, redirect, url_for
from .models import ScheduleConfig, VolumeConfig, PaperMetadata
from .db import db
import pandas as pd
from io import StringIO
import codecs
# Blueprint named 'main'; the route handlers in this module register on it.
bp = Blueprint('main', __name__)
@bp.route("/")
def index():
    """Render the landing page from ``index.html``."""
    landing_template = "index.html"
    return render_template(landing_template)
# Column names an uploaded CSV must contain; upload() rejects files missing any.
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
@bp.route('/upload', methods=['GET', 'POST'])
def upload():
    """Show the CSV upload form and, on POST, import the uploaded file.

    On POST the uploaded file is decoded as UTF-8, parsed with pandas using
    the submitted delimiter, checked against ``REQUIRED_COLUMNS`` and then
    added to the session as one ``PaperMetadata`` object per CSV row, all
    committed together.

    Returns:
        The rendered ``upload.html`` template. After a POST it carries either
        an ``error`` message (no file, unreadable CSV, missing columns,
        database failure) or a ``success`` message.
    """
    if request.method != 'POST':
        return render_template('upload.html')

    file = request.files.get('file')
    # An empty delimiter field falls back to a comma, like a missing field.
    delimiter = request.form.get('delimiter') or ','

    if not file:
        return render_template('upload.html', error="No file selected.")

    try:
        stream = codecs.iterdecode(file.stream, 'utf-8')
        content = ''.join(stream)
        df = pd.read_csv(StringIO(content), delimiter=delimiter)
    except Exception as e:
        # Boundary handler: any decode/parse failure is reported to the user.
        return render_template('upload.html', error=f"Failed to read CSV file: {e}")

    missing = REQUIRED_COLUMNS - set(df.columns)
    if missing:
        # Sorted so the message is stable; set iteration order is not.
        return render_template(
            'upload.html',
            error=f"Missing required columns: {', '.join(sorted(missing))}",
        )

    def clean(val):
        """Map pandas' missing-value markers (NaN/NaT/None) to ``None``."""
        return None if pd.isna(val) else val

    def parse_date(val):
        """Best-effort conversion of a cell to a ``date``; ``None`` on failure."""
        if pd.isna(val):
            return None
        try:
            return pd.to_datetime(val).date()
        except Exception:
            # Deliberately lenient: an unparseable date must not abort the upload.
            return None

    # NOTE(review): 'journal' is a required column but is not written to
    # PaperMetadata here -- confirm whether the model has a field for it.
    for _, row in df.iterrows():
        metadata = PaperMetadata(
            # Empty CSV cells arrive as NaN; store them as None instead of
            # passing the float sentinel on to the database layer.
            title=clean(row['title']),
            doi=clean(row['doi']),
            alt_id=clean(row.get('alternative_id')),
            issn=clean(row['issn']),
            type=clean(row.get('type')),
            language=clean(row.get('language')),
            published_online=parse_date(row.get('published_online')),
            status=None,
            file_path=None,
            error_msg=None,
        )
        db.session.add(metadata)

    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        return render_template('upload.html', error=f"Failed to save data to database: {e}")

    return render_template('upload.html', success="File uploaded and validated successfully!")
@bp.route("/papers")
def papers():
    """Render ``papers.html`` with the application title."""
    context = {"app_title": "PaperScraper"}
    return render_template("papers.html", **context)
def _parse_volume(raw_value):
    """Convert a submitted volume to ``float`` and validate its range.

    Args:
        raw_value: The value submitted in the ``total_volume`` form field.

    Returns:
        The volume as a float, greater than 0 and at most 1000.

    Raises:
        ValueError: If the value is not numeric or is outside that range.
    """
    volume = float(raw_value)
    # Written as a positive range test so that NaN, which compares False
    # against everything, is rejected instead of slipping through.
    if not 0 < volume <= 1000:
        raise ValueError("Volume must be between 1 and 1000")
    return volume


def _parse_hourly_weights(form):
    """Read and validate the ``hour_0`` .. ``hour_23`` weights from *form*.

    Args:
        form: Mapping of submitted form fields.

    Returns:
        A dict mapping each hour (0-23) to its weight as a float in [0, 5].

    Raises:
        ValueError: If an hour is missing, not numeric, or out of range.
    """
    weights = {}
    for hour in range(24):
        key = f"hour_{hour}"
        if key not in form:
            raise ValueError(f"Missing data for hour {hour}")
        try:
            weight = float(form.get(key, 0))
        except ValueError:
            raise ValueError(f"Invalid weight value for hour {hour}") from None
        # Checked outside the try above so this message is not replaced by
        # the generic "Invalid weight value" one; also rejects NaN.
        if not 0 <= weight <= 5:
            raise ValueError(
                f"Weight for hour {hour} must be between 0 and 5")
        weights[hour] = weight
    return weights


@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
    """Show the schedule page and handle volume or hourly-weight updates.

    A POST containing ``total_volume`` updates (or creates) the single
    ``VolumeConfig`` row. Any other POST is treated as a schedule update and
    must carry ``hour_0`` .. ``hour_23``; all 24 weights are validated before
    any ``ScheduleConfig`` row is touched. Validation errors are flashed and
    the session is rolled back.

    Returns:
        The rendered ``schedule.html`` template with the stored hourly
        weights and the current volume.
    """
    if request.method == "POST":
        if 'total_volume' in request.form:
            # Volume update
            try:
                new_volume = _parse_volume(request.form.get('total_volume', 0))
                volume_config = VolumeConfig.query.first()
                if not volume_config:
                    volume_config = VolumeConfig(volume=new_volume)
                    db.session.add(volume_config)
                else:
                    volume_config.volume = new_volume
                db.session.commit()
                flash("Volume updated successfully!", "success")
            except ValueError as e:
                db.session.rollback()
                flash(f"Error updating volume: {str(e)}", "error")
        else:
            # Schedule update: validate everything first, then write.
            try:
                weights = _parse_hourly_weights(request.form)
                for hour, weight in weights.items():
                    config = ScheduleConfig.query.get(hour)
                    if config:
                        config.weight = weight
                    else:
                        db.session.add(ScheduleConfig(hour=hour, weight=weight))
                db.session.commit()
                flash("Schedule updated successfully!", "success")
            except ValueError as e:
                db.session.rollback()
                flash(f"Error updating schedule: {str(e)}", "error")

    weights_by_hour = {
        sc.hour: sc.weight
        for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
    }
    volume = VolumeConfig.query.first()
    # first() returns None when no volume has been stored yet; avoid the
    # AttributeError that dereferencing it would raise.
    # NOTE(review): 0 is a placeholder for "not configured" -- confirm what
    # schedule.html expects in that case.
    current_volume = volume.volume if volume else 0
    return render_template(
        "schedule.html",
        schedule=weights_by_hour,
        volume=current_volume,
        app_title="PaperScraper",
    )
@bp.route("/logs")
def logs():
    """Render ``logs.html`` with the application title."""
    context = {"app_title": "PaperScraper"}
    return render_template("logs.html", **context)
@bp.route("/about")
def about():
    """Render ``about.html`` with the application title."""
    context = {"app_title": "PaperScraper"}
    return render_template("about.html", **context)