2025-04-01 23:02:19 +02:00

272 lines
8.9 KiB
Python

from flask import Blueprint, render_template, current_app, request, flash, redirect, url_for, send_file
from .models import ScheduleConfig, VolumeConfig, PaperMetadata
from .db import db
import pandas as pd
from io import StringIO
import codecs
import datetime
import io
import csv
from sqlalchemy import asc, desc
bp = Blueprint('main', __name__)
@bp.route("/")
def index():
return render_template("index.html")
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
@bp.route('/upload', methods=['GET', 'POST'])
def upload():
if request.method == 'POST':
file = request.files.get('file')
delimiter = request.form.get('delimiter', ',')
if not file:
return render_template('upload.html', error="No file selected.")
try:
stream = codecs.iterdecode(file.stream, 'utf-8')
content = ''.join(stream)
df = pd.read_csv(StringIO(content), delimiter=delimiter)
except Exception as e:
return render_template('upload.html', error=f"Failed to read CSV file: {e}")
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
return render_template('upload.html', error=f"Missing required columns: {', '.join(missing)}")
# Optional: parse 'published_online' to date
def parse_date(val):
if pd.isna(val):
return None
try:
return pd.to_datetime(val).date()
except Exception:
return None
for _, row in df.iterrows():
metadata = PaperMetadata(
title=row['title'],
doi=row['doi'],
alt_id=row.get('alternative_id'),
issn=row['issn'],
type=row.get('type'),
language=row.get('language'),
published_online=parse_date(row.get('published_online')),
status="New",
file_path=None,
error_msg=None
)
db.session.add(metadata)
try:
db.session.commit()
except Exception as e:
db.session.rollback()
return render_template('upload.html', error=f"Failed to save data to database: {e}")
return render_template('upload.html', success="File uploaded and validated successfully!")
return render_template('upload.html')
@bp.route('/papers')
def list_papers():
page = request.args.get('page', 1, type=int)
per_page = 50
# Filters
status = request.args.get('status')
created_from = request.args.get('created_from')
created_to = request.args.get('created_to')
updated_from = request.args.get('updated_from')
updated_to = request.args.get('updated_to')
sort_by = request.args.get('sort_by', 'created_at')
sort_dir = request.args.get('sort_dir', 'desc')
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
from datetime import datetime
try:
return datetime.strptime(val, '%Y-%m-%d')
except (ValueError, TypeError):
return None
if created_from := parse_date(created_from):
query = query.filter(PaperMetadata.created_at >= created_from)
if created_to := parse_date(created_to):
query = query.filter(PaperMetadata.created_at <= created_to)
if updated_from := parse_date(updated_from):
query = query.filter(PaperMetadata.updated_at >= updated_from)
if updated_to := parse_date(updated_to):
query = query.filter(PaperMetadata.updated_at <= updated_to)
# Sorting
sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
sort_func = desc if sort_dir == 'desc' else asc
query = query.order_by(sort_func(sort_col))
# Pagination
pagination = query.paginate(page=page, per_page=per_page, error_out=False)
# Statistics
total_papers = PaperMetadata.query.count()
status_counts = (
db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
.group_by(PaperMetadata.status)
.all()
)
status_counts = {status: count for status, count in status_counts}
return render_template(
'papers.html',
papers=pagination.items,
pagination=pagination,
total_papers=total_papers,
status_counts=status_counts,
sort_by=sort_by,
sort_dir=sort_dir,
)
@bp.route('/papers/export')
def export_papers():
query = PaperMetadata.query
# Filters
status = request.args.get('status')
created_from = request.args.get('created_from')
created_to = request.args.get('created_to')
updated_from = request.args.get('updated_from')
updated_to = request.args.get('updated_to')
sort_by = request.args.get('sort_by', 'created_at')
sort_dir = request.args.get('sort_dir', 'desc')
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
try:
return datetime.datetime.strptime(val, "%Y-%m-%d")
except Exception:
return None
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(['ID', 'Title', 'Journal', 'DOI', 'ISSN',
'Status', 'Created At', 'Updated At'])
for paper in query:
writer.writerow([
paper.id,
paper.title,
getattr(paper, 'journal', ''),
paper.doi,
paper.issn,
paper.status,
paper.created_at,
paper.updated_at
])
output.seek(0)
return send_file(io.BytesIO(output.read().encode('utf-8')),
mimetype='text/csv',
as_attachment=True,
download_name='papers.csv')
from flask import jsonify, render_template
@bp.route('/papers/<int:paper_id>/detail')
def paper_detail(paper_id):
paper = PaperMetadata.query.get_or_404(paper_id)
return render_template('partials/paper_detail_modal.html', paper=paper)
@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if 'total_volume' in request.form:
# Volume update
try:
new_volume = float(request.form.get('total_volume', 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5")
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
schedule = {sc.hour: sc.weight for sc in ScheduleConfig.query.order_by(
ScheduleConfig.hour).all()}
volume = VolumeConfig.query.first()
return render_template("schedule.html", schedule=schedule, volume=volume.volume, app_title="PaperScraper")
@bp.route("/logs")
def logs():
return render_template("logs.html", app_title="PaperScraper")
@bp.route("/about")
def about():
return render_template("about.html", app_title="PaperScraper")