refactors views into proper blueprint usage: splits routes into multiple files in a blueprints folder and fixes URL handling to use those blueprints

Michael Beck 2025-04-11 13:47:54 +02:00
parent 7ff29ee2cd
commit 1a1de21759
12 changed files with 450 additions and 417 deletions
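In practice this follows the standard Flask blueprint pattern: each feature module defines its own Blueprint, a central register_blueprints() attaches them to the app with a url_prefix, and templates switch from hardcoded paths to url_for("<blueprint>.<view>"). A minimal, self-contained sketch of that pattern (illustrative only, not part of the diff below):

from flask import Blueprint, Flask, url_for

# A feature module defines its routes relative to the blueprint root.
papers_bp = Blueprint("papers", __name__)

@papers_bp.route("/")
def list_papers():
    return "papers list"

# Registration applies the prefix, so the route above is served at /papers/.
app = Flask(__name__)
app.register_blueprint(papers_bp, url_prefix="/papers")

with app.test_request_context():
    # Templates reference the endpoint name, not the path:
    print(url_for("papers.list_papers"))  # -> /papers/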

View File

@@ -3,6 +3,7 @@ from flask import Flask
from .config import Config
from .db import db
from .models import init_schedule_config
from .blueprints import register_blueprints
def create_app(test_config=None):
@@ -22,8 +23,6 @@ def create_app(test_config=None):
def inject_app_title():
return {"app_title": app.config["APP_TITLE"]}
from . import views
app.register_blueprint(views.bp)
register_blueprints(app)
return app

View File

@@ -0,0 +1,15 @@
"""Blueprint registration module."""
from flask import Flask
from .main import bp as main_bp
from .papers import bp as papers_bp
from .upload import bp as upload_bp
from .schedule import bp as schedule_bp
def register_blueprints(app: Flask):
"""Register all blueprints with the Flask application."""
app.register_blueprint(main_bp)
app.register_blueprint(papers_bp, url_prefix='/papers')
app.register_blueprint(upload_bp, url_prefix='/upload')
app.register_blueprint(schedule_bp, url_prefix='/schedule')
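A quick sanity check of the resulting URL map (a hypothetical helper, not part of this commit; pass it the app returned by create_app()):

def print_routes(app):
    # Lists endpoint names and URL rules so the blueprint prefixes can be verified.
    for rule in app.url_map.iter_rules():
        print(f"{rule.endpoint:30s} {rule.rule}")

# Expected entries include:
#   main.index            /
#   papers.list_papers    /papers/
#   upload.upload         /upload/
#   schedule.schedule     /schedule/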

View File

@@ -0,0 +1,19 @@
"""Main routes for the application."""
from flask import Blueprint, render_template
bp = Blueprint("main", __name__)
@bp.route("/")
def index():
return render_template("index.html")
@bp.route("/logs")
def logs():
return render_template("logs.html", app_title="PaperScraper")
@bp.route("/about")
def about():
return render_template("about.html", app_title="PaperScraper")

View File

@@ -0,0 +1,140 @@
"""Paper management routes."""
import csv
import datetime
import io
from flask import (
Blueprint,
render_template,
request,
send_file,
)
from sqlalchemy import asc, desc
from ..db import db
from ..models import PaperMetadata
bp = Blueprint("papers", __name__)
@bp.route("/")
def list_papers():
page = request.args.get("page", 1, type=int)
per_page = 50
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
from datetime import datetime
try:
return datetime.strptime(val, "%Y-%m-%d")
except (ValueError, TypeError):
return None
if created_from := parse_date(created_from):
query = query.filter(PaperMetadata.created_at >= created_from)
if created_to := parse_date(created_to):
query = query.filter(PaperMetadata.created_at <= created_to)
if updated_from := parse_date(updated_from):
query = query.filter(PaperMetadata.updated_at >= updated_from)
if updated_to := parse_date(updated_to):
query = query.filter(PaperMetadata.updated_at <= updated_to)
# Sorting
sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
sort_func = desc if sort_dir == "desc" else asc
query = query.order_by(sort_func(sort_col))
# Pagination
pagination = query.paginate(page=page, per_page=per_page, error_out=False)
# Statistics
total_papers = PaperMetadata.query.count()
status_counts = (
db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
.group_by(PaperMetadata.status)
.all()
)
status_counts = {status: count for status, count in status_counts}
return render_template(
"papers.html",
papers=pagination.items,
pagination=pagination,
total_papers=total_papers,
status_counts=status_counts,
sort_by=sort_by,
sort_dir=sort_dir,
)
@bp.route("/export")
def export_papers():
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
try:
return datetime.datetime.strptime(val, "%Y-%m-%d")
except Exception:
return None
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(
["ID", "Title", "Journal", "DOI", "ISSN", "Status", "Created At", "Updated At"]
)
for paper in query:
writer.writerow(
[
paper.id,
paper.title,
getattr(paper, "journal", ""),
paper.doi,
paper.issn,
paper.status,
paper.created_at,
paper.updated_at,
]
)
output.seek(0)
return send_file(
io.BytesIO(output.read().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="papers.csv",
)
@bp.route("/<int:paper_id>/detail")
def paper_detail(paper_id):
paper = PaperMetadata.query.get_or_404(paper_id)
return render_template("partials/paper_detail_modal.html", paper=paper)

View File

@@ -0,0 +1,79 @@
"""Schedule configuration routes."""
from flask import Blueprint, flash, render_template, request
from ..db import db
from ..models import ScheduleConfig, VolumeConfig
bp = Blueprint("schedule", __name__)
@bp.route("/", methods=["GET", "POST"])
def schedule():
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update
try:
new_volume = float(request.form.get("total_volume", 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5"
)
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
schedule = {
sc.hour: sc.weight
for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
}
volume = VolumeConfig.query.first()
return render_template(
"schedule.html",
schedule=schedule,
volume=volume.volume if volume else 0,
app_title="PaperScraper",
)

View File

@@ -0,0 +1,170 @@
"""Upload functionality for paper metadata."""
import codecs
import csv
import datetime
from io import StringIO
import pandas as pd
from flask import (
Blueprint,
flash,
redirect,
render_template,
request,
send_file,
session,
url_for,
)
from ..db import db
from ..models import PaperMetadata
bp = Blueprint("upload", __name__)
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
@bp.route("/", methods=["GET", "POST"])
def upload():
if request.method == "POST":
file = request.files.get("file")
delimiter = request.form.get("delimiter", ",")
duplicate_strategy = request.form.get("duplicate_strategy", "skip")
if not file:
return render_template("upload.html", error="No file selected.")
try:
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
df = pd.read_csv(StringIO(content), delimiter=delimiter)
except Exception as e:
return render_template("upload.html", error=f"Failed to read CSV file: {e}")
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
return render_template(
"upload.html", error=f"Missing required columns: {', '.join(missing)}"
)
# Optional: parse 'published_online' to date
def parse_date(val):
if pd.isna(val):
return None
try:
return pd.to_datetime(val).date()
except Exception:
return None
# Count statistics
added_count = 0
skipped_count = 0
updated_count = 0
error_count = 0
# Collect error information
errors = []
# Process each row
for index, row in df.iterrows():
try:
# Get DOI from row for error reporting
doi = str(row.get("doi", "N/A"))
# Validate required fields
for field in ["title", "doi", "issn"]:
if pd.isna(row.get(field)) or not str(row.get(field)).strip():
raise ValueError(f"Missing required field: {field}")
# Check if paper with this DOI already exists
existing = PaperMetadata.query.filter_by(doi=doi).first()
if existing:
if duplicate_strategy == 'update':
# Update existing record
existing.title = row["title"]
existing.alt_id = row.get("alternative_id")
existing.issn = row["issn"]
existing.journal = row.get("journal")
existing.type = row.get("type")
existing.language = row.get("language")
existing.published_online = parse_date(row.get("published_online"))
updated_count += 1
else:
# Skip this record
skipped_count += 1
continue
else:
# Create new record
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
journal=row.get("journal"),
type=row.get("type"),
language=row.get("language"),
published_online=parse_date(row.get("published_online")),
status="New",
file_path=None,
error_msg=None,
)
db.session.add(metadata)
added_count += 1
except Exception as e:
error_count += 1
errors.append({
"row": index + 2, # +2 because index is 0-based and we have a header row
"doi": row.get("doi", "N/A"),
"error": str(e)
})
continue # Skip this row and continue with the next
try:
db.session.commit()
except Exception as e:
db.session.rollback()
return render_template(
"upload.html", error=f"Failed to save data to database: {e}"
)
# Prepare error samples for display
error_samples = errors[:5] if errors else []
error_message = None
if errors:
error_message = f"Encountered {len(errors)} errors. First 5 shown below."
# Store the full errors list in the session for potential download
if errors:
error_csv = StringIO()
writer = csv.DictWriter(error_csv, fieldnames=["row", "doi", "error"])
writer.writeheader()
writer.writerows(errors)
session["error_data"] = error_csv.getvalue()
return render_template(
"upload.html",
success=f"File processed! Added: {added_count}, Updated: {updated_count}, Skipped: {skipped_count}, Errors: {error_count}",
error_message=error_message,
error_samples=error_samples
)
return render_template("upload.html")
@bp.route("/download_error_log")
def download_error_log():
error_data = session.get("error_data")
if not error_data:
flash("No error data available.")
return redirect(url_for("upload.upload"))
buffer = StringIO(error_data)
return send_file(
buffer,
mimetype="text/csv",
as_attachment=True,
download_name=f"upload_errors_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
)

View File

@@ -16,7 +16,7 @@
(title, DOI, ISSN, etc.) are stored. Errors are reported without
aborting the batch.
</p>
<a href="/import" class="btn btn-sm btn-outline-primary">Upload Now</a>
<a href="{{ url_for('upload.upload') }}" class="btn btn-sm btn-outline-primary">Upload Now</a>
</div>
</div>
</div>
@@ -43,7 +43,7 @@
inspect errors. Files are stored on disk in structured folders per
DOI.
</p>
<a href="/papers" class="btn btn-sm btn-outline-success"
<a href="{{ url_for('papers.list_papers') }}" class="btn btn-sm btn-outline-success"
>Browse Papers</a
>
</div>
@@ -59,7 +59,7 @@
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
usage pattern.
</p>
<a href="/schedule" class="btn btn-sm btn-outline-warning"
<a href="{{ url_for('schedule.schedule') }}" class="btn btn-sm btn-outline-warning"
>Adjust Schedule</a
>
</div>

View File

@@ -17,13 +17,13 @@
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li class="nav-item">
<a class="nav-link" href="/upload">Import CSV</a>
<a class="nav-link" href="{{ url_for('upload.upload') }}">Import CSV</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/papers">Papers</a>
<a class="nav-link" href="{{ url_for('papers.list_papers') }}">Papers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/schedule">Schedule</a>
<a class="nav-link" href="{{ url_for('schedule.schedule') }}">Schedule</a>
</li>
<li class="nav-item dropdown">
<a

View File

@@ -88,7 +88,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<span aria-hidden="true">&laquo;</span>
</a>
</li>
@@ -103,7 +103,7 @@
<li class="page-item {% if page_num == pagination.page %}active{% endif %}">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
<a class="page-link" href="{{ url_for('papers.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
</li>
{% else %}
<li class="page-item disabled"><span class="page-link"></span></li>
@@ -114,7 +114,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<span aria-hidden="true">&raquo;</span>
</a>
</li>
@@ -128,7 +128,7 @@
<!-- Buttons Section -->
<div class="ms-auto">
<a href="{{ url_for('main.export_papers') }}" class="btn btn-outline-secondary">Export CSV</a>
<a href="{{ url_for('papers.export_papers') }}" class="btn btn-outline-secondary">Export CSV</a>
</div>
</div>
<table class="table table-striped table-bordered table-smaller">
@@ -137,44 +137,44 @@
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'title', 'sort_dir': title_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Title</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Title</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'journal', 'sort_dir': journal_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Journal</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Journal</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'doi', 'sort_dir': doi_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">DOI</a>
<a href="{{ url_for('papers.list_papers', **params) }}">DOI</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'issn', 'sort_dir': issn_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">ISSN</a>
<a href="{{ url_for('papers.list_papers', **params) }}">ISSN</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'status', 'sort_dir': status_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Status</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Status</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'created_at', 'sort_dir': created_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Created</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Created</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'updated_at', 'sort_dir': updated_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Updated</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Updated</a>
</th>
</tr>
</thead>
<tbody>
{% for paper in papers %}
<tr>
<td><a href="#" class="paper-link" data-url="{{ url_for('main.paper_detail', paper_id=paper.id) }}">{{ paper.title }}</a></td>
<td><a href="#" class="paper-link" data-url="{{ url_for('papers.paper_detail', paper_id=paper.id) }}">{{ paper.title }}</a></td>
<td>{{ paper.journal }}</td>
<td>{{ paper.doi }}</td>
<td>{{ paper.issn }}</td>
@@ -193,7 +193,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<span aria-hidden="true">&laquo;</span>
</a>
</li>
@@ -208,7 +208,7 @@
<li class="page-item {% if page_num == pagination.page %}active{% endif %}">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
<a class="page-link" href="{{ url_for('papers.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
</li>
{% else %}
<li class="page-item disabled"><span class="page-link"></span></li>
@@ -219,7 +219,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<span aria-hidden="true">&raquo;</span>
</a>
</li>

View File

@@ -120,7 +120,7 @@
<div class="d-flex align-items-center mb-3">
<form
method="POST"
action="{{ url_for('main.schedule') }}"
action="{{ url_for('schedule.schedule') }}"
class="input-group w-50"
>
<label class="input-group-text">Papers per day:</label>
@@ -139,7 +139,7 @@
</div>
<h2 class="mt-4">Current Schedule</h2>
<form method="POST" action="{{ url_for('main.schedule') }}">
<form method="POST" action="{{ url_for('schedule.schedule') }}">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div

View File

@@ -27,7 +27,7 @@
{% endfor %}
</tbody>
</table>
<a href="{{ url_for('main.download_error_log') }}" class="btn btn-outline-secondary">Download Full Error Log</a>
<a href="{{ url_for('upload.download_error_log') }}" class="btn btn-outline-secondary">Download Full Error Log</a>
</div>
{% endif %}
@@ -43,7 +43,7 @@
<p>The format of your CSV should resemble the response structure of the Crossref API's <code>/journals/{issn}/works</code> endpoint.</p>
</div>
<form method="POST" action="{{ url_for('main.upload') }}" enctype="multipart/form-data">
<form method="POST" action="{{ url_for('upload.upload') }}" enctype="multipart/form-data">
<div class="mb-3">
<label class="form-label">How to handle duplicate DOIs:</label>
<div class="form-check">

View File

@@ -1,389 +0,0 @@
import codecs
import csv
import datetime
import io
from io import StringIO
import pandas as pd
from flask import (
Blueprint,
current_app,
flash,
redirect,
render_template,
request,
send_file,
session, # Add this line
url_for,
)
from sqlalchemy import asc, desc
from .db import db
from .models import PaperMetadata, ScheduleConfig, VolumeConfig
bp = Blueprint("main", __name__)
@bp.route("/")
def index():
return render_template("index.html")
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
@bp.route("/upload", methods=["GET", "POST"])
def upload():
if request.method == "POST":
file = request.files.get("file")
delimiter = request.form.get("delimiter", ",")
duplicate_strategy = request.form.get("duplicate_strategy", "skip")
if not file:
return render_template("upload.html", error="No file selected.")
try:
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
df = pd.read_csv(StringIO(content), delimiter=delimiter)
except Exception as e:
return render_template("upload.html", error=f"Failed to read CSV file: {e}")
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
return render_template(
"upload.html", error=f"Missing required columns: {', '.join(missing)}"
)
# Optional: parse 'published_online' to date
def parse_date(val):
if pd.isna(val):
return None
try:
return pd.to_datetime(val).date()
except Exception:
return None
# Count statistics
added_count = 0
skipped_count = 0
updated_count = 0
error_count = 0
# Collect error information
errors = []
# Process each row
for index, row in df.iterrows():
try:
# Get DOI from row for error reporting
doi = str(row.get("doi", "N/A"))
# Validate required fields
for field in ["title", "doi", "issn"]:
if pd.isna(row.get(field)) or not str(row.get(field)).strip():
raise ValueError(f"Missing required field: {field}")
# Check if paper with this DOI already exists
existing = PaperMetadata.query.filter_by(doi=doi).first()
if existing:
if duplicate_strategy == 'update':
# Update existing record
existing.title = row["title"]
existing.alt_id = row.get("alternative_id")
existing.issn = row["issn"]
existing.journal = row.get("journal")
existing.type = row.get("type")
existing.language = row.get("language")
existing.published_online = parse_date(row.get("published_online"))
updated_count += 1
else:
# Skip this record
skipped_count += 1
continue
else:
# Create new record
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
journal=row.get("journal"),
type=row.get("type"),
language=row.get("language"),
published_online=parse_date(row.get("published_online")),
status="New",
file_path=None,
error_msg=None,
)
db.session.add(metadata)
added_count += 1
except Exception as e:
error_count += 1
errors.append({
"row": index + 2, # +2 because index is 0-based and we have a header row
"doi": row.get("doi", "N/A"),
"error": str(e)
})
continue # Skip this row and continue with the next
try:
db.session.commit()
except Exception as e:
db.session.rollback()
return render_template(
"upload.html", error=f"Failed to save data to database: {e}"
)
# Prepare error samples for display
error_samples = errors[:5] if errors else []
error_message = None
if errors:
error_message = f"Encountered {len(errors)} errors. First 5 shown below."
# Store the full errors list in the session for potential download
if errors:
error_csv = StringIO()
writer = csv.DictWriter(error_csv, fieldnames=["row", "doi", "error"])
writer.writeheader()
writer.writerows(errors)
session["error_data"] = error_csv.getvalue()
return render_template(
"upload.html",
success=f"File processed! Added: {added_count}, Updated: {updated_count}, Skipped: {skipped_count}, Errors: {error_count}",
error_message=error_message,
error_samples=error_samples
)
return render_template("upload.html")
# Add a route to download the error log
@bp.route("/download_error_log")
def download_error_log():
error_data = session.get("error_data")
if not error_data:
flash("No error data available.")
return redirect(url_for("main.upload"))
buffer = StringIO(error_data)
return send_file(
buffer,
mimetype="text/csv",
as_attachment=True,
download_name=f"upload_errors_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
)
@bp.route("/papers")
def list_papers():
page = request.args.get("page", 1, type=int)
per_page = 50
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
from datetime import datetime
try:
return datetime.strptime(val, "%Y-%m-%d")
except (ValueError, TypeError):
return None
if created_from := parse_date(created_from):
query = query.filter(PaperMetadata.created_at >= created_from)
if created_to := parse_date(created_to):
query = query.filter(PaperMetadata.created_at <= created_to)
if updated_from := parse_date(updated_from):
query = query.filter(PaperMetadata.updated_at >= updated_from)
if updated_to := parse_date(updated_to):
query = query.filter(PaperMetadata.updated_at <= updated_to)
# Sorting
sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
sort_func = desc if sort_dir == "desc" else asc
query = query.order_by(sort_func(sort_col))
# Pagination
pagination = query.paginate(page=page, per_page=per_page, error_out=False)
# Statistics
total_papers = PaperMetadata.query.count()
status_counts = (
db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
.group_by(PaperMetadata.status)
.all()
)
status_counts = {status: count for status, count in status_counts}
return render_template(
"papers.html",
papers=pagination.items,
pagination=pagination,
total_papers=total_papers,
status_counts=status_counts,
sort_by=sort_by,
sort_dir=sort_dir,
)
@bp.route("/papers/export")
def export_papers():
query = PaperMetadata.query
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
try:
return datetime.datetime.strptime(val, "%Y-%m-%d")
except Exception:
return None
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(
["ID", "Title", "Journal", "DOI", "ISSN", "Status", "Created At", "Updated At"]
)
for paper in query:
writer.writerow(
[
paper.id,
paper.title,
getattr(paper, "journal", ""),
paper.doi,
paper.issn,
paper.status,
paper.created_at,
paper.updated_at,
]
)
output.seek(0)
return send_file(
io.BytesIO(output.read().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="papers.csv",
)
@bp.route("/papers/<int:paper_id>/detail")
def paper_detail(paper_id):
paper = PaperMetadata.query.get_or_404(paper_id)
return render_template("partials/paper_detail_modal.html", paper=paper)
@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update
try:
new_volume = float(request.form.get("total_volume", 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5"
)
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
schedule = {
sc.hour: sc.weight
for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
}
volume = VolumeConfig.query.first()
return render_template(
"schedule.html",
schedule=schedule,
volume=volume.volume,
app_title="PaperScraper",
)
@bp.route("/logs")
def logs():
return render_template("logs.html", app_title="PaperScraper")
@bp.route("/about")
def about():
return render_template("about.html", app_title="PaperScraper")