diff --git a/scipaperloader/__init__.py b/scipaperloader/__init__.py
index 534e9c4..8fbdff7 100644
--- a/scipaperloader/__init__.py
+++ b/scipaperloader/__init__.py
@@ -3,6 +3,7 @@ from flask import Flask
from .config import Config
from .db import db
from .models import init_schedule_config
+from .blueprints import register_blueprints
def create_app(test_config=None):
@@ -22,8 +23,6 @@ def create_app(test_config=None):
def inject_app_title():
return {"app_title": app.config["APP_TITLE"]}
- from . import views
-
- app.register_blueprint(views.bp)
+ register_blueprints(app)
return app
diff --git a/scipaperloader/blueprints/__init__.py b/scipaperloader/blueprints/__init__.py
new file mode 100644
index 0000000..c3b1eac
--- /dev/null
+++ b/scipaperloader/blueprints/__init__.py
@@ -0,0 +1,15 @@
+"""Blueprint registration module."""
+from flask import Flask
+
+from .main import bp as main_bp
+from .papers import bp as papers_bp
+from .upload import bp as upload_bp
+from .schedule import bp as schedule_bp
+
+
def register_blueprints(app: Flask):
    """Attach every feature blueprint to *app*.

    The main blueprint serves the root URLs; the remaining blueprints are
    mounted under their own URL prefixes.
    """
    mounts = (
        (main_bp, None),
        (papers_bp, "/papers"),
        (upload_bp, "/upload"),
        (schedule_bp, "/schedule"),
    )
    for blueprint, url_prefix in mounts:
        # url_prefix=None is register_blueprint's default, so the main
        # blueprint is registered exactly as before.
        app.register_blueprint(blueprint, url_prefix=url_prefix)
\ No newline at end of file
diff --git a/scipaperloader/blueprints/main.py b/scipaperloader/blueprints/main.py
new file mode 100644
index 0000000..be4cd58
--- /dev/null
+++ b/scipaperloader/blueprints/main.py
@@ -0,0 +1,19 @@
+"""Main routes for the application."""
+from flask import Blueprint, render_template
+
+bp = Blueprint("main", __name__)
+
+
@bp.route("/")
def index():
    # Landing page; `app_title` is supplied by the app-level context processor.
    return render_template("index.html")
+
+
@bp.route("/logs")
def logs():
    """Render the activity log page.

    ``app_title`` is injected app-wide by the context processor from the
    configured APP_TITLE; the hard-coded override is removed so the title
    stays consistent with the config (and with the index view).
    """
    return render_template("logs.html")
+
+
@bp.route("/about")
def about():
    """Render the about page.

    ``app_title`` is injected app-wide by the context processor from the
    configured APP_TITLE; the hard-coded override is removed so the title
    stays consistent with the config (and with the index view).
    """
    return render_template("about.html")
\ No newline at end of file
diff --git a/scipaperloader/blueprints/papers.py b/scipaperloader/blueprints/papers.py
new file mode 100644
index 0000000..890271e
--- /dev/null
+++ b/scipaperloader/blueprints/papers.py
@@ -0,0 +1,140 @@
+"""Paper management routes."""
+import csv
+import datetime
+import io
+
+from flask import (
+ Blueprint,
+ render_template,
+ request,
+ send_file,
+)
+from sqlalchemy import asc, desc
+
+from ..db import db
+from ..models import PaperMetadata
+
+bp = Blueprint("papers", __name__)
+
+
@bp.route("/")
def list_papers():
    """List papers with filtering, sorting, pagination, and status counts.

    Query parameters:
        page: 1-based page number (default 1, 50 rows per page).
        status: exact-match filter on PaperMetadata.status.
        created_from / created_to / updated_from / updated_to:
            inclusive date bounds in YYYY-MM-DD form; missing or
            unparsable values simply skip that filter.
        sort_by: column name; unknown names fall back to created_at.
        sort_dir: "asc" or "desc" (default "desc").
    """
    page = request.args.get("page", 1, type=int)
    per_page = 50

    status = request.args.get("status")
    sort_by = request.args.get("sort_by", "created_at")
    sort_dir = request.args.get("sort_dir", "desc")

    def parse_date(val):
        # Uses the module-level `datetime` import (consistent with
        # export_papers) instead of a shadowing function-local import.
        # Bad or absent values return None so the filter is ignored
        # rather than erroring out.
        try:
            return datetime.datetime.strptime(val, "%Y-%m-%d")
        except (ValueError, TypeError):
            return None

    query = PaperMetadata.query

    # Apply filters; the walrus keeps each bound only when it parsed.
    if status:
        query = query.filter(PaperMetadata.status == status)
    if created_from := parse_date(request.args.get("created_from")):
        query = query.filter(PaperMetadata.created_at >= created_from)
    if created_to := parse_date(request.args.get("created_to")):
        query = query.filter(PaperMetadata.created_at <= created_to)
    if updated_from := parse_date(request.args.get("updated_from")):
        query = query.filter(PaperMetadata.updated_at >= updated_from)
    if updated_to := parse_date(request.args.get("updated_to")):
        query = query.filter(PaperMetadata.updated_at <= updated_to)

    # Sorting: unknown sort_by values fall back to created_at.
    sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
    sort_func = desc if sort_dir == "desc" else asc
    query = query.order_by(sort_func(sort_col))

    # Pagination (error_out=False: out-of-range pages return empty lists).
    pagination = query.paginate(page=page, per_page=per_page, error_out=False)

    # Statistics for the header widgets.
    total_papers = PaperMetadata.query.count()
    status_counts = dict(
        db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
        .group_by(PaperMetadata.status)
        .all()
    )

    return render_template(
        "papers.html",
        papers=pagination.items,
        pagination=pagination,
        total_papers=total_papers,
        status_counts=status_counts,
        sort_by=sort_by,
        sort_dir=sort_dir,
    )
+
+
@bp.route("/export")
def export_papers():
    """Stream the paper list as a CSV download.

    Accepts the same query parameters as list_papers. The previous
    version read the filter and sort parameters (and defined parse_date)
    but never applied any of them, so the export always contained every
    paper in arbitrary order; they are now applied consistently with the
    list view.
    """
    status = request.args.get("status")
    sort_by = request.args.get("sort_by", "created_at")
    sort_dir = request.args.get("sort_dir", "desc")

    def parse_date(val):
        # Same lenient parsing as list_papers: bad input skips the filter.
        try:
            return datetime.datetime.strptime(val, "%Y-%m-%d")
        except (ValueError, TypeError):
            return None

    query = PaperMetadata.query

    # Apply the same filters the list view honors.
    if status:
        query = query.filter(PaperMetadata.status == status)
    if created_from := parse_date(request.args.get("created_from")):
        query = query.filter(PaperMetadata.created_at >= created_from)
    if created_to := parse_date(request.args.get("created_to")):
        query = query.filter(PaperMetadata.created_at <= created_to)
    if updated_from := parse_date(request.args.get("updated_from")):
        query = query.filter(PaperMetadata.updated_at >= updated_from)
    if updated_to := parse_date(request.args.get("updated_to")):
        query = query.filter(PaperMetadata.updated_at <= updated_to)

    # Apply the requested ordering (was previously parsed but ignored).
    sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
    sort_func = desc if sort_dir == "desc" else asc
    query = query.order_by(sort_func(sort_col))

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(
        ["ID", "Title", "Journal", "DOI", "ISSN", "Status", "Created At", "Updated At"]
    )

    for paper in query:
        writer.writerow(
            [
                paper.id,
                paper.title,
                getattr(paper, "journal", ""),
                paper.doi,
                paper.issn,
                paper.status,
                paper.created_at,
                paper.updated_at,
            ]
        )

    # send_file requires a binary file object, so re-encode the CSV text.
    output.seek(0)
    return send_file(
        io.BytesIO(output.read().encode("utf-8")),
        mimetype="text/csv",
        as_attachment=True,
        download_name="papers.csv",
    )
+
+
# The route previously read "//detail" with no URL converter, yet the view
# takes paper_id — registering it would break URL building and calling it
# would fail. The <int:paper_id> converter restores the intended URL shape
# (it appears to have been stripped along with other angle-bracket markup).
@bp.route("/<int:paper_id>/detail")
def paper_detail(paper_id):
    """Render the detail modal partial for one paper; 404 on unknown id."""
    paper = PaperMetadata.query.get_or_404(paper_id)
    return render_template("partials/paper_detail_modal.html", paper=paper)
\ No newline at end of file
diff --git a/scipaperloader/blueprints/schedule.py b/scipaperloader/blueprints/schedule.py
new file mode 100644
index 0000000..7d4f878
--- /dev/null
+++ b/scipaperloader/blueprints/schedule.py
@@ -0,0 +1,79 @@
+"""Schedule configuration routes."""
+from flask import Blueprint, flash, render_template, request
+
+from ..db import db
+from ..models import ScheduleConfig, VolumeConfig
+
+bp = Blueprint("schedule", __name__)
+
+
def _update_volume(form):
    """Validate and persist the total scrape volume from the submitted form."""
    try:
        new_volume = float(form.get("total_volume", 0))
        # Message now matches the actual check: values in (0, 1000] pass.
        if new_volume <= 0 or new_volume > 1000:
            raise ValueError("Volume must be greater than 0 and at most 1000")

        volume_config = VolumeConfig.query.first()
        if volume_config is None:
            volume_config = VolumeConfig(volume=new_volume)
            db.session.add(volume_config)
        else:
            volume_config.volume = new_volume

        db.session.commit()
        flash("Volume updated successfully!", "success")
    except ValueError as e:
        db.session.rollback()
        flash(f"Error updating volume: {str(e)}", "error")


def _update_schedule(form):
    """Validate all 24 hourly weights from the form, then persist them."""
    try:
        # Validate everything first so the database is only touched when
        # the whole form is acceptable.
        weights = {}
        for hour in range(24):
            key = f"hour_{hour}"
            if key not in form:
                raise ValueError(f"Missing data for hour {hour}")
            try:
                weight = float(form.get(key, 0))
            except ValueError:
                # Non-numeric input: surface a user-friendly message.
                raise ValueError(f"Invalid weight value for hour {hour}")
            # The range check sits OUTSIDE the conversion try-block: the
            # previous version raised it inside its own `except ValueError`,
            # which swallowed the specific range message and replaced it
            # with the generic "invalid value" one.
            if weight < 0 or weight > 5:
                raise ValueError(f"Weight for hour {hour} must be between 0 and 5")
            weights[hour] = weight

        for hour, weight in weights.items():
            config = ScheduleConfig.query.get(hour)
            if config:
                config.weight = weight
            else:
                db.session.add(ScheduleConfig(hour=hour, weight=weight))

        db.session.commit()
        flash("Schedule updated successfully!", "success")
    except ValueError as e:
        db.session.rollback()
        flash(f"Error updating schedule: {str(e)}", "error")


@bp.route("/", methods=["GET", "POST"])
def schedule():
    """Show and update the hourly scraping schedule and total volume.

    The page hosts two forms; the presence of ``total_volume`` in the POST
    data identifies the volume form, otherwise the 24 hourly weights are
    processed. Either way the page re-renders with the current settings.
    """
    if request.method == "POST":
        if "total_volume" in request.form:
            _update_volume(request.form)
        else:
            _update_schedule(request.form)

    # Local renamed from `schedule` to avoid shadowing this view function;
    # the template keyword stays `schedule=` so templates are unchanged.
    schedule_weights = {
        sc.hour: sc.weight
        for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
    }
    volume_config = VolumeConfig.query.first()
    return render_template(
        "schedule.html",
        schedule=schedule_weights,
        volume=volume_config.volume if volume_config else 0,
        app_title="PaperScraper",
    )
\ No newline at end of file
diff --git a/scipaperloader/blueprints/upload.py b/scipaperloader/blueprints/upload.py
new file mode 100644
index 0000000..07bc7d1
--- /dev/null
+++ b/scipaperloader/blueprints/upload.py
@@ -0,0 +1,170 @@
+"""Upload functionality for paper metadata."""
+import codecs
+import csv
+import datetime
+from io import StringIO
+
+import pandas as pd
+from flask import (
+ Blueprint,
+ flash,
+ redirect,
+ render_template,
+ request,
+ send_file,
+ session,
+ url_for,
+)
+
+from ..db import db
+from ..models import PaperMetadata
+
+bp = Blueprint("upload", __name__)
+
+REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
+
+
@bp.route("/", methods=["GET", "POST"])
def upload():
    """Handle CSV upload of paper metadata.

    GET renders the upload form. POST decodes the uploaded file as UTF-8,
    parses it with pandas using the submitted delimiter, validates each
    row, and inserts/updates/skips PaperMetadata records according to the
    chosen duplicate strategy. Row-level errors never abort the batch:
    they are collected, the first five are shown inline, and the full list
    is stored in the session for download via /download_error_log.
    """
    if request.method == "POST":
        file = request.files.get("file")
        delimiter = request.form.get("delimiter", ",")
        # "skip" (default) leaves existing DOIs untouched; "update" overwrites them.
        duplicate_strategy = request.form.get("duplicate_strategy", "skip")

        if not file:
            return render_template("upload.html", error="No file selected.")

        try:
            # Decode the raw upload stream before handing text to pandas.
            stream = codecs.iterdecode(file.stream, "utf-8")
            content = "".join(stream)
            df = pd.read_csv(StringIO(content), delimiter=delimiter)
        except Exception as e:
            return render_template("upload.html", error=f"Failed to read CSV file: {e}")

        missing = REQUIRED_COLUMNS - set(df.columns)
        if missing:
            return render_template(
                "upload.html", error=f"Missing required columns: {', '.join(missing)}"
            )

        # Optional: parse 'published_online' to date
        def parse_date(val):
            # The column is optional: NaN or unparsable values become None.
            if pd.isna(val):
                return None
            try:
                return pd.to_datetime(val).date()
            except Exception:
                return None

        # Count statistics
        added_count = 0
        skipped_count = 0
        updated_count = 0
        error_count = 0

        # Collect error information
        errors = []

        # Process each row
        for index, row in df.iterrows():
            try:
                # Get DOI from row for error reporting
                doi = str(row.get("doi", "N/A"))

                # Validate required fields
                for field in ["title", "doi", "issn"]:
                    if pd.isna(row.get(field)) or not str(row.get(field)).strip():
                        raise ValueError(f"Missing required field: {field}")

                # Check if paper with this DOI already exists
                existing = PaperMetadata.query.filter_by(doi=doi).first()

                if existing:
                    if duplicate_strategy == 'update':
                        # Update existing record
                        existing.title = row["title"]
                        existing.alt_id = row.get("alternative_id")
                        existing.issn = row["issn"]
                        existing.journal = row.get("journal")
                        existing.type = row.get("type")
                        existing.language = row.get("language")
                        existing.published_online = parse_date(row.get("published_online"))
                        updated_count += 1
                    else:
                        # Skip this record
                        skipped_count += 1
                        continue
                else:
                    # Create new record
                    metadata = PaperMetadata(
                        title=row["title"],
                        doi=doi,
                        alt_id=row.get("alternative_id"),
                        issn=row["issn"],
                        journal=row.get("journal"),
                        type=row.get("type"),
                        language=row.get("language"),
                        published_online=parse_date(row.get("published_online")),
                        status="New",
                        file_path=None,
                        error_msg=None,
                    )
                    db.session.add(metadata)
                    added_count += 1

            except Exception as e:
                # Record the failure and move on — one bad row must not
                # abort the whole import.
                error_count += 1
                errors.append({
                    "row": index + 2,  # +2 because index is 0-based and we have a header row
                    "doi": row.get("doi", "N/A"),
                    "error": str(e)
                })
                continue  # Skip this row and continue with the next

        # One commit for the whole batch; roll everything back on failure.
        try:
            db.session.commit()
        except Exception as e:
            db.session.rollback()
            return render_template(
                "upload.html", error=f"Failed to save data to database: {e}"
            )

        # Prepare error samples for display
        error_samples = errors[:5] if errors else []

        error_message = None
        if errors:
            error_message = f"Encountered {len(errors)} errors. First 5 shown below."

        # Store the full errors list in the session for potential download
        # NOTE(review): the default session is cookie-backed and limited to
        # ~4 KB — a large error log may be truncated/dropped; confirm the
        # session backend before relying on this for big files.
        if errors:
            error_csv = StringIO()
            writer = csv.DictWriter(error_csv, fieldnames=["row", "doi", "error"])
            writer.writeheader()
            writer.writerows(errors)
            session["error_data"] = error_csv.getvalue()

        return render_template(
            "upload.html",
            success=f"File processed! Added: {added_count}, Updated: {updated_count}, Skipped: {skipped_count}, Errors: {error_count}",
            error_message=error_message,
            error_samples=error_samples
        )

    return render_template("upload.html")
+
+
@bp.route("/download_error_log")
def download_error_log():
    """Download the CSV error log saved by the last upload, if any.

    Redirects back to the upload page with a flash message when no error
    data is stored in the session.
    """
    from io import BytesIO  # local import: only needed on this code path

    error_data = session.get("error_data")
    if not error_data:
        flash("No error data available.")
        return redirect(url_for("upload.upload"))

    # Flask's send_file requires a file object opened in binary mode; the
    # previous StringIO would fail at send time, so encode the CSV text to
    # bytes first (same approach as the papers CSV export).
    buffer = BytesIO(error_data.encode("utf-8"))
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return send_file(
        buffer,
        mimetype="text/csv",
        as_attachment=True,
        download_name=f"upload_errors_{timestamp}.csv"
    )
\ No newline at end of file
diff --git a/scipaperloader/templates/index.html b/scipaperloader/templates/index.html
index 252fb0f..e9d28a6 100644
--- a/scipaperloader/templates/index.html
+++ b/scipaperloader/templates/index.html
@@ -16,7 +16,7 @@
(title, DOI, ISSN, etc.) are stored. Errors are reported without
aborting the batch.
- Upload Now
+ Upload Now
@@ -43,7 +43,7 @@
inspect errors. Files are stored on disk in structured folders per
DOI.
- Browse Papers
@@ -59,7 +59,7 @@
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
usage pattern.
- Adjust Schedule
diff --git a/scipaperloader/templates/nav.html b/scipaperloader/templates/nav.html
index f1924c2..cf96974 100644
--- a/scipaperloader/templates/nav.html
+++ b/scipaperloader/templates/nav.html
@@ -17,13 +17,13 @@
@@ -137,44 +137,44 @@
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'title', 'sort_dir': title_sort}) or params %}
- Title
+ Title
|
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'journal', 'sort_dir': journal_sort}) or params %}
- Journal
+ Journal
|
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'doi', 'sort_dir': doi_sort}) or params %}
- DOI
+ DOI
|
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'issn', 'sort_dir': issn_sort}) or params %}
- ISSN
+ ISSN
|
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'status', 'sort_dir': status_sort}) or params %}
- Status
+ Status
|
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'created_at', 'sort_dir': created_sort}) or params %}
- Created
+ Created
|
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'updated_at', 'sort_dir': updated_sort}) or params %}
- Updated
+ Updated
|
{% for paper in papers %}
- {{ paper.title }} |
+ {{ paper.title }} |
{{ paper.journal }} |
{{ paper.doi }} |
{{ paper.issn }} |
@@ -193,7 +193,7 @@
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
-
+
«
@@ -208,7 +208,7 @@
{% else %}
…
@@ -219,7 +219,7 @@
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
-
+
»
diff --git a/scipaperloader/templates/schedule.html b/scipaperloader/templates/schedule.html
index caea561..f60cff8 100644
--- a/scipaperloader/templates/schedule.html
+++ b/scipaperloader/templates/schedule.html
@@ -120,7 +120,7 @@
Current Schedule
-
- Download Full Error Log
+ Download Full Error Log
{% endif %}
@@ -43,7 +43,7 @@
The format of your CSV should resemble the response structure of the Crossref API's /journals/{issn}/works
endpoint.
-