refactors views into proper blueprint usage: splits routes into multiple files in a blueprints folder and fixes URL handling to use those blueprints

Michael Beck 2025-04-11 13:47:54 +02:00
parent 7ff29ee2cd
commit 1a1de21759
12 changed files with 450 additions and 417 deletions
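In practice this follows the standard Flask blueprint pattern: each feature module defines its own Blueprint, a central register_blueprints() attaches them to the app with a url_prefix, and templates switch from hardcoded paths to url_for("<blueprint>.<view>"). A minimal, self-contained sketch of that pattern (illustrative only, not part of the diff below):

from flask import Blueprint, Flask, url_for

# A feature module defines its routes relative to the blueprint root.
papers_bp = Blueprint("papers", __name__)

@papers_bp.route("/")
def list_papers():
    return "papers list"

# Registration applies the prefix, so the route above is served at /papers/.
app = Flask(__name__)
app.register_blueprint(papers_bp, url_prefix="/papers")

with app.test_request_context():
    # Templates reference the endpoint name, not the path:
    print(url_for("papers.list_papers"))  # -> /papers/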

View File

@@ -3,6 +3,7 @@ from flask import Flask
from .config import Config
from .db import db
from .models import init_schedule_config
from .blueprints import register_blueprints
def create_app(test_config=None):
@@ -22,8 +23,6 @@ def create_app(test_config=None):
def inject_app_title():
return {"app_title": app.config["APP_TITLE"]}
from . import views
app.register_blueprint(views.bp)
register_blueprints(app)
return app

View File

@@ -0,0 +1,15 @@
"""Blueprint registration module."""
from flask import Flask
from .main import bp as main_bp
from .papers import bp as papers_bp
from .upload import bp as upload_bp
from .schedule import bp as schedule_bp
def register_blueprints(app: Flask):
"""Register all blueprints with the Flask application."""
app.register_blueprint(main_bp)
app.register_blueprint(papers_bp, url_prefix='/papers')
app.register_blueprint(upload_bp, url_prefix='/upload')
app.register_blueprint(schedule_bp, url_prefix='/schedule')
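A quick sanity check of the resulting URL map (a hypothetical helper, not part of this commit; pass it the app returned by create_app()):

def print_routes(app):
    # Lists endpoint names and URL rules so the blueprint prefixes can be verified.
    for rule in app.url_map.iter_rules():
        print(f"{rule.endpoint:30s} {rule.rule}")

# Expected entries include:
#   main.index            /
#   papers.list_papers    /papers/
#   upload.upload         /upload/
#   schedule.schedule     /schedule/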

View File

@@ -0,0 +1,19 @@
"""Main routes for the application."""
from flask import Blueprint, render_template
bp = Blueprint("main", __name__)
@bp.route("/")
def index():
return render_template("index.html")
@bp.route("/logs")
def logs():
return render_template("logs.html", app_title="PaperScraper")
@bp.route("/about")
def about():
return render_template("about.html", app_title="PaperScraper")

View File

@@ -0,0 +1,140 @@
"""Paper management routes."""
import csv
import datetime
import io
from flask import (
Blueprint,
render_template,
request,
send_file,
)
from sqlalchemy import asc, desc
from ..db import db
from ..models import PaperMetadata
bp = Blueprint("papers", __name__)
@bp.route("/")
def list_papers():
page = request.args.get("page", 1, type=int)
per_page = 50
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
from datetime import datetime
try:
return datetime.strptime(val, "%Y-%m-%d")
except (ValueError, TypeError):
return None
if created_from := parse_date(created_from):
query = query.filter(PaperMetadata.created_at >= created_from)
if created_to := parse_date(created_to):
query = query.filter(PaperMetadata.created_at <= created_to)
if updated_from := parse_date(updated_from):
query = query.filter(PaperMetadata.updated_at >= updated_from)
if updated_to := parse_date(updated_to):
query = query.filter(PaperMetadata.updated_at <= updated_to)
# Sorting
sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
sort_func = desc if sort_dir == "desc" else asc
query = query.order_by(sort_func(sort_col))
# Pagination
pagination = query.paginate(page=page, per_page=per_page, error_out=False)
# Statistics
total_papers = PaperMetadata.query.count()
status_counts = (
db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
.group_by(PaperMetadata.status)
.all()
)
status_counts = {status: count for status, count in status_counts}
return render_template(
"papers.html",
papers=pagination.items,
pagination=pagination,
total_papers=total_papers,
status_counts=status_counts,
sort_by=sort_by,
sort_dir=sort_dir,
)
@bp.route("/export")
def export_papers():
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
try:
return datetime.datetime.strptime(val, "%Y-%m-%d")
except Exception:
return None
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(
["ID", "Title", "Journal", "DOI", "ISSN", "Status", "Created At", "Updated At"]
)
for paper in query:
writer.writerow(
[
paper.id,
paper.title,
getattr(paper, "journal", ""),
paper.doi,
paper.issn,
paper.status,
paper.created_at,
paper.updated_at,
]
)
output.seek(0)
return send_file(
io.BytesIO(output.read().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="papers.csv",
)
@bp.route("/<int:paper_id>/detail")
def paper_detail(paper_id):
paper = PaperMetadata.query.get_or_404(paper_id)
return render_template("partials/paper_detail_modal.html", paper=paper)

View File

@@ -0,0 +1,79 @@
"""Schedule configuration routes."""
from flask import Blueprint, flash, render_template, request
from ..db import db
from ..models import ScheduleConfig, VolumeConfig
bp = Blueprint("schedule", __name__)
@bp.route("/", methods=["GET", "POST"])
def schedule():
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update
try:
new_volume = float(request.form.get("total_volume", 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5"
)
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
schedule = {
sc.hour: sc.weight
for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
}
volume = VolumeConfig.query.first()
return render_template(
"schedule.html",
schedule=schedule,
volume=volume.volume if volume else 0,
app_title="PaperScraper",
)

View File

@@ -0,0 +1,170 @@
"""Upload functionality for paper metadata."""
import codecs
import csv
import datetime
from io import StringIO
import pandas as pd
from flask import (
Blueprint,
flash,
redirect,
render_template,
request,
send_file,
session,
url_for,
)
from ..db import db
from ..models import PaperMetadata
bp = Blueprint("upload", __name__)
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
@bp.route("/", methods=["GET", "POST"])
def upload():
if request.method == "POST":
file = request.files.get("file")
delimiter = request.form.get("delimiter", ",")
duplicate_strategy = request.form.get("duplicate_strategy", "skip")
if not file:
return render_template("upload.html", error="No file selected.")
try:
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
df = pd.read_csv(StringIO(content), delimiter=delimiter)
except Exception as e:
return render_template("upload.html", error=f"Failed to read CSV file: {e}")
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
return render_template(
"upload.html", error=f"Missing required columns: {', '.join(missing)}"
)
# Optional: parse 'published_online' to date
def parse_date(val):
if pd.isna(val):
return None
try:
return pd.to_datetime(val).date()
except Exception:
return None
# Count statistics
added_count = 0
skipped_count = 0
updated_count = 0
error_count = 0
# Collect error information
errors = []
# Process each row
for index, row in df.iterrows():
try:
# Get DOI from row for error reporting
doi = str(row.get("doi", "N/A"))
# Validate required fields
for field in ["title", "doi", "issn"]:
if pd.isna(row.get(field)) or not str(row.get(field)).strip():
raise ValueError(f"Missing required field: {field}")
# Check if paper with this DOI already exists
existing = PaperMetadata.query.filter_by(doi=doi).first()
if existing:
if duplicate_strategy == 'update':
# Update existing record
existing.title = row["title"]
existing.alt_id = row.get("alternative_id")
existing.issn = row["issn"]
existing.journal = row.get("journal")
existing.type = row.get("type")
existing.language = row.get("language")
existing.published_online = parse_date(row.get("published_online"))
updated_count += 1
else:
# Skip this record
skipped_count += 1
continue
else:
# Create new record
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
journal=row.get("journal"),
type=row.get("type"),
language=row.get("language"),
published_online=parse_date(row.get("published_online")),
status="New",
file_path=None,
error_msg=None,
)
db.session.add(metadata)
added_count += 1
except Exception as e:
error_count += 1
errors.append({
"row": index + 2, # +2 because index is 0-based and we have a header row
"doi": row.get("doi", "N/A"),
"error": str(e)
})
continue # Skip this row and continue with the next
try:
db.session.commit()
except Exception as e:
db.session.rollback()
return render_template(
"upload.html", error=f"Failed to save data to database: {e}"
)
# Prepare error samples for display
error_samples = errors[:5] if errors else []
error_message = None
if errors:
error_message = f"Encountered {len(errors)} errors. First 5 shown below."
# Store the full errors list in the session for potential download
if errors:
error_csv = StringIO()
writer = csv.DictWriter(error_csv, fieldnames=["row", "doi", "error"])
writer.writeheader()
writer.writerows(errors)
session["error_data"] = error_csv.getvalue()
return render_template(
"upload.html",
success=f"File processed! Added: {added_count}, Updated: {updated_count}, Skipped: {skipped_count}, Errors: {error_count}",
error_message=error_message,
error_samples=error_samples
)
return render_template("upload.html")
@bp.route("/download_error_log")
def download_error_log():
error_data = session.get("error_data")
if not error_data:
flash("No error data available.")
return redirect(url_for("upload.upload"))
buffer = StringIO(error_data)
return send_file(
buffer,
mimetype="text/csv",
as_attachment=True,
download_name=f"upload_errors_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
)

View File

@@ -16,7 +16,7 @@
(title, DOI, ISSN, etc.) are stored. Errors are reported without
aborting the batch.
</p>
<a href="/import" class="btn btn-sm btn-outline-primary">Upload Now</a>
<a href="{{ url_for('upload.upload') }}" class="btn btn-sm btn-outline-primary">Upload Now</a>
</div>
</div>
</div>
@@ -43,7 +43,7 @@
inspect errors. Files are stored on disk in structured folders per
DOI.
</p>
<a href="/papers" class="btn btn-sm btn-outline-success"
<a href="{{ url_for('papers.list_papers') }}" class="btn btn-sm btn-outline-success"
>Browse Papers</a
>
</div>
@@ -59,7 +59,7 @@
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
usage pattern.
</p>
<a href="/schedule" class="btn btn-sm btn-outline-warning"
<a href="{{ url_for('schedule.schedule') }}" class="btn btn-sm btn-outline-warning"
>Adjust Schedule</a
>
</div>

View File

@@ -17,13 +17,13 @@
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li class="nav-item">
<a class="nav-link" href="/upload">Import CSV</a>
<a class="nav-link" href="{{ url_for('upload.upload') }}">Import CSV</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/papers">Papers</a>
<a class="nav-link" href="{{ url_for('papers.list_papers') }}">Papers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/schedule">Schedule</a>
<a class="nav-link" href="{{ url_for('schedule.schedule') }}">Schedule</a>
</li>
<li class="nav-item dropdown">
<a

View File

@@ -88,7 +88,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<span aria-hidden="true">&laquo;</span>
</a>
</li>
@@ -103,7 +103,7 @@
<li class="page-item {% if page_num == pagination.page %}active{% endif %}">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
<a class="page-link" href="{{ url_for('papers.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
</li>
{% else %}
<li class="page-item disabled"><span class="page-link"></span></li>
@@ -114,7 +114,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<span aria-hidden="true">&raquo;</span>
</a>
</li>
@@ -128,7 +128,7 @@
<!-- Buttons Section -->
<div class="ms-auto">
<a href="{{ url_for('main.export_papers') }}" class="btn btn-outline-secondary">Export CSV</a>
<a href="{{ url_for('papers.export_papers') }}" class="btn btn-outline-secondary">Export CSV</a>
</div>
</div>
<table class="table table-striped table-bordered table-smaller">
@@ -137,44 +137,44 @@
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'title', 'sort_dir': title_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Title</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Title</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'journal', 'sort_dir': journal_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Journal</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Journal</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'doi', 'sort_dir': doi_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">DOI</a>
<a href="{{ url_for('papers.list_papers', **params) }}">DOI</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'issn', 'sort_dir': issn_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">ISSN</a>
<a href="{{ url_for('papers.list_papers', **params) }}">ISSN</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'status', 'sort_dir': status_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Status</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Status</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'created_at', 'sort_dir': created_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Created</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Created</a>
</th>
<th>
{% set params = request.args.to_dict() %}
{% set params = params.update({'sort_by': 'updated_at', 'sort_dir': updated_sort}) or params %}
<a href="{{ url_for('main.list_papers', **params) }}">Updated</a>
<a href="{{ url_for('papers.list_papers', **params) }}">Updated</a>
</th>
</tr>
</thead>
<tbody>
{% for paper in papers %}
<tr>
<td><a href="#" class="paper-link" data-url="{{ url_for('main.paper_detail', paper_id=paper.id) }}">{{ paper.title }}</a></td>
<td><a href="#" class="paper-link" data-url="{{ url_for('papers.paper_detail', paper_id=paper.id) }}">{{ paper.title }}</a></td>
<td>{{ paper.journal }}</td>
<td>{{ paper.doi }}</td>
<td>{{ paper.issn }}</td>
@@ -193,7 +193,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.prev_num, **params) }}" aria-label="Previous">
<span aria-hidden="true">&laquo;</span>
</a>
</li>
@@ -208,7 +208,7 @@
<li class="page-item {% if page_num == pagination.page %}active{% endif %}">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
<a class="page-link" href="{{ url_for('papers.list_papers', page=page_num, **params) }}">{{ page_num }}</a>
</li>
{% else %}
<li class="page-item disabled"><span class="page-link"></span></li>
@@ -219,7 +219,7 @@
<li class="page-item">
{% set params = request.args.to_dict() %}
{% set _ = params.pop('page', None) %}
<a class="page-link" href="{{ url_for('main.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<a class="page-link" href="{{ url_for('papers.list_papers', page=pagination.next_num, **params) }}" aria-label="Next">
<span aria-hidden="true">&raquo;</span>
</a>
</li>

View File

@@ -120,7 +120,7 @@
<div class="d-flex align-items-center mb-3">
<form
method="POST"
action="{{ url_for('main.schedule') }}"
action="{{ url_for('schedule.schedule') }}"
class="input-group w-50"
>
<label class="input-group-text">Papers per day:</label>
@@ -139,7 +139,7 @@
</div>
<h2 class="mt-4">Current Schedule</h2>
<form method="POST" action="{{ url_for('main.schedule') }}">
<form method="POST" action="{{ url_for('schedule.schedule') }}">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div

View File

@@ -27,7 +27,7 @@
{% endfor %}
</tbody>
</table>
<a href="{{ url_for('main.download_error_log') }}" class="btn btn-outline-secondary">Download Full Error Log</a>
<a href="{{ url_for('upload.download_error_log') }}" class="btn btn-outline-secondary">Download Full Error Log</a>
</div>
{% endif %}
@@ -43,7 +43,7 @@
<p>The format of your CSV should resemble the response structure of the Crossref API's <code>/journals/{issn}/works</code> endpoint.</p>
</div>
<form method="POST" action="{{ url_for('main.upload') }}" enctype="multipart/form-data">
<form method="POST" action="{{ url_for('upload.upload') }}" enctype="multipart/form-data">
<div class="mb-3">
<label class="form-label">How to handle duplicate DOIs:</label>
<div class="form-check">

View File

@@ -1,389 +0,0 @@
import codecs
import csv
import datetime
import io
from io import StringIO
import pandas as pd
from flask import (
Blueprint,
current_app,
flash,
redirect,
render_template,
request,
send_file,
session, # Add this line
url_for,
)
from sqlalchemy import asc, desc
from .db import db
from .models import PaperMetadata, ScheduleConfig, VolumeConfig
bp = Blueprint("main", __name__)
@bp.route("/")
def index():
return render_template("index.html")
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
@bp.route("/upload", methods=["GET", "POST"])
def upload():
if request.method == "POST":
file = request.files.get("file")
delimiter = request.form.get("delimiter", ",")
duplicate_strategy = request.form.get("duplicate_strategy", "skip")
if not file:
return render_template("upload.html", error="No file selected.")
try:
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
df = pd.read_csv(StringIO(content), delimiter=delimiter)
except Exception as e:
return render_template("upload.html", error=f"Failed to read CSV file: {e}")
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
return render_template(
"upload.html", error=f"Missing required columns: {', '.join(missing)}"
)
# Optional: parse 'published_online' to date
def parse_date(val):
if pd.isna(val):
return None
try:
return pd.to_datetime(val).date()
except Exception:
return None
# Count statistics
added_count = 0
skipped_count = 0
updated_count = 0
error_count = 0
# Collect error information
errors = []
# Process each row
for index, row in df.iterrows():
try:
# Get DOI from row for error reporting
doi = str(row.get("doi", "N/A"))
# Validate required fields
for field in ["title", "doi", "issn"]:
if pd.isna(row.get(field)) or not str(row.get(field)).strip():
raise ValueError(f"Missing required field: {field}")
# Check if paper with this DOI already exists
existing = PaperMetadata.query.filter_by(doi=doi).first()
if existing:
if duplicate_strategy == 'update':
# Update existing record
existing.title = row["title"]
existing.alt_id = row.get("alternative_id")
existing.issn = row["issn"]
existing.journal = row.get("journal")
existing.type = row.get("type")
existing.language = row.get("language")
existing.published_online = parse_date(row.get("published_online"))
updated_count += 1
else:
# Skip this record
skipped_count += 1
continue
else:
# Create new record
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
journal=row.get("journal"),
type=row.get("type"),
language=row.get("language"),
published_online=parse_date(row.get("published_online")),
status="New",
file_path=None,
error_msg=None,
)
db.session.add(metadata)
added_count += 1
except Exception as e:
error_count += 1
errors.append({
"row": index + 2, # +2 because index is 0-based and we have a header row
"doi": row.get("doi", "N/A"),
"error": str(e)
})
continue # Skip this row and continue with the next
try:
db.session.commit()
except Exception as e:
db.session.rollback()
return render_template(
"upload.html", error=f"Failed to save data to database: {e}"
)
# Prepare error samples for display
error_samples = errors[:5] if errors else []
error_message = None
if errors:
error_message = f"Encountered {len(errors)} errors. First 5 shown below."
# Store the full errors list in the session for potential download
if errors:
error_csv = StringIO()
writer = csv.DictWriter(error_csv, fieldnames=["row", "doi", "error"])
writer.writeheader()
writer.writerows(errors)
session["error_data"] = error_csv.getvalue()
return render_template(
"upload.html",
success=f"File processed! Added: {added_count}, Updated: {updated_count}, Skipped: {skipped_count}, Errors: {error_count}",
error_message=error_message,
error_samples=error_samples
)
return render_template("upload.html")
# Add a route to download the error log
@bp.route("/download_error_log")
def download_error_log():
error_data = session.get("error_data")
if not error_data:
flash("No error data available.")
return redirect(url_for("main.upload"))
buffer = StringIO(error_data)
return send_file(
buffer,
mimetype="text/csv",
as_attachment=True,
download_name=f"upload_errors_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
)
@bp.route("/papers")
def list_papers():
page = request.args.get("page", 1, type=int)
per_page = 50
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
from datetime import datetime
try:
return datetime.strptime(val, "%Y-%m-%d")
except (ValueError, TypeError):
return None
if created_from := parse_date(created_from):
query = query.filter(PaperMetadata.created_at >= created_from)
if created_to := parse_date(created_to):
query = query.filter(PaperMetadata.created_at <= created_to)
if updated_from := parse_date(updated_from):
query = query.filter(PaperMetadata.updated_at >= updated_from)
if updated_to := parse_date(updated_to):
query = query.filter(PaperMetadata.updated_at <= updated_to)
# Sorting
sort_col = getattr(PaperMetadata, sort_by, PaperMetadata.created_at)
sort_func = desc if sort_dir == "desc" else asc
query = query.order_by(sort_func(sort_col))
# Pagination
pagination = query.paginate(page=page, per_page=per_page, error_out=False)
# Statistics
total_papers = PaperMetadata.query.count()
status_counts = (
db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
.group_by(PaperMetadata.status)
.all()
)
status_counts = {status: count for status, count in status_counts}
return render_template(
"papers.html",
papers=pagination.items,
pagination=pagination,
total_papers=total_papers,
status_counts=status_counts,
sort_by=sort_by,
sort_dir=sort_dir,
)
@bp.route("/papers/export")
def export_papers():
query = PaperMetadata.query
# Filters
status = request.args.get("status")
created_from = request.args.get("created_from")
created_to = request.args.get("created_to")
updated_from = request.args.get("updated_from")
updated_to = request.args.get("updated_to")
sort_by = request.args.get("sort_by", "created_at")
sort_dir = request.args.get("sort_dir", "desc")
query = PaperMetadata.query
# Apply filters
if status:
query = query.filter(PaperMetadata.status == status)
def parse_date(val):
try:
return datetime.datetime.strptime(val, "%Y-%m-%d")
except Exception:
return None
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(
["ID", "Title", "Journal", "DOI", "ISSN", "Status", "Created At", "Updated At"]
)
for paper in query:
writer.writerow(
[
paper.id,
paper.title,
getattr(paper, "journal", ""),
paper.doi,
paper.issn,
paper.status,
paper.created_at,
paper.updated_at,
]
)
output.seek(0)
return send_file(
io.BytesIO(output.read().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="papers.csv",
)
@bp.route("/papers/<int:paper_id>/detail")
def paper_detail(paper_id):
paper = PaperMetadata.query.get_or_404(paper_id)
return render_template("partials/paper_detail_modal.html", paper=paper)
@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update
try:
new_volume = float(request.form.get("total_volume", 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5"
)
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
schedule = {
sc.hour: sc.weight
for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
}
volume = VolumeConfig.query.first()
return render_template(
"schedule.html",
schedule=schedule,
volume=volume.volume,
app_title="PaperScraper",
)
@bp.route("/logs")
def logs():
return render_template("logs.html", app_title="PaperScraper")
@bp.route("/about")
def about():
return render_template("about.html", app_title="PaperScraper")