303 lines
9.1 KiB
Python
303 lines
9.1 KiB
Python
import codecs
|
|
import csv
|
|
import datetime
|
|
import io
|
|
from io import StringIO
|
|
|
|
import pandas as pd
|
|
from flask import (
|
|
Blueprint,
|
|
current_app,
|
|
flash,
|
|
redirect,
|
|
render_template,
|
|
request,
|
|
send_file,
|
|
url_for,
|
|
)
|
|
from sqlalchemy import asc, desc
|
|
|
|
from .db import db
|
|
from .models import PaperMetadata, ScheduleConfig, VolumeConfig
|
|
|
|
# Blueprint named "main"; the view functions in this module register on it via @bp.route.
bp = Blueprint(name="main", import_name=__name__)
|
|
|
|
|
|
@bp.route("/")
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
|
|
|
|
|
|
# Column headers an uploaded CSV must contain; upload() rejects a file that lacks any of them.
REQUIRED_COLUMNS = {
    "title",
    "doi",
    "issn",
    "journal",
    "alternative_id",
}
|
|
|
|
|
|
@bp.route("/upload", methods=["GET", "POST"])
def upload():
    """Show the CSV upload form and, on POST, import the uploaded rows.

    GET renders ``upload.html``. POST decodes the uploaded ``file`` as UTF-8,
    parses it as CSV with the ``delimiter`` form field (default ``","``),
    checks that every column in ``REQUIRED_COLUMNS`` is present, and adds one
    ``PaperMetadata`` with status ``"New"`` per CSV row. All rows are
    committed together; a failed commit is rolled back.

    Returns:
        The rendered ``upload.html`` template. After a POST it carries either
        an ``error`` or a ``success`` message.
    """
    if request.method != "POST":
        return render_template("upload.html")

    uploaded = request.files.get("file")
    delimiter = request.form.get("delimiter", ",")

    if not uploaded:
        return render_template("upload.html", error="No file selected.")

    try:
        content = "".join(codecs.iterdecode(uploaded.stream, "utf-8"))
        # dtype=str disables type inference: identifiers such as an ISSN or
        # alternative id keep their leading zeros instead of becoming
        # numbers, and date cells reach parse_date() as text.
        df = pd.read_csv(StringIO(content), delimiter=delimiter, dtype=str)
    except Exception as e:
        # Request boundary: any decode/parse failure is reported to the user.
        return render_template("upload.html", error=f"Failed to read CSV file: {e}")

    missing = REQUIRED_COLUMNS - set(df.columns)
    if missing:
        # Sets are unordered; sorting keeps the message stable between requests.
        return render_template(
            "upload.html",
            error=f"Missing required columns: {', '.join(sorted(missing))}",
        )

    def clean(val):
        """Return ``None`` for a missing cell (NaN/None), else the value unchanged."""
        return None if pd.isna(val) else val

    def parse_date(val):
        """Best-effort conversion of a cell to a ``date``; ``None`` if missing or unparsable."""
        if pd.isna(val):
            return None
        try:
            return pd.to_datetime(val).date()
        except Exception:
            # Deliberately lenient: a malformed optional date must not abort the import.
            return None

    for _, row in df.iterrows():
        db.session.add(
            PaperMetadata(
                title=clean(row["title"]),
                doi=clean(row["doi"]),
                alt_id=clean(row.get("alternative_id")),
                issn=clean(row["issn"]),
                type=clean(row.get("type")),
                language=clean(row.get("language")),
                published_online=parse_date(row.get("published_online")),
                status="New",
                file_path=None,
                error_msg=None,
            )
        )

    try:
        db.session.commit()
    except Exception as e:
        # Undo the pending inserts so the session stays usable.
        db.session.rollback()
        return render_template(
            "upload.html", error=f"Failed to save data to database: {e}"
        )

    return render_template(
        "upload.html", success="File uploaded and validated successfully!"
    )
|
|
|
|
|
|
@bp.route("/papers")
def list_papers():
    """Render a paginated, filterable and sortable list of papers.

    Query parameters:
        page: page number (default 1); 50 papers are shown per page.
        status: keep only papers whose status equals this value.
        created_from, created_to, updated_from, updated_to: ``YYYY-MM-DD``
            bounds on ``created_at`` / ``updated_at``. Both ends are
            inclusive of the whole day; absent or unparsable values are
            ignored.
        sort_by: name of a mapped ``PaperMetadata`` column to order by;
            anything else falls back to ``created_at`` (also the default).
        sort_dir: ``"desc"`` (default) sorts descending, any other value
            ascending.

    Returns:
        The rendered ``papers.html`` template with the current page of
        papers, the pagination object, the overall paper count and a
        ``{status: count}`` mapping.
    """
    from sqlalchemy import inspect as sa_inspect

    page = request.args.get("page", 1, type=int)
    per_page = 50

    status = request.args.get("status")
    sort_by = request.args.get("sort_by", "created_at")
    sort_dir = request.args.get("sort_dir", "desc")

    def parse_date(val):
        """Parse ``YYYY-MM-DD`` into a datetime at midnight; ``None`` if absent or invalid."""
        try:
            return datetime.datetime.strptime(val, "%Y-%m-%d")
        except (ValueError, TypeError):
            return None

    query = PaperMetadata.query
    if status:
        query = query.filter(PaperMetadata.status == status)

    one_day = datetime.timedelta(days=1)
    date_bounds = (
        (PaperMetadata.created_at, "created_from", "created_to"),
        (PaperMetadata.updated_at, "updated_from", "updated_to"),
    )
    for column, from_arg, to_arg in date_bounds:
        if (start := parse_date(request.args.get(from_arg))) is not None:
            query = query.filter(column >= start)
        if (end := parse_date(request.args.get(to_arg))) is not None:
            # The parsed bound is midnight at the start of the end day, so
            # "<= end" would drop everything that happened during that day.
            query = query.filter(column < end + one_day)

    # sort_by comes from the URL: only accept mapped column attributes so an
    # arbitrary attribute name (a method, ``query``, ...) never reaches order_by().
    if sort_by not in sa_inspect(PaperMetadata).column_attrs.keys():
        sort_by = "created_at"
    sort_func = desc if sort_dir == "desc" else asc
    query = query.order_by(sort_func(getattr(PaperMetadata, sort_by)))

    # error_out=False: an out-of-range page yields an empty page, not a 404.
    pagination = query.paginate(page=page, per_page=per_page, error_out=False)

    # Statistics are computed over all papers, independent of the filters above.
    total_papers = PaperMetadata.query.count()
    status_counts = dict(
        db.session.query(PaperMetadata.status, db.func.count(PaperMetadata.status))
        .group_by(PaperMetadata.status)
        .all()
    )

    return render_template(
        "papers.html",
        papers=pagination.items,
        pagination=pagination,
        total_papers=total_papers,
        status_counts=status_counts,
        sort_by=sort_by,
        sort_dir=sort_dir,
    )
|
|
|
|
|
|
@bp.route("/papers/export")
def export_papers():
    """Send the papers matching the request's filters as a ``papers.csv`` download.

    Query parameters:
        status: keep only papers whose status equals this value.
        created_from, created_to, updated_from, updated_to: ``YYYY-MM-DD``
            bounds on ``created_at`` / ``updated_at``. Both ends are
            inclusive of the whole day; absent or unparsable values are
            ignored.
        sort_by: name of a mapped ``PaperMetadata`` column to order by;
            anything else falls back to ``created_at`` (also the default).
        sort_dir: ``"desc"`` (default) sorts descending, any other value
            ascending.

    Returns:
        A UTF-8 encoded ``text/csv`` attachment with a header row followed
        by one row per matching paper.
    """
    from sqlalchemy import inspect as sa_inspect

    status = request.args.get("status")
    sort_by = request.args.get("sort_by", "created_at")
    sort_dir = request.args.get("sort_dir", "desc")

    def parse_date(val):
        """Parse ``YYYY-MM-DD`` into a datetime at midnight; ``None`` if absent or invalid."""
        try:
            return datetime.datetime.strptime(val, "%Y-%m-%d")
        except (ValueError, TypeError):
            return None

    query = PaperMetadata.query
    if status:
        query = query.filter(PaperMetadata.status == status)

    # The date and sort parameters were previously read but never applied,
    # so the export ignored everything except ``status``.
    one_day = datetime.timedelta(days=1)
    date_bounds = (
        (PaperMetadata.created_at, "created_from", "created_to"),
        (PaperMetadata.updated_at, "updated_from", "updated_to"),
    )
    for column, from_arg, to_arg in date_bounds:
        if (start := parse_date(request.args.get(from_arg))) is not None:
            query = query.filter(column >= start)
        if (end := parse_date(request.args.get(to_arg))) is not None:
            # The parsed bound is midnight at the start of the end day, so
            # compare against the next midnight to include that whole day.
            query = query.filter(column < end + one_day)

    # sort_by comes from the URL: only accept mapped column attributes.
    if sort_by not in sa_inspect(PaperMetadata).column_attrs.keys():
        sort_by = "created_at"
    sort_func = desc if sort_dir == "desc" else asc
    query = query.order_by(sort_func(getattr(PaperMetadata, sort_by)))

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(
        ["ID", "Title", "Journal", "DOI", "ISSN", "Status", "Created At", "Updated At"]
    )
    writer.writerows(
        [
            paper.id,
            paper.title,
            # Falls back to "" when the object has no ``journal`` attribute.
            getattr(paper, "journal", ""),
            paper.doi,
            paper.issn,
            paper.status,
            paper.created_at,
            paper.updated_at,
        ]
        for paper in query
    )

    return send_file(
        io.BytesIO(output.getvalue().encode("utf-8")),
        mimetype="text/csv",
        as_attachment=True,
        download_name="papers.csv",
    )
|
|
|
|
from flask import jsonify, render_template
|
|
|
|
|
|
@bp.route("/papers/<int:paper_id>/detail")
def paper_detail(paper_id):
    """Render the detail-modal partial for one paper.

    The paper is fetched with ``get_or_404``, so an unknown ``paper_id``
    aborts the request with a 404 before any template is rendered.
    """
    return render_template(
        "partials/paper_detail_modal.html",
        paper=PaperMetadata.query.get_or_404(paper_id),
    )
|
|
|
|
|
|
@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
    """Show the hourly schedule and volume settings; on POST, update one of them.

    A POST whose form contains ``total_volume`` updates the single
    ``VolumeConfig`` row (creating it if none exists). Any other POST must
    supply ``hour_0`` .. ``hour_23`` and updates the 24 ``ScheduleConfig``
    weights. Validation failures roll back the session and are reported with
    ``flash``; nothing is saved unless all submitted values are valid.

    Returns:
        The rendered ``schedule.html`` template with ``schedule`` (a
        ``{hour: weight}`` mapping) and ``volume`` (the stored volume, or
        ``None`` when no ``VolumeConfig`` row exists).
    """
    if request.method == "POST":
        if "total_volume" in request.form:
            try:
                new_volume = float(request.form.get("total_volume", 0))
                # Chained comparison on purpose: NaN fails it, whereas
                # "v <= 0 or v > 1000" would let NaN through.
                if not 0 < new_volume <= 1000:
                    raise ValueError(
                        "Volume must be greater than 0 and at most 1000"
                    )

                volume_config = VolumeConfig.query.first()
                if not volume_config:
                    db.session.add(VolumeConfig(volume=new_volume))
                else:
                    volume_config.volume = new_volume

                db.session.commit()
                flash("Volume updated successfully!", "success")
            except ValueError as e:
                db.session.rollback()
                flash(f"Error updating volume: {e}", "error")
        else:
            try:
                # Validate all 24 values before touching the database so a
                # bad value never leaves a half-updated schedule.
                weights = []
                for hour in range(24):
                    key = f"hour_{hour}"
                    if key not in request.form:
                        raise ValueError(f"Missing data for hour {hour}")

                    # Only the conversion sits in the inner try; the range
                    # check below must keep its own, more specific message.
                    try:
                        weight = float(request.form[key])
                    except ValueError:
                        raise ValueError(
                            f"Invalid weight value for hour {hour}"
                        ) from None

                    # Chained comparison also rejects NaN.
                    if not 0 <= weight <= 5:
                        raise ValueError(
                            f"Weight for hour {hour} must be between 0 and 5"
                        )
                    weights.append(weight)

                for hour, weight in enumerate(weights):
                    config = ScheduleConfig.query.get(hour)
                    if config:
                        config.weight = weight
                    else:
                        db.session.add(ScheduleConfig(hour=hour, weight=weight))

                db.session.commit()
                flash("Schedule updated successfully!", "success")
            except ValueError as e:
                db.session.rollback()
                flash(f"Error updating schedule: {e}", "error")

    schedule_by_hour = {
        sc.hour: sc.weight
        for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
    }
    volume_config = VolumeConfig.query.first()
    return render_template(
        "schedule.html",
        schedule=schedule_by_hour,
        # No VolumeConfig row yet (e.g. fresh database): pass None instead of
        # failing with AttributeError on ``None.volume``.
        volume=volume_config.volume if volume_config else None,
        app_title="PaperScraper",
    )
|
|
|
|
|
|
@bp.route("/logs")
def logs():
    """Render the ``logs.html`` page with the application title."""
    context = {"app_title": "PaperScraper"}
    return render_template("logs.html", **context)
|
|
|
|
|
|
@bp.route("/about")
def about():
    """Render the ``about.html`` page with the application title."""
    context = {"app_title": "PaperScraper"}
    return render_template("about.html", **context)
|