diff --git a/scipaperloader/templates/base.html b/scipaperloader/templates/base.html index 6407e72..4992010 100644 --- a/scipaperloader/templates/base.html +++ b/scipaperloader/templates/base.html @@ -1,18 +1,22 @@ - - + + {{ app_title }} - + - - - -{% include 'nav.html' %} -
- {% block content %}{% endblock %} -
-{% include 'footer.html' %} - + + + + {% include 'nav.html' %} +
{% block content %}{% endblock %}
+ {% include 'footer.html' %} + diff --git a/scipaperloader/templates/index.html b/scipaperloader/templates/index.html index 4fdb7e9..252fb0f 100644 --- a/scipaperloader/templates/index.html +++ b/scipaperloader/templates/index.html @@ -1,53 +1,69 @@ -{% extends 'base.html' %} -{% block content %} +{% extends 'base.html' %} {% block content %} -
-
-

Welcome to SciPaperLoader

-

Your paper scraping tool is ready.

-

A simple tool to scrape papers from Zotero API.

-
+
+

Welcome to SciPaperLoader

+

Your paper scraping tool is ready.

+

A simple tool to scrape papers from Zotero API.

+
-
-
-
-
-
📄 CSV Import
-

Upload a 37-column CSV to import paper metadata. Only relevant fields (title, DOI, ISSN, etc.) are stored. Errors are reported without aborting the batch.

- Upload Now -
-
-
- -
-
-
-
🧠 Background Scraper
-

A daemon process runs hourly to fetch papers using Zotero API. Downloads are randomized to mimic human behavior and avoid detection.

- View Logs -
-
-
- -
-
-
-
📚 Paper Management
-

Monitor paper status (Pending, Done, Failed), download PDFs, and inspect errors. Files are stored on disk in structured folders per DOI.

- Browse Papers -
-
-
- -
-
-
-
🕒 Download Schedule
-

Control how many papers are downloaded per hour. Configure hourly volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or usage pattern.

- Adjust Schedule -
-
+
+
+
+
+
📄 CSV Import
+

+ Upload a 37-column CSV to import paper metadata. Only relevant fields + (title, DOI, ISSN, etc.) are stored. Errors are reported without + aborting the batch. +

+ Upload Now
-
-{% endblock %} \ No newline at end of file + + +
+
+
+
🧠 Background Scraper
+

+ A daemon process runs hourly to fetch papers using Zotero API. + Downloads are randomized to mimic human behavior and avoid detection. +

+ View Logs +
+
+
+ +
+
+
+
📚 Paper Management
+

+ Monitor paper status (Pending, Done, Failed), download PDFs, and + inspect errors. Files are stored on disk in structured folders per + DOI. +

+ Browse Papers +
+
+
+ +
+
+
+
🕒 Download Schedule
+

+ Control how many papers are downloaded per hour. Configure hourly + volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or + usage pattern. +

+ Adjust Schedule +
+
+
+ +{% endblock %} diff --git a/scipaperloader/templates/schedule.html b/scipaperloader/templates/schedule.html index d3101c7..caea561 100644 --- a/scipaperloader/templates/schedule.html +++ b/scipaperloader/templates/schedule.html @@ -1,222 +1,300 @@ -{% extends 'base.html' %} -{% block content %} +{% extends 'base.html' %} {% block content %} + .flash-message { + position: fixed; + top: 30%; + left: 50%; + transform: translate(-50%, -50%); + z-index: 1000; + width: 300px; + text-align: center; + font-weight: bold; + padding: 12px; + margin-bottom: 20px; + border-radius: 6px; + opacity: 1; + transition: opacity 5s ease-in-out; + } - - - -
-

🕒 Configure Hourly Download Weights

- - - {% with messages = get_flashed_messages(with_categories=true) %} - {% if messages %} -
- {% for category, message in messages %} -
- {{ message }} -
- {% endfor %} -
- {% endif %} - {% endwith %} + .flash-message.success { + background-color: #d4edda; + border-color: #c3e6cb; + color: #155724; + } -

- Click to select one or more hours below. Then assign a weight to them using the input and apply it. Color indicates relative intensity. The total daily volume will be split proportionally across these weights. + .flash-message.error { + background-color: #f8d7da; + border-color: #f5c6cb; + color: #721c24; + } + + .flash-message.fade { + opacity: 0; + } + + + + +

+

🕒 Configure Hourly Download Weights

+ + + {% with messages = get_flashed_messages(with_categories=true) %} {% if + messages %} +
+ {% for category, message in messages %} +
+ {{ message }} +
+ {% endfor %} +
+ {% endif %} {% endwith %} + + +
+

How it Works

+

+ This page allows you to configure the daily volume of papers to be + downloaded and the hourly download weights for the papers. The weights + determine how many papers will be downloaded during each hour of the day. + The total volume ( papers/day) is split + across all hours based on their relative weights. Each weight controls the + proportion of papers downloaded during that hour. Click to select one or + more hours below. Then assign a weight to them using the input and apply + it. Color indicates relative intensity. The total daily volume will be + split proportionally across these weights. + Don't forget to submit the changes! +

+

Example

+

+ If the total volume is 240 papers and hours are + weighted as 1.0, 2.0, and 3.0, they will receive + 40, 80, and 120 papers respectively.

- -

Volume

-

The total volume of data to be downloaded each day is papers.

- -
-

- Click to select one or more hours below. Then assign a weight to them using the input and apply it. Color indicates relative intensity. The total daily volume will be split proportionally across these weights. -

- -
-
14:00
-
-
example
-
-
- - -

Current Schedule

-
-
- -
- -
- - - -
- -
- ⬅ Back - -
-
- {% endblock %} \ No newline at end of file + }, + + endDrag() { + this.isDragging = false; + }, + + toggleSelect(hour) { + if (this.isSelected(hour)) { + this.selectedHours = this.selectedHours.filter((h) => h !== hour); + } else { + this.selectedHours.push(hour); + } + }, + + isSelected(hour) { + return this.selectedHours.includes(hour); + }, + + applyWeight() { + this.selectedHours.forEach((hour) => { + this.schedule[hour] = parseFloat(this.newWeight).toFixed(1); + }); + this.selectedHours = []; + }, + + getTotalWeight() { + return Object.values(this.schedule).reduce( + (sum, w) => sum + parseFloat(w), + 0 + ); + }, + + getPapersPerHour(hour) { + const total = this.getTotalWeight(); + if (total === 0) return 0; + return ( + (parseFloat(this.schedule[hour]) / total) * + this.volume + ).toFixed(1); + }, + }; + } + +{% endblock %} diff --git a/scipaperloader/views.py b/scipaperloader/views.py index 3f80f98..964dd52 100644 --- a/scipaperloader/views.py +++ b/scipaperloader/views.py @@ -4,63 +4,94 @@ from .db import db bp = Blueprint('main', __name__) + @bp.route("/") def index(): return render_template("index.html") + @bp.route("/upload", methods=["GET", "POST"]) def upload(): if request.method == "POST": # CSV upload logic here - pass + pass return render_template("upload.html") + @bp.route("/papers") def papers(): return render_template("papers.html", app_title="PaperScraper") + @bp.route("/schedule", methods=["GET", "POST"]) def schedule(): if request.method == "POST": try: - # Validate form data - for hour in range(24): - key = f"hour_{hour}" - if key not in request.form: - raise ValueError(f"Missing data for hour {hour}") - + # Check if we're updating volume or schedule + if 'total_volume' in request.form: + # Volume update try: + new_volume = float(request.form.get('total_volume', 0)) + if new_volume <= 0 or new_volume > 1000: + raise ValueError("Volume must be between 1 and 1000") + + volume_config = VolumeConfig.query.first() + if not volume_config: + volume_config = VolumeConfig(volume=new_volume) + db.session.add(volume_config) + else: + volume_config.volume = new_volume + + db.session.commit() + flash("Volume updated successfully!", "success") + + except ValueError as e: + db.session.rollback() + flash(f"Error updating volume: {str(e)}", "error") + else: + # Schedule update logic + # Validate form data + for hour in range(24): + key = f"hour_{hour}" + if key not in request.form: + raise ValueError(f"Missing data for hour {hour}") + + try: + weight = float(request.form.get(key, 0)) + if weight < 0 or weight > 5: + raise ValueError( + f"Weight for hour {hour} must be between 0 and 5") + except ValueError: + raise ValueError(f"Invalid weight value for hour {hour}") + + # Update database if validation passes + for hour in range(24): + key = f"hour_{hour}" weight = float(request.form.get(key, 0)) - if weight < 0 or weight > 5: - raise ValueError(f"Weight for hour {hour} must be between 0 and 5") - except ValueError: - raise ValueError(f"Invalid weight value for hour {hour}") - - # Update database if validation passes - for hour in range(24): - key = f"hour_{hour}" - weight = float(request.form.get(key, 0)) - config = ScheduleConfig.query.get(hour) - if config: - config.weight = weight - else: - db.session.add(ScheduleConfig(hour=hour, weight=weight)) - - db.session.commit() - flash("Schedule updated successfully!", "success") - + config = ScheduleConfig.query.get(hour) + if config: + config.weight = weight + else: + db.session.add(ScheduleConfig(hour=hour, weight=weight)) + + db.session.commit() + flash("Schedule updated successfully!", "success") + except ValueError as e: db.session.rollback() flash(f"Error updating schedule: {str(e)}", "error") - - schedule = {sc.hour: sc.weight for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()} + + schedule = {sc.hour: sc.weight for sc in ScheduleConfig.query.order_by( + ScheduleConfig.hour).all()} volume = VolumeConfig.query.first() return render_template("schedule.html", schedule=schedule, volume=volume.volume, app_title="PaperScraper") + @bp.route("/logs") def logs(): return render_template("logs.html", app_title="PaperScraper") + @bp.route("/about") def about(): return render_template("about.html", app_title="PaperScraper")