Finishes Scheduler View & Controller

This commit is contained in:
Michael Beck 2025-03-31 01:28:07 +02:00
parent 2868916cf6
commit 1534dbb0ba
4 changed files with 427 additions and 298 deletions

View File

@ -1,18 +1,22 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8" />
<title>{{ app_title }}</title> <title>{{ app_title }}</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet"> <link
href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css"
rel="stylesheet"
/>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
<!-- Optional Alpine.js --> <!-- Optional Alpine.js -->
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script> <script
</head> defer
<body> src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
{% include 'nav.html' %} ></script>
<div class="container py-4"> </head>
{% block content %}{% endblock %} <body>
</div> {% include 'nav.html' %}
{% include 'footer.html' %} <main class="container my-5">{% block content %}{% endblock %}</main>
</body> {% include 'footer.html' %}
</body>
</html> </html>

View File

@ -1,53 +1,69 @@
{% extends 'base.html' %} {% extends 'base.html' %} {% block content %}
{% block content %}
<main class="container my-5"> <div class="container text-center">
<div class="container text-center"> <h1 class="display-4">Welcome to SciPaperLoader</h1>
<h1 class="display-4">Welcome to SciPaperLoader</h1> <p class="lead">Your paper scraping tool is ready.</p>
<p class="lead">Your paper scraping tool is ready.</p> <p class="text-muted">A simple tool to scrape papers from Zotero API.</p>
<p class="text-muted">A simple tool to scrape papers from Zotero API.</p> </div>
</div>
<div class="row g-4"> <div class="row g-4">
<div class="col-md-6"> <div class="col-md-6">
<div class="card shadow-sm"> <div class="card shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="card-title">📄 CSV Import</h5> <h5 class="card-title">📄 CSV Import</h5>
<p class="card-text">Upload a 37-column CSV to import paper metadata. Only relevant fields (title, DOI, ISSN, etc.) are stored. Errors are reported without aborting the batch.</p> <p class="card-text">
<a href="/import" class="btn btn-sm btn-outline-primary">Upload Now</a> Upload a 37-column CSV to import paper metadata. Only relevant fields
</div> (title, DOI, ISSN, etc.) are stored. Errors are reported without
</div> aborting the batch.
</div> </p>
<a href="/import" class="btn btn-sm btn-outline-primary">Upload Now</a>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">🧠 Background Scraper</h5>
<p class="card-text">A daemon process runs hourly to fetch papers using Zotero API. Downloads are randomized to mimic human behavior and avoid detection.</p>
<a href="/logs" class="btn btn-sm btn-outline-secondary">View Logs</a>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">📚 Paper Management</h5>
<p class="card-text">Monitor paper status (Pending, Done, Failed), download PDFs, and inspect errors. Files are stored on disk in structured folders per DOI.</p>
<a href="/papers" class="btn btn-sm btn-outline-success">Browse Papers</a>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">🕒 Download Schedule</h5>
<p class="card-text">Control how many papers are downloaded per hour. Configure hourly volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or usage pattern.</p>
<a href="/schedule" class="btn btn-sm btn-outline-warning">Adjust Schedule</a>
</div>
</div>
</div> </div>
</div> </div>
</main> </div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">🧠 Background Scraper</h5>
<p class="card-text">
A daemon process runs hourly to fetch papers using Zotero API.
Downloads are randomized to mimic human behavior and avoid detection.
</p>
<a href="/logs" class="btn btn-sm btn-outline-secondary">View Logs</a>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">📚 Paper Management</h5>
<p class="card-text">
Monitor paper status (Pending, Done, Failed), download PDFs, and
inspect errors. Files are stored on disk in structured folders per
DOI.
</p>
<a href="/papers" class="btn btn-sm btn-outline-success"
>Browse Papers</a
>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">🕒 Download Schedule</h5>
<p class="card-text">
Control how many papers are downloaded per hour. Configure hourly
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
usage pattern.
</p>
<a href="/schedule" class="btn btn-sm btn-outline-warning"
>Adjust Schedule</a
>
</div>
</div>
</div>
</div>
{% endblock %} {% endblock %}

View File

@ -1,222 +1,300 @@
{% extends 'base.html' %} {% extends 'base.html' %} {% block content %}
{% block content %}
<style> <style>
.timeline { .timeline {
display: flex; display: flex;
flex-wrap: wrap; flex-wrap: wrap;
gap: 4px; gap: 3px;
user-select: none; /* Prevent text selection during drag */ user-select: none; /* Prevent text selection during drag */
} }
.hour-block { .hour-block {
width: 60px; width: 49px;
height: 70px; /* Increased height to fit additional text */ height: 70px; /* Increased height to fit additional text */
border-radius: 6px; border-radius: 5px;
text-align: center; text-align: center;
line-height: 1.2; line-height: 1.2;
font-size: 0.9rem; font-size: 0.9rem;
padding-top: 6px; padding-top: 6px;
cursor: pointer; cursor: pointer;
user-select: none; user-select: none;
transition: background-color 0.2s ease-in-out; transition: background-color 0.2s ease-in-out;
margin: 1px; margin: 1px;
} }
.hour-block.selected { .hour-block.selected {
outline: 2px solid #4584b8; outline: 2px solid #4584b8;
} }
.papers { .papers {
font-size: 0.7rem; font-size: 0.7rem;
margin-top: 2px; margin-top: 2px;
} }
.flash-message { .flash-message {
padding: 12px; position: fixed;
margin-bottom: 20px; top: 30%;
border-radius: 6px; left: 50%;
opacity: 1; transform: translate(-50%, -50%);
transition: opacity 2s ease-in-out; z-index: 1000;
} width: 300px;
text-align: center;
font-weight: bold;
padding: 12px;
margin-bottom: 20px;
border-radius: 6px;
opacity: 1;
transition: opacity 5s ease-in-out;
}
.flash-message.success { .flash-message.success {
background-color: #d4edda; background-color: #d4edda;
border-color: #c3e6cb; border-color: #c3e6cb;
color: #155724; color: #155724;
} }
.flash-message.error { .flash-message.error {
background-color: #f8d7da; background-color: #f8d7da;
border-color: #f5c6cb; border-color: #f5c6cb;
color: #721c24; color: #721c24;
} }
.flash-message.fade { .flash-message.fade {
opacity: 0; opacity: 0;
} }
</style>
</style> <script>
const initialSchedule = {{ schedule | tojson }};
const totalVolume = {{ volume }};
</script>
<script> <div x-data="scheduleManager(initialSchedule, totalVolume)" class="container">
const initialSchedule = {{ schedule | tojson }}; <h1 class="mb-4">🕒 Configure Hourly Download Weights</h1>
const totalVolume = {{ volume }};
</script>
<!-- Flash Messages -->
{% with messages = get_flashed_messages(with_categories=true) %} {% if
messages %}
<div id="flash-messages">
{% for category, message in messages %}
<div
class="flash-message {{ category }}"
x-data="{}"
x-init="setTimeout(() => $el.classList.add('fade'), 100); setTimeout(() => $el.remove(), 5000)"
>
{{ message }}
</div>
{% endfor %}
</div>
{% endif %} {% endwith %}
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="container my-5"> <!-- Content -->
<h2 class="mb-4">🕒 Configure Hourly Download Weights</h2> <div class="mb-3">
<h3>How it Works</h3>
<!-- Flash Messages --> <p class="text-muted mb-0">
{% with messages = get_flashed_messages(with_categories=true) %} This page allows you to configure the daily volume of papers to be
{% if messages %} downloaded and the hourly download weights for the papers. The weights
<div id="flash-messages"> determine how many papers will be downloaded during each hour of the day.
{% for category, message in messages %} The total volume (<strong x-text="volume"></strong> papers/day) is split
<div class="flash-message {{ category }}" x-data="{}" x-init="setTimeout(() => $el.classList.add('fade'), 100); setTimeout(() => $el.remove(), 5000)"> across all hours based on their relative weights. Each weight controls the
{{ message }} proportion of papers downloaded during that hour. Click to select one or
</div> more hours below. Then assign a weight to them using the input and apply
{% endfor %} it. Color indicates relative intensity. The total daily volume will be
</div> split proportionally across these weights.
{% endif %} <strong>Don't forget to submit the changes!</strong>
{% endwith %} </p>
<h3>Example</h3>
<p class="text-muted"> <p class="text-muted mb-0">
Click to select one or more hours below. Then assign a weight to them using the input and apply it. Color indicates relative intensity. The total daily volume will be split proportionally across these weights. If the total volume is <strong>240 papers</strong> and hours are
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
<strong>40, 80, and 120 papers</strong> respectively.
</p> </p>
<h3 class="mt-4">Volume</h3>
<p class="text-muted">The total volume of data to be downloaded each day is <strong x-text="volume"></strong> papers.</p>
<div class="d-flex justify-content-between align-items-start flex-wrap gap-2">
<p class="text-muted mb-0" style="max-width: 600px;">
Click to select one or more hours below. Then assign a weight to them using the input and apply it. Color indicates relative intensity. The total daily volume will be split proportionally across these weights.
</p>
<div class="hour-block"
style="pointer-events: none;"
x-init="$nextTick(() => previewWeight = 1.0)"
x-data="{ previewWeight: 1.0 }"
:style="getBackgroundStyleFromValue(previewWeight)">
<div><strong>14:00</strong></div>
<div class="weight"><span x-text="previewWeight.toFixed(1)"></span></div>
<div class="papers text-muted">example</div>
</div>
</div>
<h3 class="mt-4">Current Schedule</h3>
<form method="POST" action="{{ url_for('main.schedule') }}">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block"
:id="'hour-' + hour"
:data-hour="hour"
:style="getBackgroundStyle(hour)"
:class="{'selected': isSelected(hour)}"
@mousedown="startDrag($event, hour)"
@mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers"><span x-text="getPapersPerHour(hour)"></span> p.</div>
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]">
</div>
</template>
</div>
<div class="input-group mb-4 w-50">
<label class="input-group-text">Set Weight:</label>
<input type="number" step="0.1" min="0" max="5" x-model="newWeight" class="form-control">
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">Apply to Selected</button>
</div>
<div class="d-flex justify-content-between">
<a href="/" class="btn btn-outline-secondary">⬅ Back</a>
<button type="submit" class="btn btn-success">💾 Save Schedule</button>
</div>
</form>
</div> </div>
<script> <h2 class="mt-4">Volume</h2>
function scheduleManager(initial, volume) {
return {
schedule: { ...initial },
volume: volume,
selectedHours: [],
newWeight: 1.0,
isDragging: false,
dragOperation: null,
formatHour(h) { <div class="align-items-start flex-wrap gap-2">
return String(h).padStart(2, '0') + ":00"; <p class="text-muted">
}, The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3">
<form
method="POST"
action="{{ url_for('main.schedule') }}"
class="input-group w-50"
>
<label class="input-group-text">Papers per day:</label>
<input
type="number"
class="form-control"
name="total_volume"
value="{{ volume }}"
min="1"
max="1000"
required
/>
<button type="submit" class="btn btn-primary">Update Volume</button>
</form>
</div>
</div>
getBackgroundStyle(hour) { <h2 class="mt-4">Current Schedule</h2>
const weight = parseFloat(this.schedule[hour]); <form method="POST" action="{{ url_for('main.schedule') }}">
const maxWeight = 2.5; // You can adjust this <div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div
class="hour-block"
:id="'hour-' + hour"
:data-hour="hour"
:style="getBackgroundStyle(hour)"
:class="{'selected': isSelected(hour)}"
@mousedown="startDrag($event, hour)"
@mouseover="dragSelect(hour)"
>
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
</div>
</template>
</div>
// Normalize weight (0.0 to 1.0) <div class="input-group mb-4 w-50">
const t = Math.min(weight / maxWeight, 1.0); <label class="input-group-text">Set Weight:</label>
<input
type="number"
step="0.1"
min="0"
max="5"
x-model="newWeight"
class="form-control"
/>
<button
type="button"
class="btn btn-outline-primary"
@click="applyWeight()"
>
Apply to Selected
</button>
</div>
// Interpolate HSL lightness: 95% (light) to 30% (dark) <div class="d-flex justify-content-between">
const lightness = 95 - (t * 65); // 95 → 30 <a href="/" class="btn btn-outline-secondary">⬅ Back</a>
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette <button type="submit" class="btn btn-success">💾 Save Schedule</button>
</div>
</form>
</div>
const textColor = t > 0.65 ? 'white' : 'black'; // adaptive text color <script>
function scheduleManager(initial, volume) {
return {
schedule: { ...initial },
volume: volume,
selectedHours: [],
newWeight: 1.0,
isDragging: false,
dragOperation: null,
return { formatHour(h) {
backgroundColor, return String(h).padStart(2, "0") + ":00";
color: textColor },
};
},
startDrag(event, hour) { getBackgroundStyle(hour) {
event.preventDefault(); const weight = parseFloat(this.schedule[hour]);
this.isDragging = true; const maxWeight = 2.5; // You can adjust this
this.dragOperation = this.isSelected(hour) ? 'remove' : 'add';
this.toggleSelect(hour);
},
dragSelect(hour) { // Normalize weight (0.0 to 1.0)
if (!this.isDragging) return; const t = Math.min(weight / maxWeight, 1.0);
const selected = this.isSelected(hour);
if (this.dragOperation === 'add' && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === 'remove' && selected) {
this.selectedHours = this.selectedHours.filter(h => h !== hour);
}
},
endDrag() { // Interpolate HSL lightness: 95% (light) to 30% (dark)
this.isDragging = false; const lightness = 95 - t * 65; // 95 → 30
}, const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
toggleSelect(hour) { const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter(h => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) { return {
return this.selectedHours.includes(hour); backgroundColor,
}, color: textColor,
};
},
applyWeight() { getBackgroundStyleFromValue(value) {
this.selectedHours.forEach(hour => { const weight = parseFloat(value);
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1); const maxWeight = 2.5; // You can adjust this
});
this.selectedHours = [];
},
getTotalWeight() { // Normalize weight (0.0 to 1.0)
return Object.values(this.schedule).reduce((sum, w) => sum + parseFloat(w), 0); const t = Math.min(weight / maxWeight, 1.0);
},
getPapersPerHour(hour) { // Interpolate HSL lightness: 95% (light) to 30% (dark)
const total = this.getTotalWeight(); const lightness = 95 - t * 65; // 95 → 30
if (total === 0) return 0; const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
return ((parseFloat(this.schedule[hour]) / total) * this.volume).toFixed(1);
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} }
}; },
}
</script>{% endblock %} endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
this.selectedHours = [];
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
};
}
</script>
{% endblock %}

View File

@ -4,10 +4,12 @@ from .db import db
bp = Blueprint('main', __name__) bp = Blueprint('main', __name__)
@bp.route("/") @bp.route("/")
def index(): def index():
return render_template("index.html") return render_template("index.html")
@bp.route("/upload", methods=["GET", "POST"]) @bp.route("/upload", methods=["GET", "POST"])
def upload(): def upload():
if request.method == "POST": if request.method == "POST":
@ -15,52 +17,81 @@ def upload():
pass pass
return render_template("upload.html") return render_template("upload.html")
@bp.route("/papers") @bp.route("/papers")
def papers(): def papers():
return render_template("papers.html", app_title="PaperScraper") return render_template("papers.html", app_title="PaperScraper")
@bp.route("/schedule", methods=["GET", "POST"]) @bp.route("/schedule", methods=["GET", "POST"])
def schedule(): def schedule():
if request.method == "POST": if request.method == "POST":
try: try:
# Validate form data # Check if we're updating volume or schedule
for hour in range(24): if 'total_volume' in request.form:
key = f"hour_{hour}" # Volume update
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try: try:
new_volume = float(request.form.get('total_volume', 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5")
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0)) weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5: config = ScheduleConfig.query.get(hour)
raise ValueError(f"Weight for hour {hour} must be between 0 and 5") if config:
except ValueError: config.weight = weight
raise ValueError(f"Invalid weight value for hour {hour}") else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
# Update database if validation passes db.session.commit()
for hour in range(24): flash("Schedule updated successfully!", "success")
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e: except ValueError as e:
db.session.rollback() db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error") flash(f"Error updating schedule: {str(e)}", "error")
schedule = {sc.hour: sc.weight for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()} schedule = {sc.hour: sc.weight for sc in ScheduleConfig.query.order_by(
ScheduleConfig.hour).all()}
volume = VolumeConfig.query.first() volume = VolumeConfig.query.first()
return render_template("schedule.html", schedule=schedule, volume=volume.volume, app_title="PaperScraper") return render_template("schedule.html", schedule=schedule, volume=volume.volume, app_title="PaperScraper")
@bp.route("/logs") @bp.route("/logs")
def logs(): def logs():
return render_template("logs.html", app_title="PaperScraper") return render_template("logs.html", app_title="PaperScraper")
@bp.route("/about") @bp.route("/about")
def about(): def about():
return render_template("about.html", app_title="PaperScraper") return render_template("about.html", app_title="PaperScraper")