Compare commits

...

19 Commits

Author SHA1 Message Date
Michael Beck
11f086aa64 implements download path configuration 2025-04-16 22:03:17 +02:00
Michael Beck
5af3d00e39 ups 2025-04-16 22:02:57 +02:00
Michael Beck
803554a410 adds shutdown commands to makefile 2025-04-16 21:41:03 +02:00
Michael Beck
4f8040e9db deletes unnecessary file 2025-04-16 21:40:34 +02:00
Michael Beck
d6c72265f9 fixes external icon links 2025-04-16 21:40:13 +02:00
Michael Beck
a0fa541de3 fixes dummy scraper 2025-04-16 21:39:59 +02:00
Michael Beck
0adaed0bfa fixes dummy paper processing 2025-04-16 16:32:52 +02:00
Michael Beck
4085b47460 adds a MAX_VOLUME variable to defaults 2025-04-16 16:09:26 +02:00
Michael Beck
14f336fadf standardizes notification in html templates 2025-04-16 15:58:42 +02:00
Michael Beck
3d67bbbdf7 standardizes notification in html templates 2025-04-16 15:58:23 +02:00
Michael Beck
592375c67b adds script block to base 2025-04-16 15:57:53 +02:00
Michael Beck
396eaefbe9 removes old scheduler 2025-04-16 15:57:44 +02:00
Michael Beck
f36fc53b26 added dummy scraper 2025-04-16 15:57:35 +02:00
Michael Beck
1f0fb5e990 fixes typo 2025-04-16 15:44:55 +02:00
Michael Beck
5d8a2bd7c4 fixes bug due to scheduling refactor 2025-04-16 15:29:06 +02:00
Michael Beck
adf8207461 redesigns config, including some placeholders 2025-04-16 15:19:28 +02:00
Michael Beck
bb2ecd842d adds upload-duplicate_strategy config in interface and backend 2025-04-16 14:06:25 +02:00
Michael Beck
7dd7935fed adds .csv to gitignore 2025-04-16 14:04:43 +02:00
Michael Beck
a1865ef326 adds dev dependency for djlint 2025-04-16 14:03:56 +02:00
24 changed files with 1898 additions and 1088 deletions

2
.gitignore vendored
View File

@ -11,7 +11,7 @@ dist/
.mypy_cache/
*.db
*.R
*.csv
migrations/

View File

@ -4,9 +4,8 @@
# Define Python and pip executables inside virtual environment
PYTHON := venv/bin/python
PIP := venv/bin/pip
# Celery worker command
CELERY := venv/bin/celery
FLASK := venv/bin/flask
# Default target that runs the application
all: run
@ -100,15 +99,15 @@ venv:
# Run the application in debug mode
run: venv
venv/bin/flask --app scipaperloader --debug run
$(FLASK) --app scipaperloader --debug run
# Format code using Black and isort
format:
format: venv
venv/bin/black .
venv/bin/isort .
# Check if code meets formatting standards
format-check:
format-check: venv
venv/bin/black --check .
venv/bin/isort --check .
@ -116,15 +115,15 @@ format-check:
reformat: format lint
# Check code for style issues using flake8
lint:
lint: venv
venv/bin/flake8 .
# Run static type checking with mypy
mypy:
mypy: venv
venv/bin/mypy scipaperloader
# Run the test suite
test:
test: venv
venv/bin/pytest
# Build distribution package after running checks
@ -134,20 +133,62 @@ dist: format-check lint mypy test
# Set up complete development environment
dev: clean venv
# Start Celery worker for processing tasks
celery: venv
# Start Celery worker - PURGE FIRST
celery: venv redis
@echo "Purging Celery task queue before starting worker..."
# Purge the queue forcefully. Ignore errors if queue is empty/unreachable initially.
@-$(CELERY) -A celery_worker:celery purge -f
@echo "Starting Celery worker..."
$(CELERY) -A celery_worker:celery worker --loglevel=info
# Monitor Celery tasks with flower web interface
celery-flower: venv
$(PIP) install flower
$(CELERY) -A celery_worker:celery flower --port=5555
# Run Celery beat scheduler for periodic tasks
celery-beat: venv redis
@echo "Starting Celery beat scheduler..."
# Ensure celerybeat-schedule file is removed for clean start if needed
@-rm -f celerybeat-schedule.db
# Use the default file-based scheduler (removed the --scheduler flag)
$(CELERY) -A celery_worker:celery beat --loglevel=info
# Check if Redis is running, start if needed
redis:
@redis-cli ping > /dev/null 2>&1 || (echo "Starting Redis server..." && redis-server --daemonize yes)
@if ! redis-cli ping > /dev/null 2>&1; then \
echo "Starting Redis server..."; \
redis-server --daemonize yes; \
sleep 1; \
else \
echo "Redis is already running."; \
fi
# Run complete application stack (Flask app + Celery worker + Redis)
# Run complete application stack (Flask app + Celery worker + Redis + Beat scheduler)
run-all: redis
@echo "Starting Flask and Celery..."
@$(MAKE) -j2 run celery
@echo "Starting Flask, Celery worker and Beat scheduler..."
# Run them in parallel. Ctrl+C will send SIGINT to make, which propagates.
# Use trap to attempt cleanup, but primary cleanup is purge on next start.
@trap '$(MAKE) stop-all;' INT TERM; \
$(MAKE) -j3 run celery celery-beat & wait
# Stop running Celery worker and beat gracefully, then purge the queue.
# The patterns are written as "[c]elery ..." on purpose: `pkill -f` matches
# against full command lines, and the shell that make spawns for each recipe
# line carries the pattern text on its own command line. The bracket expression
# still matches the real "celery ..." processes but no longer matches that
# shell, so the recipe cannot kill itself.
# Every command ends in `|| echo ...`, so a missing process or a failed purge
# is reported without failing the target. One second is allowed between the
# signals and the purge so the processes can terminate.
.PHONY: stop-celery
stop-celery:
	@echo "Attempting graceful shutdown of Celery worker and beat..."
	@pkill -TERM -f "[c]elery -A celery_worker:celery worker" || echo "Worker not found or already stopped."
	@pkill -TERM -f "[c]elery -A celery_worker:celery beat" || echo "Beat not found or already stopped."
	@sleep 1
	@echo "Purging remaining tasks from Celery queue..."
	@$(CELERY) -A celery_worker:celery purge -f || echo "Purge failed or queue empty."
# Stop Flask development server.
# "[f]lask" instead of "flask" keeps `pkill -f` from matching the recipe's own
# shell, whose command line contains the pattern text. The `|| echo` fallback
# reports a missing server without failing the target.
.PHONY: stop-flask
stop-flask:
	@echo "Attempting shutdown of Flask development server..."
	@pkill -TERM -f "[f]lask --app scipaperloader --debug run" || echo "Flask server not found or already stopped."
# Shut down everything run-all may have started (Celery worker/beat and Flask)
stop-all: stop-celery stop-flask
	@printf '%s\n' "All components stopped."
# Default target
all: run

View File

@ -1,4 +1,6 @@
from scipaperloader.celery import celery, configure_celery
# Import all task modules to ensure they are registered with Celery
import scipaperloader.blueprints.scraper # Import the scraper module with our tasks
# Configure celery with Flask app
configure_celery()

View File

@ -26,6 +26,7 @@ dev = [
"black>=24.2.0,<25",
"isort>=5.13.1,<6",
"mypy>=1.8.0,<2",
"djlint>=1.36.4,<2",
]
[tool.setuptools.package-data]

View File

@ -4,10 +4,10 @@ from flask import Flask
from .main import bp as main_bp
from .papers import bp as papers_bp
from .upload import bp as upload_bp
from .schedule import bp as schedule_bp
from .logger import bp as logger_bp
from .api import bp as api_bp
from .scraper import bp as scraper_bp
from .config import bp as config_bp
def register_blueprints(app: Flask):
@ -15,7 +15,7 @@ def register_blueprints(app: Flask):
app.register_blueprint(main_bp)
app.register_blueprint(papers_bp, url_prefix='/papers')
app.register_blueprint(upload_bp, url_prefix='/upload')
app.register_blueprint(schedule_bp, url_prefix='/schedule')
app.register_blueprint(logger_bp, url_prefix='/logs')
app.register_blueprint(api_bp, url_prefix='/api')
app.register_blueprint(scraper_bp, url_prefix='/scraper')
app.register_blueprint(scraper_bp, url_prefix='/scraper')
app.register_blueprint(config_bp)

View File

@ -0,0 +1,364 @@
"""Configuration management blueprint."""
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
from ..db import db
# Import the new model
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
from ..defaults import MAX_VOLUME
import os # Import os for path validation
bp = Blueprint("config", __name__, url_prefix="/config")
# Helper functions for configuration updates
def _update_volume(new_volume):
"""
Helper function to update volume configuration.
Args:
new_volume (float): The new volume value
Returns:
tuple: (success, message, volume_config)
"""
try:
new_volume = float(new_volume)
if new_volume <= 0 or new_volume > MAX_VOLUME:
return False, f"Volume must be between 1 and {MAX_VOLUME}", None
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
old_value = volume_config.volume
volume_config.volume = new_volume
ActivityLog.log_config_change(
config_key="scraper_volume",
old_value=old_value,
new_value=new_volume,
description="Updated scraper volume"
)
db.session.commit()
return True, "Volume updated successfully!", volume_config
except (ValueError, TypeError) as e:
db.session.rollback()
return False, f"Error updating volume: {str(e)}", None
# Add helper for download path
def _update_download_path(new_path):
"""
Helper function to update download path configuration.
Args:
new_path (str): The new download path
Returns:
tuple: (success, message, download_path_config)
"""
try:
# Basic validation: check if it's a non-empty string
if not new_path or not isinstance(new_path, str):
return False, "Download path cannot be empty.", None
# --- Add more validation like checking if path exists or is writable ---
# Check if the path exists and is a directory
if not os.path.isdir(new_path):
# Try to create it if it doesn't exist
try:
os.makedirs(new_path, exist_ok=True)
ActivityLog.log_system_activity(
action="create_directory",
status="info",
description=f"Created download directory: {new_path}"
)
except OSError as e:
ActivityLog.log_system_activity(
action="create_directory",
status="error",
description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
)
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
# Check if the path is writable
if not os.access(new_path, os.W_OK):
ActivityLog.log_system_activity(
action="check_directory_permissions",
status="error",
description=f"Download path '{new_path}' is not writable."
)
return False, f"Path '{new_path}' exists but is not writable by the application.", None
# --- End of validation ---
config = DownloadPathConfig.query.first()
if not config:
config = DownloadPathConfig(path=new_path)
db.session.add(config)
else:
old_value = config.path
config.path = new_path
ActivityLog.log_config_change(
config_key="download_path",
old_value=old_value,
new_value=new_path,
description="Updated download path"
)
db.session.commit()
return True, "Download path updated successfully!", config
except Exception as e:
db.session.rollback()
return False, f"Error updating download path: {str(e)}", None
def _update_schedule(schedule_data):
"""
Helper function to update schedule configuration.
Args:
schedule_data (dict): Dictionary with hour:weight pairs
Returns:
tuple: (success, message)
"""
try:
# Validate all entries first
for hour_str, weight in schedule_data.items():
try:
hour = int(hour_str)
weight = float(weight)
if hour < 0 or hour > 23:
return False, f"Hour value must be between 0 and 23, got {hour}"
if weight < 0.1 or weight > 5:
return False, f"Weight for hour {hour} must be between 0.1 and 5, got {weight}"
except ValueError:
return False, f"Invalid data format for hour {hour_str}"
# Update schedule after validation
for hour_str, weight in schedule_data.items():
hour = int(hour_str)
weight = float(weight)
config = ScheduleConfig.query.get(hour)
if not config:
config = ScheduleConfig(hour=hour, weight=weight)
db.session.add(config)
else:
old_value = config.weight
config.weight = weight
ActivityLog.log_config_change(
config_key=f"schedule_hour_{hour}",
old_value=old_value,
new_value=weight,
description=f"Updated schedule weight for hour {hour}"
)
db.session.commit()
return True, "Schedule updated successfully!"
except Exception as e:
db.session.rollback()
return False, f"Error updating schedule: {str(e)}"
@bp.route("/")
@bp.route("/general")
def general():
    """Render the "general" tab of the configuration page.

    Missing ``VolumeConfig`` / ``DownloadPathConfig`` rows are created and
    committed first, so the template always receives both objects.
    """
    volume_row = VolumeConfig.query.first()
    if not volume_row:
        # Seed the default volume
        volume_row = VolumeConfig(volume=100)
        db.session.add(volume_row)
        db.session.commit()

    path_row = DownloadPathConfig.query.first()
    if not path_row:
        # No explicit path: rely on the default declared on the model
        path_row = DownloadPathConfig()
        db.session.add(path_row)
        db.session.commit()

    template_context = {
        "active_tab": "general",
        "volume_config": volume_row,
        "download_path_config": path_row,
        "max_volume": MAX_VOLUME,
        "app_title": "Configuration",
    }
    return render_template("config/index.html.jinja", **template_context)
@bp.route("/schedule")
def schedule():
    """Render the "schedule" tab of the configuration page.

    Any hour from 0 to 23 without a ``ScheduleConfig`` row gets one with
    weight 1.0; a missing ``VolumeConfig`` row is created with volume 100.
    """
    rows_by_hour = {row.hour: row for row in ScheduleConfig.query.all()}

    schedule_config = {}
    for hour in range(24):
        if hour not in rows_by_hour:
            # Fill the gap with the default weight
            db.session.add(ScheduleConfig(hour=hour, weight=1.0))
            schedule_config[hour] = 1.0
            continue
        schedule_config[hour] = rows_by_hour[hour].weight

    if len(rows_by_hour) < 24:
        db.session.commit()

    volume_row = VolumeConfig.query.first()
    if not volume_row:
        # Seed the default volume
        volume_row = VolumeConfig(volume=100)
        db.session.add(volume_row)
        db.session.commit()

    template_context = {
        "active_tab": "schedule",
        "schedule": schedule_config,
        "volume": volume_row.volume,
        "max_volume": MAX_VOLUME,
        "app_title": "Configuration",
    }
    return render_template("config/index.html.jinja", **template_context)
# Remove old update_volume route
# @bp.route("/update/volume", methods=["POST"])
# def update_volume(): ...
# Add new route to handle general settings form
@bp.route("/update/general", methods=["POST"])
def update_general():
    """Handle the general settings form (volume and download path).

    Each field present in the form is passed to its update helper and the
    helper's message is flashed as "success" or "error". Absent fields are
    skipped. Always redirects back to the general tab.
    """
    # Processed in this order: volume first, then download path.
    field_handlers = (
        ("total_volume", _update_volume),
        ("download_path", _update_download_path),
    )

    for field_name, handler in field_handlers:
        submitted = request.form.get(field_name)
        if submitted is None:
            continue
        succeeded, message, _ = handler(submitted)
        flash(message, "success" if succeeded else "error")

    return redirect(url_for("config.general"))
@bp.route("/update/schedule", methods=["POST"])
def update_schedule():
    """Handle the schedule form: expects one ``hour_<n>`` field per hour 0-23.

    The first missing hour aborts the update with an error flash; otherwise
    the collected weights go to ``_update_schedule`` and its message is
    flashed. Always redirects back to the schedule tab.
    """
    form = request.form

    first_missing = next(
        (hour for hour in range(24) if f"hour_{hour}" not in form),
        None,
    )
    if first_missing is not None:
        flash(f"Missing data for hour {first_missing}", "error")
        return redirect(url_for("config.schedule"))

    schedule_data = {str(hour): form.get(f"hour_{hour}", 0) for hour in range(24)}

    succeeded, message = _update_schedule(schedule_data)
    flash(message, "success" if succeeded else "error")

    return redirect(url_for("config.schedule"))
@bp.route("/api/schedule/stats")
def schedule_stats():
    """Return JSON statistics for the current schedule.

    The payload contains the total volume, the sum of all weights, and per
    hour both the weight and the share of the volume it receives
    (``weight / total_weight * total_volume``, or 0 when the total weight
    is not positive). A JSON ``error`` object is returned when the volume or
    the schedule configuration is missing.
    """
    volume_row = VolumeConfig.query.first()
    if not volume_row:
        return jsonify({"error": "No volume configuration found"})

    rows = ScheduleConfig.query.all()
    if not rows:
        return jsonify({"error": "No schedule configuration found"})

    total_volume = volume_row.volume
    total_weight = sum(row.weight for row in rows)

    def _ratio(weight):
        # Guard against division by zero when every weight is 0
        return weight / total_weight if total_weight > 0 else 0

    papers_per_hour = {row.hour: _ratio(row.weight) * total_volume for row in rows}
    hourly_weights = {row.hour: row.weight for row in rows}

    return jsonify({
        "total_volume": total_volume,
        "total_weight": total_weight,
        "papers_per_hour": papers_per_hour,
        "hourly_weights": hourly_weights
    })
@bp.route("/api/update_config", methods=["POST"])
def api_update_config():
    """API endpoint to update configuration.

    Expects a JSON object that may contain any of the keys ``volume``,
    ``download_path`` and ``schedule``. Each key present is passed to its
    update helper and reported as one entry in ``updates``.

    Returns:
        JSON ``{"success": bool, "updates": [{"type", "success", "message"}]}``.
        ``success`` is False as soon as one update fails. A body that is not
        a JSON object yields ``{"success": False, "message": ...}`` with
        status 400; an unexpected exception yields the same shape.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so every failure mode is answered with JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            "success": False,
            "message": "Request body must be a JSON object"
        }), 400

    response = {"success": True, "updates": []}

    # (payload key / reported type, helper), in processing order
    updaters = (
        ("volume", _update_volume),
        ("download_path", _update_download_path),
        ("schedule", _update_schedule),
    )

    try:
        for key, updater in updaters:
            if key not in data:
                continue
            # The helpers return (success, message[, config]); only the first
            # two elements are reported.
            result = updater(data[key])
            success, message = result[0], result[1]
            response["updates"].append({
                "type": key,
                "success": success,
                "message": message
            })
            if not success:
                response["success"] = False

        return jsonify(response)
    except Exception as e:
        db.session.rollback()
        return jsonify({
            "success": False,
            "message": f"Unexpected error: {str(e)}"
        })

View File

@ -1,79 +0,0 @@
"""Schedule configuration routes."""
from flask import Blueprint, flash, render_template, request
from ..db import db
from ..models import ScheduleConfig, VolumeConfig
bp = Blueprint("schedule", __name__)
@bp.route("/", methods=["GET", "POST"])
def schedule():
    """Show the schedule page and, on POST, update volume or hourly weights.

    A POST containing ``total_volume`` updates the volume; any other POST is
    treated as a schedule update and must contain ``hour_0`` … ``hour_23``.
    Validation problems are flashed as errors and roll the session back.
    """
    if request.method == "POST":
        try:
            # Check if we're updating volume or schedule
            if "total_volume" in request.form:
                # Volume update
                try:
                    new_volume = float(request.form.get("total_volume", 0))
                    if new_volume <= 0 or new_volume > 1000:
                        raise ValueError("Volume must be between 1 and 1000")

                    volume_config = VolumeConfig.query.first()
                    if not volume_config:
                        volume_config = VolumeConfig(volume=new_volume)
                        db.session.add(volume_config)
                    else:
                        volume_config.volume = new_volume

                    db.session.commit()
                    flash("Volume updated successfully!", "success")
                except ValueError as e:
                    db.session.rollback()
                    flash(f"Error updating volume: {str(e)}", "error")
            else:
                # Schedule update: validate every hour before writing anything
                weights = {}
                for hour in range(24):
                    key = f"hour_{hour}"
                    if key not in request.form:
                        raise ValueError(f"Missing data for hour {hour}")

                    # Only the conversion sits inside the try. The range check
                    # stays outside so its own message is not replaced by the
                    # generic "Invalid weight value" one.
                    try:
                        weight = float(request.form.get(key, 0))
                    except ValueError:
                        raise ValueError(f"Invalid weight value for hour {hour}")

                    if weight < 0 or weight > 5:
                        raise ValueError(
                            f"Weight for hour {hour} must be between 0 and 5"
                        )
                    weights[hour] = weight

                # Update database if validation passes
                for hour, weight in weights.items():
                    config = ScheduleConfig.query.get(hour)
                    if config:
                        config.weight = weight
                    else:
                        db.session.add(ScheduleConfig(hour=hour, weight=weight))

                db.session.commit()
                flash("Schedule updated successfully!", "success")

        except ValueError as e:
            db.session.rollback()
            flash(f"Error updating schedule: {str(e)}", "error")

    schedule = {
        sc.hour: sc.weight
        for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
    }
    volume = VolumeConfig.query.first()
    return render_template(
        "schedule.html.jinja",
        schedule=schedule,
        volume=volume.volume if volume else 0,
        app_title="PaperScraper",
    )

File diff suppressed because it is too large Load Diff

View File

@ -22,6 +22,7 @@ from flask import (
from ..db import db
from ..models import PaperMetadata, ActivityLog
from ..celery import celery # Import the celery instance directly
from ..defaults import DUPLICATE_STRATEGIES
bp = Blueprint("upload", __name__)
@ -55,10 +56,10 @@ def upload():
return jsonify({"task_id": task.id})
return render_template("upload.html.jinja")
return render_template("upload.html.jinja", duplicate_strategies=DUPLICATE_STRATEGIES)
@celery.task(bind=True)
def process_csv(self, file_content, delimiter, duplicate_strategy):
def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
"""Process CSV file and import paper metadata."""
# With the ContextTask in place, we're already inside an app context

View File

@ -1,4 +1,5 @@
from celery import Celery
from celery.schedules import crontab
# Create Celery instance without Flask app initially
celery = Celery(
@ -29,6 +30,14 @@ def configure_celery(app=None):
worker_max_memory_per_child=1000000, # 1GB memory limit
task_acks_late=True, # Acknowledge tasks after completion
task_reject_on_worker_lost=True, # Requeue tasks if worker dies
# Configure Beat schedule for periodic tasks
beat_schedule={
'scheduled-scraper-hourly': {
'task': 'scipaperloader.blueprints.scraper.dummy_scheduled_scraper',
'schedule': crontab(minute=0), # Run at the start of every hour
'options': {'expires': 3600}
},
}
)
# Create a custom task class that pushes the Flask application context

View File

@ -1 +1,25 @@
DEBUG = False # make sure DEBUG is off unless enabled explicitly otherwise
# Define duplicate handling strategies with descriptions for the UI
DUPLICATE_STRATEGIES = {
"skip": {
"name": "Skip duplicates",
"description": "Skip papers that already exist in the database",
"is_default": True
},
"update": {
"name": "Update duplicates",
"description": "Update existing papers with new metadata",
"is_default": False
},
# Add new strategies here, they will automatically appear in the UI
# Example:
# "merge": {
# "name": "Merge duplicates",
# "description": "Merge new data with existing data, keeping both values",
# "is_default": False
# }
}
# Configuration limits
MAX_VOLUME = 100000 # Maximum volume limit for scraper configuration

View File

@ -210,6 +210,72 @@ class VolumeConfig(db.Model):
id = db.Column(db.Integer, primary_key=True)
volume = db.Column(db.Float) # volume of papers to scrape per day
class DownloadPathConfig(db.Model):
    """Stores the base directory for downloaded files."""

    id = db.Column(db.Integer, primary_key=True)
    # Placeholder default path
    path = db.Column(db.String(255), default="/path/to/dummy/papers")

    @classmethod
    def get_path(cls):
        """Return the configured path, first creating the default row if none exists."""
        existing = cls.query.first()
        if existing:
            return existing.path

        created = cls(path="/path/to/dummy/papers")
        db.session.add(created)
        db.session.commit()
        return created.path

    @classmethod
    def set_path(cls, new_path):
        """Store ``new_path`` (creating the row if needed) and return the row."""
        record = cls.query.first()
        if record:
            record.path = new_path
        else:
            record = cls(path=new_path)
            db.session.add(record)
        db.session.commit()
        return record
class ScraperState(db.Model):
    """Persists whether the scraper is active and/or paused."""

    id = db.Column(db.Integer, primary_key=True)
    is_active = db.Column(db.Boolean, default=False)
    is_paused = db.Column(db.Boolean, default=False)
    # Set on insert and refreshed on every update
    last_updated = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    @classmethod
    def get_current_state(cls):
        """Return the first state row, creating an inactive, unpaused one if none exists."""
        current = cls.query.first()
        if current:
            return current

        current = cls(is_active=False, is_paused=False)
        db.session.add(current)
        db.session.commit()
        return current

    @classmethod
    def set_active(cls, active):
        """Store the ``is_active`` flag, commit, and return the state row."""
        current = cls.get_current_state()
        current.is_active = active
        db.session.commit()
        return current

    @classmethod
    def set_paused(cls, paused):
        """Store the ``is_paused`` flag, commit, and return the state row."""
        current = cls.get_current_state()
        current.is_paused = paused
        db.session.commit()
        return current

    @classmethod
    def is_scraper_active(cls):
        """Tell whether the scraper is active and not paused."""
        current = cls.get_current_state()
        return current.is_active and not current.is_paused
def init_schedule_config():
"""Initialize ScheduleConfig with default values if empty"""
@ -241,3 +307,9 @@ def init_schedule_config():
default_volume = VolumeConfig(volume=100)
db.session.add(default_volume)
db.session.commit()
# Initialize DownloadPathConfig if it doesn't exist
if DownloadPathConfig.query.count() == 0:
default_path = DownloadPathConfig(path="/path/to/dummy/papers")
db.session.add(default_path)
db.session.commit()

View File

@ -1,21 +0,0 @@
import time
from .db import db
from .models import PaperMetadata
def run_scraper():
    """Endlessly process pending papers, one per iteration.

    Each pass fetches the first paper whose status is "Pending", marks it
    "Done" (or "Failed" with the error message) and commits. When nothing is
    pending the loop sleeps for 60 seconds. This function never returns.
    """
    while True:
        # NOTE(review): relies on `db.app` being set — confirm this attribute
        # exists for the Flask-SQLAlchemy version in use.
        with db.app.app_context():
            # Query the model this module actually imports; the previous
            # name `Paper` was undefined and raised NameError.
            paper = PaperMetadata.query.filter_by(status="Pending").first()
            if paper:
                try:
                    # Scraping logic (e.g. download PDF)
                    paper.status = "Done"
                    paper.file_path = "papers/some_path.pdf"
                except Exception as e:
                    paper.status = "Failed"
                    paper.error_message = str(e)

                db.session.commit()
            else:
                time.sleep(60)

View File

@ -16,6 +16,8 @@
{% include "nav.html.jinja" %}
<main class="container my-5">{% block content %}{% endblock content %}</main>
{% include "footer.html.jinja" %}
{% block scripts %}{% endblock scripts %}
</body>
</html>

View File

@ -0,0 +1,61 @@
<!-- General Configuration Tab -->
<div class="tab-pane active">
<div class="config-form">
<div class="card">
<div class="card-header">
<h5>General Configuration</h5>
</div>
<div class="card-body">
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
<form action="{{ url_for('config.update_general') }}" method="post">
<div class="form-section">
<h6>Scraper Volume</h6>
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
<div class="mb-3">
<label for="totalVolume" class="form-label">Papers per day:</label>
<input type="number" class="form-control" id="totalVolume" name="total_volume" min="1"
max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
</div>
</div>
<div class="form-section">
<h6>Download Path</h6>
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
<div class="mb-3">
<label for="downloadPath" class="form-label">Download Directory:</label>
<input type="text" class="form-control" id="downloadPath" name="download_path"
value="{{ download_path_config.path }}" required>
<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
The directory is created if it does not exist; the application must have write permission for it.</div>
</div>
</div>
<div class="form-section">
<h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
<label class="form-check-label" for="enableNotifications">
Enable email notifications
</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableLogging" checked>
<label class="form-check-label" for="enableLogging">
Enable detailed activity logging
</label>
</div>
</div>
<button type="submit" class="btn btn-primary">Save General Settings</button>
</form>
</div>
</div>
</div>
</div>

View File

@ -0,0 +1,50 @@
{% extends "base.html.jinja" %}
{% block title %}Configuration{% endblock title %}
{% block styles %}
{{ super() }}
<style>
.nav-tabs .nav-link {
color: #495057;
}
.nav-tabs .nav-link.active {
font-weight: bold;
}
.config-form {
max-width: 800px;
margin: 0 auto;
}
.form-section {
margin-bottom: 2rem;
}
</style>
{% endblock styles %}
{% block content %}
<div class="container mt-4">
<h1>Configuration</h1>
<ul class="nav nav-tabs mb-4">
<li class="nav-item">
<a class="nav-link {% if active_tab == 'general' %}active{% endif %}"
href="{{ url_for('config.general') }}">General</a>
</li>
<li class="nav-item">
<a class="nav-link {% if active_tab == 'schedule' %}active{% endif %}"
href="{{ url_for('config.schedule') }}">Schedule</a>
</li>
</ul>
<div class="tab-content">
{% if active_tab == 'general' %}
{% include "config/general.html.jinja" %}
{% elif active_tab == 'schedule' %}
{% include "config/schedule.html.jinja" %}
{% endif %}
</div>
</div>
{% endblock content %}

View File

@ -0,0 +1,285 @@
<style>
.timeline {
display: flex;
flex-wrap: wrap;
gap: 3px;
user-select: none;
/* Prevent text selection during drag */
}
.hour-block {
width: 49px;
height: 70px;
/* Increased height to fit additional text */
border-radius: 5px;
text-align: center;
line-height: 1.2;
font-size: 0.9rem;
padding-top: 6px;
cursor: pointer;
user-select: none;
transition: background-color 0.2s ease-in-out;
margin: 1px;
}
.hour-block.selected {
outline: 2px solid #4584b8;
}
.papers {
font-size: 0.7rem;
margin-top: 2px;
}
</style>
<script>
const initialSchedule = {{ schedule | tojson }};
const totalVolume = {{ volume }};
</script>
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="tab-pane active">
<div class="card">
<div class="card-header">
<h5>Scheduling Configuration</h5>
</div>
<div class="card-body">
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
<!-- Content -->
<div class="mb-3">
<h3>How it Works</h3>
<p class="text-muted mb-0">
This page allows you to configure the daily volume of papers to be
downloaded and the hourly download weights for the papers. The weights
determine how many papers will be downloaded during each hour of the day.
The total volume (<strong x-text="volume"></strong> papers/day) is split
across all hours based on their relative weights. Each weight controls the
proportion of papers downloaded during that hour. Click to select one or
more hours below. Then assign a weight to them using the input and apply
it. Color indicates relative intensity. The total daily volume will be
split proportionally across these weights.
<strong>Don't forget to submit the changes!</strong>
</p>
<h3>Example</h3>
<p class="text-muted mb-0">
If the total volume is <strong>240 papers</strong> and hours are
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
<strong>40, 80, and 120 papers</strong> respectively.
</p>
</div>
<h2 class="mt-4">Volume</h2>
<div class="align-items-start flex-wrap gap-2">
<p class="text-muted">
The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3" x-data="{ volumeValue: volume }">
<div class="input-group w-50">
<label class="input-group-text">Papers per day:</label>
<input type="number" class="form-control" x-model="volumeValue" min="1" max="{{ max_volume }}"
required />
<button type="button" class="btn btn-primary" @click="updateVolume()">
Update Volume
</button>
</div>
</div>
</div>
<h2 class="mt-4">Current Schedule</h2>
<form x-data id="scheduleForm">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour" :style="getBackgroundStyle(hour)"
:class="{'selected': isSelected(hour)}" @mousedown="startDrag($event, hour)"
@mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
</div>
</template>
</div>
<div class="input-group mb-4 w-50">
<label class="input-group-text">Set Weight:</label>
<input type="number" step="0.1" min="0" max="5" x-model="newWeight" class="form-control" />
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
Apply to Selected
</button>
</div>
<div class="d-flex justify-content-between">
<a href="{{ url_for('config.general') }}" class="btn btn-outline-secondary">⬅ Back</a>
<button type="button" class="btn btn-success" @click="saveSchedule()">💾 Save Schedule</button>
</div>
</form>
</div>
</div>
</div>
<script>
/**
 * Alpine.js data factory for the hourly schedule editor.
 *
 * @param {Object} initial - Map of hour -> weight; shallow-copied so edits
 *     stay local to the component until they are saved.
 * @param {number} volume - Total volume that is split across the hours.
 * @returns {Object} Reactive state plus the methods used by the template.
 */
function scheduleManager(initial, volume) {
  // Weights at or above this value are drawn with the darkest shade.
  const MAX_DISPLAY_WEIGHT = 2.5;

  // Rendered server-side. Double quotes keep the single-quoted endpoint name
  // from terminating the JavaScript string in the template source.
  const CONFIG_API_URL = "{{ url_for('config.api_update_config') }}";

  /**
   * POST a partial configuration update and report the outcome to the user.
   *
   * @param {Object} payload - JSON body sent to the config endpoint.
   * @param {string} successMessage - Flash text shown when the server accepts.
   * @param {string} fallbackError - Flash text used when the server rejects
   *     the update without supplying its own message.
   * @param {Function} [onSuccess] - Runs before the success flash is shown.
   */
  function postConfig(payload, successMessage, fallbackError, onSuccess) {
    fetch(CONFIG_API_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    })
      .then((response) => response.json())
      .then((data) => {
        if (data.success) {
          if (onSuccess) onSuccess();
          showFlashMessage(successMessage, 'success');
        } else {
          showFlashMessage(data.updates?.[0]?.message || fallbackError, 'error');
        }
      })
      .catch((error) => {
        console.error('Error:', error);
        showFlashMessage('Network error occurred', 'error');
      });
  }

  return {
    schedule: { ...initial },
    volume: volume,
    selectedHours: [],
    newWeight: 1.0,
    volumeValue: volume,
    isDragging: false,
    dragOperation: null,

    /** Format an hour key as a zero-padded clock label, e.g. 7 -> "07:00". */
    formatHour(h) {
      return String(h).padStart(2, "0") + ":00";
    },

    /** Send the edited volume; the displayed volume changes only on success. */
    updateVolume() {
      postConfig(
        { volume: this.volumeValue },
        'Volume updated successfully!',
        'Error updating volume',
        () => {
          this.volume = parseFloat(this.volumeValue);
        }
      );
    },

    /** Style object for the block of the given hour (see the value variant). */
    getBackgroundStyle(hour) {
      return this.getBackgroundStyleFromValue(this.schedule[hour]);
    },

    /**
     * Map a weight to a background/text colour pair.
     *
     * @param {number|string} value - Weight to visualise.
     * @returns {Object} Object with backgroundColor and color entries.
     */
    getBackgroundStyleFromValue(value) {
      const weight = parseFloat(value);
      // Normalise to 0.0..1.0; unparsable or negative weights use the lightest shade
      // instead of producing an invalid colour string.
      const t = Number.isFinite(weight)
        ? Math.min(Math.max(weight / MAX_DISPLAY_WEIGHT, 0), 1.0)
        : 0;
      // Interpolate HSL lightness: 95% (light) to 30% (dark)
      const lightness = 95 - t * 65;
      const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
      const textColor = t > 0.65 ? "white" : "black"; // keep the text readable on dark blocks
      return {
        backgroundColor,
        color: textColor,
      };
    },

    /** Begin a drag; the first block decides whether the drag adds or removes. */
    startDrag(event, hour) {
      event.preventDefault();
      this.isDragging = true;
      this.dragOperation = this.isSelected(hour) ? "remove" : "add";
      this.toggleSelect(hour);
    },

    /** Extend the running drag operation to the block under the pointer. */
    dragSelect(hour) {
      if (!this.isDragging) return;
      const selected = this.isSelected(hour);
      if (this.dragOperation === "add" && !selected) {
        this.selectedHours.push(hour);
      } else if (this.dragOperation === "remove" && selected) {
        this.selectedHours = this.selectedHours.filter((h) => h !== hour);
      }
    },

    /** Stop the current drag. */
    endDrag() {
      this.isDragging = false;
    },

    /** Flip the selection state of a single hour. */
    toggleSelect(hour) {
      if (this.isSelected(hour)) {
        this.selectedHours = this.selectedHours.filter((h) => h !== hour);
      } else {
        this.selectedHours.push(hour);
      }
    },

    /** @returns {boolean} Whether the hour is currently selected. */
    isSelected(hour) {
      return this.selectedHours.includes(hour);
    },

    /**
     * Write the entered weight to every selected hour and clear the selection.
     * An empty, non-numeric or negative entry is rejected and the selection is
     * kept, so the string "NaN" can never end up in the schedule.
     */
    applyWeight() {
      const weight = parseFloat(this.newWeight);
      if (!Number.isFinite(weight) || weight < 0) {
        if (typeof showFlashMessage === 'function') {
          showFlashMessage('Please enter a valid, non-negative weight', 'error');
        }
        return;
      }
      const formatted = weight.toFixed(1);
      this.selectedHours.forEach((hour) => {
        this.schedule[hour] = formatted;
      });
      this.selectedHours = [];
    },

    /** @returns {number} Sum of all hourly weights. */
    getTotalWeight() {
      return Object.values(this.schedule).reduce(
        (sum, w) => sum + parseFloat(w),
        0
      );
    },

    /**
     * Share of the volume assigned to one hour.
     *
     * @returns {string} Value with one decimal place; "0.0" when all weights are zero.
     */
    getPapersPerHour(hour) {
      const total = this.getTotalWeight();
      // Same string format as the regular branch so the display stays uniform.
      if (total === 0) return (0).toFixed(1);
      return (
        (parseFloat(this.schedule[hour]) / total) *
        this.volume
      ).toFixed(1);
    },

    /** Persist the complete hour -> weight map. */
    saveSchedule() {
      postConfig(
        { schedule: this.schedule },
        'Schedule updated successfully!',
        'Error updating schedule'
      );
    }
  };
}
</script>

View File

@ -58,7 +58,7 @@
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
usage pattern.
</p>
<a href="{{ url_for('schedule.schedule') }}" class="btn btn-sm btn-outline-warning">Adjust Schedule</a>
<a href="{{ url_for('config.schedule') }}" class="btn btn-sm btn-outline-warning">Adjust Schedule</a>
</div>
</div>
</div>

View File

@ -17,7 +17,7 @@
<a class="nav-link" href="{{ url_for('papers.list_papers') }}">Papers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="{{ url_for('schedule.schedule') }}">Schedule</a>
<a class="nav-link" href="{{ url_for('config.general') }}">Configuration</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-bs-toggle="dropdown"

View File

@ -192,9 +192,12 @@
<td>
<a href="https://doi.org/{{ paper.doi }}" target="_blank" class="icon-link icon-link-hover">
{{ paper.doi }}
<svg xmlns="http://www.w3.org/2000/svg" class="bi" viewBox="0 0 16 16" aria-hidden="true">
<path
d="M1 8a.5.5 0 0 1 .5-.5h11.793l-3.147-3.146a.5.5 0 0 1 .708-.708l4 4a.5.5 0 0 1 0 .708l-4 4a.5.5 0 0 1-.708-.708L13.293 8.5H1.5A.5.5 0 0 1 1 8z" />
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
class="bi bi-box-arrow-up-left" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M7.364 3.5a.5.5 0 0 1 .5-.5H14.5A1.5 1.5 0 0 1 16 4.5v10a1.5 1.5 0 0 1-1.5 1.5h-10A1.5 1.5 0 0 1 3 14.5V7.864a.5.5 0 1 1 1 0V14.5a.5.5 0 0 0 .5.5h10a.5.5 0 0 0 .5-.5v-10a.5.5 0 0 0-.5-.5H7.864a.5.5 0 0 1-.5-.5" />
<path fill-rule="evenodd"
d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 0 1H1.707l8.147 8.146a.5.5 0 0 1-.708.708L1 1.707V5.5a.5.5 0 0 1-1 0z" />
</svg>
</a>
</td>
@ -203,9 +206,12 @@
<a href="https://search.worldcat.org/search?q=issn:{{ paper.issn }}" target="_blank"
class="icon-link icon-link-hover">
{{ paper.issn }}
<svg xmlns="http://www.w3.org/2000/svg" class="bi" viewBox="0 0 16 16" aria-hidden="true">
<path
d="M1 8a.5.5 0 0 1 .5-.5h11.793l-3.147-3.146a.5.5 0 0 1 .708-.708l4 4a.5.5 0 0 1 0 .708l-4 4a.5.5 0 0 1-.708-.708L13.293 8.5H1.5A.5.5 0 0 1 1 8z" />
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
class="bi bi-box-arrow-up-left" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M7.364 3.5a.5.5 0 0 1 .5-.5H14.5A1.5 1.5 0 0 1 16 4.5v10a1.5 1.5 0 0 1-1.5 1.5h-10A1.5 1.5 0 0 1 3 14.5V7.864a.5.5 0 1 1 1 0V14.5a.5.5 0 0 0 .5.5h10a.5.5 0 0 0 .5-.5v-10a.5.5 0 0 0-.5-.5H7.864a.5.5 0 0 1-.5-.5" />
<path fill-rule="evenodd"
d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 0 1H1.707l8.147 8.146a.5.5 0 0 1-.708.708L1 1.707V5.5a.5.5 0 0 1-1 0z" />
</svg>
</a>
</td>

View File

@ -0,0 +1,93 @@
<!-- Server-side flash messages from Flask -->
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
<div class="server-flash-messages">
{% for category, message in messages %}
<div class="alert alert-{{ category }} alert-dismissible fade show" role="alert">
{{ message }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endfor %}
</div>
{% endif %}
{% endwith %}
<!-- JavaScript flash message container for client-side messages -->
<div id="clientFlashContainer"></div>
<style>
/* Banner created by showFlashMessage(): fixed to the viewport, centred
   horizontally and placed 30% from the top. */
.client-flash-message {
position: fixed;
top: 30%;
left: 50%;
transform: translate(-50%, -50%);
z-index: 1000;
width: 300px;
text-align: center;
font-weight: bold;
padding: 12px;
margin-bottom: 20px;
border-radius: 6px;
opacity: 1;
/* Slow fade-out; starts when the .fade modifier below sets opacity to 0. */
transition: opacity 5s ease-in-out;
}
/* Colour variants; the modifier class is the `type` passed to showFlashMessage().
   NOTE(review): these rules set border-color, but no border width/style is
   declared in this partial - confirm whether a border is intended. */
.client-flash-message.success {
background-color: #d4edda;
border-color: #c3e6cb;
color: #155724;
}
.client-flash-message.error {
background-color: #f8d7da;
border-color: #f5c6cb;
color: #721c24;
}
.client-flash-message.info {
background-color: #d1ecf1;
border-color: #bee5eb;
color: #0c5460;
}
.client-flash-message.warning {
background-color: #fff3cd;
border-color: #ffeeba;
color: #856404;
}
/* Added by showFlashMessage() shortly before the banner is removed. */
.client-flash-message.fade {
opacity: 0;
}
</style>
<script>
// Global flash message function that can be used from anywhere
/**
 * Show a transient client-side banner inside #clientFlashContainer.
 *
 * @param {string} message - Text to display (inserted as plain text).
 * @param {string} [type='success'] - Modifier class appended to the banner.
 * @param {number} [duration=5000] - Milliseconds until the banner is removed;
 *     the fade class is applied 3000 ms before that.
 * @returns {HTMLElement} The banner element that was appended.
 */
function showFlashMessage(message, type = 'success', duration = 5000) {
    const banner = Object.assign(document.createElement('div'), {
        className: 'client-flash-message ' + type,
        textContent: message,
    });

    document.getElementById('clientFlashContainer').appendChild(banner);

    // First timer starts the CSS fade, second one drops the node from the DOM.
    const fadeDelay = duration - 3000;
    setTimeout(function startFade() {
        banner.classList.add('fade');
    }, fadeDelay);
    setTimeout(function discard() {
        banner.remove();
    }, duration);

    return banner;
}
// Once the DOM is parsed, wrap every .toast element in a Bootstrap Toast
// instance - but only when the Bootstrap bundle with Toast support is loaded.
document.addEventListener('DOMContentLoaded', () => {
    const toastSupported = typeof bootstrap !== 'undefined' && bootstrap.Toast;
    if (!toastSupported) {
        return;
    }
    for (const toastEl of Array.from(document.querySelectorAll('.toast'))) {
        new bootstrap.Toast(toastEl);
    }
});
</script>

View File

@ -1,270 +0,0 @@
{% extends "base.html.jinja" %} {% block content %}
<style>
.timeline {
display: flex;
flex-wrap: wrap;
gap: 3px;
user-select: none;
/* Prevent text selection during drag */
}
.hour-block {
width: 49px;
height: 70px;
/* Increased height to fit additional text */
border-radius: 5px;
text-align: center;
line-height: 1.2;
font-size: 0.9rem;
padding-top: 6px;
cursor: pointer;
user-select: none;
transition: background-color 0.2s ease-in-out;
margin: 1px;
}
.hour-block.selected {
outline: 2px solid #4584b8;
}
.papers {
font-size: 0.7rem;
margin-top: 2px;
}
.flash-message {
position: fixed;
top: 30%;
left: 50%;
transform: translate(-50%, -50%);
z-index: 1000;
width: 300px;
text-align: center;
font-weight: bold;
padding: 12px;
margin-bottom: 20px;
border-radius: 6px;
opacity: 1;
transition: opacity 5s ease-in-out;
}
.flash-message.success {
background-color: #d4edda;
border-color: #c3e6cb;
color: #155724;
}
.flash-message.error {
background-color: #f8d7da;
border-color: #f5c6cb;
color: #721c24;
}
.flash-message.fade {
opacity: 0;
}
</style>
<script>
const initialSchedule = {{ schedule | tojson }};
const totalVolume = {{ volume }};
</script>
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="container">
<h1 class="mb-4">🕒 Configure Hourly Download Weights</h1>
<!-- Flash Messages -->
{% with messages = get_flashed_messages(with_categories=true) %} {% if
messages %}
<div id="flash-messages">
{% for category, message in messages %}
<div class="flash-message {{ category }}" x-data="{}"
x-init="setTimeout(() => $el.classList.add('fade'), 100); setTimeout(() => $el.remove(), 5000)">
{{ message }}
</div>
{% endfor %}
</div>
{% endif %} {% endwith %}
<!-- Content -->
<div class="mb-3">
<h3>How it Works</h3>
<p class="text-muted mb-0">
This page allows you to configure the daily volume of papers to be
downloaded and the hourly download weights for the papers. The weights
determine how many papers will be downloaded during each hour of the day.
The total volume (<strong x-text="volume"></strong> papers/day) is split
across all hours based on their relative weights. Each weight controls the
proportion of papers downloaded during that hour. Click to select one or
more hours below. Then assign a weight to them using the input and apply
it. Color indicates relative intensity. The total daily volume will be
split proportionally across these weights.
<strong>Don't forget to submit the changes!</strong>
</p>
<h3>Example</h3>
<p class="text-muted mb-0">
If the total volume is <strong>240 papers</strong> and hours are
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
<strong>40, 80, and 120 papers</strong> respectively.
</p>
</div>
<h2 class="mt-4">Volume</h2>
<div class="align-items-start flex-wrap gap-2">
<p class="text-muted">
The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3">
<form method="post" action="{{ url_for('schedule.schedule') }}" class="input-group w-50">
<label class="input-group-text">Papers per day:</label>
<input type="number" class="form-control" name="total_volume" value="{{ volume }}" min="1" max="1000"
required />
<button type="submit" class="btn btn-primary">Update Volume</button>
</form>
</div>
</div>
<h2 class="mt-4">Current Schedule</h2>
<form method="post" action="{{ url_for('schedule.schedule') }}">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour" :style="getBackgroundStyle(hour)"
:class="{'selected': isSelected(hour)}" @mousedown="startDrag($event, hour)" @mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
</div>
</template>
</div>
<div class="input-group mb-4 w-50">
<label class="input-group-text">Set Weight:</label>
<input type="number" step="0.1" min="0" max="5" x-model="newWeight" class="form-control" />
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
Apply to Selected
</button>
</div>
<div class="d-flex justify-content-between">
<a href="{{ url_for('main.index') }}" class="btn btn-outline-secondary">⬅ Back</a>
<button type="submit" class="btn btn-success">💾 Save Schedule</button>
</div>
</form>
</div>
<script>
function scheduleManager(initial, volume) {
return {
schedule: { ...initial },
volume: volume,
selectedHours: [],
newWeight: 1.0,
isDragging: false,
dragOperation: null,
formatHour(h) {
return String(h).padStart(2, "0") + ":00";
},
getBackgroundStyle(hour) {
const weight = parseFloat(this.schedule[hour]);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
getBackgroundStyleFromValue(value) {
const weight = parseFloat(value);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
}
},
endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
this.selectedHours = [];
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
};
}
</script>
{% endblock content %}

View File

@ -36,51 +36,6 @@
max-width: 350px;
z-index: 1050;
}
/* Enhanced scheduler styles */
.timeline {
display: flex;
flex-wrap: wrap;
gap: 3px;
user-select: none;
}
.hour-block {
width: 49px;
height: 70px;
border-radius: 5px;
text-align: center;
line-height: 1.2;
font-size: 0.9rem;
padding-top: 6px;
cursor: pointer;
user-select: none;
transition: background-color 0.2s ease-in-out;
margin: 1px;
}
.hour-block.selected {
outline: 2px solid #4584b8;
}
.papers {
font-size: 0.7rem;
margin-top: 2px;
}
/* Tab styles */
.nav-tabs .nav-link {
color: #495057;
}
.nav-tabs .nav-link.active {
font-weight: bold;
color: #007bff;
}
.tab-pane {
padding-top: 1rem;
}
</style>
{% endblock styles %}
@ -88,337 +43,113 @@
<div class="container mt-4">
<h1>Paper Scraper Control Panel</h1>
<!-- Navigation tabs -->
<ul class="nav nav-tabs mb-4" id="scraperTabs" role="tablist">
<li class="nav-item" role="presentation">
<button class="nav-link active" id="dashboard-tab" data-bs-toggle="tab" data-bs-target="#dashboard"
type="button" role="tab" aria-controls="dashboard" aria-selected="true">
Dashboard
</button>
</li>
<li class="nav-item" role="presentation">
<button class="nav-link" id="schedule-tab" data-bs-toggle="tab" data-bs-target="#schedule" type="button"
role="tab" aria-controls="schedule" aria-selected="false">
Schedule Configuration
</button>
</li>
</ul>
<!-- Include standardized flash messages -->
{% include "partials/flash_messages.html.jinja" %}
<div class="tab-content" id="scraperTabsContent">
<!-- Dashboard Tab -->
<div class="tab-pane fade show active" id="dashboard" role="tabpanel" aria-labelledby="dashboard-tab">
<div class="row mb-4">
<div class="col-md-6">
<div class="card">
<div class="card-header">
<h5>Scraper Status</h5>
</div>
<div class="card-body">
<div class="d-flex align-items-center mb-3">
<div id="statusIndicator" class="status-indicator status-inactive"></div>
<span id="statusText">Inactive</span>
</div>
<div class="btn-group" role="group">
<button id="startButton" class="btn btn-success">Start</button>
<button id="pauseButton" class="btn btn-warning" disabled>Pause</button>
<button id="stopButton" class="btn btn-danger" disabled>Stop</button>
</div>
</div>
</div>
<div class="row mb-4">
<div class="col-md-6">
<div class="card">
<div class="card-header">
<h5>Scraper Status</h5>
</div>
<div class="col-md-6">
<div class="card">
<div class="card-header">
<h5>Volume Configuration</h5>
</div>
<div class="card-body">
<form id="volumeForm">
<div class="form-group">
<label for="volumeInput">Papers per day:</label>
<input type="number" class="form-control" id="volumeInput"
value="{{ volume_config.volume if volume_config else 100 }}">
</div>
<button type="submit" class="btn btn-primary mt-2">Update Volume</button>
</form>
</div>
<div class="card-body">
<div class="d-flex align-items-center mb-3">
<div id="statusIndicator" class="status-indicator status-inactive"></div>
<span id="statusText">Inactive</span>
</div>
</div>
</div>
<div class="row mb-4">
<div class="col-12">
<div class="card">
<div class="card-header d-flex justify-content-between align-items-center">
<h5>Scraping Activity</h5>
<div>
<div class="form-check form-switch">
<input class="form-check-input" type="checkbox" id="notificationsToggle" checked>
<label class="form-check-label" for="notificationsToggle">Show Notifications</label>
</div>
</div>
</div>
<div class="card-body">
<div class="btn-group mb-3">
<button class="btn btn-outline-secondary time-range-btn" data-hours="6">Last 6
hours</button>
<button class="btn btn-outline-secondary time-range-btn active" data-hours="24">Last 24
hours</button>
<button class="btn btn-outline-secondary time-range-btn" data-hours="72">Last 3
days</button>
</div>
<div class="stats-chart" id="activityChart"></div>
</div>
</div>
</div>
</div>
<div class="row mb-4">
<div class="col-12">
<div class="card">
<div class="card-header">
<h5>Recent Activity</h5>
</div>
<div class="card-body">
<div class="table-responsive">
<table class="table table-striped">
<thead>
<tr>
<th>Time</th>
<th>Action</th>
<th>Status</th>
<th>Description</th>
</tr>
</thead>
<tbody id="activityLog">
<tr>
<td colspan="4" class="text-center">Loading activities...</td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="btn-group" role="group">
<button id="startButton" class="btn btn-success">Start</button>
<button id="pauseButton" class="btn btn-warning" disabled>Pause</button>
<button id="stopButton" class="btn btn-danger" disabled>Stop</button>
<button id="resetButton" class="btn btn-secondary" disabled>Reset</button>
</div>
</div>
</div>
</div>
<!-- Schedule Configuration Tab -->
<div class="tab-pane fade" id="schedule" role="tabpanel" aria-labelledby="schedule-tab"
x-data="scheduleManager({{ schedule_config | tojson }}, {{ volume_config.volume if volume_config else 100 }})">
<div class="mb-3">
<h3>How it Works</h3>
<p class="text-muted mb-0">
Configure the daily volume of papers to be downloaded and the hourly download weights.
The weights determine how many papers will be downloaded during each hour of the day.
The total volume (<strong x-text="volume"></strong> papers/day) is split across all hours based on
their relative weights.
<strong>Lower weights result in higher scraping rates</strong> for that hour.
</p>
<h5 class="mt-3">Instructions:</h5>
<p class="text-muted">
Click to select one or more hours below. Then assign a weight to them using the input and apply it.
Color indicates relative intensity. Changes are saved immediately when you click "Update Schedule".
</p>
</div>
<div class="card mb-4">
<div class="col-md-6">
<div class="card">
<div class="card-header">
<h4 class="m-0">Volume Configuration</h4>
<h5>Volume Configuration</h5>
</div>
<div class="card-body">
<p class="text-muted">
The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3">
<div class="input-group">
<span class="input-group-text">Papers per day:</span>
<input type="number" class="form-control" x-model="volume" min="1" max="1000" />
<button type="button" class="btn btn-primary" @click="updateVolume()">
Update Volume
</button>
<form id="volumeForm">
<div class="form-group">
<label for="volumeInput">Papers per day:</label>
<input type="number" class="form-control" id="volumeInput"
value="{{ volume_config.volume if volume_config else 100 }}" min="1"
max="{{ max_volume }}">
<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
</div>
<button type="submit" class="btn btn-primary mt-2">Update Volume</button>
</form>
</div>
</div>
</div>
</div>
<div class="row mb-4">
<div class="col-12">
<div class="card">
<div class="card-header d-flex justify-content-between align-items-center">
<h5>Scraping Activity</h5>
<div>
<div class="form-check form-switch">
<input class="form-check-input" type="checkbox" id="notificationsToggle" checked>
<label class="form-check-label" for="notificationsToggle">Show Notifications</label>
</div>
</div>
</div>
<div class="card-body">
<div class="btn-group mb-3">
<button class="btn btn-outline-secondary time-range-btn" data-hours="6">Last 6
hours</button>
<button class="btn btn-outline-secondary time-range-btn active" data-hours="24">Last 24
hours</button>
<button class="btn btn-outline-secondary time-range-btn" data-hours="72">Last 3
days</button>
</div>
<div class="stats-chart" id="activityChart"></div>
</div>
</div>
</div>
</div>
<div class="row mb-4">
<div class="col-12">
<div class="card">
<div class="card-header">
<h4 class="m-0">Hourly Weights</h4>
<h5>Recent Activity</h5>
</div>
<div class="card-body">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour"
:style="getBackgroundStyle(hour)" :class="{'selected': isSelected(hour)}"
@mousedown="startDrag($event, hour)" @mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
</div>
</template>
<div class="table-responsive">
<table class="table table-striped">
<thead>
<tr>
<th>Time</th>
<th>Action</th>
<th>Status</th>
<th>Description</th>
</tr>
</thead>
<tbody id="activityLog">
<tr>
<td colspan="4" class="text-center">Loading activities...</td>
</tr>
</tbody>
</table>
</div>
<div class="input-group mb-4 w-50">
<span class="input-group-text">Set Weight:</span>
<input type="number" step="0.1" min="0.1" max="5" x-model="newWeight" class="form-control" />
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
Apply to Selected
</button>
</div>
<button type="button" class="btn btn-success" @click="updateSchedule()">
💾 Update Schedule
</button>
</div>
</div>
</div>
</div>
</div>
<!-- Notification template -->
<div id="notificationContainer"></div>
{% endblock content %}
{% block scripts %}
{{ super() }}
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
<script>
// Alpine.js scheduler component
function scheduleManager(initial, volume) {
return {
schedule: initial || {},
volume: volume,
selectedHours: [],
newWeight: 1.0,
isDragging: false,
dragOperation: null,
formatHour(h) {
return String(h).padStart(2, "0") + ":00";
},
getBackgroundStyle(hour) {
const weight = parseFloat(this.schedule[hour]);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`;
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
}
},
endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
updateVolume() {
fetch('/scraper/update_config', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ volume: parseFloat(this.volume) })
})
.then(response => response.json())
.then(data => {
if (data.success) {
showNotification('Volume updated successfully', 'success');
// Update the volume in the dashboard tab too
document.getElementById('volumeInput').value = this.volume;
} else {
showNotification(data.message, 'danger');
}
});
},
updateSchedule() {
fetch('/scraper/update_config', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ schedule: this.schedule })
})
.then(response => response.json())
.then(data => {
if (data.success) {
showNotification('Schedule updated successfully', 'success');
this.selectedHours = []; // Clear selections after update
} else {
showNotification(data.message, 'danger');
}
});
}
};
}
// Global variables for the scraper dashboard
let notificationsEnabled = true;
let activityChart = null;
@ -430,6 +161,7 @@
const startButton = document.getElementById('startButton');
const pauseButton = document.getElementById('pauseButton');
const stopButton = document.getElementById('stopButton');
const resetButton = document.getElementById('resetButton');
const notificationsToggle = document.getElementById('notificationsToggle');
const activityLog = document.getElementById('activityLog');
@ -443,6 +175,7 @@
startButton.addEventListener('click', startScraper);
pauseButton.addEventListener('click', togglePauseScraper);
stopButton.addEventListener('click', stopScraper);
resetButton.addEventListener('click', resetScraper);
notificationsToggle.addEventListener('click', toggleNotifications);
document.getElementById('volumeForm').addEventListener('submit', function (e) {
@ -483,28 +216,40 @@
startButton.disabled = true;
pauseButton.disabled = false;
stopButton.disabled = false;
resetButton.disabled = false; // Enable reset when active
} else {
statusIndicator.className = 'status-indicator status-inactive';
statusText.textContent = 'Inactive';
startButton.disabled = false;
pauseButton.disabled = true;
stopButton.disabled = true;
resetButton.disabled = false; // Enable reset when inactive too
}
});
}
// Action functions
function startScraper() {
console.log("Start button clicked - sending request to /scraper/start");
fetch('/scraper/start', { method: 'POST' })
.then(response => response.json())
.then(response => {
console.log("Response received:", response);
return response.json();
})
.then(data => {
console.log("Data received:", data);
if (data.success) {
showNotification('Scraper started successfully', 'success');
showFlashMessage('Scraper started successfully', 'success');
updateStatus();
setTimeout(() => { loadRecentActivity(); }, 1000);
} else {
showNotification(data.message, 'danger');
showFlashMessage(data.message, 'error');
}
})
.catch(error => {
console.error("Error starting scraper:", error);
showFlashMessage('Error starting scraper: ' + error.message, 'error');
});
}
@ -513,11 +258,11 @@
.then(response => response.json())
.then(data => {
if (data.success) {
showNotification(data.message, 'info');
showFlashMessage(data.message, 'info');
updateStatus();
setTimeout(() => { loadRecentActivity(); }, 1000);
} else {
showNotification(data.message, 'danger');
showFlashMessage(data.message, 'error');
}
});
}
@ -527,15 +272,55 @@
.then(response => response.json())
.then(data => {
if (data.success) {
showNotification('Scraper stopped successfully', 'warning');
showFlashMessage('Scraper stopped successfully', 'warning');
updateStatus();
setTimeout(() => { loadRecentActivity(); }, 1000);
} else {
showNotification(data.message, 'danger');
showFlashMessage(data.message, 'error');
}
});
}
/**
 * Ask for confirmation, then POST to /scraper/reset and refresh the dashboard.
 *
 * The reset button is disabled while the request is in flight and re-enabled
 * in a single `finally` step, so it cannot stay disabled regardless of which
 * handler ran or failed.
 */
function resetScraper() {
    const confirmed = confirm("Are you sure you want to reset the scraper? This will stop all current tasks, optionally clear non-pending papers, and restart the scraper.");
    if (!confirmed) {
        return;
    }

    // Disable button to prevent multiple clicks
    resetButton.disabled = true;

    // Show a loading message
    showFlashMessage('Resetting scraper, please wait...', 'info');

    fetch('/scraper/reset', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            clear_papers: true // You could make this configurable with a checkbox
        })
    })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                showFlashMessage('Scraper has been completely reset and restarted', 'success');
                // Update everything
                updateStatus();
                loadActivityStats(currentTimeRange);
                // Reload the activity log after a one-second delay
                setTimeout(() => { loadRecentActivity(); }, 1000);
            } else {
                showFlashMessage(data.message || 'Error resetting scraper', 'error');
            }
        })
        .catch(error => {
            console.error("Error resetting scraper:", error);
            showFlashMessage('Error resetting scraper: ' + error.message, 'error');
        })
        .finally(() => {
            // Re-enable button on every outcome
            resetButton.disabled = false;
        });
}
function updateVolume() {
const volume = document.getElementById('volumeInput').value;
@ -549,9 +334,9 @@
.then(response => response.json())
.then(data => {
if (data.success) {
showNotification('Volume updated successfully', 'success');
showFlashMessage('Volume updated successfully', 'success');
} else {
showNotification(data.message, 'danger');
showFlashMessage(data.message, 'error');
}
});
}
@ -682,36 +467,6 @@
});
}
// Notification functions
function showNotification(message, type) {
if (!notificationsEnabled && type !== 'danger') {
return;
}
const container = document.getElementById('notificationContainer');
const notification = document.createElement('div');
notification.className = `alert alert-${type} notification shadow-sm`;
notification.innerHTML = `
${message}
<button type="button" class="btn-close float-end" aria-label="Close"></button>
`;
container.appendChild(notification);
// Add close handler
notification.querySelector('.btn-close').addEventListener('click', () => {
notification.remove();
});
// Auto-close after 5 seconds
setTimeout(() => {
notification.classList.add('fade');
setTimeout(() => {
notification.remove();
}, 500);
}, 5000);
}
// WebSocket for real-time notifications
function setupWebSocket() {
// If WebSocket is available, implement it here
@ -733,9 +488,9 @@
data.forEach(log => {
const extraData = log.extra_data ? JSON.parse(log.extra_data) : {};
if (log.status === 'success') {
showNotification(`New paper scraped: ${extraData.title || 'Unknown title'}`, 'success');
showFlashMessage(`New paper scraped: ${extraData.title || 'Unknown title'}`, 'success');
} else if (log.status === 'error') {
showNotification(`Failed to scrape paper: ${log.description}`, 'danger');
showFlashMessage(`Failed to scrape paper: ${log.description}`, 'error');
}
});

View File

@ -58,6 +58,16 @@
<option value="|">Pipe (|)</option>
</select>
</div>
<div class="form-group mt-3">
<label for="duplicate_strategy">Duplicate Handling Strategy</label>
<select name="duplicate_strategy" id="duplicate_strategy" class="form-control">
{% for strategy_id, strategy in duplicate_strategies.items() %}
<option value="{{ strategy_id }}" {% if strategy.is_default %}selected{% endif %}>
{{ strategy.name }} - {{ strategy.description }}
</option>
{% endfor %}
</select>
</div>
<button type="submit" class="btn btn-primary mt-3">Upload</button>
</form>