redesigns config, including some placeholders

This commit is contained in:
Michael Beck 2025-04-16 15:19:28 +02:00
parent bb2ecd842d
commit adf8207461
10 changed files with 945 additions and 630 deletions

View File

@ -8,6 +8,7 @@ from .schedule import bp as schedule_bp
from .logger import bp as logger_bp
from .api import bp as api_bp
from .scraper import bp as scraper_bp
from .config import bp as config_bp
def register_blueprints(app: Flask):
@ -19,3 +20,4 @@ def register_blueprints(app: Flask):
app.register_blueprint(logger_bp, url_prefix='/logs')
app.register_blueprint(api_bp, url_prefix='/api')
app.register_blueprint(scraper_bp, url_prefix='/scraper')
app.register_blueprint(config_bp)

View File

@ -0,0 +1,255 @@
"""Configuration management blueprint."""
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
from ..db import db
from ..models import VolumeConfig, ScheduleConfig, ActivityLog
bp = Blueprint("config", __name__, url_prefix="/config")
# Helper functions for configuration updates
def _update_volume(new_volume):
"""
Helper function to update volume configuration.
Args:
new_volume (float): The new volume value
Returns:
tuple: (success, message, volume_config)
"""
try:
new_volume = float(new_volume)
if new_volume <= 0 or new_volume > 1000:
return False, "Volume must be between 1 and 1000", None
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
old_value = volume_config.volume
volume_config.volume = new_volume
ActivityLog.log_config_change(
config_key="scraper_volume",
old_value=old_value,
new_value=new_volume,
description="Updated scraper volume"
)
db.session.commit()
return True, "Volume updated successfully!", volume_config
except (ValueError, TypeError) as e:
db.session.rollback()
return False, f"Error updating volume: {str(e)}", None
def _update_schedule(schedule_data):
"""
Helper function to update schedule configuration.
Args:
schedule_data (dict): Dictionary with hour:weight pairs
Returns:
tuple: (success, message)
"""
try:
# Validate all entries first
for hour_str, weight in schedule_data.items():
try:
hour = int(hour_str)
weight = float(weight)
if hour < 0 or hour > 23:
return False, f"Hour value must be between 0 and 23, got {hour}"
if weight < 0.1 or weight > 5:
return False, f"Weight for hour {hour} must be between 0.1 and 5, got {weight}"
except ValueError:
return False, f"Invalid data format for hour {hour_str}"
# Update schedule after validation
for hour_str, weight in schedule_data.items():
hour = int(hour_str)
weight = float(weight)
config = ScheduleConfig.query.get(hour)
if not config:
config = ScheduleConfig(hour=hour, weight=weight)
db.session.add(config)
else:
old_value = config.weight
config.weight = weight
ActivityLog.log_config_change(
config_key=f"schedule_hour_{hour}",
old_value=old_value,
new_value=weight,
description=f"Updated schedule weight for hour {hour}"
)
db.session.commit()
return True, "Schedule updated successfully!"
except Exception as e:
db.session.rollback()
return False, f"Error updating schedule: {str(e)}"
@bp.route("/")
@bp.route("/general")
def general():
    """Render the configuration page with the "general" tab active.

    If no ``VolumeConfig`` row exists yet, one is created with a volume of 100
    and committed before the page is rendered.
    """
    current = VolumeConfig.query.first()
    if not current:
        # Seed the default so the template always has a value to display.
        current = VolumeConfig(volume=100)
        db.session.add(current)
        db.session.commit()

    context = {
        "active_tab": "general",
        "volume_config": current,
        "app_title": "Configuration",
    }
    return render_template("config/index.html.jinja", **context)
@bp.route("/schedule")
def schedule():
    """Render the configuration page with the "schedule" tab active.

    Hours 0-23 that have no ``ScheduleConfig`` row are created with weight 1.0,
    and a missing ``VolumeConfig`` row is created with volume 100, before the
    template is rendered.
    """
    stored_weights = {row.hour: row.weight for row in ScheduleConfig.query.all()}

    schedule_config = {}
    for hour in range(24):
        if hour not in stored_weights:
            # No row for this hour yet: queue a default entry (weight 1.0).
            db.session.add(ScheduleConfig(hour=hour, weight=1.0))
            schedule_config[hour] = 1.0
            continue
        schedule_config[hour] = stored_weights[hour]

    # Commit only when the table held fewer than 24 rows to begin with.
    if len(stored_weights) < 24:
        db.session.commit()

    volume_config = VolumeConfig.query.first()
    if not volume_config:
        volume_config = VolumeConfig(volume=100)
        db.session.add(volume_config)
        db.session.commit()

    context = {
        "active_tab": "schedule",
        "schedule": schedule_config,
        "volume": volume_config.volume,
        "app_title": "Configuration",
    }
    return render_template("config/index.html.jinja", **context)
@bp.route("/update/volume", methods=["POST"])
def update_volume():
    """Handle the volume form: apply the submitted value and go back to the general tab.

    The outcome message is flashed under the "success" or "error" category.
    """
    submitted = request.form.get("total_volume", 0)
    outcome = _update_volume(submitted)
    succeeded, text = outcome[0], outcome[1]

    category = "success" if succeeded else "error"
    flash(text, category)
    return redirect(url_for("config.general"))
@bp.route("/update/schedule", methods=["POST"])
def update_schedule():
    """Handle the schedule form: apply the 24 submitted weights and go back to the schedule tab.

    The form must contain every field ``hour_0`` .. ``hour_23``; the first
    missing one aborts the update with an error flash.
    """
    form = request.form
    fields = [(hour, f"hour_{hour}") for hour in range(24)]

    # Report the lowest-numbered hour that was not submitted, if any.
    first_missing = next((hour for hour, field in fields if field not in form), None)
    if first_missing is not None:
        flash(f"Missing data for hour {first_missing}", "error")
        return redirect(url_for("config.schedule"))

    schedule_data = {str(hour): form.get(field, 0) for hour, field in fields}

    succeeded, text = _update_schedule(schedule_data)
    flash(text, "success" if succeeded else "error")
    return redirect(url_for("config.schedule"))
@bp.route("/api/schedule/stats")
def schedule_stats():
    """Return the current volume, total weight and per-hour paper counts as JSON.

    Each hour's share is ``weight / total_weight * total_volume``; when the
    total weight is not positive every hour gets 0. If either the volume or the
    schedule configuration is missing, a JSON object with an ``error`` key is
    returned instead.
    """
    volume_row = VolumeConfig.query.first()
    if not volume_row:
        return jsonify({"error": "No volume configuration found"})

    hour_rows = ScheduleConfig.query.all()
    if not hour_rows:
        return jsonify({"error": "No schedule configuration found"})

    total_volume = volume_row.volume
    total_weight = sum(row.weight for row in hour_rows)

    # Split the daily volume across the hours in proportion to their weights.
    papers_per_hour = {
        row.hour: (row.weight / total_weight if total_weight > 0 else 0) * total_volume
        for row in hour_rows
    }
    hourly_weights = {row.hour: row.weight for row in hour_rows}

    payload = {
        "total_volume": total_volume,
        "total_weight": total_weight,
        "papers_per_hour": papers_per_hour,
        "hourly_weights": hourly_weights,
    }
    return jsonify(payload)
@bp.route("/api/update_config", methods=["POST"])
def api_update_config():
    """API endpoint to update volume and/or schedule configuration.

    Expects a JSON object with optional keys ``volume`` and ``schedule``.

    Returns:
        JSON ``{"success": bool, "updates": [...]}`` with one entry per
        processed key, each carrying ``type``, ``success`` and ``message``.
        A body that is missing, malformed, or not a JSON object yields
        ``{"success": False, "message": ...}`` with status 400. Unexpected
        errors roll the session back and yield ``{"success": False,
        "message": ...}``.
    """
    # silent=True returns None for a missing or malformed body instead of
    # aborting with a framework error page, so the client always gets JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            "success": False,
            "message": "Request body must be a JSON object"
        }), 400

    response = {"success": True, "updates": []}
    try:
        # Update volume if provided
        if "volume" in data:
            success, message, _ = _update_volume(data["volume"])
            response["updates"].append({
                "type": "volume",
                "success": success,
                "message": message
            })
            if not success:
                response["success"] = False

        # Update schedule if provided
        if "schedule" in data:
            success, message = _update_schedule(data["schedule"])
            response["updates"].append({
                "type": "schedule",
                "success": success,
                "message": message
            })
            if not success:
                response["success"] = False

        return jsonify(response)
    except Exception as e:
        db.session.rollback()
        return jsonify({
            "success": False,
            "message": f"Unexpected error: {str(e)}"
        })

View File

@ -1,79 +1,212 @@
"""Schedule configuration routes."""
from flask import Blueprint, flash, render_template, request
"""Schedule configuration and scheduling logic."""
from datetime import datetime
import random
import json
from flask import Blueprint, flash, render_template, request, jsonify
from ..db import db
from ..models import ScheduleConfig, VolumeConfig
from ..models import ScheduleConfig, VolumeConfig, ActivityLog, ActivityCategory
from ..celery import celery
from .scraper import SCRAPER_ACTIVE, SCRAPER_PAUSED, dummy_scrape_paper
from .config import _update_volume, _update_schedule
bp = Blueprint("schedule", __name__)
bp = Blueprint("schedule", __name__, url_prefix="/schedule")
@bp.route("/", methods=["GET", "POST"])
def schedule():
"""Render and handle the schedule configuration page."""
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update
try:
new_volume = float(request.form.get("total_volume", 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
# Volume update using the centralized helper
new_volume = request.form.get("total_volume", 0)
success, message, _ = _update_volume(new_volume)
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
if success:
flash(message, "success")
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
flash(message, "error")
else:
# Schedule update logic
# Validate form data
# Schedule update using the centralized helper
schedule_data = {}
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
flash(f"Missing data for hour {hour}", "error")
break
schedule_data[str(hour)] = request.form.get(key, 0)
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5"
)
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
if len(schedule_data) == 24:
success, message = _update_schedule(schedule_data)
if success:
flash(message, "success")
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
flash(message, "error")
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
except Exception as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
flash(f"Error: {str(e)}", "error")
# Ensure we have schedule config for all hours
existing_hours = {record.hour: record for record in ScheduleConfig.query.all()}
schedule_config = {}
for hour in range(24):
if hour in existing_hours:
schedule_config[hour] = existing_hours[hour].weight
else:
# Create default schedule entry (weight 1.0)
new_config = ScheduleConfig(hour=hour, weight=1.0)
db.session.add(new_config)
schedule_config[hour] = 1.0
if len(existing_hours) < 24:
db.session.commit()
schedule = {
sc.hour: sc.weight
for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
}
volume = VolumeConfig.query.first()
return render_template(
"schedule.html.jinja",
schedule=schedule,
schedule=schedule_config,
volume=volume.volume if volume else 0,
stats=get_schedule_stats(),
app_title="PaperScraper",
)
@bp.route("/update_config", methods=["POST"])
def update_config():
    """Update volume and/or schedule configuration via the JSON API.

    Expects a JSON object with optional keys ``volume`` and ``schedule``.

    Returns:
        JSON ``{"success": bool, "updates": [...]}`` with one entry per
        processed key, each carrying ``type``, ``success`` and ``message``.
        A body that is missing, malformed, or not a JSON object yields
        ``{"success": False, "message": ...}`` with status 400. Unexpected
        errors roll the session back and yield ``{"success": False,
        "message": ...}``.
    """
    # silent=True returns None for a missing or malformed body instead of
    # aborting with a framework error page, so the client always gets JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            "success": False,
            "message": "Request body must be a JSON object"
        }), 400

    response = {"success": True, "updates": []}
    try:
        # Update volume if provided
        if "volume" in data:
            success, message, _ = _update_volume(data["volume"])
            response["updates"].append({
                "type": "volume",
                "success": success,
                "message": message
            })
            if not success:
                response["success"] = False

        # Update schedule if provided
        if "schedule" in data:
            success, message = _update_schedule(data["schedule"])
            response["updates"].append({
                "type": "schedule",
                "success": success,
                "message": message
            })
            if not success:
                response["success"] = False

        return jsonify(response)
    except Exception as e:
        db.session.rollback()
        return jsonify({
            "success": False,
            "message": f"Unexpected error: {str(e)}"
        })
# Calculate schedule information for visualization/decision making
def get_schedule_stats():
    """Summarise the current schedule as a plain dict.

    Returns:
        dict: ``total_volume``, ``total_weight``, ``papers_per_hour`` and
        ``hourly_weights`` (the last two keyed by hour). Each hour's share is
        ``weight / total_weight * total_volume``, or 0 when the total weight is
        not positive. If the volume or schedule configuration is missing, a
        dict with a single ``error`` key is returned instead.
    """
    volume_row = VolumeConfig.query.first()
    if not volume_row:
        return {"error": "No volume configuration found"}

    hour_rows = ScheduleConfig.query.all()
    if not hour_rows:
        return {"error": "No schedule configuration found"}

    total_volume = volume_row.volume
    total_weight = sum(row.weight for row in hour_rows)

    # Split the daily volume across the hours in proportion to their weights.
    papers_per_hour = {
        row.hour: (row.weight / total_weight if total_weight > 0 else 0) * total_volume
        for row in hour_rows
    }
    hourly_weights = {row.hour: row.weight for row in hour_rows}

    return {
        "total_volume": total_volume,
        "total_weight": total_weight,
        "papers_per_hour": papers_per_hour,
        "hourly_weights": hourly_weights,
    }
# API route to get schedule information
@bp.route("/schedule_info")
def schedule_info():
    """Serve the dict produced by ``get_schedule_stats()`` as a JSON response."""
    return jsonify(get_schedule_stats())
# Define the Celery tasks for the scheduler
@celery.task(bind=True)
def start_scheduler(self):
    """Kick off the scheduler if the scraper is active and not paused.

    Returns:
        dict: ``{"status": "success", ...}`` when ``scheduler_task`` was
        queued, otherwise ``{"status": "error", ...}``.
    """
    # Read the flags from the scraper module at call time. The names brought in
    # by ``from .scraper import SCRAPER_ACTIVE, SCRAPER_PAUSED`` are bound once
    # at import, so they would never reflect a later reassignment of the
    # module globals.
    # NOTE(review): if the Celery worker runs in a separate process from the
    # web app, module-level flags are not shared at all — confirm how the
    # scraper state is meant to reach the worker.
    from . import scraper as scraper_state

    if scraper_state.SCRAPER_ACTIVE and not scraper_state.SCRAPER_PAUSED:
        # Schedule the first run immediately
        scheduler_task.delay()
        return {"status": "success", "message": "Scheduler started"}
    return {"status": "error", "message": "Scraper not active or paused"}
@celery.task(bind=True)
def scheduler_task(self):
    """Queue scraping tasks for the current hour's share of the daily volume.

    The number of tasks is the integer part of this hour's entry in
    ``get_schedule_stats()["papers_per_hour"]``. The decision is written to the
    activity log before the tasks are queued.

    Returns:
        dict: A ``status`` entry, plus ``papers_scheduled`` and ``hour`` when
        tasks were queued.
    """
    # Read the flags from the scraper module at call time. The names brought in
    # by ``from .scraper import SCRAPER_ACTIVE, SCRAPER_PAUSED`` are bound once
    # at import, so they would never reflect a later reassignment of the
    # module globals.
    # NOTE(review): if the Celery worker runs in a separate process from the
    # web app, module-level flags are not shared at all — confirm how the
    # scraper state is meant to reach the worker.
    from . import scraper as scraper_state

    if not scraper_state.SCRAPER_ACTIVE:
        return {"status": "Scraper not active"}
    if scraper_state.SCRAPER_PAUSED:
        return {"status": "Scraper paused"}

    # Calculate how many papers to scrape based on current hour and configuration
    current_hour = datetime.now().hour
    hour_config = ScheduleConfig.query.get(current_hour)
    volume_config = VolumeConfig.query.first()
    if not hour_config or not volume_config:
        return {"status": "Missing configuration"}

    # Calculate papers to scrape this hour. ``.get`` guards against the
    # error-shaped dict that get_schedule_stats() returns when configuration
    # disappears between the check above and this call.
    stats = get_schedule_stats()
    papers_to_scrape = int(stats.get("papers_per_hour", {}).get(current_hour, 0))

    # Log the scheduling decision
    ActivityLog.log_scraper_activity(
        action="schedule_papers",
        status="success",
        description=f"Scheduled {papers_to_scrape} papers for scraping at hour {current_hour}",
        extra_data=json.dumps({
            "hour": current_hour,
            "weight": hour_config.weight,
            "total_volume": volume_config.volume
        })
    )

    # Execute the actual scraping tasks
    for _ in range(papers_to_scrape):
        # Queue up scraping tasks - in real implementation, this would
        # call the actual scraper task
        dummy_scrape_paper.delay()

    return {
        "status": "success",
        "papers_scheduled": papers_to_scrape,
        "hour": current_hour
    }

View File

@ -2,7 +2,7 @@ import random
import json
from datetime import datetime
from flask import Blueprint, jsonify, render_template, request, current_app, flash
from ..models import ScheduleConfig, VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory
from ..db import db
from ..celery import celery
@ -23,26 +23,9 @@ def index():
db.session.add(volume_config)
db.session.commit()
# Ensure we have schedule config for all hours
existing_hours = {record.hour: record for record in ScheduleConfig.query.all()}
schedule_config = {}
for hour in range(24):
if hour in existing_hours:
schedule_config[hour] = existing_hours[hour].weight
else:
# Create default schedule entry (weight 1.0)
new_config = ScheduleConfig(hour=hour, weight=1.0)
db.session.add(new_config)
schedule_config[hour] = 1.0
if len(existing_hours) < 24:
db.session.commit()
return render_template(
"scraper.html.jinja",
volume_config=volume_config,
schedule_config=schedule_config,
scraper_active=SCRAPER_ACTIVE,
scraper_paused=SCRAPER_PAUSED
)
@ -63,13 +46,10 @@ def start_scraper():
description="Scraper started manually"
)
# Start the scheduler task
task = dummy_scraper_scheduler.delay()
# Trigger the schedule.py to start actual scheduling
return jsonify({
"success": True,
"message": "Scraper started",
"task_id": task.id
"message": "Scraper started"
})
else:
return jsonify({
@ -205,7 +185,7 @@ def update_config():
try:
new_volume = float(data["volume"])
# Validate volume value (from schedule.py)
# Validate volume value
if new_volume <= 0 or new_volume > 1000:
return jsonify({
"success": False,
@ -233,219 +213,12 @@ def update_config():
"message": "Invalid volume value"
})
if "schedule" in data:
try:
schedule = data["schedule"]
# Validate entire schedule
for hour_str, weight in schedule.items():
try:
hour = int(hour_str)
weight = float(weight)
if hour < 0 or hour > 23:
return jsonify({
"success": False,
"message": f"Hour value must be between 0 and 23, got {hour}"
})
if weight < 0.1 or weight > 5:
return jsonify({
"success": False,
"message": f"Weight for hour {hour} must be between 0.1 and 5, got {weight}"
})
except ValueError:
return jsonify({
"success": False,
"message": f"Invalid data format for hour {hour_str}"
})
# Update schedule after validation
for hour_str, weight in schedule.items():
hour = int(hour_str)
weight = float(weight)
schedule_config = ScheduleConfig.query.get(hour)
if not schedule_config:
schedule_config = ScheduleConfig(hour=hour, weight=weight)
db.session.add(schedule_config)
else:
old_value = schedule_config.weight
schedule_config.weight = weight
ActivityLog.log_config_change(
config_key=f"schedule_hour_{hour}",
old_value=old_value,
new_value=weight,
description=f"Updated schedule weight for hour {hour}"
)
db.session.commit()
except Exception as e:
db.session.rollback()
return jsonify({
"success": False,
"message": f"Error updating schedule: {str(e)}"
})
return jsonify({"success": True, "message": "Configuration updated"})
except Exception as e:
db.session.rollback()
return jsonify({"success": False, "message": f"Unexpected error: {str(e)}"})
@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
"""Legacy route to maintain compatibility with the schedule blueprint."""
# For GET requests, redirect to the scraper index with the schedule tab active
if request.method == "GET":
return index()
# For POST requests, handle form data and process like the original schedule blueprint
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update
try:
new_volume = float(request.form.get("total_volume", 0))
if new_volume <= 0 or new_volume > 1000:
raise ValueError("Volume must be between 1 and 1000")
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=new_volume)
db.session.add(volume_config)
else:
volume_config.volume = new_volume
db.session.commit()
flash("Volume updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating volume: {str(e)}", "error")
else:
# Schedule update logic
# Validate form data
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
raise ValueError(f"Missing data for hour {hour}")
try:
weight = float(request.form.get(key, 0))
if weight < 0 or weight > 5:
raise ValueError(
f"Weight for hour {hour} must be between 0 and 5"
)
except ValueError:
raise ValueError(f"Invalid weight value for hour {hour}")
# Update database if validation passes
for hour in range(24):
key = f"hour_{hour}"
weight = float(request.form.get(key, 0))
config = ScheduleConfig.query.get(hour)
if config:
config.weight = weight
else:
db.session.add(ScheduleConfig(hour=hour, weight=weight))
db.session.commit()
flash("Schedule updated successfully!", "success")
except ValueError as e:
db.session.rollback()
flash(f"Error updating schedule: {str(e)}", "error")
# Redirect back to the scraper page
return index()
# Calculate schedule information for visualization/decision making
def get_schedule_stats():
"""Get statistics about the current schedule configuration."""
volume_config = VolumeConfig.query.first()
if not volume_config:
return {"error": "No volume configuration found"}
total_volume = volume_config.volume
schedule_configs = ScheduleConfig.query.all()
if not schedule_configs:
return {"error": "No schedule configuration found"}
# Calculate total weight
total_weight = sum(config.weight for config in schedule_configs)
# Calculate papers per hour
papers_per_hour = {}
for config in schedule_configs:
weight_ratio = config.weight / total_weight if total_weight > 0 else 0
papers = weight_ratio * total_volume
papers_per_hour[config.hour] = papers
return {
"total_volume": total_volume,
"total_weight": total_weight,
"papers_per_hour": papers_per_hour
}
# Enhanced API route to get schedule information
@bp.route("/schedule_info")
def schedule_info():
"""Get information about the current schedule configuration."""
stats = get_schedule_stats()
return jsonify(stats)
# Define the Celery tasks
@celery.task(bind=True)
def dummy_scraper_scheduler(self):
"""Main scheduler task for the dummy scraper."""
global SCRAPER_ACTIVE, SCRAPER_PAUSED
if not SCRAPER_ACTIVE:
return {"status": "Scraper not active"}
if SCRAPER_PAUSED:
return {"status": "Scraper paused"}
# Calculate how many papers to scrape based on current hour and configuration
current_hour = datetime.now().hour
hour_config = ScheduleConfig.query.get(current_hour)
volume_config = VolumeConfig.query.first()
if not hour_config or not volume_config:
return {"status": "Missing configuration"}
# Calculate papers to scrape this hour
hourly_rate = volume_config.volume / 24 # Base rate per hour
adjusted_rate = hourly_rate * (1 / hour_config.weight) # Adjust by weight
papers_to_scrape = int(adjusted_rate)
# Log the scheduling decision
ActivityLog.log_scraper_activity(
action="schedule_papers",
status="success",
description=f"Scheduled {papers_to_scrape} papers for scraping at hour {current_hour}",
hourly_rate=hourly_rate,
weight=hour_config.weight,
adjusted_rate=adjusted_rate,
)
# Launch individual scraping tasks
for _ in range(papers_to_scrape):
if not SCRAPER_ACTIVE or SCRAPER_PAUSED:
break
# Schedule a new paper to be scraped
dummy_scrape_paper.delay()
# Schedule the next run in 5 minutes if still active
if SCRAPER_ACTIVE:
dummy_scraper_scheduler.apply_async(countdown=300) # 5 minutes
return {"status": "success", "papers_scheduled": papers_to_scrape}
@celery.task(bind=True)
def dummy_scrape_paper(self):
"""Simulate scraping a single paper."""

View File

@ -0,0 +1,49 @@
<!-- General Configuration Tab -->
<div class="tab-pane active">
<div class="config-form">
<div class="card">
<div class="card-header">
<h5>General Configuration</h5>
</div>
<div class="card-body">
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
<form action="{{ url_for('config.update_volume') }}" method="post">
<div class="form-section">
<h6>Scraper Volume</h6>
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
<div class="mb-3">
<label for="totalVolume" class="form-label">Papers per day:</label>
<input type="number" class="form-control" id="totalVolume" name="total_volume" min="1"
max="1000" value="{{ volume_config.volume }}" required>
<div class="form-text">Enter a value between 1 and 1000</div>
</div>
</div>
<div class="form-section">
<h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
<label class="form-check-label" for="enableNotifications">
Enable email notifications
</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableLogging" checked>
<label class="form-check-label" for="enableLogging">
Enable detailed activity logging
</label>
</div>
</div>
<button type="submit" class="btn btn-primary">Save General Settings</button>
</form>
</div>
</div>
</div>
</div>

View File

@ -0,0 +1,50 @@
{% extends "base.html.jinja" %}
{% block title %}Configuration{% endblock title %}
{% block styles %}
{{ super() }}
<style>
.nav-tabs .nav-link {
color: #495057;
}
.nav-tabs .nav-link.active {
font-weight: bold;
}
.config-form {
max-width: 800px;
margin: 0 auto;
}
.form-section {
margin-bottom: 2rem;
}
</style>
{% endblock styles %}
{% block content %}
<div class="container mt-4">
<h1>Configuration</h1>
<ul class="nav nav-tabs mb-4">
<li class="nav-item">
<a class="nav-link {% if active_tab == 'general' %}active{% endif %}"
href="{{ url_for('config.general') }}">General</a>
</li>
<li class="nav-item">
<a class="nav-link {% if active_tab == 'schedule' %}active{% endif %}"
href="{{ url_for('config.schedule') }}">Schedule</a>
</li>
</ul>
<div class="tab-content">
{% if active_tab == 'general' %}
{% include "config/general.html.jinja" %}
{% elif active_tab == 'schedule' %}
{% include "config/schedule.html.jinja" %}
{% endif %}
</div>
</div>
{% endblock content %}

View File

@ -0,0 +1,325 @@
<style>
.timeline {
display: flex;
flex-wrap: wrap;
gap: 3px;
user-select: none;
/* Prevent text selection during drag */
}
.hour-block {
width: 49px;
height: 70px;
/* Increased height to fit additional text */
border-radius: 5px;
text-align: center;
line-height: 1.2;
font-size: 0.9rem;
padding-top: 6px;
cursor: pointer;
user-select: none;
transition: background-color 0.2s ease-in-out;
margin: 1px;
}
.hour-block.selected {
outline: 2px solid #4584b8;
}
.papers {
font-size: 0.7rem;
margin-top: 2px;
}
.flash-message {
position: fixed;
top: 30%;
left: 50%;
transform: translate(-50%, -50%);
z-index: 1000;
width: 300px;
text-align: center;
font-weight: bold;
padding: 12px;
margin-bottom: 20px;
border-radius: 6px;
opacity: 1;
transition: opacity 5s ease-in-out;
}
.flash-message.success {
background-color: #d4edda;
border-color: #c3e6cb;
color: #155724;
}
.flash-message.error {
background-color: #f8d7da;
border-color: #f5c6cb;
color: #721c24;
}
.flash-message.fade {
opacity: 0;
}
</style>
<script>
const initialSchedule = {{ schedule | tojson }};
const totalVolume = {{ volume }};
</script>
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="tab-pane active">
<div class="card">
<div class="card-header">
<h5>Scheduling Configuration</h5>
</div>
<div class="card-body">
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
<!-- Content -->
<div class="mb-3">
<h3>How it Works</h3>
<p class="text-muted mb-0">
This page allows you to configure the daily volume of papers to be
downloaded and the hourly download weights for the papers. The weights
determine how many papers will be downloaded during each hour of the day.
The total volume (<strong x-text="volume"></strong> papers/day) is split
across all hours based on their relative weights. Each weight controls the
proportion of papers downloaded during that hour. Click to select one or
more hours below. Then assign a weight to them using the input and apply
it. Color indicates relative intensity. The total daily volume will be
split proportionally across these weights.
<strong>Don't forget to submit the changes!</strong>
</p>
<h3>Example</h3>
<p class="text-muted mb-0">
If the total volume is <strong>240 papers</strong> and hours are
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
<strong>40, 80, and 120 papers</strong> respectively.
</p>
</div>
<h2 class="mt-4">Volume</h2>
<div class="align-items-start flex-wrap gap-2">
<p class="text-muted">
The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3" x-data="{ volumeValue: volume }">
<div class="input-group w-50">
<label class="input-group-text">Papers per day:</label>
<input type="number" class="form-control" x-model="volumeValue" min="1" max="1000" required />
<button type="button" class="btn btn-primary" @click="updateVolume()">
Update Volume
</button>
</div>
</div>
</div>
<h2 class="mt-4">Current Schedule</h2>
<form x-data id="scheduleForm">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour" :style="getBackgroundStyle(hour)"
:class="{'selected': isSelected(hour)}" @mousedown="startDrag($event, hour)"
@mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
</div>
</template>
</div>
<div class="input-group mb-4 w-50">
  <label class="input-group-text">Set Weight:</label>
  <!-- min is 0.1 (not 0) because the server rejects weights below 0.1 when the schedule is saved -->
  <input type="number" step="0.1" min="0.1" max="5" x-model="newWeight" class="form-control" />
  <button type="button" class="btn btn-outline-primary" @click="applyWeight()">
    Apply to Selected
  </button>
</div>
<div class="d-flex justify-content-between">
<a href="{{ url_for('config.general') }}" class="btn btn-outline-secondary">⬅ Back</a>
<button type="button" class="btn btn-success" @click="saveSchedule()">💾 Save Schedule</button>
</div>
</form>
</div>
</div>
</div>
<script>
function scheduleManager(initial, volume) {
return {
schedule: { ...initial },
volume: volume,
selectedHours: [],
newWeight: 1.0,
volumeValue: volume,
isDragging: false,
dragOperation: null,
formatHour(h) {
return String(h).padStart(2, "0") + ":00";
},
showFlashMessage(message, type = 'success') {
const flashMsg = document.createElement('div');
flashMsg.className = `flash-message ${type}`;
flashMsg.textContent = message;
document.body.appendChild(flashMsg);
setTimeout(() => flashMsg.classList.add('fade'), 2000);
setTimeout(() => flashMsg.remove(), 7000);
},
updateVolume() {
fetch('{{ url_for('config.api_update_config') }}', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
volume: this.volumeValue
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
this.volume = parseFloat(this.volumeValue);
this.showFlashMessage('Volume updated successfully!');
} else {
this.showFlashMessage(data.updates?.[0]?.message || 'Error updating volume', 'error');
}
})
.catch(error => {
console.error('Error:', error);
this.showFlashMessage('Network error occurred', 'error');
});
},
getBackgroundStyle(hour) {
const weight = parseFloat(this.schedule[hour]);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
getBackgroundStyleFromValue(value) {
const weight = parseFloat(value);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
}
},
endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
this.selectedHours = [];
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
saveSchedule() {
fetch('{{ url_for('config.api_update_config') }}', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
schedule: this.schedule
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
this.showFlashMessage('Schedule updated successfully!');
} else {
this.showFlashMessage(data.updates?.[0]?.message || 'Error updating schedule', 'error');
}
})
.catch(error => {
console.error('Error:', error);
this.showFlashMessage('Network error occurred', 'error');
});
}
};
}
</script>

View File

@ -17,7 +17,7 @@
<a class="nav-link" href="{{ url_for('papers.list_papers') }}">Papers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="{{ url_for('schedule.schedule') }}">Schedule</a>
<a class="nav-link" href="{{ url_for('config.general') }}">Configuration</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-bs-toggle="dropdown"

View File

@ -36,51 +36,6 @@
max-width: 350px;
z-index: 1050;
}
/* Enhanced scheduler styles */
.timeline {
display: flex;
flex-wrap: wrap;
gap: 3px;
user-select: none;
}
.hour-block {
width: 49px;
height: 70px;
border-radius: 5px;
text-align: center;
line-height: 1.2;
font-size: 0.9rem;
padding-top: 6px;
cursor: pointer;
user-select: none;
transition: background-color 0.2s ease-in-out;
margin: 1px;
}
.hour-block.selected {
outline: 2px solid #4584b8;
}
.papers {
font-size: 0.7rem;
margin-top: 2px;
}
/* Tab styles */
.nav-tabs .nav-link {
color: #495057;
}
.nav-tabs .nav-link.active {
font-weight: bold;
color: #007bff;
}
.tab-pane {
padding-top: 1rem;
}
</style>
{% endblock styles %}
@ -88,25 +43,6 @@
<div class="container mt-4">
<h1>Paper Scraper Control Panel</h1>
<!-- Navigation tabs -->
<ul class="nav nav-tabs mb-4" id="scraperTabs" role="tablist">
<li class="nav-item" role="presentation">
<button class="nav-link active" id="dashboard-tab" data-bs-toggle="tab" data-bs-target="#dashboard"
type="button" role="tab" aria-controls="dashboard" aria-selected="true">
Dashboard
</button>
</li>
<li class="nav-item" role="presentation">
<button class="nav-link" id="schedule-tab" data-bs-toggle="tab" data-bs-target="#schedule" type="button"
role="tab" aria-controls="schedule" aria-selected="false">
Schedule Configuration
</button>
</li>
</ul>
<div class="tab-content" id="scraperTabsContent">
<!-- Dashboard Tab -->
<div class="tab-pane fade show active" id="dashboard" role="tabpanel" aria-labelledby="dashboard-tab">
<div class="row mb-4">
<div class="col-md-6">
<div class="card">
@ -204,221 +140,13 @@
</div>
</div>
<!-- Schedule Configuration Tab -->
<div class="tab-pane fade" id="schedule" role="tabpanel" aria-labelledby="schedule-tab"
x-data="scheduleManager({{ schedule_config | tojson }}, {{ volume_config.volume if volume_config else 100 }})">
<div class="mb-3">
<h3>How it Works</h3>
<p class="text-muted mb-0">
Configure the daily volume of papers to be downloaded and the hourly download weights.
The weights determine how many papers will be downloaded during each hour of the day.
The total volume (<strong x-text="volume"></strong> papers/day) is split across all hours based on
their relative weights.
<strong>Lower weights result in higher scraping rates</strong> for that hour.
</p>
<h5 class="mt-3">Instructions:</h5>
<p class="text-muted">
Click to select one or more hours below. Then assign a weight to them using the input and apply it.
Color indicates relative intensity. Changes are saved immediately when you click "Update Schedule".
</p>
</div>
<div class="card mb-4">
<div class="card-header">
<h4 class="m-0">Volume Configuration</h4>
</div>
<div class="card-body">
<p class="text-muted">
The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3">
<div class="input-group">
<span class="input-group-text">Papers per day:</span>
<input type="number" class="form-control" x-model="volume" min="1" max="1000" />
<button type="button" class="btn btn-primary" @click="updateVolume()">
Update Volume
</button>
</div>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<h4 class="m-0">Hourly Weights</h4>
</div>
<div class="card-body">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour"
:style="getBackgroundStyle(hour)" :class="{'selected': isSelected(hour)}"
@mousedown="startDrag($event, hour)" @mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
</div>
</template>
</div>
<div class="input-group mb-4 w-50">
<span class="input-group-text">Set Weight:</span>
<input type="number" step="0.1" min="0.1" max="5" x-model="newWeight" class="form-control" />
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
Apply to Selected
</button>
</div>
<button type="button" class="btn btn-success" @click="updateSchedule()">
💾 Update Schedule
</button>
</div>
</div>
</div>
</div>
</div>
<!-- Notification template -->
<div id="notificationContainer"></div>
{% endblock content %}
{% block scripts %}
{{ super() }}
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
<script>
/**
 * Alpine.js scheduler component.
 *
 * Holds the hourly weight schedule and the daily paper volume, supports
 * click/drag selection of hour blocks, and saves changes to
 * `/scraper/update_config`.
 *
 * @param initial Initial schedule: object mapping hour keys to weights.
 *                A falsy value yields an empty schedule.
 * @param volume  Initial daily paper volume.
 * @returns Component state and methods for `x-data`.
 */
function scheduleManager(initial, volume) {
  // Weight at (or above) which an hour block is drawn at its darkest shade.
  const MAX_WEIGHT = 2.5;

  // POST a partial config, then notify the user of the outcome. `onSuccess`
  // runs after the success notification. Relies on the page-level
  // showNotification helper (expected to be defined elsewhere on the page).
  function postConfig(payload, successMessage, onSuccess) {
    fetch('/scraper/update_config', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    })
      .then(response => response.json())
      .then(data => {
        if (data.success) {
          showNotification(successMessage, 'success');
          onSuccess();
        } else {
          showNotification(data.message, 'danger');
        }
      })
      .catch(error => {
        // Without this, a failed request or non-JSON body was an unhandled
        // rejection and the user got no feedback at all.
        console.error('Error updating scraper config:', error);
        showNotification('Network error occurred', 'danger');
      });
  }

  return {
    schedule: initial || {},
    volume: volume,
    selectedHours: [],
    newWeight: 1.0,
    isDragging: false,
    dragOperation: null, // "add" or "remove" while a drag is in progress

    // Format an hour as a zero-padded "HH:00" label.
    formatHour(h) {
      return String(h).padStart(2, "0") + ":00";
    },

    // Inline style for an hour block: heavier weights get a darker background.
    getBackgroundStyle(hour) {
      const weight = parseFloat(this.schedule[hour]);
      // Normalize to 0.0..1.0; a missing or non-numeric weight falls back to
      // the lightest shade instead of producing "hsl(..., NaN%)".
      const t = Number.isFinite(weight)
        ? Math.min(Math.max(weight / MAX_WEIGHT, 0), 1.0)
        : 0;
      // Interpolate HSL lightness: 95% (light) down to 30% (dark).
      const lightness = 95 - t * 65;
      return {
        backgroundColor: `hsl(210, 10%, ${lightness}%)`,
        color: t > 0.65 ? "white" : "black", // adaptive text color
      };
    },

    // Begin a drag; the first block's state decides whether the drag
    // selects ("add") or deselects ("remove").
    startDrag(event, hour) {
      event.preventDefault();
      this.isDragging = true;
      this.dragOperation = this.isSelected(hour) ? "remove" : "add";
      this.toggleSelect(hour);
    },

    // Extend the active drag over `hour`; no-op when not dragging.
    dragSelect(hour) {
      if (!this.isDragging) return;
      const selected = this.isSelected(hour);
      if (this.dragOperation === "add" && !selected) {
        this.selectedHours.push(hour);
      } else if (this.dragOperation === "remove" && selected) {
        this.selectedHours = this.selectedHours.filter((h) => h !== hour);
      }
    },

    endDrag() {
      this.isDragging = false;
    },

    // Flip the selection state of `hour`.
    toggleSelect(hour) {
      if (this.isSelected(hour)) {
        this.selectedHours = this.selectedHours.filter((h) => h !== hour);
      } else {
        this.selectedHours.push(hour);
      }
    },

    isSelected(hour) {
      return this.selectedHours.includes(hour);
    },

    // Assign `newWeight` (stored as a one-decimal string) to every selected
    // hour. An empty or non-numeric input is ignored so "NaN" never enters
    // the schedule. The selection is kept; updateSchedule clears it on save.
    applyWeight() {
      const parsed = parseFloat(this.newWeight);
      if (!Number.isFinite(parsed)) return;
      const formatted = parsed.toFixed(1);
      this.selectedHours.forEach((hour) => {
        this.schedule[hour] = formatted;
      });
    },

    // Sum of all weights in the schedule.
    getTotalWeight() {
      return Object.values(this.schedule).reduce(
        (sum, w) => sum + parseFloat(w),
        0
      );
    },

    // Papers allotted to `hour`: its share of the total weight times the
    // daily volume. Returns the number 0 when the total weight is zero,
    // otherwise a one-decimal string.
    getPapersPerHour(hour) {
      const total = this.getTotalWeight();
      if (total === 0) return 0;
      return (
        (parseFloat(this.schedule[hour]) / total) *
        this.volume
      ).toFixed(1);
    },

    // Save the daily volume and mirror it into the dashboard input.
    updateVolume() {
      postConfig(
        { volume: parseFloat(this.volume) },
        'Volume updated successfully',
        () => {
          // The dashboard input may not be on the page; skip it if absent.
          const dashboardInput = document.getElementById('volumeInput');
          if (dashboardInput) {
            dashboardInput.value = this.volume;
          }
        }
      );
    },

    // Save the hourly weights and clear the selection on success.
    updateSchedule() {
      postConfig(
        { schedule: this.schedule },
        'Schedule updated successfully',
        () => {
          this.selectedHours = [];
        }
      );
    }
  };
}
// Global variables for the scraper dashboard
let notificationsEnabled = true;
let activityChart = null;