removes old scheduler

This commit is contained in:
Michael Beck 2025-04-16 15:57:44 +02:00
parent f36fc53b26
commit 396eaefbe9
2 changed files with 0 additions and 482 deletions

View File

@ -1,212 +0,0 @@
"""Schedule configuration and scheduling logic."""
from datetime import datetime
import random
import json
from flask import Blueprint, flash, render_template, request, jsonify
from ..db import db
from ..models import ScheduleConfig, VolumeConfig, ActivityLog, ActivityCategory
from ..celery import celery
from .scraper import SCRAPER_ACTIVE, SCRAPER_PAUSED, dummy_scrape_paper
from .config import _update_volume, _update_schedule
bp = Blueprint("schedule", __name__, url_prefix="/schedule")
@bp.route("/", methods=["GET", "POST"])
def schedule():
"""Render and handle the schedule configuration page."""
if request.method == "POST":
try:
# Check if we're updating volume or schedule
if "total_volume" in request.form:
# Volume update using the centralized helper
new_volume = request.form.get("total_volume", 0)
success, message, _ = _update_volume(new_volume)
if success:
flash(message, "success")
else:
flash(message, "error")
else:
# Schedule update using the centralized helper
schedule_data = {}
for hour in range(24):
key = f"hour_{hour}"
if key not in request.form:
flash(f"Missing data for hour {hour}", "error")
break
schedule_data[str(hour)] = request.form.get(key, 0)
if len(schedule_data) == 24:
success, message = _update_schedule(schedule_data)
if success:
flash(message, "success")
else:
flash(message, "error")
except Exception as e:
db.session.rollback()
flash(f"Error: {str(e)}", "error")
# Ensure we have schedule config for all hours
existing_hours = {record.hour: record for record in ScheduleConfig.query.all()}
schedule_config = {}
for hour in range(24):
if hour in existing_hours:
schedule_config[hour] = existing_hours[hour].weight
else:
# Create default schedule entry (weight 1.0)
new_config = ScheduleConfig(hour=hour, weight=1.0)
db.session.add(new_config)
schedule_config[hour] = 1.0
if len(existing_hours) < 24:
db.session.commit()
volume = VolumeConfig.query.first()
return render_template(
"schedule.html.jinja",
schedule=schedule_config,
volume=volume.volume if volume else 0,
stats=get_schedule_stats(),
app_title="PaperScraper",
)
@bp.route("/update_config", methods=["POST"])
def update_config():
"""Update schedule configuration via API."""
data = request.json
response = {"success": True, "updates": []}
try:
# Update volume if provided
if "volume" in data:
success, message, _ = _update_volume(data["volume"])
response["updates"].append({
"type": "volume",
"success": success,
"message": message
})
if not success:
response["success"] = False
# Update schedule if provided
if "schedule" in data:
success, message = _update_schedule(data["schedule"])
response["updates"].append({
"type": "schedule",
"success": success,
"message": message
})
if not success:
response["success"] = False
return jsonify(response)
except Exception as e:
db.session.rollback()
return jsonify({
"success": False,
"message": f"Unexpected error: {str(e)}"
})
# Calculate schedule information for visualization/decision making
def get_schedule_stats():
"""Get statistics about the current schedule configuration."""
volume_config = VolumeConfig.query.first()
if not volume_config:
return {"error": "No volume configuration found"}
total_volume = volume_config.volume
schedule_configs = ScheduleConfig.query.all()
if not schedule_configs:
return {"error": "No schedule configuration found"}
# Calculate total weight
total_weight = sum(config.weight for config in schedule_configs)
# Calculate papers per hour
papers_per_hour = {}
hourly_weights = {}
for config in schedule_configs:
weight_ratio = config.weight / total_weight if total_weight > 0 else 0
papers = weight_ratio * total_volume
papers_per_hour[config.hour] = papers
hourly_weights[config.hour] = config.weight
return {
"total_volume": total_volume,
"total_weight": total_weight,
"papers_per_hour": papers_per_hour,
"hourly_weights": hourly_weights
}
# API route to get schedule information
@bp.route("/schedule_info")
def schedule_info():
"""Get information about the current schedule configuration."""
stats = get_schedule_stats()
return jsonify(stats)
# Define the Celery tasks for the scheduler
@celery.task(bind=True)
def start_scheduler(self):
"""Start the scheduler when the scraper is started."""
if SCRAPER_ACTIVE and not SCRAPER_PAUSED:
# Schedule the first run immediately
scheduler_task.delay()
return {"status": "success", "message": "Scheduler started"}
return {"status": "error", "message": "Scraper not active or paused"}
@celery.task(bind=True)
def scheduler_task(self):
"""Main scheduler task for the scraper."""
if not SCRAPER_ACTIVE:
return {"status": "Scraper not active"}
if SCRAPER_PAUSED:
return {"status": "Scraper paused"}
# Calculate how many papers to scrape based on current hour and configuration
current_hour = datetime.now().hour
hour_config = ScheduleConfig.query.get(current_hour)
volume_config = VolumeConfig.query.first()
if not hour_config or not volume_config:
return {"status": "Missing configuration"}
# Calculate papers to scrape this hour
stats = get_schedule_stats()
papers_to_scrape = int(stats["papers_per_hour"].get(current_hour, 0))
# Log the scheduling decision
ActivityLog.log_scraper_activity(
action="schedule_papers",
status="success",
description=f"Scheduled {papers_to_scrape} papers for scraping at hour {current_hour}",
extra_data=json.dumps({
"hour": current_hour,
"weight": hour_config.weight,
"total_volume": volume_config.volume
})
)
# Execute the actual scraping tasks
for _ in range(papers_to_scrape):
# Queue up scraping tasks - in real implementation, this would
# call the actual scraper task
dummy_scrape_paper.delay()
return {
"status": "success",
"papers_scheduled": papers_to_scrape,
"hour": current_hour
}

View File

@ -1,270 +0,0 @@
{% extends "base.html.jinja" %} {% block content %}
<style>
.timeline {
display: flex;
flex-wrap: wrap;
gap: 3px;
user-select: none;
/* Prevent text selection during drag */
}
.hour-block {
width: 49px;
height: 70px;
/* Increased height to fit additional text */
border-radius: 5px;
text-align: center;
line-height: 1.2;
font-size: 0.9rem;
padding-top: 6px;
cursor: pointer;
user-select: none;
transition: background-color 0.2s ease-in-out;
margin: 1px;
}
.hour-block.selected {
outline: 2px solid #4584b8;
}
.papers {
font-size: 0.7rem;
margin-top: 2px;
}
.flash-message {
position: fixed;
top: 30%;
left: 50%;
transform: translate(-50%, -50%);
z-index: 1000;
width: 300px;
text-align: center;
font-weight: bold;
padding: 12px;
margin-bottom: 20px;
border-radius: 6px;
opacity: 1;
transition: opacity 5s ease-in-out;
}
.flash-message.success {
background-color: #d4edda;
border-color: #c3e6cb;
color: #155724;
}
.flash-message.error {
background-color: #f8d7da;
border-color: #f5c6cb;
color: #721c24;
}
.flash-message.fade {
opacity: 0;
}
</style>
<script>
const initialSchedule = {{ schedule | tojson }};
const totalVolume = {{ volume }};
</script>
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="container">
<h1 class="mb-4">🕒 Configure Hourly Download Weights</h1>
<!-- Flash Messages -->
{% with messages = get_flashed_messages(with_categories=true) %} {% if
messages %}
<div id="flash-messages">
{% for category, message in messages %}
<div class="flash-message {{ category }}" x-data="{}"
x-init="setTimeout(() => $el.classList.add('fade'), 100); setTimeout(() => $el.remove(), 5000)">
{{ message }}
</div>
{% endfor %}
</div>
{% endif %} {% endwith %}
<!-- Content -->
<div class="mb-3">
<h3>How it Works</h3>
<p class="text-muted mb-0">
This page allows you to configure the daily volume of papers to be
downloaded and the hourly download weights for the papers. The weights
determine how many papers will be downloaded during each hour of the day.
The total volume (<strong x-text="volume"></strong> papers/day) is split
across all hours based on their relative weights. Each weight controls the
proportion of papers downloaded during that hour. Click to select one or
more hours below. Then assign a weight to them using the input and apply
it. Color indicates relative intensity. The total daily volume will be
split proportionally across these weights.
<strong>Don't forget to submit the changes!</strong>
</p>
<h3>Example</h3>
<p class="text-muted mb-0">
If the total volume is <strong>240 papers</strong> and hours are
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
<strong>40, 80, and 120 papers</strong> respectively.
</p>
</div>
<h2 class="mt-4">Volume</h2>
<div class="align-items-start flex-wrap gap-2">
<p class="text-muted">
The total volume of data to be downloaded each day is
<strong x-text="volume"></strong> papers.
</p>
<div class="d-flex align-items-center mb-3">
<form method="post" action="{{ url_for('schedule.schedule') }}" class="input-group w-50">
<label class="input-group-text">Papers per day:</label>
<input type="number" class="form-control" name="total_volume" value="{{ volume }}" min="1" max="1000"
required />
<button type="submit" class="btn btn-primary">Update Volume</button>
</form>
</div>
</div>
<h2 class="mt-4">Current Schedule</h2>
<form method="post" action="{{ url_for('schedule.schedule') }}">
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
<template x-for="hour in Object.keys(schedule)" :key="hour">
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour" :style="getBackgroundStyle(hour)"
:class="{'selected': isSelected(hour)}" @mousedown="startDrag($event, hour)" @mouseover="dragSelect(hour)">
<div><strong x-text="formatHour(hour)"></strong></div>
<div class="weight"><span x-text="schedule[hour]"></span></div>
<div class="papers">
<span x-text="getPapersPerHour(hour)"></span> p.
</div>
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
</div>
</template>
</div>
<div class="input-group mb-4 w-50">
<label class="input-group-text">Set Weight:</label>
<input type="number" step="0.1" min="0" max="5" x-model="newWeight" class="form-control" />
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
Apply to Selected
</button>
</div>
<div class="d-flex justify-content-between">
<a href="{{ url_for('main.index') }}" class="btn btn-outline-secondary">⬅ Back</a>
<button type="submit" class="btn btn-success">💾 Save Schedule</button>
</div>
</form>
</div>
<script>
function scheduleManager(initial, volume) {
return {
schedule: { ...initial },
volume: volume,
selectedHours: [],
newWeight: 1.0,
isDragging: false,
dragOperation: null,
formatHour(h) {
return String(h).padStart(2, "0") + ":00";
},
getBackgroundStyle(hour) {
const weight = parseFloat(this.schedule[hour]);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
getBackgroundStyleFromValue(value) {
const weight = parseFloat(value);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
}
},
endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
this.selectedHours = [];
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
};
}
</script>
{% endblock content %}