diff --git a/scipaperloader/blueprints/schedule.py b/scipaperloader/blueprints/schedule.py deleted file mode 100644 index 86aebc6..0000000 --- a/scipaperloader/blueprints/schedule.py +++ /dev/null @@ -1,212 +0,0 @@ -"""Schedule configuration and scheduling logic.""" -from datetime import datetime -import random -import json -from flask import Blueprint, flash, render_template, request, jsonify - -from ..db import db -from ..models import ScheduleConfig, VolumeConfig, ActivityLog, ActivityCategory -from ..celery import celery -from .scraper import SCRAPER_ACTIVE, SCRAPER_PAUSED, dummy_scrape_paper -from .config import _update_volume, _update_schedule - -bp = Blueprint("schedule", __name__, url_prefix="/schedule") - - -@bp.route("/", methods=["GET", "POST"]) -def schedule(): - """Render and handle the schedule configuration page.""" - if request.method == "POST": - try: - # Check if we're updating volume or schedule - if "total_volume" in request.form: - # Volume update using the centralized helper - new_volume = request.form.get("total_volume", 0) - success, message, _ = _update_volume(new_volume) - - if success: - flash(message, "success") - else: - flash(message, "error") - else: - # Schedule update using the centralized helper - schedule_data = {} - for hour in range(24): - key = f"hour_{hour}" - if key not in request.form: - flash(f"Missing data for hour {hour}", "error") - break - schedule_data[str(hour)] = request.form.get(key, 0) - - if len(schedule_data) == 24: - success, message = _update_schedule(schedule_data) - if success: - flash(message, "success") - else: - flash(message, "error") - - except Exception as e: - db.session.rollback() - flash(f"Error: {str(e)}", "error") - - # Ensure we have schedule config for all hours - existing_hours = {record.hour: record for record in ScheduleConfig.query.all()} - schedule_config = {} - - for hour in range(24): - if hour in existing_hours: - schedule_config[hour] = existing_hours[hour].weight - else: - # Create default schedule entry (weight 1.0) - new_config = ScheduleConfig(hour=hour, weight=1.0) - db.session.add(new_config) - schedule_config[hour] = 1.0 - - if len(existing_hours) < 24: - db.session.commit() - - volume = VolumeConfig.query.first() - return render_template( - "schedule.html.jinja", - schedule=schedule_config, - volume=volume.volume if volume else 0, - stats=get_schedule_stats(), - app_title="PaperScraper", - ) - - -@bp.route("/update_config", methods=["POST"]) -def update_config(): - """Update schedule configuration via API.""" - data = request.json - response = {"success": True, "updates": []} - - try: - # Update volume if provided - if "volume" in data: - success, message, _ = _update_volume(data["volume"]) - response["updates"].append({ - "type": "volume", - "success": success, - "message": message - }) - if not success: - response["success"] = False - - # Update schedule if provided - if "schedule" in data: - success, message = _update_schedule(data["schedule"]) - response["updates"].append({ - "type": "schedule", - "success": success, - "message": message - }) - if not success: - response["success"] = False - - return jsonify(response) - - except Exception as e: - db.session.rollback() - return jsonify({ - "success": False, - "message": f"Unexpected error: {str(e)}" - }) - - -# Calculate schedule information for visualization/decision making -def get_schedule_stats(): - """Get statistics about the current schedule configuration.""" - volume_config = VolumeConfig.query.first() - if not volume_config: - return {"error": "No volume configuration found"} - - total_volume = volume_config.volume - schedule_configs = ScheduleConfig.query.all() - - if not schedule_configs: - return {"error": "No schedule configuration found"} - - # Calculate total weight - total_weight = sum(config.weight for config in schedule_configs) - - # Calculate papers per hour - papers_per_hour = {} - hourly_weights = {} - for config in schedule_configs: - weight_ratio = config.weight / total_weight if total_weight > 0 else 0 - papers = weight_ratio * total_volume - papers_per_hour[config.hour] = papers - hourly_weights[config.hour] = config.weight - - return { - "total_volume": total_volume, - "total_weight": total_weight, - "papers_per_hour": papers_per_hour, - "hourly_weights": hourly_weights - } - - -# API route to get schedule information -@bp.route("/schedule_info") -def schedule_info(): - """Get information about the current schedule configuration.""" - stats = get_schedule_stats() - return jsonify(stats) - - -# Define the Celery tasks for the scheduler -@celery.task(bind=True) -def start_scheduler(self): - """Start the scheduler when the scraper is started.""" - if SCRAPER_ACTIVE and not SCRAPER_PAUSED: - # Schedule the first run immediately - scheduler_task.delay() - return {"status": "success", "message": "Scheduler started"} - return {"status": "error", "message": "Scraper not active or paused"} - - -@celery.task(bind=True) -def scheduler_task(self): - """Main scheduler task for the scraper.""" - if not SCRAPER_ACTIVE: - return {"status": "Scraper not active"} - - if SCRAPER_PAUSED: - return {"status": "Scraper paused"} - - # Calculate how many papers to scrape based on current hour and configuration - current_hour = datetime.now().hour - hour_config = ScheduleConfig.query.get(current_hour) - volume_config = VolumeConfig.query.first() - - if not hour_config or not volume_config: - return {"status": "Missing configuration"} - - # Calculate papers to scrape this hour - stats = get_schedule_stats() - papers_to_scrape = int(stats["papers_per_hour"].get(current_hour, 0)) - - # Log the scheduling decision - ActivityLog.log_scraper_activity( - action="schedule_papers", - status="success", - description=f"Scheduled {papers_to_scrape} papers for scraping at hour {current_hour}", - extra_data=json.dumps({ - "hour": current_hour, - "weight": hour_config.weight, - "total_volume": volume_config.volume - }) - ) - - # Execute the actual scraping tasks - for _ in range(papers_to_scrape): - # Queue up scraping tasks - in real implementation, this would - # call the actual scraper task - dummy_scrape_paper.delay() - - return { - "status": "success", - "papers_scheduled": papers_to_scrape, - "hour": current_hour - } \ No newline at end of file diff --git a/scipaperloader/templates/schedule.html.jinja b/scipaperloader/templates/schedule.html.jinja deleted file mode 100644 index a14b26b..0000000 --- a/scipaperloader/templates/schedule.html.jinja +++ /dev/null @@ -1,270 +0,0 @@ -{% extends "base.html.jinja" %} {% block content %} - - - - -
-

🕒 Configure Hourly Download Weights

- - - {% with messages = get_flashed_messages(with_categories=true) %} {% if - messages %} -
- {% for category, message in messages %} -
- {{ message }} -
- {% endfor %} -
- {% endif %} {% endwith %} - - -
-

How it Works

-

- This page allows you to configure the daily volume of papers to be - downloaded and the hourly download weights for the papers. The weights - determine how many papers will be downloaded during each hour of the day. - The total volume ( papers/day) is split - across all hours based on their relative weights. Each weight controls the - proportion of papers downloaded during that hour. Click to select one or - more hours below. Then assign a weight to them using the input and apply - it. Color indicates relative intensity. The total daily volume will be - split proportionally across these weights. - Don't forget to submit the changes! -

-

Example

-

- If the total volume is 240 papers and hours are - weighted as 1.0, 2.0, and 3.0, they will receive - 40, 80, and 120 papers respectively. -

-
- -

Volume

- -
-

- The total volume of data to be downloaded each day is - papers. -

-
-
- - - -
-
-
- -

Current Schedule

-
-
- -
- -
- - - -
- -
- ⬅ Back - -
-
-
- - -{% endblock content %} \ No newline at end of file