Compare commits
19 Commits
4968cdbae9
...
11f086aa64
Author | SHA1 | Date | |
---|---|---|---|
![]() |
11f086aa64 | ||
![]() |
5af3d00e39 | ||
![]() |
803554a410 | ||
![]() |
4f8040e9db | ||
![]() |
d6c72265f9 | ||
![]() |
a0fa541de3 | ||
![]() |
0adaed0bfa | ||
![]() |
4085b47460 | ||
![]() |
14f336fadf | ||
![]() |
3d67bbbdf7 | ||
![]() |
592375c67b | ||
![]() |
396eaefbe9 | ||
![]() |
f36fc53b26 | ||
![]() |
1f0fb5e990 | ||
![]() |
5d8a2bd7c4 | ||
![]() |
adf8207461 | ||
![]() |
bb2ecd842d | ||
![]() |
7dd7935fed | ||
![]() |
a1865ef326 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -11,7 +11,7 @@ dist/
|
||||
.mypy_cache/
|
||||
|
||||
*.db
|
||||
|
||||
*.R
|
||||
*.csv
|
||||
|
||||
migrations/
|
71
Makefile
71
Makefile
@ -4,9 +4,8 @@
|
||||
# Define Python and pip executables inside virtual environment
|
||||
PYTHON := venv/bin/python
|
||||
PIP := venv/bin/pip
|
||||
|
||||
# Celery worker command
|
||||
CELERY := venv/bin/celery
|
||||
FLASK := venv/bin/flask
|
||||
|
||||
# Default target that runs the application
|
||||
all: run
|
||||
@ -100,15 +99,15 @@ venv:
|
||||
|
||||
# Run the application in debug mode
|
||||
run: venv
|
||||
venv/bin/flask --app scipaperloader --debug run
|
||||
$(FLASK) --app scipaperloader --debug run
|
||||
|
||||
# Format code using Black and isort
|
||||
format:
|
||||
format: venv
|
||||
venv/bin/black .
|
||||
venv/bin/isort .
|
||||
|
||||
# Check if code meets formatting standards
|
||||
format-check:
|
||||
format-check: venv
|
||||
venv/bin/black --check .
|
||||
venv/bin/isort --check .
|
||||
|
||||
@ -116,15 +115,15 @@ format-check:
|
||||
reformat: format lint
|
||||
|
||||
# Check code for style issues using flake8
|
||||
lint:
|
||||
lint: venv
|
||||
venv/bin/flake8 .
|
||||
|
||||
# Run static type checking with mypy
|
||||
mypy:
|
||||
mypy: venv
|
||||
venv/bin/mypy scipaperloader
|
||||
|
||||
# Run the test suite
|
||||
test:
|
||||
test: venv
|
||||
venv/bin/pytest
|
||||
|
||||
# Build distribution package after running checks
|
||||
@ -134,20 +133,62 @@ dist: format-check lint mypy test
|
||||
# Set up complete development environment
|
||||
dev: clean venv
|
||||
|
||||
# Start Celery worker for processing tasks
|
||||
celery: venv
|
||||
# Start Celery worker - PURGE FIRST
|
||||
celery: venv redis
|
||||
@echo "Purging Celery task queue before starting worker..."
|
||||
# Purge the queue forcefully. Ignore errors if queue is empty/unreachable initially.
|
||||
@-$(CELERY) -A celery_worker:celery purge -f
|
||||
@echo "Starting Celery worker..."
|
||||
$(CELERY) -A celery_worker:celery worker --loglevel=info
|
||||
|
||||
|
||||
# Monitor Celery tasks with flower web interface
|
||||
celery-flower: venv
|
||||
$(PIP) install flower
|
||||
$(CELERY) -A celery_worker:celery flower --port=5555
|
||||
|
||||
# Run Celery beat scheduler for periodic tasks
|
||||
celery-beat: venv redis
|
||||
@echo "Starting Celery beat scheduler..."
|
||||
# Ensure celerybeat-schedule file is removed for clean start if needed
|
||||
@-rm -f celerybeat-schedule.db
|
||||
# Use the default file-based scheduler (removed the --scheduler flag)
|
||||
$(CELERY) -A celery_worker:celery beat --loglevel=info
|
||||
|
||||
# Check if Redis is running, start if needed
|
||||
redis:
|
||||
@redis-cli ping > /dev/null 2>&1 || (echo "Starting Redis server..." && redis-server --daemonize yes)
|
||||
@if ! redis-cli ping > /dev/null 2>&1; then \
|
||||
echo "Starting Redis server..."; \
|
||||
redis-server --daemonize yes; \
|
||||
sleep 1; \
|
||||
else \
|
||||
echo "Redis is already running."; \
|
||||
fi
|
||||
|
||||
# Run complete application stack (Flask app + Celery worker + Redis)
|
||||
# Run complete application stack (Flask app + Celery worker + Redis + Beat scheduler)
|
||||
run-all: redis
|
||||
@echo "Starting Flask and Celery..."
|
||||
@$(MAKE) -j2 run celery
|
||||
@echo "Starting Flask, Celery worker and Beat scheduler..."
|
||||
# Run them in parallel. Ctrl+C will send SIGINT to make, which propagates.
|
||||
# Use trap to attempt cleanup, but primary cleanup is purge on next start.
|
||||
@trap '$(MAKE) stop-all;' INT TERM; \
|
||||
$(MAKE) -j3 run celery celery-beat & wait
|
||||
|
||||
# Stop running Celery worker and beat gracefully
|
||||
stop-celery:
|
||||
@echo "Attempting graceful shutdown of Celery worker and beat..."
|
||||
@-pkill -TERM -f "celery -A celery_worker:celery worker" || echo "Worker not found or already stopped."
|
||||
@-pkill -TERM -f "celery -A celery_worker:celery beat" || echo "Beat not found or already stopped."
|
||||
@sleep 1 # Give processes a moment to terminate
|
||||
@echo "Purging remaining tasks from Celery queue..."
|
||||
@-$(CELERY) -A celery_worker:celery purge -f || echo "Purge failed or queue empty."
|
||||
|
||||
# Stop Flask development server
|
||||
stop-flask:
|
||||
@echo "Attempting shutdown of Flask development server..."
|
||||
@-pkill -TERM -f "flask --app scipaperloader --debug run" || echo "Flask server not found or already stopped."
|
||||
|
||||
# Stop all components potentially started by run-all
|
||||
stop-all: stop-celery stop-flask
|
||||
@echo "All components stopped."
|
||||
|
||||
# Default target
|
||||
all: run
|
||||
|
@ -1,4 +1,6 @@
|
||||
from scipaperloader.celery import celery, configure_celery
|
||||
# Import all task modules to ensure they are registered with Celery
|
||||
import scipaperloader.blueprints.scraper # Import the scraper module with our tasks
|
||||
|
||||
# Configure celery with Flask app
|
||||
configure_celery()
|
||||
|
@ -26,6 +26,7 @@ dev = [
|
||||
"black>=24.2.0,<25",
|
||||
"isort>=5.13.1,<6",
|
||||
"mypy>=1.8.0,<2",
|
||||
"djlint>=1.36.4,<2",
|
||||
]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
|
@ -4,10 +4,10 @@ from flask import Flask
|
||||
from .main import bp as main_bp
|
||||
from .papers import bp as papers_bp
|
||||
from .upload import bp as upload_bp
|
||||
from .schedule import bp as schedule_bp
|
||||
from .logger import bp as logger_bp
|
||||
from .api import bp as api_bp
|
||||
from .scraper import bp as scraper_bp
|
||||
from .config import bp as config_bp
|
||||
|
||||
|
||||
def register_blueprints(app: Flask):
|
||||
@ -15,7 +15,7 @@ def register_blueprints(app: Flask):
|
||||
app.register_blueprint(main_bp)
|
||||
app.register_blueprint(papers_bp, url_prefix='/papers')
|
||||
app.register_blueprint(upload_bp, url_prefix='/upload')
|
||||
app.register_blueprint(schedule_bp, url_prefix='/schedule')
|
||||
app.register_blueprint(logger_bp, url_prefix='/logs')
|
||||
app.register_blueprint(api_bp, url_prefix='/api')
|
||||
app.register_blueprint(scraper_bp, url_prefix='/scraper')
|
||||
app.register_blueprint(scraper_bp, url_prefix='/scraper')
|
||||
app.register_blueprint(config_bp)
|
364
scipaperloader/blueprints/config.py
Normal file
364
scipaperloader/blueprints/config.py
Normal file
@ -0,0 +1,364 @@
|
||||
"""Configuration management blueprint."""
|
||||
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
|
||||
from ..db import db
|
||||
# Import the new model
|
||||
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
|
||||
from ..defaults import MAX_VOLUME
|
||||
import os # Import os for path validation
|
||||
|
||||
bp = Blueprint("config", __name__, url_prefix="/config")
|
||||
|
||||
|
||||
# Helper functions for configuration updates
|
||||
def _update_volume(new_volume):
    """
    Validate and persist a new scraper volume.

    Args:
        new_volume: The requested volume; any value ``float()`` accepts
            (a number or a numeric string).

    Returns:
        tuple: ``(success, message, volume_config)``. ``volume_config`` is
        the stored ``VolumeConfig`` row on success and ``None`` on failure.
    """
    try:
        new_volume = float(new_volume)

        # Positive chained range check: it evaluates to False for NaN, so
        # NaN is rejected. The previous ``<= 0 or > MAX_VOLUME`` test was
        # False for NaN as well and therefore let it through to the database.
        if not (0 < new_volume <= MAX_VOLUME):
            return False, f"Volume must be between 1 and {MAX_VOLUME}", None

        volume_config = VolumeConfig.query.first()
        if not volume_config:
            # No row yet: create it (no change is logged for the first value)
            volume_config = VolumeConfig(volume=new_volume)
            db.session.add(volume_config)
        else:
            old_value = volume_config.volume
            volume_config.volume = new_volume
            ActivityLog.log_config_change(
                config_key="scraper_volume",
                old_value=old_value,
                new_value=new_volume,
                description="Updated scraper volume"
            )

        db.session.commit()
        return True, "Volume updated successfully!", volume_config

    except (ValueError, TypeError) as e:
        # Raised by float() for non-numeric input
        db.session.rollback()
        return False, f"Error updating volume: {str(e)}", None
|
||||
|
||||
|
||||
# Add helper for download path
|
||||
def _update_download_path(new_path):
|
||||
"""
|
||||
Helper function to update download path configuration.
|
||||
|
||||
Args:
|
||||
new_path (str): The new download path
|
||||
|
||||
Returns:
|
||||
tuple: (success, message, download_path_config)
|
||||
"""
|
||||
try:
|
||||
# Basic validation: check if it's a non-empty string
|
||||
if not new_path or not isinstance(new_path, str):
|
||||
return False, "Download path cannot be empty.", None
|
||||
|
||||
# --- Add more validation like checking if path exists or is writable ---
|
||||
# Check if the path exists and is a directory
|
||||
if not os.path.isdir(new_path):
|
||||
# Try to create it if it doesn't exist
|
||||
try:
|
||||
os.makedirs(new_path, exist_ok=True)
|
||||
ActivityLog.log_system_activity(
|
||||
action="create_directory",
|
||||
status="info",
|
||||
description=f"Created download directory: {new_path}"
|
||||
)
|
||||
except OSError as e:
|
||||
ActivityLog.log_system_activity(
|
||||
action="create_directory",
|
||||
status="error",
|
||||
description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
|
||||
)
|
||||
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
|
||||
|
||||
# Check if the path is writable
|
||||
if not os.access(new_path, os.W_OK):
|
||||
ActivityLog.log_system_activity(
|
||||
action="check_directory_permissions",
|
||||
status="error",
|
||||
description=f"Download path '{new_path}' is not writable."
|
||||
)
|
||||
return False, f"Path '{new_path}' exists but is not writable by the application.", None
|
||||
# --- End of validation ---
|
||||
|
||||
config = DownloadPathConfig.query.first()
|
||||
if not config:
|
||||
config = DownloadPathConfig(path=new_path)
|
||||
db.session.add(config)
|
||||
else:
|
||||
old_value = config.path
|
||||
config.path = new_path
|
||||
ActivityLog.log_config_change(
|
||||
config_key="download_path",
|
||||
old_value=old_value,
|
||||
new_value=new_path,
|
||||
description="Updated download path"
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
return True, "Download path updated successfully!", config
|
||||
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
return False, f"Error updating download path: {str(e)}", None
|
||||
|
||||
|
||||
def _update_schedule(schedule_data):
|
||||
"""
|
||||
Helper function to update schedule configuration.
|
||||
|
||||
Args:
|
||||
schedule_data (dict): Dictionary with hour:weight pairs
|
||||
|
||||
Returns:
|
||||
tuple: (success, message)
|
||||
"""
|
||||
try:
|
||||
# Validate all entries first
|
||||
for hour_str, weight in schedule_data.items():
|
||||
try:
|
||||
hour = int(hour_str)
|
||||
weight = float(weight)
|
||||
|
||||
if hour < 0 or hour > 23:
|
||||
return False, f"Hour value must be between 0 and 23, got {hour}"
|
||||
|
||||
if weight < 0.1 or weight > 5:
|
||||
return False, f"Weight for hour {hour} must be between 0.1 and 5, got {weight}"
|
||||
except ValueError:
|
||||
return False, f"Invalid data format for hour {hour_str}"
|
||||
|
||||
# Update schedule after validation
|
||||
for hour_str, weight in schedule_data.items():
|
||||
hour = int(hour_str)
|
||||
weight = float(weight)
|
||||
|
||||
config = ScheduleConfig.query.get(hour)
|
||||
if not config:
|
||||
config = ScheduleConfig(hour=hour, weight=weight)
|
||||
db.session.add(config)
|
||||
else:
|
||||
old_value = config.weight
|
||||
config.weight = weight
|
||||
ActivityLog.log_config_change(
|
||||
config_key=f"schedule_hour_{hour}",
|
||||
old_value=old_value,
|
||||
new_value=weight,
|
||||
description=f"Updated schedule weight for hour {hour}"
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
return True, "Schedule updated successfully!"
|
||||
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
return False, f"Error updating schedule: {str(e)}"
|
||||
|
||||
|
||||
@bp.route("/")
@bp.route("/general")
def general():
    """Render the "general" tab of the configuration page.

    Missing ``VolumeConfig`` / ``DownloadPathConfig`` rows are created and
    committed on the fly so the template always receives both objects.
    """
    # Volume row: fall back to a volume of 100 when none is stored
    volume_cfg = VolumeConfig.query.first()
    if not volume_cfg:
        volume_cfg = VolumeConfig(volume=100)
        db.session.add(volume_cfg)
        db.session.commit()

    # Download path row: rely on the model's column default
    path_cfg = DownloadPathConfig.query.first()
    if not path_cfg:
        path_cfg = DownloadPathConfig()
        db.session.add(path_cfg)
        db.session.commit()

    template_context = {
        "active_tab": "general",
        "volume_config": volume_cfg,
        "download_path_config": path_cfg,
        "max_volume": MAX_VOLUME,
        "app_title": "Configuration",
    }
    return render_template("config/index.html.jinja", **template_context)
|
||||
|
||||
|
||||
@bp.route("/schedule")
def schedule():
    """Render the "schedule" tab of the configuration page.

    Builds an hour -> weight mapping for hours 0..23, adding a row with
    weight 1.0 for every hour that has no stored entry.
    """
    stored_rows = {row.hour: row for row in ScheduleConfig.query.all()}

    weights_by_hour = {}
    for hour in range(24):
        row = stored_rows.get(hour)
        if row is not None:
            weights_by_hour[hour] = row.weight
            continue
        # Hour not configured yet: stage a default entry
        db.session.add(ScheduleConfig(hour=hour, weight=1.0))
        weights_by_hour[hour] = 1.0

    # Persist staged defaults only when fewer than 24 rows were stored
    if len(stored_rows) < 24:
        db.session.commit()

    volume_cfg = VolumeConfig.query.first()
    if not volume_cfg:
        # Fall back to a volume of 100 when none is stored
        volume_cfg = VolumeConfig(volume=100)
        db.session.add(volume_cfg)
        db.session.commit()

    template_context = {
        "active_tab": "schedule",
        "schedule": weights_by_hour,
        "volume": volume_cfg.volume,
        "max_volume": MAX_VOLUME,
        "app_title": "Configuration",
    }
    return render_template("config/index.html.jinja", **template_context)
|
||||
|
||||
|
||||
# Remove old update_volume route
|
||||
# @bp.route("/update/volume", methods=["POST"])
|
||||
# def update_volume(): ...
|
||||
|
||||
# Add new route to handle general settings form
|
||||
@bp.route("/update/general", methods=["POST"])
def update_general():
    """Handle the general settings form (volume and download path).

    Each submitted field is passed to its helper and the helper's message
    is flashed; the user is then redirected back to the general tab.
    """
    # (form field, helper) pairs, processed in this order
    field_handlers = (
        ("total_volume", _update_volume),
        ("download_path", _update_download_path),
    )

    for field_name, handler in field_handlers:
        submitted = request.form.get(field_name)
        if submitted is None:
            # Field absent from the form: leave that setting untouched
            continue
        ok, message, _ = handler(submitted)
        flash(message, "success" if ok else "error")

    return redirect(url_for("config.general"))
|
||||
|
||||
|
||||
@bp.route("/update/schedule", methods=["POST"])
def update_schedule():
    """Handle the schedule form: collect ``hour_0``..``hour_23`` and save them.

    If any hour field is missing an error is flashed and nothing is saved.
    """
    submitted = {}
    for hour in range(24):
        field_name = f"hour_{hour}"
        if field_name not in request.form:
            flash(f"Missing data for hour {hour}", "error")
            return redirect(url_for("config.schedule"))
        submitted[str(hour)] = request.form.get(field_name, 0)

    ok, message = _update_schedule(submitted)
    flash(message, "success" if ok else "error")

    return redirect(url_for("config.schedule"))
|
||||
|
||||
|
||||
@bp.route("/api/schedule/stats")
def schedule_stats():
    """Return JSON statistics derived from the stored volume and schedule.

    The response carries the total volume, the sum of all weights, and per
    hour both the raw weight and the share of the volume it receives
    (``weight / total_weight * total_volume``). If either configuration is
    missing, a JSON object with an ``error`` key is returned instead.
    """
    volume_cfg = VolumeConfig.query.first()
    if not volume_cfg:
        return jsonify({"error": "No volume configuration found"})

    daily_volume = volume_cfg.volume
    entries = ScheduleConfig.query.all()
    if not entries:
        return jsonify({"error": "No schedule configuration found"})

    weight_sum = sum(entry.weight for entry in entries)

    def _share(weight):
        # Guard against a zero (or negative) weight sum
        return weight / weight_sum if weight_sum > 0 else 0

    per_hour_papers = {entry.hour: _share(entry.weight) * daily_volume for entry in entries}
    per_hour_weights = {entry.hour: entry.weight for entry in entries}

    return jsonify({
        "total_volume": daily_volume,
        "total_weight": weight_sum,
        "papers_per_hour": per_hour_papers,
        "hourly_weights": per_hour_weights
    })
|
||||
|
||||
|
||||
@bp.route("/api/update_config", methods=["POST"])
def api_update_config():
    """API endpoint to update configuration.

    Expects a JSON object that may contain any of the keys ``volume``,
    ``download_path`` and ``schedule``. Each key present is passed to its
    helper; the per-key outcome is reported in ``updates`` and the overall
    ``success`` flag is False as soon as one update fails.

    Returns:
        A JSON response. A missing, malformed or non-object body yields
        HTTP 400 with ``success`` False.
    """
    # silent=True returns None for a missing/invalid JSON body instead of
    # raising, so the shape can be validated here with a clear message
    # (previously a None body surfaced as an "Unexpected error" TypeError).
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            "success": False,
            "message": "Request body must be a JSON object"
        }), 400

    response = {"success": True, "updates": []}

    # (payload key, helper) pairs, applied in this order. The key doubles
    # as the "type" reported back to the client.
    handlers = (
        ("volume", _update_volume),
        ("download_path", _update_download_path),
        ("schedule", _update_schedule),
    )

    try:
        for key, handler in handlers:
            if key not in data:
                continue
            # Helpers return (success, message) or (success, message, obj);
            # only the first two items are needed here.
            success, message = handler(data[key])[:2]
            response["updates"].append({
                "type": key,
                "success": success,
                "message": message
            })
            if not success:
                response["success"] = False

        return jsonify(response)

    except Exception as e:
        db.session.rollback()
        return jsonify({
            "success": False,
            "message": f"Unexpected error: {str(e)}"
        })
|
@ -1,79 +0,0 @@
|
||||
"""Schedule configuration routes."""
|
||||
from flask import Blueprint, flash, render_template, request
|
||||
|
||||
from ..db import db
|
||||
from ..models import ScheduleConfig, VolumeConfig
|
||||
|
||||
bp = Blueprint("schedule", __name__)
|
||||
|
||||
|
||||
@bp.route("/", methods=["GET", "POST"])
|
||||
def schedule():
|
||||
if request.method == "POST":
|
||||
try:
|
||||
# Check if we're updating volume or schedule
|
||||
if "total_volume" in request.form:
|
||||
# Volume update
|
||||
try:
|
||||
new_volume = float(request.form.get("total_volume", 0))
|
||||
if new_volume <= 0 or new_volume > 1000:
|
||||
raise ValueError("Volume must be between 1 and 1000")
|
||||
|
||||
volume_config = VolumeConfig.query.first()
|
||||
if not volume_config:
|
||||
volume_config = VolumeConfig(volume=new_volume)
|
||||
db.session.add(volume_config)
|
||||
else:
|
||||
volume_config.volume = new_volume
|
||||
|
||||
db.session.commit()
|
||||
flash("Volume updated successfully!", "success")
|
||||
|
||||
except ValueError as e:
|
||||
db.session.rollback()
|
||||
flash(f"Error updating volume: {str(e)}", "error")
|
||||
else:
|
||||
# Schedule update logic
|
||||
# Validate form data
|
||||
for hour in range(24):
|
||||
key = f"hour_{hour}"
|
||||
if key not in request.form:
|
||||
raise ValueError(f"Missing data for hour {hour}")
|
||||
|
||||
try:
|
||||
weight = float(request.form.get(key, 0))
|
||||
if weight < 0 or weight > 5:
|
||||
raise ValueError(
|
||||
f"Weight for hour {hour} must be between 0 and 5"
|
||||
)
|
||||
except ValueError:
|
||||
raise ValueError(f"Invalid weight value for hour {hour}")
|
||||
|
||||
# Update database if validation passes
|
||||
for hour in range(24):
|
||||
key = f"hour_{hour}"
|
||||
weight = float(request.form.get(key, 0))
|
||||
config = ScheduleConfig.query.get(hour)
|
||||
if config:
|
||||
config.weight = weight
|
||||
else:
|
||||
db.session.add(ScheduleConfig(hour=hour, weight=weight))
|
||||
|
||||
db.session.commit()
|
||||
flash("Schedule updated successfully!", "success")
|
||||
|
||||
except ValueError as e:
|
||||
db.session.rollback()
|
||||
flash(f"Error updating schedule: {str(e)}", "error")
|
||||
|
||||
schedule = {
|
||||
sc.hour: sc.weight
|
||||
for sc in ScheduleConfig.query.order_by(ScheduleConfig.hour).all()
|
||||
}
|
||||
volume = VolumeConfig.query.first()
|
||||
return render_template(
|
||||
"schedule.html.jinja",
|
||||
schedule=schedule,
|
||||
volume=volume.volume if volume else 0,
|
||||
app_title="PaperScraper",
|
||||
)
|
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@ from flask import (
|
||||
from ..db import db
|
||||
from ..models import PaperMetadata, ActivityLog
|
||||
from ..celery import celery # Import the celery instance directly
|
||||
from ..defaults import DUPLICATE_STRATEGIES
|
||||
|
||||
bp = Blueprint("upload", __name__)
|
||||
|
||||
@ -55,10 +56,10 @@ def upload():
|
||||
|
||||
return jsonify({"task_id": task.id})
|
||||
|
||||
return render_template("upload.html.jinja")
|
||||
return render_template("upload.html.jinja", duplicate_strategies=DUPLICATE_STRATEGIES)
|
||||
|
||||
@celery.task(bind=True)
|
||||
def process_csv(self, file_content, delimiter, duplicate_strategy):
|
||||
def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
|
||||
"""Process CSV file and import paper metadata."""
|
||||
|
||||
# With the ContextTask in place, we're already inside an app context
|
||||
|
@ -1,4 +1,5 @@
|
||||
from celery import Celery
|
||||
from celery.schedules import crontab
|
||||
|
||||
# Create Celery instance without Flask app initially
|
||||
celery = Celery(
|
||||
@ -29,6 +30,14 @@ def configure_celery(app=None):
|
||||
worker_max_memory_per_child=1000000, # 1GB memory limit
|
||||
task_acks_late=True, # Acknowledge tasks after completion
|
||||
task_reject_on_worker_lost=True, # Requeue tasks if worker dies
|
||||
# Configure Beat schedule for periodic tasks
|
||||
beat_schedule={
|
||||
'scheduled-scraper-hourly': {
|
||||
'task': 'scipaperloader.blueprints.scraper.dummy_scheduled_scraper',
|
||||
'schedule': crontab(minute=0), # Run at the start of every hour
|
||||
'options': {'expires': 3600}
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Create a custom task class that pushes the Flask application context
|
||||
|
@ -1 +1,25 @@
|
||||
DEBUG = False # make sure DEBUG is off unless enabled explicitly otherwise
|
||||
|
||||
# Define duplicate handling strategies with descriptions for the UI
|
||||
DUPLICATE_STRATEGIES = {
|
||||
"skip": {
|
||||
"name": "Skip duplicates",
|
||||
"description": "Skip papers that already exist in the database",
|
||||
"is_default": True
|
||||
},
|
||||
"update": {
|
||||
"name": "Update duplicates",
|
||||
"description": "Update existing papers with new metadata",
|
||||
"is_default": False
|
||||
},
|
||||
# Add new strategies here, they will automatically appear in the UI
|
||||
# Example:
|
||||
# "merge": {
|
||||
# "name": "Merge duplicates",
|
||||
# "description": "Merge new data with existing data, keeping both values",
|
||||
# "is_default": False
|
||||
# }
|
||||
}
|
||||
|
||||
# Configuration limits
|
||||
MAX_VOLUME = 100000 # Maximum volume limit for scraper configuration
|
||||
|
@ -210,6 +210,72 @@ class VolumeConfig(db.Model):
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
volume = db.Column(db.Float) # volume of papers to scrape per day
|
||||
|
||||
class DownloadPathConfig(db.Model):
    """Stores the base directory for downloaded files.

    The class methods below only ever read or write the first row.
    """

    id = db.Column(db.Integer, primary_key=True)
    # Placeholder default used when a row is created without a path
    path = db.Column(db.String(255), default="/path/to/dummy/papers")

    @classmethod
    def get_path(cls):
        """Return the stored path, inserting a default row if none exists."""
        row = cls.query.first()
        if row:
            return row.path

        # Nothing stored yet: persist the placeholder default
        row = cls(path="/path/to/dummy/papers")
        db.session.add(row)
        db.session.commit()
        return row.path

    @classmethod
    def set_path(cls, new_path):
        """Store ``new_path`` (creating the row if needed) and return the row."""
        row = cls.query.first()
        if row:
            row.path = new_path
        else:
            row = cls(path=new_path)
            db.session.add(row)
        db.session.commit()
        return row
|
||||
|
||||
class ScraperState(db.Model):
    """Persists whether the scraper is active and/or paused.

    All class methods operate on the first row, which
    ``get_current_state`` creates on demand.
    """

    id = db.Column(db.Integer, primary_key=True)
    is_active = db.Column(db.Boolean, default=False)
    is_paused = db.Column(db.Boolean, default=False)
    # Set on insert and refreshed on every update
    last_updated = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    @classmethod
    def get_current_state(cls):
        """Return the state row, creating an inactive, unpaused one if absent."""
        current = cls.query.first()
        if current:
            return current

        current = cls(is_active=False, is_paused=False)
        db.session.add(current)
        db.session.commit()
        return current

    @classmethod
    def set_active(cls, active):
        """Store the ``is_active`` flag and return the state row."""
        current = cls.get_current_state()
        current.is_active = active
        db.session.commit()
        return current

    @classmethod
    def set_paused(cls, paused):
        """Store the ``is_paused`` flag and return the state row."""
        current = cls.get_current_state()
        current.is_paused = paused
        db.session.commit()
        return current

    @classmethod
    def is_scraper_active(cls):
        """Report whether the scraper is active and not paused."""
        current = cls.get_current_state()
        return current.is_active and not current.is_paused
|
||||
|
||||
|
||||
def init_schedule_config():
|
||||
"""Initialize ScheduleConfig with default values if empty"""
|
||||
@ -241,3 +307,9 @@ def init_schedule_config():
|
||||
default_volume = VolumeConfig(volume=100)
|
||||
db.session.add(default_volume)
|
||||
db.session.commit()
|
||||
|
||||
# Initialize DownloadPathConfig if it doesn't exist
|
||||
if DownloadPathConfig.query.count() == 0:
|
||||
default_path = DownloadPathConfig(path="/path/to/dummy/papers")
|
||||
db.session.add(default_path)
|
||||
db.session.commit()
|
||||
|
@ -1,21 +0,0 @@
|
||||
import time
|
||||
|
||||
from .db import db
|
||||
from .models import PaperMetadata
|
||||
|
||||
|
||||
def run_scraper():
|
||||
while True:
|
||||
with db.app.app_context():
|
||||
paper = Paper.query.filter_by(status="Pending").first()
|
||||
if paper:
|
||||
try:
|
||||
# Scraping logic (e.g. download PDF)
|
||||
paper.status = "Done"
|
||||
paper.file_path = "papers/some_path.pdf"
|
||||
except Exception as e:
|
||||
paper.status = "Failed"
|
||||
paper.error_message = str(e)
|
||||
db.session.commit()
|
||||
else:
|
||||
time.sleep(60)
|
@ -16,6 +16,8 @@
|
||||
{% include "nav.html.jinja" %}
|
||||
<main class="container my-5">{% block content %}{% endblock content %}</main>
|
||||
{% include "footer.html.jinja" %}
|
||||
|
||||
{% block scripts %}{% endblock scripts %}
|
||||
</body>
|
||||
|
||||
</html>
|
61
scipaperloader/templates/config/general.html.jinja
Normal file
61
scipaperloader/templates/config/general.html.jinja
Normal file
@ -0,0 +1,61 @@
|
||||
<!-- General Configuration Tab -->
|
||||
<div class="tab-pane active">
|
||||
<div class="config-form">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5>General Configuration</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<!-- include flash messages template -->
|
||||
{% include "partials/flash_messages.html.jinja" %}
|
||||
|
||||
<form action="{{ url_for('config.update_general') }}" method="post">
|
||||
<div class="form-section">
|
||||
<h6>Scraper Volume</h6>
|
||||
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="totalVolume" class="form-label">Papers per day:</label>
|
||||
<input type="number" class="form-control" id="totalVolume" name="total_volume" min="1"
|
||||
max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
|
||||
<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-section">
|
||||
<h6>Download Path</h6>
|
||||
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
|
||||
<div class="mb-3">
|
||||
<label for="downloadPath" class="form-label">Download Directory:</label>
|
||||
<input type="text" class="form-control" id="downloadPath" name="download_path"
|
||||
value="{{ download_path_config.path }}" required>
|
||||
<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
|
||||
Ensure the directory exists and the application has write permissions.</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-section">
|
||||
<h6>System Settings</h6>
|
||||
<p class="text-muted">Configure general system behavior.</p>
|
||||
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
|
||||
<label class="form-check-label" for="enableNotifications">
|
||||
Enable email notifications
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="enableLogging" checked>
|
||||
<label class="form-check-label" for="enableLogging">
|
||||
Enable detailed activity logging
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button type="submit" class="btn btn-primary">Save General Settings</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
50
scipaperloader/templates/config/index.html.jinja
Normal file
50
scipaperloader/templates/config/index.html.jinja
Normal file
@ -0,0 +1,50 @@
|
||||
{% extends "base.html.jinja" %}
|
||||
|
||||
{% block title %}Configuration{% endblock title %}
|
||||
|
||||
{% block styles %}
|
||||
{{ super() }}
|
||||
<style>
|
||||
.nav-tabs .nav-link {
|
||||
color: #495057;
|
||||
}
|
||||
|
||||
.nav-tabs .nav-link.active {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.config-form {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.form-section {
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
</style>
|
||||
{% endblock styles %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container mt-4">
|
||||
<h1>Configuration</h1>
|
||||
|
||||
<ul class="nav nav-tabs mb-4">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link {% if active_tab == 'general' %}active{% endif %}"
|
||||
href="{{ url_for('config.general') }}">General</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link {% if active_tab == 'schedule' %}active{% endif %}"
|
||||
href="{{ url_for('config.schedule') }}">Schedule</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<div class="tab-content">
|
||||
{% if active_tab == 'general' %}
|
||||
{% include "config/general.html.jinja" %}
|
||||
{% elif active_tab == 'schedule' %}
|
||||
{% include "config/schedule.html.jinja" %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock content %}
|
285
scipaperloader/templates/config/schedule.html.jinja
Normal file
285
scipaperloader/templates/config/schedule.html.jinja
Normal file
@ -0,0 +1,285 @@
|
||||
<style>
|
||||
.timeline {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 3px;
|
||||
user-select: none;
|
||||
/* Prevent text selection during drag */
|
||||
}
|
||||
|
||||
.hour-block {
|
||||
width: 49px;
|
||||
height: 70px;
|
||||
/* Increased height to fit additional text */
|
||||
border-radius: 5px;
|
||||
text-align: center;
|
||||
line-height: 1.2;
|
||||
font-size: 0.9rem;
|
||||
padding-top: 6px;
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
transition: background-color 0.2s ease-in-out;
|
||||
margin: 1px;
|
||||
}
|
||||
|
||||
.hour-block.selected {
|
||||
outline: 2px solid #4584b8;
|
||||
}
|
||||
|
||||
.papers {
|
||||
font-size: 0.7rem;
|
||||
margin-top: 2px;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
  // Server-rendered initial state handed to the Alpine `scheduleManager` component below.
  // Both values go through `tojson` so they are always emitted as valid JavaScript
  // literals (e.g. a missing volume becomes `null` instead of the bare identifier `None`,
  // which would throw a ReferenceError and stop the component from initialising).
  const initialSchedule = {{ schedule | tojson }};
  const totalVolume = {{ volume | tojson }};
</script>
|
||||
|
||||
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="tab-pane active">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5>Scheduling Configuration</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
|
||||
<!-- include flash messages template -->
|
||||
{% include "partials/flash_messages.html.jinja" %}
|
||||
|
||||
<!-- Content -->
|
||||
<div class="mb-3">
|
||||
<h3>How it Works</h3>
|
||||
<p class="text-muted mb-0">
|
||||
This page allows you to configure the daily volume of papers to be
|
||||
downloaded and the hourly download weights for the papers. The weights
|
||||
determine how many papers will be downloaded during each hour of the day.
|
||||
The total volume (<strong x-text="volume"></strong> papers/day) is split
|
||||
across all hours based on their relative weights. Each weight controls the
|
||||
proportion of papers downloaded during that hour. Click to select one or
|
||||
more hours below. Then assign a weight to them using the input and apply
|
||||
it. Color indicates relative intensity. The total daily volume will be
|
||||
split proportionally across these weights.
|
||||
<strong>Don't forget to submit the changes!</strong>
|
||||
</p>
|
||||
<h3>Example</h3>
|
||||
<p class="text-muted mb-0">
|
||||
If the total volume is <strong>240 papers</strong> and hours are
|
||||
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
|
||||
<strong>40, 80, and 120 papers</strong> respectively.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<h2 class="mt-4">Volume</h2>
|
||||
|
||||
<div class="align-items-start flex-wrap gap-2">
|
||||
<p class="text-muted">
|
||||
The total volume of data to be downloaded each day is
|
||||
<strong x-text="volume"></strong> papers.
|
||||
</p>
|
||||
<div class="d-flex align-items-center mb-3" x-data="{ volumeValue: volume }">
|
||||
<div class="input-group w-50">
|
||||
<label class="input-group-text">Papers per day:</label>
|
||||
<input type="number" class="form-control" x-model="volumeValue" min="1" max="{{ max_volume }}"
|
||||
required />
|
||||
<button type="button" class="btn btn-primary" @click="updateVolume()">
|
||||
Update Volume
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h2 class="mt-4">Current Schedule</h2>
|
||||
<form x-data id="scheduleForm">
|
||||
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
|
||||
<template x-for="hour in Object.keys(schedule)" :key="hour">
|
||||
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour" :style="getBackgroundStyle(hour)"
|
||||
:class="{'selected': isSelected(hour)}" @mousedown="startDrag($event, hour)"
|
||||
@mouseover="dragSelect(hour)">
|
||||
<div><strong x-text="formatHour(hour)"></strong></div>
|
||||
<div class="weight"><span x-text="schedule[hour]"></span></div>
|
||||
<div class="papers">
|
||||
<span x-text="getPapersPerHour(hour)"></span> p.
|
||||
</div>
|
||||
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<div class="input-group mb-4 w-50">
|
||||
<label class="input-group-text">Set Weight:</label>
|
||||
<input type="number" step="0.1" min="0" max="5" x-model="newWeight" class="form-control" />
|
||||
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
|
||||
Apply to Selected
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="d-flex justify-content-between">
|
||||
<a href="{{ url_for('config.general') }}" class="btn btn-outline-secondary">⬅ Back</a>
|
||||
<button type="button" class="btn btn-success" @click="saveSchedule()">💾 Save Schedule</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function scheduleManager(initial, volume) {
|
||||
return {
|
||||
schedule: { ...initial },
|
||||
volume: volume,
|
||||
selectedHours: [],
|
||||
newWeight: 1.0,
|
||||
volumeValue: volume,
|
||||
isDragging: false,
|
||||
dragOperation: null,
|
||||
|
||||
formatHour(h) {
|
||||
return String(h).padStart(2, "0") + ":00";
|
||||
},
|
||||
|
||||
updateVolume() {
|
||||
fetch('{{ url_for('config.api_update_config') }}', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
volume: this.volumeValue
|
||||
})
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
this.volume = parseFloat(this.volumeValue);
|
||||
showFlashMessage('Volume updated successfully!', 'success');
|
||||
} else {
|
||||
showFlashMessage(data.updates?.[0]?.message || 'Error updating volume', 'error');
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error:', error);
|
||||
showFlashMessage('Network error occurred', 'error');
|
||||
});
|
||||
},
|
||||
|
||||
getBackgroundStyle(hour) {
|
||||
const weight = parseFloat(this.schedule[hour]);
|
||||
const maxWeight = 2.5; // You can adjust this
|
||||
|
||||
// Normalize weight (0.0 to 1.0)
|
||||
const t = Math.min(weight / maxWeight, 1.0);
|
||||
|
||||
// Interpolate HSL lightness: 95% (light) to 30% (dark)
|
||||
const lightness = 95 - t * 65; // 95 → 30
|
||||
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
|
||||
|
||||
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
|
||||
|
||||
return {
|
||||
backgroundColor,
|
||||
color: textColor,
|
||||
};
|
||||
},
|
||||
|
||||
getBackgroundStyleFromValue(value) {
|
||||
const weight = parseFloat(value);
|
||||
const maxWeight = 2.5; // You can adjust this
|
||||
|
||||
// Normalize weight (0.0 to 1.0)
|
||||
const t = Math.min(weight / maxWeight, 1.0);
|
||||
|
||||
// Interpolate HSL lightness: 95% (light) to 30% (dark)
|
||||
const lightness = 95 - t * 65; // 95 → 30
|
||||
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
|
||||
|
||||
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
|
||||
|
||||
return {
|
||||
backgroundColor,
|
||||
color: textColor,
|
||||
};
|
||||
},
|
||||
|
||||
startDrag(event, hour) {
|
||||
event.preventDefault();
|
||||
this.isDragging = true;
|
||||
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
|
||||
this.toggleSelect(hour);
|
||||
},
|
||||
|
||||
dragSelect(hour) {
|
||||
if (!this.isDragging) return;
|
||||
const selected = this.isSelected(hour);
|
||||
if (this.dragOperation === "add" && !selected) {
|
||||
this.selectedHours.push(hour);
|
||||
} else if (this.dragOperation === "remove" && selected) {
|
||||
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
|
||||
}
|
||||
},
|
||||
|
||||
endDrag() {
|
||||
this.isDragging = false;
|
||||
},
|
||||
|
||||
toggleSelect(hour) {
|
||||
if (this.isSelected(hour)) {
|
||||
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
|
||||
} else {
|
||||
this.selectedHours.push(hour);
|
||||
}
|
||||
},
|
||||
|
||||
isSelected(hour) {
|
||||
return this.selectedHours.includes(hour);
|
||||
},
|
||||
|
||||
applyWeight() {
|
||||
this.selectedHours.forEach((hour) => {
|
||||
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
|
||||
});
|
||||
this.selectedHours = [];
|
||||
},
|
||||
|
||||
getTotalWeight() {
|
||||
return Object.values(this.schedule).reduce(
|
||||
(sum, w) => sum + parseFloat(w),
|
||||
0
|
||||
);
|
||||
},
|
||||
|
||||
getPapersPerHour(hour) {
|
||||
const total = this.getTotalWeight();
|
||||
if (total === 0) return 0;
|
||||
return (
|
||||
(parseFloat(this.schedule[hour]) / total) *
|
||||
this.volume
|
||||
).toFixed(1);
|
||||
},
|
||||
|
||||
saveSchedule() {
|
||||
fetch('{{ url_for('config.api_update_config') }}', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
schedule: this.schedule
|
||||
})
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
showFlashMessage('Schedule updated successfully!', 'success');
|
||||
} else {
|
||||
showFlashMessage(data.updates?.[0]?.message || 'Error updating schedule', 'error');
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error:', error);
|
||||
showFlashMessage('Network error occurred', 'error');
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
</script>
|
@ -58,7 +58,7 @@
|
||||
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
|
||||
usage pattern.
|
||||
</p>
|
||||
<a href="{{ url_for('schedule.schedule') }}" class="btn btn-sm btn-outline-warning">Adjust Schedule</a>
|
||||
<a href="{{ url_for('config.schedule') }}" class="btn btn-sm btn-outline-warning">Adjust Schedule</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -17,7 +17,7 @@
|
||||
<a class="nav-link" href="{{ url_for('papers.list_papers') }}">Papers</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="{{ url_for('schedule.schedule') }}">Schedule</a>
|
||||
<a class="nav-link" href="{{ url_for('config.general') }}">Configuration</a>
|
||||
</li>
|
||||
<li class="nav-item dropdown">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-bs-toggle="dropdown"
|
||||
|
@ -192,9 +192,12 @@
|
||||
<td>
|
||||
<a href="https://doi.org/{{ paper.doi }}" target="_blank" class="icon-link icon-link-hover">
|
||||
{{ paper.doi }}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="bi" viewBox="0 0 16 16" aria-hidden="true">
|
||||
<path
|
||||
d="M1 8a.5.5 0 0 1 .5-.5h11.793l-3.147-3.146a.5.5 0 0 1 .708-.708l4 4a.5.5 0 0 1 0 .708l-4 4a.5.5 0 0 1-.708-.708L13.293 8.5H1.5A.5.5 0 0 1 1 8z" />
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
|
||||
class="bi bi-box-arrow-up-left" viewBox="0 0 16 16">
|
||||
<path fill-rule="evenodd"
|
||||
d="M7.364 3.5a.5.5 0 0 1 .5-.5H14.5A1.5 1.5 0 0 1 16 4.5v10a1.5 1.5 0 0 1-1.5 1.5h-10A1.5 1.5 0 0 1 3 14.5V7.864a.5.5 0 1 1 1 0V14.5a.5.5 0 0 0 .5.5h10a.5.5 0 0 0 .5-.5v-10a.5.5 0 0 0-.5-.5H7.864a.5.5 0 0 1-.5-.5" />
|
||||
<path fill-rule="evenodd"
|
||||
d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 0 1H1.707l8.147 8.146a.5.5 0 0 1-.708.708L1 1.707V5.5a.5.5 0 0 1-1 0z" />
|
||||
</svg>
|
||||
</a>
|
||||
</td>
|
||||
@ -203,9 +206,12 @@
|
||||
<a href="https://search.worldcat.org/search?q=issn:{{ paper.issn }}" target="_blank"
|
||||
class="icon-link icon-link-hover">
|
||||
{{ paper.issn }}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="bi" viewBox="0 0 16 16" aria-hidden="true">
|
||||
<path
|
||||
d="M1 8a.5.5 0 0 1 .5-.5h11.793l-3.147-3.146a.5.5 0 0 1 .708-.708l4 4a.5.5 0 0 1 0 .708l-4 4a.5.5 0 0 1-.708-.708L13.293 8.5H1.5A.5.5 0 0 1 1 8z" />
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
|
||||
class="bi bi-box-arrow-up-left" viewBox="0 0 16 16">
|
||||
<path fill-rule="evenodd"
|
||||
d="M7.364 3.5a.5.5 0 0 1 .5-.5H14.5A1.5 1.5 0 0 1 16 4.5v10a1.5 1.5 0 0 1-1.5 1.5h-10A1.5 1.5 0 0 1 3 14.5V7.864a.5.5 0 1 1 1 0V14.5a.5.5 0 0 0 .5.5h10a.5.5 0 0 0 .5-.5v-10a.5.5 0 0 0-.5-.5H7.864a.5.5 0 0 1-.5-.5" />
|
||||
<path fill-rule="evenodd"
|
||||
d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 0 1H1.707l8.147 8.146a.5.5 0 0 1-.708.708L1 1.707V5.5a.5.5 0 0 1-1 0z" />
|
||||
</svg>
|
||||
</a>
|
||||
</td>
|
||||
|
93
scipaperloader/templates/partials/flash_messages.html.jinja
Normal file
93
scipaperloader/templates/partials/flash_messages.html.jinja
Normal file
@ -0,0 +1,93 @@
|
||||
<!-- Server-side flash messages from Flask -->
|
||||
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
{# Bootstrap has no `alert-error` or `alert-message` class, so translate those two
   Flask categories; any other category is used as the alert class suffix unchanged. #}
{% set alert_classes = {'error': 'danger', 'message': 'info'} %}
<div class="server-flash-messages">
  {% for category, message in messages %}
  <div class="alert alert-{{ alert_classes.get(category, category) }} alert-dismissible fade show" role="alert">
    {{ message }}
    <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
  </div>
  {% endfor %}
</div>
{% endif %}
{% endwith %}
|
||||
|
||||
<!-- JavaScript flash message container for client-side messages -->
|
||||
<div id="clientFlashContainer"></div>
|
||||
|
||||
<style>
|
||||
.client-flash-message {
|
||||
position: fixed;
|
||||
top: 30%;
|
||||
left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
z-index: 1000;
|
||||
width: 300px;
|
||||
text-align: center;
|
||||
font-weight: bold;
|
||||
padding: 12px;
|
||||
margin-bottom: 20px;
|
||||
border-radius: 6px;
|
||||
opacity: 1;
|
||||
transition: opacity 5s ease-in-out;
|
||||
}
|
||||
|
||||
.client-flash-message.success {
|
||||
background-color: #d4edda;
|
||||
border-color: #c3e6cb;
|
||||
color: #155724;
|
||||
}
|
||||
|
||||
.client-flash-message.error {
|
||||
background-color: #f8d7da;
|
||||
border-color: #f5c6cb;
|
||||
color: #721c24;
|
||||
}
|
||||
|
||||
.client-flash-message.info {
|
||||
background-color: #d1ecf1;
|
||||
border-color: #bee5eb;
|
||||
color: #0c5460;
|
||||
}
|
||||
|
||||
.client-flash-message.warning {
|
||||
background-color: #fff3cd;
|
||||
border-color: #ffeeba;
|
||||
color: #856404;
|
||||
}
|
||||
|
||||
.client-flash-message.fade {
|
||||
opacity: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
|
||||
// Global flash message function that can be used from anywhere
|
||||
/**
 * Show a transient, centred flash message and remove it again after `duration` ms.
 *
 * @param {string} message  Text to display (inserted as text, never as HTML).
 * @param {string} [type='success']  Modifier class: 'success', 'error', 'info' or 'warning'.
 * @param {number} [duration=5000]  Total time in milliseconds the message stays in the DOM.
 * @returns {HTMLDivElement} The created message element.
 */
function showFlashMessage(message, type = 'success', duration = 5000) {
  const flashMsg = document.createElement('div');
  flashMsg.className = `client-flash-message ${type}`;
  flashMsg.textContent = message;
  // Announce the message to assistive technology, like the server-rendered alerts do.
  flashMsg.setAttribute('role', 'alert');

  // The message is position: fixed, so <body> is an equivalent parent if the
  // dedicated container is not present on the page.
  const container = document.getElementById('clientFlashContainer') || document.body;
  container.appendChild(flashMsg);

  // The fade occupies the last 3 s of the lifetime (or all of it for shorter
  // durations). Set the transition length to that window so the message has fully
  // faded out by the time it is removed; the stylesheet's 5 s transition would
  // still be mid-fade at that point.
  const fadeMs = Math.min(3000, Math.max(0, duration));
  flashMsg.style.transitionDuration = `${fadeMs}ms`;
  setTimeout(() => flashMsg.classList.add('fade'), Math.max(0, duration - fadeMs));

  // Remove the element once its lifetime is over.
  setTimeout(() => flashMsg.remove(), Math.max(0, duration));

  return flashMsg;
}
|
||||
|
||||
// Initialize toast messages if Bootstrap is used
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
// Initialize any Bootstrap toasts if they exist
|
||||
if (typeof bootstrap !== 'undefined' && bootstrap.Toast) {
|
||||
const toastElList = [].slice.call(document.querySelectorAll('.toast'));
|
||||
toastElList.map(function (toastEl) {
|
||||
return new bootstrap.Toast(toastEl);
|
||||
});
|
||||
}
|
||||
});
|
||||
</script>
|
@ -1,270 +0,0 @@
|
||||
{% extends "base.html.jinja" %} {% block content %}
|
||||
<style>
|
||||
.timeline {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 3px;
|
||||
user-select: none;
|
||||
/* Prevent text selection during drag */
|
||||
}
|
||||
|
||||
.hour-block {
|
||||
width: 49px;
|
||||
height: 70px;
|
||||
/* Increased height to fit additional text */
|
||||
border-radius: 5px;
|
||||
text-align: center;
|
||||
line-height: 1.2;
|
||||
font-size: 0.9rem;
|
||||
padding-top: 6px;
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
transition: background-color 0.2s ease-in-out;
|
||||
margin: 1px;
|
||||
}
|
||||
|
||||
.hour-block.selected {
|
||||
outline: 2px solid #4584b8;
|
||||
}
|
||||
|
||||
.papers {
|
||||
font-size: 0.7rem;
|
||||
margin-top: 2px;
|
||||
}
|
||||
|
||||
.flash-message {
|
||||
position: fixed;
|
||||
top: 30%;
|
||||
left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
z-index: 1000;
|
||||
width: 300px;
|
||||
text-align: center;
|
||||
font-weight: bold;
|
||||
padding: 12px;
|
||||
margin-bottom: 20px;
|
||||
border-radius: 6px;
|
||||
opacity: 1;
|
||||
transition: opacity 5s ease-in-out;
|
||||
}
|
||||
|
||||
.flash-message.success {
|
||||
background-color: #d4edda;
|
||||
border-color: #c3e6cb;
|
||||
color: #155724;
|
||||
}
|
||||
|
||||
.flash-message.error {
|
||||
background-color: #f8d7da;
|
||||
border-color: #f5c6cb;
|
||||
color: #721c24;
|
||||
}
|
||||
|
||||
.flash-message.fade {
|
||||
opacity: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
|
||||
const initialSchedule = {{ schedule | tojson }};
|
||||
const totalVolume = {{ volume }};
|
||||
</script>
|
||||
|
||||
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="container">
|
||||
<h1 class="mb-4">🕒 Configure Hourly Download Weights</h1>
|
||||
|
||||
<!-- Flash Messages -->
|
||||
{% with messages = get_flashed_messages(with_categories=true) %} {% if
|
||||
messages %}
|
||||
<div id="flash-messages">
|
||||
{% for category, message in messages %}
|
||||
<div class="flash-message {{ category }}" x-data="{}"
|
||||
x-init="setTimeout(() => $el.classList.add('fade'), 100); setTimeout(() => $el.remove(), 5000)">
|
||||
{{ message }}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %} {% endwith %}
|
||||
|
||||
<!-- Content -->
|
||||
<div class="mb-3">
|
||||
<h3>How it Works</h3>
|
||||
<p class="text-muted mb-0">
|
||||
This page allows you to configure the daily volume of papers to be
|
||||
downloaded and the hourly download weights for the papers. The weights
|
||||
determine how many papers will be downloaded during each hour of the day.
|
||||
The total volume (<strong x-text="volume"></strong> papers/day) is split
|
||||
across all hours based on their relative weights. Each weight controls the
|
||||
proportion of papers downloaded during that hour. Click to select one or
|
||||
more hours below. Then assign a weight to them using the input and apply
|
||||
it. Color indicates relative intensity. The total daily volume will be
|
||||
split proportionally across these weights.
|
||||
<strong>Don't forget to submit the changes!</strong>
|
||||
</p>
|
||||
<h3>Example</h3>
|
||||
<p class="text-muted mb-0">
|
||||
If the total volume is <strong>240 papers</strong> and hours are
|
||||
<strong>weighted as 1.0, 2.0, and 3.0</strong>, they will receive
|
||||
<strong>40, 80, and 120 papers</strong> respectively.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<h2 class="mt-4">Volume</h2>
|
||||
|
||||
<div class="align-items-start flex-wrap gap-2">
|
||||
<p class="text-muted">
|
||||
The total volume of data to be downloaded each day is
|
||||
<strong x-text="volume"></strong> papers.
|
||||
</p>
|
||||
<div class="d-flex align-items-center mb-3">
|
||||
<form method="post" action="{{ url_for('schedule.schedule') }}" class="input-group w-50">
|
||||
<label class="input-group-text">Papers per day:</label>
|
||||
<input type="number" class="form-control" name="total_volume" value="{{ volume }}" min="1" max="1000"
|
||||
required />
|
||||
<button type="submit" class="btn btn-primary">Update Volume</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h2 class="mt-4">Current Schedule</h2>
|
||||
<form method="post" action="{{ url_for('schedule.schedule') }}">
|
||||
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
|
||||
<template x-for="hour in Object.keys(schedule)" :key="hour">
|
||||
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour" :style="getBackgroundStyle(hour)"
|
||||
:class="{'selected': isSelected(hour)}" @mousedown="startDrag($event, hour)" @mouseover="dragSelect(hour)">
|
||||
<div><strong x-text="formatHour(hour)"></strong></div>
|
||||
<div class="weight"><span x-text="schedule[hour]"></span></div>
|
||||
<div class="papers">
|
||||
<span x-text="getPapersPerHour(hour)"></span> p.
|
||||
</div>
|
||||
<input type="hidden" :name="'hour_' + hour" :value="schedule[hour]" />
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<div class="input-group mb-4 w-50">
|
||||
<label class="input-group-text">Set Weight:</label>
|
||||
<input type="number" step="0.1" min="0" max="5" x-model="newWeight" class="form-control" />
|
||||
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
|
||||
Apply to Selected
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="d-flex justify-content-between">
|
||||
<a href="{{ url_for('main.index') }}" class="btn btn-outline-secondary">⬅ Back</a>
|
||||
<button type="submit" class="btn btn-success">💾 Save Schedule</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function scheduleManager(initial, volume) {
|
||||
return {
|
||||
schedule: { ...initial },
|
||||
volume: volume,
|
||||
selectedHours: [],
|
||||
newWeight: 1.0,
|
||||
isDragging: false,
|
||||
dragOperation: null,
|
||||
|
||||
formatHour(h) {
|
||||
return String(h).padStart(2, "0") + ":00";
|
||||
},
|
||||
|
||||
getBackgroundStyle(hour) {
|
||||
const weight = parseFloat(this.schedule[hour]);
|
||||
const maxWeight = 2.5; // You can adjust this
|
||||
|
||||
// Normalize weight (0.0 to 1.0)
|
||||
const t = Math.min(weight / maxWeight, 1.0);
|
||||
|
||||
// Interpolate HSL lightness: 95% (light) to 30% (dark)
|
||||
const lightness = 95 - t * 65; // 95 → 30
|
||||
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
|
||||
|
||||
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
|
||||
|
||||
return {
|
||||
backgroundColor,
|
||||
color: textColor,
|
||||
};
|
||||
},
|
||||
|
||||
getBackgroundStyleFromValue(value) {
|
||||
const weight = parseFloat(value);
|
||||
const maxWeight = 2.5; // You can adjust this
|
||||
|
||||
// Normalize weight (0.0 to 1.0)
|
||||
const t = Math.min(weight / maxWeight, 1.0);
|
||||
|
||||
// Interpolate HSL lightness: 95% (light) to 30% (dark)
|
||||
const lightness = 95 - t * 65; // 95 → 30
|
||||
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
|
||||
|
||||
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
|
||||
|
||||
return {
|
||||
backgroundColor,
|
||||
color: textColor,
|
||||
};
|
||||
},
|
||||
|
||||
startDrag(event, hour) {
|
||||
event.preventDefault();
|
||||
this.isDragging = true;
|
||||
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
|
||||
this.toggleSelect(hour);
|
||||
},
|
||||
|
||||
dragSelect(hour) {
|
||||
if (!this.isDragging) return;
|
||||
const selected = this.isSelected(hour);
|
||||
if (this.dragOperation === "add" && !selected) {
|
||||
this.selectedHours.push(hour);
|
||||
} else if (this.dragOperation === "remove" && selected) {
|
||||
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
|
||||
}
|
||||
},
|
||||
|
||||
endDrag() {
|
||||
this.isDragging = false;
|
||||
},
|
||||
|
||||
toggleSelect(hour) {
|
||||
if (this.isSelected(hour)) {
|
||||
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
|
||||
} else {
|
||||
this.selectedHours.push(hour);
|
||||
}
|
||||
},
|
||||
|
||||
isSelected(hour) {
|
||||
return this.selectedHours.includes(hour);
|
||||
},
|
||||
|
||||
applyWeight() {
|
||||
this.selectedHours.forEach((hour) => {
|
||||
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
|
||||
});
|
||||
this.selectedHours = [];
|
||||
},
|
||||
|
||||
getTotalWeight() {
|
||||
return Object.values(this.schedule).reduce(
|
||||
(sum, w) => sum + parseFloat(w),
|
||||
0
|
||||
);
|
||||
},
|
||||
|
||||
getPapersPerHour(hour) {
|
||||
const total = this.getTotalWeight();
|
||||
if (total === 0) return 0;
|
||||
return (
|
||||
(parseFloat(this.schedule[hour]) / total) *
|
||||
this.volume
|
||||
).toFixed(1);
|
||||
},
|
||||
};
|
||||
}
|
||||
</script>
|
||||
{% endblock content %}
|
@ -36,51 +36,6 @@
|
||||
max-width: 350px;
|
||||
z-index: 1050;
|
||||
}
|
||||
|
||||
/* Enhanced scheduler styles */
|
||||
.timeline {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 3px;
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.hour-block {
|
||||
width: 49px;
|
||||
height: 70px;
|
||||
border-radius: 5px;
|
||||
text-align: center;
|
||||
line-height: 1.2;
|
||||
font-size: 0.9rem;
|
||||
padding-top: 6px;
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
transition: background-color 0.2s ease-in-out;
|
||||
margin: 1px;
|
||||
}
|
||||
|
||||
.hour-block.selected {
|
||||
outline: 2px solid #4584b8;
|
||||
}
|
||||
|
||||
.papers {
|
||||
font-size: 0.7rem;
|
||||
margin-top: 2px;
|
||||
}
|
||||
|
||||
/* Tab styles */
|
||||
.nav-tabs .nav-link {
|
||||
color: #495057;
|
||||
}
|
||||
|
||||
.nav-tabs .nav-link.active {
|
||||
font-weight: bold;
|
||||
color: #007bff;
|
||||
}
|
||||
|
||||
.tab-pane {
|
||||
padding-top: 1rem;
|
||||
}
|
||||
</style>
|
||||
{% endblock styles %}
|
||||
|
||||
@ -88,337 +43,113 @@
|
||||
<div class="container mt-4">
|
||||
<h1>Paper Scraper Control Panel</h1>
|
||||
|
||||
<!-- Navigation tabs -->
|
||||
<ul class="nav nav-tabs mb-4" id="scraperTabs" role="tablist">
|
||||
<li class="nav-item" role="presentation">
|
||||
<button class="nav-link active" id="dashboard-tab" data-bs-toggle="tab" data-bs-target="#dashboard"
|
||||
type="button" role="tab" aria-controls="dashboard" aria-selected="true">
|
||||
Dashboard
|
||||
</button>
|
||||
</li>
|
||||
<li class="nav-item" role="presentation">
|
||||
<button class="nav-link" id="schedule-tab" data-bs-toggle="tab" data-bs-target="#schedule" type="button"
|
||||
role="tab" aria-controls="schedule" aria-selected="false">
|
||||
Schedule Configuration
|
||||
</button>
|
||||
</li>
|
||||
</ul>
|
||||
<!-- Include standardized flash messages -->
|
||||
{% include "partials/flash_messages.html.jinja" %}
|
||||
|
||||
<div class="tab-content" id="scraperTabsContent">
|
||||
<!-- Dashboard Tab -->
|
||||
<div class="tab-pane fade show active" id="dashboard" role="tabpanel" aria-labelledby="dashboard-tab">
|
||||
<div class="row mb-4">
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5>Scraper Status</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="d-flex align-items-center mb-3">
|
||||
<div id="statusIndicator" class="status-indicator status-inactive"></div>
|
||||
<span id="statusText">Inactive</span>
|
||||
</div>
|
||||
|
||||
<div class="btn-group" role="group">
|
||||
<button id="startButton" class="btn btn-success">Start</button>
|
||||
<button id="pauseButton" class="btn btn-warning" disabled>Pause</button>
|
||||
<button id="stopButton" class="btn btn-danger" disabled>Stop</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row mb-4">
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5>Scraper Status</h5>
|
||||
</div>
|
||||
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5>Volume Configuration</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<form id="volumeForm">
|
||||
<div class="form-group">
|
||||
<label for="volumeInput">Papers per day:</label>
|
||||
<input type="number" class="form-control" id="volumeInput"
|
||||
value="{{ volume_config.volume if volume_config else 100 }}">
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary mt-2">Update Volume</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="d-flex align-items-center mb-3">
|
||||
<div id="statusIndicator" class="status-indicator status-inactive"></div>
|
||||
<span id="statusText">Inactive</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row mb-4">
|
||||
<div class="col-12">
|
||||
<div class="card">
|
||||
<div class="card-header d-flex justify-content-between align-items-center">
|
||||
<h5>Scraping Activity</h5>
|
||||
<div>
|
||||
<div class="form-check form-switch">
|
||||
<input class="form-check-input" type="checkbox" id="notificationsToggle" checked>
|
||||
<label class="form-check-label" for="notificationsToggle">Show Notifications</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="btn-group mb-3">
|
||||
<button class="btn btn-outline-secondary time-range-btn" data-hours="6">Last 6
|
||||
hours</button>
|
||||
<button class="btn btn-outline-secondary time-range-btn active" data-hours="24">Last 24
|
||||
hours</button>
|
||||
<button class="btn btn-outline-secondary time-range-btn" data-hours="72">Last 3
|
||||
days</button>
|
||||
</div>
|
||||
<div class="stats-chart" id="activityChart"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row mb-4">
|
||||
<div class="col-12">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5>Recent Activity</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="table-responsive">
|
||||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Time</th>
|
||||
<th>Action</th>
|
||||
<th>Status</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="activityLog">
|
||||
<tr>
|
||||
<td colspan="4" class="text-center">Loading activities...</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
<div class="btn-group" role="group">
|
||||
<button id="startButton" class="btn btn-success">Start</button>
|
||||
<button id="pauseButton" class="btn btn-warning" disabled>Pause</button>
|
||||
<button id="stopButton" class="btn btn-danger" disabled>Stop</button>
|
||||
<button id="resetButton" class="btn btn-secondary" disabled>Reset</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Schedule Configuration Tab -->
|
||||
<div class="tab-pane fade" id="schedule" role="tabpanel" aria-labelledby="schedule-tab"
|
||||
x-data="scheduleManager({{ schedule_config | tojson }}, {{ volume_config.volume if volume_config else 100 }})">
|
||||
|
||||
<div class="mb-3">
|
||||
<h3>How it Works</h3>
|
||||
<p class="text-muted mb-0">
|
||||
Configure the daily volume of papers to be downloaded and the hourly download weights.
|
||||
The weights determine how many papers will be downloaded during each hour of the day.
|
||||
The total volume (<strong x-text="volume"></strong> papers/day) is split across all hours based on
|
||||
their relative weights.
|
||||
<strong>Lower weights result in higher scraping rates</strong> for that hour.
|
||||
</p>
|
||||
<h5 class="mt-3">Instructions:</h5>
|
||||
<p class="text-muted">
|
||||
Click to select one or more hours below. Then assign a weight to them using the input and apply it.
|
||||
Color indicates relative intensity. Changes are saved immediately when you click "Update Schedule".
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="card mb-4">
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h4 class="m-0">Volume Configuration</h4>
|
||||
<h5>Volume Configuration</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="text-muted">
|
||||
The total volume of data to be downloaded each day is
|
||||
<strong x-text="volume"></strong> papers.
|
||||
</p>
|
||||
<div class="d-flex align-items-center mb-3">
|
||||
<div class="input-group">
|
||||
<span class="input-group-text">Papers per day:</span>
|
||||
<input type="number" class="form-control" x-model="volume" min="1" max="1000" />
|
||||
<button type="button" class="btn btn-primary" @click="updateVolume()">
|
||||
Update Volume
|
||||
</button>
|
||||
<form id="volumeForm">
|
||||
<div class="form-group">
|
||||
<label for="volumeInput">Papers per day:</label>
|
||||
<input type="number" class="form-control" id="volumeInput"
|
||||
value="{{ volume_config.volume if volume_config else 100 }}" min="1"
|
||||
max="{{ max_volume }}">
|
||||
<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary mt-2">Update Volume</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row mb-4">
|
||||
<div class="col-12">
|
||||
<div class="card">
|
||||
<div class="card-header d-flex justify-content-between align-items-center">
|
||||
<h5>Scraping Activity</h5>
|
||||
<div>
|
||||
<div class="form-check form-switch">
|
||||
<input class="form-check-input" type="checkbox" id="notificationsToggle" checked>
|
||||
<label class="form-check-label" for="notificationsToggle">Show Notifications</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="btn-group mb-3">
|
||||
<button class="btn btn-outline-secondary time-range-btn" data-hours="6">Last 6
|
||||
hours</button>
|
||||
<button class="btn btn-outline-secondary time-range-btn active" data-hours="24">Last 24
|
||||
hours</button>
|
||||
<button class="btn btn-outline-secondary time-range-btn" data-hours="72">Last 3
|
||||
days</button>
|
||||
</div>
|
||||
<div class="stats-chart" id="activityChart"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row mb-4">
|
||||
<div class="col-12">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h4 class="m-0">Hourly Weights</h4>
|
||||
<h5>Recent Activity</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="timeline mb-3" @mouseup="endDrag()" @mouseleave="endDrag()">
|
||||
<template x-for="hour in Object.keys(schedule)" :key="hour">
|
||||
<div class="hour-block" :id="'hour-' + hour" :data-hour="hour"
|
||||
:style="getBackgroundStyle(hour)" :class="{'selected': isSelected(hour)}"
|
||||
@mousedown="startDrag($event, hour)" @mouseover="dragSelect(hour)">
|
||||
<div><strong x-text="formatHour(hour)"></strong></div>
|
||||
<div class="weight"><span x-text="schedule[hour]"></span></div>
|
||||
<div class="papers">
|
||||
<span x-text="getPapersPerHour(hour)"></span> p.
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
<div class="table-responsive">
|
||||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Time</th>
|
||||
<th>Action</th>
|
||||
<th>Status</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="activityLog">
|
||||
<tr>
|
||||
<td colspan="4" class="text-center">Loading activities...</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="input-group mb-4 w-50">
|
||||
<span class="input-group-text">Set Weight:</span>
|
||||
<input type="number" step="0.1" min="0.1" max="5" x-model="newWeight" class="form-control" />
|
||||
<button type="button" class="btn btn-outline-primary" @click="applyWeight()">
|
||||
Apply to Selected
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<button type="button" class="btn btn-success" @click="updateSchedule()">
|
||||
💾 Update Schedule
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Notification template -->
|
||||
<div id="notificationContainer"></div>
|
||||
{% endblock content %}
|
||||
|
||||
{% block scripts %}
|
||||
{{ super() }}
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
|
||||
<script>
|
||||
// Alpine.js scheduler component
|
||||
/**
 * Alpine.js component factory for the hourly-weight schedule editor.
 *
 * @param {Object} initial - Maps hour keys to weights (numbers or numeric
 *     strings). A falsy value yields an empty schedule.
 * @param {number|string} volume - Papers-per-day figure used to derive the
 *     per-hour paper estimate.
 * @returns {Object} Component state plus the helper methods used by the
 *     template (selection, drag handling, weight editing, persistence).
 */
function scheduleManager(initial, volume) {
    // Weight at (or above) which an hour block is drawn at its darkest shade.
    const MAX_WEIGHT_FOR_COLOR = 2.5;

    // Parse a weight; anything that is not a finite number counts as 0 so a
    // single bad entry cannot turn every derived value into NaN.
    const toWeight = (value) => {
        const parsed = parseFloat(value);
        return Number.isFinite(parsed) ? parsed : 0;
    };

    // POST a partial configuration to the backend and report the outcome.
    // Network failures and non-JSON responses are surfaced to the user
    // instead of ending up as unhandled promise rejections.
    const postConfig = (payload, successMessage, onSuccess) =>
        fetch('/scraper/update_config', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(payload)
        })
            .then(response => response.json())
            .then(data => {
                if (data.success) {
                    showNotification(successMessage, 'success');
                    onSuccess();
                } else {
                    showNotification(data.message, 'danger');
                }
            })
            .catch(error => {
                console.error('Error updating scraper configuration:', error);
                showNotification('Error updating configuration: ' + error.message, 'danger');
            });

    return {
        schedule: initial || {},
        volume: volume,
        selectedHours: [],
        newWeight: 1.0,
        isDragging: false,
        dragOperation: null,

        // Render an hour key as a zero-padded "HH:00" label.
        formatHour(h) {
            return String(h).padStart(2, "0") + ":00";
        },

        // Map the hour's weight to a grey shade: heavier weight, darker block.
        getBackgroundStyle(hour) {
            const weight = toWeight(this.schedule[hour]);

            // Normalised weight, clamped to [0, 1] so negative or oversized
            // weights still produce a valid CSS colour.
            const t = Math.min(Math.max(weight / MAX_WEIGHT_FOR_COLOR, 0), 1.0);

            // Interpolate HSL lightness from 95% (light) down to 30% (dark).
            const lightness = 95 - t * 65;

            return {
                backgroundColor: `hsl(210, 10%, ${lightness}%)`,
                // Switch to white text once the background gets dark.
                color: t > 0.65 ? "white" : "black",
            };
        },

        // Begin a drag; the first block decides whether the drag adds to or
        // removes from the selection.
        startDrag(event, hour) {
            event.preventDefault();
            this.isDragging = true;
            this.dragOperation = this.isSelected(hour) ? "remove" : "add";
            this.toggleSelect(hour);
        },

        // Extend the current drag operation to the hovered hour.
        dragSelect(hour) {
            if (!this.isDragging) return;
            const selected = this.isSelected(hour);
            if (this.dragOperation === "add" && !selected) {
                this.selectedHours.push(hour);
            } else if (this.dragOperation === "remove" && selected) {
                this.selectedHours = this.selectedHours.filter((h) => h !== hour);
            }
        },

        endDrag() {
            this.isDragging = false;
        },

        toggleSelect(hour) {
            if (this.isSelected(hour)) {
                this.selectedHours = this.selectedHours.filter((h) => h !== hour);
            } else {
                this.selectedHours.push(hour);
            }
        },

        isSelected(hour) {
            return this.selectedHours.includes(hour);
        },

        // Write the "Set Weight" value to every selected hour, stored as a
        // one-decimal string. An empty or non-numeric input is ignored rather
        // than being stored as the string "NaN".
        applyWeight() {
            const weight = parseFloat(this.newWeight);
            if (!Number.isFinite(weight)) {
                console.warn("Ignoring invalid weight:", this.newWeight);
                return;
            }
            const formatted = weight.toFixed(1);
            this.selectedHours.forEach((hour) => {
                this.schedule[hour] = formatted;
            });
        },

        // Sum of all hourly weights; unparsable entries count as 0.
        getTotalWeight() {
            return Object.values(this.schedule).reduce(
                (sum, w) => sum + toWeight(w),
                0
            );
        },

        // Share of the daily volume assigned to the hour, as a one-decimal
        // string (or the number 0 when the schedule has no weight at all).
        getPapersPerHour(hour) {
            const total = this.getTotalWeight();
            if (total === 0) return 0;
            return (
                (toWeight(this.schedule[hour]) / total) *
                this.volume
            ).toFixed(1);
        },

        // Persist the daily volume and mirror it into the dashboard input.
        updateVolume() {
            return postConfig(
                { volume: parseFloat(this.volume) },
                'Volume updated successfully',
                () => {
                    // The dashboard input may not be present on every page.
                    const input = document.getElementById('volumeInput');
                    if (input) {
                        input.value = this.volume;
                    }
                }
            );
        },

        // Persist the hourly weights and clear the selection on success.
        updateSchedule() {
            return postConfig(
                { schedule: this.schedule },
                'Schedule updated successfully',
                () => {
                    this.selectedHours = [];
                }
            );
        }
    };
}
|
||||
|
||||
// Global variables for the scraper dashboard
|
||||
let notificationsEnabled = true;
|
||||
let activityChart = null;
|
||||
@ -430,6 +161,7 @@
|
||||
const startButton = document.getElementById('startButton');
|
||||
const pauseButton = document.getElementById('pauseButton');
|
||||
const stopButton = document.getElementById('stopButton');
|
||||
const resetButton = document.getElementById('resetButton');
|
||||
const notificationsToggle = document.getElementById('notificationsToggle');
|
||||
const activityLog = document.getElementById('activityLog');
|
||||
|
||||
@ -443,6 +175,7 @@
|
||||
startButton.addEventListener('click', startScraper);
|
||||
pauseButton.addEventListener('click', togglePauseScraper);
|
||||
stopButton.addEventListener('click', stopScraper);
|
||||
resetButton.addEventListener('click', resetScraper);
|
||||
notificationsToggle.addEventListener('click', toggleNotifications);
|
||||
|
||||
document.getElementById('volumeForm').addEventListener('submit', function (e) {
|
||||
@ -483,28 +216,40 @@
|
||||
startButton.disabled = true;
|
||||
pauseButton.disabled = false;
|
||||
stopButton.disabled = false;
|
||||
resetButton.disabled = false; // Enable reset when active
|
||||
} else {
|
||||
statusIndicator.className = 'status-indicator status-inactive';
|
||||
statusText.textContent = 'Inactive';
|
||||
startButton.disabled = false;
|
||||
pauseButton.disabled = true;
|
||||
stopButton.disabled = true;
|
||||
resetButton.disabled = false; // Enable reset when inactive too
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Action functions
|
||||
// Ask the backend to start the scraper and report the outcome to the user.
// On success the status display is refreshed immediately and the activity
// log is reloaded one second later.
function startScraper() {
    console.log("Start button clicked - sending request to /scraper/start");

    fetch('/scraper/start', { method: 'POST' })
        .then(response => {
            console.log("Response received:", response);
            // Parse the body exactly once; a second .json() call further down
            // the chain would run on the parsed data and throw a TypeError.
            return response.json();
        })
        .then(data => {
            console.log("Data received:", data);
            if (data.success) {
                showFlashMessage('Scraper started successfully', 'success');
                updateStatus();
                setTimeout(() => { loadRecentActivity(); }, 1000);
            } else {
                showFlashMessage(data.message, 'error');
            }
        })
        .catch(error => {
            console.error("Error starting scraper:", error);
            showFlashMessage('Error starting scraper: ' + error.message, 'error');
        });
}
|
||||
|
||||
@ -513,11 +258,11 @@
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
showNotification(data.message, 'info');
|
||||
showFlashMessage(data.message, 'info');
|
||||
updateStatus();
|
||||
setTimeout(() => { loadRecentActivity(); }, 1000);
|
||||
} else {
|
||||
showNotification(data.message, 'danger');
|
||||
showFlashMessage(data.message, 'error');
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -527,15 +272,55 @@
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
showNotification('Scraper stopped successfully', 'warning');
|
||||
showFlashMessage('Scraper stopped successfully', 'warning');
|
||||
updateStatus();
|
||||
setTimeout(() => { loadRecentActivity(); }, 1000);
|
||||
} else {
|
||||
showNotification(data.message, 'danger');
|
||||
showFlashMessage(data.message, 'error');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Reset the scraper after user confirmation: POST to /scraper/reset, report
// the outcome, and refresh status, statistics and the activity log on success.
function resetScraper() {
    const confirmed = confirm("Are you sure you want to reset the scraper? This will stop all current tasks, optionally clear non-pending papers, and restart the scraper.");
    if (!confirmed) {
        return;
    }

    // Used on both the response and the failure path.
    const enableResetButton = () => {
        resetButton.disabled = false;
    };

    // Block repeated clicks while the request is in flight.
    resetButton.disabled = true;

    // Tell the user that work is in progress.
    showFlashMessage('Resetting scraper, please wait...', 'info');

    const request = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            clear_papers: true // Could be made configurable with a checkbox
        })
    };

    fetch('/scraper/reset', request)
        .then(response => response.json())
        .then(result => {
            if (!result.success) {
                showFlashMessage(result.message || 'Error resetting scraper', 'error');
            } else {
                showFlashMessage('Scraper has been completely reset and restarted', 'success');
                // Refresh every dependent view.
                updateStatus();
                loadActivityStats(currentTimeRange);
                setTimeout(() => { loadRecentActivity(); }, 1000);
            }
            enableResetButton();
        })
        .catch(err => {
            console.error("Error resetting scraper:", err);
            showFlashMessage('Error resetting scraper: ' + err.message, 'error');
            enableResetButton();
        });
}
|
||||
|
||||
function updateVolume() {
|
||||
const volume = document.getElementById('volumeInput').value;
|
||||
|
||||
@ -549,9 +334,9 @@
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
showNotification('Volume updated successfully', 'success');
|
||||
showFlashMessage('Volume updated successfully', 'success');
|
||||
} else {
|
||||
showNotification(data.message, 'danger');
|
||||
showFlashMessage(data.message, 'error');
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -682,36 +467,6 @@
|
||||
});
|
||||
}
|
||||
|
||||
// Notification functions
|
||||
// Show a dismissible Bootstrap alert inside #notificationContainer.
//
// message: text to display. It is inserted as plain text, never as HTML,
//          because callers pass scraped paper titles and server messages.
// type:    Bootstrap alert variant ('success', 'info', 'warning', 'danger').
//
// When notifications are switched off only 'danger' alerts are shown.
// Each alert fades out and removes itself after five seconds.
function showNotification(message, type) {
    if (!notificationsEnabled && type !== 'danger') {
        return;
    }

    const container = document.getElementById('notificationContainer');
    if (!container) {
        // Nothing to render into on pages without the container.
        return;
    }

    const notification = document.createElement('div');
    notification.className = `alert alert-${type} notification shadow-sm`;

    // A text node keeps any markup in the message inert, which closes the
    // HTML-injection hole that interpolating into innerHTML would open.
    notification.appendChild(document.createTextNode(String(message)));

    const closeButton = document.createElement('button');
    closeButton.type = 'button';
    closeButton.className = 'btn-close float-end';
    closeButton.setAttribute('aria-label', 'Close');
    closeButton.addEventListener('click', () => {
        notification.remove();
    });
    notification.appendChild(closeButton);

    container.appendChild(notification);

    // Auto-close after 5 seconds, leaving 500 ms for the fade transition.
    setTimeout(() => {
        notification.classList.add('fade');
        setTimeout(() => {
            notification.remove();
        }, 500);
    }, 5000);
}
|
||||
|
||||
// WebSocket for real-time notifications
|
||||
function setupWebSocket() {
|
||||
// If WebSocket is available, implement it here
|
||||
@ -733,9 +488,9 @@
|
||||
data.forEach(log => {
|
||||
const extraData = log.extra_data ? JSON.parse(log.extra_data) : {};
|
||||
if (log.status === 'success') {
|
||||
showNotification(`New paper scraped: ${extraData.title || 'Unknown title'}`, 'success');
|
||||
showFlashMessage(`New paper scraped: ${extraData.title || 'Unknown title'}`, 'success');
|
||||
} else if (log.status === 'error') {
|
||||
showNotification(`Failed to scrape paper: ${log.description}`, 'danger');
|
||||
showFlashMessage(`Failed to scrape paper: ${log.description}`, 'error');
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -58,6 +58,16 @@
|
||||
<option value="|">Pipe (|)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group mt-3">
|
||||
<label for="duplicate_strategy">Duplicate Handling Strategy</label>
|
||||
<select name="duplicate_strategy" id="duplicate_strategy" class="form-control">
|
||||
{% for strategy_id, strategy in duplicate_strategies.items() %}
|
||||
<option value="{{ strategy_id }}" {% if strategy.is_default %}selected{% endif %}>
|
||||
{{ strategy.name }} - {{ strategy.description }}
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary mt-3">Upload</button>
|
||||
</form>
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user