"""Configuration management blueprint."""
|
|
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
|
|
from ..db import db
|
|
# Import the new model
|
|
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata, TimezoneConfig
|
|
from ..defaults import MAX_VOLUME
|
|
import os # Import os for path validation
|
|
import sys
|
|
from scipaperloader.scrapers import __path__ as scrapers_path
|
|
# Import the cache invalidation function from our new module
|
|
from ..cache_utils import invalidate_hourly_quota_cache
|
|
|
|
import random
|
|
from datetime import datetime, timedelta
|
|
from uuid import uuid4
|
|
|
|
|
|
# Blueprint for all /config routes (general, schedule, database tabs and API helpers).
bp = Blueprint("config", __name__, url_prefix="/config")
|
|
|
|
|
|
# Helper functions for configuration updates
|
|
def _update_volume(new_volume):
    """
    Helper function to update volume configuration.

    Args:
        new_volume (float): The new volume value (papers per day).

    Returns:
        tuple: (success (bool), message (str), volume_config or None)
    """
    try:
        new_volume = float(new_volume)
        # Any value in (0, MAX_VOLUME] is accepted; the message now matches
        # the actual check (the old text claimed a lower bound of 1).
        if new_volume <= 0 or new_volume > MAX_VOLUME:
            return False, f"Volume must be greater than 0 and at most {MAX_VOLUME}", None

        # Persist the new volume via the model's class method
        volume_config = VolumeConfig.set_volume(new_volume)

        # Invalidate and recalculate the hourly quota cache so the new
        # volume takes effect immediately.
        try:
            # Imported here to avoid a circular import at module load time
            from ..blueprints.scraper import calculate_papers_for_current_hour
            invalidate_hourly_quota_cache(calculate_papers_for_current_hour)
        except Exception as e:
            # A cache refresh failure must not fail the volume update itself
            ActivityLog.log_error(
                error_message=f"Error invalidating hourly quota cache: {str(e)}",
                source="_update_volume"
            )

        return True, "Volume updated successfully!", volume_config

    except (ValueError, TypeError) as e:
        # Non-numeric input
        db.session.rollback()
        return False, f"Error updating volume: {str(e)}", None
    except Exception as e:
        # Any other failure (e.g. a database error from set_volume): roll
        # back so the session is left clean for the caller. The original
        # let these propagate without a rollback.
        db.session.rollback()
        return False, f"Error updating volume: {str(e)}", None
|
|
|
|
|
|
# Add helper for download path
|
|
def _update_download_path(new_path):
|
|
"""
|
|
Helper function to update download path configuration.
|
|
|
|
Args:
|
|
new_path (str): The new download path
|
|
|
|
Returns:
|
|
tuple: (success, message, download_path_config)
|
|
"""
|
|
try:
|
|
# Basic validation: check if it's a non-empty string
|
|
if not new_path or not isinstance(new_path, str):
|
|
return False, "Download path cannot be empty.", None
|
|
|
|
# --- Add more validation like checking if path exists or is writable ---
|
|
# Check if the path exists and is a directory
|
|
if not os.path.isdir(new_path):
|
|
# Try to create it if it doesn't exist
|
|
try:
|
|
os.makedirs(new_path, exist_ok=True)
|
|
ActivityLog.log_scraper_activity(
|
|
action="create_directory",
|
|
status="info",
|
|
description=f"Created download directory: {new_path}"
|
|
)
|
|
except OSError as e:
|
|
ActivityLog.log_error(
|
|
error_message=f"Failed to create download directory: {new_path}, Error: {str(e)}",
|
|
source="update_download_path"
|
|
)
|
|
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
|
|
|
|
# Check if the path is readable
|
|
if not os.access(new_path, os.R_OK):
|
|
ActivityLog.log_error(
|
|
error_message=f"Download path '{new_path}' is not readable.",
|
|
source="check_directory_permissions"
|
|
)
|
|
return False, f"Path '{new_path}' exists but is not readable by the application.", None
|
|
|
|
# Check if the path is writable
|
|
if not os.access(new_path, os.W_OK):
|
|
ActivityLog.log_error(
|
|
error_message=f"Download path '{new_path}' is not writable.",
|
|
source="check_directory_permissions"
|
|
)
|
|
return False, f"Path '{new_path}' exists but is not writable by the application.", None
|
|
# --- End of validation ---
|
|
|
|
config = DownloadPathConfig.query.first()
|
|
if not config:
|
|
config = DownloadPathConfig(path=new_path)
|
|
db.session.add(config)
|
|
else:
|
|
old_value = config.path
|
|
config.path = new_path
|
|
ActivityLog.log_config_change(
|
|
config_key="download_path",
|
|
old_value=old_value,
|
|
new_value=new_path,
|
|
description="Updated download path"
|
|
)
|
|
|
|
db.session.commit()
|
|
return True, "Download path updated successfully!", config
|
|
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
return False, f"Error updating download path: {str(e)}", None
|
|
|
|
|
|
def _update_timezone(new_timezone):
|
|
"""
|
|
Helper function to update timezone configuration.
|
|
|
|
Args:
|
|
new_timezone (str): The new timezone
|
|
|
|
Returns:
|
|
tuple: (success, message, timezone_config)
|
|
"""
|
|
try:
|
|
# Basic validation: check if it's a non-empty string
|
|
if not new_timezone or not isinstance(new_timezone, str):
|
|
return False, "Timezone cannot be empty.", None
|
|
|
|
# Validate timezone using pytz
|
|
try:
|
|
import pytz
|
|
pytz.timezone(new_timezone) # This will raise an exception if invalid
|
|
except ImportError:
|
|
# If pytz is not available, do basic validation
|
|
if '/' not in new_timezone:
|
|
return False, "Invalid timezone format. Use format like 'Europe/Berlin'.", None
|
|
except pytz.exceptions.UnknownTimeZoneError:
|
|
return False, f"Unknown timezone: {new_timezone}. Use format like 'Europe/Berlin'.", None
|
|
|
|
config = TimezoneConfig.query.first()
|
|
if not config:
|
|
config = TimezoneConfig(timezone=new_timezone)
|
|
db.session.add(config)
|
|
else:
|
|
old_value = config.timezone
|
|
config.timezone = new_timezone
|
|
ActivityLog.log_config_change(
|
|
config_key="scheduler_timezone",
|
|
old_value=old_value,
|
|
new_value=new_timezone,
|
|
description="Updated scheduler timezone"
|
|
)
|
|
|
|
db.session.commit()
|
|
return True, "Timezone updated successfully!", config
|
|
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
return False, f"Error updating timezone: {str(e)}", None
|
|
|
|
|
|
def _update_schedule(schedule_data):
|
|
"""
|
|
Helper function to update schedule configuration.
|
|
|
|
Args:
|
|
schedule_data (dict): Dictionary with hour:weight pairs
|
|
|
|
Returns:
|
|
tuple: (success, message)
|
|
"""
|
|
try:
|
|
# Validate all entries first
|
|
for hour_str, weight in schedule_data.items():
|
|
try:
|
|
hour = int(hour_str)
|
|
weight = float(weight)
|
|
|
|
if hour < 0 or hour > 23:
|
|
return False, f"Hour value must be between 0 and 23, got {hour}"
|
|
|
|
if weight < 0.1 or weight > 5:
|
|
return False, f"Weight for hour {hour} must be between 0.1 and 5, got {weight}"
|
|
except ValueError:
|
|
return False, f"Invalid data format for hour {hour_str}"
|
|
|
|
# Update schedule after validation
|
|
for hour_str, weight in schedule_data.items():
|
|
hour = int(hour_str)
|
|
weight = float(weight)
|
|
|
|
config = ScheduleConfig.query.get(hour)
|
|
if not config:
|
|
config = ScheduleConfig(hour=hour, weight=weight)
|
|
db.session.add(config)
|
|
else:
|
|
old_value = config.weight
|
|
config.weight = weight
|
|
ActivityLog.log_config_change(
|
|
config_key=f"schedule_hour_{hour}",
|
|
old_value=old_value,
|
|
new_value=weight,
|
|
description=f"Updated schedule weight for hour {hour}"
|
|
)
|
|
|
|
db.session.commit()
|
|
|
|
# Invalidate hourly quota cache and immediately recalculate
|
|
try:
|
|
# Import the calculation function from the scraper module
|
|
from ..blueprints.scraper import calculate_papers_for_current_hour
|
|
invalidate_hourly_quota_cache(calculate_papers_for_current_hour)
|
|
except Exception as e:
|
|
# Log the error but don't fail the update
|
|
ActivityLog.log_error(
|
|
error_message=f"Error invalidating hourly quota cache: {str(e)}",
|
|
source="_update_schedule"
|
|
)
|
|
|
|
return True, "Schedule updated successfully!"
|
|
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
return False, f"Error updating schedule: {str(e)}"
|
|
|
|
|
|
@bp.route("/")
|
|
@bp.route("/general")
|
|
def general():
|
|
"""Show general configuration page."""
|
|
volume_config = VolumeConfig.query.first()
|
|
if not volume_config:
|
|
volume_config = VolumeConfig(volume=100) # Default value
|
|
db.session.add(volume_config)
|
|
db.session.commit()
|
|
|
|
# Fetch download path config
|
|
download_path_config = DownloadPathConfig.query.first()
|
|
if not download_path_config:
|
|
download_path_config = DownloadPathConfig() # Use default from model
|
|
db.session.add(download_path_config)
|
|
db.session.commit()
|
|
|
|
# Fetch timezone config
|
|
timezone_config = TimezoneConfig.query.first()
|
|
if not timezone_config:
|
|
timezone_config = TimezoneConfig() # Use default from model
|
|
db.session.add(timezone_config)
|
|
db.session.commit()
|
|
|
|
return render_template(
|
|
"config/index.html.jinja",
|
|
active_tab="general",
|
|
volume_config=volume_config,
|
|
download_path_config=download_path_config, # Pass to template
|
|
timezone_config=timezone_config, # Pass to template
|
|
max_volume=MAX_VOLUME,
|
|
app_title="Configuration"
|
|
)
|
|
|
|
|
|
@bp.route("/schedule")
|
|
def schedule():
|
|
"""Show schedule configuration page."""
|
|
# Ensure we have schedule config for all hours
|
|
existing_hours = {record.hour: record for record in ScheduleConfig.query.all()}
|
|
schedule_config = {}
|
|
|
|
for hour in range(24):
|
|
if hour in existing_hours:
|
|
schedule_config[hour] = existing_hours[hour].weight
|
|
else:
|
|
# Create default schedule entry (weight 1.0)
|
|
new_config = ScheduleConfig(hour=hour, weight=1.0)
|
|
db.session.add(new_config)
|
|
schedule_config[hour] = 1.0
|
|
|
|
if len(existing_hours) < 24:
|
|
db.session.commit()
|
|
|
|
volume_config = VolumeConfig.query.first()
|
|
if not volume_config:
|
|
volume_config = VolumeConfig(volume=100) # Default value
|
|
db.session.add(volume_config)
|
|
db.session.commit()
|
|
|
|
return render_template(
|
|
"config/index.html.jinja",
|
|
active_tab="schedule",
|
|
schedule=schedule_config,
|
|
volume=volume_config.volume,
|
|
max_volume=MAX_VOLUME,
|
|
app_title="Configuration"
|
|
)
|
|
|
|
@bp.route("/database")
|
|
def database():
|
|
"""Show database configuration page."""
|
|
|
|
return render_template(
|
|
"config/index.html.jinja",
|
|
active_tab="database",
|
|
app_title="Configuration"
|
|
)
|
|
|
|
|
|
@bp.route("/generate_test_papers", methods=["POST"])
|
|
def generate_test_papers():
|
|
"""Generate random test papers for the database."""
|
|
try:
|
|
# Get the requested number of papers (with validation)
|
|
try:
|
|
paper_count = int(request.form.get("paper_count", "100"))
|
|
if paper_count < 1:
|
|
paper_count = 1
|
|
elif paper_count > 1000:
|
|
paper_count = 1000
|
|
except (ValueError, TypeError):
|
|
paper_count = 100
|
|
|
|
# Get the status settings
|
|
try:
|
|
dummy_paper_status = request.form.get("dummy_paper_status")
|
|
if dummy_paper_status == "new":
|
|
dummy_paper_status = "New"
|
|
else:
|
|
dummy_paper_status = random.choice(["New","Pending", "Done", "Failed"])
|
|
except (ValueError, TypeError):
|
|
dummy_paper_status = random.choice(["New","Pending", "Done", "Failed"])
|
|
|
|
# Get the download path for file paths
|
|
download_path = DownloadPathConfig.get_path()
|
|
|
|
# Sample journal names for realistic test data
|
|
journals = [
|
|
"Nature", "Science", "Cell", "PNAS", "Journal of Biological Chemistry",
|
|
"IEEE Transactions on Neural Networks", "Artificial Intelligence",
|
|
"Machine Learning", "Neural Computation", "Journal of Machine Learning Research",
|
|
"Journal of Artificial Intelligence Research", "Data Mining and Knowledge Discovery",
|
|
"Pattern Recognition", "Neural Networks", "Journal of Physical Chemistry"
|
|
]
|
|
|
|
# Sample paper types
|
|
paper_types = ["Article", "Review", "Conference", "Preprint", "Book Chapter"]
|
|
|
|
# Sample languages
|
|
languages = ["English", "German", "French", "Chinese", "Spanish", "Japanese"]
|
|
|
|
# Generate random papers
|
|
papers_added = 0
|
|
for i in range(paper_count):
|
|
# Generate a random DOI
|
|
doi = f"10.{random.randint(1000, 9999)}/{uuid4().hex[:8]}"
|
|
|
|
# Skip if DOI already exists
|
|
if PaperMetadata.query.filter_by(doi=doi).first():
|
|
continue
|
|
|
|
# Random publishing date within the last 5 years
|
|
days_ago = random.randint(0, 5 * 365)
|
|
pub_date = datetime.now() - timedelta(days=days_ago)
|
|
|
|
# Create paper
|
|
paper = PaperMetadata(
|
|
title=f"Test Paper {i+1}: {''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(5))}",
|
|
doi=doi,
|
|
alt_id=f"ALT-{random.randint(10000, 99999)}",
|
|
issn=f"{random.randint(1000, 9999)}-{random.randint(1000, 9999)}",
|
|
journal=random.choice(journals),
|
|
type=random.choice(paper_types),
|
|
language=random.choice(languages),
|
|
published_online=pub_date.date(),
|
|
status=dummy_paper_status,
|
|
file_path=f"{download_path}/test_paper_{i+1}.pdf" if random.random() > 0.3 else None,
|
|
error_msg="Download failed: connection timeout" if random.random() < 0.1 else None,
|
|
created_at=datetime.now() - timedelta(days=random.randint(0, 30))
|
|
)
|
|
db.session.add(paper)
|
|
papers_added += 1
|
|
|
|
# Commit in batches to improve performance
|
|
if i % 100 == 0:
|
|
db.session.commit()
|
|
|
|
# Final commit
|
|
db.session.commit()
|
|
|
|
# Log the action using the existing log_import_activity method
|
|
ActivityLog.log_import_activity(
|
|
action="generate_test_papers",
|
|
status="success",
|
|
description=f"Generated {papers_added} test papers for the database"
|
|
)
|
|
|
|
flash(f"Successfully generated {papers_added} test papers.", "success")
|
|
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
flash(f"Failed to generate test papers: {str(e)}", "error")
|
|
ActivityLog.log_error(
|
|
error_message=f"Failed to generate test papers: {str(e)}",
|
|
exception=e,
|
|
source="config.generate_test_papers"
|
|
)
|
|
|
|
return redirect(url_for("config.database"))
|
|
|
|
@bp.route("/update/general", methods=["POST"])
|
|
def update_general():
|
|
"""Update general configuration (Volume, Download Path, and Timezone)."""
|
|
volume_success, volume_message = True, ""
|
|
path_success, path_message = True, ""
|
|
timezone_success, timezone_message = True, ""
|
|
|
|
# Update Volume
|
|
new_volume = request.form.get("total_volume")
|
|
if new_volume is not None:
|
|
volume_success, volume_message, _ = _update_volume(new_volume)
|
|
if volume_success:
|
|
flash(volume_message, "success")
|
|
else:
|
|
flash(volume_message, "error")
|
|
|
|
# Update Download Path
|
|
new_path = request.form.get("download_path")
|
|
if new_path is not None:
|
|
path_success, path_message, _ = _update_download_path(new_path)
|
|
if path_success:
|
|
flash(path_message, "success")
|
|
else:
|
|
flash(path_message, "error")
|
|
|
|
# Update Timezone
|
|
new_timezone = request.form.get("timezone")
|
|
if new_timezone is not None:
|
|
timezone_success, timezone_message, _ = _update_timezone(new_timezone)
|
|
if timezone_success:
|
|
flash(timezone_message, "success")
|
|
else:
|
|
flash(timezone_message, "error")
|
|
|
|
return redirect(url_for("config.general"))
|
|
|
|
|
|
@bp.route("/update/schedule", methods=["POST"])
|
|
def update_schedule():
|
|
"""Update schedule configuration."""
|
|
schedule_data = {}
|
|
for hour in range(24):
|
|
key = f"hour_{hour}"
|
|
if key not in request.form:
|
|
flash(f"Missing data for hour {hour}", "error")
|
|
return redirect(url_for("config.schedule"))
|
|
schedule_data[str(hour)] = request.form.get(key, 0)
|
|
|
|
success, message = _update_schedule(schedule_data)
|
|
|
|
if success:
|
|
flash(message, "success")
|
|
else:
|
|
flash(message, "error")
|
|
|
|
return redirect(url_for("config.schedule"))
|
|
|
|
|
|
@bp.route("/update/scraper_module", methods=["POST"])
|
|
def update_scraper_module():
|
|
"""Update the scraper module configuration."""
|
|
from ..models import ScraperModuleConfig
|
|
|
|
new_scraper_module = request.form.get("scraper_module")
|
|
if not new_scraper_module:
|
|
flash("Scraper module cannot be empty.", "error")
|
|
return redirect(url_for("config.general"))
|
|
|
|
# Validate that the module exists and is valid
|
|
from scipaperloader.scrapers.factory import get_available_scrapers
|
|
available_modules = [m["name"] for m in get_available_scrapers()]
|
|
|
|
if new_scraper_module not in available_modules:
|
|
flash(f"Invalid scraper module: {new_scraper_module}", "error")
|
|
return redirect(url_for("config.general"))
|
|
|
|
# Update the database configuration
|
|
ScraperModuleConfig.set_module(new_scraper_module)
|
|
flash(f"Scraper module updated to '{new_scraper_module}'.", "success")
|
|
return redirect(url_for("config.general"))
|
|
|
|
|
|
@bp.context_processor
def inject_scraper_modules():
    """Inject available scraper modules into the template context."""
    from scipaperloader.scrapers.factory import get_available_scrapers
    from ..models import ScraperModuleConfig

    scrapers = get_available_scrapers()

    return {
        "available_scraper_modules": [entry["name"] for entry in scrapers],
        "current_scraper_module": ScraperModuleConfig.get_current_module(),
        "scraper_details": {entry["name"]: entry for entry in scrapers},
    }
|
|
|
|
|
|
@bp.route("/api/schedule/stats")
|
|
def schedule_stats():
|
|
"""Get statistics about the current schedule configuration."""
|
|
volume_config = VolumeConfig.query.first()
|
|
if not volume_config:
|
|
return jsonify({"error": "No volume configuration found"})
|
|
|
|
total_volume = volume_config.volume
|
|
schedule_configs = ScheduleConfig.query.all()
|
|
|
|
if not schedule_configs:
|
|
return jsonify({"error": "No schedule configuration found"})
|
|
|
|
# Calculate total weight
|
|
total_weight = sum(config.weight for config in schedule_configs)
|
|
|
|
# Calculate papers per hour
|
|
papers_per_hour = {}
|
|
hourly_weights = {}
|
|
for config in schedule_configs:
|
|
weight_ratio = config.weight / total_weight if total_weight > 0 else 0
|
|
papers = weight_ratio * total_volume
|
|
papers_per_hour[config.hour] = papers
|
|
hourly_weights[config.hour] = config.weight
|
|
|
|
return jsonify({
|
|
"total_volume": total_volume,
|
|
"total_weight": total_weight,
|
|
"papers_per_hour": papers_per_hour,
|
|
"hourly_weights": hourly_weights
|
|
})
|
|
|
|
|
|
@bp.route("/api/update_config", methods=["POST"])
|
|
def api_update_config():
|
|
"""API endpoint to update configuration."""
|
|
data = request.json
|
|
response = {"success": True, "updates": []}
|
|
|
|
try:
|
|
# Update volume if provided
|
|
if "volume" in data:
|
|
success, message, _ = _update_volume(data["volume"])
|
|
response["updates"].append({
|
|
"type": "volume",
|
|
"success": success,
|
|
"message": message
|
|
})
|
|
if not success:
|
|
response["success"] = False
|
|
|
|
# Update download path if provided
|
|
if "download_path" in data:
|
|
success, message, _ = _update_download_path(data["download_path"])
|
|
response["updates"].append({
|
|
"type": "download_path",
|
|
"success": success,
|
|
"message": message
|
|
})
|
|
if not success:
|
|
response["success"] = False
|
|
|
|
# Update schedule if provided
|
|
if "schedule" in data:
|
|
success, message = _update_schedule(data["schedule"])
|
|
response["updates"].append({
|
|
"type": "schedule",
|
|
"success": success,
|
|
"message": message
|
|
})
|
|
if not success:
|
|
response["success"] = False
|
|
|
|
return jsonify(response)
|
|
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
return jsonify({
|
|
"success": False,
|
|
"message": f"Unexpected error: {str(e)}"
|
|
})
|
|
|
|
|
|
@bp.route("/delete_all_papers", methods=["POST"])
|
|
def delete_all_papers():
|
|
"""Delete all paper records from the database."""
|
|
try:
|
|
# Count papers before deletion for logging purposes
|
|
paper_count = PaperMetadata.query.count()
|
|
|
|
# Delete all records from the PaperMetadata table
|
|
PaperMetadata.query.delete()
|
|
db.session.commit()
|
|
|
|
# Log the action
|
|
ActivityLog.log_config_change(
|
|
config_key="database",
|
|
old_value=f"{paper_count} papers",
|
|
new_value="0 papers",
|
|
description=f"Deleted all {paper_count} papers from the database"
|
|
)
|
|
|
|
flash(f"Successfully deleted all {paper_count} papers from the database.", "success")
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
flash(f"Failed to delete papers: {str(e)}", "error")
|
|
ActivityLog.log_error(
|
|
error_message=f"Failed to delete all papers: {str(e)}",
|
|
exception=e,
|
|
source="config.delete_all_papers"
|
|
)
|
|
|
|
return redirect(url_for("config.general")) |