"""Configuration management blueprint.""" from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app from ..db import db # Import the new model from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata from ..defaults import MAX_VOLUME import os # Import os for path validation import sys from scipaperloader.scrapers import __path__ as scrapers_path # Import the cache invalidation function from our new module from ..cache_utils import invalidate_hourly_quota_cache import random from datetime import datetime, timedelta from uuid import uuid4 bp = Blueprint("config", __name__, url_prefix="/config") # Helper functions for configuration updates def _update_volume(new_volume): """ Helper function to update volume configuration. Args: new_volume (float): The new volume value Returns: tuple: (success, message, volume_config) """ try: new_volume = float(new_volume) if new_volume <= 0 or new_volume > MAX_VOLUME: return False, f"Volume must be between 1 and {MAX_VOLUME}", None volume_config = VolumeConfig.query.first() if not volume_config: volume_config = VolumeConfig(volume=new_volume) db.session.add(volume_config) else: old_value = volume_config.volume volume_config.volume = new_volume ActivityLog.log_config_change( config_key="scraper_volume", old_value=old_value, new_value=new_volume, description="Updated scraper volume" ) db.session.commit() return True, "Volume updated successfully!", volume_config except (ValueError, TypeError) as e: db.session.rollback() return False, f"Error updating volume: {str(e)}", None # Add helper for download path def _update_download_path(new_path): """ Helper function to update download path configuration. Args: new_path (str): The new download path Returns: tuple: (success, message, download_path_config) """ try: # Basic validation: check if it's a non-empty string if not new_path or not isinstance(new_path, str): return False, "Download path cannot be empty.", None # --- Add more validation like checking if path exists or is writable --- # Check if the path exists and is a directory if not os.path.isdir(new_path): # Try to create it if it doesn't exist try: os.makedirs(new_path, exist_ok=True) ActivityLog.log_scraper_activity( action="create_directory", status="info", description=f"Created download directory: {new_path}" ) except OSError as e: ActivityLog.log_error( error_message=f"Failed to create download directory: {new_path}, Error: {str(e)}", source="update_download_path" ) return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None # Check if the path is readable if not os.access(new_path, os.R_OK): ActivityLog.log_error( error_message=f"Download path '{new_path}' is not readable.", source="check_directory_permissions" ) return False, f"Path '{new_path}' exists but is not readable by the application.", None # Check if the path is writable if not os.access(new_path, os.W_OK): ActivityLog.log_error( error_message=f"Download path '{new_path}' is not writable.", source="check_directory_permissions" ) return False, f"Path '{new_path}' exists but is not writable by the application.", None # --- End of validation --- config = DownloadPathConfig.query.first() if not config: config = DownloadPathConfig(path=new_path) db.session.add(config) else: old_value = config.path config.path = new_path ActivityLog.log_config_change( config_key="download_path", old_value=old_value, new_value=new_path, description="Updated download path" ) db.session.commit() return True, "Download path updated successfully!", config except Exception as e: db.session.rollback() return False, f"Error updating download path: {str(e)}", None def _update_schedule(schedule_data): """ Helper function to update schedule configuration. Args: schedule_data (dict): Dictionary with hour:weight pairs Returns: tuple: (success, message) """ try: # Validate all entries first for hour_str, weight in schedule_data.items(): try: hour = int(hour_str) weight = float(weight) if hour < 0 or hour > 23: return False, f"Hour value must be between 0 and 23, got {hour}" if weight < 0.1 or weight > 5: return False, f"Weight for hour {hour} must be between 0.1 and 5, got {weight}" except ValueError: return False, f"Invalid data format for hour {hour_str}" # Update schedule after validation for hour_str, weight in schedule_data.items(): hour = int(hour_str) weight = float(weight) config = ScheduleConfig.query.get(hour) if not config: config = ScheduleConfig(hour=hour, weight=weight) db.session.add(config) else: old_value = config.weight config.weight = weight ActivityLog.log_config_change( config_key=f"schedule_hour_{hour}", old_value=old_value, new_value=weight, description=f"Updated schedule weight for hour {hour}" ) db.session.commit() # Invalidate hourly quota cache using the cache_utils module try: invalidate_hourly_quota_cache() except Exception as e: # Log the error but don't fail the update ActivityLog.log_error( error_message=f"Error invalidating hourly quota cache: {str(e)}", source="_update_schedule" ) return True, "Schedule updated successfully!" except Exception as e: db.session.rollback() return False, f"Error updating schedule: {str(e)}" @bp.route("/") @bp.route("/general") def general(): """Show general configuration page.""" volume_config = VolumeConfig.query.first() if not volume_config: volume_config = VolumeConfig(volume=100) # Default value db.session.add(volume_config) db.session.commit() # Fetch download path config download_path_config = DownloadPathConfig.query.first() if not download_path_config: download_path_config = DownloadPathConfig() # Use default from model db.session.add(download_path_config) db.session.commit() return render_template( "config/index.html.jinja", active_tab="general", volume_config=volume_config, download_path_config=download_path_config, # Pass to template max_volume=MAX_VOLUME, app_title="Configuration" ) @bp.route("/schedule") def schedule(): """Show schedule configuration page.""" # Ensure we have schedule config for all hours existing_hours = {record.hour: record for record in ScheduleConfig.query.all()} schedule_config = {} for hour in range(24): if hour in existing_hours: schedule_config[hour] = existing_hours[hour].weight else: # Create default schedule entry (weight 1.0) new_config = ScheduleConfig(hour=hour, weight=1.0) db.session.add(new_config) schedule_config[hour] = 1.0 if len(existing_hours) < 24: db.session.commit() volume_config = VolumeConfig.query.first() if not volume_config: volume_config = VolumeConfig(volume=100) # Default value db.session.add(volume_config) db.session.commit() return render_template( "config/index.html.jinja", active_tab="schedule", schedule=schedule_config, volume=volume_config.volume, max_volume=MAX_VOLUME, app_title="Configuration" ) @bp.route("/database") def database(): """Show database configuration page.""" return render_template( "config/index.html.jinja", active_tab="database", app_title="Configuration" ) @bp.route("/generate_test_papers", methods=["POST"]) def generate_test_papers(): """Generate random test papers for the database.""" try: # Get the requested number of papers (with validation) try: paper_count = int(request.form.get("paper_count", "100")) if paper_count < 1: paper_count = 1 elif paper_count > 1000: paper_count = 1000 except (ValueError, TypeError): paper_count = 100 # Get the status settings try: dummy_paper_status = request.form.get("dummy_paper_status") if dummy_paper_status == "new": dummy_paper_status = "New" else: dummy_paper_status = random.choice(["New","Pending", "Done", "Failed"]) except (ValueError, TypeError): dummy_paper_status = random.choice(["New","Pending", "Done", "Failed"]) # Get the download path for file paths download_path = DownloadPathConfig.get_path() # Sample journal names for realistic test data journals = [ "Nature", "Science", "Cell", "PNAS", "Journal of Biological Chemistry", "IEEE Transactions on Neural Networks", "Artificial Intelligence", "Machine Learning", "Neural Computation", "Journal of Machine Learning Research", "Journal of Artificial Intelligence Research", "Data Mining and Knowledge Discovery", "Pattern Recognition", "Neural Networks", "Journal of Physical Chemistry" ] # Sample paper types paper_types = ["Article", "Review", "Conference", "Preprint", "Book Chapter"] # Sample languages languages = ["English", "German", "French", "Chinese", "Spanish", "Japanese"] # Generate random papers papers_added = 0 for i in range(paper_count): # Generate a random DOI doi = f"10.{random.randint(1000, 9999)}/{uuid4().hex[:8]}" # Skip if DOI already exists if PaperMetadata.query.filter_by(doi=doi).first(): continue # Random publishing date within the last 5 years days_ago = random.randint(0, 5 * 365) pub_date = datetime.now() - timedelta(days=days_ago) # Create paper paper = PaperMetadata( title=f"Test Paper {i+1}: {''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(5))}", doi=doi, alt_id=f"ALT-{random.randint(10000, 99999)}", issn=f"{random.randint(1000, 9999)}-{random.randint(1000, 9999)}", journal=random.choice(journals), type=random.choice(paper_types), language=random.choice(languages), published_online=pub_date.date(), status=dummy_paper_status, file_path=f"{download_path}/test_paper_{i+1}.pdf" if random.random() > 0.3 else None, error_msg="Download failed: connection timeout" if random.random() < 0.1 else None, created_at=datetime.now() - timedelta(days=random.randint(0, 30)) ) db.session.add(paper) papers_added += 1 # Commit in batches to improve performance if i % 100 == 0: db.session.commit() # Final commit db.session.commit() # Log the action using the existing log_import_activity method ActivityLog.log_import_activity( action="generate_test_papers", status="success", description=f"Generated {papers_added} test papers for the database" ) flash(f"Successfully generated {papers_added} test papers.", "success") except Exception as e: db.session.rollback() flash(f"Failed to generate test papers: {str(e)}", "error") ActivityLog.log_error( error_message=f"Failed to generate test papers: {str(e)}", exception=e, source="config.generate_test_papers" ) return redirect(url_for("config.database")) @bp.route("/update/general", methods=["POST"]) def update_general(): """Update general configuration (Volume and Download Path).""" volume_success, volume_message = True, "" path_success, path_message = True, "" # Update Volume new_volume = request.form.get("total_volume") if new_volume is not None: volume_success, volume_message, _ = _update_volume(new_volume) if volume_success: flash(volume_message, "success") else: flash(volume_message, "error") # Update Download Path new_path = request.form.get("download_path") if new_path is not None: path_success, path_message, _ = _update_download_path(new_path) if path_success: flash(path_message, "success") else: flash(path_message, "error") return redirect(url_for("config.general")) @bp.route("/update/schedule", methods=["POST"]) def update_schedule(): """Update schedule configuration.""" schedule_data = {} for hour in range(24): key = f"hour_{hour}" if key not in request.form: flash(f"Missing data for hour {hour}", "error") return redirect(url_for("config.schedule")) schedule_data[str(hour)] = request.form.get(key, 0) success, message = _update_schedule(schedule_data) if success: flash(message, "success") else: flash(message, "error") return redirect(url_for("config.schedule")) @bp.route("/update/scraper_module", methods=["POST"]) def update_scraper_module(): """Update the scraper module configuration.""" from ..models import ScraperModuleConfig new_scraper_module = request.form.get("scraper_module") if not new_scraper_module: flash("Scraper module cannot be empty.", "error") return redirect(url_for("config.general")) # Validate that the module exists and is valid from scipaperloader.scrapers.factory import get_available_scrapers available_modules = [m["name"] for m in get_available_scrapers()] if new_scraper_module not in available_modules: flash(f"Invalid scraper module: {new_scraper_module}", "error") return redirect(url_for("config.general")) # Update the database configuration ScraperModuleConfig.set_module(new_scraper_module) flash(f"Scraper module updated to '{new_scraper_module}'.", "success") return redirect(url_for("config.general")) @bp.context_processor def inject_scraper_modules(): """Inject available scraper modules into the template context.""" from scipaperloader.scrapers.factory import get_available_scrapers from ..models import ScraperModuleConfig available_scrapers = get_available_scrapers() current_module = ScraperModuleConfig.get_current_module() return { "available_scraper_modules": [s["name"] for s in available_scrapers], "current_scraper_module": current_module, "scraper_details": {s["name"]: s for s in available_scrapers} } @bp.route("/api/schedule/stats") def schedule_stats(): """Get statistics about the current schedule configuration.""" volume_config = VolumeConfig.query.first() if not volume_config: return jsonify({"error": "No volume configuration found"}) total_volume = volume_config.volume schedule_configs = ScheduleConfig.query.all() if not schedule_configs: return jsonify({"error": "No schedule configuration found"}) # Calculate total weight total_weight = sum(config.weight for config in schedule_configs) # Calculate papers per hour papers_per_hour = {} hourly_weights = {} for config in schedule_configs: weight_ratio = config.weight / total_weight if total_weight > 0 else 0 papers = weight_ratio * total_volume papers_per_hour[config.hour] = papers hourly_weights[config.hour] = config.weight return jsonify({ "total_volume": total_volume, "total_weight": total_weight, "papers_per_hour": papers_per_hour, "hourly_weights": hourly_weights }) @bp.route("/api/update_config", methods=["POST"]) def api_update_config(): """API endpoint to update configuration.""" data = request.json response = {"success": True, "updates": []} try: # Update volume if provided if "volume" in data: success, message, _ = _update_volume(data["volume"]) response["updates"].append({ "type": "volume", "success": success, "message": message }) if not success: response["success"] = False # Update download path if provided if "download_path" in data: success, message, _ = _update_download_path(data["download_path"]) response["updates"].append({ "type": "download_path", "success": success, "message": message }) if not success: response["success"] = False # Update schedule if provided if "schedule" in data: success, message = _update_schedule(data["schedule"]) response["updates"].append({ "type": "schedule", "success": success, "message": message }) if not success: response["success"] = False return jsonify(response) except Exception as e: db.session.rollback() return jsonify({ "success": False, "message": f"Unexpected error: {str(e)}" }) @bp.route("/delete_all_papers", methods=["POST"]) def delete_all_papers(): """Delete all paper records from the database.""" try: # Count papers before deletion for logging purposes paper_count = PaperMetadata.query.count() # Delete all records from the PaperMetadata table PaperMetadata.query.delete() db.session.commit() # Log the action ActivityLog.log_config_change( config_key="database", old_value=f"{paper_count} papers", new_value="0 papers", description=f"Deleted all {paper_count} papers from the database" ) flash(f"Successfully deleted all {paper_count} papers from the database.", "success") except Exception as e: db.session.rollback() flash(f"Failed to delete papers: {str(e)}", "error") ActivityLog.log_error( error_message=f"Failed to delete all papers: {str(e)}", exception=e, source="config.delete_all_papers" ) return redirect(url_for("config.general"))