From 11f086aa64090ff3ada91c69bbcfea3996908f17 Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Wed, 16 Apr 2025 22:03:17 +0200 Subject: [PATCH] implements download path configuration --- scipaperloader/blueprints/config.py | 148 +++++++++++++++--- scipaperloader/blueprints/scraper.py | 21 ++- scipaperloader/models.py | 32 ++++ .../templates/config/general.html.jinja | 14 +- 4 files changed, 189 insertions(+), 26 deletions(-) diff --git a/scipaperloader/blueprints/config.py b/scipaperloader/blueprints/config.py index 8d12b45..f33fa01 100644 --- a/scipaperloader/blueprints/config.py +++ b/scipaperloader/blueprints/config.py @@ -1,8 +1,10 @@ """Configuration management blueprint.""" from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify from ..db import db -from ..models import VolumeConfig, ScheduleConfig, ActivityLog +# Import the new model +from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig from ..defaults import MAX_VOLUME +import os # Import os for path validation bp = Blueprint("config", __name__, url_prefix="/config") @@ -43,7 +45,74 @@ def _update_volume(new_volume): except (ValueError, TypeError) as e: db.session.rollback() return False, f"Error updating volume: {str(e)}", None + + +# Add helper for download path +def _update_download_path(new_path): + """ + Helper function to update download path configuration. + + Args: + new_path (str): The new download path + + Returns: + tuple: (success, message, download_path_config) + """ + try: + # Basic validation: check if it's a non-empty string + if not new_path or not isinstance(new_path, str): + return False, "Download path cannot be empty.", None + + # --- Add more validation like checking if path exists or is writable --- + # Check if the path exists and is a directory + if not os.path.isdir(new_path): + # Try to create it if it doesn't exist + try: + os.makedirs(new_path, exist_ok=True) + ActivityLog.log_system_activity( + action="create_directory", + status="info", + description=f"Created download directory: {new_path}" + ) + except OSError as e: + ActivityLog.log_system_activity( + action="create_directory", + status="error", + description=f"Failed to create download directory: {new_path}, Error: {str(e)}" + ) + return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None + # Check if the path is writable + if not os.access(new_path, os.W_OK): + ActivityLog.log_system_activity( + action="check_directory_permissions", + status="error", + description=f"Download path '{new_path}' is not writable." + ) + return False, f"Path '{new_path}' exists but is not writable by the application.", None + # --- End of validation --- + + config = DownloadPathConfig.query.first() + if not config: + config = DownloadPathConfig(path=new_path) + db.session.add(config) + else: + old_value = config.path + config.path = new_path + ActivityLog.log_config_change( + config_key="download_path", + old_value=old_value, + new_value=new_path, + description="Updated download path" + ) + + db.session.commit() + return True, "Download path updated successfully!", config + + except Exception as e: + db.session.rollback() + return False, f"Error updating download path: {str(e)}", None + def _update_schedule(schedule_data): """ @@ -106,11 +175,19 @@ def general(): volume_config = VolumeConfig(volume=100) # Default value db.session.add(volume_config) db.session.commit() - + + # Fetch download path config + download_path_config = DownloadPathConfig.query.first() + if not download_path_config: + download_path_config = DownloadPathConfig() # Use default from model + db.session.add(download_path_config) + db.session.commit() + return render_template( "config/index.html.jinja", active_tab="general", volume_config=volume_config, + download_path_config=download_path_config, # Pass to template max_volume=MAX_VOLUME, app_title="Configuration" ) @@ -151,17 +228,35 @@ def schedule(): ) -@bp.route("/update/volume", methods=["POST"]) -def update_volume(): - """Update volume configuration.""" - new_volume = request.form.get("total_volume", 0) - success, message, _ = _update_volume(new_volume) - - if success: - flash(message, "success") - else: - flash(message, "error") - +# Remove old update_volume route +# @bp.route("/update/volume", methods=["POST"]) +# def update_volume(): ... + +# Add new route to handle general settings form +@bp.route("/update/general", methods=["POST"]) +def update_general(): + """Update general configuration (Volume and Download Path).""" + volume_success, volume_message = True, "" + path_success, path_message = True, "" + + # Update Volume + new_volume = request.form.get("total_volume") + if new_volume is not None: + volume_success, volume_message, _ = _update_volume(new_volume) + if volume_success: + flash(volume_message, "success") + else: + flash(volume_message, "error") + + # Update Download Path + new_path = request.form.get("download_path") + if new_path is not None: + path_success, path_message, _ = _update_download_path(new_path) + if path_success: + flash(path_message, "success") + else: + flash(path_message, "error") + return redirect(url_for("config.general")) @@ -224,35 +319,46 @@ def api_update_config(): """API endpoint to update configuration.""" data = request.json response = {"success": True, "updates": []} - + try: # Update volume if provided if "volume" in data: - success, message, volume_config = _update_volume(data["volume"]) + success, message, _ = _update_volume(data["volume"]) response["updates"].append({ - "type": "volume", + "type": "volume", "success": success, "message": message }) if not success: response["success"] = False - + + # Update download path if provided + if "download_path" in data: + success, message, _ = _update_download_path(data["download_path"]) + response["updates"].append({ + "type": "download_path", + "success": success, + "message": message + }) + if not success: + response["success"] = False + # Update schedule if provided if "schedule" in data: success, message = _update_schedule(data["schedule"]) response["updates"].append({ - "type": "schedule", + "type": "schedule", "success": success, "message": message }) if not success: response["success"] = False - + return jsonify(response) - + except Exception as e: db.session.rollback() return jsonify({ - "success": False, + "success": False, "message": f"Unexpected error: {str(e)}" }) \ No newline at end of file diff --git a/scipaperloader/blueprints/scraper.py b/scipaperloader/blueprints/scraper.py index 9372013..6ab74cf 100644 --- a/scipaperloader/blueprints/scraper.py +++ b/scipaperloader/blueprints/scraper.py @@ -2,9 +2,11 @@ import random import json import time import math +import os # Import os for path joining from datetime import datetime, timedelta from flask import Blueprint, jsonify, render_template, request, current_app, flash -from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState +# Import the new model +from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig from ..db import db from ..celery import celery from ..defaults import MAX_VOLUME @@ -615,16 +617,27 @@ def dummy_process_paper(self, paper_id): return False if success: - # Update paper status to "Done" + # --- Get configured download path --- + download_base_path = DownloadPathConfig.get_path() + # Ensure the base path exists (optional, but good practice) + # os.makedirs(download_base_path, exist_ok=True) + + # --- Construct the file path --- + # Sanitize DOI for use in filename + safe_doi = paper.doi.replace('/', '_').replace(':', '_') + filename = f"{safe_doi}.pdf" + full_path = os.path.join(download_base_path, filename) + + # Update paper status to "Done" and set the file path paper.status = "Done" - paper.file_path = f"/path/to/dummy/papers/{paper.doi.replace('/', '_')}.pdf" + paper.file_path = full_path # Use the constructed path # Log success ActivityLog.log_scraper_activity( action="process_paper", paper_id=paper.id, status="success", - description=f"Successfully processed paper: {paper.doi}" + description=f"Successfully processed paper: {paper.doi}. File at: {full_path}" # Log path ) else: # Update paper status to "Failed" diff --git a/scipaperloader/models.py b/scipaperloader/models.py index 159ed22..781aa6b 100644 --- a/scipaperloader/models.py +++ b/scipaperloader/models.py @@ -210,6 +210,32 @@ class VolumeConfig(db.Model): id = db.Column(db.Integer, primary_key=True) volume = db.Column(db.Float) # volume of papers to scrape per day +class DownloadPathConfig(db.Model): + """Model to store the base path for downloaded files.""" + id = db.Column(db.Integer, primary_key=True) + path = db.Column(db.String(255), default="/path/to/dummy/papers") # Default path + + @classmethod + def get_path(cls): + """Get the configured download path, creating default if needed.""" + config = cls.query.first() + if not config: + config = cls(path="/path/to/dummy/papers") # Ensure default exists + db.session.add(config) + db.session.commit() + return config.path + + @classmethod + def set_path(cls, new_path): + """Set the download path.""" + config = cls.query.first() + if not config: + config = cls(path=new_path) + db.session.add(config) + else: + config.path = new_path + db.session.commit() + return config class ScraperState(db.Model): """Model to store the current state of the scraper.""" @@ -281,3 +307,9 @@ def init_schedule_config(): default_volume = VolumeConfig(volume=100) db.session.add(default_volume) db.session.commit() + + # Initialize DownloadPathConfig if it doesn't exist + if DownloadPathConfig.query.count() == 0: + default_path = DownloadPathConfig(path="/path/to/dummy/papers") + db.session.add(default_path) + db.session.commit() diff --git a/scipaperloader/templates/config/general.html.jinja b/scipaperloader/templates/config/general.html.jinja index 36e431e..e68f4d1 100644 --- a/scipaperloader/templates/config/general.html.jinja +++ b/scipaperloader/templates/config/general.html.jinja @@ -9,7 +9,7 @@ {% include "partials/flash_messages.html.jinja" %} -
+
Scraper Volume

Configure the total number of papers to scrape per day.

@@ -22,6 +22,18 @@
+
+
Download Path
+

Base directory where scraped paper files will be stored.

+
+ + +
Enter the full path to the download directory (e.g., /data/papers). + Ensure the directory exists and the application has write permissions.
+
+
+
System Settings

Configure general system behavior.