implements download path configuration

This commit is contained in:
Michael Beck 2025-04-16 22:03:17 +02:00
parent 5af3d00e39
commit 11f086aa64
4 changed files with 189 additions and 26 deletions

View File

@ -1,8 +1,10 @@
"""Configuration management blueprint.""" """Configuration management blueprint."""
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
from ..db import db from ..db import db
from ..models import VolumeConfig, ScheduleConfig, ActivityLog # Import the new model
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
from ..defaults import MAX_VOLUME from ..defaults import MAX_VOLUME
import os # Import os for path validation
bp = Blueprint("config", __name__, url_prefix="/config") bp = Blueprint("config", __name__, url_prefix="/config")
@ -43,7 +45,74 @@ def _update_volume(new_volume):
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
db.session.rollback() db.session.rollback()
return False, f"Error updating volume: {str(e)}", None return False, f"Error updating volume: {str(e)}", None
# Helper that validates and stores the download path
def _update_download_path(new_path):
"""
Helper function to update download path configuration.
Args:
new_path (str): The new download path
Returns:
tuple: (success, message, download_path_config)
"""
try:
# Basic validation: check if it's a non-empty string
if not new_path or not isinstance(new_path, str):
return False, "Download path cannot be empty.", None
# --- Add more validation like checking if path exists or is writable ---
# Check if the path exists and is a directory
if not os.path.isdir(new_path):
# Try to create it if it doesn't exist
try:
os.makedirs(new_path, exist_ok=True)
ActivityLog.log_system_activity(
action="create_directory",
status="info",
description=f"Created download directory: {new_path}"
)
except OSError as e:
ActivityLog.log_system_activity(
action="create_directory",
status="error",
description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
)
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
# Check if the path is writable
if not os.access(new_path, os.W_OK):
ActivityLog.log_system_activity(
action="check_directory_permissions",
status="error",
description=f"Download path '{new_path}' is not writable."
)
return False, f"Path '{new_path}' exists but is not writable by the application.", None
# --- End of validation ---
config = DownloadPathConfig.query.first()
if not config:
config = DownloadPathConfig(path=new_path)
db.session.add(config)
else:
old_value = config.path
config.path = new_path
ActivityLog.log_config_change(
config_key="download_path",
old_value=old_value,
new_value=new_path,
description="Updated download path"
)
db.session.commit()
return True, "Download path updated successfully!", config
except Exception as e:
db.session.rollback()
return False, f"Error updating download path: {str(e)}", None
def _update_schedule(schedule_data): def _update_schedule(schedule_data):
""" """
@ -106,11 +175,19 @@ def general():
volume_config = VolumeConfig(volume=100) # Default value volume_config = VolumeConfig(volume=100) # Default value
db.session.add(volume_config) db.session.add(volume_config)
db.session.commit() db.session.commit()
# Fetch download path config
download_path_config = DownloadPathConfig.query.first()
if not download_path_config:
download_path_config = DownloadPathConfig() # Use default from model
db.session.add(download_path_config)
db.session.commit()
return render_template( return render_template(
"config/index.html.jinja", "config/index.html.jinja",
active_tab="general", active_tab="general",
volume_config=volume_config, volume_config=volume_config,
download_path_config=download_path_config, # Pass to template
max_volume=MAX_VOLUME, max_volume=MAX_VOLUME,
app_title="Configuration" app_title="Configuration"
) )
@ -151,17 +228,35 @@ def schedule():
) )
@bp.route("/update/volume", methods=["POST"]) # Remove old update_volume route
def update_volume(): # @bp.route("/update/volume", methods=["POST"])
"""Update volume configuration.""" # def update_volume(): ...
new_volume = request.form.get("total_volume", 0)
success, message, _ = _update_volume(new_volume) # Add new route to handle general settings form
@bp.route("/update/general", methods=["POST"])
if success: def update_general():
flash(message, "success") """Update general configuration (Volume and Download Path)."""
else: volume_success, volume_message = True, ""
flash(message, "error") path_success, path_message = True, ""
# Update Volume
new_volume = request.form.get("total_volume")
if new_volume is not None:
volume_success, volume_message, _ = _update_volume(new_volume)
if volume_success:
flash(volume_message, "success")
else:
flash(volume_message, "error")
# Update Download Path
new_path = request.form.get("download_path")
if new_path is not None:
path_success, path_message, _ = _update_download_path(new_path)
if path_success:
flash(path_message, "success")
else:
flash(path_message, "error")
return redirect(url_for("config.general")) return redirect(url_for("config.general"))
@ -224,35 +319,46 @@ def api_update_config():
"""API endpoint to update configuration.""" """API endpoint to update configuration."""
data = request.json data = request.json
response = {"success": True, "updates": []} response = {"success": True, "updates": []}
try: try:
# Update volume if provided # Update volume if provided
if "volume" in data: if "volume" in data:
success, message, volume_config = _update_volume(data["volume"]) success, message, _ = _update_volume(data["volume"])
response["updates"].append({ response["updates"].append({
"type": "volume", "type": "volume",
"success": success, "success": success,
"message": message "message": message
}) })
if not success: if not success:
response["success"] = False response["success"] = False
# Update download path if provided
if "download_path" in data:
success, message, _ = _update_download_path(data["download_path"])
response["updates"].append({
"type": "download_path",
"success": success,
"message": message
})
if not success:
response["success"] = False
# Update schedule if provided # Update schedule if provided
if "schedule" in data: if "schedule" in data:
success, message = _update_schedule(data["schedule"]) success, message = _update_schedule(data["schedule"])
response["updates"].append({ response["updates"].append({
"type": "schedule", "type": "schedule",
"success": success, "success": success,
"message": message "message": message
}) })
if not success: if not success:
response["success"] = False response["success"] = False
return jsonify(response) return jsonify(response)
except Exception as e: except Exception as e:
db.session.rollback() db.session.rollback()
return jsonify({ return jsonify({
"success": False, "success": False,
"message": f"Unexpected error: {str(e)}" "message": f"Unexpected error: {str(e)}"
}) })

View File

@ -2,9 +2,11 @@ import random
import json import json
import time import time
import math import math
import os # Import os for path joining
from datetime import datetime, timedelta from datetime import datetime, timedelta
from flask import Blueprint, jsonify, render_template, request, current_app, flash from flask import Blueprint, jsonify, render_template, request, current_app, flash
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState # Import the new model
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
from ..db import db from ..db import db
from ..celery import celery from ..celery import celery
from ..defaults import MAX_VOLUME from ..defaults import MAX_VOLUME
@ -615,16 +617,27 @@ def dummy_process_paper(self, paper_id):
return False return False
if success: if success:
# Update paper status to "Done" # --- Get configured download path ---
download_base_path = DownloadPathConfig.get_path()
# Ensure the base path exists (optional, but good practice)
# os.makedirs(download_base_path, exist_ok=True)
# --- Construct the file path ---
# Sanitize DOI for use in filename
safe_doi = paper.doi.replace('/', '_').replace(':', '_')
filename = f"{safe_doi}.pdf"
full_path = os.path.join(download_base_path, filename)
# Update paper status to "Done" and set the file path
paper.status = "Done" paper.status = "Done"
paper.file_path = f"/path/to/dummy/papers/{paper.doi.replace('/', '_')}.pdf" paper.file_path = full_path # Use the constructed path
# Log success # Log success
ActivityLog.log_scraper_activity( ActivityLog.log_scraper_activity(
action="process_paper", action="process_paper",
paper_id=paper.id, paper_id=paper.id,
status="success", status="success",
description=f"Successfully processed paper: {paper.doi}" description=f"Successfully processed paper: {paper.doi}. File at: {full_path}" # Log path
) )
else: else:
# Update paper status to "Failed" # Update paper status to "Failed"

View File

@ -210,6 +210,32 @@ class VolumeConfig(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
volume = db.Column(db.Float) # volume of papers to scrape per day volume = db.Column(db.Float) # volume of papers to scrape per day
class DownloadPathConfig(db.Model):
    """Database model holding the base directory for downloaded files."""

    id = db.Column(db.Integer, primary_key=True)
    # Base download directory; falls back to the dummy path when unset.
    path = db.Column(db.String(255), default="/path/to/dummy/papers")

    @classmethod
    def get_path(cls):
        """Return the stored download path.

        When no row exists yet, a row with the default path is added and
        committed first, and that default path is returned.
        """
        existing = cls.query.first()
        if existing:
            return existing.path

        # No row yet: persist one carrying the default path.
        created = cls(path="/path/to/dummy/papers")
        db.session.add(created)
        db.session.commit()
        return created.path

    @classmethod
    def set_path(cls, new_path):
        """Store ``new_path`` as the download path and return the config row.

        Updates the first existing row, or adds a new row when none exists,
        then commits the session.
        """
        row = cls.query.first()
        if row:
            row.path = new_path
        else:
            row = cls(path=new_path)
            db.session.add(row)

        db.session.commit()
        return row
class ScraperState(db.Model): class ScraperState(db.Model):
"""Model to store the current state of the scraper.""" """Model to store the current state of the scraper."""
@ -281,3 +307,9 @@ def init_schedule_config():
default_volume = VolumeConfig(volume=100) default_volume = VolumeConfig(volume=100)
db.session.add(default_volume) db.session.add(default_volume)
db.session.commit() db.session.commit()
# Initialize DownloadPathConfig if it doesn't exist
if DownloadPathConfig.query.count() == 0:
default_path = DownloadPathConfig(path="/path/to/dummy/papers")
db.session.add(default_path)
db.session.commit()

View File

@ -9,7 +9,7 @@
<!-- include flash messages template --> <!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %} {% include "partials/flash_messages.html.jinja" %}
<form action="{{ url_for('config.update_volume') }}" method="post"> <form action="{{ url_for('config.update_general') }}" method="post">
<div class="form-section"> <div class="form-section">
<h6>Scraper Volume</h6> <h6>Scraper Volume</h6>
<p class="text-muted">Configure the total number of papers to scrape per day.</p> <p class="text-muted">Configure the total number of papers to scrape per day.</p>
@ -22,6 +22,18 @@
</div> </div>
</div> </div>
<div class="form-section">
<h6>Download Path</h6>
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
<div class="mb-3">
<label for="downloadPath" class="form-label">Download Directory:</label>
<input type="text" class="form-control" id="downloadPath" name="download_path"
value="{{ download_path_config.path }}" required>
<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
Ensure the directory exists and the application has write permissions.</div>
</div>
</div>
<div class="form-section"> <div class="form-section">
<h6>System Settings</h6> <h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p> <p class="text-muted">Configure general system behavior.</p>