implements download path configuration

This commit is contained in:
Michael Beck 2025-04-16 22:03:17 +02:00
parent 5af3d00e39
commit 11f086aa64
4 changed files with 189 additions and 26 deletions

View File

@ -1,8 +1,10 @@
"""Configuration management blueprint."""
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
from ..db import db
from ..models import VolumeConfig, ScheduleConfig, ActivityLog
# Import the new model
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
from ..defaults import MAX_VOLUME
import os # Import os for path validation
bp = Blueprint("config", __name__, url_prefix="/config")
@ -43,7 +45,74 @@ def _update_volume(new_volume):
except (ValueError, TypeError) as e:
db.session.rollback()
return False, f"Error updating volume: {str(e)}", None
# Add helper for download path
def _update_download_path(new_path):
"""
Helper function to update download path configuration.
Args:
new_path (str): The new download path
Returns:
tuple: (success, message, download_path_config)
"""
try:
# Basic validation: check if it's a non-empty string
if not new_path or not isinstance(new_path, str):
return False, "Download path cannot be empty.", None
# --- Add more validation like checking if path exists or is writable ---
# Check if the path exists and is a directory
if not os.path.isdir(new_path):
# Try to create it if it doesn't exist
try:
os.makedirs(new_path, exist_ok=True)
ActivityLog.log_system_activity(
action="create_directory",
status="info",
description=f"Created download directory: {new_path}"
)
except OSError as e:
ActivityLog.log_system_activity(
action="create_directory",
status="error",
description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
)
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
# Check if the path is writable
if not os.access(new_path, os.W_OK):
ActivityLog.log_system_activity(
action="check_directory_permissions",
status="error",
description=f"Download path '{new_path}' is not writable."
)
return False, f"Path '{new_path}' exists but is not writable by the application.", None
# --- End of validation ---
config = DownloadPathConfig.query.first()
if not config:
config = DownloadPathConfig(path=new_path)
db.session.add(config)
else:
old_value = config.path
config.path = new_path
ActivityLog.log_config_change(
config_key="download_path",
old_value=old_value,
new_value=new_path,
description="Updated download path"
)
db.session.commit()
return True, "Download path updated successfully!", config
except Exception as e:
db.session.rollback()
return False, f"Error updating download path: {str(e)}", None
def _update_schedule(schedule_data):
"""
@ -106,11 +175,19 @@ def general():
volume_config = VolumeConfig(volume=100) # Default value
db.session.add(volume_config)
db.session.commit()
# Fetch download path config
download_path_config = DownloadPathConfig.query.first()
if not download_path_config:
download_path_config = DownloadPathConfig() # Use default from model
db.session.add(download_path_config)
db.session.commit()
return render_template(
"config/index.html.jinja",
active_tab="general",
volume_config=volume_config,
download_path_config=download_path_config, # Pass to template
max_volume=MAX_VOLUME,
app_title="Configuration"
)
@ -151,17 +228,35 @@ def schedule():
)
@bp.route("/update/volume", methods=["POST"])
def update_volume():
"""Update volume configuration."""
new_volume = request.form.get("total_volume", 0)
success, message, _ = _update_volume(new_volume)
if success:
flash(message, "success")
else:
flash(message, "error")
# Remove old update_volume route
# @bp.route("/update/volume", methods=["POST"])
# def update_volume(): ...
# Add new route to handle general settings form
@bp.route("/update/general", methods=["POST"])
def update_general():
    """Handle the general settings form (volume and download path).

    Each setting is only updated when its form field was submitted. The
    outcome of every attempted update is flashed to the user, after which
    the request is redirected back to the general configuration page.
    """
    # (form field name, helper that applies the update), processed in order.
    field_updaters = (
        ("total_volume", _update_volume),
        ("download_path", _update_download_path),
    )

    for field_name, apply_update in field_updaters:
        submitted = request.form.get(field_name)
        if submitted is None:
            # Field absent from the form: leave this setting untouched.
            continue
        ok, message, _ = apply_update(submitted)
        flash(message, "success" if ok else "error")

    return redirect(url_for("config.general"))
@ -224,35 +319,46 @@ def api_update_config():
"""API endpoint to update configuration."""
data = request.json
response = {"success": True, "updates": []}
try:
# Update volume if provided
if "volume" in data:
success, message, volume_config = _update_volume(data["volume"])
success, message, _ = _update_volume(data["volume"])
response["updates"].append({
"type": "volume",
"type": "volume",
"success": success,
"message": message
})
if not success:
response["success"] = False
# Update download path if provided
if "download_path" in data:
success, message, _ = _update_download_path(data["download_path"])
response["updates"].append({
"type": "download_path",
"success": success,
"message": message
})
if not success:
response["success"] = False
# Update schedule if provided
if "schedule" in data:
success, message = _update_schedule(data["schedule"])
response["updates"].append({
"type": "schedule",
"type": "schedule",
"success": success,
"message": message
})
if not success:
response["success"] = False
return jsonify(response)
except Exception as e:
db.session.rollback()
return jsonify({
"success": False,
"success": False,
"message": f"Unexpected error: {str(e)}"
})

View File

@ -2,9 +2,11 @@ import random
import json
import time
import math
import os # Import os for path joining
from datetime import datetime, timedelta
from flask import Blueprint, jsonify, render_template, request, current_app, flash
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState
# Import the new model
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
from ..db import db
from ..celery import celery
from ..defaults import MAX_VOLUME
@ -615,16 +617,27 @@ def dummy_process_paper(self, paper_id):
return False
if success:
# Update paper status to "Done"
# --- Get configured download path ---
download_base_path = DownloadPathConfig.get_path()
# Ensure the base path exists (optional, but good practice)
# os.makedirs(download_base_path, exist_ok=True)
# --- Construct the file path ---
# Sanitize DOI for use in filename
safe_doi = paper.doi.replace('/', '_').replace(':', '_')
filename = f"{safe_doi}.pdf"
full_path = os.path.join(download_base_path, filename)
# Update paper status to "Done" and set the file path
paper.status = "Done"
paper.file_path = f"/path/to/dummy/papers/{paper.doi.replace('/', '_')}.pdf"
paper.file_path = full_path # Use the constructed path
# Log success
ActivityLog.log_scraper_activity(
action="process_paper",
paper_id=paper.id,
status="success",
description=f"Successfully processed paper: {paper.doi}"
description=f"Successfully processed paper: {paper.doi}. File at: {full_path}" # Log path
)
else:
# Update paper status to "Failed"

View File

@ -210,6 +210,32 @@ class VolumeConfig(db.Model):
id = db.Column(db.Integer, primary_key=True)
volume = db.Column(db.Float) # volume of papers to scrape per day
class DownloadPathConfig(db.Model):
    """Model to store the base path for downloaded files."""

    # Single definition of the fallback directory, shared by the column
    # default and by get_path() so the two cannot drift apart.
    DEFAULT_PATH = "/path/to/dummy/papers"

    id = db.Column(db.Integer, primary_key=True)
    path = db.Column(db.String(255), default=DEFAULT_PATH)  # Base download directory

    @classmethod
    def get_path(cls):
        """Return the configured download path, creating the default row if needed.

        Returns:
            str: The stored path, or ``DEFAULT_PATH`` when no usable value
            is stored.
        """
        config = cls.query.first()
        if not config:
            config = cls(path=cls.DEFAULT_PATH)  # Ensure default exists
            db.session.add(config)
            db.session.commit()
        # The column is not declared NOT NULL, so an existing row may hold
        # NULL or an empty string; fall back to the default so callers
        # always receive a usable path string.
        return config.path or cls.DEFAULT_PATH

    @classmethod
    def set_path(cls, new_path):
        """Store ``new_path`` as the download path and commit.

        Args:
            new_path: The path value to persist.

        Returns:
            DownloadPathConfig: The created or updated configuration row.
        """
        config = cls.query.first()
        if not config:
            config = cls(path=new_path)
            db.session.add(config)
        else:
            config.path = new_path
        db.session.commit()
        return config
class ScraperState(db.Model):
"""Model to store the current state of the scraper."""
@ -281,3 +307,9 @@ def init_schedule_config():
default_volume = VolumeConfig(volume=100)
db.session.add(default_volume)
db.session.commit()
# Initialize DownloadPathConfig if it doesn't exist
if DownloadPathConfig.query.count() == 0:
default_path = DownloadPathConfig(path="/path/to/dummy/papers")
db.session.add(default_path)
db.session.commit()

View File

@ -9,7 +9,7 @@
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
<form action="{{ url_for('config.update_volume') }}" method="post">
<form action="{{ url_for('config.update_general') }}" method="post">
<div class="form-section">
<h6>Scraper Volume</h6>
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
@ -22,6 +22,18 @@
</div>
</div>
<div class="form-section">
<h6>Download Path</h6>
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
<div class="mb-3">
<label for="downloadPath" class="form-label">Download Directory:</label>
<input type="text" class="form-control" id="downloadPath" name="download_path"
value="{{ download_path_config.path }}" required>
<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
The directory is created if it does not exist; the application must have write permission for it.</div>
</div>
</div>
<div class="form-section">
<h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p>