implements download path configuration
This commit is contained in:
parent
5af3d00e39
commit
11f086aa64
@ -1,8 +1,10 @@
|
|||||||
"""Configuration management blueprint."""
|
"""Configuration management blueprint."""
|
||||||
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
|
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
|
||||||
from ..db import db
|
from ..db import db
|
||||||
from ..models import VolumeConfig, ScheduleConfig, ActivityLog
|
# Import the new model
|
||||||
|
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
|
||||||
from ..defaults import MAX_VOLUME
|
from ..defaults import MAX_VOLUME
|
||||||
|
import os # Import os for path validation
|
||||||
|
|
||||||
bp = Blueprint("config", __name__, url_prefix="/config")
|
bp = Blueprint("config", __name__, url_prefix="/config")
|
||||||
|
|
||||||
@ -45,6 +47,73 @@ def _update_volume(new_volume):
|
|||||||
return False, f"Error updating volume: {str(e)}", None
|
return False, f"Error updating volume: {str(e)}", None
|
||||||
|
|
||||||
|
|
||||||
|
# Add helper for download path
|
||||||
|
def _update_download_path(new_path):
|
||||||
|
"""
|
||||||
|
Helper function to update download path configuration.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
new_path (str): The new download path
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple: (success, message, download_path_config)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Basic validation: check if it's a non-empty string
|
||||||
|
if not new_path or not isinstance(new_path, str):
|
||||||
|
return False, "Download path cannot be empty.", None
|
||||||
|
|
||||||
|
# --- Add more validation like checking if path exists or is writable ---
|
||||||
|
# Check if the path exists and is a directory
|
||||||
|
if not os.path.isdir(new_path):
|
||||||
|
# Try to create it if it doesn't exist
|
||||||
|
try:
|
||||||
|
os.makedirs(new_path, exist_ok=True)
|
||||||
|
ActivityLog.log_system_activity(
|
||||||
|
action="create_directory",
|
||||||
|
status="info",
|
||||||
|
description=f"Created download directory: {new_path}"
|
||||||
|
)
|
||||||
|
except OSError as e:
|
||||||
|
ActivityLog.log_system_activity(
|
||||||
|
action="create_directory",
|
||||||
|
status="error",
|
||||||
|
description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
|
||||||
|
)
|
||||||
|
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
|
||||||
|
|
||||||
|
# Check if the path is writable
|
||||||
|
if not os.access(new_path, os.W_OK):
|
||||||
|
ActivityLog.log_system_activity(
|
||||||
|
action="check_directory_permissions",
|
||||||
|
status="error",
|
||||||
|
description=f"Download path '{new_path}' is not writable."
|
||||||
|
)
|
||||||
|
return False, f"Path '{new_path}' exists but is not writable by the application.", None
|
||||||
|
# --- End of validation ---
|
||||||
|
|
||||||
|
config = DownloadPathConfig.query.first()
|
||||||
|
if not config:
|
||||||
|
config = DownloadPathConfig(path=new_path)
|
||||||
|
db.session.add(config)
|
||||||
|
else:
|
||||||
|
old_value = config.path
|
||||||
|
config.path = new_path
|
||||||
|
ActivityLog.log_config_change(
|
||||||
|
config_key="download_path",
|
||||||
|
old_value=old_value,
|
||||||
|
new_value=new_path,
|
||||||
|
description="Updated download path"
|
||||||
|
)
|
||||||
|
|
||||||
|
db.session.commit()
|
||||||
|
return True, "Download path updated successfully!", config
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
db.session.rollback()
|
||||||
|
return False, f"Error updating download path: {str(e)}", None
|
||||||
|
|
||||||
|
|
||||||
def _update_schedule(schedule_data):
|
def _update_schedule(schedule_data):
|
||||||
"""
|
"""
|
||||||
Helper function to update schedule configuration.
|
Helper function to update schedule configuration.
|
||||||
@ -107,10 +176,18 @@ def general():
|
|||||||
db.session.add(volume_config)
|
db.session.add(volume_config)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
# Fetch download path config
|
||||||
|
download_path_config = DownloadPathConfig.query.first()
|
||||||
|
if not download_path_config:
|
||||||
|
download_path_config = DownloadPathConfig() # Use default from model
|
||||||
|
db.session.add(download_path_config)
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
return render_template(
|
return render_template(
|
||||||
"config/index.html.jinja",
|
"config/index.html.jinja",
|
||||||
active_tab="general",
|
active_tab="general",
|
||||||
volume_config=volume_config,
|
volume_config=volume_config,
|
||||||
|
download_path_config=download_path_config, # Pass to template
|
||||||
max_volume=MAX_VOLUME,
|
max_volume=MAX_VOLUME,
|
||||||
app_title="Configuration"
|
app_title="Configuration"
|
||||||
)
|
)
|
||||||
@ -151,16 +228,34 @@ def schedule():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route("/update/volume", methods=["POST"])
|
# Remove old update_volume route
|
||||||
def update_volume():
|
# @bp.route("/update/volume", methods=["POST"])
|
||||||
"""Update volume configuration."""
|
# def update_volume(): ...
|
||||||
new_volume = request.form.get("total_volume", 0)
|
|
||||||
success, message, _ = _update_volume(new_volume)
|
|
||||||
|
|
||||||
if success:
|
# Add new route to handle general settings form
|
||||||
flash(message, "success")
|
@bp.route("/update/general", methods=["POST"])
def update_general():
    """Update general configuration (Volume and Download Path)."""
    # Pair each form field with the helper that persists it. The order is
    # significant: volume is handled (and flashed) before the download path.
    field_handlers = (
        ("total_volume", _update_volume),
        ("download_path", _update_download_path),
    )

    for field_name, apply_update in field_handlers:
        submitted = request.form.get(field_name)
        if submitted is None:
            # Field absent from the form: leave that setting untouched.
            continue
        succeeded, feedback, _ = apply_update(submitted)
        flash(feedback, "success" if succeeded else "error")

    return redirect(url_for("config.general"))
|
||||||
|
|
||||||
@ -228,7 +323,7 @@ def api_update_config():
|
|||||||
try:
|
try:
|
||||||
# Update volume if provided
|
# Update volume if provided
|
||||||
if "volume" in data:
|
if "volume" in data:
|
||||||
success, message, volume_config = _update_volume(data["volume"])
|
success, message, _ = _update_volume(data["volume"])
|
||||||
response["updates"].append({
|
response["updates"].append({
|
||||||
"type": "volume",
|
"type": "volume",
|
||||||
"success": success,
|
"success": success,
|
||||||
@ -237,6 +332,17 @@ def api_update_config():
|
|||||||
if not success:
|
if not success:
|
||||||
response["success"] = False
|
response["success"] = False
|
||||||
|
|
||||||
|
# Update download path if provided
|
||||||
|
if "download_path" in data:
|
||||||
|
success, message, _ = _update_download_path(data["download_path"])
|
||||||
|
response["updates"].append({
|
||||||
|
"type": "download_path",
|
||||||
|
"success": success,
|
||||||
|
"message": message
|
||||||
|
})
|
||||||
|
if not success:
|
||||||
|
response["success"] = False
|
||||||
|
|
||||||
# Update schedule if provided
|
# Update schedule if provided
|
||||||
if "schedule" in data:
|
if "schedule" in data:
|
||||||
success, message = _update_schedule(data["schedule"])
|
success, message = _update_schedule(data["schedule"])
|
||||||
|
@ -2,9 +2,11 @@ import random
|
|||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
|
import os # Import os for path joining
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from flask import Blueprint, jsonify, render_template, request, current_app, flash
|
from flask import Blueprint, jsonify, render_template, request, current_app, flash
|
||||||
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState
|
# Import the new model
|
||||||
|
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
|
||||||
from ..db import db
|
from ..db import db
|
||||||
from ..celery import celery
|
from ..celery import celery
|
||||||
from ..defaults import MAX_VOLUME
|
from ..defaults import MAX_VOLUME
|
||||||
@ -615,16 +617,27 @@ def dummy_process_paper(self, paper_id):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
# Update paper status to "Done"
|
# --- Get configured download path ---
|
||||||
|
download_base_path = DownloadPathConfig.get_path()
|
||||||
|
# Ensure the base path exists (optional, but good practice)
|
||||||
|
# os.makedirs(download_base_path, exist_ok=True)
|
||||||
|
|
||||||
|
# --- Construct the file path ---
|
||||||
|
# Sanitize DOI for use in filename
|
||||||
|
safe_doi = paper.doi.replace('/', '_').replace(':', '_')
|
||||||
|
filename = f"{safe_doi}.pdf"
|
||||||
|
full_path = os.path.join(download_base_path, filename)
|
||||||
|
|
||||||
|
# Update paper status to "Done" and set the file path
|
||||||
paper.status = "Done"
|
paper.status = "Done"
|
||||||
paper.file_path = f"/path/to/dummy/papers/{paper.doi.replace('/', '_')}.pdf"
|
paper.file_path = full_path # Use the constructed path
|
||||||
|
|
||||||
# Log success
|
# Log success
|
||||||
ActivityLog.log_scraper_activity(
|
ActivityLog.log_scraper_activity(
|
||||||
action="process_paper",
|
action="process_paper",
|
||||||
paper_id=paper.id,
|
paper_id=paper.id,
|
||||||
status="success",
|
status="success",
|
||||||
description=f"Successfully processed paper: {paper.doi}"
|
description=f"Successfully processed paper: {paper.doi}. File at: {full_path}" # Log path
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Update paper status to "Failed"
|
# Update paper status to "Failed"
|
||||||
|
@ -210,6 +210,32 @@ class VolumeConfig(db.Model):
|
|||||||
id = db.Column(db.Integer, primary_key=True)
|
id = db.Column(db.Integer, primary_key=True)
|
||||||
volume = db.Column(db.Float) # volume of papers to scrape per day
|
volume = db.Column(db.Float) # volume of papers to scrape per day
|
||||||
|
|
||||||
|
class DownloadPathConfig(db.Model):
    """Model to store the base path for downloaded files.

    The helpers below always operate on the first row returned by
    ``cls.query.first()`` and create that row when it is missing.
    """

    # Single source of truth for the fallback location; used both as the
    # column default and when get_path() has to create the row.
    DEFAULT_PATH = "/path/to/dummy/papers"

    id = db.Column(db.Integer, primary_key=True)
    path = db.Column(db.String(255), default=DEFAULT_PATH)  # Default path

    @classmethod
    def get_path(cls):
        """Get the configured download path, creating default if needed.

        Returns:
            str: The stored path, or DEFAULT_PATH when the stored value is
            NULL or empty, so callers can always join it with a filename.
        """
        config = cls.query.first()
        if not config:
            config = cls(path=cls.DEFAULT_PATH)  # Ensure default exists
            db.session.add(config)
            db.session.commit()
        # A row with a NULL/empty path would otherwise hand None to callers.
        return config.path or cls.DEFAULT_PATH

    @classmethod
    def set_path(cls, new_path):
        """Set the download path.

        Args:
            new_path (str): The path to store.

        Returns:
            DownloadPathConfig: The created or updated configuration row.
        """
        config = cls.query.first()
        if not config:
            config = cls(path=new_path)
            db.session.add(config)
        else:
            config.path = new_path
        db.session.commit()
        return config
|
||||||
|
|
||||||
class ScraperState(db.Model):
|
class ScraperState(db.Model):
|
||||||
"""Model to store the current state of the scraper."""
|
"""Model to store the current state of the scraper."""
|
||||||
@ -281,3 +307,9 @@ def init_schedule_config():
|
|||||||
default_volume = VolumeConfig(volume=100)
|
default_volume = VolumeConfig(volume=100)
|
||||||
db.session.add(default_volume)
|
db.session.add(default_volume)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
# Initialize DownloadPathConfig if it doesn't exist
|
||||||
|
if DownloadPathConfig.query.count() == 0:
|
||||||
|
default_path = DownloadPathConfig(path="/path/to/dummy/papers")
|
||||||
|
db.session.add(default_path)
|
||||||
|
db.session.commit()
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
<!-- include flash messages template -->
|
<!-- include flash messages template -->
|
||||||
{% include "partials/flash_messages.html.jinja" %}
|
{% include "partials/flash_messages.html.jinja" %}
|
||||||
|
|
||||||
<form action="{{ url_for('config.update_volume') }}" method="post">
|
<form action="{{ url_for('config.update_general') }}" method="post">
|
||||||
<div class="form-section">
|
<div class="form-section">
|
||||||
<h6>Scraper Volume</h6>
|
<h6>Scraper Volume</h6>
|
||||||
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
|
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
|
||||||
@ -22,6 +22,18 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="form-section">
|
||||||
|
<h6>Download Path</h6>
|
||||||
|
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
|
||||||
|
<div class="mb-3">
|
||||||
|
<label for="downloadPath" class="form-label">Download Directory:</label>
|
||||||
|
<input type="text" class="form-control" id="downloadPath" name="download_path"
|
||||||
|
value="{{ download_path_config.path }}" required>
|
||||||
|
<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
|
||||||
|
Ensure the directory exists and the application has write permissions.</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="form-section">
|
<div class="form-section">
|
||||||
<h6>System Settings</h6>
|
<h6>System Settings</h6>
|
||||||
<p class="text-muted">Configure general system behavior.</p>
|
<p class="text-muted">Configure general system behavior.</p>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user