modularizes the scraper methods
This commit is contained in:
parent 11f086aa64
commit 8f2375215d
@@ -1,10 +1,11 @@
"""Configuration management blueprint."""
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
from ..db import db
# Import the new model
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
from ..defaults import MAX_VOLUME
import os  # Import os for path validation
from scipaperloader.scrapers import __path__ as scrapers_path

bp = Blueprint("config", __name__, url_prefix="/config")

@@ -281,6 +282,46 @@ def update_schedule():
    return redirect(url_for("config.schedule"))


@bp.route("/update/scraper_module", methods=["POST"])
def update_scraper_module():
    """Update the scraper module configuration."""
    from ..models import ScraperModuleConfig

    new_scraper_module = request.form.get("scraper_module")
    if not new_scraper_module:
        flash("Scraper module cannot be empty.", "error")
        return redirect(url_for("config.general"))

    # Validate that the module exists and is valid
    from scipaperloader.scrapers.factory import get_available_scrapers
    available_modules = [m["name"] for m in get_available_scrapers()]

    if new_scraper_module not in available_modules:
        flash(f"Invalid scraper module: {new_scraper_module}", "error")
        return redirect(url_for("config.general"))

    # Update the database configuration
    ScraperModuleConfig.set_module(new_scraper_module)
    flash(f"Scraper module updated to '{new_scraper_module}'.", "success")
    return redirect(url_for("config.general"))


@bp.context_processor
def inject_scraper_modules():
    """Inject available scraper modules into the template context."""
    from scipaperloader.scrapers.factory import get_available_scrapers
    from ..models import ScraperModuleConfig

    available_scrapers = get_available_scrapers()
    current_module = ScraperModuleConfig.get_current_module()

    return {
        "available_scraper_modules": [s["name"] for s in available_scrapers],
        "current_scraper_module": current_module,
        "scraper_details": {s["name"]: s for s in available_scrapers}
    }


@bp.route("/api/schedule/stats")
def schedule_stats():
    """Get statistics about the current schedule configuration."""
@@ -12,6 +12,7 @@ from ..celery import celery
from ..defaults import MAX_VOLUME
from celery.schedules import crontab
from sqlalchemy import func
from scipaperloader.scrapers.factory import get_scraper

bp = Blueprint("scraper", __name__, url_prefix="/scraper")

@@ -153,7 +154,7 @@ def stop_scraper():

    # Stop any running tasks
    task_types_to_revoke = [
        'scipaperloader.blueprints.scraper.dummy_process_paper',
        'scipaperloader.blueprints.scraper.process_paper',
        'scipaperloader.blueprints.scraper.dummy_scheduled_scraper',
        'scipaperloader.blueprints.scraper.run_periodic_dummy_scraper'
    ]
@@ -224,7 +225,7 @@ def pause_scraper():
    # Just revoke processing tasks, but leave the periodic tasks running
    # so it can continue to check the state (which is now paused)
    task_types_to_revoke = [
        'scipaperloader.blueprints.scraper.dummy_process_paper',
        'scipaperloader.blueprints.scraper.process_paper',
        'scipaperloader.blueprints.scraper.dummy_scheduled_scraper'
    ]

@@ -373,70 +374,7 @@ def update_config():
        return jsonify({"success": False, "message": f"Unexpected error: {str(e)}"})


@celery.task(bind=True)
def dummy_scrape_paper(self):
    """Simulate scraping a single paper."""
    # Simulate success or failure
    success = random.random() > 0.3  # 70% success rate

    # Simulate processing time
    import time
    time.sleep(random.randint(2, 5))  # 2-5 seconds

    if success:
        # Create a dummy paper
        new_paper = PaperMetadata(
            title=f"Dummy Paper {random.randint(1000, 9999)}",
            doi=f"10.1234/dummy.{random.randint(1000, 9999)}",
            journal=random.choice([
                "Nature", "Science", "PLOS ONE", "Journal of Dummy Research",
                "Proceedings of the Dummy Society", "Cell", "Dummy Review Letters"
            ]),
            type="article",
            language="en",
            published_online=datetime.now().date(),
            status="Done",
            file_path="/path/to/dummy/paper.pdf"
        )

        db.session.add(new_paper)
        db.session.commit()

        # Log the successful scrape
        ActivityLog.log_scraper_activity(
            action="scrape_paper",
            paper_id=new_paper.id,
            status="success",
            description=f"Successfully scraped paper {new_paper.doi}"
        )

        return {
            "success": True,
            "paper_id": new_paper.id,
            "title": new_paper.title,
            "doi": new_paper.doi
        }
    else:
        # Log the failed scrape
        error_message = random.choice([
            "Connection timeout",
            "404 Not Found",
            "Access denied",
            "Invalid DOI format",
            "PDF download failed",
            "Rate limited by publisher"
        ])

        ActivityLog.log_scraper_activity(
            action="scrape_paper",
            status="error",
            description=f"Failed to scrape paper: {error_message}"
        )

        return {
            "success": False,
            "error": error_message
        }


@celery.task
@@ -545,11 +483,11 @@ def dummy_scheduled_scraper():
    )

    # --- Now schedule processing for the newly selected "Pending" papers ---
    # (Assuming dummy_process_paper takes a paper_id)
    # (Using the new modular process_paper task)
    # Add random delays for processing within the hour (e.g., up to 3600 seconds)
    for paper_id in selected_paper_ids:
        delay = random.uniform(1, 3500)  # Random delay up to ~58 minutes
        dummy_process_paper.apply_async(args=[paper_id], countdown=delay)
        process_paper.apply_async(args=[paper_id], countdown=delay)

    ActivityLog.log_scraper_activity(
        action="schedule_processing",
@@ -568,109 +506,6 @@ def dummy_scheduled_scraper():
        return False


@celery.task(bind=True)
def dummy_process_paper(self, paper_id):
    """
    Process a single paper for the dummy scraper.

    Args:
        paper_id (int): ID of the paper to process
    """
    # First check if the scraper is still active and not paused
    scraper_state = ScraperState.get_current_state()
    if not scraper_state.is_active or scraper_state.is_paused:
        # Log that task was skipped due to scraper being stopped or paused
        ActivityLog.log_scraper_activity(
            action="process_paper",
            status="info",
            description=f"Skipped processing paper ID {paper_id} because scraper is {'paused' if scraper_state.is_paused else 'stopped'}"
        )
        return False

    # Get the paper from database
    paper = PaperMetadata.query.get(paper_id)
    if not paper:
        # Log error if paper not found
        ActivityLog.log_scraper_activity(
            action="process_paper",
            status="error",
            description=f"Paper with ID {paper_id} not found"
        )
        return False

    # Simulate random success/failure (70% success rate)
    success = random.random() < 0.7

    # Simulate processing time (1-5 seconds)
    process_time = random.uniform(1, 5)
    time.sleep(process_time)

    # Check again if scraper is still active and not paused after the time delay
    # This ensures we don't process papers if the scraper was stopped during the delay
    scraper_state = ScraperState.get_current_state()
    if not scraper_state.is_active or scraper_state.is_paused:
        ActivityLog.log_scraper_activity(
            action="process_paper",
            status="info",
            description=f"Cancelled processing paper ID {paper_id} because scraper is {'paused' if scraper_state.is_paused else 'stopped'}"
        )
        return False

    if success:
        # --- Get configured download path ---
        download_base_path = DownloadPathConfig.get_path()
        # Ensure the base path exists (optional, but good practice)
        # os.makedirs(download_base_path, exist_ok=True)

        # --- Construct the file path ---
        # Sanitize DOI for use in filename
        safe_doi = paper.doi.replace('/', '_').replace(':', '_')
        filename = f"{safe_doi}.pdf"
        full_path = os.path.join(download_base_path, filename)

        # Update paper status to "Done" and set the file path
        paper.status = "Done"
        paper.file_path = full_path  # Use the constructed path

        # Log success
        ActivityLog.log_scraper_activity(
            action="process_paper",
            paper_id=paper.id,
            status="success",
            description=f"Successfully processed paper: {paper.doi}. File at: {full_path}"  # Log path
        )
    else:
        # Update paper status to "Failed"
        paper.status = "Failed"

        # Generate random error message
        error_message = random.choice([
            "Publisher website unavailable",
            "No PDF download link found",
            "Access restricted",
            "Download timeout",
            "Invalid DOI",
            "Rate limited by publisher"
        ])
        paper.error_msg = error_message

        # Log failure
        ActivityLog.log_scraper_activity(
            action="process_paper",
            paper_id=paper.id,
            status="error",
            description=f"Failed to process paper: {error_message}"
        )

    # Update the timestamp
    paper.updated_at = datetime.utcnow()

    # Commit changes to database
    db.session.commit()

    return success


@celery.task(bind=True)
def process_paper_batch(self, paper_ids):
    """
@@ -914,3 +749,21 @@ def calculate_papers_for_current_hour():
    )

    return papers_this_hour


@celery.task(bind=True)
def process_paper(self, paper_id):
    """Process a paper using the configured scraper."""
    from scipaperloader.models import PaperMetadata
    paper = PaperMetadata.query.get(paper_id)
    if not paper:
        return {"status": "error", "message": f"Paper with ID {paper_id} not found"}

    scraper = get_scraper()
    result = scraper.scrape(paper.doi)

    return {
        "paper_id": paper_id,
        "status": result.status,
        "message": result.message
    }
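
Editor's note: the new process_paper task is now the single entry point the scheduler calls; the concrete scraper is resolved at runtime by the factory. A usage sketch (hypothetical, not part of this commit; assumes a running Celery worker and an existing paper ID):

# Hypothetical usage sketch, not part of the diff
from scipaperloader.blueprints.scraper import process_paper

paper_id = 1  # assumed ID of an existing PaperMetadata row
# Queue the paper for processing roughly ten seconds from now.
async_result = process_paper.apply_async(args=[paper_id], countdown=10)
# Blocks until the worker returns {"paper_id": ..., "status": ..., "message": ...}
print(async_result.get(timeout=60))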
@@ -6,3 +6,4 @@ class Config:
    SQLALCHEMY_DATABASE_URI = os.environ.get("DATABASE_URL", "sqlite:///papers.db")
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    APP_TITLE = os.environ.get("APP_TITLE", "SciPaperLoader")
    SCRAPER_MODULE = os.environ.get("SCRAPER_MODULE", "dummy")
@@ -277,6 +277,40 @@ class ScraperState(db.Model):
        return state.is_active and not state.is_paused


class ScraperModuleConfig(db.Model):
    """Model to store the configured scraper module."""
    id = db.Column(db.Integer, primary_key=True)
    module_name = db.Column(db.String(100), default="dummy")

    @classmethod
    def get_current_module(cls):
        """Get the currently configured scraper module."""
        config = cls.query.first()
        if not config:
            config = cls(module_name="dummy")
            db.session.add(config)
            db.session.commit()
        return config.module_name

    @classmethod
    def set_module(cls, module_name):
        """Set the scraper module."""
        config = cls.query.first()
        if not config:
            config = cls(module_name=module_name)
            db.session.add(config)
        else:
            old_value = config.module_name
            config.module_name = module_name
            ActivityLog.log_config_change(
                config_key="scraper_module",
                old_value=old_value,
                new_value=module_name,
                description="Updated scraper module configuration"
            )
        db.session.commit()
        return config


def init_schedule_config():
    """Initialize ScheduleConfig with default values if empty"""
    if ScheduleConfig.query.count() == 0:
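
Editor's note: ScraperModuleConfig follows the same single-row pattern as the other config models; reads fall back to "dummy" and writes are logged as config changes. A hypothetical usage sketch (assumes an active Flask application context):

# Hypothetical usage sketch, not part of the diff
from scipaperloader.models import ScraperModuleConfig

ScraperModuleConfig.set_module("dummy")            # persists and logs the change
active = ScraperModuleConfig.get_current_module()  # -> "dummy"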
scipaperloader/scrapers/__init__.py (Normal file, 2 lines)
@@ -0,0 +1,2 @@
# This package contains all scraper modules.
# Each scraper should implement the BaseScraper interface from base.py.
scipaperloader/scrapers/base.py (Normal file, 34 lines)
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from typing import NamedTuple, Optional, Dict
from datetime import datetime

class ScrapeResult(NamedTuple):
    status: str  # "success", "error", "skipped"
    message: str  # human-readable status
    data: Optional[Dict]  # any extra payload (file_path, metadata, etc.)
    duration: Optional[float] = None  # processing time in seconds
    timestamp: Optional[datetime] = None  # when the operation completed

class BaseScraper(ABC):
    """Base class for all scraper implementations."""

    @abstractmethod
    def scrape(self, doi: str) -> ScrapeResult:
        """
        Fetch metadata and/or download paper for the given DOI.

        Args:
            doi: The DOI of the paper to scrape

        Returns:
            ScrapeResult with status, message, and optional data
        """
        pass

    def get_name(self) -> str:
        """Return the name of this scraper."""
        return self.__class__.__name__

    def get_description(self) -> str:
        """Return a description of this scraper."""
        return getattr(self.__class__, "__doc__", "No description available")
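
Editor's note: any module dropped into scipaperloader/scrapers/ only needs to expose a class named Scraper implementing this interface to become selectable. A minimal hypothetical example module (file name and behavior are illustrative, not part of this commit):

# scipaperloader/scrapers/noop.py  (hypothetical example)
from datetime import datetime
from .base import BaseScraper, ScrapeResult

class Scraper(BaseScraper):
    """No-op scraper that marks every DOI as skipped."""

    def scrape(self, doi: str) -> ScrapeResult:
        # Perform no real work; just report the DOI as skipped.
        return ScrapeResult(
            status="skipped",
            message=f"No-op scraper skipped {doi}",
            data=None,
            duration=0.0,
            timestamp=datetime.utcnow(),
        )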
scipaperloader/scrapers/dummy.py (Normal file, 94 lines)
@@ -0,0 +1,94 @@
import time
import random
from datetime import datetime
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db

class Scraper(BaseScraper):
    """Dummy scraper for testing purposes that simulates paper downloading."""

    def scrape(self, doi: str) -> ScrapeResult:
        """Simulate scraping a paper with realistic timing and random success/failure."""
        start_time = time.time()

        paper = PaperMetadata.query.filter_by(doi=doi).first()
        if not paper:
            return ScrapeResult(
                status="error",
                message=f"No paper found for DOI {doi}",
                data=None,
                duration=time.time() - start_time,
                timestamp=datetime.utcnow()
            )

        # Simulate processing time (1-3 seconds)
        processing_time = random.uniform(1, 3)
        time.sleep(processing_time)

        # Simulate 80% success rate
        success = random.random() < 0.8

        if success:
            # Get download path and simulate file creation
            download_path = DownloadPathConfig.get_path()
            file_name = f"{doi.replace('/', '_')}.pdf"
            file_path = f"{download_path}/{file_name}"

            # Update paper status
            paper.status = "Done"
            paper.file_path = file_path
            paper.error_msg = None

            # Log success
            ActivityLog.log_scraper_activity(
                action="dummy_scrape",
                status="success",
                description=f"Successfully scraped {doi}",
                paper_id=paper.id
            )

            result = ScrapeResult(
                status="success",
                message=f"Successfully scraped {doi}",
                data={
                    "file_path": file_path,
                    "title": paper.title,
                    "journal": paper.journal
                },
                duration=time.time() - start_time,
                timestamp=datetime.utcnow()
            )
        else:
            # Simulate failure
            error_messages = [
                "Paper not found in database",
                "Access denied by publisher",
                "Rate limit exceeded",
                "Network timeout",
                "Invalid DOI format"
            ]
            error_msg = random.choice(error_messages)

            paper.status = "Failed"
            paper.error_msg = error_msg

            # Log failure
            ActivityLog.log_scraper_activity(
                action="dummy_scrape",
                status="error",
                description=f"Failed to scrape {doi}: {error_msg}",
                paper_id=paper.id
            )

            result = ScrapeResult(
                status="error",
                message=f"Failed to scrape {doi}: {error_msg}",
                data={"error_code": "dummy_error"},
                duration=time.time() - start_time,
                timestamp=datetime.utcnow()
            )

        db.session.commit()
        return result
scipaperloader/scrapers/factory.py (Normal file, 59 lines)
@@ -0,0 +1,59 @@
import importlib
from flask import current_app
from .base import BaseScraper

def get_scraper() -> BaseScraper:
    """Load the configured scraper module dynamically with error handling."""
    from ..models import ScraperModuleConfig, ActivityLog

    try:
        # Get module name from database first, fallback to config
        name = ScraperModuleConfig.get_current_module()
        if not name:
            name = current_app.config.get("SCRAPER_MODULE", "dummy")

        module = importlib.import_module(f"scipaperloader.scrapers.{name}")
        cls = getattr(module, "Scraper")

        # Validate that it's actually a BaseScraper
        if not issubclass(cls, BaseScraper):
            raise TypeError(f"Scraper class in module '{name}' does not inherit from BaseScraper")

        return cls()

    except (ImportError, AttributeError, TypeError) as e:
        ActivityLog.log_error(
            error_message=f"Failed to load scraper module '{name}': {str(e)}",
            source="scraper_factory",
            severity="error"
        )
        # Fallback to dummy scraper
        from .dummy import Scraper as DummyScraper
        return DummyScraper()

def get_available_scrapers():
    """Get list of available scraper modules."""
    import os
    from scipaperloader.scrapers import __path__ as scrapers_path

    modules = []
    scrapers_dir = scrapers_path[0]

    for filename in os.listdir(scrapers_dir):
        if filename.endswith(".py") and filename not in ("__init__.py", "base.py", "factory.py"):
            module_name = filename[:-3]
            try:
                # Try to import and validate the module
                module = importlib.import_module(f"scipaperloader.scrapers.{module_name}")
                cls = getattr(module, "Scraper", None)
                if cls and issubclass(cls, BaseScraper):
                    modules.append({
                        "name": module_name,
                        "class": cls,
                        "description": getattr(cls, "__doc__", "No description available")
                    })
            except (ImportError, AttributeError, TypeError):
                # Skip invalid modules
                pass

    return modules
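
Editor's note: the factory is the only place that needs to know which module is active; callers simply request a scraper and use it. A usage sketch (hypothetical, assumes a Flask application context so current_app and the database are available):

# Hypothetical usage sketch, not part of the diff
from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers

print([m["name"] for m in get_available_scrapers()])  # e.g. ['dummy']
scraper = get_scraper()  # falls back to the dummy scraper if loading fails
result = scraper.scrape("10.1234/example.doi")
print(result.status, result.message)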
@@ -9,52 +9,87 @@
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}

<form action="{{ url_for('config.update_general') }}" method="post">
<div class="form-section">
<h6>Scraper Volume</h6>
<p class="text-muted">Configure the total number of papers to scrape per day.</p>
<div class="row">
<!-- General Settings Column -->
<div class="col-md-6">
<form action="{{ url_for('config.update_general') }}" method="post">
<div class="form-section">
<h6>Scraper Volume</h6>
<p class="text-muted">Configure the total number of papers to scrape per day.</p>

<div class="mb-3">
<label for="totalVolume" class="form-label">Papers per day:</label>
<input type="number" class="form-control" id="totalVolume" name="total_volume" min="1"
max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
</div>
<div class="mb-3">
<label for="totalVolume" class="form-label">Papers per day:</label>
<input type="number" class="form-control" id="totalVolume" name="total_volume"
min="1" max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
</div>
</div>

<div class="form-section">
<h6>Download Path</h6>
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
<div class="mb-3">
<label for="downloadPath" class="form-label">Download Directory:</label>
<input type="text" class="form-control" id="downloadPath" name="download_path"
value="{{ download_path_config.path }}" required>
<div class="form-text">Enter the full path to the download directory (e.g.,
/data/papers).
Ensure the directory exists and the application has write permissions.</div>
</div>
</div>

<div class="form-section">
<h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p>

<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
<label class="form-check-label" for="enableNotifications">
Enable email notifications
</label>
</div>

<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableLogging" checked>
<label class="form-check-label" for="enableLogging">
Enable detailed activity logging
</label>
</div>
</div>

<button type="submit" class="btn btn-primary">Save General Settings</button>
</form>
</div>

<div class="form-section">
<h6>Download Path</h6>
<p class="text-muted">Base directory where scraped paper files will be stored.</p>
<div class="mb-3">
<label for="downloadPath" class="form-label">Download Directory:</label>
<input type="text" class="form-control" id="downloadPath" name="download_path"
value="{{ download_path_config.path }}" required>
<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
Ensure the directory exists and the application has write permissions.</div>
</div>
<!-- Scraper Module Column -->
<div class="col-md-6">
<form method="post" action="{{ url_for('config.update_scraper_module') }}">
<div class="form-section">
<h6>Scraper Module</h6>
<p class="text-muted">Select which scraper module to use for processing papers.</p>

<div class="mb-3">
<label for="scraper_module" class="form-label">Active Scraper Module:</label>
<select class="form-control" id="scraper_module" name="scraper_module">
{% for module in available_scraper_modules %}
<option value="{{ module }}" {% if module==current_scraper_module %} selected
{%endif %}>
{{ module }}
{% if scraper_details[module] %}
- {{ scraper_details[module].description[:50] }}...
{% endif %}
</option>
{% endfor %}
</select>
<div class="form-text">
Current module: <strong>{{ current_scraper_module }}</strong>
</div>
</div>
</div>
<button type="submit" class="btn btn-primary">Update Scraper Module</button>
</form>
</div>

<div class="form-section">
<h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p>

<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
<label class="form-check-label" for="enableNotifications">
Enable email notifications
</label>
</div>

<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="enableLogging" checked>
<label class="form-check-label" for="enableLogging">
Enable detailed activity logging
</label>
</div>
</div>

<button type="submit" class="btn btn-primary">Save General Settings</button>
</form>
</div>
</div>
</div>
</div>
</div>