Compare commits: 11f086aa64 ... 987c76969b

3 commits: 987c76969b, 012163ba3f, 8f2375215d
@@ -1,6 +1,7 @@
 from datetime import datetime
 from flask import Blueprint, jsonify, request
-from ..models import ActivityLog, ActivityCategory
+from ..models import ActivityLog, ActivityCategory, PaperMetadata
+from .. import db

 bp = Blueprint("api", __name__, url_prefix="/api")

@@ -47,4 +48,91 @@ def get_activity_logs():
         }
         result.append(log_data)

     return jsonify(result)
+
+
+@bp.route("/papers")
+def search_papers():
+    """
+    Search for papers by title, DOI, or ID.
+
+    Query parameters:
+    - query: Search term (required)
+    - limit: Maximum number of results (default: 10)
+    """
+    query = request.args.get('query', '')
+    limit = int(request.args.get('limit', 10))
+
+    if not query:
+        return jsonify({
+            "success": False,
+            "message": "Search query is required",
+            "papers": []
+        })
+
+    # Try to parse query as an ID first
+    try:
+        paper_id = int(query)
+        paper_by_id = PaperMetadata.query.get(paper_id)
+        if paper_by_id:
+            return jsonify({
+                "success": True,
+                "papers": [{
+                    "id": paper_by_id.id,
+                    "title": paper_by_id.title,
+                    "doi": paper_by_id.doi,
+                    "journal": paper_by_id.journal,
+                    "status": paper_by_id.status,
+                    "created_at": paper_by_id.created_at.isoformat() if paper_by_id.created_at else None,
+                    "updated_at": paper_by_id.updated_at.isoformat() if paper_by_id.updated_at else None
+                }]
+            })
+    except ValueError:
+        pass  # Not an ID, continue with text search
+
+    # Search in title and DOI
+    search_term = f"%{query}%"
+    papers = PaperMetadata.query.filter(
+        db.or_(
+            PaperMetadata.title.ilike(search_term),
+            PaperMetadata.doi.ilike(search_term)
+        )
+    ).limit(limit).all()
+
+    return jsonify({
+        "success": True,
+        "papers": [{
+            "id": paper.id,
+            "title": paper.title,
+            "doi": paper.doi,
+            "journal": paper.journal,
+            "status": paper.status,
+            "created_at": paper.created_at.isoformat() if paper.created_at else None,
+            "updated_at": paper.updated_at.isoformat() if paper.updated_at else None
+        } for paper in papers]
+    })
+
+
+@bp.route("/papers/<int:paper_id>")
+def get_paper(paper_id):
+    """Get details of a single paper by ID."""
+    paper = PaperMetadata.query.get(paper_id)
+
+    if not paper:
+        return jsonify({
+            "success": False,
+            "message": f"Paper with ID {paper_id} not found"
+        })
+
+    return jsonify({
+        "success": True,
+        "paper": {
+            "id": paper.id,
+            "title": paper.title,
+            "doi": paper.doi,
+            "journal": paper.journal,
+            "status": paper.status,
+            "error_msg": paper.error_msg,
+            "file_path": paper.file_path,
+            "created_at": paper.created_at.isoformat() if paper.created_at else None,
+            "updated_at": paper.updated_at.isoformat() if paper.updated_at else None
+        }
+    })
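Not part of the diff: a minimal client-side sketch of how the two new endpoints could be exercised, assuming the app is served locally on port 5000 and that the requests package is installed; the query string and paper ID are made-up example values.

import requests

BASE = "http://localhost:5000"  # assumed development host/port

# Free-text search over title and DOI (a bare numeric query is first tried as an ID)
resp = requests.get(f"{BASE}/api/papers", params={"query": "dummy", "limit": 5})
payload = resp.json()
if payload.get("success"):
    for paper in payload["papers"]:
        print(paper["id"], paper["status"], paper["doi"])

# Detail view for a single paper; ID 1 is an example value
print(requests.get(f"{BASE}/api/papers/1").json())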
@@ -1,10 +1,11 @@
 """Configuration management blueprint."""
-from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify
+from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
 from ..db import db
 # Import the new model
-from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
+from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
 from ..defaults import MAX_VOLUME
 import os  # Import os for path validation
+from scipaperloader.scrapers import __path__ as scrapers_path

 bp = Blueprint("config", __name__, url_prefix="/config")

@@ -69,25 +70,31 @@ def _update_download_path(new_path):
         # Try to create it if it doesn't exist
         try:
             os.makedirs(new_path, exist_ok=True)
-            ActivityLog.log_system_activity(
+            ActivityLog.log_scraper_activity(
                 action="create_directory",
                 status="info",
                 description=f"Created download directory: {new_path}"
             )
         except OSError as e:
-            ActivityLog.log_system_activity(
-                action="create_directory",
-                status="error",
-                description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
+            ActivityLog.log_error(
+                error_message=f"Failed to create download directory: {new_path}, Error: {str(e)}",
+                source="update_download_path"
             )
             return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None

+    # Check if the path is readable
+    if not os.access(new_path, os.R_OK):
+        ActivityLog.log_error(
+            error_message=f"Download path '{new_path}' is not readable.",
+            source="check_directory_permissions"
+        )
+        return False, f"Path '{new_path}' exists but is not readable by the application.", None
+
     # Check if the path is writable
     if not os.access(new_path, os.W_OK):
-        ActivityLog.log_system_activity(
-            action="check_directory_permissions",
-            status="error",
-            description=f"Download path '{new_path}' is not writable."
+        ActivityLog.log_error(
+            error_message=f"Download path '{new_path}' is not writable.",
+            source="check_directory_permissions"
         )
         return False, f"Path '{new_path}' exists but is not writable by the application.", None
     # --- End of validation ---

@@ -281,6 +288,46 @@ def update_schedule():
     return redirect(url_for("config.schedule"))


+@bp.route("/update/scraper_module", methods=["POST"])
+def update_scraper_module():
+    """Update the scraper module configuration."""
+    from ..models import ScraperModuleConfig
+
+    new_scraper_module = request.form.get("scraper_module")
+    if not new_scraper_module:
+        flash("Scraper module cannot be empty.", "error")
+        return redirect(url_for("config.general"))
+
+    # Validate that the module exists and is valid
+    from scipaperloader.scrapers.factory import get_available_scrapers
+    available_modules = [m["name"] for m in get_available_scrapers()]
+
+    if new_scraper_module not in available_modules:
+        flash(f"Invalid scraper module: {new_scraper_module}", "error")
+        return redirect(url_for("config.general"))
+
+    # Update the database configuration
+    ScraperModuleConfig.set_module(new_scraper_module)
+    flash(f"Scraper module updated to '{new_scraper_module}'.", "success")
+    return redirect(url_for("config.general"))
+
+
+@bp.context_processor
+def inject_scraper_modules():
+    """Inject available scraper modules into the template context."""
+    from scipaperloader.scrapers.factory import get_available_scrapers
+    from ..models import ScraperModuleConfig
+
+    available_scrapers = get_available_scrapers()
+    current_module = ScraperModuleConfig.get_current_module()
+
+    return {
+        "available_scraper_modules": [s["name"] for s in available_scrapers],
+        "current_scraper_module": current_module,
+        "scraper_details": {s["name"]: s for s in available_scrapers}
+    }
+
+
 @bp.route("/api/schedule/stats")
 def schedule_stats():
     """Get statistics about the current schedule configuration."""

@@ -361,4 +408,36 @@ def api_update_config():
         return jsonify({
             "success": False,
             "message": f"Unexpected error: {str(e)}"
         })
+
+
+@bp.route("/delete_all_papers", methods=["POST"])
+def delete_all_papers():
+    """Delete all paper records from the database."""
+    try:
+        # Count papers before deletion for logging purposes
+        paper_count = PaperMetadata.query.count()
+
+        # Delete all records from the PaperMetadata table
+        PaperMetadata.query.delete()
+        db.session.commit()
+
+        # Log the action
+        ActivityLog.log_config_change(
+            config_key="database",
+            old_value=f"{paper_count} papers",
+            new_value="0 papers",
+            description=f"Deleted all {paper_count} papers from the database"
+        )
+
+        flash(f"Successfully deleted all {paper_count} papers from the database.", "success")
+    except Exception as e:
+        db.session.rollback()
+        flash(f"Failed to delete papers: {str(e)}", "error")
+        ActivityLog.log_error(
+            error_message=f"Failed to delete all papers: {str(e)}",
+            exception=e,
+            source="config.delete_all_papers"
+        )
+
+    return redirect(url_for("config.general"))
@@ -6,12 +6,13 @@ import os  # Import os for path joining
 from datetime import datetime, timedelta
 from flask import Blueprint, jsonify, render_template, request, current_app, flash
 # Import the new model
-from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
+from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig, ScraperModuleConfig
 from ..db import db
 from ..celery import celery
 from ..defaults import MAX_VOLUME
 from celery.schedules import crontab
 from sqlalchemy import func
+from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers

 bp = Blueprint("scraper", __name__, url_prefix="/scraper")

@@ -153,7 +154,7 @@ def stop_scraper():

     # Stop any running tasks
     task_types_to_revoke = [
-        'scipaperloader.blueprints.scraper.dummy_process_paper',
+        'scipaperloader.blueprints.scraper.process_paper',
         'scipaperloader.blueprints.scraper.dummy_scheduled_scraper',
         'scipaperloader.blueprints.scraper.run_periodic_dummy_scraper'
     ]

@@ -224,7 +225,7 @@ def pause_scraper():
     # Just revoke processing tasks, but leave the periodic tasks running
     # so it can continue to check the state (which is now paused)
     task_types_to_revoke = [
-        'scipaperloader.blueprints.scraper.dummy_process_paper',
+        'scipaperloader.blueprints.scraper.process_paper',
         'scipaperloader.blueprints.scraper.dummy_scheduled_scraper'
     ]

@@ -373,70 +374,7 @@ def update_config():
         return jsonify({"success": False, "message": f"Unexpected error: {str(e)}"})


-@celery.task(bind=True)
-def dummy_scrape_paper(self):
-    """Simulate scraping a single paper."""
-    # Simulate success or failure
-    success = random.random() > 0.3  # 70% success rate
-
-    # Simulate processing time
-    import time
-    time.sleep(random.randint(2, 5))  # 2-5 seconds
-
-    if success:
-        # Create a dummy paper
-        new_paper = PaperMetadata(
-            title=f"Dummy Paper {random.randint(1000, 9999)}",
-            doi=f"10.1234/dummy.{random.randint(1000, 9999)}",
-            journal=random.choice([
-                "Nature", "Science", "PLOS ONE", "Journal of Dummy Research",
-                "Proceedings of the Dummy Society", "Cell", "Dummy Review Letters"
-            ]),
-            type="article",
-            language="en",
-            published_online=datetime.now().date(),
-            status="Done",
-            file_path="/path/to/dummy/paper.pdf"
-        )
-
-        db.session.add(new_paper)
-        db.session.commit()
-
-        # Log the successful scrape
-        ActivityLog.log_scraper_activity(
-            action="scrape_paper",
-            paper_id=new_paper.id,
-            status="success",
-            description=f"Successfully scraped paper {new_paper.doi}"
-        )
-
-        return {
-            "success": True,
-            "paper_id": new_paper.id,
-            "title": new_paper.title,
-            "doi": new_paper.doi
-        }
-    else:
-        # Log the failed scrape
-        error_message = random.choice([
-            "Connection timeout",
-            "404 Not Found",
-            "Access denied",
-            "Invalid DOI format",
-            "PDF download failed",
-            "Rate limited by publisher"
-        ])
-
-        ActivityLog.log_scraper_activity(
-            action="scrape_paper",
-            status="error",
-            description=f"Failed to scrape paper: {error_message}"
-        )
-
-        return {
-            "success": False,
-            "error": error_message
-        }
-
-
 @celery.task

@@ -545,11 +483,11 @@ def dummy_scheduled_scraper():
         )

         # --- Now schedule processing for the newly selected "Pending" papers ---
-        # (Assuming dummy_process_paper takes a paper_id)
+        # (Using the new modular process_paper task)
         # Add random delays for processing within the hour (e.g., up to 3600 seconds)
         for paper_id in selected_paper_ids:
             delay = random.uniform(1, 3500)  # Random delay up to ~58 minutes
-            dummy_process_paper.apply_async(args=[paper_id], countdown=delay)
+            process_paper.apply_async(args=[paper_id], countdown=delay)

         ActivityLog.log_scraper_activity(
             action="schedule_processing",

@@ -568,109 +506,6 @@ def dummy_scheduled_scraper():
         return False


-@celery.task(bind=True)
-def dummy_process_paper(self, paper_id):
-    """
-    Process a single paper for the dummy scraper.
-
-    Args:
-        paper_id (int): ID of the paper to process
-    """
-    # First check if the scraper is still active and not paused
-    scraper_state = ScraperState.get_current_state()
-    if not scraper_state.is_active or scraper_state.is_paused:
-        # Log that task was skipped due to scraper being stopped or paused
-        ActivityLog.log_scraper_activity(
-            action="process_paper",
-            status="info",
-            description=f"Skipped processing paper ID {paper_id} because scraper is {'paused' if scraper_state.is_paused else 'stopped'}"
-        )
-        return False
-
-    # Get the paper from database
-    paper = PaperMetadata.query.get(paper_id)
-    if not paper:
-        # Log error if paper not found
-        ActivityLog.log_scraper_activity(
-            action="process_paper",
-            status="error",
-            description=f"Paper with ID {paper_id} not found"
-        )
-        return False
-
-    # Simulate random success/failure (70% success rate)
-    success = random.random() < 0.7
-
-    # Simulate processing time (1-5 seconds)
-    process_time = random.uniform(1, 5)
-    time.sleep(process_time)
-
-    # Check again if scraper is still active and not paused after the time delay
-    # This ensures we don't process papers if the scraper was stopped during the delay
-    scraper_state = ScraperState.get_current_state()
-    if not scraper_state.is_active or scraper_state.is_paused:
-        ActivityLog.log_scraper_activity(
-            action="process_paper",
-            status="info",
-            description=f"Cancelled processing paper ID {paper_id} because scraper is {'paused' if scraper_state.is_paused else 'stopped'}"
-        )
-        return False
-
-    if success:
-        # --- Get configured download path ---
-        download_base_path = DownloadPathConfig.get_path()
-        # Ensure the base path exists (optional, but good practice)
-        # os.makedirs(download_base_path, exist_ok=True)
-
-        # --- Construct the file path ---
-        # Sanitize DOI for use in filename
-        safe_doi = paper.doi.replace('/', '_').replace(':', '_')
-        filename = f"{safe_doi}.pdf"
-        full_path = os.path.join(download_base_path, filename)
-
-        # Update paper status to "Done" and set the file path
-        paper.status = "Done"
-        paper.file_path = full_path  # Use the constructed path
-
-        # Log success
-        ActivityLog.log_scraper_activity(
-            action="process_paper",
-            paper_id=paper.id,
-            status="success",
-            description=f"Successfully processed paper: {paper.doi}. File at: {full_path}"  # Log path
-        )
-    else:
-        # Update paper status to "Failed"
-        paper.status = "Failed"
-
-        # Generate random error message
-        error_message = random.choice([
-            "Publisher website unavailable",
-            "No PDF download link found",
-            "Access restricted",
-            "Download timeout",
-            "Invalid DOI",
-            "Rate limited by publisher"
-        ])
-        paper.error_msg = error_message
-
-        # Log failure
-        ActivityLog.log_scraper_activity(
-            action="process_paper",
-            paper_id=paper.id,
-            status="error",
-            description=f"Failed to process paper: {error_message}"
-        )
-
-    # Update the timestamp
-    paper.updated_at = datetime.utcnow()
-
-    # Commit changes to database
-    db.session.commit()
-
-    return success
-
-
 @celery.task(bind=True)
 def process_paper_batch(self, paper_ids):
     """

@@ -914,3 +749,168 @@ def calculate_papers_for_current_hour():
     )

     return papers_this_hour
+
+
+@celery.task(bind=True)
+def process_paper(self, paper_id):
+    """Process a paper using the configured scraper."""
+    from scipaperloader.models import PaperMetadata
+    paper = PaperMetadata.query.get(paper_id)
+    if not paper:
+        return {"status": "error", "message": f"Paper with ID {paper_id} not found"}
+
+    scraper = get_scraper()
+    result = scraper.scrape(paper.doi)
+
+    return {
+        "paper_id": paper_id,
+        "status": result.status,
+        "message": result.message
+    }
+
+
+@celery.task(bind=True)
+def process_paper_with_scraper(self, paper_id, scraper_module):
+    """Process a paper using a specific scraper module."""
+    from scipaperloader.models import PaperMetadata
+    import importlib
+    from ..scrapers.base import BaseScraper
+
+    paper = PaperMetadata.query.get(paper_id)
+    if not paper:
+        return {"status": "error", "message": f"Paper with ID {paper_id} not found"}
+
+    try:
+        # Import the specified scraper module
+        module = importlib.import_module(f"scipaperloader.scrapers.{scraper_module}")
+        cls = getattr(module, "Scraper")
+
+        # Validate that it's a BaseScraper
+        if not issubclass(cls, BaseScraper):
+            error_msg = f"Scraper class in module '{scraper_module}' does not inherit from BaseScraper"
+            ActivityLog.log_error(
+                error_message=error_msg,
+                source="process_paper_with_scraper"
+            )
+            return {"status": "error", "message": error_msg}
+
+        # Instantiate and use the scraper
+        scraper = cls()
+        result = scraper.scrape(paper.doi)
+
+        return {
+            "paper_id": paper_id,
+            "status": result.status,
+            "message": result.message,
+            "scraper": scraper_module
+        }
+
+    except (ImportError, AttributeError) as e:
+        error_msg = f"Failed to load scraper module '{scraper_module}': {str(e)}"
+        ActivityLog.log_error(
+            error_message=error_msg,
+            source="process_paper_with_scraper"
+        )
+        return {"status": "error", "message": error_msg}
+    except Exception as e:
+        error_msg = f"Error processing paper with scraper '{scraper_module}': {str(e)}"
+        ActivityLog.log_error(
+            error_message=error_msg,
+            source="process_paper_with_scraper",
+            exception=e
+        )
+        return {"status": "error", "message": error_msg}
+
+
+@bp.route("/process_single/<int:paper_id>", methods=["POST"])
+def process_single_paper(paper_id):
+    """Process a single paper by ID."""
+    try:
+        # Check if paper exists
+        paper = PaperMetadata.query.get(paper_id)
+        if not paper:
+            return jsonify({
+                "success": False,
+                "message": f"Paper with ID {paper_id} not found"
+            })
+
+        # Get the scraper module name from the request
+        scraper_module = None
+        if request.is_json and request.json:
+            scraper_module = request.json.get('scraper_module')
+
+        # Update status to Pending
+        old_status = paper.status
+        paper.status = "Pending"
+        paper.updated_at = datetime.utcnow()
+        db.session.commit()
+
+        # Log that we're processing this paper
+        ActivityLog.log_scraper_activity(
+            action="manual_process_paper",
+            paper_id=paper_id,
+            status="pending",
+            description=f"Manual processing initiated for paper: {paper.title}" +
+                        (f" using {scraper_module} scraper" if scraper_module else "")
+        )
+
+        # Start the task (without delay since it's manual)
+        if scraper_module:
+            task = process_paper_with_scraper.delay(paper_id, scraper_module)
+        else:
+            task = process_paper.delay(paper_id)
+
+        return jsonify({
+            "success": True,
+            "task_id": task.id,
+            "message": f"Processing paper '{paper.title}' (ID: {paper_id})" +
+                       (f" using {scraper_module} scraper" if scraper_module else "") +
+                       f". Previous status: {old_status}"
+        })
+
+    except Exception as e:
+        db.session.rollback()
+        ActivityLog.log_error(
+            error_message=f"Failed to process paper {paper_id}: {str(e)}",
+            exception=e,
+            source="process_single_paper"
+        )
+        return jsonify({
+            "success": False,
+            "message": f"Error: {str(e)}"
+        })
+
+
+@bp.route("/available_scrapers")
+def available_scrapers():
+    """Get list of available scraper modules."""
+    from scipaperloader.scrapers.factory import get_available_scrapers
+    from ..models import ScraperModuleConfig
+
+    try:
+        scrapers = get_available_scrapers()
+        current_module = ScraperModuleConfig.get_current_module()
+
+        return jsonify({
+            "success": True,
+            "scrapers": [
+                {
+                    "name": s["name"],
+                    "description": s["description"],
+                    "is_current": s["name"] == current_module
+                } for s in scrapers
+            ],
+            "current": current_module
+        })
+
+    except Exception as e:
+        ActivityLog.log_error(
+            error_message=f"Failed to get available scrapers: {str(e)}",
+            source="available_scrapers"
+        )
+        return jsonify({
+            "success": False,
+            "message": f"Error: {str(e)}",
+            "scrapers": []
+        })
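Not part of the diff: a rough sketch of driving the new manual-processing route from Python. The base URL, the paper ID 42, and the "dummy" module name are assumptions for illustration; the response keys match the route above.

import requests

BASE = "http://localhost:5000"  # assumed development host/port

# Queue one paper for immediate processing with an explicitly chosen scraper module;
# omit "scraper_module" (or send an empty value) to use the configured default.
resp = requests.post(f"{BASE}/scraper/process_single/42", json={"scraper_module": "dummy"})
print(resp.json())  # keys per the route above: success, task_id, message

# The scraper dropdown in the UI is filled from this endpoint:
print(requests.get(f"{BASE}/scraper/available_scrapers").json())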
@@ -6,3 +6,4 @@ class Config:
     SQLALCHEMY_DATABASE_URI = os.environ.get("DATABASE_URL", "sqlite:///papers.db")
     SQLALCHEMY_TRACK_MODIFICATIONS = False
     APP_TITLE = os.environ.get("APP_TITLE", "SciPaperLoader")
+    SCRAPER_MODULE = os.environ.get("SCRAPER_MODULE", "dummy")
@@ -277,6 +277,40 @@ class ScraperState(db.Model):
         return state.is_active and not state.is_paused


+class ScraperModuleConfig(db.Model):
+    """Model to store the configured scraper module."""
+    id = db.Column(db.Integer, primary_key=True)
+    module_name = db.Column(db.String(100), default="dummy")
+
+    @classmethod
+    def get_current_module(cls):
+        """Get the currently configured scraper module."""
+        config = cls.query.first()
+        if not config:
+            config = cls(module_name="dummy")
+            db.session.add(config)
+            db.session.commit()
+        return config.module_name
+
+    @classmethod
+    def set_module(cls, module_name):
+        """Set the scraper module."""
+        config = cls.query.first()
+        if not config:
+            config = cls(module_name=module_name)
+            db.session.add(config)
+        else:
+            old_value = config.module_name
+            config.module_name = module_name
+            ActivityLog.log_config_change(
+                config_key="scraper_module",
+                old_value=old_value,
+                new_value=module_name,
+                description="Updated scraper module configuration"
+            )
+        db.session.commit()
+        return config
+
+
 def init_schedule_config():
     """Initialize ScheduleConfig with default values if empty"""
     if ScheduleConfig.query.count() == 0:
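Not part of the diff: a rough usage sketch of the new model, e.g. from a flask shell session where an application context is already active.

from scipaperloader.models import ScraperModuleConfig

print(ScraperModuleConfig.get_current_module())  # "dummy" on first use; the row is created on demand
ScraperModuleConfig.set_module("dummy")          # replacing an existing value also logs a config change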
scipaperloader/scrapers/__init__.py (new file, 2 lines)

# This package contains all scraper modules.
# Each scraper should implement the BaseScraper interface from base.py.
scipaperloader/scrapers/base.py (new file, 34 lines)

from abc import ABC, abstractmethod
from typing import NamedTuple, Optional, Dict
from datetime import datetime


class ScrapeResult(NamedTuple):
    status: str                            # "success", "error", "skipped"
    message: str                           # human-readable status
    data: Optional[Dict]                   # any extra payload (file_path, metadata, etc.)
    duration: Optional[float] = None       # processing time in seconds
    timestamp: Optional[datetime] = None   # when the operation completed


class BaseScraper(ABC):
    """Base class for all scraper implementations."""

    @abstractmethod
    def scrape(self, doi: str) -> ScrapeResult:
        """
        Fetch metadata and/or download paper for the given DOI.

        Args:
            doi: The DOI of the paper to scrape

        Returns:
            ScrapeResult with status, message, and optional data
        """
        pass

    def get_name(self) -> str:
        """Return the name of this scraper."""
        return self.__class__.__name__

    def get_description(self) -> str:
        """Return a description of this scraper."""
        return getattr(self.__class__, "__doc__", "No description available")
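Not part of the diff: a minimal sketch of what a third-party scraper module satisfying this interface could look like. The class must be named Scraper so the factory below can discover it; the no-op behaviour here is purely illustrative.

from datetime import datetime

from scipaperloader.scrapers.base import BaseScraper, ScrapeResult


class Scraper(BaseScraper):
    """Example no-op scraper: records the DOI without downloading anything."""

    def scrape(self, doi: str) -> ScrapeResult:
        # A real implementation would fetch metadata and/or the PDF here.
        return ScrapeResult(
            status="skipped",
            message=f"No-op scraper invoked for {doi}",
            data=None,
            duration=0.0,
            timestamp=datetime.utcnow(),
        )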
scipaperloader/scrapers/dummy.py (new file, 191 lines)

import time
import random
import os
from datetime import datetime
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db


class Scraper(BaseScraper):
    """Dummy scraper for testing purposes that simulates paper downloading."""

    def scrape(self, doi: str) -> ScrapeResult:
        """Simulate scraping a paper with realistic timing and random success/failure."""
        start_time = time.time()

        paper = PaperMetadata.query.filter_by(doi=doi).first()
        if not paper:
            return ScrapeResult(
                status="error",
                message=f"No paper found for DOI {doi}",
                data=None,
                duration=time.time() - start_time,
                timestamp=datetime.utcnow()
            )

        # Simulate processing time (1-3 seconds)
        processing_time = random.uniform(1, 3)
        time.sleep(processing_time)

        # Simulate 80% success rate
        success = random.random() < 0.8

        if success:
            # Get download path and create an actual dummy file
            download_path = DownloadPathConfig.get_path()
            file_name = f"{doi.replace('/', '_')}.pdf"
            file_path = f"{download_path}/{file_name}"

            # Check if the path is readable and writable
            if not os.path.exists(download_path):
                try:
                    # Create directory if it doesn't exist
                    os.makedirs(download_path, exist_ok=True)
                except OSError as e:
                    error_msg = f"Failed to create download directory: {str(e)}"
                    paper.status = "Failed"
                    paper.error_msg = error_msg

                    ActivityLog.log_scraper_activity(
                        action="dummy_scrape_path_error",
                        status="error",
                        description=error_msg,
                        paper_id=paper.id
                    )

                    return ScrapeResult(
                        status="error",
                        message=error_msg,
                        data={"error_code": "path_creation_error"},
                        duration=time.time() - start_time,
                        timestamp=datetime.utcnow()
                    )

            # Check if the path is readable
            if not os.access(download_path, os.R_OK):
                error_msg = f"Download path '{download_path}' is not readable"
                paper.status = "Failed"
                paper.error_msg = error_msg

                ActivityLog.log_scraper_activity(
                    action="dummy_scrape_path_error",
                    status="error",
                    description=error_msg,
                    paper_id=paper.id
                )

                return ScrapeResult(
                    status="error",
                    message=error_msg,
                    data={"error_code": "path_read_error"},
                    duration=time.time() - start_time,
                    timestamp=datetime.utcnow()
                )

            # Check if the path is writable
            if not os.access(download_path, os.W_OK):
                error_msg = f"Download path '{download_path}' is not writable"
                paper.status = "Failed"
                paper.error_msg = error_msg

                ActivityLog.log_scraper_activity(
                    action="dummy_scrape_path_error",
                    status="error",
                    description=error_msg,
                    paper_id=paper.id
                )

                return ScrapeResult(
                    status="error",
                    message=error_msg,
                    data={"error_code": "path_write_error"},
                    duration=time.time() - start_time,
                    timestamp=datetime.utcnow()
                )

            # Create a simple dummy PDF file
            try:
                with open(file_path, 'w') as f:
                    f.write(f"Dummy PDF file for paper with DOI: {doi}\n")
                    f.write(f"Title: {paper.title}\n")
                    f.write(f"Journal: {paper.journal}\n")
                    f.write(f"Generated: {datetime.utcnow().isoformat()}\n")
                    f.write("\nThis is a dummy file created by the SciPaperLoader dummy scraper.\n")

                # Update paper status
                paper.status = "Done"
                paper.file_path = file_path
                paper.error_msg = None
            except Exception as e:
                # Handle file creation errors
                error_msg = f"Failed to create dummy file: {str(e)}"
                paper.status = "Failed"
                paper.error_msg = error_msg

                ActivityLog.log_scraper_activity(
                    action="dummy_scrape_file_error",
                    status="error",
                    description=error_msg,
                    paper_id=paper.id
                )

                return ScrapeResult(
                    status="error",
                    message=error_msg,
                    data={"error_code": "file_creation_error"},
                    duration=time.time() - start_time,
                    timestamp=datetime.utcnow()
                )

            # Log success
            ActivityLog.log_scraper_activity(
                action="dummy_scrape",
                status="success",
                description=f"Successfully scraped {doi}",
                paper_id=paper.id
            )

            result = ScrapeResult(
                status="success",
                message=f"Successfully scraped {doi}",
                data={
                    "file_path": file_path,
                    "title": paper.title,
                    "journal": paper.journal
                },
                duration=time.time() - start_time,
                timestamp=datetime.utcnow()
            )
        else:
            # Simulate failure
            error_messages = [
                "Paper not found in database",
                "Access denied by publisher",
                "Rate limit exceeded",
                "Network timeout",
                "Invalid DOI format"
            ]
            error_msg = random.choice(error_messages)

            paper.status = "Failed"
            paper.error_msg = error_msg

            # Log failure
            ActivityLog.log_scraper_activity(
                action="dummy_scrape",
                status="error",
                description=f"Failed to scrape {doi}: {error_msg}",
                paper_id=paper.id
            )

            result = ScrapeResult(
                status="error",
                message=f"Failed to scrape {doi}: {error_msg}",
                data={"error_code": "dummy_error"},
                duration=time.time() - start_time,
                timestamp=datetime.utcnow()
            )

        db.session.commit()
        return result
scipaperloader/scrapers/factory.py (new file, 59 lines)

import importlib
from flask import current_app
from .base import BaseScraper


def get_scraper() -> BaseScraper:
    """Load the configured scraper module dynamically with error handling."""
    from ..models import ScraperModuleConfig, ActivityLog

    try:
        # Get module name from database first, fallback to config
        name = ScraperModuleConfig.get_current_module()
        if not name:
            name = current_app.config.get("SCRAPER_MODULE", "dummy")

        module = importlib.import_module(f"scipaperloader.scrapers.{name}")
        cls = getattr(module, "Scraper")

        # Validate that it's actually a BaseScraper
        if not issubclass(cls, BaseScraper):
            raise TypeError(f"Scraper class in module '{name}' does not inherit from BaseScraper")

        return cls()

    except (ImportError, AttributeError, TypeError) as e:
        ActivityLog.log_error(
            error_message=f"Failed to load scraper module '{name}': {str(e)}",
            source="scraper_factory",
            severity="error"
        )
        # Fallback to dummy scraper
        from .dummy import Scraper as DummyScraper
        return DummyScraper()


def get_available_scrapers():
    """Get list of available scraper modules."""
    import os
    from scipaperloader.scrapers import __path__ as scrapers_path

    modules = []
    scrapers_dir = scrapers_path[0]

    for filename in os.listdir(scrapers_dir):
        if filename.endswith(".py") and filename not in ("__init__.py", "base.py", "factory.py"):
            module_name = filename[:-3]
            try:
                # Try to import and validate the module
                module = importlib.import_module(f"scipaperloader.scrapers.{module_name}")
                cls = getattr(module, "Scraper", None)
                if cls and issubclass(cls, BaseScraper):
                    modules.append({
                        "name": module_name,
                        "class": cls,
                        "description": getattr(cls, "__doc__", "No description available")
                    })
            except (ImportError, AttributeError, TypeError):
                # Skip invalid modules
                pass

    return modules
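Not part of the diff: a rough sketch of how the factory is consumed (compare the new process_paper task above), e.g. from a flask shell session with an active application context; the DOI is made up.

from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers

print([s["name"] for s in get_available_scrapers()])  # e.g. ['dummy']

scraper = get_scraper()  # resolves ScraperModuleConfig, falls back to the dummy scraper on error
result = scraper.scrape("10.1234/dummy.0001")  # made-up DOI; the dummy scraper expects a matching PaperMetadata row
print(result.status, result.message)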
@@ -9,52 +9,112 @@
 <!-- include flash messages template -->
 {% include "partials/flash_messages.html.jinja" %}

-<form action="{{ url_for('config.update_general') }}" method="post">
-    <div class="form-section">
-        <h6>Scraper Volume</h6>
-        <p class="text-muted">Configure the total number of papers to scrape per day.</p>
+<div class="row">
+    <!-- General Settings Column -->
+    <div class="col-md-6">
+        <form action="{{ url_for('config.update_general') }}" method="post">
+            <div class="form-section">
+                <h6>Scraper Volume</h6>
+                <p class="text-muted">Configure the total number of papers to scrape per day.</p>
+
         <div class="mb-3">
             <label for="totalVolume" class="form-label">Papers per day:</label>
-            <input type="number" class="form-control" id="totalVolume" name="total_volume" min="1"
-                max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
+            <input type="number" class="form-control" id="totalVolume" name="total_volume"
+                min="1" max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
             <div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
         </div>
+            </div>
+
+            <div class="form-section">
+                <h6>Download Path</h6>
+                <p class="text-muted">Base directory where scraped paper files will be stored.</p>
+                <div class="mb-3">
+                    <label for="downloadPath" class="form-label">Download Directory:</label>
+                    <input type="text" class="form-control" id="downloadPath" name="download_path"
+                        value="{{ download_path_config.path }}" required>
+                    <div class="form-text">Enter the full path to the download directory (e.g.,
+                        /data/papers).
+                        Ensure the directory exists and the application has write permissions.</div>
+                </div>
+            </div>
+
+            <div class="form-section">
+                <h6>System Settings</h6>
+                <p class="text-muted">Configure general system behavior.</p>
+
+                <div class="mb-3 form-check">
+                    <input type="checkbox" class="form-check-input" id="enableNotifications" checked>
+                    <label class="form-check-label" for="enableNotifications">
+                        Enable email notifications
+                    </label>
+                </div>
+
+                <div class="mb-3 form-check">
+                    <input type="checkbox" class="form-check-input" id="enableLogging" checked>
+                    <label class="form-check-label" for="enableLogging">
+                        Enable detailed activity logging
+                    </label>
+                </div>
+            </div>
+
+            <button type="submit" class="btn btn-primary">Save General Settings</button>
+        </form>
     </div>

-    <div class="form-section">
-        <h6>Download Path</h6>
-        <p class="text-muted">Base directory where scraped paper files will be stored.</p>
-        <div class="mb-3">
-            <label for="downloadPath" class="form-label">Download Directory:</label>
-            <input type="text" class="form-control" id="downloadPath" name="download_path"
-                value="{{ download_path_config.path }}" required>
-            <div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
-                Ensure the directory exists and the application has write permissions.</div>
-        </div>
-    </div>
-
-    <div class="form-section">
-        <h6>System Settings</h6>
-        <p class="text-muted">Configure general system behavior.</p>
-
-        <div class="mb-3 form-check">
-            <input type="checkbox" class="form-check-input" id="enableNotifications" checked>
-            <label class="form-check-label" for="enableNotifications">
-                Enable email notifications
-            </label>
-        </div>
-
-        <div class="mb-3 form-check">
-            <input type="checkbox" class="form-check-input" id="enableLogging" checked>
-            <label class="form-check-label" for="enableLogging">
-                Enable detailed activity logging
-            </label>
-        </div>
-    </div>
-
-    <button type="submit" class="btn btn-primary">Save General Settings</button>
-</form>
+    <!-- Scraper Module Column -->
+    <div class="col-md-6">
+        <form method="post" action="{{ url_for('config.update_scraper_module') }}">
+            <div class="form-section">
+                <h6>Scraper Module</h6>
+                <p class="text-muted">Select which scraper module to use for processing papers.</p>
+
+                <div class="mb-3">
+                    <label for="scraper_module" class="form-label">Active Scraper Module:</label>
+                    <select class="form-control" id="scraper_module" name="scraper_module">
+                        {% for module in available_scraper_modules %}
+                        <option value="{{ module }}" {% if module==current_scraper_module %} selected
+                            {%endif %}>
+                            {{ module }}
+                            {% if scraper_details[module] %}
+                            - {{ scraper_details[module].description[:50] }}...
+                            {% endif %}
+                        </option>
+                        {% endfor %}
+                    </select>
+                    <div class="form-text">
+                        Current module: <strong>{{ current_scraper_module }}</strong>
+                    </div>
+                </div>
+            </div>
+            <button type="submit" class="btn btn-primary">Update Scraper Module</button>
+        </form>
+    </div>
+</div>
+
+<!-- Database Management Section -->
+<div class="row mt-4">
+    <div class="col-12">
+        <div class="card border-danger">
+            <div class="card-header bg-danger text-white">
+                <h5>Database Management</h5>
+            </div>
+            <div class="card-body">
+                <div class="form-section">
+                    <h6>Delete All Papers</h6>
+                    <p class="text-muted">This action will permanently delete all paper records from the
+                        database. This cannot be undone.</p>
+
+                    <form method="post" action="{{ url_for('config.delete_all_papers') }}" class="mt-3"
+                        onsubmit="return confirm('WARNING: You are about to delete ALL papers from the database. This action cannot be undone. Are you sure you want to proceed?');">
+                        <button type="submit" class="btn btn-danger">
+                            <i class="fas fa-trash-alt"></i> Delete All Papers
+                        </button>
+                    </form>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
@ -36,6 +36,28 @@
|
|||||||
max-width: 350px;
|
max-width: 350px;
|
||||||
z-index: 1050;
|
z-index: 1050;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.search-results-container {
|
||||||
|
max-height: 300px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Paper status badges */
|
||||||
|
.badge-new {
|
||||||
|
background-color: #17a2b8;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge-pending {
|
||||||
|
background-color: #ffc107;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge-done {
|
||||||
|
background-color: #28a745;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge-failed {
|
||||||
|
background-color: #dc3545;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
{% endblock styles %}
|
{% endblock styles %}
|
||||||
|
|
||||||
@ -89,6 +111,61 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- New row for single paper processing -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col-12">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">
|
||||||
|
<h5>Process Single Paper</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-6">
|
||||||
|
<form id="searchPaperForm" class="mb-3">
|
||||||
|
<div class="input-group">
|
||||||
|
<input type="text" id="paperSearchInput" class="form-control"
|
||||||
|
placeholder="Search paper by title, DOI, or ID...">
|
||||||
|
<button class="btn btn-outline-secondary" type="submit">Search</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="scraperSelect">Scraper Module:</label>
|
||||||
|
<select class="form-control" id="scraperSelect">
|
||||||
|
<option value="">Use default system scraper</option>
|
||||||
|
<!-- Available scrapers will be populated here -->
|
||||||
|
</select>
|
||||||
|
<div class="form-text">
|
||||||
|
Select which scraper to use for processing the paper
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="searchResults" class="mt-3 search-results-container d-none">
|
||||||
|
<table class="table table-hover table-striped">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>DOI</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="paperSearchResults">
|
||||||
|
<!-- Search results will be populated here -->
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="processingStatus" class="alert alert-info mt-3 d-none"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="row mb-4">
|
<div class="row mb-4">
|
||||||
<div class="col-12">
|
<div class="col-12">
|
||||||
<div class="card">
|
<div class="card">
|
||||||
@ -164,12 +241,19 @@
|
|||||||
const resetButton = document.getElementById('resetButton');
|
const resetButton = document.getElementById('resetButton');
|
||||||
const notificationsToggle = document.getElementById('notificationsToggle');
|
const notificationsToggle = document.getElementById('notificationsToggle');
|
||||||
const activityLog = document.getElementById('activityLog');
|
const activityLog = document.getElementById('activityLog');
|
||||||
|
const searchForm = document.getElementById('searchPaperForm');
|
||||||
|
const searchInput = document.getElementById('paperSearchInput');
|
||||||
|
const searchResults = document.getElementById('searchResults');
|
||||||
|
const processingStatus = document.getElementById('processingStatus');
|
||||||
|
const paperSearchResults = document.getElementById('paperSearchResults');
|
||||||
|
const scraperSelect = document.getElementById('scraperSelect');
|
||||||
|
|
||||||
// Initialize the page
|
// Initialize the page
|
||||||
document.addEventListener('DOMContentLoaded', function () {
|
document.addEventListener('DOMContentLoaded', function () {
|
||||||
initStatusPolling();
|
initStatusPolling();
|
||||||
loadActivityStats(currentTimeRange);
|
loadActivityStats(currentTimeRange);
|
||||||
loadRecentActivity();
|
loadRecentActivity();
|
||||||
|
loadAvailableScrapers();
|
||||||
|
|
||||||
// Initialize event listeners
|
// Initialize event listeners
|
||||||
startButton.addEventListener('click', startScraper);
|
startButton.addEventListener('click', startScraper);
|
||||||
@ -177,6 +261,10 @@
|
|||||||
stopButton.addEventListener('click', stopScraper);
|
stopButton.addEventListener('click', stopScraper);
|
||||||
resetButton.addEventListener('click', resetScraper);
|
resetButton.addEventListener('click', resetScraper);
|
||||||
notificationsToggle.addEventListener('click', toggleNotifications);
|
notificationsToggle.addEventListener('click', toggleNotifications);
|
||||||
|
searchForm.addEventListener('submit', function (e) {
|
||||||
|
e.preventDefault();
|
||||||
|
searchPapers();
|
||||||
|
});
|
||||||
|
|
||||||
document.getElementById('volumeForm').addEventListener('submit', function (e) {
|
document.getElementById('volumeForm').addEventListener('submit', function (e) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
@ -193,6 +281,185 @@
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Load available scraper modules
|
||||||
|
function loadAvailableScrapers() {
|
||||||
|
fetch('/scraper/available_scrapers')
|
||||||
|
.then(response => response.json())
|
||||||
|
.then(data => {
|
||||||
|
if (data.success && data.scrapers && data.scrapers.length > 0) {
|
||||||
|
// Clear previous options except the default one
|
||||||
|
while (scraperSelect.options.length > 1) {
|
||||||
|
scraperSelect.remove(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add each scraper as an option
|
||||||
|
data.scrapers.forEach(scraper => {
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.value = scraper.name;
|
||||||
|
option.textContent = `${scraper.name} - ${scraper.description.substring(0, 50)}${scraper.description.length > 50 ? '...' : ''}`;
|
||||||
|
if (scraper.is_current) {
|
||||||
|
option.textContent += ' (system default)';
|
||||||
|
}
|
||||||
|
scraperSelect.appendChild(option);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// If no scrapers or error, add a note
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.disabled = true;
|
||||||
|
option.textContent = 'No scrapers available';
|
||||||
|
scraperSelect.appendChild(option);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
console.error('Error loading scrapers:', error);
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.disabled = true;
|
||||||
|
option.textContent = 'Error loading scrapers';
|
||||||
|
scraperSelect.appendChild(option);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|

// Search papers function
function searchPapers() {
    const query = searchInput.value.trim();

    if (!query) {
        showFlashMessage('Please enter a search term', 'warning');
        return;
    }

    // Show loading message
    paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Searching papers...</td></tr>';
    searchResults.classList.remove('d-none');

    // Fetch papers from API
    fetch(`/api/papers?query=${encodeURIComponent(query)}`)
        .then(response => response.json())
        .then(data => {
            if (!data.papers || data.papers.length === 0) {
                paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">No papers found matching your search</td></tr>';
                return;
            }

            paperSearchResults.innerHTML = '';

            data.papers.forEach(paper => {
                const row = document.createElement('tr');

                // Create status badge
                let statusBadge = '';
                if (paper.status === 'New') {
                    statusBadge = '<span class="badge bg-info">New</span>';
                } else if (paper.status === 'Pending') {
                    statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
                } else if (paper.status === 'Done') {
                    statusBadge = '<span class="badge bg-success">Done</span>';
                } else if (paper.status === 'Failed') {
                    statusBadge = '<span class="badge bg-danger">Failed</span>';
                } else {
                    statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
                }

                // Create process button (enabled only for papers not in 'Pending' status)
                const processButtonDisabled = paper.status === 'Pending' ? 'disabled' : '';

                // Truncate title if too long
                const truncatedTitle = paper.title.length > 70 ? paper.title.substring(0, 70) + '...' : paper.title;

                row.innerHTML = `
                    <td>${paper.id}</td>
                    <td title="${paper.title}">${truncatedTitle}</td>
                    <td>${paper.doi || 'N/A'}</td>
                    <td>${statusBadge}</td>
                    <td>
                        <button class="btn btn-sm btn-primary process-paper-btn"
                            data-paper-id="${paper.id}"
                            ${processButtonDisabled}>
                            Process Now
                        </button>
                    </td>
                `;

                paperSearchResults.appendChild(row);
            });

            // Add event listeners to the process buttons
            document.querySelectorAll('.process-paper-btn').forEach(btn => {
                btn.addEventListener('click', function () {
                    processSinglePaper(this.getAttribute('data-paper-id'));
                });
            });
        })
        .catch(error => {
            console.error('Error searching papers:', error);
            paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Error searching papers</td></tr>';
        });
}
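// Usage sketch (assumed wiring; the listener itself is outside this hunk): a search form
// submit handler would call searchPapers(), for example:
//   searchForm.addEventListener('submit', (event) => {
//       event.preventDefault();
//       searchPapers();
//   });
// where `searchForm` is a hypothetical form element wrapping `searchInput`.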

// Process a single paper
function processSinglePaper(paperId) {
    // Disable all process buttons to prevent multiple clicks
    document.querySelectorAll('.process-paper-btn').forEach(btn => {
        btn.disabled = true;
    });

    // Show processing status
    processingStatus.textContent = 'Processing paper...';
    processingStatus.classList.remove('d-none');

    // Get selected scraper
    const selectedScraper = scraperSelect.value;

    // Send request to process the paper
    fetch(`/scraper/process_single/${paperId}`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            scraper_module: selectedScraper
        })
    })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                processingStatus.textContent = data.message;
                processingStatus.className = 'alert alert-success mt-3';

                // Update status in the search results
                const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
                const statusCell = row.querySelector('td:nth-child(4)');
                statusCell.innerHTML = '<span class="badge bg-warning text-dark">Pending</span>';

                // Show notification
                showFlashMessage(data.message, 'success');

                // Set up polling to check paper status and refresh activity
                pollPaperStatus(paperId, 3000, 20);
            } else {
                processingStatus.textContent = data.message;
                processingStatus.className = 'alert alert-danger mt-3';
                showFlashMessage(data.message, 'error');
            }
        })
        .catch(error => {
            console.error('Error processing paper:', error);
            processingStatus.textContent = 'Error: Could not process paper';
            processingStatus.className = 'alert alert-danger mt-3';
            showFlashMessage('Error processing paper', 'error');
        })
        .finally(() => {
            // Re-enable the process buttons after a short delay
            setTimeout(() => {
                document.querySelectorAll('.process-paper-btn').forEach(btn => {
                    if (btn.getAttribute('data-paper-id') !== paperId) {
                        btn.disabled = false;
                    }
                });
            }, 1000);
        });
}
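// Expected response shape for POST /scraper/process_single/<paperId>, inferred from the
// handler above (only `success` and `message` are read from the JSON body):
//   { "success": true,  "message": "..." }  on acceptance
//   { "success": false, "message": "..." }  on failure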

// Status polling
function initStatusPolling() {
    updateStatus();
@ -285,39 +552,39 @@
    if (confirm("Are you sure you want to reset the scraper? This will stop all current tasks, optionally clear non-pending papers, and restart the scraper.")) {
        // Disable button to prevent multiple clicks
        resetButton.disabled = true;

        // Show a loading message
        showFlashMessage('Resetting scraper, please wait...', 'info');

        fetch('/scraper/reset', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                clear_papers: true // You could make this configurable with a checkbox
            })
        })
            .then(response => response.json())
            .then(data => {
                if (data.success) {
                    showFlashMessage('Scraper has been completely reset and restarted', 'success');
                    // Update everything
                    updateStatus();
                    loadActivityStats(currentTimeRange);
                    setTimeout(() => { loadRecentActivity(); }, 1000);
                } else {
                    showFlashMessage(data.message || 'Error resetting scraper', 'error');
                }
                // Re-enable button
                resetButton.disabled = false;
            })
            .catch(error => {
                console.error("Error resetting scraper:", error);
                showFlashMessage('Error resetting scraper: ' + error.message, 'error');
                // Re-enable button
                resetButton.disabled = false;
            });
    }
}
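// Sketch (assumption): to make clear_papers user-configurable as the comment above suggests,
// the request body could read the flag from a checkbox, e.g.
//   body: JSON.stringify({
//       clear_papers: document.getElementById('clearPapersCheckbox')?.checked ?? false
//   })
// where 'clearPapersCheckbox' is a hypothetical element id, not one defined in this template.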
@ -345,6 +612,97 @@
    notificationsEnabled = notificationsToggle.checked;
}

// Poll paper status until it changes from Pending
function pollPaperStatus(paperId, interval = 3000, maxAttempts = 20) {
    let attempts = 0;

    // Immediately refresh activity log to show the initial pending status
    loadRecentActivity();

    const checkStatus = () => {
        attempts++;
        console.log(`Checking status of paper ${paperId}, attempt ${attempts}/${maxAttempts}`);

        // Fetch the current paper status
        fetch(`/api/papers/${paperId}`)
            .then(response => response.json())
            .then(data => {
                if (data && data.paper) {
                    const paper = data.paper;
                    console.log(`Paper status: ${paper.status}`);

                    // Update the UI with the current status
                    const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
                    if (row) {
                        const statusCell = row.querySelector('td:nth-child(4)');
                        let statusBadge = '';

                        if (paper.status === 'New') {
                            statusBadge = '<span class="badge bg-info">New</span>';
                        } else if (paper.status === 'Pending') {
                            statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
                        } else if (paper.status === 'Done') {
                            statusBadge = '<span class="badge bg-success">Done</span>';
                        } else if (paper.status === 'Failed') {
                            statusBadge = '<span class="badge bg-danger">Failed</span>';
                        } else {
                            statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
                        }

                        statusCell.innerHTML = statusBadge;

                        // Update processing status message if status changed
                        if (paper.status !== 'Pending') {
                            if (paper.status === 'Done') {
                                processingStatus.textContent = `Paper processed successfully: ${paper.title}`;
                                processingStatus.className = 'alert alert-success mt-3';
                            } else if (paper.status === 'Failed') {
                                processingStatus.textContent = `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
                                processingStatus.className = 'alert alert-danger mt-3';
                            }
                        }
                    }

                    // Always refresh activity log
                    loadRecentActivity();

                    // If status is still pending and we haven't reached max attempts, check again
                    if (paper.status === 'Pending' && attempts < maxAttempts) {
                        setTimeout(checkStatus, interval);
                    } else {
                        // If status changed or we reached max attempts, refresh chart data too
                        loadActivityStats(currentTimeRange);

                        // Show notification if status changed
                        if (paper.status !== 'Pending') {
                            const status = paper.status === 'Done' ? 'success' : 'error';
                            const message = paper.status === 'Done'
                                ? `Paper processed successfully: ${paper.title}`
                                : `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
                            showFlashMessage(message, status);
                        }

                        // If we hit max attempts but status is still pending, show a message
                        if (paper.status === 'Pending' && attempts >= maxAttempts) {
                            processingStatus.textContent = 'Paper is still being processed. Check the activity log for updates.';
                            processingStatus.className = 'alert alert-info mt-3';
                        }
                    }
                }
            })
            .catch(error => {
                console.error(`Error polling paper status: ${error}`);
                // If there's an error, we can still try again if under max attempts
                if (attempts < maxAttempts) {
                    setTimeout(checkStatus, interval);
                }
            });
    };

    // Start checking
    setTimeout(checkStatus, interval);
}
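// Timing note: with the defaults passed in above (interval = 3000 ms, maxAttempts = 20),
// polling gives up after roughly 20 * 3 s = 60 s and leaves the "still being processed"
// message in place if the paper has not left the 'Pending' state by then.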

// Load data functions
function loadActivityStats(hours) {
    fetch(`/scraper/stats?hours=${hours}`)
@ -359,8 +717,10 @@
        .then(response => response.json())
        .then(data => {
            renderActivityLog(data);
            console.log("Activity log refreshed with latest data");
        })
        .catch((error) => {
            console.error("Failed to load activity logs:", error);
            // If the API endpoint doesn't exist, just show a message
            activityLog.innerHTML = '<tr><td colspan="4" class="text-center">Activity log API not available</td></tr>';
        });
@ -467,6 +827,26 @@
        });
}

// Flash message function
function showFlashMessage(message, type) {
    const flashContainer = document.createElement('div');
    flashContainer.className = `alert alert-${type === 'error' ? 'danger' : type} alert-dismissible fade show notification`;
    flashContainer.innerHTML = `
        ${message}
        <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
    `;

    document.body.appendChild(flashContainer);

    // Auto dismiss after 5 seconds
    setTimeout(() => {
        flashContainer.classList.remove('show');
        setTimeout(() => {
            flashContainer.remove();
        }, 150); // Remove after fade out animation
    }, 5000);
}

// WebSocket for real-time notifications
function setupWebSocket() {
    // If WebSocket is available, implement it here