Compare commits

..

21 Commits

Author SHA1 Message Date
fe202b56d0 fixes logging and scraper start / stop task planning 2025-06-13 13:56:46 +02:00
24f9eb5766 makes logger much more beautiful 2025-06-13 12:57:54 +02:00
4a10052eae redesign of logger frontend to streamline and unify all logger views 2025-06-13 12:30:44 +02:00
8f064cda34 adds timezone config option 2025-06-13 11:47:41 +02:00
7fd403bd40 timezone fix 2025-06-13 11:14:06 +02:00
a7964a2f3d adds scraper modules and modular publisher parser system 2025-06-13 10:11:59 +02:00
ce6bc03b46 new landing page 2025-06-11 23:54:17 +02:00
70e2e2e900 again lol 2025-06-11 23:50:29 +02:00
793f6f9a7e fix 2025-06-11 23:48:46 +02:00
8f84774880 fixes flash messages for single paper scraping 2025-06-11 23:48:38 +02:00
98901ce38e also fixes flash messages 2025-06-11 23:45:19 +02:00
d730137d20 fixes flash messages 2025-06-11 23:44:01 +02:00
e2ae95cea0 fix api path in config js 2025-06-11 23:15:25 +02:00
676a3c96eb adds pagination to scraper and improves timestamp formatting 2025-06-11 23:11:49 +02:00
7a1ab3d7e6 fixes scraper activity chart 2025-06-11 22:25:35 +02:00
a4eb7648d5 fixes scraper 2025-06-11 21:32:01 +02:00
88e180bc94 creates timeline for scraper activity 2025-06-11 14:03:35 +02:00
5c5afefe40 modularizes the templates' js 2025-06-11 11:37:09 +02:00
8ffcf4d65c fix some ui stuff 2025-06-10 19:40:28 +02:00
ceeb6c375d refactor to apscheduler instead of redis and celery 2025-06-10 19:14:59 +02:00
3b42010fab fixes scheduling 2025-06-10 11:40:36 +02:00
67 changed files with 9439 additions and 2365 deletions

.gitignore vendored
View File

@ -17,4 +17,5 @@ dist/
migrations/
celerybeat-schedule*
# APScheduler job store files
jobs.sqlite

View File

@ -1,10 +1,9 @@
# List of phony targets (targets that don't represent files)
.PHONY: all clean venv run format format-check lint mypy test dist reformat dev celery celery-flower redis run-all diagnostics
.PHONY: all clean venv run format format-check lint mypy test dist reformat dev run-scheduler diagnostics clean-papers purge-db
# Define Python and pip executables inside virtual environment
PYTHON := venv/bin/python
PIP := venv/bin/pip
CELERY := venv/bin/celery
FLASK := venv/bin/flask
# Default target that runs the application
@ -15,7 +14,7 @@ clean:
rm -rf venv build dist .pytest_cache .mypy_cache *.egg-info
# Define database path
DB_PATH=scipaperloader/papers.db
DB_PATH=instance/papers.db
# Backup the database with timestamp
backup-db:
@ -91,6 +90,24 @@ reset-db: venv
$(PYTHON) -m flask --app scipaperloader db migrate -m "Initial migration"
$(PYTHON) -m flask --app scipaperloader db upgrade
# Clean all papers from the database (keep other tables intact)
clean-papers: venv
@echo "Cleaning all papers from the database..."
@$(PYTHON) -c "from scipaperloader.db import db; from scipaperloader.models import PaperMetadata; from scipaperloader import create_app; app = create_app(); app.app_context().push(); PaperMetadata.query.delete(); db.session.commit(); print('All papers have been removed from the database')"
# Completely purge all database contents (removes all tables and data)
purge-db: venv
@echo "WARNING: This will completely wipe all database contents!"
@read -p "Are you sure you want to continue? (y/N) " -n 1 -r; \
echo; \
if [[ $$REPLY =~ ^[Yy]$$ ]]; then \
echo "Purging database..."; \
rm -f $(DB_PATH); \
echo "Database completely purged"; \
else \
echo "Operation cancelled"; \
fi
# Create and set up virtual environment
venv:
python3 -m venv venv && \
@ -133,65 +150,12 @@ dist: format-check lint mypy test
# Set up complete development environment
dev: clean venv
# Start Celery worker - PURGE FIRST
celery: venv redis
@echo "Purging Celery task queue before starting worker..."
# Purge the queue forcefully. Ignore errors if queue is empty/unreachable initially.
@-$(CELERY) -A celery_worker:celery purge -f
@echo "Starting Celery worker..."
$(CELERY) -A celery_worker:celery worker --loglevel=info
# Monitor Celery tasks with flower web interface
celery-flower: venv
$(PIP) install flower
$(CELERY) -A celery_worker:celery flower --port=5555
# Run Celery beat scheduler for periodic tasks
celery-beat: venv redis
@echo "Starting Celery beat scheduler..."
# Ensure celerybeat-schedule file is removed for clean start if needed
@-rm -f celerybeat-schedule.db
# Use the default file-based scheduler (removed the --scheduler flag)
$(CELERY) -A celery_worker:celery beat --loglevel=info
# Check if Redis is running, start if needed
redis:
@if ! redis-cli ping > /dev/null 2>&1; then \
echo "Starting Redis server..."; \
redis-server --daemonize yes; \
sleep 1; \
else \
echo "Redis is already running."; \
fi
# Run complete application stack (Flask app + Celery worker + Redis + Beat scheduler)
run-all: redis
@echo "Starting Flask, Celery worker and Beat scheduler..."
# Run them in parallel. Ctrl+C will send SIGINT to make, which propagates.
# Use trap to attempt cleanup, but primary cleanup is purge on next start.
@trap '$(MAKE) stop-all;' INT TERM; \
$(MAKE) -j3 run celery celery-beat & wait
# Stop running Celery worker and beat gracefully
stop-celery:
@echo "Attempting graceful shutdown of Celery worker and beat..."
@-pkill -TERM -f "celery -A celery_worker:celery worker" || echo "Worker not found or already stopped."
@-pkill -TERM -f "celery -A celery_worker:celery beat" || echo "Beat not found or already stopped."
@sleep 1 # Give processes a moment to terminate
@echo "Purging remaining tasks from Celery queue..."
@-$(CELERY) -A celery_worker:celery purge -f || echo "Purge failed or queue empty."
# Stop Flask development server
stop-flask:
@echo "Attempting shutdown of Flask development server..."
@-pkill -TERM -f "flask --app scipaperloader --debug run" || echo "Flask server not found or already stopped."
# Stop all components potentially started by run-all
stop-all: stop-celery stop-flask
@echo "All components stopped."
# Start the APScheduler-enabled Flask application
run-scheduler: venv
@echo "Starting Flask app with APScheduler..."
$(PYTHON) -m flask --app scipaperloader --debug run
# Run diagnostic tools
# Run diagnostic tools - works with or without virtualenv
diagnostics:
$(PYTHON) tools/run_diagnostics.py

View File

@ -15,7 +15,6 @@ And open it in the browser at [http://localhost:5000/](http://localhost:5000/)
## Prerequisites
- Python >=3.8
- Redis (for Celery task queue)
## Development environment
@ -41,30 +40,39 @@ And open it in the browser at [http://localhost:5000/](http://localhost:5000/)
add development dependencies under `project.optional-dependencies.*`; run
`make clean && make venv` to reinstall the environment
## Asynchronous Task Processing with Celery
## Task Processing Architecture
SciPaperLoader uses Celery for processing large CSV uploads and other background tasks. This allows the application to handle large datasets reliably without blocking the web interface.
SciPaperLoader uses **APScheduler** for all task processing:
### Running Celery Components
- **Periodic Tasks**: Hourly scraper scheduling with randomized paper processing
- **Background Tasks**: CSV uploads, manual paper processing, and all async operations
- **Job Management**: Clean job scheduling, revocation, and status tracking
- `make redis`: ensures Redis server is running (required for Celery)
This unified architecture provides reliable task processing with simple, maintainable code.
- `make celery`: starts a Celery worker to process background tasks
### Running Components
- `make celery-flower`: starts Flower, a web interface for monitoring Celery tasks at http://localhost:5555
- `make run`: starts the Flask application with integrated APScheduler
- `make run-all`: runs the entire stack (Flask app + Celery worker + Redis) in development mode
For development monitoring:
- Access the Flask admin interface for APScheduler job monitoring
- View real-time logs in the application's activity log section
### How It Works
When you upload a CSV file through the web interface:
**For CSV Uploads:**
1. File is uploaded through the web interface
2. APScheduler creates a background job to process the file
3. Browser shows progress updates via AJAX polling
4. Results are displayed when processing completes
1. The file is sent to the server
2. A Celery task is created to process the file asynchronously
3. The browser shows a progress bar with real-time updates
4. The results are displayed when processing is complete
**For Scheduled Scraping:**
1. APScheduler runs hourly at the top of each hour
2. Papers are selected based on volume and schedule configuration
3. Individual paper processing jobs are scheduled at random times within the hour
4. All jobs are tracked in the database with complete visibility
This architecture allows SciPaperLoader to handle CSV files with thousands of papers without timing out or blocking the web interface.
This unified architecture provides reliable task processing without external dependencies.
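The scheduler module itself is not part of this diff, so the following is only a minimal sketch of the setup the paragraphs above describe — the SQLAlchemy job store, the `hourly_scraper_scheduler` callable, and the timezone are assumptions for illustration, not code from this changeset:

```python
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.triggers.cron import CronTrigger


def hourly_scraper_scheduler():
    # Placeholder for the hourly job that selects and schedules papers.
    pass


def build_scheduler(database_uri: str) -> BackgroundScheduler:
    """Create a scheduler that persists jobs in the application's database."""
    scheduler = BackgroundScheduler(
        jobstores={"default": SQLAlchemyJobStore(url=database_uri)},
        timezone="UTC",
    )
    # Run at the top of every hour, matching the behaviour described above.
    scheduler.add_job(
        func=hourly_scraper_scheduler,
        trigger=CronTrigger(minute=0),
        id="hourly_scraper_scheduler",
        replace_existing=True,
    )
    scheduler.start()
    return scheduler
```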
## Configuration
@ -72,12 +80,12 @@ Default configuration is loaded from `scipaperloader.defaults` and can be
overridden by environment variables with a `FLASK_` prefix. See
[Configuring from Environment Variables](https://flask.palletsprojects.com/en/3.0.x/config/#configuring-from-environment-variables).
### Celery Configuration
### Task Processing Configuration
The following environment variables can be set to configure Celery:
APScheduler automatically uses your configured database for job persistence. No additional configuration required.
- `FLASK_CELERY_BROKER_URL`: Redis URL for the message broker (default: `redis://localhost:6379/0`)
- `FLASK_CELERY_RESULT_BACKEND`: Redis URL for storing task results (default: `redis://localhost:6379/0`)
For advanced configuration, you can set:
- `FLASK_SQLALCHEMY_DATABASE_URI`: Database URL (APScheduler uses the same database)
Consider using
[dotenv](https://flask.palletsprojects.com/en/3.0.x/cli/#environment-variables-from-dotenv).
@ -115,17 +123,18 @@ You must set a
[SECRET_KEY](https://flask.palletsprojects.com/en/3.0.x/tutorial/deploy/#configure-the-secret-key)
in production to a secret and stable value.
### Deploying with Celery
### Deploying with APScheduler
When deploying to production:
1. Configure a production-ready Redis instance or use a managed service
2. Run Celery workers as system services or in Docker containers
3. Consider setting up monitoring for your Celery tasks and workers
1. APScheduler jobs are automatically persistent in your database
2. The Flask application handles all background processing internally
3. No external message broker or workers required
4. Scale by running multiple Flask instances with shared database
## Troubleshooting and Diagnostics
SciPaperLoader includes a collection of diagnostic and emergency tools to help address issues with the application, particularly with the scraper and Celery task system.
SciPaperLoader includes a collection of diagnostic and emergency tools to help address issues with the application, particularly with the scraper and APScheduler task system.
### Quick Access
@ -151,7 +160,7 @@ All diagnostic tools are located in the `tools/diagnostics/` directory:
- **check_state.py**: Quickly check the current state of the scraper in the database
- **diagnose_scraper.py**: Comprehensive diagnostic tool that examines tasks, logs, and scraper state
- **inspect_tasks.py**: View currently running, scheduled, and reserved Celery tasks
- **inspect_tasks.py**: View currently running and scheduled APScheduler tasks
- **test_reversion.py**: Test the paper reversion functionality when stopping the scraper
### Emergency Recovery
@ -159,7 +168,7 @@ All diagnostic tools are located in the `tools/diagnostics/` directory:
For cases where the scraper is stuck or behaving unexpectedly:
- **emergency_stop.py**: Force stops all scraper activities, revokes all running tasks, and reverts papers from "Pending" state
- **quick_fix.py**: Simplified emergency stop that also restarts Celery workers to ensure code changes are applied
- **quick_fix.py**: Simplified emergency stop that also stops Flask processes to ensure code changes are applied
### Usage Example

View File

@ -1,11 +0,0 @@
from scipaperloader.celery import celery, configure_celery
# Import all task modules to ensure they are registered with Celery
import scipaperloader.scrapers.tasks # Import new scheduler tasks
import scipaperloader.blueprints.scraper # Import the scraper module with our tasks
# Configure celery with Flask app
configure_celery()
if __name__ == '__main__':
# Start the Celery worker
celery.start(['worker', '--loglevel=info', '--concurrency=2'])

BIN
dump.rdb

Binary file not shown.

View File

@ -13,10 +13,10 @@ dependencies = [
"flask-wtf>=1.2.2,<2",
"pyzotero>=1.6.11,<2",
"pandas>=2.2.3,<3",
"celery>=5.5.1,<6",
"redis>=5.2.1,<6",
"flower>=2.0.1,<3",
"APScheduler>=3.10.4,<4",
"flask-migrate>=4.1.0,<5",
"beautifulsoup4>=4.13.4,<5 ",
"requests>=2.32.4,<3"
]
[project.optional-dependencies]

View File

@ -5,14 +5,23 @@ from .db import db
from .models import init_schedule_config
from .models import ActivityLog, ActivityCategory
from .blueprints import register_blueprints
from .scheduler import ScraperScheduler
def create_app(test_config=None):
app = Flask(__name__)
app = Flask(__name__, instance_relative_config=True)
app.config.from_object(Config)
# Celery configuration
app.config['CELERY_BROKER_URL'] = app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0')
app.config['CELERY_RESULT_BACKEND'] = app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
# Ensure the instance folder exists
import os
try:
os.makedirs(app.instance_path)
except OSError:
pass
# Set the database URI to use absolute path if it's the default relative path
if app.config['SQLALCHEMY_DATABASE_URI'] == "sqlite:///instance/papers.db":
db_path = os.path.join(app.instance_path, 'papers.db')
app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}'
if test_config:
app.config.update(test_config)
@ -24,6 +33,12 @@ def create_app(test_config=None):
db.create_all()
init_schedule_config()
# Initialize APScheduler
scheduler = ScraperScheduler(app)
# Store scheduler in app config for access from other modules
app.config['SCHEDULER'] = scheduler
@app.context_processor
def inject_app_title():
return {"app_title": app.config["APP_TITLE"]}

View File

@ -2,7 +2,7 @@
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
from ..db import db
# Import the new model
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata, TimezoneConfig
from ..defaults import MAX_VOLUME
import os # Import os for path validation
import sys
@ -129,6 +129,54 @@ def _update_download_path(new_path):
return False, f"Error updating download path: {str(e)}", None
def _update_timezone(new_timezone):
"""
Helper function to update timezone configuration.
Args:
new_timezone (str): The new timezone
Returns:
tuple: (success, message, timezone_config)
"""
try:
# Basic validation: check if it's a non-empty string
if not new_timezone or not isinstance(new_timezone, str):
return False, "Timezone cannot be empty.", None
# Validate timezone using pytz
try:
import pytz
pytz.timezone(new_timezone) # This will raise an exception if invalid
except ImportError:
# If pytz is not available, do basic validation
if '/' not in new_timezone:
return False, "Invalid timezone format. Use format like 'Europe/Berlin'.", None
except pytz.exceptions.UnknownTimeZoneError:
return False, f"Unknown timezone: {new_timezone}. Use format like 'Europe/Berlin'.", None
config = TimezoneConfig.query.first()
if not config:
config = TimezoneConfig(timezone=new_timezone)
db.session.add(config)
else:
old_value = config.timezone
config.timezone = new_timezone
ActivityLog.log_config_change(
config_key="scheduler_timezone",
old_value=old_value,
new_value=new_timezone,
description="Updated scheduler timezone"
)
db.session.commit()
return True, "Timezone updated successfully!", config
except Exception as e:
db.session.rollback()
return False, f"Error updating timezone: {str(e)}", None
def _update_schedule(schedule_data):
"""
Helper function to update schedule configuration.
@ -211,11 +259,19 @@ def general():
db.session.add(download_path_config)
db.session.commit()
# Fetch timezone config
timezone_config = TimezoneConfig.query.first()
if not timezone_config:
timezone_config = TimezoneConfig() # Use default from model
db.session.add(timezone_config)
db.session.commit()
return render_template(
"config/index.html.jinja",
active_tab="general",
volume_config=volume_config,
download_path_config=download_path_config, # Pass to template
timezone_config=timezone_config, # Pass to template
max_volume=MAX_VOLUME,
app_title="Configuration"
)
@ -369,9 +425,10 @@ def generate_test_papers():
@bp.route("/update/general", methods=["POST"])
def update_general():
"""Update general configuration (Volume and Download Path)."""
"""Update general configuration (Volume, Download Path, and Timezone)."""
volume_success, volume_message = True, ""
path_success, path_message = True, ""
timezone_success, timezone_message = True, ""
# Update Volume
new_volume = request.form.get("total_volume")
@ -391,6 +448,15 @@ def update_general():
else:
flash(path_message, "error")
# Update Timezone
new_timezone = request.form.get("timezone")
if new_timezone is not None:
timezone_success, timezone_message, _ = _update_timezone(new_timezone)
if timezone_success:
flash(timezone_message, "success")
else:
flash(timezone_message, "error")
return redirect(url_for("config.general"))

View File

@ -2,7 +2,7 @@
import csv
import io
import datetime
from flask import Blueprint, render_template, request, send_file
from flask import Blueprint, render_template, request, send_file, jsonify
from ..db import db
from ..models import ActivityLog, ActivityCategory
@ -11,11 +11,11 @@ bp = Blueprint("logger", __name__, url_prefix="/logs")
@bp.route("/")
def list_logs():
page = request.args.get("page", 1, type=int)
per_page = 50
# For the new modern view, we only need to provide initial filter values and categories
# The actual data loading will be handled by JavaScript via the API endpoint
# Filters
category = request.args.get("category")
# Get filter parameters for initial state
categories_param = request.args.getlist("category") # Get multiple categories
start_date = request.args.get("start_date")
end_date = request.args.get("end_date")
search_term = request.args.get("search_term")
@ -23,33 +23,12 @@ def list_logs():
if search_term == "None":
search_term = None
query = ActivityLog.query
if category:
query = query.filter(ActivityLog.category == category)
if start_date:
start_date_dt = datetime.datetime.strptime(start_date, "%Y-%m-%d")
query = query.filter(ActivityLog.timestamp >= start_date_dt)
if end_date:
end_date_dt = datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=1)
query = query.filter(ActivityLog.timestamp <= end_date_dt)
if search_term:
query = query.filter(db.or_(
ActivityLog.action.contains(search_term),
ActivityLog.description.contains(search_term)
))
pagination = query.order_by(ActivityLog.timestamp.desc()).paginate(page=page, per_page=per_page, error_out=False)
categories = [e.value for e in ActivityCategory]
return render_template(
"logger.html.jinja",
logs=pagination.items,
pagination=pagination,
"logs.html.jinja",
categories=categories,
category=category,
selected_categories=categories_param, # Pass selected categories
start_date=start_date,
end_date=end_date,
search_term=search_term,
@ -60,15 +39,15 @@ def list_logs():
@bp.route("/download")
def download_logs():
# Filters - reuse logic from list_logs
category = request.args.get("category")
categories = request.args.getlist("category") # Get multiple categories
start_date = request.args.get("start_date")
end_date = request.args.get("end_date")
search_term = request.args.get("search_term")
query = ActivityLog.query
if category:
query = query.filter(ActivityLog.category == category)
if categories:
query = query.filter(ActivityLog.category.in_(categories))
if start_date:
start_date_dt = datetime.datetime.strptime(start_date, "%Y-%m-%d")
query = query.filter(ActivityLog.timestamp >= start_date_dt)
@ -99,8 +78,12 @@ def download_logs():
# Create response
filename = f"logs_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
csv_data.seek(0)
output = io.BytesIO(csv_data.getvalue().encode('utf-8'))
output.seek(0)
return send_file(
io.StringIO(csv_data.getvalue()),
output,
mimetype="text/csv",
as_attachment=True,
download_name=filename
@ -110,3 +93,131 @@ def download_logs():
def log_detail(log_id):
log = ActivityLog.query.get_or_404(log_id)
return render_template("partials/log_detail_modal.html.jinja", log=log)
@bp.route("/api")
def get_logs_api():
"""Unified API endpoint for getting activity logs with filtering and pagination support."""
try:
# Pagination parameters
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 50, type=int)
# Legacy limit parameter for backward compatibility
limit = request.args.get('limit', type=int)
if limit and not request.args.get('page'):
# Legacy mode: use limit without pagination
query = ActivityLog.query
# Apply filters
categories = request.args.getlist('category')
if categories:
query = query.filter(ActivityLog.category.in_(categories))
status = request.args.get('status')
if status:
query = query.filter(ActivityLog.status == status)
start_date = request.args.get('start_date')
if start_date:
start_date_dt = datetime.datetime.strptime(start_date, "%Y-%m-%d")
query = query.filter(ActivityLog.timestamp >= start_date_dt)
end_date = request.args.get('end_date')
if end_date:
end_date_dt = datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=1)
query = query.filter(ActivityLog.timestamp <= end_date_dt)
search_term = request.args.get('search_term')
if search_term and search_term != "None":
query = query.filter(db.or_(
ActivityLog.action.contains(search_term),
ActivityLog.description.contains(search_term)
))
logs = query.order_by(ActivityLog.timestamp.desc()).limit(limit).all()
return jsonify({
"success": True,
"logs": [{
"id": log.id,
"timestamp": log.timestamp.isoformat(),
"action": log.action,
"status": log.status,
"description": log.description,
"category": log.category,
"paper_id": log.paper_id,
"extra_data": log.extra_data
} for log in logs]
})
# Ensure reasonable per_page limits
per_page = min(per_page, 100) # Cap at 100 items per page
# Build query with filtering
query = ActivityLog.query
# Filter by categories if specified
categories = request.args.getlist('category')
if categories:
query = query.filter(ActivityLog.category.in_(categories))
# Filter by status if specified
status = request.args.get('status')
if status:
query = query.filter(ActivityLog.status == status)
# Date filters
start_date = request.args.get('start_date')
if start_date:
start_date_dt = datetime.datetime.strptime(start_date, "%Y-%m-%d")
query = query.filter(ActivityLog.timestamp >= start_date_dt)
end_date = request.args.get('end_date')
if end_date:
end_date_dt = datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=1)
query = query.filter(ActivityLog.timestamp <= end_date_dt)
# Search term filter
search_term = request.args.get('search_term')
if search_term and search_term != "None":
query = query.filter(db.or_(
ActivityLog.action.contains(search_term),
ActivityLog.description.contains(search_term)
))
# Order by most recent first and paginate
pagination = query.order_by(ActivityLog.timestamp.desc()).paginate(
page=page,
per_page=per_page,
error_out=False
)
return jsonify({
"success": True,
"logs": [{
"id": log.id,
"timestamp": log.timestamp.isoformat(),
"action": log.action,
"status": log.status,
"description": log.description,
"category": log.category,
"paper_id": log.paper_id,
"extra_data": log.extra_data
} for log in pagination.items],
"pagination": {
"page": pagination.page,
"pages": pagination.pages,
"per_page": pagination.per_page,
"total": pagination.total,
"has_next": pagination.has_next,
"has_prev": pagination.has_prev,
"next_num": pagination.next_num if pagination.has_next else None,
"prev_num": pagination.prev_num if pagination.has_prev else None
}
})
except Exception as e:
return jsonify({
"success": False,
"message": f"Error getting logs: {str(e)}"
}), 500
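For reference, the unified endpoint can be exercised with a small client script. The host, port, and category names below are illustrative placeholders; only the query parameters themselves come from the code above:

```python
import requests

BASE_URL = "http://localhost:5000"  # assumed development address

# Paginated mode: page/per_page plus the optional filters handled above
resp = requests.get(
    f"{BASE_URL}/logs/api",
    params={
        "page": 1,
        "per_page": 25,
        "category": ["scraper_activity", "scraper_command"],  # sent as repeated ?category=...
        "start_date": "2025-06-01",
        "search_term": "scraper",
    },
)
data = resp.json()
if data["success"]:
    for log in data["logs"]:
        print(log["timestamp"], log["action"], log["status"])
    print("total entries:", data["pagination"]["total"])

# Legacy mode: passing only 'limit' returns the most recent N entries without pagination
legacy = requests.get(f"{BASE_URL}/logs/api", params={"limit": 10}).json()
```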

View File

@ -1,7 +1,7 @@
"""
Simplified scraper blueprint using the new ScraperManager and hourly scheduling system.
"""
from flask import Blueprint, jsonify, render_template, request
from flask import Blueprint, jsonify, render_template, request, current_app
from ..models import ActivityLog, PaperMetadata, ScraperState, VolumeConfig
from ..scrapers.manager import ScraperManager
from ..scrapers.factory import get_available_scrapers
@ -29,6 +29,10 @@ def index():
# Get volume configuration
volume_config = VolumeConfig.get_current_volume()
# Get scraper module configuration
from ..models import ScraperModuleConfig
current_scraper_module = ScraperModuleConfig.get_current_module()
# Get paper counts by status
paper_counts = {
'new': PaperMetadata.query.filter_by(status='New').count(),
@ -46,7 +50,10 @@ def index():
recent_logs=recent_logs,
paper_counts=paper_counts,
volume_config=volume_config,
max_volume=MAX_VOLUME
max_volume=MAX_VOLUME,
current_scraper_module=current_scraper_module,
available_scraper_modules=[s["name"] for s in available_scrapers],
scraper_details={s["name"]: s for s in available_scrapers}
)
@bp.route("/start", methods=["POST"])
@ -55,11 +62,12 @@ def start_scraper():
try:
# Handle both JSON and form data
if request.is_json:
data = request.get_json() or {}
data = request.get_json()
# Allow empty JSON payload for start requests
if data is None:
data = {}
else:
data = request.form.to_dict()
scraper_name = data.get('scraper_name', 'dummy')
return jsonify({"success": False, "message": "Invalid payload format. Expected JSON."}), 400
# Start the scraper using manager
result = scraper_manager.start_scraper()
@ -68,18 +76,16 @@ def start_scraper():
ActivityLog.log_scraper_command(
action="start_scraper",
status="success",
description="Started scraper with hourly scheduling"
description="Scraper started successfully."
)
return jsonify({
"success": True,
"message": result["message"]
})
return jsonify({"success": True, "message": result["message"]})
else:
return jsonify({
"success": False,
"message": result["message"]
}), 400
ActivityLog.log_scraper_command(
action="start_scraper",
status="failure",
description=f"Failed to start scraper: {result['message']}"
)
return jsonify({"success": False, "message": result["message"]}), 400
except Exception as e:
ActivityLog.log_scraper_command(
@ -87,10 +93,7 @@ def start_scraper():
status="error",
description=f"Failed to start scraper: {str(e)}"
)
return jsonify({
"success": False,
"message": f"Error starting scraper: {str(e)}"
}), 500
return jsonify({"success": False, "message": f"An error occurred: {str(e)}"}), 500
@bp.route("/pause", methods=["POST"])
def pause_scraper():
@ -223,6 +226,13 @@ def get_status():
# Get current hour quota info
current_quota = scraper_manager.get_current_hour_quota()
# Get current scraper module configuration
from ..models import ScraperModuleConfig
current_scraper_module = ScraperModuleConfig.get_current_module()
# Get volume configuration
current_volume = VolumeConfig.get_current_volume()
return jsonify({
"success": True,
"scraper_state": {
@ -231,7 +241,9 @@ def get_status():
"last_updated": scraper_state.last_updated.isoformat() if scraper_state.last_updated else None
},
"paper_counts": paper_counts,
"current_quota": current_quota
"current_quota": current_quota,
"current_scraper_module": current_scraper_module,
"volume_config": current_volume
})
except Exception as e:
@ -242,28 +254,16 @@ def get_status():
@bp.route("/logs")
def get_logs():
"""Get recent activity logs."""
try:
limit = request.args.get('limit', 50, type=int)
logs = ActivityLog.query.order_by(ActivityLog.timestamp.desc()).limit(limit).all()
"""Get recent activity logs with pagination support."""
# Redirect to the unified logs API endpoint
from flask import redirect, url_for
return jsonify({
"success": True,
"logs": [{
"id": log.id,
"timestamp": log.timestamp.isoformat(),
"action": log.action,
"status": log.status,
"description": log.description,
"category": log.category.name if log.category else None
} for log in logs]
})
except Exception as e:
return jsonify({
"success": False,
"message": f"Error getting logs: {str(e)}"
}), 500
# Forward all query parameters to the unified endpoint
query_string = request.query_string.decode('utf-8')
if query_string:
return redirect(f"{url_for('logger.get_logs_api')}?{query_string}")
else:
return redirect(url_for('logger.get_logs_api'))
@bp.route("/scrapers")
def get_scrapers():
@ -346,8 +346,6 @@ def process_papers_manually():
def trigger_immediate_processing():
"""Trigger immediate processing of papers without waiting for hourly schedule."""
try:
from ..scrapers.tasks import process_papers_batch
# Get papers that should be processed this hour
manager = ScraperManager()
papers = manager.select_papers_for_processing()
@ -359,23 +357,38 @@ def trigger_immediate_processing():
"papers_scheduled": 0
})
# Get paper IDs for batch processing
paper_ids = [paper.id for paper in papers]
# Get APScheduler instance
scheduler = current_app.config.get('SCHEDULER')
if not scheduler:
return jsonify({
"success": False,
"message": "APScheduler not available"
}), 500
# Trigger immediate batch processing (no delay)
task = process_papers_batch.delay(paper_ids)
# Schedule papers for immediate processing via APScheduler
scheduled_count = 0
for paper in papers:
try:
import uuid
job_id = f"immediate_paper_{paper.id}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}_{uuid.uuid4().hex[:8]}"
scheduler.schedule_paper_processing(paper.id, delay_seconds=1, job_id=job_id)
scheduled_count += 1
except Exception as e:
ActivityLog.log_error(
error_message=f"Failed to schedule paper {paper.id}: {str(e)}",
source="trigger_immediate_processing"
)
ActivityLog.log_scraper_command(
action="trigger_immediate_processing",
status="success",
description=f"Triggered immediate processing of {len(paper_ids)} papers"
description=f"Triggered immediate processing of {scheduled_count} papers via APScheduler"
)
return jsonify({
"success": True,
"message": f"Immediate processing started for {len(paper_ids)} papers",
"papers_scheduled": len(paper_ids),
"task_id": task.id
"message": f"Immediate processing started for {scheduled_count} papers",
"papers_scheduled": scheduled_count
})
except Exception as e:
@ -416,40 +429,96 @@ def get_stats():
try:
hours = int(request.args.get('hours', 24))
current_time = datetime.utcnow()
cutoff_time = current_time.replace(minute=0, second=0, microsecond=0)
# Get activity logs for scraper actions in the last N hours
from ..models import ActivityCategory
start_time = cutoff_time - timedelta(hours=hours)
start_time = current_time - timedelta(hours=hours)
logs = ActivityLog.query.filter(
ActivityLog.category == ActivityCategory.SCRAPER_ACTIVITY.value,
ActivityLog.timestamp >= start_time
).all()
# Group by hour and status
stats = {}
# Get scraper command logs for state changes in the same time period
state_logs = ActivityLog.query.filter(
ActivityLog.category == ActivityCategory.SCRAPER_COMMAND.value,
ActivityLog.action.in_(['start_scraper', 'pause_scraper', 'stop_scraper', 'reset_scraper']),
ActivityLog.timestamp >= start_time
).order_by(ActivityLog.timestamp.asc()).all()
# Group by chronological hour buckets (not hour of day)
stats = []
for hour_offset in range(hours):
target_hour = (current_time.hour - hour_offset) % 24
stats[target_hour] = {
# Calculate the hour bucket (most recent hour first when hour_offset=0)
bucket_end_time = current_time - timedelta(hours=hour_offset)
bucket_start_time = bucket_end_time - timedelta(hours=1)
# Format hour label for display (e.g., "14:00-15:00" or "14:00" for simplicity)
hour_label = bucket_start_time.strftime("%H:%M")
# Initialize counters for this hour bucket
bucket_stats = {
"success": 0,
"error": 0,
"pending": 0,
"hour": target_hour,
"hour": hour_label,
"hour_offset": hour_offset, # For sorting
"bucket_start": bucket_start_time,
"bucket_end": bucket_end_time,
"scraper_active": 0 # Default to inactive
}
for log in logs:
hour = log.timestamp.hour
if hour in stats:
if log.status == "success":
stats[hour]["success"] += 1
elif log.status == "error":
stats[hour]["error"] += 1
elif log.status in ("pending", "info"):
stats[hour]["pending"] += 1
# Count logs that fall within this hour bucket
for log in logs:
if bucket_start_time <= log.timestamp < bucket_end_time:
if log.status == "success":
bucket_stats["success"] += 1
elif log.status == "error":
bucket_stats["error"] += 1
elif log.status in ("pending", "info"):
bucket_stats["pending"] += 1
# Convert to list for easier consumption by JavaScript
result = [stats[hour] for hour in sorted(stats.keys())]
return jsonify(result)
# Determine scraper status for this hour by checking if scraper was active
# For simplicity, check if there were any successful scrapes in this hour
# If there were scrapes, assume scraper was active
bucket_stats["scraper_active"] = 1 if bucket_stats["success"] > 0 else 0
stats.append(bucket_stats)
# Reverse so oldest hour comes first (better for chronological chart display)
stats.reverse()
# Prepare precise scraper state changes for timeline
scraper_timeline = []
for log in state_logs:
# Calculate hours ago from current time
time_diff = current_time - log.timestamp
hours_ago = time_diff.total_seconds() / 3600
# Only include logs within our time range
if hours_ago <= hours:
scraper_timeline.append({
"timestamp": log.timestamp.isoformat(),
"hours_ago": hours_ago,
"action": log.action,
"status": log.status,
"active": 1 if log.action == "start_scraper" and log.status == "success" else 0
})
# Clean up the response (remove internal fields)
result = []
for stat in stats:
result.append({
"success": stat["success"],
"error": stat["error"],
"pending": stat["pending"],
"hour": stat["hour"],
"scraper_active": stat["scraper_active"]
})
return jsonify({
"hourly_stats": result,
"scraper_timeline": scraper_timeline
})
except Exception as e:
return jsonify({
@ -472,20 +541,39 @@ def process_single_paper_endpoint(paper_id):
"message": "Paper not found"
}), 404
# Process the paper using the manager
result = scraper_manager.process_paper(paper)
# Get APScheduler instance
scheduler = current_app.config.get('SCHEDULER')
if not scheduler:
return jsonify({
"success": False,
"message": "APScheduler not available"
}), 500
ActivityLog.log_scraper_command(
action="manual_process_single",
status="success",
description=f"Manually processed paper {paper.doi}"
)
# Schedule the paper for immediate manual processing via APScheduler
# Use UUID suffix to ensure unique job IDs
import uuid
job_id = f"manual_paper_{paper_id}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}_{uuid.uuid4().hex[:8]}"
try:
scheduler.schedule_manual_paper_processing(paper_id, scraper_name=scraper_name, delay_seconds=1, job_id=job_id)
return jsonify({
"success": True,
"message": f"Processing started for paper {paper.doi}",
"paper_id": paper_id
})
ActivityLog.log_scraper_command(
action="manual_process_single",
status="success",
description=f"Scheduled manual processing for paper {paper.doi} via APScheduler" +
(f" using scraper '{scraper_name}'" if scraper_name else " using system default scraper")
)
return jsonify({
"success": True,
"message": f"Processing scheduled for paper {paper.doi}" +
(f" using {scraper_name} scraper" if scraper_name else " using system default scraper"),
"paper_id": paper_id
})
except Exception as e:
return jsonify({
"success": False,
"message": f"Failed to schedule processing: {str(e)}"
}), 500
except Exception as e:
ActivityLog.log_scraper_command(
@ -530,6 +618,35 @@ def update_scraper_config():
"message": message
}), 400
# Handle scraper module configuration updates
if "scraper_module" in data:
from ..models import ScraperModuleConfig
new_module = data["scraper_module"]
# Validate that the module exists and is valid
available_modules = [m["name"] for m in get_available_scrapers()]
if new_module not in available_modules:
return jsonify({
"success": False,
"message": f"Invalid scraper module: {new_module}"
}), 400
# Update the database configuration
ScraperModuleConfig.set_module(new_module)
ActivityLog.log_scraper_command(
action="update_scraper_module",
status="success",
description=f"Updated scraper module to '{new_module}'"
)
return jsonify({
"success": True,
"message": f"Scraper module updated to '{new_module}' successfully"
})
# Handle other configuration updates here if needed in the future
return jsonify({
@ -547,3 +664,72 @@ def update_scraper_config():
"success": False,
"message": f"Error updating scraper config: {str(e)}"
}), 500
@bp.route("/publishers")
def get_publishers():
"""Get publisher overview data for the scraper overview modal."""
try:
import os
import glob
# Get available parser modules
parsers_dir = os.path.join(current_app.root_path, 'parsers')
parser_files = glob.glob(os.path.join(parsers_dir, '*_parser.py'))
available_parsers = []
for parser_file in parser_files:
filename = os.path.basename(parser_file)
if filename != 'base_parser.py': # Skip the base parser
parser_name = filename.replace('_parser.py', '')
available_parsers.append(parser_name)
# Get publishers from database (papers that have publisher detected)
publisher_query = db.session.query(
PaperMetadata.publisher,
db.func.count(PaperMetadata.id).label('paper_count')
).filter(
PaperMetadata.publisher.isnot(None),
PaperMetadata.publisher != ''
).group_by(PaperMetadata.publisher).all()
publishers_data = []
for publisher, count in publisher_query:
# Check if a parser exists for this publisher
has_parser = publisher in available_parsers
publishers_data.append({
'name': publisher,
'paper_count': count,
'has_parser': has_parser,
'parser_status': 'available' if has_parser else 'missing'
})
# Sort by paper count descending
publishers_data.sort(key=lambda x: x['paper_count'], reverse=True)
# Get totals
total_papers_with_publisher = sum(p['paper_count'] for p in publishers_data)
total_papers_without_publisher = PaperMetadata.query.filter(
db.or_(PaperMetadata.publisher.is_(None), PaperMetadata.publisher == '')
).count()
return jsonify({
'success': True,
'data': {
'publishers': publishers_data,
'available_parsers': available_parsers,
'stats': {
'total_publishers': len(publishers_data),
'publishers_with_parsers': len([p for p in publishers_data if p['has_parser']]),
'publishers_without_parsers': len([p for p in publishers_data if not p['has_parser']]),
'total_papers_with_publisher': total_papers_with_publisher,
'total_papers_without_publisher': total_papers_without_publisher
}
}
})
except Exception as e:
return jsonify({
'success': False,
'message': f'Error getting publisher data: {str(e)}'
}), 500

View File

@ -2,8 +2,11 @@
import codecs
import csv
import datetime
from io import StringIO
import traceback
from io import StringIO, BytesIO
import json
import uuid
from typing import Dict, Any
import pandas as pd
from flask import (
@ -21,7 +24,6 @@ from flask import (
from ..db import db
from ..models import PaperMetadata, ActivityLog
from ..celery import celery # Import the celery instance directly
from ..defaults import DUPLICATE_STRATEGIES
bp = Blueprint("upload", __name__)
@ -29,6 +31,10 @@ bp = Blueprint("upload", __name__)
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
CHUNK_SIZE = 100 # Number of rows to process per batch
# Store task progress in memory (for simplicity)
# In production, you might want to use Redis or database
task_progress = {}
def parse_date(date_str):
"""Parse date string into datetime object."""
if not date_str or pd.isna(date_str):
@ -38,6 +44,76 @@ def parse_date(date_str):
except ValueError:
return None
def _process_csv_background(task_id: str, file_content: str, delimiter: str, duplicate_strategy: str):
"""Background function to process CSV file using APScheduler."""
print(f"DEBUG: _process_csv_background called with task_id: {task_id}")
# Get Flask app for context
from flask import current_app
# Get the Flask app from the scheduler context
from ..scheduler import _get_flask_app
app = _get_flask_app()
print(f"DEBUG: Flask app obtained: {app}")
if not app:
# Fallback: try to get current_app
try:
app = current_app
print(f"DEBUG: Using current_app: {app}")
except RuntimeError as e:
print(f"DEBUG: Failed to get current_app: {e}")
task_progress[task_id] = {
"state": "FAILURE",
"progress": 0,
"error": "Flask app context not available"
}
return
with app.app_context():
try:
print(f"DEBUG: Inside app context, starting CSV processing for task {task_id}")
# Initialize progress
task_progress[task_id] = {
"state": "PROGRESS",
"progress": 0,
"message": "Starting CSV processing..."
}
result = process_csv(file_content, delimiter, duplicate_strategy, task_id)
print(f"DEBUG: CSV processing completed for task {task_id}, result: {result}")
# Mark as completed
task_progress[task_id] = {
"state": "SUCCESS",
"progress": 100,
"result": result
}
except Exception as e:
print(f"DEBUG: Exception in _process_csv_background: {e}")
import traceback
traceback.print_exc()
# Mark as failed
task_progress[task_id] = {
"state": "FAILURE",
"progress": 0,
"error": str(e)
}
try:
ActivityLog.log_error(
error_message=f"Background CSV processing failed: {str(e)}",
source="upload._process_csv_background"
)
except Exception:
# If logging fails, just print the error
print(f"Background CSV processing failed: {str(e)}")
@bp.route("/", methods=["GET", "POST"])
def upload():
if request.method == "POST":
@ -51,23 +127,75 @@ def upload():
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
# Trigger the Celery task
task = process_csv.delay(content, delimiter, duplicate_strategy)
# Generate task ID
task_id = str(uuid.uuid4())
return jsonify({"task_id": task.id})
# Get the APScheduler instance from the global variable
from ..scheduler import _scheduler
if not _scheduler:
return jsonify({"error": "APScheduler not initialized."})
if not _scheduler.running:
return jsonify({"error": "APScheduler not running."})
# Initialize task progress immediately
task_progress[task_id] = {
"state": "PENDING",
"progress": 0,
"message": "Task queued for processing..."
}
# Schedule background task
job_id = f"csv_upload_{task_id}"
# Use UTC time to match APScheduler's timezone configuration
run_time = datetime.datetime.utcnow() + datetime.timedelta(seconds=1) # Start in 1 second
try:
_scheduler.add_job(
func=_process_csv_background,
trigger='date',
run_date=run_time,
args=[task_id, content, delimiter, duplicate_strategy],
id=job_id,
name=f"CSV Upload {task_id}",
replace_existing=True
)
ActivityLog.log_import_activity(
action="schedule_csv_upload",
status="info",
description=f"Scheduled CSV upload task {task_id}",
task_id=task_id
)
except Exception as e:
task_progress[task_id] = {
"state": "FAILURE",
"progress": 0,
"error": f"Failed to schedule task: {str(e)}"
}
return jsonify({"error": f"Failed to schedule background task: {str(e)}"})
return jsonify({"task_id": task_id})
return render_template("upload.html.jinja", duplicate_strategies=DUPLICATE_STRATEGIES)
@celery.task(bind=True)
def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
def process_csv(file_content, delimiter, duplicate_strategy="skip", task_id=None):
"""Process CSV file and import paper metadata."""
# With the ContextTask in place, we're already inside an app context
added_count = skipped_count = updated_count = error_count = 0
errors = []
skipped_records = [] # Add this to track skipped records
try:
# Update task progress if provided
if task_id:
task_progress[task_id] = {
"state": "PROGRESS",
"progress": 10,
"message": "Starting CSV import..."
}
# Log the start of import using ActivityLog model
ActivityLog.log_import_activity(
action="start_csv_import",
@ -77,9 +205,6 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
delimiter=delimiter
)
# Set initial progress percentage
self.update_state(state='PROGRESS', meta={'progress': 10})
# Read CSV into chunks
csv_buffer = StringIO(file_content)
# Count total chunks
@ -116,16 +241,16 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
skipped_count += 1
continue
else:
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
paper = PaperMetadata(
title=row.get("title"),
doi=row.get("doi"),
alt_id=row.get("alt_id") or row.get("alternative_id"), # Handle both column names
issn=row.get("issn"),
journal=row.get("journal"),
published_online=parse_date(row.get("published_online")),
status="New",
status="New"
)
db.session.add(metadata)
db.session.add(paper)
added_count += 1
except Exception as e:
error_count += 1
@ -134,6 +259,15 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
# Commit the chunk and roll session fresh
db.session.commit()
# Update progress
if task_id:
progress = min(90, 10 + int((chunk_idx + 1) * 80 / total_chunks))
task_progress[task_id] = {
"state": "PROGRESS",
"progress": progress,
"message": f"Processed {chunk_idx+1}/{total_chunks} chunks"
}
# Log periodic progress every 5 chunks
if (chunk_idx + 1) % 5 == 0:
ActivityLog.log_import_activity(
@ -148,11 +282,14 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
}
)
progress = min(90, 10 + int((chunk_idx + 1) * 80 / total_chunks))
self.update_state(state='PROGRESS', meta={'progress': progress})
# Final progress update and completion log
self.update_state(state='PROGRESS', meta={'progress': 100})
if task_id:
task_progress[task_id] = {
"state": "PROGRESS",
"progress": 100,
"message": "Finalizing import..."
}
ActivityLog.log_import_activity(
action="complete_csv_import",
status="success",
@ -167,6 +304,12 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
except Exception as e:
db.session.rollback()
if task_id:
task_progress[task_id] = {
"state": "FAILURE",
"progress": 0,
"error": str(e)
}
ActivityLog.log_error(
error_message="CSV import failed",
exception=e,
@ -189,7 +332,7 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
status="error",
description=f"Import completed with {error_count} errors",
error_csv=error_csv.getvalue(),
task_id=self.request.id,
task_id=task_id,
error_count=error_count
)
except Exception:
@ -204,41 +347,23 @@ def process_csv(self, file_content, delimiter, duplicate_strategy="skip"):
"skipped_records": skipped_records[:5], # Include up to 5 examples
"skipped_reason_summary": "Records were skipped because they already exist in the database. Use 'update' strategy to update them.",
"errors": errors[:5],
"error_count": error_count,
"task_id": self.request.id
"error_count": error_count
}
@bp.route("/task_status/<task_id>")
def task_status(task_id):
"""Get status of background task."""
task = celery.AsyncResult(task_id)
progress_data = task_progress.get(task_id)
if not progress_data:
return jsonify({"error": "Task not found."})
if task.state == "PENDING":
response = {"state": task.state, "progress": 0}
elif task.state == "PROGRESS":
response = {
"state": task.state,
"progress": task.info.get("progress", 0)
}
elif task.state == "SUCCESS":
response = {
"state": task.state,
"result": task.result
}
else: # FAILURE, REVOKED, etc.
response = {
"state": task.state,
"error": str(task.info) if task.info else "Unknown error"
}
return jsonify(response)
return jsonify(progress_data)
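Taken together with the upload route above, a client can submit a CSV and poll this endpoint until the job finishes. The sketch below assumes the blueprint is mounted under `/upload` and guesses the form field names, since neither is shown in this diff:

```python
import time
import requests

BASE_URL = "http://localhost:5000"   # assumed development address
UPLOAD_URL = f"{BASE_URL}/upload/"   # assumed mount point for the upload blueprint

# Submit the CSV; the route responds with {"task_id": ...} once the job is scheduled
with open("papers.csv", "rb") as fh:
    resp = requests.post(
        UPLOAD_URL,
        files={"file": fh},
        data={"delimiter": ",", "duplicate_strategy": "skip"},  # field names assumed
    )
task_id = resp.json()["task_id"]

# Poll the in-memory progress store exposed by /task_status/<task_id>
while True:
    status = requests.get(f"{BASE_URL}/upload/task_status/{task_id}").json()
    state = status.get("state")
    if state in ("PENDING", "PROGRESS"):
        print(f"{status.get('progress', 0)}% - {status.get('message', '')}")
        time.sleep(2)
    elif state == "SUCCESS":
        print("done:", status["result"])
        break
    else:  # FAILURE, or {"error": "Task not found."}
        print("failed:", status.get("error"))
        break
```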
@bp.route("/download_error_log/<task_id>")
def download_error_log(task_id):
# Find the most recent error log for this task
error_log = ActivityLog.query.filter(
ActivityLog.action == "import_errors",
ActivityLog.extra_data.like(f'%"{task_id}"%') # Search in JSON
ActivityLog.action == "import_errors"
).order_by(ActivityLog.timestamp.desc()).first()
if not error_log:
@ -255,7 +380,7 @@ def download_error_log(task_id):
buffer = StringIO(error_csv)
return send_file(
buffer,
BytesIO(buffer.getvalue().encode()), # Corrected to use BytesIO
mimetype="text/csv",
as_attachment=True,
download_name=f"upload_errors_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

View File

@ -1,52 +0,0 @@
from celery import Celery
from celery.schedules import crontab
# Create Celery instance without Flask app initially
celery = Celery(
'scipaperloader',
broker='redis://localhost:6379/0',
backend='redis://localhost:6379/0',
)
def configure_celery(app=None):
"""Configure Celery with the Flask app settings and ensure tasks run in the app context."""
if app is None:
# Import here to avoid circular import
from scipaperloader import create_app
app = create_app()
# Update Celery configuration using the app settings
celery.conf.update(
broker_url=app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'),
result_backend=app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0'),
task_serializer='json',
accept_content=['json'],
result_serializer='json',
timezone='UTC',
enable_utc=True,
task_time_limit=3600, # 1 hour max runtime
task_soft_time_limit=3000, # 50 minutes soft limit
worker_max_tasks_per_child=10, # Restart workers after 10 tasks
worker_max_memory_per_child=1000000, # 1GB memory limit
task_acks_late=True, # Acknowledge tasks after completion
task_reject_on_worker_lost=True, # Requeue tasks if worker dies
# Configure Beat schedule for periodic tasks
beat_schedule={
'hourly-scraper-scheduler': {
'task': 'scipaperloader.scrapers.tasks.hourly_scraper_scheduler',
'schedule': crontab(minute=0), # Run at the start of every hour
'options': {'expires': 3600}
},
}
)
# Create a custom task class that pushes the Flask application context
class ContextTask(celery.Task):
abstract = True
def __call__(self, *args, **kwargs):
with app.app_context():
return self.run(*args, **kwargs)
celery.Task = ContextTask
return celery

View File

@ -3,7 +3,7 @@ import os
class Config:
SECRET_KEY = os.environ.get("SECRET_KEY", "dev")
SQLALCHEMY_DATABASE_URI = os.environ.get("DATABASE_URL", "sqlite:///papers.db")
SQLALCHEMY_DATABASE_URI = os.environ.get("DATABASE_URL", "sqlite:///instance/papers.db")
SQLALCHEMY_TRACK_MODIFICATIONS = False
APP_TITLE = os.environ.get("APP_TITLE", "SciPaperLoader")
SCRAPER_MODULE = os.environ.get("SCRAPER_MODULE", "dummy")

View File

@ -191,6 +191,7 @@ class PaperMetadata(db.Model):
type = db.Column(db.String(50))
language = db.Column(db.String(50))
published_online = db.Column(db.Date) # or DateTime/String
publisher = db.Column(db.String(100), nullable=True) # Detected publisher name
status = db.Column(db.String(10)) # 'Pending','Done','Failed'
previous_status = db.Column(db.String(10), nullable=True) # Store previous status for reversion
file_path = db.Column(db.Text)
@ -342,6 +343,41 @@ class ScraperModuleConfig(db.Model):
db.session.commit()
return config
class TimezoneConfig(db.Model):
"""Model to store the configured timezone for the scheduler."""
id = db.Column(db.Integer, primary_key=True)
timezone = db.Column(db.String(50), default="Europe/Berlin")
@classmethod
def get_current_timezone(cls):
"""Get the currently configured timezone."""
config = cls.query.first()
if not config:
config = cls(timezone="Europe/Berlin")
db.session.add(config)
db.session.commit()
return config.timezone
@classmethod
def set_timezone(cls, timezone_name):
"""Set the timezone configuration."""
config = cls.query.first()
if not config:
config = cls(timezone=timezone_name)
db.session.add(config)
else:
old_value = config.timezone
config.timezone = timezone_name
ActivityLog.log_config_change(
config_key="scheduler_timezone",
old_value=old_value,
new_value=timezone_name,
description="Updated scheduler timezone configuration"
)
db.session.commit()
return config
def init_schedule_config():
"""Initialize ScheduleConfig with default values if empty"""
if ScheduleConfig.query.count() == 0:
@ -379,3 +415,9 @@ def init_schedule_config():
default_path = DownloadPathConfig(path="/path/to/dummy/papers")
db.session.add(default_path)
db.session.commit()
# Initialize TimezoneConfig if it doesn't exist
if TimezoneConfig.query.count() == 0:
default_timezone = TimezoneConfig(timezone="Europe/Berlin")
db.session.add(default_timezone)
db.session.commit()
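As an illustration of how the stored timezone might be consumed elsewhere (only `get_current_timezone()` is defined in this diff; the pytz conversion and the helper below are assumptions):

```python
from datetime import datetime, timezone

import pytz

from scipaperloader.models import TimezoneConfig


def to_local(utc_dt: datetime) -> datetime:
    """Convert a naive UTC timestamp into the configured scheduler timezone."""
    tz_name = TimezoneConfig.get_current_timezone()  # "Europe/Berlin" by default
    return utc_dt.replace(tzinfo=timezone.utc).astimezone(pytz.timezone(tz_name))


# Requires an active application context, since the timezone is read from the database:
# with app.app_context():
#     print(to_local(datetime.utcnow()))
```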

View File

@ -0,0 +1,6 @@
# Parser modules for extracting full text from publisher-specific HTML content
from .base_parser import BaseParser, ParsedContent, ParseError
from .elsevier_parser import ElsevierParser
from .arxiv_parser import ArxivParser
__all__ = ['BaseParser', 'ParsedContent', 'ParseError', 'ElsevierParser', 'ArxivParser']
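A small usage sketch for the parser interface exported here; fetching the HTML and wiring this into the scraper pipeline is outside this diff, so the helper below is illustrative only:

```python
from typing import Optional

from scipaperloader.parsers import ArxivParser, ElsevierParser, ParsedContent, ParseError


def parse_publisher_html(html: str, doi: Optional[str] = None,
                         url: Optional[str] = None) -> Optional[ParsedContent]:
    """Return structured content from the first parser that recognises the page."""
    for parser in (ElsevierParser(), ArxivParser()):
        if parser.can_parse(html, url=url):
            try:
                return parser.parse(html, doi=doi)
            except ParseError:
                continue  # try the next candidate parser
    return None


# content = parse_publisher_html(downloaded_html, doi="10.1000/example")
# if content:
#     print(content.title, content.journal, len(content.full_text))
```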

View File

@ -0,0 +1,227 @@
import re
from bs4 import BeautifulSoup
from typing import Dict, Optional, List
from .base_parser import BaseParser, ParsedContent, ParseError
class ArxivParser(BaseParser):
"""Parser for arXiv papers."""
def can_parse(self, html_content: str, url: Optional[str] = None) -> bool:
"""Check if this is an arXiv page."""
html_lower = html_content.lower()
# Check for arXiv indicators
indicators = [
'arxiv.org',
'export.arxiv.org',
'arxiv:',
'meta name="citation_publisher" content="arxiv"',
]
return any(indicator in html_lower for indicator in indicators)
def parse(self, html_content: str, doi: Optional[str] = None) -> ParsedContent:
"""Parse arXiv HTML content."""
try:
soup = BeautifulSoup(html_content, 'html.parser')
# Extract title
title = self._extract_title(soup)
# Extract abstract
abstract = self._extract_abstract(soup)
# Extract authors
authors = self._extract_authors(soup)
# Extract full text (arXiv usually just has abstract on the HTML page)
full_text = self._extract_full_text(soup, abstract)
# Extract keywords/subjects
keywords = self._extract_subjects(soup)
# Extract arxiv ID
arxiv_id = self._extract_arxiv_id(soup)
if not full_text or len(full_text.strip()) < 50:
raise ParseError("Could not extract meaningful content from arXiv page")
return ParsedContent(
full_text=full_text,
title=title,
abstract=abstract,
authors=authors,
keywords=keywords,
sections=None, # arXiv HTML pages don't usually have full sections
references=None, # References are typically in the PDF
doi=doi,
journal="arXiv",
publication_date=self._extract_submission_date(soup),
metadata={
'parser': 'arxiv',
'arxiv_id': arxiv_id,
'source': 'arxiv.org'
}
)
except Exception as e:
raise ParseError(f"Failed to parse arXiv content: {str(e)}")
def _extract_title(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract paper title."""
# Try multiple title selectors for arXiv
selectors = [
'h1.title',
'meta[name="citation_title"]',
'title'
]
for selector in selectors:
if 'meta' in selector:
element = soup.find('meta', attrs={'name': 'citation_title'})
if element:
return element.get('content', '').strip()
else:
element = soup.select_one(selector)
if element:
text = element.get_text(strip=True)
# Remove "Title:" prefix if present
text = re.sub(r'^Title:\s*', '', text)
return text
return None
def _extract_abstract(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract paper abstract."""
# arXiv abstract selectors
selectors = [
'blockquote.abstract',
'div.abstract',
'meta[name="citation_abstract"]'
]
for selector in selectors:
if 'meta' in selector:
element = soup.find('meta', attrs={'name': 'citation_abstract'})
if element:
return element.get('content', '').strip()
else:
element = soup.select_one(selector)
if element:
text = element.get_text(strip=True)
# Remove "Abstract:" prefix if present
text = re.sub(r'^Abstract:\s*', '', text)
return text
return None
def _extract_authors(self, soup: BeautifulSoup) -> Optional[List[str]]:
"""Extract author names."""
authors = []
# Try author meta tags
author_metas = soup.find_all('meta', attrs={'name': 'citation_author'})
if author_metas:
authors = [meta.get('content', '').strip() for meta in author_metas]
# Try arXiv author div
if not authors:
authors_div = soup.select_one('div.authors')
if authors_div:
# Extract author links or text
author_links = authors_div.find_all('a')
if author_links:
authors = [link.get_text(strip=True) for link in author_links]
else:
# Fallback to text parsing
text = authors_div.get_text()
# Remove "Authors:" prefix and split by commas
text = re.sub(r'^Authors?:\s*', '', text)
authors = [author.strip() for author in text.split(',')]
return authors if authors else None
def _extract_full_text(self, soup: BeautifulSoup, abstract: Optional[str] = None) -> str:
"""Extract main content (usually just abstract for arXiv HTML pages)."""
content_parts = []
# For arXiv, the HTML page typically only contains abstract and metadata
# The full text is in the PDF
if abstract:
content_parts.append(f"Abstract\n{abstract}")
# Look for any additional content sections
comments_section = soup.select_one('td.comments')
if comments_section:
comments = comments_section.get_text(strip=True)
if comments:
content_parts.append(f"Comments\n{comments}")
# Add note about PDF availability
content_parts.append(
"\nNote: This is the abstract and metadata from the arXiv HTML page. "
"The full text is available in the PDF version."
)
return '\n\n'.join(content_parts)
def _extract_subjects(self, soup: BeautifulSoup) -> Optional[List[str]]:
"""Extract subject classifications."""
subjects = []
# Look for subject classification
subjects_td = soup.select_one('td.subjects')
if subjects_td:
subjects_text = subjects_td.get_text(strip=True)
# Parse subjects (format: "Primary: subject1; Secondary: subject2")
subjects = [subj.strip() for subj in re.split(r'[;,]', subjects_text)]
# Clean up prefixes
subjects = [re.sub(r'^(Primary|Secondary):\s*', '', subj) for subj in subjects]
subjects = [subj for subj in subjects if subj] # Remove empty strings
return subjects if subjects else None
def _extract_arxiv_id(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract arXiv ID."""
# Look for arXiv ID in various places
arxiv_id_patterns = [
r'arXiv:(\d+\.\d+(?:v\d+)?)',
r'(\d{4}\.\d{4,5}(?:v\d+)?)',
]
# Search in page text
page_text = soup.get_text()
for pattern in arxiv_id_patterns:
match = re.search(pattern, page_text)
if match:
return match.group(1)
# Search in URL or meta tags
canonical_link = soup.find('link', attrs={'rel': 'canonical'})
if canonical_link:
href = canonical_link.get('href', '')
for pattern in arxiv_id_patterns:
match = re.search(pattern, href)
if match:
return match.group(1)
return None
def _extract_submission_date(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract submission date."""
# Look for submission date
submission_td = soup.select_one('td.submission-history')
if submission_td:
date_text = submission_td.get_text()
# Extract date (format varies)
date_match = re.search(r'(\d{1,2}\s+\w+\s+\d{4})', date_text)
if date_match:
return date_match.group(1)
# Try meta tag
date_meta = soup.find('meta', attrs={'name': 'citation_date'})
if date_meta:
return date_meta.get('content', '').strip()
return None
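
The arXiv parser above needs nothing beyond raw HTML and an optional DOI, so it can be exercised on its own. A minimal usage sketch, assuming the class shown here is exposed as ArxivParser and that the import path below matches where the parser modules live (both are assumptions, not confirmed by this diff):

# Usage sketch -- module path, class name and file location are assumptions.
from scipaperloader.parsers.arxiv_parser import ArxivParser
from scipaperloader.parsers.base_parser import ParseError

with open("downloads/2101.00001.html", encoding="utf-8") as fh:
    html = fh.read()

parser = ArxivParser()
if parser.can_parse(html, url="https://arxiv.org/abs/2101.00001"):
    try:
        content = parser.parse(html, doi="10.48550/arXiv.2101.00001")
        print(content.title, content.publication_date)
        print(content.metadata)  # e.g. {'parser': 'arxiv', 'arxiv_id': '2101.00001', 'source': 'arxiv.org'}
    except ParseError as exc:
        print(f"arXiv parsing failed: {exc}")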

View File

@ -0,0 +1,83 @@
from abc import ABC, abstractmethod
from typing import Dict, Optional, List
from dataclasses import dataclass
@dataclass
class ParsedContent:
"""Container for parsed content from a publisher's HTML."""
full_text: str
title: Optional[str] = None
abstract: Optional[str] = None
authors: Optional[List[str]] = None
keywords: Optional[List[str]] = None
sections: Optional[Dict[str, str]] = None # section_title -> section_content
references: Optional[List[str]] = None
doi: Optional[str] = None
journal: Optional[str] = None
publication_date: Optional[str] = None
metadata: Optional[Dict] = None # Additional metadata specific to publisher
class BaseParser(ABC):
"""Base class for all publisher-specific parsers."""
def __init__(self):
self.parser_name = self.__class__.__name__.lower().replace('parser', '')
@abstractmethod
def can_parse(self, html_content: str, url: Optional[str] = None) -> bool:
"""
Check if this parser can handle the given HTML content.
Args:
html_content: The HTML content to check
url: Optional URL of the content (for additional context)
Returns:
True if this parser can handle the content, False otherwise
"""
pass
@abstractmethod
def parse(self, html_content: str, doi: Optional[str] = None) -> ParsedContent:
"""
Parse HTML content and extract structured information.
Args:
html_content: The HTML content to parse
doi: Optional DOI of the paper
Returns:
ParsedContent object with extracted information
Raises:
ParseError: If parsing fails
"""
pass
def get_name(self) -> str:
"""Return the name of this parser."""
return self.parser_name
def get_description(self) -> str:
"""Return a description of this parser."""
# A class always has a __doc__ attribute (it is None when no docstring is set),
# so getattr() would never fall back to the default; check the value instead.
return self.__class__.__doc__ or "No description available"
def validate_content(self, content: ParsedContent) -> bool:
"""
Validate the parsed content to ensure it meets minimum requirements.
Args:
content: The parsed content to validate
Returns:
True if content is valid, False otherwise
"""
# Basic validation - must have some full text
if not content.full_text or len(content.full_text.strip()) < 100:
return False
return True
class ParseError(Exception):
"""Exception raised when parsing fails."""
pass
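
Supporting a new publisher means subclassing BaseParser and implementing the two abstract methods. A minimal sketch under the contract above; the SpringerParser name and its CSS selectors are purely illustrative and not part of this change:

# Illustrative new parser -- class name and selectors are assumptions.
from typing import Optional

from bs4 import BeautifulSoup

from .base_parser import BaseParser, ParsedContent, ParseError


class SpringerParser(BaseParser):
    """Parser for Springer articles (example only)."""

    def can_parse(self, html_content: str, url: Optional[str] = None) -> bool:
        # Cheap substring checks keep detection fast before any real parsing.
        html_lower = html_content.lower()
        return 'springer.com' in html_lower or bool(url and 'springer' in url)

    def parse(self, html_content: str, doi: Optional[str] = None) -> ParsedContent:
        soup = BeautifulSoup(html_content, 'html.parser')
        title_el = soup.select_one('h1.c-article-title')
        abstract_el = soup.select_one('div.c-article-section__content')
        full_text = soup.get_text(separator='\n', strip=True)
        if not full_text or len(full_text.strip()) < 100:
            raise ParseError("Could not extract meaningful content")
        return ParsedContent(
            full_text=full_text,
            title=title_el.get_text(strip=True) if title_el else None,
            abstract=abstract_el.get_text(strip=True) if abstract_el else None,
            doi=doi,
            metadata={'parser': 'springer'},
        )

validate_content() and get_description() are inherited from the base class, so a new parser stays small.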

View File

@ -0,0 +1,252 @@
import re
from bs4 import BeautifulSoup
from typing import Dict, Optional, List
from .base_parser import BaseParser, ParsedContent, ParseError
class ElsevierParser(BaseParser):
"""Parser for Elsevier/ScienceDirect articles."""
def can_parse(self, html_content: str, url: Optional[str] = None) -> bool:
"""Check if this is an Elsevier/ScienceDirect page."""
html_lower = html_content.lower()
# Check for Elsevier/ScienceDirect indicators (plain substring tests)
indicators = [
'sciencedirect.com',
'elsevier.com',
'meta name="citation_publisher" content="elsevier"',
'sciencedirect',
]
if any(indicator in html_lower for indicator in indicators):
return True
# A copyright line mentioning Elsevier has arbitrary text in between,
# so it needs a regex search rather than a literal substring check
return bool(re.search(r'copyright.*elsevier', html_lower))
def parse(self, html_content: str, doi: Optional[str] = None) -> ParsedContent:
"""Parse Elsevier/ScienceDirect HTML content."""
try:
soup = BeautifulSoup(html_content, 'html.parser')
# Extract title
title = self._extract_title(soup)
# Extract abstract
abstract = self._extract_abstract(soup)
# Extract authors
authors = self._extract_authors(soup)
# Extract full text
full_text = self._extract_full_text(soup)
# Extract sections
sections = self._extract_sections(soup)
# Extract keywords
keywords = self._extract_keywords(soup)
# Extract references
references = self._extract_references(soup)
# Extract journal info
journal = self._extract_journal(soup)
# Extract publication date
publication_date = self._extract_publication_date(soup)
# Combine everything into full text if sections exist
if sections:
full_text = self._combine_sections(sections, abstract)
if not full_text or len(full_text.strip()) < 100:
raise ParseError("Could not extract meaningful full text content")
return ParsedContent(
full_text=full_text,
title=title,
abstract=abstract,
authors=authors,
keywords=keywords,
sections=sections,
references=references,
doi=doi,
journal=journal,
publication_date=publication_date,
metadata={
'parser': 'elsevier',
'source': 'sciencedirect'
}
)
except Exception as e:
raise ParseError(f"Failed to parse Elsevier content: {str(e)}")
def _extract_title(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract article title."""
# Try multiple title selectors
selectors = [
'h1.title-text',
'h1[data-testid="title"]',
'h1.article-title',
'meta[name="citation_title"]',
'title'
]
for selector in selectors:
if 'meta' in selector:
element = soup.find('meta', attrs={'name': 'citation_title'})
if element:
return element.get('content', '').strip()
else:
element = soup.select_one(selector)
if element:
return element.get_text(strip=True)
return None
def _extract_abstract(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract article abstract."""
selectors = [
'div.abstract-content',
'div[data-testid="abstract"]',
'div.abstract',
'section.abstract',
'div#abstract'
]
for selector in selectors:
element = soup.select_one(selector)
if element:
return element.get_text(strip=True)
return None
def _extract_authors(self, soup: BeautifulSoup) -> Optional[List[str]]:
"""Extract author names."""
authors = []
# Try author meta tags
author_metas = soup.find_all('meta', attrs={'name': 'citation_author'})
if author_metas:
authors = [meta.get('content', '').strip() for meta in author_metas]
# Try author div/span elements
if not authors:
author_elements = soup.select('div.author a, span.author, .author-name')
authors = [elem.get_text(strip=True) for elem in author_elements]
return authors if authors else None
def _extract_full_text(self, soup: BeautifulSoup) -> str:
"""Extract main article content."""
content_parts = []
# Try main content selectors
main_selectors = [
'div.article-content',
'div.body-content',
'main.article-body',
'div[data-testid="article-body"]',
'section.article-section'
]
for selector in main_selectors:
elements = soup.select(selector)
for element in elements:
# Remove script, style, and navigation elements
for unwanted in element.find_all(['script', 'style', 'nav', 'footer', 'header']):
unwanted.decompose()
text = element.get_text(separator='\n', strip=True)
if text and len(text) > 50: # Only add substantial content
content_parts.append(text)
return '\n\n'.join(content_parts)
def _extract_sections(self, soup: BeautifulSoup) -> Optional[Dict[str, str]]:
"""Extract article sections with headings."""
sections = {}
# Look for section headings and content
section_elements = soup.find_all(['h2', 'h3', 'h4'], class_=re.compile(r'section|heading'))
for heading in section_elements:
section_title = heading.get_text(strip=True)
# Find content after this heading until next heading
content_parts = []
current = heading.next_sibling
while current and current.name not in ['h1', 'h2', 'h3', 'h4']:
if hasattr(current, 'get_text'):
text = current.get_text(strip=True)
if text:
content_parts.append(text)
current = current.next_sibling
if content_parts:
sections[section_title] = '\n'.join(content_parts)
return sections if sections else None
def _extract_keywords(self, soup: BeautifulSoup) -> Optional[List[str]]:
"""Extract article keywords."""
keywords = []
# Try keyword meta tags
keyword_metas = soup.find_all('meta', attrs={'name': 'citation_keywords'})
if keyword_metas:
for meta in keyword_metas:
content = meta.get('content', '')
if content:
keywords.extend([kw.strip() for kw in content.split(',')])
# Try keyword sections
if not keywords:
keyword_sections = soup.select('div.keywords, section.keywords')
for section in keyword_sections:
text = section.get_text()
keywords.extend([kw.strip() for kw in text.split(',') if kw.strip()])
return keywords if keywords else None
def _extract_references(self, soup: BeautifulSoup) -> Optional[List[str]]:
"""Extract references."""
references = []
ref_sections = soup.select('section.references, div.references, ol.references li')
for section in ref_sections:
if section.name == 'li':
references.append(section.get_text(strip=True))
else:
ref_items = section.find_all(['li', 'div'], class_=re.compile(r'reference'))
references.extend([item.get_text(strip=True) for item in ref_items])
return references if references else None
def _extract_journal(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract journal name."""
journal_meta = soup.find('meta', attrs={'name': 'citation_journal_title'})
if journal_meta:
return journal_meta.get('content', '').strip()
return None
def _extract_publication_date(self, soup: BeautifulSoup) -> Optional[str]:
"""Extract publication date."""
date_meta = soup.find('meta', attrs={'name': 'citation_publication_date'})
if date_meta:
return date_meta.get('content', '').strip()
return None
def _combine_sections(self, sections: Dict[str, str], abstract: Optional[str] = None) -> str:
"""Combine all sections into full text."""
full_text_parts = []
if abstract:
full_text_parts.append(f"Abstract\n{abstract}")
for section_title, section_content in sections.items():
full_text_parts.append(f"{section_title}\n{section_content}")
return '\n\n'.join(full_text_parts)
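
With every publisher parser exposing the same can_parse/parse/validate_content contract, dispatch can be a simple first-match loop. How the project actually registers its parsers is not shown in this diff, so the following is only a sketch of that idea:

# Dispatch sketch -- the project's real registry/factory may differ.
from typing import List, Optional

from .base_parser import BaseParser, ParsedContent, ParseError


def parse_with_first_match(parsers: List[BaseParser], html: str,
                           doi: Optional[str] = None,
                           url: Optional[str] = None) -> ParsedContent:
    """Return parsed content from the first parser that claims the page."""
    for parser in parsers:
        if parser.can_parse(html, url=url):
            content = parser.parse(html, doi=doi)
            if parser.validate_content(content):
                return content
    raise ParseError("No registered parser could handle this page")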

593
scipaperloader/scheduler.py Normal file
View File

@ -0,0 +1,593 @@
"""
APScheduler-based scheduling system to replace complex Celery delayed task management.
This provides clean job scheduling and revocation without manual Redis manipulation.
"""
import random
import logging
from datetime import datetime, timedelta
from typing import Optional, List
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.executors.pool import ThreadPoolExecutor
from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR, EVENT_JOB_MISSED
from apscheduler.jobstores.base import JobLookupError
# Configure APScheduler logging
logging.getLogger('apscheduler').setLevel(logging.WARNING)
# Global scheduler instance
_scheduler = None
_flask_app = None
def _get_flask_app():
"""Get the Flask app instance."""
global _flask_app
if _flask_app:
return _flask_app
try:
from flask import current_app
return current_app
except RuntimeError:
return None
def _hourly_scraper_scheduler():
"""Standalone function for hourly scheduling logic."""
app = _get_flask_app()
if not app:
return
with app.app_context():
try:
from .models import ScraperState, ActivityLog
# Check if scraper is active
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="hourly_scheduler_apscheduler",
status="info",
description="Hourly scheduler skipped - scraper not active"
)
return {"status": "inactive", "papers_scheduled": 0}
if scraper_state.is_paused:
ActivityLog.log_scraper_activity(
action="hourly_scheduler_apscheduler",
status="info",
description="Hourly scheduler skipped - scraper paused"
)
return {"status": "paused", "papers_scheduled": 0}
# Get papers to process this hour
from .scrapers.manager import ScraperManager
manager = ScraperManager()
papers = manager.select_papers_for_processing()
if not papers:
ActivityLog.log_scraper_activity(
action="hourly_scheduler_apscheduler",
status="info",
description="No papers available for processing this hour"
)
return {"status": "empty", "papers_scheduled": 0}
# Schedule papers at random times within the hour
scheduled_count = 0
current_time = datetime.now()
scheduled_papers = []
for paper in papers:
# Random delay between 1 second and 58 minutes
delay_seconds = random.randint(1, 3480) # Up to 58 minutes
run_time = current_time + timedelta(seconds=delay_seconds)
# Schedule the individual paper processing job with unique ID
# Include microseconds and random suffix to prevent collisions
import uuid
job_id = f"process_paper_{paper.id}_{int(current_time.timestamp())}_{uuid.uuid4().hex[:8]}"
global _scheduler
if _scheduler:
_scheduler.add_job(
func=_process_single_paper,
trigger='date',
run_date=run_time,
args=[paper.id],
id=job_id,
replace_existing=True, # Changed to True to handle conflicts gracefully
name=f"Process Paper {paper.doi}"
)
scheduled_count += 1
# Collect paper info for single log entry
paper_info = {
"paper_id": paper.id,
"paper_doi": paper.doi,
"job_id": job_id,
"scheduled_time": run_time.isoformat(),
"delay_seconds": delay_seconds
}
scheduled_papers.append(paper_info)
# Create single comprehensive log entry with JSON data
try:
import json
from .models import ActivityLog
scheduling_data = {
"total_scheduled": scheduled_count,
"scheduled_papers": scheduled_papers,
"timestamp": datetime.now().isoformat(),
"hour_range": f"{current_time.strftime('%H:%M')} - {(current_time + timedelta(hours=1)).strftime('%H:%M')}"
}
ActivityLog.log_scraper_activity(
action="hourly_scheduler_apscheduler",
status="success",
description=f"Scheduled {scheduled_count} papers for random processing within this hour using APScheduler. See extra_data for details.",
**{"scheduling_details": json.dumps(scheduling_data)}
)
except Exception:
# Fallback to simple logging
ActivityLog.log_scraper_activity(
action="hourly_scheduler_apscheduler",
status="success",
description=f"Scheduled {scheduled_count} papers for random processing within this hour using APScheduler"
)
return {"status": "success", "papers_scheduled": scheduled_count}
except Exception as e:
from .models import ActivityLog
ActivityLog.log_error(
error_message=f"APScheduler hourly scheduler error: {str(e)}",
source="_hourly_scraper_scheduler"
)
return {"status": "error", "message": str(e)}
def _process_single_paper(paper_id: int):
"""Standalone function to process a single paper."""
app = _get_flask_app()
if not app:
return
with app.app_context():
try:
from .models import ScraperState, ActivityLog, PaperMetadata
# Enhanced race condition protection
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="process_single_paper_apscheduler",
paper_id=paper_id,
status="skipped",
description="Task skipped - scraper not active (APScheduler)"
)
return {"status": "inactive", "paper_id": paper_id}
if scraper_state.is_paused:
ActivityLog.log_scraper_activity(
action="process_single_paper_apscheduler",
paper_id=paper_id,
status="skipped",
description="Task skipped - scraper paused (APScheduler)"
)
return {"status": "paused", "paper_id": paper_id}
# Get the paper
paper = PaperMetadata.query.get(paper_id)
if not paper:
return {"status": "error", "message": f"Paper {paper_id} not found"}
# Final check before processing
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="process_single_paper_apscheduler",
paper_id=paper_id,
status="skipped",
description="Task skipped - scraper not active (pre-processing check)"
)
return {"status": "inactive", "paper_id": paper_id}
# Process the paper using scraper manager
from .scrapers.manager import ScraperManager
manager = ScraperManager()
result = manager.process_paper(paper)
return result
except Exception as e:
from .models import ActivityLog
ActivityLog.log_error(
error_message=f"Error processing paper {paper_id} in APScheduler: {str(e)}",
source="_process_single_paper"
)
return {"status": "error", "paper_id": paper_id, "message": str(e)}
def _process_single_paper_manual(paper_id: int, scraper_name: Optional[str] = None):
"""Standalone function to process a single paper manually (bypasses scraper state checks)."""
app = _get_flask_app()
if not app:
return
with app.app_context():
try:
from .models import ActivityLog, PaperMetadata
# Get the paper
paper = PaperMetadata.query.get(paper_id)
if not paper:
return {"status": "error", "message": f"Paper {paper_id} not found"}
# Process the paper using manual method (bypasses scraper state checks)
from .scrapers.manager import ScraperManager
manager = ScraperManager()
result = manager.process_paper_manual(paper, scraper_name=scraper_name)
return result
except Exception as e:
from .models import ActivityLog
ActivityLog.log_error(
error_message=f"Error manually processing paper {paper_id} in APScheduler: {str(e)}",
source="_process_single_paper_manual"
)
return {"status": "error", "paper_id": paper_id, "message": str(e)}
def _job_listener(event):
"""Listen to job execution events."""
app = _get_flask_app()
if not app:
return
with app.app_context():
try:
from .models import ActivityLog
job_id = event.job_id
if event.exception:
ActivityLog.log_error(
error_message=f"APScheduler job {job_id} failed: {str(event.exception)}",
source="ScraperScheduler.job_listener"
)
elif hasattr(event, 'retval') and event.retval:
# Job completed successfully
if job_id.startswith('process_paper_'):
ActivityLog.log_scraper_activity(
action="apscheduler_job_complete",
status="success",
description=f"Job {job_id} completed successfully"
)
except Exception as e:
# Don't let logging errors break the scheduler
print(f"Error in job listener: {str(e)}")
class ScraperScheduler:
"""APScheduler-based scraper task scheduler."""
def __init__(self, app=None):
self.app = app
if app:
self.init_app(app)
@property
def scheduler(self):
"""Expose the global _scheduler instance."""
global _scheduler
return _scheduler
def init_app(self, app):
"""Initialize the scheduler with Flask app context."""
global _scheduler, _flask_app
_flask_app = app
self.app = app
# Initialize scheduler within app context to access db.engine properly
with app.app_context():
# Use the existing Flask-SQLAlchemy database engine for APScheduler
from .db import db
# Configure job store to use the existing database engine
jobstores = {
'default': SQLAlchemyJobStore(engine=db.engine)
}
# Configure thread pool executor
executors = {
'default': ThreadPoolExecutor(max_workers=50) # Increased from 20 to 50
}
# Job defaults
job_defaults = {
'coalesce': False, # Don't combine multiple scheduled instances
'max_instances': 3, # Allow up to 3 instances of the same job
'misfire_grace_time': 30 # 30 seconds grace period for missed jobs
}
# Get timezone from database configuration
from .models import TimezoneConfig
configured_timezone = TimezoneConfig.get_current_timezone()
# Create the scheduler
_scheduler = BackgroundScheduler(
jobstores=jobstores,
executors=executors,
job_defaults=job_defaults,
timezone=configured_timezone # Use configurable timezone from database
)
# Add event listeners
_scheduler.add_listener(_job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED)
# Start the scheduler FIRST, which will auto-create tables
_scheduler.start()
# THEN add the hourly scraper job
_scheduler.add_job(
func=_hourly_scraper_scheduler,
trigger='cron',
minute=0, # Run at the start of every hour
id='hourly_scraper_main',
replace_existing=True,
name='Hourly Scraper Scheduler'
)
try:
from .models import ActivityLog
ActivityLog.log_scraper_activity(
action="apscheduler_init",
status="success",
description="APScheduler initialized with database job store and hourly scheduling"
)
except Exception:
# Handle case where we're outside application context
print("✅ APScheduler initialized successfully")
def revoke_all_scraper_jobs(self) -> int:
"""Clean replacement for the complex _clear_delayed_tasks_from_redis method."""
global _scheduler
if not _scheduler:
try:
from .models import ActivityLog
ActivityLog.log_error(
error_message="Scheduler not initialized - cannot revoke jobs",
source="ScraperScheduler.revoke_all_scraper_jobs"
)
except Exception:
print("❌ Scheduler not initialized - cannot revoke jobs")
return 0
revoked_count = 0
revoked_jobs = []
already_gone_jobs = []
failed_jobs = []
try:
# Get all jobs
jobs = _scheduler.get_jobs()
for job in jobs:
# Remove any job that processes papers or uploads (but keep the main hourly scheduler)
if ('paper_process_' in job.id or 'test_paper_process_' in job.id or
'process_paper_' in job.id or 'csv_upload_' in job.id or 'manual_paper_' in job.id or
'startup_paper_' in job.id):
try:
_scheduler.remove_job(job.id)
revoked_count += 1
# Collect job info for single log entry
job_info = {
"job_id": job.id,
"job_name": job.name,
"next_run_time": job.next_run_time.isoformat() if job.next_run_time else None,
"args": job.args
}
revoked_jobs.append(job_info)
print(f"✅ Revoked APScheduler job: {job.id}")
except JobLookupError as e:
# Job already removed/completed - this is normal
already_gone_jobs.append({
"job_id": job.id,
"reason": str(e)
})
print(f" Job {job.id} was already completed or removed")
except Exception as e:
# Other error - log it but continue
failed_jobs.append({
"job_id": job.id,
"error": str(e)
})
print(f"❌ Error removing job {job.id}: {str(e)}")
# Create single comprehensive log entry with JSON data
if revoked_jobs or already_gone_jobs or failed_jobs:
try:
import json
from .models import ActivityLog
revocation_data = {
"total_revoked": revoked_count,
"revoked_jobs": revoked_jobs,
"already_gone_jobs": already_gone_jobs,
"failed_jobs": failed_jobs,
"timestamp": datetime.now().isoformat()
}
ActivityLog.log_scraper_activity(
action="revoke_all_scraper_jobs_apscheduler",
status="success",
description=f"Successfully revoked {revoked_count} APScheduler jobs. See extra_data for details.",
**{"revocation_details": json.dumps(revocation_data)}
)
except Exception:
print(f"✅ Successfully revoked {revoked_count} APScheduler jobs")
return revoked_count
except Exception as e:
try:
from .models import ActivityLog
ActivityLog.log_error(
error_message=f"Error revoking APScheduler jobs: {str(e)}",
source="ScraperScheduler.revoke_all_scraper_jobs"
)
except Exception:
print(f"❌ Error revoking APScheduler jobs: {str(e)}")
return 0
def get_job_count(self) -> int:
"""Get the number of scheduled jobs."""
global _scheduler
if not _scheduler:
return 0
return len(_scheduler.get_jobs())
def get_paper_jobs(self) -> List[dict]:
"""Get information about scheduled paper processing jobs."""
global _scheduler
if not _scheduler:
return []
jobs = []
all_jobs = _scheduler.get_jobs()
for job in all_jobs:
# Match jobs that contain paper processing patterns
if ('process_paper_' in job.id or 'paper_process_' in job.id or 'test_paper_process_' in job.id):
job_info = {
'id': job.id,
'name': job.name,
'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
'args': job.args
}
jobs.append(job_info)
return jobs
def shutdown(self):
"""Gracefully shutdown the scheduler."""
global _scheduler
if _scheduler:
try:
from .models import ActivityLog
ActivityLog.log_scraper_activity(
action="apscheduler_shutdown",
status="info",
description="Shutting down APScheduler"
)
except Exception:
print("🔄 Shutting down APScheduler")
_scheduler.shutdown(wait=False)
_scheduler = None
def schedule_paper_processing(self, paper_id: int, delay_seconds: int = 0, job_id: Optional[str] = None) -> str:
"""Schedule a paper for processing with APScheduler.
Args:
paper_id: ID of the paper to process
delay_seconds: Delay in seconds before processing (default: 0 for immediate)
job_id: Optional custom job ID (will be generated if not provided)
Returns:
str: The job ID of the scheduled job
"""
global _scheduler
if not _scheduler:
raise RuntimeError("APScheduler not initialized")
# Generate job ID if not provided
if not job_id:
# Use microseconds and UUID suffix to prevent collisions
import uuid
job_id = f"process_paper_{paper_id}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}_{uuid.uuid4().hex[:8]}"
# Calculate run time
run_time = datetime.now() + timedelta(seconds=delay_seconds)
# Schedule the job
job = _scheduler.add_job(
func=_process_single_paper,
trigger='date',
run_date=run_time,
args=[paper_id],
id=job_id,
name=f"Process Paper {paper_id}",
replace_existing=True
)
# Log the scheduling
try:
from .models import ActivityLog
ActivityLog.log_scraper_activity(
action="schedule_paper_processing_apscheduler",
paper_id=paper_id,
status="info",
description=f"Scheduled paper {paper_id} for processing at {run_time.strftime('%H:%M:%S')} (Job ID: {job_id})"
)
except Exception:
print(f"✅ Scheduled paper {paper_id} for processing (Job ID: {job_id})")
return job_id
def schedule_manual_paper_processing(self, paper_id: int, scraper_name: Optional[str] = None, delay_seconds: int = 0, job_id: Optional[str] = None) -> str:
"""
Schedule manual paper processing that bypasses scraper state checks.
Args:
paper_id: ID of the paper to process
scraper_name: Optional specific scraper module to use (defaults to system scraper)
delay_seconds: Delay before processing starts (default: 0)
job_id: Optional custom job ID (auto-generated if not provided)
Returns:
Job ID of the scheduled task
"""
global _scheduler
if not _scheduler:
raise RuntimeError("APScheduler not initialized")
if job_id is None:
job_id = f"manual_paper_{paper_id}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
run_time = datetime.now() + timedelta(seconds=delay_seconds)
# Schedule the manual processing job
job = _scheduler.add_job(
func=_process_single_paper_manual,
trigger='date',
run_date=run_time,
args=[paper_id, scraper_name],
id=job_id,
name=f"Manual Process Paper {paper_id}",
replace_existing=True
)
# Log the scheduling
try:
from .models import ActivityLog
ActivityLog.log_scraper_activity(
action="schedule_manual_paper_processing",
paper_id=paper_id,
status="info",
description=f"Scheduled manual processing for paper {paper_id} at {run_time.strftime('%H:%M:%S')} (Job ID: {job_id})"
)
except Exception:
pass # Don't fail if logging fails
return job_id
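
The manager code further down retrieves this scheduler via current_app.config.get('SCHEDULER'), which implies the app factory builds a ScraperScheduler, runs init_app(), and stores it under that key. A wiring sketch; the create_app() shown here is an assumption, not the project's actual factory:

# App-factory wiring sketch -- the real create_app() may differ.
from flask import Flask

from scipaperloader.scheduler import ScraperScheduler


def create_app() -> Flask:
    app = Flask(__name__)
    # ... config, db.init_app(app), blueprints, etc. omitted ...

    scheduler = ScraperScheduler(app)    # init_app() builds the job store from db.engine
    app.config['SCHEDULER'] = scheduler  # where ScraperManager._get_scheduler() looks it up
    return app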

View File

@ -18,6 +18,43 @@ class BaseScraper(ABC):
OUTPUT_STATUS_FAILURE = "Failed" # Status to set on failed scraping
OUTPUT_STATUS_PROCESSING = "Pending" # Status to set while processing
def __init__(self):
"""Initialize the scraper."""
self.scraper_name = self.get_name().lower()
def log_scrape_start(self, doi: str, paper_id: Optional[int] = None):
"""Log the start of a scraping operation."""
from ..models import ActivityLog
ActivityLog.log_scraper_activity(
action=f"{self.scraper_name}_scrape_start",
status="info",
description=f"Starting {self.get_name()} for DOI: {doi}",
paper_id=paper_id
)
def log_scrape_success(self, doi: str, message: str, paper_id: Optional[int] = None):
"""Log successful completion of scraping."""
from ..models import ActivityLog
ActivityLog.log_scraper_activity(
action=f"{self.scraper_name}_scrape_success",
status="success",
description=f"{self.get_name()} completed successfully for DOI: {doi} - {message}",
paper_id=paper_id
)
def log_scrape_failure(self, doi: str, message: str, paper_id: Optional[int] = None):
"""Log failed scraping operation."""
from ..models import ActivityLog
ActivityLog.log_scraper_activity(
action=f"{self.scraper_name}_scrape_failure",
status="error",
description=f"{self.get_name()} failed for DOI: {doi} - {message}",
paper_id=paper_id
)
@abstractmethod
def scrape(self, doi: str) -> ScrapeResult:
"""

View File

@ -30,6 +30,9 @@ class Scraper(BaseScraper):
timestamp=datetime.utcnow()
)
# Log start of scraping
self.log_scrape_start(doi, paper.id)
# Simulate processing time (1-3 seconds)
processing_time = random.uniform(1, 3)
time.sleep(processing_time)
@ -145,12 +148,7 @@ class Scraper(BaseScraper):
)
# Log success
ActivityLog.log_scraper_activity(
action="dummy_scrape",
status="success",
description=f"Successfully scraped {doi}",
paper_id=paper.id
)
self.log_scrape_success(doi, f"Successfully scraped {doi}", paper.id)
result = ScrapeResult(
status="success",
@ -178,12 +176,7 @@ class Scraper(BaseScraper):
paper.error_msg = error_msg
# Log failure
ActivityLog.log_scraper_activity(
action="dummy_scrape",
status="error",
description=f"Failed to scrape {doi}: {error_msg}",
paper_id=paper.id
)
self.log_scrape_failure(doi, error_msg, paper.id)
result = ScrapeResult(
status="error",

View File

@ -30,13 +30,8 @@ class Scraper(BaseScraper):
timestamp=datetime.utcnow()
)
# Log retry attempt
ActivityLog.log_scraper_activity(
action="retry_failed_paper",
status="info",
description=f"Retrying failed paper: {paper.title}",
paper_id=paper.id
)
# Log start of retry
self.log_scrape_start(doi, paper.id)
# Simulate longer processing time for retry (2-5 seconds)
processing_time = random.uniform(2, 5)
@ -64,12 +59,7 @@ class Scraper(BaseScraper):
result_data = {"file_path": file_path}
# Log success
ActivityLog.log_scraper_activity(
action="retry_scrape_success",
status="success",
description=f"Successfully retried {doi} on second attempt",
paper_id=paper.id
)
self.log_scrape_success(doi, f"Successfully retried {doi} on second attempt", paper.id)
result = ScrapeResult(
status="success",
@ -81,12 +71,7 @@ class Scraper(BaseScraper):
except Exception as e:
error_msg = f"Failed to save retry file: {str(e)}"
ActivityLog.log_scraper_activity(
action="retry_scrape_file_error",
status="error",
description=error_msg,
paper_id=paper.id
)
self.log_scrape_failure(doi, error_msg, paper.id)
result = ScrapeResult(
status="error",
@ -105,12 +90,7 @@ class Scraper(BaseScraper):
]
error_msg = random.choice(error_messages)
ActivityLog.log_scraper_activity(
action="retry_scrape_failure",
status="error",
description=f"Retry failed for {doi}: {error_msg}",
paper_id=paper.id
)
self.log_scrape_failure(doi, error_msg, paper.id)
result = ScrapeResult(
status="error",

View File

@ -0,0 +1,172 @@
import time
import os
import requests
from datetime import datetime
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db
class Scraper(BaseScraper):
"""Scraper that fetches HTML content from DOI and saves it for further processing."""
# This scraper processes "New" papers and outputs "HtmlDownloaded"/"Failed"
INPUT_STATUSES = ["New"]
OUTPUT_STATUS_SUCCESS = "HtmlDownloaded"
OUTPUT_STATUS_FAILURE = "Failed"
OUTPUT_STATUS_PROCESSING = "FetchingHtml"
def scrape(self, doi: str) -> ScrapeResult:
"""Fetch HTML content from DOI and save to download path."""
start_time = time.time()
paper = PaperMetadata.query.filter_by(doi=doi).first()
if not paper:
return ScrapeResult(
status="error",
message=f"No paper found for DOI {doi}",
data=None,
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Log start of scraping
self.log_scrape_start(doi, paper.id)
# Update status to processing
paper.status = self.OUTPUT_STATUS_PROCESSING
db.session.commit()
# Prepare file paths
download_path = DownloadPathConfig.get_path()
file_name = f"{doi.replace('/', '_')}.html"
file_path = os.path.join(download_path, file_name)
# Check/create download directory (same pattern as dummy)
if not os.path.exists(download_path):
try:
os.makedirs(download_path, exist_ok=True)
except OSError as e:
error_msg = f"Failed to create download directory: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_creation_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Check path permissions (same pattern as dummy)
if not os.access(download_path, os.W_OK):
error_msg = f"Download path '{download_path}' is not writable"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
ActivityLog.log_scraper_activity(
action="html_fetch_path_error",
status="error",
description=error_msg,
paper_id=paper.id
)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_write_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
try:
# Fetch HTML from DOI
doi_url = f"https://doi.org/{doi}"
headers = {'User-Agent': 'SciPaperLoader/1.0'}
response = requests.get(doi_url, headers=headers, timeout=30, allow_redirects=True)
# Check for invalid DOI (404) or other HTTP errors
if response.status_code == 404:
error_msg = f"Invalid DOI: {doi} not found"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "invalid_doi"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
response.raise_for_status() # Raise for other HTTP errors
# Save HTML content
with open(file_path, 'w', encoding='utf-8') as f:
f.write(response.text)
# Update paper status to success
paper.status = self.OUTPUT_STATUS_SUCCESS
paper.file_path = file_path
paper.error_msg = None
db.session.commit()
# Log success
self.log_scrape_success(doi, f"Successfully fetched HTML for {doi}", paper.id)
return ScrapeResult(
status="success",
message=f"Successfully fetched HTML for {doi}",
data={
"file_path": file_path,
"url": response.url, # Final URL after redirects
"title": paper.title
},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except requests.exceptions.RequestException as e:
error_msg = f"Failed to fetch HTML from DOI {doi}: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
ActivityLog.log_scraper_activity(
action="html_fetch",
status="error",
description=error_msg,
paper_id=paper.id
)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "network_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except Exception as e:
error_msg = f"Failed to save HTML file: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "file_creation_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
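
Because this scraper follows the same status contract as the others, a single paper can be pushed through it on demand via the manual scheduling path added in scheduler.py. A sketch; the value passed as scraper_name is a guess and should match whatever module filename this scraper ships under:

# One-off manual fetch sketch -- the scraper_name value is an assumption.
from flask import current_app

scheduler = current_app.config['SCHEDULER']
job_id = scheduler.schedule_manual_paper_processing(
    paper_id=42,                   # id of an existing PaperMetadata row
    scraper_name="html_fetcher",   # assumed module name for the scraper above
    delay_seconds=0,
)
print(f"Queued manual HTML fetch as job {job_id}")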

View File

@ -1,13 +1,14 @@
"""
Simplified scraper management system with hourly quota scheduling.
Uses APScheduler for all task processing - no Celery dependencies.
"""
import random
import math
import redis
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from typing import List, Dict, Optional
from sqlalchemy import func
from flask import current_app
from ..models import (
PaperMetadata,
@ -20,7 +21,6 @@ from ..models import (
from ..db import db
from ..cache_utils import get_cached_hourly_quota
from .factory import get_scraper, get_available_scrapers
from ..celery import celery
class ScraperManager:
@ -29,237 +29,81 @@ class ScraperManager:
def __init__(self):
self.current_scraper = None
self.pending_papers = [] # Track papers being processed
# Initialize Redis client for delayed task management
self.redis_client = None
self._init_redis_client()
# No more Redis client initialization - using APScheduler now
def _init_redis_client(self):
"""Initialize Redis client for delayed task management."""
def _get_scheduler(self):
"""Get the ScraperScheduler instance from Flask app config."""
try:
# Use same Redis configuration as Celery
self.redis_client = redis.Redis(
host='localhost',
port=6379,
db=0,
decode_responses=True
)
# Test connection
self.redis_client.ping()
except Exception as e:
ActivityLog.log_error(
error_message=f"Failed to initialize Redis client: {str(e)}",
source="ScraperManager._init_redis_client"
)
self.redis_client = None
return current_app.config.get('SCHEDULER')
except RuntimeError:
# Outside application context
return None
def _clear_delayed_tasks_from_redis(self) -> int:
"""Clear delayed tasks from Redis structures used by Celery.
def _get_raw_scheduler(self):
"""Get the raw APScheduler instance for direct job scheduling."""
try:
scheduler_wrapper = current_app.config.get('SCHEDULER')
if scheduler_wrapper:
return scheduler_wrapper.scheduler
return None
except RuntimeError:
return None
Based on analysis, Celery stores delayed tasks in:
- 'unacked_index': Sorted set containing task IDs with execution timestamps
- 'unacked': Hash containing task data keyed by task ID
def _clear_delayed_tasks_from_apscheduler(self) -> int:
"""Clear delayed tasks from APScheduler - clean replacement for Redis manipulation.
Returns:
int: Number of delayed tasks cleared
"""
if not self.redis_client:
scheduler = self._get_scheduler()
if not scheduler:
try:
ActivityLog.log_error(
error_message="Redis client not available - cannot clear delayed tasks",
source="ScraperManager._clear_delayed_tasks_from_redis"
error_message="APScheduler not available - cannot clear delayed tasks",
source="ScraperManager._clear_delayed_tasks_from_apscheduler"
)
except RuntimeError:
# Working outside application context - just print instead
print("❌ Redis client not available - cannot clear delayed tasks")
print("❌ APScheduler not available - cannot clear delayed tasks")
return 0
cleared_count = 0
try:
# Define scraper task patterns to identify our tasks
scraper_patterns = [
'process_single_paper',
'process_papers_batch',
'hourly_scraper_scheduler'
]
cleared_count = scheduler.revoke_all_scraper_jobs()
try:
ActivityLog.log_scraper_activity(
action="check_delayed_tasks",
status="info",
description="Checking Celery delayed task structures (unacked_index, unacked)"
)
except RuntimeError:
print("🔍 Checking Celery delayed task structures (unacked_index, unacked)")
# Check 'unacked_index' (sorted set with task IDs and timestamps)
unacked_index_cleared = 0
if self.redis_client.exists('unacked_index'):
try:
# Get all task IDs from the sorted set
task_ids = self.redis_client.zrange('unacked_index', 0, -1)
if task_ids:
try:
ActivityLog.log_scraper_activity(
action="scan_unacked_index",
status="info",
description=f"Found {len(task_ids)} tasks in 'unacked_index'"
)
except RuntimeError:
print(f"📋 Found {len(task_ids)} tasks in 'unacked_index'")
# Check each task ID against the 'unacked' hash to get task details
scraper_task_ids = []
for task_id in task_ids:
try:
# Get task data from 'unacked' hash
task_data = self.redis_client.hget('unacked', task_id)
if task_data:
# Check if this task contains any of our scraper patterns
if any(pattern in str(task_data) for pattern in scraper_patterns):
scraper_task_ids.append(task_id)
except Exception:
# Skip individual task errors
continue
# Remove scraper task IDs from both structures
for task_id in scraper_task_ids:
try:
# Remove from unacked_index (sorted set)
removed_from_index = self.redis_client.zrem('unacked_index', task_id)
# Remove from unacked (hash)
removed_from_hash = self.redis_client.hdel('unacked', task_id)
if removed_from_index or removed_from_hash:
unacked_index_cleared += 1
except Exception as e:
try:
ActivityLog.log_error(
error_message=f"Error removing delayed task {task_id}: {str(e)}",
source="ScraperManager._clear_delayed_tasks_from_redis"
)
except RuntimeError:
print(f"❌ Error removing delayed task {task_id}: {str(e)}")
continue
cleared_count += unacked_index_cleared
if unacked_index_cleared > 0:
try:
ActivityLog.log_scraper_activity(
action="clear_unacked_tasks",
status="success",
description=f"Cleared {unacked_index_cleared} scraper tasks from unacked structures"
)
except RuntimeError:
print(f"✅ Cleared {unacked_index_cleared} scraper tasks from unacked structures")
else:
try:
ActivityLog.log_scraper_activity(
action="check_unacked_index",
status="info",
description="No tasks found in 'unacked_index'"
)
except RuntimeError:
print(" No tasks found in 'unacked_index'")
except Exception as e:
try:
ActivityLog.log_error(
error_message=f"Error accessing 'unacked_index': {str(e)}",
source="ScraperManager._clear_delayed_tasks_from_redis"
)
except RuntimeError:
print(f"❌ Error accessing 'unacked_index': {str(e)}")
else:
try:
ActivityLog.log_scraper_activity(
action="check_unacked_index",
status="info",
description="'unacked_index' key does not exist - no delayed tasks"
)
except RuntimeError:
print(" 'unacked_index' key does not exist - no delayed tasks")
# Also check the 'celery' queue for immediate tasks (backup check)
celery_cleared = 0
try:
queue_length = self.redis_client.llen('celery')
if queue_length and queue_length > 0:
# Scan for any scraper tasks in the immediate queue
scraper_tasks = []
for i in range(queue_length):
try:
task_data = self.redis_client.lindex('celery', i)
if task_data and any(pattern in str(task_data) for pattern in scraper_patterns):
scraper_tasks.append(task_data)
except Exception:
continue
# Remove scraper tasks from celery queue
for task_data in scraper_tasks:
try:
removed_count = self.redis_client.lrem('celery', 0, task_data)
celery_cleared += removed_count
except Exception:
continue
cleared_count += celery_cleared
if celery_cleared > 0:
try:
ActivityLog.log_scraper_activity(
action="clear_celery_tasks",
status="success",
description=f"Cleared {celery_cleared} scraper tasks from 'celery' queue"
)
except RuntimeError:
print(f"✅ Cleared {celery_cleared} scraper tasks from 'celery' queue")
except Exception as e:
try:
ActivityLog.log_error(
error_message=f"Error checking 'celery' queue: {str(e)}",
source="ScraperManager._clear_delayed_tasks_from_redis"
)
except RuntimeError:
print(f"❌ Error checking 'celery' queue: {str(e)}")
# Summary
# Summary logging
if cleared_count > 0:
try:
ActivityLog.log_scraper_activity(
action="clear_delayed_tasks_complete",
action="clear_delayed_tasks_complete_apscheduler",
status="success",
description=f"Total delayed scraper tasks cleared from Redis: {cleared_count} (unacked: {unacked_index_cleared}, celery: {celery_cleared})"
description=f"Total delayed scraper tasks cleared from APScheduler: {cleared_count}"
)
except RuntimeError:
print(f"✅ Total delayed scraper tasks cleared from Redis: {cleared_count} (unacked: {unacked_index_cleared}, celery: {celery_cleared})")
print(f"✅ Total delayed scraper tasks cleared from APScheduler: {cleared_count}")
else:
try:
ActivityLog.log_scraper_activity(
action="clear_delayed_tasks_complete",
action="clear_delayed_tasks_complete_apscheduler",
status="info",
description="No delayed scraper tasks found to clear in Redis"
description="No delayed scraper tasks found to clear in APScheduler"
)
except RuntimeError:
print(" No delayed scraper tasks found to clear in Redis")
print(" No delayed scraper tasks found to clear in APScheduler")
return cleared_count
except Exception as e:
try:
ActivityLog.log_error(
error_message=f"Failed to clear delayed tasks from Redis: {str(e)}",
source="ScraperManager._clear_delayed_tasks_from_redis"
error_message=f"Failed to clear delayed tasks from APScheduler: {str(e)}",
source="ScraperManager._clear_delayed_tasks_from_apscheduler"
)
except RuntimeError:
print(f"❌ Failed to clear delayed tasks from Redis: {str(e)}")
print(f"❌ Failed to clear delayed tasks from APScheduler: {str(e)}")
return 0
def start_scraper(self) -> Dict[str, str]:
"""Start the scraper system."""
"""Start the scraper system and immediately schedule papers for the current hour."""
try:
# Get current scraper
self.current_scraper = get_scraper()
@ -270,13 +114,25 @@ class ScraperManager:
scraper_name = self.current_scraper.get_name()
ActivityLog.log_scraper_command(
action="start_scraper",
status="success",
description=f"Started scraper: {scraper_name}. Use /trigger-immediate endpoint to immediately schedule papers instead of waiting for the next hourly boundary."
)
# Immediately schedule papers for the remaining time in the current hour
immediate_scheduled_count = self._schedule_papers_for_current_hour()
return {"status": "success", "message": "Scraper started successfully. Papers will be scheduled at the next hourly boundary, or use /trigger-immediate to schedule immediately."}
if immediate_scheduled_count > 0:
ActivityLog.log_scraper_command(
action="start_scraper",
status="success",
description=f"Started scraper: {scraper_name}. Immediately scheduled {immediate_scheduled_count} papers for the remaining time in this hour."
)
return {"status": "success", "message": f"Scraper started successfully. Immediately scheduled {immediate_scheduled_count} papers for processing in the remaining time this hour."}
else:
ActivityLog.log_scraper_command(
action="start_scraper",
status="success",
description=f"Started scraper: {scraper_name}. No papers available for immediate scheduling in the current hour."
)
return {"status": "success", "message": "Scraper started successfully. No papers available for immediate scheduling this hour."}
except Exception as e:
ActivityLog.log_error(
@ -318,123 +174,29 @@ class ScraperManager:
return {"status": "error", "message": str(e)}
def stop_scraper(self) -> Dict[str, str]:
"""Stop the scraper, revoke all running tasks, and revert pending papers."""
"""Stop the scraper, revoke all APScheduler jobs, and revert pending papers."""
try:
# First, revoke all running tasks
revoked_count = 0
delayed_cleared_count = 0
# STEP 1: Immediately set scraper as inactive - this is critical for race condition prevention
ScraperState.set_active(False)
ScraperState.set_paused(False)
ActivityLog.log_scraper_command(
action="stop_scraper_start",
status="info",
description="Beginning scraper stop process with task revocation and delayed task clearing"
description="Scraper stop initiated - marked as inactive. Beginning APScheduler job revocation."
)
try:
# Get Celery inspector to check for running tasks
i = celery.control.inspect()
active = i.active() or {}
scheduled = i.scheduled() or {}
reserved = i.reserved() or {}
# STEP 2: Brief pause to allow running jobs to see the inactive state
import time
time.sleep(0.2)
# Revoke active tasks
for worker, tasks in active.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
ActivityLog.log_scraper_activity(
action="revoke_task",
status="success",
description=f"Revoked active task: {task.get('name', 'unknown')} (ID: {task['id']})"
)
# STEP 3: Revoke all APScheduler jobs
delayed_cleared_count = self._clear_delayed_tasks_from_apscheduler()
# Revoke scheduled tasks
for worker, tasks in scheduled.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
ActivityLog.log_scraper_activity(
action="revoke_task",
status="success",
description=f"Revoked scheduled task: {task.get('name', 'unknown')} (ID: {task['id']})"
)
# STEP 4: Wait a bit for any remaining jobs to finish their checks and exit
time.sleep(1.0)
# Revoke reserved tasks
for worker, tasks in reserved.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
ActivityLog.log_scraper_activity(
action="revoke_task",
status="success",
description=f"Revoked reserved task: {task.get('name', 'unknown')} (ID: {task['id']})"
)
# Purge all task queues
celery.control.purge()
ActivityLog.log_scraper_activity(
action="purge_queues",
status="success",
description="Purged all task queues"
)
# **NEW: Clear delayed tasks from Redis sorted sets**
delayed_cleared_count = self._clear_delayed_tasks_from_redis()
# Additional cleanup: revoke any remaining scraper-related tasks by name pattern
try:
# Use broadcast to revoke tasks that match scraper patterns
scraper_task_patterns = [
'process_single_paper',
'process_papers_batch',
'hourly_scraper_scheduler'
]
# Get a fresh inspection of tasks after purge
fresh_inspect = celery.control.inspect()
all_tasks = {}
all_tasks.update(fresh_inspect.active() or {})
all_tasks.update(fresh_inspect.scheduled() or {})
all_tasks.update(fresh_inspect.reserved() or {})
additional_revoked = 0
for worker, tasks in all_tasks.items():
for task in tasks:
task_name = task.get('name', '')
task_id = task.get('id', '')
if any(pattern in task_name for pattern in scraper_task_patterns) and task_id:
celery.control.revoke(task_id, terminate=True)
additional_revoked += 1
ActivityLog.log_scraper_activity(
action="revoke_scraper_task",
status="success",
description=f"Revoked lingering scraper task: {task_name} (ID: {task_id})"
)
if additional_revoked > 0:
ActivityLog.log_scraper_activity(
action="cleanup_scraper_tasks",
status="success",
description=f"Additional cleanup: revoked {additional_revoked} lingering scraper tasks"
)
except Exception as e:
ActivityLog.log_error(
error_message=f"Error during additional scraper task cleanup: {str(e)}",
source="ScraperManager.stop_scraper.cleanup"
)
except Exception as e:
ActivityLog.log_error(
error_message=f"Error revoking tasks: {str(e)}",
source="ScraperManager.stop_scraper"
)
# Continue with paper reversion even if task revocation fails
# Get current scraper to know what status to revert to
# STEP 5: Revert papers from processing status
scraper = get_scraper()
input_statuses = scraper.get_input_statuses()
@ -453,7 +215,7 @@ class ScraperManager:
paper.status = paper.previous_status
else:
paper.status = revert_status
paper.updated_at = datetime.utcnow()
paper.updated_at = datetime.now(UTC)
reverted_count += 1
db.session.commit()
@ -464,19 +226,15 @@ class ScraperManager:
description=f"Reverted {reverted_count} papers from '{processing_status}' to previous status"
)
# Deactivate scraper
ScraperState.set_active(False)
ScraperState.set_paused(False)
ActivityLog.log_scraper_command(
action="stop_scraper",
status="success",
description=f"Scraper stopped. Revoked {revoked_count} tasks, cleared {delayed_cleared_count} delayed tasks, and reverted {reverted_count} papers."
description=f"Scraper stopped completely. Cleared {delayed_cleared_count} APScheduler jobs and reverted {reverted_count} papers."
)
return {
"status": "success",
"message": f"Scraper stopped. Revoked {revoked_count} tasks, cleared {delayed_cleared_count} delayed tasks, and reverted {reverted_count} papers to previous status."
"message": f"Scraper stopped. Cleared {delayed_cleared_count} APScheduler jobs and reverted {reverted_count} papers to previous status."
}
except Exception as e:
@ -487,51 +245,16 @@ class ScraperManager:
return {"status": "error", "message": str(e)}
def reset_scraper(self) -> Dict[str, str]:
"""Reset scraper state, revoke all running tasks, and clear all processing statuses."""
"""Reset scraper state, revoke all APScheduler jobs, and clear all processing statuses."""
try:
# First, revoke all running tasks (similar to stop_scraper)
revoked_count = 0
ActivityLog.log_scraper_command(
action="reset_scraper_start",
status="info",
description="Beginning scraper reset process with task revocation"
description="Beginning scraper reset process with APScheduler job revocation"
)
try:
# Get Celery inspector to check for running tasks
i = celery.control.inspect()
active = i.active() or {}
scheduled = i.scheduled() or {}
reserved = i.reserved() or {}
# Revoke all tasks (active, scheduled, reserved)
for queue_name, queue_tasks in [("active", active), ("scheduled", scheduled), ("reserved", reserved)]:
for worker, tasks in queue_tasks.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
ActivityLog.log_scraper_activity(
action="revoke_task",
status="success",
description=f"Revoked {queue_name} task: {task.get('name', 'unknown')} (ID: {task['id']})"
)
# Purge all task queues
celery.control.purge()
ActivityLog.log_scraper_activity(
action="purge_queues",
status="success",
description="Purged all task queues during reset"
)
except Exception as e:
ActivityLog.log_error(
error_message=f"Error revoking tasks during reset: {str(e)}",
source="ScraperManager.reset_scraper"
)
# Continue with paper reversion even if task revocation fails
# Clear all APScheduler jobs
delayed_cleared_count = self._clear_delayed_tasks_from_apscheduler()
# Get current scraper configuration
scraper = get_scraper()
@ -551,7 +274,7 @@ class ScraperManager:
paper.status = paper.previous_status
else:
paper.status = revert_status
paper.updated_at = datetime.utcnow()
paper.updated_at = datetime.now(UTC)
paper.error_msg = None # Clear any error messages
reverted_count += 1
@ -564,12 +287,12 @@ class ScraperManager:
ActivityLog.log_scraper_command(
action="reset_scraper",
status="success",
description=f"Scraper reset. Revoked {revoked_count} tasks and reverted {reverted_count} papers."
description=f"Scraper reset. Cleared {delayed_cleared_count} APScheduler jobs and reverted {reverted_count} papers."
)
return {
"status": "success",
"message": f"Scraper reset. Revoked {revoked_count} tasks and reverted {reverted_count} papers to original status."
"message": f"Scraper reset. Cleared {delayed_cleared_count} APScheduler jobs and reverted {reverted_count} papers to original status."
}
except Exception as e:
@ -638,24 +361,52 @@ class ScraperManager:
.limit(papers_needed)
.all())
ActivityLog.log_scraper_activity(
action="select_papers",
status="info",
description=f"Selected {len(papers)} papers from statuses {input_statuses} (requested: {papers_needed})"
)
try:
ActivityLog.log_scraper_activity(
action="select_papers",
status="info",
description=f"Selected {len(papers)} papers from statuses {input_statuses} (requested: {papers_needed})"
)
except RuntimeError:
# Outside application context - use print fallback
print(f"📋 Selected {len(papers)} papers from statuses {input_statuses} (requested: {papers_needed})")
return papers
except Exception as e:
ActivityLog.log_error(
error_message=f"Error selecting papers: {str(e)}",
source="ScraperManager.select_papers_for_processing"
)
try:
ActivityLog.log_error(
error_message=f"Error selecting papers: {str(e)}",
source="ScraperManager.select_papers_for_processing"
)
except RuntimeError:
# Outside application context - use print fallback
print(f"❌ Error selecting papers: {str(e)}")
return []
def process_paper(self, paper: PaperMetadata) -> Dict:
"""Process a single paper using the current scraper."""
try:
# **RACE CONDITION FIX**: Double-check scraper state before proceeding
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="process_paper",
paper_id=paper.id,
status="skipped",
description="Skipped processing - scraper deactivated during task execution"
)
return {"paper_id": paper.id, "status": "skipped", "message": "Scraper not active"}
if scraper_state.is_paused:
ActivityLog.log_scraper_activity(
action="process_paper",
paper_id=paper.id,
status="skipped",
description="Skipped processing - scraper paused during task execution"
)
return {"paper_id": paper.id, "status": "skipped", "message": "Scraper paused"}
scraper = get_scraper()
output_statuses = scraper.get_output_statuses()
@ -665,9 +416,25 @@ class ScraperManager:
# Update paper status to processing
paper.previous_status = previous_status
paper.status = output_statuses["processing"]
paper.updated_at = datetime.utcnow()
paper.updated_at = datetime.now(UTC)
db.session.commit()
# **ADDITIONAL RACE CONDITION CHECK**: Verify scraper is still active before expensive scraping operation
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
# Scraper was deactivated after we marked paper as processing - revert and exit
paper.status = previous_status
paper.updated_at = datetime.now(UTC)
db.session.commit()
ActivityLog.log_scraper_activity(
action="process_paper",
paper_id=paper.id,
status="cancelled",
description="Cancelled processing - scraper deactivated after paper marked as processing"
)
return {"paper_id": paper.id, "status": "cancelled", "message": "Scraper deactivated during processing"}
# Perform scraping
result = scraper.scrape(paper.doi)
@ -681,7 +448,7 @@ class ScraperManager:
paper.status = output_statuses["failure"]
paper.error_msg = result.message
paper.updated_at = datetime.utcnow()
paper.updated_at = datetime.now(UTC)
db.session.commit()
# Log result
@ -706,7 +473,7 @@ class ScraperManager:
if input_statuses:
paper.status = input_statuses[0]
paper.error_msg = f"Processing error: {str(e)}"
paper.updated_at = datetime.utcnow()
paper.updated_at = datetime.now(UTC)
db.session.commit()
except:
pass # Don't fail if reversion fails
@ -718,6 +485,91 @@ class ScraperManager:
return {"paper_id": paper.id, "status": "error", "message": str(e)}
def process_paper_manual(self, paper: PaperMetadata, scraper_name: Optional[str] = None) -> Dict:
"""Process a single paper manually, bypassing scraper state checks."""
try:
# Get scraper configuration but skip state validation for manual processing
if scraper_name:
# Use the specified scraper
import importlib
from .base import BaseScraper
try:
module = importlib.import_module(f"scipaperloader.scrapers.{scraper_name}")
scraper_cls = getattr(module, "Scraper")
if not issubclass(scraper_cls, BaseScraper):
raise TypeError(f"Scraper class in module '{scraper_name}' does not inherit from BaseScraper")
scraper = scraper_cls()
except (ImportError, AttributeError, TypeError) as e:
ActivityLog.log_error(
error_message=f"Failed to load specified scraper '{scraper_name}': {str(e)}. Falling back to system default.",
source="ScraperManager.process_paper_manual"
)
scraper = get_scraper()
else:
# Use system default scraper
scraper = get_scraper()
output_statuses = scraper.get_output_statuses()
# Store the previous status before changing it
previous_status = paper.status
# Update paper status to processing
paper.previous_status = previous_status
paper.status = output_statuses["processing"]
paper.updated_at = datetime.now(UTC)
db.session.commit()
# Perform scraping (no state checks for manual processing)
result = scraper.scrape(paper.doi)
# Update paper status based on result
if result.status == "success":
paper.status = output_statuses["success"]
paper.error_msg = None
if result.data and "file_path" in result.data:
paper.file_path = result.data["file_path"]
else:
paper.status = output_statuses["failure"]
paper.error_msg = result.message
paper.updated_at = datetime.now(UTC)
db.session.commit()
# Log result
ActivityLog.log_scraper_activity(
action="process_paper_manual",
paper_id=paper.id,
status=result.status,
description=f"Manually processed {paper.doi}: {result.message}"
)
return {
"paper_id": paper.id,
"status": result.status,
"message": result.message,
"duration": result.duration
}
except Exception as e:
# Revert paper status on error
try:
input_statuses = get_scraper().get_input_statuses()
if input_statuses:
paper.status = input_statuses[0]
paper.error_msg = f"Manual processing error: {str(e)}"
paper.updated_at = datetime.now(UTC)
db.session.commit()
except:
pass # Don't fail if reversion fails
ActivityLog.log_error(
error_message=f"Error manually processing paper {paper.id}: {str(e)}",
source="ScraperManager.process_paper_manual"
)
return {"paper_id": paper.id, "status": "error", "message": str(e)}
def get_status(self) -> Dict:
"""Get current scraper status."""
scraper_state = ScraperState.get_current_state()
@ -745,3 +597,119 @@ class ScraperManager:
"processing_papers": processing_count,
"current_hour_quota": self.get_current_hour_quota()
}
def _schedule_papers_for_current_hour(self) -> int:
"""Schedule papers for processing in the remaining time of the current hour.
Returns:
int: Number of papers scheduled
"""
try:
# Get papers that should be processed this hour
papers = self.select_papers_for_processing()
if not papers:
return 0
# Get raw APScheduler instance for direct job scheduling
scheduler = self._get_raw_scheduler()
if not scheduler:
ActivityLog.log_error(
error_message="Raw APScheduler not available for immediate paper scheduling",
source="ScraperManager._schedule_papers_for_current_hour"
)
return 0
# Calculate remaining time in current hour
current_time = datetime.now()
next_hour = current_time.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
remaining_seconds = int((next_hour - current_time).total_seconds())
# Don't schedule if less than 2 minutes remaining
if remaining_seconds < 120:
ActivityLog.log_scraper_activity(
action="start_scraper_immediate_scheduling",
status="info",
description=f"Skipping immediate scheduling - only {remaining_seconds} seconds remaining in current hour"
)
return 0
# Schedule papers at random times within the remaining time
scheduled_count = 0
scheduled_papers = []
for paper in papers:
try:
# Random delay between 1 second and remaining time minus 60 seconds buffer
max_delay = max(1, remaining_seconds - 60)
delay_seconds = random.randint(1, max_delay)
run_time = current_time + timedelta(seconds=delay_seconds)
# Generate unique job ID
import uuid
job_id = f"startup_paper_{paper.id}_{int(current_time.timestamp())}_{uuid.uuid4().hex[:8]}"
# Schedule the job
from ..scheduler import _process_single_paper
scheduler.add_job(
func=_process_single_paper,
trigger='date',
run_date=run_time,
args=[paper.id],
id=job_id,
name=f"Startup Process Paper {paper.id}",
replace_existing=True
)
scheduled_count += 1
# Collect paper info for logging
paper_info = {
"paper_id": paper.id,
"paper_doi": paper.doi,
"job_id": job_id,
"scheduled_time": run_time.isoformat(),
"delay_seconds": delay_seconds
}
scheduled_papers.append(paper_info)
except Exception as e:
ActivityLog.log_error(
error_message=f"Failed to schedule paper {paper.id} during startup: {str(e)}",
source="ScraperManager._schedule_papers_for_current_hour"
)
# Create single comprehensive log entry
if scheduled_papers:
try:
import json
scheduling_data = {
"total_scheduled": scheduled_count,
"scheduled_papers": scheduled_papers,
"timestamp": current_time.isoformat(),
"remaining_time_seconds": remaining_seconds,
"trigger": "startup_immediate_scheduling"
}
ActivityLog.log_scraper_activity(
action="startup_immediate_scheduling",
status="success",
description=f"Scheduled {scheduled_count} papers for immediate processing during startup for remaining {remaining_seconds}s in current hour. See extra_data for details.",
**{"scheduling_details": json.dumps(scheduling_data)}
)
except Exception:
# Fallback to simple logging
ActivityLog.log_scraper_activity(
action="startup_immediate_scheduling",
status="success",
description=f"Scheduled {scheduled_count} papers for immediate processing during startup"
)
return scheduled_count
except Exception as e:
ActivityLog.log_error(
error_message=f"Error in startup immediate scheduling: {str(e)}",
source="ScraperManager._schedule_papers_for_current_hour"
)
return 0

View File

@ -0,0 +1,282 @@
import time
import requests
import re
from urllib.parse import urlparse
from datetime import datetime
from typing import Optional
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db
class Scraper(BaseScraper):
"""Publisher detection scraper that identifies the publisher from the final URL after DOI redirect."""
# This scraper processes "New" papers and outputs "PublisherDetected"/"Failed"
INPUT_STATUSES = ["New"]
OUTPUT_STATUS_SUCCESS = "PublisherDetected"
OUTPUT_STATUS_FAILURE = "Failed"
OUTPUT_STATUS_PROCESSING = "DetectingPublisher"
# Publisher detection patterns based on URL domains and paths
PUBLISHER_URL_PATTERNS = {
'elsevier': [
r'sciencedirect\.com',
r'elsevier\.com',
r'.*\.elsevier\.com'
],
'springer': [
r'link\.springer\.com',
r'springer\.com',
r'.*\.springer\.com'
],
'wiley': [
r'onlinelibrary\.wiley\.com',
r'wiley\.com',
r'.*\.wiley\.com'
],
'ieee': [
r'ieeexplore\.ieee\.org',
r'ieee\.org',
r'.*\.ieee\.org'
],
'plos': [
r'journals\.plos\.org',
r'plos\.org',
r'.*\.plos\.org'
],
'nature': [
r'nature\.com',
r'.*\.nature\.com'
],
'sage': [
r'journals\.sagepub\.com',
r'sagepub\.com',
r'.*\.sagepub\.com'
],
'taylor_francis': [
r'tandfonline\.com',
r'.*\.tandfonline\.com'
],
'acs': [
r'pubs\.acs\.org',
r'acs\.org',
r'.*\.acs\.org'
],
'arxiv': [
r'arxiv\.org',
r'export\.arxiv\.org'
],
'pubmed': [
r'pubmed\.ncbi\.nlm\.nih\.gov',
r'ncbi\.nlm\.nih\.gov'
],
'oxford': [
r'academic\.oup\.com',
r'oup\.com',
r'.*\.oup\.com'
],
'cambridge': [
r'cambridge\.org',
r'.*\.cambridge\.org'
],
'biorxiv': [
r'biorxiv\.org',
r'.*\.biorxiv\.org'
],
'researchgate': [
r'researchgate\.net',
r'.*\.researchgate\.net'
]
}
def scrape(self, doi: str) -> ScrapeResult:
"""Detect publisher from the final URL after DOI redirect."""
start_time = time.time()
paper = PaperMetadata.query.filter_by(doi=doi).first()
if not paper:
return ScrapeResult(
status="error",
message=f"No paper found for DOI {doi}",
data=None,
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Log start of scraping
self.log_scrape_start(doi, paper.id)
# Update status to processing
paper.status = self.OUTPUT_STATUS_PROCESSING
db.session.commit()
try:
# Get the final URL by following the DOI redirect
final_url = self._get_final_url(doi)
if not final_url:
error_msg = f"Could not resolve DOI {doi} to a URL"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "doi_resolution_failed"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Detect publisher from URL
detected_publisher = self._detect_publisher_from_url(final_url)
if detected_publisher:
# Update paper with detected publisher
paper.publisher = detected_publisher
paper.status = self.OUTPUT_STATUS_SUCCESS
paper.error_msg = None
db.session.commit()
success_msg = f"Publisher '{detected_publisher}' detected from URL: {final_url}"
self.log_scrape_success(doi, success_msg, paper.id)
return ScrapeResult(
status="success",
message=success_msg,
data={
"publisher": detected_publisher,
"final_url": final_url
},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
else:
error_msg = f"Could not detect publisher from URL: {final_url}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={
"final_url": final_url,
"error_code": "publisher_not_detected"
},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except Exception as e:
error_msg = f"Error detecting publisher for DOI {doi}: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "publisher_detection_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
def _get_final_url(self, doi: str) -> Optional[str]:
"""
Get the final URL after following DOI redirects.
Args:
doi: The DOI to resolve
Returns:
Final URL after redirects, or None if resolution fails
"""
try:
doi_url = f"https://doi.org/{doi}"
headers = {
'User-Agent': 'SciPaperLoader/1.0 (Academic Research Tool)',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
}
# Make a HEAD request to get the final URL without downloading content
response = requests.head(
doi_url,
headers=headers,
timeout=15,
allow_redirects=True
)
# If HEAD is not allowed, try GET but with minimal content
if response.status_code == 405: # Method Not Allowed
response = requests.get(
doi_url,
headers=headers,
timeout=15,
allow_redirects=True,
stream=True # Don't download the full content
)
response.close() # Close connection after getting headers
if response.status_code in [200, 302, 301]:
return response.url
else:
return None
except Exception:
# Resolution failed - swallow the error so the caller can treat a missing URL as a failed lookup
return None
def _detect_publisher_from_url(self, url: str) -> Optional[str]:
"""
Detect publisher from URL using domain patterns.
Args:
url: The URL to analyze
Returns:
Publisher name if detected, None otherwise
"""
if not url:
return None
# Parse the URL to get the domain
parsed_url = urlparse(url)
domain = parsed_url.netloc.lower()
# Remove 'www.' prefix if present
if domain.startswith('www.'):
domain = domain[4:]
# Score each publisher based on URL pattern matches
publisher_scores = {}
for publisher, patterns in self.PUBLISHER_URL_PATTERNS.items():
score = 0
for pattern in patterns:
if re.search(pattern, domain, re.IGNORECASE):
score += 10 # Strong match for domain patterns
# Also check the full URL for path-based patterns
if re.search(pattern, url.lower(), re.IGNORECASE):
score += 5
if score > 0:
publisher_scores[publisher] = score
# Return the publisher with the highest score
if publisher_scores:
best_publisher = max(publisher_scores.keys(), key=lambda x: publisher_scores[x])
# Only return if we have a reasonable confidence (score > 5)
if publisher_scores[best_publisher] > 5:
return best_publisher
return None

View File

@ -1,18 +1,17 @@
"""
Hourly scheduler task that processes papers at random times within each hour.
APScheduler-based task functions that replace Celery tasks for paper processing.
"""
import random
from datetime import datetime, timedelta
from typing import Optional
from celery import shared_task
from flask import current_app
from ..models import ScraperState, ActivityLog
from ..models import ScraperState, ActivityLog, PaperMetadata
from .manager import ScraperManager
@shared_task(bind=True)
def hourly_scraper_scheduler(self):
def hourly_scraper_scheduler():
"""
Hourly task that schedules paper processing at random times within the hour.
@ -29,8 +28,6 @@ def hourly_scraper_scheduler(self):
status="info",
description="Hourly scheduler skipped - scraper not active"
)
# Disable retries for inactive scheduler
self.retry = False
return {"status": "inactive", "papers_scheduled": 0}
if scraper_state.is_paused:
@ -39,8 +36,6 @@ def hourly_scraper_scheduler(self):
status="info",
description="Hourly scheduler skipped - scraper paused"
)
# Disable retries for paused scheduler
self.retry = False
return {"status": "paused", "papers_scheduled": 0}
# Initialize scraper manager
@ -57,6 +52,15 @@ def hourly_scraper_scheduler(self):
)
return {"status": "empty", "papers_scheduled": 0}
# Get scheduler from Flask app config
scheduler = current_app.config.get('SCHEDULER')
if not scheduler:
ActivityLog.log_error(
error_message="APScheduler not available for paper scheduling",
source="hourly_scraper_scheduler"
)
return {"status": "error", "message": "APScheduler not available"}
# Schedule papers at random times within the hour (0-3600 seconds)
scheduled_count = 0
current_time = datetime.now()
@ -64,24 +68,27 @@ def hourly_scraper_scheduler(self):
for paper in papers:
# Random delay between 1 second and 58 minutes
delay_seconds = random.randint(1, 3480) # Up to 58 minutes
run_date = current_time + timedelta(seconds=delay_seconds)
# Schedule the task using Celery's task registry to avoid circular import issues
from ..celery import celery
celery.send_task(
'scipaperloader.scrapers.tasks.process_single_paper',
# Schedule the task using APScheduler
job_id = f"paper_process_{paper.id}_{int(current_time.timestamp())}"
scheduler.add_job(
func=process_single_paper,
trigger='date',
run_date=run_date,
args=[paper.id],
countdown=delay_seconds
id=job_id,
replace_existing=True
)
scheduled_count += 1
# Log each scheduled paper
schedule_time = current_time + timedelta(seconds=delay_seconds)
ActivityLog.log_scraper_activity(
action="schedule_paper",
paper_id=paper.id,
status="info",
description=f"Scheduled paper {paper.doi} for processing at {schedule_time.strftime('%H:%M:%S')}"
description=f"Scheduled paper {paper.doi} for processing at {run_date.strftime('%H:%M:%S')}"
)
ActivityLog.log_scraper_activity(
@ -100,8 +107,7 @@ def hourly_scraper_scheduler(self):
return {"status": "error", "message": str(e)}
@shared_task(bind=True)
def process_single_paper(self, paper_id: int):
def process_single_paper(paper_id: int):
"""
Process a single paper. This task is scheduled at random times within each hour.
@ -109,17 +115,17 @@ def process_single_paper(self, paper_id: int):
paper_id: ID of the paper to process
"""
try:
# Double-check scraper state before processing
# ENHANCED RACE CONDITION PROTECTION: Check scraper state multiple times
# Initial check before any processing
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="process_single_paper",
paper_id=paper_id,
status="skipped",
description="Skipped processing - scraper not active"
description="Task skipped - scraper not active (initial check)"
)
# Use Celery's ignore to mark this task as completed without error
self.retry = False
return {"status": "inactive", "paper_id": paper_id}
if scraper_state.is_paused:
@ -127,18 +133,50 @@ def process_single_paper(self, paper_id: int):
action="process_single_paper",
paper_id=paper_id,
status="skipped",
description="Skipped processing - scraper paused"
description="Task skipped - scraper paused (initial check)"
)
return {"status": "paused", "paper_id": paper_id}
# Brief pause to allow stop commands to take effect
import time
time.sleep(0.1)
# Second check after brief delay
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="process_single_paper",
paper_id=paper_id,
status="skipped",
description="Task skipped - scraper not active (secondary check)"
)
return {"status": "inactive", "paper_id": paper_id}
if scraper_state.is_paused:
ActivityLog.log_scraper_activity(
action="process_single_paper",
paper_id=paper_id,
status="skipped",
description="Task skipped - scraper paused (secondary check)"
)
# Use Celery's ignore for paused state too
self.retry = False
return {"status": "paused", "paper_id": paper_id}
# Get the paper
from ..models import PaperMetadata
paper = PaperMetadata.query.get(paper_id)
if not paper:
return {"status": "error", "message": f"Paper {paper_id} not found"}
# Third check before starting actual processing
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active:
ActivityLog.log_scraper_activity(
action="process_single_paper",
paper_id=paper_id,
status="skipped",
description="Task skipped - scraper not active (pre-processing check)"
)
return {"status": "inactive", "paper_id": paper_id}
# Process the paper using scraper manager
manager = ScraperManager()
result = manager.process_paper(paper)
@ -153,8 +191,48 @@ def process_single_paper(self, paper_id: int):
return {"status": "error", "paper_id": paper_id, "message": str(e)}
@shared_task(bind=True)
def process_papers_batch(self, paper_ids: list, scraper_module: Optional[str] = None):
def process_single_paper_manual(paper_id: int, scraper_name: Optional[str] = None):
"""
Process a single paper manually, bypassing scraper state checks.
Used for manual paper processing from the UI.
Args:
paper_id: ID of the paper to process
scraper_name: Optional specific scraper module to use
"""
try:
# Get the paper without checking scraper state
paper = PaperMetadata.query.get(paper_id)
if not paper:
ActivityLog.log_error(
error_message=f"Paper {paper_id} not found for manual processing",
source="process_single_paper_manual"
)
return {"status": "error", "message": f"Paper {paper_id} not found"}
# Process the paper using the manual processing method (bypasses state checks)
manager = ScraperManager()
result = manager.process_paper_manual(paper, scraper_name=scraper_name)
ActivityLog.log_scraper_activity(
action="manual_process_complete",
paper_id=paper_id,
status=result["status"],
description=f"Manual processing completed for paper {paper.doi}" +
(f" using scraper '{scraper_name}'" if scraper_name else " using system default scraper")
)
return result
except Exception as e:
ActivityLog.log_error(
error_message=f"Error manually processing paper {paper_id}: {str(e)}",
source="process_single_paper_manual"
)
return {"status": "error", "paper_id": paper_id, "message": str(e)}
def process_papers_batch(paper_ids: list, scraper_module: Optional[str] = None):
"""
Process multiple papers in a batch for immediate processing.
@ -167,7 +245,6 @@ def process_papers_batch(self, paper_ids: list, scraper_module: Optional[str] =
manager = ScraperManager()
for paper_id in paper_ids:
from ..models import PaperMetadata
paper = PaperMetadata.query.get(paper_id)
if paper:
result = manager.process_paper(paper)

View File

@ -0,0 +1,237 @@
import time
import os
from datetime import datetime
from typing import Optional
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db
from ..parsers.base_parser import BaseParser, ParseError
from ..parsers.elsevier_parser import ElsevierParser
from ..parsers.arxiv_parser import ArxivParser
class Scraper(BaseScraper):
"""Full text extraction scraper that uses publisher-specific parsers."""
# This scraper processes papers with HTML content and outputs "TextExtracted"/"Failed"
INPUT_STATUSES = ["WebContentDownloaded", "PublisherDetected"]
OUTPUT_STATUS_SUCCESS = "TextExtracted"
OUTPUT_STATUS_FAILURE = "Failed"
OUTPUT_STATUS_PROCESSING = "ExtractingText"
def __init__(self):
super().__init__()
# Registry of available parsers
self.parsers = [
ElsevierParser(),
ArxivParser(),
# Add more parsers here as you create them
# SpringerParser(),
# WileyParser(),
# IEEEParser(),
]
def scrape(self, doi: str) -> ScrapeResult:
"""Extract full text using appropriate publisher parser."""
start_time = time.time()
paper = PaperMetadata.query.filter_by(doi=doi).first()
if not paper:
return ScrapeResult(
status="error",
message=f"No paper found for DOI {doi}",
data=None,
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Log start of scraping
self.log_scrape_start(doi, paper.id)
# Update status to processing
paper.status = self.OUTPUT_STATUS_PROCESSING
db.session.commit()
# Check if HTML file exists
if not paper.file_path or not os.path.exists(paper.file_path):
error_msg = f"HTML file not found for DOI {doi}. Expected at: {paper.file_path}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "html_file_not_found"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
try:
# Read HTML content
with open(paper.file_path, 'r', encoding='utf-8') as f:
html_content = f.read()
# Find appropriate parser
parser = self._select_parser(html_content)
if not parser:
error_msg = f"No suitable parser found for DOI {doi}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "no_parser_available"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Parse content
parsed_content = parser.parse(html_content, doi)
# Validate parsed content
if not parser.validate_content(parsed_content):
error_msg = f"Parsed content validation failed for DOI {doi}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "content_validation_failed"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Save extracted text to file
text_file_path = self._save_extracted_text(parsed_content, doi)
# Update paper status to success
paper.status = self.OUTPUT_STATUS_SUCCESS
paper.error_msg = None
# You might want to add a text_file_path field to store the text file location
# paper.text_file_path = text_file_path
db.session.commit()
success_msg = f"Successfully extracted text using {parser.get_name()} parser"
self.log_scrape_success(doi, success_msg, paper.id)
return ScrapeResult(
status="success",
message=f"Successfully extracted full text for {doi}",
data={
"text_file_path": text_file_path,
"parser_used": parser.get_name(),
"title": parsed_content.title,
"word_count": len(parsed_content.full_text.split()),
"has_abstract": bool(parsed_content.abstract),
"has_sections": bool(parsed_content.sections),
"author_count": len(parsed_content.authors) if parsed_content.authors else 0,
"keyword_count": len(parsed_content.keywords) if parsed_content.keywords else 0,
"reference_count": len(parsed_content.references) if parsed_content.references else 0
},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except ParseError as e:
error_msg = f"Parser error for DOI {doi}: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "parser_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except Exception as e:
error_msg = f"Unexpected error extracting text for DOI {doi}: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "extraction_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
def _select_parser(self, html_content: str) -> Optional[BaseParser]:
"""
Select the most appropriate parser for the HTML content.
Args:
html_content: The HTML content to analyze
Returns:
The best parser for this content, or None if no parser can handle it
"""
for parser in self.parsers:
if parser.can_parse(html_content):
return parser
return None
def _save_extracted_text(self, parsed_content, doi: str) -> str:
"""
Save extracted text to a file.
Args:
parsed_content: The parsed content object
doi: The DOI of the paper
Returns:
Path to the saved text file
"""
download_path = DownloadPathConfig.get_path()
text_file_name = f"{doi.replace('/', '_')}_fulltext.txt"
text_file_path = os.path.join(download_path, text_file_name)
with open(text_file_path, 'w', encoding='utf-8') as f:
# Write structured content
f.write(f"DOI: {parsed_content.doi or doi}\n")
f.write(f"Title: {parsed_content.title or 'Unknown'}\n")
f.write(f"Journal: {parsed_content.journal or 'Unknown'}\n")
f.write(f"Publication Date: {parsed_content.publication_date or 'Unknown'}\n")
if parsed_content.authors:
f.write(f"Authors: {', '.join(parsed_content.authors)}\n")
if parsed_content.keywords:
f.write(f"Keywords: {', '.join(parsed_content.keywords)}\n")
f.write(f"Extracted: {datetime.utcnow().isoformat()}\n")
f.write("=" * 80 + "\n\n")
# Write full text
f.write(parsed_content.full_text)
# Optionally write references at the end
if parsed_content.references:
f.write("\n\n" + "=" * 80 + "\n")
f.write("REFERENCES\n")
f.write("=" * 80 + "\n")
for i, ref in enumerate(parsed_content.references, 1):
f.write(f"{i}. {ref}\n")
return text_file_path

View File

@ -0,0 +1,201 @@
import time
import os
import requests
from urllib.parse import urlparse
from datetime import datetime
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db
class Scraper(BaseScraper):
"""Web fetcher scraper that downloads HTML content from DOI URLs."""
# This scraper processes "New" papers and outputs "WebContentDownloaded"/"Failed"
INPUT_STATUSES = ["New"]
OUTPUT_STATUS_SUCCESS = "WebContentDownloaded"
OUTPUT_STATUS_FAILURE = "Failed"
OUTPUT_STATUS_PROCESSING = "FetchingWebContent"
def scrape(self, doi: str) -> ScrapeResult:
"""Fetch HTML content from DOI and save to download path."""
start_time = time.time()
paper = PaperMetadata.query.filter_by(doi=doi).first()
if not paper:
return ScrapeResult(
status="error",
message=f"No paper found for DOI {doi}",
data=None,
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Log start of scraping
self.log_scrape_start(doi, paper.id)
# Update status to processing
paper.status = self.OUTPUT_STATUS_PROCESSING
db.session.commit()
# Prepare file paths
download_path = DownloadPathConfig.get_path()
file_name = f"{doi.replace('/', '_')}.html"
file_path = os.path.join(download_path, file_name)
# Check/create download directory
if not os.path.exists(download_path):
try:
os.makedirs(download_path, exist_ok=True)
except OSError as e:
error_msg = f"Failed to create download directory: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_creation_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Check path permissions
if not os.access(download_path, os.W_OK):
error_msg = f"Download path '{download_path}' is not writable"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_write_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
try:
# Fetch HTML from DOI
doi_url = f"https://doi.org/{doi}"
headers = {
'User-Agent': 'SciPaperLoader/1.0 (Academic Research Tool)',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
response = requests.get(
doi_url,
headers=headers,
timeout=30,
allow_redirects=True,
verify=True
)
# Check for invalid DOI (404) or other HTTP errors
if response.status_code == 404:
error_msg = f"Invalid DOI: {doi} not found (404)"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "invalid_doi"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Check for other HTTP errors
response.raise_for_status()
# Save HTML content
with open(file_path, 'w', encoding='utf-8') as f:
f.write(response.text)
# Extract final URL after redirects (for publisher detection)
final_url = response.url
# Update paper status to success
paper.status = self.OUTPUT_STATUS_SUCCESS
paper.file_path = file_path
paper.error_msg = None
db.session.commit()
# Log success
success_msg = f"Successfully fetched HTML content for {doi} from {final_url}"
self.log_scrape_success(doi, success_msg, paper.id)
return ScrapeResult(
status="success",
message=f"Successfully fetched HTML for {doi}",
data={
"file_path": file_path,
"final_url": final_url,
"content_length": len(response.text),
"content_type": response.headers.get('content-type', 'unknown'),
"title": paper.title,
"domain": urlparse(final_url).netloc if final_url else None
},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except requests.exceptions.HTTPError as e:
error_msg = f"HTTP error fetching {doi_url}: {e.response.status_code} - {e}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "http_error", "status_code": e.response.status_code},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except requests.exceptions.RequestException as e:
error_msg = f"Network error fetching {doi_url}: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "network_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
except Exception as e:
error_msg = f"Failed to save HTML file: {str(e)}"
paper.status = self.OUTPUT_STATUS_FAILURE
paper.error_msg = error_msg
db.session.commit()
self.log_scrape_failure(doi, error_msg, paper.id)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "file_creation_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)

View File

@ -0,0 +1,384 @@
# JavaScript Modularization Documentation
## Overview
The JavaScript code in the SciPaperLoader application has been modularized into reusable components to improve maintainability, reduce code duplication, and enable easier testing and updates.
## Modularization Task Completed
### Problem Statement
The original codebase had roughly 800 lines of inline JavaScript scattered across multiple Jinja templates, which caused several critical issues:
- **Code Duplication**: Similar functionality replicated across templates
- **Maintenance Difficulty**: Changes required editing multiple template files
- **Linter Issues**: Jinja template syntax mixed with JavaScript caused linting errors
- **Testing Challenges**: Inline code was difficult to unit test
- **Poor Separation of Concerns**: Template logic mixed with application logic
### Solution Implemented
Successfully transformed the codebase by:
1. **Extracted 10 Modular JavaScript Files** (roughly 800 lines of code moved from templates)
2. **Eliminated Code Duplication** by creating reusable components
3. **Fixed Linter Compatibility** by separating template syntax from JavaScript logic
4. **Implemented Clean Variable Passing** using JSON script tags instead of direct Jinja embedding
5. **Created Class-Based Architecture** with proper inheritance and composition patterns
6. **Established Inter-Component Communication** through callback systems
7. **Added Comprehensive Error Handling** and loading states throughout
### Key Achievements
- ✅ **5 templates modularized**: `scraper.html.jinja`, `papers.html.jinja`, `upload.html.jinja`, `logger.html.jinja`, `config/schedule.html.jinja`
- ✅ **10 JavaScript modules created**: Covering all functionality from utilities to dashboard coordination
- ✅ **Zero functionality loss**: All existing features preserved during modularization
- ✅ **Improved maintainability**: Changes now require editing single module files
- ✅ **Enhanced testability**: Individual modules can be unit tested
- ✅ **Clean variable handling**: Jinja variables passed as JSON configuration instead of inline embedding
### Before vs After Example
**Before (inline in template)**:
```html
<script>
var maxVolume = {{ max_volume }}; // Linter error
$('#start-scraper').click(function() {
// 50+ lines of mixed template/JS code
});
</script>
```
**After (modular)**:
```html
<script type="application/json" id="config-data">
{"maxVolume": {{ max_volume|tojson }}}
</script>
<script src="{{ url_for('static', filename='js/scraper-control.js') }}"></script>
<script>
const config = JSON.parse(document.getElementById('config-data').textContent);
new ScraperControl(config).init();
</script>
```
## Modular JavaScript Files
### 1. `/static/js/common.js`
**Purpose**: Common utilities used across the application
**Key Functions**:
- `showFlashMessage(message, type)` - Display flash messages to users
- `createStatusBadge(status)` - Generate status badge HTML
- `formatTimestamp(timestamp)` - Format timestamps for display
- `truncateText(text, maxLength)` - Truncate text with ellipsis
- `toggleButtonLoading(button, loading, loadingText)` - Handle button loading states
- `apiRequest(url, options)` - Generic API request wrapper
**Used by**: All templates that need basic utilities
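A short usage sketch combining these helpers; the element IDs (`refreshBtn`, `latestStatus`) are illustrative placeholders, and the `/logs/api` endpoint mirrors the one already used by `activity-monitor.js`:
```javascript
// Refresh a small status line using the shared utilities.
async function refreshSummary() {
  const button = document.getElementById("refreshBtn");
  toggleButtonLoading(button, true, "Refreshing...");
  try {
    const data = await apiRequest("/logs/api?page=1&per_page=1");
    const latest = data.logs && data.logs[0];
    if (latest) {
      document.getElementById("latestStatus").innerHTML =
        `${formatTimestamp(latest.timestamp)} ${createStatusBadge(latest.status)}`;
    }
    showFlashMessage("Activity summary refreshed", "success");
  } catch (error) {
    showFlashMessage(`Refresh failed: ${error.message}`, "error");
  } finally {
    toggleButtonLoading(button, false);
  }
}
```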
### 2. `/static/js/modal-handler.js`
**Purpose**: Handle modal dialogs with dynamic content loading
**Key Features**:
- AJAX content loading
- Error handling
- Automatic click handler setup
- Bootstrap modal integration
**Used by**:
- `papers.html.jinja` (paper details modal)
- `logger.html.jinja` (log details modal)
### 3. `/static/js/form-handler.js`
**Purpose**: Handle form submissions with progress tracking
**Key Features**:
- Progress modal display
- Task status polling
- Error handling
- Customizable callbacks
**Used by**:
- `upload.html.jinja` (CSV upload form)
### 4. `/static/js/chart.js`
**Purpose**: Handle Chart.js activity visualization
**Key Features**:
- Chart initialization and rendering
- Data loading from API
- Error handling for missing Chart.js
**Used by**:
- `scraper.html.jinja` (activity charts)
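A usage sketch based on the `ActivityChart` class included later in this comparison; the canvas ID and the `/scraper/activity` endpoint are assumptions for illustration:
```javascript
// Create the chart against the canvas rendered by scraper.html.jinja,
// then feed it hourly stats plus the scraper state timeline.
const activityChart = new ActivityChart("activityChart");

async function refreshChart(hours) {
  // render() expects { hourly_stats: [...], scraper_timeline: [...] }.
  const data = await apiRequest(`/scraper/activity?hours=${hours}`);
  activityChart.render(data);
}

refreshChart(24);
```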
### 5. `/static/js/scraper-control.js`
**Purpose**: Handle scraper control operations (start/stop/pause/reset)
**Key Features**:
- Status polling
- Volume configuration
- Callback system for refreshing other components
**Used by**:
- `scraper.html.jinja`
### 6. `/static/js/paper-processor.js`
**Purpose**: Handle paper search and processing functionality
**Key Features**:
- Paper search
- Single paper processing
- Status polling
- Scraper selection
**Used by**:
- `scraper.html.jinja`
### 7. `/static/js/activity-monitor.js`
**Purpose**: Handle activity log display and real-time notifications
**Key Features**:
- Activity log loading
- Real-time updates
- Notification management
**Used by**:
- `scraper.html.jinja`
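Typical wiring, based on the `ActivityMonitor` class included further down in this comparison:
```javascript
// ActivityMonitor binds to #activityLog and the pagination controls in
// scraper.html.jinja and starts polling for new papers on construction.
const activityMonitor = new ActivityMonitor();

// Redraw the activity chart whenever new scraper events arrive
// (pairs with the ActivityChart sketch above).
activityMonitor.setChartRefreshCallback((hours) => refreshChart(hours || 24));

// Initial load of the paginated activity table.
activityMonitor.loadRecentActivity();
```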
### 8. `/static/js/scraper-dashboard.js`
**Purpose**: Coordinate all scraper dashboard components
**Key Features**:
- Component initialization
- Inter-component communication
- Configuration management
**Used by**:
- `scraper.html.jinja`
### 9. `/static/js/config-handler.js`
**Purpose**: Handle configuration forms and Alpine.js integration
**Key Features**:
- Configuration API calls
- Alpine.js data objects
- Schedule management
- Volume updates
**Used by**:
- `config/schedule.html.jinja`
## Template Updates
### Templates Using Modular JavaScript
1. **scraper.html.jinja**
- Uses all scraper-related modules
- Passes Jinja variables as configuration parameters
- Initializes dashboard with `initScraperDashboard(config)`
2. **papers.html.jinja**
- Uses `modal-handler.js` for paper detail modals
- Simplified from custom modal code to single line initialization
3. **upload.html.jinja**
- Uses `form-handler.js` for upload progress tracking
- Custom result display function
- Automatic task status polling
4. **logger.html.jinja**
- Uses `modal-handler.js` for log detail modals
- Custom URL construction for log endpoints
5. **config/schedule.html.jinja**
- Uses `config-handler.js` for Alpine.js integration
- Modular schedule management functions
## Benefits of Modularization
### 1. **Reusability**
- Modal functionality shared between papers and logger templates
- Common utilities used across all templates
- Form handling can be reused for other forms
### 2. **Maintainability**
- Single place to update common functionality
- Clear separation of concerns
- Easier debugging and testing
### 3. **Parameter Passing**
- Jinja variables passed as configuration objects
- No more hardcoded values in JavaScript
- Environment-specific settings easily configurable
### 4. **Extensibility**
- Easy to add new functionality to existing modules
- New templates can easily use existing modules
- Plugin-like architecture for components
## Usage Examples
### Basic Modal Usage
```javascript
const modal = new ModalHandler('modalId', 'contentElementId');
modal.setupClickHandlers('.clickable-items');
```
### Form with Progress Tracking
```javascript
const formHandler = new FormHandler('formId', {
onSuccess: (result) => console.log('Success:', result),
onError: (error) => console.log('Error:', error)
});
```
### Configuration Management
```javascript
// In Alpine.js template
x-data="configHandler.createScheduleManager(initialData, volume)"
```
## Migration Notes
### Old vs New Approach
**Before**: Inline JavaScript in each template
```html
<script>
document.addEventListener('DOMContentLoaded', function() {
// Lots of inline JavaScript code
});
</script>
```
**After**: Modular imports with configuration
```html
<script src="{{ url_for('static', filename='js/common.js') }}"></script>
<script src="{{ url_for('static', filename='js/modal-handler.js') }}"></script>
<script>
const modal = new ModalHandler('modalId', 'contentId');
modal.setupClickHandlers('.links');
</script>
```
### Jinja Variable Handling
To properly separate Jinja template variables from JavaScript code and avoid linting issues, we use a clean JSON configuration approach:
**Before**: Variables embedded directly in JavaScript (causes linting issues)
```javascript
if (volume > {{ max_volume }}) {
// Error handling - JSLint will complain about {{ }}
}
```
**After**: Clean separation using JSON script tags
```html
<!-- Jinja variables in JSON format -->
<script type="application/json" id="config-data">
{
"maxVolume": {{ max_volume|tojson }},
"currentVolume": {{ volume|tojson }},
"apiUrl": {{ url_for('api.endpoint')|tojson }},
"csrfToken": {{ csrf_token()|tojson }}
}
</script>
<!-- Clean JavaScript that reads the configuration -->
<script>
document.addEventListener('DOMContentLoaded', function() {
const config = JSON.parse(document.getElementById('config-data').textContent);
const handler = new VolumeHandler(config);
});
</script>
```
**Benefits of this approach**:
- **Linter-friendly**: No template syntax in JavaScript files
- **Type-safe**: JSON ensures proper data types
- **Maintainable**: Clear separation of concerns
- **Secure**: Automatic escaping with `|tojson` filter
- **Debuggable**: Easy to inspect configuration in DevTools
**Real-world example from scraper.html.jinja**:
```html
<script type="application/json" id="scraper-config">
{
"statusUrl": {{ url_for('api.scraper_status')|tojson }},
"startUrl": {{ url_for('api.start_scraper')|tojson }},
"volume": {{ volume|tojson }},
"scraperType": {{ scraper_type|tojson }},
"csrfToken": {{ csrf_token()|tojson }}
}
</script>
<script>
const config = JSON.parse(document.getElementById('scraper-config').textContent);
initScraperDashboard(config);
</script>
```
## Future Improvements
### Potential Enhancements
1. **Bundle Management**: Consider using webpack or similar for production builds
2. **Unit Testing**: Add comprehensive test suite for individual modules
3. **JSDoc Comments**: Add detailed documentation for better IDE support
4. **Centralized Error Reporting**: Implement global error handling system
5. **Performance Optimization**: Implement lazy loading for non-critical modules
6. **TypeScript Migration**: Consider migrating to TypeScript for better type safety
### Adding New Modules
When creating new JavaScript modules:
1. Follow the established class-based pattern
2. Include proper error handling
3. Use the configuration pattern for Jinja variables
4. Add documentation to this README
5. Update templates to use the new module
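A minimal skeleton following these conventions; the module name, element ID, endpoint, and response field are hypothetical:
```javascript
/**
 * Example skeleton for a new module (hypothetical ExportHandler).
 */
class ExportHandler {
  constructor(config) {
    // Configuration comes from a JSON script tag, never from inline Jinja.
    this.exportUrl = config.exportUrl;
    this.button = document.getElementById("exportBtn");
  }

  init() {
    if (!this.button) return;
    this.button.addEventListener("click", () => this.exportPapers());
  }

  async exportPapers() {
    toggleButtonLoading(this.button, true, "Exporting...");
    try {
      const result = await apiRequest(this.exportUrl, { method: "POST" });
      showFlashMessage(`Exported ${result.count} papers`, "success");
    } catch (error) {
      showFlashMessage(`Export failed: ${error.message}`, "error");
    } finally {
      toggleButtonLoading(this.button, false);
    }
  }
}

// Export for use in other modules (same pattern as the existing files).
if (typeof window !== "undefined") {
  window.ExportHandler = ExportHandler;
}
```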
## Testing
A test file `test_js_modularization.py` has been created to verify the modularization. To run comprehensive testing:
```bash
python test_js_modularization.py
```
This will verify:
- All JavaScript files exist and are properly formatted
- Templates correctly reference the modular files
- Configuration patterns are properly implemented
- No inline JavaScript remains in templates
## Maintenance
### When Making Changes
1. **Update Single Module**: Changes to functionality only require editing one file
2. **Test Affected Templates**: Ensure all templates using the module still work
3. **Update Documentation**: Keep this README current with any changes
4. **Consider Dependencies**: Check if changes affect other modules
### File Organization
```
/static/js/
├── README.md # This documentation
├── common.js # Shared utilities
├── modal-handler.js # Modal functionality
├── form-handler.js # Form processing
├── chart.js # Chart visualization
├── scraper-control.js # Scraper operations
├── paper-processor.js # Paper management
├── activity-monitor.js # Activity tracking
├── scraper-dashboard.js # Dashboard coordination
├── config-handler.js # Configuration management
└── table-handler.js # Table utilities
```
## Migration Summary
The modularization successfully transformed **roughly 800 lines of inline JavaScript** from templates into a maintainable, reusable module system. This improvement provides:
- **Enhanced maintainability** through single-responsibility modules
- **Reduced code duplication** via shared utility functions
- **Improved linter compatibility** by separating template and JavaScript concerns
- **Better testability** with isolated, unit-testable modules
- **Cleaner templates** with minimal, configuration-only JavaScript
- **Easier debugging** with clearly separated concerns and proper error handling
All existing functionality has been preserved while significantly improving the codebase architecture and developer experience.

View File

@ -0,0 +1,328 @@
/**
* Activity monitoring and display functionality
*/
class ActivityMonitor {
constructor() {
this.activityLog = document.getElementById("activityLog");
this.notificationsToggle = document.getElementById("notificationsToggle");
this.notificationsEnabled = true;
this.lastPaperTimestamp = new Date().toISOString();
// Pagination state
this.currentPage = 1;
this.perPage = 20;
this.statusFilter = "";
this.totalPages = 1;
this.totalEntries = 0;
// Pagination elements
this.paginationContainer = document.getElementById("activityPagination");
this.paginationInfo = document.getElementById("activityPaginationInfo");
this.prevPageBtn = document.getElementById("activityPrevPage");
this.nextPageBtn = document.getElementById("activityNextPage");
this.currentPageSpan = document.getElementById("activityCurrentPage");
this.pageSizeSelect = document.getElementById("activityPageSize");
this.statusFilterSelect = document.getElementById("activityStatusFilter");
this.initEventListeners();
this.setupWebSocket();
}
/**
* Initialize event listeners
*/
initEventListeners() {
if (this.notificationsToggle) {
this.notificationsToggle.addEventListener("click", () => {
this.notificationsEnabled = this.notificationsToggle.checked;
});
}
// Time range buttons
document.querySelectorAll(".time-range-btn").forEach((btn) => {
btn.addEventListener("click", () => {
document
.querySelectorAll(".time-range-btn")
.forEach((b) => b.classList.remove("active"));
btn.classList.add("active");
const currentTimeRange = parseInt(btn.dataset.hours);
// Trigger chart refresh if callback is provided
if (this.onChartRefresh) {
this.onChartRefresh(currentTimeRange);
}
});
});
// Pagination event listeners
if (this.prevPageBtn) {
this.prevPageBtn.addEventListener("click", (e) => {
e.preventDefault();
if (this.currentPage > 1) {
this.currentPage--;
this.loadRecentActivity();
}
});
}
if (this.nextPageBtn) {
this.nextPageBtn.addEventListener("click", (e) => {
e.preventDefault();
if (this.currentPage < this.totalPages) {
this.currentPage++;
this.loadRecentActivity();
}
});
}
// Page size change
if (this.pageSizeSelect) {
this.pageSizeSelect.addEventListener("change", () => {
this.perPage = parseInt(this.pageSizeSelect.value);
this.currentPage = 1; // Reset to first page
this.loadRecentActivity();
});
}
// Status filter change
if (this.statusFilterSelect) {
this.statusFilterSelect.addEventListener("change", () => {
this.statusFilter = this.statusFilterSelect.value;
this.currentPage = 1; // Reset to first page
this.loadRecentActivity();
});
}
}
/**
* Load and render recent activity
*/
async loadRecentActivity() {
if (!this.activityLog) return;
try {
// Build query parameters for pagination
const params = new URLSearchParams({
page: this.currentPage,
per_page: this.perPage,
});
// Add multiple category parameters
params.append("category", "scraper_activity");
params.append("category", "scraper_command");
if (this.statusFilter) {
params.append("status", this.statusFilter);
}
const data = await apiRequest(`/logs/api?${params.toString()}`);
if (data.success) {
this.renderActivityLog(data.logs);
this.updatePagination(data.pagination);
console.log("Activity log refreshed with latest data");
} else {
throw new Error(data.message || "Failed to load logs");
}
} catch (error) {
console.error("Failed to load activity logs:", error);
// If the API endpoint doesn't exist, just show a message
this.activityLog.innerHTML =
'<tr><td colspan="4" class="text-center">Activity log API not available</td></tr>';
this.hidePagination();
}
}
/**
* Render activity log data
* @param {Array} logs - Array of log entries
*/
renderActivityLog(logs) {
if (!this.activityLog) return;
this.activityLog.innerHTML = "";
if (!logs || logs.length === 0) {
this.activityLog.innerHTML =
'<tr><td colspan="4" class="text-center">No recent activity</td></tr>';
return;
}
logs.forEach((log) => {
const row = document.createElement("tr");
// Format timestamp
const timeStr = formatTimestamp(log.timestamp);
// Create status badge
const statusBadge = createStatusBadge(log.status);
row.innerHTML = `
<td>${timeStr}</td>
<td>${log.action}</td>
<td>${statusBadge}</td>
<td>${log.description || ""}</td>
`;
this.activityLog.appendChild(row);
});
}
/**
* Update pagination controls based on API response
* @param {Object} pagination - Pagination data from API
*/
updatePagination(pagination) {
if (!pagination || !this.paginationContainer) return;
this.currentPage = pagination.page;
this.totalPages = pagination.pages;
this.totalEntries = pagination.total;
// Show pagination container
this.paginationContainer.classList.remove("d-none");
// Update pagination info
const startEntry = (pagination.page - 1) * pagination.per_page + 1;
const endEntry = Math.min(
pagination.page * pagination.per_page,
pagination.total
);
if (this.paginationInfo) {
this.paginationInfo.textContent = `Showing ${startEntry} - ${endEntry} of ${pagination.total} entries`;
}
// Update current page display
if (this.currentPageSpan) {
this.currentPageSpan.textContent = `${pagination.page} of ${pagination.pages}`;
}
// Update previous button
if (this.prevPageBtn) {
if (pagination.has_prev) {
this.prevPageBtn.classList.remove("disabled");
this.prevPageBtn.querySelector("a").removeAttribute("tabindex");
this.prevPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "false");
} else {
this.prevPageBtn.classList.add("disabled");
this.prevPageBtn.querySelector("a").setAttribute("tabindex", "-1");
this.prevPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "true");
}
}
// Update next button
if (this.nextPageBtn) {
if (pagination.has_next) {
this.nextPageBtn.classList.remove("disabled");
this.nextPageBtn.querySelector("a").removeAttribute("tabindex");
this.nextPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "false");
} else {
this.nextPageBtn.classList.add("disabled");
this.nextPageBtn.querySelector("a").setAttribute("tabindex", "-1");
this.nextPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "true");
}
}
}
/**
* Hide pagination controls when not needed
*/
hidePagination() {
if (this.paginationContainer) {
this.paginationContainer.classList.add("d-none");
}
}
/**
* Setup WebSocket for real-time notifications
*/
setupWebSocket() {
// If WebSocket is available, implement it here
// For now we'll poll the server periodically for new papers
setInterval(() => this.checkForNewPapers(), 10000); // Check every 10 seconds
}
/**
* Check for new papers and show notifications
*/
async checkForNewPapers() {
if (!this.notificationsEnabled) return;
try {
// Use the API endpoint for checking new papers, with limit for efficiency
const data = await apiRequest(
`/logs/api?category=scraper_activity&category=scraper_command&action=scrape_paper&after=${this.lastPaperTimestamp}&limit=5`
);
if (data && data.length > 0) {
// Update the timestamp
this.lastPaperTimestamp = new Date().toISOString();
// Show notifications for new papers
data.forEach((log) => {
const extraData = log.extra_data ? JSON.parse(log.extra_data) : {};
if (log.status === "success") {
showFlashMessage(
`New paper scraped: ${extraData.title || "Unknown title"}`,
"success"
);
} else if (log.status === "error") {
showFlashMessage(
`Failed to scrape paper: ${log.description}`,
"error"
);
}
});
// Refresh the activity chart and log
if (this.onChartRefresh) {
this.onChartRefresh();
}
// Only reload if we're on page 1 to avoid disrupting user navigation
if (this.currentPage === 1) {
this.loadRecentActivity();
}
}
} catch (error) {
// If the API endpoint doesn't exist, do nothing
console.debug("Activity polling failed (this may be expected):", error);
}
}
/**
* Set callback for chart refresh
*/
setChartRefreshCallback(callback) {
this.onChartRefresh = callback;
}
/**
* Refresh activity log manually (useful for external triggers)
*/
refresh() {
this.loadRecentActivity();
}
/**
* Reset pagination to first page
*/
resetToFirstPage() {
this.currentPage = 1;
this.loadRecentActivity();
}
}
// Export for use in other modules
if (typeof window !== "undefined") {
window.ActivityMonitor = ActivityMonitor;
}

View File

@ -0,0 +1,436 @@
/**
* Chart utilities for activity visualization
*/
class ActivityChart {
constructor(canvasId) {
this.canvasId = canvasId;
this.chart = null;
this.scraperChart = null;
this.initChart();
}
initChart() {
// Check if Chart.js is available
if (typeof Chart === "undefined") {
console.error("Chart.js is not loaded");
return;
}
const chartElement = document.getElementById(this.canvasId);
if (!chartElement) {
console.error(
`Chart canvas element with id "${this.canvasId}" not found`
);
return;
}
// Set canvas height directly
chartElement.style.height = "300px";
chartElement.height = 300;
this.ctx = chartElement.getContext("2d");
// Initialize scraper activity chart
this.initScraperChart();
}
initScraperChart() {
const scraperChartElement = document.getElementById("scraperActivityChart");
if (!scraperChartElement) {
console.warn("Scraper activity chart element not found");
return;
}
this.scraperCtx = scraperChartElement.getContext("2d");
}
/**
* Render the activity chart with provided data
* @param {Object} data - Chart data object with hourly_stats and scraper_timeline
*/
render(data) {
if (!this.ctx) {
console.error("Chart context not available");
return;
}
console.log("Render received data:", data);
// Handle both old and new data formats for compatibility
const hourlyStats = data.hourly_stats || data;
const scraperTimeline = data.scraper_timeline || [];
console.log("Extracted hourlyStats:", hourlyStats);
console.log("Extracted scraperTimeline:", scraperTimeline);
// Extract the data for the main chart (papers only)
const labels = hourlyStats.map((item) => item.hour);
const successData = hourlyStats.map((item) => item.success);
const errorData = hourlyStats.map((item) => item.error);
const pendingData = hourlyStats.map((item) => item.pending);
// Destroy existing charts if they exist
if (this.chart) {
this.chart.destroy();
}
if (this.scraperChart) {
this.scraperChart.destroy();
}
// Render main chart (papers only)
this.chart = new Chart(this.ctx, {
type: "bar",
data: {
labels: labels,
datasets: [
{
label: "Success",
data: successData,
backgroundColor: "#28a745",
stack: "Papers",
},
{
label: "Error",
data: errorData,
backgroundColor: "#dc3545",
stack: "Papers",
},
{
label: "Pending",
data: pendingData,
backgroundColor: "#ffc107",
stack: "Papers",
},
],
},
options: {
responsive: true,
maintainAspectRatio: true,
aspectRatio: 2.5,
layout: {
padding: {
top: 20,
bottom: 20,
},
},
plugins: {
legend: {
position: "top",
},
tooltip: {
mode: "index",
intersect: false,
},
},
scales: {
x: {
stacked: true,
title: {
display: true,
text: "Time (Last Hours)",
},
},
y: {
type: "linear",
display: true,
stacked: true,
beginAtZero: true,
title: {
display: true,
text: "Papers Scraped",
},
},
},
},
});
// Render scraper activity timeline chart with precise timing
this.renderScraperChart(labels, scraperTimeline, hourlyStats.length);
// Show simple legend for scraper activity
this.showScraperStateLegend();
}
/**
* Render the separate scraper activity timeline chart with precise timestamps
* @param {Array} hourLabels - Hour labels for main chart
* @param {Array} scraperTimeline - Timeline of scraper state changes
* @param {number} totalHours - Total hours range being displayed
*/
renderScraperChart(hourLabels, scraperTimeline, totalHours) {
if (!this.scraperCtx) {
console.warn("Scraper chart context not available");
return;
}
let timelineData = [];
if (scraperTimeline && scraperTimeline.length > 0) {
console.log("Original scraper timeline:", scraperTimeline);
// Filter out duplicate events with the same action, status, and hours_ago
const uniqueTimeline = scraperTimeline.filter((event, index, self) => {
return (
index ===
self.findIndex(
(e) =>
e.action === event.action &&
e.status === event.status &&
e.hours_ago === event.hours_ago
)
);
});
console.log("Filtered unique timeline:", uniqueTimeline);
// Sort timeline by hours_ago (oldest first = highest hours_ago first)
const sortedTimeline = [...uniqueTimeline].sort(
(a, b) => b.hours_ago - a.hours_ago
);
console.log("Sorted scraper timeline:", sortedTimeline);
// Create simple timeline with relative positions
let currentState = 0;
// Use hours_ago directly as x-coordinates (inverted so recent is on right)
for (let i = 0; i < sortedTimeline.length; i++) {
const event = sortedTimeline[i];
console.log(`Processing event ${i}:`, event);
// Set the new state based on the action
if (event.action === "start_scraper" && event.status === "success") {
currentState = 1;
} else if (
event.action === "stop_scraper" &&
event.status === "success"
) {
currentState = 0;
} else if (
event.action === "reset_scraper" &&
event.status === "success"
) {
currentState = 0;
} else if (
event.action === "pause_scraper" &&
event.status === "success"
) {
currentState = 0; // Treat pause as inactive
}
console.log(
`New state for ${event.action}: ${currentState} at ${event.hours_ago}h ago`
);
// Use negative hours_ago so recent events are on the right
timelineData.push({
x: -event.hours_ago,
y: currentState,
});
}
// Add current time point
timelineData.push({
x: 0, // Current time
y: currentState,
});
console.log("Final timeline data:", timelineData);
} else {
// No timeline data, show as inactive for the full time range
timelineData = [
{ x: -totalHours, y: 0 }, // Start of time range
{ x: 0, y: 0 }, // Current time
];
}
// Ensure we always have data points at the boundaries for proper scaling
const hasStartPoint = timelineData.some(
(point) => point.x <= -totalHours + 1
);
const hasEndPoint = timelineData.some((point) => point.x >= -1);
if (!hasStartPoint) {
      // Extend the earliest known state back to the start of the time range
      const earliestState =
        timelineData.length > 0 ? timelineData[0].y : 0;
      timelineData.unshift({ x: -totalHours, y: earliestState });
}
if (!hasEndPoint) {
// Add a point near the current time with current state
const currentState =
timelineData.length > 0 ? timelineData[timelineData.length - 1].y : 0;
timelineData.push({ x: 0, y: currentState });
}
this.scraperChart = new Chart(this.scraperCtx, {
type: "line",
data: {
datasets: [
{
label: "Scraper Active",
data: timelineData,
borderColor: "#28a745",
backgroundColor: "rgba(40, 167, 69, 0.1)",
borderWidth: 3,
fill: true,
stepped: "before", // Creates step transitions
pointRadius: 5,
pointHoverRadius: 7,
pointBackgroundColor: "#28a745",
pointBorderColor: "#ffffff",
pointBorderWidth: 2,
tension: 0,
},
],
},
options: {
responsive: true,
maintainAspectRatio: true,
aspectRatio: 10,
layout: {
padding: {
top: 10,
bottom: 10,
},
},
plugins: {
legend: {
display: false,
},
tooltip: {
callbacks: {
label: function (context) {
const status =
context.parsed.y === 1 ? "Activated" : "Deactivated";
const timestamp = new Date();
timestamp.setHours(
timestamp.getHours() - Math.abs(context.parsed.x)
);
const formattedTime = timestamp.toLocaleString("en-GB", {
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
day: "2-digit",
month: "2-digit",
year: "numeric",
});
return `Scraper: ${status} at ${formattedTime}`;
},
},
},
},
scales: {
x: {
type: "linear",
min: -totalHours,
max: 0,
title: {
display: true,
text: "Timeline (Hours Ago → Now)",
},
ticks: {
callback: function (value) {
if (value === 0) return "Now";
return `-${Math.abs(value)}h`;
},
stepSize: Math.max(1, Math.floor(totalHours / 8)), // Show reasonable number of ticks
},
grid: {
display: true,
},
},
y: {
type: "linear",
display: true,
beginAtZero: true,
max: 1.2,
min: -0.2,
title: {
display: true,
text: "Active Status",
},
ticks: {
stepSize: 1,
callback: function (value) {
return value === 1 ? "Active" : value === 0 ? "Inactive" : "";
},
},
grid: {
color: function (context) {
return context.tick.value === 0.5
? "rgba(0,0,0,0.1)"
: "rgba(0,0,0,0.05)";
},
},
},
},
},
});
}
/**
* Show a simple legend for scraper states
*/
showScraperStateLegend() {
let legendContainer = document.getElementById("scraper-state-legend");
if (!legendContainer) {
return;
}
legendContainer.classList.remove("d-none");
legendContainer.innerHTML = `
<small class="text-muted">
<i class="fas fa-info-circle"></i>
        The timeline below marks the exact times the scraper was started or stopped, plotted on a continuous time axis.
</small>
`;
}
/**
* Load and render chart data for specified time range
* @param {number} hours - Number of hours to show data for
*/
async loadData(hours) {
try {
const response = await fetch(`/scraper/stats?hours=${hours}`);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
console.log("Stats data loaded:", data);
this.render(data);
} catch (error) {
console.error("Failed to load activity stats:", error);
// Hide the chart or show an error message
const chartContainer = document.getElementById(
this.canvasId
).parentElement;
if (chartContainer) {
chartContainer.innerHTML =
'<p class="text-muted">Chart data unavailable</p>';
}
}
}
/**
* Destroy the chart instance
*/
destroy() {
if (this.chart) {
this.chart.destroy();
this.chart = null;
}
if (this.scraperChart) {
this.scraperChart.destroy();
this.scraperChart = null;
}
}
}
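For orientation, a minimal usage sketch of the chart class above, mirroring how the dashboard module later in this changeset wires it up; the canvas id and the 24-hour range are illustrative.
// Sketch: bind the chart to an existing <canvas id="activityChart"> element,
// load the last 24 hours of stats, and tear it down when leaving the page.
const activityChart = new ActivityChart("activityChart");
activityChart.loadData(24);
// ...later, when the view is discarded:
activityChart.destroy();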

View File

@@ -0,0 +1,175 @@
/**
* Common utilities for the SciPaperLoader application
*/
/**
* Display a flash message to the user as an overlay
* @param {string} message - The message to display
* @param {string} type - The type of message (success, error, warning, info)
* @param {number} duration - Duration in milliseconds (default: 5000)
*/
function showFlashMessage(message, type = "success", duration = 5000) {
const flashMsg = document.createElement("div");
const normalizedType = type === "error" ? "danger" : type;
flashMsg.className = `flash-overlay flash-${normalizedType}`;
// Get the appropriate icon based on type
const getIcon = (messageType) => {
switch (messageType) {
case "success":
return '<svg class="flash-icon" role="img" aria-label="Success:"><use xlink:href="#check-circle-fill"/></svg>';
case "danger":
return '<svg class="flash-icon" role="img" aria-label="Error:"><use xlink:href="#x-circle-fill"/></svg>';
case "warning":
return '<svg class="flash-icon" role="img" aria-label="Warning:"><use xlink:href="#exclamation-triangle-fill"/></svg>';
case "info":
return '<svg class="flash-icon" role="img" aria-label="Info:"><use xlink:href="#info-fill"/></svg>';
default:
return '<svg class="flash-icon" role="img" aria-label="Info:"><use xlink:href="#info-fill"/></svg>';
}
};
flashMsg.innerHTML = `
<div class="flash-content">
${getIcon(normalizedType)}
<div class="flash-message">${message}</div>
<button type="button" class="flash-close" onclick="removeFlashMessage(this.parentElement.parentElement)">×</button>
</div>
`;
// Add to page first
document.body.appendChild(flashMsg);
// Position all messages in stack
updateFlashMessagePositions();
// Auto dismiss
setTimeout(() => {
removeFlashMessage(flashMsg);
}, duration);
return flashMsg;
}
/**
* Remove a flash message and update positions
* @param {HTMLElement} flashMsg - The flash message element to remove
*/
function removeFlashMessage(flashMsg) {
if (!flashMsg || !flashMsg.parentNode) return;
flashMsg.classList.add("fade-out");
setTimeout(() => {
if (flashMsg.parentNode) {
flashMsg.remove();
updateFlashMessagePositions();
}
}, 300);
}
/**
* Update positions of all flash messages to create a proper stack
*/
function updateFlashMessagePositions() {
const messages = document.querySelectorAll(".flash-overlay:not(.fade-out)");
messages.forEach((msg, index) => {
const topPosition = 20 + index * 90; // 90px spacing between messages
msg.style.top = `${topPosition}px`;
msg.style.zIndex = 9999 - index; // Higher z-index for newer messages
});
}
/**
* Create a status badge HTML element
* @param {string} status - The status to create a badge for
* @returns {string} HTML string for the status badge
*/
function createStatusBadge(status) {
switch (status) {
case "New":
return '<span class="badge bg-info">New</span>';
case "Pending":
return '<span class="badge bg-warning text-dark">Pending</span>';
case "Done":
return '<span class="badge bg-success">Done</span>';
case "Failed":
return '<span class="badge bg-danger">Failed</span>';
case "success":
return '<span class="badge bg-success">Success</span>';
case "error":
return '<span class="badge bg-danger">Error</span>';
case "pending":
return '<span class="badge bg-warning text-dark">Pending</span>';
default:
return `<span class="badge bg-secondary">${status}</span>`;
}
}
/**
 * Format an ISO timestamp into a localized date/time string
 * @param {string} timestamp - ISO timestamp string
 * @returns {string} Formatted date/time string
*/
function formatTimestamp(timestamp) {
const date = new Date(timestamp);
return date.toLocaleTimeString("de-DE", {
year: "2-digit",
month: "numeric",
day: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
});
}
/**
* Truncate text to a specified length
* @param {string} text - The text to truncate
* @param {number} maxLength - Maximum length before truncation
* @returns {string} Truncated text with ellipsis if needed
*/
function truncateText(text, maxLength) {
return text.length > maxLength ? text.substring(0, maxLength) + "..." : text;
}
/**
* Toggle button loading state
* @param {HTMLElement} button - The button element
* @param {boolean} loading - Whether to show loading state
* @param {string} loadingText - Text to show when loading
*/
function toggleButtonLoading(button, loading, loadingText = "Loading...") {
if (loading) {
button.disabled = true;
button.dataset.originalText = button.innerHTML;
button.innerHTML = `<i class="fas fa-spinner fa-spin"></i> ${loadingText}`;
} else {
button.disabled = false;
button.innerHTML = button.dataset.originalText || button.innerHTML;
}
}
/**
* Generic fetch wrapper with error handling
* @param {string} url - The URL to fetch
* @param {object} options - Fetch options
* @returns {Promise} Fetch promise
*/
async function apiRequest(url, options = {}) {
const defaultOptions = {
headers: {
"Content-Type": "application/json",
},
};
const mergedOptions = { ...defaultOptions, ...options };
try {
const response = await fetch(url, mergedOptions);
const data = await response.json();
return data;
} catch (error) {
console.error(`API request failed for ${url}:`, error);
throw error;
}
}
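A brief sketch of how these helpers compose, using the /scraper/status endpoint referenced elsewhere in this changeset; the function name and message wording are illustrative.
// Sketch: fetch JSON through the wrapper and surface the outcome as a flash overlay.
async function refreshScraperBadge() {
  try {
    const data = await apiRequest("/scraper/status");
    const active = data.scraper_state ? data.scraper_state.active : data.active;
    showFlashMessage(`Scraper active: ${active}`, "info", 3000);
  } catch (error) {
    showFlashMessage("Could not load scraper status", "error");
  }
}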

View File

@@ -0,0 +1,195 @@
/**
* Configuration utilities for handling settings and form submissions
*/
class ConfigHandler {
constructor(options = {}) {
this.options = {
apiEndpoint: options.apiEndpoint || "/config/api/update_config",
...options,
};
}
/**
* Update configuration via API
* @param {object} configData - Configuration data to send
* @returns {Promise} API response promise
*/
async updateConfig(configData) {
try {
const response = await fetch(this.options.apiEndpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(configData),
});
const data = await response.json();
if (data.success) {
showFlashMessage(
data.message || "Configuration updated successfully!",
"success"
);
} else {
const errorMessage =
data.updates?.[0]?.message ||
data.message ||
"Error updating configuration";
showFlashMessage(errorMessage, "error");
}
return data;
} catch (error) {
console.error("Error updating configuration:", error);
showFlashMessage("Network error occurred", "error");
throw error;
}
}
/**
* Update volume configuration
* @param {number} volume - New volume value
*/
async updateVolume(volume) {
return this.updateConfig({ volume: volume });
}
/**
* Update schedule configuration
* @param {object} schedule - Schedule configuration object
*/
async updateSchedule(schedule) {
return this.updateConfig({ schedule: schedule });
}
/**
* Create an Alpine.js data object for schedule management
* Reads configuration from JSON script tag in the template
* @returns {object} Alpine.js data object
*/
createScheduleManager() {
const self = this;
// Read configuration from JSON script tag
const configElement = document.getElementById("schedule-config");
const config = configElement ? JSON.parse(configElement.textContent) : {};
const initialSchedule = config.initialSchedule || {};
const volume = config.totalVolume || 0;
return {
schedule: { ...initialSchedule },
volume: volume,
selectedHours: [],
newWeight: 1.0,
volumeValue: volume,
isDragging: false,
dragOperation: null,
formatHour(h) {
return String(h).padStart(2, "0") + ":00";
},
async updateVolume() {
try {
const data = await self.updateVolume(this.volumeValue);
if (data.success) {
this.volume = parseFloat(this.volumeValue);
}
} catch (error) {
// Error handling is done in updateConfig
}
},
getBackgroundStyle(hour) {
const weight = parseFloat(this.schedule[hour]);
const maxWeight = 2.5;
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65;
const backgroundColor = `hsl(210, 10%, ${lightness}%)`;
const textColor = t > 0.65 ? "white" : "black";
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
}
},
endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
this.selectedHours = [];
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
async saveSchedule() {
try {
await self.updateSchedule(this.schedule);
} catch (error) {
// Error handling is done in updateConfig
}
},
};
}
}
/**
* Global instance for easy access
*/
window.configHandler = new ConfigHandler();
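A sketch of how the schedule manager is intended to be consumed from an Alpine.js v3 component (Alpine v3 and the component name are assumptions); the JSON config is read from the #schedule-config script tag as shown above.
// Sketch: register the schedule manager as an Alpine component once Alpine initializes.
document.addEventListener("alpine:init", () => {
  Alpine.data("scheduleManager", () =>
    window.configHandler.createScheduleManager()
  );
});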

View File

@@ -0,0 +1,231 @@
/**
* Form utilities for handling form submissions with progress tracking
*/
class FormHandler {
constructor(formId, options = {}) {
this.form = document.getElementById(formId);
this.options = {
progressModalId: "progressModal",
progressBarId: "progressBar",
progressStatusId: "progressStatus",
statusCheckInterval: 1000,
onSuccess: null,
onError: null,
onProgress: null,
...options,
};
this.progressModal = null;
this.progressBar = null;
this.progressStatus = null;
this.submitButton = null;
this.initElements();
this.initEventListeners();
}
/**
* Initialize DOM elements
*/
initElements() {
if (this.options.progressModalId) {
const modalElement = document.getElementById(
this.options.progressModalId
);
if (modalElement && typeof bootstrap !== "undefined") {
this.progressModal = new bootstrap.Modal(modalElement);
}
}
this.progressBar = document.getElementById(this.options.progressBarId);
this.progressStatus = document.getElementById(
this.options.progressStatusId
);
this.submitButton = this.form?.querySelector('button[type="submit"]');
}
/**
* Initialize event listeners
*/
initEventListeners() {
if (this.form) {
this.form.addEventListener("submit", (e) => this.handleSubmit(e));
}
}
/**
* Handle form submission
* @param {Event} e - Form submit event
*/
async handleSubmit(e) {
e.preventDefault();
// Show progress modal
this.showProgress();
this.updateProgress(5, "Starting...");
// Disable submit button
if (this.submitButton) {
this.submitButton.disabled = true;
}
const formData = new FormData(this.form);
try {
const response = await fetch(this.form.action, {
method: "POST",
body: formData,
});
const data = await response.json();
if (data.error) {
this.handleError(data.error);
return;
}
// Start polling for task status if task_id is provided
if (data.task_id) {
this.pollTaskStatus(data.task_id);
} else {
// Handle immediate response
this.handleSuccess(data);
}
} catch (error) {
console.error("Form submission failed:", error);
this.handleError("Form submission failed. Please try again.");
}
}
/**
* Poll task status for long-running operations
* @param {string} taskId - Task ID to poll
*/
async pollTaskStatus(taskId) {
const checkStatus = async () => {
try {
// Construct status URL - this should be customizable
const statusUrl = this.options.statusUrlTemplate
? this.options.statusUrlTemplate.replace("{taskId}", taskId)
: `/upload/task_status/${taskId}`;
const response = await fetch(statusUrl);
const status = await response.json();
console.log("Task status:", status);
if (status.state === "SUCCESS") {
this.updateProgress(100, "Completed!");
setTimeout(() => {
this.hideProgress();
this.handleSuccess(status.result);
}, 1000);
} else if (status.state === "FAILURE") {
this.updateProgress(100, "Failed!", true);
setTimeout(() => {
this.hideProgress();
this.handleError(status.error || "Unknown error occurred");
}, 1000);
} else {
// Update progress
const progress = status.progress || 0;
this.updateProgress(progress, `Processing... (${status.state})`);
// Continue polling
setTimeout(checkStatus, this.options.statusCheckInterval);
}
} catch (error) {
console.error("Failed to check task status:", error);
// Continue polling on error
setTimeout(checkStatus, this.options.statusCheckInterval);
}
};
checkStatus();
}
/**
* Show progress modal
*/
showProgress() {
if (this.progressModal) {
this.progressModal.show();
}
}
/**
* Hide progress modal
*/
hideProgress() {
if (this.progressModal) {
this.progressModal.hide();
}
}
/**
* Update progress display
* @param {number} percentage - Progress percentage (0-100)
* @param {string} message - Status message
* @param {boolean} isError - Whether this is an error state
*/
updateProgress(percentage, message, isError = false) {
if (this.progressBar) {
this.progressBar.style.width = `${percentage}%`;
this.progressBar.textContent = `${percentage}%`;
if (isError) {
this.progressBar.classList.add("bg-danger");
}
}
if (this.progressStatus) {
this.progressStatus.textContent = message;
}
// Call custom progress callback
if (this.options.onProgress) {
this.options.onProgress(percentage, message, isError);
}
}
/**
* Handle successful form submission
* @param {object} result - Success result data
*/
handleSuccess(result) {
// Re-enable submit button
if (this.submitButton) {
this.submitButton.disabled = false;
}
// Call custom success callback
if (this.options.onSuccess) {
this.options.onSuccess(result);
} else {
// Default success handling
showFlashMessage("Operation completed successfully!", "success");
}
}
/**
* Handle form submission error
* @param {string} error - Error message
*/
handleError(error) {
this.hideProgress();
// Re-enable submit button
if (this.submitButton) {
this.submitButton.disabled = false;
}
// Call custom error callback
if (this.options.onError) {
this.options.onError(error);
} else {
// Default error handling
showFlashMessage(`Error: ${error}`, "error");
}
}
}
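A minimal sketch of attaching the handler to an upload form; the form id and the success-callback field name are illustrative, while the status URL template matches the default used above.
// Sketch: progress-tracked submission for an upload form (ids are illustrative).
const uploadFormHandler = new FormHandler("uploadForm", {
  statusUrlTemplate: "/upload/task_status/{taskId}",
  onSuccess: (result) => {
    // "added_count" is a hypothetical field name for illustration only.
    showFlashMessage(`Upload finished: ${result.added_count || 0} papers added`, "success");
  },
});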

View File

@@ -0,0 +1,485 @@
/**
* Logger Manager - Modern activity log management for the unified logger view
*/
class LoggerManager {
constructor(options = {}) {
this.categories = options.categories || [];
this.initialFilters = options.initialFilters || {};
// Pagination state
this.currentPage = 1;
this.perPage = 50;
this.totalPages = 1;
this.totalEntries = 0;
// Current filter state
this.filters = { ...this.initialFilters };
// DOM elements
this.initElements();
this.initEventListeners();
// Apply initial filters and load data
this.applyInitialFilters();
this.loadLogs();
}
initElements() {
// Form elements
this.filtersForm = document.getElementById("filterForm");
this.categoryCheckboxes = document.querySelectorAll(".category-checkbox");
this.selectAllCategories = document.getElementById("selectAllCategories");
this.statusSelect = document.getElementById("statusFilter");
this.startDateInput = document.getElementById("startDate");
this.endDateInput = document.getElementById("endDate");
this.searchTermInput = document.getElementById("searchTerm");
this.clearFiltersBtn = document.getElementById("clearFilters");
this.downloadLogsBtn = document.getElementById("downloadLogs");
this.refreshLogsBtn = document.getElementById("refreshLogs");
// Logs display elements
this.logsTableBody = document.getElementById("logsTableBody");
this.pageSizeSelect = document.getElementById("pageSize");
// Pagination elements
this.paginationContainer = document.getElementById("logsPagination");
this.paginationInfo = document.getElementById("paginationDetails");
this.prevPageBtn = document.getElementById("prevPage");
this.nextPageBtn = document.getElementById("nextPage");
this.currentPageSpan = document.getElementById("currentPageSpan");
// Modal
this.logModal = new ModalHandler("logDetailModal", "log-detail-content");
}
initEventListeners() {
// Filter form submission
if (this.filtersForm) {
this.filtersForm.addEventListener("submit", (e) => {
e.preventDefault();
this.applyFilters();
});
}
// Handle "Select All" checkbox for categories
if (this.selectAllCategories) {
this.selectAllCategories.addEventListener("change", () => {
const isChecked = this.selectAllCategories.checked;
this.categoryCheckboxes.forEach((checkbox) => {
checkbox.checked = isChecked;
});
this.applyFilters();
});
}
// Handle individual category checkboxes
this.categoryCheckboxes.forEach((checkbox) => {
checkbox.addEventListener("change", () => {
// Update "Select All" checkbox state
this.updateSelectAllState();
this.applyFilters();
});
});
// Individual filter changes for immediate application
[this.statusSelect, this.startDateInput, this.endDateInput].forEach(
(element) => {
if (element) {
element.addEventListener("change", () => {
this.applyFilters();
});
}
}
);
// Search term with debounce
if (this.searchTermInput) {
let searchTimeout;
this.searchTermInput.addEventListener("input", () => {
clearTimeout(searchTimeout);
searchTimeout = setTimeout(() => {
this.applyFilters();
}, 500);
});
}
// Clear filters
if (this.clearFiltersBtn) {
this.clearFiltersBtn.addEventListener("click", () => {
this.clearAllFilters();
});
}
// Download logs
if (this.downloadLogsBtn) {
this.downloadLogsBtn.addEventListener("click", (e) => {
e.preventDefault();
this.downloadLogs();
});
}
// Refresh logs
if (this.refreshLogsBtn) {
this.refreshLogsBtn.addEventListener("click", () => {
this.loadLogs();
});
}
// Page size change
if (this.pageSizeSelect) {
this.pageSizeSelect.addEventListener("change", () => {
this.perPage = parseInt(this.pageSizeSelect.value);
this.currentPage = 1; // Reset to first page
this.loadLogs();
});
}
// Pagination buttons
if (this.prevPageBtn) {
this.prevPageBtn.addEventListener("click", (e) => {
e.preventDefault();
if (this.currentPage > 1) {
this.currentPage--;
this.loadLogs();
}
});
}
if (this.nextPageBtn) {
this.nextPageBtn.addEventListener("click", (e) => {
e.preventDefault();
if (this.currentPage < this.totalPages) {
this.currentPage++;
this.loadLogs();
}
});
}
}
updateSelectAllState() {
const checkedCount = Array.from(this.categoryCheckboxes).filter(
(cb) => cb.checked
).length;
const totalCount = this.categoryCheckboxes.length;
if (checkedCount === 0) {
this.selectAllCategories.checked = false;
this.selectAllCategories.indeterminate = false;
} else if (checkedCount === totalCount) {
this.selectAllCategories.checked = true;
this.selectAllCategories.indeterminate = false;
} else {
this.selectAllCategories.checked = false;
this.selectAllCategories.indeterminate = true;
}
}
getSelectedCategories() {
return Array.from(this.categoryCheckboxes)
.filter((checkbox) => checkbox.checked)
.map((checkbox) => checkbox.value);
}
applyInitialFilters() {
// Set category checkboxes from initial filters
if (this.initialFilters.category) {
const selectedCategories = Array.isArray(this.initialFilters.category)
? this.initialFilters.category
: [this.initialFilters.category];
this.categoryCheckboxes.forEach((checkbox) => {
checkbox.checked = selectedCategories.includes(checkbox.value);
});
this.updateSelectAllState();
}
if (this.startDateInput && this.initialFilters.start_date) {
this.startDateInput.value = this.initialFilters.start_date;
}
if (this.endDateInput && this.initialFilters.end_date) {
this.endDateInput.value = this.initialFilters.end_date;
}
if (this.searchTermInput && this.initialFilters.search_term) {
this.searchTermInput.value = this.initialFilters.search_term;
}
}
applyFilters() {
// Collect current filter values
const selectedCategories = this.getSelectedCategories();
this.filters = {
category: selectedCategories, // Now an array
status: this.statusSelect?.value || "",
start_date: this.startDateInput?.value || "",
end_date: this.endDateInput?.value || "",
search_term: this.searchTermInput?.value || "",
};
// Reset to first page when filters change
this.currentPage = 1;
// Load logs with new filters
this.loadLogs();
// Update URL to reflect current filters (for bookmarking/sharing)
this.updateUrl();
}
clearAllFilters() {
// Clear all category checkboxes and select all
this.categoryCheckboxes.forEach((checkbox) => {
checkbox.checked = true; // Default to all selected
});
if (this.selectAllCategories) {
this.selectAllCategories.checked = true;
this.selectAllCategories.indeterminate = false;
}
if (this.statusSelect) this.statusSelect.value = "";
if (this.startDateInput) this.startDateInput.value = "";
if (this.endDateInput) this.endDateInput.value = "";
if (this.searchTermInput) this.searchTermInput.value = "";
// Apply empty filters
this.applyFilters();
}
async loadLogs() {
if (!this.logsTableBody) return;
try {
// Show loading state
this.logsTableBody.innerHTML =
'<tr><td colspan="5" class="text-center"><div class="spinner-border spinner-border-sm text-primary" role="status"><span class="visually-hidden">Loading...</span></div> Loading logs...</td></tr>';
// Build query parameters
const params = new URLSearchParams({
page: this.currentPage,
per_page: this.perPage,
});
// Add filters to query
Object.entries(this.filters).forEach(([key, value]) => {
if (value) {
if (key === "category" && Array.isArray(value)) {
// Handle multiple categories
value.forEach((cat) => {
if (cat) params.append("category", cat);
});
} else if (value) {
params.append(key, value);
}
}
});
// Fetch logs from unified API
const data = await apiRequest(`/logs/api?${params.toString()}`);
if (data.success) {
this.renderLogs(data.logs);
this.updatePagination(data.pagination);
console.log("Logs loaded successfully");
} else {
throw new Error(data.message || "Failed to load logs");
}
} catch (error) {
console.error("Failed to load logs:", error);
this.logsTableBody.innerHTML =
'<tr><td colspan="5" class="text-center text-danger">Error loading logs. Please try again.</td></tr>';
this.hidePagination();
}
}
renderLogs(logs) {
if (!this.logsTableBody) return;
this.logsTableBody.innerHTML = "";
if (!logs || logs.length === 0) {
this.logsTableBody.innerHTML =
'<tr><td colspan="5" class="text-center">No logs found matching the current filters.</td></tr>';
return;
}
logs.forEach((log) => {
const row = document.createElement("tr");
row.className = "log-entry";
row.setAttribute("data-log-id", log.id);
// Format timestamp
const timeStr = formatTimestamp(log.timestamp);
// Create status badge
const statusBadge = createStatusBadge(log.status);
// Create category badge
const categoryBadge = this.createCategoryBadge(log.category);
row.innerHTML = `
<td>${timeStr}</td>
<td>${categoryBadge}</td>
<td>${log.action}</td>
<td>${statusBadge}</td>
<td>${log.description || ""}</td>
`;
// Add click handler for details modal - whole row is clickable
row.addEventListener("click", () => {
const url = `/logs/${log.id}/detail`;
this.logModal.loadAndShow(url, "Error loading log details.");
});
this.logsTableBody.appendChild(row);
});
}
createCategoryBadge(category) {
const categoryColors = {
gui_interaction: "bg-primary",
config_change: "bg-warning",
scraper_command: "bg-info",
scraper_activity: "bg-success",
system: "bg-danger",
data_import: "bg-secondary",
};
const colorClass = categoryColors[category] || "bg-secondary";
const displayName = category
.replace(/_/g, " ")
.replace(/\b\w/g, (l) => l.toUpperCase());
return `<span class="badge ${colorClass}">${displayName}</span>`;
}
updatePagination(pagination) {
if (!pagination || !this.paginationContainer) return;
this.currentPage = pagination.page;
this.totalPages = pagination.pages;
this.totalEntries = pagination.total;
// Show pagination container
this.paginationContainer.classList.remove("d-none");
// Update pagination info
    const startEntry =
      pagination.total === 0
        ? 0
        : (pagination.page - 1) * pagination.per_page + 1;
const endEntry = Math.min(
pagination.page * pagination.per_page,
pagination.total
);
if (this.paginationInfo) {
this.paginationInfo.textContent = `Showing ${startEntry} - ${endEntry} of ${pagination.total} entries`;
}
// Update current page display
if (this.currentPageSpan) {
this.currentPageSpan.innerHTML = `<span class="page-link">${pagination.page} of ${pagination.pages}</span>`;
}
// Update previous button
if (this.prevPageBtn) {
if (pagination.has_prev) {
this.prevPageBtn.classList.remove("disabled");
this.prevPageBtn.querySelector("a").removeAttribute("tabindex");
this.prevPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "false");
} else {
this.prevPageBtn.classList.add("disabled");
this.prevPageBtn.querySelector("a").setAttribute("tabindex", "-1");
this.prevPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "true");
}
}
// Update next button
if (this.nextPageBtn) {
if (pagination.has_next) {
this.nextPageBtn.classList.remove("disabled");
this.nextPageBtn.querySelector("a").removeAttribute("tabindex");
this.nextPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "false");
} else {
this.nextPageBtn.classList.add("disabled");
this.nextPageBtn.querySelector("a").setAttribute("tabindex", "-1");
this.nextPageBtn
.querySelector("a")
.setAttribute("aria-disabled", "true");
}
}
}
hidePagination() {
if (this.paginationContainer) {
this.paginationContainer.classList.add("d-none");
}
}
updateUrl() {
// Update URL with current filters for bookmarking
const params = new URLSearchParams();
Object.entries(this.filters).forEach(([key, value]) => {
if (value) {
if (key === "category" && Array.isArray(value)) {
// Handle multiple categories
value.forEach((cat) => {
if (cat) params.append("category", cat);
});
} else if (value) {
params.append(key, value);
}
}
});
const newUrl = `${window.location.pathname}${
params.toString() ? "?" + params.toString() : ""
}`;
window.history.replaceState({}, "", newUrl);
}
downloadLogs() {
// Build download URL with current filters
const params = new URLSearchParams();
Object.entries(this.filters).forEach(([key, value]) => {
if (value) {
if (key === "category" && Array.isArray(value)) {
// Handle multiple categories
value.forEach((cat) => {
if (cat) params.append("category", cat);
});
} else if (value) {
params.append(key, value);
}
}
});
const downloadUrl = `/logs/download${
params.toString() ? "?" + params.toString() : ""
}`;
window.location.href = downloadUrl;
}
refresh() {
this.loadLogs();
}
/**
* Set modal handler for log details
* @param {ModalHandler} modalHandler - Modal handler instance
*/
setModalHandler(modalHandler) {
this.logModal = modalHandler;
}
}
// Export for use in other modules
if (typeof window !== "undefined") {
window.LoggerManager = LoggerManager;
}
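A sketch of how a logger template might instantiate the manager; the category values match those used in createCategoryBadge above, the initial filter is illustrative.
// Sketch: create the manager once the DOM is ready (element ids must exist in the template).
document.addEventListener("DOMContentLoaded", () => {
  window.loggerManager = new LoggerManager({
    categories: ["gui_interaction", "scraper_command", "scraper_activity", "system"],
    initialFilters: { category: ["scraper_activity", "system"] },
  });
});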

View File

@@ -0,0 +1,221 @@
/**
* Modal utilities for handling dynamic content loading
*/
class ModalHandler {
constructor(modalId, contentElementId) {
this.modalElement = document.getElementById(modalId);
this.contentElement = document.getElementById(contentElementId);
this.modal = null;
if (this.modalElement && typeof bootstrap !== "undefined") {
this.modal = new bootstrap.Modal(this.modalElement);
// Set up global event delegation for modal close buttons
this.setupGlobalCloseHandlers();
}
}
/**
* Load content into modal via AJAX and show it
* @param {string} url - URL to fetch content from
* @param {string} errorMessage - Message to show on error
*/
async loadAndShow(url, errorMessage = "Error loading content.") {
if (!this.modal || !this.contentElement) {
console.error("Modal or content element not found");
return;
}
try {
const response = await fetch(url);
const html = await response.text();
this.contentElement.innerHTML = html;
// Set up close button handlers after content is loaded
this.setupCloseHandlers();
// Format any JSON content in the modal
this.formatJsonContent();
this.modal.show();
} catch (error) {
console.error("Error loading modal content:", error);
this.contentElement.innerHTML = `<div class="modal-body text-danger">${errorMessage}</div>`;
this.modal.show();
}
}
/**
* Set up click handlers for elements that should open the modal
* @param {string} selector - CSS selector for clickable elements
* @param {string} urlAttribute - Attribute name containing the URL (default: 'data-url')
*/
setupClickHandlers(selector, urlAttribute = "data-url") {
    const bindHandlers = () => {
      document.querySelectorAll(selector).forEach((element) => {
        element.addEventListener("click", (e) => {
          e.preventDefault();
          const url = element.getAttribute(urlAttribute);
          if (url) {
            this.loadAndShow(url);
          }
        });
      });
    };
    // Bind now if the DOM is already parsed; a DOMContentLoaded listener added
    // after the event has fired would never run.
    if (document.readyState === "loading") {
      document.addEventListener("DOMContentLoaded", bindHandlers);
    } else {
      bindHandlers();
    }
}
/**
* Show the modal with custom content
* @param {string} content - HTML content to display
*/
showWithContent(content) {
if (!this.modal || !this.contentElement) return;
this.contentElement.innerHTML = content;
// Set up close button handlers after content is loaded
this.setupCloseHandlers();
this.modal.show();
}
/**
* Set up global event delegation for modal close buttons
*/
setupGlobalCloseHandlers() {
// Use event delegation to handle dynamically loaded close buttons
this.modalElement.addEventListener("click", (e) => {
if (
e.target.matches('[data-bs-dismiss="modal"]') ||
e.target.closest('[data-bs-dismiss="modal"]') ||
e.target.matches(".btn-close") ||
e.target.closest(".btn-close")
) {
e.preventDefault();
this.hide();
}
});
// Handle ESC key press
document.addEventListener("keydown", (e) => {
if (
e.key === "Escape" &&
this.modal &&
this.modalElement.classList.contains("show")
) {
this.hide();
}
});
}
/**
* Set up close button event handlers for dynamically loaded content
*/
setupCloseHandlers() {
// This method is now mostly redundant due to global event delegation
// but we'll keep it for backward compatibility
// Handle close buttons with data-bs-dismiss="modal"
const closeButtons = this.contentElement.querySelectorAll(
'[data-bs-dismiss="modal"]'
);
closeButtons.forEach((button) => {
button.addEventListener("click", (e) => {
e.preventDefault();
this.hide();
});
});
// Handle close buttons with .btn-close class
const closeButtonsClass =
this.contentElement.querySelectorAll(".btn-close");
closeButtonsClass.forEach((button) => {
button.addEventListener("click", (e) => {
e.preventDefault();
this.hide();
});
});
    // ESC handling is registered once in setupGlobalCloseHandlers(); re-adding a
    // document-level keydown listener here on every content load would stack
    // duplicate handlers, so it is intentionally omitted.
}
/**
* Format JSON content in the modal after it's loaded
*/
formatJsonContent() {
// Format JSON in extra data if present
const extraDataElement = this.contentElement.querySelector(
"#extra-data-content"
);
if (extraDataElement && extraDataElement.textContent.trim()) {
try {
const jsonData = JSON.parse(extraDataElement.textContent);
// Pretty-format the JSON with proper indentation
const formattedJson = JSON.stringify(jsonData, null, 2);
extraDataElement.textContent = formattedJson;
// Add syntax highlighting classes if the JSON is complex
if (typeof jsonData === "object" && jsonData !== null) {
extraDataElement.parentElement.classList.add("json-formatted");
}
} catch (e) {
// If it's not valid JSON, leave it as is but still format if it looks like JSON
const text = extraDataElement.textContent.trim();
if (text.startsWith("{") || text.startsWith("[")) {
// Try to fix common JSON issues and reformat
try {
const fixedJson = text
.replace(/'/g, '"')
.replace(/None/g, "null")
.replace(/True/g, "true")
.replace(/False/g, "false");
const parsed = JSON.parse(fixedJson);
extraDataElement.textContent = JSON.stringify(parsed, null, 2);
} catch (fixError) {
// If still can't parse, just leave as is
console.debug("Extra data is not valid JSON:", e);
}
}
}
}
// Also format old_value and new_value if they contain JSON
const preElements = this.contentElement.querySelectorAll("pre code");
preElements.forEach(function (codeElement) {
if (codeElement && codeElement.textContent.trim()) {
const text = codeElement.textContent.trim();
if (
(text.startsWith("{") && text.endsWith("}")) ||
(text.startsWith("[") && text.endsWith("]"))
) {
try {
const jsonData = JSON.parse(text);
codeElement.textContent = JSON.stringify(jsonData, null, 2);
} catch (e) {
// Not JSON, leave as is
}
}
}
});
}
/**
* Hide the modal
*/
hide() {
if (this.modal) {
this.modal.hide();
}
}
}
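A short sketch of the modal helper in use; the element ids and the detail URL pattern mirror the ones used by LoggerManager above, and the log id is illustrative.
// Sketch: load a log-detail fragment into the shared modal and display it.
const logModal = new ModalHandler("logDetailModal", "log-detail-content");
logModal.loadAndShow("/logs/42/detail", "Error loading log details.");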

View File

@@ -0,0 +1,315 @@
/**
* Paper search and processing functionality
*/
class PaperProcessor {
constructor() {
// DOM elements
this.searchForm = document.getElementById("searchPaperForm");
this.searchInput = document.getElementById("paperSearchInput");
this.searchResults = document.getElementById("searchResults");
this.paperSearchResults = document.getElementById("paperSearchResults");
this.scraperSelect = document.getElementById("scraperSelect");
this.initEventListeners();
this.loadAvailableScrapers();
}
/**
* Initialize event listeners
*/
initEventListeners() {
if (this.searchForm) {
this.searchForm.addEventListener("submit", (e) => {
e.preventDefault();
this.searchPapers();
});
}
}
/**
* Load available scraper modules
*/
async loadAvailableScrapers() {
if (!this.scraperSelect) return;
try {
const data = await apiRequest("/scraper/available_scrapers");
if (data.success && data.scrapers && data.scrapers.length > 0) {
// Clear previous options except the default one
while (this.scraperSelect.options.length > 1) {
this.scraperSelect.remove(1);
}
// Add each scraper as an option
data.scrapers.forEach((scraper) => {
const option = document.createElement("option");
option.value = scraper.name;
option.textContent = `${
scraper.name
} - ${scraper.description.substring(0, 50)}${
scraper.description.length > 50 ? "..." : ""
}`;
if (scraper.is_current) {
option.textContent += " (system default)";
}
this.scraperSelect.appendChild(option);
});
} else {
// If no scrapers or error, add a note
const option = document.createElement("option");
option.disabled = true;
option.textContent = "No scrapers available";
this.scraperSelect.appendChild(option);
}
} catch (error) {
console.error("Error loading scrapers:", error);
const option = document.createElement("option");
option.disabled = true;
option.textContent = "Error loading scrapers";
this.scraperSelect.appendChild(option);
}
}
/**
* Search for papers
*/
async searchPapers() {
if (!this.searchInput || !this.paperSearchResults || !this.searchResults)
return;
const query = this.searchInput.value.trim();
if (!query) {
showFlashMessage("Please enter a search term", "warning");
return;
}
// Show loading message
this.paperSearchResults.innerHTML =
'<tr><td colspan="5" class="text-center">Searching papers...</td></tr>';
this.searchResults.classList.remove("d-none");
try {
const data = await apiRequest(
`/api/papers?query=${encodeURIComponent(query)}`
);
if (!data.papers || data.papers.length === 0) {
this.paperSearchResults.innerHTML =
'<tr><td colspan="5" class="text-center">No papers found matching your search</td></tr>';
return;
}
this.paperSearchResults.innerHTML = "";
data.papers.forEach((paper) => {
const row = document.createElement("tr");
// Create status badge
const statusBadge = createStatusBadge(paper.status);
// Create process button (enabled only for papers not in 'Pending' status)
const processButtonDisabled =
paper.status === "Pending" ? "disabled" : "";
// Truncate title if too long
const truncatedTitle = truncateText(paper.title, 70);
row.innerHTML = `
<td>${paper.id}</td>
<td title="${paper.title}">${truncatedTitle}</td>
<td>${paper.doi || "N/A"}</td>
<td>${statusBadge}</td>
<td>
<button class="btn btn-sm btn-primary process-paper-btn"
data-paper-id="${paper.id}"
${processButtonDisabled}>
Process Now
</button>
</td>
`;
this.paperSearchResults.appendChild(row);
});
// Add event listeners to the process buttons
document.querySelectorAll(".process-paper-btn").forEach((btn) => {
btn.addEventListener("click", () => {
this.processSinglePaper(btn.getAttribute("data-paper-id"));
});
});
} catch (error) {
console.error("Error searching papers:", error);
this.paperSearchResults.innerHTML =
'<tr><td colspan="5" class="text-center">Error searching papers</td></tr>';
}
}
/**
* Process a single paper
* @param {string} paperId - The ID of the paper to process
*/
async processSinglePaper(paperId) {
if (!this.scraperSelect) return;
// Disable all process buttons to prevent multiple clicks
document.querySelectorAll(".process-paper-btn").forEach((btn) => {
btn.disabled = true;
});
// Show processing status via flash message
showFlashMessage("Processing paper...", "info");
// Get selected scraper
const selectedScraper = this.scraperSelect.value;
try {
const data = await apiRequest(`/scraper/process_single/${paperId}`, {
method: "POST",
body: JSON.stringify({
scraper_module: selectedScraper,
}),
});
if (data.success) {
// Update status in the search results
const row = document
.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`)
?.closest("tr");
if (row) {
const statusCell = row.querySelector("td:nth-child(4)");
if (statusCell) {
statusCell.innerHTML = createStatusBadge("Pending");
}
}
// Show success notification
showFlashMessage(data.message, "success");
// Set up polling to check paper status and refresh activity
this.pollPaperStatus(paperId, 3000, 20);
} else {
showFlashMessage(data.message, "error");
}
} catch (error) {
console.error("Error processing paper:", error);
showFlashMessage("Error processing paper", "error");
} finally {
// Re-enable the process buttons after a short delay
setTimeout(() => {
document.querySelectorAll(".process-paper-btn").forEach((btn) => {
if (btn.getAttribute("data-paper-id") !== paperId) {
btn.disabled = false;
}
});
}, 1000);
}
}
/**
* Poll paper status until it changes from Pending
* @param {string} paperId - The paper ID to poll
* @param {number} interval - Polling interval in milliseconds
* @param {number} maxAttempts - Maximum number of polling attempts
*/
pollPaperStatus(paperId, interval = 3000, maxAttempts = 20) {
let attempts = 0;
// Immediately refresh activity log to show the initial pending status
if (this.onActivityRefresh) {
this.onActivityRefresh();
}
const checkStatus = async () => {
attempts++;
console.log(
`Checking status of paper ${paperId}, attempt ${attempts}/${maxAttempts}`
);
try {
const data = await apiRequest(`/api/papers/${paperId}`);
if (data && data.paper) {
const paper = data.paper;
console.log(`Paper status: ${paper.status}`);
// Update the UI with the current status
const row = document
.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`)
?.closest("tr");
if (row) {
const statusCell = row.querySelector("td:nth-child(4)");
if (statusCell) {
statusCell.innerHTML = createStatusBadge(paper.status);
}
// Update processing status message if status changed
if (paper.status !== "Pending") {
if (paper.status === "Done") {
showFlashMessage(
`Paper processed successfully: ${paper.title}`,
"success"
);
} else if (paper.status === "Failed") {
showFlashMessage(
`Paper processing failed: ${
paper.error_msg || "Unknown error"
}`,
"error"
);
}
}
}
// Always refresh activity log
if (this.onActivityRefresh) {
this.onActivityRefresh();
}
// If status is still pending and we haven't reached max attempts, check again
if (paper.status === "Pending" && attempts < maxAttempts) {
setTimeout(checkStatus, interval);
} else {
// If status changed or we reached max attempts, refresh chart data too
if (this.onChartRefresh) {
this.onChartRefresh();
}
// If we hit max attempts but status is still pending, show a message
if (paper.status === "Pending" && attempts >= maxAttempts) {
showFlashMessage(
"Paper is still being processed. Check the activity log for updates.",
"info"
);
}
}
}
} catch (error) {
console.error(`Error polling paper status: ${error}`);
// If there's an error, we can still try again if under max attempts
if (attempts < maxAttempts) {
setTimeout(checkStatus, interval);
}
}
};
// Start checking
setTimeout(checkStatus, interval);
}
/**
* Set callback for activity refresh
*/
setActivityRefreshCallback(callback) {
this.onActivityRefresh = callback;
}
/**
* Set callback for chart refresh
*/
setChartRefreshCallback(callback) {
this.onChartRefresh = callback;
}
}

View File

@@ -0,0 +1,335 @@
/**
* Scraper control functionality
*/
class ScraperController {
constructor(options = {}) {
this.maxVolume = options.maxVolume || 1000;
this.volumeConfig = options.volumeConfig || 100;
// DOM elements
this.statusIndicator = document.getElementById("statusIndicator");
this.statusText = document.getElementById("statusText");
this.startButton = document.getElementById("startButton");
this.pauseButton = document.getElementById("pauseButton");
this.stopButton = document.getElementById("stopButton");
this.resetButton = document.getElementById("resetButton");
this.initEventListeners();
this.initStatusPolling();
}
/**
* Initialize event listeners for scraper controls
*/
initEventListeners() {
if (this.startButton) {
this.startButton.addEventListener("click", () => this.startScraper());
}
if (this.pauseButton) {
this.pauseButton.addEventListener("click", () =>
this.togglePauseScraper()
);
}
if (this.stopButton) {
this.stopButton.addEventListener("click", () => this.stopScraper());
}
if (this.resetButton) {
this.resetButton.addEventListener("click", () => this.resetScraper());
}
// Configuration form (handles both volume and scraper module)
const configForm = document.getElementById("volumeForm");
if (configForm) {
configForm.addEventListener("submit", (e) => {
e.preventDefault();
this.updateConfiguration();
});
}
}
/**
* Initialize status polling
*/
initStatusPolling() {
this.updateStatus();
setInterval(() => this.updateStatus(), 5000); // Poll every 5 seconds
}
/**
* Update scraper status display
*/
async updateStatus() {
try {
const data = await apiRequest("/scraper/status");
console.log("Status data received:", data);
// Remove all status classes first
if (this.statusIndicator) {
this.statusIndicator.classList.remove(
"status-active",
"status-paused",
"status-inactive"
);
}
// Handle the new JSON structure with scraper_state
const scraperState = data.scraper_state || data; // Fallback for old structure
if (scraperState.active) {
if (scraperState.paused) {
this.statusIndicator?.classList.add("status-paused");
if (this.statusText) this.statusText.textContent = "Paused";
if (this.pauseButton) this.pauseButton.textContent = "Resume";
} else {
this.statusIndicator?.classList.add("status-active");
if (this.statusText) this.statusText.textContent = "Active";
if (this.pauseButton) this.pauseButton.textContent = "Pause";
}
if (this.startButton) this.startButton.disabled = true;
if (this.pauseButton) this.pauseButton.disabled = false;
if (this.stopButton) this.stopButton.disabled = false;
if (this.resetButton) this.resetButton.disabled = false;
} else {
this.statusIndicator?.classList.add("status-inactive");
if (this.statusText) this.statusText.textContent = "Inactive";
if (this.startButton) this.startButton.disabled = false;
if (this.pauseButton) this.pauseButton.disabled = true;
if (this.stopButton) this.stopButton.disabled = true;
if (this.resetButton) this.resetButton.disabled = false;
}
} catch (error) {
console.error("Error fetching status:", error);
// On error, show inactive state
if (this.statusIndicator) {
this.statusIndicator.classList.remove(
"status-active",
"status-paused",
"status-inactive"
);
this.statusIndicator.classList.add("status-inactive");
}
if (this.statusText) this.statusText.textContent = "Error";
}
}
/**
* Start the scraper
*/
async startScraper() {
console.log("Start button clicked - sending request to /scraper/start");
try {
const data = await apiRequest("/scraper/start", {
method: "POST",
body: JSON.stringify({}),
});
console.log("Data received:", data);
if (data.success) {
showFlashMessage("Scraper started successfully", "success");
this.updateStatus();
// Trigger activity refresh if callback is provided
if (this.onActivityRefresh) {
setTimeout(() => this.onActivityRefresh(), 1000);
}
} else {
showFlashMessage(data.message, "error");
}
} catch (error) {
console.error("Error starting scraper:", error);
showFlashMessage("Error starting scraper: " + error.message, "error");
}
}
/**
* Toggle pause/resume scraper
*/
async togglePauseScraper() {
try {
const data = await apiRequest("/scraper/pause", {
method: "POST",
body: JSON.stringify({}),
});
if (data.success) {
showFlashMessage(data.message, "info");
this.updateStatus();
if (this.onActivityRefresh) {
setTimeout(() => this.onActivityRefresh(), 1000);
}
} else {
showFlashMessage(data.message, "error");
}
} catch (error) {
console.error("Error toggling pause:", error);
showFlashMessage("Error controlling scraper: " + error.message, "error");
}
}
/**
* Stop the scraper
*/
async stopScraper() {
try {
const data = await apiRequest("/scraper/stop", {
method: "POST",
body: JSON.stringify({}),
});
if (data.success) {
showFlashMessage("Scraper stopped successfully", "warning");
this.updateStatus();
if (this.onActivityRefresh) {
setTimeout(() => this.onActivityRefresh(), 1000);
}
} else {
showFlashMessage(data.message, "error");
}
} catch (error) {
console.error("Error stopping scraper:", error);
showFlashMessage("Error stopping scraper: " + error.message, "error");
}
}
/**
* Reset the scraper
*/
async resetScraper() {
if (
!confirm(
"Are you sure you want to reset the scraper? This will stop all current tasks, optionally clear non-pending papers, and restart the scraper."
)
) {
return;
}
// Disable button to prevent multiple clicks
if (this.resetButton) this.resetButton.disabled = true;
// Show a loading message
showFlashMessage("Resetting scraper, please wait...", "info");
try {
const data = await apiRequest("/scraper/reset", {
method: "POST",
body: JSON.stringify({
clear_papers: true, // You could make this configurable with a checkbox
}),
});
if (data.success) {
showFlashMessage(
"Scraper has been completely reset and restarted",
"success"
);
// Update everything
this.updateStatus();
if (this.onActivityRefresh) {
this.onActivityRefresh();
setTimeout(() => this.onActivityRefresh(), 1000);
}
if (this.onChartRefresh) {
this.onChartRefresh();
}
} else {
showFlashMessage(data.message || "Error resetting scraper", "error");
}
} catch (error) {
console.error("Error resetting scraper:", error);
showFlashMessage("Error resetting scraper: " + error.message, "error");
} finally {
// Re-enable button
if (this.resetButton) this.resetButton.disabled = false;
}
}
/**
* Update configuration (volume and/or scraper module)
*/
async updateConfiguration() {
const volumeInput = document.getElementById("volumeInput");
const scraperSelect = document.getElementById("mainScraperSelect");
const submitButton = document.querySelector(
'#volumeForm button[type="submit"]'
);
if (!submitButton) return;
const updates = {};
let hasChanges = false;
// Check volume changes
if (volumeInput) {
      const volume = parseInt(volumeInput.value, 10);
      // Basic validation (parse first so non-numeric input is rejected)
      if (isNaN(volume) || volume < 1 || volume > this.maxVolume) {
showFlashMessage(
`Please enter a valid volume between 1 and ${this.maxVolume}`,
"warning"
);
volumeInput.focus();
return;
}
updates.volume = volume;
hasChanges = true;
}
// Check scraper module changes
if (scraperSelect && scraperSelect.value) {
updates.scraper_module = scraperSelect.value;
hasChanges = true;
}
if (!hasChanges) {
showFlashMessage("No changes to save", "info");
return;
}
// Toggle loading state
toggleButtonLoading(submitButton, true, "Updating...");
try {
const data = await apiRequest("/scraper/update_config", {
method: "POST",
body: JSON.stringify(updates),
});
if (data.success) {
showFlashMessage(
data.message || "Configuration updated successfully",
"success"
);
} else {
showFlashMessage(
data.message || "Failed to update configuration",
"error"
);
}
} catch (error) {
console.error("Error updating configuration:", error);
showFlashMessage(
"Network error while updating configuration. Please try again.",
"error"
);
} finally {
toggleButtonLoading(submitButton, false);
}
}
/**
* Set callback for activity refresh
*/
setActivityRefreshCallback(callback) {
this.onActivityRefresh = callback;
}
/**
* Set callback for chart refresh
*/
setChartRefreshCallback(callback) {
this.onChartRefresh = callback;
}
}

View File

@@ -0,0 +1,87 @@
/**
* Main scraper dashboard initialization and coordination
*/
class ScraperDashboard {
constructor(config = {}) {
this.config = {
maxVolume: config.maxVolume || 1000,
volumeConfig: config.volumeConfig || 100,
currentTimeRange: 24,
};
this.initComponents();
this.setupCallbacks();
this.initializeData();
}
/**
* Initialize all dashboard components
*/
initComponents() {
// Initialize chart
this.activityChart = new ActivityChart("activityChart");
// Initialize scraper controller
this.scraperController = new ScraperController({
maxVolume: this.config.maxVolume,
volumeConfig: this.config.volumeConfig,
});
// Initialize paper processor
this.paperProcessor = new PaperProcessor();
// Initialize activity monitor
this.activityMonitor = new ActivityMonitor();
}
/**
* Setup callbacks between components
*/
setupCallbacks() {
// Set up activity refresh callbacks
const activityRefreshCallback = () =>
this.activityMonitor.loadRecentActivity();
this.scraperController.setActivityRefreshCallback(activityRefreshCallback);
this.paperProcessor.setActivityRefreshCallback(activityRefreshCallback);
// Set up chart refresh callbacks
const chartRefreshCallback = (timeRange = this.config.currentTimeRange) => {
this.config.currentTimeRange = timeRange;
this.activityChart.loadData(timeRange);
};
this.scraperController.setChartRefreshCallback(chartRefreshCallback);
this.paperProcessor.setChartRefreshCallback(chartRefreshCallback);
this.activityMonitor.setChartRefreshCallback(chartRefreshCallback);
}
/**
* Initialize data on page load
*/
initializeData() {
// Load recent activity
this.activityMonitor.loadRecentActivity();
// Load chart data after a short delay to ensure Chart.js is loaded
setTimeout(() => {
this.activityChart.loadData(this.config.currentTimeRange);
}, 100);
}
/**
* Refresh all dashboard data
*/
refreshAll() {
this.activityMonitor.loadRecentActivity();
this.activityChart.loadData(this.config.currentTimeRange);
this.scraperController.updateStatus();
}
}
/**
* Initialize the scraper dashboard
* @param {Object} config - Configuration object with Jinja variables
*/
function initScraperDashboard(config = {}) {
return new ScraperDashboard(config);
}
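A sketch of the expected template-side call; the numeric values stand in for what the template would inject (e.g. via Jinja).
// Sketch: initialize the dashboard after the DOM and Chart.js are available.
document.addEventListener("DOMContentLoaded", () => {
  window.scraperDashboard = initScraperDashboard({
    maxVolume: 1000, // placeholder for a template-injected value
    volumeConfig: 100, // placeholder for a template-injected value
  });
});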

View File

@@ -0,0 +1,500 @@
/**
* Scraper Overview functionality
*/
class ScraperOverview {
constructor() {
this.modal = null;
this.scrapers = [];
this.systemConfig = {};
this.init();
}
init() {
// Initialize modal reference
this.modal = document.getElementById("scraperOverviewModal");
// Load data when modal is shown
if (this.modal) {
this.modal.addEventListener("show.bs.modal", () => {
this.loadScraperOverview();
});
}
}
async loadScraperOverview() {
const loadingEl = document.getElementById("scraperOverviewLoading");
const errorEl = document.getElementById("scraperOverviewError");
const contentEl = document.getElementById("scraperOverviewContent");
// Show loading state
loadingEl?.classList.remove("d-none");
errorEl?.classList.add("d-none");
contentEl?.classList.add("d-none");
try {
// Load scrapers, system config, and publishers in parallel
const [scrapersResponse, statusResponse, publishersResponse] =
await Promise.all([
fetch("/scraper/scrapers"),
fetch("/scraper/status"),
fetch("/scraper/publishers"),
]);
if (
!scrapersResponse.ok ||
!statusResponse.ok ||
!publishersResponse.ok
) {
throw new Error("Failed to load scraper information");
}
const scrapersData = await scrapersResponse.json();
const statusData = await statusResponse.json();
const publishersData = await publishersResponse.json();
if (
!scrapersData.success ||
!statusData.success ||
!publishersData.success
) {
throw new Error(
scrapersData.message ||
statusData.message ||
publishersData.message ||
"Unknown error"
);
}
this.scrapers = scrapersData.scrapers;
this.systemConfig = statusData;
this.publishersData = publishersData.data;
// Update UI
this.updateSystemConfig();
this.updateScrapersTable();
this.updatePublishersSection();
this.updateStatusFlowDiagram();
// Show content
loadingEl?.classList.add("d-none");
contentEl?.classList.remove("d-none");
} catch (error) {
console.error("Error loading scraper overview:", error);
// Show error state
loadingEl?.classList.add("d-none");
const errorMessage = document.getElementById(
"scraperOverviewErrorMessage"
);
if (errorMessage) {
errorMessage.textContent =
error.message || "Failed to load scraper information";
}
errorEl?.classList.remove("d-none");
}
}
updateSystemConfig() {
// Current scraper module
const currentModuleEl = document.getElementById("currentScraperModule");
if (currentModuleEl) {
const currentModule =
this.systemConfig.current_scraper_module || "System Default";
currentModuleEl.textContent = currentModule;
currentModuleEl.className = "badge bg-primary";
}
// Volume limit
const volumeLimitEl = document.getElementById("currentVolumeLimit");
if (volumeLimitEl) {
const volumeLimit = this.systemConfig.volume_config || "Unknown";
volumeLimitEl.textContent = volumeLimit;
}
// Total modules
const totalModulesEl = document.getElementById("totalScraperModules");
if (totalModulesEl) {
totalModulesEl.textContent = this.scrapers.length;
}
// Paper counts summary
const paperCountsEl = document.getElementById("paperCountsSummary");
if (paperCountsEl && this.systemConfig.paper_counts) {
const counts = this.systemConfig.paper_counts;
paperCountsEl.innerHTML = `
<div class="d-flex flex-wrap gap-2">
<span class="badge bg-primary">${counts.new || 0} New</span>
<span class="badge bg-warning">${
counts.processing || 0
} Processing</span>
<span class="badge bg-success">${
counts.done || 0
} Done</span>
<span class="badge bg-danger">${
counts.failed || 0
} Failed</span>
<span class="badge bg-info">${
counts.pending || 0
} Pending</span>
<span class="badge bg-secondary">${
counts.retrying || 0
} Retrying</span>
</div>
`;
}
}
updateScrapersTable() {
const tbody = document.getElementById("scrapersTableBody");
if (!tbody) return;
tbody.innerHTML = "";
this.scrapers.forEach((scraper) => {
const row = document.createElement("tr");
// Check if this is the current active scraper
const isCurrentScraper =
scraper.name === this.systemConfig.current_scraper_module;
if (scraper.error) {
row.innerHTML = `
<td>${scraper.name}</td>
<td colspan="5" class="text-danger">
<i class="fas fa-exclamation-triangle"></i> ${scraper.error}
</td>
`;
} else {
row.innerHTML = `
<td>
<strong>${scraper.name}</strong>
${
scraper.name === "dummy"
? '<span class="badge bg-info ms-2">Test Module</span>'
: ""
}
${
isCurrentScraper
? '<span class="badge bg-success ms-2"><i class="fas fa-check"></i> Active</span>'
: ""
}
</td>
<td class="scraper-description">
${this.truncateDescription(scraper.description)}
</td>
<td class="input-status-list">
${this.renderStatusBadges(
scraper.input_statuses,
"bg-info"
)}
</td>
<td class="status-output">
<span class="badge bg-success">${
scraper.output_status_success
}</span>
</td>
<td class="status-output">
<span class="badge bg-danger">${
scraper.output_status_failure
}</span>
</td>
<td class="status-output">
<span class="badge bg-warning">${
scraper.output_status_processing
}</span>
</td>
`;
}
// Highlight the current scraper row
if (isCurrentScraper) {
row.classList.add("table-success");
}
tbody.appendChild(row);
});
}
updateStatusFlowDiagram() {
const diagramEl = document.getElementById("statusFlowDiagram");
if (!diagramEl) return;
// Analyze actual scrapers to build real flow
const statusFlow = this.analyzeScraperFlow();
let diagramHTML = '<div class="status-flow-container">';
// Create visual flow based on actual scrapers
statusFlow.forEach((stage, index) => {
if (index > 0) {
diagramHTML +=
'<div class="status-flow-arrow text-center my-2"><i class="fas fa-arrow-down fa-2x text-muted"></i></div>';
}
diagramHTML += '<div class="status-flow-stage mb-4 p-3 border rounded">';
diagramHTML += `<div class="fw-bold mb-2 text-primary">${stage.title}</div>`;
if (stage.scrapers && stage.scrapers.length > 0) {
diagramHTML +=
'<div class="mb-2"><small class="text-muted">Handled by: ' +
stage.scrapers.map((s) => `<strong>${s}</strong>`).join(", ") +
"</small></div>";
}
diagramHTML += '<div class="status-badges">';
stage.statuses.forEach((status, statusIndex) => {
if (statusIndex > 0) {
diagramHTML += '<i class="fas fa-arrow-right status-flow-arrow"></i>';
}
const badgeClass = this.getStatusBadgeClass(status);
diagramHTML += `<span class="status-flow-node badge ${badgeClass}">${status}</span>`;
});
diagramHTML += "</div>";
if (stage.description) {
diagramHTML += `<div class="small text-muted mt-2">${stage.description}</div>`;
}
diagramHTML += "</div>";
});
diagramHTML += "</div>";
// Add explanation
diagramHTML += `
<div class="mt-4 p-3 bg-light rounded">
<h6><i class="fas fa-info-circle"></i> Flow Explanation:</h6>
<ul class="small mb-0">
<li><strong>Modular Processing:</strong> Each scraper handles specific input statuses</li>
<li><strong>Status Transitions:</strong> Papers move through statuses as they are processed</li>
<li><strong>Pipeline Architecture:</strong> Output from one scraper can become input to another</li>
<li><strong>Error Handling:</strong> Failed papers can be retried by specialized scrapers</li>
<li><strong>Parallel Processing:</strong> Multiple scrapers can work on different papers simultaneously</li>
</ul>
</div>
`;
diagramEl.innerHTML = diagramHTML;
}
analyzeScraperFlow() {
// Build actual flow based on available scrapers
const stages = [];
const allInputStatuses = new Set();
const allOutputStatuses = new Set();
const scrapersByInput = {};
// Analyze scrapers to understand the flow
this.scrapers.forEach((scraper) => {
if (scraper.input_statuses) {
scraper.input_statuses.forEach((status) => {
allInputStatuses.add(status);
if (!scrapersByInput[status]) {
scrapersByInput[status] = [];
}
scrapersByInput[status].push(scraper.name);
});
}
if (scraper.output_status_success)
allOutputStatuses.add(scraper.output_status_success);
if (scraper.output_status_failure)
allOutputStatuses.add(scraper.output_status_failure);
});
// Entry point
if (allInputStatuses.has("New")) {
stages.push({
title: "Entry Point",
statuses: ["New"],
scrapers: scrapersByInput["New"] || [],
description: "Newly uploaded papers enter the processing pipeline",
});
}
// Processing stages
const processingStatuses = Array.from(allInputStatuses).filter(
(status) => !["New", "Done", "Failed"].includes(status)
);
if (processingStatuses.length > 0) {
stages.push({
title: "Processing Stages",
statuses: processingStatuses,
scrapers: [],
description: "Papers move through various processing stages",
});
}
// Final outputs
const finalStatuses = ["Done", "Failed"];
stages.push({
title: "Final States",
statuses: finalStatuses.filter((status) => allOutputStatuses.has(status)),
scrapers: [],
description: "Papers end up in final success or failure states",
});
// Retry handling
if (allInputStatuses.has("Failed")) {
stages.push({
title: "Retry Processing",
statuses: ["Failed", "Retrying"],
scrapers: scrapersByInput["Failed"] || [],
description: "Failed papers can be retried with specialized scrapers",
});
}
return stages;
}
getStatusBadgeClass(status) {
const statusClasses = {
New: "bg-primary",
Pending: "bg-warning",
Processing: "bg-warning",
Retrying: "bg-warning",
Done: "bg-success",
Failed: "bg-danger",
HtmlDownloaded: "bg-info",
PublisherDetected: "bg-info",
TextExtracted: "bg-info",
};
return statusClasses[status] || "bg-secondary";
}
renderStatusBadges(statuses, defaultClass = "bg-secondary") {
if (!Array.isArray(statuses)) return "";
return statuses
.map(
(status) =>
`<span class="badge ${this.getStatusBadgeClass(
status
)} status-badge">${status}</span>`
)
.join("");
}
truncateDescription(description, maxLength = 100) {
if (!description) return "No description available";
if (description.length <= maxLength) return description;
return description.substring(0, maxLength).trim() + "...";
}
updatePublishersSection() {
// Update publisher statistics
const publisherStatsEl = document.getElementById("publisherStats");
if (publisherStatsEl && this.publishersData && this.publishersData.stats) {
const stats = this.publishersData.stats;
publisherStatsEl.innerHTML = `
<div class="col-md-3">
<div class="text-center">
<div class="h4 text-primary mb-1">${stats.total_publishers}</div>
<div class="text-muted small">Total Publishers</div>
</div>
</div>
<div class="col-md-3">
<div class="text-center">
<div class="h4 text-success mb-1">${stats.publishers_with_parsers}</div>
<div class="text-muted small">With Parsers</div>
</div>
</div>
<div class="col-md-3">
<div class="text-center">
<div class="h4 text-warning mb-1">${stats.publishers_without_parsers}</div>
<div class="text-muted small">Missing Parsers</div>
</div>
</div>
<div class="col-md-3">
<div class="text-center">
<div class="h4 text-info mb-1">${stats.total_papers_with_publisher}</div>
<div class="text-muted small">Papers with Publisher</div>
</div>
</div>
`;
}
// Update publishers table
const publishersTableBody = document.getElementById("publishersTableBody");
if (
publishersTableBody &&
this.publishersData &&
this.publishersData.publishers
) {
publishersTableBody.innerHTML = "";
if (this.publishersData.publishers.length === 0) {
publishersTableBody.innerHTML = `
<tr>
<td colspan="4" class="text-center text-muted py-4">
<i class="fas fa-info-circle"></i> No publishers detected yet.<br>
<small>Run the publisher_detector scraper to identify publishers from paper URLs.</small>
</td>
</tr>
`;
return;
}
this.publishersData.publishers.forEach((publisher) => {
const row = document.createElement("tr");
// Publisher status badge
const statusBadge = publisher.has_parser
? '<span class="badge bg-success"><i class="fas fa-check"></i> Available</span>'
: '<span class="badge bg-warning"><i class="fas fa-exclamation-triangle"></i> Missing</span>';
// Parser availability indicator
const parserIndicator = publisher.has_parser
? '<i class="fas fa-check-circle text-success" title="Parser available"></i>'
: '<i class="fas fa-times-circle text-warning" title="Parser not available"></i>';
row.innerHTML = `
<td>
<strong>${publisher.name}</strong>
</td>
<td>
<span class="badge bg-info">${publisher.paper_count}</span>
</td>
<td>${statusBadge}</td>
<td class="text-center">${parserIndicator}</td>
`;
publishersTableBody.appendChild(row);
});
}
}
// Public method to show the modal
show() {
if (this.modal) {
const bootstrapModal = new bootstrap.Modal(this.modal);
bootstrapModal.show();
}
}
}
// Global function to load scraper overview (used by retry button)
function loadScraperOverview() {
if (window.scraperOverview) {
window.scraperOverview.loadScraperOverview();
}
}
// Global function to show scraper overview modal
function showScraperOverview() {
if (!window.scraperOverview) {
window.scraperOverview = new ScraperOverview();
}
window.scraperOverview.show();
}
// Initialize when DOM is ready
document.addEventListener("DOMContentLoaded", function () {
window.scraperOverview = new ScraperOverview();
});
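
For reference, a sketch of the JSON shapes this class expects from /scraper/scrapers, /scraper/status, and /scraper/publishers, inferred from the fields read in loadScraperOverview, updateSystemConfig, updateScrapersTable, and updatePublishersSection; the field names come from the code, while the example values and the publisher name are illustrative.

// Illustrative response shapes (assumed example values, not actual server output).
const exampleScrapersResponse = {
  success: true,
  scrapers: [
    {
      name: "dummy",
      description: "Test module that simulates paper processing.",
      input_statuses: ["New"],
      output_status_success: "Done",
      output_status_failure: "Failed",
      output_status_processing: "Processing",
    },
  ],
};

const exampleStatusResponse = {
  success: true,
  current_scraper_module: "dummy",
  volume_config: 100,
  paper_counts: { new: 3, processing: 1, done: 10, failed: 0, pending: 2, retrying: 0 },
};

const examplePublishersResponse = {
  success: true,
  data: {
    stats: {
      total_publishers: 5,
      publishers_with_parsers: 3,
      publishers_without_parsers: 2,
      total_papers_with_publisher: 42,
    },
    publishers: [{ name: "elsevier", paper_count: 12, has_parser: true }],
  },
};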

View File

@ -0,0 +1,337 @@
/**
* Table utilities for handling data tables with pagination, sorting, and filtering
*/
class TableHandler {
constructor(tableId, options = {}) {
this.table = document.getElementById(tableId);
this.options = {
enableSorting: true,
enableFiltering: true,
enablePagination: true,
loadingText: "Loading...",
noDataText: "No data available",
...options,
};
this.currentPage = 1;
this.itemsPerPage = options.itemsPerPage || 20;
this.sortColumn = null;
this.sortDirection = "asc";
this.filters = {};
this.initializeTable();
}
/**
* Initialize table features
*/
initializeTable() {
if (!this.table) return;
if (this.options.enableSorting) {
this.setupSortingHandlers();
}
if (this.options.enableFiltering) {
this.setupFilteringHandlers();
}
}
/**
* Set up sorting handlers for table headers
*/
setupSortingHandlers() {
const headers = this.table.querySelectorAll("th[data-sortable]");
headers.forEach((header) => {
header.style.cursor = "pointer";
header.addEventListener("click", () => {
const column = header.dataset.sortable;
this.sortByColumn(column);
});
});
}
/**
* Sort table by column
* @param {string} column - Column to sort by
*/
sortByColumn(column) {
if (this.sortColumn === column) {
this.sortDirection = this.sortDirection === "asc" ? "desc" : "asc";
} else {
this.sortColumn = column;
this.sortDirection = "asc";
}
this.updateSortIndicators();
this.refreshData();
}
/**
* Update sort direction indicators in table headers
*/
updateSortIndicators() {
// Remove existing sort indicators
this.table.querySelectorAll("th .sort-indicator").forEach((indicator) => {
indicator.remove();
});
// Add indicator to current sort column
if (this.sortColumn) {
const header = this.table.querySelector(
`th[data-sortable="${this.sortColumn}"]`
);
if (header) {
const indicator = document.createElement("span");
indicator.className = "sort-indicator";
indicator.innerHTML = this.sortDirection === "asc" ? " ↑" : " ↓";
header.appendChild(indicator);
}
}
}
/**
* Set up filtering handlers
*/
setupFilteringHandlers() {
const filterInputs = document.querySelectorAll("[data-table-filter]");
filterInputs.forEach((input) => {
input.addEventListener("input", (e) => {
const filterKey = e.target.dataset.tableFilter;
this.setFilter(filterKey, e.target.value);
});
});
}
/**
* Set a filter value
* @param {string} key - Filter key
* @param {string} value - Filter value
*/
setFilter(key, value) {
if (value && value.trim() !== "") {
this.filters[key] = value.trim();
} else {
delete this.filters[key];
}
this.currentPage = 1; // Reset to first page when filtering
this.refreshData();
}
/**
* Show loading state
*/
showLoading() {
const tbody = this.table.querySelector("tbody");
if (tbody) {
const colCount = this.table.querySelectorAll("th").length;
tbody.innerHTML = `
<tr>
<td colspan="${colCount}" class="text-center">${this.options.loadingText}</td>
</tr>
`;
}
}
/**
* Show no data message
*/
showNoData() {
const tbody = this.table.querySelector("tbody");
if (tbody) {
const colCount = this.table.querySelectorAll("th").length;
tbody.innerHTML = `
<tr>
<td colspan="${colCount}" class="text-center">${this.options.noDataText}</td>
</tr>
`;
}
}
/**
* Render table data
* @param {Array} data - Array of data objects
* @param {Function} rowRenderer - Function to render each row
*/
renderData(data, rowRenderer) {
const tbody = this.table.querySelector("tbody");
if (!tbody) return;
if (!data || data.length === 0) {
this.showNoData();
return;
}
tbody.innerHTML = data.map(rowRenderer).join("");
}
/**
* Build query parameters for API requests
* @returns {object} Query parameters object
*/
buildQueryParams() {
const params = {
page: this.currentPage,
per_page: this.itemsPerPage,
...this.filters,
};
if (this.sortColumn) {
params.sort_by = this.sortColumn;
params.sort_dir = this.sortDirection;
}
return params;
}
/**
* Refresh table data (to be implemented by subclasses or passed as callback)
*/
refreshData() {
if (this.options.onRefresh) {
this.options.onRefresh(this.buildQueryParams());
}
}
/**
* Update pagination controls
* @param {object} paginationInfo - Pagination information
*/
updatePagination(paginationInfo) {
const paginationContainer = document.querySelector(".pagination-container");
if (!paginationContainer || !paginationInfo) return;
// This is a basic implementation - you might want to enhance this
const { current_page, total_pages, has_prev, has_next } = paginationInfo;
let paginationHTML = '<nav><ul class="pagination justify-content-center">';
// Previous button
if (has_prev) {
paginationHTML += `<li class="page-item"><a class="page-link" href="#" data-page="${
current_page - 1
}">Previous</a></li>`;
} else {
paginationHTML +=
'<li class="page-item disabled"><span class="page-link">Previous</span></li>';
}
// Page numbers (simplified - show current and adjacent pages)
const startPage = Math.max(1, current_page - 2);
const endPage = Math.min(total_pages, current_page + 2);
for (let i = startPage; i <= endPage; i++) {
if (i === current_page) {
paginationHTML += `<li class="page-item active"><span class="page-link">${i}</span></li>`;
} else {
paginationHTML += `<li class="page-item"><a class="page-link" href="#" data-page="${i}">${i}</a></li>`;
}
}
// Next button
if (has_next) {
paginationHTML += `<li class="page-item"><a class="page-link" href="#" data-page="${
current_page + 1
}">Next</a></li>`;
} else {
paginationHTML +=
'<li class="page-item disabled"><span class="page-link">Next</span></li>';
}
paginationHTML += "</ul></nav>";
paginationContainer.innerHTML = paginationHTML;
// Add click handlers for pagination links
paginationContainer.querySelectorAll("a[data-page]").forEach((link) => {
link.addEventListener("click", (e) => {
e.preventDefault();
this.currentPage = parseInt(e.target.dataset.page);
this.refreshData();
});
});
}
}
/**
* Specialized table handler for papers
*/
class PapersTableHandler extends TableHandler {
constructor(tableId, options = {}) {
super(tableId, {
apiEndpoint: "/api/papers",
...options,
});
}
/**
* Render a paper row
* @param {object} paper - Paper data object
* @returns {string} HTML string for table row
*/
renderPaperRow(paper) {
const statusBadge = createStatusBadge(paper.status);
const truncatedTitle = truncateText(paper.title, 70);
return `
<tr>
<td>
<a href="#" class="paper-link" data-url="/papers/${
paper.id
}/detail">
${truncatedTitle}
</a>
</td>
<td>
${paper.doi
? `<a href="https://doi.org/${paper.doi}" target="_blank">${paper.doi}</a>`
: "N/A"}
</td>
<td>${paper.journal || "N/A"}</td>
<td>${paper.issn || "N/A"}</td>
<td>${statusBadge}</td>
<td>${formatTimestamp(paper.created_at)}</td>
<td>${formatTimestamp(paper.updated_at)}</td>
</tr>
`;
}
/**
* Load and display papers data
* @param {object} params - Query parameters
*/
async loadPapers(params = {}) {
this.showLoading();
try {
const queryString = new URLSearchParams(params).toString();
const url = `${this.options.apiEndpoint}?${queryString}`;
const response = await fetch(url);
const data = await response.json();
if (data.papers) {
this.renderData(data.papers, (paper) => this.renderPaperRow(paper));
if (data.pagination) {
this.updatePagination(data.pagination);
}
} else {
this.showNoData();
}
} catch (error) {
console.error("Error loading papers:", error);
this.showNoData();
}
}
/**
* Refresh data implementation
*/
refreshData() {
this.loadPapers(this.buildQueryParams());
}
}
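
A usage sketch for the papers table: the option names, the data-sortable and data-table-filter attributes, and the pagination-container class come from the handlers above, while the element ids and markup are assumptions.

// Sketch: assumed markup.
//   <table id="papersTable"> ... <th data-sortable="title">Title</th> ... </table>
//   <input type="text" data-table-filter="search" placeholder="Search papers...">
//   <div class="pagination-container"></div>
const papersTable = new PapersTableHandler("papersTable", {
  itemsPerPage: 50,
});

// Initial load; sorting, filtering, and pagination later funnel through
// refreshData(), which re-queries the endpoint with buildQueryParams().
papersTable.loadPapers(papersTable.buildQueryParams());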

View File

@ -7,3 +7,34 @@
.progress-bar {
width: 0%;
}
/* JSON formatting styles */
.json-formatted {
background-color: #f8f9fa;
border: 1px solid #dee2e6;
border-radius: 0.375rem;
font-family: "Monaco", "Menlo", "Ubuntu Mono", monospace;
font-size: 0.875rem;
line-height: 1.4;
}
.json-formatted code {
color: #212529;
background-color: transparent;
padding: 0;
}
/* Improve readability of JSON in modals */
#extra-data-content {
white-space: pre-wrap;
word-break: break-word;
font-family: "Monaco", "Menlo", "Ubuntu Mono", monospace;
font-size: 0.875rem;
line-height: 1.4;
}
/* Style for old/new value code blocks */
pre code {
white-space: pre-wrap;
word-break: break-word;
}

View File

@ -1,4 +1,8 @@
{% extends "base.html.jinja" %} {% block content %}
{% extends "base.html.jinja" %}
{% block title %}About{% endblock title %}
{% block content %}
<h1 class="mb-4">📘 About This App</h1>
<p class="lead">

View File

@ -7,6 +7,7 @@
<meta name="keywords" content="science, papers, research, management" />
<title>{{ app_title }}</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.0/font/bootstrap-icons.css">
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
<!-- Optional Alpine.js -->
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
@ -17,6 +18,8 @@
<main class="container my-5">{% block content %}{% endblock content %}</main>
{% include "footer.html.jinja" %}
<!-- Include common utilities globally -->
<script src="{{ url_for('static', filename='js/common.js') }}"></script>
{% block scripts %}{% endblock scripts %}
</body>

View File

@ -38,6 +38,43 @@
</div>
</div>
<div class="form-section">
<h6>Scheduler Timezone</h6>
<p class="text-muted">Configure the timezone for the APScheduler to use for job
scheduling.</p>
<div class="mb-3">
<label for="timezone" class="form-label">Timezone:</label>
<select class="form-control" id="timezone" name="timezone" required>
<option value="UTC" {% if timezone_config.timezone=='UTC' %}selected{% endif %}>
UTC</option>
<option value="Europe/Berlin" {% if timezone_config.timezone=='Europe/Berlin'
%}selected{% endif %}>Europe/Berlin (CET/CEST)</option>
<option value="Europe/London" {% if timezone_config.timezone=='Europe/London'
%}selected{% endif %}>Europe/London (GMT/BST)</option>
<option value="Europe/Paris" {% if timezone_config.timezone=='Europe/Paris'
%}selected{% endif %}>Europe/Paris (CET/CEST)</option>
<option value="Europe/Rome" {% if timezone_config.timezone=='Europe/Rome'
%}selected{% endif %}>Europe/Rome (CET/CEST)</option>
<option value="US/Eastern" {% if timezone_config.timezone=='US/Eastern'
%}selected{% endif %}>US/Eastern (EST/EDT)</option>
<option value="US/Central" {% if timezone_config.timezone=='US/Central'
%}selected{% endif %}>US/Central (CST/CDT)</option>
<option value="US/Mountain" {% if timezone_config.timezone=='US/Mountain'
%}selected{% endif %}>US/Mountain (MST/MDT)</option>
<option value="US/Pacific" {% if timezone_config.timezone=='US/Pacific'
%}selected{% endif %}>US/Pacific (PST/PDT)</option>
<option value="Asia/Tokyo" {% if timezone_config.timezone=='Asia/Tokyo'
%}selected{% endif %}>Asia/Tokyo (JST)</option>
<option value="Asia/Shanghai" {% if timezone_config.timezone=='Asia/Shanghai'
%}selected{% endif %}>Asia/Shanghai (CST)</option>
<option value="Australia/Sydney" {% if
timezone_config.timezone=='Australia/Sydney' %}selected{% endif %}>
Australia/Sydney (AEST/AEDT)</option>
</select>
<div class="form-text">Current: {{ timezone_config.timezone }}</div>
</div>
</div>
<div class="form-section">
<h6>System Settings</h6>
<p class="text-muted">Configure general system behavior.</p>
@ -65,15 +102,21 @@
<div class="col-md-6">
<form method="post" action="{{ url_for('config.update_scraper_module') }}">
<div class="form-section">
<h6>Scraper Module</h6>
<div class="d-flex justify-content-between align-items-center mb-2">
<h6>Scraper Module</h6>
<button type="button" class="btn btn-outline-info btn-sm"
onclick="showScraperOverview()" title="View scraper modules overview">
<i class="fas fa-info-circle"></i> How Scrapers Work
</button>
</div>
<p class="text-muted">Select which scraper module to use for processing papers.</p>
<div class="mb-3">
<label for="scraper_module" class="form-label">Active Scraper Module:</label>
<select class="form-control" id="scraper_module" name="scraper_module">
{% for module in available_scraper_modules %}
<option value="{{ module }}" {% if module==current_scraper_module %} selected
{%endif %}>
<option value="{{ module }}" {% if module==current_scraper_module %} selected {%
endif %}>
{{ module }}
{% if scraper_details[module] %}
- {{ scraper_details[module].description[:50] }}...

View File

@ -53,4 +53,13 @@
{% endif %}
</div>
</div>
<!-- Include the scraper overview modal -->
{% include "partials/scraper_overview_modal.html.jinja" %}
{% endblock content %}
{% block scripts %}
{{ super() }}
<script src="{{ url_for('static', filename='js/scraper-overview.js') }}"></script>
{% endblock scripts %}

View File

@ -39,12 +39,19 @@
}
</style>
<script>
const initialSchedule = {{ schedule | tojson }};
const totalVolume = {{ volume }};
<!-- Configuration data in JSON format for clean separation -->
<script type="application/json" id="schedule-config">
{
"initialSchedule": {{ schedule | tojson }},
"totalVolume": {{ volume | tojson }},
"maxVolume": {{ max_volume | tojson }}
}
</script>
<div x-data="scheduleManager(initialSchedule, totalVolume)" class="tab-pane active">
<!-- Load config handler for modular functionality -->
<script src="{{ url_for('static', filename='js/config-handler.js') }}"></script>
<div x-data="configHandler.createScheduleManager()" class="tab-pane active">
<div class="card">
<div class="card-header d-flex justify-content-between">
<h5>Scheduling Configuration</h5>
@ -211,164 +218,3 @@
</div>
</div>
</div>
<script>
function scheduleManager(initial, volume) {
return {
schedule: { ...initial },
volume: volume,
selectedHours: [],
newWeight: 1.0,
volumeValue: volume,
isDragging: false,
dragOperation: null,
formatHour(h) {
return String(h).padStart(2, "0") + ":00";
},
updateVolume() {
fetch('{{ url_for('config.api_update_config') }}', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
volume: this.volumeValue
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
this.volume = parseFloat(this.volumeValue);
showFlashMessage('Volume updated successfully!', 'success');
} else {
showFlashMessage(data.updates?.[0]?.message || 'Error updating volume', 'error');
}
})
.catch(error => {
console.error('Error:', error);
showFlashMessage('Network error occurred', 'error');
});
},
getBackgroundStyle(hour) {
const weight = parseFloat(this.schedule[hour]);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
getBackgroundStyleFromValue(value) {
const weight = parseFloat(value);
const maxWeight = 2.5; // You can adjust this
// Normalize weight (0.0 to 1.0)
const t = Math.min(weight / maxWeight, 1.0);
// Interpolate HSL lightness: 95% (light) to 30% (dark)
const lightness = 95 - t * 65; // 95 → 30
const backgroundColor = `hsl(210, 10%, ${lightness}%)`; // soft gray-blue palette
const textColor = t > 0.65 ? "white" : "black"; // adaptive text color
return {
backgroundColor,
color: textColor,
};
},
startDrag(event, hour) {
event.preventDefault();
this.isDragging = true;
this.dragOperation = this.isSelected(hour) ? "remove" : "add";
this.toggleSelect(hour);
},
dragSelect(hour) {
if (!this.isDragging) return;
const selected = this.isSelected(hour);
if (this.dragOperation === "add" && !selected) {
this.selectedHours.push(hour);
} else if (this.dragOperation === "remove" && selected) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
}
},
endDrag() {
this.isDragging = false;
},
toggleSelect(hour) {
if (this.isSelected(hour)) {
this.selectedHours = this.selectedHours.filter((h) => h !== hour);
} else {
this.selectedHours.push(hour);
}
},
isSelected(hour) {
return this.selectedHours.includes(hour);
},
applyWeight() {
this.selectedHours.forEach((hour) => {
this.schedule[hour] = parseFloat(this.newWeight).toFixed(1);
});
this.selectedHours = [];
},
getTotalWeight() {
return Object.values(this.schedule).reduce(
(sum, w) => sum + parseFloat(w),
0
);
},
getPapersPerHour(hour) {
const total = this.getTotalWeight();
if (total === 0) return 0;
return (
(parseFloat(this.schedule[hour]) / total) *
this.volume
).toFixed(1);
},
saveSchedule() {
fetch('{{ url_for('config.api_update_config') }}', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
schedule: this.schedule
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
showFlashMessage('Schedule updated successfully!', 'success');
} else {
showFlashMessage(data.updates?.[0]?.message || 'Error updating schedule', 'error');
}
})
.catch(error => {
console.error('Error:', error);
showFlashMessage('Network error occurred', 'error');
});
}
};
}
</script>
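
The removed inline scheduleManager is superseded by config-handler.js, which is not shown in this diff. A rough sketch of how its createScheduleManager() factory could consume the embedded JSON block: the element id schedule-config, the key names, and the factory name come from the template, everything else is an assumption.

// Sketch only; not the actual config-handler.js implementation.
const configEl = document.getElementById("schedule-config");
const { initialSchedule, totalVolume, maxVolume } = JSON.parse(configEl.textContent);

window.configHandler = {
  createScheduleManager() {
    // Returns the Alpine.js component state, analogous to the removed inline version.
    return {
      schedule: { ...initialSchedule },
      volume: totalVolume,
      maxVolume: maxVolume,
      // ...remaining methods (updateVolume, saveSchedule, drag selection, etc.)
    };
  },
};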

View File

@ -1,66 +1,146 @@
{% extends "base.html.jinja" %}
<!-- Include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
{% block title %}Home - SciPaperLoader{% endblock title %}
{% block content %}
<div class="container text-center">
<div class="container text-center mb-5">
<h1 class="display-4">Welcome to SciPaperLoader</h1>
<p class="lead">Your paper scraping tool is ready.</p>
<p class="text-muted">A simple tool to scrape papers from Zotero API.</p>
<p class="lead">Your comprehensive paper management and scraping platform</p>
<p class="text-muted">Automate paper collection, manage metadata, and monitor download progress with intelligent
scheduling</p>
</div>
<div class="row g-4">
<div class="col-md-6">
<div class="card shadow-sm">
<!-- Main Features Section -->
<div class="row g-4 mb-5">
<div class="col-12">
<h2 class="text-center mb-4">🚀 Core Features</h2>
</div>
<div class="col-lg-4 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">📄 CSV Import</h5>
<h5 class="card-title">🎛️ Scraper Control Panel</h5>
<p class="card-text">
Upload a 37-column CSV to import paper metadata. Only relevant fields
(title, DOI, ISSN, etc.) are stored. Errors are reported without
aborting the batch.
Start, pause, and monitor the automated paper scraping process. View real-time statistics
and activity charts, and process individual papers on demand.
</p>
<a href="{{ url_for('upload.upload') }}" class="btn btn-sm btn-outline-primary">Upload Now</a>
<a href="{{ url_for('scraper.index') }}" class="btn btn-primary">Open Control Panel</a>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<h5 class="card-title">🧠 Background Scraper</h5>
<p class="card-text">
A daemon process runs hourly to fetch papers using Zotero API.
Downloads are randomized to mimic human behavior and avoid detection.
</p>
<a href="{{ url_for('logger.list_logs') }}" class="btn btn-sm btn-outline-secondary">View Logs</a>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="col-lg-4 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">📚 Paper Management</h5>
<p class="card-text">
Monitor paper status (Pending, Done, Failed), download PDFs, and
inspect errors. Files are stored on disk in structured folders per
DOI.
Browse, search, and manage your paper collection. View download status,
inspect metadata, export data, and handle failed downloads.
</p>
<a href="{{ url_for('papers.list_papers') }}" class="btn btn-sm btn-outline-success">Browse Papers</a>
<a href="{{ url_for('papers.list_papers') }}" class="btn btn-success">Browse Papers</a>
</div>
</div>
</div>
<div class="col-md-6">
<div class="card shadow-sm">
<div class="col-lg-4 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">🕒 Download Schedule</h5>
<h5 class="card-title">📄 CSV Data Import</h5>
<p class="card-text">
Control how many papers are downloaded per hour. Configure hourly
volume (e.g. 2/hour at daytime, 0 at night) to match your bandwidth or
usage pattern.
Bulk import paper metadata from CSV files. Supports the 37-column format with
intelligent duplicate detection and comprehensive error reporting.
</p>
<a href="{{ url_for('config.schedule') }}" class="btn btn-sm btn-outline-warning">Adjust Schedule</a>
<a href="{{ url_for('upload.upload') }}" class="btn btn-outline-primary">Import Data</a>
</div>
</div>
</div>
</div>
<!-- Configuration & Monitoring Section -->
<div class="row g-4 mb-5">
<div class="col-12">
<h2 class="text-center mb-4">⚙️ Configuration & Monitoring</h2>
</div>
<div class="col-lg-4 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">🕒 Download Scheduling</h5>
<p class="card-text">
Configure hourly download quotas and timing patterns. Set different rates for
day/night hours to optimize bandwidth usage and avoid detection.
</p>
<a href="{{ url_for('config.schedule') }}" class="btn btn-warning">Manage Schedule</a>
</div>
</div>
</div>
<div class="col-lg-4 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">🔧 System Configuration</h5>
<p class="card-text">
Adjust global settings including daily volume limits, download paths,
and scraper module selection for optimal performance.
</p>
<a href="{{ url_for('config.general') }}" class="btn btn-outline-secondary">System Settings</a>
</div>
</div>
</div>
<div class="col-lg-4 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">📊 Activity Logs</h5>
<p class="card-text">
Monitor system activity, track scraping progress, and troubleshoot issues
with comprehensive logging and activity timeline views.
</p>
<a href="{{ url_for('logger.list_logs') }}" class="btn btn-info">View Logs</a>
</div>
</div>
</div>
</div>
<!-- Advanced Features Section -->
<div class="row g-4 mb-5">
<div class="col-12">
<h2 class="text-center mb-4">🔬 Advanced Features</h2>
</div>
<div class="col-lg-6 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">🗄️ Database Management</h5>
<p class="card-text">
Manage your paper database with tools for generating test data,
cleaning up records, and performing maintenance operations.
</p>
<a href="{{ url_for('config.database') }}" class="btn btn-outline-danger">Database Tools</a>
</div>
</div>
</div>
<div class="col-lg-6 col-md-6">
<div class="card shadow-sm h-100">
<div class="card-body">
<h5 class="card-title">🧠 Intelligent Processing</h5>
<p class="card-text">
Background daemon with randomized timing, human-like behavior patterns,
and automatic retry mechanisms for robust paper collection.
</p>
<div class="mt-3">
<span class="badge bg-success me-2">Auto-Retry</span>
<span class="badge bg-info me-2">Smart Timing</span>
<span class="badge bg-warning">Rate Limiting</span>
</div>
</div>
</div>
</div>
</div>
{% endblock content %}

View File

@ -1,117 +0,0 @@
{% extends "base.html.jinja" %}
{% block content %}
<h1>Activity Logs</h1>
<form method="get" class="mb-3">
<div class="row g-2">
<div class="col-md-3">
<label for="category" class="form-label">Category:</label>
<select name="category" id="category" class="form-select">
<option value="">All</option>
{% for cat in categories %}
<option value="{{ cat }}" {% if category==cat %}selected{% endif %}>{{ cat }}</option>
{% endfor %}
</select>
</div>
<div class="col-md-3">
<label for="start_date" class="form-label">Start Date:</label>
<input type="date" name="start_date" id="start_date" value="{{ start_date }}" class="form-control">
</div>
<div class="col-md-3">
<label for="end_date" class="form-label">End Date:</label>
<input type="date" name="end_date" id="end_date" value="{{ end_date }}" class="form-control">
</div>
<div class="col-md-3">
<label for="search_term" class="form-label">Search:</label>
<input type="text" name="search_term" id="search_term" value="{{ search_term }}" class="form-control">
</div>
</div>
<div class="mt-3">
<button type="submit" class="btn btn-primary">Filter</button>
<a href="{{ url_for('logger.download_logs', category=category, start_date=start_date, end_date=end_date, search_term=search_term) }}"
class="btn btn-secondary">Download CSV</a>
</div>
</form>
<ul class="list-group">
{% for log in logs %}
<li class="list-group-item log-item" data-log-id="{{ log.id }}">
<div class="d-flex justify-content-between align-items-center">
<div class="ms-2 me-auto">
<div class="fw-bold">{{ log.timestamp }}</div>
{{ log.action }} - {{ log.description }}
</div>
<span class="badge bg-primary rounded-pill">{{ log.category }}</span>
</div>
</li>
{% endfor %}
</ul>
{% if pagination %}
<nav aria-label="Page navigation" class="mt-4">
<ul class="pagination justify-content-center">
{% if pagination.has_prev %}
<li class="page-item">
<a class="page-link"
href="{{ url_for('logger.list_logs', page=pagination.prev_num, category=category, start_date=start_date, end_date=end_date, search_term=search_term) }}">Previous</a>
</li>
{% else %}
<li class="page-item disabled">
<span class="page-link">Previous</span>
</li>
{% endif %}
<li class="page-item disabled">
<span class="page-link">Page {{ pagination.page }} of {{ pagination.pages }}</span>
</li>
{% if pagination.has_next %}
<li class="page-item">
<a class="page-link"
href="{{ url_for('logger.list_logs', page=pagination.next_num, category=category, start_date=start_date, end_date=end_date, search_term=search_term) }}">Next</a>
</li>
{% else %}
<li class="page-item disabled">
<span class="page-link">Next</span>
</li>
{% endif %}
</ul>
</nav>
{% endif %}
<!-- Modal for log details -->
<div class="modal fade" id="logDetailModal" tabindex="-1" aria-hidden="true">
<div class="modal-dialog modal-lg modal-dialog-scrollable">
<div class="modal-content" id="log-detail-content">
<!-- Log details will be loaded here via AJAX -->
</div>
</div>
</div>
<script>
document.addEventListener("DOMContentLoaded", function () {
const modal = new bootstrap.Modal(document.getElementById('logDetailModal'));
const content = document.getElementById('log-detail-content');
document.querySelectorAll('.log-item').forEach(item => {
item.addEventListener('click', function () {
const logId = this.getAttribute('data-log-id');
fetch(`/logs/${logId}/detail`)
.then(response => response.text())
.then(html => {
content.innerHTML = html;
modal.show();
})
.catch(err => {
content.innerHTML = '<div class="modal-body text-danger">Error loading log details.</div>';
modal.show();
});
});
});
});
</script>
{% endblock content %}

View File

@ -0,0 +1,263 @@
{% extends "base.html.jinja" %}
{% block title %}Activity Logs{% endblock title %}
{% block styles %}
{{ super() }}
<style>
.logs-container {
background: white;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
.filter-panel {
background: #f8f9fa;
border-bottom: 1px solid #dee2e6;
padding: 1rem;
}
.log-entry {
cursor: pointer;
transition: background-color 0.2s ease;
}
.log-entry:hover {
background-color: #f8f9fa;
}
.category-badge {
font-size: 0.75rem;
padding: 0.25rem 0.5rem;
}
.activity-controls {
width: auto;
display: inline-block;
}
.logs-table th {
background-color: #f8f9fa;
font-weight: 600;
}
.pagination-info {
font-size: 0.875rem;
color: #6c757d;
}
.search-results-container {
max-height: 600px;
overflow-y: auto;
}
/* JSON formatting styles */
.json-formatted {
background-color: #f8f9fa;
border: 1px solid #e9ecef;
border-radius: 0.375rem;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 0.875rem;
line-height: 1.4;
}
.json-formatted code {
color: #495057;
background: transparent;
}
</style>
{% endblock styles %}
{% block content %}
<div class="container-fluid mt-4">
<h1><i class="bi bi-list-ul"></i> Activity Logs</h1>
<!-- Include standardized flash messages -->
{% include "partials/flash_messages.html.jinja" %}
<div class="logs-container">
<!-- Filter Panel -->
<div class="filter-panel">
<form id="filterForm" class="row g-3">
<div class="col-md-3">
<label class="form-label">Categories:</label>
<div class="category-checkbox-container p-2"
style="max-height: 200px; overflow-y: auto; background-color: white; border: 1px solid #ced4da; border-radius: 0.375rem;">
<div class="form-check">
<input class="form-check-input" type="checkbox" id="selectAllCategories" {% if not
selected_categories or selected_categories|length==categories|length %}checked{% endif
%}>
<label class="form-check-label fw-bold" for="selectAllCategories">
All Categories
</label>
</div>
<hr class="my-2">
{% for cat in categories %}
<div class="form-check">
<input class="form-check-input category-checkbox" type="checkbox" id="category_{{ cat }}"
value="{{ cat }}" {% if not selected_categories or cat in selected_categories
%}checked{% endif %}>
<label class="form-check-label" for="category_{{ cat }}">
{{ cat.replace('_', ' ').title() }}
</label>
</div>
{% endfor %}
</div>
</div>
<div class="col-md-3">
<div class="row">
<label for="statusFilter" class="form-label">Status:</label>
<select id="statusFilter" class="form-select form-select-sm">
<option value="">All Statuses</option>
<option value="success">Success</option>
<option value="error">Error</option>
<option value="warning">Warning</option>
<option value="info">Info</option>
<option value="pending">Pending</option>
</select>
</div>
</div>
<div class="col-md-3">
<label for="startDate" class="form-label">Start Date:</label>
<input type="date" id="startDate" class="form-control form-control-sm"
value="{{ start_date or '' }}">
<label for="endDate" class="form-label mt-2">End Date:</label>
<input type="date" id="endDate" class="form-control form-control-sm" value="{{ end_date or '' }}">
</div>
<div class="col-md-3">
<label for="searchTerm" class="form-label">Search:</label>
<input type="text" id="searchTerm" class="form-control form-control-sm"
placeholder="Search in actions and descriptions" value="{{ search_term or '' }}">
</div>
<div class="col-12 d-flex justify-content-end mt-3">
<button type="button" id="clearFilters" class="btn btn-outline-secondary btn-sm">
<i class="bi bi-x"></i> Clear Filters
</button>
</div>
</form>
</div>
<!-- Controls Panel -->
<div class="d-flex justify-content-between align-items-center p-3 border-bottom">
<div class="d-flex align-items-center gap-3">
<div class="form-group mb-0">
<label for="pageSize" class="form-label mb-0 me-2">Show:</label>
<select id="pageSize" class="form-select form-select-sm activity-controls">
<option value="20">20</option>
<option value="50" selected>50</option>
<option value="100">100</option>
</select>
</div>
<span id="paginationInfo" class="pagination-info">Loading...</span>
</div>
<div class="d-flex gap-2">
<button type="button" id="refreshLogs" class="btn btn-outline-primary btn-sm">
<i class="bi bi-arrow-clockwise"></i> Refresh
</button>
<button type="button" id="downloadLogs" class="btn btn-outline-success btn-sm">
<i class="bi bi-download"></i> Download CSV
</button>
</div>
</div>
<!-- Logs Table -->
<div class="search-results-container">
<table class="table table-hover logs-table mb-0">
<thead class="sticky-top">
<tr>
<th style="width: 150px;">Timestamp</th>
<th style="width: 120px;">Category</th>
<th style="width: 180px;">Action</th>
<th style="width: 100px;">Status</th>
<th>Description</th>
</tr>
</thead>
<tbody id="logsTableBody">
<tr>
<td colspan="5" class="text-center py-4">
<div class="spinner-border spinner-border-sm text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
Loading logs...
</td>
</tr>
</tbody>
</table>
</div>
<!-- Pagination Controls -->
<nav id="logsPagination" aria-label="Logs pagination" class="p-3 border-top d-none">
<div class="d-flex justify-content-between align-items-center">
<div class="pagination-info">
<span id="paginationDetails">Showing 0 - 0 of 0 entries</span>
</div>
<ul class="pagination pagination-sm mb-0">
<li class="page-item" id="prevPage">
<a class="page-link" href="#" aria-label="Previous">
<span aria-hidden="true">«</span>
</a>
</li>
<li class="page-item active" id="currentPageItem">
<span class="page-link" id="currentPageSpan">1</span>
</li>
<li class="page-item" id="nextPage">
<a class="page-link" href="#" aria-label="Next">
<span aria-hidden="true">»</span>
</a>
</li>
</ul>
</div>
</nav>
</div>
</div>
<!-- Modal for log details -->
<div class="modal fade" id="logDetailModal" tabindex="-1" aria-hidden="true" data-bs-backdrop="true"
data-bs-keyboard="true">
<div class="modal-dialog modal-lg modal-dialog-scrollable">
<div class="modal-content" id="log-detail-content">
<!-- Log details will be loaded here via AJAX -->
</div>
</div>
</div>
{% endblock content %}
{% block scripts %}
{{ super() }}
<script src="{{ url_for('static', filename='js/modal-handler.js') }}"></script>
<script src="{{ url_for('static', filename='js/logger-manager.js') }}"></script>
<script>
document.addEventListener('DOMContentLoaded', function () {
// Initialize the logger manager
window.loggerManager = new LoggerManager({
initialFilters: {
category: {{ selected_categories | tojson }},
start_date: "{{ start_date or '' }}",
end_date: "{{ end_date or '' }}",
search_term: "{{ search_term or '' }}"
}
});
// Set up modal handler for log details
const logModal = new ModalHandler('logDetailModal', 'log-detail-content');
window.loggerManager.setModalHandler(logModal);
});
</script>
{% endblock scripts %}

View File

@ -8,7 +8,7 @@
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li class="nav-item">
<a class="nav-link" href="{{ url_for('scraper.index') }}">Scraper</a>
<a class="nav-link" href="{{ url_for('scraper.index') }}">Control Panel</a>
</li>
<li class="nav-item">
<a class="nav-link" href="{{ url_for('upload.upload') }}">Import CSV</a>

View File

@ -1,7 +1,12 @@
{% extends "base.html.jinja" %}
{% block title %}Papers{% endblock title %}
{% block content %}
<!-- Include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
{# --- Sort direction logic for each column --- #}
{% set title_sort = 'asc' if sort_by != 'title' or sort_dir == 'desc' else 'desc' %}
{% set journal_sort = 'asc' if sort_by != 'journal' or sort_dir == 'desc' else 'desc' %}
@ -275,28 +280,14 @@
</ul>
</nav>
<script>
document.addEventListener("DOMContentLoaded", function () {
const modal = new bootstrap.Modal(document.getElementById('paperDetailModal'));
const content = document.getElementById('paper-detail-content');
document.querySelectorAll('.paper-link').forEach(link => {
link.addEventListener('click', function (e) {
e.preventDefault();
const url = this.getAttribute('data-url');
fetch(url)
.then(response => response.text())
.then(html => {
content.innerHTML = html;
modal.show();
})
.catch(err => {
content.innerHTML = '<div class="modal-body text-danger">Error loading details.</div>';
modal.show();
});
});
});
});
</script>
{% endblock content %}
{% block scripts %}
{{ super() }}
<script src="{{ url_for('static', filename='js/modal-handler.js') }}"></script>
<script>
// Use the reusable ModalHandler for paper details
const paperModal = new ModalHandler('paperDetailModal', 'paper-detail-content');
paperModal.setupClickHandlers('.paper-link');
</script>
{% endblock scripts %}
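
modal-handler.js itself is not part of this diff. Judging from the call sites here and in the logs template above, a compatible implementation could look roughly like the following sketch: the constructor arguments and setupClickHandlers come from the call sites, while the fetch-and-show body is assumed and mirrors the removed inline script.

// Sketch of a ModalHandler compatible with the call sites above (assumed implementation).
class ModalHandler {
  constructor(modalId, contentId) {
    this.modalEl = document.getElementById(modalId);
    this.contentEl = document.getElementById(contentId);
    this.modal = new bootstrap.Modal(this.modalEl);
  }

  // Attach click handlers to links carrying a data-url attribute,
  // load the HTML fragment, and show it in the modal.
  setupClickHandlers(selector) {
    document.querySelectorAll(selector).forEach((link) => {
      link.addEventListener("click", (e) => {
        e.preventDefault();
        this.loadAndShow(link.getAttribute("data-url"));
      });
    });
  }

  async loadAndShow(url) {
    try {
      const response = await fetch(url);
      this.contentEl.innerHTML = await response.text();
    } catch (err) {
      this.contentEl.innerHTML =
        '<div class="modal-body text-danger">Error loading details.</div>';
    }
    this.modal.show();
  }
}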

View File

@ -1,93 +1,145 @@
<!-- Server-side flash messages from Flask -->
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
<div class="server-flash-messages">
{% for category, message in messages %}
<div class="alert alert-{{ category }} alert-dismissible fade show" role="alert">
{{ message }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endfor %}
</div>
{% endif %}
{% endwith %}
<!-- JavaScript flash message container for client-side messages -->
<div id="clientFlashContainer"></div>
<!-- SVG Icons for Flash Messages -->
<svg xmlns="http://www.w3.org/2000/svg" class="d-none">
<symbol id="check-circle-fill" viewBox="0 0 16 16">
<path
d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z" />
</symbol>
<symbol id="info-fill" viewBox="0 0 16 16">
<path
d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z" />
</symbol>
<symbol id="exclamation-triangle-fill" viewBox="0 0 16 16">
<path
d="M8.982 1.566a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566zM8 5c.535 0 .954.462.9.995l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995A.905.905 0 0 1 8 5zm.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2z" />
</symbol>
<symbol id="x-circle-fill" viewBox="0 0 16 16">
<path
d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z" />
</symbol>
</svg>
<!-- CSS styles for flash overlay messages -->
<style>
.client-flash-message {
.flash-overlay {
position: fixed;
top: 30%;
left: 50%;
transform: translate(-50%, -50%);
z-index: 1000;
width: 300px;
text-align: center;
font-weight: bold;
padding: 12px;
margin-bottom: 20px;
border-radius: 6px;
top: 20px;
right: 20px;
z-index: 9999;
max-width: 420px;
opacity: 1;
transition: opacity 5s ease-in-out;
transition: all 0.3s ease-in-out;
transform: translateX(0);
margin-bottom: 10px;
}
.client-flash-message.success {
background-color: #d4edda;
border-color: #c3e6cb;
color: #155724;
.flash-content {
padding: 16px 20px;
border-radius: 8px;
box-shadow: 0 6px 20px rgba(0, 0, 0, 0.15);
display: flex;
align-items: flex-start;
font-weight: 500;
border-left: 4px solid;
position: relative;
}
.client-flash-message.error {
.flash-icon {
width: 20px;
height: 20px;
margin-right: 12px;
margin-top: 1px;
flex-shrink: 0;
}
.flash-message {
flex: 1;
line-height: 1.4;
}
.flash-close {
background: none;
border: none;
font-size: 20px;
cursor: pointer;
padding: 0;
margin-left: 12px;
opacity: 0.6;
line-height: 1;
font-weight: bold;
flex-shrink: 0;
margin-top: -2px;
}
.flash-close:hover {
opacity: 1;
}
.flash-success .flash-content {
background-color: #d1e7dd;
border-left-color: #198754;
color: #0f5132;
}
.flash-danger .flash-content {
background-color: #f8d7da;
border-color: #f5c6cb;
border-left-color: #dc3545;
color: #721c24;
}
.client-flash-message.info {
background-color: #d1ecf1;
border-color: #bee5eb;
color: #0c5460;
}
.client-flash-message.warning {
.flash-warning .flash-content {
background-color: #fff3cd;
border-color: #ffeeba;
color: #856404;
border-left-color: #ffc107;
color: #664d03;
}
.client-flash-message.fade {
.flash-info .flash-content {
background-color: #cff4fc;
border-left-color: #0dcaf0;
color: #055160;
}
.flash-overlay.fade-out {
opacity: 0;
transform: translateX(100%);
}
/* Stack multiple flash messages with smooth transitions */
.flash-overlay {
/* Dynamic positioning will be set by JavaScript */
}
/* Ensure proper z-index stacking */
.flash-overlay:nth-child(1) {
z-index: 9999;
}
.flash-overlay:nth-child(2) {
z-index: 9998;
}
.flash-overlay:nth-child(3) {
z-index: 9997;
}
.flash-overlay:nth-child(4) {
z-index: 9996;
}
.flash-overlay:nth-child(5) {
z-index: 9995;
}
</style>
<!-- Server-side flash messages from Flask -->
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
<script>
// Global flash message function that can be used from anywhere
function showFlashMessage(message, type = 'success', duration = 5000) {
const flashMsg = document.createElement('div');
flashMsg.className = `client-flash-message ${type}`;
flashMsg.textContent = message;
const container = document.getElementById('clientFlashContainer');
container.appendChild(flashMsg);
// Apply fade effect after some time
setTimeout(() => flashMsg.classList.add('fade'), duration - 3000);
// Remove element after duration
setTimeout(() => flashMsg.remove(), duration);
return flashMsg;
}
// Initialize toast messages if Bootstrap is used
// Convert server-side flash messages to overlay messages
document.addEventListener('DOMContentLoaded', function () {
// Initialize any Bootstrap toasts if they exist
if (typeof bootstrap !== 'undefined' && bootstrap.Toast) {
const toastElList = [].slice.call(document.querySelectorAll('.toast'));
toastElList.map(function (toastEl) {
return new bootstrap.Toast(toastEl);
});
}
});
{% for category, message in messages %}
showFlashMessage({{ message| tojson }}, {{ (category if category != 'error' else 'danger')| tojson }});
{% endfor %}
});
</script>
{% endif %}
{% endwith %}
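
The overlay-style showFlashMessage used above is defined elsewhere (presumably in common.js, which the base template now loads). A sketch consistent with the CSS classes in this partial: only the class names and the error-to-danger mapping are taken from this file, the function body is an assumption.

// Sketch of an overlay-style showFlashMessage matching the CSS above (assumed implementation).
function showFlashMessage(message, type = "success", duration = 5000) {
  const overlay = document.createElement("div");
  overlay.className = `flash-overlay flash-${type}`;

  const content = document.createElement("div");
  content.className = "flash-content";

  const messageEl = document.createElement("div");
  messageEl.className = "flash-message";
  messageEl.textContent = message;

  const closeBtn = document.createElement("button");
  closeBtn.type = "button";
  closeBtn.className = "flash-close";
  closeBtn.setAttribute("aria-label", "Close");
  closeBtn.innerHTML = "&times;";
  closeBtn.addEventListener("click", () => overlay.remove());

  content.append(messageEl, closeBtn);
  overlay.appendChild(content);
  document.body.appendChild(overlay);

  // Fade out via the .fade-out rule, then remove from the DOM.
  setTimeout(() => {
    overlay.classList.add("fade-out");
    setTimeout(() => overlay.remove(), 300);
  }, duration);

  return overlay;
}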

Image file changed (binary content not shown): 2.7 KiB before, 4.1 KiB after.

View File

@ -1,18 +1,82 @@
<div class="modal-header">
<h5 class="modal-title">Log Details</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
<h5 class="modal-title"><i class="fas fa-info-circle"></i> Log Details</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-body">
<p><strong>Timestamp:</strong> {{ log.timestamp }}</p>
<p><strong>Category:</strong> {{ log.category }}</p>
<p><strong>Action:</strong> {{ log.action }}</p>
<p><strong>Description:</strong> {{ log.description }}</p>
<div class="row">
<div class="col-md-6">
<p><strong>Timestamp:</strong> <span class="text-muted">{{ log.timestamp }}</span></p>
<p><strong>Category:</strong>
<span class="badge bg-secondary">{{ log.category.replace('_', ' ').title() }}</span>
</p>
<p><strong>Action:</strong> <code>{{ log.action }}</code></p>
{% if log.status %}
<p><strong>Status:</strong>
{% if log.status == 'success' %}
<span class="badge bg-success">{{ log.status.title() }}</span>
{% elif log.status == 'error' %}
<span class="badge bg-danger">{{ log.status.title() }}</span>
{% elif log.status == 'warning' %}
<span class="badge bg-warning">{{ log.status.title() }}</span>
{% else %}
<span class="badge bg-info">{{ log.status.title() }}</span>
{% endif %}
</p>
{% endif %}
</div>
<div class="col-md-6">
{% if log.paper_id %}
<p><strong>Paper ID:</strong> <a href="/papers/{{ log.paper_id }}" target="_blank">{{ log.paper_id }}</a></p>
{% endif %}
{% if log.user_id %}
<p><strong>User ID:</strong> {{ log.user_id }}</p>
{% endif %}
{% if log.config_key %}
<p><strong>Config Key:</strong> <code>{{ log.config_key }}</code></p>
{% endif %}
{% if log.source_ip %}
<p><strong>Source IP:</strong> {{ log.source_ip }}</p>
{% endif %}
</div>
</div>
{% if log.description %}
<div class="mt-3">
<p><strong>Description:</strong></p>
<div class="alert alert-light">{{ log.description }}</div>
</div>
{% endif %}
{% if log.old_value or log.new_value %}
<div class="mt-3">
<p><strong>Configuration Changes:</strong></p>
<div class="row">
{% if log.old_value %}
<div class="col-md-6">
<label class="form-label"><strong>Old Value:</strong></label>
<pre class="bg-light p-2"><code>{{ log.old_value }}</code></pre>
</div>
{% endif %}
{% if log.new_value %}
<div class="col-md-6">
<label class="form-label"><strong>New Value:</strong></label>
<pre class="bg-light p-2"><code>{{ log.new_value }}</code></pre>
</div>
{% endif %}
</div>
</div>
{% endif %}
{% if log.extra_data %}
<p><strong>Extra Data:</strong>
<pre><code>{{ log.extra_data }}</code></pre>
</p>
<div class="mt-3">
<p><strong>Additional Data:</strong></p>
<pre class="bg-light p-3"
style="max-height: 300px; overflow-y: auto;"><code id="extra-data-content">{{ log.extra_data }}</code></pre>
</div>
{% endif %}
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">
<i class="fas fa-times"></i> Close
</button>
</div>
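
The #extra-data-content block is styled for pre-wrapped JSON by the stylesheet changes above; a small, purely illustrative sketch of how the modal loader could pretty-print it after injecting this fragment:

// Sketch: pretty-print the raw extra_data JSON once the fragment is in the DOM (assumed helper).
const extraDataEl = document.getElementById("extra-data-content");
if (extraDataEl) {
  try {
    const parsed = JSON.parse(extraDataEl.textContent);
    extraDataEl.textContent = JSON.stringify(parsed, null, 2);
    extraDataEl.parentElement.classList.add("json-formatted");
  } catch (e) {
    // Leave non-JSON extra data untouched.
  }
}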

View File

@ -0,0 +1,249 @@
<!-- Scraper Overview Modal -->
<div class="modal fade" id="scraperOverviewModal" tabindex="-1" role="dialog"
aria-labelledby="scraperOverviewModalLabel" aria-hidden="true">
<div class="modal-dialog modal-xl" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="scraperOverviewModalLabel">
<i class="fas fa-cogs"></i> Scraper Modules Overview
</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-body">
<!-- Loading state -->
<div id="scraperOverviewLoading" class="text-center py-4">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
<p class="mt-2 text-muted">Loading scraper information...</p>
</div>
<!-- Error state -->
<div id="scraperOverviewError" class="alert alert-danger d-none" role="alert">
<h6 class="alert-heading">Error Loading Scrapers</h6>
<p id="scraperOverviewErrorMessage"></p>
<button class="btn btn-outline-danger btn-sm" onclick="loadScraperOverview()">
<i class="fas fa-redo"></i> Retry
</button>
</div>
<!-- Content -->
<div id="scraperOverviewContent" class="d-none">
<!-- Scraper Architecture Overview -->
<div class="card mb-4">
<div class="card-header">
<h6 class="mb-0">
<i class="fas fa-info-circle"></i> How Scraper Modules Work
</h6>
</div>
<div class="card-body">
<p class="mb-3">
SciPaperLoader uses a modular scraper architecture where each scraper module handles
specific paper processing stages. Papers flow through different statuses as they are
processed by various scrapers.
</p>
<div class="row">
<div class="col-md-6">
<h6>Key Concepts:</h6>
<ul class="small">
<li><strong>Input Statuses:</strong> Paper statuses this scraper can process
</li>
<li><strong>Output Statuses:</strong> Statuses papers get after processing</li>
<li><strong>Processing Status:</strong> Temporary status while a scraper is working</li>
<li><strong>Pipeline:</strong> Scrapers can be chained together</li>
</ul>
</div>
<div class="col-md-6">
<h6>Status Flow Example:</h6>
<div class="d-flex align-items-center small">
<span class="badge bg-info">New</span>
<i class="fas fa-arrow-right mx-2"></i>
<span class="badge bg-warning">Processing</span>
<i class="fas fa-arrow-right mx-2"></i>
<span class="badge bg-success">Done</span>
</div>
<div class="text-muted mt-1">Papers transition through these statuses</div>
</div>
</div>
</div>
</div>
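The key concepts above map onto a small, uniform interface. Below is a minimal sketch of that contract using a hypothetical `DummyScraper`: the method names `get_input_statuses()` / `get_output_statuses()` and the `"processing"` key match how the diagnostic scripts in this changeset read the active scraper module, while the `"success"` / `"failure"` keys and the concrete status values are illustrative assumptions.

```python
# Illustrative sketch only -- DummyScraper and its status values are assumptions,
# not the project's actual base class.
class DummyScraper:
    """Declares which paper statuses this module consumes and which it produces."""

    def get_input_statuses(self) -> list:
        # Papers in any of these statuses are eligible for this module
        return ["New"]

    def get_output_statuses(self) -> dict:
        # Temporary status while working, plus terminal success/failure statuses
        return {"processing": "Processing", "success": "Done", "failure": "Failed"}


if __name__ == "__main__":
    scraper = DummyScraper()
    print(scraper.get_input_statuses(), "->", scraper.get_output_statuses())
```

Chaining two modules then simply means that one module's success status appears in the next module's input status list.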
<!-- Current System Configuration -->
<div class="card mb-4">
<div class="card-header">
<h6 class="mb-0">
<i class="fas fa-server"></i> System Configuration
</h6>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-4">
<p><strong>Active Scraper Module:</strong> <span id="currentScraperModule"
class="badge bg-primary">Loading...</span></p>
<p><strong>Daily Volume Limit:</strong> <span
id="currentVolumeLimit">Loading...</span> papers</p>
</div>
<div class="col-md-4">
<p><strong>Total Available Modules:</strong> <span
id="totalScraperModules">Loading...</span></p>
<p><strong>Processing Pipeline:</strong> <span
id="processingPipeline">Multi-stage</span></p>
</div>
<div class="col-md-4">
<p><strong>Current Paper Counts:</strong></p>
<div id="paperCountsSummary" class="small">
<!-- Will be populated by JavaScript -->
</div>
</div>
</div>
</div>
</div>
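A rough sketch of how the values shown in this card could be read on the server side. `create_app`, `VolumeConfig`, and `get_scraper()` are the ones used elsewhere in this changeset; the plain division by 24 is an assumption, since the real `ScraperManager.get_current_hour_quota()` may distribute the daily volume differently.

```python
# Sketch under assumptions: the even daily/24 split is a simplification of
# ScraperManager.get_current_hour_quota(), whose implementation is not shown here.
from scipaperloader import create_app
from scipaperloader.models import VolumeConfig
from scipaperloader.scrapers.factory import get_scraper

app = create_app()
with app.app_context():
    volume_config = VolumeConfig.query.first()
    daily_limit = volume_config.volume if volume_config else 0
    scraper = get_scraper()  # the currently configured scraper module

    print("Active scraper module:", type(scraper).__name__)
    print("Daily volume limit:", daily_limit)
    print("Approximate hourly quota:", daily_limit // 24 if daily_limit else 0)
```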
<!-- Available Scrapers Table -->
<div class="card">
<div class="card-header">
<h6 class="mb-0">
<i class="fas fa-list"></i> Available Scraper Modules
</h6>
</div>
<div class="card-body">
<div class="table-responsive">
<table class="table table-hover">
<thead>
<tr>
<th>Module Name</th>
<th>Description</th>
<th>Input Statuses</th>
<th>Success Output</th>
<th>Failure Output</th>
<th>Processing Status</th>
</tr>
</thead>
<tbody id="scrapersTableBody">
<!-- Table content will be populated by JavaScript -->
</tbody>
</table>
</div>
</div>
</div>
<!-- Publisher Parser Overview -->
<div class="card mt-4">
<div class="card-header">
<h6 class="mb-0">
<i class="fas fa-building"></i> Publisher Parser Overview
</h6>
</div>
<div class="card-body">
<div class="row mb-3">
<div class="col-md-12">
<p class="text-muted mb-2">
<i class="fas fa-info-circle"></i>
Publishers are detected from paper URLs and mapped to specific parser modules
for content extraction.
</p>
</div>
</div>
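The detection step described above could look roughly like the following. The hostnames and the registry dict are made-up examples; the actual parser modules added in this changeset are not shown in this excerpt.

```python
# Hypothetical sketch: PARSERS and the example hostnames are illustrative only.
from urllib.parse import urlparse

PARSERS = {
    "www.nature.com": "nature",
    "journals.plos.org": "plos",
}

def detect_parser(paper_url: str):
    """Map a paper URL's hostname to the name of a registered parser module."""
    host = urlparse(paper_url).netloc.lower()
    return PARSERS.get(host)  # None -> shown as "no parser available" in the table below

print(detect_parser("https://www.nature.com/articles/example"))  # -> "nature"
```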
<!-- Publisher Statistics -->
<div class="row mb-4" id="publisherStats">
<!-- Will be populated by JavaScript -->
</div>
<!-- Publishers Table -->
<div class="table-responsive">
<table class="table table-hover table-sm">
<thead>
<tr>
<th>Publisher</th>
<th>Papers</th>
<th>Parser Status</th>
<th>Parser Available</th>
</tr>
</thead>
<tbody id="publishersTableBody">
<!-- Table content will be populated by JavaScript -->
</tbody>
</table>
</div>
</div>
</div>
<!-- Status Flow Diagram -->
<div class="card mt-4">
<div class="card-header">
<h6 class="mb-0">
<i class="fas fa-project-diagram"></i> Paper Status Flow Diagram
</h6>
</div>
<div class="card-body">
<div id="statusFlowDiagram" class="text-center py-4">
<!-- This will be populated by JavaScript -->
</div>
</div>
</div>
</div>
</div>
<div class="modal-footer">
<div class="d-flex justify-content-between w-100">
<small class="text-muted">
<i class="fas fa-lightbulb"></i>
Tip: Scrapers can be chained to create complex processing pipelines
</small>
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<style>
/* Custom styles for the scraper overview modal */
#scraperOverviewModal .modal-xl {
max-width: 1200px;
}
#scraperOverviewModal .table th {
font-size: 0.9rem;
background-color: #f8f9fa;
}
#scraperOverviewModal .badge {
font-size: 0.75rem;
}
#scraperOverviewModal .status-badge {
margin: 2px;
display: inline-block;
}
.status-flow-node {
display: inline-block;
padding: 8px 16px;
margin: 4px;
border-radius: 20px;
font-size: 0.9rem;
font-weight: 500;
}
.status-flow-arrow {
color: #6c757d;
margin: 0 8px;
}
.scraper-description {
max-width: 300px;
word-break: break-word;
}
.input-status-list {
max-width: 150px;
}
.status-output {
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 0.8rem;
}
</style>

File diff suppressed because it is too large

View File

@ -1,34 +1,14 @@
{% extends "base.html.jinja" %} {% block content %}
{% extends "base.html.jinja" %}
{% block title %}Import CSV{% endblock title %}
{% block content %}
<h1>Welcome to SciPaperLoader</h1>
<div id="results-container"></div>
<!-- Include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
{% if success %}
<div class="alert alert-success mt-3">{{ success }}</div>
{% endif %} {% if error_message %}
<div class="alert alert-warning mt-3">
<h4>{{ error_message }}</h4>
<table class="table table-sm table-bordered">
<thead>
<tr>
<th>Row</th>
<th>DOI</th>
<th>Error</th>
</tr>
</thead>
<tbody>
{% for error in error_samples %}
<tr>
<td>{{ error.row }}</td>
<td>{{ error.doi }}</td>
<td>{{ error.error }}</td>
</tr>
{% endfor %}
</tbody>
</table>
<a href="{{ url_for('upload.download_error_log') }}" class="btn btn-outline-secondary">Download Full Error Log</a>
</div>
{% endif %}
<div id="results-container"></div>
<div class="alert alert-info">
<p>
@ -88,93 +68,42 @@
</div>
</div>
{% endblock content %}
{% block scripts %}
{{ super() }}
<!-- Configuration data in JSON format for clean separation -->
<script type="application/json" id="upload-config">
{
"statusUrlTemplate": {{ (url_for('upload.task_status', task_id='') ~ '{taskId}')|tojson }}
}
</script>
<script src="{{ url_for('static', filename='js/form-handler.js') }}"></script>
<script>
const form = document.getElementById("upload-form");
form.addEventListener("submit", function (e) {
e.preventDefault();
document.addEventListener('DOMContentLoaded', function () {
// Read configuration from JSON
const config = JSON.parse(document.getElementById('upload-config').textContent);
// Display loading state immediately
const progressModal = new bootstrap.Modal(document.getElementById("progressModal"));
progressModal.show();
const progressBar = document.getElementById("progressBar");
progressBar.style.width = "5%";
progressBar.textContent = "Starting...";
const formData = new FormData(form);
// Disable the form while processing
const submitButton = form.querySelector("button[type='submit']");
submitButton.disabled = true;
fetch(form.action, {
method: "POST",
body: formData,
})
.then((response) => response.json())
.then((data) => {
if (data.error) {
// Handle error
progressModal.hide();
alert(`Error: ${data.error}`);
submitButton.disabled = false;
return;
}
const taskId = data.task_id;
const interval = setInterval(() => {
fetch("{{ url_for('upload.task_status', task_id='') }}" + taskId)
.then((response) => response.json())
.then((status) => {
console.log("Task status:", status);
if (status.state === "SUCCESS") {
clearInterval(interval);
progressBar.style.width = "100%";
progressBar.textContent = "Completed!";
setTimeout(() => {
progressModal.hide();
showResults(status.result);
submitButton.disabled = false;
}, 1000);
} else if (status.state === "FAILURE") {
clearInterval(interval);
progressBar.style.width = "100%";
progressBar.classList.add("bg-danger");
progressBar.textContent = "Failed!";
setTimeout(() => {
progressModal.hide();
alert(`Task failed: ${status.error || "Unknown error"}`);
submitButton.disabled = false;
}, 1000);
} else {
// Update progress bar with more information
const progress = status.progress || 0;
progressBar.style.width = `${progress}%`;
progressBar.textContent = `${progress}% complete`;
document.getElementById("progressStatus").innerText = `Processing... (${status.state})`;
}
})
.catch((err) => {
console.error("Failed to check task status:", err);
});
}, 1000);
})
.catch((err) => {
console.error("Upload failed:", err);
progressModal.hide();
alert("Upload failed. Please try again.");
submitButton.disabled = false;
});
// Initialize form handler with custom callbacks
const uploadFormHandler = new FormHandler('upload-form', {
statusUrlTemplate: config.statusUrlTemplate,
onSuccess: showResults,
onError: (error) => showFlashMessage(`Upload failed: ${error}`, 'error')
});
});
const showResults = (result) => {
// Show main success message as overlay
const message = `Upload completed! Added: ${result.added}, Updated: ${result.updated}, Skipped: ${result.skipped}, Errors: ${result.error_count}`;
showFlashMessage(message, 'success');
let resultHTML = `<div class="alert alert-success">${message}</div>`;
// Build detailed results HTML for the results container
let resultHTML = '';
// Add skipped records information
if (result.skipped > 0) {
showFlashMessage(`${result.skipped} records were skipped`, 'info');
resultHTML += `
<div class="alert alert-info">
<h4>${result.skipped} records were skipped</h4>
@ -205,6 +134,7 @@
// Existing error display code
if (result.error_count > 0) {
showFlashMessage(`${result.error_count} errors occurred during upload`, 'warning');
resultHTML += `
<div class="alert alert-warning">
<h4>Some errors occurred (${result.error_count} total)</h4>
@ -238,7 +168,8 @@
</div>`;
}
// Display detailed results in container
document.getElementById("results-container").innerHTML = resultHTML;
};
</script>
{% endblock content %}
{% endblock scripts %}
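For reference, the task-status payloads that the polling code above and `tests/test_csv_upload.py` below both rely on, written out as plain Python data. The field names come from that code; the exact string used for the intermediate state is not visible in this diff and is only a placeholder.

```python
# "IN_PROGRESS" is a placeholder -- only SUCCESS/FAILURE and the listed fields
# appear in the code shown in this changeset.
in_progress = {"state": "IN_PROGRESS", "progress": 40}
succeeded = {
    "state": "SUCCESS",
    "result": {"added": 3, "updated": 0, "skipped": 1, "error_count": 0},
}
failed = {"state": "FAILURE", "error": "Could not parse the uploaded CSV"}
```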

131
tests/test_csv_upload.py Normal file
View File

@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Test script to verify CSV upload functionality works with APScheduler.
"""
import time
import io
from scipaperloader import create_app
def create_test_csv():
"""Create a simple test CSV file."""
csv_content = """title,doi,issn,journal,alternative_id,published_online
Test Paper 1,10.1000/test_upload_001,1234-5678,Test Journal,ALT001,2024-01-01
Test Paper 2,10.1000/test_upload_002,1234-5678,Test Journal,ALT002,2024-01-02
Test Paper 3,10.1000/test_upload_003,1234-5678,Test Journal,ALT003,2024-01-03
"""
return csv_content
def test_csv_upload():
"""Test the CSV upload functionality."""
print("🧪 Testing CSV Upload Functionality")
print("=" * 50)
# Create Flask app
app = create_app()
with app.test_client() as client:
# Create test CSV
csv_content = create_test_csv()
# Prepare file data
csv_file = io.BytesIO(csv_content.encode('utf-8'))
csv_file.name = 'test_upload.csv'
print("📤 Uploading CSV file...")
# Make upload request
response = client.post('/upload/', data={
'file': (csv_file, 'test_upload.csv'),
'delimiter': ',',
'duplicate_strategy': 'skip'
}, content_type='multipart/form-data')
print(f"Response Status: {response.status_code}")
print(f"Response Data: {response.get_json()}")
if response.status_code == 200:
response_data = response.get_json()
if 'task_id' in response_data:
task_id = response_data['task_id']
print(f"✅ Task scheduled successfully: {task_id}")
# Monitor task progress
print("\n📊 Monitoring task progress...")
for i in range(30): # Wait up to 30 seconds
progress_response = client.get(f'/upload/task_status/{task_id}')
if progress_response.status_code == 200:
progress_data = progress_response.get_json()
print(f"Progress: {progress_data}")
if progress_data.get('state') == 'SUCCESS':
print("✅ CSV upload completed successfully!")
result = progress_data.get('result', {})
print(f" Added: {result.get('added', 0)}")
print(f" Skipped: {result.get('skipped', 0)}")
print(f" Errors: {result.get('error_count', 0)}")
return True
elif progress_data.get('state') == 'FAILURE':
print(f"❌ CSV upload failed: {progress_data.get('error')}")
return False
else:
print(f"❌ Failed to get task status: {progress_response.status_code}")
return False
time.sleep(1)
print("⏰ Task did not complete within 30 seconds")
return False
else:
print(f"❌ No task_id in response: {response_data}")
return False
else:
print(f"❌ Upload request failed: {response.status_code}")
print(f"Response: {response.get_data(as_text=True)}")
return False
def check_scheduler_status():
"""Check APScheduler status."""
print("\n🔍 Checking APScheduler Status")
print("=" * 50)
app = create_app()
with app.app_context():
from scipaperloader.scheduler import _scheduler
if not _scheduler:
print("❌ APScheduler not initialized")
return False
if not _scheduler.running:
print("❌ APScheduler not running")
return False
jobs = _scheduler.get_jobs()
print(f"✅ APScheduler running with {len(jobs)} jobs")
# Show current jobs
for job in jobs:
print(f" - {job.id}: {job.name}")
return True
if __name__ == "__main__":
print("🚀 CSV Upload Test Suite")
print("=" * 50)
# First check scheduler status
if not check_scheduler_status():
print("❌ APScheduler issues detected, cannot proceed with test")
exit(1)
# Run the upload test
success = test_csv_upload()
if success:
print("\n🎉 All tests passed! CSV upload is working correctly.")
exit(0)
else:
print("\n❌ Test failed! CSV upload needs debugging.")
exit(1)

View File

@ -0,0 +1,397 @@
#!/usr/bin/env python3
"""
Comprehensive test for APScheduler functionality in SciPaperLoader.
Tests job scheduling, execution, revocation, and hourly scheduler functionality.
"""
import sys
import os
import time
import threading
from datetime import datetime, timedelta
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from scipaperloader import create_app
from scipaperloader.models import PaperMetadata, ScraperState, ActivityLog, ScheduleConfig, VolumeConfig
from scipaperloader.scrapers.manager import ScraperManager
from scipaperloader.db import db
def test_scheduler_functionality():
"""Comprehensive test of APScheduler functionality."""
print("🧪 Testing APScheduler Functionality")
print("=" * 50)
# Create test app with in-memory database
app = create_app({
'TESTING': True,
'SQLALCHEMY_DATABASE_URI': 'sqlite:///:memory:',
})
with app.app_context():
# Test 1: Basic scheduler availability
print("\n📋 Test 1: Scheduler Initialization")
scheduler = app.config.get('SCHEDULER')
if not scheduler:
print("❌ APScheduler not found in app config")
return False
print("✅ APScheduler available and initialized")
print(f"📊 Initial job count: {scheduler.get_job_count()}")
# Test 2: Database table creation
print("\n📋 Test 2: APScheduler Database Tables")
try:
# Check if we can query jobs (which requires tables to exist)
jobs = scheduler.get_paper_jobs()
print("✅ APScheduler database tables exist and accessible")
print(f"📋 Current paper jobs: {len(jobs)}")
except Exception as e:
print(f"❌ APScheduler database tables not accessible: {e}")
return False
# Test 3: Job scheduling functionality
print("\n📋 Test 3: Job Scheduling")
# Create test paper
test_paper = PaperMetadata(
title="Test Paper for Scheduler",
doi="10.1000/test_scheduler_001",
issn="1234-5678",
journal="Test Journal",
status="New"
)
db.session.add(test_paper)
db.session.commit()
# Schedule a paper for processing in 30 seconds (longer delay)
try:
job_id = scheduler.schedule_paper_processing(
paper_id=test_paper.id,
delay_seconds=30 # Increased delay to 30 seconds
# Removed explicit job_id to allow default "paper_job_" prefix
)
print(f"✅ Paper scheduling works: Job ID {job_id}")
except Exception as e:
print(f"❌ Paper scheduling failed: {e}")
return False
# Verify job was scheduled
jobs_after = scheduler.get_paper_jobs()
if len(jobs_after) == 0:
print("❌ No jobs found after scheduling")
return False
print(f"✅ Job successfully scheduled: {len(jobs_after)} paper job(s) found")
# Test 4: Job information retrieval
print("\n📋 Test 4: Job Information Retrieval")
scheduled_job = jobs_after[0]
print(f"✅ Job details accessible:")
print(f" 📝 Job ID: {scheduled_job['id']}")
print(f" 📝 Job Name: {scheduled_job['name']}")
print(f" 📝 Next Run Time: {scheduled_job['next_run_time']}")
print(f" 📝 Args: {scheduled_job['args']}")
# Test 5: Job revocation
print("\n📋 Test 5: Job Revocation")
initial_count = len(jobs_after)
revoked_count = scheduler.revoke_all_scraper_jobs()
if revoked_count != initial_count:
print(f"⚠️ Warning: Expected to revoke {initial_count} jobs, but revoked {revoked_count}")
else:
print(f"✅ Job revocation works: {revoked_count} job(s) revoked")
# Verify jobs were revoked
jobs_after_revocation = scheduler.get_paper_jobs()
if len(jobs_after_revocation) > 0:
print(f"❌ Jobs still exist after revocation: {len(jobs_after_revocation)}")
return False
print("✅ All paper jobs successfully revoked")
# Test 6: Multiple job scheduling
print("\n📋 Test 6: Multiple Job Scheduling")
# Create more test papers
test_papers = []
for i in range(3):
paper = PaperMetadata(
title=f"Test Paper {i+1}",
doi=f"10.1000/test_scheduler_{i+2:03d}",
issn="1234-5678",
journal="Test Journal",
status="New"
)
db.session.add(paper)
test_papers.append(paper)
db.session.commit()
# Schedule multiple papers
scheduled_jobs = []
for i, paper in enumerate(test_papers):
job_id = scheduler.schedule_paper_processing(
paper_id=paper.id,
delay_seconds=10 + i # Stagger the scheduling
# Removed explicit job_id to allow default "paper_job_" prefix
)
scheduled_jobs.append(job_id)
print(f"✅ Multiple job scheduling works: {len(scheduled_jobs)} jobs scheduled")
# Verify all jobs are scheduled
all_jobs = scheduler.get_paper_jobs()
if len(all_jobs) != len(test_papers):
print(f"❌ Expected {len(test_papers)} jobs, found {len(all_jobs)}")
return False
print(f"✅ All jobs properly scheduled: {len(all_jobs)} total jobs")
# Test 7: ScraperManager integration
print("\n📋 Test 7: ScraperManager Integration")
manager = ScraperManager()
# Test paper selection
papers = manager.select_papers_for_processing(limit=2)
print(f"✅ ScraperManager paper selection: {len(papers)} papers selected")
# Test scraper state management with APScheduler
start_result = manager.start_scraper()
if start_result["status"] != "success":
print(f"❌ Failed to start scraper: {start_result['message']}")
return False
print("✅ Scraper started successfully")
# Test job clearing through manager
cleared_count = manager._clear_delayed_tasks_from_apscheduler()
print(f"✅ ScraperManager job clearing: {cleared_count} jobs cleared")
# Verify jobs were cleared
remaining_jobs = scheduler.get_paper_jobs()
if len(remaining_jobs) > 0:
print(f"❌ Jobs still exist after manager clearing: {len(remaining_jobs)}")
return False
print("✅ ScraperManager successfully clears APScheduler jobs")
# Test 8: Hourly scheduler configuration
print("\n📋 Test 8: Hourly Scheduler Configuration")
# Ensure the hourly job is scheduled correctly
all_scheduler_jobs = scheduler._scheduler.get_jobs() if hasattr(scheduler, '_scheduler') and scheduler._scheduler else []
hourly_jobs = [job for job in all_scheduler_jobs if job.id == 'hourly_scraper_main']
if not hourly_jobs:
print("❌ Hourly scheduler job not found")
return False
hourly_job = hourly_jobs[0]
print("✅ Hourly scheduler job found:")
print(f" 📝 Job ID: {hourly_job.id}")
print(f" 📝 Job Name: {hourly_job.name}")
print(f" 📝 Trigger: {hourly_job.trigger}")
print(f" 📝 Next Run: {hourly_job.next_run_time}")
# Test 9: Configuration-based scheduling
print("\n📋 Test 9: Configuration-based Scheduling")
# Set up volume configuration
volume_config = VolumeConfig.query.first()
if not volume_config:
volume_config = VolumeConfig(volume=10) # 10 papers per day
db.session.add(volume_config)
db.session.commit()
# Test quota calculation
quota = manager.get_current_hour_quota()
print(f"✅ Hourly quota calculation: {quota} papers per hour")
if quota < 0:
print("❌ Invalid quota calculation")
return False
# Test 10: Activity logging integration
print("\n📋 Test 10: Activity Logging Integration")
# Check recent APScheduler-related logs
recent_logs = ActivityLog.query.filter(
ActivityLog.action.like('%apscheduler%')
).order_by(ActivityLog.timestamp.desc()).limit(5).all()
print(f"✅ APScheduler activity logging: {len(recent_logs)} related log entries")
if recent_logs:
for log in recent_logs[:3]:
print(f" 📝 {log.action}: {log.description}")
# Test 11: Error handling
print("\n📋 Test 11: Error Handling")
# Test scheduling with invalid paper ID
try:
scheduler.schedule_paper_processing(
paper_id=99999, # Non-existent paper
delay_seconds=1,
job_id="test_error_job"
)
print("✅ Scheduling with invalid paper ID handled gracefully")
except Exception as e:
print(f"✅ Scheduling with invalid paper ID properly raises exception: {e}")
# Test 12: Cleanup and shutdown
print("\n📋 Test 12: Cleanup and Shutdown")
# Stop scraper
stop_result = manager.stop_scraper()
if stop_result["status"] != "success":
print(f"❌ Failed to stop scraper: {stop_result['message']}")
return False
print("✅ Scraper stopped successfully")
# Final job count should be minimal (only hourly scheduler)
final_job_count = scheduler.get_job_count()
final_paper_jobs = len(scheduler.get_paper_jobs())
print(f"📊 Final state:")
print(f" 📝 Total jobs: {final_job_count}")
print(f" 📝 Paper jobs: {final_paper_jobs}")
if final_paper_jobs > 0:
print("❌ Paper jobs still exist after cleanup")
return False
print("✅ Cleanup completed successfully")
print("\n🎉 ALL SCHEDULER TESTS PASSED!")
print("\n📋 Test Summary:")
print(" ✅ APScheduler initialization works")
print(" ✅ Database tables created and accessible")
print(" ✅ Job scheduling functionality works")
print(" ✅ Job information retrieval works")
print(" ✅ Job revocation works")
print(" ✅ Multiple job scheduling works")
print(" ✅ ScraperManager integration works")
print(" ✅ Hourly scheduler configured correctly")
print(" ✅ Configuration-based scheduling works")
print(" ✅ Activity logging integration works")
print(" ✅ Error handling works")
print(" ✅ Cleanup and shutdown works")
return True
def test_job_execution():
"""Test that jobs actually execute (requires waiting)."""
print("\n🔄 Testing Job Execution (5-second test)")
print("-" * 40)
app = create_app({
'TESTING': True,
'SQLALCHEMY_DATABASE_URI': 'sqlite:///:memory:',
})
with app.app_context():
# Initialize database and scheduler
db.create_all()
scheduler = app.config.get('SCHEDULER')
if not scheduler:
print("❌ Scheduler not initialized")
return False
# Create test paper
test_paper = PaperMetadata(
title="Test Paper for Execution",
doi="10.1000/test_execution",
issn="1234-5678",
journal="Test Journal",
status="Pending"
)
db.session.add(test_paper)
db.session.commit()
# Verify paper is added to the database
test_paper_id = test_paper.id
if not test_paper_id:
print("❌ Test paper not added to the database")
return False
# Schedule paper for processing in 2 seconds
job_id = scheduler.schedule_paper_processing(
paper_id=test_paper_id,
delay_seconds=2
)
print(f"📅 Scheduled job {job_id} for execution in 2 seconds")
# Wait and check for execution
print("⏳ Waiting for job execution...")
time.sleep(3)
# Check if job completed (should be removed from scheduler)
remaining_jobs = scheduler.get_paper_jobs()
if remaining_jobs:
print(f"⚠️ Job still in scheduler: {len(remaining_jobs)} remaining")
for job in remaining_jobs:
print(f" 📝 Job ID: {job['id']}, Next Run Time: {job['next_run_time']}")
else:
print("✅ Job executed and removed from scheduler")
# Check activity logs for execution evidence
execution_logs = ActivityLog.query.filter(
ActivityLog.action.like('%process_single_paper%')
).order_by(ActivityLog.timestamp.desc()).limit(3).all()
if execution_logs:
print("✅ Job execution logged in activity:")
for log in execution_logs:
print(f" 📝 {log.action}: {log.description}")
else:
print("⚠️ No execution logs found")
# Validate job execution status in the database
updated_paper = PaperMetadata.query.get(test_paper_id)
if updated_paper:
print(f"🔍 Retrieved paper: {updated_paper.title}, Status: {updated_paper.status}")
if updated_paper.status == "Done":
print("✅ Paper status updated to 'Done'")
else:
print(f"❌ Paper status not updated: {updated_paper.status}")
else:
print("❌ Paper not found in the database")
return True
if __name__ == "__main__":
print(f"📅 Starting scheduler tests at {datetime.now()}")
try:
# Run main functionality tests
success = test_scheduler_functionality()
if success:
print("\n" + "="*50)
# Run execution test if main tests pass
test_job_execution()
print(f"\n📅 Tests completed at {datetime.now()}")
sys.exit(0 if success else 1)
except KeyboardInterrupt:
print("\n⏹️ Tests interrupted by user")
sys.exit(1)
except Exception as e:
print(f"\n❌ Test error: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
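Test 8 above only verifies that a job with the id `hourly_scraper_main` exists; how that job gets registered is not part of this diff. A minimal sketch with plain APScheduler, where the trigger settings and the job body are assumptions:

```python
# Sketch only: the project registers this job inside its own scheduler wrapper
# (app.config['SCHEDULER']), which is not shown in this diff.
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger


def hourly_scraper_main():
    # In the real application this would select papers for the current hour
    # and schedule per-paper jobs (cf. schedule_paper_processing in the tests).
    print("hourly scraper tick")


scheduler = BackgroundScheduler()
scheduler.add_job(
    hourly_scraper_main,
    trigger=CronTrigger(minute=0),  # top of every hour
    id="hourly_scraper_main",
    replace_existing=True,
)
scheduler.start()
```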

View File

@ -18,4 +18,5 @@ def client(app):
def test_index(client):
response = client.get("/")
assert b"It works!" in response.data
# Updated assertion to check for actual content in the index page
assert b"Welcome to SciPaperLoader" in response.data

View File

@ -10,7 +10,7 @@ especially for addressing issues with the scraper module.
**Symptoms:**
- Web interface shows scraper as stopped but papers are still being processed
- `/scraper/stop` endpoint returns success but processing continues
- Active tasks show up in Celery inspector
- Active tasks show up in APScheduler inspector
**Solutions:**
@ -24,7 +24,7 @@ python tools/diagnostics/emergency_stop.py
The emergency stop performs these actions (sketched in code after this list):
- Sets scraper state to inactive in the database
- Revokes all running, reserved, and scheduled Celery tasks
- Revokes all running and scheduled APScheduler tasks
- Purges all task queues
- Reverts papers with "Pending" status to their previous state
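Condensed into code, the sequence looks roughly like the sketch below. Only `ScraperManager.stop_scraper()`, `revoke_all_scraper_jobs()`, and `ScraperState.get_current_state()` are taken from this changeset; the full `emergency_stop.py` script additionally reverts papers stuck in the processing status.

```python
# Rough sketch of the emergency-stop sequence; error handling and paper
# reversion are omitted here (see tools/diagnostics/emergency_stop.py).
from scipaperloader import create_app
from scipaperloader.models import ScraperState
from scipaperloader.scrapers.manager import ScraperManager

app = create_app()
with app.app_context():
    result = ScraperManager().stop_scraper()   # sets state inactive, revokes jobs
    print(result["status"], result.get("message", ""))

    scheduler = app.config.get("SCHEDULER")
    if scheduler:                              # belt and braces: drop leftover paper jobs
        print("revoked:", scheduler.revoke_all_scraper_jobs())

    state = ScraperState.get_current_state()
    print("active:", state.is_active, "paused:", state.is_paused)
```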
@ -33,12 +33,12 @@ The emergency stop performs these actions:
**Symptoms:**
- Code changes don't seem to have any effect
- Bug fixes don't work even though the code is updated
- Workers might be using cached versions of modified code
- APScheduler might be using cached versions of modified code
**Solution:**
```bash
# Use the quick fix to stop tasks and restart workers
# Use the quick fix to stop tasks and restart the application
make diagnostics # Then select option 6 (Quick fix)
# Or directly:
@ -57,7 +57,7 @@ python tools/diagnostics/diagnose_scraper.py
This tool will:
- Show current scraper state
- List all active, scheduled, and reserved tasks
- List all active and scheduled APScheduler tasks
- Display recent activity and error logs
## Preventative Measures
@ -67,11 +67,10 @@ This tool will:
- Deploying code changes
- Modifying the database
2. **Monitor task queue size** using Flower web interface:
2. **Monitor APScheduler jobs** through the diagnostic tools:
```bash
make celery-flower
make diagnostics # Then select option 2 (Inspect tasks)
```
Then visit http://localhost:5555
3. **Check logs for failed tasks** regularly in the Logger tab of the application (a scripted alternative is sketched below)
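The same check can be scripted. Here is a small sketch using the `ActivityLog` model from this changeset; because the diff does not show which status value marks a failed task, it simply prints the most recent entries instead of filtering.

```python
# Minimal sketch: print the latest activity log entries so failed tasks can be
# spotted from a shell as well as from the Logger tab.
from scipaperloader import create_app
from scipaperloader.models import ActivityLog

app = create_app()
with app.app_context():
    recent = (ActivityLog.query
              .order_by(ActivityLog.timestamp.desc())
              .limit(20)
              .all())
    for log in recent:
        print(f"{log.timestamp}  {log.action}: {log.description}")
```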

View File

@ -7,14 +7,14 @@ This directory contains various scripts for diagnosing issues, debugging, and ha
### Scraper Management
- **emergency_stop.py**: Force stops all scraper activities, revokes running tasks, and reverts papers from "Pending" state
- **quick_fix.py**: A simplified emergency stop that also restarts Celery workers to ensure code changes are applied
- **quick_fix.py**: A simplified emergency stop that also stops Flask processes to ensure code changes are applied
- **test_reversion.py**: Tests the paper reversion functionality when stopping the scraper
### Monitoring and Diagnostics
- **check_state.py**: Checks the current state of the scraper in the database
- **diagnose_scraper.py**: Comprehensive diagnostic tool that examines tasks, logs, and scraper state
- **inspect_tasks.py**: Displays currently running, scheduled, and reserved Celery tasks
- **inspect_tasks.py**: Displays currently running and scheduled APScheduler tasks
## Usage
@ -59,5 +59,5 @@ python tools/diagnostics/quick_fix.py
## Notes
- Always run these scripts from the project root directory
- Some scripts may require a running Redis server
- Some scripts may require a running Flask application with APScheduler
- After using emergency tools, the application may need to be restarted completely

View File

@ -3,7 +3,6 @@ Diagnose and fix scraper stopping issues.
"""
from scipaperloader import create_app
from scipaperloader.celery import celery
from scipaperloader.models import ScraperState, ActivityLog
from scipaperloader.scrapers.factory import get_scraper
@ -18,21 +17,15 @@ def check_scraper_status():
else:
print("No scraper state found in database")
def check_celery_tasks():
"""Check currently running Celery tasks."""
i = celery.control.inspect()
print("\n=== ACTIVE TASKS ===")
active_tasks = i.active() or {}
for worker, tasks in active_tasks.items():
for task in tasks:
print(f"Worker: {worker}, Task: {task.get('name', 'Unknown')}, ID: {task.get('id', 'Unknown')}")
print("\n=== SCHEDULED TASKS ===")
scheduled_tasks = i.scheduled() or {}
for worker, tasks in scheduled_tasks.items():
for task in tasks:
print(f"Worker: {worker}, Task: {task.get('name', 'Unknown')}, ID: {task.get('id', 'Unknown')}")
def check_scheduler_jobs():
"""Check the current jobs in APScheduler."""
with app.app_context():
scheduler = app.config.get('SCHEDULER')
if not scheduler:
print("❌ APScheduler not found in app config")
else:
jobs = scheduler.get_paper_jobs()
print("Scheduled jobs:", jobs)
def check_recent_logs():
"""Check recent activity logs for clues."""
@ -60,41 +53,26 @@ def force_stop_scraper():
print("Set scraper state to inactive")
# Revoke all tasks
i = celery.control.inspect()
revoked_ids = []
# Check all queues
for queue_name, queue_func in [
("scheduled", i.scheduled),
("active", i.active),
("reserved", i.reserved)
]:
queue = queue_func() or {}
for worker, tasks in queue.items():
for task in tasks:
task_id = task.get('id')
if task_id and task_id not in revoked_ids:
celery.control.revoke(task_id, terminate=True)
revoked_ids.append(task_id)
print(f"Revoked task: {task_id}")
# Purge all queues
celery.control.purge()
print("Purged all task queues")
scheduler = app.config.get('SCHEDULER')
if not scheduler:
print("❌ APScheduler not found in app config")
else:
revoked_count = scheduler.revoke_all_scraper_jobs()
print(f"✅ Revoked {revoked_count} jobs from APScheduler")
# Log the action
ActivityLog.log_scraper_command(
action="force_stop_scraper",
status="success",
description=f"Force stopped scraper, revoked {len(revoked_ids)} tasks"
description=f"Force stopped scraper, revoked {revoked_count} tasks"
)
print(f"\nRevoked {len(revoked_ids)} tasks in total")
print(f"\nRevoked {revoked_count} tasks in total")
if __name__ == "__main__":
print("=== SCRAPER STATUS DIAGNOSTIC TOOL ===")
check_scraper_status()
check_celery_tasks()
check_scheduler_jobs()
check_recent_logs()
stop_confirmation = input("\nDo you want to force stop the scraper? (y/n): ")

View File

@ -23,7 +23,6 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../.
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ActivityLog, ScraperState
from scipaperloader.celery import celery
app = create_app()
@ -38,46 +37,18 @@ def emergency_stop():
ScraperState.set_paused(False)
print("✓ Set scraper state to inactive")
# 2. Revoke all tasks
print("\nRevoking running tasks...")
try:
i = celery.control.inspect()
active = i.active() or {}
scheduled = i.scheduled() or {}
reserved = i.reserved() or {}
# 2. Revoke all jobs in APScheduler
scheduler = app.config.get('SCHEDULER')
if scheduler:
revoked_count = scheduler.revoke_all_scraper_jobs()
print(f"✅ Revoked {revoked_count} jobs from APScheduler")
else:
print("❌ APScheduler not found in app config")
revoked_count = 0
# Revoke active tasks
for worker, tasks in active.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
print(f" Revoked active task: {task.get('name', 'unknown')}")
# Revoke scheduled tasks
for worker, tasks in scheduled.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
# Revoke reserved tasks
for worker, tasks in reserved.items():
for task in tasks:
if 'id' in task:
celery.control.revoke(task['id'], terminate=True)
revoked_count += 1
print(f"✓ Revoked {revoked_count} tasks")
# 3. Purge queues
celery.control.purge()
print("✓ Purged all task queues")
except Exception as e:
print(f"⚠ Error revoking tasks: {str(e)}")
# 3. Revert all papers to 'Pending' state
PaperMetadata.query.filter_by(status="Processing").update({"status": "Pending"})
db.session.commit()
print("✅ Reverted all 'Processing' papers to 'Pending' state")
# 4. Revert papers in "Pending" status
try:

View File

@ -1,11 +1,78 @@
#!/usr/bin/env python3
"""
Inspect current Celery tasks (active, reserved, and scheduled)
Inspect current APScheduler jobs (active and scheduled).
"""
from scipaperloader.celery import celery
import sys
import os
from datetime import datetime
i = celery.control.inspect()
print("Active tasks:", i.active())
print("Reserved tasks:", i.reserved())
print("Scheduled tasks:", i.scheduled())
# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from scipaperloader import create_app
from scipaperloader.models import ScraperState
def main():
print("=== APScheduler Task Inspector ===")
print(f"Time: {datetime.now()}\n")
app = create_app()
with app.app_context():
# Check scraper state
scraper_state = ScraperState.get_current_state()
print(f"🔄 Scraper State:")
print(f" Active: {'' if scraper_state.is_active else ''} {scraper_state.is_active}")
print(f" Paused: {'⏸️' if scraper_state.is_paused else '▶️'} {scraper_state.is_paused}")
print()
# Check APScheduler
scheduler = app.config.get('SCHEDULER')
if not scheduler:
print("❌ APScheduler not found in app config")
return
print("📋 APScheduler Status:")
# Access the underlying scheduler
if hasattr(scheduler, 'scheduler') and scheduler.scheduler:
print(f" Running: {'' if scheduler.scheduler.running else ''} {scheduler.scheduler.running}")
else:
print("❌ APScheduler instance not accessible")
print()
# Get all jobs
if hasattr(scheduler, 'scheduler') and scheduler.scheduler:
all_jobs = scheduler.scheduler.get_jobs()
else:
all_jobs = []
paper_jobs = scheduler.get_paper_jobs()
print(f"📊 Job Statistics:")
print(f" Total jobs: {len(all_jobs)}")
print(f" Paper processing jobs: {len(paper_jobs)}")
print()
if paper_jobs:
print("📝 Active Paper Processing Jobs:")
for job in paper_jobs:
next_run = job.get('next_run_time', 'Not scheduled')
print(f"{job['id']}")
print(f" Next run: {next_run}")
print(f" Name: {job.get('name', 'N/A')}")
if job.get('args'):
print(f" Paper ID: {job['args'][0] if job['args'] else 'N/A'}")
print()
else:
print("✅ No active paper processing jobs")
# Show other jobs if any
other_jobs = [job for job in all_jobs if not any(pattern in job.id for pattern in ['paper_process_', 'test_paper_process_', 'process_paper_'])]
if other_jobs:
print(f"🔧 Other Scheduled Jobs ({len(other_jobs)}):")
for job in other_jobs:
next_run = job.next_run_time.strftime('%Y-%m-%d %H:%M:%S') if job.next_run_time else 'Not scheduled'
print(f"{job.id} - Next run: {next_run}")
if __name__ == "__main__":
main()

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Quick fix script to stop all running scraper tasks and restart Celery workers.
This ensures the updated code is loaded and tasks are properly terminated.
Quick fix script to stop all running scraper tasks using APScheduler.
This ensures all scheduled tasks are properly terminated.
"""
import os
@ -9,45 +9,55 @@ import sys
import signal
import subprocess
import time
from datetime import datetime
from datetime import datetime, UTC
# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
def kill_celery_processes():
"""Kill all running Celery processes"""
print("Killing Celery processes...")
def stop_apscheduler_jobs():
"""Stop all APScheduler jobs through the Flask app"""
print("Stopping APScheduler jobs...")
try:
# Get all celery processes
result = subprocess.run(['pgrep', '-f', 'celery'], capture_output=True, text=True)
from scipaperloader import create_app
app = create_app()
with app.app_context():
scheduler = app.config.get('SCHEDULER')
if scheduler:
revoked_count = scheduler.revoke_all_scraper_jobs()
print(f"✓ Revoked {revoked_count} APScheduler jobs")
else:
print("❌ APScheduler not found in app config")
except Exception as e:
print(f"⚠ Error stopping APScheduler jobs: {e}")
def kill_python_processes():
"""Kill any running Python processes that might be Flask/APScheduler workers"""
print("Checking for running Flask/APScheduler processes...")
try:
# Look for Flask processes
result = subprocess.run(['pgrep', '-f', 'flask'], capture_output=True, text=True)
if result.returncode == 0:
pids = result.stdout.strip().split('\n')
for pid in pids:
if pid:
try:
os.kill(int(pid), signal.SIGTERM)
print(f" Killed process {pid}")
except ProcessLookupError:
pass # Process already dead
# Check if this is our process before killing
cmdline_result = subprocess.run(['ps', '-p', pid, '-o', 'cmd='], capture_output=True, text=True)
if 'scipaperloader' in cmdline_result.stdout:
os.kill(int(pid), signal.SIGTERM)
print(f" Killed Flask process {pid}")
except (ProcessLookupError, ValueError):
pass # Process already dead or invalid PID
# Wait a moment for graceful shutdown
time.sleep(2)
else:
print("✓ No Flask processes found")
# Force kill any remaining processes
result = subprocess.run(['pgrep', '-f', 'celery'], capture_output=True, text=True)
if result.returncode == 0:
pids = result.stdout.strip().split('\n')
for pid in pids:
if pid:
try:
os.kill(int(pid), signal.SIGKILL)
print(f" Force killed process {pid}")
except ProcessLookupError:
pass
print("✓ All Celery processes terminated")
except Exception as e:
print(f"⚠ Error killing processes: {e}")
print(f"⚠ Error checking processes: {e}")
def stop_scraper_state():
"""Set scraper state to inactive using Flask app context"""
@ -55,6 +65,7 @@ def stop_scraper_state():
from scipaperloader import create_app
from scipaperloader.models import ScraperState, PaperMetadata
from scipaperloader.db import db
from scipaperloader.scrapers.factory import get_scraper
app = create_app()
with app.app_context():
@ -63,41 +74,57 @@ def stop_scraper_state():
ScraperState.set_paused(False)
print("✓ Set scraper state to inactive")
# Revert any pending papers to "New" status (simple approach since we don't have previous_status data yet)
pending_papers = PaperMetadata.query.filter_by(status="Pending").all()
# Get scraper configuration for proper status reversion
scraper = get_scraper()
input_statuses = scraper.get_input_statuses()
output_statuses = scraper.get_output_statuses()
processing_status = output_statuses.get("processing", "Processing")
# Revert any papers in processing status
processing_papers = PaperMetadata.query.filter_by(status=processing_status).all()
reverted_count = 0
for paper in pending_papers:
paper.status = "New" # Simple fallback - revert all to "New"
reverted_count += 1
if processing_papers and input_statuses:
revert_status = input_statuses[0] # Use first input status as default
for paper in processing_papers:
# Try to use previous_status if available, otherwise use first input status
if hasattr(paper, 'previous_status') and paper.previous_status:
paper.status = paper.previous_status
else:
paper.status = revert_status
paper.updated_at = datetime.now(UTC)
reverted_count += 1
if reverted_count > 0:
db.session.commit()
print(f"✓ Reverted {reverted_count} papers from 'Pending' to 'New'")
print(f"✓ Reverted {reverted_count} papers from '{processing_status}' to previous status")
else:
print("✓ No pending papers to revert")
print("✓ No papers in processing status to revert")
except Exception as e:
print(f"⚠ Error setting scraper state: {e}")
def main():
print("=== QUICK SCRAPER FIX ===")
print("=== QUICK SCRAPER FIX (APScheduler) ===")
print(f"Time: {datetime.now()}")
print()
# Step 1: Stop scraper state
# Step 1: Stop scraper state and revert papers
stop_scraper_state()
# Step 2: Kill all Celery processes
kill_celery_processes()
# Step 2: Stop all APScheduler jobs
stop_apscheduler_jobs()
# Step 3: Kill any running Flask processes
kill_python_processes()
print()
print("=== FIX COMPLETE ===")
print("The scraper has been stopped and all tasks terminated.")
print("You can now restart the Celery workers with:")
print(" make celery")
print("or")
print("You can now restart the application with:")
print(" make run")
print("or")
print(" python -m flask --app scipaperloader run")
if __name__ == "__main__":
main()

View File

@ -1,16 +1,17 @@
#!/usr/bin/env python3
"""
Test script for verifying the paper reversion fix.
Test script for verifying the paper reversion fix with APScheduler.
This script:
1. Simulates stopping the scraper
2. Checks that all pending papers were reverted to their previous status
3. Ensures all running tasks were terminated
1. Creates test papers and simulates processing
2. Tests the stop_scraper functionality
3. Checks that all pending papers were reverted to their previous status
4. Ensures all running tasks were terminated
"""
import os
import sys
import time
from datetime import datetime
from datetime import datetime, UTC, timedelta
from sqlalchemy import func
from flask import Flask
@ -21,81 +22,136 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../.
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ActivityLog, ScraperState
from scipaperloader.celery import celery
from scipaperloader.scrapers.factory import get_scraper
from scipaperloader.scrapers.manager import ScraperManager
print("[DEBUG] Initializing Flask app...")
app = create_app()
print("[DEBUG] Flask app initialized.")
def test_stop_scraper():
"""Test the stop_scraper functionality"""
"""Test the stop_scraper functionality with proper APScheduler integration"""
print("[DEBUG] Entering app context...")
with app.app_context():
# First check current scraper state
print("[DEBUG] App context entered.")
# Clear existing test data
print("[DEBUG] Clearing existing test data...")
PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
db.session.commit()
print("[DEBUG] Existing test data cleared.")
# Get scraper configuration
scraper = get_scraper()
input_statuses = scraper.get_input_statuses()
output_statuses = scraper.get_output_statuses()
if not input_statuses:
print("❌ No input statuses found for current scraper")
return
input_status = input_statuses[0] # Use first input status
processing_status = output_statuses.get("processing", "Processing")
print(f"[DEBUG] Using input status: {input_status}")
print(f"[DEBUG] Using processing status: {processing_status}")
# Create test papers in input status
test_papers = []
print("[DEBUG] Creating test papers...")
for i in range(3):
test_paper = PaperMetadata()
test_paper.title = f"Test Paper {i+1}"
test_paper.doi = f"10.1234/test{i+1}"
test_paper.status = input_status
test_paper.created_at = datetime.now(UTC)
test_paper.updated_at = datetime.now(UTC)
db.session.add(test_paper)
test_papers.append(test_paper)
db.session.commit()
print(f"[DEBUG] Created {len(test_papers)} test papers in '{input_status}' status.")
# Simulate some papers being moved to processing status
print("[DEBUG] Simulating papers in processing...")
for i, paper in enumerate(test_papers[:2]): # Move first 2 papers to processing
paper.previous_status = paper.status # Store previous status
paper.status = processing_status
paper.updated_at = datetime.now(UTC)
db.session.commit()
print(f"[DEBUG] Moved 2 papers to '{processing_status}' status.")
# Check current scraper state
scraper_state = ScraperState.get_current_state()
print(f"Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")
print(f"[DEBUG] Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")
# Check if there are any papers in "Pending" state
pending_count = PaperMetadata.query.filter_by(status="Pending").count()
print(f"Papers in 'Pending' state before stopping: {pending_count}")
# Check paper counts before stopping
input_count = PaperMetadata.query.filter_by(status=input_status).count()
processing_count = PaperMetadata.query.filter_by(status=processing_status).count()
print(f"[DEBUG] Papers before stopping: {input_count} in '{input_status}', {processing_count} in '{processing_status}'")
if pending_count == 0:
print("No papers in 'Pending' state to test with.")
print("Would you like to create a test paper in Pending state? (y/n)")
choice = input().lower()
if choice == 'y':
# Create a test paper
paper = PaperMetadata(
title="Test Paper for Reversion",
doi="10.1234/test.123",
status="Pending",
previous_status="New", # Test value we expect to be reverted to
created_at=datetime.utcnow(),
updated_at=datetime.utcnow()
# Test APScheduler job management
scheduler = app.config.get('SCHEDULER')
if scheduler:
print("[DEBUG] Testing APScheduler job management...")
# Create some test jobs using the correct API
for paper in test_papers:
job_id = scheduler.schedule_paper_processing(
paper_id=paper.id,
delay_seconds=60, # 1 minute from now
job_id=f"test_paper_process_{paper.id}"
)
db.session.add(paper)
db.session.commit()
print(f"Created test paper with ID {paper.id}, status='Pending', previous_status='New'")
pending_count = 1
print(f"[DEBUG] Scheduled job {job_id} for paper {paper.id}")
# Simulate the stop_scraper API call
from scipaperloader.blueprints.scraper import revert_pending_papers
print("Reverting pending papers...")
reverted = revert_pending_papers()
print(f"Reverted {reverted} papers from 'Pending' state")
jobs_before = len(scheduler.get_paper_jobs())
print(f"[DEBUG] Created {jobs_before} test jobs in APScheduler")
# Check if any papers are still in "Pending" state
still_pending = PaperMetadata.query.filter_by(status="Pending").count()
print(f"Papers still in 'Pending' state after stopping: {still_pending}")
# Test the manager's stop_scraper method
print("[DEBUG] Testing ScraperManager.stop_scraper()...")
manager = ScraperManager()
result = manager.stop_scraper()
# List any that were reverted and their current status
if reverted > 0:
print("\nPapers that were reverted:")
recent_logs = ActivityLog.query.filter_by(action="revert_pending").order_by(
ActivityLog.timestamp.desc()).limit(10).all()
print(f"[DEBUG] stop_scraper result: {result}")
for log in recent_logs:
paper = PaperMetadata.query.get(log.paper_id)
if paper:
print(f"Paper ID {paper.id}: '{paper.title}' - Now status='{paper.status}'")
# Check jobs after stopping
jobs_after = len(scheduler.get_paper_jobs())
print(f"[DEBUG] Jobs after stopping: {jobs_after} (should be 0)")
# Check active celery tasks
i = celery.control.inspect()
active = i.active() or {}
reserved = i.reserved() or {}
scheduled = i.scheduled() or {}
if jobs_after == 0:
print("✅ All APScheduler jobs successfully revoked")
else:
print(f"{jobs_after} jobs still exist after revocation")
else:
print("❌ APScheduler not found in app config")
active_count = sum(len(tasks) for worker, tasks in active.items())
reserved_count = sum(len(tasks) for worker, tasks in reserved.items())
scheduled_count = sum(len(tasks) for worker, tasks in scheduled.items())
# Check paper counts after stopping
input_count_after = PaperMetadata.query.filter_by(status=input_status).count()
processing_count_after = PaperMetadata.query.filter_by(status=processing_status).count()
print(f"[DEBUG] Papers after stopping: {input_count_after} in '{input_status}', {processing_count_after} in '{processing_status}'")
print(f"\nCurrently {active_count} active, {reserved_count} reserved, and {scheduled_count} scheduled tasks")
# Verify that processing papers were reverted
if processing_count_after == 0 and input_count_after >= processing_count:
print("✅ Papers successfully reverted from processing to previous status")
else:
print(f"❌ Paper reversion failed: expected 0 processing papers, got {processing_count_after}")
# Print conclusion
if still_pending == 0 and reverted > 0:
print("\nSUCCESS: All pending papers were properly reverted!")
elif still_pending > 0:
print(f"\nWARNING: {still_pending} papers are still in 'Pending' state!")
elif pending_count == 0 and reverted == 0:
print("\nNo papers to revert. Can't fully test.")
# Check scraper state after stopping
scraper_state_after = ScraperState.get_current_state()
print(f"[DEBUG] Scraper state after stopping: active={scraper_state_after.is_active}, paused={scraper_state_after.is_paused}")
if __name__ == "__main__":
test_stop_scraper()
if not scraper_state_after.is_active and not scraper_state_after.is_paused:
print("✅ Scraper state correctly set to inactive")
else:
print("❌ Scraper state not properly updated")
# Clean up test data
print("[DEBUG] Cleaning up test data...")
PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
db.session.commit()
print("[DEBUG] Test data cleaned up.")
print("[DEBUG] Starting test_stop_scraper...")
test_stop_scraper()
print("[DEBUG] test_stop_scraper completed.")