From b09c6f1b9b1b66c405bb4d01481640bec1d65104 Mon Sep 17 00:00:00 2001
From: Michael Beck
Date: Wed, 16 Apr 2025 12:28:39 +0200
Subject: [PATCH] adds initial scraper logic

---
 scipaperloader/__init__.py                  |   2 +-
 scipaperloader/blueprints/__init__.py       |   6 +-
 scipaperloader/blueprints/api.py            |  50 ++
 scipaperloader/blueprints/scraper.py        | 344 ++++
 scipaperloader/templates/nav.html.jinja     |   3 +
 scipaperloader/templates/papers.html.jinja  |  23 +-
 scipaperloader/templates/scraper.html.jinja | 581 ++++++++++++++++++++
 7 files changed, 1000 insertions(+), 9 deletions(-)
 create mode 100644 scipaperloader/blueprints/api.py
 create mode 100644 scipaperloader/blueprints/scraper.py
 create mode 100644 scipaperloader/templates/scraper.html.jinja

diff --git a/scipaperloader/__init__.py b/scipaperloader/__init__.py
index 2e7f21e..2e4151f 100644
--- a/scipaperloader/__init__.py
+++ b/scipaperloader/__init__.py
@@ -18,7 +18,7 @@ def create_app(test_config=None):
         app.config.update(test_config)
 
     db.init_app(app)
-    migrate = Migrate(app, db) # Add this line to initialize Flask-Migrate
+    migrate = Migrate(app, db)
 
     with app.app_context():
         db.create_all()
diff --git a/scipaperloader/blueprints/__init__.py b/scipaperloader/blueprints/__init__.py
index ad98b8b..c6cefe1 100644
--- a/scipaperloader/blueprints/__init__.py
+++ b/scipaperloader/blueprints/__init__.py
@@ -6,6 +6,8 @@ from .papers import bp as papers_bp
 from .upload import bp as upload_bp
 from .schedule import bp as schedule_bp
 from .logger import bp as logger_bp
+from .api import bp as api_bp
+from .scraper import bp as scraper_bp
 
 
 def register_blueprints(app: Flask):
@@ -14,4 +16,6 @@ def register_blueprints(app: Flask):
     app.register_blueprint(papers_bp, url_prefix='/papers')
     app.register_blueprint(upload_bp, url_prefix='/upload')
     app.register_blueprint(schedule_bp, url_prefix='/schedule')
-    app.register_blueprint(logger_bp, url_prefix='/logs')
\ No newline at end of file
+    app.register_blueprint(logger_bp, url_prefix='/logs')
+    app.register_blueprint(api_bp, url_prefix='/api')
+    app.register_blueprint(scraper_bp, url_prefix='/scraper')
\ No newline at end of file
diff --git a/scipaperloader/blueprints/api.py b/scipaperloader/blueprints/api.py
new file mode 100644
index 0000000..e0c3d8f
--- /dev/null
+++ b/scipaperloader/blueprints/api.py
@@ -0,0 +1,50 @@
+from datetime import datetime
+from flask import Blueprint, jsonify, request
+from ..models import ActivityLog, ActivityCategory
+
+bp = Blueprint("api", __name__, url_prefix="/api")
+
+@bp.route("/activity_logs")
+def get_activity_logs():
+    """Get activity logs with filtering options."""
+    # Get query parameters
+    category = request.args.get("category")
+    action = request.args.get("action")
+    after = request.args.get("after")
+    limit = request.args.get("limit", 20, type=int)
+
+    # Build query
+    query = ActivityLog.query
+
+    if category:
+        query = query.filter(ActivityLog.category == category)
+
+    if action:
+        query = query.filter(ActivityLog.action == action)
+
+    if after:
+        try:
+            after_date = datetime.fromisoformat(after.replace("Z", "+00:00"))
+            query = query.filter(ActivityLog.timestamp > after_date)
+        except (ValueError, TypeError):
+            pass
+
+    # Order by most recent first and limit results
+    logs = query.order_by(ActivityLog.timestamp.desc()).limit(limit).all()
+
+    # Format the results
+    result = []
+    for log in logs:
+        log_data = {
+            "id": log.id,
+            "timestamp": log.timestamp.isoformat(),
+            "category": log.category,
+            "action": log.action,
+            "description": log.description,
+            "status": log.status,
+            "paper_id": log.paper_id,
+            "extra_data": log.extra_data
+        }
+        result.append(log_data)
+
+    return jsonify(result)
\ No newline at end of file
diff --git a/scipaperloader/blueprints/scraper.py b/scipaperloader/blueprints/scraper.py
new file mode 100644
index 0000000..499174b
--- /dev/null
+++ b/scipaperloader/blueprints/scraper.py
@@ -0,0 +1,344 @@
+import random
+import json
+from datetime import datetime, timedelta
+from flask import Blueprint, jsonify, render_template, request, current_app
+from ..models import ScheduleConfig, VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory
+from ..db import db
+from ..celery import celery
+
+bp = Blueprint("scraper", __name__, url_prefix="/scraper")
+
+# Global variables to track scraper state
+SCRAPER_ACTIVE = False
+SCRAPER_PAUSED = False
+
+@bp.route("/")
+def index():
+    """Render the scraper control panel."""
+    volume_config = VolumeConfig.query.first()
+    schedule_config = {record.hour: record.weight for record in ScheduleConfig.query.all()}
+
+    return render_template(
+        "scraper.html.jinja",
+        volume_config=volume_config,
+        schedule_config=schedule_config,
+        scraper_active=SCRAPER_ACTIVE,
+        scraper_paused=SCRAPER_PAUSED
+    )
+
+@bp.route("/start", methods=["POST"])
+def start_scraper():
+    """Start the scraper."""
+    global SCRAPER_ACTIVE, SCRAPER_PAUSED
+
+    if not SCRAPER_ACTIVE:
+        SCRAPER_ACTIVE = True
+        SCRAPER_PAUSED = False
+
+        # Log the action
+        ActivityLog.log_scraper_command(
+            action="start_scraper",
+            status="success",
+            description="Scraper started manually"
+        )
+
+        # Start the scheduler task
+        task = dummy_scraper_scheduler.delay()
+
+        return jsonify({
+            "success": True,
+            "message": "Scraper started",
+            "task_id": task.id
+        })
+    else:
+        return jsonify({
+            "success": False,
+            "message": "Scraper is already running"
+        })
+
+@bp.route("/stop", methods=["POST"])
+def stop_scraper():
+    """Stop the scraper."""
+    global SCRAPER_ACTIVE, SCRAPER_PAUSED
+
+    if SCRAPER_ACTIVE:
+        SCRAPER_ACTIVE = False
+        SCRAPER_PAUSED = False
+
+        ActivityLog.log_scraper_command(
+            action="stop_scraper",
+            status="success",
+            description="Scraper stopped manually"
+        )
+
+        return jsonify({
+            "success": True,
+            "message": "Scraper stopped"
+        })
+    else:
+        return jsonify({
+            "success": False,
+            "message": "Scraper is not running"
+        })
+
+@bp.route("/pause", methods=["POST"])
+def pause_scraper():
+    """Pause the scraper."""
+    global SCRAPER_ACTIVE, SCRAPER_PAUSED
+
+    if SCRAPER_ACTIVE and not SCRAPER_PAUSED:
+        SCRAPER_PAUSED = True
+
+        ActivityLog.log_scraper_command(
+            action="pause_scraper",
+            status="success",
+            description="Scraper paused manually"
+        )
+
+        return jsonify({
+            "success": True,
+            "message": "Scraper paused"
+        })
+    elif SCRAPER_ACTIVE and SCRAPER_PAUSED:
+        SCRAPER_PAUSED = False
+
+        ActivityLog.log_scraper_command(
+            action="resume_scraper",
+            status="success",
+            description="Scraper resumed manually"
+        )
+
+        return jsonify({
+            "success": True,
+            "message": "Scraper resumed"
+        })
+    else:
+        return jsonify({
+            "success": False,
+            "message": "Scraper is not running"
+        })
+
+@bp.route("/status")
+def scraper_status():
+    """Get the current status of the scraper."""
+    return jsonify({
+        "active": SCRAPER_ACTIVE,
+        "paused": SCRAPER_PAUSED,
+        "current_hour": datetime.now().hour,
+    })
+
+@bp.route("/stats")
+def scraper_stats():
+    """Get scraper statistics for the dashboard."""
+    # Get the last 24 hours of activity
+    hours = 24
+    if request.args.get('hours'):
+        try:
+            hours = int(request.args.get('hours'))
+        except ValueError:
+            pass
+
+    cutoff_time = datetime.utcnow().replace(
+        minute=0, second=0, microsecond=0
+    )
+
+    # Get activity logs for scraper actions
+    logs = ActivityLog.query.filter(
+        ActivityLog.category == ActivityCategory.SCRAPER_ACTIVITY.value,
+        ActivityLog.timestamp >= cutoff_time - timedelta(hours=hours)
+    ).all()
+
+    # Group by hour and status
+    stats = {}
+    for hour in range(hours):
+        target_hour = (cutoff_time.hour - hour) % 24
+        stats[target_hour] = {
+            "success": 0,
+            "error": 0,
+            "pending": 0,
+            "hour": target_hour,
+        }
+
+    for log in logs:
+        hour = log.timestamp.hour
+        if hour in stats:
+            if log.status == "success":
+                stats[hour]["success"] += 1
+            elif log.status == "error":
+                stats[hour]["error"] += 1
+            elif log.status == "pending":
+                stats[hour]["pending"] += 1
+
+    # Convert to list for easier consumption by JavaScript
+    result = [stats[hour] for hour in sorted(stats.keys())]
+
+    return jsonify(result)
+
+@bp.route("/update_config", methods=["POST"])
+def update_config():
+    """Update scraper configuration."""
+    data = request.json
+
+    if "volume" in data:
+        try:
+            new_volume = float(data["volume"])
+            volume_config = VolumeConfig.query.first()
+            if not volume_config:
+                volume_config = VolumeConfig(volume=new_volume)
+                db.session.add(volume_config)
+            else:
+                old_value = volume_config.volume
+                volume_config.volume = new_volume
+                ActivityLog.log_config_change(
+                    config_key="scraper_volume",
+                    old_value=old_value,
+                    new_value=new_volume,
+                    description="Updated scraper volume"
+                )
+
+            db.session.commit()
+        except (ValueError, TypeError):
+            return jsonify({"success": False, "message": "Invalid volume value"})
+
+    if "schedule" in data:
+        try:
+            schedule = data["schedule"]
+
+            for hour_str, weight in schedule.items():
+                hour = int(hour_str)
+                weight = float(weight)
+
+                if 0 <= hour <= 23 and weight >= 0:
+                    schedule_config = ScheduleConfig.query.get(hour)
+                    if not schedule_config:
+                        schedule_config = ScheduleConfig(hour=hour, weight=weight)
+                        db.session.add(schedule_config)
+                    else:
+                        old_value = schedule_config.weight
+                        schedule_config.weight = weight
+                        ActivityLog.log_config_change(
+                            config_key=f"schedule_hour_{hour}",
+                            old_value=old_value,
+                            new_value=weight,
+                            description=f"Updated schedule weight for hour {hour}"
+                        )
+
+            db.session.commit()
+        except (ValueError, TypeError):
+            return jsonify({"success": False, "message": "Invalid schedule format"})
+
+    return jsonify({"success": True, "message": "Configuration updated"})
+
+# Define the Celery tasks
+@celery.task(bind=True)
+def dummy_scraper_scheduler(self):
+    """Main scheduler task for the dummy scraper."""
+    global SCRAPER_ACTIVE, SCRAPER_PAUSED
+
+    if not SCRAPER_ACTIVE:
+        return {"status": "Scraper not active"}
+
+    if SCRAPER_PAUSED:
+        return {"status": "Scraper paused"}
+
+    # Calculate how many papers to scrape based on current hour and configuration
+    current_hour = datetime.now().hour
+    hour_config = ScheduleConfig.query.get(current_hour)
+    volume_config = VolumeConfig.query.first()
+
+    if not hour_config or not volume_config:
+        return {"status": "Missing configuration"}
+
+    # Calculate papers to scrape this hour
+    hourly_rate = volume_config.volume / 24 # Base rate per hour
+    adjusted_rate = hourly_rate * (1 / hour_config.weight) # Adjust by weight
+    papers_to_scrape = int(adjusted_rate)
+
+    # Log the scheduling decision
+    ActivityLog.log_scraper_activity(
+        action="schedule_papers",
+        status="success",
+        description=f"Scheduled {papers_to_scrape} papers for scraping at hour {current_hour}",
+        hourly_rate=hourly_rate,
+        weight=hour_config.weight,
+        adjusted_rate=adjusted_rate,
+    )
+
+    # Launch individual scraping tasks
+    for _ in range(papers_to_scrape):
+        if not SCRAPER_ACTIVE or SCRAPER_PAUSED:
+            break
+
+        # Schedule a new paper to be scraped
+        dummy_scrape_paper.delay()
+
+    # Schedule the next run in 5 minutes if still active
+    if SCRAPER_ACTIVE:
+        dummy_scraper_scheduler.apply_async(countdown=300) # 5 minutes
+
+    return {"status": "success", "papers_scheduled": papers_to_scrape}
+
+@celery.task(bind=True)
+def dummy_scrape_paper(self):
+    """Simulate scraping a single paper."""
+    # Simulate success or failure
+    success = random.random() > 0.3 # 70% success rate
+
+    # Simulate processing time
+    import time
+    time.sleep(random.randint(2, 5)) # 2-5 seconds
+
+    if success:
+        # Create a dummy paper
+        new_paper = PaperMetadata(
+            title=f"Dummy Paper {random.randint(1000, 9999)}",
+            doi=f"10.1234/dummy.{random.randint(1000, 9999)}",
+            journal=random.choice([
+                "Nature", "Science", "PLOS ONE", "Journal of Dummy Research",
+                "Proceedings of the Dummy Society", "Cell", "Dummy Review Letters"
+            ]),
+            type="article",
+            language="en",
+            published_online=datetime.now().date(),
+            status="Done",
+            file_path="/path/to/dummy/paper.pdf"
+        )
+
+        db.session.add(new_paper)
+        db.session.commit()
+
+        # Log the successful scrape
+        ActivityLog.log_scraper_activity(
+            action="scrape_paper",
+            paper_id=new_paper.id,
+            status="success",
+            description=f"Successfully scraped paper {new_paper.doi}"
+        )
+
+        return {
+            "success": True,
+            "paper_id": new_paper.id,
+            "title": new_paper.title,
+            "doi": new_paper.doi
+        }
+    else:
+        # Log the failed scrape
+        error_message = random.choice([
+            "Connection timeout",
+            "404 Not Found",
+            "Access denied",
+            "Invalid DOI format",
+            "PDF download failed",
+            "Rate limited by publisher"
+        ])
+
+        ActivityLog.log_scraper_activity(
+            action="scrape_paper",
+            status="error",
+            description=f"Failed to scrape paper: {error_message}"
+        )
+
+        return {
+            "success": False,
+            "error": error_message
+        }
\ No newline at end of file
diff --git a/scipaperloader/templates/nav.html.jinja b/scipaperloader/templates/nav.html.jinja
index 85e3809..a0dd9b4 100644
--- a/scipaperloader/templates/nav.html.jinja
+++ b/scipaperloader/templates/nav.html.jinja
@@ -7,6 +7,9 @@