fixes dummy scraper file creation and single paper processing
This commit is contained in:
parent 8f2375215d
commit 012163ba3f
@@ -1,6 +1,7 @@
 from datetime import datetime
 from flask import Blueprint, jsonify, request
-from ..models import ActivityLog, ActivityCategory
+from ..models import ActivityLog, ActivityCategory, PaperMetadata
 from .. import db
 
 bp = Blueprint("api", __name__, url_prefix="/api")
+
@@ -47,4 +48,91 @@ def get_activity_logs():
         }
         result.append(log_data)
 
-    return jsonify(result)
+    return jsonify(result)
+
+@bp.route("/papers")
+def search_papers():
+    """
+    Search for papers by title, DOI, or ID.
+
+    Query parameters:
+    - query: Search term (required)
+    - limit: Maximum number of results (default: 10)
+    """
+    query = request.args.get('query', '')
+    limit = int(request.args.get('limit', 10))
+
+    if not query:
+        return jsonify({
+            "success": False,
+            "message": "Search query is required",
+            "papers": []
+        })
+
+    # Try to parse query as an ID first
+    try:
+        paper_id = int(query)
+        paper_by_id = PaperMetadata.query.get(paper_id)
+        if paper_by_id:
+            return jsonify({
+                "success": True,
+                "papers": [{
+                    "id": paper_by_id.id,
+                    "title": paper_by_id.title,
+                    "doi": paper_by_id.doi,
+                    "journal": paper_by_id.journal,
+                    "status": paper_by_id.status,
+                    "created_at": paper_by_id.created_at.isoformat() if paper_by_id.created_at else None,
+                    "updated_at": paper_by_id.updated_at.isoformat() if paper_by_id.updated_at else None
+                }]
+            })
+    except ValueError:
+        pass  # Not an ID, continue with text search
+
+    # Search in title and DOI
+    search_term = f"%{query}%"
+    papers = PaperMetadata.query.filter(
+        db.or_(
+            PaperMetadata.title.ilike(search_term),
+            PaperMetadata.doi.ilike(search_term)
+        )
+    ).limit(limit).all()
+
+    return jsonify({
+        "success": True,
+        "papers": [{
+            "id": paper.id,
+            "title": paper.title,
+            "doi": paper.doi,
+            "journal": paper.journal,
+            "status": paper.status,
+            "created_at": paper.created_at.isoformat() if paper.created_at else None,
+            "updated_at": paper.updated_at.isoformat() if paper.updated_at else None
+        } for paper in papers]
+    })
+
+@bp.route("/papers/<int:paper_id>")
+def get_paper(paper_id):
+    """Get details of a single paper by ID."""
+    paper = PaperMetadata.query.get(paper_id)
+
+    if not paper:
+        return jsonify({
+            "success": False,
+            "message": f"Paper with ID {paper_id} not found"
+        })
+
+    return jsonify({
+        "success": True,
+        "paper": {
+            "id": paper.id,
+            "title": paper.title,
+            "doi": paper.doi,
+            "journal": paper.journal,
+            "status": paper.status,
+            "error_msg": paper.error_msg,
+            "file_path": paper.file_path,
+            "created_at": paper.created_at.isoformat() if paper.created_at else None,
+            "updated_at": paper.updated_at.isoformat() if paper.updated_at else None
+        }
+    })
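For reference, the search endpoint added above can be exercised from a short script. A minimal sketch, not part of the commit; the localhost:5000 base URL and a running dev server are assumptions:

    import requests

    BASE = "http://localhost:5000"  # assumption: local dev server

    # Text search: `query` is matched against title and DOI via ILIKE,
    # and `limit` caps the number of rows returned (default 10).
    resp = requests.get(f"{BASE}/api/papers", params={"query": "neural", "limit": 5})
    for paper in resp.json().get("papers", []):
        print(paper["id"], paper["status"], paper["title"])

    # A purely numeric query is first tried as a paper-ID lookup.
    print(requests.get(f"{BASE}/api/papers", params={"query": "42"}).json())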
@@ -2,7 +2,7 @@
 from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
 from ..db import db
 # Import the new model
-from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
+from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
 from ..defaults import MAX_VOLUME
 import os  # Import os for path validation
 from scipaperloader.scrapers import __path__ as scrapers_path
@@ -402,4 +402,36 @@ def api_update_config():
         return jsonify({
             "success": False,
             "message": f"Unexpected error: {str(e)}"
-        })
+        })
+
+
+@bp.route("/delete_all_papers", methods=["POST"])
+def delete_all_papers():
+    """Delete all paper records from the database."""
+    try:
+        # Count papers before deletion for logging purposes
+        paper_count = PaperMetadata.query.count()
+
+        # Delete all records from the PaperMetadata table
+        PaperMetadata.query.delete()
+        db.session.commit()
+
+        # Log the action
+        ActivityLog.log_config_change(
+            config_key="database",
+            old_value=f"{paper_count} papers",
+            new_value="0 papers",
+            description=f"Deleted all {paper_count} papers from the database"
+        )
+
+        flash(f"Successfully deleted all {paper_count} papers from the database.", "success")
+    except Exception as e:
+        db.session.rollback()
+        flash(f"Failed to delete papers: {str(e)}", "error")
+        ActivityLog.log_error(
+            error_message=f"Failed to delete all papers: {str(e)}",
+            exception=e,
+            source="config.delete_all_papers"
+        )
+
+    return redirect(url_for("config.general"))
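Since the new route is form-driven (it flashes a message and redirects rather than returning JSON), a smoke test is easiest with Flask's test client. A minimal sketch; the create_app factory name and the /config URL prefix are assumptions not shown in this diff:

    from scipaperloader import create_app  # assumption: standard app-factory entry point

    app = create_app()
    with app.test_client() as client:
        # POST with no body; the route deletes every PaperMetadata row
        resp = client.post("/config/delete_all_papers")
        assert resp.status_code == 302  # redirects back to config.general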
@@ -6,13 +6,13 @@ import os  # Import os for path joining
 from datetime import datetime, timedelta
 from flask import Blueprint, jsonify, render_template, request, current_app, flash
 # Import the new model
-from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
+from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig, ScraperModuleConfig
 from ..db import db
 from ..celery import celery
 from ..defaults import MAX_VOLUME
 from celery.schedules import crontab
 from sqlalchemy import func
-from scipaperloader.scrapers.factory import get_scraper
+from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers
 
 bp = Blueprint("scraper", __name__, url_prefix="/scraper")
 
@@ -767,3 +767,149 @@ def process_paper(self, paper_id):
         "status": result.status,
         "message": result.message
     }
+
+
+@celery.task(bind=True)
+def process_paper_with_scraper(self, paper_id, scraper_module):
+    """Process a paper using a specific scraper module."""
+    from scipaperloader.models import PaperMetadata
+    import importlib
+    from ..scrapers.base import BaseScraper
+
+    paper = PaperMetadata.query.get(paper_id)
+    if not paper:
+        return {"status": "error", "message": f"Paper with ID {paper_id} not found"}
+
+    try:
+        # Import the specified scraper module
+        module = importlib.import_module(f"scipaperloader.scrapers.{scraper_module}")
+        cls = getattr(module, "Scraper")
+
+        # Validate that it's a BaseScraper
+        if not issubclass(cls, BaseScraper):
+            error_msg = f"Scraper class in module '{scraper_module}' does not inherit from BaseScraper"
+            ActivityLog.log_error(
+                error_message=error_msg,
+                source="process_paper_with_scraper"
+            )
+            return {"status": "error", "message": error_msg}
+
+        # Instantiate and use the scraper
+        scraper = cls()
+        result = scraper.scrape(paper.doi)
+
+        return {
+            "paper_id": paper_id,
+            "status": result.status,
+            "message": result.message,
+            "scraper": scraper_module
+        }
+
+    except (ImportError, AttributeError) as e:
+        error_msg = f"Failed to load scraper module '{scraper_module}': {str(e)}"
+        ActivityLog.log_error(
+            error_message=error_msg,
+            source="process_paper_with_scraper"
+        )
+        return {"status": "error", "message": error_msg}
+    except Exception as e:
+        error_msg = f"Error processing paper with scraper '{scraper_module}': {str(e)}"
+        ActivityLog.log_error(
+            error_message=error_msg,
+            source="process_paper_with_scraper",
+            exception=e
+        )
+        return {"status": "error", "message": error_msg}
+
+
+@bp.route("/process_single/<int:paper_id>", methods=["POST"])
+def process_single_paper(paper_id):
+    """Process a single paper by ID."""
+    try:
+        # Check if paper exists
+        paper = PaperMetadata.query.get(paper_id)
+        if not paper:
+            return jsonify({
+                "success": False,
+                "message": f"Paper with ID {paper_id} not found"
+            })
+
+        # Get the scraper module name from the request
+        scraper_module = None
+        if request.is_json and request.json:
+            scraper_module = request.json.get('scraper_module')
+
+        # Update status to Pending
+        old_status = paper.status
+        paper.status = "Pending"
+        paper.updated_at = datetime.utcnow()
+        db.session.commit()
+
+        # Log that we're processing this paper
+        ActivityLog.log_scraper_activity(
+            action="manual_process_paper",
+            paper_id=paper_id,
+            status="pending",
+            description=f"Manual processing initiated for paper: {paper.title}" +
+                        (f" using {scraper_module} scraper" if scraper_module else "")
+        )
+
+        # Start the task (without delay since it's manual)
+        if scraper_module:
+            task = process_paper_with_scraper.delay(paper_id, scraper_module)
+        else:
+            task = process_paper.delay(paper_id)
+
+        return jsonify({
+            "success": True,
+            "task_id": task.id,
+            "message": f"Processing paper '{paper.title}' (ID: {paper_id})" +
+                       (f" using {scraper_module} scraper" if scraper_module else "") +
+                       f". Previous status: {old_status}"
+        })
+
+    except Exception as e:
+        db.session.rollback()
+        ActivityLog.log_error(
+            error_message=f"Failed to process paper {paper_id}: {str(e)}",
+            exception=e,
+            source="process_single_paper"
+        )
+        return jsonify({
+            "success": False,
+            "message": f"Error: {str(e)}"
+        })
+
+
+@bp.route("/available_scrapers")
+def available_scrapers():
+    """Get list of available scraper modules."""
+    from scipaperloader.scrapers.factory import get_available_scrapers
+    from ..models import ScraperModuleConfig
+
+    try:
+        scrapers = get_available_scrapers()
+        current_module = ScraperModuleConfig.get_current_module()
+
+        return jsonify({
+            "success": True,
+            "scrapers": [
+                {
+                    "name": s["name"],
+                    "description": s["description"],
+                    "is_current": s["name"] == current_module
+                } for s in scrapers
+            ],
+            "current": current_module
+        })
+
+    except Exception as e:
+        ActivityLog.log_error(
+            error_message=f"Failed to get available scrapers: {str(e)}",
+            source="available_scrapers"
+        )
+        return jsonify({
+            "success": False,
+            "message": f"Error: {str(e)}",
+            "scrapers": []
+        })
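The manual-processing flow added above can be driven end to end from a script: pick a module from /scraper/available_scrapers, then POST it to /scraper/process_single/<id>. A minimal sketch; the base URL, paper ID 42, and a running Celery worker are assumptions:

    import requests

    BASE = "http://localhost:5000"  # assumption: local dev server

    # Discover scraper modules; "current" marks the system default.
    listing = requests.get(f"{BASE}/scraper/available_scrapers").json()
    module = listing["scrapers"][0]["name"] if listing.get("scrapers") else None

    # Queue the paper; omitting scraper_module falls back to process_paper,
    # i.e. the system-default scraper.
    resp = requests.post(f"{BASE}/scraper/process_single/42",
                         json={"scraper_module": module})
    print(resp.json())  # success flag, task_id, and a human-readable message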
@@ -1,5 +1,6 @@
 import time
 import random
+import os
 from datetime import datetime
 from .base import BaseScraper, ScrapeResult
 from flask import current_app
@@ -31,15 +32,47 @@ class Scraper(BaseScraper):
         success = random.random() < 0.8
 
         if success:
-            # Get download path and simulate file creation
+            # Get download path and create an actual dummy file
             download_path = DownloadPathConfig.get_path()
             file_name = f"{doi.replace('/', '_')}.pdf"
             file_path = f"{download_path}/{file_name}"
 
-            # Update paper status
-            paper.status = "Done"
-            paper.file_path = file_path
-            paper.error_msg = None
+            # Create directory if it doesn't exist
+            os.makedirs(download_path, exist_ok=True)
+
+            # Create a simple dummy PDF file
+            try:
+                with open(file_path, 'w') as f:
+                    f.write(f"Dummy PDF file for paper with DOI: {doi}\n")
+                    f.write(f"Title: {paper.title}\n")
+                    f.write(f"Journal: {paper.journal}\n")
+                    f.write(f"Generated: {datetime.utcnow().isoformat()}\n")
+                    f.write("\nThis is a dummy file created by the SciPaperLoader dummy scraper.\n")
+
+                # Update paper status
+                paper.status = "Done"
+                paper.file_path = file_path
+                paper.error_msg = None
+            except Exception as e:
+                # Handle file creation errors
+                error_msg = f"Failed to create dummy file: {str(e)}"
+                paper.status = "Failed"
+                paper.error_msg = error_msg
+
+                ActivityLog.log_scraper_activity(
+                    action="dummy_scrape_file_error",
+                    status="error",
+                    description=error_msg,
+                    paper_id=paper.id
+                )
+
+                return ScrapeResult(
+                    status="error",
+                    message=error_msg,
+                    data={"error_code": "file_creation_error"},
+                    duration=time.time() - start_time,
+                    timestamp=datetime.utcnow()
+                )
+
             # Log success
             ActivityLog.log_scraper_activity(
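The file-creation logic above is easy to sanity-check in isolation. A minimal sketch of the same pattern, with hypothetical values standing in for DownloadPathConfig.get_path() and the paper fields:

    import os
    from datetime import datetime

    download_path = "/tmp/scipaperloader"  # hypothetical download path
    doi = "10.1234/example.5678"
    file_path = f"{download_path}/{doi.replace('/', '_')}.pdf"

    # Same idempotent directory creation the scraper now performs
    os.makedirs(download_path, exist_ok=True)
    with open(file_path, 'w') as f:
        f.write(f"Dummy PDF file for paper with DOI: {doi}\n")
        f.write(f"Generated: {datetime.utcnow().isoformat()}\n")

    assert os.path.exists(file_path)  # paper.file_path now points at a real file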
@@ -90,6 +90,31 @@
         </form>
     </div>
 </div>
+
+<!-- Database Management Section -->
+<div class="row mt-4">
+    <div class="col-12">
+        <div class="card border-danger">
+            <div class="card-header bg-danger text-white">
+                <h5>Database Management</h5>
+            </div>
+            <div class="card-body">
+                <div class="form-section">
+                    <h6>Delete All Papers</h6>
+                    <p class="text-muted">This action will permanently delete all paper records from the
+                        database. This cannot be undone.</p>
+
+                    <form method="post" action="{{ url_for('config.delete_all_papers') }}" class="mt-3"
+                        onsubmit="return confirm('WARNING: You are about to delete ALL papers from the database. This action cannot be undone. Are you sure you want to proceed?');">
+                        <button type="submit" class="btn btn-danger">
+                            <i class="fas fa-trash-alt"></i> Delete All Papers
+                        </button>
+                    </form>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
 </div>
 </div>
 </div>
@@ -36,6 +36,28 @@
         max-width: 350px;
         z-index: 1050;
     }
+
+    .search-results-container {
+        max-height: 300px;
+        overflow-y: auto;
+    }
+
+    /* Paper status badges */
+    .badge-new {
+        background-color: #17a2b8;
+    }
+
+    .badge-pending {
+        background-color: #ffc107;
+    }
+
+    .badge-done {
+        background-color: #28a745;
+    }
+
+    .badge-failed {
+        background-color: #dc3545;
+    }
 </style>
 {% endblock styles %}
 
@@ -89,6 +111,61 @@
         </div>
     </div>
 
+    <!-- New row for single paper processing -->
+    <div class="row mb-4">
+        <div class="col-12">
+            <div class="card">
+                <div class="card-header">
+                    <h5>Process Single Paper</h5>
+                </div>
+                <div class="card-body">
+                    <div class="row">
+                        <div class="col-md-6">
+                            <form id="searchPaperForm" class="mb-3">
+                                <div class="input-group">
+                                    <input type="text" id="paperSearchInput" class="form-control"
+                                        placeholder="Search paper by title, DOI, or ID...">
+                                    <button class="btn btn-outline-secondary" type="submit">Search</button>
+                                </div>
+                            </form>
+                        </div>
+                        <div class="col-md-6">
+                            <div class="form-group">
+                                <label for="scraperSelect">Scraper Module:</label>
+                                <select class="form-control" id="scraperSelect">
+                                    <option value="">Use default system scraper</option>
+                                    <!-- Available scrapers will be populated here -->
+                                </select>
+                                <div class="form-text">
+                                    Select which scraper to use for processing the paper
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+
+                    <div id="searchResults" class="mt-3 search-results-container d-none">
+                        <table class="table table-hover table-striped">
+                            <thead>
+                                <tr>
+                                    <th>ID</th>
+                                    <th>Title</th>
+                                    <th>DOI</th>
+                                    <th>Status</th>
+                                    <th>Actions</th>
+                                </tr>
+                            </thead>
+                            <tbody id="paperSearchResults">
+                                <!-- Search results will be populated here -->
+                            </tbody>
+                        </table>
+                    </div>
+
+                    <div id="processingStatus" class="alert alert-info mt-3 d-none"></div>
+                </div>
+            </div>
+        </div>
+    </div>
+
 <div class="row mb-4">
     <div class="col-12">
         <div class="card">
@@ -164,12 +241,19 @@
     const resetButton = document.getElementById('resetButton');
     const notificationsToggle = document.getElementById('notificationsToggle');
     const activityLog = document.getElementById('activityLog');
+    const searchForm = document.getElementById('searchPaperForm');
+    const searchInput = document.getElementById('paperSearchInput');
+    const searchResults = document.getElementById('searchResults');
+    const processingStatus = document.getElementById('processingStatus');
+    const paperSearchResults = document.getElementById('paperSearchResults');
+    const scraperSelect = document.getElementById('scraperSelect');
 
     // Initialize the page
     document.addEventListener('DOMContentLoaded', function () {
         initStatusPolling();
         loadActivityStats(currentTimeRange);
         loadRecentActivity();
+        loadAvailableScrapers();
 
         // Initialize event listeners
         startButton.addEventListener('click', startScraper);
@@ -177,6 +261,10 @@
         stopButton.addEventListener('click', stopScraper);
         resetButton.addEventListener('click', resetScraper);
         notificationsToggle.addEventListener('click', toggleNotifications);
+        searchForm.addEventListener('submit', function (e) {
+            e.preventDefault();
+            searchPapers();
+        });
 
         document.getElementById('volumeForm').addEventListener('submit', function (e) {
             e.preventDefault();
@@ -193,6 +281,185 @@
         });
     });
 
+    // Load available scraper modules
+    function loadAvailableScrapers() {
+        fetch('/scraper/available_scrapers')
+            .then(response => response.json())
+            .then(data => {
+                if (data.success && data.scrapers && data.scrapers.length > 0) {
+                    // Clear previous options except the default one
+                    while (scraperSelect.options.length > 1) {
+                        scraperSelect.remove(1);
+                    }
+
+                    // Add each scraper as an option
+                    data.scrapers.forEach(scraper => {
+                        const option = document.createElement('option');
+                        option.value = scraper.name;
+                        option.textContent = `${scraper.name} - ${scraper.description.substring(0, 50)}${scraper.description.length > 50 ? '...' : ''}`;
+                        if (scraper.is_current) {
+                            option.textContent += ' (system default)';
+                        }
+                        scraperSelect.appendChild(option);
+                    });
+                } else {
+                    // If no scrapers or error, add a note
+                    const option = document.createElement('option');
+                    option.disabled = true;
+                    option.textContent = 'No scrapers available';
+                    scraperSelect.appendChild(option);
+                }
+            })
+            .catch(error => {
+                console.error('Error loading scrapers:', error);
+                const option = document.createElement('option');
+                option.disabled = true;
+                option.textContent = 'Error loading scrapers';
+                scraperSelect.appendChild(option);
+            });
+    }
+
+    // Search papers function
+    function searchPapers() {
+        const query = searchInput.value.trim();
+
+        if (!query) {
+            showFlashMessage('Please enter a search term', 'warning');
+            return;
+        }
+
+        // Show loading message
+        paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Searching papers...</td></tr>';
+        searchResults.classList.remove('d-none');
+
+        // Fetch papers from API
+        fetch(`/api/papers?query=${encodeURIComponent(query)}`)
+            .then(response => response.json())
+            .then(data => {
+                if (!data.papers || data.papers.length === 0) {
+                    paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">No papers found matching your search</td></tr>';
+                    return;
+                }
+
+                paperSearchResults.innerHTML = '';
+
+                data.papers.forEach(paper => {
+                    const row = document.createElement('tr');
+
+                    // Create status badge
+                    let statusBadge = '';
+                    if (paper.status === 'New') {
+                        statusBadge = '<span class="badge bg-info">New</span>';
+                    } else if (paper.status === 'Pending') {
+                        statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
+                    } else if (paper.status === 'Done') {
+                        statusBadge = '<span class="badge bg-success">Done</span>';
+                    } else if (paper.status === 'Failed') {
+                        statusBadge = '<span class="badge bg-danger">Failed</span>';
+                    } else {
+                        statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
+                    }
+
+                    // Create process button (enabled only for papers not in 'Pending' status)
+                    const processButtonDisabled = paper.status === 'Pending' ? 'disabled' : '';
+
+                    // Truncate title if too long
+                    const truncatedTitle = paper.title.length > 70 ? paper.title.substring(0, 70) + '...' : paper.title;
+
+                    row.innerHTML = `
+                        <td>${paper.id}</td>
+                        <td title="${paper.title}">${truncatedTitle}</td>
+                        <td>${paper.doi || 'N/A'}</td>
+                        <td>${statusBadge}</td>
+                        <td>
+                            <button class="btn btn-sm btn-primary process-paper-btn"
+                                data-paper-id="${paper.id}"
+                                ${processButtonDisabled}>
+                                Process Now
+                            </button>
+                        </td>
+                    `;
+
+                    paperSearchResults.appendChild(row);
+                });
+
+                // Add event listeners to the process buttons
+                document.querySelectorAll('.process-paper-btn').forEach(btn => {
+                    btn.addEventListener('click', function () {
+                        processSinglePaper(this.getAttribute('data-paper-id'));
+                    });
+                });
+            })
+            .catch(error => {
+                console.error('Error searching papers:', error);
+                paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Error searching papers</td></tr>';
+            });
+    }
+
+    // Process a single paper
+    function processSinglePaper(paperId) {
+        // Disable all process buttons to prevent multiple clicks
+        document.querySelectorAll('.process-paper-btn').forEach(btn => {
+            btn.disabled = true;
+        });
+
+        // Show processing status
+        processingStatus.textContent = 'Processing paper...';
+        processingStatus.classList.remove('d-none');
+
+        // Get selected scraper
+        const selectedScraper = scraperSelect.value;
+
+        // Send request to process the paper
+        fetch(`/scraper/process_single/${paperId}`, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({
+                scraper_module: selectedScraper
+            })
+        })
+            .then(response => response.json())
+            .then(data => {
+                if (data.success) {
+                    processingStatus.textContent = data.message;
+                    processingStatus.className = 'alert alert-success mt-3';
+
+                    // Update status in the search results
+                    const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
+                    const statusCell = row.querySelector('td:nth-child(4)');
+                    statusCell.innerHTML = '<span class="badge bg-warning text-dark">Pending</span>';
+
+                    // Show notification
+                    showFlashMessage(data.message, 'success');
+
+                    // Set up polling to check paper status and refresh activity
+                    pollPaperStatus(paperId, 3000, 20);
+                } else {
+                    processingStatus.textContent = data.message;
+                    processingStatus.className = 'alert alert-danger mt-3';
+                    showFlashMessage(data.message, 'error');
+                }
+            })
+            .catch(error => {
+                console.error('Error processing paper:', error);
+                processingStatus.textContent = 'Error: Could not process paper';
+                processingStatus.className = 'alert alert-danger mt-3';
+                showFlashMessage('Error processing paper', 'error');
+            })
+            .finally(() => {
+                // Re-enable the process buttons after a short delay
+                setTimeout(() => {
+                    document.querySelectorAll('.process-paper-btn').forEach(btn => {
+                        if (btn.getAttribute('data-paper-id') !== paperId) {
+                            btn.disabled = false;
+                        }
+                    });
+                }, 1000);
+            });
+    }
+
     // Status polling
     function initStatusPolling() {
         updateStatus();
@@ -285,39 +552,39 @@
         if (confirm("Are you sure you want to reset the scraper? This will stop all current tasks, optionally clear non-pending papers, and restart the scraper.")) {
             // Disable button to prevent multiple clicks
             resetButton.disabled = true;
-
+
             // Show a loading message
             showFlashMessage('Resetting scraper, please wait...', 'info');
-
-            fetch('/scraper/reset', {
+
+            fetch('/scraper/reset', {
                 method: 'POST',
                 headers: {
                     'Content-Type': 'application/json'
                 },
-                body: JSON.stringify({
+                body: JSON.stringify({
                     clear_papers: true // You could make this configurable with a checkbox
-                })
+                })
             })
-            .then(response => response.json())
-            .then(data => {
-                if (data.success) {
-                    showFlashMessage('Scraper has been completely reset and restarted', 'success');
-                    // Update everything
-                    updateStatus();
-                    loadActivityStats(currentTimeRange);
-                    setTimeout(() => { loadRecentActivity(); }, 1000);
-                } else {
-                    showFlashMessage(data.message || 'Error resetting scraper', 'error');
-                }
-                // Re-enable button
-                resetButton.disabled = false;
-            })
-            .catch(error => {
-                console.error("Error resetting scraper:", error);
-                showFlashMessage('Error resetting scraper: ' + error.message, 'error');
-                // Re-enable button
-                resetButton.disabled = false;
-            });
+                .then(response => response.json())
+                .then(data => {
+                    if (data.success) {
+                        showFlashMessage('Scraper has been completely reset and restarted', 'success');
+                        // Update everything
+                        updateStatus();
+                        loadActivityStats(currentTimeRange);
+                        setTimeout(() => { loadRecentActivity(); }, 1000);
+                    } else {
+                        showFlashMessage(data.message || 'Error resetting scraper', 'error');
+                    }
+                    // Re-enable button
+                    resetButton.disabled = false;
+                })
+                .catch(error => {
+                    console.error("Error resetting scraper:", error);
+                    showFlashMessage('Error resetting scraper: ' + error.message, 'error');
+                    // Re-enable button
+                    resetButton.disabled = false;
+                });
         }
     }
 
@@ -345,6 +612,97 @@
         notificationsEnabled = notificationsToggle.checked;
     }
 
+    // Poll paper status until it changes from Pending
+    function pollPaperStatus(paperId, interval = 3000, maxAttempts = 20) {
+        let attempts = 0;
+
+        // Immediately refresh activity log to show the initial pending status
+        loadRecentActivity();
+
+        const checkStatus = () => {
+            attempts++;
+            console.log(`Checking status of paper ${paperId}, attempt ${attempts}/${maxAttempts}`);
+
+            // Fetch the current paper status
+            fetch(`/api/papers/${paperId}`)
+                .then(response => response.json())
+                .then(data => {
+                    if (data && data.paper) {
+                        const paper = data.paper;
+                        console.log(`Paper status: ${paper.status}`);
+
+                        // Update the UI with the current status
+                        const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
+                        if (row) {
+                            const statusCell = row.querySelector('td:nth-child(4)');
+                            let statusBadge = '';
+
+                            if (paper.status === 'New') {
+                                statusBadge = '<span class="badge bg-info">New</span>';
+                            } else if (paper.status === 'Pending') {
+                                statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
+                            } else if (paper.status === 'Done') {
+                                statusBadge = '<span class="badge bg-success">Done</span>';
+                            } else if (paper.status === 'Failed') {
+                                statusBadge = '<span class="badge bg-danger">Failed</span>';
+                            } else {
+                                statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
+                            }
+
+                            statusCell.innerHTML = statusBadge;
+
+                            // Update processing status message if status changed
+                            if (paper.status !== 'Pending') {
+                                if (paper.status === 'Done') {
+                                    processingStatus.textContent = `Paper processed successfully: ${paper.title}`;
+                                    processingStatus.className = 'alert alert-success mt-3';
+                                } else if (paper.status === 'Failed') {
+                                    processingStatus.textContent = `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
+                                    processingStatus.className = 'alert alert-danger mt-3';
+                                }
+                            }
+                        }
+
+                        // Always refresh activity log
+                        loadRecentActivity();
+
+                        // If status is still pending and we haven't reached max attempts, check again
+                        if (paper.status === 'Pending' && attempts < maxAttempts) {
+                            setTimeout(checkStatus, interval);
+                        } else {
+                            // If status changed or we reached max attempts, refresh chart data too
+                            loadActivityStats(currentTimeRange);
+
+                            // Show notification if status changed
+                            if (paper.status !== 'Pending') {
+                                const status = paper.status === 'Done' ? 'success' : 'error';
+                                const message = paper.status === 'Done'
+                                    ? `Paper processed successfully: ${paper.title}`
+                                    : `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
+                                showFlashMessage(message, status);
+                            }
+
+                            // If we hit max attempts but status is still pending, show a message
+                            if (paper.status === 'Pending' && attempts >= maxAttempts) {
+                                processingStatus.textContent = 'Paper is still being processed. Check the activity log for updates.';
+                                processingStatus.className = 'alert alert-info mt-3';
+                            }
+                        }
+                    }
+                })
+                .catch(error => {
+                    console.error(`Error polling paper status: ${error}`);
+                    // If there's an error, we can still try again if under max attempts
+                    if (attempts < maxAttempts) {
+                        setTimeout(checkStatus, interval);
+                    }
+                });
+        };
+
+        // Start checking
+        setTimeout(checkStatus, interval);
+    }
+
     // Load data functions
     function loadActivityStats(hours) {
         fetch(`/scraper/stats?hours=${hours}`)
@@ -359,8 +717,10 @@
             .then(response => response.json())
             .then(data => {
                 renderActivityLog(data);
+                console.log("Activity log refreshed with latest data");
             })
-            .catch(() => {
+            .catch((error) => {
+                console.error("Failed to load activity logs:", error);
                 // If the API endpoint doesn't exist, just show a message
                 activityLog.innerHTML = '<tr><td colspan="4" class="text-center">Activity log API not available</td></tr>';
             });
@@ -467,6 +827,26 @@
         });
     }
 
+    // Flash message function
+    function showFlashMessage(message, type) {
+        const flashContainer = document.createElement('div');
+        flashContainer.className = `alert alert-${type === 'error' ? 'danger' : type} alert-dismissible fade show notification`;
+        flashContainer.innerHTML = `
+            ${message}
+            <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
+        `;
+
+        document.body.appendChild(flashContainer);
+
+        // Auto dismiss after 5 seconds
+        setTimeout(() => {
+            flashContainer.classList.remove('show');
+            setTimeout(() => {
+                flashContainer.remove();
+            }, 150); // Remove after fade out animation
+        }, 5000);
+    }
+
     // WebSocket for real-time notifications
     function setupWebSocket() {
         // If WebSocket is available, implement it here
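The client-side pollPaperStatus loop has a straightforward server-side analogue built on the new /api/papers/<id> endpoint, which can be handy for scripted testing. A minimal sketch; the base URL is an assumption, and the timing values simply mirror the JavaScript defaults:

    import time
    import requests

    def poll_paper_status(paper_id, interval=3.0, max_attempts=20,
                          base="http://localhost:5000"):
        """Poll /api/papers/<id> until the paper's status leaves 'Pending'."""
        for _ in range(max_attempts):
            paper = requests.get(f"{base}/api/papers/{paper_id}").json().get("paper")
            if paper and paper["status"] != "Pending":
                return paper["status"], paper.get("error_msg")
            time.sleep(interval)
        return "Pending", None  # still pending after max_attempts

    status, error = poll_paper_status(42)  # hypothetical paper ID
    print(status, error or "")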