Compare commits


3 Commits

Author | SHA1 | Message | Date
Michael Beck | 987c76969b | adds path config checks in config and dummy scraper | 2025-05-23 17:21:41 +02:00
Michael Beck | 012163ba3f | fixes dummy and single paper processing | 2025-05-23 16:13:25 +02:00
Michael Beck | 8f2375215d | modularizes the scraper methods | 2025-05-23 14:32:41 +02:00
11 changed files with 1179 additions and 251 deletions

View File

@@ -1,6 +1,7 @@
from datetime import datetime
from flask import Blueprint, jsonify, request
-from ..models import ActivityLog, ActivityCategory
+from ..models import ActivityLog, ActivityCategory, PaperMetadata
+from .. import db

bp = Blueprint("api", __name__, url_prefix="/api")

@@ -47,4 +48,91 @@ def get_activity_logs():
}
result.append(log_data)
return jsonify(result)
@bp.route("/papers")
def search_papers():
"""
Search for papers by title, DOI, or ID.
Query parameters:
- query: Search term (required)
- limit: Maximum number of results (default: 10)
"""
query = request.args.get('query', '')
limit = int(request.args.get('limit', 10))
if not query:
return jsonify({
"success": False,
"message": "Search query is required",
"papers": []
})
# Try to parse query as an ID first
try:
paper_id = int(query)
paper_by_id = PaperMetadata.query.get(paper_id)
if paper_by_id:
return jsonify({
"success": True,
"papers": [{
"id": paper_by_id.id,
"title": paper_by_id.title,
"doi": paper_by_id.doi,
"journal": paper_by_id.journal,
"status": paper_by_id.status,
"created_at": paper_by_id.created_at.isoformat() if paper_by_id.created_at else None,
"updated_at": paper_by_id.updated_at.isoformat() if paper_by_id.updated_at else None
}]
})
except ValueError:
pass # Not an ID, continue with text search
# Search in title and DOI
search_term = f"%{query}%"
papers = PaperMetadata.query.filter(
db.or_(
PaperMetadata.title.ilike(search_term),
PaperMetadata.doi.ilike(search_term)
)
).limit(limit).all()
return jsonify({
"success": True,
"papers": [{
"id": paper.id,
"title": paper.title,
"doi": paper.doi,
"journal": paper.journal,
"status": paper.status,
"created_at": paper.created_at.isoformat() if paper.created_at else None,
"updated_at": paper.updated_at.isoformat() if paper.updated_at else None
} for paper in papers]
})
@bp.route("/papers/<int:paper_id>")
def get_paper(paper_id):
"""Get details of a single paper by ID."""
paper = PaperMetadata.query.get(paper_id)
if not paper:
return jsonify({
"success": False,
"message": f"Paper with ID {paper_id} not found"
})
return jsonify({
"success": True,
"paper": {
"id": paper.id,
"title": paper.title,
"doi": paper.doi,
"journal": paper.journal,
"status": paper.status,
"error_msg": paper.error_msg,
"file_path": paper.file_path,
"created_at": paper.created_at.isoformat() if paper.created_at else None,
"updated_at": paper.updated_at.isoformat() if paper.updated_at else None
}
})
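For orientation, a minimal client sketch of how these new endpoints could be exercised; it assumes the development server is reachable at http://localhost:5000 and that the requests package is installed (neither is part of this change):

import requests

BASE = "http://localhost:5000"  # assumed development server address

# Search by title fragment, DOI fragment, or numeric ID
resp = requests.get(f"{BASE}/api/papers", params={"query": "dummy", "limit": 5})
for paper in resp.json().get("papers", []):
    print(paper["id"], paper["status"], paper["title"])

# Fetch a single paper by ID (42 is a placeholder)
detail = requests.get(f"{BASE}/api/papers/42").json()
if detail.get("success"):
    print(detail["paper"]["doi"], detail["paper"]["file_path"])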

View File

@ -1,10 +1,11 @@
"""Configuration management blueprint.""" """Configuration management blueprint."""
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
from ..db import db from ..db import db
# Import the new model # Import the new model
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
from ..defaults import MAX_VOLUME from ..defaults import MAX_VOLUME
import os # Import os for path validation import os # Import os for path validation
from scipaperloader.scrapers import __path__ as scrapers_path
bp = Blueprint("config", __name__, url_prefix="/config") bp = Blueprint("config", __name__, url_prefix="/config")
@@ -69,25 +70,31 @@ def _update_download_path(new_path):
# Try to create it if it doesn't exist
try:
os.makedirs(new_path, exist_ok=True)
-ActivityLog.log_system_activity(
+ActivityLog.log_scraper_activity(
action="create_directory",
status="info",
description=f"Created download directory: {new_path}"
)
except OSError as e:
-ActivityLog.log_system_activity(
-action="create_directory",
-status="error",
-description=f"Failed to create download directory: {new_path}, Error: {str(e)}"
-)
+ActivityLog.log_error(
+error_message=f"Failed to create download directory: {new_path}, Error: {str(e)}",
+source="update_download_path"
+)
return False, f"Path '{new_path}' is not a valid directory and could not be created: {e}", None
# Check if the path is readable
if not os.access(new_path, os.R_OK):
ActivityLog.log_error(
error_message=f"Download path '{new_path}' is not readable.",
source="check_directory_permissions"
)
return False, f"Path '{new_path}' exists but is not readable by the application.", None
# Check if the path is writable
if not os.access(new_path, os.W_OK):
-ActivityLog.log_system_activity(
-action="check_directory_permissions",
-status="error",
-description=f"Download path '{new_path}' is not writable."
-)
+ActivityLog.log_error(
+error_message=f"Download path '{new_path}' is not writable.",
+source="check_directory_permissions"
+)
return False, f"Path '{new_path}' exists but is not writable by the application.", None
# --- End of validation ---
@@ -281,6 +288,46 @@ def update_schedule():
return redirect(url_for("config.schedule"))
@bp.route("/update/scraper_module", methods=["POST"])
def update_scraper_module():
"""Update the scraper module configuration."""
from ..models import ScraperModuleConfig
new_scraper_module = request.form.get("scraper_module")
if not new_scraper_module:
flash("Scraper module cannot be empty.", "error")
return redirect(url_for("config.general"))
# Validate that the module exists and is valid
from scipaperloader.scrapers.factory import get_available_scrapers
available_modules = [m["name"] for m in get_available_scrapers()]
if new_scraper_module not in available_modules:
flash(f"Invalid scraper module: {new_scraper_module}", "error")
return redirect(url_for("config.general"))
# Update the database configuration
ScraperModuleConfig.set_module(new_scraper_module)
flash(f"Scraper module updated to '{new_scraper_module}'.", "success")
return redirect(url_for("config.general"))
@bp.context_processor
def inject_scraper_modules():
"""Inject available scraper modules into the template context."""
from scipaperloader.scrapers.factory import get_available_scrapers
from ..models import ScraperModuleConfig
available_scrapers = get_available_scrapers()
current_module = ScraperModuleConfig.get_current_module()
return {
"available_scraper_modules": [s["name"] for s in available_scrapers],
"current_scraper_module": current_module,
"scraper_details": {s["name"]: s for s in available_scrapers}
}
@bp.route("/api/schedule/stats") @bp.route("/api/schedule/stats")
def schedule_stats(): def schedule_stats():
"""Get statistics about the current schedule configuration.""" """Get statistics about the current schedule configuration."""
@ -361,4 +408,36 @@ def api_update_config():
return jsonify({ return jsonify({
"success": False, "success": False,
"message": f"Unexpected error: {str(e)}" "message": f"Unexpected error: {str(e)}"
}) })
@bp.route("/delete_all_papers", methods=["POST"])
def delete_all_papers():
"""Delete all paper records from the database."""
try:
# Count papers before deletion for logging purposes
paper_count = PaperMetadata.query.count()
# Delete all records from the PaperMetadata table
PaperMetadata.query.delete()
db.session.commit()
# Log the action
ActivityLog.log_config_change(
config_key="database",
old_value=f"{paper_count} papers",
new_value="0 papers",
description=f"Deleted all {paper_count} papers from the database"
)
flash(f"Successfully deleted all {paper_count} papers from the database.", "success")
except Exception as e:
db.session.rollback()
flash(f"Failed to delete papers: {str(e)}", "error")
ActivityLog.log_error(
error_message=f"Failed to delete all papers: {str(e)}",
exception=e,
source="config.delete_all_papers"
)
return redirect(url_for("config.general"))

View File

@@ -6,12 +6,13 @@ import os  # Import os for path joining
from datetime import datetime, timedelta
from flask import Blueprint, jsonify, render_template, request, current_app, flash
# Import the new model
-from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
+from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig, ScraperModuleConfig
from ..db import db
from ..celery import celery
from ..defaults import MAX_VOLUME
from celery.schedules import crontab
from sqlalchemy import func
+from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers

bp = Blueprint("scraper", __name__, url_prefix="/scraper")

@@ -153,7 +154,7 @@ def stop_scraper():
# Stop any running tasks
task_types_to_revoke = [
-'scipaperloader.blueprints.scraper.dummy_process_paper',
+'scipaperloader.blueprints.scraper.process_paper',
'scipaperloader.blueprints.scraper.dummy_scheduled_scraper',
'scipaperloader.blueprints.scraper.run_periodic_dummy_scraper'
]

@@ -224,7 +225,7 @@ def pause_scraper():
# Just revoke processing tasks, but leave the periodic tasks running
# so it can continue to check the state (which is now paused)
task_types_to_revoke = [
-'scipaperloader.blueprints.scraper.dummy_process_paper',
+'scipaperloader.blueprints.scraper.process_paper',
'scipaperloader.blueprints.scraper.dummy_scheduled_scraper'
]

@@ -373,70 +374,7 @@ def update_config():
return jsonify({"success": False, "message": f"Unexpected error: {str(e)}"})
@celery.task(bind=True)
def dummy_scrape_paper(self):
"""Simulate scraping a single paper."""
# Simulate success or failure
success = random.random() > 0.3 # 70% success rate
# Simulate processing time
import time
time.sleep(random.randint(2, 5)) # 2-5 seconds
if success:
# Create a dummy paper
new_paper = PaperMetadata(
title=f"Dummy Paper {random.randint(1000, 9999)}",
doi=f"10.1234/dummy.{random.randint(1000, 9999)}",
journal=random.choice([
"Nature", "Science", "PLOS ONE", "Journal of Dummy Research",
"Proceedings of the Dummy Society", "Cell", "Dummy Review Letters"
]),
type="article",
language="en",
published_online=datetime.now().date(),
status="Done",
file_path="/path/to/dummy/paper.pdf"
)
db.session.add(new_paper)
db.session.commit()
# Log the successful scrape
ActivityLog.log_scraper_activity(
action="scrape_paper",
paper_id=new_paper.id,
status="success",
description=f"Successfully scraped paper {new_paper.doi}"
)
return {
"success": True,
"paper_id": new_paper.id,
"title": new_paper.title,
"doi": new_paper.doi
}
else:
# Log the failed scrape
error_message = random.choice([
"Connection timeout",
"404 Not Found",
"Access denied",
"Invalid DOI format",
"PDF download failed",
"Rate limited by publisher"
])
ActivityLog.log_scraper_activity(
action="scrape_paper",
status="error",
description=f"Failed to scrape paper: {error_message}"
)
return {
"success": False,
"error": error_message
}
@celery.task

@@ -545,11 +483,11 @@ def dummy_scheduled_scraper():
)
# --- Now schedule processing for the newly selected "Pending" papers ---
-# (Assuming dummy_process_paper takes a paper_id)
+# (Using the new modular process_paper task)
# Add random delays for processing within the hour (e.g., up to 3600 seconds)
for paper_id in selected_paper_ids:
delay = random.uniform(1, 3500)  # Random delay up to ~58 minutes
-dummy_process_paper.apply_async(args=[paper_id], countdown=delay)
+process_paper.apply_async(args=[paper_id], countdown=delay)

ActivityLog.log_scraper_activity(
action="schedule_processing",

@@ -568,109 +506,6 @@ def dummy_scheduled_scraper():
return False
@celery.task(bind=True)
def dummy_process_paper(self, paper_id):
"""
Process a single paper for the dummy scraper.
Args:
paper_id (int): ID of the paper to process
"""
# First check if the scraper is still active and not paused
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active or scraper_state.is_paused:
# Log that task was skipped due to scraper being stopped or paused
ActivityLog.log_scraper_activity(
action="process_paper",
status="info",
description=f"Skipped processing paper ID {paper_id} because scraper is {'paused' if scraper_state.is_paused else 'stopped'}"
)
return False
# Get the paper from database
paper = PaperMetadata.query.get(paper_id)
if not paper:
# Log error if paper not found
ActivityLog.log_scraper_activity(
action="process_paper",
status="error",
description=f"Paper with ID {paper_id} not found"
)
return False
# Simulate random success/failure (70% success rate)
success = random.random() < 0.7
# Simulate processing time (1-5 seconds)
process_time = random.uniform(1, 5)
time.sleep(process_time)
# Check again if scraper is still active and not paused after the time delay
# This ensures we don't process papers if the scraper was stopped during the delay
scraper_state = ScraperState.get_current_state()
if not scraper_state.is_active or scraper_state.is_paused:
ActivityLog.log_scraper_activity(
action="process_paper",
status="info",
description=f"Cancelled processing paper ID {paper_id} because scraper is {'paused' if scraper_state.is_paused else 'stopped'}"
)
return False
if success:
# --- Get configured download path ---
download_base_path = DownloadPathConfig.get_path()
# Ensure the base path exists (optional, but good practice)
# os.makedirs(download_base_path, exist_ok=True)
# --- Construct the file path ---
# Sanitize DOI for use in filename
safe_doi = paper.doi.replace('/', '_').replace(':', '_')
filename = f"{safe_doi}.pdf"
full_path = os.path.join(download_base_path, filename)
# Update paper status to "Done" and set the file path
paper.status = "Done"
paper.file_path = full_path # Use the constructed path
# Log success
ActivityLog.log_scraper_activity(
action="process_paper",
paper_id=paper.id,
status="success",
description=f"Successfully processed paper: {paper.doi}. File at: {full_path}" # Log path
)
else:
# Update paper status to "Failed"
paper.status = "Failed"
# Generate random error message
error_message = random.choice([
"Publisher website unavailable",
"No PDF download link found",
"Access restricted",
"Download timeout",
"Invalid DOI",
"Rate limited by publisher"
])
paper.error_msg = error_message
# Log failure
ActivityLog.log_scraper_activity(
action="process_paper",
paper_id=paper.id,
status="error",
description=f"Failed to process paper: {error_message}"
)
# Update the timestamp
paper.updated_at = datetime.utcnow()
# Commit changes to database
db.session.commit()
return success
@celery.task(bind=True)
def process_paper_batch(self, paper_ids):
"""

@@ -914,3 +749,168 @@ def calculate_papers_for_current_hour():
)
return papers_this_hour
@celery.task(bind=True)
def process_paper(self, paper_id):
"""Process a paper using the configured scraper."""
from scipaperloader.models import PaperMetadata
paper = PaperMetadata.query.get(paper_id)
if not paper:
return {"status": "error", "message": f"Paper with ID {paper_id} not found"}
scraper = get_scraper()
result = scraper.scrape(paper.doi)
return {
"paper_id": paper_id,
"status": result.status,
"message": result.message
}
@celery.task(bind=True)
def process_paper_with_scraper(self, paper_id, scraper_module):
"""Process a paper using a specific scraper module."""
from scipaperloader.models import PaperMetadata
import importlib
from ..scrapers.base import BaseScraper
paper = PaperMetadata.query.get(paper_id)
if not paper:
return {"status": "error", "message": f"Paper with ID {paper_id} not found"}
try:
# Import the specified scraper module
module = importlib.import_module(f"scipaperloader.scrapers.{scraper_module}")
cls = getattr(module, "Scraper")
# Validate that it's a BaseScraper
if not issubclass(cls, BaseScraper):
error_msg = f"Scraper class in module '{scraper_module}' does not inherit from BaseScraper"
ActivityLog.log_error(
error_message=error_msg,
source="process_paper_with_scraper"
)
return {"status": "error", "message": error_msg}
# Instantiate and use the scraper
scraper = cls()
result = scraper.scrape(paper.doi)
return {
"paper_id": paper_id,
"status": result.status,
"message": result.message,
"scraper": scraper_module
}
except (ImportError, AttributeError) as e:
error_msg = f"Failed to load scraper module '{scraper_module}': {str(e)}"
ActivityLog.log_error(
error_message=error_msg,
source="process_paper_with_scraper"
)
return {"status": "error", "message": error_msg}
except Exception as e:
error_msg = f"Error processing paper with scraper '{scraper_module}': {str(e)}"
ActivityLog.log_error(
error_message=error_msg,
source="process_paper_with_scraper",
exception=e
)
return {"status": "error", "message": error_msg}
@bp.route("/process_single/<int:paper_id>", methods=["POST"])
def process_single_paper(paper_id):
"""Process a single paper by ID."""
try:
# Check if paper exists
paper = PaperMetadata.query.get(paper_id)
if not paper:
return jsonify({
"success": False,
"message": f"Paper with ID {paper_id} not found"
})
# Get the scraper module name from the request
scraper_module = None
if request.is_json and request.json:
scraper_module = request.json.get('scraper_module')
# Update status to Pending
old_status = paper.status
paper.status = "Pending"
paper.updated_at = datetime.utcnow()
db.session.commit()
# Log that we're processing this paper
ActivityLog.log_scraper_activity(
action="manual_process_paper",
paper_id=paper_id,
status="pending",
description=f"Manual processing initiated for paper: {paper.title}" +
(f" using {scraper_module} scraper" if scraper_module else "")
)
# Start the task (without delay since it's manual)
if scraper_module:
task = process_paper_with_scraper.delay(paper_id, scraper_module)
else:
task = process_paper.delay(paper_id)
return jsonify({
"success": True,
"task_id": task.id,
"message": f"Processing paper '{paper.title}' (ID: {paper_id})" +
(f" using {scraper_module} scraper" if scraper_module else "") +
f". Previous status: {old_status}"
})
except Exception as e:
db.session.rollback()
ActivityLog.log_error(
error_message=f"Failed to process paper {paper_id}: {str(e)}",
exception=e,
source="process_single_paper"
)
return jsonify({
"success": False,
"message": f"Error: {str(e)}"
})
@bp.route("/available_scrapers")
def available_scrapers():
"""Get list of available scraper modules."""
from scipaperloader.scrapers.factory import get_available_scrapers
from ..models import ScraperModuleConfig
try:
scrapers = get_available_scrapers()
current_module = ScraperModuleConfig.get_current_module()
return jsonify({
"success": True,
"scrapers": [
{
"name": s["name"],
"description": s["description"],
"is_current": s["name"] == current_module
} for s in scrapers
],
"current": current_module
})
except Exception as e:
ActivityLog.log_error(
error_message=f"Failed to get available scrapers: {str(e)}",
source="available_scrapers"
)
return jsonify({
"success": False,
"message": f"Error: {str(e)}",
"scrapers": []
})
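As a rough usage sketch for the new manual-processing routes (assuming the development server at http://localhost:5000, a paper with ID 42, and the requests package; none of these are part of the diff):

import requests

BASE = "http://localhost:5000"  # assumed development server address

# List the scraper modules discovered by the factory
scrapers = requests.get(f"{BASE}/scraper/available_scrapers").json()
print([s["name"] for s in scrapers.get("scrapers", [])])

# Queue paper 42 for processing with an explicitly chosen module
resp = requests.post(f"{BASE}/scraper/process_single/42", json={"scraper_module": "dummy"})
print(resp.json().get("message"))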

View File

@@ -6,3 +6,4 @@ class Config:
SQLALCHEMY_DATABASE_URI = os.environ.get("DATABASE_URL", "sqlite:///papers.db")
SQLALCHEMY_TRACK_MODIFICATIONS = False
APP_TITLE = os.environ.get("APP_TITLE", "SciPaperLoader")
SCRAPER_MODULE = os.environ.get("SCRAPER_MODULE", "dummy")

View File

@@ -277,6 +277,40 @@ class ScraperState(db.Model):
return state.is_active and not state.is_paused
class ScraperModuleConfig(db.Model):
"""Model to store the configured scraper module."""
id = db.Column(db.Integer, primary_key=True)
module_name = db.Column(db.String(100), default="dummy")
@classmethod
def get_current_module(cls):
"""Get the currently configured scraper module."""
config = cls.query.first()
if not config:
config = cls(module_name="dummy")
db.session.add(config)
db.session.commit()
return config.module_name
@classmethod
def set_module(cls, module_name):
"""Set the scraper module."""
config = cls.query.first()
if not config:
config = cls(module_name=module_name)
db.session.add(config)
else:
old_value = config.module_name
config.module_name = module_name
ActivityLog.log_config_change(
config_key="scraper_module",
old_value=old_value,
new_value=module_name,
description="Updated scraper module configuration"
)
db.session.commit()
return config
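Illustrative use of the new model from a Flask shell; a sketch that assumes an active application context:

from scipaperloader.models import ScraperModuleConfig

# Read the active module; creates the default "dummy" row if none exists yet
current = ScraperModuleConfig.get_current_module()
print(current)

# Switch modules; the change is recorded via ActivityLog.log_config_change
ScraperModuleConfig.set_module("dummy")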
def init_schedule_config():
"""Initialize ScheduleConfig with default values if empty"""
if ScheduleConfig.query.count() == 0:

View File

@@ -0,0 +1,2 @@
# This package contains all scraper modules.
# Each scraper should implement the BaseScraper interface from base.py.

View File

@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from typing import NamedTuple, Optional, Dict
from datetime import datetime
class ScrapeResult(NamedTuple):
status: str # "success", "error", "skipped"
message: str # human-readable status
data: Optional[Dict] # any extra payload (file_path, metadata, etc.)
duration: Optional[float] = None # processing time in seconds
timestamp: Optional[datetime] = None # when the operation completed
class BaseScraper(ABC):
"""Base class for all scraper implementations."""
@abstractmethod
def scrape(self, doi: str) -> ScrapeResult:
"""
Fetch metadata and/or download paper for the given DOI.
Args:
doi: The DOI of the paper to scrape
Returns:
ScrapeResult with status, message, and optional data
"""
pass
def get_name(self) -> str:
"""Return the name of this scraper."""
return self.__class__.__name__
def get_description(self) -> str:
"""Return a description of this scraper."""
return getattr(self.__class__, "__doc__", "No description available")
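To illustrate the extension point, a hypothetical scraper module (say scipaperloader/scrapers/noop.py, not part of this change) only needs to expose a class named Scraper that subclasses BaseScraper and returns a ScrapeResult:

from datetime import datetime
from .base import BaseScraper, ScrapeResult

class Scraper(BaseScraper):
    """No-op scraper that marks every DOI as skipped."""

    def scrape(self, doi: str) -> ScrapeResult:
        # No real work is done; the result just records that the DOI was skipped
        return ScrapeResult(
            status="skipped",
            message=f"No handler implemented for {doi}",
            data=None,
            duration=0.0,
            timestamp=datetime.utcnow(),
        )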

View File

@@ -0,0 +1,191 @@
import time
import random
import os
from datetime import datetime
from .base import BaseScraper, ScrapeResult
from flask import current_app
from ..models import PaperMetadata, ActivityLog, DownloadPathConfig
from ..db import db
class Scraper(BaseScraper):
"""Dummy scraper for testing purposes that simulates paper downloading."""
def scrape(self, doi: str) -> ScrapeResult:
"""Simulate scraping a paper with realistic timing and random success/failure."""
start_time = time.time()
paper = PaperMetadata.query.filter_by(doi=doi).first()
if not paper:
return ScrapeResult(
status="error",
message=f"No paper found for DOI {doi}",
data=None,
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Simulate processing time (1-3 seconds)
processing_time = random.uniform(1, 3)
time.sleep(processing_time)
# Simulate 80% success rate
success = random.random() < 0.8
if success:
# Get download path and create an actual dummy file
download_path = DownloadPathConfig.get_path()
file_name = f"{doi.replace('/', '_')}.pdf"
file_path = f"{download_path}/{file_name}"
# Check if the path is readable and writable
if not os.path.exists(download_path):
try:
# Create directory if it doesn't exist
os.makedirs(download_path, exist_ok=True)
except OSError as e:
error_msg = f"Failed to create download directory: {str(e)}"
paper.status = "Failed"
paper.error_msg = error_msg
ActivityLog.log_scraper_activity(
action="dummy_scrape_path_error",
status="error",
description=error_msg,
paper_id=paper.id
)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_creation_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Check if the path is readable
if not os.access(download_path, os.R_OK):
error_msg = f"Download path '{download_path}' is not readable"
paper.status = "Failed"
paper.error_msg = error_msg
ActivityLog.log_scraper_activity(
action="dummy_scrape_path_error",
status="error",
description=error_msg,
paper_id=paper.id
)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_read_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Check if the path is writable
if not os.access(download_path, os.W_OK):
error_msg = f"Download path '{download_path}' is not writable"
paper.status = "Failed"
paper.error_msg = error_msg
ActivityLog.log_scraper_activity(
action="dummy_scrape_path_error",
status="error",
description=error_msg,
paper_id=paper.id
)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "path_write_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Create a simple dummy PDF file
try:
with open(file_path, 'w') as f:
f.write(f"Dummy PDF file for paper with DOI: {doi}\n")
f.write(f"Title: {paper.title}\n")
f.write(f"Journal: {paper.journal}\n")
f.write(f"Generated: {datetime.utcnow().isoformat()}\n")
f.write("\nThis is a dummy file created by the SciPaperLoader dummy scraper.\n")
# Update paper status
paper.status = "Done"
paper.file_path = file_path
paper.error_msg = None
except Exception as e:
# Handle file creation errors
error_msg = f"Failed to create dummy file: {str(e)}"
paper.status = "Failed"
paper.error_msg = error_msg
ActivityLog.log_scraper_activity(
action="dummy_scrape_file_error",
status="error",
description=error_msg,
paper_id=paper.id
)
return ScrapeResult(
status="error",
message=error_msg,
data={"error_code": "file_creation_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
# Log success
ActivityLog.log_scraper_activity(
action="dummy_scrape",
status="success",
description=f"Successfully scraped {doi}",
paper_id=paper.id
)
result = ScrapeResult(
status="success",
message=f"Successfully scraped {doi}",
data={
"file_path": file_path,
"title": paper.title,
"journal": paper.journal
},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
else:
# Simulate failure
error_messages = [
"Paper not found in database",
"Access denied by publisher",
"Rate limit exceeded",
"Network timeout",
"Invalid DOI format"
]
error_msg = random.choice(error_messages)
paper.status = "Failed"
paper.error_msg = error_msg
# Log failure
ActivityLog.log_scraper_activity(
action="dummy_scrape",
status="error",
description=f"Failed to scrape {doi}: {error_msg}",
paper_id=paper.id
)
result = ScrapeResult(
status="error",
message=f"Failed to scrape {doi}: {error_msg}",
data={"error_code": "dummy_error"},
duration=time.time() - start_time,
timestamp=datetime.utcnow()
)
db.session.commit()
return result
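For manual testing, the dummy scraper can also be invoked directly; a sketch assuming an active application context and a PaperMetadata row with the given DOI already in the database:

from scipaperloader.scrapers.dummy import Scraper

result = Scraper().scrape("10.1234/dummy.5678")  # placeholder DOI
print(result.status, result.duration, result.data)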

View File

@@ -0,0 +1,59 @@
import importlib
from flask import current_app
from .base import BaseScraper
def get_scraper() -> BaseScraper:
"""Load the configured scraper module dynamically with error handling."""
from ..models import ScraperModuleConfig, ActivityLog
try:
# Get module name from database first, fallback to config
name = ScraperModuleConfig.get_current_module()
if not name:
name = current_app.config.get("SCRAPER_MODULE", "dummy")
module = importlib.import_module(f"scipaperloader.scrapers.{name}")
cls = getattr(module, "Scraper")
# Validate that it's actually a BaseScraper
if not issubclass(cls, BaseScraper):
raise TypeError(f"Scraper class in module '{name}' does not inherit from BaseScraper")
return cls()
except (ImportError, AttributeError, TypeError) as e:
ActivityLog.log_error(
error_message=f"Failed to load scraper module '{name}': {str(e)}",
source="scraper_factory",
severity="error"
)
# Fallback to dummy scraper
from .dummy import Scraper as DummyScraper
return DummyScraper()
def get_available_scrapers():
"""Get list of available scraper modules."""
import os
from scipaperloader.scrapers import __path__ as scrapers_path
modules = []
scrapers_dir = scrapers_path[0]
for filename in os.listdir(scrapers_dir):
if filename.endswith(".py") and filename not in ("__init__.py", "base.py", "factory.py"):
module_name = filename[:-3]
try:
# Try to import and validate the module
module = importlib.import_module(f"scipaperloader.scrapers.{module_name}")
cls = getattr(module, "Scraper", None)
if cls and issubclass(cls, BaseScraper):
modules.append({
"name": module_name,
"class": cls,
"description": getattr(cls, "__doc__", "No description available")
})
except (ImportError, AttributeError, TypeError):
# Skip invalid modules
pass
return modules
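Typical use of the factory, sketched under the assumption that an application context is active so the database-backed configuration can be read:

from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers

print([m["name"] for m in get_available_scrapers()])  # e.g. ['dummy']

scraper = get_scraper()  # falls back to the dummy scraper if the configured module fails to load
result = scraper.scrape("10.1234/example-doi")  # placeholder DOI
print(result.status, result.message)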

View File

@@ -9,52 +9,112 @@
<!-- include flash messages template -->
{% include "partials/flash_messages.html.jinja" %}
-<form action="{{ url_for('config.update_general') }}" method="post">
-<div class="form-section">
-<h6>Scraper Volume</h6>
-<p class="text-muted">Configure the total number of papers to scrape per day.</p>
-<div class="mb-3">
-<label for="totalVolume" class="form-label">Papers per day:</label>
-<input type="number" class="form-control" id="totalVolume" name="total_volume" min="1"
-max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
-<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
-</div>
-</div>
-<div class="form-section">
-<h6>Download Path</h6>
-<p class="text-muted">Base directory where scraped paper files will be stored.</p>
-<div class="mb-3">
-<label for="downloadPath" class="form-label">Download Directory:</label>
-<input type="text" class="form-control" id="downloadPath" name="download_path"
-value="{{ download_path_config.path }}" required>
-<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
-Ensure the directory exists and the application has write permissions.</div>
-</div>
-</div>
-<div class="form-section">
-<h6>System Settings</h6>
-<p class="text-muted">Configure general system behavior.</p>
-<div class="mb-3 form-check">
-<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
-<label class="form-check-label" for="enableNotifications">
-Enable email notifications
-</label>
-</div>
-<div class="mb-3 form-check">
-<input type="checkbox" class="form-check-input" id="enableLogging" checked>
-<label class="form-check-label" for="enableLogging">
-Enable detailed activity logging
-</label>
-</div>
-</div>
-<button type="submit" class="btn btn-primary">Save General Settings</button>
-</form>
+<div class="row">
+<!-- General Settings Column -->
+<div class="col-md-6">
+<form action="{{ url_for('config.update_general') }}" method="post">
+<div class="form-section">
+<h6>Scraper Volume</h6>
+<p class="text-muted">Configure the total number of papers to scrape per day.</p>
+<div class="mb-3">
+<label for="totalVolume" class="form-label">Papers per day:</label>
+<input type="number" class="form-control" id="totalVolume" name="total_volume"
+min="1" max="{{ max_volume }}" value="{{ volume_config.volume }}" required>
+<div class="form-text">Enter a value between 1 and {{ max_volume }}</div>
+</div>
+</div>
+<div class="form-section">
+<h6>Download Path</h6>
+<p class="text-muted">Base directory where scraped paper files will be stored.</p>
+<div class="mb-3">
+<label for="downloadPath" class="form-label">Download Directory:</label>
+<input type="text" class="form-control" id="downloadPath" name="download_path"
+value="{{ download_path_config.path }}" required>
+<div class="form-text">Enter the full path to the download directory (e.g., /data/papers).
+Ensure the directory exists and the application has write permissions.</div>
+</div>
+</div>
+<div class="form-section">
+<h6>System Settings</h6>
+<p class="text-muted">Configure general system behavior.</p>
+<div class="mb-3 form-check">
+<input type="checkbox" class="form-check-input" id="enableNotifications" checked>
+<label class="form-check-label" for="enableNotifications">
+Enable email notifications
+</label>
+</div>
+<div class="mb-3 form-check">
+<input type="checkbox" class="form-check-input" id="enableLogging" checked>
+<label class="form-check-label" for="enableLogging">
+Enable detailed activity logging
+</label>
+</div>
+</div>
+<button type="submit" class="btn btn-primary">Save General Settings</button>
+</form>
+</div>
+<!-- Scraper Module Column -->
+<div class="col-md-6">
+<form method="post" action="{{ url_for('config.update_scraper_module') }}">
+<div class="form-section">
+<h6>Scraper Module</h6>
+<p class="text-muted">Select which scraper module to use for processing papers.</p>
+<div class="mb-3">
+<label for="scraper_module" class="form-label">Active Scraper Module:</label>
+<select class="form-control" id="scraper_module" name="scraper_module">
+{% for module in available_scraper_modules %}
+<option value="{{ module }}" {% if module == current_scraper_module %}selected{% endif %}>
+{{ module }}
+{% if scraper_details[module] %}
+- {{ scraper_details[module].description[:50] }}...
+{% endif %}
+</option>
+{% endfor %}
+</select>
+<div class="form-text">
+Current module: <strong>{{ current_scraper_module }}</strong>
+</div>
+</div>
+</div>
+<button type="submit" class="btn btn-primary">Update Scraper Module</button>
+</form>
+</div>
+</div>
+<!-- Database Management Section -->
+<div class="row mt-4">
+<div class="col-12">
+<div class="card border-danger">
+<div class="card-header bg-danger text-white">
+<h5>Database Management</h5>
+</div>
+<div class="card-body">
+<div class="form-section">
+<h6>Delete All Papers</h6>
+<p class="text-muted">This action will permanently delete all paper records from the
+database. This cannot be undone.</p>
+<form method="post" action="{{ url_for('config.delete_all_papers') }}" class="mt-3"
+onsubmit="return confirm('WARNING: You are about to delete ALL papers from the database. This action cannot be undone. Are you sure you want to proceed?');">
+<button type="submit" class="btn btn-danger">
+<i class="fas fa-trash-alt"></i> Delete All Papers
+</button>
+</form>
+</div>
+</div>
+</div>
+</div>
+</div>
</div>
</div>
</div>

View File

@@ -36,6 +36,28 @@
max-width: 350px;
z-index: 1050;
}
.search-results-container {
max-height: 300px;
overflow-y: auto;
}
/* Paper status badges */
.badge-new {
background-color: #17a2b8;
}
.badge-pending {
background-color: #ffc107;
}
.badge-done {
background-color: #28a745;
}
.badge-failed {
background-color: #dc3545;
}
</style>
{% endblock styles %}

@@ -89,6 +111,61 @@
</div>
</div>
<!-- New row for single paper processing -->
<div class="row mb-4">
<div class="col-12">
<div class="card">
<div class="card-header">
<h5>Process Single Paper</h5>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-6">
<form id="searchPaperForm" class="mb-3">
<div class="input-group">
<input type="text" id="paperSearchInput" class="form-control"
placeholder="Search paper by title, DOI, or ID...">
<button class="btn btn-outline-secondary" type="submit">Search</button>
</div>
</form>
</div>
<div class="col-md-6">
<div class="form-group">
<label for="scraperSelect">Scraper Module:</label>
<select class="form-control" id="scraperSelect">
<option value="">Use default system scraper</option>
<!-- Available scrapers will be populated here -->
</select>
<div class="form-text">
Select which scraper to use for processing the paper
</div>
</div>
</div>
</div>
<div id="searchResults" class="mt-3 search-results-container d-none">
<table class="table table-hover table-striped">
<thead>
<tr>
<th>ID</th>
<th>Title</th>
<th>DOI</th>
<th>Status</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="paperSearchResults">
<!-- Search results will be populated here -->
</tbody>
</table>
</div>
<div id="processingStatus" class="alert alert-info mt-3 d-none"></div>
</div>
</div>
</div>
</div>
<div class="row mb-4"> <div class="row mb-4">
<div class="col-12"> <div class="col-12">
<div class="card"> <div class="card">
@ -164,12 +241,19 @@
const resetButton = document.getElementById('resetButton'); const resetButton = document.getElementById('resetButton');
const notificationsToggle = document.getElementById('notificationsToggle'); const notificationsToggle = document.getElementById('notificationsToggle');
const activityLog = document.getElementById('activityLog'); const activityLog = document.getElementById('activityLog');
const searchForm = document.getElementById('searchPaperForm');
const searchInput = document.getElementById('paperSearchInput');
const searchResults = document.getElementById('searchResults');
const processingStatus = document.getElementById('processingStatus');
const paperSearchResults = document.getElementById('paperSearchResults');
const scraperSelect = document.getElementById('scraperSelect');
// Initialize the page
document.addEventListener('DOMContentLoaded', function () {
initStatusPolling();
loadActivityStats(currentTimeRange);
loadRecentActivity();
loadAvailableScrapers();
// Initialize event listeners
startButton.addEventListener('click', startScraper);

@@ -177,6 +261,10 @@
stopButton.addEventListener('click', stopScraper);
resetButton.addEventListener('click', resetScraper);
notificationsToggle.addEventListener('click', toggleNotifications);
searchForm.addEventListener('submit', function (e) {
e.preventDefault();
searchPapers();
});
document.getElementById('volumeForm').addEventListener('submit', function (e) {
e.preventDefault();

@@ -193,6 +281,185 @@
});
});
// Load available scraper modules
function loadAvailableScrapers() {
fetch('/scraper/available_scrapers')
.then(response => response.json())
.then(data => {
if (data.success && data.scrapers && data.scrapers.length > 0) {
// Clear previous options except the default one
while (scraperSelect.options.length > 1) {
scraperSelect.remove(1);
}
// Add each scraper as an option
data.scrapers.forEach(scraper => {
const option = document.createElement('option');
option.value = scraper.name;
option.textContent = `${scraper.name} - ${scraper.description.substring(0, 50)}${scraper.description.length > 50 ? '...' : ''}`;
if (scraper.is_current) {
option.textContent += ' (system default)';
}
scraperSelect.appendChild(option);
});
} else {
// If no scrapers or error, add a note
const option = document.createElement('option');
option.disabled = true;
option.textContent = 'No scrapers available';
scraperSelect.appendChild(option);
}
})
.catch(error => {
console.error('Error loading scrapers:', error);
const option = document.createElement('option');
option.disabled = true;
option.textContent = 'Error loading scrapers';
scraperSelect.appendChild(option);
});
}
// Search papers function
function searchPapers() {
const query = searchInput.value.trim();
if (!query) {
showFlashMessage('Please enter a search term', 'warning');
return;
}
// Show loading message
paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Searching papers...</td></tr>';
searchResults.classList.remove('d-none');
// Fetch papers from API
fetch(`/api/papers?query=${encodeURIComponent(query)}`)
.then(response => response.json())
.then(data => {
if (!data.papers || data.papers.length === 0) {
paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">No papers found matching your search</td></tr>';
return;
}
paperSearchResults.innerHTML = '';
data.papers.forEach(paper => {
const row = document.createElement('tr');
// Create status badge
let statusBadge = '';
if (paper.status === 'New') {
statusBadge = '<span class="badge bg-info">New</span>';
} else if (paper.status === 'Pending') {
statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
} else if (paper.status === 'Done') {
statusBadge = '<span class="badge bg-success">Done</span>';
} else if (paper.status === 'Failed') {
statusBadge = '<span class="badge bg-danger">Failed</span>';
} else {
statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
}
// Create process button (enabled only for papers not in 'Pending' status)
const processButtonDisabled = paper.status === 'Pending' ? 'disabled' : '';
// Truncate title if too long
const truncatedTitle = paper.title.length > 70 ? paper.title.substring(0, 70) + '...' : paper.title;
row.innerHTML = `
<td>${paper.id}</td>
<td title="${paper.title}">${truncatedTitle}</td>
<td>${paper.doi || 'N/A'}</td>
<td>${statusBadge}</td>
<td>
<button class="btn btn-sm btn-primary process-paper-btn"
data-paper-id="${paper.id}"
${processButtonDisabled}>
Process Now
</button>
</td>
`;
paperSearchResults.appendChild(row);
});
// Add event listeners to the process buttons
document.querySelectorAll('.process-paper-btn').forEach(btn => {
btn.addEventListener('click', function () {
processSinglePaper(this.getAttribute('data-paper-id'));
});
});
})
.catch(error => {
console.error('Error searching papers:', error);
paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Error searching papers</td></tr>';
});
}
// Process a single paper
function processSinglePaper(paperId) {
// Disable all process buttons to prevent multiple clicks
document.querySelectorAll('.process-paper-btn').forEach(btn => {
btn.disabled = true;
});
// Show processing status
processingStatus.textContent = 'Processing paper...';
processingStatus.classList.remove('d-none');
// Get selected scraper
const selectedScraper = scraperSelect.value;
// Send request to process the paper
fetch(`/scraper/process_single/${paperId}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
scraper_module: selectedScraper
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
processingStatus.textContent = data.message;
processingStatus.className = 'alert alert-success mt-3';
// Update status in the search results
const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
const statusCell = row.querySelector('td:nth-child(4)');
statusCell.innerHTML = '<span class="badge bg-warning text-dark">Pending</span>';
// Show notification
showFlashMessage(data.message, 'success');
// Set up polling to check paper status and refresh activity
pollPaperStatus(paperId, 3000, 20);
} else {
processingStatus.textContent = data.message;
processingStatus.className = 'alert alert-danger mt-3';
showFlashMessage(data.message, 'error');
}
})
.catch(error => {
console.error('Error processing paper:', error);
processingStatus.textContent = 'Error: Could not process paper';
processingStatus.className = 'alert alert-danger mt-3';
showFlashMessage('Error processing paper', 'error');
})
.finally(() => {
// Re-enable the process buttons after a short delay
setTimeout(() => {
document.querySelectorAll('.process-paper-btn').forEach(btn => {
if (btn.getAttribute('data-paper-id') !== paperId) {
btn.disabled = false;
}
});
}, 1000);
});
}
// Status polling
function initStatusPolling() {
updateStatus();

@@ -285,39 +552,39 @@
if (confirm("Are you sure you want to reset the scraper? This will stop all current tasks, optionally clear non-pending papers, and restart the scraper.")) {
// Disable button to prevent multiple clicks
resetButton.disabled = true;
// Show a loading message
showFlashMessage('Resetting scraper, please wait...', 'info');
fetch('/scraper/reset', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
clear_papers: true // You could make this configurable with a checkbox
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
showFlashMessage('Scraper has been completely reset and restarted', 'success');
// Update everything
updateStatus();
loadActivityStats(currentTimeRange);
setTimeout(() => { loadRecentActivity(); }, 1000);
} else {
showFlashMessage(data.message || 'Error resetting scraper', 'error');
}
// Re-enable button
resetButton.disabled = false;
})
.catch(error => {
console.error("Error resetting scraper:", error);
showFlashMessage('Error resetting scraper: ' + error.message, 'error');
// Re-enable button
resetButton.disabled = false;
});
}
}

@@ -345,6 +612,97 @@
notificationsEnabled = notificationsToggle.checked;
}
// Poll paper status until it changes from Pending
function pollPaperStatus(paperId, interval = 3000, maxAttempts = 20) {
let attempts = 0;
// Immediately refresh activity log to show the initial pending status
loadRecentActivity();
const checkStatus = () => {
attempts++;
console.log(`Checking status of paper ${paperId}, attempt ${attempts}/${maxAttempts}`);
// Fetch the current paper status
fetch(`/api/papers/${paperId}`)
.then(response => response.json())
.then(data => {
if (data && data.paper) {
const paper = data.paper;
console.log(`Paper status: ${paper.status}`);
// Update the UI with the current status
const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
if (row) {
const statusCell = row.querySelector('td:nth-child(4)');
let statusBadge = '';
if (paper.status === 'New') {
statusBadge = '<span class="badge bg-info">New</span>';
} else if (paper.status === 'Pending') {
statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
} else if (paper.status === 'Done') {
statusBadge = '<span class="badge bg-success">Done</span>';
} else if (paper.status === 'Failed') {
statusBadge = '<span class="badge bg-danger">Failed</span>';
} else {
statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
}
statusCell.innerHTML = statusBadge;
// Update processing status message if status changed
if (paper.status !== 'Pending') {
if (paper.status === 'Done') {
processingStatus.textContent = `Paper processed successfully: ${paper.title}`;
processingStatus.className = 'alert alert-success mt-3';
} else if (paper.status === 'Failed') {
processingStatus.textContent = `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
processingStatus.className = 'alert alert-danger mt-3';
}
}
}
// Always refresh activity log
loadRecentActivity();
// If status is still pending and we haven't reached max attempts, check again
if (paper.status === 'Pending' && attempts < maxAttempts) {
setTimeout(checkStatus, interval);
} else {
// If status changed or we reached max attempts, refresh chart data too
loadActivityStats(currentTimeRange);
// Show notification if status changed
if (paper.status !== 'Pending') {
const status = paper.status === 'Done' ? 'success' : 'error';
const message = paper.status === 'Done'
? `Paper processed successfully: ${paper.title}`
: `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
showFlashMessage(message, status);
}
// If we hit max attempts but status is still pending, show a message
if (paper.status === 'Pending' && attempts >= maxAttempts) {
processingStatus.textContent = 'Paper is still being processed. Check the activity log for updates.';
processingStatus.className = 'alert alert-info mt-3';
}
}
}
})
.catch(error => {
console.error(`Error polling paper status: ${error}`);
// If there's an error, we can still try again if under max attempts
if (attempts < maxAttempts) {
setTimeout(checkStatus, interval);
}
});
};
// Start checking
setTimeout(checkStatus, interval);
}
// Load data functions
function loadActivityStats(hours) {
fetch(`/scraper/stats?hours=${hours}`)

@@ -359,8 +717,10 @@
.then(response => response.json())
.then(data => {
renderActivityLog(data);
console.log("Activity log refreshed with latest data");
})
-.catch(() => {
+.catch((error) => {
console.error("Failed to load activity logs:", error);
// If the API endpoint doesn't exist, just show a message
activityLog.innerHTML = '<tr><td colspan="4" class="text-center">Activity log API not available</td></tr>';
});

@@ -467,6 +827,26 @@
});
}
// Flash message function
function showFlashMessage(message, type) {
const flashContainer = document.createElement('div');
flashContainer.className = `alert alert-${type === 'error' ? 'danger' : type} alert-dismissible fade show notification`;
flashContainer.innerHTML = `
${message}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
`;
document.body.appendChild(flashContainer);
// Auto dismiss after 5 seconds
setTimeout(() => {
flashContainer.classList.remove('show');
setTimeout(() => {
flashContainer.remove();
}, 150); // Remove after fade out animation
}, 5000);
}
// WebSocket for real-time notifications
function setupWebSocket() {
// If WebSocket is available, implement it here