65 lines
2.4 KiB
Python

import importlib
from .base import BaseScraper
def get_scraper() -> BaseScraper:
    """Load and instantiate the configured scraper, falling back to the dummy.

    The module name is resolved in this order:

    1. ``ScraperModuleConfig.get_current_module()``
    2. the ``SCRAPER_MODULE`` key of the Flask app config, when an
       application context is active
    3. the literal ``"dummy"``

    The module ``scipaperloader.scrapers.<name>`` is then imported and its
    ``Scraper`` attribute must be a subclass of ``BaseScraper``.

    Returns:
        An instance of the resolved ``Scraper`` class. If resolving,
        importing or validating it raises ``ImportError``,
        ``AttributeError`` or ``TypeError``, the failure is reported through
        ``ActivityLog.log_error`` and an instance of the dummy scraper is
        returned instead.
    """
    from ..models import ScraperModuleConfig, ActivityLog

    # Bound before the try block so the error handler below can always
    # format its message, even when the name lookup itself is what raised.
    name = "<unresolved>"
    try:
        # Get module name from database first, fallback to dummy
        name = ScraperModuleConfig.get_current_module()
        if not name:
            # Only try to access Flask config if we're in app context
            try:
                from flask import current_app
                name = current_app.config.get("SCRAPER_MODULE", "dummy")
            except RuntimeError:
                # No app context, use dummy
                name = "dummy"

        module = importlib.import_module(f"scipaperloader.scrapers.{name}")
        cls = getattr(module, "Scraper")

        # Validate that it's actually a BaseScraper; issubclass() itself
        # raises TypeError when ``Scraper`` is not a class at all.
        if not issubclass(cls, BaseScraper):
            raise TypeError(f"Scraper class in module '{name}' does not inherit from BaseScraper")

        return cls()
    except (ImportError, AttributeError, TypeError) as e:
        ActivityLog.log_error(
            error_message=f"Failed to load scraper module '{name}': {e}",
            source="scraper_factory",
            severity="error"
        )
        # Fallback to dummy scraper
        from .dummy import Scraper as DummyScraper
        return DummyScraper()
def get_available_scrapers():
    """Return the valid scraper modules found in ``scipaperloader.scrapers``.

    Every ``*.py`` file in the package directory except ``__init__.py``,
    ``base.py`` and ``factory.py`` is imported. A module is listed when it
    exposes a ``Scraper`` attribute that is a subclass of ``BaseScraper``.
    Modules that raise ``ImportError``, ``AttributeError`` or ``TypeError``
    during this check are skipped.

    Returns:
        A list of dicts in sorted filename order, each with the keys
        ``"name"`` (file name without ``.py``), ``"class"`` (the ``Scraper``
        class) and ``"description"`` (the class docstring, or
        ``"No description available"`` when the class has none).
    """
    import os
    from scipaperloader.scrapers import __path__ as scrapers_path

    excluded = ("__init__.py", "base.py", "factory.py")
    modules = []
    scrapers_dir = scrapers_path[0]

    # os.listdir() returns entries in arbitrary order; sort so the result is
    # stable between calls and platforms.
    for filename in sorted(os.listdir(scrapers_dir)):
        if not filename.endswith(".py") or filename in excluded:
            continue

        module_name = filename[:-3]
        try:
            # Try to import and validate the module
            module = importlib.import_module(f"scipaperloader.scrapers.{module_name}")
            cls = getattr(module, "Scraper", None)
            if cls and issubclass(cls, BaseScraper):
                modules.append({
                    "name": module_name,
                    "class": cls,
                    # A class always has ``__doc__`` (None when there is no
                    # docstring), so a getattr() default would never apply.
                    "description": cls.__doc__ or "No description available",
                })
        except (ImportError, AttributeError, TypeError):
            # Skip invalid modules
            continue

    return modules