import importlib

from flask import current_app

from .base import BaseScraper


def get_scraper() -> BaseScraper:
    """Load and instantiate the configured scraper, falling back to the dummy.

    The module name is read from ``ScraperModuleConfig.get_current_module()``
    first; if that yields a falsy value, the Flask config key
    ``SCRAPER_MODULE`` is used (default ``"dummy"``). The module
    ``scipaperloader.scrapers.<name>`` must expose a ``Scraper`` class that
    subclasses ``BaseScraper``.

    Returns:
        An instance of the configured scraper class, or an instance of the
        dummy scraper if loading failed with ``ImportError``,
        ``AttributeError`` or ``TypeError`` (the failure is recorded via
        ``ActivityLog.log_error``).
    """
    # Imported lazily inside the function, as in the original code
    # (presumably to avoid a circular import with the models package).
    from ..models import ScraperModuleConfig, ActivityLog

    # Bind ``name`` before the ``try`` so the error handler can always
    # reference it, even when the configuration lookup itself is what failed.
    name = None
    try:
        # Get module name from database first, fallback to config
        name = ScraperModuleConfig.get_current_module()
        if not name:
            name = current_app.config.get("SCRAPER_MODULE", "dummy")

        module = importlib.import_module(f"scipaperloader.scrapers.{name}")
        cls = getattr(module, "Scraper")

        # Validate that it's actually a BaseScraper. ``issubclass`` raises
        # TypeError itself when ``cls`` is not a class; that is caught below.
        if not issubclass(cls, BaseScraper):
            raise TypeError(
                f"Scraper class in module '{name}' does not inherit from BaseScraper"
            )

        return cls()

    except (ImportError, AttributeError, TypeError) as e:
        ActivityLog.log_error(
            error_message=f"Failed to load scraper module '{name}': {str(e)}",
            source="scraper_factory",
            severity="error"
        )
        # Fallback to dummy scraper
        from .dummy import Scraper as DummyScraper
        return DummyScraper()


def get_available_scrapers():
    """Get list of available scraper modules.

    Scans the ``scipaperloader.scrapers`` package directory for ``.py`` files
    (excluding ``__init__.py``, ``base.py`` and ``factory.py``), imports each
    one, and keeps those exposing a ``Scraper`` class derived from
    ``BaseScraper``.

    Returns:
        A list of dicts, ordered by file name, each with the keys:
        ``"name"`` (module name without ``.py``), ``"class"`` (the scraper
        class) and ``"description"`` (the class docstring, or
        ``"No description available"`` when the class has none).
    """
    import os
    from scipaperloader.scrapers import __path__ as scrapers_path

    excluded = ("__init__.py", "base.py", "factory.py")
    modules = []
    scrapers_dir = scrapers_path[0]

    # os.listdir() returns entries in arbitrary order; sort for a stable result.
    for filename in sorted(os.listdir(scrapers_dir)):
        if not filename.endswith(".py") or filename in excluded:
            continue

        module_name = filename[:-3]
        try:
            # Try to import and validate the module
            module = importlib.import_module(f"scipaperloader.scrapers.{module_name}")
        except (ImportError, AttributeError, TypeError):
            # Best-effort discovery: skip modules that fail to import.
            continue

        cls = getattr(module, "Scraper", None)
        if not (isinstance(cls, type) and issubclass(cls, BaseScraper)):
            # No ``Scraper`` attribute, or it is not a BaseScraper subclass.
            continue

        modules.append({
            "name": module_name,
            "class": cls,
            # A class always has ``__doc__`` (None when undocumented), so a
            # getattr() default would never apply; use ``or`` instead.
            "description": cls.__doc__ or "No description available",
        })

    return modules