65 lines
2.4 KiB
Python
65 lines
2.4 KiB
Python
import importlib
|
|
from .base import BaseScraper
|
|
|
|
def get_scraper() -> BaseScraper:
    """Load and instantiate the configured scraper, falling back to the dummy.

    The module name is resolved in this order:

    1. ``ScraperModuleConfig.get_current_module()``.
    2. If that is falsy, the Flask ``SCRAPER_MODULE`` config entry
       (defaulting to ``"dummy"``).
    3. If there is no Flask application context, ``"dummy"``.

    The module ``scipaperloader.scrapers.<name>`` is then imported and its
    ``Scraper`` attribute is instantiated, provided it is a subclass of
    ``BaseScraper``.

    Returns:
        An instance of the resolved ``Scraper`` class. If resolving,
        importing, validating or instantiating it raises ``ImportError``,
        ``AttributeError`` or ``TypeError``, the failure is recorded through
        ``ActivityLog.log_error`` and an instance of the dummy scraper is
        returned instead.
    """
    # Function-level import kept from the original; presumably this avoids a
    # circular import with the models package -- TODO confirm.
    from ..models import ScraperModuleConfig, ActivityLog

    # Bind ``name`` before entering the ``try`` so the error handler below can
    # always format its message, even when the lookup of the module name
    # itself is what failed (otherwise the handler raises UnboundLocalError
    # and the dummy fallback is never reached).
    name = None

    try:
        # Get module name from database first, fallback to dummy
        name = ScraperModuleConfig.get_current_module()
        if not name:
            # Only try to access Flask config if we're in app context
            try:
                from flask import current_app

                name = current_app.config.get("SCRAPER_MODULE", "dummy")
            except RuntimeError:
                # No app context, use dummy
                name = "dummy"

        module = importlib.import_module(f"scipaperloader.scrapers.{name}")
        cls = getattr(module, "Scraper")

        # Validate that it's actually a BaseScraper. ``issubclass`` itself
        # raises TypeError when ``cls`` is not a class, which is handled by
        # the same fallback path below.
        if not issubclass(cls, BaseScraper):
            raise TypeError(f"Scraper class in module '{name}' does not inherit from BaseScraper")

        return cls()

    except (ImportError, AttributeError, TypeError) as e:
        ActivityLog.log_error(
            error_message=f"Failed to load scraper module '{name}': {str(e)}",
            source="scraper_factory",
            severity="error"
        )
        # Fallback to dummy scraper
        from .dummy import Scraper as DummyScraper

        return DummyScraper()
|
|
|
|
def get_available_scrapers():
    """Return the valid scraper modules found in ``scipaperloader.scrapers``.

    Every ``*.py`` file in the first directory of the package path is
    considered, except ``__init__.py``, ``base.py`` and ``factory.py``. A
    module is listed only if it imports cleanly and exposes a ``Scraper``
    attribute that is a subclass of ``BaseScraper``.

    Returns:
        A list of dicts, ordered by module name, each with the keys:

        * ``"name"`` -- the module name (file name without ``.py``).
        * ``"class"`` -- the module's ``Scraper`` class.
        * ``"description"`` -- the class docstring, or
          ``"No description available"`` when the class has none.
    """
    import os
    from scipaperloader.scrapers import __path__ as scrapers_path

    # Package infrastructure files that are never scraper implementations.
    excluded = ("__init__.py", "base.py", "factory.py")

    modules = []
    scrapers_dir = scrapers_path[0]

    # os.listdir() returns entries in arbitrary order; sort so that callers
    # get a stable, reproducible listing.
    for filename in sorted(os.listdir(scrapers_dir)):
        if not filename.endswith(".py") or filename in excluded:
            continue

        module_name = filename[:-3]
        try:
            # Try to import and validate the module
            module = importlib.import_module(f"scipaperloader.scrapers.{module_name}")
            cls = getattr(module, "Scraper", None)
            if cls and issubclass(cls, BaseScraper):
                modules.append({
                    "name": module_name,
                    "class": cls,
                    # Every class has a ``__doc__`` attribute (None when no
                    # docstring was written), so a getattr() default would
                    # never apply; fall back explicitly instead.
                    "description": cls.__doc__ or "No description available",
                })
        except (ImportError, AttributeError, TypeError):
            # Skip invalid modules (best-effort discovery)
            continue

    return modules
|