from abc import ABC, abstractmethod
from datetime import datetime
from typing import Dict, NamedTuple, Optional


class ScrapeResult(NamedTuple):
    """Outcome of a single scrape attempt, as returned by ``BaseScraper.scrape``."""

    # One of "success", "error", "skipped".
    status: str
    # Human-readable description of the outcome.
    message: str
    # Any extra payload (file_path, metadata, etc.); may be None.
    data: Optional[Dict]
    # Processing time in seconds, when measured.
    duration: Optional[float] = None
    # When the operation completed, when recorded.
    timestamp: Optional[datetime] = None


class BaseScraper(ABC):
    """Base class for all scraper implementations."""

    @abstractmethod
    def scrape(self, doi: str) -> ScrapeResult:
        """
        Fetch metadata and/or download paper for the given DOI.

        Args:
            doi: The DOI of the paper to scrape

        Returns:
            ScrapeResult with status, message, and optional data
        """

    def get_name(self) -> str:
        """Return the name of this scraper (the concrete class name)."""
        return type(self).__name__

    def get_description(self) -> str:
        """Return a description of this scraper.

        Returns:
            The concrete class's docstring, or "No description available"
            when the class has no (or an empty) docstring.
        """
        # Every class has a ``__doc__`` attribute, and it is None when the
        # class body has no docstring (it is not inherited from the base
        # class). A getattr() default would therefore never be used, so fall
        # back explicitly to keep the ``-> str`` contract.
        return type(self).__doc__ or "No description available"