35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
from abc import ABC, abstractmethod
|
|
from typing import NamedTuple, Optional, Dict
|
|
from datetime import datetime
|
|
|
|
class ScrapeResult(NamedTuple):
    """Outcome of a single scrape operation.

    Only ``status`` and ``message`` are required; the remaining fields are
    optional and default to ``None`` so results without a payload (for
    example errors or skips) can be built without padding arguments.
    """

    status: str  # "success", "error", "skipped"
    message: str  # human-readable status
    data: Optional[Dict] = None  # any extra payload (file_path, metadata, etc.)
    duration: Optional[float] = None  # processing time in seconds
    timestamp: Optional[datetime] = None  # when the operation completed
|
|
|
|
class BaseScraper(ABC):
    """Base class for all scraper implementations."""

    @abstractmethod
    def scrape(self, doi: str) -> ScrapeResult:
        """Fetch metadata and/or download paper for the given DOI.

        Subclasses must override this method; the base class cannot be
        instantiated while it remains abstract.

        Args:
            doi: The DOI of the paper to scrape

        Returns:
            ScrapeResult with status, message, and optional data
        """
        pass

    def get_name(self) -> str:
        """Return the name of this scraper (the concrete class name)."""
        return self.__class__.__name__

    def get_description(self) -> str:
        """Return a description of this scraper.

        Returns:
            The concrete class's own docstring, or
            "No description available" when the class has none.
        """
        # ``__doc__`` always exists on a class: it is None when the class has
        # no docstring and is not inherited from the parent. A getattr()
        # default would therefore never be used, so test the value instead.
        doc = self.__class__.__doc__
        return doc or "No description available"
|