"""Database models for activity logging, paper metadata, and scraper configuration.

NOTE(review): this file's newlines had been stripped (the whole module was
collapsed onto a handful of physical lines and did not parse); the content
below is the same code with its line structure restored.
"""

import json
from datetime import datetime
from enum import Enum

from .db import db


class ActivityCategory(Enum):
    """Categories for activity logs."""
    GUI_INTERACTION = "gui_interaction"
    CONFIG_CHANGE = "config_change"
    SCRAPER_COMMAND = "scraper_command"
    SCRAPER_ACTIVITY = "scraper_activity"
    SYSTEM = "system"
    DATA_IMPORT = "data_import"


class ErrorSeverity(Enum):
    """Severity levels for error logging."""
    DEBUG = "debug"
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"


class ActivityLog(db.Model):
    """Model for logging various activities in the application.

    One wide table holds all log kinds; ``category`` (an ActivityCategory
    value) says which of the optional column groups below is populated.
    The ``log_*`` classmethods are the intended write API — each builds a
    row, serializes any ``**extra`` kwargs to JSON, and commits it.
    """
    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow, index=True)
    category = db.Column(db.String(50), nullable=False, index=True)
    action = db.Column(db.String(100), nullable=False)
    description = db.Column(db.Text)

    # Reference to related entities (optional)
    paper_id = db.Column(db.Integer, db.ForeignKey('paper_metadata.id'), nullable=True)
    user_id = db.Column(db.Integer, nullable=True)  # For future authentication

    # For config changes
    config_key = db.Column(db.String(100), nullable=True)
    old_value = db.Column(db.Text, nullable=True)
    new_value = db.Column(db.Text, nullable=True)

    # For scraper activities
    status = db.Column(db.String(50), nullable=True)
    source_ip = db.Column(db.String(50), nullable=True)

    # Extra data as JSON
    extra_data = db.Column(db.Text, nullable=True)

    def set_extra_data(self, data_dict):
        """Serialize extra data as JSON string.

        A falsy/empty dict is ignored, leaving ``extra_data`` untouched.
        """
        if data_dict:
            self.extra_data = json.dumps(data_dict)

    def get_extra_data(self):
        """Deserialize JSON string to dictionary.

        Returns an empty dict when no extra data was stored.
        """
        if self.extra_data:
            return json.loads(self.extra_data)
        return {}

    @classmethod
    def log_gui_interaction(cls, action, description=None, paper_id=None,
                            user_id=None, **extra):
        """Log a GUI interaction.

        Returns the committed ActivityLog row.
        """
        log = cls(
            category=ActivityCategory.GUI_INTERACTION.value,
            action=action,
            description=description,
            paper_id=paper_id,
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_config_change(cls, config_key, old_value, new_value,
                          user_id=None, **extra):
        """Log a configuration change.

        Old/new values are stringified so any config type can be stored.
        Returns the committed ActivityLog row.
        """
        log = cls(
            category=ActivityCategory.CONFIG_CHANGE.value,
            action=f"Changed {config_key}",
            config_key=config_key,
            old_value=str(old_value),
            new_value=str(new_value),
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_scraper_command(cls, action, status=None, user_id=None, **extra):
        """Log a scraper command (start/stop/pause).

        Returns the committed ActivityLog row.
        """
        log = cls(
            category=ActivityCategory.SCRAPER_COMMAND.value,
            action=action,
            status=status,
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_scraper_activity(cls, action, paper_id=None, status=None,
                             description=None, **extra):
        """Log a scraper activity (downloading, processing papers, etc.).

        Returns the committed ActivityLog row.
        """
        log = cls(
            category=ActivityCategory.SCRAPER_ACTIVITY.value,
            action=action,
            paper_id=paper_id,
            status=status,
            description=description
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_error(cls, error_message, exception=None,
                  severity=ErrorSeverity.ERROR.value, source=None,
                  paper_id=None, user_id=None, **extra):
        """Log system errors or warnings.

        Args:
            error_message: Brief description of the error
            exception: The exception object if available
            severity: Error severity level (debug, info, warning, error, critical)
            source: Component/module where the error occurred
            paper_id: Related paper ID if applicable
            user_id: Related user ID if applicable
            **extra: Any additional data to store
        """
        details = {}
        if exception:
            details.update({
                'exception_type': type(exception).__name__,
                'exception_message': str(exception)
            })
            # Get traceback if available
            import traceback
            details['traceback'] = traceback.format_exc()

        if source:
            extra['source'] = source

        log = cls(
            category=ActivityCategory.SYSTEM.value,
            action=f"{severity.upper()}: {error_message}"[:100],  # Limit action length
            description=error_message,
            paper_id=paper_id,
            user_id=user_id,
            status=severity
        )

        # Add exception details to extra data
        extra.update(details)
        log.set_extra_data(extra)

        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_import_activity(cls, action, status=None, description=None,
                            user_id=None, **extra):
        """Log data import activities (CSV uploads, bulk imports, etc.).

        Returns the committed ActivityLog row.
        """
        log = cls(
            category=ActivityCategory.DATA_IMPORT.value,
            action=action,
            status=status,
            description=description,
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log


class PaperMetadata(db.Model):
    """Metadata for a scraped paper (referenced by ActivityLog.paper_id)."""
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    doi = db.Column(db.String, unique=True, index=True)
    alt_id = db.Column(db.String)
    issn = db.Column(db.String(32))
    journal = db.Column(db.String(255))
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    published_online = db.Column(db.Date)  # or DateTime/String
    status = db.Column(db.String(10))  # 'Pending','Done','Failed'
    file_path = db.Column(db.Text)
    error_msg = db.Column(db.Text)
    created_at = db.Column(db.DateTime, default=db.func.current_timestamp())
    updated_at = db.Column(
        db.DateTime,
        default=db.func.current_timestamp(),
        onupdate=db.func.current_timestamp(),
    )


class ScheduleConfig(db.Model):
    """Per-hour scraping weight; one row per hour of the day."""
    hour = db.Column(db.Integer, primary_key=True)  # 0-23
    weight = db.Column(db.Float)  # weight


class VolumeConfig(db.Model):
    """Singleton-style row holding the daily scraping volume."""
    id = db.Column(db.Integer, primary_key=True)
    volume = db.Column(db.Float)  # volume of papers to scrape per day


class DownloadPathConfig(db.Model):
    """Model to store the base path for downloaded files."""
    id = db.Column(db.Integer, primary_key=True)
    path = db.Column(db.String(255), default="/path/to/dummy/papers")  # Default path

    @classmethod
    def get_path(cls):
        """Get the configured download path, creating default if needed."""
        config = cls.query.first()
        if not config:
            config = cls(path="/path/to/dummy/papers")  # Ensure default exists
            db.session.add(config)
            db.session.commit()
        return config.path

    @classmethod
    def set_path(cls, new_path):
        """Set the download path.

        Creates the singleton row on first use; returns the config row.
        """
        config = cls.query.first()
        if not config:
            config = cls(path=new_path)
            db.session.add(config)
        else:
            config.path = new_path
        db.session.commit()
        return config


class ScraperState(db.Model):
    """Model to store the current state of the scraper."""
    id = db.Column(db.Integer, primary_key=True)
    is_active = db.Column(db.Boolean, default=False)
    is_paused = db.Column(db.Boolean, default=False)
    last_updated = db.Column(db.DateTime, default=datetime.utcnow,
                             onupdate=datetime.utcnow)

    @classmethod
    def get_current_state(cls):
        """Get the current scraper state, creating it if it doesn't exist."""
        state = cls.query.first()
        if not state:
            state = cls(is_active=False, is_paused=False)
            db.session.add(state)
            db.session.commit()
        return state

    @classmethod
    def set_active(cls, active):
        """Set the active state of the scraper."""
        state = cls.get_current_state()
        state.is_active = active
        db.session.commit()
        return state

    @classmethod
    def set_paused(cls, paused):
        """Set the paused state of the scraper."""
        state = cls.get_current_state()
        state.is_paused = paused
        db.session.commit()
        return state

    @classmethod
    def is_scraper_active(cls):
        """Check if the scraper is active (running and not paused)."""
        state = cls.get_current_state()
        return state.is_active and not state.is_paused


def init_schedule_config():
    """Initialize ScheduleConfig with default values if empty"""
    if ScheduleConfig.query.count() == 0:
        # Default schedule: Lower volume during business hours,
        # higher volume at night
        default_schedule = [
            # Night hours (higher volume)
            *[(hour, 0.1) for hour in range(0, 6)],
            # Morning hours (low volume)
            # NOTE(review): hour 7 gets no row here (ranges are 6-7 then
            # 8-9) — confirm whether the gap is intentional.
            *[(hour, 0.5) for hour in range(6, 7)],
            *[(hour, 0.7) for hour in range(8, 9)],
            # Business hours (very low volume)
            *[(hour, 1) for hour in range(9, 17)],
            # Evening hours (medium volume)
            *[(hour, 0.5) for hour in range(17, 21)],
            # Late evening (high volume)
            *[(hour, 0.2) for hour in range(21, 24)],
        ]
        for hour, weight in default_schedule:
            config = ScheduleConfig(hour=hour, weight=weight)
            db.session.add(config)
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default volume configuration
        default_volume = VolumeConfig(volume=100)
        db.session.add(default_volume)
        db.session.commit()

    # Initialize DownloadPathConfig if it doesn't exist
    if DownloadPathConfig.query.count() == 0:
        default_path = DownloadPathConfig(path="/path/to/dummy/papers")
        db.session.add(default_path)
        db.session.commit()