from .db import db
import json
from datetime import datetime
from enum import Enum


class ActivityCategory(Enum):
    """Categories for activity logs."""
    GUI_INTERACTION = "gui_interaction"
    CONFIG_CHANGE = "config_change"
    SCRAPER_COMMAND = "scraper_command"
    SCRAPER_ACTIVITY = "scraper_activity"
    SYSTEM = "system"
    DATA_IMPORT = "data_import"


class ErrorSeverity(Enum):
    """Severity levels for error logging."""
    DEBUG = "debug"
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"


class ActivityLog(db.Model):
    """Model for logging various activities in the application."""
    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow, index=True)
    category = db.Column(db.String(50), nullable=False, index=True)
    action = db.Column(db.String(100), nullable=False)
    description = db.Column(db.Text)

    # Reference to related entities (optional)
    paper_id = db.Column(db.Integer, db.ForeignKey('paper_metadata.id'), nullable=True)
    user_id = db.Column(db.Integer, nullable=True)  # For future authentication

    # For config changes
    config_key = db.Column(db.String(100), nullable=True)
    old_value = db.Column(db.Text, nullable=True)
    new_value = db.Column(db.Text, nullable=True)

    # For scraper activities
    status = db.Column(db.String(50), nullable=True)
    source_ip = db.Column(db.String(50), nullable=True)

    # Extra data as JSON
    extra_data = db.Column(db.Text, nullable=True)

    def set_extra_data(self, data_dict):
        """Serialize extra data as a JSON string."""
        if data_dict:
            self.extra_data = json.dumps(data_dict)

    def get_extra_data(self):
        """Deserialize the JSON string to a dictionary."""
        if self.extra_data:
            return json.loads(self.extra_data)
        return {}

    @classmethod
    def log_gui_interaction(cls, action, description=None, paper_id=None, user_id=None, **extra):
        """Log a GUI interaction."""
        log = cls(
            category=ActivityCategory.GUI_INTERACTION.value,
            action=action,
            description=description,
            paper_id=paper_id,
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_config_change(cls, config_key, old_value, new_value, user_id=None, **extra):
        """Log a configuration change."""
        log = cls(
            category=ActivityCategory.CONFIG_CHANGE.value,
            action=f"Changed {config_key}",
            config_key=config_key,
            old_value=str(old_value),
            new_value=str(new_value),
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_scraper_command(cls, action, status=None, user_id=None, **extra):
        """Log a scraper command (start/stop/pause)."""
        log = cls(
            category=ActivityCategory.SCRAPER_COMMAND.value,
            action=action,
            status=status,
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_scraper_activity(cls, action, paper_id=None, status=None, description=None, **extra):
        """Log a scraper activity (downloading, processing papers, etc.)."""
        log = cls(
            category=ActivityCategory.SCRAPER_ACTIVITY.value,
            action=action,
            paper_id=paper_id,
            status=status,
            description=description
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_error(cls, error_message, exception=None, severity=ErrorSeverity.ERROR.value,
                  source=None, paper_id=None, user_id=None, **extra):
        """Log system errors or warnings.

        Args:
            error_message: Brief description of the error
            exception: The exception object, if available
            severity: Error severity level (debug, info, warning, error, critical)
            source: Component/module where the error occurred
            paper_id: Related paper ID, if applicable
            user_id: Related user ID, if applicable
            **extra: Any additional data to store
        """
        details = {}
        if exception:
            details.update({
                'exception_type': type(exception).__name__,
                'exception_message': str(exception)
            })
            # Get traceback if available
            import traceback
            details['traceback'] = traceback.format_exc()

        if source:
            extra['source'] = source

        log = cls(
            category=ActivityCategory.SYSTEM.value,
            action=f"{severity.upper()}: {error_message}"[:100],  # Limit action length
            description=error_message,
            paper_id=paper_id,
            user_id=user_id,
            status=severity
        )

        # Add exception details to extra data
        extra.update(details)
        log.set_extra_data(extra)

        db.session.add(log)
        db.session.commit()
        return log

    @classmethod
    def log_import_activity(cls, action, status=None, description=None, user_id=None, **extra):
        """Log data import activities (CSV uploads, bulk imports, etc.)."""
        log = cls(
            category=ActivityCategory.DATA_IMPORT.value,
            action=action,
            status=status,
            description=description,
            user_id=user_id
        )
        log.set_extra_data(extra)
        db.session.add(log)
        db.session.commit()
        return log


class PaperMetadata(db.Model):
    """Metadata and processing state for a single paper."""
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    doi = db.Column(db.String, unique=True, index=True)
    alt_id = db.Column(db.String)
    issn = db.Column(db.String(32))
    journal = db.Column(db.String(255))
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    published_online = db.Column(db.Date)  # or DateTime/String
    status = db.Column(db.String(10))  # 'Pending', 'Done', 'Failed'
    file_path = db.Column(db.Text)
    error_msg = db.Column(db.Text)
    created_at = db.Column(db.DateTime, default=db.func.current_timestamp())
    updated_at = db.Column(
        db.DateTime,
        default=db.func.current_timestamp(),
        onupdate=db.func.current_timestamp(),
    )


class ScheduleConfig(db.Model):
    """Per-hour scraping weight."""
    hour = db.Column(db.Integer, primary_key=True)  # 0-23
    weight = db.Column(db.Float)  # scraping weight for this hour


class VolumeConfig(db.Model):
    """Daily scraping volume."""
    id = db.Column(db.Integer, primary_key=True)
    volume = db.Column(db.Float)  # volume of papers to scrape per day


def init_schedule_config():
    """Initialize ScheduleConfig and VolumeConfig with default values if empty."""
    if ScheduleConfig.query.count() == 0:
        # Default schedule: lower volume during business hours,
        # higher volume at night
        default_schedule = [
            # Night hours (higher volume)
            *[(hour, 0.1) for hour in range(0, 6)],
            # Morning hours (low volume)
            *[(hour, 0.5) for hour in range(6, 8)],
            *[(hour, 0.7) for hour in range(8, 9)],
            # Business hours (very low volume)
            *[(hour, 1) for hour in range(9, 17)],
            # Evening hours (medium volume)
            *[(hour, 0.5) for hour in range(17, 21)],
            # Late evening (high volume)
            *[(hour, 0.2) for hour in range(21, 24)],
        ]

        for hour, weight in default_schedule:
            config = ScheduleConfig(hour=hour, weight=weight)
            db.session.add(config)
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default volume configuration
        default_volume = VolumeConfig(volume=100)
        db.session.add(default_volume)
        db.session.commit()
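

# Usage sketch (illustrative only, not part of this module): the calls below
# assume a Flask application whose SQLAlchemy instance is the `db` imported
# above, run inside an active application context. The `app` object and all
# literal argument values are hypothetical.
#
#     with app.app_context():
#         db.create_all()
#         init_schedule_config()
#
#         # Record a scraper event; keyword extras are stored as JSON
#         # via set_extra_data().
#         ActivityLog.log_scraper_activity(
#             action="download_pdf",
#             paper_id=42,
#             status="success",
#             url="https://example.org/paper.pdf",
#         )
#
#         # Record a warning; exception type, message, and traceback are
#         # captured into extra_data by log_error().
#         try:
#             raise ValueError("bad header row")
#         except ValueError as exc:
#             ActivityLog.log_error(
#                 "Failed to parse CSV upload",
#                 exception=exc,
#                 severity=ErrorSeverity.WARNING.value,
#                 source="data_import",
#             )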