# Snapshot metadata: 2025-04-16 22:03:17 +02:00 — 316 lines, 10 KiB, Python

from .db import db
import json
from datetime import datetime
from enum import Enum
class ActivityCategory(Enum):
    """Categories for activity logs.

    The string value of each member is stored verbatim in
    ``ActivityLog.category`` by the ``ActivityLog.log_*`` helpers.
    """
    GUI_INTERACTION = "gui_interaction"    # user actions in the GUI
    CONFIG_CHANGE = "config_change"        # configuration edits (key/old/new)
    SCRAPER_COMMAND = "scraper_command"    # operator commands (start/stop/pause)
    SCRAPER_ACTIVITY = "scraper_activity"  # work performed by the scraper itself
    SYSTEM = "system"                      # system events and errors
    DATA_IMPORT = "data_import"            # CSV uploads / bulk imports
class ErrorSeverity(Enum):
    """Severity levels for error logging.

    The string value is stored in ``ActivityLog.status`` and prefixed
    (uppercased) onto ``ActivityLog.action`` by ``ActivityLog.log_error``.
    """
    DEBUG = "debug"
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
class ActivityLog(db.Model):
    """Model for logging various activities in the application.

    Rows are created through the ``log_*`` classmethods, each of which
    builds an entry, commits it, and returns it. Arbitrary structured
    payloads are stored JSON-encoded in ``extra_data``.
    """
    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow, index=True)
    # One of the ActivityCategory enum values.
    category = db.Column(db.String(50), nullable=False, index=True)
    action = db.Column(db.String(100), nullable=False)
    description = db.Column(db.Text)
    # Reference to related entities (optional)
    paper_id = db.Column(db.Integer, db.ForeignKey('paper_metadata.id'), nullable=True)
    user_id = db.Column(db.Integer, nullable=True)  # For future authentication
    # For config changes
    config_key = db.Column(db.String(100), nullable=True)
    old_value = db.Column(db.Text, nullable=True)
    new_value = db.Column(db.Text, nullable=True)
    # For scraper activities
    status = db.Column(db.String(50), nullable=True)
    source_ip = db.Column(db.String(50), nullable=True)
    # Extra data as JSON
    extra_data = db.Column(db.Text, nullable=True)

    def set_extra_data(self, data_dict):
        """Serialize *data_dict* as a JSON string into ``extra_data``.

        A falsy/empty dict leaves ``extra_data`` untouched (None).
        """
        if data_dict:
            self.extra_data = json.dumps(data_dict)

    def get_extra_data(self):
        """Deserialize ``extra_data``; returns {} when nothing is stored."""
        if self.extra_data:
            return json.loads(self.extra_data)
        return {}

    def _save(self):
        """Persist this entry and return it (shared tail of all log_* helpers)."""
        db.session.add(self)
        db.session.commit()
        return self

    @classmethod
    def log_gui_interaction(cls, action, description=None, paper_id=None, user_id=None, **extra):
        """Log a GUI interaction."""
        log = cls(
            category=ActivityCategory.GUI_INTERACTION.value,
            action=action,
            description=description,
            paper_id=paper_id,
            user_id=user_id
        )
        log.set_extra_data(extra)
        return log._save()

    @classmethod
    def log_config_change(cls, config_key, old_value, new_value, user_id=None, **extra):
        """Log a configuration change (old and new values stored as strings)."""
        log = cls(
            category=ActivityCategory.CONFIG_CHANGE.value,
            action=f"Changed {config_key}",
            config_key=config_key,
            old_value=str(old_value),
            new_value=str(new_value),
            user_id=user_id
        )
        log.set_extra_data(extra)
        return log._save()

    @classmethod
    def log_scraper_command(cls, action, status=None, user_id=None, **extra):
        """Log a scraper command (start/stop/pause)."""
        log = cls(
            category=ActivityCategory.SCRAPER_COMMAND.value,
            action=action,
            status=status,
            user_id=user_id
        )
        log.set_extra_data(extra)
        return log._save()

    @classmethod
    def log_scraper_activity(cls, action, paper_id=None, status=None, description=None, **extra):
        """Log a scraper activity (downloading, processing papers, etc.)."""
        log = cls(
            category=ActivityCategory.SCRAPER_ACTIVITY.value,
            action=action,
            paper_id=paper_id,
            status=status,
            description=description
        )
        log.set_extra_data(extra)
        return log._save()

    @classmethod
    def log_error(cls, error_message, exception=None, severity=ErrorSeverity.ERROR.value,
                  source=None, paper_id=None, user_id=None, **extra):
        """Log system errors or warnings.
        Args:
            error_message: Brief description of the error
            exception: The exception object if available
            severity: Error severity level; accepts either an ErrorSeverity
                member or its string value (debug, info, warning, error, critical)
            source: Component/module where the error occurred
            paper_id: Related paper ID if applicable
            user_id: Related user ID if applicable
            **extra: Any additional data to store
        """
        # Accept both ErrorSeverity members and plain strings; the original
        # crashed on .upper() when a member was passed.
        if isinstance(severity, ErrorSeverity):
            severity = severity.value
        details = {}
        if exception:
            details.update({
                'exception_type': type(exception).__name__,
                'exception_message': str(exception)
            })
            # NOTE(review): format_exc() only yields a real traceback when
            # called while the exception is being handled; outside a handler
            # it records "NoneType: None".
            import traceback
            details['traceback'] = traceback.format_exc()
        if source:
            extra['source'] = source
        log = cls(
            category=ActivityCategory.SYSTEM.value,
            action=f"{severity.upper()}: {error_message}"[:100],  # action column is String(100)
            description=error_message,
            paper_id=paper_id,
            user_id=user_id,
            status=severity
        )
        # Add exception details to extra data
        extra.update(details)
        log.set_extra_data(extra)
        return log._save()

    @classmethod
    def log_import_activity(cls, action, status=None, description=None, user_id=None, **extra):
        """Log data import activities (CSV uploads, bulk imports, etc.)."""
        log = cls(
            category=ActivityCategory.DATA_IMPORT.value,
            action=action,
            status=status,
            description=description,
            user_id=user_id
        )
        log.set_extra_data(extra)
        return log._save()
class PaperMetadata(db.Model):
    """A scraped paper's bibliographic record and processing state.

    Referenced by ``ActivityLog.paper_id`` (foreign key to this table).
    """
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    doi = db.Column(db.String, unique=True, index=True)  # primary external identifier
    alt_id = db.Column(db.String)  # alternative identifier when no DOI is available
    issn = db.Column(db.String(32))
    journal = db.Column(db.String(255))
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    published_online = db.Column(db.Date)  # or DateTime/String
    status = db.Column(db.String(10))  # 'Pending','Done','Failed'
    file_path = db.Column(db.Text)  # location of the downloaded file, if any
    error_msg = db.Column(db.Text)  # populated when status is 'Failed'
    # Timestamps maintained by the database, not by Python code.
    created_at = db.Column(db.DateTime, default=db.func.current_timestamp())
    updated_at = db.Column(
        db.DateTime,
        default=db.func.current_timestamp(),
        onupdate=db.func.current_timestamp(),
    )
class ScheduleConfig(db.Model):
    """Per-hour scraping weight; one row per hour of the day.

    Defaults are seeded by ``init_schedule_config``.
    """
    hour = db.Column(db.Integer, primary_key=True)  # 0-23
    weight = db.Column(db.Float)  # relative scraping weight for this hour
class VolumeConfig(db.Model):
    """Single-row config: target volume of papers to scrape per day."""
    id = db.Column(db.Integer, primary_key=True)
    volume = db.Column(db.Float)  # volume of papers to scrape per day
class DownloadPathConfig(db.Model):
    """Model to store the base path for downloaded files.

    Behaves as a singleton: ``get_path``/``set_path`` operate on the
    first (and only expected) row, creating it on demand.
    """

    # Single source of truth for the fallback download location; the
    # original duplicated this literal in three places.
    DEFAULT_PATH = "/path/to/dummy/papers"

    id = db.Column(db.Integer, primary_key=True)
    path = db.Column(db.String(255), default=DEFAULT_PATH)

    @classmethod
    def get_path(cls):
        """Get the configured download path, creating default if needed."""
        config = cls.query.first()
        if not config:
            config = cls(path=cls.DEFAULT_PATH)  # Ensure default exists
            db.session.add(config)
            db.session.commit()
        return config.path

    @classmethod
    def set_path(cls, new_path):
        """Set the download path, creating the row if it doesn't exist yet."""
        config = cls.query.first()
        if not config:
            config = cls(path=new_path)
            db.session.add(config)
        else:
            config.path = new_path
        db.session.commit()
        return config
class ScraperState(db.Model):
    """Singleton-style record tracking the scraper's run/pause flags."""
    id = db.Column(db.Integer, primary_key=True)
    is_active = db.Column(db.Boolean, default=False)
    is_paused = db.Column(db.Boolean, default=False)
    last_updated = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    @classmethod
    def get_current_state(cls):
        """Return the singleton state row, lazily creating it on first access."""
        current = cls.query.first()
        if current is None:
            current = cls(is_active=False, is_paused=False)
            db.session.add(current)
            db.session.commit()
        return current

    @classmethod
    def set_active(cls, active):
        """Persist *active* as the scraper's active flag; returns the state row."""
        current = cls.get_current_state()
        current.is_active = active
        db.session.commit()
        return current

    @classmethod
    def set_paused(cls, paused):
        """Persist *paused* as the scraper's paused flag; returns the state row."""
        current = cls.get_current_state()
        current.is_paused = paused
        db.session.commit()
        return current

    @classmethod
    def is_scraper_active(cls):
        """True only when the scraper is both active and not paused."""
        current = cls.get_current_state()
        return current.is_active and not current.is_paused
def init_schedule_config():
    """Seed ScheduleConfig, VolumeConfig and DownloadPathConfig with defaults.

    Each table is only populated when it is empty, so this is safe to call
    on every startup.
    """
    if ScheduleConfig.query.count() == 0:
        # Hourly scraping weights. NOTE(review): the original comments
        # labelled the lowest weights "higher volume" and the highest
        # "very low volume"; the labels below describe the stored values.
        # Hour 7 was missing entirely from the original schedule; 0.6
        # interpolates between its neighbours (6 -> 0.5, 8 -> 0.7) —
        # TODO confirm intended value.
        default_schedule = [
            # 00:00-05:59 — lowest weight
            *[(hour, 0.1) for hour in range(0, 6)],
            # 06:00-08:59 — ramping up through the morning
            (6, 0.5),
            (7, 0.6),
            (8, 0.7),
            # 09:00-16:59 — full weight during business hours
            *[(hour, 1) for hour in range(9, 17)],
            # 17:00-20:59 — winding down in the evening
            *[(hour, 0.5) for hour in range(17, 21)],
            # 21:00-23:59 — low weight late in the evening
            *[(hour, 0.2) for hour in range(21, 24)],
        ]
        for hour, weight in default_schedule:
            db.session.add(ScheduleConfig(hour=hour, weight=weight))
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default daily scrape volume.
        db.session.add(VolumeConfig(volume=100))
        db.session.commit()

    # Initialize DownloadPathConfig if it doesn't exist
    if DownloadPathConfig.query.count() == 0:
        db.session.add(DownloadPathConfig(path="/path/to/dummy/papers"))
        db.session.commit()