# File listing: 228 lines, 7.4 KiB, Python
import json
import traceback
from datetime import datetime
from enum import Enum

from .db import db


class ActivityCategory(Enum):
    """Closed set of category labels an activity-log entry can carry.

    Each member's value is the lowercase string form of its name.
    """

    # Entries originating from the GUI.
    GUI_INTERACTION = "gui_interaction"
    # Entries describing a configuration change.
    CONFIG_CHANGE = "config_change"
    # Entries for commands issued to the scraper.
    SCRAPER_COMMAND = "scraper_command"
    # Entries for work performed by the scraper.
    SCRAPER_ACTIVITY = "scraper_activity"
    # Entries for system-level events.
    SYSTEM = "system"


class ErrorSeverity(Enum):
    """Severity labels used when logging errors.

    Members are declared from least to most severe; each value is the
    lowercase string form of the member name.
    """

    DEBUG = "debug"
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"


class ActivityLog(db.Model):
    """Model for logging various activities in the application.

    Every row is one logged event. ``category`` stores one of the
    ``ActivityCategory`` values. The ``log_*`` classmethods build an entry,
    store any additional keyword arguments as JSON in ``extra_data``, add the
    entry to ``db.session`` and commit it immediately.
    """

    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow, index=True)
    category = db.Column(db.String(50), nullable=False, index=True)
    action = db.Column(db.String(100), nullable=False)
    description = db.Column(db.Text)

    # Reference to related entities (optional)
    paper_id = db.Column(db.Integer, db.ForeignKey('paper_metadata.id'), nullable=True)
    user_id = db.Column(db.Integer, nullable=True)  # For future authentication

    # For config changes
    config_key = db.Column(db.String(100), nullable=True)
    old_value = db.Column(db.Text, nullable=True)
    new_value = db.Column(db.Text, nullable=True)

    # For scraper activities
    status = db.Column(db.String(50), nullable=True)
    source_ip = db.Column(db.String(50), nullable=True)

    # Extra data as JSON
    extra_data = db.Column(db.Text, nullable=True)

    def set_extra_data(self, data_dict):
        """Serialize extra data as a JSON string into ``extra_data``.

        A falsy ``data_dict`` (``None`` or empty) is ignored, so whatever is
        already stored in ``extra_data`` is left untouched.
        """
        if data_dict:
            self.extra_data = json.dumps(data_dict)

    def get_extra_data(self):
        """Deserialize ``extra_data``; return ``{}`` when nothing is stored."""
        if self.extra_data:
            return json.loads(self.extra_data)
        return {}

    @classmethod
    def _save_entry(cls, extra, **fields):
        """Create a log row from ``fields``, attach ``extra`` and commit it.

        Args:
            extra: Dict of additional data stored as JSON (ignored if empty).
            **fields: Column values passed to the model constructor.

        Returns:
            The committed ``ActivityLog`` instance.

        Raises:
            Exception: Whatever ``db.session.commit()`` raised; the session is
                rolled back first so it stays usable for the caller.
        """
        log = cls(**fields)
        log.set_extra_data(extra)
        db.session.add(log)
        try:
            db.session.commit()
        except Exception:
            # A failed commit leaves the session unusable until it is rolled
            # back; do that here, then let the caller see the original error.
            db.session.rollback()
            raise
        return log

    @classmethod
    def log_gui_interaction(cls, action, description=None, paper_id=None, user_id=None, **extra):
        """Log a GUI interaction.

        Args:
            action: Short name of the interaction.
            description: Optional longer description.
            paper_id: Related paper ID if applicable.
            user_id: Related user ID if applicable.
            **extra: Any additional data to store as JSON.

        Returns:
            The committed ``ActivityLog`` entry.
        """
        return cls._save_entry(
            extra,
            category=ActivityCategory.GUI_INTERACTION.value,
            action=action,
            description=description,
            paper_id=paper_id,
            user_id=user_id,
        )

    @classmethod
    def log_config_change(cls, config_key, old_value, new_value, user_id=None, **extra):
        """Log a configuration change.

        ``old_value`` and ``new_value`` are stored via ``str()``, so ``None``
        is recorded as the string ``"None"``.

        Args:
            config_key: Name of the configuration entry that changed.
            old_value: Value before the change.
            new_value: Value after the change.
            user_id: Related user ID if applicable.
            **extra: Any additional data to store as JSON.

        Returns:
            The committed ``ActivityLog`` entry.
        """
        return cls._save_entry(
            extra,
            category=ActivityCategory.CONFIG_CHANGE.value,
            action=f"Changed {config_key}",
            config_key=config_key,
            old_value=str(old_value),
            new_value=str(new_value),
            user_id=user_id,
        )

    @classmethod
    def log_scraper_command(cls, action, status=None, user_id=None, **extra):
        """Log a scraper command (start/stop/pause).

        Args:
            action: The command that was issued.
            status: Optional status string stored with the entry.
            user_id: Related user ID if applicable.
            **extra: Any additional data to store as JSON.

        Returns:
            The committed ``ActivityLog`` entry.
        """
        return cls._save_entry(
            extra,
            category=ActivityCategory.SCRAPER_COMMAND.value,
            action=action,
            status=status,
            user_id=user_id,
        )

    @classmethod
    def log_scraper_activity(cls, action, paper_id=None, status=None, description=None, **extra):
        """Log a scraper activity (downloading, processing papers, etc.).

        Args:
            action: Short name of the activity.
            paper_id: Related paper ID if applicable.
            status: Optional status string stored with the entry.
            description: Optional longer description.
            **extra: Any additional data to store as JSON.

        Returns:
            The committed ``ActivityLog`` entry.
        """
        return cls._save_entry(
            extra,
            category=ActivityCategory.SCRAPER_ACTIVITY.value,
            action=action,
            paper_id=paper_id,
            status=status,
            description=description,
        )

    @classmethod
    def log_error(cls, error_message, exception=None, severity=ErrorSeverity.ERROR.value,
                  source=None, paper_id=None, user_id=None, **extra):
        """Log system errors or warnings.

        Args:
            error_message: Brief description of the error
            exception: The exception object if available
            severity: Error severity level (debug, info, warning, error,
                critical), given either as the string or as an
                ``ErrorSeverity`` member
            source: Component/module where the error occurred
            paper_id: Related paper ID if applicable
            user_id: Related user ID if applicable
            **extra: Any additional data to store

        Returns:
            The committed ``ActivityLog`` entry.
        """
        # Accept the enum member as well as its string value; the code below
        # needs a plain string for ``.upper()`` and for the ``status`` column.
        if isinstance(severity, ErrorSeverity):
            severity = severity.value

        details = {}
        if exception:
            details['exception_type'] = type(exception).__name__
            details['exception_message'] = str(exception)
            if isinstance(exception, BaseException):
                # Format the exception that was passed in, using the traceback
                # attached to it. format_exc() would instead describe whatever
                # exception is currently being handled, which is
                # "NoneType: None" when called outside an ``except`` block.
                details['traceback'] = ''.join(
                    traceback.format_exception(
                        type(exception), exception, exception.__traceback__
                    )
                )
            else:
                # Not a real exception object: fall back to the exception
                # currently being handled, if any.
                details['traceback'] = traceback.format_exc()

        if source:
            extra['source'] = source

        # Exception details are merged last, so they win over same-named extras.
        extra.update(details)

        return cls._save_entry(
            extra,
            category=ActivityCategory.SYSTEM.value,
            action=f"{severity.upper()}: {error_message}"[:100],  # Limit action length
            description=error_message,
            paper_id=paper_id,
            user_id=user_id,
            status=severity,
        )


class PaperMetadata(db.Model):
    """Metadata record for a single paper, including its processing state."""

    id = db.Column(db.Integer, primary_key=True)

    # Bibliographic fields
    title = db.Column(db.Text)
    doi = db.Column(db.String, unique=True, index=True)
    alt_id = db.Column(db.String)
    issn = db.Column(db.String(32))
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    published_online = db.Column(db.Date)  # or DateTime/String

    # Processing state
    status = db.Column(db.String(10))  # 'Pending','Done','Failed'
    file_path = db.Column(db.Text)
    error_msg = db.Column(db.Text)

    # Bookkeeping timestamps: both default to the current timestamp on
    # insert, and updated_at is refreshed on every update.
    created_at = db.Column(db.DateTime, default=db.func.current_timestamp())
    updated_at = db.Column(
        db.DateTime,
        default=db.func.current_timestamp(),
        onupdate=db.func.current_timestamp(),
    )


class ScheduleConfig(db.Model):
    """One row per hour of the day, holding that hour's scheduling weight."""

    # Hour of the day (0-23); doubles as the primary key.
    hour = db.Column(db.Integer, primary_key=True)
    # Weight assigned to this hour.
    weight = db.Column(db.Float)


class VolumeConfig(db.Model):
    """Stores the configured scraping volume."""

    id = db.Column(db.Integer, primary_key=True)
    # Volume of papers to scrape per day.
    volume = db.Column(db.Float)


def init_schedule_config():
    """Initialize ScheduleConfig and VolumeConfig with default values if empty.

    When ``ScheduleConfig`` has no rows, one row is inserted for every hour
    0-23 using the weight bands below. When ``VolumeConfig`` has no rows, a
    single row with ``volume=100`` is inserted. Each table is committed
    separately, and a table that already has rows is left untouched.
    """
    if ScheduleConfig.query.count() == 0:
        # Default schedule as (first_hour, end_hour_exclusive, weight) bands.
        # The volume labels are the original author's; how a weight maps to
        # scraping volume is decided by the code that reads ScheduleConfig.
        # NOTE(review): the morning band previously stopped at 7 while the
        # next one started at 8, so hour 7 never got a row. It is assigned to
        # the 0.5 band here; confirm 0.5 (rather than 0.7) is intended.
        default_bands = (
            (0, 6, 0.1),    # Night hours (higher volume)
            (6, 8, 0.5),    # Morning hours (low volume)
            (8, 9, 0.7),
            (9, 17, 1),     # Business hours (very low volume)
            (17, 21, 0.5),  # Evening hours (medium volume)
            (21, 24, 0.2),  # Late evening (high volume)
        )
        db.session.add_all([
            ScheduleConfig(hour=hour, weight=weight)
            for first_hour, end_hour, weight in default_bands
            for hour in range(first_hour, end_hour)
        ])
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default volume configuration
        db.session.add(VolumeConfig(volume=100))
        db.session.commit()