"""Database models for paper metadata and scraper schedule/volume settings."""

from .db import db


class PaperMetadata(db.Model):
    """Bibliographic fields and processing state for a single paper."""

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    # Unique and indexed: at most one row per DOI.
    doi = db.Column(db.String, unique=True, index=True)
    alt_id = db.Column(db.String)
    issn = db.Column(db.String(32))
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    # NOTE(review): stored as Date; the original author was undecided
    # between Date, DateTime and String for this column.
    published_online = db.Column(db.Date)
    status = db.Column(db.String(10))  # 'Pending','Done','Failed'
    file_path = db.Column(db.Text)
    error_msg = db.Column(db.Text)
    # Both timestamps are filled by the database on insert; updated_at is
    # additionally refreshed on every UPDATE of the row.
    created_at = db.Column(db.DateTime, default=db.func.current_timestamp())
    updated_at = db.Column(
        db.DateTime,
        default=db.func.current_timestamp(),
        onupdate=db.func.current_timestamp(),
    )


class ScheduleConfig(db.Model):
    """Per-hour weight; the hour of day itself is the primary key."""

    hour = db.Column(db.Integer, primary_key=True)  # 0-23
    weight = db.Column(db.Float)  # weight


class VolumeConfig(db.Model):
    """Configured scraping volume."""

    id = db.Column(db.Integer, primary_key=True)
    volume = db.Column(db.Float)  # volume of papers to scrape per day


def init_schedule_config():
    """Seed ScheduleConfig and VolumeConfig with defaults when they are empty.

    Each table is checked independently and only populated when it holds no
    rows, so existing configuration is never overwritten. Each table that is
    seeded is committed on its own.

    Must run where ``db.session`` and the model ``query`` attributes are
    usable (presumably inside a Flask application context -- confirm
    against the caller).
    """
    if ScheduleConfig.query.count() == 0:
        # Default schedule: lower volume during business hours, higher
        # volume at night. Each band is (hours, weight); together the bands
        # cover every hour 0-23 exactly once.
        default_bands = (
            (range(0, 6), 1.0),    # Night hours (higher volume)
            (range(6, 9), 0.3),    # Morning hours (low volume)
            (range(9, 17), 0.2),   # Business hours (very low volume)
            (range(17, 21), 0.5),  # Evening hours (medium volume)
            (range(21, 24), 0.8),  # Late evening (high volume)
        )
        for hours, weight in default_bands:
            db.session.add_all(
                ScheduleConfig(hour=hour, weight=weight) for hour in hours
            )
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default volume configuration
        db.session.add(VolumeConfig(volume=100))
        db.session.commit()