53 lines
2.0 KiB
Python

from .db import db
class Paper(db.Model):
    """Database model holding one row per paper.

    Stores bibliographic metadata (title, identifiers, type, language,
    publication date) together with processing bookkeeping fields
    (status, file path, error message).
    """

    # --- identity -------------------------------------------------------
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    # DOI values must be unique across rows and are indexed for lookup.
    doi = db.Column(db.String, index=True, unique=True)
    alt_id = db.Column(db.String)
    issn = db.Column(db.String(32))

    # --- classification -------------------------------------------------
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    # Stored as a plain date; DateTime or String were considered as
    # alternative column types.
    published_date = db.Column(db.Date)

    # --- processing state -----------------------------------------------
    # Expected values: 'Pending', 'Done', 'Failed' (all fit in 10 chars).
    status = db.Column(db.String(10))
    # NOTE(review): presumably the location of the retrieved file and the
    # message recorded when processing fails -- confirm against callers.
    file_path = db.Column(db.Text)
    error_msg = db.Column(db.Text)

    # Possible future addition: created/updated timestamp columns.
class ScheduleConfig(db.Model):
    """Per-hour weighting row; one row per hour of the day.

    The hour itself is the primary key, so at most one weight can be
    stored for any given hour.
    """

    # Hour of day, 0 through 23.
    hour = db.Column(db.Integer, primary_key=True)
    # Weight assigned to that hour.
    # NOTE(review): presumably a relative share of daily scraping activity
    # -- confirm with the scheduler that consumes it.
    weight = db.Column(db.Float)
class VolumeConfig(db.Model):
    """Configuration row for the daily scraping volume."""

    id = db.Column(db.Integer, primary_key=True)
    # Number of papers to scrape per day (stored as a float).
    volume = db.Column(db.Float)
def init_schedule_config():
    """Seed the schedule and volume tables with defaults when they are empty.

    Two independent steps, each committed on its own:

    1. If ``ScheduleConfig`` has no rows, insert one row for every hour
       0-23 with a default weight (heavier at night, lighter during
       business hours).
    2. If ``VolumeConfig`` has no rows, insert a single row with a
       volume of 100.

    A table that already contains rows is left untouched.
    """
    if ScheduleConfig.query.count() == 0:
        # (hours, weight) bands covering the whole day in ascending order.
        weight_bands = (
            (range(0, 6), 1.0),    # night: higher volume
            (range(6, 9), 0.3),    # morning: low volume
            (range(9, 17), 0.2),   # business hours: very low volume
            (range(17, 21), 0.5),  # evening: medium volume
            (range(21, 24), 0.8),  # late evening: high volume
        )
        hourly_rows = [
            ScheduleConfig(hour=hour_of_day, weight=band_weight)
            for hours, band_weight in weight_bands
            for hour_of_day in hours
        ]
        db.session.add_all(hourly_rows)
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default volume configuration.
        db.session.add(VolumeConfig(volume=100))
        db.session.commit()