53 lines
2.0 KiB
Python
53 lines
2.0 KiB
Python
from .db import db
|
|
|
|
class Paper(db.Model):
    """Database model holding one row per paper.

    Stores bibliographic fields (title, DOI, ISSN, type, language,
    publication date) together with a ``status`` value and the
    ``file_path`` / ``error_msg`` text columns.
    """

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    # The DOI is declared unique and indexed.
    doi = db.Column(db.String, unique=True, index=True)
    # NOTE(review): presumably a secondary identifier for papers — confirm.
    alt_id = db.Column(db.String)
    issn = db.Column(db.String(32))
    type = db.Column(db.String(50))
    language = db.Column(db.String(50))
    published_date = db.Column(db.Date)  # or DateTime/String
    status = db.Column(db.String(10))  # 'Pending','Done','Failed'
    # NOTE(review): presumably the stored file location and the last
    # failure message for this paper — confirm against the writer.
    file_path = db.Column(db.Text)
    error_msg = db.Column(db.Text)
    # TODO: plus maybe timestamps for created/updated
|
|
|
|
class ScheduleConfig(db.Model):
    """Database model mapping an hour of the day to a weight.

    The hour itself is the primary key, so there is at most one row per
    hour value.
    """

    hour = db.Column(db.Integer, primary_key=True)  # 0-23
    weight = db.Column(db.Float)  # weight
|
|
|
|
class VolumeConfig(db.Model):
    """Database model storing the configured daily scraping volume."""

    id = db.Column(db.Integer, primary_key=True)
    volume = db.Column(db.Float)  # volume of papers to scrape per day
|
|
|
|
def init_schedule_config():
    """Seed ScheduleConfig and VolumeConfig with defaults when they are empty.

    Each table is checked independently and is only populated when it
    currently holds no rows, so existing rows are never modified. Each
    seeded table is committed on its own.
    """
    if ScheduleConfig.query.count() == 0:
        # Default schedule: lower volume during business hours,
        # higher volume at night. The ranges together cover hours 0-23.
        hourly_weights = [
            (range(0, 6), 1.0),  # Night hours (higher volume)
            (range(6, 9), 0.3),  # Morning hours (low volume)
            (range(9, 17), 0.2),  # Business hours (very low volume)
            (range(17, 21), 0.5),  # Evening hours (medium volume)
            (range(21, 24), 0.8),  # Late evening (high volume)
        ]
        default_rows = [
            ScheduleConfig(hour=hour, weight=weight)
            for hours, weight in hourly_weights
            for hour in hours
        ]
        db.session.add_all(default_rows)
        db.session.commit()

    if VolumeConfig.query.count() == 0:
        # Default volume configuration
        db.session.add(VolumeConfig(volume=100))
        db.session.commit()