adds cache management
This commit is contained in:
parent
987c76969b
commit
36ba835980
@ -5,7 +5,10 @@ from ..db import db
|
||||
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
|
||||
from ..defaults import MAX_VOLUME
|
||||
import os # Import os for path validation
|
||||
import sys
|
||||
from scipaperloader.scrapers import __path__ as scrapers_path
|
||||
# Import the cache invalidation function from our new module
|
||||
from ..cache_utils import invalidate_hourly_quota_cache
|
||||
|
||||
bp = Blueprint("config", __name__, url_prefix="/config")
|
||||
|
||||
@ -166,6 +169,17 @@ def _update_schedule(schedule_data):
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
|
||||
# Invalidate hourly quota cache using the cache_utils module
|
||||
try:
|
||||
invalidate_hourly_quota_cache()
|
||||
except Exception as e:
|
||||
# Log the error but don't fail the update
|
||||
ActivityLog.log_error(
|
||||
error_message=f"Error invalidating hourly quota cache: {str(e)}",
|
||||
source="_update_schedule"
|
||||
)
|
||||
|
||||
return True, "Schedule updated successfully!"
|
||||
|
||||
except Exception as e:
|
||||
|
@ -10,6 +10,7 @@ from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory,
|
||||
from ..db import db
|
||||
from ..celery import celery
|
||||
from ..defaults import MAX_VOLUME
|
||||
from ..cache_utils import get_cached_hourly_quota, invalidate_hourly_quota_cache
|
||||
from celery.schedules import crontab
|
||||
from sqlalchemy import func
|
||||
from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers
|
||||
@ -360,6 +361,9 @@ def update_config():
|
||||
description="Updated scraper volume"
|
||||
)
|
||||
|
||||
# Invalidate hourly quota cache when volume changes
|
||||
invalidate_hourly_quota_cache()
|
||||
|
||||
db.session.commit()
|
||||
except (ValueError, TypeError):
|
||||
return jsonify({
|
||||
@ -441,7 +445,8 @@ def dummy_scheduled_scraper():
|
||||
)
|
||||
return False # Stop if not active/paused
|
||||
|
||||
papers_to_select = calculate_papers_for_current_hour()
|
||||
# Use cached hourly quota instead of calculating each time
|
||||
papers_to_select = get_cached_hourly_quota(calculate_papers_for_current_hour)
|
||||
|
||||
if papers_to_select <= 0:
|
||||
ActivityLog.log_scraper_activity(
|
||||
@ -463,11 +468,18 @@ def dummy_scheduled_scraper():
|
||||
ActivityLog.log_scraper_activity(
|
||||
action="dummy_scheduled_scraper_info",
|
||||
status="info",
|
||||
description="No 'New' papers found in the database to select."
|
||||
description="No 'New' papers found in the database. Stopping scraper."
|
||||
)
|
||||
# Optional: Depending on requirements, you might want to check later
|
||||
# or handle this case differently. For now, we just log and exit.
|
||||
return True
|
||||
|
||||
# Stop the scraper since there are no more papers to process
|
||||
ScraperState.set_active(False)
|
||||
ActivityLog.log_scraper_command(
|
||||
action="auto_stop_scraper",
|
||||
status="success",
|
||||
description="Scraper automatically stopped due to no 'New' papers left to process."
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
selected_paper_ids = [p.id for p in new_papers]
|
||||
|
||||
|
61
scipaperloader/cache_utils.py
Normal file
61
scipaperloader/cache_utils.py
Normal file
@ -0,0 +1,61 @@
|
||||
"""
|
||||
Utility module for cache management in the SciPaperLoader application.
|
||||
This module contains functions for managing the hourly quota cache and other caching mechanisms.
|
||||
"""
|
||||
from datetime import datetime
|
||||
from .models import ActivityLog
|
||||
|
||||
# Module-level cache for the hourly paper quota.
# It is read and refreshed by get_cached_hourly_quota() and marked stale by
# invalidate_hourly_quota_cache(). All three fields start as None, which
# forces a calculation on first use.
|
||||
HOURLY_QUOTA_CACHE = {
|
||||
'hour': None, # Hour of day (datetime.now().hour) the cached quota was calculated for
|
||||
'quota': None, # Value returned by the calculate function for that hour
|
||||
'last_config_update': None, # datetime of the last recalculation; reset to None to invalidate the cache
|
||||
}
|
||||
|
||||
def invalidate_hourly_quota_cache():
    """Mark the cached hourly quota as stale and record that in the activity log.

    Only the ``last_config_update`` marker is cleared; the ``hour`` and
    ``quota`` entries are left in place. A cleared marker is enough to make
    the next ``get_cached_hourly_quota`` call recalculate.
    """
    # The dict is mutated in place, so no ``global`` declaration is needed.
    HOURLY_QUOTA_CACHE.update({'last_config_update': None})

    # Leave an audit trail so the invalidation shows up in the activity log.
    log_entry = {
        "action": "cache_invalidated",
        "status": "info",
        "description": "Hourly quota cache was invalidated due to configuration changes",
    }
    ActivityLog.log_scraper_activity(**log_entry)
|
||||
|
||||
def get_cached_hourly_quota(calculate_function):
    """
    Return the hourly quota, recalculating it only when the cache is stale.

    The cached value is reused when all of the following hold:

    * a quota has been stored,
    * the cache has not been invalidated (``last_config_update`` is set),
    * the stored hour equals the current hour of day, and
    * the value was stored on the current calendar day.

    The calendar-day check matters because ``hour`` only holds the hour of
    day (0-23). Without it, a value computed at 14:xx yesterday would be
    served again at 14:xx today if nothing refreshed the cache in between.

    Args:
        calculate_function: Zero-argument callable invoked when a
            recalculation is needed. Its return value is cached and returned.

    Returns:
        int: Number of papers to download this hour
    """
    # Take one clock snapshot so the hour and the stored timestamp always
    # agree, even if the call straddles an hour boundary.
    now = datetime.now()
    cached_at = HOURLY_QUOTA_CACHE['last_config_update']

    is_fresh = (
        HOURLY_QUOTA_CACHE['quota'] is not None
        and cached_at is not None
        and HOURLY_QUOTA_CACHE['hour'] == now.hour
        and cached_at.date() == now.date()
    )
    if is_fresh:
        # Use cached value
        return HOURLY_QUOTA_CACHE['quota']

    # Recalculate and update cache
    quota = calculate_function()
    HOURLY_QUOTA_CACHE['hour'] = now.hour
    HOURLY_QUOTA_CACHE['quota'] = quota
    HOURLY_QUOTA_CACHE['last_config_update'] = now

    # Log cache update
    ActivityLog.log_scraper_activity(
        action="cache_updated",
        status="info",
        description=f"Hourly quota cache updated for hour {now.hour}: {quota} papers"
    )

    return quota
|
Loading…
x
Reference in New Issue
Block a user