adds cache management
This commit is contained in:
parent
987c76969b
commit
36ba835980
@ -5,7 +5,10 @@ from ..db import db
|
|||||||
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
|
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
|
||||||
from ..defaults import MAX_VOLUME
|
from ..defaults import MAX_VOLUME
|
||||||
import os # Import os for path validation
|
import os # Import os for path validation
|
||||||
|
import sys
|
||||||
from scipaperloader.scrapers import __path__ as scrapers_path
|
from scipaperloader.scrapers import __path__ as scrapers_path
|
||||||
|
# Import the cache invalidation function from our new module
|
||||||
|
from ..cache_utils import invalidate_hourly_quota_cache
|
||||||
|
|
||||||
bp = Blueprint("config", __name__, url_prefix="/config")
|
bp = Blueprint("config", __name__, url_prefix="/config")
|
||||||
|
|
||||||
@ -166,6 +169,17 @@ def _update_schedule(schedule_data):
|
|||||||
)
|
)
|
||||||
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
# Invalidate hourly quota cache using the cache_utils module
|
||||||
|
try:
|
||||||
|
invalidate_hourly_quota_cache()
|
||||||
|
except Exception as e:
|
||||||
|
# Log the error but don't fail the update
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=f"Error invalidating hourly quota cache: {str(e)}",
|
||||||
|
source="_update_schedule"
|
||||||
|
)
|
||||||
|
|
||||||
return True, "Schedule updated successfully!"
|
return True, "Schedule updated successfully!"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -10,6 +10,7 @@ from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory,
|
|||||||
from ..db import db
|
from ..db import db
|
||||||
from ..celery import celery
|
from ..celery import celery
|
||||||
from ..defaults import MAX_VOLUME
|
from ..defaults import MAX_VOLUME
|
||||||
|
from ..cache_utils import get_cached_hourly_quota, invalidate_hourly_quota_cache
|
||||||
from celery.schedules import crontab
|
from celery.schedules import crontab
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers
|
from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers
|
||||||
@ -360,6 +361,9 @@ def update_config():
|
|||||||
description="Updated scraper volume"
|
description="Updated scraper volume"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Invalidate hourly quota cache when volume changes
|
||||||
|
invalidate_hourly_quota_cache()
|
||||||
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
return jsonify({
|
return jsonify({
|
||||||
@ -441,7 +445,8 @@ def dummy_scheduled_scraper():
|
|||||||
)
|
)
|
||||||
return False # Stop if not active/paused
|
return False # Stop if not active/paused
|
||||||
|
|
||||||
papers_to_select = calculate_papers_for_current_hour()
|
# Use cached hourly quota instead of calculating each time
|
||||||
|
papers_to_select = get_cached_hourly_quota(calculate_papers_for_current_hour)
|
||||||
|
|
||||||
if papers_to_select <= 0:
|
if papers_to_select <= 0:
|
||||||
ActivityLog.log_scraper_activity(
|
ActivityLog.log_scraper_activity(
|
||||||
@ -463,11 +468,18 @@ def dummy_scheduled_scraper():
|
|||||||
ActivityLog.log_scraper_activity(
|
ActivityLog.log_scraper_activity(
|
||||||
action="dummy_scheduled_scraper_info",
|
action="dummy_scheduled_scraper_info",
|
||||||
status="info",
|
status="info",
|
||||||
description="No 'New' papers found in the database to select."
|
description="No 'New' papers found in the database. Stopping scraper."
|
||||||
)
|
)
|
||||||
# Optional: Depending on requirements, you might want to check later
|
|
||||||
# or handle this case differently. For now, we just log and exit.
|
# Stop the scraper since there are no more papers to process
|
||||||
return True
|
ScraperState.set_active(False)
|
||||||
|
ActivityLog.log_scraper_command(
|
||||||
|
action="auto_stop_scraper",
|
||||||
|
status="success",
|
||||||
|
description="Scraper automatically stopped due to no 'New' papers left to process."
|
||||||
|
)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
selected_paper_ids = [p.id for p in new_papers]
|
selected_paper_ids = [p.id for p in new_papers]
|
||||||
|
|
||||||
|
61
scipaperloader/cache_utils.py
Normal file
61
scipaperloader/cache_utils.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
"""
|
||||||
|
Utility module for cache management in the SciPaperLoader application.
|
||||||
|
This module contains functions for managing the hourly quota cache and other caching mechanisms.
|
||||||
|
"""
|
||||||
|
from datetime import datetime
|
||||||
|
from .models import ActivityLog
|
||||||
|
|
||||||
|
# Module-level, in-memory cache for the hourly paper quota.
HOURLY_QUOTA_CACHE = dict(
    # Hour of day (datetime.now().hour) the cached quota was computed for.
    hour=None,
    # Quota value returned by the calculate function for that hour.
    quota=None,
    # Timestamp of the most recent cache refresh; invalidation resets it to
    # None, which forces the next lookup to recalculate.
    last_config_update=None,
)
|
||||||
|
|
||||||
|
def invalidate_hourly_quota_cache():
    """Mark the hourly quota cache as stale and log the invalidation.

    Only the ``last_config_update`` entry of ``HOURLY_QUOTA_CACHE`` is reset
    to ``None``; ``get_cached_hourly_quota`` treats a ``None`` there as a
    signal to recalculate on its next call.
    """
    # The dict is mutated in place, so no ``global`` declaration is needed.
    HOURLY_QUOTA_CACHE.update(last_config_update=None)

    # Record the invalidation in the activity log.
    ActivityLog.log_scraper_activity(
        action="cache_invalidated",
        status="info",
        description="Hourly quota cache was invalidated due to configuration changes",
    )
|
||||||
|
|
||||||
|
def get_cached_hourly_quota(calculate_function):
    """Return the hourly quota, recalculating only when the cache is stale.

    The cached value is reused while all of the following hold: it was
    computed for the current hour of day, a quota is stored, and the cache
    has not been invalidated (``last_config_update`` is not ``None``).

    Args:
        calculate_function: Zero-argument callable invoked to compute a
            fresh quota when the cache cannot be used.

    Returns:
        The cached quota, or the freshly computed one after a refresh.
    """
    cache = HOURLY_QUOTA_CACHE
    current_hour = datetime.now().hour

    cache_is_fresh = (
        cache['hour'] == current_hour
        and cache['quota'] is not None
        and cache['last_config_update'] is not None
    )
    if cache_is_fresh:
        return cache['quota']

    # Stale or empty cache: compute a new quota and store it with the hour
    # it belongs to and the time of this refresh.
    quota = calculate_function()
    cache['hour'] = current_hour
    cache['quota'] = quota
    cache['last_config_update'] = datetime.now()

    # Record the refresh in the activity log.
    ActivityLog.log_scraper_activity(
        action="cache_updated",
        status="info",
        description=f"Hourly quota cache updated for hour {current_hour}: {quota} papers",
    )

    return quota
|
Loading…
x
Reference in New Issue
Block a user