
#!/usr/bin/env python3
"""
Quick fix script to stop all running scraper tasks using APScheduler.
This ensures all scheduled tasks are properly terminated.
"""
import os
import sys
import signal
import subprocess
import time
from datetime import datetime, UTC
# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
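# NOTE: the '../..' above assumes this script sits two directory levels below
# the project root; adjust the relative path if the script is moved.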

def stop_apscheduler_jobs():
    """Stop all APScheduler jobs through the Flask app"""
    print("Stopping APScheduler jobs...")
    try:
        from scipaperloader import create_app

        app = create_app()
        with app.app_context():
            # The object stored under 'SCHEDULER' is expected to be the
            # project's own scheduler wrapper; revoke_all_scraper_jobs() is
            # its helper, not a stock APScheduler method.
            scheduler = app.config.get('SCHEDULER')
            if scheduler:
                revoked_count = scheduler.revoke_all_scraper_jobs()
                print(f"✓ Revoked {revoked_count} APScheduler jobs")
            else:
                print("❌ APScheduler not found in app config")
    except Exception as e:
        print(f"⚠ Error stopping APScheduler jobs: {e}")

def kill_python_processes():
    """Kill any running Python processes that might be Flask/APScheduler workers"""
    print("Checking for running Flask/APScheduler processes...")
    try:
        # Look for Flask processes
        result = subprocess.run(['pgrep', '-f', 'flask'], capture_output=True, text=True)
        if result.returncode == 0:
            pids = result.stdout.strip().split('\n')
            for pid in pids:
                if pid:
                    try:
                        # Check if this is our process before killing
                        cmdline_result = subprocess.run(['ps', '-p', pid, '-o', 'cmd='], capture_output=True, text=True)
                        if 'scipaperloader' in cmdline_result.stdout:
                            os.kill(int(pid), signal.SIGTERM)
                            print(f" Killed Flask process {pid}")
                    except (ProcessLookupError, ValueError):
                        pass  # Process already dead or invalid PID

            # Wait a moment for graceful shutdown
            time.sleep(2)
        else:
            print("✓ No Flask processes found")
    except Exception as e:
        print(f"⚠ Error checking processes: {e}")

def stop_scraper_state():
    """Set scraper state to inactive using Flask app context"""
    try:
        from scipaperloader import create_app
        from scipaperloader.models import ScraperState, PaperMetadata
        from scipaperloader.db import db
        from scipaperloader.scrapers.factory import get_scraper

        app = create_app()
        with app.app_context():
            # Set scraper to inactive
            ScraperState.set_active(False)
            ScraperState.set_paused(False)
            print("✓ Set scraper state to inactive")

            # Get scraper configuration for proper status reversion
            scraper = get_scraper()
            input_statuses = scraper.get_input_statuses()
            output_statuses = scraper.get_output_statuses()
            processing_status = output_statuses.get("processing", "Processing")

            # Revert any papers in processing status
            processing_papers = PaperMetadata.query.filter_by(status=processing_status).all()
            reverted_count = 0

            if processing_papers and input_statuses:
                revert_status = input_statuses[0]  # Use first input status as default

                for paper in processing_papers:
                    # Try to use previous_status if available, otherwise use first input status
                    if hasattr(paper, 'previous_status') and paper.previous_status:
                        paper.status = paper.previous_status
                    else:
                        paper.status = revert_status
                    paper.updated_at = datetime.now(UTC)
                    reverted_count += 1

                db.session.commit()
                print(f"✓ Reverted {reverted_count} papers from '{processing_status}' to previous status")
            else:
                print("✓ No papers in processing status to revert")
    except Exception as e:
        print(f"⚠ Error setting scraper state: {e}")

def main():
    print("=== QUICK SCRAPER FIX (APScheduler) ===")
    print(f"Time: {datetime.now()}")
    print()

    # Step 1: Stop scraper state and revert papers
    stop_scraper_state()

    # Step 2: Stop all APScheduler jobs
    stop_apscheduler_jobs()

    # Step 3: Kill any running Flask processes
    kill_python_processes()

    print()
    print("=== FIX COMPLETE ===")
    print("The scraper has been stopped and all tasks terminated.")
    print("You can now restart the application with:")
    print(" make run")
    print("or")
    print(" python -m flask --app scipaperloader run")

if __name__ == "__main__":
    main()
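
# Optional follow-up (a sketch only, not part of the fix): verify that no papers
# are still stuck in the processing status. The names below mirror those used in
# stop_scraper_state(); the literal "Processing" is only the fallback label, so
# substitute whatever get_output_statuses()["processing"] returns for your scraper.
#
#   from scipaperloader import create_app
#   from scipaperloader.models import PaperMetadata
#
#   app = create_app()
#   with app.app_context():
#       print(PaperMetadata.query.filter_by(status="Processing").count())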