#!/usr/bin/env python3 """ Quick fix script to stop all running scraper tasks using APScheduler. This ensures all scheduled tasks are properly terminated. """ import os import sys import signal import subprocess import time from datetime import datetime, UTC # Add project root to path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) def stop_apscheduler_jobs(): """Stop all APScheduler jobs through the Flask app""" print("Stopping APScheduler jobs...") try: from scipaperloader import create_app app = create_app() with app.app_context(): scheduler = app.config.get('SCHEDULER') if scheduler: revoked_count = scheduler.revoke_all_scraper_jobs() print(f"✓ Revoked {revoked_count} APScheduler jobs") else: print("❌ APScheduler not found in app config") except Exception as e: print(f"⚠ Error stopping APScheduler jobs: {e}") def kill_python_processes(): """Kill any running Python processes that might be Flask/APScheduler workers""" print("Checking for running Flask/APScheduler processes...") try: # Look for Flask processes result = subprocess.run(['pgrep', '-f', 'flask'], capture_output=True, text=True) if result.returncode == 0: pids = result.stdout.strip().split('\n') for pid in pids: if pid: try: # Check if this is our process before killing cmdline_result = subprocess.run(['ps', '-p', pid, '-o', 'cmd='], capture_output=True, text=True) if 'scipaperloader' in cmdline_result.stdout: os.kill(int(pid), signal.SIGTERM) print(f" Killed Flask process {pid}") except (ProcessLookupError, ValueError): pass # Process already dead or invalid PID # Wait a moment for graceful shutdown time.sleep(2) else: print("✓ No Flask processes found") except Exception as e: print(f"⚠ Error checking processes: {e}") def stop_scraper_state(): """Set scraper state to inactive using Flask app context""" try: from scipaperloader import create_app from scipaperloader.models import ScraperState, PaperMetadata from scipaperloader.db import db from scipaperloader.scrapers.factory import get_scraper app = create_app() with app.app_context(): # Set scraper to inactive ScraperState.set_active(False) ScraperState.set_paused(False) print("✓ Set scraper state to inactive") # Get scraper configuration for proper status reversion scraper = get_scraper() input_statuses = scraper.get_input_statuses() output_statuses = scraper.get_output_statuses() processing_status = output_statuses.get("processing", "Processing") # Revert any papers in processing status processing_papers = PaperMetadata.query.filter_by(status=processing_status).all() reverted_count = 0 if processing_papers and input_statuses: revert_status = input_statuses[0] # Use first input status as default for paper in processing_papers: # Try to use previous_status if available, otherwise use first input status if hasattr(paper, 'previous_status') and paper.previous_status: paper.status = paper.previous_status else: paper.status = revert_status paper.updated_at = datetime.now(UTC) reverted_count += 1 db.session.commit() print(f"✓ Reverted {reverted_count} papers from '{processing_status}' to previous status") else: print("✓ No papers in processing status to revert") except Exception as e: print(f"⚠ Error setting scraper state: {e}") def main(): print("=== QUICK SCRAPER FIX (APScheduler) ===") print(f"Time: {datetime.now()}") print() # Step 1: Stop scraper state and revert papers stop_scraper_state() # Step 2: Stop all APScheduler jobs stop_apscheduler_jobs() # Step 3: Kill any running Flask processes kill_python_processes() print() print("=== FIX COMPLETE ===") print("The scraper has been stopped and all tasks terminated.") print("You can now restart the application with:") print(" make run") print("or") print(" python -m flask --app scipaperloader run") if __name__ == "__main__": main()