131 lines
4.9 KiB
Python
Executable File
131 lines
4.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Quick fix script to stop all running scraper tasks using APScheduler.
|
|
This ensures all scheduled tasks are properly terminated.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import signal
|
|
import subprocess
|
|
import time
|
|
from datetime import datetime, UTC
|
|
|
|
# Put the directory two levels above this script at the front of the import
# search path so that project-local imports made later in this file resolve
# against the project root.
sys.path.insert(
    0,
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
    ),
)
|
|
|
|
def stop_apscheduler_jobs():
    """Revoke all scraper jobs held by the application's scheduler.

    Builds the Flask app via ``create_app()``, reads the object stored under
    the ``'SCHEDULER'`` key of ``app.config`` and calls its
    ``revoke_all_scraper_jobs()`` method, printing the count that call
    returns. If the config holds no (truthy) scheduler, a notice is printed
    instead.

    Returns:
        None. Progress and errors are reported on stdout; any exception
        (including a failed import of the project package) is caught and
        printed rather than propagated.
    """
    print("Stopping APScheduler jobs...")
    try:
        # Imported lazily so that an import failure is reported by the
        # handler below instead of aborting the whole script.
        from scipaperloader import create_app

        flask_app = create_app()
        with flask_app.app_context():
            job_scheduler = flask_app.config.get('SCHEDULER')
            if not job_scheduler:
                print("❌ APScheduler not found in app config")
                return

            n_revoked = job_scheduler.revoke_all_scraper_jobs()
            print(f"✓ Revoked {n_revoked} APScheduler jobs")

    except Exception as exc:
        print(f"⚠ Error stopping APScheduler jobs: {exc}")
|
|
|
|
def kill_python_processes():
    """Send SIGTERM to running Flask processes that belong to scipaperloader.

    Candidate PIDs are taken from ``pgrep -f flask``. For each candidate the
    full command line is read with ``ps`` and the process is signalled only
    when that command line contains ``scipaperloader``. The PID of this
    script itself is always skipped.

    Returns:
        None. Results and errors are reported on stdout; unexpected
        exceptions (for example ``pgrep`` not being installed) are caught and
        printed rather than propagated.
    """
    print("Checking for running Flask/APScheduler processes...")
    try:
        # Look for Flask processes
        result = subprocess.run(['pgrep', '-f', 'flask'], capture_output=True, text=True)
        if result.returncode != 0:
            print("✓ No Flask processes found")
            return

        own_pid = os.getpid()
        signalled = 0

        for pid_text in result.stdout.split():
            try:
                pid = int(pid_text)
            except ValueError:
                continue  # Not a PID; ignore malformed pgrep output

            # `pgrep -f` matches on the whole command line, which can include
            # this script's own invocation; never terminate ourselves.
            if pid == own_pid:
                continue

            # Check if this is our process before killing. `args=` is the
            # POSIX spelling of the full-command-line column (the procps-only
            # `cmd=` keyword is rejected by BSD/macOS ps, which would make
            # the check below silently never match).
            cmdline_result = subprocess.run(
                ['ps', '-p', str(pid), '-o', 'args='],
                capture_output=True,
                text=True,
            )
            if 'scipaperloader' not in cmdline_result.stdout:
                continue

            try:
                os.kill(pid, signal.SIGTERM)
            except ProcessLookupError:
                continue  # Process already exited between pgrep and kill
            except PermissionError:
                # Report and keep going so one foreign-owned process does
                # not prevent the remaining PIDs from being handled.
                print(f"⚠ No permission to stop Flask process {pid}")
                continue

            print(f" Killed Flask process {pid}")
            signalled += 1

        # Wait a moment for graceful shutdown, but only if something was
        # actually asked to shut down.
        if signalled:
            time.sleep(2)

    except Exception as e:
        print(f"⚠ Error checking processes: {e}")
|
|
|
|
def stop_scraper_state():
|
|
"""Set scraper state to inactive using Flask app context"""
|
|
try:
|
|
from scipaperloader import create_app
|
|
from scipaperloader.models import ScraperState, PaperMetadata
|
|
from scipaperloader.db import db
|
|
from scipaperloader.scrapers.factory import get_scraper
|
|
|
|
app = create_app()
|
|
with app.app_context():
|
|
# Set scraper to inactive
|
|
ScraperState.set_active(False)
|
|
ScraperState.set_paused(False)
|
|
print("✓ Set scraper state to inactive")
|
|
|
|
# Get scraper configuration for proper status reversion
|
|
scraper = get_scraper()
|
|
input_statuses = scraper.get_input_statuses()
|
|
output_statuses = scraper.get_output_statuses()
|
|
processing_status = output_statuses.get("processing", "Processing")
|
|
|
|
# Revert any papers in processing status
|
|
processing_papers = PaperMetadata.query.filter_by(status=processing_status).all()
|
|
reverted_count = 0
|
|
|
|
if processing_papers and input_statuses:
|
|
revert_status = input_statuses[0] # Use first input status as default
|
|
|
|
for paper in processing_papers:
|
|
# Try to use previous_status if available, otherwise use first input status
|
|
if hasattr(paper, 'previous_status') and paper.previous_status:
|
|
paper.status = paper.previous_status
|
|
else:
|
|
paper.status = revert_status
|
|
paper.updated_at = datetime.now(UTC)
|
|
reverted_count += 1
|
|
|
|
db.session.commit()
|
|
print(f"✓ Reverted {reverted_count} papers from '{processing_status}' to previous status")
|
|
else:
|
|
print("✓ No papers in processing status to revert")
|
|
|
|
except Exception as e:
|
|
print(f"⚠ Error setting scraper state: {e}")
|
|
|
|
def main():
    """Run the full quick-fix sequence and print restart instructions.

    Prints a banner with the current local time, then calls, in this order,
    ``stop_scraper_state()``, ``stop_apscheduler_jobs()`` and
    ``kill_python_processes()``, and finally prints how to start the
    application again.
    """
    print("=== QUICK SCRAPER FIX (APScheduler) ===")
    print(f"Time: {datetime.now()}")
    print()

    # Step 1: Stop scraper state and revert papers
    # Step 2: Stop all APScheduler jobs
    # Step 3: Kill any running Flask processes
    fix_steps = (
        stop_scraper_state,
        stop_apscheduler_jobs,
        kill_python_processes,
    )
    for run_step in fix_steps:
        run_step()

    print()
    closing_lines = (
        "=== FIX COMPLETE ===",
        "The scraper has been stopped and all tasks terminated.",
        "You can now restart the application with:",
        " make run",
        "or",
        " python -m flask --app scipaperloader run",
    )
    for text in closing_lines:
        print(text)
|
|
|
|
# Run the fix sequence only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|