"""
Diagnose and fix scraper stopping issues.

Run as a script: it prints the scraper state stored in the database, the
Celery tasks currently visible on the workers, and the most recent activity
logs, then optionally force-stops the scraper (state flag + task revocation
+ queue purge).
"""
from scipaperloader import create_app
from scipaperloader.celery import celery
from scipaperloader.models import ScraperState, ActivityLog
from scipaperloader.scrapers.factory import get_scraper  # NOTE(review): unused here — confirm before removing

app = create_app()


def check_scraper_status():
    """Print the scraper state currently stored in the database."""
    with app.app_context():
        scraper_state = ScraperState.query.first()
        if scraper_state:
            print(f"Scraper state in DB: active={scraper_state.is_active}, paused={scraper_state.is_paused}")
        else:
            print("No scraper state found in database")


def _print_task_group(title, tasks_by_worker):
    """Print one titled section of Celery tasks, grouped by worker."""
    print(title)
    for worker, tasks in tasks_by_worker.items():
        for task in tasks:
            print(f"Worker: {worker}, Task: {task.get('name', 'Unknown')}, ID: {task.get('id', 'Unknown')}")


def check_celery_tasks():
    """Print the Celery tasks that are currently active or scheduled."""
    i = celery.control.inspect()
    # inspect() calls return None when no worker responds; fall back to {}.
    _print_task_group("\n=== ACTIVE TASKS ===", i.active() or {})
    _print_task_group("\n=== SCHEDULED TASKS ===", i.scheduled() or {})


def _print_recent_logs(title, category):
    """Print the five most recent ActivityLog rows for *category*, newest first.

    Caller must already hold an application context.
    """
    logs = (
        ActivityLog.query.filter_by(category=category)
        .order_by(ActivityLog.timestamp.desc())
        .limit(5)
        .all()
    )
    print(title)
    for log in logs:
        print(f"[{log.timestamp}] {log.action}: {log.description}")


def check_recent_logs():
    """Check recent activity logs for clues."""
    with app.app_context():
        _print_recent_logs("\n=== RECENT COMMAND LOGS ===", 'scraper_command')
        _print_recent_logs("\n=== RECENT ACTIVITY LOGS ===", 'scraper_activity')


def force_stop_scraper():
    """Force stop the scraper by setting the state and revoking all tasks."""
    with app.app_context():
        # Flip the persisted state first so no new work gets scheduled
        # while we tear the queues down.
        scraper_state = ScraperState.query.first()
        if scraper_state:
            scraper_state.is_active = False
            scraper_state.is_paused = False
            from scipaperloader.db import db
            db.session.commit()
            print("Set scraper state to inactive")

        # Revoke every task visible in any queue. A set gives O(1)
        # de-duplication of IDs that appear in more than one queue
        # (the original used an O(n) list scan).
        i = celery.control.inspect()
        revoked_ids = set()
        for queue_func in (i.scheduled, i.active, i.reserved):
            for tasks in (queue_func() or {}).values():
                for task in tasks:
                    task_id = task.get('id')
                    if task_id and task_id not in revoked_ids:
                        # terminate=True asks the worker to kill a task
                        # that is already executing, not just pending.
                        celery.control.revoke(task_id, terminate=True)
                        revoked_ids.add(task_id)
                        print(f"Revoked task: {task_id}")

        # Drop anything still sitting in the broker queues.
        celery.control.purge()
        print("Purged all task queues")

        # Record the intervention in the activity log.
        ActivityLog.log_scraper_command(
            action="force_stop_scraper",
            status="success",
            description=f"Force stopped scraper, revoked {len(revoked_ids)} tasks"
        )
        print(f"\nRevoked {len(revoked_ids)} tasks in total")


if __name__ == "__main__":
    print("=== SCRAPER STATUS DIAGNOSTIC TOOL ===")
    check_scraper_status()
    check_celery_tasks()
    check_recent_logs()

    stop_confirmation = input("\nDo you want to force stop the scraper? (y/n): ")
    if stop_confirmation.lower() == 'y':
        force_stop_scraper()
        print("\nScraper force stopped. Current state:")
        check_scraper_status()
    else:
        print("No changes made.")