adds debugging scripts
This commit is contained in:
parent
0104b04eef
commit
3a21c4429b
8
check_state.py
Normal file
8
check_state.py
Normal file
@ -0,0 +1,8 @@
|
||||
from scipaperloader.models import ScraperState
|
||||
from scipaperloader import create_app
|
||||
|
||||
# Application instance; its context must be active before models are queried.
app = create_app()

with app.app_context():
    # first() returns None when the table holds no row, so guard before
    # reading attributes instead of crashing with AttributeError.
    scraper_state = ScraperState.query.first()
    if scraper_state is None:
        print("No scraper state found in database")
    else:
        print(f"Active: {scraper_state.is_active}, Paused: {scraper_state.is_paused}")
|
106
diagnose_scraper.py
Normal file
106
diagnose_scraper.py
Normal file
@ -0,0 +1,106 @@
|
||||
"""
|
||||
Diagnose and fix scraper stopping issues.
|
||||
"""
|
||||
|
||||
from scipaperloader import create_app
|
||||
from scipaperloader.celery import celery
|
||||
from scipaperloader.models import ScraperState, ActivityLog
|
||||
from scipaperloader.scrapers.factory import get_scraper
|
||||
|
||||
# Application instance shared by the helpers below; each one that queries
# models enters app.app_context() first.
app = create_app()
|
||||
|
||||
def check_scraper_status():
    """Print the scraper flags stored in the database, or a notice if none exist."""
    with app.app_context():
        state = ScraperState.query.first()
        if not state:
            print("No scraper state found in database")
            return
        print(f"Scraper state in DB: active={state.is_active}, paused={state.is_paused}")
|
||||
|
||||
def check_celery_tasks():
    """Print the tasks Celery workers report as active and as scheduled.

    For each of the two views a header is printed, followed by one line per
    task giving the worker, the task name and the task id. Missing fields are
    shown as ``Unknown``.
    """
    inspector = celery.control.inspect()
    sections = (
        ("ACTIVE", inspector.active),
        ("SCHEDULED", inspector.scheduled),
    )

    for title, fetch in sections:
        print(f"\n=== {title} TASKS ===")
        # The inspect calls may return None (handled with `or {}`).
        for worker, tasks in (fetch() or {}).items():
            for task in tasks:
                # Scheduled (ETA) entries wrap the task description in a
                # "request" dict; active entries expose name/id directly.
                request = task.get('request') or task
                print(f"Worker: {worker}, Task: {request.get('name', 'Unknown')}, ID: {request.get('id', 'Unknown')}")
|
||||
|
||||
def check_recent_logs():
    """Print the five newest ActivityLog rows for each scraper log category."""
    sections = (
        ('scraper_command', "\n=== RECENT COMMAND LOGS ==="),
        ('scraper_activity', "\n=== RECENT ACTIVITY LOGS ==="),
    )

    with app.app_context():
        for category, heading in sections:
            newest_first = ActivityLog.timestamp.desc()
            entries = (
                ActivityLog.query
                .filter_by(category=category)
                .order_by(newest_first)
                .limit(5)
                .all()
            )
            print(heading)
            for entry in entries:
                print(f"[{entry.timestamp}] {entry.action}: {entry.description}")
|
||||
|
||||
def force_stop_scraper():
    """Force stop the scraper by clearing its state and revoking all tasks.

    Steps, in order:
    1. Set ``is_active`` and ``is_paused`` to False on the first
       ScraperState row (if one exists) and commit.
    2. Revoke, with ``terminate=True``, every task id that workers report as
       scheduled, active or reserved. Each id is revoked at most once.
    3. Call ``celery.control.purge()``.
    4. Record the action through ``ActivityLog.log_scraper_command``.
    """
    # Local import: db is only needed by this function.
    from scipaperloader.db import db

    with app.app_context():
        # Update scraper state
        scraper_state = ScraperState.query.first()
        if scraper_state:
            scraper_state.is_active = False
            scraper_state.is_paused = False
            db.session.commit()
            print("Set scraper state to inactive")

        # Revoke all tasks
        inspector = celery.control.inspect()
        revoked_ids = set()  # a set keeps the "already revoked" check O(1)

        for fetch in (inspector.scheduled, inspector.active, inspector.reserved):
            # The inspect calls may return None (handled with `or {}`).
            for tasks in (fetch() or {}).values():
                for task in tasks:
                    # Scheduled (ETA) entries nest the task under "request";
                    # reading 'id' from the outer dict would skip them.
                    request = task.get('request') or task
                    task_id = request.get('id')
                    if task_id and task_id not in revoked_ids:
                        celery.control.revoke(task_id, terminate=True)
                        revoked_ids.add(task_id)
                        print(f"Revoked task: {task_id}")

        # Purge all queues
        celery.control.purge()
        print("Purged all task queues")

        # Log the action
        ActivityLog.log_scraper_command(
            action="force_stop_scraper",
            status="success",
            description=f"Force stopped scraper, revoked {len(revoked_ids)} tasks"
        )

        print(f"\nRevoked {len(revoked_ids)} tasks in total")
|
||||
|
||||
if __name__ == "__main__":
    # Read-only diagnostics first, so the operator sees the current picture
    # before deciding whether to stop anything.
    print("=== SCRAPER STATUS DIAGNOSTIC TOOL ===")
    check_scraper_status()
    check_celery_tasks()
    check_recent_logs()

    # The force stop is destructive, so it only runs on an explicit "y".
    # Surrounding whitespace is stripped so that "y " is not read as "no".
    stop_confirmation = input("\nDo you want to force stop the scraper? (y/n): ")
    if stop_confirmation.strip().lower() == 'y':
        force_stop_scraper()
        print("\nScraper force stopped. Current state:")
        check_scraper_status()
    else:
        print("No changes made.")
|
120
emergency_stop.py
Executable file
120
emergency_stop.py
Executable file
@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Emergency force stop utility for the scraper.
|
||||
|
||||
This script will:
|
||||
1. Set the scraper state to inactive
|
||||
2. Revoke all running/scheduled tasks
|
||||
3. Purge task queues
|
||||
4. Revert any papers in "Pending" state to their previous status (or "New" if none is recorded)
|
||||
|
||||
Use this to recover from a misbehaving scraper or when the web UI is unresponsive.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
# Add project root to path
|
||||
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
|
||||
|
||||
# Import required modules
|
||||
from scipaperloader import create_app
|
||||
from scipaperloader.db import db
|
||||
from scipaperloader.models import PaperMetadata, ActivityLog, ScraperState
|
||||
from scipaperloader.celery import celery
|
||||
|
||||
# Application instance; emergency_stop() does all of its work inside
# app.app_context().
app = create_app()
|
||||
|
||||
def _task_request(entry):
    """Return the dict carrying a task's 'id' and 'name' for an inspect() entry."""
    # scheduled() entries wrap the task under "request"; active() and
    # reserved() entries expose the fields directly.
    return entry.get('request') or entry


def emergency_stop():
    """Force stop the scraper and revert all pending papers.

    Steps, in order:
    1. Clear the active and paused flags through ScraperState.
    2. Revoke, with ``terminate=True``, every task workers report as active,
       scheduled or reserved.
    3. Purge the task queues.
    4. Set every paper whose status is "Pending" back to its
       ``previous_status``, or to "New" when none is recorded, then commit
       and record a summary through ``ActivityLog.log_scraper_command``.

    Steps 2-3 and step 4 are each best-effort: an exception is printed and
    the function continues, so a failure while talking to Celery does not
    prevent the papers from being reverted.
    """
    with app.app_context():
        print("Emergency Scraper Stop")
        print("-" * 50)

        # 1. Set scraper state to inactive
        ScraperState.set_active(False)
        ScraperState.set_paused(False)
        print("✓ Set scraper state to inactive")

        # 2. Revoke all tasks
        print("\nRevoking running tasks...")
        # Bound before the try block so the summary in step 4 can report it
        # even when inspecting the workers fails.
        revoked_count = 0
        try:
            inspector = celery.control.inspect()
            # Take all three snapshots before revoking anything; the inspect
            # calls may return None (handled with `or {}`).
            snapshots = (
                ("active", inspector.active() or {}),
                ("scheduled", inspector.scheduled() or {}),
                ("reserved", inspector.reserved() or {}),
            )

            for kind, tasks_by_worker in snapshots:
                for tasks in tasks_by_worker.values():
                    for entry in tasks:
                        request = _task_request(entry)
                        task_id = request.get('id')
                        if not task_id:
                            continue
                        celery.control.revoke(task_id, terminate=True)
                        revoked_count += 1
                        # Only active tasks are listed individually.
                        if kind == "active":
                            print(f"  Revoked active task: {request.get('name', 'unknown')}")

            print(f"✓ Revoked {revoked_count} tasks")

            # 3. Purge queues
            celery.control.purge()
            print("✓ Purged all task queues")

        except Exception as e:
            print(f"⚠ Error revoking tasks: {str(e)}")

        # 4. Revert papers in "Pending" status
        try:
            print("\nReverting papers from 'Pending' status...")
            pending_papers = PaperMetadata.query.filter_by(status="Pending").all()
            reverted_count = 0

            for paper in pending_papers:
                # Get previous status or use "New" as fallback
                previous_status = getattr(paper, 'previous_status', None) or "New"
                paper.status = previous_status

                ActivityLog.log_scraper_activity(
                    action="emergency_revert",
                    paper_id=paper.id,
                    status="info",
                    description=f"Emergency reversion from 'Pending' to '{previous_status}'",
                )
                reverted_count += 1
                print(f"  Reverted paper ID {paper.id}: {paper.title} -> {previous_status}")

            # Commit changes
            db.session.commit()
            print(f"✓ Reverted {reverted_count} papers")

            ActivityLog.log_scraper_command(
                action="emergency_stop",
                status="success",
                description=f"Emergency stop performed. Revoked {revoked_count} tasks and reverted {reverted_count} papers."
            )

        except Exception as e:
            db.session.rollback()
            print(f"⚠ Error reverting papers: {str(e)}")

        print("\nEmergency stop completed!")
        print(f"Current time: {datetime.now()}")


if __name__ == "__main__":
    emergency_stop()
|
Loading…
x
Reference in New Issue
Block a user