SciPaperLoader/tools/diagnostics/test_reversion.py
2025-05-24 12:39:23 +02:00

102 lines
4.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Test script for verifying the paper reversion fix.
This script:
1. Simulates stopping the scraper by calling revert_pending_papers() directly
2. Checks that no papers remain in the 'Pending' state afterwards
3. Reports how many Celery tasks are still active, reserved, or scheduled
"""
import os
import sys
import time
from datetime import datetime
from sqlalchemy import func
from flask import Flask
# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
# Import the app and models
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ActivityLog, ScraperState
from scipaperloader.celery import celery
# Application instance built at import time; test_stop_scraper() opens its
# app context before touching the models.
app = create_app()
def test_stop_scraper():
    """Exercise the pending-paper reversion that runs when the scraper stops.

    Steps, all performed inside the application context:

    1. Print the current scraper state.
    2. Count papers whose status is "Pending"; if there are none, offer to
       create a throwaway paper in that state (prompt on stdin).
    3. Call ``revert_pending_papers()`` and print the number it reports.
    4. Re-count "Pending" papers and list the papers referenced by the most
       recent ``revert_pending`` activity-log entries.
    5. Print how many Celery tasks are active, reserved, or scheduled.
    6. Print a verdict based on the counts gathered above.

    The function is interactive, writes to the configured database, and
    returns nothing; every result is reported on stdout.
    """
    with app.app_context():
        # First check current scraper state
        scraper_state = ScraperState.get_current_state()
        print(f"Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")

        # Check if there are any papers in "Pending" state
        pending_count = PaperMetadata.query.filter_by(status="Pending").count()
        print(f"Papers in 'Pending' state before stopping: {pending_count}")

        if pending_count == 0:
            print("No papers in 'Pending' state to test with.")
            print("Would you like to create a test paper in Pending state? (y/n)")
            try:
                choice = input().strip().lower()
            except EOFError:
                # stdin is closed (CI, cron, piped run): treat it as "no"
                # instead of aborting with a traceback.
                choice = "n"

            if choice == 'y':
                # Create a test paper
                now = datetime.utcnow()
                paper = PaperMetadata(
                    title="Test Paper for Reversion",
                    doi="10.1234/test.123",
                    status="Pending",
                    previous_status="New",  # Test value we expect to be reverted to
                    created_at=now,
                    updated_at=now,
                )
                db.session.add(paper)
                db.session.commit()
                print(f"Created test paper with ID {paper.id}, status='Pending', previous_status='New'")
                pending_count = 1

        # Simulate the stop_scraper API call. Imported here, as in the
        # original script, rather than at module import time.
        from scipaperloader.blueprints.scraper import revert_pending_papers

        print("Reverting pending papers...")
        reverted = revert_pending_papers()
        print(f"Reverted {reverted} papers from 'Pending' state")

        # Check if any papers are still in "Pending" state
        still_pending = PaperMetadata.query.filter_by(status="Pending").count()
        print(f"Papers still in 'Pending' state after stopping: {still_pending}")

        # List any that were reverted and their current status
        if reverted > 0:
            print("\nPapers that were reverted:")
            # Never fetch more log rows than papers reverted in this run, so
            # entries left over from earlier runs are not listed as current.
            recent_logs = (
                ActivityLog.query.filter_by(action="revert_pending")
                .order_by(ActivityLog.timestamp.desc())
                .limit(min(reverted, 10))
                .all()
            )
            for log in recent_logs:
                reverted_paper = PaperMetadata.query.get(log.paper_id)
                if reverted_paper:
                    print(
                        f"Paper ID {reverted_paper.id}: '{reverted_paper.title}' - "
                        f"Now status='{reverted_paper.status}'"
                    )

        # Check active celery tasks. Each inspect call may return None (for
        # example when no worker replies), hence the `or {}` fallbacks.
        inspector = celery.control.inspect()
        active = inspector.active() or {}
        reserved = inspector.reserved() or {}
        scheduled = inspector.scheduled() or {}

        active_count = sum(len(tasks) for tasks in active.values())
        reserved_count = sum(len(tasks) for tasks in reserved.values())
        scheduled_count = sum(len(tasks) for tasks in scheduled.values())

        print(f"\nCurrently {active_count} active, {reserved_count} reserved, and {scheduled_count} scheduled tasks")

        # Print conclusion
        if still_pending == 0 and reverted > 0:
            print("\nSUCCESS: All pending papers were properly reverted!")
        elif still_pending > 0:
            print(f"\nWARNING: {still_pending} papers are still in 'Pending' state!")
        elif pending_count == 0 and reverted == 0:
            print("\nNo papers to revert. Can't fully test.")
        else:
            # Papers were pending beforehand and none are pending now, yet
            # the revert call reported nothing; previously this case ended
            # without any verdict at all.
            print(
                f"\nWARNING: {pending_count} papers were pending before the run, "
                f"but revert_pending_papers() reported {reverted} reverted!"
            )
if __name__ == "__main__":
    # Executed as a script (not imported): run the interactive diagnostic.
    test_stop_scraper()