#!/usr/bin/env python3
"""
Test script for verifying the paper reversion fix.

This script:
1. Simulates stopping the scraper by calling ``revert_pending_papers``
2. Checks whether any papers are still in the "Pending" state afterwards
3. Reports how many Celery tasks are still active, reserved or scheduled
"""

import os
import sys
import time
from datetime import datetime

from sqlalchemy import func
from flask import Flask

# Add project root to path (must happen before the project imports below)
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

# Import the app and models
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ActivityLog, ScraperState
from scipaperloader.celery import celery

app = create_app()


def _count_pending():
    """Return the number of papers whose status is "Pending"."""
    return PaperMetadata.query.filter_by(status="Pending").count()


def _offer_test_paper():
    """Ask on stdin whether to create a "Pending" test paper and create it.

    Returns:
        True if a test paper was created and committed, False otherwise.
    """
    print("No papers in 'Pending' state to test with.")
    print("Would you like to create a test paper in Pending state? (y/n)")
    try:
        choice = input().strip().lower()
    except EOFError:
        # stdin is closed (CI, piped run): behave as if the user declined
        # instead of aborting before the revert is exercised.
        choice = "n"

    if choice != 'y':
        return False

    now = datetime.utcnow()
    paper = PaperMetadata(
        title="Test Paper for Reversion",
        doi="10.1234/test.123",
        status="Pending",
        previous_status="New",  # Test value we expect to be reverted to
        created_at=now,
        updated_at=now,
    )
    db.session.add(paper)
    db.session.commit()
    print(f"Created test paper with ID {paper.id}, status='Pending', previous_status='New'")
    return True


def _print_reverted_papers():
    """Print the papers referenced by the 10 newest "revert_pending" logs.

    NOTE(review): the query is not restricted to the current run, so entries
    logged by earlier reverts may be listed as well.
    """
    print("\nPapers that were reverted:")
    recent_logs = (
        ActivityLog.query.filter_by(action="revert_pending")
        .order_by(ActivityLog.timestamp.desc())
        .limit(10)
        .all()
    )
    for log in recent_logs:
        paper = PaperMetadata.query.get(log.paper_id)
        if paper:
            print(f"Paper ID {paper.id}: '{paper.title}' - Now status='{paper.status}'")


def _count_tasks(tasks_by_worker):
    """Return the total number of tasks in a Celery inspect reply.

    Args:
        tasks_by_worker: mapping of worker name to task list, or a falsy
            value (the original code guarded the reply with ``or {}``).
    """
    return sum(len(tasks) for tasks in (tasks_by_worker or {}).values())


def _print_conclusion(pending_count, reverted, still_pending):
    """Print the verdict of the test run.

    Args:
        pending_count: papers in "Pending" state before the revert.
        reverted: value returned by ``revert_pending_papers``.
        still_pending: papers in "Pending" state after the revert.
    """
    if still_pending == 0 and reverted > 0:
        print("\nSUCCESS: All pending papers were properly reverted!")
    elif still_pending > 0:
        print(f"\nWARNING: {still_pending} papers are still in 'Pending' state!")
    elif pending_count == 0 and reverted == 0:
        print("\nNo papers to revert. Can't fully test.")
    else:
        # Papers were pending beforehand and none remain, yet the revert
        # reported nothing; previously this case produced no output at all.
        print(
            f"\nWARNING: {pending_count} papers were 'Pending' before stopping, "
            f"but the revert reported {reverted} reverted papers."
        )


def test_stop_scraper():
    """Test the stop_scraper functionality.

    Runs inside the Flask application context: prints the scraper state,
    optionally creates a "Pending" test paper, calls
    ``revert_pending_papers``, then prints the remaining pending papers,
    the Celery task counts and a final verdict. Returns nothing.
    """
    with app.app_context():
        # First check current scraper state
        scraper_state = ScraperState.get_current_state()
        print(f"Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")

        # Check if there are any papers in "Pending" state
        pending_count = _count_pending()
        print(f"Papers in 'Pending' state before stopping: {pending_count}")

        if pending_count == 0 and _offer_test_paper():
            pending_count = 1

        # Simulate the stop_scraper API call (imported here, as in the
        # original, rather than at module import time)
        from scipaperloader.blueprints.scraper import revert_pending_papers
        print("Reverting pending papers...")
        reverted = revert_pending_papers()
        print(f"Reverted {reverted} papers from 'Pending' state")

        # Check if any papers are still in "Pending" state
        still_pending = _count_pending()
        print(f"Papers still in 'Pending' state after stopping: {still_pending}")

        # List any that were reverted and their current status
        if reverted > 0:
            _print_reverted_papers()

        # Check active celery tasks
        inspector = celery.control.inspect()
        active_count = _count_tasks(inspector.active())
        reserved_count = _count_tasks(inspector.reserved())
        scheduled_count = _count_tasks(inspector.scheduled())

        print(f"\nCurrently {active_count} active, {reserved_count} reserved, and {scheduled_count} scheduled tasks")

        # Print conclusion
        _print_conclusion(pending_count, reverted, still_pending)


if __name__ == "__main__":
    test_stop_scraper()