#!/usr/bin/env python3
"""
Test script for verifying the paper reversion fix with APScheduler.

This script:
1. Creates test papers and simulates processing
2. Tests the stop_scraper functionality
3. Checks that all pending papers were reverted to their previous status
4. Ensures all running tasks were terminated
"""

import os
import sys
from datetime import datetime, UTC

# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

# Import the app and models
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ScraperState
from scipaperloader.scrapers.factory import get_scraper
from scipaperloader.scrapers.manager import ScraperManager

print("[DEBUG] Initializing Flask app...")
app = create_app()
print("[DEBUG] Flask app initialized.")


def test_stop_scraper():
    """Test the stop_scraper functionality with proper APScheduler integration."""
    print("[DEBUG] Entering app context...")
    with app.app_context():
        print("[DEBUG] App context entered.")

        # Clear existing test data
        print("[DEBUG] Clearing existing test data...")
        PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
        db.session.commit()
        print("[DEBUG] Existing test data cleared.")

        # Get scraper configuration
        scraper = get_scraper()
        input_statuses = scraper.get_input_statuses()
        output_statuses = scraper.get_output_statuses()

        if not input_statuses:
            print("❌ No input statuses found for current scraper")
            return

        input_status = input_statuses[0]  # Use first input status
        processing_status = output_statuses.get("processing", "Processing")

        print(f"[DEBUG] Using input status: {input_status}")
        print(f"[DEBUG] Using processing status: {processing_status}")

        # Create test papers in input status
        test_papers = []
        print("[DEBUG] Creating test papers...")
        for i in range(3):
            test_paper = PaperMetadata()
            test_paper.title = f"Test Paper {i+1}"
            test_paper.doi = f"10.1234/test{i+1}"
            test_paper.status = input_status
            test_paper.created_at = datetime.now(UTC)
            test_paper.updated_at = datetime.now(UTC)
            db.session.add(test_paper)
            test_papers.append(test_paper)

        db.session.commit()
        print(f"[DEBUG] Created {len(test_papers)} test papers in '{input_status}' status.")

        # Simulate some papers being moved to processing status
        print("[DEBUG] Simulating papers in processing...")
        for paper in test_papers[:2]:  # Move first 2 papers to processing
            paper.previous_status = paper.status  # Store previous status for reversion
            paper.status = processing_status
            paper.updated_at = datetime.now(UTC)
        db.session.commit()
        print(f"[DEBUG] Moved 2 papers to '{processing_status}' status.")

        # Check current scraper state
        scraper_state = ScraperState.get_current_state()
        print(f"[DEBUG] Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")

        # Check paper counts before stopping
        input_count = PaperMetadata.query.filter_by(status=input_status).count()
        processing_count = PaperMetadata.query.filter_by(status=processing_status).count()
        print(f"[DEBUG] Papers before stopping: {input_count} in '{input_status}', {processing_count} in '{processing_status}'")

        # Test APScheduler job management
        scheduler = app.config.get('SCHEDULER')
        if scheduler:
            print("[DEBUG] Testing APScheduler job management...")

            # Create some test jobs using the correct API
            for paper in test_papers:
                job_id = scheduler.schedule_paper_processing(
                    paper_id=paper.id,
                    delay_seconds=60,  # 1 minute from now
                    job_id=f"test_paper_process_{paper.id}"
                )
                print(f"[DEBUG] Scheduled job {job_id} for paper {paper.id}")

            jobs_before = len(scheduler.get_paper_jobs())
            print(f"[DEBUG] Created {jobs_before} test jobs in APScheduler")

            # Test the manager's stop_scraper method
            print("[DEBUG] Testing ScraperManager.stop_scraper()...")
            manager = ScraperManager()
            result = manager.stop_scraper()
            print(f"[DEBUG] stop_scraper result: {result}")

            # Check jobs after stopping
            jobs_after = len(scheduler.get_paper_jobs())
            print(f"[DEBUG] Jobs after stopping: {jobs_after} (should be 0)")

            if jobs_after == 0:
                print("✅ All APScheduler jobs successfully revoked")
            else:
                print(f"❌ {jobs_after} jobs still exist after revocation")
        else:
            print("❌ APScheduler not found in app config")

        # Check paper counts after stopping
        input_count_after = PaperMetadata.query.filter_by(status=input_status).count()
        processing_count_after = PaperMetadata.query.filter_by(status=processing_status).count()
        print(f"[DEBUG] Papers after stopping: {input_count_after} in '{input_status}', {processing_count_after} in '{processing_status}'")

        # Verify that processing papers were reverted: no papers should remain in
        # processing, and every reverted paper should be back in the input status
        if processing_count_after == 0 and input_count_after == input_count + processing_count:
            print("✅ Papers successfully reverted from processing to previous status")
        else:
            print(f"❌ Paper reversion failed: expected 0 processing papers and "
                  f"{input_count + processing_count} input papers, "
                  f"got {processing_count_after} and {input_count_after}")

        # Check scraper state after stopping
        scraper_state_after = ScraperState.get_current_state()
        print(f"[DEBUG] Scraper state after stopping: active={scraper_state_after.is_active}, paused={scraper_state_after.is_paused}")

        if not scraper_state_after.is_active and not scraper_state_after.is_paused:
            print("✅ Scraper state correctly set to inactive")
        else:
            print("❌ Scraper state not properly updated")

        # Clean up test data
        print("[DEBUG] Cleaning up test data...")
        PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
        db.session.commit()
        print("[DEBUG] Test data cleaned up.")


if __name__ == "__main__":
    print("[DEBUG] Starting test_stop_scraper...")
    test_stop_scraper()
    print("[DEBUG] test_stop_scraper completed.")