#!/usr/bin/env python3
"""
Test script for verifying the paper reversion fix with APScheduler.
This script:
1. Creates test papers and simulates processing
2. Tests the stop_scraper functionality
3. Checks that all pending papers were reverted to their previous status
4. Ensures all running tasks were terminated
"""
import os
import sys
from datetime import datetime, UTC

# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

# Import the app and models
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ScraperState
from scipaperloader.scrapers.factory import get_scraper
from scipaperloader.scrapers.manager import ScraperManager

print("[DEBUG] Initializing Flask app...")
app = create_app()
print("[DEBUG] Flask app initialized.")


def test_stop_scraper():
    """Test the stop_scraper functionality with proper APScheduler integration"""
    print("[DEBUG] Entering app context...")
    with app.app_context():
        print("[DEBUG] App context entered.")

        # Clear existing test data
        print("[DEBUG] Clearing existing test data...")
        PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
        db.session.commit()
        print("[DEBUG] Existing test data cleared.")
        # Get scraper configuration
        scraper = get_scraper()
        input_statuses = scraper.get_input_statuses()
        output_statuses = scraper.get_output_statuses()

        if not input_statuses:
            print("❌ No input statuses found for current scraper")
            return

        input_status = input_statuses[0]  # Use first input status
        processing_status = output_statuses.get("processing", "Processing")
        print(f"[DEBUG] Using input status: {input_status}")
        print(f"[DEBUG] Using processing status: {processing_status}")

        # Create test papers in input status
        test_papers = []
        print("[DEBUG] Creating test papers...")
        for i in range(3):
            test_paper = PaperMetadata()
            test_paper.title = f"Test Paper {i + 1}"
            test_paper.doi = f"10.1234/test{i + 1}"
            test_paper.status = input_status
            test_paper.created_at = datetime.now(UTC)
            test_paper.updated_at = datetime.now(UTC)
            db.session.add(test_paper)
            test_papers.append(test_paper)
        db.session.commit()
        print(f"[DEBUG] Created {len(test_papers)} test papers in '{input_status}' status.")

        # Simulate some papers being moved to processing status
        print("[DEBUG] Simulating papers in processing...")
        for paper in test_papers[:2]:  # Move first 2 papers to processing
            paper.previous_status = paper.status  # Store previous status
            paper.status = processing_status
            paper.updated_at = datetime.now(UTC)
        db.session.commit()
        print(f"[DEBUG] Moved 2 papers to '{processing_status}' status.")

        # Check current scraper state
        scraper_state = ScraperState.get_current_state()
        print(f"[DEBUG] Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")

        # Check paper counts before stopping
        input_count = PaperMetadata.query.filter_by(status=input_status).count()
        processing_count = PaperMetadata.query.filter_by(status=processing_status).count()
        print(f"[DEBUG] Papers before stopping: {input_count} in '{input_status}', {processing_count} in '{processing_status}'")

        # Test APScheduler job management
        scheduler = app.config.get('SCHEDULER')
        if scheduler:
            print("[DEBUG] Testing APScheduler job management...")

            # Create some test jobs using the correct API
            for paper in test_papers:
                job_id = scheduler.schedule_paper_processing(
                    paper_id=paper.id,
                    delay_seconds=60,  # 1 minute from now
                    job_id=f"test_paper_process_{paper.id}"
                )
                print(f"[DEBUG] Scheduled job {job_id} for paper {paper.id}")

            jobs_before = len(scheduler.get_paper_jobs())
            print(f"[DEBUG] Created {jobs_before} test jobs in APScheduler")
            # Test the manager's stop_scraper method
            print("[DEBUG] Testing ScraperManager.stop_scraper()...")
            manager = ScraperManager()
            result = manager.stop_scraper()
            print(f"[DEBUG] stop_scraper result: {result}")

            # Check jobs after stopping
            jobs_after = len(scheduler.get_paper_jobs())
            print(f"[DEBUG] Jobs after stopping: {jobs_after} (should be 0)")
            if jobs_after == 0:
                print("✅ All APScheduler jobs successfully revoked")
            else:
                print(f"❌ {jobs_after} jobs still exist after revocation")
        else:
            print("❌ APScheduler not found in app config")
            return  # Without the scheduler, stop_scraper() was never called

        # Check paper counts after stopping
        input_count_after = PaperMetadata.query.filter_by(status=input_status).count()
        processing_count_after = PaperMetadata.query.filter_by(status=processing_status).count()
        print(f"[DEBUG] Papers after stopping: {input_count_after} in '{input_status}', {processing_count_after} in '{processing_status}'")

        # Verify that processing papers were reverted to the input status
        if processing_count_after == 0 and input_count_after == input_count + processing_count:
            print("✅ Papers successfully reverted from processing to previous status")
        else:
            print(f"❌ Paper reversion failed: expected 0 processing and {input_count + processing_count} input papers, got {processing_count_after} and {input_count_after}")

        # Check scraper state after stopping
        scraper_state_after = ScraperState.get_current_state()
        print(f"[DEBUG] Scraper state after stopping: active={scraper_state_after.is_active}, paused={scraper_state_after.is_paused}")
        if not scraper_state_after.is_active and not scraper_state_after.is_paused:
            print("✅ Scraper state correctly set to inactive")
        else:
            print("❌ Scraper state not properly updated")

        # Clean up test data
        print("[DEBUG] Cleaning up test data...")
        PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
        db.session.commit()
        print("[DEBUG] Test data cleaned up.")
print("[DEBUG] Starting test_stop_scraper...")
test_stop_scraper()
print("[DEBUG] test_stop_scraper completed.")