#!/usr/bin/env python3
"""
Test script for verifying the paper reversion fix with APScheduler.

This script:
1. Creates test papers and simulates processing
2. Tests the stop_scraper functionality
3. Checks that all pending papers were reverted to their previous status
4. Ensures all running tasks were terminated
"""

import os
import sys
from datetime import datetime, UTC

# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

# Import the app and models
from scipaperloader import create_app
from scipaperloader.db import db
from scipaperloader.models import PaperMetadata, ScraperState
from scipaperloader.scrapers.factory import get_scraper
from scipaperloader.scrapers.manager import ScraperManager

print("[DEBUG] Initializing Flask app...")
app = create_app()
print("[DEBUG] Flask app initialized.")


def test_stop_scraper():
    """Test the stop_scraper functionality with proper APScheduler integration"""

    print("[DEBUG] Entering app context...")
    with app.app_context():
        print("[DEBUG] App context entered.")

        # Clear existing test data
        print("[DEBUG] Clearing existing test data...")
        PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
        db.session.commit()
        print("[DEBUG] Existing test data cleared.")

        # Get scraper configuration
        scraper = get_scraper()
        input_statuses = scraper.get_input_statuses()
        output_statuses = scraper.get_output_statuses()

        if not input_statuses:
            print("❌ No input statuses found for current scraper")
            return

        input_status = input_statuses[0]  # Use first input status
        processing_status = output_statuses.get("processing", "Processing")
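        # Note: get_output_statuses() is assumed to map outcome keys (e.g.
        # "processing", "success", "failure") to status names; "Processing"
        # is only a fallback if the scraper defines no "processing" key.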

        print(f"[DEBUG] Using input status: {input_status}")
        print(f"[DEBUG] Using processing status: {processing_status}")

        # Create test papers in input status
        test_papers = []
        print("[DEBUG] Creating test papers...")
        for i in range(3):
            test_paper = PaperMetadata()
            test_paper.title = f"Test Paper {i+1}"
            test_paper.doi = f"10.1234/test{i+1}"
            test_paper.status = input_status
            test_paper.created_at = datetime.now(UTC)
            test_paper.updated_at = datetime.now(UTC)
            db.session.add(test_paper)
            test_papers.append(test_paper)
        db.session.commit()
        print(f"[DEBUG] Created {len(test_papers)} test papers in '{input_status}' status.")

        # Simulate some papers being moved to processing status
        print("[DEBUG] Simulating papers in processing...")
        for paper in test_papers[:2]:  # Move first 2 papers to processing
            paper.previous_status = paper.status  # Store previous status
            paper.status = processing_status
            paper.updated_at = datetime.now(UTC)
        db.session.commit()
        print(f"[DEBUG] Moved 2 papers to '{processing_status}' status.")

        # Check current scraper state
        scraper_state = ScraperState.get_current_state()
        print(f"[DEBUG] Current scraper state: active={scraper_state.is_active}, paused={scraper_state.is_paused}")

        # Check paper counts before stopping
        input_count = PaperMetadata.query.filter_by(status=input_status).count()
        processing_count = PaperMetadata.query.filter_by(status=processing_status).count()
        print(f"[DEBUG] Papers before stopping: {input_count} in '{input_status}', {processing_count} in '{processing_status}'")

        # Test APScheduler job management
        scheduler = app.config.get('SCHEDULER')
        if scheduler:
            print("[DEBUG] Testing APScheduler job management...")

            # Create some test jobs using the correct API
            for paper in test_papers:
                job_id = scheduler.schedule_paper_processing(
                    paper_id=paper.id,
                    delay_seconds=60,  # 1 minute from now
                    job_id=f"test_paper_process_{paper.id}"
                )
                print(f"[DEBUG] Scheduled job {job_id} for paper {paper.id}")

            jobs_before = len(scheduler.get_paper_jobs())
            print(f"[DEBUG] Created {jobs_before} test jobs in APScheduler")

            # Test the manager's stop_scraper method
            print("[DEBUG] Testing ScraperManager.stop_scraper()...")
            manager = ScraperManager()
            result = manager.stop_scraper()
            print(f"[DEBUG] stop_scraper result: {result}")

            # Check jobs after stopping
            jobs_after = len(scheduler.get_paper_jobs())
            print(f"[DEBUG] Jobs after stopping: {jobs_after} (should be 0)")

            if jobs_after == 0:
                print("✅ All APScheduler jobs successfully revoked")
            else:
                print(f"❌ {jobs_after} jobs still exist after revocation")
        else:
            print("❌ APScheduler not found in app config")

        # Check paper counts after stopping
        input_count_after = PaperMetadata.query.filter_by(status=input_status).count()
        processing_count_after = PaperMetadata.query.filter_by(status=processing_status).count()
        print(f"[DEBUG] Papers after stopping: {input_count_after} in '{input_status}', {processing_count_after} in '{processing_status}'")

        # Verify that processing papers were reverted: every paper that was
        # in processing should be back in the input status it came from.
        if processing_count_after == 0 and input_count_after == input_count + processing_count:
            print("✅ Papers successfully reverted from processing to previous status")
        else:
            print(f"❌ Paper reversion failed: expected 0 processing papers and "
                  f"{input_count + processing_count} input papers, got "
                  f"{processing_count_after} and {input_count_after}")

        # Check scraper state after stopping
        scraper_state_after = ScraperState.get_current_state()
        print(f"[DEBUG] Scraper state after stopping: active={scraper_state_after.is_active}, paused={scraper_state_after.is_paused}")

        if not scraper_state_after.is_active and not scraper_state_after.is_paused:
            print("✅ Scraper state correctly set to inactive")
        else:
            print("❌ Scraper state not properly updated")

        # Clean up test data
        print("[DEBUG] Cleaning up test data...")
        PaperMetadata.query.filter(PaperMetadata.doi.like('10.1234/test%')).delete()
        db.session.commit()
        print("[DEBUG] Test data cleaned up.")


if __name__ == "__main__":
    print("[DEBUG] Starting test_stop_scraper...")
    test_stop_scraper()
    print("[DEBUG] test_stop_scraper completed.")