#!/usr/bin/env python3
"""
Comprehensive test for APScheduler functionality in SciPaperLoader.
Tests job scheduling, execution, revocation, and hourly scheduler functionality.
"""

import sys
import os
import time
import threading
from datetime import datetime, timedelta

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from scipaperloader import create_app
from scipaperloader.models import PaperMetadata, ScraperState, ActivityLog, ScheduleConfig, VolumeConfig
from scipaperloader.scrapers.manager import ScraperManager
from scipaperloader.db import db


def test_scheduler_functionality():
    """Comprehensive test of APScheduler functionality.

    Runs twelve sequential checks against an app created with an in-memory
    SQLite database: scheduler availability, job scheduling and inspection,
    revocation, ScraperManager integration, the hourly job, quota
    calculation, activity logging, error handling and cleanup.

    Returns:
        bool: True when every check passed, False as soon as one check fails.
    """
    print("๐Ÿงช Testing APScheduler Functionality")
    print("=" * 50)

    # Create test app with in-memory database
    app = create_app({
        'TESTING': True,
        'SQLALCHEMY_DATABASE_URI': 'sqlite:///:memory:',
    })

    with app.app_context():
        # Test 1: Basic scheduler availability
        print("\n๐Ÿ“‹ Test 1: Scheduler Initialization")
        scheduler = app.config.get('SCHEDULER')
        if not scheduler:
            print("โŒ APScheduler not found in app config")
            return False

        print("โœ… APScheduler available and initialized")
        print(f"๐Ÿ“Š Initial job count: {scheduler.get_job_count()}")

        # Test 2: Database table creation
        print("\n๐Ÿ“‹ Test 2: APScheduler Database Tables")
        try:
            # Check if we can query jobs (which requires tables to exist)
            jobs = scheduler.get_paper_jobs()
            print("โœ… APScheduler database tables exist and accessible")
            print(f"๐Ÿ“‹ Current paper jobs: {len(jobs)}")
        except Exception as e:
            print(f"โŒ APScheduler database tables not accessible: {e}")
            return False

        # Test 3: Job scheduling functionality
        print("\n๐Ÿ“‹ Test 3: Job Scheduling")

        # Create test paper
        test_paper = PaperMetadata(
            title="Test Paper for Scheduler",
            doi="10.1000/test_scheduler_001",
            issn="1234-5678",
            journal="Test Journal",
            status="New"
        )
        db.session.add(test_paper)
        db.session.commit()

        # Schedule a paper for processing in 30 seconds (longer delay)
        try:
            # No explicit job_id, so the scheduler applies its default
            # "paper_job_" prefix.
            job_id = scheduler.schedule_paper_processing(
                paper_id=test_paper.id,
                delay_seconds=30
            )
            print(f"โœ… Paper scheduling works: Job ID {job_id}")
        except Exception as e:
            print(f"โŒ Paper scheduling failed: {e}")
            return False

        # Verify job was scheduled
        jobs_after = scheduler.get_paper_jobs()
        if not jobs_after:
            print("โŒ No jobs found after scheduling")
            return False

        print(f"โœ… Job successfully scheduled: {len(jobs_after)} paper job(s) found")

        # Test 4: Job information retrieval
        print("\n๐Ÿ“‹ Test 4: Job Information Retrieval")

        scheduled_job = jobs_after[0]
        print("โœ… Job details accessible:")
        print(f" ๐Ÿ“ Job ID: {scheduled_job['id']}")
        print(f" ๐Ÿ“ Job Name: {scheduled_job['name']}")
        print(f" ๐Ÿ“ Next Run Time: {scheduled_job['next_run_time']}")
        print(f" ๐Ÿ“ Args: {scheduled_job['args']}")

        # Test 5: Job revocation
        print("\n๐Ÿ“‹ Test 5: Job Revocation")

        initial_count = len(jobs_after)
        revoked_count = scheduler.revoke_all_scraper_jobs()

        # A count mismatch is only reported; the hard check is that no
        # paper jobs remain afterwards.
        if revoked_count != initial_count:
            print(f"โš ๏ธ Warning: Expected to revoke {initial_count} jobs, but revoked {revoked_count}")
        else:
            print(f"โœ… Job revocation works: {revoked_count} job(s) revoked")

        # Verify jobs were revoked
        jobs_after_revocation = scheduler.get_paper_jobs()
        if jobs_after_revocation:
            print(f"โŒ Jobs still exist after revocation: {len(jobs_after_revocation)}")
            return False

        print("โœ… All paper jobs successfully revoked")

        # Test 6: Multiple job scheduling
        print("\n๐Ÿ“‹ Test 6: Multiple Job Scheduling")

        # Create more test papers
        test_papers = []
        for i in range(3):
            paper = PaperMetadata(
                title=f"Test Paper {i+1}",
                doi=f"10.1000/test_scheduler_{i+2:03d}",
                issn="1234-5678",
                journal="Test Journal",
                status="New"
            )
            db.session.add(paper)
            test_papers.append(paper)

        db.session.commit()

        # Schedule multiple papers, staggering the delays by one second each.
        # No explicit job_id, so the default "paper_job_" prefix is used.
        scheduled_jobs = [
            scheduler.schedule_paper_processing(
                paper_id=paper.id,
                delay_seconds=10 + i
            )
            for i, paper in enumerate(test_papers)
        ]

        print(f"โœ… Multiple job scheduling works: {len(scheduled_jobs)} jobs scheduled")

        # Verify all jobs are scheduled
        all_jobs = scheduler.get_paper_jobs()
        if len(all_jobs) != len(test_papers):
            print(f"โŒ Expected {len(test_papers)} jobs, found {len(all_jobs)}")
            return False

        print(f"โœ… All jobs properly scheduled: {len(all_jobs)} total jobs")

        # Test 7: ScraperManager integration
        print("\n๐Ÿ“‹ Test 7: ScraperManager Integration")

        manager = ScraperManager()

        # Test paper selection
        papers = manager.select_papers_for_processing(limit=2)
        print(f"โœ… ScraperManager paper selection: {len(papers)} papers selected")

        # Test scraper state management with APScheduler
        start_result = manager.start_scraper()
        if start_result["status"] != "success":
            print(f"โŒ Failed to start scraper: {start_result['message']}")
            return False

        print("โœ… Scraper started successfully")

        # Test job clearing through manager
        cleared_count = manager._clear_delayed_tasks_from_apscheduler()
        print(f"โœ… ScraperManager job clearing: {cleared_count} jobs cleared")

        # Verify jobs were cleared
        remaining_jobs = scheduler.get_paper_jobs()
        if remaining_jobs:
            print(f"โŒ Jobs still exist after manager clearing: {len(remaining_jobs)}")
            return False

        print("โœ… ScraperManager successfully clears APScheduler jobs")

        # Test 8: Hourly scheduler configuration
        print("\n๐Ÿ“‹ Test 8: Hourly Scheduler Configuration")

        # Ensure the hourly job is scheduled correctly. The wrapped scheduler
        # is reached through the private `_scheduler` attribute, which may be
        # absent or unset.
        inner_scheduler = getattr(scheduler, '_scheduler', None)
        all_scheduler_jobs = inner_scheduler.get_jobs() if inner_scheduler else []
        hourly_jobs = [job for job in all_scheduler_jobs if job.id == 'hourly_scraper_main']

        if not hourly_jobs:
            print("โŒ Hourly scheduler job not found")
            return False

        hourly_job = hourly_jobs[0]
        print("โœ… Hourly scheduler job found:")
        print(f" ๐Ÿ“ Job ID: {hourly_job.id}")
        print(f" ๐Ÿ“ Job Name: {hourly_job.name}")
        print(f" ๐Ÿ“ Trigger: {hourly_job.trigger}")
        print(f" ๐Ÿ“ Next Run: {hourly_job.next_run_time}")

        # Test 9: Configuration-based scheduling
        print("\n๐Ÿ“‹ Test 9: Configuration-based Scheduling")

        # Set up volume configuration
        volume_config = VolumeConfig.query.first()
        if not volume_config:
            volume_config = VolumeConfig(volume=10)  # 10 papers per day
            db.session.add(volume_config)
            db.session.commit()

        # Test quota calculation
        quota = manager.get_current_hour_quota()
        print(f"โœ… Hourly quota calculation: {quota} papers per hour")

        if quota < 0:
            print("โŒ Invalid quota calculation")
            return False

        # Test 10: Activity logging integration
        print("\n๐Ÿ“‹ Test 10: Activity Logging Integration")

        # Check recent APScheduler-related logs
        recent_logs = ActivityLog.query.filter(
            ActivityLog.action.like('%apscheduler%')
        ).order_by(ActivityLog.timestamp.desc()).limit(5).all()

        print(f"โœ… APScheduler activity logging: {len(recent_logs)} related log entries")

        # Show at most the three newest entries (no output when there are none).
        for log in recent_logs[:3]:
            print(f" ๐Ÿ“ {log.action}: {log.description}")

        # Test 11: Error handling
        print("\n๐Ÿ“‹ Test 11: Error Handling")

        # Test scheduling with invalid paper ID. Either outcome is accepted:
        # the call may succeed or raise, and both are reported as handled.
        try:
            scheduler.schedule_paper_processing(
                paper_id=99999,  # Non-existent paper
                delay_seconds=1,
                job_id="test_error_job"
            )
            print("โœ… Scheduling with invalid paper ID handled gracefully")
        except Exception as e:
            print(f"โœ… Scheduling with invalid paper ID properly raises exception: {e}")

        # Test 12: Cleanup and shutdown
        print("\n๐Ÿ“‹ Test 12: Cleanup and Shutdown")

        # Stop scraper
        stop_result = manager.stop_scraper()
        if stop_result["status"] != "success":
            print(f"โŒ Failed to stop scraper: {stop_result['message']}")
            return False

        print("โœ… Scraper stopped successfully")

        # Final job count should be minimal (only hourly scheduler)
        final_job_count = scheduler.get_job_count()
        final_paper_jobs = len(scheduler.get_paper_jobs())

        print("๐Ÿ“Š Final state:")
        print(f" ๐Ÿ“ Total jobs: {final_job_count}")
        print(f" ๐Ÿ“ Paper jobs: {final_paper_jobs}")

        if final_paper_jobs > 0:
            print("โŒ Paper jobs still exist after cleanup")
            return False

        print("โœ… Cleanup completed successfully")

        print("\n๐ŸŽ‰ ALL SCHEDULER TESTS PASSED!")
        print("\n๐Ÿ“‹ Test Summary:")
        print(" โœ… APScheduler initialization works")
        print(" โœ… Database tables created and accessible")
        print(" โœ… Job scheduling functionality works")
        print(" โœ… Job information retrieval works")
        print(" โœ… Job revocation works")
        print(" โœ… Multiple job scheduling works")
        print(" โœ… ScraperManager integration works")
        print(" โœ… Hourly scheduler configured correctly")
        print(" โœ… Configuration-based scheduling works")
        print(" โœ… Activity logging integration works")
        print(" โœ… Error handling works")
        print(" โœ… Cleanup and shutdown works")

        return True


def test_job_execution():
    """Test that jobs actually execute (requires waiting).

    Schedules one paper with a 2-second delay, sleeps 3 seconds, then
    inspects the scheduler, the activity log and the paper row.

    A job still listed in the scheduler and missing activity-log entries are
    reported as warnings only. A missing paper row, or a paper whose status
    is not "Done", is a failure.

    Returns:
        bool: True when the paper reached status "Done", False otherwise.
    """
    print("\n๐Ÿ”„ Testing Job Execution (5-second test)")
    print("-" * 40)

    app = create_app({
        'TESTING': True,
        'SQLALCHEMY_DATABASE_URI': 'sqlite:///:memory:',
    })

    with app.app_context():
        # Initialize database and scheduler
        db.create_all()
        scheduler = app.config.get('SCHEDULER')
        if not scheduler:
            print("โŒ Scheduler not initialized")
            return False

        # Create test paper
        test_paper = PaperMetadata(
            title="Test Paper for Execution",
            doi="10.1000/test_execution",
            issn="1234-5678",
            journal="Test Journal",
            status="Pending"
        )
        db.session.add(test_paper)
        db.session.commit()

        # Verify paper is added to the database
        test_paper_id = test_paper.id
        if not test_paper_id:
            print("โŒ Test paper not added to the database")
            return False

        # Schedule paper for processing in 2 seconds
        job_id = scheduler.schedule_paper_processing(
            paper_id=test_paper_id,
            delay_seconds=2
        )
        print(f"๐Ÿ“… Scheduled job {job_id} for execution in 2 seconds")

        # Wait and check for execution
        print("โณ Waiting for job execution...")
        time.sleep(3)

        # Check if job completed (should be removed from scheduler)
        remaining_jobs = scheduler.get_paper_jobs()
        if remaining_jobs:
            print(f"โš ๏ธ Job still in scheduler: {len(remaining_jobs)} remaining")
            for job in remaining_jobs:
                print(f" ๐Ÿ“ Job ID: {job['id']}, Next Run Time: {job['next_run_time']}")
        else:
            print("โœ… Job executed and removed from scheduler")

        # Check activity logs for execution evidence
        execution_logs = ActivityLog.query.filter(
            ActivityLog.action.like('%process_single_paper%')
        ).order_by(ActivityLog.timestamp.desc()).limit(3).all()

        if execution_logs:
            print("โœ… Job execution logged in activity:")
            for log in execution_logs:
                print(f" ๐Ÿ“ {log.action}: {log.description}")
        else:
            print("โš ๏ธ No execution logs found")

        # Validate job execution status in the database.
        # Reading test_paper.id above loaded the row into this session, so a
        # primary-key lookup could be answered from the session's identity
        # map with the pre-job status. Expire the cached state first so the
        # lookup reflects what is in the database now.
        # NOTE(review): presumably the job writes through a different
        # session/thread - confirm against the scheduler implementation.
        db.session.expire_all()
        updated_paper = PaperMetadata.query.get(test_paper_id)
        if not updated_paper:
            print("โŒ Paper not found in the database")
            return False

        print(f"๐Ÿ” Retrieved paper: {updated_paper.title}, Status: {updated_paper.status}")
        if updated_paper.status != "Done":
            print(f"โŒ Paper status not updated: {updated_paper.status}")
            return False

        print("โœ… Paper status updated to 'Done'")
        return True


if __name__ == "__main__":
    print(f"๐Ÿ“… Starting scheduler tests at {datetime.now()}")

    try:
        # Run main functionality tests
        success = test_scheduler_functionality()

        if success:
            print("\n" + "="*50)
            # Run execution test if main tests pass; its outcome now also
            # decides the exit code instead of being discarded.
            success = test_job_execution()

        print(f"\n๐Ÿ“… Tests completed at {datetime.now()}")
        sys.exit(0 if success else 1)

    except KeyboardInterrupt:
        print("\nโน๏ธ Tests interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\nโŒ Test error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)