# SciPaperLoader/tests/test_csv_upload.py
#
# 132 lines
# 4.6 KiB
# Python

#!/usr/bin/env python3
"""
Test script to verify CSV upload functionality works with APScheduler.
"""
import requests
import time
import io
import csv
from scipaperloader import create_app
def create_test_csv():
    """Build the CSV text that the upload test sends to the server.

    Returns:
        str: A header line followed by three sample paper rows; every line,
        including the last one, is terminated by a newline.
    """
    header = "title,doi,issn,journal,alternative_id,published_online"
    paper_rows = [
        "Test Paper 1,10.1000/test_upload_001,1234-5678,Test Journal,ALT001,2024-01-01",
        "Test Paper 2,10.1000/test_upload_002,1234-5678,Test Journal,ALT002,2024-01-02",
        "Test Paper 3,10.1000/test_upload_003,1234-5678,Test Journal,ALT003,2024-01-03",
    ]
    # Trailing newline keeps the text identical to a file ending in "\n".
    return "\n".join([header, *paper_rows]) + "\n"
def _wait_for_upload_task(client, task_id, max_polls=30, poll_interval=1):
    """Poll the task-status endpoint until the upload task finishes.

    Args:
        client: Flask test client used to issue the status requests.
        task_id: Identifier returned by the upload endpoint.
        max_polls: Maximum number of status requests before giving up.
        poll_interval: Seconds to sleep between two status requests.

    Returns:
        bool: True when the task reports state ``SUCCESS``. False when it
        reports ``FAILURE``, a status request does not return HTTP 200, the
        status body is not a JSON object, or the task does not reach a final
        state within ``max_polls`` requests.
    """
    for _ in range(max_polls):
        progress_response = client.get(f'/upload/task_status/{task_id}')
        if progress_response.status_code != 200:
            print(f"❌ Failed to get task status: {progress_response.status_code}")
            return False

        # silent=True returns None instead of raising on a malformed body;
        # a non-JSON mimetype also yields None.
        progress_data = progress_response.get_json(silent=True)
        print(f"Progress: {progress_data}")
        if not isinstance(progress_data, dict):
            print("❌ Task status response was not a JSON object")
            return False

        state = progress_data.get('state')
        if state == 'SUCCESS':
            print("✅ CSV upload completed successfully!")
            # `or {}` also covers a present-but-null "result" value.
            result = progress_data.get('result') or {}
            print(f" Added: {result.get('added', 0)}")
            print(f" Skipped: {result.get('skipped', 0)}")
            print(f" Errors: {result.get('error_count', 0)}")
            return True
        if state == 'FAILURE':
            print(f"❌ CSV upload failed: {progress_data.get('error')}")
            return False

        # Any other state means the task is still in flight; wait and retry.
        time.sleep(poll_interval)

    print(f"⏰ Task did not complete within {max_polls * poll_interval} seconds")
    return False


def test_csv_upload():
    """Upload a small CSV through the Flask test client and wait for the task.

    Posts the text from ``create_test_csv()`` to ``/upload/`` as a multipart
    form, then polls ``/upload/task_status/<task_id>`` until the scheduled
    task reports a final state.

    Returns:
        bool: True when the upload task finishes with state ``SUCCESS``,
        False on any failure (HTTP error, missing ``task_id``, task failure
        or timeout).

    NOTE(review): the boolean result is consumed by the ``__main__`` runner.
    pytest does not treat a returned False as a failure, so when this file is
    collected by pytest a failed upload is only visible in the printed output.
    """
    print("🧪 Testing CSV Upload Functionality")
    print("=" * 50)

    app = create_app()
    with app.test_client() as client:
        # Wrap the CSV text in an in-memory binary file for the multipart form.
        csv_file = io.BytesIO(create_test_csv().encode('utf-8'))
        csv_file.name = 'test_upload.csv'

        print("📤 Uploading CSV file...")
        response = client.post('/upload/', data={
            'file': (csv_file, 'test_upload.csv'),
            'delimiter': ',',
            'duplicate_strategy': 'skip'
        }, content_type='multipart/form-data')

        # Parse once; None when the body is not (valid) JSON.
        response_data = response.get_json(silent=True)
        print(f"Response Status: {response.status_code}")
        print(f"Response Data: {response_data}")

        if response.status_code != 200:
            print(f"❌ Upload request failed: {response.status_code}")
            print(f"Response: {response.get_data(as_text=True)}")
            return False

        # Guard against a 200 response that is not a JSON object (e.g. HTML),
        # which would otherwise raise on the membership test.
        if not isinstance(response_data, dict) or 'task_id' not in response_data:
            print(f"❌ No task_id in response: {response_data}")
            return False

        task_id = response_data['task_id']
        print(f"✅ Task scheduled successfully: {task_id}")

        print("\n📊 Monitoring task progress...")
        return _wait_for_upload_task(client, task_id)
def check_scheduler_status():
    """Report whether the application's APScheduler instance is usable.

    Creates the Flask app, then inspects the module-level ``_scheduler``
    object from ``scipaperloader.scheduler`` inside an application context.

    Returns:
        bool: False when ``_scheduler`` is falsy or its ``running`` attribute
        is falsy; otherwise True, after printing the id and name of every job
        returned by ``get_jobs()``.
    """
    print("\n🔍 Checking APScheduler Status")
    print("=" * 50)

    flask_app = create_app()
    with flask_app.app_context():
        # Imported here, after create_app() has run, as in the original flow.
        from scipaperloader.scheduler import _scheduler as scheduler

        problem = None
        if not scheduler:
            problem = "❌ APScheduler not initialized"
        elif not scheduler.running:
            problem = "❌ APScheduler not running"

        if problem is not None:
            print(problem)
            return False

        scheduled_jobs = scheduler.get_jobs()
        print(f"✅ APScheduler running with {len(scheduled_jobs)} jobs")

        # List every job currently registered with the scheduler.
        for scheduled_job in scheduled_jobs:
            print(f" - {scheduled_job.id}: {scheduled_job.name}")

        return True
def main():
    """Run the scheduler check followed by the CSV upload test.

    Returns:
        int: Process exit code — 0 when the upload test succeeds, 1 when the
        scheduler check fails or the upload test fails.
    """
    print("🚀 CSV Upload Test Suite")
    print("=" * 50)

    # The upload is processed by a scheduled task, so verify the scheduler first.
    if not check_scheduler_status():
        print("❌ APScheduler issues detected, cannot proceed with test")
        return 1

    if test_csv_upload():
        print("\n🎉 All tests passed! CSV upload is working correctly.")
        return 0

    print("\n❌ Test failed! CSV upload needs debugging.")
    return 1


if __name__ == "__main__":
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the site module for interactive use and is not guaranteed
    # to exist (e.g. under `python -S`).
    raise SystemExit(main())