Implements asynchronous task management in the input module.

This commit is contained in:
Michael Beck 2025-04-12 12:55:19 +02:00
parent 5d63e28a61
commit 05f4c8b517
14 changed files with 2190 additions and 148 deletions

4
.gitignore vendored
View File

@ -12,4 +12,6 @@ dist/
*.db
*.R
*.R
migrations/

View File

@ -1,10 +1,13 @@
# List of phony targets (targets that don't represent files)
.PHONY: all clean venv run format format-check lint mypy test dist reformat dev
.PHONY: all clean venv run format format-check lint mypy test dist reformat dev celery celery-flower redis run-all
# Define Python and pip executables inside virtual environment
PYTHON := venv/bin/python
PIP := venv/bin/pip
# Celery worker command
CELERY := venv/bin/celery
# Default target that runs the application
all: run
@ -83,11 +86,11 @@ todos:
@grep -r "TODO\|FIXME" scipaperloader || echo "No TODOs found"
# Reset the database: delete, initialize, and migrate
reset-db:
reset-db: venv
rm -f $(DB_PATH)
flask db init || true
flask db migrate -m "Initial migration"
flask db upgrade
$(PYTHON) -m flask --app scipaperloader db init || true
$(PYTHON) -m flask --app scipaperloader db migrate -m "Initial migration"
$(PYTHON) -m flask --app scipaperloader db upgrade
# Create and set up virtual environment
venv:
@ -130,3 +133,21 @@ dist: format-check lint mypy test
# Set up complete development environment
dev: clean venv
# Start Celery worker for processing tasks
celery: venv
$(CELERY) -A celery_worker:celery worker --loglevel=info
# Monitor Celery tasks with flower web interface
celery-flower: venv
$(PIP) install flower
$(CELERY) -A celery_worker:celery flower --port=5555
# Check if Redis is running, start if needed
redis:
@redis-cli ping > /dev/null 2>&1 || (echo "Starting Redis server..." && redis-server --daemonize yes)
# Run complete application stack (Flask app + Celery worker + Redis)
run-all: redis
@echo "Starting Flask and Celery..."
@$(MAKE) -j2 run celery

View File

@ -14,7 +14,8 @@ And open it in the browser at [http://localhost:5000/](http://localhost:5000/)
## Prerequisites
Python >=3.8
- Python >=3.8
- Redis (for Celery task queue)
## Development environment
@ -40,12 +41,44 @@ Python >=3.8
add development dependencies under `project.optional-dependencies.*`; run
`make clean && make venv` to reinstall the environment
## Asynchronous Task Processing with Celery
SciPaperLoader uses Celery for processing large CSV uploads and other background tasks. This allows the application to handle large datasets reliably without blocking the web interface.
### Running Celery Components
- `make redis`: ensures Redis server is running (required for Celery)
- `make celery`: starts a Celery worker to process background tasks
- `make celery-flower`: starts Flower, a web interface for monitoring Celery tasks at http://localhost:5555
- `make run-all`: runs the entire stack (Flask app + Celery worker + Redis) in development mode
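Taken together, a typical development session with the new Makefile targets looks like this (every command below is a target added in this commit):

```bash
# In separate terminals:
make redis     # ensure a local Redis server is running
make celery    # start a Celery worker for background tasks
make run       # start the Flask development server

# Or start the whole stack at once:
make run-all
```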
### How It Works
When you upload a CSV file through the web interface:
1. The file is sent to the server
2. A Celery task is created to process the file asynchronously
3. The browser shows a progress bar with real-time updates
4. The results are displayed when processing is complete
This architecture allows SciPaperLoader to handle CSV files with thousands of papers without timing out or blocking the web interface.
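For illustration, the same flow can be driven without the browser. The URL prefix depends on how the `upload` blueprint is registered, so the paths below are assumptions rather than documented endpoints:

```bash
# Submit a CSV and receive a task id (form field names taken from the upload view)
curl -F "file=@papers.csv" -F "delimiter=," -F "duplicate_strategy=skip" \
     http://localhost:5000/upload/
# => {"task_id": "b1f6c6e0-..."}

# Poll the status endpoint until the state is SUCCESS or FAILURE
curl http://localhost:5000/upload/task_status/b1f6c6e0-...
# => {"state": "PROGRESS", "progress": 40}
```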
## Configuration
Default configuration is loaded from `scipaperloader.defaults` and can be
overridden by environment variables with a `FLASK_` prefix. See
[Configuring from Environment Variables](https://flask.palletsprojects.com/en/3.0.x/config/#configuring-from-environment-variables).
### Celery Configuration
The following environment variables can be set to configure Celery:
- `FLASK_CELERY_BROKER_URL`: Redis URL for the message broker (default: `redis://localhost:6379/0`)
- `FLASK_CELERY_RESULT_BACKEND`: Redis URL for storing task results (default: `redis://localhost:6379/0`)
Consider using
[dotenv](https://flask.palletsprojects.com/en/3.0.x/cli/#environment-variables-from-dotenv).
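For example, a local `.env`/`.flaskenv` or plain shell exports could set these explicitly; the values shown here are just the defaults listed above:

```bash
export FLASK_CELERY_BROKER_URL="redis://localhost:6379/0"
export FLASK_CELERY_RESULT_BACKEND="redis://localhost:6379/0"
```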
@ -58,4 +91,12 @@ deliver to your server, or copy in your `Dockerfile`, and install it with `pip`.
You must set a
[SECRET_KEY](https://flask.palletsprojects.com/en/3.0.x/tutorial/deploy/#configure-the-secret-key)
in production to a secret and stable value.
in production to a secret and stable value.
### Deploying with Celery
When deploying to production:
1. Configure a production-ready Redis instance or use a managed service
2. Run Celery workers as system services or in Docker containers
3. Consider setting up monitoring for your Celery tasks and workers
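For step 2, the worker uses the same entry point as the Makefile's `celery` target; a minimal sketch of a service or container command (working directory and concurrency are assumptions) is:

```bash
# Run from the project directory with the project's virtualenv active
celery -A celery_worker:celery worker --loglevel=info --concurrency=2
```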

7
celery_worker.py Normal file
View File

@ -0,0 +1,7 @@
from scipaperloader.celery import celery, configure_celery
# Configure celery with Flask app
configure_celery()
if __name__ == '__main__':
    celery.start()

BIN
dump.rdb Normal file

Binary file not shown.

View File

@ -13,6 +13,10 @@ dependencies = [
"flask-wtf>=1.2.2,<2",
"pyzotero>=1.6.11,<2",
"pandas>=2.2.3,<3",
"celery>=5.5.1,<6",
"redis>=5.2.1,<6",
"flower>=2.0.1,<3",
"flask-migrate>=4.1.0,<5",
]
[project.optional-dependencies]

View File

@ -1,5 +1,5 @@
from flask import Flask, request
from flask_migrate import Migrate
from .config import Config
from .db import db
from .models import init_schedule_config
@ -10,10 +10,15 @@ def create_app(test_config=None):
app = Flask(__name__)
app.config.from_object(Config)
# Celery configuration
app.config['CELERY_BROKER_URL'] = app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0')
app.config['CELERY_RESULT_BACKEND'] = app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0')
if test_config:
app.config.update(test_config)
db.init_app(app)
migrate = Migrate(app, db)  # Initialize Flask-Migrate
with app.app_context():
db.create_all()
@ -27,10 +32,19 @@ def create_app(test_config=None):
@app.before_request
def before_request():
# Skip logging for static files, health checks, or other frequent requests
if request.path.startswith('/static/') or request.path == '/health' or request.path == '/favicon.ico':
return
# Skip task status checks to avoid log spam
if request.path.startswith('/task_status/'):
return
action = request.endpoint or request.path or "unknown_request"
ActivityLog.log_gui_interaction(
action=request.endpoint,
description=f"Request to {request.endpoint}",
action=action,
description=f"Request to {request.path}",
extra={"method": request.method, "url": request.url}
)
return app
return app
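With Flask-Migrate initialized above (and `flask-migrate` added to the project dependencies), schema changes are managed through Alembic migrations; the Makefile's `reset-db` target uses the same commands, e.g.:

```bash
venv/bin/python -m flask --app scipaperloader db migrate -m "describe the change"
venv/bin/python -m flask --app scipaperloader db upgrade
```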

View File

@ -116,7 +116,7 @@ def export_papers():
[
paper.id,
paper.title,
getattr(paper, "journal", ""),
paper.journal,
paper.doi,
paper.issn,
paper.status,

View File

@ -3,26 +3,39 @@ import codecs
import csv
import datetime
from io import StringIO
import json
import pandas as pd
from flask import (
Blueprint,
flash,
jsonify,
redirect,
render_template,
request,
send_file,
session,
url_for,
current_app
)
from ..db import db
from ..models import PaperMetadata
from ..models import PaperMetadata, ActivityLog
from ..celery import celery # Import the celery instance directly
bp = Blueprint("upload", __name__)
REQUIRED_COLUMNS = {"alternative_id", "journal", "doi", "issn", "title"}
CHUNK_SIZE = 100 # Number of rows to process per batch
def parse_date(date_str):
"""Parse date string into datetime object."""
if not date_str or pd.isna(date_str):
return None
try:
return datetime.datetime.strptime(date_str, "%Y-%m-%d")
except ValueError:
return None
@bp.route("/", methods=["GET", "POST"])
def upload():
@ -32,136 +45,214 @@ def upload():
duplicate_strategy = request.form.get("duplicate_strategy", "skip")
if not file:
return render_template("upload.html.jinja", error="No file selected.")
return jsonify({"error": "No file selected."})
try:
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
df = pd.read_csv(StringIO(content), delimiter=delimiter)
except Exception as e:
return render_template("upload.html.jinja", error=f"Failed to read CSV file: {e}")
stream = codecs.iterdecode(file.stream, "utf-8")
content = "".join(stream)
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
return render_template(
"upload.html.jinja", error=f"Missing required columns: {', '.join(missing)}"
)
# Optional: parse 'published_online' to date
def parse_date(val):
if pd.isna(val):
return None
try:
return pd.to_datetime(val).date()
except Exception:
return None
# Count statistics
added_count = 0
skipped_count = 0
updated_count = 0
error_count = 0
# Trigger the Celery task
task = process_csv.delay(content, delimiter, duplicate_strategy)
# Collect error information
errors = []
# Process each row
for index, row in df.iterrows():
try:
# Get DOI from row for error reporting
doi = str(row.get("doi", "N/A"))
# Validate required fields
for field in ["title", "doi", "issn"]:
if pd.isna(row.get(field)) or not str(row.get(field)).strip():
raise ValueError(f"Missing required field: {field}")
# Check if paper with this DOI already exists
existing = PaperMetadata.query.filter_by(doi=doi).first()
if existing:
if duplicate_strategy == 'update':
# Update existing record
existing.title = row["title"]
existing.alt_id = row.get("alternative_id")
existing.issn = row["issn"]
existing.journal = row.get("journal")
existing.type = row.get("type")
existing.language = row.get("language")
existing.published_online = parse_date(row.get("published_online"))
updated_count += 1
return jsonify({"task_id": task.id})
return render_template("upload.html.jinja")
@celery.task(bind=True)
def process_csv(self, file_content, delimiter, duplicate_strategy):
"""Process CSV file and import paper metadata."""
# With the ContextTask in place, we're already inside an app context
added_count = skipped_count = updated_count = error_count = 0
errors = []
skipped_records = []  # Track skipped records for reporting
try:
# Log the start of import using ActivityLog model
ActivityLog.log_import_activity(
action="start_csv_import",
status="processing",
description=f"Starting CSV import with strategy: {duplicate_strategy}",
file_size=len(file_content),
delimiter=delimiter
)
# Set initial progress percentage
self.update_state(state='PROGRESS', meta={'progress': 10})
# Read CSV into chunks
csv_buffer = StringIO(file_content)
# Count total chunks
csv_buffer.seek(0)
total_chunks = len(list(pd.read_csv(csv_buffer, delimiter=delimiter, chunksize=CHUNK_SIZE)))
csv_buffer.seek(0)
# Process each chunk of rows
for chunk_idx, chunk in enumerate(pd.read_csv(csv_buffer, delimiter=delimiter, chunksize=CHUNK_SIZE)):
for index, row in chunk.iterrows():
try:
doi = str(row.get("doi", "N/A"))
# Validate required fields
if pd.isna(row.get("title")) or pd.isna(row.get("doi")) or pd.isna(row.get("issn")):
raise ValueError("Missing required fields")
# Try finding an existing record based on DOI
existing = db.session.query(PaperMetadata).filter_by(doi=doi).first()
if existing:
if duplicate_strategy == "update":
existing.title = row["title"]
existing.alt_id = row.get("alternative_id")
existing.issn = row["issn"]
existing.journal = row.get("journal")
existing.published_online = parse_date(row.get("published_online"))
updated_count += 1
else:
# Track why this record was skipped
skipped_records.append({
"row": index + 2,
"doi": doi,
"reason": f"Duplicate DOI found and strategy is '{duplicate_strategy}'"
})
skipped_count += 1
continue
else:
# Skip this record
skipped_count += 1
continue
else:
# Create new record
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
journal=row.get("journal"),
type=row.get("type"),
language=row.get("language"),
published_online=parse_date(row.get("published_online")),
status="New",
file_path=None,
error_msg=None,
)
db.session.add(metadata)
added_count += 1
except Exception as e:
error_count += 1
errors.append({
"row": index + 2, # +2 because index is 0-based and we have a header row
"doi": row.get("doi", "N/A"),
"error": str(e)
})
continue # Skip this row and continue with the next
metadata = PaperMetadata(
title=row["title"],
doi=doi,
alt_id=row.get("alternative_id"),
issn=row["issn"],
journal=row.get("journal"),
published_online=parse_date(row.get("published_online")),
status="New",
)
db.session.add(metadata)
added_count += 1
except Exception as e:
error_count += 1
errors.append({"row": index + 2, "doi": row.get("doi", "N/A"), "error": str(e)})
try:
# Commit the chunk and roll session fresh
db.session.commit()
except Exception as e:
db.session.rollback()
return render_template(
"upload.html.jinja", error=f"Failed to save data to database: {e}"
)
# Prepare error samples for display
error_samples = errors[:5] if errors else []
error_message = None
if errors:
error_message = f"Encountered {len(errors)} errors. First 5 shown below."
# Store the full errors list in the session for potential download
if errors:
# Log periodic progress every 5 chunks
if (chunk_idx + 1) % 5 == 0:
ActivityLog.log_import_activity(
action="import_progress",
status="processing",
description=f"Processed {chunk_idx+1}/{total_chunks} chunks",
current_stats={
"added": added_count,
"updated": updated_count,
"skipped": skipped_count,
"errors": error_count
}
)
progress = min(90, 10 + int((chunk_idx + 1) * 80 / total_chunks))
self.update_state(state='PROGRESS', meta={'progress': progress})
# Final progress update and completion log
self.update_state(state='PROGRESS', meta={'progress': 100})
ActivityLog.log_import_activity(
action="complete_csv_import",
status="success",
description="CSV import completed",
stats={
"added": added_count,
"updated": updated_count,
"skipped": skipped_count,
"errors": error_count
}
)
except Exception as e:
db.session.rollback()
ActivityLog.log_error(
error_message="CSV import failed",
exception=e,
severity="error",
source="upload.process_csv"
)
return {'error': str(e), 'progress': 0}
finally:
db.session.remove()
# If there were errors, store an error CSV for potential download
if errors:
try:
error_csv = StringIO()
writer = csv.DictWriter(error_csv, fieldnames=["row", "doi", "error"])
writer.writeheader()
writer.writerows(errors)
session["error_data"] = error_csv.getvalue()
ActivityLog.log_import_activity(
action="import_errors",
status="error",
description=f"Import completed with {error_count} errors",
error_csv=error_csv.getvalue(),
task_id=self.request.id,
error_count=error_count
)
except Exception:
# Do not fail the task if error logging fails
pass
return render_template(
"upload.html.jinja",
success=f"File processed! Added: {added_count}, Updated: {updated_count}, Skipped: {skipped_count}, Errors: {error_count}",
error_message=error_message,
error_samples=error_samples
)
# Update the return value to include skipped records information
return {
"added": added_count,
"updated": updated_count,
"skipped": skipped_count,
"skipped_records": skipped_records[:5], # Include up to 5 examples
"skipped_reason_summary": "Records were skipped because they already exist in the database. Use 'update' strategy to update them.",
"errors": errors[:5],
"error_count": error_count,
"task_id": self.request.id
}
@bp.route("/task_status/<task_id>")
def task_status(task_id):
"""Get status of background task."""
task = celery.AsyncResult(task_id)
if task.state == "PENDING":
response = {"state": task.state, "progress": 0}
elif task.state == "PROGRESS":
response = {
"state": task.state,
"progress": task.info.get("progress", 0)
}
elif task.state == "SUCCESS":
response = {
"state": task.state,
"result": task.result
}
else: # FAILURE, REVOKED, etc.
response = {
"state": task.state,
"error": str(task.info) if task.info else "Unknown error"
}
return jsonify(response)
return render_template("upload.html.jinja")
@bp.route("/download_error_log")
def download_error_log():
error_data = session.get("error_data")
if not error_data:
@bp.route("/download_error_log/<task_id>")
def download_error_log(task_id):
# Find the most recent error log for this task
error_log = ActivityLog.query.filter(
ActivityLog.action == "import_errors",
ActivityLog.extra_data.like(f'%"{task_id}"%') # Search in JSON
).order_by(ActivityLog.timestamp.desc()).first()
if not error_log:
flash("No error data available.")
return redirect(url_for("upload.upload"))
# Get the CSV data from extra_data
extra_data = error_log.get_extra_data()
error_csv = extra_data.get("error_csv")
if not error_csv:
flash("Error data format is invalid.")
return redirect(url_for("upload.upload"))
buffer = StringIO(error_data)
buffer = StringIO(error_csv)
return send_file(
buffer,
mimetype="text/csv",

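The pattern in this diff, `self.update_state(state='PROGRESS', meta={...})` inside the bound task and `celery.AsyncResult(task_id)` inside the status route, generalizes to any long-running job. A minimal, self-contained sketch (the task and function names here are illustrative, not part of this commit):

```python
from scipaperloader.celery import celery  # shared instance defined in scipaperloader/celery.py below


@celery.task(bind=True)
def long_job(self, items):
    """Illustrative task that reports percentage progress while it works."""
    total = len(items)
    for i, item in enumerate(items, start=1):
        # ... do the real per-item work here ...
        self.update_state(state="PROGRESS", meta={"progress": int(i * 100 / total)})
    return {"processed": total}


def poll(task_id):
    """Mirror of the task_status route: map Celery states to a small status dict."""
    task = celery.AsyncResult(task_id)
    if task.state == "PROGRESS":
        return {"state": task.state, "progress": task.info.get("progress", 0)}
    if task.state == "SUCCESS":
        return {"state": task.state, "result": task.result}
    return {"state": task.state}
```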
43
scipaperloader/celery.py Normal file
View File

@ -0,0 +1,43 @@
from celery import Celery

# Create Celery instance without Flask app initially
celery = Celery(
    'scipaperloader',
    broker='redis://localhost:6379/0',
    backend='redis://localhost:6379/0',
)


def configure_celery(app=None):
    """Configure Celery with the Flask app settings and ensure tasks run in the app context."""
    if app is None:
        # Import here to avoid circular import
        from scipaperloader import create_app
        app = create_app()

    # Update Celery configuration using the app settings
    celery.conf.update(
        broker_url=app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'),
        result_backend=app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0'),
        task_serializer='json',
        accept_content=['json'],
        result_serializer='json',
        timezone='UTC',
        enable_utc=True,
        task_time_limit=3600,  # 1 hour max runtime
        task_soft_time_limit=3000,  # 50 minutes soft limit
        worker_max_tasks_per_child=10,  # Restart workers after 10 tasks
        worker_max_memory_per_child=1000000,  # 1GB memory limit
        task_acks_late=True,  # Acknowledge tasks after completion
        task_reject_on_worker_lost=True,  # Requeue tasks if worker dies
    )

    # Create a custom task class that pushes the Flask application context
    class ContextTask(celery.Task):
        abstract = True

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
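Because `ContextTask` pushes the Flask application context around every task invocation, tasks defined against this instance can use the database session directly. A minimal sketch, assuming the import paths implied by the package layout in this commit:

```python
from scipaperloader.celery import celery
from scipaperloader.db import db                  # upload.py imports `from ..db import db`
from scipaperloader.models import PaperMetadata   # upload.py imports `from ..models import PaperMetadata`


@celery.task
def count_new_papers():
    # No explicit app.app_context() needed: ContextTask wraps the call
    return db.session.query(PaperMetadata).filter_by(status="New").count()
```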

View File

@ -12,6 +12,7 @@ class ActivityCategory(Enum):
SCRAPER_COMMAND = "scraper_command"
SCRAPER_ACTIVITY = "scraper_activity"
SYSTEM = "system"
DATA_IMPORT = "data_import"
class ErrorSeverity(Enum):
@ -164,6 +165,20 @@ class ActivityLog(db.Model):
db.session.commit()
return log
@classmethod
def log_import_activity(cls, action, status=None, description=None, user_id=None, **extra):
"""Log data import activities (CSV uploads, bulk imports, etc.)."""
log = cls(
category=ActivityCategory.DATA_IMPORT.value,
action=action,
status=status,
description=description,
user_id=user_id
)
log.set_extra_data(extra)
db.session.add(log)
db.session.commit()
return log
class PaperMetadata(db.Model):
id = db.Column(db.Integer, primary_key=True)
@ -171,6 +186,7 @@ class PaperMetadata(db.Model):
doi = db.Column(db.String, unique=True, index=True)
alt_id = db.Column(db.String)
issn = db.Column(db.String(32))
journal = db.Column(db.String(255))
type = db.Column(db.String(50))
language = db.Column(db.String(50))
published_online = db.Column(db.Date) # or DateTime/String
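A hypothetical call to the new `log_import_activity` helper; the extra keyword arguments end up in the JSON `extra_data` column via `set_extra_data`, which is how the upload task above attaches the error CSV and statistics:

```python
log = ActivityLog.log_import_activity(
    action="start_csv_import",
    status="processing",
    description="Starting CSV import with strategy: skip",
    file_size=2048,   # any extra kwargs are serialized into extra_data
    delimiter=",",
)
print(log.get_extra_data()["file_size"])  # -> 2048
```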

View File

@ -1,5 +1,9 @@
.message {
padding: 10px;
font-size: 1.3em;
font-family: Arial, sans-serif;
padding: 10px;
font-size: 1.3em;
font-family: Arial, sans-serif;
}
.progress-bar {
width: 0%;
}

View File

@ -1,6 +1,8 @@
{% extends "base.html.jinja" %} {% block content %}
<h1>Welcome to SciPaperLoader</h1>
<div id="results-container"></div>
{% if success %}
<div class="alert alert-success mt-3">{{ success }}</div>
{% endif %} {% if error_message %}
@ -40,24 +42,9 @@
<li><code>issn</code> the ISSN of the journal</li>
<li><code>title</code> the title of the paper</li>
</ul>
<p>
The format of your CSV should resemble the response structure of the
Crossref API's <code>/journals/{issn}/works</code> endpoint.
</p>
</div>
<form method="post" action="{{ url_for('upload.upload') }}" enctype="multipart/form-data">
<div class="mb-3">
<label class="form-label">How to handle duplicate DOIs:</label>
<div class="form-check">
<input class="form-check-input" type="radio" name="duplicate_strategy" value="skip" id="skip" checked />
<label class="form-check-label" for="skip">Skip duplicate entries</label>
</div>
<div class="form-check">
<input class="form-check-input" type="radio" name="duplicate_strategy" value="update" id="update" />
<label class="form-check-label" for="update">Update existing entries</label>
</div>
</div>
<form method="post" action="{{ url_for('upload.upload') }}" enctype="multipart/form-data" id="upload-form">
<div class="form-group">
<label for="file">Upload CSV File</label>
<input type="file" name="file" id="file" class="form-control" required />
@ -73,4 +60,175 @@
</div>
<button type="submit" class="btn btn-primary mt-3">Upload</button>
</form>
<!-- Progress Modal -->
<div id="progressModal" class="modal fade" tabindex="-1">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Processing Your Upload</h5>
</div>
<div class="modal-body">
<div class="progress">
<div id="progressBar" class="progress-bar" role="progressbar">0%</div>
</div>
<p id="progressStatus" class="mt-2 text-center">Starting...</p>
</div>
</div>
</div>
</div>
<script>
const form = document.getElementById("upload-form");
form.addEventListener("submit", function (e) {
e.preventDefault();
// Display loading state immediately
const progressModal = new bootstrap.Modal(document.getElementById("progressModal"));
progressModal.show();
const progressBar = document.getElementById("progressBar");
progressBar.style.width = "5%";
progressBar.textContent = "Starting...";
const formData = new FormData(form);
// Disable the form while processing
const submitButton = form.querySelector("button[type='submit']");
submitButton.disabled = true;
fetch(form.action, {
method: "POST",
body: formData,
})
.then((response) => response.json())
.then((data) => {
if (data.error) {
// Handle error
progressModal.hide();
alert(`Error: ${data.error}`);
submitButton.disabled = false;
return;
}
const taskId = data.task_id;
const interval = setInterval(() => {
fetch("{{ url_for('upload.task_status', task_id='') }}" + taskId)
.then((response) => response.json())
.then((status) => {
console.log("Task status:", status);
if (status.state === "SUCCESS") {
clearInterval(interval);
progressBar.style.width = "100%";
progressBar.textContent = "Completed!";
setTimeout(() => {
progressModal.hide();
showResults(status.result);
submitButton.disabled = false;
}, 1000);
} else if (status.state === "FAILURE") {
clearInterval(interval);
progressBar.style.width = "100%";
progressBar.classList.add("bg-danger");
progressBar.textContent = "Failed!";
setTimeout(() => {
progressModal.hide();
alert(`Task failed: ${status.error || "Unknown error"}`);
submitButton.disabled = false;
}, 1000);
} else {
// Update progress bar with more information
const progress = status.progress || 0;
progressBar.style.width = `${progress}%`;
progressBar.textContent = `${progress}% complete`;
document.getElementById("progressStatus").innerText = `Processing... (${status.state})`;
}
})
.catch((err) => {
console.error("Failed to check task status:", err);
});
}, 1000);
})
.catch((err) => {
console.error("Upload failed:", err);
progressModal.hide();
alert("Upload failed. Please try again.");
submitButton.disabled = false;
});
});
const showResults = (result) => {
const message = `Upload completed! Added: ${result.added}, Updated: ${result.updated}, Skipped: ${result.skipped}, Errors: ${result.error_count}`;
let resultHTML = `<div class="alert alert-success">${message}</div>`;
// Add skipped records information
if (result.skipped > 0) {
resultHTML += `
<div class="alert alert-info">
<h4>${result.skipped} records were skipped</h4>
<p>${result.skipped_reason_summary || "Records were skipped because they already exist in the database."}</p>
${result.skipped_records && result.skipped_records.length > 0 ? `
<p>Examples of skipped records:</p>
<table class="table table-sm table-bordered">
<thead>
<tr>
<th>Row</th>
<th>DOI</th>
<th>Reason</th>
</tr>
</thead>
<tbody>
${result.skipped_records.map(record => `
<tr>
<td>${record.row}</td>
<td>${record.doi}</td>
<td>${record.reason}</td>
</tr>
`).join('')}
</tbody>
</table>
` : ''}
</div>`;
}
// Existing error display code
if (result.error_count > 0) {
resultHTML += `
<div class="alert alert-warning">
<h4>Some errors occurred (${result.error_count} total)</h4>
<p>Showing first ${result.errors.length} of ${result.error_count} errors:</p>
<table class="table table-sm table-bordered">
<thead>
<tr>
<th>Row</th>
<th>DOI</th>
<th>Error</th>
</tr>
</thead>
<tbody>`;
result.errors.forEach(error => {
resultHTML += `
<tr>
<td>${error.row}</td>
<td>${error.doi}</td>
<td>${error.error}</td>
</tr>`;
});
resultHTML += `
</tbody>
</table>
<p class="mt-2">Download the complete error log with all ${result.error_count} errors:</p>
<a href="/upload/download_error_log/${result.task_id}" class="btn btn-outline-secondary">
Download Full Error Log
</a>
</div>`;
}
document.getElementById("results-container").innerHTML = resultHTML;
};
</script>
{% endblock content %}

1641
testdata.csv Normal file

File diff suppressed because it is too large.