fixes dummy and single paper processing
This commit is contained in:
parent
8f2375215d
commit
012163ba3f
@ -1,6 +1,7 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from flask import Blueprint, jsonify, request
|
from flask import Blueprint, jsonify, request
|
||||||
from ..models import ActivityLog, ActivityCategory
|
from ..models import ActivityLog, ActivityCategory, PaperMetadata
|
||||||
|
from .. import db
|
||||||
|
|
||||||
bp = Blueprint("api", __name__, url_prefix="/api")
|
bp = Blueprint("api", __name__, url_prefix="/api")
|
||||||
|
|
||||||
@ -48,3 +49,90 @@ def get_activity_logs():
|
|||||||
result.append(log_data)
|
result.append(log_data)
|
||||||
|
|
||||||
return jsonify(result)
|
return jsonify(result)
|
||||||
|
|
||||||
|
@bp.route("/papers")
|
||||||
|
def search_papers():
|
||||||
|
"""
|
||||||
|
Search for papers by title, DOI, or ID.
|
||||||
|
|
||||||
|
Query parameters:
|
||||||
|
- query: Search term (required)
|
||||||
|
- limit: Maximum number of results (default: 10)
|
||||||
|
"""
|
||||||
|
query = request.args.get('query', '')
|
||||||
|
limit = int(request.args.get('limit', 10))
|
||||||
|
|
||||||
|
if not query:
|
||||||
|
return jsonify({
|
||||||
|
"success": False,
|
||||||
|
"message": "Search query is required",
|
||||||
|
"papers": []
|
||||||
|
})
|
||||||
|
|
||||||
|
# Try to parse query as an ID first
|
||||||
|
try:
|
||||||
|
paper_id = int(query)
|
||||||
|
paper_by_id = PaperMetadata.query.get(paper_id)
|
||||||
|
if paper_by_id:
|
||||||
|
return jsonify({
|
||||||
|
"success": True,
|
||||||
|
"papers": [{
|
||||||
|
"id": paper_by_id.id,
|
||||||
|
"title": paper_by_id.title,
|
||||||
|
"doi": paper_by_id.doi,
|
||||||
|
"journal": paper_by_id.journal,
|
||||||
|
"status": paper_by_id.status,
|
||||||
|
"created_at": paper_by_id.created_at.isoformat() if paper_by_id.created_at else None,
|
||||||
|
"updated_at": paper_by_id.updated_at.isoformat() if paper_by_id.updated_at else None
|
||||||
|
}]
|
||||||
|
})
|
||||||
|
except ValueError:
|
||||||
|
pass # Not an ID, continue with text search
|
||||||
|
|
||||||
|
# Search in title and DOI
|
||||||
|
search_term = f"%{query}%"
|
||||||
|
papers = PaperMetadata.query.filter(
|
||||||
|
db.or_(
|
||||||
|
PaperMetadata.title.ilike(search_term),
|
||||||
|
PaperMetadata.doi.ilike(search_term)
|
||||||
|
)
|
||||||
|
).limit(limit).all()
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"success": True,
|
||||||
|
"papers": [{
|
||||||
|
"id": paper.id,
|
||||||
|
"title": paper.title,
|
||||||
|
"doi": paper.doi,
|
||||||
|
"journal": paper.journal,
|
||||||
|
"status": paper.status,
|
||||||
|
"created_at": paper.created_at.isoformat() if paper.created_at else None,
|
||||||
|
"updated_at": paper.updated_at.isoformat() if paper.updated_at else None
|
||||||
|
} for paper in papers]
|
||||||
|
})
|
||||||
|
|
||||||
|
@bp.route("/papers/<int:paper_id>")
|
||||||
|
def get_paper(paper_id):
|
||||||
|
"""Get details of a single paper by ID."""
|
||||||
|
paper = PaperMetadata.query.get(paper_id)
|
||||||
|
|
||||||
|
if not paper:
|
||||||
|
return jsonify({
|
||||||
|
"success": False,
|
||||||
|
"message": f"Paper with ID {paper_id} not found"
|
||||||
|
})
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"success": True,
|
||||||
|
"paper": {
|
||||||
|
"id": paper.id,
|
||||||
|
"title": paper.title,
|
||||||
|
"doi": paper.doi,
|
||||||
|
"journal": paper.journal,
|
||||||
|
"status": paper.status,
|
||||||
|
"error_msg": paper.error_msg,
|
||||||
|
"file_path": paper.file_path,
|
||||||
|
"created_at": paper.created_at.isoformat() if paper.created_at else None,
|
||||||
|
"updated_at": paper.updated_at.isoformat() if paper.updated_at else None
|
||||||
|
}
|
||||||
|
})
|
@ -2,7 +2,7 @@
|
|||||||
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
|
from flask import Blueprint, render_template, redirect, url_for, request, flash, jsonify, current_app
|
||||||
from ..db import db
|
from ..db import db
|
||||||
# Import the new model
|
# Import the new model
|
||||||
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig
|
from ..models import VolumeConfig, ScheduleConfig, ActivityLog, DownloadPathConfig, PaperMetadata
|
||||||
from ..defaults import MAX_VOLUME
|
from ..defaults import MAX_VOLUME
|
||||||
import os # Import os for path validation
|
import os # Import os for path validation
|
||||||
from scipaperloader.scrapers import __path__ as scrapers_path
|
from scipaperloader.scrapers import __path__ as scrapers_path
|
||||||
@ -403,3 +403,35 @@ def api_update_config():
|
|||||||
"success": False,
|
"success": False,
|
||||||
"message": f"Unexpected error: {str(e)}"
|
"message": f"Unexpected error: {str(e)}"
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route("/delete_all_papers", methods=["POST"])
|
||||||
|
def delete_all_papers():
|
||||||
|
"""Delete all paper records from the database."""
|
||||||
|
try:
|
||||||
|
# Count papers before deletion for logging purposes
|
||||||
|
paper_count = PaperMetadata.query.count()
|
||||||
|
|
||||||
|
# Delete all records from the PaperMetadata table
|
||||||
|
PaperMetadata.query.delete()
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
# Log the action
|
||||||
|
ActivityLog.log_config_change(
|
||||||
|
config_key="database",
|
||||||
|
old_value=f"{paper_count} papers",
|
||||||
|
new_value="0 papers",
|
||||||
|
description=f"Deleted all {paper_count} papers from the database"
|
||||||
|
)
|
||||||
|
|
||||||
|
flash(f"Successfully deleted all {paper_count} papers from the database.", "success")
|
||||||
|
except Exception as e:
|
||||||
|
db.session.rollback()
|
||||||
|
flash(f"Failed to delete papers: {str(e)}", "error")
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=f"Failed to delete all papers: {str(e)}",
|
||||||
|
exception=e,
|
||||||
|
source="config.delete_all_papers"
|
||||||
|
)
|
||||||
|
|
||||||
|
return redirect(url_for("config.general"))
|
@ -6,13 +6,13 @@ import os # Import os for path joining
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from flask import Blueprint, jsonify, render_template, request, current_app, flash
|
from flask import Blueprint, jsonify, render_template, request, current_app, flash
|
||||||
# Import the new model
|
# Import the new model
|
||||||
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig
|
from ..models import VolumeConfig, ActivityLog, PaperMetadata, ActivityCategory, ScheduleConfig, ScraperState, DownloadPathConfig, ScraperModuleConfig
|
||||||
from ..db import db
|
from ..db import db
|
||||||
from ..celery import celery
|
from ..celery import celery
|
||||||
from ..defaults import MAX_VOLUME
|
from ..defaults import MAX_VOLUME
|
||||||
from celery.schedules import crontab
|
from celery.schedules import crontab
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
from scipaperloader.scrapers.factory import get_scraper
|
from scipaperloader.scrapers.factory import get_scraper, get_available_scrapers
|
||||||
|
|
||||||
bp = Blueprint("scraper", __name__, url_prefix="/scraper")
|
bp = Blueprint("scraper", __name__, url_prefix="/scraper")
|
||||||
|
|
||||||
@ -767,3 +767,150 @@ def process_paper(self, paper_id):
|
|||||||
"status": result.status,
|
"status": result.status,
|
||||||
"message": result.message
|
"message": result.message
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@celery.task(bind=True)
|
||||||
|
@celery.task(bind=True)
|
||||||
|
def process_paper_with_scraper(self, paper_id, scraper_module):
|
||||||
|
"""Process a paper using a specific scraper module."""
|
||||||
|
from scipaperloader.models import PaperMetadata
|
||||||
|
import importlib
|
||||||
|
from ..scrapers.base import BaseScraper
|
||||||
|
|
||||||
|
paper = PaperMetadata.query.get(paper_id)
|
||||||
|
if not paper:
|
||||||
|
return {"status": "error", "message": f"Paper with ID {paper_id} not found"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Import the specified scraper module
|
||||||
|
module = importlib.import_module(f"scipaperloader.scrapers.{scraper_module}")
|
||||||
|
cls = getattr(module, "Scraper")
|
||||||
|
|
||||||
|
# Validate that it's a BaseScraper
|
||||||
|
if not issubclass(cls, BaseScraper):
|
||||||
|
error_msg = f"Scraper class in module '{scraper_module}' does not inherit from BaseScraper"
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=error_msg,
|
||||||
|
source="process_paper_with_scraper"
|
||||||
|
)
|
||||||
|
return {"status": "error", "message": error_msg}
|
||||||
|
|
||||||
|
# Instantiate and use the scraper
|
||||||
|
scraper = cls()
|
||||||
|
result = scraper.scrape(paper.doi)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"paper_id": paper_id,
|
||||||
|
"status": result.status,
|
||||||
|
"message": result.message,
|
||||||
|
"scraper": scraper_module
|
||||||
|
}
|
||||||
|
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
error_msg = f"Failed to load scraper module '{scraper_module}': {str(e)}"
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=error_msg,
|
||||||
|
source="process_paper_with_scraper"
|
||||||
|
)
|
||||||
|
return {"status": "error", "message": error_msg}
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Error processing paper with scraper '{scraper_module}': {str(e)}"
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=error_msg,
|
||||||
|
source="process_paper_with_scraper",
|
||||||
|
exception=e
|
||||||
|
)
|
||||||
|
return {"status": "error", "message": error_msg}
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route("/process_single/<int:paper_id>", methods=["POST"])
|
||||||
|
def process_single_paper(paper_id):
|
||||||
|
"""Process a single paper by ID."""
|
||||||
|
try:
|
||||||
|
# Check if paper exists
|
||||||
|
paper = PaperMetadata.query.get(paper_id)
|
||||||
|
if not paper:
|
||||||
|
return jsonify({
|
||||||
|
"success": False,
|
||||||
|
"message": f"Paper with ID {paper_id} not found"
|
||||||
|
})
|
||||||
|
|
||||||
|
# Get the scraper module name from the request
|
||||||
|
scraper_module = None
|
||||||
|
if request.is_json and request.json:
|
||||||
|
scraper_module = request.json.get('scraper_module')
|
||||||
|
|
||||||
|
# Update status to Pending
|
||||||
|
old_status = paper.status
|
||||||
|
paper.status = "Pending"
|
||||||
|
paper.updated_at = datetime.utcnow()
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
# Log that we're processing this paper
|
||||||
|
ActivityLog.log_scraper_activity(
|
||||||
|
action="manual_process_paper",
|
||||||
|
paper_id=paper_id,
|
||||||
|
status="pending",
|
||||||
|
description=f"Manual processing initiated for paper: {paper.title}" +
|
||||||
|
(f" using {scraper_module} scraper" if scraper_module else "")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Start the task (without delay since it's manual)
|
||||||
|
if scraper_module:
|
||||||
|
task = process_paper_with_scraper.delay(paper_id, scraper_module)
|
||||||
|
else:
|
||||||
|
task = process_paper.delay(paper_id)
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"success": True,
|
||||||
|
"task_id": task.id,
|
||||||
|
"message": f"Processing paper '{paper.title}' (ID: {paper_id})" +
|
||||||
|
(f" using {scraper_module} scraper" if scraper_module else "") +
|
||||||
|
f". Previous status: {old_status}"
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
db.session.rollback()
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=f"Failed to process paper {paper_id}: {str(e)}",
|
||||||
|
exception=e,
|
||||||
|
source="process_single_paper"
|
||||||
|
)
|
||||||
|
return jsonify({
|
||||||
|
"success": False,
|
||||||
|
"message": f"Error: {str(e)}"
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route("/available_scrapers")
|
||||||
|
def available_scrapers():
|
||||||
|
"""Get list of available scraper modules."""
|
||||||
|
from scipaperloader.scrapers.factory import get_available_scrapers
|
||||||
|
from ..models import ScraperModuleConfig
|
||||||
|
|
||||||
|
try:
|
||||||
|
scrapers = get_available_scrapers()
|
||||||
|
current_module = ScraperModuleConfig.get_current_module()
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"success": True,
|
||||||
|
"scrapers": [
|
||||||
|
{
|
||||||
|
"name": s["name"],
|
||||||
|
"description": s["description"],
|
||||||
|
"is_current": s["name"] == current_module
|
||||||
|
} for s in scrapers
|
||||||
|
],
|
||||||
|
"current": current_module
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
ActivityLog.log_error(
|
||||||
|
error_message=f"Failed to get available scrapers: {str(e)}",
|
||||||
|
source="available_scrapers"
|
||||||
|
)
|
||||||
|
return jsonify({
|
||||||
|
"success": False,
|
||||||
|
"message": f"Error: {str(e)}",
|
||||||
|
"scrapers": []
|
||||||
|
})
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from .base import BaseScraper, ScrapeResult
|
from .base import BaseScraper, ScrapeResult
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
@ -31,15 +32,47 @@ class Scraper(BaseScraper):
|
|||||||
success = random.random() < 0.8
|
success = random.random() < 0.8
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
# Get download path and simulate file creation
|
# Get download path and create an actual dummy file
|
||||||
download_path = DownloadPathConfig.get_path()
|
download_path = DownloadPathConfig.get_path()
|
||||||
file_name = f"{doi.replace('/', '_')}.pdf"
|
file_name = f"{doi.replace('/', '_')}.pdf"
|
||||||
file_path = f"{download_path}/{file_name}"
|
file_path = f"{download_path}/{file_name}"
|
||||||
|
|
||||||
# Update paper status
|
# Create directory if it doesn't exist
|
||||||
paper.status = "Done"
|
os.makedirs(download_path, exist_ok=True)
|
||||||
paper.file_path = file_path
|
|
||||||
paper.error_msg = None
|
# Create a simple dummy PDF file
|
||||||
|
try:
|
||||||
|
with open(file_path, 'w') as f:
|
||||||
|
f.write(f"Dummy PDF file for paper with DOI: {doi}\n")
|
||||||
|
f.write(f"Title: {paper.title}\n")
|
||||||
|
f.write(f"Journal: {paper.journal}\n")
|
||||||
|
f.write(f"Generated: {datetime.utcnow().isoformat()}\n")
|
||||||
|
f.write("\nThis is a dummy file created by the SciPaperLoader dummy scraper.\n")
|
||||||
|
|
||||||
|
# Update paper status
|
||||||
|
paper.status = "Done"
|
||||||
|
paper.file_path = file_path
|
||||||
|
paper.error_msg = None
|
||||||
|
except Exception as e:
|
||||||
|
# Handle file creation errors
|
||||||
|
error_msg = f"Failed to create dummy file: {str(e)}"
|
||||||
|
paper.status = "Failed"
|
||||||
|
paper.error_msg = error_msg
|
||||||
|
|
||||||
|
ActivityLog.log_scraper_activity(
|
||||||
|
action="dummy_scrape_file_error",
|
||||||
|
status="error",
|
||||||
|
description=error_msg,
|
||||||
|
paper_id=paper.id
|
||||||
|
)
|
||||||
|
|
||||||
|
return ScrapeResult(
|
||||||
|
status="error",
|
||||||
|
message=error_msg,
|
||||||
|
data={"error_code": "file_creation_error"},
|
||||||
|
duration=time.time() - start_time,
|
||||||
|
timestamp=datetime.utcnow()
|
||||||
|
)
|
||||||
|
|
||||||
# Log success
|
# Log success
|
||||||
ActivityLog.log_scraper_activity(
|
ActivityLog.log_scraper_activity(
|
||||||
|
@ -90,6 +90,31 @@
|
|||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Database Management Section -->
|
||||||
|
<div class="row mt-4">
|
||||||
|
<div class="col-12">
|
||||||
|
<div class="card border-danger">
|
||||||
|
<div class="card-header bg-danger text-white">
|
||||||
|
<h5>Database Management</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="form-section">
|
||||||
|
<h6>Delete All Papers</h6>
|
||||||
|
<p class="text-muted">This action will permanently delete all paper records from the
|
||||||
|
database. This cannot be undone.</p>
|
||||||
|
|
||||||
|
<form method="post" action="{{ url_for('config.delete_all_papers') }}" class="mt-3"
|
||||||
|
onsubmit="return confirm('WARNING: You are about to delete ALL papers from the database. This action cannot be undone. Are you sure you want to proceed?');">
|
||||||
|
<button type="submit" class="btn btn-danger">
|
||||||
|
<i class="fas fa-trash-alt"></i> Delete All Papers
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -36,6 +36,28 @@
|
|||||||
max-width: 350px;
|
max-width: 350px;
|
||||||
z-index: 1050;
|
z-index: 1050;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.search-results-container {
|
||||||
|
max-height: 300px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Paper status badges */
|
||||||
|
.badge-new {
|
||||||
|
background-color: #17a2b8;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge-pending {
|
||||||
|
background-color: #ffc107;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge-done {
|
||||||
|
background-color: #28a745;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge-failed {
|
||||||
|
background-color: #dc3545;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
{% endblock styles %}
|
{% endblock styles %}
|
||||||
|
|
||||||
@ -89,6 +111,61 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- New row for single paper processing -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col-12">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">
|
||||||
|
<h5>Process Single Paper</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-6">
|
||||||
|
<form id="searchPaperForm" class="mb-3">
|
||||||
|
<div class="input-group">
|
||||||
|
<input type="text" id="paperSearchInput" class="form-control"
|
||||||
|
placeholder="Search paper by title, DOI, or ID...">
|
||||||
|
<button class="btn btn-outline-secondary" type="submit">Search</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="scraperSelect">Scraper Module:</label>
|
||||||
|
<select class="form-control" id="scraperSelect">
|
||||||
|
<option value="">Use default system scraper</option>
|
||||||
|
<!-- Available scrapers will be populated here -->
|
||||||
|
</select>
|
||||||
|
<div class="form-text">
|
||||||
|
Select which scraper to use for processing the paper
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="searchResults" class="mt-3 search-results-container d-none">
|
||||||
|
<table class="table table-hover table-striped">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>DOI</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="paperSearchResults">
|
||||||
|
<!-- Search results will be populated here -->
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="processingStatus" class="alert alert-info mt-3 d-none"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="row mb-4">
|
<div class="row mb-4">
|
||||||
<div class="col-12">
|
<div class="col-12">
|
||||||
<div class="card">
|
<div class="card">
|
||||||
@ -164,12 +241,19 @@
|
|||||||
const resetButton = document.getElementById('resetButton');
|
const resetButton = document.getElementById('resetButton');
|
||||||
const notificationsToggle = document.getElementById('notificationsToggle');
|
const notificationsToggle = document.getElementById('notificationsToggle');
|
||||||
const activityLog = document.getElementById('activityLog');
|
const activityLog = document.getElementById('activityLog');
|
||||||
|
const searchForm = document.getElementById('searchPaperForm');
|
||||||
|
const searchInput = document.getElementById('paperSearchInput');
|
||||||
|
const searchResults = document.getElementById('searchResults');
|
||||||
|
const processingStatus = document.getElementById('processingStatus');
|
||||||
|
const paperSearchResults = document.getElementById('paperSearchResults');
|
||||||
|
const scraperSelect = document.getElementById('scraperSelect');
|
||||||
|
|
||||||
// Initialize the page
|
// Initialize the page
|
||||||
document.addEventListener('DOMContentLoaded', function () {
|
document.addEventListener('DOMContentLoaded', function () {
|
||||||
initStatusPolling();
|
initStatusPolling();
|
||||||
loadActivityStats(currentTimeRange);
|
loadActivityStats(currentTimeRange);
|
||||||
loadRecentActivity();
|
loadRecentActivity();
|
||||||
|
loadAvailableScrapers();
|
||||||
|
|
||||||
// Initialize event listeners
|
// Initialize event listeners
|
||||||
startButton.addEventListener('click', startScraper);
|
startButton.addEventListener('click', startScraper);
|
||||||
@ -177,6 +261,10 @@
|
|||||||
stopButton.addEventListener('click', stopScraper);
|
stopButton.addEventListener('click', stopScraper);
|
||||||
resetButton.addEventListener('click', resetScraper);
|
resetButton.addEventListener('click', resetScraper);
|
||||||
notificationsToggle.addEventListener('click', toggleNotifications);
|
notificationsToggle.addEventListener('click', toggleNotifications);
|
||||||
|
searchForm.addEventListener('submit', function (e) {
|
||||||
|
e.preventDefault();
|
||||||
|
searchPapers();
|
||||||
|
});
|
||||||
|
|
||||||
document.getElementById('volumeForm').addEventListener('submit', function (e) {
|
document.getElementById('volumeForm').addEventListener('submit', function (e) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
@ -193,6 +281,185 @@
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Load available scraper modules
|
||||||
|
function loadAvailableScrapers() {
|
||||||
|
fetch('/scraper/available_scrapers')
|
||||||
|
.then(response => response.json())
|
||||||
|
.then(data => {
|
||||||
|
if (data.success && data.scrapers && data.scrapers.length > 0) {
|
||||||
|
// Clear previous options except the default one
|
||||||
|
while (scraperSelect.options.length > 1) {
|
||||||
|
scraperSelect.remove(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add each scraper as an option
|
||||||
|
data.scrapers.forEach(scraper => {
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.value = scraper.name;
|
||||||
|
option.textContent = `${scraper.name} - ${scraper.description.substring(0, 50)}${scraper.description.length > 50 ? '...' : ''}`;
|
||||||
|
if (scraper.is_current) {
|
||||||
|
option.textContent += ' (system default)';
|
||||||
|
}
|
||||||
|
scraperSelect.appendChild(option);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// If no scrapers or error, add a note
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.disabled = true;
|
||||||
|
option.textContent = 'No scrapers available';
|
||||||
|
scraperSelect.appendChild(option);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
console.error('Error loading scrapers:', error);
|
||||||
|
const option = document.createElement('option');
|
||||||
|
option.disabled = true;
|
||||||
|
option.textContent = 'Error loading scrapers';
|
||||||
|
scraperSelect.appendChild(option);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search papers function
|
||||||
|
function searchPapers() {
|
||||||
|
const query = searchInput.value.trim();
|
||||||
|
|
||||||
|
if (!query) {
|
||||||
|
showFlashMessage('Please enter a search term', 'warning');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show loading message
|
||||||
|
paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Searching papers...</td></tr>';
|
||||||
|
searchResults.classList.remove('d-none');
|
||||||
|
|
||||||
|
// Fetch papers from API
|
||||||
|
fetch(`/api/papers?query=${encodeURIComponent(query)}`)
|
||||||
|
.then(response => response.json())
|
||||||
|
.then(data => {
|
||||||
|
if (!data.papers || data.papers.length === 0) {
|
||||||
|
paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">No papers found matching your search</td></tr>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
paperSearchResults.innerHTML = '';
|
||||||
|
|
||||||
|
data.papers.forEach(paper => {
|
||||||
|
const row = document.createElement('tr');
|
||||||
|
|
||||||
|
// Create status badge
|
||||||
|
let statusBadge = '';
|
||||||
|
if (paper.status === 'New') {
|
||||||
|
statusBadge = '<span class="badge bg-info">New</span>';
|
||||||
|
} else if (paper.status === 'Pending') {
|
||||||
|
statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
|
||||||
|
} else if (paper.status === 'Done') {
|
||||||
|
statusBadge = '<span class="badge bg-success">Done</span>';
|
||||||
|
} else if (paper.status === 'Failed') {
|
||||||
|
statusBadge = '<span class="badge bg-danger">Failed</span>';
|
||||||
|
} else {
|
||||||
|
statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create process button (enabled only for papers not in 'Pending' status)
|
||||||
|
const processButtonDisabled = paper.status === 'Pending' ? 'disabled' : '';
|
||||||
|
|
||||||
|
// Truncate title if too long
|
||||||
|
const truncatedTitle = paper.title.length > 70 ? paper.title.substring(0, 70) + '...' : paper.title;
|
||||||
|
|
||||||
|
row.innerHTML = `
|
||||||
|
<td>${paper.id}</td>
|
||||||
|
<td title="${paper.title}">${truncatedTitle}</td>
|
||||||
|
<td>${paper.doi || 'N/A'}</td>
|
||||||
|
<td>${statusBadge}</td>
|
||||||
|
<td>
|
||||||
|
<button class="btn btn-sm btn-primary process-paper-btn"
|
||||||
|
data-paper-id="${paper.id}"
|
||||||
|
${processButtonDisabled}>
|
||||||
|
Process Now
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
`;
|
||||||
|
|
||||||
|
paperSearchResults.appendChild(row);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add event listeners to the process buttons
|
||||||
|
document.querySelectorAll('.process-paper-btn').forEach(btn => {
|
||||||
|
btn.addEventListener('click', function () {
|
||||||
|
processSinglePaper(this.getAttribute('data-paper-id'));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
console.error('Error searching papers:', error);
|
||||||
|
paperSearchResults.innerHTML = '<tr><td colspan="5" class="text-center">Error searching papers</td></tr>';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process a single paper
|
||||||
|
function processSinglePaper(paperId) {
|
||||||
|
// Disable all process buttons to prevent multiple clicks
|
||||||
|
document.querySelectorAll('.process-paper-btn').forEach(btn => {
|
||||||
|
btn.disabled = true;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Show processing status
|
||||||
|
processingStatus.textContent = 'Processing paper...';
|
||||||
|
processingStatus.classList.remove('d-none');
|
||||||
|
|
||||||
|
// Get selected scraper
|
||||||
|
const selectedScraper = scraperSelect.value;
|
||||||
|
|
||||||
|
// Send request to process the paper
|
||||||
|
fetch(`/scraper/process_single/${paperId}`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
scraper_module: selectedScraper
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.then(response => response.json())
|
||||||
|
.then(data => {
|
||||||
|
if (data.success) {
|
||||||
|
processingStatus.textContent = data.message;
|
||||||
|
processingStatus.className = 'alert alert-success mt-3';
|
||||||
|
|
||||||
|
// Update status in the search results
|
||||||
|
const row = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`).closest('tr');
|
||||||
|
const statusCell = row.querySelector('td:nth-child(4)');
|
||||||
|
statusCell.innerHTML = '<span class="badge bg-warning text-dark">Pending</span>';
|
||||||
|
|
||||||
|
// Show notification
|
||||||
|
showFlashMessage(data.message, 'success');
|
||||||
|
|
||||||
|
// Set up polling to check paper status and refresh activity
|
||||||
|
pollPaperStatus(paperId, 3000, 20);
|
||||||
|
} else {
|
||||||
|
processingStatus.textContent = data.message;
|
||||||
|
processingStatus.className = 'alert alert-danger mt-3';
|
||||||
|
showFlashMessage(data.message, 'error');
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
console.error('Error processing paper:', error);
|
||||||
|
processingStatus.textContent = 'Error: Could not process paper';
|
||||||
|
processingStatus.className = 'alert alert-danger mt-3';
|
||||||
|
showFlashMessage('Error processing paper', 'error');
|
||||||
|
})
|
||||||
|
.finally(() => {
|
||||||
|
// Re-enable the process buttons after a short delay
|
||||||
|
setTimeout(() => {
|
||||||
|
document.querySelectorAll('.process-paper-btn').forEach(btn => {
|
||||||
|
if (btn.getAttribute('data-paper-id') !== paperId) {
|
||||||
|
btn.disabled = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}, 1000);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Status polling
|
// Status polling
|
||||||
function initStatusPolling() {
|
function initStatusPolling() {
|
||||||
updateStatus();
|
updateStatus();
|
||||||
@ -298,26 +565,26 @@
|
|||||||
clear_papers: true // You could make this configurable with a checkbox
|
clear_papers: true // You could make this configurable with a checkbox
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.then(response => response.json())
|
.then(response => response.json())
|
||||||
.then(data => {
|
.then(data => {
|
||||||
if (data.success) {
|
if (data.success) {
|
||||||
showFlashMessage('Scraper has been completely reset and restarted', 'success');
|
showFlashMessage('Scraper has been completely reset and restarted', 'success');
|
||||||
// Update everything
|
// Update everything
|
||||||
updateStatus();
|
updateStatus();
|
||||||
loadActivityStats(currentTimeRange);
|
loadActivityStats(currentTimeRange);
|
||||||
setTimeout(() => { loadRecentActivity(); }, 1000);
|
setTimeout(() => { loadRecentActivity(); }, 1000);
|
||||||
} else {
|
} else {
|
||||||
showFlashMessage(data.message || 'Error resetting scraper', 'error');
|
showFlashMessage(data.message || 'Error resetting scraper', 'error');
|
||||||
}
|
}
|
||||||
// Re-enable button
|
// Re-enable button
|
||||||
resetButton.disabled = false;
|
resetButton.disabled = false;
|
||||||
})
|
})
|
||||||
.catch(error => {
|
.catch(error => {
|
||||||
console.error("Error resetting scraper:", error);
|
console.error("Error resetting scraper:", error);
|
||||||
showFlashMessage('Error resetting scraper: ' + error.message, 'error');
|
showFlashMessage('Error resetting scraper: ' + error.message, 'error');
|
||||||
// Re-enable button
|
// Re-enable button
|
||||||
resetButton.disabled = false;
|
resetButton.disabled = false;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -345,6 +612,97 @@
|
|||||||
notificationsEnabled = notificationsToggle.checked;
|
notificationsEnabled = notificationsToggle.checked;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Poll paper status until it changes from Pending.
//
// Repeatedly fetches /api/papers/<paperId> and mirrors the returned status
// into the papers table badge and the processing banner. Polling stops as
// soon as the status leaves 'Pending', or after maxAttempts checks.
//
// paperId     - id of the paper to watch
// interval    - milliseconds between checks (default 3000)
// maxAttempts - maximum number of checks before giving up (default 20)
function pollPaperStatus(paperId, interval = 3000, maxAttempts = 20) {
    let attempts = 0;

    // Immediately refresh activity log to show the initial pending status
    loadRecentActivity();

    const checkStatus = () => {
        attempts++;
        console.log(`Checking status of paper ${paperId}, attempt ${attempts}/${maxAttempts}`);

        // Fetch the current paper status
        fetch(`/api/papers/${paperId}`)
            .then(response => response.json())
            .then(data => {
                if (data && data.paper) {
                    const paper = data.paper;
                    console.log(`Paper status: ${paper.status}`);

                    // Update the UI with the current status.
                    // FIX: the trigger button may have been re-rendered or removed
                    // while polling, so querySelector can return null — guard it
                    // instead of crashing on `.closest()` of null.
                    const triggerBtn = document.querySelector(`.process-paper-btn[data-paper-id="${paperId}"]`);
                    const row = triggerBtn ? triggerBtn.closest('tr') : null;
                    if (row) {
                        const statusCell = row.querySelector('td:nth-child(4)');
                        let statusBadge = '';

                        if (paper.status === 'New') {
                            statusBadge = '<span class="badge bg-info">New</span>';
                        } else if (paper.status === 'Pending') {
                            statusBadge = '<span class="badge bg-warning text-dark">Pending</span>';
                        } else if (paper.status === 'Done') {
                            statusBadge = '<span class="badge bg-success">Done</span>';
                        } else if (paper.status === 'Failed') {
                            statusBadge = '<span class="badge bg-danger">Failed</span>';
                        } else {
                            statusBadge = `<span class="badge bg-secondary">${paper.status}</span>`;
                        }

                        // FIX: the status cell may be absent if the table layout
                        // changed; skip the badge update rather than throw.
                        if (statusCell) {
                            statusCell.innerHTML = statusBadge;
                        }

                        // Update processing status message if status changed
                        if (paper.status !== 'Pending') {
                            if (paper.status === 'Done') {
                                processingStatus.textContent = `Paper processed successfully: ${paper.title}`;
                                processingStatus.className = 'alert alert-success mt-3';
                            } else if (paper.status === 'Failed') {
                                processingStatus.textContent = `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
                                processingStatus.className = 'alert alert-danger mt-3';
                            }
                        }
                    }

                    // Always refresh activity log
                    loadRecentActivity();

                    // If status is still pending and we haven't reached max attempts, check again
                    if (paper.status === 'Pending' && attempts < maxAttempts) {
                        setTimeout(checkStatus, interval);
                    } else {
                        // If status changed or we reached max attempts, refresh chart data too
                        loadActivityStats(currentTimeRange);

                        // Show notification if status changed
                        if (paper.status !== 'Pending') {
                            const status = paper.status === 'Done' ? 'success' : 'error';
                            const message = paper.status === 'Done'
                                ? `Paper processed successfully: ${paper.title}`
                                : `Paper processing failed: ${paper.error_msg || 'Unknown error'}`;
                            showFlashMessage(message, status);
                        }

                        // If we hit max attempts but status is still pending, show a message
                        if (paper.status === 'Pending' && attempts >= maxAttempts) {
                            processingStatus.textContent = 'Paper is still being processed. Check the activity log for updates.';
                            processingStatus.className = 'alert alert-info mt-3';
                        }
                    }
                }
            })
            .catch(error => {
                console.error(`Error polling paper status: ${error}`);
                // If there's an error, we can still try again if under max attempts
                if (attempts < maxAttempts) {
                    setTimeout(checkStatus, interval);
                }
            });
    };

    // Start checking
    setTimeout(checkStatus, interval);
}
|
||||||
|
|
||||||
// Load data functions
|
// Load data functions
|
||||||
function loadActivityStats(hours) {
|
function loadActivityStats(hours) {
|
||||||
fetch(`/scraper/stats?hours=${hours}`)
|
fetch(`/scraper/stats?hours=${hours}`)
|
||||||
@ -359,8 +717,10 @@
|
|||||||
.then(response => response.json())
|
.then(response => response.json())
|
||||||
.then(data => {
|
.then(data => {
|
||||||
renderActivityLog(data);
|
renderActivityLog(data);
|
||||||
|
console.log("Activity log refreshed with latest data");
|
||||||
})
|
})
|
||||||
.catch(() => {
|
.catch((error) => {
|
||||||
|
console.error("Failed to load activity logs:", error);
|
||||||
// If the API endpoint doesn't exist, just show a message
|
// If the API endpoint doesn't exist, just show a message
|
||||||
activityLog.innerHTML = '<tr><td colspan="4" class="text-center">Activity log API not available</td></tr>';
|
activityLog.innerHTML = '<tr><td colspan="4" class="text-center">Activity log API not available</td></tr>';
|
||||||
});
|
});
|
||||||
@ -467,6 +827,26 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Show a transient Bootstrap alert ("flash message") on the page.
// The app-level type 'error' maps onto Bootstrap's 'danger' variant;
// any other value (e.g. 'success', 'info') is used as the variant as-is.
function showFlashMessage(message, type) {
    // Translate our notification type into the Bootstrap alert variant.
    let variant;
    if (type === 'error') {
        variant = 'danger';
    } else {
        variant = type;
    }

    const note = document.createElement('div');
    note.className = 'alert alert-' + variant + ' alert-dismissible fade show notification';
    note.innerHTML = `
        ${message}
        <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
    `;

    document.body.appendChild(note);

    // Auto dismiss after 5 seconds: drop the 'show' class to start the fade,
    // then remove the node once the 150ms fade-out animation has finished.
    setTimeout(() => {
        note.classList.remove('show');
        setTimeout(() => note.remove(), 150);
    }, 5000);
}
|
||||||
|
|
||||||
// WebSocket for real-time notifications
|
// WebSocket for real-time notifications
|
||||||
function setupWebSocket() {
|
function setupWebSocket() {
|
||||||
// If WebSocket is available, implement it here
|
// If WebSocket is available, implement it here
|
||||||
|
Loading…
x
Reference in New Issue
Block a user