/**
 * Scraper Overview functionality.
 *
 * Controls the "scraperOverviewModal" dialog: whenever the modal is shown it
 * fetches the scraper list, the system status and the publisher statistics,
 * then renders them into the modal's placeholder elements.
 *
 * Every server-supplied value that is written through `innerHTML` is passed
 * through `escapeHtml` first so that it is rendered as text.
 *
 * NOTE(review): as they appear in this file, the strings assigned to
 * `innerHTML` contain text and whitespace only (no element markup), and the
 * values produced by `getStatusBadgeClass` / the `defaultClass` argument of
 * `renderStatusBadges` are not interpolated anywhere. The literals are
 * reproduced unchanged here — confirm them against the intended templates.
 */
class ScraperOverview {
  constructor() {
    this.modal = null;
    this.scrapers = [];
    this.systemConfig = {};
    // Declared up front so readers see every piece of instance state.
    this.publishersData = null;
    this.init();
  }

  /**
   * Looks up the modal element and reloads the overview each time it opens.
   */
  init() {
    // Initialize modal reference
    this.modal = document.getElementById("scraperOverviewModal");

    // Load data when modal is shown
    if (this.modal) {
      this.modal.addEventListener("show.bs.modal", () => {
        this.loadScraperOverview();
      });
    }
  }

  /**
   * Fetches scrapers, status and publishers in parallel and refreshes the UI.
   * Toggles the loading / error / content containers via the `d-none` class.
   * Failures are logged and shown in "scraperOverviewErrorMessage"; the
   * returned promise never rejects.
   *
   * @returns {Promise<void>}
   */
  async loadScraperOverview() {
    const loadingEl = document.getElementById("scraperOverviewLoading");
    const errorEl = document.getElementById("scraperOverviewError");
    const contentEl = document.getElementById("scraperOverviewContent");

    // Show loading state
    loadingEl?.classList.remove("d-none");
    errorEl?.classList.add("d-none");
    contentEl?.classList.add("d-none");

    try {
      // Load scrapers, system config, and publishers in parallel
      const responses = await Promise.all([
        fetch("/scraper/scrapers"),
        fetch("/scraper/status"),
        fetch("/scraper/publishers"),
      ]);

      if (responses.some((response) => !response.ok)) {
        throw new Error("Failed to load scraper information");
      }

      const payloads = await Promise.all(
        responses.map((response) => response.json())
      );

      // Report the message of the payload that actually failed, not the
      // first message found on any (possibly successful) payload.
      const failedIndex = payloads.findIndex((payload) => !payload?.success);
      if (failedIndex !== -1) {
        throw new Error(payloads[failedIndex]?.message || "Unknown error");
      }

      const [scrapersData, statusData, publishersData] = payloads;

      // A successful response without a scraper list renders as an empty table.
      this.scrapers = Array.isArray(scrapersData.scrapers)
        ? scrapersData.scrapers
        : [];
      this.systemConfig = statusData;
      this.publishersData = publishersData.data;

      // Update UI
      this.updateSystemConfig();
      this.updateScrapersTable();
      this.updatePublishersSection();
      this.updateStatusFlowDiagram();

      // Show content
      loadingEl?.classList.add("d-none");
      contentEl?.classList.remove("d-none");
    } catch (error) {
      console.error("Error loading scraper overview:", error);

      // Show error state
      loadingEl?.classList.add("d-none");
      const errorMessage = document.getElementById(
        "scraperOverviewErrorMessage"
      );
      if (errorMessage) {
        errorMessage.textContent =
          error.message || "Failed to load scraper information";
      }
      errorEl?.classList.remove("d-none");
    }
  }

  /**
   * Renders the current module, volume limit, module count and paper counts
   * from `this.systemConfig` / `this.scrapers`. Missing elements are skipped.
   */
  updateSystemConfig() {
    // Current scraper module
    const currentModuleEl = document.getElementById("currentScraperModule");
    if (currentModuleEl) {
      currentModuleEl.textContent =
        this.systemConfig.current_scraper_module || "System Default";
      currentModuleEl.className = "badge bg-primary";
    }

    // Volume limit (`??` so that a limit of 0 is displayed rather than "Unknown")
    const volumeLimitEl = document.getElementById("currentVolumeLimit");
    if (volumeLimitEl) {
      volumeLimitEl.textContent = this.systemConfig.volume_config ?? "Unknown";
    }

    // Total modules
    const totalModulesEl = document.getElementById("totalScraperModules");
    if (totalModulesEl) {
      totalModulesEl.textContent = this.scrapers.length;
    }

    // Paper counts summary
    const paperCountsEl = document.getElementById("paperCountsSummary");
    if (paperCountsEl && this.systemConfig.paper_counts) {
      const counts = this.systemConfig.paper_counts;
      const summary = [
        ["new", "New"],
        ["processing", "Processing"],
        ["done", "Done"],
        ["failed", "Failed"],
        ["pending", "Pending"],
        ["retrying", "Retrying"],
      ]
        .map(([key, label]) => `${this.escapeHtml(counts[key] || 0)} ${label}`)
        .join(" ");
      paperCountsEl.innerHTML = `\n${summary}\n`;
    }
  }

  /**
   * Rebuilds "scrapersTableBody" with one row per scraper. Scrapers carrying
   * an `error` get a name + error row; the scraper matching
   * `systemConfig.current_scraper_module` is highlighted with `table-success`.
   */
  updateScrapersTable() {
    const tbody = document.getElementById("scrapersTableBody");
    if (!tbody) return;

    tbody.innerHTML = "";

    this.scrapers.forEach((scraper) => {
      const row = document.createElement("tr");

      // Check if this is the current active scraper
      const isCurrentScraper =
        scraper.name === this.systemConfig.current_scraper_module;

      let cells;
      if (scraper.error) {
        cells = [this.escapeHtml(scraper.name), this.escapeHtml(scraper.error)];
      } else {
        cells = [
          this.escapeHtml(scraper.name),
          scraper.name === "dummy" ? "Test Module" : "",
          isCurrentScraper ? " Active" : "",
          this.escapeHtml(this.truncateDescription(scraper.description)),
          this.renderStatusBadges(scraper.input_statuses, "bg-info"),
          // escapeHtml maps a missing status to "" instead of "undefined".
          this.escapeHtml(scraper.output_status_success),
          this.escapeHtml(scraper.output_status_failure),
          this.escapeHtml(scraper.output_status_processing),
        ];
      }
      row.innerHTML = ` ${cells.join(" ")} `;

      // Highlight the current scraper row
      if (isCurrentScraper) {
        row.classList.add("table-success");
      }

      tbody.appendChild(row);
    });
  }

  /**
   * Renders the stages returned by `analyzeScraperFlow` into
   * "statusFlowDiagram".
   */
  updateStatusFlowDiagram() {
    const diagramEl = document.getElementById("statusFlowDiagram");
    if (!diagramEl) return;

    // Analyze actual scrapers to build real flow
    const statusFlow = this.analyzeScraperFlow();

    let diagramHTML = "\n";

    // Create visual flow based on actual scrapers
    statusFlow.forEach((stage, index) => {
      if (index > 0) {
        diagramHTML += "\n";
      }

      diagramHTML += "\n";
      diagramHTML += `\n${this.escapeHtml(stage.title)}\n`;

      if (stage.scrapers && stage.scrapers.length > 0) {
        const handlers = stage.scrapers
          .map((name) => this.escapeHtml(name))
          .join(", ");
        diagramHTML += `\nHandled by: ${handlers}\n`;
      }

      diagramHTML += "\n";
      diagramHTML += stage.statuses
        .map((status) => this.escapeHtml(status))
        .join("");
      diagramHTML += "\n";

      if (stage.description) {
        diagramHTML += `\n${this.escapeHtml(stage.description)}\n`;
      }

      diagramHTML += "\n";
    });

    diagramHTML += "\n";

    // Add explanation
    diagramHTML += "\nFlow Explanation:\n";

    diagramEl.innerHTML = diagramHTML;
  }

  /**
   * Derives the display stages of the status flow from `this.scrapers`.
   *
   * @returns {Array<{title: string, statuses: string[], scrapers: string[], description: string}>}
   *   "Entry Point" (only if some scraper accepts "New"), "Processing Stages"
   *   (input statuses other than New/Done/Failed, if any), "Final States"
   *   (always present; Done/Failed that some scraper outputs) and
   *   "Retry Processing" (only if some scraper accepts "Failed").
   */
  analyzeScraperFlow() {
    const allInputStatuses = new Set();
    const allOutputStatuses = new Set();
    // Map rather than a plain object: statuses come from the server, and a
    // key such as "constructor" would collide with Object.prototype.
    const scrapersByInput = new Map();

    // Analyze scrapers to understand the flow
    for (const scraper of this.scrapers) {
      if (Array.isArray(scraper.input_statuses)) {
        for (const status of scraper.input_statuses) {
          allInputStatuses.add(status);
          if (!scrapersByInput.has(status)) {
            scrapersByInput.set(status, []);
          }
          scrapersByInput.get(status).push(scraper.name);
        }
      }

      if (scraper.output_status_success) {
        allOutputStatuses.add(scraper.output_status_success);
      }
      if (scraper.output_status_failure) {
        allOutputStatuses.add(scraper.output_status_failure);
      }
    }

    const stages = [];

    // Entry point
    if (allInputStatuses.has("New")) {
      stages.push({
        title: "Entry Point",
        statuses: ["New"],
        scrapers: scrapersByInput.get("New") || [],
        description: "Newly uploaded papers enter the processing pipeline",
      });
    }

    // Processing stages
    const processingStatuses = Array.from(allInputStatuses).filter(
      (status) => !["New", "Done", "Failed"].includes(status)
    );
    if (processingStatuses.length > 0) {
      stages.push({
        title: "Processing Stages",
        statuses: processingStatuses,
        scrapers: [],
        description: "Papers move through various processing stages",
      });
    }

    // Final outputs
    stages.push({
      title: "Final States",
      statuses: ["Done", "Failed"].filter((status) =>
        allOutputStatuses.has(status)
      ),
      scrapers: [],
      description: "Papers end up in final success or failure states",
    });

    // Retry handling
    if (allInputStatuses.has("Failed")) {
      stages.push({
        title: "Retry Processing",
        statuses: ["Failed", "Retrying"],
        scrapers: scrapersByInput.get("Failed") || [],
        description: "Failed papers can be retried with specialized scrapers",
      });
    }

    return stages;
  }

  /**
   * Maps a status name to its Bootstrap background class.
   *
   * @param {string} status
   * @returns {string} The mapped class, or "bg-secondary" for unknown statuses.
   */
  getStatusBadgeClass(status) {
    const statusClasses = {
      New: "bg-primary",
      Pending: "bg-warning",
      Processing: "bg-warning",
      Retrying: "bg-warning",
      Done: "bg-success",
      Failed: "bg-danger",
      HtmlDownloaded: "bg-info",
      PublisherDetected: "bg-info",
      TextExtracted: "bg-info",
    };
    // Own-property check so inherited keys ("constructor", "toString", ...)
    // fall through to the default instead of returning a function.
    return Object.prototype.hasOwnProperty.call(statusClasses, status)
      ? statusClasses[status]
      : "bg-secondary";
  }

  /**
   * Renders a list of statuses as a single HTML-safe string.
   *
   * @param {string[]} statuses
   * @param {string} [defaultClass="bg-secondary"] Accepted for interface
   *   compatibility; not used by the literal as it appears in this file.
   * @returns {string} Escaped statuses concatenated, or "" for a non-array.
   */
  renderStatusBadges(statuses, defaultClass = "bg-secondary") {
    if (!Array.isArray(statuses)) return "";
    return statuses.map((status) => `${this.escapeHtml(status)}`).join("");
  }

  /**
   * Shortens a description for table display.
   *
   * @param {string} description
   * @param {number} [maxLength=100]
   * @returns {string} A placeholder for a falsy description, the text itself
   *   if it fits, otherwise the first `maxLength` characters (trimmed) + "...".
   */
  truncateDescription(description, maxLength = 100) {
    if (!description) return "No description available";
    // Coerce so a non-string description cannot throw on `.substring`.
    const text = String(description);
    if (text.length <= maxLength) return text;
    return text.substring(0, maxLength).trim() + "...";
  }

  /**
   * Escapes a value for safe interpolation into an HTML string.
   *
   * @param {*} value
   * @returns {string} "" for null/undefined, otherwise the string form of
   *   `value` with & < > " ' replaced by character references.
   */
  escapeHtml(value) {
    if (value === null || value === undefined) return "";
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  /**
   * Renders publisher statistics into "publisherStats" and one row per
   * publisher into "publishersTableBody" (or an empty-state message).
   */
  updatePublishersSection() {
    // Update publisher statistics
    const publisherStatsEl = document.getElementById("publisherStats");
    if (publisherStatsEl && this.publishersData && this.publishersData.stats) {
      const stats = this.publishersData.stats;
      const lines = [
        [stats.total_publishers, "Total Publishers"],
        [stats.publishers_with_parsers, "With Parsers"],
        [stats.publishers_without_parsers, "Missing Parsers"],
        [stats.total_papers_with_publisher, "Papers with Publisher"],
      ]
        .map(([value, label]) => `${this.escapeHtml(value)}\n${label}`)
        .join("\n");
      publisherStatsEl.innerHTML = `\n${lines}\n`;
    }

    // Update publishers table
    const publishersTableBody = document.getElementById("publishersTableBody");
    if (
      !publishersTableBody ||
      !this.publishersData ||
      !this.publishersData.publishers
    ) {
      return;
    }

    publishersTableBody.innerHTML = "";

    if (this.publishersData.publishers.length === 0) {
      publishersTableBody.innerHTML =
        " No publishers detected yet.\n" +
        "Run the publisher_detector scraper to identify publishers from paper URLs. ";
      return;
    }

    this.publishersData.publishers.forEach((publisher) => {
      const row = document.createElement("tr");

      // Publisher status badge
      const statusBadge = publisher.has_parser ? " Available" : " Missing";

      // Parser availability indicator
      const parserIndicator = publisher.has_parser ? "" : "";

      const cells = [
        this.escapeHtml(publisher.name),
        this.escapeHtml(publisher.paper_count),
        statusBadge,
        parserIndicator,
      ];
      row.innerHTML = ` ${cells.join(" ")} `;

      publishersTableBody.appendChild(row);
    });
  }

  /**
   * Public method to show the modal. Reuses the Bootstrap instance already
   * bound to the element (if any) instead of constructing one per call.
   */
  show() {
    if (!this.modal) return;
    const bootstrapModal =
      bootstrap.Modal.getInstance(this.modal) ||
      new bootstrap.Modal(this.modal);
    bootstrapModal.show();
  }
}

// Global function to load scraper overview (used by retry button)
function loadScraperOverview() {
  if (window.scraperOverview) {
    window.scraperOverview.loadScraperOverview();
  }
}

// Global function to show scraper overview modal
function showScraperOverview() {
  if (!window.scraperOverview) {
    window.scraperOverview = new ScraperOverview();
  }
  window.scraperOverview.show();
}

// Initialize when DOM is ready. The guard keeps the file loadable where no
// DOM exists (e.g. unit tests running under Node).
if (typeof document !== "undefined") {
  document.addEventListener("DOMContentLoaded", function () {
    window.scraperOverview = new ScraperOverview();
  });
}