diff --git a/scipaperloader/blueprints/scraper.py b/scipaperloader/blueprints/scraper.py index 725ba8e..bc25354 100644 --- a/scipaperloader/blueprints/scraper.py +++ b/scipaperloader/blueprints/scraper.py @@ -55,42 +55,38 @@ def start_scraper(): try: # Handle both JSON and form data if request.is_json: - data = request.get_json() or {} + data = request.get_json() + # Allow empty JSON payload for start requests + if data is None: + data = {} else: - data = request.form.to_dict() - - scraper_name = data.get('scraper_name', 'dummy') - + return jsonify({"success": False, "message": "Invalid payload format. Expected JSON."}), 400 + # Start the scraper using manager result = scraper_manager.start_scraper() - + if result["status"] == "success": ActivityLog.log_scraper_command( action="start_scraper", - status="success", - description="Started scraper with hourly scheduling" + status="success", + description="Scraper started successfully." ) - - return jsonify({ - "success": True, - "message": result["message"] - }) + return jsonify({"success": True, "message": result["message"]}) else: - return jsonify({ - "success": False, - "message": result["message"] - }), 400 - + ActivityLog.log_scraper_command( + action="start_scraper", + status="failure", + description=f"Failed to start scraper: {result['message']}" + ) + return jsonify({"success": False, "message": result["message"]}), 400 + except Exception as e: ActivityLog.log_scraper_command( action="start_scraper", status="error", description=f"Failed to start scraper: {str(e)}" ) - return jsonify({ - "success": False, - "message": f"Error starting scraper: {str(e)}" - }), 500 + return jsonify({"success": False, "message": f"An error occurred: {str(e)}"}), 500 @bp.route("/pause", methods=["POST"]) def pause_scraper(): @@ -428,40 +424,96 @@ def get_stats(): try: hours = int(request.args.get('hours', 24)) current_time = datetime.utcnow() - cutoff_time = current_time.replace(minute=0, second=0, microsecond=0) # Get activity logs for scraper actions in the last N hours from ..models import ActivityCategory - start_time = cutoff_time - timedelta(hours=hours) + start_time = current_time - timedelta(hours=hours) logs = ActivityLog.query.filter( ActivityLog.category == ActivityCategory.SCRAPER_ACTIVITY.value, ActivityLog.timestamp >= start_time ).all() + + # Get scraper command logs for state changes in the same time period + state_logs = ActivityLog.query.filter( + ActivityLog.category == ActivityCategory.SCRAPER_COMMAND.value, + ActivityLog.action.in_(['start_scraper', 'pause_scraper', 'stop_scraper', 'reset_scraper']), + ActivityLog.timestamp >= start_time + ).order_by(ActivityLog.timestamp.asc()).all() - # Group by hour and status - stats = {} + # Group by chronological hour buckets (not hour of day) + stats = [] for hour_offset in range(hours): - target_hour = (current_time.hour - hour_offset) % 24 - stats[target_hour] = { + # Calculate the hour bucket (most recent hour first when hour_offset=0) + bucket_end_time = current_time - timedelta(hours=hour_offset) + bucket_start_time = bucket_end_time - timedelta(hours=1) + + # Format hour label for display (e.g., "14:00-15:00" or "14:00" for simplicity) + hour_label = bucket_start_time.strftime("%H:%M") + + # Initialize counters for this hour bucket + bucket_stats = { "success": 0, "error": 0, "pending": 0, - "hour": target_hour, + "hour": hour_label, + "hour_offset": hour_offset, # For sorting + "bucket_start": bucket_start_time, + "bucket_end": bucket_end_time, + "scraper_active": 0 # Default to inactive } + + # Count logs that fall within this hour bucket + for log in logs: + if bucket_start_time <= log.timestamp < bucket_end_time: + if log.status == "success": + bucket_stats["success"] += 1 + elif log.status == "error": + bucket_stats["error"] += 1 + elif log.status in ("pending", "info"): + bucket_stats["pending"] += 1 + + # Determine scraper status for this hour by checking if scraper was active + # For simplicity, check if there were any successful scrapes in this hour + # If there were scrapes, assume scraper was active + bucket_stats["scraper_active"] = 1 if bucket_stats["success"] > 0 else 0 + + stats.append(bucket_stats) + + # Reverse so oldest hour comes first (better for chronological chart display) + stats.reverse() - for log in logs: - hour = log.timestamp.hour - if hour in stats: - if log.status == "success": - stats[hour]["success"] += 1 - elif log.status == "error": - stats[hour]["error"] += 1 - elif log.status in ("pending", "info"): - stats[hour]["pending"] += 1 - - # Convert to list for easier consumption by JavaScript - result = [stats[hour] for hour in sorted(stats.keys())] - return jsonify(result) + # Prepare precise scraper state changes for timeline + scraper_timeline = [] + for log in state_logs: + # Calculate hours ago from current time + time_diff = current_time - log.timestamp + hours_ago = time_diff.total_seconds() / 3600 + + # Only include logs within our time range + if hours_ago <= hours: + scraper_timeline.append({ + "timestamp": log.timestamp.isoformat(), + "hours_ago": hours_ago, + "action": log.action, + "status": log.status, + "active": 1 if log.action == "start_scraper" and log.status == "success" else 0 + }) + + # Clean up the response (remove internal fields) + result = [] + for stat in stats: + result.append({ + "success": stat["success"], + "error": stat["error"], + "pending": stat["pending"], + "hour": stat["hour"], + "scraper_active": stat["scraper_active"] + }) + + return jsonify({ + "hourly_stats": result, + "scraper_timeline": scraper_timeline + }) except Exception as e: return jsonify({ diff --git a/scipaperloader/static/js/chart.js b/scipaperloader/static/js/chart.js index 1a5cf2a..1314168 100644 --- a/scipaperloader/static/js/chart.js +++ b/scipaperloader/static/js/chart.js @@ -2,10 +2,15 @@ * Chart utilities for activity visualization */ +/** + * Chart utilities for activity visualization + */ + class ActivityChart { constructor(canvasId) { this.canvasId = canvasId; this.chart = null; + this.scraperChart = null; this.initChart(); } @@ -24,12 +29,29 @@ class ActivityChart { return; } + // Set canvas height directly + chartElement.style.height = "300px"; + chartElement.height = 300; + this.ctx = chartElement.getContext("2d"); + + // Initialize scraper activity chart + this.initScraperChart(); + } + + initScraperChart() { + const scraperChartElement = document.getElementById("scraperActivityChart"); + if (!scraperChartElement) { + console.warn("Scraper activity chart element not found"); + return; + } + + this.scraperCtx = scraperChartElement.getContext("2d"); } /** * Render the activity chart with provided data - * @param {Array} data - Chart data array + * @param {Object} data - Chart data object with hourly_stats and scraper_timeline */ render(data) { if (!this.ctx) { @@ -37,17 +59,30 @@ class ActivityChart { return; } - // Extract the data for the chart - const labels = data.map((item) => `${item.hour}:00`); - const successData = data.map((item) => item.success); - const errorData = data.map((item) => item.error); - const pendingData = data.map((item) => item.pending); + console.log("Render received data:", data); - // Destroy existing chart if it exists + // Handle both old and new data formats for compatibility + const hourlyStats = data.hourly_stats || data; + const scraperTimeline = data.scraper_timeline || []; + + console.log("Extracted hourlyStats:", hourlyStats); + console.log("Extracted scraperTimeline:", scraperTimeline); + + // Extract the data for the main chart (papers only) + const labels = hourlyStats.map((item) => item.hour); + const successData = hourlyStats.map((item) => item.success); + const errorData = hourlyStats.map((item) => item.error); + const pendingData = hourlyStats.map((item) => item.pending); + + // Destroy existing charts if they exist if (this.chart) { this.chart.destroy(); } + if (this.scraperChart) { + this.scraperChart.destroy(); + } + // Render main chart (papers only) this.chart = new Chart(this.ctx, { type: "bar", data: { @@ -57,34 +92,52 @@ class ActivityChart { label: "Success", data: successData, backgroundColor: "#28a745", - stack: "Stack 0", + stack: "Papers", }, { label: "Error", data: errorData, backgroundColor: "#dc3545", - stack: "Stack 0", + stack: "Papers", }, { label: "Pending", data: pendingData, backgroundColor: "#ffc107", - stack: "Stack 0", + stack: "Papers", }, ], }, options: { responsive: true, - maintainAspectRatio: false, + maintainAspectRatio: true, + aspectRatio: 2.5, + layout: { + padding: { + top: 20, + bottom: 20, + }, + }, + plugins: { + legend: { + position: "top", + }, + tooltip: { + mode: "index", + intersect: false, + }, + }, scales: { x: { stacked: true, title: { display: true, - text: "Hour", + text: "Time (Last Hours)", }, }, y: { + type: "linear", + display: true, stacked: true, beginAtZero: true, title: { @@ -95,6 +148,224 @@ class ActivityChart { }, }, }); + + // Render scraper activity timeline chart with precise timing + this.renderScraperChart(labels, scraperTimeline, hourlyStats.length); + + // Show simple legend for scraper activity + this.showScraperStateLegend(); + } + + /** + * Render the separate scraper activity timeline chart with precise timestamps + * @param {Array} hourLabels - Hour labels for main chart + * @param {Array} scraperTimeline - Timeline of scraper state changes + * @param {number} totalHours - Total hours range being displayed + */ + renderScraperChart(hourLabels, scraperTimeline, totalHours) { + if (!this.scraperCtx) { + console.warn("Scraper chart context not available"); + return; + } + + let timelineData = []; + + if (scraperTimeline && scraperTimeline.length > 0) { + console.log("Original scraper timeline:", scraperTimeline); + + // Filter out duplicate events with the same action, status, and hours_ago + const uniqueTimeline = scraperTimeline.filter((event, index, self) => { + return ( + index === + self.findIndex( + (e) => + e.action === event.action && + e.status === event.status && + e.hours_ago === event.hours_ago + ) + ); + }); + + console.log("Filtered unique timeline:", uniqueTimeline); + + // Sort timeline by hours_ago (oldest first = highest hours_ago first) + const sortedTimeline = [...uniqueTimeline].sort( + (a, b) => b.hours_ago - a.hours_ago + ); + + console.log("Sorted scraper timeline:", sortedTimeline); + + // Create simple timeline with relative positions + let currentState = 0; + + // Use hours_ago directly as x-coordinates (inverted so recent is on right) + for (let i = 0; i < sortedTimeline.length; i++) { + const event = sortedTimeline[i]; + + console.log(`Processing event ${i}:`, event); + + // Set the new state based on the action + if (event.action === "start_scraper" && event.status === "success") { + currentState = 1; + } else if ( + event.action === "stop_scraper" && + event.status === "success" + ) { + currentState = 0; + } else if ( + event.action === "reset_scraper" && + event.status === "success" + ) { + currentState = 0; + } else if ( + event.action === "pause_scraper" && + event.status === "success" + ) { + currentState = 0; // Treat pause as inactive + } + + console.log( + `New state for ${event.action}: ${currentState} at ${event.hours_ago}h ago` + ); + + // Use negative hours_ago so recent events are on the right + timelineData.push({ + x: -event.hours_ago, + y: currentState, + }); + } + + // Add current time point + timelineData.push({ + x: 0, // Current time + y: currentState, + }); + + console.log("Final timeline data:", timelineData); + } else { + // No timeline data, show as inactive + timelineData = [{ x: 0, y: 0 }]; + } + + this.scraperChart = new Chart(this.scraperCtx, { + type: "line", + data: { + datasets: [ + { + label: "Scraper Active", + data: timelineData, + borderColor: "#28a745", + backgroundColor: "rgba(40, 167, 69, 0.1)", + borderWidth: 3, + fill: true, + stepped: "before", // Creates step transitions + pointRadius: 5, + pointHoverRadius: 7, + pointBackgroundColor: "#28a745", + pointBorderColor: "#ffffff", + pointBorderWidth: 2, + tension: 0, + }, + ], + }, + options: { + responsive: true, + maintainAspectRatio: true, + aspectRatio: 10, + layout: { + padding: { + top: 10, + bottom: 10, + }, + }, + plugins: { + legend: { + display: false, + }, + tooltip: { + callbacks: { + label: function (context) { + const status = + context.parsed.y === 1 ? "Activated" : "Deactivated"; + const timestamp = new Date(); + timestamp.setHours( + timestamp.getHours() - Math.abs(context.parsed.x) + ); + const formattedTime = timestamp.toLocaleString("en-GB", { + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + day: "2-digit", + month: "2-digit", + year: "numeric", + }); + return `Scraper: ${status} at ${formattedTime}`; + }, + }, + }, + }, + scales: { + x: { + type: "linear", + title: { + display: true, + text: "Timeline (Hours Ago → Now)", + }, + ticks: { + callback: function (value) { + if (value === 0) return "Now"; + return `${Math.abs(value)}h ago`; + }, + }, + grid: { + display: true, + }, + }, + y: { + type: "linear", + display: true, + beginAtZero: true, + max: 1.2, + min: -0.2, + title: { + display: true, + text: "Active Status", + }, + ticks: { + stepSize: 1, + callback: function (value) { + return value === 1 ? "Active" : value === 0 ? "Inactive" : ""; + }, + }, + grid: { + color: function (context) { + return context.tick.value === 0.5 + ? "rgba(0,0,0,0.1)" + : "rgba(0,0,0,0.05)"; + }, + }, + }, + }, + }, + }); + } + + /** + * Show a simple legend for scraper states + */ + showScraperStateLegend() { + let legendContainer = document.getElementById("scraper-state-legend"); + if (!legendContainer) { + return; + } + + legendContainer.classList.remove("d-none"); + legendContainer.innerHTML = ` + + + The line chart below shows exact timestamps when the scraper was started or stopped with proper time intervals. + + `; } /** @@ -131,5 +402,9 @@ class ActivityChart { this.chart.destroy(); this.chart = null; } + if (this.scraperChart) { + this.scraperChart.destroy(); + this.scraperChart = null; + } } } diff --git a/scipaperloader/templates/scraper.html.jinja b/scipaperloader/templates/scraper.html.jinja index 5b0af8b..c79444a 100644 --- a/scipaperloader/templates/scraper.html.jinja +++ b/scipaperloader/templates/scraper.html.jinja @@ -31,7 +31,27 @@ .chart-wrapper { position: relative; - height: 400px; + height: 400px !important; + width: 100%; + overflow: hidden; + } + + .chart-wrapper canvas { + height: 400px !important; + } + + .scraper-chart-wrapper { + position: relative; + height: 150px !important; + width: 100%; + border-top: 1px solid #e0e0e0; + padding-top: 15px; + overflow: hidden; + } + + .scraper-chart-wrapper canvas { + display: block; + width: 100%; } .search-results-container { @@ -120,9 +140,10 @@