creates timeline for scraper activity

This commit is contained in:
Michael Beck 2025-06-11 14:03:35 +02:00
parent 5c5afefe40
commit 88e180bc94
3 changed files with 421 additions and 56 deletions

View File

@ -55,42 +55,38 @@ def start_scraper():
try:
# Handle both JSON and form data
if request.is_json:
data = request.get_json() or {}
data = request.get_json()
# Allow empty JSON payload for start requests
if data is None:
data = {}
else:
data = request.form.to_dict()
scraper_name = data.get('scraper_name', 'dummy')
return jsonify({"success": False, "message": "Invalid payload format. Expected JSON."}), 400
# Start the scraper using manager
result = scraper_manager.start_scraper()
if result["status"] == "success":
ActivityLog.log_scraper_command(
action="start_scraper",
status="success",
description="Started scraper with hourly scheduling"
status="success",
description="Scraper started successfully."
)
return jsonify({
"success": True,
"message": result["message"]
})
return jsonify({"success": True, "message": result["message"]})
else:
return jsonify({
"success": False,
"message": result["message"]
}), 400
ActivityLog.log_scraper_command(
action="start_scraper",
status="failure",
description=f"Failed to start scraper: {result['message']}"
)
return jsonify({"success": False, "message": result["message"]}), 400
except Exception as e:
ActivityLog.log_scraper_command(
action="start_scraper",
status="error",
description=f"Failed to start scraper: {str(e)}"
)
return jsonify({
"success": False,
"message": f"Error starting scraper: {str(e)}"
}), 500
return jsonify({"success": False, "message": f"An error occurred: {str(e)}"}), 500
@bp.route("/pause", methods=["POST"])
def pause_scraper():
@ -428,40 +424,96 @@ def get_stats():
try:
hours = int(request.args.get('hours', 24))
current_time = datetime.utcnow()
cutoff_time = current_time.replace(minute=0, second=0, microsecond=0)
# Get activity logs for scraper actions in the last N hours
from ..models import ActivityCategory
start_time = cutoff_time - timedelta(hours=hours)
start_time = current_time - timedelta(hours=hours)
logs = ActivityLog.query.filter(
ActivityLog.category == ActivityCategory.SCRAPER_ACTIVITY.value,
ActivityLog.timestamp >= start_time
).all()
# Get scraper command logs for state changes in the same time period
state_logs = ActivityLog.query.filter(
ActivityLog.category == ActivityCategory.SCRAPER_COMMAND.value,
ActivityLog.action.in_(['start_scraper', 'pause_scraper', 'stop_scraper', 'reset_scraper']),
ActivityLog.timestamp >= start_time
).order_by(ActivityLog.timestamp.asc()).all()
# Group by hour and status
stats = {}
# Group by chronological hour buckets (not hour of day)
stats = []
for hour_offset in range(hours):
target_hour = (current_time.hour - hour_offset) % 24
stats[target_hour] = {
# Calculate the hour bucket (most recent hour first when hour_offset=0)
bucket_end_time = current_time - timedelta(hours=hour_offset)
bucket_start_time = bucket_end_time - timedelta(hours=1)
# Format hour label for display (e.g., "14:00-15:00" or "14:00" for simplicity)
hour_label = bucket_start_time.strftime("%H:%M")
# Initialize counters for this hour bucket
bucket_stats = {
"success": 0,
"error": 0,
"pending": 0,
"hour": target_hour,
"hour": hour_label,
"hour_offset": hour_offset, # For sorting
"bucket_start": bucket_start_time,
"bucket_end": bucket_end_time,
"scraper_active": 0 # Default to inactive
}
# Count logs that fall within this hour bucket
for log in logs:
if bucket_start_time <= log.timestamp < bucket_end_time:
if log.status == "success":
bucket_stats["success"] += 1
elif log.status == "error":
bucket_stats["error"] += 1
elif log.status in ("pending", "info"):
bucket_stats["pending"] += 1
# Determine scraper status for this hour by checking if scraper was active
# For simplicity, check if there were any successful scrapes in this hour
# If there were scrapes, assume scraper was active
bucket_stats["scraper_active"] = 1 if bucket_stats["success"] > 0 else 0
stats.append(bucket_stats)
# Reverse so oldest hour comes first (better for chronological chart display)
stats.reverse()
for log in logs:
hour = log.timestamp.hour
if hour in stats:
if log.status == "success":
stats[hour]["success"] += 1
elif log.status == "error":
stats[hour]["error"] += 1
elif log.status in ("pending", "info"):
stats[hour]["pending"] += 1
# Convert to list for easier consumption by JavaScript
result = [stats[hour] for hour in sorted(stats.keys())]
return jsonify(result)
# Prepare precise scraper state changes for timeline
scraper_timeline = []
for log in state_logs:
# Calculate hours ago from current time
time_diff = current_time - log.timestamp
hours_ago = time_diff.total_seconds() / 3600
# Only include logs within our time range
if hours_ago <= hours:
scraper_timeline.append({
"timestamp": log.timestamp.isoformat(),
"hours_ago": hours_ago,
"action": log.action,
"status": log.status,
"active": 1 if log.action == "start_scraper" and log.status == "success" else 0
})
# Clean up the response (remove internal fields)
result = []
for stat in stats:
result.append({
"success": stat["success"],
"error": stat["error"],
"pending": stat["pending"],
"hour": stat["hour"],
"scraper_active": stat["scraper_active"]
})
return jsonify({
"hourly_stats": result,
"scraper_timeline": scraper_timeline
})
except Exception as e:
return jsonify({

View File

@ -2,10 +2,15 @@
* Chart utilities for activity visualization
*/
/**
* Chart utilities for activity visualization
*/
class ActivityChart {
/**
 * Create the chart controller and run its one-time initialisation.
 * @param {string} canvasId - Stored on the instance; presumably the DOM id
 *   of the main activity canvas (the lookup itself is not visible here — confirm in initChart).
 */
constructor(canvasId) {
this.canvasId = canvasId;
// Chart.js instances; both stay null until a render creates them
this.chart = null;
this.scraperChart = null;
this.initChart();
}
@ -24,12 +29,29 @@ class ActivityChart {
return;
}
// Set canvas height directly
chartElement.style.height = "300px";
chartElement.height = 300;
this.ctx = chartElement.getContext("2d");
// Initialize scraper activity chart
this.initScraperChart();
}
initScraperChart() {
const scraperChartElement = document.getElementById("scraperActivityChart");
if (!scraperChartElement) {
console.warn("Scraper activity chart element not found");
return;
}
this.scraperCtx = scraperChartElement.getContext("2d");
}
/**
* Render the activity chart with provided data
* @param {Array} data - Chart data array
* @param {Object} data - Chart data object with hourly_stats and scraper_timeline
*/
render(data) {
if (!this.ctx) {
@ -37,17 +59,30 @@ class ActivityChart {
return;
}
// Extract the data for the chart
const labels = data.map((item) => `${item.hour}:00`);
const successData = data.map((item) => item.success);
const errorData = data.map((item) => item.error);
const pendingData = data.map((item) => item.pending);
console.log("Render received data:", data);
// Destroy existing chart if it exists
// Handle both old and new data formats for compatibility
const hourlyStats = data.hourly_stats || data;
const scraperTimeline = data.scraper_timeline || [];
console.log("Extracted hourlyStats:", hourlyStats);
console.log("Extracted scraperTimeline:", scraperTimeline);
// Extract the data for the main chart (papers only)
const labels = hourlyStats.map((item) => item.hour);
const successData = hourlyStats.map((item) => item.success);
const errorData = hourlyStats.map((item) => item.error);
const pendingData = hourlyStats.map((item) => item.pending);
// Destroy existing charts if they exist
if (this.chart) {
this.chart.destroy();
}
if (this.scraperChart) {
this.scraperChart.destroy();
}
// Render main chart (papers only)
this.chart = new Chart(this.ctx, {
type: "bar",
data: {
@ -57,34 +92,52 @@ class ActivityChart {
label: "Success",
data: successData,
backgroundColor: "#28a745",
stack: "Stack 0",
stack: "Papers",
},
{
label: "Error",
data: errorData,
backgroundColor: "#dc3545",
stack: "Stack 0",
stack: "Papers",
},
{
label: "Pending",
data: pendingData,
backgroundColor: "#ffc107",
stack: "Stack 0",
stack: "Papers",
},
],
},
options: {
responsive: true,
maintainAspectRatio: false,
maintainAspectRatio: true,
aspectRatio: 2.5,
layout: {
padding: {
top: 20,
bottom: 20,
},
},
plugins: {
legend: {
position: "top",
},
tooltip: {
mode: "index",
intersect: false,
},
},
scales: {
x: {
stacked: true,
title: {
display: true,
text: "Hour",
text: "Time (Last Hours)",
},
},
y: {
type: "linear",
display: true,
stacked: true,
beginAtZero: true,
title: {
@ -95,6 +148,224 @@ class ActivityChart {
},
},
});
// Render scraper activity timeline chart with precise timing
this.renderScraperChart(labels, scraperTimeline, hourlyStats.length);
// Show simple legend for scraper activity
this.showScraperStateLegend();
}
/**
* Render the separate scraper activity timeline chart with precise timestamps
* @param {Array} hourLabels - Hour labels for main chart
* @param {Array} scraperTimeline - Timeline of scraper state changes
* @param {number} totalHours - Total hours range being displayed
*/
renderScraperChart(hourLabels, scraperTimeline, totalHours) {
if (!this.scraperCtx) {
console.warn("Scraper chart context not available");
return;
}
let timelineData = [];
if (scraperTimeline && scraperTimeline.length > 0) {
console.log("Original scraper timeline:", scraperTimeline);
// Filter out duplicate events with the same action, status, and hours_ago
const uniqueTimeline = scraperTimeline.filter((event, index, self) => {
return (
index ===
self.findIndex(
(e) =>
e.action === event.action &&
e.status === event.status &&
e.hours_ago === event.hours_ago
)
);
});
console.log("Filtered unique timeline:", uniqueTimeline);
// Sort timeline by hours_ago (oldest first = highest hours_ago first)
const sortedTimeline = [...uniqueTimeline].sort(
(a, b) => b.hours_ago - a.hours_ago
);
console.log("Sorted scraper timeline:", sortedTimeline);
// Create simple timeline with relative positions
let currentState = 0;
// Use hours_ago directly as x-coordinates (inverted so recent is on right)
for (let i = 0; i < sortedTimeline.length; i++) {
const event = sortedTimeline[i];
console.log(`Processing event ${i}:`, event);
// Set the new state based on the action
if (event.action === "start_scraper" && event.status === "success") {
currentState = 1;
} else if (
event.action === "stop_scraper" &&
event.status === "success"
) {
currentState = 0;
} else if (
event.action === "reset_scraper" &&
event.status === "success"
) {
currentState = 0;
} else if (
event.action === "pause_scraper" &&
event.status === "success"
) {
currentState = 0; // Treat pause as inactive
}
console.log(
`New state for ${event.action}: ${currentState} at ${event.hours_ago}h ago`
);
// Use negative hours_ago so recent events are on the right
timelineData.push({
x: -event.hours_ago,
y: currentState,
});
}
// Add current time point
timelineData.push({
x: 0, // Current time
y: currentState,
});
console.log("Final timeline data:", timelineData);
} else {
// No timeline data, show as inactive
timelineData = [{ x: 0, y: 0 }];
}
this.scraperChart = new Chart(this.scraperCtx, {
type: "line",
data: {
datasets: [
{
label: "Scraper Active",
data: timelineData,
borderColor: "#28a745",
backgroundColor: "rgba(40, 167, 69, 0.1)",
borderWidth: 3,
fill: true,
stepped: "before", // Creates step transitions
pointRadius: 5,
pointHoverRadius: 7,
pointBackgroundColor: "#28a745",
pointBorderColor: "#ffffff",
pointBorderWidth: 2,
tension: 0,
},
],
},
options: {
responsive: true,
maintainAspectRatio: true,
aspectRatio: 10,
layout: {
padding: {
top: 10,
bottom: 10,
},
},
plugins: {
legend: {
display: false,
},
tooltip: {
callbacks: {
label: function (context) {
const status =
context.parsed.y === 1 ? "Activated" : "Deactivated";
const timestamp = new Date();
timestamp.setHours(
timestamp.getHours() - Math.abs(context.parsed.x)
);
const formattedTime = timestamp.toLocaleString("en-GB", {
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
day: "2-digit",
month: "2-digit",
year: "numeric",
});
return `Scraper: ${status} at ${formattedTime}`;
},
},
},
},
scales: {
x: {
type: "linear",
title: {
display: true,
text: "Timeline (Hours Ago → Now)",
},
ticks: {
callback: function (value) {
if (value === 0) return "Now";
return `${Math.abs(value)}h ago`;
},
},
grid: {
display: true,
},
},
y: {
type: "linear",
display: true,
beginAtZero: true,
max: 1.2,
min: -0.2,
title: {
display: true,
text: "Active Status",
},
ticks: {
stepSize: 1,
callback: function (value) {
return value === 1 ? "Active" : value === 0 ? "Inactive" : "";
},
},
grid: {
color: function (context) {
return context.tick.value === 0.5
? "rgba(0,0,0,0.1)"
: "rgba(0,0,0,0.05)";
},
},
},
},
},
});
}
/**
* Show a simple legend for scraper states
*/
showScraperStateLegend() {
let legendContainer = document.getElementById("scraper-state-legend");
if (!legendContainer) {
return;
}
legendContainer.classList.remove("d-none");
legendContainer.innerHTML = `
<small class="text-muted">
<i class="fas fa-info-circle"></i>
The line chart below shows exact timestamps when the scraper was started or stopped with proper time intervals.
</small>
`;
}
/**
@ -131,5 +402,9 @@ class ActivityChart {
this.chart.destroy();
this.chart = null;
}
if (this.scraperChart) {
this.scraperChart.destroy();
this.scraperChart = null;
}
}
}

View File

@ -31,7 +31,27 @@
.chart-wrapper {
position: relative;
height: 400px;
height: 400px !important;
width: 100%;
overflow: hidden;
}
.chart-wrapper canvas {
height: 400px !important;
}
.scraper-chart-wrapper {
position: relative;
height: 150px !important;
width: 100%;
border-top: 1px solid #e0e0e0;
padding-top: 15px;
overflow: hidden;
}
.scraper-chart-wrapper canvas {
display: block;
width: 100%;
}
.search-results-container {
@ -120,9 +140,10 @@
<div class="row">
<div class="col-md-6">
<form id="searchPaperForm" class="mb-3">
<label for="paperSearchInput">Search paper:</label>
<div class="input-group">
<input type="text" id="paperSearchInput" class="form-control"
placeholder="Search paper by title, DOI, or ID...">
placeholder="By title, DOI, or ID...">
<button class="btn btn-outline-secondary" type="submit">Search</button>
</div>
</form>
@ -185,9 +206,24 @@
<button class="btn btn-outline-secondary time-range-btn" data-hours="72">Last 3
days</button>
</div>
<!-- Scraper State Legend - will be populated by JavaScript -->
<div id="scraper-state-legend" class="scraper-state-legend mb-3 d-none">
<small class="text-muted">Scraper Status Timeline:</small><br>
<span class="badge bg-secondary">Loading state information...</span>
</div>
<div class="chart-wrapper">
<canvas id="activityChart"></canvas>
</div>
<!-- Scraper Activity Timeline Chart -->
<div class="scraper-chart-wrapper">
<h6 class="text-muted mb-2">
<i class="fas fa-power-off"></i> Scraper Activity Timeline
</h6>
<canvas id="scraperActivityChart"></canvas>
</div>
</div>
</div>
</div>
@ -227,6 +263,8 @@
{% block scripts %}
{{ super() }}
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script
src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns/dist/chartjs-adapter-date-fns.bundle.min.js"></script>
<!-- Modular JavaScript files -->
<script src="{{ url_for('static', filename='js/common.js') }}"></script>