creates timeline for scraper activity
This commit is contained in:
parent
5c5afefe40
commit
88e180bc94
@ -55,42 +55,38 @@ def start_scraper():
|
||||
try:
|
||||
# Handle both JSON and form data
|
||||
if request.is_json:
|
||||
data = request.get_json() or {}
|
||||
data = request.get_json()
|
||||
# Allow empty JSON payload for start requests
|
||||
if data is None:
|
||||
data = {}
|
||||
else:
|
||||
data = request.form.to_dict()
|
||||
|
||||
scraper_name = data.get('scraper_name', 'dummy')
|
||||
|
||||
return jsonify({"success": False, "message": "Invalid payload format. Expected JSON."}), 400
|
||||
|
||||
# Start the scraper using manager
|
||||
result = scraper_manager.start_scraper()
|
||||
|
||||
|
||||
if result["status"] == "success":
|
||||
ActivityLog.log_scraper_command(
|
||||
action="start_scraper",
|
||||
status="success",
|
||||
description="Started scraper with hourly scheduling"
|
||||
status="success",
|
||||
description="Scraper started successfully."
|
||||
)
|
||||
|
||||
return jsonify({
|
||||
"success": True,
|
||||
"message": result["message"]
|
||||
})
|
||||
return jsonify({"success": True, "message": result["message"]})
|
||||
else:
|
||||
return jsonify({
|
||||
"success": False,
|
||||
"message": result["message"]
|
||||
}), 400
|
||||
|
||||
ActivityLog.log_scraper_command(
|
||||
action="start_scraper",
|
||||
status="failure",
|
||||
description=f"Failed to start scraper: {result['message']}"
|
||||
)
|
||||
return jsonify({"success": False, "message": result["message"]}), 400
|
||||
|
||||
except Exception as e:
|
||||
ActivityLog.log_scraper_command(
|
||||
action="start_scraper",
|
||||
status="error",
|
||||
description=f"Failed to start scraper: {str(e)}"
|
||||
)
|
||||
return jsonify({
|
||||
"success": False,
|
||||
"message": f"Error starting scraper: {str(e)}"
|
||||
}), 500
|
||||
return jsonify({"success": False, "message": f"An error occurred: {str(e)}"}), 500
|
||||
|
||||
@bp.route("/pause", methods=["POST"])
|
||||
def pause_scraper():
|
||||
@ -428,40 +424,96 @@ def get_stats():
|
||||
try:
|
||||
hours = int(request.args.get('hours', 24))
|
||||
current_time = datetime.utcnow()
|
||||
cutoff_time = current_time.replace(minute=0, second=0, microsecond=0)
|
||||
|
||||
# Get activity logs for scraper actions in the last N hours
|
||||
from ..models import ActivityCategory
|
||||
start_time = cutoff_time - timedelta(hours=hours)
|
||||
start_time = current_time - timedelta(hours=hours)
|
||||
logs = ActivityLog.query.filter(
|
||||
ActivityLog.category == ActivityCategory.SCRAPER_ACTIVITY.value,
|
||||
ActivityLog.timestamp >= start_time
|
||||
).all()
|
||||
|
||||
# Get scraper command logs for state changes in the same time period
|
||||
state_logs = ActivityLog.query.filter(
|
||||
ActivityLog.category == ActivityCategory.SCRAPER_COMMAND.value,
|
||||
ActivityLog.action.in_(['start_scraper', 'pause_scraper', 'stop_scraper', 'reset_scraper']),
|
||||
ActivityLog.timestamp >= start_time
|
||||
).order_by(ActivityLog.timestamp.asc()).all()
|
||||
|
||||
# Group by hour and status
|
||||
stats = {}
|
||||
# Group by chronological hour buckets (not hour of day)
|
||||
stats = []
|
||||
for hour_offset in range(hours):
|
||||
target_hour = (current_time.hour - hour_offset) % 24
|
||||
stats[target_hour] = {
|
||||
# Calculate the hour bucket (most recent hour first when hour_offset=0)
|
||||
bucket_end_time = current_time - timedelta(hours=hour_offset)
|
||||
bucket_start_time = bucket_end_time - timedelta(hours=1)
|
||||
|
||||
# Format hour label for display (e.g., "14:00-15:00" or "14:00" for simplicity)
|
||||
hour_label = bucket_start_time.strftime("%H:%M")
|
||||
|
||||
# Initialize counters for this hour bucket
|
||||
bucket_stats = {
|
||||
"success": 0,
|
||||
"error": 0,
|
||||
"pending": 0,
|
||||
"hour": target_hour,
|
||||
"hour": hour_label,
|
||||
"hour_offset": hour_offset, # For sorting
|
||||
"bucket_start": bucket_start_time,
|
||||
"bucket_end": bucket_end_time,
|
||||
"scraper_active": 0 # Default to inactive
|
||||
}
|
||||
|
||||
# Count logs that fall within this hour bucket
|
||||
for log in logs:
|
||||
if bucket_start_time <= log.timestamp < bucket_end_time:
|
||||
if log.status == "success":
|
||||
bucket_stats["success"] += 1
|
||||
elif log.status == "error":
|
||||
bucket_stats["error"] += 1
|
||||
elif log.status in ("pending", "info"):
|
||||
bucket_stats["pending"] += 1
|
||||
|
||||
# Determine scraper status for this hour by checking if scraper was active
|
||||
# For simplicity, check if there were any successful scrapes in this hour
|
||||
# If there were scrapes, assume scraper was active
|
||||
bucket_stats["scraper_active"] = 1 if bucket_stats["success"] > 0 else 0
|
||||
|
||||
stats.append(bucket_stats)
|
||||
|
||||
# Reverse so oldest hour comes first (better for chronological chart display)
|
||||
stats.reverse()
|
||||
|
||||
for log in logs:
|
||||
hour = log.timestamp.hour
|
||||
if hour in stats:
|
||||
if log.status == "success":
|
||||
stats[hour]["success"] += 1
|
||||
elif log.status == "error":
|
||||
stats[hour]["error"] += 1
|
||||
elif log.status in ("pending", "info"):
|
||||
stats[hour]["pending"] += 1
|
||||
|
||||
# Convert to list for easier consumption by JavaScript
|
||||
result = [stats[hour] for hour in sorted(stats.keys())]
|
||||
return jsonify(result)
|
||||
# Prepare precise scraper state changes for timeline
|
||||
scraper_timeline = []
|
||||
for log in state_logs:
|
||||
# Calculate hours ago from current time
|
||||
time_diff = current_time - log.timestamp
|
||||
hours_ago = time_diff.total_seconds() / 3600
|
||||
|
||||
# Only include logs within our time range
|
||||
if hours_ago <= hours:
|
||||
scraper_timeline.append({
|
||||
"timestamp": log.timestamp.isoformat(),
|
||||
"hours_ago": hours_ago,
|
||||
"action": log.action,
|
||||
"status": log.status,
|
||||
"active": 1 if log.action == "start_scraper" and log.status == "success" else 0
|
||||
})
|
||||
|
||||
# Clean up the response (remove internal fields)
|
||||
result = []
|
||||
for stat in stats:
|
||||
result.append({
|
||||
"success": stat["success"],
|
||||
"error": stat["error"],
|
||||
"pending": stat["pending"],
|
||||
"hour": stat["hour"],
|
||||
"scraper_active": stat["scraper_active"]
|
||||
})
|
||||
|
||||
return jsonify({
|
||||
"hourly_stats": result,
|
||||
"scraper_timeline": scraper_timeline
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({
|
||||
|
@ -2,10 +2,15 @@
|
||||
* Chart utilities for activity visualization
|
||||
*/
|
||||
|
||||
/**
|
||||
* Chart utilities for activity visualization
|
||||
*/
|
||||
|
||||
class ActivityChart {
|
||||
constructor(canvasId) {
|
||||
this.canvasId = canvasId;
|
||||
this.chart = null;
|
||||
this.scraperChart = null;
|
||||
this.initChart();
|
||||
}
|
||||
|
||||
@ -24,12 +29,29 @@ class ActivityChart {
|
||||
return;
|
||||
}
|
||||
|
||||
// Set canvas height directly
|
||||
chartElement.style.height = "300px";
|
||||
chartElement.height = 300;
|
||||
|
||||
this.ctx = chartElement.getContext("2d");
|
||||
|
||||
// Initialize scraper activity chart
|
||||
this.initScraperChart();
|
||||
}
|
||||
|
||||
initScraperChart() {
|
||||
const scraperChartElement = document.getElementById("scraperActivityChart");
|
||||
if (!scraperChartElement) {
|
||||
console.warn("Scraper activity chart element not found");
|
||||
return;
|
||||
}
|
||||
|
||||
this.scraperCtx = scraperChartElement.getContext("2d");
|
||||
}
|
||||
|
||||
/**
|
||||
* Render the activity chart with provided data
|
||||
* @param {Array} data - Chart data array
|
||||
* @param {Object} data - Chart data object with hourly_stats and scraper_timeline
|
||||
*/
|
||||
render(data) {
|
||||
if (!this.ctx) {
|
||||
@ -37,17 +59,30 @@ class ActivityChart {
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract the data for the chart
|
||||
const labels = data.map((item) => `${item.hour}:00`);
|
||||
const successData = data.map((item) => item.success);
|
||||
const errorData = data.map((item) => item.error);
|
||||
const pendingData = data.map((item) => item.pending);
|
||||
console.log("Render received data:", data);
|
||||
|
||||
// Destroy existing chart if it exists
|
||||
// Handle both old and new data formats for compatibility
|
||||
const hourlyStats = data.hourly_stats || data;
|
||||
const scraperTimeline = data.scraper_timeline || [];
|
||||
|
||||
console.log("Extracted hourlyStats:", hourlyStats);
|
||||
console.log("Extracted scraperTimeline:", scraperTimeline);
|
||||
|
||||
// Extract the data for the main chart (papers only)
|
||||
const labels = hourlyStats.map((item) => item.hour);
|
||||
const successData = hourlyStats.map((item) => item.success);
|
||||
const errorData = hourlyStats.map((item) => item.error);
|
||||
const pendingData = hourlyStats.map((item) => item.pending);
|
||||
|
||||
// Destroy existing charts if they exist
|
||||
if (this.chart) {
|
||||
this.chart.destroy();
|
||||
}
|
||||
if (this.scraperChart) {
|
||||
this.scraperChart.destroy();
|
||||
}
|
||||
|
||||
// Render main chart (papers only)
|
||||
this.chart = new Chart(this.ctx, {
|
||||
type: "bar",
|
||||
data: {
|
||||
@ -57,34 +92,52 @@ class ActivityChart {
|
||||
label: "Success",
|
||||
data: successData,
|
||||
backgroundColor: "#28a745",
|
||||
stack: "Stack 0",
|
||||
stack: "Papers",
|
||||
},
|
||||
{
|
||||
label: "Error",
|
||||
data: errorData,
|
||||
backgroundColor: "#dc3545",
|
||||
stack: "Stack 0",
|
||||
stack: "Papers",
|
||||
},
|
||||
{
|
||||
label: "Pending",
|
||||
data: pendingData,
|
||||
backgroundColor: "#ffc107",
|
||||
stack: "Stack 0",
|
||||
stack: "Papers",
|
||||
},
|
||||
],
|
||||
},
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
maintainAspectRatio: true,
|
||||
aspectRatio: 2.5,
|
||||
layout: {
|
||||
padding: {
|
||||
top: 20,
|
||||
bottom: 20,
|
||||
},
|
||||
},
|
||||
plugins: {
|
||||
legend: {
|
||||
position: "top",
|
||||
},
|
||||
tooltip: {
|
||||
mode: "index",
|
||||
intersect: false,
|
||||
},
|
||||
},
|
||||
scales: {
|
||||
x: {
|
||||
stacked: true,
|
||||
title: {
|
||||
display: true,
|
||||
text: "Hour",
|
||||
text: "Time (Last Hours)",
|
||||
},
|
||||
},
|
||||
y: {
|
||||
type: "linear",
|
||||
display: true,
|
||||
stacked: true,
|
||||
beginAtZero: true,
|
||||
title: {
|
||||
@ -95,6 +148,224 @@ class ActivityChart {
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Render scraper activity timeline chart with precise timing
|
||||
this.renderScraperChart(labels, scraperTimeline, hourlyStats.length);
|
||||
|
||||
// Show simple legend for scraper activity
|
||||
this.showScraperStateLegend();
|
||||
}
|
||||
|
||||
/**
|
||||
* Render the separate scraper activity timeline chart with precise timestamps
|
||||
* @param {Array} hourLabels - Hour labels for main chart
|
||||
* @param {Array} scraperTimeline - Timeline of scraper state changes
|
||||
* @param {number} totalHours - Total hours range being displayed
|
||||
*/
|
||||
renderScraperChart(hourLabels, scraperTimeline, totalHours) {
|
||||
if (!this.scraperCtx) {
|
||||
console.warn("Scraper chart context not available");
|
||||
return;
|
||||
}
|
||||
|
||||
let timelineData = [];
|
||||
|
||||
if (scraperTimeline && scraperTimeline.length > 0) {
|
||||
console.log("Original scraper timeline:", scraperTimeline);
|
||||
|
||||
// Filter out duplicate events with the same action, status, and hours_ago
|
||||
const uniqueTimeline = scraperTimeline.filter((event, index, self) => {
|
||||
return (
|
||||
index ===
|
||||
self.findIndex(
|
||||
(e) =>
|
||||
e.action === event.action &&
|
||||
e.status === event.status &&
|
||||
e.hours_ago === event.hours_ago
|
||||
)
|
||||
);
|
||||
});
|
||||
|
||||
console.log("Filtered unique timeline:", uniqueTimeline);
|
||||
|
||||
// Sort timeline by hours_ago (oldest first = highest hours_ago first)
|
||||
const sortedTimeline = [...uniqueTimeline].sort(
|
||||
(a, b) => b.hours_ago - a.hours_ago
|
||||
);
|
||||
|
||||
console.log("Sorted scraper timeline:", sortedTimeline);
|
||||
|
||||
// Create simple timeline with relative positions
|
||||
let currentState = 0;
|
||||
|
||||
// Use hours_ago directly as x-coordinates (inverted so recent is on right)
|
||||
for (let i = 0; i < sortedTimeline.length; i++) {
|
||||
const event = sortedTimeline[i];
|
||||
|
||||
console.log(`Processing event ${i}:`, event);
|
||||
|
||||
// Set the new state based on the action
|
||||
if (event.action === "start_scraper" && event.status === "success") {
|
||||
currentState = 1;
|
||||
} else if (
|
||||
event.action === "stop_scraper" &&
|
||||
event.status === "success"
|
||||
) {
|
||||
currentState = 0;
|
||||
} else if (
|
||||
event.action === "reset_scraper" &&
|
||||
event.status === "success"
|
||||
) {
|
||||
currentState = 0;
|
||||
} else if (
|
||||
event.action === "pause_scraper" &&
|
||||
event.status === "success"
|
||||
) {
|
||||
currentState = 0; // Treat pause as inactive
|
||||
}
|
||||
|
||||
console.log(
|
||||
`New state for ${event.action}: ${currentState} at ${event.hours_ago}h ago`
|
||||
);
|
||||
|
||||
// Use negative hours_ago so recent events are on the right
|
||||
timelineData.push({
|
||||
x: -event.hours_ago,
|
||||
y: currentState,
|
||||
});
|
||||
}
|
||||
|
||||
// Add current time point
|
||||
timelineData.push({
|
||||
x: 0, // Current time
|
||||
y: currentState,
|
||||
});
|
||||
|
||||
console.log("Final timeline data:", timelineData);
|
||||
} else {
|
||||
// No timeline data, show as inactive
|
||||
timelineData = [{ x: 0, y: 0 }];
|
||||
}
|
||||
|
||||
this.scraperChart = new Chart(this.scraperCtx, {
|
||||
type: "line",
|
||||
data: {
|
||||
datasets: [
|
||||
{
|
||||
label: "Scraper Active",
|
||||
data: timelineData,
|
||||
borderColor: "#28a745",
|
||||
backgroundColor: "rgba(40, 167, 69, 0.1)",
|
||||
borderWidth: 3,
|
||||
fill: true,
|
||||
stepped: "before", // Creates step transitions
|
||||
pointRadius: 5,
|
||||
pointHoverRadius: 7,
|
||||
pointBackgroundColor: "#28a745",
|
||||
pointBorderColor: "#ffffff",
|
||||
pointBorderWidth: 2,
|
||||
tension: 0,
|
||||
},
|
||||
],
|
||||
},
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: true,
|
||||
aspectRatio: 10,
|
||||
layout: {
|
||||
padding: {
|
||||
top: 10,
|
||||
bottom: 10,
|
||||
},
|
||||
},
|
||||
plugins: {
|
||||
legend: {
|
||||
display: false,
|
||||
},
|
||||
tooltip: {
|
||||
callbacks: {
|
||||
label: function (context) {
|
||||
const status =
|
||||
context.parsed.y === 1 ? "Activated" : "Deactivated";
|
||||
const timestamp = new Date();
|
||||
timestamp.setHours(
|
||||
timestamp.getHours() - Math.abs(context.parsed.x)
|
||||
);
|
||||
const formattedTime = timestamp.toLocaleString("en-GB", {
|
||||
hour: "2-digit",
|
||||
minute: "2-digit",
|
||||
second: "2-digit",
|
||||
day: "2-digit",
|
||||
month: "2-digit",
|
||||
year: "numeric",
|
||||
});
|
||||
return `Scraper: ${status} at ${formattedTime}`;
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
scales: {
|
||||
x: {
|
||||
type: "linear",
|
||||
title: {
|
||||
display: true,
|
||||
text: "Timeline (Hours Ago → Now)",
|
||||
},
|
||||
ticks: {
|
||||
callback: function (value) {
|
||||
if (value === 0) return "Now";
|
||||
return `${Math.abs(value)}h ago`;
|
||||
},
|
||||
},
|
||||
grid: {
|
||||
display: true,
|
||||
},
|
||||
},
|
||||
y: {
|
||||
type: "linear",
|
||||
display: true,
|
||||
beginAtZero: true,
|
||||
max: 1.2,
|
||||
min: -0.2,
|
||||
title: {
|
||||
display: true,
|
||||
text: "Active Status",
|
||||
},
|
||||
ticks: {
|
||||
stepSize: 1,
|
||||
callback: function (value) {
|
||||
return value === 1 ? "Active" : value === 0 ? "Inactive" : "";
|
||||
},
|
||||
},
|
||||
grid: {
|
||||
color: function (context) {
|
||||
return context.tick.value === 0.5
|
||||
? "rgba(0,0,0,0.1)"
|
||||
: "rgba(0,0,0,0.05)";
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Show a simple legend for scraper states
|
||||
*/
|
||||
showScraperStateLegend() {
|
||||
let legendContainer = document.getElementById("scraper-state-legend");
|
||||
if (!legendContainer) {
|
||||
return;
|
||||
}
|
||||
|
||||
legendContainer.classList.remove("d-none");
|
||||
legendContainer.innerHTML = `
|
||||
<small class="text-muted">
|
||||
<i class="fas fa-info-circle"></i>
|
||||
The line chart below shows exact timestamps when the scraper was started or stopped with proper time intervals.
|
||||
</small>
|
||||
`;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -131,5 +402,9 @@ class ActivityChart {
|
||||
this.chart.destroy();
|
||||
this.chart = null;
|
||||
}
|
||||
if (this.scraperChart) {
|
||||
this.scraperChart.destroy();
|
||||
this.scraperChart = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,27 @@
|
||||
|
||||
.chart-wrapper {
|
||||
position: relative;
|
||||
height: 400px;
|
||||
height: 400px !important;
|
||||
width: 100%;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.chart-wrapper canvas {
|
||||
height: 400px !important;
|
||||
}
|
||||
|
||||
.scraper-chart-wrapper {
|
||||
position: relative;
|
||||
height: 150px !important;
|
||||
width: 100%;
|
||||
border-top: 1px solid #e0e0e0;
|
||||
padding-top: 15px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.scraper-chart-wrapper canvas {
|
||||
display: block;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.search-results-container {
|
||||
@ -120,9 +140,10 @@
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<form id="searchPaperForm" class="mb-3">
|
||||
<label for="paperSearchInput">Search paper:</label>
|
||||
<div class="input-group">
|
||||
<input type="text" id="paperSearchInput" class="form-control"
|
||||
placeholder="Search paper by title, DOI, or ID...">
|
||||
placeholder="By title, DOI, or ID...">
|
||||
<button class="btn btn-outline-secondary" type="submit">Search</button>
|
||||
</div>
|
||||
</form>
|
||||
@ -185,9 +206,24 @@
|
||||
<button class="btn btn-outline-secondary time-range-btn" data-hours="72">Last 3
|
||||
days</button>
|
||||
</div>
|
||||
|
||||
<!-- Scraper State Legend - will be populated by JavaScript -->
|
||||
<div id="scraper-state-legend" class="scraper-state-legend mb-3 d-none">
|
||||
<small class="text-muted">Scraper Status Timeline:</small><br>
|
||||
<span class="badge bg-secondary">Loading state information...</span>
|
||||
</div>
|
||||
|
||||
<div class="chart-wrapper">
|
||||
<canvas id="activityChart"></canvas>
|
||||
</div>
|
||||
|
||||
<!-- Scraper Activity Timeline Chart -->
|
||||
<div class="scraper-chart-wrapper">
|
||||
<h6 class="text-muted mb-2">
|
||||
<i class="fas fa-power-off"></i> Scraper Activity Timeline
|
||||
</h6>
|
||||
<canvas id="scraperActivityChart"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -227,6 +263,8 @@
|
||||
{% block scripts %}
|
||||
{{ super() }}
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script
|
||||
src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns/dist/chartjs-adapter-date-fns.bundle.min.js"></script>
|
||||
|
||||
<!-- Modular JavaScript files -->
|
||||
<script src="{{ url_for('static', filename='js/common.js') }}"></script>
|
||||
|
Loading…
x
Reference in New Issue
Block a user