feat: Implement runner execution pipeline with planner dispatch and execution services
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled

- Introduced RunnerBackgroundService to handle execution of runner segments.
- Added RunnerExecutionService for processing segments and aggregating results.
- Implemented PlannerQueueDispatchService to manage dispatching of planner messages.
- Created PlannerQueueDispatcherBackgroundService for leasing and processing planner queue messages.
- Developed ScannerReportClient for interacting with the scanner service.
- Enhanced observability with SchedulerWorkerMetrics for tracking planner and runner performance.
- Added comprehensive documentation for the new runner execution pipeline and observability metrics.
- Implemented event emission for rescan activity and scanner report readiness.
This commit is contained in:
Vladimir Moushkov
2025-10-27 18:57:35 +02:00
parent 730354a1af
commit 4d932cc1ba
42 changed files with 3981 additions and 184 deletions

View File

@@ -0,0 +1,261 @@
{
"title": "Scheduler Worker Planning & Rescan",
"uid": "scheduler-worker-observability",
"schemaVersion": 38,
"version": 1,
"editable": true,
"timezone": "",
"graphTooltip": 0,
"time": {
"from": "now-24h",
"to": "now"
},
"templating": {
"list": [
{
"name": "datasource",
"type": "datasource",
"query": "prometheus",
"hide": 0,
"refresh": 1,
"current": {}
},
{
"name": "mode",
"label": "Mode",
"type": "query",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"query": "label_values(scheduler_planner_runs_total, mode)",
"refresh": 1,
"multi": true,
"includeAll": true,
"allValue": ".*",
"current": {
"selected": false,
"text": "All",
"value": ".*"
}
}
]
},
"annotations": {
"list": []
},
"panels": [
{
"id": 1,
"title": "Planner Runs per Status",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"unit": "ops",
"displayName": "${__field.labels.status}"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom"
}
},
"targets": [
{
"expr": "sum by (status) (rate(scheduler_planner_runs_total{mode=~\"$mode\"}[5m]))",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
}
},
{
"id": 2,
"title": "Planner Latency P95 (s)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"unit": "s"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom"
}
},
"targets": [
{
"expr": "histogram_quantile(0.95, sum by (le) (rate(scheduler_planner_latency_seconds_bucket{mode=~\"$mode\"}[5m])))",
"legendFormat": "p95",
"refId": "A"
}
],
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
}
},
{
"id": 3,
"title": "Runner Segments per Status",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"unit": "ops",
"displayName": "${__field.labels.status}"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom"
}
},
"targets": [
{
"expr": "sum by (status) (rate(scheduler_runner_segments_total{mode=~\"$mode\"}[5m]))",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
}
},
{
"id": 4,
"title": "New Findings per Severity",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"unit": "ops"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom"
}
},
"targets": [
{
"expr": "sum(rate(scheduler_runner_delta_critical_total{mode=~\"$mode\"}[5m]))",
"legendFormat": "critical",
"refId": "A"
},
{
"expr": "sum(rate(scheduler_runner_delta_high_total{mode=~\"$mode\"}[5m]))",
"legendFormat": "high",
"refId": "B"
},
{
"expr": "sum(rate(scheduler_runner_delta_total{mode=~\"$mode\"}[5m]))",
"legendFormat": "total",
"refId": "C"
}
],
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
}
},
{
"id": 5,
"title": "Runner Backlog by Schedule",
"type": "table",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"unit": "none"
},
"overrides": []
},
"options": {
"showHeader": true
},
"targets": [
{
"expr": "max by (scheduleId) (scheduler_runner_backlog{mode=~\"$mode\"})",
"format": "table",
"refId": "A"
}
],
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
}
},
{
"id": 6,
"title": "Active Runs",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"unit": "none"
},
"overrides": []
},
"options": {
"orientation": "horizontal",
"textMode": "value"
},
"targets": [
{
"expr": "sum(scheduler_runs_active{mode=~\"$mode\"})",
"refId": "A"
}
],
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
}
}
]
}