Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Introduced RunnerBackgroundService to handle execution of runner segments. - Added RunnerExecutionService for processing segments and aggregating results. - Implemented PlannerQueueDispatchService to manage dispatching of planner messages. - Created PlannerQueueDispatcherBackgroundService for leasing and processing planner queue messages. - Developed ScannerReportClient for interacting with the scanner service. - Enhanced observability with SchedulerWorkerMetrics for tracking planner and runner performance. - Added comprehensive documentation for the new runner execution pipeline and observability metrics. - Implemented event emission for rescan activity and scanner report readiness.
262 lines
5.5 KiB
JSON
262 lines
5.5 KiB
JSON
{
|
||
"title": "Scheduler Worker – Planning & Rescan",
|
||
"uid": "scheduler-worker-observability",
|
||
"schemaVersion": 38,
|
||
"version": 1,
|
||
"editable": true,
|
||
"timezone": "",
|
||
"graphTooltip": 0,
|
||
"time": {
|
||
"from": "now-24h",
|
||
"to": "now"
|
||
},
|
||
"templating": {
|
||
"list": [
|
||
{
|
||
"name": "datasource",
|
||
"type": "datasource",
|
||
"query": "prometheus",
|
||
"hide": 0,
|
||
"refresh": 1,
|
||
"current": {}
|
||
},
|
||
{
|
||
"name": "mode",
|
||
"label": "Mode",
|
||
"type": "query",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"query": "label_values(scheduler_planner_runs_total, mode)",
|
||
"refresh": 1,
|
||
"multi": true,
|
||
"includeAll": true,
|
||
"allValue": ".*",
|
||
"current": {
|
||
"selected": false,
|
||
"text": "All",
|
||
"value": ".*"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"annotations": {
|
||
"list": []
|
||
},
|
||
"panels": [
|
||
{
|
||
"id": 1,
|
||
"title": "Planner Runs per Status",
|
||
"type": "timeseries",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"fieldConfig": {
|
||
"defaults": {
|
||
"unit": "ops",
|
||
"displayName": "${__field.labels.status}"
|
||
},
|
||
"overrides": []
|
||
},
|
||
"options": {
|
||
"legend": {
|
||
"displayMode": "table",
|
||
"placement": "bottom"
|
||
}
|
||
},
|
||
"targets": [
|
||
{
|
||
"expr": "sum by (status) (rate(scheduler_planner_runs_total{mode=~\"$mode\"}[5m]))",
|
||
"legendFormat": "{{status}}",
|
||
"refId": "A"
|
||
}
|
||
],
|
||
"gridPos": {
|
||
"h": 8,
|
||
"w": 12,
|
||
"x": 0,
|
||
"y": 0
|
||
}
|
||
},
|
||
{
|
||
"id": 2,
|
||
"title": "Planner Latency P95 (s)",
|
||
"type": "timeseries",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"fieldConfig": {
|
||
"defaults": {
|
||
"unit": "s"
|
||
},
|
||
"overrides": []
|
||
},
|
||
"options": {
|
||
"legend": {
|
||
"displayMode": "table",
|
||
"placement": "bottom"
|
||
}
|
||
},
|
||
"targets": [
|
||
{
|
||
"expr": "histogram_quantile(0.95, sum by (le) (rate(scheduler_planner_latency_seconds_bucket{mode=~\"$mode\"}[5m])))",
|
||
"legendFormat": "p95",
|
||
"refId": "A"
|
||
}
|
||
],
|
||
"gridPos": {
|
||
"h": 8,
|
||
"w": 12,
|
||
"x": 12,
|
||
"y": 0
|
||
}
|
||
},
|
||
{
|
||
"id": 3,
|
||
"title": "Runner Segments per Status",
|
||
"type": "timeseries",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"fieldConfig": {
|
||
"defaults": {
|
||
"unit": "ops",
|
||
"displayName": "${__field.labels.status}"
|
||
},
|
||
"overrides": []
|
||
},
|
||
"options": {
|
||
"legend": {
|
||
"displayMode": "table",
|
||
"placement": "bottom"
|
||
}
|
||
},
|
||
"targets": [
|
||
{
|
||
"expr": "sum by (status) (rate(scheduler_runner_segments_total{mode=~\"$mode\"}[5m]))",
|
||
"legendFormat": "{{status}}",
|
||
"refId": "A"
|
||
}
|
||
],
|
||
"gridPos": {
|
||
"h": 8,
|
||
"w": 12,
|
||
"x": 0,
|
||
"y": 8
|
||
}
|
||
},
|
||
{
|
||
"id": 4,
|
||
"title": "New Findings per Severity",
|
||
"type": "timeseries",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"fieldConfig": {
|
||
"defaults": {
|
||
"unit": "ops"
|
||
},
|
||
"overrides": []
|
||
},
|
||
"options": {
|
||
"legend": {
|
||
"displayMode": "table",
|
||
"placement": "bottom"
|
||
}
|
||
},
|
||
"targets": [
|
||
{
|
||
"expr": "sum(rate(scheduler_runner_delta_critical_total{mode=~\"$mode\"}[5m]))",
|
||
"legendFormat": "critical",
|
||
"refId": "A"
|
||
},
|
||
{
|
||
"expr": "sum(rate(scheduler_runner_delta_high_total{mode=~\"$mode\"}[5m]))",
|
||
"legendFormat": "high",
|
||
"refId": "B"
|
||
},
|
||
{
|
||
"expr": "sum(rate(scheduler_runner_delta_total{mode=~\"$mode\"}[5m]))",
|
||
"legendFormat": "total",
|
||
"refId": "C"
|
||
}
|
||
],
|
||
"gridPos": {
|
||
"h": 8,
|
||
"w": 12,
|
||
"x": 12,
|
||
"y": 8
|
||
}
|
||
},
|
||
{
|
||
"id": 5,
|
||
"title": "Runner Backlog by Schedule",
|
||
"type": "table",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"fieldConfig": {
|
||
"defaults": {
|
||
"unit": "none"
|
||
},
|
||
"overrides": []
|
||
},
|
||
"options": {
|
||
"showHeader": true
|
||
},
|
||
"targets": [
|
||
{
|
||
"expr": "max by (scheduleId) (scheduler_runner_backlog{mode=~\"$mode\"})",
|
||
"format": "table",
|
||
"refId": "A"
|
||
}
|
||
],
|
||
"gridPos": {
|
||
"h": 8,
|
||
"w": 12,
|
||
"x": 0,
|
||
"y": 16
|
||
}
|
||
},
|
||
{
|
||
"id": 6,
|
||
"title": "Active Runs",
|
||
"type": "stat",
|
||
"datasource": {
|
||
"type": "prometheus",
|
||
"uid": "${datasource}"
|
||
},
|
||
"fieldConfig": {
|
||
"defaults": {
|
||
"unit": "none"
|
||
},
|
||
"overrides": []
|
||
},
|
||
"options": {
|
||
"orientation": "horizontal",
|
||
"textMode": "value"
|
||
},
|
||
"targets": [
|
||
{
|
||
"expr": "sum(scheduler_runs_active{mode=~\"$mode\"})",
|
||
"refId": "A"
|
||
}
|
||
],
|
||
"gridPos": {
|
||
"h": 8,
|
||
"w": 12,
|
||
"x": 12,
|
||
"y": 16
|
||
}
|
||
}
|
||
]
|
||
}
|