feat: Implement runner execution pipeline with planner dispatch and execution services
- Introduced RunnerBackgroundService to handle execution of runner segments.
- Added RunnerExecutionService for processing segments and aggregating results.
- Implemented PlannerQueueDispatchService to manage dispatching of planner messages.
- Created PlannerQueueDispatcherBackgroundService for leasing and processing planner queue messages.
- Developed ScannerReportClient for interacting with the scanner service.
- Enhanced observability with SchedulerWorkerMetrics for tracking planner and runner performance.
- Added comprehensive documentation for the new runner execution pipeline and observability metrics.
- Implemented event emission for rescan activity and scanner report readiness.
This commit is contained in:
		
							
								
								
									
										261
									
								
								docs/ops/scheduler-worker-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										261
									
								
								docs/ops/scheduler-worker-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,261 @@
 | 
			
		||||
{
 | 
			
		||||
  "title": "Scheduler Worker – Planning & Rescan",
 | 
			
		||||
  "uid": "scheduler-worker-observability",
 | 
			
		||||
  "schemaVersion": 38,
 | 
			
		||||
  "version": 1,
 | 
			
		||||
  "editable": true,
 | 
			
		||||
  "timezone": "",
 | 
			
		||||
  "graphTooltip": 0,
 | 
			
		||||
  "time": {
 | 
			
		||||
    "from": "now-24h",
 | 
			
		||||
    "to": "now"
 | 
			
		||||
  },
 | 
			
		||||
  "templating": {
 | 
			
		||||
    "list": [
 | 
			
		||||
      {
 | 
			
		||||
        "name": "datasource",
 | 
			
		||||
        "type": "datasource",
 | 
			
		||||
        "query": "prometheus",
 | 
			
		||||
        "hide": 0,
 | 
			
		||||
        "refresh": 1,
 | 
			
		||||
        "current": {}
 | 
			
		||||
      },
 | 
			
		||||
      {
 | 
			
		||||
        "name": "mode",
 | 
			
		||||
        "label": "Mode",
 | 
			
		||||
        "type": "query",
 | 
			
		||||
        "datasource": {
 | 
			
		||||
          "type": "prometheus",
 | 
			
		||||
          "uid": "${datasource}"
 | 
			
		||||
        },
 | 
			
		||||
        "query": "label_values(scheduler_planner_runs_total, mode)",
 | 
			
		||||
        "refresh": 1,
 | 
			
		||||
        "multi": true,
 | 
			
		||||
        "includeAll": true,
 | 
			
		||||
        "allValue": ".*",
 | 
			
		||||
        "current": {
 | 
			
		||||
          "selected": false,
 | 
			
		||||
          "text": "All",
 | 
			
		||||
          "value": ".*"
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    ]
 | 
			
		||||
  },
 | 
			
		||||
  "annotations": {
 | 
			
		||||
    "list": []
 | 
			
		||||
  },
 | 
			
		||||
  "panels": [
 | 
			
		||||
    {
 | 
			
		||||
      "id": 1,
 | 
			
		||||
      "title": "Planner Runs per Status",
 | 
			
		||||
      "type": "timeseries",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "unit": "ops",
          "displayName": "${__field.labels.status}"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "legend": {
 | 
			
		||||
          "displayMode": "table",
 | 
			
		||||
          "placement": "bottom"
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "sum by (status) (rate(scheduler_planner_runs_total{mode=~\"$mode\"}[5m]))",
 | 
			
		||||
          "legendFormat": "{{status}}",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        }
 | 
			
		||||
      ],
 | 
			
		||||
      "gridPos": {
 | 
			
		||||
        "h": 8,
 | 
			
		||||
        "w": 12,
 | 
			
		||||
        "x": 0,
 | 
			
		||||
        "y": 0
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 2,
 | 
			
		||||
      "title": "Planner Latency P95 (s)",
 | 
			
		||||
      "type": "timeseries",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "unit": "s"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "legend": {
 | 
			
		||||
          "displayMode": "table",
 | 
			
		||||
          "placement": "bottom"
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "histogram_quantile(0.95, sum by (le) (rate(scheduler_planner_latency_seconds_bucket{mode=~\"$mode\"}[5m])))",
 | 
			
		||||
          "legendFormat": "p95",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        }
 | 
			
		||||
      ],
 | 
			
		||||
      "gridPos": {
 | 
			
		||||
        "h": 8,
 | 
			
		||||
        "w": 12,
 | 
			
		||||
        "x": 12,
 | 
			
		||||
        "y": 0
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 3,
 | 
			
		||||
      "title": "Runner Segments per Status",
 | 
			
		||||
      "type": "timeseries",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "unit": "ops",
          "displayName": "${__field.labels.status}"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "legend": {
 | 
			
		||||
          "displayMode": "table",
 | 
			
		||||
          "placement": "bottom"
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "sum by (status) (rate(scheduler_runner_segments_total{mode=~\"$mode\"}[5m]))",
 | 
			
		||||
          "legendFormat": "{{status}}",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        }
 | 
			
		||||
      ],
 | 
			
		||||
      "gridPos": {
 | 
			
		||||
        "h": 8,
 | 
			
		||||
        "w": 12,
 | 
			
		||||
        "x": 0,
 | 
			
		||||
        "y": 8
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 4,
 | 
			
		||||
      "title": "New Findings per Severity",
 | 
			
		||||
      "type": "timeseries",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
          "unit": "ops"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "legend": {
 | 
			
		||||
          "displayMode": "table",
 | 
			
		||||
          "placement": "bottom"
 | 
			
		||||
        }
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "sum(rate(scheduler_runner_delta_critical_total{mode=~\"$mode\"}[5m]))",
 | 
			
		||||
          "legendFormat": "critical",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "sum(rate(scheduler_runner_delta_high_total{mode=~\"$mode\"}[5m]))",
 | 
			
		||||
          "legendFormat": "high",
 | 
			
		||||
          "refId": "B"
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "sum(rate(scheduler_runner_delta_total{mode=~\"$mode\"}[5m]))",
 | 
			
		||||
          "legendFormat": "total",
 | 
			
		||||
          "refId": "C"
 | 
			
		||||
        }
 | 
			
		||||
      ],
 | 
			
		||||
      "gridPos": {
 | 
			
		||||
        "h": 8,
 | 
			
		||||
        "w": 12,
 | 
			
		||||
        "x": 12,
 | 
			
		||||
        "y": 8
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 5,
 | 
			
		||||
      "title": "Runner Backlog by Schedule",
 | 
			
		||||
      "type": "table",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
          "unit": "none"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "showHeader": true
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "max by (scheduleId) (scheduler_runner_backlog{mode=~\"$mode\"})",
 | 
			
		||||
          "format": "table",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        }
 | 
			
		||||
      ],
 | 
			
		||||
      "gridPos": {
 | 
			
		||||
        "h": 8,
 | 
			
		||||
        "w": 12,
 | 
			
		||||
        "x": 0,
 | 
			
		||||
        "y": 16
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "id": 6,
 | 
			
		||||
      "title": "Active Runs",
 | 
			
		||||
      "type": "stat",
 | 
			
		||||
      "datasource": {
 | 
			
		||||
        "type": "prometheus",
 | 
			
		||||
        "uid": "${datasource}"
 | 
			
		||||
      },
 | 
			
		||||
      "fieldConfig": {
 | 
			
		||||
        "defaults": {
 | 
			
		||||
          "unit": "none"
 | 
			
		||||
        },
 | 
			
		||||
        "overrides": []
 | 
			
		||||
      },
 | 
			
		||||
      "options": {
 | 
			
		||||
        "orientation": "horizontal",
 | 
			
		||||
        "textMode": "value"
 | 
			
		||||
      },
 | 
			
		||||
      "targets": [
 | 
			
		||||
        {
 | 
			
		||||
          "expr": "sum(scheduler_runs_active{mode=~\"$mode\"})",
 | 
			
		||||
          "refId": "A"
 | 
			
		||||
        }
 | 
			
		||||
      ],
 | 
			
		||||
      "gridPos": {
 | 
			
		||||
        "h": 8,
 | 
			
		||||
        "w": 12,
 | 
			
		||||
        "x": 12,
 | 
			
		||||
        "y": 16
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  ]
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user