feat: Implement runner execution pipeline with planner dispatch and execution services
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Docs CI / lint-and-preview (push) Has been cancelled
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Docs CI / lint-and-preview (push) Has been cancelled
				
			- Introduced RunnerBackgroundService to handle execution of runner segments.
			- Added RunnerExecutionService for processing segments and aggregating results.
			- Implemented PlannerQueueDispatchService to manage dispatching of planner messages.
			- Created PlannerQueueDispatcherBackgroundService for leasing and processing planner queue messages.
			- Developed ScannerReportClient for interacting with the scanner service.
			- Enhanced observability with SchedulerWorkerMetrics for tracking planner and runner performance.
			- Added comprehensive documentation for the new runner execution pipeline and observability metrics.
			- Implemented event emission for rescan activity and scanner report readiness.
This commit is contained in:
		
							
								
								
									
										261
									
								
								docs/ops/scheduler-worker-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										261
									
								
								docs/ops/scheduler-worker-grafana-dashboard.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,261 @@ | ||||
| { | ||||
|   "title": "Scheduler Worker – Planning & Rescan", | ||||
|   "uid": "scheduler-worker-observability", | ||||
|   "schemaVersion": 38, | ||||
|   "version": 1, | ||||
|   "editable": true, | ||||
|   "timezone": "", | ||||
|   "graphTooltip": 0, | ||||
|   "time": { | ||||
|     "from": "now-24h", | ||||
|     "to": "now" | ||||
|   }, | ||||
|   "templating": { | ||||
|     "list": [ | ||||
|       { | ||||
|         "name": "datasource", | ||||
|         "type": "datasource", | ||||
|         "query": "prometheus", | ||||
|         "hide": 0, | ||||
|         "refresh": 1, | ||||
|         "current": {} | ||||
|       }, | ||||
|       { | ||||
|         "name": "mode", | ||||
|         "label": "Mode", | ||||
|         "type": "query", | ||||
|         "datasource": { | ||||
|           "type": "prometheus", | ||||
|           "uid": "${datasource}" | ||||
|         }, | ||||
|         "query": "label_values(scheduler_planner_runs_total, mode)", | ||||
|         "refresh": 1, | ||||
|         "multi": true, | ||||
|         "includeAll": true, | ||||
|         "allValue": ".*", | ||||
|         "current": { | ||||
|           "selected": false, | ||||
|           "text": "All", | ||||
|           "value": ".*" | ||||
|         } | ||||
|       } | ||||
|     ] | ||||
|   }, | ||||
|   "annotations": { | ||||
|     "list": [] | ||||
|   }, | ||||
|   "panels": [ | ||||
|     { | ||||
|       "id": 1, | ||||
|       "title": "Planner Runs per Status", | ||||
|       "type": "timeseries", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "ops", | ||||
|           "displayName": "${__field.labels.status}" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "legend": { | ||||
|           "displayMode": "table", | ||||
|           "placement": "bottom" | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "sum by (status) (rate(scheduler_planner_runs_total{mode=~\"$mode\"}[5m]))", | ||||
|           "legendFormat": "{{status}}", | ||||
|           "refId": "A" | ||||
|         } | ||||
|       ], | ||||
|       "gridPos": { | ||||
|         "h": 8, | ||||
|         "w": 12, | ||||
|         "x": 0, | ||||
|         "y": 0 | ||||
|       } | ||||
|     }, | ||||
|     { | ||||
|       "id": 2, | ||||
|       "title": "Planner Latency P95 (s)", | ||||
|       "type": "timeseries", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "s" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "legend": { | ||||
|           "displayMode": "table", | ||||
|           "placement": "bottom" | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "histogram_quantile(0.95, sum by (le) (rate(scheduler_planner_latency_seconds_bucket{mode=~\"$mode\"}[5m])))", | ||||
|           "legendFormat": "p95", | ||||
|           "refId": "A" | ||||
|         } | ||||
|       ], | ||||
|       "gridPos": { | ||||
|         "h": 8, | ||||
|         "w": 12, | ||||
|         "x": 12, | ||||
|         "y": 0 | ||||
|       } | ||||
|     }, | ||||
|     { | ||||
|       "id": 3, | ||||
|       "title": "Runner Segments per Status", | ||||
|       "type": "timeseries", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "ops", | ||||
|           "displayName": "${__field.labels.status}" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "legend": { | ||||
|           "displayMode": "table", | ||||
|           "placement": "bottom" | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "sum by (status) (rate(scheduler_runner_segments_total{mode=~\"$mode\"}[5m]))", | ||||
|           "legendFormat": "{{status}}", | ||||
|           "refId": "A" | ||||
|         } | ||||
|       ], | ||||
|       "gridPos": { | ||||
|         "h": 8, | ||||
|         "w": 12, | ||||
|         "x": 0, | ||||
|         "y": 8 | ||||
|       } | ||||
|     }, | ||||
|     { | ||||
|       "id": 4, | ||||
|       "title": "New Findings per Severity", | ||||
|       "type": "timeseries", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "ops" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "legend": { | ||||
|           "displayMode": "table", | ||||
|           "placement": "bottom" | ||||
|         } | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "sum(rate(scheduler_runner_delta_critical_total{mode=~\"$mode\"}[5m]))", | ||||
|           "legendFormat": "critical", | ||||
|           "refId": "A" | ||||
|         }, | ||||
|         { | ||||
|           "expr": "sum(rate(scheduler_runner_delta_high_total{mode=~\"$mode\"}[5m]))", | ||||
|           "legendFormat": "high", | ||||
|           "refId": "B" | ||||
|         }, | ||||
|         { | ||||
|           "expr": "sum(rate(scheduler_runner_delta_total{mode=~\"$mode\"}[5m]))", | ||||
|           "legendFormat": "total", | ||||
|           "refId": "C" | ||||
|         } | ||||
|       ], | ||||
|       "gridPos": { | ||||
|         "h": 8, | ||||
|         "w": 12, | ||||
|         "x": 12, | ||||
|         "y": 8 | ||||
|       } | ||||
|     }, | ||||
|     { | ||||
|       "id": 5, | ||||
|       "title": "Runner Backlog by Schedule", | ||||
|       "type": "table", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "none" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "showHeader": true | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "max by (scheduleId) (scheduler_runner_backlog{mode=~\"$mode\"})", | ||||
|           "format": "table", | ||||
|           "instant": true, | ||||
|           "refId": "A" | ||||
|         } | ||||
|       ], | ||||
|       "gridPos": { | ||||
|         "h": 8, | ||||
|         "w": 12, | ||||
|         "x": 0, | ||||
|         "y": 16 | ||||
|       } | ||||
|     }, | ||||
|     { | ||||
|       "id": 6, | ||||
|       "title": "Active Runs", | ||||
|       "type": "stat", | ||||
|       "datasource": { | ||||
|         "type": "prometheus", | ||||
|         "uid": "${datasource}" | ||||
|       }, | ||||
|       "fieldConfig": { | ||||
|         "defaults": { | ||||
|           "unit": "none" | ||||
|         }, | ||||
|         "overrides": [] | ||||
|       }, | ||||
|       "options": { | ||||
|         "orientation": "horizontal", | ||||
|         "textMode": "value" | ||||
|       }, | ||||
|       "targets": [ | ||||
|         { | ||||
|           "expr": "sum(scheduler_runs_active{mode=~\"$mode\"})", | ||||
|           "refId": "A" | ||||
|         } | ||||
|       ], | ||||
|       "gridPos": { | ||||
|         "h": 8, | ||||
|         "w": 12, | ||||
|         "x": 12, | ||||
|         "y": 16 | ||||
|       } | ||||
|     } | ||||
|   ] | ||||
| } | ||||
		Reference in New Issue
	
	Block a user