{
  "dashboard": {
    "id": null,
    "uid": "stellaops-hlc-metrics",
    "title": "StellaOps HLC Queue Metrics",
    "description": "Hybrid Logical Clock ordering metrics for the Scheduler queue",
    "tags": ["stellaops", "hlc", "scheduler", "audit"],
    "timezone": "utc",
    "schemaVersion": 39,
    "version": 1,
    "refresh": "30s",
    "time": {
      "from": "now-1h",
      "to": "now"
    },
    "panels": [
      {
        "id": 1,
        "title": "HLC Tick Rate",
        "description": "Rate of HLC tick operations per second",
        "type": "timeseries",
        "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
        "fieldConfig": {
          "defaults": {
            "unit": "ops",
            "custom": { "drawStyle": "line", "lineInterpolation": "smooth" }
          }
        },
        "targets": [
          {
            "expr": "rate(hlc_ticks_total[1m])",
            "legendFormat": "{{node_id}}",
            "refId": "A"
          }
        ]
      },
      {
        "id": 2,
        "title": "Clock Skew Rejections",
        "description": "HLC rejections due to clock skew exceeding tolerance",
        "type": "stat",
        "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
        "fieldConfig": {
          "defaults": {
            "unit": "short",
            "thresholds": {
              "mode": "absolute",
              "steps": [
                { "color": "green", "value": null },
                { "color": "yellow", "value": 1 },
                { "color": "red", "value": 10 }
              ]
            }
          }
        },
        "targets": [
          {
            "expr": "sum(increase(hlc_clock_skew_rejections_total[1h]))",
            "refId": "A"
          }
        ]
      },
      {
        "id": 3,
        "title": "Physical Time Offset",
        "description": "Difference between HLC physical time and wall clock",
        "type": "gauge",
        "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
        "fieldConfig": {
          "defaults": {
            "unit": "ms",
            "thresholds": {
              "mode": "absolute",
              "steps": [
                { "color": "green", "value": null },
                { "color": "yellow", "value": 100 },
                { "color": "red", "value": 1000 }
              ]
            },
            "max": 5000
          }
        },
        "targets": [
          {
            "expr": "max(hlc_physical_time_offset_seconds) * 1000",
            "refId": "A"
          }
        ]
      },
      {
        "id": 4,
        "title": "Scheduler HLC Enqueues",
        "description": "Rate of jobs enqueued with HLC timestamps",
        "type": "timeseries",
        "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
        "fieldConfig": {
          "defaults": {
            "unit": "ops",
            "custom": { "drawStyle": "bars", "fillOpacity": 50 }
          }
        },
        "targets": [
          {
            "expr": "rate(scheduler_hlc_enqueues_total[5m])",
            "legendFormat": "{{tenant_id}}",
            "refId": "A"
          }
        ]
      },
      {
        "id": 5,
        "title": "Chain Verifications",
        "description": "Chain verification operations by result",
        "type": "timeseries",
        "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
        "fieldConfig": {
          "defaults": {
            "unit": "ops"
          },
          "overrides": [
            {
              "matcher": { "id": "byName", "options": "valid" },
              "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }]
            },
            {
              "matcher": { "id": "byName", "options": "invalid" },
              "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
            }
          ]
        },
        "targets": [
          {
            "expr": "rate(scheduler_chain_verifications_total[5m])",
            "legendFormat": "{{result}}",
            "refId": "A"
          }
        ]
      },
      {
        "id": 6,
        "title": "Verification Failures",
        "description": "Chain verification failures in the last hour; any failure indicates tampering or corruption",
        "type": "stat",
        "gridPos": { "h": 4, "w": 6, "x": 12, "y": 8 },
        "fieldConfig": {
          "defaults": {
            "unit": "short",
            "thresholds": {
              "mode": "absolute",
              "steps": [
                { "color": "green", "value": null },
                { "color": "red", "value": 1 }
              ]
            }
          }
        },
        "targets": [
          {
            "expr": "sum(increase(scheduler_chain_verification_failures_total[1h]))",
            "refId": "A"
          }
        ]
      },
      {
        "id": 7,
        "title": "Batch Snapshots",
        "description": "Batch snapshots created in the last hour",
        "type": "stat",
        "gridPos": { "h": 4, "w": 6, "x": 18, "y": 8 },
        "fieldConfig": {
          "defaults": {
            "unit": "short"
          }
        },
        "targets": [
          {
            "expr": "sum(increase(scheduler_batch_snapshots_total[1h]))",
            "refId": "A"
          }
        ]
      },
      {
        "id": 8,
        "title": "Air-Gap Bundle Exports",
        "description": "Rate of air-gap bundles exported",
        "type": "timeseries",
        "gridPos": { "h": 8, "w": 8, "x": 0, "y": 16 },
        "fieldConfig": {
          "defaults": {
            "unit": "ops"
          }
        },
        "targets": [
          {
            "expr": "rate(airgap_bundles_exported_total[5m])",
            "legendFormat": "{{node_id}}",
            "refId": "A"
          }
        ]
      },
      {
        "id": 9,
        "title": "Air-Gap Bundle Imports",
        "description": "Rate of air-gap bundles imported",
        "type": "timeseries",
        "gridPos": { "h": 8, "w": 8, "x": 8, "y": 16 },
        "fieldConfig": {
          "defaults": {
            "unit": "ops"
          }
        },
        "targets": [
          {
            "expr": "rate(airgap_bundles_imported_total[5m])",
            "legendFormat": "imported",
            "refId": "A"
          }
        ]
      },
      {
        "id": 10,
        "title": "Air-Gap Merge Conflicts",
        "description": "Merge conflicts by type during air-gap sync",
        "type": "stat",
        "gridPos": { "h": 4, "w": 8, "x": 16, "y": 16 },
        "fieldConfig": {
          "defaults": {
            "unit": "short",
            "thresholds": {
              "mode": "absolute",
              "steps": [
                { "color": "green", "value": null },
                { "color": "yellow", "value": 1 },
                { "color": "red", "value": 10 }
              ]
            }
          }
        },
        "targets": [
          {
            "expr": "sum by (conflict_type) (increase(airgap_merge_conflicts_total[1h]))",
            "legendFormat": "{{conflict_type}}",
            "refId": "A"
          }
        ]
      },
      {
        "id": 11,
        "title": "Sync Duration",
        "description": "Air-gap sync operation duration percentiles",
        "type": "timeseries",
        "gridPos": { "h": 8, "w": 8, "x": 16, "y": 20 },
        "fieldConfig": {
          "defaults": {
            "unit": "s"
          }
        },
        "targets": [
          {
            "expr": "histogram_quantile(0.50, sum(rate(airgap_sync_duration_seconds_bucket[5m])) by (le))",
            "legendFormat": "p50",
            "refId": "A"
          },
          {
            "expr": "histogram_quantile(0.95, sum(rate(airgap_sync_duration_seconds_bucket[5m])) by (le))",
            "legendFormat": "p95",
            "refId": "B"
          },
          {
            "expr": "histogram_quantile(0.99, sum(rate(airgap_sync_duration_seconds_bucket[5m])) by (le))",
            "legendFormat": "p99",
            "refId": "C"
          }
        ]
      }
    ],
    "annotations": {
      "list": [
        {
          "name": "Deployments",
          "datasource": "-- Grafana --",
          "enable": true,
          "iconColor": "blue"
        }
      ]
    }
  },
  "folderId": 0,
  "overwrite": true
}