Files
git.stella-ops.org/devops/observability/grafana/hlc-queue-metrics.json

291 lines
8.1 KiB
JSON

{
"dashboard": {
"id": null,
"uid": "stellaops-hlc-metrics",
"title": "StellaOps HLC Queue Metrics",
"description": "Hybrid Logical Clock ordering metrics for the Scheduler queue",
"tags": ["stellaops", "hlc", "scheduler", "audit"],
"timezone": "utc",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": {
"from": "now-1h",
"to": "now"
},
"panels": [
{
"id": 1,
"title": "HLC Tick Rate",
"description": "Rate of HLC tick operations per second",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "line", "lineInterpolation": "smooth" }
}
},
"targets": [
{
"expr": "rate(hlc_ticks_total[1m])",
"legendFormat": "{{node_id}}",
"refId": "A"
}
]
},
{
"id": 2,
"title": "Clock Skew Rejections",
"description": "Total HLC rejections over the last hour caused by clock skew exceeding tolerance",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 10 }
]
}
}
},
"targets": [
{
"expr": "sum(increase(hlc_clock_skew_rejections_total[1h]))",
"refId": "A"
}
]
},
{
"id": 3,
"title": "Physical Time Offset",
"description": "Maximum difference between HLC physical time and wall clock across all reporting series, in milliseconds",
"type": "gauge",
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
"fieldConfig": {
"defaults": {
"unit": "ms",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 100 },
{ "color": "red", "value": 1000 }
]
},
"max": 5000
}
},
"targets": [
{
"expr": "max(hlc_physical_time_offset_seconds) * 1000",
"refId": "A"
}
]
},
{
"id": 4,
"title": "Scheduler HLC Enqueues",
"description": "Rate of jobs enqueued with HLC timestamps",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "bars", "fillOpacity": 50 }
}
},
"targets": [
{
"expr": "rate(scheduler_hlc_enqueues_total[5m])",
"legendFormat": "{{tenant_id}}",
"refId": "A"
}
]
},
{
"id": 5,
"title": "Chain Verifications",
"description": "Rate of chain verification operations by result",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"fieldConfig": {
"defaults": {
"unit": "ops"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "valid" },
"properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }]
},
{
"matcher": { "id": "byName", "options": "invalid" },
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
}
]
},
"targets": [
{
"expr": "rate(scheduler_chain_verifications_total[5m])",
"legendFormat": "{{result}}",
"refId": "A"
}
]
},
{
"id": 6,
"title": "Verification Failures",
"description": "Chain verification failures over the last hour - indicates tampering or corruption",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 8 },
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 1 }
]
}
}
},
"targets": [
{
"expr": "sum(increase(scheduler_chain_verification_failures_total[1h]))",
"refId": "A"
}
]
},
{
"id": 7,
"title": "Batch Snapshots",
"description": "Number of batch snapshots created over the last hour",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 8 },
"fieldConfig": {
"defaults": {
"unit": "short"
}
},
"targets": [
{
"expr": "sum(increase(scheduler_batch_snapshots_total[1h]))",
"refId": "A"
}
]
},
{
"id": 8,
"title": "Air-Gap Bundle Exports",
"description": "Rate of air-gap bundles exported",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 16 },
"fieldConfig": {
"defaults": {
"unit": "ops"
}
},
"targets": [
{
"expr": "rate(airgap_bundles_exported_total[5m])",
"legendFormat": "{{node_id}}",
"refId": "A"
}
]
},
{
"id": 9,
"title": "Air-Gap Bundle Imports",
"description": "Rate of air-gap bundles imported",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 16 },
"fieldConfig": {
"defaults": {
"unit": "ops"
}
},
"targets": [
{
"expr": "rate(airgap_bundles_imported_total[5m])",
"legendFormat": "imported",
"refId": "A"
}
]
},
{
"id": 10,
"title": "Air-Gap Merge Conflicts",
"description": "Merge conflicts by type during air-gap sync",
"type": "stat",
"gridPos": { "h": 4, "w": 8, "x": 16, "y": 16 },
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 10 }
]
}
}
},
"targets": [
{
"expr": "sum by (conflict_type) (increase(airgap_merge_conflicts_total[1h]))",
"legendFormat": "{{conflict_type}}",
"refId": "A"
}
]
},
{
"id": 11,
"title": "Sync Duration",
"description": "Air-gap sync operation duration percentiles",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 20 },
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(airgap_sync_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(airgap_sync_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(airgap_sync_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
]
}
],
"annotations": {
"list": [
{
"name": "Deployments",
"datasource": "-- Grafana --",
"enable": true,
"iconColor": "blue"
}
]
}
},
"folderId": 0,
"overwrite": true
}