CI/CD consolidation
This commit is contained in:
78
devops/observability/grafana/policy-pipeline.json
Normal file
78
devops/observability/grafana/policy-pipeline.json
Normal file
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"schemaVersion": 39,
|
||||
"title": "Policy Pipeline",
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Compile p99 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 2}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.99, sum(rate(policy_compile_duration_seconds_bucket[5m])) by (le))"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Compile Duration (p95/p50)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 2}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(policy_compile_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p95"},
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(policy_compile_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p50"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Simulation Queue Depth",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "none"}},
|
||||
"targets": [{"expr": "sum(policy_simulation_queue_depth)"}]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Queue Depth by Stage",
|
||||
"datasource": "Prometheus",
|
||||
"targets": [{"expr": "policy_simulation_queue_depth", "legendFormat": "{{stage}}"}],
|
||||
"fieldConfig": {"defaults": {"unit": "none"}}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Approval p95 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 1}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(policy_approval_latency_seconds_bucket[5m])) by (le))"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Approval Latency",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 1}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.90, sum(rate(policy_approval_latency_seconds_bucket[5m])) by (le))", "legendFormat": "p90"},
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(policy_approval_latency_seconds_bucket[5m])) by (le))", "legendFormat": "p50"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "gauge",
|
||||
"title": "Promotion Success Rate (30m)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "percent", "min": 0, "max": 100}},
|
||||
"options": {"reduceOptions": {"calcs": ["last"]}, "orientation": "horizontal"},
|
||||
"targets": [
|
||||
{"expr": "100 * (sum(rate(policy_promotion_outcomes_total{outcome=\"success\"}[30m])) or vector(0)) / clamp_min(sum(rate(policy_promotion_outcomes_total[30m])), 1e-9)"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "barchart",
|
||||
"title": "Promotion Outcomes",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "1/s"}},
|
||||
"options": {"displayMode": "series"},
|
||||
"targets": [
|
||||
{"expr": "rate(policy_promotion_outcomes_total[5m])", "legendFormat": "{{outcome}}"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
74
devops/observability/grafana/signals-pipeline.json
Normal file
74
devops/observability/grafana/signals-pipeline.json
Normal file
@@ -0,0 +1,74 @@
|
||||
{
|
||||
"schemaVersion": 39,
|
||||
"title": "Signals Pipeline",
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Scoring p95 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 2}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(signals_reachability_scoring_duration_seconds_bucket[5m])) by (le))"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Scoring Duration p95/p50",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 2}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(signals_reachability_scoring_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p95"},
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(signals_reachability_scoring_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p50"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "gauge",
|
||||
"title": "Cache Hit Ratio (5m)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "percent", "min": 0, "max": 100}},
|
||||
"options": {"reduceOptions": {"calcs": ["last"]}, "orientation": "horizontal"},
|
||||
"targets": [
|
||||
{"expr": "100 * sum(rate(signals_cache_hits_total[5m])) / clamp_min(sum(rate(signals_cache_hits_total[5m])) + sum(rate(signals_cache_misses_total[5m])), 1e-9)"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Cache Hits/Misses",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "1/s"}},
|
||||
"targets": [
|
||||
{"expr": "rate(signals_cache_hits_total[5m])", "legendFormat": "hits"},
|
||||
{"expr": "rate(signals_cache_misses_total[5m])", "legendFormat": "misses"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Sensors Reporting",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "none"}},
|
||||
"targets": [
|
||||
{"expr": "count((time() - max by (sensor) (max_over_time(signals_sensor_last_seen_timestamp_seconds[15m]))) < 900)"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Sensor Staleness",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s"}},
|
||||
"targets": [
|
||||
{"expr": "time() - max(signals_sensor_last_seen_timestamp_seconds) by (sensor)", "legendFormat": "{{sensor}}"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "barchart",
|
||||
"title": "Ingestion Outcomes",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "1/s"}},
|
||||
"options": {"displayMode": "series"},
|
||||
"targets": [
|
||||
{"expr": "rate(signals_ingestion_total[5m])", "legendFormat": "total"},
|
||||
{"expr": "rate(signals_ingestion_failures_total[5m])", "legendFormat": "failures"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
26
devops/observability/grafana/slo-burn.json
Normal file
26
devops/observability/grafana/slo-burn.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"title": "SLO Burn",
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Error rate",
|
||||
"targets": [
|
||||
{ "expr": "sum(rate(service_request_errors_total[5m])) / sum(rate(service_requests_total[5m]))", "legendFormat": "5m" },
|
||||
{ "expr": "sum(rate(service_request_errors_total[1h])) / sum(rate(service_requests_total[1h]))", "legendFormat": "1h" }
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 0.01 } ] } }
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Budget used (24h)",
|
||||
"targets": [
|
||||
{ "expr": "sum(increase(service_request_errors_total[24h])) / sum(increase(service_requests_total[24h]))" }
|
||||
]
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
"version": 1
|
||||
}
|
||||
97
devops/observability/grafana/triage-ttfs.json
Normal file
97
devops/observability/grafana/triage-ttfs.json
Normal file
@@ -0,0 +1,97 @@
|
||||
{
|
||||
"schemaVersion": 39,
|
||||
"title": "Triage TTFS",
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "TTFS First Evidence p95 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 3}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(stellaops_ttfs_first_evidence_seconds_bucket[5m])) by (le))"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "TTFS First Evidence p50/p95 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 3}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(stellaops_ttfs_first_evidence_seconds_bucket[5m])) by (le))", "legendFormat": "p50"},
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(stellaops_ttfs_first_evidence_seconds_bucket[5m])) by (le))", "legendFormat": "p95"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "TTFS Skeleton p50/p95 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 3}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(stellaops_ttfs_skeleton_seconds_bucket[5m])) by (le))", "legendFormat": "p50"},
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(stellaops_ttfs_skeleton_seconds_bucket[5m])) by (le))", "legendFormat": "p95"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "TTFS Full Evidence p50/p95 (s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "s", "decimals": 3}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(stellaops_ttfs_full_evidence_seconds_bucket[5m])) by (le))", "legendFormat": "p50"},
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(stellaops_ttfs_full_evidence_seconds_bucket[5m])) by (le))", "legendFormat": "p95"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Clicks-to-Closure Median",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "none", "decimals": 1}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(stellaops_clicks_to_closure_bucket[5m])) by (le))"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Clicks-to-Closure p50/p95",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "none", "decimals": 1}},
|
||||
"targets": [
|
||||
{"expr": "histogram_quantile(0.50, sum(rate(stellaops_clicks_to_closure_bucket[5m])) by (le))", "legendFormat": "p50"},
|
||||
{"expr": "histogram_quantile(0.95, sum(rate(stellaops_clicks_to_closure_bucket[5m])) by (le))", "legendFormat": "p95"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Evidence Completeness Avg (%)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "percent", "decimals": 1}},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * (sum(rate(stellaops_evidence_completeness_score_sum[5m])) / clamp_min(sum(rate(stellaops_evidence_completeness_score_count[5m])), 1e-9)) / 4"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Evidence Completeness Avg (%)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "percent", "decimals": 1}},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * (sum(rate(stellaops_evidence_completeness_score_sum[5m])) / clamp_min(sum(rate(stellaops_evidence_completeness_score_count[5m])), 1e-9)) / 4",
|
||||
"legendFormat": "avg"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "barchart",
|
||||
"title": "Budget Violations Rate (1/s)",
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {"defaults": {"unit": "1/s"}},
|
||||
"options": {"displayMode": "series"},
|
||||
"targets": [
|
||||
{"expr": "sum(rate(stellaops_performance_budget_violations_total[5m])) by (phase)", "legendFormat": "{{phase}}"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user