# Some checks failed
# Docs CI / lint-and-preview (push) Has been cancelled
# Signals CI & Image / signals-ci (push) Has been cancelled
# Policy Lint & Smoke / policy-lint (push) Has been cancelled
# Policy Simulation / policy-simulate (push) Has been cancelled
# SDK Publish & Sign / sdk-publish (push) Has been cancelled
# AOC Guard CI / aoc-guard (push) Has been cancelled
# AOC Guard CI / aoc-verify (push) Has been cancelled
# Concelier Attestation Tests / attestation-tests (push) Has been cancelled
# devportal-offline / build-offline (push) Has been cancelled
# 55 lines
# 1.9 KiB
# YAML
# 55 lines
# 1.9 KiB
# YAML
groups:
  # Alerting rules for the signals pipeline. Every rule carries
  # `service: signals` plus a `severity` label (warning or critical).
  - name: signals-pipeline
    rules:
      # Fires when the 95th-percentile reachability scoring duration, computed
      # from the histogram buckets' 5m rate, stays above 2 (seconds, per the
      # metric name) for 10 minutes.
      - alert: SignalsScoringLatencyP95High
        expr: histogram_quantile(0.95, sum(rate(signals_reachability_scoring_duration_seconds_bucket[5m])) by (le)) > 2
        for: 10m
        labels:
          severity: warning
          service: signals
        annotations:
          summary: "Signals scoring latency high (p95)"
          description: "Reachability scoring p95 exceeds 2s for 10m"

      # Fires when misses / (hits + misses), using 5m rates, stays above 0.3
      # for 10 minutes.
      # NOTE(review): clamp_min(..., 1) floors the denominator at 1 op/s, so
      # when combined hit+miss throughput is below 1/s the left-hand side is
      # the raw miss rate rather than a ratio. Confirm this low-traffic
      # behaviour is intended.
      - alert: SignalsCacheMissRateHigh
        expr: |
          clamp_min(rate(signals_cache_misses_total[5m]), 0)
          / clamp_min(rate(signals_cache_hits_total[5m]) + rate(signals_cache_misses_total[5m]), 1) > 0.3
        for: 10m
        labels:
          severity: warning
          service: signals
        annotations:
          summary: "Signals cache miss rate high"
          description: "Cache miss ratio >30% over 10m; investigate Redis or key churn."

      # Fires when signals_cache_available has reported 0 for 2 minutes.
      # The comparison only matches existing series, so a missing metric
      # does not trigger this rule.
      - alert: SignalsCacheDown
        expr: signals_cache_available == 0
        for: 2m
        labels:
          severity: critical
          service: signals
        annotations:
          summary: "Signals cache unavailable"
          description: "Redis cache reported unavailable for >2m"

      # Evaluated per `sensor` label: fires when the newest last-seen
      # timestamp is more than 900s (15 minutes) old, sustained for a further
      # 5 minutes. The description names the offending sensor via the label
      # preserved by `max(...) by (sensor)`.
      - alert: SignalsSensorStaleness
        expr: time() - max(signals_sensor_last_seen_timestamp_seconds) by (sensor) > 900
        for: 5m
        labels:
          severity: warning
          service: signals
        annotations:
          summary: "Signals sensor stale"
          description: "No updates from sensor {{ $labels.sensor }} for >15 minutes"

      # Fires when the ingestion failure rate divided by the ingestion rate
      # (both over 5m) stays above 0.05 for 5 minutes.
      # NOTE(review): the denominator is floored at 1 op/s by clamp_min, so
      # below that throughput this compares the raw failure rate with 0.05
      # instead of a true ratio. Confirm this is intended.
      - alert: SignalsIngestionErrorRate
        expr: clamp_min(rate(signals_ingestion_failures_total[5m]), 0) / clamp_min(rate(signals_ingestion_total[5m]), 1) > 0.05
        for: 5m
        labels:
          severity: critical
          service: signals
        annotations:
          summary: "Signals ingestion failures elevated"
          description: "Ingestion failure ratio above 5% over 5m"