- Introduced `BinaryReachabilityLifterTests` to validate binary lifting functionality. - Created `PackRunWorkerOptions` for configuring worker paths and execution persistence. - Added `TimelineIngestionOptions` for configuring NATS and Redis ingestion transports. - Implemented `NatsTimelineEventSubscriber` for subscribing to NATS events. - Developed `RedisTimelineEventSubscriber` for reading from Redis Streams. - Added `TimelineEnvelopeParser` to normalize incoming event envelopes. - Created unit tests for `TimelineEnvelopeParser` to ensure correct field mapping. - Implemented `TimelineAuthorizationAuditSink` for logging authorization outcomes.
46 lines
1.6 KiB
YAML
46 lines
1.6 KiB
YAML
# Alert rules for tenant audit & auth (DEVOPS-TEN-49-001)
#
# One rule group, `tenant-audit`, with five alerts covering tenant request
# errors, JWKS cache behaviour, tenant latency, rate limiting and token
# validation failures. Note that rate(x[5m]) is a per-second average taken
# over a 5-minute window, so thresholds compared against rate() are
# per-second values.
apiVersion: 1

groups:
  - name: tenant-audit
    rules:
      # Share of tenant requests whose status matches 5.. over all tenant
      # requests; fires once the ratio stays above 0.005 (0.5%) for 5m.
      - alert: tenant_error_rate_gt_0_5pct
        expr: >-
          sum(rate(tenant_requests_total{status=~"5.."}[5m]))
          / sum(rate(tenant_requests_total[5m])) > 0.005
        for: 5m
        labels:
          severity: page
        annotations:
          summary: Tenant error rate high
          description: Error rate across tenant-labelled requests exceeds 0.5%.

      # misses / (hits + misses), evaluated per series (there is no sum()
      # aggregation); fires when the miss ratio stays above 0.2 for 5m.
      - alert: jwks_cache_miss_spike
        expr: >-
          rate(auth_jwks_cache_misses_total[5m])
          / (rate(auth_jwks_cache_hits_total[5m]) + rate(auth_jwks_cache_misses_total[5m]))
          > 0.2
        for: 5m
        labels:
          severity: warn
        annotations:
          summary: JWKS cache miss rate spike
          description: JWKS miss ratio above 20% may indicate outage or cache expiry.

      # Compares the series `tenant_latency_p95:5m` against 0.6. The name
      # looks like a recording rule defined elsewhere, and the description
      # implies it is in seconds -- TODO confirm both.
      - alert: tenant_latency_p95_high
        expr: tenant_latency_p95:5m > 0.6
        for: 10m
        labels:
          severity: warn
        annotations:
          summary: Tenant p95 latency high
          description: Per-tenant p95 latency over 600ms for 10m.

      # Threshold is 10 hits per second (5m average), not 10 hits per 5m.
      # NOTE(review): if "more than 10 hits within 5 minutes" was the intent,
      # the expression should use increase(...[5m]) instead -- confirm.
      - alert: tenant_rate_limit_exceeded
        expr: rate(tenant_rate_limit_hits_total[5m]) > 10
        for: 5m
        labels:
          severity: warn
        annotations:
          summary: Frequent rate limit hits
          description: Tenant rate limit hits exceed 10 per second (5m average rate).

      # Only series with a non-empty `tenant` label are considered.
      # Threshold is 5 failures per second (5m average), not 5 per 5m.
      # NOTE(review): if "more than 5 failures within 5 minutes" was the
      # intent, the expression should use increase(...[5m]) -- confirm.
      - alert: tenant_auth_failures_spike
        expr: rate(auth_token_validation_failures_total{tenant!=""}[5m]) > 5
        for: 5m
        labels:
          severity: page
        annotations:
          summary: Tenant auth failures elevated
          description: Token validation failures exceed 5 per second (5m average rate) for tenant-scoped traffic.