Refactor code structure and optimize performance across multiple modules
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
# Prometheus alerting rules, group `ledger-observability`.
# Layout: groups -> rules -> one entry per alert. Each alert has an `expr`
# that must hold continuously for the `for` duration before it fires.
groups:
  - name: ledger-observability
    # Evaluation interval for every rule in this group.
    interval: 30s
    rules:
      # Per-tenant p95 of ledger_write_latency_seconds, derived from the 5m
      # rate of the histogram buckets. Threshold 0.12 is in seconds (120ms,
      # as stated in the description).
      - alert: LedgerWriteLatencyHighP95
        expr: histogram_quantile(0.95, sum(rate(ledger_write_latency_seconds_bucket[5m])) by (le, tenant)) > 0.12
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Ledger write latency p95 high (tenant {{ $labels.tenant }})"
          description: "ledger_write_latency_seconds p95 > 120ms for >10m. Check DB/queue."

      # Highest ledger_projection_lag_seconds sample seen in the trailing 10m
      # window, compared against 30 (seconds, per the metric name).
      # NOTE(review): max_over_time keeps the expression true for the whole
      # window after a single sample above 30, so together with `for: 10m` a
      # short spike can hold the alert pending for a long time - confirm this
      # is the intended sensitivity for a critical alert.
      - alert: ProjectionLagHigh
        expr: max_over_time(ledger_projection_lag_seconds[10m]) > 30
        for: 10m
        labels:
          severity: critical
        annotations:
          summary: "Ledger projection lag high"
          description: "projection lag over 30s; projections falling behind ingest."

      # Any non-zero 15m failure rate of ledger_merkle_anchor_failures_total,
      # grouped by tenant and reason; both labels are used in the annotations.
      - alert: MerkleAnchorFailures
        expr: sum(rate(ledger_merkle_anchor_failures_total[15m])) by (tenant, reason) > 0
        for: 15m
        labels:
          severity: critical
        annotations:
          summary: "Merkle anchor failures (tenant {{ $labels.tenant }})"
          description: "Anchoring failures detected (reason={{ $labels.reason }}). Investigate signing/storage."

      # Any non-zero 10m failure rate of
      # ledger_attachments_encryption_failures_total, grouped by tenant and
      # stage.
      # NOTE(review): the metric name mentions only encryption, while the
      # description covers encryption/sign/upload - presumably the `stage`
      # label distinguishes them; verify against the exporter.
      - alert: AttachmentFailures
        expr: sum(rate(ledger_attachments_encryption_failures_total[10m])) by (tenant, stage) > 0
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Attachment pipeline failures (tenant {{ $labels.tenant }}, stage {{ $labels.stage }})"
          description: "Attachment encryption/sign/upload reported failures in the last 10m."
|
||||
Reference in New Issue
Block a user