---
# Alerting rules for the `ledger-observability` rule group.
#
# Each rule follows the Prometheus alerting-rule shape:
#   alert        - alert name
#   expr         - PromQL condition; the alert is pending while it returns data
#   for          - how long the condition must hold before the alert fires
#   labels       - extra labels attached to the alert (here: severity)
#   annotations  - human-readable text; `{{ $labels.<name> }}` is expanded
#                  from the labels of the series that matched `expr`
groups:
  - name: ledger-observability
    # Evaluation interval for every rule in this group.
    interval: 30s
    rules:
      # p95 of the ledger_write_latency_seconds histogram, computed per tenant
      # from 5m bucket rates, above 0.12 (120ms per the description) for 10m.
      - alert: LedgerWriteLatencyHighP95
        expr: >-
          histogram_quantile(0.95,
          sum(rate(ledger_write_latency_seconds_bucket[5m])) by (le, tenant))
          > 0.12
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Ledger write latency p95 high (tenant {{ $labels.tenant }})"
          description: "ledger_write_latency_seconds p95 > 120ms for >10m. Check DB/queue."

      # Highest ledger_projection_lag_seconds sample seen in the trailing 10m
      # window is above 30, and that has held for 10m.
      # NOTE(review): because max_over_time looks back 10m, a single sample
      # above 30 keeps the condition true for roughly the whole `for` window;
      # confirm that alerting on a short spike (rather than sustained lag) is
      # intended for a critical alert.
      - alert: ProjectionLagHigh
        expr: max_over_time(ledger_projection_lag_seconds[10m]) > 30
        for: 10m
        labels:
          severity: critical
        annotations:
          summary: "Ledger projection lag high"
          description: "projection lag over 30s; projections falling behind ingest."

      # Any non-zero 15m failure rate of ledger_merkle_anchor_failures_total,
      # grouped by tenant and reason, that persists for 15m.
      # NOTE(review): the rate window equals the `for` duration, so whether an
      # isolated failure can fire this depends on evaluation timing - confirm
      # the intended sensitivity.
      - alert: MerkleAnchorFailures
        expr: >-
          sum(rate(ledger_merkle_anchor_failures_total[15m]))
          by (tenant, reason) > 0
        for: 15m
        labels:
          severity: critical
        annotations:
          summary: "Merkle anchor failures (tenant {{ $labels.tenant }})"
          description: "Anchoring failures detected (reason={{ $labels.reason }}). Investigate signing/storage."

      # Any non-zero 10m failure rate of
      # ledger_attachments_encryption_failures_total, grouped by tenant and
      # stage, that persists for 10m.
      # NOTE(review): the metric name mentions only encryption while the
      # description covers encryption/sign/upload; presumably the `stage`
      # label distinguishes these - verify against the exporter.
      - alert: AttachmentFailures
        expr: >-
          sum(rate(ledger_attachments_encryption_failures_total[10m]))
          by (tenant, stage) > 0
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Attachment pipeline failures (tenant {{ $labels.tenant }}, stage {{ $labels.stage }})"
          description: "Attachment encryption/sign/upload reported failures in the last 10m."