Restructure solution layout by module
This commit is contained in:
		@@ -1,31 +1,31 @@
 | 
			
		||||
# Prometheus alerting rules for the Zastava runtime components.
groups:
  - name: zastava-runtime
    # Every rule in this group is evaluated every 30 seconds.
    interval: 30s
    rules:
 | 
			
		||||
      # Fires when the summed 10-minute event rate has been exactly zero for
      # 15 minutes.
      # NOTE(review): if the series is absent altogether, sum() yields no sample
      # and this rule cannot fire - confirm whether an absent() guard is wanted.
      - alert: ZastavaRuntimeEventsSilent
        expr: 'sum(rate(zastava_runtime_events_total[10m])) == 0'
        for: 15m
        labels:
          service: zastava-runtime
          severity: warning
        annotations:
          summary: 'Observer events stalled'
          description: >-
            No runtime events emitted in the last 15 minutes.
            Check observer DaemonSet health and container runtime mounts.
 | 
			
		||||
      # Fires when the p95 of the backend latency histogram stays above the
      # threshold for 10 minutes.
      # The metric name carries an `_ms` suffix and the summary states 750 ms, so
      # the threshold is written in milliseconds (750), not seconds (0.75).
      # NOTE(review): assumes bucket bounds are in milliseconds - confirm against
      # the histogram definition in the emitting service.
      - alert: ZastavaRuntimeBackendLatencyHigh
        expr: histogram_quantile(0.95, sum by (le) (rate(zastava_runtime_backend_latency_ms_bucket[5m]))) > 750
        for: 10m
        labels:
          severity: critical
          service: zastava-runtime
        annotations:
          summary: "Runtime backend latency p95 above 750 ms"
          description: "Latency to Scanner runtime APIs is elevated. Inspect Scanner.WebService readiness, Authority OpTok issuance, and cluster network."
 | 
			
		||||
      # Fires when the admission webhook denies more than 20 pods per minute,
      # sustained for 5 minutes.
      # rate() returns a per-second value, so it is multiplied by 60 to compare
      # against the per-minute threshold stated in the description.
      - alert: ZastavaAdmissionDenySpike
        expr: sum(rate(zastava_admission_decisions_total{decision="deny"}[5m])) * 60 > 20
        for: 5m
        labels:
          severity: warning
          service: zastava-runtime
        annotations:
          summary: "Admission webhook denies exceeding threshold"
          description: "Webhook is denying more than 20 pod admissions per minute. Confirm policy verdicts and consider fail-open exception for impacted namespaces."
 | 
			
		||||
# Prometheus alerting rules for the Zastava runtime components.
groups:
  - name: zastava-runtime
    # Every rule in this group is evaluated every 30 seconds.
    interval: 30s
    rules:
 | 
			
		||||
      # Fires when the summed 10-minute event rate has been exactly zero for
      # 15 minutes.
      # NOTE(review): if the series is absent altogether, sum() yields no sample
      # and this rule cannot fire - confirm whether an absent() guard is wanted.
      - alert: ZastavaRuntimeEventsSilent
        expr: 'sum(rate(zastava_runtime_events_total[10m])) == 0'
        for: 15m
        labels:
          service: zastava-runtime
          severity: warning
        annotations:
          summary: 'Observer events stalled'
          description: >-
            No runtime events emitted in the last 15 minutes.
            Check observer DaemonSet health and container runtime mounts.
 | 
			
		||||
      # Fires when the p95 of the backend latency histogram stays above the
      # threshold for 10 minutes.
      # The metric name carries an `_ms` suffix and the summary states 750 ms, so
      # the threshold is written in milliseconds (750), not seconds (0.75).
      # NOTE(review): assumes bucket bounds are in milliseconds - confirm against
      # the histogram definition in the emitting service.
      - alert: ZastavaRuntimeBackendLatencyHigh
        expr: histogram_quantile(0.95, sum by (le) (rate(zastava_runtime_backend_latency_ms_bucket[5m]))) > 750
        for: 10m
        labels:
          severity: critical
          service: zastava-runtime
        annotations:
          summary: "Runtime backend latency p95 above 750 ms"
          description: "Latency to Scanner runtime APIs is elevated. Inspect Scanner.WebService readiness, Authority OpTok issuance, and cluster network."
 | 
			
		||||
      # Fires when the admission webhook denies more than 20 pods per minute,
      # sustained for 5 minutes.
      # rate() returns a per-second value, so it is multiplied by 60 to compare
      # against the per-minute threshold stated in the description.
      - alert: ZastavaAdmissionDenySpike
        expr: sum(rate(zastava_admission_decisions_total{decision="deny"}[5m])) * 60 > 20
        for: 5m
        labels:
          severity: warning
          service: zastava-runtime
        annotations:
          summary: "Admission webhook denies exceeding threshold"
          description: "Webhook is denying more than 20 pod admissions per minute. Confirm policy verdicts and consider fail-open exception for impacted namespaces."
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user