doctor enhancements, setup, enhancements, ui functionality and design consolidation, test projects fixes, product advisory attestation/rekor and delta verifications enhancements

This commit is contained in:
master
2026-01-19 09:02:59 +02:00
parent 8c4bf54aed
commit 17419ba7c4
809 changed files with 170738 additions and 12244 deletions

View File

@@ -0,0 +1,186 @@
# Unknowns Queue Alert Rules
# Sprint: SPRINT_20260118_018_Unknowns_queue_enhancement (UQ-007)
#
# Deploy to Prometheus/Alertmanager
# Top-level list of Prometheus rule groups defined by this file.
groups:
# The single group that holds every unknowns-queue alert below.
- name: unknowns-queue
# How often the rules in this group are evaluated.
interval: 1m
# Alerting rules; each "- alert:" entry below is one rule.
rules:
# =============================================================================
# SLA Alerts
# =============================================================================
# Critical: unknowns_sla_compliance has stayed below 0.80 for 5 minutes.
- alert: UnknownsSlaBreachCritical
  expr: unknowns_sla_compliance < 0.80
  for: 5m
  labels:
    severity: critical
    team: security
  annotations:
    summary: 'SLA compliance dropped below 80%'
    # $value is rendered as a percentage via humanizePercentage.
    description: |
      SLA compliance is {{ $value | humanizePercentage }}.
      Multiple unknowns have breached their SLA deadlines.
      Immediate action required.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#sla-breach'
# Warning: unknowns_sla_compliance has stayed in the [0.80, 0.95) band for
# 15 minutes. The lower bound keeps this rule from firing alongside
# UnknownsSlaBreachCritical, which covers values below 0.80.
- alert: UnknownsSlaBreachWarning
  expr: unknowns_sla_compliance < 0.95 and unknowns_sla_compliance >= 0.80
  for: 15m
  labels:
    severity: warning
    team: security
  annotations:
    summary: 'SLA compliance below 95%'
    description: |
      SLA compliance is {{ $value | humanizePercentage }}.
      Some unknowns are approaching or have breached SLA.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#sla-warning'
# Critical: the unknowns_sla_breach_total counter increased within the last
# hour. Fires immediately (for: 0m).
- alert: UnknownsSlaBreach
  expr: increase(unknowns_sla_breach_total[1h]) > 0
  for: 0m
  labels:
    severity: critical
    team: security
  annotations:
    summary: 'Unknown SLA breached'
    # increase() extrapolates over the range window, so $value can be
    # fractional (e.g. 1.017); round it so the message shows a whole count.
    description: |
      {{ $value | printf "%.0f" }} unknown(s) have breached SLA in the last hour.
      Check the unknowns queue dashboard for affected entries.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#sla-breach'
# =============================================================================
# Queue Depth Alerts
# =============================================================================
# Critical: unknowns_queue_depth_hot has stayed above 5 for 10 minutes.
- alert: UnknownsHotQueueHigh
  expr: unknowns_queue_depth_hot > 5
  for: 10m
  labels:
    severity: critical
    team: security
  annotations:
    summary: 'High number of HOT unknowns'
    description: |
      {{ $value }} HOT unknowns in queue.
      HOT unknowns have 24-hour SLA and block releases.
      Prioritize resolution immediately.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#hot-queue'
# Warning: unknowns_queue_depth_hot has stayed above 0 for 1 hour, i.e. the
# HOT queue has not drained in that time.
- alert: UnknownsHotQueuePresent
  expr: unknowns_queue_depth_hot > 0
  for: 1h
  labels:
    severity: warning
    team: security
  annotations:
    summary: 'HOT unknowns present for over 1 hour'
    # The rule fires after 1h of a non-empty HOT queue, which is a small
    # fraction of the 24-hour SLA, so the text no longer claims that half of
    # the SLA has elapsed. $value is the current depth, not a per-entry age.
    description: |
      The HOT queue has been non-empty for over 1 hour ({{ $value }} currently queued).
      HOT unknowns have a 24-hour SLA; resolve them before the deadline.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#hot-queue'
# Warning: the sum of the hot, warm and cold queue depths has stayed above
# 100 for 30 minutes.
- alert: UnknownsQueueBacklog
  expr: (unknowns_queue_depth_hot + unknowns_queue_depth_warm + unknowns_queue_depth_cold) > 100
  for: 30m
  labels:
    severity: warning
    team: operations
  annotations:
    summary: 'Unknowns queue backlog growing'
    description: |
      Total queue depth is {{ $value }}.
      Consider scaling processing capacity or reviewing automation.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#backlog'
# =============================================================================
# Processing Alerts
# =============================================================================
# Warning: greyqueue_processing_count has stayed above 10 for 30 minutes.
- alert: UnknownsStuckProcessing
  expr: greyqueue_processing_count > 10
  for: 30m
  labels:
    severity: warning
    team: operations
  annotations:
    summary: 'Many entries stuck in processing'
    description: |
      {{ $value }} entries in Processing status for extended period.
      Check for processing bottlenecks or failures.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#stuck-processing'
# Warning: greyqueue_timeout_total grew by more than 5 over the last hour.
# Fires immediately (for: 0m).
- alert: UnknownsProcessingTimeout
  expr: increase(greyqueue_timeout_total[1h]) > 5
  for: 0m
  labels:
    severity: warning
    team: operations
  annotations:
    summary: 'Processing timeouts occurring'
    # increase() extrapolates over the range window, so $value can be
    # fractional; round it so the message shows a whole count.
    description: |
      {{ $value | printf "%.0f" }} processing timeouts in the last hour.
      Entries are being forcefully retried.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#timeouts'
# Critical: greyqueue_watchdog_failed_total increased within the last hour.
# Fires immediately (for: 0m).
- alert: UnknownsProcessingFailures
  expr: increase(greyqueue_watchdog_failed_total[1h]) > 0
  for: 0m
  labels:
    severity: critical
    team: operations
  annotations:
    summary: 'Processing failures detected'
    # increase() extrapolates over the range window, so $value can be
    # fractional; round it so the message shows a whole count.
    description: |
      {{ $value | printf "%.0f" }} entries moved to Failed status in the last hour.
      Manual intervention may be required.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#failures'
# =============================================================================
# Escalation Alerts
# =============================================================================
# Warning: unknowns_escalated_total grew by more than 10 over the last hour.
# Fires immediately (for: 0m).
- alert: UnknownsEscalationRate
  expr: increase(unknowns_escalated_total[1h]) > 10
  for: 0m
  labels:
    severity: warning
    team: security
  annotations:
    summary: 'High escalation rate'
    # increase() extrapolates over the range window, so $value can be
    # fractional; round it so the message shows a whole count.
    description: |
      {{ $value | printf "%.0f" }} unknowns escalated in the last hour.
      Review escalation criteria or upstream data quality.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#escalations'
# =============================================================================
# Service Health Alerts
# =============================================================================
# Critical: neither unknowns_queue_depth_hot nor unknowns_queue_depth_warm
# has had any series for 5 minutes (both absent() checks must hold).
- alert: UnknownsSlaMonitorDown
  expr: absent(unknowns_queue_depth_hot) and absent(unknowns_queue_depth_warm)
  for: 5m
  labels:
    severity: critical
    team: operations
  annotations:
    summary: 'Unknowns SLA monitor not reporting'
    description: |
      No metrics received from unknowns SLA monitor.
      Check if the service is running.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#service-down'
# Critical: probe_success for job "unknowns-healthcheck" has been 0 for
# 5 minutes.
- alert: UnknownsHealthCheckUnhealthy
  expr: probe_success{job="unknowns-healthcheck"} == 0
  for: 5m
  labels:
    severity: critical
    team: operations
  annotations:
    summary: 'Unknowns service health check failing'
    description: |
      Health check endpoint returning unhealthy.
      SLA breaches may exist.
    runbook_url: 'https://docs.stella-ops.org/operations/unknowns-queue-runbook#health-check'