up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Symbols Server CI / symbols-smoke (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Symbols Server CI / symbols-smoke (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
This commit is contained in:
21
ops/devops/symbols/alerts.yaml
Normal file
21
ops/devops/symbols/alerts.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# Alerting rules (Prometheus-style rule file) for the scrape job "symbols".
groups:
  - name: symbols-availability
    rules:
      # Fires once the `up` series for job "symbols" has stayed at 0
      # for the full 5-minute `for` window.
      - alert: SymbolsDown
        expr: 'up{job="symbols"} == 0'
        for: 5m
        labels:
          severity: page
          service: symbols
        annotations:
          summary: 'Symbols.Server instance is down'
          description: 'symbols scrape target has been down for 5 minutes'

      # Fires when the 5-minute rate of http_requests_total samples whose
      # status matches 5.. stays above zero for 2 minutes.
      # NOTE(review): the comparison is "> 0", so any non-zero 5xx rate
      # qualifies; confirm whether a ratio/threshold was intended.
      - alert: SymbolsErrorRateHigh
        expr: 'rate(http_requests_total{job="symbols",status=~"5.."}[5m]) > 0'
        for: 2m
        labels:
          severity: critical
          service: symbols
        annotations:
          summary: 'Symbols.Server error rate is elevated'
          description: '5xx responses detected for Symbols.Server'
|
||||
Reference in New Issue
Block a user