135 lines
3.8 KiB
Bash
135 lines
3.8 KiB
Bash
#!/usr/bin/env bash
|
|
set -euo pipefail
|
|
|
|
# Incident mode automation
# - Enables a feature-flag JSON file when the burn rate crosses the threshold
# - Writes retention override parameters for downstream storage/ingest systems
# - Disables the flag again after a cooldown period once the burn subsides
# All inputs are provided via CLI flags or env vars to remain offline-friendly.
|
|
|
|
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   the help text on stdout (callers decide the exit status)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'USAGE'
Usage: incident-mode.sh --burn-rate <float> [--threshold 2.0] [--reset-threshold 0.5] \
                        [--state-dir out/incident-mode] [--retention-hours 24] \
                        [--cooldown-mins 30] [--note "text"]

Environment overrides:
  INCIDENT_STATE_DIR    default: out/incident-mode
  INCIDENT_THRESHOLD    default: 2.0 (fast burn multiple)
  INCIDENT_RESET_TH     default: 0.5 (burn multiple to exit)
  INCIDENT_COOLDOWN     default: 30 (minutes below reset threshold)
  INCIDENT_RETENTION_H  default: 24 (hours)

Outputs (in state dir):
  flag.json       feature flag payload (enabled/disabled + metadata)
  retention.json  retention override (hours, applied_at)
  last_burn.txt   last burn rate observed
  cooldown.txt    consecutive minutes below reset threshold

Examples:
  incident-mode.sh --burn-rate 3.1 --note "fast burn"   # enter incident mode
  incident-mode.sh --burn-rate 0.2                      # progress cooldown / exit
USAGE
}
|
|
|
|
# Bail out with the help text when invoked without any arguments.
if [[ $# -eq 0 ]]; then
  usage
  exit 1
fi

# Defaults: INCIDENT_* environment variables override the built-in values;
# the CLI flags parsed below override both.
BURN_RATE=""
NOTE=""
STATE_DIR=${INCIDENT_STATE_DIR:-out/incident-mode}
THRESHOLD=${INCIDENT_THRESHOLD:-2.0}
RESET_TH=${INCIDENT_RESET_TH:-0.5}
COOLDOWN_MINS=${INCIDENT_COOLDOWN:-30}
RETENTION_H=${INCIDENT_RETENTION_H:-24}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --burn-rate | --threshold | --reset-threshold | --state-dir | \
      --retention-hours | --cooldown-mins | --note)
      # Every flag in this arm takes a value. Check for it explicitly: with
      # `set -u` active, reading a missing "$2" would abort the script with a
      # bare "unbound variable" error that does not name the offending flag.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage
        exit 1
      fi
      case "$1" in
        --burn-rate) BURN_RATE="$2" ;;
        --threshold) THRESHOLD="$2" ;;
        --reset-threshold) RESET_TH="$2" ;;
        --state-dir) STATE_DIR="$2" ;;
        --retention-hours) RETENTION_H="$2" ;;
        --cooldown-mins) COOLDOWN_MINS="$2" ;;
        --note) NOTE="$2" ;;
      esac
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      exit 1
      ;;
  esac
done

if [[ -z "$BURN_RATE" ]]; then
  echo "--burn-rate is required" >&2
  exit 1
fi

# `--` keeps a state dir whose name starts with "-" from being read as an option.
mkdir -p -- "$STATE_DIR"

# Paths of the state/output files kept inside the state directory.
FLAG_FILE="$STATE_DIR/flag.json"
RET_FILE="$STATE_DIR/retention.json"
LAST_FILE="$STATE_DIR/last_burn.txt"
COOLDOWN_FILE="$STATE_DIR/cooldown.txt"
|
|
|
|
#######################################
# Encode a string as a JSON string literal (including the surrounding quotes).
# Implemented with shell builtins only, so it does not depend on a `python`
# executable being installed.
# Arguments: $1 - text to encode (a missing argument is treated as empty)
# Outputs:   the JSON literal followed by a newline, on stdout
# Notes:     '"', '\' and control characters are escaped; bytes/characters
#            above 0x7f are passed through unchanged rather than being
#            rewritten as \uXXXX escapes.
#######################################
jq_escape() {
  local raw=${1-} out='' ch code esc i
  for (( i = 0; i < ${#raw}; i++ )); do
    ch=${raw:i:1}
    case "$ch" in
      '"') out+='\"' ;;
      '\') out+='\\' ;;
      $'\b') out+='\b' ;;
      $'\f') out+='\f' ;;
      $'\n') out+='\n' ;;
      $'\r') out+='\r' ;;
      $'\t') out+='\t' ;;
      *)
        # "'x" makes printf yield the numeric code of the character x.
        printf -v code '%d' "'$ch"
        if (( code < 32 || code == 127 )); then
          # Remaining control characters have no short escape form.
          printf -v esc '\\u%04x' "$code"
          out+=$esc
        else
          out+=$ch
        fi
        ;;
    esac
  done
  printf '"%s"\n' "$out"
}
|
|
|
|
# UTC timestamp used for this run.
now_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)

#######################################
# Convert a decimal number to plain "<int>.<frac>" notation.
# Accepts an optional sign, digits with an optional decimal point, and an
# optional exponent (e.g. 3, .5, 3.10, 1.5e-05). Exponents are expanded so the
# result never uses scientific notation, which bc cannot parse. Words such as
# "nan" or "inf" are rejected.
# Arguments: $1 - text to normalise
# Outputs:   normalised number on stdout (e.g. "3.1", "0.000015", "1000.0")
# Returns:   0 on success, 1 when $1 is not a decimal number
#######################################
normalize_decimal() {
  local raw=${1-}
  local re='^([+-]?)([0-9]*)(\.([0-9]*))?([eE]([+-]?)([0-9]+))?$'
  local sign int_part frac_part exp_sign exp_digits digits pad
  local exponent=0 point=0

  [[ $raw =~ $re ]] || return 1
  sign=${BASH_REMATCH[1]-}
  int_part=${BASH_REMATCH[2]-}
  frac_part=${BASH_REMATCH[4]-}
  exp_sign=${BASH_REMATCH[6]-}
  exp_digits=${BASH_REMATCH[7]-}
  digits=$int_part$frac_part

  # The regex allows an empty mantissa ("", ".", "e5"); require a digit.
  [[ -n $digits ]] || return 1
  # Keep the expanded string bounded (and the arithmetic far from overflow).
  (( ${#exp_digits} <= 3 )) || return 1

  if [[ -n $exp_digits ]]; then
    exponent=$(( 10#$exp_digits ))   # 10# stops "08" being read as octal
    if [[ $exp_sign == "-" ]]; then
      exponent=$(( -exponent ))
    fi
  fi

  # Position of the decimal point inside $digits after applying the exponent.
  point=$(( ${#int_part} + exponent ))
  if (( point <= 0 )); then
    printf -v pad '%*s' "$(( -point ))" ''
    int_part=0
    frac_part=${pad// /0}$digits
  elif (( point >= ${#digits} )); then
    printf -v pad '%*s' "$(( point - ${#digits} ))" ''
    int_part=$digits${pad// /0}
    frac_part=''
  else
    int_part=${digits:0:point}
    frac_part=${digits:point}
  fi

  # Drop redundant leading/trailing zeros, keeping at least one digit per side.
  while [[ $int_part == 0?* ]]; do int_part=${int_part#0}; done
  while [[ $frac_part == *?0 ]]; do frac_part=${frac_part%0}; done
  if [[ $sign == "+" ]]; then
    sign=''
  fi
  printf '%s%s.%s\n' "$sign" "${int_part:-0}" "${frac_part:-0}"
}

# Validate and normalise every number that is handed to bc below.
if ! burn_float=$(normalize_decimal "$BURN_RATE"); then
  echo "incident-mode: --burn-rate must be a finite decimal number (got '$BURN_RATE')" >&2
  exit 1
fi
if ! threshold_dec=$(normalize_decimal "$THRESHOLD"); then
  echo "incident-mode: threshold must be a finite decimal number (got '$THRESHOLD')" >&2
  exit 1
fi
if ! reset_dec=$(normalize_decimal "$RESET_TH"); then
  echo "incident-mode: reset threshold must be a finite decimal number (got '$RESET_TH')" >&2
  exit 1
fi
if [[ ! $COOLDOWN_MINS =~ ^[0-9]+$ ]]; then
  echo "incident-mode: cooldown must be a non-negative integer (got '$COOLDOWN_MINS')" >&2
  exit 1
fi
cooldown_target=$(( 10#$COOLDOWN_MINS ))

# Without bc both comparisons below would yield nothing and the run would be
# treated as "steady", so fail loudly instead.
if ! command -v bc >/dev/null 2>&1; then
  echo "incident-mode: bc is required but was not found in PATH" >&2
  exit 1
fi

# Counter persisted by the previous run. Only plain digits are accepted
# because the value is used in shell arithmetic below.
cooldown_current=0
if [[ -f "$COOLDOWN_FILE" ]]; then
  stored=""
  # read returns non-zero when the file has no trailing newline; the value is
  # still assigned, so the status is deliberately ignored.
  IFS= read -r stored < "$COOLDOWN_FILE" || true
  if [[ $stored =~ ^[0-9]+$ ]]; then
    cooldown_current=$(( 10#$stored ))
  else
    echo "incident-mode: ignoring invalid cooldown state in $COOLDOWN_FILE" >&2
  fi
fi

enter_incident=false
exit_incident=false

# bc prints 1 when the relation holds and 0 otherwise. Plain assignments are
# used so that a bc failure aborts the script under `set -e` / `pipefail`.
at_or_above_threshold=$(printf '%s >= %s\n' "$burn_float" "$threshold_dec" | bc -l)
at_or_below_reset=$(printf '%s <= %s\n' "$burn_float" "$reset_dec" | bc -l)

if [[ $at_or_above_threshold == 1 ]]; then
  # Burn at or above the threshold: enter incident mode, restart the cooldown.
  enter_incident=true
  cooldown_current=0
elif [[ $at_or_below_reset == 1 ]]; then
  # Burn at or below the reset threshold: one more consecutive quiet run.
  cooldown_current=$(( cooldown_current + 1 ))
  if (( cooldown_current >= cooldown_target )); then
    exit_incident=true
  fi
else
  # Between the two thresholds: the quiet streak is broken.
  cooldown_current=0
fi

# Persist the observation and the counter for the next run.
printf '%s\n' "$burn_float" > "$LAST_FILE"
printf '%s\n' "$cooldown_current" > "$COOLDOWN_FILE"
|
|
|
|
#######################################
# Write the feature-flag payload to $FLAG_FILE.
# Globals:   FLAG_FILE, now_utc, NOTE, burn_float (read)
# Arguments: $1 - JSON boolean for the "enabled" field: true or false
# Outputs:   replaces $FLAG_FILE; diagnostics on stderr
# Returns:   0 on success, 1 when $1 is invalid, the note cannot be encoded,
#            or the file cannot be written (any existing file is left as-is)
#######################################
write_flag() {
  local enabled="$1"
  local note_json tmp_file

  case "$enabled" in
    true | false) ;;
    *)
      echo "write_flag: expected true or false, got: $enabled" >&2
      return 1
      ;;
  esac

  # Encode the note before touching any file: a failing $(...) embedded in the
  # payload itself would go unnoticed and leave `"note": ,` (invalid JSON).
  note_json=$(jq_escape "$NOTE") || return 1
  if [[ -z "$note_json" ]]; then
    echo "write_flag: note could not be encoded as JSON" >&2
    return 1
  fi

  # Write to a sibling temp file and rename it into place so that readers
  # never observe a truncated or half-written flag file.
  tmp_file="${FLAG_FILE}.tmp.$$"
  {
    printf '{\n'
    printf '  "enabled": %s,\n' "$enabled"
    printf '  "updated_at": "%s",\n' "$now_utc"
    printf '  "reason": "incident-mode",\n'
    printf '  "note": %s,\n' "$note_json"
    printf '  "burn_rate": %s\n' "$burn_float"
    printf '}\n'
  } > "$tmp_file" || {
    rm -f -- "$tmp_file"
    return 1
  }
  mv -f -- "$tmp_file" "$FLAG_FILE"
}
|
|
|
|
# RETENTION_H is copied verbatim into retention.json, so it must already be a
# valid JSON number. It is checked on every run (not only on activation) so a
# bad setting surfaces before an incident rather than during one.
retention_re='^(0|[1-9][0-9]*)(\.[0-9]+)?$'
if [[ ! $RETENTION_H =~ $retention_re ]]; then
  echo "incident-mode: retention hours must be a non-negative number (got '$RETENTION_H')" >&2
  exit 1
fi

if [[ $enter_incident == true ]]; then
  # Activation: enable the flag and publish the retention override.
  write_flag true
  # Write to a temp file and rename, so readers never see a partial file.
  ret_tmp="${RET_FILE}.tmp.$$"
  printf '{\n  "retention_hours": %s,\n  "applied_at": "%s"\n}\n' \
    "$RETENTION_H" "$now_utc" > "$ret_tmp"
  mv -f -- "$ret_tmp" "$RET_FILE"
  echo "incident-mode: activated (burn_rate=$burn_float)" >&2
elif [[ $exit_incident == true ]]; then
  # Cooldown satisfied: disable the flag.
  # NOTE(review): retention.json is left in place here — confirm that
  # downstream consumers key off flag.json rather than the file's existence.
  write_flag false
  echo "incident-mode: cleared after cooldown (burn_rate=$burn_float)" >&2
else
  # No change; keep the prior flag and only create a disabled one if none exists.
  if [[ ! -f "$FLAG_FILE" ]]; then
    write_flag false
  fi
  echo "incident-mode: steady (burn_rate=$burn_float, cooldown=$cooldown_current/$COOLDOWN_MINS)" >&2
fi

exit 0
|