Add comprehensive security tests for OWASP A02, A05, A07, and A08 categories
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Findings Ledger CI / build-test (push) Has been cancelled
Findings Ledger CI / migration-validation (push) Has been cancelled
Findings Ledger CI / generate-manifest (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Lighthouse CI / Lighthouse Audit (push) Has been cancelled
Lighthouse CI / Axe Accessibility Audit (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Findings Ledger CI / build-test (push) Has been cancelled
Findings Ledger CI / migration-validation (push) Has been cancelled
Findings Ledger CI / generate-manifest (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Lighthouse CI / Lighthouse Audit (push) Has been cancelled
Lighthouse CI / Axe Accessibility Audit (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
- Implemented tests for Cryptographic Failures (A02) to ensure proper handling of sensitive data, secure algorithms, and key management.
- Added tests for Security Misconfiguration (A05) to validate production configurations, security headers, CORS settings, and feature management.
- Developed tests for Authentication Failures (A07) to enforce strong password policies, rate limiting, session management, and MFA support.
- Created tests for Software and Data Integrity Failures (A08) to verify artifact signatures, SBOM integrity, attestation chains, and feed updates.
This commit is contained in:
159
docs/modules/telemetry/operations/alerts/ttfs-alerts.yaml
Normal file
159
docs/modules/telemetry/operations/alerts/ttfs-alerts.yaml
Normal file
@@ -0,0 +1,159 @@
|
||||
# TTFS (Time to First Signal) Alert Rules
|
||||
# Reference: SPRINT_0341_0001_0001 Task T10
|
||||
# These alerts monitor SLOs for the TTFS experience
|
||||
|
||||
groups:
|
||||
- name: ttfs-slo
|
||||
interval: 30s
|
||||
rules:
|
||||
# Primary SLO: P95 latency must be under 5 seconds
|
||||
- alert: TtfsP95High
  # Per-surface P95 of the ttfs_latency_seconds histogram (5m rate window).
  # Fires once the quantile has stayed above 5 for five minutes.
  expr: |
    histogram_quantile(0.95, sum(rate(ttfs_latency_seconds_bucket[5m])) by (le, surface)) > 5
  for: 5m
  labels:
    severity: page
    component: ttfs
    slo: ttfs-latency
  annotations:
    summary: 'TTFS P95 latency exceeds 5s for {{ $labels.surface }}'
    description: >-
      Time to First Signal P95 is {{ $value | humanizeDuration }} for surface
      {{ $labels.surface }}. This breaches the TTFS SLO.
    runbook: 'docs/runbooks/ttfs-latency-high.md'
    dashboard: 'https://grafana.stellaops.local/d/ttfs-overview'
|
||||
|
||||
# Cache performance: Hit rate should be above 70%
|
||||
- alert: TtfsCacheHitRateLow
  # Ratio of the cache-hit rate to the overall signal rate (both over 5m).
  # Fires after the ratio has been below 0.7 for ten minutes.
  expr: |
    sum(rate(ttfs_cache_hit_total[5m])) / sum(rate(ttfs_signal_total[5m])) < 0.7
  for: 10m
  labels:
    severity: warning
    component: ttfs
  annotations:
    summary: 'TTFS cache hit rate below 70%'
    description: >-
      Cache hit rate is {{ $value | humanizePercentage }}.
      Low cache hit rates increase TTFS latency.
    runbook: 'docs/runbooks/ttfs-cache-performance.md'
|
||||
|
||||
# Error rate: Should be under 1%
|
||||
- alert: TtfsErrorRateHigh
  # Ratio of the error rate to the overall signal rate (both over 5m).
  # Fires after the ratio has been above 0.01 for five minutes.
  expr: |
    sum(rate(ttfs_error_total[5m])) / sum(rate(ttfs_signal_total[5m])) > 0.01
  for: 5m
  labels:
    severity: warning
    component: ttfs
  annotations:
    summary: 'TTFS error rate exceeds 1%'
    description: >-
      Error rate is {{ $value | humanizePercentage }}.
      Check logs for FirstSignalService errors.
    runbook: 'docs/runbooks/ttfs-error-investigation.md'
|
||||
|
||||
# SLO breach counter: Too many breaches in a short window
|
||||
- alert: TtfsSloBreach
  # Counts SLO breaches over the last 5 minutes across all series.
  # Fires when more than 10 have accumulated and that holds for one minute.
  expr: |
    sum(increase(ttfs_slo_breach_total[5m])) > 10
  for: 1m
  labels:
    severity: page
    component: ttfs
    slo: ttfs-breach-rate
  annotations:
    summary: 'TTFS SLO breach rate high'
    # increase() extrapolates, so $value is usually fractional; render it as
    # a whole number so the message reads as a count.
    description: >-
      {{ $value | printf "%.0f" }} SLO breaches in last 5 minutes.
      Immediate investigation required.
    runbook: 'docs/runbooks/ttfs-slo-breach.md'
|
||||
|
||||
# Endpoint latency: HTTP endpoint should respond within 500ms
|
||||
- alert: FirstSignalEndpointLatencyHigh
  # P95 of http_request_duration_seconds restricted to routes matching the
  # first-signal path pattern. Fires after five minutes above 0.5.
  expr: |
    histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{route=~"/api/v1/orchestrator/runs/.*/first-signal"}[5m])) by (le)) > 0.5
  for: 5m
  labels:
    severity: warning
    component: ttfs
  annotations:
    summary: 'First signal endpoint P95 latency > 500ms'
    description: >-
      The /first-signal API endpoint P95 is {{ $value | humanizeDuration }}.
      This is the API-level latency only.
    runbook: 'docs/runbooks/first-signal-api-slow.md'
|
||||
|
||||
- name: ttfs-availability
|
||||
interval: 1m
|
||||
rules:
|
||||
# Availability: the Orchestrator service (First Signal provider) must be up
|
||||
- alert: FirstSignalEndpointDown
  # Fires when the `up` series for job "orchestrator" has been 0 for two
  # minutes. This is a scrape-level check, not a probe of the endpoint itself.
  expr: |
    up{job="orchestrator"} == 0
  for: 2m
  labels:
    severity: critical
    component: ttfs
  annotations:
    summary: 'Orchestrator (First Signal provider) is down'
    description: >-
      The Orchestrator service is not responding.
      First Signal functionality is unavailable.
    runbook: 'docs/runbooks/orchestrator-down.md'
|
||||
|
||||
# No signals being generated
|
||||
- alert: TtfsNoSignals
  # Fires when the overall signal rate is exactly zero, or when the
  # ttfs_signal_total series does not exist at all. Without the absent()
  # branch, a missing series yields an empty result and the alert can never
  # fire for the "metric collection issue" case named in the description.
  expr: |
    sum(rate(ttfs_signal_total[10m])) == 0
    or
    absent(ttfs_signal_total)
  for: 15m
  labels:
    severity: warning
    component: ttfs
  annotations:
    summary: 'No TTFS signals generated in 15 minutes'
    description: >-
      No First Signal events have been recorded. This could indicate no
      active runs or a metric collection issue.
|
||||
|
||||
- name: ttfs-ux
|
||||
interval: 1m
|
||||
rules:
|
||||
# UX: High bounce rate indicates poor experience
|
||||
- alert: TtfsBounceRateHigh
  # Ratio of the bounce rate to the page-view rate (both over 5m).
  # Fires after the ratio has been above 0.5 for thirty minutes.
  expr: |
    sum(rate(ttfs_bounce_total[5m])) / sum(rate(ttfs_page_view_total[5m])) > 0.5
  for: 30m
  labels:
    severity: warning
    component: ttfs
    area: ux
  annotations:
    summary: 'TTFS page bounce rate exceeds 50%'
    description: >-
      More than 50% of users are leaving the run page within 10 seconds.
      This may indicate poor First Signal experience.
|
||||
|
||||
# UX: Long open-to-action time
|
||||
- alert: TtfsOpenToActionSlow
  # 75th percentile of the ttfs_open_to_action_seconds histogram (15m rate
  # window). Fires after it has stayed above 30 for one hour.
  expr: |
    histogram_quantile(0.75, sum(rate(ttfs_open_to_action_seconds_bucket[15m])) by (le)) > 30
  for: 1h
  labels:
    severity: info
    component: ttfs
    area: ux
  annotations:
    summary: '75% of users take >30s to first action'
    description: >-
      Users are taking a long time to act on First Signal.
      Consider UX improvements.
|
||||
|
||||
- name: ttfs-failure-signatures
|
||||
interval: 30s
|
||||
rules:
|
||||
# New failure pattern emerging
|
||||
- alert: TtfsNewFailurePatternHigh
  # Per-second rate of newly seen failure signatures over 5m.
  # Fires after the rate has been above 1 for ten minutes.
  expr: |
    sum(rate(ttfs_failure_signature_new_total[5m])) > 1
  for: 10m
  labels:
    severity: warning
    component: ttfs
  annotations:
    summary: 'High rate of new failure signatures'
    # humanize keeps the rate readable instead of printing a raw float, in
    # line with the humanize* helpers used by the other TTFS annotations.
    description: >-
      New failure patterns are being detected at {{ $value | humanize }}/s.
      This may indicate a new class of errors.
|
||||
|
||||
# Failure signature confidence upgrades
|
||||
- alert: TtfsFailureSignatureConfidenceUpgrade
  # Counts confidence upgrades over the last hour across all series.
  # Fires when more than 5 have accumulated and that holds for five minutes.
  expr: |
    sum(increase(ttfs_failure_signature_confidence_upgrade_total[1h])) > 5
  for: 5m
  labels:
    severity: info
    component: ttfs
  annotations:
    summary: 'Multiple failure signatures upgraded to high confidence'
    # increase() extrapolates, so $value is usually fractional; render it as
    # a whole number so the message reads as a count.
    description: >-
      {{ $value | printf "%.0f" }} failure signatures have been upgraded to
      high confidence in the last hour.
|
||||
@@ -0,0 +1,552 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Time to First Signal (TTFS) observability dashboard for StellaOps",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"title": "TTFS P50/P95/P99 by Surface",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
|
||||
"id": 1,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(ttfs_latency_seconds_bucket[5m])) by (le, surface))",
|
||||
"legendFormat": "P50 - {{surface}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(ttfs_latency_seconds_bucket[5m])) by (le, surface))",
|
||||
"legendFormat": "P95 - {{surface}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(ttfs_latency_seconds_bucket[5m])) by (le, surface))",
|
||||
"legendFormat": "P99 - {{surface}}",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "green" },
|
||||
{ "value": 2, "color": "yellow" },
|
||||
{ "value": 5, "color": "red" }
|
||||
]
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"calcs": ["mean", "max", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Cache Hit Rate",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 },
|
||||
"id": 2,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ttfs_cache_hit_total[5m])) / sum(rate(ttfs_signal_total[5m]))",
|
||||
"legendFormat": "Hit Rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "red" },
|
||||
{ "value": 0.7, "color": "yellow" },
|
||||
{ "value": 0.9, "color": "green" }
|
||||
]
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": ""
|
||||
},
|
||||
"orientation": "auto",
|
||||
"textMode": "auto",
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "SLO Breaches (P95 > 5s)",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 },
|
||||
"id": 3,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ttfs_slo_breach_total[1h]))",
|
||||
"legendFormat": "Breaches (1h)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "green" },
|
||||
{ "value": 1, "color": "yellow" },
|
||||
{ "value": 10, "color": "red" }
|
||||
]
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": ""
|
||||
},
|
||||
"orientation": "auto",
|
||||
"textMode": "auto",
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Signal Source Distribution",
|
||||
"type": "piechart",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 6, "h": 4 },
|
||||
"id": 4,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (signal_source) (rate(ttfs_signal_total[1h]))",
|
||||
"legendFormat": "{{signal_source}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "right"
|
||||
},
|
||||
"pieType": "pie",
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Failure Signature Matches",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 18, "y": 4, "w": 6, "h": 4 },
|
||||
"id": 5,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ttfs_failure_signature_match_total[5m]))",
|
||||
"legendFormat": "Matches/s",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "blue" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Signals by Kind",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 8, "w": 12, "h": 6 },
|
||||
"id": 6,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (kind) (rate(ttfs_signal_total[5m]))",
|
||||
"legendFormat": "{{kind}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps",
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 20,
|
||||
"stacking": {
|
||||
"mode": "normal",
|
||||
"group": "A"
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Error Rate",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 12, "y": 8, "w": 12, "h": 6 },
|
||||
"id": 7,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ttfs_error_total[5m])) / sum(rate(ttfs_signal_total[5m]))",
|
||||
"legendFormat": "Error Rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit",
|
||||
"max": 0.1,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "green" },
|
||||
{ "value": 0.01, "color": "yellow" },
|
||||
{ "value": 0.05, "color": "red" }
|
||||
]
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 10
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "TTFS Latency Heatmap",
|
||||
"type": "heatmap",
|
||||
"gridPos": { "x": 0, "y": 14, "w": 12, "h": 8 },
|
||||
"id": 8,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ttfs_latency_seconds_bucket[1m])) by (le)",
|
||||
"legendFormat": "{{le}}",
|
||||
"format": "heatmap",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"calculate": false,
|
||||
"yAxis": {
|
||||
"axisPlacement": "left",
|
||||
"unit": "s"
|
||||
},
|
||||
"color": {
|
||||
"scheme": "Spectral",
|
||||
"mode": "scheme"
|
||||
},
|
||||
"cellGap": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "First Signal Endpoint Latency",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 12, "y": 14, "w": 12, "h": 8 },
|
||||
"id": 9,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{route=~\"/api/v1/orchestrator/runs/.*/first-signal\"}[5m])) by (le))",
|
||||
"legendFormat": "P50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{route=~\"/api/v1/orchestrator/runs/.*/first-signal\"}[5m])) by (le))",
|
||||
"legendFormat": "P95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{route=~\"/api/v1/orchestrator/runs/.*/first-signal\"}[5m])) by (le))",
|
||||
"legendFormat": "P99",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "green" },
|
||||
{ "value": 0.3, "color": "yellow" },
|
||||
{ "value": 0.5, "color": "red" }
|
||||
]
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Open→Action Time Distribution",
|
||||
"type": "histogram",
|
||||
"gridPos": { "x": 0, "y": 22, "w": 8, "h": 6 },
|
||||
"id": 10,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ttfs_open_to_action_seconds_bucket[5m])) by (le)",
|
||||
"legendFormat": "{{le}}",
|
||||
"format": "heatmap",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Bounce Rate (< 10s)",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 8, "y": 22, "w": 4, "h": 6 },
|
||||
"id": 11,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ttfs_bounce_total[5m])) / sum(rate(ttfs_page_view_total[5m]))",
|
||||
"legendFormat": "Bounce Rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": null, "color": "green" },
|
||||
{ "value": 0.3, "color": "yellow" },
|
||||
{ "value": 0.5, "color": "red" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Top Failure Signatures",
|
||||
"type": "table",
|
||||
"gridPos": { "x": 12, "y": 22, "w": 12, "h": 6 },
|
||||
"id": 12,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum by (error_token, error_code) (ttfs_failure_signature_hit_total))",
|
||||
"legendFormat": "{{error_token}} ({{error_code}})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Value" },
|
||||
"properties": [
|
||||
{ "id": "displayName", "value": "Hit Count" }
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"renameByName": {
|
||||
"error_token": "Token",
|
||||
"error_code": "Code"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["ttfs", "ux", "slo", "stellaops"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "Prometheus",
|
||||
"value": "prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Datasource",
|
||||
"multi": false,
|
||||
"name": "datasource",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"definition": "label_values(ttfs_latency_seconds_bucket, surface)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Surface",
|
||||
"multi": true,
|
||||
"name": "surface",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(ttfs_latency_seconds_bucket, surface)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "utc",
|
||||
"title": "TTFS - Time to First Signal",
|
||||
"uid": "ttfs-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
Reference in New Issue
Block a user