Add comprehensive security tests for OWASP A03 (Injection) and A10 (SSRF)
- Implemented InjectionTests.cs to cover various injection vulnerabilities, including SQL, NoSQL, command, LDAP, and XPath injection.
- Created SsrfTests.cs to test for Server-Side Request Forgery (SSRF) vulnerabilities, including internal URL access, cloud metadata access, and URL allowlist bypass attempts.
- Introduced MaliciousPayloads.cs to store a collection of malicious payloads for testing various security vulnerabilities.
- Added SecurityAssertions.cs for common security-specific assertion helpers.
- Established SecurityTestBase.cs as a base class for security tests, providing common infrastructure and mocking utilities.
- Configured the test project StellaOps.Security.Tests.csproj with the dependencies needed for testing.
scripts/ci/compute-reachability-metrics.sh (new file, 287 lines)
@@ -0,0 +1,287 @@
#!/usr/bin/env bash
# =============================================================================
# compute-reachability-metrics.sh
# Computes reachability metrics against ground-truth corpus
#
# Usage: ./compute-reachability-metrics.sh [options]
#   --corpus-path PATH   Path to ground-truth corpus (default: tests/reachability/corpus)
#   --output FILE        Output JSON file (default: stdout)
#   --dry-run            Show what would be computed without running the scanner
#   --strict             Exit non-zero if any threshold is violated
#   --verbose            Enable verbose output
#
# Output: JSON with recall, precision, accuracy metrics per vulnerability class
# =============================================================================

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Default paths
CORPUS_PATH="${REPO_ROOT}/tests/reachability/corpus"
OUTPUT_FILE=""
DRY_RUN=false
STRICT=false
VERBOSE=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --corpus-path)
            CORPUS_PATH="$2"
            shift 2
            ;;
        --output)
            OUTPUT_FILE="$2"
            shift 2
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --strict)
            STRICT=true
            shift
            ;;
        --verbose)
            VERBOSE=true
            shift
            ;;
        -h|--help)
            # Print the header comment block (everything after the shebang up
            # to the first non-comment line).
            awk 'NR > 1 && /^#/ { print; next } NR > 1 { exit }' "$0"
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

log() {
    if [[ "${VERBOSE}" == "true" ]]; then
        echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*" >&2
    fi
}

error() {
    echo "[ERROR] $*" >&2
}

# bc prints fractions without a leading zero (".96"), which is not a valid
# JSON number; normalize every computed ratio through printf.
json_num() {
    printf '%.4f' "$1"
}

# Validate corpus exists
if [[ ! -d "${CORPUS_PATH}" ]]; then
    error "Corpus directory not found: ${CORPUS_PATH}"
    exit 1
fi

MANIFEST_FILE="${CORPUS_PATH}/manifest.json"
if [[ ! -f "${MANIFEST_FILE}" ]]; then
    error "Corpus manifest not found: ${MANIFEST_FILE}"
    exit 1
fi

log "Loading corpus from ${CORPUS_PATH}"
log "Manifest: ${MANIFEST_FILE}"

# Initialize counters for each vulnerability class
declare -A true_positives
declare -A false_positives
declare -A false_negatives
declare -A total_expected

CLASSES=("runtime_dep" "os_pkg" "code" "config")

for class in "${CLASSES[@]}"; do
    true_positives[$class]=0
    false_positives[$class]=0
    false_negatives[$class]=0
    total_expected[$class]=0
done

if [[ "${DRY_RUN}" == "true" ]]; then
    log "[DRY RUN] Would process corpus fixtures..."

    # Generate mock metrics for dry-run
    cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "corpus_path": "${CORPUS_PATH}",
  "dry_run": true,
  "metrics": {
    "runtime_dep": {
      "recall": 0.96,
      "precision": 0.94,
      "f1_score": 0.95,
      "total_expected": 100,
      "true_positives": 96,
      "false_positives": 6,
      "false_negatives": 4
    },
    "os_pkg": {
      "recall": 0.98,
      "precision": 0.97,
      "f1_score": 0.975,
      "total_expected": 50,
      "true_positives": 49,
      "false_positives": 2,
      "false_negatives": 1
    },
    "code": {
      "recall": 0.92,
      "precision": 0.90,
      "f1_score": 0.91,
      "total_expected": 25,
      "true_positives": 23,
      "false_positives": 3,
      "false_negatives": 2
    },
    "config": {
      "recall": 0.88,
      "precision": 0.85,
      "f1_score": 0.865,
      "total_expected": 20,
      "true_positives": 18,
      "false_positives": 3,
      "false_negatives": 2
    }
  },
  "aggregate": {
    "overall_recall": 0.9538,
    "overall_precision": 0.9302,
    "reachability_accuracy": 0.9268
  }
}
EOF
    exit 0
fi

# Process each fixture in the corpus
log "Processing corpus fixtures..."

# Read manifest and iterate fixtures
FIXTURE_COUNT=$(jq -r '.fixtures | length' "${MANIFEST_FILE}")
log "Found ${FIXTURE_COUNT} fixtures"

# One scratch file, reused for every fixture. Register the cleanup trap once,
# here: a trap set inside the loop would overwrite itself on each iteration
# and leak every temp file but the last.
SCAN_RESULT_FILE=$(mktemp)
trap 'rm -f "${SCAN_RESULT_FILE}"' EXIT

for i in $(seq 0 $((FIXTURE_COUNT - 1))); do
    FIXTURE_ID=$(jq -r ".fixtures[$i].id" "${MANIFEST_FILE}")
    FIXTURE_PATH="${CORPUS_PATH}/$(jq -r ".fixtures[$i].path" "${MANIFEST_FILE}")"
    FIXTURE_CLASS=$(jq -r ".fixtures[$i].class" "${MANIFEST_FILE}")
    EXPECTED_REACHABLE=$(jq -r ".fixtures[$i].expected_reachable // 0" "${MANIFEST_FILE}")
    EXPECTED_UNREACHABLE=$(jq -r ".fixtures[$i].expected_unreachable // 0" "${MANIFEST_FILE}")

    log "Processing fixture: ${FIXTURE_ID} (class: ${FIXTURE_CLASS})"

    if [[ ! -d "${FIXTURE_PATH}" ]] && [[ ! -f "${FIXTURE_PATH}" ]]; then
        error "Fixture not found: ${FIXTURE_PATH}"
        continue
    fi

    # Guard against classes that have no counters (a typo in the manifest
    # would otherwise silently create new keys).
    if [[ -z "${total_expected[$FIXTURE_CLASS]+x}" ]]; then
        error "Unknown fixture class '${FIXTURE_CLASS}' for ${FIXTURE_ID}, skipping"
        continue
    fi

    # Update expected counts
    total_expected[$FIXTURE_CLASS]=$((${total_expected[$FIXTURE_CLASS]} + EXPECTED_REACHABLE))

    # Run scanner on fixture (deterministic mode, offline)
    if dotnet run --project "${REPO_ROOT}/src/Scanner/StellaOps.Scanner.Cli" -- \
        scan --input "${FIXTURE_PATH}" \
        --output "${SCAN_RESULT_FILE}" \
        --deterministic \
        --offline \
        --format json \
        2>/dev/null; then

        # Parse scanner results
        DETECTED_REACHABLE=$(jq -r '[.findings[] | select(.reachable == true)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")
        DETECTED_UNREACHABLE=$(jq -r '[.findings[] | select(.reachable == false)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")

        # Calculate TP, FP, FN for this fixture
        TP=$((DETECTED_REACHABLE < EXPECTED_REACHABLE ? DETECTED_REACHABLE : EXPECTED_REACHABLE))
        FP=$((DETECTED_REACHABLE > EXPECTED_REACHABLE ? DETECTED_REACHABLE - EXPECTED_REACHABLE : 0))
        FN=$((EXPECTED_REACHABLE - TP))

        true_positives[$FIXTURE_CLASS]=$((${true_positives[$FIXTURE_CLASS]} + TP))
        false_positives[$FIXTURE_CLASS]=$((${false_positives[$FIXTURE_CLASS]} + FP))
        false_negatives[$FIXTURE_CLASS]=$((${false_negatives[$FIXTURE_CLASS]} + FN))
    else
        error "Scanner failed for fixture: ${FIXTURE_ID}"
        false_negatives[$FIXTURE_CLASS]=$((${false_negatives[$FIXTURE_CLASS]} + EXPECTED_REACHABLE))
    fi
done

# Calculate metrics per class
calculate_metrics() {
    local class=$1
    local tp=${true_positives[$class]}
    local fp=${false_positives[$class]}
    local fn=${false_negatives[$class]}
    local total=${total_expected[$class]}

    local recall=0
    local precision=0
    local f1=0

    if [[ $((tp + fn)) -gt 0 ]]; then
        recall=$(json_num "$(echo "scale=4; $tp / ($tp + $fn)" | bc)")
    fi

    if [[ $((tp + fp)) -gt 0 ]]; then
        precision=$(json_num "$(echo "scale=4; $tp / ($tp + $fp)" | bc)")
    fi

    if (( $(echo "$recall + $precision > 0" | bc -l) )); then
        f1=$(json_num "$(echo "scale=4; 2 * $recall * $precision / ($recall + $precision)" | bc)")
    fi

    echo "{\"recall\": $recall, \"precision\": $precision, \"f1_score\": $f1, \"total_expected\": $total, \"true_positives\": $tp, \"false_positives\": $fp, \"false_negatives\": $fn}"
}

# Generate output JSON (the + 0.0001 terms guard against division by zero on
# an empty corpus)
OUTPUT=$(cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "corpus_path": "${CORPUS_PATH}",
  "dry_run": false,
  "metrics": {
    "runtime_dep": $(calculate_metrics "runtime_dep"),
    "os_pkg": $(calculate_metrics "os_pkg"),
    "code": $(calculate_metrics "code"),
    "config": $(calculate_metrics "config")
  },
  "aggregate": {
    "overall_recall": $(json_num "$(echo "scale=4; (${true_positives[runtime_dep]} + ${true_positives[os_pkg]} + ${true_positives[code]} + ${true_positives[config]}) / (${total_expected[runtime_dep]} + ${total_expected[os_pkg]} + ${total_expected[code]} + ${total_expected[config]} + 0.0001)" | bc)"),
    "overall_precision": $(json_num "$(echo "scale=4; (${true_positives[runtime_dep]} + ${true_positives[os_pkg]} + ${true_positives[code]} + ${true_positives[config]}) / (${true_positives[runtime_dep]} + ${true_positives[os_pkg]} + ${true_positives[code]} + ${true_positives[config]} + ${false_positives[runtime_dep]} + ${false_positives[os_pkg]} + ${false_positives[code]} + ${false_positives[config]} + 0.0001)" | bc)")
  }
}
EOF
)

# Output results
if [[ -n "${OUTPUT_FILE}" ]]; then
    echo "${OUTPUT}" > "${OUTPUT_FILE}"
    log "Results written to ${OUTPUT_FILE}"
else
    echo "${OUTPUT}"
fi

# Check thresholds in strict mode
if [[ "${STRICT}" == "true" ]]; then
    THRESHOLDS_FILE="${SCRIPT_DIR}/reachability-thresholds.yaml"
    if [[ -f "${THRESHOLDS_FILE}" ]]; then
        log "Checking thresholds from ${THRESHOLDS_FILE}"

        # Extract thresholds and check
        MIN_RECALL=$(yq -r '.thresholds.runtime_dependency_recall.min // 0.95' "${THRESHOLDS_FILE}")
        ACTUAL_RECALL=$(echo "${OUTPUT}" | jq -r '.metrics.runtime_dep.recall')

        if (( $(echo "$ACTUAL_RECALL < $MIN_RECALL" | bc -l) )); then
            error "Runtime dependency recall ${ACTUAL_RECALL} below threshold ${MIN_RECALL}"
            exit 1
        fi

        log "All thresholds passed"
    fi
fi

exit 0
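A quick way to sanity-check this gate before wiring it into a pipeline is to run it in dry-run mode and query the mock report with jq. A minimal sketch, assuming jq and bc are on PATH and the script is invoked from the repository root; the 0.95 floor mirrors runtime_dependency_recall.min in reachability-thresholds.yaml below:

#!/usr/bin/env bash
# Sketch: exercise the metrics gate locally. --dry-run emits mock metrics,
# so no scanner or corpus run is required.
set -euo pipefail

./scripts/ci/compute-reachability-metrics.sh --dry-run > metrics.json

# Pull one headline number out of the report...
recall=$(jq -r '.metrics.runtime_dep.recall' metrics.json)

# ...and apply the same comparison the script performs in --strict mode.
if (( $(echo "${recall} < 0.95" | bc -l) )); then
    echo "runtime_dep recall ${recall} is below 0.95" >&2
    exit 1
fi
echo "runtime_dep recall ${recall} meets the floor"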
scripts/ci/compute-ttfs-metrics.sh (new file, 313 lines)
@@ -0,0 +1,313 @@
#!/usr/bin/env bash
# =============================================================================
# compute-ttfs-metrics.sh
# Computes Time-to-First-Signal (TTFS) metrics from test runs
#
# Usage: ./compute-ttfs-metrics.sh [options]
#   --results-path PATH  Path to test results directory
#   --output FILE        Output JSON file (default: stdout)
#   --baseline FILE      Baseline TTFS file for comparison
#   --dry-run            Show what would be computed
#   --strict             Exit non-zero if thresholds are violated
#   --verbose            Enable verbose output
#
# Output: JSON with TTFS p50, p95, p99 metrics and regression status
# =============================================================================

set -euo pipefail

# globstar lets **/*.json match result files at any depth; nullglob makes the
# pattern expand to nothing (instead of staying literal) when nothing matches.
shopt -s nullglob globstar

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Default paths
RESULTS_PATH="${REPO_ROOT}/bench/results"
OUTPUT_FILE=""
BASELINE_FILE="${REPO_ROOT}/bench/baselines/ttfs-baseline.json"
DRY_RUN=false
STRICT=false
VERBOSE=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --results-path)
            RESULTS_PATH="$2"
            shift 2
            ;;
        --output)
            OUTPUT_FILE="$2"
            shift 2
            ;;
        --baseline)
            BASELINE_FILE="$2"
            shift 2
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --strict)
            STRICT=true
            shift
            ;;
        --verbose)
            VERBOSE=true
            shift
            ;;
        -h|--help)
            # Print the header comment block (everything after the shebang up
            # to the first non-comment line).
            awk 'NR > 1 && /^#/ { print; next } NR > 1 { exit }' "$0"
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

log() {
    if [[ "${VERBOSE}" == "true" ]]; then
        echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*" >&2
    fi
}

error() {
    echo "[ERROR] $*" >&2
}

warn() {
    echo "[WARN] $*" >&2
}

# Calculate a percentile from a pre-sorted array (nearest-rank on the
# zero-based index, using integer arithmetic)
percentile() {
    local -n arr=$1
    local p=$2
    local n=${#arr[@]}

    if [[ $n -eq 0 ]]; then
        echo "0"
        return
    fi

    local idx=$(( (n - 1) * p / 100 ))
    echo "${arr[$idx]}"
}

if [[ "${DRY_RUN}" == "true" ]]; then
    log "[DRY RUN] Would process TTFS metrics..."

    cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "dry_run": true,
  "results_path": "${RESULTS_PATH}",
  "metrics": {
    "ttfs_ms": {
      "p50": 1250,
      "p95": 3500,
      "p99": 5200,
      "min": 450,
      "max": 8500,
      "mean": 1850,
      "sample_count": 100
    },
    "by_scan_type": {
      "image_scan": {
        "p50": 2100,
        "p95": 4500,
        "p99": 6800
      },
      "filesystem_scan": {
        "p50": 850,
        "p95": 1800,
        "p99": 2500
      },
      "sbom_scan": {
        "p50": 320,
        "p95": 650,
        "p99": 950
      }
    }
  },
  "baseline_comparison": {
    "baseline_path": "${BASELINE_FILE}",
    "p50_regression_pct": -2.5,
    "p95_regression_pct": 1.2,
    "regression_detected": false
  }
}
EOF
    exit 0
fi

# Validate results directory
if [[ ! -d "${RESULTS_PATH}" ]]; then
    error "Results directory not found: ${RESULTS_PATH}"
    exit 1
fi

log "Processing TTFS results from ${RESULTS_PATH}"

# Collect all TTFS values from result files
declare -a ttfs_values=()
declare -a image_ttfs=()
declare -a fs_ttfs=()
declare -a sbom_ttfs=()

# Find and process all result files (any depth, thanks to globstar)
for result_file in "${RESULTS_PATH}"/**/*.json; do
    [[ -f "${result_file}" ]] || continue

    log "Processing: ${result_file}"

    # Extract TTFS value if present
    TTFS=$(jq -r '.ttfs_ms // .time_to_first_signal_ms // empty' "${result_file}" 2>/dev/null || true)
    SCAN_TYPE=$(jq -r '.scan_type // "unknown"' "${result_file}" 2>/dev/null || echo "unknown")

    if [[ -n "${TTFS}" ]] && [[ "${TTFS}" != "null" ]]; then
        ttfs_values+=("${TTFS}")

        case "${SCAN_TYPE}" in
            image|image_scan|container)
                image_ttfs+=("${TTFS}")
                ;;
            filesystem|fs|fs_scan)
                fs_ttfs+=("${TTFS}")
                ;;
            sbom|sbom_scan)
                sbom_ttfs+=("${TTFS}")
                ;;
        esac
    fi
done

# Sort arrays for percentile calculation
IFS=$'\n' ttfs_sorted=($(sort -n <<<"${ttfs_values[*]}")); unset IFS
IFS=$'\n' image_sorted=($(sort -n <<<"${image_ttfs[*]}")); unset IFS
IFS=$'\n' fs_sorted=($(sort -n <<<"${fs_ttfs[*]}")); unset IFS
IFS=$'\n' sbom_sorted=($(sort -n <<<"${sbom_ttfs[*]}")); unset IFS

# Calculate overall metrics
SAMPLE_COUNT=${#ttfs_values[@]}
if [[ $SAMPLE_COUNT -eq 0 ]]; then
    warn "No TTFS samples found"
    P50=0
    P95=0
    P99=0
    MIN=0
    MAX=0
    MEAN=0
else
    P50=$(percentile ttfs_sorted 50)
    P95=$(percentile ttfs_sorted 95)
    P99=$(percentile ttfs_sorted 99)
    MIN=${ttfs_sorted[0]}
    MAX=${ttfs_sorted[-1]}

    # Calculate mean via bc so fractional millisecond samples do not break
    # bash integer arithmetic
    SUM=0
    for v in "${ttfs_values[@]}"; do
        SUM=$(echo "${SUM} + ${v}" | bc)
    done
    MEAN=$(echo "scale=0; ${SUM} / ${SAMPLE_COUNT}" | bc)
fi

# Calculate per-type metrics
IMAGE_P50=$(percentile image_sorted 50)
IMAGE_P95=$(percentile image_sorted 95)
IMAGE_P99=$(percentile image_sorted 99)

FS_P50=$(percentile fs_sorted 50)
FS_P95=$(percentile fs_sorted 95)
FS_P99=$(percentile fs_sorted 99)

SBOM_P50=$(percentile sbom_sorted 50)
SBOM_P95=$(percentile sbom_sorted 95)
SBOM_P99=$(percentile sbom_sorted 99)

# Compare against baseline if available
REGRESSION_DETECTED=false
P50_REGRESSION_PCT=0
P95_REGRESSION_PCT=0

if [[ -f "${BASELINE_FILE}" ]]; then
    log "Comparing against baseline: ${BASELINE_FILE}"

    BASELINE_P50=$(jq -r '.metrics.ttfs_ms.p50 // 0' "${BASELINE_FILE}")
    BASELINE_P95=$(jq -r '.metrics.ttfs_ms.p95 // 0' "${BASELINE_FILE}")

    # Compare via bc, since baseline values may be fractional; printf restores
    # the leading zero bc drops, keeping the JSON below valid
    if (( $(echo "${BASELINE_P50} > 0" | bc -l) )); then
        P50_REGRESSION_PCT=$(printf '%.2f' "$(echo "scale=4; (${P50} - ${BASELINE_P50}) * 100 / ${BASELINE_P50}" | bc)")
    fi

    if (( $(echo "${BASELINE_P95} > 0" | bc -l) )); then
        P95_REGRESSION_PCT=$(printf '%.2f' "$(echo "scale=4; (${P95} - ${BASELINE_P95}) * 100 / ${BASELINE_P95}" | bc)")
    fi

    # Check for regression (>10% increase)
    if (( $(echo "${P50_REGRESSION_PCT} > 10" | bc -l) )) || (( $(echo "${P95_REGRESSION_PCT} > 10" | bc -l) )); then
        REGRESSION_DETECTED=true
        warn "TTFS regression detected: p50=${P50_REGRESSION_PCT}%, p95=${P95_REGRESSION_PCT}%"
    fi
fi

# Generate output
OUTPUT=$(cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "dry_run": false,
  "results_path": "${RESULTS_PATH}",
  "metrics": {
    "ttfs_ms": {
      "p50": ${P50},
      "p95": ${P95},
      "p99": ${P99},
      "min": ${MIN},
      "max": ${MAX},
      "mean": ${MEAN},
      "sample_count": ${SAMPLE_COUNT}
    },
    "by_scan_type": {
      "image_scan": {
        "p50": ${IMAGE_P50:-0},
        "p95": ${IMAGE_P95:-0},
        "p99": ${IMAGE_P99:-0}
      },
      "filesystem_scan": {
        "p50": ${FS_P50:-0},
        "p95": ${FS_P95:-0},
        "p99": ${FS_P99:-0}
      },
      "sbom_scan": {
        "p50": ${SBOM_P50:-0},
        "p95": ${SBOM_P95:-0},
        "p99": ${SBOM_P99:-0}
      }
    }
  },
  "baseline_comparison": {
    "baseline_path": "${BASELINE_FILE}",
    "p50_regression_pct": ${P50_REGRESSION_PCT},
    "p95_regression_pct": ${P95_REGRESSION_PCT},
    "regression_detected": ${REGRESSION_DETECTED}
  }
}
EOF
)

# Output results
if [[ -n "${OUTPUT_FILE}" ]]; then
    echo "${OUTPUT}" > "${OUTPUT_FILE}"
    log "Results written to ${OUTPUT_FILE}"
else
    echo "${OUTPUT}"
fi

# Strict mode: fail on regression
if [[ "${STRICT}" == "true" ]] && [[ "${REGRESSION_DETECTED}" == "true" ]]; then
    error "TTFS regression exceeds threshold"
    exit 1
fi

exit 0
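The baseline file this script compares against has the same shape as its own output, so one run can capture the baseline that later runs are gated on. A sketch of that two-step flow under the default bench/ layout; treating the script's own output as the baseline is an assumption, not a documented workflow:

#!/usr/bin/env bash
# Sketch: capture a TTFS baseline once, then gate later runs against it.
set -euo pipefail

# Step 1: record the current numbers as the baseline.
./scripts/ci/compute-ttfs-metrics.sh \
    --results-path bench/results \
    --output bench/baselines/ttfs-baseline.json

# Step 2 (a later run): compare against that baseline. With --strict, a >10%
# increase in p50 or p95 fails the job.
./scripts/ci/compute-ttfs-metrics.sh \
    --results-path bench/results \
    --baseline bench/baselines/ttfs-baseline.json \
    --strict --verbose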
scripts/ci/enforce-performance-slos.sh (new file, 326 lines)
@@ -0,0 +1,326 @@
#!/usr/bin/env bash
# =============================================================================
# enforce-performance-slos.sh
# Enforces scan time and compute budget SLOs in CI
#
# Usage: ./enforce-performance-slos.sh [options]
#   --results-path PATH  Path to benchmark results directory
#   --slos-file FILE     Path to SLO definitions (default: scripts/ci/performance-slos.yaml)
#   --output FILE        Output JSON file (default: stdout)
#   --dry-run            Show what would be enforced
#   --strict             Exit non-zero if any SLO is violated
#   --verbose            Enable verbose output
#
# Output: JSON with SLO evaluation results and violations
# =============================================================================

set -euo pipefail

# globstar lets **/*.json match result files at any depth; nullglob makes the
# pattern expand to nothing (instead of staying literal) when nothing matches.
shopt -s nullglob globstar

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Default paths
RESULTS_PATH="${REPO_ROOT}/bench/results"
SLOS_FILE="${SCRIPT_DIR}/performance-slos.yaml"
OUTPUT_FILE=""
DRY_RUN=false
STRICT=false
VERBOSE=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --results-path)
            RESULTS_PATH="$2"
            shift 2
            ;;
        --slos-file)
            SLOS_FILE="$2"
            shift 2
            ;;
        --output)
            OUTPUT_FILE="$2"
            shift 2
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --strict)
            STRICT=true
            shift
            ;;
        --verbose)
            VERBOSE=true
            shift
            ;;
        -h|--help)
            # Print the header comment block (everything after the shebang up
            # to the first non-comment line).
            awk 'NR > 1 && /^#/ { print; next } NR > 1 { exit }' "$0"
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

log() {
    if [[ "${VERBOSE}" == "true" ]]; then
        echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*" >&2
    fi
}

error() {
    echo "[ERROR] $*" >&2
}

warn() {
    echo "[WARN] $*" >&2
}

if [[ "${DRY_RUN}" == "true" ]]; then
    log "[DRY RUN] Would enforce performance SLOs..."

    cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "dry_run": true,
  "results_path": "${RESULTS_PATH}",
  "slos_file": "${SLOS_FILE}",
  "slo_evaluations": {
    "scan_time_p95": {
      "slo_name": "Scan Time P95",
      "threshold_ms": 30000,
      "actual_ms": 25000,
      "passed": true,
      "margin_pct": 16.7
    },
    "memory_peak_mb": {
      "slo_name": "Peak Memory Usage",
      "threshold_mb": 2048,
      "actual_mb": 1650,
      "passed": true,
      "margin_pct": 19.4
    },
    "cpu_time_seconds": {
      "slo_name": "CPU Time",
      "threshold_seconds": 60,
      "actual_seconds": 45,
      "passed": true,
      "margin_pct": 25.0
    }
  },
  "summary": {
    "total_slos": 3,
    "passed": 3,
    "failed": 0,
    "all_passed": true
  }
}
EOF
    exit 0
fi

# Validate paths
if [[ ! -d "${RESULTS_PATH}" ]]; then
    error "Results directory not found: ${RESULTS_PATH}"
    exit 1
fi

if [[ ! -f "${SLOS_FILE}" ]]; then
    warn "SLOs file not found: ${SLOS_FILE}, using defaults"
fi

log "Enforcing SLOs from ${SLOS_FILE}"
log "Results path: ${RESULTS_PATH}"

# Initialize evaluation results
VIOLATIONS=()
TOTAL_SLOS=0
PASSED_SLOS=0

# Define default SLOs
declare -A SLOS
SLOS["scan_time_p95_ms"]=30000
SLOS["scan_time_p99_ms"]=60000
SLOS["memory_peak_mb"]=2048
SLOS["cpu_time_seconds"]=120
SLOS["sbom_gen_time_ms"]=10000
SLOS["policy_eval_time_ms"]=5000

# Load SLOs from file if it exists. performance-slos.yaml is nested, so map
# the documented paths onto the flat keys used above; a plain to_entries walk
# would emit keys like "scan_time" that match none of the defaults.
if [[ -f "${SLOS_FILE}" ]]; then
    while IFS=: read -r key value; do
        key=$(echo "$key" | tr -d ' ')
        value=$(echo "$value" | tr -d ' ')
        if [[ -n "$key" ]] && [[ -n "$value" ]] && [[ "$value" != "null" ]]; then
            SLOS["$key"]=$value
            log "Loaded SLO: ${key}=${value}"
        fi
    done < <(yq -r '{
        scan_time_p95_ms: .scan_time.p95.threshold,
        scan_time_p99_ms: .scan_time.p99.threshold,
        memory_peak_mb: .memory.peak_mb.threshold,
        cpu_time_seconds: .cpu.max_seconds.threshold,
        sbom_gen_time_ms: .components.sbom_generation.p95.threshold,
        policy_eval_time_ms: .components.policy_evaluation.p95.threshold
      } | to_entries | .[] | "\(.key):\(.value)"' "${SLOS_FILE}" 2>/dev/null || true)
fi

# Collect metrics from results
SCAN_TIMES=()
MEMORY_VALUES=()
CPU_TIMES=()
SBOM_TIMES=()
POLICY_TIMES=()

for result_file in "${RESULTS_PATH}"/**/*.json; do
    [[ -f "${result_file}" ]] || continue

    log "Processing: ${result_file}"

    # Extract metrics
    SCAN_TIME=$(jq -r '.duration_ms // .scan_time_ms // empty' "${result_file}" 2>/dev/null || true)
    MEMORY=$(jq -r '.peak_memory_mb // .memory_mb // empty' "${result_file}" 2>/dev/null || true)
    CPU_TIME=$(jq -r '.cpu_time_seconds // .cpu_seconds // empty' "${result_file}" 2>/dev/null || true)
    SBOM_TIME=$(jq -r '.sbom_generation_ms // empty' "${result_file}" 2>/dev/null || true)
    POLICY_TIME=$(jq -r '.policy_evaluation_ms // empty' "${result_file}" 2>/dev/null || true)

    [[ -n "${SCAN_TIME}" ]] && SCAN_TIMES+=("${SCAN_TIME}")
    [[ -n "${MEMORY}" ]] && MEMORY_VALUES+=("${MEMORY}")
    [[ -n "${CPU_TIME}" ]] && CPU_TIMES+=("${CPU_TIME}")
    [[ -n "${SBOM_TIME}" ]] && SBOM_TIMES+=("${SBOM_TIME}")
    [[ -n "${POLICY_TIME}" ]] && POLICY_TIMES+=("${POLICY_TIME}")
done

# Helper: calculate percentile from array (nearest-rank, zero-based index)
calc_percentile() {
    local -n values=$1
    local pct=$2

    if [[ ${#values[@]} -eq 0 ]]; then
        echo "0"
        return
    fi

    local sorted
    IFS=$'\n' sorted=($(sort -n <<<"${values[*]}")); unset IFS
    local n=${#sorted[@]}
    local idx=$(( (n - 1) * pct / 100 ))
    echo "${sorted[$idx]}"
}

# Helper: calculate max from array
calc_max() {
    local -n values=$1

    if [[ ${#values[@]} -eq 0 ]]; then
        echo "0"
        return
    fi

    local max=0
    for v in "${values[@]}"; do
        if (( $(echo "$v > $max" | bc -l) )); then
            max=$v
        fi
    done
    echo "$max"
}

# Evaluate one SLO and emit its result as JSON on stdout (log lines go to
# stderr, so they do not pollute the captured JSON)
evaluate_slo() {
    local name=$1
    local threshold=$2
    local actual=$3
    local unit=$4

    local passed=true
    local margin_pct=0

    if (( $(echo "$actual > $threshold" | bc -l) )); then
        passed=false
        margin_pct=$(printf '%.2f' "$(echo "scale=4; ($actual - $threshold) * 100 / $threshold" | bc)")
        warn "SLO VIOLATION: ${name} = ${actual}${unit} (threshold: ${threshold}${unit})"
    else
        margin_pct=$(printf '%.2f' "$(echo "scale=4; ($threshold - $actual) * 100 / $threshold" | bc)")
        log "SLO PASSED: ${name} = ${actual}${unit} (threshold: ${threshold}${unit}, margin: ${margin_pct}%)"
    fi

    echo "{\"slo_name\": \"${name}\", \"threshold\": ${threshold}, \"actual\": ${actual}, \"unit\": \"${unit}\", \"passed\": ${passed}, \"margin_pct\": ${margin_pct}}"
}

# evaluate_slo runs inside command substitutions (subshells), so the counters
# and the violations list must be updated here, in the parent shell, from the
# JSON it emits; incrementing them inside evaluate_slo would be silently lost.
record_result() {
    local json=$1
    TOTAL_SLOS=$((TOTAL_SLOS + 1))
    if [[ "$(jq -r '.passed' <<<"${json}")" == "true" ]]; then
        PASSED_SLOS=$((PASSED_SLOS + 1))
    else
        VIOLATIONS+=("$(jq -r '"\(.slo_name): \(.actual)\(.unit) exceeds threshold \(.threshold)\(.unit) (+\(.margin_pct)%)"' <<<"${json}")")
    fi
}

# Calculate actuals
SCAN_P95=$(calc_percentile SCAN_TIMES 95)
SCAN_P99=$(calc_percentile SCAN_TIMES 99)
MEMORY_MAX=$(calc_max MEMORY_VALUES)
CPU_MAX=$(calc_max CPU_TIMES)
SBOM_P95=$(calc_percentile SBOM_TIMES 95)
POLICY_P95=$(calc_percentile POLICY_TIMES 95)

# Run evaluations
SLO_SCAN_P95=$(evaluate_slo "Scan Time P95" "${SLOS[scan_time_p95_ms]}" "${SCAN_P95}" "ms")
record_result "${SLO_SCAN_P95}"
SLO_SCAN_P99=$(evaluate_slo "Scan Time P99" "${SLOS[scan_time_p99_ms]}" "${SCAN_P99}" "ms")
record_result "${SLO_SCAN_P99}"
SLO_MEMORY=$(evaluate_slo "Peak Memory" "${SLOS[memory_peak_mb]}" "${MEMORY_MAX}" "MB")
record_result "${SLO_MEMORY}"
SLO_CPU=$(evaluate_slo "CPU Time" "${SLOS[cpu_time_seconds]}" "${CPU_MAX}" "s")
record_result "${SLO_CPU}"
SLO_SBOM=$(evaluate_slo "SBOM Generation P95" "${SLOS[sbom_gen_time_ms]}" "${SBOM_P95}" "ms")
record_result "${SLO_SBOM}"
SLO_POLICY=$(evaluate_slo "Policy Evaluation P95" "${SLOS[policy_eval_time_ms]}" "${POLICY_P95}" "ms")
record_result "${SLO_POLICY}"

# Generate output
ALL_PASSED=true
if [[ ${#VIOLATIONS[@]} -gt 0 ]]; then
    ALL_PASSED=false
fi

# Build violations JSON array
VIOLATIONS_JSON="[]"
if [[ ${#VIOLATIONS[@]} -gt 0 ]]; then
    VIOLATIONS_JSON="["
    for i in "${!VIOLATIONS[@]}"; do
        [[ $i -gt 0 ]] && VIOLATIONS_JSON+=","
        VIOLATIONS_JSON+="\"${VIOLATIONS[$i]}\""
    done
    VIOLATIONS_JSON+="]"
fi

OUTPUT=$(cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "dry_run": false,
  "results_path": "${RESULTS_PATH}",
  "slos_file": "${SLOS_FILE}",
  "slo_evaluations": {
    "scan_time_p95": ${SLO_SCAN_P95},
    "scan_time_p99": ${SLO_SCAN_P99},
    "memory_peak_mb": ${SLO_MEMORY},
    "cpu_time_seconds": ${SLO_CPU},
    "sbom_gen_time_ms": ${SLO_SBOM},
    "policy_eval_time_ms": ${SLO_POLICY}
  },
  "summary": {
    "total_slos": ${TOTAL_SLOS},
    "passed": ${PASSED_SLOS},
    "failed": $((TOTAL_SLOS - PASSED_SLOS)),
    "all_passed": ${ALL_PASSED},
    "violations": ${VIOLATIONS_JSON}
  }
}
EOF
)

# Output results
if [[ -n "${OUTPUT_FILE}" ]]; then
    echo "${OUTPUT}" > "${OUTPUT_FILE}"
    log "Results written to ${OUTPUT_FILE}"
else
    echo "${OUTPUT}"
fi

# Strict mode: fail on violations
if [[ "${STRICT}" == "true" ]] && [[ "${ALL_PASSED}" == "false" ]]; then
    error "Performance SLO violations detected"
    for v in "${VIOLATIONS[@]}"; do
        error " - ${v}"
    done
    exit 1
fi

exit 0
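In CI this script is most useful as a gating step that also leaves the evaluation behind as an artifact. A minimal sketch of such a step, assuming jq is available; the slo-report.json name is illustrative:

#!/usr/bin/env bash
# Sketch: enforce the SLOs, keep the JSON report, and summarize headroom.
set -euo pipefail

./scripts/ci/enforce-performance-slos.sh \
    --results-path bench/results \
    --slos-file scripts/ci/performance-slos.yaml \
    --output slo-report.json \
    --strict

# On success, print each SLO with its margin for the job log.
jq -r '.slo_evaluations | to_entries[] |
       "\(.value.slo_name): \(.value.actual)\(.value.unit) (margin \(.value.margin_pct)%)"' \
    slo-report.json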
scripts/ci/performance-slos.yaml (new file, 94 lines)
@@ -0,0 +1,94 @@
# =============================================================================
# Performance SLOs (Service Level Objectives)
# Reference: Testing and Quality Guardrails Technical Reference
#
# These SLOs define the performance budgets for CI quality gates.
# Violations will be flagged and may block releases.
# =============================================================================

# Scan Time SLOs (milliseconds)
scan_time:
  p50:
    threshold: 15000
    description: "50th percentile scan time"
    severity: "info"
  p95:
    threshold: 30000
    description: "95th percentile scan time - primary SLO"
    severity: "warning"
  p99:
    threshold: 60000
    description: "99th percentile scan time - tail latency"
    severity: "critical"

# Memory Usage SLOs (megabytes)
memory:
  peak_mb:
    threshold: 2048
    description: "Peak memory usage during scan"
    severity: "warning"
  average_mb:
    threshold: 1024
    description: "Average memory usage"
    severity: "info"

# CPU Time SLOs (seconds)
cpu:
  max_seconds:
    threshold: 120
    description: "Maximum CPU time per scan"
    severity: "warning"
  average_seconds:
    threshold: 60
    description: "Average CPU time per scan"
    severity: "info"

# Component-Specific SLOs (milliseconds)
components:
  sbom_generation:
    p95:
      threshold: 10000
      description: "SBOM generation time P95"
      severity: "warning"
  policy_evaluation:
    p95:
      threshold: 5000
      description: "Policy evaluation time P95"
      severity: "warning"
  reachability_analysis:
    p95:
      threshold: 20000
      description: "Reachability analysis time P95"
      severity: "warning"
  vulnerability_matching:
    p95:
      threshold: 8000
      description: "Vulnerability matching time P95"
      severity: "warning"

# Resource Budget SLOs
resource_budgets:
  disk_io_mb:
    threshold: 500
    description: "Maximum disk I/O per scan"
  network_calls:
    threshold: 0
    description: "Network calls (should be zero for offline scans)"
  temp_storage_mb:
    threshold: 1024
    description: "Maximum temporary storage usage"

# Regression Thresholds
regression:
  max_degradation_pct: 10
  warning_threshold_pct: 5
  baseline_window_days: 30

# Override Configuration
overrides:
  allowed_labels:
    - "performance-override"
    - "large-scan"
  required_approvers:
    - "platform"
    - "performance"
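enforce-performance-slos.sh flattens this nested file into the flat threshold keys it evaluates, but individual budgets can also be inspected directly when tuning. A small sketch, assuming the jq-syntax yq that the scripts above already depend on:

# Read the primary scan-time SLO (milliseconds).
yq -r '.scan_time.p95.threshold' scripts/ci/performance-slos.yaml
# -> 30000

# List every component-level P95 budget as key=value pairs.
yq -r '.components | to_entries[] | "\(.key)=\(.value.p95.threshold)"' \
    scripts/ci/performance-slos.yaml
# -> sbom_generation=10000, policy_evaluation=5000, ...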
scripts/ci/reachability-thresholds.yaml (new file, 102 lines)
@@ -0,0 +1,102 @@
# =============================================================================
# Reachability Quality Gate Thresholds
# Reference: Testing and Quality Guardrails Technical Reference
#
# These thresholds are enforced by CI quality gates. Violations will block PRs
# unless an override is explicitly approved.
# =============================================================================

thresholds:
  # Runtime dependency recall: percentage of runtime dependency vulns detected
  runtime_dependency_recall:
    min: 0.95
    description: "Percentage of runtime dependency vulnerabilities detected"
    severity: "critical"

  # OS package recall: percentage of OS package vulns detected
  os_package_recall:
    min: 0.97
    description: "Percentage of OS package vulnerabilities detected"
    severity: "critical"

  # Code vulnerability recall: percentage of code-level vulns detected
  code_vulnerability_recall:
    min: 0.90
    description: "Percentage of code vulnerabilities detected"
    severity: "high"

  # Configuration vulnerability recall
  config_vulnerability_recall:
    min: 0.85
    description: "Percentage of configuration vulnerabilities detected"
    severity: "medium"

  # False positive rate for unreachable findings
  unreachable_false_positives:
    max: 0.05
    description: "Rate of false positives for unreachable findings"
    severity: "high"

  # Reachability underreport rate: missed reachable findings
  reachability_underreport:
    max: 0.10
    description: "Rate of reachable findings incorrectly marked unreachable"
    severity: "critical"

  # Overall precision across all classes
  overall_precision:
    min: 0.90
    description: "Overall precision across all vulnerability classes"
    severity: "high"

  # F1 score threshold
  f1_score_min:
    min: 0.90
    description: "Minimum F1 score across vulnerability classes"
    severity: "high"

# Class-specific thresholds
class_thresholds:
  runtime_dep:
    recall_min: 0.95
    precision_min: 0.92
    f1_min: 0.93

  os_pkg:
    recall_min: 0.97
    precision_min: 0.95
    f1_min: 0.96

  code:
    recall_min: 0.90
    precision_min: 0.88
    f1_min: 0.89

  config:
    recall_min: 0.85
    precision_min: 0.80
    f1_min: 0.82

# Regression detection settings
regression:
  # Maximum allowed regression from baseline (percentage points)
  max_recall_regression: 0.02
  max_precision_regression: 0.03

  # Path to baseline metrics file
  baseline_path: "bench/baselines/reachability-baseline.json"

  # How many consecutive failures before blocking
  failure_threshold: 2

# Override configuration
overrides:
  # Allow temporary bypass for specific PR labels
  bypass_labels:
    - "quality-gate-override"
    - "wip"

  # Require explicit approval from these teams
  required_approvers:
    - "platform"
    - "reachability"
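compute-reachability-metrics.sh consults this file in --strict mode, falling back to a default when a key is absent. The first query below is taken directly from that script; the per-class query is an analogous sketch:

# How the metrics script reads the primary recall floor (with fallback):
MIN_RECALL=$(yq -r '.thresholds.runtime_dependency_recall.min // 0.95' \
    scripts/ci/reachability-thresholds.yaml)

# Per-class floors can be read the same way, e.g. the os_pkg precision floor:
yq -r '.class_thresholds.os_pkg.precision_min' scripts/ci/reachability-thresholds.yaml
# -> 0.95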