#!/usr/bin/env bash
# =============================================================================
# compute-reachability-metrics.sh
# Computes reachability metrics against ground-truth corpus
#
# Usage: ./compute-reachability-metrics.sh [options]
# --corpus-path PATH Path to ground-truth corpus (default: tests/reachability/corpus)
# --output FILE Output JSON file (default: stdout)
# --dry-run Show what would be computed without running scanner
# --strict Exit non-zero if any threshold is violated
# --verbose Enable verbose output
#
# Output: JSON with recall, precision, accuracy metrics per vulnerability class
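#
# Example (output path illustrative):
#   ./scripts/ci/compute-reachability-metrics.sh \
#     --corpus-path tests/reachability/corpus \
#     --output reachability-metrics.json \
#     --strict --verbose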
# =============================================================================
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
# Default paths
CORPUS_PATH="${REPO_ROOT}/tests/reachability/corpus"
OUTPUT_FILE=""
DRY_RUN=false
STRICT=false
VERBOSE=false
# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --corpus-path)
      CORPUS_PATH="$2"
      shift 2
      ;;
    --output)
      OUTPUT_FILE="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --strict)
      STRICT=true
      shift
      ;;
    --verbose)
      VERBOSE=true
      shift
      ;;
    -h|--help)
      # Print the boxed header comment (everything between the two
      # "# ===" borders), regardless of how long the header grows
      sed -n '/^# ==/,/^# ==/p' "$0"
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done
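# Helpers: log() is timestamped and gated on --verbose; error() always
# writes to stderr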
log() {
  if [[ "${VERBOSE}" == "true" ]]; then
    echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*" >&2
  fi
}
error() {
  echo "[ERROR] $*" >&2
}
# Validate corpus exists
if [[ ! -d "${CORPUS_PATH}" ]]; then
  error "Corpus directory not found: ${CORPUS_PATH}"
  exit 1
fi
MANIFEST_FILE="${CORPUS_PATH}/manifest.json"
if [[ ! -f "${MANIFEST_FILE}" ]]; then
  error "Corpus manifest not found: ${MANIFEST_FILE}"
  exit 1
fi
log "Loading corpus from ${CORPUS_PATH}"
log "Manifest: ${MANIFEST_FILE}"
# Initialize counters for each vulnerability class
declare -A true_positives
declare -A false_positives
declare -A false_negatives
declare -A total_expected
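# Tracked vulnerability classes; each fixture's "class" field in the
# manifest must be one of these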
CLASSES=("runtime_dep" "os_pkg" "code" "config")
for class in "${CLASSES[@]}"; do
  true_positives[$class]=0
  false_positives[$class]=0
  false_negatives[$class]=0
  total_expected[$class]=0
done
if [[ "${DRY_RUN}" == "true" ]]; then
log "[DRY RUN] Would process corpus fixtures..."
# Generate mock metrics for dry-run
cat <<EOF
{
"timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
"corpus_path": "${CORPUS_PATH}",
"dry_run": true,
"metrics": {
"runtime_dep": {
"recall": 0.96,
"precision": 0.94,
"f1_score": 0.95,
"total_expected": 100,
"true_positives": 96,
"false_positives": 6,
"false_negatives": 4
},
"os_pkg": {
"recall": 0.98,
"precision": 0.97,
"f1_score": 0.975,
"total_expected": 50,
"true_positives": 49,
"false_positives": 2,
"false_negatives": 1
},
"code": {
"recall": 0.92,
"precision": 0.90,
"f1_score": 0.91,
"total_expected": 25,
"true_positives": 23,
"false_positives": 3,
"false_negatives": 2
},
"config": {
"recall": 0.88,
"precision": 0.85,
"f1_score": 0.865,
"total_expected": 20,
"true_positives": 18,
"false_positives": 3,
"false_negatives": 2
}
},
"aggregate": {
"overall_recall": 0.9538,
"overall_precision": 0.9302,
"reachability_accuracy": 0.9268
}
}
EOF
exit 0
fi
# Process each fixture in the corpus
log "Processing corpus fixtures..."
# Read manifest and iterate fixtures
FIXTURE_COUNT=$(jq -r '.fixtures | length' "${MANIFEST_FILE}")
log "Found ${FIXTURE_COUNT} fixtures"
for i in $(seq 0 $((FIXTURE_COUNT - 1))); do
  FIXTURE_ID=$(jq -r ".fixtures[$i].id" "${MANIFEST_FILE}")
  FIXTURE_PATH="${CORPUS_PATH}/$(jq -r ".fixtures[$i].path" "${MANIFEST_FILE}")"
  FIXTURE_CLASS=$(jq -r ".fixtures[$i].class" "${MANIFEST_FILE}")
  EXPECTED_REACHABLE=$(jq -r ".fixtures[$i].expected_reachable // 0" "${MANIFEST_FILE}")
  # Read for completeness; not yet used in the metrics below
  EXPECTED_UNREACHABLE=$(jq -r ".fixtures[$i].expected_unreachable // 0" "${MANIFEST_FILE}")
  log "Processing fixture: ${FIXTURE_ID} (class: ${FIXTURE_CLASS})"
  if [[ ! -e "${FIXTURE_PATH}" ]]; then
    error "Fixture not found: ${FIXTURE_PATH}"
    continue
  fi
  # Skip fixtures whose class has no counter; an unknown key would
  # otherwise trip set -u on the associative arrays
  if [[ -z "${total_expected[${FIXTURE_CLASS}]+x}" ]]; then
    error "Unknown class '${FIXTURE_CLASS}' for fixture ${FIXTURE_ID}, skipping"
    continue
  fi
  # Update expected counts
  total_expected[$FIXTURE_CLASS]=$((${total_expected[$FIXTURE_CLASS]} + EXPECTED_REACHABLE))
  # Run scanner on fixture (deterministic mode, offline)
  SCAN_RESULT_FILE=$(mktemp)
  # Single quotes defer expansion, so the trap removes whichever temp
  # file is current when the script exits
  trap 'rm -f "${SCAN_RESULT_FILE}"' EXIT
  if dotnet run --project "${REPO_ROOT}/src/Scanner/StellaOps.Scanner.Cli" -- \
    scan --input "${FIXTURE_PATH}" \
    --output "${SCAN_RESULT_FILE}" \
    --deterministic \
    --offline \
    --format json \
    2>/dev/null; then
    # Parse scanner results (the unreachable count is informational only)
    DETECTED_REACHABLE=$(jq -r '[.findings[] | select(.reachable == true)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")
    DETECTED_UNREACHABLE=$(jq -r '[.findings[] | select(.reachable == false)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")
    # Calculate TP, FP, FN for this fixture
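    # Counting model: detections up to the expected count are TP,
    # surplus detections are FP, and any shortfall is FN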
    TP=$((DETECTED_REACHABLE < EXPECTED_REACHABLE ? DETECTED_REACHABLE : EXPECTED_REACHABLE))
    FP=$((DETECTED_REACHABLE > EXPECTED_REACHABLE ? DETECTED_REACHABLE - EXPECTED_REACHABLE : 0))
    FN=$((EXPECTED_REACHABLE - TP))
    true_positives[$FIXTURE_CLASS]=$((${true_positives[$FIXTURE_CLASS]} + TP))
    false_positives[$FIXTURE_CLASS]=$((${false_positives[$FIXTURE_CLASS]} + FP))
    false_negatives[$FIXTURE_CLASS]=$((${false_negatives[$FIXTURE_CLASS]} + FN))
  else
    error "Scanner failed for fixture: ${FIXTURE_ID}"
    # Treat a failed scan as missing every expected reachable finding
    false_negatives[$FIXTURE_CLASS]=$((${false_negatives[$FIXTURE_CLASS]} + EXPECTED_REACHABLE))
  fi
  # Remove this fixture's temp file now; the EXIT trap only covers the last one
  rm -f "${SCAN_RESULT_FILE}"
done
# Calculate metrics per class
calculate_metrics() {
  local class=$1
  local tp=${true_positives[$class]}
  local fp=${false_positives[$class]}
  local fn=${false_negatives[$class]}
  local total=${total_expected[$class]}
  # awk guards zero denominators and prints a leading zero ("0.9600");
  # bc emits ".9600", which is not valid JSON and breaks jq downstream
  awk -v tp="$tp" -v fp="$fp" -v fn="$fn" -v total="$total" 'BEGIN {
    recall = (tp + fn > 0) ? tp / (tp + fn) : 0
    precision = (tp + fp > 0) ? tp / (tp + fp) : 0
    f1 = (recall + precision > 0) ? 2 * recall * precision / (recall + precision) : 0
    fmt = "{\"recall\": %.4f, \"precision\": %.4f, \"f1_score\": %.4f, \"total_expected\": %d, \"true_positives\": %d, \"false_positives\": %d, \"false_negatives\": %d}"
    printf fmt, recall, precision, f1, total, tp, fp, fn
  }'
}
# Aggregate totals across all classes
TOTAL_TP=0
TOTAL_FP=0
TOTAL_EXPECTED=0
for class in "${CLASSES[@]}"; do
  TOTAL_TP=$((TOTAL_TP + ${true_positives[$class]}))
  TOTAL_FP=$((TOTAL_FP + ${false_positives[$class]}))
  TOTAL_EXPECTED=$((TOTAL_EXPECTED + ${total_expected[$class]}))
done
# Generate output JSON (awk guards the zero-denominator case explicitly,
# instead of skewing the ratio with a small epsilon)
OUTPUT=$(cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "corpus_path": "${CORPUS_PATH}",
  "dry_run": false,
  "metrics": {
    "runtime_dep": $(calculate_metrics "runtime_dep"),
    "os_pkg": $(calculate_metrics "os_pkg"),
    "code": $(calculate_metrics "code"),
    "config": $(calculate_metrics "config")
  },
  "aggregate": {
    "overall_recall": $(awk -v tp="${TOTAL_TP}" -v total="${TOTAL_EXPECTED}" 'BEGIN { printf "%.4f", (total > 0) ? tp / total : 0 }'),
    "overall_precision": $(awk -v tp="${TOTAL_TP}" -v fp="${TOTAL_FP}" 'BEGIN { printf "%.4f", (tp + fp > 0) ? tp / (tp + fp) : 0 }')
  }
}
EOF
)
# Output results
if [[ -n "${OUTPUT_FILE}" ]]; then
  echo "${OUTPUT}" > "${OUTPUT_FILE}"
  log "Results written to ${OUTPUT_FILE}"
else
  echo "${OUTPUT}"
fi
# Check thresholds in strict mode
if [[ "${STRICT}" == "true" ]]; then
  THRESHOLDS_FILE="${SCRIPT_DIR}/reachability-thresholds.yaml"
  if [[ -f "${THRESHOLDS_FILE}" ]]; then
    log "Checking thresholds from ${THRESHOLDS_FILE}"
    # Extract thresholds and check
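    # The thresholds file is assumed to follow this shape (illustrative):
    #   thresholds:
    #     runtime_dependency_recall:
    #       min: 0.95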
    MIN_RECALL=$(yq -r '.thresholds.runtime_dependency_recall.min // 0.95' "${THRESHOLDS_FILE}")
    ACTUAL_RECALL=$(echo "${OUTPUT}" | jq -r '.metrics.runtime_dep.recall')
    if (( $(echo "$ACTUAL_RECALL < $MIN_RECALL" | bc -l) )); then
      error "Runtime dependency recall ${ACTUAL_RECALL} below threshold ${MIN_RECALL}"
      exit 1
    fi
    log "All thresholds passed"
  else
    error "Thresholds file not found: ${THRESHOLDS_FILE}; skipping threshold checks"
  fi
fi
exit 0