#!/usr/bin/env bash
# =============================================================================
# compute-reachability-metrics.sh
# Computes reachability metrics against a ground-truth corpus.
#
# Usage: ./compute-reachability-metrics.sh [options]
#   --corpus-path PATH   Path to ground-truth corpus (default: tests/reachability/corpus)
#   --output FILE        Output JSON file (default: stdout)
#   --dry-run            Show what would be computed without running scanner
#   --strict             Exit non-zero if any threshold is violated
#   --verbose            Enable verbose output
#
# Output: JSON with recall, precision, accuracy metrics per vulnerability class
#
# NOTE(review): this file reached review with its line breaks collapsed and
# three spans truncated (the dry-run mock payload, the fixture-loop header /
# scanner invocation, and the final output here-doc). Those spans are
# reconstructed below from the surviving references and are marked
# TODO(review) — confirm them against the real manifest schema and scanner CLI.
# =============================================================================
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Default configuration (overridable via the CLI flags parsed below).
CORPUS_PATH="${REPO_ROOT}/tests/reachability/corpus"
OUTPUT_FILE=""
DRY_RUN=false
STRICT=false
VERBOSE=false

# ---------------------------------------------------------------------------
# CLI parsing
# ---------------------------------------------------------------------------
while [[ $# -gt 0 ]]; do
  case "$1" in
    --corpus-path)
      CORPUS_PATH="$2"
      shift 2
      ;;
    --output)
      OUTPUT_FILE="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --strict)
      STRICT=true
      shift
      ;;
    --verbose)
      VERBOSE=true
      shift
      ;;
    -h|--help)
      # Prints the usage section of the header comment. Fragile by design:
      # keep these line offsets in sync if the header block changes.
      head -20 "$0" | tail -15
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# log MSG... — timestamped diagnostic on stderr, emitted only under --verbose.
log() {
  if [[ "${VERBOSE}" == "true" ]]; then
    echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*" >&2
  fi
}

# error MSG... — unconditional error line on stderr.
error() {
  echo "[ERROR] $*" >&2
}

# Fail fast if the external tools this script shells out to are missing
# (yq is only needed in --strict mode and is checked there).
for tool in jq bc; do
  if ! command -v "${tool}" >/dev/null 2>&1; then
    error "required tool not found: ${tool}"
    exit 1
  fi
done

# ---------------------------------------------------------------------------
# Corpus validation
# ---------------------------------------------------------------------------
if [[ ! -d "${CORPUS_PATH}" ]]; then
  error "Corpus directory not found: ${CORPUS_PATH}"
  exit 1
fi

MANIFEST_FILE="${CORPUS_PATH}/manifest.json"
if [[ ! -f "${MANIFEST_FILE}" ]]; then
  error "Corpus manifest not found: ${MANIFEST_FILE}"
  exit 1
fi

log "Loading corpus from ${CORPUS_PATH}"
log "Manifest: ${MANIFEST_FILE}"

# Per-class confusion-matrix counters, keyed by vulnerability class.
declare -A true_positives
declare -A false_positives
declare -A false_negatives
declare -A total_expected

CLASSES=("runtime_dep" "os_pkg" "code" "config")
for class in "${CLASSES[@]}"; do
  true_positives[$class]=0
  false_positives[$class]=0
  false_negatives[$class]=0
  total_expected[$class]=0
done

if [[ "${DRY_RUN}" == "true" ]]; then
  log "[DRY RUN] Would process corpus fixtures..."
  # TODO(review): reconstructed — the original mock payload was truncated.
  cat <<EOF
{
  "dry_run": true,
  "corpus_path": "${CORPUS_PATH}",
  "classes": ["runtime_dep", "os_pkg", "code", "config"]
}
EOF
  exit 0
fi

# ---------------------------------------------------------------------------
# Fixture processing
# ---------------------------------------------------------------------------
# TODO(review): the loop header and scanner invocation were truncated in the
# original; the manifest schema (.fixtures[] with id/class/expected_reachable)
# and the scanner command line below are reconstructions — verify both.
SCAN_RESULT_FILE="$(mktemp)"
trap 'rm -f -- "${SCAN_RESULT_FILE}"' EXIT

while IFS=$'\t' read -r FIXTURE_ID FIXTURE_CLASS EXPECTED_REACHABLE; do
  log "Processing fixture ${FIXTURE_ID} (class=${FIXTURE_CLASS}, expected=${EXPECTED_REACHABLE})"

  # Fix: total_expected was never accumulated in the surviving text, which
  # would have left every reported total at 0.
  total_expected[$FIXTURE_CLASS]=$(( total_expected[$FIXTURE_CLASS] + EXPECTED_REACHABLE ))

  if "${REPO_ROOT}/bin/scanner" scan "${CORPUS_PATH}/${FIXTURE_ID}" \
      --output "${SCAN_RESULT_FILE}" 2>/dev/null; then
    # Parse scanner results; fall back to 0 if the result file is unreadable.
    DETECTED_REACHABLE=$(jq -r '[.findings[] | select(.reachable == true)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")
    # shellcheck disable=SC2034  # computed for parity with the reachable count; currently unused
    DETECTED_UNREACHABLE=$(jq -r '[.findings[] | select(.reachable == false)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")

    # Confusion-matrix contribution of this fixture:
    # TP is capped at the expected count; surplus detections are FP.
    TP=$((DETECTED_REACHABLE < EXPECTED_REACHABLE ? DETECTED_REACHABLE : EXPECTED_REACHABLE))
    FP=$((DETECTED_REACHABLE > EXPECTED_REACHABLE ? DETECTED_REACHABLE - EXPECTED_REACHABLE : 0))
    FN=$((EXPECTED_REACHABLE - TP))

    true_positives[$FIXTURE_CLASS]=$(( true_positives[$FIXTURE_CLASS] + TP ))
    false_positives[$FIXTURE_CLASS]=$(( false_positives[$FIXTURE_CLASS] + FP ))
    false_negatives[$FIXTURE_CLASS]=$(( false_negatives[$FIXTURE_CLASS] + FN ))
  else
    error "Scanner failed for fixture: ${FIXTURE_ID}"
    # A failed scan misses every expected-reachable finding for this fixture.
    false_negatives[$FIXTURE_CLASS]=$(( false_negatives[$FIXTURE_CLASS] + EXPECTED_REACHABLE ))
  fi
done < <(jq -r '.fixtures[] | [.id, .class, (.expected_reachable // 0)] | @tsv' "${MANIFEST_FILE}")

# calculate_metrics CLASS
# Reads the global counter arrays and emits a one-line JSON object with
# recall / precision / f1_score / raw counts for CLASS on stdout.
# Uses bc because bash arithmetic is integer-only.
calculate_metrics() {
  local class=$1
  local tp=${true_positives[$class]}
  local fp=${false_positives[$class]}
  local fn=${false_negatives[$class]}
  local total=${total_expected[$class]}
  local recall=0
  local precision=0
  local f1=0

  if [[ $((tp + fn)) -gt 0 ]]; then
    recall=$(echo "scale=4; $tp / ($tp + $fn)" | bc)
    # Fix: bc prints fractions as ".9500" (no leading digit), which is not
    # valid JSON and breaks the jq read in the strict-mode check below.
    recall=${recall/#./0.}
  fi
  if [[ $((tp + fp)) -gt 0 ]]; then
    precision=$(echo "scale=4; $tp / ($tp + $fp)" | bc)
    precision=${precision/#./0.}
  fi
  if (( $(echo "$recall + $precision > 0" | bc -l) )); then
    f1=$(echo "scale=4; 2 * $recall * $precision / ($recall + $precision)" | bc)
    f1=${f1/#./0.}
  fi

  echo "{\"recall\": $recall, \"precision\": $precision, \"f1_score\": $f1, \"total_expected\": $total, \"true_positives\": $tp, \"false_positives\": $fp, \"false_negatives\": $fn}"
}

# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
# TODO(review): reconstructed here-doc — the original body was truncated. The
# strict-mode jq path '.metrics.runtime_dep.recall' below anchors this shape.
OUTPUT=$(cat <<EOF
{
  "generated_at": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "corpus_path": "${CORPUS_PATH}",
  "metrics": {
    "runtime_dep": $(calculate_metrics "runtime_dep"),
    "os_pkg": $(calculate_metrics "os_pkg"),
    "code": $(calculate_metrics "code"),
    "config": $(calculate_metrics "config")
  }
}
EOF
)

if [[ -n "${OUTPUT_FILE}" ]]; then
  printf '%s\n' "${OUTPUT}" > "${OUTPUT_FILE}"
  log "Results written to ${OUTPUT_FILE}"
else
  printf '%s\n' "${OUTPUT}"
fi

# ---------------------------------------------------------------------------
# Threshold enforcement (--strict)
# ---------------------------------------------------------------------------
if [[ "${STRICT}" == "true" ]]; then
  THRESHOLDS_FILE="${SCRIPT_DIR}/reachability-thresholds.yaml"
  if [[ -f "${THRESHOLDS_FILE}" ]]; then
    log "Checking thresholds from ${THRESHOLDS_FILE}"
    if ! command -v yq >/dev/null 2>&1; then
      error "required tool not found: yq (needed for --strict)"
      exit 1
    fi
    # Default to 0.95 when the threshold key is absent from the YAML.
    MIN_RECALL=$(yq -r '.thresholds.runtime_dependency_recall.min // 0.95' "${THRESHOLDS_FILE}")
    ACTUAL_RECALL=$(jq -r '.metrics.runtime_dep.recall' <<<"${OUTPUT}")
    if (( $(echo "$ACTUAL_RECALL < $MIN_RECALL" | bc -l) )); then
      error "Runtime dependency recall ${ACTUAL_RECALL} below threshold ${MIN_RECALL}"
      exit 1
    fi
    log "All thresholds passed"
  fi
fi

exit 0