#!/usr/bin/env bash
# =============================================================================
# compute-reachability-metrics.sh
# Computes reachability metrics against ground-truth corpus
#
# Usage: ./compute-reachability-metrics.sh [options]
# --corpus-path PATH Path to ground-truth corpus (default: tests/reachability/corpus)
# --output FILE Output JSON file (default: stdout)
# --dry-run Show what would be computed without running scanner
# --strict Exit non-zero if any threshold is violated
# --verbose Enable verbose output
#
# Output: JSON with recall, precision, accuracy metrics per vulnerability class
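#
# Example (output path illustrative):
#   ./scripts/ci/compute-reachability-metrics.sh \
#     --corpus-path tests/reachability/corpus \
#     --output reachability-metrics.json \
#     --strict --verbose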
# =============================================================================
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
# Default paths
CORPUS_PATH="${REPO_ROOT}/tests/reachability/corpus"
OUTPUT_FILE=""
DRY_RUN=false
STRICT=false
VERBOSE=false
# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --corpus-path)
      CORPUS_PATH="$2"
      shift 2
      ;;
    --output)
      OUTPUT_FILE="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --strict)
      STRICT=true
      shift
      ;;
    --verbose)
      VERBOSE=true
      shift
      ;;
    -h|--help)
      # Print the boxed header comment (everything between the two
      # "# ===" borders), regardless of how long the header grows
      sed -n '/^# ==/,/^# ==/p' "$0"
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done
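# Helpers: log() is timestamped and gated on --verbose; error() always
# writes to stderr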
log() {
  if [[ "${VERBOSE}" == "true" ]]; then
    echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*" >&2
  fi
}
error() {
  echo "[ERROR] $*" >&2
}
# Validate corpus exists
if [[ ! -d "${CORPUS_PATH}" ]]; then
  error "Corpus directory not found: ${CORPUS_PATH}"
  exit 1
fi
MANIFEST_FILE="${CORPUS_PATH}/manifest.json"
if [[ ! -f "${MANIFEST_FILE}" ]]; then
  error "Corpus manifest not found: ${MANIFEST_FILE}"
  exit 1
fi
log "Loading corpus from ${CORPUS_PATH}"
log "Manifest: ${MANIFEST_FILE}"
# Initialize counters for each vulnerability class
declare -A true_positives
declare -A false_positives
declare -A false_negatives
declare -A total_expected
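# Tracked vulnerability classes; each fixture's "class" field in the
# manifest must be one of these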
CLASSES=("runtime_dep" "os_pkg" "code" "config")
for class in "${CLASSES[@]}"; do
  true_positives[$class]=0
  false_positives[$class]=0
  false_negatives[$class]=0
  total_expected[$class]=0
done
if [[ "${DRY_RUN}" == "true" ]]; then
log "[DRY RUN] Would process corpus fixtures..."
# Generate mock metrics for dry-run
cat <<EOF
{
"timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
"corpus_path": "${CORPUS_PATH}",
"dry_run": true,
"metrics": {
"runtime_dep": {
"recall": 0.96,
"precision": 0.94,
"f1_score": 0.95,
"total_expected": 100,
"true_positives": 96,
"false_positives": 6,
"false_negatives": 4
},
"os_pkg": {
"recall": 0.98,
"precision": 0.97,
"f1_score": 0.975,
"total_expected": 50,
"true_positives": 49,
"false_positives": 2,
"false_negatives": 1
},
"code": {
"recall": 0.92,
"precision": 0.90,
"f1_score": 0.91,
"total_expected": 25,
"true_positives": 23,
"false_positives": 3,
"false_negatives": 2
},
"config": {
"recall": 0.88,
"precision": 0.85,
"f1_score": 0.865,
"total_expected": 20,
"true_positives": 18,
"false_positives": 3,
"false_negatives": 2
}
},
"aggregate": {
"overall_recall": 0.9538,
"overall_precision": 0.9302,
"reachability_accuracy": 0.9268
}
}
EOF
exit 0
fi
# Process each fixture in the corpus
log "Processing corpus fixtures..."
# Read manifest and iterate fixtures
FIXTURE_COUNT=$(jq -r '.fixtures | length' "${MANIFEST_FILE}")
log "Found ${FIXTURE_COUNT} fixtures"
for i in $(seq 0 $((FIXTURE_COUNT - 1))); do
  FIXTURE_ID=$(jq -r ".fixtures[$i].id" "${MANIFEST_FILE}")
  FIXTURE_PATH="${CORPUS_PATH}/$(jq -r ".fixtures[$i].path" "${MANIFEST_FILE}")"
  FIXTURE_CLASS=$(jq -r ".fixtures[$i].class" "${MANIFEST_FILE}")
  EXPECTED_REACHABLE=$(jq -r ".fixtures[$i].expected_reachable // 0" "${MANIFEST_FILE}")
  # Read for completeness; not yet used in the metrics below
  EXPECTED_UNREACHABLE=$(jq -r ".fixtures[$i].expected_unreachable // 0" "${MANIFEST_FILE}")
  log "Processing fixture: ${FIXTURE_ID} (class: ${FIXTURE_CLASS})"
  if [[ ! -e "${FIXTURE_PATH}" ]]; then
    error "Fixture not found: ${FIXTURE_PATH}"
    continue
  fi
  # Skip fixtures whose class has no counter; an unknown key would
  # otherwise trip set -u on the associative arrays
  if [[ -z "${total_expected[${FIXTURE_CLASS}]+x}" ]]; then
    error "Unknown class '${FIXTURE_CLASS}' for fixture ${FIXTURE_ID}, skipping"
    continue
  fi
  # Update expected counts
  total_expected[$FIXTURE_CLASS]=$((${total_expected[$FIXTURE_CLASS]} + EXPECTED_REACHABLE))
  # Run scanner on fixture (deterministic mode, offline)
  SCAN_RESULT_FILE=$(mktemp)
  # Single quotes defer expansion, so the trap removes whichever temp
  # file is current when the script exits
  trap 'rm -f "${SCAN_RESULT_FILE}"' EXIT
  if dotnet run --project "${REPO_ROOT}/src/Scanner/StellaOps.Scanner.Cli" -- \
    scan --input "${FIXTURE_PATH}" \
    --output "${SCAN_RESULT_FILE}" \
    --deterministic \
    --offline \
    --format json \
    2>/dev/null; then
    # Parse scanner results (the unreachable count is informational only)
    DETECTED_REACHABLE=$(jq -r '[.findings[] | select(.reachable == true)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")
    DETECTED_UNREACHABLE=$(jq -r '[.findings[] | select(.reachable == false)] | length' "${SCAN_RESULT_FILE}" 2>/dev/null || echo "0")
    # Calculate TP, FP, FN for this fixture
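    # Counting model: detections up to the expected count are TP,
    # surplus detections are FP, and any shortfall is FN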
    TP=$((DETECTED_REACHABLE < EXPECTED_REACHABLE ? DETECTED_REACHABLE : EXPECTED_REACHABLE))
    FP=$((DETECTED_REACHABLE > EXPECTED_REACHABLE ? DETECTED_REACHABLE - EXPECTED_REACHABLE : 0))
    FN=$((EXPECTED_REACHABLE - TP))
    true_positives[$FIXTURE_CLASS]=$((${true_positives[$FIXTURE_CLASS]} + TP))
    false_positives[$FIXTURE_CLASS]=$((${false_positives[$FIXTURE_CLASS]} + FP))
    false_negatives[$FIXTURE_CLASS]=$((${false_negatives[$FIXTURE_CLASS]} + FN))
  else
    error "Scanner failed for fixture: ${FIXTURE_ID}"
    # Treat a failed scan as missing every expected reachable finding
    false_negatives[$FIXTURE_CLASS]=$((${false_negatives[$FIXTURE_CLASS]} + EXPECTED_REACHABLE))
  fi
  # Remove this fixture's temp file now; the EXIT trap only covers the last one
  rm -f "${SCAN_RESULT_FILE}"
done
# Calculate metrics per class
calculate_metrics() {
  local class=$1
  local tp=${true_positives[$class]}
  local fp=${false_positives[$class]}
  local fn=${false_negatives[$class]}
  local total=${total_expected[$class]}
  # awk guards zero denominators and prints a leading zero ("0.9600");
  # bc emits ".9600", which is not valid JSON and breaks jq downstream
  awk -v tp="$tp" -v fp="$fp" -v fn="$fn" -v total="$total" 'BEGIN {
    recall = (tp + fn > 0) ? tp / (tp + fn) : 0
    precision = (tp + fp > 0) ? tp / (tp + fp) : 0
    f1 = (recall + precision > 0) ? 2 * recall * precision / (recall + precision) : 0
    fmt = "{\"recall\": %.4f, \"precision\": %.4f, \"f1_score\": %.4f, \"total_expected\": %d, \"true_positives\": %d, \"false_positives\": %d, \"false_negatives\": %d}"
    printf fmt, recall, precision, f1, total, tp, fp, fn
  }'
}
# Aggregate totals across all classes
TOTAL_TP=0
TOTAL_FP=0
TOTAL_EXPECTED=0
for class in "${CLASSES[@]}"; do
  TOTAL_TP=$((TOTAL_TP + ${true_positives[$class]}))
  TOTAL_FP=$((TOTAL_FP + ${false_positives[$class]}))
  TOTAL_EXPECTED=$((TOTAL_EXPECTED + ${total_expected[$class]}))
done
# Generate output JSON (awk guards the zero-denominator case explicitly,
# instead of skewing the ratio with a small epsilon)
OUTPUT=$(cat <<EOF
{
  "timestamp": "$(date -u '+%Y-%m-%dT%H:%M:%SZ')",
  "corpus_path": "${CORPUS_PATH}",
  "dry_run": false,
  "metrics": {
    "runtime_dep": $(calculate_metrics "runtime_dep"),
    "os_pkg": $(calculate_metrics "os_pkg"),
    "code": $(calculate_metrics "code"),
    "config": $(calculate_metrics "config")
  },
  "aggregate": {
    "overall_recall": $(awk -v tp="${TOTAL_TP}" -v total="${TOTAL_EXPECTED}" 'BEGIN { printf "%.4f", (total > 0) ? tp / total : 0 }'),
    "overall_precision": $(awk -v tp="${TOTAL_TP}" -v fp="${TOTAL_FP}" 'BEGIN { printf "%.4f", (tp + fp > 0) ? tp / (tp + fp) : 0 }')
  }
}
EOF
)
# Output results
if [[ -n "${OUTPUT_FILE}" ]]; then
  echo "${OUTPUT}" > "${OUTPUT_FILE}"
  log "Results written to ${OUTPUT_FILE}"
else
  echo "${OUTPUT}"
fi
# Check thresholds in strict mode
if [[ "${STRICT}" == "true" ]]; then
  THRESHOLDS_FILE="${SCRIPT_DIR}/reachability-thresholds.yaml"
  if [[ -f "${THRESHOLDS_FILE}" ]]; then
    log "Checking thresholds from ${THRESHOLDS_FILE}"
    # Extract thresholds and check
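    # The thresholds file is assumed to follow this shape (illustrative):
    #   thresholds:
    #     runtime_dependency_recall:
    #       min: 0.95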
    MIN_RECALL=$(yq -r '.thresholds.runtime_dependency_recall.min // 0.95' "${THRESHOLDS_FILE}")
    ACTUAL_RECALL=$(echo "${OUTPUT}" | jq -r '.metrics.runtime_dep.recall')
    if (( $(echo "$ACTUAL_RECALL < $MIN_RECALL" | bc -l) )); then
      error "Runtime dependency recall ${ACTUAL_RECALL} below threshold ${MIN_RECALL}"
      exit 1
    fi
    log "All thresholds passed"
  else
    error "Thresholds file not found: ${THRESHOLDS_FILE}; skipping threshold checks"
  fi
fi
exit 0