up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
This commit is contained in:
107
scripts/bench/run-baseline.sh
Normal file
107
scripts/bench/run-baseline.sh
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env bash
# SPDX-License-Identifier: AGPL-3.0-or-later
# BENCH-AUTO-401-019: Run baseline benchmark automation
|
||||
|
||||
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Absolute directory holding this script, and the repository root, which is
# resolved as two directory levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
# Marked read-only in a separate statement so a failing cd/pwd above is not
# masked by the exit status of `readonly`.
readonly SCRIPT_DIR REPO_ROOT

# ANSI colour sequences, stored as literal backslash escapes; they are only
# turned into real escape characters when a log helper prints them.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset / "no colour"
|
||||
|
||||
# Write an informational line to stdout: a green "[INFO]" tag followed by all
# arguments joined with spaces. Backslash escapes are expanded in the whole
# line, i.e. in the colour codes and in the message text alike.
log_info() {
  printf '%b\n' "${GREEN}[INFO]${NC} $*"
}
|
||||
# Write a warning line to stderr: a yellow "[WARN]" tag followed by all
# arguments joined with spaces.
# Only the colour codes are escape-expanded (%b); the message itself is printed
# verbatim (%s) so backslashes in paths or user input are not reinterpreted.
# Warnings go to stderr so they never mix with data the script emits on stdout.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
|
||||
# Write an error line to stderr: a red "[ERROR]" tag followed by all arguments
# joined with spaces.
# Only the colour codes are escape-expanded (%b); the message itself is printed
# verbatim (%s) so text such as an unknown option is shown exactly as given.
# Errors go to stderr so they never mix with data the script emits on stdout.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
|
||||
|
||||
#######################################
# Print the command-line help and terminate the script.
# Arguments:
#   $1 - optional exit status (default 1, matching the historical behaviour
#        of a bare `usage` call).
# Outputs:
#   Help text on stdout when the status is 0 (help was asked for), otherwise
#   on stderr (help accompanies an error).
# Returns:
#   Never returns; exits the current shell with the chosen status.
#######################################
usage() {
  local status="${1:-1}"
  local fd=2
  if [[ "${status}" == "0" ]]; then
    fd=1
  fi

  cat >&"${fd}" <<EOF
Usage: $0 [--populate] [--compute] [--compare BASELINE] [--all] [--dry-run] [--limit N]

Run benchmark automation pipeline.

Options:
  --populate          Populate bench/findings from reachbench fixtures
  --compute           Compute metrics from findings
  --compare BASELINE  Compare with baseline scanner results
  --all               Run all steps (populate + compute)
  --dry-run           Don't write files (populate only)
  --limit N           Limit cases processed (populate only)
  --help, -h          Show this help
EOF

  exit "${status}"
}
|
||||
|
||||
# Option state. Every variable starts "off"/empty and is only changed by
# parse_args below.
DO_POPULATE=false # true when --populate or --all was given
DO_COMPUTE=false  # true when --compute or --all was given
BASELINE_PATH=""  # value of --compare; empty means "no comparison requested"
DRY_RUN=""        # "--dry-run" when requested, otherwise empty
LIMIT=""          # "--limit N" when requested, otherwise empty

#######################################
# Parse command-line options into the option-state globals.
# Globals:
#   DO_POPULATE, DO_COMPUTE, BASELINE_PATH, DRY_RUN, LIMIT (written)
# Arguments:
#   The script's command-line arguments.
# Returns:
#   0 on success. On an unknown option, or an option whose value is missing or
#   malformed, logs an error and calls usage, which terminates the script.
#   --help / -h prints the help and exits 0.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --populate)
        DO_POPULATE=true
        shift
        ;;
      --compute)
        DO_COMPUTE=true
        shift
        ;;
      --compare)
        # Check the value exists before touching $2: under `set -u` a missing
        # value would otherwise abort with a bare "unbound variable" message.
        if [[ $# -lt 2 || -z "$2" ]]; then
          log_error "--compare requires a BASELINE argument"
          usage
        fi
        BASELINE_PATH="$2"
        shift 2
        ;;
      --all)
        DO_POPULATE=true
        DO_COMPUTE=true
        shift
        ;;
      --dry-run)
        DRY_RUN="--dry-run"
        shift
        ;;
      --limit)
        # NOTE(review): N is assumed to be a non-negative integer count;
        # confirm against populate-findings.py if other forms are accepted.
        if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
          log_error "--limit requires a non-negative integer argument"
          usage
        fi
        LIMIT="--limit $2"
        shift 2
        ;;
      --help | -h)
        # usage ends the shell with a status of its own choosing; running it in
        # a subshell lets an explicit help request always exit 0. The 0 is the
        # desired status, for a usage implementation that accepts one.
        (usage 0) || true
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
# Refuse to continue when nothing was requested: neither populate nor compute
# is enabled and no baseline was supplied for comparison. usage prints the
# help text and terminates the script.
if [[ "$DO_POPULATE" == false && "$DO_COMPUTE" == false && -z "$BASELINE_PATH" ]]; then
  log_error "No action specified"
  usage
fi
|
||||
|
||||
# Work from the repository root: the pipeline steps invoke their Python
# scripts through repo-relative paths.
cd "${REPO_ROOT}"
|
||||
|
||||
# Step 1: Populate findings

#######################################
# Run populate-findings.py, forwarding the optional dry-run and limit flags.
# Globals:
#   DRY_RUN (read) - "" or "--dry-run"
#   LIMIT   (read) - "" or "--limit N"
# Outputs:
#   A progress line, whatever the Python script prints, then a blank line.
# Returns:
#   Exit status of the last command; under `set -e` a failing Python script
#   aborts the whole run.
#######################################
run_populate() {
  local -a args=()
  local -a limit_words=()

  if [[ -n "${DRY_RUN}" ]]; then
    args+=("${DRY_RUN}")
  fi
  if [[ -n "${LIMIT}" ]]; then
    # LIMIT is a single "flag value" string. Split it into words with read
    # rather than an unquoted expansion, so the value is never glob-expanded.
    read -r -a limit_words <<<"${LIMIT}"
    args+=("${limit_words[@]}")
  fi

  log_info "Step 1: Populating findings from reachbench fixtures..."
  # The ${var[@]+...} form keeps an empty array from tripping `set -u` on
  # Bash releases older than 4.4.
  python3 scripts/bench/populate-findings.py ${args[@]+"${args[@]}"}
  echo ""
}

if [[ "$DO_POPULATE" == true ]]; then
  run_populate
fi
|
||||
|
||||
# Step 2: Compute metrics
# Invokes compute-metrics.py with --json; nothing is redirected, so the
# script's output appears on this script's stdout, followed by a blank line.
if [[ "$DO_COMPUTE" == true ]]; then
  log_info "Step 2: Computing metrics..."
  python3 scripts/bench/compute-metrics.py --json
  echo ""
fi
|
||||
|
||||
# Step 3: Compare with baseline
# Runs only when --compare supplied a value. That value is handed to
# compare.py as a single quoted argument, so spaces in it are preserved.
if [[ -n "$BASELINE_PATH" ]]; then
  log_info "Step 3: Comparing with baseline..."
  python3 bench/tools/compare.py --baseline "$BASELINE_PATH" --json
  echo ""
fi
|
||||
|
||||
# Closing status lines; reached only if every requested step succeeded,
# because strict mode aborts the script on the first failing command.
log_info "Benchmark automation complete!"
log_info "Results available in bench/results/"
|
||||
Reference in New Issue
Block a user