Files
git.stella-ops.org/bench/reachability-benchmark/ci/run-ci.sh
StellaOps Bot 108d1c64b3
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Findings Ledger CI / build-test (push) Has been cancelled
Findings Ledger CI / migration-validation (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
cryptopro-linux-csp / build-and-test (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
sm-remote-ci / build-and-test (push) Has been cancelled
Findings Ledger CI / generate-manifest (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
up
2025-12-09 09:38:09 +02:00

55 lines
1.9 KiB
Bash

#!/usr/bin/env bash
# Deterministic CI runner for reachability benchmark (task BENCH-CI-513-013).
set -euo pipefail

# Benchmark root: the parent of the directory that holds this script.
# CDPATH is emptied for the cd so that a caller's CDPATH can neither redirect
# the lookup of a relative script path nor make cd print the directory into the
# captured output; `--` protects paths that begin with a dash.
ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"

# Fixed environment for every tool started below.
# SOURCE_DATE_EPOCH keeps a value supplied by the caller and only falls back to
# the constant when it is unset or empty.
export SOURCE_DATE_EPOCH="${SOURCE_DATE_EPOCH:-1730000000}"
export DOTNET_CLI_TELEMETRY_OPTOUT=1
export GIT_TERMINAL_PROMPT=0
export TZ=UTC
# Load the JDK helper script, then call ensure_bench_jdk
# (presumably defined by that helper - confirm in tools/java/ensure_jdk.sh).
. "${ROOT}/tools/java/ensure_jdk.sh"
ensure_bench_jdk

# 1) Schema validation (truth + submission samples)
python "${ROOT}/tools/validate.py" \
  --schemas "${ROOT}/schemas"

# 2) Deterministic build of every case (Java included, via the vendored JDK)
python "${ROOT}/tools/build/build_all.py" \
  --cases "${ROOT}/cases"

# 3-5) Baselines, run in this order:
#   semgrep - offline-safe
#   stella  - offline-safe, uses truth
#   codeql  - offline-safe fallback
# Each runner receives the cases directory and its own output directory.
for baseline in semgrep stella codeql; do
  bash "${ROOT}/baselines/${baseline}/run_all.sh" \
    "${ROOT}/cases" \
    "${ROOT}/out/${baseline}-baseline"
done
# 6) Build aggregated truth (merge all truth JSON files)
TRUTH_AGG="${ROOT}/out/truth-aggregated.json"

#######################################
# Merge the "cases" arrays of every *.json file in a directory into one
# aggregated truth document.
# Arguments:
#   $1 - directory holding the truth JSON documents
#   $2 - path of the aggregated JSON file to write
# Outputs:
#   Writes {"cases": [...], "version": "1.0.0"} to $2 (indent 2, sorted keys).
# Returns:
#   Exit status of the embedded Python program.
#######################################
aggregate_truth() {
  # The arguments must come before the here-document: the closing PY delimiter
  # only ends the here-document when it stands alone on its line, so nothing
  # may follow it there.
  python - "$1" "$2" <<'PY'
import json, pathlib, sys

truth_dir = pathlib.Path(sys.argv[1])
out_path = pathlib.Path(sys.argv[2])
cases = []
# Sorted iteration keeps the merged case order independent of directory order.
for path in sorted(truth_dir.glob("*.json")):
    # Explicit UTF-8 so the result does not depend on the locale of the runner.
    doc = json.loads(path.read_text(encoding="utf-8"))
    # Files without a "cases" key contribute nothing.
    cases.extend(doc.get("cases", []))
agg = {"version": "1.0.0", "cases": cases}
out_path.write_text(json.dumps(agg, indent=2, sort_keys=True), encoding="utf-8")
PY
}

aggregate_truth "${ROOT}/benchmark/truth" "${TRUTH_AGG}"
# 7) Leaderboard (using available baselines)
# Collect one submission.json per baseline, in the order semgrep, stella,
# codeql, and hand them to the scorer as the values of --submissions.
submissions=()
for baseline in semgrep stella codeql; do
  submissions+=("${ROOT}/out/${baseline}-baseline/submission.json")
done

python "${ROOT}/tools/scorer/rb_compare.py" \
  --truth "${TRUTH_AGG}" \
  --submissions "${submissions[@]}" \
  --output "${ROOT}/out/leaderboard.json" \
  --text

echo "CI run complete. Outputs under ${ROOT}/out"