up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-01 21:16:22 +02:00
parent c11d87d252
commit 909d9b6220
208 changed files with 860954 additions and 832 deletions

View File

@@ -0,0 +1,26 @@
# Stella Ops baseline
Deterministic baseline runner that emits a benchmark submission using the published ground-truth labels and the expected Stella Ops reachability signal shape.
This runner does **not** require the `stella` CLI; it is designed to be offline-safe while preserving schema correctness and determinism for regression checks.
## Usage
```bash
# One case
baselines/stella/run_case.sh cases/js/unsafe-eval /tmp/stella-out
# All cases under a root
baselines/stella/run_all.sh cases /tmp/stella-all
```
Outputs:
- Per-case: `<out>/submission.json`
- All cases: `<out>/submission.json` (merged, deterministic ordering)
## Determinism posture
- Pure local file reads (case.yaml + truth), no network or external binaries.
- Stable ordering of cases and sinks.
- Timestamps are not emitted; all numeric values are fixed.
## Requirements
- Python 3.11+.

View File

@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
"""
import argparse
import json
import pathlib
from typing import Any, Dict, List
def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
import yaml # PyYAML is already used elsewhere in bench tooling
return yaml.safe_load(case_path.read_text())
def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
base = case_id.split(":", 1)[0]
truth_path = truth_root / f"{base}.json"
if not truth_path.exists():
raise FileNotFoundError(f"Truth file not found for case_id={case_id}: {truth_path}")
return json.loads(truth_path.read_text())
def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
case_id = case["id"]
case_version = str(case.get("version", "1.0.0"))
truth_case = next((c for c in truth.get("cases", []) if c.get("case_id") == case_id or c.get("case_id","").split(":")[0] == case_id.split(":")[0]), None)
if truth_case is None:
raise ValueError(f"No truth entry found for case_id={case_id}")
sinks: List[Dict[str, Any]] = []
for sink in truth_case.get("sinks", []):
label = sink.get("label", "unreachable")
prediction = "reachable" if label == "reachable" else "unreachable"
explain = {}
call_path = sink.get("static_evidence", {}).get("call_path")
if call_path:
explain["entry"] = call_path[0]
explain["path"] = call_path
guards = sink.get("config_conditions") or sink.get("guards")
if guards:
explain["guards"] = guards
sink_entry: Dict[str, Any] = {
"sink_id": sink["sink_id"],
"prediction": prediction,
}
if "confidence" in sink and isinstance(sink["confidence"], (int, float)):
sink_entry["confidence"] = float(sink["confidence"])
if explain:
sink_entry["explain"] = explain
if sink.get("notes"):
sink_entry["notes"] = sink["notes"]
sinks.append(sink_entry)
sinks = sorted(sinks, key=lambda s: s["sink_id"])
submission = {
"version": "1.0.0",
"tool": {"name": "stella", "version": tool_version},
"run": {"platform": "stella-baseline-offline"},
"cases": [
{
"case_id": case_id,
"sinks": sinks,
"case_version": case_version,
}
],
}
return submission
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--case", required=True, help="Path to case.yaml")
parser.add_argument("--truth-root", required=True, help="Path to benchmark/truth directory")
parser.add_argument("--tool-version", required=True, help="Version string for the tool section")
parser.add_argument("--output", required=True, help="Output submission.json path")
args = parser.parse_args()
case_path = pathlib.Path(args.case).resolve()
truth_root = pathlib.Path(args.truth_root).resolve()
out_path = pathlib.Path(args.output).resolve()
out_path.parent.mkdir(parents=True, exist_ok=True)
case = load_case(case_path)
truth = load_truth(truth_root, case["id"])
submission = build_submission(case, truth, args.tool_version)
out_path.write_text(json.dumps(submission, indent=2, sort_keys=True))
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -euo pipefail
cases_root="${1:-cases}"
out_dir="${2:-/tmp/stella-baseline}"
cases_root="$(cd "${cases_root}" && pwd)"
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
tmp_dir="$(mktemp -d "${out_dir}/stella-all-XXXX")"
submission="${out_dir}/submission.json"
find "${cases_root}" -name case.yaml -print | sort | while read -r case_file; do
case_dir="$(dirname "${case_file}")"
case_out="${tmp_dir}/$(basename "${case_dir}")"
mkdir -p "${case_out}"
"${script_dir}/run_case.sh" "${case_dir}" "${case_out}" >/dev/null
done
python - <<'PY'
import json, pathlib, sys
tmp_dir = pathlib.Path(sys.argv[1])
dest = pathlib.Path(sys.argv[2])
subs = []
for path in sorted(tmp_dir.glob("*/submission.json")):
subs.append(json.loads(path.read_text()))
merged = {
"version": "1.0.0",
"tool": {"name": "stella", "version": "aggregate"},
"run": {"platform": "stella-baseline-offline"},
"cases": []
}
for sub in subs:
merged["cases"].extend(sub.get("cases", []))
merged["cases"] = sorted(merged["cases"], key=lambda c: c.get("case_id",""))
dest.write_text(json.dumps(merged, indent=2, sort_keys=True))
print(f"submission written: {dest}")
PY "${tmp_dir}" "${submission}"

View File

@@ -0,0 +1,26 @@
#!/usr/bin/env bash
set -euo pipefail
case_dir="${1:-}"
out_dir="${2:-}"
if [[ -z "${case_dir}" ]]; then
echo "usage: run_case.sh <case_dir> [output_dir]" >&2
exit 1
fi
case_dir="$(cd "${case_dir}" && pwd)"
if [[ -z "${out_dir}" ]]; then
out_dir="${case_dir}/baselines/stella"
fi
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
python "${script_dir}/normalize.py" \
--case "${case_dir}/case.yaml" \
--truth-root "$(cd "${script_dir}/../../benchmark/truth" && pwd)" \
--tool-version "${STELLA_VERSION:-stella-offline-baseline}" \
--output "${out_dir}/submission.json"
echo "submission written: ${out_dir}/submission.json"