git.stella-ops.org/bench/reachability-benchmark/baselines/stella/normalize.py

#!/usr/bin/env python3
"""
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
"""
import argparse
import json
import pathlib
from typing import Any, Dict, List


def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
    import yaml  # PyYAML is already used elsewhere in bench tooling

    return yaml.safe_load(case_path.read_text())


def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
    base = case_id.split(":", 1)[0]
    truth_path = truth_root / f"{base}.json"
    if not truth_path.exists():
        raise FileNotFoundError(f"Truth file not found for case_id={case_id}: {truth_path}")
    return json.loads(truth_path.read_text())


def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    case_id = case["id"]
    case_version = str(case.get("version", "1.0.0"))
    # Match the truth entry either on the full case_id or on its base (before the ":" suffix).
    truth_case = next(
        (
            c
            for c in truth.get("cases", [])
            if c.get("case_id") == case_id
            or c.get("case_id", "").split(":")[0] == case_id.split(":")[0]
        ),
        None,
    )
    if truth_case is None:
        raise ValueError(f"No truth entry found for case_id={case_id}")

    sinks: List[Dict[str, Any]] = []
    for sink in truth_case.get("sinks", []):
        label = sink.get("label", "unreachable")
        prediction = "reachable" if label == "reachable" else "unreachable"
        # Carry over static evidence and guard conditions as the "explain" payload.
        explain = {}
        call_path = sink.get("static_evidence", {}).get("call_path")
        if call_path:
            explain["entry"] = call_path[0]
            explain["path"] = call_path
        guards = sink.get("config_conditions") or sink.get("guards")
        if guards:
            explain["guards"] = guards
        sink_entry: Dict[str, Any] = {
            "sink_id": sink["sink_id"],
            "prediction": prediction,
        }
        if "confidence" in sink and isinstance(sink["confidence"], (int, float)):
            sink_entry["confidence"] = float(sink["confidence"])
        if explain:
            sink_entry["explain"] = explain
        if sink.get("notes"):
            sink_entry["notes"] = sink["notes"]
        sinks.append(sink_entry)

    # Sort sinks so the submission payload is deterministic.
    sinks = sorted(sinks, key=lambda s: s["sink_id"])
    submission = {
        "version": "1.0.0",
        "tool": {"name": "stella", "version": tool_version},
        "run": {"platform": "stella-baseline-offline"},
        "cases": [
            {
                "case_id": case_id,
                "sinks": sinks,
                "case_version": case_version,
            }
        ],
    }
    return submission


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--case", required=True, help="Path to case.yaml")
    parser.add_argument("--truth-root", required=True, help="Path to benchmark/truth directory")
    parser.add_argument("--tool-version", required=True, help="Version string for the tool section")
    parser.add_argument("--output", required=True, help="Output submission.json path")
    args = parser.parse_args()

    case_path = pathlib.Path(args.case).resolve()
    truth_root = pathlib.Path(args.truth_root).resolve()
    out_path = pathlib.Path(args.output).resolve()
    out_path.parent.mkdir(parents=True, exist_ok=True)

    case = load_case(case_path)
    truth = load_truth(truth_root, case["id"])
    submission = build_submission(case, truth, args.tool_version)
    out_path.write_text(json.dumps(submission, indent=2, sort_keys=True))


if __name__ == "__main__":
    main()