up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-01 21:16:22 +02:00
parent c11d87d252
commit 909d9b6220
208 changed files with 860954 additions and 832 deletions

View File

@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
"""
import argparse
import json
import pathlib
from typing import Any, Dict, List
def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
    """Load and parse the benchmark case descriptor (``case.yaml``).

    Args:
        case_path: Path to the YAML case file.

    Returns:
        The parsed top-level mapping of the case file.

    Raises:
        ValueError: If the top level of the document is not a mapping
            (for example, an empty file parses to ``None``).
    """
    # Function-scope import: PyYAML is only needed when a case is actually
    # loaded. PyYAML is already used elsewhere in bench tooling.
    import yaml

    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
    case = yaml.safe_load(case_path.read_text(encoding="utf-8"))
    if not isinstance(case, dict):
        raise ValueError(f"Case file must contain a YAML mapping: {case_path}")
    return case
def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
    """Load the ground-truth JSON document that covers ``case_id``.

    The file name is derived from the part of ``case_id`` before the first
    ``":"``, i.e. ``<truth_root>/<base>.json``.

    Args:
        truth_root: Directory containing the truth ``*.json`` files.
        case_id: Case identifier, optionally suffixed with ``":<something>"``.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If no truth file exists for the derived base name.
    """
    base = case_id.split(":", 1)[0]
    truth_path = truth_root / f"{base}.json"
    try:
        # Read directly instead of checking ``exists()`` first (no race), and
        # decode as UTF-8 so the result does not depend on the host locale.
        raw = truth_path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError) as err:
        # NotADirectoryError covers a truth_root that is not a directory,
        # which the previous ``exists()`` check also reported as "not found".
        raise FileNotFoundError(
            f"Truth file not found for case_id={case_id}: {truth_path}"
        ) from err
    return json.loads(raw)
def _find_truth_case(truth: Dict[str, Any], case_id: str) -> Dict[str, Any]:
    """Return the truth entry for ``case_id``: exact id first, then same base id."""
    candidates = truth.get("cases") or []
    # An exact match must win even if an entry sharing only the base id
    # appears earlier in the list.
    for candidate in candidates:
        if candidate.get("case_id") == case_id:
            return candidate
    base = case_id.split(":")[0]
    for candidate in candidates:
        if (candidate.get("case_id") or "").split(":")[0] == base:
            return candidate
    raise ValueError(f"No truth entry found for case_id={case_id}")


def _build_sink_entry(sink: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one ground-truth sink record into a submission sink entry."""
    # Any label other than "reachable" (including a missing one) is reported
    # as "unreachable".
    label = sink.get("label", "unreachable")
    entry: Dict[str, Any] = {
        "sink_id": sink["sink_id"],
        "prediction": "reachable" if label == "reachable" else "unreachable",
    }

    confidence = sink.get("confidence")
    if isinstance(confidence, (int, float)):
        entry["confidence"] = float(confidence)

    explain: Dict[str, Any] = {}
    # ``static_evidence`` may be present but null in the truth data.
    call_path = (sink.get("static_evidence") or {}).get("call_path")
    if call_path:
        explain["entry"] = call_path[0]
        explain["path"] = call_path
    # ``config_conditions`` takes precedence over ``guards`` when both exist.
    guards = sink.get("config_conditions") or sink.get("guards")
    if guards:
        explain["guards"] = guards
    if explain:
        entry["explain"] = explain

    if sink.get("notes"):
        entry["notes"] = sink["notes"]
    return entry


def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Build a submission document for one case from its ground-truth labels.

    Args:
        case: Parsed case descriptor; must contain ``"id"`` and may contain
            ``"version"`` (defaults to ``"1.0.0"``).
        truth: Parsed truth document with a ``"cases"`` list.
        tool_version: Version string recorded in the ``tool`` section.

    Returns:
        A submission mapping with a single case whose sinks are sorted by
        ``sink_id``.

    Raises:
        ValueError: If ``truth`` has no entry matching the case id, either
            exactly or by the part before the first ``":"``.
        KeyError: If ``case`` lacks ``"id"`` or a sink lacks ``"sink_id"``.
    """
    case_id = case["id"]
    case_version = str(case.get("version", "1.0.0"))
    truth_case = _find_truth_case(truth, case_id)

    # Sorting by sink_id keeps the output independent of truth-file ordering.
    sinks: List[Dict[str, Any]] = sorted(
        (_build_sink_entry(sink) for sink in truth_case.get("sinks") or []),
        key=lambda s: s["sink_id"],
    )

    return {
        "version": "1.0.0",
        "tool": {"name": "stella", "version": tool_version},
        "run": {"platform": "stella-baseline-offline"},
        "cases": [
            {
                "case_id": case_id,
                "sinks": sinks,
                "case_version": case_version,
            }
        ],
    }
def main() -> None:
    """Command-line entry point: build a submission file for one case.

    Reads the case descriptor given by ``--case``, loads its truth document
    from ``--truth-root``, builds the submission and writes it as JSON
    (2-space indent, sorted keys) to ``--output``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--case", required=True, help="Path to case.yaml")
    parser.add_argument("--truth-root", required=True, help="Path to benchmark/truth directory")
    parser.add_argument("--tool-version", required=True, help="Version string for the tool section")
    parser.add_argument("--output", required=True, help="Output submission.json path")
    args = parser.parse_args()

    case_path = pathlib.Path(args.case).resolve()
    truth_root = pathlib.Path(args.truth_root).resolve()
    out_path = pathlib.Path(args.output).resolve()

    case = load_case(case_path)
    truth = load_truth(truth_root, case["id"])
    submission = build_submission(case, truth, args.tool_version)

    # Create the output directory only after the inputs were loaded and the
    # submission was built, so a failed run leaves no stray directories.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Write bytes rather than text: text mode would translate "\n" to the
    # platform line ending and use the locale encoding, making the output
    # differ between hosts.
    payload = json.dumps(submission, indent=2, sort_keys=True)
    out_path.write_bytes(payload.encode("utf-8"))


if __name__ == "__main__":
    main()