#!/usr/bin/env python3
"""
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
"""

import argparse
import json
import pathlib
from typing import Any, Dict, List, Optional


def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
    """Parse the case definition YAML at *case_path* and return its content.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.
    """
    import yaml  # PyYAML is already used elsewhere in bench tooling

    return yaml.safe_load(case_path.read_text(encoding="utf-8"))


def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
    """Load the truth JSON document that belongs to *case_id*.

    The file name is the part of *case_id* before the first ``:`` with a
    ``.json`` suffix, looked up directly under *truth_root*.

    Raises:
        FileNotFoundError: if that truth file does not exist.
    """
    base = case_id.split(":", 1)[0]
    truth_path = truth_root / f"{base}.json"
    if not truth_path.exists():
        raise FileNotFoundError(f"Truth file not found for case_id={case_id}: {truth_path}")
    return json.loads(truth_path.read_text(encoding="utf-8"))


def _find_truth_case(truth: Dict[str, Any], case_id: str) -> Optional[Dict[str, Any]]:
    """Return the truth entry for *case_id*, or ``None`` when there is none.

    An entry whose ``case_id`` equals *case_id* exactly always wins. Only when
    no exact match exists is the first entry sharing the same base (the text
    before the first ``:``) used, so sibling cases stored in one truth file
    cannot shadow each other.
    """
    entries = truth.get("cases") or []  # tolerate an explicit null list

    for entry in entries:
        if entry.get("case_id") == case_id:
            return entry

    base = case_id.split(":", 1)[0]
    for entry in entries:
        entry_id = entry.get("case_id") or ""  # missing/null id has an empty base
        if entry_id.split(":", 1)[0] == base:
            return entry
    return None


def _build_sink_entry(sink: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one truth sink record into a submission sink record."""
    # Any label other than "reachable" (including a missing one) is reported
    # as "unreachable".
    prediction = "reachable" if sink.get("label", "unreachable") == "reachable" else "unreachable"

    explain: Dict[str, Any] = {}
    call_path = (sink.get("static_evidence") or {}).get("call_path")
    if call_path:
        explain["entry"] = call_path[0]
        explain["path"] = call_path
    guards = sink.get("config_conditions") or sink.get("guards")
    if guards:
        explain["guards"] = guards

    sink_entry: Dict[str, Any] = {
        "sink_id": sink["sink_id"],
        "prediction": prediction,
    }
    confidence = sink.get("confidence")
    if isinstance(confidence, (int, float)):
        sink_entry["confidence"] = float(confidence)
    if explain:
        sink_entry["explain"] = explain
    if sink.get("notes"):
        sink_entry["notes"] = sink["notes"]
    return sink_entry


def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Assemble the submission document for *case* from its truth labels.

    Args:
        case: Parsed case definition; must contain ``"id"``. ``"version"`` is
            optional and defaults to ``"1.0.0"``.
        truth: Parsed truth document with a ``"cases"`` list.
        tool_version: Version string written into the ``tool`` section.

    Returns:
        A submission dict holding a single case whose sinks are sorted by
        ``sink_id``.

    Raises:
        ValueError: if *truth* has no entry matching the case id.
        KeyError: if *case* has no ``"id"`` or a sink has no ``"sink_id"``.
    """
    case_id = case["id"]
    case_version = str(case.get("version", "1.0.0"))

    truth_case = _find_truth_case(truth, case_id)
    if truth_case is None:
        raise ValueError(f"No truth entry found for case_id={case_id}")

    sinks: List[Dict[str, Any]] = sorted(
        (_build_sink_entry(sink) for sink in truth_case.get("sinks") or []),
        key=lambda s: s["sink_id"],
    )

    return {
        "version": "1.0.0",
        "tool": {"name": "stella", "version": tool_version},
        "run": {"platform": "stella-baseline-offline"},
        "cases": [
            {
                "case_id": case_id,
                "sinks": sinks,
                "case_version": case_version,
            }
        ],
    }


def main() -> None:
    """Command-line entry point: read the case and truth, write submission JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--case", required=True, help="Path to case.yaml")
    parser.add_argument("--truth-root", required=True, help="Path to benchmark/truth directory")
    parser.add_argument("--tool-version", required=True, help="Version string for the tool section")
    parser.add_argument("--output", required=True, help="Output submission.json path")
    args = parser.parse_args()

    case_path = pathlib.Path(args.case).resolve()
    truth_root = pathlib.Path(args.truth_root).resolve()
    out_path = pathlib.Path(args.output).resolve()

    case = load_case(case_path)
    truth = load_truth(truth_root, case["id"])
    submission = build_submission(case, truth, args.tool_version)

    # Create the output directory only once there is something to write, so a
    # failed run does not leave an empty directory behind.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(submission, indent=2, sort_keys=True), encoding="utf-8")


if __name__ == "__main__":
    main()