up

2025-12-01 21:16:22 +02:00
parent c11d87d252
commit 909d9b6220
208 changed files with 860954 additions and 832 deletions
--- a/bench/reachability-benchmark/baselines/stella/normalize.py
+++ b/bench/reachability-benchmark/baselines/stella/normalize.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+"""
+Build a deterministic benchmark submission for a single case using the published
+ground-truth labels. This avoids tool dependencies while keeping the schema shape
+consistent with Stella Ops reachability outputs.
+"""
+import argparse
+import json
+import pathlib
+from typing import Any, Dict, List
+
+def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
+    """Parse a benchmark ``case.yaml`` file into a dictionary.
+
+    Args:
+        case_path: Location of the YAML case description.
+
+    Returns:
+        The top-level mapping stored in the YAML document.
+
+    Raises:
+        ValueError: If the top level of the document is not a mapping (for
+            example an empty file, which PyYAML parses to ``None``).
+    """
+    import yaml  # PyYAML is already used elsewhere in bench tooling
+
+    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
+    case = yaml.safe_load(case_path.read_text(encoding="utf-8"))
+    if not isinstance(case, dict):
+        raise ValueError(f"Case file must contain a YAML mapping: {case_path}")
+    return case
+
+def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
+    """Load the ground-truth JSON document that covers ``case_id``.
+
+    The file is looked up directly under ``truth_root``; its name is the part
+    of ``case_id`` before the first ``":"`` followed by ``.json``.
+
+    Args:
+        truth_root: Directory holding the truth JSON files.
+        case_id: Case identifier taken from the case description.
+
+    Returns:
+        The decoded JSON document.
+
+    Raises:
+        FileNotFoundError: If the expected truth file does not exist.
+    """
+    base = case_id.split(":", 1)[0]
+    truth_path = truth_root / f"{base}.json"
+    try:
+        # Read directly instead of checking exists() first: no window between
+        # check and use, and UTF-8 is fixed rather than taken from the locale.
+        raw = truth_path.read_text(encoding="utf-8")
+    except FileNotFoundError as err:
+        raise FileNotFoundError(
+            f"Truth file not found for case_id={case_id}: {truth_path}"
+        ) from err
+    return json.loads(raw)
+
+def _find_truth_case(truth: Dict[str, Any], case_id: str) -> Dict[str, Any]:
+    """Return the truth entry for ``case_id``, preferring an exact id match."""
+    entries = truth.get("cases") or []
+    # An exact match must win: one truth document can list several cases that
+    # share the same "<base>:" prefix, and a prefix comparison alone would
+    # return whichever of them happens to come first.
+    for entry in entries:
+        if entry.get("case_id") == case_id:
+            return entry
+    base = case_id.split(":")[0]
+    for entry in entries:
+        if str(entry.get("case_id") or "").split(":")[0] == base:
+            return entry
+    raise ValueError(f"No truth entry found for case_id={case_id}")
+
+
+def _build_sink_entry(sink: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert one ground-truth sink record into a submission sink entry."""
+    label = sink.get("label", "unreachable")
+    entry: Dict[str, Any] = {
+        "sink_id": sink["sink_id"],
+        # Every label other than "reachable" is reported as "unreachable".
+        "prediction": "reachable" if label == "reachable" else "unreachable",
+    }
+
+    confidence = sink.get("confidence")
+    # bool is a subclass of int; a JSON true/false is not a confidence score.
+    if isinstance(confidence, (int, float)) and not isinstance(confidence, bool):
+        entry["confidence"] = float(confidence)
+
+    explain: Dict[str, Any] = {}
+    # Tolerate an explicit null "static_evidence" value as well as a missing key.
+    call_path = (sink.get("static_evidence") or {}).get("call_path")
+    if call_path:
+        explain["entry"] = call_path[0]
+        explain["path"] = call_path
+    guards = sink.get("config_conditions") or sink.get("guards")
+    if guards:
+        explain["guards"] = guards
+    if explain:
+        entry["explain"] = explain
+
+    if sink.get("notes"):
+        entry["notes"] = sink["notes"]
+    return entry
+
+
+def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
+    """Build a single-case submission document from ground-truth labels.
+
+    Args:
+        case: Parsed case description; ``case["id"]`` is required and
+            ``case["version"]`` defaults to ``"1.0.0"``.
+        truth: Parsed truth document with a ``"cases"`` list.
+        tool_version: Version string recorded in the ``tool`` section.
+
+    Returns:
+        A submission dictionary with exactly one case, whose sinks are sorted
+        by ``sink_id`` so the output does not depend on truth-file ordering.
+
+    Raises:
+        KeyError: If ``case`` has no ``"id"`` or a sink has no ``"sink_id"``.
+        ValueError: If ``truth`` has no entry matching the case id.
+    """
+    case_id = case["id"]
+    case_version = str(case.get("version", "1.0.0"))
+    truth_case = _find_truth_case(truth, case_id)
+
+    sinks: List[Dict[str, Any]] = sorted(
+        (_build_sink_entry(sink) for sink in truth_case.get("sinks") or []),
+        key=lambda entry: entry["sink_id"],
+    )
+
+    return {
+        "version": "1.0.0",
+        "tool": {"name": "stella", "version": tool_version},
+        "run": {"platform": "stella-baseline-offline"},
+        "cases": [
+            {
+                "case_id": case_id,
+                "sinks": sinks,
+                "case_version": case_version,
+            }
+        ],
+    }
+
+def main() -> None:
+    """Command-line entry point: read a case and its truth, write a submission.
+
+    Parses ``--case``, ``--truth-root``, ``--tool-version`` and ``--output``,
+    builds the submission for the given case and writes it as JSON with sorted
+    keys and two-space indentation to the output path.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--case", required=True, help="Path to case.yaml")
+    parser.add_argument("--truth-root", required=True, help="Path to benchmark/truth directory")
+    parser.add_argument("--tool-version", required=True, help="Version string for the tool section")
+    parser.add_argument("--output", required=True, help="Output submission.json path")
+    args = parser.parse_args()
+
+    case_path = pathlib.Path(args.case).resolve()
+    truth_root = pathlib.Path(args.truth_root).resolve()
+    out_path = pathlib.Path(args.output).resolve()
+
+    # Build the submission before touching the filesystem so that a bad input
+    # does not leave a freshly created (and empty) output directory behind.
+    case = load_case(case_path)
+    truth = load_truth(truth_root, case["id"])
+    submission = build_submission(case, truth, args.tool_version)
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    # Pin the encoding and line endings so the bytes written are the same on
+    # every platform; the default text mode would translate "\n" per OS.
+    with out_path.open("w", encoding="utf-8", newline="\n") as handle:
+        handle.write(json.dumps(submission, indent=2, sort_keys=True))
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()