git.stella-ops.org/bench/reachability-benchmark/baselines/stella/normalize.py

#!/usr/bin/env python3
"""
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
"""
import argparse
import json
import pathlib
from typing import Any, Dict, List

def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
    """Load a benchmark case description from a YAML file.

    Args:
        case_path: Path to the case YAML file.

    Returns:
        The parsed top-level mapping of the YAML document.

    Raises:
        ValueError: If the document is empty or its top level is not a mapping.
    """
    import yaml  # PyYAML is already used elsewhere in bench tooling

    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
    case = yaml.safe_load(case_path.read_text(encoding="utf-8"))
    if not isinstance(case, dict):
        # Fail here with a clear message instead of a TypeError at the first key lookup.
        raise ValueError(
            f"Case file must contain a YAML mapping at the top level: {case_path}"
        )
    return case

def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
    """Load the ground-truth JSON document that covers ``case_id``.

    The truth file name is the part of ``case_id`` before the first ``":"``
    with a ``.json`` suffix, looked up directly under ``truth_root``.

    Args:
        truth_root: Directory containing the truth JSON files.
        case_id: Case identifier, optionally of the form ``"<base>:<suffix>"``.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If no truth file exists for the case.
    """
    base = case_id.split(":", 1)[0]
    truth_path = truth_root / f"{base}.json"
    try:
        # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
        raw = truth_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Re-raise with the case id so the failing case is obvious from the message.
        raise FileNotFoundError(
            f"Truth file not found for case_id={case_id}: {truth_path}"
        ) from None
    return json.loads(raw)

def _find_truth_case(truth: Dict[str, Any], case_id: str) -> Dict[str, Any]:
    """Return the truth entry for ``case_id``, preferring an exact id match."""
    entries = truth.get("cases") or []

    # An exact id match always wins, wherever it sits in the list.
    for entry in entries:
        if entry.get("case_id") == case_id:
            return entry

    # Otherwise fall back to the first entry sharing the base id (text before ":").
    wanted_base = case_id.split(":", 1)[0]
    for entry in entries:
        entry_id = entry.get("case_id", "")
        if isinstance(entry_id, str) and entry_id.split(":", 1)[0] == wanted_base:
            return entry

    raise ValueError(f"No truth entry found for case_id={case_id}")


def _build_sink_entry(sink: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one truth sink record into a submission sink record."""
    # Any label other than "reachable" (including a missing one) is "unreachable".
    label = sink.get("label", "unreachable")
    sink_entry: Dict[str, Any] = {
        "sink_id": sink["sink_id"],
        "prediction": "reachable" if label == "reachable" else "unreachable",
    }

    confidence = sink.get("confidence")
    if isinstance(confidence, (int, float)):
        sink_entry["confidence"] = float(confidence)

    explain: Dict[str, Any] = {}
    # "static_evidence" may be absent or an explicit null; both mean "no evidence".
    call_path = (sink.get("static_evidence") or {}).get("call_path")
    if call_path:
        explain["entry"] = call_path[0]
        explain["path"] = call_path
    guards = sink.get("config_conditions") or sink.get("guards")
    if guards:
        explain["guards"] = guards
    if explain:
        sink_entry["explain"] = explain

    if sink.get("notes"):
        sink_entry["notes"] = sink["notes"]
    return sink_entry


def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Build a single-case submission document from ground-truth labels.

    The truth entry is selected by exact ``case_id`` match when one exists;
    otherwise the first entry whose id shares the same base (the text before
    the first ``":"``) is used. Sinks are emitted sorted by ``sink_id``.

    Args:
        case: Parsed case description; must contain ``"id"`` and may contain
            ``"version"`` (defaults to ``"1.0.0"``).
        truth: Parsed truth document with a ``"cases"`` list.
        tool_version: Version string recorded in the ``tool`` section.

    Returns:
        The submission dictionary with ``version``, ``tool``, ``run`` and a
        one-element ``cases`` list.

    Raises:
        KeyError: If ``case`` has no ``"id"`` or a sink has no ``"sink_id"``.
        ValueError: If ``truth`` has no entry for the case.
    """
    case_id = case["id"]
    case_version = str(case.get("version", "1.0.0"))

    truth_case = _find_truth_case(truth, case_id)

    sinks: List[Dict[str, Any]] = [
        _build_sink_entry(sink) for sink in (truth_case.get("sinks") or [])
    ]
    # Sort so the output does not depend on the order of the truth file.
    sinks.sort(key=lambda s: s["sink_id"])

    return {
        "version": "1.0.0",
        "tool": {"name": "stella", "version": tool_version},
        "run": {"platform": "stella-baseline-offline"},
        "cases": [
            {
                "case_id": case_id,
                "sinks": sinks,
                "case_version": case_version,
            }
        ],
    }

def main() -> None:
    """Command-line entry point: build one case's submission and write it as JSON.

    Reads the four required options (``--case``, ``--truth-root``,
    ``--tool-version``, ``--output``), creates the output directory if needed,
    loads the case and its truth document, and writes the resulting submission
    as indented JSON with sorted keys.
    """
    cli = argparse.ArgumentParser()
    required_options = (
        ("--case", "Path to case.yaml"),
        ("--truth-root", "Path to benchmark/truth directory"),
        ("--tool-version", "Version string for the tool section"),
        ("--output", "Output submission.json path"),
    )
    for flag, description in required_options:
        cli.add_argument(flag, required=True, help=description)
    options = cli.parse_args()

    case_file = pathlib.Path(options.case).resolve()
    truth_dir = pathlib.Path(options.truth_root).resolve()
    destination = pathlib.Path(options.output).resolve()

    # The output directory is created before any input is read.
    destination.parent.mkdir(parents=True, exist_ok=True)

    case_doc = load_case(case_file)
    truth_doc = load_truth(truth_dir, case_doc["id"])
    result = build_submission(case_doc, truth_doc, options.tool_version)

    # Sorted keys and fixed indentation keep the serialized form stable.
    rendered = json.dumps(result, indent=2, sort_keys=True)
    destination.write_text(rendered)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()