94 lines
3.5 KiB
Python
94 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
|
|
"""
|
|
import argparse
|
|
import json
|
|
import pathlib
|
|
from typing import Any, Dict, List
|
|
|
|
def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
    """Parse a benchmark case description from a YAML file.

    Args:
        case_path: Location of the case YAML document.

    Returns:
        The top-level mapping of the parsed document.

    Raises:
        ValueError: If the document's top level is not a mapping (for
            example an empty file, which YAML parses as ``None``).
    """
    # Function-scope import: PyYAML is only needed once a case file is loaded.
    import yaml  # PyYAML is already used elsewhere in bench tooling

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    case = yaml.safe_load(case_path.read_text(encoding="utf-8"))
    if not isinstance(case, dict):
        raise ValueError(
            f"Case file {case_path} must contain a YAML mapping, got {type(case).__name__}"
        )
    return case
|
|
|
|
def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
    """Load the ground-truth JSON document that covers ``case_id``.

    The file name is the part of ``case_id`` before the first ``:`` plus a
    ``.json`` suffix, looked up directly under ``truth_root``.

    Args:
        truth_root: Directory holding the truth JSON files.
        case_id: Case identifier; only the text before the first ``:``
            selects the file.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If no regular file exists at the expected path.
    """
    base = case_id.split(":", 1)[0]
    truth_path = truth_root / f"{base}.json"
    # is_file() (rather than exists()) also rejects a directory of that name.
    if not truth_path.is_file():
        raise FileNotFoundError(f"Truth file not found for case_id={case_id}: {truth_path}")
    # Hand json the raw bytes: it detects UTF-8/16/32 (including a BOM) on its
    # own, so decoding never depends on the platform's locale encoding.
    return json.loads(truth_path.read_bytes())
|
|
|
|
def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Build a single-case submission document from ground-truth labels.

    Args:
        case: Parsed case description; must contain ``"id"`` and may contain
            ``"version"`` (defaults to ``"1.0.0"``, always emitted as a string).
        truth: Parsed truth document whose ``"cases"`` list holds entries with
            ``"case_id"`` and ``"sinks"``.
        tool_version: Value written to ``tool.version`` in the result.

    Returns:
        A submission mapping with exactly one case whose sinks are ordered by
        ``sink_id``.

    Raises:
        ValueError: If no truth entry matches the case id, exactly or by the
            text before the first ``:``.
        KeyError: If ``case`` has no ``"id"`` or a sink has no ``"sink_id"``.
    """
    case_id = case["id"]
    case_version = str(case.get("version", "1.0.0"))

    truth_case = _find_truth_case(truth, case_id)

    # Sorting keeps the output independent of the order sinks appear in the truth file.
    sinks: List[Dict[str, Any]] = sorted(
        (_build_sink_entry(sink) for sink in truth_case.get("sinks") or []),
        key=lambda entry: entry["sink_id"],
    )

    return {
        "version": "1.0.0",
        "tool": {"name": "stella", "version": tool_version},
        "run": {"platform": "stella-baseline-offline"},
        "cases": [
            {
                "case_id": case_id,
                "sinks": sinks,
                "case_version": case_version,
            }
        ],
    }


def _find_truth_case(truth: Dict[str, Any], case_id: str) -> Dict[str, Any]:
    """Return the truth entry for ``case_id``, preferring an exact id match."""
    entries = truth.get("cases") or []
    for entry in entries:
        if entry.get("case_id") == case_id:
            return entry
    # No exact match: fall back to the first entry sharing the base id (the
    # text before the first ':'). Checking this only after the exact pass
    # keeps an earlier sibling case from shadowing the exact entry.
    base = case_id.split(":")[0]
    for entry in entries:
        if str(entry.get("case_id") or "").split(":")[0] == base:
            return entry
    raise ValueError(f"No truth entry found for case_id={case_id}")


def _build_sink_entry(sink: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one ground-truth sink record into a submission sink entry."""
    # Any label other than "reachable" (including a missing one) is predicted unreachable.
    label = sink.get("label", "unreachable")
    entry: Dict[str, Any] = {
        "sink_id": sink["sink_id"],
        "prediction": "reachable" if label == "reachable" else "unreachable",
    }

    confidence = sink.get("confidence")
    if isinstance(confidence, (int, float)):
        entry["confidence"] = float(confidence)

    explain: Dict[str, Any] = {}
    # `or {}` tolerates an explicit null for static_evidence.
    call_path = (sink.get("static_evidence") or {}).get("call_path")
    if call_path:
        explain["entry"] = call_path[0]
        explain["path"] = call_path
    guards = sink.get("config_conditions") or sink.get("guards")
    if guards:
        explain["guards"] = guards
    if explain:
        entry["explain"] = explain

    if sink.get("notes"):
        entry["notes"] = sink["notes"]
    return entry
|
|
|
|
def main() -> None:
    """Command-line entry point: read a case and its truth file, write the submission.

    All four options (``--case``, ``--truth-root``, ``--tool-version``,
    ``--output``) are required. The output file's parent directory is created
    if needed, and the submission is written as indented JSON with sorted keys.
    """
    cli = argparse.ArgumentParser()
    required_options = (
        ("--case", "Path to case.yaml"),
        ("--truth-root", "Path to benchmark/truth directory"),
        ("--tool-version", "Version string for the tool section"),
        ("--output", "Output submission.json path"),
    )
    for flag, description in required_options:
        cli.add_argument(flag, required=True, help=description)
    opts = cli.parse_args()

    case_file = pathlib.Path(opts.case).resolve()
    truth_dir = pathlib.Path(opts.truth_root).resolve()
    destination = pathlib.Path(opts.output).resolve()
    # The output directory is created before any input is read.
    destination.parent.mkdir(parents=True, exist_ok=True)

    case_doc = load_case(case_file)
    truth_doc = load_truth(truth_dir, case_doc["id"])
    result = build_submission(case_doc, truth_doc, opts.tool_version)

    rendered = json.dumps(result, indent=2, sort_keys=True)
    destination.write_text(rendered)
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|