#!/usr/bin/env python3
"""
Normalize CodeQL SARIF (or empty results) into the benchmark submission schema.
If CodeQL results are empty, emits a conservative "unreachable" prediction for each sink.
"""

import argparse
import json
import pathlib
from typing import Any, Dict, List


def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
    """Parse the case description file at *case_path* as YAML.

    Args:
        case_path: Location of the ``case.yaml`` file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file contents.
    """
    # Imported here rather than at module level, so PyYAML is only needed
    # when a case file is actually loaded.
    import yaml

    # Decode explicitly as UTF-8 instead of the platform default encoding.
    return yaml.safe_load(case_path.read_text(encoding="utf-8"))


def load_codeql_results(path: pathlib.Path) -> Dict[str, Any]:
    """Load the CodeQL results JSON, falling back to an empty result set.

    The fallback ``{"results": []}`` is returned when the file does not
    exist, cannot be decoded as UTF-8, is not valid JSON, or holds a JSON
    value whose top level is not an object.

    Args:
        path: Location of the CodeQL results file (SARIF or placeholder).

    Returns:
        The parsed JSON object, or ``{"results": []}``.
    """
    empty: Dict[str, Any] = {"results": []}
    if not path.exists():
        return empty
    try:
        parsed = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        return empty
    # Valid JSON such as `[]` or `null` would break the dict return contract.
    return parsed if isinstance(parsed, dict) else empty


def build_submission(case: Dict[str, Any], sarif: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Build the submission document for a single benchmark case.

    Every sink listed in *case* is predicted "unreachable"; the predictions
    are ordered by sink id.

    Args:
        case: Parsed case description. Must contain ``"id"``; ``"version"``
            defaults to ``"1.0.0"`` and ``"sinks"`` to no sinks. Each sink
            must contain ``"id"``.
        sarif: Parsed CodeQL results. Currently unused (see the placeholder
            note below).
        tool_version: Version string recorded in the ``tool`` section.

    Returns:
        The submission dictionary, ready to be serialized as JSON.

    Raises:
        KeyError: If *case* has no ``"id"`` or a sink has no ``"id"``.
    """
    case_id = case["id"]
    case_version = str(case.get("version", "1.0.0"))
    # `or []` also covers a YAML `sinks:` key with no value, which loads as None.
    sinks = case.get("sinks") or []

    # SARIF parsing placeholder: currently unused; results assumed empty/offline.
    predictions: List[Dict[str, Any]] = sorted(
        (
            {
                "sink_id": sink["id"],
                "prediction": "unreachable",
                "notes": "CodeQL baseline fallback (no findings)",
            }
            for sink in sinks
        ),
        key=lambda entry: entry["sink_id"],
    )

    return {
        "version": "1.0.0",
        "tool": {"name": "codeql", "version": tool_version},
        "run": {"platform": "codeql-baseline-offline"},
        "cases": [
            {
                "case_id": case_id,
                "case_version": case_version,
                "sinks": predictions,
            }
        ],
    }


def main() -> None:
    """Parse command-line arguments and write the submission JSON file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--case", required=True, help="Path to case.yaml")
    parser.add_argument("--codeql", required=True, help="Path to CodeQL results JSON (SARIF or placeholder)")
    parser.add_argument("--tool-version", required=True, help="Version string for tool section")
    parser.add_argument("--output", required=True, help="Destination submission.json")
    args = parser.parse_args()

    case_path = pathlib.Path(args.case).resolve()
    codeql_path = pathlib.Path(args.codeql).resolve()
    out_path = pathlib.Path(args.output).resolve()
    # Create the destination directory (and any missing parents) up front.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    case = load_case(case_path)
    sarif = load_codeql_results(codeql_path)
    submission = build_submission(case, sarif, args.tool_version)

    # sort_keys keeps the key order of the serialized output stable.
    out_path.write_text(json.dumps(submission, indent=2, sort_keys=True), encoding="utf-8")


if __name__ == "__main__":
    main()