#!/usr/bin/env python3
"""Generate corpus-manifest.json from case directories."""
from __future__ import annotations

import hashlib
import json
from datetime import datetime, timezone
from pathlib import Path

# Repository root: two directory levels above the directory holding this script.
ROOT = Path(__file__).resolve().parents[2]
# Directory tree that is searched recursively for case directories.
CORPUS = ROOT / "bench" / "golden-corpus" / "categories"
# Destination of the generated manifest.
OUTPUT = ROOT / "bench" / "golden-corpus" / "corpus-manifest.json"


def sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in binary mode in 8 KiB chunks, so it is never loaded
    into memory as a whole.

    Args:
        path: File to hash.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        # iter(callable, sentinel) keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: fh.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()


def main() -> int:
    """Write the corpus manifest to OUTPUT and return the process exit code.

    Every directory below CORPUS that contains a ``case-manifest.json`` is
    treated as a case. For each case the manifest records the directory name
    as ``id``, the directory path relative to ROOT (always with forward
    slashes) as ``path``, and the SHA-256 of its ``case-manifest.json`` as
    ``manifestDigest``. Cases are listed in sorted path order.

    Returns:
        Always 0; I/O errors (for example a missing output directory)
        propagate as exceptions.
    """
    case_dirs = sorted(
        p
        for p in CORPUS.rglob("*")
        if p.is_dir() and (p / "case-manifest.json").exists()
    )

    cases = []
    for case_dir in case_dirs:
        manifest_path = case_dir / "case-manifest.json"
        cases.append(
            {
                "id": case_dir.name,
                # as_posix() yields forward slashes on every platform without
                # rewriting literal backslashes inside POSIX file names.
                "path": case_dir.relative_to(ROOT).as_posix(),
                "manifestDigest": f"sha256:{sha256(manifest_path)}",
            }
        )

    payload = {
        # UTC timestamp of this run, second precision, "Z" suffix.
        "generatedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "caseCount": len(cases),
        "cases": cases,
    }
    OUTPUT.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())