#!/usr/bin/env python3
"""
Offline validator for reachability benchmark manifests.

Usage:
    python tools/verify_manifest.py benchmark/manifest.sample.json --root bench/reachability-benchmark

Checks performed:
- Manifest validates against `benchmark/schemas/benchmark-manifest.schema.json`.
- Every hashed path exists relative to --root (or absolute).
- SHA-256 of files/directories matches the manifest values.
- Optional DSSE envelopes listed under `dsse` are hashed and compared to envelopeDigest when provided.
"""

from __future__ import annotations

import argparse
import hashlib
import json
from pathlib import Path
from typing import Dict, Iterable, Sequence

ROOT = Path(__file__).resolve().parent.parent
SCHEMA_PATH = ROOT / "benchmark" / "schemas" / "benchmark-manifest.schema.json"

# Number of bytes read per step when streaming a file into a digest.
_HASH_CHUNK_SIZE = 1024 * 1024


def load_manifest(path: Path) -> Dict:
    """Read *path* as UTF-8 text and return the parsed JSON document.

    Raises:
        OSError: if the file cannot be read.
        ValueError: if the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            ``ValueError`` subclasses).
    """
    text = path.read_text(encoding="utf-8")
    return json.loads(text)


def _feed_file(digest, path: Path) -> None:
    """Stream the bytes of *path* into the hashlib object *digest*."""
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)


def compute_sha256(target: Path) -> str:
    """Return the lowercase hex SHA-256 of a file or a directory tree.

    For a file, the digest covers the file's bytes.  For a directory, every
    non-directory entry found recursively contributes its path relative to
    *target* (UTF-8 encoded, ``/``-separated) immediately followed by its
    bytes, visited in order of the relative path components.
    """
    digest = hashlib.sha256()
    if not target.is_dir():
        _feed_file(digest, target)
        return digest.hexdigest()

    files = [entry for entry in target.rglob("*") if not entry.is_dir()]
    # Ordering by relative components matches how POSIX paths compare, but is
    # also case-sensitive on Windows, so the digest does not depend on the OS.
    files.sort(key=lambda entry: entry.relative_to(target).parts)
    for child in files:
        rel = child.relative_to(target)
        # as_posix() keeps the hashed name identical regardless of the
        # platform's native path separator.
        digest.update(rel.as_posix().encode("utf-8"))
        _feed_file(digest, child)
    return digest.hexdigest()


def validate_against_schema(manifest: Dict) -> Iterable[str]:
    """Yield one ``schema:POINTER: MESSAGE`` string per schema violation.

    The schema is loaded from ``SCHEMA_PATH`` and applied with a JSON Schema
    draft 2020-12 validator.  POINTER is the ``/``-joined location of the
    offending value and is empty for errors at the document root.
    """
    # Imported here rather than at module level so the hashing helpers stay
    # importable when the third-party ``jsonschema`` package is unavailable.
    from jsonschema import Draft202012Validator

    schema = json.loads(SCHEMA_PATH.read_text(encoding="utf-8"))
    validator = Draft202012Validator(schema)
    for error in validator.iter_errors(manifest):
        pointer = "/".join(str(part) for part in error.path)
        yield f"schema:{pointer}: {error.message}"


def resolve_path(root: Path, path_value: str) -> Path:
    """Return *path_value* as a path, joined onto *root* unless it is absolute."""
    candidate = Path(path_value)
    if not candidate.is_absolute():
        candidate = root / candidate
    return candidate


def validate_hashed_path(
    root: Path,
    label: str,
    spec: Dict,
    envelope_digest: str | None = None,
) -> Iterable[str]:
    """Check one ``{"path", "sha256"[, "dsse"]}`` entry and return failures.

    Args:
        root: Directory that relative paths are resolved against.
        label: Prefix identifying the entry in failure messages.
        spec: Manifest entry; must carry string ``path`` and ``sha256``
            values and may carry a ``dsse`` path.
        envelope_digest: Expected SHA-256 of the ``dsse`` file.  The DSSE
            file is only hashed when this is provided.

    Returns:
        A list of failure strings (empty when everything matches), each
        prefixed with ``invalid:``, ``missing:`` or ``mismatch:``.
    """
    # main() runs the file checks even when schema validation failed, so a
    # malformed entry must become a reported failure instead of a KeyError
    # that would hide the failures collected so far.
    if (
        not isinstance(spec, dict)
        or not isinstance(spec.get("path"), str)
        or not isinstance(spec.get("sha256"), str)
    ):
        return [f"invalid:{label}: expected an object with string 'path' and 'sha256'"]

    errors: list[str] = []
    path = resolve_path(root, spec["path"])
    if not path.exists():
        return [f"missing:{label}:{path}"]

    actual = compute_sha256(path)
    expected = spec["sha256"].lower()
    if actual.lower() != expected:
        errors.append(f"mismatch:{label}:{path}: expected {expected} got {actual}")

    dsse_path = spec.get("dsse")
    if dsse_path:
        if not isinstance(dsse_path, str):
            errors.append(f"invalid:{label}:dsse: expected a string path")
            return errors
        dsse_full = resolve_path(root, dsse_path)
        if not dsse_full.exists():
            errors.append(f"missing:{label}:dsse:{dsse_full}")
        elif envelope_digest:
            dsse_digest = compute_sha256(dsse_full)
            if envelope_digest.lower() != dsse_digest.lower():
                errors.append(
                    f"mismatch:{label}:dsse:{dsse_full}: "
                    f"expected envelopeDigest {envelope_digest} got {dsse_digest}"
                )
    return errors


def validate_cases(root: Path, manifest: Dict) -> Iterable[str]:
    """Yield failures for every entry under each case's ``hashes`` mapping."""
    for case in manifest.get("cases", []):
        base = f"case:{case.get('id', '')}"
        hashes: Dict = case.get("hashes", {})
        for key, spec in hashes.items():
            yield from validate_hashed_path(root, f"{base}:{key}", spec)


def validate_artifacts(root: Path, manifest: Dict) -> Iterable[str]:
    """Yield failures for the ``artifacts`` section of the manifest.

    Checks the ``submissionSchema`` and ``scorer`` entries when present, and
    for each item of ``baselineSubmissions`` its ``submission`` entry plus
    the optional ``dsse`` entry (passing ``envelopeDigest`` along).
    """
    artifacts = manifest.get("artifacts", {})
    for label in ("submissionSchema", "scorer"):
        if label in artifacts:
            yield from validate_hashed_path(root, f"artifacts:{label}", artifacts[label])

    for baseline in artifacts.get("baselineSubmissions", []) or []:
        prefix = f"baseline:{baseline.get('tool','?')}-{baseline.get('version','?')}"
        # .get() so that a missing submission is reported as an invalid
        # entry by validate_hashed_path rather than raising KeyError.
        yield from validate_hashed_path(
            root, f"{prefix}:submission", baseline.get("submission")
        )
        dsse_spec = baseline.get("dsse")
        if dsse_spec:
            yield from validate_hashed_path(
                root, f"{prefix}:dsse", dsse_spec, baseline.get("envelopeDigest")
            )


def validate_tools(root: Path, manifest: Dict) -> Iterable[str]:
    """Yield failures for the ``builder`` and ``validator`` entries of ``tools``."""
    tools = manifest.get("tools", {})
    for label in ("builder", "validator"):
        if label in tools:
            yield from validate_hashed_path(root, f"tools:{label}", tools[label])


def main(argv: Sequence[str] | None = None) -> int:
    """Run the validator and return the process exit status.

    Args:
        argv: Command-line arguments without the program name; ``None``
            makes argparse read ``sys.argv``.

    Returns:
        0 when the manifest validates, 1 when any failure was reported
        (each failure is printed as a ``FAIL ...`` line).
    """
    parser = argparse.ArgumentParser(description="Validate reachability benchmark manifest and artifacts")
    parser.add_argument("manifest", type=Path, help="Path to manifest JSON")
    parser.add_argument("--root", type=Path, default=ROOT, help="Root directory for relative paths")
    args = parser.parse_args(argv)

    try:
        manifest = load_manifest(args.manifest)
    except (OSError, ValueError) as exc:
        # Unreadable or non-JSON manifest: report it like any other failure
        # instead of surfacing a traceback.
        print(f"FAIL manifest:{args.manifest}: {exc}")
        return 1

    failures: list[str] = []
    failures.extend(validate_against_schema(manifest))
    # The file checks navigate the manifest as a mapping; for any other JSON
    # value the schema failures above are the whole report.
    if isinstance(manifest, dict):
        failures.extend(validate_cases(args.root, manifest))
        failures.extend(validate_artifacts(args.root, manifest))
        failures.extend(validate_tools(args.root, manifest))

    if failures:
        for item in failures:
            print(f"FAIL {item}")
        return 1

    print(f"OK manifest {args.manifest} validated")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())