Add receipt input JSON and SHA256 hash for CVSS policy scoring tests

- Introduced a new JSON fixture `receipt-input.json` containing base, environmental, and threat metrics for CVSS scoring.
- Added corresponding SHA256 hash file `receipt-input.sha256` to ensure integrity of the JSON fixture.
2025-12-04 07:30:42 +02:00
parent 2d079d61ed
commit e1262eb916
91 changed files with 19493 additions and 187 deletions
--- a/bench/reachability-benchmark/tools/verify_manifest.py
+++ b/bench/reachability-benchmark/tools/verify_manifest.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""Offline validator for reachability benchmark manifests.
+
+Usage:
+  python tools/verify_manifest.py benchmark/manifest.sample.json --root bench/reachability-benchmark
+
+Checks performed:
+- Manifest validates against `benchmark/schemas/benchmark-manifest.schema.json`.
+- Every hashed path exists relative to --root (or absolute).
+- SHA-256 of files/directories matches the manifest values.
+- Optional DSSE envelopes listed under `dsse` are hashed and compared to envelopeDigest
+  when provided.
+"""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+from pathlib import Path
+from typing import Dict, Iterable
+
+from jsonschema import Draft202012Validator
+
+# Directory one level above the folder holding this script; used as the
+# default --root for resolving relative manifest paths.
+_SCRIPT_DIR = Path(__file__).resolve().parent
+ROOT = _SCRIPT_DIR.parent
+# JSON Schema file that manifests are validated against.
+SCHEMA_PATH = ROOT.joinpath("benchmark", "schemas", "benchmark-manifest.schema.json")
+
+
+def load_manifest(path: Path) -> Dict:
+    """Read the UTF-8 encoded JSON document at *path* and return the parsed value."""
+    with path.open(encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def _feed_file(digest, path: Path, chunk_size: int = 1 << 20) -> None:
+    """Stream the bytes of *path* into *digest* in fixed-size chunks."""
+    with path.open("rb") as handle:
+        for chunk in iter(lambda: handle.read(chunk_size), b""):
+            digest.update(chunk)
+
+
+def compute_sha256(target: Path) -> str:
+    """Return the lowercase hex SHA-256 of a file or of a directory tree.
+
+    For a regular file the digest covers the file's bytes. For a directory,
+    every non-directory entry below it is visited in order of its relative
+    path components, and for each one the ``/``-joined relative path (UTF-8)
+    followed by the file's bytes is fed into a single digest.
+
+    Args:
+        target: File or directory to hash.
+
+    Returns:
+        The hexadecimal digest string.
+
+    Raises:
+        OSError: If a file cannot be opened or read.
+    """
+    digest = hashlib.sha256()
+    if not target.is_dir():
+        _feed_file(digest, target)
+        return digest.hexdigest()
+
+    # Sort on the relative path components and hash a "/"-joined name so the
+    # digest does not depend on the host OS: str(Path) uses backslashes on
+    # Windows and Path ordering is case-insensitive there. On POSIX this
+    # yields the same order and bytes as sorting the paths and hashing
+    # str(relative_path).
+    entries = sorted(
+        (child.relative_to(target).parts, child)
+        for child in target.rglob("*")
+        if not child.is_dir()
+    )
+    for parts, child in entries:
+        digest.update("/".join(parts).encode("utf-8"))
+        _feed_file(digest, child)
+    return digest.hexdigest()
+
+
+def validate_against_schema(manifest: Dict) -> Iterable[str]:
+    """Yield one ``schema:<location>: <message>`` string per schema violation.
+
+    The schema is loaded from ``SCHEMA_PATH`` and applied with
+    ``Draft202012Validator``. ``<location>`` is the ``/``-joined path of the
+    offending element, or ``<root>`` when that joined path is empty.
+    """
+    with SCHEMA_PATH.open(encoding="utf-8") as handle:
+        schema = json.load(handle)
+    checker = Draft202012Validator(schema)
+    for problem in checker.iter_errors(manifest):
+        location = "/".join(str(part) for part in problem.path)
+        if not location:
+            location = "<root>"
+        yield f"schema:{location}: {problem.message}"
+
+
+def resolve_path(root: Path, path_value: str) -> Path:
+    """Return *path_value* as a path, joined onto *root* unless it is already absolute."""
+    requested = Path(path_value)
+    return requested if requested.is_absolute() else root / requested
+
+
+def validate_hashed_path(root: Path, label: str, spec: Dict, envelope_digest: str | None = None) -> Iterable[str]:
+    """Check one manifest entry of the form ``{"path": ..., "sha256": ..., "dsse": ...}``.
+
+    Args:
+        root: Base directory used to resolve relative paths.
+        label: Prefix identifying the entry in every reported failure.
+        spec: Manifest entry; ``path`` and ``sha256`` are required, ``dsse``
+            (a path) is optional.
+        envelope_digest: Expected SHA-256 of the file named by ``spec["dsse"]``.
+            Ignored when the entry has no ``dsse`` path.
+
+    Returns:
+        A list of failure strings (empty when everything matches), each
+        starting with ``invalid:``, ``missing:`` or ``mismatch:``.
+    """
+    # main() keeps running the path checks after schema errors, so a malformed
+    # entry must be reported as a failure instead of raising KeyError/TypeError
+    # and discarding the failures collected so far.
+    if not isinstance(spec, dict):
+        return [f"invalid:{label}: expected an object with 'path' and 'sha256'"]
+    path_value = spec.get("path")
+    expected_raw = spec.get("sha256")
+    if not isinstance(path_value, str) or not isinstance(expected_raw, str):
+        return [f"invalid:{label}: 'path' and 'sha256' must both be strings"]
+
+    path = resolve_path(root, path_value)
+    if not path.exists():
+        return [f"missing:{label}:{path}"]
+
+    errors: list[str] = []
+    expected = expected_raw.lower()
+    actual = compute_sha256(path)
+    if actual.lower() != expected:
+        errors.append(f"mismatch:{label}:{path}: expected {expected} got {actual}")
+
+    dsse_path = spec.get("dsse")
+    if not dsse_path:
+        return errors
+
+    dsse_full = resolve_path(root, dsse_path)
+    if not dsse_full.exists():
+        errors.append(f"missing:{label}:dsse:{dsse_full}")
+    elif envelope_digest:
+        # Only hash the envelope when there is a digest to compare it against.
+        dsse_digest = compute_sha256(dsse_full)
+        if envelope_digest.lower() != dsse_digest.lower():
+            errors.append(
+                f"mismatch:{label}:dsse:{dsse_full}: expected envelopeDigest {envelope_digest} got {dsse_digest}"
+            )
+    return errors
+
+
+def validate_cases(root: Path, manifest: Dict) -> Iterable[str]:
+    """Yield failures for every entry in each case's ``hashes`` mapping.
+
+    Labels have the form ``case:<id>:<hash key>``; a case without an ``id``
+    is labelled ``<unknown>``.
+    """
+    for entry in manifest.get("cases", []):
+        case_id = entry.get("id", "<unknown>")
+        hashed: Dict = entry.get("hashes", {})
+        for name, spec in hashed.items():
+            yield from validate_hashed_path(root, f"case:{case_id}:{name}", spec)
+
+
+def validate_artifacts(root: Path, manifest: Dict) -> Iterable[str]:
+    """Yield failures for the hashed entries under the manifest's ``artifacts`` section.
+
+    Checks ``submissionSchema`` and ``scorer`` when present, then each item of
+    ``baselineSubmissions``: its required ``submission`` entry and, if given,
+    its ``dsse`` entry.
+    """
+    section = manifest.get("artifacts", {})
+    for name in ("submissionSchema", "scorer"):
+        if name not in section:
+            continue
+        yield from validate_hashed_path(root, f"artifacts:{name}", section[name])
+
+    # `or []` also covers an explicit null value for baselineSubmissions.
+    baselines = section.get("baselineSubmissions", []) or []
+    for entry in baselines:
+        tool = entry.get("tool", "?")
+        version = entry.get("version", "?")
+        tag = f"baseline:{tool}-{version}"
+        yield from validate_hashed_path(root, f"{tag}:submission", entry["submission"])
+        envelope = entry.get("dsse")
+        if not envelope:
+            continue
+        # NOTE(review): validate_hashed_path only compares the digest argument
+        # against the file named by a nested "dsse" key inside the spec it is
+        # given, so envelopeDigest is checked here only if this DSSE spec has
+        # such a key -- confirm that is intended.
+        yield from validate_hashed_path(root, f"{tag}:dsse", envelope, entry.get("envelopeDigest"))
+
+
+def validate_tools(root: Path, manifest: Dict) -> Iterable[str]:
+    """Yield failures for the ``builder`` and ``validator`` entries under ``tools``, when present."""
+    section = manifest.get("tools", {})
+    for name in ("builder", "validator"):
+        if name not in section:
+            continue
+        yield from validate_hashed_path(root, f"tools:{name}", section[name])
+
+
+def main() -> int:
+    """Parse the command line, run every check, and print the outcome.
+
+    Returns:
+        0 when no check reported a failure, 1 otherwise (after printing one
+        ``FAIL`` line per failure).
+    """
+    cli = argparse.ArgumentParser(description="Validate reachability benchmark manifest and artifacts")
+    cli.add_argument("manifest", type=Path, help="Path to manifest JSON")
+    cli.add_argument("--root", type=Path, default=ROOT, help="Root directory for relative paths")
+    options = cli.parse_args()
+
+    manifest = load_manifest(options.manifest)
+
+    # Each generator is drained in turn, so failures appear in this order:
+    # schema, cases, artifacts, tools.
+    failures: list[str] = [
+        *validate_against_schema(manifest),
+        *validate_cases(options.root, manifest),
+        *validate_artifacts(options.root, manifest),
+        *validate_tools(options.root, manifest),
+    ]
+
+    if not failures:
+        print(f"OK  manifest {options.manifest} validated")
+        return 0
+
+    for item in failures:
+        print(f"FAIL {item}")
+    return 1
+
+
+# When run as a script, exit with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())