- Introduced a new JSON fixture `receipt-input.json` containing base, environmental, and threat metrics for CVSS scoring. - Added corresponding SHA256 hash file `receipt-input.sha256` to ensure integrity of the JSON fixture.
138 lines
5.0 KiB
Python
138 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Offline validator for reachability benchmark manifests.
|
|
|
|
Usage:
|
|
python tools/verify_manifest.py benchmark/manifest.sample.json --root bench/reachability-benchmark
|
|
|
|
Checks performed:
|
|
- Manifest validates against `benchmark/schemas/benchmark-manifest.schema.json`.
|
|
- Every hashed path exists relative to --root (or absolute).
|
|
- SHA-256 of files/directories matches the manifest values.
|
|
- Optional DSSE envelopes listed under `dsse` are hashed and compared to envelopeDigest
|
|
when provided.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable
|
|
|
|
from jsonschema import Draft202012Validator
|
|
|
|
# Directory that contains this script.
_SCRIPT_DIR = Path(__file__).resolve().parent

# Default root for relative manifest paths: the parent of the script directory.
ROOT = _SCRIPT_DIR.parent

# JSON Schema that every manifest is validated against.
SCHEMA_PATH = ROOT.joinpath("benchmark", "schemas", "benchmark-manifest.schema.json")
|
|
|
|
|
|
def load_manifest(path: Path) -> Dict:
    """Parse the manifest JSON document stored at *path*.

    Args:
        path: Location of the manifest file; it is decoded as UTF-8.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the content is not valid UTF-8 or not valid JSON.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
|
|
# Files are streamed in blocks of this many bytes so that large artifacts are
# never held in memory in full.
_HASH_CHUNK_SIZE = 1024 * 1024


def _feed_file(digest, file_path: Path) -> None:
    """Stream the bytes of *file_path* into *digest* in fixed-size chunks."""
    with file_path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)


def compute_sha256(target: Path) -> str:
    """Return the hex SHA-256 digest of a file or of a directory tree.

    For a regular file the digest covers the file's bytes.  For a directory,
    every non-directory entry found recursively is visited in sorted path
    order, and for each one the relative path (UTF-8 encoded) followed by the
    file's bytes is fed into a single running digest.  An empty directory
    therefore hashes to the SHA-256 of the empty byte string.

    Args:
        target: File or directory to hash.

    Returns:
        Lower-case hexadecimal SHA-256 digest.

    Raises:
        OSError: If *target* or one of its entries cannot be read.
    """
    digest = hashlib.sha256()
    if target.is_dir():
        for child in sorted(target.rglob("*")):
            if child.is_dir():
                continue
            # Use forward slashes regardless of platform so the same tree
            # yields the same digest on Windows and POSIX.  On POSIX this is
            # identical to str(relative_path).
            relative = child.relative_to(target).as_posix()
            digest.update(relative.encode("utf-8"))
            _feed_file(digest, child)
    else:
        _feed_file(digest, target)
    return digest.hexdigest()
|
|
|
|
|
|
def validate_against_schema(manifest: Dict) -> Iterable[str]:
    """Yield one failure string per JSON Schema violation in *manifest*.

    The schema is loaded from ``SCHEMA_PATH`` on every call and applied with
    a Draft 2020-12 validator.

    Args:
        manifest: Decoded manifest document.

    Yields:
        ``schema:<pointer>: <message>`` where ``<pointer>`` is the
        slash-joined path to the offending element, or ``<root>`` when the
        violation is at the top level of the document.
    """
    with SCHEMA_PATH.open(encoding="utf-8") as handle:
        schema = json.load(handle)

    for problem in Draft202012Validator(schema).iter_errors(manifest):
        location = "/".join(map(str, problem.path))
        if not location:
            location = "<root>"
        yield f"schema:{location}: {problem.message}"
|
|
|
|
|
|
def resolve_path(root: Path, path_value: str) -> Path:
    """Turn a manifest path entry into a concrete filesystem path.

    Args:
        root: Directory that relative entries are anchored to.
        path_value: Path string taken from the manifest.

    Returns:
        *path_value* unchanged (as a ``Path``) when it is absolute, otherwise
        *path_value* joined onto *root*.
    """
    requested = Path(path_value)
    return requested if requested.is_absolute() else root / requested
|
|
|
|
|
|
def validate_hashed_path(root: Path, label: str, spec: Dict, envelope_digest: str | None = None) -> Iterable[str]:
|
|
errors: list[str] = []
|
|
path = resolve_path(root, spec["path"])
|
|
if not path.exists():
|
|
return [f"missing:{label}:{path}"]
|
|
actual = compute_sha256(path)
|
|
expected = spec["sha256"].lower()
|
|
if actual.lower() != expected:
|
|
errors.append(f"mismatch:{label}:{path}: expected {expected} got {actual}")
|
|
dsse_path = spec.get("dsse")
|
|
if dsse_path:
|
|
dsse_full = resolve_path(root, dsse_path)
|
|
if not dsse_full.exists():
|
|
errors.append(f"missing:{label}:dsse:{dsse_full}")
|
|
else:
|
|
dsse_digest = compute_sha256(dsse_full)
|
|
if envelope_digest and envelope_digest.lower() != dsse_digest.lower():
|
|
errors.append(
|
|
f"mismatch:{label}:dsse:{dsse_full}: expected envelopeDigest {envelope_digest} got {dsse_digest}"
|
|
)
|
|
return errors
|
|
|
|
|
|
def validate_cases(root: Path, manifest: Dict) -> Iterable[str]:
    """Yield hash failures for every entry under each case's ``hashes`` map.

    Args:
        root: Directory that relative paths are resolved against.
        manifest: Decoded manifest document; a missing ``cases`` list or a
            case without ``hashes`` contributes nothing.

    Yields:
        Failure strings from ``validate_hashed_path`` labelled
        ``case:<id>:<hash key>``; ``<unknown>`` stands in for a missing id.
    """
    for entry in manifest.get("cases", []):
        case_id = entry.get("id", "<unknown>")
        for name, hashed in entry.get("hashes", {}).items():
            yield from validate_hashed_path(root, f"case:{case_id}:{name}", hashed)
|
|
|
|
|
|
def validate_artifacts(root: Path, manifest: Dict) -> Iterable[str]:
    """Yield hash failures for the manifest's ``artifacts`` section.

    Checks the optional ``submissionSchema`` and ``scorer`` entries, then each
    element of ``baselineSubmissions`` (its ``submission`` entry and, when
    present, its ``dsse`` entry).

    Args:
        root: Directory that relative paths are resolved against.
        manifest: Decoded manifest document; a missing ``artifacts`` section
            contributes nothing.

    Yields:
        Failure strings from ``validate_hashed_path`` labelled
        ``artifacts:<name>`` or ``baseline:<tool>-<version>:<part>``; ``?``
        stands in for a missing tool or version.

    Raises:
        KeyError: If a baseline entry has no ``submission`` key.
    """
    section = manifest.get("artifacts", {})

    present = [name for name in ("submissionSchema", "scorer") if name in section]
    for name in present:
        yield from validate_hashed_path(root, f"artifacts:{name}", section[name])

    # ``or []`` tolerates an explicit null for baselineSubmissions.
    baselines = section.get("baselineSubmissions", []) or []
    for entry in baselines:
        tool = entry.get("tool", "?")
        version = entry.get("version", "?")
        tag = f"baseline:{tool}-{version}"

        yield from validate_hashed_path(root, f"{tag}:submission", entry["submission"])

        envelope = entry.get("dsse")
        if not envelope:
            continue
        # NOTE(review): validate_hashed_path only compares envelopeDigest with
        # the file named by a nested ``dsse`` key inside this spec, not with
        # the spec's own ``path``. Confirm that this is the intended check.
        yield from validate_hashed_path(root, f"{tag}:dsse", envelope, entry.get("envelopeDigest"))
|
|
|
|
|
|
def validate_tools(root: Path, manifest: Dict) -> Iterable[str]:
    """Yield hash failures for the ``builder`` and ``validator`` tool entries.

    Args:
        root: Directory that relative paths are resolved against.
        manifest: Decoded manifest document; a missing ``tools`` section or a
            missing tool entry contributes nothing.

    Yields:
        Failure strings from ``validate_hashed_path`` labelled
        ``tools:<name>``.
    """
    registry = manifest.get("tools", {})
    for name in ("builder", "validator"):
        if name not in registry:
            continue
        yield from validate_hashed_path(root, f"tools:{name}", registry[name])
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Validate a manifest from the command line and report the outcome.

    Args:
        argv: Command-line arguments without the program name.  ``None``
            (the default) makes argparse read ``sys.argv``, so existing
            ``main()`` callers are unaffected.

    Returns:
        ``0`` after printing an ``OK`` line when no check failed, ``1`` after
        printing one ``FAIL`` line per failure.
    """
    parser = argparse.ArgumentParser(description="Validate reachability benchmark manifest and artifacts")
    parser.add_argument("manifest", type=Path, help="Path to manifest JSON")
    parser.add_argument("--root", type=Path, default=ROOT, help="Root directory for relative paths")
    args = parser.parse_args(argv)

    # A missing, unreadable or non-JSON manifest is a validation failure, not
    # a crash: OSError covers file access problems, ValueError covers both
    # invalid UTF-8 and invalid JSON.
    try:
        manifest = load_manifest(args.manifest)
    except (OSError, ValueError) as exc:
        print(f"FAIL manifest:{args.manifest}: {exc}")
        return 1

    failures: list[str] = list(validate_against_schema(manifest))

    if isinstance(manifest, dict):
        failures.extend(validate_cases(args.root, manifest))
        failures.extend(validate_artifacts(args.root, manifest))
        failures.extend(validate_tools(args.root, manifest))
    elif not failures:
        # The path checks need an object at the top level; make sure a
        # non-object manifest never passes even if the schema accepts it.
        failures.append("schema:<root>: manifest must be a JSON object")

    if failures:
        for item in failures:
            print(f"FAIL {item}")
        return 1

    print(f"OK manifest {args.manifest} validated")
    return 0
|
|
|
|
|
|
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|