#!/usr/bin/env python3
"""Deterministic JSON parsing and canonicalization helpers for supply-chain tests."""

from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass
from typing import Any


class DuplicateKeyError(ValueError):
    """Raised when a JSON object contains the same key more than once."""


def _strict_object_pairs_hook(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    """Build a dict from decoded object pairs, refusing repeated keys.

    ``json.loads`` calls this once per JSON object (innermost first) with the
    key/value pairs in document order.

    Raises:
        DuplicateKeyError: if any key occurs more than once in ``pairs``.
    """
    result: dict[str, Any] = {}
    for key, value in pairs:
        # The default decoder silently keeps the last value for a repeated
        # key; fail loudly instead so the parse result is unambiguous.
        if key in result:
            raise DuplicateKeyError(f"Duplicate key detected: {key}")
        result[key] = value
    return result


def _reject_non_finite_constant(constant: str) -> Any:
    """Refuse the non-standard literals ``NaN``, ``Infinity`` and ``-Infinity``.

    ``json.loads`` invokes the ``parse_constant`` hook only for these three
    tokens, none of which is part of the JSON grammar.

    Raises:
        ValueError: always.
    """
    raise ValueError(f"Non-finite numeric constant is not valid JSON: {constant}")


def parse_json_strict(text: str) -> Any:
    """Parse JSON, rejecting duplicate keys and non-finite number literals.

    Note that a numeric literal too large for a float (e.g. ``1e999``) is
    valid JSON syntax and still decodes to infinity here; such a value is
    rejected later by ``canonicalize_value``.

    Args:
        text: the JSON document to decode.

    Returns:
        The decoded Python value.

    Raises:
        DuplicateKeyError: if any object repeats a key.
        ValueError: if the document is malformed (``json.JSONDecodeError``)
            or contains ``NaN``, ``Infinity`` or ``-Infinity``.
    """
    return json.loads(
        text,
        object_pairs_hook=_strict_object_pairs_hook,
        parse_constant=_reject_non_finite_constant,
    )


def canonicalize_value(value: Any) -> str:
    """Serialize a JSON-compatible value to a deterministic compact string.

    Object keys are sorted, no insignificant whitespace is emitted, and
    non-ASCII characters are written literally rather than escaped.
    This is a strict deterministic serializer used for test invariants.

    Raises:
        ValueError: if ``value`` contains NaN or an infinite float, which
            have no JSON representation.
        TypeError: if ``value`` contains an object ``json`` cannot serialize.
    """
    return json.dumps(
        value,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
        # The default (True) would emit NaN/Infinity tokens that are not JSON.
        allow_nan=False,
    )


def canonicalize_text(text: str) -> str:
    """Strictly parse a JSON document and return its canonical serialization."""
    return canonicalize_value(parse_json_strict(text))


def sha256_hex(value: str) -> str:
    """Return the hex SHA-256 digest of ``value`` encoded as UTF-8.

    Raises:
        UnicodeEncodeError: if ``value`` cannot be encoded as UTF-8
            (for example, it contains a lone surrogate).
    """
    return hashlib.sha256(value.encode("utf-8")).hexdigest()


@dataclass(frozen=True)
class CanonicalResult:
    """Immutable pair of a canonical JSON string and its SHA-256 hex digest."""

    canonical_json: str
    sha256: str


def canonical_result_from_text(text: str) -> CanonicalResult:
    """Canonicalize a JSON document and bundle the result with its digest."""
    canonical = canonicalize_text(text)
    return CanonicalResult(canonical_json=canonical, sha256=sha256_hex(canonical))