Files
git.stella-ops.org/tests/supply-chain/tools/canonicalize_json.py

65 lines
1.7 KiB
Python

#!/usr/bin/env python3
"""Deterministic JSON parsing and canonicalization helpers for supply-chain tests."""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass
from typing import Any
class DuplicateKeyError(ValueError):
    """Signal that a JSON object repeated one of its keys.

    Derives from ``ValueError``, so an ``except ValueError`` handler also
    catches it.
    """
def _strict_object_pairs_hook(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
seen: set[str] = set()
result: dict[str, Any] = {}
for key, value in pairs:
if key in seen:
raise DuplicateKeyError(f"Duplicate key detected: {key}")
seen.add(key)
result[key] = value
return result
def parse_json_strict(text: str) -> Any:
    """Decode a JSON document, failing if any object repeats a key.

    Args:
        text: The JSON document to decode.

    Returns:
        The decoded Python value.

    Raises:
        DuplicateKeyError: If any object in the document repeats a key.
        json.JSONDecodeError: If ``text`` is not well-formed JSON.
    """
    # Every decoded object is routed through the duplicate-rejecting hook.
    document = json.loads(text, object_pairs_hook=_strict_object_pairs_hook)
    return document
def canonicalize_value(value: Any) -> str:
    """Serialize a JSON-compatible value to a deterministic, compact string.

    Object keys are sorted, no whitespace is emitted between tokens, and
    non-ASCII characters are written literally rather than ``\\uXXXX``-escaped.
    This is a strict deterministic serializer used for test invariants.

    Args:
        value: A value composed of JSON-serializable Python types.

    Returns:
        The canonical JSON text for ``value``.

    Raises:
        ValueError: If ``value`` contains NaN or an infinity. These have no
            JSON representation, so they are rejected instead of being
            emitted as the non-standard tokens ``NaN`` / ``Infinity``.
        TypeError: If ``value`` contains an object ``json.dumps`` cannot
            serialize.
    """
    return json.dumps(
        value,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
        # json.dumps defaults to allow_nan=True, which would silently produce
        # output that strict JSON parsers reject.
        allow_nan=False,
    )
def canonicalize_text(text: str) -> str:
    """Decode a JSON document strictly and re-serialize it canonically.

    Args:
        text: The JSON document to canonicalize.

    Returns:
        The canonical serialization produced by ``canonicalize_value`` for
        the value decoded by ``parse_json_strict``.
    """
    parsed = parse_json_strict(text)
    return canonicalize_value(parsed)
def sha256_hex(value: str) -> str:
    """Return the SHA-256 digest of a string's UTF-8 bytes as hex.

    Args:
        value: The text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The hexadecimal SHA-256 digest.
    """
    hasher = hashlib.sha256()
    hasher.update(value.encode("utf-8"))
    return hasher.hexdigest()
@dataclass(frozen=True)
class CanonicalResult:
    """Immutable pairing of a canonical JSON string with its SHA-256 digest."""

    # Canonical JSON serialization of a document.
    canonical_json: str
    # SHA-256 digest string associated with ``canonical_json``.
    sha256: str
def canonical_result_from_text(text: str) -> CanonicalResult:
    """Canonicalize a JSON document and bundle it with its SHA-256 digest.

    Args:
        text: The JSON document to canonicalize.

    Returns:
        A ``CanonicalResult`` holding the canonical JSON and the hex SHA-256
        of that canonical string.
    """
    canonical_json = canonicalize_text(text)
    digest = sha256_hex(canonical_json)
    return CanonicalResult(canonical_json=canonical_json, sha256=digest)