65 lines
1.7 KiB
Python
65 lines
1.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Deterministic JSON parsing and canonicalization helpers for supply-chain tests."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
|
|
class DuplicateKeyError(ValueError):
    """Signal that a JSON object repeats one of its keys.

    Subclasses ``ValueError`` so callers that already handle value errors
    from JSON decoding also catch this condition.
    """
|
|
|
|
|
|
def _strict_object_pairs_hook(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    """Build a dict from JSON object pairs, refusing repeated keys.

    Args:
        pairs: ``(key, value)`` tuples belonging to a single JSON object.

    Returns:
        A dict containing every pair, inserted in the order received.

    Raises:
        DuplicateKeyError: If the same key appears more than once in
            ``pairs``.
    """
    collected: dict[str, Any] = {}
    for key, item in pairs:
        # The dict built so far doubles as the record of keys already seen.
        if key in collected:
            raise DuplicateKeyError(f"Duplicate key detected: {key}")
        collected[key] = item
    return collected
|
|
|
|
|
|
def parse_json_strict(text: str) -> Any:
    """Decode a JSON document, failing on objects that repeat a key.

    Args:
        text: The JSON document to decode.

    Returns:
        The decoded Python value.

    Raises:
        DuplicateKeyError: If any object in the document contains the same
            key more than once (raised by ``_strict_object_pairs_hook``).
    """
    # Every decoded object is routed through the duplicate-rejecting hook.
    decoded = json.loads(text, object_pairs_hook=_strict_object_pairs_hook)
    return decoded
|
|
|
|
|
|
def canonicalize_value(value: Any) -> str:
    """Serialize ``value`` to compact JSON text with sorted object keys.

    Object keys are emitted in sorted order, no whitespace is placed around
    the ``,`` and ``:`` separators, and non-ASCII characters are written
    verbatim rather than escaped. Used as a deterministic serializer for
    test invariants.

    Note: the encoder's default ``allow_nan`` setting is left in place, so
    non-finite floats are not rejected here.
    """
    encoder = json.JSONEncoder(
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    return encoder.encode(value)
|
|
|
|
|
|
def canonicalize_text(text: str) -> str:
    """Return the canonical serialization of the JSON document in ``text``.

    The text is decoded with ``parse_json_strict`` and the resulting value
    is re-serialized with ``canonicalize_value``.
    """
    parsed = parse_json_strict(text)
    return canonicalize_value(parsed)
|
|
|
|
|
|
def sha256_hex(value: str) -> str:
    """Return the SHA-256 digest of ``value`` as a hexadecimal string.

    The string is encoded as UTF-8 before hashing.
    """
    payload = value.encode("utf-8")
    hasher = hashlib.sha256()
    hasher.update(payload)
    return hasher.hexdigest()
|
|
|
|
|
|
@dataclass(frozen=True)
class CanonicalResult:
    """Immutable pairing of a canonical JSON string with a SHA-256 value."""

    # Canonical JSON text.
    canonical_json: str
    # SHA-256 value stored alongside the canonical text.
    sha256: str
|
|
|
|
|
|
def canonical_result_from_text(text: str) -> CanonicalResult:
    """Canonicalize a JSON document and pair it with its SHA-256 digest.

    Args:
        text: The JSON document to canonicalize.

    Returns:
        A ``CanonicalResult`` holding the output of ``canonicalize_text``
        and the ``sha256_hex`` digest of that output.
    """
    canonical_json = canonicalize_text(text)
    digest = sha256_hex(canonical_json)
    return CanonicalResult(canonical_json=canonical_json, sha256=digest)
|