Add Canonical JSON serialization library with tests and documentation

- Implemented CanonJson class for deterministic JSON serialization and hashing.
- Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters.
- Created project files for the Canonical JSON library and its tests, including necessary package references.
- Added README.md for library usage and API reference.
- Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
This commit is contained in:
master
2025-12-19 15:35:00 +02:00
parent 43882078a4
commit 951a38d561
192 changed files with 27550 additions and 2611 deletions

View File

@@ -0,0 +1,151 @@
using System.Security.Cryptography;
using System.Text.Json;
namespace StellaOps.Canonical.Json;
/// <summary>
/// Canonical JSON serialization with deterministic hashing.
/// Produces bit-identical output across environments for proof replay.
/// </summary>
/// <remarks>
/// Key guarantees:
/// <list type="bullet">
/// <item>Object keys are sorted with <see cref="StringComparer.Ordinal"/> (code-unit order, so "B" sorts before "a"; this is not a culture-aware alphabetical sort)</item>
/// <item>Array element order is preserved; only object keys are reordered</item>
/// <item>No whitespace or formatting variations</item>
/// <item>Number formatting is whatever the serializer or the input JSON supplied: scalar values are re-emitted through <c>JsonElement.WriteTo</c> and this type adds no number normalization of its own</item>
/// <item>UTF-8 encoding without BOM</item>
/// </list>
/// </remarks>
public static class CanonJson
{
    /// <summary>
    /// Serializer options used by <see cref="Canonicalize{T}(T)"/>: compact output, camelCase property names.
    /// </summary>
    /// <remarks>
    /// Held in a static field on purpose. System.Text.Json attaches its type-metadata cache to the
    /// options instance, so building a fresh instance per call throws that cache away every time
    /// (analyzer rule CA1869). The produced JSON is the same as with a per-call instance.
    /// </remarks>
    private static readonly JsonSerializerOptions DefaultSerializerOptions = new()
    {
        WriteIndented = false,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    /// <summary>
    /// Canonicalizes an object to a deterministic byte array using compact, camelCase serialization.
    /// Object keys are recursively sorted using Ordinal comparison.
    /// </summary>
    /// <typeparam name="T">The type to serialize.</typeparam>
    /// <param name="obj">The object to canonicalize.</param>
    /// <returns>UTF-8 encoded canonical JSON bytes.</returns>
    public static byte[] Canonicalize<T>(T obj)
        => Canonicalize<T>(obj, DefaultSerializerOptions);

    /// <summary>
    /// Canonicalizes an object using custom serializer options.
    /// Object keys are recursively sorted using Ordinal comparison.
    /// </summary>
    /// <remarks>
    /// The options only influence the initial serialization (naming policy, converters, ...).
    /// The serialized JSON is then re-parsed and rewritten compactly with sorted keys, so
    /// indentation settings on <paramref name="options"/> do not affect the result.
    /// </remarks>
    /// <typeparam name="T">The type to serialize.</typeparam>
    /// <param name="obj">The object to canonicalize.</param>
    /// <param name="options">JSON serializer options to use for initial serialization.</param>
    /// <returns>UTF-8 encoded canonical JSON bytes.</returns>
    public static byte[] Canonicalize<T>(T obj, JsonSerializerOptions options)
    {
        var serialized = JsonSerializer.SerializeToUtf8Bytes(obj, options);
        using var document = JsonDocument.Parse(serialized);
        return WriteCanonical(document.RootElement);
    }

    /// <summary>
    /// Canonicalizes raw JSON bytes by parsing and re-sorting keys.
    /// Use this when you have existing JSON that needs to be canonicalized.
    /// </summary>
    /// <param name="jsonBytes">UTF-8 encoded JSON bytes.</param>
    /// <returns>UTF-8 encoded canonical JSON bytes.</returns>
    /// <exception cref="JsonException">Thrown by the parser when <paramref name="jsonBytes"/> is not valid JSON.</exception>
    public static byte[] CanonicalizeParsedJson(ReadOnlySpan<byte> jsonBytes)
    {
        // JsonDocument.Parse takes ReadOnlyMemory<byte>, not a span, hence the single copy here.
        using var document = JsonDocument.Parse(jsonBytes.ToArray());
        return WriteCanonical(document.RootElement);
    }

    /// <summary>
    /// Writes <paramref name="root"/> as compact JSON with recursively sorted object keys and
    /// returns the resulting UTF-8 bytes. Shared by every public canonicalization entry point
    /// so they cannot drift apart.
    /// </summary>
    private static byte[] WriteCanonical(JsonElement root)
    {
        using var buffer = new MemoryStream();
        using var writer = new Utf8JsonWriter(buffer, new JsonWriterOptions { Indented = false });
        WriteElementSorted(root, writer);

        // The writer buffers internally; flush before reading the stream contents.
        writer.Flush();
        return buffer.ToArray();
    }

    /// <summary>
    /// Recursively writes <paramref name="el"/> to <paramref name="w"/>, ordering object
    /// properties by name (Ordinal) and leaving array order untouched.
    /// </summary>
    private static void WriteElementSorted(JsonElement el, Utf8JsonWriter w)
    {
        switch (el.ValueKind)
        {
            case JsonValueKind.Object:
                w.WriteStartObject();
                foreach (var prop in el.EnumerateObject().OrderBy(p => p.Name, StringComparer.Ordinal))
                {
                    w.WritePropertyName(prop.Name);
                    WriteElementSorted(prop.Value, w);
                }
                w.WriteEndObject();
                break;

            case JsonValueKind.Array:
                // Array order is significant in JSON, so elements are written in document order.
                w.WriteStartArray();
                foreach (var item in el.EnumerateArray())
                {
                    WriteElementSorted(item, w);
                }
                w.WriteEndArray();
                break;

            default:
                // Strings, numbers, true/false/null: delegate to the element's own writer.
                el.WriteTo(w);
                break;
        }
    }

    /// <summary>
    /// Computes SHA-256 hash of bytes, returns lowercase hex string.
    /// </summary>
    /// <param name="bytes">The bytes to hash.</param>
    /// <returns>64-character lowercase hex string.</returns>
    public static string Sha256Hex(ReadOnlySpan<byte> bytes)
        => Convert.ToHexString(SHA256.HashData(bytes)).ToLowerInvariant();

    /// <summary>
    /// Computes SHA-256 hash of bytes, returns prefixed hash string.
    /// </summary>
    /// <param name="bytes">The bytes to hash.</param>
    /// <returns>Hash string with "sha256:" prefix followed by 64 lowercase hex characters.</returns>
    public static string Sha256Prefixed(ReadOnlySpan<byte> bytes)
        => "sha256:" + Sha256Hex(bytes);

    /// <summary>
    /// Canonicalizes an object (see <see cref="Canonicalize{T}(T)"/>) and computes its SHA-256 hash.
    /// </summary>
    /// <typeparam name="T">The type to serialize.</typeparam>
    /// <param name="obj">The object to hash.</param>
    /// <returns>64-character lowercase hex string.</returns>
    public static string Hash<T>(T obj)
        => Sha256Hex(Canonicalize(obj));

    /// <summary>
    /// Canonicalizes an object (see <see cref="Canonicalize{T}(T)"/>) and computes its prefixed SHA-256 hash.
    /// </summary>
    /// <typeparam name="T">The type to serialize.</typeparam>
    /// <param name="obj">The object to hash.</param>
    /// <returns>Hash string with "sha256:" prefix.</returns>
    public static string HashPrefixed<T>(T obj)
        => Sha256Prefixed(Canonicalize(obj));
}

View File

@@ -0,0 +1,95 @@
# StellaOps.Canonical.Json
Canonical JSON serialization with deterministic hashing for StellaOps proofs.
## Overview
This library provides canonical JSON serialization that produces bit-identical output across different environments, enabling deterministic replay and cryptographic verification of score proofs.
## Key Features
- **Deterministic Output**: Object keys are recursively sorted using Ordinal comparison
- **No Whitespace**: Compact output with no formatting variations
- **Consistent Hashing**: SHA-256 hashes are always lowercase hex
- **Cross-Platform**: Same output across Windows, Linux, containers
## Usage
### Basic Canonicalization
```csharp
using StellaOps.Canonical.Json;
var obj = new { z = 3, a = 1, nested = new { b = 2, x = 1 } };
// Get canonical bytes
byte[] canonical = CanonJson.Canonicalize(obj);
// Result: {"a":1,"nested":{"b":2,"x":1},"z":3}
// Compute hash
string hash = CanonJson.Sha256Hex(canonical);
// Result: lowercase 64-char hex string
```
### One-Step Hash
```csharp
// Hash object directly
string hash = CanonJson.Hash(obj);
// With sha256: prefix
string prefixed = CanonJson.HashPrefixed(obj);
// Result: "sha256:a1b2c3..."
```
### Canonicalizing Existing JSON
```csharp
// Re-sort keys in existing JSON (Encoding requires `using System.Text;`)
byte[] rawJson = Encoding.UTF8.GetBytes(@"{""z"":1,""a"":2}");
byte[] canonical = CanonJson.CanonicalizeParsedJson(rawJson);
// Result: {"a":2,"z":1}
```
### Custom Serialization Options
```csharp
var options = new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower
};
byte[] canonical = CanonJson.Canonicalize(obj, options);
```
## API Reference
| Method | Description |
|--------|-------------|
| `Canonicalize<T>(obj)` | Serialize and canonicalize an object |
| `Canonicalize<T>(obj, options)` | Serialize with custom options and canonicalize |
| `CanonicalizeParsedJson(bytes)` | Canonicalize existing JSON bytes |
| `Sha256Hex(bytes)` | Compute SHA-256, return lowercase hex |
| `Sha256Prefixed(bytes)` | Compute SHA-256 with "sha256:" prefix |
| `Hash<T>(obj)` | Canonicalize and hash in one step |
| `HashPrefixed<T>(obj)` | Canonicalize and hash with prefix |
## Guarantees
1. **Key Ordering**: Object keys are always sorted by Ordinal (code-unit) comparison rather than culture-aware alphabetical order — for example, `"B"` sorts before `"a"`
2. **No Environment Dependencies**: No timestamps, random values, or environment variables
3. **UTF-8 Without BOM**: Output is always UTF-8 encoded without byte order mark
4. **Array Order Preserved**: Arrays maintain element order (only object keys are sorted)
## Use Cases
- **Scan Manifests**: Hash all inputs affecting scan results
- **DSSE Payloads**: Sign canonical JSON for attestations
- **Proof Replay**: Verify scores are deterministic
- **Content Addressing**: Store proofs by their hash
## Related Components
- `StellaOps.Scanner.Core.Models.ScanManifest` - Uses CanonJson for manifest hashing
- `StellaOps.Attestor` - Signs canonical JSON payloads
- `StellaOps.Evidence.Bundle` - Content-addressed proof storage

View File

@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the canonical JSON library; declares no package or project references. -->
<PropertyGroup>
<!-- Target framework and C# language version used to compile the library. -->
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<!-- Enables the SDK's implicit global usings and nullable reference type analysis. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Compiler warnings are reported but do not fail the build. -->
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<Description>Canonical JSON serialization with deterministic hashing for StellaOps proofs.</Description>
</PropertyGroup>
</Project>