up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,754 @@
|
||||
Here’s a practical way to make a cross‑platform, hash‑stable JSON “fingerprint” for things like a `graph_revision_id`, so your hashes don’t change between OS/locale settings.
|
||||
|
||||
---
|
||||
|
||||
### What “canonical JSON” means (in plain terms)
|
||||
|
||||
* **Deterministic order:** Always write object properties in a fixed order (e.g., lexicographic).
|
||||
* **Stable numbers:** Serialize numbers the same way everywhere (no locale, no extra zeros).
|
||||
* **Normalized text:** Normalize all strings to Unicode **NFC** so accented/combined characters don’t vary.
|
||||
* **Consistent bytes:** Encode as **UTF‑8** with **LF** (`\n`) newlines only.
|
||||
|
||||
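These ideas are modelled on the JSON Canonicalization Scheme (RFC 8785)—use it as your north star for stable hashing. Note that RFC 8785 itself leaves string data untouched (it does not apply Unicode normalization), so the NFC step is an extra rule on top of it; the result is "JCS‑like" rather than strictly RFC 8785.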
|
||||
|
||||
---
|
||||
|
||||
### Drop‑in C# helper (targets .NET 8/10)
|
||||
|
||||
This gives you a canonical UTF‑8 byte[] and a SHA‑256 hex hash. It:
|
||||
|
||||
* Recursively sorts object properties,
|
||||
* Emits numbers with invariant formatting,
|
||||
* Normalizes all string values to **NFC**,
|
||||
* Uses `\n` endings,
|
||||
* Produces a SHA‑256 for `graph_revision_id`.
|
||||
|
||||
```csharp
|
||||
using System;
|
||||
using System.Buffers.Text;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Nodes;
|
||||
using System.Text.Unicode;
|
||||
|
||||
/// <summary>
/// Produces a deterministic ("JCS-like") JSON encoding of a value and a SHA-256 fingerprint of it.
/// </summary>
/// <remarks>
/// Rules applied to the serialized value:
/// <list type="bullet">
/// <item>Object properties are sorted by ordinal (UTF-16 code unit) order of their names.</item>
/// <item>String values and property names are normalized to Unicode NFC.</item>
/// <item>Integers that fit <see cref="long"/> or <see cref="ulong"/> are written exactly; every other
/// number is written as a double using ECMAScript-style shortest round-trip formatting.</item>
/// <item>No insignificant whitespace is emitted, so the output contains no raw line breaks.</item>
/// </list>
/// </remarks>
public static class CanonJson
{
    // Options are cached: allocating a new JsonSerializerOptions per call discards the serializer's
    // metadata cache. Escaping of the intermediate JSON text does not matter (it is re-parsed into
    // nodes); the canonical writer below decides the final escaping.
    private static readonly JsonSerializerOptions SerializerOptions = new()
    {
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping
    };

    /// <summary>Serializes <paramref name="value"/> and returns its canonical JSON as UTF-8 bytes (no BOM).</summary>
    /// <param name="value">Any value System.Text.Json can serialize; <c>null</c> yields <c>null</c>.</param>
    /// <returns>The canonical UTF-8 byte sequence.</returns>
    /// <exception cref="InvalidOperationException">A number is non-finite or a value kind is unsupported.</exception>
    /// <exception cref="ArgumentException">
    /// Two property names of one object collapse to the same NFC form, or a string contains invalid Unicode.
    /// </exception>
    public static byte[] ToCanonicalUtf8(object? value)
    {
        // 1) Serialize once to JsonNode to work with types safely
        var initialJson = JsonSerializer.SerializeToNode(value, SerializerOptions);

        // 2) Canonicalize (sort keys, normalize strings)
        var canonNode = CanonicalizeNode(initialJson);

        // 3) Write in a deterministic manner. CR and LF inside strings are written as the escapes
        //    \r and \n, so the text never contains a raw line break and needs no newline fix-up.
        var sb = new StringBuilder(4096);
        WriteCanonical(canonNode, sb);

        // 4) UTF-8 bytes
        return Encoding.UTF8.GetBytes(sb.ToString());
    }

    /// <summary>Computes the lower-case SHA-256 hex digest of the canonical bytes of <paramref name="value"/>.</summary>
    /// <param name="value">The value to fingerprint.</param>
    /// <returns>A 64-character lower-case hexadecimal string.</returns>
    public static string ComputeGraphRevisionId(object? value)
    {
        var hash = SHA256.HashData(ToCanonicalUtf8(value));
        return Convert.ToHexString(hash).ToLowerInvariant();
    }

    // --- Internals ---

    /// <summary>
    /// Builds a detached copy of <paramref name="node"/> with sorted, NFC-normalized property names
    /// and NFC-normalized string values.
    /// </summary>
    private static JsonNode? CanonicalizeNode(JsonNode? node)
    {
        switch (node)
        {
            case null:
                return null;

            case JsonObject obj:
            {
                var sorted = new JsonObject();
                var properties = obj
                    .Select(p => (Name: p.Key.Normalize(NormalizationForm.FormC), Child: p.Value))
                    .OrderBy(p => p.Name, StringComparer.Ordinal); // RFC 8785 uses code unit order

                foreach (var (name, child) in properties)
                {
                    // Add (unlike the indexer) throws on a duplicate name, so two keys that differ
                    // only by normalization form are reported instead of silently overwriting.
                    sorted.Add(name, CanonicalizeNode(child));
                }

                return sorted;
            }

            case JsonArray arr:
            {
                // Array order is significant and is preserved.
                var copy = new JsonArray();
                foreach (var item in arr)
                {
                    copy.Add(CanonicalizeNode(item));
                }

                return copy;
            }

            case JsonValue v when v.TryGetValue<string>(out var s):
                return JsonValue.Create(s.Normalize(NormalizationForm.FormC));

            default:
                // Numbers and booleans. A node can have only one parent, so the value must be
                // cloned before it is attached to the new tree; returning it as-is makes the
                // subsequent Add throw InvalidOperationException.
                return node.DeepClone();
        }
    }

    /// <summary>Appends the compact canonical text of <paramref name="node"/> to <paramref name="sb"/>.</summary>
    private static void WriteCanonical(JsonNode? node, StringBuilder sb)
    {
        switch (node)
        {
            case null:
                sb.Append("null");
                break;

            case JsonObject obj:
            {
                sb.Append('{');
                var first = true;
                foreach (var kvp in obj)
                {
                    if (!first)
                    {
                        sb.Append(',');
                    }

                    first = false;
                    WriteString(kvp.Key, sb); // property name
                    sb.Append(':');
                    WriteCanonical(kvp.Value, sb);
                }

                sb.Append('}');
                break;
            }

            case JsonArray arr:
            {
                sb.Append('[');
                for (var i = 0; i < arr.Count; i++)
                {
                    if (i > 0)
                    {
                        sb.Append(',');
                    }

                    WriteCanonical(arr[i], sb);
                }

                sb.Append(']');
                break;
            }

            case JsonValue val:
                WriteValue(val, sb);
                break;

            default:
                throw new InvalidOperationException($"Unsupported JSON node type '{node.GetType().Name}'.");
        }
    }

    /// <summary>Appends a string, boolean or number value.</summary>
    private static void WriteValue(JsonValue val, StringBuilder sb)
    {
        if (val.TryGetValue<string>(out var s))
        {
            WriteString(s, sb);
        }
        else if (val.TryGetValue<bool>(out var b))
        {
            sb.Append(b ? "true" : "false");
        }
        else if (val.TryGetValue<long>(out var l))
        {
            // Integers are probed before double so values above 2^53 are not rounded.
            sb.Append(l.ToString(CultureInfo.InvariantCulture));
        }
        else if (val.TryGetValue<ulong>(out var ul))
        {
            sb.Append(ul.ToString(CultureInfo.InvariantCulture));
        }
        else if (val.TryGetValue<double>(out var d))
        {
            sb.Append(FormatDouble(d));
        }
        else
        {
            // Failing loudly is safer than emitting "null" for data that would then hash as absent.
            throw new InvalidOperationException("Unsupported JSON value in canonical JSON.");
        }
    }

    /// <summary>
    /// Formats a finite double with the shortest round-trip digits, laid out the way ECMAScript
    /// <c>Number.prototype.toString</c> does (the number format RFC 8785 prescribes).
    /// </summary>
    /// <exception cref="InvalidOperationException"><paramref name="value"/> is NaN or infinite.</exception>
    private static string FormatDouble(double value)
    {
        if (double.IsNaN(value) || double.IsInfinity(value))
        {
            throw new InvalidOperationException("Non-finite numbers are not valid in canonical JSON.");
        }

        if (value == 0)
        {
            return "0"; // also squashes -0
        }

        // "R" yields the shortest round-trippable digits on .NET Core 3.0+ ("G17" would always emit
        // 17 digits, e.g. 0.10000000000000001). Its layout ("1E+21", "1E-07") is .NET-specific, so
        // only the digits and the decimal exponent are taken from it.
        var raw = value.ToString("R", CultureInfo.InvariantCulture);
        var negative = raw[0] == '-';
        if (negative)
        {
            raw = raw[1..];
        }

        var exponent = 0;
        var eIndex = raw.IndexOf('E');
        if (eIndex >= 0)
        {
            exponent = int.Parse(raw[(eIndex + 1)..], NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture);
            raw = raw[..eIndex];
        }

        var dot = raw.IndexOf('.');
        var intPart = dot >= 0 ? raw[..dot] : raw;
        var fracPart = dot >= 0 ? raw[(dot + 1)..] : string.Empty;

        // digits = significant digits; n = position of the decimal point relative to the first
        // digit, i.e. value = 0.d1d2...dk * 10^n (the "n" and "k" of the ECMAScript algorithm).
        var digits = intPart + fracPart;
        var n = intPart.Length + exponent;

        var leadingZeros = 0;
        while (leadingZeros < digits.Length - 1 && digits[leadingZeros] == '0')
        {
            leadingZeros++;
        }

        digits = digits[leadingZeros..].TrimEnd('0'); // value != 0, so a non-zero digit remains
        n -= leadingZeros;
        var k = digits.Length;

        var result = new StringBuilder(32);
        if (negative)
        {
            result.Append('-');
        }

        if (n >= k && n <= 21)
        {
            // Integer: digits followed by zeros.
            result.Append(digits).Append('0', n - k);
        }
        else if (n > 0 && n <= 21)
        {
            // Decimal point inside the digit run.
            result.Append(digits, 0, n).Append('.').Append(digits, n, k - n);
        }
        else if (n > -6 && n <= 0)
        {
            // Small fraction: 0.000ddd
            result.Append("0.").Append('0', -n).Append(digits);
        }
        else
        {
            // Exponential notation: d[.ddd]e(+|-)x
            var e = n - 1;
            result.Append(digits[0]);
            if (k > 1)
            {
                result.Append('.').Append(digits, 1, k - 1);
            }

            result.Append('e').Append(e < 0 ? '-' : '+').Append(Math.Abs(e).ToString(CultureInfo.InvariantCulture));
        }

        return result.ToString();
    }

    /// <summary>
    /// Appends <paramref name="s"/> as a JSON string literal, escaping only the quote, the backslash
    /// and control characters below U+0020; everything else is written literally.
    /// </summary>
    private static void WriteString(string s, StringBuilder sb)
    {
        sb.Append('"');
        foreach (var ch in s)
        {
            switch (ch)
            {
                case '\"': sb.Append("\\\""); break;
                case '\\': sb.Append("\\\\"); break;
                case '\b': sb.Append("\\b"); break;
                case '\f': sb.Append("\\f"); break;
                case '\n': sb.Append("\\n"); break;
                case '\r': sb.Append("\\r"); break;
                case '\t': sb.Append("\\t"); break;
                default:
                    // Only C0 controls need escaping; char.IsControl would also escape
                    // U+007F-U+009F, which JSON allows literally.
                    if (ch < 0x20)
                    {
                        sb.Append("\\u");
                        sb.Append(((int)ch).ToString("x4", CultureInfo.InvariantCulture));
                    }
                    else
                    {
                        sb.Append(ch);
                    }

                    break;
            }
        }

        sb.Append('"');
    }
}
|
||||
```
|
||||
|
||||
**Usage in your code (e.g., Stella Ops):**
|
||||
|
||||
```csharp
|
||||
// Hash only content that is stable for a given graph. A wall-clock field inside the payload
// (e.g. generatedAt = DateTime.UtcNow) would produce a different graph_revision_id on every run.
var payload = new {
    graphId = "core-vuln-edges",
    version = 3,
    edges = new[]{ new { from = "pkg:nuget/Newtonsoft.Json@13.0.3", to = "pkg:nuget/System.Text.Json@8.0.4" } }
};

// Generation time is recorded next to the payload, not inside it; formatted with the invariant
// culture so the text does not depend on the machine's locale.
var meta = new { generatedAt = DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ssZ", System.Globalization.CultureInfo.InvariantCulture) };

// Canonical bytes (UTF-8) for storage/attestation:
var canon = CanonJson.ToCanonicalUtf8(payload);

// Stable revision id (SHA-256 hex):
var graphRevisionId = CanonJson.ComputeGraphRevisionId(payload);
Console.WriteLine(graphRevisionId);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Operational tips
|
||||
|
||||
* **Freeze locales:** Always run with `CultureInfo.InvariantCulture` when formatting numbers/dates before they hit JSON.
|
||||
* **Reject non‑finite numbers:** Don’t allow `NaN`/`Infinity`—they’re not valid JSON and will break canonicalization.
|
||||
* **One writer, everywhere:** Use this same helper in CI, build agents, and runtime so the hash never drifts.
|
||||
* **Record the scheme:** Store the **canonicalization version** (e.g., `canon_v="JCS‑like v1"`) alongside the hash to allow future upgrades without breaking verification.
|
||||
|
||||
If you want, I can adapt this to stream very large JSONs (avoid `JsonNode`) or emit a **DSSE**/in‑toto style envelope with the canonical bytes as the payload for your attestation chain.
|
||||
Here’s a concrete, step‑by‑step implementation plan you can hand to the devs so they know exactly what to build and how it all fits together.
|
||||
|
||||
I’ll break it into phases:
|
||||
|
||||
1. **Design & scope**
|
||||
2. **Canonical JSON library**
|
||||
3. **Graph canonicalization & `graph_revision_id` calculation**
|
||||
4. **Tooling, tests & cross‑platform verification**
|
||||
5. **Integration & rollout**
|
||||
|
||||
---
|
||||
|
||||
## 1. Design & scope
|
||||
|
||||
### 1.1. Goals
|
||||
|
||||
* Produce a **stable, cross‑platform hash** (e.g. SHA‑256) from JSON content.
|
||||
* This hash becomes your **`graph_revision_id`** for supply‑chain graphs.
|
||||
* Hash **must not change** due to:
|
||||
|
||||
* OS differences (Windows/Linux/macOS)
|
||||
* Locale differences
|
||||
* Whitespace/property order differences
|
||||
* Unicode normalization issues (e.g. accented chars)
|
||||
|
||||
### 1.2. Canonicalization strategy (what devs should implement)
|
||||
|
||||
You’ll use **two levels of canonicalization**:
|
||||
|
||||
1. **Domain-level canonicalization (graph)**
|
||||
Make sure semantically equivalent graphs always serialize to the same in‑memory structure:
|
||||
|
||||
* Sort arrays (e.g. nodes, edges) in a deterministic way (ID, then type, etc.).
|
||||
* Remove / ignore non-semantic or unstable fields (timestamps, debug info, transient IDs).
|
||||
2. **Encoding-level canonicalization (JSON)**
|
||||
Convert that normalized object into **canonical JSON**:
|
||||
|
||||
* Object keys sorted lexicographically (`StringComparer.Ordinal`).
|
||||
* Strings normalized to **Unicode NFC**.
|
||||
* Numbers formatted with **InvariantCulture**, no locale effects.
|
||||
* No NaN/Infinity (reject or map them before hashing).
|
||||
* UTF‑8 output with **LF (`\n`) only**.
|
||||
|
||||
You already have a C# canonical JSON helper from me; this plan is about turning it into a production-ready component and wiring it through the system.
|
||||
|
||||
---
|
||||
|
||||
## 2. Canonical JSON library
|
||||
|
||||
**Owner:** backend platform team
|
||||
**Deliverable:** `StellaOps.CanonicalJson` (or similar) shared library
|
||||
|
||||
### 2.1. Project setup
|
||||
|
||||
* Create a **.NET class library**:
|
||||
|
||||
* `src/StellaOps.CanonicalJson/StellaOps.CanonicalJson.csproj`
|
||||
* Target same framework as your services (e.g. `net8.0`).
|
||||
* Add reference to `System.Text.Json`.
|
||||
|
||||
### 2.2. Public API design
|
||||
|
||||
In `CanonicalJson.cs` (or `CanonJson.cs`):
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.CanonicalJson;
|
||||
|
||||
/// <summary>
/// Planned public surface of the shared canonical JSON library (signatures only; bodies omitted).
/// </summary>
public static class CanonJson
|
||||
{
|
||||
// Version of your canonicalization algorithm (important for future changes).
// Intended to be stored alongside hashes so the rules can evolve without breaking verification.
|
||||
public const string CanonicalizationVersion = "canon-json-v1";
|
||||
|
||||
/// <summary>Serializes <paramref name="value"/>, applies the canonicalization rules and returns UTF-8 bytes.</summary>
public static byte[] ToCanonicalUtf8<T>(T value);
|
||||
|
||||
/// <summary>Returns the canonical JSON of <paramref name="value"/> as a string.</summary>
public static string ToCanonicalString<T>(T value);
|
||||
|
||||
/// <summary>Computes the SHA-256 digest of the canonical JSON of <paramref name="value"/>.</summary>
public static byte[] ComputeSha256<T>(T value);
|
||||
|
||||
/// <summary>Computes the SHA-256 of the canonical UTF-8 bytes and returns it as a lower-case hex string.</summary>
public static string ComputeSha256Hex<T>(T value);
|
||||
}
|
||||
```
|
||||
|
||||
**Behavioral requirements:**
|
||||
|
||||
* `ToCanonicalUtf8`:
|
||||
|
||||
* Serializes input to a `JsonNode`.
|
||||
* Applies canonicalization rules (sort keys, normalize strings, normalize numbers).
|
||||
* Writes minimal JSON with:
|
||||
|
||||
* No extra spaces.
|
||||
* Keys in lexicographic order.
|
||||
* UTF‑8 bytes and LF newlines only.
|
||||
* `ComputeSha256Hex`:
|
||||
|
||||
* Uses `ToCanonicalUtf8` and computes SHA‑256.
|
||||
* Returns lower‑case hex string.
|
||||
|
||||
### 2.3. Canonicalization rules (dev checklist)
|
||||
|
||||
**Objects (`JsonObject`):**
|
||||
|
||||
* Sort keys using `StringComparer.Ordinal`.
|
||||
* Recursively canonicalize child nodes.
|
||||
|
||||
**Arrays (`JsonArray`):**
|
||||
|
||||
* Preserve order as given by caller.
|
||||
*(The “graph canonicalization” step will make sure this order is semantically stable before JSON.)*
|
||||
|
||||
**Strings:**
|
||||
|
||||
* Normalize to **NFC**:
|
||||
|
||||
```csharp
|
||||
// Form C (NFC) so precomposed and combining spellings of the same text yield the same string.
var normalized = original.Normalize(NormalizationForm.FormC);
|
||||
```
|
||||
* When writing JSON:
|
||||
|
||||
* Escape `"` as `\"` and `\` as `\\`; escape any other control character (`< 0x20`) using the lowercase `\uXXXX` format.
|
||||
* Use `\n`, `\r`, `\t`, `\b`, `\f` for standard escapes.
|
||||
|
||||
**Numbers:**
|
||||
|
||||
* Support at least `long`, `double`, `decimal`.
|
||||
* Use **InvariantCulture**:
|
||||
|
||||
```csharp
|
||||
// For double, "G17" always emits 17 significant digits (0.1 becomes "0.10000000000000001");
// "R" emits the shortest string that round-trips (0.1 stays "0.1"). "R" is not valid for
// integer or decimal types: format those with ToString(CultureInfo.InvariantCulture).
someNumber.ToString("R", CultureInfo.InvariantCulture);
|
||||
```
|
||||
* Normalize `-0` to `0`.
|
||||
* No grouping separators, no locale decimals.
|
||||
* Reject `NaN`, `+Infinity`, `-Infinity` with a clear exception.
|
||||
|
||||
**Booleans & null:**
|
||||
|
||||
* Emit `true`, `false`, `null` (lowercase).
|
||||
|
||||
**Newlines:**
|
||||
|
||||
* Ensure final string has only `\n`:
|
||||
|
||||
```csharp
|
||||
// Collapse CRLF first, then any remaining lone CR, so only LF is left in the text.
json = json.Replace("\r\n", "\n").Replace("\r", "\n");
|
||||
```
|
||||
|
||||
### 2.4. Error handling & logging
|
||||
|
||||
* Throw a **custom exception** for unsupported content:
|
||||
|
||||
* `CanonicalJsonException : Exception`.
|
||||
* Example triggers:
|
||||
|
||||
* Non‑finite numbers (NaN/Infinity).
|
||||
* Types that can’t be represented in JSON.
|
||||
* Log the path to the field where canonicalization failed (for debugging).
|
||||
|
||||
---
|
||||
|
||||
## 3. Graph canonicalization & `graph_revision_id`
|
||||
|
||||
This is where the library gets used and where the semantics of the graph are defined.
|
||||
|
||||
**Owner:** team that owns your supply‑chain graph model / graph ingestion.
|
||||
**Deliverables:**
|
||||
|
||||
* Domain-specific canonicalization for graphs.
|
||||
* Stable `graph_revision_id` computation integrated into services.
|
||||
|
||||
### 3.1. Define what goes into the hash
|
||||
|
||||
Create a short **spec document** (internal) that answers:
|
||||
|
||||
1. **What object is being hashed?**
|
||||
|
||||
* For example:
|
||||
|
||||
```json
|
||||
{
|
||||
"graphId": "core-vuln-edges",
|
||||
"schemaVersion": "3",
|
||||
"nodes": [...],
|
||||
"edges": [...],
|
||||
"metadata": {
|
||||
"source": "scanner-x",
|
||||
"epoch": 1732730885
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **Which fields are included vs excluded?**
|
||||
|
||||
* Include:
|
||||
|
||||
* Graph identity (ID, schema version).
|
||||
* Nodes (with stable key set).
|
||||
* Edges (with stable key set).
|
||||
* Exclude or **normalize**:
|
||||
|
||||
* Raw timestamps of ingestion.
|
||||
* Non-deterministic IDs (if they’re not part of graph semantics).
|
||||
* Any environment‑specific details.
|
||||
|
||||
3. **Versioning:**
|
||||
|
||||
* Add:
|
||||
|
||||
* `canonicalizationVersion` (from `CanonJson.CanonicalizationVersion`).
|
||||
* `graphHashSchemaVersion` (separate from graph schema version).
|
||||
|
||||
Example JSON passed into `CanonJson`:
|
||||
|
||||
```json
|
||||
{
|
||||
"graphId": "...",
|
||||
"graphSchemaVersion": "3",
|
||||
"graphHashSchemaVersion": "1",
|
||||
"canonicalizationVersion": "canon-json-v1",
|
||||
"nodes": [...],
|
||||
"edges": [...]
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2. Domain-level canonicalizer
|
||||
|
||||
Create a class like `GraphCanonicalizer` in your graph domain assembly:
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Domain-level canonicalizer: turns a graph into the object that is handed to canonical JSON hashing.
/// </summary>
/// <typeparam name="TGraph">The graph model type being canonicalized.</typeparam>
public interface IGraphCanonicalizer<TGraph>
|
||||
{
|
||||
/// <summary>Builds the canonical object for <paramref name="graph"/>.</summary>
/// <returns>The object to serialize and hash.</returns>
object ToCanonicalGraphObject(TGraph graph);
|
||||
}
|
||||
```
|
||||
|
||||
Implementation tasks:
|
||||
|
||||
1. **Choose a deterministic ordering for arrays:**
|
||||
|
||||
* Nodes: sort by `(nodeType, nodeId)` or `(packageUrl, version)`.
|
||||
* Edges: sort by `(from, to, edgeType)`.
|
||||
|
||||
2. **Strip / transform unstable fields:**
|
||||
|
||||
* Example: external IDs that may change but are not semantically relevant.
|
||||
* Replace `DateTime` with a normalized string format (if it must be part of the semantics).
|
||||
|
||||
3. **Output DTOs with primitive types only:**
|
||||
|
||||
* Create DTOs like:
|
||||
|
||||
```csharp
|
||||
/// <summary>Primitive-only DTO describing one graph node for hashing.</summary>
public sealed record CanonicalNode(
|
||||
// Node identifier.
string Id,
|
||||
// Node type.
string Type,
|
||||
// Node name.
string Name,
|
||||
// Optional version; null when absent.
string? Version,
|
||||
// Optional string-to-string attributes; null when absent.
IReadOnlyDictionary<string, string>? Attributes
|
||||
);
|
||||
```
|
||||
|
||||
* Use simple `record` types / POCOs that serialize cleanly with `System.Text.Json`.
|
||||
|
||||
4. **Combine into a single canonical graph object:**
|
||||
|
||||
```csharp
|
||||
/// <summary>Single canonical graph object that is serialized and hashed.</summary>
public sealed record CanonicalGraphDto(
|
||||
// Identity of the graph.
string GraphId,
|
||||
// Version of the graph schema.
string GraphSchemaVersion,
|
||||
// Version of the hashing spec; kept separate from the graph schema version.
string GraphHashSchemaVersion,
|
||||
// Version of the canonical JSON rules used.
string CanonicalizationVersion,
|
||||
// Nodes; JSON arrays keep caller order, so these are presumably pre-sorted by the domain canonicalizer.
IReadOnlyList<CanonicalNode> Nodes,
|
||||
// Edges; same ordering expectation as Nodes. NOTE(review): CanonicalEdge is not shown here; confirm its definition.
IReadOnlyList<CanonicalEdge> Edges
|
||||
);
|
||||
```
|
||||
|
||||
`ToCanonicalGraphObject` returns `CanonicalGraphDto`.
|
||||
|
||||
### 3.3. `graph_revision_id` calculator
|
||||
|
||||
Add a service:
|
||||
|
||||
```csharp
|
||||
/// <summary>Computes the <c>graph_revision_id</c> of a graph.</summary>
/// <typeparam name="TGraph">The graph model type.</typeparam>
public interface IGraphRevisionCalculator<TGraph>
{
    /// <summary>Returns the revision id for <paramref name="graph"/>.</summary>
    string CalculateRevisionId(TGraph graph);
}

/// <summary>
/// Default calculator: canonicalizes the graph at domain level, then hashes its canonical JSON.
/// </summary>
/// <typeparam name="TGraph">The graph model type.</typeparam>
public sealed class GraphRevisionCalculator<TGraph> : IGraphRevisionCalculator<TGraph>
{
    private readonly IGraphCanonicalizer<TGraph> _canonicalizer;

    /// <summary>Creates a calculator that uses <paramref name="canonicalizer"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="canonicalizer"/> is <c>null</c>.</exception>
    public GraphRevisionCalculator(IGraphCanonicalizer<TGraph> canonicalizer)
    {
        // Fail at construction (e.g. on a DI misconfiguration) rather than with a
        // NullReferenceException on the first hash computation.
        ArgumentNullException.ThrowIfNull(canonicalizer);
        _canonicalizer = canonicalizer;
    }

    /// <inheritdoc />
    public string CalculateRevisionId(TGraph graph)
    {
        var canonical = _canonicalizer.ToCanonicalGraphObject(graph);
        return CanonJson.ComputeSha256Hex(canonical);
    }
}
|
||||
```
|
||||
|
||||
**Wire this up in DI** for all services that handle graph creation/update.
|
||||
|
||||
### 3.4. Persistence & APIs
|
||||
|
||||
1. **Database schema:**
|
||||
|
||||
* Add a `graph_revision_id` column (string, length 64) to graph tables/collections.
|
||||
* Optionally add `graph_hash_schema_version` and `canonicalization_version` columns for debugging.
|
||||
|
||||
2. **Write path:**
|
||||
|
||||
* On graph creation/update:
|
||||
|
||||
* Build the domain model.
|
||||
* Use `GraphRevisionCalculator` to get `graph_revision_id`.
|
||||
* Store it alongside the graph.
|
||||
|
||||
3. **Read path & APIs:**
|
||||
|
||||
* Ensure all relevant APIs return `graph_revision_id` for clients.
|
||||
* If you use it in attestation / DSSE payloads, include it there too.
|
||||
|
||||
---
|
||||
|
||||
## 4. Tooling, tests & cross‑platform verification
|
||||
|
||||
This is where you make sure it **actually behaves identically** on all platforms and input variations.
|
||||
|
||||
### 4.1. Unit tests for `CanonJson`
|
||||
|
||||
Create a dedicated test project: `tests/StellaOps.CanonicalJson.Tests`.
|
||||
|
||||
**Test categories & examples:**
|
||||
|
||||
1. **Property ordering:**
|
||||
|
||||
* Input 1: `{"b":1,"a":2}`
|
||||
* Input 2: `{"a":2,"b":1}`
|
||||
* Assert: `ToCanonicalString` is identical + same hash.
|
||||
|
||||
2. **Whitespace variations:**
|
||||
|
||||
* Input with lots of spaces/newlines vs compact.
|
||||
* Canonical outputs must match.
|
||||
|
||||
3. **Unicode normalization:**
|
||||
|
||||
* One string using precomposed characters.
|
||||
* Same text using combining characters.
|
||||
* Canonical output must match (NFC).
|
||||
|
||||
4. **Number formatting:**
|
||||
|
||||
* `1`, `1.0`, `1.0000000000` → must canonicalize to the same representation.
|
||||
* `-0.0` → canonicalizes to `0`.
|
||||
|
||||
5. **Booleans & null:**
|
||||
|
||||
* Check exact lowercase output: `true`, `false`, `null`.
|
||||
|
||||
6. **Error behaviors:**
|
||||
|
||||
* Try serializing `double.NaN` → expect `CanonicalJsonException`.
|
||||
|
||||
### 4.2. Integration tests for graph hashing
|
||||
|
||||
Create tests in graph service test project:
|
||||
|
||||
1. Build two graphs that are **semantically identical** but:
|
||||
|
||||
* Nodes/edges inserted in different order.
|
||||
* Fields ordered differently.
|
||||
* Different whitespace in strings (if your app might introduce such).
|
||||
|
||||
2. Assert:
|
||||
|
||||
* `CalculateRevisionId` yields the same result.
|
||||
* Canonical DTOs match expected snapshots (optional snapshot tests).
|
||||
|
||||
3. Build graphs that differ in a meaningful way (e.g., extra edge).
|
||||
|
||||
* Assert that `graph_revision_id` is different.
|
||||
|
||||
### 4.3. Cross‑platform smoke tests
|
||||
|
||||
**Goal:** Prove same hash on Windows, Linux and macOS.
|
||||
|
||||
Implementation idea:
|
||||
|
||||
1. Add a small console tool: `StellaOps.CanonicalJson.Tool`:
|
||||
|
||||
* Usage:
|
||||
`stella-canon hash graph.json`
|
||||
* Prints:
|
||||
|
||||
* Canonical JSON (optional flag).
|
||||
* SHA‑256 hex.
|
||||
|
||||
2. In CI:
|
||||
|
||||
* Run the same test JSON on:
|
||||
|
||||
* Windows runner.
|
||||
* Linux runner.
|
||||
* Assert hashes are equal (store expected in a test harness or artifact).
|
||||
|
||||
---
|
||||
|
||||
## 5. Integration into your pipelines & rollout
|
||||
|
||||
### 5.1. Where to compute `graph_revision_id`
|
||||
|
||||
Decide (and document) **one place** where the ID is authoritative, for example:
|
||||
|
||||
* After ingestion + normalization step, **before** persisting to your graph store.
|
||||
* Or in a dedicated “graph revision service” used by ingestion pipelines.
|
||||
|
||||
Implementation:
|
||||
|
||||
* Update the ingestion service:
|
||||
|
||||
1. Parse incoming data into internal graph model.
|
||||
2. Apply domain canonicalizer → `CanonicalGraphDto`.
|
||||
3. Use `GraphRevisionCalculator` → `graph_revision_id`.
|
||||
4. Persist graph + revision ID.
|
||||
|
||||
### 5.2. Migration / backfill plan
|
||||
|
||||
If you already have graphs in production:
|
||||
|
||||
1. Add new columns/fields for `graph_revision_id` (nullable).
|
||||
2. Write a migration job:
|
||||
|
||||
* Fetch existing graph.
|
||||
* Canonicalize + hash.
|
||||
* Store `graph_revision_id`.
|
||||
3. For a transition period:
|
||||
|
||||
* Accept both “old” and “new” graphs.
|
||||
* Use `graph_revision_id` where available; fall back to legacy IDs when necessary.
|
||||
4. After backfill is complete:
|
||||
|
||||
* Make `graph_revision_id` mandatory for new graphs.
|
||||
* Phase out any legacy revision logic.
|
||||
|
||||
### 5.3. Feature flag & safety
|
||||
|
||||
* Gate the use of `graph_revision_id` in high‑risk flows (e.g., attestations, policy decisions) behind a **feature flag**:
|
||||
|
||||
* `graphRevisionIdEnabled`.
|
||||
* Roll out gradually:
|
||||
|
||||
* Start in staging.
|
||||
* Then a subset of production tenants.
|
||||
* Monitor for:
|
||||
|
||||
* Unexpected changes in revision IDs on unchanged graphs.
|
||||
* Errors from `CanonicalJsonException`.
|
||||
|
||||
---
|
||||
|
||||
## 6. Documentation for developers & operators
|
||||
|
||||
Have a short internal doc (or page) with:
|
||||
|
||||
1. **Canonical JSON spec summary:**
|
||||
|
||||
* Sorting rules.
|
||||
* Unicode NFC requirement.
|
||||
* Number format rules.
|
||||
* Non‑finite numbers not allowed.
|
||||
|
||||
2. **Graph hashing spec:**
|
||||
|
||||
* Fields included in the hash.
|
||||
* Fields explicitly ignored.
|
||||
* Array ordering rules for nodes/edges.
|
||||
* Current:
|
||||
|
||||
* `graphHashSchemaVersion = "1"`
|
||||
* `CanonicalizationVersion = "canon-json-v1"`
|
||||
|
||||
3. **Examples:**
|
||||
|
||||
* Sample graph JSON input.
|
||||
* Canonical JSON output.
|
||||
* Expected SHA‑256.
|
||||
|
||||
4. **Operational guidance:**
|
||||
|
||||
* How to run the CLI tool to debug:
|
||||
|
||||
* “Why did this graph get a new `graph_revision_id`?”
|
||||
* What to do on canonicalization errors (usually indicates bad data).
|
||||
|
||||
---
|
||||
|
||||
If you’d like, next step I can do is: draft the **actual C# projects and folder structure** (with file names + stub code) so your team can just copy/paste the skeleton into the repo and start filling in the domain-specific bits.
|
||||
Reference in New Issue
Block a user