Add Canonical JSON serialization library with tests and documentation

- Implemented CanonJson class for deterministic JSON serialization and hashing.
- Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters.
- Created project files for the Canonical JSON library and its tests, including necessary package references.
- Added README.md for library usage and API reference.
- Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
2025-12-19 15:35:00 +02:00
parent 43882078a4
commit 951a38d561
192 changed files with 27550 additions and 2611 deletions
--- a/bench/determinism/README.md
+++ b/bench/determinism/README.md
@@ -0,0 +1,129 @@
+# Determinism Benchmark Suite
+
+> **Purpose:** Verify that StellaOps produces bit-identical results across replays.
+>
+> **Status:** Active
+>
+> **Sprint:** SPRINT_3850_0001_0001 (Competitive Gap Closure)
+
+## Overview
+
+Determinism is a core differentiator for StellaOps:
+- Same inputs → same outputs (bit-identical)
+- Replay manifests enable audit verification
+- No hidden state or environment leakage
+
+## What Gets Tested
+
+### Canonical JSON
+- Object key ordering (alphabetical)
+- Number formatting consistency
+- UTF-8 encoding without BOM
+- No whitespace variation
+
+### Scan Manifests
+- Same artifact + same feeds → same manifest hash
+- Seed values propagate correctly
+- Timestamp handling (fixed UTC)
+
+### Proof Bundles
+- Root hash computation
+- DSSE envelope determinism
+- ProofLedger node ordering
+
+### Score Computation
+- Same manifest → same score
+- Lattice merge is associative/commutative
+- Policy rule ordering doesn't affect outcome
+
+## Test Cases
+
+### TC-001: Canonical JSON Determinism
+
+```bash
+# Run same object through CanonJson 100 times
+# All hashes must match
+```
+
+### TC-002: Manifest Hash Stability
+
+```bash
+# Create manifest with identical inputs
+# Verify ComputeHash() returns same value
+```
+
+### TC-003: Cross-Platform Determinism
+
+```bash
+# Run on Linux, Windows, macOS
+# Compare output hashes
+```
+
+### TC-004: Feed Snapshot Determinism
+
+```bash
+# Same feed snapshot hash → same scan results
+```
+
+## Fixtures
+
+```
+fixtures/
+├── sample-manifest.json
+├── sample-ledger.json
+├── expected-hashes.json
+└── cross-platform/
+    ├── linux-x64.hashes.json
+    ├── windows-x64.hashes.json
+    └── macos-arm64.hashes.json
+```
+
+## Running the Suite
+
+```bash
+# Run determinism tests (from the repository root)
+dotnet test tests/StellaOps.Determinism.Tests
+
+# Run replay verification (from bench/determinism/)
+./run-replay.sh --manifest fixtures/sample-manifest.json --runs 10
+
+# Cross-platform verification (from bench/determinism/; requires CI matrix)
+./verify-cross-platform.sh
+```
+
+## Metrics
+
+| Metric | Target | Description |
+|--------|--------|-------------|
+| Hash stability | 100% | All runs produce identical hash |
+| Replay success | 100% | All replays match original |
+| Cross-platform parity | 100% | Same hash across OS/arch |
+
+## Integration with CI
+
+```yaml
+# .gitea/workflows/bench-determinism.yaml
+name: Determinism Benchmark
+# Triggered only by pushes that touch the canonical JSON library, the scanner
+# core library, or the benchmark itself.
+on:
+  push:
+    paths:
+      - 'src/__Libraries/StellaOps.Canonical.Json/**'
+      - 'src/Scanner/__Libraries/StellaOps.Scanner.Core/**'
+      - 'bench/determinism/**'
+
+jobs:
+  determinism:
+    # One job per operating system so the captured hashes can be compared
+    # across platforms.
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Determinism Tests
+        run: dotnet test tests/StellaOps.Determinism.Tests
+      - name: Capture Hashes
+        # A .sh script needs a POSIX shell; request bash explicitly because
+        # the default run shell on Windows runners is PowerShell.
+        shell: bash
+        run: ./bench/determinism/capture-hashes.sh
+      - name: Upload Hashes
+        uses: actions/upload-artifact@v4
+        with:
+          # The artifact name carries the OS so the matrix jobs do not collide.
+          name: hashes-${{ matrix.os }}
+          path: bench/determinism/results/
+```
--- a/bench/determinism/run-replay.sh
+++ b/bench/determinism/run-replay.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+# run-replay.sh
+# Deterministic Replay Benchmark
+# Sprint: SPRINT_3850_0001_0001
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RESULTS_DIR="$SCRIPT_DIR/results/$(date -u +%Y%m%d_%H%M%S)"
+
+# Parse arguments
+MANIFEST_FILE=""
+RUNS=5
+VERBOSE=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --manifest)
+            MANIFEST_FILE="$2"
+            shift 2
+            ;;
+        --runs)
+            RUNS="$2"
+            shift 2
+            ;;
+        --verbose|-v)
+            VERBOSE=true
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Use sample manifest if none provided.
+# This is resolved before the banner so the configuration printed below names
+# the file that will actually be replayed.
+if [[ -z "$MANIFEST_FILE" ]]; then
+    if [[ -f "$SCRIPT_DIR/fixtures/sample-manifest.json" ]]; then
+        MANIFEST_FILE="$SCRIPT_DIR/fixtures/sample-manifest.json"
+    else
+        # Best effort: continue as before, but say that the replay command
+        # will be invoked with an empty manifest path.
+        echo "Warning: no --manifest given and $SCRIPT_DIR/fixtures/sample-manifest.json not found" >&2
+    fi
+elif [[ ! -f "$MANIFEST_FILE" ]]; then
+    # An explicitly requested manifest that does not exist is a usage error;
+    # fail before an empty results directory is created.
+    echo "Manifest file not found: $MANIFEST_FILE" >&2
+    exit 1
+fi
+
+echo "╔════════════════════════════════════════════════╗"
+echo "║       Deterministic Replay Benchmark           ║"
+echo "╚════════════════════════════════════════════════╝"
+echo ""
+echo "Configuration:"
+echo "  Manifest:    ${MANIFEST_FILE:-<none>}"
+echo "  Runs:        $RUNS"
+echo "  Results dir: $RESULTS_DIR"
+echo ""
+
+# Per-run outputs and summary.json are written into this directory.
+mkdir -p "$RESULTS_DIR"
+
+declare -a HASHES
+
+echo "Running $RUNS iterations..."
+echo ""
+
+for i in $(seq 1 $RUNS); do
+    echo -n "  Run $i: "
+    
+    OUTPUT_FILE="$RESULTS_DIR/run_$i.json"
+    
+    if command -v dotnet &> /dev/null; then
+        # Run the replay service
+        dotnet run --project "$SCRIPT_DIR/../../src/Scanner/StellaOps.Scanner.WebService" -- \
+            replay \
+            --manifest "$MANIFEST_FILE" \
+            --output "$OUTPUT_FILE" \
+            --format json 2>/dev/null || {
+                echo "⊘ Skipped (replay command not available)"
+                continue
+            }
+        
+        if [ -f "$OUTPUT_FILE" ]; then
+            HASH=$(sha256sum "$OUTPUT_FILE" | cut -d' ' -f1)
+            HASHES+=("$HASH")
+            echo "sha256:${HASH:0:16}..."
+        else
+            echo "⊘ No output generated"
+        fi
+    else
+        echo "⊘ Skipped (dotnet not available)"
+    fi
+done
+
+echo ""
+
+# Verify all hashes match.
+# ALL_MATCH stays unset unless at least two runs produced output; the summary
+# below records that case as null.
+COMPLETED=${#HASHES[@]}
+
+if [[ $COMPLETED -gt 1 ]]; then
+    FIRST_HASH="${HASHES[0]}"
+    ALL_MATCH=true
+
+    for hash in "${HASHES[@]}"; do
+        if [[ "$hash" != "$FIRST_HASH" ]]; then
+            ALL_MATCH=false
+            break
+        fi
+    done
+
+    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+    echo "Results"
+    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+    if $ALL_MATCH; then
+        # Report the number of runs actually compared, not the number
+        # requested: skipped runs are not evidence of determinism.
+        echo "✓ PASS: All $COMPLETED runs produced identical output"
+        echo "  Hash: sha256:$FIRST_HASH"
+        if [[ "$COMPLETED" != "$RUNS" ]]; then
+            echo "  Note: only $COMPLETED of $RUNS requested runs produced output"
+        fi
+    else
+        echo "✗ FAIL: Outputs differ between runs"
+        echo ""
+        echo "Hashes:"
+        for i in "${!HASHES[@]}"; do
+            echo "  Run $((i+1)): ${HASHES[$i]}"
+        done
+    fi
+else
+    echo "ℹ️  Insufficient runs to verify determinism"
+fi
+
+# Create summary JSON.
+# The hash list is built element by element so that zero hashes yield [] and
+# not [""]; the ${HASHES[@]+...} form also keeps `set -u` from aborting on an
+# empty array in older bash versions.
+HASHES_JSON=""
+for hash in ${HASHES[@]+"${HASHES[@]}"}; do
+    HASHES_JSON+="${HASHES_JSON:+,}\"$hash\""
+done
+
+# Escape backslashes and double quotes so that any manifest path (for example
+# a Windows-style one) still produces valid JSON.
+MANIFEST_JSON=${MANIFEST_FILE//\\/\\\\}
+MANIFEST_JSON=${MANIFEST_JSON//\"/\\\"}
+
+cat > "$RESULTS_DIR/summary.json" <<EOF
+{
+  "benchmark": "determinism-replay",
+  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
+  "manifest": "$MANIFEST_JSON",
+  "runs": $RUNS,
+  "hashes": [$HASHES_JSON],
+  "deterministic": ${ALL_MATCH:-null}
+}
+EOF
+
+echo ""
+echo "Results saved to: $RESULTS_DIR"
+
+# Report a determinism failure through the exit status so callers such as CI
+# can detect it. Too few comparable runs is not treated as a failure.
+if [[ "${ALL_MATCH:-}" == false ]]; then
+    exit 1
+fi