feat: Add VEX compact fixture and implement offline verifier for Findings Ledger exports
- Introduced a new VEX compact fixture for testing purposes.
- Implemented `verify_export.py` to validate Findings Ledger exports, ensuring deterministic ordering and applying redaction manifests.
- Added a lightweight stub `HarnessRunner` for unit tests to validate ledger hashing expectations.
- Documented tasks related to the Mirror Creator.
- Created models for entropy signals and implemented the `EntropyPenaltyCalculator` to compute penalties based on scanner outputs.
- Developed unit tests for `EntropyPenaltyCalculator` covering penalty calculations and edge cases.
- Added tests for symbol ID normalization in the reachability scanner.
- Enhanced the console status service with unit tests for connection handling and error recovery.
- Included Cosign tool version 2.6.0 with checksums for various platforms.
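Example invocation of the new verifier (paths and digest values are illustrative; the flags match the argparse definitions in the diff below):

    python verify_export.py --input findings-export.ndjson \
        --manifest redaction.json --expected 9c1e...ab

The script prints a summary JSON (record count, dataset SHA-256, the first three line hashes) and exits 1 on invalid JSON input or 2 on a dataset checksum mismatch.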
@@ -1,6 +1,8 @@
 using System.CommandLine;
 using System.Diagnostics;
 using System.Diagnostics.Metrics;
+using System.Security.Cryptography;
+using System.Text;
 using System.Text.Json;
 using System.Text.Json.Nodes;
 using Microsoft.Extensions.DependencyInjection;
@@ -51,6 +53,10 @@ var metricsOption = new Option<FileInfo?>(
     name: "--metrics",
     description: "Optional path to write metrics snapshot JSON");
 
+var expectedChecksumOption = new Option<FileInfo?>(
+    name: "--expected-checksum",
+    description: "Optional JSON file containing expected eventStream/projection checksums");
+
 var root = new RootCommand("Findings Ledger Replay Harness (LEDGER-29-008)");
 root.AddOption(fixturesOption);
 root.AddOption(connectionOption);
@@ -58,8 +64,9 @@ root.AddOption(tenantOption);
 root.AddOption(maxParallelOption);
 root.AddOption(reportOption);
 root.AddOption(metricsOption);
+root.AddOption(expectedChecksumOption);
 
-root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, int maxParallel, FileInfo? reportFile, FileInfo? metricsFile) =>
+root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, int maxParallel, FileInfo? reportFile, FileInfo? metricsFile, FileInfo? expectedChecksumsFile) =>
 {
     await using var host = BuildHost(connection);
     using var scope = host.Services.CreateScope();
@@ -103,7 +110,7 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
 
     meterListener.RecordObservableInstruments();
 
-    var verification = await VerifyLedgerAsync(scope.ServiceProvider, tenant, eventsWritten, cts.Token).ConfigureAwait(false);
+    var verification = await VerifyLedgerAsync(scope.ServiceProvider, tenant, eventsWritten, expectedChecksumsFile, cts.Token).ConfigureAwait(false);
 
     var writeDurations = metrics.HistDouble("ledger_write_duration_seconds").Concat(metrics.HistDouble("ledger_write_latency_seconds"));
     var writeLatencyP95Ms = Percentile(writeDurations, 95) * 1000;
@@ -123,6 +130,8 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
         ProjectionLagSecondsMax: projectionLagSeconds,
         BacklogEventsMax: backlogEvents,
         DbConnectionsObserved: dbConnections,
+        EventStreamChecksum: verification.EventStreamChecksum,
+        ProjectionChecksum: verification.ProjectionChecksum,
         VerificationErrors: verification.Errors.ToArray());
 
     var jsonOptions = new JsonSerializerOptions { WriteIndented = true };
@@ -132,7 +141,8 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
     if (reportFile is not null)
     {
         await File.WriteAllTextAsync(reportFile.FullName, json, cts.Token).ConfigureAwait(false);
-        await WriteDssePlaceholderAsync(reportFile.FullName, json, cts.Token).ConfigureAwait(false);
+        var policyHash = Environment.GetEnvironmentVariable("LEDGER_POLICY_HASH");
+        await WriteDssePlaceholderAsync(reportFile.FullName, json, policyHash, cts.Token).ConfigureAwait(false);
     }
 
     if (metricsFile is not null)
@@ -148,7 +158,7 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
 
 await root.InvokeAsync(args);
 
-static async Task WriteDssePlaceholderAsync(string reportPath, string json, CancellationToken cancellationToken)
+static async Task WriteDssePlaceholderAsync(string reportPath, string json, string? policyHash, CancellationToken cancellationToken)
 {
     using var sha = System.Security.Cryptography.SHA256.Create();
     var digest = sha.ComputeHash(System.Text.Encoding.UTF8.GetBytes(json));
@@ -156,6 +166,8 @@ static async Task WriteDssePlaceholderAsync(string reportPath, string json, Canc
     {
         payloadType = "application/vnd.stella-ledger-harness+json",
         sha256 = Convert.ToHexString(digest).ToLowerInvariant(),
+        policyHash = policyHash ?? string.Empty,
+        schemaVersion = "ledger.harness.v1",
         signedBy = "harness-local",
         createdAt = DateTimeOffset.UtcNow
     };
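The placeholder envelope is keyed to the report: `sha256` digests the report JSON, and `policyHash` comes from the `LEDGER_POLICY_HASH` environment variable (empty string when unset). With the two added fields it looks roughly like this (values illustrative; fields outside this hunk may also be present):

    {
      "payloadType": "application/vnd.stella-ledger-harness+json",
      "sha256": "3f2a...c9",
      "policyHash": "",
      "schemaVersion": "ledger.harness.v1",
      "signedBy": "harness-local",
      "createdAt": "2025-01-01T00:00:00+00:00"
    }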
@@ -210,6 +222,8 @@ static IHost BuildHost(string connectionString)
         opts.Database.ConnectionString = connectionString;
     });
 
+    LedgerMetrics.ConfigureQuotas(20_000);
+
     services.AddSingleton<TimeProvider>(_ => TimeProvider.System);
     services.AddSingleton<LedgerDataSource>();
     services.AddSingleton<ILedgerEventRepository, PostgresLedgerEventRepository>();
@@ -302,13 +316,17 @@ static LedgerEventDraft ToDraft(JsonObject node, string defaultTenant, DateTimeO
         prev);
 }
 
-static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider services, string tenant, long expectedEvents, CancellationToken cancellationToken)
+static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider services, string tenant, long expectedEvents, FileInfo? expectedChecksumsFile, CancellationToken cancellationToken)
 {
     var errors = new List<string>();
     var dataSource = services.GetRequiredService<LedgerDataSource>();
+    var expectedChecksums = LoadExpectedChecksums(expectedChecksumsFile);
 
     await using var connection = await dataSource.OpenConnectionAsync(tenant, "verify", cancellationToken).ConfigureAwait(false);
 
+    var eventHasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
+    var projectionHasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
+
     // Count check
     await using (var countCommand = new Npgsql.NpgsqlCommand("select count(*) from ledger_events where tenant_id = @tenant", connection))
     {
@@ -346,6 +364,7 @@ static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider service
         var eventHash = reader.GetString(4);
         var previousHash = reader.GetString(5);
         var merkleLeafHash = reader.GetString(6);
+        eventHasher.AppendData(Encoding.UTF8.GetBytes($"{eventHash}:{sequence}\n"));
 
         if (currentChain != chainId)
         {
@@ -382,17 +401,47 @@ static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider service
         expectedSequence++;
     }
 
-    if (errors.Count == 0)
-    {
-        // Additional check: projector caught up (no lag > 0)
-        var lagMax = LedgerMetricsSnapshot.LagMax;
-        if (lagMax > 0)
-        {
-            errors.Add($"projection_lag_remaining:{lagMax}");
-        }
-    }
-
-    return new VerificationResult(errors.Count == 0, errors);
+    // Projection checksum
+    try
+    {
+        await using var projectionCommand = new Npgsql.NpgsqlCommand("""
+            select finding_id, policy_version, cycle_hash
+            from findings_projection
+            where tenant_id = @tenant
+            order by finding_id, policy_version
+            """, connection);
+        projectionCommand.Parameters.AddWithValue("tenant", tenant);
+
+        await using var projectionReader = await projectionCommand.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await projectionReader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            var findingId = projectionReader.GetString(0);
+            var policyVersion = projectionReader.GetString(1);
+            var cycleHash = projectionReader.GetString(2);
+            projectionHasher.AppendData(Encoding.UTF8.GetBytes($"{findingId}:{policyVersion}:{cycleHash}\n"));
+        }
+    }
+    catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
+    {
+        errors.Add($"projection_checksum_error:{ex.GetType().Name}");
+    }
+
+    var eventStreamChecksum = Convert.ToHexString(eventHasher.GetHashAndReset()).ToLowerInvariant();
+    var projectionChecksum = Convert.ToHexString(projectionHasher.GetHashAndReset()).ToLowerInvariant();
+
+    if (!string.IsNullOrWhiteSpace(expectedChecksums.EventStream) &&
+        !eventStreamChecksum.Equals(expectedChecksums.EventStream, StringComparison.OrdinalIgnoreCase))
+    {
+        errors.Add($"event_checksum_mismatch:{eventStreamChecksum}");
+    }
+
+    if (!string.IsNullOrWhiteSpace(expectedChecksums.Projection) &&
+        !projectionChecksum.Equals(expectedChecksums.Projection, StringComparison.OrdinalIgnoreCase))
+    {
+        errors.Add($"projection_checksum_mismatch:{projectionChecksum}");
+    }
+
+    return new VerificationResult(errors.Count == 0, errors, eventStreamChecksum, projectionChecksum);
 }
 
 static double Percentile(IEnumerable<double> values, double percentile)
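Both checksums are order-sensitive SHA-256 digests over newline-terminated text rows. A minimal Python sketch reproducing them offline (assumption: rows are supplied in the same order as the queries above; the helper names are illustrative, not part of the harness):

    import hashlib

    def event_stream_checksum(rows):
        # rows: iterable of (event_hash, sequence) in ledger replay order
        h = hashlib.sha256()
        for event_hash, sequence in rows:
            h.update(f"{event_hash}:{sequence}\n".encode("utf-8"))
        # lowercase hex, matching Convert.ToHexString(...).ToLowerInvariant()
        return h.hexdigest()

    def projection_checksum(rows):
        # rows: iterable of (finding_id, policy_version, cycle_hash),
        # ordered by finding_id, policy_version as in the SQL above
        h = hashlib.sha256()
        for finding_id, policy_version, cycle_hash in rows:
            h.update(f"{finding_id}:{policy_version}:{cycle_hash}\n".encode("utf-8"))
        return h.hexdigest()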
@@ -426,9 +475,16 @@ internal sealed record HarnessReport(
     double ProjectionLagSecondsMax,
     double BacklogEventsMax,
     long DbConnectionsObserved,
+    string EventStreamChecksum,
+    string ProjectionChecksum,
     IReadOnlyList<string> VerificationErrors);
 
-internal sealed record VerificationResult(bool Success, IReadOnlyList<string> Errors);
+internal sealed record VerificationResult(bool Success, IReadOnlyList<string> Errors, string EventStreamChecksum, string ProjectionChecksum);
+
+internal sealed record ExpectedChecksums(string? EventStream, string? Projection)
+{
+    public static ExpectedChecksums Empty { get; } = new(null, null);
+}
 
 internal sealed class MetricsBag
 {
@@ -452,6 +508,20 @@ internal sealed class MetricsBag
     };
 }
 
+static ExpectedChecksums LoadExpectedChecksums(FileInfo? file)
+{
+    if (file is null)
+    {
+        return ExpectedChecksums.Empty;
+    }
+
+    using var doc = JsonDocument.Parse(File.ReadAllText(file.FullName));
+    var root = doc.RootElement;
+    var eventStream = root.TryGetProperty("eventStream", out var ev) ? ev.GetString() : null;
+    var projection = root.TryGetProperty("projection", out var pr) ? pr.GetString() : null;
+    return new ExpectedChecksums(eventStream, projection);
+}
+
 // Harness lightweight no-op implementations for projection/merkle to keep replay fast
 internal sealed class NoOpPolicyEvaluationService : IPolicyEvaluationService
 {
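`LoadExpectedChecksums` reads two optional top-level string properties, so the file passed via `--expected-checksum` is simply (digest values illustrative):

    {
      "eventStream": "9c1e...ab",
      "projection": "5d07...44"
    }

Either property may be omitted; blank or missing values skip the corresponding comparison (the `IsNullOrWhiteSpace` guards in `VerifyLedgerAsync`), and comparisons are case-insensitive.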
--- /dev/null
+++ verify_export.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""Offline verifier for Findings Ledger exports (FL8).
+- Validates deterministic ordering and applies redaction manifest.
+- Computes per-line and dataset SHA-256 digests.
+"""
+import argparse
+import hashlib
+import json
+import sys
+from pathlib import Path
+from typing import Any, Dict, List
+
+
+def load_manifest(path: Path) -> Dict[str, Any]:
+    if not path.exists():
+        raise FileNotFoundError(path)
+    with path.open("r", encoding="utf-8") as f:
+        if path.suffix in (".json", ".ndjson"):
+            return json.load(f)
+        return yaml_manifest(f.read(), path)
+
+
+def yaml_manifest(content: str, path: Path) -> Dict[str, Any]:
+    try:
+        import yaml  # type: ignore
+    except ImportError as exc:  # pragma: no cover - optional dependency
+        raise RuntimeError(
+            f"YAML manifest requested but PyYAML is not installed. "
+            f"Install pyyaml or provide JSON manifest instead ({path})."
+        ) from exc
+    return yaml.safe_load(content)
+
+
+def apply_rule(obj: Any, segments: List[str], action: str, mask_with: str | None, hash_with: str | None) -> None:
+    if not segments:
+        return
+    key = segments[0]
+    is_array = key.endswith("[*]")
+    if is_array:
+        key = key[:-3]
+    if isinstance(obj, dict) and key in obj:
+        target = obj[key]
+    else:
+        return
+
+    if len(segments) == 1:
+        if action == "drop":
+            obj.pop(key, None)
+        elif action == "mask":
+            obj[key] = mask_with or "<masked>"
+        elif action == "hash":
+            if isinstance(target, str):
+                obj[key] = hashlib.sha256(target.encode("utf-8")).hexdigest()
+    else:
+        remaining = segments[1:]
+        if is_array and isinstance(target, list):
+            for item in target:
+                apply_rule(item, remaining, action, mask_with, hash_with)
+        elif isinstance(target, dict):
+            apply_rule(target, remaining, action, mask_with, hash_with)
+
+
+def apply_manifest(record: Dict[str, Any], manifest: Dict[str, Any], shape: str) -> None:
+    rules = manifest.get("rules", {}).get(shape, [])
+    for rule in rules:
+        path = rule.get("path")
+        action = rule.get("action")
+        if not path or not action:
+            continue
+        segments = path.replace("$.", "").split(".")
+        apply_rule(record, segments, action, rule.get("maskWith"), rule.get("hashWith"))
+
+
+def canonical(obj: Dict[str, Any]) -> str:
+    return json.dumps(obj, separators=(",", ":"), sort_keys=True, ensure_ascii=False)
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Verify deterministic Findings Ledger export")
+    parser.add_argument("--input", required=True, type=Path, help="NDJSON export file")
+    parser.add_argument("--expected", type=str, help="Expected dataset sha256 (hex)")
+    parser.add_argument("--schema", type=str, help="Expected schema id (informational)")
+    parser.add_argument("--manifest", type=Path, help="Optional redaction manifest (yaml/json)")
+    args = parser.parse_args()
+
+    manifest = None
+    if args.manifest:
+        manifest = load_manifest(args.manifest)
+
+    dataset_hash = hashlib.sha256()
+    line_hashes: list[str] = []
+    records = 0
+
+    with args.input.open("r", encoding="utf-8") as f:
+        for raw in f:
+            if not raw.strip():
+                continue
+            try:
+                record = json.loads(raw)
+            except json.JSONDecodeError as exc:
+                sys.stderr.write(f"invalid json: {exc}\n")
+                return 1
+            shape = record.get("shape") or args.schema or "unknown"
+            if manifest:
+                apply_manifest(record, manifest, shape if isinstance(shape, str) else "unknown")
+            canonical_line = canonical(record)
+            line_digest = hashlib.sha256(canonical_line.encode("utf-8")).hexdigest()
+            line_hashes.append(line_digest)
+            dataset_hash.update(line_digest.encode("utf-8"))
+            records += 1
+
+    dataset_digest = dataset_hash.hexdigest()
+    print(json.dumps({
+        "file": str(args.input),
+        "schema": args.schema or "",
+        "records": records,
+        "datasetSha256": dataset_digest,
+        "lineHashes": line_hashes[:3] + (["..."] if len(line_hashes) > 3 else [])
+    }, indent=2))
+
+    if args.expected and args.expected.lower() != dataset_digest.lower():
+        sys.stderr.write(f"checksum mismatch: expected {args.expected} got {dataset_digest}\n")
+        return 2
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
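For reference, `apply_manifest` expects rules grouped by record shape, each rule carrying a JSONPath-like `path` (`$.`-prefixed dot segments, `[*]` for array elements) and an `action` of `drop`, `mask`, or `hash`. A minimal JSON manifest under those assumptions (the shape and field names are illustrative):

    {
      "rules": {
        "finding": [
          { "path": "$.subject.email", "action": "hash" },
          { "path": "$.evidence[*].rawPayload", "action": "drop" },
          { "path": "$.assignee", "action": "mask", "maskWith": "<redacted>" }
        ]
      }
    }

Note that `maskWith` overrides the default `<masked>` placeholder, while `hashWith` is parsed but currently ignored: `apply_rule` always hashes string values with SHA-256.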