feat: Add VEX compact fixture and implement offline verifier for Findings Ledger exports

- Introduced a new VEX compact fixture for testing.
- Implemented the `verify_export.py` script to validate Findings Ledger exports: it checks deterministic ordering, applies redaction manifests, and emits per-line and dataset SHA-256 digests.
- Added a lightweight `HarnessRunner` stub so unit tests can validate ledger hashing expectations.
- Documented tasks related to the Mirror Creator.
- Created models for entropy signals and implemented the `EntropyPenaltyCalculator` to compute penalties based on scanner outputs.
- Developed unit tests for `EntropyPenaltyCalculator` to ensure correct penalty calculations and handling of edge cases.
- Added tests for symbol ID normalization in the reachability scanner.
- Enhanced the console status service with comprehensive unit tests covering connection handling and error recovery.
- Included Cosign tool version 2.6.0 with checksums for various platforms.
StellaOps Bot
2025-12-02 21:08:01 +02:00
parent 6d049905c7
commit 47168fec38
146 changed files with 4329 additions and 549 deletions


@@ -1,6 +1,8 @@
 using System.CommandLine;
 using System.Diagnostics;
 using System.Diagnostics.Metrics;
+using System.Security.Cryptography;
+using System.Text;
 using System.Text.Json;
 using System.Text.Json.Nodes;
 using Microsoft.Extensions.DependencyInjection;
@@ -51,6 +53,10 @@ var metricsOption = new Option<FileInfo?>(
     name: "--metrics",
     description: "Optional path to write metrics snapshot JSON");
+var expectedChecksumOption = new Option<FileInfo?>(
+    name: "--expected-checksum",
+    description: "Optional JSON file containing expected eventStream/projection checksums");
 var root = new RootCommand("Findings Ledger Replay Harness (LEDGER-29-008)");
 root.AddOption(fixturesOption);
 root.AddOption(connectionOption);
@@ -58,8 +64,9 @@ root.AddOption(tenantOption);
 root.AddOption(maxParallelOption);
 root.AddOption(reportOption);
 root.AddOption(metricsOption);
+root.AddOption(expectedChecksumOption);
-root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, int maxParallel, FileInfo? reportFile, FileInfo? metricsFile) =>
+root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, int maxParallel, FileInfo? reportFile, FileInfo? metricsFile, FileInfo? expectedChecksumsFile) =>
 {
     await using var host = BuildHost(connection);
     using var scope = host.Services.CreateScope();
@@ -103,7 +110,7 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
     meterListener.RecordObservableInstruments();
-    var verification = await VerifyLedgerAsync(scope.ServiceProvider, tenant, eventsWritten, cts.Token).ConfigureAwait(false);
+    var verification = await VerifyLedgerAsync(scope.ServiceProvider, tenant, eventsWritten, expectedChecksumsFile, cts.Token).ConfigureAwait(false);
     var writeDurations = metrics.HistDouble("ledger_write_duration_seconds").Concat(metrics.HistDouble("ledger_write_latency_seconds"));
     var writeLatencyP95Ms = Percentile(writeDurations, 95) * 1000;
@@ -123,6 +130,8 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
         ProjectionLagSecondsMax: projectionLagSeconds,
         BacklogEventsMax: backlogEvents,
         DbConnectionsObserved: dbConnections,
+        EventStreamChecksum: verification.EventStreamChecksum,
+        ProjectionChecksum: verification.ProjectionChecksum,
         VerificationErrors: verification.Errors.ToArray());
     var jsonOptions = new JsonSerializerOptions { WriteIndented = true };
@@ -132,7 +141,8 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
     if (reportFile is not null)
     {
         await File.WriteAllTextAsync(reportFile.FullName, json, cts.Token).ConfigureAwait(false);
-        await WriteDssePlaceholderAsync(reportFile.FullName, json, cts.Token).ConfigureAwait(false);
+        var policyHash = Environment.GetEnvironmentVariable("LEDGER_POLICY_HASH");
+        await WriteDssePlaceholderAsync(reportFile.FullName, json, policyHash, cts.Token).ConfigureAwait(false);
     }
     if (metricsFile is not null)
@@ -148,7 +158,7 @@ root.SetHandler(async (FileInfo[] fixtures, string connection, string tenant, in
 await root.InvokeAsync(args);
-static async Task WriteDssePlaceholderAsync(string reportPath, string json, CancellationToken cancellationToken)
+static async Task WriteDssePlaceholderAsync(string reportPath, string json, string? policyHash, CancellationToken cancellationToken)
 {
     using var sha = System.Security.Cryptography.SHA256.Create();
     var digest = sha.ComputeHash(System.Text.Encoding.UTF8.GetBytes(json));
@@ -156,6 +166,8 @@ static async Task WriteDssePlaceholderAsync(string reportPath, string json, Canc
     {
         payloadType = "application/vnd.stella-ledger-harness+json",
         sha256 = Convert.ToHexString(digest).ToLowerInvariant(),
+        policyHash = policyHash ?? string.Empty,
+        schemaVersion = "ledger.harness.v1",
         signedBy = "harness-local",
         createdAt = DateTimeOffset.UtcNow
     };
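
For offline consumers, the placeholder's digest can be re-derived from the report itself. A minimal Python sketch, with illustrative file names (the exact path the harness writes the placeholder to is not shown in this hunk):

import hashlib
import json

# The placeholder records the lowercase SHA-256 hex of the exact report JSON string.
report_text = open("harness-report.json", encoding="utf-8").read()
digest = hashlib.sha256(report_text.encode("utf-8")).hexdigest()

placeholder = json.load(open("harness-report.dsse.json", encoding="utf-8"))
assert placeholder["sha256"] == digest, "report digest mismatch"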
@@ -210,6 +222,8 @@ static IHost BuildHost(string connectionString)
         opts.Database.ConnectionString = connectionString;
     });
     LedgerMetrics.ConfigureQuotas(20_000);
+    services.AddSingleton<TimeProvider>(_ => TimeProvider.System);
     services.AddSingleton<LedgerDataSource>();
     services.AddSingleton<ILedgerEventRepository, PostgresLedgerEventRepository>();
@@ -302,13 +316,17 @@ static LedgerEventDraft ToDraft(JsonObject node, string defaultTenant, DateTimeO
         prev);
 }
-static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider services, string tenant, long expectedEvents, CancellationToken cancellationToken)
+static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider services, string tenant, long expectedEvents, FileInfo? expectedChecksumsFile, CancellationToken cancellationToken)
 {
     var errors = new List<string>();
     var dataSource = services.GetRequiredService<LedgerDataSource>();
+    var expectedChecksums = LoadExpectedChecksums(expectedChecksumsFile);
     await using var connection = await dataSource.OpenConnectionAsync(tenant, "verify", cancellationToken).ConfigureAwait(false);
+    var eventHasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
+    var projectionHasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
     // Count check
     await using (var countCommand = new Npgsql.NpgsqlCommand("select count(*) from ledger_events where tenant_id = @tenant", connection))
     {
@@ -346,6 +364,7 @@ static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider service
             var eventHash = reader.GetString(4);
             var previousHash = reader.GetString(5);
             var merkleLeafHash = reader.GetString(6);
+            eventHasher.AppendData(Encoding.UTF8.GetBytes($"{eventHash}:{sequence}\n"));
             if (currentChain != chainId)
             {
@@ -382,17 +401,47 @@ static async Task<VerificationResult> VerifyLedgerAsync(IServiceProvider service
         expectedSequence++;
     }
-    if (errors.Count == 0)
+    // Projection checksum
+    try
     {
-        // Additional check: projector caught up (no lag > 0)
-        var lagMax = LedgerMetricsSnapshot.LagMax;
-        if (lagMax > 0)
+        await using var projectionCommand = new Npgsql.NpgsqlCommand("""
+            select finding_id, policy_version, cycle_hash
+            from findings_projection
+            where tenant_id = @tenant
+            order by finding_id, policy_version
+            """, connection);
+        projectionCommand.Parameters.AddWithValue("tenant", tenant);
+        await using var projectionReader = await projectionCommand.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await projectionReader.ReadAsync(cancellationToken).ConfigureAwait(false))
         {
-            errors.Add($"projection_lag_remaining:{lagMax}");
+            var findingId = projectionReader.GetString(0);
+            var policyVersion = projectionReader.GetString(1);
+            var cycleHash = projectionReader.GetString(2);
+            projectionHasher.AppendData(Encoding.UTF8.GetBytes($"{findingId}:{policyVersion}:{cycleHash}\n"));
         }
     }
+    catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
+    {
+        errors.Add($"projection_checksum_error:{ex.GetType().Name}");
+    }
-    return new VerificationResult(errors.Count == 0, errors);
+    var eventStreamChecksum = Convert.ToHexString(eventHasher.GetHashAndReset()).ToLowerInvariant();
+    var projectionChecksum = Convert.ToHexString(projectionHasher.GetHashAndReset()).ToLowerInvariant();
+    if (!string.IsNullOrWhiteSpace(expectedChecksums.EventStream) &&
+        !eventStreamChecksum.Equals(expectedChecksums.EventStream, StringComparison.OrdinalIgnoreCase))
+    {
+        errors.Add($"event_checksum_mismatch:{eventStreamChecksum}");
+    }
+    if (!string.IsNullOrWhiteSpace(expectedChecksums.Projection) &&
+        !projectionChecksum.Equals(expectedChecksums.Projection, StringComparison.OrdinalIgnoreCase))
+    {
+        errors.Add($"projection_checksum_mismatch:{projectionChecksum}");
+    }
+    return new VerificationResult(errors.Count == 0, errors, eventStreamChecksum, projectionChecksum);
 }
 static double Percentile(IEnumerable<double> values, double percentile)
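
Both checksums can be reproduced outside the harness. A minimal Python sketch of the same construction (assuming you fetch the rows yourself; event-stream ordering must match the harness's ledger query, which this hunk does not show in full):

import hashlib

def event_stream_checksum(rows):
    # rows: (event_hash, sequence) pairs in the order the harness reads them
    digest = hashlib.sha256()
    for event_hash, sequence in rows:
        digest.update(f"{event_hash}:{sequence}\n".encode("utf-8"))
    return digest.hexdigest()

def projection_checksum(rows):
    # rows: (finding_id, policy_version, cycle_hash) ordered by finding_id, policy_version
    digest = hashlib.sha256()
    for finding_id, policy_version, cycle_hash in rows:
        digest.update(f"{finding_id}:{policy_version}:{cycle_hash}\n".encode("utf-8"))
    return digest.hexdigest()

Both functions return the lowercase hex form that VerifyLedgerAsync compares against the expected-checksum file.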
@@ -426,9 +475,16 @@ internal sealed record HarnessReport(
     double ProjectionLagSecondsMax,
     double BacklogEventsMax,
     long DbConnectionsObserved,
+    string EventStreamChecksum,
+    string ProjectionChecksum,
     IReadOnlyList<string> VerificationErrors);
-internal sealed record VerificationResult(bool Success, IReadOnlyList<string> Errors);
+internal sealed record VerificationResult(bool Success, IReadOnlyList<string> Errors, string EventStreamChecksum, string ProjectionChecksum);
+internal sealed record ExpectedChecksums(string? EventStream, string? Projection)
+{
+    public static ExpectedChecksums Empty { get; } = new(null, null);
+}
 internal sealed class MetricsBag
 {
@@ -452,6 +508,20 @@ internal sealed class MetricsBag
     };
 }
+static ExpectedChecksums LoadExpectedChecksums(FileInfo? file)
+{
+    if (file is null)
+    {
+        return ExpectedChecksums.Empty;
+    }
+    using var doc = JsonDocument.Parse(File.ReadAllText(file.FullName));
+    var root = doc.RootElement;
+    var eventStream = root.TryGetProperty("eventStream", out var ev) ? ev.GetString() : null;
+    var projection = root.TryGetProperty("projection", out var pr) ? pr.GetString() : null;
+    return new ExpectedChecksums(eventStream, projection);
+}
 // Harness lightweight no-op implementations for projection/merkle to keep replay fast
 internal sealed class NoOpPolicyEvaluationService : IPolicyEvaluationService

verify_export.py

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""Offline verifier for Findings Ledger exports (FL8).
- Validates deterministic ordering and applies redaction manifest.
- Computes per-line and dataset SHA-256 digests.
"""
import argparse
import hashlib
import json
import sys
from pathlib import Path
from typing import Any, Dict, List
def load_manifest(path: Path) -> Dict[str, Any]:
if not path.exists():
raise FileNotFoundError(path)
with path.open("r", encoding="utf-8") as f:
if path.suffix in (".json", ".ndjson"):
return json.load(f)
return yaml_manifest(f.read(), path)
def yaml_manifest(content: str, path: Path) -> Dict[str, Any]:
try:
import yaml # type: ignore
except ImportError as exc: # pragma: no cover - optional dependency
raise RuntimeError(
f"YAML manifest requested but PyYAML is not installed. "
f"Install pyyaml or provide JSON manifest instead ({path})."
) from exc
return yaml.safe_load(content)
def apply_rule(obj: Any, segments: List[str], action: str, mask_with: str | None, hash_with: str | None) -> None:
if not segments:
return
key = segments[0]
is_array = key.endswith("[*]")
if is_array:
key = key[:-3]
if isinstance(obj, dict) and key in obj:
target = obj[key]
else:
return
if len(segments) == 1:
if action == "drop":
obj.pop(key, None)
elif action == "mask":
obj[key] = mask_with or "<masked>"
elif action == "hash":
if isinstance(target, str):
obj[key] = hashlib.sha256(target.encode("utf-8")).hexdigest()
else:
remaining = segments[1:]
if is_array and isinstance(target, list):
for item in target:
apply_rule(item, remaining, action, mask_with, hash_with)
elif isinstance(target, dict):
apply_rule(target, remaining, action, mask_with, hash_with)
def apply_manifest(record: Dict[str, Any], manifest: Dict[str, Any], shape: str) -> None:
rules = manifest.get("rules", {}).get(shape, [])
for rule in rules:
path = rule.get("path")
action = rule.get("action")
if not path or not action:
continue
segments = path.replace("$.", "").split(".")
apply_rule(record, segments, action, rule.get("maskWith"), rule.get("hashWith"))
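# Illustrative manifest shape consumed by apply_manifest: "rules" maps a
# record shape to a list of rules, each carrying "path" ($-rooted, dot
# segments, [*] for array elements), "action" (drop | mask | hash), and an
# optional "maskWith". Note apply_rule accepts "hashWith" but currently
# always hashes with SHA-256.
#
# {
#   "rules": {
#     "finding": [
#       {"path": "$.subject.email", "action": "mask", "maskWith": "<redacted>"},
#       {"path": "$.evidence[*].token", "action": "hash"}
#     ]
#   }
# }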
def canonical(obj: Dict[str, Any]) -> str:
return json.dumps(obj, separators=(",", ":"), sort_keys=True, ensure_ascii=False)
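# e.g. canonical({"b": 1, "a": [2]}) -> '{"a":[2],"b":1}'; fixed key order and
# separators make the output byte-stable for the per-line digests below.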
def main() -> int:
parser = argparse.ArgumentParser(description="Verify deterministic Findings Ledger export")
parser.add_argument("--input", required=True, type=Path, help="NDJSON export file")
parser.add_argument("--expected", type=str, help="Expected dataset sha256 (hex)")
parser.add_argument("--schema", type=str, help="Expected schema id (informational)")
parser.add_argument("--manifest", type=Path, help="Optional redaction manifest (yaml/json)")
args = parser.parse_args()
manifest = None
if args.manifest:
manifest = load_manifest(args.manifest)
dataset_hash = hashlib.sha256()
line_hashes: list[str] = []
records = 0
with args.input.open("r", encoding="utf-8") as f:
for raw in f:
if not raw.strip():
continue
try:
record = json.loads(raw)
except json.JSONDecodeError as exc:
sys.stderr.write(f"invalid json: {exc}\n")
return 1
shape = record.get("shape") or args.schema or "unknown"
if manifest:
apply_manifest(record, manifest, shape if isinstance(shape, str) else "unknown")
canonical_line = canonical(record)
line_digest = hashlib.sha256(canonical_line.encode("utf-8")).hexdigest()
line_hashes.append(line_digest)
dataset_hash.update(line_digest.encode("utf-8"))
records += 1
dataset_digest = dataset_hash.hexdigest()
print(json.dumps({
"file": str(args.input),
"schema": args.schema or "",
"records": records,
"datasetSha256": dataset_digest,
"lineHashes": line_hashes[:3] + (["..."] if len(line_hashes) > 3 else [])
}, indent=2))
if args.expected and args.expected.lower() != dataset_digest.lower():
sys.stderr.write(f"checksum mismatch: expected {args.expected} got {dataset_digest}\n")
return 2
return 0
if __name__ == "__main__":
sys.exit(main())
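
A typical offline run looks like the following (file names are illustrative). Exit codes: 0 on success, 1 on malformed JSON, 2 on checksum mismatch. Note the dataset digest is the SHA-256 of the concatenated per-line hex digests, not of the raw lines.

python3 verify_export.py --input findings-export.ndjson --manifest redaction.yaml --expected <64-char hex sha256>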