using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using StellaOps.Canonicalization.Json;
using StellaOps.Signals.Models;

namespace StellaOps.Signals.Services;

/// <summary>
/// Computes a digest string for a <see cref="ReachabilityFactDocument"/>.
/// The fact is first projected onto private canonical records (strings trimmed,
/// nulls replaced by defaults, collections sorted with ordinal comparison), the
/// projection is serialized with <see cref="CanonicalJsonSerializer"/>, and the
/// UTF-8 bytes of that JSON are hashed with SHA-256.
/// </summary>
internal static class ReachabilityFactDigestCalculator
{
    // NOTE(review): the generic type arguments in this file were restored after
    // being stripped from the source. The element types taken from
    // StellaOps.Signals.Models (ReachabilityStateDocument, CallgraphGate,
    // RuntimeFactDocument, UncertaintyStateDocument, UncertaintyEvidenceDocument)
    // are inferred from how the members are used here - confirm the exact names
    // against the model definitions.

    /// <summary>Bucket written to the canonical form when a state carries none.</summary>
    private const string DefaultBucket = "unknown";

    /// <summary>
    /// Upper bound for the gate multiplier; also the value used when the
    /// evidence carries no multiplier at all.
    /// </summary>
    private const int MaxGateMultiplierBps = 10000;

    /// <summary>
    /// Builds the canonical projection of <paramref name="fact"/> and returns its digest.
    /// </summary>
    /// <param name="fact">The reachability fact to digest.</param>
    /// <returns>The literal prefix <c>sha256:</c> followed by the lowercase hex SHA-256 hash.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fact"/> is <c>null</c>.</exception>
    public static string Compute(ReachabilityFactDocument fact)
    {
        ArgumentNullException.ThrowIfNull(fact);

        var canonical = new CanonicalReachabilityFact(
            CallgraphId: fact.CallgraphId ?? string.Empty,
            SubjectKey: fact.SubjectKey ?? string.Empty,
            Subject: new CanonicalSubject(
                fact.Subject?.ImageDigest ?? string.Empty,
                fact.Subject?.Component ?? string.Empty,
                fact.Subject?.Version ?? string.Empty,
                fact.Subject?.ScanId ?? string.Empty),
            EntryPoints: NormalizeList(fact.EntryPoints),
            States: NormalizeStates(fact.States),
            RuntimeFacts: NormalizeRuntimeFacts(fact.RuntimeFacts),
            UncertaintyStates: NormalizeUncertaintyStates(fact.Uncertainty),
            Metadata: NormalizeMetadata(fact.Metadata),
            Score: fact.Score,
            RiskScore: fact.RiskScore,
            UnknownsCount: fact.UnknownsCount,
            UnknownsPressure: fact.UnknownsPressure,
            ComputedAt: fact.ComputedAt);

        var json = CanonicalJsonSerializer.Serialize(canonical);

        // The hash is written into a stack buffer, so no array is allocated for it.
        Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
        SHA256.HashData(Encoding.UTF8.GetBytes(json), hash);

        return "sha256:" + Convert.ToHexString(hash).ToLowerInvariant();
    }

    /// <summary>
    /// Trims every entry, drops null/blank entries, removes ordinal duplicates
    /// and returns the remainder in ordinal order.
    /// </summary>
    /// <param name="values">Raw strings; may be <c>null</c>.</param>
    /// <returns>A new list; empty when <paramref name="values"/> is <c>null</c>.</returns>
    private static List<string> NormalizeList(IEnumerable<string>? values) =>
        values?
            .Where(v => !string.IsNullOrWhiteSpace(v))
            .Select(v => v.Trim())
            .Distinct(StringComparer.Ordinal)
            .OrderBy(v => v, StringComparer.Ordinal)
            .ToList()
        ?? new List<string>();

    /// <summary>
    /// Maps a value to 0.0 when it is NaN and otherwise clamps it to [0.0, 1.0].
    /// <see cref="Math.Clamp(double, double, double)"/> alone would pass NaN through.
    /// </summary>
    private static double ClampToUnitInterval(double value) =>
        double.IsNaN(value) ? 0.0 : Math.Clamp(value, 0.0, 1.0);

    /// <summary>
    /// Projects reachability states onto <see cref="CanonicalState"/>, ordered by
    /// target (ordinal). Null elements are skipped, matching the gate and
    /// uncertainty-state normalizers.
    /// </summary>
    /// <param name="states">Raw states; may be <c>null</c>.</param>
    /// <returns>A new list; empty when <paramref name="states"/> is <c>null</c>.</returns>
    private static List<CanonicalState> NormalizeStates(IEnumerable<ReachabilityStateDocument>? states)
    {
        if (states is null)
        {
            return new List<CanonicalState>();
        }

        // OrderBy is a stable sort: states that share a target keep their input order.
        // NOTE(review): Path goes through NormalizeList, which sorts and de-duplicates
        // it, so two paths visiting the same nodes in a different order produce the
        // same digest. Confirm this is intended; changing it would alter existing digests.
        return states
            .Where(s => s is not null)
            .OrderBy(s => s.Target, StringComparer.Ordinal)
            .Select(state => new CanonicalState(
                Target: state.Target ?? string.Empty,
                Reachable: state.Reachable,
                Confidence: state.Confidence,
                Bucket: state.Bucket ?? DefaultBucket,
                Weight: state.Weight,
                Score: state.Score,
                Path: NormalizeList(state.Path),
                RuntimeHits: NormalizeList(state.Evidence?.RuntimeHits),
                BlockedEdges: NormalizeList(state.Evidence?.BlockedEdges),
                GateMultiplierBps: Math.Clamp(
                    state.Evidence?.GateMultiplierBps ?? MaxGateMultiplierBps,
                    0,
                    MaxGateMultiplierBps),
                Gates: NormalizeGates(state.Evidence?.Gates)))
            .ToList();
    }

    /// <summary>
    /// Projects gates onto <see cref="CanonicalGate"/> (strings trimmed, confidence
    /// forced into [0.0, 1.0], blank source file and non-positive line number
    /// replaced by <c>null</c>) and sorts them by every string/number field except
    /// confidence.
    /// </summary>
    /// <param name="gates">Raw gates; may be <c>null</c>. Null elements are skipped.</param>
    /// <returns>A new list; empty when <paramref name="gates"/> is <c>null</c>.</returns>
    private static List<CanonicalGate> NormalizeGates(IEnumerable<CallgraphGate>? gates)
    {
        if (gates is null)
        {
            return new List<CanonicalGate>();
        }

        return gates
            .Where(g => g is not null)
            .Select(g => new CanonicalGate(
                Type: g.Type.ToString(),
                GuardSymbol: g.GuardSymbol?.Trim() ?? string.Empty,
                DetectionMethod: g.DetectionMethod?.Trim() ?? string.Empty,
                Confidence: ClampToUnitInterval(g.Confidence),
                SourceFile: string.IsNullOrWhiteSpace(g.SourceFile) ? null : g.SourceFile.Trim(),
                LineNumber: g.LineNumber is > 0 ? g.LineNumber : null,
                Detail: g.Detail?.Trim() ?? string.Empty))
            .OrderBy(g => g.Type, StringComparer.Ordinal)
            .ThenBy(g => g.GuardSymbol, StringComparer.Ordinal)
            .ThenBy(g => g.DetectionMethod, StringComparer.Ordinal)
            .ThenBy(g => g.SourceFile, StringComparer.Ordinal)
            .ThenBy(g => g.LineNumber ?? 0)
            .ThenBy(g => g.Detail, StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Projects runtime facts onto <see cref="CanonicalRuntimeFact"/>, ordered by
    /// symbol id, then code id, then loader base (all ordinal). Null elements are
    /// skipped, matching the gate and uncertainty-state normalizers.
    /// </summary>
    /// <param name="facts">Raw runtime facts; may be <c>null</c>.</param>
    /// <returns>A new list; empty when <paramref name="facts"/> is <c>null</c>.</returns>
    private static List<CanonicalRuntimeFact> NormalizeRuntimeFacts(IEnumerable<RuntimeFactDocument>? facts)
    {
        if (facts is null)
        {
            return new List<CanonicalRuntimeFact>();
        }

        // Facts that agree on all three sort keys keep their input order (stable sort).
        return facts
            .Where(f => f is not null)
            .Select(f => new CanonicalRuntimeFact(
                SymbolId: f.SymbolId ?? string.Empty,
                CodeId: f.CodeId,
                SymbolDigest: f.SymbolDigest,
                Purl: f.Purl,
                BuildId: f.BuildId,
                LoaderBase: f.LoaderBase,
                ProcessId: f.ProcessId,
                ProcessName: f.ProcessName,
                SocketAddress: f.SocketAddress,
                ContainerId: f.ContainerId,
                EvidenceUri: f.EvidenceUri,
                HitCount: f.HitCount,
                ObservedAt: f.ObservedAt,
                Metadata: NormalizeMetadata(f.Metadata)))
            .OrderBy(f => f.SymbolId, StringComparer.Ordinal)
            .ThenBy(f => f.CodeId, StringComparer.Ordinal)
            .ThenBy(f => f.LoaderBase, StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Copies metadata into an ordinally sorted dictionary with trimmed keys and
    /// values; entries whose key is null or blank are dropped.
    /// </summary>
    /// <param name="metadata">Raw metadata; may be <c>null</c>.</param>
    /// <returns>A new dictionary; empty when <paramref name="metadata"/> is <c>null</c>.</returns>
    private static SortedDictionary<string, string?> NormalizeMetadata(IDictionary<string, string?>? metadata)
    {
        var normalized = new SortedDictionary<string, string?>(StringComparer.Ordinal);
        if (metadata is null)
        {
            return normalized;
        }

        // Two distinct keys can become equal after Trim(). Visiting the entries in
        // ordinal key order makes the surviving value (the last one written) the
        // same on every run, independent of the source dictionary's enumeration order.
        foreach (var kvp in metadata.OrderBy(entry => entry.Key, StringComparer.Ordinal))
        {
            if (string.IsNullOrWhiteSpace(kvp.Key))
            {
                continue;
            }

            normalized[kvp.Key.Trim()] = kvp.Value?.Trim();
        }

        return normalized;
    }

    /// <summary>
    /// Projects uncertainty states onto <see cref="CanonicalUncertaintyState"/>,
    /// ordered by code then name (ordinal). Null states and states with a blank
    /// code are skipped; entropy is forced into [0.0, 1.0] with NaN mapped to 0.0.
    /// </summary>
    /// <param name="uncertainty">Uncertainty document; may be <c>null</c>.</param>
    /// <returns>A new list; empty when there is no document or it has no states.</returns>
    private static List<CanonicalUncertaintyState> NormalizeUncertaintyStates(UncertaintyDocument? uncertainty)
    {
        if (uncertainty?.States is not { Count: > 0 })
        {
            return new List<CanonicalUncertaintyState>();
        }

        return uncertainty.States
            .Where(s => s is not null && !string.IsNullOrWhiteSpace(s.Code))
            .Select(s => new CanonicalUncertaintyState(
                Code: s.Code.Trim(),
                Name: s.Name?.Trim() ?? string.Empty,
                Entropy: ClampToUnitInterval(s.Entropy),
                Evidence: NormalizeUncertaintyEvidence(s.Evidence),
                Timestamp: s.Timestamp))
            .OrderBy(s => s.Code, StringComparer.Ordinal)
            .ThenBy(s => s.Name, StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Projects uncertainty evidence onto <see cref="CanonicalUncertaintyEvidence"/>
    /// with trimmed, non-null strings, ordered by type, source id, then detail
    /// (ordinal). Null elements are skipped, matching the gate and
    /// uncertainty-state normalizers.
    /// </summary>
    /// <param name="evidence">Raw evidence entries; may be <c>null</c>.</param>
    /// <returns>A new list; empty when <paramref name="evidence"/> is <c>null</c>.</returns>
    private static List<CanonicalUncertaintyEvidence> NormalizeUncertaintyEvidence(
        IEnumerable<UncertaintyEvidenceDocument>? evidence)
    {
        if (evidence is null)
        {
            return new List<CanonicalUncertaintyEvidence>();
        }

        return evidence
            .Where(e => e is not null)
            .Select(e => new CanonicalUncertaintyEvidence(
                Type: e.Type?.Trim() ?? string.Empty,
                SourceId: e.SourceId?.Trim() ?? string.Empty,
                Detail: e.Detail?.Trim() ?? string.Empty))
            .OrderBy(e => e.Type, StringComparer.Ordinal)
            .ThenBy(e => e.SourceId, StringComparer.Ordinal)
            .ThenBy(e => e.Detail, StringComparer.Ordinal)
            .ToList();
    }

    // Canonical projection types. These records are what gets serialized and
    // hashed, so renaming, adding, removing or reordering their members may change
    // every digest - NOTE(review): confirm against CanonicalJsonSerializer's
    // property-ordering rules before touching them.

    /// <summary>Root of the canonical projection that is serialized and hashed.</summary>
    private sealed record CanonicalReachabilityFact(
        string CallgraphId,
        string SubjectKey,
        CanonicalSubject Subject,
        List<string> EntryPoints,
        List<CanonicalState> States,
        List<CanonicalRuntimeFact> RuntimeFacts,
        List<CanonicalUncertaintyState> UncertaintyStates,
        SortedDictionary<string, string?> Metadata,
        double Score,
        double RiskScore,
        int UnknownsCount,
        double UnknownsPressure,
        DateTimeOffset ComputedAt);

    /// <summary>Subject identity with every missing value replaced by an empty string.</summary>
    private sealed record CanonicalSubject(
        string ImageDigest,
        string Component,
        string Version,
        string ScanId);

    /// <summary>Canonical form of one reachability state and its evidence.</summary>
    private sealed record CanonicalState(
        string Target,
        bool Reachable,
        double Confidence,
        string Bucket,
        double Weight,
        double Score,
        List<string> Path,
        List<string> RuntimeHits,
        List<string> BlockedEdges,
        int GateMultiplierBps,
        List<CanonicalGate> Gates);

    /// <summary>Canonical form of one gate attached to a state's evidence.</summary>
    private sealed record CanonicalGate(
        string Type,
        string GuardSymbol,
        string DetectionMethod,
        double Confidence,
        string? SourceFile,
        int? LineNumber,
        string Detail);

    /// <summary>Canonical form of one runtime fact.</summary>
    private sealed record CanonicalRuntimeFact(
        string SymbolId,
        string? CodeId,
        string? SymbolDigest,
        string? Purl,
        string? BuildId,
        string? LoaderBase,
        int? ProcessId,
        string? ProcessName,
        string? SocketAddress,
        string? ContainerId,
        string? EvidenceUri,
        int HitCount,
        DateTimeOffset? ObservedAt,
        SortedDictionary<string, string?> Metadata);

    /// <summary>Canonical form of one uncertainty state.</summary>
    private sealed record CanonicalUncertaintyState(
        string Code,
        string Name,
        double Entropy,
        List<CanonicalUncertaintyEvidence> Evidence,
        DateTimeOffset? Timestamp);

    /// <summary>Canonical form of one piece of uncertainty evidence.</summary>
    private sealed record CanonicalUncertaintyEvidence(
        string Type,
        string SourceId,
        string Detail);
}