// Licensed to StellaOps under the BUSL-1.1 license.

using StellaOps.ReachGraph.Schema;
using System.Collections.Immutable;

namespace StellaOps.ReachGraph.Deduplication;

// NOTE(review): the generic type arguments in this file were reconstructed.
// EdgeSemanticKey, string, double and DateTimeOffset follow directly from how the
// extractors are used below; ReachGraphEdge (edge type) and DeduplicatedEdge
// (result of DeduplicatedEdgeBuilder.Build()) are inferred from usage and naming —
// confirm against the Schema and Deduplication types.

/// <summary>
/// Service for deduplicating edges from multiple sources into semantically unique edges.
/// </summary>
public interface IEdgeDeduplicator
{
    /// <summary>
    /// Deduplicates a collection of edges by their semantic keys.
    /// </summary>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="keyExtractor">Function to extract semantic key from an edge.</param>
    /// <param name="sourceExtractor">Function to extract source ID from an edge.</param>
    /// <param name="strengthExtractor">Function to extract strength/weight from an edge.</param>
    /// <param name="timestampExtractor">Function to extract observation timestamp.</param>
    /// <returns>Deduplicated edges with merged provenance.</returns>
    IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor);
}

/// <summary>
/// Default implementation of <see cref="IEdgeDeduplicator"/>.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Gets the singleton instance.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // Group edges by semantic key: one builder per distinct key.
        var builders = new Dictionary<EdgeSemanticKey, DeduplicatedEdgeBuilder>();

        foreach (var edge in edges)
        {
            var key = keyExtractor(edge);

            if (!builders.TryGetValue(key, out var builder))
            {
                // The first edge seen for a key supplies the builder's From/To endpoints.
                builder = new DeduplicatedEdgeBuilder(key, edge.From, edge.To);
                builders.Add(key, builder);
            }

            // Every edge, including the first one for its key, is recorded as a source.
            builder.AddSource(
                sourceExtractor(edge),
                edge.Why,
                strengthExtractor(edge),
                timestampExtractor(edge));
        }

        // Strongest edges first; ties are broken by the ordinal string form of the key
        // so the output order does not depend on dictionary enumeration order.
        return builders.Values
            .Select(b => b.Build())
            .OrderByDescending(e => e.Strength)
            .ThenBy(e => e.Key.ComputeKey(), StringComparer.Ordinal)
            .ToList();
    }
}

/// <summary>
/// Extensions for edge deduplication.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <remarks>
    /// The semantic key is built from each edge's <c>From</c>, <c>To</c> and
    /// <paramref name="vulnerabilityId"/>; every edge is attributed to
    /// <paramref name="defaultSource"/>; and all edges share a single timestamp that is
    /// read from the time provider once, before deduplication starts.
    /// </remarks>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges.</param>
    /// <param name="defaultSource">Default source ID if not specified.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> is used when <c>null</c>.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="deduplicator"/> or <paramref name="edges"/> is <c>null</c>.
    /// </exception>
    public static IReadOnlyList<DeduplicatedEdge> DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable<ReachGraphEdge> edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // An extension method can be invoked on a null receiver; fail with a clear
        // ArgumentNullException instead of a NullReferenceException further down.
        ArgumentNullException.ThrowIfNull(deduplicator);
        ArgumentNullException.ThrowIfNull(edges);

        var time = timeProvider ?? TimeProvider.System;

        // Captured once so every edge in this call carries the same observation time.
        var now = time.GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge's strength as the explanation's confidence scaled by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    /// <param name="explanation">The explanation attached to the edge.</param>
    /// <returns><c>explanation.Confidence</c> multiplied by the type multiplier.</returns>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        // Use the explanation's confidence as the base strength and
        // map the edge explanation type to a multiplier.
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag => 0.6,
            EdgeExplanationType.PlatformArch => 0.6,
            EdgeExplanationType.Unknown => 0.5,
            // Any type not listed above is weighted the same as Unknown.
            _ => 0.5
        };

        return explanation.Confidence * typeMultiplier;
    }
}