// Licensed to StellaOps under the BUSL-1.1 license.
using StellaOps.ReachGraph.Schema;
using System.Collections.Immutable;
namespace StellaOps.ReachGraph.Deduplication;
/// <summary>
/// Service for deduplicating edges from multiple sources into semantically unique edges.
/// </summary>
public interface IEdgeDeduplicator
{
/// <summary>
/// Deduplicates a collection of edges by their semantic keys.
/// </summary>
/// <param name="edges">The edges to deduplicate.</param>
/// <param name="keyExtractor">Function to extract the semantic key from an edge.</param>
/// <param name="sourceExtractor">Function to extract the source ID from an edge.</param>
/// <param name="strengthExtractor">Function to extract the strength/weight from an edge.</param>
/// <param name="timestampExtractor">Function to extract the observation timestamp from an edge.</param>
/// <returns>Deduplicated edges with merged provenance.</returns>
IReadOnlyList Deduplicate(
IEnumerable edges,
Func keyExtractor,
Func sourceExtractor,
Func strengthExtractor,
Func timestampExtractor);
}
/// <summary>
/// Default implementation of <see cref="IEdgeDeduplicator"/>.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Gets the singleton instance.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc />
    public IReadOnlyList Deduplicate(
        IEnumerable edges,
        Func keyExtractor,
        Func sourceExtractor,
        Func strengthExtractor,
        Func timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // One builder per distinct semantic key; each builder accumulates
        // every observation that maps to that key.
        var buildersByKey = new Dictionary();
        foreach (var candidate in edges)
        {
            var semanticKey = keyExtractor(candidate);
            if (!buildersByKey.TryGetValue(semanticKey, out var accumulator))
            {
                // First sighting of this key: the endpoints are taken from this edge.
                accumulator = new DeduplicatedEdgeBuilder(semanticKey, candidate.From, candidate.To);
                buildersByKey.Add(semanticKey, accumulator);
            }

            // Extractors run in the same order as the AddSource argument list.
            var sourceId = sourceExtractor(candidate);
            var explanation = candidate.Why;
            var strength = strengthExtractor(candidate);
            var observedAt = timestampExtractor(candidate);
            accumulator.AddSource(sourceId, explanation, strength, observedAt);
        }

        // Strongest edges first; ties are broken by the ordinal order of the
        // computed key string so the output order is deterministic.
        var merged = buildersByKey.Values.Select(pending => pending.Build());
        return merged
            .OrderByDescending(result => result.Strength)
            .ThenBy(result => result.Key.ComputeKey(), StringComparer.Ordinal)
            .ToList();
    }
}
/// <summary>
/// Extensions for edge deduplication.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <remarks>
    /// Each edge is keyed by its <c>From</c>/<c>To</c> endpoints together with
    /// <paramref name="vulnerabilityId"/>, attributed to <paramref name="defaultSource"/>,
    /// and stamped with a single timestamp that is read once per call, so every
    /// edge processed by one invocation shares the same observation time.
    /// </remarks>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges.</param>
    /// <param name="defaultSource">Source ID attributed to every edge.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> is used when <see langword="null"/>.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="deduplicator"/> or <paramref name="edges"/> is <see langword="null"/>.
    /// </exception>
    public static IReadOnlyList DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // An extension method can be invoked on a null receiver; fail with a
        // descriptive ArgumentNullException instead of a NullReferenceException.
        ArgumentNullException.ThrowIfNull(deduplicator);
        ArgumentNullException.ThrowIfNull(edges);

        var time = timeProvider ?? TimeProvider.System;

        // Captured once so all edges in this call get an identical timestamp.
        var now = time.GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge strength as the explanation's confidence scaled by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    /// <param name="explanation">The explanation attached to the edge.</param>
    /// <returns><c>explanation.Confidence</c> multiplied by the type-specific factor.</returns>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        // Use the explanation's confidence as the base strength and
        // map the edge explanation type to a multiplier.
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag => 0.6,
            EdgeExplanationType.PlatformArch => 0.6,
            EdgeExplanationType.Unknown => 0.5,
            // Any type not listed above gets the same multiplier as Unknown.
            _ => 0.5
        };

        return explanation.Confidence * typeMultiplier;
    }
}