Files
git.stella-ops.org/src/__Libraries/StellaOps.ReachGraph/Deduplication/EdgeDeduplicator.cs
2026-02-01 21:37:40 +02:00

139 lines
5.3 KiB
C#

// Licensed to StellaOps under the BUSL-1.1 license.
using StellaOps.ReachGraph.Schema;
using System.Collections.Immutable;
namespace StellaOps.ReachGraph.Deduplication;
/// <summary>
/// Contract for collapsing edges reported by multiple sources into semantically unique edges.
/// </summary>
public interface IEdgeDeduplicator
{
    /// <summary>
    /// Deduplicates the supplied edges, treating edges that share a semantic key as the same edge.
    /// </summary>
    /// <param name="edges">Edges to deduplicate.</param>
    /// <param name="keyExtractor">Produces the semantic key for an edge.</param>
    /// <param name="sourceExtractor">Produces the source ID for an edge.</param>
    /// <param name="strengthExtractor">Produces the strength/weight for an edge.</param>
    /// <param name="timestampExtractor">Produces the observation timestamp for an edge.</param>
    /// <returns>The deduplicated edges with merged provenance.</returns>
    IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor);
}
/// <summary>
/// Default <see cref="IEdgeDeduplicator"/>: accumulates edges per semantic key and
/// returns the built results in a fixed sort order.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Gets the shared instance.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc/>
    public IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // One builder per semantic key. A builder is created from the first edge
        // seen for its key and receives every later edge with that key as a source.
        var buildersByKey = new Dictionary<EdgeSemanticKey, DeduplicatedEdgeBuilder>();

        foreach (var candidate in edges)
        {
            var semanticKey = keyExtractor(candidate);

            if (!buildersByKey.TryGetValue(semanticKey, out var accumulator))
            {
                accumulator = new DeduplicatedEdgeBuilder(semanticKey, candidate.From, candidate.To);
                buildersByKey[semanticKey] = accumulator;
            }

            // Extractors run in the same order as the AddSource parameters.
            var sourceId = sourceExtractor(candidate);
            var explanation = candidate.Why;
            var strength = strengthExtractor(candidate);
            var observedAt = timestampExtractor(candidate);

            accumulator.AddSource(sourceId, explanation, strength, observedAt);
        }

        // Strongest edges first; ties are broken by ordinal comparison of the
        // computed key string.
        var merged = buildersByKey.Values.Select(b => b.Build());
        var ordered = merged
            .OrderByDescending(e => e.Strength)
            .ThenBy(e => e.Key.ComputeKey(), StringComparer.Ordinal);

        return ordered.ToList();
    }
}
/// <summary>
/// Extensions for edge deduplication.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <remarks>
    /// Each edge is keyed by its <c>From</c>, <c>To</c> and <paramref name="vulnerabilityId"/>,
    /// attributed to <paramref name="defaultSource"/>, and stamped with a single timestamp
    /// read once from the time provider before deduplication starts. Its strength is the
    /// explanation's confidence multiplied by a per-explanation-type factor.
    /// </remarks>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges.</param>
    /// <param name="defaultSource">Source ID recorded for every edge.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> is used when <see langword="null"/>.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="deduplicator"/> or <paramref name="edges"/> is <see langword="null"/>.
    /// </exception>
    public static IReadOnlyList<DeduplicatedEdge> DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable<ReachGraphEdge> edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // An extension method can be invoked on a null receiver; fail with a
        // descriptive ArgumentNullException instead of a NullReferenceException.
        ArgumentNullException.ThrowIfNull(deduplicator);
        ArgumentNullException.ThrowIfNull(edges);

        // Capture the time once so every edge in this call shares the same timestamp.
        var now = (timeProvider ?? TimeProvider.System).GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge strength as the explanation's confidence scaled by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    /// <param name="explanation">The explanation attached to the edge.</param>
    /// <returns>The confidence multiplied by the type multiplier.</returns>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        // Explanation types not listed here fall back to the same multiplier as Unknown.
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag => 0.6,
            EdgeExplanationType.PlatformArch => 0.6,
            EdgeExplanationType.Unknown => 0.5,
            _ => 0.5
        };

        return explanation.Confidence * typeMultiplier;
    }
}