139 lines
5.3 KiB
C#
139 lines
5.3 KiB
C#
// Licensed to StellaOps under the BUSL-1.1 license.
|
|
|
|
|
|
using StellaOps.ReachGraph.Schema;
|
|
using System.Collections.Immutable;
|
|
|
|
namespace StellaOps.ReachGraph.Deduplication;
|
|
|
|
/// <summary>
/// Service for deduplicating edges from multiple sources into semantically unique edges.
/// </summary>
public interface IEdgeDeduplicator
{
    /// <summary>
    /// Deduplicates a collection of edges by their semantic keys.
    /// </summary>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="keyExtractor">Function to extract the semantic key from an edge.</param>
    /// <param name="sourceExtractor">Function to extract the source ID from an edge.</param>
    /// <param name="strengthExtractor">Function to extract the strength/weight from an edge.</param>
    /// <param name="timestampExtractor">Function to extract the observation timestamp from an edge.</param>
    /// <returns>Deduplicated edges with merged provenance.</returns>
    IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor);
}
|
|
|
|
/// <summary>
/// Default implementation of <see cref="IEdgeDeduplicator"/>.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Gets the singleton instance.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
    public IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // One builder per semantic key: every edge that yields the same key is
        // folded into the same builder as an additional source.
        var builders = new Dictionary<EdgeSemanticKey, DeduplicatedEdgeBuilder>();

        foreach (var edge in edges)
        {
            var key = keyExtractor(edge);

            if (!builders.TryGetValue(key, out var builder))
            {
                // The first edge seen for a key supplies the From/To endpoints
                // passed to the builder.
                builder = new DeduplicatedEdgeBuilder(key, edge.From, edge.To);
                builders[key] = builder;
            }

            builder.AddSource(
                sourceExtractor(edge),
                edge.Why,
                strengthExtractor(edge),
                timestampExtractor(edge));
        }

        // Strongest edges first; edges with equal strength are ordered by the
        // ordinal comparison of their computed key so the result order is deterministic.
        return builders.Values
            .Select(b => b.Build())
            .OrderByDescending(e => e.Strength)
            .ThenBy(e => e.Key.ComputeKey(), StringComparer.Ordinal)
            .ToList();
    }
}
|
|
|
|
/// <summary>
/// Extensions for edge deduplication.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges; it becomes part of every semantic key.</param>
    /// <param name="defaultSource">Source ID attributed to every edge.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> is used when <see langword="null"/>.
    /// The current UTC time is read once and used as the timestamp of every edge.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="deduplicator"/> is <see langword="null"/>.</exception>
    public static IReadOnlyList<DeduplicatedEdge> DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable<ReachGraphEdge> edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // Extension methods can be invoked on a null receiver; fail with a clear
        // argument exception instead of a NullReferenceException further down.
        ArgumentNullException.ThrowIfNull(deduplicator);

        var time = timeProvider ?? TimeProvider.System;

        // Captured once so all edges in this call share the same timestamp.
        var now = time.GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge strength as the explanation's confidence scaled by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    /// <param name="explanation">The explanation attached to the edge.</param>
    /// <returns>The confidence multiplied by the type multiplier.</returns>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        // Map the edge explanation type to a multiplier.
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag => 0.6,
            EdgeExplanationType.PlatformArch => 0.6,
            EdgeExplanationType.Unknown => 0.5,
            // Any type not listed above gets the same multiplier as Unknown.
            _ => 0.5
        };

        // The explanation's confidence is the base strength, scaled by the multiplier.
        return explanation.Confidence * typeMultiplier;
    }
}
|