save progress
This commit is contained in:
@@ -0,0 +1,137 @@
|
||||
// Licensed to StellaOps under the AGPL-3.0-or-later license.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.ReachGraph.Schema;
|
||||
|
||||
namespace StellaOps.ReachGraph.Deduplication;
|
||||
|
||||
/// <summary>
/// Collapses edges reported by multiple sources into one entry per semantic key.
/// </summary>
public interface IEdgeDeduplicator
{
    /// <summary>
    /// Merges the supplied edges so that each semantic key appears once in the result.
    /// </summary>
    /// <param name="edges">Edges to merge.</param>
    /// <param name="keyExtractor">Yields the semantic key used to decide which edges belong together.</param>
    /// <param name="sourceExtractor">Yields the source ID of an edge.</param>
    /// <param name="strengthExtractor">Yields the strength/weight of an edge.</param>
    /// <param name="timestampExtractor">Yields the observation timestamp of an edge.</param>
    /// <returns>The deduplicated edges, each carrying the merged provenance of its inputs.</returns>
    IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor);
}
|
||||
|
||||
/// <summary>
/// Standard <see cref="IEdgeDeduplicator"/> that accumulates edges per semantic key
/// and emits one merged edge for each key.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Shared instance, exposed through the <see cref="IEdgeDeduplicator"/> abstraction.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc/>
    /// <remarks>
    /// The result is ordered by strength (highest first); ties are broken by the
    /// ordinal comparison of each key's <c>ComputeKey()</c> string.
    /// </remarks>
    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
    public IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // One accumulator per semantic key.
        var accumulators = new Dictionary<EdgeSemanticKey, DeduplicatedEdgeBuilder>();

        foreach (var candidate in edges)
        {
            var semanticKey = keyExtractor(candidate);

            if (!accumulators.TryGetValue(semanticKey, out var accumulator))
            {
                // The first edge seen for a key supplies the endpoints of the merged edge.
                accumulator = new DeduplicatedEdgeBuilder(semanticKey, candidate.From, candidate.To);
                accumulators.Add(semanticKey, accumulator);
            }

            // Every edge, including the first one for its key, contributes a source record.
            var sourceId = sourceExtractor(candidate);
            var explanation = candidate.Why;
            var strength = strengthExtractor(candidate);
            var observedAt = timestampExtractor(candidate);

            accumulator.AddSource(sourceId, explanation, strength, observedAt);
        }

        // Strongest edges first; the ordinal key comparison makes the order of ties reproducible.
        var merged = accumulators.Values.Select(pending => pending.Build());
        var ordered = merged
            .OrderByDescending(result => result.Strength)
            .ThenBy(result => result.Key.ComputeKey(), StringComparer.Ordinal);

        return ordered.ToList();
    }
}
|
||||
|
||||
/// <summary>
/// Extensions for edge deduplication.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <remarks>
    /// The semantic key is built from each edge's endpoints plus <paramref name="vulnerabilityId"/>,
    /// every edge is attributed to <paramref name="defaultSource"/>, strength is the explanation's
    /// confidence scaled by a per-type multiplier, and all edges share one timestamp read once
    /// from the time provider.
    /// </remarks>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges.</param>
    /// <param name="defaultSource">Source ID recorded for every edge.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> is used when <see langword="null"/>.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="deduplicator"/> or <paramref name="edges"/> is <see langword="null"/>.
    /// </exception>
    public static IReadOnlyList<DeduplicatedEdge> DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable<ReachGraphEdge> edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException
        // raised only after the clock has already been read.
        ArgumentNullException.ThrowIfNull(deduplicator);
        ArgumentNullException.ThrowIfNull(edges);

        var time = timeProvider ?? TimeProvider.System;

        // Read the clock once so every edge in this call carries the same timestamp.
        var now = time.GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge strength as the explanation's confidence multiplied by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    /// <param name="explanation">The explanation attached to the edge.</param>
    /// <returns>The confidence scaled by the type multiplier.</returns>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        // Map edge explanation type to a multiplier.
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag or EdgeExplanationType.PlatformArch => 0.6,
            EdgeExplanationType.Unknown => 0.5,

            // Any type not listed above gets the same multiplier as Unknown.
            _ => 0.5
        };

        // Use the explanation's confidence as the base strength.
        return explanation.Confidence * typeMultiplier;
    }
}
|
||||
Reference in New Issue
Block a user