save progress
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
// Licensed to StellaOps under the AGPL-3.0-or-later license.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.ReachGraph.Schema;
|
||||
|
||||
namespace StellaOps.ReachGraph.Deduplication;
|
||||
|
||||
/// <summary>
/// The merged form of one or more source edges that share the same <see cref="EdgeSemanticKey"/>.
/// All contributing source identifiers are retained in <see cref="Sources"/> so provenance is not lost.
/// </summary>
public sealed record DeduplicatedEdge
{
    /// <summary>
    /// Semantic key that identifies this edge.
    /// </summary>
    public required EdgeSemanticKey Key { get; init; }

    /// <summary>
    /// Identifier of the node the edge starts at (the entry-point side).
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Identifier of the node the edge ends at (the sink side).
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Explanation selected for the merged edge.
    /// </summary>
    public required EdgeExplanation Why { get; init; }

    /// <summary>
    /// Identifiers of every source that reported this edge.
    /// </summary>
    public required ImmutableHashSet<string> Sources { get; init; }

    /// <summary>
    /// Highest strength (weight) reported by any contributing source.
    /// </summary>
    public required double Strength { get; init; }

    /// <summary>
    /// Timestamp of the latest observation of this edge.
    /// </summary>
    public required DateTimeOffset LastSeen { get; init; }

    /// <summary>
    /// Number of distinct entries in <see cref="Sources"/>.
    /// </summary>
    public int SourceCount
    {
        get { return Sources.Count; }
    }

    /// <summary>
    /// <see langword="true"/> when more than one source reported this edge.
    /// </summary>
    public bool IsCorroborated
    {
        get { return Sources.Count > 1; }
    }
}
|
||||
|
||||
/// <summary>
/// Mutable accumulator that merges several source edges sharing one semantic key
/// into a single <see cref="DeduplicatedEdge"/>.
/// </summary>
/// <remarks>
/// The builder tracks the distinct source identifiers (ordinal comparison), the explanation
/// reported together with the highest strength, and the latest observation timestamp.
/// </remarks>
public sealed class DeduplicatedEdgeBuilder
{
    private readonly EdgeSemanticKey _key;
    private readonly string _from;
    private readonly string _to;
    private readonly HashSet<string> _sources = new(StringComparer.Ordinal);
    private EdgeExplanation? _explanation;
    private double _maxStrength;
    private DateTimeOffset _lastSeen = DateTimeOffset.MinValue;

    /// <summary>
    /// Initializes a new builder for the given semantic key.
    /// </summary>
    /// <param name="key">Semantic key shared by every edge merged through this builder.</param>
    /// <param name="from">Source node ID copied to <see cref="DeduplicatedEdge.From"/>.</param>
    /// <param name="to">Target node ID copied to <see cref="DeduplicatedEdge.To"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="from"/> or <paramref name="to"/> is <see langword="null"/>.
    /// </exception>
    public DeduplicatedEdgeBuilder(EdgeSemanticKey key, string from, string to)
    {
        // Enforce the declared non-nullable contract up front instead of producing
        // a DeduplicatedEdge whose required members are null.
        ArgumentNullException.ThrowIfNull(from);
        ArgumentNullException.ThrowIfNull(to);

        _key = key;
        _from = from;
        _to = to;
    }

    /// <summary>
    /// Adds a source edge to this builder.
    /// </summary>
    /// <param name="sourceId">The source identifier (e.g., feed name, analyzer ID).</param>
    /// <param name="explanation">The edge explanation from this source.</param>
    /// <param name="strength">The strength/weight from this source.</param>
    /// <param name="observedAt">When this source observed the edge.</param>
    /// <returns>This builder for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sourceId"/> or <paramref name="explanation"/> is <see langword="null"/>.
    /// </exception>
    public DeduplicatedEdgeBuilder AddSource(
        string sourceId,
        EdgeExplanation explanation,
        double strength,
        DateTimeOffset observedAt)
    {
        // Fail fast: a null source would otherwise be stored as provenance, and a null
        // explanation would only surface later in Build() with a misleading message.
        ArgumentNullException.ThrowIfNull(sourceId);
        ArgumentNullException.ThrowIfNull(explanation);

        _sources.Add(sourceId);

        // The first source always seeds the explanation; afterwards only a strictly
        // stronger source replaces it, so ties keep the earliest explanation.
        if (_explanation is null || strength > _maxStrength)
        {
            _maxStrength = strength;
            _explanation = explanation;
        }

        // Track the most recent observation.
        if (observedAt > _lastSeen)
        {
            _lastSeen = observedAt;
        }

        return this;
    }

    /// <summary>
    /// Builds the deduplicated edge.
    /// </summary>
    /// <returns>The deduplicated edge with merged provenance.</returns>
    /// <exception cref="InvalidOperationException">If no sources were added.</exception>
    public DeduplicatedEdge Build()
    {
        if (_sources.Count == 0 || _explanation is null)
        {
            throw new InvalidOperationException("At least one source must be added before building.");
        }

        return new DeduplicatedEdge
        {
            Key = _key,
            From = _from,
            To = _to,
            Why = _explanation,
            // Snapshot the sources so later AddSource calls cannot alter an already built edge.
            Sources = _sources.ToImmutableHashSet(StringComparer.Ordinal),
            Strength = _maxStrength,
            LastSeen = _lastSeen
        };
    }
}
|
||||
@@ -0,0 +1,137 @@
|
||||
// Licensed to StellaOps under the AGPL-3.0-or-later license.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.ReachGraph.Schema;
|
||||
|
||||
namespace StellaOps.ReachGraph.Deduplication;
|
||||
|
||||
/// <summary>
/// Collapses edges reported by multiple sources into one <see cref="DeduplicatedEdge"/>
/// per semantic key.
/// </summary>
public interface IEdgeDeduplicator
{
    /// <summary>
    /// Groups <paramref name="edges"/> by semantic key and merges each group into a single edge.
    /// </summary>
    /// <param name="edges">Edges to merge.</param>
    /// <param name="keyExtractor">Returns the semantic key of an edge.</param>
    /// <param name="sourceExtractor">Returns the source identifier of an edge.</param>
    /// <param name="strengthExtractor">Returns the strength (weight) of an edge.</param>
    /// <param name="timestampExtractor">Returns the time at which an edge was observed.</param>
    /// <returns>The merged edges, each carrying the provenance of its contributing sources.</returns>
    IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor);
}
|
||||
|
||||
/// <summary>
/// Standard <see cref="IEdgeDeduplicator"/> that accumulates one
/// <see cref="DeduplicatedEdgeBuilder"/> per semantic key.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Shared instance of the deduplicator.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc/>
    public IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // One accumulator per semantic key; From/To come from the first edge seen for that key.
        var buildersByKey = new Dictionary<EdgeSemanticKey, DeduplicatedEdgeBuilder>();

        foreach (var edge in edges)
        {
            var semanticKey = keyExtractor(edge);

            if (!buildersByKey.TryGetValue(semanticKey, out var accumulator))
            {
                accumulator = new DeduplicatedEdgeBuilder(semanticKey, edge.From, edge.To);
                buildersByKey.Add(semanticKey, accumulator);
            }

            // Extractors run in the same order as the AddSource parameters.
            var sourceId = sourceExtractor(edge);
            var explanation = edge.Why;
            var strength = strengthExtractor(edge);
            var observedAt = timestampExtractor(edge);

            accumulator.AddSource(sourceId, explanation, strength, observedAt);
        }

        // Materialize the merged edges in dictionary enumeration order.
        var merged = new List<DeduplicatedEdge>(buildersByKey.Count);
        foreach (var pending in buildersByKey.Values)
        {
            merged.Add(pending.Build());
        }

        // Strongest first; equal strengths are ordered by the ordinal canonical key.
        var ordered = merged
            .OrderByDescending(static e => e.Strength)
            .ThenBy(static e => e.Key.ComputeKey(), StringComparer.Ordinal);

        return ordered.ToList();
    }
}
|
||||
|
||||
/// <summary>
/// Extensions for edge deduplication.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <remarks>
    /// Every edge is attributed to <paramref name="defaultSource"/> and stamped with a single
    /// timestamp read once from the time provider. The semantic key is built from the edge's
    /// <c>From</c>, <c>To</c> and <paramref name="vulnerabilityId"/>; the strength is the
    /// explanation's confidence scaled by a per-type multiplier.
    /// </remarks>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges.</param>
    /// <param name="defaultSource">Source ID recorded for every edge.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> when <see langword="null"/>.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="deduplicator"/> or <paramref name="edges"/> is <see langword="null"/>.
    /// </exception>
    public static IReadOnlyList<DeduplicatedEdge> DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable<ReachGraphEdge> edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // Validate before doing any work so a null receiver surfaces as an
        // ArgumentNullException instead of a NullReferenceException further down.
        ArgumentNullException.ThrowIfNull(deduplicator);
        ArgumentNullException.ThrowIfNull(edges);

        var time = timeProvider ?? TimeProvider.System;

        // Read the clock once so every edge in this batch shares the same timestamp.
        var now = time.GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge strength as the explanation's confidence multiplied by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    /// <param name="explanation">The explanation to score.</param>
    /// <returns>The confidence scaled by the type multiplier.</returns>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag or EdgeExplanationType.PlatformArch => 0.6,
            // EdgeExplanationType.Unknown and any value not listed above share the lowest multiplier.
            _ => 0.5
        };

        return explanation.Confidence * typeMultiplier;
    }
}
|
||||
@@ -0,0 +1,134 @@
|
||||
// Licensed to StellaOps under the AGPL-3.0-or-later license.
|
||||
|
||||
using System.Globalization;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.ReachGraph.Deduplication;
|
||||
|
||||
/// <summary>
/// A semantic key for edge deduplication that identifies edges with equivalent meaning
/// regardless of their source or representation.
/// </summary>
/// <remarks>
/// Edges from different sources (reachability analysis, call graph, binary analysis)
/// may represent the same semantic relationship. This key normalizes them for deduplication.
///
/// Two edges are semantically equivalent if they have the same:
/// - Entry point node ID (ordinal comparison)
/// - Sink node ID (ordinal comparison)
/// - Vulnerability ID, if applicable (case-insensitive comparison)
/// - Applied gate, if any (ordinal comparison)
///
/// NOTE(review): <see cref="ComputeKey"/> and <see cref="ComputeHash"/> emit the vulnerability ID
/// as stored, and only IDs starting with "CVE-" are upper-cased on construction. Two keys that
/// compare equal but differ in the casing of a non-CVE vulnerability ID therefore yield different
/// canonical strings and hashes — confirm whether persisted hashes should be case-normalized.
/// </remarks>
public readonly record struct EdgeSemanticKey : IEquatable<EdgeSemanticKey>
{
    /// <summary>
    /// Gets the entry point node identifier.
    /// </summary>
    public string EntryPointId { get; }

    /// <summary>
    /// Gets the sink (vulnerable) node identifier.
    /// </summary>
    public string SinkId { get; }

    /// <summary>
    /// Gets the vulnerability identifier, if this edge is associated with one.
    /// <see langword="null"/> when the value supplied to the constructor was null or whitespace.
    /// </summary>
    public string? VulnerabilityId { get; }

    /// <summary>
    /// Gets the applied gate identifier, if any gate was applied to this edge.
    /// </summary>
    public string? GateApplied { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="EdgeSemanticKey"/> struct.
    /// </summary>
    /// <param name="entryPointId">The entry point node ID.</param>
    /// <param name="sinkId">The sink node ID.</param>
    /// <param name="vulnerabilityId">Optional vulnerability ID; IDs starting with "CVE-" (any casing) are upper-cased.</param>
    /// <param name="gateApplied">Optional gate identifier, stored as given.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="entryPointId"/> or <paramref name="sinkId"/> is null, empty, or whitespace.
    /// </exception>
    public EdgeSemanticKey(
        string entryPointId,
        string sinkId,
        string? vulnerabilityId = null,
        string? gateApplied = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(entryPointId);
        ArgumentException.ThrowIfNullOrWhiteSpace(sinkId);

        EntryPointId = entryPointId;
        SinkId = sinkId;
        VulnerabilityId = NormalizeId(vulnerabilityId);
        GateApplied = gateApplied;
    }

    /// <summary>
    /// Computes a canonical string key for this semantic key.
    /// </summary>
    /// <returns>
    /// The four components joined with '|' in the order entry point, sink, vulnerability ID, gate;
    /// a missing optional component is written as an empty string.
    /// </returns>
    public string ComputeKey()
    {
        var builder = new StringBuilder(256);
        builder.Append(EntryPointId);
        builder.Append('|');
        builder.Append(SinkId);
        builder.Append('|');
        builder.Append(VulnerabilityId ?? string.Empty);
        builder.Append('|');
        builder.Append(GateApplied ?? string.Empty);
        return builder.ToString();
    }

    /// <summary>
    /// Computes a SHA-256 hash of the canonical key for compact storage.
    /// </summary>
    /// <returns>A lowercase hex-encoded SHA-256 hash of the UTF-8 bytes of <see cref="ComputeKey"/>.</returns>
    public string ComputeHash()
    {
        var key = ComputeKey();
        var bytes = Encoding.UTF8.GetBytes(key);
        var hash = SHA256.HashData(bytes);
        return Convert.ToHexStringLower(hash);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        // Each component is hashed with the same comparer Equals uses, so keys that compare
        // equal always hash equal. In particular the vulnerability ID is hashed
        // case-insensitively, matching the OrdinalIgnoreCase comparison in Equals.
        var hash = new HashCode();
        hash.Add(EntryPointId, StringComparer.Ordinal);
        hash.Add(SinkId, StringComparer.Ordinal);
        hash.Add(VulnerabilityId, StringComparer.OrdinalIgnoreCase);
        hash.Add(GateApplied, StringComparer.Ordinal);
        return hash.ToHashCode();
    }

    /// <inheritdoc/>
    public bool Equals(EdgeSemanticKey other)
    {
        return string.Equals(EntryPointId, other.EntryPointId, StringComparison.Ordinal) &&
               string.Equals(SinkId, other.SinkId, StringComparison.Ordinal) &&
               string.Equals(VulnerabilityId, other.VulnerabilityId, StringComparison.OrdinalIgnoreCase) &&
               string.Equals(GateApplied, other.GateApplied, StringComparison.Ordinal);
    }

    /// <inheritdoc/>
    public override string ToString() => ComputeKey();

    /// <summary>
    /// Normalizes a vulnerability identifier: null/whitespace becomes <see langword="null"/>,
    /// and identifiers starting with "CVE-" in any casing are upper-cased.
    /// </summary>
    /// <param name="id">The raw identifier.</param>
    /// <returns>The normalized identifier, or <see langword="null"/>.</returns>
    private static string? NormalizeId(string? id)
    {
        if (string.IsNullOrWhiteSpace(id))
        {
            return null;
        }

        // Normalize CVE IDs to uppercase for consistent comparison. The case-insensitive
        // prefix check already covers the upper-case "CVE-" spelling.
        if (id.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            return id.ToUpperInvariant();
        }

        return id;
    }
}
|
||||
Reference in New Issue
Block a user