save progress

This commit is contained in:
StellaOps Bot
2026-01-04 14:54:52 +02:00
parent c49b03a254
commit 3098e84de4
132 changed files with 19783 additions and 31 deletions

View File

@@ -0,0 +1,138 @@
// Licensed to StellaOps under the AGPL-3.0-or-later license.
using System.Collections.Immutable;
using StellaOps.ReachGraph.Schema;
namespace StellaOps.ReachGraph.Deduplication;
/// <summary>
/// A single edge produced by collapsing one or more source edges that share the same
/// semantic key. Every contributing source is retained so provenance is not lost.
/// </summary>
public sealed record DeduplicatedEdge
{
    /// <summary>
    /// Semantic key identifying this edge.
    /// </summary>
    public required EdgeSemanticKey Key { get; init; }

    /// <summary>
    /// Identifier of the node the edge starts at (the entry-point side).
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Identifier of the node the edge ends at (the sink side).
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Explanation chosen to represent this edge after aggregation.
    /// </summary>
    public required EdgeExplanation Why { get; init; }

    /// <summary>
    /// Identifiers of all sources that contributed this edge.
    /// </summary>
    public required ImmutableHashSet<string> Sources { get; init; }

    /// <summary>
    /// Highest strength (weight) reported by any contributing source.
    /// </summary>
    public required double Strength { get; init; }

    /// <summary>
    /// Timestamp of the most recent observation of this edge.
    /// </summary>
    public required DateTimeOffset LastSeen { get; init; }

    /// <summary>
    /// Number of entries in <see cref="Sources"/>.
    /// </summary>
    public int SourceCount
    {
        get { return Sources.Count; }
    }

    /// <summary>
    /// <see langword="true"/> when two or more sources reported this edge.
    /// </summary>
    public bool IsCorroborated
    {
        get { return SourceCount >= 2; }
    }
}
/// <summary>
/// Accumulates source edges that share one semantic key and produces a single
/// <see cref="DeduplicatedEdge"/> from them.
/// </summary>
/// <remarks>
/// The builder keeps the explanation of the strongest source seen so far (the first one wins on
/// ties), the set of distinct source identifiers (ordinal comparison), and the latest observation
/// timestamp.
/// </remarks>
public sealed class DeduplicatedEdgeBuilder
{
    private readonly EdgeSemanticKey _key;
    private readonly string _from;
    private readonly string _to;
    private readonly HashSet<string> _sources = new(StringComparer.Ordinal);
    private EdgeExplanation? _explanation;
    private double _maxStrength;
    private DateTimeOffset _lastSeen = DateTimeOffset.MinValue;

    /// <summary>
    /// Initializes a new builder for the given semantic key.
    /// </summary>
    /// <param name="key">The semantic key shared by every edge added to this builder.</param>
    /// <param name="from">The node ID the resulting edge starts at.</param>
    /// <param name="to">The node ID the resulting edge ends at.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="from"/> or <paramref name="to"/> is <see langword="null"/>.
    /// </exception>
    public DeduplicatedEdgeBuilder(EdgeSemanticKey key, string from, string to)
    {
        ArgumentNullException.ThrowIfNull(from);
        ArgumentNullException.ThrowIfNull(to);

        _key = key;
        _from = from;
        _to = to;
    }

    /// <summary>
    /// Adds a source edge to this builder.
    /// </summary>
    /// <param name="sourceId">The source identifier (e.g., feed name, analyzer ID).</param>
    /// <param name="explanation">The edge explanation from this source.</param>
    /// <param name="strength">The strength/weight from this source.</param>
    /// <param name="observedAt">When this source observed the edge.</param>
    /// <returns>This builder for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sourceId"/> or <paramref name="explanation"/> is <see langword="null"/>.
    /// </exception>
    public DeduplicatedEdgeBuilder AddSource(
        string sourceId,
        EdgeExplanation explanation,
        double strength,
        DateTimeOffset observedAt)
    {
        // Reject nulls up front: a null explanation would otherwise be accepted silently and
        // surface later as a misleading "no sources added" failure in Build().
        ArgumentNullException.ThrowIfNull(sourceId);
        ArgumentNullException.ThrowIfNull(explanation);

        _sources.Add(sourceId);

        // Keep the strongest explanation. The first source always seeds the state (even with a
        // non-positive strength). A NaN maximum is replaced by any real number, because every
        // "x > NaN" comparison is false and would otherwise freeze the result at NaN.
        var replacesCurrent =
            _explanation is null
            || strength > _maxStrength
            || (double.IsNaN(_maxStrength) && !double.IsNaN(strength));

        if (replacesCurrent)
        {
            _maxStrength = strength;
            _explanation = explanation;
        }

        // Track most recent observation.
        if (observedAt > _lastSeen)
        {
            _lastSeen = observedAt;
        }

        return this;
    }

    /// <summary>
    /// Builds the deduplicated edge from everything added so far.
    /// </summary>
    /// <returns>The deduplicated edge with merged provenance.</returns>
    /// <exception cref="InvalidOperationException">If no sources were added.</exception>
    public DeduplicatedEdge Build()
    {
        if (_sources.Count == 0 || _explanation is null)
        {
            throw new InvalidOperationException("At least one source must be added before building.");
        }

        return new DeduplicatedEdge
        {
            Key = _key,
            From = _from,
            To = _to,
            Why = _explanation,
            // Snapshot the set so later AddSource calls cannot affect an already-built edge.
            Sources = _sources.ToImmutableHashSet(StringComparer.Ordinal),
            Strength = _maxStrength,
            LastSeen = _lastSeen
        };
    }
}

View File

@@ -0,0 +1,137 @@
// Licensed to StellaOps under the AGPL-3.0-or-later license.
using System.Collections.Immutable;
using StellaOps.ReachGraph.Schema;
namespace StellaOps.ReachGraph.Deduplication;
/// <summary>
/// Service that collapses edges reported by multiple sources into semantically unique edges.
/// </summary>
public interface IEdgeDeduplicator
{
/// <summary>
/// Deduplicates a collection of edges by their semantic keys, merging the provenance
/// of edges that map to the same key.
/// </summary>
/// <param name="edges">The edges to deduplicate.</param>
/// <param name="keyExtractor">Function that returns the semantic key of an edge; the key decides which edges are merged.</param>
/// <param name="sourceExtractor">Function that returns the source ID (provenance) of an edge.</param>
/// <param name="strengthExtractor">Function that returns the strength/weight of an edge.</param>
/// <param name="timestampExtractor">Function that returns when an edge was observed.</param>
/// <returns>Deduplicated edges with merged provenance.</returns>
IReadOnlyList<DeduplicatedEdge> Deduplicate(
IEnumerable<ReachGraphEdge> edges,
Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
Func<ReachGraphEdge, string> sourceExtractor,
Func<ReachGraphEdge, double> strengthExtractor,
Func<ReachGraphEdge, DateTimeOffset> timestampExtractor);
}
/// <summary>
/// Standard <see cref="IEdgeDeduplicator"/> that groups edges by semantic key using one
/// <see cref="DeduplicatedEdgeBuilder"/> per key.
/// </summary>
public sealed class EdgeDeduplicator : IEdgeDeduplicator
{
    /// <summary>
    /// Shared instance of the deduplicator.
    /// </summary>
    public static IEdgeDeduplicator Instance { get; } = new EdgeDeduplicator();

    /// <inheritdoc/>
    public IReadOnlyList<DeduplicatedEdge> Deduplicate(
        IEnumerable<ReachGraphEdge> edges,
        Func<ReachGraphEdge, EdgeSemanticKey> keyExtractor,
        Func<ReachGraphEdge, string> sourceExtractor,
        Func<ReachGraphEdge, double> strengthExtractor,
        Func<ReachGraphEdge, DateTimeOffset> timestampExtractor)
    {
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(keyExtractor);
        ArgumentNullException.ThrowIfNull(sourceExtractor);
        ArgumentNullException.ThrowIfNull(strengthExtractor);
        ArgumentNullException.ThrowIfNull(timestampExtractor);

        // One builder per distinct semantic key; the first edge seen for a key
        // supplies the From/To node IDs of the merged edge.
        var buildersByKey = new Dictionary<EdgeSemanticKey, DeduplicatedEdgeBuilder>();

        foreach (var current in edges)
        {
            var semanticKey = keyExtractor(current);

            DeduplicatedEdgeBuilder accumulator;
            if (buildersByKey.TryGetValue(semanticKey, out var existing))
            {
                accumulator = existing;
            }
            else
            {
                accumulator = new DeduplicatedEdgeBuilder(semanticKey, current.From, current.To);
                buildersByKey.Add(semanticKey, accumulator);
            }

            accumulator.AddSource(
                sourceExtractor(current),
                current.Why,
                strengthExtractor(current),
                timestampExtractor(current));
        }

        // Materialize every merged edge, then order: strongest first, with the
        // canonical key string (ordinal) as the tie-breaker.
        var merged = new List<DeduplicatedEdge>(buildersByKey.Count);
        foreach (var pending in buildersByKey.Values)
        {
            merged.Add(pending.Build());
        }

        var ordered = merged
            .OrderByDescending(edge => edge.Strength)
            .ThenBy(edge => edge.Key.ComputeKey(), StringComparer.Ordinal);

        return ordered.ToList();
    }
}
/// <summary>
/// Convenience extensions for <see cref="IEdgeDeduplicator"/>.
/// </summary>
public static class EdgeDeduplicatorExtensions
{
    /// <summary>
    /// Deduplicates edges using default extractors based on edge properties.
    /// </summary>
    /// <remarks>
    /// The semantic key is built from each edge's <c>From</c>, <c>To</c> and
    /// <paramref name="vulnerabilityId"/>. Every edge is attributed to
    /// <paramref name="defaultSource"/> and stamped with one timestamp captured once per call.
    /// Strength is the explanation's confidence scaled by a per-type multiplier.
    /// </remarks>
    /// <param name="deduplicator">The deduplicator instance.</param>
    /// <param name="edges">The edges to deduplicate.</param>
    /// <param name="vulnerabilityId">The vulnerability ID to associate with edges.</param>
    /// <param name="defaultSource">Source ID recorded for every edge.</param>
    /// <param name="timeProvider">
    /// Time provider for timestamps; <see cref="TimeProvider.System"/> is used when <see langword="null"/>.
    /// </param>
    /// <returns>Deduplicated edges.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="deduplicator"/>, <paramref name="edges"/> or <paramref name="defaultSource"/>
    /// is <see langword="null"/>.
    /// </exception>
    public static IReadOnlyList<DeduplicatedEdge> DeduplicateWithDefaults(
        this IEdgeDeduplicator deduplicator,
        IEnumerable<ReachGraphEdge> edges,
        string vulnerabilityId,
        string defaultSource = "unknown",
        TimeProvider? timeProvider = null)
    {
        // Fail fast with ArgumentNullException instead of a NullReferenceException on the
        // extension receiver or a null entry ending up in the merged edge's source set.
        ArgumentNullException.ThrowIfNull(deduplicator);
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(defaultSource);

        var time = timeProvider ?? TimeProvider.System;

        // Captured once so every edge in this call carries the same timestamp.
        var now = time.GetUtcNow();

        return deduplicator.Deduplicate(
            edges,
            keyExtractor: e => new EdgeSemanticKey(e.From, e.To, vulnerabilityId),
            sourceExtractor: _ => defaultSource,
            strengthExtractor: e => GetEdgeStrength(e.Why),
            timestampExtractor: _ => now);
    }

    /// <summary>
    /// Computes an edge's strength as its explanation's confidence multiplied by a
    /// multiplier chosen from the explanation type.
    /// </summary>
    private static double GetEdgeStrength(EdgeExplanation explanation)
    {
        // Unknown and any type not listed here both use the lowest multiplier (0.5).
        var typeMultiplier = explanation.Type switch
        {
            EdgeExplanationType.DirectCall => 1.0,
            EdgeExplanationType.Import => 0.95,
            EdgeExplanationType.DynamicLoad => 0.9,
            EdgeExplanationType.Ffi => 0.85,
            EdgeExplanationType.Reflection => 0.8,
            EdgeExplanationType.LoaderRule => 0.75,
            EdgeExplanationType.TaintGate => 0.7,
            EdgeExplanationType.EnvGuard => 0.65,
            EdgeExplanationType.FeatureFlag => 0.6,
            EdgeExplanationType.PlatformArch => 0.6,
            EdgeExplanationType.Unknown => 0.5,
            _ => 0.5
        };

        return explanation.Confidence * typeMultiplier;
    }
}

View File

@@ -0,0 +1,134 @@
// Licensed to StellaOps under the AGPL-3.0-or-later license.
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.ReachGraph.Deduplication;
/// <summary>
/// A semantic key for edge deduplication that identifies edges with equivalent meaning
/// regardless of their source or representation.
/// </summary>
/// <remarks>
/// Edges from different sources (reachability analysis, call graph, binary analysis)
/// may represent the same semantic relationship. This key normalizes them for deduplication.
///
/// Two edges are semantically equivalent if they have the same:
/// - Entry point node ID (ordinal comparison)
/// - Sink node ID (ordinal comparison)
/// - Vulnerability ID, if applicable (case-insensitive ordinal comparison)
/// - Applied gate, if any (ordinal comparison)
///
/// <see cref="GetHashCode"/> uses the same comparison rules as <see cref="Equals(EdgeSemanticKey)"/>,
/// so the key is safe to use in hash-based collections.
/// </remarks>
public readonly record struct EdgeSemanticKey : IEquatable<EdgeSemanticKey>
{
    /// <summary>
    /// Gets the entry point node identifier.
    /// </summary>
    public string EntryPointId { get; }

    /// <summary>
    /// Gets the sink (vulnerable) node identifier.
    /// </summary>
    public string SinkId { get; }

    /// <summary>
    /// Gets the vulnerability identifier, if this edge is associated with one.
    /// Blank input is stored as <see langword="null"/>; IDs starting with "CVE-" (any casing)
    /// are stored upper-cased; other IDs are stored as given.
    /// </summary>
    public string? VulnerabilityId { get; }

    /// <summary>
    /// Gets the applied gate identifier, if any gate was applied to this edge.
    /// Stored exactly as supplied.
    /// </summary>
    public string? GateApplied { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="EdgeSemanticKey"/> struct.
    /// </summary>
    /// <param name="entryPointId">The entry point node ID.</param>
    /// <param name="sinkId">The sink node ID.</param>
    /// <param name="vulnerabilityId">Optional vulnerability ID.</param>
    /// <param name="gateApplied">Optional gate identifier.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="entryPointId"/> or <paramref name="sinkId"/> is null, empty or whitespace.
    /// </exception>
    public EdgeSemanticKey(
        string entryPointId,
        string sinkId,
        string? vulnerabilityId = null,
        string? gateApplied = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(entryPointId);
        ArgumentException.ThrowIfNullOrWhiteSpace(sinkId);

        EntryPointId = entryPointId;
        SinkId = sinkId;
        VulnerabilityId = NormalizeId(vulnerabilityId);
        GateApplied = gateApplied;
    }

    /// <summary>
    /// Computes a canonical string key for this semantic key: the four components joined
    /// with '|', with missing optional components written as empty strings.
    /// </summary>
    /// <remarks>
    /// NOTE(review): non-CVE vulnerability IDs keep their original casing here, so two keys that
    /// are equal under <see cref="Equals(EdgeSemanticKey)"/> can still yield different strings
    /// (and different <see cref="ComputeHash"/> values). Component values containing '|' are
    /// not escaped.
    /// </remarks>
    /// <returns>A canonical string representation suitable for dictionary keys.</returns>
    public string ComputeKey()
    {
        var builder = new StringBuilder(256);
        builder.Append(EntryPointId);
        builder.Append('|');
        builder.Append(SinkId);
        builder.Append('|');
        builder.Append(VulnerabilityId ?? string.Empty);
        builder.Append('|');
        builder.Append(GateApplied ?? string.Empty);
        return builder.ToString();
    }

    /// <summary>
    /// Computes a SHA-256 hash of the UTF-8 encoded canonical key for compact storage.
    /// </summary>
    /// <returns>A lowercase hex-encoded SHA-256 hash.</returns>
    public string ComputeHash()
    {
        var key = ComputeKey();
        var bytes = Encoding.UTF8.GetBytes(key);
        var hash = SHA256.HashData(bytes);
        return Convert.ToHexStringLower(hash);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        // VulnerabilityId is compared case-insensitively in Equals, so it must be hashed
        // case-insensitively too; otherwise equal keys could land in different buckets.
        var vulnerabilityHash = StringComparer.OrdinalIgnoreCase.GetHashCode(VulnerabilityId ?? string.Empty);

        return HashCode.Combine(
            EntryPointId,
            SinkId,
            vulnerabilityHash,
            GateApplied ?? string.Empty);
    }

    /// <inheritdoc/>
    public bool Equals(EdgeSemanticKey other)
    {
        return string.Equals(EntryPointId, other.EntryPointId, StringComparison.Ordinal) &&
               string.Equals(SinkId, other.SinkId, StringComparison.Ordinal) &&
               string.Equals(VulnerabilityId, other.VulnerabilityId, StringComparison.OrdinalIgnoreCase) &&
               string.Equals(GateApplied, other.GateApplied, StringComparison.Ordinal);
    }

    /// <inheritdoc/>
    public override string ToString() => ComputeKey();

    /// <summary>
    /// Normalizes a vulnerability ID: blank becomes <see langword="null"/>, CVE IDs are
    /// upper-cased, anything else is returned unchanged.
    /// </summary>
    private static string? NormalizeId(string? id)
    {
        if (string.IsNullOrWhiteSpace(id))
        {
            return null;
        }

        // A single case-insensitive prefix check covers "cve-", "CVE-" and mixed casing.
        return id.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase)
            ? id.ToUpperInvariant()
            : id;
    }
}