391 lines
14 KiB
C#
391 lines
14 KiB
C#
using System.Collections.Immutable;
|
|
using System.Diagnostics;
|
|
|
|
namespace StellaOps.Policy.Engine.SelectionJoin;
|
|
|
|
/// <summary>
|
|
/// Service for resolving SBOM↔Advisory↔VEX tuples using linksets and PURL equivalence.
|
|
/// All operations are deterministic: given identical inputs, produces identical outputs.
|
|
/// </summary>
|
|
public sealed class SelectionJoinService
|
|
{
|
|
/// <summary>
/// Joins the SBOM components of <paramref name="input"/> with the advisory and VEX
/// linksets supplied alongside them and returns the resulting tuples, the components
/// reported as unmatched, and run statistics.
/// </summary>
/// <remarks>
/// Components are ordered by PURL and then name (case-insensitive ordinal) before being
/// split into batches, and both result lists are sorted again before being returned, so
/// the tuple and unmatched lists do not depend on batch boundaries. The recorded
/// processing time covers index construction and batch processing but not the final sort.
/// </remarks>
/// <param name="input">Batch payload: components, advisories, VEX linksets, options and an optional equivalence table.</param>
/// <returns>The batch result carrying <c>input.BatchId</c>, sorted tuples, sorted unmatched components and statistics.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <see langword="null"/>.</exception>
public SelectionJoinBatchResult ResolveTuples(SelectionJoinBatchInput input)
{
    ArgumentNullException.ThrowIfNull(input);

    var timer = Stopwatch.StartNew();

    // A missing equivalence table simply means "no known equivalents".
    var purlEquivalences = input.EquivalenceTable ?? PurlEquivalenceTable.Empty;
    var joinOptions = input.Options;

    // Lookup structures are built once and shared by every batch.
    var advisoryLookup = BuildAdvisoryIndex(input.Advisories);
    var vexLookup = BuildVexIndex(input.VexLinksets);

    var sortedComponents = input.Components
        .OrderBy(component => component.Purl, StringComparer.OrdinalIgnoreCase)
        .ThenBy(component => component.Name, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    var joined = new List<SelectionJoinTuple>();
    var withoutTuples = new List<SbomComponentInput>();
    var statistics = new SelectionJoinStatsBuilder
    {
        TotalComponents = sortedComponents.Length,
        TotalAdvisories = input.Advisories.Length,
    };

    foreach (var slice in CreateDeterministicBatches(sortedComponents, joinOptions.MaxBatchSize))
    {
        ProcessBatch(
            slice,
            input.TenantId,
            advisoryLookup,
            vexLookup,
            purlEquivalences,
            joinOptions,
            joined,
            withoutTuples,
            statistics);
    }

    // Stop timing before the final ordering pass, as the elapsed value is captured here.
    timer.Stop();
    statistics.ProcessingTimeMs = timer.ElapsedMilliseconds;

    var orderedTuples = joined
        .OrderBy(tuple => tuple.Component.Purl, StringComparer.OrdinalIgnoreCase)
        .ThenBy(tuple => tuple.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    var orderedUnmatched = withoutTuples
        .OrderBy(component => component.Purl, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    return new SelectionJoinBatchResult(
        input.BatchId,
        orderedTuples,
        orderedUnmatched,
        statistics.Build());
}
|
|
|
|
/// <summary>
/// Resolves one batch of components into tuples, appending to the shared result lists
/// and updating the running statistics.
/// </summary>
/// <remarks>
/// A component is added to <paramref name="unmatched"/> whenever it yields no tuple at
/// all: either no advisory matched, or every match was rejected by the confidence
/// threshold. This keeps every input component accounted for in exactly one of the two
/// outputs (tuples or unmatched).
/// </remarks>
/// <param name="components">Components of the current batch, processed in list order.</param>
/// <param name="tenantId">Tenant identifier folded into each tuple ID.</param>
/// <param name="advisoryIndex">Advisory lookups used to find candidate advisories.</param>
/// <param name="vexIndex">VEX lookups used to attach an optional VEX linkset.</param>
/// <param name="equivalenceTable">PURL equivalence table forwarded to advisory matching.</param>
/// <param name="options">Join options; <c>MinConfidenceThreshold</c> filters matches here.</param>
/// <param name="tuples">Receives every accepted tuple.</param>
/// <param name="unmatched">Receives every component that produced no tuple.</param>
/// <param name="stats">Running counters updated per accepted tuple.</param>
private static void ProcessBatch(
    IReadOnlyList<SbomComponentInput> components,
    string tenantId,
    AdvisoryIndex advisoryIndex,
    VexIndex vexIndex,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options,
    List<SelectionJoinTuple> tuples,
    List<SbomComponentInput> unmatched,
    SelectionJoinStatsBuilder stats)
{
    foreach (var component in components)
    {
        var matches = FindAdvisoryMatches(component, advisoryIndex, equivalenceTable, options);
        var producedTuple = false;

        foreach (var (advisory, matchType, confidence) in matches)
        {
            // NaN compares false against everything, so it must be rejected explicitly;
            // otherwise an unusable confidence would slip past the threshold check.
            if (double.IsNaN(confidence) || confidence < options.MinConfidenceThreshold)
            {
                continue;
            }

            // The VEX linkset is optional; null means no VEX statement covers this pairing.
            var vex = FindVexMatch(component, advisory, vexIndex);

            var tupleId = SelectionJoinTuple.CreateTupleId(
                tenantId,
                component.Purl,
                advisory.AdvisoryId);

            tuples.Add(new SelectionJoinTuple(
                tupleId,
                component,
                advisory,
                vex,
                matchType,
                confidence));
            producedTuple = true;

            stats.MatchedTuples++;
            switch (matchType)
            {
                case SelectionMatchType.ExactPurl:
                    stats.ExactPurlMatches++;
                    break;
                case SelectionMatchType.PackageKeyMatch:
                    stats.PackageKeyMatches++;
                    break;
                case SelectionMatchType.CpeMatch:
                    stats.CpeMatches++;
                    break;
                case SelectionMatchType.EquivalenceMatch:
                    stats.EquivalenceMatches++;
                    break;
            }

            if (vex is not null)
            {
                stats.VexOverlays++;
            }
        }

        // Covers both "no candidate advisories" and "all candidates below threshold";
        // previously the latter case dropped the component from the result entirely.
        if (!producedTuple)
        {
            unmatched.Add(component);
        }
    }
}
|
|
|
|
/// <summary>
/// Finds the advisories that apply to <paramref name="component"/>, reporting each
/// advisory ID at most once.
/// </summary>
/// <remarks>
/// Lookups run in three tiers: exact PURL, the component's own package key, then the
/// package keys of PURLs the equivalence table lists for the component. An advisory ID
/// is attributed to the first tier that finds it, so later tiers never override an
/// earlier match even when their base weight is higher. Advisory IDs are compared with
/// ordinal (case-sensitive) equality for de-duplication.
/// </remarks>
/// <param name="component">Component whose PURL and package key drive the lookups.</param>
/// <param name="index">Advisory lookups keyed by exact PURL and by package key.</param>
/// <param name="equivalenceTable">Source of equivalent PURLs for the third tier.</param>
/// <param name="options">Join options; not consulted by this method at present.</param>
/// <returns>
/// Matches ordered by confidence (highest first), ties broken by advisory ID using a
/// case-insensitive ordinal comparison.
/// </returns>
private static IReadOnlyList<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)> FindAdvisoryMatches(
    SbomComponentInput component,
    AdvisoryIndex index,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options)
{
    // Base weight per tier, combined with the advisory's own confidence.
    const double ExactPurlWeight = 1.0;
    const double PackageKeyWeight = 0.8;
    const double EquivalenceWeight = 0.9;

    var matches = new List<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)>();

    // One set shared by all tiers: O(1) membership instead of rescanning the match list,
    // and it also collapses duplicates inside the exact-PURL tier, which would otherwise
    // surface as repeated tuples carrying the same tuple ID.
    var seenAdvisoryIds = new HashSet<string>(StringComparer.Ordinal);

    var componentPurl = component.Purl.ToLowerInvariant();
    var componentKey = component.PackageKey.ToLowerInvariant();

    void Collect(ImmutableArray<AdvisoryLinksetInput> candidates, SelectionMatchType matchType, double weight)
    {
        foreach (var advisory in candidates)
        {
            if (seenAdvisoryIds.Add(advisory.AdvisoryId))
            {
                matches.Add((advisory, matchType, ComputeFinalConfidence(weight, advisory.Confidence)));
            }
        }
    }

    // 1. Exact PURL match.
    if (index.ByExactPurl.TryGetValue(componentPurl, out var exactMatches))
    {
        Collect(exactMatches, SelectionMatchType.ExactPurl, ExactPurlWeight);
    }

    // 2. Package key match (same package, possibly a different version).
    if (index.ByPackageKey.TryGetValue(componentKey, out var keyMatches))
    {
        Collect(keyMatches, SelectionMatchType.PackageKeyMatch, PackageKeyWeight);
    }

    // 3. Equivalence table match via the package key of each equivalent PURL.
    foreach (var equivalent in equivalenceTable.GetEquivalents(componentPurl))
    {
        // The component's own PURL was already handled by the first two tiers.
        if (string.Equals(equivalent, componentPurl, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var equivalentKey = PurlEquivalence.ExtractPackageKey(equivalent).ToLowerInvariant();
        if (index.ByPackageKey.TryGetValue(equivalentKey, out var equivalentMatches))
        {
            Collect(equivalentMatches, SelectionMatchType.EquivalenceMatch, EquivalenceWeight);
        }
    }

    return matches
        .OrderByDescending(match => match.Confidence)
        .ThenBy(match => match.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
        .ToList();
}
|
|
|
|
/// <summary>
/// Looks up the VEX linkset that covers <paramref name="advisory"/> for
/// <paramref name="component"/>, or <see langword="null"/> when none is indexed.
/// </summary>
/// <remarks>
/// Aliases are tried in their declared order; for each alias the product (PURL) key is
/// tried before the package key. Only when no alias hits is the advisory ID itself
/// tried, and then against the product key only.
/// </remarks>
/// <param name="component">Component supplying the PURL and package key halves of the lookup keys.</param>
/// <param name="advisory">Advisory supplying the aliases and advisory ID halves of the lookup keys.</param>
/// <param name="vexIndex">VEX lookups keyed as <c>vulnerability|product</c> and <c>vulnerability|packageKey</c>.</param>
/// <returns>The first VEX linkset found, or <see langword="null"/>.</returns>
private static VexLinksetInput? FindVexMatch(
    SbomComponentInput component,
    AdvisoryLinksetInput advisory,
    VexIndex vexIndex)
{
    var purlPart = component.Purl.ToLowerInvariant();

    foreach (var alias in advisory.Aliases)
    {
        var aliasPart = alias.ToLowerInvariant();

        if (vexIndex.ByVulnAndProduct.TryGetValue($"{aliasPart}|{purlPart}", out var byProduct))
        {
            return byProduct;
        }

        // Fall back to the coarser package-key lookup for the same alias.
        var packagePart = component.PackageKey.ToLowerInvariant();
        if (vexIndex.ByVulnAndPackageKey.TryGetValue($"{aliasPart}|{packagePart}", out var byPackage))
        {
            return byPackage;
        }
    }

    // Last resort: the advisory's own ID paired with the exact product key.
    var advisoryPart = advisory.AdvisoryId.ToLowerInvariant();
    return vexIndex.ByVulnAndProduct.TryGetValue($"{advisoryPart}|{purlPart}", out var direct)
        ? direct
        : null;
}
|
|
|
|
/// <summary>
/// Combines the match-tier weight with the advisory linkset's own confidence using a
/// geometric mean.
/// </summary>
/// <remarks>
/// The linkset confidence is sanitised before use: a missing value counts as 1.0, NaN
/// counts as 0.0, and anything else is clamped to [0, 1]. Without this, a negative value
/// makes <see cref="Math.Sqrt(double)"/> return NaN (which compares false against any
/// threshold) and a value above 1 yields a confidence above 1.
/// NOTE(review): assumes linkset confidence is expressed on a 0..1 scale — confirm
/// against the producer of <c>AdvisoryLinksetInput.Confidence</c>.
/// </remarks>
/// <param name="matchConfidence">Base weight of the match tier.</param>
/// <param name="linksetConfidence">Confidence carried by the advisory linkset, if any.</param>
/// <returns>The square root of <paramref name="matchConfidence"/> times the sanitised linkset confidence.</returns>
private static double ComputeFinalConfidence(double matchConfidence, double? linksetConfidence)
{
    var linkset = linksetConfidence ?? 1.0;

    // Math.Clamp passes NaN through unchanged, so handle it before clamping.
    linkset = double.IsNaN(linkset) ? 0.0 : Math.Clamp(linkset, 0.0, 1.0);

    // Geometric mean of match confidence and linkset confidence.
    return Math.Sqrt(matchConfidence * linkset);
}
|
|
|
|
/// <summary>
/// Builds the advisory lookups: one keyed by lower-cased PURL and one keyed by the
/// package key extracted from that PURL.
/// </summary>
/// <remarks>
/// Within each key an advisory ID is stored at most once (ordinal comparison), so an
/// advisory that lists the same PURL twice, or several PURLs of the same package, cannot
/// produce repeated matches. Blank PURLs are skipped so that they are never indexed
/// under the empty key. Both dictionaries compare keys with a case-insensitive ordinal
/// comparer.
/// </remarks>
/// <param name="advisories">Advisory linksets to index, visited in array order.</param>
/// <returns>The immutable advisory index.</returns>
private static AdvisoryIndex BuildAdvisoryIndex(ImmutableArray<AdvisoryLinksetInput> advisories)
{
    var byExactPurl = new Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)>(StringComparer.OrdinalIgnoreCase);
    var byPackageKey = new Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)>(StringComparer.OrdinalIgnoreCase);

    // Appends the advisory to the bucket for `key` unless that advisory ID is already there.
    static void AddOnce(
        Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)> buckets,
        string key,
        AdvisoryLinksetInput advisory)
    {
        if (!buckets.TryGetValue(key, out var bucket))
        {
            bucket = (new List<AdvisoryLinksetInput>(), new HashSet<string>(StringComparer.Ordinal));
            buckets[key] = bucket;
        }

        // The set gives O(1) duplicate detection; the list preserves insertion order.
        if (bucket.Ids.Add(advisory.AdvisoryId))
        {
            bucket.Items.Add(advisory);
        }
    }

    // Freezes the mutable buckets into the immutable shape the index exposes.
    static ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> Freeze(
        Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)> buckets)
    {
        return buckets.ToImmutableDictionary(
            kvp => kvp.Key,
            kvp => kvp.Value.Items.ToImmutableArray(),
            StringComparer.OrdinalIgnoreCase);
    }

    foreach (var advisory in advisories)
    {
        foreach (var purl in advisory.Purls)
        {
            // A blank PURL identifies nothing; indexing it would create an empty key.
            if (string.IsNullOrWhiteSpace(purl))
            {
                continue;
            }

            var normalizedPurl = purl.ToLowerInvariant();
            AddOnce(byExactPurl, normalizedPurl, advisory);

            var packageKey = PurlEquivalence.ExtractPackageKey(normalizedPurl);
            if (!string.IsNullOrEmpty(packageKey))
            {
                AddOnce(byPackageKey, packageKey, advisory);
            }
        }
    }

    return new AdvisoryIndex(Freeze(byExactPurl), Freeze(byPackageKey));
}
|
|
|
|
/// <summary>
/// Builds the VEX lookups keyed by <c>vulnerability|product</c> and by
/// <c>vulnerability|packageKey</c>, both lower-cased.
/// </summary>
/// <remarks>
/// When several linksets map to the same key the first one in
/// <paramref name="vexLinksets"/> wins; later ones are ignored for that key.
/// </remarks>
/// <param name="vexLinksets">VEX linksets to index, visited in array order.</param>
/// <returns>The immutable VEX index with case-insensitive ordinal keys.</returns>
private static VexIndex BuildVexIndex(ImmutableArray<VexLinksetInput> vexLinksets)
{
    var keyComparer = StringComparer.OrdinalIgnoreCase;
    var exactLookup = new Dictionary<string, VexLinksetInput>(keyComparer);
    var packageLookup = new Dictionary<string, VexLinksetInput>(keyComparer);

    foreach (var linkset in vexLinksets)
    {
        var vulnerability = linkset.VulnerabilityId.ToLowerInvariant();
        var product = linkset.ProductKey.ToLowerInvariant();
        var package = PurlEquivalence.ExtractPackageKey(product);

        // TryAdd keeps the earliest linkset for a key.
        exactLookup.TryAdd($"{vulnerability}|{product}", linkset);

        if (string.IsNullOrEmpty(package))
        {
            continue;
        }

        packageLookup.TryAdd($"{vulnerability}|{package}", linkset);
    }

    return new VexIndex(
        exactLookup.ToImmutableDictionary(keyComparer),
        packageLookup.ToImmutableDictionary(keyComparer));
}
|
|
|
|
/// <summary>
/// Splits <paramref name="components"/> into consecutive batches of at most
/// <paramref name="batchSize"/> items, preserving the incoming order.
/// </summary>
/// <param name="components">Components to split; an empty array yields no batches.</param>
/// <param name="batchSize">Maximum items per batch; zero or negative falls back to 1000.</param>
/// <returns>The batches in order; only the last one may be shorter than the batch size.</returns>
private static IReadOnlyList<IReadOnlyList<SbomComponentInput>> CreateDeterministicBatches(
    ImmutableArray<SbomComponentInput> components,
    int batchSize)
{
    const int FallbackBatchSize = 1000;
    var effectiveSize = batchSize > 0 ? batchSize : FallbackBatchSize;

    var result = new List<IReadOnlyList<SbomComponentInput>>();
    List<SbomComponentInput>? open = null;

    for (var position = 0; position < components.Length; position++)
    {
        // Start a new batch at the very beginning and whenever the current one is full.
        if (open is null || open.Count == effectiveSize)
        {
            var capacity = Math.Min(effectiveSize, components.Length - position);
            open = new List<SbomComponentInput>(capacity);
            result.Add(open);
        }

        open.Add(components[position]);
    }

    return result;
}
|
|
|
|
/// <summary>
/// Immutable advisory lookups produced by <c>BuildAdvisoryIndex</c>.
/// </summary>
/// <param name="ByExactPurl">Advisories keyed by lower-cased PURL.</param>
/// <param name="ByPackageKey">Advisories keyed by the package key extracted from each PURL.</param>
private sealed record AdvisoryIndex(
    ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByExactPurl,
    ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByPackageKey);
|
|
|
|
/// <summary>
/// Immutable VEX lookups produced by <c>BuildVexIndex</c>.
/// </summary>
/// <param name="ByVulnAndProduct">VEX linksets keyed by <c>vulnerability|product</c>.</param>
/// <param name="ByVulnAndPackageKey">VEX linksets keyed by <c>vulnerability|packageKey</c>.</param>
private sealed record VexIndex(
    ImmutableDictionary<string, VexLinksetInput> ByVulnAndProduct,
    ImmutableDictionary<string, VexLinksetInput> ByVulnAndPackageKey);
|
|
|
|
/// <summary>
/// Mutable accumulator for the counters gathered during a join run; call
/// <see cref="Build"/> to snapshot them into a <c>SelectionJoinStatistics</c>.
/// </summary>
private sealed class SelectionJoinStatsBuilder
{
    /// <summary>Number of components submitted to the join.</summary>
    public int TotalComponents { get; set; }

    /// <summary>Number of advisory linksets submitted to the join.</summary>
    public int TotalAdvisories { get; set; }

    /// <summary>Number of tuples emitted.</summary>
    public int MatchedTuples { get; set; }

    /// <summary>Tuples whose match type was <c>ExactPurl</c>.</summary>
    public int ExactPurlMatches { get; set; }

    /// <summary>Tuples whose match type was <c>PackageKeyMatch</c>.</summary>
    public int PackageKeyMatches { get; set; }

    /// <summary>Tuples whose match type was <c>CpeMatch</c>.</summary>
    public int CpeMatches { get; set; }

    /// <summary>Tuples whose match type was <c>EquivalenceMatch</c>.</summary>
    public int EquivalenceMatches { get; set; }

    /// <summary>Tuples that carry a VEX linkset.</summary>
    public int VexOverlays { get; set; }

    /// <summary>Elapsed processing time in milliseconds.</summary>
    public long ProcessingTimeMs { get; set; }

    /// <summary>Creates a statistics object from the current counter values.</summary>
    /// <returns>A new <c>SelectionJoinStatistics</c> populated in declaration order.</returns>
    public SelectionJoinStatistics Build()
    {
        return new SelectionJoinStatistics(
            TotalComponents,
            TotalAdvisories,
            MatchedTuples,
            ExactPurlMatches,
            PackageKeyMatches,
            CpeMatches,
            EquivalenceMatches,
            VexOverlays,
            ProcessingTimeMs);
    }
}
|
|
}
|