up
This commit is contained in:
@@ -0,0 +1,390 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace StellaOps.Policy.Engine.SelectionJoin;
|
||||
|
||||
/// <summary>
|
||||
/// Service for resolving SBOM↔Advisory↔VEX tuples using linksets and PURL equivalence.
|
||||
/// All operations are deterministic: given identical inputs, produces identical outputs.
|
||||
/// </summary>
|
||||
public sealed class SelectionJoinService
|
||||
{
|
||||
/// <summary>
/// Joins the SBOM components of <paramref name="input"/> with the advisory linksets that
/// reference them and, where one is indexed for the pairing, a VEX linkset.
/// Components are sorted (PURL, then name, both case-insensitive) and handed to the
/// matcher in fixed-size batches; the resulting tuples and unmatched components are
/// sorted again before they are returned.
/// </summary>
/// <param name="input">Batch to resolve: components, advisory linksets, VEX linksets, an optional PURL equivalence table and the join options.</param>
/// <returns>
/// The input's batch ID, tuples ordered by component PURL then advisory ID, unmatched
/// components ordered by PURL, and the statistics gathered during the run (including
/// the elapsed time in milliseconds).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
public SelectionJoinBatchResult ResolveTuples(SelectionJoinBatchInput input)
{
    ArgumentNullException.ThrowIfNull(input);

    var timer = Stopwatch.StartNew();

    // A missing equivalence table behaves like an empty one.
    var purlEquivalences = input.EquivalenceTable ?? PurlEquivalenceTable.Empty;
    var joinOptions = input.Options;

    var advisoryLookup = BuildAdvisoryIndex(input.Advisories);
    var vexLookup = BuildVexIndex(input.VexLinksets);

    var ignoreCase = StringComparer.OrdinalIgnoreCase;

    // Fixed processing order so batch boundaries do not depend on input order.
    var sortedComponents = input.Components
        .OrderBy(component => component.Purl, ignoreCase)
        .ThenBy(component => component.Name, ignoreCase)
        .ToImmutableArray();

    var joined = new List<SelectionJoinTuple>();
    var leftover = new List<SbomComponentInput>();
    var counters = new SelectionJoinStatsBuilder
    {
        TotalComponents = sortedComponents.Length,
        TotalAdvisories = input.Advisories.Length,
    };

    foreach (var slice in CreateDeterministicBatches(sortedComponents, joinOptions.MaxBatchSize))
    {
        ProcessBatch(
            slice,
            input.TenantId,
            advisoryLookup,
            vexLookup,
            purlEquivalences,
            joinOptions,
            joined,
            leftover,
            counters);
    }

    // The measured interval covers indexing and matching, not the final sorts.
    timer.Stop();
    counters.ProcessingTimeMs = timer.ElapsedMilliseconds;

    var orderedTuples = joined
        .OrderBy(tuple => tuple.Component.Purl, ignoreCase)
        .ThenBy(tuple => tuple.Advisory.AdvisoryId, ignoreCase)
        .ToImmutableArray();

    var orderedLeftover = leftover
        .OrderBy(component => component.Purl, ignoreCase)
        .ToImmutableArray();

    return new SelectionJoinBatchResult(
        input.BatchId,
        orderedTuples,
        orderedLeftover,
        counters.Build());
}
|
||||
|
||||
/// <summary>
/// Matches every component of one batch against the advisory index and appends the
/// outcome to the shared result collections.
/// </summary>
/// <remarks>
/// A component is added to <paramref name="unmatched"/> whenever it yields no tuple:
/// either no advisory candidate was found, or every candidate fell below
/// <c>options.MinConfidenceThreshold</c>. Previously the second case dropped the
/// component from both collections.
/// </remarks>
/// <param name="components">Components of the current batch.</param>
/// <param name="tenantId">Tenant identifier passed to <c>SelectionJoinTuple.CreateTupleId</c>.</param>
/// <param name="advisoryIndex">Advisory lookup tables.</param>
/// <param name="vexIndex">VEX lookup tables.</param>
/// <param name="equivalenceTable">PURL equivalence table used for equivalence matching.</param>
/// <param name="options">Join options; only the minimum confidence threshold is read here.</param>
/// <param name="tuples">Receives one tuple per accepted (component, advisory) pair.</param>
/// <param name="unmatched">Receives components that produced no tuple.</param>
/// <param name="stats">Counters updated for every accepted tuple.</param>
private static void ProcessBatch(
    IReadOnlyList<SbomComponentInput> components,
    string tenantId,
    AdvisoryIndex advisoryIndex,
    VexIndex vexIndex,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options,
    List<SelectionJoinTuple> tuples,
    List<SbomComponentInput> unmatched,
    SelectionJoinStatsBuilder stats)
{
    foreach (var component in components)
    {
        var matches = FindAdvisoryMatches(component, advisoryIndex, equivalenceTable, options);
        var emitted = 0;

        foreach (var (advisory, matchType, confidence) in matches)
        {
            if (confidence < options.MinConfidenceThreshold)
            {
                continue;
            }

            // The VEX linkset is optional; null means no statement is indexed for this pairing.
            var vex = FindVexMatch(component, advisory, vexIndex);

            var tupleId = SelectionJoinTuple.CreateTupleId(
                tenantId,
                component.Purl,
                advisory.AdvisoryId);

            tuples.Add(new SelectionJoinTuple(
                tupleId,
                component,
                advisory,
                vex,
                matchType,
                confidence));
            emitted++;

            // Update statistics
            stats.MatchedTuples++;
            switch (matchType)
            {
                case SelectionMatchType.ExactPurl:
                    stats.ExactPurlMatches++;
                    break;
                case SelectionMatchType.PackageKeyMatch:
                    stats.PackageKeyMatches++;
                    break;
                case SelectionMatchType.CpeMatch:
                    stats.CpeMatches++;
                    break;
                case SelectionMatchType.EquivalenceMatch:
                    stats.EquivalenceMatches++;
                    break;
            }

            if (vex is not null)
            {
                stats.VexOverlays++;
            }
        }

        // Covers both "no candidates" and "all candidates filtered by the threshold",
        // so every component ends up in exactly one of the two result collections.
        if (emitted == 0)
        {
            unmatched.Add(component);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Collects the advisory linksets that apply to <paramref name="component"/>. Three
/// strategies are tried in order: exact PURL, package key, then the package keys of
/// PURLs the equivalence table lists for the component.
/// </summary>
/// <remarks>
/// An advisory ID is reported at most once, under the first strategy that finds it.
/// This now also holds within the exact-PURL step, where a repeated index entry used to
/// produce duplicate matches.
/// </remarks>
/// <param name="component">SBOM component; its PURL and package key are lower-cased before lookup.</param>
/// <param name="index">Advisory lookup tables keyed by PURL and by package key.</param>
/// <param name="equivalenceTable">Supplies alternative PURLs for the component's PURL.</param>
/// <param name="options">Not read by this method; kept for signature compatibility.</param>
/// <returns>
/// Matches ordered by descending confidence; ties are ordered by advisory ID
/// (case-insensitive).
/// </returns>
private static IReadOnlyList<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)> FindAdvisoryMatches(
    SbomComponentInput component,
    AdvisoryIndex index,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options)
{
    // Base confidence of each strategy before it is combined with the linkset's own confidence.
    const double ExactPurlConfidence = 1.0;
    const double PackageKeyConfidence = 0.8;
    const double EquivalenceConfidence = 0.9;

    var matches = new List<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)>();

    // Ordinal comparison mirrors the `==` check this set replaces.
    var seenAdvisoryIds = new HashSet<string>(StringComparer.Ordinal);

    var componentPurl = component.Purl.ToLowerInvariant();
    var componentKey = component.PackageKey.ToLowerInvariant();

    // 1. Exact PURL match (highest confidence)
    if (index.ByExactPurl.TryGetValue(componentPurl, out var exactMatches))
    {
        AddMatches(exactMatches, SelectionMatchType.ExactPurl, ExactPurlConfidence);
    }

    // 2. Package key match (same package, possibly different version)
    if (index.ByPackageKey.TryGetValue(componentKey, out var keyMatches))
    {
        AddMatches(keyMatches, SelectionMatchType.PackageKeyMatch, PackageKeyConfidence);
    }

    // 3. Equivalence table match
    foreach (var equivalent in equivalenceTable.GetEquivalents(componentPurl))
    {
        // The component's own PURL was already handled by steps 1 and 2.
        if (string.Equals(equivalent, componentPurl, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var equivalentKey = PurlEquivalence.ExtractPackageKey(equivalent).ToLowerInvariant();
        if (index.ByPackageKey.TryGetValue(equivalentKey, out var equivMatches))
        {
            AddMatches(equivMatches, SelectionMatchType.EquivalenceMatch, EquivalenceConfidence);
        }
    }

    // Sort matches by confidence (descending) for deterministic ordering
    return matches
        .OrderByDescending(m => m.Confidence)
        .ThenBy(m => m.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
        .ToList();

    // Appends every candidate whose advisory ID has not been reported yet.
    void AddMatches(
        ImmutableArray<AdvisoryLinksetInput> candidates,
        SelectionMatchType matchType,
        double baseConfidence)
    {
        foreach (var advisory in candidates)
        {
            if (!seenAdvisoryIds.Add(advisory.AdvisoryId))
            {
                continue;
            }

            var confidence = ComputeFinalConfidence(baseConfidence, advisory.Confidence);
            matches.Add((advisory, matchType, confidence));
        }
    }
}
|
||||
|
||||
/// <summary>
/// Looks up the VEX linkset that covers <paramref name="component"/> for
/// <paramref name="advisory"/>.
/// </summary>
/// <remarks>
/// Lookup order: for each advisory alias in turn, alias + component PURL, then
/// alias + component package key; finally advisory ID + component PURL. The first hit wins.
/// </remarks>
/// <param name="component">Component whose PURL and package key form the product half of the lookup key.</param>
/// <param name="advisory">Advisory whose aliases and ID form the vulnerability half of the lookup key.</param>
/// <param name="vexIndex">VEX lookup tables keyed by <c>vulnerability|product</c> strings.</param>
/// <returns>The first VEX linkset found, or <c>null</c> when no key is indexed.</returns>
private static VexLinksetInput? FindVexMatch(
    SbomComponentInput component,
    AdvisoryLinksetInput advisory,
    VexIndex vexIndex)
{
    foreach (var alias in advisory.Aliases)
    {
        var vulnerability = alias.ToLowerInvariant();

        // Alias paired with the full PURL.
        var productKey = $"{vulnerability}|{component.Purl.ToLowerInvariant()}";
        if (vexIndex.ByVulnAndProduct.TryGetValue(productKey, out var byProduct))
        {
            return byProduct;
        }

        // Alias paired with the package key.
        var packageKey = $"{vulnerability}|{component.PackageKey.ToLowerInvariant()}";
        if (vexIndex.ByVulnAndPackageKey.TryGetValue(packageKey, out var byPackage))
        {
            return byPackage;
        }
    }

    // Last resort: the advisory's own ID paired with the full PURL.
    var advisoryKey = $"{advisory.AdvisoryId.ToLowerInvariant()}|{component.Purl.ToLowerInvariant()}";
    if (vexIndex.ByVulnAndProduct.TryGetValue(advisoryKey, out var byAdvisoryId))
    {
        return byAdvisoryId;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Combines the confidence of the matching strategy with the confidence carried by the
/// advisory linkset, using their geometric mean.
/// </summary>
/// <remarks>
/// Both inputs are normalised to the range [0, 1] first. Without this, a negative
/// product makes <see cref="Math.Sqrt(double)"/> return NaN, and NaN is never
/// "less than" a threshold, so it would pass any minimum-confidence filter.
/// NOTE(review): assumes confidences are expressed on a 0..1 scale, as the 1.0 default
/// for a missing linkset confidence suggests. Confirm against the linkset producers.
/// </remarks>
/// <param name="matchConfidence">Base confidence of the matching strategy.</param>
/// <param name="linksetConfidence">Confidence of the linkset; <c>null</c> is treated as 1.0.</param>
/// <returns>A value in [0, 1]; never NaN.</returns>
private static double ComputeFinalConfidence(double matchConfidence, double? linksetConfidence)
{
    var match = Normalize(matchConfidence);
    var linkset = Normalize(linksetConfidence ?? 1.0);

    // Geometric mean of match confidence and linkset confidence
    return Math.Sqrt(match * linkset);

    // Math.Clamp passes NaN through unchanged, so NaN is mapped to "no confidence" explicitly.
    static double Normalize(double value) =>
        double.IsNaN(value) ? 0.0 : Math.Clamp(value, 0.0, 1.0);
}
|
||||
|
||||
/// <summary>
/// Builds the two advisory lookup tables: one keyed by lower-cased PURL and one keyed by
/// the package key extracted from that PURL. Both tables compare keys case-insensitively.
/// </summary>
/// <remarks>
/// An advisory ID is stored at most once per key in both tables. Previously only the
/// package-key table was de-duplicated, so a PURL repeated within one advisory (including
/// spellings that differ only in case) indexed that advisory twice under the exact PURL.
/// </remarks>
/// <param name="advisories">Advisory linksets whose PURLs are indexed.</param>
/// <returns>The immutable index over <paramref name="advisories"/>.</returns>
private static AdvisoryIndex BuildAdvisoryIndex(ImmutableArray<AdvisoryLinksetInput> advisories)
{
    var byExactPurl = new Dictionary<string, List<AdvisoryLinksetInput>>(StringComparer.OrdinalIgnoreCase);
    var byPackageKey = new Dictionary<string, List<AdvisoryLinksetInput>>(StringComparer.OrdinalIgnoreCase);

    foreach (var advisory in advisories)
    {
        foreach (var purl in advisory.Purls)
        {
            var normalizedPurl = purl.ToLowerInvariant();
            var packageKey = PurlEquivalence.ExtractPackageKey(normalizedPurl);

            AddDistinct(byExactPurl, normalizedPurl, advisory);

            // PURLs without an extractable package key are indexed by exact PURL only.
            if (!string.IsNullOrEmpty(packageKey))
            {
                AddDistinct(byPackageKey, packageKey, advisory);
            }
        }
    }

    return new AdvisoryIndex(Freeze(byExactPurl), Freeze(byPackageKey));

    // Appends the advisory under the key unless an entry with the same advisory ID is already there.
    static void AddDistinct(
        Dictionary<string, List<AdvisoryLinksetInput>> target,
        string key,
        AdvisoryLinksetInput advisory)
    {
        if (!target.TryGetValue(key, out var bucket))
        {
            bucket = new List<AdvisoryLinksetInput>();
            target[key] = bucket;
        }

        if (!bucket.Any(existing => existing.AdvisoryId == advisory.AdvisoryId))
        {
            bucket.Add(advisory);
        }
    }

    // Converts a mutable working table into its immutable, case-insensitive form.
    static ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> Freeze(
        Dictionary<string, List<AdvisoryLinksetInput>> source) =>
        source.ToImmutableDictionary(
            kvp => kvp.Key,
            kvp => kvp.Value.ToImmutableArray(),
            StringComparer.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Builds the two VEX lookup tables. Keys have the form <c>vulnerability|product</c>,
/// where the product half is either the lower-cased product key or the package key
/// extracted from it. Both tables compare keys case-insensitively.
/// </summary>
/// <remarks>
/// When several linksets map to the same key, the first one in
/// <paramref name="vexLinksets"/> is kept and later ones are ignored.
/// </remarks>
/// <param name="vexLinksets">VEX linksets to index.</param>
/// <returns>The immutable index over <paramref name="vexLinksets"/>.</returns>
private static VexIndex BuildVexIndex(ImmutableArray<VexLinksetInput> vexLinksets)
{
    var ignoreCase = StringComparer.OrdinalIgnoreCase;
    var productLookup = new Dictionary<string, VexLinksetInput>(ignoreCase);
    var packageLookup = new Dictionary<string, VexLinksetInput>(ignoreCase);

    foreach (var linkset in vexLinksets)
    {
        var vulnerability = linkset.VulnerabilityId.ToLowerInvariant();
        var product = linkset.ProductKey.ToLowerInvariant();
        var package = PurlEquivalence.ExtractPackageKey(product);

        // TryAdd leaves an existing entry untouched, so the first linkset per key wins.
        productLookup.TryAdd($"{vulnerability}|{product}", linkset);

        if (string.IsNullOrEmpty(package))
        {
            continue;
        }

        packageLookup.TryAdd($"{vulnerability}|{package}", linkset);
    }

    return new VexIndex(
        productLookup.ToImmutableDictionary(ignoreCase),
        packageLookup.ToImmutableDictionary(ignoreCase));
}
|
||||
|
||||
/// <summary>
/// Splits <paramref name="components"/> into consecutive batches of at most
/// <paramref name="batchSize"/> items, preserving the input order. Only the last batch
/// may be shorter.
/// </summary>
/// <param name="components">Components in the order they should be processed.</param>
/// <param name="batchSize">Maximum items per batch; zero or a negative value selects the default of 1000.</param>
/// <returns>The batches in order; empty when there are no components.</returns>
private static IReadOnlyList<IReadOnlyList<SbomComponentInput>> CreateDeterministicBatches(
    ImmutableArray<SbomComponentInput> components,
    int batchSize)
{
    const int DefaultBatchSize = 1000;

    // A default (uninitialised) array is treated like an empty one instead of throwing.
    if (components.IsDefaultOrEmpty)
    {
        return Array.Empty<IReadOnlyList<SbomComponentInput>>();
    }

    if (batchSize <= 0)
    {
        batchSize = DefaultBatchSize;
    }

    // Enumerable.Chunk yields arrays in source order; each array is exposed as IReadOnlyList.
    return components
        .Chunk(batchSize)
        .ToList<IReadOnlyList<SbomComponentInput>>();
}
|
||||
|
||||
/// <summary>Immutable lookup tables over the advisory linksets of one batch.</summary>
/// <param name="ByExactPurl">Advisories keyed by PURL.</param>
/// <param name="ByPackageKey">Advisories keyed by package key.</param>
private sealed record AdvisoryIndex(
    ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByExactPurl,
    ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByPackageKey);
|
||||
|
||||
/// <summary>Immutable lookup tables over the VEX linksets of one batch.</summary>
/// <param name="ByVulnAndProduct">VEX linksets keyed by <c>vulnerability|product key</c>.</param>
/// <param name="ByVulnAndPackageKey">VEX linksets keyed by <c>vulnerability|package key</c>.</param>
private sealed record VexIndex(
    ImmutableDictionary<string, VexLinksetInput> ByVulnAndProduct,
    ImmutableDictionary<string, VexLinksetInput> ByVulnAndPackageKey);
|
||||
|
||||
/// <summary>
/// Mutable accumulator for the counters of one join run; <see cref="Build"/> copies the
/// current values into a <c>SelectionJoinStatistics</c> instance.
/// </summary>
private sealed class SelectionJoinStatsBuilder
{
    /// <summary>Number of components in the batch.</summary>
    public int TotalComponents { get; set; }

    /// <summary>Number of advisory linksets in the batch.</summary>
    public int TotalAdvisories { get; set; }

    /// <summary>Number of tuples emitted.</summary>
    public int MatchedTuples { get; set; }

    /// <summary>Tuples whose match type is <c>ExactPurl</c>.</summary>
    public int ExactPurlMatches { get; set; }

    /// <summary>Tuples whose match type is <c>PackageKeyMatch</c>.</summary>
    public int PackageKeyMatches { get; set; }

    /// <summary>Tuples whose match type is <c>CpeMatch</c>.</summary>
    public int CpeMatches { get; set; }

    /// <summary>Tuples whose match type is <c>EquivalenceMatch</c>.</summary>
    public int EquivalenceMatches { get; set; }

    /// <summary>Tuples that carry a VEX linkset.</summary>
    public int VexOverlays { get; set; }

    /// <summary>Elapsed processing time in milliseconds.</summary>
    public long ProcessingTimeMs { get; set; }

    /// <summary>Creates the statistics object from the current counter values.</summary>
    /// <returns>A new <c>SelectionJoinStatistics</c> populated in declaration order of the counters.</returns>
    public SelectionJoinStatistics Build()
    {
        return new SelectionJoinStatistics(
            TotalComponents,
            TotalAdvisories,
            MatchedTuples,
            ExactPurlMatches,
            PackageKeyMatches,
            CpeMatches,
            EquivalenceMatches,
            VexOverlays,
            ProcessingTimeMs);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user