Files
git.stella-ops.org/src/Policy/StellaOps.Policy.Engine/SelectionJoin/SelectionJoinService.cs
StellaOps Bot 3b96b2e3ea
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
up
2025-11-27 23:45:09 +02:00

391 lines
14 KiB
C#

using System.Collections.Immutable;
using System.Diagnostics;
namespace StellaOps.Policy.Engine.SelectionJoin;
/// <summary>
/// Service for resolving SBOM↔Advisory↔VEX tuples using linksets and PURL equivalence.
/// All operations are deterministic: given identical inputs, produces identical outputs.
/// </summary>
public sealed class SelectionJoinService
{
/// <summary>
/// Joins the SBOM components of <paramref name="input"/> against its advisory and VEX linksets.
/// Components are ordered, split into fixed-size batches and matched batch by batch.
/// </summary>
/// <param name="input">
/// Batch input supplying components, advisories, VEX linksets, options, tenant id, batch id and
/// an optional PURL equivalence table (an empty table is used when none is supplied).
/// </param>
/// <returns>
/// The batch result: tuples sorted by component PURL then advisory id, unmatched components
/// sorted by PURL, and the collected statistics.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
public SelectionJoinBatchResult ResolveTuples(SelectionJoinBatchInput input)
{
    ArgumentNullException.ThrowIfNull(input);

    var timer = new Stopwatch();
    timer.Start();

    var equivalences = input.EquivalenceTable ?? PurlEquivalenceTable.Empty;
    var joinOptions = input.Options;

    // Lookup structures are built once and shared by every batch.
    var advisories = BuildAdvisoryIndex(input.Advisories);
    var vexLinksets = BuildVexIndex(input.VexLinksets);

    // A fixed component order keeps batch boundaries stable between runs.
    var ordered = input.Components
        .OrderBy(component => component.Purl, StringComparer.OrdinalIgnoreCase)
        .ThenBy(component => component.Name, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    var joined = new List<SelectionJoinTuple>();
    var withoutMatch = new List<SbomComponentInput>();
    var statistics = new SelectionJoinStatsBuilder
    {
        TotalComponents = ordered.Length,
        TotalAdvisories = input.Advisories.Length,
    };

    foreach (var slice in CreateDeterministicBatches(ordered, joinOptions.MaxBatchSize))
    {
        ProcessBatch(
            slice,
            input.TenantId,
            advisories,
            vexLinksets,
            equivalences,
            joinOptions,
            joined,
            withoutMatch,
            statistics);
    }

    // The timer covers indexing and matching; the final sorts below are not included.
    timer.Stop();
    statistics.ProcessingTimeMs = timer.ElapsedMilliseconds;

    var tuplesByPurlThenAdvisory = joined
        .OrderBy(tuple => tuple.Component.Purl, StringComparer.OrdinalIgnoreCase)
        .ThenBy(tuple => tuple.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase);
    var unmatchedByPurl = withoutMatch
        .OrderBy(component => component.Purl, StringComparer.OrdinalIgnoreCase);

    return new SelectionJoinBatchResult(
        input.BatchId,
        tuplesByPurlThenAdvisory.ToImmutableArray(),
        unmatchedByPurl.ToImmutableArray(),
        statistics.Build());
}
/// <summary>
/// Matches every component of one batch against the advisory index and appends the outcome
/// to the shared accumulators.
/// </summary>
/// <param name="components">Components of the current batch, processed in list order.</param>
/// <param name="tenantId">Tenant identifier folded into each tuple id.</param>
/// <param name="advisoryIndex">Advisory lookup used to find candidate matches.</param>
/// <param name="vexIndex">VEX lookup used to attach an optional VEX linkset to a tuple.</param>
/// <param name="equivalenceTable">PURL equivalence table forwarded to the advisory matcher.</param>
/// <param name="options">Join options; <c>MinConfidenceThreshold</c> filters candidates here.</param>
/// <param name="tuples">Receives one tuple per accepted (component, advisory) pair.</param>
/// <param name="unmatched">Receives every component that produced no tuple.</param>
/// <param name="stats">Counters updated for each emitted tuple.</param>
private static void ProcessBatch(
    IReadOnlyList<SbomComponentInput> components,
    string tenantId,
    AdvisoryIndex advisoryIndex,
    VexIndex vexIndex,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options,
    List<SelectionJoinTuple> tuples,
    List<SbomComponentInput> unmatched,
    SelectionJoinStatsBuilder stats)
{
    foreach (var component in components)
    {
        var candidates = FindAdvisoryMatches(component, advisoryIndex, equivalenceTable, options);
        var emitted = 0;

        foreach (var (advisory, matchType, confidence) in candidates)
        {
            if (confidence < options.MinConfidenceThreshold)
            {
                continue;
            }

            // The VEX linkset is optional; a tuple is emitted with or without it.
            var vex = FindVexMatch(component, advisory, vexIndex);
            var tupleId = SelectionJoinTuple.CreateTupleId(
                tenantId,
                component.Purl,
                advisory.AdvisoryId);

            tuples.Add(new SelectionJoinTuple(
                tupleId,
                component,
                advisory,
                vex,
                matchType,
                confidence));
            emitted++;

            stats.MatchedTuples++;
            switch (matchType)
            {
                case SelectionMatchType.ExactPurl:
                    stats.ExactPurlMatches++;
                    break;
                case SelectionMatchType.PackageKeyMatch:
                    stats.PackageKeyMatches++;
                    break;
                case SelectionMatchType.CpeMatch:
                    stats.CpeMatches++;
                    break;
                case SelectionMatchType.EquivalenceMatch:
                    stats.EquivalenceMatches++;
                    break;
            }

            if (vex is not null)
            {
                stats.VexOverlays++;
            }
        }

        // A component is unmatched when nothing was emitted for it. This covers both "no
        // candidates at all" and "every candidate fell below the confidence threshold";
        // previously the second case dropped the component from the result entirely.
        if (emitted == 0)
        {
            unmatched.Add(component);
        }
    }
}
/// <summary>
/// Collects the advisories that apply to <paramref name="component"/>, at most once per
/// advisory id, trying three stages in priority order: exact PURL, package key, and package
/// keys of equivalent PURLs.
/// </summary>
/// <param name="component">Component whose PURL and package key are looked up.</param>
/// <param name="index">Advisory lookup by lower-cased PURL and by package key.</param>
/// <param name="equivalenceTable">Source of PURLs considered equivalent to the component PURL.</param>
/// <param name="options">Join options; not consulted by this method at present.</param>
/// <returns>
/// Matches ordered by descending confidence, then by advisory id (case-insensitive).
/// An advisory found by an earlier stage is not reported again by a later one.
/// </returns>
private static IReadOnlyList<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)> FindAdvisoryMatches(
    SbomComponentInput component,
    AdvisoryIndex index,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options)
{
    var matches = new List<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)>();

    // Tracks advisory ids already reported, using the same ordinal comparison the earlier
    // list scan used. It also removes duplicates inside the exact-PURL stage, which used to
    // produce repeated tuples when an index bucket listed the same advisory twice.
    var seenAdvisoryIds = new HashSet<string>(StringComparer.Ordinal);

    void AddMatches(
        ImmutableArray<AdvisoryLinksetInput> candidates,
        SelectionMatchType matchType,
        double matchConfidence)
    {
        foreach (var advisory in candidates)
        {
            if (!seenAdvisoryIds.Add(advisory.AdvisoryId))
            {
                continue;
            }

            var confidence = ComputeFinalConfidence(matchConfidence, advisory.Confidence);
            matches.Add((advisory, matchType, confidence));
        }
    }

    var componentPurl = component.Purl.ToLowerInvariant();
    var componentKey = component.PackageKey.ToLowerInvariant();

    // 1. Exact PURL match (highest confidence).
    if (index.ByExactPurl.TryGetValue(componentPurl, out var exactMatches))
    {
        AddMatches(exactMatches, SelectionMatchType.ExactPurl, 1.0);
    }

    // 2. Package key match (same package, possibly a different version).
    if (index.ByPackageKey.TryGetValue(componentKey, out var keyMatches))
    {
        AddMatches(keyMatches, SelectionMatchType.PackageKeyMatch, 0.8);
    }

    // 3. Equivalence table match, via the package key of each equivalent PURL.
    foreach (var equivalent in equivalenceTable.GetEquivalents(componentPurl))
    {
        if (string.Equals(equivalent, componentPurl, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // The advisory index never stores empty package keys, so an equivalent without an
        // extractable key cannot match; skipping it also avoids dereferencing a null key.
        var equivalentKey = PurlEquivalence.ExtractPackageKey(equivalent);
        if (string.IsNullOrEmpty(equivalentKey))
        {
            continue;
        }

        if (index.ByPackageKey.TryGetValue(equivalentKey.ToLowerInvariant(), out var equivMatches))
        {
            AddMatches(equivMatches, SelectionMatchType.EquivalenceMatch, 0.9);
        }
    }

    // Highest confidence first; advisory id breaks ties so the order is reproducible.
    return matches
        .OrderByDescending(m => m.Confidence)
        .ThenBy(m => m.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
        .ToList();
}
/// <summary>
/// Looks up the VEX linkset that applies to a (component, advisory) pair.
/// </summary>
/// <param name="component">Component whose PURL and package key form the product part of the lookup key.</param>
/// <param name="advisory">Advisory whose aliases and id form the vulnerability part of the lookup key.</param>
/// <param name="vexIndex">VEX lookup by "vulnerability|product" and "vulnerability|package key".</param>
/// <returns>
/// The first hit found, or <c>null</c>. Each alias is tried in order, first against the
/// component PURL and then against its package key; finally the advisory id is tried
/// against the component PURL only.
/// </returns>
private static VexLinksetInput? FindVexMatch(
    SbomComponentInput component,
    AdvisoryLinksetInput advisory,
    VexIndex vexIndex)
{
    // Lookup keys are the lower-cased vulnerability id and product joined by '|'.
    static string ComposeKey(string vulnerabilityId, string product)
    {
        return string.Concat(vulnerabilityId.ToLowerInvariant(), "|", product.ToLowerInvariant());
    }

    foreach (var alias in advisory.Aliases)
    {
        var byProductKey = ComposeKey(alias, component.Purl);
        if (vexIndex.ByVulnAndProduct.TryGetValue(byProductKey, out var productHit))
        {
            return productHit;
        }

        var byPackageKey = ComposeKey(alias, component.PackageKey);
        if (vexIndex.ByVulnAndPackageKey.TryGetValue(byPackageKey, out var packageHit))
        {
            return packageHit;
        }
    }

    // No alias matched: fall back to the advisory id against the exact product key.
    var advisoryKey = ComposeKey(advisory.AdvisoryId, component.Purl);
    if (!vexIndex.ByVulnAndProduct.TryGetValue(advisoryKey, out var advisoryHit))
    {
        return null;
    }

    return advisoryHit;
}
/// <summary>
/// Combines the match-stage confidence with the linkset confidence as their geometric mean.
/// </summary>
/// <param name="matchConfidence">Confidence assigned by the matching stage.</param>
/// <param name="linksetConfidence">Confidence carried by the linkset; <c>null</c> counts as 1.0.</param>
/// <returns>
/// <c>sqrt(matchConfidence * linksetConfidence)</c>. A negative or NaN factor is treated as 0,
/// so the result is never NaN.
/// </returns>
private static double ComputeFinalConfidence(double matchConfidence, double? linksetConfidence)
{
    // `value > 0.0` is false for negatives and for NaN, so both collapse to 0. Without this
    // floor a negative linkset confidence makes Math.Sqrt return NaN, and NaN is never
    // "less than" a threshold, so such a match would bypass confidence filtering.
    static double FloorAtZero(double value) => value > 0.0 ? value : 0.0;

    var match = FloorAtZero(matchConfidence);
    var linkset = FloorAtZero(linksetConfidence ?? 1.0);

    // Geometric mean of match confidence and linkset confidence.
    var product = match * linkset;
    return product > 0.0 ? Math.Sqrt(product) : 0.0;
}
/// <summary>
/// Builds the advisory lookup used for matching: one map keyed by lower-cased PURL and one
/// keyed by the package key extracted from that PURL. Both maps compare keys case-insensitively.
/// </summary>
/// <param name="advisories">Advisory linksets to index, visited in array order.</param>
/// <returns>
/// An index in which each bucket lists an advisory id at most once, in first-seen order.
/// </returns>
private static AdvisoryIndex BuildAdvisoryIndex(ImmutableArray<AdvisoryLinksetInput> advisories)
{
    var byExactPurl = new Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)>(StringComparer.OrdinalIgnoreCase);
    var byPackageKey = new Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)>(StringComparer.OrdinalIgnoreCase);

    foreach (var advisory in advisories)
    {
        foreach (var purl in advisory.Purls)
        {
            var normalizedPurl = purl.ToLowerInvariant();
            var packageKey = PurlEquivalence.ExtractPackageKey(normalizedPurl);

            // An advisory may list PURLs that normalize to the same key (repeats or case
            // variants). Indexing it once per bucket keeps later matching from emitting
            // duplicate tuples; the package-key bucket already applied this rule.
            AddOnce(byExactPurl, normalizedPurl, advisory);

            if (!string.IsNullOrEmpty(packageKey))
            {
                AddOnce(byPackageKey, packageKey, advisory);
            }
        }
    }

    return new AdvisoryIndex(Freeze(byExactPurl), Freeze(byPackageKey));

    // Appends the advisory to the bucket for `key` unless that bucket already holds its id.
    static void AddOnce(
        Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)> buckets,
        string key,
        AdvisoryLinksetInput advisory)
    {
        if (!buckets.TryGetValue(key, out var bucket))
        {
            bucket = (new List<AdvisoryLinksetInput>(), new HashSet<string>(StringComparer.Ordinal));
            buckets[key] = bucket;
        }

        if (bucket.Ids.Add(advisory.AdvisoryId))
        {
            bucket.Items.Add(advisory);
        }
    }

    // Converts the mutable buckets into the immutable, case-insensitive lookup.
    static ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> Freeze(
        Dictionary<string, (List<AdvisoryLinksetInput> Items, HashSet<string> Ids)> buckets)
    {
        return buckets.ToImmutableDictionary(
            kvp => kvp.Key,
            kvp => kvp.Value.Items.ToImmutableArray(),
            StringComparer.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Builds the VEX lookup: one map keyed by "vulnerability|product" and one keyed by
/// "vulnerability|package key", both lower-cased and compared case-insensitively.
/// </summary>
/// <param name="vexLinksets">VEX linksets to index, visited in array order.</param>
/// <returns>
/// An index in which the first linkset seen for a key is kept and later ones are ignored.
/// </returns>
private static VexIndex BuildVexIndex(ImmutableArray<VexLinksetInput> vexLinksets)
{
    var comparer = StringComparer.OrdinalIgnoreCase;
    var exactLookup = new Dictionary<string, VexLinksetInput>(comparer);
    var packageLookup = new Dictionary<string, VexLinksetInput>(comparer);

    foreach (var linkset in vexLinksets)
    {
        var vulnerability = linkset.VulnerabilityId.ToLowerInvariant();
        var product = linkset.ProductKey.ToLowerInvariant();
        var package = PurlEquivalence.ExtractPackageKey(product);

        // TryAdd leaves an existing entry in place, so the earliest linkset wins.
        exactLookup.TryAdd(string.Concat(vulnerability, "|", product), linkset);

        if (string.IsNullOrEmpty(package))
        {
            continue;
        }

        packageLookup.TryAdd(string.Concat(vulnerability, "|", package), linkset);
    }

    var frozenExact = exactLookup.ToImmutableDictionary(comparer);
    var frozenPackage = packageLookup.ToImmutableDictionary(comparer);
    return new VexIndex(frozenExact, frozenPackage);
}
/// <summary>
/// Splits <paramref name="components"/> into consecutive batches, preserving element order.
/// </summary>
/// <param name="components">Components to split; the caller supplies them already ordered.</param>
/// <param name="batchSize">Maximum batch length; zero or negative selects the default of 1000.</param>
/// <returns>
/// Batches of <paramref name="batchSize"/> elements each, except that the last may be shorter.
/// Empty input yields no batches.
/// </returns>
private static IReadOnlyList<IReadOnlyList<SbomComponentInput>> CreateDeterministicBatches(
    ImmutableArray<SbomComponentInput> components,
    int batchSize)
{
    const int DefaultBatchSize = 1000;

    var size = batchSize > 0 ? batchSize : DefaultBatchSize;
    var batches = new List<IReadOnlyList<SbomComponentInput>>();

    var offset = 0;
    while (offset < components.Length)
    {
        var count = Math.Min(size, components.Length - offset);
        var batch = new SbomComponentInput[count];
        components.CopyTo(offset, batch, 0, count);
        batches.Add(batch);

        // Advance by what was copied rather than by the requested size: the offset then
        // never exceeds components.Length, so it cannot overflow for very large sizes.
        offset += count;
    }

    return batches;
}
/// <summary>
/// Immutable advisory lookup produced by <c>BuildAdvisoryIndex</c>.
/// </summary>
/// <param name="ByExactPurl">Advisories keyed by the lower-cased PURL they list; keys compare case-insensitively.</param>
/// <param name="ByPackageKey">Advisories keyed by the package key extracted from each listed PURL; empty keys are never stored.</param>
private sealed record AdvisoryIndex(
ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByExactPurl,
ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByPackageKey);
/// <summary>
/// Immutable VEX lookup produced by <c>BuildVexIndex</c>. Keys are lower-cased and have the
/// form "vulnerability|product"; each key maps to the first linkset indexed for it.
/// </summary>
/// <param name="ByVulnAndProduct">Linksets keyed by vulnerability id and the full product key.</param>
/// <param name="ByVulnAndPackageKey">Linksets keyed by vulnerability id and the package key extracted from the product key.</param>
private sealed record VexIndex(
ImmutableDictionary<string, VexLinksetInput> ByVulnAndProduct,
ImmutableDictionary<string, VexLinksetInput> ByVulnAndPackageKey);
/// <summary>
/// Mutable accumulator for the counters gathered during a join; <see cref="Build"/> copies
/// the current values into a <c>SelectionJoinStatistics</c>.
/// </summary>
private sealed class SelectionJoinStatsBuilder
{
    /// <summary>Number of components submitted to the join.</summary>
    public int TotalComponents { get; set; }

    /// <summary>Number of advisory linksets supplied with the input.</summary>
    public int TotalAdvisories { get; set; }

    /// <summary>Number of tuples emitted.</summary>
    public int MatchedTuples { get; set; }

    /// <summary>Tuples whose match type is <c>ExactPurl</c>.</summary>
    public int ExactPurlMatches { get; set; }

    /// <summary>Tuples whose match type is <c>PackageKeyMatch</c>.</summary>
    public int PackageKeyMatches { get; set; }

    /// <summary>Tuples whose match type is <c>CpeMatch</c>.</summary>
    public int CpeMatches { get; set; }

    /// <summary>Tuples whose match type is <c>EquivalenceMatch</c>.</summary>
    public int EquivalenceMatches { get; set; }

    /// <summary>Tuples that carry a VEX linkset.</summary>
    public int VexOverlays { get; set; }

    /// <summary>Elapsed processing time in milliseconds.</summary>
    public long ProcessingTimeMs { get; set; }

    /// <summary>Creates the statistics value from the current counter values.</summary>
    /// <returns>A new <c>SelectionJoinStatistics</c> built from the counters above.</returns>
    public SelectionJoinStatistics Build()
    {
        return new SelectionJoinStatistics(
            TotalComponents,
            TotalAdvisories,
            MatchedTuples,
            ExactPurlMatches,
            PackageKeyMatches,
            CpeMatches,
            EquivalenceMatches,
            VexOverlays,
            ProcessingTimeMs);
    }
}
}