using System.Collections.Immutable;
using System.Diagnostics;

namespace StellaOps.Policy.Engine.SelectionJoin;

/// <summary>
/// Service for resolving SBOM↔Advisory↔VEX tuples using linksets and PURL equivalence.
/// Matching and ordering are deterministic: given identical inputs, the same tuples and
/// unmatched components are produced in the same order. The only non-deterministic value
/// is the wall-clock <c>ProcessingTimeMs</c> statistic.
/// </summary>
public sealed class SelectionJoinService
{
    /// <summary>
    /// Resolves SBOM components against advisory and VEX linksets.
    /// Components are sorted, split into fixed-size batches, and matched batch by batch.
    /// </summary>
    /// <param name="input">Batch input carrying components, advisories, VEX linksets and options.</param>
    /// <returns>
    /// The matched tuples (sorted by component PURL, then advisory ID), the components that had
    /// no advisory match (sorted by PURL), and the collected statistics.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public SelectionJoinBatchResult ResolveTuples(SelectionJoinBatchInput input)
    {
        ArgumentNullException.ThrowIfNull(input);

        var stopwatch = Stopwatch.StartNew();
        var equivalenceTable = input.EquivalenceTable ?? PurlEquivalenceTable.Empty;
        var options = input.Options;

        // Build lookup indexes for deterministic matching
        var advisoryIndex = BuildAdvisoryIndex(input.Advisories);
        var vexIndex = BuildVexIndex(input.VexLinksets);

        // Process components in deterministic order (OrderBy is stable, so ties keep input order)
        var orderedComponents = input.Components
            .OrderBy(c => c.Purl, StringComparer.OrdinalIgnoreCase)
            .ThenBy(c => c.Name, StringComparer.OrdinalIgnoreCase)
            .ToImmutableArray();

        var tuples = new List<SelectionJoinTuple>();
        var unmatched = new List<SbomComponentInput>();
        var stats = new SelectionJoinStatsBuilder();

        stats.TotalComponents = orderedComponents.Length;
        stats.TotalAdvisories = input.Advisories.Length;

        // Process in batches for memory efficiency
        var batches = CreateDeterministicBatches(orderedComponents, options.MaxBatchSize);

        foreach (var batch in batches)
        {
            ProcessBatch(
                batch,
                input.TenantId,
                advisoryIndex,
                vexIndex,
                equivalenceTable,
                options,
                tuples,
                unmatched,
                stats);
        }

        stopwatch.Stop();
        stats.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;

        // Sort results for deterministic output
        var sortedTuples = tuples
            .OrderBy(t => t.Component.Purl, StringComparer.OrdinalIgnoreCase)
            .ThenBy(t => t.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
            .ToImmutableArray();

        var sortedUnmatched = unmatched
            .OrderBy(c => c.Purl, StringComparer.OrdinalIgnoreCase)
            .ToImmutableArray();

        return new SelectionJoinBatchResult(
            input.BatchId,
            sortedTuples,
            sortedUnmatched,
            stats.Build());
    }

    /// <summary>
    /// Matches every component of one batch and appends the results to the shared
    /// <paramref name="tuples"/>, <paramref name="unmatched"/> and <paramref name="stats"/> accumulators.
    /// </summary>
    private static void ProcessBatch(
        IReadOnlyList<SbomComponentInput> components,
        string tenantId,
        AdvisoryIndex advisoryIndex,
        VexIndex vexIndex,
        PurlEquivalenceTable equivalenceTable,
        SelectionJoinOptions options,
        List<SelectionJoinTuple> tuples,
        List<SbomComponentInput> unmatched,
        SelectionJoinStatsBuilder stats)
    {
        foreach (var component in components)
        {
            var matches = FindAdvisoryMatches(component, advisoryIndex, equivalenceTable, options);

            if (matches.Count == 0)
            {
                unmatched.Add(component);
                continue;
            }

            // NOTE(review): a component whose matches are ALL below the threshold ends up in
            // neither `tuples` nor `unmatched`. Behavior kept as-is; confirm this is intended.
            foreach (var (advisory, matchType, confidence) in matches)
            {
                if (confidence < options.MinConfidenceThreshold)
                {
                    continue;
                }

                // Find matching VEX linkset (may be null when no statement applies)
                var vex = FindVexMatch(component, advisory, vexIndex);

                var tupleId = SelectionJoinTuple.CreateTupleId(
                    tenantId,
                    component.Purl,
                    advisory.AdvisoryId);

                var tuple = new SelectionJoinTuple(
                    tupleId,
                    component,
                    advisory,
                    vex,
                    matchType,
                    confidence);

                tuples.Add(tuple);

                // Update statistics
                stats.MatchedTuples++;

                switch (matchType)
                {
                    case SelectionMatchType.ExactPurl:
                        stats.ExactPurlMatches++;
                        break;
                    case SelectionMatchType.PackageKeyMatch:
                        stats.PackageKeyMatches++;
                        break;
                    case SelectionMatchType.CpeMatch:
                        stats.CpeMatches++;
                        break;
                    case SelectionMatchType.EquivalenceMatch:
                        stats.EquivalenceMatches++;
                        break;
                }

                if (vex is not null)
                {
                    stats.VexOverlays++;
                }
            }
        }
    }

    /// <summary>
    /// Collects the advisories matching a component in three tiers: exact PURL, package key,
    /// then equivalence-table PURLs. An advisory ID matched by an earlier tier is not added
    /// again by a later one.
    /// </summary>
    /// <returns>Matches sorted by confidence (descending), then advisory ID.</returns>
    private static IReadOnlyList<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)> FindAdvisoryMatches(
        SbomComponentInput component,
        AdvisoryIndex index,
        PurlEquivalenceTable equivalenceTable,
        SelectionJoinOptions options)
    {
        var matches = new List<(AdvisoryLinksetInput, SelectionMatchType, double)>();

        // Advisory IDs already present in `matches`. Ordinal (case-sensitive) comparison keeps
        // the semantics of the previous `==` check while making the lookup O(1).
        var matchedAdvisoryIds = new HashSet<string>(StringComparer.Ordinal);

        var componentPurl = component.Purl.ToLowerInvariant();
        var componentKey = component.PackageKey.ToLowerInvariant();

        // 1. Exact PURL match (highest confidence)
        if (index.ByExactPurl.TryGetValue(componentPurl, out var exactMatches))
        {
            foreach (var advisory in exactMatches)
            {
                var confidence = ComputeFinalConfidence(1.0, advisory.Confidence);
                matches.Add((advisory, SelectionMatchType.ExactPurl, confidence));
                matchedAdvisoryIds.Add(advisory.AdvisoryId);
            }
        }

        // 2. Package key match (same package, possibly different version)
        if (index.ByPackageKey.TryGetValue(componentKey, out var keyMatches))
        {
            foreach (var advisory in keyMatches)
            {
                // Skip if already matched by exact PURL
                if (!matchedAdvisoryIds.Add(advisory.AdvisoryId))
                {
                    continue;
                }

                var confidence = ComputeFinalConfidence(0.8, advisory.Confidence);
                matches.Add((advisory, SelectionMatchType.PackageKeyMatch, confidence));
            }
        }

        // 3. Equivalence table match
        var equivalents = equivalenceTable.GetEquivalents(componentPurl);
        foreach (var equivalent in equivalents)
        {
            // The component's own PURL was already handled by tiers 1 and 2
            if (string.Equals(equivalent, componentPurl, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var equivalentKey = PurlEquivalence.ExtractPackageKey(equivalent).ToLowerInvariant();
            if (index.ByPackageKey.TryGetValue(equivalentKey, out var equivMatches))
            {
                foreach (var advisory in equivMatches)
                {
                    if (!matchedAdvisoryIds.Add(advisory.AdvisoryId))
                    {
                        continue;
                    }

                    var confidence = ComputeFinalConfidence(0.9, advisory.Confidence);
                    matches.Add((advisory, SelectionMatchType.EquivalenceMatch, confidence));
                }
            }
        }

        // Sort matches by confidence (descending) for deterministic ordering
        return matches
            .OrderByDescending(m => m.Item3)
            .ThenBy(m => m.Item1.AdvisoryId, StringComparer.OrdinalIgnoreCase)
            .ToList();
    }

    /// <summary>
    /// Looks up the VEX linkset for a component/advisory pair. For each advisory alias the
    /// exact product key is tried first, then the package key; finally the advisory ID itself
    /// is tried against the exact product key.
    /// </summary>
    /// <returns>The first VEX linkset found, or <c>null</c> when none applies.</returns>
    private static VexLinksetInput? FindVexMatch(
        SbomComponentInput component,
        AdvisoryLinksetInput advisory,
        VexIndex vexIndex)
    {
        // Loop-invariant key parts, computed once
        var productPart = component.Purl.ToLowerInvariant();
        var packagePart = component.PackageKey.ToLowerInvariant();

        foreach (var alias in advisory.Aliases)
        {
            var vulnPart = alias.ToLowerInvariant();

            // Try exact vulnerability ID + product key match
            if (vexIndex.ByVulnAndProduct.TryGetValue($"{vulnPart}|{productPart}", out var vex))
            {
                return vex;
            }

            // Try package key match
            if (vexIndex.ByVulnAndPackageKey.TryGetValue($"{vulnPart}|{packagePart}", out vex))
            {
                return vex;
            }
        }

        // Try advisory ID directly
        var directKey = $"{advisory.AdvisoryId.ToLowerInvariant()}|{productPart}";
        if (vexIndex.ByVulnAndProduct.TryGetValue(directKey, out var directVex))
        {
            return directVex;
        }

        return null;
    }

    /// <summary>
    /// Combines the match-tier confidence with the linkset confidence as their geometric mean.
    /// A missing linkset confidence is treated as 1.0.
    /// </summary>
    private static double ComputeFinalConfidence(double matchConfidence, double? linksetConfidence)
    {
        var linkset = linksetConfidence ?? 1.0;

        // Geometric mean of match confidence and linkset confidence
        return Math.Sqrt(matchConfidence * linkset);
    }

    /// <summary>
    /// Builds the advisory lookup tables keyed by lower-cased PURL and by package key.
    /// Each advisory appears at most once per key.
    /// </summary>
    private static AdvisoryIndex BuildAdvisoryIndex(ImmutableArray<AdvisoryLinksetInput> advisories)
    {
        var byExactPurl = new Dictionary<string, List<AdvisoryLinksetInput>>(StringComparer.OrdinalIgnoreCase);
        var byPackageKey = new Dictionary<string, List<AdvisoryLinksetInput>>(StringComparer.OrdinalIgnoreCase);

        foreach (var advisory in advisories)
        {
            // PURLs of this advisory that are already indexed. Uses the same comparer as the
            // dictionaries so that duplicate or case-variant PURLs cannot register the advisory
            // twice under one key (which would yield duplicate tuples downstream).
            var seenPurls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            foreach (var purl in advisory.Purls)
            {
                var normalizedPurl = purl.ToLowerInvariant();
                if (!seenPurls.Add(normalizedPurl))
                {
                    continue;
                }

                var packageKey = PurlEquivalence.ExtractPackageKey(normalizedPurl);

                if (!byExactPurl.TryGetValue(normalizedPurl, out var exactList))
                {
                    exactList = new List<AdvisoryLinksetInput>();
                    byExactPurl[normalizedPurl] = exactList;
                }

                exactList.Add(advisory);

                if (!string.IsNullOrEmpty(packageKey))
                {
                    if (!byPackageKey.TryGetValue(packageKey, out var keyList))
                    {
                        keyList = new List<AdvisoryLinksetInput>();
                        byPackageKey[packageKey] = keyList;
                    }

                    // Avoid duplicates: several versions of one package share a package key
                    if (!keyList.Any(a => a.AdvisoryId == advisory.AdvisoryId))
                    {
                        keyList.Add(advisory);
                    }
                }
            }
        }

        return new AdvisoryIndex(
            byExactPurl.ToImmutableDictionary(
                kvp => kvp.Key,
                kvp => kvp.Value.ToImmutableArray(),
                StringComparer.OrdinalIgnoreCase),
            byPackageKey.ToImmutableDictionary(
                kvp => kvp.Key,
                kvp => kvp.Value.ToImmutableArray(),
                StringComparer.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Builds the VEX lookup tables keyed by <c>"vulnerabilityId|productKey"</c> and
    /// <c>"vulnerabilityId|packageKey"</c>. When several linksets share a key, the first
    /// one in input order wins.
    /// </summary>
    private static VexIndex BuildVexIndex(ImmutableArray<VexLinksetInput> vexLinksets)
    {
        var byVulnAndProduct = new Dictionary<string, VexLinksetInput>(StringComparer.OrdinalIgnoreCase);
        var byVulnAndPackageKey = new Dictionary<string, VexLinksetInput>(StringComparer.OrdinalIgnoreCase);

        foreach (var vex in vexLinksets)
        {
            var vulnKey = vex.VulnerabilityId.ToLowerInvariant();
            var productKey = vex.ProductKey.ToLowerInvariant();
            var packageKey = PurlEquivalence.ExtractPackageKey(productKey);

            var exactKey = $"{vulnKey}|{productKey}";
            byVulnAndProduct.TryAdd(exactKey, vex);

            if (!string.IsNullOrEmpty(packageKey))
            {
                var pkgLookupKey = $"{vulnKey}|{packageKey}";
                byVulnAndPackageKey.TryAdd(pkgLookupKey, vex);
            }
        }

        return new VexIndex(
            byVulnAndProduct.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase),
            byVulnAndPackageKey.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Splits the (already ordered) components into consecutive batches of at most
    /// <paramref name="batchSize"/> items. A non-positive size falls back to 1000.
    /// </summary>
    private static IReadOnlyList<IReadOnlyList<SbomComponentInput>> CreateDeterministicBatches(
        ImmutableArray<SbomComponentInput> components,
        int batchSize)
    {
        if (batchSize <= 0)
        {
            batchSize = 1000;
        }

        var batches = new List<IReadOnlyList<SbomComponentInput>>();

        for (var i = 0; i < components.Length; i += batchSize)
        {
            var remaining = components.Length - i;
            var count = Math.Min(batchSize, remaining);
            var batch = new List<SbomComponentInput>(count);

            for (var j = 0; j < count; j++)
            {
                batch.Add(components[i + j]);
            }

            batches.Add(batch);
        }

        return batches;
    }

    /// <summary>Advisory lookup tables keyed by exact PURL and by package key.</summary>
    private sealed record AdvisoryIndex(
        ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByExactPurl,
        ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByPackageKey);

    /// <summary>VEX lookup tables keyed by vulnerability ID combined with product or package key.</summary>
    private sealed record VexIndex(
        ImmutableDictionary<string, VexLinksetInput> ByVulnAndProduct,
        ImmutableDictionary<string, VexLinksetInput> ByVulnAndPackageKey);

    /// <summary>Mutable accumulator for the counters reported in <see cref="SelectionJoinStatistics"/>.</summary>
    private sealed class SelectionJoinStatsBuilder
    {
        public int TotalComponents { get; set; }
        public int TotalAdvisories { get; set; }
        public int MatchedTuples { get; set; }
        public int ExactPurlMatches { get; set; }
        public int PackageKeyMatches { get; set; }
        public int CpeMatches { get; set; }
        public int EquivalenceMatches { get; set; }
        public int VexOverlays { get; set; }
        public long ProcessingTimeMs { get; set; }

        /// <summary>Creates the immutable statistics snapshot from the current counter values.</summary>
        public SelectionJoinStatistics Build() => new(
            TotalComponents,
            TotalAdvisories,
            MatchedTuples,
            ExactPurlMatches,
            PackageKeyMatches,
            CpeMatches,
            EquivalenceMatches,
            VexOverlays,
            ProcessingTimeMs);
    }
}