up
This commit is contained in:
@@ -0,0 +1,308 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Policy.Engine.SelectionJoin;
|
||||
|
||||
/// <summary>
/// PURL equivalence table for mapping package identifiers across ecosystems.
/// Enables matching when the same package has different identifiers in
/// different sources (e.g., npm vs GitHub advisory database naming).
/// </summary>
/// <remarks>
/// Every PURL is trimmed and lower-cased (invariant culture) before it is stored or
/// looked up, so lookups ignore case and surrounding whitespace.
/// </remarks>
public sealed class PurlEquivalenceTable
{
    // Normalized PURL -> every member of its equivalence group (itself included).
    private readonly ImmutableDictionary<string, ImmutableHashSet<string>> _equivalenceGroups;

    // Normalized PURL -> canonical representative of its equivalence group.
    private readonly ImmutableDictionary<string, string> _canonicalMapping;

    private PurlEquivalenceTable(
        ImmutableDictionary<string, ImmutableHashSet<string>> equivalenceGroups,
        ImmutableDictionary<string, string> canonicalMapping)
    {
        _equivalenceGroups = equivalenceGroups;
        _canonicalMapping = canonicalMapping;
    }

    /// <summary>
    /// Creates an empty equivalence table.
    /// </summary>
    public static PurlEquivalenceTable Empty { get; } = new(
        ImmutableDictionary<string, ImmutableHashSet<string>>.Empty,
        ImmutableDictionary<string, string>.Empty);

    /// <summary>
    /// Creates an equivalence table from a list of equivalence groups.
    /// Each group contains PURLs that should be considered equivalent.
    /// </summary>
    /// <param name="groups">
    /// Groups of equivalent PURLs. Blank entries are ignored, and groups with fewer than
    /// two distinct PURLs after normalization are skipped. Null groups are skipped.
    /// </param>
    /// <returns>
    /// A table in which the canonical form of every group is its ordinally smallest
    /// normalized member. If a PURL appears in several groups, the last group wins for it.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="groups"/> is null.</exception>
    public static PurlEquivalenceTable FromGroups(IEnumerable<IEnumerable<string>> groups)
    {
        ArgumentNullException.ThrowIfNull(groups);

        var equivalenceBuilder = ImmutableDictionary.CreateBuilder<string, ImmutableHashSet<string>>(
            StringComparer.OrdinalIgnoreCase);
        var canonicalBuilder = ImmutableDictionary.CreateBuilder<string, string>(
            StringComparer.OrdinalIgnoreCase);

        foreach (var group in groups)
        {
            if (group is null)
            {
                continue;
            }

            // Keep the sorted list: a hash set does not preserve ordering, so the
            // canonical member must be chosen before the set is built.
            var members = group
                .Where(p => !string.IsNullOrWhiteSpace(p))
                .Select(Normalize)
                .Distinct(StringComparer.Ordinal)
                .OrderBy(p => p, StringComparer.Ordinal)
                .ToList();

            if (members.Count < 2)
            {
                continue;
            }

            // First item (lexicographically) is the canonical form.
            var canonical = members[0];
            var memberSet = members.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);

            foreach (var purl in members)
            {
                equivalenceBuilder[purl] = memberSet;
                canonicalBuilder[purl] = canonical;
            }
        }

        return new PurlEquivalenceTable(
            equivalenceBuilder.ToImmutable(),
            canonicalBuilder.ToImmutable());
    }

    /// <summary>
    /// Gets the canonical form of a PURL, or the normalized input if it is not in the table.
    /// </summary>
    /// <param name="purl">PURL to look up.</param>
    /// <returns>
    /// The canonical PURL of the group, the trimmed lower-cased input when the PURL is
    /// unknown, or an empty string when the input is null or whitespace.
    /// </returns>
    public string GetCanonical(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var normalized = Normalize(purl);
        return _canonicalMapping.TryGetValue(normalized, out var canonical)
            ? canonical
            : normalized;
    }

    /// <summary>
    /// Gets all equivalent PURLs for a given PURL.
    /// </summary>
    /// <param name="purl">PURL to look up.</param>
    /// <returns>
    /// The full group (including the PURL itself) when the PURL is known, a single-element
    /// set holding the normalized input when it is unknown, or an empty set for blank input.
    /// </returns>
    public IReadOnlySet<string> GetEquivalents(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return ImmutableHashSet<string>.Empty;
        }

        var normalized = Normalize(purl);
        return _equivalenceGroups.TryGetValue(normalized, out var group)
            ? group
            : ImmutableHashSet.Create(StringComparer.OrdinalIgnoreCase, normalized);
    }

    /// <summary>
    /// Checks if two PURLs are equivalent.
    /// </summary>
    /// <param name="purl1">First PURL.</param>
    /// <param name="purl2">Second PURL.</param>
    /// <returns>
    /// True when both normalize to the same string or share a canonical form;
    /// false otherwise, including when either input is null or whitespace.
    /// </returns>
    public bool AreEquivalent(string purl1, string purl2)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return false;
        }

        var first = Normalize(purl1);
        var second = Normalize(purl2);

        if (string.Equals(first, second, StringComparison.Ordinal))
        {
            return true;
        }

        return string.Equals(GetCanonical(first), GetCanonical(second), StringComparison.Ordinal);
    }

    /// <summary>
    /// Number of equivalence groups in the table, counted as distinct canonical PURLs.
    /// </summary>
    public int GroupCount => _canonicalMapping
        .Values
        .Distinct(StringComparer.Ordinal)
        .Count();

    /// <summary>
    /// Total number of PURLs in the table.
    /// </summary>
    public int TotalEntries => _canonicalMapping.Count;

    // Single normalization used for both storage and lookup.
    private static string Normalize(string purl) => purl.Trim().ToLowerInvariant();
}
|
||||
|
||||
/// <summary>
|
||||
/// Static utilities for PURL equivalence matching.
|
||||
/// </summary>
|
||||
public static class PurlEquivalence
|
||||
{
|
||||
/// <summary>
/// Extracts the package key from a PURL (removes version suffix).
/// Example: "pkg:npm/lodash@4.17.21" → "pkg:npm/lodash"
/// </summary>
/// <param name="purl">PURL to reduce; surrounding whitespace is ignored.</param>
/// <returns>
/// The input without its "@version" part and without anything from the first '?' or '#'
/// that follows a version. Unversioned inputs starting with "pkg:" are returned without
/// qualifiers/subpath; other unversioned inputs are returned trimmed but otherwise
/// unchanged. Returns an empty string for null or whitespace input.
/// </returns>
public static string ExtractPackageKey(string purl)
{
    if (string.IsNullOrWhiteSpace(purl))
    {
        return string.Empty;
    }

    var trimmed = purl.Trim();

    // Qualifiers ("?...") and subpath ("#...") may themselves contain '@' or '/',
    // so the version separator is only searched for in the part before them.
    var coordinatesEnd = trimmed.Length;
    var qualifierIndex = trimmed.IndexOf('?');
    if (qualifierIndex >= 0)
    {
        coordinatesEnd = qualifierIndex;
    }

    var subpathIndex = trimmed.IndexOf('#');
    if (subpathIndex >= 0 && subpathIndex < coordinatesEnd)
    {
        coordinatesEnd = subpathIndex;
    }

    var coordinates = trimmed[..coordinatesEnd];
    var lastSlash = coordinates.LastIndexOf('/');
    var atIndex = coordinates.LastIndexOf('@');

    // The version separator sits inside the final path segment, after at least one
    // name character. An '@' that opens a segment (e.g. "@scope/package") is an npm
    // scope marker, not a version separator.
    if (atIndex > lastSlash + 1)
    {
        return coordinates[..atIndex];
    }

    // No version present. Only strip qualifiers for real PURLs, so that other
    // identifiers passed through here (e.g. CPE strings containing '?') stay intact.
    return trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase)
        ? coordinates
        : trimmed;
}
|
||||
|
||||
/// <summary>
/// Returns the ecosystem (PURL type) segment of a PURL.
/// Example: "pkg:npm/lodash@4.17.21" → "npm"
/// </summary>
/// <param name="purl">PURL to inspect; surrounding whitespace is ignored.</param>
/// <returns>
/// The text between the "pkg:" prefix and the first '/', with its original casing;
/// null when the input is blank, lacks the "pkg:" prefix (case-insensitive),
/// has no '/' after the prefix, or has an empty type segment.
/// </returns>
public static string? ExtractEcosystem(string purl)
{
    const string scheme = "pkg:";

    if (string.IsNullOrWhiteSpace(purl))
    {
        return null;
    }

    var text = purl.Trim();
    if (!text.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Search for the separator after the scheme; a hit directly after the scheme
    // means the type segment is empty.
    var separator = text.IndexOf('/', scheme.Length);
    if (separator <= scheme.Length)
    {
        return null;
    }

    return text[scheme.Length..separator];
}
|
||||
|
||||
/// <summary>
/// Extracts the namespace from a PURL (if present).
/// Example: "pkg:npm/@scope/package@1.0.0" → "@scope"
/// </summary>
/// <param name="purl">PURL to inspect; surrounding whitespace is ignored.</param>
/// <returns>
/// Everything between the type segment and the final name segment, which may span
/// several '/'-separated segments (e.g. "github.com/foo" for
/// "pkg:golang/github.com/foo/bar@v1"). Returns null when the input is blank, does not
/// start with "pkg:", or has no namespace.
/// </returns>
public static string? ExtractNamespace(string purl)
{
    if (string.IsNullOrWhiteSpace(purl))
    {
        return null;
    }

    var trimmed = purl.Trim();
    if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Ignore qualifiers ("?...") and subpath ("#..."): both may contain '/' and
    // would otherwise be mistaken for path separators.
    var coordinatesEnd = trimmed.Length;
    var qualifierIndex = trimmed.IndexOf('?');
    if (qualifierIndex >= 0)
    {
        coordinatesEnd = qualifierIndex;
    }

    var subpathIndex = trimmed.IndexOf('#');
    if (subpathIndex >= 0 && subpathIndex < coordinatesEnd)
    {
        coordinatesEnd = subpathIndex;
    }

    var afterPrefix = trimmed[4..coordinatesEnd]; // Skip "pkg:"
    var typeSeparator = afterPrefix.IndexOf('/');
    if (typeSeparator < 0)
    {
        return null;
    }

    // "namespace/.../name[@version]": the namespace is whatever precedes the last '/'.
    var path = afterPrefix[(typeSeparator + 1)..];
    var nameSeparator = path.LastIndexOf('/');

    return nameSeparator > 0 ? path[..nameSeparator] : null;
}
|
||||
|
||||
/// <summary>
/// Returns the final path segment of a PURL's package key, i.e. the package name.
/// Example: "pkg:npm/@scope/package@1.0.0" → "package"
/// </summary>
/// <param name="purl">PURL to inspect.</param>
/// <returns>
/// The text after the last '/' of the value produced by <c>ExtractPackageKey</c>;
/// null when that value is blank or contains no '/'.
/// </returns>
public static string? ExtractName(string purl)
{
    var key = ExtractPackageKey(purl);
    if (string.IsNullOrWhiteSpace(key))
    {
        return null;
    }

    return key.LastIndexOf('/') switch
    {
        < 0 => null,
        var separator => key[(separator + 1)..],
    };
}
|
||||
|
||||
/// <summary>
/// Computes match confidence between two PURLs.
/// </summary>
/// <remarks>
/// Both inputs are trimmed and lower-cased first. The first tier that applies wins:
/// 1.0 for identical PURLs, 0.95 when the optional equivalence table reports them as
/// equivalent, 0.8 for equal package keys (same package, different version),
/// 0.5 for equal ecosystem and name (different namespace), otherwise 0.0.
/// </remarks>
/// <param name="purl1">First PURL.</param>
/// <param name="purl2">Second PURL.</param>
/// <param name="equivalenceTable">Optional equivalence table; skipped when null.</param>
/// <returns>One of 1.0, 0.95, 0.8, 0.5 or 0.0; 0.0 when either input is null or whitespace.</returns>
public static double ComputeMatchConfidence(string purl1, string purl2, PurlEquivalenceTable? equivalenceTable = null)
{
    if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
    {
        return 0.0;
    }

    var left = purl1.Trim().ToLowerInvariant();
    var right = purl2.Trim().ToLowerInvariant();

    // Tier 1: exact match.
    if (string.Equals(left, right, StringComparison.Ordinal))
    {
        return 1.0;
    }

    // Tier 2: equivalence table match.
    if (equivalenceTable?.AreEquivalent(left, right) == true)
    {
        return 0.95;
    }

    // Tier 3: package key match (same package, different version).
    var leftKey = ExtractPackageKey(left);
    var rightKey = ExtractPackageKey(right);
    var sameKey = !string.IsNullOrEmpty(leftKey)
        && string.Equals(leftKey, rightKey, StringComparison.OrdinalIgnoreCase);
    if (sameKey)
    {
        return 0.8;
    }

    // Tier 4: same ecosystem and name (different namespace).
    var leftEcosystem = ExtractEcosystem(left);
    var sameEcosystem = !string.IsNullOrEmpty(leftEcosystem)
        && string.Equals(leftEcosystem, ExtractEcosystem(right), StringComparison.OrdinalIgnoreCase);
    if (!sameEcosystem)
    {
        return 0.0;
    }

    var leftName = ExtractName(left);
    var sameName = !string.IsNullOrEmpty(leftName)
        && string.Equals(leftName, ExtractName(right), StringComparison.OrdinalIgnoreCase);

    return sameName ? 0.5 : 0.0;
}
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.Policy.Engine.SelectionJoin;
|
||||
|
||||
/// <summary>
/// An SBOM component supplied to the selection join.
/// </summary>
/// <param name="Purl">Package URL (e.g., pkg:npm/lodash@4.17.21).</param>
/// <param name="Name">Component name.</param>
/// <param name="Version">Component version.</param>
/// <param name="Ecosystem">Package ecosystem (npm, maven, pypi, etc.); may be null.</param>
/// <param name="Metadata">Additional component metadata.</param>
public sealed record SbomComponentInput(
    string Purl,
    string Name,
    string Version,
    string? Ecosystem,
    ImmutableDictionary<string, string> Metadata)
{
    /// <summary>
    /// The component PURL with its version suffix removed, as produced by
    /// <see cref="PurlEquivalence.ExtractPackageKey"/>. Recomputed on every access.
    /// </summary>
    public string PackageKey
    {
        get { return PurlEquivalence.ExtractPackageKey(Purl); }
    }
}
|
||||
|
||||
/// <summary>
/// An advisory linkset reference supplied to the selection join.
/// </summary>
/// <param name="AdvisoryId">Advisory identifier (CVE, GHSA, etc.).</param>
/// <param name="Source">Advisory source.</param>
/// <param name="Purls">Affected PURLs listed by the advisory.</param>
/// <param name="Cpes">Affected CPEs listed by the advisory.</param>
/// <param name="Aliases">Advisory aliases (e.g., CVE-2021-1234, GHSA-xxxx).</param>
/// <param name="Confidence">Linkset confidence score; null when none is supplied.</param>
public sealed record AdvisoryLinksetInput(
    string AdvisoryId,
    string Source,
    ImmutableArray<string> Purls,
    ImmutableArray<string> Cpes,
    ImmutableArray<string> Aliases,
    double? Confidence);
|
||||
|
||||
/// <summary>
/// A VEX linkset reference supplied to the selection join.
/// </summary>
/// <param name="LinksetId">VEX linkset identifier.</param>
/// <param name="VulnerabilityId">Identifier of the vulnerability the statement is about.</param>
/// <param name="ProductKey">Product key (PURL or CPE).</param>
/// <param name="Status">VEX status (not_affected, affected, fixed, under_investigation).</param>
/// <param name="Justification">VEX justification; may be null.</param>
/// <param name="Confidence">Linkset confidence level.</param>
public sealed record VexLinksetInput(
    string LinksetId,
    string VulnerabilityId,
    string ProductKey,
    string Status,
    string? Justification,
    VexConfidenceLevel Confidence);
|
||||
|
||||
/// <summary>
/// Confidence level attached to a VEX linkset.
/// Numeric values increase with confidence.
/// </summary>
public enum VexConfidenceLevel
{
    /// <summary>Low confidence (numeric value 0).</summary>
    Low = 0,

    /// <summary>Medium confidence (numeric value 1).</summary>
    Medium = 1,

    /// <summary>High confidence (numeric value 2).</summary>
    High = 2,
}
|
||||
|
||||
/// <summary>
/// A resolved SBOM↔Advisory↔VEX tuple.
/// </summary>
/// <param name="TupleId">Deterministic identifier for this tuple.</param>
/// <param name="Component">The SBOM component.</param>
/// <param name="Advisory">The matched advisory linkset.</param>
/// <param name="Vex">The matched VEX linkset, or null when there is none.</param>
/// <param name="MatchType">How the match was determined.</param>
/// <param name="MatchConfidence">Overall confidence in the match.</param>
public sealed record SelectionJoinTuple(
    string TupleId,
    SbomComponentInput Component,
    AdvisoryLinksetInput Advisory,
    VexLinksetInput? Vex,
    SelectionMatchType MatchType,
    double MatchConfidence)
{
    /// <summary>
    /// Builds a deterministic tuple ID from tenant, component PURL and advisory ID.
    /// </summary>
    /// <param name="tenantId">Tenant identifier; null is treated as empty. Trimmed and lower-cased.</param>
    /// <param name="componentPurl">Component PURL; null is treated as empty. Trimmed and lower-cased.</param>
    /// <param name="advisoryId">Advisory identifier; null is treated as empty. Trimmed, case preserved.</param>
    /// <returns>
    /// "tuple:sha256:" followed by the lower-case hex SHA-256 of the UTF-8 encoded,
    /// '|'-joined normalized parts.
    /// </returns>
    public static string CreateTupleId(string tenantId, string componentPurl, string advisoryId)
    {
        // Note: the advisory ID is deliberately left in its original casing here.
        var tenantPart = (tenantId ?? string.Empty).Trim().ToLowerInvariant();
        var purlPart = (componentPurl ?? string.Empty).Trim().ToLowerInvariant();
        var advisoryPart = (advisoryId ?? string.Empty).Trim();

        var payload = Encoding.UTF8.GetBytes($"{tenantPart}|{purlPart}|{advisoryPart}");
        var digest = Convert.ToHexString(SHA256.HashData(payload)).ToLowerInvariant();

        return $"tuple:sha256:{digest}";
    }
}
|
||||
|
||||
/// <summary>
/// Describes which matching strategy produced a selection match.
/// </summary>
public enum SelectionMatchType
{
    /// <summary>Exact PURL match.</summary>
    ExactPurl = 0,

    /// <summary>Package key match (same package, different version).</summary>
    PackageKeyMatch = 1,

    /// <summary>CPE vendor/product match.</summary>
    CpeMatch = 2,

    /// <summary>Alias-based match.</summary>
    AliasMatch = 3,

    /// <summary>Equivalence table match.</summary>
    EquivalenceMatch = 4,

    /// <summary>No direct match, linked via advisory reference.</summary>
    IndirectMatch = 5,
}
|
||||
|
||||
/// <summary>
/// Everything a single selection join batch operation works on.
/// </summary>
/// <param name="TenantId">Tenant identifier.</param>
/// <param name="BatchId">Unique batch identifier for tracing.</param>
/// <param name="Components">SBOM components to match.</param>
/// <param name="Advisories">Advisory linksets to match against.</param>
/// <param name="VexLinksets">VEX linksets to include.</param>
/// <param name="EquivalenceTable">Optional PURL equivalence mappings; may be null.</param>
/// <param name="Options">Batch processing options.</param>
public sealed record SelectionJoinBatchInput(
    string TenantId,
    string BatchId,
    ImmutableArray<SbomComponentInput> Components,
    ImmutableArray<AdvisoryLinksetInput> Advisories,
    ImmutableArray<VexLinksetInput> VexLinksets,
    PurlEquivalenceTable? EquivalenceTable,
    SelectionJoinOptions Options);
|
||||
|
||||
/// <summary>
/// Tuning options for selection join batch processing.
/// </summary>
/// <param name="MaxBatchSize">Maximum items per batch for deterministic chunking. Defaults to 1000.</param>
/// <param name="IncludeIndirectMatches">Include indirect matches via advisory references. Defaults to false.</param>
/// <param name="MinConfidenceThreshold">Minimum confidence to include in results. Defaults to 0.0.</param>
public sealed record SelectionJoinOptions(
    int MaxBatchSize = 1000,
    bool IncludeIndirectMatches = false,
    double MinConfidenceThreshold = 0.0);
|
||||
|
||||
/// <summary>
/// Outcome of a selection join batch operation.
/// </summary>
/// <param name="BatchId">Batch identifier for tracing.</param>
/// <param name="Tuples">Resolved tuples.</param>
/// <param name="UnmatchedComponents">Components with no advisory matches.</param>
/// <param name="Statistics">Batch statistics.</param>
public sealed record SelectionJoinBatchResult(
    string BatchId,
    ImmutableArray<SelectionJoinTuple> Tuples,
    ImmutableArray<SbomComponentInput> UnmatchedComponents,
    SelectionJoinStatistics Statistics);
|
||||
|
||||
/// <summary>
/// Counters and timing collected for one selection join batch.
/// </summary>
/// <param name="TotalComponents">Total components in input.</param>
/// <param name="TotalAdvisories">Total advisories in input.</param>
/// <param name="MatchedTuples">Number of matched tuples.</param>
/// <param name="ExactPurlMatches">Number of exact PURL matches.</param>
/// <param name="PackageKeyMatches">Number of package key matches.</param>
/// <param name="CpeMatches">Number of CPE matches.</param>
/// <param name="EquivalenceMatches">Number of equivalence table matches.</param>
/// <param name="VexOverlays">Number of tuples with VEX overlays.</param>
/// <param name="ProcessingTimeMs">Processing time in milliseconds.</param>
public sealed record SelectionJoinStatistics(
    int TotalComponents,
    int TotalAdvisories,
    int MatchedTuples,
    int ExactPurlMatches,
    int PackageKeyMatches,
    int CpeMatches,
    int EquivalenceMatches,
    int VexOverlays,
    long ProcessingTimeMs);
|
||||
@@ -0,0 +1,390 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace StellaOps.Policy.Engine.SelectionJoin;
|
||||
|
||||
/// <summary>
|
||||
/// Service for resolving SBOM↔Advisory↔VEX tuples using linksets and PURL equivalence.
|
||||
/// All operations are deterministic: given identical inputs, produces identical outputs.
|
||||
/// </summary>
|
||||
public sealed class SelectionJoinService
|
||||
{
|
||||
/// <summary>
/// Resolves SBOM components against advisory and VEX linksets.
/// Uses deterministic batching for large datasets.
/// </summary>
/// <param name="input">
/// Batch input. A null equivalence table is treated as empty, null options fall back to
/// the default options, and uninitialized (default) arrays are treated as empty.
/// </param>
/// <returns>
/// Tuples ordered by component PURL then advisory ID, unmatched components ordered by
/// PURL (all case-insensitive ordinal), and the batch statistics.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public SelectionJoinBatchResult ResolveTuples(SelectionJoinBatchInput input)
{
    ArgumentNullException.ThrowIfNull(input);

    var stopwatch = Stopwatch.StartNew();

    var equivalenceTable = input.EquivalenceTable ?? PurlEquivalenceTable.Empty;
    var options = input.Options ?? new SelectionJoinOptions();

    // A default ImmutableArray throws on enumeration and on Length; normalize once here.
    var components = OrEmpty(input.Components);
    var advisories = OrEmpty(input.Advisories);
    var vexLinksets = OrEmpty(input.VexLinksets);

    // Build lookup indexes for deterministic matching
    var advisoryIndex = BuildAdvisoryIndex(advisories);
    var vexIndex = BuildVexIndex(vexLinksets);

    // Process components in deterministic order
    var orderedComponents = components
        .OrderBy(c => c.Purl, StringComparer.OrdinalIgnoreCase)
        .ThenBy(c => c.Name, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    var tuples = new List<SelectionJoinTuple>();
    var unmatched = new List<SbomComponentInput>();
    var stats = new SelectionJoinStatsBuilder
    {
        TotalComponents = orderedComponents.Length,
        TotalAdvisories = advisories.Length,
    };

    // Process in batches of at most options.MaxBatchSize components
    var batches = CreateDeterministicBatches(orderedComponents, options.MaxBatchSize);

    foreach (var batch in batches)
    {
        ProcessBatch(
            batch,
            input.TenantId,
            advisoryIndex,
            vexIndex,
            equivalenceTable,
            options,
            tuples,
            unmatched,
            stats);
    }

    stopwatch.Stop();
    stats.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;

    // Sort results for deterministic output
    var sortedTuples = tuples
        .OrderBy(t => t.Component.Purl, StringComparer.OrdinalIgnoreCase)
        .ThenBy(t => t.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    var sortedUnmatched = unmatched
        .OrderBy(c => c.Purl, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    return new SelectionJoinBatchResult(
        input.BatchId,
        sortedTuples,
        sortedUnmatched,
        stats.Build());

    // Maps an uninitialized array to the empty array; leaves initialized arrays untouched.
    static ImmutableArray<T> OrEmpty<T>(ImmutableArray<T> array) =>
        array.IsDefault ? ImmutableArray<T>.Empty : array;
}
|
||||
|
||||
/// <summary>
/// Matches every component of one batch against the advisory index and appends the
/// resulting tuples, unmatched components and counters to the shared accumulators.
/// </summary>
/// <param name="components">Components of this batch.</param>
/// <param name="tenantId">Tenant identifier used when deriving tuple IDs.</param>
/// <param name="advisoryIndex">Advisory lookup index.</param>
/// <param name="vexIndex">VEX lookup index.</param>
/// <param name="equivalenceTable">PURL equivalence table.</param>
/// <param name="options">Batch options; supplies the minimum confidence threshold.</param>
/// <param name="tuples">Receives the tuples that pass the threshold.</param>
/// <param name="unmatched">
/// Receives every component that produced no tuple, whether it had no candidate match
/// at all or all of its candidates fell below the confidence threshold.
/// </param>
/// <param name="stats">Statistics accumulator updated per emitted tuple.</param>
private static void ProcessBatch(
    IReadOnlyList<SbomComponentInput> components,
    string tenantId,
    AdvisoryIndex advisoryIndex,
    VexIndex vexIndex,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options,
    List<SelectionJoinTuple> tuples,
    List<SbomComponentInput> unmatched,
    SelectionJoinStatsBuilder stats)
{
    foreach (var component in components)
    {
        var matches = FindAdvisoryMatches(component, advisoryIndex, equivalenceTable, options);
        var emitted = false;

        foreach (var (advisory, matchType, confidence) in matches)
        {
            if (confidence < options.MinConfidenceThreshold)
            {
                continue;
            }

            // Find matching VEX linkset
            var vex = FindVexMatch(component, advisory, vexIndex);

            var tupleId = SelectionJoinTuple.CreateTupleId(
                tenantId,
                component.Purl,
                advisory.AdvisoryId);

            tuples.Add(new SelectionJoinTuple(
                tupleId,
                component,
                advisory,
                vex,
                matchType,
                confidence));
            emitted = true;

            // Update statistics
            stats.MatchedTuples++;
            switch (matchType)
            {
                case SelectionMatchType.ExactPurl:
                    stats.ExactPurlMatches++;
                    break;
                case SelectionMatchType.PackageKeyMatch:
                    stats.PackageKeyMatches++;
                    break;
                case SelectionMatchType.CpeMatch:
                    stats.CpeMatches++;
                    break;
                case SelectionMatchType.EquivalenceMatch:
                    stats.EquivalenceMatches++;
                    break;
            }

            if (vex is not null)
            {
                stats.VexOverlays++;
            }
        }

        // Without this, a component whose candidates were all filtered out would be
        // missing from both the tuples and the unmatched list.
        if (!emitted)
        {
            unmatched.Add(component);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Finds all advisories that match a component, at most once per advisory ID.
/// </summary>
/// <remarks>
/// Strategies are tried in this order and an advisory ID keeps the first strategy that
/// found it: exact PURL (base confidence 1.0), package key (0.8), then package keys of
/// PURLs that the equivalence table lists for the component's PURL or package key (0.9).
/// The base confidence is combined with the advisory's own confidence.
/// </remarks>
/// <param name="component">Component to match.</param>
/// <param name="index">Advisory lookup index.</param>
/// <param name="equivalenceTable">PURL equivalence table.</param>
/// <param name="options">Batch options; currently not consulted by this method.</param>
/// <returns>Matches ordered by confidence (descending), then advisory ID (case-insensitive).</returns>
private static IReadOnlyList<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)> FindAdvisoryMatches(
    SbomComponentInput component,
    AdvisoryIndex index,
    PurlEquivalenceTable equivalenceTable,
    SelectionJoinOptions options)
{
    var matches = new List<(AdvisoryLinksetInput Advisory, SelectionMatchType MatchType, double Confidence)>();

    // Advisory IDs already matched; case-sensitive, like the comparison it replaces.
    var seenAdvisoryIds = new HashSet<string>(StringComparer.Ordinal);

    var componentPurl = (component.Purl ?? string.Empty).Trim().ToLowerInvariant();
    var componentKey = PurlEquivalence.ExtractPackageKey(componentPurl);

    // 1. Exact PURL match (highest confidence)
    if (index.ByExactPurl.TryGetValue(componentPurl, out var exactMatches))
    {
        AddMatches(exactMatches, SelectionMatchType.ExactPurl, 1.0);
    }

    // 2. Package key match (same package, possibly different version)
    if (index.ByPackageKey.TryGetValue(componentKey, out var keyMatches))
    {
        AddMatches(keyMatches, SelectionMatchType.PackageKeyMatch, 0.8);
    }

    // 3. Equivalence table match. The table is consulted with both the full PURL and the
    //    version-less package key, because component PURLs normally carry a version while
    //    equivalence entries may be package-level. Keys are sorted so the outcome does not
    //    depend on hash-set enumeration order.
    var equivalentKeys = equivalenceTable.GetEquivalents(componentPurl)
        .Concat(equivalenceTable.GetEquivalents(componentKey))
        .Where(e => !string.Equals(e, componentPurl, StringComparison.OrdinalIgnoreCase)
            && !string.Equals(e, componentKey, StringComparison.OrdinalIgnoreCase))
        .Select(e => PurlEquivalence.ExtractPackageKey(e).ToLowerInvariant())
        .Distinct(StringComparer.Ordinal)
        .OrderBy(k => k, StringComparer.Ordinal);

    foreach (var equivalentKey in equivalentKeys)
    {
        if (index.ByPackageKey.TryGetValue(equivalentKey, out var equivMatches))
        {
            AddMatches(equivMatches, SelectionMatchType.EquivalenceMatch, 0.9);
        }
    }

    // Sort matches by confidence (descending) for deterministic ordering
    return matches
        .OrderByDescending(m => m.Confidence)
        .ThenBy(m => m.Advisory.AdvisoryId, StringComparer.OrdinalIgnoreCase)
        .ToList();

    // Records every candidate whose advisory ID has not been matched yet.
    void AddMatches(ImmutableArray<AdvisoryLinksetInput> candidates, SelectionMatchType matchType, double baseConfidence)
    {
        foreach (var advisory in candidates)
        {
            if (!seenAdvisoryIds.Add(advisory.AdvisoryId))
            {
                continue;
            }

            matches.Add((advisory, matchType, ComputeFinalConfidence(baseConfidence, advisory.Confidence)));
        }
    }
}
|
||||
|
||||
/// <summary>
/// Finds the VEX linkset that applies to a component/advisory pair, if any.
/// </summary>
/// <remarks>
/// Each advisory alias is tried in order, then the advisory ID itself. For every
/// identifier the exact product key (the component PURL) is tried first and the
/// version-less package key second. The first hit is returned.
/// </remarks>
/// <param name="component">Component whose PURL and package key are looked up.</param>
/// <param name="advisory">Advisory supplying the aliases and the advisory ID.</param>
/// <param name="vexIndex">VEX lookup index.</param>
/// <returns>The first matching VEX linkset, or null when none matches.</returns>
private static VexLinksetInput? FindVexMatch(
    SbomComponentInput component,
    AdvisoryLinksetInput advisory,
    VexIndex vexIndex)
{
    // Loop-invariant parts of the lookup keys.
    var purl = component.Purl.ToLowerInvariant();
    var packageKey = component.PackageKey.ToLowerInvariant();

    // An uninitialized alias array cannot be enumerated; treat it as "no aliases".
    if (!advisory.Aliases.IsDefaultOrEmpty)
    {
        foreach (var alias in advisory.Aliases)
        {
            if (string.IsNullOrWhiteSpace(alias))
            {
                continue;
            }

            var aliasMatch = Lookup(alias);
            if (aliasMatch is not null)
            {
                return aliasMatch;
            }
        }
    }

    // Try advisory ID directly, with the same exact-then-package-key fallback as aliases.
    return Lookup(advisory.AdvisoryId);

    // Exact vulnerability ID + product key first, then vulnerability ID + package key.
    VexLinksetInput? Lookup(string vulnerabilityId)
    {
        var id = vulnerabilityId.ToLowerInvariant();

        if (vexIndex.ByVulnAndProduct.TryGetValue($"{id}|{purl}", out var exact))
        {
            return exact;
        }

        return vexIndex.ByVulnAndPackageKey.TryGetValue($"{id}|{packageKey}", out var byPackage)
            ? byPackage
            : null;
    }
}
|
||||
|
||||
/// <summary>
/// Combines the match-strategy confidence with the advisory linkset confidence.
/// </summary>
/// <param name="matchConfidence">Confidence of the matching strategy.</param>
/// <param name="linksetConfidence">Linkset confidence; null is treated as 1.0.</param>
/// <returns>
/// The geometric mean of both values after each has been limited to the range 0..1
/// (NaN counts as 0), so the result is always a number between 0 and 1.
/// </returns>
private static double ComputeFinalConfidence(double matchConfidence, double? linksetConfidence)
{
    var match = Clamp01(matchConfidence);
    var linkset = Clamp01(linksetConfidence ?? 1.0);

    // Geometric mean of match confidence and linkset confidence
    return Math.Sqrt(match * linkset);

    // Out-of-range input would otherwise produce NaN (negative product) or a value above 1.
    static double Clamp01(double value) =>
        double.IsNaN(value) ? 0.0 : Math.Clamp(value, 0.0, 1.0);
}
|
||||
|
||||
/// <summary>
/// Builds the advisory lookup index: one map keyed by normalized PURL and one keyed by
/// version-less package key. Both maps compare keys case-insensitively.
/// </summary>
/// <remarks>
/// PURLs are trimmed and lower-cased. Blank PURLs are ignored, so an empty component
/// PURL can never produce an exact match. Within one key an advisory ID is listed once.
/// </remarks>
/// <param name="advisories">Advisories to index; an uninitialized array yields an empty index.</param>
/// <returns>The populated index.</returns>
private static AdvisoryIndex BuildAdvisoryIndex(ImmutableArray<AdvisoryLinksetInput> advisories)
{
    var byExactPurl = new Dictionary<string, List<AdvisoryLinksetInput>>(StringComparer.OrdinalIgnoreCase);
    var byPackageKey = new Dictionary<string, List<AdvisoryLinksetInput>>(StringComparer.OrdinalIgnoreCase);

    if (!advisories.IsDefaultOrEmpty)
    {
        foreach (var advisory in advisories)
        {
            // An uninitialized PURL array cannot be enumerated; nothing to index.
            if (advisory is null || advisory.Purls.IsDefaultOrEmpty)
            {
                continue;
            }

            foreach (var purl in advisory.Purls)
            {
                if (string.IsNullOrWhiteSpace(purl))
                {
                    continue;
                }

                var normalizedPurl = purl.Trim().ToLowerInvariant();
                AddUnique(byExactPurl, normalizedPurl, advisory);

                var packageKey = PurlEquivalence.ExtractPackageKey(normalizedPurl);
                if (!string.IsNullOrEmpty(packageKey))
                {
                    AddUnique(byPackageKey, packageKey, advisory);
                }
            }
        }
    }

    return new AdvisoryIndex(Freeze(byExactPurl), Freeze(byPackageKey));

    // Appends the advisory under the key unless an advisory with the same ID is already there.
    static void AddUnique(
        Dictionary<string, List<AdvisoryLinksetInput>> map,
        string key,
        AdvisoryLinksetInput advisory)
    {
        if (!map.TryGetValue(key, out var list))
        {
            list = new List<AdvisoryLinksetInput>();
            map[key] = list;
        }

        if (!list.Exists(a => a.AdvisoryId == advisory.AdvisoryId))
        {
            list.Add(advisory);
        }
    }

    // Converts the mutable working map into its immutable, case-insensitive form.
    static ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> Freeze(
        Dictionary<string, List<AdvisoryLinksetInput>> map) =>
        map.ToImmutableDictionary(
            kvp => kvp.Key,
            kvp => kvp.Value.ToImmutableArray(),
            StringComparer.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Builds the VEX lookup index. Keys have the form "vulnerability|product", both parts
/// lower-cased; one map uses the full product key, the other its version-less package key.
/// </summary>
/// <remarks>
/// When several linksets produce the same key, the first one in input order is kept.
/// </remarks>
/// <param name="vexLinksets">VEX linksets to index.</param>
/// <returns>The populated index with case-insensitive key comparison.</returns>
private static VexIndex BuildVexIndex(ImmutableArray<VexLinksetInput> vexLinksets)
{
    var exactLookup = new Dictionary<string, VexLinksetInput>(StringComparer.OrdinalIgnoreCase);
    var packageLookup = new Dictionary<string, VexLinksetInput>(StringComparer.OrdinalIgnoreCase);

    foreach (var linkset in vexLinksets)
    {
        var vulnerability = linkset.VulnerabilityId.ToLowerInvariant();
        var product = linkset.ProductKey.ToLowerInvariant();

        // TryAdd keeps the earliest linkset for a key.
        exactLookup.TryAdd($"{vulnerability}|{product}", linkset);

        var package = PurlEquivalence.ExtractPackageKey(product);
        if (string.IsNullOrEmpty(package))
        {
            continue;
        }

        packageLookup.TryAdd($"{vulnerability}|{package}", linkset);
    }

    var frozenExact = exactLookup.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase);
    var frozenPackage = packageLookup.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase);

    return new VexIndex(frozenExact, frozenPackage);
}
|
||||
|
||||
/// <summary>
/// Splits the components into consecutive batches, preserving their order.
/// </summary>
/// <param name="components">Components to split.</param>
/// <param name="batchSize">Maximum batch size; values of zero or less fall back to 1000.</param>
/// <returns>
/// Batches in input order; every batch except possibly the last holds exactly the
/// effective batch size. Empty input yields no batches.
/// </returns>
private static IReadOnlyList<IReadOnlyList<SbomComponentInput>> CreateDeterministicBatches(
    ImmutableArray<SbomComponentInput> components,
    int batchSize)
{
    var size = batchSize > 0 ? batchSize : 1000;
    var result = new List<IReadOnlyList<SbomComponentInput>>();

    var offset = 0;
    while (offset < components.Length)
    {
        var take = Math.Min(size, components.Length - offset);
        var chunk = new List<SbomComponentInput>(take);

        for (var end = offset + take; offset < end; offset++)
        {
            chunk.Add(components[offset]);
        }

        result.Add(chunk);
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Advisory lookup tables used during matching.
/// </summary>
/// <param name="ByExactPurl">Advisories keyed by lower-cased PURL.</param>
/// <param name="ByPackageKey">Advisories keyed by the package key extracted from the lower-cased PURL.</param>
private sealed record AdvisoryIndex(
    ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByExactPurl,
    ImmutableDictionary<string, ImmutableArray<AdvisoryLinksetInput>> ByPackageKey);
|
||||
|
||||
/// <summary>
/// VEX lookup tables used during matching. Keys have the form "vulnerability|product".
/// </summary>
/// <param name="ByVulnAndProduct">VEX linksets keyed by vulnerability ID and full product key.</param>
/// <param name="ByVulnAndPackageKey">VEX linksets keyed by vulnerability ID and package key of the product.</param>
private sealed record VexIndex(
    ImmutableDictionary<string, VexLinksetInput> ByVulnAndProduct,
    ImmutableDictionary<string, VexLinksetInput> ByVulnAndPackageKey);
|
||||
|
||||
/// <summary>
/// Mutable accumulator for batch counters; converted into the immutable
/// <see cref="SelectionJoinStatistics"/> via <see cref="Build"/>.
/// </summary>
private sealed class SelectionJoinStatsBuilder
{
    public int TotalComponents { get; set; }

    public int TotalAdvisories { get; set; }

    public int MatchedTuples { get; set; }

    public int ExactPurlMatches { get; set; }

    public int PackageKeyMatches { get; set; }

    public int CpeMatches { get; set; }

    public int EquivalenceMatches { get; set; }

    public int VexOverlays { get; set; }

    public long ProcessingTimeMs { get; set; }

    /// <summary>
    /// Creates a statistics record holding the current counter values.
    /// </summary>
    public SelectionJoinStatistics Build()
    {
        return new SelectionJoinStatistics(
            TotalComponents: TotalComponents,
            TotalAdvisories: TotalAdvisories,
            MatchedTuples: MatchedTuples,
            ExactPurlMatches: ExactPurlMatches,
            PackageKeyMatches: PackageKeyMatches,
            CpeMatches: CpeMatches,
            EquivalenceMatches: EquivalenceMatches,
            VexOverlays: VexOverlays,
            ProcessingTimeMs: ProcessingTimeMs);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user