using System.Collections.Immutable;

namespace StellaOps.Policy.Engine.SelectionJoin;

/// <summary>
/// PURL equivalence table for mapping package identifiers across ecosystems.
/// Enables matching when the same package has different identifiers in
/// different sources (e.g., npm vs GitHub advisory database naming).
/// </summary>
/// <remarks>
/// All PURLs are normalized (trimmed and lower-cased with the invariant culture)
/// before being stored or looked up, so lookups are case- and padding-insensitive.
/// </remarks>
public sealed class PurlEquivalenceTable
{
    private readonly ImmutableDictionary<string, ImmutableHashSet<string>> _equivalenceGroups;
    private readonly ImmutableDictionary<string, string> _canonicalMapping;

    private PurlEquivalenceTable(
        ImmutableDictionary<string, ImmutableHashSet<string>> equivalenceGroups,
        ImmutableDictionary<string, string> canonicalMapping)
    {
        _equivalenceGroups = equivalenceGroups;
        _canonicalMapping = canonicalMapping;

        // Groups are disjoint and each has exactly one canonical member, so the
        // number of distinct canonical values is the number of groups. Computed
        // once because the table is immutable.
        GroupCount = canonicalMapping.Values.Distinct(StringComparer.Ordinal).Count();
    }

    /// <summary>
    /// Gets an empty equivalence table.
    /// </summary>
    public static PurlEquivalenceTable Empty { get; } = new(
        ImmutableDictionary<string, ImmutableHashSet<string>>.Empty,
        ImmutableDictionary<string, string>.Empty);

    /// <summary>
    /// Creates an equivalence table from a list of equivalence groups.
    /// Each group contains PURLs that should be considered equivalent.
    /// </summary>
    /// <param name="groups">
    /// The equivalence groups. Blank entries are ignored, <c>null</c> groups are skipped,
    /// and groups with fewer than two distinct normalized PURLs contribute nothing.
    /// Groups that share a member are merged, so equivalence is transitive.
    /// </param>
    /// <returns>An immutable table; the canonical form of each group is its ordinally smallest member.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="groups"/> is <c>null</c>.</exception>
    public static PurlEquivalenceTable FromGroups(IEnumerable<IEnumerable<string>> groups)
    {
        ArgumentNullException.ThrowIfNull(groups);

        // Maps every PURL seen so far to the (shared, mutable) member set of its group.
        // Invariant: each set contains exactly the keys that map to it.
        var groupByMember = new Dictionary<string, SortedSet<string>>(StringComparer.Ordinal);

        foreach (var group in groups)
        {
            if (group is null)
            {
                continue;
            }

            var merged = new SortedSet<string>(
                group.Where(p => !string.IsNullOrWhiteSpace(p)).Select(Normalize),
                StringComparer.Ordinal);

            if (merged.Count < 2)
            {
                continue;
            }

            // Absorb every previously registered group that shares a member with this one.
            // Without this, a later group would silently overwrite the earlier mapping and
            // break equivalences that were explicitly declared.
            // Snapshot first: 'merged' is mutated inside the loop.
            foreach (var purl in merged.ToArray())
            {
                if (groupByMember.TryGetValue(purl, out var existing))
                {
                    merged.UnionWith(existing);
                }
            }

            foreach (var purl in merged)
            {
                groupByMember[purl] = merged;
            }
        }

        var equivalenceBuilder = ImmutableDictionary.CreateBuilder<string, ImmutableHashSet<string>>(
            StringComparer.OrdinalIgnoreCase);
        var canonicalBuilder = ImmutableDictionary.CreateBuilder<string, string>(
            StringComparer.OrdinalIgnoreCase);

        foreach (var members in groupByMember.Values)
        {
            // The sorted set's minimum is the ordinally smallest member, i.e. the canonical form.
            var canonical = members.Min;

            // The same set is reachable through each of its members; emit it only once.
            if (canonical is null || canonicalBuilder.ContainsKey(canonical))
            {
                continue;
            }

            var frozenGroup = members.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);

            foreach (var purl in members)
            {
                equivalenceBuilder[purl] = frozenGroup;
                canonicalBuilder[purl] = canonical;
            }
        }

        return new PurlEquivalenceTable(
            equivalenceBuilder.ToImmutable(),
            canonicalBuilder.ToImmutable());
    }

    /// <summary>
    /// Gets the canonical form of a PURL.
    /// </summary>
    /// <param name="purl">The PURL to look up.</param>
    /// <returns>
    /// The canonical member of the PURL's equivalence group; the normalized
    /// (trimmed, lower-cased) input if it is not in the table; or an empty
    /// string when <paramref name="purl"/> is null or whitespace.
    /// </returns>
    public string GetCanonical(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        return LookupCanonical(Normalize(purl));
    }

    /// <summary>
    /// Gets all equivalent PURLs for a given PURL.
    /// </summary>
    /// <param name="purl">The PURL to look up.</param>
    /// <returns>
    /// The full equivalence group (including the PURL itself) when the PURL is in the table;
    /// a single-element set containing the normalized PURL when it is not;
    /// or an empty set when <paramref name="purl"/> is null or whitespace.
    /// </returns>
    public IReadOnlySet<string> GetEquivalents(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return ImmutableHashSet<string>.Empty;
        }

        var normalized = Normalize(purl);

        return _equivalenceGroups.TryGetValue(normalized, out var group)
            ? group
            : ImmutableHashSet.Create(StringComparer.OrdinalIgnoreCase, normalized);
    }

    /// <summary>
    /// Checks if two PURLs are equivalent.
    /// </summary>
    /// <param name="purl1">The first PURL.</param>
    /// <param name="purl2">The second PURL.</param>
    /// <returns>
    /// <c>true</c> when both PURLs normalize to the same string or belong to the same
    /// equivalence group; <c>false</c> otherwise, including when either is null or whitespace.
    /// </returns>
    public bool AreEquivalent(string purl1, string purl2)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return false;
        }

        var first = Normalize(purl1);
        var second = Normalize(purl2);

        if (string.Equals(first, second, StringComparison.Ordinal))
        {
            return true;
        }

        return string.Equals(LookupCanonical(first), LookupCanonical(second), StringComparison.Ordinal);
    }

    /// <summary>
    /// Number of equivalence groups in the table.
    /// </summary>
    public int GroupCount { get; }

    /// <summary>
    /// Total number of PURLs in the table.
    /// </summary>
    public int TotalEntries => _canonicalMapping.Count;

    /// <summary>
    /// Normalizes a non-blank PURL for storage and lookup: trims it and lower-cases it
    /// with the invariant culture.
    /// </summary>
    private static string Normalize(string purl) => purl.Trim().ToLowerInvariant();

    /// <summary>
    /// Returns the canonical form for an already-normalized PURL, or the PURL itself
    /// when it is not part of any group.
    /// </summary>
    private string LookupCanonical(string normalized) =>
        _canonicalMapping.TryGetValue(normalized, out var canonical) ? canonical : normalized;
}

/// <summary>
/// Static utilities for PURL equivalence matching.
/// </summary>
public static class PurlEquivalence
{
    private const string Scheme = "pkg:";

    private const double ExactMatchConfidence = 1.0;
    private const double EquivalenceTableConfidence = 0.95;
    private const double PackageKeyConfidence = 0.8;
    private const double EcosystemAndNameConfidence = 0.5;
    private const double NoMatchConfidence = 0.0;

    // '?' starts the qualifiers and '#' starts the subpath; neither belongs to the package key.
    private static readonly char[] QualifierOrSubpathMarkers = { '?', '#' };

    /// <summary>
    /// Extracts the package key from a PURL (removes the version, qualifiers and subpath).
    /// Example: "pkg:npm/lodash@4.17.21" → "pkg:npm/lodash"
    /// </summary>
    /// <param name="purl">The PURL to reduce.</param>
    /// <returns>
    /// The trimmed PURL up to (but excluding) the version separator, or an empty string
    /// when <paramref name="purl"/> is null or whitespace. Casing is preserved.
    /// </returns>
    /// <remarks>
    /// An '@' is treated as the version separator only when it follows the last '/' of the
    /// path and is preceded by at least one name character. This keeps an unencoded npm scope
    /// intact ("pkg:npm/@vue/cli" has no version) and accepts versions that do not start with
    /// a digit, such as commit hashes.
    /// </remarks>
    public static string ExtractPackageKey(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var trimmed = purl.Trim();

        // Cut qualifiers/subpath first so that '/' or '@' inside them cannot be mistaken
        // for path separators or the version marker.
        var pathEnd = trimmed.IndexOfAny(QualifierOrSubpathMarkers);
        var path = pathEnd >= 0 ? trimmed[..pathEnd] : trimmed;

        var versionSeparator = path.LastIndexOf('@');
        var lastSlash = path.LastIndexOf('/');

        return versionSeparator > lastSlash + 1
            ? path[..versionSeparator]
            : path;
    }

    /// <summary>
    /// Extracts the ecosystem from a PURL.
    /// Example: "pkg:npm/lodash@4.17.21" → "npm"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The PURL type segment as written, or <c>null</c> when the input is blank, does not
    /// start with "pkg:", or has no '/' after the type.
    /// </returns>
    public static string? ExtractEcosystem(string purl) =>
        TrySplitTypeAndPath(purl, out var type, out _) ? type : null;

    /// <summary>
    /// Extracts the namespace from a PURL (if present).
    /// Example: "pkg:npm/@scope/package@1.0.0" → "@scope"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// Everything between the type and the name; this may span several segments
    /// (e.g. "github.com/foo" for "pkg:golang/github.com/foo/bar@v1"). Returns <c>null</c>
    /// when the PURL has no namespace or is not a "pkg:" PURL.
    /// </returns>
    public static string? ExtractNamespace(string purl)
    {
        if (!TrySplitTypeAndPath(purl, out _, out var path))
        {
            return null;
        }

        // The name is the last path segment; whatever precedes it is the namespace.
        var lastSlash = path.LastIndexOf('/');
        return lastSlash > 0 ? path[..lastSlash] : null;
    }

    /// <summary>
    /// Extracts the package name from a PURL.
    /// Example: "pkg:npm/@scope/package@1.0.0" → "package"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The last segment of the package key, or <c>null</c> when the input is blank,
    /// contains no '/', or ends with '/'.
    /// </returns>
    public static string? ExtractName(string purl)
    {
        var packageKey = ExtractPackageKey(purl);
        var lastSlash = packageKey.LastIndexOf('/');

        if (lastSlash < 0 || lastSlash == packageKey.Length - 1)
        {
            return null;
        }

        return packageKey[(lastSlash + 1)..];
    }

    /// <summary>
    /// Computes match confidence between two PURLs.
    /// </summary>
    /// <param name="purl1">The first PURL.</param>
    /// <param name="purl2">The second PURL.</param>
    /// <param name="equivalenceTable">Optional table of known-equivalent PURLs.</param>
    /// <returns>
    /// 1.0 for an exact (case-insensitive, trimmed) match; 0.95 when the equivalence table
    /// links the two; 0.8 when the package keys match (same package, different version);
    /// 0.5 when only ecosystem and name match (different namespace); 0.0 otherwise or when
    /// either input is null or whitespace.
    /// </returns>
    public static double ComputeMatchConfidence(
        string purl1,
        string purl2,
        PurlEquivalenceTable? equivalenceTable = null)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return NoMatchConfidence;
        }

        var first = purl1.Trim().ToLowerInvariant();
        var second = purl2.Trim().ToLowerInvariant();

        if (string.Equals(first, second, StringComparison.Ordinal))
        {
            return ExactMatchConfidence;
        }

        if (equivalenceTable is not null && equivalenceTable.AreEquivalent(first, second))
        {
            return EquivalenceTableConfidence;
        }

        // Same package, different version.
        var firstKey = ExtractPackageKey(first);
        var secondKey = ExtractPackageKey(second);

        if (!string.IsNullOrEmpty(firstKey)
            && string.Equals(firstKey, secondKey, StringComparison.OrdinalIgnoreCase))
        {
            return PackageKeyConfidence;
        }

        // Same ecosystem and name, different namespace.
        var firstEcosystem = ExtractEcosystem(first);
        var firstName = ExtractName(first);

        if (!string.IsNullOrEmpty(firstEcosystem)
            && string.Equals(firstEcosystem, ExtractEcosystem(second), StringComparison.OrdinalIgnoreCase)
            && !string.IsNullOrEmpty(firstName)
            && string.Equals(firstName, ExtractName(second), StringComparison.OrdinalIgnoreCase))
        {
            return EcosystemAndNameConfidence;
        }

        return NoMatchConfidence;
    }

    /// <summary>
    /// Splits the package key of a "pkg:" PURL into its type and the remaining
    /// namespace/name path. Returns <c>false</c> when the input is not a "pkg:" PURL
    /// or has no '/' after the type.
    /// </summary>
    private static bool TrySplitTypeAndPath(string purl, out string type, out string path)
    {
        type = string.Empty;
        path = string.Empty;

        var packageKey = ExtractPackageKey(purl);
        if (!packageKey.StartsWith(Scheme, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        // The PURL spec requires parsers to accept "pkg://type/..." and ignore the slashes.
        var remainder = packageKey[Scheme.Length..].TrimStart('/');
        var slashIndex = remainder.IndexOf('/');
        if (slashIndex <= 0)
        {
            return false;
        }

        type = remainder[..slashIndex];
        path = remainder[(slashIndex + 1)..];
        return true;
    }
}