using System.Collections.Immutable;
namespace StellaOps.Policy.Engine.SelectionJoin;
/// <summary>
/// PURL equivalence table for mapping package identifiers across ecosystems.
/// Enables matching when the same package has different identifiers in
/// different sources (e.g., npm vs GitHub advisory database naming).
/// </summary>
public sealed class PurlEquivalenceTable
{
    // Every known (normalized) PURL -> the complete set of PURLs in its group.
    private readonly ImmutableDictionary<string, ImmutableHashSet<string>> _equivalenceGroups;

    // Every known (normalized) PURL -> the canonical representative of its group.
    private readonly ImmutableDictionary<string, string> _canonicalMapping;

    private PurlEquivalenceTable(
        ImmutableDictionary<string, ImmutableHashSet<string>> equivalenceGroups,
        ImmutableDictionary<string, string> canonicalMapping)
    {
        _equivalenceGroups = equivalenceGroups;
        _canonicalMapping = canonicalMapping;

        // Each group has exactly one member that is its own canonical form, so counting
        // those entries yields the number of groups. The table is immutable, so this is
        // computed once instead of on every property access.
        GroupCount = canonicalMapping.Count(
            entry => string.Equals(entry.Key, entry.Value, StringComparison.Ordinal));
    }

    /// <summary>
    /// Gets an empty equivalence table.
    /// </summary>
    public static PurlEquivalenceTable Empty { get; } = new(
        ImmutableDictionary<string, ImmutableHashSet<string>>.Empty,
        ImmutableDictionary<string, string>.Empty);

    /// <summary>
    /// Creates an equivalence table from a list of equivalence groups.
    /// Each group contains PURLs that should be considered equivalent.
    /// </summary>
    /// <remarks>
    /// PURLs are trimmed and lower-cased; null or blank entries are ignored, as are
    /// null groups and groups with fewer than two distinct PURLs. Groups that share a
    /// PURL are merged into one group so that equivalence is transitive and
    /// <see cref="GetCanonical"/>, <see cref="GetEquivalents"/> and
    /// <see cref="AreEquivalent"/> always agree. The canonical form of a group is its
    /// smallest member in ordinal order.
    /// </remarks>
    /// <param name="groups">The equivalence groups to index.</param>
    /// <returns>A table containing the merged groups.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="groups"/> is null.</exception>
    public static PurlEquivalenceTable FromGroups(IEnumerable<IEnumerable<string>> groups)
    {
        ArgumentNullException.ThrowIfNull(groups);

        // Working partition: every member maps to the single sorted set shared by its group.
        var groupByMember = new Dictionary<string, SortedSet<string>>(StringComparer.Ordinal);

        foreach (var group in groups)
        {
            if (group is null)
            {
                continue;
            }

            var members = new SortedSet<string>(
                group.Where(p => !string.IsNullOrWhiteSpace(p)).Select(Normalize),
                StringComparer.Ordinal);

            if (members.Count < 2)
            {
                continue;
            }

            // Absorb every previously registered group that shares a member with this one.
            // Snapshot first because the set grows while we merge.
            foreach (var member in members.ToArray())
            {
                if (groupByMember.TryGetValue(member, out var existing))
                {
                    members.UnionWith(existing);
                }
            }

            // Re-point all members (including absorbed ones) at the merged set.
            foreach (var member in members)
            {
                groupByMember[member] = members;
            }
        }

        var equivalenceBuilder = ImmutableDictionary.CreateBuilder<string, ImmutableHashSet<string>>(
            StringComparer.OrdinalIgnoreCase);
        var canonicalBuilder = ImmutableDictionary.CreateBuilder<string, string>(
            StringComparer.OrdinalIgnoreCase);

        foreach (var (candidate, mergedGroup) in groupByMember)
        {
            // Emit each merged group exactly once: when we reach its smallest member,
            // which is also its canonical form.
            if (!string.Equals(candidate, mergedGroup.Min, StringComparison.Ordinal))
            {
                continue;
            }

            var frozenGroup = mergedGroup.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
            foreach (var purl in mergedGroup)
            {
                equivalenceBuilder[purl] = frozenGroup;
                canonicalBuilder[purl] = candidate;
            }
        }

        return new PurlEquivalenceTable(
            equivalenceBuilder.ToImmutable(),
            canonicalBuilder.ToImmutable());
    }

    /// <summary>
    /// Gets the canonical form of a PURL, or the normalized (trimmed, lower-cased)
    /// input if it is not in the table.
    /// </summary>
    /// <param name="purl">The PURL to look up.</param>
    /// <returns>
    /// The canonical PURL, the normalized input when unknown, or an empty string when
    /// <paramref name="purl"/> is null or blank.
    /// </returns>
    public string GetCanonical(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        return LookupCanonical(Normalize(purl));
    }

    /// <summary>
    /// Gets all equivalent PURLs for a given PURL.
    /// </summary>
    /// <param name="purl">The PURL to look up.</param>
    /// <returns>
    /// The PURL's group (including itself); a single-element set holding the normalized
    /// input when it is not in the table; or an empty set when
    /// <paramref name="purl"/> is null or blank.
    /// </returns>
    public IReadOnlySet<string> GetEquivalents(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return ImmutableHashSet<string>.Empty;
        }

        var normalized = Normalize(purl);
        return _equivalenceGroups.TryGetValue(normalized, out var group)
            ? group
            : ImmutableHashSet.Create(StringComparer.OrdinalIgnoreCase, normalized);
    }

    /// <summary>
    /// Checks if two PURLs are equivalent.
    /// </summary>
    /// <param name="purl1">The first PURL.</param>
    /// <param name="purl2">The second PURL.</param>
    /// <returns>
    /// <c>true</c> when both PURLs normalize to the same string or belong to the same
    /// group; <c>false</c> otherwise, including when either argument is null or blank.
    /// </returns>
    public bool AreEquivalent(string purl1, string purl2)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return false;
        }

        var first = Normalize(purl1);
        var second = Normalize(purl2);

        if (string.Equals(first, second, StringComparison.Ordinal))
        {
            return true;
        }

        return string.Equals(LookupCanonical(first), LookupCanonical(second), StringComparison.Ordinal);
    }

    /// <summary>
    /// Number of equivalence groups in the table.
    /// </summary>
    public int GroupCount { get; }

    /// <summary>
    /// Total number of PURLs in the table.
    /// </summary>
    public int TotalEntries => _canonicalMapping.Count;

    // Single definition of the normalization applied to every PURL entering the table.
    private static string Normalize(string purl) => purl.Trim().ToLowerInvariant();

    // Resolves an already-normalized PURL to its canonical form (itself when unknown).
    private string LookupCanonical(string normalized) =>
        _canonicalMapping.TryGetValue(normalized, out var canonical) ? canonical : normalized;
}
/// <summary>
/// Static utilities for PURL equivalence matching.
/// </summary>
public static class PurlEquivalence
{
    private const string SchemePrefix = "pkg:";

    // Confidence tiers returned by ComputeMatchConfidence, strongest first.
    private const double ExactMatchConfidence = 1.0;
    private const double EquivalenceTableConfidence = 0.95;
    private const double PackageKeyConfidence = 0.8;
    private const double EcosystemAndNameConfidence = 0.5;
    private const double NoMatchConfidence = 0.0;

    // Characters that start the qualifiers ('?') and subpath ('#') components.
    private static readonly char[] QualifierOrSubpathMarkers = { '?', '#' };

    /// <summary>
    /// Extracts the package key from a PURL: everything up to, but excluding, the
    /// version, qualifiers and subpath.
    /// Example: "pkg:npm/lodash@4.17.21" → "pkg:npm/lodash"
    /// </summary>
    /// <remarks>
    /// The version separator is the first '@' that appears inside the final path
    /// segment after its first character. A leading '@' on a segment (npm scope such
    /// as "@scope/package") is therefore never mistaken for a version separator, and
    /// versions of any shape (e.g. "sha256:...", "latest") are removed.
    /// </remarks>
    /// <param name="purl">The PURL to reduce.</param>
    /// <returns>
    /// The trimmed package key (original casing preserved), or an empty string when
    /// <paramref name="purl"/> is null or blank.
    /// </returns>
    public static string ExtractPackageKey(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var trimmed = purl.Trim();

        // Qualifiers and subpath follow the version; drop them first so that an '@' or
        // '/' inside them cannot be confused with the version or name separator.
        var pathEnd = trimmed.IndexOfAny(QualifierOrSubpathMarkers);
        var path = pathEnd >= 0 ? trimmed[..pathEnd] : trimmed;

        var nameStart = path.LastIndexOf('/') + 1;

        // Skip the first character of the segment: a leading '@' is a scope marker.
        var versionSeparator = nameStart < path.Length
            ? path.IndexOf('@', nameStart + 1)
            : -1;

        return versionSeparator >= 0 ? path[..versionSeparator] : path;
    }

    /// <summary>
    /// Extracts the ecosystem (PURL type) from a PURL.
    /// Example: "pkg:npm/lodash@4.17.21" → "npm"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The text between "pkg:" and the first '/', or <c>null</c> when the input is
    /// null or blank, lacks the "pkg:" prefix, or has no non-empty type before a '/'.
    /// </returns>
    public static string? ExtractEcosystem(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return null;
        }

        var trimmed = purl.Trim();
        if (!trimmed.StartsWith(SchemePrefix, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var afterScheme = trimmed[SchemePrefix.Length..];
        var typeEnd = afterScheme.IndexOf('/');
        return typeEnd > 0 ? afterScheme[..typeEnd] : null;
    }

    /// <summary>
    /// Extracts the namespace from a PURL (if present).
    /// Example: "pkg:npm/@scope/package@1.0.0" → "@scope"
    /// </summary>
    /// <remarks>
    /// The namespace is everything between the type and the final (name) segment, so a
    /// multi-segment namespace is returned whole: "pkg:golang/github.com/gorilla/mux"
    /// → "github.com/gorilla". Version, qualifiers and subpath are ignored.
    /// </remarks>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The namespace, or <c>null</c> when the input is null or blank, lacks the "pkg:"
    /// prefix, or has no namespace.
    /// </returns>
    public static string? ExtractNamespace(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return null;
        }

        var trimmed = purl.Trim();
        if (!trimmed.StartsWith(SchemePrefix, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        // Work on the package key so '/' inside a subpath or qualifier is not counted.
        var packageKey = ExtractPackageKey(trimmed);

        var typeSeparator = packageKey.IndexOf('/', SchemePrefix.Length);
        if (typeSeparator < 0)
        {
            return null;
        }

        var nameSeparator = packageKey.LastIndexOf('/');

        // A namespace exists only if there is at least one character between the
        // slash that ends the type and the slash that starts the name.
        return nameSeparator > typeSeparator + 1
            ? packageKey[(typeSeparator + 1)..nameSeparator]
            : null;
    }

    /// <summary>
    /// Extracts the package name from a PURL.
    /// Example: "pkg:npm/@scope/package@1.0.0" → "package"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The final path segment of the package key, or <c>null</c> when the input is
    /// null or blank or its package key contains no '/'.
    /// </returns>
    public static string? ExtractName(string purl)
    {
        var packageKey = ExtractPackageKey(purl);
        if (string.IsNullOrWhiteSpace(packageKey))
        {
            return null;
        }

        var nameSeparator = packageKey.LastIndexOf('/');
        return nameSeparator >= 0 ? packageKey[(nameSeparator + 1)..] : null;
    }

    /// <summary>
    /// Computes match confidence between two PURLs.
    /// </summary>
    /// <remarks>
    /// Both PURLs are trimmed and lower-cased before comparison. The first rule that
    /// applies determines the result:
    /// 1.0 for an exact match; 0.95 when <paramref name="equivalenceTable"/> reports
    /// them equivalent; 0.8 when their package keys match (same package, different
    /// version); 0.5 when ecosystem and name match (different namespace); 0.0
    /// otherwise, including when either argument is null or blank.
    /// </remarks>
    /// <param name="purl1">The first PURL.</param>
    /// <param name="purl2">The second PURL.</param>
    /// <param name="equivalenceTable">Optional table of known-equivalent PURLs.</param>
    /// <returns>A confidence value between 0.0 and 1.0.</returns>
    public static double ComputeMatchConfidence(string purl1, string purl2, PurlEquivalenceTable? equivalenceTable = null)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return NoMatchConfidence;
        }

        var first = purl1.Trim().ToLowerInvariant();
        var second = purl2.Trim().ToLowerInvariant();

        if (string.Equals(first, second, StringComparison.Ordinal))
        {
            return ExactMatchConfidence;
        }

        if (equivalenceTable is not null && equivalenceTable.AreEquivalent(first, second))
        {
            return EquivalenceTableConfidence;
        }

        // Same package, different version.
        var firstKey = ExtractPackageKey(first);
        var secondKey = ExtractPackageKey(second);
        if (!string.IsNullOrEmpty(firstKey) &&
            string.Equals(firstKey, secondKey, StringComparison.OrdinalIgnoreCase))
        {
            return PackageKeyConfidence;
        }

        // Same ecosystem and name, different namespace.
        var firstEcosystem = ExtractEcosystem(first);
        var firstName = ExtractName(first);
        var sameEcosystem = !string.IsNullOrEmpty(firstEcosystem) &&
            string.Equals(firstEcosystem, ExtractEcosystem(second), StringComparison.OrdinalIgnoreCase);
        var sameName = !string.IsNullOrEmpty(firstName) &&
            string.Equals(firstName, ExtractName(second), StringComparison.OrdinalIgnoreCase);

        return sameEcosystem && sameName ? EcosystemAndNameConfidence : NoMatchConfidence;
    }
}