up
This commit is contained in:
@@ -0,0 +1,308 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Policy.Engine.SelectionJoin;
|
||||
|
||||
/// <summary>
/// PURL equivalence table for mapping package identifiers across ecosystems.
/// Enables matching when the same package has different identifiers in
/// different sources (e.g., npm vs GitHub advisory database naming).
/// </summary>
/// <remarks>
/// Every PURL stored in or looked up against the table is normalized by trimming
/// surrounding whitespace and lower-casing with the invariant culture.
/// The table exposes no mutating members; it is built once via <see cref="FromGroups"/>.
/// </remarks>
public sealed class PurlEquivalenceTable
{
    // Maps each normalized PURL to the full set of PURLs it is equivalent to (itself included).
    private readonly ImmutableDictionary<string, ImmutableHashSet<string>> _equivalenceGroups;

    // Maps each normalized PURL to the canonical representative of its group.
    private readonly ImmutableDictionary<string, string> _canonicalMapping;

    // Number of distinct groups; computed once because the table never changes.
    private readonly int _groupCount;

    private PurlEquivalenceTable(
        ImmutableDictionary<string, ImmutableHashSet<string>> equivalenceGroups,
        ImmutableDictionary<string, string> canonicalMapping)
    {
        _equivalenceGroups = equivalenceGroups;
        _canonicalMapping = canonicalMapping;

        // Groups are disjoint and each has exactly one canonical member,
        // so counting distinct canonical values counts the groups.
        _groupCount = canonicalMapping.Values.Distinct(StringComparer.Ordinal).Count();
    }

    /// <summary>
    /// Creates an empty equivalence table.
    /// </summary>
    public static PurlEquivalenceTable Empty { get; } = new(
        ImmutableDictionary<string, ImmutableHashSet<string>>.Empty,
        ImmutableDictionary<string, string>.Empty);

    /// <summary>
    /// Creates an equivalence table from a list of equivalence groups.
    /// Each group contains PURLs that should be considered equivalent.
    /// </summary>
    /// <param name="groups">
    /// Groups of equivalent PURLs. Blank entries are ignored, null groups are skipped,
    /// and groups with fewer than two distinct members after normalization are dropped.
    /// Groups that share a member are merged into a single group, because equivalence is transitive.
    /// </param>
    /// <returns>
    /// A table in which the canonical form of every group is its ordinally smallest
    /// normalized member.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="groups"/> is null.</exception>
    public static PurlEquivalenceTable FromGroups(IEnumerable<IEnumerable<string>> groups)
    {
        ArgumentNullException.ThrowIfNull(groups);

        var equivalenceBuilder = ImmutableDictionary.CreateBuilder<string, ImmutableHashSet<string>>(
            StringComparer.OrdinalIgnoreCase);
        var canonicalBuilder = ImmutableDictionary.CreateBuilder<string, string>(
            StringComparer.OrdinalIgnoreCase);

        foreach (var group in groups)
        {
            if (group is null)
            {
                continue;
            }

            // A sorted set both de-duplicates and keeps a deterministic ordinal order,
            // which a hash set cannot provide.
            var members = new SortedSet<string>(
                group.Where(p => !string.IsNullOrWhiteSpace(p)).Select(Normalize),
                StringComparer.Ordinal);

            if (members.Count < 2)
            {
                continue;
            }

            // Merge with every previously registered group that shares a member.
            // Each registered group is already complete, so a single pass is enough.
            foreach (var member in members.ToArray())
            {
                if (equivalenceBuilder.TryGetValue(member, out var existing))
                {
                    members.UnionWith(existing);
                }
            }

            // Ordinally smallest member is the canonical form.
            var canonical = members.Min!;
            var merged = members.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);

            foreach (var purl in merged)
            {
                equivalenceBuilder[purl] = merged;
                canonicalBuilder[purl] = canonical;
            }
        }

        return new PurlEquivalenceTable(
            equivalenceBuilder.ToImmutable(),
            canonicalBuilder.ToImmutable());
    }

    /// <summary>
    /// Gets the canonical form of a PURL, or the original if not in the table.
    /// </summary>
    /// <param name="purl">The PURL to look up.</param>
    /// <returns>
    /// The canonical PURL of the group containing <paramref name="purl"/>; the normalized
    /// input when it belongs to no group; or an empty string for null/blank input.
    /// </returns>
    public string GetCanonical(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var normalized = Normalize(purl);
        return _canonicalMapping.TryGetValue(normalized, out var canonical)
            ? canonical
            : normalized;
    }

    /// <summary>
    /// Gets all equivalent PURLs for a given PURL.
    /// </summary>
    /// <param name="purl">The PURL to look up.</param>
    /// <returns>
    /// The group containing <paramref name="purl"/> (including the PURL itself); a
    /// single-element set holding the normalized input when it belongs to no group;
    /// or an empty set for null/blank input.
    /// </returns>
    public IReadOnlySet<string> GetEquivalents(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return ImmutableHashSet<string>.Empty;
        }

        var normalized = Normalize(purl);
        return _equivalenceGroups.TryGetValue(normalized, out var group)
            ? group
            : ImmutableHashSet.Create(StringComparer.OrdinalIgnoreCase, normalized);
    }

    /// <summary>
    /// Checks if two PURLs are equivalent.
    /// </summary>
    /// <param name="purl1">First PURL.</param>
    /// <param name="purl2">Second PURL.</param>
    /// <returns>
    /// <c>true</c> when both normalize to the same string or belong to the same group;
    /// <c>false</c> otherwise, including when either argument is null or blank.
    /// </returns>
    public bool AreEquivalent(string purl1, string purl2)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return false;
        }

        var norm1 = Normalize(purl1);
        var norm2 = Normalize(purl2);

        if (string.Equals(norm1, norm2, StringComparison.Ordinal))
        {
            return true;
        }

        // Two different strings can only be equivalent when both are registered
        // and share a canonical representative.
        return _canonicalMapping.TryGetValue(norm1, out var canonical1)
            && _canonicalMapping.TryGetValue(norm2, out var canonical2)
            && string.Equals(canonical1, canonical2, StringComparison.Ordinal);
    }

    /// <summary>
    /// Number of equivalence groups in the table.
    /// </summary>
    public int GroupCount => _groupCount;

    /// <summary>
    /// Total number of PURLs in the table.
    /// </summary>
    public int TotalEntries => _canonicalMapping.Count;

    // Single normalization rule shared by construction and every lookup.
    private static string Normalize(string purl) => purl.Trim().ToLowerInvariant();
}
|
||||
|
||||
/// <summary>
/// Static utilities for PURL equivalence matching.
/// </summary>
public static class PurlEquivalence
{
    private const string PurlPrefix = "pkg:";

    // Confidence tiers returned by ComputeMatchConfidence, strongest first.
    private const double ExactMatchConfidence = 1.0;
    private const double EquivalenceTableConfidence = 0.95;
    private const double PackageKeyConfidence = 0.8;
    private const double EcosystemAndNameConfidence = 0.5;
    private const double NoMatchConfidence = 0.0;

    // Characters that start the qualifiers ('?') and subpath ('#') sections of a PURL.
    private static readonly char[] QualifierOrSubpathStart = { '?', '#' };

    /// <summary>
    /// Extracts the package key from a PURL (removes version, qualifiers and subpath).
    /// Example: "pkg:npm/lodash@4.17.21" → "pkg:npm/lodash"
    /// </summary>
    /// <param name="purl">The PURL to reduce.</param>
    /// <returns>
    /// The trimmed PURL without its "@version", "?qualifiers" and "#subpath" parts,
    /// or an empty string for null/blank input.
    /// </returns>
    public static string ExtractPackageKey(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var key = purl.Trim();

        // Qualifiers and subpath trail the version and may themselves contain '@' or '/',
        // so they must be removed before looking for the version separator.
        var tailIndex = key.IndexOfAny(QualifierOrSubpathStart);
        if (tailIndex >= 0)
        {
            key = key[..tailIndex];
        }

        // The version separator is an '@' inside the final path segment that is not the
        // segment's first character. An '@' at the start of a segment (or in an earlier
        // segment) is an unencoded scope such as "@vue/cli", not a version.
        var atIndex = key.LastIndexOf('@');
        var lastSlashIndex = key.LastIndexOf('/');

        return atIndex > lastSlashIndex + 1 ? key[..atIndex] : key;
    }

    /// <summary>
    /// Extracts the ecosystem from a PURL.
    /// Example: "pkg:npm/lodash@4.17.21" → "npm"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The text between "pkg:" and the first '/', or <c>null</c> when the input is blank,
    /// lacks the "pkg:" prefix, or has no non-empty ecosystem segment.
    /// </returns>
    public static string? ExtractEcosystem(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return null;
        }

        var trimmed = purl.Trim();
        if (!trimmed.StartsWith(PurlPrefix, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var afterPrefix = trimmed[PurlPrefix.Length..];
        var slashIndex = afterPrefix.IndexOf('/');

        return slashIndex > 0 ? afterPrefix[..slashIndex] : null;
    }

    /// <summary>
    /// Extracts the namespace from a PURL (if present).
    /// Example: "pkg:npm/@scope/package@1.0.0" → "@scope"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// Everything between the ecosystem and the package name (which may span several
    /// '/'-separated segments, e.g. "github.com/owner"), or <c>null</c> when the input is
    /// blank, lacks the "pkg:" prefix, or has no namespace.
    /// </returns>
    public static string? ExtractNamespace(string purl)
    {
        // Work on the package key so that '/' characters inside the version,
        // qualifiers or subpath are never mistaken for namespace boundaries.
        var packageKey = ExtractPackageKey(purl);
        if (!packageKey.StartsWith(PurlPrefix, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var path = packageKey[PurlPrefix.Length..];
        var ecosystemEnd = path.IndexOf('/');
        if (ecosystemEnd < 0)
        {
            return null;
        }

        // The name is the last segment; the namespace is whatever lies strictly
        // between the ecosystem separator and the name separator.
        var nameSeparator = path.LastIndexOf('/');

        return nameSeparator > ecosystemEnd + 1
            ? path[(ecosystemEnd + 1)..nameSeparator]
            : null;
    }

    /// <summary>
    /// Extracts the package name from a PURL.
    /// Example: "pkg:npm/@scope/package@1.0.0" → "package"
    /// </summary>
    /// <param name="purl">The PURL to inspect.</param>
    /// <returns>
    /// The last '/'-separated segment of the package key, or <c>null</c> when the input
    /// is blank or contains no '/'.
    /// </returns>
    public static string? ExtractName(string purl)
    {
        var packageKey = ExtractPackageKey(purl);
        if (string.IsNullOrWhiteSpace(packageKey))
        {
            return null;
        }

        var lastSlashIndex = packageKey.LastIndexOf('/');
        return lastSlashIndex >= 0 ? packageKey[(lastSlashIndex + 1)..] : null;
    }

    /// <summary>
    /// Computes match confidence between two PURLs.
    /// </summary>
    /// <param name="purl1">First PURL.</param>
    /// <param name="purl2">Second PURL.</param>
    /// <param name="equivalenceTable">Optional table of known-equivalent PURLs.</param>
    /// <returns>
    /// 1.0 for an exact match after trimming and lower-casing;
    /// 0.95 when <paramref name="equivalenceTable"/> reports the two as equivalent;
    /// 0.8 when the package keys match (same package, different version);
    /// 0.5 when only ecosystem and name match (different namespace);
    /// 0.0 otherwise, including when either input is null or blank.
    /// </returns>
    public static double ComputeMatchConfidence(string purl1, string purl2, PurlEquivalenceTable? equivalenceTable = null)
    {
        if (string.IsNullOrWhiteSpace(purl1) || string.IsNullOrWhiteSpace(purl2))
        {
            return NoMatchConfidence;
        }

        var norm1 = purl1.Trim().ToLowerInvariant();
        var norm2 = purl2.Trim().ToLowerInvariant();

        // Exact match
        if (string.Equals(norm1, norm2, StringComparison.Ordinal))
        {
            return ExactMatchConfidence;
        }

        // Equivalence table match
        if (equivalenceTable is not null && equivalenceTable.AreEquivalent(norm1, norm2))
        {
            return EquivalenceTableConfidence;
        }

        // Package key match (same package, different version)
        var key1 = ExtractPackageKey(norm1);
        var key2 = ExtractPackageKey(norm2);

        if (!string.IsNullOrEmpty(key1) && string.Equals(key1, key2, StringComparison.OrdinalIgnoreCase))
        {
            return PackageKeyConfidence;
        }

        // Same ecosystem and name (different namespace)
        var eco1 = ExtractEcosystem(norm1);
        var eco2 = ExtractEcosystem(norm2);
        var name1 = ExtractName(norm1);
        var name2 = ExtractName(norm2);

        if (!string.IsNullOrEmpty(eco1) &&
            string.Equals(eco1, eco2, StringComparison.OrdinalIgnoreCase) &&
            !string.IsNullOrEmpty(name1) &&
            string.Equals(name1, name2, StringComparison.OrdinalIgnoreCase))
        {
            return EcosystemAndNameConfidence;
        }

        return NoMatchConfidence;
    }
}
|
||||
Reference in New Issue
Block a user