sprints enhancements
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IMergeHashCalculator.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-002
|
||||
// Description: Interface for deterministic semantic merge hash computation
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.Concelier.Models;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity;
|
||||
|
||||
/// <summary>
/// Contract for producing the deterministic, semantic merge hash used to
/// deduplicate advisories.
/// </summary>
/// <remarks>
/// A merge hash is not a content hash. Only identity components feed it
/// (CVE + affects_key + version_range + weaknesses + patch_lineage), so the
/// same CVE affecting the same package is expected to yield the same hash no
/// matter which source (Debian, RHEL, etc.) reported it.
/// </remarks>
public interface IMergeHashCalculator
{
    /// <summary>
    /// Hashes an explicit set of advisory identity components.
    /// </summary>
    /// <param name="input">Identity components that make up the hash.</param>
    /// <returns>Hex-encoded SHA256 digest carrying the "sha256:" prefix.</returns>
    string ComputeMergeHash(MergeHashInput input);

    /// <summary>
    /// Hashes an <see cref="Advisory"/> domain model directly; the identity
    /// components are taken from the advisory itself.
    /// </summary>
    /// <param name="advisory">Advisory whose merge hash is wanted.</param>
    /// <returns>Hex-encoded SHA256 digest carrying the "sha256:" prefix.</returns>
    string ComputeMergeHash(Advisory advisory);

    /// <summary>
    /// Hashes one particular affected package of an advisory.
    /// </summary>
    /// <param name="advisory">Advisory supplying the CVE and weaknesses.</param>
    /// <param name="affectedPackage">Affected package to compute the hash for.</param>
    /// <returns>Hex-encoded SHA256 digest carrying the "sha256:" prefix.</returns>
    string ComputeMergeHash(Advisory advisory, AffectedPackage affectedPackage);
}
|
||||
|
||||
/// <summary>
/// Bundle of identity components from which a merge hash is computed.
/// </summary>
public sealed record MergeHashInput
{
    /// <summary>
    /// Required CVE identifier, for example "CVE-2024-1234".
    /// Normalized to uppercase before hashing.
    /// </summary>
    public required string Cve { get; init; }

    /// <summary>
    /// Required identifier of the affected package (PURL or CPE).
    /// Normalized according to the rules of its package type before hashing.
    /// </summary>
    public required string AffectsKey { get; init; }

    /// <summary>
    /// Optional expression describing the affected version range.
    /// Normalized to canonical interval notation before hashing.
    /// </summary>
    public string? VersionRange { get; init; }

    /// <summary>
    /// Optional CWE identifiers associated with the advisory; empty by default.
    /// Normalized to uppercase, sorted and deduplicated before hashing.
    /// </summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>
    /// Optional upstream patch provenance (commit SHA, patch ID).
    /// Lets distro backports be told apart from upstream fixes.
    /// </summary>
    public string? PatchLineage { get; init; }
}
|
||||
@@ -0,0 +1,288 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// MergeHashCalculator.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Tasks: MHASH-8200-009, MHASH-8200-010, MHASH-8200-011
|
||||
// Description: Core merge hash calculator implementation
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
using StellaOps.Concelier.Models;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity;
|
||||
|
||||
/// <summary>
/// Computes deterministic semantic merge hash for advisory deduplication.
/// </summary>
/// <remarks>
/// The hash covers identity components only, rendered into one canonical string:
/// <list type="bullet">
/// <item>CVE identifier (normalized, uppercase)</item>
/// <item>Affected package identifier (PURL/CPE, normalized)</item>
/// <item>Version range (canonical interval notation)</item>
/// <item>CWE weaknesses (sorted, deduplicated)</item>
/// <item>Patch lineage (optional, for backport differentiation)</item>
/// </list>
/// </remarks>
public sealed class MergeHashCalculator : IMergeHashCalculator
{
    private static readonly UTF8Encoding Utf8NoBom = new(false);

    private readonly ICveNormalizer _cveNormalizer;
    private readonly IPurlNormalizer _purlNormalizer;
    private readonly ICpeNormalizer _cpeNormalizer;
    private readonly IVersionRangeNormalizer _versionRangeNormalizer;
    private readonly ICweNormalizer _cweNormalizer;
    private readonly IPatchLineageNormalizer _patchLineageNormalizer;

    /// <summary>
    /// Creates a calculator wired to the singleton default normalizers.
    /// </summary>
    public MergeHashCalculator()
        : this(
            CveNormalizer.Instance,
            PurlNormalizer.Instance,
            CpeNormalizer.Instance,
            VersionRangeNormalizer.Instance,
            CweNormalizer.Instance,
            PatchLineageNormalizer.Instance)
    {
    }

    /// <summary>
    /// Creates a calculator that uses the supplied normalizers.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any normalizer is null.</exception>
    public MergeHashCalculator(
        ICveNormalizer cveNormalizer,
        IPurlNormalizer purlNormalizer,
        ICpeNormalizer cpeNormalizer,
        IVersionRangeNormalizer versionRangeNormalizer,
        ICweNormalizer cweNormalizer,
        IPatchLineageNormalizer patchLineageNormalizer)
    {
        _cveNormalizer = cveNormalizer ?? throw new ArgumentNullException(nameof(cveNormalizer));
        _purlNormalizer = purlNormalizer ?? throw new ArgumentNullException(nameof(purlNormalizer));
        _cpeNormalizer = cpeNormalizer ?? throw new ArgumentNullException(nameof(cpeNormalizer));
        _versionRangeNormalizer = versionRangeNormalizer ?? throw new ArgumentNullException(nameof(versionRangeNormalizer));
        _cweNormalizer = cweNormalizer ?? throw new ArgumentNullException(nameof(cweNormalizer));
        _patchLineageNormalizer = patchLineageNormalizer ?? throw new ArgumentNullException(nameof(patchLineageNormalizer));
    }

    /// <inheritdoc />
    public string ComputeMergeHash(MergeHashInput input)
    {
        ArgumentNullException.ThrowIfNull(input);

        return ComputeHash(BuildCanonicalString(input));
    }

    /// <inheritdoc />
    /// <remarks>
    /// When the advisory lists affected packages, only the FIRST one contributes
    /// to the hash. Callers that need one hash per package should use the
    /// overload taking an <see cref="AffectedPackage"/>.
    /// </remarks>
    public string ComputeMergeHash(Advisory advisory)
    {
        ArgumentNullException.ThrowIfNull(advisory);

        // CVE comes from the advisory key or, failing that, its aliases.
        var cve = ExtractCve(advisory);

        if (advisory.AffectedPackages.IsDefaultOrEmpty)
        {
            // No packages: the identity is just the CVE plus its weaknesses.
            return ComputeMergeHash(new MergeHashInput
            {
                Cve = cve,
                AffectsKey = string.Empty,
                VersionRange = null,
                Weaknesses = ExtractWeaknesses(advisory),
                PatchLineage = null
            });
        }

        // The first affected package is treated as the primary identity.
        return ComputeMergeHash(advisory, advisory.AffectedPackages[0]);
    }

    /// <inheritdoc />
    public string ComputeMergeHash(Advisory advisory, AffectedPackage affectedPackage)
    {
        ArgumentNullException.ThrowIfNull(advisory);
        ArgumentNullException.ThrowIfNull(affectedPackage);

        return ComputeMergeHash(new MergeHashInput
        {
            Cve = ExtractCve(advisory),
            AffectsKey = BuildAffectsKey(affectedPackage),
            VersionRange = BuildVersionRange(affectedPackage),
            Weaknesses = ExtractWeaknesses(advisory),
            PatchLineage = ExtractPatchLineage(advisory, affectedPackage)
        });
    }

    /// <summary>
    /// Normalizes every component and renders them in a fixed field order as
    /// <c>CVE:…|AFFECTS:…|VERSION:…|CWE:…|LINEAGE:…</c>.
    /// </summary>
    private string BuildCanonicalString(MergeHashInput input)
    {
        var cve = _cveNormalizer.Normalize(input.Cve);
        var affectsKey = NormalizeAffectsKey(input.AffectsKey);
        var versionRange = _versionRangeNormalizer.Normalize(input.VersionRange);
        var weaknesses = _cweNormalizer.Normalize(input.Weaknesses);
        var patchLineage = _patchLineageNormalizer.Normalize(input.PatchLineage);

        // A null lineage is rendered as an empty field so the layout never changes.
        return new StringBuilder()
            .Append("CVE:").Append(cve).Append('|')
            .Append("AFFECTS:").Append(affectsKey).Append('|')
            .Append("VERSION:").Append(versionRange).Append('|')
            .Append("CWE:").Append(weaknesses).Append('|')
            .Append("LINEAGE:").Append(patchLineage ?? string.Empty)
            .ToString();
    }

    /// <summary>
    /// Routes the key to the CPE normalizer when it starts with "cpe:" and to
    /// the PURL normalizer otherwise (explicit "pkg:" keys and unknown formats
    /// alike). Blank keys normalize to the empty string.
    /// </summary>
    private string NormalizeAffectsKey(string affectsKey)
    {
        if (string.IsNullOrWhiteSpace(affectsKey))
        {
            return string.Empty;
        }

        var key = affectsKey.Trim();

        return key.StartsWith("cpe:", StringComparison.OrdinalIgnoreCase)
            ? _cpeNormalizer.Normalize(key)
            : _purlNormalizer.Normalize(key);
    }

    /// <summary>
    /// SHA256 over the BOM-less UTF-8 bytes of <paramref name="canonical"/>,
    /// returned as lowercase hex prefixed with "sha256:".
    /// </summary>
    private static string ComputeHash(string canonical)
    {
        var digest = SHA256.HashData(Utf8NoBom.GetBytes(canonical));
        return $"sha256:{Convert.ToHexString(digest).ToLowerInvariant()}";
    }

    /// <summary>
    /// Returns the advisory key when it looks like a CVE, otherwise the first
    /// alias starting with "CVE-", otherwise the advisory key as a last resort.
    /// </summary>
    private static string ExtractCve(Advisory advisory)
    {
        if (advisory.AdvisoryKey.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            return advisory.AdvisoryKey;
        }

        var cveAlias = advisory.Aliases
            .FirstOrDefault(static a => a.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase));

        return cveAlias ?? advisory.AdvisoryKey;
    }

    /// <summary>
    /// The package identifier is used verbatim as the affects key.
    /// </summary>
    private static string BuildAffectsKey(AffectedPackage package) => package.Identifier;

    /// <summary>
    /// Combines all version ranges of the package into one comma-joined,
    /// ordinally sorted expression, or null when nothing usable is present.
    /// </summary>
    private static string? BuildVersionRange(AffectedPackage package)
    {
        if (package.VersionRanges.IsDefaultOrEmpty)
        {
            return null;
        }

        // Fall back to the primitives when the expression is null OR blank;
        // a plain `??` would let a whitespace-only expression hide valid
        // introduced/fixed/lastAffected data.
        var ranges = package.VersionRanges
            .Select(static r => string.IsNullOrWhiteSpace(r.RangeExpression)
                ? BuildRangeFromPrimitives(r)
                : r.RangeExpression)
            .Where(static r => !string.IsNullOrWhiteSpace(r))
            .OrderBy(static r => r, StringComparer.Ordinal)
            .ToArray();

        return ranges.Length == 0 ? null : string.Join(",", ranges);
    }

    /// <summary>
    /// Builds "&gt;=introduced,&lt;fixed" (or "&lt;=lastAffected" when no fixed
    /// version exists) from the range primitives; null when none are set.
    /// </summary>
    private static string? BuildRangeFromPrimitives(AffectedVersionRange range)
    {
        var parts = new List<string>(2);

        if (!string.IsNullOrWhiteSpace(range.IntroducedVersion))
        {
            parts.Add($">={range.IntroducedVersion}");
        }

        // A fixed version takes precedence over a last-affected version.
        if (!string.IsNullOrWhiteSpace(range.FixedVersion))
        {
            parts.Add($"<{range.FixedVersion}");
        }
        else if (!string.IsNullOrWhiteSpace(range.LastAffectedVersion))
        {
            parts.Add($"<={range.LastAffectedVersion}");
        }

        return parts.Count > 0 ? string.Join(",", parts) : null;
    }

    /// <summary>
    /// Collects the non-blank CWE identifiers of the advisory.
    /// </summary>
    private static IReadOnlyList<string> ExtractWeaknesses(Advisory advisory)
    {
        if (advisory.Cwes.IsDefaultOrEmpty)
        {
            return [];
        }

        return advisory.Cwes
            .Select(static w => w.Identifier)
            .Where(static w => !string.IsNullOrWhiteSpace(w))
            .ToList();
    }

    /// <summary>
    /// Simplified lineage lookup: the URL of the first reference whose kind is
    /// "patch", "fix" or "commit". <paramref name="package"/> is currently not
    /// consulted; a fuller implementation would use backport proof or
    /// upstream references.
    /// </summary>
    private static string? ExtractPatchLineage(Advisory advisory, AffectedPackage package)
    {
        return advisory.References
            .Where(static r => r.Kind is "patch" or "fix" or "commit")
            .Select(static r => r.Url)
            .FirstOrDefault();
    }
}
|
||||
@@ -0,0 +1,159 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// MergeHashShadowWriteService.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-020
|
||||
// Description: Shadow-write merge hashes for existing advisories during migration
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Concelier.Models;
|
||||
using StellaOps.Concelier.Storage.Advisories;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity;
|
||||
|
||||
/// <summary>
/// Service to compute and persist merge hashes for existing advisories
/// without changing their identity. Used during migration to backfill
/// merge_hash for pre-existing data.
/// </summary>
public sealed class MergeHashShadowWriteService
{
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IMergeHashCalculator _mergeHashCalculator;
    private readonly ILogger<MergeHashShadowWriteService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any dependency is null.</exception>
    public MergeHashShadowWriteService(
        IAdvisoryStore advisoryStore,
        IMergeHashCalculator mergeHashCalculator,
        ILogger<MergeHashShadowWriteService> logger)
    {
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Backfills merge hashes for all advisories that don't have one.
    /// </summary>
    /// <remarks>
    /// A failure on one advisory is logged, counted and does not stop the run.
    /// Cancellation requested through <paramref name="cancellationToken"/> is
    /// never counted as a failure; it aborts the run.
    /// </remarks>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Summary of the backfill operation.</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public async Task<ShadowWriteResult> BackfillAllAsync(CancellationToken cancellationToken)
    {
        var processed = 0;
        var updated = 0;
        var skipped = 0;
        var failed = 0;

        await foreach (var advisory in _advisoryStore.StreamAsync(cancellationToken).ConfigureAwait(false))
        {
            cancellationToken.ThrowIfCancellationRequested();
            processed++;

            // Advisories that already carry a merge hash are left untouched.
            if (!string.IsNullOrEmpty(advisory.MergeHash))
            {
                skipped++;
                continue;
            }

            try
            {
                await ComputeAndPersistAsync(advisory, cancellationToken).ConfigureAwait(false);
                updated++;

                // Progress line every 100 updates.
                if (updated % 100 == 0)
                {
                    _logger.LogInformation(
                        "Merge hash backfill progress: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
                        processed, updated, skipped, failed);
                }
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Caller-requested cancellation is not an advisory failure:
                // propagate instead of counting and logging it as one.
                throw;
            }
            catch (Exception ex)
            {
                failed++;
                _logger.LogWarning(ex, "Failed to compute merge hash for {AdvisoryKey}", advisory.AdvisoryKey);
            }
        }

        _logger.LogInformation(
            "Merge hash backfill complete: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
            processed, updated, skipped, failed);

        return new ShadowWriteResult(processed, updated, skipped, failed);
    }

    /// <summary>
    /// Computes and persists merge hash for a single advisory.
    /// </summary>
    /// <param name="advisoryKey">The advisory key to process.</param>
    /// <param name="force">If true, recomputes even if hash exists.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// True if the advisory was updated; false when it was not found or already
    /// had a merge hash and <paramref name="force"/> is false.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="advisoryKey"/> is null or blank.</exception>
    public async Task<bool> BackfillOneAsync(string advisoryKey, bool force, CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(advisoryKey);

        var advisory = await _advisoryStore.FindAsync(advisoryKey, cancellationToken).ConfigureAwait(false);
        if (advisory is null)
        {
            _logger.LogWarning("Advisory {AdvisoryKey} not found for merge hash backfill", advisoryKey);
            return false;
        }

        if (!force && !string.IsNullOrEmpty(advisory.MergeHash))
        {
            _logger.LogDebug("Skipping {AdvisoryKey}: already has merge hash", advisoryKey);
            return false;
        }

        try
        {
            var mergeHash = await ComputeAndPersistAsync(advisory, cancellationToken).ConfigureAwait(false);

            _logger.LogInformation("Computed merge hash for {AdvisoryKey}: {MergeHash}", advisoryKey, mergeHash);
            return true;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Requested cancellation is not an error worth logging as one.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to compute merge hash for {AdvisoryKey}", advisoryKey);
            throw;
        }
    }

    /// <summary>
    /// Computes the merge hash, upserts a copy of the advisory carrying it and
    /// returns the hash.
    /// </summary>
    private async Task<string> ComputeAndPersistAsync(Advisory advisory, CancellationToken cancellationToken)
    {
        var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);
        var enriched = EnrichWithMergeHash(advisory, mergeHash);
        await _advisoryStore.UpsertAsync(enriched, cancellationToken).ConfigureAwait(false);
        return mergeHash;
    }

    /// <summary>
    /// Builds a new <see cref="Advisory"/> that copies every field of
    /// <paramref name="advisory"/> and sets the merge hash.
    /// </summary>
    private static Advisory EnrichWithMergeHash(Advisory advisory, string mergeHash)
    {
        return new Advisory(
            advisory.AdvisoryKey,
            advisory.Title,
            advisory.Summary,
            advisory.Language,
            advisory.Published,
            advisory.Modified,
            advisory.Severity,
            advisory.ExploitKnown,
            advisory.Aliases,
            advisory.Credits,
            advisory.References,
            advisory.AffectedPackages,
            advisory.CvssMetrics,
            advisory.Provenance,
            advisory.Description,
            advisory.Cwes,
            advisory.CanonicalMetricId,
            mergeHash);
    }
}
|
||||
|
||||
/// <summary>
/// Counters summarizing one shadow-write backfill run.
/// </summary>
/// <param name="Processed">Number of advisories examined in total.</param>
/// <param name="Updated">Number of advisories that received a new merge hash.</param>
/// <param name="Skipped">Number of advisories left alone because they already had a merge hash.</param>
/// <param name="Failed">Number of advisories for which the hash computation failed.</param>
public sealed record ShadowWriteResult(int Processed, int Updated, int Skipped, int Failed);
|
||||
@@ -0,0 +1,120 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CpeNormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-004
|
||||
// Description: CPE normalization for merge hash
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Normalizes CPE identifiers to canonical CPE 2.3 format.
/// </summary>
/// <remarks>
/// CPE 2.3 formatted strings are re-emitted with normalized components, CPE
/// 2.2 URIs are converted to the 2.3 layout, and anything else is returned
/// trimmed and lowercased.
/// </remarks>
public sealed partial class CpeNormalizer : ICpeNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CpeNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for CPE 2.3 formatted string binding: part, vendor, product and
    /// eight further colon-separated components.
    /// </summary>
    [GeneratedRegex(
        @"^cpe:2\.3:([aho]):([^:]+):([^:]+):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*)$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex Cpe23Pattern();

    /// <summary>
    /// Pattern for CPE 2.2 URI binding. The trailing version, update, edition
    /// and language components may be absent OR empty, so URIs such as
    /// <c>cpe:/o:microsoft:windows_xp:::pro</c> are recognized.
    /// </summary>
    [GeneratedRegex(
        @"^cpe:/([aho]):([^:]+):([^:]+)(?::([^:]*))?(?::([^:]*))?(?::([^:]*))?(?::([^:]*))?$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex Cpe22Pattern();

    /// <inheritdoc />
    public string Normalize(string cpe)
    {
        if (string.IsNullOrWhiteSpace(cpe))
        {
            return string.Empty;
        }

        var candidate = cpe.Trim();

        // CPE 2.3 is tried first, then the older 2.2 URI form.
        var cpe23 = Cpe23Pattern().Match(candidate);
        if (cpe23.Success)
        {
            return NormalizeCpe23(cpe23);
        }

        var cpe22 = Cpe22Pattern().Match(candidate);
        if (cpe22.Success)
        {
            return ConvertCpe22ToCpe23(cpe22);
        }

        // Unrecognized input is only lowercased.
        return candidate.ToLowerInvariant();
    }

    /// <summary>
    /// Rebuilds a CPE 2.3 string from a <see cref="Cpe23Pattern"/> match with
    /// every component normalized.
    /// </summary>
    private static string NormalizeCpe23(Match match)
    {
        var part = match.Groups[1].Value.ToLowerInvariant();

        // Groups 2..11: vendor, product, version, update, edition, language,
        // sw_edition, target_sw, target_hw, other.
        var components = new string[10];
        for (var i = 0; i < components.Length; i++)
        {
            components[i] = NormalizeComponent(match.Groups[i + 2].Value);
        }

        return $"cpe:2.3:{part}:{string.Join(':', components)}";
    }

    /// <summary>
    /// Converts a <see cref="Cpe22Pattern"/> match to CPE 2.3; components the
    /// URI does not carry become "*".
    /// </summary>
    private static string ConvertCpe22ToCpe23(Match match)
    {
        var part = match.Groups[1].Value.ToLowerInvariant();
        var vendor = NormalizeComponent(match.Groups[2].Value);
        var product = NormalizeComponent(match.Groups[3].Value);
        var version = OptionalComponent(match.Groups[4]);
        var update = OptionalComponent(match.Groups[5]);
        var edition = OptionalComponent(match.Groups[6]);
        var language = OptionalComponent(match.Groups[7]);

        return $"cpe:2.3:{part}:{vendor}:{product}:{version}:{update}:{edition}:{language}:*:*:*:*";
    }

    /// <summary>
    /// Normalized value of an optional group, or "*" when it did not participate.
    /// </summary>
    private static string OptionalComponent(Group group)
        => group.Success ? NormalizeComponent(group.Value) : "*";

    /// <summary>
    /// Blank, "*" and "ANY" become "*"; "-" and "NA" become "-"; everything
    /// else is trimmed and lowercased. The "ANY"/"NA" check is case-sensitive.
    /// </summary>
    private static string NormalizeComponent(string component)
    {
        if (string.IsNullOrWhiteSpace(component))
        {
            return "*";
        }

        var value = component.Trim();

        return value switch
        {
            "*" or "ANY" => "*",
            "-" or "NA" => "-",
            _ => value.ToLowerInvariant()
        };
    }
}
|
||||
@@ -0,0 +1,71 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CveNormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-003 (part of normalization helpers)
|
||||
// Description: CVE identifier normalization for merge hash
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Normalizes CVE identifiers to canonical uppercase format.
/// </summary>
/// <remarks>
/// Input that does not look like a CVE identifier is not rejected: it is
/// returned trimmed and uppercased so it still hashes consistently.
/// </remarks>
public sealed partial class CveNormalizer : ICveNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CveNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern matching a CVE identifier without its prefix: YYYY-NNNN
    /// (four digits, a dash, then four or more digits).
    /// </summary>
    [GeneratedRegex(@"^\d{4}-\d{4,}$")]
    private static partial Regex BareIdPattern();

    /// <inheritdoc />
    public string Normalize(string? cve)
    {
        if (string.IsNullOrWhiteSpace(cve))
        {
            return string.Empty;
        }

        var candidate = cve.Trim();

        // A bare "2024-1234" gets the missing prefix: CVE-2024-1234.
        if (BareIdPattern().IsMatch(candidate))
        {
            candidate = "CVE-" + candidate;
        }

        // Uppercasing also canonicalizes a lowercase or mixed-case "cve-" prefix.
        return candidate.ToUpperInvariant();
    }
}
|
||||
@@ -0,0 +1,82 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CweNormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-006
|
||||
// Description: CWE identifier list normalization for merge hash
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Normalizes CWE identifier lists for deterministic hashing.
/// </summary>
/// <remarks>
/// Each entry is reduced to "CWE-" plus the first run of digits found in it,
/// with leading zeros removed so that "CWE-079" and "CWE-79" collapse into
/// one identifier. Entries without digits are dropped.
/// </remarks>
public sealed partial class CweNormalizer : ICweNormalizer
{
    private const string Prefix = "CWE-";

    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CweNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern matching CWE identifier: CWE-NNN or just NNN.
    /// </summary>
    [GeneratedRegex(@"(?:CWE-)?(\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CwePattern();

    /// <inheritdoc />
    public string Normalize(IEnumerable<string>? cwes)
    {
        if (cwes is null)
        {
            return string.Empty;
        }

        // Sort numerically; identifiers too large for an int share one sort
        // key and are ordered among themselves by ordinal comparison.
        var normalized = cwes
            .Where(static cwe => !string.IsNullOrWhiteSpace(cwe))
            .Select(NormalizeSingle)
            .OfType<string>()
            .Distinct(StringComparer.Ordinal)
            .OrderBy(ExtractCweNumber)
            .ThenBy(static cwe => cwe, StringComparer.Ordinal);

        // Joining an empty sequence yields the empty string.
        return string.Join(",", normalized);
    }

    /// <summary>
    /// Canonical "CWE-N" form of one entry, or null when it has no digits.
    /// </summary>
    private static string? NormalizeSingle(string cwe)
    {
        var match = CwePattern().Match(cwe.Trim());
        if (!match.Success)
        {
            return null;
        }

        // Leading zeros carry no meaning; an all-zero number stays "0".
        var digits = match.Groups[1].Value.TrimStart('0');
        return digits.Length == 0 ? Prefix + "0" : Prefix + digits;
    }

    /// <summary>
    /// Numeric sort key of a canonical "CWE-N" identifier;
    /// <see cref="int.MaxValue"/> when the number cannot be parsed as an int.
    /// </summary>
    private static int ExtractCweNumber(string cwe)
    {
        return int.TryParse(
            cwe.AsSpan(Prefix.Length),
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var number)
            ? number
            : int.MaxValue;
    }
}
|
||||
@@ -0,0 +1,95 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// INormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Tasks: MHASH-8200-003 to MHASH-8200-007
|
||||
// Description: Normalizer interfaces for merge hash components
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Brings PURL identifiers into a canonical form suitable for deterministic hashing.
/// </summary>
public interface IPurlNormalizer
{
    /// <summary>
    /// Returns the canonical form of a PURL. Implementations are expected to:
    /// <list type="bullet">
    /// <item>lowercase the package type;</item>
    /// <item>URL-encode special characters in the namespace;</item>
    /// <item>strip non-essential qualifiers (arch, type, checksum);</item>
    /// <item>sort the remaining qualifiers alphabetically.</item>
    /// </list>
    /// </summary>
    /// <param name="purl">PURL to normalize.</param>
    /// <returns>The canonical PURL.</returns>
    string Normalize(string purl);
}
|
||||
|
||||
/// <summary>
/// Brings CPE identifiers into the canonical CPE 2.3 format.
/// </summary>
public interface ICpeNormalizer
{
    /// <summary>
    /// Returns the canonical CPE 2.3 form of a CPE. Implementations are expected to:
    /// <list type="bullet">
    /// <item>convert the CPE 2.2 URI format to the CPE 2.3 formatted string;</item>
    /// <item>lowercase vendor and product;</item>
    /// <item>normalize wildcards.</item>
    /// </list>
    /// </summary>
    /// <param name="cpe">CPE to normalize.</param>
    /// <returns>The canonical CPE 2.3 string.</returns>
    string Normalize(string cpe);
}
|
||||
|
||||
/// <summary>
/// Brings version range expressions into canonical interval notation.
/// </summary>
public interface IVersionRangeNormalizer
{
    /// <summary>
    /// Returns the canonical expression for a version range. Implementations
    /// are expected to:
    /// <list type="bullet">
    /// <item>convert the various input formats to canonical interval notation;</item>
    /// <item>trim whitespace;</item>
    /// <item>normalize operators (e.g., "[1.0, 2.0)" → "&gt;=1.0,&lt;2.0").</item>
    /// </list>
    /// </summary>
    /// <param name="range">Range expression to normalize; may be null.</param>
    /// <returns>The canonical range expression.</returns>
    string Normalize(string? range);
}
|
||||
|
||||
/// <summary>
/// Brings lists of CWE identifiers into a canonical form for deterministic hashing.
/// </summary>
public interface ICweNormalizer
{
    /// <summary>
    /// Reduces a CWE list to a sorted, deduplicated, uppercase set.
    /// Implementations are expected to:
    /// <list type="bullet">
    /// <item>uppercase all identifiers;</item>
    /// <item>ensure the "CWE-" prefix;</item>
    /// <item>sort numerically by CWE number;</item>
    /// <item>deduplicate;</item>
    /// <item>return the result as a comma-joined string.</item>
    /// </list>
    /// </summary>
    /// <param name="cwes">CWE identifiers to normalize; may be null.</param>
    /// <returns>The comma-joined canonical identifiers.</returns>
    string Normalize(IEnumerable<string>? cwes);
}
|
||||
|
||||
/// <summary>
/// Brings patch lineage references into a canonical form for deterministic hashing.
/// </summary>
public interface IPatchLineageNormalizer
{
    /// <summary>
    /// Reduces a patch lineage to a canonical commit reference.
    /// Implementations are expected to:
    /// <list type="bullet">
    /// <item>extract commit SHAs from various formats;</item>
    /// <item>normalize them to lowercase hex;</item>
    /// <item>handle patch IDs and bug tracker references.</item>
    /// </list>
    /// </summary>
    /// <param name="lineage">Patch lineage to normalize; may be null.</param>
    /// <returns>The canonical reference; the return type permits null.</returns>
    string? Normalize(string? lineage);
}
|
||||
|
||||
/// <summary>
/// Contract for bringing CVE identifiers into a deterministic form for hashing.
/// </summary>
public interface ICveNormalizer
{
    /// <summary>
    /// Produces the canonical uppercase form of a CVE identifier.
    /// </summary>
    /// <remarks>
    /// Implementations are expected to:
    /// <list type="bullet">
    /// <item><description>Ensure the "CVE-" prefix.</description></item>
    /// <item><description>Uppercase the identifier.</description></item>
    /// <item><description>Validate the format (CVE-YYYY-NNNNN+).</description></item>
    /// </list>
    /// </remarks>
    /// <param name="cve">The CVE identifier to normalize; may be <c>null</c>.</param>
    /// <returns>The normalized CVE identifier.</returns>
    string Normalize(string? cve);
}
|
||||
@@ -0,0 +1,119 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PatchLineageNormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-007
|
||||
// Description: Patch lineage normalization for merge hash
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Normalizes patch lineage references for deterministic hashing.
/// Extracts upstream commit references from various formats.
/// </summary>
/// <remarks>
/// Recognition is attempted in a fixed order and the first hit wins:
/// commit URL, full 40-character SHA, abbreviated SHA (only when the text
/// contains a commit-like keyword), then patch ID.
/// </remarks>
public sealed partial class PatchLineageNormalizer : IPatchLineageNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static PatchLineageNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for full Git commit SHA (40 hex chars).
    /// </summary>
    [GeneratedRegex(@"\b([0-9a-f]{40})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex FullShaPattern();

    /// <summary>
    /// Pattern for abbreviated Git commit SHA (7-12 hex chars).
    /// </summary>
    /// <remarks>
    /// The negative lookbehind keeps the numeric part of a patch ID such as
    /// "PATCH-1234567" from being mistaken for an abbreviated SHA; without it
    /// the digits would win because "patch" also satisfies the commit-keyword heuristic.
    /// </remarks>
    [GeneratedRegex(@"(?<!PATCH-)\b([0-9a-f]{7,12})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex AbbrevShaPattern();

    /// <summary>
    /// Pattern for GitHub/GitLab commit URLs.
    /// </summary>
    [GeneratedRegex(
        @"(?:github\.com|gitlab\.com)/[^/]+/[^/]+/commit/([0-9a-f]{7,40})",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CommitUrlPattern();

    /// <summary>
    /// Pattern for patch IDs in format "patch-NNNNN" or "PATCH-NNNNN".
    /// </summary>
    [GeneratedRegex(@"\b(PATCH-\d+)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex PatchIdPattern();

    /// <summary>
    /// Reduces a lineage reference to a lowercase commit SHA or an uppercase patch ID.
    /// </summary>
    /// <param name="lineage">Raw lineage text; may be <c>null</c> or whitespace.</param>
    /// <returns>
    /// The lowercase SHA taken from a commit URL, a full SHA or an abbreviated SHA,
    /// or the uppercase patch ID; <c>null</c> when the input is blank or nothing is recognized.
    /// </returns>
    public string? Normalize(string? lineage)
    {
        if (string.IsNullOrWhiteSpace(lineage))
        {
            return null;
        }

        var trimmed = lineage.Trim();

        // Try to extract commit SHA from URL first
        var urlMatch = CommitUrlPattern().Match(trimmed);
        if (urlMatch.Success)
        {
            return NormalizeSha(urlMatch.Groups[1].Value);
        }

        // Try full SHA
        var fullMatch = FullShaPattern().Match(trimmed);
        if (fullMatch.Success)
        {
            return NormalizeSha(fullMatch.Groups[1].Value);
        }

        // Try abbreviated SHA (only if it looks like a commit reference)
        if (LooksLikeCommitReference(trimmed))
        {
            var abbrevMatch = AbbrevShaPattern().Match(trimmed);
            if (abbrevMatch.Success)
            {
                return NormalizeSha(abbrevMatch.Groups[1].Value);
            }
        }

        // Try patch ID
        var patchMatch = PatchIdPattern().Match(trimmed);
        if (patchMatch.Success)
        {
            return patchMatch.Groups[1].Value.ToUpperInvariant();
        }

        // Return null if no recognizable pattern
        return null;
    }

    /// <summary>
    /// Heuristic gate for abbreviated SHAs: true when the text contains
    /// "commit", "sha", "fix", "patch" or "backport" (case-insensitive).
    /// </summary>
    private static bool LooksLikeCommitReference(string value)
    {
        var lower = value.ToLowerInvariant();
        return lower.Contains("commit") ||
               lower.Contains("sha") ||
               lower.Contains("fix") ||
               lower.Contains("patch") ||
               lower.Contains("backport");
    }

    /// <summary>
    /// Lowercases a SHA. Full and abbreviated SHAs are both returned as given
    /// (no expansion or truncation), so only the casing changes.
    /// </summary>
    private static string NormalizeSha(string sha) => sha.ToLowerInvariant();
}
|
||||
@@ -0,0 +1,178 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PurlNormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-003
|
||||
// Description: PURL normalization for merge hash
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Web;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Normalizes PURL identifiers to canonical form for deterministic hashing.
/// </summary>
/// <remarks>
/// The type is lowercased, namespace and name are percent-decoded and (for most
/// ecosystems) lowercased, non-identity qualifiers are removed, the remaining
/// qualifiers are sorted by key, and the subpath is dropped.
/// </remarks>
public sealed partial class PurlNormalizer : IPurlNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static PurlNormalizer Instance { get; } = new();

    /// <summary>
    /// Qualifiers to strip from PURL for identity hashing (architecture-specific, non-identity).
    /// </summary>
    private static readonly HashSet<string> StrippedQualifiers = new(StringComparer.OrdinalIgnoreCase)
    {
        "arch",
        "architecture",
        "os",
        "platform",
        "type",
        "classifier",
        "checksum",
        "download_url",
        "vcs_url",
        "repository_url"
    };

    /// <summary>
    /// Pattern for parsing PURL: pkg:type/namespace/name@version?qualifiers#subpath
    /// </summary>
    /// <remarks>
    /// The namespace group accepts one or more '/'-separated segments so that
    /// identifiers such as pkg:golang/github.com/org/module are parsed structurally
    /// instead of dropping into the lowercase-everything fallback.
    /// </remarks>
    [GeneratedRegex(
        @"^pkg:([a-zA-Z][a-zA-Z0-9+.-]*)(?:/([^/@#?]+(?:/[^/@#?]+)*))?/([^/@#?]+)(?:@([^?#]+))?(?:\?([^#]+))?(?:#(.+))?$",
        RegexOptions.Compiled)]
    private static partial Regex PurlPattern();

    /// <summary>
    /// Normalizes a package identifier.
    /// </summary>
    /// <param name="purl">The identifier to normalize.</param>
    /// <returns>
    /// An empty string for blank input; the trimmed input unchanged when it starts with "cpe:";
    /// the trimmed, lowercased input when it is not a PURL or does not parse;
    /// otherwise the rebuilt canonical PURL.
    /// </returns>
    public string Normalize(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var trimmed = purl.Trim();

        // Handle non-PURL identifiers (CPE, plain package names)
        if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
        {
            // If it looks like a CPE, return as-is for CPE normalizer
            if (trimmed.StartsWith("cpe:", StringComparison.OrdinalIgnoreCase))
            {
                return trimmed;
            }

            // Return lowercase for plain identifiers
            return trimmed.ToLowerInvariant();
        }

        var match = PurlPattern().Match(trimmed);
        if (!match.Success)
        {
            // Invalid PURL format, return lowercase
            return trimmed.ToLowerInvariant();
        }

        var type = match.Groups[1].Value.ToLowerInvariant();
        var ns = match.Groups[2].Success ? NormalizeNamespace(match.Groups[2].Value, type) : null;
        var name = NormalizeName(match.Groups[3].Value, type);
        var version = match.Groups[4].Success ? match.Groups[4].Value : null;
        var qualifiers = match.Groups[5].Success ? NormalizeQualifiers(match.Groups[5].Value) : null;
        // Subpath (group 6) is stripped for identity purposes

        return BuildPurl(type, ns, name, version, qualifiers);
    }

    /// <summary>
    /// Percent-decodes and lowercases the namespace; npm scopes ("@org") are re-encoded.
    /// </summary>
    private static string NormalizeNamespace(string ns, string type)
    {
        // Uri.UnescapeDataString decodes %XX sequences only; unlike HttpUtility.UrlDecode
        // it does not turn a literal '+' into a space.
        var decoded = Uri.UnescapeDataString(ns);

        // For npm, handle scoped packages (@org/pkg): keep the scope percent-encoded
        if (type == "npm" && decoded.StartsWith('@'))
        {
            decoded = decoded.ToLowerInvariant();
            return HttpUtility.UrlEncode(decoded) ?? decoded;
        }

        // Most ecosystems: lowercase namespace
        return decoded.ToLowerInvariant();
    }

    /// <summary>
    /// Percent-decodes the package name and lowercases it, except for "golang" where case is kept.
    /// </summary>
    private static string NormalizeName(string name, string type)
    {
        // '+' must survive decoding: names such as "libstdc++6" or "g++" contain it literally.
        var decoded = Uri.UnescapeDataString(name);

        // Go keeps mixed case; every other type (including the case-insensitive NuGet) is lowercased.
        return type == "golang" ? decoded : decoded.ToLowerInvariant();
    }

    /// <summary>
    /// Lowercases qualifier keys, removes the stripped qualifiers, and sorts the rest by key.
    /// </summary>
    /// <returns>The re-joined qualifier string, or <c>null</c> when nothing remains.</returns>
    private static string? NormalizeQualifiers(string qualifiers)
    {
        if (string.IsNullOrWhiteSpace(qualifiers))
        {
            return null;
        }

        var pairs = qualifiers
            .Split('&', StringSplitOptions.RemoveEmptyEntries)
            .Select(static pair =>
            {
                var eqIndex = pair.IndexOf('=');
                if (eqIndex < 0)
                {
                    // Key without a value
                    return (Key: pair.ToLowerInvariant(), Value: (string?)null);
                }

                return (Key: pair[..eqIndex].ToLowerInvariant(), Value: pair[(eqIndex + 1)..]);
            })
            .Where(pair => !StrippedQualifiers.Contains(pair.Key))
            .OrderBy(static pair => pair.Key, StringComparer.Ordinal)
            .ToList();

        if (pairs.Count == 0)
        {
            return null;
        }

        return string.Join("&", pairs.Select(static p =>
            p.Value is null ? p.Key : $"{p.Key}={p.Value}"));
    }

    /// <summary>
    /// Assembles "pkg:type/[namespace/]name[@version][?qualifiers]" from the normalized parts.
    /// </summary>
    private static string BuildPurl(string type, string? ns, string name, string? version, string? qualifiers)
    {
        var sb = new StringBuilder("pkg:");
        sb.Append(type);
        sb.Append('/');

        if (!string.IsNullOrEmpty(ns))
        {
            sb.Append(ns);
            sb.Append('/');
        }

        sb.Append(name);

        if (!string.IsNullOrEmpty(version))
        {
            sb.Append('@');
            sb.Append(version);
        }

        if (!string.IsNullOrEmpty(qualifiers))
        {
            sb.Append('?');
            sb.Append(qualifiers);
        }

        return sb.ToString();
    }
}
|
||||
@@ -0,0 +1,165 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// VersionRangeNormalizer.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-005
|
||||
// Description: Version range normalization for merge hash
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
|
||||
|
||||
/// <summary>
/// Normalizes version range expressions to canonical interval notation.
/// </summary>
/// <remarks>
/// Equivalent spellings are meant to collapse to one string: "[1.0, 2.0)",
/// "&gt;=1.0, &lt;2.0" and "&lt;2.0,&gt;=1.0" all normalize to "&gt;=1.0,&lt;2.0".
/// </remarks>
public sealed partial class VersionRangeNormalizer : IVersionRangeNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static VersionRangeNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for mathematical interval notation: [1.0, 2.0) or (1.0, 2.0]
    /// </summary>
    [GeneratedRegex(
        @"^([\[\(])\s*([^,\s]*)\s*,\s*([^)\]\s]*)\s*([\]\)])$",
        RegexOptions.Compiled)]
    private static partial Regex IntervalPattern();

    /// <summary>
    /// Pattern for comparison operators: &gt;= 1.0, &lt; 2.0
    /// </summary>
    [GeneratedRegex(
        @"^(>=?|<=?|=|!=|~=|~>|\^)\s*(.+)$",
        RegexOptions.Compiled)]
    private static partial Regex ComparisonPattern();

    /// <summary>
    /// Pattern for input that starts with digits and/or dots (a plain version).
    /// </summary>
    [GeneratedRegex(@"^[\d.]+")]
    private static partial Regex PlainVersionPattern();

    /// <summary>
    /// Normalizes a version range expression.
    /// </summary>
    /// <param name="range">The range expression; may be <c>null</c> or whitespace.</param>
    /// <returns>
    /// An empty string for blank input, "*" for the all-versions markers, a comma-joined
    /// list of operator-prefixed constraints for intervals and constraint lists,
    /// "&gt;=V" for "fixed:V", "=V" for a plain version, or the trimmed input when
    /// nothing is recognized.
    /// </returns>
    public string Normalize(string? range)
    {
        if (string.IsNullOrWhiteSpace(range))
        {
            return string.Empty;
        }

        var trimmed = range.Trim();

        // Handle "all versions" markers (word markers are matched case-insensitively)
        if (trimmed == "*" ||
            string.Equals(trimmed, "all", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(trimmed, "any", StringComparison.OrdinalIgnoreCase))
        {
            return "*";
        }

        // Try interval notation: [1.0, 2.0)
        var intervalMatch = IntervalPattern().Match(trimmed);
        if (intervalMatch.Success)
        {
            return NormalizeInterval(intervalMatch);
        }

        // Handle comma-separated constraints: >=1.0, <2.0
        // This must run before the single-comparison check: the comparison pattern's
        // version group is ".+", so it would otherwise swallow the whole list.
        if (trimmed.Contains(','))
        {
            return NormalizeMultiConstraint(trimmed);
        }

        // Try comparison operators: >= 1.0
        var compMatch = ComparisonPattern().Match(trimmed);
        if (compMatch.Success)
        {
            return FormatComparison(compMatch);
        }

        // Handle "fixed" version notation
        if (trimmed.StartsWith("fixed:", StringComparison.OrdinalIgnoreCase))
        {
            var fixedVersion = trimmed[6..].Trim();
            return $">={fixedVersion}";
        }

        // Handle plain version (treat as exact match)
        if (PlainVersionPattern().IsMatch(trimmed))
        {
            return $"={trimmed}";
        }

        // Return trimmed if unrecognized
        return trimmed;
    }

    /// <summary>
    /// Converts a matched interval into "lower,upper" constraints; an empty bound is omitted.
    /// </summary>
    private static string NormalizeInterval(Match match)
    {
        var leftBracket = match.Groups[1].Value;
        var lower = match.Groups[2].Value.Trim();
        var upper = match.Groups[3].Value.Trim();
        var rightBracket = match.Groups[4].Value;

        var parts = new List<string>();

        if (!string.IsNullOrEmpty(lower))
        {
            // '[' is inclusive, '(' is exclusive
            var op = leftBracket == "[" ? ">=" : ">";
            parts.Add($"{op}{lower}");
        }

        if (!string.IsNullOrEmpty(upper))
        {
            // ']' is inclusive, ')' is exclusive
            var op = rightBracket == "]" ? "<=" : "<";
            parts.Add($"{op}{upper}");
        }

        return string.Join(",", parts);
    }

    /// <summary>
    /// Normalizes each comma-separated constraint, removes duplicates and orders the result.
    /// </summary>
    /// <remarks>
    /// Lower bounds (constraints starting with '&gt;') are placed first so the output
    /// agrees with the interval path; ties are broken by ordinal comparison.
    /// </remarks>
    private static string NormalizeMultiConstraint(string range)
    {
        var constraints = range
            .Split(',', StringSplitOptions.RemoveEmptyEntries)
            .Select(static c => c.Trim())
            .Where(static c => !string.IsNullOrEmpty(c))
            .Select(NormalizeSingleConstraint)
            .Distinct()
            .OrderBy(static c => c.StartsWith('>') ? 0 : 1)
            .ThenBy(static c => c, StringComparer.Ordinal)
            .ToList();

        return string.Join(",", constraints);
    }

    /// <summary>
    /// Normalizes one constraint; text that is not operator-led is returned unchanged.
    /// </summary>
    private static string NormalizeSingleConstraint(string constraint)
    {
        var match = ComparisonPattern().Match(constraint);
        return match.Success ? FormatComparison(match) : constraint;
    }

    /// <summary>
    /// Renders a comparison match as operator immediately followed by the trimmed version.
    /// </summary>
    private static string FormatComparison(Match match)
    {
        var op = NormalizeOperator(match.Groups[1].Value);
        var version = match.Groups[2].Value.Trim();
        return $"{op}{version}";
    }

    /// <summary>
    /// Maps "~&gt;" to "~="; every other operator is already canonical and is returned as-is.
    /// </summary>
    private static string NormalizeOperator(string op) => op == "~>" ? "~=" : op;
}
|
||||
@@ -0,0 +1,68 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// MergeHashBackfillJob.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-020
|
||||
// Description: Job to backfill merge hashes for existing advisories
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Concelier.Core.Jobs;
|
||||
using StellaOps.Concelier.Merge.Identity;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Jobs;
|
||||
|
||||
/// <summary>
/// Migration job that backfills merge hashes for advisories that already exist.
/// Runs against one advisory key when a seed is supplied, otherwise against all advisories.
/// </summary>
public sealed class MergeHashBackfillJob : IJob
{
    private readonly MergeHashShadowWriteService _shadowWriteService;
    private readonly ILogger<MergeHashBackfillJob> _logger;

    /// <summary>
    /// Creates the job.
    /// </summary>
    /// <param name="shadowWriteService">Service that performs the backfill.</param>
    /// <param name="logger">Logger for start and completion messages.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public MergeHashBackfillJob(
        MergeHashShadowWriteService shadowWriteService,
        ILogger<MergeHashBackfillJob> logger)
    {
        ArgumentNullException.ThrowIfNull(shadowWriteService);
        ArgumentNullException.ThrowIfNull(logger);

        _shadowWriteService = shadowWriteService;
        _logger = logger;
    }

    /// <summary>
    /// Runs the backfill.
    /// </summary>
    /// <remarks>
    /// Parameters read from <paramref name="context"/>:
    /// - "seed" (optional): advisory key to backfill. When missing, non-string or blank, all advisories are backfilled.
    /// - "force" (optional): honoured only when the value is the string "true" (case-insensitive).
    ///   It is passed to the single-advisory path only; the all-advisories call does not receive it.
    /// </remarks>
    /// <param name="context">Job execution context carrying the parameters.</param>
    /// <param name="cancellationToken">Token forwarded to the backfill service.</param>
    public async Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        string? advisoryKey = context.Parameters.TryGetValue("seed", out var rawSeed)
            ? rawSeed as string
            : null;

        var force = false;
        if (context.Parameters.TryGetValue("force", out var rawForce) && rawForce is string forceText)
        {
            force = string.Equals(forceText, "true", StringComparison.OrdinalIgnoreCase);
        }

        if (string.IsNullOrWhiteSpace(advisoryKey))
        {
            // No usable seed: process every advisory.
            _logger.LogInformation("Starting merge hash backfill for all advisories");
            var summary = await _shadowWriteService.BackfillAllAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Merge hash backfill complete: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
                summary.Processed,
                summary.Updated,
                summary.Skipped,
                summary.Failed);
            return;
        }

        _logger.LogInformation("Starting merge hash backfill for single advisory: {AdvisoryKey}, force={Force}", advisoryKey, force);
        var wasUpdated = await _shadowWriteService.BackfillOneAsync(advisoryKey, force, cancellationToken).ConfigureAwait(false);
        _logger.LogInformation(
            "Merge hash backfill for {AdvisoryKey} complete: updated={Updated}",
            advisoryKey,
            wasUpdated);
    }
}
|
||||
@@ -3,4 +3,5 @@ namespace StellaOps.Concelier.Merge.Jobs;
|
||||
/// <summary>
/// String identifiers for the merge job kinds.
/// </summary>
internal static class MergeJobKinds
{
    /// <summary>Kind identifier "merge:reconcile".</summary>
    public const string Reconcile = "merge:reconcile";

    /// <summary>Kind identifier "merge:hash-backfill" (presumably the merge hash backfill job; confirm at the registration site).</summary>
    public const string HashBackfill = "merge:hash-backfill";
}
|
||||
|
||||
@@ -8,6 +8,7 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Concelier.Core;
|
||||
using StellaOps.Concelier.Core.Events;
|
||||
using StellaOps.Concelier.Merge.Identity;
|
||||
using StellaOps.Concelier.Models;
|
||||
using StellaOps.Concelier.Storage.Advisories;
|
||||
using StellaOps.Concelier.Storage.Aliases;
|
||||
@@ -41,6 +42,7 @@ public sealed class AdvisoryMergeService
|
||||
private readonly IAdvisoryEventLog _eventLog;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly CanonicalMerger _canonicalMerger;
|
||||
private readonly IMergeHashCalculator? _mergeHashCalculator;
|
||||
private readonly ILogger<AdvisoryMergeService> _logger;
|
||||
|
||||
public AdvisoryMergeService(
|
||||
@@ -51,7 +53,8 @@ public sealed class AdvisoryMergeService
|
||||
CanonicalMerger canonicalMerger,
|
||||
IAdvisoryEventLog eventLog,
|
||||
TimeProvider timeProvider,
|
||||
ILogger<AdvisoryMergeService> logger)
|
||||
ILogger<AdvisoryMergeService> logger,
|
||||
IMergeHashCalculator? mergeHashCalculator = null)
|
||||
{
|
||||
_aliasResolver = aliasResolver ?? throw new ArgumentNullException(nameof(aliasResolver));
|
||||
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
|
||||
@@ -61,6 +64,7 @@ public sealed class AdvisoryMergeService
|
||||
_eventLog = eventLog ?? throw new ArgumentNullException(nameof(eventLog));
|
||||
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_mergeHashCalculator = mergeHashCalculator; // Optional during migration
|
||||
}
|
||||
|
||||
public async Task<AdvisoryMergeResult> MergeAsync(string seedAdvisoryKey, CancellationToken cancellationToken)
|
||||
@@ -102,7 +106,7 @@ public sealed class AdvisoryMergeService
|
||||
throw;
|
||||
}
|
||||
|
||||
var merged = precedenceResult.Advisory;
|
||||
var merged = EnrichWithMergeHash(precedenceResult.Advisory);
|
||||
var conflictDetails = precedenceResult.Conflicts;
|
||||
|
||||
if (component.Collisions.Count > 0)
|
||||
@@ -309,7 +313,48 @@ public sealed class AdvisoryMergeService
|
||||
source.Provenance,
|
||||
source.Description,
|
||||
source.Cwes,
|
||||
source.CanonicalMetricId);
|
||||
source.CanonicalMetricId,
|
||||
source.MergeHash);
|
||||
|
||||
/// <summary>
/// Returns a copy of <paramref name="advisory"/> carrying its computed merge hash.
/// The advisory is returned unchanged when no calculator is configured or when
/// computing the hash (or building the copy) throws; the failure is logged as a warning.
/// </summary>
/// <param name="advisory">The advisory to enrich.</param>
/// <returns>The enriched copy, or the original advisory.</returns>
private Advisory EnrichWithMergeHash(Advisory advisory)
{
    if (_mergeHashCalculator is { } calculator)
    {
        try
        {
            // Every field is copied as-is; only the merge hash is replaced.
            return new Advisory(
                advisory.AdvisoryKey,
                advisory.Title,
                advisory.Summary,
                advisory.Language,
                advisory.Published,
                advisory.Modified,
                advisory.Severity,
                advisory.ExploitKnown,
                advisory.Aliases,
                advisory.Credits,
                advisory.References,
                advisory.AffectedPackages,
                advisory.CvssMetrics,
                advisory.Provenance,
                advisory.Description,
                advisory.Cwes,
                advisory.CanonicalMetricId,
                calculator.ComputeMergeHash(advisory));
        }
        catch (Exception ex)
        {
            // Best effort: a hash failure must not block the merge itself.
            _logger.LogWarning(ex, "Failed to compute merge hash for {AdvisoryKey}, continuing without hash", advisory.AdvisoryKey);
        }
    }

    return advisory;
}
|
||||
|
||||
private CanonicalMergeResult? ApplyCanonicalMergeIfNeeded(string canonicalKey, List<Advisory> inputs)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// MergeHashBackfillService.cs
|
||||
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
|
||||
// Task: MHASH-8200-020
|
||||
// Description: Shadow-write mode for computing merge_hash on existing advisories
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Concelier.Merge.Identity;
|
||||
using StellaOps.Concelier.Models;
|
||||
using StellaOps.Concelier.Storage.Advisories;
|
||||
|
||||
namespace StellaOps.Concelier.Merge.Services;
|
||||
|
||||
/// <summary>
/// Service for backfilling merge hashes on existing advisories without changing their identity.
/// Runs in shadow-write mode: computes merge_hash and updates only that field.
/// </summary>
public sealed class MergeHashBackfillService
{
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IMergeHashCalculator _mergeHashCalculator;
    private readonly ILogger<MergeHashBackfillService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="advisoryStore">Store the advisories are streamed from and written back to.</param>
    /// <param name="mergeHashCalculator">Calculator used to compute each merge hash.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public MergeHashBackfillService(
        IAdvisoryStore advisoryStore,
        IMergeHashCalculator mergeHashCalculator,
        ILogger<MergeHashBackfillService> logger)
    {
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Backfills merge hashes for all advisories that don't have one.
    /// </summary>
    /// <remarks>
    /// Advisories that already carry a merge hash are counted as skipped. A failure while
    /// computing or persisting one advisory is logged, counted as an error, and the run
    /// continues. In dry-run mode the "updated" count reflects advisories whose hash was
    /// computed, even though nothing is persisted.
    /// </remarks>
    /// <param name="batchSize">
    /// Number of updated advisories between progress log messages.
    /// A value of zero or less disables progress logging.
    /// </param>
    /// <param name="dryRun">If true, computes hashes but doesn't persist them.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Backfill result with statistics.</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public async Task<MergeHashBackfillResult> BackfillAsync(
        int batchSize = 100,
        bool dryRun = false,
        CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();
        var processed = 0;
        var updated = 0;
        var skipped = 0;
        var errors = 0;

        _logger.LogInformation(
            "Starting merge hash backfill (dryRun={DryRun}, batchSize={BatchSize})",
            dryRun, batchSize);

        await foreach (var advisory in _advisoryStore.StreamAsync(cancellationToken))
        {
            cancellationToken.ThrowIfCancellationRequested();
            processed++;

            // Skip if already has merge hash
            if (!string.IsNullOrEmpty(advisory.MergeHash))
            {
                skipped++;
                continue;
            }

            try
            {
                var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);

                if (!dryRun)
                {
                    var enrichedAdvisory = CreateAdvisoryWithMergeHash(advisory, mergeHash);
                    await _advisoryStore.UpsertAsync(enrichedAdvisory, cancellationToken).ConfigureAwait(false);
                }

                updated++;

                // Guard the modulo: a zero batch size would throw DivideByZeroException here,
                // after the upsert, and the catch below would miscount the advisory as an error.
                if (batchSize > 0 && updated % batchSize == 0)
                {
                    _logger.LogInformation(
                        "Backfill progress: {Updated} updated, {Skipped} skipped, {Errors} errors (of {Processed} processed)",
                        updated, skipped, errors, processed);
                }
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Cancellation is not a per-advisory failure; propagate it instead of
                // logging a warning and counting it as an error.
                throw;
            }
            catch (Exception ex)
            {
                errors++;
                _logger.LogWarning(
                    ex,
                    "Failed to compute/update merge hash for {AdvisoryKey}",
                    advisory.AdvisoryKey);
            }
        }

        stopwatch.Stop();

        var result = new MergeHashBackfillResult(
            TotalProcessed: processed,
            Updated: updated,
            Skipped: skipped,
            Errors: errors,
            DryRun: dryRun,
            Duration: stopwatch.Elapsed);

        _logger.LogInformation(
            "Merge hash backfill completed: {Updated} updated, {Skipped} skipped, {Errors} errors (of {Processed} processed) in {Duration}",
            result.Updated, result.Skipped, result.Errors, result.TotalProcessed, result.Duration);

        return result;
    }

    /// <summary>
    /// Computes merge hash for a single advisory without persisting.
    /// Useful for testing or preview mode.
    /// </summary>
    /// <param name="advisory">The advisory to hash.</param>
    /// <returns>The hash produced by the configured calculator.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="advisory"/> is <c>null</c>.</exception>
    public string ComputeMergeHash(Advisory advisory)
    {
        ArgumentNullException.ThrowIfNull(advisory);
        return _mergeHashCalculator.ComputeMergeHash(advisory);
    }

    /// <summary>
    /// Builds a copy of <paramref name="source"/> in which only the merge hash differs.
    /// </summary>
    private static Advisory CreateAdvisoryWithMergeHash(Advisory source, string mergeHash)
        => new(
            source.AdvisoryKey,
            source.Title,
            source.Summary,
            source.Language,
            source.Published,
            source.Modified,
            source.Severity,
            source.ExploitKnown,
            source.Aliases,
            source.Credits,
            source.References,
            source.AffectedPackages,
            source.CvssMetrics,
            source.Provenance,
            source.Description,
            source.Cwes,
            source.CanonicalMetricId,
            mergeHash);
}
|
||||
|
||||
/// <summary>
/// Statistics describing one merge hash backfill run.
/// </summary>
/// <param name="TotalProcessed">Number of advisories examined.</param>
/// <param name="Updated">Number of advisories counted as updated.</param>
/// <param name="Skipped">Number of advisories counted as skipped.</param>
/// <param name="Errors">Number of advisories counted as failed.</param>
/// <param name="DryRun">Whether the run was a dry run.</param>
/// <param name="Duration">Elapsed time of the run.</param>
public sealed record MergeHashBackfillResult(
    int TotalProcessed,
    int Updated,
    int Skipped,
    int Errors,
    bool DryRun,
    TimeSpan Duration)
{
    /// <summary>
    /// Percentage (0-100) of processed advisories that were either updated or skipped.
    /// Reports 100 when nothing was processed.
    /// </summary>
    public double SuccessRate
    {
        get
        {
            if (TotalProcessed > 0)
            {
                return (double)(Updated + Skipped) / TotalProcessed * 100;
            }

            return 100;
        }
    }

    /// <summary>
    /// Mean wall-clock time per processed advisory, in milliseconds.
    /// Reports 0 when nothing was processed.
    /// </summary>
    public double AvgTimePerAdvisoryMs
    {
        get
        {
            if (TotalProcessed > 0)
            {
                return Duration.TotalMilliseconds / TotalProcessed;
            }

            return 0;
        }
    }
}
|
||||
Reference in New Issue
Block a user