sprints enhancements

This commit is contained in:
StellaOps Bot
2025-12-25 19:52:30 +02:00
parent ef6ac36323
commit b8b2d83f4a
138 changed files with 25133 additions and 594 deletions

View File

@@ -0,0 +1,81 @@
// -----------------------------------------------------------------------------
// IMergeHashCalculator.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-002
// Description: Interface for deterministic semantic merge hash computation
// -----------------------------------------------------------------------------
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Merge.Identity;
/// <summary>
/// Computes deterministic semantic merge hash for advisory deduplication.
/// Unlike content hashing, merge hash is based on identity components only:
/// (CVE + affects_key + version_range + weaknesses + patch_lineage).
/// </summary>
/// <remarks>
/// The same CVE affecting the same package should produce the same merge hash
/// regardless of which source (Debian, RHEL, etc.) reported it.
/// </remarks>
public interface IMergeHashCalculator
{
    /// <summary>
    /// Compute merge hash from advisory identity components.
    /// </summary>
    /// <param name="input">The identity components to hash.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="input"/> is null.</exception>
    string ComputeMergeHash(MergeHashInput input);

    /// <summary>
    /// Compute merge hash directly from Advisory domain model.
    /// Extracts identity components from the advisory and computes hash.
    /// </summary>
    /// <param name="advisory">The advisory to compute hash for.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="advisory"/> is null.</exception>
    string ComputeMergeHash(Advisory advisory);

    /// <summary>
    /// Compute merge hash for a specific affected package within an advisory.
    /// </summary>
    /// <param name="advisory">The advisory containing the CVE and weaknesses.</param>
    /// <param name="affectedPackage">The specific affected package.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    /// <exception cref="ArgumentNullException">Thrown when either argument is null.</exception>
    string ComputeMergeHash(Advisory advisory, AffectedPackage affectedPackage);
}
/// <summary>
/// Input components for merge hash computation.
/// </summary>
/// <remarks>
/// Values are passed through the corresponding normalizers by the calculator
/// before hashing, so callers do not need to pre-normalize them.
/// </remarks>
public sealed record MergeHashInput
{
    /// <summary>
    /// CVE identifier (e.g., "CVE-2024-1234"). Required.
    /// Will be normalized to uppercase.
    /// </summary>
    public required string Cve { get; init; }

    /// <summary>
    /// Affected package identifier (PURL or CPE). Required.
    /// Will be normalized according to package type rules.
    /// </summary>
    public required string AffectsKey { get; init; }

    /// <summary>
    /// Affected version range expression. Optional.
    /// Will be normalized to canonical interval notation.
    /// </summary>
    public string? VersionRange { get; init; }

    /// <summary>
    /// Associated CWE identifiers. Optional; defaults to an empty list.
    /// Will be normalized to uppercase, sorted, deduplicated.
    /// </summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>
    /// Upstream patch provenance (commit SHA, patch ID). Optional.
    /// Enables differentiation of distro backports from upstream fixes.
    /// </summary>
    public string? PatchLineage { get; init; }
}

View File

@@ -0,0 +1,288 @@
// -----------------------------------------------------------------------------
// MergeHashCalculator.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Tasks: MHASH-8200-009, MHASH-8200-010, MHASH-8200-011
// Description: Core merge hash calculator implementation
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
using StellaOps.Concelier.Merge.Identity.Normalizers;
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Merge.Identity;
/// <summary>
/// Computes deterministic semantic merge hash for advisory deduplication.
/// </summary>
/// <remarks>
/// The merge hash is computed from identity components only:
/// <list type="bullet">
/// <item>CVE identifier (normalized, uppercase)</item>
/// <item>Affected package identifier (PURL/CPE, normalized)</item>
/// <item>Version range (canonical interval notation)</item>
/// <item>CWE weaknesses (sorted, deduplicated)</item>
/// <item>Patch lineage (optional, for backport differentiation)</item>
/// </list>
/// Component extraction is order-independent: the hash must not change when a
/// source lists aliases, ranges, or references in a different order.
/// </remarks>
public sealed class MergeHashCalculator : IMergeHashCalculator
{
    // UTF-8 without BOM so the hashed byte stream carries no encoding preamble.
    private static readonly UTF8Encoding Utf8NoBom = new(false);

    private readonly ICveNormalizer _cveNormalizer;
    private readonly IPurlNormalizer _purlNormalizer;
    private readonly ICpeNormalizer _cpeNormalizer;
    private readonly IVersionRangeNormalizer _versionRangeNormalizer;
    private readonly ICweNormalizer _cweNormalizer;
    private readonly IPatchLineageNormalizer _patchLineageNormalizer;

    /// <summary>
    /// Creates a new MergeHashCalculator with default normalizers.
    /// </summary>
    public MergeHashCalculator()
        : this(
            CveNormalizer.Instance,
            PurlNormalizer.Instance,
            CpeNormalizer.Instance,
            VersionRangeNormalizer.Instance,
            CweNormalizer.Instance,
            PatchLineageNormalizer.Instance)
    {
    }

    /// <summary>
    /// Creates a new MergeHashCalculator with custom normalizers.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when any normalizer is null.</exception>
    public MergeHashCalculator(
        ICveNormalizer cveNormalizer,
        IPurlNormalizer purlNormalizer,
        ICpeNormalizer cpeNormalizer,
        IVersionRangeNormalizer versionRangeNormalizer,
        ICweNormalizer cweNormalizer,
        IPatchLineageNormalizer patchLineageNormalizer)
    {
        _cveNormalizer = cveNormalizer ?? throw new ArgumentNullException(nameof(cveNormalizer));
        _purlNormalizer = purlNormalizer ?? throw new ArgumentNullException(nameof(purlNormalizer));
        _cpeNormalizer = cpeNormalizer ?? throw new ArgumentNullException(nameof(cpeNormalizer));
        _versionRangeNormalizer = versionRangeNormalizer ?? throw new ArgumentNullException(nameof(versionRangeNormalizer));
        _cweNormalizer = cweNormalizer ?? throw new ArgumentNullException(nameof(cweNormalizer));
        _patchLineageNormalizer = patchLineageNormalizer ?? throw new ArgumentNullException(nameof(patchLineageNormalizer));
    }

    /// <inheritdoc />
    public string ComputeMergeHash(MergeHashInput input)
    {
        ArgumentNullException.ThrowIfNull(input);
        var canonical = BuildCanonicalString(input);
        return ComputeHash(canonical);
    }

    /// <inheritdoc />
    public string ComputeMergeHash(Advisory advisory)
    {
        ArgumentNullException.ThrowIfNull(advisory);

        // Extract CVE from advisory key or aliases.
        var cve = ExtractCve(advisory);

        // If no affected packages, compute hash from CVE and weaknesses only.
        if (advisory.AffectedPackages.IsDefaultOrEmpty)
        {
            var input = new MergeHashInput
            {
                Cve = cve,
                AffectsKey = string.Empty,
                VersionRange = null,
                Weaknesses = ExtractWeaknesses(advisory),
                PatchLineage = null
            };
            return ComputeMergeHash(input);
        }

        // Compute hash for first affected package (primary identity).
        // For multi-package advisories, each package gets its own hash via the
        // (advisory, package) overload.
        return ComputeMergeHash(advisory, advisory.AffectedPackages[0]);
    }

    /// <inheritdoc />
    public string ComputeMergeHash(Advisory advisory, AffectedPackage affectedPackage)
    {
        ArgumentNullException.ThrowIfNull(advisory);
        ArgumentNullException.ThrowIfNull(affectedPackage);

        var input = new MergeHashInput
        {
            Cve = ExtractCve(advisory),
            AffectsKey = BuildAffectsKey(affectedPackage),
            VersionRange = BuildVersionRange(affectedPackage),
            Weaknesses = ExtractWeaknesses(advisory),
            PatchLineage = ExtractPatchLineage(advisory, affectedPackage)
        };
        return ComputeMergeHash(input);
    }

    // Normalizes every component and renders the fixed-order canonical string
    // that gets hashed. Format: CVE:...|AFFECTS:...|VERSION:...|CWE:...|LINEAGE:...
    private string BuildCanonicalString(MergeHashInput input)
    {
        var cve = _cveNormalizer.Normalize(input.Cve);
        var affectsKey = NormalizeAffectsKey(input.AffectsKey);
        var versionRange = _versionRangeNormalizer.Normalize(input.VersionRange);
        var weaknesses = _cweNormalizer.Normalize(input.Weaknesses);
        var patchLineage = _patchLineageNormalizer.Normalize(input.PatchLineage);

        var sb = new StringBuilder();
        sb.Append("CVE:");
        sb.Append(cve);
        sb.Append('|');
        sb.Append("AFFECTS:");
        sb.Append(affectsKey);
        sb.Append('|');
        sb.Append("VERSION:");
        sb.Append(versionRange);
        sb.Append('|');
        sb.Append("CWE:");
        sb.Append(weaknesses);
        sb.Append('|');
        sb.Append("LINEAGE:");
        sb.Append(patchLineage ?? string.Empty);
        return sb.ToString();
    }

    // Routes the affects key to the PURL or CPE normalizer based on its scheme.
    private string NormalizeAffectsKey(string affectsKey)
    {
        if (string.IsNullOrWhiteSpace(affectsKey))
        {
            return string.Empty;
        }

        var trimmed = affectsKey.Trim();
        if (trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
        {
            return _purlNormalizer.Normalize(trimmed);
        }
        if (trimmed.StartsWith("cpe:", StringComparison.OrdinalIgnoreCase))
        {
            return _cpeNormalizer.Normalize(trimmed);
        }

        // Default to PURL normalizer for unknown formats.
        return _purlNormalizer.Normalize(trimmed);
    }

    // SHA-256 over the canonical UTF-8 bytes, rendered as "sha256:<lowercase hex>".
    private static string ComputeHash(string canonical)
    {
        var bytes = Utf8NoBom.GetBytes(canonical);
        var hash = SHA256.HashData(bytes);
        return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
    }

    // Resolves the CVE identifier for the advisory: the advisory key itself if
    // it is a CVE, otherwise a CVE alias, otherwise the raw key.
    private static string ExtractCve(Advisory advisory)
    {
        if (advisory.AdvisoryKey.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            return advisory.AdvisoryKey;
        }

        // Pick the ordinal-smallest CVE alias rather than the first listed one:
        // alias ordering varies between sources and must not change the hash.
        var cveAlias = advisory.Aliases
            .Where(static a => a.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
            .OrderBy(static a => a, StringComparer.OrdinalIgnoreCase)
            .FirstOrDefault();
        return cveAlias ?? advisory.AdvisoryKey;
    }

    // The package identifier (PURL/CPE) is the affects key.
    private static string BuildAffectsKey(AffectedPackage package)
    {
        return package.Identifier;
    }

    // Combines all version ranges into one deterministic, ordinally sorted,
    // comma-joined expression; null when no range information exists.
    private static string? BuildVersionRange(AffectedPackage package)
    {
        if (package.VersionRanges.IsDefaultOrEmpty)
        {
            return null;
        }

        var ranges = package.VersionRanges
            .Select(static r => r.RangeExpression ?? BuildRangeFromPrimitives(r))
            .Where(static r => !string.IsNullOrWhiteSpace(r))
            .OrderBy(static r => r, StringComparer.Ordinal)
            .ToList();

        return ranges.Count == 0 ? null : string.Join(",", ranges);
    }

    // Builds a range expression from introduced/fixed/lastAffected primitives.
    // A fixed version takes precedence over last-affected (exclusive beats
    // inclusive upper bound).
    private static string? BuildRangeFromPrimitives(AffectedVersionRange range)
    {
        var parts = new List<string>();
        if (!string.IsNullOrWhiteSpace(range.IntroducedVersion))
        {
            parts.Add($">={range.IntroducedVersion}");
        }
        if (!string.IsNullOrWhiteSpace(range.FixedVersion))
        {
            parts.Add($"<{range.FixedVersion}");
        }
        else if (!string.IsNullOrWhiteSpace(range.LastAffectedVersion))
        {
            parts.Add($"<={range.LastAffectedVersion}");
        }
        return parts.Count > 0 ? string.Join(",", parts) : null;
    }

    // Collects the non-blank CWE identifiers; the CWE normalizer sorts and
    // deduplicates them later, so raw order is acceptable here.
    private static IReadOnlyList<string> ExtractWeaknesses(Advisory advisory)
    {
        if (advisory.Cwes.IsDefaultOrEmpty)
        {
            return [];
        }
        return advisory.Cwes
            .Select(static w => w.Identifier)
            .Where(static w => !string.IsNullOrWhiteSpace(w))
            .ToList();
    }

    // Looks for patch lineage in advisory references. Simplified implementation;
    // a full implementation would extract from backport proof or upstream
    // references (the package parameter is reserved for that).
    private static string? ExtractPatchLineage(Advisory advisory, AffectedPackage package)
    {
        // Sort candidate URLs ordinally before choosing one: reference ordering
        // varies between sources and must not change the hash.
        return advisory.References
            .Where(static r => r.Kind is "patch" or "fix" or "commit")
            .Select(static r => r.Url)
            .OrderBy(static u => u, StringComparer.Ordinal)
            .FirstOrDefault();
    }
}

View File

@@ -0,0 +1,159 @@
// -----------------------------------------------------------------------------
// MergeHashShadowWriteService.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-020
// Description: Shadow-write merge hashes for existing advisories during migration
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Advisories;
namespace StellaOps.Concelier.Merge.Identity;
/// <summary>
/// Service to compute and persist merge hashes for existing advisories
/// without changing their identity. Used during migration to backfill
/// merge_hash for pre-existing data.
/// </summary>
public sealed class MergeHashShadowWriteService
{
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IMergeHashCalculator _mergeHashCalculator;
    private readonly ILogger<MergeHashShadowWriteService> _logger;

    /// <summary>
    /// Creates the shadow-write service.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when any dependency is null.</exception>
    public MergeHashShadowWriteService(
        IAdvisoryStore advisoryStore,
        IMergeHashCalculator mergeHashCalculator,
        ILogger<MergeHashShadowWriteService> logger)
    {
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Backfills merge hashes for all advisories that don't have one.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Summary of the backfill operation.</returns>
    public async Task<ShadowWriteResult> BackfillAllAsync(CancellationToken cancellationToken)
    {
        var processed = 0;
        var updated = 0;
        var skipped = 0;
        var failed = 0;

        await foreach (var advisory in _advisoryStore.StreamAsync(cancellationToken).ConfigureAwait(false))
        {
            cancellationToken.ThrowIfCancellationRequested();
            processed++;

            // Shadow-write only: never overwrite an existing merge hash.
            if (!string.IsNullOrEmpty(advisory.MergeHash))
            {
                skipped++;
                continue;
            }

            try
            {
                var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);
                var enriched = EnrichWithMergeHash(advisory, mergeHash);
                await _advisoryStore.UpsertAsync(enriched, cancellationToken).ConfigureAwait(false);
                updated++;

                if (updated % 100 == 0)
                {
                    _logger.LogInformation(
                        "Merge hash backfill progress: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
                        processed, updated, skipped, failed);
                }
            }
            catch (OperationCanceledException)
            {
                // Cancellation surfaced inside the loop (e.g. from UpsertAsync)
                // is not a per-advisory failure; propagate so the caller sees
                // the run was aborted instead of silently continuing.
                throw;
            }
            catch (Exception ex)
            {
                failed++;
                _logger.LogWarning(ex, "Failed to compute merge hash for {AdvisoryKey}", advisory.AdvisoryKey);
            }
        }

        _logger.LogInformation(
            "Merge hash backfill complete: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
            processed, updated, skipped, failed);
        return new ShadowWriteResult(processed, updated, skipped, failed);
    }

    /// <summary>
    /// Computes and persists merge hash for a single advisory.
    /// </summary>
    /// <param name="advisoryKey">The advisory key to process.</param>
    /// <param name="force">If true, recomputes even if hash exists.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if advisory was updated, false otherwise.</returns>
    public async Task<bool> BackfillOneAsync(string advisoryKey, bool force, CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(advisoryKey);

        var advisory = await _advisoryStore.FindAsync(advisoryKey, cancellationToken).ConfigureAwait(false);
        if (advisory is null)
        {
            _logger.LogWarning("Advisory {AdvisoryKey} not found for merge hash backfill", advisoryKey);
            return false;
        }

        // Skip if already has merge hash and not forcing.
        if (!force && !string.IsNullOrEmpty(advisory.MergeHash))
        {
            _logger.LogDebug("Skipping {AdvisoryKey}: already has merge hash", advisoryKey);
            return false;
        }

        try
        {
            var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);
            var enriched = EnrichWithMergeHash(advisory, mergeHash);
            await _advisoryStore.UpsertAsync(enriched, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Computed merge hash for {AdvisoryKey}: {MergeHash}", advisoryKey, mergeHash);
            return true;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Log and rethrow real failures; cancellation passes through
            // without being reported as an error.
            _logger.LogError(ex, "Failed to compute merge hash for {AdvisoryKey}", advisoryKey);
            throw;
        }
    }

    // Rebuilds the advisory record with the computed merge hash in place;
    // every other field is copied verbatim so identity and content are unchanged.
    private static Advisory EnrichWithMergeHash(Advisory advisory, string mergeHash)
    {
        return new Advisory(
            advisory.AdvisoryKey,
            advisory.Title,
            advisory.Summary,
            advisory.Language,
            advisory.Published,
            advisory.Modified,
            advisory.Severity,
            advisory.ExploitKnown,
            advisory.Aliases,
            advisory.Credits,
            advisory.References,
            advisory.AffectedPackages,
            advisory.CvssMetrics,
            advisory.Provenance,
            advisory.Description,
            advisory.Cwes,
            advisory.CanonicalMetricId,
            mergeHash);
    }
}
/// <summary>
/// Result of a shadow-write backfill operation.
/// </summary>
/// <remarks>
/// Each streamed advisory lands in exactly one of the Updated / Skipped /
/// Failed buckets, so Processed == Updated + Skipped + Failed.
/// </remarks>
/// <param name="Processed">Total advisories examined.</param>
/// <param name="Updated">Advisories updated with new merge hash.</param>
/// <param name="Skipped">Advisories skipped (already had merge hash).</param>
/// <param name="Failed">Advisories that failed hash computation.</param>
public sealed record ShadowWriteResult(int Processed, int Updated, int Skipped, int Failed);

View File

@@ -0,0 +1,120 @@
// -----------------------------------------------------------------------------
// CpeNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-004
// Description: CPE normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes CPE identifiers to canonical CPE 2.3 format.
/// </summary>
public sealed partial class CpeNormalizer : ICpeNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CpeNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for CPE 2.3 formatted string binding.
    /// </summary>
    [GeneratedRegex(
        @"^cpe:2\.3:([aho]):([^:]+):([^:]+):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*)$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex Cpe23Pattern();

    /// <summary>
    /// Pattern for CPE 2.2 URI binding.
    /// </summary>
    [GeneratedRegex(
        @"^cpe:/([aho]):([^:]+):([^:]+)(?::([^:]+))?(?::([^:]+))?(?::([^:]+))?(?::([^:]+))?$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex Cpe22Pattern();

    /// <inheritdoc />
    public string Normalize(string cpe)
    {
        if (string.IsNullOrWhiteSpace(cpe))
        {
            return string.Empty;
        }

        var trimmed = cpe.Trim();

        // Try CPE 2.3 formatted string binding first.
        var match23 = Cpe23Pattern().Match(trimmed);
        if (match23.Success)
        {
            return NormalizeCpe23(match23);
        }

        // Fall back to the legacy CPE 2.2 URI binding and upconvert it.
        var match22 = Cpe22Pattern().Match(trimmed);
        if (match22.Success)
        {
            return ConvertCpe22ToCpe23(match22);
        }

        // Unrecognized format: lowercase so hashing stays deterministic.
        return trimmed.ToLowerInvariant();
    }

    // Rebuilds a matched CPE 2.3 string with every component normalized.
    private static string NormalizeCpe23(Match match)
    {
        var part = match.Groups[1].Value.ToLowerInvariant();
        var vendor = NormalizeComponent(match.Groups[2].Value);
        var product = NormalizeComponent(match.Groups[3].Value);
        var version = NormalizeComponent(match.Groups[4].Value);
        var update = NormalizeComponent(match.Groups[5].Value);
        var edition = NormalizeComponent(match.Groups[6].Value);
        var language = NormalizeComponent(match.Groups[7].Value);
        var swEdition = NormalizeComponent(match.Groups[8].Value);
        var targetSw = NormalizeComponent(match.Groups[9].Value);
        var targetHw = NormalizeComponent(match.Groups[10].Value);
        var other = NormalizeComponent(match.Groups[11].Value);
        return $"cpe:2.3:{part}:{vendor}:{product}:{version}:{update}:{edition}:{language}:{swEdition}:{targetSw}:{targetHw}:{other}";
    }

    // Upconverts a matched CPE 2.2 URI to the 2.3 formatted string binding;
    // components absent from the URI become the "*" (ANY) wildcard.
    private static string ConvertCpe22ToCpe23(Match match)
    {
        var part = match.Groups[1].Value.ToLowerInvariant();
        var vendor = NormalizeComponent(match.Groups[2].Value);
        var product = NormalizeComponent(match.Groups[3].Value);
        var version = match.Groups[4].Success ? NormalizeComponent(match.Groups[4].Value) : "*";
        var update = match.Groups[5].Success ? NormalizeComponent(match.Groups[5].Value) : "*";
        var edition = match.Groups[6].Success ? NormalizeComponent(match.Groups[6].Value) : "*";
        var language = match.Groups[7].Success ? NormalizeComponent(match.Groups[7].Value) : "*";
        return $"cpe:2.3:{part}:{vendor}:{product}:{version}:{update}:{edition}:{language}:*:*:*:*";
    }

    // Normalizes a single CPE component value.
    private static string NormalizeComponent(string component)
    {
        if (string.IsNullOrWhiteSpace(component))
        {
            return "*";
        }

        var trimmed = component.Trim();

        // Map CPE logical values to their formatted-string bindings. The check
        // is case-insensitive: both patterns above match case-insensitively,
        // so "any"/"na" must bind the same way as "ANY"/"NA".
        if (string.Equals(trimmed, "ANY", StringComparison.OrdinalIgnoreCase))
        {
            return "*";
        }
        if (string.Equals(trimmed, "NA", StringComparison.OrdinalIgnoreCase))
        {
            return "-";
        }
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        // Literal value: lowercase for deterministic comparison.
        return trimmed.ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,71 @@
// -----------------------------------------------------------------------------
// CveNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-003 (part of normalization helpers)
// Description: CVE identifier normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes CVE identifiers to canonical uppercase format.
/// </summary>
public sealed partial class CveNormalizer : ICveNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CveNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern matching a complete CVE identifier: CVE-YYYY-NNNNN (4+ digits after year).
    /// </summary>
    [GeneratedRegex(@"^CVE-(\d{4})-(\d{4,})$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CvePattern();

    /// <summary>
    /// Unanchored pattern used to extract a CVE identifier embedded in a longer string.
    /// </summary>
    [GeneratedRegex(@"CVE-\d{4}-\d{4,}", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex EmbeddedCvePattern();

    /// <summary>
    /// Pattern matching a bare "YYYY-NNNNN" identifier missing the CVE prefix.
    /// </summary>
    [GeneratedRegex(@"^\d{4}-\d{4,}$", RegexOptions.Compiled)]
    private static partial Regex BareNumberPattern();

    /// <inheritdoc />
    public string Normalize(string? cve)
    {
        if (string.IsNullOrWhiteSpace(cve))
        {
            return string.Empty;
        }

        var trimmed = cve.Trim();

        if (trimmed.StartsWith("cve-", StringComparison.OrdinalIgnoreCase))
        {
            // Fix prefix casing in place.
            trimmed = "CVE-" + trimmed[4..];
        }
        else
        {
            // Try to extract a CVE embedded in a longer string. This needs the
            // unanchored pattern: the anchored CvePattern can never match here
            // because this branch is only reached when the string does not
            // start with "CVE-".
            var match = EmbeddedCvePattern().Match(trimmed);
            if (match.Success)
            {
                trimmed = match.Value;
            }
            else if (BareNumberPattern().IsMatch(trimmed))
            {
                // Assume it's just the number part: 2024-1234 -> CVE-2024-1234
                trimmed = "CVE-" + trimmed;
            }
        }

        // Validate and uppercase.
        var normalized = trimmed.ToUpperInvariant();
        if (!CvePattern().IsMatch(normalized))
        {
            // Return as-is if not a valid CVE (will still be hashed consistently).
            return normalized;
        }
        return normalized;
    }
}

View File

@@ -0,0 +1,82 @@
// -----------------------------------------------------------------------------
// CweNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-006
// Description: CWE identifier list normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes CWE identifier lists for deterministic hashing.
/// </summary>
public sealed partial class CweNormalizer : ICweNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CweNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern matching CWE identifier: CWE-NNN or just NNN.
    /// </summary>
    [GeneratedRegex(@"(?:CWE-)?(\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CwePattern();

    /// <inheritdoc />
    public string Normalize(IEnumerable<string>? cwes)
    {
        if (cwes is null)
        {
            return string.Empty;
        }

        // Canonicalize each entry to "CWE-NNN", dropping blanks and
        // unrecognizable values.
        var canonical = new List<string>();
        foreach (var raw in cwes)
        {
            if (string.IsNullOrWhiteSpace(raw))
            {
                continue;
            }
            var single = NormalizeSingle(raw);
            if (single is not null)
            {
                canonical.Add(single);
            }
        }

        // Deduplicate, then order numerically with a lexicographic tiebreaker
        // so the joined result is fully deterministic.
        var ordered = canonical
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(ExtractCweNumber)
            .ThenBy(static c => c, StringComparer.OrdinalIgnoreCase)
            .ToList();

        return ordered.Count == 0 ? string.Empty : string.Join(",", ordered);
    }

    // Maps a single raw value to "CWE-NNN", or null when no number is found.
    private static string? NormalizeSingle(string cwe)
    {
        var match = CwePattern().Match(cwe.Trim());
        return match.Success ? $"CWE-{match.Groups[1].Value}" : null;
    }

    // Sort key: the numeric part of the identifier; unparsable values sort last.
    private static int ExtractCweNumber(string? cwe)
    {
        if (!string.IsNullOrWhiteSpace(cwe))
        {
            var match = CwePattern().Match(cwe);
            if (match.Success && int.TryParse(match.Groups[1].Value, out var number))
            {
                return number;
            }
        }
        return int.MaxValue;
    }
}

View File

@@ -0,0 +1,95 @@
// -----------------------------------------------------------------------------
// INormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Tasks: MHASH-8200-003 to MHASH-8200-007
// Description: Normalizer interfaces for merge hash components
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes PURL identifiers to canonical form for deterministic hashing.
/// </summary>
public interface IPurlNormalizer
{
    /// <summary>
    /// Normalize PURL to canonical form.
    /// - Lowercase package type
    /// - URL-encode special characters in namespace
    /// - Strip non-essential qualifiers (arch, type, checksum)
    /// - Sort remaining qualifiers alphabetically
    /// </summary>
    /// <param name="purl">The package URL (or other package identifier) to normalize.</param>
    /// <returns>The canonical identifier string used for hashing.</returns>
    string Normalize(string purl);
}
/// <summary>
/// Normalizes CPE identifiers to canonical CPE 2.3 format.
/// </summary>
public interface ICpeNormalizer
{
    /// <summary>
    /// Normalize CPE to canonical CPE 2.3 format.
    /// - Convert CPE 2.2 URI format to CPE 2.3 formatted string
    /// - Lowercase vendor and product
    /// - Normalize wildcards
    /// </summary>
    /// <param name="cpe">The CPE identifier (2.2 URI or 2.3 formatted string).</param>
    /// <returns>The canonical CPE 2.3 string used for hashing.</returns>
    string Normalize(string cpe);
}
/// <summary>
/// Normalizes version range expressions to canonical interval notation.
/// </summary>
public interface IVersionRangeNormalizer
{
    /// <summary>
    /// Normalize version range to canonical expression.
    /// - Convert various formats to canonical interval notation
    /// - Trim whitespace
    /// - Normalize operators (e.g., "[1.0, 2.0)" → ">=1.0,&lt;2.0")
    /// </summary>
    /// <param name="range">The version range expression; may be null or empty.</param>
    /// <returns>The canonical range expression used for hashing.</returns>
    string Normalize(string? range);
}
/// <summary>
/// Normalizes CWE identifier lists for deterministic hashing.
/// </summary>
public interface ICweNormalizer
{
    /// <summary>
    /// Normalize CWE list to sorted, deduplicated, uppercase set.
    /// - Uppercase all identifiers
    /// - Ensure "CWE-" prefix
    /// - Sort numerically by CWE number
    /// - Deduplicate
    /// - Return comma-joined string
    /// </summary>
    /// <param name="cwes">The raw CWE identifiers; may be null or empty.</param>
    /// <returns>The comma-joined canonical list used for hashing.</returns>
    string Normalize(IEnumerable<string>? cwes);
}
/// <summary>
/// Normalizes patch lineage references for deterministic hashing.
/// </summary>
public interface IPatchLineageNormalizer
{
    /// <summary>
    /// Normalize patch lineage to canonical commit reference.
    /// - Extract commit SHAs from various formats
    /// - Normalize to lowercase hex
    /// - Handle patch IDs, bug tracker references
    /// </summary>
    /// <param name="lineage">The raw lineage reference (URL, SHA, patch ID); may be null.</param>
    /// <returns>The canonical lineage token, or null when nothing recognizable is found.</returns>
    string? Normalize(string? lineage);
}
/// <summary>
/// Normalizes CVE identifiers for deterministic hashing.
/// </summary>
public interface ICveNormalizer
{
    /// <summary>
    /// Normalize CVE identifier to canonical uppercase format.
    /// - Ensure "CVE-" prefix
    /// - Uppercase
    /// - Validate format (CVE-YYYY-NNNNN+)
    /// </summary>
    /// <param name="cve">The raw CVE identifier; may be null or empty.</param>
    /// <returns>The canonical uppercase identifier used for hashing.</returns>
    string Normalize(string? cve);
}

View File

@@ -0,0 +1,119 @@
// -----------------------------------------------------------------------------
// PatchLineageNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-007
// Description: Patch lineage normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes patch lineage references for deterministic hashing.
/// Extracts upstream commit references from various formats.
/// </summary>
public sealed partial class PatchLineageNormalizer : IPatchLineageNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static PatchLineageNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for full Git commit SHA (40 hex chars).
    /// </summary>
    [GeneratedRegex(@"\b([0-9a-f]{40})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex FullShaPattern();

    /// <summary>
    /// Pattern for abbreviated Git commit SHA (7-12 hex chars).
    /// </summary>
    [GeneratedRegex(@"\b([0-9a-f]{7,12})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex AbbrevShaPattern();

    /// <summary>
    /// Pattern for GitHub/GitLab commit URLs.
    /// </summary>
    [GeneratedRegex(
        @"(?:github\.com|gitlab\.com)/[^/]+/[^/]+/commit/([0-9a-f]{7,40})",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CommitUrlPattern();

    /// <summary>
    /// Pattern for patch IDs in format "patch-NNNNN" or "PATCH-NNNNN".
    /// </summary>
    [GeneratedRegex(@"\b(PATCH-\d+)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex PatchIdPattern();

    /// <inheritdoc />
    public string? Normalize(string? lineage)
    {
        if (string.IsNullOrWhiteSpace(lineage))
        {
            return null;
        }

        var trimmed = lineage.Trim();

        // A commit URL is the most specific signal; try it first.
        var urlMatch = CommitUrlPattern().Match(trimmed);
        if (urlMatch.Success)
        {
            return NormalizeSha(urlMatch.Groups[1].Value);
        }

        // Full 40-char SHA anywhere in the text.
        var fullMatch = FullShaPattern().Match(trimmed);
        if (fullMatch.Success)
        {
            return NormalizeSha(fullMatch.Groups[1].Value);
        }

        // Explicit patch IDs are checked before the abbreviated-SHA heuristic:
        // a value like "PATCH-1234567" contains "patch", so the heuristic below
        // would otherwise misread its all-digit suffix as a hex commit
        // abbreviation and return "1234567" instead of the patch ID.
        var patchMatch = PatchIdPattern().Match(trimmed);
        if (patchMatch.Success)
        {
            return patchMatch.Groups[1].Value.ToUpperInvariant();
        }

        // Abbreviated SHA, only when surrounding text suggests a commit ref.
        if (LooksLikeCommitReference(trimmed))
        {
            var abbrevMatch = AbbrevShaPattern().Match(trimmed);
            if (abbrevMatch.Success)
            {
                return NormalizeSha(abbrevMatch.Groups[1].Value);
            }
        }

        // Return null if no recognizable pattern.
        return null;
    }

    // Heuristic: words that commonly accompany a commit reference.
    private static bool LooksLikeCommitReference(string value)
    {
        var lower = value.ToLowerInvariant();
        return lower.Contains("commit") ||
               lower.Contains("sha") ||
               lower.Contains("fix") ||
               lower.Contains("patch") ||
               lower.Contains("backport");
    }

    // Lowercase hex. Full and abbreviated SHAs are both returned as-is after
    // lowercasing; either form hashes consistently.
    private static string NormalizeSha(string sha) => sha.ToLowerInvariant();
}

View File

@@ -0,0 +1,178 @@
// -----------------------------------------------------------------------------
// PurlNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-003
// Description: PURL normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes PURL identifiers to canonical form for deterministic hashing.
/// </summary>
public sealed partial class PurlNormalizer : IPurlNormalizer
{
/// <summary>
/// Singleton instance.
/// </summary>
public static PurlNormalizer Instance { get; } = new();
/// <summary>
/// Qualifiers to strip from PURL for identity hashing (architecture-specific, non-identity).
/// </summary>
private static readonly HashSet<string> StrippedQualifiers = new(StringComparer.OrdinalIgnoreCase)
{
"arch",
"architecture",
"os",
"platform",
"type",
"classifier",
"checksum",
"download_url",
"vcs_url",
"repository_url"
};
/// <summary>
/// Pattern for parsing PURL: pkg:type/namespace/name@version?qualifiers#subpath
/// </summary>
[GeneratedRegex(
@"^pkg:([a-zA-Z][a-zA-Z0-9+.-]*)(?:/([^/@#?]+))?/([^/@#?]+)(?:@([^?#]+))?(?:\?([^#]+))?(?:#(.+))?$",
RegexOptions.Compiled)]
private static partial Regex PurlPattern();
/// <inheritdoc />
public string Normalize(string purl)
{
    if (string.IsNullOrWhiteSpace(purl))
    {
        return string.Empty;
    }

    var trimmed = purl.Trim();

    // Non-PURL inputs: CPEs pass through untouched (the CPE normalizer owns
    // them); anything else is treated as a plain identifier and lowercased.
    if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
    {
        return trimmed.StartsWith("cpe:", StringComparison.OrdinalIgnoreCase)
            ? trimmed
            : trimmed.ToLowerInvariant();
    }

    var match = PurlPattern().Match(trimmed);
    if (!match.Success)
    {
        // Not parseable as a PURL; lowercase so hashing stays deterministic
        // even for malformed input.
        return trimmed.ToLowerInvariant();
    }

    var type = match.Groups[1].Value.ToLowerInvariant();
    string? ns = null;
    if (match.Groups[2].Success)
    {
        ns = NormalizeNamespace(match.Groups[2].Value, type);
    }
    var name = NormalizeName(match.Groups[3].Value, type);
    var version = match.Groups[4].Success ? match.Groups[4].Value : null;
    string? qualifiers = null;
    if (match.Groups[5].Success)
    {
        qualifiers = NormalizeQualifiers(match.Groups[5].Value);
    }

    // The subpath component (group 6) is intentionally dropped: it does not
    // contribute to package identity.
    return BuildPurl(type, ns, name, version, qualifiers);
}
private static string NormalizeNamespace(string ns, string type)
{
// URL-decode then re-encode consistently
var decoded = HttpUtility.UrlDecode(ns);
// For npm, handle scoped packages (@org/pkg)
if (type == "npm" && decoded.StartsWith("@"))
{
decoded = decoded.ToLowerInvariant();
return HttpUtility.UrlEncode(decoded)?.Replace("%40", "%40") ?? decoded;
}
// Most ecosystems: lowercase namespace
return decoded.ToLowerInvariant();
}
private static string NormalizeName(string name, string type)
{
var decoded = HttpUtility.UrlDecode(name);
// Most ecosystems use lowercase names
return type switch
{
"golang" => decoded, // Go uses mixed case
"nuget" => decoded.ToLowerInvariant(), // NuGet is case-insensitive
_ => decoded.ToLowerInvariant()
};
}
private static string? NormalizeQualifiers(string qualifiers)
{
if (string.IsNullOrWhiteSpace(qualifiers))
{
return null;
}
var pairs = qualifiers
.Split('&', StringSplitOptions.RemoveEmptyEntries)
.Select(static pair =>
{
var eqIndex = pair.IndexOf('=');
if (eqIndex < 0)
{
return (Key: pair.ToLowerInvariant(), Value: (string?)null);
}
return (Key: pair[..eqIndex].ToLowerInvariant(), Value: pair[(eqIndex + 1)..]);
})
.Where(pair => !StrippedQualifiers.Contains(pair.Key))
.OrderBy(static pair => pair.Key, StringComparer.Ordinal)
.ToList();
if (pairs.Count == 0)
{
return null;
}
return string.Join("&", pairs.Select(static p =>
p.Value is null ? p.Key : $"{p.Key}={p.Value}"));
}
private static string BuildPurl(string type, string? ns, string name, string? version, string? qualifiers)
{
var sb = new StringBuilder("pkg:");
sb.Append(type);
sb.Append('/');
if (!string.IsNullOrEmpty(ns))
{
sb.Append(ns);
sb.Append('/');
}
sb.Append(name);
if (!string.IsNullOrEmpty(version))
{
sb.Append('@');
sb.Append(version);
}
if (!string.IsNullOrEmpty(qualifiers))
{
sb.Append('?');
sb.Append(qualifiers);
}
return sb.ToString();
}
}

View File

@@ -0,0 +1,165 @@
// -----------------------------------------------------------------------------
// VersionRangeNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-005
// Description: Version range normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes version range expressions to canonical interval notation.
/// Output is a comma-joined, ordinally sorted list of comparison constraints
/// (e.g. "&gt;=1.0,&lt;2.0"), "*" for "all versions", or the trimmed input
/// when the format is unrecognized.
/// </summary>
public sealed partial class VersionRangeNormalizer : IVersionRangeNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static VersionRangeNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for mathematical interval notation: [1.0, 2.0) or (1.0, 2.0]
    /// </summary>
    [GeneratedRegex(
        @"^([\[\(])\s*([^,\s]*)\s*,\s*([^)\]\s]*)\s*([\]\)])$",
        RegexOptions.Compiled)]
    private static partial Regex IntervalPattern();

    /// <summary>
    /// Pattern for comparison operators: >= 1.0, < 2.0
    /// </summary>
    [GeneratedRegex(
        @"^(>=?|<=?|=|!=|~=|~>|\^)\s*(.+)$",
        RegexOptions.Compiled)]
    private static partial Regex ComparisonPattern();

    /// <summary>
    /// Pattern for a bare numeric version prefix (e.g. "1.2.3").
    /// </summary>
    [GeneratedRegex(@"^[\d.]+", RegexOptions.Compiled)]
    private static partial Regex PlainVersionPattern();

    /// <inheritdoc />
    public string Normalize(string? range)
    {
        if (string.IsNullOrWhiteSpace(range))
        {
            return string.Empty;
        }

        var trimmed = range.Trim();

        // Handle "all versions" markers.
        if (trimmed is "*" or "all" or "any")
        {
            return "*";
        }

        // Try interval notation: [1.0, 2.0)
        var intervalMatch = IntervalPattern().Match(trimmed);
        if (intervalMatch.Success)
        {
            return NormalizeInterval(intervalMatch);
        }

        // Try comparison operators: >= 1.0
        var compMatch = ComparisonPattern().Match(trimmed);
        if (compMatch.Success)
        {
            return NormalizeComparison(compMatch);
        }

        // Handle comma-separated constraints: >=1.0, <2.0
        if (trimmed.Contains(','))
        {
            return NormalizeMultiConstraint(trimmed);
        }

        // Handle "fixed" version notation. A fix version bounds the AFFECTED
        // range from above: "fixed: X" means versions below X are affected
        // (OSV "fixed" event semantics). The previous ">=X" inverted this.
        if (trimmed.StartsWith("fixed:", StringComparison.OrdinalIgnoreCase))
        {
            var fixedVersion = trimmed[6..].Trim();
            return $"<{fixedVersion}";
        }

        // Handle plain version (treat as exact match).
        if (PlainVersionPattern().IsMatch(trimmed))
        {
            return $"={trimmed}";
        }

        // Return trimmed if unrecognized.
        return trimmed;
    }

    /// <summary>
    /// Converts an interval match ("[a, b)" style) into comparison constraints,
    /// honoring bracket inclusivity; open-ended sides are omitted.
    /// </summary>
    private static string NormalizeInterval(Match match)
    {
        var leftBracket = match.Groups[1].Value;
        var lower = match.Groups[2].Value.Trim();
        var upper = match.Groups[3].Value.Trim();
        var rightBracket = match.Groups[4].Value;

        var parts = new List<string>();

        if (!string.IsNullOrEmpty(lower))
        {
            var op = leftBracket == "[" ? ">=" : ">";
            parts.Add($"{op}{lower}");
        }

        if (!string.IsNullOrEmpty(upper))
        {
            var op = rightBracket == "]" ? "<=" : "<";
            parts.Add($"{op}{upper}");
        }

        return string.Join(",", parts);
    }

    /// <summary>
    /// Canonicalizes a single "op version" expression (whitespace removed,
    /// operator aliases collapsed).
    /// </summary>
    private static string NormalizeComparison(Match match)
    {
        var op = NormalizeOperator(match.Groups[1].Value);
        var version = match.Groups[2].Value.Trim();
        return $"{op}{version}";
    }

    /// <summary>
    /// Normalizes a comma-separated constraint list: each piece is canonicalized,
    /// then the set is ordinally sorted and de-duplicated for determinism.
    /// </summary>
    private static string NormalizeMultiConstraint(string range)
    {
        var constraints = range
            .Split(',', StringSplitOptions.RemoveEmptyEntries)
            .Select(static c => c.Trim())
            .Where(static c => !string.IsNullOrEmpty(c))
            .Select(NormalizeSingleConstraint)
            .OrderBy(static c => c, StringComparer.Ordinal)
            .Distinct()
            .ToList();

        return string.Join(",", constraints);
    }

    /// <summary>
    /// Canonicalizes one constraint; returns it untouched when it does not
    /// match the comparison pattern.
    /// </summary>
    private static string NormalizeSingleConstraint(string constraint)
    {
        var match = ComparisonPattern().Match(constraint);
        if (match.Success)
        {
            var op = NormalizeOperator(match.Groups[1].Value);
            var version = match.Groups[2].Value.Trim();
            return $"{op}{version}";
        }

        return constraint;
    }

    /// <summary>
    /// Collapses operator aliases to a canonical form ("~>" becomes "~=");
    /// unknown operators pass through unchanged.
    /// </summary>
    private static string NormalizeOperator(string op)
    {
        return op switch
        {
            "~=" or "~>" => "~=",
            "^" => "^",
            ">=" => ">=",
            ">" => ">",
            "<=" => "<=",
            "<" => "<",
            "=" => "=",
            "!=" => "!=",
            _ => op
        };
    }
}

View File

@@ -0,0 +1,68 @@
// -----------------------------------------------------------------------------
// MergeHashBackfillJob.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-020
// Description: Job to backfill merge hashes for existing advisories
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Merge.Identity;
namespace StellaOps.Concelier.Merge.Jobs;
/// <summary>
/// Job to backfill merge hashes for existing advisories during migration.
/// Targets a single advisory when a "seed" parameter is supplied, otherwise
/// backfills the entire advisory set.
/// </summary>
public sealed class MergeHashBackfillJob : IJob
{
    private readonly MergeHashShadowWriteService _shadowWriteService;
    private readonly ILogger<MergeHashBackfillJob> _logger;

    public MergeHashBackfillJob(
        MergeHashShadowWriteService shadowWriteService,
        ILogger<MergeHashBackfillJob> logger)
    {
        _shadowWriteService = shadowWriteService ?? throw new ArgumentNullException(nameof(shadowWriteService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Executes the backfill job.
    /// </summary>
    /// <remarks>
    /// Parameters:
    /// - "seed" (optional): Specific advisory key to backfill. If empty, backfills all.
    /// - "force" (optional): If "true", recomputes hash even for advisories that have one.
    /// </remarks>
    public async Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        // A missing or blank "seed" parameter selects the full backfill path.
        var targetKey = context.Parameters.TryGetValue("seed", out var rawSeed)
            ? rawSeed as string
            : null;

        // "force" is only honored when supplied as the literal string "true" (any casing).
        var force = context.Parameters.TryGetValue("force", out var rawForce)
            && rawForce is string forceText
            && string.Equals(forceText, "true", StringComparison.OrdinalIgnoreCase);

        if (string.IsNullOrWhiteSpace(targetKey))
        {
            _logger.LogInformation("Starting merge hash backfill for all advisories");

            var result = await _shadowWriteService.BackfillAllAsync(cancellationToken).ConfigureAwait(false);

            _logger.LogInformation(
                "Merge hash backfill complete: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
                result.Processed,
                result.Updated,
                result.Skipped,
                result.Failed);
            return;
        }

        _logger.LogInformation("Starting merge hash backfill for single advisory: {AdvisoryKey}, force={Force}", targetKey, force);

        var updated = await _shadowWriteService.BackfillOneAsync(targetKey, force, cancellationToken).ConfigureAwait(false);

        _logger.LogInformation(
            "Merge hash backfill for {AdvisoryKey} complete: updated={Updated}",
            targetKey,
            updated);
    }
}

View File

@@ -3,4 +3,5 @@ namespace StellaOps.Concelier.Merge.Jobs;
/// <summary>
/// Well-known job-kind identifiers for merge-related jobs.
/// </summary>
internal static class MergeJobKinds
{
    /// <summary>Kind identifier for the advisory merge/reconcile job.</summary>
    public const string Reconcile = "merge:reconcile";

    /// <summary>Kind identifier for the merge hash backfill job.</summary>
    public const string HashBackfill = "merge:hash-backfill";
}

View File

@@ -8,6 +8,7 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Core;
using StellaOps.Concelier.Core.Events;
using StellaOps.Concelier.Merge.Identity;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Advisories;
using StellaOps.Concelier.Storage.Aliases;
@@ -41,6 +42,7 @@ public sealed class AdvisoryMergeService
private readonly IAdvisoryEventLog _eventLog;
private readonly TimeProvider _timeProvider;
private readonly CanonicalMerger _canonicalMerger;
private readonly IMergeHashCalculator? _mergeHashCalculator;
private readonly ILogger<AdvisoryMergeService> _logger;
public AdvisoryMergeService(
@@ -51,7 +53,8 @@ public sealed class AdvisoryMergeService
CanonicalMerger canonicalMerger,
IAdvisoryEventLog eventLog,
TimeProvider timeProvider,
ILogger<AdvisoryMergeService> logger)
ILogger<AdvisoryMergeService> logger,
IMergeHashCalculator? mergeHashCalculator = null)
{
_aliasResolver = aliasResolver ?? throw new ArgumentNullException(nameof(aliasResolver));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
@@ -61,6 +64,7 @@ public sealed class AdvisoryMergeService
_eventLog = eventLog ?? throw new ArgumentNullException(nameof(eventLog));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_mergeHashCalculator = mergeHashCalculator; // Optional during migration
}
public async Task<AdvisoryMergeResult> MergeAsync(string seedAdvisoryKey, CancellationToken cancellationToken)
@@ -102,7 +106,7 @@ public sealed class AdvisoryMergeService
throw;
}
var merged = precedenceResult.Advisory;
var merged = EnrichWithMergeHash(precedenceResult.Advisory);
var conflictDetails = precedenceResult.Conflicts;
if (component.Collisions.Count > 0)
@@ -309,7 +313,48 @@ public sealed class AdvisoryMergeService
source.Provenance,
source.Description,
source.Cwes,
source.CanonicalMetricId);
source.CanonicalMetricId,
source.MergeHash);
/// <summary>
/// Enriches an advisory with its computed merge hash if calculator is available.
/// Returns the advisory unchanged when no calculator was injected (it is
/// optional during migration) or when hash computation throws.
/// </summary>
private Advisory EnrichWithMergeHash(Advisory advisory)
{
    if (_mergeHashCalculator is null)
    {
        // No calculator registered yet; pass the advisory through untouched.
        return advisory;
    }

    try
    {
        var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);

        // Rebuild the advisory rather than mutating it, copying every existing
        // component and appending the computed hash as the final constructor
        // argument. Argument order must match the Advisory constructor exactly.
        return new Advisory(
            advisory.AdvisoryKey,
            advisory.Title,
            advisory.Summary,
            advisory.Language,
            advisory.Published,
            advisory.Modified,
            advisory.Severity,
            advisory.ExploitKnown,
            advisory.Aliases,
            advisory.Credits,
            advisory.References,
            advisory.AffectedPackages,
            advisory.CvssMetrics,
            advisory.Provenance,
            advisory.Description,
            advisory.Cwes,
            advisory.CanonicalMetricId,
            mergeHash);
    }
    catch (Exception ex)
    {
        // Hashing must never block a merge: log and continue without a hash.
        _logger.LogWarning(ex, "Failed to compute merge hash for {AdvisoryKey}, continuing without hash", advisory.AdvisoryKey);
        return advisory;
    }
}
private CanonicalMergeResult? ApplyCanonicalMergeIfNeeded(string canonicalKey, List<Advisory> inputs)
{

View File

@@ -0,0 +1,172 @@
// -----------------------------------------------------------------------------
// MergeHashBackfillService.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-020
// Description: Shadow-write mode for computing merge_hash on existing advisories
// -----------------------------------------------------------------------------
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Merge.Identity;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Advisories;
namespace StellaOps.Concelier.Merge.Services;
/// <summary>
/// Service for backfilling merge hashes on existing advisories without changing their identity.
/// Runs in shadow-write mode: computes merge_hash and updates only that field.
/// </summary>
public sealed class MergeHashBackfillService
{
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IMergeHashCalculator _mergeHashCalculator;
    private readonly ILogger<MergeHashBackfillService> _logger;

    public MergeHashBackfillService(
        IAdvisoryStore advisoryStore,
        IMergeHashCalculator mergeHashCalculator,
        ILogger<MergeHashBackfillService> logger)
    {
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Backfills merge hashes for all advisories that don't have one.
    /// Per-advisory failures are logged and counted but do not abort the run.
    /// </summary>
    /// <param name="batchSize">Number of advisories to process before yielding progress.</param>
    /// <param name="dryRun">If true, computes hashes but doesn't persist them.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Backfill result with statistics.</returns>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="batchSize"/> is not positive.</exception>
    public async Task<MergeHashBackfillResult> BackfillAsync(
        int batchSize = 100,
        bool dryRun = false,
        CancellationToken cancellationToken = default)
    {
        if (batchSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
        }

        var stopwatch = Stopwatch.StartNew();
        var processed = 0;
        var updated = 0;
        var skipped = 0;
        var errors = 0;

        _logger.LogInformation(
            "Starting merge hash backfill (dryRun={DryRun}, batchSize={BatchSize})",
            dryRun, batchSize);

        // NOTE(review): UpsertAsync is invoked while StreamAsync is still
        // enumerating; assumes the store tolerates writes during streaming —
        // confirm against the IAdvisoryStore implementation.
        await foreach (var advisory in _advisoryStore.StreamAsync(cancellationToken))
        {
            cancellationToken.ThrowIfCancellationRequested();
            processed++;

            if (!string.IsNullOrEmpty(advisory.MergeHash))
            {
                // Already hashed; shadow-write mode never overwrites.
                skipped++;
            }
            else
            {
                try
                {
                    var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);
                    if (!dryRun)
                    {
                        var enrichedAdvisory = CreateAdvisoryWithMergeHash(advisory, mergeHash);
                        await _advisoryStore.UpsertAsync(enrichedAdvisory, cancellationToken).ConfigureAwait(false);
                    }

                    updated++;
                }
                catch (Exception ex)
                {
                    // Best-effort: record the failure and continue with the next advisory.
                    errors++;
                    _logger.LogWarning(
                        ex,
                        "Failed to compute/update merge hash for {AdvisoryKey}",
                        advisory.AdvisoryKey);
                }
            }

            // FIX: progress is reported every batchSize *processed* advisories,
            // as the parameter documents. The previous check keyed off the
            // updated count, so skip- or error-heavy runs never logged progress.
            if (processed % batchSize == 0)
            {
                _logger.LogInformation(
                    "Backfill progress: {Updated} updated, {Skipped} skipped, {Errors} errors (of {Processed} processed)",
                    updated, skipped, errors, processed);
            }
        }

        stopwatch.Stop();

        var result = new MergeHashBackfillResult(
            TotalProcessed: processed,
            Updated: updated,
            Skipped: skipped,
            Errors: errors,
            DryRun: dryRun,
            Duration: stopwatch.Elapsed);

        _logger.LogInformation(
            "Merge hash backfill completed: {Updated} updated, {Skipped} skipped, {Errors} errors (of {Processed} processed) in {Duration}",
            result.Updated, result.Skipped, result.Errors, result.TotalProcessed, result.Duration);

        return result;
    }

    /// <summary>
    /// Computes merge hash for a single advisory without persisting.
    /// Useful for testing or preview mode.
    /// </summary>
    public string ComputeMergeHash(Advisory advisory)
    {
        ArgumentNullException.ThrowIfNull(advisory);
        return _mergeHashCalculator.ComputeMergeHash(advisory);
    }

    /// <summary>
    /// Clones the advisory with only the merge hash appended; argument order
    /// must match the Advisory constructor exactly.
    /// </summary>
    private static Advisory CreateAdvisoryWithMergeHash(Advisory source, string mergeHash)
        => new(
            source.AdvisoryKey,
            source.Title,
            source.Summary,
            source.Language,
            source.Published,
            source.Modified,
            source.Severity,
            source.ExploitKnown,
            source.Aliases,
            source.Credits,
            source.References,
            source.AffectedPackages,
            source.CvssMetrics,
            source.Provenance,
            source.Description,
            source.Cwes,
            source.CanonicalMetricId,
            mergeHash);
}
/// <summary>
/// Result of a merge hash backfill operation.
/// </summary>
/// <param name="TotalProcessed">Total advisories examined.</param>
/// <param name="Updated">Advisories whose merge hash was computed (and persisted unless <paramref name="DryRun"/>).</param>
/// <param name="Skipped">Advisories that already had a merge hash.</param>
/// <param name="Errors">Advisories that failed hash computation or persistence.</param>
/// <param name="DryRun">Whether hashes were computed without being persisted.</param>
/// <param name="Duration">Wall-clock duration of the run.</param>
public sealed record MergeHashBackfillResult(
    int TotalProcessed,
    int Updated,
    int Skipped,
    int Errors,
    bool DryRun,
    TimeSpan Duration)
{
    /// <summary>
    /// Percentage of advisories processed without error (updated or skipped).
    /// Note this is a non-error rate, not an update rate: already-hashed
    /// (skipped) advisories count as success. 100 when nothing was processed.
    /// </summary>
    public double SuccessRate => TotalProcessed > 0
        ? (double)(Updated + Skipped) / TotalProcessed * 100
        : 100;

    /// <summary>
    /// Average time per advisory in milliseconds; 0 when nothing was processed.
    /// </summary>
    public double AvgTimePerAdvisoryMs => TotalProcessed > 0
        ? Duration.TotalMilliseconds / TotalProcessed
        : 0;
}