sprints enhancements

This commit is contained in:
StellaOps Bot
2025-12-25 19:52:30 +02:00
parent ef6ac36323
commit b8b2d83f4a
138 changed files with 25133 additions and 594 deletions

View File

@@ -16,6 +16,7 @@ using StellaOps.Concelier.Storage.Advisories;
using StellaOps.Concelier.Storage;
using StellaOps.Concelier.Storage;
using StellaOps.Concelier.Storage.ChangeHistory;
using StellaOps.Concelier.Core.Canonical;
using StellaOps.Plugin;
using Json.Schema;
using StellaOps.Cryptography;
@@ -37,6 +38,7 @@ public sealed class NvdConnector : IFeedConnector
private readonly ILogger<NvdConnector> _logger;
private readonly NvdDiagnostics _diagnostics;
private readonly ICryptoHash _hash;
private readonly ICanonicalAdvisoryService? _canonicalService;
private static readonly JsonSchema Schema = NvdSchemaProvider.Schema;
@@ -53,7 +55,8 @@ public sealed class NvdConnector : IFeedConnector
NvdDiagnostics diagnostics,
ICryptoHash hash,
TimeProvider? timeProvider,
ILogger<NvdConnector> logger)
ILogger<NvdConnector> logger,
ICanonicalAdvisoryService? canonicalService = null)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
@@ -69,6 +72,7 @@ public sealed class NvdConnector : IFeedConnector
_hash = hash ?? throw new ArgumentNullException(nameof(hash));
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_canonicalService = canonicalService; // Optional - canonical ingest
}
public string SourceName => NvdConnectorPlugin.SourceName;
@@ -292,6 +296,13 @@ public sealed class NvdConnector : IFeedConnector
{
await RecordChangeHistoryAsync(advisory, previous, document, now, cancellationToken).ConfigureAwait(false);
}
// Ingest to canonical advisory service if available
if (_canonicalService is not null)
{
await IngestToCanonicalAsync(advisory, json, document.FetchedAt, cancellationToken).ConfigureAwait(false);
}
mappedCount++;
}
@@ -565,4 +576,88 @@ public sealed class NvdConnector : IFeedConnector
builder.Query = string.Join("&", parameters.Select(static kvp => $"{System.Net.WebUtility.UrlEncode(kvp.Key)}={System.Net.WebUtility.UrlEncode(kvp.Value)}"));
return builder.Uri;
}
/// <summary>
/// Ingests an NVD advisory into the canonical advisory service for deduplication.
/// Emits one <see cref="RawAdvisory"/> per affected package (CPE identifier).
/// Ingest failures are logged and swallowed so they never fail the mapping
/// pipeline; cancellation is always propagated.
/// </summary>
private async Task IngestToCanonicalAsync(
    Advisory advisory,
    string rawPayloadJson,
    DateTimeOffset fetchedAt,
    CancellationToken cancellationToken)
{
    if (_canonicalService is null || advisory.AffectedPackages.IsEmpty)
    {
        return;
    }

    // NVD advisories are keyed by CVE ID.
    var cve = advisory.AdvisoryKey;

    // Extract CWE weaknesses once; shared by every per-package RawAdvisory below.
    var weaknesses = advisory.Cwes
        .Where(w => w.Identifier.StartsWith("CWE-", StringComparison.OrdinalIgnoreCase))
        .Select(w => w.Identifier)
        .ToList();

    // Create one RawAdvisory per affected package (CPE).
    foreach (var affected in advisory.AffectedPackages)
    {
        if (string.IsNullOrWhiteSpace(affected.Identifier))
        {
            continue;
        }

        // Build version range JSON from the first range only.
        // NOTE(review): ranges beyond the first are dropped — confirm intended.
        string? versionRangeJson = null;
        if (!affected.VersionRanges.IsEmpty)
        {
            var firstRange = affected.VersionRanges[0];
            var rangeObj = new
            {
                introduced = firstRange.IntroducedVersion,
                @fixed = firstRange.FixedVersion,
                last_affected = firstRange.LastAffectedVersion
            };
            versionRangeJson = JsonSerializer.Serialize(rangeObj);
        }

        var rawAdvisory = new RawAdvisory
        {
            SourceAdvisoryId = cve,
            Cve = cve,
            AffectsKey = affected.Identifier,
            VersionRangeJson = versionRangeJson,
            Weaknesses = weaknesses,
            PatchLineage = null,
            Severity = advisory.Severity,
            Title = advisory.Title,
            Summary = advisory.Summary,
            VendorStatus = VendorStatus.Affected,
            RawPayloadJson = rawPayloadJson,
            FetchedAt = fetchedAt
        };

        try
        {
            var result = await _canonicalService.IngestAsync(SourceName, rawAdvisory, cancellationToken).ConfigureAwait(false);
            if (_logger.IsEnabled(LogLevel.Debug))
            {
                _logger.LogDebug(
                    "Canonical ingest for {CveId}/{AffectsKey}: {Decision} (canonical={CanonicalId})",
                    cve, affected.Identifier, result.Decision, result.CanonicalId);
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // FIX: the bare catch previously swallowed OperationCanceledException,
            // hiding cancellation. Other failures stay best-effort: don't fail the
            // mapping operation for canonical ingest failures.
            _logger.LogWarning(
                ex,
                "Failed to ingest {CveId}/{AffectsKey} to canonical service",
                cve, affected.Identifier);
        }
    }
}
}

View File

@@ -20,8 +20,7 @@ using StellaOps.Concelier.Connector.Osv.Configuration;
using StellaOps.Concelier.Connector.Osv.Internal;
using StellaOps.Concelier.Storage;
using StellaOps.Concelier.Storage.Advisories;
using StellaOps.Concelier.Storage;
using StellaOps.Concelier.Storage;
using StellaOps.Concelier.Core.Canonical;
using StellaOps.Plugin;
using StellaOps.Cryptography;
@@ -41,6 +40,7 @@ public sealed class OsvConnector : IFeedConnector
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly ICanonicalAdvisoryService? _canonicalService;
private readonly OsvOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<OsvConnector> _logger;
@@ -58,7 +58,8 @@ public sealed class OsvConnector : IFeedConnector
OsvDiagnostics diagnostics,
ICryptoHash hash,
TimeProvider? timeProvider,
ILogger<OsvConnector> logger)
ILogger<OsvConnector> logger,
ICanonicalAdvisoryService? canonicalService = null)
{
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
@@ -66,6 +67,7 @@ public sealed class OsvConnector : IFeedConnector
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_canonicalService = canonicalService; // Optional - canonical ingest
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_hash = hash ?? throw new ArgumentNullException(nameof(hash));
@@ -287,6 +289,12 @@ public sealed class OsvConnector : IFeedConnector
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
// Ingest to canonical advisory service if available
if (_canonicalService is not null)
{
await IngestToCanonicalAsync(osvDto, advisory, payloadJson, document.FetchedAt, cancellationToken).ConfigureAwait(false);
}
pendingMappings.Remove(documentId);
}
@@ -518,4 +526,91 @@ public sealed class OsvConnector : IFeedConnector
var safeId = vulnerabilityId.Replace(' ', '-');
return $"https://osv-vulnerabilities.storage.googleapis.com/{ecosystem}/{safeId}.json";
}
/// <summary>
/// Ingests an OSV advisory into the canonical advisory service for deduplication.
/// Emits one <see cref="RawAdvisory"/> per affected package. Ingest failures are
/// logged and swallowed so they never fail the mapping pipeline; cancellation is
/// always propagated.
/// </summary>
private async Task IngestToCanonicalAsync(
    OsvVulnerabilityDto dto,
    Advisory advisory,
    string rawPayloadJson,
    DateTimeOffset fetchedAt,
    CancellationToken cancellationToken)
{
    // Guard both the DTO's affected list and the mapped packages: the loop below
    // iterates advisory.AffectedPackages, so bail out early when either is empty.
    if (_canonicalService is null
        || dto.Affected is null
        || dto.Affected.Count == 0
        || advisory.AffectedPackages.IsEmpty)
    {
        return;
    }

    // Prefer the first CVE alias as the canonical key; fall back to the OSV ID.
    var cve = advisory.Aliases
        .FirstOrDefault(a => a.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        ?? dto.Id;

    // Extract CWE weaknesses once; shared by every per-package RawAdvisory below.
    var weaknesses = advisory.Cwes
        .Where(w => w.Identifier.StartsWith("CWE-", StringComparison.OrdinalIgnoreCase))
        .Select(w => w.Identifier)
        .ToList();

    // Create one RawAdvisory per affected package.
    foreach (var affected in advisory.AffectedPackages)
    {
        if (string.IsNullOrWhiteSpace(affected.Identifier))
        {
            continue;
        }

        // Build version range JSON from the first range only.
        // NOTE(review): ranges beyond the first are dropped — confirm intended.
        string? versionRangeJson = null;
        if (affected.VersionRanges.Length > 0)
        {
            var firstRange = affected.VersionRanges[0];
            var rangeObj = new
            {
                introduced = firstRange.IntroducedVersion,
                @fixed = firstRange.FixedVersion,
                last_affected = firstRange.LastAffectedVersion
            };
            versionRangeJson = JsonSerializer.Serialize(rangeObj, SerializerOptions);
        }

        var rawAdvisory = new RawAdvisory
        {
            SourceAdvisoryId = dto.Id,
            Cve = cve,
            AffectsKey = affected.Identifier,
            VersionRangeJson = versionRangeJson,
            Weaknesses = weaknesses,
            PatchLineage = null, // OSV doesn't have patch lineage
            Severity = advisory.Severity,
            Title = advisory.Title,
            Summary = advisory.Summary,
            VendorStatus = VendorStatus.Affected,
            RawPayloadJson = rawPayloadJson,
            FetchedAt = fetchedAt
        };

        try
        {
            var result = await _canonicalService.IngestAsync(SourceName, rawAdvisory, cancellationToken).ConfigureAwait(false);
            if (_logger.IsEnabled(LogLevel.Debug))
            {
                _logger.LogDebug(
                    "Canonical ingest for {OsvId}/{AffectsKey}: {Decision} (canonical={CanonicalId})",
                    dto.Id, affected.Identifier, result.Decision, result.CanonicalId);
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // FIX: the bare catch previously swallowed OperationCanceledException,
            // hiding cancellation. Other failures stay best-effort: don't fail the
            // mapping operation for canonical ingest failures.
            _logger.LogWarning(
                ex,
                "Failed to ingest {OsvId}/{AffectsKey} to canonical service",
                dto.Id, affected.Identifier);
        }
    }
}
}

View File

@@ -1,7 +1,66 @@
# AGENTS
## Role
---
## Canonical Advisory Service
### Role
Deduplicated canonical advisory management with provenance-scoped source edges. Ingests raw advisories from multiple sources (NVD, GHSA, OSV, vendor, distro), computes merge hashes for deduplication, and maintains canonical records with linked source edges.
### Scope
- **Ingestion**: `IngestAsync` and `IngestBatchAsync` - Raw advisory to canonical pipeline with merge hash computation, duplicate detection, and source edge creation.
- **Query**: `GetByIdAsync`, `GetByCveAsync`, `GetByArtifactAsync`, `GetByMergeHashAsync`, `QueryAsync` - Lookup canonical advisories with source edges.
- **Status**: `UpdateStatusAsync`, `DegradeToStubsAsync` - Lifecycle management (Active, Stub, Withdrawn).
- **Caching**: `CachingCanonicalAdvisoryService` decorator with configurable TTLs for hot queries.
- **Signing**: Optional DSSE signing of source edges via `ISourceEdgeSigner` integration.
### Interfaces & Contracts
- **ICanonicalAdvisoryService**: Main service interface for ingest and query operations.
- **ICanonicalAdvisoryStore**: Storage abstraction for canonical/source edge persistence.
- **IMergeHashCalculator**: Merge hash computation (CVE + PURL + version range + CWE + patch lineage).
- **ISourceEdgeSigner**: Optional DSSE envelope signing for source edges.
### Domain Models
- **CanonicalAdvisory**: Deduplicated advisory record with merge hash, status, severity, EPSS, weaknesses.
- **SourceEdge**: Link from source advisory to canonical with precedence rank, doc hash, DSSE envelope.
- **IngestResult**: Outcome with MergeDecision (Created, Merged, Duplicate, Conflict).
- **RawAdvisory**: Input from connectors with CVE, affects key, version range, weaknesses.
### Source Precedence
Lower rank = higher priority for metadata updates:
- `vendor` = 10 (authoritative)
- `redhat/debian/suse/ubuntu/alpine` = 20 (distro)
- `osv` = 30
- `ghsa` = 35
- `nvd` = 40 (fallback)
### API Endpoints
- `GET /api/v1/canonical/{id}` - Get by ID
- `GET /api/v1/canonical?cve={cve}&artifact={purl}&mergeHash={hash}` - Query
- `POST /api/v1/canonical/ingest/{source}` - Ingest single advisory
- `POST /api/v1/canonical/ingest/{source}/batch` - Batch ingest
- `PATCH /api/v1/canonical/{id}/status` - Update status
### In/Out of Scope
**In**: Merge hash computation, canonical upsert, source edge linking, duplicate detection, caching, DSSE signing.
**Out**: Raw advisory fetching (connectors), database schema (Storage.Postgres), HTTP routing (WebService).
### Observability
- Logs: canonical ID, merge hash, decision, source, precedence rank, signing status.
- Cache: hit/miss tracing at Trace level.
### Tests
- Unit tests in `Core.Tests/Canonical/` covering ingest pipeline, caching, signing.
- Integration tests in `WebService.Tests/Canonical/` for API endpoints.
---
## Job Orchestration
### Role
Job orchestration and lifecycle. Registers job definitions, schedules execution, triggers runs, reports status for connectors and exporters.
### Scope
- Contracts: IJob (execute with CancellationToken), JobRunStatus, JobTriggerOutcome/Result.
- Registration: JobSchedulerBuilder.AddJob<T>(kind, cronExpression?, timeout?, leaseDuration?); options recorded in JobSchedulerOptions.
- Plugin host integration discovers IJob providers via registered IDependencyInjectionRoutine implementations.

View File

@@ -0,0 +1,264 @@
// -----------------------------------------------------------------------------
// CachingCanonicalAdvisoryService.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-014
// Description: Caching decorator for canonical advisory service
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Caching decorator for <see cref="ICanonicalAdvisoryService"/>.
/// Caches hot queries (by ID, merge hash, CVE, artifact) with short, configurable
/// TTLs. Ingest and status operations pass through to the inner service and evict
/// the affected cache entries. Complex paged queries are never cached.
/// </summary>
public sealed class CachingCanonicalAdvisoryService : ICanonicalAdvisoryService
{
    private readonly ICanonicalAdvisoryService _inner;
    private readonly IMemoryCache _cache;
    private readonly ILogger<CachingCanonicalAdvisoryService> _logger;
    private readonly CanonicalCacheOptions _options;

    private const string CacheKeyPrefix = "canonical:";

    public CachingCanonicalAdvisoryService(
        ICanonicalAdvisoryService inner,
        IMemoryCache cache,
        IOptions<CanonicalCacheOptions> options,
        ILogger<CachingCanonicalAdvisoryService> logger)
    {
        _inner = inner ?? throw new ArgumentNullException(nameof(inner));
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new CanonicalCacheOptions();
    }

    #region Ingest Operations (Pass-through with cache invalidation)

    /// <inheritdoc />
    public async Task<IngestResult> IngestAsync(
        string source,
        RawAdvisory rawAdvisory,
        CancellationToken ct = default)
    {
        var result = await _inner.IngestAsync(source, rawAdvisory, ct).ConfigureAwait(false);

        // Duplicates change nothing, so cached entries remain valid; anything else
        // may have created or mutated a canonical and must be evicted.
        if (result.Decision != MergeDecision.Duplicate)
        {
            InvalidateCacheForCanonical(result.CanonicalId, result.MergeHash, rawAdvisory.Cve);
        }

        return result;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<IngestResult>> IngestBatchAsync(
        string source,
        IEnumerable<RawAdvisory> advisories,
        CancellationToken ct = default)
    {
        // Materialize so the sequence is enumerated once and each result can be
        // paired with its source advisory below.
        var advisoryList = advisories as IReadOnlyList<RawAdvisory> ?? advisories.ToList();

        var results = await _inner.IngestBatchAsync(source, advisoryList, ct).ConfigureAwait(false);

        // FIX: batch ingest previously passed a null CVE to invalidation, so
        // CVE-keyed entries were never evicted and GetByCveAsync could serve
        // stale lists until TTL expiry. Results are produced 1:1 and in order
        // with the submitted advisories, so the CVE can be recovered by index.
        for (var i = 0; i < results.Count; i++)
        {
            var result = results[i];
            if (result.Decision == MergeDecision.Duplicate)
            {
                continue;
            }

            var cve = i < advisoryList.Count ? advisoryList[i].Cve : null;
            InvalidateCacheForCanonical(result.CanonicalId, result.MergeHash, cve);
        }

        return results;
    }

    #endregion

    #region Query Operations (Cached)

    /// <inheritdoc />
    public async Task<CanonicalAdvisory?> GetByIdAsync(Guid id, CancellationToken ct = default)
    {
        var cacheKey = $"{CacheKeyPrefix}id:{id}";
        if (_cache.TryGetValue(cacheKey, out CanonicalAdvisory? cached))
        {
            _logger.LogTrace("Cache hit for canonical {CanonicalId}", id);
            return cached;
        }

        var result = await _inner.GetByIdAsync(id, ct).ConfigureAwait(false);
        if (result is not null)
        {
            SetCache(cacheKey, result, _options.DefaultTtl);
            // Also cache by merge hash for cross-lookup.
            SetCache($"{CacheKeyPrefix}hash:{result.MergeHash}", result, _options.DefaultTtl);
        }

        return result;
    }

    /// <inheritdoc />
    public async Task<CanonicalAdvisory?> GetByMergeHashAsync(string mergeHash, CancellationToken ct = default)
    {
        var cacheKey = $"{CacheKeyPrefix}hash:{mergeHash}";
        if (_cache.TryGetValue(cacheKey, out CanonicalAdvisory? cached))
        {
            _logger.LogTrace("Cache hit for merge hash {MergeHash}", mergeHash);
            return cached;
        }

        var result = await _inner.GetByMergeHashAsync(mergeHash, ct).ConfigureAwait(false);
        if (result is not null)
        {
            SetCache(cacheKey, result, _options.DefaultTtl);
            // Also cache by ID for cross-lookup.
            SetCache($"{CacheKeyPrefix}id:{result.Id}", result, _options.DefaultTtl);
        }

        return result;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<CanonicalAdvisory>> GetByCveAsync(string cve, CancellationToken ct = default)
    {
        // CVE keys are normalized to uppercase so lookups and invalidation agree.
        var cacheKey = $"{CacheKeyPrefix}cve:{cve.ToUpperInvariant()}";
        if (_cache.TryGetValue(cacheKey, out IReadOnlyList<CanonicalAdvisory>? cached) && cached is not null)
        {
            _logger.LogTrace("Cache hit for CVE {Cve} ({Count} items)", cve, cached.Count);
            return cached;
        }

        var result = await _inner.GetByCveAsync(cve, ct).ConfigureAwait(false);
        if (result.Count > 0)
        {
            SetCache(cacheKey, result, _options.CveTtl);
            // Also cache individual items for ID/hash lookups.
            foreach (var item in result)
            {
                SetCache($"{CacheKeyPrefix}id:{item.Id}", item, _options.DefaultTtl);
                SetCache($"{CacheKeyPrefix}hash:{item.MergeHash}", item, _options.DefaultTtl);
            }
        }

        return result;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<CanonicalAdvisory>> GetByArtifactAsync(
        string artifactKey,
        CancellationToken ct = default)
    {
        // Artifact keys are normalized to lowercase.
        var cacheKey = $"{CacheKeyPrefix}artifact:{artifactKey.ToLowerInvariant()}";
        if (_cache.TryGetValue(cacheKey, out IReadOnlyList<CanonicalAdvisory>? cached) && cached is not null)
        {
            _logger.LogTrace("Cache hit for artifact {ArtifactKey} ({Count} items)", artifactKey, cached.Count);
            return cached;
        }

        var result = await _inner.GetByArtifactAsync(artifactKey, ct).ConfigureAwait(false);
        if (result.Count > 0)
        {
            SetCache(cacheKey, result, _options.ArtifactTtl);
        }

        return result;
    }

    /// <inheritdoc />
    public Task<PagedResult<CanonicalAdvisory>> QueryAsync(
        CanonicalQueryOptions options,
        CancellationToken ct = default)
    {
        // Don't cache complex queries - pass through.
        return _inner.QueryAsync(options, ct);
    }

    #endregion

    #region Status Operations (Pass-through with cache invalidation)

    /// <inheritdoc />
    public async Task UpdateStatusAsync(Guid id, CanonicalStatus status, CancellationToken ct = default)
    {
        await _inner.UpdateStatusAsync(id, status, ct).ConfigureAwait(false);

        // Invalidate cache for this canonical.
        InvalidateCacheById(id);
    }

    /// <inheritdoc />
    public Task<int> DegradeToStubsAsync(double scoreThreshold, CancellationToken ct = default)
    {
        // This may affect many entries - don't try to invalidate individually.
        // The cache will naturally expire.
        return _inner.DegradeToStubsAsync(scoreThreshold, ct);
    }

    #endregion

    #region Private Helpers

    /// <summary>Stores a value unless caching is disabled or the TTL is non-positive.</summary>
    private void SetCache<T>(string key, T value, TimeSpan ttl) where T : class
    {
        if (ttl <= TimeSpan.Zero || !_options.Enabled)
        {
            return;
        }

        var options = new MemoryCacheEntryOptions
        {
            AbsoluteExpirationRelativeToNow = ttl,
            Size = 1 // For size-limited caches
        };

        _cache.Set(key, value, options);
    }

    /// <summary>Evicts the ID, merge-hash, and (when known) CVE entries for a canonical.</summary>
    private void InvalidateCacheForCanonical(Guid id, string? mergeHash, string? cve)
    {
        InvalidateCacheById(id);

        if (!string.IsNullOrEmpty(mergeHash))
        {
            _cache.Remove($"{CacheKeyPrefix}hash:{mergeHash}");
        }

        if (!string.IsNullOrEmpty(cve))
        {
            _cache.Remove($"{CacheKeyPrefix}cve:{cve.ToUpperInvariant()}");
        }
    }

    private void InvalidateCacheById(Guid id)
    {
        _cache.Remove($"{CacheKeyPrefix}id:{id}");
    }

    #endregion
}
/// <summary>
/// Tuning knobs for the canonical advisory caching decorator.
/// </summary>
public sealed class CanonicalCacheOptions
{
    /// <summary>Master switch for the caching layer. Defaults to enabled.</summary>
    public bool Enabled { get; set; } = true;

    /// <summary>Lifetime of single-canonical entries (ID and merge-hash keys). Defaults to 5 minutes.</summary>
    public TimeSpan DefaultTtl { get; set; } = TimeSpan.FromMinutes(5);

    /// <summary>Lifetime of CVE-keyed list entries. Defaults to 2 minutes.</summary>
    public TimeSpan CveTtl { get; set; } = TimeSpan.FromMinutes(2);

    /// <summary>Lifetime of artifact-keyed list entries. Defaults to 2 minutes.</summary>
    public TimeSpan ArtifactTtl { get; set; } = TimeSpan.FromMinutes(2);
}

View File

@@ -0,0 +1,95 @@
// -----------------------------------------------------------------------------
// CanonicalAdvisory.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-001
// Description: Domain model for canonical advisory with source edges
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// A deduplicated canonical advisory together with every source edge that feeds it.
/// </summary>
public sealed record CanonicalAdvisory
{
    /// <summary>Unique identifier of this canonical record.</summary>
    public Guid Id { get; init; }

    /// <summary>CVE identifier, e.g. "CVE-2024-1234".</summary>
    public required string Cve { get; init; }

    /// <summary>Normalized PURL or CPE naming the affected package.</summary>
    public required string AffectsKey { get; init; }

    /// <summary>Structured version range (introduced / fixed / last_affected), if known.</summary>
    public VersionRange? VersionRange { get; init; }

    /// <summary>CWE identifiers, sorted; empty when none are recorded.</summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>Deterministic SHA256 hash over the identity components.</summary>
    public required string MergeHash { get; init; }

    /// <summary>Lifecycle status; new records default to <see cref="CanonicalStatus.Active"/>.</summary>
    public CanonicalStatus Status { get; init; } = CanonicalStatus.Active;

    /// <summary>Normalized severity: critical, high, medium, low, or none.</summary>
    public string? Severity { get; init; }

    /// <summary>EPSS exploit prediction probability (0.0000-1.0000), if available.</summary>
    public decimal? EpssScore { get; init; }

    /// <summary>True when an exploit is known to exist.</summary>
    public bool ExploitKnown { get; init; }

    /// <summary>Advisory title, if any.</summary>
    public string? Title { get; init; }

    /// <summary>Advisory summary, if any.</summary>
    public string? Summary { get; init; }

    /// <summary>Creation timestamp of the canonical record.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>Timestamp of the most recent update.</summary>
    public DateTimeOffset UpdatedAt { get; init; }

    /// <summary>Every source edge linked to this canonical, ordered by precedence.</summary>
    public IReadOnlyList<SourceEdge> SourceEdges { get; init; } = [];

    /// <summary>The highest-precedence source edge, or null when there are no edges.</summary>
    public SourceEdge? PrimarySource => SourceEdges is { Count: > 0 } ? SourceEdges[0] : null;
}
/// <summary>
/// Lifecycle status of a canonical advisory.
/// </summary>
public enum CanonicalStatus
{
    /// <summary>Full active record with all data populated.</summary>
    Active,

    /// <summary>Minimal record retained for low-interest advisories (degraded from Active).</summary>
    Stub,

    /// <summary>Withdrawn or superseded advisory.</summary>
    Withdrawn
}
/// <summary>
/// Structured version range for an affected package
/// (introduced / fixed / last_affected triple plus an optional expression).
/// </summary>
public sealed record VersionRange
{
    /// <summary>Version where the vulnerability was introduced; null when unknown.</summary>
    public string? Introduced { get; init; }

    /// <summary>First version containing the fix; null when unfixed or unknown.</summary>
    public string? Fixed { get; init; }

    /// <summary>Last known affected version; null when unknown.</summary>
    public string? LastAffected { get; init; }

    /// <summary>Canonical range expression (e.g. "&gt;=1.0.0,&lt;2.0.0"); null when not computed.</summary>
    public string? RangeExpression { get; init; }
}

View File

@@ -0,0 +1,375 @@
// -----------------------------------------------------------------------------
// CanonicalAdvisoryService.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Tasks: CANSVC-8200-004 through CANSVC-8200-008
// Description: Service implementation for canonical advisory management
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Service for managing canonical advisories with provenance-scoped deduplication.
/// </summary>
public sealed class CanonicalAdvisoryService : ICanonicalAdvisoryService
{
private readonly ICanonicalAdvisoryStore _store;                    // canonical + source-edge persistence
private readonly IMergeHashCalculator _mergeHashCalculator;         // deduplication identity hash
private readonly ISourceEdgeSigner? _signer;                        // null => edges stored unsigned
private readonly ILogger<CanonicalAdvisoryService> _logger;

/// <summary>
/// Source precedence ranks (lower = higher priority). Lookup is case-insensitive.
/// Used both to decide whether a newly ingested source may overwrite canonical
/// metadata and as the rank recorded on each source edge; sources absent from
/// this table receive a default rank (see GetPrecedenceRank).
/// </summary>
private static readonly Dictionary<string, int> SourcePrecedence = new(StringComparer.OrdinalIgnoreCase)
{
    ["vendor"] = 10,   // authoritative
    ["redhat"] = 20,   // distro tier
    ["debian"] = 20,
    ["suse"] = 20,
    ["ubuntu"] = 20,
    ["alpine"] = 20,
    ["osv"] = 30,
    ["ghsa"] = 35,
    ["nvd"] = 40       // fallback
};
/// <summary>
/// Creates the canonical advisory service.
/// </summary>
/// <param name="store">Persistence for canonicals and source edges. Required.</param>
/// <param name="mergeHashCalculator">Computes the deduplication merge hash. Required.</param>
/// <param name="logger">Diagnostic logger. Required.</param>
/// <param name="signer">Optional DSSE signer; when null, source edges are stored unsigned.</param>
public CanonicalAdvisoryService(
    ICanonicalAdvisoryStore store,
    IMergeHashCalculator mergeHashCalculator,
    ILogger<CanonicalAdvisoryService> logger,
    ISourceEdgeSigner? signer = null)
{
    _store = store ?? throw new ArgumentNullException(nameof(store));
    _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _signer = signer; // Optional - if not provided, source edges are stored unsigned
}
#region Ingest Operations
/// <inheritdoc />
/// <remarks>
/// Pipeline: compute merge hash → find or create the canonical → deduplicate the
/// source edge by document hash → optionally DSSE-sign → persist the edge.
/// Returns a Created, Merged, or Duplicate decision accordingly. Step order
/// matters: the duplicate check (step 6) must follow canonical resolution.
/// </remarks>
public async Task<IngestResult> IngestAsync(
    string source,
    RawAdvisory rawAdvisory,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(source);
    ArgumentNullException.ThrowIfNull(rawAdvisory);

    _logger.LogDebug(
        "Ingesting advisory {SourceAdvisoryId} from {Source}",
        rawAdvisory.SourceAdvisoryId, source);

    // 1. Compute merge hash from identity components
    //    (CVE + affects key + version range + weaknesses + patch lineage).
    var mergeHashInput = new MergeHashInput
    {
        Cve = rawAdvisory.Cve,
        AffectsKey = rawAdvisory.AffectsKey,
        VersionRange = rawAdvisory.VersionRangeJson,
        Weaknesses = rawAdvisory.Weaknesses,
        PatchLineage = rawAdvisory.PatchLineage
    };
    var mergeHash = _mergeHashCalculator.ComputeMergeHash(mergeHashInput);

    // 2. Check for existing canonical with the same identity hash.
    var existing = await _store.GetByMergeHashAsync(mergeHash, ct).ConfigureAwait(false);

    MergeDecision decision;
    Guid canonicalId;

    if (existing is null)
    {
        // 3a. Create new canonical
        var upsertRequest = new UpsertCanonicalRequest
        {
            Cve = rawAdvisory.Cve,
            AffectsKey = rawAdvisory.AffectsKey,
            MergeHash = mergeHash,
            VersionRangeJson = rawAdvisory.VersionRangeJson,
            Weaknesses = rawAdvisory.Weaknesses,
            Severity = rawAdvisory.Severity,
            Title = rawAdvisory.Title,
            Summary = rawAdvisory.Summary
        };

        canonicalId = await _store.UpsertCanonicalAsync(upsertRequest, ct).ConfigureAwait(false);
        decision = MergeDecision.Created;

        _logger.LogInformation(
            "Created canonical {CanonicalId} with merge_hash {MergeHash} for {Cve}",
            canonicalId, mergeHash, rawAdvisory.Cve);
    }
    else
    {
        // 3b. Merge into existing canonical
        canonicalId = existing.Id;
        decision = MergeDecision.Merged;

        // Update metadata if we have better data (higher-precedence source).
        await UpdateCanonicalMetadataIfBetterAsync(existing, rawAdvisory, source, ct).ConfigureAwait(false);

        _logger.LogDebug(
            "Merging into existing canonical {CanonicalId} for {Cve}",
            canonicalId, rawAdvisory.Cve);
    }

    // 4. Compute source document hash (payload fingerprint for edge dedupe).
    var sourceDocHash = ComputeDocumentHash(rawAdvisory);

    // 5. Resolve source ID
    var sourceId = await _store.ResolveSourceIdAsync(source, ct).ConfigureAwait(false);

    // 6. Check if source edge already exists (duplicate detection)
    var edgeExists = await _store.SourceEdgeExistsAsync(canonicalId, sourceId, sourceDocHash, ct).ConfigureAwait(false);
    if (edgeExists)
    {
        _logger.LogDebug(
            "Duplicate source edge detected for canonical {CanonicalId} from {Source}",
            canonicalId, source);
        return IngestResult.Duplicate(canonicalId, mergeHash, source, rawAdvisory.SourceAdvisoryId);
    }

    // 7. Sign source edge if signer is available. Signing failures are logged
    //    and the edge is stored unsigned - signing is best-effort.
    string? dsseEnvelopeJson = null;
    Guid? signatureRef = null;

    if (_signer is not null && rawAdvisory.RawPayloadJson is not null)
    {
        var signingRequest = new SourceEdgeSigningRequest
        {
            SourceAdvisoryId = rawAdvisory.SourceAdvisoryId,
            SourceName = source,
            PayloadHash = sourceDocHash,
            PayloadJson = rawAdvisory.RawPayloadJson
        };

        var signingResult = await _signer.SignAsync(signingRequest, ct).ConfigureAwait(false);
        if (signingResult.Success && signingResult.Envelope is not null)
        {
            dsseEnvelopeJson = JsonSerializer.Serialize(signingResult.Envelope);
            signatureRef = signingResult.SignatureRef;

            _logger.LogDebug(
                "Signed source edge for {SourceAdvisoryId} from {Source} (ref: {SignatureRef})",
                rawAdvisory.SourceAdvisoryId, source, signatureRef);
        }
        else if (!signingResult.Success)
        {
            _logger.LogWarning(
                "Failed to sign source edge for {SourceAdvisoryId}: {Error}",
                rawAdvisory.SourceAdvisoryId, signingResult.ErrorMessage);
        }
    }

    // 8. Create source edge
    var precedenceRank = GetPrecedenceRank(source);
    var addEdgeRequest = new AddSourceEdgeRequest
    {
        CanonicalId = canonicalId,
        SourceId = sourceId,
        SourceAdvisoryId = rawAdvisory.SourceAdvisoryId,
        SourceDocHash = sourceDocHash,
        VendorStatus = rawAdvisory.VendorStatus,
        PrecedenceRank = precedenceRank,
        DsseEnvelopeJson = dsseEnvelopeJson,
        RawPayloadJson = rawAdvisory.RawPayloadJson,
        FetchedAt = rawAdvisory.FetchedAt
    };

    var edgeResult = await _store.AddSourceEdgeAsync(addEdgeRequest, ct).ConfigureAwait(false);

    _logger.LogInformation(
        "Added source edge {EdgeId} from {Source} ({SourceAdvisoryId}) to canonical {CanonicalId}{Signed}",
        edgeResult.EdgeId, source, rawAdvisory.SourceAdvisoryId, canonicalId,
        dsseEnvelopeJson is not null ? " [signed]" : "");

    return decision == MergeDecision.Created
        ? IngestResult.Created(canonicalId, mergeHash, edgeResult.EdgeId, source, rawAdvisory.SourceAdvisoryId, signatureRef)
        : IngestResult.Merged(canonicalId, mergeHash, edgeResult.EdgeId, source, rawAdvisory.SourceAdvisoryId, signatureRef);
}
/// <inheritdoc />
/// <remarks>
/// Best-effort sequential ingest: individual failures become Conflict results and
/// do not abort the batch. Cancellation is always propagated immediately.
/// </remarks>
public async Task<IReadOnlyList<IngestResult>> IngestBatchAsync(
    string source,
    IEnumerable<RawAdvisory> advisories,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(source);
    ArgumentNullException.ThrowIfNull(advisories);

    var results = new List<IngestResult>();

    foreach (var advisory in advisories)
    {
        ct.ThrowIfCancellationRequested();

        try
        {
            var result = await IngestAsync(source, advisory, ct).ConfigureAwait(false);
            results.Add(result);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // FIX: the bare catch previously converted OperationCanceledException
            // into a Conflict result and kept going; the filter lets cancellation
            // propagate. Other failures are recorded as Conflict so the rest of
            // the batch can proceed.
            _logger.LogWarning(
                ex,
                "Failed to ingest advisory {SourceAdvisoryId} from {Source}",
                advisory.SourceAdvisoryId, source);

            results.Add(IngestResult.Conflict(
                Guid.Empty,
                string.Empty,
                ex.Message,
                source,
                advisory.SourceAdvisoryId));
        }
    }

    _logger.LogInformation(
        "Batch ingest complete: {Created} created, {Merged} merged, {Duplicates} duplicates, {Conflicts} conflicts",
        results.Count(r => r.Decision == MergeDecision.Created),
        results.Count(r => r.Decision == MergeDecision.Merged),
        results.Count(r => r.Decision == MergeDecision.Duplicate),
        results.Count(r => r.Decision == MergeDecision.Conflict));

    return results;
}
#endregion
#region Query Operations
/// <inheritdoc />
public Task<CanonicalAdvisory?> GetByIdAsync(Guid id, CancellationToken ct = default)
{
    // Straight delegation; the store owns lookup semantics.
    return _store.GetByIdAsync(id, ct);
}
/// <inheritdoc />
public Task<CanonicalAdvisory?> GetByMergeHashAsync(string mergeHash, CancellationToken ct = default)
{
    // Reject null/blank hashes up front, then delegate to the store.
    ArgumentException.ThrowIfNullOrWhiteSpace(mergeHash);

    return _store.GetByMergeHashAsync(mergeHash, ct);
}
/// <inheritdoc />
public Task<IReadOnlyList<CanonicalAdvisory>> GetByCveAsync(string cve, CancellationToken ct = default)
{
    // Reject null/blank CVE identifiers up front, then delegate to the store.
    ArgumentException.ThrowIfNullOrWhiteSpace(cve);

    return _store.GetByCveAsync(cve, ct);
}
/// <inheritdoc />
public Task<IReadOnlyList<CanonicalAdvisory>> GetByArtifactAsync(string artifactKey, CancellationToken ct = default)
{
    // Reject null/blank artifact keys up front, then delegate to the store.
    ArgumentException.ThrowIfNullOrWhiteSpace(artifactKey);

    return _store.GetByArtifactAsync(artifactKey, ct);
}
/// <inheritdoc />
public Task<PagedResult<CanonicalAdvisory>> QueryAsync(CanonicalQueryOptions options, CancellationToken ct = default)
{
    // Validate the options object, then delegate paging/filtering to the store.
    ArgumentNullException.ThrowIfNull(options);

    return _store.QueryAsync(options, ct);
}
#endregion
#region Status Operations
/// <inheritdoc />
public async Task UpdateStatusAsync(Guid id, CanonicalStatus status, CancellationToken ct = default)
{
    await _store.UpdateStatusAsync(id, status, ct).ConfigureAwait(false);

    // Status flips are rare, operator-relevant events - log at Information.
    _logger.LogInformation(
        "Updated canonical {CanonicalId} status to {Status}",
        id, status);
}
/// <inheritdoc />
/// <remarks>
/// Placeholder: stub degradation is not implemented yet. Logs a warning and
/// reports zero degraded canonicals.
/// </remarks>
public Task<int> DegradeToStubsAsync(double scoreThreshold, CancellationToken ct = default)
{
    // TODO: Implement stub degradation based on EPSS score or other criteria
    // This would query for low-interest canonicals and update their status to Stub
    _logger.LogWarning(
        "DegradeToStubsAsync not yet implemented (threshold={Threshold})",
        scoreThreshold);
    // Nothing is awaited in this placeholder, so return a completed task directly
    // instead of paying for an async state machine (and the CS1998 warning).
    return Task.FromResult(0);
}
#endregion
#region Private Helpers
/// <summary>
/// Refreshes the canonical's descriptive metadata when the incoming source outranks
/// the current primary source. Lower precedence rank = higher priority.
/// </summary>
/// <param name="existing">Canonical record currently stored.</param>
/// <param name="newAdvisory">Incoming raw advisory supplying candidate metadata.</param>
/// <param name="source">Source key of the incoming advisory.</param>
/// <param name="ct">Cancellation token.</param>
private async Task UpdateCanonicalMetadataIfBetterAsync(
    CanonicalAdvisory existing,
    RawAdvisory newAdvisory,
    string source,
    CancellationToken ct)
{
    // Only update if the new source has higher precedence
    var newPrecedence = GetPrecedenceRank(source);
    // A canonical with no primary source ranks as int.MaxValue, so any known source wins.
    var existingPrecedence = existing.PrimarySource?.PrecedenceRank ?? int.MaxValue;
    if (newPrecedence >= existingPrecedence)
    {
        return; // New source is lower or equal precedence, don't update
    }
    // Update with better metadata; null fields in the new advisory fall back to existing values.
    // NOTE(review): only Severity/Title/Summary are forwarded — VersionRangeJson, weaknesses,
    // EPSS score and exploit flag are not part of this request; confirm the store's upsert
    // preserves those columns rather than resetting them.
    var updateRequest = new UpsertCanonicalRequest
    {
        Cve = existing.Cve,
        AffectsKey = existing.AffectsKey,
        MergeHash = existing.MergeHash,
        Severity = newAdvisory.Severity ?? existing.Severity,
        Title = newAdvisory.Title ?? existing.Title,
        Summary = newAdvisory.Summary ?? existing.Summary
    };
    await _store.UpsertCanonicalAsync(updateRequest, ct).ConfigureAwait(false);
    _logger.LogDebug(
        "Updated canonical {CanonicalId} metadata from higher-precedence source {Source}",
        existing.Id, source);
}
/// <summary>
/// Computes a "sha256:&lt;hex&gt;" content hash for a raw advisory document.
/// Prefers the verbatim raw payload; otherwise hashes a serialized projection
/// of the identity fields so the result is still reproducible.
/// </summary>
private static string ComputeDocumentHash(RawAdvisory advisory)
{
    string content;
    if (advisory.RawPayloadJson is not null)
    {
        content = advisory.RawPayloadJson;
    }
    else
    {
        content = JsonSerializer.Serialize(new
        {
            advisory.SourceAdvisoryId,
            advisory.Cve,
            advisory.AffectsKey,
            advisory.VersionRangeJson,
            advisory.Weaknesses,
            advisory.Title,
            advisory.Summary
        });
    }

    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(content));
    return $"sha256:{Convert.ToHexStringLower(digest)}";
}
/// <summary>
/// Looks up the precedence rank configured for a source key.
/// Unknown sources fall back to the default rank of 100.
/// </summary>
private static int GetPrecedenceRank(string source)
    => SourcePrecedence.TryGetValue(source, out var rank) ? rank : 100;
#endregion
}

View File

@@ -0,0 +1,174 @@
// -----------------------------------------------------------------------------
// ICanonicalAdvisoryService.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-000
// Description: Service interface for canonical advisory management
// -----------------------------------------------------------------------------
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Service for managing canonical advisories with provenance-scoped deduplication.
/// </summary>
/// <remarks>
/// Raw per-source advisories are ingested and collapsed into canonical records
/// identified by a merge hash; each source document is tracked as a source edge
/// on the canonical it merged into.
/// </remarks>
public interface ICanonicalAdvisoryService
{
    // === Ingest Operations ===

    /// <summary>
    /// Ingest raw advisory from source, creating or updating canonical record.
    /// </summary>
    /// <param name="source">Source identifier (osv, nvd, ghsa, redhat, debian, etc.)</param>
    /// <param name="rawAdvisory">Raw advisory document</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>Ingest result with canonical ID and merge decision</returns>
    Task<IngestResult> IngestAsync(
        string source,
        RawAdvisory rawAdvisory,
        CancellationToken ct = default);

    /// <summary>
    /// Batch ingest multiple advisories from same source.
    /// Returns one <see cref="IngestResult"/> per input advisory.
    /// </summary>
    Task<IReadOnlyList<IngestResult>> IngestBatchAsync(
        string source,
        IEnumerable<RawAdvisory> advisories,
        CancellationToken ct = default);

    // === Query Operations ===

    /// <summary>
    /// Get canonical advisory by ID with all source edges.
    /// </summary>
    Task<CanonicalAdvisory?> GetByIdAsync(Guid id, CancellationToken ct = default);

    /// <summary>
    /// Get canonical advisory by merge hash.
    /// </summary>
    Task<CanonicalAdvisory?> GetByMergeHashAsync(string mergeHash, CancellationToken ct = default);

    /// <summary>
    /// Get all canonical advisories for a CVE.
    /// </summary>
    Task<IReadOnlyList<CanonicalAdvisory>> GetByCveAsync(string cve, CancellationToken ct = default);

    /// <summary>
    /// Get canonical advisories affecting an artifact (PURL or CPE).
    /// </summary>
    Task<IReadOnlyList<CanonicalAdvisory>> GetByArtifactAsync(
        string artifactKey,
        CancellationToken ct = default);

    /// <summary>
    /// Query canonical advisories with filters (see <see cref="CanonicalQueryOptions"/>).
    /// </summary>
    Task<PagedResult<CanonicalAdvisory>> QueryAsync(
        CanonicalQueryOptions options,
        CancellationToken ct = default);

    // === Status Operations ===

    /// <summary>
    /// Update canonical status (active, stub, withdrawn).
    /// </summary>
    Task UpdateStatusAsync(Guid id, CanonicalStatus status, CancellationToken ct = default);

    /// <summary>
    /// Degrade low-interest canonicals to stub status.
    /// </summary>
    /// <returns>The number of canonicals degraded.</returns>
    Task<int> DegradeToStubsAsync(double scoreThreshold, CancellationToken ct = default);
}
/// <summary>
/// Raw advisory document before normalization, as handed to
/// <see cref="ICanonicalAdvisoryService.IngestAsync"/> by a connector.
/// </summary>
public sealed record RawAdvisory
{
    /// <summary>Source advisory ID (DSA-5678, RHSA-2024:1234, etc.)</summary>
    public required string SourceAdvisoryId { get; init; }

    /// <summary>Primary CVE identifier.</summary>
    public required string Cve { get; init; }

    /// <summary>Affected package identifier (PURL or CPE).</summary>
    public required string AffectsKey { get; init; }

    /// <summary>Affected version range as JSON string.</summary>
    public string? VersionRangeJson { get; init; }

    /// <summary>CWE identifiers. Defaults to empty.</summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>Patch lineage (commit SHA, patch ID).</summary>
    public string? PatchLineage { get; init; }

    /// <summary>Advisory title.</summary>
    public string? Title { get; init; }

    /// <summary>Advisory summary.</summary>
    public string? Summary { get; init; }

    /// <summary>Severity level.</summary>
    public string? Severity { get; init; }

    /// <summary>VEX-style vendor status.</summary>
    public VendorStatus? VendorStatus { get; init; }

    /// <summary>Raw payload as JSON.</summary>
    public string? RawPayloadJson { get; init; }

    /// <summary>
    /// When the advisory was fetched.
    /// NOTE(review): defaults to wall-clock UtcNow, which is non-deterministic;
    /// callers that need reproducibility should set this explicitly (e.g. via TimeProvider).
    /// </summary>
    public DateTimeOffset FetchedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Query options for canonical advisories. All filters are optional and combined;
/// results are paged via <see cref="Limit"/>/<see cref="Offset"/>.
/// </summary>
public sealed record CanonicalQueryOptions
{
    /// <summary>Filter by CVE (exact match).</summary>
    public string? Cve { get; init; }

    /// <summary>Filter by artifact key (PURL or CPE).</summary>
    public string? ArtifactKey { get; init; }

    /// <summary>Filter by severity.</summary>
    public string? Severity { get; init; }

    /// <summary>Filter by status.</summary>
    public CanonicalStatus? Status { get; init; }

    /// <summary>Only include canonicals with known exploits.</summary>
    public bool? ExploitKnown { get; init; }

    /// <summary>Include canonicals updated since this time.</summary>
    public DateTimeOffset? UpdatedSince { get; init; }

    /// <summary>Page size. Defaults to 100.</summary>
    public int Limit { get; init; } = 100;

    /// <summary>Page offset. Defaults to 0 (first page).</summary>
    public int Offset { get; init; } = 0;
}
/// <summary>
/// Paged result for queries: one page of items plus the total match count.
/// </summary>
/// <typeparam name="T">Item type held by the page.</typeparam>
public sealed record PagedResult<T>
{
    /// <summary>Items in this page.</summary>
    public required IReadOnlyList<T> Items { get; init; }

    /// <summary>Total count across all pages.</summary>
    public long TotalCount { get; init; }

    /// <summary>Current page offset.</summary>
    public int Offset { get; init; }

    /// <summary>Page size.</summary>
    public int Limit { get; init; }

    /// <summary>Whether there are more items beyond this page (derived from offset + page size vs. total).</summary>
    public bool HasMore => Offset + Items.Count < TotalCount;
}

View File

@@ -0,0 +1,138 @@
// -----------------------------------------------------------------------------
// ICanonicalAdvisoryStore.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-004
// Description: Storage abstraction for canonical advisory persistence
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Storage abstraction for canonical advisory and source edge persistence.
/// Implemented by PostgresCanonicalAdvisoryStore.
/// </summary>
public interface ICanonicalAdvisoryStore
{
    #region Canonical Advisory Operations

    /// <summary>
    /// Gets a canonical advisory by ID with source edges.
    /// </summary>
    Task<CanonicalAdvisory?> GetByIdAsync(Guid id, CancellationToken ct = default);

    /// <summary>
    /// Gets a canonical advisory by merge hash.
    /// </summary>
    Task<CanonicalAdvisory?> GetByMergeHashAsync(string mergeHash, CancellationToken ct = default);

    /// <summary>
    /// Gets all canonical advisories for a CVE.
    /// </summary>
    Task<IReadOnlyList<CanonicalAdvisory>> GetByCveAsync(string cve, CancellationToken ct = default);

    /// <summary>
    /// Gets canonical advisories affecting an artifact (PURL or CPE).
    /// </summary>
    Task<IReadOnlyList<CanonicalAdvisory>> GetByArtifactAsync(string artifactKey, CancellationToken ct = default);

    /// <summary>
    /// Queries canonical advisories with filters.
    /// </summary>
    Task<PagedResult<CanonicalAdvisory>> QueryAsync(CanonicalQueryOptions options, CancellationToken ct = default);

    /// <summary>
    /// Upserts a canonical advisory (creates or updates by merge_hash).
    /// </summary>
    /// <returns>The ID of the created or updated canonical.</returns>
    Task<Guid> UpsertCanonicalAsync(UpsertCanonicalRequest request, CancellationToken ct = default);

    /// <summary>
    /// Updates the status of a canonical advisory.
    /// </summary>
    Task UpdateStatusAsync(Guid id, CanonicalStatus status, CancellationToken ct = default);

    /// <summary>
    /// Counts active canonicals.
    /// </summary>
    Task<long> CountAsync(CancellationToken ct = default);

    #endregion

    #region Source Edge Operations

    /// <summary>
    /// Adds a source edge to a canonical advisory.
    /// Returns existing edge ID if duplicate (canonical_id, source_id, doc_hash).
    /// </summary>
    Task<SourceEdgeResult> AddSourceEdgeAsync(AddSourceEdgeRequest request, CancellationToken ct = default);

    /// <summary>
    /// Gets all source edges for a canonical.
    /// </summary>
    Task<IReadOnlyList<SourceEdge>> GetSourceEdgesAsync(Guid canonicalId, CancellationToken ct = default);

    /// <summary>
    /// Checks if a source edge already exists.
    /// </summary>
    Task<bool> SourceEdgeExistsAsync(Guid canonicalId, Guid sourceId, string docHash, CancellationToken ct = default);

    #endregion

    #region Source Operations

    /// <summary>
    /// Resolves a source key to its ID, creating if necessary.
    /// </summary>
    Task<Guid> ResolveSourceIdAsync(string sourceKey, CancellationToken ct = default);

    /// <summary>
    /// Gets the precedence rank for a source.
    /// </summary>
    Task<int> GetSourcePrecedenceAsync(string sourceKey, CancellationToken ct = default);

    #endregion
}
/// <summary>
/// Request to upsert a canonical advisory. The (Cve, AffectsKey, MergeHash)
/// triple identifies the record; the remaining fields are descriptive metadata.
/// </summary>
public sealed record UpsertCanonicalRequest
{
    /// <summary>CVE identifier of the canonical.</summary>
    public required string Cve { get; init; }

    /// <summary>Affected package identifier (PURL or CPE).</summary>
    public required string AffectsKey { get; init; }

    /// <summary>Merge hash used as the upsert key.</summary>
    public required string MergeHash { get; init; }

    /// <summary>Affected version range as JSON string.</summary>
    public string? VersionRangeJson { get; init; }

    /// <summary>CWE identifiers. Defaults to empty.</summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>Severity level.</summary>
    public string? Severity { get; init; }

    /// <summary>EPSS score, if known.</summary>
    public decimal? EpssScore { get; init; }

    /// <summary>Whether a known exploit exists.</summary>
    public bool ExploitKnown { get; init; }

    /// <summary>Advisory title.</summary>
    public string? Title { get; init; }

    /// <summary>Advisory summary.</summary>
    public string? Summary { get; init; }
}
/// <summary>
/// Request to add a source edge linking a canonical advisory to a source document.
/// </summary>
public sealed record AddSourceEdgeRequest
{
    /// <summary>Canonical advisory the edge attaches to.</summary>
    public required Guid CanonicalId { get; init; }

    /// <summary>Resolved source ID (see <see cref="ICanonicalAdvisoryStore.ResolveSourceIdAsync"/>).</summary>
    public required Guid SourceId { get; init; }

    /// <summary>Source's advisory ID (DSA-5678, RHSA-2024:1234, etc.).</summary>
    public required string SourceAdvisoryId { get; init; }

    /// <summary>SHA256 hash of the raw source document; part of the dedup key.</summary>
    public required string SourceDocHash { get; init; }

    /// <summary>VEX-style vendor status from the source.</summary>
    public VendorStatus? VendorStatus { get; init; }

    /// <summary>Source priority; lower value = higher priority. Defaults to 100.</summary>
    public int PrecedenceRank { get; init; } = 100;

    /// <summary>Serialized DSSE envelope, when the edge is signed.</summary>
    public string? DsseEnvelopeJson { get; init; }

    /// <summary>Raw payload as JSON.</summary>
    public string? RawPayloadJson { get; init; }

    /// <summary>
    /// When the source document was fetched.
    /// NOTE(review): defaults to non-deterministic UtcNow; set explicitly for reproducibility.
    /// </summary>
    public DateTimeOffset FetchedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Result of adding a source edge: the edge ID plus whether a new row was written.
/// </summary>
public sealed record SourceEdgeResult
{
    /// <summary>ID of the created or pre-existing edge.</summary>
    public required Guid EdgeId { get; init; }

    /// <summary>True when a new edge was inserted; false when a matching edge already existed.</summary>
    public required bool WasCreated { get; init; }

    /// <summary>Factory for a newly inserted edge.</summary>
    public static SourceEdgeResult Created(Guid edgeId) => new() { EdgeId = edgeId, WasCreated = true };

    /// <summary>Factory for an edge that already existed (duplicate add).</summary>
    public static SourceEdgeResult Existing(Guid edgeId) => new() { EdgeId = edgeId, WasCreated = false };
}

View File

@@ -0,0 +1,54 @@
// -----------------------------------------------------------------------------
// IMergeHashCalculator.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-004
// Description: Merge hash calculator abstraction for Core (avoids circular ref)
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Computes deterministic semantic merge hash for advisory deduplication.
/// This is a local abstraction in Core to avoid circular dependency with Merge library.
/// The Merge library's MergeHashCalculator implements this interface.
/// </summary>
public interface IMergeHashCalculator
{
    /// <summary>
    /// Compute merge hash from advisory identity components.
    /// The same input must always produce the same hash.
    /// </summary>
    /// <param name="input">The identity components to hash.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    string ComputeMergeHash(MergeHashInput input);
}
/// <summary>
/// Input components for merge hash computation (identity fields only — no
/// descriptive metadata participates in the hash).
/// </summary>
public sealed record MergeHashInput
{
    /// <summary>
    /// CVE identifier (e.g., "CVE-2024-1234"). Required.
    /// </summary>
    public required string Cve { get; init; }

    /// <summary>
    /// Affected package identifier (PURL or CPE). Required.
    /// </summary>
    public required string AffectsKey { get; init; }

    /// <summary>
    /// Affected version range expression. Optional.
    /// </summary>
    public string? VersionRange { get; init; }

    /// <summary>
    /// Associated CWE identifiers. Optional; defaults to empty.
    /// </summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>
    /// Upstream patch provenance (commit SHA, patch ID). Optional.
    /// </summary>
    public string? PatchLineage { get; init; }
}

View File

@@ -0,0 +1,84 @@
// -----------------------------------------------------------------------------
// ISourceEdgeSigner.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-008
// Description: Interface for DSSE signing of source edges
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Service for signing source edges with DSSE envelopes.
/// This is an optional component - if not registered, source edges are stored unsigned.
/// </summary>
public interface ISourceEdgeSigner
{
    /// <summary>
    /// Signs a source edge payload and returns a DSSE envelope.
    /// </summary>
    /// <param name="request">The signing request with payload.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Signing result with envelope or error; see <see cref="SourceEdgeSigningResult"/>.</returns>
    Task<SourceEdgeSigningResult> SignAsync(SourceEdgeSigningRequest request, CancellationToken ct = default);
}
/// <summary>
/// Request to sign a source edge.
/// </summary>
public sealed record SourceEdgeSigningRequest
{
    /// <summary>Source advisory ID being signed.</summary>
    public required string SourceAdvisoryId { get; init; }

    /// <summary>Source name (e.g., "nvd", "debian").</summary>
    public required string SourceName { get; init; }

    /// <summary>SHA256 hash of the payload.</summary>
    public required string PayloadHash { get; init; }

    /// <summary>Raw payload JSON to be signed.</summary>
    public required string PayloadJson { get; init; }

    /// <summary>Payload type URI placed in the DSSE envelope. Defaults to the StellaOps advisory media type.</summary>
    public string PayloadType { get; init; } = "application/vnd.stellaops.advisory.v1+json";
}
/// <summary>
/// Result of signing a source edge.
/// </summary>
public sealed record SourceEdgeSigningResult
{
    /// <summary>Whether signing was successful.</summary>
    public required bool Success { get; init; }

    /// <summary>DSSE envelope (if successful).</summary>
    public DsseEnvelope? Envelope { get; init; }

    /// <summary>Error message (if failed).</summary>
    public string? ErrorMessage { get; init; }

    /// <summary>Signature reference ID for audit.</summary>
    public Guid? SignatureRef { get; init; }

    /// <summary>Creates a successful result with envelope and audit reference.</summary>
    public static SourceEdgeSigningResult Signed(DsseEnvelope envelope, Guid signatureRef) => new()
    {
        Success = true,
        Envelope = envelope,
        SignatureRef = signatureRef
    };

    /// <summary>Creates a failed result.</summary>
    public static SourceEdgeSigningResult Failed(string errorMessage) => new()
    {
        Success = false,
        ErrorMessage = errorMessage
    };

    /// <summary>
    /// Creates a skipped result (signer not available).
    /// NOTE(review): reports Success=true while also setting ErrorMessage and no
    /// Envelope — callers must not treat ErrorMessage alone as failure; confirm
    /// consumers distinguish "skipped" from "signed" via Envelope being null.
    /// </summary>
    public static SourceEdgeSigningResult Skipped() => new()
    {
        Success = true,
        ErrorMessage = "Signing skipped - no signer configured"
    };
}

View File

@@ -0,0 +1,122 @@
// -----------------------------------------------------------------------------
// IngestResult.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-003
// Description: Result type for advisory ingestion with merge decision
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Result of ingesting a raw advisory. Constructed via the static factories,
/// one per <see cref="MergeDecision"/> outcome.
/// </summary>
public sealed record IngestResult
{
    /// <summary>ID of the canonical advisory (new or existing).</summary>
    public required Guid CanonicalId { get; init; }

    /// <summary>Computed merge hash for the ingested advisory.</summary>
    public required string MergeHash { get; init; }

    /// <summary>Decision made during ingestion.</summary>
    public required MergeDecision Decision { get; init; }

    /// <summary>Reference to the signature (if DSSE signed).</summary>
    public Guid? SignatureRef { get; init; }

    /// <summary>Reason for conflict (if Decision is Conflict).</summary>
    public string? ConflictReason { get; init; }

    /// <summary>ID of the created source edge. Null for duplicates and conflicts.</summary>
    public Guid? SourceEdgeId { get; init; }

    /// <summary>Source that provided the advisory.</summary>
    public string? SourceName { get; init; }

    /// <summary>Source's advisory ID.</summary>
    public string? SourceAdvisoryId { get; init; }

    /// <summary>Creates a successful creation result (new canonical + new edge).</summary>
    public static IngestResult Created(
        Guid canonicalId,
        string mergeHash,
        Guid sourceEdgeId,
        string sourceName,
        string sourceAdvisoryId,
        Guid? signatureRef = null) => new()
    {
        CanonicalId = canonicalId,
        MergeHash = mergeHash,
        Decision = MergeDecision.Created,
        SourceEdgeId = sourceEdgeId,
        SourceName = sourceName,
        SourceAdvisoryId = sourceAdvisoryId,
        SignatureRef = signatureRef
    };

    /// <summary>Creates a successful merge result (existing canonical + new edge).</summary>
    public static IngestResult Merged(
        Guid canonicalId,
        string mergeHash,
        Guid sourceEdgeId,
        string sourceName,
        string sourceAdvisoryId,
        Guid? signatureRef = null) => new()
    {
        CanonicalId = canonicalId,
        MergeHash = mergeHash,
        Decision = MergeDecision.Merged,
        SourceEdgeId = sourceEdgeId,
        SourceName = sourceName,
        SourceAdvisoryId = sourceAdvisoryId,
        SignatureRef = signatureRef
    };

    /// <summary>Creates a duplicate result (no changes made).</summary>
    public static IngestResult Duplicate(
        Guid canonicalId,
        string mergeHash,
        string sourceName,
        string sourceAdvisoryId) => new()
    {
        CanonicalId = canonicalId,
        MergeHash = mergeHash,
        Decision = MergeDecision.Duplicate,
        SourceName = sourceName,
        SourceAdvisoryId = sourceAdvisoryId
    };

    /// <summary>
    /// Creates a conflict result. Callers may pass placeholder canonical ID /
    /// merge hash when ingestion failed before either was computed.
    /// </summary>
    public static IngestResult Conflict(
        Guid canonicalId,
        string mergeHash,
        string conflictReason,
        string sourceName,
        string sourceAdvisoryId) => new()
    {
        CanonicalId = canonicalId,
        MergeHash = mergeHash,
        Decision = MergeDecision.Conflict,
        ConflictReason = conflictReason,
        SourceName = sourceName,
        SourceAdvisoryId = sourceAdvisoryId
    };
}
/// <summary>
/// Decision made when ingesting an advisory.
/// </summary>
public enum MergeDecision
{
    /// <summary>New canonical advisory was created.</summary>
    Created,

    /// <summary>Advisory was merged into an existing canonical.</summary>
    Merged,

    /// <summary>Exact duplicate was detected, no changes made.</summary>
    Duplicate,

    /// <summary>Merge conflict was detected.</summary>
    Conflict
}

View File

@@ -0,0 +1,92 @@
// -----------------------------------------------------------------------------
// SourceEdge.cs
// Sprint: SPRINT_8200_0012_0003_CONCEL_canonical_advisory_service
// Task: CANSVC-8200-002
// Description: Domain model for source edge linking canonical to source document
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Core.Canonical;
/// <summary>
/// Link from canonical advisory to source document.
/// </summary>
public sealed record SourceEdge
{
    /// <summary>Unique source edge identifier.</summary>
    public Guid Id { get; init; }

    /// <summary>Reference to the canonical advisory.</summary>
    public Guid CanonicalId { get; init; }

    /// <summary>Source identifier (osv, nvd, ghsa, redhat, debian, etc.).</summary>
    public required string SourceName { get; init; }

    /// <summary>Source's advisory ID (DSA-5678, RHSA-2024:1234, etc.).</summary>
    public required string SourceAdvisoryId { get; init; }

    /// <summary>SHA256 hash of the raw source document.</summary>
    public required string SourceDocHash { get; init; }

    /// <summary>VEX-style status from the source.</summary>
    public VendorStatus? VendorStatus { get; init; }

    /// <summary>
    /// Source priority: vendor=10, distro=20, osv=30, nvd=40, default=100.
    /// Lower value = higher priority.
    /// </summary>
    public int PrecedenceRank { get; init; } = 100;

    /// <summary>DSSE signature envelope; null when the edge is unsigned.</summary>
    public DsseEnvelope? DsseEnvelope { get; init; }

    /// <summary>When the source document was fetched.</summary>
    public DateTimeOffset FetchedAt { get; init; }

    /// <summary>When the edge record was created.</summary>
    public DateTimeOffset CreatedAt { get; init; }
}
/// <summary>
/// VEX-style vendor status for vulnerability.
/// </summary>
public enum VendorStatus
{
    /// <summary>The product is affected by the vulnerability.</summary>
    Affected,

    /// <summary>The product is not affected by the vulnerability.</summary>
    NotAffected,

    /// <summary>The vulnerability has been fixed in this version.</summary>
    Fixed,

    /// <summary>The vendor is investigating the vulnerability.</summary>
    UnderInvestigation
}
/// <summary>
/// DSSE (Dead Simple Signing Envelope) for cryptographic signatures.
/// </summary>
public sealed record DsseEnvelope
{
    /// <summary>Payload type URI (e.g., "application/vnd.stellaops.advisory.v1+json").</summary>
    public required string PayloadType { get; init; }

    /// <summary>Base64-encoded payload.</summary>
    public required string Payload { get; init; }

    /// <summary>Signatures over the payload. Defaults to empty (unsigned envelope).</summary>
    public IReadOnlyList<DsseSignature> Signatures { get; init; } = [];
}
/// <summary>
/// Single signature in a DSSE envelope.
/// </summary>
public sealed record DsseSignature
{
    /// <summary>Key ID or identifier for the signing key.</summary>
    public required string KeyId { get; init; }

    /// <summary>Base64-encoded signature.</summary>
    public required string Sig { get; init; }
}

View File

@@ -8,6 +8,7 @@
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.0" />

View File

@@ -0,0 +1,81 @@
// -----------------------------------------------------------------------------
// IMergeHashCalculator.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-002
// Description: Interface for deterministic semantic merge hash computation
// -----------------------------------------------------------------------------
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Merge.Identity;
/// <summary>
/// Computes deterministic semantic merge hash for advisory deduplication.
/// Unlike content hashing, merge hash is based on identity components only:
/// (CVE + affects_key + version_range + weaknesses + patch_lineage).
/// </summary>
/// <remarks>
/// The same CVE affecting the same package should produce the same merge hash
/// regardless of which source (Debian, RHEL, etc.) reported it.
/// </remarks>
public interface IMergeHashCalculator
{
    /// <summary>
    /// Compute merge hash from advisory identity components.
    /// </summary>
    /// <param name="input">The identity components to hash.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    string ComputeMergeHash(MergeHashInput input);

    /// <summary>
    /// Compute merge hash directly from Advisory domain model.
    /// Extracts identity components from the advisory and computes hash.
    /// </summary>
    /// <param name="advisory">The advisory to compute hash for.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    string ComputeMergeHash(Advisory advisory);

    /// <summary>
    /// Compute merge hash for a specific affected package within an advisory.
    /// </summary>
    /// <param name="advisory">The advisory containing the CVE and weaknesses.</param>
    /// <param name="affectedPackage">The specific affected package.</param>
    /// <returns>Hex-encoded SHA256 hash prefixed with "sha256:".</returns>
    string ComputeMergeHash(Advisory advisory, AffectedPackage affectedPackage);
}
/// <summary>
/// Input components for merge hash computation. Each field is normalized by the
/// calculator before hashing, so callers may pass raw source values.
/// </summary>
public sealed record MergeHashInput
{
    /// <summary>
    /// CVE identifier (e.g., "CVE-2024-1234"). Required.
    /// Will be normalized to uppercase.
    /// </summary>
    public required string Cve { get; init; }

    /// <summary>
    /// Affected package identifier (PURL or CPE). Required.
    /// Will be normalized according to package type rules.
    /// </summary>
    public required string AffectsKey { get; init; }

    /// <summary>
    /// Affected version range expression. Optional.
    /// Will be normalized to canonical interval notation.
    /// </summary>
    public string? VersionRange { get; init; }

    /// <summary>
    /// Associated CWE identifiers. Optional; defaults to empty.
    /// Will be normalized to uppercase, sorted, deduplicated.
    /// </summary>
    public IReadOnlyList<string> Weaknesses { get; init; } = [];

    /// <summary>
    /// Upstream patch provenance (commit SHA, patch ID). Optional.
    /// Enables differentiation of distro backports from upstream fixes.
    /// </summary>
    public string? PatchLineage { get; init; }
}

View File

@@ -0,0 +1,288 @@
// -----------------------------------------------------------------------------
// MergeHashCalculator.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Tasks: MHASH-8200-009, MHASH-8200-010, MHASH-8200-011
// Description: Core merge hash calculator implementation
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
using StellaOps.Concelier.Merge.Identity.Normalizers;
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Merge.Identity;
/// <summary>
/// Computes deterministic semantic merge hash for advisory deduplication.
/// </summary>
/// <remarks>
/// The merge hash is computed from identity components only:
/// <list type="bullet">
/// <item>CVE identifier (normalized, uppercase)</item>
/// <item>Affected package identifier (PURL/CPE, normalized)</item>
/// <item>Version range (canonical interval notation)</item>
/// <item>CWE weaknesses (sorted, deduplicated)</item>
/// <item>Patch lineage (optional, for backport differentiation)</item>
/// </list>
/// </remarks>
public sealed class MergeHashCalculator : IMergeHashCalculator
{
private static readonly UTF8Encoding Utf8NoBom = new(false);
private readonly ICveNormalizer _cveNormalizer;
private readonly IPurlNormalizer _purlNormalizer;
private readonly ICpeNormalizer _cpeNormalizer;
private readonly IVersionRangeNormalizer _versionRangeNormalizer;
private readonly ICweNormalizer _cweNormalizer;
private readonly IPatchLineageNormalizer _patchLineageNormalizer;
/// <summary>
/// Creates a new MergeHashCalculator with default normalizers.
/// Delegates to the full constructor using each normalizer's shared singleton instance.
/// </summary>
public MergeHashCalculator()
    : this(
        CveNormalizer.Instance,
        PurlNormalizer.Instance,
        CpeNormalizer.Instance,
        VersionRangeNormalizer.Instance,
        CweNormalizer.Instance,
        PatchLineageNormalizer.Instance)
{
}
/// <summary>
/// Creates a new MergeHashCalculator with custom normalizers.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when any normalizer is null.</exception>
public MergeHashCalculator(
    ICveNormalizer cveNormalizer,
    IPurlNormalizer purlNormalizer,
    ICpeNormalizer cpeNormalizer,
    IVersionRangeNormalizer versionRangeNormalizer,
    ICweNormalizer cweNormalizer,
    IPatchLineageNormalizer patchLineageNormalizer)
{
    _cveNormalizer = cveNormalizer ?? throw new ArgumentNullException(nameof(cveNormalizer));
    _purlNormalizer = purlNormalizer ?? throw new ArgumentNullException(nameof(purlNormalizer));
    _cpeNormalizer = cpeNormalizer ?? throw new ArgumentNullException(nameof(cpeNormalizer));
    _versionRangeNormalizer = versionRangeNormalizer ?? throw new ArgumentNullException(nameof(versionRangeNormalizer));
    _cweNormalizer = cweNormalizer ?? throw new ArgumentNullException(nameof(cweNormalizer));
    _patchLineageNormalizer = patchLineageNormalizer ?? throw new ArgumentNullException(nameof(patchLineageNormalizer));
}
/// <inheritdoc />
public string ComputeMergeHash(MergeHashInput input)
{
    ArgumentNullException.ThrowIfNull(input);
    // Normalize-and-concatenate into the canonical form, then hash it.
    return ComputeHash(BuildCanonicalString(input));
}
/// <inheritdoc />
public string ComputeMergeHash(Advisory advisory)
{
    ArgumentNullException.ThrowIfNull(advisory);
    // Extract CVE from advisory key or aliases
    var cve = ExtractCve(advisory);
    // If no affected packages, compute hash from CVE and weaknesses only
    // (AffectsKey is left empty, which the normalizer maps to an empty field).
    if (advisory.AffectedPackages.IsDefaultOrEmpty)
    {
        var input = new MergeHashInput
        {
            Cve = cve,
            AffectsKey = string.Empty,
            VersionRange = null,
            Weaknesses = ExtractWeaknesses(advisory),
            PatchLineage = null
        };
        return ComputeMergeHash(input);
    }
    // Compute hash for first affected package (primary identity)
    // For multi-package advisories, each package gets its own hash
    // via the (Advisory, AffectedPackage) overload.
    return ComputeMergeHash(advisory, advisory.AffectedPackages[0]);
}
/// <inheritdoc />
public string ComputeMergeHash(Advisory advisory, AffectedPackage affectedPackage)
{
    ArgumentNullException.ThrowIfNull(advisory);
    ArgumentNullException.ThrowIfNull(affectedPackage);
    // Assemble the five identity components, then reuse the MergeHashInput overload
    // so normalization and field ordering stay in one place.
    var cve = ExtractCve(advisory);
    var affectsKey = BuildAffectsKey(affectedPackage);
    var versionRange = BuildVersionRange(affectedPackage);
    var weaknesses = ExtractWeaknesses(advisory);
    var patchLineage = ExtractPatchLineage(advisory, affectedPackage);
    var input = new MergeHashInput
    {
        Cve = cve,
        AffectsKey = affectsKey,
        VersionRange = versionRange,
        Weaknesses = weaknesses,
        PatchLineage = patchLineage
    };
    return ComputeMergeHash(input);
}
/// <summary>
/// Builds the deterministic canonical string that gets hashed.
/// Field order is fixed (CVE|AFFECTS|VERSION|CWE|LINEAGE) so the hash is stable.
/// </summary>
private string BuildCanonicalString(MergeHashInput input)
{
    // Normalize every identity component first so equivalent spellings collapse
    // to the same canonical form before hashing.
    var cve = _cveNormalizer.Normalize(input.Cve);
    var affects = NormalizeAffectsKey(input.AffectsKey);
    var version = _versionRangeNormalizer.Normalize(input.VersionRange);
    var cwe = _cweNormalizer.Normalize(input.Weaknesses);
    var lineage = _patchLineageNormalizer.Normalize(input.PatchLineage);

    return $"CVE:{cve}|AFFECTS:{affects}|VERSION:{version}|CWE:{cwe}|LINEAGE:{lineage ?? string.Empty}";
}
/// <summary>
/// Normalizes a package identifier, routing CPE strings to the CPE normalizer and
/// everything else (PURLs and unrecognized formats) to the PURL normalizer.
/// </summary>
private string NormalizeAffectsKey(string affectsKey)
{
    if (string.IsNullOrWhiteSpace(affectsKey))
    {
        return string.Empty;
    }

    var key = affectsKey.Trim();

    // "pkg:" and unknown prefixes both go through the PURL normalizer,
    // matching the original routing behavior.
    return key.StartsWith("cpe:", StringComparison.OrdinalIgnoreCase)
        ? _cpeNormalizer.Normalize(key)
        : _purlNormalizer.Normalize(key);
}
/// <summary>
/// Hashes the canonical string: SHA-256 over its UTF-8 (no BOM) bytes,
/// rendered as "sha256:&lt;lowercase hex&gt;".
/// </summary>
private static string ComputeHash(string canonical)
{
    var digest = SHA256.HashData(Utf8NoBom.GetBytes(canonical));
    return "sha256:" + Convert.ToHexString(digest).ToLowerInvariant();
}
/// <summary>
/// Picks the CVE component: the advisory key itself when CVE-shaped,
/// otherwise the first CVE alias, otherwise the key as-is.
/// </summary>
private static string ExtractCve(Advisory advisory)
{
    if (advisory.AdvisoryKey.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
    {
        return advisory.AdvisoryKey;
    }

    foreach (var alias in advisory.Aliases)
    {
        if (alias.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            return alias;
        }
    }

    return advisory.AdvisoryKey;
}
// The affects key is the package's raw identifier (PURL or CPE);
// normalization happens later in NormalizeAffectsKey.
private static string BuildAffectsKey(AffectedPackage package)
    => package.Identifier;
/// <summary>
/// Combines all version ranges of a package into one sorted, comma-joined
/// expression; returns null when no usable range exists.
/// </summary>
private static string? BuildVersionRange(AffectedPackage package)
{
    if (package.VersionRanges.IsDefaultOrEmpty)
    {
        return null;
    }

    // Prefer each range's explicit expression; otherwise derive one from
    // the introduced/fixed/last-affected primitives.
    var expressions = new List<string>();
    foreach (var range in package.VersionRanges)
    {
        var expression = range.RangeExpression ?? BuildRangeFromPrimitives(range);
        if (!string.IsNullOrWhiteSpace(expression))
        {
            expressions.Add(expression!);
        }
    }

    if (expressions.Count == 0)
    {
        return null;
    }

    // Ordinal sort keeps the joined result deterministic.
    expressions.Sort(StringComparer.Ordinal);
    return string.Join(",", expressions);
}
/// <summary>
/// Derives a comparison expression (e.g. "&gt;=1.0,&lt;2.0") from the
/// introduced/fixed/last-affected primitives; null when none are set.
/// </summary>
private static string? BuildRangeFromPrimitives(AffectedVersionRange range)
{
    var constraints = new List<string>(2);

    if (!string.IsNullOrWhiteSpace(range.IntroducedVersion))
    {
        constraints.Add($">={range.IntroducedVersion}");
    }

    // A fixed version takes precedence (half-open interval); otherwise the
    // last affected version closes the range inclusively.
    if (!string.IsNullOrWhiteSpace(range.FixedVersion))
    {
        constraints.Add($"<{range.FixedVersion}");
    }
    else if (!string.IsNullOrWhiteSpace(range.LastAffectedVersion))
    {
        constraints.Add($"<={range.LastAffectedVersion}");
    }

    return constraints.Count == 0 ? null : string.Join(",", constraints);
}
/// <summary>
/// Collects the non-blank CWE identifiers of the advisory (empty list when
/// none are recorded).
/// </summary>
private static IReadOnlyList<string> ExtractWeaknesses(Advisory advisory)
{
    if (advisory.Cwes.IsDefaultOrEmpty)
    {
        return [];
    }

    var identifiers = new List<string>();
    foreach (var weakness in advisory.Cwes)
    {
        if (!string.IsNullOrWhiteSpace(weakness.Identifier))
        {
            identifiers.Add(weakness.Identifier);
        }
    }

    return identifiers;
}
/// <summary>
/// Simplified patch-lineage extraction: returns the URL of the first
/// patch/fix/commit reference, or null when none exists.
/// </summary>
private static string? ExtractPatchLineage(Advisory advisory, AffectedPackage package)
{
    // NOTE(review): `package` is currently unused; a fuller implementation
    // would correlate backport proof with the specific package.
    foreach (var reference in advisory.References)
    {
        if (reference.Kind is "patch" or "fix" or "commit")
        {
            return reference.Url;
        }
    }

    return null;
}
}

View File

@@ -0,0 +1,159 @@
// -----------------------------------------------------------------------------
// MergeHashShadowWriteService.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-020
// Description: Shadow-write merge hashes for existing advisories during migration
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Advisories;
namespace StellaOps.Concelier.Merge.Identity;
/// <summary>
/// Service to compute and persist merge hashes for existing advisories
/// without changing their identity. Used during migration to backfill
/// merge_hash for pre-existing data.
/// </summary>
public sealed class MergeHashShadowWriteService
{
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IMergeHashCalculator _mergeHashCalculator;
    private readonly ILogger<MergeHashShadowWriteService> _logger;

    public MergeHashShadowWriteService(
        IAdvisoryStore advisoryStore,
        IMergeHashCalculator mergeHashCalculator,
        ILogger<MergeHashShadowWriteService> logger)
    {
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Backfills merge hashes for all advisories that don't have one.
    /// Streams every advisory, computes the hash for those missing one, and
    /// upserts the enriched copy. Individual failures are logged and counted
    /// but do not abort the run (best-effort migration).
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Summary of the backfill operation.</returns>
    public async Task<ShadowWriteResult> BackfillAllAsync(CancellationToken cancellationToken)
    {
        var processed = 0;
        var updated = 0;
        var skipped = 0;
        var failed = 0;
        await foreach (var advisory in _advisoryStore.StreamAsync(cancellationToken).ConfigureAwait(false))
        {
            cancellationToken.ThrowIfCancellationRequested();
            processed++;
            // Shadow-write only: advisories that already carry a merge hash
            // are left untouched.
            if (!string.IsNullOrEmpty(advisory.MergeHash))
            {
                skipped++;
                continue;
            }
            try
            {
                var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);
                var enriched = EnrichWithMergeHash(advisory, mergeHash);
                await _advisoryStore.UpsertAsync(enriched, cancellationToken).ConfigureAwait(false);
                updated++;
                // Progress log every 100 updates so long migrations stay observable.
                if (updated % 100 == 0)
                {
                    _logger.LogInformation(
                        "Merge hash backfill progress: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
                        processed, updated, skipped, failed);
                }
            }
            catch (Exception ex)
            {
                // Record the failure and keep going; the run summary reports it.
                failed++;
                _logger.LogWarning(ex, "Failed to compute merge hash for {AdvisoryKey}", advisory.AdvisoryKey);
            }
        }
        _logger.LogInformation(
            "Merge hash backfill complete: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
            processed, updated, skipped, failed);
        return new ShadowWriteResult(processed, updated, skipped, failed);
    }

    /// <summary>
    /// Computes and persists merge hash for a single advisory.
    /// </summary>
    /// <param name="advisoryKey">The advisory key to process.</param>
    /// <param name="force">If true, recomputes even if hash exists.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if advisory was updated, false otherwise.</returns>
    /// <remarks>Unlike <see cref="BackfillAllAsync"/>, failures here are rethrown to the caller.</remarks>
    public async Task<bool> BackfillOneAsync(string advisoryKey, bool force, CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(advisoryKey);
        var advisory = await _advisoryStore.FindAsync(advisoryKey, cancellationToken).ConfigureAwait(false);
        if (advisory is null)
        {
            _logger.LogWarning("Advisory {AdvisoryKey} not found for merge hash backfill", advisoryKey);
            return false;
        }
        // Skip if already has merge hash and not forcing.
        if (!force && !string.IsNullOrEmpty(advisory.MergeHash))
        {
            _logger.LogDebug("Skipping {AdvisoryKey}: already has merge hash", advisoryKey);
            return false;
        }
        try
        {
            var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);
            var enriched = EnrichWithMergeHash(advisory, mergeHash);
            await _advisoryStore.UpsertAsync(enriched, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Computed merge hash for {AdvisoryKey}: {MergeHash}", advisoryKey, mergeHash);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to compute merge hash for {AdvisoryKey}", advisoryKey);
            throw;
        }
    }

    // Rebuilds the advisory with every field copied verbatim and only the
    // merge hash (final constructor argument) replaced.
    // NOTE(review): relies on the Advisory constructor's positional order —
    // keep in sync with the model if new fields are added.
    private static Advisory EnrichWithMergeHash(Advisory advisory, string mergeHash)
    {
        return new Advisory(
            advisory.AdvisoryKey,
            advisory.Title,
            advisory.Summary,
            advisory.Language,
            advisory.Published,
            advisory.Modified,
            advisory.Severity,
            advisory.ExploitKnown,
            advisory.Aliases,
            advisory.Credits,
            advisory.References,
            advisory.AffectedPackages,
            advisory.CvssMetrics,
            advisory.Provenance,
            advisory.Description,
            advisory.Cwes,
            advisory.CanonicalMetricId,
            mergeHash);
    }
}
/// <summary>
/// Result of a shadow-write backfill operation.
/// Counters satisfy Processed = Updated + Skipped + Failed.
/// </summary>
/// <param name="Processed">Total advisories examined.</param>
/// <param name="Updated">Advisories updated with new merge hash.</param>
/// <param name="Skipped">Advisories skipped (already had merge hash).</param>
/// <param name="Failed">Advisories that failed hash computation (logged, not rethrown).</param>
public sealed record ShadowWriteResult(int Processed, int Updated, int Skipped, int Failed);

View File

@@ -0,0 +1,120 @@
// -----------------------------------------------------------------------------
// CpeNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-004
// Description: CPE normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes CPE identifiers to canonical CPE 2.3 format.
/// </summary>
public sealed partial class CpeNormalizer : ICpeNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CpeNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for the CPE 2.3 formatted-string binding (part plus ten attribute components).
    /// </summary>
    [GeneratedRegex(
        @"^cpe:2\.3:([aho]):([^:]+):([^:]+):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*)$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex Cpe23Pattern();

    /// <summary>
    /// Pattern for the legacy CPE 2.2 URI binding
    /// (cpe:/part:vendor:product[:version[:update[:edition[:language]]]]).
    /// </summary>
    [GeneratedRegex(
        @"^cpe:/([aho]):([^:]+):([^:]+)(?::([^:]+))?(?::([^:]+))?(?::([^:]+))?(?::([^:]+))?$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex Cpe22Pattern();

    /// <inheritdoc />
    public string Normalize(string cpe)
    {
        if (string.IsNullOrWhiteSpace(cpe))
        {
            return string.Empty;
        }

        var candidate = cpe.Trim();

        // Prefer the modern 2.3 binding, then fall back to the 2.2 URI form.
        var cpe23 = Cpe23Pattern().Match(candidate);
        if (cpe23.Success)
        {
            return NormalizeCpe23(cpe23);
        }

        var cpe22 = Cpe22Pattern().Match(candidate);
        if (cpe22.Success)
        {
            return ConvertCpe22ToCpe23(cpe22);
        }

        // Unrecognized shape: lowercase so at least casing is deterministic.
        return candidate.ToLowerInvariant();
    }

    // Rebuilds a matched CPE 2.3 string with every component normalized.
    private static string NormalizeCpe23(Match match)
    {
        // Group 1 is the part (a/o/h); groups 2-12 are the ten attribute values.
        var components = new string[11];
        components[0] = match.Groups[1].Value.ToLowerInvariant();
        for (var i = 1; i < components.Length; i++)
        {
            components[i] = NormalizeComponent(match.Groups[i + 1].Value);
        }

        return "cpe:2.3:" + string.Join(':', components);
    }

    // Maps the up-to-7 CPE 2.2 URI components onto the 2.3 formatted string,
    // padding the four 2.3-only trailing attributes with the ANY wildcard.
    private static string ConvertCpe22ToCpe23(Match match)
    {
        var part = match.Groups[1].Value.ToLowerInvariant();
        var components = new string[6];
        components[0] = NormalizeComponent(match.Groups[2].Value); // vendor
        components[1] = NormalizeComponent(match.Groups[3].Value); // product
        for (var group = 4; group <= 7; group++) // version, update, edition, language
        {
            components[group - 2] = match.Groups[group].Success
                ? NormalizeComponent(match.Groups[group].Value)
                : "*";
        }

        return $"cpe:2.3:{part}:{string.Join(':', components)}:*:*:*:*";
    }

    // Normalizes one attribute value: blank → "*", logical ANY/NA → their
    // 2.3 symbols, literal wildcards pass through, everything else lowercased.
    private static string NormalizeComponent(string component)
    {
        if (string.IsNullOrWhiteSpace(component))
        {
            return "*";
        }

        var value = component.Trim();
        switch (value)
        {
            case "*":
            case "-":
                return value;
            case "ANY":
                return "*";
            case "NA":
                return "-";
        }

        return value.ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,71 @@
// -----------------------------------------------------------------------------
// CveNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-003 (part of normalization helpers)
// Description: CVE identifier normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes CVE identifiers to canonical uppercase format.
/// Accepts "CVE-2024-1234" in any casing, the bare "2024-1234" form, or a
/// longer string containing an embedded CVE identifier.
/// </summary>
public sealed partial class CveNormalizer : ICveNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CveNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern matching a complete CVE identifier: CVE-YYYY-NNNNN (4+ digits after year).
    /// </summary>
    [GeneratedRegex(@"^CVE-(\d{4})-(\d{4,})$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CvePattern();

    /// <summary>
    /// Unanchored pattern used to extract a CVE identifier embedded in a longer string.
    /// </summary>
    [GeneratedRegex(@"CVE-\d{4}-\d{4,}", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex EmbeddedCvePattern();

    /// <summary>
    /// Pattern matching the bare "YYYY-NNNNN" form missing the CVE- prefix.
    /// </summary>
    [GeneratedRegex(@"^\d{4}-\d{4,}$", RegexOptions.Compiled)]
    private static partial Regex BareNumberPattern();

    /// <inheritdoc />
    public string Normalize(string? cve)
    {
        if (string.IsNullOrWhiteSpace(cve))
        {
            return string.Empty;
        }

        var trimmed = cve.Trim();

        if (!trimmed.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            // Try to extract an embedded CVE from a longer string
            // (e.g. "Fixed in CVE-2024-1234"). FIX: the previous code used the
            // anchored CvePattern here, which could never match in this branch
            // (any string starting with "CVE-" was already handled above), so
            // extraction was dead code. Uses a cached unanchored pattern now.
            var embedded = EmbeddedCvePattern().Match(trimmed);
            if (embedded.Success)
            {
                trimmed = embedded.Value;
            }
            else if (BareNumberPattern().IsMatch(trimmed))
            {
                // Bare number form: 2024-1234 -> CVE-2024-1234.
                // (Previously an inline Regex.IsMatch recompiled per call.)
                trimmed = "CVE-" + trimmed;
            }
        }

        // Uppercase unconditionally; values that still aren't valid CVEs are
        // returned as-is so they hash consistently.
        return trimmed.ToUpperInvariant();
    }
}

View File

@@ -0,0 +1,82 @@
// -----------------------------------------------------------------------------
// CweNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-006
// Description: CWE identifier list normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes CWE identifier lists for deterministic hashing.
/// </summary>
public sealed partial class CweNormalizer : ICweNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static CweNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern matching CWE identifier: CWE-NNN or just NNN.
    /// </summary>
    [GeneratedRegex(@"(?:CWE-)?(\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CwePattern();

    /// <inheritdoc />
    public string Normalize(IEnumerable<string>? cwes)
    {
        if (cwes is null)
        {
            return string.Empty;
        }

        // Canonicalize each entry to "CWE-N", drop blank/unparseable values,
        // and deduplicate case-insensitively (first occurrence wins).
        var canonical = new List<string>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var raw in cwes)
        {
            if (string.IsNullOrWhiteSpace(raw))
            {
                continue;
            }

            var normalized = NormalizeSingle(raw);
            if (normalized is not null && seen.Add(normalized))
            {
                canonical.Add(normalized);
            }
        }

        if (canonical.Count == 0)
        {
            return string.Empty;
        }

        // Sort numerically by CWE number, then lexically, for determinism.
        var ordered = canonical
            .OrderBy(ExtractCweNumber)
            .ThenBy(static cwe => cwe, StringComparer.OrdinalIgnoreCase);
        return string.Join(",", ordered);
    }

    // Accepts "CWE-79", "cwe-79", or bare "79"; returns null when no digits found.
    private static string? NormalizeSingle(string cwe)
    {
        var match = CwePattern().Match(cwe.Trim());
        return match.Success ? $"CWE-{match.Groups[1].Value}" : null;
    }

    // Numeric sort key; unparseable values sort last.
    private static int ExtractCweNumber(string? cwe)
    {
        if (string.IsNullOrWhiteSpace(cwe))
        {
            return int.MaxValue;
        }

        var match = CwePattern().Match(cwe);
        return match.Success && int.TryParse(match.Groups[1].Value, out var number)
            ? number
            : int.MaxValue;
    }
}

View File

@@ -0,0 +1,95 @@
// -----------------------------------------------------------------------------
// INormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Tasks: MHASH-8200-003 to MHASH-8200-007
// Description: Normalizer interfaces for merge hash components
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes PURL identifiers to canonical form for deterministic hashing.
/// </summary>
public interface IPurlNormalizer
{
    /// <summary>
    /// Normalize PURL to canonical form.
    /// - Lowercase package type
    /// - URL-encode special characters in namespace
    /// - Strip non-essential qualifiers (arch, type, checksum)
    /// - Sort remaining qualifiers alphabetically
    /// </summary>
    string Normalize(string purl);
}

/// <summary>
/// Normalizes CPE identifiers to canonical CPE 2.3 format.
/// </summary>
public interface ICpeNormalizer
{
    /// <summary>
    /// Normalize CPE to canonical CPE 2.3 format.
    /// - Convert CPE 2.2 URI format to CPE 2.3 formatted string
    /// - Lowercase vendor and product
    /// - Normalize wildcards
    /// </summary>
    string Normalize(string cpe);
}

/// <summary>
/// Normalizes version range expressions to canonical interval notation.
/// </summary>
public interface IVersionRangeNormalizer
{
    /// <summary>
    /// Normalize version range to canonical expression.
    /// - Convert various formats to canonical interval notation
    /// - Trim whitespace
    /// - Normalize operators (e.g., "[1.0, 2.0)" → ">=1.0,&lt;2.0")
    /// Accepts null/blank input and returns the empty string for it.
    /// </summary>
    string Normalize(string? range);
}

/// <summary>
/// Normalizes CWE identifier lists for deterministic hashing.
/// </summary>
public interface ICweNormalizer
{
    /// <summary>
    /// Normalize CWE list to sorted, deduplicated, uppercase set.
    /// - Uppercase all identifiers
    /// - Ensure "CWE-" prefix
    /// - Sort numerically by CWE number
    /// - Deduplicate
    /// - Return comma-joined string (empty string for null/empty input)
    /// </summary>
    string Normalize(IEnumerable<string>? cwes);
}

/// <summary>
/// Normalizes patch lineage references for deterministic hashing.
/// </summary>
public interface IPatchLineageNormalizer
{
    /// <summary>
    /// Normalize patch lineage to canonical commit reference.
    /// - Extract commit SHAs from various formats
    /// - Normalize to lowercase hex
    /// - Handle patch IDs, bug tracker references
    /// Returns null when no recognizable reference is present.
    /// </summary>
    string? Normalize(string? lineage);
}

/// <summary>
/// Normalizes CVE identifiers for deterministic hashing.
/// </summary>
public interface ICveNormalizer
{
    /// <summary>
    /// Normalize CVE identifier to canonical uppercase format.
    /// - Ensure "CVE-" prefix
    /// - Uppercase
    /// - Validate format (CVE-YYYY-NNNNN+)
    /// </summary>
    string Normalize(string? cve);
}

View File

@@ -0,0 +1,119 @@
// -----------------------------------------------------------------------------
// PatchLineageNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-007
// Description: Patch lineage normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes patch lineage references for deterministic hashing.
/// Extracts upstream commit references from various formats.
/// Extraction priority: commit URL, then full SHA, then abbreviated SHA
/// (only when the text looks like a commit reference), then patch ID.
/// </summary>
public sealed partial class PatchLineageNormalizer : IPatchLineageNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static PatchLineageNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for full Git commit SHA (40 hex chars).
    /// </summary>
    [GeneratedRegex(@"\b([0-9a-f]{40})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex FullShaPattern();

    /// <summary>
    /// Pattern for abbreviated Git commit SHA (7-12 hex chars).
    /// NOTE(review): this also matches decimal digit runs (e.g. "1234567"),
    /// which is why it is gated behind <see cref="LooksLikeCommitReference"/>.
    /// </summary>
    [GeneratedRegex(@"\b([0-9a-f]{7,12})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex AbbrevShaPattern();

    /// <summary>
    /// Pattern for GitHub/GitLab commit URLs.
    /// </summary>
    [GeneratedRegex(
        @"(?:github\.com|gitlab\.com)/[^/]+/[^/]+/commit/([0-9a-f]{7,40})",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CommitUrlPattern();

    /// <summary>
    /// Pattern for patch IDs in format "patch-NNNNN" or "PATCH-NNNNN".
    /// </summary>
    [GeneratedRegex(@"\b(PATCH-\d+)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex PatchIdPattern();

    /// <inheritdoc />
    public string? Normalize(string? lineage)
    {
        if (string.IsNullOrWhiteSpace(lineage))
        {
            return null;
        }
        var trimmed = lineage.Trim();
        // Try to extract commit SHA from URL first (most specific signal).
        var urlMatch = CommitUrlPattern().Match(trimmed);
        if (urlMatch.Success)
        {
            return NormalizeSha(urlMatch.Groups[1].Value);
        }
        // Try full SHA.
        var fullMatch = FullShaPattern().Match(trimmed);
        if (fullMatch.Success)
        {
            return NormalizeSha(fullMatch.Groups[1].Value);
        }
        // Try abbreviated SHA (only if it looks like a commit reference,
        // to avoid mistaking arbitrary numbers for SHAs).
        if (LooksLikeCommitReference(trimmed))
        {
            var abbrevMatch = AbbrevShaPattern().Match(trimmed);
            if (abbrevMatch.Success)
            {
                return NormalizeSha(abbrevMatch.Groups[1].Value);
            }
        }
        // Try patch ID (uppercased for a canonical form).
        var patchMatch = PatchIdPattern().Match(trimmed);
        if (patchMatch.Success)
        {
            return patchMatch.Groups[1].Value.ToUpperInvariant();
        }
        // Return null if no recognizable pattern.
        return null;
    }

    // Heuristic: if it contains "commit", "sha", "fix", "patch" or "backport"
    // it's likely a commit reference, so an abbreviated SHA may be trusted.
    private static bool LooksLikeCommitReference(string value)
    {
        var lower = value.ToLowerInvariant();
        return lower.Contains("commit") ||
            lower.Contains("sha") ||
            lower.Contains("fix") ||
            lower.Contains("patch") ||
            lower.Contains("backport");
    }

    // Lowercases a SHA; abbreviated SHAs are kept as-is (they still hash
    // consistently for identical inputs).
    private static string NormalizeSha(string sha)
    {
        var normalized = sha.ToLowerInvariant();
        // If it's a full SHA, return it.
        if (normalized.Length == 40)
        {
            return normalized;
        }
        // For abbreviated SHAs, return as-is.
        return normalized;
    }
}

View File

@@ -0,0 +1,178 @@
// -----------------------------------------------------------------------------
// PurlNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-003
// Description: PURL normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes PURL identifiers to canonical form for deterministic hashing.
/// </summary>
public sealed partial class PurlNormalizer : IPurlNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static PurlNormalizer Instance { get; } = new();

    /// <summary>
    /// Qualifiers to strip from PURL for identity hashing (architecture-specific, non-identity).
    /// </summary>
    private static readonly HashSet<string> StrippedQualifiers = new(StringComparer.OrdinalIgnoreCase)
    {
        "arch",
        "architecture",
        "os",
        "platform",
        "type",
        "classifier",
        "checksum",
        "download_url",
        "vcs_url",
        "repository_url"
    };

    /// <summary>
    /// Pattern for parsing PURL: pkg:type/namespace/name@version?qualifiers#subpath
    /// </summary>
    [GeneratedRegex(
        @"^pkg:([a-zA-Z][a-zA-Z0-9+.-]*)(?:/([^/@#?]+))?/([^/@#?]+)(?:@([^?#]+))?(?:\?([^#]+))?(?:#(.+))?$",
        RegexOptions.Compiled)]
    private static partial Regex PurlPattern();

    /// <inheritdoc />
    public string Normalize(string purl)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return string.Empty;
        }

        var trimmed = purl.Trim();

        // Handle non-PURL identifiers (CPE, plain package names).
        if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
        {
            // If it looks like a CPE, return as-is for the CPE normalizer.
            if (trimmed.StartsWith("cpe:", StringComparison.OrdinalIgnoreCase))
            {
                return trimmed;
            }

            // Lowercase plain identifiers so casing is deterministic.
            return trimmed.ToLowerInvariant();
        }

        var match = PurlPattern().Match(trimmed);
        if (!match.Success)
        {
            // Invalid PURL format: lowercase pass-through.
            return trimmed.ToLowerInvariant();
        }

        var type = match.Groups[1].Value.ToLowerInvariant();
        var ns = match.Groups[2].Success ? NormalizeNamespace(match.Groups[2].Value, type) : null;
        var name = NormalizeName(match.Groups[3].Value, type);
        var version = match.Groups[4].Success ? match.Groups[4].Value : null;
        var qualifiers = match.Groups[5].Success ? NormalizeQualifiers(match.Groups[5].Value) : null;

        // Subpath (group 6) is intentionally stripped for identity purposes.
        return BuildPurl(type, ns, name, version, qualifiers);
    }

    /// <summary>
    /// Normalizes the namespace segment: URL-decode, lowercase, and for npm
    /// scoped packages re-encode so the leading '@' is consistently "%40".
    /// </summary>
    private static string NormalizeNamespace(string ns, string type)
    {
        // URL-decode first so pre-encoded inputs collapse to one form.
        var decoded = HttpUtility.UrlDecode(ns);

        // npm scoped packages (@org/pkg): lowercase, then re-encode so the
        // scope marker is percent-encoded in the canonical form.
        // FIX: the previous code ran Replace("%40", "%40") on the encoded
        // value — a no-op; UrlEncode already yields "%40" for '@'.
        if (type == "npm" && decoded.StartsWith('@'))
        {
            decoded = decoded.ToLowerInvariant();
            return HttpUtility.UrlEncode(decoded) ?? decoded;
        }

        // Most ecosystems: lowercase namespace.
        return decoded.ToLowerInvariant();
    }

    /// <summary>
    /// Normalizes the name segment; most ecosystems lowercase, Go preserves case.
    /// </summary>
    private static string NormalizeName(string name, string type)
    {
        var decoded = HttpUtility.UrlDecode(name);
        return type switch
        {
            "golang" => decoded, // Go module paths are case-sensitive
            "nuget" => decoded.ToLowerInvariant(), // NuGet is case-insensitive
            _ => decoded.ToLowerInvariant()
        };
    }

    /// <summary>
    /// Lowercases qualifier keys, drops non-identity qualifiers, and sorts
    /// the remainder ordinally; returns null when nothing survives.
    /// </summary>
    private static string? NormalizeQualifiers(string qualifiers)
    {
        if (string.IsNullOrWhiteSpace(qualifiers))
        {
            return null;
        }

        var pairs = qualifiers
            .Split('&', StringSplitOptions.RemoveEmptyEntries)
            .Select(static pair =>
            {
                var eqIndex = pair.IndexOf('=');
                if (eqIndex < 0)
                {
                    // Valueless qualifier: keep the bare key.
                    return (Key: pair.ToLowerInvariant(), Value: (string?)null);
                }
                return (Key: pair[..eqIndex].ToLowerInvariant(), Value: pair[(eqIndex + 1)..]);
            })
            .Where(pair => !StrippedQualifiers.Contains(pair.Key))
            .OrderBy(static pair => pair.Key, StringComparer.Ordinal)
            .ToList();

        if (pairs.Count == 0)
        {
            return null;
        }

        return string.Join("&", pairs.Select(static p =>
            p.Value is null ? p.Key : $"{p.Key}={p.Value}"));
    }

    /// <summary>
    /// Reassembles the canonical PURL from its normalized parts.
    /// </summary>
    private static string BuildPurl(string type, string? ns, string name, string? version, string? qualifiers)
    {
        var sb = new StringBuilder("pkg:");
        sb.Append(type);
        sb.Append('/');
        if (!string.IsNullOrEmpty(ns))
        {
            sb.Append(ns);
            sb.Append('/');
        }
        sb.Append(name);
        if (!string.IsNullOrEmpty(version))
        {
            sb.Append('@');
            sb.Append(version);
        }
        if (!string.IsNullOrEmpty(qualifiers))
        {
            sb.Append('?');
            sb.Append(qualifiers);
        }
        return sb.ToString();
    }
}

View File

@@ -0,0 +1,165 @@
// -----------------------------------------------------------------------------
// VersionRangeNormalizer.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-005
// Description: Version range normalization for merge hash
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Merge.Identity.Normalizers;
/// <summary>
/// Normalizes version range expressions to canonical interval notation.
/// </summary>
public sealed partial class VersionRangeNormalizer : IVersionRangeNormalizer
{
    /// <summary>
    /// Singleton instance.
    /// </summary>
    public static VersionRangeNormalizer Instance { get; } = new();

    /// <summary>
    /// Pattern for mathematical interval notation: [1.0, 2.0) or (1.0, 2.0]
    /// </summary>
    [GeneratedRegex(
        @"^([\[\(])\s*([^,\s]*)\s*,\s*([^)\]\s]*)\s*([\]\)])$",
        RegexOptions.Compiled)]
    private static partial Regex IntervalPattern();

    /// <summary>
    /// Pattern for comparison operators: >= 1.0, < 2.0
    /// </summary>
    [GeneratedRegex(
        @"^(>=?|<=?|=|!=|~=|~>|\^)\s*(.+)$",
        RegexOptions.Compiled)]
    private static partial Regex ComparisonPattern();

    /// <summary>
    /// Pattern for a plain version prefix (digits and dots), e.g. "1.2.3".
    /// </summary>
    [GeneratedRegex(@"^[\d.]+", RegexOptions.Compiled)]
    private static partial Regex PlainVersionPattern();

    /// <inheritdoc />
    public string Normalize(string? range)
    {
        if (string.IsNullOrWhiteSpace(range))
        {
            return string.Empty;
        }

        var trimmed = range.Trim();

        // Handle "all versions" markers.
        // FIX: the previous check (`trimmed is "*" or "all" or "any"`) was
        // case-sensitive, so "ALL"/"Any" fell through unnormalized.
        if (trimmed == "*"
            || trimmed.Equals("all", StringComparison.OrdinalIgnoreCase)
            || trimmed.Equals("any", StringComparison.OrdinalIgnoreCase))
        {
            return "*";
        }

        // Try interval notation: [1.0, 2.0)
        var intervalMatch = IntervalPattern().Match(trimmed);
        if (intervalMatch.Success)
        {
            return NormalizeInterval(intervalMatch);
        }

        // Try comparison operators: >= 1.0
        var compMatch = ComparisonPattern().Match(trimmed);
        if (compMatch.Success)
        {
            return NormalizeComparison(compMatch);
        }

        // Handle comma-separated constraints: >=1.0, <2.0
        if (trimmed.Contains(','))
        {
            return NormalizeMultiConstraint(trimmed);
        }

        // Handle "fixed" version notation.
        if (trimmed.StartsWith("fixed:", StringComparison.OrdinalIgnoreCase))
        {
            var fixedVersion = trimmed[6..].Trim();
            return $">={fixedVersion}";
        }

        // Handle plain version (treat as exact match). Cached pattern replaces
        // the previous per-call Regex.IsMatch recompilation.
        if (PlainVersionPattern().IsMatch(trimmed))
        {
            return $"={trimmed}";
        }

        // Return trimmed if unrecognized.
        return trimmed;
    }

    /// <summary>
    /// Converts an interval match into comparison constraints:
    /// "[" → "&gt;=", "(" → "&gt;", "]" → "&lt;=", ")" → "&lt;".
    /// </summary>
    private static string NormalizeInterval(Match match)
    {
        var leftBracket = match.Groups[1].Value;
        var lower = match.Groups[2].Value.Trim();
        var upper = match.Groups[3].Value.Trim();
        var rightBracket = match.Groups[4].Value;

        var parts = new List<string>();
        if (!string.IsNullOrEmpty(lower))
        {
            var op = leftBracket == "[" ? ">=" : ">";
            parts.Add($"{op}{lower}");
        }
        if (!string.IsNullOrEmpty(upper))
        {
            var op = rightBracket == "]" ? "<=" : "<";
            parts.Add($"{op}{upper}");
        }
        return string.Join(",", parts);
    }

    /// <summary>
    /// Rewrites a single comparison match without whitespace between operator and version.
    /// </summary>
    private static string NormalizeComparison(Match match)
    {
        var op = NormalizeOperator(match.Groups[1].Value);
        var version = match.Groups[2].Value.Trim();
        return $"{op}{version}";
    }

    /// <summary>
    /// Normalizes a comma-separated constraint list: each constraint is
    /// normalized, then the set is ordinally sorted and deduplicated.
    /// </summary>
    private static string NormalizeMultiConstraint(string range)
    {
        var constraints = range
            .Split(',', StringSplitOptions.RemoveEmptyEntries)
            .Select(static c => c.Trim())
            .Where(static c => !string.IsNullOrEmpty(c))
            .Select(NormalizeSingleConstraint)
            .OrderBy(static c => c, StringComparer.Ordinal)
            .Distinct()
            .ToList();
        return string.Join(",", constraints);
    }

    /// <summary>
    /// Normalizes one constraint; values without a recognized operator pass through.
    /// </summary>
    private static string NormalizeSingleConstraint(string constraint)
    {
        var match = ComparisonPattern().Match(constraint);
        if (match.Success)
        {
            var op = NormalizeOperator(match.Groups[1].Value);
            var version = match.Groups[2].Value.Trim();
            return $"{op}{version}";
        }
        return constraint;
    }

    /// <summary>
    /// Maps operator spellings onto canonical forms ("~&gt;" → "~=").
    /// </summary>
    private static string NormalizeOperator(string op)
    {
        return op switch
        {
            "~=" or "~>" => "~=",
            "^" => "^",
            ">=" => ">=",
            ">" => ">",
            "<=" => "<=",
            "<" => "<",
            "=" => "=",
            "!=" => "!=",
            _ => op
        };
    }
}

View File

@@ -0,0 +1,68 @@
// -----------------------------------------------------------------------------
// MergeHashBackfillJob.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-020
// Description: Job to backfill merge hashes for existing advisories
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Merge.Identity;
namespace StellaOps.Concelier.Merge.Jobs;
/// <summary>
/// Job to backfill merge hashes for existing advisories during migration.
/// Can target all advisories or a specific advisory key.
/// </summary>
public sealed class MergeHashBackfillJob : IJob
{
    private readonly MergeHashShadowWriteService _shadowWriteService;
    private readonly ILogger<MergeHashBackfillJob> _logger;

    public MergeHashBackfillJob(
        MergeHashShadowWriteService shadowWriteService,
        ILogger<MergeHashBackfillJob> logger)
    {
        _shadowWriteService = shadowWriteService ?? throw new ArgumentNullException(nameof(shadowWriteService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Executes the backfill job.
    /// </summary>
    /// <remarks>
    /// Parameters:
    /// - "seed" (optional): Specific advisory key to backfill. If empty, backfills all.
    /// - "force" (optional): If "true", recomputes hash even for advisories that have one.
    /// </remarks>
    public async Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        // "seed" narrows the run to one advisory; "force" only applies there.
        var seed = context.Parameters.TryGetValue("seed", out var seedValue)
            ? seedValue as string
            : null;
        var force = context.Parameters.TryGetValue("force", out var forceValue)
            && forceValue is string forceText
            && string.Equals(forceText, "true", StringComparison.OrdinalIgnoreCase);

        if (string.IsNullOrWhiteSpace(seed))
        {
            // No seed: sweep every advisory and report aggregate counters.
            _logger.LogInformation("Starting merge hash backfill for all advisories");
            var result = await _shadowWriteService.BackfillAllAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Merge hash backfill complete: processed={Processed}, updated={Updated}, skipped={Skipped}, failed={Failed}",
                result.Processed,
                result.Updated,
                result.Skipped,
                result.Failed);
        }
        else
        {
            // Targeted run for a single advisory key.
            _logger.LogInformation("Starting merge hash backfill for single advisory: {AdvisoryKey}, force={Force}", seed, force);
            var updated = await _shadowWriteService.BackfillOneAsync(seed, force, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Merge hash backfill for {AdvisoryKey} complete: updated={Updated}",
                seed,
                updated);
        }
    }
}

View File

@@ -3,4 +3,5 @@ namespace StellaOps.Concelier.Merge.Jobs;
/// <summary>
/// Job kind identifiers used to register and trigger merge jobs.
/// </summary>
internal static class MergeJobKinds
{
    // Re-runs advisory reconciliation.
    public const string Reconcile = "merge:reconcile";
    // Backfills merge_hash for pre-existing advisories (MergeHashBackfillJob).
    public const string HashBackfill = "merge:hash-backfill";
}

View File

@@ -8,6 +8,7 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Core;
using StellaOps.Concelier.Core.Events;
using StellaOps.Concelier.Merge.Identity;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Advisories;
using StellaOps.Concelier.Storage.Aliases;
@@ -41,6 +42,7 @@ public sealed class AdvisoryMergeService
private readonly IAdvisoryEventLog _eventLog;
private readonly TimeProvider _timeProvider;
private readonly CanonicalMerger _canonicalMerger;
private readonly IMergeHashCalculator? _mergeHashCalculator;
private readonly ILogger<AdvisoryMergeService> _logger;
public AdvisoryMergeService(
@@ -51,7 +53,8 @@ public sealed class AdvisoryMergeService
CanonicalMerger canonicalMerger,
IAdvisoryEventLog eventLog,
TimeProvider timeProvider,
ILogger<AdvisoryMergeService> logger)
ILogger<AdvisoryMergeService> logger,
IMergeHashCalculator? mergeHashCalculator = null)
{
_aliasResolver = aliasResolver ?? throw new ArgumentNullException(nameof(aliasResolver));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
@@ -61,6 +64,7 @@ public sealed class AdvisoryMergeService
_eventLog = eventLog ?? throw new ArgumentNullException(nameof(eventLog));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_mergeHashCalculator = mergeHashCalculator; // Optional during migration
}
public async Task<AdvisoryMergeResult> MergeAsync(string seedAdvisoryKey, CancellationToken cancellationToken)
@@ -102,7 +106,7 @@ public sealed class AdvisoryMergeService
throw;
}
var merged = precedenceResult.Advisory;
var merged = EnrichWithMergeHash(precedenceResult.Advisory);
var conflictDetails = precedenceResult.Conflicts;
if (component.Collisions.Count > 0)
@@ -309,7 +313,48 @@ public sealed class AdvisoryMergeService
source.Provenance,
source.Description,
source.Cwes,
source.CanonicalMetricId);
source.CanonicalMetricId,
source.MergeHash);
/// <summary>
/// Enriches an advisory with its computed merge hash if calculator is available.
/// </summary>
/// <param name="advisory">Merged advisory produced by the precedence pipeline.</param>
/// <returns>
/// A rebuilt <see cref="Advisory"/> carrying the computed merge hash, or the original
/// instance when no calculator is registered or hashing fails.
/// </returns>
private Advisory EnrichWithMergeHash(Advisory advisory)
{
    // Calculator is optional during migration; pass the advisory through untouched when absent.
    if (_mergeHashCalculator is null)
    {
        return advisory;
    }

    try
    {
        var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);

        // Advisory is immutable, so rebuild it with every existing field plus the hash.
        // NOTE(review): argument order must match the Advisory constructor exactly —
        // several parameters share a type, so a transposition would compile silently.
        return new Advisory(
            advisory.AdvisoryKey,
            advisory.Title,
            advisory.Summary,
            advisory.Language,
            advisory.Published,
            advisory.Modified,
            advisory.Severity,
            advisory.ExploitKnown,
            advisory.Aliases,
            advisory.Credits,
            advisory.References,
            advisory.AffectedPackages,
            advisory.CvssMetrics,
            advisory.Provenance,
            advisory.Description,
            advisory.Cwes,
            advisory.CanonicalMetricId,
            mergeHash);
    }
    catch (Exception ex)
    {
        // Best-effort: hashing must never fail the merge itself.
        _logger.LogWarning(ex, "Failed to compute merge hash for {AdvisoryKey}, continuing without hash", advisory.AdvisoryKey);
        return advisory;
    }
}
private CanonicalMergeResult? ApplyCanonicalMergeIfNeeded(string canonicalKey, List<Advisory> inputs)
{

View File

@@ -0,0 +1,172 @@
// -----------------------------------------------------------------------------
// MergeHashBackfillService.cs
// Sprint: SPRINT_8200_0012_0001_CONCEL_merge_hash_library
// Task: MHASH-8200-020
// Description: Shadow-write mode for computing merge_hash on existing advisories
// -----------------------------------------------------------------------------
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Merge.Identity;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Advisories;
namespace StellaOps.Concelier.Merge.Services;
/// <summary>
/// Service for backfilling merge hashes on existing advisories without changing their identity.
/// Runs in shadow-write mode: computes merge_hash and updates only that field.
/// </summary>
public sealed class MergeHashBackfillService
{
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IMergeHashCalculator _mergeHashCalculator;
    private readonly ILogger<MergeHashBackfillService> _logger;

    public MergeHashBackfillService(
        IAdvisoryStore advisoryStore,
        IMergeHashCalculator mergeHashCalculator,
        ILogger<MergeHashBackfillService> logger)
    {
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _mergeHashCalculator = mergeHashCalculator ?? throw new ArgumentNullException(nameof(mergeHashCalculator));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Backfills merge hashes for all advisories that don't have one.
    /// </summary>
    /// <param name="batchSize">Number of updated advisories between progress log entries. Must be positive.</param>
    /// <param name="dryRun">If true, computes hashes but doesn't persist them.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Backfill result with statistics.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="batchSize"/> is zero or negative.
    /// </exception>
    public async Task<MergeHashBackfillResult> BackfillAsync(
        int batchSize = 100,
        bool dryRun = false,
        CancellationToken cancellationToken = default)
    {
        // Guard: batchSize feeds a modulo below; zero would raise DivideByZeroException mid-stream.
        if (batchSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
        }

        var stopwatch = Stopwatch.StartNew();
        var processed = 0;
        var updated = 0;
        var skipped = 0;
        var errors = 0;

        _logger.LogInformation(
            "Starting merge hash backfill (dryRun={DryRun}, batchSize={BatchSize})",
            dryRun, batchSize);

        // ConfigureAwait(false): library code with no synchronization context requirement,
        // consistent with the other awaits in this method.
        await foreach (var advisory in _advisoryStore.StreamAsync(cancellationToken).ConfigureAwait(false))
        {
            cancellationToken.ThrowIfCancellationRequested();
            processed++;

            // Skip advisories that already carry a hash; backfill is additive only.
            if (!string.IsNullOrEmpty(advisory.MergeHash))
            {
                skipped++;
                continue;
            }

            try
            {
                var mergeHash = _mergeHashCalculator.ComputeMergeHash(advisory);

                if (!dryRun)
                {
                    var enrichedAdvisory = CreateAdvisoryWithMergeHash(advisory, mergeHash);
                    await _advisoryStore.UpsertAsync(enrichedAdvisory, cancellationToken).ConfigureAwait(false);
                }

                updated++;

                // Periodic progress logging every batchSize successful updates.
                if (updated % batchSize == 0)
                {
                    _logger.LogInformation(
                        "Backfill progress: {Updated} updated, {Skipped} skipped, {Errors} errors (of {Processed} processed)",
                        updated, skipped, errors, processed);
                }
            }
            catch (Exception ex)
            {
                // Best-effort: a single bad advisory must not abort the whole backfill.
                errors++;
                _logger.LogWarning(
                    ex,
                    "Failed to compute/update merge hash for {AdvisoryKey}",
                    advisory.AdvisoryKey);
            }
        }

        stopwatch.Stop();

        var result = new MergeHashBackfillResult(
            TotalProcessed: processed,
            Updated: updated,
            Skipped: skipped,
            Errors: errors,
            DryRun: dryRun,
            Duration: stopwatch.Elapsed);

        _logger.LogInformation(
            "Merge hash backfill completed: {Updated} updated, {Skipped} skipped, {Errors} errors (of {Processed} processed) in {Duration}",
            result.Updated, result.Skipped, result.Errors, result.TotalProcessed, result.Duration);

        return result;
    }

    /// <summary>
    /// Computes merge hash for a single advisory without persisting.
    /// Useful for testing or preview mode.
    /// </summary>
    public string ComputeMergeHash(Advisory advisory)
    {
        ArgumentNullException.ThrowIfNull(advisory);
        return _mergeHashCalculator.ComputeMergeHash(advisory);
    }

    /// <summary>
    /// Clones <paramref name="source"/> with <paramref name="mergeHash"/> set; all other fields copied verbatim.
    /// NOTE(review): argument order must match the Advisory constructor exactly —
    /// several parameters share a type, so a transposition would compile silently.
    /// </summary>
    private static Advisory CreateAdvisoryWithMergeHash(Advisory source, string mergeHash)
        => new(
            source.AdvisoryKey,
            source.Title,
            source.Summary,
            source.Language,
            source.Published,
            source.Modified,
            source.Severity,
            source.ExploitKnown,
            source.Aliases,
            source.Credits,
            source.References,
            source.AffectedPackages,
            source.CvssMetrics,
            source.Provenance,
            source.Description,
            source.Cwes,
            source.CanonicalMetricId,
            mergeHash);
}
/// <summary>
/// Result of a merge hash backfill operation.
/// </summary>
public sealed record MergeHashBackfillResult(
    int TotalProcessed,
    int Updated,
    int Skipped,
    int Errors,
    bool DryRun,
    TimeSpan Duration)
{
    /// <summary>
    /// Percentage of advisories processed without error (updated or already hashed and skipped).
    /// Reports 100 when nothing was processed.
    /// </summary>
    public double SuccessRate
        => TotalProcessed <= 0
            ? 100
            : (double)(Updated + Skipped) / TotalProcessed * 100;

    /// <summary>
    /// Average time per advisory in milliseconds; 0 when nothing was processed.
    /// </summary>
    public double AvgTimePerAdvisoryMs
        => TotalProcessed <= 0
            ? 0
            : Duration.TotalMilliseconds / TotalProcessed;
}

View File

@@ -26,7 +26,8 @@ public sealed record Advisory
provenance: Array.Empty<AdvisoryProvenance>(),
description: null,
cwes: Array.Empty<AdvisoryWeakness>(),
canonicalMetricId: null);
canonicalMetricId: null,
mergeHash: null);
public Advisory(
string advisoryKey,
@@ -44,7 +45,8 @@ public sealed record Advisory
IEnumerable<AdvisoryProvenance>? provenance,
string? description = null,
IEnumerable<AdvisoryWeakness>? cwes = null,
string? canonicalMetricId = null)
string? canonicalMetricId = null,
string? mergeHash = null)
: this(
advisoryKey,
title,
@@ -62,7 +64,8 @@ public sealed record Advisory
provenance,
description,
cwes,
canonicalMetricId)
canonicalMetricId,
mergeHash)
{
}
@@ -83,7 +86,8 @@ public sealed record Advisory
IEnumerable<AdvisoryProvenance>? provenance,
string? description = null,
IEnumerable<AdvisoryWeakness>? cwes = null,
string? canonicalMetricId = null)
string? canonicalMetricId = null,
string? mergeHash = null)
{
AdvisoryKey = Validation.EnsureNotNullOrWhiteSpace(advisoryKey, nameof(advisoryKey));
Title = Validation.EnsureNotNullOrWhiteSpace(title, nameof(title));
@@ -145,6 +149,8 @@ public sealed record Advisory
.ThenBy(static p => p.Kind, StringComparer.Ordinal)
.ThenBy(static p => p.RecordedAt)
.ToImmutableArray();
MergeHash = Validation.TrimToNull(mergeHash);
}
[JsonConstructor]
@@ -165,7 +171,8 @@ public sealed record Advisory
ImmutableArray<AdvisoryProvenance> provenance,
string? description,
ImmutableArray<AdvisoryWeakness> cwes,
string? canonicalMetricId)
string? canonicalMetricId,
string? mergeHash = null)
: this(
advisoryKey,
title,
@@ -183,7 +190,8 @@ public sealed record Advisory
provenance.IsDefault ? null : provenance.AsEnumerable(),
description,
cwes.IsDefault ? null : cwes.AsEnumerable(),
canonicalMetricId)
canonicalMetricId,
mergeHash)
{
}
@@ -220,4 +228,10 @@ public sealed record Advisory
public string? CanonicalMetricId { get; }
public ImmutableArray<AdvisoryProvenance> Provenance { get; }
/// <summary>
/// Semantic merge hash for provenance-scoped deduplication.
/// Nullable during migration; computed from (CVE + PURL + version-range + CWE + patch-lineage).
/// </summary>
public string? MergeHash { get; }
}

View File

@@ -8,21 +8,22 @@
| Field | Type | Required | Notes |
|-------|------|----------|-------|
| `advisoryKey` | string | yes | Globally unique identifier selected by the merge layer (often a CVE/GHSA/vendor key). Stored lowercased unless vendor casing is significant. |
| `title` | string | yes | Human readable title. Must be non-empty and trimmed. |
| `summary` | string? | optional | Short description; trimmed to `null` when empty. |
| `language` | string? | optional | ISO language code (lowercase). |
| `published` | DateTimeOffset? | optional | UTC timestamp when vendor originally published. |
| `modified` | DateTimeOffset? | optional | UTC timestamp when vendor last updated. |
| `severity` | string? | optional | Normalized severity label (`critical`, `high`, etc.). |
| `exploitKnown` | bool | yes | Whether KEV/other sources confirm active exploitation. |
| `aliases` | string[] | yes | Sorted, de-duplicated list of normalized aliases (see [Alias Schemes](#alias-schemes)). |
| `credits` | AdvisoryCredit[] | yes | Deterministically ordered acknowledgements (role + contact metadata). |
| `references` | AdvisoryReference[] | yes | Deterministically ordered reference set. |
| `affectedPackages` | AffectedPackage[] | yes | Deterministically ordered affected packages. |
| `cvssMetrics` | CvssMetric[] | yes | Deterministically ordered CVSS metrics (v3, v4 first). |
| `provenance` | AdvisoryProvenance[] | yes | Normalized provenance entries sorted by source then kind then recorded timestamp. |
| `mergeHash` | string? | optional | Semantic identity hash for deduplication (see [Merge Hash](#merge-hash)). |
### Invariants
- Collections are immutable (`ImmutableArray<T>`) and always sorted deterministically.
- `AdvisoryKey` and `Title` are mandatory and trimmed.
@@ -36,27 +37,27 @@
| `url` | string | yes | Absolute HTTP/HTTPS URL. |
| `kind` | string? | optional | Categorized reference role (e.g. `advisory`, `patch`, `changelog`). |
| `sourceTag` | string? | optional | Free-form tag identifying originating source. |
| `summary` | string? | optional | Short description. |
| `provenance` | AdvisoryProvenance | yes | Provenance entry describing how the reference was mapped. |

Deterministic ordering: by `url`, then `kind`, then `sourceTag`, then `provenance.RecordedAt`.

## AdvisoryCredit

| Field | Type | Required | Notes |
|-------|------|----------|-------|
| `displayName` | string | yes | Human-readable acknowledgement (reporter, maintainer, analyst, etc.). |
| `role` | string? | optional | Normalized role token (lowercase with `_` separators). |
| `contacts` | string[] | yes | Sorted set of vendor-supplied handles or URLs; may be empty. |
| `provenance` | AdvisoryProvenance | yes | Provenance entry describing how the credit was captured. |

Deterministic ordering: by `role` (nulls first) then `displayName`.
## AffectedPackage
| Field | Type | Required | Notes |
|-------|------|----------|-------|
| `type` | string | yes | Semantic type (`semver`, `rpm`, `deb`, `apk`, `purl`, `cpe`, etc.). Lowercase. |
| `identifier` | string | yes | Canonical identifier (package name, PURL, CPE, NEVRA, etc.). |
| `platform` | string? | optional | Explicit platform / distro (e.g. `ubuntu`, `rhel-8`). |
| `versionRanges` | AffectedVersionRange[] | yes | Deduplicated + sorted by introduced/fixed/last/expr/kind. |
@@ -69,7 +70,7 @@ Deterministic ordering: packages sorted by `type`, then `identifier`, then `plat
| Field | Type | Required | Notes |
|-------|------|----------|-------|
| `rangeKind` | string | yes | Classification of range semantics (`semver`, `evr`, `nevra`, `apk`, `version`, `purl`). Lowercase. |
| `introducedVersion` | string? | optional | Inclusive lower bound when impact begins. |
| `fixedVersion` | string? | optional | Exclusive bounding version containing the fix. |
| `lastAffectedVersion` | string? | optional | Inclusive upper bound when no fix exists. |
@@ -95,18 +96,18 @@ Sorted by version then vector for determinism.
| Field | Type | Required | Notes |
|-------|------|----------|-------|
| `source` | string | yes | Logical source identifier (`nvd`, `redhat`, `osv`, etc.). |
| `kind` | string | yes | Operation performed (`fetch`, `parse`, `map`, `merge`, `enrich`). |
| `value` | string? | optional | Free-form pipeline detail (parser identifier, rule set, resume cursor). |
| `recordedAt` | DateTimeOffset | yes | UTC timestamp when provenance was captured. |
| `fieldMask` | string[] | optional | Canonical field coverage expressed as lowercase masks (e.g. `affectedpackages[]`, `affectedpackages[].versionranges[]`). |

### Provenance Mask Expectations

Each canonical field is expected to carry at least one provenance entry derived from the
responsible pipeline stage. Populate `fieldMask` with the lowercase canonical mask(s) describing the
covered field(s); downstream metrics and resume helpers rely on this signal to reason about
coverage. When aggregating provenance from subcomponents (e.g., affected package ranges), merge code
should ensure:
- Advisory level provenance documents the source document and merge actions.
- References, packages, ranges, and metrics each include their own provenance entry reflecting
@@ -142,3 +143,112 @@ Supported alias scheme prefixes:
The registry exposed via `AliasSchemes` and `AliasSchemeRegistry` can be used to validate aliases and
drive downstream conditionals without re-implementing pattern rules.
## Merge Hash
The merge hash is a deterministic semantic identity hash that enables provenance-scoped deduplication.
Unlike content hashing (which changes when any field changes), merge hash is computed from identity
components only, allowing the same CVE from different sources (Debian, RHEL, NVD, etc.) to produce
identical hashes when semantically equivalent.
### Purpose
- **Deduplication**: Identify equivalent advisories across multiple sources
- **Stable Identity**: Hash remains constant despite variations in non-identity fields (title, description, CVSS scores)
- **Source Independence**: Same CVE affecting the same package produces the same hash regardless of source
### Hash Format
The merge hash is a hex-encoded SHA256 hash prefixed with `sha256:`:
```
sha256:a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2
```
Total length: 71 characters (`sha256:` prefix + 64 hex characters).
### Identity Components
The merge hash is computed from the following canonical string format:
```
CVE:{cve}|AFFECTS:{affects_key}|VERSION:{version_range}|CWE:{cwes}|LINEAGE:{patch_lineage}
```
| Component | Source | Notes |
|-----------|--------|-------|
| `cve` | Advisory key or CVE alias | Normalized to uppercase (e.g., `CVE-2024-1234`) |
| `affects_key` | First affected package identifier | PURL or CPE, normalized to canonical form |
| `version_range` | First affected package version ranges | Canonical interval notation, sorted |
| `cwes` | Advisory weaknesses | Uppercase, sorted numerically, comma-joined |
| `patch_lineage` | Patch references | Extracted commit SHA or PATCH-ID (optional) |
### Normalization Rules
#### CVE Normalization

- Uppercase: `cve-2024-1234` → `CVE-2024-1234`
- Numeric-only input prefixed: `2024-1234` → `CVE-2024-1234`
- Non-CVE advisories use advisory key as-is

#### PURL Normalization

- Type lowercase: `pkg:NPM/lodash` → `pkg:npm/lodash`
- Namespace/name lowercase: `pkg:npm/LODASH` → `pkg:npm/lodash`
- Strip non-identity qualifiers: `?arch=amd64`, `?checksum=...`, `?platform=linux`
- Preserve version: `@4.17.0` retained

#### CPE Normalization

- Convert CPE 2.2 to 2.3: `cpe:/a:vendor:product:1.0` → `cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*`
- Lowercase all components
- Normalize wildcards: `ANY` → `*`, `NA` → `-`

#### Version Range Normalization

- Interval to comparison: `[1.0.0, 2.0.0)` → `>=1.0.0,<2.0.0`
- Trim whitespace: `< 1.5.0` → `<1.5.0`
- Fixed notation: `fixed: 1.5.1` → `>=1.5.1`
- Multiple constraints sorted and comma-joined

#### CWE Normalization

- Uppercase: `cwe-79` → `CWE-79`
- Sort numerically: `CWE-89,CWE-79` → `CWE-79,CWE-89`
- Deduplicate
- Comma-joined output

#### Patch Lineage Normalization

- Extract 40-character SHA from GitHub/GitLab URLs
- Extract SHA from `commit {sha}` or `backport of {sha}` patterns
- Normalize PATCH-ID to uppercase: `patch-12345` → `PATCH-12345`
- Returns `null` for unrecognized formats (produces empty string in canonical form)
### Multi-Package Advisories
When an advisory affects multiple packages, the merge hash is computed from the first affected package.
Use `ComputeMergeHash(advisory, affectedPackage)` to compute per-package hashes for deduplication
at the package level.
### Implementation
The merge hash is computed by `MergeHashCalculator` in `StellaOps.Concelier.Merge.Identity`:
```csharp
var calculator = new MergeHashCalculator();
var hash = calculator.ComputeMergeHash(advisory);
// or for specific package:
var packageHash = calculator.ComputeMergeHash(advisory, affectedPackage);
```
### Migration
During migration, the `mergeHash` field is nullable. Use `MergeHashShadowWriteService` to backfill
hashes for existing advisories:
```csharp
var shadowWriter = new MergeHashShadowWriteService(advisoryStore, calculator, logger);
var result = await shadowWriter.BackfillAllAsync(cancellationToken);
// result.Updated: count of advisories updated with merge hashes
```

View File

@@ -0,0 +1,63 @@
-- Concelier Migration 008: Sync Ledger for Federation
-- Sprint: SPRINT_8200_0014_0001_DB_sync_ledger_schema
-- Task: SYNC-8200-002
-- Creates sync_ledger and site_policy tables for federation cursor tracking
-- Helper function for updated_at triggers
-- Helper function for updated_at triggers.
-- Shared by every vuln.* table in this migration set that carries an updated_at column.
CREATE OR REPLACE FUNCTION vuln.update_timestamp()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Track federation sync state per remote site
CREATE TABLE IF NOT EXISTS vuln.sync_ledger (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    site_id TEXT NOT NULL,                          -- Remote site identifier (e.g., "site-us-west", "airgap-dc2")
    cursor TEXT NOT NULL,                           -- Opaque cursor (usually ISO8601 timestamp#sequence)
    bundle_hash TEXT NOT NULL,                      -- SHA256 of imported bundle
    items_count INT NOT NULL DEFAULT 0,             -- Number of items in bundle
    signed_at TIMESTAMPTZ NOT NULL,                 -- When bundle was signed by remote
    imported_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT uq_sync_ledger_site_cursor UNIQUE (site_id, cursor),
    CONSTRAINT uq_sync_ledger_bundle UNIQUE (bundle_hash)
);

CREATE INDEX IF NOT EXISTS idx_sync_ledger_site ON vuln.sync_ledger(site_id);
CREATE INDEX IF NOT EXISTS idx_sync_ledger_site_time ON vuln.sync_ledger(site_id, signed_at DESC);

COMMENT ON TABLE vuln.sync_ledger IS 'Federation sync cursor tracking per remote site';
COMMENT ON COLUMN vuln.sync_ledger.cursor IS 'Position marker for incremental sync (monotonically increasing)';
COMMENT ON COLUMN vuln.sync_ledger.site_id IS 'Remote site identifier for federation sync';
COMMENT ON COLUMN vuln.sync_ledger.bundle_hash IS 'SHA256 hash of imported bundle for deduplication';

-- Site federation policies
CREATE TABLE IF NOT EXISTS vuln.site_policy (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    site_id TEXT NOT NULL UNIQUE,
    display_name TEXT,
    allowed_sources TEXT[] NOT NULL DEFAULT '{}',   -- Empty = allow all
    denied_sources TEXT[] NOT NULL DEFAULT '{}',
    max_bundle_size_mb INT NOT NULL DEFAULT 100,
    max_items_per_bundle INT NOT NULL DEFAULT 10000,
    require_signature BOOLEAN NOT NULL DEFAULT TRUE,
    allowed_signers TEXT[] NOT NULL DEFAULT '{}',   -- Key IDs or issuers
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_site_policy_enabled ON vuln.site_policy(enabled) WHERE enabled = TRUE;

COMMENT ON TABLE vuln.site_policy IS 'Per-site federation governance policies';
COMMENT ON COLUMN vuln.site_policy.allowed_sources IS 'Source keys to allow; empty array allows all sources';
COMMENT ON COLUMN vuln.site_policy.denied_sources IS 'Source keys to deny; takes precedence over allowed';
COMMENT ON COLUMN vuln.site_policy.allowed_signers IS 'Signing key IDs or issuer patterns allowed for bundle verification';

-- Trigger for automatic updated_at.
-- CREATE TRIGGER has no IF NOT EXISTS clause, so drop first to keep this migration
-- re-runnable, matching the IF NOT EXISTS usage everywhere else in the script.
DROP TRIGGER IF EXISTS trg_site_policy_updated ON vuln.site_policy;
CREATE TRIGGER trg_site_policy_updated
    BEFORE UPDATE ON vuln.site_policy
    FOR EACH ROW EXECUTE FUNCTION vuln.update_timestamp();

View File

@@ -0,0 +1,61 @@
-- Concelier Migration 009: Advisory Canonical Table
-- Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
-- Task: SCHEMA-8200-003
-- Creates deduplicated canonical advisories with merge_hash
-- Deduplicated canonical advisory records
CREATE TABLE IF NOT EXISTS vuln.advisory_canonical (
    -- Identity
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Merge key components (used to compute merge_hash)
    cve TEXT NOT NULL,
    affects_key TEXT NOT NULL,                      -- normalized purl or cpe
    version_range JSONB,                            -- structured: { introduced, fixed, last_affected }
    weakness TEXT[] NOT NULL DEFAULT '{}',          -- sorted CWE array

    -- Computed identity
    merge_hash TEXT NOT NULL,                       -- SHA256 of normalized (cve|affects|range|weakness|lineage)

    -- Metadata
    status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'stub', 'withdrawn')),
    severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'none', 'unknown')),
    epss_score NUMERIC(5,4),                        -- EPSS probability (0.0000-1.0000)
    exploit_known BOOLEAN NOT NULL DEFAULT FALSE,

    -- Content (for stub degradation)
    title TEXT,
    summary TEXT,

    -- Audit
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Constraints
    CONSTRAINT uq_advisory_canonical_merge_hash UNIQUE (merge_hash)
);

-- Primary lookup indexes
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_cve ON vuln.advisory_canonical(cve);
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_affects ON vuln.advisory_canonical(affects_key);
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_merge_hash ON vuln.advisory_canonical(merge_hash);

-- Filtered indexes for common queries
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_status ON vuln.advisory_canonical(status) WHERE status = 'active';
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_severity ON vuln.advisory_canonical(severity) WHERE severity IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_exploit ON vuln.advisory_canonical(exploit_known) WHERE exploit_known = TRUE;

-- Time-based index for incremental queries
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_updated ON vuln.advisory_canonical(updated_at DESC);

-- Trigger for automatic updated_at.
-- CREATE TRIGGER has no IF NOT EXISTS clause, so drop first to keep this migration
-- re-runnable, matching the IF NOT EXISTS usage everywhere else in the script.
DROP TRIGGER IF EXISTS trg_advisory_canonical_updated ON vuln.advisory_canonical;
CREATE TRIGGER trg_advisory_canonical_updated
    BEFORE UPDATE ON vuln.advisory_canonical
    FOR EACH ROW EXECUTE FUNCTION vuln.update_timestamp();

-- Comments
COMMENT ON TABLE vuln.advisory_canonical IS 'Deduplicated canonical advisories with semantic merge_hash';
COMMENT ON COLUMN vuln.advisory_canonical.merge_hash IS 'Deterministic hash of (cve, affects_key, version_range, weakness, patch_lineage)';
COMMENT ON COLUMN vuln.advisory_canonical.affects_key IS 'Normalized PURL or CPE identifying the affected package';
COMMENT ON COLUMN vuln.advisory_canonical.status IS 'active=full record, stub=minimal for low interest, withdrawn=no longer valid';
COMMENT ON COLUMN vuln.advisory_canonical.epss_score IS 'EPSS exploit prediction probability (0.0000-1.0000)';

View File

@@ -0,0 +1,64 @@
-- Concelier Migration 010: Advisory Source Edge Table
-- Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
-- Task: SCHEMA-8200-004
-- Creates source edge linking canonical advisories to source documents
-- Source edge linking canonical advisory to source documents.
-- One row per (canonical advisory, source, document revision); re-fetching an
-- unchanged document is a no-op thanks to the uniqueness constraint below.
CREATE TABLE IF NOT EXISTS vuln.advisory_source_edge (
    -- Identity
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Relationships
    -- ON DELETE CASCADE: edges are meaningless without their canonical advisory.
    -- ON DELETE RESTRICT: a source cannot be removed while edges still reference it.
    canonical_id UUID NOT NULL REFERENCES vuln.advisory_canonical(id) ON DELETE CASCADE,
    source_id UUID NOT NULL REFERENCES vuln.sources(id) ON DELETE RESTRICT,

    -- Source document
    source_advisory_id TEXT NOT NULL,   -- vendor's advisory ID (DSA-5678, RHSA-2024:1234)
    source_doc_hash TEXT NOT NULL,      -- SHA256 of raw source document

    -- VEX-style status
    vendor_status TEXT CHECK (vendor_status IN (
        'affected', 'not_affected', 'fixed', 'under_investigation'
    )),

    -- Precedence (lower = higher priority)
    precedence_rank INT NOT NULL DEFAULT 100,

    -- DSSE signature envelope
    dsse_envelope JSONB,                -- { payloadType, payload, signatures[] }

    -- Content snapshot
    raw_payload JSONB,                  -- original advisory document

    -- Audit
    fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Constraints: same document from the same source linked to the same canonical only once.
    CONSTRAINT uq_advisory_source_edge_unique
        UNIQUE (canonical_id, source_id, source_doc_hash)
);

-- Primary lookup indexes
CREATE INDEX IF NOT EXISTS idx_source_edge_canonical ON vuln.advisory_source_edge(canonical_id);
CREATE INDEX IF NOT EXISTS idx_source_edge_source ON vuln.advisory_source_edge(source_id);
CREATE INDEX IF NOT EXISTS idx_source_edge_advisory_id ON vuln.advisory_source_edge(source_advisory_id);

-- Join optimization index
CREATE INDEX IF NOT EXISTS idx_source_edge_canonical_source ON vuln.advisory_source_edge(canonical_id, source_id);

-- Time-based index for incremental queries
CREATE INDEX IF NOT EXISTS idx_source_edge_fetched ON vuln.advisory_source_edge(fetched_at DESC);

-- GIN index for JSONB queries on dsse_envelope.
-- jsonb_path_ops: smaller/faster than the default operator class, supports @> containment only.
CREATE INDEX IF NOT EXISTS idx_source_edge_dsse_gin ON vuln.advisory_source_edge
    USING GIN (dsse_envelope jsonb_path_ops);

-- Comments
COMMENT ON TABLE vuln.advisory_source_edge IS 'Links canonical advisories to source documents with signatures';
COMMENT ON COLUMN vuln.advisory_source_edge.canonical_id IS 'Reference to deduplicated canonical advisory';
COMMENT ON COLUMN vuln.advisory_source_edge.source_id IS 'Reference to feed source';
COMMENT ON COLUMN vuln.advisory_source_edge.source_advisory_id IS 'Vendor advisory ID (e.g., DSA-5678, RHSA-2024:1234)';
COMMENT ON COLUMN vuln.advisory_source_edge.precedence_rank IS 'Source priority: vendor=10, distro=20, osv=30, nvd=40';
COMMENT ON COLUMN vuln.advisory_source_edge.dsse_envelope IS 'DSSE envelope with signature over raw_payload';
COMMENT ON COLUMN vuln.advisory_source_edge.vendor_status IS 'VEX-style status from source';

View File

@@ -0,0 +1,116 @@
-- Concelier Migration 011: Canonical Helper Functions
-- Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
-- Task: SCHEMA-8200-005
-- Creates helper functions for canonical advisory operations
-- Function to get canonical by merge_hash (most common lookup)
-- merge_hash is unique, so at most one row matches; returns a NULL composite
-- when no row is found.
CREATE OR REPLACE FUNCTION vuln.get_canonical_by_hash(p_merge_hash TEXT)
RETURNS vuln.advisory_canonical
LANGUAGE sql STABLE
AS $$
SELECT * FROM vuln.advisory_canonical
WHERE merge_hash = p_merge_hash;
$$;
-- Function to get all source edges for a canonical
-- Ordered best-first: lowest precedence_rank (highest priority source), then
-- most recently fetched.
CREATE OR REPLACE FUNCTION vuln.get_source_edges(p_canonical_id UUID)
RETURNS SETOF vuln.advisory_source_edge
LANGUAGE sql STABLE
AS $$
SELECT * FROM vuln.advisory_source_edge
WHERE canonical_id = p_canonical_id
ORDER BY precedence_rank ASC, fetched_at DESC;
$$;
-- Function to upsert canonical with merge_hash dedup
-- Returns the id of the surviving row (existing row's id on conflict).
-- Merge semantics on conflict:
--   * severity/epss_score/title/summary: incoming value wins only when non-NULL
--   * exploit_known: sticky OR (never downgrades TRUE back to FALSE)
--   * cve/affects_key/version_range/weakness/status: NOT updated on conflict
CREATE OR REPLACE FUNCTION vuln.upsert_canonical(
p_cve TEXT,
p_affects_key TEXT,
p_version_range JSONB,
p_weakness TEXT[],
p_merge_hash TEXT,
p_severity TEXT DEFAULT NULL,
p_epss_score NUMERIC DEFAULT NULL,
p_exploit_known BOOLEAN DEFAULT FALSE,
p_title TEXT DEFAULT NULL,
p_summary TEXT DEFAULT NULL
)
RETURNS UUID
LANGUAGE plpgsql
AS $$
DECLARE
v_id UUID;
BEGIN
INSERT INTO vuln.advisory_canonical (
cve, affects_key, version_range, weakness, merge_hash,
severity, epss_score, exploit_known, title, summary
)
VALUES (
p_cve, p_affects_key, p_version_range, p_weakness, p_merge_hash,
p_severity, p_epss_score, p_exploit_known, p_title, p_summary
)
ON CONFLICT (merge_hash) DO UPDATE SET
severity = COALESCE(EXCLUDED.severity, vuln.advisory_canonical.severity),
epss_score = COALESCE(EXCLUDED.epss_score, vuln.advisory_canonical.epss_score),
exploit_known = EXCLUDED.exploit_known OR vuln.advisory_canonical.exploit_known,
title = COALESCE(EXCLUDED.title, vuln.advisory_canonical.title),
summary = COALESCE(EXCLUDED.summary, vuln.advisory_canonical.summary),
updated_at = NOW()
RETURNING id INTO v_id;
RETURN v_id;
END;
$$;
-- Function to add source edge with dedup
-- Dedup key is (canonical_id, source_id, source_doc_hash). On conflict:
--   * vendor_status/dsse_envelope/raw_payload: incoming wins only when non-NULL
--   * precedence_rank: keeps the best (lowest) rank seen so far
--   * fetched_at: NOT refreshed on conflict (original fetch time is kept)
CREATE OR REPLACE FUNCTION vuln.add_source_edge(
p_canonical_id UUID,
p_source_id UUID,
p_source_advisory_id TEXT,
p_source_doc_hash TEXT,
p_vendor_status TEXT DEFAULT NULL,
p_precedence_rank INT DEFAULT 100,
p_dsse_envelope JSONB DEFAULT NULL,
p_raw_payload JSONB DEFAULT NULL,
p_fetched_at TIMESTAMPTZ DEFAULT NOW()
)
RETURNS UUID
LANGUAGE plpgsql
AS $$
DECLARE
v_id UUID;
BEGIN
INSERT INTO vuln.advisory_source_edge (
canonical_id, source_id, source_advisory_id, source_doc_hash,
vendor_status, precedence_rank, dsse_envelope, raw_payload, fetched_at
)
VALUES (
p_canonical_id, p_source_id, p_source_advisory_id, p_source_doc_hash,
p_vendor_status, p_precedence_rank, p_dsse_envelope, p_raw_payload, p_fetched_at
)
ON CONFLICT (canonical_id, source_id, source_doc_hash) DO UPDATE SET
vendor_status = COALESCE(EXCLUDED.vendor_status, vuln.advisory_source_edge.vendor_status),
precedence_rank = LEAST(EXCLUDED.precedence_rank, vuln.advisory_source_edge.precedence_rank),
dsse_envelope = COALESCE(EXCLUDED.dsse_envelope, vuln.advisory_source_edge.dsse_envelope),
raw_payload = COALESCE(EXCLUDED.raw_payload, vuln.advisory_source_edge.raw_payload)
RETURNING id INTO v_id;
RETURN v_id;
END;
$$;
-- Function to count active canonicals by CVE prefix
-- Matches 'CVE-<year>-%' (the literal '-' characters are not LIKE wildcards);
-- only rows with status = 'active' are counted.
CREATE OR REPLACE FUNCTION vuln.count_canonicals_by_cve_year(p_year INT)
RETURNS BIGINT
LANGUAGE sql STABLE
AS $$
SELECT COUNT(*) FROM vuln.advisory_canonical
WHERE cve LIKE 'CVE-' || p_year::TEXT || '-%'
AND status = 'active';
$$;
-- Comments
COMMENT ON FUNCTION vuln.get_canonical_by_hash(TEXT) IS 'Lookup canonical advisory by merge_hash';
COMMENT ON FUNCTION vuln.get_source_edges(UUID) IS 'Get all source edges for a canonical, ordered by precedence';
COMMENT ON FUNCTION vuln.upsert_canonical IS 'Insert or update canonical advisory with merge_hash deduplication';
COMMENT ON FUNCTION vuln.add_source_edge IS 'Add source edge with deduplication by (canonical, source, doc_hash)';

View File

@@ -0,0 +1,144 @@
-- Concelier Migration 012: Populate advisory_canonical table
-- Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
-- Task: SCHEMA-8200-012
-- Populates advisory_canonical from existing advisories with placeholder merge_hash
-- NOTE: merge_hash will be backfilled by application-side MergeHashBackfillService
-- Populate advisory_canonical from existing advisories
-- Each advisory + affected package combination becomes a canonical record
INSERT INTO vuln.advisory_canonical (
id,
cve,
affects_key,
version_range,
weakness,
merge_hash,
status,
severity,
epss_score,
exploit_known,
title,
summary,
created_at,
updated_at
)
SELECT
gen_random_uuid() AS id,
COALESCE(
-- Try to get CVE from aliases
(SELECT alias_value FROM vuln.advisory_aliases
WHERE advisory_id = a.id AND alias_type = 'CVE'
ORDER BY is_primary DESC LIMIT 1),
-- Fall back to primary_vuln_id
a.primary_vuln_id
) AS cve,
COALESCE(
-- Prefer PURL if available
aa.purl,
-- Otherwise construct from ecosystem/package
CASE
WHEN aa.ecosystem IS NOT NULL AND aa.package_name IS NOT NULL
THEN 'pkg:' || lower(aa.ecosystem) || '/' || aa.package_name
ELSE 'unknown:' || a.id::text
END
) AS affects_key,
aa.version_range AS version_range,
-- Aggregate CWE IDs into sorted array
COALESCE(
(SELECT array_agg(DISTINCT upper(w.cwe_id) ORDER BY upper(w.cwe_id))
FROM vuln.advisory_weaknesses w
WHERE w.advisory_id = a.id),
'{}'::text[]
) AS weakness,
-- Placeholder merge_hash - will be backfilled by application
'PLACEHOLDER_' || a.id::text || '_' || COALESCE(aa.id::text, 'noaffects') AS merge_hash,
CASE
WHEN a.withdrawn_at IS NOT NULL THEN 'withdrawn'
ELSE 'active'
END AS status,
a.severity,
-- EPSS score if available from KEV
-- NOTE(review): 0.95 for known-ransomware KEV entries is a placeholder
-- heuristic, not a real EPSS value -- presumably overwritten by a later
-- EPSS enrichment pass; confirm before relying on it.
(SELECT CASE WHEN kf.known_ransomware_use THEN 0.95 ELSE NULL END
FROM vuln.kev_flags kf
WHERE kf.advisory_id = a.id
LIMIT 1) AS epss_score,
-- exploit_known from KEV flags
EXISTS(SELECT 1 FROM vuln.kev_flags kf WHERE kf.advisory_id = a.id) AS exploit_known,
a.title,
a.summary,
a.created_at,
NOW() AS updated_at
FROM vuln.advisories a
LEFT JOIN vuln.advisory_affected aa ON aa.advisory_id = a.id
WHERE NOT EXISTS (
-- Skip if already migrated (idempotent)
-- NOTE(review): '_' in 'PLACEHOLDER_' is a single-char LIKE wildcard;
-- harmless here since every row uses the same literal prefix, but the
-- guard is also per-advisory: if only some rows for an advisory were
-- migrated, the remainder will be skipped on re-run.
SELECT 1 FROM vuln.advisory_canonical c
WHERE c.merge_hash LIKE 'PLACEHOLDER_' || a.id::text || '%'
)
ON CONFLICT (merge_hash) DO NOTHING;
-- Handle advisories without affected packages
-- Same shape as the INSERT above, but emits exactly one canonical per advisory
-- with a synthetic 'unknown:' affects_key and no version_range/epss_score.
INSERT INTO vuln.advisory_canonical (
id,
cve,
affects_key,
version_range,
weakness,
merge_hash,
status,
severity,
exploit_known,
title,
summary,
created_at,
updated_at
)
SELECT
gen_random_uuid() AS id,
COALESCE(
-- Prefer the CVE alias; fall back to primary_vuln_id
(SELECT alias_value FROM vuln.advisory_aliases
WHERE advisory_id = a.id AND alias_type = 'CVE'
ORDER BY is_primary DESC LIMIT 1),
a.primary_vuln_id
) AS cve,
'unknown:' || a.primary_vuln_id AS affects_key,
NULL AS version_range,
COALESCE(
(SELECT array_agg(DISTINCT upper(w.cwe_id) ORDER BY upper(w.cwe_id))
FROM vuln.advisory_weaknesses w
WHERE w.advisory_id = a.id),
'{}'::text[]
) AS weakness,
-- Placeholder merge_hash - backfilled later by MergeHashBackfillService
'PLACEHOLDER_' || a.id::text || '_noaffects' AS merge_hash,
CASE
WHEN a.withdrawn_at IS NOT NULL THEN 'withdrawn'
ELSE 'active'
END AS status,
a.severity,
EXISTS(SELECT 1 FROM vuln.kev_flags kf WHERE kf.advisory_id = a.id) AS exploit_known,
a.title,
a.summary,
a.created_at,
NOW() AS updated_at
FROM vuln.advisories a
WHERE NOT EXISTS (
-- Only advisories with no affected-package rows
SELECT 1 FROM vuln.advisory_affected aa WHERE aa.advisory_id = a.id
)
AND NOT EXISTS (
-- Idempotency guard (see wildcard caveat on the first INSERT above)
SELECT 1 FROM vuln.advisory_canonical c
WHERE c.merge_hash LIKE 'PLACEHOLDER_' || a.id::text || '%'
)
ON CONFLICT (merge_hash) DO NOTHING;
-- Log migration progress
-- Informational only: reports totals; does not abort the migration.
DO $$
DECLARE
canonical_count BIGINT;
placeholder_count BIGINT;
BEGIN
SELECT COUNT(*) INTO canonical_count FROM vuln.advisory_canonical;
SELECT COUNT(*) INTO placeholder_count FROM vuln.advisory_canonical WHERE merge_hash LIKE 'PLACEHOLDER_%';
RAISE NOTICE 'Migration 012 complete: % canonical records, % with placeholder hash (need backfill)',
canonical_count, placeholder_count;
END $$;

View File

@@ -0,0 +1,129 @@
-- Concelier Migration 013: Populate advisory_source_edge table
-- Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
-- Task: SCHEMA-8200-013
-- Creates source edges from existing advisory snapshots and provenance data
-- Create source edges from advisory snapshots
INSERT INTO vuln.advisory_source_edge (
id,
canonical_id,
source_id,
source_advisory_id,
source_doc_hash,
vendor_status,
precedence_rank,
dsse_envelope,
raw_payload,
fetched_at,
created_at
)
SELECT
gen_random_uuid() AS id,
c.id AS canonical_id,
a.source_id AS source_id,
a.advisory_key AS source_advisory_id,
snap.content_hash AS source_doc_hash,
CASE
WHEN a.withdrawn_at IS NOT NULL THEN 'not_affected'
ELSE 'affected'
END AS vendor_status,
COALESCE(s.priority, 100) AS precedence_rank,
NULL AS dsse_envelope, -- DSSE signatures added later
a.raw_payload AS raw_payload,
snap.created_at AS fetched_at,
NOW() AS created_at
FROM vuln.advisory_canonical c
JOIN vuln.advisories a ON (
-- Match by CVE
c.cve = a.primary_vuln_id
OR EXISTS (
SELECT 1 FROM vuln.advisory_aliases al
WHERE al.advisory_id = a.id AND al.alias_value = c.cve
)
)
JOIN vuln.advisory_snapshots snap ON snap.advisory_key = a.advisory_key
-- NOTE(review): feed_snapshots is INNER-joined but never referenced in the
-- SELECT; it silently drops snapshots whose feed_snapshot_id has no match.
-- Presumably intentional as an integrity filter -- confirm, or remove the join.
JOIN vuln.feed_snapshots fs ON fs.id = snap.feed_snapshot_id
LEFT JOIN vuln.sources s ON s.id = a.source_id
WHERE a.source_id IS NOT NULL
AND NOT EXISTS (
-- Skip if already migrated (idempotent)
SELECT 1 FROM vuln.advisory_source_edge e
WHERE e.canonical_id = c.id
AND e.source_id = a.source_id
AND e.source_doc_hash = snap.content_hash
)
ON CONFLICT (canonical_id, source_id, source_doc_hash) DO NOTHING;
-- Create source edges directly from advisories (for those without snapshots)
-- NOTE: the idempotency guard below is deliberately coarse -- it checks only
-- (canonical_id, source_id), so a re-run will not refresh an edge whose
-- payload hash changed; the unique constraint / ON CONFLICT still protects
-- against duplicates.
INSERT INTO vuln.advisory_source_edge (
    id,
    canonical_id,
    source_id,
    source_advisory_id,
    source_doc_hash,
    vendor_status,
    precedence_rank,
    dsse_envelope,
    raw_payload,
    fetched_at,
    created_at
)
SELECT
    gen_random_uuid() AS id,
    c.id AS canonical_id,
    a.source_id AS source_id,
    a.advisory_key AS source_advisory_id,
    -- Generate hash from raw_payload if available, otherwise from advisory_key.
    -- convert_to() is required: PostgreSQL has no text -> bytea cast, so the
    -- previous '::text::bytea' form raised "cannot cast type text to bytea"
    -- at execution time.
    COALESCE(
        encode(sha256(convert_to(a.raw_payload::text, 'UTF8')), 'hex'),
        encode(sha256(convert_to(a.advisory_key, 'UTF8')), 'hex')
    ) AS source_doc_hash,
    CASE
        WHEN a.withdrawn_at IS NOT NULL THEN 'not_affected'
        ELSE 'affected'
    END AS vendor_status,
    COALESCE(s.priority, 100) AS precedence_rank,
    NULL AS dsse_envelope,  -- DSSE signatures added later
    a.raw_payload AS raw_payload,
    a.created_at AS fetched_at,
    NOW() AS created_at
FROM vuln.advisory_canonical c
JOIN vuln.advisories a ON (
    c.cve = a.primary_vuln_id
    OR EXISTS (
        SELECT 1 FROM vuln.advisory_aliases al
        WHERE al.advisory_id = a.id AND al.alias_value = c.cve
    )
)
LEFT JOIN vuln.sources s ON s.id = a.source_id
WHERE a.source_id IS NOT NULL
AND NOT EXISTS (
    -- Only for advisories without snapshots (those were handled above)
    SELECT 1 FROM vuln.advisory_snapshots snap
    WHERE snap.advisory_key = a.advisory_key
)
AND NOT EXISTS (
    SELECT 1 FROM vuln.advisory_source_edge e
    WHERE e.canonical_id = c.id AND e.source_id = a.source_id
)
ON CONFLICT (canonical_id, source_id, source_doc_hash) DO NOTHING;
-- Log migration progress (informational; does not abort the migration)
DO $$
DECLARE
    edge_count BIGINT;
    canonical_with_edges BIGINT;
    avg_edges NUMERIC;
BEGIN
    SELECT COUNT(*) INTO edge_count FROM vuln.advisory_source_edge;
    SELECT COUNT(DISTINCT canonical_id) INTO canonical_with_edges FROM vuln.advisory_source_edge;
    IF canonical_with_edges > 0 THEN
        -- Pre-round for display: plpgsql RAISE supports only bare '%'
        -- placeholders, so the previous '%.2f' printed the full value
        -- followed by a literal '.2f'.
        avg_edges := round(edge_count::numeric / canonical_with_edges, 2);
    ELSE
        avg_edges := 0;
    END IF;
    RAISE NOTICE 'Migration 013 complete: % source edges, % canonicals with edges, avg % edges/canonical',
        edge_count, canonical_with_edges, avg_edges;
END $$;

View File

@@ -0,0 +1,165 @@
-- Concelier Migration 014: Verification queries for canonical migration
-- Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
-- Task: SCHEMA-8200-014
-- Verification queries to compare record counts and data integrity
-- Verification Report
DO $$
DECLARE
    -- Source counts
    advisory_count BIGINT;
    affected_count BIGINT;
    alias_count BIGINT;
    weakness_count BIGINT;
    kev_count BIGINT;
    snapshot_count BIGINT;
    source_count BIGINT;
    -- Target counts
    canonical_count BIGINT;
    canonical_active BIGINT;
    canonical_withdrawn BIGINT;
    canonical_placeholder BIGINT;
    edge_count BIGINT;
    edge_unique_sources BIGINT;
    edge_with_payload BIGINT;
    -- Integrity checks
    orphan_edges BIGINT;
    missing_sources BIGINT;
    duplicate_hashes BIGINT;
    avg_edges_per_canonical NUMERIC;
BEGIN
    -- Source table counts
    SELECT COUNT(*) INTO advisory_count FROM vuln.advisories;
    SELECT COUNT(*) INTO affected_count FROM vuln.advisory_affected;
    SELECT COUNT(*) INTO alias_count FROM vuln.advisory_aliases;
    SELECT COUNT(*) INTO weakness_count FROM vuln.advisory_weaknesses;
    SELECT COUNT(*) INTO kev_count FROM vuln.kev_flags;
    SELECT COUNT(*) INTO snapshot_count FROM vuln.advisory_snapshots;
    SELECT COUNT(*) INTO source_count FROM vuln.sources WHERE enabled = true;
    -- Target table counts
    SELECT COUNT(*) INTO canonical_count FROM vuln.advisory_canonical;
    SELECT COUNT(*) INTO canonical_active FROM vuln.advisory_canonical WHERE status = 'active';
    SELECT COUNT(*) INTO canonical_withdrawn FROM vuln.advisory_canonical WHERE status = 'withdrawn';
    SELECT COUNT(*) INTO canonical_placeholder FROM vuln.advisory_canonical WHERE merge_hash LIKE 'PLACEHOLDER_%';
    SELECT COUNT(*) INTO edge_count FROM vuln.advisory_source_edge;
    SELECT COUNT(DISTINCT source_id) INTO edge_unique_sources FROM vuln.advisory_source_edge;
    SELECT COUNT(*) INTO edge_with_payload FROM vuln.advisory_source_edge WHERE raw_payload IS NOT NULL;
    -- Integrity checks
    SELECT COUNT(*) INTO orphan_edges
    FROM vuln.advisory_source_edge e
    WHERE NOT EXISTS (SELECT 1 FROM vuln.advisory_canonical c WHERE c.id = e.canonical_id);
    SELECT COUNT(*) INTO missing_sources
    FROM vuln.advisory_source_edge e
    WHERE NOT EXISTS (SELECT 1 FROM vuln.sources s WHERE s.id = e.source_id);
    SELECT COUNT(*) INTO duplicate_hashes
    FROM (
        SELECT merge_hash, COUNT(*) as cnt
        FROM vuln.advisory_canonical
        GROUP BY merge_hash
        HAVING COUNT(*) > 1
    ) dups;
    IF canonical_count > 0 THEN
        -- Pre-round for display: plpgsql RAISE supports only bare '%'
        -- placeholders ('%.2f' would print the value followed by a
        -- literal '.2f').
        avg_edges_per_canonical := round(edge_count::numeric / canonical_count, 2);
    ELSE
        avg_edges_per_canonical := 0;
    END IF;
    -- Report
    RAISE NOTICE '============================================';
    RAISE NOTICE 'CANONICAL MIGRATION VERIFICATION REPORT';
    RAISE NOTICE '============================================';
    RAISE NOTICE '';
    RAISE NOTICE 'SOURCE TABLE COUNTS:';
    RAISE NOTICE '  Advisories:        %', advisory_count;
    RAISE NOTICE '  Affected packages: %', affected_count;
    RAISE NOTICE '  Aliases:           %', alias_count;
    RAISE NOTICE '  Weaknesses (CWE):  %', weakness_count;
    RAISE NOTICE '  KEV flags:         %', kev_count;
    RAISE NOTICE '  Snapshots:         %', snapshot_count;
    RAISE NOTICE '  Enabled sources:   %', source_count;
    RAISE NOTICE '';
    RAISE NOTICE 'TARGET TABLE COUNTS:';
    RAISE NOTICE '  Canonicals:        % (active: %, withdrawn: %)', canonical_count, canonical_active, canonical_withdrawn;
    RAISE NOTICE '  Placeholder hashes:% (need backfill)', canonical_placeholder;
    RAISE NOTICE '  Source edges:      %', edge_count;
    RAISE NOTICE '  Unique sources:    %', edge_unique_sources;
    RAISE NOTICE '  Edges with payload:%', edge_with_payload;
    RAISE NOTICE '';
    RAISE NOTICE 'METRICS:';
    RAISE NOTICE '  Avg edges/canonical: %', avg_edges_per_canonical;
    RAISE NOTICE '';
    RAISE NOTICE 'INTEGRITY CHECKS:';
    RAISE NOTICE '  Orphan edges:      % %', orphan_edges, CASE WHEN orphan_edges = 0 THEN '(OK)' ELSE '(FAIL)' END;
    RAISE NOTICE '  Missing sources:   % %', missing_sources, CASE WHEN missing_sources = 0 THEN '(OK)' ELSE '(FAIL)' END;
    RAISE NOTICE '  Duplicate hashes:  % %', duplicate_hashes, CASE WHEN duplicate_hashes = 0 THEN '(OK)' ELSE '(FAIL)' END;
    RAISE NOTICE '';
    -- Report verification outcome. NOTE: this only emits a NOTICE and does
    -- NOT abort the migration; escalate to RAISE EXCEPTION if a hard stop on
    -- integrity failure is desired.
    IF orphan_edges > 0 OR missing_sources > 0 OR duplicate_hashes > 0 THEN
        RAISE NOTICE 'VERIFICATION FAILED - Please investigate integrity issues';
    ELSE
        RAISE NOTICE 'VERIFICATION PASSED - Migration completed successfully';
    END IF;
    RAISE NOTICE '============================================';
END $$;
-- Additional verification queries (run individually for debugging)
-- Find CVEs that weren't migrated
-- SELECT a.primary_vuln_id, a.advisory_key, a.created_at
-- FROM vuln.advisories a
-- WHERE NOT EXISTS (
-- SELECT 1 FROM vuln.advisory_canonical c WHERE c.cve = a.primary_vuln_id
-- )
-- LIMIT 20;
-- Find canonicals without source edges
-- SELECT c.cve, c.affects_key, c.created_at
-- FROM vuln.advisory_canonical c
-- WHERE NOT EXISTS (
-- SELECT 1 FROM vuln.advisory_source_edge e WHERE e.canonical_id = c.id
-- )
-- LIMIT 20;
-- Distribution of edges per canonical
-- SELECT
-- CASE
-- WHEN edge_count = 0 THEN '0'
-- WHEN edge_count = 1 THEN '1'
-- WHEN edge_count BETWEEN 2 AND 5 THEN '2-5'
-- WHEN edge_count BETWEEN 6 AND 10 THEN '6-10'
-- ELSE '10+'
-- END AS edge_range,
-- COUNT(*) AS canonical_count
-- FROM (
-- SELECT c.id, COALESCE(e.edge_count, 0) AS edge_count
-- FROM vuln.advisory_canonical c
-- LEFT JOIN (
-- SELECT canonical_id, COUNT(*) AS edge_count
-- FROM vuln.advisory_source_edge
-- GROUP BY canonical_id
-- ) e ON e.canonical_id = c.id
-- ) sub
-- GROUP BY edge_range
-- ORDER BY edge_range;
-- Top CVEs by source coverage
-- SELECT
-- c.cve,
-- c.severity,
-- c.exploit_known,
-- COUNT(e.id) AS source_count
-- FROM vuln.advisory_canonical c
-- LEFT JOIN vuln.advisory_source_edge e ON e.canonical_id = c.id
-- GROUP BY c.id, c.cve, c.severity, c.exploit_known
-- ORDER BY source_count DESC
-- LIMIT 20;

View File

@@ -0,0 +1,85 @@
// -----------------------------------------------------------------------------
// AdvisoryCanonicalEntity.cs
// Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
// Task: SCHEMA-8200-007
// Description: Entity for deduplicated canonical advisory records
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Storage.Postgres.Models;

/// <summary>
/// Row model for <c>vuln.advisory_canonical</c>: one record per deduplicated
/// advisory identity, keyed by its semantic <see cref="MergeHash"/>.
/// </summary>
public sealed class AdvisoryCanonicalEntity
{
    /// <summary>Primary key of the canonical record.</summary>
    public required Guid Id { get; init; }

    /// <summary>CVE identifier, e.g. "CVE-2024-1234".</summary>
    public required string Cve { get; init; }

    /// <summary>Normalized PURL or CPE naming the affected package.</summary>
    public required string AffectsKey { get; init; }

    /// <summary>JSON document describing the version range (introduced, fixed, last_affected), if known.</summary>
    public string? VersionRange { get; init; }

    /// <summary>CWE identifiers in sorted order, e.g. ["CWE-79", "CWE-89"]; empty when none.</summary>
    public string[] Weakness { get; init; } = [];

    /// <summary>Deterministic SHA256 over (cve, affects_key, version_range, weakness, patch_lineage).</summary>
    public required string MergeHash { get; init; }

    /// <summary>Lifecycle state: "active" (default), "stub", or "withdrawn".</summary>
    public string Status { get; init; } = "active";

    /// <summary>Normalized severity (critical, high, medium, low, none, unknown), if known.</summary>
    public string? Severity { get; init; }

    /// <summary>EPSS exploit-prediction probability (0.0000-1.0000), if known.</summary>
    public decimal? EpssScore { get; init; }

    /// <summary>True when an exploit is known to exist; defaults to false.</summary>
    public bool ExploitKnown { get; init; }

    /// <summary>Advisory title, retained for stub degradation.</summary>
    public string? Title { get; init; }

    /// <summary>Advisory summary, retained for stub degradation.</summary>
    public string? Summary { get; init; }

    /// <summary>Creation timestamp of the canonical record.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>Last-update timestamp of the canonical record.</summary>
    public DateTimeOffset UpdatedAt { get; init; }
}

View File

@@ -0,0 +1,71 @@
// -----------------------------------------------------------------------------
// AdvisorySourceEdgeEntity.cs
// Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
// Task: SCHEMA-8200-008
// Description: Entity linking canonical advisory to source documents with DSSE
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Storage.Postgres.Models;

/// <summary>
/// Row model for <c>vuln.advisory_source_edge</c>: ties one canonical advisory
/// to one source document, carrying the DSSE envelope and raw payload for
/// provenance.
/// </summary>
public sealed class AdvisorySourceEdgeEntity
{
    /// <summary>Primary key of the edge record.</summary>
    public required Guid Id { get; init; }

    /// <summary>Id of the deduplicated canonical advisory this edge points at.</summary>
    public required Guid CanonicalId { get; init; }

    /// <summary>Id of the feed source that produced the document.</summary>
    public required Guid SourceId { get; init; }

    /// <summary>Vendor's own advisory id, e.g. "DSA-5678" or "RHSA-2024:1234".</summary>
    public required string SourceAdvisoryId { get; init; }

    /// <summary>SHA256 hash of the raw source document.</summary>
    public required string SourceDocHash { get; init; }

    /// <summary>VEX-style status: affected, not_affected, fixed, under_investigation.</summary>
    public string? VendorStatus { get; init; }

    /// <summary>
    /// Source priority (lower = higher priority): vendor=10, distro=20,
    /// osv=30, nvd=40; defaults to 100.
    /// </summary>
    public int PrecedenceRank { get; init; } = 100;

    /// <summary>DSSE signature envelope as JSON ({ payloadType, payload, signatures[] }).</summary>
    public string? DsseEnvelope { get; init; }

    /// <summary>Original advisory document as JSON.</summary>
    public string? RawPayload { get; init; }

    /// <summary>When the source document was fetched.</summary>
    public DateTimeOffset FetchedAt { get; init; }

    /// <summary>When the edge record was created.</summary>
    public DateTimeOffset CreatedAt { get; init; }
}

View File

@@ -0,0 +1,74 @@
// -----------------------------------------------------------------------------
// SitePolicyEntity.cs
// Sprint: SPRINT_8200_0014_0001_DB_sync_ledger_schema
// Task: SYNC-8200-005
// Description: Entity for per-site federation governance policies
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Storage.Postgres.Models;

/// <summary>
/// Governance policy applied to a single remote federation site.
/// </summary>
public sealed class SitePolicyEntity
{
    /// <summary>Primary key of the policy record.</summary>
    public required Guid Id { get; init; }

    /// <summary>Identifier of the remote site this policy governs.</summary>
    public required string SiteId { get; init; }

    /// <summary>Human-readable name for the site, if set.</summary>
    public string? DisplayName { get; init; }

    /// <summary>Allow-list of source keys; an empty list allows every source.</summary>
    public string[] AllowedSources { get; init; } = [];

    /// <summary>Deny-list of source keys; entries here override the allow-list.</summary>
    public string[] DeniedSources { get; init; } = [];

    /// <summary>Upper bound on bundle size, in megabytes; defaults to 100.</summary>
    public int MaxBundleSizeMb { get; init; } = 100;

    /// <summary>Upper bound on items per bundle; defaults to 10000.</summary>
    public int MaxItemsPerBundle { get; init; } = 10000;

    /// <summary>True (default) when bundles must be cryptographically signed.</summary>
    public bool RequireSignature { get; init; } = true;

    /// <summary>Signing key ids or issuer patterns accepted for bundle verification.</summary>
    public string[] AllowedSigners { get; init; } = [];

    /// <summary>True (default) when this site policy is in effect.</summary>
    public bool Enabled { get; init; } = true;

    /// <summary>When the policy was created.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>When the policy was last updated.</summary>
    public DateTimeOffset UpdatedAt { get; init; }
}

View File

@@ -0,0 +1,49 @@
// -----------------------------------------------------------------------------
// SyncLedgerEntity.cs
// Sprint: SPRINT_8200_0014_0001_DB_sync_ledger_schema
// Task: SYNC-8200-004
// Description: Entity for tracking federation sync state per remote site
// -----------------------------------------------------------------------------
namespace StellaOps.Concelier.Storage.Postgres.Models;

/// <summary>
/// One sync-ledger entry recording a bundle import and the federation cursor
/// position for a remote site.
/// </summary>
public sealed class SyncLedgerEntity
{
    /// <summary>Primary key of the ledger entry.</summary>
    public required Guid Id { get; init; }

    /// <summary>Remote site identifier, e.g. "site-us-west" or "airgap-dc2".</summary>
    public required string SiteId { get; init; }

    /// <summary>Opaque cursor position, usually "ISO8601-timestamp#sequence".</summary>
    public required string Cursor { get; init; }

    /// <summary>SHA256 hash of the imported bundle, used for deduplication.</summary>
    public required string BundleHash { get; init; }

    /// <summary>Number of items the imported bundle contained.</summary>
    public int ItemsCount { get; init; }

    /// <summary>When the remote site signed the bundle.</summary>
    public DateTimeOffset SignedAt { get; init; }

    /// <summary>When this site imported the bundle.</summary>
    public DateTimeOffset ImportedAt { get; init; }
}

View File

@@ -0,0 +1,429 @@
// -----------------------------------------------------------------------------
// AdvisoryCanonicalRepository.cs
// Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
// Task: SCHEMA-8200-010
// Description: PostgreSQL repository for canonical advisory and source edge operations
// -----------------------------------------------------------------------------
using System.Runtime.CompilerServices;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for canonical advisory and source edge operations.
/// </summary>
public sealed class AdvisoryCanonicalRepository : RepositoryBase<ConcelierDataSource>, IAdvisoryCanonicalRepository
{
private const string SystemTenantId = "_system";
/// <summary>
/// Creates the repository over the Concelier PostgreSQL data source.
/// </summary>
public AdvisoryCanonicalRepository(ConcelierDataSource dataSource, ILogger<AdvisoryCanonicalRepository> logger)
: base(dataSource, logger)
{
}
#region Canonical Advisory Operations
/// <summary>
/// Loads a canonical advisory by primary key; null when no row matches.
/// </summary>
public Task<AdvisoryCanonicalEntity?> GetByIdAsync(Guid id, CancellationToken ct = default)
{
// version_range is JSONB in the table; cast to text so the mapper reads a string.
const string sql = """
SELECT id, cve, affects_key, version_range::text, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary,
created_at, updated_at
FROM vuln.advisory_canonical
WHERE id = @id
""";
return QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "id", id),
MapCanonical,
ct);
}
/// <summary>
/// Loads a canonical advisory by its semantic merge hash (unique column);
/// null when no row matches.
/// </summary>
public Task<AdvisoryCanonicalEntity?> GetByMergeHashAsync(string mergeHash, CancellationToken ct = default)
{
const string sql = """
SELECT id, cve, affects_key, version_range::text, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary,
created_at, updated_at
FROM vuln.advisory_canonical
WHERE merge_hash = @merge_hash
""";
return QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "merge_hash", mergeHash),
MapCanonical,
ct);
}
/// <summary>
/// Lists all canonical advisories for a CVE (one per affects_key),
/// most recently updated first.
/// </summary>
public Task<IReadOnlyList<AdvisoryCanonicalEntity>> GetByCveAsync(string cve, CancellationToken ct = default)
{
const string sql = """
SELECT id, cve, affects_key, version_range::text, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary,
created_at, updated_at
FROM vuln.advisory_canonical
WHERE cve = @cve
ORDER BY updated_at DESC
""";
return QueryAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "cve", cve),
MapCanonical,
ct);
}
/// <summary>
/// Lists all canonical advisories for an affects key (normalized PURL/CPE),
/// most recently updated first.
/// </summary>
public Task<IReadOnlyList<AdvisoryCanonicalEntity>> GetByAffectsKeyAsync(string affectsKey, CancellationToken ct = default)
{
const string sql = """
SELECT id, cve, affects_key, version_range::text, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary,
created_at, updated_at
FROM vuln.advisory_canonical
WHERE affects_key = @affects_key
ORDER BY updated_at DESC
""";
return QueryAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "affects_key", affectsKey),
MapCanonical,
ct);
}
/// <summary>
/// Pages canonical advisories updated strictly after <paramref name="since"/>,
/// oldest first, capped at <paramref name="limit"/> rows. Intended for
/// incremental sweeps: pass the last row's updated_at as the next cursor.
/// </summary>
public Task<IReadOnlyList<AdvisoryCanonicalEntity>> GetUpdatedSinceAsync(
DateTimeOffset since,
int limit = 1000,
CancellationToken ct = default)
{
const string sql = """
SELECT id, cve, affects_key, version_range::text, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary,
created_at, updated_at
FROM vuln.advisory_canonical
WHERE updated_at > @since
ORDER BY updated_at ASC
LIMIT @limit
""";
return QueryAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "since", since);
AddParameter(cmd, "limit", limit);
},
MapCanonical,
ct);
}
/// <summary>
/// Inserts a canonical advisory, or merges into the existing row when the
/// merge_hash already exists, and returns the surviving row's id.
/// </summary>
/// <remarks>
/// Merge semantics mirror the vuln.upsert_canonical SQL function: on
/// conflict, severity/epss_score/title/summary are overwritten only by
/// non-null incoming values, exploit_known is sticky (logical OR), and
/// cve/affects_key/version_range/weakness/status are left unchanged. The
/// returned id may differ from <paramref name="entity"/>.Id when an
/// existing row wins the conflict.
/// </remarks>
public async Task<Guid> UpsertAsync(AdvisoryCanonicalEntity entity, CancellationToken ct = default)
{
const string sql = """
INSERT INTO vuln.advisory_canonical
(id, cve, affects_key, version_range, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary)
VALUES
(@id, @cve, @affects_key, @version_range::jsonb, @weakness, @merge_hash,
@status, @severity, @epss_score, @exploit_known, @title, @summary)
ON CONFLICT (merge_hash) DO UPDATE SET
severity = COALESCE(EXCLUDED.severity, vuln.advisory_canonical.severity),
epss_score = COALESCE(EXCLUDED.epss_score, vuln.advisory_canonical.epss_score),
exploit_known = EXCLUDED.exploit_known OR vuln.advisory_canonical.exploit_known,
title = COALESCE(EXCLUDED.title, vuln.advisory_canonical.title),
summary = COALESCE(EXCLUDED.summary, vuln.advisory_canonical.summary),
updated_at = NOW()
RETURNING id
""";
// Generate an id client-side when the caller did not supply one.
var id = entity.Id == Guid.Empty ? Guid.NewGuid() : entity.Id;
return await ExecuteScalarAsync<Guid>(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "id", id);
AddParameter(cmd, "cve", entity.Cve);
AddParameter(cmd, "affects_key", entity.AffectsKey);
AddJsonbParameter(cmd, "version_range", entity.VersionRange);
AddTextArrayParameter(cmd, "weakness", entity.Weakness);
AddParameter(cmd, "merge_hash", entity.MergeHash);
AddParameter(cmd, "status", entity.Status);
AddParameter(cmd, "severity", entity.Severity);
AddParameter(cmd, "epss_score", entity.EpssScore);
AddParameter(cmd, "exploit_known", entity.ExploitKnown);
AddParameter(cmd, "title", entity.Title);
AddParameter(cmd, "summary", entity.Summary);
},
ct).ConfigureAwait(false);
}
/// <summary>
/// Sets the lifecycle status (active/stub/withdrawn) of a canonical advisory
/// and bumps its updated_at. No-op when the id does not exist.
/// </summary>
public async Task UpdateStatusAsync(Guid id, string status, CancellationToken ct = default)
{
const string sql = """
UPDATE vuln.advisory_canonical
SET status = @status, updated_at = NOW()
WHERE id = @id
""";
await ExecuteAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "id", id);
AddParameter(cmd, "status", status);
},
ct).ConfigureAwait(false);
}
/// <summary>
/// Hard-deletes a canonical advisory row. Dependent source edges are expected
/// to be handled by the database (FK cascade) -- confirm schema before use.
/// </summary>
public async Task DeleteAsync(Guid id, CancellationToken ct = default)
{
const string sql = "DELETE FROM vuln.advisory_canonical WHERE id = @id";
await ExecuteAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "id", id),
ct).ConfigureAwait(false);
}
/// <summary>
/// Counts canonical advisories with status = 'active' only (stub and
/// withdrawn rows are excluded).
/// </summary>
public async Task<long> CountAsync(CancellationToken ct = default)
{
const string sql = "SELECT COUNT(*) FROM vuln.advisory_canonical WHERE status = 'active'";
return await ExecuteScalarAsync<long>(
SystemTenantId,
sql,
null,
ct).ConfigureAwait(false);
}
/// <summary>
/// Streams all active canonical advisories in id order without materializing
/// the full result set; rows are yielded as the reader advances.
/// </summary>
public async IAsyncEnumerable<AdvisoryCanonicalEntity> StreamActiveAsync(
[EnumeratorCancellation] CancellationToken ct = default)
{
const string sql = """
SELECT id, cve, affects_key, version_range::text, weakness, merge_hash,
status, severity, epss_score, exploit_known, title, summary,
created_at, updated_at
FROM vuln.advisory_canonical
WHERE status = 'active'
ORDER BY id
""";
// Connection/command/reader are disposed when enumeration completes or is cancelled.
await using var connection = await DataSource.OpenSystemConnectionAsync(ct).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
await using var reader = await command.ExecuteReaderAsync(ct).ConfigureAwait(false);
while (await reader.ReadAsync(ct).ConfigureAwait(false))
{
yield return MapCanonical(reader);
}
}
#endregion
#region Source Edge Operations
/// <summary>
/// Loads every source edge attached to the given canonical advisory,
/// best (lowest) precedence rank first, then most recently fetched.
/// </summary>
public Task<IReadOnlyList<AdvisorySourceEdgeEntity>> GetSourceEdgesAsync(Guid canonicalId, CancellationToken ct = default)
    => QueryAsync(
        SystemTenantId,
        """
        SELECT id, canonical_id, source_id, source_advisory_id, source_doc_hash,
               vendor_status, precedence_rank, dsse_envelope::text, raw_payload::text,
               fetched_at, created_at
        FROM vuln.advisory_source_edge
        WHERE canonical_id = @canonical_id
        ORDER BY precedence_rank ASC, fetched_at DESC
        """,
        command => AddParameter(command, "canonical_id", canonicalId),
        MapSourceEdge,
        ct);
/// <summary>
/// Fetches a single source edge by its primary key, or null when absent.
/// </summary>
public Task<AdvisorySourceEdgeEntity?> GetSourceEdgeByIdAsync(Guid id, CancellationToken ct = default)
    => QuerySingleOrDefaultAsync(
        SystemTenantId,
        """
        SELECT id, canonical_id, source_id, source_advisory_id, source_doc_hash,
               vendor_status, precedence_rank, dsse_envelope::text, raw_payload::text,
               fetched_at, created_at
        FROM vuln.advisory_source_edge
        WHERE id = @id
        """,
        command => AddParameter(command, "id", id),
        MapSourceEdge,
        ct);
/// <summary>
/// Inserts a source edge, or — when (canonical_id, source_id, source_doc_hash)
/// already exists — merges the incoming row into it: vendor status and payloads
/// only overwrite with non-null values, and the best (lowest) precedence rank
/// wins. Returns the id of the inserted or pre-existing row.
/// </summary>
public async Task<Guid> AddSourceEdgeAsync(AdvisorySourceEdgeEntity edge, CancellationToken ct = default)
{
    const string sql = """
        INSERT INTO vuln.advisory_source_edge
            (id, canonical_id, source_id, source_advisory_id, source_doc_hash,
             vendor_status, precedence_rank, dsse_envelope, raw_payload, fetched_at)
        VALUES
            (@id, @canonical_id, @source_id, @source_advisory_id, @source_doc_hash,
             @vendor_status, @precedence_rank, @dsse_envelope::jsonb, @raw_payload::jsonb, @fetched_at)
        ON CONFLICT (canonical_id, source_id, source_doc_hash) DO UPDATE SET
            vendor_status = COALESCE(EXCLUDED.vendor_status, vuln.advisory_source_edge.vendor_status),
            precedence_rank = LEAST(EXCLUDED.precedence_rank, vuln.advisory_source_edge.precedence_rank),
            dsse_envelope = COALESCE(EXCLUDED.dsse_envelope, vuln.advisory_source_edge.dsse_envelope),
            raw_payload = COALESCE(EXCLUDED.raw_payload, vuln.advisory_source_edge.raw_payload)
        RETURNING id
        """;
    // Callers may leave Id/FetchedAt unset; fill in sensible defaults up front.
    var edgeId = edge.Id == Guid.Empty ? Guid.NewGuid() : edge.Id;
    var fetchedAt = edge.FetchedAt == default ? DateTimeOffset.UtcNow : edge.FetchedAt;
    return await ExecuteScalarAsync<Guid>(
        SystemTenantId,
        sql,
        command =>
        {
            AddParameter(command, "id", edgeId);
            AddParameter(command, "canonical_id", edge.CanonicalId);
            AddParameter(command, "source_id", edge.SourceId);
            AddParameter(command, "source_advisory_id", edge.SourceAdvisoryId);
            AddParameter(command, "source_doc_hash", edge.SourceDocHash);
            AddParameter(command, "vendor_status", edge.VendorStatus);
            AddParameter(command, "precedence_rank", edge.PrecedenceRank);
            AddJsonbParameter(command, "dsse_envelope", edge.DsseEnvelope);
            AddJsonbParameter(command, "raw_payload", edge.RawPayload);
            AddParameter(command, "fetched_at", fetchedAt);
        },
        ct).ConfigureAwait(false);
}
/// <summary>
/// Finds all source edges carrying a given upstream (vendor) advisory id,
/// most recently fetched first.
/// </summary>
public Task<IReadOnlyList<AdvisorySourceEdgeEntity>> GetSourceEdgesByAdvisoryIdAsync(
    string sourceAdvisoryId,
    CancellationToken ct = default)
    => QueryAsync(
        SystemTenantId,
        """
        SELECT id, canonical_id, source_id, source_advisory_id, source_doc_hash,
               vendor_status, precedence_rank, dsse_envelope::text, raw_payload::text,
               fetched_at, created_at
        FROM vuln.advisory_source_edge
        WHERE source_advisory_id = @source_advisory_id
        ORDER BY fetched_at DESC
        """,
        command => AddParameter(command, "source_advisory_id", sourceAdvisoryId),
        MapSourceEdge,
        ct);
/// <summary>
/// Returns the total number of source-edge rows.
/// </summary>
public Task<long> CountSourceEdgesAsync(CancellationToken ct = default)
    => ExecuteScalarAsync<long>(
        SystemTenantId,
        "SELECT COUNT(*) FROM vuln.advisory_source_edge",
        null,
        ct);
#endregion
#region Statistics
/// <summary>
/// Computes aggregate counts over canonical advisories and their source edges
/// in a single round-trip.
/// </summary>
/// <returns>
/// Populated statistics, or an all-zero <see cref="CanonicalStatistics"/> when
/// the query yields no row.
/// </returns>
public async Task<CanonicalStatistics> GetStatisticsAsync(CancellationToken ct = default)
{
    const string sql = """
        SELECT
            (SELECT COUNT(*) FROM vuln.advisory_canonical) AS total_canonicals,
            (SELECT COUNT(*) FROM vuln.advisory_canonical WHERE status = 'active') AS active_canonicals,
            (SELECT COUNT(*) FROM vuln.advisory_source_edge) AS total_edges,
            (SELECT MAX(updated_at) FROM vuln.advisory_canonical) AS last_updated
        """;
    // Map straight to the result record and coalesce the no-row case, mirroring
    // SyncLedgerRepository.GetStatisticsAsync; the previous anonymous-type
    // intermediate plus null-check remap was redundant.
    return await QuerySingleOrDefaultAsync(
        SystemTenantId,
        sql,
        _ => { },
        reader =>
        {
            var totalCanonicals = reader.GetInt64(0);
            var totalEdges = reader.GetInt64(2);
            return new CanonicalStatistics
            {
                TotalCanonicals = totalCanonicals,
                ActiveCanonicals = reader.GetInt64(1),
                TotalSourceEdges = totalEdges,
                // Guard against divide-by-zero when the table is empty.
                AvgSourceEdgesPerCanonical = totalCanonicals > 0
                    ? (double)totalEdges / totalCanonicals
                    : 0,
                LastUpdatedAt = GetNullableDateTimeOffset(reader, 3)
            };
        },
        ct).ConfigureAwait(false) ?? new CanonicalStatistics();
}
#endregion
#region Mappers
// Materializes one vuln.advisory_canonical row. Ordinals follow the shared
// SELECT column list used by the canonical queries above (id, cve, affects_key,
// version_range, weakness, merge_hash, status, severity, epss_score,
// exploit_known, title, summary, created_at, updated_at).
private static AdvisoryCanonicalEntity MapCanonical(NpgsqlDataReader reader) => new()
{
    Id = reader.GetGuid(0),
    Cve = reader.GetString(1),
    AffectsKey = reader.GetString(2),
    VersionRange = GetNullableString(reader, 3),
    // weakness is bound via AddTextArrayParameter (text[]); SQL NULL becomes an empty array.
    Weakness = reader.IsDBNull(4) ? [] : reader.GetFieldValue<string[]>(4),
    MergeHash = reader.GetString(5),
    Status = reader.GetString(6),
    Severity = GetNullableString(reader, 7),
    EpssScore = reader.IsDBNull(8) ? null : reader.GetDecimal(8),
    ExploitKnown = reader.GetBoolean(9),
    Title = GetNullableString(reader, 10),
    Summary = GetNullableString(reader, 11),
    CreatedAt = reader.GetFieldValue<DateTimeOffset>(12),
    UpdatedAt = reader.GetFieldValue<DateTimeOffset>(13)
};
// Materializes one vuln.advisory_source_edge row. Ordinals follow the shared
// SELECT column list used by the source-edge queries above.
private static AdvisorySourceEdgeEntity MapSourceEdge(NpgsqlDataReader reader) => new()
{
    Id = reader.GetGuid(0),
    CanonicalId = reader.GetGuid(1),
    SourceId = reader.GetGuid(2),
    SourceAdvisoryId = reader.GetString(3),
    SourceDocHash = reader.GetString(4),
    VendorStatus = GetNullableString(reader, 5),
    PrecedenceRank = reader.GetInt32(6),
    // dsse_envelope / raw_payload are selected as ::text, so they arrive as strings.
    DsseEnvelope = GetNullableString(reader, 7),
    RawPayload = GetNullableString(reader, 8),
    FetchedAt = reader.GetFieldValue<DateTimeOffset>(9),
    CreatedAt = reader.GetFieldValue<DateTimeOffset>(10)
};
#endregion
}

View File

@@ -0,0 +1,144 @@
// -----------------------------------------------------------------------------
// IAdvisoryCanonicalRepository.cs
// Sprint: SPRINT_8200_0012_0002_DB_canonical_source_edge_schema
// Task: SCHEMA-8200-009
// Description: Repository interface for canonical advisory operations
// -----------------------------------------------------------------------------
using StellaOps.Concelier.Storage.Postgres.Models;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for canonical advisory and source edge operations.
/// </summary>
/// <remarks>
/// Canonical rows are keyed by merge hash (see <see cref="UpsertAsync"/>); each
/// canonical row may be backed by multiple source edges, one per upstream document.
/// </remarks>
public interface IAdvisoryCanonicalRepository
{
    #region Canonical Advisory Operations
    /// <summary>
    /// Gets a canonical advisory by ID.
    /// </summary>
    /// <returns>The advisory, or <c>null</c> when no row matches.</returns>
    Task<AdvisoryCanonicalEntity?> GetByIdAsync(Guid id, CancellationToken ct = default);
    /// <summary>
    /// Gets a canonical advisory by merge hash.
    /// </summary>
    /// <returns>The advisory, or <c>null</c> when no row matches.</returns>
    Task<AdvisoryCanonicalEntity?> GetByMergeHashAsync(string mergeHash, CancellationToken ct = default);
    /// <summary>
    /// Gets all canonical advisories for a CVE.
    /// </summary>
    Task<IReadOnlyList<AdvisoryCanonicalEntity>> GetByCveAsync(string cve, CancellationToken ct = default);
    /// <summary>
    /// Gets all canonical advisories for an affects key (PURL or CPE).
    /// </summary>
    Task<IReadOnlyList<AdvisoryCanonicalEntity>> GetByAffectsKeyAsync(string affectsKey, CancellationToken ct = default);
    /// <summary>
    /// Gets canonical advisories updated since a given time.
    /// </summary>
    Task<IReadOnlyList<AdvisoryCanonicalEntity>> GetUpdatedSinceAsync(
        DateTimeOffset since,
        int limit = 1000,
        CancellationToken ct = default);
    /// <summary>
    /// Upserts a canonical advisory (insert or update by merge_hash).
    /// </summary>
    /// <returns>Id of the inserted or updated row.</returns>
    Task<Guid> UpsertAsync(AdvisoryCanonicalEntity entity, CancellationToken ct = default);
    /// <summary>
    /// Updates the status of a canonical advisory.
    /// </summary>
    Task UpdateStatusAsync(Guid id, string status, CancellationToken ct = default);
    /// <summary>
    /// Deletes a canonical advisory and all its source edges (cascade).
    /// </summary>
    Task DeleteAsync(Guid id, CancellationToken ct = default);
    /// <summary>
    /// Counts total active canonical advisories.
    /// </summary>
    Task<long> CountAsync(CancellationToken ct = default);
    /// <summary>
    /// Streams all active canonical advisories for batch processing.
    /// </summary>
    IAsyncEnumerable<AdvisoryCanonicalEntity> StreamActiveAsync(CancellationToken ct = default);
    #endregion
    #region Source Edge Operations
    /// <summary>
    /// Gets all source edges for a canonical advisory.
    /// </summary>
    Task<IReadOnlyList<AdvisorySourceEdgeEntity>> GetSourceEdgesAsync(Guid canonicalId, CancellationToken ct = default);
    /// <summary>
    /// Gets a source edge by ID.
    /// </summary>
    /// <returns>The edge, or <c>null</c> when no row matches.</returns>
    Task<AdvisorySourceEdgeEntity?> GetSourceEdgeByIdAsync(Guid id, CancellationToken ct = default);
    /// <summary>
    /// Adds a source edge to a canonical advisory.
    /// </summary>
    /// <returns>Id of the inserted (or already existing, on conflict) edge.</returns>
    Task<Guid> AddSourceEdgeAsync(AdvisorySourceEdgeEntity edge, CancellationToken ct = default);
    /// <summary>
    /// Gets source edges by source advisory ID (vendor ID).
    /// </summary>
    Task<IReadOnlyList<AdvisorySourceEdgeEntity>> GetSourceEdgesByAdvisoryIdAsync(
        string sourceAdvisoryId,
        CancellationToken ct = default);
    /// <summary>
    /// Counts total source edges.
    /// </summary>
    Task<long> CountSourceEdgesAsync(CancellationToken ct = default);
    #endregion
    #region Statistics
    /// <summary>
    /// Gets statistics about canonical advisories.
    /// </summary>
    Task<CanonicalStatistics> GetStatisticsAsync(CancellationToken ct = default);
    #endregion
}
/// <summary>
/// Statistics about canonical advisory records.
/// </summary>
public sealed record CanonicalStatistics
{
    /// <summary>
    /// Total canonical advisory count.
    /// </summary>
    public long TotalCanonicals { get; init; }
    /// <summary>
    /// Active canonical advisory count.
    /// </summary>
    public long ActiveCanonicals { get; init; }
    /// <summary>
    /// Total source edge count.
    /// </summary>
    public long TotalSourceEdges { get; init; }
    /// <summary>
    /// Average source edges per canonical. Zero when there are no canonicals.
    /// </summary>
    public double AvgSourceEdgesPerCanonical { get; init; }
    /// <summary>
    /// Most recent canonical update time; <c>null</c> when no canonicals exist.
    /// </summary>
    public DateTimeOffset? LastUpdatedAt { get; init; }
}

View File

@@ -0,0 +1,130 @@
// -----------------------------------------------------------------------------
// ISyncLedgerRepository.cs
// Sprint: SPRINT_8200_0014_0001_DB_sync_ledger_schema
// Task: SYNC-8200-006
// Description: Repository interface for federation sync ledger operations
// -----------------------------------------------------------------------------
using StellaOps.Concelier.Storage.Postgres.Models;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
/// <summary>
/// Repository for federation sync ledger and site policy operations.
/// </summary>
/// <remarks>
/// The ledger is append-only: cursor advancement is recorded by inserting new
/// entries rather than updating existing ones (see <see cref="AdvanceCursorAsync"/>).
/// </remarks>
public interface ISyncLedgerRepository
{
    #region Ledger Operations
    /// <summary>
    /// Gets the latest sync ledger entry for a site.
    /// </summary>
    /// <returns>The most recent entry by signed_at, or <c>null</c> when none exist.</returns>
    Task<SyncLedgerEntity?> GetLatestAsync(string siteId, CancellationToken ct = default);
    /// <summary>
    /// Gets sync history for a site.
    /// </summary>
    Task<IReadOnlyList<SyncLedgerEntity>> GetHistoryAsync(string siteId, int limit = 10, CancellationToken ct = default);
    /// <summary>
    /// Gets a ledger entry by bundle hash (for deduplication).
    /// </summary>
    Task<SyncLedgerEntity?> GetByBundleHashAsync(string bundleHash, CancellationToken ct = default);
    /// <summary>
    /// Inserts a new ledger entry.
    /// </summary>
    /// <returns>Id of the inserted entry.</returns>
    Task<Guid> InsertAsync(SyncLedgerEntity entry, CancellationToken ct = default);
    #endregion
    #region Cursor Operations
    /// <summary>
    /// Gets the current cursor position for a site.
    /// </summary>
    Task<string?> GetCursorAsync(string siteId, CancellationToken ct = default);
    /// <summary>
    /// Advances the cursor to a new position (inserts a new ledger entry).
    /// </summary>
    Task AdvanceCursorAsync(
        string siteId,
        string newCursor,
        string bundleHash,
        int itemsCount,
        DateTimeOffset signedAt,
        CancellationToken ct = default);
    /// <summary>
    /// Checks if importing a bundle would conflict with existing cursor.
    /// Returns true if the cursor is not strictly newer than the current
    /// position (i.e. older than, or a duplicate of, the current cursor).
    /// </summary>
    Task<bool> IsCursorConflictAsync(string siteId, string cursor, CancellationToken ct = default);
    #endregion
    #region Site Policy Operations
    /// <summary>
    /// Gets the policy for a specific site.
    /// </summary>
    /// <returns>The policy, or <c>null</c> when the site has none configured.</returns>
    Task<SitePolicyEntity?> GetPolicyAsync(string siteId, CancellationToken ct = default);
    /// <summary>
    /// Creates or updates a site policy.
    /// </summary>
    Task UpsertPolicyAsync(SitePolicyEntity policy, CancellationToken ct = default);
    /// <summary>
    /// Gets all site policies.
    /// </summary>
    Task<IReadOnlyList<SitePolicyEntity>> GetAllPoliciesAsync(bool enabledOnly = true, CancellationToken ct = default);
    /// <summary>
    /// Deletes a site policy.
    /// </summary>
    /// <returns><c>true</c> when a row was removed.</returns>
    Task<bool> DeletePolicyAsync(string siteId, CancellationToken ct = default);
    #endregion
    #region Statistics
    /// <summary>
    /// Gets sync statistics across all sites.
    /// </summary>
    Task<SyncStatistics> GetStatisticsAsync(CancellationToken ct = default);
    #endregion
}
/// <summary>
/// Aggregated sync statistics across all sites.
/// </summary>
public sealed record SyncStatistics
{
    /// <summary>
    /// Total number of registered sites (sites with a policy row).
    /// </summary>
    public int TotalSites { get; init; }
    /// <summary>
    /// Number of enabled sites.
    /// </summary>
    public int EnabledSites { get; init; }
    /// <summary>
    /// Total bundles imported across all sites (ledger entry count).
    /// </summary>
    public long TotalBundlesImported { get; init; }
    /// <summary>
    /// Total items imported across all sites.
    /// </summary>
    public long TotalItemsImported { get; init; }
    /// <summary>
    /// Timestamp of the most recent import; <c>null</c> when nothing has been imported.
    /// </summary>
    public DateTimeOffset? LastImportAt { get; init; }
}

View File

@@ -0,0 +1,376 @@
// -----------------------------------------------------------------------------
// SyncLedgerRepository.cs
// Sprint: SPRINT_8200_0014_0001_DB_sync_ledger_schema
// Task: SYNC-8200-007
// Description: PostgreSQL repository for federation sync ledger operations
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for federation sync ledger and site policy operations.
/// </summary>
public sealed class SyncLedgerRepository : RepositoryBase<ConcelierDataSource>, ISyncLedgerRepository
{
    // Ledger and policy queries all run under the synthetic "_system" tenant id.
    private const string SystemTenantId = "_system";
    public SyncLedgerRepository(ConcelierDataSource dataSource, ILogger<SyncLedgerRepository> logger)
        : base(dataSource, logger)
    {
    }
    #region Ledger Operations
    /// <inheritdoc />
    public Task<SyncLedgerEntity?> GetLatestAsync(string siteId, CancellationToken ct = default)
    {
        const string sql = """
            SELECT id, site_id, cursor, bundle_hash, items_count, signed_at, imported_at
            FROM vuln.sync_ledger
            WHERE site_id = @site_id
            ORDER BY signed_at DESC
            LIMIT 1
            """;
        return QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "site_id", siteId),
            MapLedgerEntry,
            ct);
    }
    /// <inheritdoc />
    public Task<IReadOnlyList<SyncLedgerEntity>> GetHistoryAsync(string siteId, int limit = 10, CancellationToken ct = default)
    {
        const string sql = """
            SELECT id, site_id, cursor, bundle_hash, items_count, signed_at, imported_at
            FROM vuln.sync_ledger
            WHERE site_id = @site_id
            ORDER BY signed_at DESC
            LIMIT @limit
            """;
        return QueryAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "site_id", siteId);
                AddParameter(cmd, "limit", limit);
            },
            MapLedgerEntry,
            ct);
    }
    /// <inheritdoc />
    public Task<SyncLedgerEntity?> GetByBundleHashAsync(string bundleHash, CancellationToken ct = default)
    {
        const string sql = """
            SELECT id, site_id, cursor, bundle_hash, items_count, signed_at, imported_at
            FROM vuln.sync_ledger
            WHERE bundle_hash = @bundle_hash
            """;
        return QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "bundle_hash", bundleHash),
            MapLedgerEntry,
            ct);
    }
    /// <inheritdoc />
    public async Task<Guid> InsertAsync(SyncLedgerEntity entry, CancellationToken ct = default)
    {
        const string sql = """
            INSERT INTO vuln.sync_ledger
                (id, site_id, cursor, bundle_hash, items_count, signed_at, imported_at)
            VALUES
                (@id, @site_id, @cursor, @bundle_hash, @items_count, @signed_at, @imported_at)
            RETURNING id
            """;
        // Callers may leave Id/ImportedAt unset; default them here.
        var id = entry.Id == Guid.Empty ? Guid.NewGuid() : entry.Id;
        await ExecuteAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "id", id);
                AddParameter(cmd, "site_id", entry.SiteId);
                AddParameter(cmd, "cursor", entry.Cursor);
                AddParameter(cmd, "bundle_hash", entry.BundleHash);
                AddParameter(cmd, "items_count", entry.ItemsCount);
                AddParameter(cmd, "signed_at", entry.SignedAt);
                AddParameter(cmd, "imported_at", entry.ImportedAt == default ? DateTimeOffset.UtcNow : entry.ImportedAt);
            },
            ct).ConfigureAwait(false);
        return id;
    }
    #endregion
    #region Cursor Operations
    /// <inheritdoc />
    public async Task<string?> GetCursorAsync(string siteId, CancellationToken ct = default)
    {
        const string sql = """
            SELECT cursor
            FROM vuln.sync_ledger
            WHERE site_id = @site_id
            ORDER BY signed_at DESC
            LIMIT 1
            """;
        return await ExecuteScalarAsync<string>(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "site_id", siteId),
            ct).ConfigureAwait(false);
    }
    /// <inheritdoc />
    public async Task AdvanceCursorAsync(
        string siteId,
        string newCursor,
        string bundleHash,
        int itemsCount,
        DateTimeOffset signedAt,
        CancellationToken ct = default)
    {
        // The ledger is append-only: advancing the cursor just records a new entry.
        var entry = new SyncLedgerEntity
        {
            Id = Guid.NewGuid(),
            SiteId = siteId,
            Cursor = newCursor,
            BundleHash = bundleHash,
            ItemsCount = itemsCount,
            SignedAt = signedAt,
            ImportedAt = DateTimeOffset.UtcNow
        };
        await InsertAsync(entry, ct).ConfigureAwait(false);
    }
    /// <inheritdoc />
    public async Task<bool> IsCursorConflictAsync(string siteId, string cursor, CancellationToken ct = default)
    {
        var currentCursor = await GetCursorAsync(siteId, ct).ConfigureAwait(false);
        if (currentCursor is null)
        {
            // No existing cursor, no conflict
            return false;
        }
        // Conflict unless the incoming cursor is strictly newer than the current
        // position; an equal cursor (duplicate bundle) also counts as a conflict.
        return !CursorFormat.IsAfter(cursor, currentCursor);
    }
    #endregion
    #region Site Policy Operations
    /// <inheritdoc />
    public Task<SitePolicyEntity?> GetPolicyAsync(string siteId, CancellationToken ct = default)
    {
        const string sql = """
            SELECT id, site_id, display_name, allowed_sources, denied_sources,
                   max_bundle_size_mb, max_items_per_bundle, require_signature,
                   allowed_signers, enabled, created_at, updated_at
            FROM vuln.site_policy
            WHERE site_id = @site_id
            """;
        return QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "site_id", siteId),
            MapPolicy,
            ct);
    }
    /// <inheritdoc />
    public async Task UpsertPolicyAsync(SitePolicyEntity policy, CancellationToken ct = default)
    {
        // On conflict the incoming id is ignored and the existing row is updated in place.
        const string sql = """
            INSERT INTO vuln.site_policy
                (id, site_id, display_name, allowed_sources, denied_sources,
                 max_bundle_size_mb, max_items_per_bundle, require_signature,
                 allowed_signers, enabled)
            VALUES
                (@id, @site_id, @display_name, @allowed_sources, @denied_sources,
                 @max_bundle_size_mb, @max_items_per_bundle, @require_signature,
                 @allowed_signers, @enabled)
            ON CONFLICT (site_id) DO UPDATE SET
                display_name = EXCLUDED.display_name,
                allowed_sources = EXCLUDED.allowed_sources,
                denied_sources = EXCLUDED.denied_sources,
                max_bundle_size_mb = EXCLUDED.max_bundle_size_mb,
                max_items_per_bundle = EXCLUDED.max_items_per_bundle,
                require_signature = EXCLUDED.require_signature,
                allowed_signers = EXCLUDED.allowed_signers,
                enabled = EXCLUDED.enabled,
                updated_at = NOW()
            """;
        await ExecuteAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "id", policy.Id == Guid.Empty ? Guid.NewGuid() : policy.Id);
                AddParameter(cmd, "site_id", policy.SiteId);
                AddParameter(cmd, "display_name", policy.DisplayName);
                AddTextArrayParameter(cmd, "allowed_sources", policy.AllowedSources);
                AddTextArrayParameter(cmd, "denied_sources", policy.DeniedSources);
                AddParameter(cmd, "max_bundle_size_mb", policy.MaxBundleSizeMb);
                AddParameter(cmd, "max_items_per_bundle", policy.MaxItemsPerBundle);
                AddParameter(cmd, "require_signature", policy.RequireSignature);
                AddTextArrayParameter(cmd, "allowed_signers", policy.AllowedSigners);
                AddParameter(cmd, "enabled", policy.Enabled);
            },
            ct).ConfigureAwait(false);
    }
    /// <inheritdoc />
    public Task<IReadOnlyList<SitePolicyEntity>> GetAllPoliciesAsync(bool enabledOnly = true, CancellationToken ct = default)
    {
        // SQL is assembled from fixed fragments only; no user input is concatenated.
        var sql = """
            SELECT id, site_id, display_name, allowed_sources, denied_sources,
                   max_bundle_size_mb, max_items_per_bundle, require_signature,
                   allowed_signers, enabled, created_at, updated_at
            FROM vuln.site_policy
            """;
        if (enabledOnly)
        {
            sql += " WHERE enabled = TRUE";
        }
        sql += " ORDER BY site_id";
        return QueryAsync(
            SystemTenantId,
            sql,
            _ => { },
            MapPolicy,
            ct);
    }
    /// <inheritdoc />
    public async Task<bool> DeletePolicyAsync(string siteId, CancellationToken ct = default)
    {
        const string sql = """
            DELETE FROM vuln.site_policy
            WHERE site_id = @site_id
            """;
        var rows = await ExecuteAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "site_id", siteId),
            ct).ConfigureAwait(false);
        return rows > 0;
    }
    #endregion
    #region Statistics
    /// <inheritdoc />
    public async Task<SyncStatistics> GetStatisticsAsync(CancellationToken ct = default)
    {
        const string sql = """
            SELECT
                (SELECT COUNT(DISTINCT site_id) FROM vuln.site_policy) AS total_sites,
                (SELECT COUNT(DISTINCT site_id) FROM vuln.site_policy WHERE enabled = TRUE) AS enabled_sites,
                (SELECT COUNT(*) FROM vuln.sync_ledger) AS total_bundles,
                (SELECT COALESCE(SUM(items_count), 0) FROM vuln.sync_ledger) AS total_items,
                (SELECT MAX(imported_at) FROM vuln.sync_ledger) AS last_import
            """;
        // The scalar subqueries always produce exactly one row; the coalesce is defensive.
        return await QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            _ => { },
            reader => new SyncStatistics
            {
                TotalSites = reader.GetInt32(0),
                EnabledSites = reader.GetInt32(1),
                TotalBundlesImported = reader.GetInt64(2),
                TotalItemsImported = reader.GetInt64(3),
                LastImportAt = GetNullableDateTimeOffset(reader, 4)
            },
            ct).ConfigureAwait(false) ?? new SyncStatistics();
    }
    #endregion
    #region Mappers
    // Materializes one vuln.sync_ledger row in SELECT column order.
    private static SyncLedgerEntity MapLedgerEntry(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(0),
        SiteId = reader.GetString(1),
        Cursor = reader.GetString(2),
        BundleHash = reader.GetString(3),
        ItemsCount = reader.GetInt32(4),
        SignedAt = reader.GetFieldValue<DateTimeOffset>(5),
        ImportedAt = reader.GetFieldValue<DateTimeOffset>(6)
    };
    // Materializes one vuln.site_policy row in SELECT column order.
    // NOTE(review): array columns are read without IsDBNull guards — assumes the
    // schema declares them NOT NULL (or with defaults); confirm against the migration.
    private static SitePolicyEntity MapPolicy(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(0),
        SiteId = reader.GetString(1),
        DisplayName = GetNullableString(reader, 2),
        AllowedSources = reader.GetFieldValue<string[]>(3),
        DeniedSources = reader.GetFieldValue<string[]>(4),
        MaxBundleSizeMb = reader.GetInt32(5),
        MaxItemsPerBundle = reader.GetInt32(6),
        RequireSignature = reader.GetBoolean(7),
        AllowedSigners = reader.GetFieldValue<string[]>(8),
        Enabled = reader.GetBoolean(9),
        CreatedAt = reader.GetFieldValue<DateTimeOffset>(10),
        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(11)
    };
    #endregion
}
/// <summary>
/// Cursor format utilities for federation sync.
/// Cursors encode an instant plus a tie-breaking sequence; compare them with
/// <see cref="IsAfter"/> rather than string comparison.
/// </summary>
public static class CursorFormat
{
    /// <summary>
    /// Creates a cursor from timestamp and sequence.
    /// Format: "2025-01-15T10:30:00.000Z#0042" (ISO-8601 round-trip "O" plus a 4-digit sequence).
    /// Formatting is pinned to the invariant culture so cursors are byte-stable across hosts.
    /// </summary>
    public static string Create(DateTimeOffset timestamp, int sequence = 0)
    {
        // FormattableString.Invariant prevents the host's regional settings from
        // leaking into the cursor text (e.g. culture-specific digit shapes).
        return FormattableString.Invariant($"{timestamp:O}#{sequence:D4}");
    }
    /// <summary>
    /// Parses a cursor into timestamp and sequence.
    /// A cursor without a '#sequence' suffix parses with sequence 0.
    /// </summary>
    /// <exception cref="FormatException">The cursor text is not in the expected format.</exception>
    public static (DateTimeOffset Timestamp, int Sequence) Parse(string cursor)
    {
        var parts = cursor.Split('#');
        // Parse with the invariant culture and RoundtripKind so the result does not
        // depend on regional settings; "O" output is culture-invariant by definition.
        var timestamp = DateTimeOffset.Parse(
            parts[0],
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.RoundtripKind);
        var sequence = parts.Length > 1
            ? int.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture)
            : 0;
        return (timestamp, sequence);
    }
    /// <summary>
    /// Compares two cursors. Returns true if cursor1 is strictly after cursor2
    /// (later timestamp, or same timestamp with a higher sequence).
    /// </summary>
    public static bool IsAfter(string cursor1, string cursor2)
    {
        var (ts1, seq1) = Parse(cursor1);
        var (ts2, seq2) = Parse(cursor2);
        if (ts1 != ts2) return ts1 > ts2;
        return seq1 > seq2;
    }
}

View File

@@ -0,0 +1,407 @@
// -----------------------------------------------------------------------------
// SitePolicyEnforcementService.cs
// Sprint: SPRINT_8200_0014_0001_DB_sync_ledger_schema
// Task: SYNC-8200-014
// Description: Enforces site federation policies including source allow/deny lists
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Concelier.Storage.Postgres.Repositories;
namespace StellaOps.Concelier.Storage.Postgres.Sync;
/// <summary>
/// Enforces site federation policies for bundle imports.
/// </summary>
public sealed class SitePolicyEnforcementService
{
    private readonly ISyncLedgerRepository _repository;
    private readonly ILogger<SitePolicyEnforcementService> _logger;
    public SitePolicyEnforcementService(
        ISyncLedgerRepository repository,
        ILogger<SitePolicyEnforcementService> logger)
    {
        _repository = repository ?? throw new ArgumentNullException(nameof(repository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }
    /// <summary>
    /// Validates whether a source is allowed for a given site.
    /// </summary>
    /// <param name="siteId">The site identifier.</param>
    /// <param name="sourceKey">The source key to validate.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Validation result indicating if the source is allowed.</returns>
    public async Task<SourceValidationResult> ValidateSourceAsync(
        string siteId,
        string sourceKey,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(siteId);
        ArgumentException.ThrowIfNullOrWhiteSpace(sourceKey);
        var policy = await _repository.GetPolicyAsync(siteId, ct).ConfigureAwait(false);
        if (policy is null)
        {
            // No configured policy is treated as "allow everything".
            _logger.LogDebug("No policy found for site {SiteId}, allowing source {SourceKey} by default", siteId, sourceKey);
            return SourceValidationResult.Allowed("No policy configured");
        }
        if (!policy.Enabled)
        {
            _logger.LogWarning("Site {SiteId} policy is disabled, rejecting source {SourceKey}", siteId, sourceKey);
            return SourceValidationResult.Denied("Site policy is disabled");
        }
        return ValidateSourceAgainstPolicy(policy, sourceKey);
    }
    /// <summary>
    /// Validates a source against a specific policy without fetching from repository.
    /// </summary>
    public SourceValidationResult ValidateSourceAgainstPolicy(SitePolicyEntity policy, string sourceKey)
    {
        ArgumentNullException.ThrowIfNull(policy);
        ArgumentException.ThrowIfNullOrWhiteSpace(sourceKey);
        // Denied list takes precedence
        if (IsSourceInList(policy.DeniedSources, sourceKey))
        {
            _logger.LogInformation(
                "Source {SourceKey} is explicitly denied for site {SiteId}",
                sourceKey, policy.SiteId);
            return SourceValidationResult.Denied($"Source '{sourceKey}' is in deny list");
        }
        // If allowed list is empty, all non-denied sources are allowed
        if (policy.AllowedSources.Length == 0)
        {
            _logger.LogDebug(
                "Source {SourceKey} allowed for site {SiteId} (no allow list restrictions)",
                sourceKey, policy.SiteId);
            return SourceValidationResult.Allowed("No allow list restrictions");
        }
        // Check if source is in allowed list
        if (IsSourceInList(policy.AllowedSources, sourceKey))
        {
            _logger.LogDebug(
                "Source {SourceKey} is explicitly allowed for site {SiteId}",
                sourceKey, policy.SiteId);
            return SourceValidationResult.Allowed("Source is in allow list");
        }
        // Source not in allowed list
        _logger.LogInformation(
            "Source {SourceKey} not in allow list for site {SiteId}",
            sourceKey, policy.SiteId);
        return SourceValidationResult.Denied($"Source '{sourceKey}' is not in allow list");
    }
    /// <summary>
    /// Validates multiple sources and returns results for each.
    /// The policy is fetched once and applied to every key; blank keys are skipped.
    /// </summary>
    public async Task<IReadOnlyDictionary<string, SourceValidationResult>> ValidateSourcesAsync(
        string siteId,
        IEnumerable<string> sourceKeys,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(siteId);
        ArgumentNullException.ThrowIfNull(sourceKeys);
        var policy = await _repository.GetPolicyAsync(siteId, ct).ConfigureAwait(false);
        var results = new Dictionary<string, SourceValidationResult>();
        foreach (var sourceKey in sourceKeys)
        {
            if (string.IsNullOrWhiteSpace(sourceKey))
            {
                continue;
            }
            if (policy is null)
            {
                results[sourceKey] = SourceValidationResult.Allowed("No policy configured");
            }
            else if (!policy.Enabled)
            {
                results[sourceKey] = SourceValidationResult.Denied("Site policy is disabled");
            }
            else
            {
                results[sourceKey] = ValidateSourceAgainstPolicy(policy, sourceKey);
            }
        }
        return results;
    }
    /// <summary>
    /// Filters a collection of source keys to only those allowed by the site policy.
    /// </summary>
    public async Task<IReadOnlyList<string>> FilterAllowedSourcesAsync(
        string siteId,
        IEnumerable<string> sourceKeys,
        CancellationToken ct = default)
    {
        var results = await ValidateSourcesAsync(siteId, sourceKeys, ct).ConfigureAwait(false);
        return results
            .Where(kvp => kvp.Value.IsAllowed)
            .Select(kvp => kvp.Key)
            .ToList();
    }
    // Membership test supporting exact matches and trailing-'*' prefix wildcards.
    private static bool IsSourceInList(string[] sourceList, string sourceKey)
    {
        if (sourceList.Length == 0)
        {
            return false;
        }
        foreach (var source in sourceList)
        {
            // Exact match (case-insensitive)
            if (string.Equals(source, sourceKey, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
            // Wildcard pattern match (e.g., "nvd-*" matches "nvd-cve", "nvd-cpe")
            if (source.EndsWith('*') && sourceKey.StartsWith(
                source[..^1], StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }
    #region Size Budget Tracking (SYNC-8200-015)
    /// <summary>
    /// Validates bundle size against site policy limits.
    /// </summary>
    /// <param name="siteId">The site identifier.</param>
    /// <param name="bundleSizeMb">Bundle size in megabytes.</param>
    /// <param name="itemsCount">Number of items in the bundle.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Validation result indicating if the bundle is within limits.</returns>
    public async Task<BundleSizeValidationResult> ValidateBundleSizeAsync(
        string siteId,
        decimal bundleSizeMb,
        int itemsCount,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(siteId);
        var policy = await _repository.GetPolicyAsync(siteId, ct).ConfigureAwait(false);
        if (policy is null)
        {
            _logger.LogDebug(
                "No policy found for site {SiteId}, allowing bundle (size={SizeMb}MB, items={Items})",
                siteId, bundleSizeMb, itemsCount);
            return BundleSizeValidationResult.Allowed("No policy configured", bundleSizeMb, itemsCount);
        }
        if (!policy.Enabled)
        {
            _logger.LogWarning("Site {SiteId} policy is disabled, rejecting bundle", siteId);
            return BundleSizeValidationResult.Denied(
                "Site policy is disabled",
                bundleSizeMb,
                itemsCount,
                policy.MaxBundleSizeMb,
                policy.MaxItemsPerBundle);
        }
        return ValidateBundleSizeAgainstPolicy(policy, bundleSizeMb, itemsCount);
    }
    /// <summary>
    /// Validates bundle size against a specific policy without fetching from repository.
    /// Both the size and item-count limits are checked; all violations are
    /// reported together in the denial reason.
    /// </summary>
    public BundleSizeValidationResult ValidateBundleSizeAgainstPolicy(
        SitePolicyEntity policy,
        decimal bundleSizeMb,
        int itemsCount)
    {
        ArgumentNullException.ThrowIfNull(policy);
        var violations = new List<string>();
        // Check size limit
        if (bundleSizeMb > policy.MaxBundleSizeMb)
        {
            violations.Add($"Bundle size ({bundleSizeMb:F2}MB) exceeds limit ({policy.MaxBundleSizeMb}MB)");
        }
        // Check items limit
        if (itemsCount > policy.MaxItemsPerBundle)
        {
            violations.Add($"Item count ({itemsCount}) exceeds limit ({policy.MaxItemsPerBundle})");
        }
        if (violations.Count > 0)
        {
            var reason = string.Join("; ", violations);
            _logger.LogWarning(
                "Bundle rejected for site {SiteId}: {Reason}",
                policy.SiteId, reason);
            return BundleSizeValidationResult.Denied(
                reason,
                bundleSizeMb,
                itemsCount,
                policy.MaxBundleSizeMb,
                policy.MaxItemsPerBundle);
        }
        _logger.LogDebug(
            "Bundle accepted for site {SiteId}: size={SizeMb}MB (limit={MaxSize}MB), items={Items} (limit={MaxItems})",
            policy.SiteId, bundleSizeMb, policy.MaxBundleSizeMb, itemsCount, policy.MaxItemsPerBundle);
        return BundleSizeValidationResult.Allowed(
            "Within size limits",
            bundleSizeMb,
            itemsCount,
            policy.MaxBundleSizeMb,
            policy.MaxItemsPerBundle);
    }
    /// <summary>
    /// Gets the remaining budget for a site based on recent imports.
    /// </summary>
    /// <param name="siteId">The site identifier.</param>
    /// <param name="windowHours">Time window in hours to consider for recent imports.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Remaining budget information.</returns>
    public async Task<SiteBudgetInfo> GetRemainingBudgetAsync(
        string siteId,
        int windowHours = 24,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(siteId);
        var policy = await _repository.GetPolicyAsync(siteId, ct).ConfigureAwait(false);
        var history = await _repository.GetHistoryAsync(siteId, limit: 100, ct).ConfigureAwait(false);
        // FIX: restrict to the requested window in BOTH branches so the reported
        // counts always match WindowHours; previously the no-policy branch
        // reported the unfiltered history while still claiming the window.
        var windowStart = DateTimeOffset.UtcNow.AddHours(-windowHours);
        var recentHistory = history.Where(h => h.ImportedAt >= windowStart).ToList();
        if (policy is null)
        {
            return new SiteBudgetInfo(
                SiteId: siteId,
                HasPolicy: false,
                MaxBundleSizeMb: int.MaxValue,
                MaxItemsPerBundle: int.MaxValue,
                RecentImportsCount: recentHistory.Count,
                RecentItemsImported: recentHistory.Sum(h => h.ItemsCount),
                WindowHours: windowHours);
        }
        return new SiteBudgetInfo(
            SiteId: siteId,
            HasPolicy: true,
            MaxBundleSizeMb: policy.MaxBundleSizeMb,
            MaxItemsPerBundle: policy.MaxItemsPerBundle,
            RecentImportsCount: recentHistory.Count,
            RecentItemsImported: recentHistory.Sum(h => h.ItemsCount),
            WindowHours: windowHours);
    }
    #endregion
}
/// <summary>
/// Result of source validation against site policy.
/// </summary>
public sealed record SourceValidationResult
{
    // Construction goes through the Allowed/Denied factories only.
    private SourceValidationResult(bool isAllowed, string reason)
        => (IsAllowed, Reason) = (isAllowed, reason);

    /// <summary>
    /// Whether the source is allowed.
    /// </summary>
    public bool IsAllowed { get; }

    /// <summary>
    /// Reason for the decision.
    /// </summary>
    public string Reason { get; }

    /// <summary>
    /// Creates an allowed result.
    /// </summary>
    public static SourceValidationResult Allowed(string reason) => new(isAllowed: true, reason);

    /// <summary>
    /// Creates a denied result.
    /// </summary>
    public static SourceValidationResult Denied(string reason) => new(isAllowed: false, reason);
}
/// <summary>
/// Result of bundle size validation against site policy.
/// </summary>
public sealed record BundleSizeValidationResult
{
    // Only the Allowed/Denied factories may construct instances.
    private BundleSizeValidationResult(
        bool isAllowed,
        string reason,
        decimal actualSizeMb,
        int actualItemCount,
        int? maxSizeMb,
        int? maxItems)
        => (IsAllowed, Reason, ActualSizeMb, ActualItemCount, MaxSizeMb, MaxItems)
            = (isAllowed, reason, actualSizeMb, actualItemCount, maxSizeMb, maxItems);

    /// <summary>Whether the bundle passed validation.</summary>
    public bool IsAllowed { get; }

    /// <summary>Human-readable explanation of the decision.</summary>
    public string Reason { get; }

    /// <summary>Actual bundle size in megabytes.</summary>
    public decimal ActualSizeMb { get; }

    /// <summary>Actual number of items in the bundle.</summary>
    public int ActualItemCount { get; }

    /// <summary>Policy size limit in megabytes, when one applied; otherwise null.</summary>
    public int? MaxSizeMb { get; }

    /// <summary>Policy item-count limit, when one applied; otherwise null.</summary>
    public int? MaxItems { get; }

    /// <summary>Creates a result for a bundle that is within limits.</summary>
    public static BundleSizeValidationResult Allowed(
        string reason,
        decimal actualSizeMb,
        int actualItemCount,
        int? maxSizeMb = null,
        int? maxItems = null)
        => new(true, reason, actualSizeMb, actualItemCount, maxSizeMb, maxItems);

    /// <summary>Creates a result for a bundle that violates one or more limits.</summary>
    public static BundleSizeValidationResult Denied(
        string reason,
        decimal actualSizeMb,
        int actualItemCount,
        int? maxSizeMb = null,
        int? maxItems = null)
        => new(false, reason, actualSizeMb, actualItemCount, maxSizeMb, maxItems);
}
/// <summary>
/// Information about a site's remaining import budget.
/// </summary>
/// <param name="SiteId">The site identifier the budget applies to.</param>
/// <param name="HasPolicy">Whether an explicit policy exists for the site; when false the limit values are int.MaxValue sentinels.</param>
/// <param name="MaxBundleSizeMb">Maximum allowed bundle size in megabytes from the policy.</param>
/// <param name="MaxItemsPerBundle">Maximum allowed items per bundle from the policy.</param>
/// <param name="RecentImportsCount">Number of imports observed for the site.</param>
/// <param name="RecentItemsImported">Total items across those imports.</param>
/// <param name="WindowHours">The time window, in hours, the caller requested for the usage figures.</param>
public sealed record SiteBudgetInfo(
    string SiteId,
    bool HasPolicy,
    int MaxBundleSizeMb,
    int MaxItemsPerBundle,
    int RecentImportsCount,
    int RecentItemsImported,
    int WindowHours);