using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Ghsa.Configuration;
using StellaOps.Concelier.Connector.Ghsa.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;

namespace StellaOps.Concelier.Connector.Ghsa;

public sealed class GhsaConnector : IFeedConnector
{
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        WriteIndented = false,
    };

    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly GhsaOptions _options;
    private readonly GhsaDiagnostics _diagnostics;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger _logger;
    private readonly object _rateLimitWarningLock = new();
    private readonly Dictionary<(string Phase, string Resource), bool> _rateLimitWarnings = new();

    public GhsaConnector(
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IOptions<GhsaOptions> options,
        GhsaDiagnostics diagnostics,
        TimeProvider? timeProvider,
        ILogger<GhsaConnector> logger)
    {
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public string SourceName => GhsaConnectorPlugin.SourceName;

    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);

        var now = _timeProvider.GetUtcNow();
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var pendingMappings = cursor.PendingMappings.ToHashSet();

        var since = cursor.CurrentWindowStart ?? cursor.LastUpdatedExclusive ?? now - _options.InitialBackfill;
        if (since > now)
        {
            since = now;
        }

        var until = cursor.CurrentWindowEnd ?? now;
        if (until <= since)
        {
            until = since + TimeSpan.FromMinutes(1);
        }

        var page = cursor.NextPage <= 0 ? 1 : cursor.NextPage;
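        // Page through the GHSA list endpoint within the [since, until) window,
        // fetching the detail document for every advisory returned on each page.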
        var pagesFetched = 0;
        var hasMore = true;
        var rateLimitHit = false;
        DateTimeOffset? maxUpdated = cursor.LastUpdatedExclusive;

        while (hasMore && pagesFetched < _options.MaxPagesPerFetch)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var listUri = BuildListUri(since, until, page, _options.PageSize);
            var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["since"] = since.ToString("O"),
                ["until"] = until.ToString("O"),
                ["page"] = page.ToString(CultureInfo.InvariantCulture),
                ["pageSize"] = _options.PageSize.ToString(CultureInfo.InvariantCulture),
            };

            SourceFetchContentResult listResult;
            try
            {
                _diagnostics.FetchAttempt();
                listResult = await _fetchService.FetchContentAsync(
                    new SourceFetchRequest(GhsaOptions.HttpClientName, SourceName, listUri)
                    {
                        Metadata = metadata,
                        AcceptHeaders = new[] { "application/vnd.github+json" },
                    },
                    cancellationToken).ConfigureAwait(false);
            }
            catch (HttpRequestException ex)
            {
                _diagnostics.FetchFailure();
                await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
                throw;
            }

            if (listResult.IsNotModified)
            {
                _diagnostics.FetchUnchanged();
                break;
            }

            if (!listResult.IsSuccess || listResult.Content is null)
            {
                _diagnostics.FetchFailure();
                break;
            }

            var deferList = await ApplyRateLimitAsync(listResult.Headers, "list", cancellationToken).ConfigureAwait(false);
            if (deferList)
            {
                rateLimitHit = true;
                break;
            }

            var pageModel = GhsaListParser.Parse(listResult.Content, page, _options.PageSize);
            if (pageModel.Items.Count == 0)
            {
                hasMore = false;
            }

            foreach (var item in pageModel.Items)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var detailUri = BuildDetailUri(item.GhsaId);
                var detailMetadata = new Dictionary<string, string>(StringComparer.Ordinal)
                {
                    ["ghsaId"] = item.GhsaId,
                    ["page"] = page.ToString(CultureInfo.InvariantCulture),
                    ["since"] = since.ToString("O"),
                    ["until"] = until.ToString("O"),
                };

                SourceFetchResult detailResult;
                try
                {
                    detailResult = await _fetchService.FetchAsync(
                        new SourceFetchRequest(GhsaOptions.HttpClientName, SourceName, detailUri)
                        {
                            Metadata = detailMetadata,
                            AcceptHeaders = new[] { "application/vnd.github+json" },
                        },
                        cancellationToken).ConfigureAwait(false);
                }
                catch (HttpRequestException ex)
                {
                    _diagnostics.FetchFailure();
                    _logger.LogWarning(ex, "Failed fetching GHSA advisory {GhsaId}", item.GhsaId);
                    continue;
                }

                if (detailResult.IsNotModified)
                {
                    _diagnostics.FetchUnchanged();
                    continue;
                }

                if (!detailResult.IsSuccess || detailResult.Document is null)
                {
                    _diagnostics.FetchFailure();
                    continue;
                }

                _diagnostics.FetchDocument();
                pendingDocuments.Add(detailResult.Document.Id);
                pendingMappings.Add(detailResult.Document.Id);

                var deferDetail = await ApplyRateLimitAsync(detailResult.Document.Headers, "detail", cancellationToken).ConfigureAwait(false);
                if (deferDetail)
                {
                    rateLimitHit = true;
                    break;
                }
            }

            if (rateLimitHit)
            {
                break;
            }

            if (pageModel.MaxUpdated.HasValue)
            {
                if (!maxUpdated.HasValue || pageModel.MaxUpdated > maxUpdated)
                {
                    maxUpdated = pageModel.MaxUpdated;
                }
            }

            hasMore = pageModel.HasMorePages;
            page = pageModel.NextPageCandidate;
            pagesFetched++;

            if (!rateLimitHit && hasMore && _options.RequestDelay > TimeSpan.Zero)
            {
                await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
            }
        }

        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings);

        if (hasMore || rateLimitHit)
        {
            // Keep the current window open so the next run resumes from the same page.
            updatedCursor = updatedCursor
                .WithCurrentWindowStart(since)
                .WithCurrentWindowEnd(until)
                .WithNextPage(page);
        }
        else
        {
            // Window completed: advance the cursor past the newest update observed.
            var nextSince = maxUpdated ?? until;
            updatedCursor = updatedCursor
                .WithLastUpdatedExclusive(nextSince)
                .WithCurrentWindowStart(null)
                .WithCurrentWindowEnd(null)
                .WithNextPage(1);
        }

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

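    // Downloads each pending raw document, parses it into a GhsaRecordDto, and stores
    // the DTO payload; malformed JSON is marked failed and removed from the queue.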
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);

        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }

        var remainingDocuments = cursor.PendingDocuments.ToList();
        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                remainingDocuments.Remove(documentId);
                continue;
            }

            if (!document.GridFsId.HasValue)
            {
                _diagnostics.ParseFailure();
                _logger.LogWarning("GHSA document {DocumentId} missing GridFS content", documentId);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                continue;
            }

            byte[] rawBytes;
            try
            {
                rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _diagnostics.ParseFailure();
                _logger.LogError(ex, "Unable to download GHSA raw document {DocumentId}", documentId);
                throw;
            }

            GhsaRecordDto dto;
            try
            {
                dto = GhsaRecordParser.Parse(rawBytes);
            }
            catch (JsonException ex)
            {
                _diagnostics.ParseQuarantine();
                _logger.LogError(ex, "Malformed GHSA JSON for {DocumentId}", documentId);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                continue;
            }

            var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
            var dtoRecord = new DtoRecord(
                Guid.NewGuid(),
                document.Id,
                SourceName,
                "ghsa/1.0",
                payload,
                _timeProvider.GetUtcNow());

            await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            _diagnostics.ParseSuccess();
        }

        var updatedCursor = cursor.WithPendingDocuments(remainingDocuments);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

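    // Projects stored GHSA DTOs into canonical advisories, upserts them, and marks
    // the originating documents as mapped.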
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);

        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }

        var pendingMappings = cursor.PendingMappings.ToList();
        foreach (var documentId in cursor.PendingMappings)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (dtoRecord is null || document is null)
            {
                _logger.LogWarning("Skipping GHSA mapping for {DocumentId}: DTO or document missing", documentId);
                pendingMappings.Remove(documentId);
                continue;
            }

            GhsaRecordDto dto;
            try
            {
                dto = JsonSerializer.Deserialize<GhsaRecordDto>(dtoRecord.Payload.ToJson(), SerializerOptions)
                    ?? throw new InvalidOperationException("Deserialized DTO was null.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to deserialize GHSA DTO for {DocumentId}", documentId);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }

            var advisory = GhsaMapper.Map(dto, document, dtoRecord.ValidatedAt);

            if (advisory.CvssMetrics.IsEmpty && !string.IsNullOrWhiteSpace(advisory.CanonicalMetricId))
            {
                var fallbackSeverity = string.IsNullOrWhiteSpace(advisory.Severity) ? "unknown" : advisory.Severity!;
                _diagnostics.CanonicalMetricFallback(advisory.CanonicalMetricId!, fallbackSeverity);
                if (_logger.IsEnabled(LogLevel.Debug))
                {
                    _logger.LogDebug(
                        "GHSA {GhsaId} emitted canonical metric fallback {CanonicalMetricId} (severity {Severity})",
                        advisory.AdvisoryKey,
                        advisory.CanonicalMetricId,
                        fallbackSeverity);
                }
            }

            await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            _diagnostics.MapSuccess(1);
        }

        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    private static Uri BuildListUri(DateTimeOffset since, DateTimeOffset until, int page, int pageSize)
    {
        var query = $"updated_since={Uri.EscapeDataString(since.ToString("O"))}&updated_until={Uri.EscapeDataString(until.ToString("O"))}&page={page}&per_page={pageSize}";
        return new Uri($"security/advisories?{query}", UriKind.Relative);
    }

    private static Uri BuildDetailUri(string ghsaId)
    {
        var encoded = Uri.EscapeDataString(ghsaId);
        return new Uri($"security/advisories/{encoded}", UriKind.Relative);
    }

    private async Task<GhsaCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return state is null ? GhsaCursor.Empty : GhsaCursor.FromBson(state.Cursor);
    }

    private async Task UpdateCursorAsync(GhsaCursor cursor, CancellationToken cancellationToken)
    {
        await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
    }

    private bool ShouldLogRateLimitWarning(in GhsaRateLimitSnapshot snapshot, out bool recovered)
    {
        recovered = false;
        if (!snapshot.Remaining.HasValue)
        {
            return false;
        }

        var key = (snapshot.Phase, snapshot.Resource ?? "global");
        var warn = snapshot.Remaining.Value <= _options.RateLimitWarningThreshold;

        lock (_rateLimitWarningLock)
        {
            var previouslyWarned = _rateLimitWarnings.TryGetValue(key, out var flagged) && flagged;
            if (warn)
            {
                if (previouslyWarned)
                {
                    return false;
                }

                _rateLimitWarnings[key] = true;
                return true;
            }

            if (previouslyWarned)
            {
                _rateLimitWarnings.Remove(key);
                recovered = true;
            }

            return false;
        }
    }

    private static double? CalculateHeadroomPercentage(in GhsaRateLimitSnapshot snapshot)
    {
        if (!snapshot.Limit.HasValue || !snapshot.Remaining.HasValue)
        {
            return null;
        }

        var limit = snapshot.Limit.Value;
        if (limit <= 0)
        {
            return null;
        }

        return (double)snapshot.Remaining.Value / limit * 100d;
    }

    private static string FormatHeadroom(double? headroomPct)
        => headroomPct.HasValue ? $" (headroom {headroomPct.Value:F1}%)" : string.Empty;

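    // Records rate-limit telemetry from the response headers and returns true when the
    // remaining budget is exhausted, so the caller can defer the rest of the window.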
    private async Task<bool> ApplyRateLimitAsync(IReadOnlyDictionary<string, string>? headers, string phase, CancellationToken cancellationToken)
    {
        var snapshot = GhsaRateLimitParser.TryParse(headers, _timeProvider.GetUtcNow(), phase);
        if (snapshot is null || !snapshot.Value.HasData)
        {
            return false;
        }

        _diagnostics.RecordRateLimit(snapshot.Value);
        var headroomPct = CalculateHeadroomPercentage(snapshot.Value);

        if (ShouldLogRateLimitWarning(snapshot.Value, out var recovered))
        {
            var resetMessage = snapshot.Value.ResetAfter.HasValue
                ? $" (resets in {snapshot.Value.ResetAfter.Value:c})"
                : snapshot.Value.ResetAt.HasValue
                    ? $" (resets at {snapshot.Value.ResetAt.Value:O})"
                    : string.Empty;

            _logger.LogWarning(
                "GHSA rate limit warning: remaining {Remaining} of {Limit} for {Phase} {Resource}{ResetMessage}{Headroom}",
                snapshot.Value.Remaining,
                snapshot.Value.Limit,
                phase,
                snapshot.Value.Resource ?? "global",
                resetMessage,
                FormatHeadroom(headroomPct));
        }
        else if (recovered)
        {
            _logger.LogInformation(
                "GHSA rate limit recovered for {Phase} {Resource}: remaining {Remaining} of {Limit}{Headroom}",
                phase,
                snapshot.Value.Resource ?? "global",
                snapshot.Value.Remaining,
                snapshot.Value.Limit,
                FormatHeadroom(headroomPct));
        }

        if (snapshot.Value.Remaining.HasValue && snapshot.Value.Remaining.Value <= 0)
        {
            _diagnostics.RateLimitExhausted(phase);
            var delay = snapshot.Value.RetryAfter ?? snapshot.Value.ResetAfter ?? _options.SecondaryRateLimitBackoff;
            if (delay > TimeSpan.Zero)
            {
                _logger.LogWarning(
                    "GHSA rate limit exhausted for {Phase} {Resource}; delaying {Delay}{Headroom}",
                    phase,
                    snapshot.Value.Resource ?? "global",
                    delay,
                    FormatHeadroom(headroomPct));
                await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
            }

            return true;
        }

        return false;
    }
}