548 lines
21 KiB
C#
548 lines
21 KiB
C#
using System.Collections.Generic;
|
|
using System.Globalization;
|
|
using System.Linq;
|
|
using System.Net.Http;
|
|
using System.Text.Json;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using MongoDB.Bson;
|
|
using StellaOps.Concelier.Models;
|
|
using StellaOps.Concelier.Connector.Common;
|
|
using StellaOps.Concelier.Connector.Common.Fetch;
|
|
using StellaOps.Concelier.Connector.Ghsa.Configuration;
|
|
using StellaOps.Concelier.Connector.Ghsa.Internal;
|
|
using StellaOps.Concelier.Storage.Mongo;
|
|
using StellaOps.Concelier.Storage.Mongo.Advisories;
|
|
using StellaOps.Concelier.Storage.Mongo.Documents;
|
|
using StellaOps.Concelier.Storage.Mongo.Dtos;
|
|
using StellaOps.Plugin;
|
|
|
|
namespace StellaOps.Concelier.Connector.Ghsa;
|
|
|
|
public sealed class GhsaConnector : IFeedConnector
|
|
{
|
|
/// <summary>
/// JSON settings used both when serializing parsed DTOs into BSON payloads and when reading them back for mapping.
/// </summary>
private static readonly JsonSerializerOptions SerializerOptions = new JsonSerializerOptions(JsonSerializerDefaults.Web)
{
    PropertyNameCaseInsensitive = true,
    WriteIndented = false,
};

// Issues the list and detail HTTP requests.
private readonly SourceFetchService _fetchService;

// Source of the raw bytes for documents that carry a GridFS id.
private readonly RawDocumentStorage _rawDocumentStorage;

// Document lookup and status transitions (Failed / PendingMap / Mapped).
private readonly IDocumentStore _documentStore;

// Persists the parsed DTO payloads between the parse and map stages.
private readonly IDtoStore _dtoStore;

// Destination for mapped advisories.
private readonly IAdvisoryStore _advisoryStore;

// Holds the persisted cursor and failure/backoff state for this source.
private readonly ISourceStateRepository _stateRepository;

// Validated connector options (validated once in the constructor).
private readonly GhsaOptions _options;

// Counters/metrics sink for fetch, parse, map and rate-limit events.
private readonly GhsaDiagnostics _diagnostics;

// Clock abstraction; every timestamp in this class is read through it.
private readonly TimeProvider _timeProvider;

private readonly ILogger<GhsaConnector> _logger;

// Guards _rateLimitWarnings.
private readonly object _rateLimitWarningLock = new object();

// (phase, resource) pairs for which a low-quota warning has been logged and not yet cleared.
private readonly Dictionary<(string Phase, string Resource), bool> _rateLimitWarnings =
    new Dictionary<(string Phase, string Resource), bool>();
|
|
|
|
/// <summary>
/// Creates the connector and validates its options.
/// </summary>
/// <param name="fetchService">HTTP fetch service used for list and detail requests.</param>
/// <param name="rawDocumentStorage">Storage from which raw document bytes are downloaded.</param>
/// <param name="documentStore">Store used to look up documents and update their status.</param>
/// <param name="dtoStore">Store for parsed DTO records.</param>
/// <param name="advisoryStore">Store for mapped advisories.</param>
/// <param name="stateRepository">Repository holding the cursor and failure state for this source.</param>
/// <param name="options">Options wrapper; its value must be non-null and is validated here.</param>
/// <param name="diagnostics">Diagnostics sink.</param>
/// <param name="timeProvider">Clock to use; <see cref="TimeProvider.System"/> is used when null.</param>
/// <param name="logger">Logger.</param>
/// <exception cref="ArgumentNullException">
/// Any argument other than <paramref name="timeProvider"/> is null, or <paramref name="options"/> wraps a null value.
/// </exception>
public GhsaConnector(
    SourceFetchService fetchService,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    IAdvisoryStore advisoryStore,
    ISourceStateRepository stateRepository,
    IOptions<GhsaOptions> options,
    GhsaDiagnostics diagnostics,
    TimeProvider? timeProvider,
    ILogger<GhsaConnector> logger)
{
    // Guards and assignments are interleaved in declaration order so the first
    // invalid argument determines which exception the caller sees.
    ArgumentNullException.ThrowIfNull(fetchService);
    _fetchService = fetchService;

    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    _rawDocumentStorage = rawDocumentStorage;

    ArgumentNullException.ThrowIfNull(documentStore);
    _documentStore = documentStore;

    ArgumentNullException.ThrowIfNull(dtoStore);
    _dtoStore = dtoStore;

    ArgumentNullException.ThrowIfNull(advisoryStore);
    _advisoryStore = advisoryStore;

    ArgumentNullException.ThrowIfNull(stateRepository);
    _stateRepository = stateRepository;

    ArgumentNullException.ThrowIfNull(options);
    var resolvedOptions = options.Value;
    if (resolvedOptions is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    _options = resolvedOptions;
    _options.Validate();

    ArgumentNullException.ThrowIfNull(diagnostics);
    _diagnostics = diagnostics;

    _timeProvider = timeProvider is null ? TimeProvider.System : timeProvider;

    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
|
|
|
|
/// <summary>
/// Name under which this connector's state is stored; taken from <c>GhsaConnectorPlugin.SourceName</c>.
/// </summary>
public string SourceName
{
    get { return GhsaConnectorPlugin.SourceName; }
}
|
|
|
|
/// <summary>
/// Fetches GHSA advisories updated inside the active time window and queues the fetched documents
/// for the parse and map stages.
/// </summary>
/// <remarks>
/// <para>
/// The window starts at the cursor's stored window start, else its last-updated watermark, else
/// <c>now - InitialBackfill</c>; it ends at the stored window end, else <c>now</c>. At most
/// <c>MaxPagesPerFetch</c> list pages are requested per call. Every advisory on a page is fetched
/// individually and its document id is added to both pending sets on the cursor.
/// </para>
/// <para>
/// If paging stops early (page budget reached, rate limit exhausted, or an unsuccessful/unmodified
/// list response) the window and current page are persisted so the next call resumes in place.
/// Otherwise the watermark moves to the newest update time seen (or the window end) and the window
/// is cleared.
/// </para>
/// </remarks>
/// <param name="services">Service provider supplied by the host; only null-checked here.</param>
/// <param name="cancellationToken">Observed before each page, before each advisory, and by every awaited call.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <exception cref="HttpRequestException">
/// A list request failed. The failure is recorded on the source state before rethrowing; the cursor is not updated.
/// </exception>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var now = _timeProvider.GetUtcNow();
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);

    var pendingDocuments = cursor.PendingDocuments.ToHashSet();
    var pendingMappings = cursor.PendingMappings.ToHashSet();

    // Prefer an interrupted window, then the watermark, then the initial backfill horizon.
    var since = cursor.CurrentWindowStart ?? cursor.LastUpdatedExclusive ?? now - _options.InitialBackfill;
    if (since > now)
    {
        since = now;
    }

    // Never query an empty or inverted window.
    var until = cursor.CurrentWindowEnd ?? now;
    if (until <= since)
    {
        until = since + TimeSpan.FromMinutes(1);
    }

    var page = cursor.NextPage <= 0 ? 1 : cursor.NextPage;
    var pagesFetched = 0;
    var hasMore = true;
    var rateLimitHit = false;
    DateTimeOffset? maxUpdated = cursor.LastUpdatedExclusive;

    while (hasMore && pagesFetched < _options.MaxPagesPerFetch)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var listUri = BuildListUri(since, until, page, _options.PageSize);
        var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            ["since"] = since.ToString("O"),
            ["until"] = until.ToString("O"),
            ["page"] = page.ToString(CultureInfo.InvariantCulture),
            ["pageSize"] = _options.PageSize.ToString(CultureInfo.InvariantCulture),
        };

        SourceFetchContentResult listResult;
        try
        {
            _diagnostics.FetchAttempt();
            listResult = await _fetchService.FetchContentAsync(
                new SourceFetchRequest(
                    GhsaOptions.HttpClientName,
                    SourceName,
                    listUri)
                {
                    Metadata = metadata,
                    AcceptHeaders = new[] { "application/vnd.github+json" },
                },
                cancellationToken).ConfigureAwait(false);
        }
        catch (HttpRequestException ex)
        {
            // A failed list request aborts the whole run and schedules a backoff.
            _diagnostics.FetchFailure();
            await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }

        if (listResult.IsNotModified)
        {
            _diagnostics.FetchUnchanged();
            break;
        }

        if (!listResult.IsSuccess || listResult.Content is null)
        {
            _diagnostics.FetchFailure();
            break;
        }

        var deferList = await ApplyRateLimitAsync(listResult.Headers, "list", cancellationToken).ConfigureAwait(false);
        if (deferList)
        {
            rateLimitHit = true;
            break;
        }

        var pageModel = GhsaListParser.Parse(listResult.Content, page, _options.PageSize);

        foreach (var item in pageModel.Items)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var detailUri = BuildDetailUri(item.GhsaId);
            var detailMetadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["ghsaId"] = item.GhsaId,
                ["page"] = page.ToString(CultureInfo.InvariantCulture),
                ["since"] = since.ToString("O"),
                ["until"] = until.ToString("O"),
            };

            SourceFetchResult detailResult;
            try
            {
                detailResult = await _fetchService.FetchAsync(
                    new SourceFetchRequest(
                        GhsaOptions.HttpClientName,
                        SourceName,
                        detailUri)
                    {
                        Metadata = detailMetadata,
                        AcceptHeaders = new[] { "application/vnd.github+json" },
                    },
                    cancellationToken).ConfigureAwait(false);
            }
            catch (HttpRequestException ex)
            {
                // Detail failures are best-effort: log, count, and move on to the next advisory.
                _diagnostics.FetchFailure();
                _logger.LogWarning(ex, "Failed fetching GHSA advisory {GhsaId}", item.GhsaId);
                continue;
            }

            if (detailResult.IsNotModified)
            {
                _diagnostics.FetchUnchanged();
                continue;
            }

            if (!detailResult.IsSuccess || detailResult.Document is null)
            {
                _diagnostics.FetchFailure();
                continue;
            }

            _diagnostics.FetchDocument();
            pendingDocuments.Add(detailResult.Document.Id);
            pendingMappings.Add(detailResult.Document.Id);

            var deferDetail = await ApplyRateLimitAsync(detailResult.Document.Headers, "detail", cancellationToken).ConfigureAwait(false);
            if (deferDetail)
            {
                rateLimitHit = true;
                break;
            }
        }

        if (rateLimitHit)
        {
            // Leave page/maxUpdated untouched so this page is requested again on the next run.
            break;
        }

        if (pageModel.MaxUpdated.HasValue && (!maxUpdated.HasValue || pageModel.MaxUpdated > maxUpdated))
        {
            maxUpdated = pageModel.MaxUpdated;
        }

        // An empty page always ends paging, even if the parser reports further pages.
        // (Previously the empty-page check was overwritten by the HasMorePages assignment.)
        hasMore = pageModel.HasMorePages && pageModel.Items.Count > 0;
        page = pageModel.NextPageCandidate;
        pagesFetched++;

        if (hasMore && _options.RequestDelay > TimeSpan.Zero)
        {
            await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
        }
    }

    var updatedCursor = cursor
        .WithPendingDocuments(pendingDocuments)
        .WithPendingMappings(pendingMappings);

    if (hasMore || rateLimitHit)
    {
        // Window not finished: remember where to resume.
        updatedCursor = updatedCursor
            .WithCurrentWindowStart(since)
            .WithCurrentWindowEnd(until)
            .WithNextPage(page);
    }
    else
    {
        // Window finished: advance the watermark and reset paging.
        var nextSince = maxUpdated ?? until;
        updatedCursor = updatedCursor
            .WithLastUpdatedExclusive(nextSince)
            .WithCurrentWindowStart(null)
            .WithCurrentWindowEnd(null)
            .WithNextPage(1);
    }

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
|
|
|
|
/// <summary>
/// Turns every pending raw document into a stored DTO record and marks the document ready for mapping.
/// </summary>
/// <remarks>
/// Documents that no longer exist are dropped from the pending list. Documents without GridFS content
/// or with malformed JSON are marked failed and dropped. The cursor is rewritten once, after the loop.
/// </remarks>
/// <param name="services">Service provider supplied by the host; only null-checked here.</param>
/// <param name="cancellationToken">Observed before each document and by every awaited call.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <remarks>
/// Any exception raised while downloading raw content is logged and rethrown; in that case the
/// cursor is not updated by this call.
/// </remarks>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }

    // Working copy: ids are struck off as each document reaches a terminal outcome.
    var stillPending = cursor.PendingDocuments.ToList();

    foreach (var pendingId in cursor.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var stored = await _documentStore.FindAsync(pendingId, cancellationToken).ConfigureAwait(false);
        if (stored is null)
        {
            // Nothing to parse any more; just forget the id.
            stillPending.Remove(pendingId);
            continue;
        }

        if (stored.GridFsId is not { } gridFsId)
        {
            _diagnostics.ParseFailure();
            _logger.LogWarning("GHSA document {DocumentId} missing GridFS content", pendingId);
            await _documentStore.UpdateStatusAsync(stored.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            stillPending.Remove(pendingId);
            continue;
        }

        byte[] content;
        try
        {
            content = await _rawDocumentStorage.DownloadAsync(gridFsId, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Download problems abort the run; the id stays pending for a later attempt.
            _diagnostics.ParseFailure();
            _logger.LogError(ex, "Unable to download GHSA raw document {DocumentId}", pendingId);
            throw;
        }

        GhsaRecordDto record;
        try
        {
            record = GhsaRecordParser.Parse(content);
        }
        catch (JsonException ex)
        {
            _diagnostics.ParseQuarantine();
            _logger.LogError(ex, "Malformed GHSA JSON for {DocumentId}", pendingId);
            await _documentStore.UpdateStatusAsync(stored.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            stillPending.Remove(pendingId);
            continue;
        }

        // Round-trip through JSON to obtain the BSON payload stored with the DTO record.
        var recordJson = JsonSerializer.Serialize(record, SerializerOptions);
        var bsonPayload = BsonDocument.Parse(recordJson);
        var dtoRecord = new DtoRecord(
            Guid.NewGuid(),
            stored.Id,
            SourceName,
            "ghsa/1.0",
            bsonPayload,
            _timeProvider.GetUtcNow());

        await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(stored.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);

        stillPending.Remove(pendingId);
        _diagnostics.ParseSuccess();
    }

    var nextCursor = cursor.WithPendingDocuments(stillPending);
    await UpdateCursorAsync(nextCursor, cancellationToken).ConfigureAwait(false);
}
|
|
|
|
/// <summary>
/// Maps every pending DTO to an advisory, stores it, and marks the source document as mapped.
/// </summary>
/// <remarks>
/// Ids whose DTO or document is missing are logged and dropped. DTOs that cannot be deserialized
/// cause the document to be marked failed and dropped. The cursor is rewritten once, after the loop.
/// </remarks>
/// <param name="services">Service provider supplied by the host; only null-checked here.</param>
/// <param name="cancellationToken">Observed before each document and by every awaited call.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }

    // Working copy: ids are struck off as each mapping reaches a terminal outcome.
    var unmapped = cursor.PendingMappings.ToList();

    foreach (var pendingId in cursor.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var storedDto = await _dtoStore.FindByDocumentIdAsync(pendingId, cancellationToken).ConfigureAwait(false);
        var storedDocument = await _documentStore.FindAsync(pendingId, cancellationToken).ConfigureAwait(false);

        var bothPresent = storedDto is not null && storedDocument is not null;
        if (!bothPresent)
        {
            _logger.LogWarning("Skipping GHSA mapping for {DocumentId}: DTO or document missing", pendingId);
            unmapped.Remove(pendingId);
            continue;
        }

        GhsaRecordDto record;
        try
        {
            var payloadJson = storedDto!.Payload.ToJson();
            var deserialized = JsonSerializer.Deserialize<GhsaRecordDto>(payloadJson, SerializerOptions);
            if (deserialized is null)
            {
                throw new InvalidOperationException("Deserialized DTO was null.");
            }

            record = deserialized;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to deserialize GHSA DTO for {DocumentId}", pendingId);
            await _documentStore.UpdateStatusAsync(storedDocument!.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            unmapped.Remove(pendingId);
            continue;
        }

        var advisory = GhsaMapper.Map(record, storedDocument!, storedDto!.ValidatedAt);

        // No CVSS metrics but a canonical metric id is present: report the fallback that was used.
        var usedCanonicalFallback = advisory.CvssMetrics.IsEmpty && !string.IsNullOrWhiteSpace(advisory.CanonicalMetricId);
        if (usedCanonicalFallback)
        {
            string severityLabel;
            if (string.IsNullOrWhiteSpace(advisory.Severity))
            {
                severityLabel = "unknown";
            }
            else
            {
                severityLabel = advisory.Severity!;
            }

            _diagnostics.CanonicalMetricFallback(advisory.CanonicalMetricId!, severityLabel);
            if (_logger.IsEnabled(LogLevel.Debug))
            {
                _logger.LogDebug(
                    "GHSA {GhsaId} emitted canonical metric fallback {CanonicalMetricId} (severity {Severity})",
                    advisory.AdvisoryKey,
                    advisory.CanonicalMetricId,
                    severityLabel);
            }
        }

        await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(storedDocument!.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
        unmapped.Remove(pendingId);
        _diagnostics.MapSuccess(1);
    }

    var nextCursor = cursor.WithPendingMappings(unmapped);
    await UpdateCursorAsync(nextCursor, cancellationToken).ConfigureAwait(false);
}
|
|
|
|
/// <summary>
/// Builds the relative URI for one page of the advisory list, filtered to a time window.
/// </summary>
/// <param name="since">Lower bound, sent as <c>updated_since</c> in round-trip ("O") format.</param>
/// <param name="until">Upper bound, sent as <c>updated_until</c> in round-trip ("O") format.</param>
/// <param name="page">Page number, sent as <c>page</c>.</param>
/// <param name="pageSize">Page size, sent as <c>per_page</c>.</param>
/// <returns>A relative URI of the form <c>security/advisories?…</c>.</returns>
private static Uri BuildListUri(DateTimeOffset since, DateTimeOffset until, int page, int pageSize)
{
    // Everything that ends up in the URL is formatted with the invariant culture so the
    // request is identical regardless of the thread's current culture.
    var invariant = CultureInfo.InvariantCulture;
    var sinceValue = Uri.EscapeDataString(since.ToString("O", invariant));
    var untilValue = Uri.EscapeDataString(until.ToString("O", invariant));
    var pageValue = page.ToString(invariant);
    var pageSizeValue = pageSize.ToString(invariant);

    var query = $"updated_since={sinceValue}&updated_until={untilValue}&page={pageValue}&per_page={pageSizeValue}";
    return new Uri($"security/advisories?{query}", UriKind.Relative);
}
|
|
|
|
/// <summary>
/// Builds the relative URI of a single advisory's detail resource.
/// </summary>
/// <param name="ghsaId">Advisory identifier; percent-encoded before being placed in the path.</param>
/// <returns>A relative URI of the form <c>security/advisories/{id}</c>.</returns>
private static Uri BuildDetailUri(string ghsaId)
    => new Uri("security/advisories/" + Uri.EscapeDataString(ghsaId), UriKind.Relative);
|
|
|
|
/// <summary>
/// Loads the persisted cursor for this source, or an empty cursor when no state has been stored yet.
/// </summary>
/// <param name="cancellationToken">Passed to the state repository.</param>
private async Task<GhsaCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var persisted = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (persisted is null)
    {
        return GhsaCursor.Empty;
    }

    return GhsaCursor.FromBson(persisted.Cursor);
}
|
|
|
|
/// <summary>
/// Persists <paramref name="cursor"/> for this source, stamped with the current time from the injected clock.
/// </summary>
/// <param name="cursor">Cursor to store.</param>
/// <param name="cancellationToken">Passed to the state repository.</param>
private async Task UpdateCursorAsync(GhsaCursor cursor, CancellationToken cancellationToken)
{
    var sourceName = SourceName;
    var serializedCursor = cursor.ToBsonDocument();
    var updatedAt = _timeProvider.GetUtcNow();

    await _stateRepository
        .UpdateCursorAsync(sourceName, serializedCursor, updatedAt, cancellationToken)
        .ConfigureAwait(false);
}
|
|
|
|
/// <summary>
/// Decides whether a low-quota warning should be logged for this snapshot, warning at most once per
/// (phase, resource) until the quota rises back above the threshold.
/// </summary>
/// <param name="snapshot">Rate-limit snapshot to evaluate.</param>
/// <param name="recovered">
/// Set to true when a previously warned (phase, resource) is now above the threshold; false otherwise.
/// </param>
/// <returns>
/// True only on the first snapshot at or below <c>RateLimitWarningThreshold</c> since the last recovery.
/// False when the snapshot carries no remaining count.
/// </returns>
private bool ShouldLogRateLimitWarning(in GhsaRateLimitSnapshot snapshot, out bool recovered)
{
    recovered = false;

    if (snapshot.Remaining is not { } remaining)
    {
        return false;
    }

    var trackingKey = (snapshot.Phase, snapshot.Resource ?? "global");
    var belowThreshold = remaining <= _options.RateLimitWarningThreshold;

    lock (_rateLimitWarningLock)
    {
        var alreadyWarned = _rateLimitWarnings.TryGetValue(trackingKey, out var flag) && flag;

        switch (belowThreshold, alreadyWarned)
        {
            case (true, true):
                // Still low and already reported: stay quiet.
                return false;

            case (true, false):
                // First time below the threshold: remember it and warn.
                _rateLimitWarnings[trackingKey] = true;
                return true;

            case (false, true):
                // Back above the threshold after a warning: clear the marker and signal recovery.
                _rateLimitWarnings.Remove(trackingKey);
                recovered = true;
                return false;

            default:
                return false;
        }
    }
}
|
|
|
|
/// <summary>
/// Computes the remaining quota as a percentage of the limit.
/// </summary>
/// <param name="snapshot">Rate-limit snapshot supplying <c>Limit</c> and <c>Remaining</c>.</param>
/// <returns>
/// <c>Remaining / Limit * 100</c>, or null when either value is missing or the limit is not positive.
/// </returns>
private static double? CalculateHeadroomPercentage(in GhsaRateLimitSnapshot snapshot)
{
    if (snapshot.Limit is not { } limit || snapshot.Remaining is not { } remaining)
    {
        return null;
    }

    // A non-positive limit would make the ratio meaningless (or divide by zero).
    if (limit <= 0)
    {
        return null;
    }

    return (double)remaining / limit * 100d;
}
|
|
|
|
/// <summary>
/// Renders the headroom percentage as a log-message suffix.
/// </summary>
/// <param name="headroomPct">Percentage to render, or null when unknown.</param>
/// <returns>
/// <c>" (headroom N.N%)"</c> with one decimal place, or an empty string when no percentage is available.
/// </returns>
private static string FormatHeadroom(double? headroomPct)
{
    if (headroomPct is not double pct)
    {
        return string.Empty;
    }

    return $" (headroom {pct:F1}%)";
}
|
|
|
|
/// <summary>
/// Inspects rate-limit headers from a response, records them, logs warning/recovery transitions,
/// and waits out the limit when the quota is exhausted.
/// </summary>
/// <param name="headers">Response headers to parse; may be null.</param>
/// <param name="phase">Label for the calling phase (the fetch loop passes "list" or "detail").</param>
/// <param name="cancellationToken">Cancels the back-off delay.</param>
/// <returns>
/// True when the remaining quota is zero or less (after any delay has elapsed), telling the caller
/// to stop fetching; false otherwise, including when no rate-limit data could be parsed.
/// </returns>
private async Task<bool> ApplyRateLimitAsync(IReadOnlyDictionary<string, string>? headers, string phase, CancellationToken cancellationToken)
{
    var parsed = GhsaRateLimitParser.TryParse(headers, _timeProvider.GetUtcNow(), phase);
    if (parsed is not { HasData: true } snapshot)
    {
        return false;
    }

    _diagnostics.RecordRateLimit(snapshot);

    var headroomPct = CalculateHeadroomPercentage(snapshot);
    var shouldWarn = ShouldLogRateLimitWarning(snapshot, out var recovered);

    if (shouldWarn)
    {
        // Prefer the relative reset interval; fall back to the absolute reset time.
        string resetMessage;
        if (snapshot.ResetAfter is { } resetAfter)
        {
            resetMessage = $" (resets in {resetAfter:c})";
        }
        else if (snapshot.ResetAt is { } resetAt)
        {
            resetMessage = $" (resets at {resetAt:O})";
        }
        else
        {
            resetMessage = string.Empty;
        }

        _logger.LogWarning(
            "GHSA rate limit warning: remaining {Remaining} of {Limit} for {Phase} {Resource}{ResetMessage}{Headroom}",
            snapshot.Remaining,
            snapshot.Limit,
            phase,
            snapshot.Resource ?? "global",
            resetMessage,
            FormatHeadroom(headroomPct));
    }
    else if (recovered)
    {
        _logger.LogInformation(
            "GHSA rate limit recovered for {Phase} {Resource}: remaining {Remaining} of {Limit}{Headroom}",
            phase,
            snapshot.Resource ?? "global",
            snapshot.Remaining,
            snapshot.Limit,
            FormatHeadroom(headroomPct));
    }

    var exhausted = snapshot.Remaining is <= 0;
    if (!exhausted)
    {
        return false;
    }

    _diagnostics.RateLimitExhausted(phase);

    // Server-provided hints win over the configured fallback back-off.
    var backoff = snapshot.RetryAfter ?? snapshot.ResetAfter ?? _options.SecondaryRateLimitBackoff;
    if (backoff > TimeSpan.Zero)
    {
        _logger.LogWarning(
            "GHSA rate limit exhausted for {Phase} {Resource}; delaying {Delay}{Headroom}",
            phase,
            snapshot.Resource ?? "global",
            backoff,
            FormatHeadroom(headroomPct));
        await Task.Delay(backoff, cancellationToken).ConfigureAwait(false);
    }

    return true;
}
|
|
}
|