Rename Concelier Source modules to Connector

This commit is contained in:
master
2025-10-18 20:11:18 +03:00
parent 89ede53cc3
commit 052da7a7d0
789 changed files with 1489 additions and 1489 deletions

View File

@@ -0,0 +1,547 @@
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Ghsa.Configuration;
using StellaOps.Concelier.Connector.Ghsa.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Ghsa;
public sealed class GhsaConnector : IFeedConnector
{
// JSON settings shared by ParseAsync (DTO -> JSON -> BSON payload) and MapAsync (BSON payload -> DTO):
// web defaults, case-insensitive property matching, compact (non-indented) output.
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
WriteIndented = false,
};
// Collaborators supplied through the constructor; all are required except the time provider.
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
// Options snapshot; Validate() is invoked on it once in the constructor.
private readonly GhsaOptions _options;
private readonly GhsaDiagnostics _diagnostics;
// Falls back to TimeProvider.System when the constructor receives null.
private readonly TimeProvider _timeProvider;
private readonly ILogger<GhsaConnector> _logger;
// Guards every read and write of _rateLimitWarnings.
private readonly object _rateLimitWarningLock = new();
// (phase, resource) keys for which a low-rate-limit warning has already been logged;
// an entry is removed again once the remaining quota rises back above the warning threshold.
private readonly Dictionary<(string Phase, string Resource), bool> _rateLimitWarnings = new();
/// <summary>
/// Creates the connector, validating every required collaborator and the supplied options.
/// </summary>
/// <param name="timeProvider">Optional clock; <see cref="TimeProvider.System"/> is used when null.</param>
/// <exception cref="ArgumentNullException">
/// Any argument other than <paramref name="timeProvider"/> is null, or <paramref name="options"/> yields a null value.
/// </exception>
public GhsaConnector(
    SourceFetchService fetchService,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    IAdvisoryStore advisoryStore,
    ISourceStateRepository stateRepository,
    IOptions<GhsaOptions> options,
    GhsaDiagnostics diagnostics,
    TimeProvider? timeProvider,
    ILogger<GhsaConnector> logger)
{
    // Guards run in declaration order so the first offending argument is the one reported.
    ArgumentNullException.ThrowIfNull(fetchService);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(dtoStore);
    ArgumentNullException.ThrowIfNull(advisoryStore);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value;
    if (resolvedOptions is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    // Options are validated before the remaining arguments are checked.
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(diagnostics);
    ArgumentNullException.ThrowIfNull(logger);

    _fetchService = fetchService;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _dtoStore = dtoStore;
    _advisoryStore = advisoryStore;
    _stateRepository = stateRepository;
    _options = resolvedOptions;
    _diagnostics = diagnostics;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
}
/// <summary>Source identifier for this connector, taken from <c>GhsaConnectorPlugin.SourceName</c>.</summary>
public string SourceName
{
    get { return GhsaConnectorPlugin.SourceName; }
}
/// <summary>
/// Pages through the GHSA advisory list for the current update window, fetches the detail
/// document of every listed advisory, and records the fetched document ids in the cursor as
/// pending parse and map work.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Checked between pages and advisories and passed to every awaited call.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <exception cref="HttpRequestException">
/// A list request failed; the failure is recorded on the source state before the exception is rethrown.
/// </exception>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var now = _timeProvider.GetUtcNow();
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    var pendingDocuments = cursor.PendingDocuments.ToHashSet();
    var pendingMappings = cursor.PendingMappings.ToHashSet();

    // Resume a persisted window if there is one; otherwise continue after the last processed
    // update, or fall back to the configured backfill when the cursor holds neither.
    var since = cursor.CurrentWindowStart ?? cursor.LastUpdatedExclusive ?? now - _options.InitialBackfill;
    if (since > now)
    {
        since = now;
    }

    var until = cursor.CurrentWindowEnd ?? now;
    if (until <= since)
    {
        // Keep the window non-empty.
        until = since + TimeSpan.FromMinutes(1);
    }

    var page = cursor.NextPage <= 0 ? 1 : cursor.NextPage;
    var pagesFetched = 0;
    var hasMore = true;
    var rateLimitHit = false;
    DateTimeOffset? maxUpdated = cursor.LastUpdatedExclusive;

    while (hasMore && pagesFetched < _options.MaxPagesPerFetch)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var listUri = BuildListUri(since, until, page, _options.PageSize);
        var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            ["since"] = since.ToString("O"),
            ["until"] = until.ToString("O"),
            ["page"] = page.ToString(CultureInfo.InvariantCulture),
            ["pageSize"] = _options.PageSize.ToString(CultureInfo.InvariantCulture),
        };

        SourceFetchContentResult listResult;
        try
        {
            _diagnostics.FetchAttempt();
            listResult = await _fetchService.FetchContentAsync(
                new SourceFetchRequest(
                    GhsaOptions.HttpClientName,
                    SourceName,
                    listUri)
                {
                    Metadata = metadata,
                    AcceptHeaders = new[] { "application/vnd.github+json" },
                },
                cancellationToken).ConfigureAwait(false);
        }
        catch (HttpRequestException ex)
        {
            // A failed list request aborts the whole run; record the failure with the configured backoff.
            _diagnostics.FetchFailure();
            await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }

        if (listResult.IsNotModified)
        {
            _diagnostics.FetchUnchanged();
            break;
        }

        if (!listResult.IsSuccess || listResult.Content is null)
        {
            _diagnostics.FetchFailure();
            break;
        }

        var deferList = await ApplyRateLimitAsync(listResult.Headers, "list", cancellationToken).ConfigureAwait(false);
        if (deferList)
        {
            // Quota exhausted: stop here; the window and page are persisted below so this page is retried.
            rateLimitHit = true;
            break;
        }

        var pageModel = GhsaListParser.Parse(listResult.Content, page, _options.PageSize);

        foreach (var item in pageModel.Items)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var detailUri = BuildDetailUri(item.GhsaId);
            var detailMetadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["ghsaId"] = item.GhsaId,
                ["page"] = page.ToString(CultureInfo.InvariantCulture),
                ["since"] = since.ToString("O"),
                ["until"] = until.ToString("O"),
            };

            SourceFetchResult detailResult;
            try
            {
                detailResult = await _fetchService.FetchAsync(
                    new SourceFetchRequest(
                        GhsaOptions.HttpClientName,
                        SourceName,
                        detailUri)
                    {
                        Metadata = detailMetadata,
                        AcceptHeaders = new[] { "application/vnd.github+json" },
                    },
                    cancellationToken).ConfigureAwait(false);
            }
            catch (HttpRequestException ex)
            {
                // Detail failures are best-effort: log, count, and move on to the next advisory.
                _diagnostics.FetchFailure();
                _logger.LogWarning(ex, "Failed fetching GHSA advisory {GhsaId}", item.GhsaId);
                continue;
            }

            if (detailResult.IsNotModified)
            {
                _diagnostics.FetchUnchanged();
                continue;
            }

            if (!detailResult.IsSuccess || detailResult.Document is null)
            {
                _diagnostics.FetchFailure();
                continue;
            }

            _diagnostics.FetchDocument();
            pendingDocuments.Add(detailResult.Document.Id);
            pendingMappings.Add(detailResult.Document.Id);

            var deferDetail = await ApplyRateLimitAsync(detailResult.Document.Headers, "detail", cancellationToken).ConfigureAwait(false);
            if (deferDetail)
            {
                rateLimitHit = true;
                break;
            }
        }

        if (rateLimitHit)
        {
            // Leave page and maxUpdated untouched so the current page is re-fetched on the next run.
            break;
        }

        if (pageModel.MaxUpdated.HasValue)
        {
            if (!maxUpdated.HasValue || pageModel.MaxUpdated > maxUpdated)
            {
                maxUpdated = pageModel.MaxUpdated;
            }
        }

        // An empty page ends the window even if the parser still reports further pages.
        // (Previously the empty-page stop was assigned and then overwritten by HasMorePages.)
        hasMore = pageModel.Items.Count > 0 && pageModel.HasMorePages;
        page = pageModel.NextPageCandidate;
        pagesFetched++;

        // rateLimitHit is always false here because of the break above.
        if (hasMore && _options.RequestDelay > TimeSpan.Zero)
        {
            await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
        }
    }

    var updatedCursor = cursor
        .WithPendingDocuments(pendingDocuments)
        .WithPendingMappings(pendingMappings);

    if (hasMore || rateLimitHit)
    {
        // Window not finished (page budget reached, early break, or rate limit): persist it for resumption.
        updatedCursor = updatedCursor
            .WithCurrentWindowStart(since)
            .WithCurrentWindowEnd(until)
            .WithNextPage(page);
    }
    else
    {
        // Window finished: advance the watermark and clear the window state.
        var nextSince = maxUpdated ?? until;
        updatedCursor = updatedCursor
            .WithLastUpdatedExclusive(nextSince)
            .WithCurrentWindowStart(null)
            .WithCurrentWindowEnd(null)
            .WithNextPage(1);
    }

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Parses every document listed as pending in the cursor: downloads its raw content, parses it
/// into a <c>GhsaRecordDto</c>, stores the DTO, and moves the document to the pending-map status.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Checked before each document and passed to every awaited call.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <remarks>
/// A failed raw-content download is rethrown and aborts the run before the cursor is updated.
/// Documents that are missing, have no GridFS content, or contain malformed JSON are dropped
/// from the pending set; the latter two are also marked as failed.
/// </remarks>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }

    // A set gives O(1) removal per document; removing from a list made the loop quadratic.
    var remainingDocuments = cursor.PendingDocuments.ToHashSet();

    foreach (var documentId in cursor.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (document is null)
        {
            remainingDocuments.Remove(documentId);
            continue;
        }

        if (!document.GridFsId.HasValue)
        {
            _diagnostics.ParseFailure();
            _logger.LogWarning("GHSA document {DocumentId} missing GridFS content", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            continue;
        }

        byte[] rawBytes;
        try
        {
            rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is not a parse failure, so it bypasses this handler and propagates as-is.
            _diagnostics.ParseFailure();
            _logger.LogError(ex, "Unable to download GHSA raw document {DocumentId}", documentId);
            throw;
        }

        GhsaRecordDto dto;
        try
        {
            dto = GhsaRecordParser.Parse(rawBytes);
        }
        catch (JsonException ex)
        {
            _diagnostics.ParseQuarantine();
            _logger.LogError(ex, "Malformed GHSA JSON for {DocumentId}", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            continue;
        }

        // Round-trip the DTO through JSON to obtain the BSON payload stored with the DTO record.
        var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
        var dtoRecord = new DtoRecord(
            Guid.NewGuid(),
            document.Id,
            SourceName,
            "ghsa/1.0",
            payload,
            _timeProvider.GetUtcNow());

        await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
        remainingDocuments.Remove(documentId);
        _diagnostics.ParseSuccess();
    }

    var updatedCursor = cursor.WithPendingDocuments(remainingDocuments);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Maps every document listed as pending-mapping in the cursor: loads its stored DTO, converts it
/// to an advisory via <c>GhsaMapper.Map</c>, upserts the advisory, and marks the document as mapped.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Checked before each document and passed to every awaited call.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <remarks>
/// Entries whose DTO or document is missing are logged and dropped. Entries whose DTO payload cannot
/// be deserialized are marked as failed and dropped.
/// </remarks>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }

    // A set gives O(1) removal per document; removing from a list made the loop quadratic.
    var pendingMappings = cursor.PendingMappings.ToHashSet();

    foreach (var documentId in cursor.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (dtoRecord is null || document is null)
        {
            _logger.LogWarning("Skipping GHSA mapping for {DocumentId}: DTO or document missing", documentId);
            pendingMappings.Remove(documentId);
            continue;
        }

        GhsaRecordDto dto;
        try
        {
            dto = JsonSerializer.Deserialize<GhsaRecordDto>(dtoRecord.Payload.ToJson(), SerializerOptions)
                ?? throw new InvalidOperationException("Deserialized DTO was null.");
        }
        catch (Exception ex)
        {
            // Any deserialization problem fails this document only; the run continues.
            _logger.LogError(ex, "Failed to deserialize GHSA DTO for {DocumentId}", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        var advisory = GhsaMapper.Map(dto, document, dtoRecord.ValidatedAt);

        // No CVSS metrics but a canonical metric id is present: record the fallback for diagnostics.
        if (advisory.CvssMetrics.IsEmpty && !string.IsNullOrWhiteSpace(advisory.CanonicalMetricId))
        {
            var fallbackSeverity = string.IsNullOrWhiteSpace(advisory.Severity)
                ? "unknown"
                : advisory.Severity!;

            _diagnostics.CanonicalMetricFallback(advisory.CanonicalMetricId!, fallbackSeverity);

            if (_logger.IsEnabled(LogLevel.Debug))
            {
                _logger.LogDebug(
                    "GHSA {GhsaId} emitted canonical metric fallback {CanonicalMetricId} (severity {Severity})",
                    advisory.AdvisoryKey,
                    advisory.CanonicalMetricId,
                    fallbackSeverity);
            }
        }

        await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
        pendingMappings.Remove(documentId);
        _diagnostics.MapSuccess(1);
    }

    var updatedCursor = cursor.WithPendingMappings(pendingMappings);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Builds the relative list URI <c>security/advisories</c> with the update window and paging
/// expressed as query parameters.
/// </summary>
/// <param name="since">Window start, sent as <c>updated_since</c> in round-trip ("O") format.</param>
/// <param name="until">Window end, sent as <c>updated_until</c> in round-trip ("O") format.</param>
/// <param name="page">Page number, sent as <c>page</c>.</param>
/// <param name="pageSize">Page size, sent as <c>per_page</c>.</param>
/// <returns>A relative <see cref="Uri"/>.</returns>
private static Uri BuildListUri(DateTimeOffset since, DateTimeOffset until, int page, int pageSize)
{
    var sinceValue = Uri.EscapeDataString(since.ToString("O"));
    var untilValue = Uri.EscapeDataString(until.ToString("O"));

    var relative = string.Concat(
        "security/advisories?",
        "updated_since=", sinceValue,
        "&updated_until=", untilValue,
        "&page=", page.ToString(),
        "&per_page=", pageSize.ToString());

    return new Uri(relative, UriKind.Relative);
}
/// <summary>
/// Builds the relative detail URI <c>security/advisories/{ghsaId}</c>, escaping the id as a
/// single path segment.
/// </summary>
/// <param name="ghsaId">Advisory identifier to look up.</param>
/// <returns>A relative <see cref="Uri"/>.</returns>
private static Uri BuildDetailUri(string ghsaId)
{
    var relative = "security/advisories/" + Uri.EscapeDataString(ghsaId);
    return new Uri(relative, UriKind.Relative);
}
/// <summary>
/// Loads the persisted source state and converts its cursor document into a <c>GhsaCursor</c>.
/// </summary>
/// <returns><c>GhsaCursor.Empty</c> when no state has been stored for this source.</returns>
private async Task<GhsaCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var persisted = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (persisted is null)
    {
        return GhsaCursor.Empty;
    }

    return GhsaCursor.FromBson(persisted.Cursor);
}
/// <summary>
/// Persists <paramref name="cursor"/> for this source, stamped with the current time from the
/// injected time provider.
/// </summary>
private async Task UpdateCursorAsync(GhsaCursor cursor, CancellationToken cancellationToken)
{
    var serialized = cursor.ToBsonDocument();
    var timestamp = _timeProvider.GetUtcNow();

    await _stateRepository
        .UpdateCursorAsync(SourceName, serialized, timestamp, cancellationToken)
        .ConfigureAwait(false);
}
/// <summary>
/// Decides whether a low-rate-limit warning should be logged for the snapshot's (phase, resource)
/// pair, so that a warning is emitted once per dip below the threshold rather than on every response.
/// </summary>
/// <param name="snapshot">Rate-limit snapshot; ignored when it carries no remaining count.</param>
/// <param name="recovered">
/// Set to true when the pair had been warned about and the remaining count is now above the
/// configured threshold; the stored flag is cleared in that case.
/// </param>
/// <returns>True only the first time the remaining count is at or below the threshold for the pair.</returns>
private bool ShouldLogRateLimitWarning(in GhsaRateLimitSnapshot snapshot, out bool recovered)
{
    recovered = false;

    if (!snapshot.Remaining.HasValue)
    {
        return false;
    }

    var key = (snapshot.Phase, snapshot.Resource ?? "global");
    var belowThreshold = snapshot.Remaining.Value <= _options.RateLimitWarningThreshold;

    lock (_rateLimitWarningLock)
    {
        var alreadyFlagged = _rateLimitWarnings.TryGetValue(key, out var flag) && flag;

        if (!belowThreshold)
        {
            // Back above the threshold: clear the flag (if any) and report the recovery.
            recovered = alreadyFlagged && _rateLimitWarnings.Remove(key);
            return false;
        }

        if (alreadyFlagged)
        {
            // Still low, but this dip has already been reported.
            return false;
        }

        _rateLimitWarnings[key] = true;
        return true;
    }
}
/// <summary>
/// Computes the remaining quota as a percentage of the limit.
/// </summary>
/// <returns>
/// <c>remaining / limit * 100</c>, or null when either value is absent or the limit is not positive.
/// </returns>
private static double? CalculateHeadroomPercentage(in GhsaRateLimitSnapshot snapshot)
{
    var hasBothValues = snapshot.Limit.HasValue && snapshot.Remaining.HasValue;
    if (!hasBothValues)
    {
        return null;
    }

    var limit = snapshot.Limit.Value;
    if (limit <= 0)
    {
        // Avoid dividing by zero or reporting a meaningless negative percentage.
        return null;
    }

    var remaining = snapshot.Remaining.Value;
    return (double)remaining / limit * 100d;
}
/// <summary>
/// Formats a headroom percentage as a log-message suffix with one decimal place.
/// </summary>
/// <returns>The suffix text, or an empty string when <paramref name="headroomPct"/> is null.</returns>
private static string FormatHeadroom(double? headroomPct)
{
    if (headroomPct is double pct)
    {
        return $" (headroom {pct:F1}%)";
    }

    return string.Empty;
}
/// <summary>
/// Inspects rate-limit information parsed from response headers, records it in diagnostics, logs
/// warning/recovery transitions, and waits out the limit when the remaining quota is exhausted.
/// </summary>
/// <param name="headers">Response headers to parse; may be null.</param>
/// <param name="phase">Label of the calling phase, used in diagnostics and log messages.</param>
/// <param name="cancellationToken">Cancels the back-off delay.</param>
/// <returns>
/// True when the remaining quota is zero or less (after any back-off delay has elapsed), signalling
/// the caller to stop fetching; otherwise false.
/// </returns>
private async Task<bool> ApplyRateLimitAsync(IReadOnlyDictionary<string, string>? headers, string phase, CancellationToken cancellationToken)
{
    var parsed = GhsaRateLimitParser.TryParse(headers, _timeProvider.GetUtcNow(), phase);
    if (!parsed.HasValue || !parsed.Value.HasData)
    {
        return false;
    }

    var limits = parsed.Value;
    _diagnostics.RecordRateLimit(limits);

    var headroomText = FormatHeadroom(CalculateHeadroomPercentage(limits));
    var resource = limits.Resource ?? "global";

    var shouldWarn = ShouldLogRateLimitWarning(limits, out var recovered);
    if (shouldWarn)
    {
        // Prefer the relative reset interval; fall back to the absolute reset time.
        string resetMessage;
        if (limits.ResetAfter.HasValue)
        {
            resetMessage = $" (resets in {limits.ResetAfter.Value:c})";
        }
        else if (limits.ResetAt.HasValue)
        {
            resetMessage = $" (resets at {limits.ResetAt.Value:O})";
        }
        else
        {
            resetMessage = string.Empty;
        }

        _logger.LogWarning(
            "GHSA rate limit warning: remaining {Remaining} of {Limit} for {Phase} {Resource}{ResetMessage}{Headroom}",
            limits.Remaining,
            limits.Limit,
            phase,
            resource,
            resetMessage,
            headroomText);
    }
    else if (recovered)
    {
        _logger.LogInformation(
            "GHSA rate limit recovered for {Phase} {Resource}: remaining {Remaining} of {Limit}{Headroom}",
            phase,
            resource,
            limits.Remaining,
            limits.Limit,
            headroomText);
    }

    var exhausted = limits.Remaining.HasValue && limits.Remaining.Value <= 0;
    if (!exhausted)
    {
        return false;
    }

    _diagnostics.RateLimitExhausted(phase);

    // Back-off source, in order of preference: retry-after, reset-after, configured fallback.
    var delay = limits.RetryAfter ?? limits.ResetAfter ?? _options.SecondaryRateLimitBackoff;
    if (delay > TimeSpan.Zero)
    {
        _logger.LogWarning(
            "GHSA rate limit exhausted for {Phase} {Resource}; delaying {Delay}{Headroom}",
            phase,
            resource,
            delay,
            headroomText);
        await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
    }

    return true;
}
}