using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Common.Cursors;
using StellaOps.Concelier.Connector.Nvd.Configuration;
using StellaOps.Concelier.Connector.Nvd.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.ChangeHistory;
using StellaOps.Plugin;
using Json.Schema;

namespace StellaOps.Concelier.Connector.Nvd;

/// <summary>
/// Feed connector for the NVD source. Work is split into three stages that
/// communicate through a persisted <see cref="NvdCursor"/>:
/// <see cref="FetchAsync"/> downloads one time window (all pages),
/// <see cref="ParseAsync"/> validates the raw payloads and stores them as DTOs,
/// and <see cref="MapAsync"/> converts DTOs into advisories and records change history.
/// </summary>
public sealed class NvdConnector : IFeedConnector
{
    /// <summary>Custom date format used for the lastModified* query parameters.</summary>
    private const string QueryDateFormat = "yyyy-MM-dd'T'HH:mm:ss.fffK";

    /// <summary>Page size requested from the API.</summary>
    private const string ResultsPerPage = "2000";

    /// <summary>Backoff reported to the state repository when a fetch throws.</summary>
    private static readonly TimeSpan FailureBackoff = TimeSpan.FromMinutes(5);

    // Shared instance: allocating JsonSerializerOptions per call defeats the serializer's metadata cache.
    private static readonly JsonSerializerOptions CompactJsonOptions = new JsonSerializerOptions { WriteIndented = false };

    private static readonly JsonSchema Schema = NvdSchemaProvider.Schema;

    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly IChangeHistoryStore _changeHistoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly IJsonSchemaValidator _schemaValidator;
    private readonly NvdOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NvdConnector> _logger;
    private readonly NvdDiagnostics _diagnostics;

    /// <summary>
    /// Creates the connector. Every dependency except <paramref name="timeProvider"/> is required;
    /// a null <paramref name="timeProvider"/> falls back to <see cref="TimeProvider.System"/>.
    /// The supplied options are validated immediately.
    /// </summary>
    /// <exception cref="ArgumentNullException">A required dependency (or the options value) is null.</exception>
    public NvdConnector(
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        IChangeHistoryStore changeHistoryStore,
        ISourceStateRepository stateRepository,
        IJsonSchemaValidator schemaValidator,
        IOptions<NvdOptions> options,
        NvdDiagnostics diagnostics,
        TimeProvider? timeProvider,
        ILogger<NvdConnector> logger)
    {
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _changeHistoryStore = changeHistoryStore ?? throw new ArgumentNullException(nameof(changeHistoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Source identifier, taken from <see cref="NvdConnectorPlugin.SourceName"/>.</summary>
    public string SourceName => NvdConnectorPlugin.SourceName;

    /// <summary>
    /// Fetches the next time window: requests the first page, follows pagination, and stores the
    /// resulting document ids as pending in the cursor. A "not modified" response only advances the
    /// window. Any exception is reported via <c>MarkFailureAsync</c> and rethrown.
    /// </summary>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var now = _timeProvider.GetUtcNow();

        var windowOptions = new TimeWindowCursorOptions
        {
            WindowSize = _options.WindowSize,
            Overlap = _options.WindowOverlap,
            InitialBackfill = _options.InitialBackfill,
        };

        var window = TimeWindowCursorPlanner.GetNextWindow(now, cursor.Window, windowOptions);
        var requestUri = BuildRequestUri(window);

        var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            ["windowStart"] = window.Start.ToString("O", CultureInfo.InvariantCulture),
            ["windowEnd"] = window.End.ToString("O", CultureInfo.InvariantCulture),
            ["startIndex"] = "0",
        };

        try
        {
            _diagnostics.FetchAttempt();

            var result = await _fetchService.FetchAsync(
                new SourceFetchRequest(NvdOptions.HttpClientName, SourceName, requestUri)
                {
                    Metadata = metadata
                },
                cancellationToken).ConfigureAwait(false);

            if (result.IsNotModified)
            {
                _diagnostics.FetchUnchanged();
                _logger.LogDebug("NVD window {Start} - {End} returned 304", window.Start, window.End);
                await UpdateCursorAsync(cursor.WithWindow(window), cancellationToken).ConfigureAwait(false);
                return;
            }

            if (!result.IsSuccess || result.Document is null)
            {
                // The cursor is deliberately left untouched here, exactly as before; only the log is new.
                _diagnostics.FetchFailure();
                _logger.LogWarning(
                    "NVD fetch for window {Start} - {End} returned status {Status}",
                    window.Start,
                    window.End,
                    result.StatusCode);
                return;
            }

            _diagnostics.FetchDocument();

            var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments)
            {
                result.Document.Id
            };

            var additionalDocuments = await FetchAdditionalPagesAsync(
                window,
                metadata,
                result.Document,
                cancellationToken).ConfigureAwait(false);

            pendingDocuments.UnionWith(additionalDocuments);

            var updated = cursor
                .WithWindow(window)
                .WithPendingDocuments(pendingDocuments)
                .WithPendingMappings(cursor.PendingMappings);

            await UpdateCursorAsync(updated, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _diagnostics.FetchFailure();
            _logger.LogError(ex, "NVD fetch failed for {Uri}", requestUri);
            await _stateRepository.MarkFailureAsync(SourceName, now, FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }
    }

    /// <summary>
    /// Processes every pending document: downloads the raw payload, validates it against the NVD
    /// schema, stores it as a DTO and moves the document id from "pending documents" to
    /// "pending mappings". Documents that are missing, lack content, fail schema validation or fail
    /// JSON parsing are removed from both pending lists.
    /// </summary>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }

        var remainingFetch = cursor.PendingDocuments.ToList();
        var pendingMapping = cursor.PendingMappings.ToList();

        foreach (var documentId in cursor.PendingDocuments)
        {
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                _logger.LogWarning("Document {DocumentId} was not found; skipping", documentId);
                _diagnostics.ParseFailure();
                remainingFetch.Remove(documentId);
                pendingMapping.Remove(documentId);
                continue;
            }

            if (!document.GridFsId.HasValue)
            {
                _logger.LogWarning("Document {DocumentId} is missing GridFS content; skipping", documentId);
                _diagnostics.ParseFailure();
                remainingFetch.Remove(documentId);
                pendingMapping.Remove(documentId);
                continue;
            }

            var rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);

            try
            {
                using var jsonDocument = JsonDocument.Parse(rawBytes);

                try
                {
                    _schemaValidator.Validate(jsonDocument, Schema, document.Uri);
                }
                catch (JsonSchemaValidationException ex)
                {
                    _logger.LogWarning(ex, "NVD schema validation failed for document {DocumentId} ({Uri})", document.Id, document.Uri);
                    await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                    remainingFetch.Remove(documentId);
                    pendingMapping.Remove(documentId);
                    _diagnostics.ParseQuarantine();
                    continue;
                }

                // Re-serialize the parsed root element and hand that text to the BSON parser.
                var sanitized = JsonSerializer.Serialize(jsonDocument.RootElement);
                var payload = BsonDocument.Parse(sanitized);

                var dtoRecord = new DtoRecord(
                    Guid.NewGuid(),
                    document.Id,
                    SourceName,
                    "nvd.cve.v2",
                    payload,
                    _timeProvider.GetUtcNow());

                await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
                _diagnostics.ParseSuccess();

                remainingFetch.Remove(documentId);
                if (!pendingMapping.Contains(documentId))
                {
                    pendingMapping.Add(documentId);
                }
            }
            catch (JsonException ex)
            {
                _logger.LogWarning(ex, "Failed to parse NVD JSON payload for document {DocumentId} ({Uri})", document.Id, document.Uri);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingFetch.Remove(documentId);
                pendingMapping.Remove(documentId);
                _diagnostics.ParseFailure();
            }
        }

        var updatedCursor = cursor
            .WithPendingDocuments(remainingFetch)
            .WithPendingMappings(pendingMapping);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Maps every pending DTO to advisories (one per distinct advisory key), upserts them, records
    /// change history against the previously stored advisory when one exists, and marks the
    /// document as mapped.
    /// </summary>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }

        var pendingMapping = cursor.PendingMappings.ToList();
        var now = _timeProvider.GetUtcNow();

        foreach (var documentId in cursor.PendingMappings)
        {
            var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);

            if (dto is null || document is null)
            {
                _logger.LogWarning("Skipping mapping for document {DocumentId}: DTO or document record not found", documentId);
                pendingMapping.Remove(documentId);
                continue;
            }

            var json = dto.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
            {
                OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
            });

            using var jsonDocument = JsonDocument.Parse(json);

            // Keep only the first advisory for each key produced by the mapper.
            var advisories = NvdMapper.Map(jsonDocument, document, now)
                .GroupBy(static advisory => advisory.AdvisoryKey, StringComparer.Ordinal)
                .Select(static group => group.First())
                .ToArray();

            var mappedCount = 0L;
            foreach (var advisory in advisories)
            {
                if (string.IsNullOrWhiteSpace(advisory.AdvisoryKey))
                {
                    _logger.LogWarning("Skipping advisory with missing key for document {DocumentId} ({Uri})", document.Id, document.Uri);
                    continue;
                }

                // Read the stored version before overwriting it so the diff has a baseline.
                var previous = await _advisoryStore.FindAsync(advisory.AdvisoryKey, cancellationToken).ConfigureAwait(false);
                await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);

                if (previous is not null)
                {
                    await RecordChangeHistoryAsync(advisory, previous, document, now, cancellationToken).ConfigureAwait(false);
                }

                mappedCount++;
            }

            if (mappedCount > 0)
            {
                _diagnostics.MapSuccess(mappedCount);
            }

            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
            pendingMapping.Remove(documentId);
        }

        var updatedCursor = cursor.WithPendingMappings(pendingMapping);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Reads <c>totalResults</c> / <c>resultsPerPage</c> from the stored first page and fetches the
    /// remaining pages of the window.
    /// </summary>
    /// <returns>
    /// Ids of the additional documents that were fetched; empty when the first page has no content,
    /// cannot be downloaded or parsed, or already contains every result.
    /// </returns>
    private async Task<IReadOnlyCollection<Guid>> FetchAdditionalPagesAsync(
        TimeWindow window,
        IReadOnlyDictionary<string, string> baseMetadata,
        DocumentRecord firstDocument,
        CancellationToken cancellationToken)
    {
        if (firstDocument.GridFsId is null)
        {
            return Array.Empty<Guid>();
        }

        byte[] rawBytes;
        try
        {
            rawBytes = await _rawDocumentStorage.DownloadAsync(firstDocument.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Unable to download NVD first page {DocumentId} to evaluate pagination", firstDocument.Id);
            return Array.Empty<Guid>();
        }

        var totalResults = 0;
        var resultsPerPage = 0;

        // Only the header parse is guarded by the JsonException handler. Keeping the page loop
        // outside means a JsonException raised while fetching a page propagates (as the inner
        // rethrow intends) instead of being swallowed together with the ids already collected,
        // and the JsonDocument is disposed before any network I/O starts.
        try
        {
            using var jsonDocument = JsonDocument.Parse(rawBytes);
            var root = jsonDocument.RootElement;

            if (!TryReadInt32(root, "totalResults", out totalResults)
                || !TryReadInt32(root, "resultsPerPage", out resultsPerPage))
            {
                return Array.Empty<Guid>();
            }
        }
        catch (JsonException ex)
        {
            _logger.LogWarning(ex, "Failed to parse NVD first page {DocumentId} while determining pagination", firstDocument.Id);
            return Array.Empty<Guid>();
        }

        if (resultsPerPage <= 0 || totalResults <= resultsPerPage)
        {
            return Array.Empty<Guid>();
        }

        var fetchedDocuments = new List<Guid>();

        foreach (var startIndex in PaginationPlanner.EnumerateAdditionalPages(totalResults, resultsPerPage))
        {
            var metadata = new Dictionary<string, string>(StringComparer.Ordinal);
            foreach (var kvp in baseMetadata)
            {
                metadata[kvp.Key] = kvp.Value;
            }

            metadata["startIndex"] = startIndex.ToString(CultureInfo.InvariantCulture);

            var request = new SourceFetchRequest(
                NvdOptions.HttpClientName,
                SourceName,
                BuildRequestUri(window, startIndex))
            {
                Metadata = metadata
            };

            SourceFetchResult pageResult;
            try
            {
                _diagnostics.FetchAttempt();
                pageResult = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _diagnostics.FetchFailure();
                _logger.LogError(ex, "NVD fetch failed for page starting at {StartIndex}", startIndex);
                throw;
            }

            if (pageResult.IsNotModified)
            {
                _diagnostics.FetchUnchanged();
                continue;
            }

            if (!pageResult.IsSuccess || pageResult.Document is null)
            {
                _diagnostics.FetchFailure();
                _logger.LogWarning("NVD fetch for page starting at {StartIndex} returned status {Status}", startIndex, pageResult.StatusCode);
                continue;
            }

            _diagnostics.FetchDocument();
            fetchedDocuments.Add(pageResult.Document.Id);
        }

        return fetchedDocuments;
    }

    /// <summary>
    /// Reads a numeric property as an <see cref="int"/>. Integers outside the 32-bit range (but
    /// within 64 bits) are clamped to <see cref="int.MinValue"/> / <see cref="int.MaxValue"/>.
    /// </summary>
    /// <returns>
    /// <c>false</c> when <paramref name="root"/> is not an object, the property is absent or not a
    /// number, or the number is not representable as a 64-bit integer.
    /// </returns>
    private static bool TryReadInt32(JsonElement root, string propertyName, out int value)
    {
        value = 0;

        // TryGetProperty throws on non-object elements, so check the kind first.
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty(propertyName, out var property)
            || property.ValueKind != JsonValueKind.Number)
        {
            return false;
        }

        if (property.TryGetInt32(out var intValue))
        {
            value = intValue;
            return true;
        }

        if (property.TryGetInt64(out var longValue))
        {
            // Clamp both ends; an unchecked cast of a value below int.MinValue would wrap around.
            value = (int)Math.Clamp(longValue, int.MinValue, int.MaxValue);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Stores a change-history record describing how <paramref name="current"/> differs from
    /// <paramref name="previous"/>. Nothing is written when the advisories are equal, their
    /// snapshots are identical, or no top-level field differs.
    /// </summary>
    private async Task RecordChangeHistoryAsync(
        Advisory current,
        Advisory previous,
        DocumentRecord document,
        DateTimeOffset capturedAt,
        CancellationToken cancellationToken)
    {
        if (current.Equals(previous))
        {
            return;
        }

        var currentSnapshot = SnapshotSerializer.ToSnapshot(current);
        var previousSnapshot = SnapshotSerializer.ToSnapshot(previous);

        if (string.Equals(currentSnapshot, previousSnapshot, StringComparison.Ordinal))
        {
            return;
        }

        var changes = ComputeChanges(previousSnapshot, currentSnapshot);
        if (changes.Count == 0)
        {
            return;
        }

        var currentHash = ComputeHash(currentSnapshot);

        // Fall back to the snapshot hash when the document record carries no SHA-256.
        var documentHash = string.IsNullOrWhiteSpace(document.Sha256)
            ? currentHash
            : document.Sha256;

        var record = new ChangeHistoryRecord(
            Guid.NewGuid(),
            SourceName,
            current.AdvisoryKey,
            document.Id,
            documentHash,
            currentHash,
            ComputeHash(previousSnapshot),
            currentSnapshot,
            previousSnapshot,
            changes,
            capturedAt);

        await _changeHistoryStore.AddAsync(record, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Compares the top-level properties of two JSON object snapshots and returns one entry per
    /// field that was added, removed or modified, ordered by field name (ordinal).
    /// </summary>
    private static IReadOnlyList<ChangeHistoryFieldChange> ComputeChanges(string previousSnapshot, string currentSnapshot)
    {
        using var previousDocument = JsonDocument.Parse(previousSnapshot);
        using var currentDocument = JsonDocument.Parse(currentSnapshot);

        var previousRoot = previousDocument.RootElement;
        var currentRoot = currentDocument.RootElement;

        var fields = new HashSet<string>(StringComparer.Ordinal);

        foreach (var property in previousRoot.EnumerateObject())
        {
            fields.Add(property.Name);
        }

        foreach (var property in currentRoot.EnumerateObject())
        {
            fields.Add(property.Name);
        }

        var changes = new List<ChangeHistoryFieldChange>();

        foreach (var field in fields.OrderBy(static name => name, StringComparer.Ordinal))
        {
            var hasPrevious = previousRoot.TryGetProperty(field, out var previousValue);
            var hasCurrent = currentRoot.TryGetProperty(field, out var currentValue);

            if (!hasPrevious && hasCurrent)
            {
                changes.Add(new ChangeHistoryFieldChange(field, "Added", null, SerializeElement(currentValue)));
                continue;
            }

            if (hasPrevious && !hasCurrent)
            {
                changes.Add(new ChangeHistoryFieldChange(field, "Removed", SerializeElement(previousValue), null));
                continue;
            }

            if (hasPrevious && hasCurrent && !JsonElement.DeepEquals(previousValue, currentValue))
            {
                changes.Add(new ChangeHistoryFieldChange(field, "Modified", SerializeElement(previousValue), SerializeElement(currentValue)));
            }
        }

        return changes;
    }

    /// <summary>Serializes a JSON element to non-indented text.</summary>
    private static string SerializeElement(JsonElement element)
        => JsonSerializer.Serialize(element, CompactJsonOptions);

    /// <summary>Returns <c>sha256:</c> followed by the lowercase hex SHA-256 of the UTF-8 encoded snapshot.</summary>
    private static string ComputeHash(string snapshot)
    {
        var bytes = Encoding.UTF8.GetBytes(snapshot);
        var hash = SHA256.HashData(bytes);
        return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
    }

    /// <summary>Loads the persisted cursor for this source; a missing state record yields whatever <c>FromBsonDocument(null)</c> returns.</summary>
    private async Task<NvdCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return NvdCursor.FromBsonDocument(record?.Cursor);
    }

    /// <summary>Persists the cursor, stamped with the current time from the injected <see cref="TimeProvider"/>.</summary>
    private async Task UpdateCursorAsync(NvdCursor cursor, CancellationToken cancellationToken)
    {
        var completedAt = _timeProvider.GetUtcNow();
        await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Builds the request URI for a window. <c>startIndex</c> is only added to the query when it is
    /// greater than zero.
    /// </summary>
    private Uri BuildRequestUri(TimeWindow window, int startIndex = 0)
    {
        var builder = new UriBuilder(_options.BaseEndpoint);

        // An ordered list keeps the query parameter order explicit rather than relying on
        // dictionary enumeration order. Dates use the invariant culture because the ':' specifier
        // and the calendar in a custom format string otherwise follow the current culture.
        var parameters = new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("lastModifiedStartDate", window.Start.ToString(QueryDateFormat, CultureInfo.InvariantCulture)),
            new KeyValuePair<string, string>("lastModifiedEndDate", window.End.ToString(QueryDateFormat, CultureInfo.InvariantCulture)),
            new KeyValuePair<string, string>("resultsPerPage", ResultsPerPage),
        };

        if (startIndex > 0)
        {
            parameters.Add(new KeyValuePair<string, string>("startIndex", startIndex.ToString(CultureInfo.InvariantCulture)));
        }

        builder.Query = string.Join(
            "&",
            parameters.Select(static kvp => $"{System.Net.WebUtility.UrlEncode(kvp.Key)}={System.Net.WebUtility.UrlEncode(kvp.Value)}"));

        return builder.Uri;
    }
}