Files
git.stella-ops.org/src/StellaOps.Concelier.Connector.Nvd/NvdConnector.cs

566 lines
22 KiB
C#

using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Common.Cursors;
using StellaOps.Concelier.Connector.Nvd.Configuration;
using StellaOps.Concelier.Connector.Nvd.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.ChangeHistory;
using StellaOps.Plugin;
using Json.Schema;
namespace StellaOps.Concelier.Connector.Nvd;
public sealed class NvdConnector : IFeedConnector
{
// Collaborators and settings; all instance fields are assigned once in the constructor.
// Issues the fetch requests for NVD result pages.
private readonly SourceFetchService _fetchService;
// Supplies the raw bytes of a stored document, addressed by its GridFS id.
private readonly RawDocumentStorage _rawDocumentStorage;
// Document records: lookup by id and status updates.
private readonly IDocumentStore _documentStore;
// Parsed payloads (DTOs): upsert and lookup by document id.
private readonly IDtoStore _dtoStore;
// Advisories: lookup by advisory key and upsert.
private readonly IAdvisoryStore _advisoryStore;
// Receives a record whenever a re-mapped advisory differs from the stored one.
private readonly IChangeHistoryStore _changeHistoryStore;
// Persists the connector cursor and failure state, keyed by source name.
private readonly ISourceStateRepository _stateRepository;
// Validates parsed payloads against Schema before they are stored as DTOs.
private readonly IJsonSchemaValidator _schemaValidator;
// Connector settings (base endpoint, window size/overlap, initial backfill); validated in the constructor.
private readonly NvdOptions _options;
// Clock used for all timestamps; falls back to TimeProvider.System when none is injected.
private readonly TimeProvider _timeProvider;
private readonly ILogger<NvdConnector> _logger;
// Notified of fetch/parse/map outcomes.
private readonly NvdDiagnostics _diagnostics;
// Schema handed to the validator in ParseAsync.
private static readonly JsonSchema Schema = NvdSchemaProvider.Schema;
/// <summary>
/// Creates the connector from its injected collaborators and validates the supplied options.
/// </summary>
/// <param name="timeProvider">Clock to use; <see cref="TimeProvider.System"/> is used when <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// Any argument other than <paramref name="timeProvider"/> is <c>null</c>, or
/// <paramref name="options"/> carries no value.
/// </exception>
public NvdConnector(
    SourceFetchService fetchService,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    IAdvisoryStore advisoryStore,
    IChangeHistoryStore changeHistoryStore,
    ISourceStateRepository stateRepository,
    IJsonSchemaValidator schemaValidator,
    IOptions<NvdOptions> options,
    NvdDiagnostics diagnostics,
    TimeProvider? timeProvider,
    ILogger<NvdConnector> logger)
{
    // Guards run in parameter order, with option validation between the
    // options and diagnostics checks, so the first failure wins.
    ArgumentNullException.ThrowIfNull(fetchService);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(dtoStore);
    ArgumentNullException.ThrowIfNull(advisoryStore);
    ArgumentNullException.ThrowIfNull(changeHistoryStore);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(schemaValidator);

    var resolvedOptions = options?.Value ?? throw new ArgumentNullException(nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(diagnostics);
    ArgumentNullException.ThrowIfNull(logger);

    _fetchService = fetchService;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _dtoStore = dtoStore;
    _advisoryStore = advisoryStore;
    _changeHistoryStore = changeHistoryStore;
    _stateRepository = stateRepository;
    _schemaValidator = schemaValidator;
    _options = resolvedOptions;
    _diagnostics = diagnostics;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
}
/// <summary>Name of this source, as declared by <c>NvdConnectorPlugin.SourceName</c>.</summary>
public string SourceName
{
    get { return NvdConnectorPlugin.SourceName; }
}
/// <summary>
/// Fetches the next NVD time window: requests the first result page, follows any additional
/// pages it announces, and persists the advanced cursor with the fetched documents queued
/// for parsing.
/// </summary>
/// <param name="services">Service provider supplied by the caller; not used by this method.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <remarks>
/// A 304 response advances the cursor window without queuing documents. A non-success response
/// leaves the cursor untouched. Any exception is recorded through
/// <c>MarkFailureAsync</c> (five-minute backoff) and rethrown.
/// </remarks>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    var now = _timeProvider.GetUtcNow();

    var windowOptions = new TimeWindowCursorOptions
    {
        WindowSize = _options.WindowSize,
        Overlap = _options.WindowOverlap,
        InitialBackfill = _options.InitialBackfill,
    };

    var window = TimeWindowCursorPlanner.GetNextWindow(now, cursor.Window, windowOptions);
    var requestUri = BuildRequestUri(window);

    var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
    {
        ["windowStart"] = window.Start.ToString("O"),
        ["windowEnd"] = window.End.ToString("O"),
        ["startIndex"] = "0",
    };

    try
    {
        _diagnostics.FetchAttempt();

        var result = await _fetchService.FetchAsync(
            new SourceFetchRequest(
                NvdOptions.HttpClientName,
                SourceName,
                requestUri)
            {
                Metadata = metadata
            },
            cancellationToken).ConfigureAwait(false);

        if (result.IsNotModified)
        {
            _diagnostics.FetchUnchanged();
            _logger.LogDebug("NVD window {Start} - {End} returned 304", window.Start, window.End);
            await UpdateCursorAsync(cursor.WithWindow(window), cancellationToken).ConfigureAwait(false);
            return;
        }

        if (!result.IsSuccess || result.Document is null)
        {
            _diagnostics.FetchFailure();

            // Log the outcome like the paged fetch does; otherwise a rejected first page
            // leaves no trace beyond the diagnostics counter.
            _logger.LogWarning(
                "NVD fetch for window {Start} - {End} returned status {Status}",
                window.Start,
                window.End,
                result.StatusCode);

            // The cursor is intentionally left as-is so the same window is planned again.
            return;
        }

        _diagnostics.FetchDocument();

        var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments)
        {
            result.Document.Id
        };

        var additionalDocuments = await FetchAdditionalPagesAsync(
            window,
            metadata,
            result.Document,
            cancellationToken).ConfigureAwait(false);

        foreach (var documentId in additionalDocuments)
        {
            pendingDocuments.Add(documentId);
        }

        // NOTE(review): pages that FetchAdditionalPagesAsync skips on a non-success response
        // are not queued, yet the window is still persisted here — confirm that is intended.
        var updated = cursor
            .WithWindow(window)
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(cursor.PendingMappings);

        await UpdateCursorAsync(updated, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _diagnostics.FetchFailure();
        _logger.LogError(ex, "NVD fetch failed for {Uri}", requestUri);
        await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
        throw;
    }
}
/// <summary>
/// Parses every document the cursor lists as pending: downloads the raw payload, validates it
/// against the NVD schema, stores it as a DTO and queues the document for mapping.
/// </summary>
/// <param name="services">Service provider supplied by the caller; not used by this method.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <remarks>
/// Documents that cannot be found, have no GridFS content, fail schema validation, or are not
/// valid JSON are dropped from both pending lists; the latter two are also marked
/// <c>DocumentStatuses.Failed</c>. The cursor is written once, after the loop.
/// </remarks>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }

    var awaitingParse = cursor.PendingDocuments.ToList();
    var awaitingMap = cursor.PendingMappings.ToList();

    // Drops a document from both work lists; used for every outcome that cannot proceed to mapping.
    void Discard(Guid id)
    {
        awaitingParse.Remove(id);
        awaitingMap.Remove(id);
    }

    foreach (var documentId in cursor.PendingDocuments)
    {
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);

        if (document is null)
        {
            _diagnostics.ParseFailure();
            Discard(documentId);
            continue;
        }

        if (!document.GridFsId.HasValue)
        {
            _logger.LogWarning("Document {DocumentId} is missing GridFS content; skipping", documentId);
            _diagnostics.ParseFailure();
            Discard(documentId);
            continue;
        }

        var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);

        try
        {
            using var parsed = JsonDocument.Parse(content);

            try
            {
                _schemaValidator.Validate(parsed, Schema, document.Uri);
            }
            catch (JsonSchemaValidationException ex)
            {
                _logger.LogWarning(ex, "NVD schema validation failed for document {DocumentId} ({Uri})", document.Id, document.Uri);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                Discard(documentId);
                _diagnostics.ParseQuarantine();
                continue;
            }

            // Round-trip through System.Text.Json so the BSON payload is built from normalized JSON text.
            var payload = BsonDocument.Parse(JsonSerializer.Serialize(parsed.RootElement));

            var dto = new DtoRecord(
                Guid.NewGuid(),
                document.Id,
                SourceName,
                "nvd.cve.v2",
                payload,
                _timeProvider.GetUtcNow());

            await _dtoStore.UpsertAsync(dto, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
            _diagnostics.ParseSuccess();

            awaitingParse.Remove(documentId);
            if (!awaitingMap.Contains(documentId))
            {
                awaitingMap.Add(documentId);
            }
        }
        catch (JsonException ex)
        {
            _logger.LogWarning(ex, "Failed to parse NVD JSON payload for document {DocumentId} ({Uri})", document.Id, document.Uri);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            Discard(documentId);
            _diagnostics.ParseFailure();
        }
    }

    var nextCursor = cursor
        .WithPendingDocuments(awaitingParse)
        .WithPendingMappings(awaitingMap);

    await UpdateCursorAsync(nextCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Maps every document the cursor lists as pending for mapping: converts its stored DTO into
/// advisories, upserts them, and records change history for advisories that already existed.
/// </summary>
/// <param name="services">Service provider supplied by the caller; not used by this method.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <remarks>
/// Entries whose DTO or document record is missing are dropped from the pending list without
/// further action. The cursor is written once, after the loop.
/// </remarks>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }

    var outstanding = cursor.PendingMappings.ToList();
    var capturedAt = _timeProvider.GetUtcNow();

    foreach (var documentId in cursor.PendingMappings)
    {
        var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);

        if (dto is null || document is null)
        {
            outstanding.Remove(documentId);
            continue;
        }

        var writerSettings = new MongoDB.Bson.IO.JsonWriterSettings
        {
            OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
        };
        var payloadJson = dto.Payload.ToJson(writerSettings);

        using var payloadDocument = JsonDocument.Parse(payloadJson);

        // Keep only the first advisory produced for each key.
        var uniqueAdvisories = NvdMapper.Map(payloadDocument, document, capturedAt)
            .GroupBy(static candidate => candidate.AdvisoryKey, StringComparer.Ordinal)
            .Select(static sameKey => sameKey.First())
            .ToArray();

        long upserted = 0;
        foreach (var advisory in uniqueAdvisories)
        {
            if (string.IsNullOrWhiteSpace(advisory.AdvisoryKey))
            {
                _logger.LogWarning("Skipping advisory with missing key for document {DocumentId} ({Uri})", document.Id, document.Uri);
                continue;
            }

            // Read the stored version before overwriting it so the two can be compared afterwards.
            var existing = await _advisoryStore.FindAsync(advisory.AdvisoryKey, cancellationToken).ConfigureAwait(false);
            await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);

            if (existing is not null)
            {
                await RecordChangeHistoryAsync(advisory, existing, document, capturedAt, cancellationToken).ConfigureAwait(false);
            }

            upserted++;
        }

        if (upserted > 0)
        {
            _diagnostics.MapSuccess(upserted);
        }

        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
        outstanding.Remove(documentId);
    }

    await UpdateCursorAsync(cursor.WithPendingMappings(outstanding), cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Reads <c>totalResults</c> and <c>resultsPerPage</c> from the stored first page and fetches
/// every further page of the same window.
/// </summary>
/// <param name="window">Time window the first page was requested for.</param>
/// <param name="baseMetadata">Metadata copied onto every page request; <c>startIndex</c> is overwritten per page.</param>
/// <param name="firstDocument">Document record of the already-fetched first page.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <returns>
/// Ids of the documents fetched for the additional pages. Empty when the first page has no
/// stored content, cannot be downloaded or parsed, or announces a single page.
/// </returns>
/// <remarks>
/// Pages answered with 304 or a non-success result are skipped. An exception thrown by the
/// fetch itself is logged and rethrown.
/// </remarks>
private async Task<IReadOnlyCollection<Guid>> FetchAdditionalPagesAsync(
    TimeWindow window,
    IReadOnlyDictionary<string, string> baseMetadata,
    DocumentRecord firstDocument,
    CancellationToken cancellationToken)
{
    if (firstDocument.GridFsId is null)
    {
        return Array.Empty<Guid>();
    }

    byte[] firstPageBytes;
    try
    {
        firstPageBytes = await _rawDocumentStorage.DownloadAsync(firstDocument.GridFsId.Value, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Unable to download NVD first page {DocumentId} to evaluate pagination", firstDocument.Id);
        return Array.Empty<Guid>();
    }

    try
    {
        using var firstPage = JsonDocument.Parse(firstPageBytes);
        var envelope = firstPage.RootElement;

        // No paging information, or everything already fit on the first page.
        if (!TryReadInt32(envelope, "totalResults", out var total)
            || !TryReadInt32(envelope, "resultsPerPage", out var pageSize)
            || pageSize <= 0
            || total <= pageSize)
        {
            return Array.Empty<Guid>();
        }

        var collected = new List<Guid>();

        foreach (var startIndex in PaginationPlanner.EnumerateAdditionalPages(total, pageSize))
        {
            var pageMetadata = new Dictionary<string, string>(StringComparer.Ordinal);
            foreach (var (key, value) in baseMetadata)
            {
                pageMetadata[key] = value;
            }

            pageMetadata["startIndex"] = startIndex.ToString(CultureInfo.InvariantCulture);

            var pageRequest = new SourceFetchRequest(
                NvdOptions.HttpClientName,
                SourceName,
                BuildRequestUri(window, startIndex))
            {
                Metadata = pageMetadata
            };

            SourceFetchResult page;
            try
            {
                _diagnostics.FetchAttempt();
                page = await _fetchService.FetchAsync(pageRequest, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _diagnostics.FetchFailure();
                _logger.LogError(ex, "NVD fetch failed for page starting at {StartIndex}", startIndex);
                throw;
            }

            if (page.IsNotModified)
            {
                _diagnostics.FetchUnchanged();
            }
            else if (!page.IsSuccess || page.Document is null)
            {
                _diagnostics.FetchFailure();
                _logger.LogWarning("NVD fetch for page starting at {StartIndex} returned status {Status}", startIndex, page.StatusCode);
            }
            else
            {
                _diagnostics.FetchDocument();
                collected.Add(page.Document.Id);
            }
        }

        return collected;
    }
    catch (JsonException ex)
    {
        _logger.LogWarning(ex, "Failed to parse NVD first page {DocumentId} while determining pagination", firstDocument.Id);
        return Array.Empty<Guid>();
    }
}
/// <summary>
/// Reads a numeric property of a JSON object as a 32-bit integer, saturating values that only
/// fit in 64 bits to <see cref="int.MinValue"/> / <see cref="int.MaxValue"/>.
/// </summary>
/// <param name="root">Element expected to be a JSON object.</param>
/// <param name="propertyName">Name of the property to read.</param>
/// <param name="value">The value read, or 0 when the method returns <c>false</c>.</param>
/// <returns>
/// <c>true</c> when the property exists and holds an integral number; <c>false</c> when
/// <paramref name="root"/> is not an object, the property is missing, is not a number, or is
/// not an integer representable in 64 bits.
/// </returns>
private static bool TryReadInt32(JsonElement root, string propertyName, out int value)
{
    value = 0;

    // TryGetProperty throws on non-object elements; a Try* helper should report failure instead.
    if (root.ValueKind != JsonValueKind.Object
        || !root.TryGetProperty(propertyName, out var property)
        || property.ValueKind != JsonValueKind.Number)
    {
        return false;
    }

    if (property.TryGetInt32(out var narrow))
    {
        value = narrow;
        return true;
    }

    if (property.TryGetInt64(out var wide))
    {
        // Reaching here means the value is outside the int range on either side; saturate in
        // both directions rather than letting an unchecked cast wrap a large negative number.
        value = (int)Math.Clamp(wide, int.MinValue, int.MaxValue);
        return true;
    }

    return false;
}
/// <summary>
/// Stores a change-history record describing how <paramref name="current"/> differs from
/// <paramref name="previous"/>; does nothing when no difference is found.
/// </summary>
/// <param name="current">Advisory as just mapped.</param>
/// <param name="previous">Advisory as it was stored before the upsert.</param>
/// <param name="document">Source document the current advisory was mapped from.</param>
/// <param name="capturedAt">Timestamp stored on the record.</param>
/// <param name="cancellationToken">Token forwarded to the store.</param>
private async Task RecordChangeHistoryAsync(
    Advisory current,
    Advisory previous,
    DocumentRecord document,
    DateTimeOffset capturedAt,
    CancellationToken cancellationToken)
{
    // Cheapest check first: equal advisories need no snapshots at all.
    if (current.Equals(previous))
    {
        return;
    }

    var afterSnapshot = SnapshotSerializer.ToSnapshot(current);
    var beforeSnapshot = SnapshotSerializer.ToSnapshot(previous);
    if (string.Equals(afterSnapshot, beforeSnapshot, StringComparison.Ordinal))
    {
        return;
    }

    var fieldChanges = ComputeChanges(beforeSnapshot, afterSnapshot);
    if (fieldChanges.Count == 0)
    {
        return;
    }

    var afterHash = ComputeHash(afterSnapshot);
    var beforeHash = ComputeHash(beforeSnapshot);

    // Fall back to the snapshot hash when the document record carries no SHA-256 of its own.
    var documentHash = string.IsNullOrWhiteSpace(document.Sha256) ? afterHash : document.Sha256;

    var historyEntry = new ChangeHistoryRecord(
        Guid.NewGuid(),
        SourceName,
        current.AdvisoryKey,
        document.Id,
        documentHash,
        afterHash,
        beforeHash,
        afterSnapshot,
        beforeSnapshot,
        fieldChanges,
        capturedAt);

    await _changeHistoryStore.AddAsync(historyEntry, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Compares the top-level properties of two JSON object snapshots and lists the differences.
/// </summary>
/// <param name="previousSnapshot">JSON text of the earlier snapshot.</param>
/// <param name="currentSnapshot">JSON text of the later snapshot.</param>
/// <returns>
/// One entry per differing top-level property, ordered by property name (ordinal), tagged
/// <c>"Added"</c>, <c>"Removed"</c> or <c>"Modified"</c>, with the serialized old and new values.
/// </returns>
private static IReadOnlyList<ChangeHistoryFieldChange> ComputeChanges(string previousSnapshot, string currentSnapshot)
{
    using var beforeDocument = JsonDocument.Parse(previousSnapshot);
    using var afterDocument = JsonDocument.Parse(currentSnapshot);

    var before = beforeDocument.RootElement;
    var after = afterDocument.RootElement;

    // Union of property names from both sides, kept unique and in ordinal order.
    var fieldNames = new SortedSet<string>(StringComparer.Ordinal);
    fieldNames.UnionWith(before.EnumerateObject().Select(static property => property.Name));
    fieldNames.UnionWith(after.EnumerateObject().Select(static property => property.Name));

    var differences = new List<ChangeHistoryFieldChange>();

    foreach (var fieldName in fieldNames)
    {
        var inBefore = before.TryGetProperty(fieldName, out var oldValue);
        var inAfter = after.TryGetProperty(fieldName, out var newValue);

        switch (inBefore, inAfter)
        {
            case (false, true):
                differences.Add(new ChangeHistoryFieldChange(fieldName, "Added", null, SerializeElement(newValue)));
                break;

            case (true, false):
                differences.Add(new ChangeHistoryFieldChange(fieldName, "Removed", SerializeElement(oldValue), null));
                break;

            case (true, true) when !JsonElement.DeepEquals(oldValue, newValue):
                differences.Add(new ChangeHistoryFieldChange(fieldName, "Modified", SerializeElement(oldValue), SerializeElement(newValue)));
                break;
        }
    }

    return differences;
}
/// <summary>Serializes a JSON element to compact (non-indented) JSON text.</summary>
/// <param name="element">Element to serialize.</param>
/// <returns>The element's JSON text without insignificant whitespace.</returns>
private static string SerializeElement(JsonElement element)
    // The default options already write non-indented JSON; relying on them avoids constructing
    // a new JsonSerializerOptions instance on every call.
    => JsonSerializer.Serialize(element);
/// <summary>
/// Computes the SHA-256 digest of the UTF-8 encoding of <paramref name="snapshot"/>.
/// </summary>
/// <param name="snapshot">Text to hash.</param>
/// <returns>The digest as lowercase hexadecimal, prefixed with <c>sha256:</c>.</returns>
private static string ComputeHash(string snapshot)
{
    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(snapshot));
    var hex = Convert.ToHexString(digest).ToLowerInvariant();
    return "sha256:" + hex;
}
/// <summary>
/// Loads the persisted state for this source and converts its cursor document into an
/// <c>NvdCursor</c>. A missing state record results in <c>null</c> being passed to the conversion.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the state repository.</param>
private async Task<NvdCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    var persistedCursor = state?.Cursor;
    return NvdCursor.FromBsonDocument(persistedCursor);
}
/// <summary>
/// Persists <paramref name="cursor"/> for this source, stamped with the current time from the
/// injected clock.
/// </summary>
/// <param name="cursor">Cursor to store.</param>
/// <param name="cancellationToken">Token forwarded to the state repository.</param>
private async Task UpdateCursorAsync(NvdCursor cursor, CancellationToken cancellationToken)
{
    var completedAt = _timeProvider.GetUtcNow();
    var serializedCursor = cursor.ToBsonDocument();

    await _stateRepository
        .UpdateCursorAsync(SourceName, serializedCursor, completedAt, cancellationToken)
        .ConfigureAwait(false);
}
/// <summary>
/// Builds the request URI for one result page of a time window, based on the configured
/// base endpoint.
/// </summary>
/// <param name="window">Window whose start and end become the <c>lastModifiedStartDate</c> / <c>lastModifiedEndDate</c> parameters.</param>
/// <param name="startIndex">Offset of the page; the <c>startIndex</c> parameter is only emitted when greater than zero.</param>
/// <returns>The base endpoint with its query replaced by the page parameters.</returns>
private Uri BuildRequestUri(TimeWindow window, int startIndex = 0)
{
    const string TimestampFormat = "yyyy-MM-dd'T'HH:mm:ss.fffK";
    const string ResultsPerPage = "2000";

    // Custom date formats follow the current culture's calendar and time separator unless a
    // provider is given; the request must look the same on every host, so force invariant.
    var invariant = CultureInfo.InvariantCulture;

    // An ordered list keeps the query-string order explicit instead of relying on dictionary
    // enumeration order.
    var parameters = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("lastModifiedStartDate", window.Start.ToString(TimestampFormat, invariant)),
        new KeyValuePair<string, string>("lastModifiedEndDate", window.End.ToString(TimestampFormat, invariant)),
        new KeyValuePair<string, string>("resultsPerPage", ResultsPerPage),
    };

    if (startIndex > 0)
    {
        parameters.Add(new KeyValuePair<string, string>("startIndex", startIndex.ToString(invariant)));
    }

    var builder = new UriBuilder(_options.BaseEndpoint)
    {
        Query = string.Join(
            "&",
            parameters.Select(static kvp => $"{System.Net.WebUtility.UrlEncode(kvp.Key)}={System.Net.WebUtility.UrlEncode(kvp.Value)}")),
    };

    return builder.Uri;
}
}