feat: Add CVSS receipt management endpoints and related functionality
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled

- Introduced new API endpoints for creating, retrieving, amending, and listing CVSS receipts.
- Updated IPolicyEngineClient interface to include methods for CVSS receipt operations.
- Implemented PolicyEngineClient to handle CVSS receipt requests.
- Enhanced Program.cs to map new CVSS receipt routes with appropriate authorization.
- Added necessary models and contracts for CVSS receipt requests and responses.
- Integrated Postgres document store for managing CVSS receipts and related data.
- Updated database schema with new migrations for source documents and payload storage.
- Refactored existing components to support new CVSS functionality.
This commit is contained in:
StellaOps Bot
2025-12-07 00:43:14 +02:00
parent 0de92144d2
commit 53889d85e7
67 changed files with 17207 additions and 16293 deletions

View File

@@ -245,7 +245,7 @@ public sealed class CertBundConnector : IFeedConnector
continue;
}
if (!document.GridFsId.HasValue)
if (!document.PayloadId.HasValue)
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
@@ -258,7 +258,7 @@ public sealed class CertBundConnector : IFeedConnector
byte[] payload;
try
{
payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{

View File

@@ -1,337 +1,337 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.CertFr.Configuration;
using StellaOps.Concelier.Connector.CertFr.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertFr;
public sealed class CertFrConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly CertFrFeedClient _feedClient;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CertFrOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CertFrConnector> _logger;
public CertFrConnector(
CertFrFeedClient feedClient,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<CertFrOptions> options,
TimeProvider? timeProvider,
ILogger<CertFrConnector> logger)
{
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => CertFrConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowEnd = now;
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
var windowStart = lastPublished - _options.WindowOverlap;
var minStart = now - _options.InitialBackfill;
if (windowStart < minStart)
{
windowStart = minStart;
}
IReadOnlyList<CertFrFeedItem> items;
try
{
items = await _feedClient.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR feed load failed {Start:o}-{End:o}", windowStart, windowEnd);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (items.Count == 0)
{
await UpdateCursorAsync(cursor.WithLastPublished(windowEnd), cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
foreach (var item in items)
{
cancellationToken.ThrowIfCancellationRequested();
try
{
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, item.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
var request = new SourceFetchRequest(CertFrOptions.HttpClientName, SourceName, item.DetailUri)
{
Metadata = CertFrDocumentMetadata.CreateMetadata(item),
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified || !result.IsSuccess || result.Document is null)
{
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
continue;
}
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR fetch failed for {Uri}", item.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (maxPublished == DateTimeOffset.MinValue)
{
maxPublished = cursor.LastPublished ?? windowEnd;
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithLastPublished(maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("Cert-FR document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
CertFrDocumentMetadata metadata;
try
{
metadata = CertFrDocumentMetadata.FromDocument(document);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR metadata parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
CertFrDto dto;
try
{
var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
var html = System.Text.Encoding.UTF8.GetString(content);
dto = CertFrParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR parse failed for advisory {AdvisoryId} ({Uri})", metadata.AdvisoryId, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(json);
var validatedAt = _timeProvider.GetUtcNow();
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certfr.detail.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "certfr.detail.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
CertFrDto? dto;
try
{
var json = dtoRecord.Payload.ToJson();
dto = JsonSerializer.Deserialize<CertFrDto>(json, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR DTO deserialization failed for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (dto is null)
{
_logger.LogWarning("Cert-FR DTO payload deserialized as null for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var mappedAt = _timeProvider.GetUtcNow();
var advisory = CertFrMapper.Map(dto, SourceName, mappedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<CertFrCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return CertFrCursor.FromBson(record?.Cursor);
}
private async Task UpdateCursorAsync(CertFrCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.CertFr.Configuration;
using StellaOps.Concelier.Connector.CertFr.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertFr;
public sealed class CertFrConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly CertFrFeedClient _feedClient;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CertFrOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CertFrConnector> _logger;
public CertFrConnector(
CertFrFeedClient feedClient,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<CertFrOptions> options,
TimeProvider? timeProvider,
ILogger<CertFrConnector> logger)
{
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => CertFrConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowEnd = now;
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
var windowStart = lastPublished - _options.WindowOverlap;
var minStart = now - _options.InitialBackfill;
if (windowStart < minStart)
{
windowStart = minStart;
}
IReadOnlyList<CertFrFeedItem> items;
try
{
items = await _feedClient.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR feed load failed {Start:o}-{End:o}", windowStart, windowEnd);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (items.Count == 0)
{
await UpdateCursorAsync(cursor.WithLastPublished(windowEnd), cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
foreach (var item in items)
{
cancellationToken.ThrowIfCancellationRequested();
try
{
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, item.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
var request = new SourceFetchRequest(CertFrOptions.HttpClientName, SourceName, item.DetailUri)
{
Metadata = CertFrDocumentMetadata.CreateMetadata(item),
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified || !result.IsSuccess || result.Document is null)
{
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
continue;
}
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR fetch failed for {Uri}", item.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (maxPublished == DateTimeOffset.MinValue)
{
maxPublished = cursor.LastPublished ?? windowEnd;
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithLastPublished(maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("Cert-FR document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
CertFrDocumentMetadata metadata;
try
{
metadata = CertFrDocumentMetadata.FromDocument(document);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR metadata parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
CertFrDto dto;
try
{
var content = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
var html = System.Text.Encoding.UTF8.GetString(content);
dto = CertFrParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR parse failed for advisory {AdvisoryId} ({Uri})", metadata.AdvisoryId, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(json);
var validatedAt = _timeProvider.GetUtcNow();
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certfr.detail.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "certfr.detail.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
CertFrDto? dto;
try
{
var json = dtoRecord.Payload.ToJson();
dto = JsonSerializer.Deserialize<CertFrDto>(json, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Cert-FR DTO deserialization failed for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (dto is null)
{
_logger.LogWarning("Cert-FR DTO payload deserialized as null for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var mappedAt = _timeProvider.GetUtcNow();
var advisory = CertFrMapper.Map(dto, SourceName, mappedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<CertFrCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return CertFrCursor.FromBson(record?.Cursor);
}
private async Task UpdateCursorAsync(CertFrCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
}

View File

@@ -1,370 +1,370 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
public sealed class CertInConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly CertInClient _client;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CertInOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CertInConnector> _logger;
public CertInConnector(
CertInClient client,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<CertInOptions> options,
TimeProvider? timeProvider,
ILogger<CertInConnector> logger)
{
_client = client ?? throw new ArgumentNullException(nameof(client));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => CertInConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowStart = cursor.LastPublished.HasValue
? cursor.LastPublished.Value - _options.WindowOverlap
: now - _options.WindowSize;
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
IReadOnlyList<CertInListingItem> listings;
try
{
listings = await _client.GetListingsAsync(page, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "CERT-In listings fetch failed for page {Page}", page);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (listings.Count == 0)
{
break;
}
foreach (var listing in listings.OrderByDescending(static item => item.Published))
{
if (listing.Published < windowStart)
{
page = _options.MaxPagesPerFetch + 1;
break;
}
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["certin.advisoryId"] = listing.AdvisoryId,
["certin.title"] = listing.Title,
["certin.link"] = listing.DetailUri.ToString(),
["certin.published"] = listing.Published.ToString("O")
};
if (!string.IsNullOrWhiteSpace(listing.Summary))
{
metadata["certin.summary"] = listing.Summary!;
}
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, listing.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(CertInOptions.HttpClientName, SourceName, listing.DetailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "CERT-In fetch failed for {Uri}", listing.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(3), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
continue;
}
pendingDocuments.Add(result.Document.Id);
if (listing.Published > maxPublished)
{
maxPublished = listing.Published;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("CERT-In document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
if (!TryDeserializeListing(document.Metadata, out var listing))
{
_logger.LogWarning("CERT-In metadata missing for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to download raw CERT-In document {DocumentId}", document.Id);
throw;
}
var dto = CertInDetailParser.Parse(listing, rawBytes);
var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certin.v1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
CertInAdvisoryDto dto;
try
{
dto = JsonSerializer.Deserialize<CertInAdvisoryDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized CERT-In DTO is null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize CERT-In DTO for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var advisory = MapAdvisory(dto, document, dtoRecord);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private Advisory MapAdvisory(CertInAdvisoryDto dto, DocumentRecord document, DtoRecord dtoRecord)
{
var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", dto.AdvisoryId, dtoRecord.ValidatedAt);
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
dto.AdvisoryId,
};
foreach (var cve in dto.CveIds)
{
aliases.Add(cve);
}
var references = new List<AdvisoryReference>();
try
{
references.Add(new AdvisoryReference(
dto.Link,
"advisory",
"cert-in",
null,
new AdvisoryProvenance(SourceName, "reference", dto.Link, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
_logger.LogWarning("Invalid CERT-In link {Link} for advisory {AdvisoryId}", dto.Link, dto.AdvisoryId);
}
foreach (var cve in dto.CveIds)
{
var url = $"https://www.cve.org/CVERecord?id={cve}";
try
{
references.Add(new AdvisoryReference(
url,
"advisory",
cve,
null,
new AdvisoryProvenance(SourceName, "reference", url, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore invalid urls
}
}
foreach (var link in dto.ReferenceLinks)
{
try
{
references.Add(new AdvisoryReference(
link,
"reference",
null,
null,
new AdvisoryProvenance(SourceName, "reference", link, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore invalid urls
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
public sealed class CertInConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly CertInClient _client;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CertInOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CertInConnector> _logger;
public CertInConnector(
CertInClient client,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<CertInOptions> options,
TimeProvider? timeProvider,
ILogger<CertInConnector> logger)
{
_client = client ?? throw new ArgumentNullException(nameof(client));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => CertInConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowStart = cursor.LastPublished.HasValue
? cursor.LastPublished.Value - _options.WindowOverlap
: now - _options.WindowSize;
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
IReadOnlyList<CertInListingItem> listings;
try
{
listings = await _client.GetListingsAsync(page, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "CERT-In listings fetch failed for page {Page}", page);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (listings.Count == 0)
{
break;
}
foreach (var listing in listings.OrderByDescending(static item => item.Published))
{
if (listing.Published < windowStart)
{
page = _options.MaxPagesPerFetch + 1;
break;
}
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["certin.advisoryId"] = listing.AdvisoryId,
["certin.title"] = listing.Title,
["certin.link"] = listing.DetailUri.ToString(),
["certin.published"] = listing.Published.ToString("O")
};
if (!string.IsNullOrWhiteSpace(listing.Summary))
{
metadata["certin.summary"] = listing.Summary!;
}
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, listing.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(CertInOptions.HttpClientName, SourceName, listing.DetailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "CERT-In fetch failed for {Uri}", listing.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(3), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
continue;
}
pendingDocuments.Add(result.Document.Id);
if (listing.Published > maxPublished)
{
maxPublished = listing.Published;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("CERT-In document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
if (!TryDeserializeListing(document.Metadata, out var listing))
{
_logger.LogWarning("CERT-In metadata missing for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to download raw CERT-In document {DocumentId}", document.Id);
throw;
}
var dto = CertInDetailParser.Parse(listing, rawBytes);
var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certin.v1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
CertInAdvisoryDto dto;
try
{
dto = JsonSerializer.Deserialize<CertInAdvisoryDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized CERT-In DTO is null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize CERT-In DTO for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var advisory = MapAdvisory(dto, document, dtoRecord);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private Advisory MapAdvisory(CertInAdvisoryDto dto, DocumentRecord document, DtoRecord dtoRecord)
{
var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", dto.AdvisoryId, dtoRecord.ValidatedAt);
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
dto.AdvisoryId,
};
foreach (var cve in dto.CveIds)
{
aliases.Add(cve);
}
var references = new List<AdvisoryReference>();
try
{
references.Add(new AdvisoryReference(
dto.Link,
"advisory",
"cert-in",
null,
new AdvisoryProvenance(SourceName, "reference", dto.Link, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
_logger.LogWarning("Invalid CERT-In link {Link} for advisory {AdvisoryId}", dto.Link, dto.AdvisoryId);
}
foreach (var cve in dto.CveIds)
{
var url = $"https://www.cve.org/CVERecord?id={cve}";
try
{
references.Add(new AdvisoryReference(
url,
"advisory",
cve,
null,
new AdvisoryProvenance(SourceName, "reference", url, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore invalid urls
}
}
foreach (var link in dto.ReferenceLinks)
{
try
{
references.Add(new AdvisoryReference(
link,
"reference",
null,
null,
new AdvisoryProvenance(SourceName, "reference", link, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore invalid urls
}
}
var affectedPackages = dto.VendorNames.Select(vendor =>
{
var provenance = new AdvisoryProvenance(SourceName, "affected", vendor, dtoRecord.ValidatedAt);
@@ -398,65 +398,65 @@ public sealed class CertInConnector : IFeedConnector
provenance: new[] { provenance });
})
.ToArray();
return new Advisory(
dto.AdvisoryId,
dto.Title,
dto.Summary ?? dto.Content,
language: "en",
published: dto.Published,
modified: dto.Published,
severity: dto.Severity,
exploitKnown: false,
aliases: aliases,
references: references,
affectedPackages: affectedPackages,
cvssMetrics: Array.Empty<CvssMetric>(),
provenance: new[] { fetchProvenance, mappingProvenance });
}
private async Task<CertInCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? CertInCursor.Empty : CertInCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(CertInCursor cursor, CancellationToken cancellationToken)
{
return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
}
private static bool TryDeserializeListing(IReadOnlyDictionary<string, string>? metadata, out CertInListingItem listing)
{
listing = null!;
if (metadata is null)
{
return false;
}
if (!metadata.TryGetValue("certin.advisoryId", out var advisoryId))
{
return false;
}
if (!metadata.TryGetValue("certin.title", out var title))
{
return false;
}
if (!metadata.TryGetValue("certin.link", out var link) || !Uri.TryCreate(link, UriKind.Absolute, out var detailUri))
{
return false;
}
if (!metadata.TryGetValue("certin.published", out var publishedText) || !DateTimeOffset.TryParse(publishedText, out var published))
{
return false;
}
metadata.TryGetValue("certin.summary", out var summary);
listing = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary);
return true;
}
}
return new Advisory(
dto.AdvisoryId,
dto.Title,
dto.Summary ?? dto.Content,
language: "en",
published: dto.Published,
modified: dto.Published,
severity: dto.Severity,
exploitKnown: false,
aliases: aliases,
references: references,
affectedPackages: affectedPackages,
cvssMetrics: Array.Empty<CvssMetric>(),
provenance: new[] { fetchProvenance, mappingProvenance });
}
private async Task<CertInCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? CertInCursor.Empty : CertInCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(CertInCursor cursor, CancellationToken cancellationToken)
{
return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
}
private static bool TryDeserializeListing(IReadOnlyDictionary<string, string>? metadata, out CertInListingItem listing)
{
listing = null!;
if (metadata is null)
{
return false;
}
if (!metadata.TryGetValue("certin.advisoryId", out var advisoryId))
{
return false;
}
if (!metadata.TryGetValue("certin.title", out var title))
{
return false;
}
if (!metadata.TryGetValue("certin.link", out var link) || !Uri.TryCreate(link, UriKind.Absolute, out var detailUri))
{
return false;
}
if (!metadata.TryGetValue("certin.published", out var publishedText) || !DateTimeOffset.TryParse(publishedText, out var published))
{
return false;
}
metadata.TryGetValue("certin.summary", out var summary);
listing = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary);
return true;
}
}

View File

@@ -3,6 +3,7 @@ using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Common.Xml;
using StellaOps.Concelier.Core.Aoc;
@@ -169,7 +170,7 @@ public static class ServiceCollectionExtensions
services.AddSingleton<Fetch.IJitterSource, Fetch.CryptoJitterSource>();
services.AddConcelierAocGuards();
services.AddConcelierLinksetMappers();
services.AddSingleton<IDocumentStore, InMemoryDocumentStore>();
services.TryAddSingleton<IDocumentStore, InMemoryDocumentStore>();
services.AddSingleton<Fetch.RawDocumentStorage>();
services.AddSingleton<Fetch.SourceFetchService>();

View File

@@ -5,16 +5,16 @@ using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Cryptography;
namespace StellaOps.Concelier.Connector.Common.State;
/// <summary>
/// Persists raw documents and cursor state for connectors that require manual seeding.
/// </summary>
public sealed class SourceStateSeedProcessor
{
private readonly IDocumentStore _documentStore;
private readonly RawDocumentStorage _rawDocumentStorage;
namespace StellaOps.Concelier.Connector.Common.State;
/// <summary>
/// Persists raw documents and cursor state for connectors that require manual seeding.
/// </summary>
public sealed class SourceStateSeedProcessor
{
private readonly IDocumentStore _documentStore;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly ISourceStateRepository _stateRepository;
private readonly TimeProvider _timeProvider;
private readonly ILogger<SourceStateSeedProcessor> _logger;
@@ -35,298 +35,298 @@ public sealed class SourceStateSeedProcessor
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? NullLogger<SourceStateSeedProcessor>.Instance;
}
public async Task<SourceStateSeedResult> ProcessAsync(SourceStateSeedSpecification specification, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(specification);
ArgumentException.ThrowIfNullOrEmpty(specification.Source);
var completedAt = specification.CompletedAt ?? _timeProvider.GetUtcNow();
var documentIds = new List<Guid>();
var pendingDocumentIds = new HashSet<Guid>();
var pendingMappingIds = new HashSet<Guid>();
var knownAdvisories = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
AppendRange(knownAdvisories, specification.KnownAdvisories);
if (specification.Cursor is { } cursorSeed)
{
AppendRange(pendingDocumentIds, cursorSeed.PendingDocuments);
AppendRange(pendingMappingIds, cursorSeed.PendingMappings);
AppendRange(knownAdvisories, cursorSeed.KnownAdvisories);
}
foreach (var document in specification.Documents ?? Array.Empty<SourceStateSeedDocument>())
{
cancellationToken.ThrowIfCancellationRequested();
await ProcessDocumentAsync(specification.Source, document, completedAt, documentIds, pendingDocumentIds, pendingMappingIds, knownAdvisories, cancellationToken).ConfigureAwait(false);
}
var state = await _stateRepository.TryGetAsync(specification.Source, cancellationToken).ConfigureAwait(false);
var cursor = state?.Cursor ?? new BsonDocument();
var newlyPendingDocuments = MergeGuidArray(cursor, "pendingDocuments", pendingDocumentIds);
var newlyPendingMappings = MergeGuidArray(cursor, "pendingMappings", pendingMappingIds);
var newlyKnownAdvisories = MergeStringArray(cursor, "knownAdvisories", knownAdvisories);
if (specification.Cursor is { } cursorSpec)
{
if (cursorSpec.LastModifiedCursor.HasValue)
{
cursor["lastModifiedCursor"] = cursorSpec.LastModifiedCursor.Value.UtcDateTime;
}
if (cursorSpec.LastFetchAt.HasValue)
{
cursor["lastFetchAt"] = cursorSpec.LastFetchAt.Value.UtcDateTime;
}
if (cursorSpec.Additional is not null)
{
foreach (var kvp in cursorSpec.Additional)
{
cursor[kvp.Key] = kvp.Value;
}
}
}
cursor["lastSeededAt"] = completedAt.UtcDateTime;
await _stateRepository.UpdateCursorAsync(specification.Source, cursor, completedAt, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Seeded {Documents} document(s) for {Source}. pendingDocuments+= {PendingDocuments}, pendingMappings+= {PendingMappings}, knownAdvisories+= {KnownAdvisories}",
documentIds.Count,
specification.Source,
newlyPendingDocuments.Count,
newlyPendingMappings.Count,
newlyKnownAdvisories.Count);
return new SourceStateSeedResult(
DocumentsProcessed: documentIds.Count,
PendingDocumentsAdded: newlyPendingDocuments.Count,
PendingMappingsAdded: newlyPendingMappings.Count,
DocumentIds: documentIds.AsReadOnly(),
PendingDocumentIds: newlyPendingDocuments,
PendingMappingIds: newlyPendingMappings,
KnownAdvisoriesAdded: newlyKnownAdvisories,
CompletedAt: completedAt);
}
private async Task ProcessDocumentAsync(
string source,
SourceStateSeedDocument document,
DateTimeOffset completedAt,
List<Guid> documentIds,
HashSet<Guid> pendingDocumentIds,
HashSet<Guid> pendingMappingIds,
HashSet<string> knownAdvisories,
CancellationToken cancellationToken)
{
if (document is null)
{
throw new ArgumentNullException(nameof(document));
}
ArgumentException.ThrowIfNullOrEmpty(document.Uri);
if (document.Content is not { Length: > 0 })
{
throw new InvalidOperationException($"Seed entry for '{document.Uri}' is missing content bytes.");
}
var payload = new byte[document.Content.Length];
Buffer.BlockCopy(document.Content, 0, payload, 0, document.Content.Length);
if (!document.Uri.Contains("://", StringComparison.Ordinal))
{
_logger.LogWarning("Seed document URI '{Uri}' does not appear to be absolute.", document.Uri);
}
public async Task<SourceStateSeedResult> ProcessAsync(SourceStateSeedSpecification specification, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(specification);
ArgumentException.ThrowIfNullOrEmpty(specification.Source);
var completedAt = specification.CompletedAt ?? _timeProvider.GetUtcNow();
var documentIds = new List<Guid>();
var pendingDocumentIds = new HashSet<Guid>();
var pendingMappingIds = new HashSet<Guid>();
var knownAdvisories = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
AppendRange(knownAdvisories, specification.KnownAdvisories);
if (specification.Cursor is { } cursorSeed)
{
AppendRange(pendingDocumentIds, cursorSeed.PendingDocuments);
AppendRange(pendingMappingIds, cursorSeed.PendingMappings);
AppendRange(knownAdvisories, cursorSeed.KnownAdvisories);
}
foreach (var document in specification.Documents ?? Array.Empty<SourceStateSeedDocument>())
{
cancellationToken.ThrowIfCancellationRequested();
await ProcessDocumentAsync(specification.Source, document, completedAt, documentIds, pendingDocumentIds, pendingMappingIds, knownAdvisories, cancellationToken).ConfigureAwait(false);
}
var state = await _stateRepository.TryGetAsync(specification.Source, cancellationToken).ConfigureAwait(false);
var cursor = state?.Cursor ?? new BsonDocument();
var newlyPendingDocuments = MergeGuidArray(cursor, "pendingDocuments", pendingDocumentIds);
var newlyPendingMappings = MergeGuidArray(cursor, "pendingMappings", pendingMappingIds);
var newlyKnownAdvisories = MergeStringArray(cursor, "knownAdvisories", knownAdvisories);
if (specification.Cursor is { } cursorSpec)
{
if (cursorSpec.LastModifiedCursor.HasValue)
{
cursor["lastModifiedCursor"] = cursorSpec.LastModifiedCursor.Value.UtcDateTime;
}
if (cursorSpec.LastFetchAt.HasValue)
{
cursor["lastFetchAt"] = cursorSpec.LastFetchAt.Value.UtcDateTime;
}
if (cursorSpec.Additional is not null)
{
foreach (var kvp in cursorSpec.Additional)
{
cursor[kvp.Key] = kvp.Value;
}
}
}
cursor["lastSeededAt"] = completedAt.UtcDateTime;
await _stateRepository.UpdateCursorAsync(specification.Source, cursor, completedAt, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Seeded {Documents} document(s) for {Source}. pendingDocuments+= {PendingDocuments}, pendingMappings+= {PendingMappings}, knownAdvisories+= {KnownAdvisories}",
documentIds.Count,
specification.Source,
newlyPendingDocuments.Count,
newlyPendingMappings.Count,
newlyKnownAdvisories.Count);
return new SourceStateSeedResult(
DocumentsProcessed: documentIds.Count,
PendingDocumentsAdded: newlyPendingDocuments.Count,
PendingMappingsAdded: newlyPendingMappings.Count,
DocumentIds: documentIds.AsReadOnly(),
PendingDocumentIds: newlyPendingDocuments,
PendingMappingIds: newlyPendingMappings,
KnownAdvisoriesAdded: newlyKnownAdvisories,
CompletedAt: completedAt);
}
private async Task ProcessDocumentAsync(
string source,
SourceStateSeedDocument document,
DateTimeOffset completedAt,
List<Guid> documentIds,
HashSet<Guid> pendingDocumentIds,
HashSet<Guid> pendingMappingIds,
HashSet<string> knownAdvisories,
CancellationToken cancellationToken)
{
if (document is null)
{
throw new ArgumentNullException(nameof(document));
}
ArgumentException.ThrowIfNullOrEmpty(document.Uri);
if (document.Content is not { Length: > 0 })
{
throw new InvalidOperationException($"Seed entry for '{document.Uri}' is missing content bytes.");
}
var payload = new byte[document.Content.Length];
Buffer.BlockCopy(document.Content, 0, payload, 0, document.Content.Length);
if (!document.Uri.Contains("://", StringComparison.Ordinal))
{
_logger.LogWarning("Seed document URI '{Uri}' does not appear to be absolute.", document.Uri);
}
var contentHash = _hash.ComputeHashHex(payload, HashAlgorithms.Sha256);
var existing = await _documentStore.FindBySourceAndUriAsync(source, document.Uri, cancellationToken).ConfigureAwait(false);
if (existing?.GridFsId is { } oldGridId)
{
await _rawDocumentStorage.DeleteAsync(oldGridId, cancellationToken).ConfigureAwait(false);
}
var gridId = await _rawDocumentStorage.UploadAsync(
source,
document.Uri,
payload,
document.ContentType,
document.ExpiresAt,
cancellationToken)
.ConfigureAwait(false);
var headers = CloneDictionary(document.Headers);
if (!string.IsNullOrWhiteSpace(document.ContentType))
{
headers ??= new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
if (!headers.ContainsKey("content-type"))
{
headers["content-type"] = document.ContentType!;
}
}
var metadata = CloneDictionary(document.Metadata);
var existing = await _documentStore.FindBySourceAndUriAsync(source, document.Uri, cancellationToken).ConfigureAwait(false);
if (existing?.PayloadId is { } oldGridId)
{
await _rawDocumentStorage.DeleteAsync(oldGridId, cancellationToken).ConfigureAwait(false);
}
var gridId = await _rawDocumentStorage.UploadAsync(
source,
document.Uri,
payload,
document.ContentType,
document.ExpiresAt,
cancellationToken)
.ConfigureAwait(false);
var headers = CloneDictionary(document.Headers);
if (!string.IsNullOrWhiteSpace(document.ContentType))
{
headers ??= new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
if (!headers.ContainsKey("content-type"))
{
headers["content-type"] = document.ContentType!;
}
}
var metadata = CloneDictionary(document.Metadata);
var record = new DocumentRecord(
document.DocumentId ?? existing?.Id ?? Guid.NewGuid(),
source,
document.Uri,
document.FetchedAt ?? completedAt,
contentHash,
string.IsNullOrWhiteSpace(document.Status) ? DocumentStatuses.PendingParse : document.Status,
document.ContentType,
headers,
metadata,
document.Etag,
document.LastModified,
gridId,
string.IsNullOrWhiteSpace(document.Status) ? DocumentStatuses.PendingParse : document.Status,
document.ContentType,
headers,
metadata,
document.Etag,
document.LastModified,
gridId,
document.ExpiresAt);
var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
documentIds.Add(upserted.Id);
if (document.AddToPendingDocuments)
{
pendingDocumentIds.Add(upserted.Id);
}
if (document.AddToPendingMappings)
{
pendingMappingIds.Add(upserted.Id);
}
AppendRange(knownAdvisories, document.KnownIdentifiers);
}
private static Dictionary<string, string>? CloneDictionary(IReadOnlyDictionary<string, string>? values)
{
if (values is null || values.Count == 0)
{
return null;
}
return new Dictionary<string, string>(values, StringComparer.OrdinalIgnoreCase);
}
private static IReadOnlyCollection<Guid> MergeGuidArray(BsonDocument cursor, string field, IReadOnlyCollection<Guid> additions)
{
if (additions.Count == 0)
{
return Array.Empty<Guid>();
}
var existing = cursor.TryGetValue(field, out var value) && value is BsonArray existingArray
? existingArray.Select(AsGuid).Where(static g => g != Guid.Empty).ToHashSet()
: new HashSet<Guid>();
var newlyAdded = new List<Guid>();
foreach (var guid in additions)
{
if (guid == Guid.Empty)
{
continue;
}
if (existing.Add(guid))
{
newlyAdded.Add(guid);
}
}
if (existing.Count > 0)
{
cursor[field] = new BsonArray(existing
.Select(static g => g.ToString("D"))
.OrderBy(static s => s, StringComparer.OrdinalIgnoreCase));
}
return newlyAdded.AsReadOnly();
}
private static IReadOnlyCollection<string> MergeStringArray(BsonDocument cursor, string field, IReadOnlyCollection<string> additions)
{
if (additions.Count == 0)
{
return Array.Empty<string>();
}
var existing = cursor.TryGetValue(field, out var value) && value is BsonArray existingArray
? existingArray.Select(static v => v?.AsString ?? string.Empty)
.Where(static s => !string.IsNullOrWhiteSpace(s))
.ToHashSet(StringComparer.OrdinalIgnoreCase)
: new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var newlyAdded = new List<string>();
foreach (var entry in additions)
{
if (string.IsNullOrWhiteSpace(entry))
{
continue;
}
var normalized = entry.Trim();
if (existing.Add(normalized))
{
newlyAdded.Add(normalized);
}
}
if (existing.Count > 0)
{
cursor[field] = new BsonArray(existing
.OrderBy(static s => s, StringComparer.OrdinalIgnoreCase));
}
return newlyAdded.AsReadOnly();
}
private static Guid AsGuid(BsonValue value)
{
if (value is null)
{
return Guid.Empty;
}
return Guid.TryParse(value.ToString(), out var parsed) ? parsed : Guid.Empty;
}
private static void AppendRange(HashSet<Guid> target, IReadOnlyCollection<Guid>? values)
{
if (values is null)
{
return;
}
foreach (var guid in values)
{
if (guid != Guid.Empty)
{
target.Add(guid);
}
}
}
private static void AppendRange(HashSet<string> target, IReadOnlyCollection<string>? values)
{
if (values is null)
{
return;
}
foreach (var value in values)
{
if (string.IsNullOrWhiteSpace(value))
{
continue;
}
target.Add(value.Trim());
}
}
}
documentIds.Add(upserted.Id);
if (document.AddToPendingDocuments)
{
pendingDocumentIds.Add(upserted.Id);
}
if (document.AddToPendingMappings)
{
pendingMappingIds.Add(upserted.Id);
}
AppendRange(knownAdvisories, document.KnownIdentifiers);
}
private static Dictionary<string, string>? CloneDictionary(IReadOnlyDictionary<string, string>? values)
{
if (values is null || values.Count == 0)
{
return null;
}
return new Dictionary<string, string>(values, StringComparer.OrdinalIgnoreCase);
}
private static IReadOnlyCollection<Guid> MergeGuidArray(BsonDocument cursor, string field, IReadOnlyCollection<Guid> additions)
{
if (additions.Count == 0)
{
return Array.Empty<Guid>();
}
var existing = cursor.TryGetValue(field, out var value) && value is BsonArray existingArray
? existingArray.Select(AsGuid).Where(static g => g != Guid.Empty).ToHashSet()
: new HashSet<Guid>();
var newlyAdded = new List<Guid>();
foreach (var guid in additions)
{
if (guid == Guid.Empty)
{
continue;
}
if (existing.Add(guid))
{
newlyAdded.Add(guid);
}
}
if (existing.Count > 0)
{
cursor[field] = new BsonArray(existing
.Select(static g => g.ToString("D"))
.OrderBy(static s => s, StringComparer.OrdinalIgnoreCase));
}
return newlyAdded.AsReadOnly();
}
private static IReadOnlyCollection<string> MergeStringArray(BsonDocument cursor, string field, IReadOnlyCollection<string> additions)
{
if (additions.Count == 0)
{
return Array.Empty<string>();
}
var existing = cursor.TryGetValue(field, out var value) && value is BsonArray existingArray
? existingArray.Select(static v => v?.AsString ?? string.Empty)
.Where(static s => !string.IsNullOrWhiteSpace(s))
.ToHashSet(StringComparer.OrdinalIgnoreCase)
: new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var newlyAdded = new List<string>();
foreach (var entry in additions)
{
if (string.IsNullOrWhiteSpace(entry))
{
continue;
}
var normalized = entry.Trim();
if (existing.Add(normalized))
{
newlyAdded.Add(normalized);
}
}
if (existing.Count > 0)
{
cursor[field] = new BsonArray(existing
.OrderBy(static s => s, StringComparer.OrdinalIgnoreCase));
}
return newlyAdded.AsReadOnly();
}
private static Guid AsGuid(BsonValue value)
{
if (value is null)
{
return Guid.Empty;
}
return Guid.TryParse(value.ToString(), out var parsed) ? parsed : Guid.Empty;
}
private static void AppendRange(HashSet<Guid> target, IReadOnlyCollection<Guid>? values)
{
if (values is null)
{
return;
}
foreach (var guid in values)
{
if (guid != Guid.Empty)
{
target.Add(guid);
}
}
}
private static void AppendRange(HashSet<string> target, IReadOnlyCollection<string>? values)
{
if (values is null)
{
return;
}
foreach (var value in values)
{
if (string.IsNullOrWhiteSpace(value))
{
continue;
}
target.Add(value.Trim());
}
}
}

View File

@@ -1,434 +1,434 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Distro.RedHat.Configuration;
using StellaOps.Concelier.Connector.Distro.RedHat.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Distro.RedHat;
public sealed class RedHatConnector : IFeedConnector
{
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly ILogger<RedHatConnector> _logger;
private readonly RedHatOptions _options;
private readonly TimeProvider _timeProvider;
public RedHatConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<RedHatOptions> options,
TimeProvider? timeProvider,
ILogger<RedHatConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => RedHatConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var baseline = cursor.LastReleasedOn ?? now - _options.InitialBackfill;
var overlap = _options.Overlap > TimeSpan.Zero ? _options.Overlap : TimeSpan.Zero;
var afterThreshold = baseline - overlap;
if (afterThreshold < DateTimeOffset.UnixEpoch)
{
afterThreshold = DateTimeOffset.UnixEpoch;
}
ProvenanceDiagnostics.ReportResumeWindow(SourceName, afterThreshold, _logger);
var processedSet = new HashSet<string>(cursor.ProcessedAdvisoryIds, StringComparer.OrdinalIgnoreCase);
var newSummaries = new List<RedHatSummaryItem>();
var stopDueToOlderData = false;
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
var summaryUri = BuildSummaryUri(afterThreshold, page);
var summaryKey = summaryUri.ToString();
touchedResources.Add(summaryKey);
var cachedSummary = cursor.TryGetFetchCache(summaryKey);
var summaryMetadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["page"] = page.ToString(CultureInfo.InvariantCulture),
["type"] = "summary"
};
var summaryRequest = new SourceFetchRequest(RedHatOptions.HttpClientName, SourceName, summaryUri)
{
Metadata = summaryMetadata,
ETag = cachedSummary?.ETag,
LastModified = cachedSummary?.LastModified,
TimeoutOverride = _options.FetchTimeout,
};
SourceFetchContentResult summaryResult;
try
{
summaryResult = await _fetchService.FetchContentAsync(summaryRequest, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Red Hat Hydra summary fetch failed for {Uri}", summaryUri);
throw;
}
if (summaryResult.IsNotModified)
{
if (page == 1)
{
break;
}
continue;
}
if (!summaryResult.IsSuccess || summaryResult.Content is null)
{
continue;
}
cursor = cursor.WithFetchCache(summaryKey, summaryResult.ETag, summaryResult.LastModified);
using var document = JsonDocument.Parse(summaryResult.Content);
if (document.RootElement.ValueKind != JsonValueKind.Array)
{
_logger.LogWarning(
"Red Hat Hydra summary response had unexpected payload kind {Kind} for {Uri}",
document.RootElement.ValueKind,
summaryUri);
break;
}
var pageCount = 0;
foreach (var element in document.RootElement.EnumerateArray())
{
if (!RedHatSummaryItem.TryParse(element, out var summary))
{
continue;
}
pageCount++;
if (cursor.LastReleasedOn.HasValue)
{
if (summary.ReleasedOn < cursor.LastReleasedOn.Value - overlap)
{
stopDueToOlderData = true;
break;
}
if (summary.ReleasedOn < cursor.LastReleasedOn.Value)
{
stopDueToOlderData = true;
break;
}
if (summary.ReleasedOn == cursor.LastReleasedOn.Value && processedSet.Contains(summary.AdvisoryId))
{
continue;
}
}
newSummaries.Add(summary);
processedSet.Add(summary.AdvisoryId);
if (newSummaries.Count >= _options.MaxAdvisoriesPerFetch)
{
break;
}
}
if (newSummaries.Count >= _options.MaxAdvisoriesPerFetch || stopDueToOlderData)
{
break;
}
if (pageCount < _options.PageSize)
{
break;
}
}
if (newSummaries.Count == 0)
{
return;
}
newSummaries.Sort(static (left, right) =>
{
var compare = left.ReleasedOn.CompareTo(right.ReleasedOn);
return compare != 0
? compare
: string.CompareOrdinal(left.AdvisoryId, right.AdvisoryId);
});
var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments);
foreach (var summary in newSummaries)
{
var resourceUri = summary.ResourceUri;
var resourceKey = resourceUri.ToString();
touchedResources.Add(resourceKey);
var cached = cursor.TryGetFetchCache(resourceKey);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["advisoryId"] = summary.AdvisoryId,
["releasedOn"] = summary.ReleasedOn.ToString("O", CultureInfo.InvariantCulture)
};
var request = new SourceFetchRequest(RedHatOptions.HttpClientName, SourceName, resourceUri)
{
Metadata = metadata,
ETag = cached?.ETag,
LastModified = cached?.LastModified,
TimeoutOverride = _options.FetchTimeout,
};
try
{
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified)
{
continue;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
pendingDocuments.Add(result.Document.Id);
cursor = cursor.WithFetchCache(resourceKey, result.Document.Etag, result.Document.LastModified);
}
catch (Exception ex)
{
_logger.LogError(ex, "Red Hat Hydra advisory fetch failed for {Uri}", resourceUri);
throw;
}
}
var maxRelease = newSummaries.Max(static item => item.ReleasedOn);
var idsForMaxRelease = newSummaries
.Where(item => item.ReleasedOn == maxRelease)
.Select(item => item.AdvisoryId)
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToArray();
RedHatCursor updated;
if (cursor.LastReleasedOn.HasValue && maxRelease == cursor.LastReleasedOn.Value)
{
updated = cursor
.WithPendingDocuments(pendingDocuments)
.AddProcessedAdvisories(idsForMaxRelease)
.PruneFetchCache(touchedResources);
}
else
{
updated = cursor
.WithPendingDocuments(pendingDocuments)
.WithLastReleased(maxRelease, idsForMaxRelease)
.PruneFetchCache(touchedResources);
}
await UpdateCursorAsync(updated, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingFetch = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
DocumentRecord? document = null;
try
{
document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingFetch.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("Red Hat document {DocumentId} missing GridFS content; skipping", document.Id);
remainingFetch.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
using var jsonDocument = JsonDocument.Parse(rawBytes);
var sanitized = JsonSerializer.Serialize(jsonDocument.RootElement);
var payload = BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
"redhat.csaf.v2",
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingFetch.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
catch (Exception ex)
{
var uri = document?.Uri ?? documentId.ToString();
_logger.LogError(ex, "Red Hat CSAF parse failed for {Uri}", uri);
remainingFetch.Remove(documentId);
pendingMappings.Remove(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingFetch)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
try
{
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
using var jsonDocument = JsonDocument.Parse(json);
var advisory = RedHatMapper.Map(SourceName, dto, document, jsonDocument);
if (advisory is null)
{
pendingMappings.Remove(documentId);
continue;
}
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(documentId, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
catch (Exception ex)
{
_logger.LogError(ex, "Red Hat map failed for document {DocumentId}", documentId);
}
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<RedHatCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return RedHatCursor.FromBsonDocument(record?.Cursor);
}
private async Task UpdateCursorAsync(RedHatCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private Uri BuildSummaryUri(DateTimeOffset after, int page)
{
var builder = new UriBuilder(_options.BaseEndpoint);
var basePath = builder.Path?.TrimEnd('/') ?? string.Empty;
var summaryPath = _options.SummaryPath.TrimStart('/');
builder.Path = string.IsNullOrEmpty(basePath)
? $"/{summaryPath}"
: $"{basePath}/{summaryPath}";
var parameters = new Dictionary<string, string>(StringComparer.Ordinal)
{
["after"] = after.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture),
["per_page"] = _options.PageSize.ToString(CultureInfo.InvariantCulture),
["page"] = page.ToString(CultureInfo.InvariantCulture)
};
builder.Query = string.Join('&', parameters.Select(static kvp =>
$"{Uri.EscapeDataString(kvp.Key)}={Uri.EscapeDataString(kvp.Value)}"));
return builder.Uri;
}
}
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Distro.RedHat.Configuration;
using StellaOps.Concelier.Connector.Distro.RedHat.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Distro.RedHat;
public sealed class RedHatConnector : IFeedConnector
{
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly ILogger<RedHatConnector> _logger;
private readonly RedHatOptions _options;
private readonly TimeProvider _timeProvider;
public RedHatConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<RedHatOptions> options,
TimeProvider? timeProvider,
ILogger<RedHatConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => RedHatConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var baseline = cursor.LastReleasedOn ?? now - _options.InitialBackfill;
var overlap = _options.Overlap > TimeSpan.Zero ? _options.Overlap : TimeSpan.Zero;
var afterThreshold = baseline - overlap;
if (afterThreshold < DateTimeOffset.UnixEpoch)
{
afterThreshold = DateTimeOffset.UnixEpoch;
}
ProvenanceDiagnostics.ReportResumeWindow(SourceName, afterThreshold, _logger);
var processedSet = new HashSet<string>(cursor.ProcessedAdvisoryIds, StringComparer.OrdinalIgnoreCase);
var newSummaries = new List<RedHatSummaryItem>();
var stopDueToOlderData = false;
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
var summaryUri = BuildSummaryUri(afterThreshold, page);
var summaryKey = summaryUri.ToString();
touchedResources.Add(summaryKey);
var cachedSummary = cursor.TryGetFetchCache(summaryKey);
var summaryMetadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["page"] = page.ToString(CultureInfo.InvariantCulture),
["type"] = "summary"
};
var summaryRequest = new SourceFetchRequest(RedHatOptions.HttpClientName, SourceName, summaryUri)
{
Metadata = summaryMetadata,
ETag = cachedSummary?.ETag,
LastModified = cachedSummary?.LastModified,
TimeoutOverride = _options.FetchTimeout,
};
SourceFetchContentResult summaryResult;
try
{
summaryResult = await _fetchService.FetchContentAsync(summaryRequest, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Red Hat Hydra summary fetch failed for {Uri}", summaryUri);
throw;
}
if (summaryResult.IsNotModified)
{
if (page == 1)
{
break;
}
continue;
}
if (!summaryResult.IsSuccess || summaryResult.Content is null)
{
continue;
}
cursor = cursor.WithFetchCache(summaryKey, summaryResult.ETag, summaryResult.LastModified);
using var document = JsonDocument.Parse(summaryResult.Content);
if (document.RootElement.ValueKind != JsonValueKind.Array)
{
_logger.LogWarning(
"Red Hat Hydra summary response had unexpected payload kind {Kind} for {Uri}",
document.RootElement.ValueKind,
summaryUri);
break;
}
var pageCount = 0;
foreach (var element in document.RootElement.EnumerateArray())
{
if (!RedHatSummaryItem.TryParse(element, out var summary))
{
continue;
}
pageCount++;
if (cursor.LastReleasedOn.HasValue)
{
if (summary.ReleasedOn < cursor.LastReleasedOn.Value - overlap)
{
stopDueToOlderData = true;
break;
}
if (summary.ReleasedOn < cursor.LastReleasedOn.Value)
{
stopDueToOlderData = true;
break;
}
if (summary.ReleasedOn == cursor.LastReleasedOn.Value && processedSet.Contains(summary.AdvisoryId))
{
continue;
}
}
newSummaries.Add(summary);
processedSet.Add(summary.AdvisoryId);
if (newSummaries.Count >= _options.MaxAdvisoriesPerFetch)
{
break;
}
}
if (newSummaries.Count >= _options.MaxAdvisoriesPerFetch || stopDueToOlderData)
{
break;
}
if (pageCount < _options.PageSize)
{
break;
}
}
if (newSummaries.Count == 0)
{
return;
}
newSummaries.Sort(static (left, right) =>
{
var compare = left.ReleasedOn.CompareTo(right.ReleasedOn);
return compare != 0
? compare
: string.CompareOrdinal(left.AdvisoryId, right.AdvisoryId);
});
var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments);
foreach (var summary in newSummaries)
{
var resourceUri = summary.ResourceUri;
var resourceKey = resourceUri.ToString();
touchedResources.Add(resourceKey);
var cached = cursor.TryGetFetchCache(resourceKey);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["advisoryId"] = summary.AdvisoryId,
["releasedOn"] = summary.ReleasedOn.ToString("O", CultureInfo.InvariantCulture)
};
var request = new SourceFetchRequest(RedHatOptions.HttpClientName, SourceName, resourceUri)
{
Metadata = metadata,
ETag = cached?.ETag,
LastModified = cached?.LastModified,
TimeoutOverride = _options.FetchTimeout,
};
try
{
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified)
{
continue;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
pendingDocuments.Add(result.Document.Id);
cursor = cursor.WithFetchCache(resourceKey, result.Document.Etag, result.Document.LastModified);
}
catch (Exception ex)
{
_logger.LogError(ex, "Red Hat Hydra advisory fetch failed for {Uri}", resourceUri);
throw;
}
}
var maxRelease = newSummaries.Max(static item => item.ReleasedOn);
var idsForMaxRelease = newSummaries
.Where(item => item.ReleasedOn == maxRelease)
.Select(item => item.AdvisoryId)
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToArray();
RedHatCursor updated;
if (cursor.LastReleasedOn.HasValue && maxRelease == cursor.LastReleasedOn.Value)
{
updated = cursor
.WithPendingDocuments(pendingDocuments)
.AddProcessedAdvisories(idsForMaxRelease)
.PruneFetchCache(touchedResources);
}
else
{
updated = cursor
.WithPendingDocuments(pendingDocuments)
.WithLastReleased(maxRelease, idsForMaxRelease)
.PruneFetchCache(touchedResources);
}
await UpdateCursorAsync(updated, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingFetch = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
DocumentRecord? document = null;
try
{
document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingFetch.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("Red Hat document {DocumentId} missing GridFS content; skipping", document.Id);
remainingFetch.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var rawBytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
using var jsonDocument = JsonDocument.Parse(rawBytes);
var sanitized = JsonSerializer.Serialize(jsonDocument.RootElement);
var payload = BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
"redhat.csaf.v2",
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingFetch.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
catch (Exception ex)
{
var uri = document?.Uri ?? documentId.ToString();
_logger.LogError(ex, "Red Hat CSAF parse failed for {Uri}", uri);
remainingFetch.Remove(documentId);
pendingMappings.Remove(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingFetch)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
try
{
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
using var jsonDocument = JsonDocument.Parse(json);
var advisory = RedHatMapper.Map(SourceName, dto, document, jsonDocument);
if (advisory is null)
{
pendingMappings.Remove(documentId);
continue;
}
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(documentId, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
catch (Exception ex)
{
_logger.LogError(ex, "Red Hat map failed for document {DocumentId}", documentId);
}
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<RedHatCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return RedHatCursor.FromBsonDocument(record?.Cursor);
}
private async Task UpdateCursorAsync(RedHatCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private Uri BuildSummaryUri(DateTimeOffset after, int page)
{
var builder = new UriBuilder(_options.BaseEndpoint);
var basePath = builder.Path?.TrimEnd('/') ?? string.Empty;
var summaryPath = _options.SummaryPath.TrimStart('/');
builder.Path = string.IsNullOrEmpty(basePath)
? $"/{summaryPath}"
: $"{basePath}/{summaryPath}";
var parameters = new Dictionary<string, string>(StringComparer.Ordinal)
{
["after"] = after.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture),
["per_page"] = _options.PageSize.ToString(CultureInfo.InvariantCulture),
["page"] = page.ToString(CultureInfo.InvariantCulture)
};
builder.Query = string.Join('&', parameters.Select(static kvp =>
$"{Uri.EscapeDataString(kvp.Key)}={Uri.EscapeDataString(kvp.Value)}"));
return builder.Uri;
}
}

View File

@@ -1,384 +1,384 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
public sealed class KasperskyConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly KasperskyFeedClient _feedClient;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly KasperskyOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<KasperskyConnector> _logger;
public KasperskyConnector(
KasperskyFeedClient feedClient,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<KasperskyOptions> options,
TimeProvider? timeProvider,
ILogger<KasperskyConnector> logger)
{
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => KasperskyConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowStart = cursor.LastPublished.HasValue
? cursor.LastPublished.Value - _options.WindowOverlap
: now - _options.WindowSize;
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
var cursorState = cursor;
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
IReadOnlyList<KasperskyFeedItem> items;
try
{
items = await _feedClient.GetItemsAsync(page, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to load Kaspersky ICS feed page {Page}", page);
await _stateRepository.MarkFailureAsync(
SourceName,
now,
TimeSpan.FromMinutes(5),
ex.Message,
cancellationToken).ConfigureAwait(false);
throw;
}
if (items.Count == 0)
{
break;
}
foreach (var item in items)
{
if (item.Published < windowStart)
{
page = _options.MaxPagesPerFetch + 1;
break;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["kaspersky.title"] = item.Title,
["kaspersky.link"] = item.Link.ToString(),
["kaspersky.published"] = item.Published.ToString("O"),
};
if (!string.IsNullOrWhiteSpace(item.Summary))
{
metadata["kaspersky.summary"] = item.Summary!;
}
var slug = ExtractSlug(item.Link);
if (!string.IsNullOrWhiteSpace(slug))
{
metadata["kaspersky.slug"] = slug;
}
var resourceKey = item.Link.ToString();
touchedResources.Add(resourceKey);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, resourceKey, cancellationToken).ConfigureAwait(false);
var fetchRequest = new SourceFetchRequest(KasperskyOptions.HttpClientName, SourceName, item.Link)
{
Metadata = metadata,
};
if (cursorState.TryGetFetchMetadata(resourceKey, out var cachedFetch))
{
fetchRequest = fetchRequest with
{
ETag = cachedFetch.ETag,
LastModified = cachedFetch.LastModified,
};
}
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(fetchRequest, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to fetch Kaspersky advisory {Link}", item.Link);
await _stateRepository.MarkFailureAsync(
SourceName,
_timeProvider.GetUtcNow(),
TimeSpan.FromMinutes(5),
ex.Message,
cancellationToken).ConfigureAwait(false);
throw;
}
if (result.IsNotModified)
{
continue;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
cursorState = cursorState.WithFetchMetadata(resourceKey, result.Document.Etag, result.Document.LastModified);
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
continue;
}
pendingDocuments.Add(result.Document.Id);
cursorState = cursorState.WithFetchMetadata(resourceKey, result.Document.Etag, result.Document.LastModified);
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
}
}
cursorState = cursorState.PruneFetchCache(touchedResources);
var updatedCursor = cursorState
.WithPendingDocuments(pendingDocuments)
.WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("Kaspersky document {DocumentId} missing GridFS content", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
var metadata = document.Metadata ?? new Dictionary<string, string>();
var title = metadata.TryGetValue("kaspersky.title", out var titleValue) ? titleValue : document.Uri;
var link = metadata.TryGetValue("kaspersky.link", out var linkValue) ? linkValue : document.Uri;
var published = metadata.TryGetValue("kaspersky.published", out var publishedValue) && DateTimeOffset.TryParse(publishedValue, out var parsedPublished)
? parsedPublished.ToUniversalTime()
: document.FetchedAt;
var summary = metadata.TryGetValue("kaspersky.summary", out var summaryValue) ? summaryValue : null;
var slug = metadata.TryGetValue("kaspersky.slug", out var slugValue) ? slugValue : ExtractSlug(new Uri(link, UriKind.Absolute));
var advisoryKey = string.IsNullOrWhiteSpace(slug) ? Guid.NewGuid().ToString("N") : slug;
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed downloading raw Kaspersky document {DocumentId}", document.Id);
throw;
}
var dto = KasperskyAdvisoryParser.Parse(advisoryKey, title, link, published, summary, rawBytes);
var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "ics.kaspersky/1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
_logger.LogWarning("Skipping Kaspersky mapping for {DocumentId}: DTO or document missing", documentId);
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dto.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
KasperskyAdvisoryDto advisoryDto;
try
{
advisoryDto = JsonSerializer.Deserialize<KasperskyAdvisoryDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized DTO was null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize Kaspersky DTO for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", advisoryDto.AdvisoryKey, dto.ValidatedAt);
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
advisoryDto.AdvisoryKey,
};
foreach (var cve in advisoryDto.CveIds)
{
aliases.Add(cve);
}
var references = new List<AdvisoryReference>();
try
{
references.Add(new AdvisoryReference(
advisoryDto.Link,
"advisory",
"kaspersky-ics",
null,
new AdvisoryProvenance(SourceName, "reference", advisoryDto.Link, dto.ValidatedAt)));
}
catch (ArgumentException)
{
_logger.LogWarning("Invalid advisory link {Link} for {AdvisoryKey}", advisoryDto.Link, advisoryDto.AdvisoryKey);
}
foreach (var cve in advisoryDto.CveIds)
{
var url = $"https://www.cve.org/CVERecord?id={cve}";
try
{
references.Add(new AdvisoryReference(
url,
"advisory",
cve,
null,
new AdvisoryProvenance(SourceName, "reference", url, dto.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore malformed
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
public sealed class KasperskyConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly KasperskyFeedClient _feedClient;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly KasperskyOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<KasperskyConnector> _logger;
public KasperskyConnector(
KasperskyFeedClient feedClient,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<KasperskyOptions> options,
TimeProvider? timeProvider,
ILogger<KasperskyConnector> logger)
{
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => KasperskyConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowStart = cursor.LastPublished.HasValue
? cursor.LastPublished.Value - _options.WindowOverlap
: now - _options.WindowSize;
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
var cursorState = cursor;
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
IReadOnlyList<KasperskyFeedItem> items;
try
{
items = await _feedClient.GetItemsAsync(page, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to load Kaspersky ICS feed page {Page}", page);
await _stateRepository.MarkFailureAsync(
SourceName,
now,
TimeSpan.FromMinutes(5),
ex.Message,
cancellationToken).ConfigureAwait(false);
throw;
}
if (items.Count == 0)
{
break;
}
foreach (var item in items)
{
if (item.Published < windowStart)
{
page = _options.MaxPagesPerFetch + 1;
break;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["kaspersky.title"] = item.Title,
["kaspersky.link"] = item.Link.ToString(),
["kaspersky.published"] = item.Published.ToString("O"),
};
if (!string.IsNullOrWhiteSpace(item.Summary))
{
metadata["kaspersky.summary"] = item.Summary!;
}
var slug = ExtractSlug(item.Link);
if (!string.IsNullOrWhiteSpace(slug))
{
metadata["kaspersky.slug"] = slug;
}
var resourceKey = item.Link.ToString();
touchedResources.Add(resourceKey);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, resourceKey, cancellationToken).ConfigureAwait(false);
var fetchRequest = new SourceFetchRequest(KasperskyOptions.HttpClientName, SourceName, item.Link)
{
Metadata = metadata,
};
if (cursorState.TryGetFetchMetadata(resourceKey, out var cachedFetch))
{
fetchRequest = fetchRequest with
{
ETag = cachedFetch.ETag,
LastModified = cachedFetch.LastModified,
};
}
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(fetchRequest, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to fetch Kaspersky advisory {Link}", item.Link);
await _stateRepository.MarkFailureAsync(
SourceName,
_timeProvider.GetUtcNow(),
TimeSpan.FromMinutes(5),
ex.Message,
cancellationToken).ConfigureAwait(false);
throw;
}
if (result.IsNotModified)
{
continue;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
cursorState = cursorState.WithFetchMetadata(resourceKey, result.Document.Etag, result.Document.LastModified);
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
continue;
}
pendingDocuments.Add(result.Document.Id);
cursorState = cursorState.WithFetchMetadata(resourceKey, result.Document.Etag, result.Document.LastModified);
if (item.Published > maxPublished)
{
maxPublished = item.Published;
}
}
}
cursorState = cursorState.PruneFetchCache(touchedResources);
var updatedCursor = cursorState
.WithPendingDocuments(pendingDocuments)
.WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("Kaspersky document {DocumentId} missing GridFS content", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
var metadata = document.Metadata ?? new Dictionary<string, string>();
var title = metadata.TryGetValue("kaspersky.title", out var titleValue) ? titleValue : document.Uri;
var link = metadata.TryGetValue("kaspersky.link", out var linkValue) ? linkValue : document.Uri;
var published = metadata.TryGetValue("kaspersky.published", out var publishedValue) && DateTimeOffset.TryParse(publishedValue, out var parsedPublished)
? parsedPublished.ToUniversalTime()
: document.FetchedAt;
var summary = metadata.TryGetValue("kaspersky.summary", out var summaryValue) ? summaryValue : null;
var slug = metadata.TryGetValue("kaspersky.slug", out var slugValue) ? slugValue : ExtractSlug(new Uri(link, UriKind.Absolute));
var advisoryKey = string.IsNullOrWhiteSpace(slug) ? Guid.NewGuid().ToString("N") : slug;
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed downloading raw Kaspersky document {DocumentId}", document.Id);
throw;
}
var dto = KasperskyAdvisoryParser.Parse(advisoryKey, title, link, published, summary, rawBytes);
var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "ics.kaspersky/1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
_logger.LogWarning("Skipping Kaspersky mapping for {DocumentId}: DTO or document missing", documentId);
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dto.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
KasperskyAdvisoryDto advisoryDto;
try
{
advisoryDto = JsonSerializer.Deserialize<KasperskyAdvisoryDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized DTO was null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize Kaspersky DTO for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", advisoryDto.AdvisoryKey, dto.ValidatedAt);
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
advisoryDto.AdvisoryKey,
};
foreach (var cve in advisoryDto.CveIds)
{
aliases.Add(cve);
}
var references = new List<AdvisoryReference>();
try
{
references.Add(new AdvisoryReference(
advisoryDto.Link,
"advisory",
"kaspersky-ics",
null,
new AdvisoryProvenance(SourceName, "reference", advisoryDto.Link, dto.ValidatedAt)));
}
catch (ArgumentException)
{
_logger.LogWarning("Invalid advisory link {Link} for {AdvisoryKey}", advisoryDto.Link, advisoryDto.AdvisoryKey);
}
foreach (var cve in advisoryDto.CveIds)
{
var url = $"https://www.cve.org/CVERecord?id={cve}";
try
{
references.Add(new AdvisoryReference(
url,
"advisory",
cve,
null,
new AdvisoryProvenance(SourceName, "reference", url, dto.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore malformed
}
}
var affectedPackages = new List<AffectedPackage>();
foreach (var vendor in advisoryDto.VendorNames)
{
@@ -413,52 +413,52 @@ public sealed class KasperskyConnector : IFeedConnector
statuses: Array.Empty<AffectedPackageStatus>(),
provenance: provenance));
}
var advisory = new Advisory(
advisoryDto.AdvisoryKey,
advisoryDto.Title,
advisoryDto.Summary ?? advisoryDto.Content,
language: "en",
published: advisoryDto.Published,
modified: advisoryDto.Published,
severity: null,
exploitKnown: false,
aliases: aliases,
references: references,
affectedPackages: affectedPackages,
cvssMetrics: Array.Empty<CvssMetric>(),
provenance: new[] { fetchProvenance, mappingProvenance });
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<KasperskyCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? KasperskyCursor.Empty : KasperskyCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(KasperskyCursor cursor, CancellationToken cancellationToken)
{
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
private static string? ExtractSlug(Uri link)
{
var segments = link.Segments;
if (segments.Length == 0)
{
return null;
}
var last = segments[^1].Trim('/');
return string.IsNullOrWhiteSpace(last) && segments.Length > 1 ? segments[^2].Trim('/') : last;
}
}
var advisory = new Advisory(
advisoryDto.AdvisoryKey,
advisoryDto.Title,
advisoryDto.Summary ?? advisoryDto.Content,
language: "en",
published: advisoryDto.Published,
modified: advisoryDto.Published,
severity: null,
exploitKnown: false,
aliases: aliases,
references: references,
affectedPackages: affectedPackages,
cvssMetrics: Array.Empty<CvssMetric>(),
provenance: new[] { fetchProvenance, mappingProvenance });
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<KasperskyCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? KasperskyCursor.Empty : KasperskyCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(KasperskyCursor cursor, CancellationToken cancellationToken)
{
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
private static string? ExtractSlug(Uri link)
{
var segments = link.Segments;
if (segments.Length == 0)
{
return null;
}
var last = segments[^1].Trim('/');
return string.IsNullOrWhiteSpace(last) && segments.Length > 1 ? segments[^2].Trim('/') : last;
}
}

View File

@@ -1,325 +1,325 @@
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Jvn.Configuration;
using StellaOps.Concelier.Connector.Jvn.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.JpFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Jvn;
public sealed class JvnConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly MyJvnClient _client;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IJpFlagStore _jpFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly TimeProvider _timeProvider;
private readonly JvnOptions _options;
private readonly ILogger<JvnConnector> _logger;
public JvnConnector(
MyJvnClient client,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IJpFlagStore jpFlagStore,
ISourceStateRepository stateRepository,
IOptions<JvnOptions> options,
TimeProvider? timeProvider,
ILogger<JvnConnector> logger)
{
_client = client ?? throw new ArgumentNullException(nameof(client));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_jpFlagStore = jpFlagStore ?? throw new ArgumentNullException(nameof(jpFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => JvnConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowEnd = now;
var defaultWindowStart = windowEnd - _options.WindowSize;
var windowStart = cursor.LastCompletedWindowEnd.HasValue
? cursor.LastCompletedWindowEnd.Value - _options.WindowOverlap
: defaultWindowStart;
if (windowStart < defaultWindowStart)
{
windowStart = defaultWindowStart;
}
if (windowStart >= windowEnd)
{
windowStart = windowEnd - TimeSpan.FromHours(1);
}
_logger.LogInformation("JVN fetch window {WindowStart:o} - {WindowEnd:o}", windowStart, windowEnd);
IReadOnlyList<JvnOverviewItem> overviewItems;
try
{
overviewItems = await _client.GetOverviewAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to retrieve JVN overview between {Start:o} and {End:o}", windowStart, windowEnd);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
_logger.LogInformation("JVN overview returned {Count} items", overviewItems.Count);
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
foreach (var item in overviewItems)
{
cancellationToken.ThrowIfCancellationRequested();
var detailUri = _client.BuildDetailUri(item.VulnerabilityId);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["jvn.vulnId"] = item.VulnerabilityId,
["jvn.detailUrl"] = detailUri.ToString(),
};
if (item.DateFirstPublished.HasValue)
{
metadata["jvn.firstPublished"] = item.DateFirstPublished.Value.ToString("O");
}
if (item.DateLastUpdated.HasValue)
{
metadata["jvn.lastUpdated"] = item.DateLastUpdated.Value.ToString("O");
}
var result = await _fetchService.FetchAsync(
new SourceFetchRequest(JvnOptions.HttpClientName, SourceName, detailUri)
{
Metadata = metadata
},
cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
if (!result.IsNotModified)
{
_logger.LogWarning("JVN fetch for {Uri} returned status {Status}", detailUri, result.StatusCode);
}
continue;
}
_logger.LogDebug("JVN fetched document {DocumentId}", result.Document.Id);
pendingDocuments.Add(result.Document.Id);
}
var updatedCursor = cursor
.WithWindow(windowStart, windowEnd)
.WithCompletedWindow(windowEnd)
.WithPendingDocuments(pendingDocuments);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
_logger.LogDebug("JVN parse pending documents: {PendingCount}", cursor.PendingDocuments.Count);
Console.WriteLine($"JVN parse pending count: {cursor.PendingDocuments.Count}");
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
_logger.LogDebug("JVN parsing document {DocumentId}", documentId);
Console.WriteLine($"JVN parsing document {documentId}");
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
_logger.LogWarning("JVN document {DocumentId} no longer exists; skipping", documentId);
remainingDocuments.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("JVN document {DocumentId} is missing GridFS content; marking as failed", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Unable to download raw JVN document {DocumentId}", document.Id);
throw;
}
JvnDetailDto detail;
try
{
detail = JvnDetailParser.Parse(rawBytes, document.Uri);
}
catch (JvnSchemaValidationException ex)
{
Console.WriteLine($"JVN schema validation exception: {ex.Message}");
_logger.LogWarning(ex, "JVN schema validation failed for document {DocumentId} ({Uri})", document.Id, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
throw;
}
var sanitizedJson = JsonSerializer.Serialize(detail, SerializerOptions);
var payload = BsonDocument.Parse(sanitizedJson);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
JvnConstants.DtoSchemaVersion,
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
Console.WriteLine($"Added mapping for {documentId}");
_logger.LogDebug("JVN parsed document {DocumentId}", documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
_logger.LogDebug("JVN map pending mappings: {PendingCount}", cursor.PendingMappings.Count);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
_logger.LogWarning("Skipping JVN mapping for {DocumentId}: DTO or document missing", documentId);
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dto.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
JvnDetailDto detail;
try
{
detail = JsonSerializer.Deserialize<JvnDetailDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized DTO was null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize JVN DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var (advisory, flag) = JvnAdvisoryMapper.Map(detail, document, dto, _timeProvider);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _jpFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
_logger.LogDebug("JVN mapped document {DocumentId}", documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<JvnCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? JvnCursor.Empty : JvnCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(JvnCursor cursor, CancellationToken cancellationToken)
{
var cursorDocument = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, cursorDocument, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Jvn.Configuration;
using StellaOps.Concelier.Connector.Jvn.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.JpFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Jvn;
public sealed class JvnConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly MyJvnClient _client;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IJpFlagStore _jpFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly TimeProvider _timeProvider;
private readonly JvnOptions _options;
private readonly ILogger<JvnConnector> _logger;
public JvnConnector(
MyJvnClient client,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IJpFlagStore jpFlagStore,
ISourceStateRepository stateRepository,
IOptions<JvnOptions> options,
TimeProvider? timeProvider,
ILogger<JvnConnector> logger)
{
_client = client ?? throw new ArgumentNullException(nameof(client));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_jpFlagStore = jpFlagStore ?? throw new ArgumentNullException(nameof(jpFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => JvnConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowEnd = now;
var defaultWindowStart = windowEnd - _options.WindowSize;
var windowStart = cursor.LastCompletedWindowEnd.HasValue
? cursor.LastCompletedWindowEnd.Value - _options.WindowOverlap
: defaultWindowStart;
if (windowStart < defaultWindowStart)
{
windowStart = defaultWindowStart;
}
if (windowStart >= windowEnd)
{
windowStart = windowEnd - TimeSpan.FromHours(1);
}
_logger.LogInformation("JVN fetch window {WindowStart:o} - {WindowEnd:o}", windowStart, windowEnd);
IReadOnlyList<JvnOverviewItem> overviewItems;
try
{
overviewItems = await _client.GetOverviewAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to retrieve JVN overview between {Start:o} and {End:o}", windowStart, windowEnd);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
_logger.LogInformation("JVN overview returned {Count} items", overviewItems.Count);
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
foreach (var item in overviewItems)
{
cancellationToken.ThrowIfCancellationRequested();
var detailUri = _client.BuildDetailUri(item.VulnerabilityId);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["jvn.vulnId"] = item.VulnerabilityId,
["jvn.detailUrl"] = detailUri.ToString(),
};
if (item.DateFirstPublished.HasValue)
{
metadata["jvn.firstPublished"] = item.DateFirstPublished.Value.ToString("O");
}
if (item.DateLastUpdated.HasValue)
{
metadata["jvn.lastUpdated"] = item.DateLastUpdated.Value.ToString("O");
}
var result = await _fetchService.FetchAsync(
new SourceFetchRequest(JvnOptions.HttpClientName, SourceName, detailUri)
{
Metadata = metadata
},
cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
if (!result.IsNotModified)
{
_logger.LogWarning("JVN fetch for {Uri} returned status {Status}", detailUri, result.StatusCode);
}
continue;
}
_logger.LogDebug("JVN fetched document {DocumentId}", result.Document.Id);
pendingDocuments.Add(result.Document.Id);
}
var updatedCursor = cursor
.WithWindow(windowStart, windowEnd)
.WithCompletedWindow(windowEnd)
.WithPendingDocuments(pendingDocuments);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
_logger.LogDebug("JVN parse pending documents: {PendingCount}", cursor.PendingDocuments.Count);
Console.WriteLine($"JVN parse pending count: {cursor.PendingDocuments.Count}");
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
_logger.LogDebug("JVN parsing document {DocumentId}", documentId);
Console.WriteLine($"JVN parsing document {documentId}");
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
_logger.LogWarning("JVN document {DocumentId} no longer exists; skipping", documentId);
remainingDocuments.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("JVN document {DocumentId} is missing GridFS content; marking as failed", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Unable to download raw JVN document {DocumentId}", document.Id);
throw;
}
JvnDetailDto detail;
try
{
detail = JvnDetailParser.Parse(rawBytes, document.Uri);
}
catch (JvnSchemaValidationException ex)
{
Console.WriteLine($"JVN schema validation exception: {ex.Message}");
_logger.LogWarning(ex, "JVN schema validation failed for document {DocumentId} ({Uri})", document.Id, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
throw;
}
var sanitizedJson = JsonSerializer.Serialize(detail, SerializerOptions);
var payload = BsonDocument.Parse(sanitizedJson);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
JvnConstants.DtoSchemaVersion,
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
Console.WriteLine($"Added mapping for {documentId}");
_logger.LogDebug("JVN parsed document {DocumentId}", documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
_logger.LogDebug("JVN map pending mappings: {PendingCount}", cursor.PendingMappings.Count);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
_logger.LogWarning("Skipping JVN mapping for {DocumentId}: DTO or document missing", documentId);
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dto.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
JvnDetailDto detail;
try
{
detail = JsonSerializer.Deserialize<JvnDetailDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized DTO was null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize JVN DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var (advisory, flag) = JvnAdvisoryMapper.Map(detail, document, dto, _timeProvider);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _jpFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
_logger.LogDebug("JVN mapped document {DocumentId}", documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<JvnCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? JvnCursor.Empty : JvnCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(JvnCursor cursor, CancellationToken cancellationToken)
{
var cursorDocument = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, cursorDocument, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
}

View File

@@ -1,441 +1,441 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Kev.Configuration;
using StellaOps.Concelier.Connector.Kev.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Kev;
public sealed class KevConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};
private const string SchemaVersion = "kev.catalog.v1";
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly KevOptions _options;
private readonly IJsonSchemaValidator _schemaValidator;
private readonly TimeProvider _timeProvider;
private readonly ILogger<KevConnector> _logger;
private readonly KevDiagnostics _diagnostics;
public KevConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<KevOptions> options,
IJsonSchemaValidator schemaValidator,
KevDiagnostics diagnostics,
TimeProvider? timeProvider,
ILogger<KevConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => KevConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
try
{
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, _options.FeedUri.ToString(), cancellationToken).ConfigureAwait(false);
var request = new SourceFetchRequest(
KevOptions.HttpClientName,
SourceName,
_options.FeedUri)
{
Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["kev.cursor.catalogVersion"] = cursor.CatalogVersion ?? string.Empty,
["kev.cursor.catalogReleased"] = cursor.CatalogReleased?.ToString("O") ?? string.Empty,
},
ETag = existing?.Etag,
LastModified = existing?.LastModified,
TimeoutOverride = _options.RequestTimeout,
AcceptHeaders = new[] { "application/json", "text/json" },
};
_diagnostics.FetchAttempt();
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified)
{
_diagnostics.FetchUnchanged();
_logger.LogInformation(
"KEV catalog not modified (catalogVersion={CatalogVersion}, etag={Etag})",
cursor.CatalogVersion ?? "(unknown)",
existing?.Etag ?? "(none)");
await UpdateCursorAsync(cursor, cancellationToken).ConfigureAwait(false);
return;
}
if (!result.IsSuccess || result.Document is null)
{
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), "KEV feed returned no content.", cancellationToken).ConfigureAwait(false);
return;
}
_diagnostics.FetchSuccess();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var pendingDocumentsBefore = pendingDocuments.Count;
var pendingMappingsBefore = pendingMappings.Count;
pendingDocuments.Add(result.Document.Id);
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
var document = result.Document;
var lastModified = document.LastModified?.ToUniversalTime().ToString("O") ?? "(unknown)";
_logger.LogInformation(
"Fetched KEV catalog document {DocumentId} (etag={Etag}, lastModified={LastModified}) pendingDocuments={PendingDocumentsBefore}->{PendingDocumentsAfter} pendingMappings={PendingMappingsBefore}->{PendingMappingsAfter}",
document.Id,
document.Etag ?? "(none)",
lastModified,
pendingDocumentsBefore,
pendingDocuments.Count,
pendingMappingsBefore,
pendingMappings.Count);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "KEV fetch failed for {Uri}", _options.FeedUri);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var latestCatalogVersion = cursor.CatalogVersion;
var latestCatalogReleased = cursor.CatalogReleased;
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_diagnostics.ParseFailure("missingPayload", cursor.CatalogVersion);
_logger.LogWarning("KEV document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.ParseFailure("download", cursor.CatalogVersion);
_logger.LogError(ex, "KEV parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
KevCatalogDto? catalog = null;
string? catalogVersion = null;
try
{
using var jsonDocument = JsonDocument.Parse(rawBytes);
catalogVersion = TryGetCatalogVersion(jsonDocument.RootElement);
_schemaValidator.Validate(jsonDocument, KevSchemaProvider.Schema, document.Uri);
catalog = jsonDocument.RootElement.Deserialize<KevCatalogDto>(SerializerOptions);
}
catch (JsonSchemaValidationException ex)
{
_diagnostics.ParseFailure("schema", catalogVersion);
_logger.LogWarning(ex, "KEV schema validation failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
catch (JsonException ex)
{
_diagnostics.ParseFailure("invalidJson", catalogVersion);
_logger.LogError(ex, "KEV JSON parsing failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
catch (Exception ex)
{
_diagnostics.ParseFailure("deserialize", catalogVersion);
_logger.LogError(ex, "KEV catalog deserialization failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (catalog is null)
{
_diagnostics.ParseFailure("emptyCatalog", catalogVersion);
_logger.LogWarning("KEV catalog payload was empty for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var entryCount = catalog.Vulnerabilities?.Count ?? 0;
var released = catalog.DateReleased?.ToUniversalTime();
RecordCatalogAnomalies(catalog);
try
{
var payloadJson = JsonSerializer.Serialize(catalog, SerializerOptions);
var payload = BsonDocument.Parse(payloadJson);
_logger.LogInformation(
"Parsed KEV catalog document {DocumentId} (version={CatalogVersion}, released={Released}, entries={EntryCount})",
document.Id,
catalog.CatalogVersion ?? "(unknown)",
released,
entryCount);
_diagnostics.CatalogParsed(catalog.CatalogVersion, entryCount);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
SchemaVersion,
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Add(document.Id);
latestCatalogVersion = catalog.CatalogVersion ?? latestCatalogVersion;
latestCatalogReleased = catalog.DateReleased ?? latestCatalogReleased;
}
catch (Exception ex)
{
_logger.LogError(ex, "KEV DTO persistence failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings)
.WithCatalogMetadata(latestCatalogVersion, latestCatalogReleased);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
KevCatalogDto? catalog;
try
{
var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
catalog = JsonSerializer.Deserialize<KevCatalogDto>(dtoJson, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "KEV mapping: failed to deserialize DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (catalog is null)
{
_logger.LogWarning("KEV mapping: DTO payload was empty for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var feedUri = TryParseUri(document.Uri) ?? _options.FeedUri;
var advisories = KevMapper.Map(catalog, SourceName, feedUri, document.FetchedAt, dtoRecord.ValidatedAt);
var entryCount = catalog.Vulnerabilities?.Count ?? 0;
var mappedCount = advisories.Count;
var skippedCount = Math.Max(0, entryCount - mappedCount);
_logger.LogInformation(
"Mapped {MappedCount}/{EntryCount} KEV advisories from catalog version {CatalogVersion} (skipped={SkippedCount})",
mappedCount,
entryCount,
catalog.CatalogVersion ?? "(unknown)",
skippedCount);
_diagnostics.AdvisoriesMapped(catalog.CatalogVersion, mappedCount);
foreach (var advisory in advisories)
{
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
}
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<KevCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? KevCursor.Empty : KevCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(KevCursor cursor, CancellationToken cancellationToken)
{
return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
}
private void RecordCatalogAnomalies(KevCatalogDto catalog)
{
ArgumentNullException.ThrowIfNull(catalog);
var version = catalog.CatalogVersion;
var vulnerabilities = catalog.Vulnerabilities ?? Array.Empty<KevVulnerabilityDto>();
if (catalog.Count != vulnerabilities.Count)
{
_diagnostics.RecordAnomaly("countMismatch", version);
}
foreach (var entry in vulnerabilities)
{
if (entry is null)
{
_diagnostics.RecordAnomaly("nullEntry", version);
continue;
}
if (string.IsNullOrWhiteSpace(entry.CveId))
{
_diagnostics.RecordAnomaly("missingCveId", version);
}
}
}
private static string? TryGetCatalogVersion(JsonElement root)
{
if (root.ValueKind != JsonValueKind.Object)
{
return null;
}
if (root.TryGetProperty("catalogVersion", out var versionElement) && versionElement.ValueKind == JsonValueKind.String)
{
return versionElement.GetString();
}
return null;
}
private static Uri? TryParseUri(string? value)
=> Uri.TryCreate(value, UriKind.Absolute, out var uri) ? uri : null;
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Kev.Configuration;
using StellaOps.Concelier.Connector.Kev.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Kev;
public sealed class KevConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};
private const string SchemaVersion = "kev.catalog.v1";
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly KevOptions _options;
private readonly IJsonSchemaValidator _schemaValidator;
private readonly TimeProvider _timeProvider;
private readonly ILogger<KevConnector> _logger;
private readonly KevDiagnostics _diagnostics;
public KevConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<KevOptions> options,
IJsonSchemaValidator schemaValidator,
KevDiagnostics diagnostics,
TimeProvider? timeProvider,
ILogger<KevConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => KevConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
try
{
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, _options.FeedUri.ToString(), cancellationToken).ConfigureAwait(false);
var request = new SourceFetchRequest(
KevOptions.HttpClientName,
SourceName,
_options.FeedUri)
{
Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["kev.cursor.catalogVersion"] = cursor.CatalogVersion ?? string.Empty,
["kev.cursor.catalogReleased"] = cursor.CatalogReleased?.ToString("O") ?? string.Empty,
},
ETag = existing?.Etag,
LastModified = existing?.LastModified,
TimeoutOverride = _options.RequestTimeout,
AcceptHeaders = new[] { "application/json", "text/json" },
};
_diagnostics.FetchAttempt();
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified)
{
_diagnostics.FetchUnchanged();
_logger.LogInformation(
"KEV catalog not modified (catalogVersion={CatalogVersion}, etag={Etag})",
cursor.CatalogVersion ?? "(unknown)",
existing?.Etag ?? "(none)");
await UpdateCursorAsync(cursor, cancellationToken).ConfigureAwait(false);
return;
}
if (!result.IsSuccess || result.Document is null)
{
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), "KEV feed returned no content.", cancellationToken).ConfigureAwait(false);
return;
}
_diagnostics.FetchSuccess();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var pendingDocumentsBefore = pendingDocuments.Count;
var pendingMappingsBefore = pendingMappings.Count;
pendingDocuments.Add(result.Document.Id);
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
var document = result.Document;
var lastModified = document.LastModified?.ToUniversalTime().ToString("O") ?? "(unknown)";
_logger.LogInformation(
"Fetched KEV catalog document {DocumentId} (etag={Etag}, lastModified={LastModified}) pendingDocuments={PendingDocumentsBefore}->{PendingDocumentsAfter} pendingMappings={PendingMappingsBefore}->{PendingMappingsAfter}",
document.Id,
document.Etag ?? "(none)",
lastModified,
pendingDocumentsBefore,
pendingDocuments.Count,
pendingMappingsBefore,
pendingMappings.Count);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "KEV fetch failed for {Uri}", _options.FeedUri);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var latestCatalogVersion = cursor.CatalogVersion;
var latestCatalogReleased = cursor.CatalogReleased;
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_diagnostics.ParseFailure("missingPayload", cursor.CatalogVersion);
_logger.LogWarning("KEV document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.ParseFailure("download", cursor.CatalogVersion);
_logger.LogError(ex, "KEV parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
KevCatalogDto? catalog = null;
string? catalogVersion = null;
try
{
using var jsonDocument = JsonDocument.Parse(rawBytes);
catalogVersion = TryGetCatalogVersion(jsonDocument.RootElement);
_schemaValidator.Validate(jsonDocument, KevSchemaProvider.Schema, document.Uri);
catalog = jsonDocument.RootElement.Deserialize<KevCatalogDto>(SerializerOptions);
}
catch (JsonSchemaValidationException ex)
{
_diagnostics.ParseFailure("schema", catalogVersion);
_logger.LogWarning(ex, "KEV schema validation failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
catch (JsonException ex)
{
_diagnostics.ParseFailure("invalidJson", catalogVersion);
_logger.LogError(ex, "KEV JSON parsing failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
catch (Exception ex)
{
_diagnostics.ParseFailure("deserialize", catalogVersion);
_logger.LogError(ex, "KEV catalog deserialization failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (catalog is null)
{
_diagnostics.ParseFailure("emptyCatalog", catalogVersion);
_logger.LogWarning("KEV catalog payload was empty for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var entryCount = catalog.Vulnerabilities?.Count ?? 0;
var released = catalog.DateReleased?.ToUniversalTime();
RecordCatalogAnomalies(catalog);
try
{
var payloadJson = JsonSerializer.Serialize(catalog, SerializerOptions);
var payload = BsonDocument.Parse(payloadJson);
_logger.LogInformation(
"Parsed KEV catalog document {DocumentId} (version={CatalogVersion}, released={Released}, entries={EntryCount})",
document.Id,
catalog.CatalogVersion ?? "(unknown)",
released,
entryCount);
_diagnostics.CatalogParsed(catalog.CatalogVersion, entryCount);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
SchemaVersion,
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Add(document.Id);
latestCatalogVersion = catalog.CatalogVersion ?? latestCatalogVersion;
latestCatalogReleased = catalog.DateReleased ?? latestCatalogReleased;
}
catch (Exception ex)
{
_logger.LogError(ex, "KEV DTO persistence failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings)
.WithCatalogMetadata(latestCatalogVersion, latestCatalogReleased);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
KevCatalogDto? catalog;
try
{
var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
catalog = JsonSerializer.Deserialize<KevCatalogDto>(dtoJson, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "KEV mapping: failed to deserialize DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (catalog is null)
{
_logger.LogWarning("KEV mapping: DTO payload was empty for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var feedUri = TryParseUri(document.Uri) ?? _options.FeedUri;
var advisories = KevMapper.Map(catalog, SourceName, feedUri, document.FetchedAt, dtoRecord.ValidatedAt);
var entryCount = catalog.Vulnerabilities?.Count ?? 0;
var mappedCount = advisories.Count;
var skippedCount = Math.Max(0, entryCount - mappedCount);
_logger.LogInformation(
"Mapped {MappedCount}/{EntryCount} KEV advisories from catalog version {CatalogVersion} (skipped={SkippedCount})",
mappedCount,
entryCount,
catalog.CatalogVersion ?? "(unknown)",
skippedCount);
_diagnostics.AdvisoriesMapped(catalog.CatalogVersion, mappedCount);
foreach (var advisory in advisories)
{
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
}
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<KevCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? KevCursor.Empty : KevCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(KevCursor cursor, CancellationToken cancellationToken)
{
return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
}
private void RecordCatalogAnomalies(KevCatalogDto catalog)
{
ArgumentNullException.ThrowIfNull(catalog);
var version = catalog.CatalogVersion;
var vulnerabilities = catalog.Vulnerabilities ?? Array.Empty<KevVulnerabilityDto>();
if (catalog.Count != vulnerabilities.Count)
{
_diagnostics.RecordAnomaly("countMismatch", version);
}
foreach (var entry in vulnerabilities)
{
if (entry is null)
{
_diagnostics.RecordAnomaly("nullEntry", version);
continue;
}
if (string.IsNullOrWhiteSpace(entry.CveId))
{
_diagnostics.RecordAnomaly("missingCveId", version);
}
}
}
private static string? TryGetCatalogVersion(JsonElement root)
{
if (root.ValueKind != JsonValueKind.Object)
{
return null;
}
if (root.TryGetProperty("catalogVersion", out var versionElement) && versionElement.ValueKind == JsonValueKind.String)
{
return versionElement.GetString();
}
return null;
}
private static Uri? TryParseUri(string? value)
=> Uri.TryCreate(value, UriKind.Absolute, out var uri) ? uri : null;
}

View File

@@ -1,136 +1,136 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Kisa.Configuration;
using StellaOps.Concelier.Connector.Kisa.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Kisa;
public sealed class KisaConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly KisaFeedClient _feedClient;
private readonly KisaDetailParser _detailParser;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly KisaOptions _options;
private readonly KisaDiagnostics _diagnostics;
private readonly TimeProvider _timeProvider;
private readonly ILogger<KisaConnector> _logger;
public KisaConnector(
KisaFeedClient feedClient,
KisaDetailParser detailParser,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<KisaOptions> options,
KisaDiagnostics diagnostics,
TimeProvider? timeProvider,
ILogger<KisaConnector> logger)
{
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
_detailParser = detailParser ?? throw new ArgumentNullException(nameof(detailParser));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => KisaConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
_diagnostics.FeedAttempt();
IReadOnlyList<KisaFeedItem> items;
try
{
items = await _feedClient.LoadAsync(cancellationToken).ConfigureAwait(false);
_diagnostics.FeedSuccess(items.Count);
if (items.Count > 0)
{
_logger.LogInformation("KISA feed returned {ItemCount} advisories", items.Count);
}
else
{
_logger.LogDebug("KISA feed returned no advisories");
}
}
catch (Exception ex)
{
_diagnostics.FeedFailure(ex.GetType().Name);
_logger.LogError(ex, "KISA feed fetch failed");
await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (items.Count == 0)
{
await UpdateCursorAsync(cursor.WithLastFetch(now), cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var knownIds = new HashSet<string>(cursor.KnownIds, StringComparer.OrdinalIgnoreCase);
var processed = 0;
var latestPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
foreach (var item in items.OrderByDescending(static i => i.Published))
{
cancellationToken.ThrowIfCancellationRequested();
if (knownIds.Contains(item.AdvisoryId))
{
continue;
}
if (processed >= _options.MaxAdvisoriesPerFetch)
{
break;
}
var category = item.Category;
_diagnostics.DetailAttempt(category);
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Kisa.Configuration;
using StellaOps.Concelier.Connector.Kisa.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Kisa;
public sealed class KisaConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly KisaFeedClient _feedClient;
private readonly KisaDetailParser _detailParser;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly KisaOptions _options;
private readonly KisaDiagnostics _diagnostics;
private readonly TimeProvider _timeProvider;
private readonly ILogger<KisaConnector> _logger;
public KisaConnector(
KisaFeedClient feedClient,
KisaDetailParser detailParser,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<KisaOptions> options,
KisaDiagnostics diagnostics,
TimeProvider? timeProvider,
ILogger<KisaConnector> logger)
{
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
_detailParser = detailParser ?? throw new ArgumentNullException(nameof(detailParser));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => KisaConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
_diagnostics.FeedAttempt();
IReadOnlyList<KisaFeedItem> items;
try
{
items = await _feedClient.LoadAsync(cancellationToken).ConfigureAwait(false);
_diagnostics.FeedSuccess(items.Count);
if (items.Count > 0)
{
_logger.LogInformation("KISA feed returned {ItemCount} advisories", items.Count);
}
else
{
_logger.LogDebug("KISA feed returned no advisories");
}
}
catch (Exception ex)
{
_diagnostics.FeedFailure(ex.GetType().Name);
_logger.LogError(ex, "KISA feed fetch failed");
await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (items.Count == 0)
{
await UpdateCursorAsync(cursor.WithLastFetch(now), cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var knownIds = new HashSet<string>(cursor.KnownIds, StringComparer.OrdinalIgnoreCase);
var processed = 0;
var latestPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
foreach (var item in items.OrderByDescending(static i => i.Published))
{
cancellationToken.ThrowIfCancellationRequested();
if (knownIds.Contains(item.AdvisoryId))
{
continue;
}
if (processed >= _options.MaxAdvisoriesPerFetch)
{
break;
}
var category = item.Category;
_diagnostics.DetailAttempt(category);
try
{
var detailUri = item.DetailPageUri;
@@ -149,125 +149,125 @@ public sealed class KisaConnector : IFeedConnector
LastModified = existing?.LastModified,
TimeoutOverride = _options.RequestTimeout,
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified)
{
_diagnostics.DetailUnchanged(category);
_logger.LogDebug("KISA detail {Idx} unchanged ({Category})", item.AdvisoryId, category ?? "unknown");
knownIds.Add(item.AdvisoryId);
continue;
}
if (!result.IsSuccess || result.Document is null)
{
_diagnostics.DetailFailure(category, "empty-document");
_logger.LogWarning("KISA detail fetch returned no document for {Idx}", item.AdvisoryId);
continue;
}
pendingDocuments.Add(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
knownIds.Add(item.AdvisoryId);
processed++;
_diagnostics.DetailSuccess(category);
_logger.LogInformation(
"KISA fetched detail for {Idx} (documentId={DocumentId}, category={Category})",
item.AdvisoryId,
result.Document.Id,
category ?? "unknown");
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
_diagnostics.DetailFailure(category, ex.GetType().Name);
_logger.LogError(ex, "KISA detail fetch failed for {Idx}", item.AdvisoryId);
await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (item.Published > latestPublished)
{
latestPublished = item.Published;
_diagnostics.CursorAdvanced();
_logger.LogDebug("KISA advanced published cursor to {Published:O}", latestPublished);
}
}
var trimmedKnown = knownIds.Count > _options.MaxKnownAdvisories
? knownIds.OrderByDescending(id => id, StringComparer.OrdinalIgnoreCase)
.Take(_options.MaxKnownAdvisories)
.ToArray()
: knownIds.ToArray();
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithKnownIds(trimmedKnown)
.WithLastPublished(latestPublished == DateTimeOffset.MinValue ? cursor.LastPublished : latestPublished)
.WithLastFetch(now);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
_logger.LogInformation("KISA fetch stored {Processed} new documents (knownIds={KnownCount})", processed, trimmedKnown.Length);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var now = _timeProvider.GetUtcNow();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
_diagnostics.ParseFailure(null, "document-missing");
_logger.LogWarning("KISA document {DocumentId} missing during parse", documentId);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var category = GetCategory(document);
if (!document.GridFsId.HasValue)
{
_diagnostics.ParseFailure(category, "missing-gridfs");
_logger.LogWarning("KISA document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
_diagnostics.ParseAttempt(category);
byte[] payload;
try
{
payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.ParseFailure(category, "download");
_logger.LogError(ex, "KISA unable to download document {DocumentId}", document.Id);
throw;
}
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (result.IsNotModified)
{
_diagnostics.DetailUnchanged(category);
_logger.LogDebug("KISA detail {Idx} unchanged ({Category})", item.AdvisoryId, category ?? "unknown");
knownIds.Add(item.AdvisoryId);
continue;
}
if (!result.IsSuccess || result.Document is null)
{
_diagnostics.DetailFailure(category, "empty-document");
_logger.LogWarning("KISA detail fetch returned no document for {Idx}", item.AdvisoryId);
continue;
}
pendingDocuments.Add(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
knownIds.Add(item.AdvisoryId);
processed++;
_diagnostics.DetailSuccess(category);
_logger.LogInformation(
"KISA fetched detail for {Idx} (documentId={DocumentId}, category={Category})",
item.AdvisoryId,
result.Document.Id,
category ?? "unknown");
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
_diagnostics.DetailFailure(category, ex.GetType().Name);
_logger.LogError(ex, "KISA detail fetch failed for {Idx}", item.AdvisoryId);
await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (item.Published > latestPublished)
{
latestPublished = item.Published;
_diagnostics.CursorAdvanced();
_logger.LogDebug("KISA advanced published cursor to {Published:O}", latestPublished);
}
}
var trimmedKnown = knownIds.Count > _options.MaxKnownAdvisories
? knownIds.OrderByDescending(id => id, StringComparer.OrdinalIgnoreCase)
.Take(_options.MaxKnownAdvisories)
.ToArray()
: knownIds.ToArray();
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithKnownIds(trimmedKnown)
.WithLastPublished(latestPublished == DateTimeOffset.MinValue ? cursor.LastPublished : latestPublished)
.WithLastFetch(now);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
_logger.LogInformation("KISA fetch stored {Processed} new documents (knownIds={KnownCount})", processed, trimmedKnown.Length);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var now = _timeProvider.GetUtcNow();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
_diagnostics.ParseFailure(null, "document-missing");
_logger.LogWarning("KISA document {DocumentId} missing during parse", documentId);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var category = GetCategory(document);
if (!document.PayloadId.HasValue)
{
_diagnostics.ParseFailure(category, "missing-gridfs");
_logger.LogWarning("KISA document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
_diagnostics.ParseAttempt(category);
byte[] payload;
try
{
payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.ParseFailure(category, "download");
_logger.LogError(ex, "KISA unable to download document {DocumentId}", document.Id);
throw;
}
KisaParsedAdvisory parsed;
try
{
@@ -279,28 +279,28 @@ public sealed class KisaConnector : IFeedConnector
{
_diagnostics.ParseFailure(category, "parse");
_logger.LogError(ex, "KISA failed to parse detail {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
_diagnostics.ParseSuccess(category);
_logger.LogDebug("KISA parsed detail for {DocumentId} ({Category})", document.Id, category ?? "unknown");
var dtoBson = BsonDocument.Parse(JsonSerializer.Serialize(parsed, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "kisa.detail.v1", dtoBson, now);
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Add(document.Id);
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
_diagnostics.ParseSuccess(category);
_logger.LogDebug("KISA parsed detail for {DocumentId} ({Category})", document.Id, category ?? "unknown");
var dtoBson = BsonDocument.Parse(JsonSerializer.Serialize(parsed, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "kisa.detail.v1", dtoBson, now);
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Add(document.Id);
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private static Uri? TryGetUri(IReadOnlyDictionary<string, string>? metadata, string key)
@@ -318,107 +318,107 @@ public sealed class KisaConnector : IFeedConnector
return Uri.TryCreate(value, UriKind.Absolute, out var uri) ? uri : null;
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
_diagnostics.MapFailure(null, "document-missing");
_logger.LogWarning("KISA document {DocumentId} missing during map", documentId);
pendingMappings.Remove(documentId);
continue;
}
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null)
{
_diagnostics.MapFailure(null, "dto-missing");
_logger.LogWarning("KISA DTO missing for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
KisaParsedAdvisory? parsed;
try
{
parsed = JsonSerializer.Deserialize<KisaParsedAdvisory>(dtoRecord.Payload.ToJson(), SerializerOptions);
}
catch (Exception ex)
{
_diagnostics.MapFailure(null, "dto-deserialize");
_logger.LogError(ex, "KISA failed to deserialize DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (parsed is null)
{
_diagnostics.MapFailure(null, "dto-null");
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
try
{
var advisory = KisaMapper.Map(parsed, document, dtoRecord.ValidatedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
_diagnostics.MapSuccess(parsed.Severity);
_logger.LogInformation("KISA mapped advisory {AdvisoryId} (severity={Severity})", parsed.AdvisoryId, parsed.Severity ?? "unknown");
}
catch (Exception ex)
{
_diagnostics.MapFailure(parsed.Severity, "map");
_logger.LogError(ex, "KISA mapping failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private static string? GetCategory(DocumentRecord document)
{
if (document.Metadata is null)
{
return null;
}
return document.Metadata.TryGetValue("kisa.category", out var category)
? category
: null;
}
private async Task<KisaCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? KisaCursor.Empty : KisaCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(KisaCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
var completedAt = cursor.LastFetchAt ?? _timeProvider.GetUtcNow();
return _stateRepository.UpdateCursorAsync(SourceName, document, completedAt, cancellationToken);
}
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
_diagnostics.MapFailure(null, "document-missing");
_logger.LogWarning("KISA document {DocumentId} missing during map", documentId);
pendingMappings.Remove(documentId);
continue;
}
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null)
{
_diagnostics.MapFailure(null, "dto-missing");
_logger.LogWarning("KISA DTO missing for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
KisaParsedAdvisory? parsed;
try
{
parsed = JsonSerializer.Deserialize<KisaParsedAdvisory>(dtoRecord.Payload.ToJson(), SerializerOptions);
}
catch (Exception ex)
{
_diagnostics.MapFailure(null, "dto-deserialize");
_logger.LogError(ex, "KISA failed to deserialize DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (parsed is null)
{
_diagnostics.MapFailure(null, "dto-null");
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
try
{
var advisory = KisaMapper.Map(parsed, document, dtoRecord.ValidatedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
_diagnostics.MapSuccess(parsed.Severity);
_logger.LogInformation("KISA mapped advisory {AdvisoryId} (severity={Severity})", parsed.AdvisoryId, parsed.Severity ?? "unknown");
}
catch (Exception ex)
{
_diagnostics.MapFailure(parsed.Severity, "map");
_logger.LogError(ex, "KISA mapping failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private static string? GetCategory(DocumentRecord document)
{
if (document.Metadata is null)
{
return null;
}
return document.Metadata.TryGetValue("kisa.category", out var category)
? category
: null;
}
private async Task<KisaCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? KisaCursor.Empty : KisaCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(KisaCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
var completedAt = cursor.LastFetchAt ?? _timeProvider.GetUtcNow();
return _stateRepository.UpdateCursorAsync(SourceName, document, completedAt, cancellationToken);
}
}

View File

@@ -1,43 +1,43 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Net.Http;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Osv.Configuration;
using StellaOps.Concelier.Connector.Osv.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Osv.Configuration;
using StellaOps.Concelier.Connector.Osv.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
using StellaOps.Cryptography;
namespace StellaOps.Concelier.Connector.Osv;
public sealed class OsvConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
PropertyNameCaseInsensitive = true,
};
private readonly IHttpClientFactory _httpClientFactory;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
namespace StellaOps.Concelier.Connector.Osv;
public sealed class OsvConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
PropertyNameCaseInsensitive = true,
};
private readonly IHttpClientFactory _httpClientFactory;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
@@ -46,10 +46,10 @@ public sealed class OsvConnector : IFeedConnector
private readonly ILogger<OsvConnector> _logger;
private readonly OsvDiagnostics _diagnostics;
private readonly ICryptoHash _hash;
public OsvConnector(
IHttpClientFactory httpClientFactory,
RawDocumentStorage rawDocumentStorage,
public OsvConnector(
IHttpClientFactory httpClientFactory,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
@@ -73,197 +73,197 @@ public sealed class OsvConnector : IFeedConnector
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => OsvConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var cursorState = cursor;
var remainingCapacity = _options.MaxAdvisoriesPerFetch;
foreach (var ecosystem in _options.Ecosystems)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
try
{
var result = await FetchEcosystemAsync(
ecosystem,
cursorState,
pendingDocuments,
now,
remainingCapacity,
cancellationToken).ConfigureAwait(false);
cursorState = result.Cursor;
remainingCapacity -= result.NewDocuments;
}
catch (Exception ex)
{
_logger.LogError(ex, "OSV fetch failed for ecosystem {Ecosystem}", ecosystem);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
cursorState = cursorState
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings);
await UpdateCursorAsync(cursorState, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("OSV document {DocumentId} missing GridFS content", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] bytes;
try
{
bytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Unable to download OSV raw document {DocumentId}", document.Id);
throw;
}
OsvVulnerabilityDto? dto;
try
{
dto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to deserialize OSV document {DocumentId} ({Uri})", document.Id, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
if (dto is null || string.IsNullOrWhiteSpace(dto.Id))
{
_logger.LogWarning("OSV document {DocumentId} produced empty payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
var sanitized = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = MongoDB.Bson.BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
"osv.v1",
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var payloadJson = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
OsvVulnerabilityDto? osvDto;
try
{
osvDto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(payloadJson, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize OSV DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (osvDto is null || string.IsNullOrWhiteSpace(osvDto.Id))
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
public string SourceName => OsvConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var cursorState = cursor;
var remainingCapacity = _options.MaxAdvisoriesPerFetch;
foreach (var ecosystem in _options.Ecosystems)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
try
{
var result = await FetchEcosystemAsync(
ecosystem,
cursorState,
pendingDocuments,
now,
remainingCapacity,
cancellationToken).ConfigureAwait(false);
cursorState = result.Cursor;
remainingCapacity -= result.NewDocuments;
}
catch (Exception ex)
{
_logger.LogError(ex, "OSV fetch failed for ecosystem {Ecosystem}", ecosystem);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
cursorState = cursorState
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings);
await UpdateCursorAsync(cursorState, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("OSV document {DocumentId} missing GridFS content", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] bytes;
try
{
bytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Unable to download OSV raw document {DocumentId}", document.Id);
throw;
}
OsvVulnerabilityDto? dto;
try
{
dto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to deserialize OSV document {DocumentId} ({Uri})", document.Id, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
if (dto is null || string.IsNullOrWhiteSpace(dto.Id))
{
_logger.LogWarning("OSV document {DocumentId} produced empty payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
var sanitized = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = MongoDB.Bson.BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
"osv.v1",
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var payloadJson = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
OsvVulnerabilityDto? osvDto;
try
{
osvDto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(payloadJson, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize OSV DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (osvDto is null || string.IsNullOrWhiteSpace(osvDto.Id))
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var ecosystem = document.Metadata is not null && document.Metadata.TryGetValue("osv.ecosystem", out var ecosystemValue)
? ecosystemValue
: "unknown";
@@ -289,232 +289,232 @@ public sealed class OsvConnector : IFeedConnector
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<OsvCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? OsvCursor.Empty : OsvCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(OsvCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
private async Task<(OsvCursor Cursor, int NewDocuments)> FetchEcosystemAsync(
string ecosystem,
OsvCursor cursor,
HashSet<Guid> pendingDocuments,
DateTimeOffset now,
int remainingCapacity,
CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient(OsvOptions.HttpClientName);
client.Timeout = _options.HttpTimeout;
var archiveUri = BuildArchiveUri(ecosystem);
using var request = new HttpRequestMessage(HttpMethod.Get, archiveUri);
if (cursor.TryGetArchiveMetadata(ecosystem, out var archiveMetadata))
{
if (!string.IsNullOrWhiteSpace(archiveMetadata.ETag))
{
request.Headers.TryAddWithoutValidation("If-None-Match", archiveMetadata.ETag);
}
if (archiveMetadata.LastModified.HasValue)
{
request.Headers.IfModifiedSince = archiveMetadata.LastModified.Value;
}
}
using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
if (response.StatusCode == HttpStatusCode.NotModified)
{
return (cursor, 0);
}
response.EnsureSuccessStatusCode();
await using var archiveStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read, leaveOpen: false);
var existingLastModified = cursor.GetLastModified(ecosystem);
var processedIdsSet = cursor.ProcessedIdsByEcosystem.TryGetValue(ecosystem, out var processedIds)
? new HashSet<string>(processedIds, StringComparer.OrdinalIgnoreCase)
: new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var currentMaxModified = existingLastModified ?? DateTimeOffset.MinValue;
var currentProcessedIds = new HashSet<string>(processedIdsSet, StringComparer.OrdinalIgnoreCase);
var processedUpdated = false;
var newDocuments = 0;
var minimumModified = existingLastModified.HasValue
? existingLastModified.Value - _options.ModifiedTolerance
: now - _options.InitialBackfill;
ProvenanceDiagnostics.ReportResumeWindow(SourceName, minimumModified, _logger);
foreach (var entry in archive.Entries)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
if (!entry.FullName.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
{
continue;
}
await using var entryStream = entry.Open();
using var memory = new MemoryStream();
await entryStream.CopyToAsync(memory, cancellationToken).ConfigureAwait(false);
var bytes = memory.ToArray();
OsvVulnerabilityDto? dto;
try
{
dto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to parse OSV entry {Entry} for ecosystem {Ecosystem}", entry.FullName, ecosystem);
continue;
}
if (dto is null || string.IsNullOrWhiteSpace(dto.Id))
{
continue;
}
var modified = (dto.Modified ?? dto.Published ?? DateTimeOffset.MinValue).ToUniversalTime();
if (modified < minimumModified)
{
continue;
}
if (existingLastModified.HasValue && modified < existingLastModified.Value - _options.ModifiedTolerance)
{
continue;
}
if (modified < currentMaxModified - _options.ModifiedTolerance)
{
continue;
}
if (modified == currentMaxModified && currentProcessedIds.Contains(dto.Id))
{
continue;
}
var documentUri = BuildDocumentUri(ecosystem, dto.Id);
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<OsvCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? OsvCursor.Empty : OsvCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(OsvCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
private async Task<(OsvCursor Cursor, int NewDocuments)> FetchEcosystemAsync(
string ecosystem,
OsvCursor cursor,
HashSet<Guid> pendingDocuments,
DateTimeOffset now,
int remainingCapacity,
CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient(OsvOptions.HttpClientName);
client.Timeout = _options.HttpTimeout;
var archiveUri = BuildArchiveUri(ecosystem);
using var request = new HttpRequestMessage(HttpMethod.Get, archiveUri);
if (cursor.TryGetArchiveMetadata(ecosystem, out var archiveMetadata))
{
if (!string.IsNullOrWhiteSpace(archiveMetadata.ETag))
{
request.Headers.TryAddWithoutValidation("If-None-Match", archiveMetadata.ETag);
}
if (archiveMetadata.LastModified.HasValue)
{
request.Headers.IfModifiedSince = archiveMetadata.LastModified.Value;
}
}
using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
if (response.StatusCode == HttpStatusCode.NotModified)
{
return (cursor, 0);
}
response.EnsureSuccessStatusCode();
await using var archiveStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read, leaveOpen: false);
var existingLastModified = cursor.GetLastModified(ecosystem);
var processedIdsSet = cursor.ProcessedIdsByEcosystem.TryGetValue(ecosystem, out var processedIds)
? new HashSet<string>(processedIds, StringComparer.OrdinalIgnoreCase)
: new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var currentMaxModified = existingLastModified ?? DateTimeOffset.MinValue;
var currentProcessedIds = new HashSet<string>(processedIdsSet, StringComparer.OrdinalIgnoreCase);
var processedUpdated = false;
var newDocuments = 0;
var minimumModified = existingLastModified.HasValue
? existingLastModified.Value - _options.ModifiedTolerance
: now - _options.InitialBackfill;
ProvenanceDiagnostics.ReportResumeWindow(SourceName, minimumModified, _logger);
foreach (var entry in archive.Entries)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
if (!entry.FullName.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
{
continue;
}
await using var entryStream = entry.Open();
using var memory = new MemoryStream();
await entryStream.CopyToAsync(memory, cancellationToken).ConfigureAwait(false);
var bytes = memory.ToArray();
OsvVulnerabilityDto? dto;
try
{
dto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to parse OSV entry {Entry} for ecosystem {Ecosystem}", entry.FullName, ecosystem);
continue;
}
if (dto is null || string.IsNullOrWhiteSpace(dto.Id))
{
continue;
}
var modified = (dto.Modified ?? dto.Published ?? DateTimeOffset.MinValue).ToUniversalTime();
if (modified < minimumModified)
{
continue;
}
if (existingLastModified.HasValue && modified < existingLastModified.Value - _options.ModifiedTolerance)
{
continue;
}
if (modified < currentMaxModified - _options.ModifiedTolerance)
{
continue;
}
if (modified == currentMaxModified && currentProcessedIds.Contains(dto.Id))
{
continue;
}
var documentUri = BuildDocumentUri(ecosystem, dto.Id);
var sha256 = _hash.ComputeHashHex(bytes, HashAlgorithms.Sha256);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, documentUri, cancellationToken).ConfigureAwait(false);
if (existing is not null && string.Equals(existing.Sha256, sha256, StringComparison.OrdinalIgnoreCase))
{
continue;
}
var gridFsId = await _rawDocumentStorage.UploadAsync(SourceName, documentUri, bytes, "application/json", null, cancellationToken).ConfigureAwait(false);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["osv.ecosystem"] = ecosystem,
["osv.id"] = dto.Id,
["osv.modified"] = modified.ToString("O"),
};
var recordId = existing?.Id ?? Guid.NewGuid();
var record = new DocumentRecord(
recordId,
SourceName,
documentUri,
_timeProvider.GetUtcNow(),
sha256,
DocumentStatuses.PendingParse,
"application/json",
Headers: null,
Metadata: metadata,
Etag: null,
LastModified: modified,
GridFsId: gridFsId,
ExpiresAt: null);
var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
pendingDocuments.Add(upserted.Id);
newDocuments++;
remainingCapacity--;
if (modified > currentMaxModified)
{
currentMaxModified = modified;
currentProcessedIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { dto.Id };
processedUpdated = true;
}
else if (modified == currentMaxModified)
{
currentProcessedIds.Add(dto.Id);
processedUpdated = true;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
try
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
catch (TaskCanceledException)
{
break;
}
}
}
if (processedUpdated && currentMaxModified != DateTimeOffset.MinValue)
{
cursor = cursor.WithLastModified(ecosystem, currentMaxModified, currentProcessedIds);
}
else if (processedUpdated && existingLastModified.HasValue)
{
cursor = cursor.WithLastModified(ecosystem, existingLastModified.Value, currentProcessedIds);
}
var etag = response.Headers.ETag?.Tag;
var lastModifiedHeader = response.Content.Headers.LastModified;
cursor = cursor.WithArchiveMetadata(ecosystem, etag, lastModifiedHeader);
return (cursor, newDocuments);
}
private Uri BuildArchiveUri(string ecosystem)
{
var trimmed = ecosystem.Trim('/');
var baseUri = _options.BaseUri;
var builder = new UriBuilder(baseUri);
var path = builder.Path;
if (!path.EndsWith('/'))
{
path += "/";
}
path += $"{trimmed}/{_options.ArchiveFileName}";
builder.Path = path;
return builder.Uri;
}
private static string BuildDocumentUri(string ecosystem, string vulnerabilityId)
{
var safeId = vulnerabilityId.Replace(' ', '-');
return $"https://osv-vulnerabilities.storage.googleapis.com/{ecosystem}/{safeId}.json";
}
}
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, documentUri, cancellationToken).ConfigureAwait(false);
if (existing is not null && string.Equals(existing.Sha256, sha256, StringComparison.OrdinalIgnoreCase))
{
continue;
}
var gridFsId = await _rawDocumentStorage.UploadAsync(SourceName, documentUri, bytes, "application/json", null, cancellationToken).ConfigureAwait(false);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["osv.ecosystem"] = ecosystem,
["osv.id"] = dto.Id,
["osv.modified"] = modified.ToString("O"),
};
var recordId = existing?.Id ?? Guid.NewGuid();
var record = new DocumentRecord(
recordId,
SourceName,
documentUri,
_timeProvider.GetUtcNow(),
sha256,
DocumentStatuses.PendingParse,
"application/json",
Headers: null,
Metadata: metadata,
Etag: null,
LastModified: modified,
PayloadId: gridFsId,
ExpiresAt: null);
var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
pendingDocuments.Add(upserted.Id);
newDocuments++;
remainingCapacity--;
if (modified > currentMaxModified)
{
currentMaxModified = modified;
currentProcessedIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { dto.Id };
processedUpdated = true;
}
else if (modified == currentMaxModified)
{
currentProcessedIds.Add(dto.Id);
processedUpdated = true;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
try
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
catch (TaskCanceledException)
{
break;
}
}
}
if (processedUpdated && currentMaxModified != DateTimeOffset.MinValue)
{
cursor = cursor.WithLastModified(ecosystem, currentMaxModified, currentProcessedIds);
}
else if (processedUpdated && existingLastModified.HasValue)
{
cursor = cursor.WithLastModified(ecosystem, existingLastModified.Value, currentProcessedIds);
}
var etag = response.Headers.ETag?.Tag;
var lastModifiedHeader = response.Content.Headers.LastModified;
cursor = cursor.WithArchiveMetadata(ecosystem, etag, lastModifiedHeader);
return (cursor, newDocuments);
}
private Uri BuildArchiveUri(string ecosystem)
{
var trimmed = ecosystem.Trim('/');
var baseUri = _options.BaseUri;
var builder = new UriBuilder(baseUri);
var path = builder.Path;
if (!path.EndsWith('/'))
{
path += "/";
}
path += $"{trimmed}/{_options.ArchiveFileName}";
builder.Path = path;
return builder.Uri;
}
private static string BuildDocumentUri(string ecosystem, string vulnerabilityId)
{
var safeId = vulnerabilityId.Replace(' ', '-');
return $"https://osv-vulnerabilities.storage.googleapis.com/{ecosystem}/{safeId}.json";
}
}

View File

@@ -1,439 +1,439 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Vndr.Apple.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Vndr.Apple;
public sealed class AppleConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
PropertyNameCaseInsensitive = true,
};
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly AppleOptions _options;
private readonly AppleDiagnostics _diagnostics;
private readonly TimeProvider _timeProvider;
private readonly ILogger<AppleConnector> _logger;
public AppleConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
AppleDiagnostics diagnostics,
IOptions<AppleOptions> options,
TimeProvider? timeProvider,
ILogger<AppleConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrAppleConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var processedIds = cursor.ProcessedIds.ToHashSet(StringComparer.OrdinalIgnoreCase);
var maxPosted = cursor.LastPosted ?? DateTimeOffset.MinValue;
var baseline = cursor.LastPosted?.Add(-_options.ModifiedTolerance) ?? _timeProvider.GetUtcNow().Add(-_options.InitialBackfill);
SourceFetchContentResult indexResult;
try
{
var request = new SourceFetchRequest(AppleOptions.HttpClientName, SourceName, _options.SoftwareLookupUri!)
{
AcceptHeaders = new[] { "application/json", "application/vnd.apple.security+json;q=0.9" },
};
indexResult = await _fetchService.FetchContentAsync(request, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Apple software index fetch failed from {Uri}", _options.SoftwareLookupUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (!indexResult.IsSuccess || indexResult.Content is null)
{
if (indexResult.IsNotModified)
{
_diagnostics.FetchUnchanged();
}
await UpdateCursorAsync(cursor, cancellationToken).ConfigureAwait(false);
return;
}
var indexEntries = AppleIndexParser.Parse(indexResult.Content, _options.AdvisoryBaseUri!);
if (indexEntries.Count == 0)
{
await UpdateCursorAsync(cursor, cancellationToken).ConfigureAwait(false);
return;
}
var allowlist = _options.AdvisoryAllowlist;
var blocklist = _options.AdvisoryBlocklist;
var ordered = indexEntries
.Where(entry => ShouldInclude(entry, allowlist, blocklist))
.OrderBy(entry => entry.PostingDate)
.ThenBy(entry => entry.ArticleId, StringComparer.OrdinalIgnoreCase)
.ToArray();
foreach (var entry in ordered)
{
cancellationToken.ThrowIfCancellationRequested();
if (entry.PostingDate < baseline)
{
continue;
}
if (cursor.LastPosted.HasValue
&& entry.PostingDate <= cursor.LastPosted.Value
&& processedIds.Contains(entry.UpdateId))
{
continue;
}
var metadata = BuildMetadata(entry);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, entry.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(AppleOptions.HttpClientName, SourceName, entry.DetailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[]
{
"text/html",
"application/xhtml+xml",
"text/plain;q=0.5"
},
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Apple advisory fetch failed for {Uri}", entry.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (result.StatusCode == HttpStatusCode.NotModified)
{
_diagnostics.FetchUnchanged();
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
_diagnostics.FetchItem();
pendingDocuments.Add(result.Document.Id);
processedIds.Add(entry.UpdateId);
if (entry.PostingDate > maxPosted)
{
maxPosted = entry.PostingDate;
}
}
var updated = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithLastPosted(maxPosted == DateTimeOffset.MinValue ? cursor.LastPosted ?? DateTimeOffset.MinValue : maxPosted, processedIds);
await UpdateCursorAsync(updated, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_diagnostics.ParseFailure();
_logger.LogWarning("Apple document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
AppleDetailDto dto;
try
{
var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
var html = System.Text.Encoding.UTF8.GetString(content);
var entry = RehydrateIndexEntry(document);
dto = AppleDetailParser.Parse(html, entry);
}
catch (Exception ex)
{
_diagnostics.ParseFailure();
_logger.LogError(ex, "Apple parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(json);
var validatedAt = _timeProvider.GetUtcNow();
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "apple.security.update.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "apple.security.update.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Add(document.Id);
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null)
{
pendingMappings.Remove(documentId);
continue;
}
AppleDetailDto dto;
try
{
dto = JsonSerializer.Deserialize<AppleDetailDto>(dtoRecord.Payload.ToJson(), SerializerOptions)
?? throw new InvalidOperationException("Unable to deserialize Apple DTO.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Apple DTO deserialization failed for document {DocumentId}", document.Id);
pendingMappings.Remove(documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
continue;
}
var (advisory, flag) = AppleMapper.Map(dto, document, dtoRecord);
_diagnostics.MapAffectedCount(advisory.AffectedPackages.Length);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
if (flag is not null)
{
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
}
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private AppleIndexEntry RehydrateIndexEntry(DocumentRecord document)
{
var metadata = document.Metadata ?? new Dictionary<string, string>(StringComparer.Ordinal);
metadata.TryGetValue("apple.articleId", out var articleId);
metadata.TryGetValue("apple.updateId", out var updateId);
metadata.TryGetValue("apple.title", out var title);
metadata.TryGetValue("apple.postingDate", out var postingDateRaw);
metadata.TryGetValue("apple.detailUri", out var detailUriRaw);
metadata.TryGetValue("apple.rapidResponse", out var rapidRaw);
metadata.TryGetValue("apple.products", out var productsJson);
if (!DateTimeOffset.TryParse(postingDateRaw, out var postingDate))
{
postingDate = document.FetchedAt;
}
var detailUri = !string.IsNullOrWhiteSpace(detailUriRaw) && Uri.TryCreate(detailUriRaw, UriKind.Absolute, out var parsedUri)
? parsedUri
: new Uri(_options.AdvisoryBaseUri!, articleId ?? document.Uri);
var rapid = string.Equals(rapidRaw, "true", StringComparison.OrdinalIgnoreCase);
var products = DeserializeProducts(productsJson);
return new AppleIndexEntry(
UpdateId: string.IsNullOrWhiteSpace(updateId) ? articleId ?? document.Uri : updateId,
ArticleId: articleId ?? document.Uri,
Title: title ?? document.Metadata?["apple.originalTitle"] ?? "Apple Security Update",
PostingDate: postingDate.ToUniversalTime(),
DetailUri: detailUri,
Products: products,
IsRapidSecurityResponse: rapid);
}
private static IReadOnlyList<AppleIndexProduct> DeserializeProducts(string? json)
{
if (string.IsNullOrWhiteSpace(json))
{
return Array.Empty<AppleIndexProduct>();
}
try
{
var products = JsonSerializer.Deserialize<List<AppleIndexProduct>>(json, SerializerOptions);
return products is { Count: > 0 } ? products : Array.Empty<AppleIndexProduct>();
}
catch (JsonException)
{
return Array.Empty<AppleIndexProduct>();
}
}
private static Dictionary<string, string> BuildMetadata(AppleIndexEntry entry)
{
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["apple.articleId"] = entry.ArticleId,
["apple.updateId"] = entry.UpdateId,
["apple.title"] = entry.Title,
["apple.postingDate"] = entry.PostingDate.ToString("O"),
["apple.detailUri"] = entry.DetailUri.ToString(),
["apple.rapidResponse"] = entry.IsRapidSecurityResponse ? "true" : "false",
["apple.products"] = JsonSerializer.Serialize(entry.Products, SerializerOptions),
};
return metadata;
}
private static bool ShouldInclude(AppleIndexEntry entry, IReadOnlyCollection<string> allowlist, IReadOnlyCollection<string> blocklist)
{
if (allowlist.Count > 0 && !allowlist.Contains(entry.ArticleId))
{
return false;
}
if (blocklist.Count > 0 && blocklist.Contains(entry.ArticleId))
{
return false;
}
return true;
}
private async Task<AppleCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? AppleCursor.Empty : AppleCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(AppleCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBson();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Vndr.Apple.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Vndr.Apple;
public sealed class AppleConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
PropertyNameCaseInsensitive = true,
};
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly AppleOptions _options;
private readonly AppleDiagnostics _diagnostics;
private readonly TimeProvider _timeProvider;
private readonly ILogger<AppleConnector> _logger;
public AppleConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
AppleDiagnostics diagnostics,
IOptions<AppleOptions> options,
TimeProvider? timeProvider,
ILogger<AppleConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrAppleConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var processedIds = cursor.ProcessedIds.ToHashSet(StringComparer.OrdinalIgnoreCase);
var maxPosted = cursor.LastPosted ?? DateTimeOffset.MinValue;
var baseline = cursor.LastPosted?.Add(-_options.ModifiedTolerance) ?? _timeProvider.GetUtcNow().Add(-_options.InitialBackfill);
SourceFetchContentResult indexResult;
try
{
var request = new SourceFetchRequest(AppleOptions.HttpClientName, SourceName, _options.SoftwareLookupUri!)
{
AcceptHeaders = new[] { "application/json", "application/vnd.apple.security+json;q=0.9" },
};
indexResult = await _fetchService.FetchContentAsync(request, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Apple software index fetch failed from {Uri}", _options.SoftwareLookupUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (!indexResult.IsSuccess || indexResult.Content is null)
{
if (indexResult.IsNotModified)
{
_diagnostics.FetchUnchanged();
}
await UpdateCursorAsync(cursor, cancellationToken).ConfigureAwait(false);
return;
}
var indexEntries = AppleIndexParser.Parse(indexResult.Content, _options.AdvisoryBaseUri!);
if (indexEntries.Count == 0)
{
await UpdateCursorAsync(cursor, cancellationToken).ConfigureAwait(false);
return;
}
var allowlist = _options.AdvisoryAllowlist;
var blocklist = _options.AdvisoryBlocklist;
var ordered = indexEntries
.Where(entry => ShouldInclude(entry, allowlist, blocklist))
.OrderBy(entry => entry.PostingDate)
.ThenBy(entry => entry.ArticleId, StringComparer.OrdinalIgnoreCase)
.ToArray();
foreach (var entry in ordered)
{
cancellationToken.ThrowIfCancellationRequested();
if (entry.PostingDate < baseline)
{
continue;
}
if (cursor.LastPosted.HasValue
&& entry.PostingDate <= cursor.LastPosted.Value
&& processedIds.Contains(entry.UpdateId))
{
continue;
}
var metadata = BuildMetadata(entry);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, entry.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(AppleOptions.HttpClientName, SourceName, entry.DetailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[]
{
"text/html",
"application/xhtml+xml",
"text/plain;q=0.5"
},
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Apple advisory fetch failed for {Uri}", entry.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (result.StatusCode == HttpStatusCode.NotModified)
{
_diagnostics.FetchUnchanged();
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
_diagnostics.FetchItem();
pendingDocuments.Add(result.Document.Id);
processedIds.Add(entry.UpdateId);
if (entry.PostingDate > maxPosted)
{
maxPosted = entry.PostingDate;
}
}
var updated = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithLastPosted(maxPosted == DateTimeOffset.MinValue ? cursor.LastPosted ?? DateTimeOffset.MinValue : maxPosted, processedIds);
await UpdateCursorAsync(updated, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_diagnostics.ParseFailure();
_logger.LogWarning("Apple document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
AppleDetailDto dto;
try
{
var content = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
var html = System.Text.Encoding.UTF8.GetString(content);
var entry = RehydrateIndexEntry(document);
dto = AppleDetailParser.Parse(html, entry);
}
catch (Exception ex)
{
_diagnostics.ParseFailure();
_logger.LogError(ex, "Apple parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(json);
var validatedAt = _timeProvider.GetUtcNow();
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "apple.security.update.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "apple.security.update.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
pendingMappings.Add(document.Id);
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToHashSet();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null)
{
pendingMappings.Remove(documentId);
continue;
}
AppleDetailDto dto;
try
{
dto = JsonSerializer.Deserialize<AppleDetailDto>(dtoRecord.Payload.ToJson(), SerializerOptions)
?? throw new InvalidOperationException("Unable to deserialize Apple DTO.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Apple DTO deserialization failed for document {DocumentId}", document.Id);
pendingMappings.Remove(documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
continue;
}
var (advisory, flag) = AppleMapper.Map(dto, document, dtoRecord);
_diagnostics.MapAffectedCount(advisory.AffectedPackages.Length);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
if (flag is not null)
{
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
}
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private AppleIndexEntry RehydrateIndexEntry(DocumentRecord document)
{
var metadata = document.Metadata ?? new Dictionary<string, string>(StringComparer.Ordinal);
metadata.TryGetValue("apple.articleId", out var articleId);
metadata.TryGetValue("apple.updateId", out var updateId);
metadata.TryGetValue("apple.title", out var title);
metadata.TryGetValue("apple.postingDate", out var postingDateRaw);
metadata.TryGetValue("apple.detailUri", out var detailUriRaw);
metadata.TryGetValue("apple.rapidResponse", out var rapidRaw);
metadata.TryGetValue("apple.products", out var productsJson);
if (!DateTimeOffset.TryParse(postingDateRaw, out var postingDate))
{
postingDate = document.FetchedAt;
}
var detailUri = !string.IsNullOrWhiteSpace(detailUriRaw) && Uri.TryCreate(detailUriRaw, UriKind.Absolute, out var parsedUri)
? parsedUri
: new Uri(_options.AdvisoryBaseUri!, articleId ?? document.Uri);
var rapid = string.Equals(rapidRaw, "true", StringComparison.OrdinalIgnoreCase);
var products = DeserializeProducts(productsJson);
return new AppleIndexEntry(
UpdateId: string.IsNullOrWhiteSpace(updateId) ? articleId ?? document.Uri : updateId,
ArticleId: articleId ?? document.Uri,
Title: title ?? document.Metadata?["apple.originalTitle"] ?? "Apple Security Update",
PostingDate: postingDate.ToUniversalTime(),
DetailUri: detailUri,
Products: products,
IsRapidSecurityResponse: rapid);
}
private static IReadOnlyList<AppleIndexProduct> DeserializeProducts(string? json)
{
if (string.IsNullOrWhiteSpace(json))
{
return Array.Empty<AppleIndexProduct>();
}
try
{
var products = JsonSerializer.Deserialize<List<AppleIndexProduct>>(json, SerializerOptions);
return products is { Count: > 0 } ? products : Array.Empty<AppleIndexProduct>();
}
catch (JsonException)
{
return Array.Empty<AppleIndexProduct>();
}
}
private static Dictionary<string, string> BuildMetadata(AppleIndexEntry entry)
{
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["apple.articleId"] = entry.ArticleId,
["apple.updateId"] = entry.UpdateId,
["apple.title"] = entry.Title,
["apple.postingDate"] = entry.PostingDate.ToString("O"),
["apple.detailUri"] = entry.DetailUri.ToString(),
["apple.rapidResponse"] = entry.IsRapidSecurityResponse ? "true" : "false",
["apple.products"] = JsonSerializer.Serialize(entry.Products, SerializerOptions),
};
return metadata;
}
private static bool ShouldInclude(AppleIndexEntry entry, IReadOnlyCollection<string> allowlist, IReadOnlyCollection<string> blocklist)
{
if (allowlist.Count > 0 && !allowlist.Contains(entry.ArticleId))
{
return false;
}
if (blocklist.Count > 0 && blocklist.Contains(entry.ArticleId))
{
return false;
}
return true;
}
private async Task<AppleCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? AppleCursor.Empty : AppleCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(AppleCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBson();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
}

View File

@@ -1,366 +1,366 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Vndr.Chromium.Configuration;
using StellaOps.Concelier.Connector.Vndr.Chromium.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
using Json.Schema;
namespace StellaOps.Concelier.Connector.Vndr.Chromium;
public sealed class ChromiumConnector : IFeedConnector
{
private static readonly JsonSchema Schema = ChromiumSchemaProvider.Schema;
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly ChromiumFeedLoader _feedLoader;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly IJsonSchemaValidator _schemaValidator;
private readonly ChromiumOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ChromiumDiagnostics _diagnostics;
private readonly ILogger<ChromiumConnector> _logger;
public ChromiumConnector(
ChromiumFeedLoader feedLoader,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
IJsonSchemaValidator schemaValidator,
IOptions<ChromiumOptions> options,
TimeProvider? timeProvider,
ChromiumDiagnostics diagnostics,
ILogger<ChromiumConnector> logger)
{
_feedLoader = feedLoader ?? throw new ArgumentNullException(nameof(feedLoader));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrChromiumConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var (windowStart, windowEnd) = CalculateWindow(cursor, now);
ProvenanceDiagnostics.ReportResumeWindow(SourceName, windowStart, _logger);
IReadOnlyList<ChromiumFeedEntry> feedEntries;
_diagnostics.FetchAttempt();
try
{
feedEntries = await _feedLoader.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium feed load failed {Start}-{End}", windowStart, windowEnd);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
var fetchCache = new Dictionary<string, ChromiumFetchCacheEntry>(cursor.FetchCache, StringComparer.Ordinal);
var touchedResources = new HashSet<string>(StringComparer.Ordinal);
var candidates = feedEntries
.Where(static entry => entry.IsSecurityUpdate())
.OrderBy(static entry => entry.Published)
.ToArray();
if (candidates.Length == 0)
{
var untouched = cursor
.WithLastPublished(cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(untouched, cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var maxPublished = cursor.LastPublished;
foreach (var entry in candidates)
{
try
{
var cacheKey = entry.DetailUri.ToString();
touchedResources.Add(cacheKey);
var metadata = ChromiumDocumentMetadata.CreateMetadata(entry.PostId, entry.Title, entry.Published, entry.Updated, entry.Summary);
var request = new SourceFetchRequest(ChromiumOptions.HttpClientName, SourceName, entry.DetailUri)
{
Metadata = metadata,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (cursor.TryGetFetchCache(cacheKey, out var cached) && string.Equals(cached.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase))
{
_diagnostics.FetchUnchanged();
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
await _documentStore.UpdateStatusAsync(result.Document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
continue;
}
_diagnostics.FetchDocument();
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium fetch failed for {Uri}", entry.DetailUri);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (touchedResources.Count > 0)
{
var keysToRemove = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in keysToRemove)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings)
.WithLastPublished(maxPublished ?? cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("Chromium document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
ChromiumDto dto;
try
{
var metadata = ChromiumDocumentMetadata.FromDocument(document);
var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
var html = Encoding.UTF8.GetString(content);
dto = ChromiumParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium parse failed for {Uri}", document.Uri);
_diagnostics.ParseFailure();
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
using var jsonDocument = JsonDocument.Parse(json);
try
{
_schemaValidator.Validate(jsonDocument, Schema, dto.PostId);
}
catch (StellaOps.Concelier.Connector.Common.Json.JsonSchemaValidationException ex)
{
_logger.LogError(ex, "Chromium schema validation failed for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var payload = BsonDocument.Parse(json);
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var validatedAt = _timeProvider.GetUtcNow();
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "chromium.post.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "chromium.post.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
_diagnostics.ParseSuccess();
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dtoRecord.Payload.ToJson(new JsonWriterSettings { OutputMode = JsonOutputMode.RelaxedExtendedJson });
var dto = JsonSerializer.Deserialize<ChromiumDto>(json, SerializerOptions);
if (dto is null)
{
_logger.LogWarning("Chromium DTO deserialization failed for {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var recordedAt = _timeProvider.GetUtcNow();
var (advisory, flag) = ChromiumMapper.Map(dto, SourceName, recordedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
_diagnostics.MapSuccess();
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<ChromiumCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return ChromiumCursor.FromBsonDocument(record?.Cursor);
}
private async Task UpdateCursorAsync(ChromiumCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private (DateTimeOffset start, DateTimeOffset end) CalculateWindow(ChromiumCursor cursor, DateTimeOffset now)
{
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
var start = lastPublished - _options.WindowOverlap;
var backfill = now - _options.InitialBackfill;
if (start < backfill)
{
start = backfill;
}
var end = now;
if (end <= start)
{
end = start.AddHours(1);
}
return (start, end);
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Vndr.Chromium.Configuration;
using StellaOps.Concelier.Connector.Vndr.Chromium.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
using Json.Schema;
namespace StellaOps.Concelier.Connector.Vndr.Chromium;
public sealed class ChromiumConnector : IFeedConnector
{
private static readonly JsonSchema Schema = ChromiumSchemaProvider.Schema;
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly ChromiumFeedLoader _feedLoader;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly IJsonSchemaValidator _schemaValidator;
private readonly ChromiumOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ChromiumDiagnostics _diagnostics;
private readonly ILogger<ChromiumConnector> _logger;
public ChromiumConnector(
ChromiumFeedLoader feedLoader,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
IJsonSchemaValidator schemaValidator,
IOptions<ChromiumOptions> options,
TimeProvider? timeProvider,
ChromiumDiagnostics diagnostics,
ILogger<ChromiumConnector> logger)
{
_feedLoader = feedLoader ?? throw new ArgumentNullException(nameof(feedLoader));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrChromiumConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var (windowStart, windowEnd) = CalculateWindow(cursor, now);
ProvenanceDiagnostics.ReportResumeWindow(SourceName, windowStart, _logger);
IReadOnlyList<ChromiumFeedEntry> feedEntries;
_diagnostics.FetchAttempt();
try
{
feedEntries = await _feedLoader.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium feed load failed {Start}-{End}", windowStart, windowEnd);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
var fetchCache = new Dictionary<string, ChromiumFetchCacheEntry>(cursor.FetchCache, StringComparer.Ordinal);
var touchedResources = new HashSet<string>(StringComparer.Ordinal);
var candidates = feedEntries
.Where(static entry => entry.IsSecurityUpdate())
.OrderBy(static entry => entry.Published)
.ToArray();
if (candidates.Length == 0)
{
var untouched = cursor
.WithLastPublished(cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(untouched, cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var maxPublished = cursor.LastPublished;
foreach (var entry in candidates)
{
try
{
var cacheKey = entry.DetailUri.ToString();
touchedResources.Add(cacheKey);
var metadata = ChromiumDocumentMetadata.CreateMetadata(entry.PostId, entry.Title, entry.Published, entry.Updated, entry.Summary);
var request = new SourceFetchRequest(ChromiumOptions.HttpClientName, SourceName, entry.DetailUri)
{
Metadata = metadata,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (cursor.TryGetFetchCache(cacheKey, out var cached) && string.Equals(cached.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase))
{
_diagnostics.FetchUnchanged();
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
await _documentStore.UpdateStatusAsync(result.Document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
continue;
}
_diagnostics.FetchDocument();
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium fetch failed for {Uri}", entry.DetailUri);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (touchedResources.Count > 0)
{
var keysToRemove = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in keysToRemove)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings)
.WithLastPublished(maxPublished ?? cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("Chromium document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
ChromiumDto dto;
try
{
var metadata = ChromiumDocumentMetadata.FromDocument(document);
var content = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
var html = Encoding.UTF8.GetString(content);
dto = ChromiumParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium parse failed for {Uri}", document.Uri);
_diagnostics.ParseFailure();
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
using var jsonDocument = JsonDocument.Parse(json);
try
{
_schemaValidator.Validate(jsonDocument, Schema, dto.PostId);
}
catch (StellaOps.Concelier.Connector.Common.Json.JsonSchemaValidationException ex)
{
_logger.LogError(ex, "Chromium schema validation failed for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var payload = BsonDocument.Parse(json);
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var validatedAt = _timeProvider.GetUtcNow();
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "chromium.post.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "chromium.post.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
_diagnostics.ParseSuccess();
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dtoRecord.Payload.ToJson(new JsonWriterSettings { OutputMode = JsonOutputMode.RelaxedExtendedJson });
var dto = JsonSerializer.Deserialize<ChromiumDto>(json, SerializerOptions);
if (dto is null)
{
_logger.LogWarning("Chromium DTO deserialization failed for {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var recordedAt = _timeProvider.GetUtcNow();
var (advisory, flag) = ChromiumMapper.Map(dto, SourceName, recordedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
_diagnostics.MapSuccess();
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<ChromiumCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return ChromiumCursor.FromBsonDocument(record?.Cursor);
}
private async Task UpdateCursorAsync(ChromiumCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private (DateTimeOffset start, DateTimeOffset end) CalculateWindow(ChromiumCursor cursor, DateTimeOffset now)
{
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
var start = lastPublished - _options.WindowOverlap;
var backfill = now - _options.InitialBackfill;
if (start < backfill)
{
start = backfill;
}
var end = now;
if (end <= start)
{
end = start.AddHours(1);
}
return (start, end);
}
}

View File

@@ -163,7 +163,7 @@ public sealed class CiscoConnector : IFeedConnector
BuildMetadata(advisory),
Etag: null,
LastModified: advisory.LastUpdated ?? advisory.FirstPublished ?? now,
GridFsId: gridFsId,
PayloadId: gridFsId,
ExpiresAt: null);
var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
@@ -259,7 +259,7 @@ public sealed class CiscoConnector : IFeedConnector
continue;
}
if (!document.GridFsId.HasValue)
if (!document.PayloadId.HasValue)
{
_diagnostics.ParseFailure();
_logger.LogWarning("Cisco document {DocumentId} missing GridFS payload", documentId);
@@ -273,7 +273,7 @@ public sealed class CiscoConnector : IFeedConnector
byte[] payload;
try
{
payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{

View File

@@ -133,7 +133,7 @@ public sealed class MsrcConnector : IFeedConnector
}
_diagnostics.DetailFetchAttempt();
if (existing?.GridFsId is { } oldGridId)
if (existing?.PayloadId is { } oldGridId)
{
await _rawDocumentStorage.DeleteAsync(oldGridId, cancellationToken).ConfigureAwait(false);
}
@@ -238,7 +238,7 @@ public sealed class MsrcConnector : IFeedConnector
continue;
}
if (!document.GridFsId.HasValue)
if (!document.PayloadId.HasValue)
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
@@ -250,7 +250,7 @@ public sealed class MsrcConnector : IFeedConnector
byte[] payload;
try
{
payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{

View File

@@ -1,366 +1,366 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Vndr.Oracle.Configuration;
using StellaOps.Concelier.Connector.Vndr.Oracle.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Vndr.Oracle;
public sealed class OracleConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly OracleCalendarFetcher _calendarFetcher;
private readonly OracleOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<OracleConnector> _logger;
public OracleConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
OracleCalendarFetcher calendarFetcher,
IOptions<OracleOptions> options,
TimeProvider? timeProvider,
ILogger<OracleConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_calendarFetcher = calendarFetcher ?? throw new ArgumentNullException(nameof(calendarFetcher));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrOracleConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
var fetchCache = new Dictionary<string, OracleFetchCacheEntry>(cursor.FetchCache, StringComparer.OrdinalIgnoreCase);
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var now = _timeProvider.GetUtcNow();
var advisoryUris = await ResolveAdvisoryUrisAsync(cancellationToken).ConfigureAwait(false);
foreach (var uri in advisoryUris)
{
cancellationToken.ThrowIfCancellationRequested();
try
{
var cacheKey = uri.AbsoluteUri;
touchedResources.Add(cacheKey);
var advisoryId = DeriveAdvisoryId(uri);
var title = advisoryId.Replace('-', ' ');
var published = now;
var metadata = OracleDocumentMetadata.CreateMetadata(advisoryId, title, published);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, uri.ToString(), cancellationToken).ConfigureAwait(false);
var request = new SourceFetchRequest(OracleOptions.HttpClientName, SourceName, uri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
continue;
}
var cacheEntry = OracleFetchCacheEntry.FromDocument(result.Document);
if (existing is not null
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal)
&& cursor.TryGetFetchCache(cacheKey, out var cached)
&& cached.Matches(result.Document))
{
_logger.LogDebug("Oracle advisory {AdvisoryId} unchanged; skipping parse/map", advisoryId);
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
fetchCache[cacheKey] = cacheEntry;
continue;
}
fetchCache[cacheKey] = cacheEntry;
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Oracle fetch failed for {Uri}", uri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (fetchCache.Count > 0 && touchedResources.Count > 0)
{
var stale = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in stale)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithFetchCache(fetchCache)
.WithLastProcessed(now);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("Oracle document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
OracleDto dto;
try
{
var metadata = OracleDocumentMetadata.FromDocument(document);
var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
var html = System.Text.Encoding.UTF8.GetString(content);
dto = OracleParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Oracle parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!OracleDtoValidator.TryNormalize(dto, out var normalized, out var validationError))
{
_logger.LogWarning("Oracle validation failed for document {DocumentId}: {Reason}", document.Id, validationError ?? "unknown");
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
dto = normalized;
var json = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(json);
var validatedAt = _timeProvider.GetUtcNow();
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "oracle.advisory.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "oracle.advisory.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
OracleDto? dto;
try
{
var json = dtoRecord.Payload.ToJson();
dto = JsonSerializer.Deserialize<OracleDto>(json, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Oracle DTO deserialization failed for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (dto is null)
{
_logger.LogWarning("Oracle DTO payload deserialized as null for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var mappedAt = _timeProvider.GetUtcNow();
var (advisory, flag) = OracleMapper.Map(dto, document, dtoRecord, SourceName, mappedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<OracleCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return OracleCursor.FromBson(record?.Cursor);
}
private async Task UpdateCursorAsync(OracleCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private async Task<IReadOnlyCollection<Uri>> ResolveAdvisoryUrisAsync(CancellationToken cancellationToken)
{
var uris = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var uri in _options.AdvisoryUris)
{
if (uri is not null)
{
uris.Add(uri.AbsoluteUri);
}
}
var calendarUris = await _calendarFetcher.GetAdvisoryUrisAsync(cancellationToken).ConfigureAwait(false);
foreach (var uri in calendarUris)
{
uris.Add(uri.AbsoluteUri);
}
return uris
.Select(static value => new Uri(value, UriKind.Absolute))
.OrderBy(static value => value.AbsoluteUri, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
private static string DeriveAdvisoryId(Uri uri)
{
var segments = uri.Segments;
if (segments.Length == 0)
{
return uri.AbsoluteUri;
}
var slug = segments[^1].Trim('/');
if (string.IsNullOrWhiteSpace(slug))
{
return uri.AbsoluteUri;
}
return slug.Replace('.', '-');
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Vndr.Oracle.Configuration;
using StellaOps.Concelier.Connector.Vndr.Oracle.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Vndr.Oracle;
public sealed class OracleConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly OracleCalendarFetcher _calendarFetcher;
private readonly OracleOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<OracleConnector> _logger;
public OracleConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
OracleCalendarFetcher calendarFetcher,
IOptions<OracleOptions> options,
TimeProvider? timeProvider,
ILogger<OracleConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_calendarFetcher = calendarFetcher ?? throw new ArgumentNullException(nameof(calendarFetcher));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrOracleConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
var fetchCache = new Dictionary<string, OracleFetchCacheEntry>(cursor.FetchCache, StringComparer.OrdinalIgnoreCase);
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var now = _timeProvider.GetUtcNow();
var advisoryUris = await ResolveAdvisoryUrisAsync(cancellationToken).ConfigureAwait(false);
foreach (var uri in advisoryUris)
{
cancellationToken.ThrowIfCancellationRequested();
try
{
var cacheKey = uri.AbsoluteUri;
touchedResources.Add(cacheKey);
var advisoryId = DeriveAdvisoryId(uri);
var title = advisoryId.Replace('-', ' ');
var published = now;
var metadata = OracleDocumentMetadata.CreateMetadata(advisoryId, title, published);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, uri.ToString(), cancellationToken).ConfigureAwait(false);
var request = new SourceFetchRequest(OracleOptions.HttpClientName, SourceName, uri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
continue;
}
var cacheEntry = OracleFetchCacheEntry.FromDocument(result.Document);
if (existing is not null
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal)
&& cursor.TryGetFetchCache(cacheKey, out var cached)
&& cached.Matches(result.Document))
{
_logger.LogDebug("Oracle advisory {AdvisoryId} unchanged; skipping parse/map", advisoryId);
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
fetchCache[cacheKey] = cacheEntry;
continue;
}
fetchCache[cacheKey] = cacheEntry;
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Oracle fetch failed for {Uri}", uri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (fetchCache.Count > 0 && touchedResources.Count > 0)
{
var stale = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in stale)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithFetchCache(fetchCache)
.WithLastProcessed(now);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("Oracle document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
OracleDto dto;
try
{
var metadata = OracleDocumentMetadata.FromDocument(document);
var content = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
var html = System.Text.Encoding.UTF8.GetString(content);
dto = OracleParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Oracle parse failed for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!OracleDtoValidator.TryNormalize(dto, out var normalized, out var validationError))
{
_logger.LogWarning("Oracle validation failed for document {DocumentId}: {Reason}", document.Id, validationError ?? "unknown");
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
dto = normalized;
var json = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(json);
var validatedAt = _timeProvider.GetUtcNow();
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "oracle.advisory.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "oracle.advisory.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
OracleDto? dto;
try
{
var json = dtoRecord.Payload.ToJson();
dto = JsonSerializer.Deserialize<OracleDto>(json, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Oracle DTO deserialization failed for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (dto is null)
{
_logger.LogWarning("Oracle DTO payload deserialized as null for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var mappedAt = _timeProvider.GetUtcNow();
var (advisory, flag) = OracleMapper.Map(dto, document, dtoRecord, SourceName, mappedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<OracleCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return OracleCursor.FromBson(record?.Cursor);
}
private async Task UpdateCursorAsync(OracleCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private async Task<IReadOnlyCollection<Uri>> ResolveAdvisoryUrisAsync(CancellationToken cancellationToken)
{
var uris = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var uri in _options.AdvisoryUris)
{
if (uri is not null)
{
uris.Add(uri.AbsoluteUri);
}
}
var calendarUris = await _calendarFetcher.GetAdvisoryUrisAsync(cancellationToken).ConfigureAwait(false);
foreach (var uri in calendarUris)
{
uris.Add(uri.AbsoluteUri);
}
return uris
.Select(static value => new Uri(value, UriKind.Absolute))
.OrderBy(static value => value.AbsoluteUri, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
private static string DeriveAdvisoryId(Uri uri)
{
var segments = uri.Segments;
if (segments.Length == 0)
{
return uri.AbsoluteUri;
}
var slug = segments[^1].Trim('/');
if (string.IsNullOrWhiteSpace(slug))
{
return uri.AbsoluteUri;
}
return slug.Replace('.', '-');
}
}

View File

@@ -1,454 +1,454 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Vndr.Vmware.Configuration;
using StellaOps.Concelier.Connector.Vndr.Vmware.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Vndr.Vmware;
public sealed class VmwareConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly IHttpClientFactory _httpClientFactory;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly VmwareOptions _options;
private readonly TimeProvider _timeProvider;
private readonly VmwareDiagnostics _diagnostics;
private readonly ILogger<VmwareConnector> _logger;
public VmwareConnector(
IHttpClientFactory httpClientFactory,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IPsirtFlagStore psirtFlagStore,
IOptions<VmwareOptions> options,
TimeProvider? timeProvider,
VmwareDiagnostics diagnostics,
ILogger<VmwareConnector> logger)
{
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VmwareConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var fetchCache = new Dictionary<string, VmwareFetchCacheEntry>(cursor.FetchCache, StringComparer.OrdinalIgnoreCase);
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var remainingCapacity = _options.MaxAdvisoriesPerFetch;
IReadOnlyList<VmwareIndexItem> indexItems;
try
{
indexItems = await FetchIndexAsync(cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Failed to retrieve VMware advisory index");
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (indexItems.Count == 0)
{
return;
}
var orderedItems = indexItems
.Where(static item => !string.IsNullOrWhiteSpace(item.Id) && !string.IsNullOrWhiteSpace(item.DetailUrl))
.OrderBy(static item => item.Modified ?? DateTimeOffset.MinValue)
.ThenBy(static item => item.Id, StringComparer.OrdinalIgnoreCase)
.ToArray();
var baseline = cursor.LastModified ?? now - _options.InitialBackfill;
var resumeStart = baseline - _options.ModifiedTolerance;
ProvenanceDiagnostics.ReportResumeWindow(SourceName, resumeStart, _logger);
var processedIds = new HashSet<string>(cursor.ProcessedIds, StringComparer.OrdinalIgnoreCase);
var maxModified = cursor.LastModified ?? DateTimeOffset.MinValue;
var processedUpdated = false;
foreach (var item in orderedItems)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
var modified = (item.Modified ?? DateTimeOffset.MinValue).ToUniversalTime();
if (modified < baseline - _options.ModifiedTolerance)
{
continue;
}
if (cursor.LastModified.HasValue && modified < cursor.LastModified.Value - _options.ModifiedTolerance)
{
continue;
}
if (modified == cursor.LastModified && cursor.ProcessedIds.Contains(item.Id, StringComparer.OrdinalIgnoreCase))
{
continue;
}
if (!Uri.TryCreate(item.DetailUrl, UriKind.Absolute, out var detailUri))
{
_logger.LogWarning("VMware advisory {AdvisoryId} has invalid detail URL {Url}", item.Id, item.DetailUrl);
continue;
}
var cacheKey = detailUri.AbsoluteUri;
touchedResources.Add(cacheKey);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, cacheKey, cancellationToken).ConfigureAwait(false);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["vmware.id"] = item.Id,
["vmware.modified"] = modified.ToString("O"),
};
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(VmwareOptions.HttpClientName, SourceName, detailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "application/json" },
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Failed to fetch VMware advisory {AdvisoryId}", item.Id);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (result.IsNotModified)
{
_diagnostics.FetchUnchanged();
if (existing is not null)
{
fetchCache[cacheKey] = VmwareFetchCacheEntry.FromDocument(existing);
pendingDocuments.Remove(existing.Id);
pendingMappings.Remove(existing.Id);
_logger.LogInformation("VMware advisory {AdvisoryId} returned 304 Not Modified", item.Id);
}
continue;
}
if (!result.IsSuccess || result.Document is null)
{
_diagnostics.FetchFailure();
continue;
}
remainingCapacity--;
if (modified > maxModified)
{
maxModified = modified;
processedIds.Clear();
processedUpdated = true;
}
if (modified == maxModified)
{
processedIds.Add(item.Id);
processedUpdated = true;
}
var cacheEntry = VmwareFetchCacheEntry.FromDocument(result.Document);
if (existing is not null
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal)
&& cursor.TryGetFetchCache(cacheKey, out var cachedEntry)
&& cachedEntry.Matches(result.Document))
{
_diagnostics.FetchUnchanged();
fetchCache[cacheKey] = cacheEntry;
pendingDocuments.Remove(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
_logger.LogInformation("VMware advisory {AdvisoryId} unchanged; skipping reprocessing", item.Id);
continue;
}
_diagnostics.FetchItem();
fetchCache[cacheKey] = cacheEntry;
pendingDocuments.Add(result.Document.Id);
_logger.LogInformation(
"VMware advisory {AdvisoryId} fetched (documentId={DocumentId}, sha256={Sha})",
item.Id,
result.Document.Id,
result.Document.Sha256);
if (_options.RequestDelay > TimeSpan.Zero)
{
try
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
catch (TaskCanceledException)
{
break;
}
}
}
if (fetchCache.Count > 0 && touchedResources.Count > 0)
{
var stale = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in stale)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithFetchCache(fetchCache);
if (processedUpdated)
{
updatedCursor = updatedCursor.WithLastModified(maxModified, processedIds);
}
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remaining = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remaining.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("VMware document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
_diagnostics.ParseFailure();
continue;
}
byte[] bytes;
try
{
bytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed downloading VMware document {DocumentId}", document.Id);
throw;
}
VmwareDetailDto? detail;
try
{
detail = JsonSerializer.Deserialize<VmwareDetailDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to deserialize VMware advisory {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
_diagnostics.ParseFailure();
continue;
}
if (detail is null || string.IsNullOrWhiteSpace(detail.AdvisoryId))
{
_logger.LogWarning("VMware advisory document {DocumentId} contained empty payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
_diagnostics.ParseFailure();
continue;
}
var sanitized = JsonSerializer.Serialize(detail, SerializerOptions);
var payload = MongoDB.Bson.BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "vmware.v1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remaining)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
VmwareDetailDto? detail;
try
{
detail = JsonSerializer.Deserialize<VmwareDetailDto>(json, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize VMware DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (detail is null || string.IsNullOrWhiteSpace(detail.AdvisoryId))
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var (advisory, flag) = VmwareMapper.Map(detail, document, dto);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
_diagnostics.MapAffectedCount(advisory.AffectedPackages.Length);
_logger.LogInformation(
"VMware advisory {AdvisoryId} mapped with {AffectedCount} affected packages",
detail.AdvisoryId,
advisory.AffectedPackages.Length);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<IReadOnlyList<VmwareIndexItem>> FetchIndexAsync(CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient(VmwareOptions.HttpClientName);
using var response = await client.GetAsync(_options.IndexUri, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode();
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
var items = await JsonSerializer.DeserializeAsync<IReadOnlyList<VmwareIndexItem>>(stream, SerializerOptions, cancellationToken).ConfigureAwait(false);
return items ?? Array.Empty<VmwareIndexItem>();
}
private async Task<VmwareCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? VmwareCursor.Empty : VmwareCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(VmwareCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Vndr.Vmware.Configuration;
using StellaOps.Concelier.Connector.Vndr.Vmware.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Vndr.Vmware;
public sealed class VmwareConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly IHttpClientFactory _httpClientFactory;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly VmwareOptions _options;
private readonly TimeProvider _timeProvider;
private readonly VmwareDiagnostics _diagnostics;
private readonly ILogger<VmwareConnector> _logger;
public VmwareConnector(
IHttpClientFactory httpClientFactory,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IPsirtFlagStore psirtFlagStore,
IOptions<VmwareOptions> options,
TimeProvider? timeProvider,
VmwareDiagnostics diagnostics,
ILogger<VmwareConnector> logger)
{
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VmwareConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var pendingMappings = cursor.PendingMappings.ToHashSet();
var fetchCache = new Dictionary<string, VmwareFetchCacheEntry>(cursor.FetchCache, StringComparer.OrdinalIgnoreCase);
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var remainingCapacity = _options.MaxAdvisoriesPerFetch;
IReadOnlyList<VmwareIndexItem> indexItems;
try
{
indexItems = await FetchIndexAsync(cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Failed to retrieve VMware advisory index");
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (indexItems.Count == 0)
{
return;
}
var orderedItems = indexItems
.Where(static item => !string.IsNullOrWhiteSpace(item.Id) && !string.IsNullOrWhiteSpace(item.DetailUrl))
.OrderBy(static item => item.Modified ?? DateTimeOffset.MinValue)
.ThenBy(static item => item.Id, StringComparer.OrdinalIgnoreCase)
.ToArray();
var baseline = cursor.LastModified ?? now - _options.InitialBackfill;
var resumeStart = baseline - _options.ModifiedTolerance;
ProvenanceDiagnostics.ReportResumeWindow(SourceName, resumeStart, _logger);
var processedIds = new HashSet<string>(cursor.ProcessedIds, StringComparer.OrdinalIgnoreCase);
var maxModified = cursor.LastModified ?? DateTimeOffset.MinValue;
var processedUpdated = false;
foreach (var item in orderedItems)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
var modified = (item.Modified ?? DateTimeOffset.MinValue).ToUniversalTime();
if (modified < baseline - _options.ModifiedTolerance)
{
continue;
}
if (cursor.LastModified.HasValue && modified < cursor.LastModified.Value - _options.ModifiedTolerance)
{
continue;
}
if (modified == cursor.LastModified && cursor.ProcessedIds.Contains(item.Id, StringComparer.OrdinalIgnoreCase))
{
continue;
}
if (!Uri.TryCreate(item.DetailUrl, UriKind.Absolute, out var detailUri))
{
_logger.LogWarning("VMware advisory {AdvisoryId} has invalid detail URL {Url}", item.Id, item.DetailUrl);
continue;
}
var cacheKey = detailUri.AbsoluteUri;
touchedResources.Add(cacheKey);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, cacheKey, cancellationToken).ConfigureAwait(false);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["vmware.id"] = item.Id,
["vmware.modified"] = modified.ToString("O"),
};
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(VmwareOptions.HttpClientName, SourceName, detailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "application/json" },
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_diagnostics.FetchFailure();
_logger.LogError(ex, "Failed to fetch VMware advisory {AdvisoryId}", item.Id);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (result.IsNotModified)
{
_diagnostics.FetchUnchanged();
if (existing is not null)
{
fetchCache[cacheKey] = VmwareFetchCacheEntry.FromDocument(existing);
pendingDocuments.Remove(existing.Id);
pendingMappings.Remove(existing.Id);
_logger.LogInformation("VMware advisory {AdvisoryId} returned 304 Not Modified", item.Id);
}
continue;
}
if (!result.IsSuccess || result.Document is null)
{
_diagnostics.FetchFailure();
continue;
}
remainingCapacity--;
if (modified > maxModified)
{
maxModified = modified;
processedIds.Clear();
processedUpdated = true;
}
if (modified == maxModified)
{
processedIds.Add(item.Id);
processedUpdated = true;
}
var cacheEntry = VmwareFetchCacheEntry.FromDocument(result.Document);
if (existing is not null
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal)
&& cursor.TryGetFetchCache(cacheKey, out var cachedEntry)
&& cachedEntry.Matches(result.Document))
{
_diagnostics.FetchUnchanged();
fetchCache[cacheKey] = cacheEntry;
pendingDocuments.Remove(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
_logger.LogInformation("VMware advisory {AdvisoryId} unchanged; skipping reprocessing", item.Id);
continue;
}
_diagnostics.FetchItem();
fetchCache[cacheKey] = cacheEntry;
pendingDocuments.Add(result.Document.Id);
_logger.LogInformation(
"VMware advisory {AdvisoryId} fetched (documentId={DocumentId}, sha256={Sha})",
item.Id,
result.Document.Id,
result.Document.Sha256);
if (_options.RequestDelay > TimeSpan.Zero)
{
try
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
catch (TaskCanceledException)
{
break;
}
}
}
if (fetchCache.Count > 0 && touchedResources.Count > 0)
{
var stale = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in stale)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithFetchCache(fetchCache);
if (processedUpdated)
{
updatedCursor = updatedCursor.WithLastModified(maxModified, processedIds);
}
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remaining = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remaining.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("VMware document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
_diagnostics.ParseFailure();
continue;
}
byte[] bytes;
try
{
bytes = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed downloading VMware document {DocumentId}", document.Id);
throw;
}
VmwareDetailDto? detail;
try
{
detail = JsonSerializer.Deserialize<VmwareDetailDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to deserialize VMware advisory {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
_diagnostics.ParseFailure();
continue;
}
if (detail is null || string.IsNullOrWhiteSpace(detail.AdvisoryId))
{
_logger.LogWarning("VMware advisory document {DocumentId} contained empty payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
_diagnostics.ParseFailure();
continue;
}
var sanitized = JsonSerializer.Serialize(detail, SerializerOptions);
var payload = MongoDB.Bson.BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "vmware.v1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remaining)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
VmwareDetailDto? detail;
try
{
detail = JsonSerializer.Deserialize<VmwareDetailDto>(json, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize VMware DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (detail is null || string.IsNullOrWhiteSpace(detail.AdvisoryId))
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var (advisory, flag) = VmwareMapper.Map(detail, document, dto);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
_diagnostics.MapAffectedCount(advisory.AffectedPackages.Length);
_logger.LogInformation(
"VMware advisory {AdvisoryId} mapped with {AffectedCount} affected packages",
detail.AdvisoryId,
advisory.AffectedPackages.Length);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<IReadOnlyList<VmwareIndexItem>> FetchIndexAsync(CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient(VmwareOptions.HttpClientName);
using var response = await client.GetAsync(_options.IndexUri, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode();
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
var items = await JsonSerializer.DeserializeAsync<IReadOnlyList<VmwareIndexItem>>(stream, SerializerOptions, cancellationToken).ConfigureAwait(false);
return items ?? Array.Empty<VmwareIndexItem>();
}
private async Task<VmwareCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? VmwareCursor.Empty : VmwareCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(VmwareCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
}

View File

@@ -10,6 +10,7 @@
<ItemGroup>
<ProjectReference Include="..\StellaOps.Concelier.Models\StellaOps.Concelier.Models.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Normalization\StellaOps.Concelier.Normalization.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Core\StellaOps.Concelier.Core.csproj" />
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../../../__Libraries/StellaOps.DependencyInjection/StellaOps.DependencyInjection.csproj" />
<ProjectReference Include="../../../__Libraries/StellaOps.Cryptography/StellaOps.Cryptography.csproj" />
@@ -20,4 +21,4 @@
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.0" />
</ItemGroup>
</Project>
</Project>

View File

@@ -10,6 +10,7 @@
<ItemGroup>
<ProjectReference Include="..\StellaOps.Concelier.Exporter.Json\StellaOps.Concelier.Exporter.Json.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Models\StellaOps.Concelier.Models.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Core\StellaOps.Concelier.Core.csproj" />
<ProjectReference Include="../../../__Libraries/StellaOps.DependencyInjection/StellaOps.DependencyInjection.csproj" />
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
</ItemGroup>
@@ -18,4 +19,4 @@
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.0" />
</ItemGroup>
</Project>
</Project>

View File

@@ -24,6 +24,8 @@ namespace MongoDB.Bson
{
protected readonly object? _value;
public BsonValue(object? value) => _value = value;
internal object? RawValue => _value;
public static BsonValue Create(object? value) => BsonDocument.WrapExternal(value);
public virtual BsonType BsonType => _value switch
{
null => BsonType.Null,
@@ -59,12 +61,24 @@ namespace MongoDB.Bson
public class BsonInt64 : BsonValue { public BsonInt64(long value) : base(value) { } }
public class BsonDouble : BsonValue { public BsonDouble(double value) : base(value) { } }
public class BsonDateTime : BsonValue { public BsonDateTime(DateTime value) : base(value) { } }
public class BsonNull : BsonValue
{
private BsonNull() : base(null) { }
public static BsonNull Value { get; } = new();
}
public class BsonArray : BsonValue, IEnumerable<BsonValue>
{
private readonly List<BsonValue> _items = new();
public BsonArray() : base(null) { }
public BsonArray(IEnumerable<BsonValue> values) : this() => _items.AddRange(values);
public BsonArray(IEnumerable<object?> values) : this()
{
foreach (var value in values)
{
_items.Add(BsonDocument.WrapExternal(value));
}
}
public void Add(BsonValue value) => _items.Add(value);
public IEnumerator<BsonValue> GetEnumerator() => _items.GetEnumerator();
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
@@ -93,6 +107,8 @@ namespace MongoDB.Bson
_ => new BsonValue(value)
};
internal static BsonValue WrapExternal(object? value) => Wrap(value);
public BsonValue this[string key]
{
get => _values[key];
@@ -104,6 +120,7 @@ namespace MongoDB.Bson
public bool TryGetValue(string key, out BsonValue value) => _values.TryGetValue(key, out value!);
public void Add(string key, BsonValue value) => _values[key] = value;
public void Add(string key, object? value) => _values[key] = Wrap(value);
public IEnumerator<KeyValuePair<string, BsonValue>> GetEnumerator() => _values.GetEnumerator();
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
@@ -156,7 +173,7 @@ namespace MongoDB.Bson
{
BsonDocument doc => doc._values.ToDictionary(kvp => kvp.Key, kvp => Unwrap(kvp.Value)),
BsonArray array => array.Select(Unwrap).ToArray(),
_ => value._value
_ => value.RawValue
};
}
}

View File

@@ -1,4 +1,5 @@
using System.Collections.Concurrent;
using System.IO;
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Storage.Mongo
@@ -33,8 +34,9 @@ namespace StellaOps.Concelier.Storage.Mongo
IReadOnlyDictionary<string, string>? Metadata = null,
string? Etag = null,
DateTimeOffset? LastModified = null,
MongoDB.Bson.ObjectId? GridFsId = null,
DateTimeOffset? ExpiresAt = null);
Guid? PayloadId = null,
DateTimeOffset? ExpiresAt = null,
byte[]? Payload = null);
public interface IDocumentStore
{
@@ -85,7 +87,7 @@ namespace StellaOps.Concelier.Storage.Mongo
Guid DocumentId,
string SourceName,
string Format,
MongoDB.Bson.BsonDocument Payload,
string Payload,
DateTimeOffset CreatedAt);
public interface IDtoStore
@@ -113,40 +115,40 @@ namespace StellaOps.Concelier.Storage.Mongo
public sealed class RawDocumentStorage
{
private readonly ConcurrentDictionary<MongoDB.Bson.ObjectId, byte[]> _blobs = new();
private readonly ConcurrentDictionary<Guid, byte[]> _blobs = new();
public Task<MongoDB.Bson.ObjectId> UploadAsync(string sourceName, string uri, byte[] content, string? contentType, DateTimeOffset? expiresAt, CancellationToken cancellationToken)
public Task<Guid> UploadAsync(string sourceName, string uri, byte[] content, string? contentType, DateTimeOffset? expiresAt, CancellationToken cancellationToken)
{
var id = MongoDB.Bson.ObjectId.GenerateNewId();
var id = Guid.NewGuid();
_blobs[id] = content.ToArray();
return Task.FromResult(id);
}
public Task<MongoDB.Bson.ObjectId> UploadAsync(string sourceName, string uri, byte[] content, string? contentType, CancellationToken cancellationToken)
public Task<Guid> UploadAsync(string sourceName, string uri, byte[] content, string? contentType, CancellationToken cancellationToken)
=> UploadAsync(sourceName, uri, content, contentType, null, cancellationToken);
public Task<byte[]> DownloadAsync(MongoDB.Bson.ObjectId id, CancellationToken cancellationToken)
public Task<byte[]> DownloadAsync(Guid id, CancellationToken cancellationToken)
{
if (_blobs.TryGetValue(id, out var bytes))
{
return Task.FromResult(bytes);
}
throw new MongoDB.Driver.GridFSFileNotFoundException($"Blob {id} not found.");
throw new FileNotFoundException($"Blob {id} not found.");
}
public Task DeleteAsync(MongoDB.Bson.ObjectId id, CancellationToken cancellationToken)
public Task DeleteAsync(Guid id, CancellationToken cancellationToken)
{
_blobs.TryRemove(id, out _);
return Task.CompletedTask;
}
}
public sealed record SourceStateRecord(string SourceName, MongoDB.Bson.BsonDocument? Cursor, DateTimeOffset UpdatedAt);
public sealed record SourceStateRecord(string SourceName, string? CursorJson, DateTimeOffset UpdatedAt);
public interface ISourceStateRepository
{
Task<SourceStateRecord?> TryGetAsync(string sourceName, CancellationToken cancellationToken);
Task UpdateCursorAsync(string sourceName, MongoDB.Bson.BsonDocument cursor, DateTimeOffset completedAt, CancellationToken cancellationToken);
Task UpdateCursorAsync(string sourceName, string cursorJson, DateTimeOffset completedAt, CancellationToken cancellationToken);
Task MarkFailureAsync(string sourceName, DateTimeOffset now, TimeSpan backoff, string reason, CancellationToken cancellationToken);
}
@@ -160,9 +162,9 @@ namespace StellaOps.Concelier.Storage.Mongo
return Task.FromResult<SourceStateRecord?>(record);
}
public Task UpdateCursorAsync(string sourceName, MongoDB.Bson.BsonDocument cursor, DateTimeOffset completedAt, CancellationToken cancellationToken)
public Task UpdateCursorAsync(string sourceName, string cursorJson, DateTimeOffset completedAt, CancellationToken cancellationToken)
{
_states[sourceName] = new SourceStateRecord(sourceName, cursor.DeepClone(), completedAt);
_states[sourceName] = new SourceStateRecord(sourceName, cursorJson, completedAt);
return Task.CompletedTask;
}
@@ -174,6 +176,53 @@ namespace StellaOps.Concelier.Storage.Mongo
}
}
namespace StellaOps.Concelier.Storage.Mongo.Advisories
{
public interface IAdvisoryStore
{
Task UpsertAsync(Advisory advisory, CancellationToken cancellationToken);
Task<Advisory?> FindAsync(string advisoryKey, CancellationToken cancellationToken);
Task<IReadOnlyList<Advisory>> GetRecentAsync(int limit, CancellationToken cancellationToken);
IAsyncEnumerable<Advisory> StreamAsync(CancellationToken cancellationToken);
}
public sealed class InMemoryAdvisoryStore : IAdvisoryStore
{
private readonly ConcurrentDictionary<string, Advisory> _advisories = new(StringComparer.OrdinalIgnoreCase);
public Task UpsertAsync(Advisory advisory, CancellationToken cancellationToken)
{
_advisories[advisory.AdvisoryKey] = advisory;
return Task.CompletedTask;
}
public Task<Advisory?> FindAsync(string advisoryKey, CancellationToken cancellationToken)
{
_advisories.TryGetValue(advisoryKey, out var advisory);
return Task.FromResult<Advisory?>(advisory);
}
public Task<IReadOnlyList<Advisory>> GetRecentAsync(int limit, CancellationToken cancellationToken)
{
var result = _advisories.Values
.OrderByDescending(a => a.Modified ?? a.Published ?? DateTimeOffset.MinValue)
.Take(limit)
.ToArray();
return Task.FromResult<IReadOnlyList<Advisory>>(result);
}
public async IAsyncEnumerable<Advisory> StreamAsync([System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
{
foreach (var advisory in _advisories.Values.OrderBy(a => a.AdvisoryKey, StringComparer.OrdinalIgnoreCase))
{
cancellationToken.ThrowIfCancellationRequested();
yield return advisory;
await Task.Yield();
}
}
}
}
namespace StellaOps.Concelier.Storage.Mongo.Aliases
{
public sealed record AliasRecord(string AdvisoryKey, string Scheme, string Value);
@@ -192,13 +241,13 @@ namespace StellaOps.Concelier.Storage.Mongo.Aliases
public Task<IReadOnlyList<AliasRecord>> GetByAdvisoryAsync(string advisoryKey, CancellationToken cancellationToken)
{
_byAdvisory.TryGetValue(advisoryKey, out var records);
return Task.FromResult<IReadOnlyList<AliasRecord>>(records ?? Array.Empty<AliasRecord>());
return Task.FromResult<IReadOnlyList<AliasRecord>>(records ?? (IReadOnlyList<AliasRecord>)Array.Empty<AliasRecord>());
}
public Task<IReadOnlyList<AliasRecord>> GetByAliasAsync(string scheme, string value, CancellationToken cancellationToken)
{
_byAlias.TryGetValue((scheme, value), out var records);
return Task.FromResult<IReadOnlyList<AliasRecord>>(records ?? Array.Empty<AliasRecord>());
return Task.FromResult<IReadOnlyList<AliasRecord>>(records ?? (IReadOnlyList<AliasRecord>)Array.Empty<AliasRecord>());
}
}
}
@@ -286,10 +335,10 @@ namespace StellaOps.Concelier.Storage.Mongo.Exporting
id,
cursor ?? digest,
digest,
lastDeltaDigest: null,
baseExportId: resetBaseline ? exportId : null,
baseDigest: resetBaseline ? digest : null,
targetRepository,
LastDeltaDigest: null,
BaseExportId: resetBaseline ? exportId : null,
BaseDigest: resetBaseline ? digest : null,
TargetRepository: targetRepository,
manifest,
exporterVersion,
_timeProvider.GetUtcNow());
@@ -307,11 +356,11 @@ namespace StellaOps.Concelier.Storage.Mongo.Exporting
var record = new ExportStateRecord(
id,
cursor ?? deltaDigest,
lastFullDigest: null,
lastDeltaDigest: deltaDigest,
baseExportId: null,
baseDigest: null,
targetRepository: null,
LastFullDigest: null,
LastDeltaDigest: deltaDigest,
BaseExportId: null,
BaseDigest: null,
TargetRepository: null,
manifest,
exporterVersion,
_timeProvider.GetUtcNow());

View File

@@ -0,0 +1,88 @@
using System.Text.Json;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Concelier.Storage.Postgres.Repositories;
namespace StellaOps.Concelier.Storage.Postgres;
/// <summary>
/// Postgres-backed implementation that satisfies the legacy IDocumentStore contract.
/// </summary>
public sealed class PostgresDocumentStore : IDocumentStore
{
private readonly IDocumentRepository _repository;
private readonly ISourceRepository _sourceRepository;
private readonly JsonSerializerOptions _json = new(JsonSerializerDefaults.Web);
public PostgresDocumentStore(IDocumentRepository repository, ISourceRepository sourceRepository)
{
_repository = repository ?? throw new ArgumentNullException(nameof(repository));
_sourceRepository = sourceRepository ?? throw new ArgumentNullException(nameof(sourceRepository));
}
public async Task<DocumentRecord?> FindAsync(Guid id, CancellationToken cancellationToken, MongoDB.Driver.IClientSessionHandle? session = null)
{
var row = await _repository.FindAsync(id, cancellationToken).ConfigureAwait(false);
return row is null ? null : Map(row);
}
public async Task<DocumentRecord?> FindBySourceAndUriAsync(string sourceName, string uri, CancellationToken cancellationToken, MongoDB.Driver.IClientSessionHandle? session = null)
{
var row = await _repository.FindBySourceAndUriAsync(sourceName, uri, cancellationToken).ConfigureAwait(false);
return row is null ? null : Map(row);
}
public async Task<DocumentRecord> UpsertAsync(DocumentRecord record, CancellationToken cancellationToken, MongoDB.Driver.IClientSessionHandle? session = null)
{
// Ensure source exists
var source = await _sourceRepository.GetByNameAsync(record.SourceName, cancellationToken).ConfigureAwait(false)
?? throw new InvalidOperationException($"Source '{record.SourceName}' not provisioned.");
var entity = new DocumentRecordEntity(
Id: record.Id == Guid.Empty ? Guid.NewGuid() : record.Id,
SourceId: source.Id,
SourceName: record.SourceName,
Uri: record.Uri,
Sha256: record.Sha256,
Status: record.Status,
ContentType: record.ContentType,
HeadersJson: record.Headers is null ? null : JsonSerializer.Serialize(record.Headers, _json),
MetadataJson: record.Metadata is null ? null : JsonSerializer.Serialize(record.Metadata, _json),
Etag: record.Etag,
LastModified: record.LastModified,
Payload: Array.Empty<byte>(), // payload handled via RawDocumentStorage; keep pointer zero-length here
CreatedAt: record.CreatedAt,
UpdatedAt: DateTimeOffset.UtcNow,
ExpiresAt: record.ExpiresAt);
var saved = await _repository.UpsertAsync(entity, cancellationToken).ConfigureAwait(false);
return Map(saved);
}
public async Task UpdateStatusAsync(Guid id, string status, CancellationToken cancellationToken, MongoDB.Driver.IClientSessionHandle? session = null)
{
await _repository.UpdateStatusAsync(id, status, cancellationToken).ConfigureAwait(false);
}
private DocumentRecord Map(DocumentRecordEntity row)
{
return new DocumentRecord(
row.Id,
row.SourceName,
row.Uri,
row.CreatedAt,
row.Sha256,
row.Status,
row.ContentType,
row.HeadersJson is null
? null
: JsonSerializer.Deserialize<Dictionary<string, string>>(row.HeadersJson, _json),
row.MetadataJson is null
? null
: JsonSerializer.Deserialize<Dictionary<string, string>>(row.MetadataJson, _json),
row.Etag,
row.LastModified,
PayloadId: null,
ExpiresAt: row.ExpiresAt);
}
}

View File

@@ -0,0 +1,23 @@
-- Concelier Postgres Migration 004: Source documents and payload storage (Mongo replacement)
CREATE TABLE IF NOT EXISTS concelier.source_documents (
id UUID NOT NULL,
source_id UUID NOT NULL,
source_name TEXT NOT NULL,
uri TEXT NOT NULL,
sha256 TEXT NOT NULL,
status TEXT NOT NULL,
content_type TEXT,
headers_json JSONB,
metadata_json JSONB,
etag TEXT,
last_modified TIMESTAMPTZ,
payload BYTEA NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ,
CONSTRAINT pk_source_documents PRIMARY KEY (source_name, uri)
);
CREATE INDEX IF NOT EXISTS idx_source_documents_source_id ON concelier.source_documents(source_id);
CREATE INDEX IF NOT EXISTS idx_source_documents_status ON concelier.source_documents(status);

View File

@@ -0,0 +1,18 @@
namespace StellaOps.Concelier.Storage.Postgres.Models;
public sealed record DocumentRecordEntity(
Guid Id,
Guid SourceId,
string SourceName,
string Uri,
string Sha256,
string Status,
string? ContentType,
string? HeadersJson,
string? MetadataJson,
string? Etag,
DateTimeOffset? LastModified,
byte[] Payload,
DateTimeOffset CreatedAt,
DateTimeOffset UpdatedAt,
DateTimeOffset? ExpiresAt);

View File

@@ -0,0 +1,125 @@
using System.Text.Json;
using Dapper;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Connections;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
public interface IDocumentRepository
{
Task<DocumentRecordEntity?> FindAsync(Guid id, CancellationToken cancellationToken);
Task<DocumentRecordEntity?> FindBySourceAndUriAsync(string sourceName, string uri, CancellationToken cancellationToken);
Task<DocumentRecordEntity> UpsertAsync(DocumentRecordEntity record, CancellationToken cancellationToken);
Task UpdateStatusAsync(Guid id, string status, CancellationToken cancellationToken);
}
public sealed class DocumentRepository : RepositoryBase<ConcelierDataSource>, IDocumentRepository
{
private readonly JsonSerializerOptions _json = new(JsonSerializerDefaults.Web);
public DocumentRepository(ConcelierDataSource dataSource, ILogger<DocumentRepository> logger)
: base(dataSource, logger)
{
}
public async Task<DocumentRecordEntity?> FindAsync(Guid id, CancellationToken cancellationToken)
{
const string sql = """
SELECT * FROM concelier.source_documents
WHERE id = @Id
LIMIT 1;
""";
await using var conn = await DataSource.OpenSystemConnectionAsync(cancellationToken);
var row = await conn.QuerySingleOrDefaultAsync(sql, new { Id = id });
return row is null ? null : Map(row);
}
public async Task<DocumentRecordEntity?> FindBySourceAndUriAsync(string sourceName, string uri, CancellationToken cancellationToken)
{
const string sql = """
SELECT * FROM concelier.source_documents
WHERE source_name = @SourceName AND uri = @Uri
LIMIT 1;
""";
await using var conn = await DataSource.OpenSystemConnectionAsync(cancellationToken);
var row = await conn.QuerySingleOrDefaultAsync(sql, new { SourceName = sourceName, Uri = uri });
return row is null ? null : Map(row);
}
public async Task<DocumentRecordEntity> UpsertAsync(DocumentRecordEntity record, CancellationToken cancellationToken)
{
const string sql = """
INSERT INTO concelier.source_documents (
id, source_id, source_name, uri, sha256, status, content_type,
headers_json, metadata_json, etag, last_modified, payload, created_at, updated_at, expires_at)
VALUES (
@Id, @SourceId, @SourceName, @Uri, @Sha256, @Status, @ContentType,
@HeadersJson, @MetadataJson, @Etag, @LastModified, @Payload, @CreatedAt, @UpdatedAt, @ExpiresAt)
ON CONFLICT (source_name, uri) DO UPDATE SET
sha256 = EXCLUDED.sha256,
status = EXCLUDED.status,
content_type = EXCLUDED.content_type,
headers_json = EXCLUDED.headers_json,
metadata_json = EXCLUDED.metadata_json,
etag = EXCLUDED.etag,
last_modified = EXCLUDED.last_modified,
payload = EXCLUDED.payload,
updated_at = EXCLUDED.updated_at,
expires_at = EXCLUDED.expires_at
RETURNING *;
""";
await using var conn = await DataSource.OpenSystemConnectionAsync(cancellationToken);
var row = await conn.QuerySingleAsync(sql, new
{
record.Id,
record.SourceId,
record.SourceName,
record.Uri,
record.Sha256,
record.Status,
record.ContentType,
record.HeadersJson,
record.MetadataJson,
record.Etag,
record.LastModified,
record.Payload,
record.CreatedAt,
record.UpdatedAt,
record.ExpiresAt
});
return Map(row);
}
public async Task UpdateStatusAsync(Guid id, string status, CancellationToken cancellationToken)
{
const string sql = """
UPDATE concelier.source_documents
SET status = @Status, updated_at = NOW()
WHERE id = @Id;
""";
await using var conn = await DataSource.OpenSystemConnectionAsync(cancellationToken);
await conn.ExecuteAsync(sql, new { Id = id, Status = status });
}
private DocumentRecordEntity Map(dynamic row)
{
return new DocumentRecordEntity(
row.id,
row.source_id,
row.source_name,
row.uri,
row.sha256,
row.status,
(string?)row.content_type,
(string?)row.headers_json,
(string?)row.metadata_json,
(string?)row.etag,
(DateTimeOffset?)row.last_modified,
(byte[])row.payload,
DateTime.SpecifyKind(row.created_at, DateTimeKind.Utc),
DateTime.SpecifyKind(row.updated_at, DateTimeKind.Utc),
row.expires_at is null ? null : DateTime.SpecifyKind(row.expires_at, DateTimeKind.Utc));
}
}

View File

@@ -4,6 +4,7 @@ using StellaOps.Concelier.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;
using StellaOps.Concelier.Core.Linksets;
using StellaOps.Concelier.Storage.Mongo;
namespace StellaOps.Concelier.Storage.Postgres;
@@ -38,11 +39,13 @@ public static class ServiceCollectionExtensions
services.AddScoped<IAdvisoryWeaknessRepository, AdvisoryWeaknessRepository>();
services.AddScoped<IKevFlagRepository, KevFlagRepository>();
services.AddScoped<ISourceStateRepository, SourceStateRepository>();
services.AddScoped<IDocumentRepository, DocumentRepository>();
services.AddScoped<IFeedSnapshotRepository, FeedSnapshotRepository>();
services.AddScoped<IAdvisorySnapshotRepository, AdvisorySnapshotRepository>();
services.AddScoped<IMergeEventRepository, MergeEventRepository>();
services.AddScoped<IAdvisoryLinksetStore, AdvisoryLinksetCacheRepository>();
services.AddScoped<IAdvisoryLinksetLookup>(sp => sp.GetRequiredService<IAdvisoryLinksetStore>());
services.AddScoped<IDocumentStore, PostgresDocumentStore>();
return services;
}
@@ -71,11 +74,13 @@ public static class ServiceCollectionExtensions
services.AddScoped<IAdvisoryWeaknessRepository, AdvisoryWeaknessRepository>();
services.AddScoped<IKevFlagRepository, KevFlagRepository>();
services.AddScoped<ISourceStateRepository, SourceStateRepository>();
services.AddScoped<IDocumentRepository, DocumentRepository>();
services.AddScoped<IFeedSnapshotRepository, FeedSnapshotRepository>();
services.AddScoped<IAdvisorySnapshotRepository, AdvisorySnapshotRepository>();
services.AddScoped<IMergeEventRepository, MergeEventRepository>();
services.AddScoped<IAdvisoryLinksetStore, AdvisoryLinksetCacheRepository>();
services.AddScoped<IAdvisoryLinksetLookup>(sp => sp.GetRequiredService<IAdvisoryLinksetStore>());
services.AddScoped<IDocumentStore, PostgresDocumentStore>();
return services;
}

View File

@@ -10,6 +10,11 @@
<RootNamespace>StellaOps.Concelier.Storage.Postgres</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Migrations\**\*.sql" LogicalName="%(RecursiveDir)%(Filename)%(Extension)" />
</ItemGroup>
@@ -25,6 +30,7 @@
<ItemGroup>
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Core\StellaOps.Concelier.Core.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.DependencyInjection\StellaOps.DependencyInjection.csproj" />
</ItemGroup>
</Project>