feat: Add CVSS receipt management endpoints and related functionality
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled

- Introduced new API endpoints for creating, retrieving, amending, and listing CVSS receipts.
- Updated IPolicyEngineClient interface to include methods for CVSS receipt operations.
- Implemented PolicyEngineClient to handle CVSS receipt requests.
- Enhanced Program.cs to map new CVSS receipt routes with appropriate authorization.
- Added necessary models and contracts for CVSS receipt requests and responses.
- Integrated Postgres document store for managing CVSS receipts and related data.
- Updated database schema with new migrations for source documents and payload storage.
- Refactored existing components to support new CVSS functionality.
This commit is contained in:
StellaOps Bot
2025-12-07 00:43:14 +02:00
parent 0de92144d2
commit 53889d85e7
67 changed files with 17207 additions and 16293 deletions

View File

@@ -1,366 +1,366 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Vndr.Chromium.Configuration;
using StellaOps.Concelier.Connector.Vndr.Chromium.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
using Json.Schema;
namespace StellaOps.Concelier.Connector.Vndr.Chromium;
public sealed class ChromiumConnector : IFeedConnector
{
private static readonly JsonSchema Schema = ChromiumSchemaProvider.Schema;
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly ChromiumFeedLoader _feedLoader;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly IJsonSchemaValidator _schemaValidator;
private readonly ChromiumOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ChromiumDiagnostics _diagnostics;
private readonly ILogger<ChromiumConnector> _logger;
public ChromiumConnector(
ChromiumFeedLoader feedLoader,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
IJsonSchemaValidator schemaValidator,
IOptions<ChromiumOptions> options,
TimeProvider? timeProvider,
ChromiumDiagnostics diagnostics,
ILogger<ChromiumConnector> logger)
{
_feedLoader = feedLoader ?? throw new ArgumentNullException(nameof(feedLoader));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrChromiumConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var (windowStart, windowEnd) = CalculateWindow(cursor, now);
ProvenanceDiagnostics.ReportResumeWindow(SourceName, windowStart, _logger);
IReadOnlyList<ChromiumFeedEntry> feedEntries;
_diagnostics.FetchAttempt();
try
{
feedEntries = await _feedLoader.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium feed load failed {Start}-{End}", windowStart, windowEnd);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
var fetchCache = new Dictionary<string, ChromiumFetchCacheEntry>(cursor.FetchCache, StringComparer.Ordinal);
var touchedResources = new HashSet<string>(StringComparer.Ordinal);
var candidates = feedEntries
.Where(static entry => entry.IsSecurityUpdate())
.OrderBy(static entry => entry.Published)
.ToArray();
if (candidates.Length == 0)
{
var untouched = cursor
.WithLastPublished(cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(untouched, cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var maxPublished = cursor.LastPublished;
foreach (var entry in candidates)
{
try
{
var cacheKey = entry.DetailUri.ToString();
touchedResources.Add(cacheKey);
var metadata = ChromiumDocumentMetadata.CreateMetadata(entry.PostId, entry.Title, entry.Published, entry.Updated, entry.Summary);
var request = new SourceFetchRequest(ChromiumOptions.HttpClientName, SourceName, entry.DetailUri)
{
Metadata = metadata,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (cursor.TryGetFetchCache(cacheKey, out var cached) && string.Equals(cached.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase))
{
_diagnostics.FetchUnchanged();
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
await _documentStore.UpdateStatusAsync(result.Document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
continue;
}
_diagnostics.FetchDocument();
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium fetch failed for {Uri}", entry.DetailUri);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (touchedResources.Count > 0)
{
var keysToRemove = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in keysToRemove)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings)
.WithLastPublished(maxPublished ?? cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("Chromium document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
ChromiumDto dto;
try
{
var metadata = ChromiumDocumentMetadata.FromDocument(document);
var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
var html = Encoding.UTF8.GetString(content);
dto = ChromiumParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium parse failed for {Uri}", document.Uri);
_diagnostics.ParseFailure();
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
using var jsonDocument = JsonDocument.Parse(json);
try
{
_schemaValidator.Validate(jsonDocument, Schema, dto.PostId);
}
catch (StellaOps.Concelier.Connector.Common.Json.JsonSchemaValidationException ex)
{
_logger.LogError(ex, "Chromium schema validation failed for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var payload = BsonDocument.Parse(json);
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var validatedAt = _timeProvider.GetUtcNow();
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "chromium.post.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "chromium.post.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
_diagnostics.ParseSuccess();
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dtoRecord.Payload.ToJson(new JsonWriterSettings { OutputMode = JsonOutputMode.RelaxedExtendedJson });
var dto = JsonSerializer.Deserialize<ChromiumDto>(json, SerializerOptions);
if (dto is null)
{
_logger.LogWarning("Chromium DTO deserialization failed for {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var recordedAt = _timeProvider.GetUtcNow();
var (advisory, flag) = ChromiumMapper.Map(dto, SourceName, recordedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
_diagnostics.MapSuccess();
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<ChromiumCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return ChromiumCursor.FromBsonDocument(record?.Cursor);
}
private async Task UpdateCursorAsync(ChromiumCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private (DateTimeOffset start, DateTimeOffset end) CalculateWindow(ChromiumCursor cursor, DateTimeOffset now)
{
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
var start = lastPublished - _options.WindowOverlap;
var backfill = now - _options.InitialBackfill;
if (start < backfill)
{
start = backfill;
}
var end = now;
if (end <= start)
{
end = start.AddHours(1);
}
return (start, end);
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Json;
using StellaOps.Concelier.Connector.Vndr.Chromium.Configuration;
using StellaOps.Concelier.Connector.Vndr.Chromium.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
using StellaOps.Plugin;
using Json.Schema;
namespace StellaOps.Concelier.Connector.Vndr.Chromium;
public sealed class ChromiumConnector : IFeedConnector
{
private static readonly JsonSchema Schema = ChromiumSchemaProvider.Schema;
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly ChromiumFeedLoader _feedLoader;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly IPsirtFlagStore _psirtFlagStore;
private readonly ISourceStateRepository _stateRepository;
private readonly IJsonSchemaValidator _schemaValidator;
private readonly ChromiumOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ChromiumDiagnostics _diagnostics;
private readonly ILogger<ChromiumConnector> _logger;
public ChromiumConnector(
ChromiumFeedLoader feedLoader,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
IPsirtFlagStore psirtFlagStore,
ISourceStateRepository stateRepository,
IJsonSchemaValidator schemaValidator,
IOptions<ChromiumOptions> options,
TimeProvider? timeProvider,
ChromiumDiagnostics diagnostics,
ILogger<ChromiumConnector> logger)
{
_feedLoader = feedLoader ?? throw new ArgumentNullException(nameof(feedLoader));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => VndrChromiumConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var (windowStart, windowEnd) = CalculateWindow(cursor, now);
ProvenanceDiagnostics.ReportResumeWindow(SourceName, windowStart, _logger);
IReadOnlyList<ChromiumFeedEntry> feedEntries;
_diagnostics.FetchAttempt();
try
{
feedEntries = await _feedLoader.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium feed load failed {Start}-{End}", windowStart, windowEnd);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
var fetchCache = new Dictionary<string, ChromiumFetchCacheEntry>(cursor.FetchCache, StringComparer.Ordinal);
var touchedResources = new HashSet<string>(StringComparer.Ordinal);
var candidates = feedEntries
.Where(static entry => entry.IsSecurityUpdate())
.OrderBy(static entry => entry.Published)
.ToArray();
if (candidates.Length == 0)
{
var untouched = cursor
.WithLastPublished(cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(untouched, cancellationToken).ConfigureAwait(false);
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var maxPublished = cursor.LastPublished;
foreach (var entry in candidates)
{
try
{
var cacheKey = entry.DetailUri.ToString();
touchedResources.Add(cacheKey);
var metadata = ChromiumDocumentMetadata.CreateMetadata(entry.PostId, entry.Title, entry.Published, entry.Updated, entry.Summary);
var request = new SourceFetchRequest(ChromiumOptions.HttpClientName, SourceName, entry.DetailUri)
{
Metadata = metadata,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
};
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (cursor.TryGetFetchCache(cacheKey, out var cached) && string.Equals(cached.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase))
{
_diagnostics.FetchUnchanged();
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
await _documentStore.UpdateStatusAsync(result.Document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
continue;
}
_diagnostics.FetchDocument();
if (!pendingDocuments.Contains(result.Document.Id))
{
pendingDocuments.Add(result.Document.Id);
}
if (!maxPublished.HasValue || entry.Published > maxPublished)
{
maxPublished = entry.Published;
}
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium fetch failed for {Uri}", entry.DetailUri);
_diagnostics.FetchFailure();
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
if (touchedResources.Count > 0)
{
var keysToRemove = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in keysToRemove)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings)
.WithLastPublished(maxPublished ?? cursor.LastPublished ?? windowEnd)
.WithFetchCache(fetchCache);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var pendingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
if (!document.PayloadId.HasValue)
{
_logger.LogWarning("Chromium document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
ChromiumDto dto;
try
{
var metadata = ChromiumDocumentMetadata.FromDocument(document);
var content = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
var html = Encoding.UTF8.GetString(content);
dto = ChromiumParser.Parse(html, metadata);
}
catch (Exception ex)
{
_logger.LogError(ex, "Chromium parse failed for {Uri}", document.Uri);
_diagnostics.ParseFailure();
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var json = JsonSerializer.Serialize(dto, SerializerOptions);
using var jsonDocument = JsonDocument.Parse(json);
try
{
_schemaValidator.Validate(jsonDocument, Schema, dto.PostId);
}
catch (StellaOps.Concelier.Connector.Common.Json.JsonSchemaValidationException ex)
{
_logger.LogError(ex, "Chromium schema validation failed for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingDocuments.Remove(documentId);
pendingMappings.Remove(documentId);
continue;
}
var payload = BsonDocument.Parse(json);
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
var validatedAt = _timeProvider.GetUtcNow();
var dtoRecord = existingDto is null
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "chromium.post.v1", payload, validatedAt)
: existingDto with
{
Payload = payload,
SchemaVersion = "chromium.post.v1",
ValidatedAt = validatedAt,
};
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
_diagnostics.ParseSuccess();
pendingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var json = dtoRecord.Payload.ToJson(new JsonWriterSettings { OutputMode = JsonOutputMode.RelaxedExtendedJson });
var dto = JsonSerializer.Deserialize<ChromiumDto>(json, SerializerOptions);
if (dto is null)
{
_logger.LogWarning("Chromium DTO deserialization failed for {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var recordedAt = _timeProvider.GetUtcNow();
var (advisory, flag) = ChromiumMapper.Map(dto, SourceName, recordedAt);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
_diagnostics.MapSuccess();
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<ChromiumCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return ChromiumCursor.FromBsonDocument(record?.Cursor);
}
private async Task UpdateCursorAsync(ChromiumCursor cursor, CancellationToken cancellationToken)
{
var completedAt = _timeProvider.GetUtcNow();
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
}
private (DateTimeOffset start, DateTimeOffset end) CalculateWindow(ChromiumCursor cursor, DateTimeOffset now)
{
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
var start = lastPublished - _options.WindowOverlap;
var backfill = now - _options.InitialBackfill;
if (start < backfill)
{
start = backfill;
}
var end = now;
if (end <= start)
{
end = start.AddHours(1);
}
return (start, end);
}
}