feat: Add CVSS receipt management endpoints and related functionality
- Introduced new API endpoints for creating, retrieving, amending, and listing CVSS receipts. - Updated IPolicyEngineClient interface to include methods for CVSS receipt operations. - Implemented PolicyEngineClient to handle CVSS receipt requests. - Enhanced Program.cs to map new CVSS receipt routes with appropriate authorization. - Added necessary models and contracts for CVSS receipt requests and responses. - Integrated Postgres document store for managing CVSS receipts and related data. - Updated database schema with new migrations for source documents and payload storage. - Refactored existing components to support new CVSS functionality.
This commit is contained in:
@@ -1,366 +1,366 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MongoDB.Bson;
|
||||
using MongoDB.Bson.IO;
|
||||
using StellaOps.Concelier.Models;
|
||||
using StellaOps.Concelier.Connector.Common;
|
||||
using StellaOps.Concelier.Connector.Common.Fetch;
|
||||
using StellaOps.Concelier.Connector.Common.Json;
|
||||
using StellaOps.Concelier.Connector.Vndr.Chromium.Configuration;
|
||||
using StellaOps.Concelier.Connector.Vndr.Chromium.Internal;
|
||||
using StellaOps.Concelier.Storage.Mongo;
|
||||
using StellaOps.Concelier.Storage.Mongo.Advisories;
|
||||
using StellaOps.Concelier.Storage.Mongo.Documents;
|
||||
using StellaOps.Concelier.Storage.Mongo.Dtos;
|
||||
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
|
||||
using StellaOps.Plugin;
|
||||
using Json.Schema;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Vndr.Chromium;
|
||||
|
||||
public sealed class ChromiumConnector : IFeedConnector
|
||||
{
|
||||
private static readonly JsonSchema Schema = ChromiumSchemaProvider.Schema;
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
private readonly ChromiumFeedLoader _feedLoader;
|
||||
private readonly SourceFetchService _fetchService;
|
||||
private readonly RawDocumentStorage _rawDocumentStorage;
|
||||
private readonly IDocumentStore _documentStore;
|
||||
private readonly IDtoStore _dtoStore;
|
||||
private readonly IAdvisoryStore _advisoryStore;
|
||||
private readonly IPsirtFlagStore _psirtFlagStore;
|
||||
private readonly ISourceStateRepository _stateRepository;
|
||||
private readonly IJsonSchemaValidator _schemaValidator;
|
||||
private readonly ChromiumOptions _options;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ChromiumDiagnostics _diagnostics;
|
||||
private readonly ILogger<ChromiumConnector> _logger;
|
||||
|
||||
public ChromiumConnector(
|
||||
ChromiumFeedLoader feedLoader,
|
||||
SourceFetchService fetchService,
|
||||
RawDocumentStorage rawDocumentStorage,
|
||||
IDocumentStore documentStore,
|
||||
IDtoStore dtoStore,
|
||||
IAdvisoryStore advisoryStore,
|
||||
IPsirtFlagStore psirtFlagStore,
|
||||
ISourceStateRepository stateRepository,
|
||||
IJsonSchemaValidator schemaValidator,
|
||||
IOptions<ChromiumOptions> options,
|
||||
TimeProvider? timeProvider,
|
||||
ChromiumDiagnostics diagnostics,
|
||||
ILogger<ChromiumConnector> logger)
|
||||
{
|
||||
_feedLoader = feedLoader ?? throw new ArgumentNullException(nameof(feedLoader));
|
||||
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
|
||||
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
|
||||
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
|
||||
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
|
||||
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
|
||||
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
|
||||
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
|
||||
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
|
||||
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
|
||||
_options.Validate();
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public string SourceName => VndrChromiumConnectorPlugin.SourceName;
|
||||
|
||||
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var (windowStart, windowEnd) = CalculateWindow(cursor, now);
|
||||
ProvenanceDiagnostics.ReportResumeWindow(SourceName, windowStart, _logger);
|
||||
|
||||
IReadOnlyList<ChromiumFeedEntry> feedEntries;
|
||||
_diagnostics.FetchAttempt();
|
||||
try
|
||||
{
|
||||
feedEntries = await _feedLoader.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium feed load failed {Start}-{End}", windowStart, windowEnd);
|
||||
_diagnostics.FetchFailure();
|
||||
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
|
||||
throw;
|
||||
}
|
||||
|
||||
var fetchCache = new Dictionary<string, ChromiumFetchCacheEntry>(cursor.FetchCache, StringComparer.Ordinal);
|
||||
var touchedResources = new HashSet<string>(StringComparer.Ordinal);
|
||||
|
||||
var candidates = feedEntries
|
||||
.Where(static entry => entry.IsSecurityUpdate())
|
||||
.OrderBy(static entry => entry.Published)
|
||||
.ToArray();
|
||||
|
||||
if (candidates.Length == 0)
|
||||
{
|
||||
var untouched = cursor
|
||||
.WithLastPublished(cursor.LastPublished ?? windowEnd)
|
||||
.WithFetchCache(fetchCache);
|
||||
await UpdateCursorAsync(untouched, cancellationToken).ConfigureAwait(false);
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingDocuments = cursor.PendingDocuments.ToList();
|
||||
var maxPublished = cursor.LastPublished;
|
||||
|
||||
foreach (var entry in candidates)
|
||||
{
|
||||
try
|
||||
{
|
||||
var cacheKey = entry.DetailUri.ToString();
|
||||
touchedResources.Add(cacheKey);
|
||||
|
||||
var metadata = ChromiumDocumentMetadata.CreateMetadata(entry.PostId, entry.Title, entry.Published, entry.Updated, entry.Summary);
|
||||
var request = new SourceFetchRequest(ChromiumOptions.HttpClientName, SourceName, entry.DetailUri)
|
||||
{
|
||||
Metadata = metadata,
|
||||
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
|
||||
};
|
||||
|
||||
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
if (!result.IsSuccess || result.Document is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cursor.TryGetFetchCache(cacheKey, out var cached) && string.Equals(cached.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
_diagnostics.FetchUnchanged();
|
||||
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
|
||||
await _documentStore.UpdateStatusAsync(result.Document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!maxPublished.HasValue || entry.Published > maxPublished)
|
||||
{
|
||||
maxPublished = entry.Published;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
_diagnostics.FetchDocument();
|
||||
if (!pendingDocuments.Contains(result.Document.Id))
|
||||
{
|
||||
pendingDocuments.Add(result.Document.Id);
|
||||
}
|
||||
|
||||
if (!maxPublished.HasValue || entry.Published > maxPublished)
|
||||
{
|
||||
maxPublished = entry.Published;
|
||||
}
|
||||
|
||||
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium fetch failed for {Uri}", entry.DetailUri);
|
||||
_diagnostics.FetchFailure();
|
||||
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
if (touchedResources.Count > 0)
|
||||
{
|
||||
var keysToRemove = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
|
||||
foreach (var key in keysToRemove)
|
||||
{
|
||||
fetchCache.Remove(key);
|
||||
}
|
||||
}
|
||||
|
||||
var updatedCursor = cursor
|
||||
.WithPendingDocuments(pendingDocuments)
|
||||
.WithPendingMappings(cursor.PendingMappings)
|
||||
.WithLastPublished(maxPublished ?? cursor.LastPublished ?? windowEnd)
|
||||
.WithFetchCache(fetchCache);
|
||||
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (cursor.PendingDocuments.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingDocuments = cursor.PendingDocuments.ToList();
|
||||
var pendingMappings = cursor.PendingMappings.ToList();
|
||||
|
||||
foreach (var documentId in cursor.PendingDocuments)
|
||||
{
|
||||
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
if (document is null)
|
||||
{
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!document.GridFsId.HasValue)
|
||||
{
|
||||
_logger.LogWarning("Chromium document {DocumentId} missing GridFS payload", document.Id);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
ChromiumDto dto;
|
||||
try
|
||||
{
|
||||
var metadata = ChromiumDocumentMetadata.FromDocument(document);
|
||||
var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
|
||||
var html = Encoding.UTF8.GetString(content);
|
||||
dto = ChromiumParser.Parse(html, metadata);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium parse failed for {Uri}", document.Uri);
|
||||
_diagnostics.ParseFailure();
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var json = JsonSerializer.Serialize(dto, SerializerOptions);
|
||||
using var jsonDocument = JsonDocument.Parse(json);
|
||||
try
|
||||
{
|
||||
_schemaValidator.Validate(jsonDocument, Schema, dto.PostId);
|
||||
}
|
||||
catch (StellaOps.Concelier.Connector.Common.Json.JsonSchemaValidationException ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium schema validation failed for {DocumentId}", document.Id);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var payload = BsonDocument.Parse(json);
|
||||
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
|
||||
var validatedAt = _timeProvider.GetUtcNow();
|
||||
|
||||
var dtoRecord = existingDto is null
|
||||
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "chromium.post.v1", payload, validatedAt)
|
||||
: existingDto with
|
||||
{
|
||||
Payload = payload,
|
||||
SchemaVersion = "chromium.post.v1",
|
||||
ValidatedAt = validatedAt,
|
||||
};
|
||||
|
||||
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
|
||||
_diagnostics.ParseSuccess();
|
||||
|
||||
pendingDocuments.Remove(documentId);
|
||||
if (!pendingMappings.Contains(documentId))
|
||||
{
|
||||
pendingMappings.Add(documentId);
|
||||
}
|
||||
}
|
||||
|
||||
var updatedCursor = cursor
|
||||
.WithPendingDocuments(pendingDocuments)
|
||||
.WithPendingMappings(pendingMappings);
|
||||
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (cursor.PendingMappings.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingMappings = cursor.PendingMappings.ToList();
|
||||
|
||||
foreach (var documentId in cursor.PendingMappings)
|
||||
{
|
||||
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (dtoRecord is null || document is null)
|
||||
{
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var json = dtoRecord.Payload.ToJson(new JsonWriterSettings { OutputMode = JsonOutputMode.RelaxedExtendedJson });
|
||||
var dto = JsonSerializer.Deserialize<ChromiumDto>(json, SerializerOptions);
|
||||
if (dto is null)
|
||||
{
|
||||
_logger.LogWarning("Chromium DTO deserialization failed for {DocumentId}", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var recordedAt = _timeProvider.GetUtcNow();
|
||||
var (advisory, flag) = ChromiumMapper.Map(dto, SourceName, recordedAt);
|
||||
|
||||
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
|
||||
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
|
||||
_diagnostics.MapSuccess();
|
||||
|
||||
pendingMappings.Remove(documentId);
|
||||
}
|
||||
|
||||
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private async Task<ChromiumCursor> GetCursorAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
|
||||
return ChromiumCursor.FromBsonDocument(record?.Cursor);
|
||||
}
|
||||
|
||||
private async Task UpdateCursorAsync(ChromiumCursor cursor, CancellationToken cancellationToken)
|
||||
{
|
||||
var completedAt = _timeProvider.GetUtcNow();
|
||||
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private (DateTimeOffset start, DateTimeOffset end) CalculateWindow(ChromiumCursor cursor, DateTimeOffset now)
|
||||
{
|
||||
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
|
||||
var start = lastPublished - _options.WindowOverlap;
|
||||
var backfill = now - _options.InitialBackfill;
|
||||
if (start < backfill)
|
||||
{
|
||||
start = backfill;
|
||||
}
|
||||
|
||||
var end = now;
|
||||
if (end <= start)
|
||||
{
|
||||
end = start.AddHours(1);
|
||||
}
|
||||
|
||||
return (start, end);
|
||||
}
|
||||
}
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MongoDB.Bson;
|
||||
using MongoDB.Bson.IO;
|
||||
using StellaOps.Concelier.Models;
|
||||
using StellaOps.Concelier.Connector.Common;
|
||||
using StellaOps.Concelier.Connector.Common.Fetch;
|
||||
using StellaOps.Concelier.Connector.Common.Json;
|
||||
using StellaOps.Concelier.Connector.Vndr.Chromium.Configuration;
|
||||
using StellaOps.Concelier.Connector.Vndr.Chromium.Internal;
|
||||
using StellaOps.Concelier.Storage.Mongo;
|
||||
using StellaOps.Concelier.Storage.Mongo.Advisories;
|
||||
using StellaOps.Concelier.Storage.Mongo.Documents;
|
||||
using StellaOps.Concelier.Storage.Mongo.Dtos;
|
||||
using StellaOps.Concelier.Storage.Mongo.PsirtFlags;
|
||||
using StellaOps.Plugin;
|
||||
using Json.Schema;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Vndr.Chromium;
|
||||
|
||||
public sealed class ChromiumConnector : IFeedConnector
|
||||
{
|
||||
private static readonly JsonSchema Schema = ChromiumSchemaProvider.Schema;
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
private readonly ChromiumFeedLoader _feedLoader;
|
||||
private readonly SourceFetchService _fetchService;
|
||||
private readonly RawDocumentStorage _rawDocumentStorage;
|
||||
private readonly IDocumentStore _documentStore;
|
||||
private readonly IDtoStore _dtoStore;
|
||||
private readonly IAdvisoryStore _advisoryStore;
|
||||
private readonly IPsirtFlagStore _psirtFlagStore;
|
||||
private readonly ISourceStateRepository _stateRepository;
|
||||
private readonly IJsonSchemaValidator _schemaValidator;
|
||||
private readonly ChromiumOptions _options;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ChromiumDiagnostics _diagnostics;
|
||||
private readonly ILogger<ChromiumConnector> _logger;
|
||||
|
||||
public ChromiumConnector(
|
||||
ChromiumFeedLoader feedLoader,
|
||||
SourceFetchService fetchService,
|
||||
RawDocumentStorage rawDocumentStorage,
|
||||
IDocumentStore documentStore,
|
||||
IDtoStore dtoStore,
|
||||
IAdvisoryStore advisoryStore,
|
||||
IPsirtFlagStore psirtFlagStore,
|
||||
ISourceStateRepository stateRepository,
|
||||
IJsonSchemaValidator schemaValidator,
|
||||
IOptions<ChromiumOptions> options,
|
||||
TimeProvider? timeProvider,
|
||||
ChromiumDiagnostics diagnostics,
|
||||
ILogger<ChromiumConnector> logger)
|
||||
{
|
||||
_feedLoader = feedLoader ?? throw new ArgumentNullException(nameof(feedLoader));
|
||||
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
|
||||
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
|
||||
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
|
||||
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
|
||||
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
|
||||
_psirtFlagStore = psirtFlagStore ?? throw new ArgumentNullException(nameof(psirtFlagStore));
|
||||
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
|
||||
_schemaValidator = schemaValidator ?? throw new ArgumentNullException(nameof(schemaValidator));
|
||||
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
|
||||
_options.Validate();
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public string SourceName => VndrChromiumConnectorPlugin.SourceName;
|
||||
|
||||
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var (windowStart, windowEnd) = CalculateWindow(cursor, now);
|
||||
ProvenanceDiagnostics.ReportResumeWindow(SourceName, windowStart, _logger);
|
||||
|
||||
IReadOnlyList<ChromiumFeedEntry> feedEntries;
|
||||
_diagnostics.FetchAttempt();
|
||||
try
|
||||
{
|
||||
feedEntries = await _feedLoader.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium feed load failed {Start}-{End}", windowStart, windowEnd);
|
||||
_diagnostics.FetchFailure();
|
||||
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
|
||||
throw;
|
||||
}
|
||||
|
||||
var fetchCache = new Dictionary<string, ChromiumFetchCacheEntry>(cursor.FetchCache, StringComparer.Ordinal);
|
||||
var touchedResources = new HashSet<string>(StringComparer.Ordinal);
|
||||
|
||||
var candidates = feedEntries
|
||||
.Where(static entry => entry.IsSecurityUpdate())
|
||||
.OrderBy(static entry => entry.Published)
|
||||
.ToArray();
|
||||
|
||||
if (candidates.Length == 0)
|
||||
{
|
||||
var untouched = cursor
|
||||
.WithLastPublished(cursor.LastPublished ?? windowEnd)
|
||||
.WithFetchCache(fetchCache);
|
||||
await UpdateCursorAsync(untouched, cancellationToken).ConfigureAwait(false);
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingDocuments = cursor.PendingDocuments.ToList();
|
||||
var maxPublished = cursor.LastPublished;
|
||||
|
||||
foreach (var entry in candidates)
|
||||
{
|
||||
try
|
||||
{
|
||||
var cacheKey = entry.DetailUri.ToString();
|
||||
touchedResources.Add(cacheKey);
|
||||
|
||||
var metadata = ChromiumDocumentMetadata.CreateMetadata(entry.PostId, entry.Title, entry.Published, entry.Updated, entry.Summary);
|
||||
var request = new SourceFetchRequest(ChromiumOptions.HttpClientName, SourceName, entry.DetailUri)
|
||||
{
|
||||
Metadata = metadata,
|
||||
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
|
||||
};
|
||||
|
||||
var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
if (!result.IsSuccess || result.Document is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cursor.TryGetFetchCache(cacheKey, out var cached) && string.Equals(cached.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
_diagnostics.FetchUnchanged();
|
||||
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
|
||||
await _documentStore.UpdateStatusAsync(result.Document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!maxPublished.HasValue || entry.Published > maxPublished)
|
||||
{
|
||||
maxPublished = entry.Published;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
_diagnostics.FetchDocument();
|
||||
if (!pendingDocuments.Contains(result.Document.Id))
|
||||
{
|
||||
pendingDocuments.Add(result.Document.Id);
|
||||
}
|
||||
|
||||
if (!maxPublished.HasValue || entry.Published > maxPublished)
|
||||
{
|
||||
maxPublished = entry.Published;
|
||||
}
|
||||
|
||||
fetchCache[cacheKey] = new ChromiumFetchCacheEntry(result.Document.Sha256);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium fetch failed for {Uri}", entry.DetailUri);
|
||||
_diagnostics.FetchFailure();
|
||||
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
if (touchedResources.Count > 0)
|
||||
{
|
||||
var keysToRemove = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
|
||||
foreach (var key in keysToRemove)
|
||||
{
|
||||
fetchCache.Remove(key);
|
||||
}
|
||||
}
|
||||
|
||||
var updatedCursor = cursor
|
||||
.WithPendingDocuments(pendingDocuments)
|
||||
.WithPendingMappings(cursor.PendingMappings)
|
||||
.WithLastPublished(maxPublished ?? cursor.LastPublished ?? windowEnd)
|
||||
.WithFetchCache(fetchCache);
|
||||
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (cursor.PendingDocuments.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingDocuments = cursor.PendingDocuments.ToList();
|
||||
var pendingMappings = cursor.PendingMappings.ToList();
|
||||
|
||||
foreach (var documentId in cursor.PendingDocuments)
|
||||
{
|
||||
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
if (document is null)
|
||||
{
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!document.PayloadId.HasValue)
|
||||
{
|
||||
_logger.LogWarning("Chromium document {DocumentId} missing GridFS payload", document.Id);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
ChromiumDto dto;
|
||||
try
|
||||
{
|
||||
var metadata = ChromiumDocumentMetadata.FromDocument(document);
|
||||
var content = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
|
||||
var html = Encoding.UTF8.GetString(content);
|
||||
dto = ChromiumParser.Parse(html, metadata);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium parse failed for {Uri}", document.Uri);
|
||||
_diagnostics.ParseFailure();
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var json = JsonSerializer.Serialize(dto, SerializerOptions);
|
||||
using var jsonDocument = JsonDocument.Parse(json);
|
||||
try
|
||||
{
|
||||
_schemaValidator.Validate(jsonDocument, Schema, dto.PostId);
|
||||
}
|
||||
catch (StellaOps.Concelier.Connector.Common.Json.JsonSchemaValidationException ex)
|
||||
{
|
||||
_logger.LogError(ex, "Chromium schema validation failed for {DocumentId}", document.Id);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var payload = BsonDocument.Parse(json);
|
||||
var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
|
||||
var validatedAt = _timeProvider.GetUtcNow();
|
||||
|
||||
var dtoRecord = existingDto is null
|
||||
? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "chromium.post.v1", payload, validatedAt)
|
||||
: existingDto with
|
||||
{
|
||||
Payload = payload,
|
||||
SchemaVersion = "chromium.post.v1",
|
||||
ValidatedAt = validatedAt,
|
||||
};
|
||||
|
||||
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
|
||||
_diagnostics.ParseSuccess();
|
||||
|
||||
pendingDocuments.Remove(documentId);
|
||||
if (!pendingMappings.Contains(documentId))
|
||||
{
|
||||
pendingMappings.Add(documentId);
|
||||
}
|
||||
}
|
||||
|
||||
var updatedCursor = cursor
|
||||
.WithPendingDocuments(pendingDocuments)
|
||||
.WithPendingMappings(pendingMappings);
|
||||
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (cursor.PendingMappings.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingMappings = cursor.PendingMappings.ToList();
|
||||
|
||||
foreach (var documentId in cursor.PendingMappings)
|
||||
{
|
||||
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (dtoRecord is null || document is null)
|
||||
{
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var json = dtoRecord.Payload.ToJson(new JsonWriterSettings { OutputMode = JsonOutputMode.RelaxedExtendedJson });
|
||||
var dto = JsonSerializer.Deserialize<ChromiumDto>(json, SerializerOptions);
|
||||
if (dto is null)
|
||||
{
|
||||
_logger.LogWarning("Chromium DTO deserialization failed for {DocumentId}", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var recordedAt = _timeProvider.GetUtcNow();
|
||||
var (advisory, flag) = ChromiumMapper.Map(dto, SourceName, recordedAt);
|
||||
|
||||
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
|
||||
await _psirtFlagStore.UpsertAsync(flag, cancellationToken).ConfigureAwait(false);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
|
||||
_diagnostics.MapSuccess();
|
||||
|
||||
pendingMappings.Remove(documentId);
|
||||
}
|
||||
|
||||
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private async Task<ChromiumCursor> GetCursorAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
|
||||
return ChromiumCursor.FromBsonDocument(record?.Cursor);
|
||||
}
|
||||
|
||||
private async Task UpdateCursorAsync(ChromiumCursor cursor, CancellationToken cancellationToken)
|
||||
{
|
||||
var completedAt = _timeProvider.GetUtcNow();
|
||||
await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private (DateTimeOffset start, DateTimeOffset end) CalculateWindow(ChromiumCursor cursor, DateTimeOffset now)
|
||||
{
|
||||
var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
|
||||
var start = lastPublished - _options.WindowOverlap;
|
||||
var backfill = now - _options.InitialBackfill;
|
||||
if (start < backfill)
|
||||
{
|
||||
start = backfill;
|
||||
}
|
||||
|
||||
var end = now;
|
||||
if (end <= start)
|
||||
{
|
||||
end = start.AddHours(1);
|
||||
}
|
||||
|
||||
return (start, end);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user