Refactor code structure for improved readability and maintainability; optimize performance in key functions.
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
# AGENTS.md - EPSS Connector
|
||||
|
||||
## Purpose
|
||||
Ingests EPSS (Exploit Prediction Scoring System) scores from FIRST.org to provide exploitation probability signals for CVE prioritization.
|
||||
|
||||
## Data Source
|
||||
- **URL**: https://epss.empiricalsecurity.com/
|
||||
- **Format**: `epss_scores-YYYY-MM-DD.csv.gz` (gzip-compressed CSV)
|
||||
- **Update cadence**: Daily snapshot (typically published ~08:00 UTC)
|
||||
- **Offline bundle**: Directory or file path with optional `manifest.json`
|
||||
|
||||
## Data Flow
|
||||
1. Fetch daily snapshot via HTTP or air-gapped bundle path.
|
||||
2. Parse with `StellaOps.Scanner.Storage.Epss.EpssCsvStreamParser` for deterministic row counts and content hash.
|
||||
3. Map rows to `EpssObservation` records with band classification (Low/Medium/High/Critical).
|
||||
4. Store raw document + DTO metadata; the map stage currently only counts the mapped rows and marks documents as mapped (the mapped observations are not persisted yet).
|
||||
|
||||
## Configuration
|
||||
```yaml
|
||||
concelier:
|
||||
sources:
|
||||
epss:
|
||||
baseUri: "https://epss.empiricalsecurity.com/"
|
||||
fetchCurrent: true
|
||||
catchUpDays: 7
|
||||
httpTimeout: "00:02:00"
|
||||
maxRetries: 3
|
||||
airgapMode: false
|
||||
bundlePath: "/var/stellaops/bundles/epss"
|
||||
```
|
||||
|
||||
## Orchestrator Registration
|
||||
- ConnectorId: `epss`
|
||||
- Default Schedule: Daily 10:00 UTC
|
||||
- Egress Allowlist: `epss.empiricalsecurity.com`
|
||||
@@ -0,0 +1,59 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss.Configuration;
|
||||
|
||||
/// <summary>
/// Settings for the EPSS connector: snapshot location, look-back window, HTTP behaviour
/// and the optional air-gapped bundle path.
/// </summary>
public sealed class EpssOptions
{
    // NOTE(review): the dependency-injection routine binds these options from
    // "concelier:sources:epss", which differs from this constant — confirm which path is intended.
    public const string SectionName = "Concelier:Epss";

    /// <summary>Name of the HTTP client used for EPSS downloads.</summary>
    public const string HttpClientName = "source.epss";

    /// <summary>Absolute base address that snapshot file names are resolved against.</summary>
    public Uri BaseUri { get; set; } = new("https://epss.empiricalsecurity.com/", UriKind.Absolute);

    /// <summary>When true, candidate snapshot dates start at the current UTC date.</summary>
    public bool FetchCurrent { get; set; } = true;

    /// <summary>Number of additional days to look back for a snapshot; must not be negative.</summary>
    public int CatchUpDays { get; set; } = 7;

    /// <summary>HTTP request timeout; must be greater than zero.</summary>
    public TimeSpan HttpTimeout { get; set; } = TimeSpan.FromMinutes(2);

    /// <summary>Number of retries after the first attempt; must not be negative.</summary>
    public int MaxRetries { get; set; } = 3;

    /// <summary>When true, snapshots are read from <see cref="BundlePath"/> instead of HTTP.</summary>
    public bool AirgapMode { get; set; }

    /// <summary>Directory or file path of the offline bundle; required when <see cref="AirgapMode"/> is set.</summary>
    public string? BundlePath { get; set; }

    /// <summary>User-Agent value sent with HTTP requests.</summary>
    public string UserAgent { get; set; } = "StellaOps.Concelier.Epss/1.0";

    /// <summary>
    /// Checks the option values and normalizes <see cref="BaseUri"/> so that it ends with a slash.
    /// </summary>
    /// <exception cref="InvalidOperationException">Thrown when any option value is invalid.</exception>
    [MemberNotNull(nameof(BaseUri), nameof(UserAgent))]
    public void Validate()
    {
        if (BaseUri is null || !BaseUri.IsAbsoluteUri)
        {
            throw new InvalidOperationException("BaseUri must be an absolute URI.");
        }

        // Without a trailing slash, new Uri(BaseUri, fileName) would replace the last
        // path segment instead of appending the file name to it.
        BaseUri = EnsureTrailingSlash(BaseUri);

        if (CatchUpDays < 0)
        {
            throw new InvalidOperationException("CatchUpDays cannot be negative.");
        }

        if (HttpTimeout <= TimeSpan.Zero)
        {
            throw new InvalidOperationException("HttpTimeout must be greater than zero.");
        }

        if (MaxRetries < 0)
        {
            throw new InvalidOperationException("MaxRetries cannot be negative.");
        }

        if (string.IsNullOrWhiteSpace(UserAgent))
        {
            throw new InvalidOperationException("UserAgent must be provided.");
        }

        if (AirgapMode && string.IsNullOrWhiteSpace(BundlePath))
        {
            throw new InvalidOperationException("BundlePath must be provided when AirgapMode is enabled.");
        }
    }

    // Appends "/" to http(s) URIs whose path lacks it; URIs with a query or fragment are left alone.
    private static Uri EnsureTrailingSlash(Uri uri)
    {
        var isHttp = string.Equals(uri.Scheme, Uri.UriSchemeHttp, StringComparison.Ordinal)
            || string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.Ordinal);

        if (!isHttp
            || uri.Query.Length != 0
            || uri.Fragment.Length != 0
            || uri.AbsolutePath.EndsWith('/'))
        {
            return uri;
        }

        return new Uri(uri.AbsoluteUri + "/", UriKind.Absolute);
    }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Concelier.Connector.Epss.Internal;
|
||||
using StellaOps.Plugin;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss;
|
||||
|
||||
/// <summary>
/// Plugin entry point for EPSS feed connector.
/// </summary>
public sealed class EpssConnectorPlugin : IConnectorPlugin
{
    /// <summary>Source identifier shared by the plugin and the connector.</summary>
    public const string SourceName = "epss";

    /// <summary>Plugin name; same value as <see cref="SourceName"/>.</summary>
    public string Name => SourceName;

    /// <summary>
    /// Reports whether an <see cref="EpssConnector"/> can be resolved from <paramref name="services"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public bool IsAvailable(IServiceProvider services)
    {
        // Guard explicitly, matching Create.
        ArgumentNullException.ThrowIfNull(services);
        return services.GetService<EpssConnector>() is not null;
    }

    /// <summary>
    /// Resolves the EPSS connector from <paramref name="services"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        return services.GetRequiredService<EpssConnector>();
    }
}
|
||||
@@ -0,0 +1,54 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Concelier.Core.Jobs;
|
||||
using StellaOps.Concelier.Connector.Epss.Configuration;
|
||||
using StellaOps.DependencyInjection;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss;
|
||||
|
||||
/// <summary>
/// Registers the EPSS connector, binds its options from configuration and makes sure
/// the fetch/parse/map job definitions exist in the job scheduler options.
/// </summary>
public sealed class EpssDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    // NOTE(review): EpssOptions.SectionName is "Concelier:Epss", which is a different path
    // from the one bound here — confirm which is intended.
    private const string ConfigurationSection = "concelier:sources:epss";

    /// <summary>
    /// Adds the EPSS connector services and scheduler job definitions.
    /// </summary>
    /// <param name="services">Service collection to register into.</param>
    /// <param name="configuration">Configuration root that contains the EPSS section.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Only bind here. AddEpssConnector validates in PostConfigure (after every configure
        // action has run) and registers EpssFetchJob, EpssParseJob and EpssMapJob itself,
        // so validating or registering them again at this point is unnecessary.
        services.AddEpssConnector(options => configuration.GetSection(ConfigurationSection).Bind(options));

        services.PostConfigure<JobSchedulerOptions>(options =>
        {
            EnsureJob(options, EpssJobKinds.Fetch, typeof(EpssFetchJob));
            EnsureJob(options, EpssJobKinds.Parse, typeof(EpssParseJob));
            EnsureJob(options, EpssJobKinds.Map, typeof(EpssMapJob));
        });

        return services;
    }

    // Adds a definition for the job kind unless one is already present, so existing entries win.
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (options.Definitions.ContainsKey(kind))
        {
            return;
        }

        options.Definitions[kind] = new JobDefinition(
            kind,
            jobType,
            options.DefaultTimeout,
            options.DefaultLeaseDuration,
            CronExpression: null,
            Enabled: true);
    }
}
|
||||
@@ -0,0 +1,40 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Concelier.Connector.Common.Http;
|
||||
using StellaOps.Concelier.Connector.Epss.Configuration;
|
||||
using StellaOps.Concelier.Connector.Epss.Internal;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss;
|
||||
|
||||
/// <summary>
/// Service-collection helpers that wire up the EPSS connector.
/// </summary>
public static class EpssServiceCollectionExtensions
{
    /// <summary>
    /// Registers EPSS options (validated after configuration), the named source HTTP client,
    /// diagnostics, the connector and its three jobs.
    /// </summary>
    /// <param name="services">Service collection to register into.</param>
    /// <param name="configure">Callback that populates <see cref="EpssOptions"/>.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static IServiceCollection AddEpssConnector(this IServiceCollection services, Action<EpssOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        var optionsBuilder = services.AddOptions<EpssOptions>();
        optionsBuilder.Configure(configure);
        optionsBuilder.PostConfigure(static epss => epss.Validate());

        services.AddSourceHttpClient(EpssOptions.HttpClientName, (provider, http) =>
        {
            var epss = provider.GetRequiredService<IOptions<EpssOptions>>().Value;

            http.BaseAddress = epss.BaseUri;
            http.Timeout = epss.HttpTimeout;
            http.UserAgent = epss.UserAgent;

            // One initial attempt plus the configured number of retries.
            http.MaxAttempts = Math.Max(1, epss.MaxRetries + 1);

            // Only the configured host may be contacted.
            http.AllowedHosts.Clear();
            http.AllowedHosts.Add(epss.BaseUri.Host);

            http.DefaultRequestHeaders["Accept"] = "application/gzip,application/octet-stream,application/x-gzip";
        });

        services.AddSingleton<EpssDiagnostics>();
        services.AddTransient<EpssConnector>();

        // Job types resolved by the scheduler.
        services.AddTransient<EpssFetchJob>();
        services.AddTransient<EpssParseJob>();
        services.AddTransient<EpssMapJob>();

        return services;
    }
}
|
||||
@@ -0,0 +1,778 @@
|
||||
using System.Globalization;
|
||||
using System.Net;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Concelier.Connector.Common;
|
||||
using StellaOps.Concelier.Connector.Common.Fetch;
|
||||
using StellaOps.Concelier.Connector.Epss.Configuration;
|
||||
using StellaOps.Concelier.Documents;
|
||||
using StellaOps.Concelier.Storage;
|
||||
using StellaOps.Cryptography;
|
||||
using StellaOps.Plugin;
|
||||
using StellaOps.Scanner.Storage.Epss;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss.Internal;
|
||||
|
||||
public sealed class EpssConnector : IFeedConnector
|
||||
{
|
||||
// Schema tag written on every DTO record produced by the parse stage.
private const string DtoSchemaVersion = "epss.snapshot.v1";

// File name of the optional manifest that accompanies an offline bundle.
private const string ManifestFileName = "manifest.json";

// Media types advertised in the Accept header of snapshot requests.
private static readonly string[] AcceptTypes = { "application/gzip", "application/octet-stream", "application/x-gzip" };

private readonly IHttpClientFactory _httpClientFactory;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly ISourceStateRepository _stateRepository;
private readonly EpssOptions _options;
private readonly EpssDiagnostics _diagnostics;
private readonly ICryptoHash _hash;
private readonly TimeProvider _timeProvider;
private readonly ILogger<EpssConnector> _logger;
private readonly EpssCsvStreamParser _parser = new();

/// <summary>
/// Creates the connector. Every dependency except <paramref name="timeProvider"/> is required;
/// a null time provider falls back to <see cref="TimeProvider.System"/>. The options are validated here.
/// </summary>
/// <exception cref="ArgumentNullException">A required dependency (or the options value) is null.</exception>
public EpssConnector(
    IHttpClientFactory httpClientFactory,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    ISourceStateRepository stateRepository,
    IOptions<EpssOptions> options,
    EpssDiagnostics diagnostics,
    ICryptoHash hash,
    TimeProvider? timeProvider,
    ILogger<EpssConnector> logger)
{
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(dtoStore);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value ?? throw new ArgumentNullException(nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(diagnostics);
    ArgumentNullException.ThrowIfNull(hash);
    ArgumentNullException.ThrowIfNull(logger);

    _httpClientFactory = httpClientFactory;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _dtoStore = dtoStore;
    _stateRepository = stateRepository;
    _options = resolvedOptions;
    _diagnostics = diagnostics;
    _hash = hash;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
}

/// <summary>Source identifier of this connector.</summary>
public string SourceName
{
    get { return EpssConnectorPlugin.SourceName; }
}
|
||||
|
||||
/// <summary>
/// Fetches the newest available EPSS snapshot (over HTTP, or from the offline bundle in air-gap mode),
/// stores it as a raw document pending parse, and updates the cursor.
/// </summary>
/// <param name="services">Service provider supplied by the job host; only checked for null here.</param>
/// <param name="cancellationToken">Cancels the fetch.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    var pendingDocuments = cursor.PendingDocuments.ToHashSet();
    var pendingMappings = cursor.PendingMappings.ToHashSet();
    var now = _timeProvider.GetUtcNow();
    var nowDate = DateOnly.FromDateTime(now.UtcDateTime);

    var candidates = GetCandidateDates(cursor, nowDate).ToArray();
    if (candidates.Length == 0)
    {
        return;
    }

    _diagnostics.FetchAttempt();

    EpssFetchResult? fetchResult = null;
    try
    {
        // Candidates are tried in order; the first date that yields any result wins.
        foreach (var date in candidates)
        {
            cancellationToken.ThrowIfCancellationRequested();
            fetchResult = _options.AirgapMode
                ? await TryFetchFromBundleAsync(date, cancellationToken).ConfigureAwait(false)
                : await TryFetchFromHttpAsync(date, cursor, cancellationToken).ConfigureAwait(false);

            if (fetchResult is not null)
            {
                break;
            }
        }

        if (fetchResult is null)
        {
            _logger.LogWarning("EPSS fetch: no snapshot found for {CandidateCount} candidate dates.", candidates.Length);
            return;
        }

        if (fetchResult.IsNotModified)
        {
            // Nothing new: keep the snapshot metadata, refresh only the ETag and timestamp.
            _diagnostics.FetchUnchanged();
            var unchangedCursor = cursor.WithSnapshotMetadata(
                cursor.ModelVersion,
                cursor.LastProcessedDate,
                fetchResult.ETag ?? cursor.ETag,
                cursor.ContentHash,
                cursor.LastRowCount,
                now);
            await UpdateCursorAsync(unchangedCursor, cancellationToken).ConfigureAwait(false);
            return;
        }

        if (!fetchResult.IsSuccess || fetchResult.Content is null)
        {
            _diagnostics.FetchFailure();
            await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), "EPSS fetch returned no content.", cancellationToken).ConfigureAwait(false);
            return;
        }

        var record = await StoreSnapshotAsync(fetchResult, now, cancellationToken).ConfigureAwait(false);

        // A freshly stored document must be parsed again before it can be mapped.
        pendingDocuments.Add(record.Id);
        pendingMappings.Remove(record.Id);

        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings)
            .WithSnapshotMetadata(
                cursor.ModelVersion,
                cursor.LastProcessedDate,
                fetchResult.ETag,
                cursor.ContentHash,
                cursor.LastRowCount,
                now);

        _diagnostics.FetchSuccess();

        _logger.LogInformation(
            "Fetched EPSS snapshot {SnapshotDate} ({Uri}) document {DocumentId} pendingDocuments={PendingDocuments} pendingMappings={PendingMappings}",
            fetchResult.SnapshotDate,
            fetchResult.SourceUri,
            record.Id,
            pendingDocuments.Count,
            pendingMappings.Count);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }
    // Caller-requested cancellation is not a source failure: skip the failure bookkeeping
    // (which would also run against an already-cancelled token) and let it propagate.
    catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
    {
        _diagnostics.FetchFailure();
        _logger.LogError(ex, "EPSS fetch failed for {BaseUri}", _options.BaseUri);
        await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Parses every document listed as pending in the cursor: reads the gzip CSV to obtain row count,
/// model version, published date and content hash, stores them as a DTO and as document metadata,
/// and moves the document to the pending-mapping list.
/// </summary>
/// <param name="services">Service provider supplied by the job host; only checked for null here.</param>
/// <param name="cancellationToken">Cancels the parse.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }

    var remainingDocuments = cursor.PendingDocuments.ToList();
    var pendingMappings = cursor.PendingMappings.ToHashSet();
    var cursorState = cursor;

    foreach (var documentId in cursor.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (document is null)
        {
            // The document no longer exists; drop it from the pending list.
            remainingDocuments.Remove(documentId);
            continue;
        }

        if (!document.PayloadId.HasValue)
        {
            _diagnostics.ParseFailure("missing_payload");
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            continue;
        }

        byte[] payload;
        try
        {
            payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
        }
        // Cancellation is not a download failure; let it propagate without counting it.
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            _diagnostics.ParseFailure("download");
            _logger.LogError(ex, "EPSS parse failed downloading document {DocumentId}", document.Id);
            throw;
        }

        EpssCsvStreamParser.EpssCsvParseSession session;
        try
        {
            await using var stream = new MemoryStream(payload, writable: false);
            await using var parseSession = _parser.ParseGzip(stream);
            session = parseSession;

            // Rows are drained only so the session can accumulate its summary values.
            await foreach (var _ in parseSession.WithCancellation(cancellationToken).ConfigureAwait(false))
            {
            }
        }
        // Do not mark a valid document as failed just because the job was cancelled mid-parse.
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            _diagnostics.ParseFailure("parse");
            _logger.LogWarning(ex, "EPSS parse failed for document {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            continue;
        }

        // NOTE(review): the session is read after its await-using scope has ended; this presumably
        // relies on the summary properties remaining readable after disposal — confirm.
        var publishedDate = session.PublishedDate ?? TryParseDateFromMetadata(document.Metadata) ?? DateOnly.FromDateTime(document.CreatedAt.UtcDateTime);
        var modelVersion = string.IsNullOrWhiteSpace(session.ModelVersionTag) ? "unknown" : session.ModelVersionTag!;
        var contentHash = session.DecompressedSha256 ?? string.Empty;

        var payloadDoc = new DocumentObject
        {
            ["modelVersion"] = modelVersion,
            ["publishedDate"] = publishedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture),
            ["rowCount"] = session.RowCount,
            ["contentHash"] = contentHash
        };

        var dtoRecord = new DtoRecord(
            Guid.NewGuid(),
            document.Id,
            SourceName,
            DtoSchemaVersion,
            payloadDoc,
            _timeProvider.GetUtcNow());

        await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);

        // Mirror the parsed summary into the document metadata, keeping existing entries.
        var metadata = document.Metadata is null
            ? new Dictionary<string, string>(StringComparer.Ordinal)
            : new Dictionary<string, string>(document.Metadata, StringComparer.Ordinal);
        metadata["epss.modelVersion"] = modelVersion;
        metadata["epss.publishedDate"] = publishedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
        metadata["epss.rowCount"] = session.RowCount.ToString(CultureInfo.InvariantCulture);
        metadata["epss.contentHash"] = contentHash;

        var updatedDocument = document with { Metadata = metadata };
        await _documentStore.UpsertAsync(updatedDocument, cancellationToken).ConfigureAwait(false);

        remainingDocuments.Remove(documentId);
        pendingMappings.Add(documentId);

        cursorState = cursorState.WithSnapshotMetadata(
            modelVersion,
            publishedDate,
            document.Etag,
            contentHash,
            session.RowCount,
            _timeProvider.GetUtcNow());

        _diagnostics.ParseRows(session.RowCount, modelVersion);
    }

    var updatedCursor = cursorState
        .WithPendingDocuments(remainingDocuments)
        .WithPendingMappings(pendingMappings);

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Maps every document listed as pending-mapping in the cursor: re-reads the stored snapshot,
/// converts each row with <c>EpssMapper.ToObservation</c> (the result is currently discarded),
/// counts the rows, and marks the document as mapped.
/// </summary>
/// <param name="services">Service provider supplied by the job host; only checked for null here.</param>
/// <param name="cancellationToken">Cancels the mapping.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }

    var pendingMappings = cursor.PendingMappings.ToList();
    var cursorState = cursor;

    foreach (var documentId in cursor.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (dtoRecord is null || document is null)
        {
            // Nothing to map without both the DTO and its document.
            pendingMappings.Remove(documentId);
            continue;
        }

        var modelVersion = TryGetString(dtoRecord.Payload, "modelVersion") ?? "unknown";
        var publishedDate = TryGetDate(dtoRecord.Payload, "publishedDate")
            ?? TryParseDateFromMetadata(document.Metadata)
            ?? DateOnly.FromDateTime(document.CreatedAt.UtcDateTime);

        if (!document.PayloadId.HasValue)
        {
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        byte[] payload;
        try
        {
            payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
        }
        // Cancellation must not mark the document as failed; let it propagate instead.
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            _logger.LogError(ex, "EPSS map failed downloading document {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        int mappedRows = 0;
        try
        {
            await using var stream = new MemoryStream(payload, writable: false);
            await using var session = _parser.ParseGzip(stream);
            await foreach (var row in session.WithCancellation(cancellationToken).ConfigureAwait(false))
            {
                // The observation is built but not stored; only the count is kept.
                _ = EpssMapper.ToObservation(row, modelVersion, publishedDate);
                mappedRows++;
            }

            cursorState = cursorState.WithSnapshotMetadata(
                modelVersion,
                publishedDate,
                document.Etag,
                TryGetString(dtoRecord.Payload, "contentHash"),
                mappedRows,
                _timeProvider.GetUtcNow());
        }
        // Same rule as above: a cancelled run is not a mapping failure.
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            _logger.LogWarning(ex, "EPSS map failed for document {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
        pendingMappings.Remove(documentId);
        _diagnostics.MapRows(mappedRows, modelVersion);
    }

    var updatedCursor = cursorState.WithPendingMappings(pendingMappings);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Requests the snapshot for <paramref name="snapshotDate"/> over HTTP, sending conditional headers
/// from the previously stored document (or the cursor ETag).
/// </summary>
/// <returns>
/// Null when the snapshot does not exist (404); a not-modified result for 304;
/// otherwise a successful result carrying the response bytes.
/// </returns>
/// <exception cref="HttpRequestException">The response has a non-success status other than 404/304.</exception>
private async Task<EpssFetchResult?> TryFetchFromHttpAsync(
    DateOnly snapshotDate,
    EpssCursor cursor,
    CancellationToken cancellationToken)
{
    var fileName = GetSnapshotFileName(snapshotDate);
    var uri = new Uri(_options.BaseUri, fileName);
    var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, uri.ToString(), cancellationToken).ConfigureAwait(false);
    var etag = existing?.Etag ?? cursor.ETag;
    var lastModified = existing?.LastModified;

    var client = _httpClientFactory.CreateClient(EpssOptions.HttpClientName);
    client.Timeout = _options.HttpTimeout;

    HttpResponseMessage response;
    try
    {
        response = await SendWithRetryAsync(() => CreateRequest(uri, etag, lastModified), client, cancellationToken).ConfigureAwait(false);
    }
    catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
    {
        return null;
    }

    // Dispose the response on every path, including when the status check or body read throws.
    using (response)
    {
        if (response.StatusCode == HttpStatusCode.NotFound)
        {
            return null;
        }

        if (response.StatusCode == HttpStatusCode.NotModified)
        {
            return new EpssFetchResult(
                SnapshotDate: snapshotDate,
                SourceUri: uri.ToString(),
                IsSuccess: false,
                IsNotModified: true,
                Content: null,
                ContentType: response.Content.Headers.ContentType?.ToString(),
                ETag: response.Headers.ETag?.Tag ?? etag,
                LastModified: response.Content.Headers.LastModified);
        }

        response.EnsureSuccessStatusCode();

        var bytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
        return new EpssFetchResult(
            SnapshotDate: snapshotDate,
            SourceUri: uri.ToString(),
            IsSuccess: true,
            IsNotModified: false,
            Content: bytes,
            ContentType: response.Content.Headers.ContentType?.ToString(),
            ETag: response.Headers.ETag?.Tag ?? etag,
            LastModified: response.Content.Headers.LastModified);
    }
}
|
||||
|
||||
/// <summary>
/// Reads the snapshot for <paramref name="snapshotDate"/> from the configured offline bundle path.
/// </summary>
/// <returns>
/// A successful result with the file bytes, or null (after logging a warning) when the
/// resolved bundle file does not exist.
/// </returns>
private async Task<EpssFetchResult?> TryFetchFromBundleAsync(DateOnly snapshotDate, CancellationToken cancellationToken)
{
    var snapshotFile = GetSnapshotFileName(snapshotDate);
    var resolvedPath = ResolveBundlePath(_options.BundlePath, snapshotFile);

    if (resolvedPath is not null && File.Exists(resolvedPath))
    {
        var snapshotBytes = await File.ReadAllBytesAsync(resolvedPath, cancellationToken).ConfigureAwait(false);

        // Only the file name goes into the source URI, not the full local path.
        var sourceUri = $"bundle://{Path.GetFileName(resolvedPath)}";
        var writtenAt = new DateTimeOffset(File.GetLastWriteTimeUtc(resolvedPath));

        return new EpssFetchResult(
            SnapshotDate: snapshotDate,
            SourceUri: sourceUri,
            IsSuccess: true,
            IsNotModified: false,
            Content: snapshotBytes,
            ContentType: "application/gzip",
            ETag: null,
            LastModified: writtenAt);
    }

    _logger.LogWarning("EPSS bundle file not found: {Path}", resolvedPath ?? snapshotFile);
    return null;
}
|
||||
|
||||
/// <summary>
/// Uploads the fetched snapshot bytes to raw storage and upserts the matching document record
/// (status pending-parse). An existing document for the same source URI keeps its id.
/// </summary>
/// <param name="fetchResult">Fetched snapshot; its content is hashed and uploaded.</param>
/// <param name="fetchedAt">Timestamp recorded on the document.</param>
/// <param name="cancellationToken">Cancels the store operations.</param>
/// <returns>The document record returned by the document store.</returns>
private async Task<DocumentRecord> StoreSnapshotAsync(
    EpssFetchResult fetchResult,
    DateTimeOffset fetchedAt,
    CancellationToken cancellationToken)
{
    var contentSha256 = _hash.ComputeHashHex(fetchResult.Content, HashAlgorithms.Sha256);

    var documentMetadata = new Dictionary<string, string>(StringComparer.Ordinal);
    documentMetadata["epss.date"] = fetchResult.SnapshotDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    documentMetadata["epss.file"] = GetSnapshotFileName(fetchResult.SnapshotDate);

    // Bundle manifests only exist for air-gapped imports.
    if (_options.AirgapMode)
    {
        TryApplyBundleManifest(fetchResult.SnapshotDate, fetchResult.Content, documentMetadata);
    }

    var previous = await _documentStore.FindBySourceAndUriAsync(SourceName, fetchResult.SourceUri, cancellationToken).ConfigureAwait(false);
    var documentId = previous is null ? Guid.NewGuid() : previous.Id;

    await _rawDocumentStorage.UploadAsync(
        SourceName,
        fetchResult.SourceUri,
        fetchResult.Content,
        fetchResult.ContentType,
        ExpiresAt: null,
        cancellationToken,
        documentId).ConfigureAwait(false);

    // The payload id is the same as the document id.
    var snapshotDocument = new DocumentRecord(
        documentId,
        SourceName,
        fetchResult.SourceUri,
        fetchedAt,
        contentSha256,
        DocumentStatuses.PendingParse,
        fetchResult.ContentType,
        Headers: null,
        Metadata: documentMetadata,
        Etag: fetchResult.ETag,
        LastModified: fetchResult.LastModified,
        PayloadId: documentId,
        ExpiresAt: null,
        Payload: fetchResult.Content,
        FetchedAt: fetchedAt);

    var stored = await _documentStore.UpsertAsync(snapshotDocument, cancellationToken).ConfigureAwait(false);
    return stored;
}
|
||||
|
||||
/// <summary>
/// Best-effort enrichment from the bundle's manifest: copies the manifest's model version,
/// row count and SHA-256 for the snapshot file into <paramref name="metadata"/> and logs a warning
/// when the declared hash differs from the hash of <paramref name="content"/>.
/// Missing manifests and manifest read errors never fail the caller.
/// </summary>
private void TryApplyBundleManifest(DateOnly snapshotDate, byte[] content, IDictionary<string, string> metadata)
{
    var configuredPath = _options.BundlePath;
    if (string.IsNullOrWhiteSpace(configuredPath))
    {
        return;
    }

    var manifestPath = ResolveBundleManifestPath(configuredPath);
    if (manifestPath is null || !File.Exists(manifestPath))
    {
        return;
    }

    try
    {
        var manifestEntry = TryReadBundleManifestEntry(manifestPath, GetSnapshotFileName(snapshotDate));
        if (manifestEntry is null)
        {
            return;
        }

        var declaredModel = manifestEntry.ModelVersion;
        if (!string.IsNullOrWhiteSpace(declaredModel))
        {
            metadata["epss.manifest.modelVersion"] = declaredModel;
        }

        if (manifestEntry.RowCount is { } declaredRows)
        {
            metadata["epss.manifest.rowCount"] = declaredRows.ToString(CultureInfo.InvariantCulture);
        }

        var declaredHash = manifestEntry.Sha256;
        if (string.IsNullOrWhiteSpace(declaredHash))
        {
            return;
        }

        var computedHash = _hash.ComputeHashHex(content, HashAlgorithms.Sha256);

        // The manifest may prefix the digest with "sha256:"; compare the bare hex only.
        const string algorithmPrefix = "sha256:";
        var expectedHash = declaredHash.StartsWith(algorithmPrefix, StringComparison.OrdinalIgnoreCase)
            ? declaredHash.Substring(algorithmPrefix.Length)
            : declaredHash;

        metadata["epss.manifest.sha256"] = declaredHash;

        var hashesMatch = string.Equals(computedHash, expectedHash, StringComparison.OrdinalIgnoreCase);
        if (!hashesMatch)
        {
            _logger.LogWarning("EPSS bundle hash mismatch: expected {Expected}, actual {Actual}", manifestEntry.Sha256, computedHash);
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "EPSS bundle manifest parsing failed for {Path}", manifestPath);
    }
}
|
||||
|
||||
/// <summary>
/// Resolves the snapshot file location inside the bundle.
/// </summary>
/// <param name="bundlePath">Configured bundle path (directory or file); may be null or blank.</param>
/// <param name="fileName">Snapshot file name to look for when the bundle path is a directory.</param>
/// <returns>
/// Null for a null/blank bundle path; the combined path when the bundle path is an existing
/// directory; otherwise the bundle path itself.
/// </returns>
private static string? ResolveBundlePath(string? bundlePath, string fileName)
{
    if (string.IsNullOrWhiteSpace(bundlePath))
    {
        return null;
    }

    // Anything that is not an existing directory is treated as the snapshot file itself.
    return Directory.Exists(bundlePath)
        ? Path.Combine(bundlePath, fileName)
        : bundlePath;
}
|
||||
|
||||
/// <summary>
/// Locates the manifest file that belongs to the bundle.
/// </summary>
/// <param name="bundlePath">Configured bundle path (directory or file).</param>
/// <returns>
/// The manifest path inside the bundle directory, or next to the bundle file;
/// null when the bundle file has no usable parent directory.
/// </returns>
private static string? ResolveBundleManifestPath(string bundlePath)
{
    // For a directory bundle the manifest lives inside it; for a file bundle, beside it.
    var manifestDirectory = Directory.Exists(bundlePath)
        ? bundlePath
        : Path.GetDirectoryName(bundlePath);

    return string.IsNullOrWhiteSpace(manifestDirectory)
        ? null
        : Path.Combine(manifestDirectory, ManifestFileName);
}
|
||||
|
||||
/// <summary>
/// Reads the manifest JSON and returns the first entry of its "files" array whose "name"
/// equals <paramref name="fileName"/> (case-insensitive).
/// </summary>
/// <param name="manifestPath">Path of the manifest file to open.</param>
/// <param name="fileName">Snapshot file name to look up.</param>
/// <returns>
/// The matching entry, or null when the manifest has no "files" array or no matching entry.
/// Malformed entries and fields of an unexpected JSON type are ignored rather than raising.
/// </returns>
private static BundleManifestEntry? TryReadBundleManifestEntry(string manifestPath, string fileName)
{
    using var stream = File.OpenRead(manifestPath);
    using var doc = JsonDocument.Parse(stream);

    // TryGetProperty throws on non-object elements, so check the kind first.
    var root = doc.RootElement;
    if (root.ValueKind != JsonValueKind.Object
        || !root.TryGetProperty("files", out var files)
        || files.ValueKind != JsonValueKind.Array)
    {
        return null;
    }

    foreach (var entry in files.EnumerateArray())
    {
        if (entry.ValueKind != JsonValueKind.Object)
        {
            // A stray scalar or array must not hide a valid entry later in the list.
            continue;
        }

        var name = ReadString(entry, "name");
        if (string.IsNullOrWhiteSpace(name) || !string.Equals(name, fileName, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var modelVersion = ReadString(entry, "modelVersion");
        var sha256 = ReadString(entry, "sha256");
        var rowCount = entry.TryGetProperty("rowCount", out var rowValue)
            && rowValue.ValueKind == JsonValueKind.Number
            && rowValue.TryGetInt32(out var parsed)
            ? parsed
            : (int?)null;

        return new BundleManifestEntry(name, modelVersion, sha256, rowCount);
    }

    return null;

    // Returns the property's string value, or null when it is absent or not a JSON string.
    static string? ReadString(JsonElement element, string propertyName)
        => element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
|
||||
/// <summary>
/// Produces the snapshot dates to try, newest first: a start date followed by up to
/// <c>CatchUpDays</c> earlier days.
/// </summary>
/// <param name="cursor">Cursor whose last processed date seeds the start when FetchCurrent is off.</param>
/// <param name="nowDate">Current UTC date; the start date never exceeds it.</param>
private IEnumerable<DateOnly> GetCandidateDates(EpssCursor cursor, DateOnly nowDate)
{
    var lookbackDays = Math.Max(0, _options.CatchUpDays);

    DateOnly newest;
    if (_options.FetchCurrent)
    {
        newest = nowDate;
    }
    else
    {
        // Resume the day after the last processed snapshot, or start at the far end of the window.
        newest = cursor.LastProcessedDate?.AddDays(1) ?? nowDate.AddDays(-lookbackDays);
    }

    if (newest > nowDate)
    {
        newest = nowDate;
    }

    var daysBack = 0;
    while (daysBack <= lookbackDays)
    {
        yield return newest.AddDays(-daysBack);
        daysBack++;
    }
}
|
||||
|
||||
/// <summary>
/// Builds the snapshot file name for a date: <c>epss_scores-YYYY-MM-DD.csv.gz</c>.
/// </summary>
/// <param name="date">Snapshot date.</param>
/// <returns>The file name, always formatted with the invariant culture.</returns>
private static string GetSnapshotFileName(DateOnly date)
{
    // Format with the invariant culture: an interpolated {date:yyyy-MM-dd} would use the
    // current culture, whose calendar can change the year digits.
    var isoDate = date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    return "epss_scores-" + isoDate + ".csv.gz";
}
|
||||
|
||||
/// <summary>
/// Builds a GET request for <paramref name="uri"/> with the connector's Accept types and,
/// when available, conditional headers.
/// </summary>
/// <param name="uri">Snapshot address.</param>
/// <param name="etag">Entity tag for If-None-Match; skipped when blank or unparsable.</param>
/// <param name="lastModified">Timestamp for If-Modified-Since; skipped when null.</param>
private static HttpRequestMessage CreateRequest(Uri uri, string? etag, DateTimeOffset? lastModified)
{
    var message = new HttpRequestMessage(HttpMethod.Get, uri);
    var headers = message.Headers;

    headers.Accept.Clear();
    foreach (var mediaType in AcceptTypes)
    {
        headers.Accept.Add(new MediaTypeWithQualityHeaderValue(mediaType));
    }

    var hasEtag = !string.IsNullOrWhiteSpace(etag);
    if (hasEtag && EntityTagHeaderValue.TryParse(etag, out var parsedTag))
    {
        headers.IfNoneMatch.Add(parsedTag);
    }

    if (lastModified is { } modifiedSince)
    {
        headers.IfModifiedSince = modifiedSince;
    }

    return message;
}
|
||||
|
||||
/// <summary>
/// Sends a request, retrying on retryable status codes and on
/// <see cref="HttpRequestException"/> / <see cref="TaskCanceledException"/> until the
/// configured number of attempts (<c>MaxRetries + 1</c>, at least one) is used up.
/// </summary>
/// <param name="requestFactory">Creates a fresh request for every attempt.</param>
/// <param name="client">Client used to send the request.</param>
/// <param name="cancellationToken">Cancels both the send and the waits between attempts.</param>
/// <returns>
/// The response of the first attempt that is not retried. On the final attempt the response is
/// returned whatever its status, and exceptions propagate unchanged.
/// </returns>
private async Task<HttpResponseMessage> SendWithRetryAsync(
    Func<HttpRequestMessage> requestFactory,
    HttpClient client,
    CancellationToken cancellationToken)
{
    var attemptLimit = Math.Max(1, _options.MaxRetries + 1);
    var attempt = 1;

    while (attempt <= attemptLimit)
    {
        var retriesRemain = attempt < attemptLimit;

        // Disposed at the end of each iteration, including the one that returns.
        using var request = requestFactory();
        try
        {
            var response = await client
                .SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken)
                .ConfigureAwait(false);

            var retryable = ShouldRetry(response);
            if (!retryable || !retriesRemain)
            {
                return response;
            }

            response.Dispose();
            await Task.Delay(GetRetryDelay(attempt), cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (retriesRemain && ex is (HttpRequestException or TaskCanceledException))
        {
            // With a cancelled token this delay throws at once, which ends the retry loop.
            await Task.Delay(GetRetryDelay(attempt), cancellationToken).ConfigureAwait(false);
        }

        attempt++;
    }

    throw new HttpRequestException("EPSS fetch exceeded retry attempts.");
}
|
||||
|
||||
/// <summary>
/// Tells whether a response status is worth retrying: 429 (Too Many Requests) or any 5xx code.
/// </summary>
/// <param name="response">Response whose status code is inspected.</param>
/// <returns><c>true</c> for 429 and for 500–599; otherwise <c>false</c>.</returns>
private static bool ShouldRetry(HttpResponseMessage response)
{
    var code = (int)response.StatusCode;
    return code == (int)HttpStatusCode.TooManyRequests || code is >= 500 and < 600;
}
|
||||
|
||||
/// <summary>
/// Computes the wait before the next attempt: <c>2^(attempt - 1)</c> seconds, capped at 30 seconds.
/// </summary>
/// <param name="attempt">1-based number of the attempt that just failed.</param>
/// <returns>The delay to wait before retrying.</returns>
private static TimeSpan GetRetryDelay(int attempt)
{
    const double MaxDelaySeconds = 30;

    var backoffSeconds = Math.Pow(2, attempt - 1);
    return TimeSpan.FromSeconds(Math.Min(MaxDelaySeconds, backoffSeconds));
}
|
||||
|
||||
/// <summary>
/// Reads the value stored under <paramref name="key"/> as a string.
/// </summary>
/// <param name="payload">Document to read from.</param>
/// <param name="key">Name of the element to look up.</param>
/// <returns>The element's <c>AsString</c> value, or <c>null</c> when the key is absent.</returns>
private static string? TryGetString(DocumentObject payload, string key)
{
    if (payload.TryGetValue(key, out var element))
    {
        return element.AsString;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Reads the value stored under <paramref name="key"/> as a calendar date.
/// </summary>
/// <param name="payload">Document to read from.</param>
/// <param name="key">Name of the element to look up.</param>
/// <returns>
/// The date part of a date-time element (converted to universal time), the parsed value of a
/// <c>yyyy-MM-dd</c> string element, or <c>null</c> when the key is absent or the element is
/// of another type or format.
/// </returns>
private static DateOnly? TryGetDate(DocumentObject payload, string key)
{
    if (!payload.TryGetValue(key, out var element))
    {
        return null;
    }

    switch (element.DocumentType)
    {
        case DocumentType.DateTime:
            return DateOnly.FromDateTime(element.ToUniversalTime());

        case DocumentType.String
            when DateOnly.TryParseExact(element.AsString, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var date):
            return date;

        default:
            return null;
    }
}
|
||||
|
||||
/// <summary>
/// Extracts the snapshot date stored under the <c>epss.date</c> metadata key.
/// </summary>
/// <param name="metadata">Metadata dictionary; may be <c>null</c>.</param>
/// <returns>
/// The parsed date when the entry exists, is not blank, and matches <c>yyyy-MM-dd</c>
/// exactly; otherwise <c>null</c>.
/// </returns>
private static DateOnly? TryParseDateFromMetadata(IReadOnlyDictionary<string, string>? metadata)
{
    if (metadata is not null
        && metadata.TryGetValue("epss.date", out var raw)
        && !string.IsNullOrWhiteSpace(raw)
        && DateOnly.TryParseExact(raw, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var date))
    {
        return date;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Loads the connector cursor from the source state repository.
/// </summary>
/// <param name="cancellationToken">Cancels the repository lookup.</param>
/// <returns>
/// <see cref="EpssCursor.Empty"/> when no state is stored for this source; otherwise the cursor
/// rebuilt from the stored cursor document.
/// </returns>
private async Task<EpssCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var stored = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (stored is null)
    {
        return EpssCursor.Empty;
    }

    return EpssCursor.FromDocument(stored.Cursor);
}
|
||||
|
||||
/// <summary>
/// Persists <paramref name="cursor"/> for this source, stamped with the current UTC time
/// from the injected time provider.
/// </summary>
/// <param name="cursor">Cursor to serialize and store.</param>
/// <param name="cancellationToken">Cancels the repository update.</param>
/// <returns>The task returned by the state repository.</returns>
private Task UpdateCursorAsync(EpssCursor cursor, CancellationToken cancellationToken)
    => _stateRepository.UpdateCursorAsync(
        SourceName,
        cursor.ToDocumentObject(),
        _timeProvider.GetUtcNow(),
        cancellationToken);
|
||||
|
||||
/// <summary>
/// Immutable value describing the outcome of fetching one snapshot. How each field is
/// populated is decided by the fetch code, which is outside this declaration.
/// </summary>
/// <param name="SnapshotDate">Date associated with the snapshot.</param>
/// <param name="SourceUri">Location the snapshot was requested from, as a string.</param>
/// <param name="IsSuccess">Success flag.</param>
/// <param name="IsNotModified">Not-modified flag.</param>
/// <param name="Content">Raw payload bytes, when any.</param>
/// <param name="ContentType">Content type of the payload, when known.</param>
/// <param name="ETag">Entity tag, when known.</param>
/// <param name="LastModified">Modification timestamp, when known.</param>
private sealed record EpssFetchResult(
    DateOnly SnapshotDate, string SourceUri,
    bool IsSuccess, bool IsNotModified,
    byte[]? Content, string? ContentType,
    string? ETag, DateTimeOffset? LastModified);
|
||||
|
||||
/// <summary>
/// One file entry read from a bundle manifest's <c>files</c> array.
/// </summary>
/// <param name="Name">Value of the entry's <c>name</c> property.</param>
/// <param name="ModelVersion">Value of the entry's <c>modelVersion</c> property, when present.</param>
/// <param name="Sha256">Value of the entry's <c>sha256</c> property, when present.</param>
/// <param name="RowCount">Value of the entry's <c>rowCount</c> property, when present and a 32-bit integer.</param>
private sealed record BundleManifestEntry(string Name, string? ModelVersion, string? Sha256, int? RowCount);
|
||||
}
|
||||
@@ -0,0 +1,164 @@
|
||||
using System.Globalization;
|
||||
using StellaOps.Concelier.Documents;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss.Internal;
|
||||
|
||||
/// <summary>
/// Checkpoint state of the EPSS connector, convertible to and from a <see cref="DocumentObject"/>.
/// </summary>
/// <param name="ModelVersion">Model version recorded with the last snapshot, if any.</param>
/// <param name="LastProcessedDate">Date recorded with the last snapshot, if any.</param>
/// <param name="ETag">Entity tag recorded with the last snapshot, if any.</param>
/// <param name="ContentHash">Content hash recorded with the last snapshot, if any.</param>
/// <param name="LastRowCount">Row count recorded with the last snapshot; only positive values are kept.</param>
/// <param name="UpdatedAt">Time of the last update; <see cref="DateTimeOffset.MinValue"/> means "never".</param>
/// <param name="PendingDocuments">Identifiers listed under <c>pendingDocuments</c>.</param>
/// <param name="PendingMappings">Identifiers listed under <c>pendingMappings</c>.</param>
internal sealed record EpssCursor(
    string? ModelVersion,
    DateOnly? LastProcessedDate,
    string? ETag,
    string? ContentHash,
    int? LastRowCount,
    DateTimeOffset UpdatedAt,
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings)
{
    // Declared before Empty on purpose: static initializers run in textual order and Empty uses this field.
    private static readonly IReadOnlyCollection<Guid> EmptyGuidCollection = Array.Empty<Guid>();

    /// <summary>Cursor with no metadata and no pending identifiers.</summary>
    public static EpssCursor Empty { get; } = new(
        ModelVersion: null,
        LastProcessedDate: null,
        ETag: null,
        ContentHash: null,
        LastRowCount: null,
        UpdatedAt: DateTimeOffset.MinValue,
        PendingDocuments: EmptyGuidCollection,
        PendingMappings: EmptyGuidCollection);

    /// <summary>
    /// Serializes the cursor. The two pending arrays are always written; every other element is
    /// written only when it carries a value.
    /// </summary>
    /// <returns>A new document holding the cursor state.</returns>
    public DocumentObject ToDocumentObject()
    {
        var document = new DocumentObject();
        document["pendingDocuments"] = new DocumentArray(PendingDocuments.Select(id => id.ToString()));
        document["pendingMappings"] = new DocumentArray(PendingMappings.Select(id => id.ToString()));

        if (!string.IsNullOrWhiteSpace(ModelVersion))
        {
            document["modelVersion"] = ModelVersion;
        }

        if (LastProcessedDate is { } processedDate)
        {
            document["lastProcessedDate"] = processedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
        }

        if (!string.IsNullOrWhiteSpace(ETag))
        {
            document["etag"] = ETag;
        }

        if (!string.IsNullOrWhiteSpace(ContentHash))
        {
            document["contentHash"] = ContentHash;
        }

        if (LastRowCount is { } rows)
        {
            document["lastRowCount"] = rows;
        }

        // MinValue is the "never updated" sentinel and is not persisted.
        if (UpdatedAt > DateTimeOffset.MinValue)
        {
            document["updatedAt"] = UpdatedAt.UtcDateTime;
        }

        return document;
    }

    /// <summary>
    /// Rebuilds a cursor from a stored document.
    /// </summary>
    /// <param name="document">Stored cursor document; may be <c>null</c>.</param>
    /// <returns>
    /// <see cref="Empty"/> for a <c>null</c> or element-less document; otherwise a cursor with
    /// trimmed strings (blank becomes <c>null</c>), a row count kept only when positive, and the
    /// GUIDs that parse from the pending arrays.
    /// </returns>
    public static EpssCursor FromDocument(DocumentObject? document)
    {
        if (document is null)
        {
            return Empty;
        }

        if (document.ElementCount == 0)
        {
            return Empty;
        }

        var modelVersion = ReadTrimmedString(document, "modelVersion");

        DateOnly? lastProcessed = null;
        if (document.TryGetValue("lastProcessedDate", out var rawDate))
        {
            if (rawDate.DocumentType == DocumentType.String)
            {
                if (DateOnly.TryParseExact(rawDate.AsString, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var parsedDate))
                {
                    lastProcessed = parsedDate;
                }
            }
            else if (rawDate.DocumentType == DocumentType.DateTime)
            {
                lastProcessed = DateOnly.FromDateTime(rawDate.ToUniversalTime());
            }
        }

        var etag = ReadTrimmedString(document, "etag");
        var contentHash = ReadTrimmedString(document, "contentHash");

        int? lastRowCount = null;
        if (document.TryGetValue("lastRowCount", out var rawCount))
        {
            var storedRows = rawCount.AsInt32;
            lastRowCount = storedRows > 0 ? storedRows : null;
        }

        var updatedAt = DateTimeOffset.MinValue;
        if (document.TryGetValue("updatedAt", out var rawUpdated))
        {
            var stamp = rawUpdated.AsDateTimeOffset;
            if (stamp > DateTimeOffset.MinValue)
            {
                updatedAt = stamp;
            }
        }

        var pendingDocuments = ReadGuidArray(document, "pendingDocuments");
        var pendingMappings = ReadGuidArray(document, "pendingMappings");

        return new EpssCursor(
            modelVersion,
            lastProcessed,
            etag,
            contentHash,
            lastRowCount,
            updatedAt,
            pendingDocuments,
            pendingMappings);
    }

    /// <summary>Returns a copy whose pending documents are the distinct items of <paramref name="documents"/> (empty when <c>null</c>).</summary>
    public EpssCursor WithPendingDocuments(IEnumerable<Guid> documents)
        => this with { PendingDocuments = DistinctOrEmpty(documents) };

    /// <summary>Returns a copy whose pending mappings are the distinct items of <paramref name="mappings"/> (empty when <c>null</c>).</summary>
    public EpssCursor WithPendingMappings(IEnumerable<Guid> mappings)
        => this with { PendingMappings = DistinctOrEmpty(mappings) };

    /// <summary>
    /// Returns a copy carrying the given snapshot metadata. Strings are trimmed and blank strings
    /// become <c>null</c>; a row count that is not positive becomes <c>null</c>.
    /// </summary>
    public EpssCursor WithSnapshotMetadata(
        string? modelVersion,
        DateOnly? publishedDate,
        string? etag,
        string? contentHash,
        int? rowCount,
        DateTimeOffset updatedAt)
        => this with
        {
            ModelVersion = TrimToNull(modelVersion),
            LastProcessedDate = publishedDate,
            ETag = TrimToNull(etag),
            ContentHash = TrimToNull(contentHash),
            LastRowCount = rowCount is > 0 ? rowCount : null,
            UpdatedAt = updatedAt
        };

    // Blank or null text collapses to null; anything else is trimmed.
    private static string? TrimToNull(string? text)
        => string.IsNullOrWhiteSpace(text) ? null : text.Trim();

    // Reads a string element (null when the key is absent) and normalizes it with TrimToNull.
    private static string? ReadTrimmedString(DocumentObject document, string key)
        => TrimToNull(document.TryGetValue(key, out var element) ? element.AsString : null);

    // Materializes the distinct identifiers, or the shared empty collection for a null sequence.
    private static IReadOnlyCollection<Guid> DistinctOrEmpty(IEnumerable<Guid>? ids)
        => ids is null ? EmptyGuidCollection : ids.Distinct().ToArray();

    // Reads an array element and keeps the items whose text form parses as a GUID; others are dropped.
    private static IReadOnlyCollection<Guid> ReadGuidArray(DocumentObject document, string key)
    {
        if (document.TryGetValue(key, out var value) && value is DocumentArray items)
        {
            var parsed = new List<Guid>(items.Count);
            foreach (var item in items)
            {
                if (!Guid.TryParse(item.ToString(), out var id))
                {
                    continue;
                }

                parsed.Add(id);
            }

            return parsed;
        }

        return EmptyGuidCollection;
    }
}
|
||||
@@ -0,0 +1,85 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss.Internal;
|
||||
|
||||
/// <summary>
/// Owns the <see cref="Meter"/> named <see cref="MeterName"/> and the counters the EPSS
/// connector uses to report fetch, parse and map activity.
/// </summary>
public sealed class EpssDiagnostics : IDisposable
{
    /// <summary>Name of the meter all counters below are created on.</summary>
    public const string MeterName = "StellaOps.Concelier.Connector.Epss";

    private const string MeterVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _fetchAttempts;
    private readonly Counter<long> _fetchSuccess;
    private readonly Counter<long> _fetchFailures;
    private readonly Counter<long> _fetchUnchanged;
    private readonly Counter<long> _parsedRows;
    private readonly Counter<long> _parseFailures;
    private readonly Counter<long> _mappedRows;

    /// <summary>Creates the meter and registers every counter on it.</summary>
    public EpssDiagnostics()
    {
        _meter = new Meter(MeterName, MeterVersion);

        _fetchAttempts = _meter.CreateCounter<long>("epss.fetch.attempts", "operations", "Number of EPSS fetch attempts performed.");
        _fetchSuccess = _meter.CreateCounter<long>("epss.fetch.success", "operations", "Number of EPSS fetch attempts that produced new content.");
        _fetchFailures = _meter.CreateCounter<long>("epss.fetch.failures", "operations", "Number of EPSS fetch attempts that failed.");
        _fetchUnchanged = _meter.CreateCounter<long>("epss.fetch.unchanged", "operations", "Number of EPSS fetch attempts returning unchanged content.");
        _parsedRows = _meter.CreateCounter<long>("epss.parse.rows", "rows", "Number of EPSS rows parsed from snapshots.");
        _parseFailures = _meter.CreateCounter<long>("epss.parse.failures", "documents", "Number of EPSS snapshot parse failures.");
        _mappedRows = _meter.CreateCounter<long>("epss.map.rows", "rows", "Number of EPSS rows mapped into observations.");
    }

    /// <summary>Adds one to <c>epss.fetch.attempts</c>.</summary>
    public void FetchAttempt()
    {
        _fetchAttempts.Add(1);
    }

    /// <summary>Adds one to <c>epss.fetch.success</c>.</summary>
    public void FetchSuccess()
    {
        _fetchSuccess.Add(1);
    }

    /// <summary>Adds one to <c>epss.fetch.failures</c>.</summary>
    public void FetchFailure()
    {
        _fetchFailures.Add(1);
    }

    /// <summary>Adds one to <c>epss.fetch.unchanged</c>.</summary>
    public void FetchUnchanged()
    {
        _fetchUnchanged.Add(1);
    }

    /// <summary>
    /// Adds <paramref name="rowCount"/> to <c>epss.parse.rows</c>, tagged with the model version.
    /// Counts of zero or less are ignored.
    /// </summary>
    public void ParseRows(int rowCount, string? modelVersion)
        => AddRows(_parsedRows, rowCount, modelVersion);

    /// <summary>Adds one to <c>epss.parse.failures</c>, tagged with <paramref name="reason"/>.</summary>
    public void ParseFailure(string reason)
    {
        var reasonTag = new KeyValuePair<string, object?>("reason", reason);
        _parseFailures.Add(1, reasonTag);
    }

    /// <summary>
    /// Adds <paramref name="rowCount"/> to <c>epss.map.rows</c>, tagged with the model version.
    /// Counts of zero or less are ignored.
    /// </summary>
    public void MapRows(int rowCount, string? modelVersion)
        => AddRows(_mappedRows, rowCount, modelVersion);

    /// <summary>Disposes the underlying meter.</summary>
    public void Dispose()
    {
        _meter.Dispose();
    }

    // Shared by the row counters: skips non-positive counts and tags a missing model version as "".
    private static void AddRows(Counter<long> counter, int rowCount, string? modelVersion)
    {
        if (rowCount > 0)
        {
            counter.Add(rowCount, new KeyValuePair<string, object?>("modelVersion", modelVersion ?? string.Empty));
        }
    }
}
|
||||
@@ -0,0 +1,53 @@
|
||||
using StellaOps.Scanner.Storage.Epss;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss.Internal;
|
||||
|
||||
/// <summary>
/// Converts parsed EPSS score rows into <see cref="EpssObservation"/> records.
/// </summary>
public static class EpssMapper
{
    /// <summary>
    /// Builds an observation from a score row, converting score and percentile to
    /// <see cref="decimal"/> and deriving the band from the score.
    /// </summary>
    /// <param name="row">Parsed score row.</param>
    /// <param name="modelVersion">Model version to stamp on the observation; must not be blank.</param>
    /// <param name="publishedDate">Publication date to stamp on the observation.</param>
    /// <returns>The populated observation.</returns>
    /// <exception cref="ArgumentException"><paramref name="modelVersion"/> is null, empty or whitespace.</exception>
    public static EpssObservation ToObservation(
        EpssScoreRow row,
        string modelVersion,
        DateOnly publishedDate)
    {
        if (string.IsNullOrWhiteSpace(modelVersion))
        {
            throw new ArgumentException("Model version is required.", nameof(modelVersion));
        }

        var score = (decimal)row.Score;

        return new EpssObservation
        {
            CveId = row.CveId,
            Score = score,
            Percentile = (decimal)row.Percentile,
            ModelVersion = modelVersion,
            PublishedDate = publishedDate,
            Band = DetermineBand(score)
        };
    }

    // Thresholds are inclusive lower bounds, checked from the highest band down.
    private static EpssBand DetermineBand(decimal score)
    {
        if (score >= 0.70m)
        {
            return EpssBand.Critical;
        }

        if (score >= 0.40m)
        {
            return EpssBand.High;
        }

        if (score >= 0.10m)
        {
            return EpssBand.Medium;
        }

        return EpssBand.Low;
    }
}
|
||||
|
||||
/// <summary>
/// One EPSS data point for a single CVE. Every property is required at construction
/// and immutable afterwards.
/// </summary>
public sealed record EpssObservation
{
    /// <summary>CVE identifier the observation belongs to.</summary>
    public required string CveId { get; init; }

    /// <summary>EPSS score.</summary>
    public required decimal Score { get; init; }

    /// <summary>EPSS percentile.</summary>
    public required decimal Percentile { get; init; }

    /// <summary>Model version associated with the observation.</summary>
    public required string ModelVersion { get; init; }

    /// <summary>Publication date associated with the observation.</summary>
    public required DateOnly PublishedDate { get; init; }

    /// <summary>Band classification of the observation.</summary>
    public required EpssBand Band { get; init; }
}
|
||||
|
||||
/// <summary>
/// Band classification attached to an EPSS observation, ordered from lowest to highest.
/// The numeric values are explicit so they stay stable if members are reordered.
/// </summary>
public enum EpssBand
{
    /// <summary>Lowest band.</summary>
    Low = 0,

    /// <summary>Second band.</summary>
    Medium = 1,

    /// <summary>Third band.</summary>
    High = 2,

    /// <summary>Highest band.</summary>
    Critical = 3
}
|
||||
@@ -0,0 +1,47 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.Concelier.Core.Jobs;
|
||||
using StellaOps.Concelier.Connector.Epss.Internal;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Epss;
|
||||
|
||||
/// <summary>
/// Job kind identifiers for the three EPSS connector stages.
/// </summary>
internal static class EpssJobKinds
{
    /// <summary>Kind identifier of the fetch job.</summary>
    public const string Fetch = "source:epss:fetch";

    /// <summary>Kind identifier of the parse job.</summary>
    public const string Parse = "source:epss:parse";

    /// <summary>Kind identifier of the map job.</summary>
    public const string Map = "source:epss:map";
}
|
||||
|
||||
/// <summary>
/// Job that runs the connector's fetch stage.
/// </summary>
internal sealed class EpssFetchJob : IJob
{
    private readonly EpssConnector _connector;

    /// <summary>Creates the job.</summary>
    /// <param name="connector">Connector to delegate to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is <c>null</c>.</exception>
    public EpssFetchJob(EpssConnector connector)
    {
        if (connector is null)
        {
            throw new ArgumentNullException(nameof(connector));
        }

        _connector = connector;
    }

    /// <summary>Forwards to <c>EpssConnector.FetchAsync</c> with the job's service provider.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}
|
||||
|
||||
/// <summary>
/// Job that runs the connector's parse stage.
/// </summary>
internal sealed class EpssParseJob : IJob
{
    private readonly EpssConnector _connector;

    /// <summary>Creates the job.</summary>
    /// <param name="connector">Connector to delegate to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is <c>null</c>.</exception>
    public EpssParseJob(EpssConnector connector)
    {
        if (connector is null)
        {
            throw new ArgumentNullException(nameof(connector));
        }

        _connector = connector;
    }

    /// <summary>Forwards to <c>EpssConnector.ParseAsync</c> with the job's service provider.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.ParseAsync(context.Services, cancellationToken);
    }
}
|
||||
|
||||
/// <summary>
/// Job that runs the connector's map stage.
/// </summary>
internal sealed class EpssMapJob : IJob
{
    private readonly EpssConnector _connector;

    /// <summary>Creates the job.</summary>
    /// <param name="connector">Connector to delegate to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is <c>null</c>.</exception>
    public EpssMapJob(EpssConnector connector)
    {
        if (connector is null)
        {
            throw new ArgumentNullException(nameof(connector));
        }

        _connector = connector;
    }

    /// <summary>Forwards to <c>EpssConnector.MapAsync</c> with the job's service provider.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.MapAsync(context.Services, cancellationToken);
    }
}
|
||||
@@ -0,0 +1,3 @@
|
||||
using StellaOps.Plugin.Versioning;
|
||||
|
||||
// Assembly-level plugin metadata: plugin version 1.0.0, with a declared host version range of
// 1.0.0 through 1.99.99. How the host applies this range is defined by
// StellaOps.Plugin.Versioning, which is not visible from this file.
[assembly: StellaPluginVersion("1.0.0", MinimumHostVersion = "1.0.0", MaximumHostVersion = "1.99.99")]
|
||||
@@ -0,0 +1,24 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Compiler and framework settings for the EPSS connector assembly. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
  </PropertyGroup>

  <!-- Project dependencies: shared libraries, Concelier projects, and Scanner storage. -->
  <ItemGroup>
    <ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
    <ProjectReference Include="../../../__Libraries/StellaOps.Cryptography/StellaOps.Cryptography.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Core/StellaOps.Concelier.Core.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
    <ProjectReference Include="../../../Scanner/__Libraries/StellaOps.Scanner.Storage/StellaOps.Scanner.Storage.csproj" />
  </ItemGroup>

  <!-- Exposes internal types to the connector's test assembly. -->
  <ItemGroup>
    <AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
      <_Parameter1>StellaOps.Concelier.Connector.Epss.Tests</_Parameter1>
    </AssemblyAttribute>
  </ItemGroup>
</Project>
|
||||
Reference in New Issue
Block a user