Refactor code structure for improved readability and maintainability; optimize performance in key functions.

This commit is contained in:
master
2025-12-22 19:06:31 +02:00
parent dfaa2079aa
commit 4602ccc3a3
1444 changed files with 109919 additions and 8058 deletions

View File

@@ -0,0 +1,35 @@
# AGENTS.md - EPSS Connector
## Purpose
Ingests EPSS (Exploit Prediction Scoring System) scores from FIRST.org to provide exploitation probability signals for CVE prioritization.
## Data Source
- **URL**: https://epss.empiricalsecurity.com/
- **Format**: `epss_scores-YYYY-MM-DD.csv.gz` (gzip-compressed CSV)
- **Update cadence**: Daily snapshot (typically published ~08:00 UTC)
- **Offline bundle**: Directory or file path with optional `manifest.json`
## Data Flow
1. Fetch daily snapshot via HTTP or air-gapped bundle path.
2. Parse with `StellaOps.Scanner.Storage.Epss.EpssCsvStreamParser` for deterministic row counts and content hash.
3. Map rows to `EpssObservation` records with band classification (Low/Medium/High/Critical).
4. Store raw document + DTO metadata; mapping currently records counts and marks documents mapped.
## Configuration
```yaml
concelier:
  sources:
    epss:
      baseUri: "https://epss.empiricalsecurity.com/"
      fetchCurrent: true
      catchUpDays: 7
      httpTimeout: "00:02:00"
      maxRetries: 3
      airgapMode: false
      bundlePath: "/var/stellaops/bundles/epss"
```
## Orchestrator Registration
- ConnectorId: `epss`
- Default Schedule: Daily 10:00 UTC
- Egress Allowlist: `epss.empiricalsecurity.com`

View File

@@ -0,0 +1,59 @@
using System.Diagnostics.CodeAnalysis;
namespace StellaOps.Concelier.Connector.Epss.Configuration;
/// <summary>
/// Settings that control how the EPSS connector locates and downloads snapshots,
/// either over HTTP or from a local bundle path.
/// </summary>
public sealed class EpssOptions
{
    /// <summary>Configuration section name exposed by this options type.</summary>
    public const string SectionName = "Concelier:Epss";

    /// <summary>Name of the HTTP client used for EPSS downloads.</summary>
    public const string HttpClientName = "source.epss";

    /// <summary>Absolute base address that snapshot file names are resolved against.</summary>
    public Uri BaseUri { get; set; } = new("https://epss.empiricalsecurity.com/", UriKind.Absolute);

    /// <summary>When true, candidate dates start at the current date.</summary>
    public bool FetchCurrent { get; set; } = true;

    /// <summary>Number of days to look back for a snapshot; must not be negative.</summary>
    public int CatchUpDays { get; set; } = 7;

    /// <summary>HTTP timeout; must be greater than zero.</summary>
    public TimeSpan HttpTimeout { get; set; } = TimeSpan.FromMinutes(2);

    /// <summary>Number of retries after the first attempt; must not be negative.</summary>
    public int MaxRetries { get; set; } = 3;

    /// <summary>When true, snapshots are read from <see cref="BundlePath"/> instead of HTTP.</summary>
    public bool AirgapMode { get; set; }

    /// <summary>Bundle directory or file path; required when <see cref="AirgapMode"/> is enabled.</summary>
    public string? BundlePath { get; set; }

    /// <summary>User-Agent value sent with HTTP requests; must not be blank.</summary>
    public string UserAgent { get; set; } = "StellaOps.Concelier.Epss/1.0";

    /// <summary>
    /// Checks every setting and reports the first violation found.
    /// </summary>
    /// <exception cref="InvalidOperationException">A setting is missing or out of range.</exception>
    [MemberNotNull(nameof(BaseUri), nameof(UserAgent))]
    public void Validate()
    {
        if (BaseUri is not { IsAbsoluteUri: true })
        {
            throw new InvalidOperationException("BaseUri must be an absolute URI.");
        }

        if (CatchUpDays < 0)
        {
            throw new InvalidOperationException("CatchUpDays cannot be negative.");
        }

        if (HttpTimeout.Ticks <= 0)
        {
            throw new InvalidOperationException("HttpTimeout must be greater than zero.");
        }

        if (MaxRetries < 0)
        {
            throw new InvalidOperationException("MaxRetries cannot be negative.");
        }

        if (string.IsNullOrWhiteSpace(UserAgent))
        {
            throw new InvalidOperationException("UserAgent must be provided.");
        }

        if (AirgapMode)
        {
            if (string.IsNullOrWhiteSpace(BundlePath))
            {
                throw new InvalidOperationException("BundlePath must be provided when AirgapMode is enabled.");
            }
        }
    }
}

View File

@@ -0,0 +1,24 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Concelier.Connector.Epss.Internal;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Epss;
/// <summary>
/// Connector plugin that exposes the EPSS feed connector registered in the service provider.
/// </summary>
public sealed class EpssConnectorPlugin : IConnectorPlugin
{
    /// <summary>Source identifier used by the EPSS connector.</summary>
    public const string SourceName = "epss";

    /// <summary>Plugin name; identical to <see cref="SourceName"/>.</summary>
    public string Name
    {
        get { return SourceName; }
    }

    /// <summary>Reports whether an <see cref="EpssConnector"/> can be resolved from <paramref name="services"/>.</summary>
    public bool IsAvailable(IServiceProvider services)
    {
        var connector = services.GetService<EpssConnector>();
        return connector is not null;
    }

    /// <summary>Resolves the registered <see cref="EpssConnector"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var connector = services.GetRequiredService<EpssConnector>();
        return connector;
    }
}

View File

@@ -0,0 +1,54 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.Epss.Configuration;
using StellaOps.DependencyInjection;
namespace StellaOps.Concelier.Connector.Epss;
/// <summary>
/// Wires the EPSS connector into the container: binds its options from configuration
/// and makes sure the fetch/parse/map jobs have scheduler definitions.
/// </summary>
public sealed class EpssDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:epss";

    /// <summary>
    /// Registers the EPSS connector services and job definitions.
    /// </summary>
    /// <param name="services">Service collection to add registrations to.</param>
    /// <param name="configuration">Configuration root containing the <c>concelier:sources:epss</c> section.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // AddEpssConnector already validates the options in a PostConfigure step and already
        // registers EpssFetchJob/EpssParseJob/EpssMapJob, so neither is repeated here
        // (repeating them produced duplicate service descriptors and a second Validate call).
        services.AddEpssConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
        });

        services.PostConfigure<JobSchedulerOptions>(options =>
        {
            EnsureJob(options, EpssJobKinds.Fetch, typeof(EpssFetchJob));
            EnsureJob(options, EpssJobKinds.Parse, typeof(EpssParseJob));
            EnsureJob(options, EpssJobKinds.Map, typeof(EpssMapJob));
        });

        return services;
    }

    /// <summary>
    /// Adds a definition for <paramref name="kind"/> using the scheduler defaults,
    /// unless a definition with that key is already present.
    /// </summary>
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (options.Definitions.ContainsKey(kind))
        {
            // An existing definition wins over the default one.
            return;
        }

        options.Definitions[kind] = new JobDefinition(
            kind,
            jobType,
            options.DefaultTimeout,
            options.DefaultLeaseDuration,
            CronExpression: null,
            Enabled: true);
    }
}

View File

@@ -0,0 +1,40 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Common.Http;
using StellaOps.Concelier.Connector.Epss.Configuration;
using StellaOps.Concelier.Connector.Epss.Internal;
namespace StellaOps.Concelier.Connector.Epss;
/// <summary>
/// Service-collection helpers for registering the EPSS connector.
/// </summary>
public static class EpssServiceCollectionExtensions
{
    /// <summary>
    /// Registers EPSS options (validated after configuration), the named source HTTP client,
    /// diagnostics, the connector and its three jobs.
    /// </summary>
    /// <param name="services">Service collection to add registrations to.</param>
    /// <param name="configure">Callback that populates <see cref="EpssOptions"/>.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static IServiceCollection AddEpssConnector(this IServiceCollection services, Action<EpssOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        var optionsBuilder = services.AddOptions<EpssOptions>();
        optionsBuilder.Configure(configure);
        optionsBuilder.PostConfigure(static opts => opts.Validate());

        services.AddSourceHttpClient(EpssOptions.HttpClientName, (sp, clientOptions) =>
        {
            var epss = sp.GetRequiredService<IOptions<EpssOptions>>().Value;
            var baseUri = epss.BaseUri;

            clientOptions.BaseAddress = baseUri;
            clientOptions.Timeout = epss.HttpTimeout;
            clientOptions.UserAgent = epss.UserAgent;

            // One initial attempt plus the configured number of retries, never fewer than one.
            clientOptions.MaxAttempts = Math.Max(1, epss.MaxRetries + 1);

            // Only the configured host stays in the allow-list.
            clientOptions.AllowedHosts.Clear();
            clientOptions.AllowedHosts.Add(baseUri.Host);

            clientOptions.DefaultRequestHeaders["Accept"] = "application/gzip,application/octet-stream,application/x-gzip";
        });

        services.AddSingleton<EpssDiagnostics>();
        services.AddTransient<EpssConnector>();
        services.AddTransient<EpssFetchJob>();
        services.AddTransient<EpssParseJob>();
        services.AddTransient<EpssMapJob>();

        return services;
    }
}

View File

@@ -0,0 +1,778 @@
using System.Globalization;
using System.Net;
using System.Net.Http.Headers;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Epss.Configuration;
using StellaOps.Concelier.Documents;
using StellaOps.Concelier.Storage;
using StellaOps.Cryptography;
using StellaOps.Plugin;
using StellaOps.Scanner.Storage.Epss;
namespace StellaOps.Concelier.Connector.Epss.Internal;
public sealed class EpssConnector : IFeedConnector
{
private const string DtoSchemaVersion = "epss.snapshot.v1";
private const string ManifestFileName = "manifest.json";
private static readonly string[] AcceptTypes = { "application/gzip", "application/octet-stream", "application/x-gzip" };
private readonly IHttpClientFactory _httpClientFactory;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly ISourceStateRepository _stateRepository;
private readonly EpssOptions _options;
private readonly EpssDiagnostics _diagnostics;
private readonly ICryptoHash _hash;
private readonly TimeProvider _timeProvider;
private readonly ILogger<EpssConnector> _logger;
private readonly EpssCsvStreamParser _parser = new();
/// <summary>
/// Creates the connector. Every dependency except <paramref name="timeProvider"/> is required;
/// a null time provider falls back to <see cref="TimeProvider.System"/>. The resolved options
/// are validated during construction.
/// </summary>
/// <exception cref="ArgumentNullException">A required dependency (or the options value) is null.</exception>
public EpssConnector(
    IHttpClientFactory httpClientFactory,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    ISourceStateRepository stateRepository,
    IOptions<EpssOptions> options,
    EpssDiagnostics diagnostics,
    ICryptoHash hash,
    TimeProvider? timeProvider,
    ILogger<EpssConnector> logger)
{
    // Guards run in parameter order, with options validation between stateRepository and diagnostics.
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(dtoStore);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value ?? throw new ArgumentNullException(nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(diagnostics);
    ArgumentNullException.ThrowIfNull(hash);
    ArgumentNullException.ThrowIfNull(logger);

    _httpClientFactory = httpClientFactory;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _dtoStore = dtoStore;
    _stateRepository = stateRepository;
    _options = resolvedOptions;
    _diagnostics = diagnostics;
    _hash = hash;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
}

/// <summary>Source name of this connector; same value as <see cref="EpssConnectorPlugin.SourceName"/>.</summary>
public string SourceName
{
    get { return EpssConnectorPlugin.SourceName; }
}
/// <summary>
/// Tries each candidate snapshot date in turn (HTTP, or the bundle path in air-gap mode),
/// stores the first snapshot found as a pending document and updates the cursor.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Token observed between candidates and passed to all I/O.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <remarks>
/// Failures are logged, recorded through <c>MarkFailureAsync</c> with a five minute backoff and rethrown.
/// Cancellation requested through <paramref name="cancellationToken"/> is not treated as a failure.
/// </remarks>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    var pendingDocuments = cursor.PendingDocuments.ToHashSet();
    var pendingMappings = cursor.PendingMappings.ToHashSet();
    var now = _timeProvider.GetUtcNow();
    var nowDate = DateOnly.FromDateTime(now.UtcDateTime);
    var candidates = GetCandidateDates(cursor, nowDate).ToArray();
    if (candidates.Length == 0)
    {
        return;
    }

    _diagnostics.FetchAttempt();
    EpssFetchResult? fetchResult = null;
    try
    {
        // The first candidate that yields any result (content or "not modified") wins.
        foreach (var date in candidates)
        {
            cancellationToken.ThrowIfCancellationRequested();
            fetchResult = _options.AirgapMode
                ? await TryFetchFromBundleAsync(date, cancellationToken).ConfigureAwait(false)
                : await TryFetchFromHttpAsync(date, cursor, cancellationToken).ConfigureAwait(false);
            if (fetchResult is not null)
            {
                break;
            }
        }

        if (fetchResult is null)
        {
            _logger.LogWarning("EPSS fetch: no snapshot found for {CandidateCount} candidate dates.", candidates.Length);
            return;
        }

        if (fetchResult.IsNotModified)
        {
            // Nothing new: keep the snapshot metadata, refresh only the ETag and timestamp.
            _diagnostics.FetchUnchanged();
            var unchangedCursor = cursor.WithSnapshotMetadata(
                cursor.ModelVersion,
                cursor.LastProcessedDate,
                fetchResult.ETag ?? cursor.ETag,
                cursor.ContentHash,
                cursor.LastRowCount,
                now);
            await UpdateCursorAsync(unchangedCursor, cancellationToken).ConfigureAwait(false);
            return;
        }

        if (!fetchResult.IsSuccess || fetchResult.Content is null)
        {
            _diagnostics.FetchFailure();
            await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), "EPSS fetch returned no content.", cancellationToken).ConfigureAwait(false);
            return;
        }

        var record = await StoreSnapshotAsync(fetchResult, now, cancellationToken).ConfigureAwait(false);

        // A freshly stored document must be parsed again before it can be mapped.
        pendingDocuments.Add(record.Id);
        pendingMappings.Remove(record.Id);

        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings)
            .WithSnapshotMetadata(
                cursor.ModelVersion,
                cursor.LastProcessedDate,
                fetchResult.ETag,
                cursor.ContentHash,
                cursor.LastRowCount,
                now);

        _diagnostics.FetchSuccess();
        _logger.LogInformation(
            "Fetched EPSS snapshot {SnapshotDate} ({Uri}) document {DocumentId} pendingDocuments={PendingDocuments} pendingMappings={PendingMappings}",
            fetchResult.SnapshotDate,
            fetchResult.SourceUri,
            record.Id,
            pendingDocuments.Count,
            pendingMappings.Count);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }
    // Caller-requested cancellation is not a source failure: skip the failure metric and backoff
    // (MarkFailureAsync would also be invoked with an already-cancelled token).
    catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
    {
        _diagnostics.FetchFailure();
        _logger.LogError(ex, "EPSS fetch failed for {BaseUri}", _options.BaseUri);
        await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
        throw;
    }
}
/// <summary>
/// Parses every pending document: streams the gzip CSV through the parser to obtain the model
/// version, published date, row count and content hash, stores them as a DTO and as document
/// metadata, then moves the document from the pending-parse list to the pending-map list.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Token observed per document and passed to all I/O.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <remarks>
/// A payload download failure is rethrown. A parse failure marks the document as failed and continues.
/// Cancellation requested through <paramref name="cancellationToken"/> propagates without marking
/// the document as failed.
/// </remarks>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }

    var remainingDocuments = cursor.PendingDocuments.ToList();
    var pendingMappings = cursor.PendingMappings.ToHashSet();
    var cursorState = cursor;

    foreach (var documentId in cursor.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (document is null)
        {
            // The document no longer exists; drop it from the pending list.
            remainingDocuments.Remove(documentId);
            continue;
        }

        if (!document.PayloadId.HasValue)
        {
            _diagnostics.ParseFailure("missing_payload");
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            continue;
        }

        byte[] payload;
        try
        {
            payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
        }
        // Cancellation is not a download failure; let it propagate without counting it.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            _diagnostics.ParseFailure("download");
            _logger.LogError(ex, "EPSS parse failed downloading document {DocumentId}", document.Id);
            throw;
        }

        EpssCsvStreamParser.EpssCsvParseSession session;
        try
        {
            await using var stream = new MemoryStream(payload, writable: false);
            await using var parseSession = _parser.ParseGzip(stream);
            session = parseSession;

            // Rows are drained only so the session can accumulate its counters and hash.
            await foreach (var _ in parseSession.WithCancellation(cancellationToken).ConfigureAwait(false))
            {
            }
        }
        // Without this filter a cancelled run would mark a perfectly valid document as Failed
        // and remove it from the pending list.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            _diagnostics.ParseFailure("parse");
            _logger.LogWarning(ex, "EPSS parse failed for document {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            continue;
        }

        // NOTE(review): the session has been disposed at this point; its summary properties are
        // assumed to remain readable after disposal — confirm against EpssCsvParseSession.
        var publishedDate = session.PublishedDate ?? TryParseDateFromMetadata(document.Metadata) ?? DateOnly.FromDateTime(document.CreatedAt.UtcDateTime);
        var modelVersion = string.IsNullOrWhiteSpace(session.ModelVersionTag) ? "unknown" : session.ModelVersionTag!;
        var contentHash = session.DecompressedSha256 ?? string.Empty;

        var payloadDoc = new DocumentObject
        {
            ["modelVersion"] = modelVersion,
            ["publishedDate"] = publishedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture),
            ["rowCount"] = session.RowCount,
            ["contentHash"] = contentHash
        };

        var dtoRecord = new DtoRecord(
            Guid.NewGuid(),
            document.Id,
            SourceName,
            DtoSchemaVersion,
            payloadDoc,
            _timeProvider.GetUtcNow());

        await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);

        // Copy the existing metadata so the stored document is not mutated in place.
        var metadata = document.Metadata is null
            ? new Dictionary<string, string>(StringComparer.Ordinal)
            : new Dictionary<string, string>(document.Metadata, StringComparer.Ordinal);
        metadata["epss.modelVersion"] = modelVersion;
        metadata["epss.publishedDate"] = publishedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
        metadata["epss.rowCount"] = session.RowCount.ToString(CultureInfo.InvariantCulture);
        metadata["epss.contentHash"] = contentHash;

        var updatedDocument = document with { Metadata = metadata };
        await _documentStore.UpsertAsync(updatedDocument, cancellationToken).ConfigureAwait(false);

        remainingDocuments.Remove(documentId);
        pendingMappings.Add(documentId);

        cursorState = cursorState.WithSnapshotMetadata(
            modelVersion,
            publishedDate,
            document.Etag,
            contentHash,
            session.RowCount,
            _timeProvider.GetUtcNow());

        _diagnostics.ParseRows(session.RowCount, modelVersion);
    }

    var updatedCursor = cursorState
        .WithPendingDocuments(remainingDocuments)
        .WithPendingMappings(pendingMappings);

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Maps every document in the pending-map list: re-reads the stored gzip CSV, converts each row
/// with <c>EpssMapper.ToObservation</c> (the result is currently discarded, only rows are counted),
/// marks the document as mapped and updates the cursor.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Token observed per document and passed to all I/O.</param>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
/// <remarks>
/// Download and mapping failures mark the document as failed and processing continues.
/// Cancellation requested through <paramref name="cancellationToken"/> propagates without
/// marking the document as failed.
/// </remarks>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }

    var pendingMappings = cursor.PendingMappings.ToList();
    var cursorState = cursor;

    foreach (var documentId in cursor.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (dtoRecord is null || document is null)
        {
            // Nothing to map without both the DTO and the document.
            pendingMappings.Remove(documentId);
            continue;
        }

        var modelVersion = TryGetString(dtoRecord.Payload, "modelVersion") ?? "unknown";
        var publishedDate = TryGetDate(dtoRecord.Payload, "publishedDate")
            ?? TryParseDateFromMetadata(document.Metadata)
            ?? DateOnly.FromDateTime(document.CreatedAt.UtcDateTime);

        if (!document.PayloadId.HasValue)
        {
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        byte[] payload;
        try
        {
            payload = await _rawDocumentStorage.DownloadAsync(document.PayloadId.Value, cancellationToken).ConfigureAwait(false);
        }
        // A cancelled run must not mark the document as Failed; let cancellation propagate.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            _logger.LogError(ex, "EPSS map failed downloading document {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        int mappedRows = 0;
        try
        {
            await using var stream = new MemoryStream(payload, writable: false);
            await using var session = _parser.ParseGzip(stream);

            await foreach (var row in session.WithCancellation(cancellationToken).ConfigureAwait(false))
            {
                // The observation is built but not persisted here; only the count is kept.
                _ = EpssMapper.ToObservation(row, modelVersion, publishedDate);
                mappedRows++;
            }

            cursorState = cursorState.WithSnapshotMetadata(
                modelVersion,
                publishedDate,
                document.Etag,
                TryGetString(dtoRecord.Payload, "contentHash"),
                mappedRows,
                _timeProvider.GetUtcNow());
        }
        // Same reasoning as above: cancellation is not a mapping failure.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            _logger.LogWarning(ex, "EPSS map failed for document {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
        pendingMappings.Remove(documentId);
        _diagnostics.MapRows(mappedRows, modelVersion);
    }

    var updatedCursor = cursorState.WithPendingMappings(pendingMappings);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Requests the snapshot for <paramref name="snapshotDate"/> over HTTP, sending conditional headers
/// built from the previously stored document (or the cursor ETag when no document exists).
/// </summary>
/// <param name="snapshotDate">Date whose snapshot file is requested.</param>
/// <param name="cursor">Current cursor; supplies the fallback ETag.</param>
/// <param name="cancellationToken">Token passed to all I/O.</param>
/// <returns>
/// <c>null</c> when the server answers 404; a "not modified" result for 304; otherwise a successful
/// result carrying the response body.
/// </returns>
/// <exception cref="HttpRequestException">The final response has a non-success status other than 304/404.</exception>
private async Task<EpssFetchResult?> TryFetchFromHttpAsync(
    DateOnly snapshotDate,
    EpssCursor cursor,
    CancellationToken cancellationToken)
{
    var fileName = GetSnapshotFileName(snapshotDate);
    var uri = new Uri(_options.BaseUri, fileName);

    var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, uri.ToString(), cancellationToken).ConfigureAwait(false);
    var etag = existing?.Etag ?? cursor.ETag;
    var lastModified = existing?.LastModified;

    var client = _httpClientFactory.CreateClient(EpssOptions.HttpClientName);
    client.Timeout = _options.HttpTimeout;

    HttpResponseMessage response;
    try
    {
        response = await SendWithRetryAsync(() => CreateRequest(uri, etag, lastModified), client, cancellationToken).ConfigureAwait(false);
    }
    catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
    {
        return null;
    }

    // Dispose on every path, including when EnsureSuccessStatusCode or the body read throws.
    using (response)
    {
        if (response.StatusCode == HttpStatusCode.NotFound)
        {
            return null;
        }

        if (response.StatusCode == HttpStatusCode.NotModified)
        {
            return new EpssFetchResult(
                SnapshotDate: snapshotDate,
                SourceUri: uri.ToString(),
                IsSuccess: false,
                IsNotModified: true,
                Content: null,
                ContentType: response.Content.Headers.ContentType?.ToString(),
                ETag: response.Headers.ETag?.Tag ?? etag,
                LastModified: response.Content.Headers.LastModified);
        }

        response.EnsureSuccessStatusCode();
        var bytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);

        return new EpssFetchResult(
            SnapshotDate: snapshotDate,
            SourceUri: uri.ToString(),
            IsSuccess: true,
            IsNotModified: false,
            Content: bytes,
            ContentType: response.Content.Headers.ContentType?.ToString(),
            ETag: response.Headers.ETag?.Tag ?? etag,
            LastModified: response.Content.Headers.LastModified);
    }
}
/// <summary>
/// Reads the snapshot for <paramref name="snapshotDate"/> from the configured bundle path.
/// </summary>
/// <returns>
/// A successful result with the file bytes and a <c>bundle://</c> source URI, or <c>null</c>
/// (after logging a warning) when no file exists at the resolved path.
/// </returns>
private async Task<EpssFetchResult?> TryFetchFromBundleAsync(DateOnly snapshotDate, CancellationToken cancellationToken)
{
    var snapshotFile = GetSnapshotFileName(snapshotDate);
    var resolvedPath = ResolveBundlePath(_options.BundlePath, snapshotFile);

    if (resolvedPath is not null && File.Exists(resolvedPath))
    {
        var content = await File.ReadAllBytesAsync(resolvedPath, cancellationToken).ConfigureAwait(false);
        var sourceUri = $"bundle://{Path.GetFileName(resolvedPath)}";
        var lastWrite = new DateTimeOffset(File.GetLastWriteTimeUtc(resolvedPath));

        return new EpssFetchResult(
            SnapshotDate: snapshotDate,
            SourceUri: sourceUri,
            IsSuccess: true,
            IsNotModified: false,
            Content: content,
            ContentType: "application/gzip",
            ETag: null,
            LastModified: lastWrite);
    }

    _logger.LogWarning("EPSS bundle file not found: {Path}", resolvedPath ?? snapshotFile);
    return null;
}
/// <summary>
/// Uploads the fetched snapshot bytes to raw storage and upserts the matching document record
/// in <c>PendingParse</c> status, reusing the id of an existing document with the same source URI.
/// </summary>
/// <param name="fetchResult">Fetch result whose content is stored.</param>
/// <param name="fetchedAt">Timestamp recorded on the document.</param>
/// <param name="cancellationToken">Token passed to all I/O.</param>
/// <returns>The record returned by the document store upsert.</returns>
private async Task<DocumentRecord> StoreSnapshotAsync(
    EpssFetchResult fetchResult,
    DateTimeOffset fetchedAt,
    CancellationToken cancellationToken)
{
    var digest = _hash.ComputeHashHex(fetchResult.Content, HashAlgorithms.Sha256);

    var snapshotDate = fetchResult.SnapshotDate;
    var documentMetadata = new Dictionary<string, string>(StringComparer.Ordinal);
    documentMetadata["epss.date"] = snapshotDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    documentMetadata["epss.file"] = GetSnapshotFileName(snapshotDate);

    // Bundle manifests only exist for air-gapped imports.
    if (_options.AirgapMode)
    {
        TryApplyBundleManifest(snapshotDate, fetchResult.Content, documentMetadata);
    }

    var previous = await _documentStore.FindBySourceAndUriAsync(SourceName, fetchResult.SourceUri, cancellationToken).ConfigureAwait(false);
    var documentId = previous is null ? Guid.NewGuid() : previous.Id;

    await _rawDocumentStorage.UploadAsync(
        SourceName,
        fetchResult.SourceUri,
        fetchResult.Content,
        fetchResult.ContentType,
        ExpiresAt: null,
        cancellationToken,
        documentId).ConfigureAwait(false);

    var snapshotRecord = new DocumentRecord(
        documentId,
        SourceName,
        fetchResult.SourceUri,
        fetchedAt,
        digest,
        DocumentStatuses.PendingParse,
        fetchResult.ContentType,
        Headers: null,
        Metadata: documentMetadata,
        Etag: fetchResult.ETag,
        LastModified: fetchResult.LastModified,
        PayloadId: documentId,
        ExpiresAt: null,
        Payload: fetchResult.Content,
        FetchedAt: fetchedAt);

    var stored = await _documentStore.UpsertAsync(snapshotRecord, cancellationToken).ConfigureAwait(false);
    return stored;
}
/// <summary>
/// Best-effort enrichment of <paramref name="metadata"/> from the bundle's <c>manifest.json</c>:
/// copies the model version, row count and SHA-256 listed for the snapshot file and logs a warning
/// when the listed hash differs from the hash of <paramref name="content"/>. A missing manifest or
/// entry is ignored; manifest read errors are logged and swallowed.
/// </summary>
private void TryApplyBundleManifest(DateOnly snapshotDate, byte[] content, IDictionary<string, string> metadata)
{
    var configuredPath = _options.BundlePath;
    if (string.IsNullOrWhiteSpace(configuredPath))
    {
        return;
    }

    var manifestPath = ResolveBundleManifestPath(configuredPath);
    if (manifestPath is null)
    {
        return;
    }

    if (!File.Exists(manifestPath))
    {
        return;
    }

    try
    {
        var manifestEntry = TryReadBundleManifestEntry(manifestPath, GetSnapshotFileName(snapshotDate));
        if (manifestEntry is null)
        {
            return;
        }

        var listedModel = manifestEntry.ModelVersion;
        if (!string.IsNullOrWhiteSpace(listedModel))
        {
            metadata["epss.manifest.modelVersion"] = listedModel;
        }

        if (manifestEntry.RowCount is { } listedRows)
        {
            metadata["epss.manifest.rowCount"] = listedRows.ToString(CultureInfo.InvariantCulture);
        }

        var listedHash = manifestEntry.Sha256;
        if (string.IsNullOrWhiteSpace(listedHash))
        {
            return;
        }

        var actual = _hash.ComputeHashHex(content, HashAlgorithms.Sha256);

        // The manifest may carry an optional "sha256:" prefix; compare the bare hex only.
        const string prefix = "sha256:";
        var expected = listedHash.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
            ? listedHash.Substring(prefix.Length)
            : listedHash;

        metadata["epss.manifest.sha256"] = listedHash;

        var matches = string.Equals(actual, expected, StringComparison.OrdinalIgnoreCase);
        if (!matches)
        {
            _logger.LogWarning("EPSS bundle hash mismatch: expected {Expected}, actual {Actual}", listedHash, actual);
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "EPSS bundle manifest parsing failed for {Path}", manifestPath);
    }
}
/// <summary>
/// Resolves the file to read for a snapshot: when <paramref name="bundlePath"/> is an existing
/// directory the snapshot file name is appended, otherwise the path is returned unchanged.
/// </summary>
/// <returns>The resolved path, or <c>null</c> when <paramref name="bundlePath"/> is null or blank.</returns>
private static string? ResolveBundlePath(string? bundlePath, string fileName)
{
    if (string.IsNullOrWhiteSpace(bundlePath))
    {
        return null;
    }

    return Directory.Exists(bundlePath)
        ? Path.Combine(bundlePath, fileName)
        : bundlePath;
}
/// <summary>
/// Locates the manifest that belongs to a bundle: inside <paramref name="bundlePath"/> when it is
/// an existing directory, otherwise next to it in its parent directory.
/// </summary>
/// <returns>The manifest path, or <c>null</c> when no parent directory can be derived.</returns>
private static string? ResolveBundleManifestPath(string bundlePath)
{
    var manifestDirectory = Directory.Exists(bundlePath)
        ? bundlePath
        : Path.GetDirectoryName(bundlePath);

    if (string.IsNullOrWhiteSpace(manifestDirectory))
    {
        return null;
    }

    return Path.Combine(manifestDirectory, ManifestFileName);
}
/// <summary>
/// Reads the manifest JSON and returns the first element of its <c>files</c> array whose
/// <c>name</c> equals <paramref name="fileName"/> (case-insensitive).
/// </summary>
/// <returns>The matching entry, or <c>null</c> when there is no <c>files</c> array or no match.</returns>
private static BundleManifestEntry? TryReadBundleManifestEntry(string manifestPath, string fileName)
{
    using var manifestStream = File.OpenRead(manifestPath);
    using var manifest = JsonDocument.Parse(manifestStream);

    var hasFiles = manifest.RootElement.TryGetProperty("files", out var files);
    if (!hasFiles || files.ValueKind != JsonValueKind.Array)
    {
        return null;
    }

    foreach (var candidate in files.EnumerateArray())
    {
        if (!candidate.TryGetProperty("name", out var nameElement))
        {
            continue;
        }

        var candidateName = nameElement.GetString();
        var isMatch = !string.IsNullOrWhiteSpace(candidateName)
            && string.Equals(candidateName, fileName, StringComparison.OrdinalIgnoreCase);
        if (!isMatch)
        {
            continue;
        }

        string? modelVersion = null;
        if (candidate.TryGetProperty("modelVersion", out var modelElement))
        {
            modelVersion = modelElement.GetString();
        }

        string? sha256 = null;
        if (candidate.TryGetProperty("sha256", out var hashElement))
        {
            sha256 = hashElement.GetString();
        }

        int? rowCount = null;
        if (candidate.TryGetProperty("rowCount", out var rowElement) && rowElement.TryGetInt32(out var rows))
        {
            rowCount = rows;
        }

        return new BundleManifestEntry(candidateName, modelVersion, sha256, rowCount);
    }

    return null;
}
/// <summary>
/// Yields the dates to try, newest first: a start date followed by each of the preceding
/// <c>CatchUpDays</c> days. The start date is today when <c>FetchCurrent</c> is set; otherwise the
/// day after the last processed date, or today minus <c>CatchUpDays</c> when nothing was processed yet.
/// A start date in the future is clamped to today.
/// </summary>
private IEnumerable<DateOnly> GetCandidateDates(EpssCursor cursor, DateOnly nowDate)
{
    var backfillDays = Math.Max(0, _options.CatchUpDays);

    DateOnly startDate;
    if (_options.FetchCurrent)
    {
        startDate = nowDate;
    }
    else if (cursor.LastProcessedDate is { } lastProcessed)
    {
        startDate = lastProcessed.AddDays(1);
    }
    else
    {
        startDate = nowDate.AddDays(-backfillDays);
    }

    if (startDate > nowDate)
    {
        startDate = nowDate;
    }

    var offset = 0;
    while (offset <= backfillDays)
    {
        yield return startDate.AddDays(-offset);
        offset++;
    }
}
/// <summary>
/// Builds the snapshot file name for a date, e.g. <c>epss_scores-2025-12-22.csv.gz</c>.
/// </summary>
/// <remarks>
/// The date is formatted with the invariant culture so the name does not depend on the
/// current culture's calendar (a non-Gregorian default calendar would change the year).
/// </remarks>
private static string GetSnapshotFileName(DateOnly date)
{
    var isoDate = date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    return $"epss_scores-{isoDate}.csv.gz";
}
/// <summary>
/// Builds a GET request for <paramref name="uri"/> that accepts the gzip media types and carries
/// <c>If-None-Match</c> / <c>If-Modified-Since</c> headers when the corresponding values are usable.
/// </summary>
/// <param name="uri">Snapshot address.</param>
/// <param name="etag">Previously seen entity tag; ignored when blank or not parseable.</param>
/// <param name="lastModified">Previously seen modification time, if any.</param>
private static HttpRequestMessage CreateRequest(Uri uri, string? etag, DateTimeOffset? lastModified)
{
    var message = new HttpRequestMessage(HttpMethod.Get, uri);
    var headers = message.Headers;

    headers.Accept.Clear();
    for (var index = 0; index < AcceptTypes.Length; index++)
    {
        headers.Accept.Add(new MediaTypeWithQualityHeaderValue(AcceptTypes[index]));
    }

    if (!string.IsNullOrWhiteSpace(etag))
    {
        if (EntityTagHeaderValue.TryParse(etag, out var parsedTag))
        {
            headers.IfNoneMatch.Add(parsedTag);
        }
    }

    if (lastModified is { } modifiedSince)
    {
        headers.IfModifiedSince = modifiedSince;
    }

    return message;
}
/// <summary>
/// Sends a request built by <paramref name="requestFactory"/>, retrying with a growing delay on
/// retryable responses (see <c>ShouldRetry</c>) and on <see cref="HttpRequestException"/> /
/// <see cref="TaskCanceledException"/>, up to <c>MaxRetries + 1</c> attempts in total.
/// </summary>
/// <param name="requestFactory">Creates a fresh request for every attempt.</param>
/// <param name="client">Client used to send the request.</param>
/// <param name="cancellationToken">Token passed to the send and to the delays.</param>
/// <returns>The last response received; the caller owns and must dispose it.</returns>
private async Task<HttpResponseMessage> SendWithRetryAsync(
    Func<HttpRequestMessage> requestFactory,
    HttpClient client,
    CancellationToken cancellationToken)
{
    var attemptLimit = Math.Max(1, _options.MaxRetries + 1);
    var attempt = 1;

    while (attempt <= attemptLimit)
    {
        using var request = requestFactory();
        try
        {
            var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);

            var attemptsLeft = attempt < attemptLimit;
            if (!attemptsLeft || !ShouldRetry(response))
            {
                return response;
            }

            // Retryable status with attempts left: discard this response and wait.
            response.Dispose();
            await Task.Delay(GetRetryDelay(attempt), cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (attempt < attemptLimit && ex is HttpRequestException or TaskCanceledException)
        {
            await Task.Delay(GetRetryDelay(attempt), cancellationToken).ConfigureAwait(false);
        }

        attempt++;
    }

    throw new HttpRequestException("EPSS fetch exceeded retry attempts.");
}
/// <summary>
/// Returns <c>true</c> for responses worth retrying: 429 (Too Many Requests) and any 5xx status.
/// </summary>
private static bool ShouldRetry(HttpResponseMessage response)
    => response.StatusCode switch
    {
        HttpStatusCode.TooManyRequests => true,
        var other => (int)other is >= 500 and <= 599,
    };
/// <summary>
/// Delay before the next attempt: 2^(attempt - 1) seconds, capped at 30 seconds.
/// </summary>
/// <param name="attempt">1-based number of the attempt that just failed.</param>
private static TimeSpan GetRetryDelay(int attempt)
{
    const double CapSeconds = 30;

    var exponential = Math.Pow(2, attempt - 1);
    var delaySeconds = exponential > CapSeconds ? CapSeconds : exponential;
    return TimeSpan.FromSeconds(delaySeconds);
}
/// <summary>
/// Returns the string stored under <paramref name="key"/>, or <c>null</c> when the key is absent.
/// </summary>
private static string? TryGetString(DocumentObject payload, string key)
{
    if (payload.TryGetValue(key, out var value))
    {
        return value.AsString;
    }

    return null;
}
/// <summary>
/// Reads a date stored under <paramref name="key"/>, accepting either a date-time value or a
/// <c>yyyy-MM-dd</c> string.
/// </summary>
/// <returns>The date, or <c>null</c> when the key is absent or the value cannot be interpreted.</returns>
private static DateOnly? TryGetDate(DocumentObject payload, string key)
{
    if (!payload.TryGetValue(key, out var value))
    {
        return null;
    }

    var valueType = value.DocumentType;
    if (valueType == DocumentType.DateTime)
    {
        return DateOnly.FromDateTime(value.ToUniversalTime());
    }

    if (valueType != DocumentType.String)
    {
        return null;
    }

    return DateOnly.TryParseExact(value.AsString, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var date)
        ? date
        : null;
}
/// <summary>
/// Extracts the snapshot date from the <c>epss.date</c> metadata entry.
/// </summary>
/// <returns>
/// The parsed date, or <c>null</c> when the metadata is null, the entry is missing or blank,
/// or the value is not in <c>yyyy-MM-dd</c> form.
/// </returns>
private static DateOnly? TryParseDateFromMetadata(IReadOnlyDictionary<string, string>? metadata)
{
    if (metadata is not null
        && metadata.TryGetValue("epss.date", out var rawDate)
        && !string.IsNullOrWhiteSpace(rawDate)
        && DateOnly.TryParseExact(rawDate, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var date))
    {
        return date;
    }

    return null;
}
/// <summary>
/// Loads the persisted cursor for this source, or the empty cursor when no state exists yet.
/// </summary>
private async Task<EpssCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var persisted = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (persisted is null)
    {
        return EpssCursor.Empty;
    }

    return EpssCursor.FromDocument(persisted.Cursor);
}
/// <summary>
/// Persists <paramref name="cursor"/> for this source, stamped with the current time.
/// </summary>
private Task UpdateCursorAsync(EpssCursor cursor, CancellationToken cancellationToken)
    => _stateRepository.UpdateCursorAsync(
        SourceName,
        cursor.ToDocumentObject(),
        _timeProvider.GetUtcNow(),
        cancellationToken);
/// <summary>
/// Outcome of one snapshot fetch attempt, produced by the HTTP and bundle fetch helpers.
/// </summary>
/// <param name="SnapshotDate">Candidate date the snapshot was requested for.</param>
/// <param name="SourceUri">Requested HTTP URI, or a <c>bundle://</c> URI naming the bundle file.</param>
/// <param name="IsSuccess">True when content was retrieved.</param>
/// <param name="IsNotModified">True when the server answered 304 Not Modified.</param>
/// <param name="Content">Raw snapshot bytes; null for a not-modified result.</param>
/// <param name="ContentType">Content type reported by the response, or <c>application/gzip</c> for bundles.</param>
/// <param name="ETag">Entity tag from the response (falling back to the one sent); null for bundles.</param>
/// <param name="LastModified">Last-modified time from the response, or the bundle file's last write time.</param>
private sealed record EpssFetchResult(
DateOnly SnapshotDate,
string SourceUri,
bool IsSuccess,
bool IsNotModified,
byte[]? Content,
string? ContentType,
string? ETag,
DateTimeOffset? LastModified);
/// <summary>
/// One element of the <c>files</c> array in a bundle <c>manifest.json</c>.
/// </summary>
/// <param name="Name">Value of the entry's <c>name</c> property (the snapshot file name).</param>
/// <param name="ModelVersion">Value of <c>modelVersion</c>, when present.</param>
/// <param name="Sha256">Value of <c>sha256</c>, when present; may carry a <c>sha256:</c> prefix.</param>
/// <param name="RowCount">Value of <c>rowCount</c>, when present and readable as a 32-bit integer.</param>
private sealed record BundleManifestEntry(
string Name,
string? ModelVersion,
string? Sha256,
int? RowCount);
}

View File

@@ -0,0 +1,164 @@
using System.Globalization;
using StellaOps.Concelier.Documents;
namespace StellaOps.Concelier.Connector.Epss.Internal;
internal sealed record EpssCursor(
string? ModelVersion,
DateOnly? LastProcessedDate,
string? ETag,
string? ContentHash,
int? LastRowCount,
DateTimeOffset UpdatedAt,
IReadOnlyCollection<Guid> PendingDocuments,
IReadOnlyCollection<Guid> PendingMappings)
{
private static readonly IReadOnlyCollection<Guid> EmptyGuidCollection = Array.Empty<Guid>();
/// <summary>
/// Cursor with no snapshot metadata, a minimum timestamp and no pending work.
/// </summary>
public static EpssCursor Empty { get; } = new(
    ModelVersion: null,
    LastProcessedDate: null,
    ETag: null,
    ContentHash: null,
    LastRowCount: null,
    UpdatedAt: DateTimeOffset.MinValue,
    PendingDocuments: EmptyGuidCollection,
    PendingMappings: EmptyGuidCollection);
/// <summary>
/// Serializes the cursor. The two pending lists are always written (ids as strings);
/// every other field is written only when it holds a value.
/// </summary>
public DocumentObject ToDocumentObject()
{
    var document = new DocumentObject();
    document["pendingDocuments"] = new DocumentArray(PendingDocuments.Select(id => id.ToString()));
    document["pendingMappings"] = new DocumentArray(PendingMappings.Select(id => id.ToString()));

    if (!string.IsNullOrWhiteSpace(ModelVersion))
    {
        document["modelVersion"] = ModelVersion;
    }

    if (LastProcessedDate is { } processedDate)
    {
        document["lastProcessedDate"] = processedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    }

    if (!string.IsNullOrWhiteSpace(ETag))
    {
        document["etag"] = ETag;
    }

    if (!string.IsNullOrWhiteSpace(ContentHash))
    {
        document["contentHash"] = ContentHash;
    }

    if (LastRowCount is { } rowCount)
    {
        document["lastRowCount"] = rowCount;
    }

    // The minimum timestamp means "never updated" and is left out.
    if (UpdatedAt > DateTimeOffset.MinValue)
    {
        document["updatedAt"] = UpdatedAt.UtcDateTime;
    }

    return document;
}
/// <summary>
/// Rebuilds a cursor from its stored document form.
/// </summary>
/// <param name="document">Stored cursor document; null or empty yields <see cref="Empty"/>.</param>
/// <returns>
/// The cursor, with blank strings normalized to null, text values trimmed, non-positive row counts
/// dropped and unreadable dates ignored.
/// </returns>
public static EpssCursor FromDocument(DocumentObject? document)
{
    if (document is null || document.ElementCount == 0)
    {
        return Empty;
    }

    static string? ReadText(DocumentObject source, string key)
        => source.TryGetValue(key, out var raw) ? raw.AsString : null;

    static string? Normalize(string? text)
        => string.IsNullOrWhiteSpace(text) ? null : text.Trim();

    var modelVersion = ReadText(document, "modelVersion");

    // The date may have been stored either as a yyyy-MM-dd string or as a date-time value.
    DateOnly? lastProcessed = null;
    if (document.TryGetValue("lastProcessedDate", out var rawDate))
    {
        if (rawDate.DocumentType == DocumentType.String
            && DateOnly.TryParseExact(rawDate.AsString, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var textDate))
        {
            lastProcessed = textDate;
        }
        else if (rawDate.DocumentType == DocumentType.DateTime)
        {
            lastProcessed = DateOnly.FromDateTime(rawDate.ToUniversalTime());
        }
    }

    var etag = ReadText(document, "etag");
    var contentHash = ReadText(document, "contentHash");

    int? lastRowCount = null;
    if (document.TryGetValue("lastRowCount", out var rawCount))
    {
        var rows = rawCount.AsInt32;
        lastRowCount = rows > 0 ? rows : null;
    }

    var updatedAt = DateTimeOffset.MinValue;
    if (document.TryGetValue("updatedAt", out var rawUpdated))
    {
        var stamp = rawUpdated.AsDateTimeOffset;
        if (stamp > DateTimeOffset.MinValue)
        {
            updatedAt = stamp;
        }
    }

    return new EpssCursor(
        Normalize(modelVersion),
        lastProcessed,
        Normalize(etag),
        Normalize(contentHash),
        lastRowCount,
        updatedAt,
        ReadGuidArray(document, "pendingDocuments"),
        ReadGuidArray(document, "pendingMappings"));
}
/// <summary>
/// Returns a copy of this cursor whose pending-document ids are replaced by the distinct ids in
/// <paramref name="documents"/>.
/// </summary>
/// <param name="documents">Ids to record; a null sequence is treated as empty.</param>
/// <returns>A new cursor; this instance is not modified.</returns>
public EpssCursor WithPendingDocuments(IEnumerable<Guid> documents)
{
    IReadOnlyCollection<Guid> pending = documents is null
        ? EmptyGuidCollection
        : documents.Distinct().ToArray();

    return this with { PendingDocuments = pending };
}
/// <summary>
/// Returns a copy of this cursor whose pending-mapping ids are replaced by the distinct ids in
/// <paramref name="mappings"/>.
/// </summary>
/// <param name="mappings">Ids to record; a null sequence is treated as empty.</param>
/// <returns>A new cursor; this instance is not modified.</returns>
public EpssCursor WithPendingMappings(IEnumerable<Guid> mappings)
{
    IReadOnlyCollection<Guid> pending = mappings is null
        ? EmptyGuidCollection
        : mappings.Distinct().ToArray();

    return this with { PendingMappings = pending };
}
/// <summary>
/// Returns a copy of this cursor carrying the metadata of a snapshot.
/// </summary>
/// <param name="modelVersion">Model version; trimmed, blank becomes null.</param>
/// <param name="publishedDate">Stored as the last processed date, as given.</param>
/// <param name="etag">ETag value; trimmed, blank becomes null.</param>
/// <param name="contentHash">Content hash; trimmed, blank becomes null.</param>
/// <param name="rowCount">Row count; values that are null, zero or negative are stored as null.</param>
/// <param name="updatedAt">Timestamp stored as given.</param>
/// <returns>A new cursor; the pending-id collections are carried over unchanged.</returns>
public EpssCursor WithSnapshotMetadata(
    string? modelVersion,
    DateOnly? publishedDate,
    string? etag,
    string? contentHash,
    int? rowCount,
    DateTimeOffset updatedAt)
{
    // Collapses blank text to null and trims everything else.
    static string? Normalize(string? text)
    {
        return string.IsNullOrWhiteSpace(text) ? null : text.Trim();
    }

    int? positiveRowCount = rowCount is > 0 ? rowCount : null;

    return this with
    {
        ModelVersion = Normalize(modelVersion),
        LastProcessedDate = publishedDate,
        ETag = Normalize(etag),
        ContentHash = Normalize(contentHash),
        LastRowCount = positiveRowCount,
        UpdatedAt = updatedAt
    };
}
/// <summary>
/// Reads the array stored under <paramref name="key"/> and returns the entries that parse as GUIDs.
/// </summary>
/// <param name="document">Document to read from.</param>
/// <param name="key">Name of the array element.</param>
/// <returns>
/// The parsed ids in array order; the shared empty collection when the key is missing or its value is
/// not an array. Entries whose string form is not a valid GUID are skipped.
/// </returns>
private static IReadOnlyCollection<Guid> ReadGuidArray(DocumentObject document, string key)
{
    if (document.TryGetValue(key, out var raw) && raw is DocumentArray entries)
    {
        var ids = new List<Guid>(entries.Count);
        foreach (var entry in entries)
        {
            if (!Guid.TryParse(entry.ToString(), out var id))
            {
                continue;
            }

            ids.Add(id);
        }

        return ids;
    }

    return EmptyGuidCollection;
}
}

View File

@@ -0,0 +1,85 @@
using System.Collections.Generic;
using System.Diagnostics.Metrics;
namespace StellaOps.Concelier.Connector.Epss.Internal;
/// <summary>
/// Owns the <see cref="Meter"/> and counters that report EPSS connector activity
/// (fetch outcomes, parsed rows, parse failures and mapped rows).
/// </summary>
public sealed class EpssDiagnostics : IDisposable
{
    /// <summary>Name of the meter under which all EPSS counters are published.</summary>
    public const string MeterName = "StellaOps.Concelier.Connector.Epss";
    private const string MeterVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _fetchAttempts;
    private readonly Counter<long> _fetchSuccess;
    private readonly Counter<long> _fetchFailures;
    private readonly Counter<long> _fetchUnchanged;
    private readonly Counter<long> _parsedRows;
    private readonly Counter<long> _parseFailures;
    private readonly Counter<long> _mappedRows;

    /// <summary>Creates the meter and registers every counter on it.</summary>
    public EpssDiagnostics()
    {
        var meter = new Meter(MeterName, MeterVersion);
        _meter = meter;

        _fetchAttempts = meter.CreateCounter<long>(
            "epss.fetch.attempts",
            "operations",
            "Number of EPSS fetch attempts performed.");
        _fetchSuccess = meter.CreateCounter<long>(
            "epss.fetch.success",
            "operations",
            "Number of EPSS fetch attempts that produced new content.");
        _fetchFailures = meter.CreateCounter<long>(
            "epss.fetch.failures",
            "operations",
            "Number of EPSS fetch attempts that failed.");
        _fetchUnchanged = meter.CreateCounter<long>(
            "epss.fetch.unchanged",
            "operations",
            "Number of EPSS fetch attempts returning unchanged content.");
        _parsedRows = meter.CreateCounter<long>(
            "epss.parse.rows",
            "rows",
            "Number of EPSS rows parsed from snapshots.");
        _parseFailures = meter.CreateCounter<long>(
            "epss.parse.failures",
            "documents",
            "Number of EPSS snapshot parse failures.");
        _mappedRows = meter.CreateCounter<long>(
            "epss.map.rows",
            "rows",
            "Number of EPSS rows mapped into observations.");
    }

    /// <summary>Counts one fetch attempt.</summary>
    public void FetchAttempt()
    {
        _fetchAttempts.Add(1);
    }

    /// <summary>Counts one fetch that produced new content.</summary>
    public void FetchSuccess()
    {
        _fetchSuccess.Add(1);
    }

    /// <summary>Counts one failed fetch.</summary>
    public void FetchFailure()
    {
        _fetchFailures.Add(1);
    }

    /// <summary>Counts one fetch that returned unchanged content.</summary>
    public void FetchUnchanged()
    {
        _fetchUnchanged.Add(1);
    }

    /// <summary>
    /// Adds <paramref name="rowCount"/> to the parsed-row counter, tagged with the model version.
    /// Non-positive counts are ignored.
    /// </summary>
    /// <param name="rowCount">Number of rows parsed.</param>
    /// <param name="modelVersion">Model version tag; null is reported as an empty string.</param>
    public void ParseRows(int rowCount, string? modelVersion)
    {
        if (rowCount > 0)
        {
            _parsedRows.Add(rowCount, ModelVersionTag(modelVersion));
        }
    }

    /// <summary>Counts one snapshot parse failure, tagged with <paramref name="reason"/>.</summary>
    /// <param name="reason">Value recorded under the <c>reason</c> tag.</param>
    public void ParseFailure(string reason)
    {
        var reasonTag = new KeyValuePair<string, object?>("reason", reason);
        _parseFailures.Add(1, reasonTag);
    }

    /// <summary>
    /// Adds <paramref name="rowCount"/> to the mapped-row counter, tagged with the model version.
    /// Non-positive counts are ignored.
    /// </summary>
    /// <param name="rowCount">Number of rows mapped.</param>
    /// <param name="modelVersion">Model version tag; null is reported as an empty string.</param>
    public void MapRows(int rowCount, string? modelVersion)
    {
        if (rowCount > 0)
        {
            _mappedRows.Add(rowCount, ModelVersionTag(modelVersion));
        }
    }

    /// <summary>Disposes the underlying meter.</summary>
    public void Dispose()
    {
        _meter.Dispose();
    }

    // Builds the "modelVersion" tag shared by the row counters; a null version is sent as "".
    private static KeyValuePair<string, object?> ModelVersionTag(string? modelVersion)
    {
        return new KeyValuePair<string, object?>("modelVersion", modelVersion ?? string.Empty);
    }
}

View File

@@ -0,0 +1,53 @@
using StellaOps.Scanner.Storage.Epss;
namespace StellaOps.Concelier.Connector.Epss.Internal;
/// <summary>
/// Converts parsed EPSS score rows into <see cref="EpssObservation"/> records.
/// </summary>
public static class EpssMapper
{
    /// <summary>
    /// Builds an observation from a score row, attaching the model version, the publication date
    /// and the band derived from the row's score.
    /// </summary>
    /// <param name="row">Parsed score row supplying the CVE id, score and percentile.</param>
    /// <param name="modelVersion">Model version to record; must not be null, empty or whitespace.</param>
    /// <param name="publishedDate">Publication date to record on the observation.</param>
    /// <returns>The observation, with score and percentile converted to <see cref="decimal"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="modelVersion"/> is null, empty or whitespace.</exception>
    public static EpssObservation ToObservation(
        EpssScoreRow row,
        string modelVersion,
        DateOnly publishedDate)
    {
        if (string.IsNullOrWhiteSpace(modelVersion))
        {
            throw new ArgumentException("Model version is required.", nameof(modelVersion));
        }

        var cveId = row.CveId;
        var score = (decimal)row.Score;
        var percentile = (decimal)row.Percentile;

        return new EpssObservation
        {
            CveId = cveId,
            Score = score,
            Percentile = percentile,
            ModelVersion = modelVersion,
            PublishedDate = publishedDate,
            Band = DetermineBand(score)
        };
    }

    // Maps a score onto a band using inclusive lower bounds: 0.70 Critical, 0.40 High, 0.10 Medium, otherwise Low.
    private static EpssBand DetermineBand(decimal score)
    {
        if (score >= 0.70m)
        {
            return EpssBand.Critical;
        }

        if (score >= 0.40m)
        {
            return EpssBand.High;
        }

        if (score >= 0.10m)
        {
            return EpssBand.Medium;
        }

        return EpssBand.Low;
    }
}
/// <summary>
/// One EPSS data point for a single CVE, as produced by <see cref="EpssMapper.ToObservation"/>.
/// All properties are required and init-only.
/// </summary>
public sealed record EpssObservation
{
/// <summary>CVE identifier, copied from the source row.</summary>
public required string CveId { get; init; }
/// <summary>EPSS score as a decimal (presumably a probability in the range 0–1 — confirm against the parser).</summary>
public required decimal Score { get; init; }
/// <summary>EPSS percentile as a decimal, copied from the source row.</summary>
public required decimal Percentile { get; init; }
/// <summary>Version of the EPSS model the score belongs to; the mapper rejects blank values.</summary>
public required string ModelVersion { get; init; }
/// <summary>Publication date associated with the score.</summary>
public required DateOnly PublishedDate { get; init; }
/// <summary>Band the mapper derives from <see cref="Score"/>.</summary>
public required EpssBand Band { get; init; }
}
/// <summary>
/// Coarse classification of an EPSS score. <see cref="EpssMapper"/> assigns the band from the score
/// using inclusive lower bounds; members are ordered from lowest to highest.
/// </summary>
public enum EpssBand
{
/// <summary>Score below 0.10.</summary>
Low = 0,
/// <summary>Score of at least 0.10 and below 0.40.</summary>
Medium = 1,
/// <summary>Score of at least 0.40 and below 0.70.</summary>
High = 2,
/// <summary>Score of at least 0.70.</summary>
Critical = 3
}

View File

@@ -0,0 +1,47 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.Epss.Internal;
namespace StellaOps.Concelier.Connector.Epss;
/// <summary>
/// Job kind identifiers for the EPSS connector, one per pipeline stage.
/// NOTE(review): presumably these are the keys under which the job classes in this file are registered
/// with the scheduler — the registration is not in this file, confirm there.
/// </summary>
internal static class EpssJobKinds
{
/// <summary>Kind identifier for the fetch stage.</summary>
public const string Fetch = "source:epss:fetch";
/// <summary>Kind identifier for the parse stage.</summary>
public const string Parse = "source:epss:parse";
/// <summary>Kind identifier for the map stage.</summary>
public const string Map = "source:epss:map";
}
/// <summary>
/// Job that runs the fetch stage of the EPSS connector.
/// </summary>
/// <param name="connector">Connector to delegate to; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
internal sealed class EpssFetchJob(EpssConnector connector) : IJob
{
    // The parameter is consumed only by this initializer, so it is not captured as hidden state.
    private readonly EpssConnector _connector =
        connector ?? throw new ArgumentNullException(nameof(connector));

    /// <summary>
    /// Forwards to <c>EpssConnector.FetchAsync</c> with the context's service provider and the
    /// cancellation token, returning the connector's task directly.
    /// </summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}
/// <summary>
/// Job that runs the parse stage of the EPSS connector.
/// </summary>
/// <param name="connector">Connector to delegate to; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
internal sealed class EpssParseJob(EpssConnector connector) : IJob
{
    // The parameter is consumed only by this initializer, so it is not captured as hidden state.
    private readonly EpssConnector _connector =
        connector ?? throw new ArgumentNullException(nameof(connector));

    /// <summary>
    /// Forwards to <c>EpssConnector.ParseAsync</c> with the context's service provider and the
    /// cancellation token, returning the connector's task directly.
    /// </summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.ParseAsync(context.Services, cancellationToken);
    }
}
/// <summary>
/// Job that runs the map stage of the EPSS connector.
/// </summary>
/// <param name="connector">Connector to delegate to; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
internal sealed class EpssMapJob(EpssConnector connector) : IJob
{
    // The parameter is consumed only by this initializer, so it is not captured as hidden state.
    private readonly EpssConnector _connector =
        connector ?? throw new ArgumentNullException(nameof(connector));

    /// <summary>
    /// Forwards to <c>EpssConnector.MapAsync</c> with the context's service provider and the
    /// cancellation token, returning the connector's task directly.
    /// </summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.MapAsync(context.Services, cancellationToken);
    }
}

View File

@@ -0,0 +1,3 @@
using StellaOps.Plugin.Versioning;
// Declares this plugin assembly as version 1.0.0 and states the host versions it targets
// (1.0.0 through 1.99.99). NOTE(review): how the host enforces this range is defined by
// StellaOps.Plugin.Versioning, not here — confirm before changing the bounds.
[assembly: StellaPluginVersion("1.0.0", MinimumHostVersion = "1.0.0", MaximumHostVersion = "1.99.99")]

View File

@@ -0,0 +1,24 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the EPSS connector library. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
</PropertyGroup>
<!-- Project dependencies: shared StellaOps libraries, Concelier connector infrastructure, and Scanner storage. -->
<ItemGroup>
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../../../__Libraries/StellaOps.Cryptography/StellaOps.Cryptography.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Core/StellaOps.Concelier.Core.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
<!-- Presumably the source of the StellaOps.Scanner.Storage.Epss namespace that the connector's mapper imports. -->
<ProjectReference Include="../../../Scanner/__Libraries/StellaOps.Scanner.Storage/StellaOps.Scanner.Storage.csproj" />
</ItemGroup>
<!-- Grants the test assembly access to this project's internal types. -->
<ItemGroup>
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>StellaOps.Concelier.Connector.Epss.Tests</_Parameter1>
</AssemblyAttribute>
</ItemGroup>
</Project>