Restructure solution layout by module
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled

This commit is contained in:
root
2025-10-28 15:10:40 +02:00
parent 4e3e575db5
commit 68da90a11a
4103 changed files with 192899 additions and 187024 deletions

View File

@@ -0,0 +1,40 @@
# AGENTS
## Role
Bootstrap the ACSC (Australian Cyber Security Centre) advisories connector so the Concelier pipeline can ingest, normalise, and enrich ACSC security bulletins.
## Scope
- Research the authoritative ACSC advisory feed (RSS/Atom, JSON API, or HTML).
- Implement fetch windowing, cursor persistence, and retry strategy consistent with other external connectors.
- Parse advisory content (summary, affected products, mitigation guidance, references).
- Map advisories into canonical `Advisory` records with aliases, references, affected packages, and provenance metadata.
- Provide deterministic fixtures and regression tests that cover fetch/parse/map flows.
## Participants
- `Source.Common` for HTTP client creation, fetch service, and DTO persistence helpers.
- `Storage.Mongo` for raw/document/DTO/advisory storage plus cursor management.
- `Concelier.Models` for canonical advisory structures and provenance utilities.
- `Concelier.Testing` for integration harnesses and snapshot helpers.
## Interfaces & Contracts
- Job kinds should follow the pattern `acsc:fetch`, `acsc:parse`, `acsc:map`.
- Documents persisted to Mongo must include ETag/Last-Modified metadata when the source exposes it.
- Canonical advisories must emit aliases (ACSC ID + CVE IDs) and references (official bulletin + vendor notices).
## In/Out of scope
In scope:
- Initial end-to-end connector implementation with tests, fixtures, and range primitive coverage.
- Minimal telemetry (logging + diagnostics counters) consistent with other connectors.
Out of scope:
- Upstream remediation automation or vendor-specific enrichment beyond ACSC data.
- Export-related changes (handled by exporter teams).
## Observability & Security Expectations
- Log key lifecycle events (fetch/page processed, parse success/error counts, mapping stats).
- Sanitise HTML safely and avoid persisting external scripts or embedded media.
- Handle transient fetch failures gracefully with exponential backoff and mark failures in source state.
## Tests
- Add integration-style tests under `StellaOps.Concelier.Connector.Acsc.Tests` covering fetch/parse/map with canned fixtures.
- Snapshot canonical advisories; provide UPDATE flag flow for regeneration.
- Validate determinism (ordering, casing, timestamps) to satisfy pipeline reproducibility requirements.

View File

@@ -0,0 +1,699 @@
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Connector.Acsc.Configuration;
using StellaOps.Concelier.Connector.Acsc.Internal;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Html;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Acsc;
/// <summary>
/// Connector that ingests ACSC (Australian Cyber Security Centre) advisory feeds.
/// Pipeline stages: <see cref="FetchAsync"/> downloads RSS/Atom payloads (direct endpoint or an
/// optional relay), <see cref="ParseAsync"/> converts stored payloads into versioned DTOs, and
/// <see cref="MapAsync"/> projects DTOs into canonical advisories. <see cref="ProbeAsync"/>
/// maintains the direct-vs-relay endpoint preference in the source cursor.
/// </summary>
public sealed class AcscConnector : IFeedConnector
{
    // Accept header values sent with feed requests, in descending preference order.
    private static readonly string[] AcceptHeaders =
    {
        "application/rss+xml",
        "application/atom+xml;q=0.9",
        "application/xml;q=0.8",
        "text/xml;q=0.7",
    };

    // Web defaults plus case-insensitive property reads; compact output since the JSON
    // round-trips through BSON for storage rather than being shown to users.
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        WriteIndented = false,
    };

    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly AcscOptions _options;
    private readonly AcscDiagnostics _diagnostics;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<AcscConnector> _logger;
    private readonly HtmlContentSanitizer _htmlSanitizer = new();

    /// <summary>
    /// Creates the connector. All collaborators except <paramref name="timeProvider"/> are required;
    /// a null <paramref name="timeProvider"/> falls back to <see cref="TimeProvider.System"/>.
    /// Options are validated eagerly so misconfiguration fails at construction time.
    /// </summary>
    public AcscConnector(
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IHttpClientFactory httpClientFactory,
        IOptions<AcscOptions> options,
        AcscDiagnostics diagnostics,
        TimeProvider? timeProvider,
        ILogger<AcscConnector> logger)
    {
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        // Guard both the wrapper and the wrapped value; Options.Value can legally be null.
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Canonical source name shared with the plugin registration.</summary>
    public string SourceName => AcscConnectorPlugin.SourceName;

    /// <summary>
    /// Fetches every enabled feed, trying endpoints in preference order (direct vs relay).
    /// New documents are queued for parsing; per-feed "latest published" timestamps are advanced;
    /// per-feed failures are collected and reported together at the end so one bad feed does not
    /// mask the others.
    /// </summary>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var now = _timeProvider.GetUtcNow();
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        // Work on mutable copies; the persisted cursor is only replaced once at the end.
        var lastPublished = new Dictionary<string, DateTimeOffset?>(cursor.LastPublishedByFeed, StringComparer.OrdinalIgnoreCase);
        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var failures = new List<(AcscFeedOptions Feed, Exception Error)>();
        var preferredEndpoint = ResolveInitialPreference(cursor);
        // First endpoint mode that succeeded this run; becomes the new preference.
        AcscEndpointPreference? successPreference = null;
        foreach (var feed in GetEnabledFeeds())
        {
            cancellationToken.ThrowIfCancellationRequested();
            Exception? lastError = null;
            bool handled = false;
            foreach (var mode in BuildFetchOrder(preferredEndpoint))
            {
                cancellationToken.ThrowIfCancellationRequested();
                if (mode == AcscFetchMode.Relay && !IsRelayConfigured)
                {
                    continue;
                }
                var modeName = ModeName(mode);
                var targetUri = BuildFeedUri(feed, mode);
                var metadata = CreateMetadata(feed, cursor, modeName);
                // Reuse ETag/Last-Modified from the previously stored document for conditional GETs.
                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, targetUri.ToString(), cancellationToken).ConfigureAwait(false);
                var request = new SourceFetchRequest(AcscOptions.HttpClientName, SourceName, targetUri)
                {
                    Metadata = metadata,
                    ETag = existing?.Etag,
                    LastModified = existing?.LastModified,
                    AcceptHeaders = AcceptHeaders,
                    TimeoutOverride = _options.RequestTimeout,
                };
                try
                {
                    _diagnostics.FetchAttempt(feed.Slug, modeName);
                    var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
                    if (result.IsNotModified)
                    {
                        // 304: nothing new, but the endpoint works, so it still counts as a success
                        // for preference purposes.
                        _diagnostics.FetchUnchanged(feed.Slug, modeName);
                        successPreference ??= mode switch
                        {
                            AcscFetchMode.Relay => AcscEndpointPreference.Relay,
                            _ => AcscEndpointPreference.Direct,
                        };
                        handled = true;
                        _logger.LogDebug("ACSC feed {Feed} returned 304 via {Mode}", feed.Slug, modeName);
                        break;
                    }
                    if (!result.IsSuccess || result.Document is null)
                    {
                        // Unsuccessful but non-throwing fetch: remember the error and try the next mode.
                        _diagnostics.FetchFailure(feed.Slug, modeName);
                        lastError = new InvalidOperationException($"Fetch returned no document for {targetUri}");
                        continue;
                    }
                    pendingDocuments.Add(result.Document.Id);
                    successPreference = mode switch
                    {
                        AcscFetchMode.Relay => AcscEndpointPreference.Relay,
                        _ => AcscEndpointPreference.Direct,
                    };
                    handled = true;
                    _diagnostics.FetchSuccess(feed.Slug, modeName);
                    _logger.LogInformation("ACSC fetched {Feed} via {Mode} (documentId={DocumentId})", feed.Slug, modeName, result.Document.Id);
                    // Derive the newest entry timestamp from the fetched payload to advance the cursor.
                    var latestPublished = await TryComputeLatestPublishedAsync(result.Document, cancellationToken).ConfigureAwait(false);
                    if (latestPublished.HasValue)
                    {
                        // NOTE(review): if the dictionary holds an entry whose value is null, the lifted
                        // comparison below is false and the cursor will not advance — confirm intended.
                        if (!lastPublished.TryGetValue(feed.Slug, out var existingPublished) || latestPublished.Value > existingPublished)
                        {
                            lastPublished[feed.Slug] = latestPublished.Value;
                            _diagnostics.CursorUpdated(feed.Slug);
                            _logger.LogDebug("ACSC feed {Feed} advanced published cursor to {Timestamp:O}", feed.Slug, latestPublished.Value);
                        }
                    }
                    break;
                }
                catch (HttpRequestException ex) when (ShouldRetryWithRelay(mode))
                {
                    // Transport failure on the direct endpoint: fall through to the relay.
                    lastError = ex;
                    _diagnostics.FetchFallback(feed.Slug, modeName, "http-request");
                    _logger.LogWarning(ex, "ACSC fetch via {Mode} failed for {Feed}; attempting relay fallback.", modeName, feed.Slug);
                    continue;
                }
                catch (TaskCanceledException ex) when (ShouldRetryWithRelay(mode))
                {
                    // HttpClient timeouts surface as TaskCanceledException; external cancellation is
                    // re-checked at the top of the loop.
                    lastError = ex;
                    _diagnostics.FetchFallback(feed.Slug, modeName, "timeout");
                    _logger.LogWarning(ex, "ACSC fetch via {Mode} timed out for {Feed}; attempting relay fallback.", modeName, feed.Slug);
                    continue;
                }
                catch (Exception ex)
                {
                    // Any other failure aborts the remaining modes for this feed.
                    lastError = ex;
                    _diagnostics.FetchFailure(feed.Slug, modeName);
                    _logger.LogError(ex, "ACSC fetch failed for {Feed} via {Mode}", feed.Slug, modeName);
                    break;
                }
            }
            if (!handled && lastError is not null)
            {
                failures.Add((feed, lastError));
            }
        }
        if (failures.Count > 0)
        {
            // Record the aggregate failure (with backoff) in source state before surfacing it.
            var failureReason = string.Join("; ", failures.Select(f => $"{f.Feed.Slug}: {f.Error.Message}"));
            await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, failureReason, cancellationToken).ConfigureAwait(false);
            throw new AggregateException($"ACSC fetch failed for {failures.Count} feed(s): {failureReason}", failures.Select(f => f.Error));
        }
        // Persist the endpoint preference: configuration overrides (ForceRelay / no relay) win over
        // whatever succeeded during this run.
        var updatedPreference = successPreference ?? preferredEndpoint;
        if (_options.ForceRelay)
        {
            updatedPreference = AcscEndpointPreference.Relay;
        }
        else if (!IsRelayConfigured)
        {
            updatedPreference = AcscEndpointPreference.Direct;
        }
        var updatedCursor = cursor
            .WithPreferredEndpoint(updatedPreference)
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings)
            .WithLastPublished(lastPublished);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Parses each pending document: downloads the raw payload, parses it into an
    /// "acsc.feed.v1" DTO, and moves the document to PendingMap (or Failed on any error).
    /// Documents that vanished from the store are silently dropped from the cursor.
    /// </summary>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }
        // Iterate the cursor's snapshot while mutating local copies.
        var pendingDocuments = cursor.PendingDocuments.ToList();
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            var metadata = AcscDocumentMetadata.FromDocument(document);
            var feedTag = string.IsNullOrWhiteSpace(metadata.FeedSlug) ? "(unknown)" : metadata.FeedSlug;
            _diagnostics.ParseAttempt(feedTag);
            if (!document.GridFsId.HasValue)
            {
                // No payload to parse; mark failed and drop from both queues.
                _diagnostics.ParseFailure(feedTag, "missingPayload");
                _logger.LogWarning("ACSC document {DocumentId} missing GridFS payload (feed={Feed})", document.Id, feedTag);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            byte[] rawBytes;
            try
            {
                rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _diagnostics.ParseFailure(feedTag, "download");
                _logger.LogError(ex, "ACSC failed to download payload for document {DocumentId} (feed={Feed})", document.Id, feedTag);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            try
            {
                var parsedAt = _timeProvider.GetUtcNow();
                var dto = AcscFeedParser.Parse(rawBytes, metadata.FeedSlug, parsedAt, _htmlSanitizer);
                // Serialize to JSON, then re-parse as BSON for storage in the DTO collection.
                var json = JsonSerializer.Serialize(dto, SerializerOptions);
                var payload = BsonDocument.Parse(json);
                var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
                var dtoRecord = existingDto is null
                    ? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "acsc.feed.v1", payload, parsedAt)
                    : existingDto with
                    {
                        Payload = payload,
                        SchemaVersion = "acsc.feed.v1",
                        ValidatedAt = parsedAt,
                    };
                await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Add(document.Id);
                _diagnostics.ParseSuccess(feedTag);
                _logger.LogInformation("ACSC parsed document {DocumentId} (feed={Feed}, entries={EntryCount})", document.Id, feedTag, dto.Entries.Count);
            }
            catch (Exception ex)
            {
                _diagnostics.ParseFailure(feedTag, "parse");
                _logger.LogError(ex, "ACSC parse failed for document {DocumentId} (feed={Feed})", document.Id, feedTag);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
            }
        }
        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Maps each pending DTO into canonical advisories and upserts them. Documents end in the
    /// Mapped status on success (even when zero advisories are produced) or Failed when the DTO
    /// cannot be deserialized.
    /// </summary>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var documentIds = cursor.PendingMappings.ToList();
        foreach (var documentId in documentIds)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (dtoRecord is null || document is null)
            {
                // Either half of the pair is gone; nothing to map.
                pendingMappings.Remove(documentId);
                continue;
            }
            AcscFeedDto? feed;
            try
            {
                // BSON -> relaxed extended JSON -> DTO; the relaxed mode keeps plain JSON types
                // readable by System.Text.Json.
                var dtoJson = dtoRecord.Payload.ToJson(new JsonWriterSettings
                {
                    OutputMode = JsonOutputMode.RelaxedExtendedJson,
                });
                feed = JsonSerializer.Deserialize<AcscFeedDto>(dtoJson, SerializerOptions);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "ACSC mapping failed to deserialize DTO for document {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }
            if (feed is null)
            {
                _logger.LogWarning("ACSC mapping encountered null DTO payload for document {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }
            var mappedAt = _timeProvider.GetUtcNow();
            var advisories = AcscMapper.Map(feed, document, dtoRecord, SourceName, mappedAt);
            if (advisories.Count > 0)
            {
                foreach (var advisory in advisories)
                {
                    await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
                }
                _diagnostics.MapSuccess(advisories.Count);
                _logger.LogInformation(
                    "ACSC mapped {Count} advisories from document {DocumentId} (feed={Feed})",
                    advisories.Count,
                    document.Id,
                    feed.FeedSlug ?? "(unknown)");
            }
            else
            {
                _logger.LogInformation(
                    "ACSC mapping produced no advisories for document {DocumentId} (feed={Feed})",
                    document.Id,
                    feed.FeedSlug ?? "(unknown)");
            }
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
        }
        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Probes the direct endpoint (HEAD, with a GET fallback on 405) and persists the resulting
    /// endpoint preference. Configuration short-circuits: ForceRelay pins Relay; an unset relay
    /// pins Direct. Any probe failure flips the preference to Relay.
    /// </summary>
    public async Task ProbeAsync(CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (_options.ForceRelay)
        {
            if (cursor.PreferredEndpoint != AcscEndpointPreference.Relay)
            {
                await UpdateCursorAsync(cursor.WithPreferredEndpoint(AcscEndpointPreference.Relay), cancellationToken).ConfigureAwait(false);
            }
            return;
        }
        if (!IsRelayConfigured)
        {
            if (cursor.PreferredEndpoint != AcscEndpointPreference.Direct)
            {
                await UpdateCursorAsync(cursor.WithPreferredEndpoint(AcscEndpointPreference.Direct), cancellationToken).ConfigureAwait(false);
            }
            return;
        }
        // Use the first enabled feed as the probe target; nothing to probe if none are enabled.
        var feed = GetEnabledFeeds().FirstOrDefault();
        if (feed is null)
        {
            return;
        }
        var httpClient = _httpClientFactory.CreateClient(AcscOptions.HttpClientName);
        // Short probe timeout, independent of the fetch timeout.
        httpClient.Timeout = TimeSpan.FromSeconds(15);
        var directUri = BuildFeedUri(feed, AcscFetchMode.Direct);
        try
        {
            using var headRequest = new HttpRequestMessage(HttpMethod.Head, directUri);
            using var response = await httpClient.SendAsync(headRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
            if (response.IsSuccessStatusCode)
            {
                if (cursor.PreferredEndpoint != AcscEndpointPreference.Direct)
                {
                    await UpdateCursorAsync(cursor.WithPreferredEndpoint(AcscEndpointPreference.Direct), cancellationToken).ConfigureAwait(false);
                    _logger.LogInformation("ACSC probe succeeded via direct endpoint ({StatusCode}); relay preference cleared.", (int)response.StatusCode);
                }
                return;
            }
            if (response.StatusCode == HttpStatusCode.MethodNotAllowed)
            {
                // Some servers reject HEAD; retry with a headers-only GET before giving up.
                using var probeRequest = new HttpRequestMessage(HttpMethod.Get, directUri);
                using var probeResponse = await httpClient.SendAsync(probeRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
                if (probeResponse.IsSuccessStatusCode)
                {
                    if (cursor.PreferredEndpoint != AcscEndpointPreference.Direct)
                    {
                        await UpdateCursorAsync(cursor.WithPreferredEndpoint(AcscEndpointPreference.Direct), cancellationToken).ConfigureAwait(false);
                        _logger.LogInformation("ACSC probe succeeded via direct endpoint after GET fallback ({StatusCode}).", (int)probeResponse.StatusCode);
                    }
                    return;
                }
            }
            _logger.LogWarning("ACSC direct probe returned HTTP {StatusCode}; relay preference enabled.", (int)response.StatusCode);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "ACSC direct probe failed; relay preference will be enabled.");
        }
        if (cursor.PreferredEndpoint != AcscEndpointPreference.Relay)
        {
            await UpdateCursorAsync(cursor.WithPreferredEndpoint(AcscEndpointPreference.Relay), cancellationToken).ConfigureAwait(false);
        }
    }

    // Relay fallback only applies to failed direct attempts when a relay exists, fallback is
    // enabled, and the relay is not already forced.
    private bool ShouldRetryWithRelay(AcscFetchMode mode)
        => mode == AcscFetchMode.Direct && _options.EnableRelayFallback && IsRelayConfigured && !_options.ForceRelay;

    /// <summary>
    /// Yields the endpoint modes to attempt, in order, for the given preference.
    /// ForceRelay yields relay only; no relay configured yields direct only; otherwise the
    /// preferred mode comes first with the other as fallback when EnableRelayFallback is set.
    /// </summary>
    private IEnumerable<AcscFetchMode> BuildFetchOrder(AcscEndpointPreference preference)
    {
        if (_options.ForceRelay)
        {
            if (IsRelayConfigured)
            {
                yield return AcscFetchMode.Relay;
            }
            yield break;
        }
        if (!IsRelayConfigured)
        {
            yield return AcscFetchMode.Direct;
            yield break;
        }
        var preferRelay = preference == AcscEndpointPreference.Relay;
        if (preference == AcscEndpointPreference.Auto)
        {
            preferRelay = _options.PreferRelayByDefault;
        }
        if (preferRelay)
        {
            yield return AcscFetchMode.Relay;
            if (_options.EnableRelayFallback)
            {
                yield return AcscFetchMode.Direct;
            }
        }
        else
        {
            yield return AcscFetchMode.Direct;
            if (_options.EnableRelayFallback)
            {
                yield return AcscFetchMode.Relay;
            }
        }
    }

    // Resolves the starting preference: configuration overrides first, then the persisted cursor,
    // then the configured default for the Auto state.
    private AcscEndpointPreference ResolveInitialPreference(AcscCursor cursor)
    {
        if (_options.ForceRelay)
        {
            return AcscEndpointPreference.Relay;
        }
        if (!IsRelayConfigured)
        {
            return AcscEndpointPreference.Direct;
        }
        if (cursor.PreferredEndpoint != AcscEndpointPreference.Auto)
        {
            return cursor.PreferredEndpoint;
        }
        return _options.PreferRelayByDefault ? AcscEndpointPreference.Relay : AcscEndpointPreference.Direct;
    }

    /// <summary>
    /// Scans the stored payload for RSS/Atom entries and returns the newest timestamp found,
    /// or null when the payload is missing, empty, or not parseable as XML (logged, not thrown).
    /// </summary>
    private async Task<DateTimeOffset?> TryComputeLatestPublishedAsync(DocumentRecord document, CancellationToken cancellationToken)
    {
        if (!document.GridFsId.HasValue)
        {
            return null;
        }
        var rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        if (rawBytes.Length == 0)
        {
            return null;
        }
        try
        {
            using var memoryStream = new MemoryStream(rawBytes, writable: false);
            var xml = XDocument.Load(memoryStream, LoadOptions.None);
            DateTimeOffset? latest = null;
            foreach (var element in xml.Descendants())
            {
                if (!IsEntryElement(element.Name.LocalName))
                {
                    continue;
                }
                var published = ExtractPublished(element);
                if (!published.HasValue)
                {
                    continue;
                }
                if (latest is null || published.Value > latest.Value)
                {
                    latest = published;
                }
            }
            return latest;
        }
        catch (Exception ex)
        {
            // A cursor is best-effort; a malformed payload must not fail the fetch.
            _logger.LogWarning(ex, "ACSC failed to derive published cursor for document {DocumentId} ({Uri})", document.Id, document.Uri);
            return null;
        }
    }

    // RSS uses <item>, Atom uses <entry>; namespaces are ignored on purpose.
    private static bool IsEntryElement(string localName)
        => string.Equals(localName, "item", StringComparison.OrdinalIgnoreCase)
        || string.Equals(localName, "entry", StringComparison.OrdinalIgnoreCase);

    // Returns the first timestamp child that parses; values are normalized to UTC.
    private static DateTimeOffset? ExtractPublished(XElement element)
    {
        foreach (var name in EnumerateTimestampNames(element))
        {
            if (DateTimeOffset.TryParse(
                name.Value,
                CultureInfo.InvariantCulture,
                DateTimeStyles.AllowWhiteSpaces | DateTimeStyles.AssumeUniversal,
                out var parsed))
            {
                return parsed.ToUniversalTime();
            }
        }
        return null;
    }

    // Yields direct children whose local name is one of the known timestamp elements
    // (RSS pubDate, Atom published/updated, Dublin Core-style date).
    private static IEnumerable<XElement> EnumerateTimestampNames(XElement element)
    {
        foreach (var child in element.Elements())
        {
            var localName = child.Name.LocalName;
            if (string.Equals(localName, "pubDate", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(localName, "published", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(localName, "updated", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(localName, "date", StringComparison.OrdinalIgnoreCase))
            {
                yield return child;
            }
        }
    }

    // Request metadata persisted alongside the document; consumed later by
    // AcscDocumentMetadata.FromDocument during parsing.
    private Dictionary<string, string> CreateMetadata(AcscFeedOptions feed, AcscCursor cursor, string mode)
    {
        var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            ["acsc.feed.slug"] = feed.Slug,
            ["acsc.fetch.mode"] = mode,
        };
        if (cursor.LastPublishedByFeed.TryGetValue(feed.Slug, out var published) && published.HasValue)
        {
            metadata["acsc.cursor.lastPublished"] = published.Value.ToString("O");
        }
        return metadata;
    }

    // Combines the feed's relative path with the endpoint selected by the fetch mode.
    private Uri BuildFeedUri(AcscFeedOptions feed, AcscFetchMode mode)
    {
        var baseUri = mode switch
        {
            AcscFetchMode.Relay when IsRelayConfigured => _options.RelayEndpoint!,
            _ => _options.BaseEndpoint,
        };
        return new Uri(baseUri, feed.RelativePath);
    }

    private IEnumerable<AcscFeedOptions> GetEnabledFeeds()
        => _options.Feeds.Where(feed => feed is { Enabled: true });

    private Task<AcscCursor> GetCursorAsync(CancellationToken cancellationToken)
        => GetCursorCoreAsync(cancellationToken);

    // Reads the persisted source state; an absent state yields the empty cursor.
    private async Task<AcscCursor> GetCursorCoreAsync(CancellationToken cancellationToken)
    {
        var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return state is null ? AcscCursor.Empty : AcscCursor.FromBson(state.Cursor);
    }

    // Persists the cursor and stamps the completion time from the injected clock.
    private Task UpdateCursorAsync(AcscCursor cursor, CancellationToken cancellationToken)
    {
        var document = cursor.ToBsonDocument();
        var completedAt = _timeProvider.GetUtcNow();
        return _stateRepository.UpdateCursorAsync(SourceName, document, completedAt, cancellationToken);
    }

    private bool IsRelayConfigured => _options.RelayEndpoint is not null;

    // Stable lower-case names used in metrics tags and log messages.
    private static string ModeName(AcscFetchMode mode) => mode switch
    {
        AcscFetchMode.Relay => "relay",
        _ => "direct",
    };

    // Internal endpoint selector; distinct from the persisted AcscEndpointPreference,
    // which additionally models the Auto state.
    private enum AcscFetchMode
    {
        Direct = 0,
        Relay = 1,
    }
}

View File

@@ -0,0 +1,19 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Acsc;
/// <summary>
/// Plugin entry point that exposes the ACSC connector to the host's plugin catalogue.
/// </summary>
public sealed class AcscConnectorPlugin : IConnectorPlugin
{
    /// <summary>Canonical source identifier shared by jobs, cursors, and storage records.</summary>
    public const string SourceName = "acsc";

    /// <inheritdoc />
    public string Name => SourceName;

    /// <inheritdoc />
    public bool IsAvailable(IServiceProvider services)
    {
        // Available whenever a service provider exists; the connector has no optional prerequisites here.
        return services is not null;
    }

    /// <inheritdoc />
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        // Resolve constructor dependencies from the container on each call.
        var connector = ActivatorUtilities.CreateInstance<AcscConnector>(services);
        return connector;
    }
}

View File

@@ -0,0 +1,44 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.Acsc.Configuration;
namespace StellaOps.Concelier.Connector.Acsc;
/// <summary>
/// Registers ACSC connector options (bound from configuration) and the four scheduled
/// jobs (fetch, parse, map, probe) with staggered cron expressions.
/// </summary>
public sealed class AcscDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:acsc";

    // Cron schedules are offset from each other so fetch output is available before parse/map run.
    private const string FetchCron = "7,37 * * * *";
    private const string ParseCron = "12,42 * * * *";
    private const string MapCron = "17,47 * * * *";
    private const string ProbeCron = "25,55 * * * *";

    private static readonly TimeSpan FetchTimeout = TimeSpan.FromMinutes(4);
    private static readonly TimeSpan ParseTimeout = TimeSpan.FromMinutes(3);
    private static readonly TimeSpan MapTimeout = TimeSpan.FromMinutes(3);
    private static readonly TimeSpan ProbeTimeout = TimeSpan.FromMinutes(1);
    private static readonly TimeSpan LeaseDuration = TimeSpan.FromMinutes(3);

    /// <inheritdoc />
    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Bind and eagerly validate options from the connector's configuration section.
        services.AddAcscConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        });

        var jobs = new JobSchedulerBuilder(services);
        jobs.AddJob<AcscFetchJob>(AcscJobKinds.Fetch, FetchCron, FetchTimeout, LeaseDuration);
        jobs.AddJob<AcscParseJob>(AcscJobKinds.Parse, ParseCron, ParseTimeout, LeaseDuration);
        jobs.AddJob<AcscMapJob>(AcscJobKinds.Map, MapCron, MapTimeout, LeaseDuration);
        jobs.AddJob<AcscProbeJob>(AcscJobKinds.Probe, ProbeCron, ProbeTimeout, LeaseDuration);
        return services;
    }
}

View File

@@ -0,0 +1,56 @@
using System.Net;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Acsc.Configuration;
using StellaOps.Concelier.Connector.Acsc.Internal;
using StellaOps.Concelier.Connector.Common.Http;
namespace StellaOps.Concelier.Connector.Acsc;
/// <summary>
/// Service-collection wiring for the ACSC connector: options, a named source HTTP client,
/// diagnostics, and the connector itself.
/// </summary>
public static class AcscServiceCollectionExtensions
{
    /// <summary>
    /// Adds the ACSC connector services. <paramref name="configure"/> populates
    /// <see cref="AcscOptions"/>; validation runs again in post-configure so late mutation
    /// cannot bypass it.
    /// </summary>
    public static IServiceCollection AddAcscConnector(this IServiceCollection services, Action<AcscOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        services.AddOptions<AcscOptions>()
            .Configure(configure)
            .PostConfigure(static options => options.Validate());

        services.AddSourceHttpClient(AcscOptions.HttpClientName, (provider, client) =>
        {
            var acscOptions = provider.GetRequiredService<IOptions<AcscOptions>>().Value;
            client.Timeout = acscOptions.RequestTimeout;
            client.UserAgent = acscOptions.UserAgent;
            client.RequestVersion = acscOptions.RequestVersion;
            client.VersionPolicy = acscOptions.VersionPolicy;
            client.AllowAutoRedirect = true;
            client.ConfigureHandler = handler =>
            {
                handler.AllowAutoRedirect = true;
                handler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            };

            // Restrict outbound hosts to the configured endpoints only.
            client.AllowedHosts.Clear();
            client.AllowedHosts.Add(acscOptions.BaseEndpoint.Host);
            var relay = acscOptions.RelayEndpoint;
            if (relay is not null)
            {
                client.AllowedHosts.Add(relay.Host);
            }

            // Feed media types in descending preference; joined into a single Accept header.
            string[] acceptValues =
            {
                "application/rss+xml",
                "application/atom+xml;q=0.9",
                "application/xml;q=0.8",
                "text/xml;q=0.7",
            };
            client.DefaultRequestHeaders["Accept"] = string.Join(", ", acceptValues);
        });

        services.AddSingleton<AcscDiagnostics>();
        services.AddTransient<AcscConnector>();
        return services;
    }
}

View File

@@ -0,0 +1,54 @@
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Connector.Acsc.Configuration;
/// <summary>
/// Defines a single ACSC RSS feed endpoint.
/// </summary>
/// <summary>
/// Defines a single ACSC RSS feed endpoint.
/// </summary>
public sealed class AcscFeedOptions
{
    // Valid slugs are lower-case alphanumeric, optionally hyphen-separated, never starting with a hyphen.
    private static readonly Regex ValidSlug = new("^[a-z0-9][a-z0-9\\-]*$", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Logical slug for the feed (alerts, advisories, threats, etc.).
    /// </summary>
    public string Slug { get; set; } = "alerts";

    /// <summary>
    /// Relative path (under <see cref="AcscOptions.BaseEndpoint"/>) for the RSS feed.
    /// </summary>
    public string RelativePath { get; set; } = "/acsc/view-all-content/alerts/rss";

    /// <summary>
    /// Indicates whether the feed is active.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Optional display name for logging.
    /// </summary>
    public string? DisplayName { get; set; }

    /// <summary>
    /// Validates this entry; <paramref name="index"/> identifies the feed's position in
    /// configuration for error messages when no slug is available yet.
    /// </summary>
    /// <exception cref="InvalidOperationException">Thrown when the slug or relative path is missing or malformed.</exception>
    internal void Validate(int index)
    {
        if (string.IsNullOrWhiteSpace(Slug))
        {
            throw new InvalidOperationException($"ACSC feed entry #{index} must define a slug.");
        }

        if (!ValidSlug.IsMatch(Slug))
        {
            throw new InvalidOperationException($"ACSC feed slug '{Slug}' is invalid. Slugs must be lower-case alphanumeric with optional hyphen separators.");
        }

        if (string.IsNullOrWhiteSpace(RelativePath))
        {
            throw new InvalidOperationException($"ACSC feed '{Slug}' must specify a relative path.");
        }

        if (!RelativePath.StartsWith("/", StringComparison.Ordinal))
        {
            throw new InvalidOperationException($"ACSC feed '{Slug}' relative path must begin with '/' (value: '{RelativePath}').");
        }
    }
}

View File

@@ -0,0 +1,153 @@
using System.Net;
using System.Net.Http;
namespace StellaOps.Concelier.Connector.Acsc.Configuration;
/// <summary>
/// Connector options governing ACSC feed access and retry behaviour.
/// </summary>
public sealed class AcscOptions
{
public const string HttpClientName = "acsc";
private static readonly TimeSpan DefaultRequestTimeout = TimeSpan.FromSeconds(45);
private static readonly TimeSpan DefaultFailureBackoff = TimeSpan.FromMinutes(5);
private static readonly TimeSpan DefaultInitialBackfill = TimeSpan.FromDays(120);
public AcscOptions()
{
Feeds = new List<AcscFeedOptions>
{
new() { Slug = "alerts", RelativePath = "/acsc/view-all-content/alerts/rss" },
new() { Slug = "advisories", RelativePath = "/acsc/view-all-content/advisories/rss" },
new() { Slug = "news", RelativePath = "/acsc/view-all-content/news/rss", Enabled = false },
new() { Slug = "publications", RelativePath = "/acsc/view-all-content/publications/rss", Enabled = false },
new() { Slug = "threats", RelativePath = "/acsc/view-all-content/threats/rss", Enabled = false },
};
}
    /// <summary>
    /// Base endpoint for direct ACSC fetches. Must be absolute and end with a trailing slash (see <see cref="Validate"/>).
    /// </summary>
    public Uri BaseEndpoint { get; set; } = new("https://www.cyber.gov.au/", UriKind.Absolute);
    /// <summary>
    /// Optional relay endpoint used when Akamai terminates direct HTTP/2 connections.
    /// </summary>
    public Uri? RelayEndpoint { get; set; }
    /// <summary>
    /// Default mode when no preference has been captured in connector state. When <c>true</c>, the relay will be preferred for initial fetches.
    /// </summary>
    public bool PreferRelayByDefault { get; set; }
    /// <summary>
    /// If enabled, the connector may switch to the relay endpoint when direct fetches fail. Defaults to <c>true</c>.
    /// </summary>
    public bool EnableRelayFallback { get; set; } = true;
    /// <summary>
    /// If set, the connector will always use the relay endpoint and skip direct attempts.
    /// </summary>
    public bool ForceRelay { get; set; }
    /// <summary>
    /// Timeout applied to fetch requests (overrides HttpClient default). Defaults to 45 seconds.
    /// </summary>
    public TimeSpan RequestTimeout { get; set; } = DefaultRequestTimeout;
    /// <summary>
    /// Backoff applied when marking fetch failures. Defaults to 5 minutes.
    /// </summary>
    public TimeSpan FailureBackoff { get; set; } = DefaultFailureBackoff;
    /// <summary>
    /// Look-back period used when deriving initial published cursors. Defaults to 120 days.
    /// </summary>
    public TimeSpan InitialBackfill { get; set; } = DefaultInitialBackfill;
    /// <summary>
    /// User-agent header sent with outbound requests.
    /// </summary>
    public string UserAgent { get; set; } = "StellaOps/Concelier (+https://stella-ops.org)";
    /// <summary>
    /// RSS feeds requested during fetch. Populated with defaults by the constructor; mutate in place to customise.
    /// </summary>
    public IList<AcscFeedOptions> Feeds { get; }
    /// <summary>
    /// HTTP version policy requested for outbound requests.
    /// </summary>
    public HttpVersionPolicy VersionPolicy { get; set; } = HttpVersionPolicy.RequestVersionOrLower;
    /// <summary>
    /// Default HTTP version requested when connecting to ACSC (defaults to HTTP/2 but allows downgrade).
    /// </summary>
    public Version RequestVersion { get; set; } = HttpVersion.Version20;
    /// <summary>
    /// Validates the option set, throwing <see cref="InvalidOperationException"/> on the
    /// first violated invariant. Intended to run once at startup when options are bound.
    /// </summary>
    /// <exception cref="InvalidOperationException">Thrown when any option is out of range or inconsistent.</exception>
    public void Validate()
    {
        if (BaseEndpoint is null || !BaseEndpoint.IsAbsoluteUri)
        {
            throw new InvalidOperationException("ACSC BaseEndpoint must be an absolute URI.");
        }
        // Trailing slash is required so relative feed paths combine predictably.
        if (!BaseEndpoint.AbsoluteUri.EndsWith("/", StringComparison.Ordinal))
        {
            throw new InvalidOperationException("ACSC BaseEndpoint must include a trailing slash.");
        }
        if (RelayEndpoint is not null && !RelayEndpoint.IsAbsoluteUri)
        {
            throw new InvalidOperationException("ACSC RelayEndpoint must be an absolute URI when specified.");
        }
        if (RelayEndpoint is not null && !RelayEndpoint.AbsoluteUri.EndsWith("/", StringComparison.Ordinal))
        {
            throw new InvalidOperationException("ACSC RelayEndpoint must include a trailing slash when specified.");
        }
        if (RequestTimeout <= TimeSpan.Zero)
        {
            throw new InvalidOperationException("ACSC RequestTimeout must be positive.");
        }
        // Zero backoff is allowed (retry immediately); negative is not.
        if (FailureBackoff < TimeSpan.Zero)
        {
            throw new InvalidOperationException("ACSC FailureBackoff cannot be negative.");
        }
        if (InitialBackfill <= TimeSpan.Zero)
        {
            throw new InvalidOperationException("ACSC InitialBackfill must be positive.");
        }
        if (string.IsNullOrWhiteSpace(UserAgent))
        {
            throw new InvalidOperationException("ACSC UserAgent cannot be empty.");
        }
        if (Feeds.Count == 0)
        {
            throw new InvalidOperationException("At least one ACSC feed must be configured.");
        }
        // Each feed validates itself; slug uniqueness is enforced only across
        // enabled feeds, so a disabled duplicate does not block startup.
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        for (var i = 0; i < Feeds.Count; i++)
        {
            var feed = Feeds[i];
            feed.Validate(i);
            if (!feed.Enabled)
            {
                continue;
            }
            if (!seen.Add(feed.Slug))
            {
                throw new InvalidOperationException($"Duplicate ACSC feed slug '{feed.Slug}' detected. Slugs must be unique (case-insensitive).");
            }
        }
    }
}

View File

@@ -0,0 +1,141 @@
using MongoDB.Bson;
namespace StellaOps.Concelier.Connector.Acsc.Internal;
/// <summary>
/// Persisted preference for which endpoint the connector should try first.
/// Serialized by name in the cursor document (see <c>AcscCursor</c>).
/// </summary>
internal enum AcscEndpointPreference
{
    /// <summary>No recorded preference.</summary>
    Auto = 0,
    /// <summary>Prefer direct fetches against the base endpoint.</summary>
    Direct = 1,
    /// <summary>Prefer the relay endpoint.</summary>
    Relay = 2,
}
/// <summary>
/// Immutable connector cursor persisted as BSON: the preferred endpoint, the most
/// recent published timestamp seen per feed slug, and the queues of documents still
/// awaiting parse (<see cref="PendingDocuments"/>) or map (<see cref="PendingMappings"/>).
/// </summary>
internal sealed record AcscCursor(
    AcscEndpointPreference PreferredEndpoint,
    IReadOnlyDictionary<string, DateTimeOffset?> LastPublishedByFeed,
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings)
{
    private static readonly IReadOnlyCollection<Guid> EmptyGuidList = Array.Empty<Guid>();
    // Feed slugs compare case-insensitively everywhere in this connector.
    private static readonly IReadOnlyDictionary<string, DateTimeOffset?> EmptyFeedDictionary =
        new Dictionary<string, DateTimeOffset?>(StringComparer.OrdinalIgnoreCase);

    /// <summary>Cursor used when no state has been persisted yet.</summary>
    public static AcscCursor Empty { get; } = new(
        AcscEndpointPreference.Auto,
        EmptyFeedDictionary,
        EmptyGuidList,
        EmptyGuidList);

    /// <summary>Returns a copy with the pending-parse queue replaced (de-duplicated; null means empty).</summary>
    public AcscCursor WithPendingDocuments(IEnumerable<Guid> documents)
        => this with { PendingDocuments = documents?.Distinct().ToArray() ?? EmptyGuidList };

    /// <summary>Returns a copy with the pending-map queue replaced (de-duplicated; null means empty).</summary>
    public AcscCursor WithPendingMappings(IEnumerable<Guid> mappings)
        => this with { PendingMappings = mappings?.Distinct().ToArray() ?? EmptyGuidList };

    /// <summary>Returns a copy with the endpoint preference replaced.</summary>
    public AcscCursor WithPreferredEndpoint(AcscEndpointPreference preference)
        => this with { PreferredEndpoint = preference };

    /// <summary>Returns a copy whose per-feed published map is a defensive, case-insensitive snapshot of <paramref name="values"/>.</summary>
    public AcscCursor WithLastPublished(IDictionary<string, DateTimeOffset?> values)
    {
        var snapshot = new Dictionary<string, DateTimeOffset?>(StringComparer.OrdinalIgnoreCase);
        if (values is not null)
        {
            foreach (var kvp in values)
            {
                snapshot[kvp.Key] = kvp.Value;
            }
        }
        return this with { LastPublishedByFeed = snapshot };
    }

    /// <summary>
    /// Serializes the cursor. Guids are stored as strings, the endpoint preference by
    /// name, and feed timestamps as UTC BSON dates; feeds without a timestamp are omitted.
    /// </summary>
    public BsonDocument ToBsonDocument()
    {
        var document = new BsonDocument
        {
            ["preferredEndpoint"] = PreferredEndpoint.ToString(),
            ["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
            ["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
        };
        var feedsDocument = new BsonDocument();
        foreach (var kvp in LastPublishedByFeed)
        {
            if (kvp.Value.HasValue)
            {
                feedsDocument[kvp.Key] = kvp.Value.Value.UtcDateTime;
            }
        }
        document["feeds"] = feedsDocument;
        return document;
    }

    /// <summary>
    /// Deserializes a cursor, tolerating missing/partial documents: absent fields fall
    /// back to <see cref="Empty"/> semantics and unparseable values are skipped.
    /// </summary>
    public static AcscCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }
        var preferredEndpoint = document.TryGetValue("preferredEndpoint", out var endpointValue)
            ? ParseEndpointPreference(endpointValue.AsString)
            : AcscEndpointPreference.Auto;
        var feeds = new Dictionary<string, DateTimeOffset?>(StringComparer.OrdinalIgnoreCase);
        if (document.TryGetValue("feeds", out var feedsValue) && feedsValue is BsonDocument feedsDocument)
        {
            foreach (var element in feedsDocument.Elements)
            {
                feeds[element.Name] = ParseDate(element.Value);
            }
        }
        var pendingDocuments = ReadGuidArray(document, "pendingDocuments");
        var pendingMappings = ReadGuidArray(document, "pendingMappings");
        return new AcscCursor(
            preferredEndpoint,
            feeds,
            pendingDocuments,
            pendingMappings);
    }

    // Reads a string array of Guids; malformed entries are silently dropped.
    private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
        {
            return EmptyGuidList;
        }
        var list = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (Guid.TryParse(element?.ToString(), out var guid))
            {
                list.Add(guid);
            }
        }
        return list;
    }

    // Accepts either a native BSON date or an ISO-8601 string; anything else yields null.
    private static DateTimeOffset? ParseDate(BsonValue value)
    {
        return value.BsonType switch
        {
            BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),
            BsonType.String when DateTimeOffset.TryParse(value.AsString, out var parsed) => parsed.ToUniversalTime(),
            _ => null,
        };
    }

    // Unknown/legacy preference names degrade to Auto rather than throwing.
    private static AcscEndpointPreference ParseEndpointPreference(string? value)
    {
        if (Enum.TryParse<AcscEndpointPreference>(value, ignoreCase: true, out var parsed))
        {
            return parsed;
        }
        return AcscEndpointPreference.Auto;
    }
}

View File

@@ -0,0 +1,97 @@
using System.Diagnostics.Metrics;
namespace StellaOps.Concelier.Connector.Acsc.Internal;
/// <summary>
/// Counter-based telemetry for the ACSC connector, published on the
/// "StellaOps.Concelier.Connector.Acsc" meter. Fetch counters are tagged with the
/// feed slug and fetch mode; parse counters with the feed slug only.
/// </summary>
public sealed class AcscDiagnostics : IDisposable
{
    private const string MeterName = "StellaOps.Concelier.Connector.Acsc";
    private const string MeterVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _fetchAttempts;
    private readonly Counter<long> _fetchSuccess;
    private readonly Counter<long> _fetchFailures;
    private readonly Counter<long> _fetchUnchanged;
    private readonly Counter<long> _fetchFallbacks;
    private readonly Counter<long> _cursorUpdates;
    private readonly Counter<long> _parseAttempts;
    private readonly Counter<long> _parseSuccess;
    private readonly Counter<long> _parseFailures;
    private readonly Counter<long> _mapSuccess;

    public AcscDiagnostics()
    {
        _meter = new Meter(MeterName, MeterVersion);
        _fetchAttempts = _meter.CreateCounter<long>("acsc.fetch.attempts", unit: "operations");
        _fetchSuccess = _meter.CreateCounter<long>("acsc.fetch.success", unit: "operations");
        _fetchFailures = _meter.CreateCounter<long>("acsc.fetch.failures", unit: "operations");
        _fetchUnchanged = _meter.CreateCounter<long>("acsc.fetch.unchanged", unit: "operations");
        _fetchFallbacks = _meter.CreateCounter<long>("acsc.fetch.fallbacks", unit: "operations");
        _cursorUpdates = _meter.CreateCounter<long>("acsc.cursor.published_updates", unit: "feeds");
        _parseAttempts = _meter.CreateCounter<long>("acsc.parse.attempts", unit: "documents");
        _parseSuccess = _meter.CreateCounter<long>("acsc.parse.success", unit: "documents");
        _parseFailures = _meter.CreateCounter<long>("acsc.parse.failures", unit: "documents");
        _mapSuccess = _meter.CreateCounter<long>("acsc.map.success", unit: "advisories");
    }

    public void FetchAttempt(string feed, string mode) => AddOne(_fetchAttempts, feed, mode);

    public void FetchSuccess(string feed, string mode) => AddOne(_fetchSuccess, feed, mode);

    public void FetchFailure(string feed, string mode) => AddOne(_fetchFailures, feed, mode);

    public void FetchUnchanged(string feed, string mode) => AddOne(_fetchUnchanged, feed, mode);

    public void FetchFallback(string feed, string mode, string reason)
        => _fetchFallbacks.Add(
            1,
            Tag("feed", feed),
            Tag("mode", mode),
            Tag("reason", reason));

    public void CursorUpdated(string feed) => _cursorUpdates.Add(1, Tag("feed", feed));

    public void ParseAttempt(string feed) => _parseAttempts.Add(1, Tag("feed", feed));

    public void ParseSuccess(string feed) => _parseSuccess.Add(1, Tag("feed", feed));

    public void ParseFailure(string feed, string reason)
        => _parseFailures.Add(1, Tag("feed", feed), Tag("reason", reason));

    /// <summary>Records mapped advisories; no-op for non-positive counts.</summary>
    public void MapSuccess(int advisoryCount)
    {
        if (advisoryCount <= 0)
        {
            return;
        }
        _mapSuccess.Add(advisoryCount);
    }

    // Common tagging for the fetch-style counters.
    private static void AddOne(Counter<long> counter, string feed, string mode)
        => counter.Add(1, Tag("feed", feed), Tag("mode", mode));

    private static KeyValuePair<string, object?> Tag(string name, object? value)
        => new(name, value);

    public void Dispose() => _meter.Dispose();
}

View File

@@ -0,0 +1,20 @@
using StellaOps.Concelier.Storage.Mongo.Documents;
namespace StellaOps.Concelier.Connector.Acsc.Internal;
/// <summary>
/// Feed slug and fetch mode recovered from the metadata stamped onto a fetched
/// document ("acsc.feed.slug" / "acsc.fetch.mode"). Missing or blank values
/// come back as empty strings.
/// </summary>
internal readonly record struct AcscDocumentMetadata(string FeedSlug, string FetchMode)
{
    public static AcscDocumentMetadata FromDocument(DocumentRecord document)
    {
        var metadata = document.Metadata;
        if (metadata is null)
        {
            return new AcscDocumentMetadata(string.Empty, string.Empty);
        }

        metadata.TryGetValue("acsc.feed.slug", out var slug);
        metadata.TryGetValue("acsc.fetch.mode", out var mode);
        return new AcscDocumentMetadata(Clean(slug), Clean(mode));
    }

    // Blank or whitespace-only values collapse to the empty string; others are trimmed.
    private static string Clean(string? value)
        => string.IsNullOrWhiteSpace(value) ? string.Empty : value.Trim();
}

View File

@@ -0,0 +1,58 @@
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.Acsc.Internal;
/// <summary>
/// Parsed snapshot of one ACSC RSS/Atom feed: channel-level metadata plus the
/// normalised entries. Serialized to JSON for DTO storage.
/// </summary>
internal sealed record AcscFeedDto(
    [property: JsonPropertyName("feedSlug")] string FeedSlug,
    [property: JsonPropertyName("feedTitle")] string? FeedTitle,
    [property: JsonPropertyName("feedLink")] string? FeedLink,
    [property: JsonPropertyName("feedUpdated")] DateTimeOffset? FeedUpdated,
    [property: JsonPropertyName("parsedAt")] DateTimeOffset ParsedAt,
    [property: JsonPropertyName("entries")] IReadOnlyList<AcscEntryDto> Entries)
{
    /// <summary>Template returned (via <c>with</c>) for empty payloads.</summary>
    public static AcscFeedDto Empty { get; } = new(
        FeedSlug: string.Empty,
        FeedTitle: null,
        FeedLink: null,
        FeedUpdated: null,
        ParsedAt: DateTimeOffset.UnixEpoch,
        Entries: Array.Empty<AcscEntryDto>());
}
/// <summary>
/// One normalised feed item: identity, timestamps, sanitized HTML plus its plain-text
/// projection, extracted references/aliases, and any "label: value" fields pulled
/// from the entry body.
/// </summary>
internal sealed record AcscEntryDto(
    [property: JsonPropertyName("entryId")] string EntryId,
    [property: JsonPropertyName("title")] string Title,
    [property: JsonPropertyName("link")] string? Link,
    [property: JsonPropertyName("feedSlug")] string FeedSlug,
    [property: JsonPropertyName("published")] DateTimeOffset? Published,
    [property: JsonPropertyName("updated")] DateTimeOffset? Updated,
    [property: JsonPropertyName("summary")] string Summary,
    [property: JsonPropertyName("contentHtml")] string ContentHtml,
    [property: JsonPropertyName("contentText")] string ContentText,
    [property: JsonPropertyName("references")] IReadOnlyList<AcscReferenceDto> References,
    [property: JsonPropertyName("aliases")] IReadOnlyList<string> Aliases,
    [property: JsonPropertyName("fields")] IReadOnlyDictionary<string, string> Fields)
{
    /// <summary>Blank entry template; field lookups are case-insensitive.</summary>
    public static AcscEntryDto Empty { get; } = new(
        EntryId: string.Empty,
        Title: string.Empty,
        Link: null,
        FeedSlug: string.Empty,
        Published: null,
        Updated: null,
        Summary: string.Empty,
        ContentHtml: string.Empty,
        ContentText: string.Empty,
        References: Array.Empty<AcscReferenceDto>(),
        Aliases: Array.Empty<string>(),
        Fields: new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase));
}
/// <summary>
/// Hyperlink extracted from an entry body: anchor text (falls back to the URL) and href.
/// </summary>
internal sealed record AcscReferenceDto(
    [property: JsonPropertyName("title")] string Title,
    [property: JsonPropertyName("url")] string Url)
{
    public static AcscReferenceDto Empty { get; } = new(
        Title: string.Empty,
        Url: string.Empty);
}

View File

@@ -0,0 +1,594 @@
using System.Globalization;
using System.Text;
using System.Xml.Linq;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using System.Security.Cryptography;
using StellaOps.Concelier.Connector.Common.Html;
namespace StellaOps.Concelier.Connector.Acsc.Internal;
/// <summary>
/// Parses ACSC RSS 2.0 / Atom payloads into <see cref="AcscFeedDto"/> instances.
/// Handles both feed dialects, sanitizes entry HTML, and extracts references,
/// "label: value" fields, and alias candidates from the entry bodies.
/// </summary>
internal static class AcscFeedParser
{
    private static readonly XNamespace AtomNamespace = "http://www.w3.org/2005/Atom";
    private static readonly XNamespace ContentNamespace = "http://purl.org/rss/1.0/modules/content/";
    private static readonly XNamespace DublinCoreNamespace = "http://purl.org/dc/elements/1.1/";

    private static readonly IReadOnlyDictionary<string, string> EmptyFields =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Parses a raw feed payload.
    /// </summary>
    /// <param name="payload">Raw HTTP response body (RSS or Atom XML).</param>
    /// <param name="feedSlug">Configured slug of the feed the payload belongs to.</param>
    /// <param name="parsedAt">Timestamp recorded on the resulting DTO.</param>
    /// <param name="sanitizer">Sanitizer applied to entry HTML before text extraction.</param>
    /// <returns>A populated DTO; an empty DTO when the payload has zero bytes.</returns>
    /// <exception cref="System.Xml.XmlException">Thrown when the payload is not well-formed XML.</exception>
    public static AcscFeedDto Parse(byte[] payload, string feedSlug, DateTimeOffset parsedAt, HtmlContentSanitizer sanitizer)
    {
        ArgumentNullException.ThrowIfNull(payload);
        ArgumentNullException.ThrowIfNull(sanitizer);

        if (payload.Length == 0)
        {
            return AcscFeedDto.Empty with
            {
                FeedSlug = feedSlug ?? string.Empty,
                ParsedAt = parsedAt,
                Entries = Array.Empty<AcscEntryDto>(),
            };
        }

        // Load from a stream so the XML reader honours the payload's own encoding
        // declaration and byte-order mark. (Decoding with Encoding.UTF8.GetString would
        // leave a U+FEFF prefix on BOM-carrying payloads and make XDocument.Parse throw,
        // and would mis-decode non-UTF-8 payloads.)
        XDocument xml;
        using (var stream = new MemoryStream(payload, 0, payload.Length, writable: false))
        {
            xml = XDocument.Load(stream);
        }

        var (feedTitle, feedLink, feedUpdated) = ExtractFeedMetadata(xml);
        var entries = new List<AcscEntryDto>();

        foreach (var item in ExtractEntries(xml))
        {
            var entryId = ExtractEntryId(item);
            if (string.IsNullOrWhiteSpace(entryId))
            {
                // Fall back to a hash of title + link to keep the id stable across fetches.
                entryId = GenerateFallbackId(item);
            }

            var title = ExtractTitle(item);
            var link = ExtractLink(item);
            var published = ExtractDate(item, "pubDate") ?? ExtractAtomDate(item, "published") ?? ExtractDcDate(item);
            var updated = ExtractAtomDate(item, "updated");

            var rawHtml = ExtractContent(item);
            var baseUri = TryCreateUri(link);
            var sanitizedHtml = sanitizer.Sanitize(rawHtml, baseUri);
            var htmlFragment = ParseHtmlFragment(sanitizedHtml);

            var summary = BuildSummary(htmlFragment) ?? string.Empty;
            var contentText = NormalizeWhitespace(htmlFragment?.TextContent ?? string.Empty);
            var references = ExtractReferences(htmlFragment);
            var fields = ExtractFields(htmlFragment, out var serialNumber, out var advisoryType);
            var aliases = BuildAliases(serialNumber, advisoryType);

            entries.Add(new AcscEntryDto(
                EntryId: entryId,
                Title: title,
                Link: link,
                FeedSlug: feedSlug ?? string.Empty,
                Published: published,
                Updated: updated,
                Summary: summary,
                ContentHtml: sanitizedHtml,
                ContentText: contentText,
                References: references,
                Aliases: aliases,
                Fields: fields));
        }

        return new AcscFeedDto(
            FeedSlug: feedSlug ?? string.Empty,
            FeedTitle: feedTitle,
            FeedLink: feedLink,
            FeedUpdated: feedUpdated,
            ParsedAt: parsedAt,
            Entries: entries);
    }

    // Channel-level title/link/updated for either dialect; (null, null, null) when unrecognised.
    private static (string? Title, string? Link, DateTimeOffset? Updated) ExtractFeedMetadata(XDocument xml)
    {
        var root = xml.Root;
        if (root is null)
        {
            return (null, null, null);
        }

        if (string.Equals(root.Name.LocalName, "rss", StringComparison.OrdinalIgnoreCase))
        {
            var channel = root.Element("channel");
            var title = channel?.Element("title")?.Value?.Trim();
            var link = channel?.Element("link")?.Value?.Trim();
            var updated = TryParseDate(channel?.Element("lastBuildDate")?.Value);
            return (title, link, updated);
        }

        if (root.Name == AtomNamespace + "feed")
        {
            var title = root.Element(AtomNamespace + "title")?.Value?.Trim();
            // Prefer the rel="alternate" link; fall back to the first link element.
            var link = root.Elements(AtomNamespace + "link")
                    .FirstOrDefault(static element =>
                        string.Equals(element.Attribute("rel")?.Value, "alternate", StringComparison.OrdinalIgnoreCase))
                    ?.Attribute("href")?.Value?.Trim()
                ?? root.Element(AtomNamespace + "link")?.Attribute("href")?.Value?.Trim();
            var updated = TryParseDate(root.Element(AtomNamespace + "updated")?.Value);
            return (title, link, updated);
        }

        return (null, null, null);
    }

    // Yields RSS <item> or Atom <entry> elements depending on the root element.
    private static IEnumerable<XElement> ExtractEntries(XDocument xml)
    {
        var root = xml.Root;
        if (root is null)
        {
            yield break;
        }

        if (string.Equals(root.Name.LocalName, "rss", StringComparison.OrdinalIgnoreCase))
        {
            var channel = root.Element("channel");
            if (channel is null)
            {
                yield break;
            }
            foreach (var item in channel.Elements("item"))
            {
                yield return item;
            }
            yield break;
        }

        if (root.Name == AtomNamespace + "feed")
        {
            foreach (var entry in root.Elements(AtomNamespace + "entry"))
            {
                yield return entry;
            }
        }
    }

    private static string ExtractTitle(XElement element)
    {
        var title = element.Element("title")?.Value
            ?? element.Element(AtomNamespace + "title")?.Value
            ?? string.Empty;
        return title.Trim();
    }

    // RSS <link> text first, then Atom rel="alternate" (or first) link's href.
    private static string? ExtractLink(XElement element)
    {
        var linkValue = element.Element("link")?.Value;
        if (!string.IsNullOrWhiteSpace(linkValue))
        {
            return linkValue.Trim();
        }

        var atomLink = element.Elements(AtomNamespace + "link")
                .FirstOrDefault(static el =>
                    string.Equals(el.Attribute("rel")?.Value, "alternate", StringComparison.OrdinalIgnoreCase))
            ?? element.Element(AtomNamespace + "link");
        var href = atomLink?.Attribute("href")?.Value;
        return string.IsNullOrWhiteSpace(href) ? null : href.Trim();
    }

    // Identity preference: RSS <guid>, Atom <id>, entry link, then a hash of the title.
    private static string ExtractEntryId(XElement element)
    {
        var guid = element.Element("guid")?.Value;
        if (!string.IsNullOrWhiteSpace(guid))
        {
            return guid.Trim();
        }

        var atomId = element.Element(AtomNamespace + "id")?.Value;
        if (!string.IsNullOrWhiteSpace(atomId))
        {
            return atomId.Trim();
        }

        if (!string.IsNullOrWhiteSpace(element.Element("link")?.Value))
        {
            return element.Element("link")!.Value.Trim();
        }

        if (!string.IsNullOrWhiteSpace(element.Element("title")?.Value))
        {
            return GenerateStableKey(element.Element("title")!.Value);
        }

        return string.Empty;
    }

    // Deterministic id derived from title + link; random only when both are absent.
    private static string GenerateFallbackId(XElement element)
    {
        var builder = new StringBuilder();
        var title = element.Element("title")?.Value;
        if (!string.IsNullOrWhiteSpace(title))
        {
            builder.Append(title.Trim());
        }

        var link = ExtractLink(element);
        if (!string.IsNullOrWhiteSpace(link))
        {
            if (builder.Length > 0)
            {
                builder.Append("::");
            }
            builder.Append(link);
        }

        return builder.Length == 0
            ? Guid.NewGuid().ToString("n")
            : GenerateStableKey(builder.ToString());
    }

    // Lowercase hex SHA-256 of the UTF-8 bytes.
    private static string GenerateStableKey(string value)
        => Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(value))).ToLowerInvariant();

    // Richest available body: content:encoded, then <description>, then Atom <summary>.
    private static string ExtractContent(XElement element)
    {
        var encoded = element.Element(ContentNamespace + "encoded")?.Value;
        if (!string.IsNullOrWhiteSpace(encoded))
        {
            return encoded;
        }

        var description = element.Element("description")?.Value;
        if (!string.IsNullOrWhiteSpace(description))
        {
            return description;
        }

        var summary = element.Element(AtomNamespace + "summary")?.Value;
        return string.IsNullOrWhiteSpace(summary) ? string.Empty : summary;
    }

    private static DateTimeOffset? ExtractDate(XElement element, string name)
        => TryParseDate(element.Element(name)?.Value);

    private static DateTimeOffset? ExtractAtomDate(XElement element, string name)
        => TryParseDate(element.Element(AtomNamespace + name)?.Value);

    private static DateTimeOffset? ExtractDcDate(XElement element)
        => TryParseDate(element.Element(DublinCoreNamespace + "date")?.Value);

    // Invariant-culture parse first for determinism; current culture as a best-effort fallback.
    private static DateTimeOffset? TryParseDate(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        if (DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AllowWhiteSpaces, out var result))
        {
            return result.ToUniversalTime();
        }

        if (DateTimeOffset.TryParse(value, CultureInfo.CurrentCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AllowWhiteSpaces, out result))
        {
            return result.ToUniversalTime();
        }

        return null;
    }

    private static Uri? TryCreateUri(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }
        return Uri.TryCreate(value, UriKind.Absolute, out var uri) ? uri : null;
    }

    // Wraps the sanitized fragment in a body so AngleSharp yields a queryable root.
    private static IElement? ParseHtmlFragment(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return null;
        }

        var parser = new HtmlParser(new HtmlParserOptions
        {
            IsKeepingSourceReferences = false,
        });
        var document = parser.ParseDocument($"<body>{html}</body>");
        return document.Body;
    }

    // Joins top-level child texts with blank lines; <li> items become "- " bullets.
    private static string? BuildSummary(IElement? root)
    {
        if (root is null || !root.HasChildNodes)
        {
            return root?.TextContent is { Length: > 0 } text
                ? NormalizeWhitespace(text)
                : string.Empty;
        }

        var segments = new List<string>();
        foreach (var child in root.Children)
        {
            var text = NormalizeWhitespace(child.TextContent);
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            segments.Add(string.Equals(child.NodeName, "LI", StringComparison.OrdinalIgnoreCase)
                ? $"- {text}"
                : text);
        }

        return segments.Count == 0
            ? NormalizeWhitespace(root.TextContent)
            : string.Join("\n\n", segments);
    }

    // All anchors with a non-blank href, de-duplicated by href (case-insensitive);
    // the anchor text (or the href itself) becomes the reference title.
    private static IReadOnlyList<AcscReferenceDto> ExtractReferences(IElement? root)
    {
        if (root is null)
        {
            return Array.Empty<AcscReferenceDto>();
        }

        var anchors = root.QuerySelectorAll("a");
        if (anchors.Length == 0)
        {
            return Array.Empty<AcscReferenceDto>();
        }

        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var references = new List<AcscReferenceDto>(anchors.Length);
        foreach (var anchor in anchors)
        {
            var href = anchor.GetAttribute("href");
            if (string.IsNullOrWhiteSpace(href) || !seen.Add(href))
            {
                continue;
            }

            var text = NormalizeWhitespace(anchor.TextContent);
            references.Add(new AcscReferenceDto(string.IsNullOrEmpty(text) ? href : text, href));
        }

        return references;
    }

    // Treats <strong>Label:</strong> value patterns as fields, keyed by camelCased label.
    // First occurrence of a key wins; serialNumber/advisoryType are surfaced separately.
    private static IReadOnlyDictionary<string, string> ExtractFields(IElement? root, out string? serialNumber, out string? advisoryType)
    {
        serialNumber = null;
        advisoryType = null;
        if (root is null)
        {
            return EmptyFields;
        }

        var map = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (var element in root.QuerySelectorAll("strong"))
        {
            var labelRaw = NormalizeWhitespace(element.TextContent);
            if (string.IsNullOrEmpty(labelRaw))
            {
                continue;
            }

            var label = labelRaw.TrimEnd(':').Trim();
            if (string.IsNullOrEmpty(label))
            {
                continue;
            }

            var key = NormalizeFieldKey(label);
            if (string.IsNullOrEmpty(key))
            {
                continue;
            }

            var value = ExtractFieldValue(element);
            if (string.IsNullOrEmpty(value))
            {
                continue;
            }

            if (!map.ContainsKey(key))
            {
                map[key] = value;
            }

            if (string.Equals(key, "serialNumber", StringComparison.OrdinalIgnoreCase))
            {
                serialNumber ??= value;
            }
            else if (string.Equals(key, "advisoryType", StringComparison.OrdinalIgnoreCase))
            {
                advisoryType ??= value;
            }
        }

        return map.Count == 0 ? EmptyFields : map;
    }

    // Concatenates the text of the label's following siblings; when that is blank,
    // falls back to the parent's text with the label removed. Leading separators
    // (colon, hyphen, en/em dash) are stripped.
    private static string? ExtractFieldValue(IElement strongElement)
    {
        var builder = new StringBuilder();
        var node = strongElement.NextSibling;
        while (node is not null)
        {
            if (node.NodeType == NodeType.Text)
            {
                builder.Append(node.TextContent);
            }
            else if (node is IElement element)
            {
                builder.Append(element.TextContent);
            }
            node = node.NextSibling;
        }

        var value = builder.ToString();
        if (string.IsNullOrWhiteSpace(value))
        {
            var parent = strongElement.ParentElement;
            if (parent is not null)
            {
                var parentText = parent.TextContent ?? string.Empty;
                value = parentText.Replace(strongElement.TextContent ?? string.Empty, string.Empty, StringComparison.OrdinalIgnoreCase);
            }
        }

        value = NormalizeWhitespace(value);
        if (string.IsNullOrEmpty(value))
        {
            return null;
        }

        // Note: the original had a mangled empty char literal here; restored as the en dash.
        value = value.TrimStart(':', '-', '–', '—', ' ');
        return value.Trim();
    }

    private static IReadOnlyList<string> BuildAliases(string? serialNumber, string? advisoryType)
    {
        var aliases = new List<string>(capacity: 2);
        if (!string.IsNullOrWhiteSpace(serialNumber))
        {
            aliases.Add(serialNumber.Trim());
        }
        if (!string.IsNullOrWhiteSpace(advisoryType))
        {
            aliases.Add(advisoryType.Trim());
        }
        return aliases.Count == 0 ? Array.Empty<string>() : aliases;
    }

    // "Serial Number" -> "serialNumber": alphanumerics only, camelCased at word breaks.
    private static string NormalizeFieldKey(string label)
    {
        if (string.IsNullOrWhiteSpace(label))
        {
            return string.Empty;
        }

        var builder = new StringBuilder(label.Length);
        var upperNext = false;
        foreach (var c in label)
        {
            if (char.IsLetterOrDigit(c))
            {
                if (builder.Length == 0)
                {
                    builder.Append(char.ToLowerInvariant(c));
                }
                else if (upperNext)
                {
                    builder.Append(char.ToUpperInvariant(c));
                    upperNext = false;
                }
                else
                {
                    builder.Append(char.ToLowerInvariant(c));
                }
            }
            else
            {
                if (builder.Length > 0)
                {
                    upperNext = true;
                }
            }
        }

        return builder.Length == 0 ? label.Trim() : builder.ToString();
    }

    // Collapses any run of whitespace to a single space and trims the ends.
    private static string NormalizeWhitespace(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var builder = new StringBuilder(value.Length);
        var previousIsWhitespace = false;
        foreach (var ch in value)
        {
            if (char.IsWhiteSpace(ch))
            {
                if (!previousIsWhitespace)
                {
                    builder.Append(' ');
                    previousIsWhitespace = true;
                }
                continue;
            }
            builder.Append(ch);
            previousIsWhitespace = false;
        }

        return builder.ToString().Trim();
    }
}

View File

@@ -0,0 +1,312 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
namespace StellaOps.Concelier.Connector.Acsc.Internal;
internal static class AcscMapper
{
private static readonly Regex CveRegex = new("CVE-\\d{4}-\\d{4,7}", RegexOptions.IgnoreCase | RegexOptions.Compiled);
public static IReadOnlyList<Advisory> Map(
AcscFeedDto feed,
DocumentRecord document,
DtoRecord dtoRecord,
string sourceName,
DateTimeOffset mappedAt)
{
ArgumentNullException.ThrowIfNull(feed);
ArgumentNullException.ThrowIfNull(document);
ArgumentNullException.ThrowIfNull(dtoRecord);
ArgumentException.ThrowIfNullOrEmpty(sourceName);
if (feed.Entries is null || feed.Entries.Count == 0)
{
return Array.Empty<Advisory>();
}
var advisories = new List<Advisory>(feed.Entries.Count);
foreach (var entry in feed.Entries)
{
if (entry is null)
{
continue;
}
var advisoryKey = CreateAdvisoryKey(sourceName, feed.FeedSlug, entry);
var fetchProvenance = new AdvisoryProvenance(
sourceName,
"document",
document.Uri,
document.FetchedAt.ToUniversalTime(),
fieldMask: new[] { "summary", "aliases", "references", "affectedPackages" });
var feedProvenance = new AdvisoryProvenance(
sourceName,
"feed",
feed.FeedSlug ?? string.Empty,
feed.ParsedAt.ToUniversalTime(),
fieldMask: new[] { "summary" });
var mappingProvenance = new AdvisoryProvenance(
sourceName,
"mapping",
entry.EntryId ?? entry.Link ?? advisoryKey,
mappedAt.ToUniversalTime(),
fieldMask: new[] { "summary", "aliases", "references", "affectedpackages" });
var provenance = new[]
{
fetchProvenance,
feedProvenance,
mappingProvenance,
};
var aliases = BuildAliases(entry);
var severity = TryGetSeverity(entry.Fields);
var references = BuildReferences(entry, sourceName, mappedAt);
var affectedPackages = BuildAffectedPackages(entry, sourceName, mappedAt);
var advisory = new Advisory(
advisoryKey,
string.IsNullOrWhiteSpace(entry.Title) ? $"ACSC Advisory {entry.EntryId}" : entry.Title,
string.IsNullOrWhiteSpace(entry.Summary) ? null : entry.Summary,
language: "en",
published: entry.Published?.ToUniversalTime() ?? feed.FeedUpdated?.ToUniversalTime() ?? document.FetchedAt.ToUniversalTime(),
modified: entry.Updated?.ToUniversalTime(),
severity: severity,
exploitKnown: false,
aliases: aliases,
references: references,
affectedPackages: affectedPackages,
cvssMetrics: Array.Empty<CvssMetric>(),
provenance: provenance);
advisories.Add(advisory);
}
return advisories;
}
private static IReadOnlyList<string> BuildAliases(AcscEntryDto entry)
{
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
if (!string.IsNullOrWhiteSpace(entry.EntryId))
{
aliases.Add(entry.EntryId.Trim());
}
foreach (var alias in entry.Aliases ?? Array.Empty<string>())
{
if (!string.IsNullOrWhiteSpace(alias))
{
aliases.Add(alias.Trim());
}
}
foreach (var match in CveRegex.Matches(entry.Summary ?? string.Empty).Cast<Match>())
{
var value = match.Value.ToUpperInvariant();
aliases.Add(value);
}
foreach (var match in CveRegex.Matches(entry.ContentText ?? string.Empty).Cast<Match>())
{
var value = match.Value.ToUpperInvariant();
aliases.Add(value);
}
return aliases.Count == 0
? Array.Empty<string>()
: aliases.OrderBy(static value => value, StringComparer.OrdinalIgnoreCase).ToArray();
}
private static IReadOnlyList<AdvisoryReference> BuildReferences(AcscEntryDto entry, string sourceName, DateTimeOffset recordedAt)
{
var references = new List<AdvisoryReference>();
var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
void AddReference(string? url, string? kind, string? sourceTag, string? summary)
{
if (string.IsNullOrWhiteSpace(url))
{
return;
}
if (!Validation.LooksLikeHttpUrl(url))
{
return;
}
if (!seen.Add(url))
{
return;
}
references.Add(new AdvisoryReference(
url,
kind,
sourceTag,
summary,
new AdvisoryProvenance(sourceName, "reference", url, recordedAt.ToUniversalTime())));
}
AddReference(entry.Link, "advisory", entry.FeedSlug, entry.Title);
foreach (var reference in entry.References ?? Array.Empty<AcscReferenceDto>())
{
if (reference is null)
{
continue;
}
AddReference(reference.Url, "reference", null, reference.Title);
}
return references.Count == 0
? Array.Empty<AdvisoryReference>()
: references
.OrderBy(static reference => reference.Url, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
private static IReadOnlyList<AffectedPackage> BuildAffectedPackages(AcscEntryDto entry, string sourceName, DateTimeOffset recordedAt)
{
if (entry.Fields is null || entry.Fields.Count == 0)
{
return Array.Empty<AffectedPackage>();
}
if (!entry.Fields.TryGetValue("systemsAffected", out var systemsAffected) && !entry.Fields.TryGetValue("productsAffected", out systemsAffected))
{
return Array.Empty<AffectedPackage>();
}
if (string.IsNullOrWhiteSpace(systemsAffected))
{
return Array.Empty<AffectedPackage>();
}
var identifiers = systemsAffected
.Split(new[] { ',', ';', '\n' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.Select(static value => value.Trim())
.Where(static value => !string.IsNullOrWhiteSpace(value))
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToArray();
if (identifiers.Length == 0)
{
return Array.Empty<AffectedPackage>();
}
var packages = new List<AffectedPackage>(identifiers.Length);
foreach (var identifier in identifiers)
{
var provenance = new[]
{
new AdvisoryProvenance(sourceName, "affected", identifier, recordedAt.ToUniversalTime(), fieldMask: new[] { "affectedpackages" }),
};
packages.Add(new AffectedPackage(
AffectedPackageTypes.Vendor,
identifier,
platform: null,
versionRanges: Array.Empty<AffectedVersionRange>(),
statuses: Array.Empty<AffectedPackageStatus>(),
provenance: provenance,
normalizedVersions: Array.Empty<NormalizedVersionRule>()));
}
return packages
.OrderBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
private static string? TryGetSeverity(IReadOnlyDictionary<string, string> fields)
{
if (fields is null || fields.Count == 0)
{
return null;
}
var keys = new[]
{
"severity",
"riskLevel",
"threatLevel",
"impact",
};
foreach (var key in keys)
{
if (fields.TryGetValue(key, out var value) && !string.IsNullOrWhiteSpace(value))
{
return value.Trim();
}
}
return null;
}
private static string CreateAdvisoryKey(string sourceName, string? feedSlug, AcscEntryDto entry)
{
var slug = string.IsNullOrWhiteSpace(feedSlug) ? "general" : ToSlug(feedSlug);
var candidate = !string.IsNullOrWhiteSpace(entry.EntryId)
? entry.EntryId
: !string.IsNullOrWhiteSpace(entry.Link)
? entry.Link
: entry.Title;
var identifier = !string.IsNullOrWhiteSpace(candidate) ? ToSlug(candidate!) : null;
if (string.IsNullOrEmpty(identifier))
{
identifier = CreateHash(entry.Title ?? Guid.NewGuid().ToString());
}
return $"{sourceName}/{slug}/{identifier}";
}
/// <summary>
/// Lowercases letters/digits, collapses every other character run into a single '-',
/// trims edge dashes, and caps the result at 64 characters. Inputs that yield an empty
/// slug fall back to a short content hash so the result is never empty.
/// </summary>
private static string ToSlug(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return "unknown";
    }

    var chars = new StringBuilder(value.Length);
    var lastWasSeparator = false;
    foreach (var ch in value)
    {
        if (char.IsLetterOrDigit(ch))
        {
            chars.Append(char.ToLowerInvariant(ch));
            lastWasSeparator = false;
            continue;
        }

        // First non-alphanumeric of a run becomes '-'; the rest are dropped.
        if (!lastWasSeparator)
        {
            chars.Append('-');
            lastWasSeparator = true;
        }
    }

    var slug = chars.ToString().Trim('-');
    if (slug.Length == 0)
    {
        // CreateHash yields 16 hex chars, safely under the 64-char cap.
        return CreateHash(value);
    }

    return slug.Length > 64 ? slug[..64] : slug;
}
/// <summary>
/// Produces a stable 16-character lowercase hex digest (SHA-256 prefix) of the UTF-8 input,
/// used as a fallback identifier when slug generation yields nothing usable.
/// </summary>
private static string CreateHash(string value)
{
    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(value));
    // Hex characters only, so truncating before lowercasing is equivalent.
    return Convert.ToHexString(digest)[..16].ToLowerInvariant();
}
}

View File

@@ -0,0 +1,55 @@
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.Acsc;
/// <summary>
/// Job kind identifiers registered with the Concelier scheduler for the ACSC connector.
/// </summary>
internal static class AcscJobKinds
{
    // NOTE(review): these use the "source:acsc:*" prefix documented in the module README,
    // while the AGENTS charter mentions the shorter "acsc:fetch" pattern — confirm which
    // form is canonical before renaming anything (values are persisted scheduler keys).
    public const string Fetch = "source:acsc:fetch";
    public const string Parse = "source:acsc:parse";
    public const string Map = "source:acsc:map";
    public const string Probe = "source:acsc:probe";
}
/// <summary>Scheduler job that runs the ACSC fetch stage via the connector.</summary>
internal sealed class AcscFetchJob : IJob
{
    private readonly AcscConnector _connector;

    public AcscFetchJob(AcscConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    /// <inheritdoc />
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
        => _connector.FetchAsync(context.Services, cancellationToken);
}
/// <summary>Scheduler job that runs the ACSC parse stage via the connector.</summary>
internal sealed class AcscParseJob : IJob
{
    private readonly AcscConnector _connector;

    public AcscParseJob(AcscConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    /// <inheritdoc />
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
        => _connector.ParseAsync(context.Services, cancellationToken);
}
/// <summary>Scheduler job that runs the ACSC map stage via the connector.</summary>
internal sealed class AcscMapJob : IJob
{
    private readonly AcscConnector _connector;

    public AcscMapJob(AcscConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    /// <inheritdoc />
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
        => _connector.MapAsync(context.Services, cancellationToken);
}
/// <summary>Scheduler job that probes endpoint health to steer direct-vs-relay fetching.</summary>
internal sealed class AcscProbeJob : IJob
{
    private readonly AcscConnector _connector;

    public AcscProbeJob(AcscConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    /// <inheritdoc />
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
        => _connector.ProbeAsync(cancellationToken);
}

View File

@@ -0,0 +1,4 @@
using System.Runtime.CompilerServices;

// Expose internal types (connector, jobs, parser helpers) to the fixture regeneration
// tool and the connector's test suite; production consumers see only the public surface.
[assembly: InternalsVisibleTo("FixtureUpdater")]
[assembly: InternalsVisibleTo("StellaOps.Concelier.Connector.Acsc.Tests")]

View File

@@ -0,0 +1,68 @@
## StellaOps.Concelier.Connector.Acsc
Australian Cyber Security Centre (ACSC) connector that ingests RSS/Atom advisories, sanitises embedded HTML, and maps entries into canonical `Advisory` records for Concelier.
### Configuration
Settings live under `concelier:sources:acsc` (see `AcscOptions`):
| Setting | Description | Default |
| --- | --- | --- |
| `baseEndpoint` | Base URI for direct ACSC requests (trailing slash required). | `https://www.cyber.gov.au/` |
| `relayEndpoint` | Optional relay host to fall back to when Akamai refuses HTTP/2. | empty |
| `preferRelayByDefault` | Default endpoint preference when no cursor state exists. | `false` |
| `enableRelayFallback` | Allows automatic relay fallback when direct fetch fails. | `true` |
| `forceRelay` | Forces all fetches through the relay (skips direct attempts). | `false` |
| `feeds` | Array of feed descriptors (`slug`, `relativePath`, `enabled`). | alerts/advisories enabled |
| `requestTimeout` | Per-request timeout override. | 45 seconds |
| `failureBackoff` | Backoff window when fetch fails. | 5 minutes |
| `initialBackfill` | Sliding window used to seed published cursors. | 120 days |
| `userAgent` | Outbound `User-Agent` header. | `StellaOps/Concelier (+https://stella-ops.org)` |
| `requestVersion`/`versionPolicy` | HTTP version negotiation knobs. | HTTP/2 with downgrade |
The dependency injection routine registers the connector plus scheduled jobs:
| Job | Cron | Purpose |
| --- | --- | --- |
| `source:acsc:fetch` | `7,37 * * * *` | Fetch RSS/Atom feeds (direct + relay fallback). |
| `source:acsc:parse` | `12,42 * * * *` | Persist sanitised DTOs (`acsc.feed.v1`). |
| `source:acsc:map` | `17,47 * * * *` | Map DTO entries into canonical advisories. |
| `source:acsc:probe` | `25,55 * * * *` | Verify direct endpoint health and adjust cursor preference. |
### Metrics
Emitted via `AcscDiagnostics` (`Meter` = `StellaOps.Concelier.Connector.Acsc`):
| Instrument | Unit | Description |
| --- | --- | --- |
| `acsc.fetch.attempts` | operations | Feed fetch attempts (tags: `feed`, `mode`). |
| `acsc.fetch.success` | operations | Successful fetches. |
| `acsc.fetch.failures` | operations | Failed fetches before retry backoff. |
| `acsc.fetch.unchanged` | operations | 304 Not Modified responses. |
| `acsc.fetch.fallbacks` | operations | Relay fallbacks triggered (`reason` tag). |
| `acsc.cursor.published_updates` | feeds | Published cursor updates per feed slug. |
| `acsc.parse.attempts` | documents | Parse attempts per feed. |
| `acsc.parse.success` | documents | Successful RSS → DTO conversions. |
| `acsc.parse.failures` | documents | Parse failures (tags: `feed`, `reason`). |
| `acsc.map.success` | advisories | Advisories emitted from a mapping pass. |
### Logging
Key log messages include:
- Fetch successes/failures, HTTP status codes, and relay fallbacks.
- Parse failures with reasons (download, schema, sanitisation).
- Mapping summaries showing advisory counts per document.
- Probe results toggling relay usage.
Logs include feed slug metadata for troubleshooting parallel ingestion.
### Tests & fixtures
`StellaOps.Concelier.Connector.Acsc.Tests` exercises the fetch→parse→map pipeline using canned RSS content. Deterministic snapshots live in `Acsc/Fixtures`. To refresh them after intentional behavioural changes:
```bash
UPDATE_ACSC_FIXTURES=1 dotnet test src/Concelier/__Tests/StellaOps.Concelier.Connector.Acsc.Tests/StellaOps.Concelier.Connector.Acsc.Tests.csproj
```
Remember to review the generated `.actual.json` files when assertions fail without fixture updates.
### Operational notes
- Keep the relay endpoint allowlisted for air-gapped deployments; the probe job will automatically switch back to direct fetching when Akamai stabilises.
- Mapping currently emits vendor `affectedPackages` from “Systems/Products affected” fields; expand range primitives once structured version data appears in ACSC feeds.
- The connector is offline-friendly—no outbound calls beyond the configured feeds.

View File

@@ -0,0 +1,18 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <!-- Connector library: .NET 10 with implicit usings and nullable reference types. -->
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <!-- Plugin contract plus shared connector infrastructure, canonical models,
         Mongo-backed storage, and the job scheduler abstractions. -->
    <ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Storage.Mongo/StellaOps.Concelier.Storage.Mongo.csproj" />
    <ProjectReference Include="../StellaOps.Concelier.Core/StellaOps.Concelier.Core.csproj" />
  </ItemGroup>
</Project>

View File

@@ -0,0 +1,11 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|FEEDCONN-ACSC-02-001 Source discovery & feed contract|BE-Conn-ACSC|Research|**DONE (2025-10-11)** Catalogued feed slugs `/acsc/view-all-content/{alerts,advisories,news,publications,threats}/rss`; every endpoint currently negotiates HTTP/2 then aborts with `INTERNAL_ERROR` (curl exit 92) and hangs for more than 600 s when forcing `--http1.1`. Documented traces + mitigations in `docs/concelier-connector-research-20251011.md` and opened `FEEDCONN-SHARED-HTTP2-001` for shared handler tweaks (force `RequestVersionOrLower`, jittered retries, relay option).|
|FEEDCONN-ACSC-02-002 Fetch pipeline & cursor persistence|BE-Conn-ACSC|Source.Common, Storage.Mongo|**DONE (2025-10-12)** HTTP client now pins `HttpRequestMessage.VersionPolicy = RequestVersionOrLower`, forces `AutomaticDecompression = GZip | Deflate`, and sends `User-Agent: StellaOps/Concelier (+https://stella-ops.org)` via `AddAcscConnector`. Fetch pipeline implemented in `AcscConnector` with relay-aware fallback (`AcscProbeJob` seeds preference), deterministic cursor updates (`preferredEndpoint`, published timestamp per feed), and metadata-deduped documents. Unit tests `AcscConnectorFetchTests` + `AcscHttpClientConfigurationTests` cover direct/relay flows and client wiring.|
|FEEDCONN-ACSC-02-003 Parser & DTO sanitiser|BE-Conn-ACSC|Source.Common|**DONE (2025-10-12)** Added `AcscFeedParser` to sanitise RSS payloads, collapse multi-paragraph summaries, dedupe references, and surface `serialNumber`/`advisoryType` fields as structured metadata + alias candidates. `ParseAsync` now materialises `acsc.feed.v1` DTOs, promotes documents to `pending-map`, and advances cursor state. Covered by `AcscConnectorParseTests`.|
|FEEDCONN-ACSC-02-004 Canonical mapper + range primitives|BE-Conn-ACSC|Models|**DONE (2025-10-12)** Introduced `AcscMapper` and wired `MapAsync` to emit canonical advisories with normalized aliases, source-tagged references, and optional vendor `affectedPackages` derived from “Systems/Products affected” fields. Documents transition to `mapped`, advisories persist via `IAdvisoryStore`, and metrics/logging capture mapped counts. `AcscConnectorParseTests` exercise fetch→parse→map flow.|
|FEEDCONN-ACSC-02-005 Deterministic fixtures & regression tests|QA|Testing|**DONE (2025-10-12)** `AcscConnectorParseTests` now snapshots fetch→parse→map output via `Acsc/Fixtures/acsc-advisories.snapshot.json`; set `UPDATE_ACSC_FIXTURES=1` to regenerate. Tests assert DTO status transitions, advisory persistence, and state cleanup.|
|FEEDCONN-ACSC-02-006 Diagnostics & documentation|DevEx|Docs|**DONE (2025-10-12)** Added module README describing configuration, job schedules, metrics (including new `acsc.map.success` counter), relay behaviour, and fixture workflow. Diagnostics updated to count map successes alongside existing fetch/parse metrics.|
|FEEDCONN-ACSC-02-007 Feed retention & pagination validation|BE-Conn-ACSC|Research|**DONE (2025-10-11)** Relay sampling shows retention ≥ July 2025; re-run once the direct HTTP/2 path is stable to determine whether the feed caps at ~50 items and whether a `?page=` parameter exists. Pending action tracked in the shared HTTP downgrade task.|
|FEEDCONN-ACSC-02-008 HTTP client compatibility plan|BE-Conn-ACSC|Source.Common|**DONE (2025-10-11)** Reproduced Akamai resets, drafted downgrade plan (two-stage HTTP/2 retry + relay fallback), and filed `FEEDCONN-SHARED-HTTP2-001`; module README TODO will host the per-environment knob matrix.|

View File

@@ -0,0 +1,40 @@
# AGENTS
## Role
Build the CCCS (Canadian Centre for Cyber Security) advisories connector so Concelier can ingest national cyber bulletins alongside other vendor/regional sources.
## Scope
- Research CCCS advisory feeds (RSS/Atom, JSON API, or HTML listings) and define the canonical fetch workflow.
- Implement fetch, parse, and mapping stages with deterministic cursoring and retry/backoff behaviour.
- Normalise advisory content (summary, affected vendors/products, mitigation guidance, references, CVE IDs).
- Emit canonical `Advisory` records with aliases, references, affected packages, and provenance metadata.
- Provide fixtures and regression tests to keep the connector deterministic.
## Participants
- `Source.Common` (HTTP clients, fetch service, DTO storage helpers).
- `Storage.Mongo` (raw/document/DTO/advisory stores + source state).
- `Concelier.Models` (canonical advisory data structures).
- `Concelier.Testing` (integration fixtures and snapshot utilities).
## Interfaces & Contracts
- Job kinds: `cccs:fetch`, `cccs:parse`, `cccs:map`.
- Persist ETag/Last-Modified metadata when the upstream supports it.
- Include alias entries for CCCS advisory IDs plus referenced CVE IDs.
## In/Out of scope
In scope:
- End-to-end connector implementation with range primitive coverage for affected packages.
- Minimal telemetry logging/counters matching other connectors.
Out of scope:
- Automated remediation actions or vendor-specific enrichment beyond CCCS published data.
- Export or downstream pipeline changes.
## Observability & Security Expectations
- Log fetch attempts, success/failure counts, and mapping statistics.
- Sanitize HTML safely, dropping scripts/styles before storing DTOs.
- Respect upstream rate limits; mark failures in source state with backoff.
## Tests
- Add `StellaOps.Concelier.Connector.Cccs.Tests` covering fetch/parse/map with canned fixtures.
- Snapshot canonical advisories; support fixture regeneration via env flag.
- Validate deterministic ordering and timestamps to maintain reproducibility.

View File

@@ -0,0 +1,606 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using System.Globalization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Cccs.Configuration;
using StellaOps.Concelier.Connector.Cccs.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Cccs;
public sealed class CccsConnector : IFeedConnector
{
// Serializer for the raw feed-item snapshot persisted to GridFS (web defaults, nulls omitted).
private static readonly JsonSerializerOptions RawSerializerOptions = new(JsonSerializerDefaults.Web)
{
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};

// Serializer for the sanitised DTO payload round-tripped to/from BSON storage.
private static readonly JsonSerializerOptions DtoSerializerOptions = new(JsonSerializerDefaults.Web)
{
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};

// Schema version stamped on every persisted DTO record.
private const string DtoSchemaVersion = "cccs.dto.v1";

private readonly CccsFeedClient _feedClient;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CccsHtmlParser _htmlParser;
private readonly CccsDiagnostics _diagnostics;
private readonly CccsOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CccsConnector> _logger;
/// <summary>
/// Wires the connector with its feed, storage, parsing, and diagnostics dependencies.
/// </summary>
/// <exception cref="ArgumentNullException">When any dependency or the bound options value is null.</exception>
public CccsConnector(
    CccsFeedClient feedClient,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    IAdvisoryStore advisoryStore,
    ISourceStateRepository stateRepository,
    CccsHtmlParser htmlParser,
    CccsDiagnostics diagnostics,
    IOptions<CccsOptions> options,
    TimeProvider? timeProvider,
    ILogger<CccsConnector> logger)
{
    _feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
    _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
    _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
    _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
    _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
    _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
    _htmlParser = htmlParser ?? throw new ArgumentNullException(nameof(htmlParser));
    _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
    _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
    // Fail fast on invalid configuration rather than at the first scheduled fetch.
    _options.Validate();
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
}

/// <summary>Source identifier shared by documents, DTOs, advisories, and cursor state.</summary>
public string SourceName => CccsConnectorPlugin.SourceName;
/// <summary>
/// Fetch stage: pulls every configured CCCS feed, snapshots new or changed items into
/// GridFS-backed raw documents, and persists the updated cursor (pending document/mapping
/// ids plus per-URI content hashes used for change detection).
/// </summary>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);
    var now = _timeProvider.GetUtcNow();
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    // Mutable working copies of cursor state; written back only after all feeds complete.
    var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments);
    var pendingMappings = new HashSet<Guid>(cursor.PendingMappings);
    var knownHashes = new Dictionary<string, string>(cursor.KnownEntryHashes, StringComparer.Ordinal);
    var feedsProcessed = 0;
    var totalItems = 0;
    var added = 0;
    var unchanged = 0;
    try
    {
        foreach (var feed in _options.Feeds)
        {
            cancellationToken.ThrowIfCancellationRequested();
            _diagnostics.FetchAttempt();
            var result = await _feedClient.FetchAsync(feed, _options.RequestTimeout, cancellationToken).ConfigureAwait(false);
            feedsProcessed++;
            totalItems += result.Items.Count;
            if (result.Items.Count == 0)
            {
                _diagnostics.FetchSuccess();
                await DelayBetweenRequestsAsync(cancellationToken).ConfigureAwait(false);
                continue;
            }
            // Untitled items are dropped; the rest are ordered newest-first so the
            // MaxEntriesPerFetch cap keeps the most recently modified entries.
            var items = result.Items
                .Where(static item => !string.IsNullOrWhiteSpace(item.Title))
                .OrderByDescending(item => ParseDate(item.DateModifiedTimestamp) ?? ParseDate(item.DateModified) ?? DateTimeOffset.MinValue)
                .ThenByDescending(item => ParseDate(item.DateCreated) ?? DateTimeOffset.MinValue)
                .ToList();
            foreach (var item in items)
            {
                cancellationToken.ThrowIfCancellationRequested();
                var documentUri = BuildDocumentUri(item, feed);
                var rawDocument = CreateRawDocument(item, feed, result.AlertTypes);
                var payload = JsonSerializer.SerializeToUtf8Bytes(rawDocument, RawSerializerOptions);
                var sha = ComputeSha256(payload);
                // Fast path: the cursor already recorded this exact payload for this URI.
                if (knownHashes.TryGetValue(documentUri, out var existingHash)
                    && string.Equals(existingHash, sha, StringComparison.Ordinal))
                {
                    unchanged++;
                    _diagnostics.FetchUnchanged();
                    continue;
                }
                // Second check against the document store covers cursor resets: an
                // already-mapped document with the same hash needs no re-processing.
                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, documentUri, cancellationToken).ConfigureAwait(false);
                if (existing is not null
                    && string.Equals(existing.Sha256, sha, StringComparison.OrdinalIgnoreCase)
                    && string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
                {
                    knownHashes[documentUri] = sha;
                    unchanged++;
                    _diagnostics.FetchUnchanged();
                    continue;
                }
                var gridFsId = await _rawDocumentStorage.UploadAsync(
                    SourceName,
                    documentUri,
                    payload,
                    "application/json",
                    expiresAt: null,
                    cancellationToken).ConfigureAwait(false);
                var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
                {
                    ["cccs.language"] = rawDocument.Language,
                    ["cccs.sourceId"] = rawDocument.SourceId,
                };
                if (!string.IsNullOrWhiteSpace(rawDocument.SerialNumber))
                {
                    metadata["cccs.serialNumber"] = rawDocument.SerialNumber!;
                }
                if (!string.IsNullOrWhiteSpace(rawDocument.AlertType))
                {
                    metadata["cccs.alertType"] = rawDocument.AlertType!;
                }
                // Reuse the existing record id so the upsert replaces rather than duplicates.
                var recordId = existing?.Id ?? Guid.NewGuid();
                var record = new DocumentRecord(
                    recordId,
                    SourceName,
                    documentUri,
                    now,
                    sha,
                    DocumentStatuses.PendingParse,
                    "application/json",
                    Headers: null,
                    Metadata: metadata,
                    Etag: null,
                    LastModified: rawDocument.Modified ?? rawDocument.Published ?? result.LastModifiedUtc,
                    GridFsId: gridFsId,
                    ExpiresAt: null);
                var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Add(upserted.Id);
                // Re-fetched content invalidates any mapping queued for the old payload.
                pendingMappings.Remove(upserted.Id);
                knownHashes[documentUri] = sha;
                added++;
                _diagnostics.FetchDocument();
                if (added >= _options.MaxEntriesPerFetch)
                {
                    break;
                }
            }
            _diagnostics.FetchSuccess();
            await DelayBetweenRequestsAsync(cancellationToken).ConfigureAwait(false);
            if (added >= _options.MaxEntriesPerFetch)
            {
                break;
            }
        }
    }
    catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or JsonException or InvalidOperationException)
    {
        _diagnostics.FetchFailure();
        _logger.LogError(ex, "CCCS fetch failed");
        // Record the failure with backoff so the scheduler throttles retries.
        await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
        throw;
    }
    // Bound cursor growth before persisting the new state.
    var trimmedHashes = TrimKnownHashes(knownHashes, _options.MaxKnownEntries);
    var updatedCursor = cursor
        .WithPendingDocuments(pendingDocuments)
        .WithPendingMappings(pendingMappings)
        .WithKnownEntryHashes(trimmedHashes)
        .WithLastFetch(now);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    _logger.LogInformation(
        "CCCS fetch completed feeds={Feeds} items={Items} newDocuments={Added} unchanged={Unchanged} pendingDocuments={PendingDocuments} pendingMappings={PendingMappings}",
        feedsProcessed,
        totalItems,
        added,
        unchanged,
        pendingDocuments.Count,
        pendingMappings.Count);
}
/// <summary>
/// Parse stage: converts each pending raw document into a sanitised DTO, marks the
/// document pending-map on success (or failed on any per-document error), and advances
/// the cursor. Storage download failures abort the pass so the batch can retry.
/// </summary>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }
    // List copies are mutated while the loop iterates the cursor's original snapshot.
    var pendingDocuments = cursor.PendingDocuments.ToList();
    var pendingMappings = cursor.PendingMappings.ToList();
    var now = _timeProvider.GetUtcNow();
    var parsed = 0;
    var parseFailures = 0;
    foreach (var documentId in cursor.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (document is null)
        {
            // Document record disappeared; drop it from both queues.
            pendingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            _diagnostics.ParseFailure();
            parseFailures++;
            continue;
        }
        if (!document.GridFsId.HasValue)
        {
            _diagnostics.ParseFailure();
            _logger.LogWarning("CCCS document {DocumentId} missing GridFS payload", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            parseFailures++;
            continue;
        }
        byte[] payload;
        try
        {
            payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Storage-level failure: rethrow so the document stays queued for a retry.
            _diagnostics.ParseFailure();
            _logger.LogError(ex, "CCCS unable to download raw document {DocumentId}", documentId);
            throw;
        }
        CccsRawAdvisoryDocument? raw;
        try
        {
            raw = JsonSerializer.Deserialize<CccsRawAdvisoryDocument>(payload, RawSerializerOptions);
        }
        catch (Exception ex)
        {
            _diagnostics.ParseFailure();
            _logger.LogWarning(ex, "CCCS failed to deserialize raw document {DocumentId}", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            parseFailures++;
            continue;
        }
        if (raw is null)
        {
            _diagnostics.ParseFailure();
            _logger.LogWarning("CCCS raw document {DocumentId} produced null payload", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            parseFailures++;
            continue;
        }
        CccsAdvisoryDto dto;
        try
        {
            // Sanitises embedded HTML and projects the raw snapshot into the DTO shape.
            dto = _htmlParser.Parse(raw);
        }
        catch (Exception ex)
        {
            _diagnostics.ParseFailure();
            _logger.LogWarning(ex, "CCCS failed to parse advisory DTO for {DocumentId}", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingDocuments.Remove(documentId);
            pendingMappings.Remove(documentId);
            parseFailures++;
            continue;
        }
        // DTO is persisted as BSON via a JSON round-trip to keep serialization rules uniform.
        var dtoJson = JsonSerializer.Serialize(dto, DtoSerializerOptions);
        var dtoBson = BsonDocument.Parse(dtoJson);
        var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, DtoSchemaVersion, dtoBson, now);
        await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
        pendingDocuments.Remove(documentId);
        if (!pendingMappings.Contains(documentId))
        {
            pendingMappings.Add(documentId);
        }
        _diagnostics.ParseSuccess();
        parsed++;
    }
    var updatedCursor = cursor
        .WithPendingDocuments(pendingDocuments)
        .WithPendingMappings(pendingMappings);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    if (parsed > 0 || parseFailures > 0)
    {
        _logger.LogInformation(
            "CCCS parse completed parsed={Parsed} failures={Failures} pendingDocuments={PendingDocuments} pendingMappings={PendingMappings}",
            parsed,
            parseFailures,
            pendingDocuments.Count,
            pendingMappings.Count);
    }
}
/// <summary>
/// Map stage: deserialises each pending DTO, maps it into a canonical advisory, upserts
/// the advisory, and flips the document status to mapped (or failed on any error).
/// </summary>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);
    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }
    // Working copy mutated while the cursor's snapshot drives the loop.
    var pendingMappings = cursor.PendingMappings.ToList();
    var mapped = 0;
    var mappingFailures = 0;
    foreach (var documentId in cursor.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (document is null)
        {
            pendingMappings.Remove(documentId);
            _diagnostics.MapFailure();
            mappingFailures++;
            continue;
        }
        var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (dtoRecord is null)
        {
            _diagnostics.MapFailure();
            _logger.LogWarning("CCCS document {DocumentId} missing DTO payload", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            mappingFailures++;
            continue;
        }
        CccsAdvisoryDto? dto;
        try
        {
            // BSON -> JSON -> DTO round-trip mirrors how ParseAsync persisted the payload.
            var json = dtoRecord.Payload.ToJson();
            dto = JsonSerializer.Deserialize<CccsAdvisoryDto>(json, DtoSerializerOptions);
        }
        catch (Exception ex)
        {
            _diagnostics.MapFailure();
            _logger.LogWarning(ex, "CCCS failed to deserialize DTO for document {DocumentId}", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            mappingFailures++;
            continue;
        }
        if (dto is null)
        {
            _diagnostics.MapFailure();
            _logger.LogWarning("CCCS DTO for document {DocumentId} evaluated to null", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            mappingFailures++;
            continue;
        }
        try
        {
            var advisory = CccsMapper.Map(dto, document, dtoRecord.ValidatedAt);
            await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            _diagnostics.MapSuccess();
            mapped++;
        }
        catch (Exception ex)
        {
            _diagnostics.MapFailure();
            _logger.LogError(ex, "CCCS mapping failed for document {DocumentId}", documentId);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            mappingFailures++;
        }
    }
    var updatedCursor = cursor.WithPendingMappings(pendingMappings);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    if (mapped > 0 || mappingFailures > 0)
    {
        _logger.LogInformation(
            "CCCS map completed mapped={Mapped} failures={Failures} pendingMappings={PendingMappings}",
            mapped,
            mappingFailures,
            pendingMappings.Count);
    }
}
/// <summary>Loads the persisted cursor for this source, or an empty cursor when none exists.</summary>
private async Task<CccsCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (state is null)
    {
        return CccsCursor.Empty;
    }

    return CccsCursor.FromBson(state.Cursor);
}
/// <summary>
/// Persists the cursor, stamping the completion time with the cursor's last fetch
/// timestamp when available (falling back to the current clock).
/// </summary>
private Task UpdateCursorAsync(CccsCursor cursor, CancellationToken cancellationToken)
{
    var cursorDocument = cursor.ToBsonDocument();
    var completedAt = cursor.LastFetchAt ?? _timeProvider.GetUtcNow();
    return _stateRepository.UpdateCursorAsync(SourceName, cursorDocument, completedAt, cancellationToken);
}
/// <summary>
/// Applies the configured politeness delay between feed requests; a non-positive
/// delay disables throttling entirely.
/// </summary>
private async Task DelayBetweenRequestsAsync(CancellationToken cancellationToken)
{
    var delay = _options.RequestDelay;
    if (delay <= TimeSpan.Zero)
    {
        return;
    }

    try
    {
        await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
    }
    catch (TaskCanceledException)
    {
        // Swallow cancellation during the pause; the surrounding loop observes the token.
    }
}
/// <summary>
/// Derives the canonical document URI for a feed item: the item's absolute URL when
/// valid, a relative URL resolved against the CCCS host, or a synthetic API URI
/// keyed by node id as the last resort.
/// </summary>
private static string BuildDocumentUri(CccsFeedItem item, CccsFeedEndpoint feed)
{
    var url = item.Url;
    if (!string.IsNullOrWhiteSpace(url))
    {
        if (Uri.TryCreate(url, UriKind.Absolute, out var absolute))
        {
            return absolute.ToString();
        }

        // Relative links are resolved against the canonical CCCS host.
        var siteRoot = new Uri("https://www.cyber.gc.ca", UriKind.Absolute);
        if (Uri.TryCreate(siteRoot, url, out var resolved))
        {
            return resolved.ToString();
        }
    }

    return $"https://www.cyber.gc.ca/api/cccs/threats/{feed.Language}/{item.Nid}";
}
/// <summary>
/// Builds the persisted raw snapshot for a feed item, resolving identifier, language,
/// canonical URL, timestamps, and the taxonomy-backed alert type.
/// </summary>
private static CccsRawAdvisoryDocument CreateRawDocument(CccsFeedItem item, CccsFeedEndpoint feed, IReadOnlyDictionary<int, string> taxonomy)
{
    var language = string.IsNullOrWhiteSpace(item.Language) ? feed.Language : item.Language!.Trim();
    // Identifier preference: serial number > UUID > synthetic node-id key.
    var identifier = !string.IsNullOrWhiteSpace(item.SerialNumber)
        ? item.SerialNumber!.Trim()
        : !string.IsNullOrWhiteSpace(item.Uuid)
            ? item.Uuid!.Trim()
            : $"nid-{item.Nid}";
    var canonicalUrl = BuildDocumentUri(item, feed);
    var bodySegments = item.Body ?? Array.Empty<string>();
    var bodyHtml = string.Join(Environment.NewLine, bodySegments);
    var published = ParseDate(item.DateCreated);
    // Prefer the machine timestamp; fall back to the display-formatted modified date.
    var modified = ParseDate(item.DateModifiedTimestamp) ?? ParseDate(item.DateModified);
    var alertType = ResolveAlertType(item, taxonomy);
    return new CccsRawAdvisoryDocument
    {
        SourceId = identifier,
        SerialNumber = item.SerialNumber?.Trim(),
        Uuid = item.Uuid,
        Language = language.ToLowerInvariant(),
        Title = item.Title?.Trim() ?? identifier,
        Summary = item.Summary?.Trim(),
        CanonicalUrl = canonicalUrl,
        ExternalUrl = item.ExternalUrl,
        BodyHtml = bodyHtml,
        BodySegments = bodySegments,
        AlertType = alertType,
        Subject = item.Subject,
        Banner = item.Banner,
        Published = published,
        Modified = modified,
        RawDateCreated = item.DateCreated,
        RawDateModified = item.DateModifiedTimestamp ?? item.DateModified,
    };
}
/// <summary>
/// Resolves a human-readable alert-type label from the feed item's polymorphic
/// <c>AlertType</c> JSON field (number, string, or array), consulting the taxonomy
/// lookup for numeric ids. Returns <c>null</c> when no label can be determined.
/// </summary>
private static string? ResolveAlertType(CccsFeedItem item, IReadOnlyDictionary<int, string> taxonomy)
{
    var alertType = item.AlertType;
    switch (alertType.ValueKind)
    {
        case JsonValueKind.Number:
            // TryGetInt32 avoids a FormatException when the feed emits a non-integral
            // or out-of-range number; in that case fall back to the raw JSON text.
            if (alertType.TryGetInt32(out var id))
            {
                return taxonomy.TryGetValue(id, out var label) ? label : id.ToString(CultureInfo.InvariantCulture);
            }

            return alertType.GetRawText();

        case JsonValueKind.String:
            return alertType.GetString();

        case JsonValueKind.Array:
            // First taxonomy hit or non-empty string label wins; unmatched ids are skipped.
            foreach (var element in alertType.EnumerateArray())
            {
                if (element.ValueKind == JsonValueKind.Number
                    && element.TryGetInt32(out var elementId)
                    && taxonomy.TryGetValue(elementId, out var elementLabel))
                {
                    return elementLabel;
                }

                if (element.ValueKind == JsonValueKind.String)
                {
                    var candidate = element.GetString();
                    if (!string.IsNullOrWhiteSpace(candidate))
                    {
                        return candidate;
                    }
                }
            }

            return null;

        default:
            return null;
    }
}
/// <summary>
/// Trims the known-hash cache in place down to <paramref name="maxEntries"/> by evicting
/// entries in enumeration order, then returns the same dictionary instance.
/// </summary>
private static Dictionary<string, string> TrimKnownHashes(Dictionary<string, string> hashes, int maxEntries)
{
    var overflow = hashes.Count - maxEntries;
    if (overflow <= 0)
    {
        return hashes;
    }

    // NOTE(review): relies on Dictionary preserving insertion order while no removals
    // have occurred, so the oldest-seen URIs are evicted first — confirm this intent.
    var staleKeys = hashes.Keys.Take(overflow).ToList();
    foreach (var staleKey in staleKeys)
    {
        hashes.Remove(staleKey);
    }

    return hashes;
}
/// <summary>
/// Parses a feed timestamp as an invariant-culture UTC <see cref="DateTimeOffset"/>;
/// returns <c>null</c> for blank or unparseable input.
/// </summary>
private static DateTimeOffset? ParseDate(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    const DateTimeStyles styles = DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal;
    return DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, styles, out var parsed)
        ? parsed
        : null;
}
/// <summary>Computes the lowercase hex SHA-256 digest of the payload for change detection.</summary>
private static string ComputeSha256(byte[] payload)
{
    var digest = SHA256.HashData(payload);
    return Convert.ToHexString(digest).ToLowerInvariant();
}
}

View File

@@ -0,0 +1,21 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Cccs;
/// <summary>
/// Plugin registration entry point for the CCCS connector.
/// </summary>
public sealed class CccsConnectorPlugin : IConnectorPlugin
{
    /// <summary>Stable source identifier shared by documents, cursors, and advisories.</summary>
    public const string SourceName = "cccs";

    /// <inheritdoc />
    public string Name => SourceName;

    /// <inheritdoc />
    public bool IsAvailable(IServiceProvider services)
    {
        // Available only when the connector has been registered in the container.
        return services.GetService<CccsConnector>() is not null;
    }

    /// <inheritdoc />
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        return services.GetRequiredService<CccsConnector>();
    }
}

View File

@@ -0,0 +1,50 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.Cccs.Configuration;
namespace StellaOps.Concelier.Connector.Cccs;
/// <summary>
/// Wires the CCCS connector into the host: binds and validates configuration, registers
/// the fetch job, and ensures its scheduler definition exists.
/// </summary>
public sealed class CccsDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:cccs";

    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.AddCccsConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        });

        services.AddTransient<CccsFetchJob>();
        services.PostConfigure<JobSchedulerOptions>(schedulerOptions =>
            EnsureJob(schedulerOptions, CccsJobKinds.Fetch, typeof(CccsFetchJob)));

        return services;
    }

    /// <summary>Adds a job definition unless the host already configured one for this kind.</summary>
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (options.Definitions.ContainsKey(kind))
        {
            return;
        }

        options.Definitions[kind] = new JobDefinition(
            kind,
            jobType,
            options.DefaultTimeout,
            options.DefaultLeaseDuration,
            CronExpression: null,
            Enabled: true);
    }
}

View File

@@ -0,0 +1,47 @@
using System;
using System.Linq;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Cccs.Configuration;
using StellaOps.Concelier.Connector.Cccs.Internal;
using StellaOps.Concelier.Connector.Common.Http;
using StellaOps.Concelier.Connector.Common.Html;
namespace StellaOps.Concelier.Connector.Cccs;
public static class CccsServiceCollectionExtensions
{
    /// <summary>
    /// Registers the CCCS connector: validated options, a host-restricted HTTP client,
    /// and the parse/fetch collaborators.
    /// </summary>
    public static IServiceCollection AddCccsConnector(this IServiceCollection services, Action<CccsOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        services.AddOptions<CccsOptions>()
            .Configure(configure)
            .PostConfigure(static options => options.Validate());

        services.AddSourceHttpClient(CccsOptions.HttpClientName, static (sp, clientOptions) =>
        {
            var options = sp.GetRequiredService<IOptions<CccsOptions>>().Value;
            clientOptions.UserAgent = "StellaOps.Concelier.Cccs/1.0";
            clientOptions.Timeout = options.RequestTimeout;

            // Restrict outbound traffic to the configured feed hosts plus the canonical CCCS hosts.
            clientOptions.AllowedHosts.Clear();
            var feedHosts = options.Feeds
                .Where(static feed => feed.Uri is not null)
                .Select(static feed => feed.Uri!.Host);
            foreach (var host in feedHosts)
            {
                clientOptions.AllowedHosts.Add(host);
            }

            clientOptions.AllowedHosts.Add("www.cyber.gc.ca");
            clientOptions.AllowedHosts.Add("cyber.gc.ca");
        });

        services.TryAddSingleton<HtmlContentSanitizer>();
        services.TryAddSingleton<CccsDiagnostics>();
        services.TryAddSingleton<CccsHtmlParser>();
        services.TryAddSingleton<CccsFeedClient>();
        services.AddTransient<CccsConnector>();

        return services;
    }
}

View File

@@ -0,0 +1,175 @@
using System;
using System.Collections.Generic;
namespace StellaOps.Concelier.Connector.Cccs.Configuration;
/// <summary>
/// Tunable settings for the CCCS connector (feed endpoints, fetch limits, timeouts, backoff).
/// </summary>
public sealed class CccsOptions
{
    public const string HttpClientName = "concelier.source.cccs";

    // Default endpoints: the English and French threat feeds.
    private readonly List<CccsFeedEndpoint> _endpoints = new()
    {
        new CccsFeedEndpoint("en", new Uri("https://www.cyber.gc.ca/api/cccs/threats/v1/get?lang=en&content_type=cccs_threat")),
        new CccsFeedEndpoint("fr", new Uri("https://www.cyber.gc.ca/api/cccs/threats/v1/get?lang=fr&content_type=cccs_threat")),
    };

    /// <summary>
    /// Feed endpoints to poll; configure per language or content category.
    /// </summary>
    public IList<CccsFeedEndpoint> Feeds => _endpoints;

    /// <summary>
    /// Maximum number of entries to enqueue per fetch cycle.
    /// </summary>
    public int MaxEntriesPerFetch { get; set; } = 80;

    /// <summary>
    /// Maximum remembered entries (URI+hash) for deduplication.
    /// </summary>
    public int MaxKnownEntries { get; set; } = 512;

    /// <summary>
    /// Timeout applied to feed and taxonomy requests.
    /// </summary>
    public TimeSpan RequestTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Delay between successive feed requests to respect upstream throttling.
    /// </summary>
    public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(250);

    /// <summary>
    /// Backoff recorded in source state when fetch fails.
    /// </summary>
    public TimeSpan FailureBackoff { get; set; } = TimeSpan.FromMinutes(1);

    /// <summary>
    /// Throws <see cref="InvalidOperationException"/> when any setting is missing or out of range.
    /// </summary>
    public void Validate()
    {
        if (_endpoints.Count == 0)
        {
            throw new InvalidOperationException("At least one CCCS feed endpoint must be configured.");
        }

        // Each language must appear at most once so the same advisory is not ingested twice.
        var seenLanguages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var endpoint in _endpoints)
        {
            endpoint.Validate();
            if (!seenLanguages.Add(endpoint.Language))
            {
                throw new InvalidOperationException($"Duplicate CCCS feed language configured: '{endpoint.Language}'. Each language should be unique to avoid duplicate ingestion.");
            }
        }

        if (MaxEntriesPerFetch <= 0)
        {
            throw new InvalidOperationException($"{nameof(MaxEntriesPerFetch)} must be greater than zero.");
        }

        if (MaxKnownEntries <= 0)
        {
            throw new InvalidOperationException($"{nameof(MaxKnownEntries)} must be greater than zero.");
        }

        if (RequestTimeout <= TimeSpan.Zero)
        {
            throw new InvalidOperationException($"{nameof(RequestTimeout)} must be positive.");
        }

        if (RequestDelay < TimeSpan.Zero)
        {
            throw new InvalidOperationException($"{nameof(RequestDelay)} cannot be negative.");
        }

        if (FailureBackoff <= TimeSpan.Zero)
        {
            throw new InvalidOperationException($"{nameof(FailureBackoff)} must be positive.");
        }
    }
}
/// <summary>
/// A single CCCS feed endpoint, typically one per language.
/// </summary>
public sealed class CccsFeedEndpoint
{
    public CccsFeedEndpoint()
    {
    }

    public CccsFeedEndpoint(string language, Uri uri)
    {
        Language = language;
        Uri = uri;
    }

    /// <summary>Feed language code (e.g. "en", "fr"); used for deduplication and taxonomy lookups.</summary>
    public string Language { get; set; } = "en";

    /// <summary>Absolute feed URI; required.</summary>
    public Uri? Uri { get; set; }

    public void Validate()
    {
        if (string.IsNullOrWhiteSpace(Language))
        {
            throw new InvalidOperationException("CCCS feed language must be specified.");
        }

        if (Uri is null || !Uri.IsAbsoluteUri)
        {
            throw new InvalidOperationException($"CCCS feed endpoint URI must be an absolute URI (language='{Language}').");
        }
    }

    /// <summary>
    /// Builds the alert-type taxonomy URI for this endpoint. The language is taken from the
    /// feed URI's "lang" query parameter when present, otherwise <see cref="Language"/>.
    /// </summary>
    public Uri BuildTaxonomyUri()
    {
        if (Uri is null)
        {
            throw new InvalidOperationException("Feed endpoint URI must be configured before building taxonomy URI.");
        }

        var language = Uri.GetQueryParameterValueOrDefault("lang", Language);
        // The resolved value has been percent-decoded, so re-encode it before embedding it in
        // a new URL; otherwise reserved characters would corrupt the query string.
        var escapedLanguage = System.Uri.EscapeDataString(language);
        var builder = $"https://www.cyber.gc.ca/api/cccs/taxonomy/v1/get?lang={escapedLanguage}&vocabulary=cccs_alert_type";
        return new Uri(builder, UriKind.Absolute);
    }
}
internal static class CccsUriExtensions
{
    /// <summary>
    /// Returns the (unescaped) value of the query parameter <paramref name="key"/>,
    /// or <paramref name="fallback"/> when the parameter is absent or empty.
    /// Key comparison is case-insensitive; the first non-empty match wins.
    /// </summary>
    public static string GetQueryParameterValueOrDefault(this Uri uri, string key, string fallback)
    {
        var query = uri?.Query;
        if (string.IsNullOrEmpty(query))
        {
            return fallback;
        }

        var candidates = query[0] == '?' ? query[1..] : query;
        foreach (var pair in candidates.Split('&', StringSplitOptions.RemoveEmptyEntries))
        {
            var eq = pair.IndexOf('=');
            if (eq < 0)
            {
                // Valueless parameter (e.g. "?flag") — cannot satisfy the lookup.
                continue;
            }

            var name = pair[..eq].Trim();
            if (!name.Equals(key, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var value = pair[(eq + 1)..].Trim();
            if (value.Length > 0)
            {
                return Uri.UnescapeDataString(value);
            }
        }

        return fallback;
    }
}

View File

@@ -0,0 +1,54 @@
using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>
/// Normalised advisory payload produced by the HTML parser and consumed by the mapper;
/// persisted as the connector's intermediate DTO.
/// </summary>
internal sealed record CccsAdvisoryDto
{
    /// <summary>Upstream identifier of the feed entry.</summary>
    [JsonPropertyName("sourceId")]
    public string SourceId { get; init; } = string.Empty;

    /// <summary>Advisory serial number; used downstream as the canonical advisory key.</summary>
    [JsonPropertyName("serialNumber")]
    public string SerialNumber { get; init; } = string.Empty;

    /// <summary>Advisory language code (e.g. "en", "fr").</summary>
    [JsonPropertyName("language")]
    public string Language { get; init; } = "en";

    [JsonPropertyName("title")]
    public string Title { get; init; } = string.Empty;

    /// <summary>Whitespace-collapsed summary text, when the feed supplied one.</summary>
    [JsonPropertyName("summary")]
    public string? Summary { get; init; }

    /// <summary>Public URL of the advisory.</summary>
    [JsonPropertyName("canonicalUrl")]
    public string CanonicalUrl { get; init; } = string.Empty;

    /// <summary>Sanitized advisory body HTML.</summary>
    [JsonPropertyName("contentHtml")]
    public string ContentHtml { get; init; } = string.Empty;

    [JsonPropertyName("published")]
    public DateTimeOffset? Published { get; init; }

    [JsonPropertyName("modified")]
    public DateTimeOffset? Modified { get; init; }

    /// <summary>Resolved alert-type label, when known.</summary>
    [JsonPropertyName("alertType")]
    public string? AlertType { get; init; }

    [JsonPropertyName("subject")]
    public string? Subject { get; init; }

    /// <summary>Affected product names extracted from the advisory body.</summary>
    [JsonPropertyName("products")]
    public IReadOnlyList<string> Products { get; init; } = Array.Empty<string>();

    /// <summary>Normalized outbound links extracted from the advisory body.</summary>
    [JsonPropertyName("references")]
    public IReadOnlyList<CccsReferenceDto> References { get; init; } = Array.Empty<CccsReferenceDto>();

    /// <summary>Unique CVE identifiers mentioned in the advisory.</summary>
    [JsonPropertyName("cveIds")]
    public IReadOnlyList<string> CveIds { get; init; } = Array.Empty<string>();
}
/// <summary>A hyperlink extracted from the advisory body: absolute URL plus optional anchor text.</summary>
internal sealed record CccsReferenceDto(
    [property: JsonPropertyName("url")] string Url,
    [property: JsonPropertyName("label")] string? Label);

View File

@@ -0,0 +1,145 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>
/// Immutable connector cursor persisted between runs: document ids awaiting parse,
/// DTO ids awaiting map, the URI-to-hash deduplication map, and the last fetch time.
/// Round-trips to and from a BSON document stored in source state.
/// </summary>
internal sealed record CccsCursor(
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings,
    IReadOnlyDictionary<string, string> KnownEntryHashes,
    DateTimeOffset? LastFetchAt)
{
    private static readonly IReadOnlyCollection<Guid> EmptyGuidCollection = Array.Empty<Guid>();
    private static readonly IReadOnlyDictionary<string, string> EmptyHashes = new Dictionary<string, string>(StringComparer.Ordinal);

    /// <summary>Cursor used when no state has been persisted yet.</summary>
    public static CccsCursor Empty { get; } = new(EmptyGuidCollection, EmptyGuidCollection, EmptyHashes, null);

    /// <summary>Returns a copy with the pending-parse set replaced (null-safe, duplicates removed).</summary>
    public CccsCursor WithPendingDocuments(IEnumerable<Guid> documents)
    {
        var distinct = (documents ?? Enumerable.Empty<Guid>()).Distinct().ToArray();
        return this with { PendingDocuments = distinct };
    }

    /// <summary>Returns a copy with the pending-map set replaced (null-safe, duplicates removed).</summary>
    public CccsCursor WithPendingMappings(IEnumerable<Guid> mappings)
    {
        var distinct = (mappings ?? Enumerable.Empty<Guid>()).Distinct().ToArray();
        return this with { PendingMappings = distinct };
    }

    /// <summary>Returns a copy with the dedupe map replaced; the input is defensively copied.</summary>
    public CccsCursor WithKnownEntryHashes(IReadOnlyDictionary<string, string> hashes)
    {
        var map = hashes is null || hashes.Count == 0
            ? EmptyHashes
            : new Dictionary<string, string>(hashes, StringComparer.Ordinal);
        return this with { KnownEntryHashes = map };
    }

    public CccsCursor WithLastFetch(DateTimeOffset? timestamp)
        => this with { LastFetchAt = timestamp };

    /// <summary>Serializes the cursor to BSON; the hash map and timestamp are omitted when empty/null.</summary>
    public BsonDocument ToBsonDocument()
    {
        var doc = new BsonDocument
        {
            // GUIDs are stored as strings to avoid BSON binary-subtype ambiguity.
            ["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
            ["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
        };

        if (KnownEntryHashes.Count > 0)
        {
            var hashes = new BsonArray();
            foreach (var kvp in KnownEntryHashes)
            {
                hashes.Add(new BsonDocument
                {
                    ["uri"] = kvp.Key,
                    ["hash"] = kvp.Value,
                });
            }

            doc["knownEntryHashes"] = hashes;
        }

        if (LastFetchAt.HasValue)
        {
            doc["lastFetchAt"] = LastFetchAt.Value.UtcDateTime;
        }

        return doc;
    }

    /// <summary>Deserializes a cursor from BSON, tolerating missing or malformed fields.</summary>
    public static CccsCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }

        var pendingDocuments = ReadGuidArray(document, "pendingDocuments");
        var pendingMappings = ReadGuidArray(document, "pendingMappings");
        var hashes = ReadHashMap(document);
        var lastFetch = document.TryGetValue("lastFetchAt", out var value)
            ? ParseDateTime(value)
            : null;

        return new CccsCursor(pendingDocuments, pendingMappings, hashes, lastFetch);
    }

    // Reads an array of GUID strings; unparseable entries are silently skipped.
    private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
        {
            return EmptyGuidCollection;
        }

        var items = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (Guid.TryParse(element?.ToString(), out var guid))
            {
                items.Add(guid);
            }
        }

        return items;
    }

    // Rebuilds the URI-to-hash map; entries without a "uri" are skipped and a missing hash becomes "".
    private static IReadOnlyDictionary<string, string> ReadHashMap(BsonDocument document)
    {
        if (!document.TryGetValue("knownEntryHashes", out var value) || value is not BsonArray array || array.Count == 0)
        {
            return EmptyHashes;
        }

        var map = new Dictionary<string, string>(array.Count, StringComparer.Ordinal);
        foreach (var element in array)
        {
            if (element is not BsonDocument entry)
            {
                continue;
            }

            if (!entry.TryGetValue("uri", out var uriValue) || uriValue.IsBsonNull || string.IsNullOrWhiteSpace(uriValue.AsString))
            {
                continue;
            }

            var hash = entry.TryGetValue("hash", out var hashValue) && !hashValue.IsBsonNull
                ? hashValue.AsString
                : string.Empty;
            map[uriValue.AsString] = hash;
        }

        return map;
    }

    // Accepts either a native BSON date or a parseable string; anything else yields null.
    private static DateTimeOffset? ParseDateTime(BsonValue value)
        => value.BsonType switch
        {
            BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),
            BsonType.String when DateTimeOffset.TryParse(value.AsString, out var parsed) => parsed.ToUniversalTime(),
            _ => null,
        };
}

View File

@@ -0,0 +1,58 @@
using System.Diagnostics.Metrics;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>
/// OpenTelemetry-style counters covering the connector's fetch, parse, and map stages.
/// Owns the underlying <see cref="Meter"/>; dispose to release it.
/// </summary>
public sealed class CccsDiagnostics : IDisposable
{
    private const string MeterName = "StellaOps.Concelier.Connector.Cccs";
    private const string MeterVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _fetchAttempts;
    private readonly Counter<long> _fetchSuccess;
    private readonly Counter<long> _fetchDocuments;
    private readonly Counter<long> _fetchUnchanged;
    private readonly Counter<long> _fetchFailures;
    private readonly Counter<long> _parseSuccess;
    private readonly Counter<long> _parseFailures;
    private readonly Counter<long> _parseQuarantine;
    private readonly Counter<long> _mapSuccess;
    private readonly Counter<long> _mapFailures;

    public CccsDiagnostics()
    {
        _meter = new Meter(MeterName, MeterVersion);

        // Local factories keep the per-counter unit strings in one place.
        Counter<long> Operations(string name) => _meter.CreateCounter<long>(name, unit: "operations");
        Counter<long> Documents(string name) => _meter.CreateCounter<long>(name, unit: "documents");
        Counter<long> Advisories(string name) => _meter.CreateCounter<long>(name, unit: "advisories");

        _fetchAttempts = Operations("cccs.fetch.attempts");
        _fetchSuccess = Operations("cccs.fetch.success");
        _fetchDocuments = Documents("cccs.fetch.documents");
        _fetchUnchanged = Documents("cccs.fetch.unchanged");
        _fetchFailures = Operations("cccs.fetch.failures");
        _parseSuccess = Documents("cccs.parse.success");
        _parseFailures = Documents("cccs.parse.failures");
        _parseQuarantine = Documents("cccs.parse.quarantine");
        _mapSuccess = Advisories("cccs.map.success");
        _mapFailures = Advisories("cccs.map.failures");
    }

    public void FetchAttempt() => _fetchAttempts.Add(1);

    public void FetchSuccess() => _fetchSuccess.Add(1);

    public void FetchDocument() => _fetchDocuments.Add(1);

    public void FetchUnchanged() => _fetchUnchanged.Add(1);

    public void FetchFailure() => _fetchFailures.Add(1);

    public void ParseSuccess() => _parseSuccess.Add(1);

    public void ParseFailure() => _parseFailures.Add(1);

    public void ParseQuarantine() => _parseQuarantine.Add(1);

    public void MapSuccess() => _mapSuccess.Add(1);

    public void MapFailure() => _mapFailures.Add(1);

    public void Dispose() => _meter.Dispose();
}

View File

@@ -0,0 +1,146 @@
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.Concelier.Connector.Cccs.Configuration;
using StellaOps.Concelier.Connector.Common.Fetch;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>
/// Fetches the CCCS JSON feed and its alert-type taxonomy via the shared
/// <see cref="SourceFetchService"/>. Feed errors yield an empty result; transport and
/// deserialization failures are rethrown so the caller can record backoff.
/// </summary>
public sealed class CccsFeedClient
{
    // Prefer strict JSON but tolerate vendor and mislabelled JSON content types.
    private static readonly string[] AcceptHeaders =
    {
        "application/json",
        "application/vnd.api+json;q=0.9",
        "text/json;q=0.8",
        "application/*+json;q=0.7",
    };

    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private readonly SourceFetchService _fetchService;
    private readonly ILogger<CccsFeedClient> _logger;

    public CccsFeedClient(SourceFetchService fetchService, ILogger<CccsFeedClient> logger)
    {
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Fetches one feed endpoint and its taxonomy. Returns <see cref="CccsFeedResult.Empty"/>
    /// when the feed has no content or flags an upstream error.
    /// </summary>
    internal async Task<CccsFeedResult> FetchAsync(CccsFeedEndpoint endpoint, TimeSpan requestTimeout, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(endpoint);
        if (endpoint.Uri is null)
        {
            throw new InvalidOperationException("Feed endpoint URI must be configured.");
        }

        var request = new SourceFetchRequest(CccsOptions.HttpClientName, CccsConnectorPlugin.SourceName, endpoint.Uri)
        {
            AcceptHeaders = AcceptHeaders,
            TimeoutOverride = requestTimeout,
            Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["cccs.language"] = endpoint.Language,
                ["cccs.feedUri"] = endpoint.Uri.ToString(),
            },
        };

        try
        {
            var result = await _fetchService.FetchContentAsync(request, cancellationToken).ConfigureAwait(false);
            if (!result.IsSuccess || result.Content is null)
            {
                _logger.LogWarning("CCCS feed fetch returned no content for {Uri} (status={Status})", endpoint.Uri, result.StatusCode);
                return CccsFeedResult.Empty;
            }

            var feedResponse = Deserialize<CccsFeedResponse>(result.Content);
            if (feedResponse is null || feedResponse.Error)
            {
                _logger.LogWarning("CCCS feed response flagged an error for {Uri}", endpoint.Uri);
                return CccsFeedResult.Empty;
            }

            // Taxonomy failures are non-fatal: FetchTaxonomyAsync returns an empty map instead of throwing.
            var taxonomy = await FetchTaxonomyAsync(endpoint, requestTimeout, cancellationToken).ConfigureAwait(false);
            var items = (IReadOnlyList<CccsFeedItem>)feedResponse.Response ?? Array.Empty<CccsFeedItem>();
            return new CccsFeedResult(items, taxonomy, result.LastModified);
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException)
        {
            _logger.LogError(ex, "CCCS feed deserialization failed for {Uri}", endpoint.Uri);
            throw;
        }
        catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
        {
            _logger.LogWarning(ex, "CCCS feed fetch failed for {Uri}", endpoint.Uri);
            throw;
        }
    }

    /// <summary>
    /// Fetches the alert-type taxonomy (id to label). Best effort: every failure path logs
    /// and returns an empty map so feed ingestion can continue without labels.
    /// </summary>
    private async Task<IReadOnlyDictionary<int, string>> FetchTaxonomyAsync(CccsFeedEndpoint endpoint, TimeSpan timeout, CancellationToken cancellationToken)
    {
        var taxonomyUri = endpoint.BuildTaxonomyUri();
        var request = new SourceFetchRequest(CccsOptions.HttpClientName, CccsConnectorPlugin.SourceName, taxonomyUri)
        {
            AcceptHeaders = AcceptHeaders,
            TimeoutOverride = timeout,
            Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["cccs.language"] = endpoint.Language,
                ["cccs.taxonomyUri"] = taxonomyUri.ToString(),
            },
        };

        try
        {
            var result = await _fetchService.FetchContentAsync(request, cancellationToken).ConfigureAwait(false);
            if (!result.IsSuccess || result.Content is null)
            {
                _logger.LogDebug("CCCS taxonomy fetch returned no content for {Uri}", taxonomyUri);
                return new Dictionary<int, string>(0);
            }

            var taxonomyResponse = Deserialize<CccsTaxonomyResponse>(result.Content);
            if (taxonomyResponse is null || taxonomyResponse.Error)
            {
                _logger.LogDebug("CCCS taxonomy response indicated error for {Uri}", taxonomyUri);
                return new Dictionary<int, string>(0);
            }

            // Entries without a title are dropped; later duplicates of an id overwrite earlier ones.
            var map = new Dictionary<int, string>(taxonomyResponse.Response.Count);
            foreach (var item in taxonomyResponse.Response)
            {
                if (!string.IsNullOrWhiteSpace(item.Title))
                {
                    map[item.Id] = item.Title!;
                }
            }

            return map;
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException)
        {
            _logger.LogWarning(ex, "Failed to deserialize CCCS taxonomy for {Uri}", taxonomyUri);
            return new Dictionary<int, string>(0);
        }
        catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
        {
            _logger.LogWarning(ex, "CCCS taxonomy fetch failed for {Uri}", taxonomyUri);
            return new Dictionary<int, string>(0);
        }
    }

    private static T? Deserialize<T>(byte[] content)
        => JsonSerializer.Deserialize<T>(content, SerializerOptions);
}

View File

@@ -0,0 +1,101 @@
using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>Envelope returned by the CCCS threats API; "ERROR" flags an upstream failure.</summary>
internal sealed class CccsFeedResponse
{
    [JsonPropertyName("ERROR")]
    public bool Error { get; init; }

    [JsonPropertyName("response")]
    public List<CccsFeedItem> Response { get; init; } = new();
}
/// <summary>
/// One entry of the CCCS threats feed, mirroring the upstream JSON field names.
/// NOTE(review): comments marked "presumably" reflect the upstream CMS and should be
/// confirmed against the live API.
/// </summary>
internal sealed class CccsFeedItem
{
    // Presumably the CMS node id — confirm upstream.
    [JsonPropertyName("nid")]
    public int Nid { get; init; }

    [JsonPropertyName("title")]
    public string? Title { get; init; }

    [JsonPropertyName("uuid")]
    public string? Uuid { get; init; }

    [JsonPropertyName("banner")]
    public string? Banner { get; init; }

    [JsonPropertyName("lang")]
    public string? Language { get; init; }

    // Human-readable modification date; the "_ts" variant below is preferred when present.
    [JsonPropertyName("date_modified")]
    public string? DateModified { get; init; }

    [JsonPropertyName("date_modified_ts")]
    public string? DateModifiedTimestamp { get; init; }

    [JsonPropertyName("date_created")]
    public string? DateCreated { get; init; }

    [JsonPropertyName("summary")]
    public string? Summary { get; init; }

    // Body fragments as delivered by the feed — presumably HTML; confirm upstream shape.
    [JsonPropertyName("body")]
    public string[] Body { get; init; } = Array.Empty<string>();

    [JsonPropertyName("url")]
    public string? Url { get; init; }

    // Kept as a raw JsonElement because the feed emits a number, a string, or an array here.
    [JsonPropertyName("alert_type")]
    public JsonElement AlertType { get; init; }

    [JsonPropertyName("serial_number")]
    public string? SerialNumber { get; init; }

    [JsonPropertyName("subject")]
    public string? Subject { get; init; }

    [JsonPropertyName("moderation_state")]
    public string? ModerationState { get; init; }

    [JsonPropertyName("external_url")]
    public string? ExternalUrl { get; init; }
}
/// <summary>Envelope returned by the CCCS taxonomy API; "ERROR" flags an upstream failure.</summary>
internal sealed class CccsTaxonomyResponse
{
    [JsonPropertyName("ERROR")]
    public bool Error { get; init; }

    [JsonPropertyName("response")]
    public List<CccsTaxonomyItem> Response { get; init; } = new();
}
/// <summary>One taxonomy vocabulary term: numeric id plus display title.</summary>
internal sealed class CccsTaxonomyItem
{
    [JsonPropertyName("id")]
    public int Id { get; init; }

    [JsonPropertyName("title")]
    public string? Title { get; init; }
}
/// <summary>
/// Outcome of one feed fetch: the entries, the alert-type id-to-label map, and the
/// HTTP Last-Modified timestamp when the server supplied one.
/// </summary>
internal sealed record CccsFeedResult(
    IReadOnlyList<CccsFeedItem> Items,
    IReadOnlyDictionary<int, string> AlertTypes,
    DateTimeOffset? LastModifiedUtc)
{
    /// <summary>Result used when the feed is unavailable or flagged an error.</summary>
    public static CccsFeedResult Empty { get; } = new(
        Array.Empty<CccsFeedItem>(),
        new Dictionary<int, string>(0),
        null);
}
internal static class CccsFeedResultExtensions
{
    /// <summary>Wraps a fetched item list plus its taxonomy map into a <see cref="CccsFeedResult"/>.</summary>
    public static CccsFeedResult ToResult(this IReadOnlyList<CccsFeedItem> items, DateTimeOffset? lastModified, IReadOnlyDictionary<int, string> alertTypes)
    {
        return new CccsFeedResult(items, alertTypes, lastModified);
    }
}

View File

@@ -0,0 +1,449 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using StellaOps.Concelier.Connector.Common.Html;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
public sealed class CccsHtmlParser
{
private static readonly Regex SerialRegex = new(@"(?:(Number|Num[eé]ro)\s*[:]\s*)(?<id>[A-Z0-9\-\/]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex DateRegex = new(@"(?:(Date|Date de publication)\s*[:]\s*)(?<date>[A-Za-zÀ-ÿ0-9,\.\s\-]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CveRegex = new(@"CVE-\d{4}-\d{4,}", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CollapseWhitespaceRegex = new(@"\s+", RegexOptions.Compiled);
private static readonly CultureInfo[] EnglishCultures =
{
CultureInfo.GetCultureInfo("en-CA"),
CultureInfo.GetCultureInfo("en-US"),
CultureInfo.InvariantCulture,
};
private static readonly CultureInfo[] FrenchCultures =
{
CultureInfo.GetCultureInfo("fr-CA"),
CultureInfo.GetCultureInfo("fr-FR"),
CultureInfo.InvariantCulture,
};
private static readonly string[] ProductHeadingKeywords =
{
"affected",
"produit",
"produits",
"produits touch",
"produits concern",
"mesures recommand",
};
private static readonly string[] TrackingParameterPrefixes =
{
"utm_",
"mc_",
"mkt_",
"elq",
};
private readonly HtmlContentSanitizer _sanitizer;
private readonly HtmlParser _parser;
public CccsHtmlParser(HtmlContentSanitizer sanitizer)
{
_sanitizer = sanitizer ?? throw new ArgumentNullException(nameof(sanitizer));
_parser = new HtmlParser(new HtmlParserOptions
{
IsScripting = false,
IsKeepingSourceReferences = false,
});
}
internal CccsAdvisoryDto Parse(CccsRawAdvisoryDocument raw)
{
ArgumentNullException.ThrowIfNull(raw);
var baseUri = TryCreateUri(raw.CanonicalUrl);
var document = _parser.ParseDocument(raw.BodyHtml ?? string.Empty);
var body = document.Body ?? document.DocumentElement;
var sanitized = _sanitizer.Sanitize(body?.InnerHtml ?? raw.BodyHtml ?? string.Empty, baseUri);
var contentRoot = body ?? document.DocumentElement;
var serialNumber = !string.IsNullOrWhiteSpace(raw.SerialNumber)
? raw.SerialNumber!.Trim()
: ExtractSerialNumber(document) ?? raw.SourceId;
var published = raw.Published ?? ExtractDate(document, raw.Language) ?? raw.Modified;
var references = ExtractReferences(contentRoot, baseUri, raw.Language);
var products = ExtractProducts(contentRoot);
var cveIds = ExtractCveIds(document);
return new CccsAdvisoryDto
{
SourceId = raw.SourceId,
SerialNumber = serialNumber,
Language = raw.Language,
Title = raw.Title,
Summary = CollapseWhitespace(raw.Summary),
CanonicalUrl = raw.CanonicalUrl,
ContentHtml = sanitized,
Published = published,
Modified = raw.Modified ?? published,
AlertType = raw.AlertType,
Subject = raw.Subject,
Products = products,
References = references,
CveIds = cveIds,
};
}
private static Uri? TryCreateUri(string? value)
{
if (string.IsNullOrWhiteSpace(value))
{
return null;
}
return Uri.TryCreate(value, UriKind.Absolute, out var absolute) ? absolute : null;
}
private static string? ExtractSerialNumber(IDocument document)
{
if (document.Body is null)
{
return null;
}
foreach (var element in document.QuerySelectorAll("strong, p, div"))
{
var text = element.TextContent;
if (string.IsNullOrWhiteSpace(text))
{
continue;
}
var match = SerialRegex.Match(text);
if (match.Success && match.Groups["id"].Success)
{
var value = match.Groups["id"].Value.Trim();
if (!string.IsNullOrWhiteSpace(value))
{
return value;
}
}
}
var bodyText = document.Body.TextContent;
var fallback = SerialRegex.Match(bodyText ?? string.Empty);
return fallback.Success && fallback.Groups["id"].Success
? fallback.Groups["id"].Value.Trim()
: null;
}
private static DateTimeOffset? ExtractDate(IDocument document, string language)
{
if (document.Body is null)
{
return null;
}
var textSegments = new List<string>();
foreach (var element in document.QuerySelectorAll("strong, p, div"))
{
var text = element.TextContent;
if (string.IsNullOrWhiteSpace(text))
{
continue;
}
var match = DateRegex.Match(text);
if (match.Success && match.Groups["date"].Success)
{
textSegments.Add(match.Groups["date"].Value.Trim());
}
}
if (textSegments.Count == 0 && !string.IsNullOrWhiteSpace(document.Body.TextContent))
{
textSegments.Add(document.Body.TextContent);
}
var cultures = language.StartsWith("fr", StringComparison.OrdinalIgnoreCase) ? FrenchCultures : EnglishCultures;
foreach (var segment in textSegments)
{
foreach (var culture in cultures)
{
if (DateTime.TryParse(segment, culture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var parsed))
{
return new DateTimeOffset(parsed.ToUniversalTime());
}
}
}
return null;
}
private static IReadOnlyList<string> ExtractProducts(IElement? root)
{
if (root is null)
{
return Array.Empty<string>();
}
var results = new List<string>();
foreach (var heading in root.QuerySelectorAll("h1,h2,h3,h4,h5,h6"))
{
var text = heading.TextContent?.Trim();
if (!IsProductHeading(text))
{
continue;
}
var sibling = heading.NextElementSibling;
while (sibling is not null)
{
if (IsHeading(sibling))
{
break;
}
if (IsListElement(sibling))
{
AppendListItems(sibling, results);
if (results.Count > 0)
{
break;
}
}
else if (IsContentContainer(sibling))
{
foreach (var list in sibling.QuerySelectorAll("ul,ol"))
{
AppendListItems(list, results);
}
if (results.Count > 0)
{
break;
}
}
sibling = sibling.NextElementSibling;
}
if (results.Count > 0)
{
break;
}
}
if (results.Count == 0)
{
foreach (var li in root.QuerySelectorAll("ul li,ol li"))
{
var itemText = CollapseWhitespace(li.TextContent);
if (!string.IsNullOrWhiteSpace(itemText))
{
results.Add(itemText);
}
}
}
return results.Count == 0
? Array.Empty<string>()
: results
.Where(static value => !string.IsNullOrWhiteSpace(value))
.Distinct(StringComparer.OrdinalIgnoreCase)
.OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
private static bool IsProductHeading(string? heading)
{
if (string.IsNullOrWhiteSpace(heading))
{
return false;
}
var lowered = heading.ToLowerInvariant();
return ProductHeadingKeywords.Any(keyword => lowered.Contains(keyword, StringComparison.OrdinalIgnoreCase));
}
private static bool IsHeading(IElement element)
=> element.LocalName.Length == 2
&& element.LocalName[0] == 'h'
&& char.IsDigit(element.LocalName[1]);
private static bool IsListElement(IElement element)
=> string.Equals(element.LocalName, "ul", StringComparison.OrdinalIgnoreCase)
|| string.Equals(element.LocalName, "ol", StringComparison.OrdinalIgnoreCase);
private static bool IsContentContainer(IElement element)
=> string.Equals(element.LocalName, "div", StringComparison.OrdinalIgnoreCase)
|| string.Equals(element.LocalName, "section", StringComparison.OrdinalIgnoreCase)
|| string.Equals(element.LocalName, "article", StringComparison.OrdinalIgnoreCase);
private static void AppendListItems(IElement listElement, ICollection<string> buffer)
{
foreach (var li in listElement.QuerySelectorAll("li"))
{
if (li is null)
{
continue;
}
var clone = li.Clone(true) as IElement;
if (clone is null)
{
continue;
}
foreach (var nested in clone.QuerySelectorAll("ul,ol"))
{
nested.Remove();
}
var itemText = CollapseWhitespace(clone.TextContent);
if (!string.IsNullOrWhiteSpace(itemText))
{
buffer.Add(itemText);
}
}
}
private static IReadOnlyList<CccsReferenceDto> ExtractReferences(IElement? root, Uri? baseUri, string language)
{
if (root is null)
{
return Array.Empty<CccsReferenceDto>();
}
var references = new List<CccsReferenceDto>();
foreach (var anchor in root.QuerySelectorAll("a[href]"))
{
var href = anchor.GetAttribute("href");
var normalized = NormalizeReferenceUrl(href, baseUri, language);
if (normalized is null)
{
continue;
}
var label = CollapseWhitespace(anchor.TextContent);
references.Add(new CccsReferenceDto(normalized, string.IsNullOrWhiteSpace(label) ? null : label));
}
return references.Count == 0
? Array.Empty<CccsReferenceDto>()
: references
.GroupBy(reference => reference.Url, StringComparer.Ordinal)
.Select(group => group.First())
.OrderBy(reference => reference.Url, StringComparer.Ordinal)
.ToArray();
}
private static string? NormalizeReferenceUrl(string? href, Uri? baseUri, string language)
{
if (string.IsNullOrWhiteSpace(href))
{
return null;
}
if (!Uri.TryCreate(href, UriKind.Absolute, out var absolute))
{
if (baseUri is null || !Uri.TryCreate(baseUri, href, out absolute))
{
return null;
}
}
var builder = new UriBuilder(absolute)
{
Fragment = string.Empty,
};
var filteredQuery = FilterTrackingParameters(builder.Query, builder.Uri, language);
builder.Query = filteredQuery;
return builder.Uri.ToString();
}
private static string FilterTrackingParameters(string query, Uri uri, string language)
{
if (string.IsNullOrWhiteSpace(query))
{
return string.Empty;
}
var trimmed = query.TrimStart('?');
if (string.IsNullOrWhiteSpace(trimmed))
{
return string.Empty;
}
var parameters = trimmed.Split('&', StringSplitOptions.RemoveEmptyEntries);
var kept = new List<string>();
foreach (var parameter in parameters)
{
var separatorIndex = parameter.IndexOf('=');
var key = separatorIndex >= 0 ? parameter[..separatorIndex] : parameter;
if (TrackingParameterPrefixes.Any(prefix => key.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)))
{
continue;
}
if (uri.Host.Contains("cyber.gc.ca", StringComparison.OrdinalIgnoreCase)
&& key.Equals("lang", StringComparison.OrdinalIgnoreCase))
{
kept.Add($"lang={language}");
continue;
}
kept.Add(parameter);
}
if (uri.Host.Contains("cyber.gc.ca", StringComparison.OrdinalIgnoreCase)
&& kept.All(parameter => !parameter.StartsWith("lang=", StringComparison.OrdinalIgnoreCase)))
{
kept.Add($"lang={language}");
}
return kept.Count == 0 ? string.Empty : string.Join("&", kept);
}
/// <summary>
/// Scans the rendered document text for CVE identifiers and returns them
/// upper-cased, de-duplicated, and ordered ordinally for deterministic output.
/// </summary>
private static IReadOnlyList<string> ExtractCveIds(IDocument document)
{
    if (document.Body is null)
    {
        return Array.Empty<string>();
    }

    var text = document.Body.TextContent ?? string.Empty;

    // SortedSet gives distinct + ordinal ordering in one pass.
    var ids = new SortedSet<string>(StringComparer.Ordinal);
    foreach (var match in CveRegex.Matches(text))
    {
        ids.Add(match.Value.ToUpperInvariant());
    }

    return ids.Count == 0 ? Array.Empty<string>() : ids.ToArray();
}
/// <summary>
/// Collapses runs of whitespace into single spaces and trims the result;
/// returns <c>null</c> for blank input or when nothing remains after trimming.
/// </summary>
private static string? CollapseWhitespace(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    var normalized = CollapseWhitespaceRegex.Replace(value, " ").Trim();
    return normalized.Length > 0 ? normalized : null;
}
}

View File

@@ -0,0 +1,151 @@
using System;
using System.Collections.Generic;
using System.Linq;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Mongo.Documents;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>
/// Maps parsed CCCS advisory DTOs into canonical <see cref="Advisory"/> records.
/// All collections are de-duplicated and sorted so repeated runs produce
/// deterministic output.
/// </summary>
internal static class CccsMapper
{
    /// <summary>
    /// Builds the canonical advisory for a CCCS DTO.
    /// </summary>
    /// <param name="dto">Parsed advisory payload.</param>
    /// <param name="document">Source document record the DTO was derived from.</param>
    /// <param name="recordedAt">Timestamp stamped on every provenance entry.</param>
    public static Advisory Map(CccsAdvisoryDto dto, DocumentRecord document, DateTimeOffset recordedAt)
    {
        ArgumentNullException.ThrowIfNull(dto);
        ArgumentNullException.ThrowIfNull(document);

        var aliases = BuildAliases(dto);
        var references = BuildReferences(dto, recordedAt);
        var packages = BuildPackages(dto, recordedAt);

        var provenance = new[]
        {
            new AdvisoryProvenance(
                CccsConnectorPlugin.SourceName,
                "advisory",
                // Prefer the alert type as the provenance value; fall back to the serial number.
                dto.AlertType ?? dto.SerialNumber,
                recordedAt,
                new[] { ProvenanceFieldMasks.Advisory })
        };

        return new Advisory(
            advisoryKey: dto.SerialNumber,
            title: dto.Title,
            summary: dto.Summary,
            language: dto.Language,
            // Cross-fallback so either timestamp populates both fields when only one exists.
            published: dto.Published ?? dto.Modified,
            modified: dto.Modified ?? dto.Published,
            severity: null,
            exploitKnown: false,
            aliases: aliases,
            references: references,
            affectedPackages: packages,
            cvssMetrics: Array.Empty<CvssMetric>(),
            provenance: provenance);
    }

    /// <summary>
    /// Collects aliases: the serial number, the source id (when distinct from the
    /// serial), and any CVE ids — de-duplicated case-insensitively and sorted.
    /// </summary>
    private static IReadOnlyList<string> BuildAliases(CccsAdvisoryDto dto)
    {
        var aliases = new List<string>(capacity: 4)
        {
            dto.SerialNumber,
        };

        if (!string.IsNullOrWhiteSpace(dto.SourceId)
            && !string.Equals(dto.SourceId, dto.SerialNumber, StringComparison.OrdinalIgnoreCase))
        {
            aliases.Add(dto.SourceId);
        }

        foreach (var cve in dto.CveIds)
        {
            if (!string.IsNullOrWhiteSpace(cve))
            {
                aliases.Add(cve);
            }
        }

        return aliases
            .Where(static alias => !string.IsNullOrWhiteSpace(alias))
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static alias => alias, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>
    /// Builds the reference list: the canonical CCCS URL (kind "details") plus any
    /// references extracted from the advisory body, de-duplicated by URL and ordered.
    /// </summary>
    private static IReadOnlyList<AdvisoryReference> BuildReferences(CccsAdvisoryDto dto, DateTimeOffset recordedAt)
    {
        var references = new List<AdvisoryReference>
        {
            new(dto.CanonicalUrl, "details", "cccs", null, new AdvisoryProvenance(
                CccsConnectorPlugin.SourceName,
                "reference",
                dto.CanonicalUrl,
                recordedAt,
                new[] { ProvenanceFieldMasks.References }))
        };

        foreach (var reference in dto.References)
        {
            if (string.IsNullOrWhiteSpace(reference.Url))
            {
                continue;
            }

            references.Add(new AdvisoryReference(
                reference.Url,
                "reference",
                "cccs",
                reference.Label,
                new AdvisoryProvenance(
                    CccsConnectorPlugin.SourceName,
                    "reference",
                    reference.Url,
                    recordedAt,
                    new[] { ProvenanceFieldMasks.References })));
        }

        // DistinctBy keeps the first occurrence, so the canonical "details" entry
        // wins when a body reference repeats the canonical URL.
        return references
            .DistinctBy(static reference => reference.Url, StringComparer.Ordinal)
            .OrderBy(static reference => reference.Url, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>
    /// Emits one vendor-typed affected package per product string. CCCS data supplies
    /// no version ranges or statuses, so only the identifier and provenance are set.
    /// </summary>
    private static IReadOnlyList<AffectedPackage> BuildPackages(CccsAdvisoryDto dto, DateTimeOffset recordedAt)
    {
        if (dto.Products.Count == 0)
        {
            return Array.Empty<AffectedPackage>();
        }

        var packages = new List<AffectedPackage>(dto.Products.Count);
        foreach (var product in dto.Products)
        {
            if (string.IsNullOrWhiteSpace(product))
            {
                continue;
            }

            var identifier = product.Trim();
            var provenance = new AdvisoryProvenance(
                CccsConnectorPlugin.SourceName,
                "package",
                identifier,
                recordedAt,
                new[] { ProvenanceFieldMasks.AffectedPackages });

            packages.Add(new AffectedPackage(
                AffectedPackageTypes.Vendor,
                identifier,
                platform: null,
                versionRanges: Array.Empty<AffectedVersionRange>(),
                statuses: Array.Empty<AffectedPackageStatus>(),
                provenance: new[] { provenance },
                normalizedVersions: Array.Empty<NormalizedVersionRule>()));
        }

        return packages.Count == 0
            ? Array.Empty<AffectedPackage>()
            : packages
                .DistinctBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
                .OrderBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
                .ToArray();
    }
}

View File

@@ -0,0 +1,58 @@
using System;
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.Cccs.Internal;
/// <summary>
/// Raw CCCS advisory payload persisted between the fetch and parse stages.
/// Property names are bound to the connector's intermediate JSON schema via
/// <see cref="JsonPropertyNameAttribute"/>.
/// </summary>
internal sealed record CccsRawAdvisoryDocument
{
    /// <summary>Upstream identifier for the feed entry.</summary>
    [JsonPropertyName("sourceId")]
    public string SourceId { get; init; } = string.Empty;

    /// <summary>Advisory serial number, when the entry exposes one.</summary>
    [JsonPropertyName("serialNumber")]
    public string? SerialNumber { get; init; }

    /// <summary>Upstream UUID, when present.</summary>
    [JsonPropertyName("uuid")]
    public string? Uuid { get; init; }

    /// <summary>Advisory language code; defaults to English.</summary>
    [JsonPropertyName("language")]
    public string Language { get; init; } = "en";

    /// <summary>Advisory title.</summary>
    [JsonPropertyName("title")]
    public string Title { get; init; } = string.Empty;

    /// <summary>Short summary text, when available.</summary>
    [JsonPropertyName("summary")]
    public string? Summary { get; init; }

    /// <summary>Canonical advisory URL.</summary>
    [JsonPropertyName("canonicalUrl")]
    public string CanonicalUrl { get; init; } = string.Empty;

    /// <summary>Alternate/external URL, when present.</summary>
    [JsonPropertyName("externalUrl")]
    public string? ExternalUrl { get; init; }

    /// <summary>Raw HTML body captured from the feed entry.</summary>
    [JsonPropertyName("bodyHtml")]
    public string BodyHtml { get; init; } = string.Empty;

    /// <summary>Body content split into segments; empty when not segmented.</summary>
    [JsonPropertyName("bodySegments")]
    public string[] BodySegments { get; init; } = Array.Empty<string>();

    /// <summary>Alert type label, when present.</summary>
    [JsonPropertyName("alertType")]
    public string? AlertType { get; init; }

    /// <summary>Subject line, when present.</summary>
    [JsonPropertyName("subject")]
    public string? Subject { get; init; }

    /// <summary>Banner text, when present.</summary>
    [JsonPropertyName("banner")]
    public string? Banner { get; init; }

    /// <summary>Publication timestamp, when available.</summary>
    [JsonPropertyName("published")]
    public DateTimeOffset? Published { get; init; }

    /// <summary>Last-modified timestamp, when available.</summary>
    [JsonPropertyName("modified")]
    public DateTimeOffset? Modified { get; init; }

    /// <summary>Original, unparsed creation-date string from the source.</summary>
    [JsonPropertyName("rawCreated")]
    public string? RawDateCreated { get; init; }

    /// <summary>Original, unparsed modification-date string from the source.</summary>
    [JsonPropertyName("rawModified")]
    public string? RawDateModified { get; init; }
}

View File

@@ -0,0 +1,22 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.Cccs;
/// <summary>
/// Well-known job kind identifiers for the CCCS connector.
/// </summary>
internal static class CccsJobKinds
{
    // Scheduler kind registered for the CCCS fetch stage.
    public const string Fetch = "source:cccs:fetch";
}
/// <summary>
/// Scheduler job that triggers the CCCS connector's fetch stage.
/// </summary>
internal sealed class CccsFetchJob : IJob
{
    private readonly CccsConnector _connector;

    public CccsFetchJob(CccsConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    /// <summary>Delegates execution to <see cref="CccsConnector.FetchAsync"/>.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}

View File

@@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("StellaOps.Concelier.Connector.Cccs.Tests")]

View File

@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Storage.Mongo/StellaOps.Concelier.Storage.Mongo.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|FEEDCONN-CCCS-02-001 Catalogue official CCCS advisory feeds|BE-Conn-CCCS|Research|**DONE (2025-10-11)** Resolved RSS→Atom redirects (`/api/cccs/rss/v1/get?...``/api/cccs/atom/v1/get?...`), confirmed feed caps at 50 entries with inline HTML bodies, no `Last-Modified`/`ETag`, and `updated` timestamps in UTC. Findings and packet captures parked in `docs/concelier-connector-research-20251011.md`; retention sweep follow-up tracked in 02-007.|
|FEEDCONN-CCCS-02-002 Implement fetch & source state handling|BE-Conn-CCCS|Source.Common, Storage.Mongo|**DONE (2025-10-14)** `CccsConnector.FetchAsync` now hydrates feeds via `CccsFeedClient`, persists per-entry JSON payloads with SHA256 dedupe and cursor state, throttles requests, and records taxonomy + language metadata in document state.|
|FEEDCONN-CCCS-02-003 DTO/parser implementation|BE-Conn-CCCS|Source.Common|**DONE (2025-10-14)** Added `CccsHtmlParser` to sanitize Atom body HTML, extract serial/date/product bullets, collapse whitespace, and emit normalized reference URLs; `ParseAsync` now persists DTO records under schema `cccs.dto.v1`.|
|FEEDCONN-CCCS-02-004 Canonical mapping & range primitives|BE-Conn-CCCS|Models|**DONE (2025-10-14)** `CccsMapper` now materializes canonical advisories (aliases from serial/source/CVEs, references incl. canonical URL, vendor package records) with provenance masks; `MapAsync` stores results in `AdvisoryStore`.|
|FEEDCONN-CCCS-02-005 Deterministic fixtures & tests|QA|Testing|**DONE (2025-10-14)** Added English/French fixtures plus parser + connector end-to-end tests (`StellaOps.Concelier.Connector.Cccs.Tests`). Canned HTTP handler + Mongo fixture enables fetch→parse→map regression; fixtures refresh via `UPDATE_CCCS_FIXTURES=1`.|
|FEEDCONN-CCCS-02-006 Observability & documentation|DevEx|Docs|**DONE (2025-10-15)** Added `CccsDiagnostics` meter (fetch/parse/map counters), enriched connector logs with document counts, and published `docs/ops/concelier-cccs-operations.md` covering config, telemetry, and sanitiser guidance.|
|FEEDCONN-CCCS-02-007 Historical advisory harvesting plan|BE-Conn-CCCS|Research|**DONE (2025-10-15)** Measured `/api/cccs/threats/v1/get` inventory (~5.1k rows/lang; earliest 2018-06-08), documented backfill workflow + language split strategy, and linked the runbook for Offline Kit execution.|
|FEEDCONN-CCCS-02-008 Raw DOM parsing refinement|BE-Conn-CCCS|Source.Common|**DONE (2025-10-15)** Parser now walks unsanitised DOM (heading + nested list coverage), sanitizer keeps `<h#>`/`section` nodes, and regression fixtures/tests assert EN/FR list handling + preserved HTML structure.|
|FEEDCONN-CCCS-02-009 Normalized versions rollout (Oct 2025)|BE-Conn-CCCS|Merge coordination (`FEEDMERGE-COORD-02-900`)|**TODO (due 2025-10-21)** Implement trailing-version split helper per Merge guidance (see `../Merge/RANGE_PRIMITIVES_COORDINATION.md` “Helper snippets”) to emit `NormalizedVersions` via `SemVerRangeRuleBuilder`; refresh mapper tests/fixtures to assert provenance notes (`cccs:{serial}:{index}`) and confirm merge counters drop.|

View File

@@ -0,0 +1,40 @@
# AGENTS
## Role
Deliver a connector for Germany's CERT-Bund advisories so Concelier can ingest, normalise, and enrich BSI alerts alongside other national feeds.
## Scope
- Identify the authoritative CERT-Bund advisory feed(s) (RSS/Atom, JSON, CSV, or HTML).
- Implement fetch/cursor logic with proper windowing, dedupe, and failure backoff.
- Parse advisory detail pages for summary, affected products/vendors, mitigation, and references.
- Map advisories into canonical `Advisory` objects including aliases, references, affected packages, and provenance/range primitives.
- Provide deterministic fixtures and regression tests.
## Participants
- `Source.Common` (HTTP/fetch utilities, DTO storage).
- `Storage.Mongo` (raw/document/DTO/advisory stores, source state).
- `Concelier.Models` (canonical data model).
- `Concelier.Testing` (integration harness, snapshot utilities).
## Interfaces & Contracts
- Job kinds: `certbund:fetch`, `certbund:parse`, `certbund:map`.
- Persist upstream metadata (ETag/Last-Modified) if provided.
- Alias set should include CERT-Bund ID and referenced CVE entries.
## In/Out of scope
In scope:
- End-to-end connector implementation with deterministic tests and range primitive coverage.
- Baseline logging/metrics for pipeline observability.
Out of scope:
- Non-advisory CERT-Bund digests or newsletters.
- Downstream exporter changes.
## Observability & Security Expectations
- Log fetch attempts, item counts, and mapping metrics.
- Sanitize HTML thoroughly before persistence.
- Handle transient failures gracefully with exponential backoff and failure records in source state.
## Tests
- Add `StellaOps.Concelier.Connector.CertBund.Tests` covering fetch/parse/map with canned fixtures.
- Snapshot canonical advisories; support regeneration via environment flag.
- Ensure deterministic ordering, casing, and timestamps.

View File

@@ -0,0 +1,435 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.CertBund.Configuration;
using StellaOps.Concelier.Connector.CertBund.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.Html;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertBund;
/// <summary>
/// Connector for CERT-Bund (BSI) advisories. Runs in three stages — fetch the RSS
/// feed plus per-advisory detail JSON, parse detail payloads into DTOs, and map the
/// DTOs into canonical advisories — with progress between stages carried in a
/// <see cref="CertBundCursor"/> persisted through the source state repository.
/// </summary>
public sealed class CertBundConnector : IFeedConnector
{
    // camelCase JSON with nulls omitted; used to round-trip CertBundAdvisoryDto
    // payloads through the DTO store.
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    };

    private readonly CertBundFeedClient _feedClient;
    private readonly CertBundDetailParser _detailParser;
    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly CertBundOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly CertBundDiagnostics _diagnostics;
    private readonly ILogger<CertBundConnector> _logger;

    /// <summary>
    /// All collaborators are required except <paramref name="timeProvider"/>, which
    /// defaults to <see cref="TimeProvider.System"/>. Options are validated eagerly
    /// so misconfiguration surfaces at construction time.
    /// </summary>
    public CertBundConnector(
        CertBundFeedClient feedClient,
        CertBundDetailParser detailParser,
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IOptions<CertBundOptions> options,
        CertBundDiagnostics diagnostics,
        TimeProvider? timeProvider,
        ILogger<CertBundConnector> logger)
    {
        _feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
        _detailParser = detailParser ?? throw new ArgumentNullException(nameof(detailParser));
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Stable source identifier, shared with the plugin registration.</summary>
    public string SourceName => CertBundConnectorPlugin.SourceName;

    /// <summary>
    /// Fetch stage: loads the feed, downloads detail JSON for advisories not seen
    /// before, and records the resulting document ids plus dedupe state in the cursor.
    /// </summary>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var now = _timeProvider.GetUtcNow();
        IReadOnlyList<CertBundFeedItem> feedItems;
        _diagnostics.FeedFetchAttempt();
        try
        {
            feedItems = await _feedClient.LoadAsync(cancellationToken).ConfigureAwait(false);
            _diagnostics.FeedFetchSuccess(feedItems.Count);
        }
        catch (Exception ex)
        {
            // A feed failure records backoff in source state and aborts the cycle.
            _logger.LogError(ex, "CERT-Bund feed fetch failed");
            _diagnostics.FeedFetchFailure();
            await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }

        // Telemetry: how many days the current feed window spans.
        var coverageDays = CalculateCoverageDays(feedItems, now);
        _diagnostics.RecordFeedCoverage(coverageDays);

        if (feedItems.Count == 0)
        {
            await UpdateCursorAsync(cursor.WithLastFetch(now), cancellationToken).ConfigureAwait(false);
            return;
        }

        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var knownAdvisories = new HashSet<string>(cursor.KnownAdvisories, StringComparer.OrdinalIgnoreCase);

        var processed = 0;
        var alreadyKnown = 0;
        var notModified = 0;
        var detailFailures = 0;
        var truncated = false;
        var latestPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;

        // Newest first so the per-cycle cap favours the most recent advisories.
        foreach (var item in feedItems.OrderByDescending(static i => i.Published))
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (knownAdvisories.Contains(item.AdvisoryId))
            {
                alreadyKnown++;
                continue;
            }

            if (processed >= _options.MaxAdvisoriesPerFetch)
            {
                truncated = true;
                break;
            }

            try
            {
                _diagnostics.DetailFetchAttempt();
                // Conditional request: reuse ETag/Last-Modified from any prior download.
                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, item.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
                var request = new SourceFetchRequest(CertBundOptions.HttpClientName, SourceName, item.DetailUri)
                {
                    AcceptHeaders = new[] { "application/json", "text/json" },
                    Metadata = CertBundDocumentMetadata.CreateMetadata(item),
                    ETag = existing?.Etag,
                    LastModified = existing?.LastModified,
                    TimeoutOverride = _options.RequestTimeout,
                };

                var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
                if (result.IsNotModified)
                {
                    _diagnostics.DetailFetchNotModified();
                    notModified++;
                    knownAdvisories.Add(item.AdvisoryId);
                    continue;
                }

                if (!result.IsSuccess || result.Document is null)
                {
                    // Non-success without an exception: count and move to the next item.
                    _diagnostics.DetailFetchFailure("skipped");
                    detailFailures++;
                    continue;
                }

                _diagnostics.DetailFetchSuccess();
                pendingDocuments.Add(result.Document.Id);
                pendingMappings.Remove(result.Document.Id);
                knownAdvisories.Add(item.AdvisoryId);
                processed++;

                // Politeness delay between successive detail requests.
                if (_options.RequestDelay > TimeSpan.Zero)
                {
                    await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
                }
            }
            catch (Exception ex)
            {
                // NOTE(review): a throwing detail fetch aborts the whole cycle before the
                // cursor is persisted; earlier successes in this loop are re-discovered
                // next run via conditional requests — confirm this is intended.
                _logger.LogError(ex, "CERT-Bund detail fetch failed for {AdvisoryId}", item.AdvisoryId);
                _diagnostics.DetailFetchFailure("exception");
                detailFailures++;
                await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
                throw;
            }

            // Items skipped via `continue` above do not advance the published watermark.
            if (item.Published > latestPublished)
            {
                latestPublished = item.Published;
            }
        }

        _diagnostics.DetailFetchEnqueued(processed);

        if (feedItems.Count > 0 || processed > 0 || detailFailures > 0)
        {
            _logger.LogInformation(
                "CERT-Bund fetch cycle: feed items {FeedItems}, enqueued {Enqueued}, already known {Known}, not modified {NotModified}, detail failures {DetailFailures}, pending documents {PendingDocuments}, pending mappings {PendingMappings}, truncated {Truncated}, coverageDays={CoverageDays}",
                feedItems.Count,
                processed,
                alreadyKnown,
                notModified,
                detailFailures,
                pendingDocuments.Count,
                pendingMappings.Count,
                truncated,
                coverageDays ?? double.NaN);
        }

        // Cap the dedupe set, keeping the lexicographically-largest identifiers.
        // NOTE(review): this assumes CERT-Bund ids sort such that larger == newer — confirm.
        var trimmedKnown = knownAdvisories.Count > _options.MaxKnownAdvisories
            ? knownAdvisories.OrderByDescending(id => id, StringComparer.OrdinalIgnoreCase)
                .Take(_options.MaxKnownAdvisories)
                .ToArray()
            : knownAdvisories.ToArray();

        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings)
            .WithKnownAdvisories(trimmedKnown)
            .WithLastPublished(latestPublished == DateTimeOffset.MinValue ? cursor.LastPublished : latestPublished)
            .WithLastFetch(now);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Parse stage: downloads each pending document's raw payload, parses it into a
    /// <see cref="CertBundAdvisoryDto"/>, persists the DTO under schema
    /// "cert-bund.detail.v1", and moves the document into the pending-mapping set.
    /// </summary>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }

        var remainingDocuments = cursor.PendingDocuments.ToHashSet();
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var now = _timeProvider.GetUtcNow();
        var parsedCount = 0;
        var failedCount = 0;

        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                // Document disappeared; drop it from both work queues.
                remainingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }

            if (!document.GridFsId.HasValue)
            {
                // No stored payload to parse — mark failed and drop.
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                _diagnostics.ParseFailure("missing_payload");
                failedCount++;
                continue;
            }

            byte[] payload;
            try
            {
                payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Storage errors abort the stage; the cursor is untouched so the
                // document is retried next cycle.
                _logger.LogError(ex, "CERT-Bund unable to download document {DocumentId}", document.Id);
                _diagnostics.ParseFailure("download_failed");
                throw;
            }

            CertBundAdvisoryDto dto;
            try
            {
                // Prefer the portal URI captured at fetch time; fall back to the document URI.
                dto = _detailParser.Parse(new Uri(document.Uri), new Uri(document.Metadata?["certbund.portalUri"] ?? document.Uri), payload);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "CERT-Bund failed to parse advisory detail {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                _diagnostics.ParseFailure("parse_error");
                failedCount++;
                continue;
            }

            _diagnostics.ParseSuccess(dto.Products.Count, dto.CveIds.Count);
            parsedCount++;

            var bson = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
            var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "cert-bund.detail.v1", bson, now);
            await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            pendingMappings.Add(document.Id);
        }

        if (cursor.PendingDocuments.Count > 0)
        {
            _logger.LogInformation(
                "CERT-Bund parse cycle: parsed {Parsed}, failures {Failures}, remaining documents {RemainingDocuments}, pending mappings {PendingMappings}",
                parsedCount,
                failedCount,
                remainingDocuments.Count,
                pendingMappings.Count);
        }

        var updatedCursor = cursor
            .WithPendingDocuments(remainingDocuments)
            .WithPendingMappings(pendingMappings);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Map stage: rehydrates stored DTOs, maps them to canonical advisories via
    /// <see cref="CertBundMapper"/>, and upserts the results into the advisory store.
    /// Failures are isolated per document.
    /// </summary>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }

        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var mappedCount = 0;
        var failedCount = 0;

        foreach (var documentId in cursor.PendingMappings)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                pendingMappings.Remove(documentId);
                continue;
            }

            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (dtoRecord is null)
            {
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                _diagnostics.MapFailure("missing_dto");
                failedCount++;
                continue;
            }

            CertBundAdvisoryDto? dto;
            try
            {
                // DTOs are stored as BSON; round-trip through JSON to rebuild the typed DTO.
                dto = JsonSerializer.Deserialize<CertBundAdvisoryDto>(dtoRecord.Payload.ToJson(), SerializerOptions);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "CERT-Bund failed to deserialize DTO for document {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                _diagnostics.MapFailure("deserialize_failed");
                failedCount++;
                continue;
            }

            if (dto is null)
            {
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                _diagnostics.MapFailure("null_dto");
                failedCount++;
                continue;
            }

            try
            {
                var advisory = CertBundMapper.Map(dto, document, dtoRecord.ValidatedAt);
                await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                _diagnostics.MapSuccess(advisory.AffectedPackages.Length, advisory.Aliases.Length);
                mappedCount++;
            }
            catch (Exception ex)
            {
                // Mapping failures mark the document failed but do not abort the loop.
                _logger.LogError(ex, "CERT-Bund mapping failed for document {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                _diagnostics.MapFailure("exception");
                failedCount++;
            }
        }

        if (cursor.PendingMappings.Count > 0)
        {
            _logger.LogInformation(
                "CERT-Bund map cycle: mapped {Mapped}, failures {Failures}, remaining pending mappings {PendingMappings}",
                mappedCount,
                failedCount,
                pendingMappings.Count);
        }

        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns the age in days of the oldest feed item relative to
    /// <paramref name="fetchedAt"/>, or <c>null</c> when the feed is empty or
    /// timestamps are unusable.
    /// </summary>
    private static double? CalculateCoverageDays(IReadOnlyList<CertBundFeedItem> items, DateTimeOffset fetchedAt)
    {
        if (items is null || items.Count == 0)
        {
            return null;
        }

        var oldest = items.Min(static item => item.Published);
        if (oldest == DateTimeOffset.MinValue)
        {
            return null;
        }

        var span = fetchedAt - oldest;
        return span >= TimeSpan.Zero ? span.TotalDays : null;
    }

    /// <summary>Loads the persisted cursor, or an empty cursor on first run.</summary>
    private async Task<CertBundCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return state is null ? CertBundCursor.Empty : CertBundCursor.FromBson(state.Cursor);
    }

    /// <summary>Persists the cursor, stamping completion at the last fetch time (or now).</summary>
    private Task UpdateCursorAsync(CertBundCursor cursor, CancellationToken cancellationToken)
    {
        var document = cursor.ToBsonDocument();
        var completedAt = cursor.LastFetchAt ?? _timeProvider.GetUtcNow();
        return _stateRepository.UpdateCursorAsync(SourceName, document, completedAt, cancellationToken);
    }
}

View File

@@ -0,0 +1,21 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertBund;
/// <summary>
/// Plugin entry point that exposes the CERT-Bund connector to the host.
/// </summary>
public sealed class CertBundConnectorPlugin : IConnectorPlugin
{
    public const string SourceName = "cert-bund";

    /// <summary>Stable source identifier used for storage and job registration.</summary>
    public string Name => SourceName;

    /// <summary>The connector is available once it has been registered in DI.</summary>
    public bool IsAvailable(IServiceProvider services)
    {
        return services.GetService<CertBundConnector>() is not null;
    }

    /// <summary>Resolves the connector instance from the service provider.</summary>
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        return services.GetRequiredService<CertBundConnector>();
    }
}

View File

@@ -0,0 +1,50 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.CertBund.Configuration;
namespace StellaOps.Concelier.Connector.CertBund;
/// <summary>
/// Host DI routine that binds CERT-Bund options from configuration, registers the
/// connector services, and ensures the fetch job is defined in the scheduler.
/// </summary>
public sealed class CertBundDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:cert-bund";

    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.AddCertBundConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        });

        services.AddTransient<CertBundFetchJob>();

        services.PostConfigure<JobSchedulerOptions>(schedulerOptions =>
            EnsureJob(schedulerOptions, CertBundJobKinds.Fetch, typeof(CertBundFetchJob)));

        return services;
    }

    /// <summary>Adds a default job definition for <paramref name="kind"/> unless one exists.</summary>
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (!options.Definitions.ContainsKey(kind))
        {
            options.Definitions[kind] = new JobDefinition(
                kind,
                jobType,
                options.DefaultTimeout,
                options.DefaultLeaseDuration,
                CronExpression: null,
                Enabled: true);
        }
    }
}

View File

@@ -0,0 +1,48 @@
using System;
using System.Net;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertBund.Configuration;
using StellaOps.Concelier.Connector.CertBund.Internal;
using StellaOps.Concelier.Connector.Common.Html;
using StellaOps.Concelier.Connector.Common.Http;
namespace StellaOps.Concelier.Connector.CertBund;
/// <summary>
/// DI extensions for the CERT-Bund connector.
/// </summary>
public static class CertBundServiceCollectionExtensions
{
    /// <summary>
    /// Registers CERT-Bund options, the named HTTP client, and the connector's
    /// supporting services.
    /// </summary>
    public static IServiceCollection AddCertBundConnector(this IServiceCollection services, Action<CertBundOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        services.AddOptions<CertBundOptions>()
            .Configure(configure)
            .PostConfigure(static options => options.Validate());

        services.AddSourceHttpClient(CertBundOptions.HttpClientName, static (provider, clientOptions) =>
        {
            var options = provider.GetRequiredService<IOptions<CertBundOptions>>().Value;
            clientOptions.Timeout = options.RequestTimeout;
            clientOptions.UserAgent = "StellaOps.Concelier.CertBund/1.0";

            // Pin outbound traffic to the hosts this connector talks to.
            clientOptions.AllowedHosts.Clear();
            clientOptions.AllowedHosts.Add(options.FeedUri.Host);
            clientOptions.AllowedHosts.Add(options.DetailApiUri.Host);
            clientOptions.AllowedHosts.Add(options.PortalBootstrapUri.Host);

            clientOptions.ConfigureHandler = httpHandler =>
            {
                httpHandler.AutomaticDecompression = DecompressionMethods.All;
                // Cookies are required: the portal JSON API expects session cookies
                // bootstrapped from the SPA.
                httpHandler.UseCookies = true;
                httpHandler.CookieContainer = new System.Net.CookieContainer();
            };
        });

        services.TryAddSingleton<HtmlContentSanitizer>();
        services.TryAddSingleton<CertBundDiagnostics>();
        services.TryAddSingleton<CertBundFeedClient>();
        services.TryAddSingleton<CertBundDetailParser>();
        services.AddTransient<CertBundConnector>();

        return services;
    }
}

View File

@@ -0,0 +1,104 @@
using System.Net;
namespace StellaOps.Concelier.Connector.CertBund.Configuration;
/// <summary>
/// Configuration for the CERT-Bund connector: endpoints, timing, and per-cycle limits.
/// </summary>
public sealed class CertBundOptions
{
    public const string HttpClientName = "concelier.source.certbund";

    /// <summary>RSS feed listing the most recent CERT-Bund advisories.</summary>
    public Uri FeedUri { get; set; } = new("https://wid.cert-bund.de/content/public/securityAdvisory/rss");

    /// <summary>Portal page fetched to bootstrap the session cookies the SPA JSON API requires.</summary>
    public Uri PortalBootstrapUri { get; set; } = new("https://wid.cert-bund.de/portal/");

    /// <summary>Detail API endpoint; the advisory id is appended as the <c>name</c> query parameter.</summary>
    public Uri DetailApiUri { get; set; } = new("https://wid.cert-bund.de/portal/api/securityadvisory");

    /// <summary>Timeout applied to feed and detail requests.</summary>
    public TimeSpan RequestTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Politeness delay between successive detail fetches.</summary>
    public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(250);

    /// <summary>Backoff recorded in source state after a failed fetch attempt.</summary>
    public TimeSpan FailureBackoff { get; set; } = TimeSpan.FromMinutes(5);

    /// <summary>Upper bound on advisories enqueued per fetch iteration.</summary>
    public int MaxAdvisoriesPerFetch { get; set; } = 50;

    /// <summary>Upper bound on remembered advisory identifiers used for dedupe.</summary>
    public int MaxKnownAdvisories { get; set; } = 512;

    /// <summary>
    /// Throws <see cref="InvalidOperationException"/> when any option is missing or out of range.
    /// </summary>
    public void Validate()
    {
        RequireAbsolute(FeedUri, "CERT-Bund feed URI must be an absolute URI.");
        RequireAbsolute(PortalBootstrapUri, "CERT-Bund portal bootstrap URI must be an absolute URI.");
        RequireAbsolute(DetailApiUri, "CERT-Bund detail API URI must be an absolute URI.");

        if (RequestTimeout <= TimeSpan.Zero)
        {
            throw new InvalidOperationException($"{nameof(RequestTimeout)} must be positive.");
        }

        if (RequestDelay < TimeSpan.Zero)
        {
            throw new InvalidOperationException($"{nameof(RequestDelay)} cannot be negative.");
        }

        if (FailureBackoff <= TimeSpan.Zero)
        {
            throw new InvalidOperationException($"{nameof(FailureBackoff)} must be positive.");
        }

        if (MaxAdvisoriesPerFetch <= 0)
        {
            throw new InvalidOperationException($"{nameof(MaxAdvisoriesPerFetch)} must be greater than zero.");
        }

        if (MaxKnownAdvisories <= 0)
        {
            throw new InvalidOperationException($"{nameof(MaxKnownAdvisories)} must be greater than zero.");
        }
    }

    /// <summary>
    /// Builds the detail-API URI for an advisory, preserving any query parameters
    /// already present on <see cref="DetailApiUri"/> and escaping the identifier.
    /// </summary>
    /// <exception cref="ArgumentException">The identifier is null, empty, or whitespace.</exception>
    public Uri BuildDetailUri(string advisoryId)
    {
        if (string.IsNullOrWhiteSpace(advisoryId))
        {
            throw new ArgumentException("Advisory identifier must be provided.", nameof(advisoryId));
        }

        var detail = new UriBuilder(DetailApiUri);
        var prefix = string.IsNullOrEmpty(detail.Query)
            ? string.Empty
            : detail.Query.TrimStart('?') + "&";
        detail.Query = $"{prefix}name={Uri.EscapeDataString(advisoryId)}";
        return detail.Uri;
    }

    private static void RequireAbsolute(Uri? uri, string message)
    {
        if (uri is null || !uri.IsAbsoluteUri)
        {
            throw new InvalidOperationException(message);
        }
    }
}

View File

@@ -0,0 +1,68 @@
using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Normalised CERT-Bund advisory payload produced by the detail parser and
/// persisted as the connector DTO (schema <c>cert-bund.detail.v1</c>).
/// </summary>
public sealed record CertBundAdvisoryDto
{
    /// <summary>CERT-Bund advisory identifier (e.g. <c>WID-SEC-2025-0001</c>).</summary>
    [JsonPropertyName("advisoryId")]
    public string AdvisoryId { get; init; } = string.Empty;
    [JsonPropertyName("title")]
    public string Title { get; init; } = string.Empty;
    [JsonPropertyName("summary")]
    public string? Summary { get; init; }
    /// <summary>Sanitised advisory body HTML (output of the content sanitizer).</summary>
    [JsonPropertyName("contentHtml")]
    public string ContentHtml { get; init; } = string.Empty;
    [JsonPropertyName("severity")]
    public string? Severity { get; init; }
    /// <summary>Language tag; CERT-Bund publishes in German, hence the "de" default.</summary>
    [JsonPropertyName("language")]
    public string Language { get; init; } = "de";
    [JsonPropertyName("published")]
    public DateTimeOffset? Published { get; init; }
    [JsonPropertyName("modified")]
    public DateTimeOffset? Modified { get; init; }
    /// <summary>Human-facing portal link for the advisory.</summary>
    [JsonPropertyName("portalUri")]
    public Uri PortalUri { get; init; } = new("https://wid.cert-bund.de/");
    /// <summary>Detail API endpoint the payload was fetched from.</summary>
    [JsonPropertyName("detailUri")]
    public Uri DetailUri { get; init; } = new("https://wid.cert-bund.de/");
    /// <summary>CVE identifiers extracted from the detail payload (trimmed, de-duplicated).</summary>
    [JsonPropertyName("cveIds")]
    public IReadOnlyList<string> CveIds { get; init; } = Array.Empty<string>();
    [JsonPropertyName("products")]
    public IReadOnlyList<CertBundProductDto> Products { get; init; } = Array.Empty<CertBundProductDto>();
    [JsonPropertyName("references")]
    public IReadOnlyList<CertBundReferenceDto> References { get; init; } = Array.Empty<CertBundReferenceDto>();
}
/// <summary>Affected product entry; <see cref="Versions"/> is free-form upstream text.</summary>
public sealed record CertBundProductDto
{
    [JsonPropertyName("vendor")]
    public string? Vendor { get; init; }
    [JsonPropertyName("name")]
    public string? Name { get; init; }
    [JsonPropertyName("versions")]
    public string? Versions { get; init; }
}
/// <summary>External reference link attached to an advisory.</summary>
public sealed record CertBundReferenceDto
{
    [JsonPropertyName("url")]
    public string Url { get; init; } = string.Empty;
    [JsonPropertyName("label")]
    public string? Label { get; init; }
}

View File

@@ -0,0 +1,118 @@
using System;
using System.Linq;
using MongoDB.Bson;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Persisted connector cursor: outstanding parse/map work, the de-duplication
/// window of already-seen advisory identifiers, and fetch watermarks.
/// </summary>
internal sealed record CertBundCursor(
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings,
    IReadOnlyCollection<string> KnownAdvisories,
    DateTimeOffset? LastPublished,
    DateTimeOffset? LastFetchAt)
{
    private static readonly IReadOnlyCollection<Guid> EmptyGuids = Array.Empty<Guid>();
    private static readonly IReadOnlyCollection<string> EmptyStrings = Array.Empty<string>();

    /// <summary>Cursor with no pending work and no watermarks.</summary>
    public static CertBundCursor Empty { get; } = new(EmptyGuids, EmptyGuids, EmptyStrings, null, null);

    public CertBundCursor WithPendingDocuments(IEnumerable<Guid> documents)
        => this with { PendingDocuments = Distinct(documents) };

    public CertBundCursor WithPendingMappings(IEnumerable<Guid> mappings)
        => this with { PendingMappings = Distinct(mappings) };

    public CertBundCursor WithKnownAdvisories(IEnumerable<string> advisories)
        => this with { KnownAdvisories = advisories?.Distinct(StringComparer.OrdinalIgnoreCase).ToArray() ?? EmptyStrings };

    public CertBundCursor WithLastPublished(DateTimeOffset? published)
        => this with { LastPublished = published };

    public CertBundCursor WithLastFetch(DateTimeOffset? timestamp)
        => this with { LastFetchAt = timestamp };

    /// <summary>Serialises the cursor for storage; timestamps are written as UTC BSON dates.</summary>
    public BsonDocument ToBsonDocument()
    {
        var state = new BsonDocument
        {
            ["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
            ["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
            ["knownAdvisories"] = new BsonArray(KnownAdvisories),
        };

        if (LastPublished is { } published)
        {
            state["lastPublished"] = published.UtcDateTime;
        }

        if (LastFetchAt is { } fetchedAt)
        {
            state["lastFetchAt"] = fetchedAt.UtcDateTime;
        }

        return state;
    }

    /// <summary>Rehydrates a cursor from storage; a missing or empty document yields <see cref="Empty"/>.</summary>
    public static CertBundCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }

        return new CertBundCursor(
            ReadGuidArray(document, "pendingDocuments"),
            ReadGuidArray(document, "pendingMappings"),
            ReadStringArray(document, "knownAdvisories"),
            document.TryGetValue("lastPublished", out var publishedValue) ? ParseDate(publishedValue) : null,
            document.TryGetValue("lastFetchAt", out var fetchValue) ? ParseDate(fetchValue) : null);
    }

    private static IReadOnlyCollection<Guid> Distinct(IEnumerable<Guid>? values)
        => values?.Distinct().ToArray() ?? EmptyGuids;

    // Tolerates malformed entries: anything that does not parse as a GUID is skipped.
    private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
        {
            return EmptyGuids;
        }

        var parsed = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (Guid.TryParse(element?.ToString(), out var id))
            {
                parsed.Add(id);
            }
        }

        return parsed;
    }

    private static IReadOnlyCollection<string> ReadStringArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
        {
            return EmptyStrings;
        }

        return array.Select(element => element?.ToString() ?? string.Empty)
            .Where(static s => !string.IsNullOrWhiteSpace(s))
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    // Accepts both native BSON dates and ISO strings; anything else maps to null.
    private static DateTimeOffset? ParseDate(BsonValue value)
        => value.BsonType switch
        {
            BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),
            BsonType.String when DateTimeOffset.TryParse(value.AsString, out var parsed) => parsed.ToUniversalTime(),
            _ => null,
        };
}

View File

@@ -0,0 +1,87 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Text.Json.Serialization;
using StellaOps.Concelier.Connector.Common.Html;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Deserialises CERT-Bund detail API payloads and normalises them into
/// <see cref="CertBundAdvisoryDto"/> records: HTML is sanitised and CVE,
/// reference, and product lists are de-duplicated.
/// </summary>
public sealed class CertBundDetailParser
{
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private readonly HtmlContentSanitizer _sanitizer;

    public CertBundDetailParser(HtmlContentSanitizer sanitizer)
        => _sanitizer = sanitizer ?? throw new ArgumentNullException(nameof(sanitizer));

    /// <summary>
    /// Parses a raw detail payload. Throws <see cref="InvalidOperationException"/>
    /// when the payload deserialises to null or lacks the advisory name.
    /// </summary>
    public CertBundAdvisoryDto Parse(Uri detailUri, Uri portalUri, byte[] payload)
    {
        var detail = JsonSerializer.Deserialize<CertBundDetailResponse>(payload, SerializerOptions)
            ?? throw new InvalidOperationException("CERT-Bund detail payload deserialized to null.");

        var advisoryId = detail.Name ?? throw new InvalidOperationException("CERT-Bund detail missing advisory name.");
        var sanitizedHtml = _sanitizer.Sanitize(detail.Description ?? string.Empty, portalUri);
        var language = string.IsNullOrWhiteSpace(detail.Language) ? "de" : detail.Language!;

        return new CertBundAdvisoryDto
        {
            AdvisoryId = advisoryId,
            Title = detail.Title ?? advisoryId,
            Summary = detail.Summary,
            ContentHtml = sanitizedHtml,
            Severity = detail.Severity,
            Language = language,
            Published = detail.Published,
            Modified = detail.Updated ?? detail.Published,
            PortalUri = portalUri,
            DetailUri = detailUri,
            CveIds = CollectCveIds(detail.CveIds),
            References = MapReferences(detail.References),
            Products = MapProducts(detail.Products),
        };
    }

    // Trims entries, drops blanks, and de-duplicates case-insensitively while
    // preserving first-seen order.
    private static IReadOnlyList<string> CollectCveIds(string[]? cveIds)
    {
        if (cveIds is null || cveIds.Length == 0)
        {
            return Array.Empty<string>();
        }

        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var collected = new List<string>(cveIds.Length);
        foreach (var id in cveIds)
        {
            if (string.IsNullOrWhiteSpace(id))
            {
                continue;
            }

            var trimmed = id.Trim();
            if (seen.Add(trimmed))
            {
                collected.Add(trimmed);
            }
        }

        return collected.ToArray();
    }

    // Keeps the first reference per URL (case-insensitive) and drops entries without a URL.
    private static IReadOnlyList<CertBundReferenceDto> MapReferences(CertBundDetailReference[]? references)
    {
        if (references is null || references.Length == 0)
        {
            return Array.Empty<CertBundReferenceDto>();
        }

        var seenUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var mapped = new List<CertBundReferenceDto>(references.Length);
        foreach (var reference in references)
        {
            if (string.IsNullOrWhiteSpace(reference.Url) || !seenUrls.Add(reference.Url!))
            {
                continue;
            }

            mapped.Add(new CertBundReferenceDto
            {
                Url = reference.Url!,
                Label = reference.Label,
            });
        }

        return mapped.ToArray();
    }

    // Retains products that name at least a vendor or a product.
    private static IReadOnlyList<CertBundProductDto> MapProducts(CertBundDetailProduct[]? products)
    {
        if (products is null || products.Length == 0)
        {
            return Array.Empty<CertBundProductDto>();
        }

        var mapped = new List<CertBundProductDto>(products.Length);
        foreach (var product in products)
        {
            if (string.IsNullOrWhiteSpace(product.Vendor) && string.IsNullOrWhiteSpace(product.Name))
            {
                continue;
            }

            mapped.Add(new CertBundProductDto
            {
                Vendor = product.Vendor,
                Name = product.Name,
                Versions = product.Versions,
            });
        }

        return mapped.ToArray();
    }
}

View File

@@ -0,0 +1,60 @@
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Raw wire shape of the CERT-Bund detail API JSON; consumed by
/// <see cref="CertBundDetailParser"/> and never persisted directly.
/// </summary>
internal sealed record CertBundDetailResponse
{
    /// <summary>Advisory identifier; required downstream (parser throws when absent).</summary>
    [JsonPropertyName("name")]
    public string? Name { get; init; }
    [JsonPropertyName("title")]
    public string? Title { get; init; }
    [JsonPropertyName("summary")]
    public string? Summary { get; init; }
    /// <summary>Unsanitised advisory body; sanitised before it reaches the DTO.</summary>
    [JsonPropertyName("description")]
    public string? Description { get; init; }
    [JsonPropertyName("severity")]
    public string? Severity { get; init; }
    [JsonPropertyName("language")]
    public string? Language { get; init; }
    [JsonPropertyName("published")]
    public DateTimeOffset? Published { get; init; }
    [JsonPropertyName("updated")]
    public DateTimeOffset? Updated { get; init; }
    [JsonPropertyName("cveIds")]
    public string[]? CveIds { get; init; }
    [JsonPropertyName("references")]
    public CertBundDetailReference[]? References { get; init; }
    [JsonPropertyName("products")]
    public CertBundDetailProduct[]? Products { get; init; }
}
/// <summary>Raw reference entry from the detail API.</summary>
internal sealed record CertBundDetailReference
{
    [JsonPropertyName("url")]
    public string? Url { get; init; }
    [JsonPropertyName("label")]
    public string? Label { get; init; }
}
/// <summary>Raw product entry from the detail API.</summary>
internal sealed record CertBundDetailProduct
{
    [JsonPropertyName("vendor")]
    public string? Vendor { get; init; }
    [JsonPropertyName("name")]
    public string? Name { get; init; }
    [JsonPropertyName("versions")]
    public string? Versions { get; init; }
}

View File

@@ -0,0 +1,191 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Emits OpenTelemetry counters and histograms for the CERT-Bund connector.
/// </summary>
public sealed class CertBundDiagnostics : IDisposable
{
    private const string MeterName = "StellaOps.Concelier.Connector.CertBund";
    private const string MeterVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _feedFetchAttempts;
    private readonly Counter<long> _feedFetchSuccess;
    private readonly Counter<long> _feedFetchFailures;
    private readonly Histogram<long> _feedItemCount;
    private readonly Histogram<long> _feedEnqueuedCount;
    private readonly Histogram<double> _feedCoverageDays;
    private readonly Counter<long> _detailFetchAttempts;
    private readonly Counter<long> _detailFetchSuccess;
    private readonly Counter<long> _detailFetchNotModified;
    private readonly Counter<long> _detailFetchFailures;
    private readonly Counter<long> _parseSuccess;
    private readonly Counter<long> _parseFailures;
    private readonly Histogram<long> _parseProductCount;
    private readonly Histogram<long> _parseCveCount;
    private readonly Counter<long> _mapSuccess;
    private readonly Counter<long> _mapFailures;
    private readonly Histogram<long> _mapPackageCount;
    private readonly Histogram<long> _mapAliasCount;

    public CertBundDiagnostics()
    {
        _meter = new Meter(MeterName, MeterVersion);

        // Feed (RSS) stage.
        _feedFetchAttempts = CreateCounter("certbund.feed.fetch.attempts", "operations", "Number of RSS feed load attempts.");
        _feedFetchSuccess = CreateCounter("certbund.feed.fetch.success", "operations", "Number of successful RSS feed loads.");
        _feedFetchFailures = CreateCounter("certbund.feed.fetch.failures", "operations", "Number of RSS feed load failures.");
        _feedItemCount = CreateHistogram<long>("certbund.feed.items.count", "items", "Distribution of RSS item counts per fetch.");
        _feedEnqueuedCount = CreateHistogram<long>("certbund.feed.enqueued.count", "documents", "Distribution of advisory documents enqueued per fetch.");
        _feedCoverageDays = CreateHistogram<double>("certbund.feed.coverage.days", "days", "Coverage window in days between fetch time and the oldest published advisory in the feed.");

        // Detail-fetch stage.
        _detailFetchAttempts = CreateCounter("certbund.detail.fetch.attempts", "operations", "Number of detail fetch attempts.");
        _detailFetchSuccess = CreateCounter("certbund.detail.fetch.success", "operations", "Number of detail fetches that persisted a document.");
        _detailFetchNotModified = CreateCounter("certbund.detail.fetch.not_modified", "operations", "Number of detail fetches returning HTTP 304.");
        _detailFetchFailures = CreateCounter("certbund.detail.fetch.failures", "operations", "Number of detail fetches that failed.");

        // Parse stage.
        _parseSuccess = CreateCounter("certbund.parse.success", "documents", "Number of documents parsed into CERT-Bund DTOs.");
        _parseFailures = CreateCounter("certbund.parse.failures", "documents", "Number of documents that failed to parse.");
        _parseProductCount = CreateHistogram<long>("certbund.parse.products.count", "products", "Distribution of product entries captured per advisory.");
        _parseCveCount = CreateHistogram<long>("certbund.parse.cve.count", "aliases", "Distribution of CVE identifiers captured per advisory.");

        // Map stage.
        _mapSuccess = CreateCounter("certbund.map.success", "advisories", "Number of canonical advisories emitted by the mapper.");
        _mapFailures = CreateCounter("certbund.map.failures", "advisories", "Number of mapping failures.");
        _mapPackageCount = CreateHistogram<long>("certbund.map.affected.count", "packages", "Distribution of affected packages emitted per advisory.");
        _mapAliasCount = CreateHistogram<long>("certbund.map.aliases.count", "aliases", "Distribution of alias counts per advisory.");
    }

    public void FeedFetchAttempt() => _feedFetchAttempts.Add(1);

    public void FeedFetchSuccess(int itemCount)
    {
        _feedFetchSuccess.Add(1);
        RecordNonNegative(_feedItemCount, itemCount);
    }

    public void FeedFetchFailure(string reason = "error")
        => _feedFetchFailures.Add(1, ReasonTag(reason));

    public void RecordFeedCoverage(double? coverageDays)
    {
        if (coverageDays.HasValue && coverageDays.Value >= 0)
        {
            _feedCoverageDays.Record(coverageDays.Value);
        }
    }

    public void DetailFetchAttempt() => _detailFetchAttempts.Add(1);

    public void DetailFetchSuccess() => _detailFetchSuccess.Add(1);

    public void DetailFetchNotModified() => _detailFetchNotModified.Add(1);

    public void DetailFetchFailure(string reason = "error")
        => _detailFetchFailures.Add(1, ReasonTag(reason));

    public void DetailFetchEnqueued(int count)
        => RecordNonNegative(_feedEnqueuedCount, count);

    public void ParseSuccess(int productCount, int cveCount)
    {
        _parseSuccess.Add(1);
        RecordNonNegative(_parseProductCount, productCount);
        RecordNonNegative(_parseCveCount, cveCount);
    }

    public void ParseFailure(string reason = "error")
        => _parseFailures.Add(1, ReasonTag(reason));

    public void MapSuccess(int affectedPackages, int aliasCount)
    {
        _mapSuccess.Add(1);
        RecordNonNegative(_mapPackageCount, affectedPackages);
        RecordNonNegative(_mapAliasCount, aliasCount);
    }

    public void MapFailure(string reason = "error")
        => _mapFailures.Add(1, ReasonTag(reason));

    public void Dispose() => _meter.Dispose();

    private Counter<long> CreateCounter(string name, string unit, string description)
        => _meter.CreateCounter<long>(name: name, unit: unit, description: description);

    private Histogram<T> CreateHistogram<T>(string name, string unit, string description)
        where T : struct
        => _meter.CreateHistogram<T>(name: name, unit: unit, description: description);

    // Histograms skip negative values (callers pass -1 for "unknown").
    private static void RecordNonNegative(Histogram<long> histogram, int value)
    {
        if (value >= 0)
        {
            histogram.Record(value);
        }
    }

    // Reason tags are lower-cased; blank reasons collapse to "unknown".
    private static KeyValuePair<string, object?> ReasonTag(string reason)
        => new("reason", string.IsNullOrWhiteSpace(reason) ? "unknown" : reason.ToLowerInvariant());
}

View File

@@ -0,0 +1,29 @@
using System;
using System.Collections.Generic;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Builds the key/value metadata attached to persisted CERT-Bund documents.
/// </summary>
internal static class CertBundDocumentMetadata
{
    public static Dictionary<string, string> CreateMetadata(CertBundFeedItem item)
    {
        // The published timestamp uses the round-trip ("O") format so it can be
        // re-parsed losslessly regardless of culture.
        var entries = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            ["certbund.advisoryId"] = item.AdvisoryId,
            ["certbund.portalUri"] = item.PortalUri.ToString(),
            ["certbund.published"] = item.Published.ToString("O"),
        };

        AddIfPresent(entries, "certbund.category", item.Category);
        AddIfPresent(entries, "certbund.title", item.Title);
        return entries;
    }

    // Optional fields are only persisted when they carry non-whitespace content.
    private static void AddIfPresent(Dictionary<string, string> entries, string key, string? value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            entries[key] = value!;
        }
    }
}

View File

@@ -0,0 +1,143 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertBund.Configuration;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Loads the CERT-Bund RSS feed and extracts advisory feed items. A one-time
/// portal bootstrap request primes the shared session (cookies) so the detail
/// API accepts subsequent calls.
/// </summary>
public sealed class CertBundFeedClient
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly CertBundOptions _options;
    private readonly ILogger<CertBundFeedClient> _logger;
    private readonly SemaphoreSlim _bootstrapSemaphore = new(1, 1);
    private volatile bool _bootstrapped;

    public CertBundFeedClient(
        IHttpClientFactory httpClientFactory,
        IOptions<CertBundOptions> options,
        ILogger<CertBundFeedClient> logger)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Fetches the RSS feed and returns one item per advisory whose portal link
    /// carries a <c>name</c> query parameter; entries without a resolvable
    /// advisory identifier are skipped.
    /// </summary>
    public async Task<IReadOnlyList<CertBundFeedItem>> LoadAsync(CancellationToken cancellationToken)
    {
        var client = _httpClientFactory.CreateClient(CertBundOptions.HttpClientName);
        await EnsureSessionAsync(client, cancellationToken).ConfigureAwait(false);

        using var request = new HttpRequestMessage(HttpMethod.Get, _options.FeedUri);
        request.Headers.TryAddWithoutValidation("Accept", "application/rss+xml, application/xml;q=0.9, text/xml;q=0.8");

        using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
        // LoadAsync keeps the fetch path fully asynchronous: the synchronous
        // XDocument.Load would block a thread-pool thread on network reads and
        // could not be cancelled mid-download.
        var document = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false);

        var items = new List<CertBundFeedItem>();
        foreach (var element in document.Descendants("item"))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var linkValue = element.Element("link")?.Value?.Trim();
            if (string.IsNullOrWhiteSpace(linkValue) || !Uri.TryCreate(linkValue, UriKind.Absolute, out var portalUri))
            {
                continue;
            }

            var advisoryId = TryExtractNameParameter(portalUri);
            if (string.IsNullOrWhiteSpace(advisoryId))
            {
                continue;
            }

            var detailUri = _options.BuildDetailUri(advisoryId);
            var pubDateText = element.Element("pubDate")?.Value;
            var published = ParseDate(pubDateText);
            var title = element.Element("title")?.Value?.Trim();
            var category = element.Element("category")?.Value?.Trim();
            items.Add(new CertBundFeedItem(advisoryId, detailUri, portalUri, published, title, category));
        }

        return items;
    }

    // Double-checked bootstrap: the first caller performs the portal GET that
    // seeds the session; concurrent callers wait on the semaphore and observe
    // the volatile flag.
    private async Task EnsureSessionAsync(HttpClient client, CancellationToken cancellationToken)
    {
        if (_bootstrapped)
        {
            return;
        }

        await _bootstrapSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (_bootstrapped)
            {
                return;
            }

            using var request = new HttpRequestMessage(HttpMethod.Get, _options.PortalBootstrapUri);
            request.Headers.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
            response.EnsureSuccessStatusCode();
            _bootstrapped = true;
        }
        finally
        {
            _bootstrapSemaphore.Release();
        }
    }

    /// <summary>
    /// Extracts the <c>name</c> query parameter (the advisory identifier) from a
    /// portal link, or <c>null</c> when no such parameter exists.
    /// </summary>
    private static string? TryExtractNameParameter(Uri portalUri)
    {
        if (portalUri is null)
        {
            return null;
        }

        var query = portalUri.Query;
        if (string.IsNullOrEmpty(query))
        {
            return null;
        }

        foreach (var pair in query.TrimStart('?').Split('&', StringSplitOptions.RemoveEmptyEntries))
        {
            var separatorIndex = pair.IndexOf('=');
            if (separatorIndex <= 0)
            {
                continue;
            }

            var key = pair[..separatorIndex].Trim();
            if (!key.Equals("name", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            return Uri.UnescapeDataString(pair[(separatorIndex + 1)..]);
        }

        return null;
    }

    // Falls back to "now" for unparsable pubDate values so every feed item
    // carries a timestamp; Published is therefore best-effort ordering data.
    private static DateTimeOffset ParseDate(string? value)
        => DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var parsed)
            ? parsed
            : DateTimeOffset.UtcNow;
}

View File

@@ -0,0 +1,11 @@
namespace StellaOps.Concelier.Connector.CertBund.Internal;
using System;
/// <summary>
/// Single advisory entry extracted from the CERT-Bund RSS feed.
/// </summary>
/// <param name="AdvisoryId">Advisory identifier extracted from the portal link's <c>name</c> query parameter.</param>
/// <param name="DetailUri">Detail API endpoint built for this advisory.</param>
/// <param name="PortalUri">Human-facing portal link from the RSS <c>link</c> element.</param>
/// <param name="Published">Publication timestamp from <c>pubDate</c>; falls back to fetch time when unparsable.</param>
/// <param name="Title">Optional advisory title from the feed.</param>
/// <param name="Category">Optional feed category.</param>
public sealed record CertBundFeedItem(
    string AdvisoryId,
    Uri DetailUri,
    Uri PortalUri,
    DateTimeOffset Published,
    string? Title,
    string? Category);

View File

@@ -0,0 +1,168 @@
using System;
using System.Collections.Generic;
using System.Linq;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Mongo.Documents;
namespace StellaOps.Concelier.Connector.CertBund.Internal;
/// <summary>
/// Maps sanitised CERT-Bund DTOs onto the canonical <see cref="Advisory"/> model.
/// All collections are de-duplicated and sorted so repeated runs over the same
/// input produce deterministic output.
/// </summary>
internal static class CertBundMapper
{
    /// <summary>
    /// Builds the canonical advisory for a CERT-Bund detail document.
    /// </summary>
    /// <param name="dto">Parsed advisory payload; must not be null.</param>
    /// <param name="document">Source document record the DTO came from; must not be null.</param>
    /// <param name="recordedAt">Timestamp stamped into every provenance entry.</param>
    public static Advisory Map(CertBundAdvisoryDto dto, DocumentRecord document, DateTimeOffset recordedAt)
    {
        ArgumentNullException.ThrowIfNull(dto);
        ArgumentNullException.ThrowIfNull(document);
        var aliases = BuildAliases(dto);
        var references = BuildReferences(dto, recordedAt);
        var packages = BuildPackages(dto, recordedAt);
        var provenance = new AdvisoryProvenance(
            CertBundConnectorPlugin.SourceName,
            "advisory",
            dto.AdvisoryId,
            recordedAt,
            new[] { ProvenanceFieldMasks.Advisory });
        return new Advisory(
            advisoryKey: dto.AdvisoryId,
            title: dto.Title,
            summary: dto.Summary,
            // Language tag normalised to lowercase; CERT-Bund content is German ("de").
            language: dto.Language?.ToLowerInvariant() ?? "de",
            published: dto.Published,
            modified: dto.Modified,
            severity: MapSeverity(dto.Severity),
            exploitKnown: false,
            aliases: aliases,
            references: references,
            affectedPackages: packages,
            cvssMetrics: Array.Empty<CvssMetric>(),
            provenance: new[] { provenance });
    }

    // Aliases = advisory ID + CVE IDs, de-duplicated case-insensitively and
    // sorted for deterministic output.
    private static IReadOnlyList<string> BuildAliases(CertBundAdvisoryDto dto)
    {
        var aliases = new List<string>(capacity: 4) { dto.AdvisoryId };
        foreach (var cve in dto.CveIds)
        {
            if (!string.IsNullOrWhiteSpace(cve))
            {
                aliases.Add(cve);
            }
        }
        return aliases
            .Where(static alias => !string.IsNullOrWhiteSpace(alias))
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static alias => alias, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    // Emits the detail API link first (kind "details"), then upstream references,
    // de-duplicated by URL (ordinal — URLs are case-sensitive) and sorted by URL.
    private static IReadOnlyList<AdvisoryReference> BuildReferences(CertBundAdvisoryDto dto, DateTimeOffset recordedAt)
    {
        var references = new List<AdvisoryReference>
        {
            new(dto.DetailUri.ToString(), "details", "cert-bund", null, new AdvisoryProvenance(
                CertBundConnectorPlugin.SourceName,
                "reference",
                dto.DetailUri.ToString(),
                recordedAt,
                new[] { ProvenanceFieldMasks.References }))
        };
        foreach (var reference in dto.References)
        {
            if (string.IsNullOrWhiteSpace(reference.Url))
            {
                continue;
            }
            references.Add(new AdvisoryReference(
                reference.Url,
                kind: "reference",
                sourceTag: "cert-bund",
                summary: reference.Label,
                provenance: new AdvisoryProvenance(
                    CertBundConnectorPlugin.SourceName,
                    "reference",
                    reference.Url,
                    recordedAt,
                    new[] { ProvenanceFieldMasks.References })));
        }
        return references
            .DistinctBy(static reference => reference.Url, StringComparer.Ordinal)
            .OrderBy(static reference => reference.Url, StringComparer.Ordinal)
            .ToArray();
    }

    // Vendor packages identified as "<vendor> <name>" (vendor falls back to
    // "Unspecified"); free-form upstream version text is kept verbatim as a
    // "string"-kind range expression. De-duplicated and sorted by identifier.
    private static IReadOnlyList<AffectedPackage> BuildPackages(CertBundAdvisoryDto dto, DateTimeOffset recordedAt)
    {
        if (dto.Products.Count == 0)
        {
            return Array.Empty<AffectedPackage>();
        }
        var packages = new List<AffectedPackage>(dto.Products.Count);
        foreach (var product in dto.Products)
        {
            var vendor = Validation.TrimToNull(product.Vendor) ?? "Unspecified";
            var name = Validation.TrimToNull(product.Name);
            var identifier = name is null ? vendor : $"{vendor} {name}";
            var provenance = new AdvisoryProvenance(
                CertBundConnectorPlugin.SourceName,
                "package",
                identifier,
                recordedAt,
                new[] { ProvenanceFieldMasks.AffectedPackages });
            var ranges = string.IsNullOrWhiteSpace(product.Versions)
                ? Array.Empty<AffectedVersionRange>()
                : new[]
                {
                    new AffectedVersionRange(
                        rangeKind: "string",
                        introducedVersion: null,
                        fixedVersion: null,
                        lastAffectedVersion: null,
                        rangeExpression: product.Versions,
                        provenance: new AdvisoryProvenance(
                            CertBundConnectorPlugin.SourceName,
                            "package-range",
                            product.Versions,
                            recordedAt,
                            new[] { ProvenanceFieldMasks.VersionRanges }))
                };
            packages.Add(new AffectedPackage(
                AffectedPackageTypes.Vendor,
                identifier,
                platform: null,
                versionRanges: ranges,
                statuses: Array.Empty<AffectedPackageStatus>(),
                provenance: new[] { provenance },
                normalizedVersions: Array.Empty<NormalizedVersionRule>()));
        }
        return packages
            .DistinctBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
            .OrderBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    // Normalises German severity labels to canonical english values; unknown
    // labels are passed through lowercased rather than dropped.
    private static string? MapSeverity(string? severity)
    {
        if (string.IsNullOrWhiteSpace(severity))
        {
            return null;
        }
        return severity.ToLowerInvariant() switch
        {
            "hoch" or "high" => "high",
            "mittel" or "medium" => "medium",
            "gering" or "low" => "low",
            _ => severity.ToLowerInvariant(),
        };
    }
}

View File

@@ -0,0 +1,22 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.CertBund;
/// <summary>Well-known job kind identifiers for the CERT-Bund connector.</summary>
internal static class CertBundJobKinds
{
    public const string Fetch = "source:cert-bund:fetch";
}

/// <summary>Scheduler entry point that triggers a CERT-Bund fetch cycle.</summary>
internal sealed class CertBundFetchJob : IJob
{
    private readonly CertBundConnector _connector;

    public CertBundFetchJob(CertBundConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}

View File

@@ -0,0 +1,39 @@
# CERT-Bund Security Advisories Connector Notes
## Publication endpoints
- **RSS feed (latest 250 advisories)** `https://wid.cert-bund.de/content/public/securityAdvisory/rss`. The feed refreshes quickly; the current window spans roughly 6 days of activity, so fetch jobs must run frequently to avoid churn.
- **Portal bootstrap** `https://wid.cert-bund.de/portal/` is hit once per process start to prime the session (`client_config` cookie) before any API calls.
- **Detail API** `https://wid.cert-bund.de/portal/api/securityadvisory?name=<ID>`. The connector reuses the bootstrapped `SocketsHttpHandler` so cookies and headers match the Angular SPA. Manual reproduction requires the same cookie container; otherwise the endpoint responds with the shell HTML document.
## Telemetry
The OpenTelemetry meter is `StellaOps.Concelier.Connector.CertBund`. Key instruments:
| Metric | Type | Notes |
| --- | --- | --- |
| `certbund.feed.fetch.attempts` / `.success` / `.failures` | counter | Feed poll lifecycle. |
| `certbund.feed.items.count` | histogram | Items returned per RSS fetch. |
| `certbund.feed.enqueued.count` | histogram | Detail documents queued per cycle (post-dedupe, before truncation). |
| `certbund.feed.coverage.days` | histogram | Rolling window (fetch time minus oldest published entry). Useful to alert when feed depth contracts. |
| `certbund.detail.fetch.*` | counter | Attempts, successes, HTTP 304, and failure counts; failures are tagged by reason (`skipped`, `exception`). |
| `certbund.parse.success` / `.failures` | counter | Parsing outcomes; histograms capture product and CVE counts. |
| `certbund.map.success` / `.failures` | counter | Canonical mapping results; histograms capture affected-package and alias volume. |
Dashboards should chart coverage days and enqueued counts alongside fetch failures: sharp drops indicate the upstream window tightened or parsing stalled.
## Logging signals
- `CERT-Bund fetch cycle: feed items …` summarises each RSS run (enqueued, already-known, HTTP 304, failures, coverage window).
- Parse and map stages log corresponding counts when work remains in the cursor.
- Errors include advisory/document identifiers to simplify replays.
## Historical coverage
- RSS contains the newest **250** items (≈6 days at the current publication rate). The connector prunes the “known advisory” set to 512 IDs to avoid unbounded memory but retains enough headroom for short-term replay.
- Older advisories remain accessible through the same detail API (`WID-SEC-<year>-<sequence>` identifiers). For deep backfills run a scripted sweep that queues historical IDs in descending order; the connector will persist any payloads that still resolve. Document these batches under source state comments so Merge/Docs can track provenance.
## Locale & translation stance
- CERT-Bund publishes advisory titles and summaries **only in German** (language tag `de`). The connector preserves original casing/content and sets `Advisory.Language = "de"`.
- Operator guidance:
1. Front-line analysts consuming Concelier data should maintain German literacy or rely on approved machine-translation pipelines.
2. When mirroring advisories into English dashboards, store translations outside the canonical advisory payload to keep determinism. Suggested approach: create an auxiliary collection keyed by advisory ID with timestamped translated snippets.
3. Offline Kit bundles must document that CERT-Bund content is untranslated to avoid surprise during audits.
The Docs guild will surface the translation policy (retain German source, optionally layer operator-provided translations) in the broader i18n section; this README is the connector-level reference.

View File

@@ -0,0 +1,16 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,13 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|FEEDCONN-CERTBUND-02-001 Research CERT-Bund advisory endpoints|BE-Conn-CERTBUND|Research|**DONE (2025-10-11)** Confirmed public RSS at `https://wid.cert-bund.de/content/public/securityAdvisory/rss` (HTTP 200 without cookies), 250-item window, German titles/categories, and detail links pointing to an Angular SPA. Captured header profile (no cache hints) and logged an open item to discover the JSON API used by the `portal` frontend.|
|FEEDCONN-CERTBUND-02-002 Fetch job & state persistence|BE-Conn-CERTBUND|Source.Common, Storage.Mongo|**DONE (2025-10-14)** `CertBundConnector.FetchAsync` consumes RSS via session-bootstrapped client, stores per-advisory JSON documents with metadata + SHA, throttles detail requests, and maintains cursor state (pending docs/mappings, known advisory IDs, last published).|
|FEEDCONN-CERTBUND-02-003 Parser/DTO implementation|BE-Conn-CERTBUND|Source.Common|**DONE (2025-10-14)** Detail JSON piped through `CertBundDetailParser` (raw DOM sanitised to HTML), capturing severity, CVEs, product list, and references into DTO records (`cert-bund.detail.v1`).|
|FEEDCONN-CERTBUND-02-004 Canonical mapping & range primitives|BE-Conn-CERTBUND|Models|**DONE (2025-10-14)** `CertBundMapper` emits canonical advisories (aliases, references, vendor package ranges, provenance) with severity normalisation and deterministic ordering.|
|FEEDCONN-CERTBUND-02-005 Regression fixtures & tests|QA|Testing|**DONE (2025-10-14)** Added `StellaOps.Concelier.Connector.CertBund.Tests` covering fetch→parse→map against canned RSS/JSON fixtures; integration harness uses Mongo2Go + canned HTTP handler; fixtures regenerate via `UPDATE_CERTBUND_FIXTURES=1`.|
|FEEDCONN-CERTBUND-02-006 Telemetry & documentation|DevEx|Docs|**DONE (2025-10-15)** Added `CertBundDiagnostics` (meter `StellaOps.Concelier.Connector.CertBund`) with fetch/parse/map counters + histograms, recorded coverage days, wired stage summary logs, and published the ops runbook (`docs/ops/concelier-certbund-operations.md`).|
|FEEDCONN-CERTBUND-02-007 Feed history & locale assessment|BE-Conn-CERTBUND|Research|**DONE (2025-10-15)** Measured RSS retention (~6 days / ≈250 items), captured connector-driven backfill guidance in the runbook, and aligned locale guidance (preserve `language=de`, Docs glossary follow-up). **Next:** coordinate with Tools to land the state-seeding helper so scripted backfills replace manual Mongo tweaks.|
|FEEDCONN-CERTBUND-02-008 Session bootstrap & cookie strategy|BE-Conn-CERTBUND|Source.Common|**DONE (2025-10-14)** Feed client primes the portal session (cookie container via `SocketsHttpHandler`), shares cookies across detail requests, and documents bootstrap behaviour in options (`PortalBootstrapUri`).|
|FEEDCONN-CERTBUND-02-009 Offline Kit export packaging|BE-Conn-CERTBUND, Docs|Offline Kit|**DONE (2025-10-17)** Added `tools/certbund_offline_snapshot.py` to capture search/export JSON, emit deterministic manifests + SHA files, and refreshed docs (`docs/ops/concelier-certbund-operations.md`, `docs/24_OFFLINE_KIT.md`) with offline-kit instructions and manifest layout guidance. Seed data README/ignore rules cover local snapshot hygiene.|
|FEEDCONN-CERTBUND-02-010 Normalized range translator|BE-Conn-CERTBUND|Merge coordination (`FEEDMERGE-COORD-02-900`)|**TODO (due 2025-10-22)** Translate `product.Versions` phrases (e.g., `2023.1 bis 2024.2`, `alle`) into comparator strings for `SemVerRangeRuleBuilder`, emit `NormalizedVersions` with `certbund:{advisoryId}:{vendor}` provenance, and extend tests/README with localisation notes.|

View File

@@ -0,0 +1,38 @@
# AGENTS
## Role
Implement the CERT/CC (Carnegie Mellon CERT Coordination Center) advisory connector so Concelier can ingest US CERT coordination bulletins.
## Scope
- Identify CERT/CC advisory publication format (VU#, blog, RSS, JSON) and define fetch cadence/windowing.
- Implement fetch, parse, and mapping jobs with cursor persistence and dedupe.
- Normalise advisory content (summary, impacted vendors, products, recommended mitigations, CVEs).
- Produce canonical `Advisory` objects including aliases, references, affected packages, and range primitive metadata.
- Supply fixtures and deterministic regression tests.
## Participants
- `Source.Common` (HTTP/fetch utilities, DTO storage).
- `Storage.Mongo` (raw/document/DTO/advisory stores and state).
- `Concelier.Models` (canonical structures).
- `Concelier.Testing` (integration tests and snapshots).
## Interfaces & Contracts
- Job kinds: `certcc:fetch`, `certcc:parse`, `certcc:map`.
- Persist upstream caching metadata (ETag/Last-Modified) when available.
- Aliases should capture CERT/CC VU IDs and referenced CVEs.
## In/Out of scope
In scope:
- End-to-end connector with range primitive instrumentation and telemetry.
Out of scope:
- ICS-CERT alerts (handled by dedicated connector) or blog posts unrelated to advisories.
## Observability & Security Expectations
- Log fetch and mapping statistics; surface failures with backoff.
- Sanitise HTML sources before persistence.
- Respect upstream throttling via retry/backoff.
## Tests
- Add `StellaOps.Concelier.Connector.CertCc.Tests` to cover fetch/parse/map with canned fixtures.
- Snapshot canonical advisories and support UPDATE flag for regeneration.
- Ensure deterministic ordering and timestamp normalisation.

View File

@@ -0,0 +1,779 @@
using System.Collections.Generic;
using System.Globalization;
using System.Net;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertCc.Configuration;
using StellaOps.Concelier.Connector.CertCc.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertCc;
/// <summary>
/// Feed connector for CERT/CC (VINCE) vulnerability notes. Runs in three stages:
/// <see cref="FetchAsync"/> downloads summary feeds and per-note detail bundles,
/// <see cref="ParseAsync"/> turns raw documents into a versioned DTO, and
/// <see cref="MapAsync"/> converts DTOs into canonical advisories. Progress and
/// retry state live in a cursor persisted via <see cref="ISourceStateRepository"/>.
/// </summary>
public sealed class CertCcConnector : IFeedConnector
{
    // Shared serializer settings for DTO persistence (ParseAsync) and rehydration (MapAsync);
    // camelCase + null-skipping keeps the stored payload compact and deterministic.
    private static readonly JsonSerializerOptions DtoSerializerOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false,
    };

    // Substituted for optional detail documents (vendors/vuls/vendor statuses) that were never fetched.
    private static readonly byte[] EmptyArrayPayload = Encoding.UTF8.GetBytes("[]");

    // VINCE endpoints requested per note. "note" is mandatory; the others are treated as optional
    // (see ShouldTreatAsMissing for which upstream status codes downgrade them to "missing").
    private static readonly string[] DetailEndpoints = { "note", "vendors", "vuls", "vendors-vuls" };

    private readonly CertCcSummaryPlanner _summaryPlanner;      // builds the summary-window request plan
    private readonly SourceFetchService _fetchService;          // conditional HTTP fetch + document persistence
    private readonly RawDocumentStorage _rawDocumentStorage;    // GridFS payload download
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;   // cursor + failure backoff state
    private readonly CertCcOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<CertCcConnector> _logger;
    private readonly CertCcDiagnostics _diagnostics;            // counters for fetch/parse/map outcomes

    /// <summary>
    /// Wires all collaborators; options are validated eagerly so misconfiguration fails at startup.
    /// <paramref name="timeProvider"/> may be null and falls back to <see cref="TimeProvider.System"/>.
    /// </summary>
    public CertCcConnector(
        CertCcSummaryPlanner summaryPlanner,
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IOptions<CertCcOptions> options,
        CertCcDiagnostics diagnostics,
        TimeProvider? timeProvider,
        ILogger<CertCcConnector> logger)
    {
        _summaryPlanner = summaryPlanner ?? throw new ArgumentNullException(nameof(summaryPlanner));
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public string SourceName => CertCcConnectorPlugin.SourceName;

    /// <summary>
    /// Fetch stage: first replays note bundles that failed on a previous run, then walks the
    /// summary plan, downloads new/changed summaries (conditional via ETag/Last-Modified), and
    /// queues a detail bundle for each referenced note. Note processing is capped per run by
    /// <see cref="CertCcOptions.MaxNotesPerFetch"/>; the cursor is persisted even on failure.
    /// </summary>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var pendingNotes = new HashSet<string>(cursor.PendingNotes, StringComparer.OrdinalIgnoreCase);
        var processedNotes = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var now = _timeProvider.GetUtcNow();
        var remainingBudget = _options.MaxNotesPerFetch;
        // Resume notes that previously failed before fetching new summaries.
        if (pendingNotes.Count > 0 && remainingBudget > 0)
        {
            var replay = pendingNotes.ToArray();
            foreach (var noteId in replay)
            {
                if (remainingBudget <= 0)
                {
                    break;
                }
                try
                {
                    if (!processedNotes.Add(noteId))
                    {
                        continue;
                    }
                    // Skip notes whose complete document bundle is already queued for parsing.
                    if (await HasPendingDocumentBundleAsync(noteId, pendingDocuments, cancellationToken).ConfigureAwait(false))
                    {
                        pendingNotes.Remove(noteId);
                        continue;
                    }
                    await FetchNoteBundleAsync(noteId, null, pendingDocuments, pendingNotes, cancellationToken).ConfigureAwait(false);
                    // Only charge the budget when the bundle completed (the note left the pending set).
                    if (!pendingNotes.Contains(noteId))
                    {
                        remainingBudget--;
                    }
                }
                catch (Exception ex)
                {
                    // Record the failure (with a 5-minute backoff hint) before surfacing it.
                    await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
                    throw;
                }
            }
        }
        var plan = _summaryPlanner.CreatePlan(cursor.SummaryState);
        _diagnostics.PlanEvaluated(plan.Window, plan.Requests.Count);
        try
        {
            foreach (var request in plan.Requests)
            {
                cancellationToken.ThrowIfCancellationRequested();
                // Summaries keep refreshing after the note budget is spent; only note processing stops.
                var shouldProcessNotes = remainingBudget > 0;
                try
                {
                    _diagnostics.SummaryFetchAttempt(request.Scope);
                    var metadata = BuildSummaryMetadata(request);
                    var existingSummary = await _documentStore.FindBySourceAndUriAsync(SourceName, request.Uri.ToString(), cancellationToken).ConfigureAwait(false);
                    var fetchRequest = new SourceFetchRequest(
                        CertCcOptions.HttpClientName,
                        SourceName,
                        HttpMethod.Get,
                        request.Uri,
                        metadata,
                        existingSummary?.Etag,
                        existingSummary?.LastModified,
                        null,
                        new[] { "application/json" });
                    var result = await _fetchService.FetchAsync(fetchRequest, cancellationToken).ConfigureAwait(false);
                    if (result.IsNotModified)
                    {
                        _diagnostics.SummaryFetchUnchanged(request.Scope);
                        continue;
                    }
                    if (!result.IsSuccess || result.Document is null)
                    {
                        // Summary failures are non-fatal: count them and move to the next request.
                        _diagnostics.SummaryFetchFailure(request.Scope);
                        continue;
                    }
                    _diagnostics.SummaryFetchSuccess(request.Scope);
                    if (!shouldProcessNotes)
                    {
                        continue;
                    }
                    var noteTokens = await ReadSummaryNotesAsync(result.Document, cancellationToken).ConfigureAwait(false);
                    foreach (var token in noteTokens)
                    {
                        if (remainingBudget <= 0)
                        {
                            break;
                        }
                        var noteId = TryNormalizeNoteToken(token, out var vuIdentifier);
                        if (string.IsNullOrEmpty(noteId))
                        {
                            continue;
                        }
                        if (!processedNotes.Add(noteId))
                        {
                            continue;
                        }
                        await FetchNoteBundleAsync(noteId, vuIdentifier, pendingDocuments, pendingNotes, cancellationToken).ConfigureAwait(false);
                        if (!pendingNotes.Contains(noteId))
                        {
                            remainingBudget--;
                        }
                    }
                }
                catch
                {
                    _diagnostics.SummaryFetchFailure(request.Scope);
                    throw;
                }
            }
        }
        catch (Exception ex)
        {
            // Persist the accumulated pending state before failing so the next run resumes cleanly.
            // NOTE(review): the summary window (plan.NextState) is intentionally NOT advanced here.
            var failureCursor = cursor
                .WithPendingSummaries(Array.Empty<Guid>())
                .WithPendingNotes(pendingNotes)
                .WithPendingDocuments(pendingDocuments)
                .WithPendingMappings(pendingMappings)
                .WithLastRun(now);
            await UpdateCursorAsync(failureCursor, cancellationToken).ConfigureAwait(false);
            await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }
        var updatedCursor = cursor
            .WithSummaryState(plan.NextState)
            .WithPendingSummaries(Array.Empty<Guid>())
            .WithPendingNotes(pendingNotes)
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings)
            .WithLastRun(now);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns <c>true</c> when all four detail endpoints for <paramref name="noteId"/> already have
    /// pending documents queued (matched via the "certcc.noteId"/"certcc.endpoint" metadata keys),
    /// meaning a re-fetch during replay would be redundant.
    /// </summary>
    private async Task<bool> HasPendingDocumentBundleAsync(string noteId, HashSet<Guid> pendingDocuments, CancellationToken cancellationToken)
    {
        if (pendingDocuments.Count == 0)
        {
            return false;
        }
        var required = new HashSet<string>(DetailEndpoints, StringComparer.OrdinalIgnoreCase);
        foreach (var documentId in pendingDocuments)
        {
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document?.Metadata is null)
            {
                continue;
            }
            if (!document.Metadata.TryGetValue("certcc.noteId", out var metadataNoteId) ||
                !string.Equals(metadataNoteId, noteId, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }
            // Documents without an endpoint marker are assumed to be the note body itself.
            var endpoint = document.Metadata.TryGetValue("certcc.endpoint", out var endpointValue)
                ? endpointValue
                : "note";
            required.Remove(endpoint);
            if (required.Count == 0)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Parse stage: groups pending documents per note, downloads the raw payloads (optional
    /// endpoints default to an empty JSON array), runs <see cref="CertCcNoteParser"/>, and stores
    /// the result as a "certcc.vince.note.v1" DTO keyed by the note document. Skipped entirely
    /// when <see cref="CertCcOptions.EnableDetailMapping"/> is off.
    /// </summary>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        if (!_options.EnableDetailMapping)
        {
            return;
        }
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }
        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        var groups = new Dictionary<string, NoteDocumentGroup>(StringComparer.OrdinalIgnoreCase);
        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                // Document disappeared; drop it from the queue.
                pendingDocuments.Remove(documentId);
                continue;
            }
            if (!TryGetMetadata(document, "certcc.noteId", out var noteId) || string.IsNullOrWhiteSpace(noteId))
            {
                // Cannot associate the document with a note; mark it failed and dequeue.
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                continue;
            }
            var endpoint = TryGetMetadata(document, "certcc.endpoint", out var endpointValue)
                ? endpointValue
                : "note";
            var group = groups.TryGetValue(noteId, out var existing)
                ? existing
                : (groups[noteId] = new NoteDocumentGroup(noteId));
            group.Add(endpoint, document);
        }
        foreach (var group in groups.Values)
        {
            cancellationToken.ThrowIfCancellationRequested();
            if (group.Note is null)
            {
                // The mandatory note document has not arrived yet; leave the group queued.
                continue;
            }
            try
            {
                var noteBytes = await DownloadDocumentAsync(group.Note, cancellationToken).ConfigureAwait(false);
                var vendorsBytes = group.Vendors is null
                    ? EmptyArrayPayload
                    : await DownloadDocumentAsync(group.Vendors, cancellationToken).ConfigureAwait(false);
                var vulsBytes = group.Vuls is null
                    ? EmptyArrayPayload
                    : await DownloadDocumentAsync(group.Vuls, cancellationToken).ConfigureAwait(false);
                var vendorStatusesBytes = group.VendorStatuses is null
                    ? EmptyArrayPayload
                    : await DownloadDocumentAsync(group.VendorStatuses, cancellationToken).ConfigureAwait(false);
                var dto = CertCcNoteParser.Parse(noteBytes, vendorsBytes, vulsBytes, vendorStatusesBytes);
                // Round-trip through JSON so the stored BSON matches the DTO serializer contract.
                var json = JsonSerializer.Serialize(dto, DtoSerializerOptions);
                var payload = MongoDB.Bson.BsonDocument.Parse(json);
                _diagnostics.ParseSuccess(
                    dto.Vendors.Count,
                    dto.VendorStatuses.Count,
                    dto.Vulnerabilities.Count);
                var dtoRecord = new DtoRecord(
                    Guid.NewGuid(),
                    group.Note.Id,
                    SourceName,
                    "certcc.vince.note.v1",
                    payload,
                    _timeProvider.GetUtcNow());
                await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
                // The note document advances to mapping; auxiliary documents are terminal here.
                await _documentStore.UpdateStatusAsync(group.Note.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
                pendingMappings.Add(group.Note.Id);
                pendingDocuments.Remove(group.Note.Id);
                if (group.Vendors is not null)
                {
                    await _documentStore.UpdateStatusAsync(group.Vendors.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
                    pendingDocuments.Remove(group.Vendors.Id);
                }
                if (group.Vuls is not null)
                {
                    await _documentStore.UpdateStatusAsync(group.Vuls.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
                    pendingDocuments.Remove(group.Vuls.Id);
                }
                if (group.VendorStatuses is not null)
                {
                    await _documentStore.UpdateStatusAsync(group.VendorStatuses.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
                    pendingDocuments.Remove(group.VendorStatuses.Id);
                }
            }
            catch (Exception ex)
            {
                // Parsing failures are isolated per note: mark the note failed and continue with the rest.
                _diagnostics.ParseFailure();
                _logger.LogError(ex, "CERT/CC parse failed for note {NoteId}", group.NoteId);
                if (group.Note is not null)
                {
                    await _documentStore.UpdateStatusAsync(group.Note.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                    pendingDocuments.Remove(group.Note.Id);
                }
            }
        }
        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Map stage: rehydrates each pending DTO, maps it to a canonical advisory via
    /// <see cref="CertCcMapper"/>, and upserts the result. Every pending mapping is removed from
    /// the queue whether it succeeds or fails (failures mark the document as Failed).
    /// </summary>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        if (!_options.EnableDetailMapping)
        {
            return;
        }
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }
        var pendingMappings = cursor.PendingMappings.ToHashSet();
        foreach (var documentId in cursor.PendingMappings)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (dtoRecord is null || document is null)
            {
                pendingMappings.Remove(documentId);
                continue;
            }
            try
            {
                var json = dtoRecord.Payload.ToJson();
                var dto = JsonSerializer.Deserialize<CertCcNoteDto>(json, DtoSerializerOptions);
                if (dto is null)
                {
                    throw new InvalidOperationException($"CERT/CC DTO payload deserialized as null for document {documentId}.");
                }
                var advisory = CertCcMapper.Map(dto, document, dtoRecord, SourceName);
                var affectedCount = advisory.AffectedPackages.Length;
                var normalizedRuleCount = advisory.AffectedPackages.Sum(static package => package.NormalizedVersions.Length);
                await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
                _diagnostics.MapSuccess(affectedCount, normalizedRuleCount);
            }
            catch (Exception ex)
            {
                _diagnostics.MapFailure();
                _logger.LogError(ex, "CERT/CC mapping failed for document {DocumentId}", documentId);
                await _documentStore.UpdateStatusAsync(documentId, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            }
            pendingMappings.Remove(documentId);
        }
        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Fetches all four detail endpoints for a note. Missing-but-optional endpoints (see
    /// <see cref="ShouldTreatAsMissing"/>) are logged and skipped; any other failure re-queues
    /// the note in <paramref name="pendingNotes"/> and rethrows so the caller can back off.
    /// Successful (or unchanged-with-existing) documents are added to <paramref name="pendingDocuments"/>.
    /// </summary>
    private async Task FetchNoteBundleAsync(
        string noteId,
        string? vuIdentifier,
        HashSet<Guid> pendingDocuments,
        HashSet<string> pendingNotes,
        CancellationToken cancellationToken)
    {
        var missingEndpoints = new List<(string Endpoint, HttpStatusCode? Status)>();
        try
        {
            foreach (var endpoint in DetailEndpoints)
            {
                cancellationToken.ThrowIfCancellationRequested();
                var uri = BuildDetailUri(noteId, endpoint);
                var metadata = BuildDetailMetadata(noteId, vuIdentifier, endpoint);
                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, uri.ToString(), cancellationToken).ConfigureAwait(false);
                var request = new SourceFetchRequest(CertCcOptions.HttpClientName, SourceName, uri)
                {
                    Metadata = metadata,
                    ETag = existing?.Etag,
                    LastModified = existing?.LastModified,
                    AcceptHeaders = new[] { "application/json" },
                };
                SourceFetchResult result;
                _diagnostics.DetailFetchAttempt(endpoint);
                try
                {
                    result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
                }
                catch (HttpRequestException httpEx)
                {
                    // Some failures carry no StatusCode; fall back to scraping it from the message text.
                    var status = httpEx.StatusCode ?? TryParseStatusCodeFromMessage(httpEx.Message);
                    if (ShouldTreatAsMissing(status, endpoint))
                    {
                        _diagnostics.DetailFetchMissing(endpoint);
                        missingEndpoints.Add((endpoint, status));
                        continue;
                    }
                    _diagnostics.DetailFetchFailure(endpoint);
                    throw;
                }
                Guid documentId;
                if (result.IsSuccess && result.Document is not null)
                {
                    _diagnostics.DetailFetchSuccess(endpoint);
                    documentId = result.Document.Id;
                }
                else if (result.IsNotModified)
                {
                    _diagnostics.DetailFetchUnchanged(endpoint);
                    if (existing is null)
                    {
                        // 304 without a stored copy: nothing to queue for this endpoint.
                        continue;
                    }
                    // Re-queue the previously stored document so the bundle is complete.
                    documentId = existing.Id;
                }
                else
                {
                    _diagnostics.DetailFetchFailure(endpoint);
                    _logger.LogWarning(
                        "CERT/CC detail endpoint {Endpoint} returned {StatusCode} for note {NoteId}; will retry.",
                        endpoint,
                        (int)result.StatusCode,
                        noteId);
                    // Escalate to the outer catch so the note is re-queued for a later run.
                    throw new HttpRequestException(
                        $"CERT/CC endpoint '{endpoint}' returned {(int)result.StatusCode} ({result.StatusCode}) for note {noteId}.",
                        null,
                        result.StatusCode);
                }
                pendingDocuments.Add(documentId);
                // Optional politeness delay between detail requests.
                if (_options.DetailRequestDelay > TimeSpan.Zero)
                {
                    await Task.Delay(_options.DetailRequestDelay, cancellationToken).ConfigureAwait(false);
                }
            }
            if (missingEndpoints.Count > 0)
            {
                var formatted = string.Join(
                    ", ",
                    missingEndpoints.Select(item =>
                        item.Status.HasValue
                            ? $"{item.Endpoint} ({(int)item.Status.Value})"
                            : item.Endpoint));
                _logger.LogWarning(
                    "CERT/CC detail fetch completed with missing endpoints for note {NoteId}: {Endpoints}",
                    noteId,
                    formatted);
            }
            pendingNotes.Remove(noteId);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "CERT/CC detail fetch failed for note {NoteId}", noteId);
            pendingNotes.Add(noteId);
            throw;
        }
    }

    /// <summary>Builds the metadata recorded on a summary document (scope, zero-padded year, optional month).</summary>
    private static Dictionary<string, string> BuildSummaryMetadata(CertCcSummaryRequest request)
    {
        var metadata = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            ["certcc.scope"] = request.Scope.ToString().ToLowerInvariant(),
            ["certcc.year"] = request.Year.ToString("D4", CultureInfo.InvariantCulture),
        };
        if (request.Month.HasValue)
        {
            metadata["certcc.month"] = request.Month.Value.ToString("D2", CultureInfo.InvariantCulture);
        }
        return metadata;
    }

    /// <summary>Builds the metadata recorded on a detail document (endpoint, note id, optional VU# identifier).</summary>
    private static Dictionary<string, string> BuildDetailMetadata(string noteId, string? vuIdentifier, string endpoint)
    {
        var metadata = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            ["certcc.endpoint"] = endpoint,
            ["certcc.noteId"] = noteId,
        };
        if (!string.IsNullOrWhiteSpace(vuIdentifier))
        {
            metadata["certcc.vuid"] = vuIdentifier;
        }
        return metadata;
    }

    /// <summary>
    /// Downloads a summary payload from GridFS and extracts its note tokens.
    /// Returns an empty list when the document has no stored payload.
    /// </summary>
    private async Task<IReadOnlyList<string>> ReadSummaryNotesAsync(DocumentRecord document, CancellationToken cancellationToken)
    {
        if (!document.GridFsId.HasValue)
        {
            return Array.Empty<string>();
        }
        var payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        return CertCcSummaryParser.ParseNotes(payload);
    }

    /// <summary>Downloads a document's raw payload from GridFS; throws if no payload was stored.</summary>
    private async Task<byte[]> DownloadDocumentAsync(DocumentRecord document, CancellationToken cancellationToken)
    {
        if (!document.GridFsId.HasValue)
        {
            throw new InvalidOperationException($"Document {document.Id} has no GridFS payload.");
        }
        return await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Builds the API URI for a given note/endpoint pair relative to <see cref="CertCcOptions.BaseApiUri"/>.
    /// Unknown endpoints fall back to the bare note URI.
    /// </summary>
    private Uri BuildDetailUri(string noteId, string endpoint)
    {
        var suffix = endpoint switch
        {
            "note" => $"{noteId}/",
            "vendors" => $"{noteId}/vendors/",
            "vuls" => $"{noteId}/vuls/",
            "vendors-vuls" => $"{noteId}/vendors/vuls/",
            _ => $"{noteId}/",
        };
        return new Uri(_options.BaseApiUri, suffix);
    }

    /// <summary>
    /// Normalises a summary token into a digits-only note id (all non-digit characters dropped).
    /// <paramref name="vuIdentifier"/> becomes the original token with spaces removed when it already
    /// starts with "vu" (case-insensitive), otherwise a synthesised "VU#&lt;digits&gt;".
    /// Returns null for blank tokens or tokens containing no digits.
    /// </summary>
    private static string? TryNormalizeNoteToken(string token, out string? vuIdentifier)
    {
        vuIdentifier = null;
        if (string.IsNullOrWhiteSpace(token))
        {
            return null;
        }
        var trimmed = token.Trim();
        var digits = new string(trimmed.Where(char.IsDigit).ToArray());
        if (digits.Length == 0)
        {
            return null;
        }
        vuIdentifier = trimmed.StartsWith("vu", StringComparison.OrdinalIgnoreCase)
            ? trimmed.Replace(" ", string.Empty, StringComparison.Ordinal)
            : $"VU#{digits}";
        return digits;
    }

    /// <summary>Try-pattern accessor for a document metadata value; <paramref name="value"/> is empty on failure.</summary>
    private static bool TryGetMetadata(DocumentRecord document, string key, out string value)
    {
        value = string.Empty;
        if (document.Metadata is null)
        {
            return false;
        }
        if (!document.Metadata.TryGetValue(key, out var metadataValue))
        {
            return false;
        }
        value = metadataValue;
        return true;
    }

    /// <summary>Loads the persisted cursor; a missing state record yields the default cursor.</summary>
    private async Task<CertCcCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return CertCcCursor.FromBson(record?.Cursor);
    }

    /// <summary>Persists the cursor, stamping the current time as the completion timestamp.</summary>
    private async Task UpdateCursorAsync(CertCcCursor cursor, CancellationToken cancellationToken)
    {
        var completedAt = _timeProvider.GetUtcNow();
        await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Mutable accumulator that collects the per-endpoint documents belonging to one note
    /// while <see cref="ParseAsync"/> groups the pending queue.
    /// </summary>
    private sealed class NoteDocumentGroup
    {
        public NoteDocumentGroup(string noteId)
        {
            NoteId = noteId;
        }
        public string NoteId { get; }
        public DocumentRecord? Note { get; private set; }
        public DocumentRecord? Vendors { get; private set; }
        public DocumentRecord? Vuls { get; private set; }
        public DocumentRecord? VendorStatuses { get; private set; }
        /// <summary>Slots a document by endpoint; unrecognised endpoints only fill an empty Note slot.</summary>
        public void Add(string endpoint, DocumentRecord document)
        {
            switch (endpoint)
            {
                case "note":
                    Note = document;
                    break;
                case "vendors":
                    Vendors = document;
                    break;
                case "vuls":
                    Vuls = document;
                    break;
                case "vendors-vuls":
                    VendorStatuses = document;
                    break;
                default:
                    Note ??= document;
                    break;
            }
        }
    }

    /// <summary>
    /// Decides whether an HTTP failure means the endpoint is legitimately absent upstream
    /// (skip and continue) rather than a transient error (retry). 404/410 and 403 are only
    /// tolerated for the optional non-"note" endpoints; the note itself must always resolve.
    /// </summary>
    private static bool ShouldTreatAsMissing(HttpStatusCode? statusCode, string endpoint)
    {
        if (statusCode is null)
        {
            return false;
        }
        if (statusCode is HttpStatusCode.NotFound or HttpStatusCode.Gone)
        {
            return !string.Equals(endpoint, "note", StringComparison.OrdinalIgnoreCase);
        }
        // Treat vendors/vendors-vuls/vuls 403 as optional air-gapped responses.
        if (statusCode == HttpStatusCode.Forbidden && !string.Equals(endpoint, "note", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
        return false;
    }

    /// <summary>
    /// Best-effort extraction of an HTTP status code from an exception message of the form
    /// "... status NNN ..." (case-insensitive). Returns null when no defined
    /// <see cref="HttpStatusCode"/> value can be recovered.
    /// </summary>
    private static HttpStatusCode? TryParseStatusCodeFromMessage(string? message)
    {
        if (string.IsNullOrWhiteSpace(message))
        {
            return null;
        }
        const string marker = "status ";
        var index = message.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (index < 0)
        {
            return null;
        }
        index += marker.Length;
        var end = index;
        while (end < message.Length && char.IsDigit(message[end]))
        {
            end++;
        }
        if (end == index)
        {
            return null;
        }
        if (int.TryParse(message[index..end], NumberStyles.Integer, CultureInfo.InvariantCulture, out var code) &&
            Enum.IsDefined(typeof(HttpStatusCode), code))
        {
            return (HttpStatusCode)code;
        }
        return null;
    }
}

View File

@@ -0,0 +1,21 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertCc;
/// <summary>
/// Plugin entry point exposing the CERT/CC connector to the host's plugin registry.
/// </summary>
public sealed class CertCcConnectorPlugin : IConnectorPlugin
{
    /// <summary>Stable source identifier shared by jobs, storage records, and diagnostics.</summary>
    public const string SourceName = "cert-cc";

    public string Name => SourceName;

    /// <summary>
    /// Reports whether the connector was registered in the container
    /// (i.e. <c>AddCertCcConnector</c> ran). Guards <paramref name="services"/>
    /// for consistency with <see cref="Create"/>.
    /// </summary>
    public bool IsAvailable(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        return services.GetService<CertCcConnector>() is not null;
    }

    /// <summary>Resolves the connector; throws if it was never registered.</summary>
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        return services.GetRequiredService<CertCcConnector>();
    }
}

View File

@@ -0,0 +1,50 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.CertCc.Configuration;
namespace StellaOps.Concelier.Connector.CertCc;
/// <summary>
/// DI routine that binds CERT/CC connector options from configuration and registers the
/// fetch job with the scheduler (without overwriting an operator-supplied definition).
/// </summary>
public sealed class CertCcDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:cert-cc";

    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Bind and validate options up front so misconfiguration surfaces at startup.
        services.AddCertCcConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        });

        services.AddTransient<CertCcFetchJob>();
        services.PostConfigure<JobSchedulerOptions>(static schedulerOptions =>
            EnsureJob(schedulerOptions, CertCcJobKinds.Fetch, typeof(CertCcFetchJob)));

        return services;
    }

    /// <summary>Adds a default job definition for <paramref name="kind"/> unless one already exists.</summary>
    private static void EnsureJob(JobSchedulerOptions schedulerOptions, string kind, Type jobType)
    {
        if (!schedulerOptions.Definitions.ContainsKey(kind))
        {
            schedulerOptions.Definitions[kind] = new JobDefinition(
                kind,
                jobType,
                schedulerOptions.DefaultTimeout,
                schedulerOptions.DefaultLeaseDuration,
                CronExpression: null,
                Enabled: true);
        }
    }
}

View File

@@ -0,0 +1,37 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertCc.Configuration;
using StellaOps.Concelier.Connector.CertCc.Internal;
using StellaOps.Concelier.Connector.Common.Http;
namespace StellaOps.Concelier.Connector.CertCc;
/// <summary>
/// Service-collection wiring for the CERT/CC connector: validated options, a dedicated
/// HTTP client pinned to the VINCE API host, and the planner/diagnostics/connector services.
/// </summary>
public static class CertCcServiceCollectionExtensions
{
    public static IServiceCollection AddCertCcConnector(this IServiceCollection services, Action<CertCcOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        // Re-validate after all configuration delegates have run.
        services.AddOptions<CertCcOptions>()
            .Configure(configure)
            .PostConfigure(static options => options.Validate());

        // Named HTTP client restricted to the configured API host.
        services.AddSourceHttpClient(CertCcOptions.HttpClientName, static (provider, clientOptions) =>
        {
            var connectorOptions = provider.GetRequiredService<IOptions<CertCcOptions>>().Value;
            clientOptions.BaseAddress = connectorOptions.BaseApiUri;
            clientOptions.UserAgent = "StellaOps.Concelier.CertCc/1.0";
            clientOptions.Timeout = TimeSpan.FromSeconds(20);
            clientOptions.AllowedHosts.Clear();
            clientOptions.AllowedHosts.Add(connectorOptions.BaseApiUri.Host);
        });

        // Planner and diagnostics are shared singletons; the connector itself is transient.
        services.TryAddSingleton<CertCcSummaryPlanner>();
        services.TryAddSingleton<CertCcDiagnostics>();
        services.AddTransient<CertCcConnector>();

        return services;
    }
}

View File

@@ -0,0 +1,79 @@
using System;
using StellaOps.Concelier.Connector.Common.Cursors;
namespace StellaOps.Concelier.Connector.CertCc.Configuration;
/// <summary>
/// Connector options governing CERT/CC fetch cadence and API endpoints.
/// </summary>
/// <summary>
/// Connector options governing CERT/CC fetch cadence and API endpoints.
/// </summary>
public sealed class CertCcOptions
{
    // Name of the dedicated HTTP client registered for this connector.
    public const string HttpClientName = "certcc";
    /// <summary>
    /// Root URI for the VINCE Vulnerability Notes API (must end with a slash).
    /// </summary>
    public Uri BaseApiUri { get; set; } = new("https://www.kb.cert.org/vuls/api/", UriKind.Absolute);
    /// <summary>
    /// Sliding window settings controlling which summary endpoints are requested.
    /// </summary>
    public TimeWindowCursorOptions SummaryWindow { get; set; } = new()
    {
        WindowSize = TimeSpan.FromDays(30),
        Overlap = TimeSpan.FromDays(3),
        InitialBackfill = TimeSpan.FromDays(365),
        MinimumWindowSize = TimeSpan.FromDays(1),
    };
    /// <summary>
    /// Maximum number of monthly summary endpoints to request in a single plan.
    /// </summary>
    public int MaxMonthlySummaries { get; set; } = 6;
    /// <summary>
    /// Maximum number of vulnerability notes (detail bundles) to process per fetch pass.
    /// </summary>
    public int MaxNotesPerFetch { get; set; } = 25;
    /// <summary>
    /// Optional delay inserted between successive detail requests to respect upstream throttling.
    /// </summary>
    public TimeSpan DetailRequestDelay { get; set; } = TimeSpan.FromMilliseconds(100);
    /// <summary>
    /// When disabled, parse/map stages skip detail mapping—useful for dry runs or migration staging.
    /// </summary>
    public bool EnableDetailMapping { get; set; } = true;
    /// <summary>
    /// Validates the option set, throwing <see cref="InvalidOperationException"/> on the first
    /// violation. Called both at construction (connector) and via options PostConfigure.
    /// </summary>
    public void Validate()
    {
        if (BaseApiUri is null || !BaseApiUri.IsAbsoluteUri)
        {
            throw new InvalidOperationException("CertCcOptions.BaseApiUri must be an absolute URI.");
        }
        // Trailing slash required so relative detail URIs (new Uri(base, suffix)) resolve under the API root.
        if (!BaseApiUri.AbsoluteUri.EndsWith("/", StringComparison.Ordinal))
        {
            throw new InvalidOperationException("CertCcOptions.BaseApiUri must end with a trailing slash.");
        }
        // A null window is replaced with library defaults before delegating its own validation.
        SummaryWindow ??= new TimeWindowCursorOptions();
        SummaryWindow.EnsureValid();
        if (MaxMonthlySummaries <= 0)
        {
            throw new InvalidOperationException("CertCcOptions.MaxMonthlySummaries must be positive.");
        }
        if (MaxNotesPerFetch <= 0)
        {
            throw new InvalidOperationException("CertCcOptions.MaxNotesPerFetch must be positive.");
        }
        if (DetailRequestDelay < TimeSpan.Zero)
        {
            throw new InvalidOperationException("CertCcOptions.DetailRequestDelay cannot be negative.");
        }
    }
}

View File

@@ -0,0 +1,59 @@
# FEEDCONN-CERTCC-02-009 VINCE Detail & Map Reintegration Plan
- **Author:** BE-Conn-CERTCC (current on-call)
- **Date:** 2025-10-11
- **Scope:** Restore VINCE detail parsing and canonical mapping in Concelier without destabilising downstream Merge/Export pipelines.
## 1. Current State Snapshot (2025-10-11)
- ✅ Fetch pipeline, VINCE summary planner, and detail queue are live; documents land with `DocumentStatuses.PendingParse`.
- ✅ DTO aggregate (`CertCcNoteDto`) plus mapper emit vendor-centric `normalizedVersions` (`scheme=certcc.vendor`) and provenance aligned with `src/Concelier/__Libraries/StellaOps.Concelier.Models/PROVENANCE_GUIDELINES.md`.
- ✅ Regression coverage exists for fetch/parse/map flows (`CertCcConnectorSnapshotTests`), but snapshot regeneration is gated on harness refresh (FEEDCONN-CERTCC-02-007) and QA handoff (FEEDCONN-CERTCC-02-008).
- ⚠️ Parse/map jobs are not scheduled; production still operates in fetch-only mode.
- ⚠️ Downstream Merge team is finalising normalized range ingestion per `src/FASTER_MODELING_AND_NORMALIZATION.md`; we must avoid publishing canonical records until they certify compatibility.
## 2. Required Dependencies & Coordinated Tasks
| Dependency | Owner(s) | Blocking Condition | Handshake |
|------------|----------|--------------------|-----------|
| FEEDCONN-CERTCC-02-004 (Canonical mapping & range primitives hardening) | BE-Conn-CERTCC + Models | Ensure mapper emits deterministic `normalizedVersions` array and provenance field masks | Daily sync with Models/Merge leads; share fixture diff before each enablement phase |
| FEEDCONN-CERTCC-02-007 (Connector test harness remediation) | BE-Conn-CERTCC, QA | Restore `AddSourceCommon` harness + canned VINCE fixtures so we can shadow-run parse/map | Required before Phase 1 |
| FEEDCONN-CERTCC-02-008 (Snapshot coverage handoff) | QA | Snapshot refresh process green to surface regressions | Required before Phase 2 |
| FEEDCONN-CERTCC-02-010 (Partial-detail graceful degradation) | BE-Conn-CERTCC | Resiliency for missing VINCE endpoints to avoid job wedging after reintegration | Should land before Phase 2 cutover |
## 3. Phased Rollout Plan
| Phase | Window (UTC) | Actions | Success Signals | Rollback |
|-------|--------------|---------|-----------------|----------|
| **0 Pre-flight validation** | 2025-10-11 → 2025-10-12 | • Finish FEEDCONN-CERTCC-02-007 harness fixes and regenerate fixtures.<br>• Run `dotnet test src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests` with `UPDATE_CERTCC_FIXTURES=0` to confirm deterministic baselines.<br>• Generate sample advisory batch (`dotnet test … --filter SnapshotSmoke`) and deliver JSON diff to Merge for schema verification (`normalizedVersions[].scheme == certcc.vendor`, provenance masks populated). | • Harness tests green locally and in CI.<br>• Merge sign-off that sample advisories conform to `FASTER_MODELING_AND_NORMALIZATION.md`. | N/A (no production enablement yet). |
| **1 Shadow parse/map in staging** | Target start 2025-10-13 | • Register `source:cert-cc:parse` and `source:cert-cc:map` jobs, but gate them behind new config flag `concelier:sources:cert-cc:enableDetailMapping` (default `false`).<br>• Deploy (restart required for options rebinding), enable flag, and point connector at staging Mongo with isolated collection (`advisories_certcc_shadow`).<br>• Run connector for ≥2 cycles; compare advisory counts vs. fetch-only baseline and validate `concelier.range.primitives` metrics include `scheme=certcc.vendor`. | • No uncaught exceptions in staging logs.<br>• Shadow advisories match expected vendor counts (±5%).<br>`certcc.summary.fetch.*` + new `certcc.map.duration.ms` metrics stable. | Disable flag; staging returns to fetch-only. No production impact. |
| **2 Controlled production enablement** | Target start 2025-10-14 | • Redeploy production with flag enabled, start with job concurrency `1`, and reduce `MaxNotesPerFetch` to 5 for first 24h.<br>• Observe metrics dashboards hourly (fetch/map latency, pending queues, Mongo write throughput).<br>• QA to replay latest snapshots and confirm no deterministic drift.<br>• Publish advisory sample (top 10 changed docs) to Merge Slack channel for validation. | • Pending parse/mapping queues drain within expected SLA (<30min).<br>• No increase in merge dedupe anomalies.<br>• Mongo writes stay within 10% of baseline. | Toggle flag off, re-run fetch-only. Clear `pendingMappings` via connector cursor reset if stuck. |
| **3 Full production & cleanup** | Target start 2025-10-15 | • Restore `MaxNotesPerFetch` to configured default (20).<br>• Remove temporary throttles and leave flag enabled by default.<br>• Update `README.md` rollout notes; close FEEDCONN-CERTCC-02-009.<br>• Kick off post-merge audit with Merge to ensure new advisories dedupe with other sources. | • Stable operations for ≥48h, no degradation alerts.<br>• Merge confirms conflict resolver behaviour unchanged. | If regression detected, revert to Phase2 state or disable jobs; retain plan for reuse. |
## 4. Monitoring & Validation Checklist
- Dashboards: `certcc.*` meters (plan, summary fetch, detail fetch) plus `concelier.range.primitives` with tag `scheme=certcc.vendor`.
- Logs: ensure Parse/Map jobs emit `correlationId` aligned with fetch events for traceability.
- Data QA: run `tools/dump_advisory` against two VINCE notes (one multi-vendor, one single-vendor) every phase to spot-check normalized versions ordering and provenance.
- Storage: verify Mongo TTL/size for `raw_documents` and `dtos`—detail payload volume increases by ~3× when mapping resumes.
## 5. Rollback / Contingency Playbook
1. Disable `concelier:sources:cert-cc:enableDetailMapping` flag (and optionally set `MaxNotesPerFetch=0` for a single cycle) to halt new detail ingestion.
2. Run connector once to update cursor; verify `pendingMappings` drains.
3. If advisories already persisted, coordinate with Merge to soft-delete affected `certcc/*` advisories by advisory key hash (no schema rollback required).
4. Re-run Phase 1 shadow validation before retrying.
## 6. Communication Cadence
- Daily check-in with Models/Merge leads (09:30 EDT) to surface normalizedVersions/provenance diffs.
- Post-phase reports in `#concelier-certcc` Slack channel summarising metrics, advisory counts, and outstanding issues.
- Escalate blockers >12h via Runbook SEV-3 path and annotate `TASKS.md`.
## 7. Open Questions / Next Actions
- [ ] Confirm whether Merge requires additional provenance field masks before Phase2 (waiting on feedback from 2025-10-11 sample).
- [ ] Decide if CSAF endpoint ingestion (optional) should piggyback on Phase3 or stay deferred.
- [ ] Validate that FEEDCONN-CERTCC-02-010 coverage handles mixed 200/404 VINCE endpoints during partial outages.
Once the dependencies listed in Section 2 are cleared and Phase 3 completes, update `src/Concelier/StellaOps.Concelier.PluginBinaries/StellaOps.Concelier.Connector.CertCc/TASKS.md` and close FEEDCONN-CERTCC-02-009.

View File

@@ -0,0 +1,20 @@
# FEEDCONN-CERTCC-02-012 Schema Sync & Snapshot Regeneration
## Summary
- Re-ran `StellaOps.Concelier.Connector.CertCc.Tests` with `UPDATE_CERTCC_FIXTURES=1`; fixtures now capture SemVer-style normalized versions (`scheme=certcc.vendor`) and `provenance.decisionReason` values emitted by the mapper.
- Recorded HTTP request ordering is persisted in `certcc-requests.snapshot.json` to keep Merge aware of the deterministic fetch plan.
- Advisories snapshot (`certcc-advisories.snapshot.json`) reflects the dual-write storage changes (normalized versions + provenance) introduced by FEEDMODELS-SCHEMA-* and FEEDSTORAGE-DATA-*.
## Artifacts
- `src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests/Fixtures/certcc-advisories.snapshot.json`
- `src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests/Fixtures/certcc-documents.snapshot.json`
- `src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests/Fixtures/certcc-requests.snapshot.json`
- `src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests/Fixtures/certcc-state.snapshot.json`
## Validation steps
```bash
dotnet test src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests
UPDATE_CERTCC_FIXTURES=1 dotnet test src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests
```
The first command verifies deterministic behavior; the second regenerates fixtures if a future schema change occurs. Share the four snapshot files above with Merge for their backfill diff.

View File

@@ -0,0 +1,187 @@
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common.Cursors;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Immutable connector cursor persisted between CERT/CC fetch passes. Tracks the
/// VINCE summary time-window state plus the work queues (pending summary documents,
/// note ids, documents awaiting parse, documents awaiting map) and the timestamp of
/// the last completed run.
/// </summary>
internal sealed record CertCcCursor(
    TimeWindowCursorState SummaryState,
    IReadOnlyCollection<Guid> PendingSummaries,
    IReadOnlyCollection<string> PendingNotes,
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings,
    DateTimeOffset? LastRun)
{
    private static readonly Guid[] EmptyGuidArray = Array.Empty<Guid>();
    private static readonly string[] EmptyStringArray = Array.Empty<string>();

    /// <summary>Cursor with an empty window, no pending work, and no recorded run.</summary>
    public static CertCcCursor Empty { get; } = new(
        TimeWindowCursorState.Empty,
        EmptyGuidArray,
        EmptyStringArray,
        EmptyGuidArray,
        EmptyGuidArray,
        null);

    /// <summary>
    /// Serializes the cursor to BSON: GUIDs are stored as strings, the summary window
    /// under a nested "summary" document, and the last-run timestamp as a UTC datetime.
    /// </summary>
    public BsonDocument ToBsonDocument()
    {
        var document = new BsonDocument();
        var summary = new BsonDocument();
        SummaryState.WriteTo(summary, "start", "end");
        document["summary"] = summary;
        document["pendingSummaries"] = new BsonArray(PendingSummaries.Select(static id => id.ToString()));
        document["pendingNotes"] = new BsonArray(PendingNotes.Select(static note => note));
        document["pendingDocuments"] = new BsonArray(PendingDocuments.Select(static id => id.ToString()));
        document["pendingMappings"] = new BsonArray(PendingMappings.Select(static id => id.ToString()));
        if (LastRun.HasValue)
        {
            document["lastRun"] = LastRun.Value.UtcDateTime;
        }

        return document;
    }

    /// <summary>
    /// Rehydrates a cursor from BSON, tolerating missing fields and malformed entries
    /// (unparsable GUIDs and blank strings are silently dropped). Returns
    /// <see cref="Empty"/> for a null or empty document.
    /// </summary>
    public static CertCcCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }

        var summaryState = TimeWindowCursorState.Empty;
        if (document.TryGetValue("summary", out var summaryValue) && summaryValue is BsonDocument summaryDocument)
        {
            summaryState = TimeWindowCursorState.FromBsonDocument(summaryDocument, "start", "end");
        }

        var pendingSummaries = ReadGuidArray(document, "pendingSummaries");
        var pendingNotes = ReadStringArray(document, "pendingNotes");
        var pendingDocuments = ReadGuidArray(document, "pendingDocuments");
        var pendingMappings = ReadGuidArray(document, "pendingMappings");

        // Accept both native BSON datetimes and ISO-8601 strings for backwards compatibility.
        DateTimeOffset? lastRun = null;
        if (document.TryGetValue("lastRun", out var lastRunValue))
        {
            lastRun = lastRunValue.BsonType switch
            {
                BsonType.DateTime => DateTime.SpecifyKind(lastRunValue.ToUniversalTime(), DateTimeKind.Utc),
                BsonType.String when DateTimeOffset.TryParse(lastRunValue.AsString, out var parsed) => parsed.ToUniversalTime(),
                _ => null,
            };
        }

        return new CertCcCursor(summaryState, pendingSummaries, pendingNotes, pendingDocuments, pendingMappings, lastRun);
    }

    /// <summary>Returns a copy with the summary window state replaced (null falls back to empty).</summary>
    public CertCcCursor WithSummaryState(TimeWindowCursorState state)
        => this with { SummaryState = state ?? TimeWindowCursorState.Empty };

    /// <summary>Returns a copy with the pending summary-document queue replaced.</summary>
    public CertCcCursor WithPendingSummaries(IEnumerable<Guid>? ids)
        => this with { PendingSummaries = NormalizeGuidSet(ids) };

    /// <summary>Returns a copy with the pending note-id queue replaced.</summary>
    public CertCcCursor WithPendingNotes(IEnumerable<string>? notes)
        => this with { PendingNotes = NormalizeStringSet(notes) };

    /// <summary>Returns a copy with the pending parse-document queue replaced.</summary>
    public CertCcCursor WithPendingDocuments(IEnumerable<Guid>? ids)
        => this with { PendingDocuments = NormalizeGuidSet(ids) };

    /// <summary>Returns a copy with the pending mapping-document queue replaced.</summary>
    public CertCcCursor WithPendingMappings(IEnumerable<Guid>? ids)
        => this with { PendingMappings = NormalizeGuidSet(ids) };

    /// <summary>Returns a copy with the last-run timestamp replaced.</summary>
    public CertCcCursor WithLastRun(DateTimeOffset? timestamp)
        => this with { LastRun = timestamp };

    /// <summary>Reads a de-duplicated GUID array from a BSON array field; invalid entries are skipped.</summary>
    private static Guid[] ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array || array.Count == 0)
        {
            return EmptyGuidArray;
        }

        var results = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (TryReadGuid(element, out var parsed))
            {
                results.Add(parsed);
            }
        }

        return results.Count == 0 ? EmptyGuidArray : results.Distinct().ToArray();
    }

    /// <summary>
    /// Reads a case-insensitively de-duplicated string array from a BSON array field.
    /// Accepts plain strings or documents carrying a "value" string.
    /// </summary>
    private static string[] ReadStringArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array || array.Count == 0)
        {
            return EmptyStringArray;
        }

        // Entries are trimmed as they are collected, so the final pass only needs to
        // drop values that trimmed down to empty and de-duplicate.
        var results = new List<string>(array.Count);
        foreach (var element in array)
        {
            switch (element)
            {
                case BsonString bsonString when !string.IsNullOrWhiteSpace(bsonString.AsString):
                    results.Add(bsonString.AsString.Trim());
                    break;
                case BsonDocument bsonDocument when bsonDocument.TryGetValue("value", out var inner) && inner.IsString:
                    results.Add(inner.AsString.Trim());
                    break;
            }
        }

        return results.Count == 0
            ? EmptyStringArray
            : results
                .Where(static value => value.Length > 0)
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .ToArray();
    }

    /// <summary>Parses a GUID from a BSON string or binary value; returns false when neither form matches.</summary>
    private static bool TryReadGuid(BsonValue value, out Guid guid)
    {
        if (value is BsonString bsonString && Guid.TryParse(bsonString.AsString, out guid))
        {
            return true;
        }

        if (value is BsonBinaryData binary)
        {
            try
            {
                guid = binary.ToGuid();
                return true;
            }
            catch (FormatException)
            {
                // Not a GUID-subtype binary; fall back to raw byte interpretation below.
            }

            // NOTE(review): new Guid(bytes) assumes the .NET mixed-endian GUID layout —
            // confirm writers always persist GUIDs in that representation.
            var bytes = binary.AsByteArray;
            if (bytes.Length == 16)
            {
                guid = new Guid(bytes);
                return true;
            }
        }

        guid = default;
        return false;
    }

    /// <summary>Drops empty GUIDs and duplicates; null yields the shared empty array.</summary>
    private static Guid[] NormalizeGuidSet(IEnumerable<Guid>? ids)
        => ids?.Where(static id => id != Guid.Empty).Distinct().ToArray() ?? EmptyGuidArray;

    /// <summary>Trims entries, drops blanks, and de-duplicates case-insensitively; null yields the shared empty array.</summary>
    private static string[] NormalizeStringSet(IEnumerable<string>? values)
        => values is null
            ? EmptyStringArray
            : values
                .Where(static value => !string.IsNullOrWhiteSpace(value))
                .Select(static value => value.Trim())
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .ToArray();
}

View File

@@ -0,0 +1,214 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using StellaOps.Concelier.Connector.Common.Cursors;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Emits CERT/CC-specific telemetry for summary planning and fetch activity.
/// </summary>
public sealed class CertCcDiagnostics : IDisposable
{
    private const string MeterName = "StellaOps.Concelier.Connector.CertCc";
    private const string MeterVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _planWindows;
    private readonly Counter<long> _planRequests;
    private readonly Histogram<double> _planWindowDays;
    private readonly Counter<long> _summaryFetchAttempts;
    private readonly Counter<long> _summaryFetchSuccess;
    private readonly Counter<long> _summaryFetchUnchanged;
    private readonly Counter<long> _summaryFetchFailures;
    private readonly Counter<long> _detailFetchAttempts;
    private readonly Counter<long> _detailFetchSuccess;
    private readonly Counter<long> _detailFetchUnchanged;
    private readonly Counter<long> _detailFetchMissing;
    private readonly Counter<long> _detailFetchFailures;
    private readonly Counter<long> _parseSuccess;
    private readonly Counter<long> _parseFailures;
    private readonly Histogram<long> _parseVendorCount;
    private readonly Histogram<long> _parseStatusCount;
    private readonly Histogram<long> _parseVulnerabilityCount;
    private readonly Counter<long> _mapSuccess;
    private readonly Counter<long> _mapFailures;
    private readonly Histogram<long> _mapAffectedPackageCount;
    private readonly Histogram<long> _mapNormalizedVersionCount;

    public CertCcDiagnostics()
    {
        _meter = new Meter(MeterName, MeterVersion);

        // Planner instruments.
        _planWindows = NewCounter("certcc.plan.windows", "windows", "Number of summary planning windows evaluated.");
        _planRequests = NewCounter("certcc.plan.requests", "requests", "Total CERT/CC summary endpoints queued by the planner.");
        _planWindowDays = NewHistogram<double>("certcc.plan.window_days", "day", "Duration of each planning window in days.");

        // VINCE summary fetch instruments.
        _summaryFetchAttempts = NewCounter("certcc.summary.fetch.attempts", "operations", "Number of VINCE summary fetch attempts.");
        _summaryFetchSuccess = NewCounter("certcc.summary.fetch.success", "operations", "Number of VINCE summary fetches persisted to storage.");
        _summaryFetchUnchanged = NewCounter("certcc.summary.fetch.not_modified", "operations", "Number of VINCE summary fetches returning HTTP 304.");
        _summaryFetchFailures = NewCounter("certcc.summary.fetch.failures", "operations", "Number of VINCE summary fetches that failed after retries.");

        // VINCE detail fetch instruments.
        _detailFetchAttempts = NewCounter("certcc.detail.fetch.attempts", "operations", "Number of VINCE detail fetch attempts.");
        _detailFetchSuccess = NewCounter("certcc.detail.fetch.success", "operations", "Number of VINCE detail fetches that returned payloads.");
        _detailFetchUnchanged = NewCounter("certcc.detail.fetch.unchanged", "operations", "Number of VINCE detail fetches returning HTTP 304.");
        _detailFetchMissing = NewCounter("certcc.detail.fetch.missing", "operations", "Number of optional VINCE detail endpoints missing but tolerated.");
        _detailFetchFailures = NewCounter("certcc.detail.fetch.failures", "operations", "Number of VINCE detail fetches that failed after retries.");

        // Parse-stage instruments.
        _parseSuccess = NewCounter("certcc.parse.success", "documents", "Number of VINCE note bundles parsed into DTOs.");
        _parseFailures = NewCounter("certcc.parse.failures", "documents", "Number of VINCE note bundles that failed to parse.");
        _parseVendorCount = NewHistogram<long>("certcc.parse.vendors.count", "vendors", "Distribution of vendor statements per VINCE note.");
        _parseStatusCount = NewHistogram<long>("certcc.parse.statuses.count", "entries", "Distribution of vendor status entries per VINCE note.");
        _parseVulnerabilityCount = NewHistogram<long>("certcc.parse.vulnerabilities.count", "entries", "Distribution of vulnerability records per VINCE note.");

        // Map-stage instruments.
        _mapSuccess = NewCounter("certcc.map.success", "advisories", "Number of canonical advisories emitted by the CERT/CC mapper.");
        _mapFailures = NewCounter("certcc.map.failures", "advisories", "Number of CERT/CC advisory mapping attempts that failed.");
        _mapAffectedPackageCount = NewHistogram<long>("certcc.map.affected.count", "packages", "Distribution of affected packages emitted per CERT/CC advisory.");
        _mapNormalizedVersionCount = NewHistogram<long>("certcc.map.normalized_versions.count", "rules", "Distribution of normalized version rules emitted per CERT/CC advisory.");
    }

    /// <summary>Records one planner evaluation: the window itself, queued requests, and window length.</summary>
    public void PlanEvaluated(TimeWindow window, int requestCount)
    {
        _planWindows.Add(1);

        if (requestCount > 0)
        {
            _planRequests.Add(requestCount);
        }

        var elapsed = window.Duration;
        if (elapsed > TimeSpan.Zero)
        {
            _planWindowDays.Record(elapsed.TotalDays);
        }
    }

    public void SummaryFetchAttempt(CertCcSummaryScope scope)
        => _summaryFetchAttempts.Add(1, ScopeTag(scope));

    public void SummaryFetchSuccess(CertCcSummaryScope scope)
        => _summaryFetchSuccess.Add(1, ScopeTag(scope));

    public void SummaryFetchUnchanged(CertCcSummaryScope scope)
        => _summaryFetchUnchanged.Add(1, ScopeTag(scope));

    public void SummaryFetchFailure(CertCcSummaryScope scope)
        => _summaryFetchFailures.Add(1, ScopeTag(scope));

    public void DetailFetchAttempt(string endpoint)
        => _detailFetchAttempts.Add(1, EndpointTag(endpoint));

    public void DetailFetchSuccess(string endpoint)
        => _detailFetchSuccess.Add(1, EndpointTag(endpoint));

    public void DetailFetchUnchanged(string endpoint)
        => _detailFetchUnchanged.Add(1, EndpointTag(endpoint));

    public void DetailFetchMissing(string endpoint)
        => _detailFetchMissing.Add(1, EndpointTag(endpoint));

    public void DetailFetchFailure(string endpoint)
        => _detailFetchFailures.Add(1, EndpointTag(endpoint));

    /// <summary>Records a successful parse plus the per-note vendor/status/vulnerability distributions.</summary>
    public void ParseSuccess(int vendorCount, int statusCount, int vulnerabilityCount)
    {
        _parseSuccess.Add(1);
        RecordIfNonNegative(_parseVendorCount, vendorCount);
        RecordIfNonNegative(_parseStatusCount, statusCount);
        RecordIfNonNegative(_parseVulnerabilityCount, vulnerabilityCount);
    }

    public void ParseFailure()
        => _parseFailures.Add(1);

    /// <summary>Records a successful map plus the per-advisory package/version-rule distributions.</summary>
    public void MapSuccess(int affectedPackageCount, int normalizedVersionCount)
    {
        _mapSuccess.Add(1);
        RecordIfNonNegative(_mapAffectedPackageCount, affectedPackageCount);
        RecordIfNonNegative(_mapNormalizedVersionCount, normalizedVersionCount);
    }

    public void MapFailure()
        => _mapFailures.Add(1);

    public void Dispose() => _meter.Dispose();

    // Shorthand factories keep the constructor's instrument registrations to one line each.
    private Counter<long> NewCounter(string name, string unit, string description)
        => _meter.CreateCounter<long>(name: name, unit: unit, description: description);

    private Histogram<T> NewHistogram<T>(string name, string unit, string description)
        where T : struct
        => _meter.CreateHistogram<T>(name: name, unit: unit, description: description);

    // Negative counts indicate "not applicable" and are skipped rather than recorded.
    private static void RecordIfNonNegative(Histogram<long> histogram, long value)
    {
        if (value >= 0)
        {
            histogram.Record(value);
        }
    }

    private static KeyValuePair<string, object?> ScopeTag(CertCcSummaryScope scope)
        => new("scope", scope.ToString().ToLowerInvariant());

    private static KeyValuePair<string, object?> EndpointTag(string endpoint)
        => new("endpoint", string.IsNullOrWhiteSpace(endpoint) ? "note" : endpoint.ToLowerInvariant());
}

View File

@@ -0,0 +1,607 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
internal static class CertCcMapper
{
private const string AdvisoryPrefix = "certcc";
private const string VendorNormalizedVersionScheme = "certcc.vendor";
/// <summary>
/// Projects a parsed VINCE note bundle into the canonical <see cref="Advisory"/> model.
/// </summary>
/// <param name="dto">Parsed note aggregate produced by the CERT/CC parser.</param>
/// <param name="document">Raw document record the DTO originated from (supplies the fetch URI/time).</param>
/// <param name="dtoRecord">DTO record whose validation timestamp stamps the map-stage provenance.</param>
/// <param name="sourceName">Connector source identifier used in provenance entries.</param>
public static Advisory Map(
    CertCcNoteDto dto,
    DocumentRecord document,
    DtoRecord dtoRecord,
    string sourceName)
{
    ArgumentNullException.ThrowIfNull(dto);
    ArgumentNullException.ThrowIfNull(document);
    ArgumentNullException.ThrowIfNull(dtoRecord);
    ArgumentException.ThrowIfNullOrEmpty(sourceName);

    var metadata = dto.Metadata ?? CertCcNoteMetadata.Empty;
    var validatedAt = dtoRecord.ValidatedAt.ToUniversalTime();
    var capturedAt = document.FetchedAt.ToUniversalTime();

    var advisoryKey = BuildAdvisoryKey(metadata);
    var advisoryTitle = string.IsNullOrWhiteSpace(metadata.Title) ? advisoryKey : metadata.Title.Trim();

    // Two provenance entries: the fetched source document and the map operation itself.
    var provenance = new[]
    {
        new AdvisoryProvenance(sourceName, "document", document.Uri, capturedAt),
        new AdvisoryProvenance(sourceName, "map", metadata.VuId ?? metadata.IdNumber ?? advisoryKey, validatedAt),
    };

    return new Advisory(
        advisoryKey,
        advisoryTitle,
        ExtractSummary(metadata),
        language: "en",
        metadata.Published?.ToUniversalTime(),
        metadata.Updated?.ToUniversalTime(),
        severity: null,
        exploitKnown: false,
        BuildAliases(dto).ToArray(),
        BuildReferences(dto, metadata, sourceName, validatedAt).ToArray(),
        BuildAffectedPackages(dto, metadata, sourceName, validatedAt).ToArray(),
        cvssMetrics: Array.Empty<CvssMetric>(),
        provenance);
}
/// <summary>
/// Derives the canonical advisory key: "certcc/vu-NNNNNN" when a VU id or numeric id
/// is available, otherwise "certcc/" plus a random suffix so the record is still storable.
/// </summary>
private static string BuildAdvisoryKey(CertCcNoteMetadata metadata)
{
    if (metadata is not null)
    {
        var vuToken = NormalizeVuId(metadata.VuId);
        if (vuToken.Length > 0)
        {
            return $"{AdvisoryPrefix}/{vuToken}";
        }

        var idToken = SanitizeToken(metadata.IdNumber);
        if (idToken.Length > 0)
        {
            return $"{AdvisoryPrefix}/vu-{idToken}";
        }
    }

    // Last resort: a random key keeps the pipeline moving, at the cost of determinism.
    return $"{AdvisoryPrefix}/{Guid.NewGuid():N}";
}
/// <summary>
/// Normalizes a VINCE "VU#NNNNNN" identifier to the canonical "vu-NNNNNN" token.
/// When the value carries no digits, falls back to generic token sanitization.
/// Blank input yields an empty string.
/// </summary>
private static string NormalizeVuId(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    var digits = string.Concat(value.Where(char.IsDigit));
    if (digits.Length > 0)
    {
        return $"vu-{digits}";
    }

    // No digits present: lower-case, normalize the VU marker and separators, then sanitize.
    var fallback = value.Trim().ToLowerInvariant()
        .Replace("vu#", "vu-", StringComparison.OrdinalIgnoreCase)
        .Replace('#', '-')
        .Replace(' ', '-');
    return SanitizeToken(fallback);
}

/// <summary>
/// Lower-cases a token, maps every character other than letters, digits, '-' and '_'
/// to '-', and strips leading/trailing dashes. Blank input yields an empty string.
/// </summary>
private static string SanitizeToken(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    var mapped = value.Trim()
        .Select(static ch => char.IsLetterOrDigit(ch) || ch is '-' or '_' ? ch : '-')
        .ToArray();

    return new string(mapped).Trim('-').ToLowerInvariant();
}
// Precompiled patterns shared by HtmlToPlainText across calls.
private static readonly Regex HtmlTagRegex = new("<[^>]+>", RegexOptions.Compiled | RegexOptions.CultureInvariant);
private static readonly Regex WhitespaceRegex = new("[ \t\f\r]+", RegexOptions.Compiled | RegexOptions.CultureInvariant);
private static readonly Regex ParagraphRegex = new("<\\s*/?\\s*p[^>]*>", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Picks the note summary (falling back to the overview when the summary is blank)
/// and strips its HTML. Returns null when neither field holds usable text, or when
/// <paramref name="metadata"/> is null.
/// </summary>
private static string? ExtractSummary(CertCcNoteMetadata metadata)
{
    var candidate = metadata is null
        ? null
        : string.IsNullOrWhiteSpace(metadata.Summary) ? metadata.Overview : metadata.Summary;

    return string.IsNullOrWhiteSpace(candidate) ? null : HtmlToPlainText(candidate);
}

/// <summary>
/// Converts an HTML fragment to plain text: br/li/p boundaries become newlines,
/// all other tags are dropped, entities are decoded, and runs of horizontal
/// whitespace and blank lines are collapsed.
/// </summary>
private static string HtmlToPlainText(string html)
{
    if (string.IsNullOrWhiteSpace(html))
    {
        return string.Empty;
    }

    // Preserve intentional line structure before stripping markup.
    var text = html
        .Replace("<br>", "\n", StringComparison.OrdinalIgnoreCase)
        .Replace("<br/>", "\n", StringComparison.OrdinalIgnoreCase)
        .Replace("<br />", "\n", StringComparison.OrdinalIgnoreCase)
        .Replace("<li>", "\n", StringComparison.OrdinalIgnoreCase)
        .Replace("</li>", "\n", StringComparison.OrdinalIgnoreCase);
    text = ParagraphRegex.Replace(text, "\n");

    // Drop the remaining tags, decode entities, then squeeze whitespace runs.
    text = HtmlTagRegex.Replace(text, " ");
    text = WebUtility.HtmlDecode(text) ?? string.Empty;
    text = WhitespaceRegex.Replace(text, " ");
    text = Regex.Replace(text, "\n{2,}", "\n", RegexOptions.Compiled);

    return text.Trim();
}
/// <summary>
/// Collects the alias set for a note: the raw VU id, a "VU#" alias reconstructed
/// from the numeric id, and every CVE id found in metadata or vulnerability records.
/// Returned de-duplicated and ordered case-insensitively.
/// </summary>
private static IEnumerable<string> BuildAliases(CertCcNoteDto dto)
{
    var metadata = dto.Metadata ?? CertCcNoteMetadata.Empty;

    // SortedSet with an ordinal-ignore-case comparer both de-duplicates and orders.
    var aliases = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);

    if (!string.IsNullOrWhiteSpace(metadata.VuId))
    {
        aliases.Add(metadata.VuId.Trim());
    }

    if (!string.IsNullOrWhiteSpace(metadata.IdNumber))
    {
        aliases.Add($"VU#{metadata.IdNumber.Trim()}");
    }

    foreach (var cve in metadata.CveIds ?? Array.Empty<string>())
    {
        if (!string.IsNullOrWhiteSpace(cve))
        {
            aliases.Add(cve.Trim());
        }
    }

    foreach (var vulnerability in dto.Vulnerabilities ?? Array.Empty<CertCcVulnerabilityDto>())
    {
        if (!string.IsNullOrWhiteSpace(vulnerability.CveId))
        {
            aliases.Add(vulnerability.CveId.Trim());
        }
    }

    return aliases;
}
/// <summary>
/// Builds the advisory reference list: the canonical note URL, the metadata's public
/// URLs, vendor reference URLs, and URLs embedded in vendor statement / vendor status
/// free text. Output is de-duplicated by URL (case-insensitive) and ordered by kind,
/// then URL, so repeated runs are deterministic.
/// </summary>
private static IEnumerable<AdvisoryReference> BuildReferences(
CertCcNoteDto dto,
CertCcNoteMetadata metadata,
string sourceName,
DateTimeOffset recordedAt)
{
var references = new List<AdvisoryReference>();
// Canonical note link: prefer the explicit primary URL, otherwise rebuild it from the
// numeric id, otherwise fall back to the VINCE index page.
var canonicalUri = !string.IsNullOrWhiteSpace(metadata.PrimaryUrl)
? metadata.PrimaryUrl!
: (string.IsNullOrWhiteSpace(metadata.IdNumber)
? "https://www.kb.cert.org/vuls/"
: $"https://www.kb.cert.org/vuls/id/{metadata.IdNumber.Trim()}/");
// NOTE(review): this single provenance entry (keyed on the canonical URL) is reused for
// every reference added below, including vendor links — confirm that is intentional.
var provenance = new AdvisoryProvenance(sourceName, "reference", canonicalUri, recordedAt);
TryAddReference(references, canonicalUri, "advisory", "certcc.note", null, provenance);
foreach (var url in metadata.PublicUrls ?? Array.Empty<string>())
{
TryAddReference(references, url, "reference", "certcc.public", null, provenance);
}
foreach (var vendor in dto.Vendors ?? Array.Empty<CertCcVendorDto>())
{
foreach (var url in vendor.References ?? Array.Empty<string>())
{
TryAddReference(references, url, "reference", "certcc.vendor", vendor.Vendor, provenance);
}
// Mine the vendor statement for URLs: each parsed patch line may embed one.
var statementText = vendor.Statement ?? string.Empty;
var patches = CertCcVendorStatementParser.Parse(statementText);
foreach (var patch in patches)
{
if (!string.IsNullOrWhiteSpace(patch.RawLine) && TryFindEmbeddedUrl(patch.RawLine!, out var rawUrl))
{
TryAddReference(references, rawUrl, "reference", "certcc.vendor.statement", vendor.Vendor, provenance);
}
}
}
foreach (var status in dto.VendorStatuses ?? Array.Empty<CertCcVendorStatusDto>())
{
foreach (var url in status.References ?? Array.Empty<string>())
{
TryAddReference(references, url, "reference", "certcc.vendor.status", status.Vendor, provenance);
}
// Status free text may also carry a URL; only the first embedded URL is taken.
if (!string.IsNullOrWhiteSpace(status.Statement) && TryFindEmbeddedUrl(status.Statement!, out var embedded))
{
TryAddReference(references, embedded, "reference", "certcc.vendor.status", status.Vendor, provenance);
}
}
// Keep exactly one reference per URL; the representative is chosen deterministically
// by ordering on kind, then source tag, then URL before taking the first.
return references
.GroupBy(static reference => reference.Url, StringComparer.OrdinalIgnoreCase)
.Select(static group => group
.OrderBy(static reference => reference.Kind ?? string.Empty, StringComparer.Ordinal)
.ThenBy(static reference => reference.SourceTag ?? string.Empty, StringComparer.Ordinal)
.ThenBy(static reference => reference.Url, StringComparer.OrdinalIgnoreCase)
.First())
.OrderBy(static reference => reference.Kind ?? string.Empty, StringComparer.Ordinal)
.ThenBy(static reference => reference.Url, StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Validates a candidate URL (absolute, http/https only) and appends it to
/// <paramref name="references"/>; anything unusable is silently skipped.
/// </summary>
private static void TryAddReference(
    ICollection<AdvisoryReference> references,
    string? url,
    string kind,
    string? sourceTag,
    string? summary,
    AdvisoryProvenance provenance)
{
    if (string.IsNullOrWhiteSpace(url)
        || !Uri.TryCreate(url.Trim(), UriKind.Absolute, out var parsed))
    {
        return;
    }

    var isWebScheme = parsed.Scheme == Uri.UriSchemeHttp || parsed.Scheme == Uri.UriSchemeHttps;
    if (!isWebScheme)
    {
        return;
    }

    try
    {
        references.Add(new AdvisoryReference(parsed.ToString(), kind, sourceTag, summary, provenance));
    }
    catch (ArgumentException)
    {
        // AdvisoryReference performs its own validation; drop anything it rejects.
    }
}
/// <summary>
/// Scans free text for the first absolute http/https URL, trimming common trailing
/// punctuation from whitespace-separated tokens. Returns false (url = null) when no
/// usable URL is present.
/// </summary>
private static bool TryFindEmbeddedUrl(string text, out string? url)
{
    url = null;
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    var separators = new[] { ' ', '\r', '\n', '\t' };
    foreach (var token in text.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        // Strip sentence punctuation that often clings to the end of a pasted link.
        var candidate = token.Trim().TrimEnd('.', ',', ')', ';', ']', '}');
        if (candidate.Length == 0
            || !Uri.TryCreate(candidate, UriKind.Absolute, out var parsed)
            || (parsed.Scheme != Uri.UriSchemeHttp && parsed.Scheme != Uri.UriSchemeHttps))
        {
            continue;
        }

        url = parsed.ToString();
        return true;
    }

    return false;
}
/// <summary>
/// Builds one vendor-keyed affected package per vendor record, pairing each vendor
/// with its matching vendor-status rows. Status rows whose vendor has no vendor
/// record are folded into synthesized fallback packages so no status is dropped.
/// Output is ordered by package identifier for determinism.
/// </summary>
private static IEnumerable<AffectedPackage> BuildAffectedPackages(
CertCcNoteDto dto,
CertCcNoteMetadata metadata,
string sourceName,
DateTimeOffset recordedAt)
{
var vendors = dto.Vendors ?? Array.Empty<CertCcVendorDto>();
var statuses = dto.VendorStatuses ?? Array.Empty<CertCcVendorStatusDto>();
if (vendors.Count == 0 && statuses.Count == 0)
{
return Array.Empty<AffectedPackage>();
}
// Index statuses by normalized vendor key so each vendor can claim its rows.
var statusLookup = statuses
.GroupBy(static status => NormalizeVendorKey(status.Vendor))
.ToDictionary(static group => group.Key, static group => group.ToArray(), StringComparer.OrdinalIgnoreCase);
var packages = new List<AffectedPackage>();
foreach (var vendor in vendors.OrderBy(static v => v.Vendor, StringComparer.OrdinalIgnoreCase))
{
var key = NormalizeVendorKey(vendor.Vendor);
var vendorStatuses = statusLookup.TryGetValue(key, out var value)
? value
: Array.Empty<CertCcVendorStatusDto>();
if (BuildVendorPackage(vendor, vendorStatuses, sourceName, recordedAt) is { } package)
{
packages.Add(package);
}
// Mark these statuses as consumed so they are not re-emitted as fallbacks below.
statusLookup.Remove(key);
}
// Any status groups left over had no vendor record: synthesize a vendor from the
// status data (latest update time, first non-blank statement, merged references).
foreach (var remaining in statusLookup.Values)
{
if (remaining.Length == 0)
{
continue;
}
var vendorName = remaining[0].Vendor;
var fallbackVendor = new CertCcVendorDto(
vendorName,
ContactDate: null,
StatementDate: null,
Updated: remaining
.Select(static status => status.DateUpdated)
.Where(static update => update.HasValue)
.OrderByDescending(static update => update)
.FirstOrDefault(),
Statement: remaining
.Select(static status => status.Statement)
.FirstOrDefault(static statement => !string.IsNullOrWhiteSpace(statement)),
Addendum: null,
References: remaining
.SelectMany(static status => status.References ?? Array.Empty<string>())
.ToArray());
if (BuildVendorPackage(fallbackVendor, remaining, sourceName, recordedAt) is { } package)
{
packages.Add(package);
}
}
return packages
.OrderBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
/// <summary>
/// Builds the canonical package record for a single vendor entry, or returns
/// null when no vendor name can be resolved from the entry or its status rows.
/// </summary>
private static AffectedPackage? BuildVendorPackage(
    CertCcVendorDto vendor,
    IReadOnlyList<CertCcVendorStatusDto> statuses,
    string sourceName,
    DateTimeOffset recordedAt)
{
    string vendorName;
    if (string.IsNullOrWhiteSpace(vendor.Vendor))
    {
        // Fall back to the vendor name carried on the first status row, if any.
        vendorName = statuses.FirstOrDefault()?.Vendor?.Trim() ?? string.Empty;
    }
    else
    {
        vendorName = vendor.Vendor.Trim();
    }

    if (vendorName.Length == 0)
    {
        return null;
    }

    // Derive patch hints from the vendor statement and assemble supporting data.
    var patches = CertCcVendorStatementParser.Parse(vendor.Statement ?? string.Empty);
    var normalizedVersions = BuildNormalizedVersions(vendorName, patches);
    var vendorStatuses = BuildStatuses(vendorName, statuses, sourceName, recordedAt);
    var primitives = BuildRangePrimitives(vendor, vendorStatuses, patches);

    // CERT/CC exposes no structured version bounds, so the single range is a
    // vendor-kind placeholder carrying only the extension primitives.
    var range = new AffectedVersionRange(
        rangeKind: "vendor",
        introducedVersion: null,
        fixedVersion: null,
        lastAffectedVersion: null,
        rangeExpression: null,
        provenance: new AdvisoryProvenance(sourceName, "vendor-range", vendorName, recordedAt),
        primitives: primitives);

    return new AffectedPackage(
        AffectedPackageTypes.Vendor,
        vendorName,
        platform: null,
        versionRanges: new[] { range },
        normalizedVersions: normalizedVersions,
        statuses: vendorStatuses,
        provenance: new[] { new AdvisoryProvenance(sourceName, "vendor", vendorName, recordedAt) });
}
/// <summary>
/// Converts parsed vendor patch hints into exact-value normalized version rules.
/// Duplicate (product, version) pairs are emitted once; the notes field carries
/// "vendor" or "vendor::product" so the rule origin remains traceable.
/// </summary>
private static IReadOnlyList<NormalizedVersionRule> BuildNormalizedVersions(
    string vendorName,
    IReadOnlyList<CertCcVendorPatch> patches)
{
    if (patches.Count == 0)
    {
        return Array.Empty<NormalizedVersionRule>();
    }

    var emitted = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var rules = new List<NormalizedVersionRule>();

    foreach (var patch in patches)
    {
        var rawVersion = patch.Version;
        if (string.IsNullOrWhiteSpace(rawVersion))
        {
            continue;
        }

        var version = rawVersion.Trim();
        // Dedupe on the (product, version) combination.
        if (!emitted.Add(string.Concat(patch.Product, "|", version)))
        {
            continue;
        }

        var hasProduct = !string.IsNullOrWhiteSpace(patch.Product);
        var notes = hasProduct
            ? $"{vendorName}::{patch.Product.Trim()}"
            : vendorName;

        rules.Add(new NormalizedVersionRule(
            VendorNormalizedVersionScheme,
            NormalizedVersionRuleTypes.Exact,
            value: version,
            notes: notes));
    }

    return rules.Count == 0 ? Array.Empty<NormalizedVersionRule>() : rules;
}
/// <summary>
/// Normalizes raw vendor status strings into <see cref="AffectedPackageStatus"/>
/// records, de-duplicated per (status, CVE) pair and deterministically ordered
/// by status then provenance value.
/// </summary>
private static IReadOnlyList<AffectedPackageStatus> BuildStatuses(
    string vendorName,
    IReadOnlyList<CertCcVendorStatusDto> statuses,
    string sourceName,
    DateTimeOffset recordedAt)
{
    if (statuses.Count == 0)
    {
        return Array.Empty<AffectedPackageStatus>();
    }

    var emitted = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var collected = new List<AffectedPackageStatus>();

    foreach (var entry in statuses)
    {
        // Skip anything the shared catalog cannot map to a canonical status.
        if (!AffectedPackageStatusCatalog.TryNormalize(entry.Status, out var canonical))
        {
            continue;
        }

        var cveId = entry.CveId?.Trim() ?? string.Empty;
        var hasCve = !string.IsNullOrWhiteSpace(cveId);
        var dedupKey = hasCve ? $"{canonical}|{cveId}" : canonical;
        if (!emitted.Add(dedupKey))
        {
            continue;
        }

        // Provenance value is "<vendor>" or "<vendor>:<cve>" when a CVE is known.
        var provenanceValue = hasCve ? $"{vendorName}:{cveId}" : vendorName;
        collected.Add(new AffectedPackageStatus(
            canonical,
            new AdvisoryProvenance(sourceName, "vendor-status", provenanceValue, recordedAt)));
    }

    return collected
        .OrderBy(static entry => entry.Status, StringComparer.Ordinal)
        .ThenBy(static entry => entry.Provenance.Value ?? string.Empty, StringComparer.Ordinal)
        .ToArray();
}
/// <summary>
/// Collects vendor metadata, normalized status rows, and parsed patch hints into
/// the vendor-extension dictionary of a <see cref="RangePrimitives"/> instance.
/// Returns null when nothing was collected.
/// </summary>
private static RangePrimitives? BuildRangePrimitives(
    CertCcVendorDto vendor,
    IReadOnlyList<AffectedPackageStatus> statuses,
    IReadOnlyList<CertCcVendorPatch> patches)
{
    var extensions = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    AddVendorExtension(extensions, "certcc.vendor.name", vendor.Vendor);
    AddVendorExtension(extensions, "certcc.vendor.statement.raw", HtmlToPlainText(vendor.Statement ?? string.Empty), 2048);
    AddVendorExtension(extensions, "certcc.vendor.addendum", HtmlToPlainText(vendor.Addendum ?? string.Empty), 1024);
    AddVendorExtension(extensions, "certcc.vendor.contactDate", FormatDate(vendor.ContactDate));
    AddVendorExtension(extensions, "certcc.vendor.statementDate", FormatDate(vendor.StatementDate));
    AddVendorExtension(extensions, "certcc.vendor.updated", FormatDate(vendor.Updated));
    if (vendor.References is { Count: > 0 })
    {
        AddVendorExtension(extensions, "certcc.vendor.references", string.Join(" ", vendor.References));
    }
    if (statuses.Count > 0)
    {
        // Serialized as "<cve>=<status>;..."; the provenance value is formatted
        // "<vendor>:<cve>", so the last ':'-separated segment is the CVE id.
        // NOTE(review): when no CVE was recorded the provenance value is just the
        // vendor name, so Split(':').Last() yields the vendor name (or its last
        // segment if the name itself contains ':') — confirm this is intended.
        var serialized = string.Join(";", statuses
            .Select(static status => status.Provenance.Value is { Length: > 0 }
                ? $"{status.Provenance.Value.Split(':').Last()}={status.Status}"
                : status.Status));
        AddVendorExtension(extensions, "certcc.vendor.statuses", serialized);
    }
    if (patches.Count > 0)
    {
        // Serialized as "<product>=<version>;..."; products without a name are "unknown".
        var serialized = string.Join(";", patches.Select(static patch =>
        {
            var product = string.IsNullOrWhiteSpace(patch.Product) ? "unknown" : patch.Product.Trim();
            return $"{product}={patch.Version.Trim()}";
        }));
        AddVendorExtension(extensions, "certcc.vendor.patches", serialized, 2048);
    }
    return extensions.Count == 0
        ? null
        : new RangePrimitives(null, null, null, extensions);
}
/// <summary>
/// Stores a trimmed, length-capped value in the extension map. Values that are
/// whitespace-only (before or after truncation) are ignored.
/// </summary>
private static void AddVendorExtension(IDictionary<string, string> extensions, string key, string? value, int maxLength = 512)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return;
    }

    var candidate = value.Trim();
    if (candidate.Length > maxLength)
    {
        // Cap the stored payload, then drop any trailing whitespace the cut exposed.
        candidate = candidate[..maxLength].Trim();
    }

    if (candidate.Length > 0)
    {
        extensions[key] = candidate;
    }
}
/// <summary>Formats a timestamp as a round-trip ("O") UTC string, or null when absent.</summary>
private static string? FormatDate(DateTimeOffset? value)
{
    if (!value.HasValue)
    {
        return null;
    }

    return value.Value.ToUniversalTime().ToString("O", CultureInfo.InvariantCulture);
}
/// <summary>Produces the trimmed, lower-cased lookup key for a vendor name (empty when blank).</summary>
private static string NormalizeVendorKey(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    return value.Trim().ToLowerInvariant();
}
}

View File

@@ -0,0 +1,97 @@
using System;
using System.Collections.Generic;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Aggregate of every VINCE document fetched for a single CERT/CC vulnerability
/// note: metadata plus vendor, per-vendor status, and vulnerability lists.
/// </summary>
internal sealed record CertCcNoteDto(
    CertCcNoteMetadata Metadata,
    IReadOnlyList<CertCcVendorDto> Vendors,
    IReadOnlyList<CertCcVendorStatusDto> VendorStatuses,
    IReadOnlyList<CertCcVulnerabilityDto> Vulnerabilities)
{
    /// <summary>Shared empty instance used as a neutral default.</summary>
    public static CertCcNoteDto Empty { get; } = new(
        CertCcNoteMetadata.Empty,
        Array.Empty<CertCcVendorDto>(),
        Array.Empty<CertCcVendorStatusDto>(),
        Array.Empty<CertCcVulnerabilityDto>());
}
/// <summary>
/// Core metadata extracted from a CERT/CC note document.
/// </summary>
/// <param name="VuId">Full "VU#..." identifier when present; otherwise null.</param>
/// <param name="IdNumber">Numeric note identifier (required).</param>
/// <param name="Title">Note title (required).</param>
/// <param name="Overview">Plain-text overview, or null.</param>
/// <param name="Summary">Plain-text summary (clean description or impact fallback), or null.</param>
/// <param name="Published">Publication timestamp (UTC), or null.</param>
/// <param name="Updated">Last-updated timestamp (UTC), or null.</param>
/// <param name="Created">Creation timestamp (UTC), or null.</param>
/// <param name="Revision">Revision counter, or null.</param>
/// <param name="CveIds">Normalized CVE identifiers referenced by the note.</param>
/// <param name="PublicUrls">Normalized public reference URLs.</param>
/// <param name="PrimaryUrl">Canonical kb.cert.org detail URL.</param>
internal sealed record CertCcNoteMetadata(
    string? VuId,
    string IdNumber,
    string Title,
    string? Overview,
    string? Summary,
    DateTimeOffset? Published,
    DateTimeOffset? Updated,
    DateTimeOffset? Created,
    int? Revision,
    IReadOnlyList<string> CveIds,
    IReadOnlyList<string> PublicUrls,
    string? PrimaryUrl)
{
    /// <summary>Shared empty instance used as a neutral default.</summary>
    public static CertCcNoteMetadata Empty { get; } = new(
        VuId: null,
        IdNumber: string.Empty,
        Title: string.Empty,
        Overview: null,
        Summary: null,
        Published: null,
        Updated: null,
        Created: null,
        Revision: null,
        CveIds: Array.Empty<string>(),
        PublicUrls: Array.Empty<string>(),
        PrimaryUrl: null);
}
/// <summary>
/// A single vendor entry from the note's vendors document: contact/statement
/// timeline plus the vendor's free-form statement, addendum, and references.
/// </summary>
internal sealed record CertCcVendorDto(
    string Vendor,
    DateTimeOffset? ContactDate,
    DateTimeOffset? StatementDate,
    DateTimeOffset? Updated,
    string? Statement,
    string? Addendum,
    IReadOnlyList<string> References)
{
    /// <summary>Shared empty instance used as a neutral default.</summary>
    public static CertCcVendorDto Empty { get; } = new(
        Vendor: string.Empty,
        ContactDate: null,
        StatementDate: null,
        Updated: null,
        Statement: null,
        Addendum: null,
        References: Array.Empty<string>());
}
/// <summary>
/// One (vendor, CVE) status row from the note's vendor-statuses document, e.g.
/// affected/not-affected/unknown plus an optional statement and references.
/// </summary>
internal sealed record CertCcVendorStatusDto(
    string Vendor,
    string CveId,
    string Status,
    string? Statement,
    IReadOnlyList<string> References,
    DateTimeOffset? DateAdded,
    DateTimeOffset? DateUpdated)
{
    /// <summary>Shared empty instance used as a neutral default.</summary>
    public static CertCcVendorStatusDto Empty { get; } = new(
        Vendor: string.Empty,
        CveId: string.Empty,
        Status: string.Empty,
        Statement: null,
        References: Array.Empty<string>(),
        DateAdded: null,
        DateUpdated: null);
}
/// <summary>
/// A vulnerability entry from the note's vulnerabilities document: a normalized
/// CVE id with optional description and add/update timestamps.
/// </summary>
internal sealed record CertCcVulnerabilityDto(
    string CveId,
    string? Description,
    DateTimeOffset? DateAdded,
    DateTimeOffset? DateUpdated)
{
    /// <summary>Shared empty instance used as a neutral default.</summary>
    public static CertCcVulnerabilityDto Empty { get; } = new(
        CveId: string.Empty,
        Description: null,
        DateAdded: null,
        DateUpdated: null);
}

View File

@@ -0,0 +1,539 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using Markdig;
using StellaOps.Concelier.Connector.Common.Html;
using StellaOps.Concelier.Connector.Common.Url;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Parses VINCE JSON payloads (note metadata, vendors, vulnerabilities, vendor
/// statuses) into <see cref="CertCcNoteDto"/>. Markdown/HTML content is rendered,
/// sanitized, and flattened to plain text; URLs and CVE ids are normalized.
/// </summary>
internal static class CertCcNoteParser
{
    // Renders note markdown to HTML before sanitization; inline raw HTML is disabled.
    private static readonly MarkdownPipeline MarkdownPipeline = new MarkdownPipelineBuilder()
        .UseAdvancedExtensions()
        .UseSoftlineBreakAsHardlineBreak()
        .DisableHtml()
        .Build();
    private static readonly HtmlContentSanitizer HtmlSanitizer = new();
    // Strips any residual HTML tags after the block-level replacements in ConvertHtmlToPlainText.
    private static readonly Regex HtmlTagRegex = new("<[^>]+>", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>Parses the full VINCE document set for one note into a combined DTO.</summary>
    public static CertCcNoteDto Parse(
        ReadOnlySpan<byte> noteJson,
        ReadOnlySpan<byte> vendorsJson,
        ReadOnlySpan<byte> vulnerabilitiesJson,
        ReadOnlySpan<byte> vendorStatusesJson)
    {
        using var noteDocument = JsonDocument.Parse(noteJson.ToArray());
        var (metadata, detailUri) = ParseNoteMetadata(noteDocument.RootElement);
        using var vendorsDocument = JsonDocument.Parse(vendorsJson.ToArray());
        var vendors = ParseVendors(vendorsDocument.RootElement, detailUri);
        using var vulnerabilitiesDocument = JsonDocument.Parse(vulnerabilitiesJson.ToArray());
        var vulnerabilities = ParseVulnerabilities(vulnerabilitiesDocument.RootElement);
        using var statusesDocument = JsonDocument.Parse(vendorStatusesJson.ToArray());
        var statuses = ParseVendorStatuses(statusesDocument.RootElement);
        return new CertCcNoteDto(metadata, vendors, statuses, vulnerabilities);
    }

    /// <summary>Parses only the note document; vendor/status/vulnerability lists stay empty.</summary>
    public static CertCcNoteDto ParseNote(ReadOnlySpan<byte> noteJson)
    {
        using var noteDocument = JsonDocument.Parse(noteJson.ToArray());
        var (metadata, _) = ParseNoteMetadata(noteDocument.RootElement);
        return new CertCcNoteDto(metadata, Array.Empty<CertCcVendorDto>(), Array.Empty<CertCcVendorStatusDto>(), Array.Empty<CertCcVulnerabilityDto>());
    }

    /// <summary>
    /// Extracts the note metadata and the canonical kb.cert.org detail URI.
    /// Throws <see cref="JsonException"/> when required fields are missing.
    /// </summary>
    private static (CertCcNoteMetadata Metadata, Uri DetailUri) ParseNoteMetadata(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Object)
        {
            throw new JsonException("CERT/CC note payload must be a JSON object.");
        }
        var vuId = GetString(root, "vuid");
        var idNumber = GetString(root, "idnumber") ?? throw new JsonException("CERT/CC note missing idnumber.");
        var title = GetString(root, "name") ?? throw new JsonException("CERT/CC note missing name.");
        var detailUri = BuildDetailUri(idNumber);
        var overview = NormalizeMarkdownToPlainText(root, "overview", detailUri);
        var summary = NormalizeMarkdownToPlainText(root, "clean_desc", detailUri);
        if (string.IsNullOrWhiteSpace(summary))
        {
            // Fall back to the impact text when no clean description is present.
            summary = NormalizeMarkdownToPlainText(root, "impact", detailUri);
        }
        var published = ParseDate(root, "publicdate") ?? ParseDate(root, "datefirstpublished");
        var updated = ParseDate(root, "dateupdated");
        var created = ParseDate(root, "datecreated");
        var revision = ParseInt(root, "revision");
        var cveIds = ExtractCveIds(root, "cveids");
        var references = ExtractReferenceList(root, "public", detailUri);
        var metadata = new CertCcNoteMetadata(
            VuId: string.IsNullOrWhiteSpace(vuId) ? null : vuId.Trim(),
            IdNumber: idNumber.Trim(),
            Title: title.Trim(),
            Overview: overview,
            Summary: summary,
            Published: published?.ToUniversalTime(),
            Updated: updated?.ToUniversalTime(),
            Created: created?.ToUniversalTime(),
            Revision: revision,
            CveIds: cveIds,
            PublicUrls: references,
            PrimaryUrl: detailUri.ToString());
        return (metadata, detailUri);
    }

    /// <summary>
    /// Parses the vendors payload; entries without a vendor name are skipped and
    /// the result is ordered case-insensitively by vendor name.
    /// </summary>
    private static IReadOnlyList<CertCcVendorDto> ParseVendors(JsonElement root, Uri baseUri)
    {
        if (root.ValueKind != JsonValueKind.Array || root.GetArrayLength() == 0)
        {
            return Array.Empty<CertCcVendorDto>();
        }
        var parsed = new List<CertCcVendorDto>(root.GetArrayLength());
        foreach (var element in root.EnumerateArray())
        {
            if (element.ValueKind != JsonValueKind.Object)
            {
                continue;
            }
            var vendor = GetString(element, "vendor");
            if (string.IsNullOrWhiteSpace(vendor))
            {
                continue;
            }
            var statement = NormalizeFreeformText(GetString(element, "statement"));
            var addendum = NormalizeFreeformText(GetString(element, "addendum"));
            var references = ExtractReferenceStringList(GetString(element, "references"), baseUri);
            parsed.Add(new CertCcVendorDto(
                vendor.Trim(),
                ContactDate: ParseDate(element, "contact_date"),
                StatementDate: ParseDate(element, "statement_date"),
                Updated: ParseDate(element, "dateupdated"),
                Statement: statement,
                Addendum: addendum,
                References: references));
        }
        if (parsed.Count == 0)
        {
            return Array.Empty<CertCcVendorDto>();
        }
        return parsed
            .OrderBy(static vendor => vendor.Vendor, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>
    /// Parses the vulnerabilities payload; entries without a CVE are skipped and
    /// the result is ordered by normalized CVE id.
    /// </summary>
    private static IReadOnlyList<CertCcVulnerabilityDto> ParseVulnerabilities(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Array || root.GetArrayLength() == 0)
        {
            return Array.Empty<CertCcVulnerabilityDto>();
        }
        var parsed = new List<CertCcVulnerabilityDto>(root.GetArrayLength());
        foreach (var element in root.EnumerateArray())
        {
            if (element.ValueKind != JsonValueKind.Object)
            {
                continue;
            }
            var cve = GetString(element, "cve");
            if (string.IsNullOrWhiteSpace(cve))
            {
                continue;
            }
            parsed.Add(new CertCcVulnerabilityDto(
                NormalizeCve(cve),
                Description: NormalizeFreeformText(GetString(element, "description")),
                DateAdded: ParseDate(element, "date_added"),
                DateUpdated: ParseDate(element, "dateupdated")));
        }
        if (parsed.Count == 0)
        {
            return Array.Empty<CertCcVulnerabilityDto>();
        }
        return parsed
            .OrderBy(static vuln => vuln.CveId, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>
    /// Parses the vendor-statuses payload; rows missing any of vendor, CVE ("vul"),
    /// or status are skipped. Result is ordered by CVE then vendor.
    /// </summary>
    private static IReadOnlyList<CertCcVendorStatusDto> ParseVendorStatuses(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Array || root.GetArrayLength() == 0)
        {
            return Array.Empty<CertCcVendorStatusDto>();
        }
        var parsed = new List<CertCcVendorStatusDto>(root.GetArrayLength());
        foreach (var element in root.EnumerateArray())
        {
            if (element.ValueKind != JsonValueKind.Object)
            {
                continue;
            }
            var vendor = GetString(element, "vendor");
            var cve = GetString(element, "vul");
            var status = GetString(element, "status");
            if (string.IsNullOrWhiteSpace(vendor) || string.IsNullOrWhiteSpace(cve) || string.IsNullOrWhiteSpace(status))
            {
                continue;
            }
            var references = ExtractReferenceStringList(GetString(element, "references"), baseUri: null);
            parsed.Add(new CertCcVendorStatusDto(
                vendor.Trim(),
                NormalizeCve(cve),
                status.Trim(),
                NormalizeFreeformText(GetString(element, "statement")),
                references,
                DateAdded: ParseDate(element, "date_added"),
                DateUpdated: ParseDate(element, "dateupdated")));
        }
        if (parsed.Count == 0)
        {
            return Array.Empty<CertCcVendorStatusDto>();
        }
        return parsed
            .OrderBy(static entry => entry.CveId, StringComparer.OrdinalIgnoreCase)
            .ThenBy(static entry => entry.Vendor, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>Convenience overload reading the markdown from a JSON property.</summary>
    private static string? NormalizeMarkdownToPlainText(JsonElement element, string propertyName, Uri baseUri)
        => NormalizeMarkdownToPlainText(GetString(element, propertyName), baseUri);

    /// <summary>
    /// Renders markdown to HTML, sanitizes it, then flattens to plain text;
    /// returns null whenever any stage produces only whitespace.
    /// </summary>
    private static string? NormalizeMarkdownToPlainText(string? markdown, Uri baseUri)
    {
        if (string.IsNullOrWhiteSpace(markdown))
        {
            return null;
        }
        var normalized = NormalizeLineEndings(markdown.Trim());
        if (normalized.Length == 0)
        {
            return null;
        }
        var html = Markdig.Markdown.ToHtml(normalized, MarkdownPipeline);
        if (string.IsNullOrWhiteSpace(html))
        {
            return null;
        }
        var sanitized = HtmlSanitizer.Sanitize(html, baseUri);
        if (string.IsNullOrWhiteSpace(sanitized))
        {
            return null;
        }
        var plain = ConvertHtmlToPlainText(sanitized);
        return string.IsNullOrWhiteSpace(plain) ? null : plain;
    }

    /// <summary>
    /// Normalizes line endings and trailing whitespace in free-form text;
    /// returns null for blank input.
    /// </summary>
    private static string? NormalizeFreeformText(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }
        var normalized = NormalizeLineEndings(value).Trim();
        if (normalized.Length == 0)
        {
            return null;
        }
        var lines = normalized
            .Split('\n')
            .Select(static line => line.TrimEnd())
            .ToArray();
        return string.Join('\n', lines).Trim();
    }

    /// <summary>
    /// Converts sanitized HTML to plain text: block-level tags become line breaks,
    /// list items become "- " bullets, all remaining tags are stripped.
    /// </summary>
    private static string ConvertHtmlToPlainText(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return string.Empty;
        }
        // Decode entities first so tag replacements below see literal markup only.
        var decoded = WebUtility.HtmlDecode(html);
        decoded = decoded
            .Replace("<br />", "\n", StringComparison.OrdinalIgnoreCase)
            .Replace("<br/>", "\n", StringComparison.OrdinalIgnoreCase)
            .Replace("<br>", "\n", StringComparison.OrdinalIgnoreCase);
        decoded = Regex.Replace(decoded, "</p>", "\n\n", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        decoded = Regex.Replace(decoded, "</div>", "\n", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        decoded = Regex.Replace(decoded, "<li>", "- ", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        decoded = Regex.Replace(decoded, "</li>", "\n", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        decoded = Regex.Replace(decoded, "</tr>", "\n", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        decoded = Regex.Replace(decoded, "</td>", " \t", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        decoded = HtmlTagRegex.Replace(decoded, string.Empty);
        decoded = NormalizeLineEndings(decoded);
        var lines = decoded
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Select(static line => line.Trim())
            .ToArray();
        return string.Join('\n', lines).Trim();
    }

    /// <summary>
    /// Reads a JSON array of URL strings, normalizing each against
    /// <paramref name="baseUri"/>; invalid entries are dropped, result is
    /// de-duplicated and sorted case-insensitively.
    /// </summary>
    private static IReadOnlyList<string> ExtractReferenceList(JsonElement element, string propertyName, Uri baseUri)
    {
        if (!element.TryGetProperty(propertyName, out var raw) || raw.ValueKind != JsonValueKind.Array || raw.GetArrayLength() == 0)
        {
            return Array.Empty<string>();
        }
        var references = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var candidate in raw.EnumerateArray())
        {
            if (candidate.ValueKind != JsonValueKind.String)
            {
                continue;
            }
            var text = candidate.GetString();
            if (UrlNormalizer.TryNormalize(text, baseUri, out var normalized, stripFragment: true, forceHttps: false) && normalized is not null)
            {
                references.Add(normalized.ToString());
            }
        }
        if (references.Count == 0)
        {
            return Array.Empty<string>();
        }
        return references
            .OrderBy(static url => url, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>
    /// Splits a newline-delimited reference blob into normalized URLs, using a
    /// pooled scratch buffer. NOTE: the buffer is rented at length 16, so at most
    /// 16 segments are kept; any beyond that are silently dropped.
    /// </summary>
    private static IReadOnlyList<string> ExtractReferenceStringList(string? value, Uri? baseUri)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return Array.Empty<string>();
        }
        var buffer = ArrayPool<string>.Shared.Rent(16);
        try
        {
            var count = 0;
            var span = value.AsSpan();
            var start = 0;
            for (var index = 0; index < span.Length; index++)
            {
                var ch = span[index];
                if (ch == '\r' || ch == '\n')
                {
                    if (index > start)
                    {
                        AppendSegment(span, start, index - start, baseUri, buffer, ref count);
                    }
                    // Treat CRLF as a single terminator.
                    if (ch == '\r' && index + 1 < span.Length && span[index + 1] == '\n')
                    {
                        index++;
                    }
                    start = index + 1;
                }
            }
            if (start < span.Length)
            {
                AppendSegment(span, start, span.Length - start, baseUri, buffer, ref count);
            }
            if (count == 0)
            {
                return Array.Empty<string>();
            }
            return buffer.AsSpan(0, count)
                .ToArray()
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .OrderBy(static url => url, StringComparer.OrdinalIgnoreCase)
                .ToArray();
        }
        finally
        {
            ArrayPool<string>.Shared.Return(buffer, clearArray: true);
        }
    }

    /// <summary>
    /// Normalizes one line segment into a URL and appends it to the scratch
    /// buffer; blank, unparseable, or overflow segments are discarded.
    /// </summary>
    private static void AppendSegment(ReadOnlySpan<char> span, int start, int length, Uri? baseUri, string[] buffer, ref int count)
    {
        var segment = span.Slice(start, length).ToString().Trim();
        if (segment.Length == 0)
        {
            return;
        }
        if (!UrlNormalizer.TryNormalize(segment, baseUri, out var normalized, stripFragment: true, forceHttps: false) || normalized is null)
        {
            return;
        }
        if (count >= buffer.Length)
        {
            return;
        }
        buffer[count++] = normalized.ToString();
    }

    /// <summary>
    /// Reads a JSON array of CVE tokens, normalizing each to "CVE-..." form;
    /// result is de-duplicated and sorted case-insensitively.
    /// </summary>
    private static IReadOnlyList<string> ExtractCveIds(JsonElement element, string propertyName)
    {
        if (!element.TryGetProperty(propertyName, out var raw) || raw.ValueKind != JsonValueKind.Array || raw.GetArrayLength() == 0)
        {
            return Array.Empty<string>();
        }
        var values = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var entry in raw.EnumerateArray())
        {
            if (entry.ValueKind != JsonValueKind.String)
            {
                continue;
            }
            var text = entry.GetString();
            if (string.IsNullOrWhiteSpace(text))
            {
                continue;
            }
            values.Add(NormalizeCve(text));
        }
        if (values.Count == 0)
        {
            return Array.Empty<string>();
        }
        return values
            .OrderBy(static id => id, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>Prefixes "CVE-" when missing and upper-cases the whole token.</summary>
    private static string NormalizeCve(string value)
    {
        var trimmed = value.Trim();
        if (!trimmed.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            trimmed = $"CVE-{trimmed}";
        }
        var builder = new StringBuilder(trimmed.Length);
        foreach (var ch in trimmed)
        {
            builder.Append(char.ToUpperInvariant(ch));
        }
        return builder.ToString();
    }

    /// <summary>
    /// Reads a property as a string; numbers are stringified, everything else
    /// (including a missing property) yields null.
    /// </summary>
    private static string? GetString(JsonElement element, string propertyName)
    {
        if (element.ValueKind != JsonValueKind.Object)
        {
            return null;
        }
        if (!element.TryGetProperty(propertyName, out var property))
        {
            return null;
        }
        return property.ValueKind switch
        {
            JsonValueKind.String => property.GetString(),
            JsonValueKind.Number => property.ToString(),
            _ => null,
        };
    }

    /// <summary>Parses a timestamp property as UTC (unzoned values assumed UTC), or null.</summary>
    private static DateTimeOffset? ParseDate(JsonElement element, string propertyName)
    {
        var text = GetString(element, propertyName);
        if (string.IsNullOrWhiteSpace(text))
        {
            return null;
        }
        return DateTimeOffset.TryParse(text, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var parsed)
            ? parsed.ToUniversalTime()
            : null;
    }

    /// <summary>Parses an integer property given as either a JSON number or string, or null.</summary>
    private static int? ParseInt(JsonElement element, string propertyName)
    {
        if (!element.TryGetProperty(propertyName, out var property))
        {
            return null;
        }
        if (property.ValueKind == JsonValueKind.Number && property.TryGetInt32(out var value))
        {
            return value;
        }
        var text = GetString(element, propertyName);
        if (string.IsNullOrWhiteSpace(text))
        {
            return null;
        }
        return int.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed) ? parsed : (int?)null;
    }

    /// <summary>Builds the canonical kb.cert.org detail URL for a note id number.</summary>
    private static Uri BuildDetailUri(string idNumber)
    {
        var sanitized = idNumber.Trim();
        return new Uri($"https://www.kb.cert.org/vuls/id/{sanitized}", UriKind.Absolute);
    }

    /// <summary>Rewrites CRLF and bare CR line endings to LF.</summary>
    private static string NormalizeLineEndings(string value)
    {
        if (value.IndexOf('\r') < 0)
        {
            return value;
        }
        return value.Replace("\r\n", "\n", StringComparison.Ordinal).Replace('\r', '\n');
    }
}

View File

@@ -0,0 +1,108 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.Json;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Extracts note identifiers from a CERT/CC monthly or yearly summary payload,
/// tolerating several shapes: an object with a "notes" array, a bare array, or
/// null/empty payloads.
/// </summary>
internal static class CertCcSummaryParser
{
    /// <summary>
    /// Returns the note tokens found in <paramref name="payload"/> in first-seen
    /// order, de-duplicated by their embedded digits (so "VU#123456" and "123456"
    /// collapse to one entry) or by upper-cased text when no digits are present.
    /// Throws <see cref="JsonException"/> for payload shapes with no notes array.
    /// </summary>
    public static IReadOnlyList<string> ParseNotes(byte[] payload)
    {
        if (payload is null || payload.Length == 0)
        {
            return Array.Empty<string>();
        }
        using var document = JsonDocument.Parse(payload, new JsonDocumentOptions
        {
            AllowTrailingCommas = true,
            CommentHandling = JsonCommentHandling.Skip,
        });
        var notesElement = document.RootElement.ValueKind switch
        {
            JsonValueKind.Object when document.RootElement.TryGetProperty("notes", out var notes) => notes,
            JsonValueKind.Array => document.RootElement,
            JsonValueKind.Null or JsonValueKind.Undefined => default,
            _ => throw new JsonException("CERT/CC summary payload must contain a 'notes' array."),
        };
        if (notesElement.ValueKind != JsonValueKind.Array || notesElement.GetArrayLength() == 0)
        {
            return Array.Empty<string>();
        }
        var results = new List<string>(notesElement.GetArrayLength());
        var seen = new HashSet<string>(StringComparer.Ordinal);
        foreach (var element in notesElement.EnumerateArray())
        {
            var token = ExtractToken(element);
            if (string.IsNullOrWhiteSpace(token))
            {
                continue;
            }
            var normalized = token.Trim();
            var dedupKey = CreateDedupKey(normalized);
            if (seen.Add(dedupKey))
            {
                results.Add(normalized);
            }
        }
        return results.Count == 0 ? Array.Empty<string>() : results;
    }

    /// <summary>
    /// Builds the dedup key: the token's digits when any exist, otherwise the
    /// trimmed token upper-cased.
    /// </summary>
    private static string CreateDedupKey(string token)
    {
        var digits = string.Concat(token.Where(char.IsDigit));
        return digits.Length > 0
            ? digits
            : token.Trim().ToUpperInvariant();
    }

    /// <summary>
    /// Pulls a note token out of a summary entry, which may be a string, a
    /// number, or an object carrying one of the known id property names.
    /// </summary>
    private static string? ExtractToken(JsonElement element)
    {
        return element.ValueKind switch
        {
            JsonValueKind.String => element.GetString(),
            JsonValueKind.Number => element.TryGetInt64(out var number)
                ? number.ToString(CultureInfo.InvariantCulture)
                : element.GetRawText(),
            JsonValueKind.Object => ExtractFromObject(element),
            _ => null,
        };
    }

    /// <summary>Returns the first non-blank string among the candidate id properties.</summary>
    private static string? ExtractFromObject(JsonElement element)
    {
        foreach (var propertyName in PropertyCandidates)
        {
            if (element.TryGetProperty(propertyName, out var property) && property.ValueKind == JsonValueKind.String)
            {
                var value = property.GetString();
                if (!string.IsNullOrWhiteSpace(value))
                {
                    return value;
                }
            }
        }
        return null;
    }

    // Property names probed, in priority order, when a summary entry is an object.
    private static readonly string[] PropertyCandidates =
    {
        "note",
        "notes",
        "id",
        "idnumber",
        "noteId",
        "vu",
        "vuid",
        "vuId",
    };
}

View File

@@ -0,0 +1,22 @@
using System;
using System.Collections.Generic;
using StellaOps.Concelier.Connector.Common.Cursors;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// The outcome of summary planning: the time window covered, the summary
/// endpoints to fetch, and the cursor state to persist afterwards.
/// </summary>
public sealed record CertCcSummaryPlan(
    TimeWindow Window,
    IReadOnlyList<CertCcSummaryRequest> Requests,
    TimeWindowCursorState NextState);
/// <summary>Granularity of a CERT/CC summary endpoint.</summary>
public enum CertCcSummaryScope
{
    Monthly,
    Yearly,
}
/// <summary>
/// One summary endpoint to fetch; <paramref name="Month"/> is null for yearly scope.
/// </summary>
public sealed record CertCcSummaryRequest(
    Uri Uri,
    CertCcSummaryScope Scope,
    int Year,
    int? Month);

View File

@@ -0,0 +1,96 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertCc.Configuration;
using StellaOps.Concelier.Connector.Common.Cursors;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Computes which CERT/CC summary endpoints should be fetched for the next export window.
/// </summary>
public sealed class CertCcSummaryPlanner
{
    private readonly CertCcOptions _options;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates a planner over validated connector options; <paramref name="timeProvider"/>
    /// defaults to the system clock when omitted.
    /// </summary>
    public CertCcSummaryPlanner(
        IOptions<CertCcOptions> options,
        TimeProvider? timeProvider = null)
    {
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>
    /// Computes the next fetch window from the cursor state and emits one monthly
    /// summary request per month touched by the window (capped at
    /// MaxMonthlySummaries) plus one yearly request per distinct year.
    /// </summary>
    public CertCcSummaryPlan CreatePlan(TimeWindowCursorState? state)
    {
        var now = _timeProvider.GetUtcNow();
        var window = TimeWindowCursorPlanner.GetNextWindow(now, state, _options.SummaryWindow);
        var nextState = (state ?? TimeWindowCursorState.Empty).WithWindow(window);
        var months = EnumerateYearMonths(window.Start, window.End)
            .Take(_options.MaxMonthlySummaries)
            .ToArray();
        if (months.Length == 0)
        {
            return new CertCcSummaryPlan(window, Array.Empty<CertCcSummaryRequest>(), nextState);
        }
        var requests = new List<CertCcSummaryRequest>(months.Length * 2);
        foreach (var month in months)
        {
            requests.Add(new CertCcSummaryRequest(
                BuildMonthlyUri(month.Year, month.Month),
                CertCcSummaryScope.Monthly,
                month.Year,
                month.Month));
        }
        // One yearly rollup per distinct year, appended after the monthly requests.
        foreach (var year in months.Select(static value => value.Year).Distinct().OrderBy(static year => year))
        {
            requests.Add(new CertCcSummaryRequest(
                BuildYearlyUri(year),
                CertCcSummaryScope.Yearly,
                year,
                Month: null));
        }
        return new CertCcSummaryPlan(window, requests, nextState);
    }

    /// <summary>Builds the monthly summary URI, e.g. "2024/03/summary/".</summary>
    private Uri BuildMonthlyUri(int year, int month)
    {
        var path = $"{year:D4}/{month:D2}/summary/";
        return new Uri(_options.BaseApiUri, path);
    }

    /// <summary>Builds the yearly summary URI, e.g. "2024/summary/".</summary>
    private Uri BuildYearlyUri(int year)
    {
        var path = $"{year:D4}/summary/";
        return new Uri(_options.BaseApiUri, path);
    }

    /// <summary>
    /// Yields every (year, month) pair intersecting [start, end); the end month
    /// is included only when the window extends past the first instant of it.
    /// </summary>
    private static IEnumerable<(int Year, int Month)> EnumerateYearMonths(DateTimeOffset start, DateTimeOffset end)
    {
        if (end <= start)
        {
            yield break;
        }
        var cursor = new DateTime(start.Year, start.Month, 1, 0, 0, 0, DateTimeKind.Utc);
        var limit = new DateTime(end.Year, end.Month, 1, 0, 0, 0, DateTimeKind.Utc);
        if (end.Day != 1 || end.TimeOfDay != TimeSpan.Zero)
        {
            limit = limit.AddMonths(1);
        }
        while (cursor < limit)
        {
            yield return (cursor.Year, cursor.Month);
            cursor = cursor.AddMonths(1);
        }
    }
}

View File

@@ -0,0 +1,235 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Connector.CertCc.Internal;
/// <summary>
/// Heuristic parser that extracts (product, version) patch hints from free-form
/// CERT/CC vendor statement text, one line at a time.
/// </summary>
internal static class CertCcVendorStatementParser
{
    // Separators tried in declaration order when splitting a line into its
    // product and version halves; the first separator that splits the line wins.
    private static readonly string[] PairSeparators =
    {
        "\t",
        " - ",
        " ",
        " — ",
        " : ",
        ": ",
        " :",
        ":",
    };
    // Leading bullet characters stripped before a line is split.
    private static readonly char[] BulletPrefixes = { '-', '*', '•', '+', '\t' };
    // Characters that delimit multiple products within the product segment.
    private static readonly char[] ProductDelimiters = { '/', ',', ';', '&' };
    // Matches dotted numeric versions and simple alphanumeric suffixes (e.g., 4.4.3.6, 3.9.9.12, 10.2a)
    private static readonly Regex VersionTokenRegex = new(@"(?<![A-Za-z0-9])(\d+(?:\.\d+){1,3}(?:[A-Za-z0-9\-]+)?)", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Parses a statement into de-duplicated patch hints, ordered by product then
    /// version. Lines that yield no version token are skipped. When the counts of
    /// products and versions on a line disagree, products are paired with versions
    /// in groups or all assigned the first version as a fallback.
    /// </summary>
    public static IReadOnlyList<CertCcVendorPatch> Parse(string? statement)
    {
        if (string.IsNullOrWhiteSpace(statement))
        {
            return Array.Empty<CertCcVendorPatch>();
        }
        var patches = new List<CertCcVendorPatch>();
        var lines = statement
            .Replace("\r\n", "\n", StringComparison.Ordinal)
            .Replace('\r', '\n')
            .Split('\n', StringSplitOptions.RemoveEmptyEntries);
        foreach (var rawLine in lines)
        {
            var line = rawLine.Trim();
            if (line.Length == 0)
            {
                continue;
            }
            line = TrimBulletPrefix(line);
            if (line.Length == 0)
            {
                continue;
            }
            if (!TrySplitLine(line, out var productSegment, out var versionSegment))
            {
                continue;
            }
            var versions = ExtractVersions(versionSegment);
            if (versions.Count == 0)
            {
                continue;
            }
            var products = ExtractProducts(productSegment);
            if (products.Count == 0)
            {
                // No recognizable product: record the versions under an empty product.
                products.Add(string.Empty);
            }
            // 1:1 pairing when counts match exactly.
            if (versions.Count == products.Count)
            {
                for (var index = 0; index < versions.Count; index++)
                {
                    patches.Add(new CertCcVendorPatch(products[index], versions[index], line));
                }
                continue;
            }
            // Even-group pairing: N*k products and N versions → k products per version.
            if (versions.Count > 1 && products.Count > versions.Count && products.Count % versions.Count == 0)
            {
                var groupSize = products.Count / versions.Count;
                for (var versionIndex = 0; versionIndex < versions.Count; versionIndex++)
                {
                    var start = versionIndex * groupSize;
                    var end = start + groupSize;
                    var version = versions[versionIndex];
                    for (var productIndex = start; productIndex < end && productIndex < products.Count; productIndex++)
                    {
                        patches.Add(new CertCcVendorPatch(products[productIndex], version, line));
                    }
                }
                continue;
            }
            // Fallback: assign the first version to every product.
            var primaryVersion = versions[0];
            foreach (var product in products)
            {
                patches.Add(new CertCcVendorPatch(product, primaryVersion, line));
            }
        }
        if (patches.Count == 0)
        {
            return Array.Empty<CertCcVendorPatch>();
        }
        return patches
            .Where(static patch => !string.IsNullOrWhiteSpace(patch.Version))
            .Distinct(CertCcVendorPatch.Comparer)
            .OrderBy(static patch => patch.Product, StringComparer.OrdinalIgnoreCase)
            .ThenBy(static patch => patch.Version, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>
    /// Strips leading bullet characters; falls back to the plainly trimmed line
    /// when stripping would leave nothing.
    /// </summary>
    private static string TrimBulletPrefix(string value)
    {
        var trimmed = value.TrimStart(BulletPrefixes).Trim();
        return trimmed.Length == 0 ? value.Trim() : trimmed;
    }

    /// <summary>
    /// Splits a line into product and version segments using the first matching
    /// pair separator, falling back to "everything before the last space" /
    /// "last space-delimited token".
    /// </summary>
    private static bool TrySplitLine(string line, out string productSegment, out string versionSegment)
    {
        foreach (var separator in PairSeparators)
        {
            var parts = line.Split(separator, 2, StringSplitOptions.TrimEntries);
            if (parts.Length == 2)
            {
                productSegment = parts[0];
                versionSegment = parts[1];
                return true;
            }
        }
        var whitespaceSplit = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);
        if (whitespaceSplit.Length >= 2)
        {
            productSegment = string.Join(' ', whitespaceSplit[..^1]);
            versionSegment = whitespaceSplit[^1];
            return true;
        }
        productSegment = string.Empty;
        versionSegment = string.Empty;
        return false;
    }

    /// <summary>
    /// Splits the product segment on the product delimiters into trimmed,
    /// case-insensitively distinct product names.
    /// </summary>
    private static List<string> ExtractProducts(string segment)
    {
        if (string.IsNullOrWhiteSpace(segment))
        {
            return new List<string>();
        }
        var normalized = segment.Replace('\t', ' ').Trim();
        var tokens = normalized
            .Split(ProductDelimiters, StringSplitOptions.RemoveEmptyEntries)
            .Select(static token => token.Trim())
            .Where(static token => token.Length > 0)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToList();
        return tokens;
    }

    /// <summary>
    /// Extracts distinct version tokens from the version segment via
    /// <see cref="VersionTokenRegex"/>, capped at 32 entries per line.
    /// </summary>
    private static List<string> ExtractVersions(string segment)
    {
        if (string.IsNullOrWhiteSpace(segment))
        {
            return new List<string>();
        }
        var matches = VersionTokenRegex.Matches(segment);
        if (matches.Count == 0)
        {
            return new List<string>();
        }
        var versions = new List<string>(matches.Count);
        foreach (Match match in matches)
        {
            if (match.Groups.Count == 0)
            {
                continue;
            }
            var value = match.Groups[1].Value.Trim();
            if (value.Length == 0)
            {
                continue;
            }
            versions.Add(value);
        }
        return versions
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Take(32)
            .ToList();
    }
}
/// <summary>
/// A (product, version) pair extracted from a CERT/CC vendor statement line.
/// </summary>
/// <param name="Product">Product name segment; may be empty when the line named no product.</param>
/// <param name="Version">Version token extracted from the line.</param>
/// <param name="RawLine">The original statement line the pair was parsed from.</param>
internal sealed record CertCcVendorPatch(string Product, string Version, string? RawLine)
{
    /// <summary>
    /// Case-insensitive equality over (Product, Version); <see cref="RawLine"/> is ignored.
    /// </summary>
    public static IEqualityComparer<CertCcVendorPatch> Comparer { get; } = new CertCcVendorPatchComparer();

    private sealed class CertCcVendorPatchComparer : IEqualityComparer<CertCcVendorPatch>
    {
        public bool Equals(CertCcVendorPatch? x, CertCcVendorPatch? y)
        {
            if (ReferenceEquals(x, y))
            {
                return true;
            }

            if (x is null || y is null)
            {
                return false;
            }

            return string.Equals(x.Product, y.Product, StringComparison.OrdinalIgnoreCase)
                && string.Equals(x.Version, y.Version, StringComparison.OrdinalIgnoreCase);
        }

        public int GetHashCode(CertCcVendorPatch obj)
        {
            // Hash with the same OrdinalIgnoreCase comparer used by Equals. The
            // previous implementation hashed ToLowerInvariant() output, which can
            // disagree with OrdinalIgnoreCase case folding for a handful of
            // Unicode code points — equal values must produce equal hashes.
            var hash = default(HashCode);
            hash.Add(obj.Product ?? string.Empty, StringComparer.OrdinalIgnoreCase);
            hash.Add(obj.Version ?? string.Empty, StringComparer.OrdinalIgnoreCase);
            return hash.ToHashCode();
        }
    }
}

View File

@@ -0,0 +1,22 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.CertCc;
/// <summary>Job kind identifiers registered for the CERT/CC connector.</summary>
internal static class CertCcJobKinds
{
    /// <summary>Scheduler job kind for the CERT/CC fetch stage.</summary>
    public const string Fetch = "source:cert-cc:fetch";
}
/// <summary>
/// Scheduler job that delegates to <see cref="CertCcConnector.FetchAsync"/> for the
/// CERT/CC fetch stage.
/// </summary>
internal sealed class CertCcFetchJob : IJob
{
    private readonly CertCcConnector _connector;

    public CertCcFetchJob(CertCcConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}

View File

@@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("StellaOps.Concelier.Connector.CertCc.Tests")]

View File

@@ -0,0 +1,63 @@
# CERT/CC Vulnerability Notes Source Research
## Canonical publication endpoints
- **Public portal** `https://www.kb.cert.org/vuls/` lists recently published Vulnerability Notes and exposes a "Subscribe to our feed" link for automation entry points.
- **Atom feed** `https://www.kb.cert.org/vulfeed` returns an Atom 1.0 feed of the same notes (`<title>`, `<updated>`, `<summary>` HTML payload). Feed metadata advertises `rel="self"` at `https://kb.cert.org/vuls/atomfeed/`. Use conditional GET headers (`If-Modified-Since`, `If-None-Match`) to avoid refetching unchanged entries.
## VINCE Vulnerability Note API
The VINCE documentation describes an unauthenticated REST-style API for structured retrieval:
| Endpoint | Payload | Notes |
| --- | --- | --- |
| `GET /vuls/api/{id}/` | Canonical note metadata (title, overview, markdown segments, timestamps, aliases). | Use numeric ID (e.g., `257161`). |
| `GET /vuls/api/{id}/vuls/` | Per-CVE vulnerability records tied to the note. | Includes CVE, description, timestamps. |
| `GET /vuls/api/{id}/vendors/` | Vendor statements per advisory. | Provides status text and optional references. |
| `GET /vuls/api/{id}/vendors/vuls/` | Vendor × vulnerability status matrix. | “known_affected” vs “known_not_affected” semantics. |
| `GET /vuls/api/vuls/cve/{cve}/` | Reverse lookup by CVE. | Returns combined note + vendor context. |
| `GET /vuls/api/{year}/summary/` | Annual summary listing (`count`, `notes[]`). | Year-month variants exist (`/{year}/{month}/summary/`). |
| `GET /vuls/api/{id}/csaf/` | CSAF 2.0 export generated by VINCE. | Useful for downstream CSAF tooling. |
Operational considerations:
- API responses are JSON (UTF-8) and publicly accessible; no authentication tokens or cookies are required.
- Monthly and annual summary endpoints enable incremental crawling without diffing the Atom feed.
- Expect high-volume notes to expose dozens of vendor records—prepare batching and pagination at the connector layer even though the API returns full arrays today.
- Apply polite backoff: the documentation does not publish explicit rate limits, but the kb.cert.org infrastructure throttles bursts; mirror existing backoff strategy (exponential with jitter) used by other connectors.
- Detail fetch tolerates missing optional endpoints (`vendors`, `vendors-vuls`, `vuls`) by logging a warning and continuing with partial data; repeated 4xx responses will not wedge the cursor.
## Telemetry & monitoring
The connector exposes an OpenTelemetry meter named `StellaOps.Concelier.Connector.CertCc`. Key instruments include:
- Planning: `certcc.plan.windows`, `certcc.plan.requests`, and `certcc.plan.window_days`.
- Summary fetch: `certcc.summary.fetch.attempts`, `.success`, `.not_modified`, `.failures`.
- Detail fetch: `certcc.detail.fetch.attempts`, `.success`, `.unchanged`, `.missing`, `.failures` with an `endpoint` dimension (note/vendors/vuls/vendors-vuls).
- Parsing: `certcc.parse.success`, `.failures`, plus histograms for vendor/status/vulnerability counts.
- Mapping: `certcc.map.success`, `.failures`, and histograms `certcc.map.affected.count` / `certcc.map.normalized_versions.count`.
Structured logs surface correlation IDs across fetch, parse, and map stages. Failures emit warnings for tolerated missing endpoints and errors for retry-worthy conditions so operators can hook them into existing alert policies.
## Historical data sets
CERT/CC publishes a Vulnerability Data Archive (JSON exports plus tooling) for deep history or backfills. The archive is hosted on the SEI site with mirrored GitHub repositories containing normalized JSON conversions.
## Snapshot regression workflow
The connector ships deterministic fixtures so QA and Merge teams can replay fetch→parse→map without live calls. Use the following flow when validating changes or refreshing snapshots:
1. `dotnet test src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests` runs the connector snapshot suite against canned VINCE responses.
2. `UPDATE_CERTCC_FIXTURES=1 dotnet test src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests` regenerates fixtures under `src/Concelier/__Tests/StellaOps.Concelier.Connector.CertCc.Tests/Fixtures/*.snapshot.json` and mirrors them in the test output directory (`bin/Debug/net10.0/Source/CertCc/Fixtures`).
- The harness now records every HTTP request; `certcc-requests.snapshot.json` must list summaries/months in canonical order.
- Expect `certcc-advisories.snapshot.json` to include normalized versions (`scheme=certcc.vendor`) and provenance decision reasons.
3. Review diffs and attach `certcc-*.snapshot.json` plus test logs when handing off to Merge.
Fixtures are sorted and timestamps normalized to UTC ISO8601 to preserve determinism across machines.
## Next steps for the connector
1. Implement Atom polling for quick detection, with VINCE API lookups for structured details. `CertCcSummaryPlanner` already computes the VINCE year/month summary URIs to fetch per window; wire this into the fetch job and persist the resulting `TimeWindowCursorState`.
2. Persist `updated` timestamps and VINCE `revision` counters to drive resume logic.
3. Capture vendor statements/CSAF exports to populate range primitives once model hooks exist.
4. Evaluate using the data archive for seed fixtures covering legacy notes (pre-2010).

View File

@@ -0,0 +1,19 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Markdig" Version="0.31.0" />
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Storage.Mongo/StellaOps.Concelier.Storage.Mongo.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,14 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|Document CERT/CC advisory sources|BE-Conn-CERTCC|Research|**DONE (2025-10-10)** Catalogued Atom feed + VINCE API endpoints and archive references in `README.md`; include polling/backoff guidance.|
|Fetch pipeline & state tracking|BE-Conn-CERTCC|Source.Common, Storage.Mongo|**DONE (2025-10-12)** Summary planner + fetch job persist monthly/yearly VINCE JSON to `DocumentStore`, hydrate the `TimeWindowCursorState`, and snapshot regression (`dotnet test` 2025-10-12) confirmed deterministic resume behaviour.|
|VINCE note detail fetcher|BE-Conn-CERTCC|Source.Common, Storage.Mongo|**DONE (2025-10-12)** Detail bundle fetch now enqueues VU identifiers and persists note/vendors/vuls/vendors-vuls documents with ETag/Last-Modified metadata, tolerating missing optional endpoints without wedging the cursor.|
|DTO & parser implementation|BE-Conn-CERTCC|Source.Common|**DONE (2025-10-12)** VINCE DTO aggregate materialises note/vendor/vulnerability payloads, normalises markdown to HTML-safe fragments, and surfaces vendor impact statements covered by parser unit tests.|
|Canonical mapping & range primitives|BE-Conn-CERTCC|Models|**DONE (2025-10-12)** Mapper emits aliases (VU#, CVE), vendor range primitives, and normalizedVersions (`scheme=certcc.vendor`) with provenance masks; `certcc-advisories.snapshot.json` validates canonical output after schema sync.|
|Deterministic fixtures/tests|QA|Testing|**DONE (2025-10-11)** Snapshot harness regenerated (`certcc-*.snapshot.json`), request ordering assertions added, and `UPDATE_CERTCC_FIXTURES` workflow verified for CI determinism.|
|Connector test harness remediation|BE-Conn-CERTCC, QA|Testing|**DONE (2025-10-11)** Connector test harness now rebuilds `FakeTimeProvider`, wires `AddSourceCommon`, and drives canned VINCE responses across fetch→parse→map with recorded-request assertions.|
|Snapshot coverage handoff|QA|Models, Merge|**DONE (2025-10-11)** Fixtures + request/advisory snapshots refreshed, README documents `UPDATE_CERTCC_FIXTURES` workflow, and recorded-request ordering is enforced for QA handoff.|
|FEEDCONN-CERTCC-02-010 Partial-detail graceful degradation|BE-Conn-CERTCC|Connector plan|**DONE (2025-10-12)** Detail fetch now catches 404/410/403 responses for optional endpoints, logs missing bundles, feeds empty payloads into parsing, and ships regression coverage for mixed responses.|
|FEEDCONN-CERTCC-02-012 Schema sync & snapshot regen follow-up|QA, BE-Conn-CERTCC|Models `FEEDMODELS-SCHEMA-01-001`/`-002`/`-003`, Storage `FEEDSTORAGE-DATA-02-001`|**DONE (2025-10-12)** Snapshot suite rerun, fixtures updated, and handoff notes (`FEEDCONN-CERTCC-02-012_HANDOFF.md`) document normalizedVersions/provenance expectations for Merge backfill.|
|Telemetry & documentation|DevEx|Docs|**DONE (2025-10-12)** `CertCcDiagnostics` now publishes summary/detail/parse/map metrics, README documents meter names, and structured logging guidance is captured for Ops handoff.|

View File

@@ -0,0 +1,27 @@
# AGENTS
## Role
ANSSI CERT-FR advisories connector (avis/alertes) providing national enrichment: advisory metadata, CVE links, mitigation notes, and references.
## Scope
- Harvest CERT-FR items via RSS and/or list pages; follow item pages for detail; window by publish/update date.
- Validate HTML or JSON payloads; extract structured fields; map to canonical aliases, references, severity text.
- Maintain watermarks and de-duplication by content hash; idempotent processing.
## Participants
- Source.Common (HTTP, HTML parsing helpers, validators).
- Storage.Mongo (document, dto, advisory, reference, source_state).
- Models (canonical).
- Core/WebService (jobs: source:certfr:fetch|parse|map).
- Merge engine (later) to enrich only.
## Interfaces & contracts
- Treat CERT-FR as enrichment; never override distro or PSIRT version ranges absent concrete evidence.
- References must include primary bulletin URL and vendor links; tag kind=bulletin/vendor/mitigation appropriately.
- Provenance records cite "cert-fr" with method=parser and source URL.
## In/Out of scope
In: advisory metadata extraction, references, severity text, watermarking.
Out: OVAL or package-level authority.
## Observability & security expectations
- Metrics: SourceDiagnostics emits shared `concelier.source.http.*` counters/histograms tagged `concelier.source=certfr`, covering fetch counts, parse failures, and map activity.
- Logs: feed URL(s), item ids/urls, extraction durations; no PII; allowlist hostnames.
## Tests
- Author and review coverage in `../StellaOps.Concelier.Connector.CertFr.Tests`.
- Shared fixtures (e.g., `MongoIntegrationFixture`, `ConnectorTestHarness`) live in `../StellaOps.Concelier.Testing`.
- Keep fixtures deterministic; match new cases to real-world advisories or regression scenarios.

View File

@@ -0,0 +1,337 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.CertFr.Configuration;
using StellaOps.Concelier.Connector.CertFr.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertFr;
/// <summary>
/// CERT-FR advisories connector implementing the fetch → parse → map pipeline.
/// Fetch downloads advisory detail pages discovered via the feed and records them as raw documents;
/// parse turns stored HTML into <c>certfr.detail.v1</c> DTO records; map upserts canonical advisories.
/// Progress and the pending work queues are persisted as a cursor in the source-state repository.
/// </summary>
public sealed class CertFrConnector : IFeedConnector
{
    // Camel-case, null-omitting serialization used both to persist DTO payloads and to read them back.
    private static readonly JsonSerializerOptions SerializerOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    };
    private readonly CertFrFeedClient _feedClient;
    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly CertFrOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<CertFrConnector> _logger;

    /// <summary>
    /// Wires the connector's collaborators. All dependencies are required except
    /// <paramref name="timeProvider"/>, which defaults to <see cref="TimeProvider.System"/>.
    /// Options are validated eagerly so misconfiguration fails at construction time.
    /// </summary>
    public CertFrConnector(
        CertFrFeedClient feedClient,
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IOptions<CertFrOptions> options,
        TimeProvider? timeProvider,
        ILogger<CertFrConnector> logger)
    {
        _feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Source identifier ("cert-fr") used for documents, state, and provenance.</summary>
    public string SourceName => CertFrConnectorPlugin.SourceName;

    /// <summary>
    /// Fetch stage: loads feed items for the current time window, downloads each detail page
    /// (using ETag/Last-Modified from any previously stored document), and queues new or
    /// changed documents for parsing. Advances the last-published watermark in the cursor.
    /// The <paramref name="services"/> parameter is part of the connector contract and is
    /// not used by this stage.
    /// </summary>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var now = _timeProvider.GetUtcNow();
        var windowEnd = now;
        // Resume from the last published timestamp minus the configured overlap,
        // clamped so the window never reaches further back than the backfill horizon.
        var lastPublished = cursor.LastPublished ?? now - _options.InitialBackfill;
        var windowStart = lastPublished - _options.WindowOverlap;
        var minStart = now - _options.InitialBackfill;
        if (windowStart < minStart)
        {
            windowStart = minStart;
        }
        IReadOnlyList<CertFrFeedItem> items;
        try
        {
            items = await _feedClient.LoadAsync(windowStart, windowEnd, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Feed-level failure: record it with a 10-minute backoff and surface the error.
            _logger.LogError(ex, "Cert-FR feed load failed {Start:o}-{End:o}", windowStart, windowEnd);
            await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }
        if (items.Count == 0)
        {
            // Nothing in the window; advance the watermark to the window end so the
            // next run does not re-scan the same empty range.
            await UpdateCursorAsync(cursor.WithLastPublished(windowEnd), cancellationToken).ConfigureAwait(false);
            return;
        }
        var pendingDocuments = cursor.PendingDocuments.ToList();
        var pendingMappings = cursor.PendingMappings.ToList();
        var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, item.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
                var request = new SourceFetchRequest(CertFrOptions.HttpClientName, SourceName, item.DetailUri)
                {
                    Metadata = CertFrDocumentMetadata.CreateMetadata(item),
                    // Conditional GET: reuse validators from the previously stored document.
                    ETag = existing?.Etag,
                    LastModified = existing?.LastModified,
                    AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
                };
                var result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
                if (result.IsNotModified || !result.IsSuccess || result.Document is null)
                {
                    // Not modified (or no document to process): still advance the watermark.
                    if (item.Published > maxPublished)
                    {
                        maxPublished = item.Published;
                    }
                    continue;
                }
                if (existing is not null
                    && string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
                    && string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
                {
                    // Identical content already mapped: restore the mapped status instead of re-queuing.
                    await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
                    if (item.Published > maxPublished)
                    {
                        maxPublished = item.Published;
                    }
                    continue;
                }
                if (!pendingDocuments.Contains(result.Document.Id))
                {
                    pendingDocuments.Add(result.Document.Id);
                }
                if (item.Published > maxPublished)
                {
                    maxPublished = item.Published;
                }
                // Optional politeness delay between detail-page requests.
                if (_options.RequestDelay > TimeSpan.Zero)
                {
                    await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
                }
            }
            catch (Exception ex)
            {
                // Per-item failure: record it with a 5-minute backoff and abort the run;
                // accumulated cursor changes are not persisted for this pass.
                _logger.LogError(ex, "Cert-FR fetch failed for {Uri}", item.DetailUri);
                await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
                throw;
            }
        }
        if (maxPublished == DateTimeOffset.MinValue)
        {
            maxPublished = cursor.LastPublished ?? windowEnd;
        }
        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings)
            .WithLastPublished(maxPublished);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Parse stage: for each pending document, downloads the raw HTML from GridFS, parses it
    /// into a <see cref="CertFrDto"/>, upserts the DTO record (schema <c>certfr.detail.v1</c>),
    /// and moves the document from the pending-documents queue to the pending-mappings queue.
    /// Documents with missing payloads or parse failures are marked failed and dropped from
    /// both queues so they cannot wedge the cursor.
    /// </summary>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }
        var pendingDocuments = cursor.PendingDocuments.ToList();
        var pendingMappings = cursor.PendingMappings.ToList();
        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            if (!document.GridFsId.HasValue)
            {
                _logger.LogWarning("Cert-FR document {DocumentId} missing GridFS payload", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            CertFrDocumentMetadata metadata;
            try
            {
                metadata = CertFrDocumentMetadata.FromDocument(document);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Cert-FR metadata parse failed for document {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            CertFrDto dto;
            try
            {
                // Raw payload is stored as UTF-8 HTML; decode and hand off to the parser.
                var content = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
                var html = System.Text.Encoding.UTF8.GetString(content);
                dto = CertFrParser.Parse(html, metadata);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Cert-FR parse failed for advisory {AdvisoryId} ({Uri})", metadata.AdvisoryId, document.Uri);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingDocuments.Remove(documentId);
                pendingMappings.Remove(documentId);
                continue;
            }
            // Persist the DTO as BSON; reuse the existing record's identity when present.
            var json = JsonSerializer.Serialize(dto, SerializerOptions);
            var payload = BsonDocument.Parse(json);
            var validatedAt = _timeProvider.GetUtcNow();
            var existingDto = await _dtoStore.FindByDocumentIdAsync(document.Id, cancellationToken).ConfigureAwait(false);
            var dtoRecord = existingDto is null
                ? new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certfr.detail.v1", payload, validatedAt)
                : existingDto with
                {
                    Payload = payload,
                    SchemaVersion = "certfr.detail.v1",
                    ValidatedAt = validatedAt,
                };
            await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
            pendingDocuments.Remove(documentId);
            if (!pendingMappings.Contains(documentId))
            {
                pendingMappings.Add(documentId);
            }
        }
        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Map stage: deserializes each pending DTO back into a <see cref="CertFrDto"/>, maps it
    /// to a canonical advisory via <c>CertFrMapper</c>, upserts the advisory, and marks the
    /// source document as mapped. Unreadable or null DTO payloads mark the document failed
    /// and are removed from the queue.
    /// </summary>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }
        var pendingMappings = cursor.PendingMappings.ToList();
        foreach (var documentId in cursor.PendingMappings)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (dtoRecord is null || document is null)
            {
                pendingMappings.Remove(documentId);
                continue;
            }
            CertFrDto? dto;
            try
            {
                var json = dtoRecord.Payload.ToJson();
                dto = JsonSerializer.Deserialize<CertFrDto>(json, SerializerOptions);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Cert-FR DTO deserialization failed for document {DocumentId}", documentId);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }
            if (dto is null)
            {
                _logger.LogWarning("Cert-FR DTO payload deserialized as null for document {DocumentId}", documentId);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }
            var mappedAt = _timeProvider.GetUtcNow();
            var advisory = CertFrMapper.Map(dto, SourceName, mappedAt);
            await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
        }
        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>Loads the persisted cursor for this source, or an empty cursor when none exists.</summary>
    private async Task<CertFrCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var record = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return CertFrCursor.FromBson(record?.Cursor);
    }

    /// <summary>Persists the cursor and stamps the completion time for this pass.</summary>
    private async Task UpdateCursorAsync(CertFrCursor cursor, CancellationToken cancellationToken)
    {
        var completedAt = _timeProvider.GetUtcNow();
        await _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), completedAt, cancellationToken).ConfigureAwait(false);
    }
}

View File

@@ -0,0 +1,21 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertFr;
/// <summary>
/// Plugin entry point for the CERT-FR connector; resolves the connector from DI and
/// exposes the canonical source name.
/// </summary>
public sealed class CertFrConnectorPlugin : IConnectorPlugin
{
    /// <summary>Canonical source identifier used across documents, state, and provenance.</summary>
    public const string SourceName = "cert-fr";

    public string Name
    {
        get { return SourceName; }
    }

    public bool IsAvailable(IServiceProvider services)
    {
        return services.GetService<CertFrConnector>() is not null;
    }

    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);
        return services.GetRequiredService<CertFrConnector>();
    }
}

View File

@@ -0,0 +1,54 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.CertFr.Configuration;
namespace StellaOps.Concelier.Connector.CertFr;
/// <summary>
/// Registers the CERT-FR connector, its jobs, and their scheduler definitions from the
/// <c>concelier:sources:cert-fr</c> configuration section.
/// </summary>
public sealed class CertFrDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:cert-fr";

    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Bind and validate options from configuration.
        services.AddCertFrConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        });

        services.AddTransient<CertFrFetchJob>();
        services.AddTransient<CertFrParseJob>();
        services.AddTransient<CertFrMapJob>();

        // Register scheduler definitions without clobbering any existing entries.
        services.PostConfigure<JobSchedulerOptions>(schedulerOptions =>
        {
            EnsureJob(schedulerOptions, CertFrJobKinds.Fetch, typeof(CertFrFetchJob));
            EnsureJob(schedulerOptions, CertFrJobKinds.Parse, typeof(CertFrParseJob));
            EnsureJob(schedulerOptions, CertFrJobKinds.Map, typeof(CertFrMapJob));
        });

        return services;
    }

    /// <summary>Adds a job definition for <paramref name="kind"/> unless one already exists.</summary>
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (!options.Definitions.ContainsKey(kind))
        {
            options.Definitions[kind] = new JobDefinition(
                kind,
                jobType,
                options.DefaultTimeout,
                options.DefaultLeaseDuration,
                CronExpression: null,
                Enabled: true);
        }
    }
}

View File

@@ -0,0 +1,36 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertFr.Configuration;
using StellaOps.Concelier.Connector.CertFr.Internal;
using StellaOps.Concelier.Connector.Common.Http;
namespace StellaOps.Concelier.Connector.CertFr;
/// <summary>
/// Service-collection wiring for the CERT-FR connector: options, the dedicated HTTP client
/// (host-allowlisted to the feed host), the feed client, and the connector itself.
/// </summary>
public static class CertFrServiceCollectionExtensions
{
    public static IServiceCollection AddCertFrConnector(this IServiceCollection services, Action<CertFrOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        services.AddOptions<CertFrOptions>()
            .Configure(configure)
            .PostConfigure(static options => options.Validate());

        services.AddSourceHttpClient(CertFrOptions.HttpClientName, static (sp, clientOptions) =>
        {
            var certFrOptions = sp.GetRequiredService<IOptions<CertFrOptions>>().Value;
            clientOptions.BaseAddress = certFrOptions.FeedUri;
            clientOptions.UserAgent = "StellaOps.Concelier.CertFr/1.0";
            clientOptions.Timeout = TimeSpan.FromSeconds(20);
            // Restrict outbound requests to the configured feed host only.
            clientOptions.AllowedHosts.Clear();
            clientOptions.AllowedHosts.Add(certFrOptions.FeedUri.Host);
        });

        services.TryAddSingleton<CertFrFeedClient>();
        services.AddTransient<CertFrConnector>();
        return services;
    }
}

View File

@@ -0,0 +1,46 @@
using System;
namespace StellaOps.Concelier.Connector.CertFr.Configuration;
/// <summary>
/// Configuration for the CERT-FR connector: feed endpoint, fetch windowing, and throttling.
/// </summary>
public sealed class CertFrOptions
{
    /// <summary>Named HTTP client used for CERT-FR requests.</summary>
    public const string HttpClientName = "cert-fr";

    /// <summary>Absolute URI of the CERT-FR alert feed.</summary>
    public Uri FeedUri { get; set; } = new("https://www.cert.ssi.gouv.fr/feed/alertes/");

    /// <summary>How far back the first fetch (and the window clamp) reaches.</summary>
    public TimeSpan InitialBackfill { get; set; } = TimeSpan.FromDays(30);

    /// <summary>Overlap subtracted from the resume point so late updates are not missed.</summary>
    public TimeSpan WindowOverlap { get; set; } = TimeSpan.FromDays(2);

    /// <summary>Upper bound on items processed per fetch pass.</summary>
    public int MaxItemsPerFetch { get; set; } = 100;

    /// <summary>Optional delay between detail-page requests; zero disables throttling.</summary>
    public TimeSpan RequestDelay { get; set; } = TimeSpan.Zero;

    /// <summary>
    /// Throws <see cref="InvalidOperationException"/> when any setting is out of range.
    /// </summary>
    public void Validate()
    {
        if (FeedUri is null || !FeedUri.IsAbsoluteUri)
        {
            throw new InvalidOperationException("Cert-FR FeedUri must be an absolute URI.");
        }

        if (InitialBackfill <= TimeSpan.Zero)
        {
            throw new InvalidOperationException("InitialBackfill must be a positive duration.");
        }

        if (WindowOverlap < TimeSpan.Zero)
        {
            throw new InvalidOperationException("WindowOverlap cannot be negative.");
        }

        if (MaxItemsPerFetch <= 0)
        {
            throw new InvalidOperationException("MaxItemsPerFetch must be positive.");
        }

        if (RequestDelay < TimeSpan.Zero)
        {
            throw new InvalidOperationException("RequestDelay cannot be negative.");
        }
    }
}

View File

@@ -0,0 +1,88 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// Persisted fetch/parse/map progress for the CERT-FR connector: the last-published
/// watermark plus the queues of documents awaiting parsing and mapping.
/// </summary>
internal sealed record CertFrCursor(
    DateTimeOffset? LastPublished,
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings)
{
    /// <summary>Cursor state used before any run has persisted progress.</summary>
    public static CertFrCursor Empty { get; } = new(null, Array.Empty<Guid>(), Array.Empty<Guid>());

    /// <summary>Serializes the cursor to BSON; GUIDs are stored as strings.</summary>
    public BsonDocument ToBsonDocument()
    {
        var bson = new BsonDocument
        {
            ["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
            ["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
        };

        if (LastPublished is { } lastPublished)
        {
            bson["lastPublished"] = lastPublished.UtcDateTime;
        }

        return bson;
    }

    /// <summary>Rehydrates a cursor from BSON; null or empty input yields <see cref="Empty"/>.</summary>
    public static CertFrCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }

        DateTimeOffset? lastPublished = null;
        if (document.TryGetValue("lastPublished", out var rawValue))
        {
            lastPublished = ParseDate(rawValue);
        }

        return new CertFrCursor(
            lastPublished,
            ReadGuidArray(document, "pendingDocuments"),
            ReadGuidArray(document, "pendingMappings"));
    }

    public CertFrCursor WithLastPublished(DateTimeOffset? timestamp)
        => this with { LastPublished = timestamp };

    public CertFrCursor WithPendingDocuments(IEnumerable<Guid> ids)
        => this with { PendingDocuments = ids?.Distinct().ToArray() ?? Array.Empty<Guid>() };

    public CertFrCursor WithPendingMappings(IEnumerable<Guid> ids)
        => this with { PendingMappings = ids?.Distinct().ToArray() ?? Array.Empty<Guid>() };

    // Accepts either a BSON DateTime or an ISO-8601 string; anything else yields null.
    private static DateTimeOffset? ParseDate(BsonValue value)
    {
        switch (value.BsonType)
        {
            case BsonType.DateTime:
                return DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc);
            case BsonType.String when DateTimeOffset.TryParse(value.AsString, out var parsed):
                return parsed.ToUniversalTime();
            default:
                return null;
        }
    }

    // Reads a string array of GUIDs; unparseable or missing entries are skipped.
    private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var raw) || raw is not BsonArray array)
        {
            return Array.Empty<Guid>();
        }

        var guids = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (element is not null && Guid.TryParse(element.ToString(), out var guid))
            {
                guids.Add(guid);
            }
        }

        return guids;
    }
}

View File

@@ -0,0 +1,77 @@
using System;
using System.Collections.Generic;
using StellaOps.Concelier.Storage.Mongo.Documents;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// Advisory metadata carried on stored CERT-FR documents: written at fetch time from a feed
/// item (<see cref="CreateMetadata"/>) and read back at parse time (<see cref="FromDocument"/>).
/// </summary>
internal sealed record CertFrDocumentMetadata(
    string AdvisoryId,
    string Title,
    DateTimeOffset Published,
    Uri DetailUri,
    string? Summary)
{
    private const string AdvisoryIdKey = "certfr.advisoryId";
    private const string TitleKey = "certfr.title";
    private const string PublishedKey = "certfr.published";
    private const string SummaryKey = "certfr.summary";

    /// <summary>
    /// Reconstructs metadata from a stored document.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// When the metadata dictionary or any required key is missing/invalid, or the document
    /// URI is not absolute.
    /// </exception>
    public static CertFrDocumentMetadata FromDocument(DocumentRecord document)
    {
        ArgumentNullException.ThrowIfNull(document);
        if (document.Metadata is null)
        {
            throw new InvalidOperationException("Cert-FR document metadata is missing.");
        }
        var metadata = document.Metadata;
        if (!metadata.TryGetValue(AdvisoryIdKey, out var advisoryId) || string.IsNullOrWhiteSpace(advisoryId))
        {
            throw new InvalidOperationException("Cert-FR advisory id metadata missing.");
        }
        if (!metadata.TryGetValue(TitleKey, out var title) || string.IsNullOrWhiteSpace(title))
        {
            throw new InvalidOperationException("Cert-FR title metadata missing.");
        }
        // The value was written with the round-trip "O" format; parse it culture-invariantly
        // with RoundtripKind so the result does not depend on the host's current culture.
        if (!metadata.TryGetValue(PublishedKey, out var publishedRaw)
            || !DateTimeOffset.TryParse(
                publishedRaw,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.RoundtripKind,
                out var published))
        {
            throw new InvalidOperationException("Cert-FR published metadata invalid.");
        }
        if (!Uri.TryCreate(document.Uri, UriKind.Absolute, out var detailUri))
        {
            throw new InvalidOperationException("Cert-FR document URI invalid.");
        }
        metadata.TryGetValue(SummaryKey, out var summary);
        return new CertFrDocumentMetadata(
            advisoryId.Trim(),
            title.Trim(),
            published.ToUniversalTime(),
            detailUri,
            string.IsNullOrWhiteSpace(summary) ? null : summary.Trim());
    }

    /// <summary>
    /// Builds the metadata dictionary persisted alongside a fetched document. The summary
    /// key is only written when the feed item has a non-blank summary.
    /// </summary>
    public static IReadOnlyDictionary<string, string> CreateMetadata(CertFrFeedItem item)
    {
        ArgumentNullException.ThrowIfNull(item);
        var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            [AdvisoryIdKey] = item.AdvisoryId,
            [TitleKey] = item.Title ?? item.AdvisoryId,
            // "O" is culture-invariant and round-trips offset information.
            [PublishedKey] = item.Published.ToString("O"),
        };
        if (!string.IsNullOrWhiteSpace(item.Summary))
        {
            metadata[SummaryKey] = item.Summary!;
        }
        return metadata;
    }
}

View File

@@ -0,0 +1,14 @@
using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// Serialized Cert-FR advisory payload persisted between the parse and map stages.
/// </summary>
/// <param name="AdvisoryId">Cert-FR advisory identifier taken from the feed metadata.</param>
/// <param name="Title">Advisory title from the feed metadata.</param>
/// <param name="DetailUrl">Absolute URL of the advisory detail page.</param>
/// <param name="Published">Publication timestamp carried over from the feed item.</param>
/// <param name="Summary">Optional short summary (feed description or first sentence of content).</param>
/// <param name="Content">Sanitized plain-text content extracted from the detail HTML.</param>
/// <param name="References">Deduplicated absolute URLs extracted from anchors in the detail HTML.</param>
internal sealed record CertFrDto(
    [property: JsonPropertyName("advisoryId")] string AdvisoryId,
    [property: JsonPropertyName("title")] string Title,
    [property: JsonPropertyName("detailUrl")] string DetailUrl,
    [property: JsonPropertyName("published")] DateTimeOffset Published,
    [property: JsonPropertyName("summary")] string? Summary,
    [property: JsonPropertyName("content")] string Content,
    [property: JsonPropertyName("references")] IReadOnlyList<string> References);

View File

@@ -0,0 +1,109 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertFr.Configuration;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// Downloads the Cert-FR RSS feed and projects its entries into <see cref="CertFrFeedItem"/> instances.
/// </summary>
public sealed class CertFrFeedClient
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly CertFrOptions _options;
    private readonly ILogger<CertFrFeedClient> _logger;

    public CertFrFeedClient(IHttpClientFactory httpClientFactory, IOptions<CertFrOptions> options, ILogger<CertFrFeedClient> logger)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Loads feed items published on or after <paramref name="windowStart"/>, clamping
    /// future-dated items to <paramref name="windowEnd"/>; results are ordered oldest-first
    /// and capped at <see cref="CertFrOptions.MaxItemsPerFetch"/>.
    /// </summary>
    /// <exception cref="HttpRequestException">The feed endpoint returned a non-success status.</exception>
    public async Task<IReadOnlyList<CertFrFeedItem>> LoadAsync(DateTimeOffset windowStart, DateTimeOffset windowEnd, CancellationToken cancellationToken)
    {
        var client = _httpClientFactory.CreateClient(CertFrOptions.HttpClientName);

        // Headers-first completion streams the payload instead of buffering the whole feed.
        using var response = await client.GetAsync(_options.FeedUri, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);

        // LoadAsync keeps the call chain non-blocking and honors cancellation while reading.
        var document = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false);

        var items = new List<CertFrFeedItem>();
        var now = DateTimeOffset.UtcNow;
        foreach (var itemElement in document.Descendants("item"))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var link = itemElement.Element("link")?.Value;
            if (string.IsNullOrWhiteSpace(link) || !Uri.TryCreate(link.Trim(), UriKind.Absolute, out var detailUri))
            {
                // Without a usable absolute detail link the item cannot be fetched later.
                continue;
            }

            var title = itemElement.Element("title")?.Value?.Trim();
            var summary = itemElement.Element("description")?.Value?.Trim();

            // Items with a missing/unparsable pubDate default to "now" so they are not dropped.
            var published = ParsePublished(itemElement.Element("pubDate")?.Value) ?? now;
            if (published < windowStart)
            {
                continue;
            }

            if (published > windowEnd)
            {
                // Clamp future-dated entries so cursor arithmetic stays inside the window.
                published = windowEnd;
            }

            var advisoryId = ResolveAdvisoryId(itemElement, detailUri);
            items.Add(new CertFrFeedItem(advisoryId, detailUri, published.ToUniversalTime(), title, summary));
        }

        return items
            .OrderBy(item => item.Published)
            .Take(_options.MaxItemsPerFetch)
            .ToArray();
    }

    // Parses RSS pubDate values culture-invariantly; returns null when unparsable.
    private static DateTimeOffset? ParsePublished(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        if (DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var parsed))
        {
            return parsed;
        }

        return null;
    }

    // Resolves a stable advisory id: feed GUID first, then the URL slug, then the full URL.
    private static string ResolveAdvisoryId(XElement itemElement, Uri detailUri)
    {
        var guid = itemElement.Element("guid")?.Value;
        if (!string.IsNullOrWhiteSpace(guid))
        {
            return guid.Trim();
        }

        var segments = detailUri.Segments;
        if (segments.Length > 0)
        {
            var slug = segments[^1].Trim('/');
            if (!string.IsNullOrWhiteSpace(slug))
            {
                return slug;
            }
        }

        return detailUri.AbsoluteUri;
    }
}

View File

@@ -0,0 +1,10 @@
using System;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// A single entry extracted from the Cert-FR RSS feed.
/// </summary>
/// <param name="AdvisoryId">Identifier resolved from the item GUID or the detail-URL slug.</param>
/// <param name="DetailUri">Absolute URI of the advisory detail page.</param>
/// <param name="Published">Publication timestamp normalized to UTC.</param>
/// <param name="Title">Feed item title, when present.</param>
/// <param name="Summary">Feed item description, when present.</param>
public sealed record CertFrFeedItem(
    string AdvisoryId,
    Uri DetailUri,
    DateTimeOffset Published,
    string? Title,
    string? Summary);

View File

@@ -0,0 +1,116 @@
using System;
using System.Collections.Generic;
using System.Linq;
using StellaOps.Concelier.Models;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// Maps parsed Cert-FR DTOs into canonical <see cref="Advisory"/> records.
/// </summary>
internal static class CertFrMapper
{
    /// <summary>
    /// Builds the canonical advisory for a Cert-FR DTO.
    /// </summary>
    /// <param name="dto">Parsed Cert-FR advisory payload.</param>
    /// <param name="sourceName">Connector source name recorded in provenance.</param>
    /// <param name="recordedAt">Timestamp stamped on provenance entries (normalized to UTC).</param>
    public static Advisory Map(CertFrDto dto, string sourceName, DateTimeOffset recordedAt)
    {
        ArgumentNullException.ThrowIfNull(dto);
        ArgumentException.ThrowIfNullOrEmpty(sourceName);

        var advisoryKey = $"cert-fr/{dto.AdvisoryId}";
        var provenance = new AdvisoryProvenance(sourceName, "document", dto.DetailUrl, recordedAt.ToUniversalTime());

        var aliases = new List<string>
        {
            $"CERT-FR:{dto.AdvisoryId}",
        };

        var references = BuildReferences(dto, provenance).ToArray();
        var affectedPackages = BuildAffectedPackages(dto, provenance).ToArray();

        return new Advisory(
            advisoryKey,
            dto.Title,
            dto.Summary ?? dto.Title,
            language: "fr",
            published: dto.Published.ToUniversalTime(),
            modified: null,
            severity: null,
            exploitKnown: false,
            aliases: aliases,
            references: references,
            affectedPackages: affectedPackages,
            cvssMetrics: Array.Empty<CvssMetric>(),
            provenance: new[] { provenance });
    }

    /// <summary>
    /// Emits the official detail-page reference first, then scraped references,
    /// deduplicated case-insensitively by URL with a deterministic ordering.
    /// </summary>
    private static IEnumerable<AdvisoryReference> BuildReferences(CertFrDto dto, AdvisoryProvenance provenance)
    {
        var comparer = StringComparer.OrdinalIgnoreCase;
        var entries = new List<(AdvisoryReference Reference, int Priority)>
        {
            // Priority 0 keeps the official detail page ahead of scraped links.
            (new AdvisoryReference(dto.DetailUrl, "advisory", "cert-fr", dto.Summary, provenance), 0),
        };

        foreach (var url in dto.References)
        {
            entries.Add((new AdvisoryReference(url, "reference", null, null, provenance), 1));
        }

        return entries
            .GroupBy(tuple => tuple.Reference.Url, comparer)
            .Select(group => group
                .OrderBy(t => t.Priority)
                .ThenBy(t => t.Reference.Kind ?? string.Empty, comparer)
                .ThenBy(t => t.Reference.Url, comparer)
                .First())
            .OrderBy(t => t.Priority)
            .ThenBy(t => t.Reference.Kind ?? string.Empty, comparer)
            .ThenBy(t => t.Reference.Url, comparer)
            .Select(t => t.Reference);
    }

    /// <summary>
    /// Builds a single vendor-scoped affected package carrying Cert-FR text as
    /// range-primitive extensions; returns empty when there is nothing to carry.
    /// </summary>
    private static IEnumerable<AffectedPackage> BuildAffectedPackages(CertFrDto dto, AdvisoryProvenance provenance)
    {
        var extensions = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        if (!string.IsNullOrWhiteSpace(dto.Summary))
        {
            extensions["certfr.summary"] = dto.Summary.Trim();
        }

        if (!string.IsNullOrWhiteSpace(dto.Content))
        {
            // Cap stored content so the extension payload stays bounded.
            var trimmed = dto.Content.Length > 1024 ? dto.Content[..1024].Trim() : dto.Content.Trim();
            if (trimmed.Length > 0)
            {
                extensions["certfr.content"] = trimmed;
            }
        }

        if (dto.References.Count > 0)
        {
            // Invariant culture keeps the persisted count culture-independent (CA1305).
            extensions["certfr.reference.count"] = dto.References.Count.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        if (extensions.Count == 0)
        {
            return Array.Empty<AffectedPackage>();
        }

        var range = new AffectedVersionRange(
            rangeKind: "vendor",
            introducedVersion: null,
            fixedVersion: null,
            lastAffectedVersion: null,
            rangeExpression: null,
            provenance: provenance,
            primitives: new RangePrimitives(null, null, null, extensions));

        return new[]
        {
            new AffectedPackage(
                AffectedPackageTypes.Vendor,
                identifier: dto.AdvisoryId,
                platform: null,
                versionRanges: new[] { range },
                statuses: Array.Empty<AffectedPackageStatus>(),
                provenance: new[] { provenance })
        };
    }
}

View File

@@ -0,0 +1,80 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Connector.CertFr.Internal;
/// <summary>
/// Converts raw Cert-FR advisory HTML plus stored metadata into the persisted DTO shape.
/// </summary>
internal static class CertFrParser
{
    private static readonly Regex AnchorRegex = new("<a[^>]+href=\"(?<url>https?://[^\"]+)\"", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex ScriptRegex = new("<script[\\s\\S]*?</script>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex StyleRegex = new("<style[\\s\\S]*?</style>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex TagRegex = new("<[^>]+>", RegexOptions.Compiled);
    private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled);

    /// <summary>
    /// Produces a <see cref="CertFrDto"/> from the advisory detail HTML and its metadata.
    /// </summary>
    public static CertFrDto Parse(string html, CertFrDocumentMetadata metadata)
    {
        ArgumentException.ThrowIfNullOrEmpty(html);
        ArgumentNullException.ThrowIfNull(metadata);

        var plainText = SanitizeHtml(html);

        return new CertFrDto(
            metadata.AdvisoryId,
            metadata.Title,
            metadata.DetailUri.ToString(),
            metadata.Published,
            BuildSummary(metadata.Summary, plainText),
            plainText,
            ExtractReferences(html));
    }

    // Strips scripts/styles/markup, decodes HTML entities, and collapses whitespace.
    private static string SanitizeHtml(string html)
    {
        var text = ScriptRegex.Replace(html, string.Empty);
        text = StyleRegex.Replace(text, string.Empty);
        text = TagRegex.Replace(text, " ");
        text = System.Net.WebUtility.HtmlDecode(text) ?? string.Empty;
        return WhitespaceRegex.Replace(text, " ").Trim();
    }

    // Prefers the feed-provided summary; otherwise the first sentence of the sanitized
    // content, then a 280-character prefix as a last resort.
    private static string? BuildSummary(string? metadataSummary, string content)
    {
        if (!string.IsNullOrWhiteSpace(metadataSummary))
        {
            return metadataSummary.Trim();
        }

        if (string.IsNullOrWhiteSpace(content))
        {
            return null;
        }

        var sentences = content.Split(new[] { '.', '!', '?' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        if (sentences.Length > 0)
        {
            return sentences[0].Trim();
        }

        return content.Length > 280 ? content[..280].Trim() : content;
    }

    // Collects absolute http(s) anchor targets, deduplicated case-insensitively
    // and returned in a deterministic order.
    private static IReadOnlyList<string> ExtractReferences(string html)
    {
        var urls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (Match match in AnchorRegex.Matches(html))
        {
            if (!match.Success)
            {
                continue;
            }

            urls.Add(match.Groups["url"].Value.Trim());
        }

        if (urls.Count == 0)
        {
            return Array.Empty<string>();
        }

        return urls.OrderBy(static url => url, StringComparer.OrdinalIgnoreCase).ToArray();
    }
}

View File

@@ -0,0 +1,46 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.CertFr;
/// <summary>
/// Job kind identifiers for the three Cert-FR pipeline stages registered with the scheduler.
/// </summary>
internal static class CertFrJobKinds
{
    public const string Fetch = "source:cert-fr:fetch";
    public const string Parse = "source:cert-fr:parse";
    public const string Map = "source:cert-fr:map";
}
/// <summary>
/// Scheduler job that delegates to the Cert-FR connector's fetch stage.
/// </summary>
internal sealed class CertFrFetchJob : IJob
{
    private readonly CertFrConnector _connector;

    public CertFrFetchJob(CertFrConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}
/// <summary>
/// Scheduler job that delegates to the Cert-FR connector's parse stage.
/// </summary>
internal sealed class CertFrParseJob : IJob
{
    private readonly CertFrConnector _connector;

    public CertFrParseJob(CertFrConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.ParseAsync(context.Services, cancellationToken);
    }
}
/// <summary>
/// Scheduler job that delegates to the Cert-FR connector's map stage.
/// </summary>
internal sealed class CertFrMapJob : IJob
{
    private readonly CertFrConnector _connector;

    public CertFrMapJob(CertFrConnector connector)
    {
        _connector = connector ?? throw new ArgumentNullException(nameof(connector));
    }

    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.MapAsync(context.Services, cancellationToken);
    }
}

View File

@@ -0,0 +1,14 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Connector.Common\StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Storage.Mongo\StellaOps.Concelier.Storage.Mongo.csproj" />
<ProjectReference Include="..\StellaOps.Concelier.Models\StellaOps.Concelier.Models.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,11 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|RSS/list fetcher with sliding window|BE-Conn-CertFr|Source.Common|**DONE** RSS/list ingestion implemented with sliding date cursor.|
|Detail page fetch and sanitizer|BE-Conn-CertFr|Source.Common|**DONE** HTML sanitizer trims boilerplate prior to DTO mapping.|
|Extractor and schema validation of DTO|BE-Conn-CertFr, QA|Source.Common|**DONE** DTO parsing validates structure before persistence.|
|Canonical mapping (aliases, refs, severity text)|BE-Conn-CertFr|Models|**DONE** mapper emits enrichment references with severity text.|
|Watermark plus dedupe by sha256|BE-Conn-CertFr|Storage.Mongo|**DONE** SHA comparisons skip unchanged docs; covered by duplicate/not-modified connector tests.|
|Golden fixtures and determinism tests|QA|Source.CertFr|**DONE** snapshot fixtures added in `CertFrConnectorTests` to enforce deterministic output.|
|Mark failure/backoff on fetch errors|BE-Conn-CertFr|Storage.Mongo|**DONE** fetch path now marks failures/backoff and tests assert state repository updates.|
|Conditional fetch caching|BE-Conn-CertFr|Source.Common|**DONE** ETag/Last-Modified support wired via `SourceFetchService` and verified in not-modified test.|

View File

@@ -0,0 +1,28 @@
# AGENTS
## Role
CERT-In national CERT connector; enrichment advisories for India; maps CVE lists, advisory text, mitigations, and references; non-authoritative for package ranges unless explicit evidence is present.
## Scope
- Discover and fetch advisories from the CERT-In portal; window by advisory code/date; follow detail pages.
- Validate HTML or JSON; extract title, summary, CVEs, affected vendor names, mitigations; map references; normalize dates and IDs.
- Persist raw docs and maintain source_state cursor; idempotent mapping.
## Participants
- Source.Common (HTTP, HTML parsing, normalization, validators).
- Storage.Mongo (document, dto, advisory, alias, reference, source_state).
- Models (canonical).
- Core/WebService (jobs: source:certin:fetch|parse|map).
- Merge engine treats CERT-In as enrichment (no override of PSIRT or OVAL without concrete ranges).
## Interfaces & contracts
- Aliases: advisory code if stable (scheme "CERT-IN") and CVE ids; if code is not stable, store as reference only.
- References typed: bulletin/advisory/vendor/mitigation; deduped.
- Affected omitted unless CERT-In publishes explicit version or fix details.
- Provenance: method=parser; value=advisory code or URL; recordedAt.
## In/Out of scope
In: enrichment, aliasing where stable, references, mitigation text.
Out: package range authority; scraping behind auth walls.
## Observability & security expectations
- Metrics: shared `concelier.source.http.*` counters/histograms from SourceDiagnostics tagged `concelier.source=certin` capture fetch volume, parse failures, and map enrich counts.
- Logs: advisory codes, CVE counts per advisory, timing; allowlist host; redact personal data if present.
## Tests
- Author and review coverage in `../StellaOps.Concelier.Connector.CertIn.Tests`.
- Shared fixtures (e.g., `MongoIntegrationFixture`, `ConnectorTestHarness`) live in `../StellaOps.Concelier.Testing`.
- Keep fixtures deterministic; match new cases to real-world advisories or regression scenarios.

View File

@@ -0,0 +1,462 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// CERT-In connector implementing the fetch → parse → map pipeline: pages the alerts
/// listing, stores detail documents, parses them into DTOs, and upserts canonical
/// advisories while tracking progress in a source-state cursor.
/// </summary>
public sealed class CertInConnector : IFeedConnector
{
    // Serializer used both to persist DTOs (JSON → BSON) and to rehydrate them in MapAsync.
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    };
    private readonly CertInClient _client;
    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly CertInOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<CertInConnector> _logger;
    public CertInConnector(
        CertInClient client,
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IOptions<CertInOptions> options,
        TimeProvider? timeProvider,
        ILogger<CertInConnector> logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        // TimeProvider is optional so tests can supply a deterministic clock.
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }
    /// <summary>Source name shared with the plugin registration ("cert-in").</summary>
    public string SourceName => CertInConnectorPlugin.SourceName;
    /// <summary>
    /// Pages the CERT-In listings, fetches new or changed detail documents with
    /// conditional-request headers, and records their ids in the cursor for parsing.
    /// </summary>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var now = _timeProvider.GetUtcNow();
        // Resume slightly before the last seen publication (overlap tolerates late feed
        // updates); otherwise start a fresh window of WindowSize ending now.
        var windowStart = cursor.LastPublished.HasValue
            ? cursor.LastPublished.Value - _options.WindowOverlap
            : now - _options.WindowSize;
        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
        for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
        {
            IReadOnlyList<CertInListingItem> listings;
            try
            {
                listings = await _client.GetListingsAsync(page, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Listing failure aborts the cycle; a 5-minute backoff is recorded in source state.
                _logger.LogError(ex, "CERT-In listings fetch failed for page {Page}", page);
                await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
                throw;
            }
            if (listings.Count == 0)
            {
                break;
            }
            foreach (var listing in listings.OrderByDescending(static item => item.Published))
            {
                if (listing.Published < windowStart)
                {
                    // Items are walked newest-first; the first one older than the window
                    // means everything that follows is older too, so end pagination.
                    page = _options.MaxPagesPerFetch + 1;
                    break;
                }
                // Listing metadata is persisted with the document so ParseAsync can
                // rebuild the listing item without re-reading the feed.
                var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
                {
                    ["certin.advisoryId"] = listing.AdvisoryId,
                    ["certin.title"] = listing.Title,
                    ["certin.link"] = listing.DetailUri.ToString(),
                    ["certin.published"] = listing.Published.ToString("O")
                };
                if (!string.IsNullOrWhiteSpace(listing.Summary))
                {
                    metadata["certin.summary"] = listing.Summary!;
                }
                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, listing.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
                SourceFetchResult result;
                try
                {
                    result = await _fetchService.FetchAsync(
                        new SourceFetchRequest(CertInOptions.HttpClientName, SourceName, listing.DetailUri)
                        {
                            Metadata = metadata,
                            // Conditional fetch: reuse ETag/Last-Modified from the stored document.
                            ETag = existing?.Etag,
                            LastModified = existing?.LastModified,
                            AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
                        },
                        cancellationToken).ConfigureAwait(false);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "CERT-In fetch failed for {Uri}", listing.DetailUri);
                    await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(3), ex.Message, cancellationToken).ConfigureAwait(false);
                    throw;
                }
                if (!result.IsSuccess || result.Document is null)
                {
                    continue;
                }
                // Skip re-processing when the payload hash is unchanged and the document
                // already completed mapping; just re-assert its status.
                if (existing is not null
                    && string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
                    && string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
                {
                    await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
                    continue;
                }
                pendingDocuments.Add(result.Document.Id);
                if (listing.Published > maxPublished)
                {
                    maxPublished = listing.Published;
                }
                // Optional politeness delay between detail-page requests.
                if (_options.RequestDelay > TimeSpan.Zero)
                {
                    await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
                }
            }
        }
        // NOTE(review): only failures are marked against source state in this method;
        // confirm success bookkeeping is covered by the cursor update / state repository.
        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }
    /// <summary>
    /// Parses pending raw documents into CERT-In DTOs, marks them pending-map,
    /// and moves their ids from the document queue to the mapping queue.
    /// </summary>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }
        var remainingDocuments = cursor.PendingDocuments.ToList();
        var pendingMappings = cursor.PendingMappings.ToList();
        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                // Document disappeared; drop it from the queue rather than retry forever.
                remainingDocuments.Remove(documentId);
                continue;
            }
            if (!document.GridFsId.HasValue)
            {
                _logger.LogWarning("CERT-In document {DocumentId} missing GridFS payload", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                continue;
            }
            if (!TryDeserializeListing(document.Metadata, out var listing))
            {
                _logger.LogWarning("CERT-In metadata missing for {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                continue;
            }
            byte[] rawBytes;
            try
            {
                rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Storage failure is not recoverable here; surface it to the job runner.
                _logger.LogError(ex, "Failed to download raw CERT-In document {DocumentId}", document.Id);
                throw;
            }
            var dto = CertInDetailParser.Parse(listing, rawBytes);
            // DTO is persisted as BSON under the "certin.v1" schema tag.
            var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
            var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certin.v1", payload, _timeProvider.GetUtcNow());
            await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            if (!pendingMappings.Contains(documentId))
            {
                pendingMappings.Add(documentId);
            }
        }
        var updatedCursor = cursor
            .WithPendingDocuments(remainingDocuments)
            .WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }
    /// <summary>
    /// Rehydrates pending DTOs, maps them to canonical advisories, upserts them,
    /// and marks the corresponding documents as mapped.
    /// </summary>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);
        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }
        var pendingMappings = cursor.PendingMappings.ToList();
        foreach (var documentId in cursor.PendingMappings)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (dtoRecord is null || document is null)
            {
                pendingMappings.Remove(documentId);
                continue;
            }
            // Relaxed extended JSON keeps BSON types representable as plain JSON for System.Text.Json.
            var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
            {
                OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
            });
            CertInAdvisoryDto dto;
            try
            {
                dto = JsonSerializer.Deserialize<CertInAdvisoryDto>(dtoJson, SerializerOptions)
                    ?? throw new InvalidOperationException("Deserialized CERT-In DTO is null.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to deserialize CERT-In DTO for {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }
            var advisory = MapAdvisory(dto, document, dtoRecord);
            await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
        }
        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }
    // Builds the canonical advisory: aliases (advisory id + CVEs), typed references
    // (detail page, CVE records, scraped links), and vendor-name affected packages
    // carried as range-primitive extensions.
    private Advisory MapAdvisory(CertInAdvisoryDto dto, DocumentRecord document, DtoRecord dtoRecord)
    {
        var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
        var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", dto.AdvisoryId, dtoRecord.ValidatedAt);
        var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            dto.AdvisoryId,
        };
        foreach (var cve in dto.CveIds)
        {
            aliases.Add(cve);
        }
        var references = new List<AdvisoryReference>();
        try
        {
            references.Add(new AdvisoryReference(
                dto.Link,
                "advisory",
                "cert-in",
                null,
                new AdvisoryProvenance(SourceName, "reference", dto.Link, dtoRecord.ValidatedAt)));
        }
        catch (ArgumentException)
        {
            // Invalid URL in the DTO: log and keep mapping the rest of the advisory.
            _logger.LogWarning("Invalid CERT-In link {Link} for advisory {AdvisoryId}", dto.Link, dto.AdvisoryId);
        }
        foreach (var cve in dto.CveIds)
        {
            // Synthesized canonical CVE record link for each referenced CVE id.
            var url = $"https://www.cve.org/CVERecord?id={cve}";
            try
            {
                references.Add(new AdvisoryReference(
                    url,
                    "advisory",
                    cve,
                    null,
                    new AdvisoryProvenance(SourceName, "reference", url, dtoRecord.ValidatedAt)));
            }
            catch (ArgumentException)
            {
                // ignore invalid urls
            }
        }
        foreach (var link in dto.ReferenceLinks)
        {
            try
            {
                references.Add(new AdvisoryReference(
                    link,
                    "reference",
                    null,
                    null,
                    new AdvisoryProvenance(SourceName, "reference", link, dtoRecord.ValidatedAt)));
            }
            catch (ArgumentException)
            {
                // ignore invalid urls
            }
        }
        // Vendor names become version-less "vendor" ranges; the name itself is kept
        // in a range-primitive extension (enrichment only, no authoritative ranges).
        var affectedPackages = dto.VendorNames.Select(vendor =>
        {
            var provenance = new AdvisoryProvenance(SourceName, "affected", vendor, dtoRecord.ValidatedAt);
            var primitives = new RangePrimitives(
                null,
                null,
                null,
                new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
                {
                    ["certin.vendor"] = vendor
                });
            var ranges = new[]
            {
                new AffectedVersionRange(
                    rangeKind: "vendor",
                    introducedVersion: null,
                    fixedVersion: null,
                    lastAffectedVersion: null,
                    rangeExpression: null,
                    provenance: provenance,
                    primitives: primitives)
            };
            return new AffectedPackage(
                AffectedPackageTypes.IcsVendor,
                vendor,
                platform: null,
                versionRanges: ranges,
                statuses: Array.Empty<AffectedPackageStatus>(),
                provenance: new[] { provenance });
        })
        .ToArray();
        return new Advisory(
            dto.AdvisoryId,
            dto.Title,
            dto.Summary ?? dto.Content,
            language: "en",
            published: dto.Published,
            modified: dto.Published,
            severity: dto.Severity,
            exploitKnown: false,
            aliases: aliases,
            references: references,
            affectedPackages: affectedPackages,
            cvssMetrics: Array.Empty<CvssMetric>(),
            provenance: new[] { fetchProvenance, mappingProvenance });
    }
    // Loads the persisted cursor, defaulting to an empty cursor on first run.
    private async Task<CertInCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return state is null ? CertInCursor.Empty : CertInCursor.FromBson(state.Cursor);
    }
    // Persists the cursor with the current timestamp from the injected clock.
    private Task UpdateCursorAsync(CertInCursor cursor, CancellationToken cancellationToken)
    {
        return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
    }
    // Rebuilds the listing item from document metadata written in FetchAsync;
    // returns false (listing unset) when any required key is missing or invalid.
    private static bool TryDeserializeListing(IReadOnlyDictionary<string, string>? metadata, out CertInListingItem listing)
    {
        listing = null!;
        if (metadata is null)
        {
            return false;
        }
        if (!metadata.TryGetValue("certin.advisoryId", out var advisoryId))
        {
            return false;
        }
        if (!metadata.TryGetValue("certin.title", out var title))
        {
            return false;
        }
        if (!metadata.TryGetValue("certin.link", out var link) || !Uri.TryCreate(link, UriKind.Absolute, out var detailUri))
        {
            return false;
        }
        if (!metadata.TryGetValue("certin.published", out var publishedText) || !DateTimeOffset.TryParse(publishedText, out var published))
        {
            return false;
        }
        metadata.TryGetValue("certin.summary", out var summary);
        listing = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary);
        return true;
    }
}

View File

@@ -0,0 +1,19 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Plugin entry point that exposes the CERT-In connector to the host.
/// </summary>
public sealed class CertInConnectorPlugin : IConnectorPlugin
{
    /// <summary>Canonical source name used for documents, state, and provenance.</summary>
    public const string SourceName = "cert-in";

    public string Name => SourceName;

    public bool IsAvailable(IServiceProvider services) => services is not null;

    public IFeedConnector Create(IServiceProvider services)
    {
        if (services is null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        return ActivatorUtilities.CreateInstance<CertInConnector>(services);
    }
}

View File

@@ -0,0 +1,54 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.CertIn.Configuration;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Registers the CERT-In connector, its options binding, and its scheduler jobs.
/// </summary>
public sealed class CertInDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:cert-in";

    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Bind options from configuration and validate eagerly at registration time.
        services.AddCertInConnector(options =>
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        });

        services.AddTransient<CertInFetchJob>();
        services.AddTransient<CertInParseJob>();
        services.AddTransient<CertInMapJob>();

        // Register job definitions without clobbering any existing scheduler entries.
        services.PostConfigure<JobSchedulerOptions>(schedulerOptions =>
        {
            EnsureJob(schedulerOptions, CertInJobKinds.Fetch, typeof(CertInFetchJob));
            EnsureJob(schedulerOptions, CertInJobKinds.Parse, typeof(CertInParseJob));
            EnsureJob(schedulerOptions, CertInJobKinds.Map, typeof(CertInMapJob));
        });

        return services;
    }

    // Adds a job definition only when the kind is not already configured.
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (!options.Definitions.ContainsKey(kind))
        {
            options.Definitions[kind] = new JobDefinition(
                kind,
                jobType,
                options.DefaultTimeout,
                options.DefaultLeaseDuration,
                CronExpression: null,
                Enabled: true);
        }
    }
}

View File

@@ -0,0 +1,37 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common.Http;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Service-collection extensions wiring up the CERT-In connector and its HTTP client.
/// </summary>
public static class CertInServiceCollectionExtensions
{
    public static IServiceCollection AddCertInConnector(this IServiceCollection services, Action<CertInOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        // Validate after all configuration delegates have run.
        services.AddOptions<CertInOptions>()
            .Configure(configure)
            .PostConfigure(static certInOptions => certInOptions.Validate());

        // Source HTTP client: pinned base address, allow-listed host, JSON accept header.
        services.AddSourceHttpClient(CertInOptions.HttpClientName, (provider, clientOptions) =>
        {
            var certInOptions = provider.GetRequiredService<IOptions<CertInOptions>>().Value;
            clientOptions.BaseAddress = certInOptions.AlertsEndpoint;
            clientOptions.Timeout = TimeSpan.FromSeconds(30);
            clientOptions.UserAgent = "StellaOps.Concelier.CertIn/1.0";
            clientOptions.AllowedHosts.Clear();
            clientOptions.AllowedHosts.Add(certInOptions.AlertsEndpoint.Host);
            clientOptions.DefaultRequestHeaders["Accept"] = "application/json";
        });

        services.AddTransient<CertInClient>();
        services.AddTransient<CertInConnector>();

        return services;
    }
}

View File

@@ -0,0 +1,68 @@
using System;
using System.Diagnostics.CodeAnalysis;
namespace StellaOps.Concelier.Connector.CertIn.Configuration;
public sealed class CertInOptions
{
    /// <summary>Logical name of the named HTTP client used by the CERT-In connector.</summary>
    public static string HttpClientName => "source.certin";

    /// <summary>
    /// Endpoint returning a paginated list of recent advisories.
    /// </summary>
    public Uri AlertsEndpoint { get; set; } = new("https://www.cert-in.org.in/api/alerts", UriKind.Absolute);

    /// <summary>
    /// Size of the rolling fetch window.
    /// </summary>
    public TimeSpan WindowSize { get; set; } = TimeSpan.FromDays(30);

    /// <summary>
    /// Overlap applied to subsequent windows.
    /// </summary>
    public TimeSpan WindowOverlap { get; set; } = TimeSpan.FromDays(2);

    /// <summary>
    /// Maximum pages fetched per cycle.
    /// </summary>
    public int MaxPagesPerFetch { get; set; } = 5;

    /// <summary>
    /// Delay between successive HTTP requests.
    /// </summary>
    public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(500);

    /// <summary>
    /// Verifies every option is within its supported range; throws
    /// <see cref="InvalidOperationException"/> on the first violation found.
    /// </summary>
    [MemberNotNull(nameof(AlertsEndpoint))]
    public void Validate()
    {
        if (AlertsEndpoint is null || !AlertsEndpoint.IsAbsoluteUri)
        {
            throw new InvalidOperationException("AlertsEndpoint must be an absolute URI.");
        }

        if (WindowSize <= TimeSpan.Zero)
        {
            throw new InvalidOperationException("WindowSize must be greater than zero.");
        }

        if (WindowOverlap < TimeSpan.Zero)
        {
            throw new InvalidOperationException("WindowOverlap cannot be negative.");
        }

        // Overlap must leave forward progress within each window.
        if (WindowOverlap >= WindowSize)
        {
            throw new InvalidOperationException("WindowOverlap must be smaller than WindowSize.");
        }

        if (MaxPagesPerFetch < 1)
        {
            throw new InvalidOperationException("MaxPagesPerFetch must be positive.");
        }

        if (RequestDelay < TimeSpan.Zero)
        {
            throw new InvalidOperationException("RequestDelay cannot be negative.");
        }
    }
}

View File

@@ -0,0 +1,16 @@
using System;
using System.Collections.Immutable;
namespace StellaOps.Concelier.Connector.CertIn.Internal;
/// <summary>
/// Parsed CERT-In advisory payload used internally by the connector pipeline.
/// </summary>
/// <param name="AdvisoryId">Source-assigned advisory identifier.</param>
/// <param name="Title">Advisory title.</param>
/// <param name="Link">Link to the advisory page.</param>
/// <param name="Published">Publication timestamp.</param>
/// <param name="Summary">Optional short summary; null when the source provides none.</param>
/// <param name="Content">Advisory body content.</param>
/// <param name="Severity">Optional severity label; null when the source provides none.</param>
/// <param name="CveIds">CVE identifiers referenced by the advisory.</param>
/// <param name="VendorNames">Vendor names mentioned in the advisory.</param>
/// <param name="ReferenceLinks">Additional reference URLs.</param>
internal sealed record CertInAdvisoryDto(
    string AdvisoryId,
    string Title,
    string Link,
    DateTimeOffset Published,
    string? Summary,
    string Content,
    string? Severity,
    ImmutableArray<string> CveIds,
    ImmutableArray<string> VendorNames,
    ImmutableArray<string> ReferenceLinks);

Some files were not shown because too many files have changed in this diff Show More