Restructure solution layout by module
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
# AGENTS
|
||||
## Role
|
||||
Build the CCCS (Canadian Centre for Cyber Security) advisories connector so Concelier can ingest national cyber bulletins alongside other vendor/regional sources.
|
||||
|
||||
## Scope
|
||||
- Research CCCS advisory feeds (RSS/Atom, JSON API, or HTML listings) and define the canonical fetch workflow.
|
||||
- Implement fetch, parse, and mapping stages with deterministic cursoring and retry/backoff behaviour.
|
||||
- Normalise advisory content (summary, affected vendors/products, mitigation guidance, references, CVE IDs).
|
||||
- Emit canonical `Advisory` records with aliases, references, affected packages, and provenance metadata.
|
||||
- Provide fixtures and regression tests to keep the connector deterministic.
|
||||
|
||||
## Participants
|
||||
- `Source.Common` (HTTP clients, fetch service, DTO storage helpers).
|
||||
- `Storage.Mongo` (raw/document/DTO/advisory stores + source state).
|
||||
- `Concelier.Models` (canonical advisory data structures).
|
||||
- `Concelier.Testing` (integration fixtures and snapshot utilities).
|
||||
|
||||
## Interfaces & Contracts
|
||||
- Job kinds: `cccs:fetch`, `cccs:parse`, `cccs:map`.
|
||||
- Persist ETag/Last-Modified metadata when the upstream supports it.
|
||||
- Include alias entries for CCCS advisory IDs plus referenced CVE IDs.
|
||||
|
||||
## In/Out of scope
|
||||
In scope:
|
||||
- End-to-end connector implementation with range primitive coverage for affected packages.
|
||||
- Minimal telemetry logging/counters matching other connectors.
|
||||
|
||||
Out of scope:
|
||||
- Automated remediation actions or vendor-specific enrichment beyond CCCS published data.
|
||||
- Export or downstream pipeline changes.
|
||||
|
||||
## Observability & Security Expectations
|
||||
- Log fetch attempts, success/failure counts, and mapping statistics.
|
||||
- Sanitize HTML safely, dropping scripts/styles before storing DTOs.
|
||||
- Respect upstream rate limits; mark failures in source state with backoff.
|
||||
|
||||
## Tests
|
||||
- Add `StellaOps.Concelier.Connector.Cccs.Tests` covering fetch/parse/map with canned fixtures.
|
||||
- Snapshot canonical advisories; support fixture regeneration via env flag.
|
||||
- Validate deterministic ordering and timestamps to maintain reproducibility.
|
||||
@@ -0,0 +1,606 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Globalization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MongoDB.Bson;
|
||||
using StellaOps.Concelier.Connector.Cccs.Configuration;
|
||||
using StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
using StellaOps.Concelier.Connector.Common;
|
||||
using StellaOps.Concelier.Connector.Common.Fetch;
|
||||
using StellaOps.Concelier.Storage.Mongo;
|
||||
using StellaOps.Concelier.Storage.Mongo.Advisories;
|
||||
using StellaOps.Concelier.Storage.Mongo.Documents;
|
||||
using StellaOps.Concelier.Storage.Mongo.Dtos;
|
||||
using StellaOps.Plugin;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs;
|
||||
|
||||
public sealed class CccsConnector : IFeedConnector
|
||||
{
|
||||
private static readonly JsonSerializerOptions RawSerializerOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
private static readonly JsonSerializerOptions DtoSerializerOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
private const string DtoSchemaVersion = "cccs.dto.v1";
|
||||
|
||||
private readonly CccsFeedClient _feedClient;
|
||||
private readonly RawDocumentStorage _rawDocumentStorage;
|
||||
private readonly IDocumentStore _documentStore;
|
||||
private readonly IDtoStore _dtoStore;
|
||||
private readonly IAdvisoryStore _advisoryStore;
|
||||
private readonly ISourceStateRepository _stateRepository;
|
||||
private readonly CccsHtmlParser _htmlParser;
|
||||
private readonly CccsDiagnostics _diagnostics;
|
||||
private readonly CccsOptions _options;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ILogger<CccsConnector> _logger;
|
||||
|
||||
public CccsConnector(
|
||||
CccsFeedClient feedClient,
|
||||
RawDocumentStorage rawDocumentStorage,
|
||||
IDocumentStore documentStore,
|
||||
IDtoStore dtoStore,
|
||||
IAdvisoryStore advisoryStore,
|
||||
ISourceStateRepository stateRepository,
|
||||
CccsHtmlParser htmlParser,
|
||||
CccsDiagnostics diagnostics,
|
||||
IOptions<CccsOptions> options,
|
||||
TimeProvider? timeProvider,
|
||||
ILogger<CccsConnector> logger)
|
||||
{
|
||||
_feedClient = feedClient ?? throw new ArgumentNullException(nameof(feedClient));
|
||||
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
|
||||
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
|
||||
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
|
||||
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
|
||||
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
|
||||
_htmlParser = htmlParser ?? throw new ArgumentNullException(nameof(htmlParser));
|
||||
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
|
||||
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
|
||||
_options.Validate();
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public string SourceName => CccsConnectorPlugin.SourceName;
|
||||
|
||||
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments);
|
||||
var pendingMappings = new HashSet<Guid>(cursor.PendingMappings);
|
||||
var knownHashes = new Dictionary<string, string>(cursor.KnownEntryHashes, StringComparer.Ordinal);
|
||||
var feedsProcessed = 0;
|
||||
var totalItems = 0;
|
||||
var added = 0;
|
||||
var unchanged = 0;
|
||||
|
||||
try
|
||||
{
|
||||
foreach (var feed in _options.Feeds)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
_diagnostics.FetchAttempt();
|
||||
var result = await _feedClient.FetchAsync(feed, _options.RequestTimeout, cancellationToken).ConfigureAwait(false);
|
||||
feedsProcessed++;
|
||||
totalItems += result.Items.Count;
|
||||
|
||||
if (result.Items.Count == 0)
|
||||
{
|
||||
_diagnostics.FetchSuccess();
|
||||
await DelayBetweenRequestsAsync(cancellationToken).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
var items = result.Items
|
||||
.Where(static item => !string.IsNullOrWhiteSpace(item.Title))
|
||||
.OrderByDescending(item => ParseDate(item.DateModifiedTimestamp) ?? ParseDate(item.DateModified) ?? DateTimeOffset.MinValue)
|
||||
.ThenByDescending(item => ParseDate(item.DateCreated) ?? DateTimeOffset.MinValue)
|
||||
.ToList();
|
||||
|
||||
foreach (var item in items)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var documentUri = BuildDocumentUri(item, feed);
|
||||
var rawDocument = CreateRawDocument(item, feed, result.AlertTypes);
|
||||
var payload = JsonSerializer.SerializeToUtf8Bytes(rawDocument, RawSerializerOptions);
|
||||
var sha = ComputeSha256(payload);
|
||||
|
||||
if (knownHashes.TryGetValue(documentUri, out var existingHash)
|
||||
&& string.Equals(existingHash, sha, StringComparison.Ordinal))
|
||||
{
|
||||
unchanged++;
|
||||
_diagnostics.FetchUnchanged();
|
||||
continue;
|
||||
}
|
||||
|
||||
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, documentUri, cancellationToken).ConfigureAwait(false);
|
||||
if (existing is not null
|
||||
&& string.Equals(existing.Sha256, sha, StringComparison.OrdinalIgnoreCase)
|
||||
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
|
||||
{
|
||||
knownHashes[documentUri] = sha;
|
||||
unchanged++;
|
||||
_diagnostics.FetchUnchanged();
|
||||
continue;
|
||||
}
|
||||
|
||||
var gridFsId = await _rawDocumentStorage.UploadAsync(
|
||||
SourceName,
|
||||
documentUri,
|
||||
payload,
|
||||
"application/json",
|
||||
expiresAt: null,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
|
||||
{
|
||||
["cccs.language"] = rawDocument.Language,
|
||||
["cccs.sourceId"] = rawDocument.SourceId,
|
||||
};
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(rawDocument.SerialNumber))
|
||||
{
|
||||
metadata["cccs.serialNumber"] = rawDocument.SerialNumber!;
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(rawDocument.AlertType))
|
||||
{
|
||||
metadata["cccs.alertType"] = rawDocument.AlertType!;
|
||||
}
|
||||
|
||||
var recordId = existing?.Id ?? Guid.NewGuid();
|
||||
var record = new DocumentRecord(
|
||||
recordId,
|
||||
SourceName,
|
||||
documentUri,
|
||||
now,
|
||||
sha,
|
||||
DocumentStatuses.PendingParse,
|
||||
"application/json",
|
||||
Headers: null,
|
||||
Metadata: metadata,
|
||||
Etag: null,
|
||||
LastModified: rawDocument.Modified ?? rawDocument.Published ?? result.LastModifiedUtc,
|
||||
GridFsId: gridFsId,
|
||||
ExpiresAt: null);
|
||||
|
||||
var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Add(upserted.Id);
|
||||
pendingMappings.Remove(upserted.Id);
|
||||
knownHashes[documentUri] = sha;
|
||||
added++;
|
||||
_diagnostics.FetchDocument();
|
||||
|
||||
if (added >= _options.MaxEntriesPerFetch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
_diagnostics.FetchSuccess();
|
||||
await DelayBetweenRequestsAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (added >= _options.MaxEntriesPerFetch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or JsonException or InvalidOperationException)
|
||||
{
|
||||
_diagnostics.FetchFailure();
|
||||
_logger.LogError(ex, "CCCS fetch failed");
|
||||
await _stateRepository.MarkFailureAsync(SourceName, now, _options.FailureBackoff, ex.Message, cancellationToken).ConfigureAwait(false);
|
||||
throw;
|
||||
}
|
||||
|
||||
var trimmedHashes = TrimKnownHashes(knownHashes, _options.MaxKnownEntries);
|
||||
var updatedCursor = cursor
|
||||
.WithPendingDocuments(pendingDocuments)
|
||||
.WithPendingMappings(pendingMappings)
|
||||
.WithKnownEntryHashes(trimmedHashes)
|
||||
.WithLastFetch(now);
|
||||
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
_logger.LogInformation(
|
||||
"CCCS fetch completed feeds={Feeds} items={Items} newDocuments={Added} unchanged={Unchanged} pendingDocuments={PendingDocuments} pendingMappings={PendingMappings}",
|
||||
feedsProcessed,
|
||||
totalItems,
|
||||
added,
|
||||
unchanged,
|
||||
pendingDocuments.Count,
|
||||
pendingMappings.Count);
|
||||
}
|
||||
|
||||
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (cursor.PendingDocuments.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingDocuments = cursor.PendingDocuments.ToList();
|
||||
var pendingMappings = cursor.PendingMappings.ToList();
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var parsed = 0;
|
||||
var parseFailures = 0;
|
||||
|
||||
foreach (var documentId in cursor.PendingDocuments)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
if (document is null)
|
||||
{
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
_diagnostics.ParseFailure();
|
||||
parseFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!document.GridFsId.HasValue)
|
||||
{
|
||||
_diagnostics.ParseFailure();
|
||||
_logger.LogWarning("CCCS document {DocumentId} missing GridFS payload", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
parseFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
byte[] payload;
|
||||
try
|
||||
{
|
||||
payload = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_diagnostics.ParseFailure();
|
||||
_logger.LogError(ex, "CCCS unable to download raw document {DocumentId}", documentId);
|
||||
throw;
|
||||
}
|
||||
|
||||
CccsRawAdvisoryDocument? raw;
|
||||
try
|
||||
{
|
||||
raw = JsonSerializer.Deserialize<CccsRawAdvisoryDocument>(payload, RawSerializerOptions);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_diagnostics.ParseFailure();
|
||||
_logger.LogWarning(ex, "CCCS failed to deserialize raw document {DocumentId}", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
parseFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (raw is null)
|
||||
{
|
||||
_diagnostics.ParseFailure();
|
||||
_logger.LogWarning("CCCS raw document {DocumentId} produced null payload", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
parseFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
CccsAdvisoryDto dto;
|
||||
try
|
||||
{
|
||||
dto = _htmlParser.Parse(raw);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_diagnostics.ParseFailure();
|
||||
_logger.LogWarning(ex, "CCCS failed to parse advisory DTO for {DocumentId}", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingDocuments.Remove(documentId);
|
||||
pendingMappings.Remove(documentId);
|
||||
parseFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
var dtoJson = JsonSerializer.Serialize(dto, DtoSerializerOptions);
|
||||
var dtoBson = BsonDocument.Parse(dtoJson);
|
||||
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, DtoSchemaVersion, dtoBson, now);
|
||||
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
pendingDocuments.Remove(documentId);
|
||||
if (!pendingMappings.Contains(documentId))
|
||||
{
|
||||
pendingMappings.Add(documentId);
|
||||
}
|
||||
_diagnostics.ParseSuccess();
|
||||
parsed++;
|
||||
}
|
||||
|
||||
var updatedCursor = cursor
|
||||
.WithPendingDocuments(pendingDocuments)
|
||||
.WithPendingMappings(pendingMappings);
|
||||
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
if (parsed > 0 || parseFailures > 0)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"CCCS parse completed parsed={Parsed} failures={Failures} pendingDocuments={PendingDocuments} pendingMappings={PendingMappings}",
|
||||
parsed,
|
||||
parseFailures,
|
||||
pendingDocuments.Count,
|
||||
pendingMappings.Count);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (cursor.PendingMappings.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var pendingMappings = cursor.PendingMappings.ToList();
|
||||
var mapped = 0;
|
||||
var mappingFailures = 0;
|
||||
|
||||
foreach (var documentId in cursor.PendingMappings)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
if (document is null)
|
||||
{
|
||||
pendingMappings.Remove(documentId);
|
||||
_diagnostics.MapFailure();
|
||||
mappingFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
|
||||
if (dtoRecord is null)
|
||||
{
|
||||
_diagnostics.MapFailure();
|
||||
_logger.LogWarning("CCCS document {DocumentId} missing DTO payload", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
mappingFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
CccsAdvisoryDto? dto;
|
||||
try
|
||||
{
|
||||
var json = dtoRecord.Payload.ToJson();
|
||||
dto = JsonSerializer.Deserialize<CccsAdvisoryDto>(json, DtoSerializerOptions);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_diagnostics.MapFailure();
|
||||
_logger.LogWarning(ex, "CCCS failed to deserialize DTO for document {DocumentId}", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
mappingFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dto is null)
|
||||
{
|
||||
_diagnostics.MapFailure();
|
||||
_logger.LogWarning("CCCS DTO for document {DocumentId} evaluated to null", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
mappingFailures++;
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var advisory = CccsMapper.Map(dto, document, dtoRecord.ValidatedAt);
|
||||
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
_diagnostics.MapSuccess();
|
||||
mapped++;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_diagnostics.MapFailure();
|
||||
_logger.LogError(ex, "CCCS mapping failed for document {DocumentId}", documentId);
|
||||
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
|
||||
pendingMappings.Remove(documentId);
|
||||
mappingFailures++;
|
||||
}
|
||||
}
|
||||
|
||||
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
|
||||
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
|
||||
if (mapped > 0 || mappingFailures > 0)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"CCCS map completed mapped={Mapped} failures={Failures} pendingMappings={PendingMappings}",
|
||||
mapped,
|
||||
mappingFailures,
|
||||
pendingMappings.Count);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<CccsCursor> GetCursorAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
|
||||
return state is null ? CccsCursor.Empty : CccsCursor.FromBson(state.Cursor);
|
||||
}
|
||||
|
||||
private Task UpdateCursorAsync(CccsCursor cursor, CancellationToken cancellationToken)
|
||||
{
|
||||
var document = cursor.ToBsonDocument();
|
||||
var completedAt = cursor.LastFetchAt ?? _timeProvider.GetUtcNow();
|
||||
return _stateRepository.UpdateCursorAsync(SourceName, document, completedAt, cancellationToken);
|
||||
}
|
||||
|
||||
private async Task DelayBetweenRequestsAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
if (_options.RequestDelay <= TimeSpan.Zero)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
// Ignore cancellation during delay; caller handles.
|
||||
}
|
||||
}
|
||||
|
||||
private static string BuildDocumentUri(CccsFeedItem item, CccsFeedEndpoint feed)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(item.Url))
|
||||
{
|
||||
if (Uri.TryCreate(item.Url, UriKind.Absolute, out var absolute))
|
||||
{
|
||||
return absolute.ToString();
|
||||
}
|
||||
|
||||
var baseUri = new Uri("https://www.cyber.gc.ca", UriKind.Absolute);
|
||||
if (Uri.TryCreate(baseUri, item.Url, out var combined))
|
||||
{
|
||||
return combined.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
return $"https://www.cyber.gc.ca/api/cccs/threats/{feed.Language}/{item.Nid}";
|
||||
}
|
||||
|
||||
private static CccsRawAdvisoryDocument CreateRawDocument(CccsFeedItem item, CccsFeedEndpoint feed, IReadOnlyDictionary<int, string> taxonomy)
|
||||
{
|
||||
var language = string.IsNullOrWhiteSpace(item.Language) ? feed.Language : item.Language!.Trim();
|
||||
var identifier = !string.IsNullOrWhiteSpace(item.SerialNumber)
|
||||
? item.SerialNumber!.Trim()
|
||||
: !string.IsNullOrWhiteSpace(item.Uuid)
|
||||
? item.Uuid!.Trim()
|
||||
: $"nid-{item.Nid}";
|
||||
|
||||
var canonicalUrl = BuildDocumentUri(item, feed);
|
||||
var bodySegments = item.Body ?? Array.Empty<string>();
|
||||
var bodyHtml = string.Join(Environment.NewLine, bodySegments);
|
||||
var published = ParseDate(item.DateCreated);
|
||||
var modified = ParseDate(item.DateModifiedTimestamp) ?? ParseDate(item.DateModified);
|
||||
var alertType = ResolveAlertType(item, taxonomy);
|
||||
|
||||
return new CccsRawAdvisoryDocument
|
||||
{
|
||||
SourceId = identifier,
|
||||
SerialNumber = item.SerialNumber?.Trim(),
|
||||
Uuid = item.Uuid,
|
||||
Language = language.ToLowerInvariant(),
|
||||
Title = item.Title?.Trim() ?? identifier,
|
||||
Summary = item.Summary?.Trim(),
|
||||
CanonicalUrl = canonicalUrl,
|
||||
ExternalUrl = item.ExternalUrl,
|
||||
BodyHtml = bodyHtml,
|
||||
BodySegments = bodySegments,
|
||||
AlertType = alertType,
|
||||
Subject = item.Subject,
|
||||
Banner = item.Banner,
|
||||
Published = published,
|
||||
Modified = modified,
|
||||
RawDateCreated = item.DateCreated,
|
||||
RawDateModified = item.DateModifiedTimestamp ?? item.DateModified,
|
||||
};
|
||||
}
|
||||
|
||||
private static string? ResolveAlertType(CccsFeedItem item, IReadOnlyDictionary<int, string> taxonomy)
|
||||
{
|
||||
if (item.AlertType.ValueKind == JsonValueKind.Number)
|
||||
{
|
||||
var id = item.AlertType.GetInt32();
|
||||
return taxonomy.TryGetValue(id, out var label) ? label : id.ToString(CultureInfo.InvariantCulture);
|
||||
}
|
||||
|
||||
if (item.AlertType.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
return item.AlertType.GetString();
|
||||
}
|
||||
|
||||
if (item.AlertType.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var element in item.AlertType.EnumerateArray())
|
||||
{
|
||||
if (element.ValueKind == JsonValueKind.Number)
|
||||
{
|
||||
var id = element.GetInt32();
|
||||
if (taxonomy.TryGetValue(id, out var label))
|
||||
{
|
||||
return label;
|
||||
}
|
||||
}
|
||||
else if (element.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
var label = element.GetString();
|
||||
if (!string.IsNullOrWhiteSpace(label))
|
||||
{
|
||||
return label;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static Dictionary<string, string> TrimKnownHashes(Dictionary<string, string> hashes, int maxEntries)
|
||||
{
|
||||
if (hashes.Count <= maxEntries)
|
||||
{
|
||||
return hashes;
|
||||
}
|
||||
|
||||
var overflow = hashes.Count - maxEntries;
|
||||
foreach (var key in hashes.Keys.Take(overflow).ToList())
|
||||
{
|
||||
hashes.Remove(key);
|
||||
}
|
||||
|
||||
return hashes;
|
||||
}
|
||||
|
||||
private static DateTimeOffset? ParseDate(string? value)
|
||||
=> string.IsNullOrWhiteSpace(value)
|
||||
? null
|
||||
: DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var parsed)
|
||||
? parsed
|
||||
: null;
|
||||
|
||||
private static string ComputeSha256(byte[] payload)
|
||||
=> Convert.ToHexString(SHA256.HashData(payload)).ToLowerInvariant();
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Plugin;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs;
|
||||
|
||||
public sealed class CccsConnectorPlugin : IConnectorPlugin
|
||||
{
|
||||
public const string SourceName = "cccs";
|
||||
|
||||
public string Name => SourceName;
|
||||
|
||||
public bool IsAvailable(IServiceProvider services)
|
||||
=> services.GetService<CccsConnector>() is not null;
|
||||
|
||||
public IFeedConnector Create(IServiceProvider services)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
return services.GetRequiredService<CccsConnector>();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.DependencyInjection;
|
||||
using StellaOps.Concelier.Core.Jobs;
|
||||
using StellaOps.Concelier.Connector.Cccs.Configuration;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs;
|
||||
|
||||
public sealed class CccsDependencyInjectionRoutine : IDependencyInjectionRoutine
|
||||
{
|
||||
private const string ConfigurationSection = "concelier:sources:cccs";
|
||||
|
||||
public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
ArgumentNullException.ThrowIfNull(configuration);
|
||||
|
||||
services.AddCccsConnector(options =>
|
||||
{
|
||||
configuration.GetSection(ConfigurationSection).Bind(options);
|
||||
options.Validate();
|
||||
});
|
||||
|
||||
services.AddTransient<CccsFetchJob>();
|
||||
|
||||
services.PostConfigure<JobSchedulerOptions>(options =>
|
||||
{
|
||||
EnsureJob(options, CccsJobKinds.Fetch, typeof(CccsFetchJob));
|
||||
});
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
|
||||
{
|
||||
if (options.Definitions.ContainsKey(kind))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
options.Definitions[kind] = new JobDefinition(
|
||||
kind,
|
||||
jobType,
|
||||
options.DefaultTimeout,
|
||||
options.DefaultLeaseDuration,
|
||||
CronExpression: null,
|
||||
Enabled: true);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
using System;
|
||||
using System.Linq;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Concelier.Connector.Cccs.Configuration;
|
||||
using StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
using StellaOps.Concelier.Connector.Common.Http;
|
||||
using StellaOps.Concelier.Connector.Common.Html;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs;
|
||||
|
||||
public static class CccsServiceCollectionExtensions
|
||||
{
|
||||
public static IServiceCollection AddCccsConnector(this IServiceCollection services, Action<CccsOptions> configure)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
ArgumentNullException.ThrowIfNull(configure);
|
||||
|
||||
services.AddOptions<CccsOptions>()
|
||||
.Configure(configure)
|
||||
.PostConfigure(static options => options.Validate());
|
||||
|
||||
services.AddSourceHttpClient(CccsOptions.HttpClientName, static (sp, clientOptions) =>
|
||||
{
|
||||
var options = sp.GetRequiredService<IOptions<CccsOptions>>().Value;
|
||||
clientOptions.UserAgent = "StellaOps.Concelier.Cccs/1.0";
|
||||
clientOptions.Timeout = options.RequestTimeout;
|
||||
clientOptions.AllowedHosts.Clear();
|
||||
|
||||
foreach (var feed in options.Feeds.Where(static feed => feed.Uri is not null))
|
||||
{
|
||||
clientOptions.AllowedHosts.Add(feed.Uri!.Host);
|
||||
}
|
||||
|
||||
clientOptions.AllowedHosts.Add("www.cyber.gc.ca");
|
||||
clientOptions.AllowedHosts.Add("cyber.gc.ca");
|
||||
});
|
||||
|
||||
services.TryAddSingleton<HtmlContentSanitizer>();
|
||||
services.TryAddSingleton<CccsDiagnostics>();
|
||||
services.TryAddSingleton<CccsHtmlParser>();
|
||||
services.TryAddSingleton<CccsFeedClient>();
|
||||
services.AddTransient<CccsConnector>();
|
||||
return services;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,175 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Configuration;
|
||||
|
||||
public sealed class CccsOptions
|
||||
{
|
||||
public const string HttpClientName = "concelier.source.cccs";
|
||||
|
||||
private readonly List<CccsFeedEndpoint> _feeds = new();
|
||||
|
||||
public CccsOptions()
|
||||
{
|
||||
_feeds.Add(new CccsFeedEndpoint("en", new Uri("https://www.cyber.gc.ca/api/cccs/threats/v1/get?lang=en&content_type=cccs_threat")));
|
||||
_feeds.Add(new CccsFeedEndpoint("fr", new Uri("https://www.cyber.gc.ca/api/cccs/threats/v1/get?lang=fr&content_type=cccs_threat")));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Feed endpoints to poll; configure per language or content category.
|
||||
/// </summary>
|
||||
public IList<CccsFeedEndpoint> Feeds => _feeds;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of entries to enqueue per fetch cycle.
|
||||
/// </summary>
|
||||
public int MaxEntriesPerFetch { get; set; } = 80;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum remembered entries (URI+hash) for deduplication.
|
||||
/// </summary>
|
||||
public int MaxKnownEntries { get; set; } = 512;
|
||||
|
||||
/// <summary>
|
||||
/// Timeout applied to feed and taxonomy requests.
|
||||
/// </summary>
|
||||
public TimeSpan RequestTimeout { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>
|
||||
/// Delay between successive feed requests to respect upstream throttling.
|
||||
/// </summary>
|
||||
public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(250);
|
||||
|
||||
/// <summary>
|
||||
/// Backoff recorded in source state when fetch fails.
|
||||
/// </summary>
|
||||
public TimeSpan FailureBackoff { get; set; } = TimeSpan.FromMinutes(1);
|
||||
|
||||
public void Validate()
|
||||
{
|
||||
if (_feeds.Count == 0)
|
||||
{
|
||||
throw new InvalidOperationException("At least one CCCS feed endpoint must be configured.");
|
||||
}
|
||||
|
||||
var seenLanguages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var feed in _feeds)
|
||||
{
|
||||
feed.Validate();
|
||||
if (!seenLanguages.Add(feed.Language))
|
||||
{
|
||||
throw new InvalidOperationException($"Duplicate CCCS feed language configured: '{feed.Language}'. Each language should be unique to avoid duplicate ingestion.");
|
||||
}
|
||||
}
|
||||
|
||||
if (MaxEntriesPerFetch <= 0)
|
||||
{
|
||||
throw new InvalidOperationException($"{nameof(MaxEntriesPerFetch)} must be greater than zero.");
|
||||
}
|
||||
|
||||
if (MaxKnownEntries <= 0)
|
||||
{
|
||||
throw new InvalidOperationException($"{nameof(MaxKnownEntries)} must be greater than zero.");
|
||||
}
|
||||
|
||||
if (RequestTimeout <= TimeSpan.Zero)
|
||||
{
|
||||
throw new InvalidOperationException($"{nameof(RequestTimeout)} must be positive.");
|
||||
}
|
||||
|
||||
if (RequestDelay < TimeSpan.Zero)
|
||||
{
|
||||
throw new InvalidOperationException($"{nameof(RequestDelay)} cannot be negative.");
|
||||
}
|
||||
|
||||
if (FailureBackoff <= TimeSpan.Zero)
|
||||
{
|
||||
throw new InvalidOperationException($"{nameof(FailureBackoff)} must be positive.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public sealed class CccsFeedEndpoint
|
||||
{
|
||||
public CccsFeedEndpoint()
|
||||
{
|
||||
}
|
||||
|
||||
public CccsFeedEndpoint(string language, Uri uri)
|
||||
{
|
||||
Language = language;
|
||||
Uri = uri;
|
||||
}
|
||||
|
||||
public string Language { get; set; } = "en";
|
||||
|
||||
public Uri? Uri { get; set; }
|
||||
|
||||
public void Validate()
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(Language))
|
||||
{
|
||||
throw new InvalidOperationException("CCCS feed language must be specified.");
|
||||
}
|
||||
|
||||
if (Uri is null || !Uri.IsAbsoluteUri)
|
||||
{
|
||||
throw new InvalidOperationException($"CCCS feed endpoint URI must be an absolute URI (language='{Language}').");
|
||||
}
|
||||
}
|
||||
|
||||
public Uri BuildTaxonomyUri()
|
||||
{
|
||||
if (Uri is null)
|
||||
{
|
||||
throw new InvalidOperationException("Feed endpoint URI must be configured before building taxonomy URI.");
|
||||
}
|
||||
|
||||
var language = Uri.GetQueryParameterValueOrDefault("lang", Language);
|
||||
var builder = $"https://www.cyber.gc.ca/api/cccs/taxonomy/v1/get?lang={language}&vocabulary=cccs_alert_type";
|
||||
return new Uri(builder, UriKind.Absolute);
|
||||
}
|
||||
}
|
||||
|
||||
internal static class CccsUriExtensions
|
||||
{
|
||||
public static string GetQueryParameterValueOrDefault(this Uri uri, string key, string fallback)
|
||||
{
|
||||
if (uri is null)
|
||||
{
|
||||
return fallback;
|
||||
}
|
||||
|
||||
var query = uri.Query;
|
||||
if (string.IsNullOrEmpty(query))
|
||||
{
|
||||
return fallback;
|
||||
}
|
||||
|
||||
var trimmed = query.StartsWith("?", StringComparison.Ordinal) ? query[1..] : query;
|
||||
foreach (var pair in trimmed.Split(new[] { '&' }, StringSplitOptions.RemoveEmptyEntries))
|
||||
{
|
||||
var separatorIndex = pair.IndexOf('=');
|
||||
if (separatorIndex < 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var left = pair[..separatorIndex].Trim();
|
||||
if (!left.Equals(key, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var right = pair[(separatorIndex + 1)..].Trim();
|
||||
if (right.Length == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
return Uri.UnescapeDataString(right);
|
||||
}
|
||||
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
internal sealed record CccsAdvisoryDto
|
||||
{
|
||||
[JsonPropertyName("sourceId")]
|
||||
public string SourceId { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("serialNumber")]
|
||||
public string SerialNumber { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("language")]
|
||||
public string Language { get; init; } = "en";
|
||||
|
||||
[JsonPropertyName("title")]
|
||||
public string Title { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("summary")]
|
||||
public string? Summary { get; init; }
|
||||
|
||||
[JsonPropertyName("canonicalUrl")]
|
||||
public string CanonicalUrl { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("contentHtml")]
|
||||
public string ContentHtml { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("published")]
|
||||
public DateTimeOffset? Published { get; init; }
|
||||
|
||||
[JsonPropertyName("modified")]
|
||||
public DateTimeOffset? Modified { get; init; }
|
||||
|
||||
[JsonPropertyName("alertType")]
|
||||
public string? AlertType { get; init; }
|
||||
|
||||
[JsonPropertyName("subject")]
|
||||
public string? Subject { get; init; }
|
||||
|
||||
[JsonPropertyName("products")]
|
||||
public IReadOnlyList<string> Products { get; init; } = Array.Empty<string>();
|
||||
|
||||
[JsonPropertyName("references")]
|
||||
public IReadOnlyList<CccsReferenceDto> References { get; init; } = Array.Empty<CccsReferenceDto>();
|
||||
|
||||
[JsonPropertyName("cveIds")]
|
||||
public IReadOnlyList<string> CveIds { get; init; } = Array.Empty<string>();
|
||||
}
|
||||
|
||||
internal sealed record CccsReferenceDto(
|
||||
[property: JsonPropertyName("url")] string Url,
|
||||
[property: JsonPropertyName("label")] string? Label);
|
||||
@@ -0,0 +1,145 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using MongoDB.Bson;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
internal sealed record CccsCursor(
|
||||
IReadOnlyCollection<Guid> PendingDocuments,
|
||||
IReadOnlyCollection<Guid> PendingMappings,
|
||||
IReadOnlyDictionary<string, string> KnownEntryHashes,
|
||||
DateTimeOffset? LastFetchAt)
|
||||
{
|
||||
private static readonly IReadOnlyCollection<Guid> EmptyGuidCollection = Array.Empty<Guid>();
|
||||
private static readonly IReadOnlyDictionary<string, string> EmptyHashes = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
|
||||
public static CccsCursor Empty { get; } = new(EmptyGuidCollection, EmptyGuidCollection, EmptyHashes, null);
|
||||
|
||||
public CccsCursor WithPendingDocuments(IEnumerable<Guid> documents)
|
||||
{
|
||||
var distinct = (documents ?? Enumerable.Empty<Guid>()).Distinct().ToArray();
|
||||
return this with { PendingDocuments = distinct };
|
||||
}
|
||||
|
||||
public CccsCursor WithPendingMappings(IEnumerable<Guid> mappings)
|
||||
{
|
||||
var distinct = (mappings ?? Enumerable.Empty<Guid>()).Distinct().ToArray();
|
||||
return this with { PendingMappings = distinct };
|
||||
}
|
||||
|
||||
public CccsCursor WithKnownEntryHashes(IReadOnlyDictionary<string, string> hashes)
|
||||
{
|
||||
var map = hashes is null || hashes.Count == 0
|
||||
? EmptyHashes
|
||||
: new Dictionary<string, string>(hashes, StringComparer.Ordinal);
|
||||
return this with { KnownEntryHashes = map };
|
||||
}
|
||||
|
||||
public CccsCursor WithLastFetch(DateTimeOffset? timestamp)
|
||||
=> this with { LastFetchAt = timestamp };
|
||||
|
||||
public BsonDocument ToBsonDocument()
|
||||
{
|
||||
var doc = new BsonDocument
|
||||
{
|
||||
["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
|
||||
["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
|
||||
};
|
||||
|
||||
if (KnownEntryHashes.Count > 0)
|
||||
{
|
||||
var hashes = new BsonArray();
|
||||
foreach (var kvp in KnownEntryHashes)
|
||||
{
|
||||
hashes.Add(new BsonDocument
|
||||
{
|
||||
["uri"] = kvp.Key,
|
||||
["hash"] = kvp.Value,
|
||||
});
|
||||
}
|
||||
|
||||
doc["knownEntryHashes"] = hashes;
|
||||
}
|
||||
|
||||
if (LastFetchAt.HasValue)
|
||||
{
|
||||
doc["lastFetchAt"] = LastFetchAt.Value.UtcDateTime;
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
public static CccsCursor FromBson(BsonDocument? document)
|
||||
{
|
||||
if (document is null || document.ElementCount == 0)
|
||||
{
|
||||
return Empty;
|
||||
}
|
||||
|
||||
var pendingDocuments = ReadGuidArray(document, "pendingDocuments");
|
||||
var pendingMappings = ReadGuidArray(document, "pendingMappings");
|
||||
var hashes = ReadHashMap(document);
|
||||
var lastFetch = document.TryGetValue("lastFetchAt", out var value)
|
||||
? ParseDateTime(value)
|
||||
: null;
|
||||
|
||||
return new CccsCursor(pendingDocuments, pendingMappings, hashes, lastFetch);
|
||||
}
|
||||
|
||||
private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
|
||||
{
|
||||
if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
|
||||
{
|
||||
return EmptyGuidCollection;
|
||||
}
|
||||
|
||||
var items = new List<Guid>(array.Count);
|
||||
foreach (var element in array)
|
||||
{
|
||||
if (Guid.TryParse(element?.ToString(), out var guid))
|
||||
{
|
||||
items.Add(guid);
|
||||
}
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private static IReadOnlyDictionary<string, string> ReadHashMap(BsonDocument document)
|
||||
{
|
||||
if (!document.TryGetValue("knownEntryHashes", out var value) || value is not BsonArray array || array.Count == 0)
|
||||
{
|
||||
return EmptyHashes;
|
||||
}
|
||||
|
||||
var map = new Dictionary<string, string>(array.Count, StringComparer.Ordinal);
|
||||
foreach (var element in array)
|
||||
{
|
||||
if (element is not BsonDocument entry)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!entry.TryGetValue("uri", out var uriValue) || uriValue.IsBsonNull || string.IsNullOrWhiteSpace(uriValue.AsString))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var hash = entry.TryGetValue("hash", out var hashValue) && !hashValue.IsBsonNull
|
||||
? hashValue.AsString
|
||||
: string.Empty;
|
||||
map[uriValue.AsString] = hash;
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private static DateTimeOffset? ParseDateTime(BsonValue value)
|
||||
=> value.BsonType switch
|
||||
{
|
||||
BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),
|
||||
BsonType.String when DateTimeOffset.TryParse(value.AsString, out var parsed) => parsed.ToUniversalTime(),
|
||||
_ => null,
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
public sealed class CccsDiagnostics : IDisposable
|
||||
{
|
||||
private const string MeterName = "StellaOps.Concelier.Connector.Cccs";
|
||||
private const string MeterVersion = "1.0.0";
|
||||
|
||||
private readonly Meter _meter;
|
||||
private readonly Counter<long> _fetchAttempts;
|
||||
private readonly Counter<long> _fetchSuccess;
|
||||
private readonly Counter<long> _fetchDocuments;
|
||||
private readonly Counter<long> _fetchUnchanged;
|
||||
private readonly Counter<long> _fetchFailures;
|
||||
private readonly Counter<long> _parseSuccess;
|
||||
private readonly Counter<long> _parseFailures;
|
||||
private readonly Counter<long> _parseQuarantine;
|
||||
private readonly Counter<long> _mapSuccess;
|
||||
private readonly Counter<long> _mapFailures;
|
||||
|
||||
public CccsDiagnostics()
|
||||
{
|
||||
_meter = new Meter(MeterName, MeterVersion);
|
||||
_fetchAttempts = _meter.CreateCounter<long>("cccs.fetch.attempts", unit: "operations");
|
||||
_fetchSuccess = _meter.CreateCounter<long>("cccs.fetch.success", unit: "operations");
|
||||
_fetchDocuments = _meter.CreateCounter<long>("cccs.fetch.documents", unit: "documents");
|
||||
_fetchUnchanged = _meter.CreateCounter<long>("cccs.fetch.unchanged", unit: "documents");
|
||||
_fetchFailures = _meter.CreateCounter<long>("cccs.fetch.failures", unit: "operations");
|
||||
_parseSuccess = _meter.CreateCounter<long>("cccs.parse.success", unit: "documents");
|
||||
_parseFailures = _meter.CreateCounter<long>("cccs.parse.failures", unit: "documents");
|
||||
_parseQuarantine = _meter.CreateCounter<long>("cccs.parse.quarantine", unit: "documents");
|
||||
_mapSuccess = _meter.CreateCounter<long>("cccs.map.success", unit: "advisories");
|
||||
_mapFailures = _meter.CreateCounter<long>("cccs.map.failures", unit: "advisories");
|
||||
}
|
||||
|
||||
public void FetchAttempt() => _fetchAttempts.Add(1);
|
||||
|
||||
public void FetchSuccess() => _fetchSuccess.Add(1);
|
||||
|
||||
public void FetchDocument() => _fetchDocuments.Add(1);
|
||||
|
||||
public void FetchUnchanged() => _fetchUnchanged.Add(1);
|
||||
|
||||
public void FetchFailure() => _fetchFailures.Add(1);
|
||||
|
||||
public void ParseSuccess() => _parseSuccess.Add(1);
|
||||
|
||||
public void ParseFailure() => _parseFailures.Add(1);
|
||||
|
||||
public void ParseQuarantine() => _parseQuarantine.Add(1);
|
||||
|
||||
public void MapSuccess() => _mapSuccess.Add(1);
|
||||
|
||||
public void MapFailure() => _mapFailures.Add(1);
|
||||
|
||||
public void Dispose() => _meter.Dispose();
|
||||
}
|
||||
@@ -0,0 +1,146 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Concelier.Connector.Cccs.Configuration;
|
||||
using StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
public sealed class CccsFeedClient
|
||||
{
|
||||
private static readonly string[] AcceptHeaders =
|
||||
{
|
||||
"application/json",
|
||||
"application/vnd.api+json;q=0.9",
|
||||
"text/json;q=0.8",
|
||||
"application/*+json;q=0.7",
|
||||
};
|
||||
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
PropertyNameCaseInsensitive = true,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
private readonly SourceFetchService _fetchService;
|
||||
private readonly ILogger<CccsFeedClient> _logger;
|
||||
|
||||
public CccsFeedClient(SourceFetchService fetchService, ILogger<CccsFeedClient> logger)
|
||||
{
|
||||
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
internal async Task<CccsFeedResult> FetchAsync(CccsFeedEndpoint endpoint, TimeSpan requestTimeout, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(endpoint);
|
||||
if (endpoint.Uri is null)
|
||||
{
|
||||
throw new InvalidOperationException("Feed endpoint URI must be configured.");
|
||||
}
|
||||
|
||||
var request = new SourceFetchRequest(CccsOptions.HttpClientName, CccsConnectorPlugin.SourceName, endpoint.Uri)
|
||||
{
|
||||
AcceptHeaders = AcceptHeaders,
|
||||
TimeoutOverride = requestTimeout,
|
||||
Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
|
||||
{
|
||||
["cccs.language"] = endpoint.Language,
|
||||
["cccs.feedUri"] = endpoint.Uri.ToString(),
|
||||
},
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
var result = await _fetchService.FetchContentAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!result.IsSuccess || result.Content is null)
|
||||
{
|
||||
_logger.LogWarning("CCCS feed fetch returned no content for {Uri} (status={Status})", endpoint.Uri, result.StatusCode);
|
||||
return CccsFeedResult.Empty;
|
||||
}
|
||||
|
||||
var feedResponse = Deserialize<CccsFeedResponse>(result.Content);
|
||||
if (feedResponse is null || feedResponse.Error)
|
||||
{
|
||||
_logger.LogWarning("CCCS feed response flagged an error for {Uri}", endpoint.Uri);
|
||||
return CccsFeedResult.Empty;
|
||||
}
|
||||
|
||||
var taxonomy = await FetchTaxonomyAsync(endpoint, requestTimeout, cancellationToken).ConfigureAwait(false);
|
||||
var items = (IReadOnlyList<CccsFeedItem>)feedResponse.Response ?? Array.Empty<CccsFeedItem>();
|
||||
return new CccsFeedResult(items, taxonomy, result.LastModified);
|
||||
}
|
||||
catch (Exception ex) when (ex is JsonException or InvalidOperationException)
|
||||
{
|
||||
_logger.LogError(ex, "CCCS feed deserialization failed for {Uri}", endpoint.Uri);
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "CCCS feed fetch failed for {Uri}", endpoint.Uri);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<IReadOnlyDictionary<int, string>> FetchTaxonomyAsync(CccsFeedEndpoint endpoint, TimeSpan timeout, CancellationToken cancellationToken)
|
||||
{
|
||||
var taxonomyUri = endpoint.BuildTaxonomyUri();
|
||||
var request = new SourceFetchRequest(CccsOptions.HttpClientName, CccsConnectorPlugin.SourceName, taxonomyUri)
|
||||
{
|
||||
AcceptHeaders = AcceptHeaders,
|
||||
TimeoutOverride = timeout,
|
||||
Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
|
||||
{
|
||||
["cccs.language"] = endpoint.Language,
|
||||
["cccs.taxonomyUri"] = taxonomyUri.ToString(),
|
||||
},
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
var result = await _fetchService.FetchContentAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
if (!result.IsSuccess || result.Content is null)
|
||||
{
|
||||
_logger.LogDebug("CCCS taxonomy fetch returned no content for {Uri}", taxonomyUri);
|
||||
return new Dictionary<int, string>(0);
|
||||
}
|
||||
|
||||
var taxonomyResponse = Deserialize<CccsTaxonomyResponse>(result.Content);
|
||||
if (taxonomyResponse is null || taxonomyResponse.Error)
|
||||
{
|
||||
_logger.LogDebug("CCCS taxonomy response indicated error for {Uri}", taxonomyUri);
|
||||
return new Dictionary<int, string>(0);
|
||||
}
|
||||
|
||||
var map = new Dictionary<int, string>(taxonomyResponse.Response.Count);
|
||||
foreach (var item in taxonomyResponse.Response)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(item.Title))
|
||||
{
|
||||
map[item.Id] = item.Title!;
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
catch (Exception ex) when (ex is JsonException or InvalidOperationException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to deserialize CCCS taxonomy for {Uri}", taxonomyUri);
|
||||
return new Dictionary<int, string>(0);
|
||||
}
|
||||
catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "CCCS taxonomy fetch failed for {Uri}", taxonomyUri);
|
||||
return new Dictionary<int, string>(0);
|
||||
}
|
||||
}
|
||||
|
||||
private static T? Deserialize<T>(byte[] content)
|
||||
=> JsonSerializer.Deserialize<T>(content, SerializerOptions);
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
internal sealed class CccsFeedResponse
|
||||
{
|
||||
[JsonPropertyName("ERROR")]
|
||||
public bool Error { get; init; }
|
||||
|
||||
[JsonPropertyName("response")]
|
||||
public List<CccsFeedItem> Response { get; init; } = new();
|
||||
}
|
||||
|
||||
internal sealed class CccsFeedItem
|
||||
{
|
||||
[JsonPropertyName("nid")]
|
||||
public int Nid { get; init; }
|
||||
|
||||
[JsonPropertyName("title")]
|
||||
public string? Title { get; init; }
|
||||
|
||||
[JsonPropertyName("uuid")]
|
||||
public string? Uuid { get; init; }
|
||||
|
||||
[JsonPropertyName("banner")]
|
||||
public string? Banner { get; init; }
|
||||
|
||||
[JsonPropertyName("lang")]
|
||||
public string? Language { get; init; }
|
||||
|
||||
[JsonPropertyName("date_modified")]
|
||||
public string? DateModified { get; init; }
|
||||
|
||||
[JsonPropertyName("date_modified_ts")]
|
||||
public string? DateModifiedTimestamp { get; init; }
|
||||
|
||||
[JsonPropertyName("date_created")]
|
||||
public string? DateCreated { get; init; }
|
||||
|
||||
[JsonPropertyName("summary")]
|
||||
public string? Summary { get; init; }
|
||||
|
||||
[JsonPropertyName("body")]
|
||||
public string[] Body { get; init; } = Array.Empty<string>();
|
||||
|
||||
[JsonPropertyName("url")]
|
||||
public string? Url { get; init; }
|
||||
|
||||
[JsonPropertyName("alert_type")]
|
||||
public JsonElement AlertType { get; init; }
|
||||
|
||||
[JsonPropertyName("serial_number")]
|
||||
public string? SerialNumber { get; init; }
|
||||
|
||||
[JsonPropertyName("subject")]
|
||||
public string? Subject { get; init; }
|
||||
|
||||
[JsonPropertyName("moderation_state")]
|
||||
public string? ModerationState { get; init; }
|
||||
|
||||
[JsonPropertyName("external_url")]
|
||||
public string? ExternalUrl { get; init; }
|
||||
}
|
||||
|
||||
internal sealed class CccsTaxonomyResponse
|
||||
{
|
||||
[JsonPropertyName("ERROR")]
|
||||
public bool Error { get; init; }
|
||||
|
||||
[JsonPropertyName("response")]
|
||||
public List<CccsTaxonomyItem> Response { get; init; } = new();
|
||||
}
|
||||
|
||||
internal sealed class CccsTaxonomyItem
|
||||
{
|
||||
[JsonPropertyName("id")]
|
||||
public int Id { get; init; }
|
||||
|
||||
[JsonPropertyName("title")]
|
||||
public string? Title { get; init; }
|
||||
}
|
||||
|
||||
internal sealed record CccsFeedResult(
|
||||
IReadOnlyList<CccsFeedItem> Items,
|
||||
IReadOnlyDictionary<int, string> AlertTypes,
|
||||
DateTimeOffset? LastModifiedUtc)
|
||||
{
|
||||
public static CccsFeedResult Empty { get; } = new(
|
||||
Array.Empty<CccsFeedItem>(),
|
||||
new Dictionary<int, string>(0),
|
||||
null);
|
||||
}
|
||||
|
||||
internal static class CccsFeedResultExtensions
|
||||
{
|
||||
public static CccsFeedResult ToResult(this IReadOnlyList<CccsFeedItem> items, DateTimeOffset? lastModified, IReadOnlyDictionary<int, string> alertTypes)
|
||||
=> new(items, alertTypes, lastModified);
|
||||
}
|
||||
@@ -0,0 +1,449 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using AngleSharp.Dom;
|
||||
using AngleSharp.Html.Dom;
|
||||
using AngleSharp.Html.Parser;
|
||||
using StellaOps.Concelier.Connector.Common.Html;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
public sealed class CccsHtmlParser
|
||||
{
|
||||
private static readonly Regex SerialRegex = new(@"(?:(Number|Num[eé]ro)\s*[::]\s*)(?<id>[A-Z0-9\-\/]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex DateRegex = new(@"(?:(Date|Date de publication)\s*[::]\s*)(?<date>[A-Za-zÀ-ÿ0-9,\.\s\-]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex CveRegex = new(@"CVE-\d{4}-\d{4,}", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex CollapseWhitespaceRegex = new(@"\s+", RegexOptions.Compiled);
|
||||
|
||||
private static readonly CultureInfo[] EnglishCultures =
|
||||
{
|
||||
CultureInfo.GetCultureInfo("en-CA"),
|
||||
CultureInfo.GetCultureInfo("en-US"),
|
||||
CultureInfo.InvariantCulture,
|
||||
};
|
||||
|
||||
private static readonly CultureInfo[] FrenchCultures =
|
||||
{
|
||||
CultureInfo.GetCultureInfo("fr-CA"),
|
||||
CultureInfo.GetCultureInfo("fr-FR"),
|
||||
CultureInfo.InvariantCulture,
|
||||
};
|
||||
|
||||
private static readonly string[] ProductHeadingKeywords =
|
||||
{
|
||||
"affected",
|
||||
"produit",
|
||||
"produits",
|
||||
"produits touch",
|
||||
"produits concern",
|
||||
"mesures recommand",
|
||||
};
|
||||
|
||||
private static readonly string[] TrackingParameterPrefixes =
|
||||
{
|
||||
"utm_",
|
||||
"mc_",
|
||||
"mkt_",
|
||||
"elq",
|
||||
};
|
||||
|
||||
private readonly HtmlContentSanitizer _sanitizer;
|
||||
private readonly HtmlParser _parser;
|
||||
|
||||
public CccsHtmlParser(HtmlContentSanitizer sanitizer)
|
||||
{
|
||||
_sanitizer = sanitizer ?? throw new ArgumentNullException(nameof(sanitizer));
|
||||
_parser = new HtmlParser(new HtmlParserOptions
|
||||
{
|
||||
IsScripting = false,
|
||||
IsKeepingSourceReferences = false,
|
||||
});
|
||||
}
|
||||
|
||||
internal CccsAdvisoryDto Parse(CccsRawAdvisoryDocument raw)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(raw);
|
||||
|
||||
var baseUri = TryCreateUri(raw.CanonicalUrl);
|
||||
var document = _parser.ParseDocument(raw.BodyHtml ?? string.Empty);
|
||||
var body = document.Body ?? document.DocumentElement;
|
||||
var sanitized = _sanitizer.Sanitize(body?.InnerHtml ?? raw.BodyHtml ?? string.Empty, baseUri);
|
||||
var contentRoot = body ?? document.DocumentElement;
|
||||
|
||||
var serialNumber = !string.IsNullOrWhiteSpace(raw.SerialNumber)
|
||||
? raw.SerialNumber!.Trim()
|
||||
: ExtractSerialNumber(document) ?? raw.SourceId;
|
||||
|
||||
var published = raw.Published ?? ExtractDate(document, raw.Language) ?? raw.Modified;
|
||||
var references = ExtractReferences(contentRoot, baseUri, raw.Language);
|
||||
var products = ExtractProducts(contentRoot);
|
||||
var cveIds = ExtractCveIds(document);
|
||||
|
||||
return new CccsAdvisoryDto
|
||||
{
|
||||
SourceId = raw.SourceId,
|
||||
SerialNumber = serialNumber,
|
||||
Language = raw.Language,
|
||||
Title = raw.Title,
|
||||
Summary = CollapseWhitespace(raw.Summary),
|
||||
CanonicalUrl = raw.CanonicalUrl,
|
||||
ContentHtml = sanitized,
|
||||
Published = published,
|
||||
Modified = raw.Modified ?? published,
|
||||
AlertType = raw.AlertType,
|
||||
Subject = raw.Subject,
|
||||
Products = products,
|
||||
References = references,
|
||||
CveIds = cveIds,
|
||||
};
|
||||
}
|
||||
|
||||
private static Uri? TryCreateUri(string? value)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return Uri.TryCreate(value, UriKind.Absolute, out var absolute) ? absolute : null;
|
||||
}
|
||||
|
||||
private static string? ExtractSerialNumber(IDocument document)
|
||||
{
|
||||
if (document.Body is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
foreach (var element in document.QuerySelectorAll("strong, p, div"))
|
||||
{
|
||||
var text = element.TextContent;
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var match = SerialRegex.Match(text);
|
||||
if (match.Success && match.Groups["id"].Success)
|
||||
{
|
||||
var value = match.Groups["id"].Value.Trim();
|
||||
if (!string.IsNullOrWhiteSpace(value))
|
||||
{
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var bodyText = document.Body.TextContent;
|
||||
var fallback = SerialRegex.Match(bodyText ?? string.Empty);
|
||||
return fallback.Success && fallback.Groups["id"].Success
|
||||
? fallback.Groups["id"].Value.Trim()
|
||||
: null;
|
||||
}
|
||||
|
||||
private static DateTimeOffset? ExtractDate(IDocument document, string language)
|
||||
{
|
||||
if (document.Body is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var textSegments = new List<string>();
|
||||
foreach (var element in document.QuerySelectorAll("strong, p, div"))
|
||||
{
|
||||
var text = element.TextContent;
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var match = DateRegex.Match(text);
|
||||
if (match.Success && match.Groups["date"].Success)
|
||||
{
|
||||
textSegments.Add(match.Groups["date"].Value.Trim());
|
||||
}
|
||||
}
|
||||
|
||||
if (textSegments.Count == 0 && !string.IsNullOrWhiteSpace(document.Body.TextContent))
|
||||
{
|
||||
textSegments.Add(document.Body.TextContent);
|
||||
}
|
||||
|
||||
var cultures = language.StartsWith("fr", StringComparison.OrdinalIgnoreCase) ? FrenchCultures : EnglishCultures;
|
||||
|
||||
foreach (var segment in textSegments)
|
||||
{
|
||||
foreach (var culture in cultures)
|
||||
{
|
||||
if (DateTime.TryParse(segment, culture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var parsed))
|
||||
{
|
||||
return new DateTimeOffset(parsed.ToUniversalTime());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static IReadOnlyList<string> ExtractProducts(IElement? root)
|
||||
{
|
||||
if (root is null)
|
||||
{
|
||||
return Array.Empty<string>();
|
||||
}
|
||||
|
||||
var results = new List<string>();
|
||||
|
||||
foreach (var heading in root.QuerySelectorAll("h1,h2,h3,h4,h5,h6"))
|
||||
{
|
||||
var text = heading.TextContent?.Trim();
|
||||
if (!IsProductHeading(text))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var sibling = heading.NextElementSibling;
|
||||
while (sibling is not null)
|
||||
{
|
||||
if (IsHeading(sibling))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (IsListElement(sibling))
|
||||
{
|
||||
AppendListItems(sibling, results);
|
||||
if (results.Count > 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (IsContentContainer(sibling))
|
||||
{
|
||||
foreach (var list in sibling.QuerySelectorAll("ul,ol"))
|
||||
{
|
||||
AppendListItems(list, results);
|
||||
}
|
||||
|
||||
if (results.Count > 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sibling = sibling.NextElementSibling;
|
||||
}
|
||||
|
||||
if (results.Count > 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (results.Count == 0)
|
||||
{
|
||||
foreach (var li in root.QuerySelectorAll("ul li,ol li"))
|
||||
{
|
||||
var itemText = CollapseWhitespace(li.TextContent);
|
||||
if (!string.IsNullOrWhiteSpace(itemText))
|
||||
{
|
||||
results.Add(itemText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results.Count == 0
|
||||
? Array.Empty<string>()
|
||||
: results
|
||||
.Where(static value => !string.IsNullOrWhiteSpace(value))
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
private static bool IsProductHeading(string? heading)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(heading))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
var lowered = heading.ToLowerInvariant();
|
||||
return ProductHeadingKeywords.Any(keyword => lowered.Contains(keyword, StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
||||
private static bool IsHeading(IElement element)
|
||||
=> element.LocalName.Length == 2
|
||||
&& element.LocalName[0] == 'h'
|
||||
&& char.IsDigit(element.LocalName[1]);
|
||||
|
||||
private static bool IsListElement(IElement element)
|
||||
=> string.Equals(element.LocalName, "ul", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(element.LocalName, "ol", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static bool IsContentContainer(IElement element)
|
||||
=> string.Equals(element.LocalName, "div", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(element.LocalName, "section", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(element.LocalName, "article", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static void AppendListItems(IElement listElement, ICollection<string> buffer)
|
||||
{
|
||||
foreach (var li in listElement.QuerySelectorAll("li"))
|
||||
{
|
||||
if (li is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var clone = li.Clone(true) as IElement;
|
||||
if (clone is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach (var nested in clone.QuerySelectorAll("ul,ol"))
|
||||
{
|
||||
nested.Remove();
|
||||
}
|
||||
|
||||
var itemText = CollapseWhitespace(clone.TextContent);
|
||||
if (!string.IsNullOrWhiteSpace(itemText))
|
||||
{
|
||||
buffer.Add(itemText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static IReadOnlyList<CccsReferenceDto> ExtractReferences(IElement? root, Uri? baseUri, string language)
|
||||
{
|
||||
if (root is null)
|
||||
{
|
||||
return Array.Empty<CccsReferenceDto>();
|
||||
}
|
||||
|
||||
var references = new List<CccsReferenceDto>();
|
||||
foreach (var anchor in root.QuerySelectorAll("a[href]"))
|
||||
{
|
||||
var href = anchor.GetAttribute("href");
|
||||
var normalized = NormalizeReferenceUrl(href, baseUri, language);
|
||||
if (normalized is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var label = CollapseWhitespace(anchor.TextContent);
|
||||
references.Add(new CccsReferenceDto(normalized, string.IsNullOrWhiteSpace(label) ? null : label));
|
||||
}
|
||||
|
||||
return references.Count == 0
|
||||
? Array.Empty<CccsReferenceDto>()
|
||||
: references
|
||||
.GroupBy(reference => reference.Url, StringComparer.Ordinal)
|
||||
.Select(group => group.First())
|
||||
.OrderBy(reference => reference.Url, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
private static string? NormalizeReferenceUrl(string? href, Uri? baseUri, string language)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(href))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!Uri.TryCreate(href, UriKind.Absolute, out var absolute))
|
||||
{
|
||||
if (baseUri is null || !Uri.TryCreate(baseUri, href, out absolute))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
var builder = new UriBuilder(absolute)
|
||||
{
|
||||
Fragment = string.Empty,
|
||||
};
|
||||
|
||||
var filteredQuery = FilterTrackingParameters(builder.Query, builder.Uri, language);
|
||||
builder.Query = filteredQuery;
|
||||
|
||||
return builder.Uri.ToString();
|
||||
}
|
||||
|
||||
private static string FilterTrackingParameters(string query, Uri uri, string language)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(query))
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
var trimmed = query.TrimStart('?');
|
||||
if (string.IsNullOrWhiteSpace(trimmed))
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
var parameters = trimmed.Split('&', StringSplitOptions.RemoveEmptyEntries);
|
||||
var kept = new List<string>();
|
||||
|
||||
foreach (var parameter in parameters)
|
||||
{
|
||||
var separatorIndex = parameter.IndexOf('=');
|
||||
var key = separatorIndex >= 0 ? parameter[..separatorIndex] : parameter;
|
||||
if (TrackingParameterPrefixes.Any(prefix => key.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uri.Host.Contains("cyber.gc.ca", StringComparison.OrdinalIgnoreCase)
|
||||
&& key.Equals("lang", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
kept.Add($"lang={language}");
|
||||
continue;
|
||||
}
|
||||
|
||||
kept.Add(parameter);
|
||||
}
|
||||
|
||||
if (uri.Host.Contains("cyber.gc.ca", StringComparison.OrdinalIgnoreCase)
|
||||
&& kept.All(parameter => !parameter.StartsWith("lang=", StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
kept.Add($"lang={language}");
|
||||
}
|
||||
|
||||
return kept.Count == 0 ? string.Empty : string.Join("&", kept);
|
||||
}
|
||||
|
||||
private static IReadOnlyList<string> ExtractCveIds(IDocument document)
|
||||
{
|
||||
if (document.Body is null)
|
||||
{
|
||||
return Array.Empty<string>();
|
||||
}
|
||||
|
||||
var matches = CveRegex.Matches(document.Body.TextContent ?? string.Empty);
|
||||
if (matches.Count == 0)
|
||||
{
|
||||
return Array.Empty<string>();
|
||||
}
|
||||
|
||||
return matches
|
||||
.Select(match => match.Value.ToUpperInvariant())
|
||||
.Distinct(StringComparer.Ordinal)
|
||||
.OrderBy(value => value, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
private static string? CollapseWhitespace(string? value)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var collapsed = CollapseWhitespaceRegex.Replace(value, " ").Trim();
|
||||
return collapsed.Length == 0 ? null : collapsed;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,151 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using StellaOps.Concelier.Models;
|
||||
using StellaOps.Concelier.Storage.Mongo.Documents;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
internal static class CccsMapper
|
||||
{
|
||||
public static Advisory Map(CccsAdvisoryDto dto, DocumentRecord document, DateTimeOffset recordedAt)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(dto);
|
||||
ArgumentNullException.ThrowIfNull(document);
|
||||
|
||||
var aliases = BuildAliases(dto);
|
||||
var references = BuildReferences(dto, recordedAt);
|
||||
var packages = BuildPackages(dto, recordedAt);
|
||||
var provenance = new[]
|
||||
{
|
||||
new AdvisoryProvenance(
|
||||
CccsConnectorPlugin.SourceName,
|
||||
"advisory",
|
||||
dto.AlertType ?? dto.SerialNumber,
|
||||
recordedAt,
|
||||
new[] { ProvenanceFieldMasks.Advisory })
|
||||
};
|
||||
|
||||
return new Advisory(
|
||||
advisoryKey: dto.SerialNumber,
|
||||
title: dto.Title,
|
||||
summary: dto.Summary,
|
||||
language: dto.Language,
|
||||
published: dto.Published ?? dto.Modified,
|
||||
modified: dto.Modified ?? dto.Published,
|
||||
severity: null,
|
||||
exploitKnown: false,
|
||||
aliases: aliases,
|
||||
references: references,
|
||||
affectedPackages: packages,
|
||||
cvssMetrics: Array.Empty<CvssMetric>(),
|
||||
provenance: provenance);
|
||||
}
|
||||
|
||||
private static IReadOnlyList<string> BuildAliases(CccsAdvisoryDto dto)
|
||||
{
|
||||
var aliases = new List<string>(capacity: 4)
|
||||
{
|
||||
dto.SerialNumber,
|
||||
};
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(dto.SourceId)
|
||||
&& !string.Equals(dto.SourceId, dto.SerialNumber, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
aliases.Add(dto.SourceId);
|
||||
}
|
||||
|
||||
foreach (var cve in dto.CveIds)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(cve))
|
||||
{
|
||||
aliases.Add(cve);
|
||||
}
|
||||
}
|
||||
|
||||
return aliases
|
||||
.Where(static alias => !string.IsNullOrWhiteSpace(alias))
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.OrderBy(static alias => alias, StringComparer.OrdinalIgnoreCase)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
private static IReadOnlyList<AdvisoryReference> BuildReferences(CccsAdvisoryDto dto, DateTimeOffset recordedAt)
|
||||
{
|
||||
var references = new List<AdvisoryReference>
|
||||
{
|
||||
new(dto.CanonicalUrl, "details", "cccs", null, new AdvisoryProvenance(
|
||||
CccsConnectorPlugin.SourceName,
|
||||
"reference",
|
||||
dto.CanonicalUrl,
|
||||
recordedAt,
|
||||
new[] { ProvenanceFieldMasks.References }))
|
||||
};
|
||||
|
||||
foreach (var reference in dto.References)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(reference.Url))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
references.Add(new AdvisoryReference(
|
||||
reference.Url,
|
||||
"reference",
|
||||
"cccs",
|
||||
reference.Label,
|
||||
new AdvisoryProvenance(
|
||||
CccsConnectorPlugin.SourceName,
|
||||
"reference",
|
||||
reference.Url,
|
||||
recordedAt,
|
||||
new[] { ProvenanceFieldMasks.References })));
|
||||
}
|
||||
|
||||
return references
|
||||
.DistinctBy(static reference => reference.Url, StringComparer.Ordinal)
|
||||
.OrderBy(static reference => reference.Url, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
private static IReadOnlyList<AffectedPackage> BuildPackages(CccsAdvisoryDto dto, DateTimeOffset recordedAt)
|
||||
{
|
||||
if (dto.Products.Count == 0)
|
||||
{
|
||||
return Array.Empty<AffectedPackage>();
|
||||
}
|
||||
|
||||
var packages = new List<AffectedPackage>(dto.Products.Count);
|
||||
foreach (var product in dto.Products)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(product))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var identifier = product.Trim();
|
||||
var provenance = new AdvisoryProvenance(
|
||||
CccsConnectorPlugin.SourceName,
|
||||
"package",
|
||||
identifier,
|
||||
recordedAt,
|
||||
new[] { ProvenanceFieldMasks.AffectedPackages });
|
||||
|
||||
packages.Add(new AffectedPackage(
|
||||
AffectedPackageTypes.Vendor,
|
||||
identifier,
|
||||
platform: null,
|
||||
versionRanges: Array.Empty<AffectedVersionRange>(),
|
||||
statuses: Array.Empty<AffectedPackageStatus>(),
|
||||
provenance: new[] { provenance },
|
||||
normalizedVersions: Array.Empty<NormalizedVersionRule>()));
|
||||
}
|
||||
|
||||
return packages.Count == 0
|
||||
? Array.Empty<AffectedPackage>()
|
||||
: packages
|
||||
.DistinctBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
|
||||
.OrderBy(static package => package.Identifier, StringComparer.OrdinalIgnoreCase)
|
||||
.ToArray();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
using System;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs.Internal;
|
||||
|
||||
internal sealed record CccsRawAdvisoryDocument
|
||||
{
|
||||
[JsonPropertyName("sourceId")]
|
||||
public string SourceId { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("serialNumber")]
|
||||
public string? SerialNumber { get; init; }
|
||||
|
||||
[JsonPropertyName("uuid")]
|
||||
public string? Uuid { get; init; }
|
||||
|
||||
[JsonPropertyName("language")]
|
||||
public string Language { get; init; } = "en";
|
||||
|
||||
[JsonPropertyName("title")]
|
||||
public string Title { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("summary")]
|
||||
public string? Summary { get; init; }
|
||||
|
||||
[JsonPropertyName("canonicalUrl")]
|
||||
public string CanonicalUrl { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("externalUrl")]
|
||||
public string? ExternalUrl { get; init; }
|
||||
|
||||
[JsonPropertyName("bodyHtml")]
|
||||
public string BodyHtml { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("bodySegments")]
|
||||
public string[] BodySegments { get; init; } = Array.Empty<string>();
|
||||
|
||||
[JsonPropertyName("alertType")]
|
||||
public string? AlertType { get; init; }
|
||||
|
||||
[JsonPropertyName("subject")]
|
||||
public string? Subject { get; init; }
|
||||
|
||||
[JsonPropertyName("banner")]
|
||||
public string? Banner { get; init; }
|
||||
|
||||
[JsonPropertyName("published")]
|
||||
public DateTimeOffset? Published { get; init; }
|
||||
|
||||
[JsonPropertyName("modified")]
|
||||
public DateTimeOffset? Modified { get; init; }
|
||||
|
||||
[JsonPropertyName("rawCreated")]
|
||||
public string? RawDateCreated { get; init; }
|
||||
|
||||
[JsonPropertyName("rawModified")]
|
||||
public string? RawDateModified { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.Concelier.Core.Jobs;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Cccs;
|
||||
|
||||
internal static class CccsJobKinds
|
||||
{
|
||||
public const string Fetch = "source:cccs:fetch";
|
||||
}
|
||||
|
||||
internal sealed class CccsFetchJob : IJob
|
||||
{
|
||||
private readonly CccsConnector _connector;
|
||||
|
||||
public CccsFetchJob(CccsConnector connector)
|
||||
=> _connector = connector ?? throw new ArgumentNullException(nameof(connector));
|
||||
|
||||
public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
|
||||
=> _connector.FetchAsync(context.Services, cancellationToken);
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("StellaOps.Concelier.Connector.Cccs.Tests")]
|
||||
@@ -0,0 +1,17 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="../../../__Libraries/StellaOps.Plugin/StellaOps.Plugin.csproj" />
|
||||
|
||||
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
|
||||
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
|
||||
<ProjectReference Include="../StellaOps.Concelier.Storage.Mongo/StellaOps.Concelier.Storage.Mongo.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,12 @@
|
||||
# TASKS
|
||||
| Task | Owner(s) | Depends on | Notes |
|
||||
|---|---|---|---|
|
||||
|FEEDCONN-CCCS-02-001 Catalogue official CCCS advisory feeds|BE-Conn-CCCS|Research|**DONE (2025-10-11)** – Resolved RSS→Atom redirects (`/api/cccs/rss/v1/get?...` → `/api/cccs/atom/v1/get?...`), confirmed feed caps at 50 entries with inline HTML bodies, no `Last-Modified`/`ETag`, and `updated` timestamps in UTC. Findings and packet captures parked in `docs/concelier-connector-research-20251011.md`; retention sweep follow-up tracked in 02-007.|
|
||||
|FEEDCONN-CCCS-02-002 Implement fetch & source state handling|BE-Conn-CCCS|Source.Common, Storage.Mongo|**DONE (2025-10-14)** – `CccsConnector.FetchAsync` now hydrates feeds via `CccsFeedClient`, persists per-entry JSON payloads with SHA256 dedupe and cursor state, throttles requests, and records taxonomy + language metadata in document state.|
|
||||
|FEEDCONN-CCCS-02-003 DTO/parser implementation|BE-Conn-CCCS|Source.Common|**DONE (2025-10-14)** – Added `CccsHtmlParser` to sanitize Atom body HTML, extract serial/date/product bullets, collapse whitespace, and emit normalized reference URLs; `ParseAsync` now persists DTO records under schema `cccs.dto.v1`.|
|
||||
|FEEDCONN-CCCS-02-004 Canonical mapping & range primitives|BE-Conn-CCCS|Models|**DONE (2025-10-14)** – `CccsMapper` now materializes canonical advisories (aliases from serial/source/CVEs, references incl. canonical URL, vendor package records) with provenance masks; `MapAsync` stores results in `AdvisoryStore`.|
|
||||
|FEEDCONN-CCCS-02-005 Deterministic fixtures & tests|QA|Testing|**DONE (2025-10-14)** – Added English/French fixtures plus parser + connector end-to-end tests (`StellaOps.Concelier.Connector.Cccs.Tests`). Canned HTTP handler + Mongo fixture enables fetch→parse→map regression; fixtures refresh via `UPDATE_CCCS_FIXTURES=1`.|
|
||||
|FEEDCONN-CCCS-02-006 Observability & documentation|DevEx|Docs|**DONE (2025-10-15)** – Added `CccsDiagnostics` meter (fetch/parse/map counters), enriched connector logs with document counts, and published `docs/ops/concelier-cccs-operations.md` covering config, telemetry, and sanitiser guidance.|
|
||||
|FEEDCONN-CCCS-02-007 Historical advisory harvesting plan|BE-Conn-CCCS|Research|**DONE (2025-10-15)** – Measured `/api/cccs/threats/v1/get` inventory (~5.1k rows/lang; earliest 2018-06-08), documented backfill workflow + language split strategy, and linked the runbook for Offline Kit execution.|
|
||||
|FEEDCONN-CCCS-02-008 Raw DOM parsing refinement|BE-Conn-CCCS|Source.Common|**DONE (2025-10-15)** – Parser now walks unsanitised DOM (heading + nested list coverage), sanitizer keeps `<h#>`/`section` nodes, and regression fixtures/tests assert EN/FR list handling + preserved HTML structure.|
|
||||
|FEEDCONN-CCCS-02-009 Normalized versions rollout (Oct 2025)|BE-Conn-CCCS|Merge coordination (`FEEDMERGE-COORD-02-900`)|**TODO (due 2025-10-21)** – Implement trailing-version split helper per Merge guidance (see `../Merge/RANGE_PRIMITIVES_COORDINATION.md` “Helper snippets”) to emit `NormalizedVersions` via `SemVerRangeRuleBuilder`; refresh mapper tests/fixtures to assert provenance notes (`cccs:{serial}:{index}`) and confirm merge counters drop.|
|
||||
Reference in New Issue
Block a user