Rename Concelier Source modules to Connector

This commit is contained in:
master
2025-10-18 20:11:18 +03:00
parent 89ede53cc3
commit 052da7a7d0
789 changed files with 1489 additions and 1489 deletions

View File

@@ -0,0 +1,28 @@
# AGENTS
## Role
CERT-In national CERT connector; enrichment advisories for India; maps CVE lists, advisory text, mitigations, and references; non-authoritative for package ranges unless explicit evidence is present.
## Scope
- Discover and fetch advisories from the CERT-In portal; window by advisory code/date; follow detail pages.
- Validate HTML or JSON; extract title, summary, CVEs, affected vendor names, mitigations; map references; normalize dates and IDs.
- Persist raw docs and maintain source_state cursor; idempotent mapping.
## Participants
- Connector.Common (HTTP, HTML parsing, normalization, validators).
- Storage.Mongo (document, dto, advisory, alias, reference, source_state).
- Models (canonical).
- Core/WebService (jobs: source:cert-in:fetch|parse|map).
- Merge engine treats CERT-In as enrichment (no override of PSIRT or OVAL without concrete ranges).
## Interfaces & contracts
- Aliases: advisory code if stable (scheme "CERT-IN") and CVE ids; if code is not stable, store as reference only.
- References typed: bulletin/advisory/vendor/mitigation; deduped.
- Affected omitted unless CERT-In publishes explicit version or fix details.
- Provenance: method=parser; value=advisory code or URL; recordedAt.
## In/Out of scope
In: enrichment, aliasing where stable, references, mitigation text.
Out: package range authority; scraping behind auth walls.
## Observability & security expectations
- Metrics: shared `concelier.source.http.*` counters/histograms from SourceDiagnostics tagged `concelier.source=certin` capture fetch volume, parse failures, and map enrich counts.
- Logs: advisory codes, CVE counts per advisory, timing; allowlist host; redact personal data if present.
## Tests
- Author and review coverage in `../StellaOps.Concelier.Connector.CertIn.Tests`.
- Shared fixtures (e.g., `MongoIntegrationFixture`, `ConnectorTestHarness`) live in `../StellaOps.Concelier.Testing`.
- Keep fixtures deterministic; match new cases to real-world advisories or regression scenarios.

View File

@@ -0,0 +1,462 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
public sealed class CertInConnector : IFeedConnector
{
/// <summary>
/// Serializer settings shared by the parse and map stages: camelCase property names,
/// compact output, and null-valued properties left out when writing.
/// </summary>
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
    DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    WriteIndented = false,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
};

private readonly CertInClient _client;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CertInOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CertInConnector> _logger;

/// <summary>
/// Creates the connector from its collaborators. Every dependency except
/// <paramref name="timeProvider"/> is required; the options are validated immediately.
/// </summary>
/// <param name="timeProvider">Clock to use; <see cref="TimeProvider.System"/> is used when null.</param>
/// <exception cref="ArgumentNullException">A required dependency (or the options value) is null.</exception>
public CertInConnector(
    CertInClient client,
    SourceFetchService fetchService,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    IAdvisoryStore advisoryStore,
    ISourceStateRepository stateRepository,
    IOptions<CertInOptions> options,
    TimeProvider? timeProvider,
    ILogger<CertInConnector> logger)
{
    // Guards run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(client);
    ArgumentNullException.ThrowIfNull(fetchService);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(dtoStore);
    ArgumentNullException.ThrowIfNull(advisoryStore);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value ?? throw new ArgumentNullException(nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(logger);

    _client = client;
    _fetchService = fetchService;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _dtoStore = dtoStore;
    _advisoryStore = advisoryStore;
    _stateRepository = stateRepository;
    _options = resolvedOptions;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
}

/// <summary>Source identifier used for documents, state and provenance; same value as the plugin's.</summary>
public string SourceName
{
    get { return CertInConnectorPlugin.SourceName; }
}
/// <summary>
/// Walks the CERT-In listing pages, fetches every advisory detail page that falls inside the
/// current window, and stores the updated cursor (pending document ids and newest published timestamp).
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Token forwarded to every store, HTTP and delay call.</param>
/// <remarks>
/// The window starts at the cursor's last published timestamp minus the configured overlap, or at
/// "now minus window size" when no timestamp has been recorded yet. When the listing call or a detail
/// fetch throws, the failure is recorded through <c>MarkFailureAsync</c> (5 and 3 minute values
/// respectively) and the exception is rethrown, so the cursor is not updated for that run.
/// </remarks>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    var now = _timeProvider.GetUtcNow();
    var windowStart = cursor.LastPublished.HasValue
        ? cursor.LastPublished.Value - _options.WindowOverlap
        : now - _options.WindowSize;

    var pendingDocuments = cursor.PendingDocuments.ToHashSet();
    var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;

    // An explicit flag ends paging once a listing older than the window is seen. The previous
    // approach overwrote the loop counter with MaxPagesPerFetch + 1, which overflows (and restarts
    // paging) when MaxPagesPerFetch is int.MaxValue. Counting fetched pages from zero keeps every
    // arithmetic step within range.
    var reachedWindowStart = false;
    for (var pagesFetched = 0; pagesFetched < _options.MaxPagesPerFetch && !reachedWindowStart; pagesFetched++)
    {
        var page = pagesFetched + 1;

        IReadOnlyList<CertInListingItem> listings;
        try
        {
            listings = await _client.GetListingsAsync(page, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "CERT-In listings fetch failed for page {Page}", page);
            await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
            throw;
        }

        if (listings.Count == 0)
        {
            break;
        }

        // Newest first, so the first listing older than the window means everything after it is older too.
        foreach (var listing in listings.OrderByDescending(static item => item.Published))
        {
            if (listing.Published < windowStart)
            {
                reachedWindowStart = true;
                break;
            }

            // Listing fields travel with the raw document so the parse stage can rebuild the listing.
            var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["certin.advisoryId"] = listing.AdvisoryId,
                ["certin.title"] = listing.Title,
                ["certin.link"] = listing.DetailUri.ToString(),
                ["certin.published"] = listing.Published.ToString("O")
            };

            if (!string.IsNullOrWhiteSpace(listing.Summary))
            {
                metadata["certin.summary"] = listing.Summary!;
            }

            var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, listing.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);

            SourceFetchResult result;
            try
            {
                result = await _fetchService.FetchAsync(
                    new SourceFetchRequest(CertInOptions.HttpClientName, SourceName, listing.DetailUri)
                    {
                        Metadata = metadata,
                        // Validators from the previously stored copy, if any.
                        ETag = existing?.Etag,
                        LastModified = existing?.LastModified,
                        AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
                    },
                    cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "CERT-In fetch failed for {Uri}", listing.DetailUri);
                await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(3), ex.Message, cancellationToken).ConfigureAwait(false);
                throw;
            }

            if (!result.IsSuccess || result.Document is null)
            {
                continue;
            }

            // Same content hash as an already mapped document: keep its status and do not re-queue it.
            if (existing is not null
                && string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
                && string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
            {
                await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
                continue;
            }

            pendingDocuments.Add(result.Document.Id);
            if (listing.Published > maxPublished)
            {
                maxPublished = listing.Published;
            }

            if (_options.RequestDelay > TimeSpan.Zero)
            {
                await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    // DateTimeOffset.MinValue is the "nothing seen" sentinel; keep the stored timestamp in that case.
    var updatedCursor = cursor
        .WithPendingDocuments(pendingDocuments)
        .WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Turns every pending raw document into a stored DTO and queues it for mapping.
/// Documents that no longer exist are dropped from the queue; documents without a stored payload
/// or without usable listing metadata are marked failed and dropped.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Checked before each document and forwarded to every store call.</param>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var state = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (state.PendingDocuments.Count == 0)
    {
        return;
    }

    var stillPending = state.PendingDocuments.ToList();
    var awaitingMap = state.PendingMappings.ToList();

    foreach (var pendingId in state.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var record = await _documentStore.FindAsync(pendingId, cancellationToken).ConfigureAwait(false);
        if (record is null)
        {
            stillPending.Remove(pendingId);
            continue;
        }

        // Both rejection reasons share the same follow-up; only the log line differs.
        CertInListingItem listing = null!;
        var usable = true;
        if (!record.GridFsId.HasValue)
        {
            _logger.LogWarning("CERT-In document {DocumentId} missing GridFS payload", record.Id);
            usable = false;
        }
        else if (!TryDeserializeListing(record.Metadata, out listing))
        {
            _logger.LogWarning("CERT-In metadata missing for {DocumentId}", record.Id);
            usable = false;
        }

        if (!usable)
        {
            await _documentStore.UpdateStatusAsync(record.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            stillPending.Remove(pendingId);
            continue;
        }

        byte[] payloadBytes;
        try
        {
            payloadBytes = await _rawDocumentStorage.DownloadAsync(record.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // A download failure aborts the whole run; the cursor is left untouched.
            _logger.LogError(ex, "Failed to download raw CERT-In document {DocumentId}", record.Id);
            throw;
        }

        var parsed = CertInDetailParser.Parse(listing, payloadBytes);
        var serialized = JsonSerializer.Serialize(parsed, SerializerOptions);
        var bson = BsonDocument.Parse(serialized);

        await _dtoStore.UpsertAsync(
            new DtoRecord(Guid.NewGuid(), record.Id, SourceName, "certin.v1", bson, _timeProvider.GetUtcNow()),
            cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(record.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);

        stillPending.Remove(pendingId);
        if (!awaitingMap.Contains(pendingId))
        {
            awaitingMap.Add(pendingId);
        }
    }

    var nextState = state
        .WithPendingDocuments(stillPending)
        .WithPendingMappings(awaitingMap);
    await UpdateCursorAsync(nextState, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Converts every DTO queued for mapping into a canonical advisory, stores it, and removes the
/// document from the mapping queue. Entries whose DTO or document is gone are dropped; DTOs that
/// cannot be deserialized mark the document as failed.
/// </summary>
/// <param name="services">Service provider supplied by the caller; only null-checked here.</param>
/// <param name="cancellationToken">Checked before each entry and forwarded to every store call.</param>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var state = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (state.PendingMappings.Count == 0)
    {
        return;
    }

    var outstanding = state.PendingMappings.ToList();

    foreach (var mappingId in state.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var storedDto = await _dtoStore.FindByDocumentIdAsync(mappingId, cancellationToken).ConfigureAwait(false);
        var record = await _documentStore.FindAsync(mappingId, cancellationToken).ConfigureAwait(false);
        if (!(storedDto is not null && record is not null))
        {
            outstanding.Remove(mappingId);
            continue;
        }

        var writerSettings = new MongoDB.Bson.IO.JsonWriterSettings
        {
            OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
        };
        var json = storedDto.Payload.ToJson(writerSettings);

        CertInAdvisoryDto? advisoryDto = null;
        try
        {
            advisoryDto = JsonSerializer.Deserialize<CertInAdvisoryDto>(json, SerializerOptions)
                ?? throw new InvalidOperationException("Deserialized CERT-In DTO is null.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to deserialize CERT-In DTO for {DocumentId}", record.Id);
        }

        if (advisoryDto is null)
        {
            // Deserialization failed (already logged): flag the document and stop retrying it.
            await _documentStore.UpdateStatusAsync(record.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            outstanding.Remove(mappingId);
            continue;
        }

        var mapped = MapAdvisory(advisoryDto, record, storedDto);
        await _advisoryStore.UpsertAsync(mapped, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(record.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
        outstanding.Remove(mappingId);
    }

    await UpdateCursorAsync(state.WithPendingMappings(outstanding), cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Builds the canonical advisory for a parsed CERT-In DTO: aliases (advisory id plus CVE ids),
/// references (advisory page, one cve.org link per CVE, every extracted link), and one
/// vendor-only affected package per extracted vendor name.
/// </summary>
/// <param name="dto">Parsed advisory content.</param>
/// <param name="document">Raw document record; supplies the fetch provenance (URI and fetch time).</param>
/// <param name="dtoRecord">Stored DTO record; its validation time stamps all other provenance entries.</param>
private Advisory MapAdvisory(CertInAdvisoryDto dto, DocumentRecord document, DtoRecord dtoRecord)
{
    var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
    var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", dto.AdvisoryId, dtoRecord.ValidatedAt);

    var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    aliases.Add(dto.AdvisoryId);
    foreach (var cveId in dto.CveIds)
    {
        aliases.Add(cveId);
    }

    var references = new List<AdvisoryReference>();

    // Adds one reference; entries rejected with ArgumentException are skipped
    // (and logged only for the advisory's own link).
    void TryAddReference(string url, string kind, string? sourceTag, bool warnWhenRejected)
    {
        try
        {
            references.Add(new AdvisoryReference(
                url,
                kind,
                sourceTag,
                null,
                new AdvisoryProvenance(SourceName, "reference", url, dtoRecord.ValidatedAt)));
        }
        catch (ArgumentException)
        {
            if (warnWhenRejected)
            {
                _logger.LogWarning("Invalid CERT-In link {Link} for advisory {AdvisoryId}", url, dto.AdvisoryId);
            }
        }
    }

    TryAddReference(dto.Link, "advisory", "cert-in", warnWhenRejected: true);

    foreach (var cveId in dto.CveIds)
    {
        TryAddReference($"https://www.cve.org/CVERecord?id={cveId}", "advisory", cveId, warnWhenRejected: false);
    }

    foreach (var extractedLink in dto.ReferenceLinks)
    {
        TryAddReference(extractedLink, "reference", null, warnWhenRejected: false);
    }

    var affected = new List<AffectedPackage>();
    foreach (var vendor in dto.VendorNames)
    {
        var vendorProvenance = new AdvisoryProvenance(SourceName, "affected", vendor, dtoRecord.ValidatedAt);

        // No version data is available; the range only carries the vendor name as an extension value.
        var vendorOnly = new RangePrimitives(
            null,
            null,
            null,
            new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
            {
                ["certin.vendor"] = vendor
            });

        var vendorRange = new AffectedVersionRange(
            rangeKind: "vendor",
            introducedVersion: null,
            fixedVersion: null,
            lastAffectedVersion: null,
            rangeExpression: null,
            provenance: vendorProvenance,
            primitives: vendorOnly);

        affected.Add(new AffectedPackage(
            AffectedPackageTypes.IcsVendor,
            vendor,
            platform: null,
            versionRanges: new[] { vendorRange },
            statuses: Array.Empty<AffectedPackageStatus>(),
            provenance: new[] { vendorProvenance }));
    }

    var affectedPackages = affected.ToArray();

    return new Advisory(
        dto.AdvisoryId,
        dto.Title,
        dto.Summary ?? dto.Content,
        language: "en",
        published: dto.Published,
        modified: dto.Published,
        severity: dto.Severity,
        exploitKnown: false,
        aliases: aliases,
        references: references,
        affectedPackages: affectedPackages,
        cvssMetrics: Array.Empty<CvssMetric>(),
        provenance: new[] { fetchProvenance, mappingProvenance });
}
/// <summary>
/// Loads the stored cursor for this source, or <see cref="CertInCursor.Empty"/> when no state record exists.
/// </summary>
private async Task<CertInCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var stored = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (stored is null)
    {
        return CertInCursor.Empty;
    }

    return CertInCursor.FromBson(stored.Cursor);
}
/// <summary>
/// Persists <paramref name="cursor"/> as BSON for this source, stamped with the current time from the injected clock.
/// </summary>
private Task UpdateCursorAsync(CertInCursor cursor, CancellationToken cancellationToken)
{
    var serialized = cursor.ToBsonDocument();
    var updatedAt = _timeProvider.GetUtcNow();
    return _stateRepository.UpdateCursorAsync(SourceName, serialized, updatedAt, cancellationToken);
}
/// <summary>
/// Rebuilds the listing item from the <c>certin.*</c> metadata entries stored with a fetched document.
/// </summary>
/// <param name="metadata">Document metadata; may be null.</param>
/// <param name="listing">The reconstructed listing when the method returns true; otherwise null.</param>
/// <returns>
/// False when the metadata is null, a required key (<c>certin.advisoryId</c>, <c>certin.title</c>,
/// <c>certin.link</c>, <c>certin.published</c>) is missing, the link is not an absolute URI, or the
/// published value cannot be parsed. <c>certin.summary</c> is optional.
/// </returns>
private static bool TryDeserializeListing(IReadOnlyDictionary<string, string>? metadata, out CertInListingItem listing)
{
    listing = null!;

    if (metadata is null)
    {
        return false;
    }

    if (!metadata.TryGetValue("certin.advisoryId", out var advisoryId))
    {
        return false;
    }

    if (!metadata.TryGetValue("certin.title", out var title))
    {
        return false;
    }

    if (!metadata.TryGetValue("certin.link", out var link) || !Uri.TryCreate(link, UriKind.Absolute, out var detailUri))
    {
        return false;
    }

    // The value is machine-written (round-trip "O" format), so it must be parsed with the invariant
    // culture rather than the culture of the current thread; values without an offset are treated as
    // UTC, matching how the listing client parses the feed.
    if (!metadata.TryGetValue("certin.published", out var publishedText)
        || !DateTimeOffset.TryParse(
            publishedText,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.AssumeUniversal,
            out var published))
    {
        return false;
    }

    metadata.TryGetValue("certin.summary", out var summary);

    listing = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary);
    return true;
}
}

View File

@@ -0,0 +1,19 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Plugin entry point that exposes the CERT-In connector under the source name <c>cert-in</c>.
/// </summary>
public sealed class CertInConnectorPlugin : IConnectorPlugin
{
    /// <summary>Source identifier shared by the plugin and the connector.</summary>
    public const string SourceName = "cert-in";

    /// <summary>Plugin name; identical to <see cref="SourceName"/>.</summary>
    public string Name
    {
        get { return SourceName; }
    }

    /// <summary>Reports the plugin as available whenever a service provider is supplied.</summary>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>Creates a connector instance, resolving its constructor arguments from <paramref name="services"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var connector = ActivatorUtilities.CreateInstance<CertInConnector>(services);
        return connector;
    }
}

View File

@@ -0,0 +1,54 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.CertIn.Configuration;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Registers the CERT-In connector, its three jobs, and default job definitions with the container.
/// Options are bound from the <c>concelier:sources:cert-in</c> configuration section.
/// </summary>
public sealed class CertInDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:cert-in";

    /// <summary>Adds connector services and job registrations to <paramref name="services"/>.</summary>
    /// <returns>The same service collection, for chaining.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // The section is looked up when the options are configured, not at registration time.
        void BindAndValidate(CertInOptions options)
        {
            configuration.GetSection(ConfigurationSection).Bind(options);
            options.Validate();
        }

        services.AddCertInConnector(BindAndValidate);

        services.AddTransient<CertInFetchJob>();
        services.AddTransient<CertInParseJob>();
        services.AddTransient<CertInMapJob>();

        services.PostConfigure<JobSchedulerOptions>(RegisterDefaultJobs);

        return services;
    }

    /// <summary>Adds the fetch, parse and map job definitions unless they are already defined.</summary>
    private static void RegisterDefaultJobs(JobSchedulerOptions options)
    {
        EnsureJob(options, CertInJobKinds.Fetch, typeof(CertInFetchJob));
        EnsureJob(options, CertInJobKinds.Parse, typeof(CertInParseJob));
        EnsureJob(options, CertInJobKinds.Map, typeof(CertInMapJob));
    }

    /// <summary>
    /// Adds an enabled definition without a cron expression for <paramref name="kind"/>, using the
    /// scheduler's default timeout and lease duration; an existing definition is left untouched.
    /// </summary>
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (!options.Definitions.ContainsKey(kind))
        {
            options.Definitions[kind] = new JobDefinition(
                kind,
                jobType,
                options.DefaultTimeout,
                options.DefaultLeaseDuration,
                CronExpression: null,
                Enabled: true);
        }
    }
}

View File

@@ -0,0 +1,37 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common.Http;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Service-collection extensions that wire up the CERT-In connector.
/// </summary>
public static class CertInServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="CertInOptions"/> (validated after configuration), the named HTTP client
    /// for the CERT-In endpoint, and the client and connector as transient services.
    /// </summary>
    /// <param name="services">Collection to add registrations to.</param>
    /// <param name="configure">Callback that populates the connector options.</param>
    /// <returns>The same service collection, for chaining.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static IServiceCollection AddCertInConnector(this IServiceCollection services, Action<CertInOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        var optionsBuilder = services.AddOptions<CertInOptions>();
        optionsBuilder
            .Configure(configure)
            .PostConfigure(static opts => opts.Validate());

        services.AddSourceHttpClient(CertInOptions.HttpClientName, (provider, http) =>
        {
            var certInOptions = provider.GetRequiredService<IOptions<CertInOptions>>().Value;
            var endpoint = certInOptions.AlertsEndpoint;

            http.BaseAddress = endpoint;
            http.Timeout = TimeSpan.FromSeconds(30);
            http.UserAgent = "StellaOps.Concelier.CertIn/1.0";

            // Only the configured endpoint's host stays on the allow list.
            http.AllowedHosts.Clear();
            http.AllowedHosts.Add(endpoint.Host);

            http.DefaultRequestHeaders["Accept"] = "application/json";
        });

        services.AddTransient<CertInClient>();
        services.AddTransient<CertInConnector>();

        return services;
    }
}

View File

@@ -0,0 +1,68 @@
using System;
using System.Diagnostics.CodeAnalysis;
namespace StellaOps.Concelier.Connector.CertIn.Configuration;
/// <summary>
/// Settings for the CERT-In connector: listing endpoint, fetch window, paging limit and request pacing.
/// </summary>
public sealed class CertInOptions
{
    /// <summary>Name of the HTTP client registered for this connector.</summary>
    public static string HttpClientName => "source.certin";

    /// <summary>
    /// Endpoint returning a paginated list of recent advisories.
    /// </summary>
    public Uri AlertsEndpoint { get; set; } = new("https://www.cert-in.org.in/api/alerts", UriKind.Absolute);

    /// <summary>
    /// Size of the rolling fetch window.
    /// </summary>
    public TimeSpan WindowSize { get; set; } = TimeSpan.FromDays(30);

    /// <summary>
    /// Overlap applied to subsequent windows.
    /// </summary>
    public TimeSpan WindowOverlap { get; set; } = TimeSpan.FromDays(2);

    /// <summary>
    /// Maximum pages fetched per cycle.
    /// </summary>
    public int MaxPagesPerFetch { get; set; } = 5;

    /// <summary>
    /// Delay between successive HTTP requests.
    /// </summary>
    public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(500);

    /// <summary>
    /// Checks the settings and reports the first violated rule, in this order: absolute endpoint,
    /// positive window size, non-negative overlap, overlap smaller than the window, positive page
    /// limit, non-negative request delay.
    /// </summary>
    /// <exception cref="InvalidOperationException">A setting violates one of the rules above.</exception>
    [MemberNotNull(nameof(AlertsEndpoint))]
    public void Validate()
    {
        // Kept as a direct check so null-state analysis can see the endpoint is non-null afterwards.
        if (AlertsEndpoint is not { IsAbsoluteUri: true })
        {
            throw new InvalidOperationException("AlertsEndpoint must be an absolute URI.");
        }

        // The remaining rules are evaluated top to bottom; the first failing one wins.
        string? violation =
            WindowSize <= TimeSpan.Zero ? "WindowSize must be greater than zero."
            : WindowOverlap < TimeSpan.Zero ? "WindowOverlap cannot be negative."
            : WindowOverlap >= WindowSize ? "WindowOverlap must be smaller than WindowSize."
            : MaxPagesPerFetch <= 0 ? "MaxPagesPerFetch must be positive."
            : RequestDelay < TimeSpan.Zero ? "RequestDelay cannot be negative."
            : null;

        if (violation is not null)
        {
            throw new InvalidOperationException(violation);
        }
    }
}

View File

@@ -0,0 +1,16 @@
using System;
using System.Collections.Immutable;
namespace StellaOps.Concelier.Connector.CertIn.Internal;
/// <summary>
/// Parsed form of one CERT-In advisory, as produced by <c>CertInDetailParser.Parse</c> and
/// stored as the DTO payload between the parse and map stages.
/// </summary>
/// <param name="AdvisoryId">Advisory identifier taken from the listing entry.</param>
/// <param name="Title">Advisory title taken from the listing entry.</param>
/// <param name="Link">Detail page URI of the advisory, as a string.</param>
/// <param name="Published">Publication timestamp taken from the listing entry.</param>
/// <param name="Summary">Listing summary, or a summary extracted from the page text; may be null.</param>
/// <param name="Content">Plain-text rendering of the advisory detail page.</param>
/// <param name="Severity">Lower-cased severity label found in the page text, if any.</param>
/// <param name="CveIds">CVE identifiers found in the title, summary and page text.</param>
/// <param name="VendorNames">Vendor names extracted from the summary and page text.</param>
/// <param name="ReferenceLinks">http/https links extracted from the page's <c>href</c> attributes.</param>
internal sealed record CertInAdvisoryDto(
string AdvisoryId,
string Title,
string Link,
DateTimeOffset Published,
string? Summary,
string Content,
string? Severity,
ImmutableArray<string> CveIds,
ImmutableArray<string> VendorNames,
ImmutableArray<string> ReferenceLinks);

View File

@@ -0,0 +1,129 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.CertIn.Configuration;
namespace StellaOps.Concelier.Connector.CertIn.Internal;
public sealed class CertInClient
{
private readonly IHttpClientFactory _httpClientFactory;
private readonly CertInOptions _options;
private readonly ILogger<CertInClient> _logger;

/// <summary>
/// Creates the listing client. All arguments are required and the options are validated immediately.
/// </summary>
/// <exception cref="ArgumentNullException">An argument (or the options value) is null.</exception>
public CertInClient(IHttpClientFactory httpClientFactory, IOptions<CertInOptions> options, ILogger<CertInClient> logger)
{
    // Guards run in parameter order, with option validation before the logger check.
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value ?? throw new ArgumentNullException(nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(logger);

    _httpClientFactory = httpClientFactory;
    _options = resolvedOptions;
    _logger = logger;
}
/// <summary>
/// Downloads one page of the CERT-In alert listing and returns the entries that could be parsed.
/// </summary>
/// <param name="page">1-based page number; pages above 1 add a <c>page</c> query parameter.</param>
/// <param name="cancellationToken">Token forwarded to the HTTP and JSON calls.</param>
/// <returns>
/// The parsed listing items; empty (with a warning logged) when the payload root is not a JSON array.
/// Entries that fail to parse are skipped.
/// </returns>
/// <exception cref="HttpRequestException">The response status code does not indicate success.</exception>
public async Task<IReadOnlyList<CertInListingItem>> GetListingsAsync(int page, CancellationToken cancellationToken)
{
    var httpClient = _httpClientFactory.CreateClient(CertInOptions.HttpClientName);
    var pageUri = BuildPageUri(_options.AlertsEndpoint, page);

    using var response = await httpClient.GetAsync(pageUri, cancellationToken).ConfigureAwait(false);
    response.EnsureSuccessStatusCode();

    await using var body = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
    using var json = await JsonDocument.ParseAsync(body, cancellationToken: cancellationToken).ConfigureAwait(false);

    var payload = json.RootElement;
    if (payload.ValueKind == JsonValueKind.Array)
    {
        var parsedItems = new List<CertInListingItem>(capacity: payload.GetArrayLength());
        foreach (var entry in payload.EnumerateArray())
        {
            if (TryParseListing(entry, out var parsed))
            {
                parsedItems.Add(parsed);
            }
        }

        return parsedItems;
    }

    _logger.LogWarning("Unexpected CERT-In alert payload shape for {Uri}", pageUri);
    return Array.Empty<CertInListingItem>();
}
/// <summary>
/// Converts one element of the listing array into a <see cref="CertInListingItem"/>.
/// </summary>
/// <param name="element">A single element of the listing payload.</param>
/// <param name="item">The parsed item when the method returns true; otherwise null.</param>
/// <returns>
/// False when the element is not a JSON object, <c>advisoryId</c> is missing or blank,
/// <c>detailUrl</c> is missing or not an absolute URI, or <c>publishedOn</c> is missing or unparsable.
/// <c>title</c> falls back to the advisory id when missing or blank; <c>summary</c> is optional.
/// </returns>
private static bool TryParseListing(JsonElement element, out CertInListingItem item)
{
    item = null!;

    // TryGetProperty throws InvalidOperationException for anything but a JSON object; without this
    // guard a single stray array entry (string, number, null) would abort the whole page instead of
    // being skipped like any other malformed entry.
    if (element.ValueKind != JsonValueKind.Object)
    {
        return false;
    }

    if (!element.TryGetProperty("advisoryId", out var idElement) || idElement.ValueKind != JsonValueKind.String)
    {
        return false;
    }

    var advisoryId = idElement.GetString();
    if (string.IsNullOrWhiteSpace(advisoryId))
    {
        return false;
    }

    advisoryId = advisoryId.Trim();

    // A missing, non-string or whitespace-only title falls back to the advisory id.
    var title = element.TryGetProperty("title", out var titleElement) && titleElement.ValueKind == JsonValueKind.String
        ? titleElement.GetString()
        : null;
    title = string.IsNullOrWhiteSpace(title) ? advisoryId : title.Trim();

    if (!element.TryGetProperty("detailUrl", out var linkElement) || linkElement.ValueKind != JsonValueKind.String)
    {
        return false;
    }

    if (!Uri.TryCreate(linkElement.GetString(), UriKind.Absolute, out var detailUri))
    {
        return false;
    }

    if (!element.TryGetProperty("publishedOn", out var publishedElement)
        || publishedElement.ValueKind != JsonValueKind.String
        || !DateTimeOffset.TryParse(publishedElement.GetString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var published))
    {
        return false;
    }

    string? summary = null;
    if (element.TryGetProperty("summary", out var summaryElement) && summaryElement.ValueKind == JsonValueKind.String)
    {
        summary = summaryElement.GetString();
    }

    item = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary?.Trim());
    return true;
}
/// <summary>
/// Returns the listing URI for <paramref name="page"/>: the base URI itself for page 1 or lower,
/// otherwise the base URI with <c>page=N</c> appended to any existing query string.
/// </summary>
private static Uri BuildPageUri(Uri baseUri, int page)
{
    if (page > 1)
    {
        var uriBuilder = new UriBuilder(baseUri);
        var pageParameter = $"page={page.ToString(CultureInfo.InvariantCulture)}";

        // UriBuilder.Query may come back with a leading '?'; strip it before appending.
        var existingQuery = uriBuilder.Query.TrimStart('?');
        if (string.IsNullOrEmpty(existingQuery))
        {
            uriBuilder.Query = pageParameter;
        }
        else
        {
            uriBuilder.Query = $"{existingQuery}&{pageParameter}";
        }

        return uriBuilder.Uri;
    }

    return baseUri;
}
}

View File

@@ -0,0 +1,88 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
namespace StellaOps.Concelier.Connector.CertIn.Internal;
/// <summary>
/// Resume state of the CERT-In connector: the newest published timestamp seen so far plus the ids of
/// documents waiting to be parsed and waiting to be mapped. Instances are immutable; the
/// <c>With*</c> methods return modified copies.
/// </summary>
/// <param name="LastPublished">Newest advisory publication time recorded by a fetch run, if any.</param>
/// <param name="PendingDocuments">Ids of fetched documents that still have to be parsed.</param>
/// <param name="PendingMappings">Ids of parsed documents that still have to be mapped.</param>
internal sealed record CertInCursor(
    DateTimeOffset? LastPublished,
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings)
{
    /// <summary>Cursor with no timestamp and empty queues.</summary>
    public static CertInCursor Empty { get; } = new(null, Array.Empty<Guid>(), Array.Empty<Guid>());

    /// <summary>
    /// Serializes the cursor: both queues as arrays of GUID strings and, when present,
    /// <c>lastPublished</c> as a UTC date value.
    /// </summary>
    public BsonDocument ToBsonDocument()
    {
        var document = new BsonDocument
        {
            ["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
            ["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
        };

        if (LastPublished.HasValue)
        {
            document["lastPublished"] = LastPublished.Value.UtcDateTime;
        }

        return document;
    }

    /// <summary>
    /// Rebuilds a cursor from its BSON form. A null or empty document yields <see cref="Empty"/>;
    /// missing or malformed fields fall back to "no timestamp" and empty queues.
    /// </summary>
    public static CertInCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }

        var lastPublished = document.TryGetValue("lastPublished", out var dateValue)
            ? ParseDate(dateValue)
            : null;

        return new CertInCursor(
            lastPublished,
            ReadGuidArray(document, "pendingDocuments"),
            ReadGuidArray(document, "pendingMappings"));
    }

    /// <summary>Returns a copy with <see cref="LastPublished"/> replaced.</summary>
    public CertInCursor WithLastPublished(DateTimeOffset? timestamp)
        => this with { LastPublished = timestamp };

    /// <summary>Returns a copy whose parse queue holds the distinct <paramref name="ids"/> (empty when null).</summary>
    public CertInCursor WithPendingDocuments(IEnumerable<Guid> ids)
        => this with { PendingDocuments = ids?.Distinct().ToArray() ?? Array.Empty<Guid>() };

    /// <summary>Returns a copy whose mapping queue holds the distinct <paramref name="ids"/> (empty when null).</summary>
    public CertInCursor WithPendingMappings(IEnumerable<Guid> ids)
        => this with { PendingMappings = ids?.Distinct().ToArray() ?? Array.Empty<Guid>() };

    /// <summary>
    /// Reads a timestamp stored either as a BSON date or as a string; any other value yields null.
    /// </summary>
    private static DateTimeOffset? ParseDate(BsonValue value)
        => value.BsonType switch
        {
            BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),

            // Persisted state must not depend on the culture or time zone of the current host:
            // parse with the invariant culture and read offset-less text as UTC, which is how
            // ToBsonDocument stores the value.
            BsonType.String when DateTimeOffset.TryParse(
                value.AsString,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AssumeUniversal,
                out var parsed) => parsed.ToUniversalTime(),

            _ => null,
        };

    /// <summary>
    /// Reads <paramref name="field"/> as a list of GUIDs; a missing or non-array field yields an
    /// empty list and elements that do not parse as GUIDs are skipped.
    /// </summary>
    private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
        {
            return Array.Empty<Guid>();
        }

        var results = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (element is null)
            {
                continue;
            }

            if (Guid.TryParse(element.ToString(), out var guid))
            {
                results.Add(guid);
            }
        }

        return results;
    }
}

View File

@@ -0,0 +1,187 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Connector.CertIn.Internal;
internal static class CertInDetailParser
{
// Matches CVE identifiers ("CVE-" + four digits + "-" + one or more digits), case-insensitively.
private static readonly Regex CveRegex = new("CVE-\\d{4}-\\d+", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Matches a "Severity" label followed by ':' or '-' and captures up to 32 letters/spaces as "value".
private static readonly Regex SeverityRegex = new("Severity\\s*[:\\-]\\s*(?<value>[A-Za-z ]{1,32})", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Matches a Vendor/Organisation/Organization/Company label followed by ':' or '-' and captures the rest of the line as "value".
private static readonly Regex VendorRegex = new("(?:Vendor|Organisation|Organization|Company)\\s*[:\\-]\\s*(?<value>[^\\n\\r]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Captures the http/https URL of a double-quoted href attribute in group 1.
private static readonly Regex LinkRegex = new("href=\"(https?://[^\"]+)\"", RegexOptions.IgnoreCase | RegexOptions.Compiled);
/// <summary>
/// Builds the advisory DTO from a listing entry and the raw bytes of its detail page.
/// The bytes are decoded as UTF-8; identity fields come from the listing, everything else is
/// extracted from the page (the listing summary wins over an extracted one).
/// </summary>
/// <param name="listing">Listing entry the page belongs to.</param>
/// <param name="rawHtml">Raw detail page bytes.</param>
/// <exception cref="ArgumentNullException"><paramref name="listing"/> is null.</exception>
public static CertInAdvisoryDto Parse(CertInListingItem listing, byte[] rawHtml)
{
    ArgumentNullException.ThrowIfNull(listing);

    var markup = Encoding.UTF8.GetString(rawHtml);
    var plainText = HtmlToPlainText(markup);
    var summary = listing.Summary ?? ExtractSummary(plainText);

    return new CertInAdvisoryDto(
        AdvisoryId: listing.AdvisoryId,
        Title: listing.Title,
        Link: listing.DetailUri.ToString(),
        Published: listing.Published,
        Summary: summary,
        Content: plainText,
        Severity: ExtractSeverity(plainText),
        CveIds: ExtractCves(listing.Title, summary, plainText),
        VendorNames: ExtractVendors(summary, plainText),
        ReferenceLinks: ExtractLinks(markup));
}
/// <summary>
/// Reduces HTML to a single line of text: script and style elements and comments are removed,
/// remaining tags become spaces, entities are decoded, and whitespace runs collapse to one space.
/// </summary>
/// <returns>The trimmed text, or an empty string when nothing but whitespace remains.</returns>
private static string HtmlToPlainText(string html)
{
    if (string.IsNullOrWhiteSpace(html))
    {
        return string.Empty;
    }

    var text = html;
    text = Regex.Replace(text, "<script[\\s\\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
    text = Regex.Replace(text, "<style[\\s\\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);
    text = Regex.Replace(text, "<!--.*?-->", string.Empty, RegexOptions.Singleline);
    text = Regex.Replace(text, "<[^>]+>", " ");
    text = System.Net.WebUtility.HtmlDecode(text);

    if (string.IsNullOrWhiteSpace(text))
    {
        return string.Empty;
    }

    var collapsed = Regex.Replace(text, "\\s+", " ");
    return collapsed.Trim();
}
/// <summary>
/// Returns the first sentence of <paramref name="content"/>: everything up to and including the
/// earliest '.', '!' or '?' that is not the very first character.
/// </summary>
/// <param name="content">Plain-text advisory content.</param>
/// <returns>
/// Null for blank content; the first sentence when a terminator is found; otherwise the content
/// itself, cut to 280 characters when longer.
/// </returns>
private static string? ExtractSummary(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return null;
    }

    // Search for whichever terminator comes first. Probing "." then "!" then "?" in turn returned
    // at the first *kind* found, so "Is it safe? No." was reported as one sentence.
    // Starting at index 1 skips a terminator in the first position, which cannot end a sentence.
    var sentenceEnd = content.IndexOfAny(new[] { '.', '!', '?' }, 1);
    if (sentenceEnd > 0)
    {
        return content[..(sentenceEnd + 1)].Trim();
    }

    return content.Length > 280 ? content[..280].Trim() : content;
}
/// <summary>
/// Returns the trimmed, lower-cased text that follows the first "Severity:" style label in
/// <paramref name="content"/>, or null when no such label is present.
/// </summary>
private static string? ExtractSeverity(string content)
{
    var labelled = SeverityRegex.Match(content);
    return labelled.Success
        ? labelled.Groups["value"].Value.Trim().ToLowerInvariant()
        : null;
}
/// <summary>
/// Collects the CVE identifiers mentioned in the title, summary and content, upper-cased,
/// de-duplicated and sorted ordinally.
/// </summary>
private static ImmutableArray<string> ExtractCves(string title, string? summary, string content)
{
    var identifiers = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var text in new[] { title, summary, content })
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            continue;
        }

        foreach (Match hit in CveRegex.Matches(text))
        {
            if (hit.Success)
            {
                identifiers.Add(hit.Value.ToUpperInvariant());
            }
        }
    }

    return identifiers
        .OrderBy(static id => id, StringComparer.Ordinal)
        .ToImmutableArray();
}
/// <summary>
/// Collects vendor names from "Vendor:/Organisation:/Organization:/Company:" labels in the summary
/// and content. When no label is found, the first sentence of the summary is used as a fallback.
/// Names are de-duplicated case-insensitively, capped at 200 characters, and sorted ordinally.
/// </summary>
/// <param name="summary">Advisory summary; may be null.</param>
/// <param name="content">Plain-text advisory content.</param>
private static ImmutableArray<string> ExtractVendors(string? summary, string content)
{
    var vendors = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    void Add(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return;
        }

        // Normalize typographic apostrophes to a plain one. The characters are spelled as escapes
        // because the previous literal had degraded to an empty string, and
        // string.Replace("", ...) throws ArgumentException for every non-blank vendor value.
        // U+0092 is what decoding the legacy "&#146;" entity yields — presumably the character the
        // original literal held; TODO confirm against the upstream file.
        var cleaned = value
            .Replace('\u2018', '\'')
            .Replace('\u2019', '\'')
            .Replace('\u0092', '\'')
            .Trim();

        if (cleaned.Length > 200)
        {
            cleaned = cleaned[..200];
        }

        if (!string.IsNullOrWhiteSpace(cleaned))
        {
            vendors.Add(cleaned);
        }
    }

    if (!string.IsNullOrWhiteSpace(summary))
    {
        foreach (Match match in VendorRegex.Matches(summary))
        {
            Add(match.Groups["value"].Value);
        }
    }

    // NOTE(review): the pattern captures up to the next line break; if the caller passes text whose
    // whitespace has already been collapsed, a match runs to the end of the text and is cut at 200
    // characters — confirm that this is intended.
    foreach (Match match in VendorRegex.Matches(content))
    {
        Add(match.Groups["value"].Value);
    }

    if (vendors.Count == 0 && !string.IsNullOrWhiteSpace(summary))
    {
        var fallback = summary.Split('.', 2, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries).FirstOrDefault();
        Add(fallback);
    }

    return vendors.Count == 0
        ? ImmutableArray<string>.Empty
        : vendors.OrderBy(static value => value, StringComparer.Ordinal).ToImmutableArray();
}
/// <summary>
/// Returns the distinct (case-insensitive) http/https URLs found in double-quoted <c>href</c>
/// attributes of <paramref name="html"/>, sorted ordinally; empty for blank input.
/// </summary>
private static ImmutableArray<string> ExtractLinks(string html)
{
    if (string.IsNullOrWhiteSpace(html))
    {
        return ImmutableArray<string>.Empty;
    }

    var unique = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (Match anchor in LinkRegex.Matches(html))
    {
        if (!anchor.Success)
        {
            continue;
        }

        unique.Add(anchor.Groups[1].Value);
    }

    if (unique.Count == 0)
    {
        return ImmutableArray<string>.Empty;
    }

    return unique
        .OrderBy(static url => url, StringComparer.Ordinal)
        .ToImmutableArray();
}
}

View File

@@ -0,0 +1,10 @@
using System;
namespace StellaOps.Concelier.Connector.CertIn.Internal;
/// <summary>
/// One entry of the CERT-In alert listing, as read from the listing endpoint or rebuilt from the
/// metadata stored with a fetched document.
/// </summary>
/// <param name="AdvisoryId">Advisory identifier.</param>
/// <param name="Title">Advisory title.</param>
/// <param name="DetailUri">Absolute URI of the advisory detail page.</param>
/// <param name="Published">Publication timestamp.</param>
/// <param name="Summary">Optional short summary.</param>
public sealed record CertInListingItem(
string AdvisoryId,
string Title,
Uri DetailUri,
DateTimeOffset Published,
string? Summary);

View File

@@ -0,0 +1,46 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.CertIn;
/// <summary>
/// Job kind identifiers for the three CERT-In pipeline stages.
/// </summary>
internal static class CertInJobKinds
{
// Kind of the job that runs the fetch stage.
public const string Fetch = "source:cert-in:fetch";
// Kind of the job that runs the parse stage.
public const string Parse = "source:cert-in:parse";
// Kind of the job that runs the map stage.
public const string Map = "source:cert-in:map";
}
/// <summary>
/// Job that runs the connector's fetch stage.
/// </summary>
internal sealed class CertInFetchJob : IJob
{
    private readonly CertInConnector _connector;

    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
    public CertInFetchJob(CertInConnector connector)
    {
        ArgumentNullException.ThrowIfNull(connector);
        _connector = connector;
    }

    /// <summary>Delegates to <c>FetchAsync</c> with the services of the execution context.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.FetchAsync(context.Services, cancellationToken);
    }
}
/// <summary>
/// Job that runs the connector's parse stage.
/// </summary>
internal sealed class CertInParseJob : IJob
{
    private readonly CertInConnector _connector;

    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
    public CertInParseJob(CertInConnector connector)
    {
        ArgumentNullException.ThrowIfNull(connector);
        _connector = connector;
    }

    /// <summary>Delegates to <c>ParseAsync</c> with the services of the execution context.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.ParseAsync(context.Services, cancellationToken);
    }
}
/// <summary>
/// Job that runs the connector's map stage.
/// </summary>
internal sealed class CertInMapJob : IJob
{
    private readonly CertInConnector _connector;

    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
    public CertInMapJob(CertInConnector connector)
    {
        ArgumentNullException.ThrowIfNull(connector);
        _connector = connector;
    }

    /// <summary>Delegates to <c>MapAsync</c> with the services of the execution context.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        return _connector.MapAsync(context.Services, cancellationToken);
    }
}

View File

@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- CERT-In connector library: targets net10.0 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- Sibling projects this connector builds against: plugin contracts, shared connector code, models, Mongo storage. -->
<ItemGroup>
<ProjectReference Include="../StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Storage.Mongo/StellaOps.Concelier.Storage.Mongo.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,10 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|Index/detail crawler with windowing|BE-Conn-CertIn|Connector.Common|**DONE** index/detail fetch implemented with window overlap and pagination.|
|Extractor (title/CVEs/mitigation)|BE-Conn-CertIn|Connector.Common|**DONE** parser normalizes encodings, CVE lists, and mitigation text.|
|DTO validation and sanitizer|BE-Conn-CertIn, QA|Connector.Common|**DONE** HTML sanitizer produces DTO before persistence.|
|Canonical mapping (aliases, refs)|BE-Conn-CertIn|Models|**DONE** mapper creates CERT-IN aliases plus typed references.|
|State/dedupe and fixtures|BE-Conn-CertIn, QA|Storage.Mongo|**DONE** snapshot/resume tests cover dedupe and cursor handling.|
|Mark failure/backoff on fetch errors|BE-Conn-CertIn|Storage.Mongo|**DONE** fetch pipeline marks failures/backoff with unit coverage.|
|Conditional fetch caching|BE-Conn-CertIn|Connector.Common|**DONE** connector reuses ETag/Last-Modified; tests verify not-modified flow.|