Rename Concelier Source modules to Connector

This commit is contained in:
master
2025-10-18 20:11:18 +03:00
parent 89ede53cc3
commit 052da7a7d0
789 changed files with 1489 additions and 1489 deletions

View File

@@ -0,0 +1,462 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.CertIn;
public sealed class CertInConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
private readonly CertInClient _client;
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly CertInOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<CertInConnector> _logger;
public CertInConnector(
CertInClient client,
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<CertInOptions> options,
TimeProvider? timeProvider,
ILogger<CertInConnector> logger)
{
_client = client ?? throw new ArgumentNullException(nameof(client));
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => CertInConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var windowStart = cursor.LastPublished.HasValue
? cursor.LastPublished.Value - _options.WindowOverlap
: now - _options.WindowSize;
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
for (var page = 1; page <= _options.MaxPagesPerFetch; page++)
{
IReadOnlyList<CertInListingItem> listings;
try
{
listings = await _client.GetListingsAsync(page, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "CERT-In listings fetch failed for page {Page}", page);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (listings.Count == 0)
{
break;
}
foreach (var listing in listings.OrderByDescending(static item => item.Published))
{
if (listing.Published < windowStart)
{
page = _options.MaxPagesPerFetch + 1;
break;
}
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["certin.advisoryId"] = listing.AdvisoryId,
["certin.title"] = listing.Title,
["certin.link"] = listing.DetailUri.ToString(),
["certin.published"] = listing.Published.ToString("O")
};
if (!string.IsNullOrWhiteSpace(listing.Summary))
{
metadata["certin.summary"] = listing.Summary!;
}
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, listing.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(
new SourceFetchRequest(CertInOptions.HttpClientName, SourceName, listing.DetailUri)
{
Metadata = metadata,
ETag = existing?.Etag,
LastModified = existing?.LastModified,
AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
},
cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "CERT-In fetch failed for {Uri}", listing.DetailUri);
await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(3), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
if (existing is not null
&& string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
&& string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
continue;
}
pendingDocuments.Add(result.Document.Id);
if (listing.Published > maxPublished)
{
maxPublished = listing.Published;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("CERT-In document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
if (!TryDeserializeListing(document.Metadata, out var listing))
{
_logger.LogWarning("CERT-In metadata missing for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] rawBytes;
try
{
rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to download raw CERT-In document {DocumentId}", document.Id);
throw;
}
var dto = CertInDetailParser.Parse(listing, rawBytes);
var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certin.v1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
{
OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
});
CertInAdvisoryDto dto;
try
{
dto = JsonSerializer.Deserialize<CertInAdvisoryDto>(dtoJson, SerializerOptions)
?? throw new InvalidOperationException("Deserialized CERT-In DTO is null.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize CERT-In DTO for {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var advisory = MapAdvisory(dto, document, dtoRecord);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private Advisory MapAdvisory(CertInAdvisoryDto dto, DocumentRecord document, DtoRecord dtoRecord)
{
var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", dto.AdvisoryId, dtoRecord.ValidatedAt);
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
dto.AdvisoryId,
};
foreach (var cve in dto.CveIds)
{
aliases.Add(cve);
}
var references = new List<AdvisoryReference>();
try
{
references.Add(new AdvisoryReference(
dto.Link,
"advisory",
"cert-in",
null,
new AdvisoryProvenance(SourceName, "reference", dto.Link, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
_logger.LogWarning("Invalid CERT-In link {Link} for advisory {AdvisoryId}", dto.Link, dto.AdvisoryId);
}
foreach (var cve in dto.CveIds)
{
var url = $"https://www.cve.org/CVERecord?id={cve}";
try
{
references.Add(new AdvisoryReference(
url,
"advisory",
cve,
null,
new AdvisoryProvenance(SourceName, "reference", url, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore invalid urls
}
}
foreach (var link in dto.ReferenceLinks)
{
try
{
references.Add(new AdvisoryReference(
link,
"reference",
null,
null,
new AdvisoryProvenance(SourceName, "reference", link, dtoRecord.ValidatedAt)));
}
catch (ArgumentException)
{
// ignore invalid urls
}
}
var affectedPackages = dto.VendorNames.Select(vendor =>
{
var provenance = new AdvisoryProvenance(SourceName, "affected", vendor, dtoRecord.ValidatedAt);
var primitives = new RangePrimitives(
null,
null,
null,
new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
{
["certin.vendor"] = vendor
});
var ranges = new[]
{
new AffectedVersionRange(
rangeKind: "vendor",
introducedVersion: null,
fixedVersion: null,
lastAffectedVersion: null,
rangeExpression: null,
provenance: provenance,
primitives: primitives)
};
return new AffectedPackage(
AffectedPackageTypes.IcsVendor,
vendor,
platform: null,
versionRanges: ranges,
statuses: Array.Empty<AffectedPackageStatus>(),
provenance: new[] { provenance });
})
.ToArray();
return new Advisory(
dto.AdvisoryId,
dto.Title,
dto.Summary ?? dto.Content,
language: "en",
published: dto.Published,
modified: dto.Published,
severity: dto.Severity,
exploitKnown: false,
aliases: aliases,
references: references,
affectedPackages: affectedPackages,
cvssMetrics: Array.Empty<CvssMetric>(),
provenance: new[] { fetchProvenance, mappingProvenance });
}
private async Task<CertInCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? CertInCursor.Empty : CertInCursor.FromBson(state.Cursor);
}
private Task UpdateCursorAsync(CertInCursor cursor, CancellationToken cancellationToken)
{
return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
}
private static bool TryDeserializeListing(IReadOnlyDictionary<string, string>? metadata, out CertInListingItem listing)
{
listing = null!;
if (metadata is null)
{
return false;
}
if (!metadata.TryGetValue("certin.advisoryId", out var advisoryId))
{
return false;
}
if (!metadata.TryGetValue("certin.title", out var title))
{
return false;
}
if (!metadata.TryGetValue("certin.link", out var link) || !Uri.TryCreate(link, UriKind.Absolute, out var detailUri))
{
return false;
}
if (!metadata.TryGetValue("certin.published", out var publishedText) || !DateTimeOffset.TryParse(publishedText, out var published))
{
return false;
}
metadata.TryGetValue("certin.summary", out var summary);
listing = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary);
return true;
}
}