using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.CertIn.Configuration;
using StellaOps.Concelier.Connector.CertIn.Internal;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;

namespace StellaOps.Concelier.Connector.CertIn;

/// <summary>
/// Feed connector for CERT-In advisories. Work is split into three stages that hand
/// off through a persisted cursor: <see cref="FetchAsync"/> downloads advisory detail
/// pages, <see cref="ParseAsync"/> turns the raw documents into DTO records, and
/// <see cref="MapAsync"/> converts the DTOs into <see cref="Advisory"/> records.
/// </summary>
public sealed class CertInConnector : IFeedConnector
{
    // Shared by ParseAsync (serialize) and MapAsync (deserialize) so both sides agree on the DTO shape.
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    };

    private readonly CertInClient _client;
    private readonly SourceFetchService _fetchService;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly IDtoStore _dtoStore;
    private readonly IAdvisoryStore _advisoryStore;
    private readonly ISourceStateRepository _stateRepository;
    private readonly CertInOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<CertInConnector> _logger;

    /// <summary>
    /// Creates the connector and validates its options.
    /// </summary>
    /// <param name="timeProvider">Clock to use; <see cref="TimeProvider.System"/> when <c>null</c>.</param>
    /// <exception cref="ArgumentNullException">
    /// Any dependency other than <paramref name="timeProvider"/> is <c>null</c>, or
    /// <paramref name="options"/> carries a <c>null</c> value.
    /// </exception>
    public CertInConnector(
        CertInClient client,
        SourceFetchService fetchService,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        IDtoStore dtoStore,
        IAdvisoryStore advisoryStore,
        ISourceStateRepository stateRepository,
        IOptions<CertInOptions> options,
        TimeProvider? timeProvider,
        ILogger<CertInConnector> logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
        _advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Source identifier used for documents, state and provenance.</summary>
    public string SourceName => CertInConnectorPlugin.SourceName;

    /// <summary>
    /// Walks the CERT-In listing pages (at most <c>MaxPagesPerFetch</c>), fetches the detail
    /// page of every listing published inside the current window, queues new or changed
    /// documents for parsing and advances the cursor's last-published timestamp.
    /// </summary>
    /// <remarks>
    /// A listing or detail fetch failure is logged, recorded through
    /// <c>MarkFailureAsync</c> and rethrown; the cursor is not updated in that case.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);

        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        var now = _timeProvider.GetUtcNow();

        // Resume slightly before the last seen advisory (overlap), or look back one full window on first run.
        var windowStart = cursor.LastPublished.HasValue
            ? cursor.LastPublished.Value - _options.WindowOverlap
            : now - _options.WindowSize;

        var pendingDocuments = cursor.PendingDocuments.ToHashSet();
        var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;

        // Set once a listing older than the window is seen; stops paging without touching the loop counter.
        var reachedWindowStart = false;

        for (var page = 1; page <= _options.MaxPagesPerFetch && !reachedWindowStart; page++)
        {
            IReadOnlyList<CertInListingItem> listings;
            try
            {
                listings = await _client.GetListingsAsync(page, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "CERT-In listings fetch failed for page {Page}", page);
                await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
                throw;
            }

            if (listings.Count == 0)
            {
                break;
            }

            // Newest first, so the first listing older than the window ends the whole fetch.
            foreach (var listing in listings.OrderByDescending(static item => item.Published))
            {
                if (listing.Published < windowStart)
                {
                    reachedWindowStart = true;
                    break;
                }

                // Listing fields travel with the document as metadata; ParseAsync rebuilds the listing from them.
                var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
                {
                    ["certin.advisoryId"] = listing.AdvisoryId,
                    ["certin.title"] = listing.Title,
                    ["certin.link"] = listing.DetailUri.ToString(),
                    ["certin.published"] = listing.Published.ToString("O")
                };

                if (!string.IsNullOrWhiteSpace(listing.Summary))
                {
                    metadata["certin.summary"] = listing.Summary!;
                }

                var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, listing.DetailUri.ToString(), cancellationToken).ConfigureAwait(false);

                SourceFetchResult result;
                try
                {
                    result = await _fetchService.FetchAsync(
                        new SourceFetchRequest(CertInOptions.HttpClientName, SourceName, listing.DetailUri)
                        {
                            Metadata = metadata,
                            // Conditional-request validators from the previously stored copy, if any.
                            ETag = existing?.Etag,
                            LastModified = existing?.LastModified,
                            AcceptHeaders = new[] { "text/html", "application/xhtml+xml", "text/plain;q=0.5" },
                        },
                        cancellationToken).ConfigureAwait(false);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "CERT-In fetch failed for {Uri}", listing.DetailUri);
                    await _stateRepository.MarkFailureAsync(SourceName, _timeProvider.GetUtcNow(), TimeSpan.FromMinutes(3), ex.Message, cancellationToken).ConfigureAwait(false);
                    throw;
                }

                if (!result.IsSuccess || result.Document is null)
                {
                    continue;
                }

                // Same content as the already-mapped copy: write the stored status back and do not re-queue.
                // NOTE(review): presumably the fetch service resets the status on re-fetch - confirm.
                if (existing is not null
                    && string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
                    && string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
                {
                    await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
                    continue;
                }

                pendingDocuments.Add(result.Document.Id);
                if (listing.Published > maxPublished)
                {
                    maxPublished = listing.Published;
                }

                if (_options.RequestDelay > TimeSpan.Zero)
                {
                    await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
                }
            }
        }

        // MinValue means nothing was queued and there was no prior timestamp; keep the cursor value as-is.
        var updatedCursor = cursor
            .WithPendingDocuments(pendingDocuments)
            .WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Parses every document queued by <see cref="FetchAsync"/>: rebuilds the listing from the
    /// document metadata, downloads the raw payload, stores the parsed DTO and moves the
    /// document id from the pending-documents list to the pending-mappings list.
    /// </summary>
    /// <remarks>
    /// Documents that no longer exist are dropped from the queue; documents without a payload
    /// or with unusable metadata are marked <c>Failed</c>. A payload download failure is logged
    /// and rethrown before the cursor is updated.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);

        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingDocuments.Count == 0)
        {
            return;
        }

        // Work on copies so the cursor collections can be enumerated while entries are removed.
        var remainingDocuments = cursor.PendingDocuments.ToList();
        var pendingMappings = cursor.PendingMappings.ToList();

        foreach (var documentId in cursor.PendingDocuments)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
            if (document is null)
            {
                remainingDocuments.Remove(documentId);
                continue;
            }

            if (!document.GridFsId.HasValue)
            {
                _logger.LogWarning("CERT-In document {DocumentId} missing GridFS payload", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                continue;
            }

            if (!TryDeserializeListing(document.Metadata, out var listing))
            {
                _logger.LogWarning("CERT-In metadata missing for {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                remainingDocuments.Remove(documentId);
                continue;
            }

            byte[] rawBytes;
            try
            {
                rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to download raw CERT-In document {DocumentId}", document.Id);
                throw;
            }

            var dto = CertInDetailParser.Parse(listing, rawBytes);

            // The DTO is serialized to JSON first and then parsed into BSON for storage.
            var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
            var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "certin.v1", payload, _timeProvider.GetUtcNow());

            await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);

            remainingDocuments.Remove(documentId);
            if (!pendingMappings.Contains(documentId))
            {
                pendingMappings.Add(documentId);
            }
        }

        var updatedCursor = cursor
            .WithPendingDocuments(remainingDocuments)
            .WithPendingMappings(pendingMappings);

        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Maps every DTO queued by <see cref="ParseAsync"/> into an <see cref="Advisory"/>,
    /// upserts it and marks the source document as <c>Mapped</c>.
    /// </summary>
    /// <remarks>
    /// Entries whose DTO or document record is missing are dropped from the queue; a DTO that
    /// cannot be deserialized marks its document <c>Failed</c> and is dropped as well.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(services);

        var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
        if (cursor.PendingMappings.Count == 0)
        {
            return;
        }

        // Copy so entries can be removed while the cursor collection is being enumerated.
        var pendingMappings = cursor.PendingMappings.ToList();

        foreach (var documentId in cursor.PendingMappings)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
            var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);

            if (dtoRecord is null || document is null)
            {
                pendingMappings.Remove(documentId);
                continue;
            }

            // Convert the stored BSON back to JSON text so System.Text.Json can read it.
            var dtoJson = dtoRecord.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
            {
                OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
            });

            CertInAdvisoryDto dto;
            try
            {
                dto = JsonSerializer.Deserialize<CertInAdvisoryDto>(dtoJson, SerializerOptions)
                    ?? throw new InvalidOperationException("Deserialized CERT-In DTO is null.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to deserialize CERT-In DTO for {DocumentId}", document.Id);
                await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
                pendingMappings.Remove(documentId);
                continue;
            }

            var advisory = MapAdvisory(dto, document, dtoRecord);
            await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);

            pendingMappings.Remove(documentId);
        }

        var updatedCursor = cursor.WithPendingMappings(pendingMappings);
        await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Builds the <see cref="Advisory"/> for one parsed CERT-In DTO: aliases (advisory id plus
    /// CVE ids), references (CERT-In link, one cve.org link per CVE, extra reference links) and
    /// one vendor-level affected package per vendor name.
    /// </summary>
    /// <param name="dto">Parsed advisory content.</param>
    /// <param name="document">Source document; supplies the fetch provenance.</param>
    /// <param name="dtoRecord">Stored DTO record; its validation timestamp stamps the provenance entries.</param>
    private Advisory MapAdvisory(CertInAdvisoryDto dto, DocumentRecord document, DtoRecord dtoRecord)
    {
        var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
        var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", dto.AdvisoryId, dtoRecord.ValidatedAt);

        // Case-insensitive so the same identifier in different casing is only listed once.
        var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            dto.AdvisoryId,
        };
        foreach (var cve in dto.CveIds)
        {
            aliases.Add(cve);
        }

        var references = new List<AdvisoryReference>();

        if (!TryAddReference(references, dto.Link, "advisory", "cert-in", dtoRecord))
        {
            _logger.LogWarning("Invalid CERT-In link {Link} for advisory {AdvisoryId}", dto.Link, dto.AdvisoryId);
        }

        foreach (var cve in dto.CveIds)
        {
            var url = $"https://www.cve.org/CVERecord?id={cve}";
            if (!TryAddReference(references, url, "advisory", cve, dtoRecord))
            {
                // Best effort: an unusable CVE link must not fail the advisory, but leave a trace.
                _logger.LogDebug("Skipping invalid CVE reference {Url} for advisory {AdvisoryId}", url, dto.AdvisoryId);
            }
        }

        foreach (var link in dto.ReferenceLinks)
        {
            if (!TryAddReference(references, link, "reference", null, dtoRecord))
            {
                // Best effort: an unusable reference link must not fail the advisory, but leave a trace.
                _logger.LogDebug("Skipping invalid reference link {Link} for advisory {AdvisoryId}", link, dto.AdvisoryId);
            }
        }

        var affectedPackages = dto.VendorNames
            .Select(vendor =>
            {
                var provenance = new AdvisoryProvenance(SourceName, "affected", vendor, dtoRecord.ValidatedAt);
                var primitives = new RangePrimitives(
                    null,
                    null,
                    null,
                    new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
                    {
                        ["certin.vendor"] = vendor
                    });

                // No version information is mapped here: the single range only carries the vendor name.
                var ranges = new[]
                {
                    new AffectedVersionRange(
                        rangeKind: "vendor",
                        introducedVersion: null,
                        fixedVersion: null,
                        lastAffectedVersion: null,
                        rangeExpression: null,
                        provenance: provenance,
                        primitives: primitives)
                };

                return new AffectedPackage(
                    AffectedPackageTypes.IcsVendor,
                    vendor,
                    platform: null,
                    versionRanges: ranges,
                    statuses: Array.Empty<AffectedPackageStatus>(),
                    provenance: new[] { provenance });
            })
            .ToArray();

        return new Advisory(
            dto.AdvisoryId,
            dto.Title,
            dto.Summary ?? dto.Content,
            language: "en",
            published: dto.Published,
            // Only the publication date is mapped, so it doubles as the modification date.
            modified: dto.Published,
            severity: dto.Severity,
            exploitKnown: false,
            aliases: aliases,
            references: references,
            affectedPackages: affectedPackages,
            cvssMetrics: Array.Empty<CvssMetric>(),
            provenance: new[] { fetchProvenance, mappingProvenance });
    }

    /// <summary>
    /// Appends a reference for <paramref name="url"/> to <paramref name="references"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the reference was added; <c>false</c> when constructing it threw an
    /// <see cref="ArgumentException"/> (the list is left unchanged in that case).
    /// </returns>
    private bool TryAddReference(
        List<AdvisoryReference> references,
        string url,
        string kind,
        string? sourceTag,
        DtoRecord dtoRecord)
    {
        try
        {
            references.Add(new AdvisoryReference(
                url,
                kind,
                sourceTag,
                null,
                new AdvisoryProvenance(SourceName, "reference", url, dtoRecord.ValidatedAt)));
            return true;
        }
        catch (ArgumentException)
        {
            return false;
        }
    }

    /// <summary>
    /// Loads the persisted cursor for this source, or <c>CertInCursor.Empty</c> when no state exists.
    /// </summary>
    private async Task<CertInCursor> GetCursorAsync(CancellationToken cancellationToken)
    {
        var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
        return state is null ? CertInCursor.Empty : CertInCursor.FromBson(state.Cursor);
    }

    /// <summary>Persists <paramref name="cursor"/> for this source, stamped with the current time.</summary>
    private Task UpdateCursorAsync(CertInCursor cursor, CancellationToken cancellationToken)
    {
        return _stateRepository.UpdateCursorAsync(SourceName, cursor.ToBsonDocument(), _timeProvider.GetUtcNow(), cancellationToken);
    }

    /// <summary>
    /// Rebuilds the listing item from the <c>certin.*</c> metadata entries written by <see cref="FetchAsync"/>.
    /// </summary>
    /// <param name="metadata">Document metadata; may be <c>null</c>.</param>
    /// <param name="listing">The rebuilt listing on success; <c>null</c> otherwise.</param>
    /// <returns>
    /// <c>false</c> when the metadata is <c>null</c>, a required key is missing, the link is not
    /// an absolute URI or the published timestamp cannot be parsed; otherwise <c>true</c>.
    /// </returns>
    private static bool TryDeserializeListing(IReadOnlyDictionary<string, string>? metadata, out CertInListingItem listing)
    {
        listing = null!;
        if (metadata is null)
        {
            return false;
        }

        if (!metadata.TryGetValue("certin.advisoryId", out var advisoryId))
        {
            return false;
        }

        if (!metadata.TryGetValue("certin.title", out var title))
        {
            return false;
        }

        if (!metadata.TryGetValue("certin.link", out var link) || !Uri.TryCreate(link, UriKind.Absolute, out var detailUri))
        {
            return false;
        }

        // The value is written with the invariant "O" format, so it must be parsed culture-independently;
        // the current culture's calendar could otherwise reject or misread it.
        if (!metadata.TryGetValue("certin.published", out var publishedText)
            || !DateTimeOffset.TryParse(publishedText, CultureInfo.InvariantCulture, DateTimeStyles.None, out var published))
        {
            return false;
        }

        // The summary is optional; a missing key simply yields null.
        metadata.TryGetValue("certin.summary", out var summary);

        listing = new CertInListingItem(advisoryId, title, detailUri, published.ToUniversalTime(), summary);
        return true;
    }
}