using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Connector.Common.Http;
using StellaOps.Concelier.Connector.Common.Telemetry;
using StellaOps.Concelier.Core.Aoc;
using StellaOps.Concelier.Core.Linksets;
using StellaOps.Concelier.RawModels;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Documents;
using System.Text.Json;

namespace StellaOps.Concelier.Connector.Common.Fetch;

/// <summary>
/// Executes HTTP fetches for connectors, capturing raw responses with metadata for downstream stages.
/// </summary>
public sealed class SourceFetchService
{
    // NOTE(review): several declarations in this copy of the file carry no generic type arguments
    // (ILogger, IOptionsMonitor, IOptions, Task return types, Dictionary/ImmutableDictionary/ImmutableArray
    // usages). They are reproduced exactly as found - confirm against the canonical source.

    // Accept header used when a request supplies none, or none that parse.
    private static readonly string[] DefaultAcceptHeaders = { "application/json" };

    private readonly IHttpClientFactory _httpClientFactory;
    private readonly RawDocumentStorage _rawDocumentStorage;
    private readonly IDocumentStore _documentStore;
    private readonly ILogger _logger;
    private readonly TimeProvider _timeProvider;
    private readonly IOptionsMonitor _httpClientOptions;
    private readonly IOptions _storageOptions;
    private readonly IJitterSource _jitterSource;
    private readonly IAdvisoryRawWriteGuard _guard;
    private readonly IAdvisoryLinksetMapper _linksetMapper;
    private readonly string _connectorVersion;

    /// <summary>
    /// Creates the service. Only <paramref name="timeProvider"/> is truly optional (it falls back to
    /// <see cref="TimeProvider.System"/>); <paramref name="httpClientOptions"/> and
    /// <paramref name="storageOptions"/> default to <c>null</c> in the signature but are rejected when null.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any dependency other than <paramref name="timeProvider"/> is null.</exception>
    public SourceFetchService(
        IHttpClientFactory httpClientFactory,
        RawDocumentStorage rawDocumentStorage,
        IDocumentStore documentStore,
        ILogger logger,
        IJitterSource jitterSource,
        IAdvisoryRawWriteGuard guard,
        IAdvisoryLinksetMapper linksetMapper,
        TimeProvider? timeProvider = null,
        IOptionsMonitor? httpClientOptions = null,
        IOptions? storageOptions = null)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _jitterSource = jitterSource ?? throw new ArgumentNullException(nameof(jitterSource));
        _guard = guard ?? throw new ArgumentNullException(nameof(guard));
        _linksetMapper = linksetMapper ?? throw new ArgumentNullException(nameof(linksetMapper));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _httpClientOptions = httpClientOptions ?? throw new ArgumentNullException(nameof(httpClientOptions));
        _storageOptions = storageOptions ?? throw new ArgumentNullException(nameof(storageOptions));
        _connectorVersion = typeof(SourceFetchService).Assembly.GetName().Version?.ToString() ?? "0.0.0";
    }

    /// <summary>
    /// Sends the request, validates the payload through the raw-write guard, uploads the bytes to raw
    /// document storage and upserts a document record in the <c>PendingParse</c> status.
    /// </summary>
    /// <param name="request">The fetch to perform.</param>
    /// <param name="cancellationToken">Token flowed to the HTTP call, the upload and the store operations.</param>
    /// <returns>A not-modified result for HTTP 304, otherwise a success result wrapping the upserted record.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="HttpRequestException">The response status is neither 304 nor a success code.</exception>
    /// <exception cref="InvalidOperationException">The payload is empty or is not valid JSON.</exception>
    public async Task FetchAsync(SourceFetchRequest request, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(request);

        using var activity = SourceDiagnostics.StartFetch(request.SourceName, request.RequestUri, request.Method.Method, request.ClientName);
        var stopwatch = Stopwatch.StartNew();

        try
        {
            var sendResult = await SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
            var response = sendResult.Response;

            using (response)
            {
                // Captured right after headers arrive; body download time is not part of this duration.
                var duration = stopwatch.Elapsed;
                activity?.SetTag("http.status_code", (int)response.StatusCode);
                activity?.SetTag("http.retry.count", sendResult.Attempts - 1);

                var rateLimitRemaining = TryGetHeaderValue(response.Headers, "x-ratelimit-remaining");

                if (response.StatusCode == HttpStatusCode.NotModified)
                {
                    _logger.LogDebug("Source {Source} returned 304 Not Modified for {Uri}", request.SourceName, request.RequestUri);
                    SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                    activity?.SetStatus(ActivityStatusCode.Ok);
                    return SourceFetchResult.NotModified(response.StatusCode);
                }

                if (!response.IsSuccessStatusCode)
                {
                    var body = await ReadResponsePreviewAsync(response, cancellationToken).ConfigureAwait(false);
                    SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                    activity?.SetStatus(ActivityStatusCode.Error, body);
                    throw new HttpRequestException($"Fetch failed with status {(int)response.StatusCode} {response.StatusCode} from {request.RequestUri}. Body preview: {body}");
                }

                var contentBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
                var contentHash = Convert.ToHexString(SHA256.HashData(contentBytes)).ToLowerInvariant();
                var fetchedAt = _timeProvider.GetUtcNow();
                var contentType = response.Content.Headers.ContentType?.ToString();
                var headers = CreateHeaderDictionary(response);

                // Copy the caller's metadata so the request object is never mutated.
                var metadata = request.Metadata is null
                    ? new Dictionary(StringComparer.Ordinal)
                    : new Dictionary(request.Metadata, StringComparer.Ordinal);
                metadata["attempts"] = sendResult.Attempts.ToString(CultureInfo.InvariantCulture);
                metadata["fetchedAt"] = fetchedAt.ToString("O");

                // Building the raw document also enriches `metadata` (see ApplyRawDocumentMetadata).
                var guardDocument = CreateRawAdvisoryDocument(
                    request,
                    response,
                    contentBytes,
                    contentHash,
                    metadata,
                    headers,
                    fetchedAt);
                _guard.EnsureValid(guardDocument);

                var storageOptions = _storageOptions.Value;
                var retention = storageOptions.RawDocumentRetention;
                DateTimeOffset? expiresAt = null;
                if (retention > TimeSpan.Zero)
                {
                    // A negative grace period is treated as zero.
                    var grace = storageOptions.RawDocumentRetentionTtlGrace >= TimeSpan.Zero
                        ? storageOptions.RawDocumentRetentionTtlGrace
                        : TimeSpan.Zero;

                    try
                    {
                        expiresAt = fetchedAt.Add(retention).Add(grace);
                    }
                    catch (ArgumentOutOfRangeException)
                    {
                        // The sum overflowed the representable range; clamp to the maximum.
                        expiresAt = DateTimeOffset.MaxValue;
                    }
                }

                var gridFsId = await _rawDocumentStorage.UploadAsync(
                    request.SourceName,
                    request.RequestUri.ToString(),
                    contentBytes,
                    contentType,
                    expiresAt,
                    cancellationToken).ConfigureAwait(false);

                // Reuse the id of an existing record for the same source/URI so the upsert replaces it.
                var existing = await _documentStore.FindBySourceAndUriAsync(request.SourceName, request.RequestUri.ToString(), cancellationToken).ConfigureAwait(false);
                var recordId = existing?.Id ?? Guid.NewGuid();

                var record = new DocumentRecord(
                    recordId,
                    request.SourceName,
                    request.RequestUri.ToString(),
                    fetchedAt,
                    contentHash,
                    DocumentStatuses.PendingParse,
                    contentType,
                    headers,
                    metadata,
                    response.Headers.ETag?.Tag,
                    response.Content.Headers.LastModified,
                    gridFsId,
                    expiresAt);

                var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
                SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, contentBytes.LongLength, rateLimitRemaining);
                activity?.SetStatus(ActivityStatusCode.Ok);
                _logger.LogInformation("Fetched {Source} document {Uri} (sha256={Sha})", request.SourceName, request.RequestUri, contentHash);
                return SourceFetchResult.Success(upserted, response.StatusCode);
            }
        }
        catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
        {
            activity?.SetStatus(ActivityStatusCode.Error, ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Builds the raw advisory document handed to the write guard: parses the payload as JSON, derives
    /// source/upstream/content descriptors from metadata and the response, maps the linkset, and writes
    /// the derived values back into <paramref name="metadata"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The payload is empty or is not valid JSON.</exception>
    private AdvisoryRawDocument CreateRawAdvisoryDocument(
        SourceFetchRequest request,
        HttpResponseMessage response,
        byte[] contentBytes,
        string contentHash,
        IDictionary metadata,
        IDictionary headers,
        DateTimeOffset fetchedAt)
    {
        var tenant = _storageOptions.Value.DefaultTenant;

        // Drop entries with blank keys or null values before snapshotting.
        var metadataBuilder = ImmutableDictionary.CreateBuilder(StringComparer.Ordinal);
        foreach (var pair in metadata)
        {
            if (!string.IsNullOrWhiteSpace(pair.Key) && pair.Value is not null)
            {
                metadataBuilder[pair.Key] = pair.Value;
            }
        }

        using var jsonDocument = ParseContent(request, contentBytes);

        var metadataSnapshot = metadataBuilder.ToImmutable();
        var stream = ResolveStream(metadataSnapshot, response, request);
        if (!string.IsNullOrWhiteSpace(stream))
        {
            metadataBuilder["source.stream"] = stream!;
            metadataSnapshot = metadataBuilder.ToImmutable();
        }

        var vendor = ResolveVendor(request.SourceName, metadataSnapshot);
        metadataBuilder["source.vendor"] = vendor;
        metadataBuilder["source.connector_version"] = _connectorVersion;
        metadataSnapshot = metadataBuilder.ToImmutable();

        var headerSnapshot = headers.ToImmutableDictionary(
            static pair => pair.Key,
            static pair => pair.Value,
            StringComparer.Ordinal);

        var provenance = BuildProvenance(request, response, metadataSnapshot, headerSnapshot, fetchedAt, contentHash);

        var upstreamId = ResolveUpstreamId(metadataSnapshot, request);
        var documentVersion = ResolveDocumentVersion(metadataSnapshot, response, fetchedAt);
        var signature = CreateSignatureMetadata(metadataSnapshot);

        var aliases = upstreamId is null
            ? ImmutableArray.Empty
            : ImmutableArray.Create(upstreamId);

        // Without an upstream id the content hash serves as the primary identifier.
        var identifiers = new RawIdentifiers(aliases, upstreamId ?? contentHash);

        var contentFormat = ResolveFormat(metadataSnapshot, response);
        var specVersion = GetMetadataValue(metadataSnapshot, "content.specVersion", "content.spec_version");
        var encoding = response.Content.Headers.ContentType?.CharSet;

        // Clone the root element so it stays usable after jsonDocument is disposed.
        var content = new RawContent(
            contentFormat,
            specVersion,
            jsonDocument.RootElement.Clone(),
            encoding);

        var source = new RawSourceMetadata(
            vendor,
            request.SourceName,
            _connectorVersion,
            stream);

        var upstream = new RawUpstreamMetadata(
            upstreamId ?? request.RequestUri.ToString(),
            documentVersion,
            fetchedAt,
            contentHash,
            signature,
            provenance);

        var supersedes = GetMetadataValue(metadataSnapshot, "supersedes");

        var rawDocument = new AdvisoryRawDocument(
            tenant,
            source,
            upstream,
            content,
            identifiers,
            new RawLinkset
            {
                Aliases = aliases,
                PackageUrls = ImmutableArray.Empty,
                Cpes = ImmutableArray.Empty,
                References = ImmutableArray.Empty,
                ReconciledFrom = ImmutableArray.Empty,
                Notes = ImmutableDictionary.Empty
            },
            supersedes);

        // Replace the placeholder linkset with the one produced by the mapper.
        var mappedLinkset = _linksetMapper.Map(rawDocument);
        rawDocument = rawDocument with { Linkset = mappedLinkset };

        ApplyRawDocumentMetadata(metadata, rawDocument);

        return rawDocument;
    }

    /// <summary>
    /// Parses the payload as JSON.
    /// </summary>
    /// <exception cref="InvalidOperationException">The payload is null, empty, or not valid JSON.</exception>
    private static JsonDocument ParseContent(SourceFetchRequest request, byte[] contentBytes)
    {
        if (contentBytes is null || contentBytes.Length == 0)
        {
            throw new InvalidOperationException($"Source {request.SourceName} returned an empty payload for {request.RequestUri}.");
        }

        try
        {
            return JsonDocument.Parse(contentBytes);
        }
        catch (JsonException ex)
        {
            throw new InvalidOperationException($"Raw advisory payload from {request.SourceName} is not valid JSON ({request.RequestUri}).", ex);
        }
    }

    /// <summary>
    /// Produces the provenance map: all metadata entries, every response header under an
    /// <c>http.header.&lt;lower-cased name&gt;</c> key, plus HTTP and fetch details. Later writes
    /// overwrite earlier entries that share a key.
    /// </summary>
    private static ImmutableDictionary BuildProvenance(
        SourceFetchRequest request,
        HttpResponseMessage response,
        ImmutableDictionary metadata,
        ImmutableDictionary headers,
        DateTimeOffset fetchedAt,
        string contentHash)
    {
        var builder = metadata.ToBuilder();

        foreach (var header in headers)
        {
            var key = $"http.header.{header.Key.Trim().ToLowerInvariant()}";
            builder[key] = header.Value;
        }

        builder["http.status_code"] = ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture);
        builder["http.method"] = request.Method.Method;
        builder["http.uri"] = request.RequestUri.ToString();

        if (response.Headers.ETag?.Tag is { } etag)
        {
            builder["http.etag"] = etag;
        }

        if (response.Content.Headers.LastModified is { } lastModified)
        {
            builder["http.last_modified"] = lastModified.ToString("O");
        }

        builder["fetch.fetched_at"] = fetchedAt.ToString("O");
        builder["fetch.content_hash"] = contentHash;
        builder["source.client"] = request.ClientName;

        return builder.ToImmutable();
    }

    /// <summary>
    /// Returns the trimmed <c>source.vendor</c> metadata value when present, otherwise a vendor
    /// identifier derived from the source name.
    /// </summary>
    private string ResolveVendor(string sourceName, ImmutableDictionary metadata)
    {
        if (metadata.TryGetValue("source.vendor", out var vendor) && !string.IsNullOrWhiteSpace(vendor))
        {
            return vendor.Trim();
        }

        return ExtractVendorIdentifier(sourceName);
    }

    /// <summary>
    /// Resolves the stream name: first non-blank of the <c>source.stream</c>, <c>connector.stream</c>
    /// or <c>stream</c> metadata values, then the response media type, then the last segment of the
    /// request URI (which may be null or empty).
    /// </summary>
    private static string? ResolveStream(
        ImmutableDictionary metadata,
        HttpResponseMessage response,
        SourceFetchRequest request)
    {
        foreach (var key in new[] { "source.stream", "connector.stream", "stream" })
        {
            if (metadata.TryGetValue(key, out var value) && !string.IsNullOrWhiteSpace(value))
            {
                return value.Trim();
            }
        }

        if (!string.IsNullOrWhiteSpace(response.Content.Headers.ContentType?.MediaType))
        {
            return response.Content.Headers.ContentType!.MediaType;
        }

        return request.RequestUri.Segments.LastOrDefault()?.Trim('/');
    }

    /// <summary>
    /// Returns the trimmed <c>content.format</c> metadata value, else the response media type, else
    /// <c>"unknown"</c>.
    /// </summary>
    private static string ResolveFormat(ImmutableDictionary metadata, HttpResponseMessage response)
    {
        if (metadata.TryGetValue("content.format", out var format) && !string.IsNullOrWhiteSpace(format))
        {
            return format.Trim();
        }

        return response.Content.Headers.ContentType?.MediaType ?? "unknown";
    }

    /// <summary>
    /// Returns the first non-blank metadata value among the known identifier keys (in priority order),
    /// else the last non-empty segment of the request URI, else null.
    /// </summary>
    private static string? ResolveUpstreamId(ImmutableDictionary metadata, SourceFetchRequest request)
    {
        var candidateKeys = new[]
        {
            "aoc.upstream_id",
            "upstream.id",
            "upstreamId",
            "advisory.id",
            "advisoryId",
            "vulnerability.id",
            "vulnerabilityId",
            "cve",
            "cveId",
            "ghsa",
            "ghsaId",
            "msrc.advisoryId",
            "msrc.vulnerabilityId",
            "oracle.csaf.entryId",
            "ubuntu.advisoryId",
            "ics.advisoryId",
            "document.id",
        };

        foreach (var key in candidateKeys)
        {
            if (metadata.TryGetValue(key, out var value) && !string.IsNullOrWhiteSpace(value))
            {
                return value.Trim();
            }
        }

        var segments = request.RequestUri.Segments;
        if (segments.Length > 0)
        {
            var last = segments[^1].Trim('/');
            if (!string.IsNullOrEmpty(last))
            {
                return last;
            }
        }

        return null;
    }

    /// <summary>
    /// Returns the first non-blank metadata value among the known version keys (in priority order),
    /// else the parsed <c>Last-Modified</c> value in round-trip format, else the raw
    /// <c>Last-Modified</c> header text, else the fetch timestamp in round-trip format.
    /// </summary>
    private static string? ResolveDocumentVersion(ImmutableDictionary metadata, HttpResponseMessage response, DateTimeOffset fetchedAt)
    {
        var candidateKeys = new[]
        {
            "upstream.version",
            "document.version",
            "revision",
            "msrc.lastModified",
            "msrc.releaseDate",
            "ubuntu.version",
            "oracle.csaf.revision",
            "lastModified",
            "modified",
            "published",
        };

        foreach (var key in candidateKeys)
        {
            if (metadata.TryGetValue(key, out var value) && !string.IsNullOrWhiteSpace(value))
            {
                return value.Trim();
            }
        }

        if (response.Content.Headers.LastModified is { } lastModified)
        {
            return lastModified.ToString("O");
        }

        // Last-Modified is a content header, so the raw value has to be read from the content header
        // collection; looking it up on response.Headers never matches. This fallback covers values
        // that are present but could not be parsed into a date.
        if (response.Content.Headers.TryGetValues("Last-Modified", out var values))
        {
            var first = values.FirstOrDefault();
            if (!string.IsNullOrWhiteSpace(first))
            {
                return first.Trim();
            }
        }

        return fetchedAt.ToString("O");
    }

    /// <summary>
    /// Builds signature metadata from the <c>upstream.signature.*</c> / <c>signature.*</c> keys.
    /// Reports "not present" unless a presence flag parses to <c>true</c>.
    /// </summary>
    private static RawSignatureMetadata CreateSignatureMetadata(ImmutableDictionary metadata)
    {
        if (!TryGetBoolean(metadata, out var present, "upstream.signature.present", "signature.present"))
        {
            return new RawSignatureMetadata(false);
        }

        if (!present)
        {
            return new RawSignatureMetadata(false);
        }

        var format = GetMetadataValue(metadata, "upstream.signature.format", "signature.format");
        var keyId = GetMetadataValue(metadata, "upstream.signature.key_id", "signature.key_id");
        var signature = GetMetadataValue(metadata, "upstream.signature.sig", "signature.sig");
        var certificate = GetMetadataValue(metadata, "upstream.signature.certificate", "signature.certificate");
        var digest = GetMetadataValue(metadata, "upstream.signature.digest", "signature.digest");

        return new RawSignatureMetadata(true, format, keyId, signature, certificate, digest);
    }

    /// <summary>
    /// Finds the first of <paramref name="keys"/> whose metadata value parses as a boolean.
    /// </summary>
    /// <returns><c>true</c> when a parsable value was found; otherwise <c>false</c> with <paramref name="value"/> set to its default.</returns>
    private static bool TryGetBoolean(ImmutableDictionary metadata, out bool value, params string[] keys)
    {
        foreach (var key in keys)
        {
            if (metadata.TryGetValue(key, out var raw) && bool.TryParse(raw, out value))
            {
                return true;
            }
        }

        value = default;
        return false;
    }

    /// <summary>
    /// Returns the trimmed value of the first of <paramref name="keys"/> that has a non-blank
    /// metadata value, or null when none does.
    /// </summary>
    private static string? GetMetadataValue(ImmutableDictionary metadata, params string[] keys)
    {
        foreach (var key in keys)
        {
            if (metadata.TryGetValue(key, out var value) && !string.IsNullOrWhiteSpace(value))
            {
                return value.Trim();
            }
        }

        return null;
    }

    /// <summary>
    /// Derives a vendor identifier from a source name: the text after the last <c>.</c> or <c>:</c>
    /// when such a separator exists and is not the final character, otherwise the trimmed name;
    /// <c>"unknown"</c> for a blank name.
    /// </summary>
    private static string ExtractVendorIdentifier(string sourceName)
    {
        if (string.IsNullOrWhiteSpace(sourceName))
        {
            return "unknown";
        }

        var normalized = sourceName.Trim();
        var separatorIndex = normalized.LastIndexOfAny(new[] { '.', ':' });
        if (separatorIndex >= 0 && separatorIndex < normalized.Length - 1)
        {
            return normalized[(separatorIndex + 1)..];
        }

        return normalized;
    }

    /// <summary>
    /// Writes the tenant, source and upstream descriptors of <paramref name="document"/> back into the
    /// mutable <paramref name="metadata"/> dictionary. Stream and document version are written only
    /// when non-blank.
    /// </summary>
    private static void ApplyRawDocumentMetadata(IDictionary metadata, AdvisoryRawDocument document)
    {
        metadata["tenant"] = document.Tenant;
        metadata["source.vendor"] = document.Source.Vendor;
        metadata["source.connector_version"] = document.Source.ConnectorVersion;

        if (!string.IsNullOrWhiteSpace(document.Source.Stream))
        {
            metadata["source.stream"] = document.Source.Stream!;
        }

        metadata["upstream.upstream_id"] = document.Upstream.UpstreamId;
        metadata["upstream.content_hash"] = document.Upstream.ContentHash;

        if (!string.IsNullOrWhiteSpace(document.Upstream.DocumentVersion))
        {
            metadata["upstream.document_version"] = document.Upstream.DocumentVersion!;
        }
    }

    /// <summary>
    /// Sends the request and returns the response body and headers without validating or persisting
    /// anything.
    /// </summary>
    /// <param name="request">The fetch to perform.</param>
    /// <param name="cancellationToken">Token flowed to the HTTP call and the body read.</param>
    /// <returns>A not-modified result for HTTP 304, otherwise a success result carrying the content bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="HttpRequestException">The response status is neither 304 nor a success code.</exception>
    public async Task FetchContentAsync(SourceFetchRequest request, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(request);

        using var activity = SourceDiagnostics.StartFetch(request.SourceName, request.RequestUri, request.Method.Method, request.ClientName);
        var stopwatch = Stopwatch.StartNew();

        try
        {
            // NOTE(review): the result is discarded; presumably this forces the named options to be
            // resolved before the request is sent - confirm the intent.
            _ = _httpClientOptions.Get(request.ClientName);
            var sendResult = await SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
            var response = sendResult.Response;

            using (response)
            {
                // Captured right after headers arrive; body download time is not part of this duration.
                var duration = stopwatch.Elapsed;
                activity?.SetTag("http.status_code", (int)response.StatusCode);
                activity?.SetTag("http.retry.count", sendResult.Attempts - 1);

                var rateLimitRemaining = TryGetHeaderValue(response.Headers, "x-ratelimit-remaining");

                if (response.StatusCode == HttpStatusCode.NotModified)
                {
                    _logger.LogDebug("Source {Source} returned 304 Not Modified for {Uri}", request.SourceName, request.RequestUri);
                    SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                    activity?.SetStatus(ActivityStatusCode.Ok);
                    return SourceFetchContentResult.NotModified(response.StatusCode, sendResult.Attempts);
                }

                if (!response.IsSuccessStatusCode)
                {
                    var body = await ReadResponsePreviewAsync(response, cancellationToken).ConfigureAwait(false);
                    SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                    activity?.SetStatus(ActivityStatusCode.Error, body);
                    throw new HttpRequestException($"Fetch failed with status {(int)response.StatusCode} {response.StatusCode} from {request.RequestUri}. Body preview: {body}");
                }

                var contentBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
                var headers = CreateHeaderDictionary(response);
                SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength ?? contentBytes.LongLength, rateLimitRemaining);
                activity?.SetStatus(ActivityStatusCode.Ok);
                return SourceFetchContentResult.Success(
                    response.StatusCode,
                    contentBytes,
                    response.Headers.ETag?.Tag,
                    response.Content.Headers.LastModified,
                    response.Content.Headers.ContentType?.ToString(),
                    sendResult.Attempts,
                    headers);
            }
        }
        catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
        {
            activity?.SetStatus(ActivityStatusCode.Error, ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Sends the request through the retry policy using the named client's configured attempt count
    /// and base delay, and reports how many attempts were made.
    /// </summary>
    private async Task SendAsync(SourceFetchRequest request, HttpCompletionOption completionOption, CancellationToken cancellationToken)
    {
        var attemptCount = 0;
        var options = _httpClientOptions.Get(request.ClientName);

        var response = await SourceRetryPolicy.SendWithRetryAsync(
            () => CreateHttpRequestMessage(request),
            async (httpRequest, ct) =>
            {
                attemptCount++;
                // A client is obtained from the factory on every attempt.
                var client = _httpClientFactory.CreateClient(request.ClientName);
                if (request.TimeoutOverride.HasValue)
                {
                    client.Timeout = request.TimeoutOverride.Value;
                }

                return await client.SendAsync(httpRequest, completionOption, ct).ConfigureAwait(false);
            },
            maxAttempts: options.MaxAttempts,
            baseDelay: options.BaseDelay,
            _jitterSource,
            context => SourceDiagnostics.RecordRetry(
                request.SourceName,
                request.ClientName,
                context.Response?.StatusCode,
                context.Attempt,
                context.Delay),
            cancellationToken).ConfigureAwait(false);

        return new SourceFetchSendResult(response, attemptCount);
    }

    /// <summary>
    /// Builds the outgoing HTTP message: Accept headers (falling back to the default when none of the
    /// requested values parse), plus <c>If-None-Match</c> / <c>If-Modified-Since</c> when the request
    /// carries an ETag or last-modified value. An ETag that does not parse is silently skipped.
    /// </summary>
    internal static HttpRequestMessage CreateHttpRequestMessage(SourceFetchRequest request)
    {
        var httpRequest = new HttpRequestMessage(request.Method, request.RequestUri);
        var acceptValues = request.AcceptHeaders is { Count: > 0 } headers
            ? headers
            : DefaultAcceptHeaders;

        httpRequest.Headers.Accept.Clear();
        var added = false;
        foreach (var mediaType in acceptValues)
        {
            if (string.IsNullOrWhiteSpace(mediaType))
            {
                continue;
            }

            if (MediaTypeWithQualityHeaderValue.TryParse(mediaType, out var headerValue))
            {
                httpRequest.Headers.Accept.Add(headerValue);
                added = true;
            }
        }

        if (!added)
        {
            httpRequest.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(DefaultAcceptHeaders[0]));
        }

        if (!string.IsNullOrWhiteSpace(request.ETag))
        {
            if (System.Net.Http.Headers.EntityTagHeaderValue.TryParse(request.ETag, out var etag))
            {
                httpRequest.Headers.IfNoneMatch.Add(etag);
            }
        }

        if (request.LastModified.HasValue)
        {
            httpRequest.Headers.IfModifiedSince = request.LastModified.Value;
        }

        return httpRequest;
    }

    /// <summary>
    /// Best-effort read of the response body as UTF-8, truncated to at most 256 characters, for use in
    /// error messages. Read failures yield an empty string; cancellation is propagated.
    /// </summary>
    /// <exception cref="OperationCanceledException">The read was cancelled.</exception>
    private static async Task ReadResponsePreviewAsync(HttpResponseMessage response, CancellationToken cancellationToken)
    {
        try
        {
            var buffer = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
            var preview = Encoding.UTF8.GetString(buffer);
            return preview.Length > 256 ? preview[..256] : preview;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // The preview is diagnostic only, so a failed read must not mask the HTTP failure being
            // reported. Cancellation is excluded so a cancelled fetch is not misreported as an HTTP error.
            return "";
        }
    }

    /// <summary>
    /// Returns the first value of the named response header, or null when the header is absent.
    /// </summary>
    private static string? TryGetHeaderValue(HttpResponseHeaders headers, string name)
    {
        if (headers.TryGetValues(name, out var values))
        {
            return values.FirstOrDefault();
        }

        return null;
    }

    /// <summary>
    /// Flattens response and content headers into one case-insensitive dictionary, joining multiple
    /// values with a comma. A content header overwrites a response header of the same name.
    /// </summary>
    private static Dictionary CreateHeaderDictionary(HttpResponseMessage response)
    {
        var headers = new Dictionary(StringComparer.OrdinalIgnoreCase);

        foreach (var header in response.Headers)
        {
            headers[header.Key] = string.Join(",", header.Value);
        }

        foreach (var header in response.Content.Headers)
        {
            headers[header.Key] = string.Join(",", header.Value);
        }

        return headers;
    }

    // Pairs the final response with the number of send attempts it took.
    private readonly record struct SourceFetchSendResult(HttpResponseMessage Response, int Attempts);
}