Rename Concelier Source modules to Connector
This commit is contained in:
		| @@ -0,0 +1,338 @@ | ||||
| using System.Diagnostics; | ||||
| using System.Globalization; | ||||
| using System.Linq; | ||||
| using System.Net; | ||||
| using System.Net.Http; | ||||
| using System.Net.Http.Headers; | ||||
| using System.Security.Cryptography; | ||||
| using System.Text; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using Microsoft.Extensions.Options; | ||||
| using MongoDB.Bson; | ||||
| using StellaOps.Concelier.Connector.Common.Http; | ||||
| using StellaOps.Concelier.Connector.Common.Telemetry; | ||||
| using StellaOps.Concelier.Storage.Mongo; | ||||
| using StellaOps.Concelier.Storage.Mongo.Documents; | ||||
|  | ||||
| namespace StellaOps.Concelier.Connector.Common.Fetch; | ||||
|  | ||||
| /// <summary> | ||||
| /// Executes HTTP fetches for connectors, capturing raw responses with metadata for downstream stages. | ||||
| /// </summary> | ||||
| public sealed class SourceFetchService | ||||
| { | ||||
|     private static readonly string[] DefaultAcceptHeaders = { "application/json" }; | ||||
|  | ||||
|     private readonly IHttpClientFactory _httpClientFactory; | ||||
|     private readonly RawDocumentStorage _rawDocumentStorage; | ||||
|     private readonly IDocumentStore _documentStore; | ||||
|     private readonly ILogger<SourceFetchService> _logger; | ||||
|     private readonly TimeProvider _timeProvider; | ||||
|     private readonly IOptionsMonitor<SourceHttpClientOptions> _httpClientOptions; | ||||
|     private readonly IOptions<MongoStorageOptions> _storageOptions; | ||||
|     private readonly IJitterSource _jitterSource; | ||||
|  | ||||
|     public SourceFetchService( | ||||
|         IHttpClientFactory httpClientFactory, | ||||
|         RawDocumentStorage rawDocumentStorage, | ||||
|         IDocumentStore documentStore, | ||||
|         ILogger<SourceFetchService> logger, | ||||
|         IJitterSource jitterSource, | ||||
|         TimeProvider? timeProvider = null, | ||||
|         IOptionsMonitor<SourceHttpClientOptions>? httpClientOptions = null, | ||||
|         IOptions<MongoStorageOptions>? storageOptions = null) | ||||
|     { | ||||
|         _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory)); | ||||
|         _rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage)); | ||||
|         _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore)); | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|         _jitterSource = jitterSource ?? throw new ArgumentNullException(nameof(jitterSource)); | ||||
|         _timeProvider = timeProvider ?? TimeProvider.System; | ||||
|         _httpClientOptions = httpClientOptions ?? throw new ArgumentNullException(nameof(httpClientOptions)); | ||||
|         _storageOptions = storageOptions ?? throw new ArgumentNullException(nameof(storageOptions)); | ||||
|     } | ||||
|  | ||||
|     public async Task<SourceFetchResult> FetchAsync(SourceFetchRequest request, CancellationToken cancellationToken) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(request); | ||||
|  | ||||
|         using var activity = SourceDiagnostics.StartFetch(request.SourceName, request.RequestUri, request.Method.Method, request.ClientName); | ||||
|         var stopwatch = Stopwatch.StartNew(); | ||||
|  | ||||
|         try | ||||
|         { | ||||
|             var sendResult = await SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); | ||||
|             var response = sendResult.Response; | ||||
|  | ||||
|             using (response) | ||||
|             { | ||||
|                 var duration = stopwatch.Elapsed; | ||||
|                 activity?.SetTag("http.status_code", (int)response.StatusCode); | ||||
|                 activity?.SetTag("http.retry.count", sendResult.Attempts - 1); | ||||
|  | ||||
|                 var rateLimitRemaining = TryGetHeaderValue(response.Headers, "x-ratelimit-remaining"); | ||||
|  | ||||
|                 if (response.StatusCode == HttpStatusCode.NotModified) | ||||
|                 { | ||||
|                     _logger.LogDebug("Source {Source} returned 304 Not Modified for {Uri}", request.SourceName, request.RequestUri); | ||||
|                     SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining); | ||||
|                     activity?.SetStatus(ActivityStatusCode.Ok); | ||||
|                     return SourceFetchResult.NotModified(response.StatusCode); | ||||
|                 } | ||||
|  | ||||
|                 if (!response.IsSuccessStatusCode) | ||||
|                 { | ||||
|                     var body = await ReadResponsePreviewAsync(response, cancellationToken).ConfigureAwait(false); | ||||
|                     SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining); | ||||
|                     activity?.SetStatus(ActivityStatusCode.Error, body); | ||||
|                     throw new HttpRequestException($"Fetch failed with status {(int)response.StatusCode} {response.StatusCode} from {request.RequestUri}. Body preview: {body}"); | ||||
|                 } | ||||
|  | ||||
|                 var contentBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false); | ||||
|                 var sha256 = Convert.ToHexString(SHA256.HashData(contentBytes)).ToLowerInvariant(); | ||||
|                 var fetchedAt = _timeProvider.GetUtcNow(); | ||||
|                 var contentType = response.Content.Headers.ContentType?.ToString(); | ||||
|                 var storageOptions = _storageOptions.Value; | ||||
|                 var retention = storageOptions.RawDocumentRetention; | ||||
|                 DateTimeOffset? expiresAt = null; | ||||
|                 if (retention > TimeSpan.Zero) | ||||
|                 { | ||||
|                     var grace = storageOptions.RawDocumentRetentionTtlGrace >= TimeSpan.Zero | ||||
|                         ? storageOptions.RawDocumentRetentionTtlGrace | ||||
|                         : TimeSpan.Zero; | ||||
|  | ||||
|                     try | ||||
|                     { | ||||
|                         expiresAt = fetchedAt.Add(retention).Add(grace); | ||||
|                     } | ||||
|                     catch (ArgumentOutOfRangeException) | ||||
|                     { | ||||
|                         expiresAt = DateTimeOffset.MaxValue; | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 var gridFsId = await _rawDocumentStorage.UploadAsync( | ||||
|                     request.SourceName, | ||||
|                     request.RequestUri.ToString(), | ||||
|                     contentBytes, | ||||
|                     contentType, | ||||
|                     expiresAt, | ||||
|                     cancellationToken).ConfigureAwait(false); | ||||
|  | ||||
|                 var headers = CreateHeaderDictionary(response); | ||||
|  | ||||
|                 var metadata = request.Metadata is null | ||||
|                     ? new Dictionary<string, string>(StringComparer.Ordinal) | ||||
|                     : new Dictionary<string, string>(request.Metadata, StringComparer.Ordinal); | ||||
|                 metadata["attempts"] = sendResult.Attempts.ToString(CultureInfo.InvariantCulture); | ||||
|                 metadata["fetchedAt"] = fetchedAt.ToString("O"); | ||||
|  | ||||
|                 var existing = await _documentStore.FindBySourceAndUriAsync(request.SourceName, request.RequestUri.ToString(), cancellationToken).ConfigureAwait(false); | ||||
|                 var recordId = existing?.Id ?? Guid.NewGuid(); | ||||
|  | ||||
|                 var record = new DocumentRecord( | ||||
|                     recordId, | ||||
|                     request.SourceName, | ||||
|                     request.RequestUri.ToString(), | ||||
|                     fetchedAt, | ||||
|                     sha256, | ||||
|                     DocumentStatuses.PendingParse, | ||||
|                     contentType, | ||||
|                     headers, | ||||
|                     metadata, | ||||
|                     response.Headers.ETag?.Tag, | ||||
|                     response.Content.Headers.LastModified, | ||||
|                     gridFsId, | ||||
|                     expiresAt); | ||||
|  | ||||
|                 var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false); | ||||
|                 SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, contentBytes.LongLength, rateLimitRemaining); | ||||
|                 activity?.SetStatus(ActivityStatusCode.Ok); | ||||
|                 _logger.LogInformation("Fetched {Source} document {Uri} (sha256={Sha})", request.SourceName, request.RequestUri, sha256); | ||||
|                 return SourceFetchResult.Success(upserted, response.StatusCode); | ||||
|             } | ||||
|         } | ||||
|         catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException) | ||||
|         { | ||||
|             activity?.SetStatus(ActivityStatusCode.Error, ex.Message); | ||||
|             throw; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     public async Task<SourceFetchContentResult> FetchContentAsync(SourceFetchRequest request, CancellationToken cancellationToken) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(request); | ||||
|  | ||||
|         using var activity = SourceDiagnostics.StartFetch(request.SourceName, request.RequestUri, request.Method.Method, request.ClientName); | ||||
|         var stopwatch = Stopwatch.StartNew(); | ||||
|  | ||||
|         try | ||||
|         { | ||||
|             _ = _httpClientOptions.Get(request.ClientName); | ||||
|             var sendResult = await SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); | ||||
|             var response = sendResult.Response; | ||||
|  | ||||
|             using (response) | ||||
|             { | ||||
|                 var duration = stopwatch.Elapsed; | ||||
|                 activity?.SetTag("http.status_code", (int)response.StatusCode); | ||||
|                 activity?.SetTag("http.retry.count", sendResult.Attempts - 1); | ||||
|  | ||||
|                 var rateLimitRemaining = TryGetHeaderValue(response.Headers, "x-ratelimit-remaining"); | ||||
|  | ||||
|                 if (response.StatusCode == HttpStatusCode.NotModified) | ||||
|                 { | ||||
|                     _logger.LogDebug("Source {Source} returned 304 Not Modified for {Uri}", request.SourceName, request.RequestUri); | ||||
|                     SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining); | ||||
|                     activity?.SetStatus(ActivityStatusCode.Ok); | ||||
|                     return SourceFetchContentResult.NotModified(response.StatusCode, sendResult.Attempts); | ||||
|                 } | ||||
|  | ||||
|                 if (!response.IsSuccessStatusCode) | ||||
|                 { | ||||
|                     var body = await ReadResponsePreviewAsync(response, cancellationToken).ConfigureAwait(false); | ||||
|                     SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining); | ||||
|                     activity?.SetStatus(ActivityStatusCode.Error, body); | ||||
|                     throw new HttpRequestException($"Fetch failed with status {(int)response.StatusCode} {response.StatusCode} from {request.RequestUri}. Body preview: {body}"); | ||||
|                 } | ||||
|  | ||||
|                 var contentBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false); | ||||
|                 var headers = CreateHeaderDictionary(response); | ||||
|                 SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength ?? contentBytes.LongLength, rateLimitRemaining); | ||||
|                 activity?.SetStatus(ActivityStatusCode.Ok); | ||||
|                 return SourceFetchContentResult.Success( | ||||
|                     response.StatusCode, | ||||
|                     contentBytes, | ||||
|                     response.Headers.ETag?.Tag, | ||||
|                     response.Content.Headers.LastModified, | ||||
|                     response.Content.Headers.ContentType?.ToString(), | ||||
|                     sendResult.Attempts, | ||||
|                     headers); | ||||
|             } | ||||
|         } | ||||
|         catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException) | ||||
|         { | ||||
|             activity?.SetStatus(ActivityStatusCode.Error, ex.Message); | ||||
|             throw; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private async Task<SourceFetchSendResult> SendAsync(SourceFetchRequest request, HttpCompletionOption completionOption, CancellationToken cancellationToken) | ||||
|     { | ||||
|         var attemptCount = 0; | ||||
|         var options = _httpClientOptions.Get(request.ClientName); | ||||
|  | ||||
|         var response = await SourceRetryPolicy.SendWithRetryAsync( | ||||
|             () => CreateHttpRequestMessage(request), | ||||
|             async (httpRequest, ct) => | ||||
|             { | ||||
|                 attemptCount++; | ||||
|                 var client = _httpClientFactory.CreateClient(request.ClientName); | ||||
|                 if (request.TimeoutOverride.HasValue) | ||||
|                 { | ||||
|                     client.Timeout = request.TimeoutOverride.Value; | ||||
|                 } | ||||
|  | ||||
|                 return await client.SendAsync(httpRequest, completionOption, ct).ConfigureAwait(false); | ||||
|             }, | ||||
|             maxAttempts: options.MaxAttempts, | ||||
|             baseDelay: options.BaseDelay, | ||||
|             _jitterSource, | ||||
|             context => SourceDiagnostics.RecordRetry( | ||||
|                 request.SourceName, | ||||
|                 request.ClientName, | ||||
|                 context.Response?.StatusCode, | ||||
|                 context.Attempt, | ||||
|                 context.Delay), | ||||
|             cancellationToken).ConfigureAwait(false); | ||||
|  | ||||
|         return new SourceFetchSendResult(response, attemptCount); | ||||
|     } | ||||
|  | ||||
|     internal static HttpRequestMessage CreateHttpRequestMessage(SourceFetchRequest request) | ||||
|     { | ||||
|         var httpRequest = new HttpRequestMessage(request.Method, request.RequestUri); | ||||
|         var acceptValues = request.AcceptHeaders is { Count: > 0 } headers | ||||
|             ? headers | ||||
|             : DefaultAcceptHeaders; | ||||
|  | ||||
|         httpRequest.Headers.Accept.Clear(); | ||||
|         var added = false; | ||||
|         foreach (var mediaType in acceptValues) | ||||
|         { | ||||
|             if (string.IsNullOrWhiteSpace(mediaType)) | ||||
|             { | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             if (MediaTypeWithQualityHeaderValue.TryParse(mediaType, out var headerValue)) | ||||
|             { | ||||
|                 httpRequest.Headers.Accept.Add(headerValue); | ||||
|                 added = true; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (!added) | ||||
|         { | ||||
|             httpRequest.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(DefaultAcceptHeaders[0])); | ||||
|         } | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(request.ETag)) | ||||
|         { | ||||
|             if (System.Net.Http.Headers.EntityTagHeaderValue.TryParse(request.ETag, out var etag)) | ||||
|             { | ||||
|                 httpRequest.Headers.IfNoneMatch.Add(etag); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (request.LastModified.HasValue) | ||||
|         { | ||||
|             httpRequest.Headers.IfModifiedSince = request.LastModified.Value; | ||||
|         } | ||||
|  | ||||
|         return httpRequest; | ||||
|     } | ||||
|  | ||||
|     private static async Task<string> ReadResponsePreviewAsync(HttpResponseMessage response, CancellationToken cancellationToken) | ||||
|     { | ||||
|         try | ||||
|         { | ||||
|             var buffer = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false); | ||||
|             var preview = Encoding.UTF8.GetString(buffer); | ||||
|             return preview.Length > 256 ? preview[..256] : preview; | ||||
|         } | ||||
|         catch | ||||
|         { | ||||
|             return "<unavailable>"; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static string? TryGetHeaderValue(HttpResponseHeaders headers, string name) | ||||
|     { | ||||
|         if (headers.TryGetValues(name, out var values)) | ||||
|         { | ||||
|             return values.FirstOrDefault(); | ||||
|         } | ||||
|  | ||||
|         return null; | ||||
|     } | ||||
|  | ||||
|     private static Dictionary<string, string> CreateHeaderDictionary(HttpResponseMessage response) | ||||
|     { | ||||
|         var headers = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase); | ||||
|  | ||||
|         foreach (var header in response.Headers) | ||||
|         { | ||||
|             headers[header.Key] = string.Join(",", header.Value); | ||||
|         } | ||||
|  | ||||
|         foreach (var header in response.Content.Headers) | ||||
|         { | ||||
|             headers[header.Key] = string.Join(",", header.Value); | ||||
|         } | ||||
|  | ||||
|         return headers; | ||||
|     } | ||||
|  | ||||
|     private readonly record struct SourceFetchSendResult(HttpResponseMessage Response, int Attempts); | ||||
| } | ||||
		Reference in New Issue
	
	Block a user