358 lines
13 KiB
C#
358 lines
13 KiB
C#
using System.Diagnostics;
|
|
using System.Globalization;
|
|
using System.Net;
|
|
using System.Net.Http.Headers;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using StellaOps.Notify.Models;
|
|
using StellaOps.Notifier.Worker.Storage;
|
|
using StellaOps.Notifier.Worker.Options;
|
|
|
|
namespace StellaOps.Notifier.Worker.Channels;
|
|
|
|
/// <summary>
/// Channel adapter for generic HTTP webhook dispatch with retry policies.
/// </summary>
/// <remarks>
/// A dispatch POSTs the rendered body as JSON to the channel's configured endpoint.
/// Responses with status 408, 502, 503 or 504, network errors and request timeouts are
/// retried with exponential backoff and jitter; 429 is reported as throttled without
/// retrying; any other non-success status fails immediately. Every terminal outcome is
/// written to the audit repository on a best-effort basis.
/// </remarks>
public sealed class WebhookChannelAdapter : IChannelAdapter
{
    private readonly HttpClient _httpClient;
    private readonly INotifyAuditRepository _auditRepository;
    private readonly ChannelAdapterOptions _options;
    private readonly ILogger<WebhookChannelAdapter> _logger;
    private readonly TimeProvider _timeProvider;
    private readonly Func<double> _jitterSource;

    // Not referenced anywhere inside this class; retained unchanged.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        WriteIndented = false
    };

    /// <summary>
    /// Creates the adapter.
    /// </summary>
    /// <param name="httpClient">Client used for all outbound webhook requests.</param>
    /// <param name="auditRepository">Repository that receives dispatch audit entries.</param>
    /// <param name="options">Adapter options (retry count, backoff delays, HMAC signing switch).</param>
    /// <param name="timeProvider">Clock used to evaluate absolute <c>Retry-After</c> dates; falls back to <see cref="TimeProvider.System"/> when null.</param>
    /// <param name="logger">Logger for dispatch diagnostics.</param>
    /// <param name="jitterSource">Source of jitter samples for backoff; defaults to <see cref="Random.NextDouble"/> on <see cref="Random.Shared"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/>, <paramref name="auditRepository"/>, <paramref name="options"/>
    /// (or its value) or <paramref name="logger"/> is null.
    /// </exception>
    public WebhookChannelAdapter(
        HttpClient httpClient,
        INotifyAuditRepository auditRepository,
        IOptions<ChannelAdapterOptions> options,
        TimeProvider timeProvider,
        ILogger<WebhookChannelAdapter> logger,
        Func<double>? jitterSource = null)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _auditRepository = auditRepository ?? throw new ArgumentNullException(nameof(auditRepository));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _jitterSource = jitterSource ?? Random.Shared.NextDouble;
    }

    /// <summary>
    /// The channel type served by this adapter.
    /// </summary>
    public NotifyChannelType ChannelType => NotifyChannelType.Webhook;

    /// <summary>
    /// Delivers the rendered notification to the channel's webhook endpoint, retrying
    /// transient failures up to the configured retry count.
    /// </summary>
    /// <param name="context">Delivery context (channel, rendered body, identifiers).</param>
    /// <param name="cancellationToken">Cancels the dispatch, including backoff waits.</param>
    /// <returns>
    /// A succeeded result on any 2xx response; a throttled result on 429; otherwise a failed
    /// result describing the invalid configuration, the non-retryable status, or the last
    /// attempt once all attempts are exhausted.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    public async Task<ChannelDispatchResult> DispatchAsync(
        ChannelDispatchContext context,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);

        var endpoint = context.Channel.Config.Endpoint;
        if (string.IsNullOrWhiteSpace(endpoint) || !Uri.TryCreate(endpoint, UriKind.Absolute, out var uri))
        {
            await AuditDispatchAsync(context, false, "Invalid endpoint configuration.", null, cancellationToken)
                .ConfigureAwait(false);
            return ChannelDispatchResult.Failed(
                "Webhook endpoint is not configured or invalid.",
                ChannelDispatchStatus.InvalidConfiguration);
        }

        var stopwatch = Stopwatch.StartNew();
        var attempt = 0;

        // A negative retry count would otherwise skip the loop and report a failure
        // without ever contacting the endpoint; always make at least one attempt.
        var maxRetries = Math.Max(0, _options.MaxRetries);
        Exception? lastException = null;
        int? lastStatusCode = null;

        // attempt runs from 1 to maxRetries + 1: the initial try plus maxRetries retries.
        while (attempt <= maxRetries)
        {
            attempt++;
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                using var request = BuildRequest(context, uri);
                using var response = await _httpClient
                    .SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken)
                    .ConfigureAwait(false);

                // A response was received, so an exception from an earlier attempt no
                // longer describes the most recent failure.
                lastStatusCode = (int)response.StatusCode;
                lastException = null;

                if (response.IsSuccessStatusCode)
                {
                    stopwatch.Stop();
                    var metadata = BuildSuccessMetadata(context, response, attempt);
                    await AuditDispatchAsync(context, true, null, metadata, cancellationToken)
                        .ConfigureAwait(false);

                    _logger.LogInformation(
                        "Webhook delivery {DeliveryId} succeeded to {Endpoint} on attempt {Attempt} in {Duration}ms.",
                        context.DeliveryId, endpoint, attempt, stopwatch.ElapsedMilliseconds);

                    return ChannelDispatchResult.Succeeded(
                        message: $"Delivered to {uri.Host} with status {response.StatusCode}.",
                        duration: stopwatch.Elapsed,
                        metadata: metadata);
                }

                if (response.StatusCode == HttpStatusCode.TooManyRequests)
                {
                    // Throttling is handed back to the caller together with the server's
                    // Retry-After hint instead of being retried here.
                    var retryAfter = ParseRetryAfter(response.Headers);
                    stopwatch.Stop();

                    await AuditDispatchAsync(context, false, "Rate limited by endpoint.", null, cancellationToken)
                        .ConfigureAwait(false);

                    _logger.LogWarning(
                        "Webhook delivery {DeliveryId} throttled by {Endpoint}. Retry after: {RetryAfter}.",
                        context.DeliveryId, endpoint, retryAfter);

                    return ChannelDispatchResult.Throttled(
                        $"Rate limited by {uri.Host}.",
                        retryAfter);
                }

                if (!IsRetryable(response.StatusCode))
                {
                    stopwatch.Stop();
                    var errorMessage = $"Webhook returned non-retryable status {response.StatusCode}.";
                    await AuditDispatchAsync(context, false, errorMessage, null, cancellationToken)
                        .ConfigureAwait(false);

                    _logger.LogWarning(
                        "Webhook delivery {DeliveryId} failed with non-retryable status {StatusCode}.",
                        context.DeliveryId, response.StatusCode);

                    return ChannelDispatchResult.Failed(
                        errorMessage,
                        httpStatusCode: lastStatusCode,
                        duration: stopwatch.Elapsed);
                }

                _logger.LogDebug(
                    "Webhook delivery {DeliveryId} attempt {Attempt} returned {StatusCode}, will retry.",
                    context.DeliveryId, attempt, response.StatusCode);
            }
            catch (HttpRequestException ex)
            {
                // No response for this attempt: drop any status code left over from an earlier one.
                lastException = ex;
                lastStatusCode = null;
                _logger.LogDebug(
                    ex,
                    "Webhook delivery {DeliveryId} attempt {Attempt} failed with network error.",
                    context.DeliveryId, attempt);
            }
            catch (TaskCanceledException ex) when (!cancellationToken.IsCancellationRequested)
            {
                // Cancellation that did not come from the caller's token is treated as a
                // request timeout and retried; caller cancellation propagates.
                lastException = ex;
                lastStatusCode = null;
                _logger.LogDebug(
                    ex,
                    "Webhook delivery {DeliveryId} attempt {Attempt} timed out.",
                    context.DeliveryId, attempt);
            }

            // Back off only when another attempt will follow.
            if (attempt <= maxRetries)
            {
                var delay = CalculateBackoff(attempt);
                await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
            }
        }

        stopwatch.Stop();
        var finalMessage = lastException?.Message ?? $"Failed after {maxRetries + 1} attempts.";
        await AuditDispatchAsync(context, false, finalMessage, null, cancellationToken)
            .ConfigureAwait(false);

        _logger.LogError(
            lastException,
            "Webhook delivery {DeliveryId} exhausted all {MaxRetries} retries to {Endpoint}.",
            context.DeliveryId, maxRetries, endpoint);

        return ChannelDispatchResult.Failed(
            finalMessage,
            lastException is TaskCanceledException ? ChannelDispatchStatus.Timeout : ChannelDispatchStatus.NetworkError,
            httpStatusCode: lastStatusCode,
            exception: lastException,
            duration: stopwatch.Elapsed);
    }

    /// <summary>
    /// Probes the channel's endpoint with a single HEAD request.
    /// </summary>
    /// <param name="channel">Channel whose endpoint is probed.</param>
    /// <param name="cancellationToken">Token passed to the HTTP request.</param>
    /// <returns>
    /// Unhealthy when the endpoint is missing, not an absolute URI, or the request throws;
    /// degraded when the channel is disabled or the endpoint answers with a status other
    /// than 2xx / 405; otherwise OK with the measured round-trip time.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="channel"/> is null.</exception>
    public async Task<ChannelHealthCheckResult> CheckHealthAsync(
        NotifyChannel channel,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(channel);

        var endpoint = channel.Config.Endpoint;
        if (string.IsNullOrWhiteSpace(endpoint) || !Uri.TryCreate(endpoint, UriKind.Absolute, out var uri))
        {
            return ChannelHealthCheckResult.Unhealthy("Webhook endpoint is not configured or invalid.");
        }

        if (!channel.Enabled)
        {
            return ChannelHealthCheckResult.Degraded("Channel is disabled.");
        }

        var stopwatch = Stopwatch.StartNew();
        try
        {
            using var request = new HttpRequestMessage(HttpMethod.Head, uri);
            request.Headers.UserAgent.Add(new ProductInfoHeaderValue("StellaOps-Notifier", "1.0"));

            using var response = await _httpClient
                .SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken)
                .ConfigureAwait(false);

            stopwatch.Stop();

            // 405 still proves the endpoint is reachable; it merely rejects HEAD.
            if (response.IsSuccessStatusCode || response.StatusCode == HttpStatusCode.MethodNotAllowed)
            {
                return ChannelHealthCheckResult.Ok(
                    $"Endpoint responded with {response.StatusCode}.",
                    stopwatch.Elapsed);
            }

            return ChannelHealthCheckResult.Degraded(
                $"Endpoint returned {response.StatusCode}.",
                stopwatch.Elapsed);
        }
        catch (Exception ex)
        {
            // NOTE(review): this also catches caller-requested cancellation and reports it
            // as an unhealthy endpoint; kept as-is because callers may rely on the health
            // check never throwing for request failures - confirm before changing.
            stopwatch.Stop();
            _logger.LogDebug(ex, "Webhook health check failed for channel {ChannelId}.", channel.ChannelId);
            return ChannelHealthCheckResult.Unhealthy($"Connection failed: {ex.Message}");
        }
    }

    /// <summary>
    /// Builds the POST request for one delivery attempt: JSON body, identifying headers
    /// and, when signing is enabled and the channel has a secret, an HMAC signature header.
    /// </summary>
    /// <param name="context">Delivery context supplying body, identifiers and channel.</param>
    /// <param name="uri">Validated absolute endpoint URI.</param>
    /// <returns>A new request owned (and disposed) by the caller.</returns>
    private HttpRequestMessage BuildRequest(ChannelDispatchContext context, Uri uri)
    {
        var request = new HttpRequestMessage(HttpMethod.Post, uri);
        request.Content = new StringContent(context.RenderedBody, Encoding.UTF8, "application/json");

        request.Headers.UserAgent.Add(new ProductInfoHeaderValue("StellaOps-Notifier", "1.0"));
        request.Headers.Add("X-StellaOps-Delivery-Id", context.DeliveryId);
        request.Headers.Add("X-StellaOps-Trace-Id", context.TraceId);
        request.Headers.Add("X-StellaOps-Timestamp", context.Timestamp.ToString("O", CultureInfo.InvariantCulture));

        if (_options.EnableHmacSigning && TryGetHmacSecret(context.Channel, out var secret))
        {
            var signature = ComputeHmacSignature(context.RenderedBody, secret);
            request.Headers.Add("X-StellaOps-Signature", $"sha256={signature}");
        }

        return request;
    }

    /// <summary>
    /// Reads the <c>hmacSecret</c> entry from the channel's configuration properties.
    /// </summary>
    /// <param name="channel">Channel whose configuration is inspected.</param>
    /// <param name="secret">The secret when found; otherwise an empty string.</param>
    /// <returns><c>true</c> when a non-blank secret is configured.</returns>
    private static bool TryGetHmacSecret(NotifyChannel channel, out string secret)
    {
        secret = string.Empty;
        if (channel.Config.Properties.TryGetValue("hmacSecret", out var s) && !string.IsNullOrWhiteSpace(s))
        {
            secret = s;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Computes the HMAC-SHA256 of the UTF-8 encoded body, keyed with the UTF-8 encoded secret.
    /// </summary>
    /// <param name="body">Request body to sign.</param>
    /// <param name="secret">Shared secret used as the HMAC key.</param>
    /// <returns>The digest as a lowercase hexadecimal string.</returns>
    private static string ComputeHmacSignature(string body, string secret)
    {
        using var hmac = new HMACSHA256(Encoding.UTF8.GetBytes(secret));
        var hash = hmac.ComputeHash(Encoding.UTF8.GetBytes(body));
        return Convert.ToHexStringLower(hash);
    }

    /// <summary>
    /// Extracts the wait time from a response's <c>Retry-After</c> header.
    /// </summary>
    /// <param name="headers">Response headers of a throttled request.</param>
    /// <returns>
    /// The delta-seconds value when present; for an absolute date, the time remaining
    /// according to the injected clock, or <c>null</c> when that date is not in the
    /// future; <c>null</c> when the header is absent.
    /// </returns>
    private TimeSpan? ParseRetryAfter(HttpResponseHeaders headers)
    {
        var retryAfter = headers.RetryAfter;
        if (retryAfter is null)
        {
            return null;
        }

        if (retryAfter.Delta is { } delta)
        {
            return delta;
        }

        if (retryAfter.Date is { } date)
        {
            // Use the injected clock rather than the ambient system clock so the
            // result is deterministic under a test TimeProvider.
            var delay = date - _timeProvider.GetUtcNow();
            return delay > TimeSpan.Zero ? delay : null;
        }

        return null;
    }

    /// <summary>
    /// Tells whether a non-success status code is worth another attempt.
    /// </summary>
    /// <param name="statusCode">Status code returned by the endpoint.</param>
    /// <returns><c>true</c> for 408, 502, 503 and 504; otherwise <c>false</c>.</returns>
    private static bool IsRetryable(HttpStatusCode statusCode)
    {
        return statusCode switch
        {
            HttpStatusCode.RequestTimeout => true,
            HttpStatusCode.BadGateway => true,
            HttpStatusCode.ServiceUnavailable => true,
            HttpStatusCode.GatewayTimeout => true,
            _ => false
        };
    }

    /// <summary>
    /// Computes the wait before the next attempt: the base delay doubled for every
    /// completed attempt, scaled by a jitter factor and capped at the maximum delay.
    /// </summary>
    /// <param name="attempt">1-based number of the attempt that just failed.</param>
    /// <returns>A delay between zero and the configured maximum delay.</returns>
    private TimeSpan CalculateBackoff(int attempt)
    {
        var baseDelay = _options.RetryBaseDelay;
        var maxDelay = _options.RetryMaxDelay;

        // Maps the jitter sample linearly onto 0.85 + 0.3 * sample.
        var jitter = _jitterSource() * 0.3 + 0.85;
        var delayMs = baseDelay.TotalMilliseconds * Math.Pow(2, attempt - 1) * jitter;

        // Never hand Task.Delay a negative duration, even with a misconfigured delay.
        var ceiling = maxDelay > TimeSpan.Zero ? maxDelay : TimeSpan.Zero;

        // Cap while still in double space: the exponential term can exceed what a TimeSpan
        // can represent, and TimeSpan.FromMilliseconds throws for such values. The negated
        // comparison also routes NaN and infinity to the cap.
        if (!(delayMs < ceiling.TotalMilliseconds))
        {
            return ceiling;
        }

        return delayMs > 0d ? TimeSpan.FromMilliseconds(delayMs) : TimeSpan.Zero;
    }

    /// <summary>
    /// Builds the metadata attached to a successful dispatch result and its audit entry.
    /// </summary>
    /// <param name="context">Delivery context supplying the endpoint.</param>
    /// <param name="response">The successful response.</param>
    /// <param name="attempt">1-based attempt number that succeeded.</param>
    /// <returns>A dictionary with the <c>endpoint</c>, <c>statusCode</c> and <c>attempt</c> keys.</returns>
    private static Dictionary<string, string> BuildSuccessMetadata(
        ChannelDispatchContext context,
        HttpResponseMessage response,
        int attempt)
    {
        return new Dictionary<string, string>
        {
            ["endpoint"] = context.Channel.Config.Endpoint ?? string.Empty,
            ["statusCode"] = ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture),
            ["attempt"] = attempt.ToString(CultureInfo.InvariantCulture)
        };
    }

    /// <summary>
    /// Appends an audit entry for a dispatch outcome. Failures to write the entry are
    /// logged as warnings and never propagate to the caller.
    /// </summary>
    /// <param name="context">Delivery context supplying tenant, channel and identifiers.</param>
    /// <param name="success">Whether the dispatch succeeded; selects the audit event name.</param>
    /// <param name="errorMessage">Failure description, recorded under <c>error</c> when non-blank.</param>
    /// <param name="metadata">Extra entries, recorded with a <c>dispatch.</c> key prefix.</param>
    /// <param name="cancellationToken">Token passed to the repository call.</param>
    private async Task AuditDispatchAsync(
        ChannelDispatchContext context,
        bool success,
        string? errorMessage,
        IReadOnlyDictionary<string, string>? metadata,
        CancellationToken cancellationToken)
    {
        try
        {
            var auditMetadata = new Dictionary<string, string>
            {
                ["deliveryId"] = context.DeliveryId,
                ["channelId"] = context.Channel.ChannelId,
                ["channelType"] = context.Channel.Type.ToString(),
                ["success"] = success ? "true" : "false",
                ["traceId"] = context.TraceId
            };

            if (!string.IsNullOrWhiteSpace(errorMessage))
            {
                auditMetadata["error"] = errorMessage;
            }

            if (metadata is not null)
            {
                foreach (var (key, value) in metadata)
                {
                    auditMetadata[$"dispatch.{key}"] = value;
                }
            }

            await _auditRepository.AppendAsync(
                context.TenantId,
                success ? "channel.dispatch.success" : "channel.dispatch.failure",
                "notifier-worker",
                auditMetadata,
                cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Auditing is best-effort: a failed audit write must not change the dispatch outcome.
            _logger.LogWarning(ex, "Failed to write dispatch audit for delivery {DeliveryId}.", context.DeliveryId);
        }
    }
}
|
|
|