up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-27 08:51:10 +02:00
parent ea970ead2a
commit c34fb7256d
126 changed files with 18553 additions and 693 deletions

View File

@@ -0,0 +1,233 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Notifier.Worker.Observability;
/// <summary>
/// Default <see cref="INotifyMetrics"/> implementation built on System.Diagnostics.Metrics
/// instruments and a System.Diagnostics activity source.
/// </summary>
/// <remarks>
/// <para>
/// Counters and histograms are created in the constructor on a static meter named
/// "StellaOps.Notifier" (version "1.0.0"). Queue depths are kept as the last value reported
/// per tenant/channel pair and are exposed through an observable gauge callback.
/// </para>
/// <para>
/// NOTE(review): the meter is static while the instruments are created per instance, so every
/// constructed instance creates the same instruments again on the shared meter. Presumably the
/// type is registered as a singleton - confirm against the DI registration.
/// </para>
/// </remarks>
public sealed class DefaultNotifyMetrics : INotifyMetrics
{
    private static readonly ActivitySource ActivitySource = new("StellaOps.Notifier", "1.0.0");
    private static readonly Meter Meter = new("StellaOps.Notifier", "1.0.0");

    // Counters
    private readonly Counter<long> _deliveryAttempts;
    private readonly Counter<long> _escalationEvents;
    private readonly Counter<long> _deadLetterEntries;
    private readonly Counter<long> _ruleEvaluations;
    private readonly Counter<long> _templateRenders;
    private readonly Counter<long> _stormEvents;
    private readonly Counter<long> _retentionCleanups;

    // Histograms
    private readonly Histogram<double> _deliveryDuration;
    private readonly Histogram<double> _ruleEvaluationDuration;
    private readonly Histogram<double> _templateRenderDuration;

    // Last reported queue depth per (tenant, channel type), read by the observable gauge callback.
    // A tuple key is used instead of a "tenant:channel" string so that identifiers containing ':'
    // neither collide with each other nor get dropped when the key is turned back into tags.
    private readonly Dictionary<(string TenantId, string ChannelType), int> _queueDepths = new();
    private readonly object _queueDepthLock = new();

    /// <summary>
    /// Creates all counters, histograms and the queue-depth observable gauge on the shared meter.
    /// </summary>
    public DefaultNotifyMetrics()
    {
        // Initialize counters
        _deliveryAttempts = Meter.CreateCounter<long>(
            NotifyMetricNames.DeliveryAttempts,
            unit: "{attempts}",
            description: "Total number of notification delivery attempts");
        _escalationEvents = Meter.CreateCounter<long>(
            NotifyMetricNames.EscalationEvents,
            unit: "{events}",
            description: "Total number of escalation events");
        _deadLetterEntries = Meter.CreateCounter<long>(
            NotifyMetricNames.DeadLetterEntries,
            unit: "{entries}",
            description: "Total number of dead-letter entries");
        _ruleEvaluations = Meter.CreateCounter<long>(
            NotifyMetricNames.RuleEvaluations,
            unit: "{evaluations}",
            description: "Total number of rule evaluations");
        _templateRenders = Meter.CreateCounter<long>(
            NotifyMetricNames.TemplateRenders,
            unit: "{renders}",
            description: "Total number of template render operations");
        _stormEvents = Meter.CreateCounter<long>(
            NotifyMetricNames.StormEvents,
            unit: "{events}",
            description: "Total number of storm detection events");
        _retentionCleanups = Meter.CreateCounter<long>(
            NotifyMetricNames.RetentionCleanups,
            unit: "{cleanups}",
            description: "Total number of retention cleanup operations");

        // Initialize histograms
        _deliveryDuration = Meter.CreateHistogram<double>(
            NotifyMetricNames.DeliveryDuration,
            unit: "ms",
            description: "Duration of delivery attempts in milliseconds");
        _ruleEvaluationDuration = Meter.CreateHistogram<double>(
            NotifyMetricNames.RuleEvaluationDuration,
            unit: "ms",
            description: "Duration of rule evaluations in milliseconds");
        _templateRenderDuration = Meter.CreateHistogram<double>(
            NotifyMetricNames.TemplateRenderDuration,
            unit: "ms",
            description: "Duration of template renders in milliseconds");

        // Initialize observable gauge for queue depths
        Meter.CreateObservableGauge(
            NotifyMetricNames.QueueDepth,
            observeValues: ObserveQueueDepths,
            unit: "{messages}",
            description: "Current queue depth per channel");
    }

    /// <summary>
    /// Increments the delivery-attempt counter by one and records <paramref name="duration"/>
    /// (in milliseconds) on the delivery-duration histogram, both tagged with tenant,
    /// channel type and status.
    /// </summary>
    public void RecordDeliveryAttempt(string tenantId, string channelType, string status, TimeSpan duration)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.ChannelType, channelType },
            { NotifyMetricTags.Status, status }
        };

        _deliveryAttempts.Add(1, tags);
        _deliveryDuration.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Increments the escalation counter by one, tagged with tenant, level and outcome.
    /// </summary>
    public void RecordEscalation(string tenantId, int level, string outcome)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            // Format with the invariant culture so the tag value does not depend on the host's culture.
            { NotifyMetricTags.Level, level.ToString(System.Globalization.CultureInfo.InvariantCulture) },
            { NotifyMetricTags.Outcome, outcome }
        };

        _escalationEvents.Add(1, tags);
    }

    /// <summary>
    /// Increments the dead-letter counter by one, tagged with tenant, reason and channel type.
    /// </summary>
    public void RecordDeadLetter(string tenantId, string reason, string channelType)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.Reason, reason },
            { NotifyMetricTags.ChannelType, channelType }
        };

        _deadLetterEntries.Add(1, tags);
    }

    /// <summary>
    /// Increments the rule-evaluation counter by one and records <paramref name="duration"/>
    /// (in milliseconds) on the rule-evaluation histogram. The match result is emitted as the
    /// lower-case tag value "true" or "false".
    /// </summary>
    public void RecordRuleEvaluation(string tenantId, string ruleId, bool matched, TimeSpan duration)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.RuleId, ruleId },
            { NotifyMetricTags.Matched, matched ? "true" : "false" }
        };

        _ruleEvaluations.Add(1, tags);
        _ruleEvaluationDuration.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Increments the template-render counter by one and records <paramref name="duration"/>
    /// (in milliseconds) on the template-render histogram. The success flag is emitted as the
    /// lower-case tag value "true" or "false".
    /// </summary>
    public void RecordTemplateRender(string tenantId, string templateKey, bool success, TimeSpan duration)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.TemplateKey, templateKey },
            { NotifyMetricTags.Success, success ? "true" : "false" }
        };

        _templateRenders.Add(1, tags);
        _templateRenderDuration.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Increments the storm-event counter by one, tagged with tenant, storm key and decision.
    /// </summary>
    public void RecordStormEvent(string tenantId, string stormKey, string decision)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.StormKey, stormKey },
            { NotifyMetricTags.Decision, decision }
        };

        _stormEvents.Add(1, tags);
    }

    /// <summary>
    /// Adds <paramref name="deletedCount"/> to the retention-cleanup counter, tagged with tenant
    /// and entity type.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the counter's description says "cleanup operations", but the value added is
    /// the number of deleted items rather than 1 - confirm which of the two is intended.
    /// </remarks>
    public void RecordRetentionCleanup(string tenantId, string entityType, int deletedCount)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.EntityType, entityType }
        };

        _retentionCleanups.Add(deletedCount, tags);
    }

    /// <summary>
    /// Stores <paramref name="depth"/> as the latest queue depth for the given tenant and channel
    /// type, replacing any previously stored value. The value is reported the next time the
    /// queue-depth gauge is observed.
    /// </summary>
    public void RecordQueueDepth(string tenantId, string channelType, int depth)
    {
        lock (_queueDepthLock)
        {
            _queueDepths[(tenantId, channelType)] = depth;
        }
    }

    /// <summary>
    /// Starts a "notify.delivery" internal activity tagged with tenant, delivery id and channel type.
    /// </summary>
    /// <returns>The started activity, or <c>null</c> when the activity source returns none.</returns>
    public Activity? StartDeliveryActivity(string tenantId, string deliveryId, string channelType)
    {
        var activity = ActivitySource.StartActivity("notify.delivery", ActivityKind.Internal);
        if (activity is null)
        {
            return null;
        }

        activity.SetTag(NotifyMetricTags.TenantId, tenantId);
        activity.SetTag("delivery_id", deliveryId);
        activity.SetTag(NotifyMetricTags.ChannelType, channelType);
        return activity;
    }

    /// <summary>
    /// Starts a "notify.escalation" internal activity tagged with tenant, incident id and level.
    /// </summary>
    /// <returns>The started activity, or <c>null</c> when the activity source returns none.</returns>
    public Activity? StartEscalationActivity(string tenantId, string incidentId, int level)
    {
        var activity = ActivitySource.StartActivity("notify.escalation", ActivityKind.Internal);
        if (activity is null)
        {
            return null;
        }

        activity.SetTag(NotifyMetricTags.TenantId, tenantId);
        activity.SetTag("incident_id", incidentId);
        activity.SetTag(NotifyMetricTags.Level, level);
        return activity;
    }

    /// <summary>
    /// Observable-gauge callback: returns one measurement per stored tenant/channel pair.
    /// </summary>
    /// <remarks>
    /// The measurements are copied into an array while the lock is held and the array is returned
    /// afterwards, so the lock is never held while the caller enumerates the result.
    /// </remarks>
    private IEnumerable<Measurement<int>> ObserveQueueDepths()
    {
        lock (_queueDepthLock)
        {
            var snapshot = new Measurement<int>[_queueDepths.Count];
            var index = 0;

            foreach (var entry in _queueDepths)
            {
                snapshot[index++] = new Measurement<int>(
                    entry.Value,
                    new TagList
                    {
                        { NotifyMetricTags.TenantId, entry.Key.TenantId },
                        { NotifyMetricTags.ChannelType, entry.Key.ChannelType }
                    });
            }

            return snapshot;
        }
    }
}

View File

@@ -0,0 +1,98 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Notifier.Worker.Observability;
/// <summary>
/// Contract for emitting notifier metrics and starting tracing activities.
/// </summary>
public interface INotifyMetrics
{
    /// <summary>
    /// Reports a single notification delivery attempt and how long it took.
    /// </summary>
    void RecordDeliveryAttempt(string tenantId, string channelType, string status, TimeSpan duration);

    /// <summary>
    /// Reports an escalation event at the given level with its outcome.
    /// </summary>
    void RecordEscalation(string tenantId, int level, string outcome);

    /// <summary>
    /// Reports that an entry was dead-lettered, with the reason and channel type.
    /// </summary>
    void RecordDeadLetter(string tenantId, string reason, string channelType);

    /// <summary>
    /// Reports a rule evaluation, whether the rule matched, and how long it took.
    /// </summary>
    void RecordRuleEvaluation(string tenantId, string ruleId, bool matched, TimeSpan duration);

    /// <summary>
    /// Reports a template render, whether it succeeded, and how long it took.
    /// </summary>
    void RecordTemplateRender(string tenantId, string templateKey, bool success, TimeSpan duration);

    /// <summary>
    /// Reports a storm detection event and the decision taken for it.
    /// </summary>
    void RecordStormEvent(string tenantId, string stormKey, string decision);

    /// <summary>
    /// Reports a retention cleanup for an entity type and the number of deleted items.
    /// </summary>
    void RecordRetentionCleanup(string tenantId, string entityType, int deletedCount);

    /// <summary>
    /// Reports the current queue depth for a tenant's channel type.
    /// </summary>
    void RecordQueueDepth(string tenantId, string channelType, int depth);

    /// <summary>
    /// Starts a tracing activity for a delivery; the result may be <c>null</c>.
    /// </summary>
    Activity? StartDeliveryActivity(string tenantId, string deliveryId, string channelType);

    /// <summary>
    /// Starts a tracing activity for an escalation; the result may be <c>null</c>.
    /// </summary>
    Activity? StartEscalationActivity(string tenantId, string incidentId, int level);
}
/// <summary>
/// Tag (dimension) keys shared by the notifier's metrics and activities, kept in one place so
/// every emitter spells them identically.
/// </summary>
public static class NotifyMetricTags
{
    // Dimensions that identify where a measurement came from.
    public const string TenantId = "tenant_id";
    public const string ChannelType = "channel_type";
    public const string EntityType = "entity_type";

    // Result-style dimensions.
    public const string Status = "status";
    public const string Outcome = "outcome";
    public const string Reason = "reason";
    public const string Decision = "decision";

    // Boolean-style dimensions.
    public const string Matched = "matched";
    public const string Success = "success";

    // Identifiers of the item being measured.
    public const string Level = "level";
    public const string RuleId = "rule_id";
    public const string TemplateKey = "template_key";
    public const string StormKey = "storm_key";
}
/// <summary>
/// Instrument names used by the notifier's metrics, all under the "notify." prefix.
/// </summary>
public static class NotifyMetricNames
{
    // Event and operation tallies.
    public const string DeliveryAttempts = "notify.delivery.attempts";
    public const string EscalationEvents = "notify.escalation.events";
    public const string DeadLetterEntries = "notify.deadletter.entries";
    public const string RuleEvaluations = "notify.rule.evaluations";
    public const string TemplateRenders = "notify.template.renders";
    public const string StormEvents = "notify.storm.events";
    public const string RetentionCleanups = "notify.retention.cleanups";

    // Durations.
    public const string DeliveryDuration = "notify.delivery.duration";
    public const string RuleEvaluationDuration = "notify.rule.evaluation.duration";
    public const string TemplateRenderDuration = "notify.template.render.duration";

    // Point-in-time values.
    public const string QueueDepth = "notify.queue.depth";
}