up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,233 @@
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Notifier.Worker.Observability;
|
||||
|
||||
/// <summary>
/// Default <see cref="INotifyMetrics"/> implementation built on <c>System.Diagnostics.Metrics</c>
/// instruments (counters, histograms, one observable gauge) and a shared activity source for tracing.
/// </summary>
/// <remarks>
/// The meter and activity source are static, while the instruments are created in the instance
/// constructor. NOTE(review): this presumably expects a single long-lived instance; constructing
/// several would create the same instrument names repeatedly on the shared meter and register one
/// gauge callback per instance — confirm the DI lifetime.
/// </remarks>
public sealed class DefaultNotifyMetrics : INotifyMetrics
{
    private static readonly ActivitySource ActivitySource = new("StellaOps.Notifier", "1.0.0");
    private static readonly Meter Meter = new("StellaOps.Notifier", "1.0.0");

    // Counters
    private readonly Counter<long> _deliveryAttempts;
    private readonly Counter<long> _escalationEvents;
    private readonly Counter<long> _deadLetterEntries;
    private readonly Counter<long> _ruleEvaluations;
    private readonly Counter<long> _templateRenders;
    private readonly Counter<long> _stormEvents;
    private readonly Counter<long> _retentionCleanups;

    // Histograms
    private readonly Histogram<double> _deliveryDuration;
    private readonly Histogram<double> _ruleEvaluationDuration;
    private readonly Histogram<double> _templateRenderDuration;

    // Latest reported depth per (tenant, channel), read by the observable gauge callback.
    // A tuple key is used instead of a "tenant:channel" string so identifiers that contain ':'
    // cannot be mis-split (and silently dropped) when the gauge is observed.
    private readonly Dictionary<(string TenantId, string ChannelType), int> _queueDepths = new();
    private readonly object _queueDepthLock = new();

    /// <summary>
    /// Creates all counters, histograms and the queue-depth observable gauge on the shared meter.
    /// </summary>
    public DefaultNotifyMetrics()
    {
        // Initialize counters
        _deliveryAttempts = Meter.CreateCounter<long>(
            NotifyMetricNames.DeliveryAttempts,
            unit: "{attempts}",
            description: "Total number of notification delivery attempts");

        _escalationEvents = Meter.CreateCounter<long>(
            NotifyMetricNames.EscalationEvents,
            unit: "{events}",
            description: "Total number of escalation events");

        _deadLetterEntries = Meter.CreateCounter<long>(
            NotifyMetricNames.DeadLetterEntries,
            unit: "{entries}",
            description: "Total number of dead-letter entries");

        _ruleEvaluations = Meter.CreateCounter<long>(
            NotifyMetricNames.RuleEvaluations,
            unit: "{evaluations}",
            description: "Total number of rule evaluations");

        _templateRenders = Meter.CreateCounter<long>(
            NotifyMetricNames.TemplateRenders,
            unit: "{renders}",
            description: "Total number of template render operations");

        _stormEvents = Meter.CreateCounter<long>(
            NotifyMetricNames.StormEvents,
            unit: "{events}",
            description: "Total number of storm detection events");

        _retentionCleanups = Meter.CreateCounter<long>(
            NotifyMetricNames.RetentionCleanups,
            unit: "{cleanups}",
            description: "Total number of retention cleanup operations");

        // Initialize histograms
        _deliveryDuration = Meter.CreateHistogram<double>(
            NotifyMetricNames.DeliveryDuration,
            unit: "ms",
            description: "Duration of delivery attempts in milliseconds");

        _ruleEvaluationDuration = Meter.CreateHistogram<double>(
            NotifyMetricNames.RuleEvaluationDuration,
            unit: "ms",
            description: "Duration of rule evaluations in milliseconds");

        _templateRenderDuration = Meter.CreateHistogram<double>(
            NotifyMetricNames.TemplateRenderDuration,
            unit: "ms",
            description: "Duration of template renders in milliseconds");

        // Initialize observable gauge for queue depths
        Meter.CreateObservableGauge<int>(
            NotifyMetricNames.QueueDepth,
            observeValues: ObserveQueueDepths,
            unit: "{messages}",
            description: "Current queue depth per channel");
    }

    /// <summary>
    /// Counts one delivery attempt and records its duration in milliseconds, both tagged with
    /// tenant, channel type and status.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="channelType">Value for the channel-type tag.</param>
    /// <param name="status">Value for the status tag.</param>
    /// <param name="duration">Elapsed time of the attempt.</param>
    public void RecordDeliveryAttempt(string tenantId, string channelType, string status, TimeSpan duration)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.ChannelType, channelType },
            { NotifyMetricTags.Status, status }
        };

        _deliveryAttempts.Add(1, tags);
        _deliveryDuration.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Counts one escalation event tagged with tenant, level and outcome.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="level">Escalation level; emitted as a string tag.</param>
    /// <param name="outcome">Value for the outcome tag.</param>
    public void RecordEscalation(string tenantId, int level, string outcome)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            // Invariant culture keeps the tag value stable regardless of the host's culture settings.
            { NotifyMetricTags.Level, level.ToString(System.Globalization.CultureInfo.InvariantCulture) },
            { NotifyMetricTags.Outcome, outcome }
        };

        _escalationEvents.Add(1, tags);
    }

    /// <summary>
    /// Counts one dead-letter entry tagged with tenant, reason and channel type.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="reason">Value for the reason tag.</param>
    /// <param name="channelType">Value for the channel-type tag.</param>
    public void RecordDeadLetter(string tenantId, string reason, string channelType)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.Reason, reason },
            { NotifyMetricTags.ChannelType, channelType }
        };

        _deadLetterEntries.Add(1, tags);
    }

    /// <summary>
    /// Counts one rule evaluation and records its duration in milliseconds, tagged with tenant,
    /// rule id and whether the rule matched ("true"/"false").
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="ruleId">Value for the rule-id tag.</param>
    /// <param name="matched">Whether the rule matched.</param>
    /// <param name="duration">Elapsed time of the evaluation.</param>
    public void RecordRuleEvaluation(string tenantId, string ruleId, bool matched, TimeSpan duration)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.RuleId, ruleId },
            { NotifyMetricTags.Matched, matched ? "true" : "false" }
        };

        _ruleEvaluations.Add(1, tags);
        _ruleEvaluationDuration.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Counts one template render and records its duration in milliseconds, tagged with tenant,
    /// template key and success ("true"/"false").
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="templateKey">Value for the template-key tag.</param>
    /// <param name="success">Whether rendering succeeded.</param>
    /// <param name="duration">Elapsed time of the render.</param>
    public void RecordTemplateRender(string tenantId, string templateKey, bool success, TimeSpan duration)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.TemplateKey, templateKey },
            { NotifyMetricTags.Success, success ? "true" : "false" }
        };

        _templateRenders.Add(1, tags);
        _templateRenderDuration.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Counts one storm detection event tagged with tenant, storm key and decision.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="stormKey">Value for the storm-key tag.</param>
    /// <param name="decision">Value for the decision tag.</param>
    public void RecordStormEvent(string tenantId, string stormKey, string decision)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.StormKey, stormKey },
            { NotifyMetricTags.Decision, decision }
        };

        _stormEvents.Add(1, tags);
    }

    /// <summary>
    /// Adds <paramref name="deletedCount"/> to the retention cleanup counter, tagged with tenant
    /// and entity type.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="entityType">Value for the entity-type tag.</param>
    /// <param name="deletedCount">Amount added to the counter.</param>
    public void RecordRetentionCleanup(string tenantId, string entityType, int deletedCount)
    {
        var tags = new TagList
        {
            { NotifyMetricTags.TenantId, tenantId },
            { NotifyMetricTags.EntityType, entityType }
        };

        // NOTE(review): the counter is described as "cleanup operations" with unit "{cleanups}",
        // yet it is incremented by the number of deleted items — confirm which meaning is intended.
        _retentionCleanups.Add(deletedCount, tags);
    }

    /// <summary>
    /// Stores the latest queue depth for a tenant/channel pair; the value is reported the next
    /// time the queue-depth gauge is observed.
    /// </summary>
    /// <param name="tenantId">Tenant the queue belongs to.</param>
    /// <param name="channelType">Channel type of the queue.</param>
    /// <param name="depth">Current depth; overwrites any previously stored value for the pair.</param>
    public void RecordQueueDepth(string tenantId, string channelType, int depth)
    {
        lock (_queueDepthLock)
        {
            _queueDepths[(tenantId, channelType)] = depth;
        }
    }

    /// <summary>
    /// Starts a "notify.delivery" activity tagged with tenant, delivery id and channel type.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="deliveryId">Value for the <c>delivery_id</c> tag.</param>
    /// <param name="channelType">Value for the channel-type tag.</param>
    /// <returns>The started activity, or <c>null</c> when the activity source did not create one.</returns>
    public Activity? StartDeliveryActivity(string tenantId, string deliveryId, string channelType)
    {
        var activity = ActivitySource.StartActivity("notify.delivery", ActivityKind.Internal);

        activity?
            .SetTag(NotifyMetricTags.TenantId, tenantId)
            .SetTag("delivery_id", deliveryId)
            .SetTag(NotifyMetricTags.ChannelType, channelType);

        return activity;
    }

    /// <summary>
    /// Starts a "notify.escalation" activity tagged with tenant, incident id and level.
    /// </summary>
    /// <param name="tenantId">Value for the tenant tag.</param>
    /// <param name="incidentId">Value for the <c>incident_id</c> tag.</param>
    /// <param name="level">Escalation level, attached as an integer tag.</param>
    /// <returns>The started activity, or <c>null</c> when the activity source did not create one.</returns>
    public Activity? StartEscalationActivity(string tenantId, string incidentId, int level)
    {
        var activity = ActivitySource.StartActivity("notify.escalation", ActivityKind.Internal);

        activity?
            .SetTag(NotifyMetricTags.TenantId, tenantId)
            .SetTag("incident_id", incidentId)
            .SetTag(NotifyMetricTags.Level, level);

        return activity;
    }

    /// <summary>
    /// Gauge callback: returns one measurement per recorded tenant/channel pair.
    /// </summary>
    /// <returns>A materialized snapshot of the currently stored queue depths.</returns>
    private IEnumerable<Measurement<int>> ObserveQueueDepths()
    {
        // Build the full snapshot while holding the lock and return it as an array. A lazy
        // iterator (yield return inside the lock) would keep the monitor held while the caller
        // enumerates, and would not release it if enumeration were abandoned without disposal.
        lock (_queueDepthLock)
        {
            var snapshot = new Measurement<int>[_queueDepths.Count];
            var index = 0;

            foreach (var (key, depth) in _queueDepths)
            {
                snapshot[index++] = new Measurement<int>(
                    depth,
                    new TagList
                    {
                        { NotifyMetricTags.TenantId, key.TenantId },
                        { NotifyMetricTags.ChannelType, key.ChannelType }
                    });
            }

            return snapshot;
        }
    }
}
|
||||
@@ -0,0 +1,98 @@
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Notifier.Worker.Observability;
|
||||
|
||||
/// <summary>
/// Contract for emitting notification metrics and starting tracing activities.
/// </summary>
public interface INotifyMetrics
{
    /// <summary>
    /// Records one notification delivery attempt together with how long it took.
    /// </summary>
    /// <param name="tenantId">Tenant the delivery belongs to.</param>
    /// <param name="channelType">Channel type used for the delivery.</param>
    /// <param name="status">Status of the attempt.</param>
    /// <param name="duration">Elapsed time of the attempt.</param>
    void RecordDeliveryAttempt(string tenantId, string channelType, string status, TimeSpan duration);

    /// <summary>
    /// Records one escalation event.
    /// </summary>
    /// <param name="tenantId">Tenant the escalation belongs to.</param>
    /// <param name="level">Escalation level.</param>
    /// <param name="outcome">Outcome of the escalation.</param>
    void RecordEscalation(string tenantId, int level, string outcome);

    /// <summary>
    /// Records one dead-letter entry.
    /// </summary>
    /// <param name="tenantId">Tenant the entry belongs to.</param>
    /// <param name="reason">Reason the entry was dead-lettered.</param>
    /// <param name="channelType">Channel type of the entry.</param>
    void RecordDeadLetter(string tenantId, string reason, string channelType);

    /// <summary>
    /// Records one rule evaluation together with how long it took.
    /// </summary>
    /// <param name="tenantId">Tenant the rule belongs to.</param>
    /// <param name="ruleId">Identifier of the evaluated rule.</param>
    /// <param name="matched">Whether the rule matched.</param>
    /// <param name="duration">Elapsed time of the evaluation.</param>
    void RecordRuleEvaluation(string tenantId, string ruleId, bool matched, TimeSpan duration);

    /// <summary>
    /// Records one template render together with how long it took.
    /// </summary>
    /// <param name="tenantId">Tenant the template belongs to.</param>
    /// <param name="templateKey">Key of the rendered template.</param>
    /// <param name="success">Whether rendering succeeded.</param>
    /// <param name="duration">Elapsed time of the render.</param>
    void RecordTemplateRender(string tenantId, string templateKey, bool success, TimeSpan duration);

    /// <summary>
    /// Records one storm detection event.
    /// </summary>
    /// <param name="tenantId">Tenant the storm belongs to.</param>
    /// <param name="stormKey">Key identifying the storm.</param>
    /// <param name="decision">Decision taken for the event.</param>
    void RecordStormEvent(string tenantId, string stormKey, string decision);

    /// <summary>
    /// Records a retention cleanup.
    /// </summary>
    /// <param name="tenantId">Tenant the cleanup ran for.</param>
    /// <param name="entityType">Type of entity that was cleaned up.</param>
    /// <param name="deletedCount">Number of deleted items.</param>
    void RecordRetentionCleanup(string tenantId, string entityType, int deletedCount);

    /// <summary>
    /// Records the current queue depth for a tenant's channel.
    /// </summary>
    /// <param name="tenantId">Tenant the queue belongs to.</param>
    /// <param name="channelType">Channel type of the queue.</param>
    /// <param name="depth">Current queue depth.</param>
    void RecordQueueDepth(string tenantId, string channelType, int depth);

    /// <summary>
    /// Starts a tracing activity for a delivery.
    /// </summary>
    /// <param name="tenantId">Tenant the delivery belongs to.</param>
    /// <param name="deliveryId">Identifier of the delivery.</param>
    /// <param name="channelType">Channel type used for the delivery.</param>
    /// <returns>The activity, or <c>null</c> if none was created.</returns>
    Activity? StartDeliveryActivity(string tenantId, string deliveryId, string channelType);

    /// <summary>
    /// Starts a tracing activity for an escalation.
    /// </summary>
    /// <param name="tenantId">Tenant the escalation belongs to.</param>
    /// <param name="incidentId">Identifier of the incident being escalated.</param>
    /// <param name="level">Escalation level.</param>
    /// <returns>The activity, or <c>null</c> if none was created.</returns>
    Activity? StartEscalationActivity(string tenantId, string incidentId, int level);
}
|
||||
|
||||
/// <summary>
/// Tag (dimension) keys shared by notification metrics and activities, kept in one place so
/// every call site spells them identically.
/// </summary>
public static class NotifyMetricTags
{
    // Scope
    public const string TenantId = "tenant_id";
    public const string ChannelType = "channel_type";
    public const string EntityType = "entity_type";

    // Results
    public const string Status = "status";
    public const string Outcome = "outcome";
    public const string Reason = "reason";
    public const string Matched = "matched";
    public const string Success = "success";
    public const string Decision = "decision";

    // Identifiers and levels
    public const string Level = "level";
    public const string RuleId = "rule_id";
    public const string TemplateKey = "template_key";
    public const string StormKey = "storm_key";
}
|
||||
|
||||
/// <summary>
/// Instrument names used by the notification system's metrics.
/// </summary>
public static class NotifyMetricNames
{
    // Event counts
    public const string DeliveryAttempts = "notify.delivery.attempts";
    public const string EscalationEvents = "notify.escalation.events";
    public const string DeadLetterEntries = "notify.deadletter.entries";
    public const string RuleEvaluations = "notify.rule.evaluations";
    public const string TemplateRenders = "notify.template.renders";
    public const string StormEvents = "notify.storm.events";
    public const string RetentionCleanups = "notify.retention.cleanups";

    // Durations
    public const string DeliveryDuration = "notify.delivery.duration";
    public const string RuleEvaluationDuration = "notify.rule.evaluation.duration";
    public const string TemplateRenderDuration = "notify.template.render.duration";

    // Current state
    public const string QueueDepth = "notify.queue.depth";
}
|
||||
Reference in New Issue
Block a user