using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;

namespace StellaOps.Notifier.Worker.Observability;

/// <summary>
/// Service for managing data retention policies.
/// Handles cleanup of old notifications, delivery logs, escalations, and metrics.
/// </summary>
public interface IRetentionPolicyService
{
    /// <summary>
    /// Registers a retention policy.
    /// </summary>
    Task RegisterPolicyAsync(RetentionPolicy policy, CancellationToken ct = default);

    /// <summary>
    /// Updates an existing retention policy.
    /// </summary>
    Task UpdatePolicyAsync(string policyId, RetentionPolicy policy, CancellationToken ct = default);

    /// <summary>
    /// Gets a retention policy by ID.
    /// </summary>
    Task<RetentionPolicy?> GetPolicyAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Lists all retention policies.
    /// </summary>
    Task<IReadOnlyList<RetentionPolicy>> ListPoliciesAsync(string? tenantId = null, CancellationToken ct = default);

    /// <summary>
    /// Deletes a retention policy.
    /// </summary>
    Task DeletePolicyAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Executes retention policies, returning cleanup results.
    /// </summary>
    Task<RetentionExecutionResult> ExecuteRetentionAsync(string? policyId = null, CancellationToken ct = default);

    /// <summary>
    /// Gets the next scheduled execution time for a policy.
    /// </summary>
    Task<DateTimeOffset?> GetNextExecutionAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Gets execution history for a policy.
    /// </summary>
    Task<IReadOnlyList<RetentionExecutionRecord>> GetExecutionHistoryAsync(
        string policyId,
        int limit = 100,
        CancellationToken ct = default);

    /// <summary>
    /// Previews what would be deleted by a policy without actually deleting.
    /// </summary>
    Task<RetentionPreview> PreviewRetentionAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Registers a cleanup handler for a specific data type.
    /// </summary>
    void RegisterHandler(string dataType, IRetentionHandler handler);
}

/// <summary>
/// Handler for cleaning up specific data types.
/// </summary>
public interface IRetentionHandler
{
    /// <summary>
    /// The data type this handler manages.
    /// </summary>
    string DataType { get; }

    /// <summary>
    /// Counts items that would be deleted.
    /// </summary>
    Task<long> CountAsync(RetentionQuery query, CancellationToken ct = default);

    /// <summary>
    /// Deletes items matching the query.
    /// </summary>
    Task<long> DeleteAsync(RetentionQuery query, CancellationToken ct = default);

    /// <summary>
    /// Archives items matching the query (if supported).
    /// </summary>
    Task<long> ArchiveAsync(RetentionQuery query, string archiveLocation, CancellationToken ct = default);
}

/// <summary>
/// Retention policy definition.
/// </summary>
public sealed record RetentionPolicy
{
    /// <summary>
    /// Unique policy identifier.
    /// </summary>
    public required string Id { get; init; }

    /// <summary>
    /// Human-readable name.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Description of what the policy does.
    /// </summary>
    public string? Description { get; init; }

    /// <summary>
    /// Tenant ID this policy applies to (null for global).
    /// </summary>
    public string? TenantId { get; init; }

    /// <summary>
    /// Data type to clean up.
    /// </summary>
    public required RetentionDataType DataType { get; init; }

    /// <summary>
    /// Retention period - data older than this is eligible for cleanup.
    /// </summary>
    public required TimeSpan RetentionPeriod { get; init; }

    /// <summary>
    /// Action to take on expired data.
    /// </summary>
    public RetentionAction Action { get; init; } = RetentionAction.Delete;

    /// <summary>
    /// Archive location (for Archive action).
    /// </summary>
    public string? ArchiveLocation { get; init; }

    /// <summary>
    /// Schedule for policy execution (cron expression).
    /// </summary>
    public string? Schedule { get; init; }

    /// <summary>
    /// Whether the policy is enabled.
    /// </summary>
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// Additional filters for targeting specific data.
    /// </summary>
    public RetentionFilters Filters { get; init; } = new();

    /// <summary>
    /// Maximum items to process per execution (0 = unlimited).
    /// </summary>
    public int BatchSize { get; init; }

    /// <summary>
    /// Whether to use soft delete (mark as deleted vs hard delete).
    /// </summary>
    public bool SoftDelete { get; init; }

    /// <summary>
    /// When the policy was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Who created the policy.
    /// </summary>
    public string? CreatedBy { get; init; }

    /// <summary>
    /// When the policy was last modified.
    /// </summary>
    public DateTimeOffset? ModifiedAt { get; init; }
}

/// <summary>
/// Types of data that can have retention policies.
/// </summary>
public enum RetentionDataType
{
    /// <summary>
    /// Notification delivery logs.
    /// </summary>
    DeliveryLogs,

    /// <summary>
    /// Escalation records.
    /// </summary>
    Escalations,

    /// <summary>
    /// Storm/correlation events.
    /// </summary>
    StormEvents,

    /// <summary>
    /// Dead-letter entries.
    /// </summary>
    DeadLetters,

    /// <summary>
    /// Audit logs.
    /// </summary>
    AuditLogs,

    /// <summary>
    /// Metrics data.
    /// </summary>
    Metrics,

    /// <summary>
    /// Trace spans.
    /// </summary>
    Traces,

    /// <summary>
    /// Chaos experiment records.
    /// </summary>
    ChaosExperiments,

    /// <summary>
    /// Tenant isolation violations.
    /// </summary>
    IsolationViolations,

    /// <summary>
    /// Webhook delivery logs.
    /// </summary>
    WebhookLogs,

    /// <summary>
    /// Template render cache.
    /// </summary>
    TemplateCache
}

/// <summary>
/// Actions to take on expired data.
/// </summary>
public enum RetentionAction
{
    /// <summary>
    /// Delete the data permanently.
    /// </summary>
    Delete,

    /// <summary>
    /// Archive the data to cold storage.
    /// </summary>
    Archive,

    /// <summary>
    /// Compress and keep in place.
    /// </summary>
    Compress,

    /// <summary>
    /// Mark for manual review.
    /// </summary>
    FlagForReview
}

/// <summary>
/// Additional filters for retention policies.
/// </summary>
public sealed record RetentionFilters
{
    // NOTE(review): the element/key/value types of the five collections below were
    // lost from the source text and are reconstructed as string - confirm against callers.

    /// <summary>
    /// Filter by channel types.
    /// </summary>
    public IReadOnlyList<string> ChannelTypes { get; init; } = [];

    /// <summary>
    /// Filter by delivery status.
    /// </summary>
    public IReadOnlyList<string> Statuses { get; init; } = [];

    /// <summary>
    /// Filter by severity levels.
    /// </summary>
    public IReadOnlyList<string> Severities { get; init; } = [];

    /// <summary>
    /// Exclude items matching these tags.
    /// </summary>
    public IReadOnlyDictionary<string, string> ExcludeTags { get; init; } = new Dictionary<string, string>();

    /// <summary>
    /// Only include items matching these tags.
    /// </summary>
    public IReadOnlyDictionary<string, string> IncludeTags { get; init; } = new Dictionary<string, string>();

    /// <summary>
    /// Custom filter expression.
    /// </summary>
    public string? CustomFilter { get; init; }
}

/// <summary>
/// Query for retention operations.
/// </summary>
public sealed record RetentionQuery
{
    /// <summary>
    /// Tenant ID to query.
    /// </summary>
    public string? TenantId { get; init; }

    /// <summary>
    /// Data type to query.
    /// </summary>
    public required RetentionDataType DataType { get; init; }

    /// <summary>
    /// Cutoff date - data before this date is eligible.
    /// </summary>
    public required DateTimeOffset CutoffDate { get; init; }

    /// <summary>
    /// Additional filters.
    /// </summary>
    public RetentionFilters Filters { get; init; } = new();

    /// <summary>
    /// Maximum items to return/delete.
    /// </summary>
    public int? Limit { get; init; }

    /// <summary>
    /// Whether to use soft delete.
    /// </summary>
    public bool SoftDelete { get; init; }
}

/// <summary>
/// Result of retention policy execution.
/// </summary>
public sealed record RetentionExecutionResult
{
    /// <summary>
    /// Unique execution identifier.
    /// </summary>
    public required string ExecutionId { get; init; }

    /// <summary>
    /// When execution started.
    /// </summary>
    public required DateTimeOffset StartedAt { get; init; }

    /// <summary>
    /// When execution completed.
    /// </summary>
    public required DateTimeOffset CompletedAt { get; init; }

    /// <summary>
    /// Policies that were executed.
    /// </summary>
    public IReadOnlyList<string> PoliciesExecuted { get; init; } = [];

    /// <summary>
    /// Total items processed.
    /// </summary>
    public required long TotalProcessed { get; init; }

    /// <summary>
    /// Total items deleted.
    /// </summary>
    public required long TotalDeleted { get; init; }

    /// <summary>
    /// Total items archived.
    /// </summary>
    public required long TotalArchived { get; init; }

    /// <summary>
    /// Results by policy.
    /// </summary>
    public IReadOnlyDictionary<string, PolicyExecutionResult> ByPolicy { get; init; } = new Dictionary<string, PolicyExecutionResult>();

    /// <summary>
    /// Errors encountered during execution.
    /// </summary>
    public IReadOnlyList<RetentionError> Errors { get; init; } = [];

    /// <summary>
    /// Whether execution completed successfully.
    /// </summary>
    public bool Success => Errors.Count == 0;
}

/// <summary>
/// Result for a single policy execution.
/// </summary>
public sealed record PolicyExecutionResult
{
    /// <summary>
    /// Policy ID.
    /// </summary>
    public required string PolicyId { get; init; }

    /// <summary>
    /// Items processed.
    /// </summary>
    public required long Processed { get; init; }

    /// <summary>
    /// Items deleted.
    /// </summary>
    public required long Deleted { get; init; }

    /// <summary>
    /// Items archived.
    /// </summary>
    public required long Archived { get; init; }

    /// <summary>
    /// Duration of execution.
    /// </summary>
    public required TimeSpan Duration { get; init; }

    /// <summary>
    /// Error if execution failed.
    /// </summary>
    public string? Error { get; init; }
}

/// <summary>
/// Error during retention execution.
/// </summary>
public sealed record RetentionError
{
    /// <summary>
    /// Policy that caused the error.
    /// </summary>
    public string? PolicyId { get; init; }

    /// <summary>
    /// Error message.
    /// </summary>
    public required string Message { get; init; }

    /// <summary>
    /// Exception type if applicable.
    /// </summary>
    public string? ExceptionType { get; init; }

    /// <summary>
    /// When the error occurred.
    /// </summary>
    public required DateTimeOffset Timestamp { get; init; }
}

/// <summary>
/// Historical record of retention execution.
/// </summary>
public sealed record RetentionExecutionRecord
{
    /// <summary>
    /// Execution identifier.
    /// </summary>
    public required string ExecutionId { get; init; }

    /// <summary>
    /// Policy that was executed.
    /// </summary>
    public required string PolicyId { get; init; }

    /// <summary>
    /// When execution started.
    /// </summary>
    public required DateTimeOffset StartedAt { get; init; }

    /// <summary>
    /// When execution completed.
    /// </summary>
    public required DateTimeOffset CompletedAt { get; init; }

    /// <summary>
    /// Items deleted.
    /// </summary>
    public required long Deleted { get; init; }

    /// <summary>
    /// Items archived.
    /// </summary>
    public required long Archived { get; init; }

    /// <summary>
    /// Whether execution succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Error message if failed.
    /// </summary>
    public string? Error { get; init; }
}

/// <summary>
/// Preview of what retention would delete.
/// </summary>
public sealed record RetentionPreview
{
    /// <summary>
    /// Policy ID being previewed.
    /// </summary>
    public required string PolicyId { get; init; }

    /// <summary>
    /// Cutoff date that would be used.
    /// </summary>
    public required DateTimeOffset CutoffDate { get; init; }

    /// <summary>
    /// Total items that would be affected.
    /// </summary>
    public required long TotalAffected { get; init; }

    /// <summary>
    /// Breakdown by category.
    /// </summary>
    public IReadOnlyDictionary<string, long> ByCategory { get; init; } = new Dictionary<string, long>();

    /// <summary>
    /// Sample of items that would be affected.
    /// </summary>
    public IReadOnlyList<RetentionPreviewItem> SampleItems { get; init; } = [];
}

/// <summary>
/// Sample item in retention preview.
/// </summary>
public sealed record RetentionPreviewItem
{
    /// <summary>
    /// Item identifier.
    /// </summary>
    public required string Id { get; init; }

    /// <summary>
    /// Item type.
    /// </summary>
    public required string Type { get; init; }

    /// <summary>
    /// When the item was created.
    /// </summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Summary of the item.
    /// </summary>
    public string? Summary { get; init; }
}

/// <summary>
/// Options for retention policy service.
/// </summary>
public sealed class RetentionPolicyOptions
{
    public const string SectionName = "Notifier:Observability:Retention";

    /// <summary>
    /// Whether retention is enabled.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Default retention period for data without explicit policy.
    /// </summary>
    public TimeSpan DefaultRetentionPeriod { get; set; } = TimeSpan.FromDays(90);

    /// <summary>
    /// Maximum retention period allowed.
    /// </summary>
    public TimeSpan MaxRetentionPeriod { get; set; } = TimeSpan.FromDays(365 * 7);

    /// <summary>
    /// Minimum retention period allowed.
    /// </summary>
    public TimeSpan MinRetentionPeriod { get; set; } = TimeSpan.FromDays(1);

    /// <summary>
    /// Default batch size for cleanup operations.
    /// </summary>
    public int DefaultBatchSize { get; set; } = 1000;

    /// <summary>
    /// Maximum concurrent cleanup operations.
    /// </summary>
    public int MaxConcurrentOperations { get; set; } = 4;

    /// <summary>
    /// How long to keep execution history.
    /// </summary>
    public TimeSpan ExecutionHistoryRetention { get; set; } = TimeSpan.FromDays(30);

    /// <summary>
    /// Default data type retention periods.
    /// </summary>
    public Dictionary<string, TimeSpan> DefaultPeriods { get; set; } = new()
    {
        ["DeliveryLogs"] = TimeSpan.FromDays(30),
        ["Escalations"] = TimeSpan.FromDays(90),
        ["StormEvents"] = TimeSpan.FromDays(14),
        ["DeadLetters"] = TimeSpan.FromDays(7),
        ["AuditLogs"] = TimeSpan.FromDays(365),
        ["Metrics"] = TimeSpan.FromDays(30),
        ["Traces"] = TimeSpan.FromDays(7),
        ["ChaosExperiments"] = TimeSpan.FromDays(7),
        ["IsolationViolations"] = TimeSpan.FromDays(90),
        ["WebhookLogs"] = TimeSpan.FromDays(14),
        ["TemplateCache"] = TimeSpan.FromDays(1)
    };
}

/// In-memory implementation of retention policy service.
/// Policies, handlers and execution history are held in process memory only.
public sealed class InMemoryRetentionPolicyService : IRetentionPolicyService
{
    // Keyed by policy ID.
    private readonly ConcurrentDictionary<string, RetentionPolicy> _policies = new();

    // Keyed by policy ID; each list is guarded by locking on the list instance itself.
    private readonly ConcurrentDictionary<string, List<RetentionExecutionRecord>> _history = new();

    // Keyed by the data type name passed to RegisterHandler; looked up with RetentionDataType.ToString().
    private readonly ConcurrentDictionary<string, IRetentionHandler> _handlers = new();

    private readonly RetentionPolicyOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<InMemoryRetentionPolicyService> _logger;

    /// <summary>
    /// Creates the service. Null <paramref name="options"/> falls back to default options and
    /// null <paramref name="timeProvider"/> falls back to <see cref="TimeProvider.System"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public InMemoryRetentionPolicyService(
        IOptions<RetentionPolicyOptions> options,
        TimeProvider timeProvider,
        ILogger<InMemoryRetentionPolicyService> logger)
    {
        _options = options?.Value ?? new RetentionPolicyOptions();
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Validates and stores a new policy, stamping <c>CreatedAt</c> with the current time.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="policy"/> is null.</exception>
    /// <exception cref="ArgumentException">The policy fails validation.</exception>
    /// <exception cref="InvalidOperationException">A policy with the same ID already exists.</exception>
    public Task RegisterPolicyAsync(RetentionPolicy policy, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(policy);
        ValidatePolicy(policy);

        var policyWithTimestamp = policy with { CreatedAt = _timeProvider.GetUtcNow() };

        if (!_policies.TryAdd(policy.Id, policyWithTimestamp))
        {
            throw new InvalidOperationException($"Policy '{policy.Id}' already exists");
        }

        _logger.LogInformation(
            "Registered retention policy {PolicyId}: {DataType} with {Retention} retention",
            policy.Id,
            policy.DataType,
            policy.RetentionPeriod);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Replaces the stored policy for <paramref name="policyId"/>. The stored ID and original
    /// <c>CreatedAt</c> are preserved and <c>ModifiedAt</c> is set to the current time.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="policy"/> is null.</exception>
    /// <exception cref="ArgumentException">The policy fails validation.</exception>
    /// <exception cref="KeyNotFoundException">No policy with that ID exists.</exception>
    public Task UpdatePolicyAsync(string policyId, RetentionPolicy policy, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(policy);
        ValidatePolicy(policy);

        if (!_policies.TryGetValue(policyId, out var existing))
        {
            throw new KeyNotFoundException($"Policy '{policyId}' not found");
        }

        var updated = policy with
        {
            Id = policyId,
            CreatedAt = existing.CreatedAt,
            ModifiedAt = _timeProvider.GetUtcNow()
        };

        _policies[policyId] = updated;

        _logger.LogInformation("Updated retention policy {PolicyId}", policyId);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Returns the policy with the given ID, or null when none is stored.
    /// </summary>
    public Task<RetentionPolicy?> GetPolicyAsync(string policyId, CancellationToken ct = default)
    {
        _policies.TryGetValue(policyId, out var policy);
        return Task.FromResult(policy);
    }

    /// <summary>
    /// Lists policies ordered by name. When <paramref name="tenantId"/> is supplied, only that
    /// tenant's policies and global (null-tenant) policies are returned.
    /// </summary>
    public Task<IReadOnlyList<RetentionPolicy>> ListPoliciesAsync(string? tenantId = null, CancellationToken ct = default)
    {
        var query = _policies.Values.AsEnumerable();

        if (!string.IsNullOrEmpty(tenantId))
        {
            query = query.Where(p => p.TenantId == tenantId || p.TenantId == null);
        }

        var result = query.OrderBy(p => p.Name).ToList();
        return Task.FromResult<IReadOnlyList<RetentionPolicy>>(result);
    }

    /// <summary>
    /// Removes the policy if present; unknown IDs are ignored.
    /// </summary>
    public Task DeletePolicyAsync(string policyId, CancellationToken ct = default)
    {
        if (_policies.TryRemove(policyId, out _))
        {
            _logger.LogInformation("Deleted retention policy {PolicyId}", policyId);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Executes one policy (when <paramref name="policyId"/> is given) or every enabled policy.
    /// A failure in one policy is recorded in the result and in history, and the run continues.
    /// </summary>
    /// <returns>
    /// Aggregated totals and per-policy results. When retention is disabled in options the
    /// result carries a single "Retention is disabled" error and nothing is executed.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<RetentionExecutionResult> ExecuteRetentionAsync(string? policyId = null, CancellationToken ct = default)
    {
        var startedAt = _timeProvider.GetUtcNow();
        var executionId = $"exec-{Guid.NewGuid():N}";

        if (!_options.Enabled)
        {
            return new RetentionExecutionResult
            {
                ExecutionId = executionId,
                StartedAt = startedAt,
                CompletedAt = startedAt,
                TotalProcessed = 0,
                TotalDeleted = 0,
                TotalArchived = 0,
                Errors = [new RetentionError { Message = "Retention is disabled", Timestamp = startedAt }]
            };
        }

        var byPolicy = new Dictionary<string, PolicyExecutionResult>();
        var errors = new List<RetentionError>();
        long totalDeleted = 0;
        long totalArchived = 0;

        List<RetentionPolicy> policiesToExecute;
        if (string.IsNullOrEmpty(policyId))
        {
            policiesToExecute = _policies.Values.Where(p => p.Enabled).ToList();
        }
        else if (_policies.TryGetValue(policyId, out var requested) && requested.Enabled)
        {
            // Policies are stored under their own ID, so a keyed lookup replaces the full scan.
            policiesToExecute = [requested];
        }
        else
        {
            _logger.LogWarning(
                "Retention policy {PolicyId} was not found or is disabled; nothing to execute",
                policyId);
            policiesToExecute = [];
        }

        foreach (var policy in policiesToExecute)
        {
            ct.ThrowIfCancellationRequested();

            var policyStart = _timeProvider.GetUtcNow();

            try
            {
                var result = await ExecutePolicyAsync(policy, ct);
                totalDeleted += result.Deleted;
                totalArchived += result.Archived;
                byPolicy[policy.Id] = result;

                // Record execution
                RecordExecution(policy.Id, executionId, policyStart, result, null);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Caller-requested cancellation is not a policy failure: let it propagate
                // instead of reporting it as an error and returning normally.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error executing retention policy {PolicyId}", policy.Id);

                errors.Add(new RetentionError
                {
                    PolicyId = policy.Id,
                    Message = ex.Message,
                    ExceptionType = ex.GetType().Name,
                    Timestamp = _timeProvider.GetUtcNow()
                });

                // One result object feeds both the response and the history so the
                // recorded completion time reflects the real elapsed duration.
                var failed = new PolicyExecutionResult
                {
                    PolicyId = policy.Id,
                    Processed = 0,
                    Deleted = 0,
                    Archived = 0,
                    Duration = _timeProvider.GetUtcNow() - policyStart,
                    Error = ex.Message
                };

                byPolicy[policy.Id] = failed;
                RecordExecution(policy.Id, executionId, policyStart, failed, ex.Message);
            }
        }

        var completedAt = _timeProvider.GetUtcNow();

        _logger.LogInformation(
            "Retention execution {ExecutionId} completed: {Deleted} deleted, {Archived} archived, {Errors} errors",
            executionId,
            totalDeleted,
            totalArchived,
            errors.Count);

        return new RetentionExecutionResult
        {
            ExecutionId = executionId,
            StartedAt = startedAt,
            CompletedAt = completedAt,
            PoliciesExecuted = policiesToExecute.Select(p => p.Id).ToList(),
            TotalProcessed = totalDeleted + totalArchived,
            TotalDeleted = totalDeleted,
            TotalArchived = totalArchived,
            ByPolicy = byPolicy,
            Errors = errors
        };
    }

    /// <summary>
    /// Builds the query for a policy and dispatches it to the handler registered for the
    /// policy's data type. Only Delete and Archive invoke the handler; other actions, or a
    /// missing handler, yield zero counts.
    /// </summary>
    private async Task<PolicyExecutionResult> ExecutePolicyAsync(RetentionPolicy policy, CancellationToken ct)
    {
        var start = _timeProvider.GetUtcNow();
        var cutoff = start - policy.RetentionPeriod;

        var query = new RetentionQuery
        {
            TenantId = policy.TenantId,
            DataType = policy.DataType,
            CutoffDate = cutoff,
            Filters = policy.Filters,
            // A non-positive batch size means "no limit".
            Limit = policy.BatchSize > 0 ? policy.BatchSize : null,
            SoftDelete = policy.SoftDelete
        };

        var dataTypeName = policy.DataType.ToString();
        long deleted = 0;
        long archived = 0;

        if (!_handlers.TryGetValue(dataTypeName, out var handler))
        {
            _logger.LogWarning(
                "No retention handler registered for {DataType}; policy {PolicyId} processed nothing",
                dataTypeName,
                policy.Id);
        }
        else
        {
            switch (policy.Action)
            {
                case RetentionAction.Delete:
                    deleted = await handler.DeleteAsync(query, ct);
                    break;

                case RetentionAction.Archive when !string.IsNullOrEmpty(policy.ArchiveLocation):
                    archived = await handler.ArchiveAsync(query, policy.ArchiveLocation, ct);
                    break;

                default:
                    // Compress, FlagForReview, and Archive without a location: no handler call is made.
                    _logger.LogDebug(
                        "Retention action {Action} performs no handler operation for policy {PolicyId}",
                        policy.Action,
                        policy.Id);
                    break;
            }
        }

        return new PolicyExecutionResult
        {
            PolicyId = policy.Id,
            Processed = deleted + archived,
            Deleted = deleted,
            Archived = archived,
            Duration = _timeProvider.GetUtcNow() - start
        };
    }

    /// <summary>
    /// Appends an execution record to the policy's history and drops records that completed
    /// before the configured history retention window.
    /// </summary>
    private void RecordExecution(string policyId, string executionId, DateTimeOffset startedAt, PolicyExecutionResult result, string? error)
    {
        var record = new RetentionExecutionRecord
        {
            ExecutionId = executionId,
            PolicyId = policyId,
            StartedAt = startedAt,
            CompletedAt = startedAt + result.Duration,
            Deleted = result.Deleted,
            Archived = result.Archived,
            Success = error == null,
            Error = error
        };

        var history = _history.GetOrAdd(policyId, _ => []);
        lock (history)
        {
            history.Add(record);

            // Trim old history
            var cutoff = _timeProvider.GetUtcNow() - _options.ExecutionHistoryRetention;
            history.RemoveAll(r => r.CompletedAt < cutoff);
        }
    }

    /// <summary>
    /// Returns null when the policy is unknown or has no schedule; otherwise returns the top
    /// of the next hour as a placeholder (the schedule expression itself is not parsed).
    /// </summary>
    public Task<DateTimeOffset?> GetNextExecutionAsync(string policyId, CancellationToken ct = default)
    {
        if (!_policies.TryGetValue(policyId, out var policy))
        {
            return Task.FromResult<DateTimeOffset?>(null);
        }

        if (string.IsNullOrEmpty(policy.Schedule))
        {
            return Task.FromResult<DateTimeOffset?>(null);
        }

        // Simple schedule parsing - in real implementation would use Cronos
        // For now, return next hour as placeholder
        var now = _timeProvider.GetUtcNow();
        var next = now.AddHours(1);
        next = new DateTimeOffset(next.Year, next.Month, next.Day, next.Hour, 0, 0, TimeSpan.Zero);

        return Task.FromResult<DateTimeOffset?>(next);
    }

    /// <summary>
    /// Returns up to <paramref name="limit"/> history records for the policy, most recently
    /// completed first; empty when the policy has no recorded executions.
    /// </summary>
    public Task<IReadOnlyList<RetentionExecutionRecord>> GetExecutionHistoryAsync(
        string policyId,
        int limit = 100,
        CancellationToken ct = default)
    {
        if (_history.TryGetValue(policyId, out var history))
        {
            List<RetentionExecutionRecord> result;
            lock (history)
            {
                // Materialize under the lock so the snapshot is not affected by concurrent writes.
                result = history
                    .OrderByDescending(r => r.CompletedAt)
                    .Take(limit)
                    .ToList();
            }

            return Task.FromResult<IReadOnlyList<RetentionExecutionRecord>>(result);
        }

        return Task.FromResult<IReadOnlyList<RetentionExecutionRecord>>([]);
    }

    /// <summary>
    /// Counts what the policy would affect via the registered handler's <c>CountAsync</c>,
    /// without deleting anything. The count is zero when no handler is registered.
    /// </summary>
    /// <exception cref="KeyNotFoundException">No policy with that ID exists.</exception>
    public async Task<RetentionPreview> PreviewRetentionAsync(string policyId, CancellationToken ct = default)
    {
        if (!_policies.TryGetValue(policyId, out var policy))
        {
            throw new KeyNotFoundException($"Policy '{policyId}' not found");
        }

        var cutoff = _timeProvider.GetUtcNow() - policy.RetentionPeriod;

        var query = new RetentionQuery
        {
            TenantId = policy.TenantId,
            DataType = policy.DataType,
            CutoffDate = cutoff,
            Filters = policy.Filters
        };

        long totalAffected = 0;
        var dataTypeName = policy.DataType.ToString();

        if (_handlers.TryGetValue(dataTypeName, out var handler))
        {
            totalAffected = await handler.CountAsync(query, ct);
        }

        return new RetentionPreview
        {
            PolicyId = policyId,
            CutoffDate = cutoff,
            TotalAffected = totalAffected,
            ByCategory = new Dictionary<string, long> { [dataTypeName] = totalAffected }
        };
    }

    /// <summary>
    /// Registers (or replaces) the handler used for <paramref name="dataType"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dataType"/> or <paramref name="handler"/> is null.
    /// </exception>
    public void RegisterHandler(string dataType, IRetentionHandler handler)
    {
        ArgumentNullException.ThrowIfNull(dataType);
        // Reject null up front; otherwise it would surface later as a NullReferenceException during execution.
        ArgumentNullException.ThrowIfNull(handler);

        _handlers[dataType] = handler;
        _logger.LogDebug("Registered retention handler for {DataType}", dataType);
    }

    /// <summary>
    /// Checks name, retention period bounds from options, and that Archive policies carry an
    /// archive location.
    /// </summary>
    /// <exception cref="ArgumentException">Any check fails.</exception>
    private void ValidatePolicy(RetentionPolicy policy)
    {
        if (string.IsNullOrWhiteSpace(policy.Name))
        {
            throw new ArgumentException("Policy name is required", nameof(policy));
        }

        if (policy.RetentionPeriod < _options.MinRetentionPeriod)
        {
            throw new ArgumentException($"Retention period must be at least {_options.MinRetentionPeriod}", nameof(policy));
        }

        if (policy.RetentionPeriod > _options.MaxRetentionPeriod)
        {
            throw new ArgumentException($"Retention period cannot exceed {_options.MaxRetentionPeriod}", nameof(policy));
        }

        if (policy.Action == RetentionAction.Archive && string.IsNullOrEmpty(policy.ArchiveLocation))
        {
            throw new ArgumentException("Archive location is required for Archive action", nameof(policy));
        }
    }
}

/// No-op retention
/// handler for testing.
/// Every operation completes immediately and reports zero affected items.
public sealed class NoOpRetentionHandler : IRetentionHandler
{
    /// <summary>
    /// The data type name supplied at construction.
    /// </summary>
    public string DataType { get; }

    /// <summary>
    /// Creates a handler that reports <paramref name="dataType"/> as its data type.
    /// </summary>
    public NoOpRetentionHandler(string dataType)
    {
        DataType = dataType;
    }

    /// <summary>
    /// Always returns 0.
    /// </summary>
    public Task<long> CountAsync(RetentionQuery query, CancellationToken ct = default)
        => Task.FromResult(0L);

    /// <summary>
    /// Deletes nothing; always returns 0.
    /// </summary>
    public Task<long> DeleteAsync(RetentionQuery query, CancellationToken ct = default)
        => Task.FromResult(0L);

    /// <summary>
    /// Archives nothing; always returns 0.
    /// </summary>
    public Task<long> ArchiveAsync(RetentionQuery query, string archiveLocation, CancellationToken ct = default)
        => Task.FromResult(0L);
}

/// <summary>
/// Extension methods for retention policies.
/// </summary>
public static class RetentionPolicyExtensions
{
    /// <summary>
    /// Creates a default retention policy for delivery logs.
    /// </summary>
    /// <param name="id">Policy identifier.</param>
    /// <param name="retention">Retention period for the policy.</param>
    /// <param name="tenantId">Tenant the policy applies to, or null.</param>
    /// <param name="createdBy">Creator recorded on the policy, or null.</param>
    /// <returns>A Delete policy targeting <see cref="RetentionDataType.DeliveryLogs"/>.</returns>
    public static RetentionPolicy CreateDeliveryLogPolicy(
        string id,
        TimeSpan retention,
        string? tenantId = null,
        string? createdBy = null)
    {
        return new RetentionPolicy
        {
            Id = id,
            Name = "Delivery Log Retention",
            Description = "Automatically clean up old delivery logs",
            TenantId = tenantId,
            DataType = RetentionDataType.DeliveryLogs,
            RetentionPeriod = retention,
            Action = RetentionAction.Delete,
            CreatedBy = createdBy
        };
    }

    /// <summary>
    /// Creates a default retention policy for dead letters.
    /// </summary>
    /// <param name="id">Policy identifier.</param>
    /// <param name="retention">Retention period for the policy.</param>
    /// <param name="tenantId">Tenant the policy applies to, or null.</param>
    /// <param name="createdBy">Creator recorded on the policy, or null.</param>
    /// <returns>A Delete policy targeting <see cref="RetentionDataType.DeadLetters"/>.</returns>
    public static RetentionPolicy CreateDeadLetterPolicy(
        string id,
        TimeSpan retention,
        string? tenantId = null,
        string? createdBy = null)
    {
        return new RetentionPolicy
        {
            Id = id,
            Name = "Dead Letter Retention",
            Description = "Automatically clean up old dead letter entries",
            TenantId = tenantId,
            DataType = RetentionDataType.DeadLetters,
            RetentionPeriod = retention,
            Action = RetentionAction.Delete,
            CreatedBy = createdBy
        };
    }

    /// <summary>
    /// Creates an archive policy for audit logs.
    /// </summary>
    /// <param name="id">Policy identifier.</param>
    /// <param name="retention">Retention period for the policy.</param>
    /// <param name="archiveLocation">Archive location stored on the policy.</param>
    /// <param name="tenantId">Tenant the policy applies to, or null.</param>
    /// <param name="createdBy">Creator recorded on the policy, or null.</param>
    /// <returns>An Archive policy targeting <see cref="RetentionDataType.AuditLogs"/>.</returns>
    public static RetentionPolicy CreateAuditArchivePolicy(
        string id,
        TimeSpan retention,
        string archiveLocation,
        string? tenantId = null,
        string? createdBy = null)
    {
        return new RetentionPolicy
        {
            Id = id,
            Name = "Audit Log Archive",
            Description = "Archive old audit logs to cold storage",
            TenantId = tenantId,
            DataType = RetentionDataType.AuditLogs,
            RetentionPeriod = retention,
            Action = RetentionAction.Archive,
            ArchiveLocation = archiveLocation,
            CreatedBy = createdBy
        };
    }
}