using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Notifier.Worker.Observability;
/// <summary>
/// Service for managing data retention policies.
/// Handles cleanup of old notifications, delivery logs, escalations, and metrics.
/// </summary>
public interface IRetentionPolicyService
{
    /// <summary>
    /// Registers a retention policy.
    /// </summary>
    /// <param name="policy">The policy to register.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task RegisterPolicyAsync(RetentionPolicy policy, CancellationToken ct = default);

    /// <summary>
    /// Updates an existing retention policy.
    /// </summary>
    /// <param name="policyId">ID of the policy to replace.</param>
    /// <param name="policy">The new policy definition.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task UpdatePolicyAsync(string policyId, RetentionPolicy policy, CancellationToken ct = default);

    /// <summary>
    /// Gets a retention policy by ID.
    /// </summary>
    /// <param name="policyId">ID of the policy to look up.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The policy, or <c>null</c> when no policy with that ID exists.</returns>
    Task<RetentionPolicy?> GetPolicyAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Lists all retention policies.
    /// </summary>
    /// <param name="tenantId">Optional tenant filter; <c>null</c> applies no tenant filter.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The matching policies.</returns>
    Task<IReadOnlyList<RetentionPolicy>> ListPoliciesAsync(string? tenantId = null, CancellationToken ct = default);

    /// <summary>
    /// Deletes a retention policy.
    /// </summary>
    /// <param name="policyId">ID of the policy to delete.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task DeletePolicyAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Executes retention policies, returning cleanup results.
    /// </summary>
    /// <param name="policyId">ID of a single policy to execute, or <c>null</c> to execute all policies.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The aggregated result of the execution.</returns>
    Task<RetentionExecutionResult> ExecuteRetentionAsync(string? policyId = null, CancellationToken ct = default);

    /// <summary>
    /// Gets the next scheduled execution time for a policy.
    /// </summary>
    /// <param name="policyId">ID of the policy.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The next execution time, or <c>null</c> when none can be determined.</returns>
    Task<DateTimeOffset?> GetNextExecutionAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Gets execution history for a policy.
    /// </summary>
    /// <param name="policyId">ID of the policy.</param>
    /// <param name="limit">Maximum number of records to return.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The recorded executions for the policy.</returns>
    Task<IReadOnlyList<RetentionExecutionRecord>> GetExecutionHistoryAsync(
        string policyId,
        int limit = 100,
        CancellationToken ct = default);

    /// <summary>
    /// Previews what would be deleted by a policy without actually deleting.
    /// </summary>
    /// <param name="policyId">ID of the policy to preview.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A preview describing the data the policy would affect.</returns>
    Task<RetentionPreview> PreviewRetentionAsync(string policyId, CancellationToken ct = default);

    /// <summary>
    /// Registers a cleanup handler for a specific data type.
    /// </summary>
    /// <param name="dataType">Name of the data type the handler is responsible for.</param>
    /// <param name="handler">The handler to register.</param>
    void RegisterHandler(string dataType, IRetentionHandler handler);
}
/// <summary>
/// Handler for cleaning up specific data types.
/// </summary>
public interface IRetentionHandler
{
    /// <summary>
    /// The data type this handler manages.
    /// </summary>
    string DataType { get; }

    /// <summary>
    /// Counts items that would be deleted.
    /// </summary>
    /// <param name="query">Selection criteria (tenant, data type, cutoff date, filters).</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The number of items matching <paramref name="query"/>.</returns>
    Task<long> CountAsync(RetentionQuery query, CancellationToken ct = default);

    /// <summary>
    /// Deletes items matching the query.
    /// </summary>
    /// <param name="query">Selection criteria (tenant, data type, cutoff date, filters).</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The number of items deleted.</returns>
    Task<long> DeleteAsync(RetentionQuery query, CancellationToken ct = default);

    /// <summary>
    /// Archives items matching the query (if supported).
    /// </summary>
    /// <param name="query">Selection criteria (tenant, data type, cutoff date, filters).</param>
    /// <param name="archiveLocation">Destination the items are archived to.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The number of items archived.</returns>
    Task<long> ArchiveAsync(RetentionQuery query, string archiveLocation, CancellationToken ct = default);
}
/// <summary>
/// Retention policy definition.
/// </summary>
public sealed record RetentionPolicy
{
/// <summary>
/// Unique policy identifier.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Human-readable name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Description of what the policy does.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Tenant ID this policy applies to (null for global).
/// </summary>
public string? TenantId { get; init; }
/// <summary>
/// Data type to clean up.
/// </summary>
public required RetentionDataType DataType { get; init; }
/// <summary>
/// Retention period - data older than this is eligible for cleanup.
/// </summary>
public required TimeSpan RetentionPeriod { get; init; }
/// <summary>
/// Action to take on expired data. Defaults to <see cref="RetentionAction.Delete"/>.
/// </summary>
public RetentionAction Action { get; init; } = RetentionAction.Delete;
/// <summary>
/// Archive location (for Archive action).
/// </summary>
public string? ArchiveLocation { get; init; }
/// <summary>
/// Schedule for policy execution (cron expression).
/// </summary>
public string? Schedule { get; init; }
/// <summary>
/// Whether the policy is enabled. Defaults to <c>true</c>.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Additional filters for targeting specific data. Defaults to an empty filter set.
/// </summary>
public RetentionFilters Filters { get; init; } = new();
/// <summary>
/// Maximum items to process per execution (0 = unlimited).
/// </summary>
public int BatchSize { get; init; }
/// <summary>
/// Whether to use soft delete (mark as deleted vs hard delete).
/// </summary>
public bool SoftDelete { get; init; }
/// <summary>
/// When the policy was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Who created the policy.
/// </summary>
public string? CreatedBy { get; init; }
/// <summary>
/// When the policy was last modified (null if never modified).
/// </summary>
public DateTimeOffset? ModifiedAt { get; init; }
}
/// <summary>
/// Types of data that can have retention policies.
/// </summary>
public enum RetentionDataType
{
/// <summary>
/// Notification delivery logs.
/// </summary>
DeliveryLogs,
/// <summary>
/// Escalation records.
/// </summary>
Escalations,
/// <summary>
/// Storm/correlation events.
/// </summary>
StormEvents,
/// <summary>
/// Dead-letter entries.
/// </summary>
DeadLetters,
/// <summary>
/// Audit logs.
/// </summary>
AuditLogs,
/// <summary>
/// Metrics data.
/// </summary>
Metrics,
/// <summary>
/// Trace spans.
/// </summary>
Traces,
/// <summary>
/// Chaos experiment records.
/// </summary>
ChaosExperiments,
/// <summary>
/// Tenant isolation violations.
/// </summary>
IsolationViolations,
/// <summary>
/// Webhook delivery logs.
/// </summary>
WebhookLogs,
/// <summary>
/// Template render cache.
/// </summary>
TemplateCache
}
/// <summary>
/// Actions to take on expired data.
/// </summary>
public enum RetentionAction
{
/// <summary>
/// Delete the data permanently.
/// </summary>
Delete,
/// <summary>
/// Archive the data to cold storage.
/// </summary>
Archive,
/// <summary>
/// Compress and keep in place.
/// </summary>
Compress,
/// <summary>
/// Mark for manual review.
/// </summary>
FlagForReview
}
/// <summary>
/// Additional filters for retention policies.
/// </summary>
/// <remarks>
/// NOTE(review): the collection-typed properties below carry no generic type arguments in this
/// listing; they appear to have been lost before review. Presumably the lists hold strings and
/// the tag dictionaries map string keys to string values - confirm against the original file.
/// </remarks>
public sealed record RetentionFilters
{
/// <summary>
/// Filter by channel types. Defaults to an empty list.
/// </summary>
public IReadOnlyList ChannelTypes { get; init; } = [];
/// <summary>
/// Filter by delivery status. Defaults to an empty list.
/// </summary>
public IReadOnlyList Statuses { get; init; } = [];
/// <summary>
/// Filter by severity levels. Defaults to an empty list.
/// </summary>
public IReadOnlyList Severities { get; init; } = [];
/// <summary>
/// Exclude items matching these tags. Defaults to an empty dictionary.
/// </summary>
public IReadOnlyDictionary ExcludeTags { get; init; } = new Dictionary();
/// <summary>
/// Only include items matching these tags. Defaults to an empty dictionary.
/// </summary>
public IReadOnlyDictionary IncludeTags { get; init; } = new Dictionary();
/// <summary>
/// Custom filter expression.
/// </summary>
public string? CustomFilter { get; init; }
}
/// <summary>
/// Query for retention operations.
/// </summary>
public sealed record RetentionQuery
{
/// <summary>
/// Tenant ID to query.
/// </summary>
public string? TenantId { get; init; }
/// <summary>
/// Data type to query.
/// </summary>
public required RetentionDataType DataType { get; init; }
/// <summary>
/// Cutoff date - data before this date is eligible.
/// </summary>
public required DateTimeOffset CutoffDate { get; init; }
/// <summary>
/// Additional filters. Defaults to an empty filter set.
/// </summary>
public RetentionFilters Filters { get; init; } = new();
/// <summary>
/// Maximum items to return/delete (null when no limit is set).
/// </summary>
public int? Limit { get; init; }
/// <summary>
/// Whether to use soft delete.
/// </summary>
public bool SoftDelete { get; init; }
}
/// <summary>
/// Result of retention policy execution.
/// </summary>
public sealed record RetentionExecutionResult
{
    /// <summary>
    /// Unique execution identifier.
    /// </summary>
    public required string ExecutionId { get; init; }

    /// <summary>
    /// When execution started.
    /// </summary>
    public required DateTimeOffset StartedAt { get; init; }

    /// <summary>
    /// When execution completed.
    /// </summary>
    public required DateTimeOffset CompletedAt { get; init; }

    /// <summary>
    /// IDs of the policies that were executed. Defaults to an empty list.
    /// </summary>
    public IReadOnlyList<string> PoliciesExecuted { get; init; } = [];

    /// <summary>
    /// Total items processed.
    /// </summary>
    public required long TotalProcessed { get; init; }

    /// <summary>
    /// Total items deleted.
    /// </summary>
    public required long TotalDeleted { get; init; }

    /// <summary>
    /// Total items archived.
    /// </summary>
    public required long TotalArchived { get; init; }

    /// <summary>
    /// Results by policy, keyed by policy ID. Defaults to an empty dictionary.
    /// </summary>
    public IReadOnlyDictionary<string, PolicyExecutionResult> ByPolicy { get; init; } =
        new Dictionary<string, PolicyExecutionResult>();

    /// <summary>
    /// Errors encountered during execution. Defaults to an empty list.
    /// </summary>
    public IReadOnlyList<RetentionError> Errors { get; init; } = [];

    /// <summary>
    /// Whether execution completed successfully, i.e. <see cref="Errors"/> is empty.
    /// </summary>
    public bool Success => Errors.Count == 0;
}
/// <summary>
/// Result for a single policy execution.
/// </summary>
public sealed record PolicyExecutionResult
{
/// <summary>
/// Policy ID.
/// </summary>
public required string PolicyId { get; init; }
/// <summary>
/// Items processed.
/// </summary>
public required long Processed { get; init; }
/// <summary>
/// Items deleted.
/// </summary>
public required long Deleted { get; init; }
/// <summary>
/// Items archived.
/// </summary>
public required long Archived { get; init; }
/// <summary>
/// Duration of execution.
/// </summary>
public required TimeSpan Duration { get; init; }
/// <summary>
/// Error if execution failed (null otherwise).
/// </summary>
public string? Error { get; init; }
}
/// <summary>
/// Error during retention execution.
/// </summary>
public sealed record RetentionError
{
/// <summary>
/// Policy that caused the error (null when the error is not tied to a single policy).
/// </summary>
public string? PolicyId { get; init; }
/// <summary>
/// Error message.
/// </summary>
public required string Message { get; init; }
/// <summary>
/// Exception type if applicable.
/// </summary>
public string? ExceptionType { get; init; }
/// <summary>
/// When the error occurred.
/// </summary>
public required DateTimeOffset Timestamp { get; init; }
}
/// <summary>
/// Historical record of retention execution.
/// </summary>
public sealed record RetentionExecutionRecord
{
/// <summary>
/// Execution identifier.
/// </summary>
public required string ExecutionId { get; init; }
/// <summary>
/// Policy that was executed.
/// </summary>
public required string PolicyId { get; init; }
/// <summary>
/// When execution started.
/// </summary>
public required DateTimeOffset StartedAt { get; init; }
/// <summary>
/// When execution completed.
/// </summary>
public required DateTimeOffset CompletedAt { get; init; }
/// <summary>
/// Items deleted.
/// </summary>
public required long Deleted { get; init; }
/// <summary>
/// Items archived.
/// </summary>
public required long Archived { get; init; }
/// <summary>
/// Whether execution succeeded.
/// </summary>
public required bool Success { get; init; }
/// <summary>
/// Error message if failed (null otherwise).
/// </summary>
public string? Error { get; init; }
}
/// <summary>
/// Preview of what retention would delete.
/// </summary>
public sealed record RetentionPreview
{
    /// <summary>
    /// Policy ID being previewed.
    /// </summary>
    public required string PolicyId { get; init; }

    /// <summary>
    /// Cutoff date that would be used.
    /// </summary>
    public required DateTimeOffset CutoffDate { get; init; }

    /// <summary>
    /// Total items that would be affected.
    /// </summary>
    public required long TotalAffected { get; init; }

    /// <summary>
    /// Breakdown of affected item counts by category name. Defaults to an empty dictionary.
    /// </summary>
    public IReadOnlyDictionary<string, long> ByCategory { get; init; } = new Dictionary<string, long>();

    /// <summary>
    /// Sample of items that would be affected. Defaults to an empty list.
    /// </summary>
    public IReadOnlyList<RetentionPreviewItem> SampleItems { get; init; } = [];
}
/// <summary>
/// Sample item in retention preview.
/// </summary>
public sealed record RetentionPreviewItem
{
/// <summary>
/// Item identifier.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Item type.
/// </summary>
public required string Type { get; init; }
/// <summary>
/// When the item was created.
/// </summary>
public required DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Summary of the item.
/// </summary>
public string? Summary { get; init; }
}
/// <summary>
/// Options for retention policy service.
/// </summary>
public sealed class RetentionPolicyOptions
{
    /// <summary>
    /// Name of the configuration section for these options.
    /// </summary>
    public const string SectionName = "Notifier:Observability:Retention";

    /// <summary>
    /// Whether retention is enabled. Defaults to <c>true</c>.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Default retention period for data without explicit policy. Defaults to 90 days.
    /// </summary>
    public TimeSpan DefaultRetentionPeriod { get; set; } = TimeSpan.FromDays(90);

    /// <summary>
    /// Maximum retention period allowed. Defaults to 7 x 365 days.
    /// </summary>
    public TimeSpan MaxRetentionPeriod { get; set; } = TimeSpan.FromDays(365 * 7);

    /// <summary>
    /// Minimum retention period allowed. Defaults to 1 day.
    /// </summary>
    public TimeSpan MinRetentionPeriod { get; set; } = TimeSpan.FromDays(1);

    /// <summary>
    /// Default batch size for cleanup operations. Defaults to 1000.
    /// </summary>
    public int DefaultBatchSize { get; set; } = 1000;

    /// <summary>
    /// Maximum concurrent cleanup operations. Defaults to 4.
    /// </summary>
    public int MaxConcurrentOperations { get; set; } = 4;

    /// <summary>
    /// How long to keep execution history. Defaults to 30 days.
    /// </summary>
    public TimeSpan ExecutionHistoryRetention { get; set; } = TimeSpan.FromDays(30);

    /// <summary>
    /// Default data type retention periods, keyed by data type name.
    /// </summary>
    public Dictionary<string, TimeSpan> DefaultPeriods { get; set; } = new()
    {
        ["DeliveryLogs"] = TimeSpan.FromDays(30),
        ["Escalations"] = TimeSpan.FromDays(90),
        ["StormEvents"] = TimeSpan.FromDays(14),
        ["DeadLetters"] = TimeSpan.FromDays(7),
        ["AuditLogs"] = TimeSpan.FromDays(365),
        ["Metrics"] = TimeSpan.FromDays(30),
        ["Traces"] = TimeSpan.FromDays(7),
        ["ChaosExperiments"] = TimeSpan.FromDays(7),
        ["IsolationViolations"] = TimeSpan.FromDays(90),
        ["WebhookLogs"] = TimeSpan.FromDays(14),
        ["TemplateCache"] = TimeSpan.FromDays(1)
    };
}
/// <summary>
/// In-memory implementation of retention policy service.
/// Policies, handlers, and execution history are kept in process-local dictionaries.
/// </summary>
public sealed class InMemoryRetentionPolicyService : IRetentionPolicyService
{
    // Policies keyed by policy ID.
    private readonly ConcurrentDictionary<string, RetentionPolicy> _policies = new();

    // Execution history keyed by policy ID; each list is guarded by a lock on the list itself.
    private readonly ConcurrentDictionary<string, List<RetentionExecutionRecord>> _history = new();

    // Cleanup handlers keyed by data type name (matched against RetentionDataType.ToString()).
    private readonly ConcurrentDictionary<string, IRetentionHandler> _handlers = new();

    private readonly RetentionPolicyOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<InMemoryRetentionPolicyService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="options">Service options; default options are used when null.</param>
    /// <param name="timeProvider">Clock used for all timestamps; the system clock is used when null.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public InMemoryRetentionPolicyService(
        IOptions<RetentionPolicyOptions> options,
        TimeProvider timeProvider,
        ILogger<InMemoryRetentionPolicyService> logger)
    {
        _options = options?.Value ?? new RetentionPolicyOptions();
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <remarks>The stored policy's <c>CreatedAt</c> is set to the current time.</remarks>
    /// <exception cref="ArgumentNullException"><paramref name="policy"/> is null.</exception>
    /// <exception cref="ArgumentException">The policy has no ID or fails validation.</exception>
    /// <exception cref="InvalidOperationException">A policy with the same ID already exists.</exception>
    public Task RegisterPolicyAsync(RetentionPolicy policy, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(policy);

        // The ID is the dictionary key; reject blank IDs up front instead of failing inside the dictionary.
        if (string.IsNullOrWhiteSpace(policy.Id))
        {
            throw new ArgumentException("Policy ID is required", nameof(policy));
        }

        ValidatePolicy(policy);

        var policyWithTimestamp = policy with
        {
            CreatedAt = _timeProvider.GetUtcNow()
        };

        if (!_policies.TryAdd(policy.Id, policyWithTimestamp))
        {
            throw new InvalidOperationException($"Policy '{policy.Id}' already exists");
        }

        _logger.LogInformation(
            "Registered retention policy {PolicyId}: {DataType} with {Retention} retention",
            policy.Id,
            policy.DataType,
            policy.RetentionPeriod);

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    /// <remarks>
    /// The stored policy keeps <paramref name="policyId"/> as its ID and the original
    /// <c>CreatedAt</c>; <c>ModifiedAt</c> is set to the current time.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="policy"/> is null.</exception>
    /// <exception cref="ArgumentException">The policy fails validation.</exception>
    /// <exception cref="KeyNotFoundException">No policy with <paramref name="policyId"/> exists.</exception>
    public Task UpdatePolicyAsync(string policyId, RetentionPolicy policy, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(policy);
        ValidatePolicy(policy);

        if (!_policies.TryGetValue(policyId, out var existing))
        {
            throw new KeyNotFoundException($"Policy '{policyId}' not found");
        }

        var updated = policy with
        {
            Id = policyId,
            CreatedAt = existing.CreatedAt,
            ModifiedAt = _timeProvider.GetUtcNow()
        };

        _policies[policyId] = updated;
        _logger.LogInformation("Updated retention policy {PolicyId}", policyId);

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    public Task<RetentionPolicy?> GetPolicyAsync(string policyId, CancellationToken ct = default)
    {
        _policies.TryGetValue(policyId, out var policy);
        return Task.FromResult<RetentionPolicy?>(policy);
    }

    /// <inheritdoc />
    /// <remarks>
    /// When <paramref name="tenantId"/> is given, global policies (no tenant) are included as well.
    /// Results are ordered by policy name.
    /// </remarks>
    public Task<IReadOnlyList<RetentionPolicy>> ListPoliciesAsync(string? tenantId = null, CancellationToken ct = default)
    {
        var query = _policies.Values.AsEnumerable();

        if (!string.IsNullOrEmpty(tenantId))
        {
            query = query.Where(p => p.TenantId == tenantId || p.TenantId == null);
        }

        var result = query.OrderBy(p => p.Name).ToList();
        return Task.FromResult<IReadOnlyList<RetentionPolicy>>(result);
    }

    /// <inheritdoc />
    /// <remarks>Deleting an unknown policy is a no-op.</remarks>
    public Task DeletePolicyAsync(string policyId, CancellationToken ct = default)
    {
        if (_policies.TryRemove(policyId, out _))
        {
            _logger.LogInformation("Deleted retention policy {PolicyId}", policyId);
        }

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Only enabled policies run. A failure in one policy is logged, recorded in the result and in
    /// the policy's history, and does not stop the remaining policies. When retention is disabled
    /// in options, nothing runs and the result carries a single "Retention is disabled" error.
    /// </remarks>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<RetentionExecutionResult> ExecuteRetentionAsync(string? policyId = null, CancellationToken ct = default)
    {
        var startedAt = _timeProvider.GetUtcNow();
        var executionId = $"exec-{Guid.NewGuid():N}";

        if (!_options.Enabled)
        {
            return new RetentionExecutionResult
            {
                ExecutionId = executionId,
                StartedAt = startedAt,
                CompletedAt = startedAt,
                TotalProcessed = 0,
                TotalDeleted = 0,
                TotalArchived = 0,
                Errors =
                [
                    new RetentionError
                    {
                        Message = "Retention is disabled",
                        Timestamp = startedAt
                    }
                ]
            };
        }

        var byPolicy = new Dictionary<string, PolicyExecutionResult>();
        var errors = new List<RetentionError>();
        long totalDeleted = 0;
        long totalArchived = 0;

        var policiesToExecute = SelectPoliciesToExecute(policyId);

        foreach (var policy in policiesToExecute)
        {
            ct.ThrowIfCancellationRequested();
            var policyStart = _timeProvider.GetUtcNow();

            try
            {
                var result = await ExecutePolicyAsync(policy, ct);
                totalDeleted += result.Deleted;
                totalArchived += result.Archived;
                byPolicy[policy.Id] = result;
                RecordExecution(policy.Id, executionId, policyStart, result, null);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Cancellation is not a policy failure; let it propagate to the caller.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error executing retention policy {PolicyId}", policy.Id);

                var failedAt = _timeProvider.GetUtcNow();
                errors.Add(new RetentionError
                {
                    PolicyId = policy.Id,
                    Message = ex.Message,
                    ExceptionType = ex.GetType().Name,
                    Timestamp = failedAt
                });

                // Use one result for both the response and the history so the recorded
                // duration and completion time reflect how long the failed attempt took.
                var failed = new PolicyExecutionResult
                {
                    PolicyId = policy.Id,
                    Processed = 0,
                    Deleted = 0,
                    Archived = 0,
                    Duration = failedAt - policyStart,
                    Error = ex.Message
                };
                byPolicy[policy.Id] = failed;
                RecordExecution(policy.Id, executionId, policyStart, failed, ex.Message);
            }
        }

        var completedAt = _timeProvider.GetUtcNow();

        _logger.LogInformation(
            "Retention execution {ExecutionId} completed: {Deleted} deleted, {Archived} archived, {Errors} errors",
            executionId,
            totalDeleted,
            totalArchived,
            errors.Count);

        return new RetentionExecutionResult
        {
            ExecutionId = executionId,
            StartedAt = startedAt,
            CompletedAt = completedAt,
            PoliciesExecuted = policiesToExecute.Select(p => p.Id).ToList(),
            TotalProcessed = totalDeleted + totalArchived,
            TotalDeleted = totalDeleted,
            TotalArchived = totalArchived,
            ByPolicy = byPolicy,
            Errors = errors
        };
    }

    /// <summary>
    /// Returns the enabled policies to run: all of them when <paramref name="policyId"/> is null
    /// or empty, otherwise just the named policy (or nothing if it is unknown or disabled).
    /// </summary>
    private List<RetentionPolicy> SelectPoliciesToExecute(string? policyId)
    {
        if (string.IsNullOrEmpty(policyId))
        {
            return _policies.Values.Where(p => p.Enabled).ToList();
        }

        // Policies are always stored under their own ID, so a keyed lookup replaces a scan.
        return _policies.TryGetValue(policyId, out var policy) && policy.Enabled
            ? [policy]
            : [];
    }

    /// <summary>
    /// Runs a single policy through the handler registered for its data type.
    /// </summary>
    /// <remarks>
    /// Only Delete and Archive do any work. A missing handler, and the Compress and
    /// FlagForReview actions, leave the data untouched and produce a zero-count result;
    /// a warning is logged so that the no-op is visible.
    /// </remarks>
    private async Task<PolicyExecutionResult> ExecutePolicyAsync(RetentionPolicy policy, CancellationToken ct)
    {
        var start = _timeProvider.GetUtcNow();
        var query = new RetentionQuery
        {
            TenantId = policy.TenantId,
            DataType = policy.DataType,
            CutoffDate = start - policy.RetentionPeriod,
            Filters = policy.Filters,
            // BatchSize 0 means unlimited, expressed to handlers as a null limit.
            Limit = policy.BatchSize > 0 ? policy.BatchSize : null,
            SoftDelete = policy.SoftDelete
        };

        long deleted = 0;
        long archived = 0;

        if (!_handlers.TryGetValue(policy.DataType.ToString(), out var handler))
        {
            _logger.LogWarning(
                "No retention handler registered for {DataType}; policy {PolicyId} processed nothing",
                policy.DataType,
                policy.Id);
        }
        else
        {
            switch (policy.Action)
            {
                case RetentionAction.Delete:
                    deleted = await handler.DeleteAsync(query, ct);
                    break;

                case RetentionAction.Archive:
                    if (string.IsNullOrEmpty(policy.ArchiveLocation))
                    {
                        _logger.LogWarning(
                            "Retention policy {PolicyId} has no archive location; nothing was archived",
                            policy.Id);
                    }
                    else
                    {
                        archived = await handler.ArchiveAsync(query, policy.ArchiveLocation, ct);
                    }
                    break;

                default:
                    // Compress and FlagForReview have no handler operation; data is left untouched.
                    _logger.LogWarning(
                        "Retention action {Action} is not implemented; policy {PolicyId} processed nothing",
                        policy.Action,
                        policy.Id);
                    break;
            }
        }

        return new PolicyExecutionResult
        {
            PolicyId = policy.Id,
            Processed = deleted + archived,
            Deleted = deleted,
            Archived = archived,
            Duration = _timeProvider.GetUtcNow() - start
        };
    }

    /// <summary>
    /// Appends an execution record to the policy's history and drops records whose completion
    /// time is older than the configured history retention.
    /// </summary>
    private void RecordExecution(string policyId, string executionId, DateTimeOffset startedAt, PolicyExecutionResult result, string? error)
    {
        var record = new RetentionExecutionRecord
        {
            ExecutionId = executionId,
            PolicyId = policyId,
            StartedAt = startedAt,
            CompletedAt = startedAt + result.Duration,
            Deleted = result.Deleted,
            Archived = result.Archived,
            Success = error == null,
            Error = error
        };

        var history = _history.GetOrAdd(policyId, _ => []);

        // List<T> is not thread-safe; readers in GetExecutionHistoryAsync take the same lock.
        lock (history)
        {
            history.Add(record);

            var cutoff = _timeProvider.GetUtcNow() - _options.ExecutionHistoryRetention;
            history.RemoveAll(r => r.CompletedAt < cutoff);
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns <c>null</c> for an unknown policy or a policy without a schedule. The schedule
    /// expression itself is not parsed: the result is a placeholder, the start of the next UTC hour.
    /// </remarks>
    public Task<DateTimeOffset?> GetNextExecutionAsync(string policyId, CancellationToken ct = default)
    {
        if (!_policies.TryGetValue(policyId, out var policy))
        {
            return Task.FromResult<DateTimeOffset?>(null);
        }

        if (string.IsNullOrEmpty(policy.Schedule))
        {
            return Task.FromResult<DateTimeOffset?>(null);
        }

        // Simple schedule parsing - in real implementation would use Cronos
        // For now, return next hour as placeholder
        var now = _timeProvider.GetUtcNow();
        var next = now.AddHours(1);
        next = new DateTimeOffset(next.Year, next.Month, next.Day, next.Hour, 0, 0, TimeSpan.Zero);

        return Task.FromResult<DateTimeOffset?>(next);
    }

    /// <inheritdoc />
    /// <remarks>Records are returned newest first; an unknown policy yields an empty list.</remarks>
    public Task<IReadOnlyList<RetentionExecutionRecord>> GetExecutionHistoryAsync(
        string policyId,
        int limit = 100,
        CancellationToken ct = default)
    {
        if (_history.TryGetValue(policyId, out var history))
        {
            List<RetentionExecutionRecord> result;
            lock (history)
            {
                result = history
                    .OrderByDescending(r => r.CompletedAt)
                    .Take(limit)
                    .ToList();
            }

            return Task.FromResult<IReadOnlyList<RetentionExecutionRecord>>(result);
        }

        return Task.FromResult<IReadOnlyList<RetentionExecutionRecord>>([]);
    }

    /// <inheritdoc />
    /// <remarks>
    /// The count comes from the registered handler's <c>CountAsync</c>; it is zero when no handler
    /// is registered for the policy's data type. No sample items are produced.
    /// </remarks>
    /// <exception cref="KeyNotFoundException">No policy with <paramref name="policyId"/> exists.</exception>
    public async Task<RetentionPreview> PreviewRetentionAsync(string policyId, CancellationToken ct = default)
    {
        if (!_policies.TryGetValue(policyId, out var policy))
        {
            throw new KeyNotFoundException($"Policy '{policyId}' not found");
        }

        var cutoff = _timeProvider.GetUtcNow() - policy.RetentionPeriod;
        var query = new RetentionQuery
        {
            TenantId = policy.TenantId,
            DataType = policy.DataType,
            CutoffDate = cutoff,
            Filters = policy.Filters
        };

        long totalAffected = 0;
        var dataTypeName = policy.DataType.ToString();

        if (_handlers.TryGetValue(dataTypeName, out var handler))
        {
            totalAffected = await handler.CountAsync(query, ct);
        }

        return new RetentionPreview
        {
            PolicyId = policyId,
            CutoffDate = cutoff,
            TotalAffected = totalAffected,
            ByCategory = new Dictionary<string, long>
            {
                [dataTypeName] = totalAffected
            }
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Registering a second handler for the same data type replaces the first. The name must match
    /// the <see cref="RetentionDataType"/> member name exactly for policies to find the handler.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="dataType"/> is null, empty, or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="handler"/> is null.</exception>
    public void RegisterHandler(string dataType, IRetentionHandler handler)
    {
        // Fail at registration rather than with a NullReferenceException during execution.
        ArgumentException.ThrowIfNullOrWhiteSpace(dataType);
        ArgumentNullException.ThrowIfNull(handler);

        _handlers[dataType] = handler;
        _logger.LogDebug("Registered retention handler for {DataType}", dataType);
    }

    /// <summary>
    /// Checks the policy's name, retention period bounds, and archive location.
    /// </summary>
    /// <exception cref="ArgumentException">Any of the checks fails.</exception>
    private void ValidatePolicy(RetentionPolicy policy)
    {
        if (string.IsNullOrWhiteSpace(policy.Name))
        {
            throw new ArgumentException("Policy name is required", nameof(policy));
        }

        if (policy.RetentionPeriod < _options.MinRetentionPeriod)
        {
            throw new ArgumentException($"Retention period must be at least {_options.MinRetentionPeriod}", nameof(policy));
        }

        if (policy.RetentionPeriod > _options.MaxRetentionPeriod)
        {
            throw new ArgumentException($"Retention period cannot exceed {_options.MaxRetentionPeriod}", nameof(policy));
        }

        if (policy.Action == RetentionAction.Archive && string.IsNullOrEmpty(policy.ArchiveLocation))
        {
            throw new ArgumentException("Archive location is required for Archive action", nameof(policy));
        }
    }
}
/// <summary>
/// No-op retention handler for testing.
/// Every operation completes immediately and reports zero affected items.
/// </summary>
public sealed class NoOpRetentionHandler : IRetentionHandler
{
    /// <summary>
    /// The data type name supplied at construction.
    /// </summary>
    public string DataType { get; }

    /// <summary>
    /// Creates a handler for the given data type name.
    /// </summary>
    /// <param name="dataType">Data type name exposed through <see cref="DataType"/>.</param>
    public NoOpRetentionHandler(string dataType)
    {
        DataType = dataType;
    }

    /// <summary>Always reports zero matching items.</summary>
    public Task<long> CountAsync(RetentionQuery query, CancellationToken ct = default)
        => Task.FromResult(0L);

    /// <summary>Deletes nothing and reports zero deleted items.</summary>
    public Task<long> DeleteAsync(RetentionQuery query, CancellationToken ct = default)
        => Task.FromResult(0L);

    /// <summary>Archives nothing and reports zero archived items.</summary>
    public Task<long> ArchiveAsync(RetentionQuery query, string archiveLocation, CancellationToken ct = default)
        => Task.FromResult(0L);
}
/// <summary>
/// Factory helpers that build commonly used retention policies.
/// </summary>
public static class RetentionPolicyExtensions
{
    /// <summary>
    /// Builds a policy that deletes delivery logs older than <paramref name="retention"/>.
    /// </summary>
    /// <param name="id">Policy identifier.</param>
    /// <param name="retention">Age beyond which delivery logs are eligible for cleanup.</param>
    /// <param name="tenantId">Tenant the policy applies to, or <c>null</c> for a global policy.</param>
    /// <param name="createdBy">Optional creator recorded on the policy.</param>
    /// <returns>A new delete policy for <see cref="RetentionDataType.DeliveryLogs"/>.</returns>
    public static RetentionPolicy CreateDeliveryLogPolicy(
        string id,
        TimeSpan retention,
        string? tenantId = null,
        string? createdBy = null)
        => new()
        {
            Id = id,
            Name = "Delivery Log Retention",
            Description = "Automatically clean up old delivery logs",
            TenantId = tenantId,
            DataType = RetentionDataType.DeliveryLogs,
            RetentionPeriod = retention,
            Action = RetentionAction.Delete,
            CreatedBy = createdBy
        };

    /// <summary>
    /// Builds a policy that deletes dead-letter entries older than <paramref name="retention"/>.
    /// </summary>
    /// <param name="id">Policy identifier.</param>
    /// <param name="retention">Age beyond which dead letters are eligible for cleanup.</param>
    /// <param name="tenantId">Tenant the policy applies to, or <c>null</c> for a global policy.</param>
    /// <param name="createdBy">Optional creator recorded on the policy.</param>
    /// <returns>A new delete policy for <see cref="RetentionDataType.DeadLetters"/>.</returns>
    public static RetentionPolicy CreateDeadLetterPolicy(
        string id,
        TimeSpan retention,
        string? tenantId = null,
        string? createdBy = null)
        => new()
        {
            Id = id,
            Name = "Dead Letter Retention",
            Description = "Automatically clean up old dead letter entries",
            TenantId = tenantId,
            DataType = RetentionDataType.DeadLetters,
            RetentionPeriod = retention,
            Action = RetentionAction.Delete,
            CreatedBy = createdBy
        };

    /// <summary>
    /// Builds a policy that archives audit logs older than <paramref name="retention"/>.
    /// </summary>
    /// <param name="id">Policy identifier.</param>
    /// <param name="retention">Age beyond which audit logs are eligible for archiving.</param>
    /// <param name="archiveLocation">Destination the audit logs are archived to.</param>
    /// <param name="tenantId">Tenant the policy applies to, or <c>null</c> for a global policy.</param>
    /// <param name="createdBy">Optional creator recorded on the policy.</param>
    /// <returns>A new archive policy for <see cref="RetentionDataType.AuditLogs"/>.</returns>
    public static RetentionPolicy CreateAuditArchivePolicy(
        string id,
        TimeSpan retention,
        string archiveLocation,
        string? tenantId = null,
        string? createdBy = null)
        => new()
        {
            Id = id,
            Name = "Audit Log Archive",
            Description = "Archive old audit logs to cold storage",
            TenantId = tenantId,
            DataType = RetentionDataType.AuditLogs,
            RetentionPeriod = retention,
            Action = RetentionAction.Archive,
            ArchiveLocation = archiveLocation,
            CreatedBy = createdBy
        };
}