feat: add entropy policy banner and policy gate indicator components
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Implemented EntropyPolicyBannerComponent with configuration for entropy policies, including thresholds, current scores, and mitigation steps.
- Created PolicyGateIndicatorComponent to display the status of policy gates, including passed, failed, and warning gates, with detailed views for determinism and entropy gates.
- Added HTML and SCSS for both components to ensure proper styling and layout.
- Introduced computed properties and signals for reactive state management in Angular.
- Included remediation hints and actions for user interaction within the policy gate indicator.
This commit is contained in:
@@ -0,0 +1,718 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Moq;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Telemetry.Core.Tests;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="IncidentModeService"/> and its supporting types
/// (<see cref="IncidentModeOptions"/>, <see cref="IncidentModeState"/>, and the
/// activation/deactivation result types).
/// </summary>
/// <remarks>
/// Services are built through <c>CreateService</c>, which switches off state persistence and
/// restore-on-startup, and are driven by a manually advanced <c>FakeTimeProvider</c> so that
/// expiry can be exercised without real delays.
/// </remarks>
public sealed class IncidentModeServiceTests : IDisposable
{
    private readonly FakeTimeProvider _timeProvider;
    private readonly Mock<ITelemetryContextAccessor> _contextAccessor;
    private readonly Mock<ILogger<IncidentModeService>> _logger;

    public IncidentModeServiceTests()
    {
        _timeProvider = new FakeTimeProvider(DateTimeOffset.UtcNow);
        _contextAccessor = new Mock<ITelemetryContextAccessor>();
        _logger = new Mock<ILogger<IncidentModeService>>();
    }

    public void Dispose()
    {
        // No per-test resources to release at present.
    }

    /// <summary>
    /// Builds an <see cref="IncidentModeService"/> wired to the fixture's mocks and fake clock.
    /// </summary>
    /// <param name="configure">Optional callback that adjusts the options before the service is created.</param>
    /// <returns>A new service instance; callers are expected to dispose it.</returns>
    private IncidentModeService CreateService(Action<IncidentModeOptions>? configure = null)
    {
        var options = new IncidentModeOptions
        {
            PersistState = false, // Disable persistence for tests
            RestoreOnStartup = false
        };
        configure?.Invoke(options);

        var monitor = new TestOptionsMonitor<IncidentModeOptions>(options);
        return new IncidentModeService(monitor, _contextAccessor.Object, _logger.Object, _timeProvider);
    }

    // ----- ActivateAsync -----

    [Fact]
    public async Task ActivateAsync_ValidActor_ReturnsSuccess()
    {
        using var service = CreateService();

        var result = await service.ActivateAsync("test-actor");

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal("test-actor", result.State.Actor);
        Assert.True(service.IsActive);
    }

    [Fact]
    public async Task ActivateAsync_NullActor_ThrowsArgumentException()
    {
        using var service = CreateService();

        await Assert.ThrowsAsync<ArgumentException>(() =>
            service.ActivateAsync(null!));
    }

    [Fact]
    public async Task ActivateAsync_EmptyActor_ThrowsArgumentException()
    {
        using var service = CreateService();

        await Assert.ThrowsAsync<ArgumentException>(() =>
            service.ActivateAsync(""));
    }

    [Fact]
    public async Task ActivateAsync_WithTenantId_StoresTenantId()
    {
        using var service = CreateService();

        var result = await service.ActivateAsync("actor", tenantId: "tenant-123");

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal("tenant-123", result.State.TenantId);
    }

    [Fact]
    public async Task ActivateAsync_WithReason_StoresReason()
    {
        using var service = CreateService();

        var result = await service.ActivateAsync("actor", reason: "Production incident INC-001");

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal("Production incident INC-001", result.State.Reason);
    }

    [Fact]
    public async Task ActivateAsync_DefaultTtl_UsesConfiguredDefault()
    {
        var defaultTtl = TimeSpan.FromMinutes(45);
        using var service = CreateService(opt => opt.DefaultTtl = defaultTtl);

        var result = await service.ActivateAsync("actor");

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        // The fake clock has not moved, so expiry is exactly "now" plus the TTL.
        Assert.Equal(_timeProvider.GetUtcNow() + defaultTtl, result.State.ExpiresAt);
    }

    [Fact]
    public async Task ActivateAsync_CustomTtl_UsesTtlOverride()
    {
        var ttlOverride = TimeSpan.FromHours(2);
        using var service = CreateService();

        var result = await service.ActivateAsync("actor", ttlOverride: ttlOverride);

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal(_timeProvider.GetUtcNow() + ttlOverride, result.State.ExpiresAt);
    }

    [Fact]
    public async Task ActivateAsync_TtlBelowMin_ClampedToMin()
    {
        var minTtl = TimeSpan.FromMinutes(10);
        using var service = CreateService(opt =>
        {
            opt.MinTtl = minTtl;
        });

        var result = await service.ActivateAsync("actor", ttlOverride: TimeSpan.FromMinutes(1));

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal(_timeProvider.GetUtcNow() + minTtl, result.State.ExpiresAt);
    }

    [Fact]
    public async Task ActivateAsync_TtlAboveMax_ClampedToMax()
    {
        var maxTtl = TimeSpan.FromHours(4);
        using var service = CreateService(opt =>
        {
            opt.MaxTtl = maxTtl;
        });

        var result = await service.ActivateAsync("actor", ttlOverride: TimeSpan.FromHours(48));

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal(_timeProvider.GetUtcNow() + maxTtl, result.State.ExpiresAt);
    }

    [Fact]
    public async Task ActivateAsync_AlreadyActive_ExtendsTtlAndReturnsWasAlreadyActive()
    {
        using var service = CreateService();
        var firstResult = await service.ActivateAsync("actor1");
        var firstActivationId = firstResult.State!.ActivationId;

        var secondResult = await service.ActivateAsync("actor2");

        Assert.True(secondResult.Success);
        Assert.True(secondResult.WasAlreadyActive);
        Assert.Equal(firstActivationId, secondResult.State!.ActivationId); // Same activation
    }

    [Fact]
    public async Task ActivateAsync_RaisesActivatedEvent()
    {
        using var service = CreateService();
        IncidentModeActivatedEventArgs? eventArgs = null;
        service.Activated += (_, e) => eventArgs = e;

        await service.ActivateAsync("actor");

        Assert.NotNull(eventArgs);
        Assert.NotNull(eventArgs.State);
        Assert.False(eventArgs.WasReactivation);
    }

    [Fact]
    public async Task ActivateAsync_WhenAlreadyActive_RaisesReactivationEvent()
    {
        using var service = CreateService();
        await service.ActivateAsync("actor1");

        // Subscribe only after the first activation so just the second one is captured.
        IncidentModeActivatedEventArgs? eventArgs = null;
        service.Activated += (_, e) => eventArgs = e;

        await service.ActivateAsync("actor2");

        Assert.NotNull(eventArgs);
        Assert.True(eventArgs.WasReactivation);
    }

    // ----- DeactivateAsync -----

    [Fact]
    public async Task DeactivateAsync_WhenActive_ReturnsSuccessWithWasActive()
    {
        using var service = CreateService();
        await service.ActivateAsync("actor");

        var result = await service.DeactivateAsync("deactivator");

        Assert.True(result.Success);
        Assert.True(result.WasActive);
        Assert.Equal(IncidentModeDeactivationReason.Manual, result.Reason);
        Assert.False(service.IsActive);
    }

    [Fact]
    public async Task DeactivateAsync_WhenNotActive_ReturnsSuccessWithWasNotActive()
    {
        using var service = CreateService();

        var result = await service.DeactivateAsync("actor");

        Assert.True(result.Success);
        Assert.False(result.WasActive);
    }

    [Fact]
    public async Task DeactivateAsync_RaisesDeactivatedEvent()
    {
        using var service = CreateService();
        await service.ActivateAsync("actor");
        IncidentModeDeactivatedEventArgs? eventArgs = null;
        service.Deactivated += (_, e) => eventArgs = e;

        await service.DeactivateAsync("deactivator");

        Assert.NotNull(eventArgs);
        Assert.NotNull(eventArgs.State);
        Assert.Equal(IncidentModeDeactivationReason.Manual, eventArgs.Reason);
        Assert.Equal("deactivator", eventArgs.DeactivatedBy);
    }

    // ----- ExtendTtlAsync -----

    [Fact]
    public async Task ExtendTtlAsync_WhenActive_ExtendsExpiry()
    {
        var extension = TimeSpan.FromMinutes(15);
        using var service = CreateService(opt =>
        {
            opt.AllowTtlExtension = true;
            opt.DefaultTtl = TimeSpan.FromMinutes(30);
        });
        await service.ActivateAsync("actor");
        var originalExpiry = service.CurrentState!.ExpiresAt;

        var newExpiry = await service.ExtendTtlAsync(extension, "extender");

        Assert.NotNull(newExpiry);
        Assert.Equal(originalExpiry + extension, newExpiry);
    }

    [Fact]
    public async Task ExtendTtlAsync_WhenNotActive_ReturnsNull()
    {
        using var service = CreateService();

        var result = await service.ExtendTtlAsync(TimeSpan.FromMinutes(15), "actor");

        Assert.Null(result);
    }

    [Fact]
    public async Task ExtendTtlAsync_WhenDisabled_ReturnsNull()
    {
        using var service = CreateService(opt =>
        {
            opt.AllowTtlExtension = false;
        });
        await service.ActivateAsync("actor");

        var result = await service.ExtendTtlAsync(TimeSpan.FromMinutes(15), "actor");

        Assert.Null(result);
    }

    [Fact]
    public async Task ExtendTtlAsync_ExceedsMaxExtensions_ReturnsNull()
    {
        using var service = CreateService(opt =>
        {
            opt.AllowTtlExtension = true;
            opt.MaxExtensions = 2;
        });
        await service.ActivateAsync("actor");

        // Use up the two permitted extensions, then attempt a third.
        await service.ExtendTtlAsync(TimeSpan.FromMinutes(5), "extender");
        await service.ExtendTtlAsync(TimeSpan.FromMinutes(5), "extender");
        var thirdExtension = await service.ExtendTtlAsync(TimeSpan.FromMinutes(5), "extender");

        Assert.Null(thirdExtension);
    }

    [Fact]
    public async Task ExtendTtlAsync_WouldExceedMaxTtl_ClampedToMax()
    {
        var maxTtl = TimeSpan.FromHours(24);
        using var service = CreateService(opt =>
        {
            opt.AllowTtlExtension = true;
            opt.DefaultTtl = TimeSpan.FromHours(23);
            opt.MaxTtl = maxTtl;
        });
        await service.ActivateAsync("actor");
        var activatedAt = service.CurrentState!.ActivatedAt;

        // 23h + 10h would overshoot the 24h cap measured from activation.
        var result = await service.ExtendTtlAsync(TimeSpan.FromHours(10), "extender");

        Assert.NotNull(result);
        Assert.Equal(activatedAt + maxTtl, result);
    }

    // ----- GetIncidentTags -----

    [Fact]
    public async Task GetIncidentTags_WhenActive_ReturnsTagDictionary()
    {
        using var service = CreateService(opt =>
        {
            opt.IncidentTagName = "incident_mode";
        });
        await service.ActivateAsync("actor", tenantId: "tenant-123");

        var tags = service.GetIncidentTags();

        Assert.NotEmpty(tags);
        Assert.Equal("true", tags["incident_mode"]);
        Assert.Equal("actor", tags["incident_actor"]);
        Assert.Equal("tenant-123", tags["incident_tenant"]);
        Assert.True(tags.ContainsKey("incident_activation_id"));
    }

    // Synchronous on purpose: nothing is awaited here, so an async signature would only
    // trigger compiler warning CS1998.
    [Fact]
    public void GetIncidentTags_WhenNotActive_ReturnsEmptyDictionary()
    {
        using var service = CreateService();

        var tags = service.GetIncidentTags();

        Assert.Empty(tags);
    }

    [Fact]
    public async Task GetIncidentTags_WithAdditionalTags_IncludesThem()
    {
        using var service = CreateService(opt =>
        {
            opt.AdditionalTags["environment"] = "production";
            opt.AdditionalTags["region"] = "us-east-1";
        });
        await service.ActivateAsync("actor");

        var tags = service.GetIncidentTags();

        Assert.Equal("production", tags["environment"]);
        Assert.Equal("us-east-1", tags["region"]);
    }

    // ----- CurrentState / IsActive -----

    [Fact]
    public async Task CurrentState_WhenActive_ReturnsState()
    {
        using var service = CreateService();
        await service.ActivateAsync("actor");

        var state = service.CurrentState;

        Assert.NotNull(state);
        Assert.True(state.Enabled);
        Assert.Equal("actor", state.Actor);
        Assert.Equal(IncidentModeSource.Api, state.Source);
    }

    [Fact]
    public void CurrentState_WhenNotActive_ReturnsNull()
    {
        using var service = CreateService();

        var state = service.CurrentState;

        Assert.Null(state);
    }

    [Fact]
    public void IsActive_WhenNotActivated_ReturnsFalse()
    {
        using var service = CreateService();

        Assert.False(service.IsActive);
    }

    [Fact]
    public async Task IsActive_WhenActivated_ReturnsTrue()
    {
        using var service = CreateService();
        await service.ActivateAsync("actor");

        Assert.True(service.IsActive);
    }

    [Fact]
    public async Task IsActive_WhenExpired_ReturnsFalse()
    {
        using var service = CreateService(opt =>
        {
            opt.DefaultTtl = TimeSpan.FromMinutes(1);
        });
        await service.ActivateAsync("actor");

        // Advance time past expiry
        _timeProvider.Advance(TimeSpan.FromMinutes(2));

        Assert.False(service.IsActive);
    }

    // ----- Alternate activation sources -----

    [Fact]
    public async Task ActivateFromCliAsync_SetsSourceToCli()
    {
        using var service = CreateService();

        var result = await service.ActivateFromCliAsync("cli-user");

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal(IncidentModeSource.Cli, result.State.Source);
    }

    [Fact]
    public async Task ActivateFromConfigAsync_WhenEnabled_Activates()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
        });

        var result = await service.ActivateFromConfigAsync();

        Assert.True(result.Success);
        Assert.NotNull(result.State);
        Assert.Equal(IncidentModeSource.Configuration, result.State.Source);
    }

    [Fact]
    public async Task ActivateFromConfigAsync_WhenDisabled_FailsActivation()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = false;
        });

        var result = await service.ActivateFromConfigAsync();

        Assert.False(result.Success);
        Assert.NotNull(result.Error);
    }

    // ----- IncidentModeOptions.Validate -----

    [Fact]
    public void IncidentModeOptions_Validate_ValidOptions_ReturnsNoErrors()
    {
        var options = new IncidentModeOptions();

        var errors = options.Validate();

        Assert.Empty(errors);
    }

    [Fact]
    public void IncidentModeOptions_Validate_DefaultTtlBelowMin_ReturnsError()
    {
        var options = new IncidentModeOptions
        {
            DefaultTtl = TimeSpan.FromMinutes(1),
            MinTtl = TimeSpan.FromMinutes(5)
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("DefaultTtl", errors[0]);
    }

    [Fact]
    public void IncidentModeOptions_Validate_DefaultTtlAboveMax_ReturnsError()
    {
        var options = new IncidentModeOptions
        {
            DefaultTtl = TimeSpan.FromHours(48),
            MaxTtl = TimeSpan.FromHours(24)
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("DefaultTtl", errors[0]);
    }

    [Fact]
    public void IncidentModeOptions_Validate_InvalidSamplingRate_ReturnsError()
    {
        var options = new IncidentModeOptions
        {
            IncidentSamplingRate = 1.5
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("IncidentSamplingRate", errors[0]);
    }

    [Fact]
    public void IncidentModeOptions_Validate_NegativeMaxExtensions_ReturnsError()
    {
        var options = new IncidentModeOptions
        {
            MaxExtensions = -1
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("MaxExtensions", errors[0]);
    }

    // ----- IncidentModeOptions.ClampTtl -----

    [Fact]
    public void IncidentModeOptions_ClampTtl_BelowMin_ReturnsMin()
    {
        var options = new IncidentModeOptions
        {
            MinTtl = TimeSpan.FromMinutes(10),
            MaxTtl = TimeSpan.FromHours(24)
        };

        var result = options.ClampTtl(TimeSpan.FromMinutes(1));

        Assert.Equal(TimeSpan.FromMinutes(10), result);
    }

    [Fact]
    public void IncidentModeOptions_ClampTtl_AboveMax_ReturnsMax()
    {
        var options = new IncidentModeOptions
        {
            MinTtl = TimeSpan.FromMinutes(5),
            MaxTtl = TimeSpan.FromHours(4)
        };

        var result = options.ClampTtl(TimeSpan.FromHours(48));

        Assert.Equal(TimeSpan.FromHours(4), result);
    }

    [Fact]
    public void IncidentModeOptions_ClampTtl_WithinRange_ReturnsSame()
    {
        var options = new IncidentModeOptions
        {
            MinTtl = TimeSpan.FromMinutes(5),
            MaxTtl = TimeSpan.FromHours(24)
        };

        var result = options.ClampTtl(TimeSpan.FromHours(2));

        Assert.Equal(TimeSpan.FromHours(2), result);
    }

    // ----- IncidentModeState -----

    [Fact]
    public void IncidentModeState_IsExpired_BeforeExpiry_ReturnsFalse()
    {
        var state = new IncidentModeState
        {
            Enabled = true,
            ActivatedAt = DateTimeOffset.UtcNow,
            ExpiresAt = DateTimeOffset.UtcNow.AddHours(1),
            Actor = "test",
            Source = IncidentModeSource.Api,
            ActivationId = "abc123"
        };

        Assert.False(state.IsExpired);
    }

    [Fact]
    public void IncidentModeState_IsExpired_AfterExpiry_ReturnsTrue()
    {
        var state = new IncidentModeState
        {
            Enabled = true,
            ActivatedAt = DateTimeOffset.UtcNow.AddHours(-2),
            ExpiresAt = DateTimeOffset.UtcNow.AddHours(-1),
            Actor = "test",
            Source = IncidentModeSource.Api,
            ActivationId = "abc123"
        };

        Assert.True(state.IsExpired);
    }

    [Fact]
    public void IncidentModeState_RemainingTime_WhenNotExpired_ReturnsPositive()
    {
        var state = new IncidentModeState
        {
            Enabled = true,
            ActivatedAt = DateTimeOffset.UtcNow,
            ExpiresAt = DateTimeOffset.UtcNow.AddMinutes(30),
            Actor = "test",
            Source = IncidentModeSource.Api,
            ActivationId = "abc123"
        };

        Assert.True(state.RemainingTime > TimeSpan.Zero);
    }

    [Fact]
    public void IncidentModeState_RemainingTime_WhenExpired_ReturnsZero()
    {
        var state = new IncidentModeState
        {
            Enabled = true,
            ActivatedAt = DateTimeOffset.UtcNow.AddHours(-2),
            ExpiresAt = DateTimeOffset.UtcNow.AddHours(-1),
            Actor = "test",
            Source = IncidentModeSource.Api,
            ActivationId = "abc123"
        };

        Assert.Equal(TimeSpan.Zero, state.RemainingTime);
    }

    // ----- Result factory methods -----

    [Fact]
    public void IncidentModeActivationResult_Succeeded_CreatesSuccessResult()
    {
        var state = new IncidentModeState
        {
            Enabled = true,
            ActivatedAt = DateTimeOffset.UtcNow,
            ExpiresAt = DateTimeOffset.UtcNow.AddHours(1),
            Actor = "test",
            Source = IncidentModeSource.Api,
            ActivationId = "abc123"
        };

        var result = IncidentModeActivationResult.Succeeded(state, wasAlreadyActive: true);

        Assert.True(result.Success);
        Assert.Same(state, result.State);
        Assert.True(result.WasAlreadyActive);
        Assert.Null(result.Error);
    }

    [Fact]
    public void IncidentModeActivationResult_Failed_CreatesFailureResult()
    {
        var result = IncidentModeActivationResult.Failed("Test error message");

        Assert.False(result.Success);
        Assert.Null(result.State);
        Assert.Equal("Test error message", result.Error);
    }

    [Fact]
    public void IncidentModeDeactivationResult_Succeeded_CreatesSuccessResult()
    {
        var result = IncidentModeDeactivationResult.Succeeded(wasActive: true, IncidentModeDeactivationReason.Manual);

        Assert.True(result.Success);
        Assert.True(result.WasActive);
        Assert.Equal(IncidentModeDeactivationReason.Manual, result.Reason);
        Assert.Null(result.Error);
    }

    [Fact]
    public void IncidentModeDeactivationResult_Failed_CreatesFailureResult()
    {
        var result = IncidentModeDeactivationResult.Failed("Test error");

        Assert.False(result.Success);
        Assert.Equal("Test error", result.Error);
    }

    /// <summary>
    /// Minimal <see cref="IOptionsMonitor{TOptions}"/> that always hands back one fixed value
    /// and never raises change notifications.
    /// </summary>
    private sealed class TestOptionsMonitor<T> : IOptionsMonitor<T>
    {
        private readonly T _value;

        public TestOptionsMonitor(T value)
        {
            _value = value;
        }

        public T CurrentValue => _value;

        // The name is ignored: every named instance resolves to the same value.
        public T Get(string? name) => _value;

        // No change tracking, hence no subscription handle to return.
        public IDisposable? OnChange(Action<T, string?> listener) => null;
    }

    /// <summary>
    /// <see cref="TimeProvider"/> whose current UTC time only changes when a test moves it.
    /// </summary>
    private sealed class FakeTimeProvider : TimeProvider
    {
        private DateTimeOffset _utcNow;

        public FakeTimeProvider(DateTimeOffset initialTime)
        {
            _utcNow = initialTime;
        }

        public override DateTimeOffset GetUtcNow() => _utcNow;

        /// <summary>Moves the clock by <paramref name="duration"/> relative to its current value.</summary>
        public void Advance(TimeSpan duration)
        {
            _utcNow = _utcNow.Add(duration);
        }

        /// <summary>Sets the clock to the absolute instant <paramref name="time"/>.</summary>
        public void SetUtcNow(DateTimeOffset time)
        {
            _utcNow = time;
        }
    }
}
|
||||
@@ -0,0 +1,288 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Moq;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Telemetry.Core.Tests;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="SealedModeFileExporter"/>: initialization, record writing,
/// size tracking, file rotation, and behavior after disposal.
/// </summary>
/// <remarks>
/// Each test instance writes into its own uniquely named directory under the system temp
/// path; <c>Dispose</c> removes that directory on a best-effort basis.
/// </remarks>
public sealed class SealedModeFileExporterTests : IDisposable
{
    private readonly string _testDirectory;
    private readonly Mock<ILogger<SealedModeFileExporter>> _logger;
    private readonly FakeTimeProvider _timeProvider;

    public SealedModeFileExporterTests()
    {
        _testDirectory = Path.Combine(Path.GetTempPath(), $"sealed-mode-tests-{Guid.NewGuid():N}");
        Directory.CreateDirectory(_testDirectory);
        _logger = new Mock<ILogger<SealedModeFileExporter>>();
        // Fixed instant so Write_IncludesTimestamp can look for a known date.
        _timeProvider = new FakeTimeProvider(new DateTimeOffset(2025, 11, 27, 10, 0, 0, TimeSpan.Zero));
    }

    public void Dispose()
    {
        try
        {
            if (Directory.Exists(_testDirectory))
            {
                Directory.Delete(_testDirectory, recursive: true);
            }
        }
        catch
        {
            // Ignore cleanup errors in tests: a leftover temp directory must not turn a
            // passing test into a failure.
        }
    }

    /// <summary>
    /// Builds a <see cref="SealedModeFileExporter"/> that writes inside the per-test directory.
    /// </summary>
    /// <param name="configure">Optional callback that adjusts the options before the exporter is created.</param>
    /// <returns>A new exporter; callers are expected to dispose it.</returns>
    private SealedModeFileExporter CreateExporter(Action<SealedModeTelemetryOptions>? configure = null)
    {
        var options = new SealedModeTelemetryOptions
        {
            FilePath = Path.Combine(_testDirectory, "telemetry-sealed.otlp"),
            MaxBytes = 1024, // Small for testing
            MaxRotatedFiles = 3,
            FailOnInsecurePermissions = false // Disable for cross-platform testing
        };
        configure?.Invoke(options);

        var monitor = new TestOptionsMonitor<SealedModeTelemetryOptions>(options);
        return new SealedModeFileExporter(monitor, _logger.Object, _timeProvider);
    }

    // ----- Initialize -----

    [Fact]
    public void Initialize_CreatesFile()
    {
        using var exporter = CreateExporter();

        exporter.Initialize();

        Assert.True(exporter.IsInitialized);
        Assert.NotNull(exporter.CurrentFilePath);
        Assert.True(File.Exists(exporter.CurrentFilePath));
    }

    [Fact]
    public void Initialize_CreatesDirectory_WhenNotExists()
    {
        var newDir = Path.Combine(_testDirectory, "subdir", "nested");
        using var exporter = CreateExporter(opt =>
        {
            opt.FilePath = Path.Combine(newDir, "telemetry.otlp");
        });

        exporter.Initialize();

        Assert.True(Directory.Exists(newDir));
    }

    [Fact]
    public void Initialize_CalledMultipleTimes_DoesNotThrow()
    {
        using var exporter = CreateExporter();

        exporter.Initialize();
        exporter.Initialize();

        Assert.True(exporter.IsInitialized);
    }

    [Fact]
    public void Initialize_WithEmptyFilePath_Throws()
    {
        using var exporter = CreateExporter(opt =>
        {
            opt.FilePath = "";
        });

        Assert.Throws<InvalidOperationException>(() => exporter.Initialize());
    }

    // ----- Write / WriteRecord -----

    [Fact]
    public void Write_WritesDataToFile()
    {
        using var exporter = CreateExporter();
        exporter.Initialize();
        var data = Encoding.UTF8.GetBytes("test data");

        exporter.Write(data, TelemetrySignal.Traces);

        var fileContent = File.ReadAllText(exporter.CurrentFilePath!);
        Assert.Contains("test data", fileContent);
        Assert.Contains("[Traces]", fileContent);
    }

    [Fact]
    public void Write_IncludesTimestamp()
    {
        using var exporter = CreateExporter();
        exporter.Initialize();
        var data = Encoding.UTF8.GetBytes("test");

        exporter.Write(data, TelemetrySignal.Traces);

        // The date comes from the fake clock configured in the constructor.
        var fileContent = File.ReadAllText(exporter.CurrentFilePath!);
        Assert.Contains("2025-11-27", fileContent);
    }

    [Fact]
    public void Write_AutoInitializesIfNotCalled()
    {
        using var exporter = CreateExporter();
        var data = Encoding.UTF8.GetBytes("auto-init test");

        exporter.Write(data, TelemetrySignal.Metrics);

        Assert.True(exporter.IsInitialized);
        var fileContent = File.ReadAllText(exporter.CurrentFilePath!);
        Assert.Contains("auto-init test", fileContent);
    }

    [Fact]
    public void WriteRecord_WritesStringData()
    {
        using var exporter = CreateExporter();
        exporter.Initialize();

        exporter.WriteRecord("string record data", TelemetrySignal.Logs);

        var fileContent = File.ReadAllText(exporter.CurrentFilePath!);
        Assert.Contains("string record data", fileContent);
        Assert.Contains("[Logs]", fileContent);
    }

    [Fact]
    public void Write_DifferentSignals_IncludesSignalType()
    {
        using var exporter = CreateExporter();
        exporter.Initialize();

        exporter.WriteRecord("traces data", TelemetrySignal.Traces);
        exporter.WriteRecord("metrics data", TelemetrySignal.Metrics);
        exporter.WriteRecord("logs data", TelemetrySignal.Logs);

        var fileContent = File.ReadAllText(exporter.CurrentFilePath!);
        Assert.Contains("[Traces]", fileContent);
        Assert.Contains("[Metrics]", fileContent);
        Assert.Contains("[Logs]", fileContent);
    }

    [Fact]
    public void Write_AfterDispose_ThrowsObjectDisposedException()
    {
        // Not a using declaration: the test disposes the exporter explicitly.
        var exporter = CreateExporter();
        exporter.Initialize();
        exporter.Dispose();

        Assert.Throws<ObjectDisposedException>(() =>
            exporter.Write(Encoding.UTF8.GetBytes("test"), TelemetrySignal.Traces));
    }

    // ----- Rotation -----

    [Fact]
    public void Write_RotatesFile_WhenMaxBytesExceeded()
    {
        const int maxBytes = 100; // Very small for testing rotation
        using var exporter = CreateExporter(opt =>
        {
            opt.MaxBytes = maxBytes;
        });
        exporter.Initialize();
        var filePath = exporter.CurrentFilePath!;

        // Write enough data to trigger rotation
        for (var i = 0; i < 5; i++)
        {
            exporter.WriteRecord($"Record {i} with some padding data to exceed limit", TelemetrySignal.Traces);
        }

        // Check that rotation happened - original file should exist
        Assert.True(File.Exists(filePath));
        // Either a rotated file is present, or the active file is back under the limit.
        Assert.True(File.Exists($"{filePath}.1") || exporter.CurrentSize < maxBytes);
    }

    [Fact]
    public void Rotation_DeletesOldestFile_WhenMaxRotatedFilesExceeded()
    {
        const int maxRotatedFiles = 2;
        using var exporter = CreateExporter(opt =>
        {
            opt.MaxBytes = 50;
            opt.MaxRotatedFiles = maxRotatedFiles;
        });
        exporter.Initialize();
        var basePath = exporter.CurrentFilePath!;

        // Write enough to trigger multiple rotations
        for (var i = 0; i < 10; i++)
        {
            exporter.WriteRecord($"Record {i} with padding to exceed", TelemetrySignal.Traces);
        }

        // Probe suffixes past the configured limit so files that should have been
        // deleted would still be counted.
        var rotatedFiles = 0;
        for (var i = 1; i <= 5; i++)
        {
            if (File.Exists($"{basePath}.{i}"))
            {
                rotatedFiles++;
            }
        }

        // Should not have more than MaxRotatedFiles rotated files
        Assert.InRange(rotatedFiles, 0, maxRotatedFiles);
    }

    // ----- Size tracking / Flush -----

    [Fact]
    public void CurrentSize_TracksWrittenBytes()
    {
        using var exporter = CreateExporter();
        exporter.Initialize();
        var initialSize = exporter.CurrentSize;
        var data = Encoding.UTF8.GetBytes("test data for size tracking");

        exporter.Write(data, TelemetrySignal.Traces);

        Assert.True(exporter.CurrentSize > initialSize);
    }

    [Fact]
    public void Flush_DoesNotThrow()
    {
        using var exporter = CreateExporter();
        exporter.Initialize();
        exporter.WriteRecord("data", TelemetrySignal.Traces);

        // Capture instead of letting an exception escape, so the test states its
        // expectation with an explicit assertion.
        var exception = Record.Exception(() => exporter.Flush());

        Assert.Null(exception);
    }

    /// <summary>
    /// Minimal <see cref="IOptionsMonitor{TOptions}"/> that always hands back one fixed value
    /// and never raises change notifications.
    /// </summary>
    private sealed class TestOptionsMonitor<T> : IOptionsMonitor<T>
    {
        private readonly T _value;

        public TestOptionsMonitor(T value)
        {
            _value = value;
        }

        public T CurrentValue => _value;

        // The name is ignored: every named instance resolves to the same value.
        public T Get(string? name) => _value;

        // No change tracking, hence no subscription handle to return.
        public IDisposable? OnChange(Action<T, string?> listener) => null;
    }

    /// <summary>
    /// <see cref="TimeProvider"/> whose current UTC time only changes when a test moves it.
    /// </summary>
    private sealed class FakeTimeProvider : TimeProvider
    {
        private DateTimeOffset _utcNow;

        public FakeTimeProvider(DateTimeOffset initialTime)
        {
            _utcNow = initialTime;
        }

        public override DateTimeOffset GetUtcNow() => _utcNow;

        /// <summary>Moves the clock by <paramref name="duration"/> relative to its current value.</summary>
        public void Advance(TimeSpan duration)
        {
            _utcNow = _utcNow.Add(duration);
        }
    }
}
|
||||
@@ -0,0 +1,509 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Moq;
|
||||
using StellaOps.AirGap.Policy;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Telemetry.Core.Tests;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="SealedModeTelemetryService"/> and the option, config
/// and event-args types it works with.
/// </summary>
public sealed class SealedModeTelemetryServiceTests : IDisposable
{
    /// <summary>
    /// Number of decimal places compared when asserting sampling rates, so the tests
    /// do not depend on exact floating-point representation.
    /// </summary>
    private const int RatePrecision = 10;

    private readonly FakeTimeProvider _timeProvider;
    private readonly Mock<IEgressPolicy> _egressPolicy;
    private readonly Mock<IIncidentModeService> _incidentModeService;
    private readonly Mock<ILogger<SealedModeTelemetryService>> _logger;

    public SealedModeTelemetryServiceTests()
    {
        _timeProvider = new FakeTimeProvider(DateTimeOffset.UtcNow);
        _egressPolicy = new Mock<IEgressPolicy>();
        _incidentModeService = new Mock<IIncidentModeService>();
        _logger = new Mock<ILogger<SealedModeTelemetryService>>();
    }

    public void Dispose()
    {
        // Cleanup if needed
    }

    /// <summary>
    /// Builds the service under test from default options.
    /// </summary>
    /// <param name="configure">Optional callback that mutates the options before the service is built.</param>
    /// <param name="useEgressPolicy">
    /// When <c>true</c> the mocked egress policy is passed to the service; otherwise <c>null</c> is passed.
    /// </param>
    private SealedModeTelemetryService CreateService(
        Action<SealedModeTelemetryOptions>? configure = null,
        bool useEgressPolicy = false)
    {
        var options = new SealedModeTelemetryOptions();
        configure?.Invoke(options);
        var monitor = new TestOptionsMonitor<SealedModeTelemetryOptions>(options);

        return new SealedModeTelemetryService(
            monitor,
            useEgressPolicy ? _egressPolicy.Object : null,
            _incidentModeService.Object,
            _logger.Object,
            _timeProvider);
    }

    [Fact]
    public void IsSealed_WhenOptionsEnabled_ReturnsTrue()
    {
        using var service = CreateService(opt => opt.Enabled = true);

        Assert.True(service.IsSealed);
    }

    [Fact]
    public void IsSealed_WhenOptionsDisabled_ReturnsFalse()
    {
        using var service = CreateService(opt => opt.Enabled = false);

        Assert.False(service.IsSealed);
    }

    [Fact]
    public void IsSealed_WhenEgressPolicySealed_ReturnsTrue()
    {
        _egressPolicy.Setup(p => p.IsSealed).Returns(true);
        using var service = CreateService(opt => opt.Enabled = false, useEgressPolicy: true);

        Assert.True(service.IsSealed);
    }

    [Fact]
    public void IsSealed_WhenEgressPolicyNotSealed_ReturnsFalse()
    {
        _egressPolicy.Setup(p => p.IsSealed).Returns(false);
        using var service = CreateService(opt => opt.Enabled = true, useEgressPolicy: true);

        Assert.False(service.IsSealed);
    }

    [Fact]
    public void EffectiveSamplingRate_WhenNotSealed_ReturnsFullSampling()
    {
        using var service = CreateService(opt => opt.Enabled = false);

        Assert.Equal(1.0, service.EffectiveSamplingRate, RatePrecision);
    }

    [Fact]
    public void EffectiveSamplingRate_WhenSealed_ReturnsMaxPercent()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.MaxSamplingPercent = 10;
        });

        Assert.Equal(0.1, service.EffectiveSamplingRate, RatePrecision);
    }

    [Fact]
    public void EffectiveSamplingRate_WhenSealedWithIncidentMode_ReturnsFullSampling()
    {
        _incidentModeService.Setup(s => s.IsActive).Returns(true);
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.MaxSamplingPercent = 10;
            opt.AllowIncidentModeOverride = true;
        });

        Assert.Equal(1.0, service.EffectiveSamplingRate, RatePrecision);
    }

    [Fact]
    public void EffectiveSamplingRate_WhenSealedWithDisabledIncidentOverride_ReturnsCapped()
    {
        _incidentModeService.Setup(s => s.IsActive).Returns(true);
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.MaxSamplingPercent = 10;
            opt.AllowIncidentModeOverride = false;
        });

        Assert.Equal(0.1, service.EffectiveSamplingRate, RatePrecision);
    }

    [Fact]
    public void IsIncidentModeOverrideActive_WhenConditionsMet_ReturnsTrue()
    {
        _incidentModeService.Setup(s => s.IsActive).Returns(true);
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.AllowIncidentModeOverride = true;
        });

        Assert.True(service.IsIncidentModeOverrideActive);
    }

    [Fact]
    public void IsIncidentModeOverrideActive_WhenNotSealed_ReturnsFalse()
    {
        _incidentModeService.Setup(s => s.IsActive).Returns(true);
        using var service = CreateService(opt =>
        {
            opt.Enabled = false;
            opt.AllowIncidentModeOverride = true;
        });

        Assert.False(service.IsIncidentModeOverrideActive);
    }

    [Fact]
    public void IsIncidentModeOverrideActive_WhenIncidentNotActive_ReturnsFalse()
    {
        _incidentModeService.Setup(s => s.IsActive).Returns(false);
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.AllowIncidentModeOverride = true;
        });

        Assert.False(service.IsIncidentModeOverrideActive);
    }

    [Fact]
    public void GetSealedModeTags_WhenNotSealed_ReturnsEmpty()
    {
        using var service = CreateService(opt => opt.Enabled = false);

        var tags = service.GetSealedModeTags();

        Assert.Empty(tags);
    }

    [Fact]
    public void GetSealedModeTags_WhenSealed_ReturnsSealedTag()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.SealedTagName = "sealed";
        });

        var tags = service.GetSealedModeTags();

        Assert.Equal("true", tags["sealed"]);
    }

    [Fact]
    public void GetSealedModeTags_WhenSealedWithForceScrub_ReturnsScrubbedTag()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.ForceScrub = true;
            opt.AddScrubbedTag = true;
        });

        var tags = service.GetSealedModeTags();

        Assert.Equal("true", tags["scrubbed"]);
    }

    [Fact]
    public void GetSealedModeTags_WhenSealedWithIncidentOverride_ReturnsOverrideTag()
    {
        _incidentModeService.Setup(s => s.IsActive).Returns(true);
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.AllowIncidentModeOverride = true;
        });

        var tags = service.GetSealedModeTags();

        Assert.Equal("true", tags["incident_override"]);
    }

    [Fact]
    public void GetSealedModeTags_WithAdditionalTags_IncludesThem()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.AdditionalTags["environment"] = "production";
            opt.AdditionalTags["region"] = "us-east-1";
        });

        var tags = service.GetSealedModeTags();

        Assert.Equal("production", tags["environment"]);
        Assert.Equal("us-east-1", tags["region"]);
    }

    [Fact]
    public void IsExternalExportAllowed_WhenNotSealed_ReturnsTrue()
    {
        using var service = CreateService(opt => opt.Enabled = false);
        var endpoint = new Uri("https://collector.example.com");

        var allowed = service.IsExternalExportAllowed(endpoint);

        Assert.True(allowed);
    }

    [Fact]
    public void IsExternalExportAllowed_WhenSealed_ReturnsFalse()
    {
        using var service = CreateService(opt => opt.Enabled = true);
        var endpoint = new Uri("https://collector.example.com");

        var allowed = service.IsExternalExportAllowed(endpoint);

        Assert.False(allowed);
    }

    [Fact]
    public void GetLocalExporterConfig_WhenNotSealed_ReturnsNull()
    {
        using var service = CreateService(opt => opt.Enabled = false);

        var config = service.GetLocalExporterConfig();

        Assert.Null(config);
    }

    [Fact]
    public void GetLocalExporterConfig_WhenSealed_ReturnsConfig()
    {
        using var service = CreateService(opt =>
        {
            opt.Enabled = true;
            opt.Exporter = SealedModeExporterType.File;
            opt.FilePath = "./logs/test.otlp";
            opt.MaxBytes = 5_000_000;
            opt.MaxRotatedFiles = 5;
        });

        var config = service.GetLocalExporterConfig();

        Assert.NotNull(config);
        Assert.Equal(SealedModeExporterType.File, config.Type);
        Assert.Equal("./logs/test.otlp", config.FilePath);
        Assert.Equal(5_000_000, config.MaxBytes);
        Assert.Equal(5, config.MaxRotatedFiles);
    }

    [Fact]
    public void RecordSealEvent_RaisesStateChangedEvent()
    {
        using var service = CreateService(opt => opt.Enabled = true);
        SealedModeStateChangedEventArgs? eventArgs = null;
        service.StateChanged += (s, e) => eventArgs = e;

        service.RecordSealEvent("Test reason", "test-actor");

        Assert.NotNull(eventArgs);
        Assert.True(eventArgs.IsSealed);
        Assert.Equal("Test reason", eventArgs.Reason);
        Assert.Equal("test-actor", eventArgs.Actor);
    }

    [Fact]
    public void RecordUnsealEvent_RaisesStateChangedEvent()
    {
        using var service = CreateService(opt => opt.Enabled = false);
        SealedModeStateChangedEventArgs? eventArgs = null;
        service.StateChanged += (s, e) => eventArgs = e;

        service.RecordUnsealEvent("Test unseal", "admin");

        Assert.NotNull(eventArgs);
        Assert.False(eventArgs.IsSealed);
        Assert.Equal("Test unseal", eventArgs.Reason);
        Assert.Equal("admin", eventArgs.Actor);
    }

    [Fact]
    public void RecordDriftEvent_DoesNotThrow()
    {
        using var service = CreateService(opt => opt.Enabled = true);
        var endpoint = new Uri("https://collector.example.com");

        // Capture the exception explicitly so the test carries a real assertion
        // instead of passing merely by reaching the end of the method.
        var exception = Record.Exception(() => service.RecordDriftEvent(endpoint, TelemetrySignal.Traces));

        Assert.Null(exception);
    }

    [Fact]
    public void SealedModeTelemetryOptions_Validate_ValidOptions_ReturnsNoErrors()
    {
        var options = new SealedModeTelemetryOptions();

        var errors = options.Validate();

        Assert.Empty(errors);
    }

    [Fact]
    public void SealedModeTelemetryOptions_Validate_InvalidSamplingPercent_ReturnsError()
    {
        var options = new SealedModeTelemetryOptions
        {
            MaxSamplingPercent = 150
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("MaxSamplingPercent", errors[0]);
    }

    [Fact]
    public void SealedModeTelemetryOptions_Validate_NegativeSamplingPercent_ReturnsError()
    {
        var options = new SealedModeTelemetryOptions
        {
            MaxSamplingPercent = -10
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("MaxSamplingPercent", errors[0]);
    }

    [Fact]
    public void SealedModeTelemetryOptions_Validate_InvalidMaxBytes_ReturnsError()
    {
        var options = new SealedModeTelemetryOptions
        {
            MaxBytes = 0
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("MaxBytes", errors[0]);
    }

    [Fact]
    public void SealedModeTelemetryOptions_Validate_MissingFilePath_ReturnsError()
    {
        var options = new SealedModeTelemetryOptions
        {
            Exporter = SealedModeExporterType.File,
            FilePath = ""
        };

        var errors = options.Validate();

        Assert.Single(errors);
        Assert.Contains("FilePath", errors[0]);
    }

    [Fact]
    public void SealedModeTelemetryOptions_GetEffectiveSamplingRate_WithoutIncident_ReturnsCapped()
    {
        var options = new SealedModeTelemetryOptions
        {
            MaxSamplingPercent = 25
        };

        var rate = options.GetEffectiveSamplingRate(incidentModeActive: false, incidentSamplingRate: 1.0);

        Assert.Equal(0.25, rate, RatePrecision);
    }

    [Fact]
    public void SealedModeTelemetryOptions_GetEffectiveSamplingRate_WithIncidentOverride_ReturnsRequested()
    {
        var options = new SealedModeTelemetryOptions
        {
            MaxSamplingPercent = 10,
            AllowIncidentModeOverride = true
        };

        var rate = options.GetEffectiveSamplingRate(incidentModeActive: true, incidentSamplingRate: 0.5);

        Assert.Equal(0.5, rate, RatePrecision);
    }

    [Fact]
    public void SealedModeTelemetryOptions_GetEffectiveSamplingRate_WithIncidentOverride_CapsAtOne()
    {
        var options = new SealedModeTelemetryOptions
        {
            MaxSamplingPercent = 10,
            AllowIncidentModeOverride = true
        };

        var rate = options.GetEffectiveSamplingRate(incidentModeActive: true, incidentSamplingRate: 1.5);

        Assert.Equal(1.0, rate, RatePrecision);
    }

    [Fact]
    public void SealedModeExporterConfig_PropertiesAreSet()
    {
        var config = new SealedModeExporterConfig
        {
            Type = SealedModeExporterType.File,
            FilePath = "/path/to/file.otlp",
            MaxBytes = 10_000_000,
            MaxRotatedFiles = 3
        };

        Assert.Equal(SealedModeExporterType.File, config.Type);
        Assert.Equal("/path/to/file.otlp", config.FilePath);
        Assert.Equal(10_000_000, config.MaxBytes);
        Assert.Equal(3, config.MaxRotatedFiles);
    }

    [Fact]
    public void SealedModeStateChangedEventArgs_PropertiesAreSet()
    {
        var timestamp = DateTimeOffset.UtcNow;
        var args = new SealedModeStateChangedEventArgs
        {
            IsSealed = true,
            Timestamp = timestamp,
            Reason = "Test reason",
            Actor = "test-user"
        };

        Assert.True(args.IsSealed);
        Assert.Equal(timestamp, args.Timestamp);
        Assert.Equal("Test reason", args.Reason);
        Assert.Equal("test-user", args.Actor);
    }

    /// <summary>
    /// <see cref="IOptionsMonitor{T}"/> stub that always returns the instance given at
    /// construction and never raises change notifications.
    /// </summary>
    private sealed class TestOptionsMonitor<T> : IOptionsMonitor<T>
    {
        private readonly T _value;

        public TestOptionsMonitor(T value)
        {
            _value = value;
        }

        public T CurrentValue => _value;

        public T Get(string? name) => _value;

        public IDisposable? OnChange(Action<T, string?> listener) => null;
    }

    /// <summary>
    /// <see cref="TimeProvider"/> with a manually controlled UTC clock.
    /// </summary>
    private sealed class FakeTimeProvider : TimeProvider
    {
        private DateTimeOffset _utcNow;

        public FakeTimeProvider(DateTimeOffset initialTime)
        {
            _utcNow = initialTime;
        }

        public override DateTimeOffset GetUtcNow() => _utcNow;

        /// <summary>Moves the clock by <paramref name="duration"/>.</summary>
        public void Advance(TimeSpan duration)
        {
            _utcNow = _utcNow.Add(duration);
        }

        /// <summary>Sets the clock to an absolute instant.</summary>
        public void SetUtcNow(DateTimeOffset time)
        {
            _utcNow = time;
        }
    }
}
|
||||
@@ -0,0 +1,303 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
/// Service for managing incident mode state in telemetry.
/// Incident mode increases sampling rates and adds special tags to telemetry data.
/// </summary>
public interface IIncidentModeService
{
    /// <summary>
    /// Gets whether incident mode is currently active.
    /// </summary>
    bool IsActive { get; }

    /// <summary>
    /// Gets the current incident mode state.
    /// </summary>
    IncidentModeState? CurrentState { get; }

    /// <summary>
    /// Activates incident mode with optional TTL override.
    /// </summary>
    /// <param name="actor">The actor (user/service) activating incident mode.</param>
    /// <param name="tenantId">Optional tenant identifier.</param>
    /// <param name="ttlOverride">Optional TTL override (uses default if not specified).</param>
    /// <param name="reason">Optional reason for activation.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The activation result.</returns>
    Task<IncidentModeActivationResult> ActivateAsync(
        string actor,
        string? tenantId = null,
        TimeSpan? ttlOverride = null,
        string? reason = null,
        CancellationToken ct = default);

    /// <summary>
    /// Deactivates incident mode.
    /// </summary>
    /// <param name="actor">The actor (user/service) deactivating incident mode.</param>
    /// <param name="reason">Optional reason for deactivation.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The deactivation result.</returns>
    Task<IncidentModeDeactivationResult> DeactivateAsync(
        string actor,
        string? reason = null,
        CancellationToken ct = default);

    /// <summary>
    /// Extends the current incident mode TTL.
    /// </summary>
    /// <param name="extension">The time to add to the current TTL.</param>
    /// <param name="actor">The actor extending the TTL.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The new expiration time, or null if incident mode is not active.</returns>
    Task<DateTimeOffset?> ExtendTtlAsync(
        TimeSpan extension,
        string actor,
        CancellationToken ct = default);

    /// <summary>
    /// Gets tags to add to telemetry when incident mode is active.
    /// </summary>
    /// <returns>A dictionary of tags, or empty if incident mode is not active.</returns>
    // Fully qualified because this file does not import System.Collections.Generic.
    System.Collections.Generic.IReadOnlyDictionary<string, string> GetIncidentTags();

    /// <summary>
    /// Event raised when incident mode is activated.
    /// </summary>
    event EventHandler<IncidentModeActivatedEventArgs>? Activated;

    /// <summary>
    /// Event raised when incident mode is deactivated or expires.
    /// </summary>
    event EventHandler<IncidentModeDeactivatedEventArgs>? Deactivated;
}
|
||||
|
||||
/// <summary>
/// Represents the current state of incident mode.
/// </summary>
public sealed record IncidentModeState
{
    /// <summary>
    /// Gets whether incident mode is enabled.
    /// </summary>
    public required bool Enabled { get; init; }

    /// <summary>
    /// Gets the timestamp when incident mode was activated.
    /// </summary>
    public required DateTimeOffset ActivatedAt { get; init; }

    /// <summary>
    /// Gets the timestamp when incident mode will expire.
    /// </summary>
    public required DateTimeOffset ExpiresAt { get; init; }

    /// <summary>
    /// Gets the actor who activated incident mode.
    /// </summary>
    public required string Actor { get; init; }

    /// <summary>
    /// Gets the tenant identifier, if applicable.
    /// </summary>
    public string? TenantId { get; init; }

    /// <summary>
    /// Gets the source of the activation (CLI, API, config).
    /// </summary>
    public required IncidentModeSource Source { get; init; }

    /// <summary>
    /// Gets the reason for activation.
    /// </summary>
    public string? Reason { get; init; }

    /// <summary>
    /// Gets the unique activation ID.
    /// </summary>
    public required string ActivationId { get; init; }

    /// <summary>
    /// Gets whether this state has expired, judged against the system UTC clock
    /// (<see cref="DateTimeOffset.UtcNow"/>) at the moment of the call.
    /// </summary>
    public bool IsExpired => DateTimeOffset.UtcNow >= ExpiresAt;

    /// <summary>
    /// Gets the remaining time until expiration, judged against the system UTC clock.
    /// Never negative: returns <see cref="TimeSpan.Zero"/> once <see cref="ExpiresAt"/> has been reached.
    /// </summary>
    public TimeSpan RemainingTime
    {
        get
        {
            // Read the clock exactly once. Checking IsExpired and then subtracting a
            // second UtcNow reading could yield a small negative span if the state
            // expired between the two reads.
            var remaining = ExpiresAt - DateTimeOffset.UtcNow;
            return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Identifies where an incident mode activation came from.
/// </summary>
public enum IncidentModeSource
{
    /// <summary>
    /// Activated through a CLI flag.
    /// </summary>
    Cli = 0,

    /// <summary>
    /// Activated through the API.
    /// </summary>
    Api = 1,

    /// <summary>
    /// Activated by configuration.
    /// </summary>
    Configuration = 2,

    /// <summary>
    /// Restored from persisted state.
    /// </summary>
    Restored = 3,
}
|
||||
|
||||
/// <summary>
/// Outcome of an attempt to activate incident mode.
/// </summary>
public sealed record IncidentModeActivationResult
{
    /// <summary>
    /// Gets whether the activation succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Gets the resulting state; set by <see cref="Succeeded"/>.
    /// </summary>
    public IncidentModeState? State { get; init; }

    /// <summary>
    /// Gets the error message; set by <see cref="Failed"/>.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Gets whether incident mode was already active when activation was requested.
    /// </summary>
    public bool WasAlreadyActive { get; init; }

    /// <summary>
    /// Builds a successful result carrying <paramref name="state"/>.
    /// </summary>
    /// <param name="state">The activation state to report.</param>
    /// <param name="wasAlreadyActive">Whether incident mode was already active.</param>
    public static IncidentModeActivationResult Succeeded(IncidentModeState state, bool wasAlreadyActive = false) =>
        new()
        {
            Success = true,
            State = state,
            WasAlreadyActive = wasAlreadyActive,
        };

    /// <summary>
    /// Builds a failed result carrying <paramref name="error"/>.
    /// </summary>
    /// <param name="error">The error message to report.</param>
    public static IncidentModeActivationResult Failed(string error) =>
        new()
        {
            Success = false,
            Error = error,
        };
}
|
||||
|
||||
/// <summary>
/// Outcome of an attempt to deactivate incident mode.
/// </summary>
public sealed record IncidentModeDeactivationResult
{
    /// <summary>
    /// Gets whether the deactivation succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Gets whether incident mode was active before the deactivation.
    /// </summary>
    public bool WasActive { get; init; }

    /// <summary>
    /// Gets the error message; set by <see cref="Failed"/>.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Gets the reason for the deactivation.
    /// </summary>
    public IncidentModeDeactivationReason Reason { get; init; }

    /// <summary>
    /// Builds a successful result.
    /// </summary>
    /// <param name="wasActive">Whether incident mode was active beforehand.</param>
    /// <param name="reason">Why incident mode was deactivated.</param>
    public static IncidentModeDeactivationResult Succeeded(bool wasActive, IncidentModeDeactivationReason reason) =>
        new()
        {
            Success = true,
            WasActive = wasActive,
            Reason = reason,
        };

    /// <summary>
    /// Builds a failed result carrying <paramref name="error"/>.
    /// </summary>
    /// <param name="error">The error message to report.</param>
    public static IncidentModeDeactivationResult Failed(string error) =>
        new()
        {
            Success = false,
            Error = error,
        };
}
|
||||
|
||||
/// <summary>
/// Explains why incident mode was deactivated.
/// </summary>
public enum IncidentModeDeactivationReason
{
    /// <summary>
    /// A user or service deactivated it manually.
    /// </summary>
    Manual = 0,

    /// <summary>
    /// The TTL expired.
    /// </summary>
    Expired = 1,

    /// <summary>
    /// The system is shutting down.
    /// </summary>
    Shutdown = 2,

    /// <summary>
    /// Sealed mode was activated.
    /// </summary>
    SealedMode = 3,
}
|
||||
|
||||
/// <summary>
/// Payload for the incident mode activation event.
/// </summary>
public sealed class IncidentModeActivatedEventArgs : EventArgs
{
    /// <summary>
    /// Gets whether incident mode was already active, i.e. this was a reactivation.
    /// </summary>
    public bool WasReactivation { get; init; }

    /// <summary>
    /// Gets the activation state.
    /// </summary>
    public required IncidentModeState State { get; init; }
}
|
||||
|
||||
/// <summary>
/// Payload for the incident mode deactivation event.
/// </summary>
public sealed class IncidentModeDeactivatedEventArgs : EventArgs
{
    /// <summary>
    /// Gets the incident mode state as it was at deactivation time.
    /// </summary>
    public required IncidentModeState State { get; init; }

    /// <summary>
    /// Gets why incident mode was deactivated.
    /// </summary>
    public required IncidentModeDeactivationReason Reason { get; init; }

    /// <summary>
    /// Gets the actor who performed the deactivation (if manual).
    /// </summary>
    public string? DeactivatedBy { get; init; }
}
|
||||
@@ -0,0 +1,127 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
/// Controls how telemetry behaves in sealed mode. While sealed mode is active,
/// external exporters are disabled and telemetry is written to local storage instead.
/// </summary>
public interface ISealedModeTelemetryService
{
    /// <summary>
    /// Event raised when sealed mode state changes.
    /// </summary>
    event EventHandler<SealedModeStateChangedEventArgs>? StateChanged;

    /// <summary>
    /// Gets whether sealed mode is currently active.
    /// </summary>
    bool IsSealed { get; }

    /// <summary>
    /// Gets the sampling rate currently in effect, in the range 0.0-1.0.
    /// </summary>
    double EffectiveSamplingRate { get; }

    /// <summary>
    /// Gets whether incident mode is currently overriding sealed mode sampling.
    /// </summary>
    bool IsIncidentModeOverrideActive { get; }

    /// <summary>
    /// Gets the tags to attach to telemetry while sealed mode is active.
    /// </summary>
    /// <returns>A dictionary of tags; empty when sealed mode is not active.</returns>
    IReadOnlyDictionary<string, string> GetSealedModeTags();

    /// <summary>
    /// Decides whether exporting to an external endpoint is permitted.
    /// Always returns false when sealed mode is active.
    /// </summary>
    /// <param name="endpoint">The exporter endpoint.</param>
    /// <returns><c>true</c> if external export is allowed.</returns>
    bool IsExternalExportAllowed(Uri endpoint);

    /// <summary>
    /// Gets the configuration of the local exporter used in sealed mode.
    /// </summary>
    /// <returns>The exporter configuration, or null when sealed mode is not active.</returns>
    SealedModeExporterConfig? GetLocalExporterConfig();

    /// <summary>
    /// Records a seal event (entry into sealed mode).
    /// </summary>
    /// <param name="reason">Optional reason for sealing.</param>
    /// <param name="actor">The actor who initiated the seal.</param>
    void RecordSealEvent(string? reason = null, string? actor = null);

    /// <summary>
    /// Records an unseal event (exit from sealed mode).
    /// </summary>
    /// <param name="reason">Optional reason for unsealing.</param>
    /// <param name="actor">The actor who initiated the unseal.</param>
    void RecordUnsealEvent(string? reason = null, string? actor = null);

    /// <summary>
    /// Records a drift event when external export was blocked.
    /// </summary>
    /// <param name="endpoint">The blocked endpoint.</param>
    /// <param name="signal">The telemetry signal type.</param>
    void RecordDriftEvent(Uri endpoint, TelemetrySignal signal);
}
|
||||
|
||||
/// <summary>
/// Settings for the local exporter used while sealed mode is active.
/// </summary>
public sealed record SealedModeExporterConfig
{
    /// <summary>
    /// Gets the kind of exporter.
    /// </summary>
    public required SealedModeExporterType Type { get; init; }

    /// <summary>
    /// Gets the output file path for file-based exporters.
    /// </summary>
    public string? FilePath { get; init; }

    /// <summary>
    /// Gets the size, in bytes, at which the output is rotated.
    /// </summary>
    public long MaxBytes { get; init; }

    /// <summary>
    /// Gets the maximum number of rotated files.
    /// </summary>
    public int MaxRotatedFiles { get; init; }
}
|
||||
|
||||
/// <summary>
/// Payload for the sealed mode state-change event.
/// </summary>
public sealed class SealedModeStateChangedEventArgs : EventArgs
{
    /// <summary>
    /// Gets whether sealed mode is active after the change.
    /// </summary>
    public required bool IsSealed { get; init; }

    /// <summary>
    /// Gets when the state change happened.
    /// </summary>
    public required DateTimeOffset Timestamp { get; init; }

    /// <summary>
    /// Gets the reason given for the state change.
    /// </summary>
    public string? Reason { get; init; }

    /// <summary>
    /// Gets the actor who initiated the change.
    /// </summary>
    public string? Actor { get; init; }
}
|
||||
@@ -0,0 +1,191 @@
|
||||
using System;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
|
||||
/// Options for incident mode configuration.
|
||||
/// </summary>
|
||||
public sealed class IncidentModeOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Configuration section name.
|
||||
/// </summary>
|
||||
public const string SectionName = "Telemetry:Incident";
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether incident mode is enabled by configuration.
|
||||
/// CLI flag can override this.
|
||||
/// </summary>
|
||||
public bool Enabled { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the default TTL for incident mode.
|
||||
/// </summary>
|
||||
public TimeSpan DefaultTtl { get; set; } = TimeSpan.FromMinutes(30);
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the maximum allowed TTL for incident mode.
|
||||
/// </summary>
|
||||
public TimeSpan MaxTtl { get; set; } = TimeSpan.FromHours(24);
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the minimum allowed TTL for incident mode.
|
||||
/// </summary>
|
||||
public TimeSpan MinTtl { get; set; } = TimeSpan.FromMinutes(5);
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the sampling rate to use during incident mode (0.0-1.0).
|
||||
/// </summary>
|
||||
public double IncidentSamplingRate { get; set; } = 1.0;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the flush interval for exporters during incident mode.
|
||||
/// </summary>
|
||||
public TimeSpan IncidentFlushInterval { get; set; } = TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the normal flush interval for comparison.
|
||||
/// </summary>
|
||||
public TimeSpan NormalFlushInterval { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether to persist incident mode state to local file.
|
||||
/// </summary>
|
||||
public bool PersistState { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the state file path. Uses default if not specified.
|
||||
/// </summary>
|
||||
public string? StateFilePath { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether to emit audit events for activation/deactivation.
|
||||
/// </summary>
|
||||
public bool EmitAuditEvents { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the tag name for incident mode indicator.
|
||||
/// </summary>
|
||||
public string IncidentTagName { get; set; } = "incident";
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether sealed mode disables incident mode.
|
||||
/// </summary>
|
||||
public bool DisableInSealedMode { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets additional tags to add during incident mode.
|
||||
/// </summary>
|
||||
public System.Collections.Generic.Dictionary<string, string> AdditionalTags { get; set; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether to allow TTL extension.
|
||||
/// </summary>
|
||||
public bool AllowTtlExtension { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the maximum number of extensions allowed per activation.
|
||||
/// </summary>
|
||||
public int MaxExtensions { get; set; } = 5;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether to restore state from persisted file on startup.
|
||||
/// </summary>
|
||||
public bool RestoreOnStartup { get; set; } = true;
|
||||
|
||||
/// <summary>
/// Validates the options and returns any validation errors.
/// </summary>
/// <returns>
/// A list of human-readable error messages, one per violated rule.
/// The list is empty when no rule is violated.
/// </returns>
public System.Collections.Generic.List<string> Validate()
{
    var errors = new System.Collections.Generic.List<string>();

    // An inverted range makes ClampTtl meaningless, so report it explicitly.
    if (MinTtl > MaxTtl)
    {
        errors.Add($"MinTtl ({MinTtl}) cannot be greater than MaxTtl ({MaxTtl})");
    }

    if (DefaultTtl < MinTtl)
    {
        errors.Add($"DefaultTtl ({DefaultTtl}) cannot be less than MinTtl ({MinTtl})");
    }

    if (DefaultTtl > MaxTtl)
    {
        errors.Add($"DefaultTtl ({DefaultTtl}) cannot be greater than MaxTtl ({MaxTtl})");
    }

    // Written as a negated "inside the range" test so that double.NaN, for which
    // every ordered comparison is false, is rejected instead of slipping through.
    if (!(IncidentSamplingRate >= 0.0 && IncidentSamplingRate <= 1.0))
    {
        errors.Add($"IncidentSamplingRate ({IncidentSamplingRate}) must be between 0.0 and 1.0");
    }

    if (IncidentFlushInterval <= TimeSpan.Zero)
    {
        errors.Add("IncidentFlushInterval must be positive");
    }

    if (MaxExtensions < 0)
    {
        errors.Add("MaxExtensions cannot be negative");
    }

    return errors;
}
|
||||
|
||||
/// <summary>
/// Restricts a requested TTL to the configured <c>MinTtl</c>..<c>MaxTtl</c> window.
/// </summary>
/// <param name="ttl">The requested TTL.</param>
/// <returns>
/// <c>MinTtl</c> when <paramref name="ttl"/> is below it, otherwise <c>MaxTtl</c> when
/// <paramref name="ttl"/> is above it, otherwise <paramref name="ttl"/> unchanged.
/// </returns>
public TimeSpan ClampTtl(TimeSpan ttl)
    => ttl < MinTtl
        ? MinTtl
        : ttl > MaxTtl
            ? MaxTtl
            : ttl;
|
||||
}
|
||||
|
||||
/// <summary>
/// Serializable snapshot of an incident mode activation, as written to and read from the state file.
/// </summary>
public sealed class PersistedIncidentModeState
{
    /// <summary>Whether incident mode is enabled.</summary>
    public bool Enabled { get; set; }

    /// <summary>Timestamp at which incident mode was activated, if known.</summary>
    public DateTimeOffset? ActivatedAt { get; set; }

    /// <summary>Timestamp at which incident mode will expire, if known.</summary>
    public DateTimeOffset? ExpiresAt { get; set; }

    /// <summary>The actor who activated incident mode.</summary>
    public string? Actor { get; set; }

    /// <summary>The tenant identifier.</summary>
    public string? TenantId { get; set; }

    /// <summary>The activation ID.</summary>
    public string? ActivationId { get; set; }

    /// <summary>The source of activation, stored as text.</summary>
    public string? Source { get; set; }

    /// <summary>The reason given for activation.</summary>
    public string? Reason { get; set; }

    /// <summary>The number of TTL extensions applied so far.</summary>
    public int ExtensionCount { get; set; }
}
|
||||
@@ -0,0 +1,531 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
|
||||
/// Default implementation of <see cref="IIncidentModeService"/>.
|
||||
/// </summary>
|
||||
public sealed class IncidentModeService : IIncidentModeService, IDisposable
|
||||
{
|
||||
private readonly IOptionsMonitor<IncidentModeOptions> _optionsMonitor;
|
||||
private readonly ITelemetryContextAccessor? _contextAccessor;
|
||||
private readonly ILogger<IncidentModeService>? _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly object _lock = new();
|
||||
private readonly Timer _expiryTimer;
|
||||
|
||||
private IncidentModeState? _currentState;
|
||||
private int _extensionCount;
|
||||
|
||||
/// <inheritdoc/>
|
||||
// The property pattern reads the field exactly once, so a concurrent deactivation or
// expiry cannot null it between the null check and the IsExpired check.
public bool IsActive => _currentState is { IsExpired: false };
|
||||
|
||||
/// <inheritdoc/>
|
||||
// Single field read: the state that was checked for expiry is the state that is returned.
// Yields null when there is no state or the state has expired.
public IncidentModeState? CurrentState => _currentState is { IsExpired: false } current ? current : null;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public event EventHandler<IncidentModeActivatedEventArgs>? Activated;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public event EventHandler<IncidentModeDeactivatedEventArgs>? Deactivated;
|
||||
|
||||
/// <summary>
/// Creates the service, starts the periodic expiry check and, when
/// <c>RestoreOnStartup</c> is set, kicks off a background restore of persisted state.
/// </summary>
/// <param name="optionsMonitor">Source of the current <see cref="IncidentModeOptions"/>.</param>
/// <param name="contextAccessor">Optional accessor used to default the tenant on activation.</param>
/// <param name="logger">Optional logger.</param>
/// <param name="timeProvider">Optional clock; <see cref="TimeProvider.System"/> is used when omitted.</param>
/// <exception cref="ArgumentNullException"><paramref name="optionsMonitor"/> is null.</exception>
public IncidentModeService(
    IOptionsMonitor<IncidentModeOptions> optionsMonitor,
    ITelemetryContextAccessor? contextAccessor = null,
    ILogger<IncidentModeService>? logger = null,
    TimeProvider? timeProvider = null)
{
    ArgumentNullException.ThrowIfNull(optionsMonitor);

    _optionsMonitor = optionsMonitor;
    _contextAccessor = contextAccessor;
    _logger = logger;
    _timeProvider = timeProvider ?? TimeProvider.System;

    // First check after ten seconds, then every ten seconds.
    var checkInterval = TimeSpan.FromSeconds(10);
    _expiryTimer = new Timer(CheckExpiry, null, checkInterval, checkInterval);

    if (!_optionsMonitor.CurrentValue.RestoreOnStartup)
    {
        return;
    }

    // Fire-and-forget: the restore runs in the background and is not awaited here.
    _ = RestoreStateAsync();
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// If an unexpired activation already exists, its expiry is reset to "now + TTL" and the
/// call is reported as a reactivation; otherwise a new activation is created. The TTL is
/// <paramref name="ttlOverride"/> clamped by the options, or <c>DefaultTtl</c> when omitted.
/// Returns a failed result when sealed mode blocks activation.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="actor"/> is null, empty or whitespace.</exception>
public async Task<IncidentModeActivationResult> ActivateAsync(
    string actor,
    string? tenantId = null,
    TimeSpan? ttlOverride = null,
    string? reason = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(actor);

    var options = _optionsMonitor.CurrentValue;

    // Check sealed mode restriction
    if (options.DisableInSealedMode && IsSealedModeActive())
    {
        return IncidentModeActivationResult.Failed(
            "Cannot activate incident mode when sealed mode is active");
    }

    var ttl = ttlOverride.HasValue ? options.ClampTtl(ttlOverride.Value) : options.DefaultTtl;
    var now = _timeProvider.GetUtcNow();
    bool wasAlreadyActive;

    // Snapshot of the state produced by THIS call. Everything after the lock uses the
    // snapshot rather than the shared field, which another thread (deactivation, the
    // expiry timer) may replace or null at any time.
    IncidentModeState state;

    lock (_lock)
    {
        if (_currentState is { IsExpired: false } active)
        {
            wasAlreadyActive = true;
            _logger?.LogInformation(
                "Incident mode already active (activation {ActivationId}). Extending TTL.",
                active.ActivationId);

            // Extend existing activation
            state = active with
            {
                ExpiresAt = now + ttl
            };
        }
        else
        {
            // New activation
            wasAlreadyActive = false;
            state = new IncidentModeState
            {
                Enabled = true,
                ActivatedAt = now,
                ExpiresAt = now + ttl,
                Actor = actor,
                TenantId = tenantId ?? _contextAccessor?.Context?.TenantId,
                Source = IncidentModeSource.Api,
                Reason = reason,
                ActivationId = Guid.NewGuid().ToString("N")[..12]
            };
            _extensionCount = 0;
        }

        _currentState = state;
    }

    _logger?.LogInformation(
        "Incident mode activated by {Actor} for tenant {TenantId}. Expires at {ExpiresAt}. Activation ID: {ActivationId}",
        actor,
        state.TenantId ?? "global",
        state.ExpiresAt,
        state.ActivationId);

    // Persist state
    if (options.PersistState)
    {
        await PersistStateAsync(ct).ConfigureAwait(false);
    }

    // Emit audit event
    if (options.EmitAuditEvents)
    {
        EmitActivationAuditEvent(state, wasAlreadyActive);
    }

    // Raise event
    Activated?.Invoke(this, new IncidentModeActivatedEventArgs
    {
        State = state,
        WasReactivation = wasAlreadyActive
    });

    return IncidentModeActivationResult.Succeeded(state, wasAlreadyActive);
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Always clears the in-memory state and extension counter. Persisted-state cleanup,
/// the audit entry and the <see cref="Deactivated"/> event only happen when an unexpired
/// activation was actually present.
/// </remarks>
public async Task<IncidentModeDeactivationResult> DeactivateAsync(
    string actor,
    string? reason = null,
    CancellationToken ct = default)
{
    var options = _optionsMonitor.CurrentValue;
    IncidentModeState? priorState;
    bool wasActive;

    lock (_lock)
    {
        priorState = _currentState;
        wasActive = priorState is { IsExpired: false };
        _currentState = null;
        _extensionCount = 0;
    }

    // Nothing was live: report success without side effects.
    if (!wasActive || priorState is null)
    {
        return IncidentModeDeactivationResult.Succeeded(wasActive, IncidentModeDeactivationReason.Manual);
    }

    _logger?.LogInformation(
        "Incident mode deactivated by {Actor}. Activation ID: {ActivationId}. Reason: {Reason}",
        actor,
        priorState.ActivationId,
        reason ?? "manual deactivation");

    if (options.PersistState)
    {
        await ClearPersistedStateAsync(ct).ConfigureAwait(false);
    }

    if (options.EmitAuditEvents)
    {
        EmitDeactivationAuditEvent(priorState, IncidentModeDeactivationReason.Manual, actor);
    }

    var args = new IncidentModeDeactivatedEventArgs
    {
        State = priorState,
        Reason = IncidentModeDeactivationReason.Manual,
        DeactivatedBy = actor
    };
    Deactivated?.Invoke(this, args);

    return IncidentModeDeactivationResult.Succeeded(wasActive, IncidentModeDeactivationReason.Manual);
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Returns <c>null</c> without changing anything when extensions are disabled by
/// configuration, when <paramref name="extension"/> is not positive, when no unexpired
/// activation exists, or when <c>MaxExtensions</c> has been reached. The new expiry is
/// capped at "ActivatedAt + MaxTtl". When state persistence is enabled, the extended
/// state is written out so the new expiry and extension count are not lost.
/// </remarks>
public async Task<DateTimeOffset?> ExtendTtlAsync(
    TimeSpan extension,
    string actor,
    CancellationToken ct = default)
{
    var options = _optionsMonitor.CurrentValue;

    if (!options.AllowTtlExtension)
    {
        _logger?.LogWarning("TTL extension not allowed by configuration");
        return null;
    }

    // A zero or negative "extension" would shorten the TTL while still consuming an
    // extension slot; reject it the same way as the other refusal paths.
    if (extension <= TimeSpan.Zero)
    {
        _logger?.LogWarning("TTL extension must be positive (was {Extension})", extension);
        return null;
    }

    DateTimeOffset newExpiresAt;

    lock (_lock)
    {
        if (_currentState is not { IsExpired: false } active)
        {
            return null;
        }

        if (_extensionCount >= options.MaxExtensions)
        {
            _logger?.LogWarning(
                "Maximum TTL extensions ({MaxExtensions}) reached for activation {ActivationId}",
                options.MaxExtensions,
                active.ActivationId);
            return null;
        }

        newExpiresAt = active.ExpiresAt + extension;
        var maxAllowedExpiry = active.ActivatedAt + options.MaxTtl;

        // Never let an activation outlive ActivatedAt + MaxTtl.
        if (newExpiresAt > maxAllowedExpiry)
        {
            newExpiresAt = maxAllowedExpiry;
        }

        _currentState = active with { ExpiresAt = newExpiresAt };
        _extensionCount++;

        _logger?.LogInformation(
            "Incident mode TTL extended by {Actor}. New expiry: {ExpiresAt}. Extensions: {Count}/{Max}",
            actor,
            newExpiresAt,
            _extensionCount,
            options.MaxExtensions);
    }

    // Persist outside the lock (no awaiting while holding it).
    if (options.PersistState)
    {
        await PersistStateAsync(ct).ConfigureAwait(false);
    }

    return newExpiresAt;
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Returns an empty dictionary when <see cref="CurrentState"/> is null. Otherwise the
/// result holds the incident indicator tag, the activation ID, the actor, the tenant
/// (when set) and finally the configured additional tags, which overwrite earlier
/// entries that share a key.
/// </remarks>
public IReadOnlyDictionary<string, string> GetIncidentTags()
{
    var result = new Dictionary<string, string>();

    if (CurrentState is not { } active)
    {
        return result;
    }

    var options = _optionsMonitor.CurrentValue;

    // Indexer assignment throughout: a later key silently replaces an earlier one.
    result[options.IncidentTagName] = "true";
    result["incident_activation_id"] = active.ActivationId;
    result["incident_actor"] = active.Actor;

    if (active.TenantId is not null)
    {
        result["incident_tenant"] = active.TenantId;
    }

    foreach (var extra in options.AdditionalTags)
    {
        result[extra.Key] = extra.Value;
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Activates incident mode on behalf of a CLI flag, tagging the activation with the CLI source.
/// </summary>
/// <param name="actor">The actor requesting activation.</param>
/// <param name="ttl">Optional TTL override.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The activation result.</returns>
public Task<IncidentModeActivationResult> ActivateFromCliAsync(
    string actor,
    TimeSpan? ttl = null,
    CancellationToken ct = default)
    => ActivateInternalAsync(actor, null, ttl, "CLI activation", IncidentModeSource.Cli, ct);
|
||||
|
||||
/// <summary>
/// Activates incident mode when the configuration has it enabled.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// A failed result when <c>Enabled</c> is false in the current options; otherwise the
/// result of activating with the actor "configuration" and the Configuration source.
/// </returns>
public Task<IncidentModeActivationResult> ActivateFromConfigAsync(CancellationToken ct = default)
    => _optionsMonitor.CurrentValue.Enabled
        ? ActivateInternalAsync("configuration", null, null, "Configuration activation", IncidentModeSource.Configuration, ct)
        : Task.FromResult(IncidentModeActivationResult.Failed("Incident mode not enabled in configuration"));
|
||||
|
||||
/// <summary>
/// Activates incident mode through <see cref="ActivateAsync"/> and then re-labels the
/// resulting activation with the given <paramref name="source"/>.
/// </summary>
/// <param name="actor">The actor requesting activation.</param>
/// <param name="tenantId">Optional tenant identifier.</param>
/// <param name="ttl">Optional TTL override.</param>
/// <param name="reason">Optional activation reason.</param>
/// <param name="source">The source to record on the activation.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The result produced by <see cref="ActivateAsync"/>.</returns>
private async Task<IncidentModeActivationResult> ActivateInternalAsync(
    string actor,
    string? tenantId,
    TimeSpan? ttl,
    string? reason,
    IncidentModeSource source,
    CancellationToken ct)
{
    var result = await ActivateAsync(actor, tenantId, ttl, reason, ct).ConfigureAwait(false);

    if (!result.Success || result.State is null)
    {
        return result;
    }

    var sourceUpdated = false;

    lock (_lock)
    {
        // Only re-label the activation this call produced. Between ActivateAsync
        // returning and this lock, the state may have been replaced by a different
        // activation, which must keep its own source.
        if (_currentState is not null &&
            string.Equals(_currentState.ActivationId, result.State.ActivationId, StringComparison.Ordinal))
        {
            _currentState = _currentState with { Source = source };
            sourceUpdated = true;
        }
    }

    // ActivateAsync already persisted a snapshot carrying its own source; rewrite it so
    // the state file matches the in-memory state.
    if (sourceUpdated && _optionsMonitor.CurrentValue.PersistState)
    {
        await PersistStateAsync(ct).ConfigureAwait(false);
    }

    // NOTE(review): result.State, the audit entry and the Activated event were produced
    // inside ActivateAsync and still carry the source it assigned, not `source`.
    return result;
}
|
||||
|
||||
/// <summary>
/// Timer callback: when the current activation has expired, clears it and performs the
/// same follow-up as a deactivation (persisted-state cleanup, audit entry,
/// <see cref="Deactivated"/> event) with the Expired reason.
/// </summary>
/// <param name="state">Unused timer state object.</param>
private void CheckExpiry(object? state)
{
    IncidentModeState expiredState;

    lock (_lock)
    {
        if (_currentState is not { IsExpired: true } expired)
        {
            return;
        }

        expiredState = expired;
        _currentState = null;
        _extensionCount = 0;
    }

    // This runs on a timer thread with no caller to observe failures. An exception
    // escaping here (for example from a Deactivated subscriber) would be unhandled,
    // so it is logged instead.
    try
    {
        _logger?.LogInformation(
            "Incident mode expired. Activation ID: {ActivationId}",
            expiredState.ActivationId);

        var options = _optionsMonitor.CurrentValue;

        // Clear persisted state
        if (options.PersistState)
        {
            _ = ClearPersistedStateAsync(default);
        }

        // Emit audit event
        if (options.EmitAuditEvents)
        {
            EmitDeactivationAuditEvent(expiredState, IncidentModeDeactivationReason.Expired, null);
        }

        // Raise event
        Deactivated?.Invoke(this, new IncidentModeDeactivatedEventArgs
        {
            State = expiredState,
            Reason = IncidentModeDeactivationReason.Expired
        });
    }
    catch (Exception ex)
    {
        _logger?.LogError(
            ex,
            "Failed to complete incident mode expiry handling for activation {ActivationId}",
            expiredState.ActivationId);
    }
}
|
||||
|
||||
/// <summary>
/// Loads the persisted incident mode state, if the state file exists, and installs it as
/// the current state when it is enabled and not yet expired.
/// </summary>
/// <remarks>
/// Started without being awaited from the constructor, so it can complete after callers
/// have begun using the service. A state that is already present in memory therefore
/// wins over the persisted one. Read and parse failures are logged and otherwise ignored.
/// </remarks>
private async Task RestoreStateAsync()
{
    var options = _optionsMonitor.CurrentValue;
    var path = GetStateFilePath(options);

    if (!File.Exists(path))
    {
        return;
    }

    try
    {
        var json = await File.ReadAllTextAsync(path).ConfigureAwait(false);
        var persisted = JsonSerializer.Deserialize<PersistedIncidentModeState>(json);
        var now = _timeProvider.GetUtcNow();

        if (persisted is null || !persisted.Enabled)
        {
            return;
        }

        // Missing or already-past expiry: nothing to restore.
        if (persisted.ExpiresAt is not { } expiresAt || expiresAt <= now)
        {
            return;
        }

        var restored = new IncidentModeState
        {
            Enabled = true,
            ActivatedAt = persisted.ActivatedAt ?? now,
            ExpiresAt = expiresAt,
            Actor = persisted.Actor ?? "restored",
            TenantId = persisted.TenantId,
            Source = IncidentModeSource.Restored,
            Reason = persisted.Reason,
            ActivationId = persisted.ActivationId ?? Guid.NewGuid().ToString("N")[..12]
        };

        lock (_lock)
        {
            // An activation made while the file was being read is newer than the file.
            if (_currentState is not null)
            {
                _logger?.LogDebug(
                    "Skipping restore of persisted incident mode state; activation {ActivationId} is already present",
                    _currentState.ActivationId);
                return;
            }

            _currentState = restored;
            _extensionCount = persisted.ExtensionCount;
        }

        // Log from the local snapshot; the shared field may already have changed.
        _logger?.LogInformation(
            "Restored incident mode state. Activation ID: {ActivationId}. Expires at: {ExpiresAt}",
            restored.ActivationId,
            restored.ExpiresAt);
    }
    catch (Exception ex)
    {
        _logger?.LogWarning(ex, "Failed to restore incident mode state from {Path}", path);
    }
}
|
||||
|
||||
/// <summary>
/// Writes the current incident mode state to the state file as indented JSON.
/// </summary>
/// <param name="ct">Cancellation token passed to the file write.</param>
/// <remarks>
/// Does nothing when there is no current state. Best effort: any failure, including
/// directory creation and permission changes, is logged as a warning and not rethrown.
/// On non-Windows platforms the file mode is set to owner read/write after the write.
/// </remarks>
private async Task PersistStateAsync(CancellationToken ct)
{
    var options = _optionsMonitor.CurrentValue;
    var path = GetStateFilePath(options);

    // Take the state and its extension counter together under the lock so the snapshot
    // cannot combine values from two different moments.
    IncidentModeState? state;
    int extensionCount;
    lock (_lock)
    {
        state = _currentState;
        extensionCount = _extensionCount;
    }

    if (state is null)
    {
        return;
    }

    try
    {
        var directory = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory))
        {
            Directory.CreateDirectory(directory);
        }

        var persisted = new PersistedIncidentModeState
        {
            Enabled = true,
            ActivatedAt = state.ActivatedAt,
            ExpiresAt = state.ExpiresAt,
            Actor = state.Actor,
            TenantId = state.TenantId,
            ActivationId = state.ActivationId,
            Source = state.Source.ToString(),
            Reason = state.Reason,
            ExtensionCount = extensionCount
        };

        var json = JsonSerializer.Serialize(persisted, new JsonSerializerOptions { WriteIndented = true });
        await File.WriteAllTextAsync(path, json, ct).ConfigureAwait(false);

        // Set file permissions (Unix only)
        if (!OperatingSystem.IsWindows())
        {
            File.SetUnixFileMode(path, UnixFileMode.UserRead | UnixFileMode.UserWrite);
        }
    }
    catch (Exception ex)
    {
        _logger?.LogWarning(ex, "Failed to persist incident mode state to {Path}", path);
    }
}
|
||||
|
||||
/// <summary>
/// Deletes the state file if it exists. Failures are logged as warnings and not rethrown.
/// </summary>
/// <param name="ct">Cancellation token (not consulted; the delete is synchronous).</param>
private async Task ClearPersistedStateAsync(CancellationToken ct)
{
    var statePath = GetStateFilePath(_optionsMonitor.CurrentValue);

    try
    {
        var exists = File.Exists(statePath);
        if (exists)
        {
            File.Delete(statePath);
        }
    }
    catch (Exception ex)
    {
        _logger?.LogWarning(ex, "Failed to clear incident mode state file {Path}", statePath);
    }

    // No asynchronous work is performed; this keeps the method's async shape.
    await Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Resolves the state file location: the configured <c>StateFilePath</c> when it is
/// non-empty, otherwise <c>.stellaops/incident-mode.json</c> under the user profile folder.
/// </summary>
/// <param name="options">The options to read the configured path from.</param>
/// <returns>The path of the state file.</returns>
private static string GetStateFilePath(IncidentModeOptions options)
    => string.IsNullOrEmpty(options.StateFilePath)
        ? Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.UserProfile),
            ".stellaops",
            "incident-mode.json")
        : options.StateFilePath;
|
||||
|
||||
/// <summary>
/// Reports whether sealed mode is active. Placeholder: currently always <c>false</c>.
/// </summary>
// This would integrate with the sealed mode service when implemented.
// For now, check via options or context.
private bool IsSealedModeActive() => false;
|
||||
|
||||
/// <summary>
/// Writes the activation audit entry to the logger (no-op when no logger is configured).
/// </summary>
/// <param name="state">The activation being audited.</param>
/// <param name="wasReactivation">True to log the action as "reactivated" instead of "activated".</param>
private void EmitActivationAuditEvent(IncidentModeState state, bool wasReactivation)
{
    if (_logger is null)
    {
        return;
    }

    var action = wasReactivation ? "reactivated" : "activated";
    var tenant = state.TenantId ?? "global";
    var expiresAt = state.ExpiresAt.ToString("O");

    _logger.LogInformation(
        "Audit: telemetry.incident.{Action} - tenant={Tenant} actor={Actor} source={Source} expires_at={ExpiresAt} activation_id={ActivationId}",
        action,
        tenant,
        state.Actor,
        state.Source,
        expiresAt,
        state.ActivationId);
}
|
||||
|
||||
/// <summary>
/// Writes the deactivation audit entry to the logger (no-op when no logger is configured).
/// </summary>
/// <param name="state">The activation that ended.</param>
/// <param name="reason">Why it ended; the Expired reason is logged as "expired", anything else as "deactivated".</param>
/// <param name="deactivatedBy">Who ended it; logged as "system" when null.</param>
private void EmitDeactivationAuditEvent(IncidentModeState state, IncidentModeDeactivationReason reason, string? deactivatedBy)
{
    if (_logger is null)
    {
        return;
    }

    var action = reason == IncidentModeDeactivationReason.Expired ? "expired" : "deactivated";
    var tenant = state.TenantId ?? "global";
    var initiator = deactivatedBy ?? "system";

    _logger.LogInformation(
        "Audit: telemetry.incident.{Action} - tenant={Tenant} reason={Reason} deactivated_by={DeactivatedBy} activation_id={ActivationId}",
        action,
        tenant,
        reason,
        initiator,
        state.ActivationId);
}
|
||||
|
||||
/// <summary>
/// Disposes the expiry timer.
/// </summary>
public void Dispose() => _expiryTimer.Dispose();
|
||||
}
|
||||
@@ -0,0 +1,304 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
|
||||
/// File-based exporter for sealed mode telemetry.
|
||||
/// Writes OTLP data to a local file with rotation support.
|
||||
/// </summary>
|
||||
public sealed class SealedModeFileExporter : IDisposable
|
||||
{
|
||||
private readonly IOptionsMonitor<SealedModeTelemetryOptions> _optionsMonitor;
|
||||
private readonly ILogger<SealedModeFileExporter>? _logger;
|
||||
private readonly object _lock = new();
|
||||
private readonly TimeProvider _timeProvider;
|
||||
|
||||
private FileStream? _currentStream;
|
||||
private string? _currentFilePath;
|
||||
private long _currentSize;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>
|
||||
/// Gets whether the exporter has been initialized.
|
||||
/// </summary>
|
||||
public bool IsInitialized => _currentStream is not null;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the current file path being written to.
|
||||
/// </summary>
|
||||
public string? CurrentFilePath => _currentFilePath;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the current file size in bytes.
|
||||
/// </summary>
|
||||
public long CurrentSize => _currentSize;
|
||||
|
||||
/// <summary>
/// Creates the exporter. No file is opened until <see cref="Initialize"/> or the first write.
/// </summary>
/// <param name="optionsMonitor">Source of the current <see cref="SealedModeTelemetryOptions"/>.</param>
/// <param name="logger">Optional logger.</param>
/// <param name="timeProvider">Optional clock; <see cref="TimeProvider.System"/> is used when omitted.</param>
/// <exception cref="ArgumentNullException"><paramref name="optionsMonitor"/> is null.</exception>
public SealedModeFileExporter(
    IOptionsMonitor<SealedModeTelemetryOptions> optionsMonitor,
    ILogger<SealedModeFileExporter>? logger = null,
    TimeProvider? timeProvider = null)
{
    ArgumentNullException.ThrowIfNull(optionsMonitor);

    _optionsMonitor = optionsMonitor;
    _logger = logger;
    _timeProvider = timeProvider ?? TimeProvider.System;
}
|
||||
|
||||
/// <summary>
/// Opens the configured telemetry file for appending, creating its directory if needed.
/// Does nothing when a file is already open.
/// </summary>
/// <remarks>
/// On non-Windows platforms a newly created directory is restricted to the owner, an
/// existing file is checked for group/other read or write access when
/// <c>FailOnInsecurePermissions</c> is set, and the configured <c>FilePermissions</c> are
/// applied after the file is opened. Failures to read or set permissions are logged as
/// warnings and do not abort initialization.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown if no file path is configured, or if the existing file has insecure permissions.
/// </exception>
/// <exception cref="ObjectDisposedException">Thrown if the exporter has been disposed.</exception>
public void Initialize()
{
    var options = _optionsMonitor.CurrentValue;

    lock (_lock)
    {
        // Dispose() returns early once _disposed is set, so a stream opened after
        // disposal would never be closed. Refuse instead.
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(SealedModeFileExporter));
        }

        if (_currentStream is not null)
        {
            return;
        }

        var filePath = options.FilePath;
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new InvalidOperationException("File path is not configured");
        }

        var directory = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory))
        {
            Directory.CreateDirectory(directory);

            // Set directory permissions on Unix
            if (!OperatingSystem.IsWindows())
            {
                try
                {
                    File.SetUnixFileMode(directory, UnixFileMode.UserRead | UnixFileMode.UserWrite | UnixFileMode.UserExecute);
                }
                catch (Exception ex)
                {
                    _logger?.LogWarning(ex, "Failed to set directory permissions for {Directory}", directory);
                }
            }
        }

        // Check existing file permissions
        if (File.Exists(filePath) && options.FailOnInsecurePermissions && !OperatingSystem.IsWindows())
        {
            try
            {
                var mode = File.GetUnixFileMode(filePath);
                if ((mode & (UnixFileMode.OtherRead | UnixFileMode.OtherWrite | UnixFileMode.GroupRead | UnixFileMode.GroupWrite)) != 0)
                {
                    throw new InvalidOperationException(
                        $"Sealed mode telemetry file {filePath} has insecure permissions. " +
                        "File must not be readable or writable by group or others.");
                }
            }
            catch (InvalidOperationException)
            {
                // The insecure-permissions failure must reach the caller.
                throw;
            }
            catch (Exception ex)
            {
                _logger?.LogWarning(ex, "Failed to check file permissions for {FilePath}", filePath);
            }
        }

        _currentFilePath = filePath;
        _currentStream = new FileStream(
            filePath,
            FileMode.Append,
            FileAccess.Write,
            FileShare.Read,
            bufferSize: 4096,
            FileOptions.WriteThrough);

        // Appending to an existing file: continue size accounting from its length.
        _currentSize = _currentStream.Length;

        // Set file permissions on Unix
        if (!OperatingSystem.IsWindows())
        {
            try
            {
                File.SetUnixFileMode(filePath, options.FilePermissions);
            }
            catch (Exception ex)
            {
                _logger?.LogWarning(ex, "Failed to set file permissions for {FilePath}", filePath);
            }
        }

        _logger?.LogInformation(
            "Sealed mode file exporter initialized at {FilePath} (current size: {Size} bytes)",
            filePath,
            _currentSize);
    }
}
|
||||
|
||||
/// <summary>
/// Appends one telemetry record to the file, initializing the exporter on first use and
/// rotating the file first when the record would push it past <c>MaxBytes</c>.
/// </summary>
/// <remarks>
/// Each record is written as a text header line
/// <c>[timestamp][signal][payload length]</c>, followed by the raw payload and a trailing
/// newline, and is flushed immediately. An empty file is never rotated: a single record
/// larger than <c>MaxBytes</c> is written to the fresh file as-is.
/// </remarks>
/// <param name="data">The binary data to write.</param>
/// <param name="signal">The telemetry signal type.</param>
/// <param name="cancellationToken">Cancellation token (not consulted by this synchronous write).</param>
/// <exception cref="ObjectDisposedException">Thrown if the exporter has been disposed.</exception>
public void Write(ReadOnlySpan<byte> data, TelemetrySignal signal, CancellationToken cancellationToken = default)
{
    var options = _optionsMonitor.CurrentValue;

    lock (_lock)
    {
        // Checked under the lock so a concurrent Dispose cannot slip in between the
        // check and the write.
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(SealedModeFileExporter));
        }

        if (_currentStream is null)
        {
            Initialize();
        }

        // Build the header first so the rotation decision sees the full record size.
        var timestamp = _timeProvider.GetUtcNow();
        var header = $"[{timestamp:O}][{signal}][{data.Length}]\n";
        var headerBytes = Encoding.UTF8.GetBytes(header);
        long recordSize = (long)headerBytes.Length + data.Length + 1; // +1 for the trailing newline

        // Rotating an empty file gains nothing and would push an older rotated file
        // out of the retention window, so only rotate when there is content to move.
        if (_currentSize > 0 && _currentSize + recordSize > options.MaxBytes)
        {
            RotateFile();
        }

        var stream = _currentStream
            ?? throw new InvalidOperationException("Sealed mode telemetry file is not open");

        stream.Write(headerBytes);
        stream.Write(data);
        stream.WriteByte((byte)'\n');
        stream.Flush();

        _currentSize += recordSize;
    }
}
|
||||
|
||||
/// <summary>
/// Encodes a string record as UTF-8 and writes it through <see cref="Write"/>.
/// </summary>
/// <param name="record">The string record to write.</param>
/// <param name="signal">The telemetry signal type.</param>
/// <param name="cancellationToken">Cancellation token, forwarded to <see cref="Write"/>.</param>
public void WriteRecord(string record, TelemetrySignal signal, CancellationToken cancellationToken = default)
    => Write(Encoding.UTF8.GetBytes(record), signal, cancellationToken);
|
||||
|
||||
/// <summary>
/// Closes the current file, shifts it and the existing rotated files up by one slot
/// (<c>path</c> to <c>path.1</c>, <c>path.1</c> to <c>path.2</c>, and so on) and opens a
/// fresh, empty file at the base path.
/// </summary>
/// <remarks>
/// Exactly <c>MaxRotatedFiles</c> rotated files are retained: the file in the highest slot
/// is overwritten by its predecessor. With <c>MaxRotatedFiles</c> of zero nothing is
/// shifted and the base file is truncated. A failed move is logged as a warning and
/// rotation continues with the remaining slots.
/// </remarks>
private void RotateFile()
{
    var options = _optionsMonitor.CurrentValue;
    var basePath = _currentFilePath
        ?? throw new InvalidOperationException("Cannot rotate before the exporter is initialized");

    _currentStream?.Dispose();
    _currentStream = null;

    // Walk from the highest slot down so each move lands in a slot that has already
    // been vacated (or holds the oldest file, which is the one to discard).
    for (var slot = options.MaxRotatedFiles; slot >= 1; slot--)
    {
        var sourcePath = slot == 1 ? basePath : $"{basePath}.{slot - 1}";
        var targetPath = $"{basePath}.{slot}";

        if (!File.Exists(sourcePath))
        {
            continue;
        }

        try
        {
            File.Move(sourcePath, targetPath, overwrite: true);
            _logger?.LogDebug("Rotated {OldPath} to {NewPath}", sourcePath, targetPath);
        }
        catch (Exception ex)
        {
            _logger?.LogWarning(ex, "Failed to rotate {OldPath} to {NewPath}", sourcePath, targetPath);
        }
    }

    // Create new file (truncates the base file if it could not be moved away).
    _currentStream = new FileStream(
        basePath,
        FileMode.Create,
        FileAccess.Write,
        FileShare.Read,
        bufferSize: 4096,
        FileOptions.WriteThrough);

    _currentSize = 0;

    // Set file permissions on Unix
    if (!OperatingSystem.IsWindows())
    {
        try
        {
            File.SetUnixFileMode(basePath, options.FilePermissions);
        }
        catch (Exception ex)
        {
            _logger?.LogWarning(ex, "Failed to set file permissions for {FilePath}", basePath);
        }
    }

    _logger?.LogInformation("Rotated sealed mode telemetry file. New file: {Path}", basePath);
}
|
||||
|
||||
/// <summary>
/// Flushes the open file stream, if there is one, while holding the exporter lock.
/// </summary>
public void Flush()
{
    lock (_lock)
    {
        if (_currentStream is { } openStream)
        {
            openStream.Flush();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Closes the open file stream, if any, and marks the exporter as disposed.
/// Calls after the first are no-ops.
/// </summary>
public void Dispose()
{
    if (!_disposed)
    {
        lock (_lock)
        {
            _currentStream?.Dispose();
            _currentStream = null;
            _disposed = true;
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,166 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
|
||||
/// Options for sealed-mode telemetry behavior.
|
||||
/// </summary>
|
||||
public sealed class SealedModeTelemetryOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Configuration section name.
|
||||
/// </summary>
|
||||
public const string SectionName = "Telemetry:Sealed";
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets whether sealed mode telemetry is enabled.
|
||||
/// This is typically driven by <see cref="StellaOps.AirGap.Policy.IEgressPolicy.IsSealed"/>.
|
||||
/// </summary>
|
||||
public bool Enabled { get; set; }
|
||||
|
||||
/// <summary>
/// Gets or sets which local exporter is used while sealed.
/// Defaults to <see cref="SealedModeExporterType.File"/>.
/// </summary>
public SealedModeExporterType Exporter { get; set; } = SealedModeExporterType.File;

/// <summary>
/// Gets or sets the output path for the file exporter.
/// Validation requires a non-blank value when <see cref="Exporter"/> is
/// <see cref="SealedModeExporterType.File"/>.
/// </summary>
public string FilePath { get; set; } = "./logs/telemetry-sealed.otlp";

/// <summary>
/// Gets or sets the size, in bytes, the export file may reach before rotation.
/// Defaults to 10 MB (10,485,760 bytes); validation requires a positive value.
/// </summary>
public long MaxBytes { get; set; } = 10L * 1024 * 1024;

/// <summary>
/// Gets or sets how many rotated files are retained.
/// Defaults to 3; validation rejects negative values.
/// </summary>
public int MaxRotatedFiles { get; set; } = 3;

/// <summary>
/// Gets or sets the sampling ceiling applied in sealed mode, as a percentage (0-100).
/// Defaults to 10%.
/// </summary>
public int MaxSamplingPercent { get; set; } = 10;

/// <summary>
/// Gets or sets whether scrubbing is forced regardless of default settings.
/// Enabled by default.
/// </summary>
public bool ForceScrub { get; set; } = true;

/// <summary>
/// Gets or sets whether exemplars are suppressed in sealed mode.
/// Enabled by default.
/// </summary>
public bool SuppressExemplars { get; set; } = true;

/// <summary>
/// Gets or sets the name of the tag that marks telemetry as sealed.
/// Defaults to <c>sealed</c>.
/// </summary>
public string SealedTagName { get; set; } = "sealed";

/// <summary>
/// Gets or sets whether a scrubbed indicator tag is added.
/// Enabled by default.
/// </summary>
public bool AddScrubbedTag { get; set; } = true;

/// <summary>
/// Gets or sets extra tags to attach in sealed mode.
/// Empty by default.
/// </summary>
public Dictionary<string, string> AdditionalTags { get; set; } = new Dictionary<string, string>();

/// <summary>
/// Gets or sets the clock skew that is tolerated before a warning is raised.
/// Defaults to 500ms; validation requires a positive value.
/// </summary>
public TimeSpan ClockSkewThreshold { get; set; } = TimeSpan.FromMilliseconds(500);

/// <summary>
/// Gets or sets whether incident mode may override the sampling ceiling.
/// Enabled by default.
/// </summary>
public bool AllowIncidentModeOverride { get; set; } = true;

/// <summary>
/// Gets or sets the required permissions of the export file (Unix only).
/// Defaults to 0600 (owner read/write only).
/// </summary>
public UnixFileMode FilePermissions { get; set; } = UnixFileMode.UserWrite | UnixFileMode.UserRead;

/// <summary>
/// Gets or sets whether startup fails when the file path has insecure permissions.
/// Enabled by default.
/// </summary>
public bool FailOnInsecurePermissions { get; set; } = true;
|
||||
|
||||
/// <summary>
/// Checks the configured values for consistency.
/// </summary>
/// <returns>
/// One message per violated rule, in the order the rules are checked;
/// an empty list when every rule is satisfied.
/// </returns>
public List<string> Validate()
{
    List<string> problems = new();

    // The sampling ceiling is a percentage.
    if (MaxSamplingPercent is < 0 or > 100)
    {
        problems.Add($"MaxSamplingPercent ({MaxSamplingPercent}) must be between 0 and 100");
    }

    // Zero or negative rotation sizes are rejected.
    if (MaxBytes < 1)
    {
        problems.Add("MaxBytes must be positive");
    }

    // Zero rotated files is allowed; only negative counts are rejected.
    if (MaxRotatedFiles is < 0)
    {
        problems.Add("MaxRotatedFiles cannot be negative");
    }

    // A path is only mandatory for the file exporter.
    if (Exporter == SealedModeExporterType.File && string.IsNullOrWhiteSpace(FilePath))
    {
        problems.Add("FilePath is required when Exporter is File");
    }

    if (ClockSkewThreshold.Ticks <= 0)
    {
        problems.Add("ClockSkewThreshold must be positive");
    }

    return problems;
}
|
||||
|
||||
/// <summary>
/// Gets the effective sampling rate as a decimal (0.0-1.0).
/// </summary>
/// <param name="incidentModeActive">Whether incident mode is active.</param>
/// <param name="incidentSamplingRate">
/// The sampling rate requested by incident mode. It is only consulted when
/// <paramref name="incidentModeActive"/> is <see langword="true"/> and
/// <see cref="AllowIncidentModeOverride"/> is enabled.
/// </param>
/// <returns>
/// The incident rate clamped to 0.0-1.0 when the override applies; otherwise the
/// sealed-mode ceiling (<see cref="MaxSamplingPercent"/> / 100, clamped to 0.0-1.0).
/// </returns>
public double GetEffectiveSamplingRate(bool incidentModeActive, double incidentSamplingRate)
{
    // Clamp the ceiling so a misconfigured percentage (when Validate() was not run)
    // can never produce a rate outside the documented 0.0-1.0 range.
    var maxRate = Math.Clamp(MaxSamplingPercent, 0, 100) / 100.0;

    if (!incidentModeActive || !AllowIncidentModeOverride)
    {
        return maxRate;
    }

    // NaN cannot be clamped (every comparison is false); fall back to the sealed ceiling.
    if (double.IsNaN(incidentSamplingRate))
    {
        return maxRate;
    }

    // Incident mode can override up to 100%, and never below 0%.
    return Math.Clamp(incidentSamplingRate, 0.0, 1.0);
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Identifies the local exporter used for telemetry while in sealed mode.
/// </summary>
public enum SealedModeExporterType
{
    /// <summary>
    /// Telemetry is kept in an in-memory ring buffer.
    /// </summary>
    Memory = 0,

    /// <summary>
    /// Telemetry is written to a file in OTLP format.
    /// </summary>
    File = 1,
}
|
||||
@@ -0,0 +1,286 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.AirGap.Policy;
|
||||
|
||||
namespace StellaOps.Telemetry.Core;
|
||||
|
||||
/// <summary>
/// Default implementation of <see cref="ISealedModeTelemetryService"/>.
/// </summary>
/// <remarks>
/// The sealed state comes from the injected <see cref="IEgressPolicy"/> when one is supplied,
/// otherwise from <see cref="SealedModeTelemetryOptions.Enabled"/>. Only in the latter case does
/// the service subscribe to option changes and raise seal/unseal events itself.
/// </remarks>
public sealed class SealedModeTelemetryService : ISealedModeTelemetryService, IDisposable
{
    private static readonly ActivitySource ActivitySource = new("StellaOps.Telemetry.SealedMode", "1.0.0");
    private static readonly Meter Meter = new("StellaOps.Telemetry.SealedMode", "1.0.0");

    private readonly IOptionsMonitor<SealedModeTelemetryOptions> _optionsMonitor;
    private readonly IEgressPolicy? _egressPolicy;
    private readonly IIncidentModeService? _incidentModeService;
    private readonly ILogger<SealedModeTelemetryService>? _logger;
    private readonly TimeProvider _timeProvider;
    private readonly object _lock = new();

    // Registration handed back by IOptionsMonitor.OnChange. It stays null when an egress
    // policy drives the sealed state, because no subscription is made in that case.
    private readonly IDisposable? _optionsChangeSubscription;

    private readonly Counter<long> _sealEventsCounter;
    private readonly Counter<long> _unsealEventsCounter;
    private readonly Counter<long> _driftEventsCounter;
    private readonly Counter<long> _blockedExportsCounter;

    // Both fields are only touched under _lock (apart from the constructor).
    private bool _previousSealedState;
    private DateTimeOffset? _lastStateChangeTime;

    /// <inheritdoc/>
    public bool IsSealed => _egressPolicy?.IsSealed ?? _optionsMonitor.CurrentValue.Enabled;

    /// <inheritdoc/>
    public double EffectiveSamplingRate
    {
        get
        {
            if (!IsSealed)
            {
                return 1.0; // Full sampling when not sealed
            }

            var options = _optionsMonitor.CurrentValue;
            var incidentActive = _incidentModeService?.IsActive ?? false;

            // Incident mode asks for full sampling; otherwise the sealed ceiling is requested.
            var incidentRate = incidentActive ? 1.0 : options.MaxSamplingPercent / 100.0;

            return options.GetEffectiveSamplingRate(incidentActive, incidentRate);
        }
    }

    /// <inheritdoc/>
    public bool IsIncidentModeOverrideActive =>
        IsSealed &&
        (_incidentModeService?.IsActive ?? false) &&
        _optionsMonitor.CurrentValue.AllowIncidentModeOverride;

    /// <inheritdoc/>
    public event EventHandler<SealedModeStateChangedEventArgs>? StateChanged;

    /// <summary>
    /// Initializes a new instance of <see cref="SealedModeTelemetryService"/>.
    /// </summary>
    /// <param name="optionsMonitor">Source of the current sealed-mode options.</param>
    /// <param name="egressPolicy">
    /// Optional egress policy. When supplied, its <c>IsSealed</c> value decides the sealed state
    /// and option changes are not observed.
    /// </param>
    /// <param name="incidentModeService">Optional incident mode service consulted for sampling overrides.</param>
    /// <param name="logger">Optional logger.</param>
    /// <param name="timeProvider">Optional clock; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="optionsMonitor"/> is <see langword="null"/>.</exception>
    public SealedModeTelemetryService(
        IOptionsMonitor<SealedModeTelemetryOptions> optionsMonitor,
        IEgressPolicy? egressPolicy = null,
        IIncidentModeService? incidentModeService = null,
        ILogger<SealedModeTelemetryService>? logger = null,
        TimeProvider? timeProvider = null)
    {
        _optionsMonitor = optionsMonitor ?? throw new ArgumentNullException(nameof(optionsMonitor));
        _egressPolicy = egressPolicy;
        _incidentModeService = incidentModeService;
        _logger = logger;
        _timeProvider = timeProvider ?? TimeProvider.System;

        // Initialize metrics
        _sealEventsCounter = Meter.CreateCounter<long>(
            "stellaops.telemetry.sealed.seal_events",
            unit: "{event}",
            description: "Count of seal events (entries into sealed mode)");

        _unsealEventsCounter = Meter.CreateCounter<long>(
            "stellaops.telemetry.sealed.unseal_events",
            unit: "{event}",
            description: "Count of unseal events (exits from sealed mode)");

        _driftEventsCounter = Meter.CreateCounter<long>(
            "stellaops.telemetry.sealed.drift_events",
            unit: "{event}",
            description: "Count of drift events when external export was blocked");

        _blockedExportsCounter = Meter.CreateCounter<long>(
            "stellaops.telemetry.sealed.blocked_exports",
            unit: "{request}",
            description: "Count of blocked external export requests");

        _previousSealedState = IsSealed;

        // Monitor for state changes. The registration is kept so Dispose() can remove the
        // callback; otherwise the options monitor would keep invoking this instance.
        if (_egressPolicy is null)
        {
            _optionsChangeSubscription = _optionsMonitor.OnChange(OnOptionsChanged);
        }
    }

    /// <summary>
    /// Reacts to an options reload by recording a seal or unseal event when
    /// <see cref="SealedModeTelemetryOptions.Enabled"/> differs from the last observed state.
    /// </summary>
    private void OnOptionsChanged(SealedModeTelemetryOptions options, string? name)
    {
        var currentSealed = options.Enabled;
        bool stateChanged;

        lock (_lock)
        {
            stateChanged = currentSealed != _previousSealedState;
            if (stateChanged)
            {
                _previousSealedState = currentSealed;
                _lastStateChangeTime = _timeProvider.GetUtcNow();
            }
        }

        if (!stateChanged)
        {
            return;
        }

        // Events are recorded outside the lock so StateChanged handlers never run while it is held.
        if (currentSealed)
        {
            RecordSealEvent("Configuration change", "system");
        }
        else
        {
            RecordUnsealEvent("Configuration change", "system");
        }
    }

    /// <inheritdoc/>
    public IReadOnlyDictionary<string, string> GetSealedModeTags()
    {
        if (!IsSealed)
        {
            return new Dictionary<string, string>();
        }

        var options = _optionsMonitor.CurrentValue;
        var tags = new Dictionary<string, string>
        {
            [options.SealedTagName] = "true"
        };

        if (options.AddScrubbedTag && options.ForceScrub)
        {
            tags["scrubbed"] = "true";
        }

        if (IsIncidentModeOverrideActive)
        {
            tags["incident_override"] = "true";
        }

        // Applied last, so a configured tag replaces a built-in tag with the same key.
        foreach (var (key, value) in options.AdditionalTags)
        {
            tags[key] = value;
        }

        return tags;
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="endpoint"/> is <see langword="null"/>.</exception>
    public bool IsExternalExportAllowed(Uri endpoint)
    {
        // Validate up front so a null endpoint fails the same way whether or not the
        // service is sealed, instead of only surfacing once sealed mode is entered.
        ArgumentNullException.ThrowIfNull(endpoint);

        if (!IsSealed)
        {
            return true;
        }

        _blockedExportsCounter.Add(1, new KeyValuePair<string, object?>("endpoint_host", endpoint.Host));

        _logger?.LogDebug(
            "External export to {Endpoint} blocked in sealed mode",
            endpoint);

        return false;
    }

    /// <inheritdoc/>
    public SealedModeExporterConfig? GetLocalExporterConfig()
    {
        if (!IsSealed)
        {
            return null;
        }

        var options = _optionsMonitor.CurrentValue;
        return new SealedModeExporterConfig
        {
            Type = options.Exporter,
            FilePath = options.FilePath,
            MaxBytes = options.MaxBytes,
            MaxRotatedFiles = options.MaxRotatedFiles
        };
    }

    /// <inheritdoc/>
    public void RecordSealEvent(string? reason = null, string? actor = null)
    {
        var now = _timeProvider.GetUtcNow();

        // Telemetry gets placeholder values; the raised event keeps the caller's original
        // (possibly null) reason and actor.
        var reasonLabel = reason ?? "unspecified";
        var actorLabel = actor ?? "unknown";

        using var activity = ActivitySource.StartActivity("SealMode", ActivityKind.Internal);
        activity?.SetTag("sealed.reason", reasonLabel);
        activity?.SetTag("sealed.actor", actorLabel);
        activity?.SetTag("sealed.timestamp", now.ToString("O"));

        _sealEventsCounter.Add(1,
            new KeyValuePair<string, object?>("reason", reasonLabel),
            new KeyValuePair<string, object?>("actor", actorLabel));

        _logger?.LogInformation(
            "Sealed mode activated. Reason: {Reason}, Actor: {Actor}, Timestamp: {Timestamp}",
            reasonLabel,
            actorLabel,
            now);

        StateChanged?.Invoke(this, new SealedModeStateChangedEventArgs
        {
            IsSealed = true,
            Timestamp = now,
            Reason = reason,
            Actor = actor
        });
    }

    /// <inheritdoc/>
    public void RecordUnsealEvent(string? reason = null, string? actor = null)
    {
        var now = _timeProvider.GetUtcNow();

        // Telemetry gets placeholder values; the raised event keeps the caller's original
        // (possibly null) reason and actor.
        var reasonLabel = reason ?? "unspecified";
        var actorLabel = actor ?? "unknown";

        using var activity = ActivitySource.StartActivity("UnsealMode", ActivityKind.Internal);
        activity?.SetTag("sealed.reason", reasonLabel);
        activity?.SetTag("sealed.actor", actorLabel);
        activity?.SetTag("sealed.timestamp", now.ToString("O"));

        _unsealEventsCounter.Add(1,
            new KeyValuePair<string, object?>("reason", reasonLabel),
            new KeyValuePair<string, object?>("actor", actorLabel));

        _logger?.LogInformation(
            "Sealed mode deactivated. Reason: {Reason}, Actor: {Actor}, Timestamp: {Timestamp}",
            reasonLabel,
            actorLabel,
            now);

        StateChanged?.Invoke(this, new SealedModeStateChangedEventArgs
        {
            IsSealed = false,
            Timestamp = now,
            Reason = reason,
            Actor = actor
        });
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="endpoint"/> is <see langword="null"/>.</exception>
    public void RecordDriftEvent(Uri endpoint, TelemetrySignal signal)
    {
        ArgumentNullException.ThrowIfNull(endpoint);

        using var activity = ActivitySource.StartActivity("SealedModeDrift", ActivityKind.Internal);
        activity?.SetTag("drift.endpoint", endpoint.ToString());
        activity?.SetTag("drift.signal", signal.ToString());
        activity?.SetTag("drift.timestamp", _timeProvider.GetUtcNow().ToString("O"));

        _driftEventsCounter.Add(1,
            new KeyValuePair<string, object?>("endpoint_host", endpoint.Host),
            new KeyValuePair<string, object?>("signal", signal.ToString()));

        _logger?.LogWarning(
            "Telemetry drift detected: external {Signal} export to {Endpoint} blocked in sealed mode",
            signal,
            endpoint);
    }

    /// <summary>
    /// Stops listening for option changes. The static <see cref="Meter"/> and
    /// <see cref="ActivitySource"/> are shared by all instances and are left untouched.
    /// </summary>
    public void Dispose()
    {
        _optionsChangeSubscription?.Dispose();
    }
}
|
||||
@@ -83,6 +83,71 @@ public static class TelemetryServiceCollectionExtensions
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
/// Adds the incident mode services, which toggle enhanced telemetry during incidents.
/// </summary>
/// <param name="services">Service collection to mutate.</param>
/// <param name="configuration">
/// Optional configuration; when supplied, its <see cref="IncidentModeOptions.SectionName"/>
/// section is bound to <see cref="IncidentModeOptions"/>.
/// </param>
/// <param name="configureOptions">Optional callback applied to the options after binding.</param>
/// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
public static IServiceCollection AddIncidentMode(
    this IServiceCollection services,
    IConfiguration? configuration = null,
    Action<IncidentModeOptions>? configureOptions = null)
{
    ArgumentNullException.ThrowIfNull(services);

    var builder = services.AddOptions<IncidentModeOptions>();

    if (configuration is { } root)
    {
        var section = root.GetSection(IncidentModeOptions.SectionName);
        builder.Bind(section);
    }

    if (configureOptions is { } configure)
    {
        builder.Configure(configure);
    }

    // The interface registration resolves to the concrete singleton, so both share one instance.
    services.TryAddSingleton<IncidentModeService>();
    services.TryAddSingleton<IIncidentModeService>(
        provider => provider.GetRequiredService<IncidentModeService>());

    return services;
}
|
||||
|
||||
/// <summary>
/// Adds the sealed-mode telemetry services.
/// </summary>
/// <param name="services">Service collection to mutate.</param>
/// <param name="configuration">
/// Optional configuration; when supplied, its <see cref="SealedModeTelemetryOptions.SectionName"/>
/// section is bound to <see cref="SealedModeTelemetryOptions"/>.
/// </param>
/// <param name="configureOptions">Optional callback applied to the options after binding.</param>
/// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
public static IServiceCollection AddSealedModeTelemetry(
    this IServiceCollection services,
    IConfiguration? configuration = null,
    Action<SealedModeTelemetryOptions>? configureOptions = null)
{
    ArgumentNullException.ThrowIfNull(services);

    var builder = services.AddOptions<SealedModeTelemetryOptions>();

    if (configuration is { } root)
    {
        var section = root.GetSection(SealedModeTelemetryOptions.SectionName);
        builder.Bind(section);
    }

    if (configureOptions is { } configure)
    {
        builder.Configure(configure);
    }

    // The interface registration resolves to the concrete singleton, so both share one instance.
    services.TryAddSingleton<SealedModeTelemetryService>();
    services.TryAddSingleton<ISealedModeTelemetryService>(
        provider => provider.GetRequiredService<SealedModeTelemetryService>());
    services.TryAddSingleton<SealedModeFileExporter>();

    return services;
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the StellaOps telemetry stack with sealed-mode enforcement.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user