partly or unimplemented features - now implemented

This commit is contained in:
master
2026-02-09 08:53:51 +02:00
parent 1bf6bbf395
commit 4bdc298ec1
674 changed files with 90194 additions and 2271 deletions

View File

@@ -0,0 +1,327 @@
using StellaOps.Telemetry.Core;
using StellaOps.TestKit;
namespace StellaOps.Telemetry.Core.Tests;
public sealed class DoraMetricsServiceTests : IDisposable
{
private readonly DoraMetrics _metrics;
private readonly InMemoryDoraMetricsService _service;
/// <summary>
/// Builds the <see cref="DoraMetrics"/> instance and the in-memory service under test.
/// </summary>
public DoraMetricsServiceTests()
{
    var metrics = new DoraMetrics();
    _metrics = metrics;
    _service = new InMemoryDoraMetricsService(metrics);
}

/// <summary>
/// Disposes the metrics instance created in the constructor.
/// </summary>
public void Dispose()
{
    _metrics.Dispose();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task RecordDeploymentAsync_StoresDeployment()
{
    // Arrange
    const string deploymentId = "deploy-001";
    var recorded = CreateDeployment(deploymentId, DoraDeploymentOutcome.Success);

    // Act
    await _service.RecordDeploymentAsync(recorded);
    var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(1);
    var stored = await _service
        .GetDeploymentsAsync("acme", null, windowStart, windowEnd)
        .ToListAsync();

    // Assert: exactly the deployment we recorded comes back.
    var only = Assert.Single(stored);
    Assert.Equal(deploymentId, only.DeploymentId);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task RecordIncidentAsync_StoresIncident()
{
    // Arrange
    const string incidentId = "inc-001";
    var openIncident = CreateIncident(incidentId, isResolved: false);

    // Act
    await _service.RecordIncidentAsync(openIncident);
    var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(1);
    var stored = await _service
        .GetIncidentsAsync("acme", null, windowStart, windowEnd)
        .ToListAsync();

    // Assert: the unresolved incident is stored and still reported as open.
    var only = Assert.Single(stored);
    Assert.Equal(incidentId, only.IncidentId);
    Assert.True(only.IsOpen);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task ResolveIncidentAsync_UpdatesIncident()
{
    // Arrange: an open incident is already recorded.
    const string incidentId = "inc-002";
    await _service.RecordIncidentAsync(CreateIncident(incidentId, isResolved: false));

    // Act
    var resolveTime = DateTimeOffset.UtcNow;
    await _service.ResolveIncidentAsync("acme", incidentId, resolveTime);
    var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(1);
    var stored = await _service
        .GetIncidentsAsync("acme", null, windowStart, windowEnd)
        .ToListAsync();

    // Assert: the same incident is now closed with the supplied resolution time.
    var only = Assert.Single(stored);
    Assert.False(only.IsOpen);
    Assert.Equal(resolveTime, only.ResolvedAt);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetSummaryAsync_CalculatesDeploymentFrequency()
{
    // Arrange: one successful deployment per day for the ten days leading up to "now".
    const int dayCount = 10;
    var periodEnd = DateTimeOffset.UtcNow;
    for (var day = 0; day < dayCount; day++)
    {
        var deployedAt = periodEnd.AddDays(-10 + day);
        await _service.RecordDeploymentAsync(new DoraDeploymentEvent(
            DeploymentId: $"deploy-{day:000}",
            TenantId: "acme",
            Environment: "production",
            CommitSha: $"sha{day}",
            CommitTimestamp: deployedAt.AddHours(-1),
            DeploymentTimestamp: deployedAt,
            Outcome: DoraDeploymentOutcome.Success,
            DurationMs: 60000));
    }

    // Act
    var periodStart = periodEnd.AddDays(-10);
    var summary = await _service.GetSummaryAsync("acme", null, periodStart, periodEnd);

    // Assert: ten deployments over ten days is one per day.
    Assert.Equal(10, summary.DeploymentCount);
    Assert.Equal(1.0, summary.DeploymentFrequencyPerDay, precision: 1);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetSummaryAsync_CalculatesChangeFailureRate()
{
    var baseTime = DateTimeOffset.UtcNow;

    // Records `count` deployments named "<idPrefix>-<n>" with the given outcome.
    async Task RecordBatchAsync(string idPrefix, int count, DoraDeploymentOutcome outcome)
    {
        for (var n = 0; n < count; n++)
        {
            await _service.RecordDeploymentAsync(CreateDeployment($"{idPrefix}-{n}", outcome));
        }
    }

    // Arrange: 7 successes and 3 rollbacks give a 30% change failure rate.
    await RecordBatchAsync("success", 7, DoraDeploymentOutcome.Success);
    await RecordBatchAsync("rollback", 3, DoraDeploymentOutcome.Rollback);

    // Act
    var summary = await _service.GetSummaryAsync(
        "acme",
        null,
        baseTime.AddDays(-1),
        baseTime.AddDays(1));

    // Assert
    Assert.Equal(10, summary.DeploymentCount);
    Assert.Equal(7, summary.SuccessfulDeployments);
    Assert.Equal(3, summary.FailedDeployments);
    Assert.Equal(30.0, summary.ChangeFailureRatePercent, precision: 1);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetSummaryAsync_CalculatesMedianLeadTime()
{
    // Arrange: five deployments at the same instant whose commits are 1h..5h older,
    // so the median lead time is 3h.
    var deployedAt = DateTimeOffset.UtcNow;
    for (var leadHours = 1; leadHours <= 5; leadHours++)
    {
        await _service.RecordDeploymentAsync(new DoraDeploymentEvent(
            DeploymentId: $"deploy-{leadHours}",
            TenantId: "acme",
            Environment: "production",
            CommitSha: $"sha{leadHours}",
            CommitTimestamp: deployedAt.AddHours(-leadHours),
            DeploymentTimestamp: deployedAt,
            Outcome: DoraDeploymentOutcome.Success,
            DurationMs: 30000));
    }

    // Act
    var summary = await _service.GetSummaryAsync(
        "acme",
        null,
        deployedAt.AddDays(-1),
        deployedAt.AddDays(1));

    // Assert
    Assert.Equal(3.0, summary.MedianLeadTimeHours, precision: 1);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetSummaryAsync_CalculatesMTTR()
{
    // Arrange: three incidents resolved at the same instant after 1h, 2h and 3h,
    // so the mean recovery time is 2h.
    var baseTime = DateTimeOffset.UtcNow;
    var resolvedAt = baseTime.AddHours(-1);
    for (var recoveryHours = 1; recoveryHours <= 3; recoveryHours++)
    {
        await _service.RecordIncidentAsync(new DoraIncidentEvent(
            IncidentId: $"inc-{recoveryHours}",
            TenantId: "acme",
            Environment: "production",
            Severity: DoraIncidentSeverity.High,
            StartedAt: baseTime.AddHours(-(recoveryHours + 1)),
            ResolvedAt: resolvedAt));
    }

    // Act
    var summary = await _service.GetSummaryAsync(
        "acme",
        null,
        baseTime.AddDays(-1),
        baseTime.AddDays(1));

    // Assert
    Assert.Equal(2.0, summary.MeanTimeToRecoveryHours, precision: 1);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetSummaryAsync_ClassifiesPerformanceLevel()
{
    var periodEnd = DateTimeOffset.UtcNow;

    // Arrange: thirty daily deployments, each with a 30-minute lead time and no failures.
    for (var day = 0; day < 30; day++)
    {
        var deployedAt = periodEnd.AddDays(-30 + day);
        await _service.RecordDeploymentAsync(new DoraDeploymentEvent(
            DeploymentId: $"deploy-{day:000}",
            TenantId: "acme",
            Environment: "production",
            CommitSha: $"sha{day}",
            CommitTimestamp: deployedAt.AddMinutes(-30),
            DeploymentTimestamp: deployedAt,
            Outcome: DoraDeploymentOutcome.Success,
            DurationMs: 30000));
    }

    // Arrange: a single incident that was resolved after 30 minutes.
    await _service.RecordIncidentAsync(new DoraIncidentEvent(
        IncidentId: "inc-1",
        TenantId: "acme",
        Environment: "production",
        Severity: DoraIncidentSeverity.High,
        StartedAt: periodEnd.AddMinutes(-30),
        ResolvedAt: periodEnd));

    // Act
    var periodStart = periodEnd.AddDays(-30);
    var summary = await _service.GetSummaryAsync("acme", null, periodStart, periodEnd);

    // Assert
    Assert.Equal(DoraPerformanceLevel.Elite, summary.PerformanceLevel);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetDeploymentsAsync_FiltersbyEnvironment()
{
    // Arrange: two production deployments with one staging deployment in between.
    var seeds = new[]
    {
        (Id: "prod-1", Environment: "production"),
        (Id: "stage-1", Environment: "staging"),
        (Id: "prod-2", Environment: "production"),
    };
    foreach (var (id, environment) in seeds)
    {
        await _service.RecordDeploymentAsync(
            CreateDeployment(id, DoraDeploymentOutcome.Success, environment));
    }

    // Act
    var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(1);
    var prodDeployments = await _service
        .GetDeploymentsAsync("acme", "production", windowStart, windowEnd)
        .ToListAsync();

    // Assert: only the production deployments are returned.
    Assert.Equal(2, prodDeployments.Count);
    Assert.All(prodDeployments, d => Assert.Equal("production", d.Environment));
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetIncidentsAsync_ExcludesOpenWhenRequested()
{
    // Arrange: one resolved incident between two open ones.
    var seeds = new[]
    {
        (Id: "open-1", Resolved: false),
        (Id: "resolved-1", Resolved: true),
        (Id: "open-2", Resolved: false),
    };
    foreach (var (id, resolved) in seeds)
    {
        await _service.RecordIncidentAsync(CreateIncident(id, isResolved: resolved));
    }

    // Act
    var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(1);
    var resolvedOnly = await _service
        .GetIncidentsAsync("acme", null, windowStart, windowEnd, includeOpen: false)
        .ToListAsync();

    // Assert: only the resolved incident survives the filter.
    var only = Assert.Single(resolvedOnly);
    Assert.False(only.IsOpen);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task TenantIsolation_DeploymentsIsolatedByTenant()
{
    // Arrange: one deployment for each of two tenants.
    var forTenant1 = CreateDeployment("tenant1-deploy", DoraDeploymentOutcome.Success, tenant: "tenant1");
    var forTenant2 = CreateDeployment("tenant2-deploy", DoraDeploymentOutcome.Success, tenant: "tenant2");
    foreach (var deployment in new[] { forTenant1, forTenant2 })
    {
        await _service.RecordDeploymentAsync(deployment);
    }

    // Act: query as tenant1 only.
    var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(1);
    var tenant1Deployments = await _service
        .GetDeploymentsAsync("tenant1", null, windowStart, windowEnd)
        .ToListAsync();

    // Assert: tenant2's deployment is not visible.
    var only = Assert.Single(tenant1Deployments);
    Assert.Equal("tenant1-deploy", only.DeploymentId);
}
/// <summary>
/// Builds a deployment event committed one hour before "now" and deployed "now".
/// </summary>
/// <param name="id">Deployment id; also used to derive the commit SHA ("sha-&lt;id&gt;").</param>
/// <param name="outcome">Outcome to assign to the deployment.</param>
/// <param name="environment">Target environment.</param>
/// <param name="tenant">Owning tenant.</param>
private static DoraDeploymentEvent CreateDeployment(
    string id,
    DoraDeploymentOutcome outcome,
    string environment = "production",
    string tenant = "acme")
{
    // The clock is read twice, commit first, exactly as the event's arguments are ordered.
    var committedAt = DateTimeOffset.UtcNow.AddHours(-1);
    var deployedAt = DateTimeOffset.UtcNow;

    return new DoraDeploymentEvent(
        DeploymentId: id,
        TenantId: tenant,
        Environment: environment,
        CommitSha: $"sha-{id}",
        CommitTimestamp: committedAt,
        DeploymentTimestamp: deployedAt,
        Outcome: outcome,
        DurationMs: 60000);
}
/// <summary>
/// Builds a high-severity production incident that started two hours before "now".
/// </summary>
/// <param name="id">Incident id.</param>
/// <param name="isResolved">When true the incident is resolved "now"; otherwise it has no resolution time.</param>
/// <param name="tenant">Owning tenant.</param>
private static DoraIncidentEvent CreateIncident(
    string id,
    bool isResolved,
    string tenant = "acme")
{
    // Start time is read before the resolution time, matching the argument order of the event.
    var startedAt = DateTimeOffset.UtcNow.AddHours(-2);
    DateTimeOffset? resolvedAt = null;
    if (isResolved)
    {
        resolvedAt = DateTimeOffset.UtcNow;
    }

    return new DoraIncidentEvent(
        IncidentId: id,
        TenantId: tenant,
        Environment: "production",
        Severity: DoraIncidentSeverity.High,
        StartedAt: startedAt,
        ResolvedAt: resolvedAt);
}
}
/// <summary>
/// Test helpers for consuming <see cref="IAsyncEnumerable{T}"/> sequences.
/// </summary>
internal static class AsyncEnumerableExtensions
{
    /// <summary>
    /// Enumerates <paramref name="source"/> to completion and returns its items, in order, as a new list.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">The asynchronous sequence to drain.</param>
    /// <param name="cancellationToken">
    /// Token forwarded to the sequence's enumerator via <c>WithCancellation</c>; defaults to none.
    /// </param>
    /// <returns>A list containing every item produced by <paramref name="source"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static async Task<List<T>> ToListAsync<T>(
        this IAsyncEnumerable<T> source,
        System.Threading.CancellationToken cancellationToken = default)
    {
        // Fail with a descriptive argument error instead of a NullReferenceException from await foreach.
        ArgumentNullException.ThrowIfNull(source);

        var list = new List<T>();
        await foreach (var item in source.WithCancellation(cancellationToken))
        {
            list.Add(item);
        }
        return list;
    }
}

View File

@@ -0,0 +1,266 @@
using System.Diagnostics.Metrics;
using StellaOps.Telemetry.Core;
using StellaOps.TestKit;
namespace StellaOps.Telemetry.Core.Tests;
public sealed class DoraMetricsTests : IDisposable
{
private readonly MeterListener _listener;

// The listener subscribes to every meter named DoraMetrics.MeterName in the process, so
// DoraMetrics instances created by other test classes can invoke the callbacks from their
// own threads when test classes run in parallel. A concurrent queue keeps appends and the
// enumeration done by the assertions safe; List<T> is not safe for that combination.
private readonly System.Collections.Concurrent.ConcurrentQueue<RecordedMeasurement> _measurements = new();

/// <summary>
/// Starts a <see cref="MeterListener"/> that captures every <c>long</c> and <c>double</c>
/// measurement published by instruments of the DORA meter.
/// </summary>
public DoraMetricsTests()
{
    _listener = new MeterListener();

    // Only instruments belonging to the DORA meter are of interest.
    _listener.InstrumentPublished = (instrument, listener) =>
    {
        if (instrument.Meter.Name == DoraMetrics.MeterName)
        {
            listener.EnableMeasurementEvents(instrument);
        }
    };

    // Histograms report double values.
    _listener.SetMeasurementEventCallback<double>((instrument, measurement, tags, state) =>
    {
        _measurements.Enqueue(new RecordedMeasurement(instrument.Name, measurement, tags.ToArray()));
    });

    // Counters report long values.
    _listener.SetMeasurementEventCallback<long>((instrument, measurement, tags, state) =>
    {
        _measurements.Enqueue(new RecordedMeasurement(instrument.Name, measurement, tags.ToArray()));
    });

    _listener.Start();
}

/// <summary>
/// Stops listening by disposing the <see cref="MeterListener"/>.
/// </summary>
public void Dispose() => _listener.Dispose();
[Trait("Category", TestCategories.Unit)]
[Fact]
public void RecordDeployment_WithSuccessfulDeployment_RecordsMetrics()
{
    // Arrange
    using var metrics = new DoraMetrics();

    // Act: a successful deployment with a two-hour lead time.
    metrics.RecordDeployment(new DoraDeploymentEvent(
        DeploymentId: "deploy-001",
        TenantId: "acme",
        Environment: "production",
        CommitSha: "abc123",
        CommitTimestamp: DateTimeOffset.UtcNow.AddHours(-2),
        DeploymentTimestamp: DateTimeOffset.UtcNow,
        Outcome: DoraDeploymentOutcome.Success,
        DurationMs: 120_000));

    // Assert: both counters incremented by one and both histograms received a sample.
    Assert.Contains(_measurements, m => m is { Name: "dora_deployments_total", Value: 1L });
    Assert.Contains(_measurements, m => m is { Name: "dora_deployment_success_total", Value: 1L });
    Assert.Contains(_measurements, m => m.Name == "dora_deployment_duration_seconds");
    Assert.Contains(_measurements, m => m.Name == "dora_lead_time_hours");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void RecordDeployment_WithRollback_RecordsFailureMetrics()
{
    // Arrange
    using var metrics = new DoraMetrics();

    // Act: a rolled-back deployment.
    metrics.RecordDeployment(new DoraDeploymentEvent(
        DeploymentId: "deploy-002",
        TenantId: "acme",
        Environment: "production",
        CommitSha: "def456",
        CommitTimestamp: DateTimeOffset.UtcNow.AddDays(-1),
        DeploymentTimestamp: DateTimeOffset.UtcNow,
        Outcome: DoraDeploymentOutcome.Rollback,
        DurationMs: 60_000));

    // Assert: counted as a deployment and as a failure.
    Assert.Contains(_measurements, m => m is { Name: "dora_deployments_total", Value: 1L });
    Assert.Contains(_measurements, m => m is { Name: "dora_deployment_failure_total", Value: 1L });
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void RecordDeployment_ExceedsLeadTimeSlo_RecordsSloBreak()
{
    // Arrange: lead-time SLO of one hour.
    using var metrics = new DoraMetrics(new DoraMetricsOptions { LeadTimeSloHours = 1.0 });

    // Act: the commit is two days (48 hours) older than the deployment.
    metrics.RecordDeployment(new DoraDeploymentEvent(
        DeploymentId: "deploy-003",
        TenantId: "acme",
        Environment: "production",
        CommitSha: "ghi789",
        CommitTimestamp: DateTimeOffset.UtcNow.AddDays(-2),
        DeploymentTimestamp: DateTimeOffset.UtcNow,
        Outcome: DoraDeploymentOutcome.Success,
        DurationMs: 30_000));

    // Assert
    Assert.Contains(_measurements, m => m is { Name: "dora_slo_breach_total", Value: 1L });
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void RecordIncidentStarted_TracksIncidentCount()
{
    // Arrange
    using var metrics = new DoraMetrics();

    // Act: an incident that has just started and is not resolved.
    metrics.RecordIncidentStarted(new DoraIncidentEvent(
        IncidentId: "inc-001",
        TenantId: "acme",
        Environment: "production",
        Severity: DoraIncidentSeverity.High,
        StartedAt: DateTimeOffset.UtcNow,
        ResolvedAt: null));

    // Assert
    Assert.Contains(_measurements, m => m is { Name: "dora_incidents_total", Value: 1L });
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void RecordIncidentResolved_TracksTimeToRecovery()
{
    // Arrange
    using var metrics = new DoraMetrics();

    // Act: a critical incident resolved two hours after it started.
    metrics.RecordIncidentResolved(new DoraIncidentEvent(
        IncidentId: "inc-002",
        TenantId: "acme",
        Environment: "production",
        Severity: DoraIncidentSeverity.Critical,
        StartedAt: DateTimeOffset.UtcNow.AddHours(-2),
        ResolvedAt: DateTimeOffset.UtcNow));

    // Assert: the resolved counter incremented and a recovery-time sample was recorded.
    Assert.Contains(_measurements, m => m is { Name: "dora_incidents_resolved_total", Value: 1L });
    Assert.Contains(_measurements, m => m.Name == "dora_time_to_recovery_hours");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void RecordIncidentResolved_ExceedsMttrSlo_RecordsSloBreak()
{
    // Arrange: MTTR SLO of half an hour.
    using var metrics = new DoraMetrics(new DoraMetricsOptions { MttrSloHours = 0.5 });

    // Act: the incident took two hours to resolve.
    metrics.RecordIncidentResolved(new DoraIncidentEvent(
        IncidentId: "inc-003",
        TenantId: "acme",
        Environment: "production",
        Severity: DoraIncidentSeverity.High,
        StartedAt: DateTimeOffset.UtcNow.AddHours(-2),
        ResolvedAt: DateTimeOffset.UtcNow));

    // Assert
    Assert.Contains(_measurements, m => m is { Name: "dora_slo_breach_total", Value: 1L });
}
[Trait("Category", TestCategories.Unit)]
[Theory]
[InlineData(2.0, 12.0, 10.0, 0.5, DoraPerformanceLevel.Elite)]
[InlineData(0.2, 100.0, 20.0, 20.0, DoraPerformanceLevel.High)]
[InlineData(0.05, 2000.0, 40.0, 100.0, DoraPerformanceLevel.Medium)]
[InlineData(0.01, 5000.0, 60.0, 200.0, DoraPerformanceLevel.Low)]
[InlineData(0.0, 0.0, 0.0, 0.0, DoraPerformanceLevel.Unknown)]
public void ClassifyPerformance_ReturnsCorrectLevel(
    double deploymentFrequency,
    double leadTimeHours,
    double cfrPercent,
    double mttrHours,
    DoraPerformanceLevel expectedLevel)
{
    // Act
    var actualLevel = DoraMetrics.ClassifyPerformance(
        deploymentFrequencyPerDay: deploymentFrequency,
        leadTimeHours: leadTimeHours,
        changeFailureRatePercent: cfrPercent,
        mttrHours: mttrHours);

    // Assert
    Assert.Equal(expectedLevel, actualLevel);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void DoraDeploymentEvent_LeadTime_CalculatesCorrectly()
{
    // Arrange: committed at 10:00 UTC and deployed at 14:30 UTC on the same day.
    var day = new DateTimeOffset(2025, 1, 15, 0, 0, 0, TimeSpan.Zero);
    var deployment = new DoraDeploymentEvent(
        DeploymentId: "test",
        TenantId: "acme",
        Environment: "prod",
        CommitSha: "abc",
        CommitTimestamp: day.AddHours(10),
        DeploymentTimestamp: day.AddHours(14).AddMinutes(30),
        Outcome: DoraDeploymentOutcome.Success,
        DurationMs: 1000);

    // Act
    var leadTime = deployment.LeadTime;

    // Assert: four and a half hours.
    Assert.Equal(new TimeSpan(4, 30, 0), leadTime);
}
[Trait("Category", TestCategories.Unit)]
[Theory]
[InlineData(DoraDeploymentOutcome.Success, false)]
[InlineData(DoraDeploymentOutcome.Rollback, true)]
[InlineData(DoraDeploymentOutcome.Hotfix, true)]
[InlineData(DoraDeploymentOutcome.Failed, true)]
[InlineData(DoraDeploymentOutcome.Cancelled, false)]
public void DoraDeploymentEvent_IsFailure_ReturnsCorrectValue(
    DoraDeploymentOutcome outcome,
    bool expectedIsFailure)
{
    // Arrange: only the outcome varies between cases.
    var subject = new DoraDeploymentEvent(
        DeploymentId: "test",
        TenantId: "acme",
        Environment: "prod",
        CommitSha: "abc",
        CommitTimestamp: DateTimeOffset.UtcNow,
        DeploymentTimestamp: DateTimeOffset.UtcNow,
        Outcome: outcome,
        DurationMs: 1000);

    // Act
    var isFailure = subject.IsFailure;

    // Assert
    Assert.Equal(expectedIsFailure, isFailure);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void DoraIncidentEvent_TimeToRecovery_ReturnsNullWhenOpen()
{
    // Arrange: an incident without a resolution time.
    var openIncident = new DoraIncidentEvent(
        IncidentId: "test",
        TenantId: "acme",
        Environment: "prod",
        Severity: DoraIncidentSeverity.High,
        StartedAt: DateTimeOffset.UtcNow,
        ResolvedAt: null);

    // Assert: no recovery time is available and the incident reports itself as open.
    Assert.Null(openIncident.TimeToRecovery);
    Assert.True(openIncident.IsOpen);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void DoraIncidentEvent_TimeToRecovery_CalculatesWhenResolved()
{
    // Arrange: started at 10:00 UTC and resolved at 11:30 UTC on the same day.
    var day = new DateTimeOffset(2025, 1, 15, 0, 0, 0, TimeSpan.Zero);
    var resolvedIncident = new DoraIncidentEvent(
        IncidentId: "test",
        TenantId: "acme",
        Environment: "prod",
        Severity: DoraIncidentSeverity.High,
        StartedAt: day.AddHours(10),
        ResolvedAt: day.AddHours(11).AddMinutes(30));

    // Assert: ninety minutes to recover, and the incident is no longer open.
    Assert.Equal(TimeSpan.FromMinutes(90), resolvedIncident.TimeToRecovery);
    Assert.False(resolvedIncident.IsOpen);
}
private sealed record RecordedMeasurement(string Name, object Value, KeyValuePair<string, object?>[] Tags);
}

View File

@@ -0,0 +1,237 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Telemetry.Core;
namespace StellaOps.Telemetry.Core.Tests;
public sealed class OutcomeAnalyticsServiceTests : IDisposable
{
private static readonly DateTimeOffset BaseTime = new(2026, 2, 1, 0, 0, 0, TimeSpan.Zero);
private readonly DoraMetrics _metrics;
private readonly InMemoryDoraMetricsService _doraMetricsService;
private readonly DoraOutcomeAnalyticsService _outcomeAnalyticsService;
/// <summary>
/// Wires the analytics service on top of an in-memory DORA metrics service.
/// </summary>
public OutcomeAnalyticsServiceTests()
{
    var metrics = new DoraMetrics();
    var doraService = new InMemoryDoraMetricsService(metrics);

    _metrics = metrics;
    _doraMetricsService = doraService;
    _outcomeAnalyticsService = new DoraOutcomeAnalyticsService(doraService);
}

/// <summary>
/// Disposes the metrics instance created in the constructor.
/// </summary>
public void Dispose()
{
    _metrics.Dispose();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetExecutiveReportAsync_ComputesAttributionAndCohorts()
{
    // Arrange
    await SeedDeterministicTelemetryAsync();
    var periodEnd = BaseTime.AddDays(4);

    // Act
    var report = await _outcomeAnalyticsService.GetExecutiveReportAsync(
        tenantId: "acme",
        environment: "production",
        periodStart: BaseTime,
        periodEnd: periodEnd);

    // Assert: headline totals.
    Assert.Equal(4, report.TotalDeployments);
    Assert.Equal(2, report.FailedDeployments);
    Assert.Equal(3, report.TotalIncidents);
    Assert.Equal(2, report.ResolvedIncidents);
    Assert.Equal(2, report.AcknowledgedIncidents);
    Assert.Equal(0.38, report.MeanTimeToAcknowledgeHours);
    Assert.Equal(2.5, report.MeanTimeToRecoveryHours);

    // Assert: per-pipeline attribution, in the order the report returns it.
    Assert.Collection(
        report.DeploymentAttribution,
        a =>
        {
            Assert.Equal("pipeline-a", a.PipelineId);
            Assert.Equal(2, a.DeploymentCount);
            Assert.Equal(1, a.FailedDeploymentCount);
            Assert.Equal(50.0, a.ChangeFailureRatePercent);
            Assert.Equal(2.5, a.MedianLeadTimeHours);
        },
        b =>
        {
            Assert.Equal("pipeline-b", b.PipelineId);
            Assert.Equal(1, b.DeploymentCount);
            Assert.Equal(0, b.FailedDeploymentCount);
            Assert.Equal(0.0, b.ChangeFailureRatePercent);
            Assert.Equal(6.0, b.MedianLeadTimeHours);
        },
        unattributed =>
        {
            // The deployment seeded without a pipeline id is reported under "unknown".
            Assert.Equal("unknown", unattributed.PipelineId);
            Assert.Equal(1, unattributed.DeploymentCount);
            Assert.Equal(1, unattributed.FailedDeploymentCount);
            Assert.Equal(100.0, unattributed.ChangeFailureRatePercent);
            Assert.Equal(6.0, unattributed.MedianLeadTimeHours);
        });

    // Assert: per-severity incident attribution, in the order the report returns it.
    Assert.Collection(
        report.IncidentAttribution,
        bySeverity =>
        {
            Assert.Equal(DoraIncidentSeverity.Critical, bySeverity.Severity);
            Assert.Equal(1, bySeverity.IncidentCount);
            Assert.Equal(1, bySeverity.ResolvedIncidentCount);
            Assert.Equal(0, bySeverity.AcknowledgedIncidentCount);
            Assert.Equal(0.0, bySeverity.MeanTimeToAcknowledgeHours);
            Assert.Equal(4.0, bySeverity.MeanTimeToRecoveryHours);
        },
        bySeverity =>
        {
            Assert.Equal(DoraIncidentSeverity.High, bySeverity.Severity);
            Assert.Equal(1, bySeverity.IncidentCount);
            Assert.Equal(1, bySeverity.ResolvedIncidentCount);
            Assert.Equal(1, bySeverity.AcknowledgedIncidentCount);
            Assert.Equal(0.25, bySeverity.MeanTimeToAcknowledgeHours);
            Assert.Equal(1.0, bySeverity.MeanTimeToRecoveryHours);
        },
        bySeverity =>
        {
            Assert.Equal(DoraIncidentSeverity.Medium, bySeverity.Severity);
            Assert.Equal(1, bySeverity.IncidentCount);
            Assert.Equal(0, bySeverity.ResolvedIncidentCount);
            Assert.Equal(1, bySeverity.AcknowledgedIncidentCount);
            Assert.Equal(0.5, bySeverity.MeanTimeToAcknowledgeHours);
            Assert.Equal(0.0, bySeverity.MeanTimeToRecoveryHours);
        });

    // Assert: one cohort per day from 1 Feb through 5 Feb; the fourth day has no deployments.
    var cohorts = report.DailyCohorts;
    Assert.Equal(5, cohorts.Count);
    Assert.Equal(new DateOnly(2026, 2, 1), cohorts[0].Day);
    Assert.Equal(new DateOnly(2026, 2, 5), cohorts[4].Day);
    Assert.Equal(0, cohorts[3].DeploymentCount);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public async Task GetExecutiveReportAsync_IsDeterministicAcrossRepeatedCalls()
{
    // Arrange
    await SeedDeterministicTelemetryAsync();
    var fetchReport = () => _outcomeAnalyticsService.GetExecutiveReportAsync(
        tenantId: "acme",
        environment: "production",
        periodStart: BaseTime,
        periodEnd: BaseTime.AddDays(4));

    // Act: run the identical query twice against the same seeded data.
    var first = await fetchReport();
    var second = await fetchReport();

    // Assert: report scope.
    Assert.Equal(first.TenantId, second.TenantId);
    Assert.Equal(first.Environment, second.Environment);
    Assert.Equal(first.PeriodStart, second.PeriodStart);
    Assert.Equal(first.PeriodEnd, second.PeriodEnd);

    // Assert: headline totals.
    Assert.Equal(first.TotalDeployments, second.TotalDeployments);
    Assert.Equal(first.FailedDeployments, second.FailedDeployments);
    Assert.Equal(first.TotalIncidents, second.TotalIncidents);
    Assert.Equal(first.ResolvedIncidents, second.ResolvedIncidents);
    Assert.Equal(first.AcknowledgedIncidents, second.AcknowledgedIncidents);
    Assert.Equal(first.MeanTimeToAcknowledgeHours, second.MeanTimeToAcknowledgeHours);
    Assert.Equal(first.MeanTimeToRecoveryHours, second.MeanTimeToRecoveryHours);

    // Assert: breakdown collections.
    Assert.Equal(first.DeploymentAttribution, second.DeploymentAttribution);
    Assert.Equal(first.IncidentAttribution, second.IncidentAttribution);
    Assert.Equal(first.DailyCohorts, second.DailyCohorts);
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void AddDoraMetrics_RegistersOutcomeAnalyticsService()
{
    // Arrange
    var services = new ServiceCollection();
    services.AddDoraMetrics();

    // Act
    using var provider = services.BuildServiceProvider();
    var resolved = provider.GetService<IOutcomeAnalyticsService>();

    // Assert: the analytics service can be resolved from the container.
    Assert.NotNull(resolved);
}
/// <summary>
/// Records four deployments and three incidents for tenant "acme" in "production",
/// all timestamped relative to the fixed <c>BaseTime</c>.
/// </summary>
private async Task SeedDeterministicTelemetryAsync()
{
    // Day anchors relative to BaseTime.
    var day1 = BaseTime.AddDays(1);
    var day2 = BaseTime.AddDays(2);
    var day3 = BaseTime.AddDays(3);

    // Deployments: two on pipeline-a (one rolled back), one on pipeline-b, one failed without a pipeline id.
    DoraDeploymentEvent[] deployments =
    [
        new DoraDeploymentEvent(
            DeploymentId: "deploy-001",
            TenantId: "acme",
            Environment: "production",
            CommitSha: "sha-001",
            CommitTimestamp: BaseTime.AddHours(-1),
            DeploymentTimestamp: BaseTime.AddHours(1),
            Outcome: DoraDeploymentOutcome.Success,
            DurationMs: 30_000,
            PipelineId: "pipeline-a"),
        new DoraDeploymentEvent(
            DeploymentId: "deploy-002",
            TenantId: "acme",
            Environment: "production",
            CommitSha: "sha-002",
            CommitTimestamp: day1.AddHours(-2),
            DeploymentTimestamp: day1.AddHours(1),
            Outcome: DoraDeploymentOutcome.Rollback,
            DurationMs: 45_000,
            PipelineId: "pipeline-a"),
        new DoraDeploymentEvent(
            DeploymentId: "deploy-003",
            TenantId: "acme",
            Environment: "production",
            CommitSha: "sha-003",
            CommitTimestamp: day1.AddHours(-4),
            DeploymentTimestamp: day1.AddHours(2),
            Outcome: DoraDeploymentOutcome.Success,
            DurationMs: 32_000,
            PipelineId: "pipeline-b"),
        new DoraDeploymentEvent(
            DeploymentId: "deploy-004",
            TenantId: "acme",
            Environment: "production",
            CommitSha: "sha-004",
            CommitTimestamp: day2.AddHours(-3),
            DeploymentTimestamp: day2.AddHours(3),
            Outcome: DoraDeploymentOutcome.Failed,
            DurationMs: 52_000,
            PipelineId: null),
    ];
    foreach (var deployment in deployments)
    {
        await _doraMetricsService.RecordDeploymentAsync(deployment);
    }

    // Incidents: High (acknowledged and resolved), Critical (resolved, no acknowledgement given),
    // Medium (acknowledged, still open).
    DoraIncidentEvent[] incidents =
    [
        new DoraIncidentEvent(
            IncidentId: "inc-001",
            TenantId: "acme",
            Environment: "production",
            Severity: DoraIncidentSeverity.High,
            StartedAt: day1.AddHours(10),
            ResolvedAt: day1.AddHours(11),
            AcknowledgedAt: day1.AddHours(10.25),
            DeploymentId: "deploy-002"),
        new DoraIncidentEvent(
            IncidentId: "inc-002",
            TenantId: "acme",
            Environment: "production",
            Severity: DoraIncidentSeverity.Critical,
            StartedAt: day2.AddHours(8),
            ResolvedAt: day2.AddHours(12),
            DeploymentId: "deploy-004"),
        new DoraIncidentEvent(
            IncidentId: "inc-003",
            TenantId: "acme",
            Environment: "production",
            Severity: DoraIncidentSeverity.Medium,
            StartedAt: day3.AddHours(9),
            ResolvedAt: null,
            AcknowledgedAt: day3.AddHours(9.5),
            DeploymentId: "deploy-004"),
    ];
    foreach (var incident in incidents)
    {
        await _doraMetricsService.RecordIncidentAsync(incident);
    }
}
}

View File

@@ -0,0 +1,304 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Telemetry.Core;
/// <summary>
/// OpenTelemetry-style metrics for DORA (DevOps Research and Assessment) metrics.
/// Tracks the four key metrics: Deployment Frequency, Lead Time for Changes,
/// Change Failure Rate, and Mean Time to Recovery (MTTR).
/// </summary>
public sealed class DoraMetrics : IDisposable
{
/// <summary>
/// Default meter name for DORA metrics.
/// </summary>
public const string MeterName = "StellaOps.DORA";
private readonly Meter _meter;
private readonly DoraMetricsOptions _options;
private bool _disposed;
// Deployment Frequency metrics
private readonly Counter<long> _deploymentCounter;
private readonly Histogram<double> _deploymentDurationHistogram;
// Lead Time for Changes metrics
private readonly Histogram<double> _leadTimeHistogram;
// Change Failure Rate metrics
private readonly Counter<long> _deploymentSuccessCounter;
private readonly Counter<long> _deploymentFailureCounter;
// MTTR metrics
private readonly Counter<long> _incidentCounter;
private readonly Counter<long> _incidentResolvedCounter;
private readonly Histogram<double> _timeToRecoveryHistogram;
// SLO breach tracking
private readonly Counter<long> _sloBreachCounter;
/// <summary>
/// Initializes a new instance of <see cref="DoraMetrics"/> and creates every DORA instrument
/// on a meter named <see cref="MeterName"/>.
/// </summary>
/// <param name="options">
/// Metric options; when null a default <see cref="DoraMetricsOptions"/> is used.
/// </param>
public DoraMetrics(DoraMetricsOptions? options = null)
{
    _options = options ?? new DoraMetricsOptions();
    _meter = new Meter(MeterName, _options.Version);

    // Deployment Frequency
    _deploymentCounter = _meter.CreateCounter<long>(
        "dora_deployments_total",
        "{deployment}",
        "Total number of deployments.");
    _deploymentDurationHistogram = _meter.CreateHistogram<double>(
        "dora_deployment_duration_seconds",
        "s",
        "Duration of deployments in seconds.");

    // Lead Time for Changes
    _leadTimeHistogram = _meter.CreateHistogram<double>(
        "dora_lead_time_hours",
        "h",
        "Lead time from commit to deployment in hours.");

    // Change Failure Rate
    _deploymentSuccessCounter = _meter.CreateCounter<long>(
        "dora_deployment_success_total",
        "{deployment}",
        "Total number of successful deployments.");
    _deploymentFailureCounter = _meter.CreateCounter<long>(
        "dora_deployment_failure_total",
        "{deployment}",
        "Total number of failed deployments (rollbacks, hotfixes, failures).");

    // Mean Time to Recovery
    _incidentCounter = _meter.CreateCounter<long>(
        "dora_incidents_total",
        "{incident}",
        "Total number of incidents.");
    _incidentResolvedCounter = _meter.CreateCounter<long>(
        "dora_incidents_resolved_total",
        "{incident}",
        "Total number of resolved incidents.");
    _timeToRecoveryHistogram = _meter.CreateHistogram<double>(
        "dora_time_to_recovery_hours",
        "h",
        "Time to recovery from incidents in hours.");

    // SLO breaches (shared by all four metrics; distinguished by the "metric" tag)
    _sloBreachCounter = _meter.CreateCounter<long>(
        "dora_slo_breach_total",
        "{breach}",
        "Total number of DORA SLO breaches.");
}
/// <summary>
/// Records a deployment event: total count, duration, lead time, success/failure
/// counters, and any lead-time SLO breach.
/// </summary>
/// <param name="deployment">The deployment to record.</param>
/// <exception cref="ArgumentNullException"><paramref name="deployment"/> is null.</exception>
public void RecordDeployment(DoraDeploymentEvent deployment)
{
    ArgumentNullException.ThrowIfNull(deployment);

    var tags = new TagList();
    tags.Add("tenant_id", deployment.TenantId);
    tags.Add("environment", deployment.Environment);
    tags.Add("outcome", deployment.Outcome.ToString().ToLowerInvariant());
    if (deployment.PipelineId is { Length: > 0 } pipelineId)
    {
        // The pipeline tag is only attached when a pipeline id was supplied.
        tags.Add("pipeline_id", pipelineId);
    }

    // Deployment frequency.
    _deploymentCounter.Add(1, tags);

    // Duration is supplied in milliseconds; the histogram is in seconds.
    _deploymentDurationHistogram.Record(deployment.DurationMs / 1000.0, tags);

    // Lead time for changes.
    _leadTimeHistogram.Record(deployment.LeadTime.TotalHours, tags);

    // Change failure rate: failures and explicit successes are counted;
    // any other non-failure outcome touches neither counter.
    var outcomeCounter = deployment.IsFailure
        ? _deploymentFailureCounter
        : deployment.Outcome == DoraDeploymentOutcome.Success
            ? _deploymentSuccessCounter
            : null;
    outcomeCounter?.Add(1, tags);

    CheckDeploymentSlos(deployment);
}
/// <summary>
/// Records the start of an incident by incrementing the incident counter.
/// </summary>
/// <param name="incident">The incident that started.</param>
/// <exception cref="ArgumentNullException"><paramref name="incident"/> is null.</exception>
public void RecordIncidentStarted(DoraIncidentEvent incident)
{
    ArgumentNullException.ThrowIfNull(incident);

    var tags = new TagList();
    tags.Add("tenant_id", incident.TenantId);
    tags.Add("environment", incident.Environment);
    tags.Add("severity", incident.Severity.ToString().ToLowerInvariant());

    _incidentCounter.Add(1, tags);
}
/// <summary>
/// Records an incident resolution: resolved count, time to recovery, and an MTTR SLO
/// breach when the recovery time exceeds the configured target.
/// </summary>
/// <param name="incident">The incident; ignored when it carries no resolution or recovery time.</param>
/// <exception cref="ArgumentNullException"><paramref name="incident"/> is null.</exception>
public void RecordIncidentResolved(DoraIncidentEvent incident)
{
    ArgumentNullException.ThrowIfNull(incident);

    // Nothing to record for an incident that is still open.
    if (incident.ResolvedAt is null || incident.TimeToRecovery is not { } timeToRecovery)
    {
        return;
    }

    var severity = incident.Severity.ToString().ToLowerInvariant();

    var tags = new TagList();
    tags.Add("tenant_id", incident.TenantId);
    tags.Add("environment", incident.Environment);
    tags.Add("severity", severity);

    _incidentResolvedCounter.Add(1, tags);

    var recoveryHours = timeToRecovery.TotalHours;
    _timeToRecoveryHistogram.Record(recoveryHours, tags);

    // Within the MTTR target: no breach to report.
    if (!(recoveryHours > _options.MttrSloHours))
    {
        return;
    }

    var breachTags = new TagList();
    breachTags.Add("tenant_id", incident.TenantId);
    breachTags.Add("environment", incident.Environment);
    breachTags.Add("severity", severity);
    breachTags.Add("metric", "mttr");
    _sloBreachCounter.Add(1, breachTags);
}
/// <summary>
/// Increments the SLO breach counter when the deployment's lead time exceeds
/// the configured lead-time target.
/// </summary>
private void CheckDeploymentSlos(DoraDeploymentEvent deployment)
{
    // Lead time within target: nothing to report.
    if (!(deployment.LeadTime.TotalHours > _options.LeadTimeSloHours))
    {
        return;
    }

    var breachTags = new TagList();
    breachTags.Add("tenant_id", deployment.TenantId);
    breachTags.Add("environment", deployment.Environment);
    breachTags.Add("outcome", deployment.Outcome.ToString().ToLowerInvariant());
    breachTags.Add("metric", "lead_time");
    if (deployment.PipelineId is { Length: > 0 } pipelineId)
    {
        breachTags.Add("pipeline_id", pipelineId);
    }

    _sloBreachCounter.Add(1, breachTags);
}
/// <summary>
/// Records a deployment frequency SLO breach (typically calculated in batches).
/// </summary>
/// <param name="tenantId">Tenant the breach belongs to; emitted as the <c>tenant_id</c> tag.</param>
/// <param name="environment">Environment the breach belongs to; emitted as the <c>environment</c> tag.</param>
/// <param name="actualFrequency">
/// Observed frequency; emitted as the <c>actual_frequency</c> tag with two decimals.
/// </param>
public void RecordDeploymentFrequencySloBreak(string tenantId, string environment, double actualFrequency)
{
    var tags = new TagList
    {
        { "tenant_id", tenantId },
        { "environment", environment },
        { "metric", "deployment_frequency" },
        // Invariant culture keeps the tag value stable (e.g. "0.50") whatever the host's
        // regional settings are; the culture-sensitive overload can emit "0,50".
        { "actual_frequency", actualFrequency.ToString("F2", System.Globalization.CultureInfo.InvariantCulture) }
    };
    _sloBreachCounter.Add(1, tags);
}
/// <summary>
/// Records a change failure rate SLO breach (typically calculated in batches).
/// </summary>
/// <param name="tenantId">Tenant the breach belongs to; emitted as the <c>tenant_id</c> tag.</param>
/// <param name="environment">Environment the breach belongs to; emitted as the <c>environment</c> tag.</param>
/// <param name="actualRate">
/// Observed change failure rate; emitted as the <c>actual_rate</c> tag with two decimals.
/// </param>
public void RecordChangeFailureRateSloBreak(string tenantId, string environment, double actualRate)
{
    var tags = new TagList
    {
        { "tenant_id", tenantId },
        { "environment", environment },
        { "metric", "change_failure_rate" },
        // Invariant culture keeps the tag value stable (e.g. "12.50") whatever the host's
        // regional settings are; the culture-sensitive overload can emit "12,50".
        { "actual_rate", actualRate.ToString("F2", System.Globalization.CultureInfo.InvariantCulture) }
    };
    _sloBreachCounter.Add(1, tags);
}
/// <summary>
/// Classifies the DORA performance level based on the four key metrics.
/// Tiers are checked from best to worst and the first tier whose four
/// thresholds are all met is returned.
/// </summary>
/// <param name="deploymentFrequencyPerDay">Deployments per day.</param>
/// <param name="leadTimeHours">Lead time for changes, in hours.</param>
/// <param name="changeFailureRatePercent">Change failure rate, as a percentage.</param>
/// <param name="mttrHours">Mean time to recovery, in hours.</param>
/// <returns>
/// The matching level; <see cref="DoraPerformanceLevel.Low"/> when no tier matches but the
/// frequency is above zero; otherwise <see cref="DoraPerformanceLevel.Unknown"/>.
/// </returns>
public static DoraPerformanceLevel ClassifyPerformance(
    double deploymentFrequencyPerDay,
    double leadTimeHours,
    double changeFailureRatePercent,
    double mttrHours)
{
    // Elite: at least one deployment per day, lead time under 24 h, CFR under 15%, MTTR under 1 h.
    var isElite =
        deploymentFrequencyPerDay >= 1.0 &&
        leadTimeHours < 24 &&
        changeFailureRatePercent < 15 &&
        mttrHours < 1;
    if (isElite)
    {
        return DoraPerformanceLevel.Elite;
    }

    // High: at least ~one deployment per week (0.14/day), lead time under 1 week (168 h),
    // CFR at most 30%, MTTR under 1 day.
    var isHigh =
        deploymentFrequencyPerDay >= 0.14 &&
        leadTimeHours < 168 &&
        changeFailureRatePercent <= 30 &&
        mttrHours < 24;
    if (isHigh)
    {
        return DoraPerformanceLevel.High;
    }

    // Medium: at least ~one deployment per month (0.033/day), lead time under ~6 months (4320 h),
    // CFR at most 45%, MTTR under 1 week (168 h).
    var isMedium =
        deploymentFrequencyPerDay >= 0.033 &&
        leadTimeHours < 4320 &&
        changeFailureRatePercent <= 45 &&
        mttrHours < 168;
    if (isMedium)
    {
        return DoraPerformanceLevel.Medium;
    }

    // Low: any deployment activity at all; Unknown: no activity.
    return deploymentFrequencyPerDay > 0
        ? DoraPerformanceLevel.Low
        : DoraPerformanceLevel.Unknown;
}
/// <inheritdoc/>
public void Dispose()
{
    // Repeated calls are no-ops: the meter is disposed only on the first call.
    if (!_disposed)
    {
        _meter.Dispose();
        _disposed = true;
    }
}
}

View File

@@ -0,0 +1,245 @@
namespace StellaOps.Telemetry.Core;
/// <summary>
/// Options for DORA metrics collection and reporting.
/// </summary>
/// <remarks>
/// Every property has a default, so an instance created with <c>new DoraMetricsOptions()</c> is usable as-is.
/// </remarks>
public sealed class DoraMetricsOptions
{
/// <summary>
/// Version string for the meter.
/// </summary>
public string Version { get; set; } = "1.0.0";
/// <summary>
/// Whether DORA metrics collection is enabled.
/// </summary>
public bool Enabled { get; set; } = true;
/// <summary>
/// SLO target for Lead Time for Changes in hours (default: 24 hours for Elite performers).
/// </summary>
/// <remarks>
/// A deployment whose lead time in hours is strictly greater than this value is counted as a
/// <c>lead_time</c> SLO breach.
/// </remarks>
public double LeadTimeSloHours { get; set; } = 24.0;
/// <summary>
/// SLO target for Deployment Frequency per day (default: 1 for Elite performers).
/// </summary>
/// <remarks>
/// <see cref="InMemoryDoraMetricsService"/> records a breach when a summary's frequency is strictly
/// below this value and the period contains at least one deployment.
/// </remarks>
public double DeploymentFrequencySloPerDay { get; set; } = 1.0;
/// <summary>
/// SLO target for Change Failure Rate as a percentage (default: 15% for Elite performers).
/// </summary>
/// <remarks>
/// <see cref="InMemoryDoraMetricsService"/> records a breach when a summary's failure rate is strictly
/// above this value and the period contains at least one deployment.
/// </remarks>
public double ChangeFailureRateSloPercent { get; set; } = 15.0;
/// <summary>
/// SLO target for Mean Time to Recovery in hours (default: 1 hour for Elite performers).
/// </summary>
public double MttrSloHours { get; set; } = 1.0;
/// <summary>
/// Rolling window for calculating deployment frequency (in days).
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="InMemoryDoraMetricsService"/> derives frequency from the caller-supplied
/// period rather than from this window; confirm where this value is consumed.
/// </remarks>
public int FrequencyWindowDays { get; set; } = 30;
/// <summary>
/// Rolling window for calculating change failure rate (in days).
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="InMemoryDoraMetricsService"/> derives the failure rate from the
/// caller-supplied period rather than from this window; confirm where this value is consumed.
/// </remarks>
public int FailureRateWindowDays { get; set; } = 30;
}
/// <summary>
/// DORA performance classification based on the Four Keys metrics.
/// </summary>
/// <remarks>
/// Numeric values increase with performance (Unknown = 0 up to Elite = 4). The thresholds quoted on
/// each member are the ones applied by <c>DoraMetrics.ClassifyPerformance</c>, which tests the tiers
/// best-first and requires all four conditions of a tier to hold.
/// </remarks>
public enum DoraPerformanceLevel
{
/// <summary>
/// Elite performers: at least one deployment per day, lead time under 24 hours, CFR under 15%, MTTR under 1 hour.
/// </summary>
Elite = 4,
/// <summary>
/// High performers: at least roughly one deployment per week, lead time under 1 week, CFR at most 30%, MTTR under 1 day.
/// </summary>
High = 3,
/// <summary>
/// Medium performers: at least roughly one deployment per month, lead time under about 6 months, CFR at most 45%, MTTR under 1 week.
/// </summary>
Medium = 2,
/// <summary>
/// Low performers: some deployment activity, but the thresholds of every higher tier are missed.
/// </summary>
Low = 1,
/// <summary>
/// Unknown or insufficient data to classify (no deployment activity in the period).
/// </summary>
Unknown = 0
}
/// <summary>
/// Type of deployment event for DORA tracking.
/// </summary>
/// <remarks>
/// <see cref="DoraDeploymentEvent.IsFailure"/> treats <see cref="Rollback"/>, <see cref="Hotfix"/> and
/// <see cref="Failed"/> as change failures; <see cref="Success"/> and <see cref="Cancelled"/> are not.
/// </remarks>
public enum DoraDeploymentOutcome
{
/// <summary>
/// Successful deployment that did not require rollback or hotfix.
/// </summary>
Success = 0,
/// <summary>
/// Deployment that required a rollback. Counted as a change failure.
/// </summary>
Rollback = 1,
/// <summary>
/// Deployment that required a hotfix. Counted as a change failure.
/// </summary>
Hotfix = 2,
/// <summary>
/// Deployment that failed during execution. Counted as a change failure.
/// </summary>
Failed = 3,
/// <summary>
/// Deployment was cancelled before completion. Not counted as a change failure.
/// </summary>
Cancelled = 4
}
/// <summary>
/// Incident severity levels for MTTR tracking.
/// </summary>
/// <remarks>
/// Lower numeric values are more severe, so ordering by this enum lists <see cref="Critical"/> first.
/// </remarks>
public enum DoraIncidentSeverity
{
/// <summary>
/// Critical incident affecting all users/services.
/// </summary>
Critical = 1,
/// <summary>
/// High severity incident affecting major functionality.
/// </summary>
High = 2,
/// <summary>
/// Medium severity incident affecting some users.
/// </summary>
Medium = 3,
/// <summary>
/// Low severity incident with minimal impact.
/// </summary>
Low = 4
}
/// <summary>
/// Immutable description of one deployment, the raw input for the delivery-side DORA metrics.
/// </summary>
/// <param name="DeploymentId">Unique identifier for the deployment.</param>
/// <param name="TenantId">Tenant that owns the deployment.</param>
/// <param name="Environment">Target environment (e.g., production, staging).</param>
/// <param name="CommitSha">SHA of the commit that was deployed.</param>
/// <param name="CommitTimestamp">When that commit was created.</param>
/// <param name="DeploymentTimestamp">When the deployment completed.</param>
/// <param name="Outcome">How the deployment ended.</param>
/// <param name="DurationMs">Deployment duration in milliseconds.</param>
/// <param name="ArtifactDigest">Digest of the deployed artifact, if known.</param>
/// <param name="PipelineId">CI/CD pipeline that ran the deployment, if known.</param>
public sealed record DoraDeploymentEvent(
    string DeploymentId,
    string TenantId,
    string Environment,
    string CommitSha,
    DateTimeOffset CommitTimestamp,
    DateTimeOffset DeploymentTimestamp,
    DoraDeploymentOutcome Outcome,
    long DurationMs,
    string? ArtifactDigest = null,
    string? PipelineId = null)
{
    /// <summary>
    /// Lead time for this change: the span from commit creation to deployment completion.
    /// </summary>
    public TimeSpan LeadTime => DeploymentTimestamp.Subtract(CommitTimestamp);

    /// <summary>
    /// <c>true</c> when the outcome counts towards the change failure rate
    /// (rollback, hotfix or failed); <c>false</c> for every other outcome.
    /// </summary>
    public bool IsFailure => Outcome switch
    {
        DoraDeploymentOutcome.Rollback => true,
        DoraDeploymentOutcome.Hotfix => true,
        DoraDeploymentOutcome.Failed => true,
        _ => false,
    };
}
/// <summary>
/// Immutable description of one incident, the raw input for acknowledgement and recovery timings.
/// </summary>
/// <param name="IncidentId">Unique identifier for the incident.</param>
/// <param name="TenantId">Tenant that owns the incident.</param>
/// <param name="Environment">Environment in which the incident occurred.</param>
/// <param name="Severity">Severity assigned to the incident.</param>
/// <param name="StartedAt">When the incident was detected.</param>
/// <param name="ResolvedAt">When the incident was resolved; <c>null</c> while it is still open.</param>
/// <param name="AcknowledgedAt">When the incident was acknowledged; <c>null</c> if it has not been.</param>
/// <param name="DeploymentId">Deployment believed to have caused the incident, if known.</param>
/// <param name="Description">Short free-text description.</param>
public sealed record DoraIncidentEvent(
    string IncidentId,
    string TenantId,
    string Environment,
    DoraIncidentSeverity Severity,
    DateTimeOffset StartedAt,
    DateTimeOffset? ResolvedAt,
    DateTimeOffset? AcknowledgedAt = null,
    string? DeploymentId = null,
    string? Description = null)
{
    /// <summary>
    /// Span from detection to acknowledgement, or <c>null</c> when the incident is unacknowledged.
    /// </summary>
    public TimeSpan? TimeToAcknowledge =>
        AcknowledgedAt is { } acknowledgedAt ? acknowledgedAt - StartedAt : null;

    /// <summary>
    /// Span from detection to resolution, or <c>null</c> while the incident is open.
    /// </summary>
    public TimeSpan? TimeToRecovery =>
        ResolvedAt is { } resolvedAt ? resolvedAt - StartedAt : null;

    /// <summary>
    /// <c>true</c> until a resolution time has been recorded.
    /// </summary>
    public bool IsOpen => ResolvedAt is null;
}
/// <summary>
/// Summary of DORA metrics for a tenant/environment over a time period.
/// </summary>
/// <remarks>
/// As produced by <see cref="InMemoryDoraMetricsService"/>: <c>SuccessfulDeployments</c> counts every
/// deployment that is not a change failure (cancelled deployments therefore count as successful),
/// recovery time covers resolved incidents only, frequency is rounded to four decimals and the other
/// computed values to two. Other producers may differ.
/// </remarks>
/// <param name="TenantId">The tenant ID.</param>
/// <param name="Environment">The environment (or null for all environments).</param>
/// <param name="PeriodStart">Start of the measurement period.</param>
/// <param name="PeriodEnd">End of the measurement period.</param>
/// <param name="DeploymentCount">Total number of deployments.</param>
/// <param name="SuccessfulDeployments">Number of successful deployments.</param>
/// <param name="FailedDeployments">Number of failed deployments (CFR numerator).</param>
/// <param name="DeploymentFrequencyPerDay">Average deployments per day.</param>
/// <param name="MedianLeadTimeHours">Median lead time for changes in hours.</param>
/// <param name="ChangeFailureRatePercent">Change failure rate as a percentage.</param>
/// <param name="MeanTimeToRecoveryHours">Mean time to recovery in hours.</param>
/// <param name="PerformanceLevel">Calculated DORA performance classification.</param>
public sealed record DoraSummary(
string TenantId,
string? Environment,
DateTimeOffset PeriodStart,
DateTimeOffset PeriodEnd,
int DeploymentCount,
int SuccessfulDeployments,
int FailedDeployments,
double DeploymentFrequencyPerDay,
double MedianLeadTimeHours,
double ChangeFailureRatePercent,
double MeanTimeToRecoveryHours,
DoraPerformanceLevel PerformanceLevel);

View File

@@ -0,0 +1,214 @@
namespace StellaOps.Telemetry.Core;
/// <summary>
/// Deterministic outcome analytics service backed by <see cref="IDoraMetricsService"/>.
/// </summary>
/// <remarks>
/// The service holds no state of its own: every report is computed from the deployment and incident
/// events returned by the underlying <see cref="IDoraMetricsService"/> for the requested period.
/// Groups and events are explicitly ordered before aggregation so that repeated calls over the same
/// data produce the same report.
/// </remarks>
public sealed class DoraOutcomeAnalyticsService : IOutcomeAnalyticsService
{
    /// <summary>Pipeline bucket used for deployments that carry no (or a blank) pipeline id.</summary>
    private const string UnknownPipelineId = "unknown";

    private readonly IDoraMetricsService _doraMetricsService;

    /// <summary>
    /// Initializes a new instance of <see cref="DoraOutcomeAnalyticsService"/>.
    /// </summary>
    /// <param name="doraMetricsService">Source of deployment and incident events.</param>
    /// <exception cref="ArgumentNullException"><paramref name="doraMetricsService"/> is <c>null</c>.</exception>
    public DoraOutcomeAnalyticsService(IDoraMetricsService doraMetricsService)
    {
        _doraMetricsService = doraMetricsService ?? throw new ArgumentNullException(nameof(doraMetricsService));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException">
    /// <paramref name="tenantId"/> is null or whitespace, or <paramref name="periodEnd"/> precedes
    /// <paramref name="periodStart"/>.
    /// </exception>
    public async Task<OutcomeExecutiveReport> GetExecutiveReportAsync(
        string tenantId,
        string? environment,
        DateTimeOffset periodStart,
        DateTimeOffset periodEnd,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        if (periodEnd < periodStart)
        {
            throw new ArgumentException("Period end must be greater than or equal to period start.", nameof(periodEnd));
        }

        // Library code: no need to resume on the caller's synchronization context.
        var deployments = await ToListAsync(
            _doraMetricsService.GetDeploymentsAsync(tenantId, environment, periodStart, periodEnd, cancellationToken),
            cancellationToken).ConfigureAwait(false);
        var incidents = await ToListAsync(
            _doraMetricsService.GetIncidentsAsync(tenantId, environment, periodStart, periodEnd, includeOpen: true, cancellationToken),
            cancellationToken).ConfigureAwait(false);

        var totalDeployments = deployments.Count;
        var failedDeployments = deployments.Count(static d => d.IsFailure);
        var totalIncidents = incidents.Count;
        var resolvedIncidents = incidents.Where(static i => !i.IsOpen).ToList();
        var acknowledgedIncidents = incidents.Where(static i => i.TimeToAcknowledge.HasValue).ToList();

        var meanTimeToAcknowledgeHours = CalculateMeanHours(acknowledgedIncidents
            .Select(static i => i.TimeToAcknowledge)
            .Where(static t => t.HasValue)
            .Select(static t => t!.Value));
        var meanTimeToRecoveryHours = CalculateMeanHours(resolvedIncidents
            .Select(static i => i.TimeToRecovery)
            .Where(static t => t.HasValue)
            .Select(static t => t!.Value));

        var deploymentAttribution = BuildDeploymentAttribution(deployments);
        var incidentAttribution = BuildIncidentAttribution(incidents);
        var dailyCohorts = BuildDailyCohorts(periodStart, periodEnd, deployments, resolvedIncidents);

        return new OutcomeExecutiveReport(
            TenantId: tenantId,
            Environment: environment,
            PeriodStart: periodStart,
            PeriodEnd: periodEnd,
            TotalDeployments: totalDeployments,
            FailedDeployments: failedDeployments,
            TotalIncidents: totalIncidents,
            ResolvedIncidents: resolvedIncidents.Count,
            AcknowledgedIncidents: acknowledgedIncidents.Count,
            MeanTimeToAcknowledgeHours: meanTimeToAcknowledgeHours,
            MeanTimeToRecoveryHours: meanTimeToRecoveryHours,
            DeploymentAttribution: deploymentAttribution,
            IncidentAttribution: incidentAttribution,
            DailyCohorts: dailyCohorts);
    }

    /// <summary>
    /// Groups deployments by normalized pipeline id and computes per-pipeline counts, failure rate
    /// and median lead time. Slices are ordered by pipeline id (ordinal).
    /// </summary>
    private static IReadOnlyList<DeploymentAttributionSlice> BuildDeploymentAttribution(
        IReadOnlyList<DoraDeploymentEvent> deployments)
    {
        return deployments
            .GroupBy(static d => NormalizePipelineId(d.PipelineId), StringComparer.Ordinal)
            .OrderBy(static g => g.Key, StringComparer.Ordinal)
            .Select(static group =>
            {
                var events = group.OrderBy(static d => d.DeploymentTimestamp).ToList();
                var deploymentCount = events.Count;
                var failedDeploymentCount = events.Count(static d => d.IsFailure);
                var failureRate = deploymentCount == 0
                    ? 0
                    : Math.Round((failedDeploymentCount * 100.0) / deploymentCount, 2);
                var medianLeadTimeHours = Math.Round(CalculateMedianHours(events.Select(static d => d.LeadTime.TotalHours)), 2);

                return new DeploymentAttributionSlice(
                    PipelineId: group.Key,
                    DeploymentCount: deploymentCount,
                    FailedDeploymentCount: failedDeploymentCount,
                    ChangeFailureRatePercent: failureRate,
                    MedianLeadTimeHours: medianLeadTimeHours);
            })
            .ToList();
    }

    /// <summary>
    /// Groups incidents by severity and computes per-severity counts and mean acknowledge/recovery
    /// times. Slices are ordered by severity value.
    /// </summary>
    private static IReadOnlyList<IncidentAttributionSlice> BuildIncidentAttribution(
        IReadOnlyList<DoraIncidentEvent> incidents)
    {
        return incidents
            .GroupBy(static i => i.Severity)
            .OrderBy(static g => g.Key)
            .Select(static group =>
            {
                // Fixed event order keeps the floating-point mean reproducible between calls.
                var events = group.OrderBy(static i => i.StartedAt).ToList();
                var resolved = events.Where(static i => !i.IsOpen).ToList();
                var acknowledged = events.Where(static i => i.TimeToAcknowledge.HasValue).ToList();

                return new IncidentAttributionSlice(
                    Severity: group.Key,
                    IncidentCount: events.Count,
                    ResolvedIncidentCount: resolved.Count,
                    AcknowledgedIncidentCount: acknowledged.Count,
                    MeanTimeToAcknowledgeHours: CalculateMeanHours(acknowledged
                        .Select(static i => i.TimeToAcknowledge)
                        .Where(static t => t.HasValue)
                        .Select(static t => t!.Value)),
                    MeanTimeToRecoveryHours: CalculateMeanHours(resolved
                        .Select(static i => i.TimeToRecovery)
                        .Where(static t => t.HasValue)
                        .Select(static t => t!.Value)));
            })
            .ToList();
    }

    /// <summary>
    /// Produces one slice per UTC calendar day from the day of <paramref name="periodStart"/> to the
    /// day of <paramref name="periodEnd"/>, inclusive. Days without events get zero counts.
    /// </summary>
    /// <remarks>
    /// Deployments are bucketed by their deployment day and incidents by their resolution day, so an
    /// incident resolved on a day outside the period does not appear in any slice.
    /// </remarks>
    private static IReadOnlyList<OutcomeCohortSlice> BuildDailyCohorts(
        DateTimeOffset periodStart,
        DateTimeOffset periodEnd,
        IReadOnlyList<DoraDeploymentEvent> deployments,
        IReadOnlyList<DoraIncidentEvent> resolvedIncidents)
    {
        var deploymentByDay = deployments
            .GroupBy(static d => DateOnly.FromDateTime(d.DeploymentTimestamp.UtcDateTime.Date))
            .ToDictionary(
                static g => g.Key,
                static g => (Deployments: g.Count(), FailedDeployments: g.Count(static d => d.IsFailure)));
        var resolvedByDay = resolvedIncidents
            .GroupBy(static i => DateOnly.FromDateTime(i.ResolvedAt!.Value.UtcDateTime.Date))
            .ToDictionary(static g => g.Key, static g => g.Count());

        var firstDay = DateOnly.FromDateTime(periodStart.UtcDateTime.Date);
        var lastDay = DateOnly.FromDateTime(periodEnd.UtcDateTime.Date);
        var cohorts = new List<OutcomeCohortSlice>();

        // Walk day numbers instead of calling DateOnly.AddDays: advancing past the final slice would
        // otherwise throw when the period ends on DateOnly.MaxValue (e.g. an open-ended period).
        for (var dayNumber = firstDay.DayNumber; dayNumber <= lastDay.DayNumber; dayNumber++)
        {
            var day = DateOnly.FromDayNumber(dayNumber);

            // A missing key leaves the out value at its default, i.e. zero counts.
            deploymentByDay.TryGetValue(day, out var deploymentStats);
            resolvedByDay.TryGetValue(day, out var resolvedCount);

            cohorts.Add(new OutcomeCohortSlice(
                Day: day,
                DeploymentCount: deploymentStats.Deployments,
                FailedDeploymentCount: deploymentStats.FailedDeployments,
                ResolvedIncidentCount: resolvedCount));
        }

        return cohorts;
    }

    /// <summary>
    /// Trims and lower-cases a pipeline id; null, empty or whitespace ids map to <c>"unknown"</c>.
    /// </summary>
    private static string NormalizePipelineId(string? pipelineId) =>
        string.IsNullOrWhiteSpace(pipelineId)
            ? UnknownPipelineId
            : pipelineId.Trim().ToLowerInvariant();

    /// <summary>
    /// Mean of the non-negative spans in hours, rounded to two decimals; 0 when there are none.
    /// Negative spans are ignored.
    /// </summary>
    private static double CalculateMeanHours(IEnumerable<TimeSpan> values)
    {
        var hours = values
            .Where(static span => span >= TimeSpan.Zero)
            .Select(static span => span.TotalHours)
            .ToList();
        if (hours.Count == 0)
        {
            return 0;
        }

        return Math.Round(hours.Average(), 2);
    }

    /// <summary>
    /// Median of the supplied values (mean of the two middle values for an even count); 0 when empty.
    /// </summary>
    private static double CalculateMedianHours(IEnumerable<double> values)
    {
        var sorted = values.OrderBy(static value => value).ToList();
        if (sorted.Count == 0)
        {
            return 0;
        }

        var mid = sorted.Count / 2;
        if (sorted.Count % 2 == 0)
        {
            return (sorted[mid - 1] + sorted[mid]) / 2.0;
        }

        return sorted[mid];
    }

    /// <summary>
    /// Buffers an async sequence into a list, observing <paramref name="cancellationToken"/> both in
    /// the enumerator and between items.
    /// </summary>
    private static async Task<List<T>> ToListAsync<T>(IAsyncEnumerable<T> source, CancellationToken cancellationToken)
    {
        var list = new List<T>();
        await foreach (var item in source.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            // Kept for sources whose enumerator ignores the token it is handed.
            cancellationToken.ThrowIfCancellationRequested();
            list.Add(item);
        }

        return list;
    }
}

View File

@@ -0,0 +1,80 @@
namespace StellaOps.Telemetry.Core;
/// <summary>
/// Service interface for recording and querying DORA metrics.
/// </summary>
/// <remarks>
/// Implementation notes in the member documentation describe <see cref="InMemoryDoraMetricsService"/>;
/// other implementations may behave differently.
/// </remarks>
public interface IDoraMetricsService
{
/// <summary>
/// Records a deployment event for DORA metrics tracking.
/// </summary>
/// <param name="deployment">The deployment event to record.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A task that completes when the event has been recorded.</returns>
Task RecordDeploymentAsync(DoraDeploymentEvent deployment, CancellationToken cancellationToken = default);
/// <summary>
/// Records an incident for MTTR tracking.
/// </summary>
/// <param name="incident">The incident event to record.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A task that completes when the incident has been recorded.</returns>
Task RecordIncidentAsync(DoraIncidentEvent incident, CancellationToken cancellationToken = default);
/// <summary>
/// Resolves an open incident.
/// </summary>
/// <remarks>
/// The in-memory implementation does nothing when the tenant has no open incident with the given ID.
/// </remarks>
/// <param name="tenantId">The tenant ID.</param>
/// <param name="incidentId">The incident ID to resolve.</param>
/// <param name="resolvedAt">When the incident was resolved.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A task that completes when the resolution has been applied.</returns>
Task ResolveIncidentAsync(string tenantId, string incidentId, DateTimeOffset resolvedAt, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a DORA metrics summary for a tenant and optional environment.
/// </summary>
/// <param name="tenantId">The tenant ID.</param>
/// <param name="environment">Optional environment filter.</param>
/// <param name="periodStart">Start of the period to analyze.</param>
/// <param name="periodEnd">End of the period to analyze.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A summary of DORA metrics for the period.</returns>
Task<DoraSummary> GetSummaryAsync(
string tenantId,
string? environment,
DateTimeOffset periodStart,
DateTimeOffset periodEnd,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets deployment events for a tenant within a time range.
/// </summary>
/// <remarks>
/// The in-memory implementation matches on the deployment timestamp with both bounds inclusive and
/// returns events ordered by that timestamp.
/// </remarks>
/// <param name="tenantId">The tenant ID.</param>
/// <param name="environment">Optional environment filter.</param>
/// <param name="from">Start of the time range.</param>
/// <param name="to">End of the time range.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Deployment events in the time range.</returns>
IAsyncEnumerable<DoraDeploymentEvent> GetDeploymentsAsync(
string tenantId,
string? environment,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets incident events for a tenant within a time range.
/// </summary>
/// <remarks>
/// The in-memory implementation matches on the incident start time with both bounds inclusive and
/// returns events ordered by start time.
/// </remarks>
/// <param name="tenantId">The tenant ID.</param>
/// <param name="environment">Optional environment filter.</param>
/// <param name="from">Start of the time range.</param>
/// <param name="to">End of the time range.</param>
/// <param name="includeOpen">Whether to include open incidents.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Incident events in the time range.</returns>
IAsyncEnumerable<DoraIncidentEvent> GetIncidentsAsync(
string tenantId,
string? environment,
DateTimeOffset from,
DateTimeOffset to,
bool includeOpen = true,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,23 @@
namespace StellaOps.Telemetry.Core;
/// <summary>
/// Service interface for deterministic outcome attribution and executive reporting.
/// </summary>
public interface IOutcomeAnalyticsService
{
/// <summary>
/// Builds an executive outcome report for a tenant and optional environment over a fixed period.
/// </summary>
/// <remarks>
/// The <see cref="DoraOutcomeAnalyticsService"/> implementation throws an
/// <see cref="ArgumentException"/> (or a subclass) when <paramref name="tenantId"/> is null or
/// whitespace, or when <paramref name="periodEnd"/> is earlier than <paramref name="periodStart"/>.
/// </remarks>
/// <param name="tenantId">Tenant to report for.</param>
/// <param name="environment">Optional environment filter.</param>
/// <param name="periodStart">Start of the reporting period.</param>
/// <param name="periodEnd">End of the reporting period.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Deterministic outcome report with attribution and cohort slices.</returns>
Task<OutcomeExecutiveReport> GetExecutiveReportAsync(
string tenantId,
string? environment,
DateTimeOffset periodStart,
DateTimeOffset periodEnd,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,281 @@
using System.Collections.Concurrent;
using System.Runtime.CompilerServices;
namespace StellaOps.Telemetry.Core;
/// <summary>
/// In-memory implementation of <see cref="IDoraMetricsService"/> for development and testing.
/// Production deployments should use a persistent storage implementation.
/// </summary>
/// <remarks>
/// Events are held in per-tenant lists keyed by the lower-cased tenant ID. Each list is guarded by a
/// lock on the list instance, and queries return snapshots taken under that lock. Nothing is evicted.
/// </remarks>
public sealed class InMemoryDoraMetricsService : IDoraMetricsService
{
    private readonly ConcurrentDictionary<string, List<DoraDeploymentEvent>> _deployments = new();
    private readonly ConcurrentDictionary<string, List<DoraIncidentEvent>> _incidents = new();
    private readonly DoraMetrics _metrics;
    private readonly DoraMetricsOptions _options;

    /// <summary>
    /// Initializes a new instance of <see cref="InMemoryDoraMetricsService"/>.
    /// </summary>
    /// <param name="metrics">Metrics sink that every recorded event is forwarded to.</param>
    /// <param name="options">SLO targets used by <see cref="GetSummaryAsync"/>; defaults are used when <c>null</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="metrics"/> is <c>null</c>.</exception>
    public InMemoryDoraMetricsService(DoraMetrics metrics, DoraMetricsOptions? options = null)
    {
        _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
        _options = options ?? new DoraMetricsOptions();
    }

    /// <inheritdoc/>
    public Task RecordDeploymentAsync(DoraDeploymentEvent deployment, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(deployment);

        var key = GetTenantKey(deployment.TenantId);
        var list = _deployments.GetOrAdd(key, _ => new List<DoraDeploymentEvent>());
        lock (list)
        {
            list.Add(deployment);
        }

        _metrics.RecordDeployment(deployment);
        return Task.CompletedTask;
    }

    /// <inheritdoc/>
    public Task RecordIncidentAsync(DoraIncidentEvent incident, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(incident);

        var key = GetTenantKey(incident.TenantId);
        var list = _incidents.GetOrAdd(key, _ => new List<DoraIncidentEvent>());
        lock (list)
        {
            list.Add(incident);
        }

        _metrics.RecordIncidentStarted(incident);
        return Task.CompletedTask;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// A no-op when the tenant has no incidents or no open incident with the given ID.
    /// </remarks>
    public Task ResolveIncidentAsync(string tenantId, string incidentId, DateTimeOffset resolvedAt, CancellationToken cancellationToken = default)
    {
        var key = GetTenantKey(tenantId);
        if (!_incidents.TryGetValue(key, out var list))
        {
            return Task.CompletedTask;
        }

        DoraIncidentEvent? resolved = null;
        lock (list)
        {
            var index = list.FindIndex(i => i.IncidentId == incidentId && i.IsOpen);
            if (index >= 0)
            {
                // Records are immutable, so the stored entry is replaced by a resolved copy.
                var original = list[index];
                resolved = original with { ResolvedAt = resolvedAt };
                list[index] = resolved;
            }
        }

        // Metrics are emitted outside the lock.
        if (resolved != null)
        {
            _metrics.RecordIncidentResolved(resolved);
        }

        return Task.CompletedTask;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Besides computing the summary, each call records a deployment-frequency and/or
    /// change-failure-rate SLO breach when the period contains deployments and misses the
    /// configured target.
    /// </remarks>
    public Task<DoraSummary> GetSummaryAsync(
        string tenantId,
        string? environment,
        DateTimeOffset periodStart,
        DateTimeOffset periodEnd,
        CancellationToken cancellationToken = default)
    {
        var deployments = GetDeploymentsInRange(tenantId, environment, periodStart, periodEnd);
        var incidents = GetIncidentsInRange(tenantId, environment, periodStart, periodEnd, resolvedOnly: true);

        // An empty or inverted period is treated as one day so the frequency stays finite.
        var periodDays = (periodEnd - periodStart).TotalDays;
        if (periodDays <= 0) periodDays = 1;

        // Deployment Frequency
        var totalDeployments = deployments.Count;
        var deploymentFrequency = totalDeployments / periodDays;

        // Change Failure Rate
        var successfulDeployments = deployments.Count(d => !d.IsFailure);
        var failedDeployments = deployments.Count(d => d.IsFailure);
        var changeFailureRate = totalDeployments > 0
            ? (failedDeployments * 100.0) / totalDeployments
            : 0.0;

        // Lead Time for Changes (median)
        var leadTimes = deployments
            .Select(d => d.LeadTime.TotalHours)
            .OrderBy(t => t)
            .ToList();
        var medianLeadTime = leadTimes.Count > 0
            ? CalculateMedian(leadTimes)
            : 0.0;

        // Mean Time to Recovery
        var recoveryTimes = incidents
            .Where(i => i.TimeToRecovery.HasValue)
            .Select(i => i.TimeToRecovery!.Value.TotalHours)
            .ToList();
        var mttr = recoveryTimes.Count > 0
            ? recoveryTimes.Average()
            : 0.0;

        // Classify performance (on the unrounded values)
        var performanceLevel = DoraMetrics.ClassifyPerformance(
            deploymentFrequency,
            medianLeadTime,
            changeFailureRate,
            mttr);

        // Check and record SLO breaches
        if (deploymentFrequency < _options.DeploymentFrequencySloPerDay && totalDeployments > 0)
        {
            _metrics.RecordDeploymentFrequencySloBreak(tenantId, environment ?? "all", deploymentFrequency);
        }

        if (changeFailureRate > _options.ChangeFailureRateSloPercent && totalDeployments > 0)
        {
            _metrics.RecordChangeFailureRateSloBreak(tenantId, environment ?? "all", changeFailureRate);
        }

        var summary = new DoraSummary(
            TenantId: tenantId,
            Environment: environment,
            PeriodStart: periodStart,
            PeriodEnd: periodEnd,
            DeploymentCount: totalDeployments,
            SuccessfulDeployments: successfulDeployments,
            FailedDeployments: failedDeployments,
            DeploymentFrequencyPerDay: Math.Round(deploymentFrequency, 4),
            MedianLeadTimeHours: Math.Round(medianLeadTime, 2),
            ChangeFailureRatePercent: Math.Round(changeFailureRate, 2),
            MeanTimeToRecoveryHours: Math.Round(mttr, 2),
            PerformanceLevel: performanceLevel);

        return Task.FromResult(summary);
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<DoraDeploymentEvent> GetDeploymentsAsync(
        string tenantId,
        string? environment,
        DateTimeOffset from,
        DateTimeOffset to,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var deployments = GetDeploymentsInRange(tenantId, environment, from, to);
        foreach (var deployment in deployments)
        {
            cancellationToken.ThrowIfCancellationRequested();
            yield return deployment;
        }

        await Task.CompletedTask; // Async enumerable pattern
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<DoraIncidentEvent> GetIncidentsAsync(
        string tenantId,
        string? environment,
        DateTimeOffset from,
        DateTimeOffset to,
        bool includeOpen = true,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var incidents = GetIncidentsInRange(tenantId, environment, from, to, resolvedOnly: !includeOpen);
        foreach (var incident in incidents)
        {
            cancellationToken.ThrowIfCancellationRequested();
            yield return incident;
        }

        await Task.CompletedTask; // Async enumerable pattern
    }

    /// <summary>
    /// Snapshot of the tenant's deployments whose timestamp lies in [from, to], optionally filtered
    /// by environment (case-insensitive), ordered by deployment timestamp.
    /// </summary>
    private List<DoraDeploymentEvent> GetDeploymentsInRange(
        string tenantId,
        string? environment,
        DateTimeOffset from,
        DateTimeOffset to)
    {
        var key = GetTenantKey(tenantId);
        if (!_deployments.TryGetValue(key, out var list))
        {
            return new List<DoraDeploymentEvent>();
        }

        lock (list)
        {
            var query = list.Where(d =>
                d.DeploymentTimestamp >= from &&
                d.DeploymentTimestamp <= to);

            if (!string.IsNullOrEmpty(environment))
            {
                // Static Equals tolerates an event that was recorded with a null environment.
                query = query.Where(d => string.Equals(d.Environment, environment, StringComparison.OrdinalIgnoreCase));
            }

            // Materialized inside the lock so the caller never enumerates the live list.
            return query.OrderBy(d => d.DeploymentTimestamp).ToList();
        }
    }

    /// <summary>
    /// Snapshot of the tenant's incidents whose start time lies in [from, to], optionally filtered by
    /// environment (case-insensitive) and by resolution state, ordered by start time.
    /// </summary>
    private List<DoraIncidentEvent> GetIncidentsInRange(
        string tenantId,
        string? environment,
        DateTimeOffset from,
        DateTimeOffset to,
        bool resolvedOnly)
    {
        var key = GetTenantKey(tenantId);
        if (!_incidents.TryGetValue(key, out var list))
        {
            return new List<DoraIncidentEvent>();
        }

        lock (list)
        {
            var query = list.Where(i =>
                i.StartedAt >= from &&
                i.StartedAt <= to);

            if (!string.IsNullOrEmpty(environment))
            {
                // Static Equals tolerates an event that was recorded with a null environment.
                query = query.Where(i => string.Equals(i.Environment, environment, StringComparison.OrdinalIgnoreCase));
            }

            if (resolvedOnly)
            {
                query = query.Where(i => !i.IsOpen);
            }

            // Materialized inside the lock so the caller never enumerates the live list.
            return query.OrderBy(i => i.StartedAt).ToList();
        }
    }

    /// <summary>
    /// Median of an already ascending-sorted list (mean of the two middle values for an even count);
    /// 0 when the list is empty.
    /// </summary>
    private static double CalculateMedian(List<double> sortedValues)
    {
        if (sortedValues.Count == 0) return 0;

        var mid = sortedValues.Count / 2;
        if (sortedValues.Count % 2 == 0)
        {
            return (sortedValues[mid - 1] + sortedValues[mid]) / 2.0;
        }

        return sortedValues[mid];
    }

    /// <summary>
    /// Dictionary key for a tenant: the tenant ID lower-cased with the invariant culture.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="tenantId"/> is <c>null</c>.</exception>
    private static string GetTenantKey(string tenantId)
    {
        // Fail with a descriptive argument exception instead of a NullReferenceException.
        ArgumentNullException.ThrowIfNull(tenantId);
        return tenantId.ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,50 @@
namespace StellaOps.Telemetry.Core;
/// <summary>
/// Executive outcome analytics report built from deployment and incident telemetry events.
/// </summary>
/// <remarks>
/// Parameter descriptions reflect how <see cref="DoraOutcomeAnalyticsService"/> populates the report.
/// </remarks>
/// <param name="TenantId">Tenant the report covers.</param>
/// <param name="Environment">Environment filter that was applied, or <c>null</c> for none.</param>
/// <param name="PeriodStart">Start of the reporting period.</param>
/// <param name="PeriodEnd">End of the reporting period.</param>
/// <param name="TotalDeployments">Number of deployments in the period.</param>
/// <param name="FailedDeployments">Number of those deployments that count as change failures.</param>
/// <param name="TotalIncidents">Number of incidents in the period, open ones included.</param>
/// <param name="ResolvedIncidents">Number of those incidents that are no longer open.</param>
/// <param name="AcknowledgedIncidents">Number of those incidents that have an acknowledgement time.</param>
/// <param name="MeanTimeToAcknowledgeHours">Mean acknowledgement time in hours, rounded to two decimals; 0 without samples.</param>
/// <param name="MeanTimeToRecoveryHours">Mean recovery time in hours, rounded to two decimals; 0 without samples.</param>
/// <param name="DeploymentAttribution">Per-pipeline deployment slices, ordered by pipeline ID.</param>
/// <param name="IncidentAttribution">Per-severity incident slices, ordered by severity.</param>
/// <param name="DailyCohorts">One slice per UTC day of the period, in chronological order.</param>
public sealed record OutcomeExecutiveReport(
string TenantId,
string? Environment,
DateTimeOffset PeriodStart,
DateTimeOffset PeriodEnd,
int TotalDeployments,
int FailedDeployments,
int TotalIncidents,
int ResolvedIncidents,
int AcknowledgedIncidents,
double MeanTimeToAcknowledgeHours,
double MeanTimeToRecoveryHours,
IReadOnlyList<DeploymentAttributionSlice> DeploymentAttribution,
IReadOnlyList<IncidentAttributionSlice> IncidentAttribution,
IReadOnlyList<OutcomeCohortSlice> DailyCohorts);
/// <summary>
/// Attribution slice for deployment outcomes grouped by pipeline.
/// </summary>
/// <remarks>
/// Parameter descriptions reflect how <see cref="DoraOutcomeAnalyticsService"/> populates the slice.
/// </remarks>
/// <param name="PipelineId">Trimmed, lower-cased pipeline ID; <c>"unknown"</c> when the deployment had none.</param>
/// <param name="DeploymentCount">Number of deployments attributed to the pipeline.</param>
/// <param name="FailedDeploymentCount">Number of those deployments that count as change failures.</param>
/// <param name="ChangeFailureRatePercent">Failed share of the pipeline's deployments as a percentage, rounded to two decimals.</param>
/// <param name="MedianLeadTimeHours">Median lead time of the pipeline's deployments in hours, rounded to two decimals.</param>
public sealed record DeploymentAttributionSlice(
string PipelineId,
int DeploymentCount,
int FailedDeploymentCount,
double ChangeFailureRatePercent,
double MedianLeadTimeHours);
/// <summary>
/// Attribution slice for incidents grouped by severity.
/// </summary>
/// <remarks>
/// Parameter descriptions reflect how <see cref="DoraOutcomeAnalyticsService"/> populates the slice.
/// </remarks>
/// <param name="Severity">Severity shared by the incidents in this slice.</param>
/// <param name="IncidentCount">Number of incidents with this severity, open ones included.</param>
/// <param name="ResolvedIncidentCount">Number of those incidents that are no longer open.</param>
/// <param name="AcknowledgedIncidentCount">Number of those incidents that have an acknowledgement time.</param>
/// <param name="MeanTimeToAcknowledgeHours">Mean acknowledgement time in hours, rounded to two decimals; 0 without samples.</param>
/// <param name="MeanTimeToRecoveryHours">Mean recovery time in hours, rounded to two decimals; 0 without samples.</param>
public sealed record IncidentAttributionSlice(
DoraIncidentSeverity Severity,
int IncidentCount,
int ResolvedIncidentCount,
int AcknowledgedIncidentCount,
double MeanTimeToAcknowledgeHours,
double MeanTimeToRecoveryHours);
/// <summary>
/// Daily cohort view used for trend reporting.
/// </summary>
/// <remarks>
/// Parameter descriptions reflect how <see cref="DoraOutcomeAnalyticsService"/> populates the slice.
/// </remarks>
/// <param name="Day">UTC calendar day the slice describes.</param>
/// <param name="DeploymentCount">Deployments whose deployment timestamp falls on that day.</param>
/// <param name="FailedDeploymentCount">Number of those deployments that count as change failures.</param>
/// <param name="ResolvedIncidentCount">Incidents whose resolution time falls on that day.</param>
public sealed record OutcomeCohortSlice(
DateOnly Day,
int DeploymentCount,
int FailedDeploymentCount,
int ResolvedIncidentCount);

View File

@@ -134,6 +134,44 @@ public static class TelemetryServiceCollectionExtensions
return services;
}
/// <summary>
/// Registers DORA (DevOps Research and Assessment) metrics for measuring software delivery performance.
/// Tracks the four key metrics: Deployment Frequency, Lead Time for Changes, Change Failure Rate, and MTTR.
/// </summary>
/// <remarks>
/// Registers <see cref="DoraMetricsOptions"/>, then adds <see cref="DoraMetrics"/>,
/// <see cref="IDoraMetricsService"/> (in-memory implementation) and
/// <see cref="IOutcomeAnalyticsService"/> as singletons unless a registration for the same service
/// type already exists.
/// </remarks>
/// <param name="services">Service collection to mutate.</param>
/// <param name="configureOptions">Optional options configuration including SLO targets.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddDoraMetrics(
    this IServiceCollection services,
    Action<DoraMetricsOptions>? configureOptions = null)
{
    ArgumentNullException.ThrowIfNull(services);

    // The configure step is always registered; it simply does nothing when no callback was given.
    services
        .AddOptions<DoraMetricsOptions>()
        .Configure(doraOptions => configureOptions?.Invoke(doraOptions));

    services.TryAddSingleton(static provider =>
        new DoraMetrics(provider.GetRequiredService<IOptions<DoraMetricsOptions>>().Value));

    services.TryAddSingleton<IDoraMetricsService>(static provider =>
        new InMemoryDoraMetricsService(
            provider.GetRequiredService<DoraMetrics>(),
            provider.GetRequiredService<IOptions<DoraMetricsOptions>>().Value));

    services.TryAddSingleton<IOutcomeAnalyticsService>(static provider =>
        new DoraOutcomeAnalyticsService(provider.GetRequiredService<IDoraMetricsService>()));

    return services;
}
/// <summary>
/// Registers incident mode services for toggling enhanced telemetry during incidents.
/// </summary>