feat: Update policy simulation metrics and tests to align with new naming conventions and enhance telemetry coverage
This commit is contained in:
@@ -0,0 +1,242 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics.Metrics;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
using StellaOps.Scheduler.WebService.PolicySimulations;
|
||||
|
||||
namespace StellaOps.Scheduler.WebService.Tests;
|
||||
|
||||
public sealed class PolicySimulationMetricsProviderTests
|
||||
{
|
||||
/// <summary>
/// Feeds the metrics provider a stub repository holding fixed per-status counts and two
/// finished simulation jobs, then checks the queue-depth and latency figures it reports.
/// </summary>
[Fact]
public async Task CaptureAsync_ComputesQueueDepthAndLatency()
{
    // Arrange: four jobs still in the queue, spread over three statuses.
    var countsByStatus = new Dictionary<PolicyRunJobStatus, long>
    {
        { PolicyRunJobStatus.Pending, 2 },
        { PolicyRunJobStatus.Dispatching, 1 },
        { PolicyRunJobStatus.Submitted, 1 }
    };

    // All timestamps are offsets from one reference instant, so the intervals between
    // them are fixed regardless of when the test runs.
    var reference = DateTimeOffset.UtcNow;

    // Queued 30 s ago and completed 20 s ago.
    var completedJob = CreateJob(
        status: PolicyRunJobStatus.Completed,
        queuedAt: reference.AddSeconds(-30),
        submittedAt: reference.AddSeconds(-28),
        completedAt: reference.AddSeconds(-20));

    // Queued 50 s ago and cancelled 20 s ago; it never completed.
    var cancelledJob = CreateJob(
        status: PolicyRunJobStatus.Cancelled,
        queuedAt: reference.AddSeconds(-50),
        submittedAt: reference.AddSeconds(-48),
        completedAt: null,
        cancelledAt: reference.AddSeconds(-20));

    var finishedJobs = new List<PolicyRunJob> { completedJob, cancelledJob };
    var repository = new StubPolicyRunJobRepository(countsByStatus, finishedJobs);

    await using var provider = new PolicySimulationMetricsProvider(repository);

    // Act
    var snapshot = await provider.CaptureAsync("tenant-alpha", CancellationToken.None);

    // Assert: queue depth mirrors the stubbed counts, keyed by lower-case status name.
    Assert.Equal(4, snapshot.QueueDepth.Total);
    Assert.Equal(2, snapshot.QueueDepth.ByStatus["pending"]);
    Assert.Equal(1, snapshot.QueueDepth.ByStatus["dispatching"]);
    Assert.Equal(1, snapshot.QueueDepth.ByStatus["submitted"]);

    // Assert: latency statistics over the two finished jobs.
    // NOTE(review): presumably each sample is queuedAt -> completion/cancellation (10 s and
    // 30 s), which is consistent with the expected mean of 20 — confirm against the provider.
    Assert.Equal(2, snapshot.Latency.Samples);
    Assert.Equal(20.0, snapshot.Latency.Mean);
    Assert.Equal(20.0, snapshot.Latency.P50);
    Assert.Equal(28.0, snapshot.Latency.P90);
    Assert.Equal(29.0, snapshot.Latency.P95);
    Assert.Equal(30.0, snapshot.Latency.P99);
}
|
||||
|
||||
/// <summary>
/// Verifies that <c>RecordLatency</c> publishes exactly one <c>double</c> measurement on the
/// <c>policy_simulation_latency_seconds</c> instrument of the policy-simulation meter, and
/// that the value is 12 seconds for a run queued 12 seconds before it finished.
/// </summary>
[Fact]
public void RecordLatency_EmitsHistogramMeasurement()
{
    const string meterName = "StellaOps.Scheduler.WebService.PolicySimulations";
    const string instrumentName = "policy_simulation_latency_seconds";

    // Arrange: the repository is not expected to serve any data in this test.
    var repo = new StubPolicyRunJobRepository(
        counts: new Dictionary<PolicyRunJobStatus, long>(),
        jobs: Array.Empty<PolicyRunJob>());

    using var provider = new PolicySimulationMetricsProvider(repo);

    var measurements = new List<double>();

    // The listener lives only for the duration of this block, so it is disposed exactly once
    // and has stopped collecting before the assertions run.
    using (var listener = new MeterListener
    {
        // Subscribe solely to the latency instrument of the policy-simulation meter.
        InstrumentPublished = (instrument, meterListener) =>
        {
            if (instrument.Meter.Name == meterName &&
                instrument.Name == instrumentName)
            {
                meterListener.EnableMeasurementEvents(instrument);
            }
        }
    })
    {
        listener.SetMeasurementEventCallback<double>((instrument, measurement, tags, state) =>
        {
            if (instrument.Name == instrumentName)
            {
                measurements.Add(measurement);
            }
        });

        listener.Start();

        // A successful simulation run: queued 12 s ago, started 10 s ago, finished "now".
        var now = DateTimeOffset.UtcNow;
        var status = new PolicyRunStatus(
            runId: "run-1",
            tenantId: "tenant-alpha",
            policyId: "policy-alpha",
            policyVersion: 1,
            mode: PolicyRunMode.Simulate,
            status: PolicyRunExecutionStatus.Succeeded,
            priority: PolicyRunPriority.Normal,
            queuedAt: now.AddSeconds(-12),
            startedAt: now.AddSeconds(-10),
            finishedAt: now,
            stats: PolicyRunStats.Empty,
            inputs: PolicyRunInputs.Empty,
            determinismHash: null,
            errorCode: null,
            error: null,
            attempts: 1,
            traceId: null,
            explainUri: null,
            metadata: ImmutableSortedDictionary<string, string>.Empty,
            cancellationRequested: false,
            cancellationRequestedAt: null,
            cancellationReason: null,
            schemaVersion: null);

        // Act
        provider.RecordLatency(status, now);
    }

    // Assert: one sample, equal to the 12 s between queuedAt and the finish/observation
    // time (both are "now" here), compared to six decimal places.
    Assert.Single(measurements);
    Assert.Equal(12, measurements[0], precision: 6);
}
|
||||
|
||||
/// <summary>
/// Builds a simulate-mode <see cref="PolicyRunJob"/> fixture for tenant "tenant-alpha" and
/// policy "policy-alpha" with a freshly generated id and the supplied lifecycle timestamps.
/// </summary>
/// <param name="status">Status to stamp on the job.</param>
/// <param name="queuedAt">Used for the job's QueuedAt, CreatedAt and AvailableAt values.</param>
/// <param name="submittedAt">Used for the job's SubmittedAt and LastAttemptAt values.</param>
/// <param name="completedAt">Completion time, or <c>null</c> when the job did not complete.</param>
/// <param name="cancelledAt">Cancellation time, or <c>null</c> when the job was not cancelled.</param>
/// <returns>The populated job record.</returns>
private static PolicyRunJob CreateJob(
    PolicyRunJobStatus status,
    DateTimeOffset queuedAt,
    DateTimeOffset submittedAt,
    DateTimeOffset? completedAt,
    DateTimeOffset? cancelledAt = null)
{
    var jobId = Guid.NewGuid().ToString("N");

    // UpdatedAt is the first available of: completion, cancellation, submission.
    DateTimeOffset lastTouchedAt;
    if (completedAt.HasValue)
    {
        lastTouchedAt = completedAt.Value;
    }
    else if (cancelledAt.HasValue)
    {
        lastTouchedAt = cancelledAt.Value;
    }
    else
    {
        lastTouchedAt = submittedAt;
    }

    return new PolicyRunJob(
        SchemaVersion: SchedulerSchemaVersions.PolicyRunJob,
        Id: jobId,
        TenantId: "tenant-alpha",
        PolicyId: "policy-alpha",
        PolicyVersion: 1,
        Mode: PolicyRunMode.Simulate,
        Priority: PolicyRunPriority.Normal,
        PriorityRank: 0,
        RunId: $"run:{jobId}",
        RequestedBy: "tester",
        CorrelationId: null,
        Metadata: ImmutableSortedDictionary<string, string>.Empty,
        Inputs: PolicyRunInputs.Empty,
        QueuedAt: queuedAt,
        Status: status,
        AttemptCount: 1,
        LastAttemptAt: submittedAt,
        LastError: null,
        CreatedAt: queuedAt,
        UpdatedAt: lastTouchedAt,
        AvailableAt: queuedAt,
        SubmittedAt: submittedAt,
        CompletedAt: completedAt,
        LeaseOwner: null,
        LeaseExpiresAt: null,
        CancellationRequested: false,
        CancellationRequestedAt: null,
        CancellationReason: null,
        CancelledAt: cancelledAt);
}
|
||||
|
||||
/// <summary>
/// In-memory <see cref="IPolicyRunJobRepository"/> test double. It answers count and list
/// queries from data fixed at construction time and rejects every other operation.
/// </summary>
private sealed class StubPolicyRunJobRepository : IPolicyRunJobRepository
{
    private readonly IReadOnlyDictionary<PolicyRunJobStatus, long> _counts;
    private readonly IReadOnlyList<PolicyRunJob> _jobs;

    /// <summary>Creates the stub over canned data.</summary>
    /// <param name="counts">Job count to report for each status.</param>
    /// <param name="jobs">Jobs returned by every <see cref="ListAsync"/> call.</param>
    public StubPolicyRunJobRepository(
        IReadOnlyDictionary<PolicyRunJobStatus, long> counts,
        IReadOnlyList<PolicyRunJob> jobs)
        => (_counts, _jobs) = (counts, jobs);

    /// <summary>
    /// Adds up the canned counts of the requested statuses. Statuses with no canned count
    /// contribute zero; <paramref name="tenantId"/>, <paramref name="mode"/> and the
    /// cancellation token are ignored.
    /// </summary>
    public Task<long> CountAsync(
        string tenantId,
        PolicyRunMode mode,
        IReadOnlyCollection<PolicyRunJobStatus> statuses,
        CancellationToken cancellationToken = default)
    {
        var sum = 0L;
        foreach (var requested in statuses)
        {
            // GetValueOrDefault yields 0 for a status that has no entry.
            sum += _counts.GetValueOrDefault(requested);
        }

        return Task.FromResult(sum);
    }

    /// <summary>
    /// Returns the canned job list as-is; none of the filter, limit, session or
    /// cancellation arguments are applied.
    /// </summary>
    public Task<IReadOnlyList<PolicyRunJob>> ListAsync(
        string tenantId,
        string? policyId = null,
        PolicyRunMode? mode = null,
        IReadOnlyCollection<PolicyRunJobStatus>? statuses = null,
        DateTimeOffset? queuedAfter = null,
        int limit = 50,
        MongoDB.Driver.IClientSessionHandle? session = null,
        CancellationToken cancellationToken = default)
    {
        return Task.FromResult(_jobs);
    }

    // The remaining repository members are implemented explicitly and always throw
    // NotSupportedException: this stub only supports CountAsync and ListAsync.

    Task IPolicyRunJobRepository.InsertAsync(
        PolicyRunJob job,
        MongoDB.Driver.IClientSessionHandle? session,
        CancellationToken cancellationToken)
    {
        throw new NotSupportedException();
    }

    Task<PolicyRunJob?> IPolicyRunJobRepository.GetAsync(
        string tenantId,
        string jobId,
        MongoDB.Driver.IClientSessionHandle? session,
        CancellationToken cancellationToken)
    {
        throw new NotSupportedException();
    }

    Task<PolicyRunJob?> IPolicyRunJobRepository.GetByRunIdAsync(
        string tenantId,
        string runId,
        MongoDB.Driver.IClientSessionHandle? session,
        CancellationToken cancellationToken)
    {
        throw new NotSupportedException();
    }

    Task<PolicyRunJob?> IPolicyRunJobRepository.LeaseAsync(
        string leaseOwner,
        DateTimeOffset now,
        TimeSpan leaseDuration,
        int maxAttempts,
        MongoDB.Driver.IClientSessionHandle? session,
        CancellationToken cancellationToken)
    {
        throw new NotSupportedException();
    }

    Task<bool> IPolicyRunJobRepository.ReplaceAsync(
        PolicyRunJob job,
        string? expectedLeaseOwner,
        MongoDB.Driver.IClientSessionHandle? session,
        CancellationToken cancellationToken)
    {
        throw new NotSupportedException();
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user