feat: Implement approvals workflow and notifications integration
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Added approvals orchestration with persistence and workflow scaffolding.
- Integrated notifications insights and staged resume hooks.
- Introduced approval coordinator and policy notification bridge with unit tests.
- Added approval decision API with resume requeue and persisted plan snapshots.
- Documented the Excitor consensus API beta and provided JSON sample payload.
- Created analyzers to flag usage of deprecated merge service APIs.
- Implemented logging for artifact uploads and approval decision service.
- Added tests for PackRunApprovalDecisionService and related components.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
namespace StellaOps.Scheduler.WebService.GraphJobs;
|
||||
|
||||
internal readonly record struct GraphJobUpdateResult<TJob>(bool Updated, TJob Job) where TJob : class
|
||||
public readonly record struct GraphJobUpdateResult<TJob>(bool Updated, TJob Job) where TJob : class
|
||||
{
|
||||
public static GraphJobUpdateResult<TJob> UpdatedResult(TJob job) => new(true, job);
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ internal sealed class PolicySimulationMetricsProvider : IPolicySimulationMetrics
|
||||
private readonly Histogram<double> _latencyHistogram;
|
||||
private readonly object _snapshotLock = new();
|
||||
private IReadOnlyDictionary<string, long> _latestQueueSnapshot = new Dictionary<string, long>(StringComparer.Ordinal);
|
||||
private string _latestTenantId = string.Empty;
|
||||
private bool _disposed;
|
||||
|
||||
public PolicySimulationMetricsProvider(IPolicyRunJobRepository repository, TimeProvider? timeProvider = null)
|
||||
@@ -86,6 +87,7 @@ internal sealed class PolicySimulationMetricsProvider : IPolicySimulationMetrics
|
||||
lock (_snapshotLock)
|
||||
{
|
||||
_latestQueueSnapshot = queueCounts;
|
||||
_latestTenantId = tenantId;
|
||||
}
|
||||
|
||||
var sampleSize = 200;
|
||||
@@ -134,16 +136,21 @@ internal sealed class PolicySimulationMetricsProvider : IPolicySimulationMetrics
|
||||
/// <summary>
/// Produces one queue-depth measurement per status from the most recently stored
/// queue snapshot, tagging each measurement with its status and the tenant that
/// the snapshot was stored for.
/// </summary>
/// <returns>
/// A lazily evaluated sequence with one <see cref="Measurement{T}"/> per snapshot entry;
/// empty when the snapshot has no entries.
/// </returns>
private IEnumerable<Measurement<long>> ObserveQueueDepth()
{
    IReadOnlyDictionary<string, long> snapshot;
    string tenantId;

    // Read both fields under the lock that guards their assignment so the
    // snapshot and the tenant id always belong to the same update.
    lock (_snapshotLock)
    {
        snapshot = _latestQueueSnapshot;
        tenantId = _latestTenantId;
    }

    // The tenant field starts out empty; report a stable placeholder instead of a blank tag.
    tenantId = string.IsNullOrWhiteSpace(tenantId) ? "unknown" : tenantId;

    foreach (var pair in snapshot)
    {
        yield return new Measurement<long>(
            pair.Value,
            new KeyValuePair<string, object?>("status", pair.Key),
            new KeyValuePair<string, object?>("tenantId", tenantId));
    }
}
|
||||
|
||||
|
||||
@@ -30,7 +30,8 @@
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|----|--------|----------|------------|-------------|---------------|
|
||||
| SCHED-CONSOLE-27-001 | DONE (2025-11-03) | Scheduler WebService Guild, Policy Registry Guild | SCHED-WEB-16-103, REGISTRY-API-27-005 | Provide policy batch simulation orchestration endpoints (`/policies/simulations` POST/GET) exposing run creation, shard status, SSE progress, cancellation, and retries with RBAC enforcement. | API handles shard lifecycle with SSE heartbeats + retry headers; unauthorized requests rejected; integration tests cover submit/cancel/resume flows. |
|
||||
| SCHED-CONSOLE-27-002 | DOING (2025-11-03) | Scheduler WebService Guild, Observability Guild | SCHED-CONSOLE-27-001 | Emit telemetry endpoints/metrics (`policy_simulation_queue_depth`, `policy_simulation_latency`) and webhook callbacks for completion/failure consumed by Registry. | Metrics exposed via gateway, dashboards seeded, webhook contract documented, integration tests validate metrics emission. |
|
||||
| SCHED-CONSOLE-27-002 | DOING (2025-11-03) | Scheduler WebService Guild, Observability Guild | SCHED-CONSOLE-27-001 | Emit telemetry endpoints/metrics (`policy_simulation_queue_depth`, `policy_simulation_latency`) and webhook callbacks for completion/failure consumed by Registry. | Metrics exposed via gateway, dashboards seeded, webhook contract documented, integration tests validate metrics emission. |
|
||||
> 2025-11-06: Added tenant-aware tagging to `policy_simulation_queue_depth` metrics and unit coverage for the metrics provider snapshot.
|
||||
|
||||
## Vulnerability Explorer (Sprint 29)
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics.Metrics;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using MongoDB.Driver;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
using StellaOps.Scheduler.WebService.PolicySimulations;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Scheduler.WebService.Tests;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="PolicySimulationMetricsProvider"/> covering the captured
/// queue-depth response and the tags emitted on the <c>policy_simulation_queue_depth</c> instrument.
/// </summary>
public sealed class PolicySimulationMetricsProviderTests
{
    /// <summary>
    /// Captures metrics for <c>tenant-alpha</c> against a stubbed repository and verifies
    /// both the returned queue-depth totals and that the queue-depth instrument reports a
    /// measurement tagged with the status and the tenant.
    /// </summary>
    [Fact]
    public async Task CaptureAsync_UpdatesSnapshotAndEmitsTenantTaggedGauge()
    {
        var repository = new StubPolicyRunJobRepository();
        repository.QueueCounts[PolicyRunJobStatus.Pending] = 3;
        repository.QueueCounts[PolicyRunJobStatus.Dispatching] = 1;
        repository.QueueCounts[PolicyRunJobStatus.Submitted] = 2;

        var now = DateTimeOffset.Parse("2025-11-06T10:00:00Z", CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal);
        repository.TerminalJobs.Add(CreateJob("job-1", PolicyRunJobStatus.Completed, now.AddMinutes(-30), now.AddMinutes(-5)));
        repository.TerminalJobs.Add(CreateJob("job-2", PolicyRunJobStatus.Failed, now.AddMinutes(-20), now.AddMinutes(-2)));

        using var provider = new PolicySimulationMetricsProvider(repository);

        var response = await provider.CaptureAsync("tenant-alpha", CancellationToken.None);

        Assert.Equal(6, response.QueueDepth.Total);
        Assert.Equal(3, response.QueueDepth.ByStatus["pending"]);
        Assert.Equal(2, response.QueueDepth.ByStatus["submitted"]);

        var measurements = new List<(string Status, string Tenant, long Value)>();

        // The outer variable is named differently from the callback parameter so the
        // lambda does not shadow it.
        using var meterListener = new MeterListener
        {
            InstrumentPublished = (instrument, listener) =>
            {
                // Subscribe only to the queue-depth instrument of the provider's meter.
                if (instrument.Meter.Name == "StellaOps.Scheduler.WebService.PolicySimulations" &&
                    instrument.Name == "policy_simulation_queue_depth")
                {
                    listener.EnableMeasurementEvents(instrument);
                }
            }
        };

        meterListener.SetMeasurementEventCallback<long>((instrument, measurement, tags, state) =>
        {
            var status = "";
            var tenant = "";

            foreach (var tag in tags)
            {
                if (string.Equals(tag.Key, "status", StringComparison.Ordinal))
                {
                    status = tag.Value?.ToString() ?? string.Empty;
                }

                if (string.Equals(tag.Key, "tenantId", StringComparison.Ordinal))
                {
                    tenant = tag.Value?.ToString() ?? string.Empty;
                }
            }

            measurements.Add((status, tenant, measurement));
        });

        meterListener.Start();

        // Observable instruments only report when polled, so trigger a collection explicitly.
        meterListener.RecordObservableInstruments();

        Assert.Contains(measurements, item =>
            item.Status == "pending" &&
            item.Tenant == "tenant-alpha" &&
            item.Value == 3);
    }

    /// <summary>
    /// Builds a simulate-mode <see cref="PolicyRunJob"/> for tenant <c>tenant-alpha</c> and
    /// policy <c>policy-x</c> with timestamps derived from the supplied status.
    /// </summary>
    /// <param name="id">Job identifier; also used to derive the run id <c>run-{id}</c>.</param>
    /// <param name="status">Status assigned to the job; drives which optional timestamps are set.</param>
    /// <param name="queuedAt">Time the job was queued.</param>
    /// <param name="finishedAt">Time used for the completion or cancellation timestamp.</param>
    /// <returns>The constructed job.</returns>
    private static PolicyRunJob CreateJob(string id, PolicyRunJobStatus status, DateTimeOffset queuedAt, DateTimeOffset finishedAt)
    {
        // Completed and failed jobs share the same submitted/completed timestamp rule.
        var ranToCompletion = status is PolicyRunJobStatus.Completed or PolicyRunJobStatus.Failed;

        DateTimeOffset? submittedAt = ranToCompletion ? queuedAt.AddMinutes(2) : null;
        DateTimeOffset? completedAt = ranToCompletion ? finishedAt : null;
        DateTimeOffset? cancelledAt = status is PolicyRunJobStatus.Cancelled ? finishedAt : null;

        // Only failed jobs carry an error message.
        var lastError = status is PolicyRunJobStatus.Failed ? "policy engine timeout" : null;

        return new PolicyRunJob(
            SchedulerSchemaVersions.PolicyRunJob,
            id,
            "tenant-alpha",
            "policy-x",
            1,
            PolicyRunMode.Simulate,
            PolicyRunPriority.Normal,
            0,
            $"run-{id}",
            "user:actor",
            null,
            null,
            PolicyRunInputs.Empty,
            queuedAt,
            status,
            1,
            finishedAt,
            lastError,
            queuedAt,
            finishedAt,
            finishedAt,
            submittedAt,
            completedAt,
            null,
            null,
            false,
            null,
            null,
            cancelledAt);
    }

    /// <summary>
    /// In-memory <see cref="IPolicyRunJobRepository"/> stub. Counts and listings are served
    /// from <see cref="QueueCounts"/> and <see cref="TerminalJobs"/>; every other operation
    /// is a no-op that returns a fixed result.
    /// </summary>
    private sealed class StubPolicyRunJobRepository : IPolicyRunJobRepository
    {
        /// <summary>Per-status counts returned by <see cref="CountAsync"/>.</summary>
        public Dictionary<PolicyRunJobStatus, long> QueueCounts { get; } = new();

        /// <summary>Jobs returned by <see cref="ListAsync"/>.</summary>
        public List<PolicyRunJob> TerminalJobs { get; } = new();

        /// <summary>
        /// Sums the configured counts for the requested statuses. The tenant and mode
        /// arguments are ignored, and statuses without a configured count contribute zero.
        /// </summary>
        public Task<long> CountAsync(string tenantId, PolicyRunMode mode, IReadOnlyCollection<PolicyRunJobStatus> statuses, CancellationToken cancellationToken = default)
        {
            var total = 0L;
            foreach (var status in statuses)
            {
                if (QueueCounts.TryGetValue(status, out var count))
                {
                    total += count;
                }
            }

            return Task.FromResult(total);
        }

        /// <summary>
        /// Returns <see cref="TerminalJobs"/>, filtered by status when a non-empty status
        /// collection is supplied. All other filters and the limit are ignored.
        /// </summary>
        public Task<IReadOnlyList<PolicyRunJob>> ListAsync(
            string tenantId,
            string? policyId = null,
            PolicyRunMode? mode = null,
            IReadOnlyCollection<PolicyRunJobStatus>? statuses = null,
            DateTimeOffset? queuedAfter = null,
            int limit = 50,
            IClientSessionHandle? session = null,
            CancellationToken cancellationToken = default)
        {
            IReadOnlyList<PolicyRunJob> filtered = TerminalJobs;
            if (statuses is { Count: > 0 })
            {
                filtered = TerminalJobs.Where(job => statuses.Contains(job.Status)).ToList();
            }

            return Task.FromResult(filtered);
        }

        /// <summary>Always returns <c>null</c>.</summary>
        public Task<PolicyRunJob?> GetAsync(string tenantId, string jobId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default)
            => Task.FromResult<PolicyRunJob?>(null);

        /// <summary>Always returns <c>null</c>.</summary>
        public Task<PolicyRunJob?> GetByRunIdAsync(string tenantId, string runId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default)
            => Task.FromResult<PolicyRunJob?>(null);

        /// <summary>Does nothing and completes immediately.</summary>
        public Task InsertAsync(PolicyRunJob job, IClientSessionHandle? session = null, CancellationToken cancellationToken = default)
            => Task.CompletedTask;

        /// <summary>Always returns <c>null</c>.</summary>
        public Task<PolicyRunJob?> LeaseAsync(string leaseOwner, DateTimeOffset now, TimeSpan leaseDuration, int maxAttempts, IClientSessionHandle? session = null, CancellationToken cancellationToken = default)
            => Task.FromResult<PolicyRunJob?>(null);

        /// <summary>Always reports success without storing anything.</summary>
        public Task<bool> ReplaceAsync(PolicyRunJob job, string? expectedLeaseOwner = null, IClientSessionHandle? session = null, CancellationToken cancellationToken = default)
            => Task.FromResult(true);
    }
}
|
||||
Reference in New Issue
Block a user