feat: Document completed tasks for KMS, Cryptography, and Plugin Libraries

- Added detailed task completion records for KMS interface implementation and CLI support for file-based keys.
- Documented security enhancements including Argon2id password hashing, audit event contracts, and rate limiting configurations.
- Included scoped service support and integration updates for the Plugin platform, ensuring proper DI handling and testing coverage.
This commit is contained in:
master
2025-10-31 14:37:45 +02:00
parent 240e8ff25d
commit 15b4a1de6a
312 changed files with 6399 additions and 3319 deletions

View File

@@ -1,41 +1,185 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scheduler.WebService.Options;
namespace StellaOps.Scheduler.WebService.GraphJobs.Events;
internal sealed class GraphJobEventPublisher : IGraphJobCompletionPublisher
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
private readonly IOptionsMonitor<SchedulerEventsOptions> _options;
private readonly ILogger<GraphJobEventPublisher> _logger;
public GraphJobEventPublisher(
IOptionsMonitor<SchedulerEventsOptions> options,
ILogger<GraphJobEventPublisher> logger)
{
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public Task PublishAsync(GraphJobCompletionNotification notification, CancellationToken cancellationToken)
{
var options = _options.CurrentValue;
if (!options.GraphJobs.Enabled)
{
_logger.LogDebug("Graph job events disabled; skipping emission for {JobId}.", notification.Job.Id);
return Task.CompletedTask;
}
var envelope = GraphJobEventFactory.Create(notification);
var json = JsonSerializer.Serialize(envelope, SerializerOptions);
_logger.LogInformation("{EventJson}", json);
return Task.CompletedTask;
}
}
using System;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StackExchange.Redis;
using StellaOps.Scheduler.WebService.Options;
namespace StellaOps.Scheduler.WebService.GraphJobs.Events;
internal sealed class GraphJobEventPublisher : IGraphJobCompletionPublisher, IAsyncDisposable
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
private readonly IOptionsMonitor<SchedulerEventsOptions> _options;
private readonly IRedisConnectionFactory _connectionFactory;
private readonly ILogger<GraphJobEventPublisher> _logger;
private readonly SemaphoreSlim _connectionGate = new(1, 1);
private IConnectionMultiplexer? _connection;
private bool _disposed;
public GraphJobEventPublisher(
IOptionsMonitor<SchedulerEventsOptions> options,
IRedisConnectionFactory connectionFactory,
ILogger<GraphJobEventPublisher> logger)
{
_options = options ?? throw new ArgumentNullException(nameof(options));
_connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task PublishAsync(GraphJobCompletionNotification notification, CancellationToken cancellationToken)
{
if (notification is null)
{
throw new ArgumentNullException(nameof(notification));
}
var options = _options.CurrentValue?.GraphJobs ?? new GraphJobEventsOptions();
if (!options.Enabled)
{
_logger.LogDebug("Graph job events disabled; skipping emission for {JobId}.", notification.Job.Id);
return;
}
if (!string.Equals(options.Driver, "redis", StringComparison.OrdinalIgnoreCase))
{
_logger.LogWarning(
"Graph job events configured with unsupported driver '{Driver}'. Falling back to logging.",
options.Driver);
LogEnvelope(notification);
return;
}
try
{
var database = await GetDatabaseAsync(options, cancellationToken).ConfigureAwait(false);
var envelope = GraphJobEventFactory.Create(notification);
var payload = JsonSerializer.Serialize(envelope, SerializerOptions);
var entries = new[]
{
new NameValueEntry("event", payload),
new NameValueEntry("kind", envelope.Kind),
new NameValueEntry("tenant", envelope.Tenant),
new NameValueEntry("occurredAt", envelope.Timestamp.ToString("O")),
new NameValueEntry("jobId", notification.Job.Id),
new NameValueEntry("status", notification.Status.ToString())
};
var streamKey = string.IsNullOrWhiteSpace(options.Stream) ? "stella.events" : options.Stream;
var publishTask = CreatePublishTask(database, streamKey, entries, options.MaxStreamLength);
if (options.PublishTimeoutSeconds > 0)
{
var timeout = TimeSpan.FromSeconds(options.PublishTimeoutSeconds);
await publishTask.WaitAsync(timeout, cancellationToken).ConfigureAwait(false);
}
else
{
await publishTask.ConfigureAwait(false);
}
_logger.LogDebug("Published graph job event {JobId} to stream {Stream}.", notification.Job.Id, streamKey);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to publish graph job completion for {JobId}; logging payload instead.", notification.Job.Id);
LogEnvelope(notification);
}
}
private Task<RedisValue> CreatePublishTask(IDatabase database, string streamKey, NameValueEntry[] entries, long maxStreamLength)
{
if (maxStreamLength > 0)
{
var clamped = (int)Math.Min(maxStreamLength, int.MaxValue);
return database.StreamAddAsync(streamKey, entries, maxLength: clamped, useApproximateMaxLength: true);
}
return database.StreamAddAsync(streamKey, entries);
}
private async Task<IDatabase> GetDatabaseAsync(GraphJobEventsOptions options, CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
if (_connection is { IsConnected: true })
{
return _connection.GetDatabase();
}
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_connection is null || !_connection.IsConnected)
{
var configuration = ConfigurationOptions.Parse(options.Dsn);
configuration.AbortOnConnectFail = false;
if (options.DriverSettings.TryGetValue("clientName", out var clientName) && !string.IsNullOrWhiteSpace(clientName))
{
configuration.ClientName = clientName;
}
if (options.DriverSettings.TryGetValue("ssl", out var sslValue) && bool.TryParse(sslValue, out var ssl))
{
configuration.Ssl = ssl;
}
if (options.DriverSettings.TryGetValue("password", out var password) && !string.IsNullOrWhiteSpace(password))
{
configuration.Password = password;
}
_connection = await _connectionFactory.ConnectAsync(configuration, cancellationToken).ConfigureAwait(false);
_logger.LogInformation("Connected graph job publisher to Redis stream {Stream}.", options.Stream);
}
}
finally
{
_connectionGate.Release();
}
return _connection!.GetDatabase();
}
private void LogEnvelope(GraphJobCompletionNotification notification)
{
var envelope = GraphJobEventFactory.Create(notification);
var json = JsonSerializer.Serialize(envelope, SerializerOptions);
_logger.LogInformation("{EventJson}", json);
}
public async ValueTask DisposeAsync()
{
if (_disposed)
{
return;
}
_disposed = true;
if (_connection is not null)
{
try
{
await _connection.CloseAsync();
}
catch (Exception ex)
{
_logger.LogDebug(ex, "Error while closing graph job Redis connection.");
}
_connection.Dispose();
}
_connectionGate.Dispose();
}
}

View File

@@ -0,0 +1,8 @@
using StackExchange.Redis;
namespace StellaOps.Scheduler.WebService.GraphJobs.Events;
internal interface IRedisConnectionFactory
{
Task<IConnectionMultiplexer> ConnectAsync(ConfigurationOptions options, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,26 @@
using StackExchange.Redis;
namespace StellaOps.Scheduler.WebService.GraphJobs.Events;
internal sealed class RedisConnectionFactory : IRedisConnectionFactory
{
public async Task<IConnectionMultiplexer> ConnectAsync(ConfigurationOptions options, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(options);
var completionSource = new TaskCompletionSource<IConnectionMultiplexer>(TaskCreationOptions.RunContinuationsAsynchronously);
cancellationToken.Register(() => completionSource.TrySetCanceled(cancellationToken));
try
{
var connection = await ConnectionMultiplexer.ConnectAsync(options).ConfigureAwait(false);
completionSource.TrySetResult(connection);
}
catch (Exception ex)
{
completionSource.TrySetException(ex);
}
return await completionSource.Task.ConfigureAwait(false);
}
}

View File

@@ -3,7 +3,8 @@ namespace StellaOps.Scheduler.WebService.Options;
/// <summary>
/// Scheduler WebService event options (outbound + inbound).
/// </summary>
using System;
using System;
using System.Collections.Generic;
public sealed class SchedulerEventsOptions
{
@@ -12,13 +13,43 @@ public sealed class SchedulerEventsOptions
public SchedulerInboundWebhooksOptions Webhooks { get; set; } = new();
}
public sealed class GraphJobEventsOptions
{
/// <summary>
/// Enables emission of legacy <c>scheduler.graph.job.completed@1</c> events.
/// </summary>
public bool Enabled { get; set; }
}
public sealed class GraphJobEventsOptions
{
/// <summary>
/// Enables emission of legacy <c>scheduler.graph.job.completed@1</c> events.
/// </summary>
public bool Enabled { get; set; }
/// <summary>
/// Event transport driver (defaults to <c>redis</c>).
/// </summary>
public string Driver { get; set; } = "redis";
/// <summary>
/// Connection string for the event transport.
/// </summary>
public string Dsn { get; set; } = string.Empty;
/// <summary>
/// Stream/topic identifier for published events.
/// </summary>
public string Stream { get; set; } = "stella.events";
/// <summary>
/// Maximum time in seconds to wait for the transport to accept an event.
/// </summary>
public double PublishTimeoutSeconds { get; set; } = 5;
/// <summary>
/// Maximum number of events to retain in the stream.
/// </summary>
public long MaxStreamLength { get; set; } = 10000;
/// <summary>
/// Additional transport-specific settings (e.g., <c>clientName</c>, <c>ssl</c>).
/// </summary>
public IDictionary<string, string> DriverSettings { get; set; } = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
}
public sealed class SchedulerInboundWebhooksOptions
{

View File

@@ -48,8 +48,8 @@ if (authorityOptions.Audiences.Count == 0)
authorityOptions.Validate();
builder.Services.AddSingleton(authorityOptions);
builder.Services.AddOptions<SchedulerEventsOptions>()
.Bind(builder.Configuration.GetSection("Scheduler:Events"))
builder.Services.AddOptions<SchedulerEventsOptions>()
.Bind(builder.Configuration.GetSection("Scheduler:Events"))
.PostConfigure(options =>
{
options.Webhooks ??= new SchedulerInboundWebhooksOptions();
@@ -67,10 +67,11 @@ builder.Services.AddOptions<SchedulerEventsOptions>()
options.Webhooks.Vexer.Validate();
});
builder.Services.AddMemoryCache();
builder.Services.AddSingleton<IWebhookRateLimiter, InMemoryWebhookRateLimiter>();
builder.Services.AddSingleton<IWebhookRequestAuthenticator, WebhookRequestAuthenticator>();
builder.Services.AddSingleton<IInboundExportEventSink, LoggingExportEventSink>();
builder.Services.AddMemoryCache();
builder.Services.AddSingleton<IWebhookRateLimiter, InMemoryWebhookRateLimiter>();
builder.Services.AddSingleton<IWebhookRequestAuthenticator, WebhookRequestAuthenticator>();
builder.Services.AddSingleton<IInboundExportEventSink, LoggingExportEventSink>();
builder.Services.AddSingleton<IRedisConnectionFactory, RedisConnectionFactory>();
var cartographerOptions = builder.Configuration.GetSection("Scheduler:Cartographer").Get<SchedulerCartographerOptions>() ?? new SchedulerCartographerOptions();
builder.Services.AddSingleton(cartographerOptions);

View File

@@ -13,4 +13,7 @@
<ProjectReference Include="../../Authority/StellaOps.Authority/StellaOps.Auth.Abstractions/StellaOps.Auth.Abstractions.csproj" />
<ProjectReference Include="../../Authority/StellaOps.Authority/StellaOps.Auth.ServerIntegration/StellaOps.Auth.ServerIntegration.csproj" />
</ItemGroup>
</Project>
<ItemGroup>
<PackageReference Include="StackExchange.Redis" Version="2.8.24" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
# Completed Tasks
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCHED-WEB-16-101 | DONE (2025-10-27) | Scheduler WebService Guild | SCHED-MODELS-16-101 | Bootstrap Minimal API host with Authority OpTok + DPoP, health endpoints, plug-in discovery per architecture §§12. | Service boots with config validation; `/healthz`/`/readyz` pass; restart-only plug-ins enforced. |
| SCHED-WEB-16-102 | DONE (2025-10-26) | Scheduler WebService Guild | SCHED-WEB-16-101 | Implement schedules CRUD (tenant-scoped) with cron validation, pause/resume, audit logging. | CRUD operations tested; invalid cron inputs rejected; audit entries persisted. |
| SCHED-WEB-16-103 | DONE (2025-10-26) | Scheduler WebService Guild | SCHED-WEB-16-102 | Runs API (list/detail/cancel), ad-hoc run POST, and impact preview endpoints. | Integration tests cover run lifecycle; preview returns counts/sample; cancellation honoured. |
| SCHED-WEB-16-104 | DONE (2025-10-27) | Scheduler WebService Guild | SCHED-QUEUE-16-401, SCHED-STORAGE-16-201 | Webhook endpoints for Feeder/Vexer exports with mTLS/HMAC validation and rate limiting. | Webhooks validated via tests; invalid signatures rejected; rate limits documented. |
| SCHED-WEB-20-001 | DONE (2025-10-29) | Scheduler WebService Guild, Policy Guild | SCHED-WEB-16-101, POLICY-ENGINE-20-000 | Expose policy run scheduling APIs (`POST /policy/runs`, `GET /policy/runs`) with tenant scoping and RBAC enforcement for `policy:run`. | Endpoints documented; integration tests cover run creation/status; unauthorized access blocked. |
| SCHED-WEB-21-001 | DONE (2025-10-26) | Scheduler WebService Guild, Cartographer Guild | SCHED-WEB-16-101, SCHED-MODELS-21-001 | Expose graph build/overlay job APIs (`POST /graphs/build`, `GET /graphs/jobs`) with `graph:write`/`graph:read` enforcement and tenant scoping. | APIs documented in `docs/SCHED-WEB-21-001-GRAPH-APIS.md`; integration tests cover submission/status; unauthorized requests blocked; scope checks now reference `StellaOpsScopes`. |
| SCHED-WEB-21-002 | DONE (2025-10-26) | Scheduler WebService Guild | SCHED-WEB-21-001, CARTO-GRAPH-21-007 | Provide overlay lag metrics endpoint and webhook to notify Cartographer of job completions; include correlation IDs. | `POST /graphs/hooks/completed` + `GET /graphs/overlays/lag` documented in `docs/SCHED-WEB-21-001-GRAPH-APIS.md`; integration tests cover completion + metrics. |
| SCHED-WEB-21-003 | DONE (2025-10-26) | Scheduler WebService Guild, Authority Core Guild | AUTH-GRAPH-21-001 | Replace temporary `X-Scopes`/`X-Tenant-Id` headers with Authority-issued OpTok verification and scope enforcement for graph endpoints. | Authentication configured via `AddStellaOpsResourceServerAuthentication`; authority scopes enforced end-to-end with `StellaOpsScopes`; header fallback limited to dev mode; tests updated. |

View File

@@ -2,16 +2,11 @@
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCHED-WEB-16-101 | DONE (2025-10-27) | Scheduler WebService Guild | SCHED-MODELS-16-101 | Bootstrap Minimal API host with Authority OpTok + DPoP, health endpoints, plug-in discovery per architecture §§12. | Service boots with config validation; `/healthz`/`/readyz` pass; restart-only plug-ins enforced. |
| SCHED-WEB-16-102 | DONE (2025-10-26) | Scheduler WebService Guild | SCHED-WEB-16-101 | Implement schedules CRUD (tenant-scoped) with cron validation, pause/resume, audit logging. | CRUD operations tested; invalid cron inputs rejected; audit entries persisted. |
| SCHED-WEB-16-103 | DONE (2025-10-26) | Scheduler WebService Guild | SCHED-WEB-16-102 | Runs API (list/detail/cancel), ad-hoc run POST, and impact preview endpoints. | Integration tests cover run lifecycle; preview returns counts/sample; cancellation honoured. |
| SCHED-WEB-16-104 | DONE (2025-10-27) | Scheduler WebService Guild | SCHED-QUEUE-16-401, SCHED-STORAGE-16-201 | Webhook endpoints for Feeder/Vexer exports with mTLS/HMAC validation and rate limiting. | Webhooks validated via tests; invalid signatures rejected; rate limits documented. |
## Policy Engine v2 (Sprint 20)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCHED-WEB-20-001 | DONE (2025-10-29) | Scheduler WebService Guild, Policy Guild | SCHED-WEB-16-101, POLICY-ENGINE-20-000 | Expose policy run scheduling APIs (`POST /policy/runs`, `GET /policy/runs`) with tenant scoping and RBAC enforcement for `policy:run`. | Endpoints documented; integration tests cover run creation/status; unauthorized access blocked. |
> 2025-10-29: Added `/api/v1/scheduler/policy/runs` create/list/get endpoints with in-memory queue, scope/tenant enforcement, and contract docs (`docs/SCHED-WEB-20-001-POLICY-RUNS.md`). Tests cover happy path + auth failures.
> 2025-10-26: Use canonical request/response samples from `samples/api/scheduler/policy-*.json`; serializer contract defined in `src/Scheduler/__Libraries/StellaOps.Scheduler.Models/docs/SCHED-MODELS-20-001-POLICY-RUNS.md`.
| SCHED-WEB-20-002 | BLOCKED (waiting on SCHED-WORKER-20-301) | Scheduler WebService Guild | SCHED-WEB-20-001, SCHED-WORKER-20-301 | Provide simulation trigger endpoint returning diff preview metadata and job state for UI/CLI consumption. | Simulation endpoint returns deterministic diffs metadata; rate limits enforced; tests cover concurrency. |
@@ -21,10 +16,8 @@
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCHED-WEB-21-001 | DONE (2025-10-26) | Scheduler WebService Guild, Cartographer Guild | SCHED-WEB-16-101, SCHED-MODELS-21-001 | Expose graph build/overlay job APIs (`POST /graphs/build`, `GET /graphs/jobs`) with `graph:write`/`graph:read` enforcement and tenant scoping. | APIs documented in `docs/SCHED-WEB-21-001-GRAPH-APIS.md`; integration tests cover submission/status; unauthorized requests blocked; scope checks now reference `StellaOpsScopes`. |
| SCHED-WEB-21-002 | DONE (2025-10-26) | Scheduler WebService Guild | SCHED-WEB-21-001, CARTO-GRAPH-21-007 | Provide overlay lag metrics endpoint and webhook to notify Cartographer of job completions; include correlation IDs. | `POST /graphs/hooks/completed` + `GET /graphs/overlays/lag` documented in `docs/SCHED-WEB-21-001-GRAPH-APIS.md`; integration tests cover completion + metrics. |
| SCHED-WEB-21-003 | DONE (2025-10-26) | Scheduler WebService Guild, Authority Core Guild | AUTH-GRAPH-21-001 | Replace temporary `X-Scopes`/`X-Tenant-Id` headers with Authority-issued OpTok verification and scope enforcement for graph endpoints. | Authentication configured via `AddStellaOpsResourceServerAuthentication`; authority scopes enforced end-to-end with `StellaOpsScopes`; header fallback limited to dev mode; tests updated. |
| SCHED-WEB-21-004 | DOING (2025-10-26) | Scheduler WebService Guild, Scheduler Storage Guild | SCHED-WEB-21-001, SCHED-STORAGE-16-201 | Persist graph job lifecycle to Mongo storage and publish `scheduler.graph.job.completed@1` events + outbound webhook to Cartographer. | Storage repositories updated; events emitted; webhook payload documented; integration tests cover storage + event flow. **Note:** Events currently log JSON envelopes while the shared platform bus is provisioned. Cartographer webhook now posts JSON payloads when configured; replace inline logging with bus publisher once the shared event transport is online. |
> 2025-10-30: Implemented Redis-backed publisher (`Scheduler:Events:GraphJobs`) emitting `scheduler.graph.job.completed@1` to configured stream with optional logging fallback; docs/configs to be validated with DevOps before closing.
## StellaOps Console (Sprint 23)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |