Add unit tests for SBOM ingestion and transformation

- Implement `SbomIngestServiceCollectionExtensionsTests` to verify the SBOM ingestion pipeline exports snapshots correctly.
- Create `SbomIngestTransformerTests` to ensure the transformation produces the expected nodes and edges, including deduplication of license nodes and normalization of timestamps (a minimal sketch follows this list).
- Add `SbomSnapshotExporterTests` to test the export functionality for manifest, adjacency, nodes, and edges.
- Introduce `VexOverlayTransformerTests` to validate the transformation of VEX nodes and edges.
- Set up the test project file with the necessary dependencies and configuration.
- Include JSON fixture files for testing purposes.
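The test files themselves are not part of this excerpt; below is a minimal sketch of the deduplication/normalization assertion described above, assuming an xUnit test project and a `SbomIngestTransformer.Transform` API (both are assumptions; the fixture helper and node property names are placeholders).

using System;
using Xunit;

public sealed class SbomIngestTransformerTests
{
    [Fact]
    public void Transform_DeduplicatesLicenseNodes_AndNormalizesTimestamps()
    {
        // Hypothetical fixture: two components referencing the same license id.
        var document = TestFixtures.LoadSbomJson("two-components-shared-license.json");

        var result = new SbomIngestTransformer().Transform(document);

        // A license referenced by multiple components should yield a single node.
        Assert.Single(result.Nodes, node => node.Kind == "license" && node.Id == "license::MIT");

        // Timestamps should be normalized to UTC so exported snapshots stay deterministic.
        Assert.All(result.Nodes, node => Assert.Equal(TimeSpan.Zero, node.ObservedAt.Offset));
    }
}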
This commit is contained in: master
2025-11-04 07:49:39 +02:00
parent f72c5c513a
commit 2eb6852d34
491 changed files with 39445 additions and 3917 deletions

View File

@@ -265,13 +265,16 @@ public sealed record PolicyRunStatus
int attempts = 0,
string? traceId = null,
string? explainUri = null,
ImmutableSortedDictionary<string, string>? metadata = null,
string? schemaVersion = null)
: this(
runId,
tenantId,
policyId,
policyVersion,
ImmutableSortedDictionary<string, string>? metadata = null,
bool cancellationRequested = false,
DateTimeOffset? cancellationRequestedAt = null,
string? cancellationReason = null,
string? schemaVersion = null)
: this(
runId,
tenantId,
policyId,
policyVersion,
mode,
status,
priority,
@@ -282,16 +285,19 @@ public sealed record PolicyRunStatus
inputs ?? PolicyRunInputs.Empty,
determinismHash,
Validation.TrimToNull(errorCode),
Validation.TrimToNull(error),
attempts,
Validation.TrimToNull(traceId),
Validation.TrimToNull(explainUri),
metadata ?? ImmutableSortedDictionary<string, string>.Empty,
schemaVersion)
{
}
[JsonConstructor]
Validation.TrimToNull(error),
attempts,
Validation.TrimToNull(traceId),
Validation.TrimToNull(explainUri),
metadata ?? ImmutableSortedDictionary<string, string>.Empty,
cancellationRequested,
cancellationRequestedAt,
cancellationReason,
schemaVersion)
{
}
[JsonConstructor]
public PolicyRunStatus(
string runId,
string tenantId,
@@ -307,12 +313,15 @@ public sealed record PolicyRunStatus
PolicyRunInputs inputs,
string? determinismHash,
string? errorCode,
string? error,
int attempts,
string? traceId,
string? explainUri,
ImmutableSortedDictionary<string, string> metadata,
string? schemaVersion = null)
string? error,
int attempts,
string? traceId,
string? explainUri,
ImmutableSortedDictionary<string, string> metadata,
bool cancellationRequested,
DateTimeOffset? cancellationRequestedAt,
string? cancellationReason,
string? schemaVersion = null)
{
SchemaVersion = SchedulerSchemaVersions.EnsurePolicyRunStatus(schemaVersion);
RunId = Validation.EnsureId(runId, nameof(runId));
@@ -339,16 +348,19 @@ public sealed record PolicyRunStatus
? throw new ArgumentOutOfRangeException(nameof(attempts), attempts, "Attempts must be non-negative.")
: attempts;
TraceId = Validation.TrimToNull(traceId);
ExplainUri = Validation.TrimToNull(explainUri);
Metadata = (metadata ?? ImmutableSortedDictionary<string, string>.Empty)
.Select(static pair => new KeyValuePair<string, string>(
Validation.TrimToNull(pair.Key)?.ToLowerInvariant() ?? string.Empty,
Validation.TrimToNull(pair.Value) ?? string.Empty))
.Where(static pair => !string.IsNullOrEmpty(pair.Key) && !string.IsNullOrEmpty(pair.Value))
.DistinctBy(static pair => pair.Key, StringComparer.Ordinal)
.OrderBy(static pair => pair.Key, StringComparer.Ordinal)
.ToImmutableSortedDictionary(static pair => pair.Key, static pair => pair.Value, StringComparer.Ordinal);
}
ExplainUri = Validation.TrimToNull(explainUri);
Metadata = (metadata ?? ImmutableSortedDictionary<string, string>.Empty)
.Select(static pair => new KeyValuePair<string, string>(
Validation.TrimToNull(pair.Key)?.ToLowerInvariant() ?? string.Empty,
Validation.TrimToNull(pair.Value) ?? string.Empty))
.Where(static pair => !string.IsNullOrEmpty(pair.Key) && !string.IsNullOrEmpty(pair.Value))
.DistinctBy(static pair => pair.Key, StringComparer.Ordinal)
.OrderBy(static pair => pair.Key, StringComparer.Ordinal)
.ToImmutableSortedDictionary(static pair => pair.Key, static pair => pair.Value, StringComparer.Ordinal);
CancellationRequested = cancellationRequested;
CancellationRequestedAt = Validation.NormalizeTimestamp(cancellationRequestedAt);
CancellationReason = Validation.TrimToNull(cancellationReason);
}
public string SchemaVersion { get; }
@@ -392,13 +404,22 @@ public sealed record PolicyRunStatus
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? ExplainUri { get; init; }
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public ImmutableSortedDictionary<string, string> Metadata { get; init; } = ImmutableSortedDictionary<string, string>.Empty;
public PolicyRunStats Stats { get; init; } = PolicyRunStats.Empty;
public PolicyRunInputs Inputs { get; init; } = PolicyRunInputs.Empty;
}
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public ImmutableSortedDictionary<string, string> Metadata { get; init; } = ImmutableSortedDictionary<string, string>.Empty;
public PolicyRunStats Stats { get; init; } = PolicyRunStats.Empty;
public PolicyRunInputs Inputs { get; init; } = PolicyRunInputs.Empty;
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public bool CancellationRequested { get; init; }
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public DateTimeOffset? CancellationRequestedAt { get; init; }
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? CancellationReason { get; init; }
}
/// <summary>
/// Aggregated metrics captured for a policy run.

View File

@@ -0,0 +1,62 @@
using System;
using System.Collections.Immutable;
namespace StellaOps.Scheduler.Models;
/// <summary>
/// Shared helper for translating persisted <see cref="PolicyRunJob"/> documents into
/// API-facing <see cref="PolicyRunStatus"/> projections.
/// </summary>
public static class PolicyRunStatusFactory
{
public static PolicyRunStatus Create(PolicyRunJob job, DateTimeOffset nowUtc)
{
ArgumentNullException.ThrowIfNull(job);
var status = MapExecutionStatus(job.Status);
var queuedAt = job.QueuedAt ?? job.CreatedAt;
var startedAt = job.SubmittedAt;
var finishedAt = job.CompletedAt ?? job.CancelledAt;
var metadata = job.Metadata ?? ImmutableSortedDictionary<string, string>.Empty;
var inputs = job.Inputs ?? PolicyRunInputs.Empty;
var policyVersion = job.PolicyVersion
?? throw new InvalidOperationException($"Policy run job '{job.Id}' is missing policyVersion.");
return new PolicyRunStatus(
job.RunId ?? job.Id,
job.TenantId,
job.PolicyId,
policyVersion,
job.Mode,
status,
job.Priority,
queuedAt,
job.Status == PolicyRunJobStatus.Pending ? null : startedAt,
finishedAt,
PolicyRunStats.Empty,
inputs,
determinismHash: null,
errorCode: null,
error: job.Status == PolicyRunJobStatus.Failed ? job.LastError : null,
attempts: job.AttemptCount,
traceId: null,
explainUri: null,
metadata,
cancellationRequested: job.CancellationRequested,
cancellationRequestedAt: job.CancellationRequestedAt,
cancellationReason: job.CancellationReason,
SchedulerSchemaVersions.PolicyRunStatus);
}
private static PolicyRunExecutionStatus MapExecutionStatus(PolicyRunJobStatus status)
=> status switch
{
PolicyRunJobStatus.Pending => PolicyRunExecutionStatus.Queued,
PolicyRunJobStatus.Dispatching => PolicyRunExecutionStatus.Running,
PolicyRunJobStatus.Submitted => PolicyRunExecutionStatus.Running,
PolicyRunJobStatus.Completed => PolicyRunExecutionStatus.Succeeded,
PolicyRunJobStatus.Failed => PolicyRunExecutionStatus.Failed,
PolicyRunJobStatus.Cancelled => PolicyRunExecutionStatus.Cancelled,
_ => PolicyRunExecutionStatus.Queued
};
}

View File

@@ -0,0 +1,65 @@
using System;
using System.Text.Json.Serialization;
namespace StellaOps.Scheduler.Models;
public sealed record PolicySimulationWebhookPayload(
[property: JsonPropertyName("tenantId")] string TenantId,
[property: JsonPropertyName("simulation")] PolicyRunStatus Simulation,
[property: JsonPropertyName("result")] string Result,
[property: JsonPropertyName("observedAt")] DateTimeOffset ObservedAt,
[property: JsonPropertyName("latencySeconds")] double? LatencySeconds,
[property: JsonPropertyName("reason")] string? Reason);
public static class PolicySimulationWebhookPayloadFactory
{
public static PolicySimulationWebhookPayload Create(PolicyRunStatus status, DateTimeOffset observedAt)
{
ArgumentNullException.ThrowIfNull(status);
var result = status.Status switch
{
PolicyRunExecutionStatus.Succeeded => "succeeded",
PolicyRunExecutionStatus.Failed => "failed",
PolicyRunExecutionStatus.Cancelled => "cancelled",
PolicyRunExecutionStatus.ReplayPending => "replay_pending",
PolicyRunExecutionStatus.Running => "running",
_ => "queued"
};
var latencySeconds = CalculateLatencySeconds(status, observedAt);
var reason = status.Status switch
{
PolicyRunExecutionStatus.Failed => status.Error,
PolicyRunExecutionStatus.Cancelled => status.CancellationReason,
_ => null
};
return new PolicySimulationWebhookPayload(
status.TenantId,
status,
result,
observedAt,
latencySeconds,
reason);
}
private static double? CalculateLatencySeconds(PolicyRunStatus status, DateTimeOffset observedAt)
{
var started = status.QueuedAt;
var finished = status.FinishedAt ?? observedAt;
if (started == default)
{
return null;
}
var duration = (finished - started).TotalSeconds;
if (duration < 0)
{
duration = 0;
}
return Math.Round(duration, 4);
}
}
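`PolicyRunStatusFactory` and this payload factory are composed further down in this diff (inside `PolicyRunExecutionService`); a small sketch of that flow, with the helper type and method names being illustrative:

using System;
using StellaOps.Scheduler.Models;

internal static class PolicySimulationWebhookExample
{
    // Illustrative helper: project a persisted job into its API-facing status,
    // then wrap it in the payload that the webhook client POSTs out.
    public static PolicySimulationWebhookPayload Build(PolicyRunJob job, DateTimeOffset observedAt)
    {
        var status = PolicyRunStatusFactory.Create(job, observedAt);

        // Result maps from status.Status ("succeeded", "failed", "cancelled", ...);
        // LatencySeconds is the queued-to-finished duration rounded to four decimals.
        return PolicySimulationWebhookPayloadFactory.Create(status, observedAt);
    }
}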

View File

@@ -21,6 +21,7 @@ public sealed record Run
DateTimeOffset? finishedAt = null,
string? error = null,
IEnumerable<DeltaSummary>? deltas = null,
string? retryOf = null,
string? schemaVersion = null)
: this(
id,
@@ -35,6 +36,7 @@ public sealed record Run
Validation.NormalizeTimestamp(finishedAt),
Validation.TrimToNull(error),
NormalizeDeltas(deltas),
Validation.TrimToNull(retryOf),
schemaVersion)
{
}
@@ -53,6 +55,7 @@ public sealed record Run
DateTimeOffset? finishedAt,
string? error,
ImmutableArray<DeltaSummary> deltas,
string? retryOf,
string? schemaVersion = null)
{
Id = Validation.EnsureId(id, nameof(id));
@@ -69,6 +72,7 @@ public sealed record Run
Deltas = deltas.IsDefault
? ImmutableArray<DeltaSummary>.Empty
: deltas.OrderBy(static delta => delta.ImageDigest, StringComparer.Ordinal).ToImmutableArray();
RetryOf = Validation.TrimToNull(retryOf);
SchemaVersion = SchedulerSchemaVersions.EnsureRun(schemaVersion);
}
@@ -103,6 +107,9 @@ public sealed record Run
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public ImmutableArray<DeltaSummary> Deltas { get; } = ImmutableArray<DeltaSummary>.Empty;
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? RetryOf { get; }
private static ImmutableArray<DeltaSummary> NormalizeDeltas(IEnumerable<DeltaSummary>? deltas)
{
if (deltas is null)

View File

@@ -1,3 +1,4 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
@@ -5,7 +6,7 @@ using System.Linq;
namespace StellaOps.Scheduler.Queue;
internal static class SchedulerQueueMetrics
public static class SchedulerQueueMetrics
{
private const string TransportTagName = "transport";
private const string QueueTagName = "queue";
@@ -21,6 +22,25 @@ internal static class SchedulerQueueMetrics
"scheduler_queue_depth",
ObserveDepth);
public static IReadOnlyList<SchedulerQueueDepthSample> CaptureDepthSamples()
{
var snapshot = DepthSamples.ToArray();
if (snapshot.Length == 0)
{
return Array.Empty<SchedulerQueueDepthSample>();
}
var samples = new SchedulerQueueDepthSample[snapshot.Length];
for (var i = 0; i < snapshot.Length; i++)
{
var entry = snapshot[i];
samples[i] = new SchedulerQueueDepthSample(entry.Key.transport, entry.Key.queue, entry.Value);
}
Array.Sort(samples, SchedulerQueueDepthSampleComparer.Instance);
return Array.AsReadOnly(samples);
}
public static void RecordEnqueued(string transport, string queue)
=> EnqueuedCounter.Add(1, BuildTags(transport, queue));
@@ -45,6 +65,22 @@ internal static class SchedulerQueueMetrics
internal static IReadOnlyDictionary<(string transport, string queue), long> SnapshotDepths()
=> DepthSamples.ToDictionary(pair => pair.Key, pair => pair.Value);
private sealed class SchedulerQueueDepthSampleComparer : IComparer<SchedulerQueueDepthSample>
{
public static SchedulerQueueDepthSampleComparer Instance { get; } = new();
public int Compare(SchedulerQueueDepthSample x, SchedulerQueueDepthSample y)
{
var transport = string.Compare(x.Transport, y.Transport, StringComparison.Ordinal);
if (transport != 0)
{
return transport;
}
return string.Compare(x.Queue, y.Queue, StringComparison.Ordinal);
}
}
private static KeyValuePair<string, object?>[] BuildTags(string transport, string queue)
=> new[]
{
@@ -63,3 +99,5 @@ internal static class SchedulerQueueMetrics
}
}
}
public readonly record struct SchedulerQueueDepthSample(string Transport, string Queue, long Depth);
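`CaptureDepthSamples` returns a snapshot ordered by transport and then queue, so consumers see a deterministic view; a brief consumption example (the console sink is illustrative, the metrics API is as declared above):

using System;
using StellaOps.Scheduler.Queue;

foreach (var sample in SchedulerQueueMetrics.CaptureDepthSamples())
{
    // Samples arrive sorted by (transport, queue), so output order is stable.
    Console.WriteLine($"{sample.Transport}/{sample.Queue}: depth={sample.Depth}");
}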

View File

@@ -15,6 +15,8 @@ public sealed class SchedulerMongoOptions
public string RunsCollection { get; set; } = "runs";
public string PolicyJobsCollection { get; set; } = "policy_jobs";
public string ImpactSnapshotsCollection { get; set; } = "impact_snapshots";
public string AuditCollection { get; set; } = "audit";

View File

@@ -36,13 +36,19 @@ public interface IPolicyRunJobRepository
PolicyRunMode? mode = null,
IReadOnlyCollection<PolicyRunJobStatus>? statuses = null,
DateTimeOffset? queuedAfter = null,
int limit = 50,
IClientSessionHandle? session = null,
CancellationToken cancellationToken = default);
Task<bool> ReplaceAsync(
PolicyRunJob job,
string? expectedLeaseOwner = null,
IClientSessionHandle? session = null,
CancellationToken cancellationToken = default);
}
int limit = 50,
IClientSessionHandle? session = null,
CancellationToken cancellationToken = default);
Task<bool> ReplaceAsync(
PolicyRunJob job,
string? expectedLeaseOwner = null,
IClientSessionHandle? session = null,
CancellationToken cancellationToken = default);
Task<long> CountAsync(
string tenantId,
PolicyRunMode mode,
IReadOnlyCollection<PolicyRunJobStatus> statuses,
CancellationToken cancellationToken = default);
}
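A hedged call-site sketch for the new `CountAsync`, for example to gauge how many jobs are still pending or dispatching for a tenant (the wrapper type is illustrative):

using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Repositories;

internal static class PolicyRunJobCounts
{
    // Illustrative wrapper: counts jobs that have not yet been submitted for this tenant/mode.
    public static Task<long> CountOutstandingAsync(
        IPolicyRunJobRepository repository,
        string tenantId,
        PolicyRunMode mode,
        CancellationToken cancellationToken)
        => repository.CountAsync(
            tenantId,
            mode,
            new[] { PolicyRunJobStatus.Pending, PolicyRunJobStatus.Dispatching },
            cancellationToken);
}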

View File

@@ -1,11 +1,12 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
using MongoDB.Driver;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Internal;
using StellaOps.Scheduler.Storage.Mongo.Serialization;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using MongoDB.Bson;
using MongoDB.Driver;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Internal;
using StellaOps.Scheduler.Storage.Mongo.Serialization;
namespace StellaOps.Scheduler.Storage.Mongo.Repositories;
@@ -206,16 +207,43 @@ internal sealed class PolicyRunJobRepository : IPolicyRunJobRepository
.ToListAsync(cancellationToken)
.ConfigureAwait(false);
return documents
.Select(PolicyRunJobDocumentMapper.FromBsonDocument)
.ToList();
}
public async Task<bool> ReplaceAsync(
PolicyRunJob job,
string? expectedLeaseOwner = null,
IClientSessionHandle? session = null,
CancellationToken cancellationToken = default)
return documents
.Select(PolicyRunJobDocumentMapper.FromBsonDocument)
.ToList();
}
public async Task<long> CountAsync(
string tenantId,
PolicyRunMode mode,
IReadOnlyCollection<PolicyRunJobStatus> statuses,
CancellationToken cancellationToken = default)
{
if (string.IsNullOrWhiteSpace(tenantId))
{
throw new ArgumentException("Tenant id must be provided.", nameof(tenantId));
}
var filters = new List<FilterDefinition<BsonDocument>>
{
Filter.Eq("tenantId", tenantId),
Filter.Eq("mode", mode.ToString().ToLowerInvariant())
};
if (statuses is { Count: > 0 })
{
var array = new BsonArray(statuses.Select(static status => status.ToString().ToLowerInvariant()));
filters.Add(Filter.In("status", array));
}
var filter = Filter.And(filters);
return await _collection.CountDocumentsAsync(filter, cancellationToken: cancellationToken).ConfigureAwait(false);
}
public async Task<bool> ReplaceAsync(
PolicyRunJob job,
string? expectedLeaseOwner = null,
IClientSessionHandle? session = null,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(job);

View File

@@ -0,0 +1,47 @@
using System;
namespace StellaOps.Scheduler.Storage.Mongo.Repositories;
/// <summary>
/// Cursor describing the position of a run in deterministic ordering.
/// </summary>
public sealed record RunListCursor
{
public RunListCursor(DateTimeOffset createdAt, string runId)
{
CreatedAt = NormalizeTimestamp(createdAt);
RunId = NormalizeRunId(runId);
}
/// <summary>
/// Timestamp of the last run observed (UTC).
/// </summary>
public DateTimeOffset CreatedAt { get; }
/// <summary>
/// Identifier of the last run observed.
/// </summary>
public string RunId { get; }
private static DateTimeOffset NormalizeTimestamp(DateTimeOffset value)
{
var utc = value.ToUniversalTime();
return new DateTimeOffset(DateTime.SpecifyKind(utc.DateTime, DateTimeKind.Utc));
}
private static string NormalizeRunId(string value)
{
if (string.IsNullOrWhiteSpace(value))
{
throw new ArgumentException("Run id must be provided.", nameof(value));
}
var trimmed = value.Trim();
if (trimmed.Length > 256)
{
throw new ArgumentException("Run id exceeds 256 characters.", nameof(value));
}
return trimmed;
}
}
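This cursor pairs with the `RunQueryOptions.Cursor` property and the `(createdAt, _id)` tie-breaker added to the repository below; a hedged paging sketch (the `ListAsync` method name and the `Run.CreatedAt` property are assumptions, since only the filter/sort internals appear in this diff):

using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Repositories;

internal static class RunPagingExample
{
    public static async Task PageAllAsync(IRunRepository runs, string tenantId, CancellationToken ct)
    {
        RunListCursor? cursor = null;
        while (true)
        {
            var options = new RunQueryOptions { Limit = 50, Cursor = cursor };
            var page = await runs.ListAsync(tenantId, options, ct);
            if (page.Count == 0)
            {
                break;
            }

            // Resume after the last run observed; the (createdAt, _id) ordering keeps pages stable.
            var last = page[page.Count - 1];
            cursor = new RunListCursor(last.CreatedAt, last.Id);
        }
    }
}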

View File

@@ -19,16 +19,21 @@ public sealed class RunQueryOptions
public ImmutableArray<RunState> States { get; init; } = ImmutableArray<RunState>.Empty;
/// <summary>
/// Optional lower bound for creation timestamp (UTC).
/// </summary>
public DateTimeOffset? CreatedAfter { get; init; }
/// <summary>
/// Maximum number of runs to return (default 50 when unspecified).
/// </summary>
public int? Limit { get; init; }
/// <summary>
/// Optional lower bound for creation timestamp (UTC).
/// </summary>
public DateTimeOffset? CreatedAfter { get; init; }
/// <summary>
/// Optional cursor to resume iteration using deterministic ordering.
/// </summary>
public RunListCursor? Cursor { get; init; }
/// <summary>
/// Maximum number of runs to return (default 50 when unspecified).
/// </summary>
public int? Limit { get; init; }
/// <summary>
/// Sort order flag. Defaults to descending by createdAt.
/// </summary>
public bool SortAscending { get; init; }

View File

@@ -127,28 +127,53 @@ internal sealed class RunRepository : IRunRepository
filters.Add(Filter.In("state", options.States.Select(state => state.ToString().ToLowerInvariant())));
}
if (options.CreatedAfter is { } createdAfter)
{
filters.Add(Filter.Gt("createdAt", createdAfter.ToUniversalTime().UtcDateTime));
}
if (options.CreatedAfter is { } createdAfter)
{
filters.Add(Filter.Gt("createdAt", createdAfter.ToUniversalTime().UtcDateTime));
}
if (options.Cursor is { } cursor)
{
var createdAtUtc = cursor.CreatedAt.ToUniversalTime().UtcDateTime;
FilterDefinition<BsonDocument> cursorFilter;
if (options.SortAscending)
{
cursorFilter = Filter.Or(
Filter.Gt("createdAt", createdAtUtc),
Filter.And(
Filter.Eq("createdAt", createdAtUtc),
Filter.Gt("_id", cursor.RunId)));
}
else
{
cursorFilter = Filter.Or(
Filter.Lt("createdAt", createdAtUtc),
Filter.And(
Filter.Eq("createdAt", createdAtUtc),
Filter.Lt("_id", cursor.RunId)));
}
filters.Add(cursorFilter);
}
var combined = Filter.And(filters);
var find = session is null
? _collection.Find(combined)
: _collection.Find(session, combined);
var combined = Filter.And(filters);
var find = session is null
? _collection.Find(combined)
: _collection.Find(session, combined);
var limit = options.Limit is { } specified && specified > 0 ? specified : DefaultListLimit;
find = find.Limit(limit);
var sortDefinition = options.SortAscending
? Sort.Ascending("createdAt")
: Sort.Descending("createdAt");
find = find.Sort(sortDefinition);
var documents = await find.ToListAsync(cancellationToken).ConfigureAwait(false);
return documents.Select(RunDocumentMapper.FromBsonDocument).ToArray();
var limit = options.Limit is { } specified && specified > 0 ? specified : DefaultListLimit;
find = find.Limit(limit);
var sortDefinition = options.SortAscending
? Sort.Combine(Sort.Ascending("createdAt"), Sort.Ascending("_id"))
: Sort.Combine(Sort.Descending("createdAt"), Sort.Descending("_id"));
find = find.Sort(sortDefinition);
var documents = await find.ToListAsync(cancellationToken).ConfigureAwait(false);
return documents.Select(RunDocumentMapper.FromBsonDocument).ToArray();
}
public async Task<IReadOnlyList<Run>> ListByStateAsync(

View File

@@ -57,8 +57,9 @@ public static class SchedulerWorkerServiceCollectionExtensions
loggerFactory.CreateLogger<SchedulerEventPublisher>());
});
services.AddHttpClient<IScannerReportClient, HttpScannerReportClient>();
services.AddHttpClient<IPolicyRunClient, HttpPolicyRunClient>();
services.AddHttpClient<IScannerReportClient, HttpScannerReportClient>();
services.AddHttpClient<IPolicyRunClient, HttpPolicyRunClient>();
services.AddHttpClient<IPolicySimulationWebhookClient, HttpPolicySimulationWebhookClient>();
services.AddHttpClient<ICartographerBuildClient, HttpCartographerBuildClient>((sp, client) =>
{
var options = sp.GetRequiredService<IOptions<SchedulerWorkerOptions>>().Value.Graph;

View File

@@ -4,10 +4,11 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Repositories;
using StellaOps.Scheduler.Worker.Graph.Cartographer;
using StellaOps.Scheduler.Worker.Graph.Scheduler;
using StellaOps.Scheduler.Worker.Options;
using StellaOps.Scheduler.Storage.Mongo.Repositories;
using StellaOps.Scheduler.Worker.Graph.Cartographer;
using StellaOps.Scheduler.Worker.Graph.Scheduler;
using StellaOps.Scheduler.Worker.Options;
using StellaOps.Scheduler.Worker.Observability;
namespace StellaOps.Scheduler.Worker.Graph;

View File

@@ -4,10 +4,11 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Repositories;
using StellaOps.Scheduler.Worker.Graph.Cartographer;
using StellaOps.Scheduler.Worker.Graph.Scheduler;
using StellaOps.Scheduler.Worker.Options;
using StellaOps.Scheduler.Storage.Mongo.Repositories;
using StellaOps.Scheduler.Worker.Graph.Cartographer;
using StellaOps.Scheduler.Worker.Graph.Scheduler;
using StellaOps.Scheduler.Worker.Options;
using StellaOps.Scheduler.Worker.Observability;
namespace StellaOps.Scheduler.Worker.Graph;

View File

@@ -1,236 +1,245 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Worker.Observability;
public sealed class SchedulerWorkerMetrics : IDisposable
{
public const string MeterName = "StellaOps.Scheduler.Worker";
private readonly Meter _meter;
private readonly Counter<long> _plannerRunsTotal;
private readonly Histogram<double> _plannerLatencySeconds;
private readonly Counter<long> _runnerSegmentsTotal;
private readonly Counter<long> _runnerImagesTotal;
private readonly Counter<long> _runnerDeltaCriticalTotal;
private readonly Counter<long> _runnerDeltaHighTotal;
private readonly Counter<long> _runnerDeltaFindingsTotal;
private readonly Counter<long> _runnerKevHitsTotal;
private readonly Histogram<double> _runDurationSeconds;
private readonly UpDownCounter<long> _runsActive;
private readonly Counter<long> _graphJobsTotal;
private readonly Histogram<double> _graphJobDurationSeconds;
private readonly ConcurrentDictionary<string, long> _backlog = new(StringComparer.Ordinal);
private readonly ObservableGauge<long> _backlogGauge;
private bool _disposed;
public SchedulerWorkerMetrics()
{
_meter = new Meter(MeterName);
_plannerRunsTotal = _meter.CreateCounter<long>(
"scheduler_planner_runs_total",
unit: "count",
description: "Planner runs grouped by status and mode.");
_plannerLatencySeconds = _meter.CreateHistogram<double>(
"scheduler_planner_latency_seconds",
unit: "s",
description: "Latency between run creation and planner processing grouped by mode and status.");
_runnerSegmentsTotal = _meter.CreateCounter<long>(
"scheduler_runner_segments_total",
unit: "count",
description: "Runner segments processed grouped by status and mode.");
_runnerImagesTotal = _meter.CreateCounter<long>(
"scheduler_runner_images_total",
unit: "count",
description: "Images processed by runner grouped by mode and delta outcome.");
_runnerDeltaCriticalTotal = _meter.CreateCounter<long>(
"scheduler_runner_delta_critical_total",
unit: "count",
description: "Critical findings observed by runner grouped by mode.");
_runnerDeltaHighTotal = _meter.CreateCounter<long>(
"scheduler_runner_delta_high_total",
unit: "count",
description: "High findings observed by runner grouped by mode.");
_runnerDeltaFindingsTotal = _meter.CreateCounter<long>(
"scheduler_runner_delta_total",
unit: "count",
description: "Total findings observed by runner grouped by mode.");
_runnerKevHitsTotal = _meter.CreateCounter<long>(
"scheduler_runner_delta_kev_total",
unit: "count",
description: "KEV hits observed by runner grouped by mode.");
_runDurationSeconds = _meter.CreateHistogram<double>(
"scheduler_run_duration_seconds",
unit: "s",
description: "End-to-end run durations grouped by mode and result.");
_runsActive = _meter.CreateUpDownCounter<long>(
"scheduler_runs_active",
unit: "count",
description: "Active scheduler runs grouped by mode.");
_graphJobsTotal = _meter.CreateCounter<long>(
"scheduler_graph_jobs_total",
unit: "count",
description: "Graph jobs processed by the worker grouped by type and result.");
_graphJobDurationSeconds = _meter.CreateHistogram<double>(
"scheduler_graph_job_duration_seconds",
unit: "s",
description: "Graph job durations grouped by type and result.");
_backlogGauge = _meter.CreateObservableGauge<long>(
"scheduler_runner_backlog",
ObserveBacklog,
unit: "images",
description: "Remaining images queued for runner processing grouped by mode and schedule.");
}
public void RecordGraphJobResult(string type, string result, TimeSpan? duration = null)
{
var tags = new[]
{
new KeyValuePair<string, object?>("type", type),
new KeyValuePair<string, object?>("result", result)
};
_graphJobsTotal.Add(1, tags);
if (duration is { } jobDuration)
{
_graphJobDurationSeconds.Record(Math.Max(jobDuration.TotalSeconds, 0d), tags);
}
}
public void RecordPlannerResult(string mode, string status, TimeSpan latency, int imageCount)
{
var tags = new[]
{
new KeyValuePair<string, object?>("mode", mode),
new KeyValuePair<string, object?>("status", status)
};
_plannerRunsTotal.Add(1, tags);
_plannerLatencySeconds.Record(Math.Max(latency.TotalSeconds, 0d), tags);
if (status.Equals("enqueued", StringComparison.OrdinalIgnoreCase) && imageCount > 0)
{
_runsActive.Add(1, new[] { new KeyValuePair<string, object?>("mode", mode) });
}
}
public void RecordRunnerSegment(string mode, string status, int processedImages, int deltaImages)
{
var tags = new[]
{
new KeyValuePair<string, object?>("mode", mode),
new KeyValuePair<string, object?>("status", status)
};
_runnerSegmentsTotal.Add(1, tags);
var imageTags = new[]
{
new KeyValuePair<string, object?>("mode", mode),
new KeyValuePair<string, object?>("delta", deltaImages > 0 ? "true" : "false")
};
_runnerImagesTotal.Add(processedImages, imageTags);
}
public void RecordDeltaSummaries(string mode, IReadOnlyList<DeltaSummary> deltas)
{
if (deltas.Count == 0)
{
return;
}
var tags = new[] { new KeyValuePair<string, object?>("mode", mode) };
foreach (var delta in deltas)
{
if (delta.NewCriticals > 0)
{
_runnerDeltaCriticalTotal.Add(delta.NewCriticals, tags);
}
if (delta.NewHigh > 0)
{
_runnerDeltaHighTotal.Add(delta.NewHigh, tags);
}
if (delta.NewFindings > 0)
{
_runnerDeltaFindingsTotal.Add(delta.NewFindings, tags);
}
if (!delta.KevHits.IsDefaultOrEmpty)
{
_runnerKevHitsTotal.Add(delta.KevHits.Length, tags);
}
}
}
public void RecordPolicyRunEvent(string tenantId, string policyId, PolicyRunMode mode, string status, TimeSpan? latency = null, string? reason = null)
{
var modeTag = mode.ToString().ToLowerInvariant();
var decrementActive = !string.Equals(status, "submitted", StringComparison.OrdinalIgnoreCase)
&& !string.Equals(status, "retry", StringComparison.OrdinalIgnoreCase);
RecordRunCompletion(modeTag, status, latency, decrementActive);
}
public void RecordRunCompletion(string mode, string result, TimeSpan? duration, bool decrementActive = true)
{
var tags = new[]
{
new KeyValuePair<string, object?>("mode", mode),
new KeyValuePair<string, object?>("result", result)
};
if (duration is { } runDuration)
{
_runDurationSeconds.Record(Math.Max(runDuration.TotalSeconds, 0d), tags);
}
if (decrementActive)
{
_runsActive.Add(-1, new[] { new KeyValuePair<string, object?>("mode", mode) });
}
}
public void UpdateBacklog(string mode, string? scheduleId, long backlog)
{
var key = BuildBacklogKey(mode, scheduleId);
if (backlog <= 0)
{
_backlog.TryRemove(key, out _);
}
else
{
_backlog[key] = backlog;
}
}
private IEnumerable<Measurement<long>> ObserveBacklog()
{
foreach (var entry in _backlog)
{
var (mode, scheduleId) = SplitBacklogKey(entry.Key);
yield return new Measurement<long>(
entry.Value,
new KeyValuePair<string, object?>("mode", mode),
new KeyValuePair<string, object?>("scheduleId", scheduleId ?? string.Empty));
}
}
private static string BuildBacklogKey(string mode, string? scheduleId)
=> $"{mode}|{scheduleId ?? string.Empty}";
private static (string Mode, string? ScheduleId) SplitBacklogKey(string key)
{
var parts = key.Split('|', 2);
return parts.Length == 2
? (parts[0], string.IsNullOrEmpty(parts[1]) ? null : parts[1])
: (key, null);
}
public void Dispose()
{
if (_disposed)
{
return;
}
_meter.Dispose();
_disposed = true;
}
}

View File

@@ -280,18 +280,21 @@ public sealed class SchedulerWorkerOptions
/// </summary>
public bool Enabled { get; set; } = true;
public DispatchOptions Dispatch { get; set; } = new();
public ApiOptions Api { get; set; } = new();
public TargetingOptions Targeting { get; set; } = new();
public void Validate()
{
Dispatch.Validate();
Api.Validate();
Targeting.Validate();
}
public DispatchOptions Dispatch { get; set; } = new();
public ApiOptions Api { get; set; } = new();
public TargetingOptions Targeting { get; set; } = new();
public WebhookOptions Webhook { get; set; } = new();
public void Validate()
{
Dispatch.Validate();
Api.Validate();
Targeting.Validate();
Webhook.Validate();
}
public sealed class DispatchOptions
{
@@ -430,11 +433,11 @@ public sealed class SchedulerWorkerOptions
}
}
public sealed class TargetingOptions
{
/// <summary>
/// When disabled the worker skips policy delta targeting.
/// </summary>
public sealed class TargetingOptions
{
/// <summary>
/// When disabled the worker skips policy delta targeting.
/// </summary>
public bool Enabled { get; set; } = true;
/// <summary>
@@ -454,8 +457,59 @@ public sealed class SchedulerWorkerOptions
throw new InvalidOperationException("Policy targeting MaxSboms must be greater than zero.");
}
}
}
}
}
public sealed class WebhookOptions
{
/// <summary>
/// Controls whether webhook callbacks are emitted when simulations complete.
/// </summary>
public bool Enabled { get; set; }
/// <summary>
/// Absolute endpoint to invoke for webhook callbacks.
/// </summary>
public string? Endpoint { get; set; }
/// <summary>
/// Optional header to carry an API key.
/// </summary>
public string? ApiKeyHeader { get; set; }
/// <summary>
/// Optional API key value aligned with <see cref="ApiKeyHeader"/>.
/// </summary>
public string? ApiKey { get; set; }
/// <summary>
/// Request timeout in seconds.
/// </summary>
public int TimeoutSeconds { get; set; } = 10;
public void Validate()
{
if (!Enabled)
{
return;
}
if (string.IsNullOrWhiteSpace(Endpoint))
{
throw new InvalidOperationException("Policy webhook endpoint must be configured when enabled.");
}
if (!Uri.TryCreate(Endpoint, UriKind.Absolute, out _))
{
throw new InvalidOperationException("Policy webhook endpoint must be an absolute URI.");
}
if (TimeoutSeconds <= 0)
{
throw new InvalidOperationException("Policy webhook timeout must be greater than zero.");
}
}
}
}
public sealed class GraphOptions
{

View File

@@ -13,30 +13,33 @@ namespace StellaOps.Scheduler.Worker.Policy;
internal sealed class PolicyRunExecutionService
{
private readonly IPolicyRunJobRepository _repository;
private readonly IPolicyRunClient _client;
private readonly IOptions<SchedulerWorkerOptions> _options;
private readonly TimeProvider _timeProvider;
private readonly SchedulerWorkerMetrics _metrics;
private readonly IPolicyRunTargetingService _targetingService;
private readonly ILogger<PolicyRunExecutionService> _logger;
public PolicyRunExecutionService(
IPolicyRunJobRepository repository,
IPolicyRunClient client,
IOptions<SchedulerWorkerOptions> options,
TimeProvider? timeProvider,
SchedulerWorkerMetrics metrics,
IPolicyRunTargetingService targetingService,
ILogger<PolicyRunExecutionService> logger)
{
_repository = repository ?? throw new ArgumentNullException(nameof(repository));
_client = client ?? throw new ArgumentNullException(nameof(client));
_options = options ?? throw new ArgumentNullException(nameof(options));
_timeProvider = timeProvider ?? TimeProvider.System;
_metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
_targetingService = targetingService ?? throw new ArgumentNullException(nameof(targetingService));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
private readonly IPolicyRunClient _client;
private readonly IOptions<SchedulerWorkerOptions> _options;
private readonly TimeProvider _timeProvider;
private readonly SchedulerWorkerMetrics _metrics;
private readonly IPolicyRunTargetingService _targetingService;
private readonly IPolicySimulationWebhookClient _webhookClient;
private readonly ILogger<PolicyRunExecutionService> _logger;
public PolicyRunExecutionService(
IPolicyRunJobRepository repository,
IPolicyRunClient client,
IOptions<SchedulerWorkerOptions> options,
TimeProvider? timeProvider,
SchedulerWorkerMetrics metrics,
IPolicyRunTargetingService targetingService,
IPolicySimulationWebhookClient webhookClient,
ILogger<PolicyRunExecutionService> logger)
{
_repository = repository ?? throw new ArgumentNullException(nameof(repository));
_client = client ?? throw new ArgumentNullException(nameof(client));
_options = options ?? throw new ArgumentNullException(nameof(options));
_timeProvider = timeProvider ?? TimeProvider.System;
_metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
_targetingService = targetingService ?? throw new ArgumentNullException(nameof(targetingService));
_webhookClient = webhookClient ?? throw new ArgumentNullException(nameof(webhookClient));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<PolicyRunExecutionResult> ExecuteAsync(PolicyRunJob job, CancellationToken cancellationToken)
{
@@ -62,20 +65,24 @@ internal sealed class PolicyRunExecutionService
_logger.LogWarning("Failed to update cancelled policy run job {JobId}.", job.Id);
}
_metrics.RecordPolicyRunEvent(
cancelled.TenantId,
cancelled.PolicyId,
cancelled.Mode,
"cancelled",
reason: cancelled.CancellationReason);
_logger.LogInformation(
"Policy run job {JobId} cancelled (tenant={TenantId}, policy={PolicyId}, runId={RunId}).",
cancelled.Id,
cancelled.TenantId,
cancelled.PolicyId,
cancelled.RunId ?? "(pending)");
return PolicyRunExecutionResult.Cancelled(cancelled);
_metrics.RecordPolicyRunEvent(
cancelled.TenantId,
cancelled.PolicyId,
cancelled.Mode,
"cancelled",
reason: cancelled.CancellationReason);
_logger.LogInformation(
"Policy run job {JobId} cancelled (tenant={TenantId}, policy={PolicyId}, runId={RunId}).",
cancelled.Id,
cancelled.TenantId,
cancelled.PolicyId,
cancelled.RunId ?? "(pending)");
var cancelledStatus = PolicyRunStatusFactory.Create(cancelled, cancelledAt);
var cancelledPayload = PolicySimulationWebhookPayloadFactory.Create(cancelledStatus, cancelledAt);
await _webhookClient.NotifyAsync(cancelledPayload, cancellationToken).ConfigureAwait(false);
return PolicyRunExecutionResult.Cancelled(cancelled);
}
var targeting = await _targetingService
@@ -108,19 +115,23 @@ internal sealed class PolicyRunExecutionService
}
var latency = CalculateLatency(job, completionTime);
_metrics.RecordPolicyRunEvent(
completed.TenantId,
completed.PolicyId,
completed.Mode,
"no_work",
latency,
targeting.Reason);
_logger.LogInformation(
"Policy run job {JobId} completed without submission (reason={Reason}).",
completed.Id,
targeting.Reason ?? "none");
return PolicyRunExecutionResult.NoOp(completed, targeting.Reason);
_metrics.RecordPolicyRunEvent(
completed.TenantId,
completed.PolicyId,
completed.Mode,
"no_work",
latency,
targeting.Reason);
_logger.LogInformation(
"Policy run job {JobId} completed without submission (reason={Reason}).",
completed.Id,
targeting.Reason ?? "none");
var completedStatus = PolicyRunStatusFactory.Create(completed, completionTime);
var completedPayload = PolicySimulationWebhookPayloadFactory.Create(completedStatus, completionTime);
await _webhookClient.NotifyAsync(completedPayload, cancellationToken).ConfigureAwait(false);
return PolicyRunExecutionResult.NoOp(completed, targeting.Reason);
}
job = targeting.Job;
@@ -200,24 +211,28 @@ internal sealed class PolicyRunExecutionService
if (nextStatus == PolicyRunJobStatus.Failed)
{
_metrics.RecordPolicyRunEvent(
failedJob.TenantId,
failedJob.PolicyId,
failedJob.Mode,
"failed",
latencyForFailure,
reason);
_logger.LogError(
"Policy run job {JobId} failed after {Attempts} attempts (tenant={TenantId}, policy={PolicyId}, runId={RunId}). Error: {Error}",
failedJob.Id,
attemptCount,
failedJob.TenantId,
failedJob.PolicyId,
failedJob.RunId ?? "(pending)",
submission.Error ?? "unknown");
return PolicyRunExecutionResult.Failed(failedJob, submission.Error);
_metrics.RecordPolicyRunEvent(
failedJob.TenantId,
failedJob.PolicyId,
failedJob.Mode,
"failed",
latencyForFailure,
reason);
_logger.LogError(
"Policy run job {JobId} failed after {Attempts} attempts (tenant={TenantId}, policy={PolicyId}, runId={RunId}). Error: {Error}",
failedJob.Id,
attemptCount,
failedJob.TenantId,
failedJob.PolicyId,
failedJob.RunId ?? "(pending)",
submission.Error ?? "unknown");
var failedStatus = PolicyRunStatusFactory.Create(failedJob, now);
var failedPayload = PolicySimulationWebhookPayloadFactory.Create(failedStatus, now);
await _webhookClient.NotifyAsync(failedPayload, cancellationToken).ConfigureAwait(false);
return PolicyRunExecutionResult.Failed(failedJob, submission.Error);
}
_metrics.RecordPolicyRunEvent(

View File

@@ -0,0 +1,104 @@
using System;
using System.Net.Http;
using System.Net.Mime;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Worker.Options;
namespace StellaOps.Scheduler.Worker.Policy;
internal interface IPolicySimulationWebhookClient
{
Task NotifyAsync(PolicySimulationWebhookPayload payload, CancellationToken cancellationToken);
}
internal sealed class HttpPolicySimulationWebhookClient : IPolicySimulationWebhookClient
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
private readonly HttpClient _httpClient;
private readonly IOptionsMonitor<SchedulerWorkerOptions> _options;
private readonly ILogger<HttpPolicySimulationWebhookClient> _logger;
public HttpPolicySimulationWebhookClient(
HttpClient httpClient,
IOptionsMonitor<SchedulerWorkerOptions> options,
ILogger<HttpPolicySimulationWebhookClient> logger)
{
_httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task NotifyAsync(PolicySimulationWebhookPayload payload, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(payload);
var snapshot = _options.CurrentValue.Policy.Webhook;
if (!snapshot.Enabled)
{
_logger.LogDebug("Policy simulation webhook disabled; skip run {RunId}.", payload.Simulation.RunId);
return;
}
if (string.IsNullOrWhiteSpace(snapshot.Endpoint))
{
_logger.LogWarning("Policy simulation webhook endpoint missing; run {RunId} not dispatched.", payload.Simulation.RunId);
return;
}
if (!Uri.TryCreate(snapshot.Endpoint, UriKind.Absolute, out var endpoint))
{
_logger.LogError("Policy simulation webhook endpoint '{Endpoint}' invalid.", snapshot.Endpoint);
return;
}
var timeout = snapshot.TimeoutSeconds <= 0 ? TimeSpan.FromSeconds(10) : TimeSpan.FromSeconds(snapshot.TimeoutSeconds);
_httpClient.Timeout = timeout;
using var request = new HttpRequestMessage(HttpMethod.Post, endpoint)
{
Content = new StringContent(JsonSerializer.Serialize(payload, SerializerOptions), Encoding.UTF8, MediaTypeNames.Application.Json)
};
request.Headers.TryAddWithoutValidation("X-StellaOps-Tenant", payload.TenantId);
if (!string.IsNullOrWhiteSpace(payload.Simulation.RunId))
{
request.Headers.TryAddWithoutValidation("X-StellaOps-Run-Id", payload.Simulation.RunId);
}
if (!string.IsNullOrWhiteSpace(snapshot.ApiKey) && !string.IsNullOrWhiteSpace(snapshot.ApiKeyHeader))
{
request.Headers.TryAddWithoutValidation(snapshot.ApiKeyHeader!, snapshot.ApiKey);
}
try
{
using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
if (!response.IsSuccessStatusCode)
{
var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
_logger.LogWarning(
"Policy simulation webhook responded {StatusCode} for run {RunId}: {Body}",
(int)response.StatusCode,
payload.Simulation.RunId,
body);
}
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
throw;
}
catch (Exception ex)
{
_logger.LogError(ex, "Policy simulation webhook failed for run {RunId}.", payload.Simulation.RunId);
}
}
}
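A hedged wiring sketch for the options this client reads; the `Policy.Webhook` path matches `_options.CurrentValue.Policy.Webhook` above, and the endpoint, header, and key values are placeholders:

using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scheduler.Worker.Options;

internal static class PolicyWebhookConfigurationExample
{
    public static IServiceCollection ConfigurePolicySimulationWebhook(this IServiceCollection services)
        => services.Configure<SchedulerWorkerOptions>(options =>
        {
            options.Policy.Webhook.Enabled = true;
            options.Policy.Webhook.Endpoint = "https://hooks.example.internal/policy-simulations"; // placeholder
            options.Policy.Webhook.ApiKeyHeader = "X-Api-Key";  // placeholder
            options.Policy.Webhook.ApiKey = "example-secret";   // placeholder
            options.Policy.Webhook.TimeoutSeconds = 10;
        });
}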