Add unit tests for SBOM ingestion and transformation
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Implement `SbomIngestServiceCollectionExtensionsTests` to verify the SBOM ingestion pipeline exports snapshots correctly.
- Create `SbomIngestTransformerTests` to ensure the transformation produces expected nodes and edges, including deduplication of license nodes and normalization of timestamps.
- Add `SbomSnapshotExporterTests` to test the export functionality for manifest, adjacency, nodes, and edges.
- Introduce `VexOverlayTransformerTests` to validate the transformation of VEX nodes and edges.
- Set up project file for the test project with necessary dependencies and configurations.
- Include JSON fixture files for testing purposes.
This commit is contained in:
@@ -265,13 +265,16 @@ public sealed record PolicyRunStatus
|
||||
int attempts = 0,
|
||||
string? traceId = null,
|
||||
string? explainUri = null,
|
||||
ImmutableSortedDictionary<string, string>? metadata = null,
|
||||
string? schemaVersion = null)
|
||||
: this(
|
||||
runId,
|
||||
tenantId,
|
||||
policyId,
|
||||
policyVersion,
|
||||
ImmutableSortedDictionary<string, string>? metadata = null,
|
||||
bool cancellationRequested = false,
|
||||
DateTimeOffset? cancellationRequestedAt = null,
|
||||
string? cancellationReason = null,
|
||||
string? schemaVersion = null)
|
||||
: this(
|
||||
runId,
|
||||
tenantId,
|
||||
policyId,
|
||||
policyVersion,
|
||||
mode,
|
||||
status,
|
||||
priority,
|
||||
@@ -282,16 +285,19 @@ public sealed record PolicyRunStatus
|
||||
inputs ?? PolicyRunInputs.Empty,
|
||||
determinismHash,
|
||||
Validation.TrimToNull(errorCode),
|
||||
Validation.TrimToNull(error),
|
||||
attempts,
|
||||
Validation.TrimToNull(traceId),
|
||||
Validation.TrimToNull(explainUri),
|
||||
metadata ?? ImmutableSortedDictionary<string, string>.Empty,
|
||||
schemaVersion)
|
||||
{
|
||||
}
|
||||
|
||||
[JsonConstructor]
|
||||
Validation.TrimToNull(error),
|
||||
attempts,
|
||||
Validation.TrimToNull(traceId),
|
||||
Validation.TrimToNull(explainUri),
|
||||
metadata ?? ImmutableSortedDictionary<string, string>.Empty,
|
||||
cancellationRequested,
|
||||
cancellationRequestedAt,
|
||||
cancellationReason,
|
||||
schemaVersion)
|
||||
{
|
||||
}
|
||||
|
||||
[JsonConstructor]
|
||||
public PolicyRunStatus(
|
||||
string runId,
|
||||
string tenantId,
|
||||
@@ -307,12 +313,15 @@ public sealed record PolicyRunStatus
|
||||
PolicyRunInputs inputs,
|
||||
string? determinismHash,
|
||||
string? errorCode,
|
||||
string? error,
|
||||
int attempts,
|
||||
string? traceId,
|
||||
string? explainUri,
|
||||
ImmutableSortedDictionary<string, string> metadata,
|
||||
string? schemaVersion = null)
|
||||
string? error,
|
||||
int attempts,
|
||||
string? traceId,
|
||||
string? explainUri,
|
||||
ImmutableSortedDictionary<string, string> metadata,
|
||||
bool cancellationRequested,
|
||||
DateTimeOffset? cancellationRequestedAt,
|
||||
string? cancellationReason,
|
||||
string? schemaVersion = null)
|
||||
{
|
||||
SchemaVersion = SchedulerSchemaVersions.EnsurePolicyRunStatus(schemaVersion);
|
||||
RunId = Validation.EnsureId(runId, nameof(runId));
|
||||
@@ -339,16 +348,19 @@ public sealed record PolicyRunStatus
|
||||
? throw new ArgumentOutOfRangeException(nameof(attempts), attempts, "Attempts must be non-negative.")
|
||||
: attempts;
|
||||
TraceId = Validation.TrimToNull(traceId);
|
||||
ExplainUri = Validation.TrimToNull(explainUri);
|
||||
Metadata = (metadata ?? ImmutableSortedDictionary<string, string>.Empty)
|
||||
.Select(static pair => new KeyValuePair<string, string>(
|
||||
Validation.TrimToNull(pair.Key)?.ToLowerInvariant() ?? string.Empty,
|
||||
Validation.TrimToNull(pair.Value) ?? string.Empty))
|
||||
.Where(static pair => !string.IsNullOrEmpty(pair.Key) && !string.IsNullOrEmpty(pair.Value))
|
||||
.DistinctBy(static pair => pair.Key, StringComparer.Ordinal)
|
||||
.OrderBy(static pair => pair.Key, StringComparer.Ordinal)
|
||||
.ToImmutableSortedDictionary(static pair => pair.Key, static pair => pair.Value, StringComparer.Ordinal);
|
||||
}
|
||||
ExplainUri = Validation.TrimToNull(explainUri);
|
||||
Metadata = (metadata ?? ImmutableSortedDictionary<string, string>.Empty)
|
||||
.Select(static pair => new KeyValuePair<string, string>(
|
||||
Validation.TrimToNull(pair.Key)?.ToLowerInvariant() ?? string.Empty,
|
||||
Validation.TrimToNull(pair.Value) ?? string.Empty))
|
||||
.Where(static pair => !string.IsNullOrEmpty(pair.Key) && !string.IsNullOrEmpty(pair.Value))
|
||||
.DistinctBy(static pair => pair.Key, StringComparer.Ordinal)
|
||||
.OrderBy(static pair => pair.Key, StringComparer.Ordinal)
|
||||
.ToImmutableSortedDictionary(static pair => pair.Key, static pair => pair.Value, StringComparer.Ordinal);
|
||||
CancellationRequested = cancellationRequested;
|
||||
CancellationRequestedAt = Validation.NormalizeTimestamp(cancellationRequestedAt);
|
||||
CancellationReason = Validation.TrimToNull(cancellationReason);
|
||||
}
|
||||
|
||||
public string SchemaVersion { get; }
|
||||
|
||||
@@ -392,13 +404,22 @@ public sealed record PolicyRunStatus
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? ExplainUri { get; init; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
|
||||
public ImmutableSortedDictionary<string, string> Metadata { get; init; } = ImmutableSortedDictionary<string, string>.Empty;
|
||||
|
||||
public PolicyRunStats Stats { get; init; } = PolicyRunStats.Empty;
|
||||
|
||||
public PolicyRunInputs Inputs { get; init; } = PolicyRunInputs.Empty;
|
||||
}
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
|
||||
public ImmutableSortedDictionary<string, string> Metadata { get; init; } = ImmutableSortedDictionary<string, string>.Empty;
|
||||
|
||||
public PolicyRunStats Stats { get; init; } = PolicyRunStats.Empty;
|
||||
|
||||
public PolicyRunInputs Inputs { get; init; } = PolicyRunInputs.Empty;
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
|
||||
public bool CancellationRequested { get; init; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public DateTimeOffset? CancellationRequestedAt { get; init; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? CancellationReason { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Aggregated metrics captured for a policy run.
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
using System;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Scheduler.Models;
|
||||
|
||||
/// <summary>
/// Builds the API-facing <see cref="PolicyRunStatus"/> projection of a persisted
/// <see cref="PolicyRunJob"/> document.
/// </summary>
public static class PolicyRunStatusFactory
{
    /// <summary>
    /// Projects <paramref name="job"/> into a <see cref="PolicyRunStatus"/>.
    /// </summary>
    /// <param name="job">Job to project.</param>
    /// <param name="nowUtc">Part of the public signature; the projection does not read it.</param>
    /// <returns>
    /// A status whose run id falls back to the job id, whose queue time falls back to the
    /// job creation time, and whose finish time is the completion time or, failing that,
    /// the cancellation time. Stats are always <see cref="PolicyRunStats.Empty"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="job"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">The job carries no policy version.</exception>
    public static PolicyRunStatus Create(PolicyRunJob job, DateTimeOffset nowUtc)
    {
        ArgumentNullException.ThrowIfNull(job);

        var policyVersion = job.PolicyVersion
            ?? throw new InvalidOperationException($"Policy run job '{job.Id}' is missing policyVersion.");

        var isPending = job.Status == PolicyRunJobStatus.Pending;
        var isFailed = job.Status == PolicyRunJobStatus.Failed;

        return new PolicyRunStatus(
            job.RunId ?? job.Id,
            job.TenantId,
            job.PolicyId,
            policyVersion,
            job.Mode,
            MapExecutionStatus(job.Status),
            job.Priority,
            job.QueuedAt ?? job.CreatedAt,
            // A pending job has not started, so no start time is reported for it.
            isPending ? null : job.SubmittedAt,
            job.CompletedAt ?? job.CancelledAt,
            PolicyRunStats.Empty,
            job.Inputs ?? PolicyRunInputs.Empty,
            determinismHash: null,
            errorCode: null,
            // The last error is only surfaced once the job has actually failed.
            error: isFailed ? job.LastError : null,
            attempts: job.AttemptCount,
            traceId: null,
            explainUri: null,
            job.Metadata ?? ImmutableSortedDictionary<string, string>.Empty,
            cancellationRequested: job.CancellationRequested,
            cancellationRequestedAt: job.CancellationRequestedAt,
            cancellationReason: job.CancellationReason,
            SchedulerSchemaVersions.PolicyRunStatus);
    }

    /// <summary>
    /// Translates a job status into the execution status exposed by the API.
    /// Values without an explicit mapping are reported as queued.
    /// </summary>
    private static PolicyRunExecutionStatus MapExecutionStatus(PolicyRunJobStatus status)
    {
        switch (status)
        {
            case PolicyRunJobStatus.Dispatching:
            case PolicyRunJobStatus.Submitted:
                return PolicyRunExecutionStatus.Running;

            case PolicyRunJobStatus.Completed:
                return PolicyRunExecutionStatus.Succeeded;

            case PolicyRunJobStatus.Failed:
                return PolicyRunExecutionStatus.Failed;

            case PolicyRunJobStatus.Cancelled:
                return PolicyRunExecutionStatus.Cancelled;

            case PolicyRunJobStatus.Pending:
            default:
                return PolicyRunExecutionStatus.Queued;
        }
    }
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using System;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Scheduler.Models;
|
||||
|
||||
/// <summary>
/// Body of the policy simulation webhook notification.
/// </summary>
/// <param name="TenantId">Tenant identifier, serialized as <c>tenantId</c>.</param>
/// <param name="Simulation">Run status being reported, serialized as <c>simulation</c>.</param>
/// <param name="Result">Textual outcome, serialized as <c>result</c>.</param>
/// <param name="ObservedAt">Observation timestamp, serialized as <c>observedAt</c>.</param>
/// <param name="LatencySeconds">Latency in seconds, serialized as <c>latencySeconds</c>; may be <see langword="null"/>.</param>
/// <param name="Reason">Optional explanation, serialized as <c>reason</c>.</param>
public sealed record PolicySimulationWebhookPayload(
    [property: JsonPropertyName("tenantId")]
    string TenantId,
    [property: JsonPropertyName("simulation")]
    PolicyRunStatus Simulation,
    [property: JsonPropertyName("result")]
    string Result,
    [property: JsonPropertyName("observedAt")]
    DateTimeOffset ObservedAt,
    [property: JsonPropertyName("latencySeconds")]
    double? LatencySeconds,
    [property: JsonPropertyName("reason")]
    string? Reason);
|
||||
|
||||
/// <summary>
/// Creates <see cref="PolicySimulationWebhookPayload"/> instances from policy run status snapshots.
/// </summary>
public static class PolicySimulationWebhookPayloadFactory
{
    /// <summary>
    /// Builds the webhook payload describing <paramref name="status"/> as seen at <paramref name="observedAt"/>.
    /// </summary>
    /// <param name="status">Run status to report.</param>
    /// <param name="observedAt">Observation time; also used as the end of the latency window when the run has no finish time.</param>
    /// <returns>The populated payload.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="status"/> is <see langword="null"/>.</exception>
    public static PolicySimulationWebhookPayload Create(PolicyRunStatus status, DateTimeOffset observedAt)
    {
        ArgumentNullException.ThrowIfNull(status);

        return new PolicySimulationWebhookPayload(
            TenantId: status.TenantId,
            Simulation: status,
            Result: DescribeResult(status.Status),
            ObservedAt: observedAt,
            LatencySeconds: CalculateLatencySeconds(status, observedAt),
            Reason: SelectReason(status));
    }

    /// <summary>
    /// Maps an execution status to its wire label; anything not listed is reported as "queued".
    /// </summary>
    private static string DescribeResult(PolicyRunExecutionStatus executionStatus)
    {
        switch (executionStatus)
        {
            case PolicyRunExecutionStatus.Succeeded:
                return "succeeded";
            case PolicyRunExecutionStatus.Failed:
                return "failed";
            case PolicyRunExecutionStatus.Cancelled:
                return "cancelled";
            case PolicyRunExecutionStatus.ReplayPending:
                return "replay_pending";
            case PolicyRunExecutionStatus.Running:
                return "running";
            default:
                return "queued";
        }
    }

    /// <summary>
    /// Picks the explanatory text: the error for failed runs, the cancellation reason for
    /// cancelled runs, and nothing otherwise.
    /// </summary>
    private static string? SelectReason(PolicyRunStatus status)
    {
        if (status.Status == PolicyRunExecutionStatus.Failed)
        {
            return status.Error;
        }

        if (status.Status == PolicyRunExecutionStatus.Cancelled)
        {
            return status.CancellationReason;
        }

        return null;
    }

    /// <summary>
    /// Seconds elapsed between queueing and finishing (or <paramref name="observedAt"/> when the
    /// run has no finish time), clamped at zero and rounded to four decimals.
    /// Returns <see langword="null"/> when the queue timestamp is the default value.
    /// </summary>
    private static double? CalculateLatencySeconds(PolicyRunStatus status, DateTimeOffset observedAt)
    {
        var windowStart = status.QueuedAt;
        var windowEnd = status.FinishedAt ?? observedAt;

        if (windowStart == default)
        {
            return null;
        }

        var elapsed = (windowEnd - windowStart).TotalSeconds;

        // A finish time earlier than the queue time would give a negative span; report zero instead.
        return Math.Round(elapsed < 0 ? 0 : elapsed, 4);
    }
}
|
||||
@@ -21,6 +21,7 @@ public sealed record Run
|
||||
DateTimeOffset? finishedAt = null,
|
||||
string? error = null,
|
||||
IEnumerable<DeltaSummary>? deltas = null,
|
||||
string? retryOf = null,
|
||||
string? schemaVersion = null)
|
||||
: this(
|
||||
id,
|
||||
@@ -35,6 +36,7 @@ public sealed record Run
|
||||
Validation.NormalizeTimestamp(finishedAt),
|
||||
Validation.TrimToNull(error),
|
||||
NormalizeDeltas(deltas),
|
||||
Validation.TrimToNull(retryOf),
|
||||
schemaVersion)
|
||||
{
|
||||
}
|
||||
@@ -53,6 +55,7 @@ public sealed record Run
|
||||
DateTimeOffset? finishedAt,
|
||||
string? error,
|
||||
ImmutableArray<DeltaSummary> deltas,
|
||||
string? retryOf,
|
||||
string? schemaVersion = null)
|
||||
{
|
||||
Id = Validation.EnsureId(id, nameof(id));
|
||||
@@ -69,6 +72,7 @@ public sealed record Run
|
||||
Deltas = deltas.IsDefault
|
||||
? ImmutableArray<DeltaSummary>.Empty
|
||||
: deltas.OrderBy(static delta => delta.ImageDigest, StringComparer.Ordinal).ToImmutableArray();
|
||||
RetryOf = Validation.TrimToNull(retryOf);
|
||||
SchemaVersion = SchedulerSchemaVersions.EnsureRun(schemaVersion);
|
||||
}
|
||||
|
||||
@@ -103,6 +107,9 @@ public sealed record Run
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
|
||||
public ImmutableArray<DeltaSummary> Deltas { get; } = ImmutableArray<DeltaSummary>.Empty;
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? RetryOf { get; }
|
||||
|
||||
private static ImmutableArray<DeltaSummary> NormalizeDeltas(IEnumerable<DeltaSummary>? deltas)
|
||||
{
|
||||
if (deltas is null)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.Metrics;
|
||||
@@ -5,7 +6,7 @@ using System.Linq;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
internal static class SchedulerQueueMetrics
|
||||
public static class SchedulerQueueMetrics
|
||||
{
|
||||
private const string TransportTagName = "transport";
|
||||
private const string QueueTagName = "queue";
|
||||
@@ -21,6 +22,25 @@ internal static class SchedulerQueueMetrics
|
||||
"scheduler_queue_depth",
|
||||
ObserveDepth);
|
||||
|
||||
/// <summary>
/// Captures the currently recorded queue depths as a read-only list ordered by the
/// sample comparer (transport first, then queue).
/// </summary>
/// <returns>An empty array when nothing has been recorded; otherwise a read-only wrapper over the sorted samples.</returns>
public static IReadOnlyList<SchedulerQueueDepthSample> CaptureDepthSamples()
{
    var entries = DepthSamples.ToArray();
    if (entries.Length == 0)
    {
        return Array.Empty<SchedulerQueueDepthSample>();
    }

    var ordered = Array.ConvertAll(
        entries,
        static entry => new SchedulerQueueDepthSample(entry.Key.transport, entry.Key.queue, entry.Value));

    Array.Sort(ordered, SchedulerQueueDepthSampleComparer.Instance);

    return Array.AsReadOnly(ordered);
}
|
||||
|
||||
public static void RecordEnqueued(string transport, string queue)
|
||||
=> EnqueuedCounter.Add(1, BuildTags(transport, queue));
|
||||
|
||||
@@ -45,6 +65,22 @@ internal static class SchedulerQueueMetrics
|
||||
internal static IReadOnlyDictionary<(string transport, string queue), long> SnapshotDepths()
|
||||
=> DepthSamples.ToDictionary(pair => pair.Key, pair => pair.Value);
|
||||
|
||||
/// <summary>
/// Orders depth samples by transport and then by queue, both compared ordinally.
/// </summary>
private sealed class SchedulerQueueDepthSampleComparer : IComparer<SchedulerQueueDepthSample>
{
    /// <summary>Shared comparer instance.</summary>
    public static SchedulerQueueDepthSampleComparer Instance { get; } = new();

    /// <inheritdoc />
    public int Compare(SchedulerQueueDepthSample x, SchedulerQueueDepthSample y)
    {
        var byTransport = string.Compare(x.Transport, y.Transport, StringComparison.Ordinal);

        // The queue name only breaks ties between samples on the same transport.
        return byTransport != 0
            ? byTransport
            : string.Compare(x.Queue, y.Queue, StringComparison.Ordinal);
    }
}
|
||||
|
||||
private static KeyValuePair<string, object?>[] BuildTags(string transport, string queue)
|
||||
=> new[]
|
||||
{
|
||||
@@ -63,3 +99,5 @@ internal static class SchedulerQueueMetrics
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Depth value recorded for a single transport/queue pair.
/// </summary>
/// <param name="Transport">Transport the sample belongs to.</param>
/// <param name="Queue">Queue the sample belongs to.</param>
/// <param name="Depth">Recorded depth for that transport and queue.</param>
public readonly record struct SchedulerQueueDepthSample(string Transport, string Queue, long Depth);
|
||||
|
||||
@@ -15,6 +15,8 @@ public sealed class SchedulerMongoOptions
|
||||
|
||||
public string RunsCollection { get; set; } = "runs";
|
||||
|
||||
public string PolicyJobsCollection { get; set; } = "policy_jobs";
|
||||
|
||||
public string ImpactSnapshotsCollection { get; set; } = "impact_snapshots";
|
||||
|
||||
public string AuditCollection { get; set; } = "audit";
|
||||
|
||||
@@ -36,13 +36,19 @@ public interface IPolicyRunJobRepository
|
||||
PolicyRunMode? mode = null,
|
||||
IReadOnlyCollection<PolicyRunJobStatus>? statuses = null,
|
||||
DateTimeOffset? queuedAfter = null,
|
||||
int limit = 50,
|
||||
IClientSessionHandle? session = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
Task<bool> ReplaceAsync(
|
||||
PolicyRunJob job,
|
||||
string? expectedLeaseOwner = null,
|
||||
IClientSessionHandle? session = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
int limit = 50,
|
||||
IClientSessionHandle? session = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
Task<bool> ReplaceAsync(
|
||||
PolicyRunJob job,
|
||||
string? expectedLeaseOwner = null,
|
||||
IClientSessionHandle? session = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
Task<long> CountAsync(
|
||||
string tenantId,
|
||||
PolicyRunMode mode,
|
||||
IReadOnlyCollection<PolicyRunJobStatus> statuses,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using MongoDB.Bson;
|
||||
using MongoDB.Driver;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Internal;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Serialization;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using MongoDB.Bson;
|
||||
using MongoDB.Driver;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Internal;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Serialization;
|
||||
|
||||
namespace StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
|
||||
@@ -206,16 +207,43 @@ internal sealed class PolicyRunJobRepository : IPolicyRunJobRepository
|
||||
.ToListAsync(cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
return documents
|
||||
.Select(PolicyRunJobDocumentMapper.FromBsonDocument)
|
||||
.ToList();
|
||||
}
|
||||
|
||||
public async Task<bool> ReplaceAsync(
|
||||
PolicyRunJob job,
|
||||
string? expectedLeaseOwner = null,
|
||||
IClientSessionHandle? session = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
return documents
|
||||
.Select(PolicyRunJobDocumentMapper.FromBsonDocument)
|
||||
.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts the policy run jobs stored for a tenant and mode, optionally restricted to a set of statuses.
/// </summary>
/// <param name="tenantId">Tenant whose jobs are counted; must not be blank.</param>
/// <param name="mode">Run mode to match (compared as its lower-cased name).</param>
/// <param name="statuses">Statuses to match; when null or empty, no status filter is applied.</param>
/// <param name="cancellationToken">Token forwarded to the count operation.</param>
/// <returns>The number of matching documents.</returns>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
public async Task<long> CountAsync(
    string tenantId,
    PolicyRunMode mode,
    IReadOnlyCollection<PolicyRunJobStatus> statuses,
    CancellationToken cancellationToken = default)
{
    if (string.IsNullOrWhiteSpace(tenantId))
    {
        throw new ArgumentException("Tenant id must be provided.", nameof(tenantId));
    }

    var criteria = new List<FilterDefinition<BsonDocument>>();
    criteria.Add(Filter.Eq("tenantId", tenantId));
    criteria.Add(Filter.Eq("mode", mode.ToString().ToLowerInvariant()));

    if (statuses is not null && statuses.Count > 0)
    {
        var statusNames = statuses.Select(static status => status.ToString().ToLowerInvariant());
        criteria.Add(Filter.In("status", new BsonArray(statusNames)));
    }

    var matched = await _collection
        .CountDocumentsAsync(Filter.And(criteria), cancellationToken: cancellationToken)
        .ConfigureAwait(false);

    return matched;
}
|
||||
|
||||
public async Task<bool> ReplaceAsync(
|
||||
PolicyRunJob job,
|
||||
string? expectedLeaseOwner = null,
|
||||
IClientSessionHandle? session = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(job);
|
||||
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
using System;
|
||||
|
||||
namespace StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
|
||||
/// <summary>
/// Cursor describing the position of a run in deterministic ordering.
/// </summary>
public sealed record RunListCursor
{
    /// <summary>
    /// Creates a cursor positioned at the given run.
    /// </summary>
    /// <param name="createdAt">Creation timestamp of the last run observed; converted to UTC.</param>
    /// <param name="runId">Identifier of the last run observed; surrounding whitespace is trimmed.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="runId"/> is null, empty, whitespace, or longer than 256 characters after trimming.
    /// </exception>
    public RunListCursor(DateTimeOffset createdAt, string runId)
    {
        CreatedAt = NormalizeTimestamp(createdAt);

        // Pass the public parameter name through so a validation failure reports
        // "runId" rather than the helper's internal parameter name.
        RunId = NormalizeRunId(runId, nameof(runId));
    }

    /// <summary>
    /// Timestamp of the last run observed (UTC).
    /// </summary>
    public DateTimeOffset CreatedAt { get; }

    /// <summary>
    /// Identifier of the last run observed.
    /// </summary>
    public string RunId { get; }

    /// <summary>
    /// Converts <paramref name="value"/> to UTC and rebuilds it from a UTC-kind
    /// <see cref="DateTime"/>, so the result carries a zero offset.
    /// </summary>
    private static DateTimeOffset NormalizeTimestamp(DateTimeOffset value)
    {
        var utc = value.ToUniversalTime();
        return new DateTimeOffset(DateTime.SpecifyKind(utc.DateTime, DateTimeKind.Utc));
    }

    /// <summary>
    /// Validates and trims a run identifier.
    /// </summary>
    /// <param name="value">Raw identifier.</param>
    /// <param name="paramName">Name of the caller's parameter, reported on validation failure.</param>
    /// <returns>The trimmed identifier.</returns>
    /// <exception cref="ArgumentException">The identifier is blank or exceeds 256 characters after trimming.</exception>
    private static string NormalizeRunId(string value, string paramName)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new ArgumentException("Run id must be provided.", paramName);
        }

        var trimmed = value.Trim();
        if (trimmed.Length > 256)
        {
            throw new ArgumentException("Run id exceeds 256 characters.", paramName);
        }

        return trimmed;
    }
}
|
||||
@@ -19,16 +19,21 @@ public sealed class RunQueryOptions
|
||||
public ImmutableArray<RunState> States { get; init; } = ImmutableArray<RunState>.Empty;
|
||||
|
||||
/// <summary>
|
||||
/// Optional lower bound for creation timestamp (UTC).
|
||||
/// </summary>
|
||||
public DateTimeOffset? CreatedAfter { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of runs to return (default 50 when unspecified).
|
||||
/// </summary>
|
||||
public int? Limit { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Optional lower bound for creation timestamp (UTC).
|
||||
/// </summary>
|
||||
public DateTimeOffset? CreatedAfter { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Optional cursor to resume iteration using deterministic ordering.
|
||||
/// </summary>
|
||||
public RunListCursor? Cursor { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of runs to return (default 50 when unspecified).
|
||||
/// </summary>
|
||||
public int? Limit { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Sort order flag. Defaults to descending by createdAt.
|
||||
/// </summary>
|
||||
public bool SortAscending { get; init; }
|
||||
|
||||
@@ -127,28 +127,53 @@ internal sealed class RunRepository : IRunRepository
|
||||
filters.Add(Filter.In("state", options.States.Select(state => state.ToString().ToLowerInvariant())));
|
||||
}
|
||||
|
||||
if (options.CreatedAfter is { } createdAfter)
|
||||
{
|
||||
filters.Add(Filter.Gt("createdAt", createdAfter.ToUniversalTime().UtcDateTime));
|
||||
}
|
||||
if (options.CreatedAfter is { } createdAfter)
|
||||
{
|
||||
filters.Add(Filter.Gt("createdAt", createdAfter.ToUniversalTime().UtcDateTime));
|
||||
}
|
||||
|
||||
if (options.Cursor is { } cursor)
|
||||
{
|
||||
var createdAtUtc = cursor.CreatedAt.ToUniversalTime().UtcDateTime;
|
||||
FilterDefinition<BsonDocument> cursorFilter;
|
||||
|
||||
if (options.SortAscending)
|
||||
{
|
||||
cursorFilter = Filter.Or(
|
||||
Filter.Gt("createdAt", createdAtUtc),
|
||||
Filter.And(
|
||||
Filter.Eq("createdAt", createdAtUtc),
|
||||
Filter.Gt("_id", cursor.RunId)));
|
||||
}
|
||||
else
|
||||
{
|
||||
cursorFilter = Filter.Or(
|
||||
Filter.Lt("createdAt", createdAtUtc),
|
||||
Filter.And(
|
||||
Filter.Eq("createdAt", createdAtUtc),
|
||||
Filter.Lt("_id", cursor.RunId)));
|
||||
}
|
||||
|
||||
filters.Add(cursorFilter);
|
||||
}
|
||||
|
||||
var combined = Filter.And(filters);
|
||||
|
||||
var find = session is null
|
||||
? _collection.Find(combined)
|
||||
: _collection.Find(session, combined);
|
||||
|
||||
var combined = Filter.And(filters);
|
||||
|
||||
var find = session is null
|
||||
? _collection.Find(combined)
|
||||
: _collection.Find(session, combined);
|
||||
|
||||
var limit = options.Limit is { } specified && specified > 0 ? specified : DefaultListLimit;
|
||||
find = find.Limit(limit);
|
||||
|
||||
var sortDefinition = options.SortAscending
|
||||
? Sort.Ascending("createdAt")
|
||||
: Sort.Descending("createdAt");
|
||||
|
||||
find = find.Sort(sortDefinition);
|
||||
|
||||
var documents = await find.ToListAsync(cancellationToken).ConfigureAwait(false);
|
||||
return documents.Select(RunDocumentMapper.FromBsonDocument).ToArray();
|
||||
var limit = options.Limit is { } specified && specified > 0 ? specified : DefaultListLimit;
|
||||
find = find.Limit(limit);
|
||||
|
||||
var sortDefinition = options.SortAscending
|
||||
? Sort.Combine(Sort.Ascending("createdAt"), Sort.Ascending("_id"))
|
||||
: Sort.Combine(Sort.Descending("createdAt"), Sort.Descending("_id"));
|
||||
|
||||
find = find.Sort(sortDefinition);
|
||||
|
||||
var documents = await find.ToListAsync(cancellationToken).ConfigureAwait(false);
|
||||
return documents.Select(RunDocumentMapper.FromBsonDocument).ToArray();
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<Run>> ListByStateAsync(
|
||||
|
||||
@@ -57,8 +57,9 @@ public static class SchedulerWorkerServiceCollectionExtensions
|
||||
loggerFactory.CreateLogger<SchedulerEventPublisher>());
|
||||
});
|
||||
|
||||
services.AddHttpClient<IScannerReportClient, HttpScannerReportClient>();
|
||||
services.AddHttpClient<IPolicyRunClient, HttpPolicyRunClient>();
|
||||
services.AddHttpClient<IScannerReportClient, HttpScannerReportClient>();
|
||||
services.AddHttpClient<IPolicyRunClient, HttpPolicyRunClient>();
|
||||
services.AddHttpClient<IPolicySimulationWebhookClient, HttpPolicySimulationWebhookClient>();
|
||||
services.AddHttpClient<ICartographerBuildClient, HttpCartographerBuildClient>((sp, client) =>
|
||||
{
|
||||
var options = sp.GetRequiredService<IOptions<SchedulerWorkerOptions>>().Value.Graph;
|
||||
|
||||
@@ -4,10 +4,11 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
using StellaOps.Scheduler.Worker.Graph.Cartographer;
|
||||
using StellaOps.Scheduler.Worker.Graph.Scheduler;
|
||||
using StellaOps.Scheduler.Worker.Options;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
using StellaOps.Scheduler.Worker.Graph.Cartographer;
|
||||
using StellaOps.Scheduler.Worker.Graph.Scheduler;
|
||||
using StellaOps.Scheduler.Worker.Options;
|
||||
using StellaOps.Scheduler.Worker.Observability;
|
||||
|
||||
namespace StellaOps.Scheduler.Worker.Graph;
|
||||
|
||||
|
||||
@@ -4,10 +4,11 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
using StellaOps.Scheduler.Worker.Graph.Cartographer;
|
||||
using StellaOps.Scheduler.Worker.Graph.Scheduler;
|
||||
using StellaOps.Scheduler.Worker.Options;
|
||||
using StellaOps.Scheduler.Storage.Mongo.Repositories;
|
||||
using StellaOps.Scheduler.Worker.Graph.Cartographer;
|
||||
using StellaOps.Scheduler.Worker.Graph.Scheduler;
|
||||
using StellaOps.Scheduler.Worker.Options;
|
||||
using StellaOps.Scheduler.Worker.Observability;
|
||||
|
||||
namespace StellaOps.Scheduler.Worker.Graph;
|
||||
|
||||
|
||||
@@ -1,236 +1,245 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.Metrics;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Worker.Observability;
|
||||
|
||||
public sealed class SchedulerWorkerMetrics : IDisposable
|
||||
{
|
||||
public const string MeterName = "StellaOps.Scheduler.Worker";
|
||||
|
||||
private readonly Meter _meter;
|
||||
private readonly Counter<long> _plannerRunsTotal;
|
||||
private readonly Histogram<double> _plannerLatencySeconds;
|
||||
private readonly Counter<long> _runnerSegmentsTotal;
|
||||
private readonly Counter<long> _runnerImagesTotal;
|
||||
private readonly Counter<long> _runnerDeltaCriticalTotal;
|
||||
private readonly Counter<long> _runnerDeltaHighTotal;
|
||||
private readonly Counter<long> _runnerDeltaFindingsTotal;
|
||||
private readonly Counter<long> _runnerKevHitsTotal;
|
||||
private readonly Histogram<double> _runDurationSeconds;
|
||||
private readonly UpDownCounter<long> _runsActive;
|
||||
private readonly Counter<long> _graphJobsTotal;
|
||||
private readonly Histogram<double> _graphJobDurationSeconds;
|
||||
private readonly ConcurrentDictionary<string, long> _backlog = new(StringComparer.Ordinal);
|
||||
private readonly ObservableGauge<long> _backlogGauge;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>
/// Creates the worker meter (<see cref="MeterName"/>) and registers every scheduler instrument on it.
/// </summary>
public SchedulerWorkerMetrics()
{
    const string CountUnit = "count";
    const string SecondsUnit = "s";

    _meter = new Meter(MeterName);

    // Planner instruments.
    _plannerRunsTotal = _meter.CreateCounter<long>(
        "scheduler_planner_runs_total",
        unit: CountUnit,
        description: "Planner runs grouped by status and mode.");
    _plannerLatencySeconds = _meter.CreateHistogram<double>(
        "scheduler_planner_latency_seconds",
        unit: SecondsUnit,
        description: "Latency between run creation and planner processing grouped by mode and status.");

    // Runner instruments.
    _runnerSegmentsTotal = _meter.CreateCounter<long>(
        "scheduler_runner_segments_total",
        unit: CountUnit,
        description: "Runner segments processed grouped by status and mode.");
    _runnerImagesTotal = _meter.CreateCounter<long>(
        "scheduler_runner_images_total",
        unit: CountUnit,
        description: "Images processed by runner grouped by mode and delta outcome.");
    _runnerDeltaCriticalTotal = _meter.CreateCounter<long>(
        "scheduler_runner_delta_critical_total",
        unit: CountUnit,
        description: "Critical findings observed by runner grouped by mode.");
    _runnerDeltaHighTotal = _meter.CreateCounter<long>(
        "scheduler_runner_delta_high_total",
        unit: CountUnit,
        description: "High findings observed by runner grouped by mode.");
    _runnerDeltaFindingsTotal = _meter.CreateCounter<long>(
        "scheduler_runner_delta_total",
        unit: CountUnit,
        description: "Total findings observed by runner grouped by mode.");
    _runnerKevHitsTotal = _meter.CreateCounter<long>(
        "scheduler_runner_delta_kev_total",
        unit: CountUnit,
        description: "KEV hits observed by runner grouped by mode.");

    // Run lifecycle instruments.
    _runDurationSeconds = _meter.CreateHistogram<double>(
        "scheduler_run_duration_seconds",
        unit: SecondsUnit,
        description: "End-to-end run durations grouped by mode and result.");
    _runsActive = _meter.CreateUpDownCounter<long>(
        "scheduler_runs_active",
        unit: CountUnit,
        description: "Active scheduler runs grouped by mode.");

    // Graph job instruments.
    _graphJobsTotal = _meter.CreateCounter<long>(
        "scheduler_graph_jobs_total",
        unit: CountUnit,
        description: "Graph jobs processed by the worker grouped by type and result.");
    _graphJobDurationSeconds = _meter.CreateHistogram<double>(
        "scheduler_graph_job_duration_seconds",
        unit: SecondsUnit,
        description: "Graph job durations grouped by type and result.");

    // Backlog gauge; its values come from the ObserveBacklog callback.
    _backlogGauge = _meter.CreateObservableGauge<long>(
        "scheduler_runner_backlog",
        ObserveBacklog,
        unit: "images",
        description: "Remaining images queued for runner processing grouped by mode and schedule.");
}
|
||||
|
||||
/// <summary>
/// Counts one graph job tagged with its type and result and, when a duration is supplied,
/// records it in seconds (negative durations are recorded as zero).
/// </summary>
/// <param name="type">Value of the <c>type</c> tag.</param>
/// <param name="result">Value of the <c>result</c> tag.</param>
/// <param name="duration">Optional job duration; nothing is recorded in the histogram when omitted.</param>
public void RecordGraphJobResult(string type, string result, TimeSpan? duration = null)
{
    KeyValuePair<string, object?>[] tags =
    {
        new("type", type),
        new("result", result)
    };

    _graphJobsTotal.Add(1, tags);

    if (!duration.HasValue)
    {
        return;
    }

    var seconds = Math.Max(duration.Value.TotalSeconds, 0d);
    _graphJobDurationSeconds.Record(seconds, tags);
}
|
||||
|
||||
/// <summary>
/// Counts one planner run and records its latency, both tagged with mode and status.
/// When the status is "enqueued" (case-insensitive) and at least one image was planned,
/// the active-run counter for the mode is incremented.
/// </summary>
/// <param name="mode">Value of the <c>mode</c> tag.</param>
/// <param name="status">Value of the <c>status</c> tag.</param>
/// <param name="latency">Planner latency; negative values are recorded as zero seconds.</param>
/// <param name="imageCount">Number of images planned for the run.</param>
public void RecordPlannerResult(string mode, string status, TimeSpan latency, int imageCount)
{
    KeyValuePair<string, object?>[] tags =
    {
        new("mode", mode),
        new("status", status)
    };

    _plannerRunsTotal.Add(1, tags);

    var latencySeconds = Math.Max(latency.TotalSeconds, 0d);
    _plannerLatencySeconds.Record(latencySeconds, tags);

    var enqueued = status.Equals("enqueued", StringComparison.OrdinalIgnoreCase);
    if (!enqueued || imageCount <= 0)
    {
        return;
    }

    KeyValuePair<string, object?>[] activeTags = { new("mode", mode) };
    _runsActive.Add(1, activeTags);
}
|
||||
|
||||
/// <summary>
/// Counts one runner segment and adds the number of processed images to the image counter.
/// </summary>
/// <param name="mode">Value emitted as the <c>mode</c> tag on both counters.</param>
/// <param name="status">Value emitted as the <c>status</c> tag on the segment counter.</param>
/// <param name="processedImages">Amount added to the image counter.</param>
/// <param name="deltaImages">When greater than zero the image counter is tagged <c>delta=true</c>, otherwise <c>delta=false</c>.</param>
public void RecordRunnerSegment(string mode, string status, int processedImages, int deltaImages)
{
    var modeTag = new KeyValuePair<string, object?>("mode", mode);

    _runnerSegmentsTotal.Add(1, modeTag, new KeyValuePair<string, object?>("status", status));

    var deltaFlag = deltaImages > 0 ? "true" : "false";
    _runnerImagesTotal.Add(processedImages, modeTag, new KeyValuePair<string, object?>("delta", deltaFlag));
}
|
||||
|
||||
/// <summary>
/// Adds the per-image delta figures (new criticals, new highs, new findings, KEV hits) to their counters.
/// </summary>
/// <param name="mode">Value emitted as the <c>mode</c> tag on every counter.</param>
/// <param name="deltas">Summaries to aggregate; an empty list records nothing.</param>
public void RecordDeltaSummaries(string mode, IReadOnlyList<DeltaSummary> deltas)
{
    if (deltas.Count == 0)
    {
        return;
    }

    var modeTag = new KeyValuePair<string, object?>("mode", mode);

    // Only strictly positive amounts are reported.
    void AddWhenPositive(Counter<long> counter, long amount)
    {
        if (amount > 0)
        {
            counter.Add(amount, modeTag);
        }
    }

    foreach (var summary in deltas)
    {
        AddWhenPositive(_runnerDeltaCriticalTotal, summary.NewCriticals);
        AddWhenPositive(_runnerDeltaHighTotal, summary.NewHigh);
        AddWhenPositive(_runnerDeltaFindingsTotal, summary.NewFindings);

        // Check IsDefaultOrEmpty before touching Length, exactly as the counter update requires a populated array.
        if (!summary.KevHits.IsDefaultOrEmpty)
        {
            _runnerKevHitsTotal.Add(summary.KevHits.Length, modeTag);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Records the duration of a finished run (when supplied) and optionally decrements the active-run counter.
/// </summary>
/// <param name="mode">Value emitted as the <c>mode</c> tag.</param>
/// <param name="result">Value emitted as the <c>result</c> tag on the duration histogram.</param>
/// <param name="duration">Optional run duration; a negative duration is recorded as zero seconds.</param>
/// <param name="decrementActive">When <c>true</c> (the default) the active-run counter for <paramref name="mode"/> is reduced by one.</param>
public void RecordRunCompletion(string mode, string result, TimeSpan? duration, bool decrementActive = true)
{
    var modeTag = new KeyValuePair<string, object?>("mode", mode);

    if (duration.HasValue)
    {
        var elapsedSeconds = Math.Max(duration.Value.TotalSeconds, 0d);
        _runDurationSeconds.Record(
            elapsedSeconds,
            modeTag,
            new KeyValuePair<string, object?>("result", result));
    }

    if (decrementActive)
    {
        _runsActive.Add(-1, modeTag);
    }
}
|
||||
|
||||
/// <summary>
/// Stores the latest backlog figure for a mode/schedule pair, or forgets the pair when nothing remains.
/// </summary>
/// <param name="mode">Mode part of the backlog key.</param>
/// <param name="scheduleId">Schedule part of the backlog key; <c>null</c> is treated as an empty id.</param>
/// <param name="backlog">Remaining amount; zero or negative removes the entry.</param>
public void UpdateBacklog(string mode, string? scheduleId, long backlog)
{
    var backlogKey = BuildBacklogKey(mode, scheduleId);

    if (backlog > 0)
    {
        _backlog[backlogKey] = backlog;
        return;
    }

    _backlog.TryRemove(backlogKey, out _);
}
|
||||
|
||||
/// <summary>
/// Gauge callback: yields one measurement per stored backlog entry, tagged with its mode and schedule id.
/// </summary>
/// <returns>A lazily produced sequence of backlog measurements.</returns>
private IEnumerable<Measurement<long>> ObserveBacklog()
{
    foreach (var (backlogKey, remaining) in _backlog)
    {
        var parsed = SplitBacklogKey(backlogKey);
        var modeTag = new KeyValuePair<string, object?>("mode", parsed.Mode);

        // A missing schedule id is reported as an empty string tag value.
        var scheduleTag = new KeyValuePair<string, object?>("scheduleId", parsed.ScheduleId ?? string.Empty);

        yield return new Measurement<long>(remaining, modeTag, scheduleTag);
    }
}
|
||||
|
||||
/// <summary>
/// Builds the dictionary key for a backlog entry as <c>mode|scheduleId</c>; a null schedule id contributes an empty string.
/// </summary>
private static string BuildBacklogKey(string mode, string? scheduleId)
{
    return string.Concat(mode, "|", scheduleId ?? string.Empty);
}
|
||||
|
||||
/// <summary>
/// Splits a backlog key at its first <c>|</c> into mode and schedule id.
/// </summary>
/// <param name="key">Key in <c>mode|scheduleId</c> form.</param>
/// <returns>
/// The mode and schedule id; the schedule id is <c>null</c> when the part after the separator is empty
/// or when the key contains no separator (in which case the whole key is returned as the mode).
/// </returns>
private static (string Mode, string? ScheduleId) SplitBacklogKey(string key)
{
    var separatorIndex = key.IndexOf('|');
    if (separatorIndex < 0)
    {
        return (key, null);
    }

    var modePart = key.Substring(0, separatorIndex);
    var schedulePart = key.Substring(separatorIndex + 1);

    return (modePart, schedulePart.Length == 0 ? null : schedulePart);
}
|
||||
|
||||
/// <summary>
/// Disposes the underlying meter once; later calls do nothing.
/// </summary>
public void Dispose()
{
    if (!_disposed)
    {
        _meter.Dispose();
        _disposed = true;
    }
}
|
||||
}
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.Metrics;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Worker.Observability;
|
||||
|
||||
/// <summary>
/// Publishes scheduler worker telemetry through a single <see cref="Meter"/> named <see cref="MeterName"/>:
/// planner, runner, run-duration and graph-job instruments plus an observable backlog gauge.
/// </summary>
public sealed class SchedulerWorkerMetrics : IDisposable
{
    /// <summary>Name of the meter that owns every instrument created by this class.</summary>
    public const string MeterName = "StellaOps.Scheduler.Worker";

    private readonly Meter _meter;
    private readonly Counter<long> _plannerRunsTotal;
    private readonly Histogram<double> _plannerLatencySeconds;
    private readonly Counter<long> _runnerSegmentsTotal;
    private readonly Counter<long> _runnerImagesTotal;
    private readonly Counter<long> _runnerDeltaCriticalTotal;
    private readonly Counter<long> _runnerDeltaHighTotal;
    private readonly Counter<long> _runnerDeltaFindingsTotal;
    private readonly Counter<long> _runnerKevHitsTotal;
    private readonly Histogram<double> _runDurationSeconds;
    private readonly UpDownCounter<long> _runsActive;
    private readonly Counter<long> _graphJobsTotal;
    private readonly Histogram<double> _graphJobDurationSeconds;

    // Keyed by a (mode, scheduleId) tuple rather than a delimited string, so a mode or schedule id
    // that happens to contain the delimiter can neither collide with another entry nor be split
    // at the wrong position when the gauge is observed. String components compare ordinally.
    private readonly ConcurrentDictionary<(string Mode, string ScheduleId), long> _backlog = new();
    private readonly ObservableGauge<long> _backlogGauge;
    private bool _disposed;

    /// <summary>
    /// Creates the meter and registers every instrument on it.
    /// </summary>
    public SchedulerWorkerMetrics()
    {
        _meter = new Meter(MeterName);
        _plannerRunsTotal = _meter.CreateCounter<long>(
            "scheduler_planner_runs_total",
            unit: "count",
            description: "Planner runs grouped by status and mode.");
        _plannerLatencySeconds = _meter.CreateHistogram<double>(
            "scheduler_planner_latency_seconds",
            unit: "s",
            description: "Latency between run creation and planner processing grouped by mode and status.");
        _runnerSegmentsTotal = _meter.CreateCounter<long>(
            "scheduler_runner_segments_total",
            unit: "count",
            description: "Runner segments processed grouped by status and mode.");
        _runnerImagesTotal = _meter.CreateCounter<long>(
            "scheduler_runner_images_total",
            unit: "count",
            description: "Images processed by runner grouped by mode and delta outcome.");
        _runnerDeltaCriticalTotal = _meter.CreateCounter<long>(
            "scheduler_runner_delta_critical_total",
            unit: "count",
            description: "Critical findings observed by runner grouped by mode.");
        _runnerDeltaHighTotal = _meter.CreateCounter<long>(
            "scheduler_runner_delta_high_total",
            unit: "count",
            description: "High findings observed by runner grouped by mode.");
        _runnerDeltaFindingsTotal = _meter.CreateCounter<long>(
            "scheduler_runner_delta_total",
            unit: "count",
            description: "Total findings observed by runner grouped by mode.");
        _runnerKevHitsTotal = _meter.CreateCounter<long>(
            "scheduler_runner_delta_kev_total",
            unit: "count",
            description: "KEV hits observed by runner grouped by mode.");
        _runDurationSeconds = _meter.CreateHistogram<double>(
            "scheduler_run_duration_seconds",
            unit: "s",
            description: "End-to-end run durations grouped by mode and result.");
        _runsActive = _meter.CreateUpDownCounter<long>(
            "scheduler_runs_active",
            unit: "count",
            description: "Active scheduler runs grouped by mode.");
        _graphJobsTotal = _meter.CreateCounter<long>(
            "scheduler_graph_jobs_total",
            unit: "count",
            description: "Graph jobs processed by the worker grouped by type and result.");
        _graphJobDurationSeconds = _meter.CreateHistogram<double>(
            "scheduler_graph_job_duration_seconds",
            unit: "s",
            description: "Graph job durations grouped by type and result.");
        _backlogGauge = _meter.CreateObservableGauge<long>(
            "scheduler_runner_backlog",
            ObserveBacklog,
            unit: "images",
            description: "Remaining images queued for runner processing grouped by mode and schedule.");
    }

    /// <summary>
    /// Counts one graph job and, when a duration is supplied, records it in the graph job duration histogram.
    /// </summary>
    /// <param name="type">Value emitted as the <c>type</c> tag.</param>
    /// <param name="result">Value emitted as the <c>result</c> tag.</param>
    /// <param name="duration">Optional job duration; a negative duration is recorded as zero seconds.</param>
    public void RecordGraphJobResult(string type, string result, TimeSpan? duration = null)
    {
        var tags = new[]
        {
            new KeyValuePair<string, object?>("type", type),
            new KeyValuePair<string, object?>("result", result)
        };

        _graphJobsTotal.Add(1, tags);

        if (duration is { } jobDuration)
        {
            _graphJobDurationSeconds.Record(Math.Max(jobDuration.TotalSeconds, 0d), tags);
        }
    }

    /// <summary>
    /// Counts one planner run, records its latency, and increments the active-run counter when the run
    /// was enqueued with at least one image.
    /// </summary>
    /// <param name="mode">Value emitted as the <c>mode</c> tag.</param>
    /// <param name="status">Value emitted as the <c>status</c> tag; compared case-insensitively with "enqueued".</param>
    /// <param name="latency">Planner latency; a negative latency is recorded as zero seconds.</param>
    /// <param name="imageCount">Number of images planned; must be positive for the run to count as active.</param>
    public void RecordPlannerResult(string mode, string status, TimeSpan latency, int imageCount)
    {
        var tags = new[]
        {
            new KeyValuePair<string, object?>("mode", mode),
            new KeyValuePair<string, object?>("status", status)
        };
        _plannerRunsTotal.Add(1, tags);
        _plannerLatencySeconds.Record(Math.Max(latency.TotalSeconds, 0d), tags);

        if (status.Equals("enqueued", StringComparison.OrdinalIgnoreCase) && imageCount > 0)
        {
            _runsActive.Add(1, new[] { new KeyValuePair<string, object?>("mode", mode) });
        }
    }

    /// <summary>
    /// Counts one runner segment and adds the number of processed images to the image counter.
    /// </summary>
    /// <param name="mode">Value emitted as the <c>mode</c> tag on both counters.</param>
    /// <param name="status">Value emitted as the <c>status</c> tag on the segment counter.</param>
    /// <param name="processedImages">Amount added to the image counter.</param>
    /// <param name="deltaImages">When greater than zero the image counter is tagged <c>delta=true</c>, otherwise <c>delta=false</c>.</param>
    public void RecordRunnerSegment(string mode, string status, int processedImages, int deltaImages)
    {
        var tags = new[]
        {
            new KeyValuePair<string, object?>("mode", mode),
            new KeyValuePair<string, object?>("status", status)
        };

        _runnerSegmentsTotal.Add(1, tags);

        var imageTags = new[]
        {
            new KeyValuePair<string, object?>("mode", mode),
            new KeyValuePair<string, object?>("delta", deltaImages > 0 ? "true" : "false")
        };
        _runnerImagesTotal.Add(processedImages, imageTags);
    }

    /// <summary>
    /// Adds the per-image delta figures (new criticals, new highs, new findings, KEV hits) to their counters.
    /// Only strictly positive amounts are reported.
    /// </summary>
    /// <param name="mode">Value emitted as the <c>mode</c> tag on every counter.</param>
    /// <param name="deltas">Summaries to aggregate; an empty list records nothing.</param>
    public void RecordDeltaSummaries(string mode, IReadOnlyList<DeltaSummary> deltas)
    {
        if (deltas.Count == 0)
        {
            return;
        }

        var tags = new[] { new KeyValuePair<string, object?>("mode", mode) };

        foreach (var delta in deltas)
        {
            if (delta.NewCriticals > 0)
            {
                _runnerDeltaCriticalTotal.Add(delta.NewCriticals, tags);
            }

            if (delta.NewHigh > 0)
            {
                _runnerDeltaHighTotal.Add(delta.NewHigh, tags);
            }

            if (delta.NewFindings > 0)
            {
                _runnerDeltaFindingsTotal.Add(delta.NewFindings, tags);
            }

            if (!delta.KevHits.IsDefaultOrEmpty)
            {
                _runnerKevHitsTotal.Add(delta.KevHits.Length, tags);
            }
        }
    }

    /// <summary>
    /// Records a policy run event by delegating to <see cref="RecordRunCompletion"/> with the lower-cased
    /// mode name. The active-run counter is left untouched for the "submitted" and "retry" statuses
    /// (case-insensitive) and decremented for every other status.
    /// </summary>
    /// <param name="tenantId">Accepted for call-site context; not emitted as a tag by this method.</param>
    /// <param name="policyId">Accepted for call-site context; not emitted as a tag by this method.</param>
    /// <param name="mode">Policy run mode; its lower-cased name becomes the <c>mode</c> tag.</param>
    /// <param name="status">Value emitted as the <c>result</c> tag.</param>
    /// <param name="latency">Optional latency recorded in the run-duration histogram.</param>
    /// <param name="reason">Accepted for call-site context; not emitted as a tag by this method.</param>
    public void RecordPolicyRunEvent(string tenantId, string policyId, PolicyRunMode mode, string status, TimeSpan? latency = null, string? reason = null)
    {
        var modeTag = mode.ToString().ToLowerInvariant();
        var decrementActive = !string.Equals(status, "submitted", StringComparison.OrdinalIgnoreCase)
            && !string.Equals(status, "retry", StringComparison.OrdinalIgnoreCase);

        RecordRunCompletion(modeTag, status, latency, decrementActive);
    }

    /// <summary>
    /// Records the duration of a finished run (when supplied) and optionally decrements the active-run counter.
    /// </summary>
    /// <param name="mode">Value emitted as the <c>mode</c> tag.</param>
    /// <param name="result">Value emitted as the <c>result</c> tag on the duration histogram.</param>
    /// <param name="duration">Optional run duration; a negative duration is recorded as zero seconds.</param>
    /// <param name="decrementActive">When <c>true</c> (the default) the active-run counter for <paramref name="mode"/> is reduced by one.</param>
    public void RecordRunCompletion(string mode, string result, TimeSpan? duration, bool decrementActive = true)
    {
        var tags = new[]
        {
            new KeyValuePair<string, object?>("mode", mode),
            new KeyValuePair<string, object?>("result", result)
        };

        if (duration is { } runDuration)
        {
            _runDurationSeconds.Record(Math.Max(runDuration.TotalSeconds, 0d), tags);
        }

        if (decrementActive)
        {
            _runsActive.Add(-1, new[] { new KeyValuePair<string, object?>("mode", mode) });
        }
    }

    /// <summary>
    /// Stores the latest backlog figure for a mode/schedule pair, or forgets the pair when nothing remains.
    /// </summary>
    /// <param name="mode">Mode part of the backlog key.</param>
    /// <param name="scheduleId">Schedule part of the backlog key; <c>null</c> and empty are treated as the same id.</param>
    /// <param name="backlog">Remaining amount; zero or negative removes the entry.</param>
    public void UpdateBacklog(string mode, string? scheduleId, long backlog)
    {
        // Normalise nulls to empty so null and "" address the same entry and are reported as "".
        var key = (mode ?? string.Empty, scheduleId ?? string.Empty);
        if (backlog <= 0)
        {
            _backlog.TryRemove(key, out _);
        }
        else
        {
            _backlog[key] = backlog;
        }
    }

    /// <summary>
    /// Gauge callback: yields one measurement per stored backlog entry, tagged with its mode and schedule id.
    /// </summary>
    private IEnumerable<Measurement<long>> ObserveBacklog()
    {
        foreach (var entry in _backlog)
        {
            yield return new Measurement<long>(
                entry.Value,
                new KeyValuePair<string, object?>("mode", entry.Key.Mode),
                new KeyValuePair<string, object?>("scheduleId", entry.Key.ScheduleId));
        }
    }

    /// <summary>
    /// Disposes the underlying meter once; later calls do nothing.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _meter.Dispose();
        _disposed = true;
    }
}
|
||||
|
||||
@@ -280,18 +280,21 @@ public sealed class SchedulerWorkerOptions
|
||||
/// </summary>
|
||||
public bool Enabled { get; set; } = true;
|
||||
|
||||
public DispatchOptions Dispatch { get; set; } = new();
|
||||
|
||||
public ApiOptions Api { get; set; } = new();
|
||||
|
||||
public TargetingOptions Targeting { get; set; } = new();
|
||||
|
||||
/// <summary>
/// Runs validation on the nested option groups in order: dispatch, API, then targeting.
/// </summary>
public void Validate()
|
||||
{
|
||||
Dispatch.Validate();
|
||||
Api.Validate();
|
||||
Targeting.Validate();
|
||||
}
|
||||
public DispatchOptions Dispatch { get; set; } = new();
|
||||
|
||||
public ApiOptions Api { get; set; } = new();
|
||||
|
||||
public TargetingOptions Targeting { get; set; } = new();
|
||||
|
||||
public WebhookOptions Webhook { get; set; } = new();
|
||||
|
||||
/// <summary>
/// Runs validation on the nested option groups in order: dispatch, API, targeting, then webhook.
/// </summary>
public void Validate()
|
||||
{
|
||||
Dispatch.Validate();
|
||||
Api.Validate();
|
||||
Targeting.Validate();
|
||||
Webhook.Validate();
|
||||
}
|
||||
|
||||
public sealed class DispatchOptions
|
||||
{
|
||||
@@ -430,11 +433,11 @@ public sealed class SchedulerWorkerOptions
|
||||
}
|
||||
}
|
||||
|
||||
public sealed class TargetingOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// When disabled the worker skips policy delta targeting.
|
||||
/// </summary>
|
||||
public sealed class TargetingOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// When disabled the worker skips policy delta targeting.
|
||||
/// </summary>
|
||||
public bool Enabled { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
@@ -454,8 +457,59 @@ public sealed class SchedulerWorkerOptions
|
||||
throw new InvalidOperationException("Policy targeting MaxSboms must be greater than zero.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Settings for the webhook callback emitted when policy simulations complete.
/// </summary>
public sealed class WebhookOptions
{
    /// <summary>
    /// Controls whether webhook callbacks are emitted when simulations complete.
    /// </summary>
    public bool Enabled { get; set; }

    /// <summary>
    /// Absolute http or https endpoint to invoke for webhook callbacks.
    /// </summary>
    public string? Endpoint { get; set; }

    /// <summary>
    /// Optional header to carry an API key.
    /// </summary>
    public string? ApiKeyHeader { get; set; }

    /// <summary>
    /// Optional API key value aligned with <see cref="ApiKeyHeader"/>.
    /// </summary>
    public string? ApiKey { get; set; }

    /// <summary>
    /// Request timeout in seconds.
    /// </summary>
    public int TimeoutSeconds { get; set; } = 10;

    /// <summary>
    /// Checks the webhook settings. Nothing is validated while <see cref="Enabled"/> is <c>false</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The webhook is enabled and the endpoint is missing, is not an absolute http/https URI,
    /// or the timeout is not positive.
    /// </exception>
    public void Validate()
    {
        if (!Enabled)
        {
            return;
        }

        if (string.IsNullOrWhiteSpace(Endpoint))
        {
            throw new InvalidOperationException("Policy webhook endpoint must be configured when enabled.");
        }

        if (!Uri.TryCreate(Endpoint, UriKind.Absolute, out var endpointUri))
        {
            throw new InvalidOperationException("Policy webhook endpoint must be an absolute URI.");
        }

        // An absolute URI is not necessarily callable over HTTP: "ftp://..." or "file://..." parse fine
        // (and on Unix a rooted path such as "/hooks" is parsed as a file URI), so check the scheme too.
        var isHttp = string.Equals(endpointUri.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase)
            || string.Equals(endpointUri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
        if (!isHttp)
        {
            throw new InvalidOperationException("Policy webhook endpoint must use the http or https scheme.");
        }

        if (TimeoutSeconds <= 0)
        {
            throw new InvalidOperationException("Policy webhook timeout must be greater than zero.");
        }
    }
}
|
||||
}
|
||||
|
||||
public sealed class GraphOptions
|
||||
{
|
||||
|
||||
@@ -13,30 +13,33 @@ namespace StellaOps.Scheduler.Worker.Policy;
|
||||
internal sealed class PolicyRunExecutionService
|
||||
{
|
||||
private readonly IPolicyRunJobRepository _repository;
|
||||
private readonly IPolicyRunClient _client;
|
||||
private readonly IOptions<SchedulerWorkerOptions> _options;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly SchedulerWorkerMetrics _metrics;
|
||||
private readonly IPolicyRunTargetingService _targetingService;
|
||||
private readonly ILogger<PolicyRunExecutionService> _logger;
|
||||
|
||||
/// <summary>
/// Wires the execution service to its collaborators.
/// </summary>
/// <param name="timeProvider">Clock to use; falls back to <see cref="TimeProvider.System"/> when <c>null</c>.</param>
/// <exception cref="ArgumentNullException">Any dependency other than <paramref name="timeProvider"/> is <c>null</c>.</exception>
public PolicyRunExecutionService(
    IPolicyRunJobRepository repository,
    IPolicyRunClient client,
    IOptions<SchedulerWorkerOptions> options,
    TimeProvider? timeProvider,
    SchedulerWorkerMetrics metrics,
    IPolicyRunTargetingService targetingService,
    ILogger<PolicyRunExecutionService> logger)
{
    // Guards run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(client);
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(metrics);
    ArgumentNullException.ThrowIfNull(targetingService);
    ArgumentNullException.ThrowIfNull(logger);

    _repository = repository;
    _client = client;
    _options = options;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _metrics = metrics;
    _targetingService = targetingService;
    _logger = logger;
}
|
||||
private readonly IPolicyRunClient _client;
|
||||
private readonly IOptions<SchedulerWorkerOptions> _options;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly SchedulerWorkerMetrics _metrics;
|
||||
private readonly IPolicyRunTargetingService _targetingService;
|
||||
private readonly IPolicySimulationWebhookClient _webhookClient;
|
||||
private readonly ILogger<PolicyRunExecutionService> _logger;
|
||||
|
||||
/// <summary>
/// Wires the execution service to its collaborators, including the simulation webhook client.
/// </summary>
/// <param name="timeProvider">Clock to use; falls back to <see cref="TimeProvider.System"/> when <c>null</c>.</param>
/// <exception cref="ArgumentNullException">Any dependency other than <paramref name="timeProvider"/> is <c>null</c>.</exception>
public PolicyRunExecutionService(
    IPolicyRunJobRepository repository,
    IPolicyRunClient client,
    IOptions<SchedulerWorkerOptions> options,
    TimeProvider? timeProvider,
    SchedulerWorkerMetrics metrics,
    IPolicyRunTargetingService targetingService,
    IPolicySimulationWebhookClient webhookClient,
    ILogger<PolicyRunExecutionService> logger)
{
    // Guards run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(client);
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(metrics);
    ArgumentNullException.ThrowIfNull(targetingService);
    ArgumentNullException.ThrowIfNull(webhookClient);
    ArgumentNullException.ThrowIfNull(logger);

    _repository = repository;
    _client = client;
    _options = options;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _metrics = metrics;
    _targetingService = targetingService;
    _webhookClient = webhookClient;
    _logger = logger;
}
|
||||
|
||||
public async Task<PolicyRunExecutionResult> ExecuteAsync(PolicyRunJob job, CancellationToken cancellationToken)
|
||||
{
|
||||
@@ -62,20 +65,24 @@ internal sealed class PolicyRunExecutionService
|
||||
_logger.LogWarning("Failed to update cancelled policy run job {JobId}.", job.Id);
|
||||
}
|
||||
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
cancelled.TenantId,
|
||||
cancelled.PolicyId,
|
||||
cancelled.Mode,
|
||||
"cancelled",
|
||||
reason: cancelled.CancellationReason);
|
||||
_logger.LogInformation(
|
||||
"Policy run job {JobId} cancelled (tenant={TenantId}, policy={PolicyId}, runId={RunId}).",
|
||||
cancelled.Id,
|
||||
cancelled.TenantId,
|
||||
cancelled.PolicyId,
|
||||
cancelled.RunId ?? "(pending)");
|
||||
|
||||
return PolicyRunExecutionResult.Cancelled(cancelled);
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
cancelled.TenantId,
|
||||
cancelled.PolicyId,
|
||||
cancelled.Mode,
|
||||
"cancelled",
|
||||
reason: cancelled.CancellationReason);
|
||||
_logger.LogInformation(
|
||||
"Policy run job {JobId} cancelled (tenant={TenantId}, policy={PolicyId}, runId={RunId}).",
|
||||
cancelled.Id,
|
||||
cancelled.TenantId,
|
||||
cancelled.PolicyId,
|
||||
cancelled.RunId ?? "(pending)");
|
||||
|
||||
var cancelledStatus = PolicyRunStatusFactory.Create(cancelled, cancelledAt);
|
||||
var cancelledPayload = PolicySimulationWebhookPayloadFactory.Create(cancelledStatus, cancelledAt);
|
||||
await _webhookClient.NotifyAsync(cancelledPayload, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
return PolicyRunExecutionResult.Cancelled(cancelled);
|
||||
}
|
||||
|
||||
var targeting = await _targetingService
|
||||
@@ -108,19 +115,23 @@ internal sealed class PolicyRunExecutionService
|
||||
}
|
||||
|
||||
var latency = CalculateLatency(job, completionTime);
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
completed.TenantId,
|
||||
completed.PolicyId,
|
||||
completed.Mode,
|
||||
"no_work",
|
||||
latency,
|
||||
targeting.Reason);
|
||||
_logger.LogInformation(
|
||||
"Policy run job {JobId} completed without submission (reason={Reason}).",
|
||||
completed.Id,
|
||||
targeting.Reason ?? "none");
|
||||
|
||||
return PolicyRunExecutionResult.NoOp(completed, targeting.Reason);
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
completed.TenantId,
|
||||
completed.PolicyId,
|
||||
completed.Mode,
|
||||
"no_work",
|
||||
latency,
|
||||
targeting.Reason);
|
||||
_logger.LogInformation(
|
||||
"Policy run job {JobId} completed without submission (reason={Reason}).",
|
||||
completed.Id,
|
||||
targeting.Reason ?? "none");
|
||||
|
||||
var completedStatus = PolicyRunStatusFactory.Create(completed, completionTime);
|
||||
var completedPayload = PolicySimulationWebhookPayloadFactory.Create(completedStatus, completionTime);
|
||||
await _webhookClient.NotifyAsync(completedPayload, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
return PolicyRunExecutionResult.NoOp(completed, targeting.Reason);
|
||||
}
|
||||
|
||||
job = targeting.Job;
|
||||
@@ -200,24 +211,28 @@ internal sealed class PolicyRunExecutionService
|
||||
|
||||
if (nextStatus == PolicyRunJobStatus.Failed)
|
||||
{
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
failedJob.TenantId,
|
||||
failedJob.PolicyId,
|
||||
failedJob.Mode,
|
||||
"failed",
|
||||
latencyForFailure,
|
||||
reason);
|
||||
|
||||
_logger.LogError(
|
||||
"Policy run job {JobId} failed after {Attempts} attempts (tenant={TenantId}, policy={PolicyId}, runId={RunId}). Error: {Error}",
|
||||
failedJob.Id,
|
||||
attemptCount,
|
||||
failedJob.TenantId,
|
||||
failedJob.PolicyId,
|
||||
failedJob.RunId ?? "(pending)",
|
||||
submission.Error ?? "unknown");
|
||||
|
||||
return PolicyRunExecutionResult.Failed(failedJob, submission.Error);
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
failedJob.TenantId,
|
||||
failedJob.PolicyId,
|
||||
failedJob.Mode,
|
||||
"failed",
|
||||
latencyForFailure,
|
||||
reason);
|
||||
|
||||
_logger.LogError(
|
||||
"Policy run job {JobId} failed after {Attempts} attempts (tenant={TenantId}, policy={PolicyId}, runId={RunId}). Error: {Error}",
|
||||
failedJob.Id,
|
||||
attemptCount,
|
||||
failedJob.TenantId,
|
||||
failedJob.PolicyId,
|
||||
failedJob.RunId ?? "(pending)",
|
||||
submission.Error ?? "unknown");
|
||||
|
||||
var failedStatus = PolicyRunStatusFactory.Create(failedJob, now);
|
||||
var failedPayload = PolicySimulationWebhookPayloadFactory.Create(failedStatus, now);
|
||||
await _webhookClient.NotifyAsync(failedPayload, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
return PolicyRunExecutionResult.Failed(failedJob, submission.Error);
|
||||
}
|
||||
|
||||
_metrics.RecordPolicyRunEvent(
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
using System;
|
||||
using System.Net.Http;
|
||||
using System.Net.Mime;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scheduler.Worker.Policy;
|
||||
|
||||
/// <summary>
/// Sends a policy simulation webhook notification for a given payload.
/// </summary>
internal interface IPolicySimulationWebhookClient
|
||||
{
|
||||
/// <summary>Dispatches <paramref name="payload"/>; <paramref name="cancellationToken"/> allows the caller to cancel the operation.</summary>
Task NotifyAsync(PolicySimulationWebhookPayload payload, CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Posts policy simulation payloads as JSON to the configured webhook endpoint.
/// Delivery is best-effort: HTTP and transport failures are logged and never thrown to the caller;
/// only cancellation requested by the caller propagates.
/// </summary>
internal sealed class HttpPolicySimulationWebhookClient : IPolicySimulationWebhookClient
{
    // Used when the configured timeout is zero or negative.
    private static readonly TimeSpan DefaultTimeout = TimeSpan.FromSeconds(10);

    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _httpClient;
    private readonly IOptionsMonitor<SchedulerWorkerOptions> _options;
    private readonly ILogger<HttpPolicySimulationWebhookClient> _logger;

    /// <summary>
    /// Creates the client.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public HttpPolicySimulationWebhookClient(
        HttpClient httpClient,
        IOptionsMonitor<SchedulerWorkerOptions> options,
        ILogger<HttpPolicySimulationWebhookClient> logger)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Sends <paramref name="payload"/> to the webhook endpoint from the current options snapshot.
    /// Returns without sending when the webhook is disabled or the endpoint is missing or invalid.
    /// </summary>
    /// <param name="payload">Notification body; serialized as JSON.</param>
    /// <param name="cancellationToken">Caller cancellation; when it fires, the resulting <see cref="OperationCanceledException"/> is rethrown.</param>
    /// <exception cref="ArgumentNullException"><paramref name="payload"/> is <c>null</c>.</exception>
    public async Task NotifyAsync(PolicySimulationWebhookPayload payload, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(payload);

        var snapshot = _options.CurrentValue.Policy.Webhook;
        if (!snapshot.Enabled)
        {
            _logger.LogDebug("Policy simulation webhook disabled; skip run {RunId}.", payload.Simulation.RunId);
            return;
        }

        if (string.IsNullOrWhiteSpace(snapshot.Endpoint))
        {
            _logger.LogWarning("Policy simulation webhook endpoint missing; run {RunId} not dispatched.", payload.Simulation.RunId);
            return;
        }

        if (!Uri.TryCreate(snapshot.Endpoint, UriKind.Absolute, out var endpoint))
        {
            _logger.LogError("Policy simulation webhook endpoint '{Endpoint}' invalid.", snapshot.Endpoint);
            return;
        }

        var timeout = snapshot.TimeoutSeconds <= 0 ? DefaultTimeout : TimeSpan.FromSeconds(snapshot.TimeoutSeconds);

        // The timeout is enforced per request through a linked token instead of assigning
        // HttpClient.Timeout: that property throws InvalidOperationException once the client
        // instance has started a request, which would break every notification after the first
        // on a reused client. Note that HttpClient's own Timeout still acts as an upper bound.
        using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

        using var request = new HttpRequestMessage(HttpMethod.Post, endpoint)
        {
            Content = new StringContent(JsonSerializer.Serialize(payload, SerializerOptions), Encoding.UTF8, MediaTypeNames.Application.Json)
        };

        request.Headers.TryAddWithoutValidation("X-StellaOps-Tenant", payload.TenantId);
        if (!string.IsNullOrWhiteSpace(payload.Simulation.RunId))
        {
            request.Headers.TryAddWithoutValidation("X-StellaOps-Run-Id", payload.Simulation.RunId);
        }

        // The API key header is only sent when both its name and value are configured.
        if (!string.IsNullOrWhiteSpace(snapshot.ApiKey) && !string.IsNullOrWhiteSpace(snapshot.ApiKeyHeader))
        {
            request.Headers.TryAddWithoutValidation(snapshot.ApiKeyHeader!, snapshot.ApiKey);
        }

        try
        {
            timeoutSource.CancelAfter(timeout);

            using var response = await _httpClient.SendAsync(request, timeoutSource.Token).ConfigureAwait(false);
            if (!response.IsSuccessStatusCode)
            {
                var body = await response.Content.ReadAsStringAsync(timeoutSource.Token).ConfigureAwait(false);
                _logger.LogWarning(
                    "Policy simulation webhook responded {StatusCode} for run {RunId}: {Body}",
                    (int)response.StatusCode,
                    payload.Simulation.RunId,
                    body);
            }
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Caller-initiated cancellation is not a webhook failure; let it propagate.
            throw;
        }
        catch (Exception ex)
        {
            // Includes the per-request timeout firing (an OperationCanceledException that the
            // caller's token did not cause) as well as transport errors.
            _logger.LogError(ex, "Policy simulation webhook failed for run {RunId}.", payload.Simulation.RunId);
        }
    }
}
|
||||
Reference in New Issue
Block a user