feat: Add initial implementation of Vulnerability Resolver Jobs
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled

- Created project for StellaOps.Scanner.Analyzers.Native.Tests with necessary dependencies.
- Documented roles and guidelines in AGENTS.md for Scheduler module.
- Implemented IResolverJobService interface and InMemoryResolverJobService for handling resolver jobs.
- Added ResolverBacklogNotifier and ResolverBacklogService for monitoring job metrics.
- Developed API endpoints for managing resolver jobs and retrieving metrics.
- Defined models for resolver job requests and responses.
- Integrated dependency injection for resolver job services.
- Implemented ImpactIndexSnapshot for persisting impact index data.
- Introduced SignalsScoringOptions for configurable scoring weights in reachability scoring.
- Added unit tests for ReachabilityScoringService and RuntimeFactsIngestionService.
- Created dotnet-filter.sh script to handle command-line arguments for dotnet.
- Established nuget-prime project for managing package downloads.
This commit is contained in:
master
2025-11-18 07:52:15 +02:00
parent e69b57d467
commit 8355e2ff75
299 changed files with 13293 additions and 2444 deletions

View File

@@ -107,21 +107,78 @@ public sealed class FixtureImpactIndex : IImpactIndex
return CreateImpactSet(state, selector, Enumerable.Empty<FixtureMatch>(), usageOnly);
}
public async ValueTask<ImpactSet> ResolveAllAsync(
Selector selector,
bool usageOnly,
CancellationToken cancellationToken = default)
{
public async ValueTask<ImpactSet> ResolveAllAsync(
Selector selector,
bool usageOnly,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(selector);
var state = await EnsureInitializedAsync(cancellationToken).ConfigureAwait(false);
var matches = state.ImagesByDigest.Values
.Select(image => new FixtureMatch(image, image.UsedByEntrypoint))
.Where(match => !usageOnly || match.UsedByEntrypoint);
return CreateImpactSet(state, selector, matches, usageOnly);
}
var matches = state.ImagesByDigest.Values
.Select(image => new FixtureMatch(image, image.UsedByEntrypoint))
.Where(match => !usageOnly || match.UsedByEntrypoint);
return CreateImpactSet(state, selector, matches, usageOnly);
}
/// <summary>
/// Accepts a removal request without changing anything: the fixture-backed index is immutable.
/// </summary>
/// <param name="imageDigest">Digest of the image the caller wants removed; it is not inspected.</param>
/// <param name="cancellationToken">Cancellation token; it is not observed.</param>
/// <returns>An already-completed <see cref="ValueTask"/>.</returns>
public ValueTask RemoveAsync(string imageDigest, CancellationToken cancellationToken = default)
    => ValueTask.CompletedTask; // Removals are ignored because fixture data never changes.
/// <summary>
/// Produces a snapshot of the fixture data: one record per image (ordered by digest,
/// case-insensitively, and numbered from zero in that order) plus two purl-to-image-id
/// lookups, one over all components and one over entrypoint components only.
/// </summary>
/// <param name="cancellationToken">Token forwarded to fixture initialization.</param>
/// <returns>A snapshot carrying the fixture state's timestamp and snapshot id.</returns>
public async ValueTask<ImpactIndexSnapshot> CreateSnapshotAsync(CancellationToken cancellationToken = default)
{
    var state = await EnsureInitializedAsync(cancellationToken).ConfigureAwait(false);

    var records = ImmutableArray.CreateBuilder<ImpactImageRecord>();
    foreach (var fixture in state.ImagesByDigest.Values.OrderBy(candidate => candidate.Digest, StringComparer.OrdinalIgnoreCase))
    {
        var allPurls = fixture.Components
            .Select(component => component.Purl)
            .ToImmutableArray();
        var entrypointPurls = fixture.Components
            .Where(component => component.UsedByEntrypoint)
            .Select(component => component.Purl)
            .ToImmutableArray();

        // The builder's current count is the zero-based position in digest order.
        records.Add(new ImpactImageRecord(
            records.Count,
            "fixture",
            fixture.Digest,
            fixture.Registry,
            fixture.Repository,
            fixture.Namespaces,
            fixture.Tags,
            fixture.Labels,
            fixture.GeneratedAt,
            allPurls,
            entrypointPurls));
    }

    var images = records.ToImmutable();

    // Inverts "image -> purls" into "purl -> sorted, de-duplicated image ids".
    ImmutableDictionary<string, ImmutableArray<int>> IndexByPurl(Func<ImpactImageRecord, IEnumerable<string>> purlsOf)
    {
        return images
            .SelectMany(record => purlsOf(record).Select(purl => (purl, record.ImageId)))
            .GroupBy(pair => pair.purl, StringComparer.OrdinalIgnoreCase)
            .ToImmutableDictionary(
                group => group.Key,
                group => group.Select(pair => pair.ImageId).Distinct().OrderBy(id => id).ToImmutableArray(),
                StringComparer.OrdinalIgnoreCase);
    }

    var contains = IndexByPurl(record => record.Components);
    var usedBy = IndexByPurl(record => record.EntrypointComponents);

    return new ImpactIndexSnapshot(
        state.GeneratedAt,
        state.SnapshotId,
        images,
        contains,
        usedBy);
}
/// <summary>
/// Validates the argument and otherwise does nothing: the fixture index cannot be replaced by a snapshot.
/// </summary>
/// <param name="snapshot">Snapshot offered for restoration; must not be null.</param>
/// <param name="cancellationToken">Cancellation token; it is not observed.</param>
/// <returns>An already-completed <see cref="ValueTask"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is null.</exception>
public ValueTask RestoreSnapshotAsync(ImpactIndexSnapshot snapshot, CancellationToken cancellationToken = default)
{
    // Null is rejected even though the snapshot contents are never read.
    ArgumentNullException.ThrowIfNull(snapshot);

    // Fixture data is immutable, so there is no state to overwrite.
    return ValueTask.CompletedTask;
}
private async Task<FixtureIndexState> EnsureInitializedAsync(CancellationToken cancellationToken)
{

View File

@@ -39,8 +39,29 @@ public interface IImpactIndex
/// <param name="selector">Selector scoping the query.</param>
/// <param name="usageOnly">When true, restricts results to images with entrypoint usage.</param>
/// <param name="cancellationToken">Cancellation token.</param>
ValueTask<ImpactSet> ResolveAllAsync(
Selector selector,
bool usageOnly,
CancellationToken cancellationToken = default);
}
ValueTask<ImpactSet> ResolveAllAsync(
Selector selector,
bool usageOnly,
CancellationToken cancellationToken = default);
/// <summary>
/// Removes an image digest and its component mappings from the index.
/// Used when an image is deleted or aged out.
/// </summary>
/// <param name="imageDigest">Digest of the image to remove.</param>
/// <param name="cancellationToken">Cancellation token.</param>
ValueTask RemoveAsync(
string imageDigest,
CancellationToken cancellationToken = default);
/// <summary>
/// Creates a compacted snapshot of the index for persistence (e.g., RocksDB/Redis).
/// </summary>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A snapshot of the index contents.</returns>
ValueTask<ImpactIndexSnapshot> CreateSnapshotAsync(
CancellationToken cancellationToken = default);
/// <summary>
/// Restores index state from a previously persisted snapshot.
/// </summary>
/// <param name="snapshot">Snapshot to restore from.</param>
/// <param name="cancellationToken">Cancellation token.</param>
ValueTask RestoreSnapshotAsync(
ImpactIndexSnapshot snapshot,
CancellationToken cancellationToken = default);
}

View File

@@ -3,7 +3,7 @@ using System.Collections.Immutable;
namespace StellaOps.Scheduler.ImpactIndex;
internal sealed record ImpactImageRecord(
public sealed record ImpactImageRecord(
int ImageId,
string TenantId,
string Digest,

View File

@@ -0,0 +1,37 @@
using System.Collections.Immutable;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Scheduler.ImpactIndex;
/// <summary>
/// Serializable snapshot for persisting the ImpactIndex (e.g., RocksDB/Redis).
/// Contains compacted image IDs and per-purl bitmap membership.
/// </summary>
/// <param name="GeneratedAt">Timestamp associated with the snapshot contents.</param>
/// <param name="SnapshotId">Identifier of this snapshot.</param>
/// <param name="Images">Image records; their image ids are the values referenced by the two purl maps.</param>
/// <param name="ContainsByPurl">For each purl, the ids of images that contain the component.</param>
/// <param name="UsedByEntrypointByPurl">For each purl, the ids of images whose entrypoint uses the component.</param>
public sealed record ImpactIndexSnapshot(
    DateTimeOffset GeneratedAt,
    string SnapshotId,
    ImmutableArray<ImpactImageRecord> Images,
    ImmutableDictionary<string, ImmutableArray<int>> ContainsByPurl,
    ImmutableDictionary<string, ImmutableArray<int>> UsedByEntrypointByPurl)
{
    // Shared by both directions so the written payload and the reader always agree.
    private static readonly JsonSerializerOptions SerializerOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Serializes a snapshot to compact UTF-8 JSON.
    /// </summary>
    /// <param name="snapshot">Snapshot to serialize; must not be null.</param>
    /// <returns>The UTF-8 encoded JSON payload.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is null.</exception>
    public static byte[] ToBytes(ImpactIndexSnapshot snapshot)
    {
        // Without this guard a null snapshot serializes to the JSON literal "null",
        // a payload that FromBytes can only reject later, far from the actual mistake.
        ArgumentNullException.ThrowIfNull(snapshot);

        return JsonSerializer.SerializeToUtf8Bytes(snapshot, SerializerOptions);
    }

    /// <summary>
    /// Deserializes a snapshot from a UTF-8 JSON payload produced by <see cref="ToBytes"/>.
    /// </summary>
    /// <param name="payload">UTF-8 encoded JSON.</param>
    /// <returns>The deserialized snapshot.</returns>
    /// <exception cref="InvalidOperationException">The payload deserialized to null.</exception>
    public static ImpactIndexSnapshot FromBytes(ReadOnlySpan<byte> payload)
    {
        var snapshot = JsonSerializer.Deserialize<ImpactIndexSnapshot>(payload, SerializerOptions);
        return snapshot ?? throw new InvalidOperationException("ImpactIndexSnapshot payload could not be deserialized.");
    }
}

View File

@@ -23,16 +23,41 @@ public sealed class RoaringImpactIndex : IImpactIndex
private readonly Dictionary<string, int> _imageIds = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<int, ImpactImageRecord> _images = new();
private readonly Dictionary<string, RoaringBitmap> _containsByPurl = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, RoaringBitmap> _usedByEntrypointByPurl = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, RoaringBitmap> _usedByEntrypointByPurl = new(StringComparer.OrdinalIgnoreCase);
private readonly ILogger<RoaringImpactIndex> _logger;
private readonly TimeProvider _timeProvider;
private string? _snapshotId;
private readonly ILogger<RoaringImpactIndex> _logger;
private readonly TimeProvider _timeProvider;
public RoaringImpactIndex(ILogger<RoaringImpactIndex> logger, TimeProvider? timeProvider = null)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
}
public RoaringImpactIndex(ILogger<RoaringImpactIndex> logger, TimeProvider? timeProvider = null)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <summary>
/// Drops an image from the index by digest. An unknown digest is a no-op; a successful
/// removal also clears the cached snapshot id.
/// </summary>
/// <param name="imageDigest">Digest of the image to remove; must not be null or whitespace.</param>
/// <param name="cancellationToken">Cancellation token; it is not observed.</param>
/// <returns>An already-completed <see cref="ValueTask"/>.</returns>
public ValueTask RemoveAsync(string imageDigest, CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(imageDigest);

    lock (_gate)
    {
        if (_imageIds.TryGetValue(imageDigest, out var knownId))
        {
            if (_images.TryGetValue(knownId, out var existing))
            {
                // Detach component mappings while the record is still registered.
                RemoveImageComponents(existing);
                _images.Remove(knownId);
            }

            _imageIds.Remove(imageDigest);

            // The last computed snapshot id no longer describes the index contents.
            _snapshotId = null;
        }
    }

    return ValueTask.CompletedTask;
}
public async Task IngestAsync(ImpactIndexIngestionRequest request, CancellationToken cancellationToken = default)
{
@@ -130,11 +155,108 @@ public sealed class RoaringImpactIndex : IImpactIndex
CancellationToken cancellationToken = default)
=> ValueTask.FromResult(CreateEmptyImpactSet(selector, usageOnly));
public ValueTask<ImpactSet> ResolveAllAsync(
Selector selector,
bool usageOnly,
CancellationToken cancellationToken = default)
=> ValueTask.FromResult(ResolveAllCore(selector, usageOnly));
public ValueTask<ImpactSet> ResolveAllAsync(
Selector selector,
bool usageOnly,
CancellationToken cancellationToken = default)
=> ValueTask.FromResult(ResolveAllCore(selector, usageOnly));
/// <summary>
/// Captures the index as a snapshot. Images are ordered by digest, then repository
/// (both case-insensitive), and renumbered 0..n-1 in that order; bitmap members are
/// rewritten to the new numbering and purls with no surviving members are omitted.
/// The computed snapshot id is also remembered on the index.
/// </summary>
/// <param name="cancellationToken">Checked once before any work starts.</param>
/// <returns>The snapshot, wrapped in a completed <see cref="ValueTask{TResult}"/>.</returns>
public ValueTask<ImpactIndexSnapshot> CreateSnapshotAsync(CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();

    lock (_gate)
    {
        var sorted = _images.Values
            .OrderBy(record => record.Digest, StringComparer.OrdinalIgnoreCase)
            .ThenBy(record => record.Repository, StringComparer.OrdinalIgnoreCase)
            .ToArray();

        // Old image id -> position in the sorted order.
        var denseIds = new Dictionary<int, int>(sorted.Length);
        for (var position = 0; position < sorted.Length; position++)
        {
            denseIds.Add(sorted[position].ImageId, position);
        }

        var renumbered = ImmutableArray.CreateBuilder<ImpactImageRecord>(sorted.Length);
        foreach (var record in sorted)
        {
            renumbered.Add(record with { ImageId = denseIds[record.ImageId] });
        }

        var compactedImages = renumbered.ToImmutable();

        ImmutableDictionary<string, ImmutableArray<int>> Renumber(Dictionary<string, RoaringBitmap> membership)
        {
            var result = ImmutableDictionary.CreateBuilder<string, ImmutableArray<int>>(StringComparer.OrdinalIgnoreCase);
            foreach (var entry in membership)
            {
                // A sorted set yields distinct ids in ascending order.
                var members = new SortedSet<int>();
                foreach (var oldId in entry.Value)
                {
                    // Ids without a registered image are dropped from the snapshot.
                    if (denseIds.TryGetValue(oldId, out var newId))
                    {
                        members.Add(newId);
                    }
                }

                if (members.Count == 0)
                {
                    continue;
                }

                result[entry.Key] = members.ToImmutableArray();
            }

            return result.ToImmutable();
        }

        var contains = Renumber(_containsByPurl);
        var usedBy = Renumber(_usedByEntrypointByPurl);

        // An empty index has no image timestamps, so fall back to the current time.
        DateTimeOffset generatedAt;
        if (sorted.Length == 0)
        {
            generatedAt = _timeProvider.GetUtcNow();
        }
        else
        {
            generatedAt = sorted.Max(record => record.GeneratedAt);
        }

        var snapshotId = ComputeSnapshotId(compactedImages, contains, usedBy);
        _snapshotId = snapshotId;

        return ValueTask.FromResult(new ImpactIndexSnapshot(
            generatedAt,
            snapshotId,
            compactedImages,
            contains,
            usedBy));
    }
}
/// <summary>
/// Replaces the entire index state with the contents of a snapshot.
/// The snapshot is validated and converted before any live state is touched, so a
/// malformed snapshot leaves the current index unchanged instead of wiping it.
/// </summary>
/// <param name="snapshot">Snapshot to restore from; must not be null.</param>
/// <param name="cancellationToken">Checked once before any work starts.</param>
/// <returns>An already-completed <see cref="ValueTask"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The snapshot's image array is uninitialized, contains a null entry, or one of its purl maps is null.
/// </exception>
public ValueTask RestoreSnapshotAsync(ImpactIndexSnapshot snapshot, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(snapshot);
    cancellationToken.ThrowIfCancellationRequested();

    // A snapshot deserialized from a sparse payload can carry a default image array or null maps.
    if (snapshot.Images.IsDefault)
    {
        throw new ArgumentException("Snapshot image collection is not initialized.", nameof(snapshot));
    }

    if (snapshot.ContainsByPurl is null || snapshot.UsedByEntrypointByPurl is null)
    {
        throw new ArgumentException("Snapshot purl membership maps must not be null.", nameof(snapshot));
    }

    // Stage everything that can throw outside the lock and before clearing live state.
    var stagedImages = new Dictionary<int, ImpactImageRecord>(snapshot.Images.Length);
    var stagedImageIds = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    foreach (var image in snapshot.Images)
    {
        if (image is null)
        {
            throw new ArgumentException("Snapshot image collection contains a null entry.", nameof(snapshot));
        }

        stagedImages[image.ImageId] = image;
        stagedImageIds[image.Digest] = image.ImageId;
    }

    static Dictionary<string, RoaringBitmap> BuildBitmaps(ImmutableDictionary<string, ImmutableArray<int>> source)
    {
        var bitmaps = new Dictionary<string, RoaringBitmap>(StringComparer.OrdinalIgnoreCase);
        foreach (var (purl, imageIds) in source)
        {
            bitmaps[purl] = RoaringBitmap.Create(imageIds.ToArray());
        }

        return bitmaps;
    }

    var stagedContains = BuildBitmaps(snapshot.ContainsByPurl);
    var stagedUsedBy = BuildBitmaps(snapshot.UsedByEntrypointByPurl);

    lock (_gate)
    {
        _images.Clear();
        _imageIds.Clear();
        _containsByPurl.Clear();
        _usedByEntrypointByPurl.Clear();

        foreach (var (imageId, image) in stagedImages)
        {
            _images[imageId] = image;
        }

        foreach (var (digest, imageId) in stagedImageIds)
        {
            _imageIds[digest] = imageId;
        }

        foreach (var (purl, bitmap) in stagedContains)
        {
            _containsByPurl[purl] = bitmap;
        }

        foreach (var (purl, bitmap) in stagedUsedBy)
        {
            _usedByEntrypointByPurl[purl] = bitmap;
        }

        // NOTE(review): if image ids are allocated from a counter elsewhere in this class,
        // confirm it is re-seeded past the restored ids so new ingests cannot collide.
        _snapshotId = snapshot.SnapshotId;
    }

    return ValueTask.CompletedTask;
}
private ImpactSet ResolveByPurlsCore(IEnumerable<string> purls, bool usageOnly, Selector selector)
{
@@ -231,27 +353,27 @@ public sealed class RoaringImpactIndex : IImpactIndex
var generatedAt = latestGeneratedAt == DateTimeOffset.MinValue ? _timeProvider.GetUtcNow() : latestGeneratedAt;
return new ImpactSet(
selector,
images.ToImmutableArray(),
usageOnly,
generatedAt,
images.Count,
snapshotId: null,
schemaVersion: SchedulerSchemaVersions.ImpactSet);
}
return new ImpactSet(
selector,
images.ToImmutableArray(),
usageOnly,
generatedAt,
images.Count,
snapshotId: _snapshotId,
schemaVersion: SchedulerSchemaVersions.ImpactSet);
}
private ImpactSet CreateEmptyImpactSet(Selector selector, bool usageOnly)
{
return new ImpactSet(
selector,
ImmutableArray<ImpactImage>.Empty,
usageOnly,
_timeProvider.GetUtcNow(),
0,
snapshotId: null,
schemaVersion: SchedulerSchemaVersions.ImpactSet);
}
return new ImpactSet(
selector,
ImmutableArray<ImpactImage>.Empty,
usageOnly,
_timeProvider.GetUtcNow(),
0,
snapshotId: _snapshotId,
schemaVersion: SchedulerSchemaVersions.ImpactSet);
}
private static bool ImageMatchesSelector(ImpactImageRecord image, Selector selector)
{
@@ -403,22 +525,54 @@ public sealed class RoaringImpactIndex : IImpactIndex
return RoaringBitmap.Create(remaining);
}
private static bool MatchesScope(ImpactImageRecord image, Selector selector)
{
return selector.Scope switch
{
SelectorScope.AllImages => true,
private static bool MatchesScope(ImpactImageRecord image, Selector selector)
{
return selector.Scope switch
{
SelectorScope.AllImages => true,
SelectorScope.ByDigest => selector.Digests.Contains(image.Digest, StringComparer.OrdinalIgnoreCase),
SelectorScope.ByRepository => selector.Repositories.Any(repo =>
string.Equals(repo, image.Repository, StringComparison.OrdinalIgnoreCase) ||
string.Equals(repo, $"{image.Registry}/{image.Repository}", StringComparison.OrdinalIgnoreCase)),
SelectorScope.ByNamespace => !image.Namespaces.IsDefaultOrEmpty && selector.Namespaces.Any(ns => image.Namespaces.Contains(ns, StringComparer.OrdinalIgnoreCase)),
SelectorScope.ByLabels => selector.Labels.All(label =>
image.Labels.TryGetValue(label.Key, out var value) &&
(label.Values.Length == 0 || label.Values.Contains(value, StringComparer.OrdinalIgnoreCase))),
_ => true,
};
}
SelectorScope.ByLabels => selector.Labels.All(label =>
image.Labels.TryGetValue(label.Key, out var value) &&
(label.Values.Length == 0 || label.Values.Contains(value, StringComparer.OrdinalIgnoreCase))),
_ => true,
};
}
/// <summary>
/// Derives a snapshot id from the snapshot contents: a canonical text form of the images
/// (digest and generation time in Unix seconds) followed by both purl maps is hashed with
/// SHA-256 and rendered as "snap-" plus lowercase hex.
/// </summary>
/// <param name="images">Image records to include; visited in case-insensitive digest order.</param>
/// <param name="contains">Purl-to-image-id map for contained components.</param>
/// <param name="usedBy">Purl-to-image-id map for entrypoint-used components.</param>
/// <returns>The snapshot id string.</returns>
private static string ComputeSnapshotId(
    ImmutableArray<ImpactImageRecord> images,
    ImmutableDictionary<string, ImmutableArray<int>> contains,
    ImmutableDictionary<string, ImmutableArray<int>> usedBy)
{
    var canonical = new StringBuilder();

    // Section 1: "<digest>|<unix-seconds>;" per image.
    var imagesByDigest = images.OrderBy(record => record.Digest, StringComparer.OrdinalIgnoreCase);
    foreach (var record in imagesByDigest)
    {
        canonical.Append(record.Digest);
        canonical.Append('|');
        canonical.Append(record.GeneratedAt.ToUnixTimeSeconds());
        canonical.Append(';');
    }

    // Sections 2 and 3: "<purl>=<id>,<id>,...|" per entry, contains first, then usedBy.
    foreach (var membership in new[] { contains, usedBy })
    {
        var entriesByPurl = membership.OrderBy(entry => entry.Key, StringComparer.OrdinalIgnoreCase);
        foreach (var (purl, imageIds) in entriesByPurl)
        {
            canonical.Append(purl);
            canonical.Append('=');
            foreach (var imageId in imageIds)
            {
                canonical.Append(imageId);
                canonical.Append(',');
            }

            canonical.Append('|');
        }
    }

    var payload = Encoding.UTF8.GetBytes(canonical.ToString());
    var digestHex = Convert.ToHexString(SHA256.HashData(payload)).ToLowerInvariant();
    return "snap-" + digestHex;
}
private static bool MatchesTagPattern(string tag, string pattern)
{

View File

@@ -71,11 +71,13 @@ internal sealed class GraphBuildExecutionService
return GraphBuildExecutionResult.Skipped(job, "transition_invalid");
}
if (!await _repository.TryReplaceAsync(running, job.Status, cancellationToken).ConfigureAwait(false))
{
_metrics.RecordGraphJobResult("build", "skipped");
return GraphBuildExecutionResult.Skipped(job, "concurrency_conflict");
}
if (!await _repository.TryReplaceAsync(running, job.Status, cancellationToken).ConfigureAwait(false))
{
_metrics.RecordGraphJobResult("build", "skipped");
return GraphBuildExecutionResult.Skipped(job, "concurrency_conflict");
}
_metrics.RecordGraphJobStart("build", running.TenantId, running.GraphSnapshotId ?? running.SbomId);
var attempt = 0;
CartographerBuildResult? lastResult = null;
@@ -114,9 +116,11 @@ internal sealed class GraphBuildExecutionService
{
var completionTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Completed, completionTime, response.GraphSnapshotId, response.ResultUri, response.Error, cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("build", "completed", completionTime - running.CreatedAt);
return GraphBuildExecutionResult.Completed(running, response.ResultUri);
}
var duration = completionTime - running.CreatedAt;
_metrics.RecordGraphJobResult("build", "completed", duration);
_metrics.RecordGraphJobCompletion("build", running.TenantId, running.GraphSnapshotId ?? running.SbomId, "completed", duration);
return GraphBuildExecutionResult.Completed(running, response.ResultUri);
}
if (response.Status == GraphJobStatus.Failed)
{
@@ -124,9 +128,11 @@ internal sealed class GraphBuildExecutionService
{
var completionTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Failed, completionTime, response.GraphSnapshotId, response.ResultUri, response.Error, cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("build", "failed", completionTime - running.CreatedAt);
return GraphBuildExecutionResult.Failed(running, response.Error);
}
var duration = completionTime - running.CreatedAt;
_metrics.RecordGraphJobResult("build", "failed", duration);
_metrics.RecordGraphJobCompletion("build", running.TenantId, running.GraphSnapshotId ?? running.SbomId, "failed", duration);
return GraphBuildExecutionResult.Failed(running, response.Error);
}
_logger.LogWarning(
"Cartographer build attempt {Attempt} failed for job {JobId}; retrying in {Delay} (reason: {Reason}).",
@@ -144,9 +150,11 @@ internal sealed class GraphBuildExecutionService
{
var completionTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Failed, completionTime, response.GraphSnapshotId, response.ResultUri, response.Error ?? "Cartographer did not complete the build.", cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("build", "failed", completionTime - running.CreatedAt);
return GraphBuildExecutionResult.Failed(running, response.Error);
}
var duration = completionTime - running.CreatedAt;
_metrics.RecordGraphJobResult("build", "failed", duration);
_metrics.RecordGraphJobCompletion("build", running.TenantId, running.GraphSnapshotId ?? running.SbomId, "failed", duration);
return GraphBuildExecutionResult.Failed(running, response.Error);
}
await Task.Delay(backoff, cancellationToken).ConfigureAwait(false);
}
@@ -170,9 +178,11 @@ internal sealed class GraphBuildExecutionService
var error = lastResult?.Error ?? lastException?.Message ?? "Cartographer build failed";
var finalTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Failed, finalTime, lastResult?.GraphSnapshotId ?? running.GraphSnapshotId, lastResult?.ResultUri, error, cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("build", "failed", finalTime - running.CreatedAt);
return GraphBuildExecutionResult.Failed(running, error);
}
var finalDuration = finalTime - running.CreatedAt;
_metrics.RecordGraphJobResult("build", "failed", finalDuration);
_metrics.RecordGraphJobCompletion("build", running.TenantId, running.GraphSnapshotId ?? running.SbomId, "failed", finalDuration);
return GraphBuildExecutionResult.Failed(running, error);
}
private async Task NotifyCompletionAsync(
GraphBuildJob job,

View File

@@ -71,11 +71,13 @@ internal sealed class GraphOverlayExecutionService
return GraphOverlayExecutionResult.Skipped(job, "transition_invalid");
}
if (!await _repository.TryReplaceOverlayAsync(running, job.Status, cancellationToken).ConfigureAwait(false))
{
_metrics.RecordGraphJobResult("overlay", "skipped");
return GraphOverlayExecutionResult.Skipped(job, "concurrency_conflict");
}
if (!await _repository.TryReplaceOverlayAsync(running, job.Status, cancellationToken).ConfigureAwait(false))
{
_metrics.RecordGraphJobResult("overlay", "skipped");
return GraphOverlayExecutionResult.Skipped(job, "concurrency_conflict");
}
_metrics.RecordGraphJobStart("overlay", running.TenantId, running.GraphSnapshotId);
var attempt = 0;
CartographerOverlayResult? lastResult = null;
@@ -96,9 +98,11 @@ internal sealed class GraphOverlayExecutionService
{
var completionTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Completed, completionTime, response.GraphSnapshotId ?? running.GraphSnapshotId, response.ResultUri, response.Error, cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("overlay", "completed", completionTime - running.CreatedAt);
return GraphOverlayExecutionResult.Completed(running, response.ResultUri);
}
var duration = completionTime - running.CreatedAt;
_metrics.RecordGraphJobResult("overlay", "completed", duration);
_metrics.RecordGraphJobCompletion("overlay", running.TenantId, running.GraphSnapshotId, "completed", duration);
return GraphOverlayExecutionResult.Completed(running, response.ResultUri);
}
if (response.Status == GraphJobStatus.Failed)
{
@@ -106,9 +110,11 @@ internal sealed class GraphOverlayExecutionService
{
var completionTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Failed, completionTime, response.GraphSnapshotId ?? running.GraphSnapshotId, response.ResultUri, response.Error, cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("overlay", "failed", completionTime - running.CreatedAt);
return GraphOverlayExecutionResult.Failed(running, response.Error);
}
var duration = completionTime - running.CreatedAt;
_metrics.RecordGraphJobResult("overlay", "failed", duration);
_metrics.RecordGraphJobCompletion("overlay", running.TenantId, running.GraphSnapshotId, "failed", duration);
return GraphOverlayExecutionResult.Failed(running, response.Error);
}
_logger.LogWarning(
"Cartographer overlay attempt {Attempt} failed for job {JobId}; retrying in {Delay} (reason: {Reason}).",
@@ -125,9 +131,11 @@ internal sealed class GraphOverlayExecutionService
{
var completionTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Failed, completionTime, response.GraphSnapshotId ?? running.GraphSnapshotId, response.ResultUri, response.Error ?? "Cartographer did not complete the overlay.", cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("overlay", "failed", completionTime - running.CreatedAt);
return GraphOverlayExecutionResult.Failed(running, response.Error);
}
var duration = completionTime - running.CreatedAt;
_metrics.RecordGraphJobResult("overlay", "failed", duration);
_metrics.RecordGraphJobCompletion("overlay", running.TenantId, running.GraphSnapshotId, "failed", duration);
return GraphOverlayExecutionResult.Failed(running, response.Error);
}
await Task.Delay(backoff, cancellationToken).ConfigureAwait(false);
}
@@ -151,9 +159,11 @@ internal sealed class GraphOverlayExecutionService
var error = lastResult?.Error ?? lastException?.Message ?? "Cartographer overlay failed";
var finalTime = _timeProvider.GetUtcNow();
await NotifyCompletionAsync(running, GraphJobStatus.Failed, finalTime, lastResult?.GraphSnapshotId ?? running.GraphSnapshotId, lastResult?.ResultUri, error, cancellationToken).ConfigureAwait(false);
_metrics.RecordGraphJobResult("overlay", "failed", finalTime - running.CreatedAt);
return GraphOverlayExecutionResult.Failed(running, error);
}
var finalDuration = finalTime - running.CreatedAt;
_metrics.RecordGraphJobResult("overlay", "failed", finalDuration);
_metrics.RecordGraphJobCompletion("overlay", running.TenantId, running.GraphSnapshotId, "failed", finalDuration);
return GraphOverlayExecutionResult.Failed(running, error);
}
private async Task NotifyCompletionAsync(
GraphOverlayJob job,

View File

@@ -23,6 +23,9 @@ public sealed class SchedulerWorkerMetrics : IDisposable
private readonly UpDownCounter<long> _runsActive;
private readonly Counter<long> _graphJobsTotal;
private readonly Histogram<double> _graphJobDurationSeconds;
private readonly UpDownCounter<long> _graphJobsInflight;
private readonly Histogram<double> _graphBuildSeconds;
private readonly Histogram<double> _overlayLagSeconds;
private readonly ConcurrentDictionary<string, long> _backlog = new(StringComparer.Ordinal);
private readonly ObservableGauge<long> _backlogGauge;
private bool _disposed;
@@ -78,6 +81,18 @@ public sealed class SchedulerWorkerMetrics : IDisposable
"scheduler_graph_job_duration_seconds",
unit: "s",
description: "Graph job durations grouped by type and result.");
_graphJobsInflight = _meter.CreateUpDownCounter<long>(
"graph_jobs_inflight",
unit: "count",
description: "Number of in-flight graph jobs grouped by type, tenant, and graph identifier.");
_graphBuildSeconds = _meter.CreateHistogram<double>(
"graph_build_seconds",
unit: "s",
description: "Wall-clock duration of Cartographer graph build jobs grouped by tenant and graph identifier.");
_overlayLagSeconds = _meter.CreateHistogram<double>(
"overlay_lag_seconds",
unit: "s",
description: "Latency between overlay job creation and completion grouped by tenant and graph identifier.");
_backlogGauge = _meter.CreateObservableGauge<long>(
"scheduler_runner_backlog",
ObserveBacklog,
@@ -85,6 +100,28 @@ public sealed class SchedulerWorkerMetrics : IDisposable
description: "Remaining images queued for runner processing grouped by mode and schedule.");
}
/// <summary>
/// Marks a graph job as started by adding one to the in-flight counter for its tags.
/// </summary>
/// <param name="type">Job type tag value (callers in this change pass "build" or "overlay").</param>
/// <param name="tenantId">Tenant tag value.</param>
/// <param name="graphId">Graph identifier tag value.</param>
public void RecordGraphJobStart(string type, string tenantId, string graphId)
    => _graphJobsInflight.Add(1, GraphTags(type, tenantId, graphId));
/// <summary>
/// Marks a graph job as finished: subtracts one from the in-flight counter and, when a
/// duration is supplied, records it (clamped to zero or above) in the type-specific
/// histogram and in the general graph job duration histogram.
/// </summary>
/// <param name="type">Job type; "build" and "overlay" (case-insensitive) select the type-specific histogram.</param>
/// <param name="tenantId">Tenant tag value.</param>
/// <param name="graphId">Graph identifier tag value.</param>
/// <param name="result">Result tag value, e.g. "completed" or "failed".</param>
/// <param name="duration">Elapsed time for the job; when null no duration sample is recorded.</param>
public void RecordGraphJobCompletion(string type, string tenantId, string graphId, string result, TimeSpan? duration)
{
    _graphJobsInflight.Add(-1, GraphTags(type, tenantId, graphId));

    // No measurement means no sample: recording a made-up 0 s would skew the histograms.
    if (duration is not { } elapsed)
    {
        return;
    }

    // Clamp negative spans so a clock adjustment cannot produce a negative sample.
    var seconds = Math.Max(elapsed.TotalSeconds, 0d);
    var tags = GraphResultTags(type, tenantId, graphId, result);

    if (string.Equals(type, "build", StringComparison.OrdinalIgnoreCase))
    {
        _graphBuildSeconds.Record(seconds, tags);
    }
    else if (string.Equals(type, "overlay", StringComparison.OrdinalIgnoreCase))
    {
        _overlayLagSeconds.Record(seconds, tags);
    }

    // NOTE(review): RecordGraphJobResult also accepts a duration; if it records into this same
    // histogram, a caller invoking both methods emits two samples per job. Confirm that is intended.
    _graphJobDurationSeconds.Record(seconds, tags);
}
public void RecordGraphJobResult(string type, string result, TimeSpan? duration = null)
{
var tags = new[]
@@ -221,6 +258,23 @@ public sealed class SchedulerWorkerMetrics : IDisposable
}
}
/// <summary>
/// Builds the tag set identifying a graph job: "type", "tenant" and "graph_id", in that order.
/// </summary>
/// <param name="type">Value for the "type" tag.</param>
/// <param name="tenantId">Value for the "tenant" tag.</param>
/// <param name="graphId">Value for the "graph_id" tag.</param>
/// <returns>A new three-element tag array.</returns>
private static KeyValuePair<string, object?>[] GraphTags(string type, string tenantId, string graphId)
{
    var tags = new KeyValuePair<string, object?>[3];
    tags[0] = new("type", type);
    tags[1] = new("tenant", tenantId);
    tags[2] = new("graph_id", graphId);
    return tags;
}
/// <summary>
/// Builds the tag set for a finished graph job: "type", "tenant", "graph_id" and "result", in that order.
/// </summary>
/// <param name="type">Value for the "type" tag.</param>
/// <param name="tenantId">Value for the "tenant" tag.</param>
/// <param name="graphId">Value for the "graph_id" tag.</param>
/// <param name="result">Value for the "result" tag.</param>
/// <returns>A new four-element tag array.</returns>
private static KeyValuePair<string, object?>[] GraphResultTags(string type, string tenantId, string graphId, string result)
{
    var tags = new KeyValuePair<string, object?>[4];
    tags[0] = new("type", type);
    tags[1] = new("tenant", tenantId);
    tags[2] = new("graph_id", graphId);
    tags[3] = new("result", result);
    return tags;
}
/// <summary>
/// Composes a backlog key as "mode|scheduleId"; a null schedule id contributes an empty string.
/// </summary>
/// <param name="mode">Mode portion of the key.</param>
/// <param name="scheduleId">Schedule portion of the key; may be null.</param>
/// <returns>The composed key.</returns>
private static string BuildBacklogKey(string mode, string? scheduleId)
{
    // string.Concat treats null as empty, matching the interpolated form.
    return string.Concat(mode, "|", scheduleId);
}