feat: Implement runner execution pipeline with planner dispatch and execution services
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Docs CI / lint-and-preview (push) Has been cancelled
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Docs CI / lint-and-preview (push) Has been cancelled
				
			- Introduced RunnerBackgroundService to handle execution of runner segments. - Added RunnerExecutionService for processing segments and aggregating results. - Implemented PlannerQueueDispatchService to manage dispatching of planner messages. - Created PlannerQueueDispatcherBackgroundService for leasing and processing planner queue messages. - Developed ScannerReportClient for interacting with the scanner service. - Enhanced observability with SchedulerWorkerMetrics for tracking planner and runner performance. - Added comprehensive documentation for the new runner execution pipeline and observability metrics. - Implemented event emission for rescan activity and scanner report readiness.
This commit is contained in:
		
							
								
								
									
										32
									
								
								src/StellaOps.Scheduler.Worker.Host/Program.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								src/StellaOps.Scheduler.Worker.Host/Program.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | ||||
| using System.Diagnostics; | ||||
| using Microsoft.Extensions.Configuration; | ||||
| using Microsoft.Extensions.DependencyInjection; | ||||
| using Microsoft.Extensions.Hosting; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Storage.Mongo; | ||||
| using StellaOps.Scheduler.Worker.DependencyInjection; | ||||
|  | ||||
| var builder = Host.CreateApplicationBuilder(args); | ||||
|  | ||||
| builder.Logging.Configure(options => | ||||
| { | ||||
|     options.ActivityTrackingOptions = ActivityTrackingOptions.TraceId | ||||
|         | ActivityTrackingOptions.SpanId | ||||
|         | ActivityTrackingOptions.ParentId; | ||||
| }); | ||||
|  | ||||
| builder.Services.AddSchedulerQueues(builder.Configuration); | ||||
|  | ||||
| var storageSection = builder.Configuration.GetSection("Scheduler:Storage"); | ||||
| if (storageSection.Exists()) | ||||
| { | ||||
|     builder.Services.AddSchedulerMongoStorage(storageSection); | ||||
| } | ||||
|  | ||||
| builder.Services.AddSchedulerWorker(builder.Configuration.GetSection("Scheduler:Worker")); | ||||
|  | ||||
| var host = builder.Build(); | ||||
| await host.RunAsync(); | ||||
|  | ||||
| public partial class Program; | ||||
| @@ -0,0 +1,16 @@ | ||||
| <Project Sdk="Microsoft.NET.Sdk"> | ||||
|   <PropertyGroup> | ||||
|     <OutputType>Exe</OutputType> | ||||
|     <TargetFramework>net10.0</TargetFramework> | ||||
|     <Nullable>enable</Nullable> | ||||
|     <ImplicitUsings>enable</ImplicitUsings> | ||||
|   </PropertyGroup> | ||||
|   <ItemGroup> | ||||
|     <FrameworkReference Include="Microsoft.AspNetCore.App" /> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <ProjectReference Include="..\StellaOps.Scheduler.Queue\StellaOps.Scheduler.Queue.csproj" /> | ||||
|     <ProjectReference Include="..\StellaOps.Scheduler.Storage.Mongo\StellaOps.Scheduler.Storage.Mongo.csproj" /> | ||||
|     <ProjectReference Include="..\StellaOps.Scheduler.Worker\StellaOps.Scheduler.Worker.csproj" /> | ||||
|   </ItemGroup> | ||||
| </Project> | ||||
| @@ -1,5 +1,4 @@ | ||||
| global using System.Collections.Immutable; | ||||
| global using Moq; | ||||
| global using StellaOps.Scheduler.ImpactIndex; | ||||
| global using StellaOps.Scheduler.Models; | ||||
| global using StellaOps.Scheduler.Worker; | ||||
|   | ||||
| @@ -0,0 +1,120 @@ | ||||
| using System.Collections.Generic; | ||||
| using System.Net; | ||||
| using System.Net.Http; | ||||
| using System.Net.Http.Json; | ||||
| using System.Text.Json; | ||||
| using Microsoft.Extensions.Logging.Abstractions; | ||||
| using Microsoft.Extensions.Options; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Worker.Execution; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Tests; | ||||
|  | ||||
| public sealed class HttpScannerReportClientTests | ||||
| { | ||||
|     [Fact] | ||||
|     public async Task ExecuteAsync_WhenReportReturnsFindings_ProducesDeltaSummary() | ||||
|     { | ||||
|         var handler = new StubHttpMessageHandler(request => | ||||
|         { | ||||
|             if (request.RequestUri?.AbsolutePath.EndsWith("/api/v1/reports", StringComparison.OrdinalIgnoreCase) == true) | ||||
|             { | ||||
|                 var payload = new | ||||
|                 { | ||||
|                     report = new | ||||
|                     { | ||||
|                         reportId = "report-123", | ||||
|                         imageDigest = "sha256:abc", | ||||
|                         generatedAt = DateTimeOffset.UtcNow, | ||||
|                         verdict = "warn", | ||||
|                         policy = new { revisionId = "rev-1", digest = "digest-1" }, | ||||
|                         summary = new { total = 3, blocked = 2, warned = 1, ignored = 0, quieted = 0 } | ||||
|                     }, | ||||
|                     dsse = new | ||||
|                     { | ||||
|                         payloadType = "application/vnd.in-toto+json", | ||||
|                         payload = "eyJkYXRhIjoidGVzdCJ9", | ||||
|                         signatures = new[] { new { keyId = "test", algorithm = "ed25519", signature = "c2ln" } } | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK) | ||||
|                 { | ||||
|                     Content = JsonContent.Create(payload) | ||||
|                 }); | ||||
|             } | ||||
|  | ||||
|             return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); | ||||
|         }); | ||||
|  | ||||
|         var httpClient = new HttpClient(handler) | ||||
|         { | ||||
|             BaseAddress = new Uri("https://scanner.example") | ||||
|         }; | ||||
|  | ||||
|         var options = Microsoft.Extensions.Options.Options.Create(new SchedulerWorkerOptions()); | ||||
|         options.Value.Runner.Scanner.BaseAddress = httpClient.BaseAddress; | ||||
|         options.Value.Runner.Scanner.EnableContentRefresh = false; | ||||
|  | ||||
|         var client = new HttpScannerReportClient(httpClient, options, NullLogger<HttpScannerReportClient>.Instance); | ||||
|  | ||||
|         var result = await client.ExecuteAsync( | ||||
|             new ScannerReportRequest("tenant-1", "run-1", "sha256:abc", ScheduleMode.AnalysisOnly, true, new Dictionary<string, string>()), | ||||
|             CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal("sha256:abc", result.ImageDigest); | ||||
|         Assert.NotNull(result.Delta); | ||||
|         Assert.Equal(3, result.Delta!.NewFindings); | ||||
|         Assert.Equal(2, result.Delta.NewCriticals); | ||||
|         Assert.Equal(1, result.Delta.NewHigh); | ||||
|         Assert.Equal(0, result.Delta.NewMedium); | ||||
|         Assert.Equal(0, result.Delta.NewLow); | ||||
|         Assert.Equal("report-123", result.Report.ReportId); | ||||
|         Assert.Equal("rev-1", result.Report.PolicyRevisionId); | ||||
|         Assert.NotNull(result.Dsse); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
|     public async Task ExecuteAsync_WhenReportFails_RetriesAndThrows() | ||||
|     { | ||||
|         var callCount = 0; | ||||
|         var handler = new StubHttpMessageHandler(_ => | ||||
|         { | ||||
|             callCount++; | ||||
|             return Task.FromResult(new HttpResponseMessage(HttpStatusCode.InternalServerError)); | ||||
|         }); | ||||
|  | ||||
|         var httpClient = new HttpClient(handler) | ||||
|         { | ||||
|             BaseAddress = new Uri("https://scanner.example") | ||||
|         }; | ||||
|  | ||||
|         var options = Microsoft.Extensions.Options.Options.Create(new SchedulerWorkerOptions()); | ||||
|         options.Value.Runner.Scanner.BaseAddress = httpClient.BaseAddress; | ||||
|         options.Value.Runner.Scanner.EnableContentRefresh = false; | ||||
|         options.Value.Runner.Scanner.MaxRetryAttempts = 2; | ||||
|         options.Value.Runner.Scanner.RetryBaseDelay = TimeSpan.FromMilliseconds(1); | ||||
|  | ||||
|         var client = new HttpScannerReportClient(httpClient, options, NullLogger<HttpScannerReportClient>.Instance); | ||||
|  | ||||
|         await Assert.ThrowsAsync<HttpRequestException>(() => client.ExecuteAsync( | ||||
|             new ScannerReportRequest("tenant-1", "run-1", "sha256:abc", ScheduleMode.AnalysisOnly, true, new Dictionary<string, string>()), | ||||
|             CancellationToken.None)); | ||||
|  | ||||
|         Assert.Equal(3, callCount); | ||||
|     } | ||||
|  | ||||
|     private sealed class StubHttpMessageHandler : HttpMessageHandler | ||||
|     { | ||||
|         private readonly Func<HttpRequestMessage, Task<HttpResponseMessage>> _handler; | ||||
|  | ||||
|         public StubHttpMessageHandler(Func<HttpRequestMessage, Task<HttpResponseMessage>> handler) | ||||
|         { | ||||
|             _handler = handler; | ||||
|         } | ||||
|  | ||||
|         protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) | ||||
|             => _handler(request); | ||||
|     } | ||||
| } | ||||
| @@ -1,4 +1,9 @@ | ||||
| using System.Collections.Generic; | ||||
| using System.Collections.Immutable; | ||||
| using System.Linq; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using StellaOps.Scheduler.ImpactIndex; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Tests; | ||||
|  | ||||
| @@ -9,17 +14,20 @@ public sealed class ImpactTargetingServiceTests | ||||
|     { | ||||
|         var selector = new Selector(SelectorScope.AllImages, tenantId: "tenant-alpha"); | ||||
|         var expected = CreateEmptyImpactSet(selector, usageOnly: false); | ||||
|         IEnumerable<string>? capturedKeys = null; | ||||
|  | ||||
|         var mockIndex = new Mock<IImpactIndex>(MockBehavior.Strict); | ||||
|         mockIndex | ||||
|             .Setup(index => index.ResolveByPurlsAsync( | ||||
|                 It.Is<IEnumerable<string>>(keys => keys.SequenceEqual(new[] { "pkg:npm/a", "pkg:npm/b" })), | ||||
|                 false, | ||||
|                 selector, | ||||
|                 It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(expected); | ||||
|         var index = new StubImpactIndex | ||||
|         { | ||||
|             OnResolveByPurls = (purls, usageOnly, sel, _) => | ||||
|             { | ||||
|                 capturedKeys = purls.ToArray(); | ||||
|                 Assert.False(usageOnly); | ||||
|                 Assert.Equal(selector, sel); | ||||
|                 return ValueTask.FromResult(expected); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         var service = new ImpactTargetingService(mockIndex.Object); | ||||
|         var service = new ImpactTargetingService(index); | ||||
|  | ||||
|         var result = await service.ResolveByPurlsAsync( | ||||
|             new[] { "pkg:npm/a", "pkg:npm/A ", null!, "pkg:npm/b" }, | ||||
| @@ -27,21 +35,21 @@ public sealed class ImpactTargetingServiceTests | ||||
|             selector); | ||||
|  | ||||
|         Assert.Equal(expected, result); | ||||
|         mockIndex.VerifyAll(); | ||||
|         Assert.Equal(new[] { "pkg:npm/a", "pkg:npm/b" }, capturedKeys); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
|     public async Task ResolveByVulnerabilitiesAsync_ReturnsEmptyWhenNoIds() | ||||
|     { | ||||
|         var selector = new Selector(SelectorScope.AllImages, tenantId: "tenant-alpha"); | ||||
|         var mockIndex = new Mock<IImpactIndex>(MockBehavior.Strict); | ||||
|         var service = new ImpactTargetingService(mockIndex.Object); | ||||
|         var index = new StubImpactIndex(); | ||||
|         var service = new ImpactTargetingService(index); | ||||
|  | ||||
|         var result = await service.ResolveByVulnerabilitiesAsync(Array.Empty<string>(), usageOnly: true, selector); | ||||
|  | ||||
|         Assert.Empty(result.Images); | ||||
|         Assert.True(result.UsageOnly); | ||||
|         mockIndex.Verify(index => index.ResolveByVulnerabilitiesAsync(It.IsAny<IEnumerable<string>>(), It.IsAny<bool>(), It.IsAny<Selector>(), It.IsAny<CancellationToken>()), Times.Never); | ||||
|         Assert.Null(index.LastVulnerabilityIds); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
| @@ -50,16 +58,20 @@ public sealed class ImpactTargetingServiceTests | ||||
|         var selector = new Selector(SelectorScope.AllImages, tenantId: "tenant-alpha"); | ||||
|         var expected = CreateEmptyImpactSet(selector, usageOnly: true); | ||||
|  | ||||
|         var mockIndex = new Mock<IImpactIndex>(); | ||||
|         mockIndex | ||||
|             .Setup(index => index.ResolveAllAsync(selector, true, It.IsAny<CancellationToken>())) | ||||
|             .Returns(new ValueTask<ImpactSet>(expected)); | ||||
|         var index = new StubImpactIndex | ||||
|         { | ||||
|             OnResolveAll = (sel, usageOnly, _) => | ||||
|             { | ||||
|                 Assert.Equal(selector, sel); | ||||
|                 Assert.True(usageOnly); | ||||
|                 return ValueTask.FromResult(expected); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         var service = new ImpactTargetingService(mockIndex.Object); | ||||
|         var service = new ImpactTargetingService(index); | ||||
|         var result = await service.ResolveAllAsync(selector, usageOnly: true); | ||||
|  | ||||
|         Assert.Equal(expected, result); | ||||
|         mockIndex.VerifyAll(); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
| @@ -77,10 +89,7 @@ public sealed class ImpactTargetingServiceTests | ||||
|                     namespaces: new[] { "team-a" }, | ||||
|                     tags: new[] { "v1" }, | ||||
|                     usedByEntrypoint: false, | ||||
|                     labels: new[] | ||||
|                     { | ||||
|                         KeyValuePair.Create("env", "prod") | ||||
|                     }), | ||||
|                     labels: new[] { KeyValuePair.Create("env", "prod") }), | ||||
|                 new ImpactImage( | ||||
|                     "sha256:111", | ||||
|                     "registry-1", | ||||
| @@ -100,17 +109,12 @@ public sealed class ImpactTargetingServiceTests | ||||
|             snapshotId: "snap-1", | ||||
|             schemaVersion: SchedulerSchemaVersions.ImpactSet); | ||||
|  | ||||
|         var mockIndex = new Mock<IImpactIndex>(MockBehavior.Strict); | ||||
|         mockIndex | ||||
|             .Setup(index => index.ResolveByPurlsAsync( | ||||
|                 It.IsAny<IEnumerable<string>>(), | ||||
|                 false, | ||||
|                 selector, | ||||
|                 It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(indexResult); | ||||
|  | ||||
|         var service = new ImpactTargetingService(mockIndex.Object); | ||||
|         var index = new StubImpactIndex | ||||
|         { | ||||
|             OnResolveByPurls = (_, _, _, _) => ValueTask.FromResult(indexResult) | ||||
|         }; | ||||
|  | ||||
|         var service = new ImpactTargetingService(index); | ||||
|         var result = await service.ResolveByPurlsAsync(new[] { "pkg:npm/a" }, usageOnly: false, selector); | ||||
|  | ||||
|         Assert.Single(result.Images); | ||||
| @@ -163,16 +167,12 @@ public sealed class ImpactTargetingServiceTests | ||||
|             snapshotId: null, | ||||
|             schemaVersion: SchedulerSchemaVersions.ImpactSet); | ||||
|  | ||||
|         var mockIndex = new Mock<IImpactIndex>(MockBehavior.Strict); | ||||
|         mockIndex | ||||
|             .Setup(index => index.ResolveByPurlsAsync( | ||||
|                 It.IsAny<IEnumerable<string>>(), | ||||
|                 true, | ||||
|                 selector, | ||||
|                 It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(indexResult); | ||||
|         var index = new StubImpactIndex | ||||
|         { | ||||
|             OnResolveByPurls = (_, _, _, _) => ValueTask.FromResult(indexResult) | ||||
|         }; | ||||
|  | ||||
|         var service = new ImpactTargetingService(mockIndex.Object); | ||||
|         var service = new ImpactTargetingService(index); | ||||
|         var result = await service.ResolveByPurlsAsync(new[] { "pkg:npm/a" }, usageOnly: true, selector); | ||||
|  | ||||
|         Assert.Single(result.Images); | ||||
| @@ -180,8 +180,7 @@ public sealed class ImpactTargetingServiceTests | ||||
|     } | ||||
|  | ||||
|     private static ImpactSet CreateEmptyImpactSet(Selector selector, bool usageOnly) | ||||
|     { | ||||
|         return new ImpactSet( | ||||
|         => new( | ||||
|             selector, | ||||
|             ImmutableArray<ImpactImage>.Empty, | ||||
|             usageOnly, | ||||
| @@ -189,5 +188,30 @@ public sealed class ImpactTargetingServiceTests | ||||
|             0, | ||||
|             snapshotId: null, | ||||
|             schemaVersion: SchedulerSchemaVersions.ImpactSet); | ||||
|  | ||||
|     private sealed class StubImpactIndex : IImpactIndex | ||||
|     { | ||||
|         public Func<IEnumerable<string>, bool, Selector, CancellationToken, ValueTask<ImpactSet>>? OnResolveByPurls { get; set; } | ||||
|  | ||||
|         public Func<IEnumerable<string>, bool, Selector, CancellationToken, ValueTask<ImpactSet>>? OnResolveByVulnerabilities { get; set; } | ||||
|  | ||||
|         public Func<Selector, bool, CancellationToken, ValueTask<ImpactSet>>? OnResolveAll { get; set; } | ||||
|  | ||||
|         public IEnumerable<string>? LastVulnerabilityIds { get; private set; } | ||||
|  | ||||
|         public ValueTask<ImpactSet> ResolveByPurlsAsync(IEnumerable<string> purls, bool usageOnly, Selector selector, CancellationToken cancellationToken = default) | ||||
|             => OnResolveByPurls?.Invoke(purls, usageOnly, selector, cancellationToken) | ||||
|                 ?? ValueTask.FromResult(CreateEmptyImpactSet(selector, usageOnly)); | ||||
|  | ||||
|         public ValueTask<ImpactSet> ResolveByVulnerabilitiesAsync(IEnumerable<string> vulnerabilityIds, bool usageOnly, Selector selector, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             LastVulnerabilityIds = vulnerabilityIds; | ||||
|             return OnResolveByVulnerabilities?.Invoke(vulnerabilityIds, usageOnly, selector, cancellationToken) | ||||
|                 ?? ValueTask.FromResult(CreateEmptyImpactSet(selector, usageOnly)); | ||||
|         } | ||||
|  | ||||
|         public ValueTask<ImpactSet> ResolveAllAsync(Selector selector, bool usageOnly, CancellationToken cancellationToken = default) | ||||
|             => OnResolveAll?.Invoke(selector, usageOnly, cancellationToken) | ||||
|                 ?? ValueTask.FromResult(CreateEmptyImpactSet(selector, usageOnly)); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,11 +1,16 @@ | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Projections; | ||||
| using System.Collections.Concurrent; | ||||
| using System.Collections.Generic; | ||||
| using System.Collections.Immutable; | ||||
| using MongoDB.Driver; | ||||
| using Microsoft.Extensions.Logging.Abstractions; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Projections; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Repositories; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Services; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
| using StellaOps.Scheduler.Worker.Planning; | ||||
| using StellaOps.Scheduler.Worker.Observability; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Tests; | ||||
|  | ||||
| @@ -18,56 +23,34 @@ public sealed class PlannerExecutionServiceTests | ||||
|         var run = CreateRun(schedule.Id); | ||||
|         var impactSet = CreateImpactSet(schedule.Selection, images: 2); | ||||
|  | ||||
|         var scheduleRepository = new Mock<IScheduleRepository>(); | ||||
|         scheduleRepository | ||||
|             .Setup(repo => repo.GetAsync(run.TenantId, run.ScheduleId!, null, It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(schedule); | ||||
|         var scheduleRepository = new StubScheduleRepository(schedule); | ||||
|         var runRepository = new InMemoryRunRepository(run); | ||||
|         var snapshotRepository = new RecordingImpactSnapshotRepository(); | ||||
|         var runSummaryService = new RecordingRunSummaryService(); | ||||
|         var targetingService = new StubImpactTargetingService(impactSet); | ||||
|         var plannerQueue = new RecordingPlannerQueue(); | ||||
|  | ||||
|         var runRepository = new Mock<IRunRepository>(); | ||||
|         runRepository | ||||
|             .Setup(repo => repo.UpdateAsync(It.IsAny<Run>(), null, It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(true); | ||||
|         using var metrics = new SchedulerWorkerMetrics(); | ||||
|  | ||||
|         var snapshotRepository = new Mock<IImpactSnapshotRepository>(); | ||||
|         snapshotRepository | ||||
|             .Setup(repo => repo.UpsertAsync(It.IsAny<ImpactSet>(), null, It.IsAny<CancellationToken>())) | ||||
|             .Returns(Task.CompletedTask); | ||||
|  | ||||
|         var runSummaryService = new Mock<IRunSummaryService>(); | ||||
|         runSummaryService | ||||
|             .Setup(service => service.ProjectAsync(It.IsAny<Run>(), It.IsAny<CancellationToken>())) | ||||
|             .Returns(Task.FromResult(default(RunSummaryProjection)!)); | ||||
|  | ||||
|         var targetingService = new Mock<IImpactTargetingService>(); | ||||
|         targetingService | ||||
|             .Setup(service => service.ResolveAllAsync(schedule.Selection, true, It.IsAny<CancellationToken>())) | ||||
|             .Returns(new ValueTask<ImpactSet>(impactSet)); | ||||
|  | ||||
|         var plannerQueue = new Mock<ISchedulerPlannerQueue>(); | ||||
|         plannerQueue | ||||
|             .Setup(queue => queue.EnqueueAsync(It.IsAny<PlannerQueueMessage>(), It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(new SchedulerQueueEnqueueResult("msg-1", false)); | ||||
|  | ||||
|         var options = new SchedulerWorkerOptions(); | ||||
|         var service = new PlannerExecutionService( | ||||
|             scheduleRepository.Object, | ||||
|             runRepository.Object, | ||||
|             snapshotRepository.Object, | ||||
|             runSummaryService.Object, | ||||
|             targetingService.Object, | ||||
|             plannerQueue.Object, | ||||
|             options, | ||||
|             scheduleRepository, | ||||
|             runRepository, | ||||
|             snapshotRepository, | ||||
|             runSummaryService, | ||||
|             targetingService, | ||||
|             plannerQueue, | ||||
|             new SchedulerWorkerOptions(), | ||||
|             TimeProvider.System, | ||||
|             CreateLogger()); | ||||
|             metrics, | ||||
|             NullLogger<PlannerExecutionService>.Instance); | ||||
|  | ||||
|         var result = await service.ProcessAsync(run, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(PlannerExecutionStatus.Enqueued, result.Status); | ||||
|         Assert.NotNull(result.UpdatedRun); | ||||
|         Assert.Single(plannerQueue.Messages); | ||||
|         Assert.NotNull(snapshotRepository.LastSnapshot); | ||||
|         Assert.Equal(RunState.Queued, result.UpdatedRun!.State); | ||||
|         Assert.Equal(impactSet.Images.Length, result.UpdatedRun.Stats.Queued); | ||||
|         plannerQueue.Verify(queue => queue.EnqueueAsync(It.IsAny<PlannerQueueMessage>(), It.IsAny<CancellationToken>()), Times.Once); | ||||
|         snapshotRepository.Verify(repo => repo.UpsertAsync(It.IsAny<ImpactSet>(), null, It.IsAny<CancellationToken>()), Times.Once); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
| @@ -77,96 +60,75 @@ public sealed class PlannerExecutionServiceTests | ||||
|         var run = CreateRun(schedule.Id); | ||||
|         var impactSet = CreateImpactSet(schedule.Selection, images: 0); | ||||
|  | ||||
|         var scheduleRepository = new Mock<IScheduleRepository>(); | ||||
|         scheduleRepository | ||||
|             .Setup(repo => repo.GetAsync(run.TenantId, run.ScheduleId!, null, It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(schedule); | ||||
|  | ||||
|         var runRepository = new Mock<IRunRepository>(); | ||||
|         runRepository | ||||
|             .Setup(repo => repo.UpdateAsync(It.IsAny<Run>(), null, It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(true); | ||||
|  | ||||
|         var snapshotRepository = new Mock<IImpactSnapshotRepository>(); | ||||
|         var runSummaryService = new Mock<IRunSummaryService>(); | ||||
|         runSummaryService | ||||
|             .Setup(service => service.ProjectAsync(It.IsAny<Run>(), It.IsAny<CancellationToken>())) | ||||
|             .Returns(Task.FromResult(default(RunSummaryProjection)!)); | ||||
|  | ||||
|         var targetingService = new Mock<IImpactTargetingService>(); | ||||
|         targetingService | ||||
|             .Setup(service => service.ResolveAllAsync(schedule.Selection, true, It.IsAny<CancellationToken>())) | ||||
|             .Returns(new ValueTask<ImpactSet>(impactSet)); | ||||
|  | ||||
|         var plannerQueue = new Mock<ISchedulerPlannerQueue>(); | ||||
|         var options = new SchedulerWorkerOptions(); | ||||
|  | ||||
|         var service = new PlannerExecutionService( | ||||
|             scheduleRepository.Object, | ||||
|             runRepository.Object, | ||||
|             snapshotRepository.Object, | ||||
|             runSummaryService.Object, | ||||
|             targetingService.Object, | ||||
|             plannerQueue.Object, | ||||
|             options, | ||||
|             TimeProvider.System, | ||||
|             CreateLogger()); | ||||
|         var service = CreateService(schedule, run, impactSet, out var plannerQueue); | ||||
|  | ||||
|         var result = await service.ProcessAsync(run, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(PlannerExecutionStatus.CompletedWithoutWork, result.Status); | ||||
|         Assert.NotNull(result.UpdatedRun); | ||||
|         Assert.Equal(RunState.Completed, result.UpdatedRun!.State); | ||||
|         plannerQueue.Verify(queue => queue.EnqueueAsync(It.IsAny<PlannerQueueMessage>(), It.IsAny<CancellationToken>()), Times.Never); | ||||
|         Assert.Empty(plannerQueue.Messages); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
|     public async Task ProcessAsync_WhenScheduleMissing_MarksRunAsFailed() | ||||
|     { | ||||
|         var run = CreateRun(scheduleId: "missing"); | ||||
|         var scheduleRepository = new StubScheduleRepository(); // empty repository | ||||
|         var runRepository = new InMemoryRunRepository(run); | ||||
|         var snapshotRepository = new RecordingImpactSnapshotRepository(); | ||||
|         var runSummaryService = new RecordingRunSummaryService(); | ||||
|         var targetingService = new StubImpactTargetingService(CreateImpactSet(new Selector(SelectorScope.AllImages, run.TenantId), 0)); | ||||
|         var plannerQueue = new RecordingPlannerQueue(); | ||||
|  | ||||
|         var scheduleRepository = new Mock<IScheduleRepository>(); | ||||
|         scheduleRepository | ||||
|             .Setup(repo => repo.GetAsync(run.TenantId, run.ScheduleId!, null, It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync((Schedule?)null); | ||||
|  | ||||
|         var runRepository = new Mock<IRunRepository>(); | ||||
|         runRepository | ||||
|             .Setup(repo => repo.UpdateAsync(It.IsAny<Run>(), null, It.IsAny<CancellationToken>())) | ||||
|             .ReturnsAsync(true); | ||||
|  | ||||
|         var snapshotRepository = new Mock<IImpactSnapshotRepository>(); | ||||
|         var runSummaryService = new Mock<IRunSummaryService>(); | ||||
|         runSummaryService | ||||
|             .Setup(service => service.ProjectAsync(It.IsAny<Run>(), It.IsAny<CancellationToken>())) | ||||
|             .Returns(Task.FromResult(default(RunSummaryProjection)!)); | ||||
|  | ||||
|         var targetingService = new Mock<IImpactTargetingService>(); | ||||
|         var plannerQueue = new Mock<ISchedulerPlannerQueue>(); | ||||
|         using var metrics = new SchedulerWorkerMetrics(); | ||||
|  | ||||
|         var service = new PlannerExecutionService( | ||||
|             scheduleRepository.Object, | ||||
|             runRepository.Object, | ||||
|             snapshotRepository.Object, | ||||
|             runSummaryService.Object, | ||||
|             targetingService.Object, | ||||
|             plannerQueue.Object, | ||||
|             scheduleRepository, | ||||
|             runRepository, | ||||
|             snapshotRepository, | ||||
|             runSummaryService, | ||||
|             targetingService, | ||||
|             plannerQueue, | ||||
|             new SchedulerWorkerOptions(), | ||||
|             TimeProvider.System, | ||||
|             CreateLogger()); | ||||
|             metrics, | ||||
|             NullLogger<PlannerExecutionService>.Instance); | ||||
|  | ||||
|         var result = await service.ProcessAsync(run, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(PlannerExecutionStatus.Failed, result.Status); | ||||
|         Assert.NotNull(result.UpdatedRun); | ||||
|         Assert.Equal(RunState.Error, result.UpdatedRun!.State); | ||||
|         targetingService.Verify(service => service.ResolveAllAsync(It.IsAny<Selector>(), It.IsAny<bool>(), It.IsAny<CancellationToken>()), Times.Never); | ||||
|         plannerQueue.Verify(queue => queue.EnqueueAsync(It.IsAny<PlannerQueueMessage>(), It.IsAny<CancellationToken>()), Times.Never); | ||||
|         Assert.Empty(plannerQueue.Messages); | ||||
|     } | ||||
|  | ||||
|     private static PlannerExecutionService CreateService( | ||||
|         Schedule schedule, | ||||
|         Run run, | ||||
|         ImpactSet impactSet, | ||||
|         out RecordingPlannerQueue plannerQueue) | ||||
|     { | ||||
|         var scheduleRepository = new StubScheduleRepository(schedule); | ||||
|         var runRepository = new InMemoryRunRepository(run); | ||||
|         var snapshotRepository = new RecordingImpactSnapshotRepository(); | ||||
|         var runSummaryService = new RecordingRunSummaryService(); | ||||
|         var targetingService = new StubImpactTargetingService(impactSet); | ||||
|         plannerQueue = new RecordingPlannerQueue(); | ||||
|  | ||||
|         return new PlannerExecutionService( | ||||
|             scheduleRepository, | ||||
|             runRepository, | ||||
|             snapshotRepository, | ||||
|             runSummaryService, | ||||
|             targetingService, | ||||
|             plannerQueue, | ||||
|             new SchedulerWorkerOptions(), | ||||
|             TimeProvider.System, | ||||
|             new SchedulerWorkerMetrics(), | ||||
|             NullLogger<PlannerExecutionService>.Instance); | ||||
|     } | ||||
|  | ||||
|     private static Run CreateRun(string scheduleId) | ||||
|     { | ||||
|         return new Run( | ||||
|         => new( | ||||
|             id: "run_001", | ||||
|             tenantId: "tenant-alpha", | ||||
|             trigger: RunTrigger.Cron, | ||||
| @@ -174,11 +136,9 @@ public sealed class PlannerExecutionServiceTests | ||||
|             stats: RunStats.Empty, | ||||
|             createdAt: DateTimeOffset.UtcNow.AddMinutes(-5), | ||||
|             scheduleId: scheduleId); | ||||
|     } | ||||
|  | ||||
|     private static Schedule CreateSchedule() | ||||
|     { | ||||
|         return new Schedule( | ||||
|         => new( | ||||
|             id: "sch_001", | ||||
|             tenantId: "tenant-alpha", | ||||
|             name: "Nightly", | ||||
| @@ -195,7 +155,6 @@ public sealed class PlannerExecutionServiceTests | ||||
|             updatedAt: DateTimeOffset.UtcNow.AddHours(-1), | ||||
|             updatedBy: "system", | ||||
|             subscribers: ImmutableArray<string>.Empty); | ||||
|     } | ||||
|  | ||||
|     private static ImpactSet CreateImpactSet(Selector selector, int images) | ||||
|     { | ||||
| @@ -219,8 +178,144 @@ public sealed class PlannerExecutionServiceTests | ||||
|             schemaVersion: SchedulerSchemaVersions.ImpactSet); | ||||
|     } | ||||
|  | ||||
|     private static ILogger<PlannerExecutionService> CreateLogger() | ||||
|     private sealed class StubScheduleRepository : IScheduleRepository | ||||
|     { | ||||
|         return LoggerFactory.Create(builder => { }).CreateLogger<PlannerExecutionService>(); | ||||
|         private readonly Dictionary<(string TenantId, string ScheduleId), Schedule> _store; | ||||
|  | ||||
|         public StubScheduleRepository(params Schedule[] schedules) | ||||
|         { | ||||
|             _store = schedules.ToDictionary(schedule => (schedule.TenantId, schedule.Id), schedule => schedule); | ||||
|         } | ||||
|  | ||||
|         public Task UpsertAsync(Schedule schedule, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _store[(schedule.TenantId, schedule.Id)] = schedule; | ||||
|             return Task.CompletedTask; | ||||
|         } | ||||
|  | ||||
|         public Task<Schedule?> GetAsync(string tenantId, string scheduleId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _store.TryGetValue((tenantId, scheduleId), out var schedule); | ||||
|             return Task.FromResult(schedule); | ||||
|         } | ||||
|  | ||||
|         public Task<IReadOnlyList<Schedule>> ListAsync(string tenantId, ScheduleQueryOptions? options = null, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<Schedule>>(_store.Values.Where(schedule => schedule.TenantId == tenantId).ToArray()); | ||||
|  | ||||
|         public Task<bool> SoftDeleteAsync(string tenantId, string scheduleId, string deletedBy, DateTimeOffset deletedAt, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult(_store.Remove((tenantId, scheduleId))); | ||||
|     } | ||||
|  | ||||
|     private sealed class InMemoryRunRepository : IRunRepository | ||||
|     { | ||||
|         private readonly ConcurrentDictionary<(string Tenant, string RunId), Run> _runs = new(); | ||||
|  | ||||
|         public InMemoryRunRepository(params Run[] runs) | ||||
|         { | ||||
|             foreach (var run in runs) | ||||
|             { | ||||
|                 _runs[(run.TenantId, run.Id)] = run; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public Task InsertAsync(Run run, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _runs[(run.TenantId, run.Id)] = run; | ||||
|             return Task.CompletedTask; | ||||
|         } | ||||
|  | ||||
|         public Task<bool> UpdateAsync(Run run, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _runs[(run.TenantId, run.Id)] = run; | ||||
|             return Task.FromResult(true); | ||||
|         } | ||||
|  | ||||
|         public Task<Run?> GetAsync(string tenantId, string runId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _runs.TryGetValue((tenantId, runId), out var run); | ||||
|             return Task.FromResult(run); | ||||
|         } | ||||
|  | ||||
|         public Task<IReadOnlyList<Run>> ListAsync(string tenantId, RunQueryOptions? options = null, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<Run>>(_runs.Values.Where(run => run.TenantId == tenantId).ToArray()); | ||||
|  | ||||
|         public Task<IReadOnlyList<Run>> ListByStateAsync(RunState state, int limit = 50, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<Run>>(_runs.Values.Where(run => run.State == state).Take(limit).ToArray()); | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingImpactSnapshotRepository : IImpactSnapshotRepository | ||||
|     { | ||||
|         public ImpactSet? LastSnapshot { get; private set; } | ||||
|  | ||||
|         public Task UpsertAsync(ImpactSet snapshot, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             LastSnapshot = snapshot; | ||||
|             return Task.CompletedTask; | ||||
|         } | ||||
|  | ||||
|         public Task<ImpactSet?> GetBySnapshotIdAsync(string snapshotId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<ImpactSet?>(null); | ||||
|  | ||||
|         public Task<ImpactSet?> GetLatestBySelectorAsync(Selector selector, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<ImpactSet?>(null); | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingRunSummaryService : IRunSummaryService | ||||
|     { | ||||
|         public Run? LastRun { get; private set; } | ||||
|  | ||||
|         public Task<RunSummaryProjection> ProjectAsync(Run run, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             LastRun = run; | ||||
|             return Task.FromResult(new RunSummaryProjection( | ||||
|                 run.TenantId, | ||||
|                 run.ScheduleId ?? string.Empty, | ||||
|                 DateTimeOffset.UtcNow, | ||||
|                 null, | ||||
|                 ImmutableArray<RunSummarySnapshot>.Empty, | ||||
|                 new RunSummaryCounters(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); | ||||
|         } | ||||
|  | ||||
|         public Task<RunSummaryProjection?> GetAsync(string tenantId, string scheduleId, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<RunSummaryProjection?>(null); | ||||
|  | ||||
|         public Task<IReadOnlyList<RunSummaryProjection>> ListAsync(string tenantId, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<RunSummaryProjection>>(Array.Empty<RunSummaryProjection>()); | ||||
|     } | ||||
|  | ||||
|     private sealed class StubImpactTargetingService : IImpactTargetingService | ||||
|     { | ||||
|         private readonly ImpactSet _result; | ||||
|  | ||||
|         public StubImpactTargetingService(ImpactSet result) | ||||
|         { | ||||
|             _result = result; | ||||
|         } | ||||
|  | ||||
|         public ValueTask<ImpactSet> ResolveByPurlsAsync(IEnumerable<string> productKeys, bool usageOnly, Selector selector, CancellationToken cancellationToken = default) | ||||
|             => new(_result); | ||||
|  | ||||
|         public ValueTask<ImpactSet> ResolveByVulnerabilitiesAsync(IEnumerable<string> vulnerabilityIds, bool usageOnly, Selector selector, CancellationToken cancellationToken = default) | ||||
|             => new(_result); | ||||
|  | ||||
|         public ValueTask<ImpactSet> ResolveAllAsync(Selector selector, bool usageOnly, CancellationToken cancellationToken = default) | ||||
|             => new(_result); | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingPlannerQueue : ISchedulerPlannerQueue | ||||
|     { | ||||
|         public List<PlannerQueueMessage> Messages { get; } = new(); | ||||
|  | ||||
|         public ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(PlannerQueueMessage message, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             Messages.Add(message); | ||||
|             return ValueTask.FromResult(new SchedulerQueueEnqueueResult(Guid.NewGuid().ToString(), Deduplicated: false)); | ||||
|         } | ||||
|  | ||||
|         public ValueTask<IReadOnlyList<ISchedulerQueueLease<PlannerQueueMessage>>> LeaseAsync(SchedulerQueueLeaseRequest request, CancellationToken cancellationToken = default) | ||||
|             => throw new NotSupportedException(); | ||||
|  | ||||
|         public ValueTask<IReadOnlyList<ISchedulerQueueLease<PlannerQueueMessage>>> ClaimExpiredAsync(SchedulerQueueClaimOptions options, CancellationToken cancellationToken = default) | ||||
|             => throw new NotSupportedException(); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -0,0 +1,170 @@ | ||||
| using System.Collections.Generic; | ||||
| using System.Collections.Immutable; | ||||
| using System.Linq; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Logging.Abstractions; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Worker.Planning; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Tests; | ||||
|  | ||||
| public sealed class PlannerQueueDispatchServiceTests | ||||
| { | ||||
|     [Fact] | ||||
|     public async Task DispatchAsync_EnqueuesRunnerSegmentsDeterministically() | ||||
|     { | ||||
|         var run = CreateRun(); | ||||
|         var schedule = CreateSchedule(parallelism: 2, maxJobs: 4, ratePerSecond: 11); | ||||
|         var impactSet = CreateImpactSet(run.TenantId, count: 5); | ||||
|         var message = new PlannerQueueMessage(run, impactSet, schedule, correlationId: "corr-123"); | ||||
|  | ||||
|         var shardPlanner = new ImpactShardPlanner(); | ||||
|         var runnerQueue = new RecordingRunnerQueue(); | ||||
|         var service = new PlannerQueueDispatchService( | ||||
|             shardPlanner, | ||||
|             runnerQueue, | ||||
|             new SchedulerWorkerOptions(), | ||||
|             NullLogger<PlannerQueueDispatchService>.Instance); | ||||
|  | ||||
|         var result = await service.DispatchAsync(message, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(PlannerQueueDispatchStatus.DispatchCompleted, result.Status); | ||||
|         Assert.Equal(2, result.SegmentCount); | ||||
|         Assert.Equal(4, runnerQueue.Messages.Sum(msg => msg.ImageDigests.Count)); | ||||
|         Assert.All(runnerQueue.Messages, msg => Assert.Equal(run.Id, msg.RunId)); | ||||
|         Assert.All(runnerQueue.Messages, msg => Assert.Equal(run.TenantId, msg.TenantId)); | ||||
|         Assert.All(runnerQueue.Messages, msg => Assert.Equal(run.ScheduleId, msg.ScheduleId)); | ||||
|         Assert.All(runnerQueue.Messages, msg => Assert.Equal(impactSet.UsageOnly, msg.UsageOnly)); | ||||
|         Assert.All(runnerQueue.Messages, msg => Assert.Equal(11, msg.RatePerSecond)); | ||||
|  | ||||
|         Assert.Collection( | ||||
|             runnerQueue.Messages.OrderBy(msg => msg.SegmentId), | ||||
|             first => | ||||
|             { | ||||
|                 Assert.Equal($"{run.Id}:0000", first.SegmentId); | ||||
|                 Assert.Equal(2, first.ImageDigests.Count); | ||||
|             }, | ||||
|             second => | ||||
|             { | ||||
|                 Assert.Equal($"{run.Id}:0001", second.SegmentId); | ||||
|                 Assert.Equal(2, second.ImageDigests.Count); | ||||
|             }); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
|     public async Task DispatchAsync_NoImages_ReturnsNoWork() | ||||
|     { | ||||
|         var run = CreateRun(); | ||||
|         var schedule = CreateSchedule(); | ||||
|         var impactSet = new ImpactSet( | ||||
|             new Selector(SelectorScope.AllImages, run.TenantId), | ||||
|             ImmutableArray<ImpactImage>.Empty, | ||||
|             usageOnly: true, | ||||
|             DateTimeOffset.UtcNow, | ||||
|             total: 0, | ||||
|             snapshotId: null, | ||||
|             schemaVersion: SchedulerSchemaVersions.ImpactSet); | ||||
|         var message = new PlannerQueueMessage(run, impactSet, schedule); | ||||
|  | ||||
|         var shardPlanner = new StubImpactShardPlanner(ImmutableArray<ImpactShard>.Empty); | ||||
|         var runnerQueue = new RecordingRunnerQueue(); | ||||
|         var service = new PlannerQueueDispatchService( | ||||
|             shardPlanner, | ||||
|             runnerQueue, | ||||
|             new SchedulerWorkerOptions(), | ||||
|             NullLogger<PlannerQueueDispatchService>.Instance); | ||||
|  | ||||
|         var result = await service.DispatchAsync(message, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(PlannerQueueDispatchStatus.NoWork, result.Status); | ||||
|         Assert.Empty(runnerQueue.Messages); | ||||
|     } | ||||
|  | ||||
|     private static Run CreateRun() | ||||
|         => new( | ||||
|             id: "run-123", | ||||
|             tenantId: "tenant-abc", | ||||
|             trigger: RunTrigger.Cron, | ||||
|             state: RunState.Queued, | ||||
|             stats: new RunStats(candidates: 6, deduped: 5, queued: 5), | ||||
|             createdAt: DateTimeOffset.UtcNow.AddMinutes(-5), | ||||
|             scheduleId: "sched-789"); | ||||
|  | ||||
|     private static Schedule CreateSchedule(int? parallelism = null, int? maxJobs = null, int? ratePerSecond = null) | ||||
|         => new( | ||||
|             id: "sched-789", | ||||
|             tenantId: "tenant-abc", | ||||
|             name: "Nightly", | ||||
|             enabled: true, | ||||
|             cronExpression: "0 2 * * *", | ||||
|             timezone: "UTC", | ||||
|             mode: ScheduleMode.AnalysisOnly, | ||||
|             selection: new Selector(SelectorScope.AllImages, tenantId: "tenant-abc"), | ||||
|             onlyIf: ScheduleOnlyIf.Default, | ||||
|             notify: ScheduleNotify.Default, | ||||
|             limits: new ScheduleLimits(maxJobs, ratePerSecond, parallelism), | ||||
|             createdAt: DateTimeOffset.UtcNow.AddDays(-1), | ||||
|             createdBy: "system", | ||||
|             updatedAt: DateTimeOffset.UtcNow.AddHours(-1), | ||||
|             updatedBy: "system", | ||||
|             subscribers: ImmutableArray<string>.Empty); | ||||
|  | ||||
|     private static ImpactSet CreateImpactSet(string tenantId, int count) | ||||
|     { | ||||
|         var selector = new Selector(SelectorScope.AllImages, tenantId); | ||||
|         var images = Enumerable.Range(0, count) | ||||
|             .Select(index => new ImpactImage( | ||||
|                 imageDigest: $"sha256:{index:D64}", | ||||
|                 registry: "registry.example.com", | ||||
|                 repository: "service/api", | ||||
|                 namespaces: new[] { "team-a" }, | ||||
|                 tags: new[] { $"v{index}" }, | ||||
|                 usedByEntrypoint: index % 2 == 0)) | ||||
|             .ToImmutableArray(); | ||||
|  | ||||
|         return new ImpactSet( | ||||
|             selector, | ||||
|             images, | ||||
|             usageOnly: true, | ||||
|             generatedAt: DateTimeOffset.UtcNow.AddMinutes(-2), | ||||
|             total: count, | ||||
|             snapshotId: "snapshot-xyz", | ||||
|             schemaVersion: SchedulerSchemaVersions.ImpactSet); | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingRunnerQueue : ISchedulerRunnerQueue | ||||
|     { | ||||
|         public List<RunnerSegmentQueueMessage> Messages { get; } = new(); | ||||
|  | ||||
|         public ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(RunnerSegmentQueueMessage message, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             Messages.Add(message); | ||||
|             return ValueTask.FromResult(new SchedulerQueueEnqueueResult(Guid.NewGuid().ToString(), Deduplicated: false)); | ||||
|         } | ||||
|  | ||||
|         public ValueTask<IReadOnlyList<ISchedulerQueueLease<RunnerSegmentQueueMessage>>> LeaseAsync( | ||||
|             SchedulerQueueLeaseRequest request, | ||||
|             CancellationToken cancellationToken = default) | ||||
|             => throw new NotSupportedException(); | ||||
|  | ||||
|         public ValueTask<IReadOnlyList<ISchedulerQueueLease<RunnerSegmentQueueMessage>>> ClaimExpiredAsync( | ||||
|             SchedulerQueueClaimOptions options, | ||||
|             CancellationToken cancellationToken = default) | ||||
|             => throw new NotSupportedException(); | ||||
|     } | ||||
|  | ||||
|     private sealed class StubImpactShardPlanner : IImpactShardPlanner | ||||
|     { | ||||
|         private readonly ImmutableArray<ImpactShard> _result; | ||||
|  | ||||
|         public StubImpactShardPlanner(ImmutableArray<ImpactShard> result) | ||||
|         { | ||||
|             _result = result; | ||||
|         } | ||||
|  | ||||
|         public ImmutableArray<ImpactShard> PlanShards(ImpactSet impactSet, int? maxJobs, int? parallelism) => _result; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,327 @@ | ||||
| using System.Collections.Concurrent; | ||||
| using System.Collections.Generic; | ||||
| using System.Collections.Immutable; | ||||
| using System.Linq; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using Microsoft.Extensions.Logging.Abstractions; | ||||
| using MongoDB.Driver; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Repositories; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Services; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Projections; | ||||
| using StellaOps.Scheduler.Worker.Events; | ||||
| using StellaOps.Scheduler.Worker.Execution; | ||||
| using StellaOps.Scheduler.Worker.Observability; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Tests; | ||||
|  | ||||
| public sealed class RunnerExecutionServiceTests | ||||
| { | ||||
|     [Fact] | ||||
|     public async Task ExecuteAsync_UpdatesRunStatsAndDeltas() | ||||
|     { | ||||
|         var run = CreateRun(); | ||||
|         var repository = new InMemoryRunRepository(run); | ||||
|         var summaryService = new RecordingRunSummaryService(); | ||||
|         var impactRepository = new InMemoryImpactSnapshotRepository(run.Id, | ||||
|             new[] | ||||
|         { | ||||
|             CreateImpactImage("sha256:1111111111111111111111111111111111111111111111111111111111111111", "registry-1", "repo-1"), | ||||
|             CreateImpactImage("sha256:2222222222222222222222222222222222222222222222222222222222222222", "registry-1", "repo-2") | ||||
|         }); | ||||
|         var scannerClient = new StubScannerReportClient(new Dictionary<string, RunnerImageResult> | ||||
|         { | ||||
|             ["sha256:1111111111111111111111111111111111111111111111111111111111111111"] = CreateRunnerImageResult( | ||||
|                 "sha256:1111111111111111111111111111111111111111111111111111111111111111", | ||||
|                 new DeltaSummary( | ||||
|                     "sha256:1111111111111111111111111111111111111111111111111111111111111111", | ||||
|                     newFindings: 2, | ||||
|                     newCriticals: 1, | ||||
|                     newHigh: 0, | ||||
|                     newMedium: 1, | ||||
|                     newLow: 0, | ||||
|                     kevHits: ImmutableArray.Create("CVE-2025-0001"), | ||||
|                     topFindings: ImmutableArray.Create(new DeltaFinding("pkg:purl", "CVE-2025-0001", SeverityRank.Critical)), | ||||
|                     reportUrl: "https://scanner/reports/1", | ||||
|                     attestation: null, | ||||
|                     detectedAt: DateTimeOffset.UtcNow)), | ||||
|             ["sha256:2222222222222222222222222222222222222222222222222222222222222222"] = CreateRunnerImageResult( | ||||
|                 "sha256:2222222222222222222222222222222222222222222222222222222222222222", | ||||
|                 delta: null) | ||||
|         }); | ||||
|         var eventPublisher = new RecordingSchedulerEventPublisher(); | ||||
|  | ||||
|         using var metrics = new SchedulerWorkerMetrics(); | ||||
|  | ||||
|         var service = new RunnerExecutionService( | ||||
|             repository, | ||||
|             summaryService, | ||||
|             impactRepository, | ||||
|             scannerClient, | ||||
|             eventPublisher, | ||||
|             metrics, | ||||
|             TimeProvider.System, | ||||
|             NullLogger<RunnerExecutionService>.Instance); | ||||
|  | ||||
|         var message = new RunnerSegmentQueueMessage( | ||||
|             segmentId: "run-123:0000", | ||||
|             runId: run.Id, | ||||
|             tenantId: run.TenantId, | ||||
|             imageDigests: new[] | ||||
|             { | ||||
|                 "sha256:1111111111111111111111111111111111111111111111111111111111111111", | ||||
|                 "sha256:2222222222222222222222222222222222222222222222222222222222222222" | ||||
|             }, | ||||
|             scheduleId: run.ScheduleId, | ||||
|             ratePerSecond: null, | ||||
|             usageOnly: true, | ||||
|             attributes: new Dictionary<string, string> | ||||
|             { | ||||
|                 ["scheduleMode"] = ScheduleMode.AnalysisOnly.ToString(), | ||||
|                 ["impactSnapshotId"] = $"impact::{run.Id}" | ||||
|             }, | ||||
|             correlationId: "corr-xyz"); | ||||
|  | ||||
|         var result = await service.ExecuteAsync(message, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(RunnerSegmentExecutionStatus.Completed, result.Status); | ||||
|         Assert.True(result.RunCompleted); | ||||
|         Assert.Equal(1, result.DeltaImages); | ||||
|  | ||||
|         var persisted = repository.GetSnapshot(run.TenantId, run.Id); | ||||
|         Assert.NotNull(persisted); | ||||
|         Assert.Equal(2, persisted!.Stats.Completed); | ||||
|         Assert.Equal(1, persisted.Stats.Deltas); | ||||
|         Assert.Equal(1, persisted.Stats.NewCriticals); | ||||
|         Assert.Equal(1, persisted.Stats.NewMedium); | ||||
|         Assert.Contains(persisted.Deltas, delta => delta.ImageDigest == "sha256:1111111111111111111111111111111111111111111111111111111111111111"); | ||||
|         Assert.Equal(persisted, summaryService.LastProjected); | ||||
|         Assert.Equal(2, eventPublisher.ReportReady.Count); | ||||
|         Assert.Single(eventPublisher.RescanDeltaPayloads); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
|     public async Task ExecuteAsync_WhenRunMissing_ReturnsRunMissing() | ||||
|     { | ||||
|         var repository = new InMemoryRunRepository(); | ||||
|         var impactRepository = new InMemoryImpactSnapshotRepository("run-123", Array.Empty<ImpactImage>()); | ||||
|         var eventPublisher = new RecordingSchedulerEventPublisher(); | ||||
|         using var metrics = new SchedulerWorkerMetrics(); | ||||
|  | ||||
|         var service = new RunnerExecutionService( | ||||
|             repository, | ||||
|             new RecordingRunSummaryService(), | ||||
|             impactRepository, | ||||
|             new StubScannerReportClient(new Dictionary<string, RunnerImageResult>()), | ||||
|             eventPublisher, | ||||
|             metrics, | ||||
|             TimeProvider.System, | ||||
|             NullLogger<RunnerExecutionService>.Instance); | ||||
|  | ||||
|         var message = new RunnerSegmentQueueMessage( | ||||
|             segmentId: "run-123:0000", | ||||
|             runId: "run-123", | ||||
|             tenantId: "tenant-abc", | ||||
|             imageDigests: new[] { "sha256:3333333333333333333333333333333333333333333333333333333333333333" }, | ||||
|             scheduleId: "sched-1", | ||||
|             ratePerSecond: null, | ||||
|             usageOnly: true, | ||||
|             attributes: new Dictionary<string, string>(), | ||||
|             correlationId: null); | ||||
|  | ||||
|         var result = await service.ExecuteAsync(message, CancellationToken.None); | ||||
|  | ||||
|         Assert.Equal(RunnerSegmentExecutionStatus.RunMissing, result.Status); | ||||
|     } | ||||
|  | ||||
|     private static Run CreateRun() | ||||
|         => new( | ||||
|             id: "run-123", | ||||
|             tenantId: "tenant-abc", | ||||
|             trigger: RunTrigger.Cron, | ||||
|             state: RunState.Queued, | ||||
|             stats: new RunStats( | ||||
|                 candidates: 4, | ||||
|                 deduped: 4, | ||||
|                 queued: 2, | ||||
|                 completed: 0, | ||||
|                 deltas: 0, | ||||
|                 newCriticals: 0, | ||||
|                 newHigh: 0, | ||||
|                 newMedium: 0, | ||||
|                 newLow: 0), | ||||
|             createdAt: DateTimeOffset.UtcNow.AddMinutes(-10), | ||||
|             scheduleId: "sched-1"); | ||||
|  | ||||
|     private static ImpactImage CreateImpactImage(string digest, string registry, string repository) | ||||
|         => new( | ||||
|             imageDigest: digest, | ||||
|             registry: registry, | ||||
|             repository: repository, | ||||
|             namespaces: null, | ||||
|             tags: null, | ||||
|             usedByEntrypoint: false, | ||||
|             labels: null); | ||||
|  | ||||
|     private static RunnerImageResult CreateRunnerImageResult(string digest, DeltaSummary? delta) | ||||
|     { | ||||
|         var newTotal = delta?.NewFindings ?? 0; | ||||
|         var summary = new RunnerReportSummary( | ||||
|             Total: newTotal, | ||||
|             Blocked: delta?.NewCriticals ?? 0, | ||||
|             Warned: delta?.NewHigh ?? 0, | ||||
|             Ignored: delta?.NewLow ?? 0, | ||||
|             Quieted: 0); | ||||
|  | ||||
|         var snapshot = new RunnerReportSnapshot( | ||||
|             ReportId: $"report-{digest[^4..]}", | ||||
|             ImageDigest: digest, | ||||
|             Verdict: "warn", | ||||
|             GeneratedAt: DateTimeOffset.UtcNow, | ||||
|             Summary: summary, | ||||
|             PolicyRevisionId: "pol-rev", | ||||
|             PolicyDigest: "pol-digest"); | ||||
|  | ||||
|         return new RunnerImageResult( | ||||
|             digest, | ||||
|             delta, | ||||
|             ContentRefreshed: false, | ||||
|             snapshot, | ||||
|             Dsse: null); | ||||
|     } | ||||
|  | ||||
|     private sealed class InMemoryRunRepository : IRunRepository | ||||
|     { | ||||
|         private readonly ConcurrentDictionary<(string Tenant, string RunId), Run> _runs = new(); | ||||
|  | ||||
|         public InMemoryRunRepository(params Run[] runs) | ||||
|         { | ||||
|             foreach (var run in runs) | ||||
|             { | ||||
|                 _runs[(run.TenantId, run.Id)] = run; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public Task InsertAsync(Run run, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _runs[(run.TenantId, run.Id)] = run; | ||||
|             return Task.CompletedTask; | ||||
|         } | ||||
|  | ||||
|         public Task<bool> UpdateAsync(Run run, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _runs[(run.TenantId, run.Id)] = run; | ||||
|             return Task.FromResult(true); | ||||
|         } | ||||
|  | ||||
|         public Task<Run?> GetAsync(string tenantId, string runId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             _runs.TryGetValue((tenantId, runId), out var run); | ||||
|             return Task.FromResult(run); | ||||
|         } | ||||
|  | ||||
|         public Task<IReadOnlyList<Run>> ListAsync(string tenantId, RunQueryOptions? options = null, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<Run>>(_runs.Values.Where(run => run.TenantId == tenantId).ToArray()); | ||||
|  | ||||
|         public Task<IReadOnlyList<Run>> ListByStateAsync(RunState state, int limit = 50, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<Run>>(_runs.Values.Where(run => run.State == state).Take(limit).ToArray()); | ||||
|  | ||||
|         public Run? GetSnapshot(string tenantId, string runId) | ||||
|         { | ||||
|             _runs.TryGetValue((tenantId, runId), out var run); | ||||
|             return run; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private sealed class InMemoryImpactSnapshotRepository : IImpactSnapshotRepository | ||||
|     { | ||||
|         private readonly string _snapshotId; | ||||
|         private readonly ImpactSet _snapshot; | ||||
|  | ||||
|         public InMemoryImpactSnapshotRepository(string runId, IEnumerable<ImpactImage> images) | ||||
|         { | ||||
|             _snapshotId = $"impact::{runId}"; | ||||
|             var imageArray = images.ToImmutableArray(); | ||||
|             _snapshot = new ImpactSet( | ||||
|                 new Selector(SelectorScope.AllImages, "tenant-abc"), | ||||
|                 imageArray, | ||||
|                 usageOnly: true, | ||||
|                 generatedAt: DateTimeOffset.UtcNow, | ||||
|                 total: imageArray.Length); | ||||
|         } | ||||
|  | ||||
|         public Task UpsertAsync(ImpactSet snapshot, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.CompletedTask; | ||||
|  | ||||
|         public Task<ImpactSet?> GetBySnapshotIdAsync(string snapshotId, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<ImpactSet?>(string.Equals(snapshotId, _snapshotId, StringComparison.Ordinal) ? _snapshot : null); | ||||
|  | ||||
|         public Task<ImpactSet?> GetLatestBySelectorAsync(Selector selector, IClientSessionHandle? session = null, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<ImpactSet?>(_snapshot); | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingSchedulerEventPublisher : ISchedulerEventPublisher | ||||
|     { | ||||
|         public List<(Run run, RunnerImageResult result)> ReportReady { get; } = new(); | ||||
|  | ||||
|         public List<(Run run, IReadOnlyList<DeltaSummary> deltas)> RescanDeltaPayloads { get; } = new(); | ||||
|  | ||||
|         public Task PublishReportReadyAsync(Run run, RunnerSegmentQueueMessage message, RunnerImageResult result, ImpactImage? impactImage, CancellationToken cancellationToken) | ||||
|         { | ||||
|             ReportReady.Add((run, result)); | ||||
|             return Task.CompletedTask; | ||||
|         } | ||||
|  | ||||
|         public Task PublishRescanDeltaAsync(Run run, RunnerSegmentQueueMessage message, IReadOnlyList<DeltaSummary> deltas, IReadOnlyDictionary<string, ImpactImage> impactLookup, CancellationToken cancellationToken) | ||||
|         { | ||||
|             RescanDeltaPayloads.Add((run, deltas)); | ||||
|             return Task.CompletedTask; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingRunSummaryService : IRunSummaryService | ||||
|     { | ||||
|         public Run? LastProjected { get; private set; } | ||||
|  | ||||
|         public Task<RunSummaryProjection> ProjectAsync(Run run, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             LastProjected = run; | ||||
|             return Task.FromResult(new RunSummaryProjection( | ||||
|                 run.TenantId, | ||||
|                 run.ScheduleId ?? string.Empty, | ||||
|                 DateTimeOffset.UtcNow, | ||||
|                 null, | ||||
|                 ImmutableArray<RunSummarySnapshot>.Empty, | ||||
|                 new RunSummaryCounters(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); | ||||
|         } | ||||
|  | ||||
|         public Task<RunSummaryProjection?> GetAsync(string tenantId, string scheduleId, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<RunSummaryProjection?>(null); | ||||
|  | ||||
|         public Task<IReadOnlyList<RunSummaryProjection>> ListAsync(string tenantId, CancellationToken cancellationToken = default) | ||||
|             => Task.FromResult<IReadOnlyList<RunSummaryProjection>>(Array.Empty<RunSummaryProjection>()); | ||||
|     } | ||||
|  | ||||
|     private sealed class StubScannerReportClient : IScannerReportClient | ||||
|     { | ||||
|         private readonly IReadOnlyDictionary<string, RunnerImageResult> _responses; | ||||
|  | ||||
|         public StubScannerReportClient(IReadOnlyDictionary<string, RunnerImageResult> responses) | ||||
|         { | ||||
|             _responses = responses; | ||||
|         } | ||||
|  | ||||
|         public Task<RunnerImageResult> ExecuteAsync(ScannerReportRequest request, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             if (_responses.TryGetValue(request.ImageDigest, out var result)) | ||||
|             { | ||||
|                 return Task.FromResult(result); | ||||
|             } | ||||
|  | ||||
|             return Task.FromResult(CreateRunnerImageResult(request.ImageDigest, delta: null)); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,140 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Text.Json.Nodes; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Logging.Abstractions; | ||||
| using StellaOps.Notify.Models; | ||||
| using StellaOps.Notify.Queue; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Worker.Events; | ||||
| using StellaOps.Scheduler.Worker.Execution; | ||||
| using Xunit; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Tests; | ||||
|  | ||||
| public sealed class SchedulerEventPublisherTests | ||||
| { | ||||
|     [Fact] | ||||
|     public async Task PublishReportReadyAsync_EnqueuesNotifyEvent() | ||||
|     { | ||||
|         var queue = new RecordingNotifyEventQueue(); | ||||
|         var options = new NotifyEventQueueOptions(); | ||||
|         var publisher = new SchedulerEventPublisher(queue, options, TimeProvider.System, NullLogger<SchedulerEventPublisher>.Instance); | ||||
|         var run = CreateRun(); | ||||
|         var message = CreateMessage(run); | ||||
|         var delta = new DeltaSummary( | ||||
|             run.Id, | ||||
|             newFindings: 2, | ||||
|             newCriticals: 1, | ||||
|             newHigh: 1, | ||||
|             newMedium: 0, | ||||
|             newLow: 0); | ||||
|         var result = CreateRunnerImageResult(run.Id, delta); | ||||
|         var impact = new ImpactImage(run.Id, "registry", "repository"); | ||||
|  | ||||
|         await publisher.PublishReportReadyAsync(run, message, result, impact, CancellationToken.None); | ||||
|  | ||||
|         Assert.Single(queue.Messages); | ||||
|         var notifyEvent = queue.Messages[0].Event; | ||||
|         Assert.Equal(NotifyEventKinds.ScannerReportReady, notifyEvent.Kind); | ||||
|         Assert.Equal(run.TenantId, notifyEvent.Tenant); | ||||
|         Assert.NotNull(notifyEvent.Scope); | ||||
|         Assert.Equal("repository", notifyEvent.Scope!.Repo); | ||||
|  | ||||
|         var payload = Assert.IsType<JsonObject>(notifyEvent.Payload); | ||||
|         Assert.Equal(result.Report.ReportId, payload["reportId"]!.GetValue<string>()); | ||||
|         Assert.Equal("warn", payload["verdict"]!.GetValue<string>()); | ||||
|         var deltaNode = Assert.IsType<JsonObject>(payload["delta"]); | ||||
|         Assert.Equal(1, deltaNode["newCritical"]!.GetValue<int>()); | ||||
|     } | ||||
|  | ||||
|     [Fact] | ||||
|     public async Task PublishRescanDeltaAsync_EnqueuesDeltaEvent() | ||||
|     { | ||||
|         var queue = new RecordingNotifyEventQueue(); | ||||
|         var options = new NotifyEventQueueOptions(); | ||||
|         var publisher = new SchedulerEventPublisher(queue, options, TimeProvider.System, NullLogger<SchedulerEventPublisher>.Instance); | ||||
|         var run = CreateRun(); | ||||
|         var message = CreateMessage(run); | ||||
|         var delta = new DeltaSummary(run.Id, 1, 1, 0, 0, 0); | ||||
|         var impactLookup = new Dictionary<string, ImpactImage> | ||||
|         { | ||||
|             [run.Id] = new ImpactImage(run.Id, "registry", "repository") | ||||
|         }; | ||||
|  | ||||
|         await publisher.PublishRescanDeltaAsync(run, message, new[] { delta }, impactLookup, CancellationToken.None); | ||||
|  | ||||
|         Assert.Single(queue.Messages); | ||||
|         var notifyEvent = queue.Messages[0].Event; | ||||
|         Assert.Equal(NotifyEventKinds.SchedulerRescanDelta, notifyEvent.Kind); | ||||
|         var payload = Assert.IsType<JsonObject>(notifyEvent.Payload); | ||||
|         var digests = Assert.IsType<JsonArray>(payload["impactedDigests"]); | ||||
|         Assert.Equal(run.Id, digests[0]!.GetValue<string>()); | ||||
|     } | ||||
|  | ||||
|     private const string SampleDigest = "sha256:1111111111111111111111111111111111111111111111111111111111111111"; | ||||
|  | ||||
|     private static Run CreateRun() | ||||
|         => new( | ||||
|             id: SampleDigest, | ||||
|             tenantId: "tenant-1", | ||||
|             trigger: RunTrigger.Cron, | ||||
|             state: RunState.Running, | ||||
|             stats: new RunStats(queued: 1, completed: 0), | ||||
|             createdAt: DateTimeOffset.UtcNow, | ||||
|             scheduleId: "schedule-1"); | ||||
|  | ||||
|     private static RunnerSegmentQueueMessage CreateMessage(Run run) | ||||
|         => new( | ||||
|             segmentId: $"{run.Id}:0000", | ||||
|             runId: run.Id, | ||||
|             tenantId: run.TenantId, | ||||
|             imageDigests: new[] { run.Id }, | ||||
|             scheduleId: run.ScheduleId, | ||||
|             ratePerSecond: null, | ||||
|             usageOnly: true, | ||||
|             attributes: new Dictionary<string, string>(StringComparer.Ordinal) | ||||
|             { | ||||
|                 ["scheduleMode"] = ScheduleMode.AnalysisOnly.ToString() | ||||
|             }); | ||||
|  | ||||
|     private static RunnerImageResult CreateRunnerImageResult(string digest, DeltaSummary? delta) | ||||
|     { | ||||
|         var summary = new RunnerReportSummary( | ||||
|             Total: delta?.NewFindings ?? 0, | ||||
|             Blocked: delta?.NewCriticals ?? 0, | ||||
|             Warned: delta?.NewHigh ?? 0, | ||||
|             Ignored: delta?.NewLow ?? 0, | ||||
|             Quieted: 0); | ||||
|  | ||||
|         var snapshot = new RunnerReportSnapshot( | ||||
|             ReportId: $"report-{digest[^4..]}", | ||||
|             ImageDigest: digest, | ||||
|             Verdict: "warn", | ||||
|             GeneratedAt: DateTimeOffset.UtcNow, | ||||
|             Summary: summary, | ||||
|             PolicyRevisionId: null, | ||||
|             PolicyDigest: null); | ||||
|  | ||||
|         return new RunnerImageResult(digest, delta, ContentRefreshed: false, snapshot, Dsse: null); | ||||
|     } | ||||
|  | ||||
|     private sealed class RecordingNotifyEventQueue : INotifyEventQueue | ||||
|     { | ||||
|         public List<NotifyQueueEventMessage> Messages { get; } = new(); | ||||
|  | ||||
|         public ValueTask<NotifyQueueEnqueueResult> PublishAsync(NotifyQueueEventMessage message, CancellationToken cancellationToken = default) | ||||
|         { | ||||
|             Messages.Add(message); | ||||
|             return ValueTask.FromResult(new NotifyQueueEnqueueResult(Guid.NewGuid().ToString("N"), false)); | ||||
|         } | ||||
|  | ||||
|         public ValueTask<IReadOnlyList<INotifyQueueLease<NotifyQueueEventMessage>>> LeaseAsync(NotifyQueueLeaseRequest request, CancellationToken cancellationToken = default) | ||||
|             => throw new NotSupportedException(); | ||||
|  | ||||
|         public ValueTask<IReadOnlyList<INotifyQueueLease<NotifyQueueEventMessage>>> ClaimExpiredAsync(NotifyQueueClaimOptions options, CancellationToken cancellationToken = default) | ||||
|             => throw new NotSupportedException(); | ||||
|     } | ||||
| } | ||||
| @@ -3,15 +3,19 @@ | ||||
|     <TargetFramework>net10.0</TargetFramework> | ||||
|     <ImplicitUsings>enable</ImplicitUsings> | ||||
|     <Nullable>enable</Nullable> | ||||
|     <UseConcelierTestInfra>false</UseConcelierTestInfra> | ||||
|   </PropertyGroup> | ||||
|   <ItemGroup> | ||||
|     <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.14.0" /> | ||||
|     <PackageReference Include="Moq" Version="4.20.70" /> | ||||
|     <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0-rc.2.25502.107" /> | ||||
|     <PackageReference Include="MongoDB.Driver" Version="3.5.0" /> | ||||
|     <PackageReference Include="xunit" Version="2.9.2" /> | ||||
|     <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" /> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <ProjectReference Include="../StellaOps.Scheduler.Worker/StellaOps.Scheduler.Worker.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Scheduler.Models/StellaOps.Scheduler.Models.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Notify.Models/StellaOps.Notify.Models.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Notify.Queue/StellaOps.Notify.Queue.csproj" /> | ||||
|   </ItemGroup> | ||||
| </Project> | ||||
|   | ||||
| @@ -0,0 +1,59 @@ | ||||
| using Microsoft.Extensions.Configuration; | ||||
| using Microsoft.Extensions.DependencyInjection; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Notify.Queue; | ||||
| using StellaOps.Scheduler.Worker.Events; | ||||
| using StellaOps.Scheduler.Worker.Execution; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
| using StellaOps.Scheduler.Worker.Observability; | ||||
| using StellaOps.Scheduler.Worker.Planning; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.DependencyInjection; | ||||
|  | ||||
| public static class SchedulerWorkerServiceCollectionExtensions | ||||
| { | ||||
|     public static IServiceCollection AddSchedulerWorker(this IServiceCollection services, IConfiguration configuration) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(services); | ||||
|         ArgumentNullException.ThrowIfNull(configuration); | ||||
|  | ||||
|         services | ||||
|             .AddOptions<SchedulerWorkerOptions>() | ||||
|             .Bind(configuration) | ||||
|             .PostConfigure(options => options.Validate()); | ||||
|  | ||||
|         services.AddSingleton(TimeProvider.System); | ||||
|         services.AddSingleton<SchedulerWorkerMetrics>(); | ||||
|         services.AddSingleton<IImpactTargetingService, ImpactTargetingService>(); | ||||
|         services.AddSingleton<IImpactShardPlanner, ImpactShardPlanner>(); | ||||
|         services.AddSingleton<IPlannerQueueDispatchService, PlannerQueueDispatchService>(); | ||||
|         services.AddSingleton<PlannerExecutionService>(); | ||||
|         services.AddSingleton<IRunnerExecutionService, RunnerExecutionService>(); | ||||
|         services.AddSingleton<ISchedulerEventPublisher>(sp => | ||||
|         { | ||||
|             var loggerFactory = sp.GetRequiredService<ILoggerFactory>(); | ||||
|             var queue = sp.GetService<INotifyEventQueue>(); | ||||
|             var queueOptions = sp.GetService<NotifyEventQueueOptions>(); | ||||
|             var timeProvider = sp.GetRequiredService<TimeProvider>(); | ||||
|  | ||||
|             if (queue is null || queueOptions is null) | ||||
|             { | ||||
|                 return new NullSchedulerEventPublisher(loggerFactory.CreateLogger<NullSchedulerEventPublisher>()); | ||||
|             } | ||||
|  | ||||
|             return new SchedulerEventPublisher( | ||||
|                 queue, | ||||
|                 queueOptions, | ||||
|                 timeProvider, | ||||
|                 loggerFactory.CreateLogger<SchedulerEventPublisher>()); | ||||
|         }); | ||||
|  | ||||
|         services.AddHttpClient<IScannerReportClient, HttpScannerReportClient>(); | ||||
|  | ||||
|         services.AddHostedService<PlannerBackgroundService>(); | ||||
|         services.AddHostedService<PlannerQueueDispatcherBackgroundService>(); | ||||
|         services.AddHostedService<RunnerBackgroundService>(); | ||||
|  | ||||
|         return services; | ||||
|     } | ||||
| } | ||||
							
								
								
									
										501
									
								
								src/StellaOps.Scheduler.Worker/Events/SchedulerEventPublisher.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										501
									
								
								src/StellaOps.Scheduler.Worker/Events/SchedulerEventPublisher.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,501 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Linq; | ||||
| using System.Text.Json.Nodes; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Notify.Models; | ||||
| using StellaOps.Notify.Queue; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Worker.Execution; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Events; | ||||
|  | ||||
| internal interface ISchedulerEventPublisher | ||||
| { | ||||
|     Task PublishReportReadyAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         RunnerImageResult result, | ||||
|         ImpactImage? impactImage, | ||||
|         CancellationToken cancellationToken); | ||||
|  | ||||
|     Task PublishRescanDeltaAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         IReadOnlyList<DeltaSummary> deltas, | ||||
|         IReadOnlyDictionary<string, ImpactImage> impactLookup, | ||||
|         CancellationToken cancellationToken); | ||||
| } | ||||
|  | ||||
| internal sealed class SchedulerEventPublisher : ISchedulerEventPublisher | ||||
| { | ||||
|     private const string Source = "scheduler.worker"; | ||||
|  | ||||
|     private readonly INotifyEventQueue _queue; | ||||
|     private readonly NotifyEventQueueOptions _queueOptions; | ||||
|     private readonly TimeProvider _timeProvider; | ||||
|     private readonly ILogger<SchedulerEventPublisher> _logger; | ||||
|     private readonly string _stream; | ||||
|  | ||||
|     public SchedulerEventPublisher( | ||||
|         INotifyEventQueue queue, | ||||
|         NotifyEventQueueOptions queueOptions, | ||||
|         TimeProvider timeProvider, | ||||
|         ILogger<SchedulerEventPublisher> logger) | ||||
|     { | ||||
|         _queue = queue ?? throw new ArgumentNullException(nameof(queue)); | ||||
|         _queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions)); | ||||
|         _timeProvider = timeProvider ?? TimeProvider.System; | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|         _stream = ResolveStream(queueOptions); | ||||
|     } | ||||
|  | ||||
|     public async Task PublishReportReadyAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         RunnerImageResult result, | ||||
|         ImpactImage? impactImage, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(run); | ||||
|         ArgumentNullException.ThrowIfNull(message); | ||||
|         ArgumentNullException.ThrowIfNull(result); | ||||
|  | ||||
|         if (result.Report is null) | ||||
|         { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         var now = _timeProvider.GetUtcNow(); | ||||
|         var occurredAt = result.Report.GeneratedAt == default ? now : result.Report.GeneratedAt; | ||||
|         var scope = BuildScope(result.ImageDigest, impactImage); | ||||
|         var payload = BuildReportPayload(result); | ||||
|         var attributes = BuildReportAttributes(run, message, result, impactImage); | ||||
|  | ||||
|         var notifyEvent = NotifyEvent.Create( | ||||
|             eventId: Guid.NewGuid(), | ||||
|             kind: NotifyEventKinds.ScannerReportReady, | ||||
|             tenant: run.TenantId, | ||||
|             ts: occurredAt, | ||||
|             payload: payload, | ||||
|             scope: scope, | ||||
|             version: "1", | ||||
|             actor: Source, | ||||
|             attributes: attributes); | ||||
|  | ||||
|         await PublishAsync(notifyEvent, run, message, cancellationToken).ConfigureAwait(false); | ||||
|     } | ||||
|  | ||||
|     public async Task PublishRescanDeltaAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         IReadOnlyList<DeltaSummary> deltas, | ||||
|         IReadOnlyDictionary<string, ImpactImage> impactLookup, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(run); | ||||
|         ArgumentNullException.ThrowIfNull(message); | ||||
|         ArgumentNullException.ThrowIfNull(deltas); | ||||
|  | ||||
|         if (deltas.Count == 0) | ||||
|         { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         var now = _timeProvider.GetUtcNow(); | ||||
|         var payload = BuildRescanPayload(run, deltas); | ||||
|         var attributes = BuildRescanAttributes(run, message, deltas, impactLookup); | ||||
|  | ||||
|         var notifyEvent = NotifyEvent.Create( | ||||
|             eventId: Guid.NewGuid(), | ||||
|             kind: NotifyEventKinds.SchedulerRescanDelta, | ||||
|             tenant: run.TenantId, | ||||
|             ts: now, | ||||
|             payload: payload, | ||||
|             version: "1", | ||||
|             actor: Source, | ||||
|             attributes: attributes); | ||||
|  | ||||
|         await PublishAsync(notifyEvent, run, message, cancellationToken).ConfigureAwait(false); | ||||
|     } | ||||
|  | ||||
|     private async Task PublishAsync( | ||||
|         NotifyEvent notifyEvent, | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         var partitionKey = string.IsNullOrWhiteSpace(run.ScheduleId) ? run.Id : run.ScheduleId!; | ||||
|         var traceId = string.IsNullOrWhiteSpace(message.CorrelationId) ? null : message.CorrelationId!.Trim(); | ||||
|  | ||||
|         var queueAttributes = new Dictionary<string, string>(StringComparer.Ordinal) | ||||
|         { | ||||
|             ["source"] = Source | ||||
|         }; | ||||
|  | ||||
|         try | ||||
|         { | ||||
|             var queueMessage = new NotifyQueueEventMessage( | ||||
|                 notifyEvent, | ||||
|                 _stream, | ||||
|                 partitionKey: partitionKey, | ||||
|                 traceId: traceId, | ||||
|                 attributes: queueAttributes); | ||||
|  | ||||
|             await _queue.PublishAsync(queueMessage, cancellationToken).ConfigureAwait(false); | ||||
|         } | ||||
|         catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) | ||||
|         { | ||||
|             throw; | ||||
|         } | ||||
|         catch (Exception ex) | ||||
|         { | ||||
|             _logger.LogError( | ||||
|                 ex, | ||||
|                 "Failed to publish scheduler event {EventKind} for run {RunId}.", | ||||
|                 notifyEvent.Kind, | ||||
|                 run.Id); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static NotifyEventScope BuildScope(string imageDigest, ImpactImage? impactImage) | ||||
|     { | ||||
|         var repo = impactImage?.Repository; | ||||
|         if (string.IsNullOrWhiteSpace(repo)) | ||||
|         { | ||||
|             repo = "(unknown)"; | ||||
|         } | ||||
|  | ||||
|         var @namespace = impactImage?.Namespaces.IsDefaultOrEmpty == false | ||||
|             ? impactImage.Namespaces[0] | ||||
|             : impactImage?.Registry; | ||||
|  | ||||
|         return NotifyEventScope.Create( | ||||
|             @namespace: string.IsNullOrWhiteSpace(@namespace) ? null : @namespace, | ||||
|             repo: repo, | ||||
|             digest: imageDigest); | ||||
|     } | ||||
|  | ||||
|     private static JsonObject BuildReportPayload(RunnerImageResult result) | ||||
|     { | ||||
|         var payload = new JsonObject | ||||
|         { | ||||
|             ["reportId"] = result.Report.ReportId, | ||||
|             ["verdict"] = string.IsNullOrWhiteSpace(result.Report.Verdict) | ||||
|                 ? "warn" | ||||
|                 : result.Report.Verdict.ToLowerInvariant() | ||||
|         }; | ||||
|  | ||||
|         if (result.Report.GeneratedAt != default) | ||||
|         { | ||||
|             payload["generatedAt"] = JsonValue.Create(result.Report.GeneratedAt.ToUniversalTime()); | ||||
|         } | ||||
|  | ||||
|         payload["summary"] = BuildSummaryNode(result.Report.Summary); | ||||
|         payload["delta"] = BuildDeltaNode(result.Delta); | ||||
|         payload["links"] = BuildLinksNode(result.Delta); | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(result.Report.PolicyRevisionId) || | ||||
|             !string.IsNullOrWhiteSpace(result.Report.PolicyDigest)) | ||||
|         { | ||||
|             var policy = new JsonObject(); | ||||
|             if (!string.IsNullOrWhiteSpace(result.Report.PolicyRevisionId)) | ||||
|             { | ||||
|                 policy["revisionId"] = result.Report.PolicyRevisionId; | ||||
|             } | ||||
|  | ||||
|             if (!string.IsNullOrWhiteSpace(result.Report.PolicyDigest)) | ||||
|             { | ||||
|                 policy["digest"] = result.Report.PolicyDigest; | ||||
|             } | ||||
|  | ||||
|             payload["policy"] = policy; | ||||
|         } | ||||
|  | ||||
|         if (result.Report.Summary.Quieted > 0) | ||||
|         { | ||||
|             payload["quietedFindingCount"] = result.Report.Summary.Quieted; | ||||
|         } | ||||
|  | ||||
|         if (result.Dsse is not null) | ||||
|         { | ||||
|             payload["dsse"] = BuildDsseNode(result.Dsse); | ||||
|         } | ||||
|  | ||||
|         return payload; | ||||
|     } | ||||
|  | ||||
|     private static JsonObject BuildSummaryNode(RunnerReportSummary summary) | ||||
|     { | ||||
|         return new JsonObject | ||||
|         { | ||||
|             ["total"] = summary.Total, | ||||
|             ["blocked"] = summary.Blocked, | ||||
|             ["warned"] = summary.Warned, | ||||
|             ["ignored"] = summary.Ignored, | ||||
|             ["quieted"] = summary.Quieted | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     private static JsonObject BuildDeltaNode(DeltaSummary? delta) | ||||
|     { | ||||
|         var node = new JsonObject | ||||
|         { | ||||
|             ["newCritical"] = delta?.NewCriticals ?? 0, | ||||
|             ["newHigh"] = delta?.NewHigh ?? 0 | ||||
|         }; | ||||
|  | ||||
|         var kevArray = new JsonArray(); | ||||
|         if (delta is not null && !delta.KevHits.IsDefaultOrEmpty) | ||||
|         { | ||||
|             foreach (var kev in delta.KevHits) | ||||
|             { | ||||
|                 kevArray.Add(kev); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         node["kev"] = kevArray; | ||||
|         return node; | ||||
|     } | ||||
|  | ||||
|     private static JsonObject BuildLinksNode(DeltaSummary? delta) | ||||
|     { | ||||
|         var links = new JsonObject(); | ||||
|         if (delta is not null && !string.IsNullOrWhiteSpace(delta.ReportUrl)) | ||||
|         { | ||||
|             links["ui"] = delta.ReportUrl; | ||||
|         } | ||||
|  | ||||
|         if (delta?.Attestation?.Uuid is { Length: > 0 } uuid) | ||||
|         { | ||||
|             links["rekor"] = uuid; | ||||
|         } | ||||
|  | ||||
|         return links; | ||||
|     } | ||||
|  | ||||
|     private static JsonObject BuildDsseNode(RunnerDsseEnvelope envelope) | ||||
|     { | ||||
|         var node = new JsonObject | ||||
|         { | ||||
|             ["payloadType"] = envelope.PayloadType, | ||||
|             ["payload"] = envelope.Payload | ||||
|         }; | ||||
|  | ||||
|         if (envelope.Signatures.Count > 0) | ||||
|         { | ||||
|             var signatures = new JsonArray(); | ||||
|             foreach (var signature in envelope.Signatures) | ||||
|             { | ||||
|                 signatures.Add(new JsonObject | ||||
|                 { | ||||
|                     ["keyId"] = signature.KeyId, | ||||
|                     ["algorithm"] = signature.Algorithm, | ||||
|                     ["signature"] = signature.Signature | ||||
|                 }); | ||||
|             } | ||||
|  | ||||
|             node["signatures"] = signatures; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             node["signatures"] = new JsonArray(); | ||||
|         } | ||||
|  | ||||
|         return node; | ||||
|     } | ||||
|  | ||||
|     private static IEnumerable<KeyValuePair<string, string>> BuildReportAttributes( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         RunnerImageResult result, | ||||
|         ImpactImage? impactImage) | ||||
|     { | ||||
|         var attributes = new Dictionary<string, string>(StringComparer.Ordinal) | ||||
|         { | ||||
|             ["source"] = Source, | ||||
|             ["runId"] = run.Id, | ||||
|             ["segmentId"] = message.SegmentId, | ||||
|             ["trigger"] = run.Trigger.ToString(), | ||||
|             ["scheduleId"] = run.ScheduleId ?? string.Empty, | ||||
|             ["reportId"] = result.Report.ReportId, | ||||
|             ["verdict"] = string.IsNullOrWhiteSpace(result.Report.Verdict) | ||||
|                 ? "warn" | ||||
|                 : result.Report.Verdict.ToLowerInvariant() | ||||
|         }; | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(message.CorrelationId)) | ||||
|         { | ||||
|             attributes["correlationId"] = message.CorrelationId!; | ||||
|         } | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(result.Report.PolicyRevisionId)) | ||||
|         { | ||||
|             attributes["policyRevisionId"] = result.Report.PolicyRevisionId!; | ||||
|         } | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(result.Report.PolicyDigest)) | ||||
|         { | ||||
|             attributes["policyDigest"] = result.Report.PolicyDigest!; | ||||
|         } | ||||
|  | ||||
|         if (impactImage is not null) | ||||
|         { | ||||
|             attributes["registry"] = impactImage.Registry; | ||||
|             attributes["repository"] = impactImage.Repository; | ||||
|         } | ||||
|  | ||||
|         if (result.Delta is not null) | ||||
|         { | ||||
|             attributes["deltaImages"] = "1"; | ||||
|             attributes["deltaNewCritical"] = result.Delta.NewCriticals.ToString(); | ||||
|             attributes["deltaNewHigh"] = result.Delta.NewHigh.ToString(); | ||||
|         } | ||||
|  | ||||
|         return attributes; | ||||
|     } | ||||
|  | ||||
|     private static JsonObject BuildRescanPayload(Run run, IReadOnlyList<DeltaSummary> deltas) | ||||
|     { | ||||
|         var totalFindings = deltas.Sum(delta => delta.NewFindings); | ||||
|         if (totalFindings <= 0) | ||||
|         { | ||||
|             totalFindings = deltas.Count; | ||||
|         } | ||||
|  | ||||
|         var payload = new JsonObject | ||||
|         { | ||||
|             ["scheduleId"] = string.IsNullOrWhiteSpace(run.ScheduleId) ? string.Empty : run.ScheduleId, | ||||
|             ["impactedDigests"] = new JsonArray(deltas.Select(delta => JsonValue.Create(delta.ImageDigest)).ToArray()), | ||||
|             ["summary"] = new JsonObject | ||||
|             { | ||||
|                 ["newCritical"] = deltas.Sum(delta => delta.NewCriticals), | ||||
|                 ["newHigh"] = deltas.Sum(delta => delta.NewHigh), | ||||
|                 ["total"] = totalFindings | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         var reason = BuildReason(run.Reason); | ||||
|         if (!string.IsNullOrWhiteSpace(reason)) | ||||
|         { | ||||
|             payload["reason"] = reason; | ||||
|         } | ||||
|  | ||||
|         return payload; | ||||
|     } | ||||
|  | ||||
|     private static IEnumerable<KeyValuePair<string, string>> BuildRescanAttributes( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         IReadOnlyList<DeltaSummary> deltas, | ||||
|         IReadOnlyDictionary<string, ImpactImage> impactLookup) | ||||
|     { | ||||
|         var attributes = new Dictionary<string, string>(StringComparer.Ordinal) | ||||
|         { | ||||
|             ["source"] = Source, | ||||
|             ["runId"] = run.Id, | ||||
|             ["segmentId"] = message.SegmentId, | ||||
|             ["trigger"] = run.Trigger.ToString(), | ||||
|             ["scheduleId"] = run.ScheduleId ?? string.Empty, | ||||
|             ["deltaCount"] = deltas.Count.ToString() | ||||
|         }; | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(message.CorrelationId)) | ||||
|         { | ||||
|             attributes["correlationId"] = message.CorrelationId!; | ||||
|         } | ||||
|  | ||||
|         if (impactLookup.Count > 0) | ||||
|         { | ||||
|             var repositories = deltas | ||||
|                 .Select(delta => impactLookup.TryGetValue(delta.ImageDigest, out var impact) ? impact.Repository : null) | ||||
|                 .Where(repo => !string.IsNullOrWhiteSpace(repo)) | ||||
|                 .Distinct(StringComparer.Ordinal) | ||||
|                 .ToArray(); | ||||
|  | ||||
|             if (repositories.Length > 0) | ||||
|             { | ||||
|                 attributes["repositories"] = string.Join(",", repositories); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return attributes; | ||||
|     } | ||||
|  | ||||
|     private static string? BuildReason(RunReason reason) | ||||
|     { | ||||
|         if (!string.IsNullOrWhiteSpace(reason.ManualReason)) | ||||
|         { | ||||
|             return $"manual:{reason.ManualReason}"; | ||||
|         } | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(reason.FeedserExportId)) | ||||
|         { | ||||
|             return $"feedser:{reason.FeedserExportId}"; | ||||
|         } | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(reason.VexerExportId)) | ||||
|         { | ||||
|             return $"vexer:{reason.VexerExportId}"; | ||||
|         } | ||||
|  | ||||
|         return null; | ||||
|     } | ||||
|  | ||||
|     private static string ResolveStream(NotifyEventQueueOptions options) | ||||
|     { | ||||
|         return options.Transport switch | ||||
|         { | ||||
|             NotifyQueueTransportKind.Nats => string.IsNullOrWhiteSpace(options.Nats.Subject) | ||||
|                 ? "notify.events" | ||||
|                 : options.Nats.Subject, | ||||
|             _ => options.Redis.Streams.Count > 0 && !string.IsNullOrWhiteSpace(options.Redis.Streams[0].Stream) | ||||
|                 ? options.Redis.Streams[0].Stream | ||||
|                 : "notify:events" | ||||
|         }; | ||||
|     } | ||||
| } | ||||
|  | ||||
| internal sealed class NullSchedulerEventPublisher : ISchedulerEventPublisher | ||||
| { | ||||
|     private readonly ILogger<NullSchedulerEventPublisher> _logger; | ||||
|     private int _hasWarned; | ||||
|  | ||||
|     public NullSchedulerEventPublisher(ILogger<NullSchedulerEventPublisher> logger) | ||||
|     { | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
|     public Task PublishReportReadyAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         RunnerImageResult result, | ||||
|         ImpactImage? impactImage, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         WarnOnce(); | ||||
|         return Task.CompletedTask; | ||||
|     } | ||||
|  | ||||
|     public Task PublishRescanDeltaAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         IReadOnlyList<DeltaSummary> deltas, | ||||
|         IReadOnlyDictionary<string, ImpactImage> impactLookup, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         WarnOnce(); | ||||
|         return Task.CompletedTask; | ||||
|     } | ||||
|  | ||||
|     private void WarnOnce() | ||||
|     { | ||||
|         if (Interlocked.Exchange(ref _hasWarned, 1) == 0) | ||||
|         { | ||||
|             _logger.LogWarning("Notify event queue not configured; scheduler events will not be published."); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,319 @@ | ||||
| using System; | ||||
| using System.Linq; | ||||
| using System.Net.Http; | ||||
| using System.Net.Http.Json; | ||||
| using System.Text.Json; | ||||
| using System.Text.Json.Serialization; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using Microsoft.Extensions.Options; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Execution; | ||||
|  | ||||
| internal sealed class HttpScannerReportClient : IScannerReportClient | ||||
| { | ||||
|     private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web) | ||||
|     { | ||||
|         DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull | ||||
|     }; | ||||
|  | ||||
|     private readonly HttpClient _httpClient; | ||||
|     private readonly IOptions<SchedulerWorkerOptions> _options; | ||||
|     private readonly ILogger<HttpScannerReportClient> _logger; | ||||
|  | ||||
|     public HttpScannerReportClient( | ||||
|         HttpClient httpClient, | ||||
|         IOptions<SchedulerWorkerOptions> options, | ||||
|         ILogger<HttpScannerReportClient> logger) | ||||
|     { | ||||
|         _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient)); | ||||
|         _options = options ?? throw new ArgumentNullException(nameof(options)); | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
|     public async Task<RunnerImageResult> ExecuteAsync( | ||||
|         ScannerReportRequest request, | ||||
|         CancellationToken cancellationToken = default) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(request); | ||||
|  | ||||
|         var settings = _options.Value.Runner.Scanner; | ||||
|         ConfigureHttpClientBaseAddress(settings); | ||||
|  | ||||
|         if (request.Mode == ScheduleMode.ContentRefresh && settings.EnableContentRefresh) | ||||
|         { | ||||
|             await TriggerContentRefreshAsync(request, settings, cancellationToken).ConfigureAwait(false); | ||||
|         } | ||||
|  | ||||
|         var report = await FetchReportAsync(request, settings, cancellationToken).ConfigureAwait(false); | ||||
|         var reportSnapshot = BuildReportSnapshot(report, request.ImageDigest); | ||||
|         var delta = BuildDeltaSummary(report, request.ImageDigest); | ||||
|         var dsse = BuildDsseEnvelope(report); | ||||
|  | ||||
|         return new RunnerImageResult( | ||||
|             request.ImageDigest, | ||||
|             delta, | ||||
|             ContentRefreshed: request.Mode == ScheduleMode.ContentRefresh && settings.EnableContentRefresh, | ||||
|             reportSnapshot, | ||||
|             dsse); | ||||
|     } | ||||
|  | ||||
|     private void ConfigureHttpClientBaseAddress(SchedulerWorkerOptions.RunnerOptions.ScannerOptions settings) | ||||
|     { | ||||
|         if (settings.BaseAddress is not null && _httpClient.BaseAddress != settings.BaseAddress) | ||||
|         { | ||||
|             _httpClient.BaseAddress = settings.BaseAddress; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private async Task TriggerContentRefreshAsync( | ||||
|         ScannerReportRequest request, | ||||
|         SchedulerWorkerOptions.RunnerOptions.ScannerOptions settings, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         try | ||||
|         { | ||||
|             var response = await _httpClient.PostAsJsonAsync( | ||||
|                 settings.ScansPath, | ||||
|                 new ScanSubmitRequest(new ScanTargetRequest(null, request.ImageDigest)), | ||||
|                 SerializerOptions, | ||||
|                 cancellationToken).ConfigureAwait(false); | ||||
|  | ||||
|             if (!response.IsSuccessStatusCode) | ||||
|             { | ||||
|                 _logger.LogWarning( | ||||
|                     "Scanner content refresh submission returned status {StatusCode} for digest {Digest}.", | ||||
|                     (int)response.StatusCode, | ||||
|                     request.ImageDigest); | ||||
|             } | ||||
|         } | ||||
|         catch (HttpRequestException ex) | ||||
|         { | ||||
|             _logger.LogWarning( | ||||
|                 ex, | ||||
|                 "Scanner content refresh submission failed for digest {Digest}. Proceeding with report request.", | ||||
|                 request.ImageDigest); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private async Task<ReportResponse> FetchReportAsync( | ||||
|         ScannerReportRequest request, | ||||
|         SchedulerWorkerOptions.RunnerOptions.ScannerOptions settings, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         var payload = new ReportRequest { ImageDigest = request.ImageDigest }; | ||||
|         HttpResponseMessage? response = null; | ||||
|         var attempt = 0; | ||||
|  | ||||
|         while (true) | ||||
|         { | ||||
|             attempt++; | ||||
|             try | ||||
|             { | ||||
|                 response = await _httpClient.PostAsJsonAsync( | ||||
|                         settings.ReportsPath, | ||||
|                         payload, | ||||
|                         SerializerOptions, | ||||
|                         cancellationToken) | ||||
|                     .ConfigureAwait(false); | ||||
|  | ||||
|                 response.EnsureSuccessStatusCode(); | ||||
|  | ||||
|                 var report = await response.Content.ReadFromJsonAsync<ReportResponse>(SerializerOptions, cancellationToken) | ||||
|                     .ConfigureAwait(false); | ||||
|  | ||||
|                 if (report is null) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Scanner response payload was empty."); | ||||
|                 } | ||||
|  | ||||
|                 return report; | ||||
|             } | ||||
|             catch (Exception ex) when (IsTransient(ex) && attempt <= settings.MaxRetryAttempts) | ||||
|             { | ||||
|                 var delay = TimeSpan.FromMilliseconds(settings.RetryBaseDelay.TotalMilliseconds * Math.Pow(2, attempt - 1)); | ||||
|                 _logger.LogWarning( | ||||
|                     ex, | ||||
|                     "Scanner report attempt {Attempt} failed for digest {Digest}; retrying in {Delay}.", | ||||
|                     attempt, | ||||
|                     request.ImageDigest, | ||||
|                     delay); | ||||
|                 await Task.Delay(delay, cancellationToken).ConfigureAwait(false); | ||||
|             } | ||||
|             finally | ||||
|             { | ||||
|                 response?.Dispose(); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static bool IsTransient(Exception exception) | ||||
|         => exception is HttpRequestException or TaskCanceledException; | ||||
|  | ||||
|     private static RunnerReportSnapshot BuildReportSnapshot(ReportResponse report, string fallbackDigest) | ||||
|     { | ||||
|         var document = report.Report ?? new ReportDocument(); | ||||
|         var summary = document.Summary ?? new ReportSummary(); | ||||
|  | ||||
|         return new RunnerReportSnapshot( | ||||
|             string.IsNullOrWhiteSpace(document.ReportId) ? Guid.NewGuid().ToString("N") : document.ReportId, | ||||
|             string.IsNullOrWhiteSpace(document.ImageDigest) ? fallbackDigest : document.ImageDigest, | ||||
|             string.IsNullOrWhiteSpace(document.Verdict) ? "warn" : document.Verdict, | ||||
|             document.GeneratedAt, | ||||
|             new RunnerReportSummary( | ||||
|                 summary.Total, | ||||
|                 summary.Blocked, | ||||
|                 summary.Warned, | ||||
|                 summary.Ignored, | ||||
|                 summary.Quieted), | ||||
|             document.Policy?.RevisionId, | ||||
|             document.Policy?.Digest); | ||||
|     } | ||||
|  | ||||
|     private static RunnerDsseEnvelope? BuildDsseEnvelope(ReportResponse report) | ||||
|     { | ||||
|         if (report.Dsse is null || string.IsNullOrWhiteSpace(report.Dsse.PayloadType)) | ||||
|         { | ||||
|             return null; | ||||
|         } | ||||
|  | ||||
|         var signatures = report.Dsse.Signatures is null | ||||
|             ? Array.Empty<RunnerDsseSignature>() | ||||
|             : report.Dsse.Signatures | ||||
|                 .Where(signature => signature is not null) | ||||
|                 .Select(signature => new RunnerDsseSignature( | ||||
|                     signature!.KeyId, | ||||
|                     signature.Algorithm, | ||||
|                     signature.Signature)) | ||||
|                 .ToArray(); | ||||
|  | ||||
|         return new RunnerDsseEnvelope( | ||||
|             report.Dsse.PayloadType, | ||||
|             report.Dsse.Payload, | ||||
|             signatures); | ||||
|     } | ||||
|  | ||||
|     private static DeltaSummary? BuildDeltaSummary(ReportResponse report, string imageDigest) | ||||
|     { | ||||
|         if (report?.Report?.Summary is null) | ||||
|         { | ||||
|             return null; | ||||
|         } | ||||
|  | ||||
|         var summary = report.Report.Summary; | ||||
|         var blocked = summary.Blocked; | ||||
|         var warned = summary.Warned; | ||||
|         var ignored = summary.Ignored; | ||||
|         var newFindings = blocked + warned; | ||||
|  | ||||
|         if (newFindings == 0 && ignored == 0) | ||||
|         { | ||||
|             return null; | ||||
|         } | ||||
|  | ||||
|         return new DeltaSummary( | ||||
|             imageDigest, | ||||
|             newFindings, | ||||
|             newCriticals: blocked, | ||||
|             newHigh: warned, | ||||
|             newMedium: 0, | ||||
|             newLow: ignored, | ||||
|             kevHits: Array.Empty<string>(), | ||||
|             topFindings: Array.Empty<DeltaFinding>(), | ||||
|             reportUrl: null, | ||||
|             attestation: null, | ||||
|             detectedAt: report.Report.GeneratedAt == default ? null : report.Report.GeneratedAt); | ||||
|     } | ||||
|  | ||||
|     private sealed record ReportRequest | ||||
|     { | ||||
|         [JsonPropertyName("imageDigest")] | ||||
|         public string ImageDigest { get; init; } = string.Empty; | ||||
|     } | ||||
|  | ||||
|     private sealed record ReportResponse | ||||
|     { | ||||
|         [JsonPropertyName("report")] | ||||
|         public ReportDocument Report { get; init; } = new(); | ||||
|  | ||||
|         [JsonPropertyName("dsse")] | ||||
|         public DsseEnvelope? Dsse { get; init; } | ||||
|     } | ||||
|  | ||||
|     private sealed record ReportDocument | ||||
|     { | ||||
|         [JsonPropertyName("reportId")] | ||||
|         public string ReportId { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("imageDigest")] | ||||
|         public string ImageDigest { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("generatedAt")] | ||||
|         public DateTimeOffset GeneratedAt { get; init; } | ||||
|  | ||||
|         [JsonPropertyName("verdict")] | ||||
|         public string Verdict { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("policy")] | ||||
|         public ReportPolicy Policy { get; init; } = new(); | ||||
|  | ||||
|         [JsonPropertyName("summary")] | ||||
|         public ReportSummary Summary { get; init; } = new(); | ||||
|     } | ||||
|  | ||||
|     private sealed record ReportPolicy | ||||
|     { | ||||
|         [JsonPropertyName("revisionId")] | ||||
|         public string? RevisionId { get; init; } | ||||
|  | ||||
|         [JsonPropertyName("digest")] | ||||
|         public string? Digest { get; init; } | ||||
|     } | ||||
|  | ||||
|     private sealed record ReportSummary | ||||
|     { | ||||
|         [JsonPropertyName("total")] | ||||
|         public int Total { get; init; } | ||||
|  | ||||
|         [JsonPropertyName("blocked")] | ||||
|         public int Blocked { get; init; } | ||||
|  | ||||
|         [JsonPropertyName("warned")] | ||||
|         public int Warned { get; init; } | ||||
|  | ||||
|         [JsonPropertyName("ignored")] | ||||
|         public int Ignored { get; init; } | ||||
|  | ||||
|         [JsonPropertyName("quieted")] | ||||
|         public int Quieted { get; init; } | ||||
|     } | ||||
|  | ||||
|     private sealed record DsseEnvelope | ||||
|     { | ||||
|         [JsonPropertyName("payloadType")] | ||||
|         public string PayloadType { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("payload")] | ||||
|         public string Payload { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("signatures")] | ||||
|         public IReadOnlyList<DsseSignature> Signatures { get; init; } = Array.Empty<DsseSignature>(); | ||||
|     } | ||||
|  | ||||
|     private sealed record DsseSignature | ||||
|     { | ||||
|         [JsonPropertyName("keyId")] | ||||
|         public string KeyId { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("algorithm")] | ||||
|         public string Algorithm { get; init; } = string.Empty; | ||||
|  | ||||
|         [JsonPropertyName("signature")] | ||||
|         public string Signature { get; init; } = string.Empty; | ||||
|     } | ||||
|  | ||||
|     private sealed record ScanSubmitRequest(ScanTargetRequest Image); | ||||
|  | ||||
|     private sealed record ScanTargetRequest(string? Reference, string? Digest); | ||||
| } | ||||
| @@ -0,0 +1,145 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Hosting; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Execution; | ||||
|  | ||||
| internal sealed class RunnerBackgroundService : BackgroundService | ||||
| { | ||||
|     private readonly ISchedulerRunnerQueue _runnerQueue; | ||||
|     private readonly IRunnerExecutionService _executionService; | ||||
|     private readonly SchedulerWorkerOptions _options; | ||||
|     private readonly ILogger<RunnerBackgroundService> _logger; | ||||
|  | ||||
|     public RunnerBackgroundService( | ||||
|         ISchedulerRunnerQueue runnerQueue, | ||||
|         IRunnerExecutionService executionService, | ||||
|         SchedulerWorkerOptions options, | ||||
|         ILogger<RunnerBackgroundService> logger) | ||||
|     { | ||||
|         _runnerQueue = runnerQueue ?? throw new ArgumentNullException(nameof(runnerQueue)); | ||||
|         _executionService = executionService ?? throw new ArgumentNullException(nameof(executionService)); | ||||
|         _options = options ?? throw new ArgumentNullException(nameof(options)); | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
|     protected override async Task ExecuteAsync(CancellationToken stoppingToken) | ||||
|     { | ||||
|         var execOptions = _options.Runner.Execution; | ||||
|         var leaseRequest = new SchedulerQueueLeaseRequest(execOptions.ConsumerName, execOptions.BatchSize, execOptions.LeaseDuration); | ||||
|  | ||||
|         _logger.LogInformation("Runner execution loop started with consumer {Consumer}.", execOptions.ConsumerName); | ||||
|  | ||||
|         while (!stoppingToken.IsCancellationRequested) | ||||
|         { | ||||
|             IReadOnlyList<ISchedulerQueueLease<RunnerSegmentQueueMessage>> leases; | ||||
|             try | ||||
|             { | ||||
|                 leases = await _runnerQueue.LeaseAsync(leaseRequest, stoppingToken).ConfigureAwait(false); | ||||
|             } | ||||
|             catch (OperationCanceledException) | ||||
|             { | ||||
|                 break; | ||||
|             } | ||||
|             catch (Exception ex) | ||||
|             { | ||||
|                 _logger.LogError(ex, "Runner execution failed to lease segments; backing off."); | ||||
|                 await DelayAsync(execOptions.IdleDelay, stoppingToken).ConfigureAwait(false); | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             if (leases.Count == 0) | ||||
|             { | ||||
|                 await DelayAsync(execOptions.IdleDelay, stoppingToken).ConfigureAwait(false); | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             foreach (var lease in leases) | ||||
|             { | ||||
|                 await ProcessLeaseAsync(lease, execOptions.LeaseDuration, stoppingToken).ConfigureAwait(false); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         _logger.LogInformation("Runner execution loop stopping."); | ||||
|     } | ||||
|  | ||||
|     private async Task ProcessLeaseAsync( | ||||
|         ISchedulerQueueLease<RunnerSegmentQueueMessage> lease, | ||||
|         TimeSpan leaseDuration, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         try | ||||
|         { | ||||
|             var result = await _executionService.ExecuteAsync(lease.Message, cancellationToken).ConfigureAwait(false); | ||||
|  | ||||
|             await lease.AcknowledgeAsync(cancellationToken).ConfigureAwait(false); | ||||
|  | ||||
|             _logger.LogDebug( | ||||
|                 "Runner segment {SegmentId} processed; status={Status} processed={Processed} deltaImages={DeltaImages} runCompleted={RunCompleted}.", | ||||
|                 lease.SegmentId, | ||||
|                 result.Status, | ||||
|                 result.ProcessedImages, | ||||
|                 result.DeltaImages, | ||||
|                 result.RunCompleted); | ||||
|         } | ||||
|         catch (OperationCanceledException) | ||||
|         { | ||||
|             throw; | ||||
|         } | ||||
|         catch (Exception ex) | ||||
|         { | ||||
|             _logger.LogWarning( | ||||
|                 ex, | ||||
|                 "Runner segment {SegmentId} failed on attempt {Attempt}; releasing for retry.", | ||||
|                 lease.SegmentId, | ||||
|                 lease.Attempt); | ||||
|  | ||||
|             try | ||||
|             { | ||||
|                 await lease.ReleaseAsync(SchedulerQueueReleaseDisposition.Retry, cancellationToken).ConfigureAwait(false); | ||||
|             } | ||||
|             catch (Exception releaseEx) when (releaseEx is not OperationCanceledException) | ||||
|             { | ||||
|                 _logger.LogError( | ||||
|                     releaseEx, | ||||
|                     "Failed to release runner segment {SegmentId}; attempting lease renewal.", | ||||
|                     lease.SegmentId); | ||||
|  | ||||
|                 try | ||||
|                 { | ||||
|                     await lease.RenewAsync(leaseDuration, cancellationToken).ConfigureAwait(false); | ||||
|                 } | ||||
|                 catch (Exception renewEx) when (renewEx is not OperationCanceledException) | ||||
|                 { | ||||
|                     _logger.LogCritical( | ||||
|                         renewEx, | ||||
|                         "Unable to renew runner segment {SegmentId}; acknowledging to avoid tight failure loop.", | ||||
|                         lease.SegmentId); | ||||
|  | ||||
|                     await lease.AcknowledgeAsync(cancellationToken).ConfigureAwait(false); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static async Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken) | ||||
|     { | ||||
|         if (delay <= TimeSpan.Zero) | ||||
|         { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         try | ||||
|         { | ||||
|             await Task.Delay(delay, cancellationToken).ConfigureAwait(false); | ||||
|         } | ||||
|         catch (OperationCanceledException) | ||||
|         { | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,376 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Linq; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Repositories; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Services; | ||||
| using StellaOps.Scheduler.Worker.Events; | ||||
| using StellaOps.Scheduler.Worker.Observability; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Execution; | ||||
|  | ||||
| public interface IRunnerExecutionService | ||||
| { | ||||
|     Task<RunnerSegmentExecutionResult> ExecuteAsync(RunnerSegmentQueueMessage message, CancellationToken cancellationToken = default); | ||||
| } | ||||
|  | ||||
| internal sealed class RunnerExecutionService : IRunnerExecutionService | ||||
| { | ||||
|     private static readonly IReadOnlyDictionary<string, ImpactImage> EmptyImpactLookup = | ||||
|         new Dictionary<string, ImpactImage>(0, StringComparer.Ordinal); | ||||
|  | ||||
|     private readonly IRunRepository _runRepository; | ||||
|     private readonly IRunSummaryService _runSummaryService; | ||||
|     private readonly IImpactSnapshotRepository _impactSnapshotRepository; | ||||
|     private readonly IScannerReportClient _scannerClient; | ||||
|     private readonly ISchedulerEventPublisher _eventPublisher; | ||||
|     private readonly SchedulerWorkerMetrics _metrics; | ||||
|     private readonly TimeProvider _timeProvider; | ||||
|     private readonly ILogger<RunnerExecutionService> _logger; | ||||
|  | ||||
|     public RunnerExecutionService( | ||||
|         IRunRepository runRepository, | ||||
|         IRunSummaryService runSummaryService, | ||||
|         IImpactSnapshotRepository impactSnapshotRepository, | ||||
|         IScannerReportClient scannerClient, | ||||
|         ISchedulerEventPublisher eventPublisher, | ||||
|         SchedulerWorkerMetrics metrics, | ||||
|         TimeProvider? timeProvider, | ||||
|         ILogger<RunnerExecutionService> logger) | ||||
|     { | ||||
|         _runRepository = runRepository ?? throw new ArgumentNullException(nameof(runRepository)); | ||||
|         _runSummaryService = runSummaryService ?? throw new ArgumentNullException(nameof(runSummaryService)); | ||||
|         _impactSnapshotRepository = impactSnapshotRepository ?? throw new ArgumentNullException(nameof(impactSnapshotRepository)); | ||||
|         _scannerClient = scannerClient ?? throw new ArgumentNullException(nameof(scannerClient)); | ||||
|         _eventPublisher = eventPublisher ?? throw new ArgumentNullException(nameof(eventPublisher)); | ||||
|         _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics)); | ||||
|         _timeProvider = timeProvider ?? TimeProvider.System; | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
|     public async Task<RunnerSegmentExecutionResult> ExecuteAsync( | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         CancellationToken cancellationToken = default) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(message); | ||||
|         cancellationToken.ThrowIfCancellationRequested(); | ||||
|  | ||||
|         var scheduleMode = ResolveScheduleMode(message.Attributes); | ||||
|         var modeLabel = scheduleMode.ToString().ToLowerInvariant(); | ||||
|  | ||||
|         var run = await _runRepository | ||||
|             .GetAsync(message.TenantId, message.RunId, cancellationToken: cancellationToken) | ||||
|             .ConfigureAwait(false); | ||||
|  | ||||
|         if (run is null) | ||||
|         { | ||||
|             _logger.LogWarning( | ||||
|                 "Runner segment {SegmentId} references missing run {RunId} for tenant {TenantId}.", | ||||
|                 message.SegmentId, | ||||
|                 message.RunId, | ||||
|                 message.TenantId); | ||||
|  | ||||
|             _metrics.RecordRunnerSegment(modeLabel, RunnerSegmentExecutionStatus.RunMissing.ToString(), 0, 0); | ||||
|             return RunnerSegmentExecutionResult.RunMissing(message.RunId); | ||||
|         } | ||||
|  | ||||
|         var now = _timeProvider.GetUtcNow(); | ||||
|         var accumulator = new StatsAccumulator(run.Stats); | ||||
|         var impactLookup = await LoadImpactLookupAsync(run, message, cancellationToken).ConfigureAwait(false); | ||||
|         var deltaSummaries = new List<DeltaSummary>(); | ||||
|         var imageContexts = new List<ImageExecutionContext>(message.ImageDigests.Count); | ||||
|  | ||||
|         foreach (var digest in message.ImageDigests) | ||||
|         { | ||||
|             impactLookup.TryGetValue(digest, out var impactImage); | ||||
|  | ||||
|             var request = new ScannerReportRequest( | ||||
|                 message.TenantId, | ||||
|                 message.RunId, | ||||
|                 digest, | ||||
|                 scheduleMode, | ||||
|                 message.UsageOnly, | ||||
|                 message.Attributes); | ||||
|  | ||||
|             RunnerImageResult result; | ||||
|             try | ||||
|             { | ||||
|                 result = await _scannerClient.ExecuteAsync(request, cancellationToken).ConfigureAwait(false); | ||||
|             } | ||||
|             catch (OperationCanceledException) | ||||
|             { | ||||
|                 throw; | ||||
|             } | ||||
|             catch (Exception ex) | ||||
|             { | ||||
|                 _logger.LogWarning( | ||||
|                     ex, | ||||
|                     "Scanner execution failed for run {RunId} digest {Digest}; propagating failure for retry.", | ||||
|                     message.RunId, | ||||
|                     digest); | ||||
|                 throw; | ||||
|             } | ||||
|  | ||||
|             accumulator.Record(result); | ||||
|             imageContexts.Add(new ImageExecutionContext(result, impactImage)); | ||||
|             if (result.Delta is { } delta && HasMeaningfulDelta(delta)) | ||||
|             { | ||||
|                 deltaSummaries.Add(delta); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         var updatedStats = accumulator.Build(); | ||||
|         var startedAt = run.StartedAt ?? now; | ||||
|         var completed = updatedStats.Completed >= updatedStats.Queued && updatedStats.Queued > 0; | ||||
|         var finishedAt = completed ? now : run.FinishedAt; | ||||
|         var newState = completed ? RunState.Completed : RunState.Running; | ||||
|         var deltas = run.Deltas.ToList(); | ||||
|         deltas.AddRange(deltaSummaries); | ||||
|  | ||||
|         var updatedRun = new Run( | ||||
|             run.Id, | ||||
|             run.TenantId, | ||||
|             run.Trigger, | ||||
|             newState, | ||||
|             updatedStats, | ||||
|             run.CreatedAt, | ||||
|             run.Reason, | ||||
|             run.ScheduleId, | ||||
|             startedAt, | ||||
|             finishedAt, | ||||
|             error: null, | ||||
|             deltas, | ||||
|             run.SchemaVersion); | ||||
|  | ||||
|         var persisted = await _runRepository.UpdateAsync(updatedRun, cancellationToken: cancellationToken).ConfigureAwait(false); | ||||
|         if (!persisted) | ||||
|         { | ||||
|             _logger.LogWarning("Failed to persist run {RunId} after processing runner segment {SegmentId}.", run.Id, message.SegmentId); | ||||
|         } | ||||
|  | ||||
|         if (persisted && !string.IsNullOrWhiteSpace(updatedRun.ScheduleId)) | ||||
|         { | ||||
|             try | ||||
|             { | ||||
|                 await _runSummaryService.ProjectAsync(updatedRun, cancellationToken).ConfigureAwait(false); | ||||
|             } | ||||
|             catch (Exception ex) when (ex is not OperationCanceledException) | ||||
|             { | ||||
|                 _logger.LogWarning(ex, "Failed to project run summary for run {RunId}.", run.Id); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         var segmentStatus = persisted ? RunnerSegmentExecutionStatus.Completed.ToString() : "persist_failed"; | ||||
|         _metrics.RecordRunnerSegment(modeLabel, segmentStatus, accumulator.Processed, accumulator.DeltaImages); | ||||
|  | ||||
|         if (deltaSummaries.Count > 0) | ||||
|         { | ||||
|             _metrics.RecordDeltaSummaries(modeLabel, deltaSummaries); | ||||
|         } | ||||
|  | ||||
|         var remaining = Math.Max(updatedRun.Stats.Queued - updatedRun.Stats.Completed, 0); | ||||
|         _metrics.UpdateBacklog(modeLabel, updatedRun.ScheduleId, remaining); | ||||
|  | ||||
|         if (completed && persisted) | ||||
|         { | ||||
|             var duration = (updatedRun.FinishedAt ?? now) - (updatedRun.StartedAt ?? updatedRun.CreatedAt); | ||||
|             _metrics.RecordRunCompletion(modeLabel, "completed", duration); | ||||
|         } | ||||
|  | ||||
|         if (persisted) | ||||
|         { | ||||
|             foreach (var context in imageContexts) | ||||
|             { | ||||
|                 await _eventPublisher.PublishReportReadyAsync( | ||||
|                         updatedRun, | ||||
|                         message, | ||||
|                         context.Result, | ||||
|                         context.ImpactImage, | ||||
|                         cancellationToken) | ||||
|                     .ConfigureAwait(false); | ||||
|             } | ||||
|  | ||||
|             if (deltaSummaries.Count > 0) | ||||
|             { | ||||
|                 await _eventPublisher.PublishRescanDeltaAsync( | ||||
|                         updatedRun, | ||||
|                         message, | ||||
|                         deltaSummaries, | ||||
|                         impactLookup, | ||||
|                         cancellationToken) | ||||
|                     .ConfigureAwait(false); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return RunnerSegmentExecutionResult.Success( | ||||
|             updatedRun, | ||||
|             accumulator.Processed, | ||||
|             accumulator.DeltaImages, | ||||
|             completed, | ||||
|             deltaSummaries); | ||||
|     } | ||||
|  | ||||
|     private async Task<IReadOnlyDictionary<string, ImpactImage>> LoadImpactLookupAsync( | ||||
|         Run run, | ||||
|         RunnerSegmentQueueMessage message, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         var snapshotId = ResolveSnapshotId(run, message); | ||||
|         if (string.IsNullOrWhiteSpace(snapshotId)) | ||||
|         { | ||||
|             return EmptyImpactLookup; | ||||
|         } | ||||
|  | ||||
|         try | ||||
|         { | ||||
|             var snapshot = await _impactSnapshotRepository | ||||
|                 .GetBySnapshotIdAsync(snapshotId, cancellationToken: cancellationToken) | ||||
|                 .ConfigureAwait(false); | ||||
|  | ||||
|             if (snapshot?.Images.Length > 0) | ||||
|             { | ||||
|                 var map = new Dictionary<string, ImpactImage>(snapshot.Images.Length, StringComparer.Ordinal); | ||||
|                 foreach (var image in snapshot.Images) | ||||
|                 { | ||||
|                     if (!map.ContainsKey(image.ImageDigest)) | ||||
|                     { | ||||
|                         map[image.ImageDigest] = image; | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 return map; | ||||
|             } | ||||
|         } | ||||
|         catch (OperationCanceledException) | ||||
|         { | ||||
|             throw; | ||||
|         } | ||||
|         catch (Exception ex) | ||||
|         { | ||||
|             _logger.LogWarning( | ||||
|                 ex, | ||||
|                 "Failed to load impact snapshot {SnapshotId} for run {RunId}.", | ||||
|                 snapshotId, | ||||
|                 run.Id); | ||||
|         } | ||||
|  | ||||
|         return EmptyImpactLookup; | ||||
|     } | ||||
|  | ||||
|     private static string? ResolveSnapshotId(Run run, RunnerSegmentQueueMessage message) | ||||
|     { | ||||
|         if (message.Attributes.TryGetValue("impactSnapshotId", out var snapshotId) && | ||||
|             !string.IsNullOrWhiteSpace(snapshotId)) | ||||
|         { | ||||
|             return snapshotId.Trim(); | ||||
|         } | ||||
|  | ||||
|         return string.IsNullOrWhiteSpace(run.Id) ? null : $"impact::{run.Id}"; | ||||
|     } | ||||
|  | ||||
|     private static ScheduleMode ResolveScheduleMode(IReadOnlyDictionary<string, string> attributes) | ||||
|     { | ||||
|         if (attributes.TryGetValue("scheduleMode", out var mode) && | ||||
|             Enum.TryParse<ScheduleMode>(mode, ignoreCase: true, out var parsed)) | ||||
|         { | ||||
|             return parsed; | ||||
|         } | ||||
|  | ||||
|         return ScheduleMode.AnalysisOnly; | ||||
|     } | ||||
|  | ||||
|     private readonly record struct ImageExecutionContext( | ||||
|         RunnerImageResult Result, | ||||
|         ImpactImage? ImpactImage); | ||||
|  | ||||
|     private static bool HasMeaningfulDelta(DeltaSummary delta) | ||||
|     { | ||||
|         if (delta is null) | ||||
|         { | ||||
|             return false; | ||||
|         } | ||||
|  | ||||
|         if (delta.NewFindings > 0 || | ||||
|             delta.NewCriticals > 0 || | ||||
|             delta.NewHigh > 0 || | ||||
|             delta.NewMedium > 0 || | ||||
|             delta.NewLow > 0) | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         return !delta.KevHits.IsDefaultOrEmpty; | ||||
|     } | ||||
|  | ||||
|     private sealed class StatsAccumulator | ||||
|     { | ||||
|         private readonly RunStats _baseStats; | ||||
|  | ||||
|         public StatsAccumulator(RunStats baseStats) | ||||
|         { | ||||
|             _baseStats = baseStats ?? RunStats.Empty; | ||||
|         } | ||||
|  | ||||
|         public int Processed { get; private set; } | ||||
|  | ||||
|         public int DeltaImages { get; private set; } | ||||
|  | ||||
|         private int _newCriticals; | ||||
|         private int _newHigh; | ||||
|         private int _newMedium; | ||||
|         private int _newLow; | ||||
|  | ||||
|         public void Record(RunnerImageResult result) | ||||
|         { | ||||
|             Processed++; | ||||
|  | ||||
|             if (result.Delta is { } delta && HasMeaningfulDelta(delta)) | ||||
|             { | ||||
|                 DeltaImages++; | ||||
|                 _newCriticals += delta.NewCriticals; | ||||
|                 _newHigh += delta.NewHigh; | ||||
|                 _newMedium += delta.NewMedium; | ||||
|                 _newLow += delta.NewLow; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public RunStats Build() | ||||
|         { | ||||
|             return new RunStats( | ||||
|                 _baseStats.Candidates, | ||||
|                 _baseStats.Deduped, | ||||
|                 _baseStats.Queued, | ||||
|                 _baseStats.Completed + Processed, | ||||
|                 _baseStats.Deltas + DeltaImages, | ||||
|                 _baseStats.NewCriticals + _newCriticals, | ||||
|                 _baseStats.NewHigh + _newHigh, | ||||
|                 _baseStats.NewMedium + _newMedium, | ||||
|                 _baseStats.NewLow + _newLow); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| public sealed record RunnerSegmentExecutionResult( | ||||
|     RunnerSegmentExecutionStatus Status, | ||||
|     Run? UpdatedRun, | ||||
|     int ProcessedImages, | ||||
|     int DeltaImages, | ||||
|     bool RunCompleted, | ||||
|     IReadOnlyList<DeltaSummary> DeltaSummaries) | ||||
| { | ||||
|     public static RunnerSegmentExecutionResult Success(Run updatedRun, int processedImages, int deltaImages, bool runCompleted, IReadOnlyList<DeltaSummary> deltas) | ||||
|         => new(RunnerSegmentExecutionStatus.Completed, updatedRun, processedImages, deltaImages, runCompleted, deltas); | ||||
|  | ||||
|     public static RunnerSegmentExecutionResult RunMissing(string runId) | ||||
|         => new(RunnerSegmentExecutionStatus.RunMissing, null, 0, 0, false, Array.Empty<DeltaSummary>()); | ||||
| } | ||||
|  | ||||
| public enum RunnerSegmentExecutionStatus | ||||
| { | ||||
|     Completed, | ||||
|     RunMissing | ||||
| } | ||||
| @@ -0,0 +1,52 @@ | ||||
| using System.Collections.Generic; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using StellaOps.Scheduler.Models; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Execution; | ||||
|  | ||||
| public interface IScannerReportClient | ||||
| { | ||||
|     Task<RunnerImageResult> ExecuteAsync(ScannerReportRequest request, CancellationToken cancellationToken = default); | ||||
| } | ||||
|  | ||||
| public sealed record ScannerReportRequest( | ||||
|     string TenantId, | ||||
|     string RunId, | ||||
|     string ImageDigest, | ||||
|     ScheduleMode Mode, | ||||
|     bool UsageOnly, | ||||
|     IReadOnlyDictionary<string, string> Attributes); | ||||
|  | ||||
| public sealed record RunnerImageResult( | ||||
|     string ImageDigest, | ||||
|     DeltaSummary? Delta, | ||||
|     bool ContentRefreshed, | ||||
|     RunnerReportSnapshot Report, | ||||
|     RunnerDsseEnvelope? Dsse); | ||||
|  | ||||
| public sealed record RunnerReportSnapshot( | ||||
|     string ReportId, | ||||
|     string ImageDigest, | ||||
|     string Verdict, | ||||
|     DateTimeOffset GeneratedAt, | ||||
|     RunnerReportSummary Summary, | ||||
|     string? PolicyRevisionId, | ||||
|     string? PolicyDigest); | ||||
|  | ||||
| public sealed record RunnerReportSummary( | ||||
|     int Total, | ||||
|     int Blocked, | ||||
|     int Warned, | ||||
|     int Ignored, | ||||
|     int Quieted); | ||||
|  | ||||
| public sealed record RunnerDsseEnvelope( | ||||
|     string PayloadType, | ||||
|     string Payload, | ||||
|     IReadOnlyList<RunnerDsseSignature> Signatures); | ||||
|  | ||||
| public sealed record RunnerDsseSignature( | ||||
|     string KeyId, | ||||
|     string Algorithm, | ||||
|     string Signature); | ||||
| @@ -0,0 +1,210 @@ | ||||
| using System; | ||||
| using System.Collections.Concurrent; | ||||
| using System.Collections.Generic; | ||||
| using System.Diagnostics.Metrics; | ||||
| using StellaOps.Scheduler.Models; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Observability; | ||||
|  | ||||
| public sealed class SchedulerWorkerMetrics : IDisposable | ||||
| { | ||||
|     public const string MeterName = "StellaOps.Scheduler.Worker"; | ||||
|  | ||||
|     private readonly Meter _meter; | ||||
|     private readonly Counter<long> _plannerRunsTotal; | ||||
|     private readonly Histogram<double> _plannerLatencySeconds; | ||||
|     private readonly Counter<long> _runnerSegmentsTotal; | ||||
|     private readonly Counter<long> _runnerImagesTotal; | ||||
|     private readonly Counter<long> _runnerDeltaCriticalTotal; | ||||
|     private readonly Counter<long> _runnerDeltaHighTotal; | ||||
|     private readonly Counter<long> _runnerDeltaFindingsTotal; | ||||
|     private readonly Counter<long> _runnerKevHitsTotal; | ||||
|     private readonly Histogram<double> _runDurationSeconds; | ||||
|     private readonly UpDownCounter<long> _runsActive; | ||||
|     private readonly ConcurrentDictionary<string, long> _backlog = new(StringComparer.Ordinal); | ||||
|     private readonly ObservableGauge<long> _backlogGauge; | ||||
|     private bool _disposed; | ||||
|  | ||||
|     public SchedulerWorkerMetrics() | ||||
|     { | ||||
|         _meter = new Meter(MeterName); | ||||
|         _plannerRunsTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_planner_runs_total", | ||||
|             unit: "count", | ||||
|             description: "Planner runs grouped by status and mode."); | ||||
|         _plannerLatencySeconds = _meter.CreateHistogram<double>( | ||||
|             "scheduler_planner_latency_seconds", | ||||
|             unit: "s", | ||||
|             description: "Latency between run creation and planner processing grouped by mode and status."); | ||||
|         _runnerSegmentsTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_runner_segments_total", | ||||
|             unit: "count", | ||||
|             description: "Runner segments processed grouped by status and mode."); | ||||
|         _runnerImagesTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_runner_images_total", | ||||
|             unit: "count", | ||||
|             description: "Images processed by runner grouped by mode and delta outcome."); | ||||
|         _runnerDeltaCriticalTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_runner_delta_critical_total", | ||||
|             unit: "count", | ||||
|             description: "Critical findings observed by runner grouped by mode."); | ||||
|         _runnerDeltaHighTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_runner_delta_high_total", | ||||
|             unit: "count", | ||||
|             description: "High findings observed by runner grouped by mode."); | ||||
|         _runnerDeltaFindingsTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_runner_delta_total", | ||||
|             unit: "count", | ||||
|             description: "Total findings observed by runner grouped by mode."); | ||||
|         _runnerKevHitsTotal = _meter.CreateCounter<long>( | ||||
|             "scheduler_runner_delta_kev_total", | ||||
|             unit: "count", | ||||
|             description: "KEV hits observed by runner grouped by mode."); | ||||
|         _runDurationSeconds = _meter.CreateHistogram<double>( | ||||
|             "scheduler_run_duration_seconds", | ||||
|             unit: "s", | ||||
|             description: "End-to-end run durations grouped by mode and result."); | ||||
|         _runsActive = _meter.CreateUpDownCounter<long>( | ||||
|             "scheduler_runs_active", | ||||
|             unit: "count", | ||||
|             description: "Active scheduler runs grouped by mode."); | ||||
|         _backlogGauge = _meter.CreateObservableGauge<long>( | ||||
|             "scheduler_runner_backlog", | ||||
|             ObserveBacklog, | ||||
|             unit: "images", | ||||
|             description: "Remaining images queued for runner processing grouped by mode and schedule."); | ||||
|     } | ||||
|  | ||||
|     public void RecordPlannerResult(string mode, string status, TimeSpan latency, int imageCount) | ||||
|     { | ||||
|         var tags = new[] | ||||
|         { | ||||
|             new KeyValuePair<string, object?>("mode", mode), | ||||
|             new KeyValuePair<string, object?>("status", status) | ||||
|         }; | ||||
|         _plannerRunsTotal.Add(1, tags); | ||||
|         _plannerLatencySeconds.Record(Math.Max(latency.TotalSeconds, 0d), tags); | ||||
|  | ||||
|         if (status.Equals("enqueued", StringComparison.OrdinalIgnoreCase) && imageCount > 0) | ||||
|         { | ||||
|             _runsActive.Add(1, new[] { new KeyValuePair<string, object?>("mode", mode) }); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     public void RecordRunnerSegment(string mode, string status, int processedImages, int deltaImages) | ||||
|     { | ||||
|         var tags = new[] | ||||
|         { | ||||
|             new KeyValuePair<string, object?>("mode", mode), | ||||
|             new KeyValuePair<string, object?>("status", status) | ||||
|         }; | ||||
|  | ||||
|         _runnerSegmentsTotal.Add(1, tags); | ||||
|  | ||||
|         var imageTags = new[] | ||||
|         { | ||||
|             new KeyValuePair<string, object?>("mode", mode), | ||||
|             new KeyValuePair<string, object?>("delta", deltaImages > 0 ? "true" : "false") | ||||
|         }; | ||||
|         _runnerImagesTotal.Add(processedImages, imageTags); | ||||
|     } | ||||
|  | ||||
|     public void RecordDeltaSummaries(string mode, IReadOnlyList<DeltaSummary> deltas) | ||||
|     { | ||||
|         if (deltas.Count == 0) | ||||
|         { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         var tags = new[] { new KeyValuePair<string, object?>("mode", mode) }; | ||||
|  | ||||
|         foreach (var delta in deltas) | ||||
|         { | ||||
|             if (delta.NewCriticals > 0) | ||||
|             { | ||||
|                 _runnerDeltaCriticalTotal.Add(delta.NewCriticals, tags); | ||||
|             } | ||||
|  | ||||
|             if (delta.NewHigh > 0) | ||||
|             { | ||||
|                 _runnerDeltaHighTotal.Add(delta.NewHigh, tags); | ||||
|             } | ||||
|  | ||||
|             if (delta.NewFindings > 0) | ||||
|             { | ||||
|                 _runnerDeltaFindingsTotal.Add(delta.NewFindings, tags); | ||||
|             } | ||||
|  | ||||
|             if (!delta.KevHits.IsDefaultOrEmpty) | ||||
|             { | ||||
|                 _runnerKevHitsTotal.Add(delta.KevHits.Length, tags); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     public void RecordRunCompletion(string mode, string result, TimeSpan? duration, bool decrementActive = true) | ||||
|     { | ||||
|         var tags = new[] | ||||
|         { | ||||
|             new KeyValuePair<string, object?>("mode", mode), | ||||
|             new KeyValuePair<string, object?>("result", result) | ||||
|         }; | ||||
|  | ||||
|         if (duration is { } runDuration) | ||||
|         { | ||||
|             _runDurationSeconds.Record(Math.Max(runDuration.TotalSeconds, 0d), tags); | ||||
|         } | ||||
|  | ||||
|         if (decrementActive) | ||||
|         { | ||||
|             _runsActive.Add(-1, new[] { new KeyValuePair<string, object?>("mode", mode) }); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     public void UpdateBacklog(string mode, string? scheduleId, long backlog) | ||||
|     { | ||||
|         var key = BuildBacklogKey(mode, scheduleId); | ||||
|         if (backlog <= 0) | ||||
|         { | ||||
|             _backlog.TryRemove(key, out _); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             _backlog[key] = backlog; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private IEnumerable<Measurement<long>> ObserveBacklog() | ||||
|     { | ||||
|         foreach (var entry in _backlog) | ||||
|         { | ||||
|             var (mode, scheduleId) = SplitBacklogKey(entry.Key); | ||||
|             yield return new Measurement<long>( | ||||
|                 entry.Value, | ||||
|                 new KeyValuePair<string, object?>("mode", mode), | ||||
|                 new KeyValuePair<string, object?>("scheduleId", scheduleId ?? string.Empty)); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static string BuildBacklogKey(string mode, string? scheduleId) | ||||
|         => $"{mode}|{scheduleId ?? string.Empty}"; | ||||
|  | ||||
|     private static (string Mode, string? ScheduleId) SplitBacklogKey(string key) | ||||
|     { | ||||
|         var parts = key.Split('|', 2); | ||||
|         return parts.Length == 2 | ||||
|             ? (parts[0], string.IsNullOrEmpty(parts[1]) ? null : parts[1]) | ||||
|             : (key, null); | ||||
|     } | ||||
|  | ||||
|     public void Dispose() | ||||
|     { | ||||
|         if (_disposed) | ||||
|         { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         _meter.Dispose(); | ||||
|         _disposed = true; | ||||
|     } | ||||
| } | ||||
| @@ -9,9 +9,12 @@ public sealed class SchedulerWorkerOptions | ||||
| { | ||||
|     public PlannerOptions Planner { get; set; } = new(); | ||||
|  | ||||
|     public RunnerOptions Runner { get; set; } = new(); | ||||
|  | ||||
|     public void Validate() | ||||
|     { | ||||
|         Planner.Validate(); | ||||
|         Runner.Validate(); | ||||
|     } | ||||
|  | ||||
|     public sealed class PlannerOptions | ||||
| @@ -79,4 +82,188 @@ public sealed class SchedulerWorkerOptions | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     public sealed class RunnerOptions | ||||
|     { | ||||
|         public DispatchOptions Dispatch { get; set; } = new(); | ||||
|  | ||||
|         public ExecutionOptions Execution { get; set; } = new(); | ||||
|  | ||||
|         public ScannerOptions Scanner { get; set; } = new(); | ||||
|  | ||||
|         public void Validate() | ||||
|         { | ||||
|             Dispatch.Validate(); | ||||
|             Execution.Validate(); | ||||
|             Scanner.Validate(); | ||||
|         } | ||||
|  | ||||
|         public sealed class DispatchOptions | ||||
|         { | ||||
|             /// <summary> | ||||
|             /// Consumer name used when leasing planner queue messages to dispatch runner segments. | ||||
|             /// </summary> | ||||
|             public string ConsumerName { get; set; } = "scheduler-runner-dispatch"; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Maximum number of planner messages claimed per lease. | ||||
|             /// </summary> | ||||
|             public int BatchSize { get; set; } = 5; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Duration of the lease held while dispatching runner segments. | ||||
|             /// </summary> | ||||
|             public TimeSpan LeaseDuration { get; set; } = TimeSpan.FromMinutes(5); | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Delay applied between polls when no planner messages are available. | ||||
|             /// </summary> | ||||
|             public TimeSpan IdleDelay { get; set; } = TimeSpan.FromSeconds(10); | ||||
|  | ||||
|             public void Validate() | ||||
|             { | ||||
|                 if (string.IsNullOrWhiteSpace(ConsumerName)) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner dispatch consumer name must be configured."); | ||||
|                 } | ||||
|  | ||||
|                 if (BatchSize <= 0) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner dispatch batch size must be greater than zero."); | ||||
|                 } | ||||
|  | ||||
|                 if (LeaseDuration <= TimeSpan.Zero) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner dispatch lease duration must be greater than zero."); | ||||
|                 } | ||||
|  | ||||
|                 if (IdleDelay < TimeSpan.Zero) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner dispatch idle delay cannot be negative."); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public sealed class ExecutionOptions | ||||
|         { | ||||
|             /// <summary> | ||||
|             /// Consumer name used when leasing runner segment messages. | ||||
|             /// </summary> | ||||
|             public string ConsumerName { get; set; } = "scheduler-runner"; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Maximum number of runner segments leased per poll. | ||||
|             /// </summary> | ||||
|             public int BatchSize { get; set; } = 5; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Lease duration granted while processing a runner segment. | ||||
|             /// </summary> | ||||
|             public TimeSpan LeaseDuration { get; set; } = TimeSpan.FromMinutes(5); | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Delay applied between polls when no runner segments are available. | ||||
|             /// </summary> | ||||
|             public TimeSpan IdleDelay { get; set; } = TimeSpan.FromSeconds(5); | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Maximum number of runner segments processed concurrently. | ||||
|             /// </summary> | ||||
|             public int MaxConcurrentSegments { get; set; } = Environment.ProcessorCount; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Timeout applied to scanner requests per image digest. | ||||
|             /// </summary> | ||||
|             public TimeSpan ReportTimeout { get; set; } = TimeSpan.FromSeconds(60); | ||||
|  | ||||
|             public void Validate() | ||||
|             { | ||||
|                 if (string.IsNullOrWhiteSpace(ConsumerName)) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner execution consumer name must be configured."); | ||||
|                 } | ||||
|  | ||||
|                 if (BatchSize <= 0) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner execution batch size must be greater than zero."); | ||||
|                 } | ||||
|  | ||||
|                 if (LeaseDuration <= TimeSpan.Zero) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner execution lease duration must be greater than zero."); | ||||
|                 } | ||||
|  | ||||
|                 if (IdleDelay < TimeSpan.Zero) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner execution idle delay cannot be negative."); | ||||
|                 } | ||||
|  | ||||
|                 if (MaxConcurrentSegments <= 0) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner execution max concurrent segments must be greater than zero."); | ||||
|                 } | ||||
|  | ||||
|                 if (ReportTimeout <= TimeSpan.Zero) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner execution report timeout must be greater than zero."); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public sealed class ScannerOptions | ||||
|         { | ||||
|             /// <summary> | ||||
|             /// Base address for Scanner WebService API calls. | ||||
|             /// </summary> | ||||
|             public Uri? BaseAddress { get; set; } | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Relative path to the reports endpoint. | ||||
|             /// </summary> | ||||
|             public string ReportsPath { get; set; } = "/api/v1/reports"; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Relative path to the scans endpoint (content refresh). | ||||
|             /// </summary> | ||||
|             public string ScansPath { get; set; } = "/api/v1/scans"; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Whether runner should attempt content refresh before requesting report in content refresh mode. | ||||
|             /// </summary> | ||||
|             public bool EnableContentRefresh { get; set; } = true; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Maximum number of scanner retries for transient failures. | ||||
|             /// </summary> | ||||
|             public int MaxRetryAttempts { get; set; } = 3; | ||||
|  | ||||
|             /// <summary> | ||||
|             /// Base delay applied between retries for transient failures. | ||||
|             /// </summary> | ||||
|             public TimeSpan RetryBaseDelay { get; set; } = TimeSpan.FromSeconds(2); | ||||
|  | ||||
|             public void Validate() | ||||
|             { | ||||
|                 if (string.IsNullOrWhiteSpace(ReportsPath)) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner scanner reports path must be configured."); | ||||
|                 } | ||||
|  | ||||
|                 if (string.IsNullOrWhiteSpace(ScansPath)) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner scanner scans path must be configured."); | ||||
|                 } | ||||
|  | ||||
|                 if (MaxRetryAttempts < 0) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner scanner retry attempts cannot be negative."); | ||||
|                 } | ||||
|  | ||||
|                 if (RetryBaseDelay < TimeSpan.Zero) | ||||
|                 { | ||||
|                     throw new InvalidOperationException("Runner scanner retry delay cannot be negative."); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -5,6 +5,7 @@ using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Repositories; | ||||
| using StellaOps.Scheduler.Storage.Mongo.Services; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
| using StellaOps.Scheduler.Worker.Observability; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Planning; | ||||
|  | ||||
| @@ -18,6 +19,7 @@ internal sealed class PlannerExecutionService | ||||
|     private readonly ISchedulerPlannerQueue _plannerQueue; | ||||
|     private readonly SchedulerWorkerOptions _options; | ||||
|     private readonly TimeProvider _timeProvider; | ||||
|     private readonly SchedulerWorkerMetrics _metrics; | ||||
|     private readonly ILogger<PlannerExecutionService> _logger; | ||||
|  | ||||
|     public PlannerExecutionService( | ||||
| @@ -29,6 +31,7 @@ internal sealed class PlannerExecutionService | ||||
|         ISchedulerPlannerQueue plannerQueue, | ||||
|         SchedulerWorkerOptions options, | ||||
|         TimeProvider? timeProvider, | ||||
|         SchedulerWorkerMetrics metrics, | ||||
|         ILogger<PlannerExecutionService> logger) | ||||
|     { | ||||
|         _scheduleRepository = scheduleRepository ?? throw new ArgumentNullException(nameof(scheduleRepository)); | ||||
| @@ -39,6 +42,7 @@ internal sealed class PlannerExecutionService | ||||
|         _plannerQueue = plannerQueue ?? throw new ArgumentNullException(nameof(plannerQueue)); | ||||
|         _options = options ?? throw new ArgumentNullException(nameof(options)); | ||||
|         _timeProvider = timeProvider ?? TimeProvider.System; | ||||
|         _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics)); | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
| @@ -52,6 +56,9 @@ internal sealed class PlannerExecutionService | ||||
|             return new PlannerExecutionResult(PlannerExecutionStatus.Skipped, run); | ||||
|         } | ||||
|  | ||||
|         var plannerStartedAt = _timeProvider.GetUtcNow(); | ||||
|         var plannerLatency = plannerStartedAt - run.CreatedAt; | ||||
|  | ||||
|         if (string.IsNullOrWhiteSpace(run.ScheduleId)) | ||||
|         { | ||||
|             _logger.LogWarning("Run {RunId} has no scheduleId; marking as failed.", run.Id); | ||||
| @@ -63,6 +70,7 @@ internal sealed class PlannerExecutionService | ||||
|             }; | ||||
|  | ||||
|             await PersistRunAsync(failed, cancellationToken).ConfigureAwait(false); | ||||
|             _metrics.RecordPlannerResult("unknown", "failed", plannerLatency, 0); | ||||
|             return new PlannerExecutionResult( | ||||
|                 PlannerExecutionStatus.Failed, | ||||
|                 failed, | ||||
| @@ -92,6 +100,7 @@ internal sealed class PlannerExecutionService | ||||
|             }; | ||||
|  | ||||
|             await PersistRunAsync(failed, cancellationToken).ConfigureAwait(false); | ||||
|             _metrics.RecordPlannerResult("unknown", "failed", plannerLatency, 0); | ||||
|             return new PlannerExecutionResult( | ||||
|                 PlannerExecutionStatus.Failed, | ||||
|                 failed, | ||||
| @@ -113,6 +122,7 @@ internal sealed class PlannerExecutionService | ||||
|         } | ||||
|  | ||||
|         var usageOnly = schedule.Mode != ScheduleMode.ContentRefresh; | ||||
|         var modeLabel = schedule.Mode.ToString().ToLowerInvariant(); | ||||
|  | ||||
|         ImpactSet impactSet; | ||||
|         try | ||||
| @@ -133,6 +143,7 @@ internal sealed class PlannerExecutionService | ||||
|             }; | ||||
|  | ||||
|             await PersistRunAsync(failed, cancellationToken).ConfigureAwait(false); | ||||
|             _metrics.RecordPlannerResult(modeLabel, "failed", plannerLatency, 0); | ||||
|             return new PlannerExecutionResult( | ||||
|                 PlannerExecutionStatus.Failed, | ||||
|                 failed, | ||||
| @@ -176,6 +187,9 @@ internal sealed class PlannerExecutionService | ||||
|  | ||||
|             await PersistRunAsync(completed, cancellationToken).ConfigureAwait(false); | ||||
|             _logger.LogInformation("Run {RunId} produced no impacted images; marking Completed.", run.Id); | ||||
|             _metrics.RecordPlannerResult(modeLabel, "no_work", plannerLatency, 0); | ||||
|             _metrics.UpdateBacklog(modeLabel, run.ScheduleId, 0); | ||||
|             _metrics.RecordRunCompletion(modeLabel, "completed", TimeSpan.Zero, decrementActive: false); | ||||
|             return new PlannerExecutionResult( | ||||
|                 PlannerExecutionStatus.CompletedWithoutWork, | ||||
|                 completed, | ||||
| @@ -212,6 +226,8 @@ internal sealed class PlannerExecutionService | ||||
|             snapshot.Images.Length, | ||||
|             run.TenantId, | ||||
|             schedule.Id); | ||||
|         _metrics.RecordPlannerResult(modeLabel, "enqueued", plannerLatency, snapshot.Images.Length); | ||||
|         _metrics.UpdateBacklog(modeLabel, run.ScheduleId, snapshot.Images.Length); | ||||
|  | ||||
|         return new PlannerExecutionResult( | ||||
|             PlannerExecutionStatus.Enqueued, | ||||
|   | ||||
| @@ -0,0 +1,212 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Linq; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Scheduler.Models; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Planning; | ||||
|  | ||||
| public interface IPlannerQueueDispatchService | ||||
| { | ||||
|     Task<PlannerQueueDispatchResult> DispatchAsync(PlannerQueueMessage message, CancellationToken cancellationToken = default); | ||||
| } | ||||
|  | ||||
| internal sealed class PlannerQueueDispatchService : IPlannerQueueDispatchService | ||||
| { | ||||
|     private readonly IImpactShardPlanner _shardPlanner; | ||||
|     private readonly ISchedulerRunnerQueue _runnerQueue; | ||||
|     private readonly SchedulerWorkerOptions _options; | ||||
|     private readonly ILogger<PlannerQueueDispatchService> _logger; | ||||
|  | ||||
|     public PlannerQueueDispatchService( | ||||
|         IImpactShardPlanner shardPlanner, | ||||
|         ISchedulerRunnerQueue runnerQueue, | ||||
|         SchedulerWorkerOptions options, | ||||
|         ILogger<PlannerQueueDispatchService> logger) | ||||
|     { | ||||
|         _shardPlanner = shardPlanner ?? throw new ArgumentNullException(nameof(shardPlanner)); | ||||
|         _runnerQueue = runnerQueue ?? throw new ArgumentNullException(nameof(runnerQueue)); | ||||
|         _options = options ?? throw new ArgumentNullException(nameof(options)); | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
|     public async Task<PlannerQueueDispatchResult> DispatchAsync( | ||||
|         PlannerQueueMessage message, | ||||
|         CancellationToken cancellationToken = default) | ||||
|     { | ||||
|         ArgumentNullException.ThrowIfNull(message); | ||||
|         cancellationToken.ThrowIfCancellationRequested(); | ||||
|  | ||||
|         var run = message.Run; | ||||
|         if (run is null) | ||||
|         { | ||||
|             throw new InvalidOperationException("Planner queue message did not include a run payload."); | ||||
|         } | ||||
|  | ||||
|         var impactSet = message.ImpactSet ?? throw new InvalidOperationException("Planner queue message did not include an impact set."); | ||||
|         if (impactSet.Images.Length == 0) | ||||
|         { | ||||
|             _logger.LogDebug("Skipping dispatch for run {RunId} because impact set is empty.", run.Id); | ||||
|             return PlannerQueueDispatchResult.NoWork(run.Id); | ||||
|         } | ||||
|  | ||||
|         var schedule = message.Schedule; | ||||
|         var limits = schedule?.Limits ?? ScheduleLimits.Default; | ||||
|         var shards = _shardPlanner.PlanShards(impactSet, limits.MaxJobs, limits.Parallelism); | ||||
|         if (shards.Length == 0) | ||||
|         { | ||||
|             _logger.LogDebug( | ||||
|                 "Planner dispatch produced no shards for run {RunId}; maxJobs={MaxJobs} parallelism={Parallelism}.", | ||||
|                 run.Id, | ||||
|                 limits.MaxJobs, | ||||
|                 limits.Parallelism); | ||||
|             return PlannerQueueDispatchResult.NoWork(run.Id); | ||||
|         } | ||||
|  | ||||
|         var shardCount = shards.Length; | ||||
|         var enqueueTasks = new List<Task>(shardCount); | ||||
|         var attributes = BuildCommonAttributes(run, impactSet, shardCount, schedule); | ||||
|  | ||||
|         foreach (var shard in shards) | ||||
|         { | ||||
|             var segmentId = $"{run.Id}:{shard.Index:D4}"; | ||||
|             var digests = shard.Images.Select(static image => image.ImageDigest).ToArray(); | ||||
|             if (digests.Length == 0) | ||||
|             { | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             var segmentAttributes = MergeAttributes(attributes, shard, schedule); | ||||
|             var runnerMessage = new RunnerSegmentQueueMessage( | ||||
|                 segmentId, | ||||
|                 run.Id, | ||||
|                 run.TenantId, | ||||
|                 digests, | ||||
|                 run.ScheduleId, | ||||
|                 limits.RatePerSecond, | ||||
|                 impactSet.UsageOnly, | ||||
|                 segmentAttributes, | ||||
|                 message.CorrelationId); | ||||
|  | ||||
|             enqueueTasks.Add(_runnerQueue.EnqueueAsync(runnerMessage, cancellationToken).AsTask()); | ||||
|         } | ||||
|  | ||||
|         if (enqueueTasks.Count == 0) | ||||
|         { | ||||
|             _logger.LogWarning("No runner segments were generated for run {RunId} despite non-empty impact set.", run.Id); | ||||
|             return PlannerQueueDispatchResult.NoWork(run.Id); | ||||
|         } | ||||
|  | ||||
|         await Task.WhenAll(enqueueTasks).ConfigureAwait(false); | ||||
|         _logger.LogInformation( | ||||
|             "Run {RunId} dispatched {SegmentCount} runner segments covering {ImageCount} images.", | ||||
|             run.Id, | ||||
|             enqueueTasks.Count, | ||||
|             impactSet.Images.Length); | ||||
|  | ||||
|         return PlannerQueueDispatchResult.Success(run.Id, enqueueTasks.Count, impactSet.Images.Length); | ||||
|     } | ||||
|  | ||||
|     private static IReadOnlyDictionary<string, string> BuildCommonAttributes( | ||||
|         Run run, | ||||
|         ImpactSet impactSet, | ||||
|         int shardCount, | ||||
|         Schedule? schedule) | ||||
|     { | ||||
|         var map = new Dictionary<string, string>(StringComparer.Ordinal) | ||||
|         { | ||||
|             ["runId"] = run.Id, | ||||
|             ["tenantId"] = run.TenantId, | ||||
|             ["usageOnly"] = impactSet.UsageOnly ? "true" : "false", | ||||
|             ["shardCount"] = shardCount.ToString(), | ||||
|             ["totalImages"] = impactSet.Images.Length.ToString() | ||||
|         }; | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(impactSet.SnapshotId)) | ||||
|         { | ||||
|             map["impactSnapshotId"] = impactSet.SnapshotId!; | ||||
|         } | ||||
|  | ||||
|         if (impactSet.GeneratedAt != default) | ||||
|         { | ||||
|             map["impactGeneratedAt"] = impactSet.GeneratedAt.UtcDateTime.ToString("O"); | ||||
|         } | ||||
|  | ||||
|         if (!string.IsNullOrWhiteSpace(run.ScheduleId)) | ||||
|         { | ||||
|             map["scheduleId"] = run.ScheduleId!; | ||||
|         } | ||||
|  | ||||
|         if (schedule is not null) | ||||
|         { | ||||
|             map["scheduleMode"] = schedule.Mode.ToString(); | ||||
|             if (!string.IsNullOrWhiteSpace(schedule.Name)) | ||||
|             { | ||||
|                 map["scheduleName"] = schedule.Name; | ||||
|             } | ||||
|  | ||||
|             if (schedule.Limits.RatePerSecond is { } rate && rate > 0) | ||||
|             { | ||||
|                 map["ratePerSecond"] = rate.ToString(); | ||||
|             } | ||||
|  | ||||
|             if (schedule.Limits.Parallelism is { } parallelism && parallelism > 0) | ||||
|             { | ||||
|                 map["parallelism"] = parallelism.ToString(); | ||||
|             } | ||||
|  | ||||
|             if (schedule.Limits.MaxJobs is { } maxJobs && maxJobs > 0) | ||||
|             { | ||||
|                 map["maxJobs"] = maxJobs.ToString(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return map; | ||||
|     } | ||||
|  | ||||
|     private static IReadOnlyDictionary<string, string> MergeAttributes( | ||||
|         IReadOnlyDictionary<string, string> common, | ||||
|         ImpactShard shard, | ||||
|         Schedule? schedule) | ||||
|     { | ||||
|         if (shard.Images.Length == 0) | ||||
|         { | ||||
|             return common; | ||||
|         } | ||||
|  | ||||
|         var map = new Dictionary<string, string>(common, StringComparer.Ordinal) | ||||
|         { | ||||
|             ["shardIndex"] = shard.Index.ToString(), | ||||
|             ["shardSize"] = shard.Images.Length.ToString() | ||||
|         }; | ||||
|  | ||||
|         if (schedule?.Mode == ScheduleMode.ContentRefresh) | ||||
|         { | ||||
|             var entrypointCount = shard.Images.Count(static image => image.UsedByEntrypoint); | ||||
|             map["entrypointCount"] = entrypointCount.ToString(); | ||||
|         } | ||||
|  | ||||
|         return map; | ||||
|     } | ||||
| } | ||||
|  | ||||
| public readonly record struct PlannerQueueDispatchResult( | ||||
|     string RunId, | ||||
|     PlannerQueueDispatchStatus Status, | ||||
|     int SegmentCount, | ||||
|     int ImageCount) | ||||
| { | ||||
|     public static PlannerQueueDispatchResult Success(string runId, int segmentCount, int imageCount) | ||||
|         => new(runId, PlannerQueueDispatchStatus.DispatchCompleted, segmentCount, imageCount); | ||||
|  | ||||
|     public static PlannerQueueDispatchResult NoWork(string runId) | ||||
|         => new(runId, PlannerQueueDispatchStatus.NoWork, 0, 0); | ||||
| } | ||||
|  | ||||
| public enum PlannerQueueDispatchStatus | ||||
| { | ||||
|     NoWork, | ||||
|     DispatchCompleted | ||||
| } | ||||
| @@ -0,0 +1,145 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Threading; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.Extensions.Hosting; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using StellaOps.Scheduler.Queue; | ||||
| using StellaOps.Scheduler.Worker.Options; | ||||
|  | ||||
| namespace StellaOps.Scheduler.Worker.Planning; | ||||
|  | ||||
| internal sealed class PlannerQueueDispatcherBackgroundService : BackgroundService | ||||
| { | ||||
|     private readonly ISchedulerPlannerQueue _plannerQueue; | ||||
|     private readonly IPlannerQueueDispatchService _dispatchService; | ||||
|     private readonly SchedulerWorkerOptions _options; | ||||
|     private readonly ILogger<PlannerQueueDispatcherBackgroundService> _logger; | ||||
|  | ||||
|     public PlannerQueueDispatcherBackgroundService( | ||||
|         ISchedulerPlannerQueue plannerQueue, | ||||
|         IPlannerQueueDispatchService dispatchService, | ||||
|         SchedulerWorkerOptions options, | ||||
|         ILogger<PlannerQueueDispatcherBackgroundService> logger) | ||||
|     { | ||||
|         _plannerQueue = plannerQueue ?? throw new ArgumentNullException(nameof(plannerQueue)); | ||||
|         _dispatchService = dispatchService ?? throw new ArgumentNullException(nameof(dispatchService)); | ||||
|         _options = options ?? throw new ArgumentNullException(nameof(options)); | ||||
|         _logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||||
|     } | ||||
|  | ||||
|     protected override async Task ExecuteAsync(CancellationToken stoppingToken) | ||||
|     { | ||||
|         var dispatchOptions = _options.Runner.Dispatch; | ||||
|         var consumer = dispatchOptions.ConsumerName; | ||||
|         var leaseRequest = new SchedulerQueueLeaseRequest(consumer, dispatchOptions.BatchSize, dispatchOptions.LeaseDuration); | ||||
|  | ||||
|         _logger.LogInformation("Planner dispatcher loop started with consumer {Consumer}.", consumer); | ||||
|  | ||||
|         while (!stoppingToken.IsCancellationRequested) | ||||
|         { | ||||
|             IReadOnlyList<ISchedulerQueueLease<PlannerQueueMessage>> leases; | ||||
|             try | ||||
|             { | ||||
|                 leases = await _plannerQueue.LeaseAsync(leaseRequest, stoppingToken).ConfigureAwait(false); | ||||
|             } | ||||
|             catch (OperationCanceledException) | ||||
|             { | ||||
|                 break; | ||||
|             } | ||||
|             catch (Exception ex) | ||||
|             { | ||||
|                 _logger.LogError(ex, "Planner dispatcher failed to lease messages; backing off."); | ||||
|                 await DelayAsync(dispatchOptions.IdleDelay, stoppingToken).ConfigureAwait(false); | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             if (leases.Count == 0) | ||||
|             { | ||||
|                 await DelayAsync(dispatchOptions.IdleDelay, stoppingToken).ConfigureAwait(false); | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             foreach (var lease in leases) | ||||
|             { | ||||
|                 await ProcessLeaseAsync(lease, dispatchOptions.LeaseDuration, stoppingToken).ConfigureAwait(false); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         _logger.LogInformation("Planner dispatcher loop stopping."); | ||||
|     } | ||||
|  | ||||
|     private async Task ProcessLeaseAsync( | ||||
|         ISchedulerQueueLease<PlannerQueueMessage> lease, | ||||
|         TimeSpan leaseDuration, | ||||
|         CancellationToken cancellationToken) | ||||
|     { | ||||
|         try | ||||
|         { | ||||
|             var result = await _dispatchService.DispatchAsync(lease.Message, cancellationToken).ConfigureAwait(false); | ||||
|             await lease.AcknowledgeAsync(cancellationToken).ConfigureAwait(false); | ||||
|  | ||||
|             _logger.LogDebug( | ||||
|                 "Dispatched planner message {MessageId} for run {RunId}; status={Status} segments={Segments} images={Images}.", | ||||
|                 lease.MessageId, | ||||
|                 result.RunId, | ||||
|                 result.Status, | ||||
|                 result.SegmentCount, | ||||
|                 result.ImageCount); | ||||
|         } | ||||
|         catch (OperationCanceledException) | ||||
|         { | ||||
|             throw; | ||||
|         } | ||||
|         catch (Exception ex) | ||||
|         { | ||||
|             _logger.LogError( | ||||
|                 ex, | ||||
|                 "Planner dispatch failed for message {MessageId} (run {RunId}); releasing for retry.", | ||||
|                 lease.MessageId, | ||||
|                 lease.RunId); | ||||
|  | ||||
|             try | ||||
|             { | ||||
|                 await lease.ReleaseAsync(SchedulerQueueReleaseDisposition.Retry, cancellationToken).ConfigureAwait(false); | ||||
|             } | ||||
|             catch (Exception releaseEx) when (releaseEx is not OperationCanceledException) | ||||
|             { | ||||
|                 _logger.LogWarning( | ||||
|                     releaseEx, | ||||
|                     "Failed to release planner message {MessageId}; attempting lease renewal.", | ||||
|                     lease.MessageId); | ||||
|  | ||||
|                 try | ||||
|                 { | ||||
|                     await lease.RenewAsync(leaseDuration, cancellationToken).ConfigureAwait(false); | ||||
|                 } | ||||
|                 catch (Exception renewEx) when (renewEx is not OperationCanceledException) | ||||
|                 { | ||||
|                     _logger.LogError( | ||||
|                         renewEx, | ||||
|                         "Lease renewal also failed for planner message {MessageId}; acknowledging to avoid tight loop.", | ||||
|                         lease.MessageId); | ||||
|  | ||||
|                     await lease.AcknowledgeAsync(cancellationToken).ConfigureAwait(false); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static async Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken) | ||||
|     { | ||||
|         if (delay <= TimeSpan.Zero) | ||||
|         { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         try | ||||
|         { | ||||
|             await Task.Delay(delay, cancellationToken).ConfigureAwait(false); | ||||
|         } | ||||
|         catch (OperationCanceledException) | ||||
|         { | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -9,7 +9,10 @@ | ||||
|     <ProjectReference Include="../StellaOps.Scheduler.Models/StellaOps.Scheduler.Models.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Scheduler.Storage.Mongo/StellaOps.Scheduler.Storage.Mongo.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Scheduler.Queue/StellaOps.Scheduler.Queue.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Notify.Models/StellaOps.Notify.Models.csproj" /> | ||||
|     <ProjectReference Include="../StellaOps.Notify.Queue/StellaOps.Notify.Queue.csproj" /> | ||||
|     <PackageReference Include="Cronos" Version="0.10.0" /> | ||||
|     <PackageReference Include="System.Threading.RateLimiting" Version="8.0.0" /> | ||||
|     <PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0-rc.2.25502.107" /> | ||||
|   </ItemGroup> | ||||
| </Project> | ||||
|   | ||||
| @@ -4,13 +4,15 @@ | ||||
| |----|--------|----------|------------|-------------|---------------| | ||||
| | SCHED-WORKER-16-201 | DOING (2025-10-27) | Scheduler Worker Guild | SCHED-QUEUE-16-401 | Planner loop (cron + event triggers) with lease management, fairness, and rate limiting (§6). | Planner integration tests cover cron/event triggers; rate limits enforced; logs include run IDs. | | ||||
| | SCHED-WORKER-16-202 | DONE (2025-10-27) | Scheduler Worker Guild | SCHED-IMPACT-16-301 | Wire ImpactIndex targeting (ResolveByPurls/vulns), dedupe, shard planning. | Targeting tests confirm correct image selection; dedupe documented; shards evenly distributed. | | ||||
| | SCHED-WORKER-16-203 | TODO | Scheduler Worker Guild | SCHED-WORKER-16-202 | Runner execution: call Scanner `/reports` (analysis-only) or `/scans` when configured; collect deltas; handle retries. | Runner tests stub Scanner; retries/backoff validated; deltas aggregated deterministically. | | ||||
| | SCHED-WORKER-16-204 | TODO | Scheduler Worker Guild | SCHED-WORKER-16-203 | Emit events (`scheduler.rescan.delta`, `scanner.report.ready`) for Notify/UI with summaries. | Events published to queue; payload schema documented; integration tests verify consumption. | | ||||
| | SCHED-WORKER-16-205 | TODO | Scheduler Worker Guild | SCHED-WORKER-16-201 | Metrics/telemetry: run stats, queue depth, planner latency, delta counts. | Metrics exported per spec; dashboards updated; alerts configured. | | ||||
| | SCHED-WORKER-16-203 | DONE (2025-10-27) | Scheduler Worker Guild | SCHED-WORKER-16-202 | Runner execution: call Scanner `/reports` (analysis-only) or `/scans` when configured; collect deltas; handle retries. | Runner tests stub Scanner; retries/backoff validated; deltas aggregated deterministically. | | ||||
| | SCHED-WORKER-16-204 | DONE (2025-10-27) | Scheduler Worker Guild | SCHED-WORKER-16-203 | Emit events (`scheduler.rescan.delta`, `scanner.report.ready`) for Notify/UI with summaries. | Events published to queue; payload schema documented; integration tests verify consumption. | | ||||
| | SCHED-WORKER-16-205 | DONE (2025-10-27) | Scheduler Worker Guild | SCHED-WORKER-16-201 | Metrics/telemetry: run stats, queue depth, planner latency, delta counts. | Metrics exported per spec; dashboards updated; alerts configured. | | ||||
|  | ||||
| > 2025-10-27: Impact targeting sanitizes selector-constrained results, dedupes digests, and documents shard planning in `docs/SCHED-WORKER-16-202-IMPACT-TARGETING.md`. | ||||
|  | ||||
| > 2025-10-27: Planner loop processes Planning runs via PlannerExecutionService; documented in docs/SCHED-WORKER-16-201-PLANNER.md. | ||||
|  | ||||
| > 2025-10-27: Runner dispatcher + execution service documented in docs/SCHED-WORKER-16-203-RUNNER.md; queue pipeline now drives scanner invocations, aggregates deltas back into run stats, and `AddSchedulerWorker` wires the background services into the host. | ||||
| ## Policy Engine v2 (Sprint 20) | ||||
|  | ||||
| | ID | Status | Owner(s) | Depends on | Description | Exit Criteria | | ||||
|   | ||||
| @@ -0,0 +1,54 @@ | ||||
| # SCHED-WORKER-16-203 — Runner Execution Pipeline | ||||
|  | ||||
| _Sprint 16 · Scheduler Worker Guild_ | ||||
|  | ||||
| This increment brings the scheduler runner online. The worker now consumes the | ||||
| planner queue, shards impact sets into deterministic runner segments, executes | ||||
| them against Scanner, and aggregates deltas back into run state. | ||||
|  | ||||
| ## Planner queue dispatch | ||||
|  | ||||
| `PlannerQueueDispatchService` consumes `PlannerQueueMessage` payloads and uses | ||||
| `ImpactShardPlanner` to slice the associated `ImpactSet`. Each shard yields a | ||||
| stable `RunnerSegmentQueueMessage`: | ||||
|  | ||||
| - `segmentId` is `{runId}:{shardIndex:D4}` for idempotency. | ||||
| - Attributes include schedule mode, limits, shard size, and usage hints. | ||||
| - `RatePerSecond` carries through schedule limits so execution can pace calls. | ||||
|  | ||||
| `PlannerQueueDispatcherBackgroundService` leases planner queue messages, invokes | ||||
| the dispatch service, and releases messages on failure with retry semantics. | ||||
|  | ||||
| ## Runner execution | ||||
|  | ||||
| `RunnerBackgroundService` leases runner segments and hands them to | ||||
| `RunnerExecutionService`. The execution service: | ||||
|  | ||||
| 1. Loads the target `Run` (marking `StartedAt` when first segment processes). | ||||
| 2. Calls `IScannerReportClient` (`HttpScannerReportClient`) for each digest | ||||
|    according to schedule mode (`analysis-only` vs. `content-refresh`) and usage | ||||
|    flag. A light retry/backoff loop shields transient failures. | ||||
| 3. Aggregates output into `DeltaSummary` records, updating cumulative stats | ||||
|    (`Completed`, `Deltas`, severity counters) in a deterministic manner. | ||||
| 4. Persists the updated run and projects schedule summaries when the update | ||||
|    succeeds. | ||||
| 5. Signals completion when cumulative `Completed >= Queued`. | ||||
|  | ||||
| Segment processing is idempotent—the same segment will re-create the same delta | ||||
| summaries and stat deltas. Failures bubble so the queue retry policy can apply | ||||
| exponential backoff. | ||||
|  | ||||
| ## Tests | ||||
|  | ||||
| - `PlannerQueueDispatchServiceTests` verify shard sizing, attribute emission, and | ||||
|   deterministic `segmentId` construction with schedule limits. | ||||
| - `RunnerExecutionServiceTests` cover stat aggregation, delta persistence, and | ||||
|   missing-run handling. Scanner interactions are stubbed via `IScannerReportClient`. | ||||
|  | ||||
| ## Follow-ups | ||||
|  | ||||
| - `AddSchedulerWorker(configuration)` registers impact targeting, planner | ||||
|   dispatch, runner execution, and the three hosted services. Call it after | ||||
|   `AddSchedulerQueues` and `AddSchedulerMongoStorage` when bootstrapping the | ||||
|   worker host. | ||||
| - Extend execution metrics (Sprint 16-205) before exposing Prometheus counters. | ||||
| @@ -0,0 +1,36 @@ | ||||
| # SCHED-WORKER-16-204 — Platform Events | ||||
|  | ||||
| _Sprint 16 · Scheduler Worker Guild_ | ||||
|  | ||||
| The runner now emits canonical platform events so Notify/UI can surface | ||||
| rescan activity in near real time. | ||||
|  | ||||
| ## Event emission | ||||
|  | ||||
| - `scheduler.rescan.delta@1` — published once per runner segment when that | ||||
|   segment produced at least one meaningful delta (new critical/high findings or | ||||
|   KEV hits). Payload batches all impacted digests for the segment and includes | ||||
|   severity totals. Reason strings (manual trigger, Feedser/Vexer exports) flow | ||||
|   from the run reason when present. | ||||
| - `scanner.report.ready@1` — published for every image the runner processes. | ||||
|   The payload mirrors the Scanner contract (verdict, summary buckets, DSSE | ||||
|   envelope) and surfaces delta counts/links when available. Scope information | ||||
|   is derived from the impact snapshot so notify rules can match on registry and | ||||
|   repository. | ||||
|  | ||||
| Events are formatted using `NotifyEvent` envelopes and published via the | ||||
| configured Notify queue transport. When Notify is not configured the worker | ||||
| logs once and suppresses event emission. | ||||
|  | ||||
| ## Payload references | ||||
|  | ||||
| - Schema source: `docs/events/scheduler.rescan.delta@1.json` | ||||
| - Schema source: `docs/events/scanner.report.ready@1.json` | ||||
| - Sample payloads: `docs/events/samples/*.sample.json` | ||||
|  | ||||
| ## Metrics tie-in | ||||
|  | ||||
| Event emission complements the new observability counters introduced in | ||||
| `SCHED-WORKER-16-205` (runner segment totals, delta counts, backlog gauge) so | ||||
| that operators can correlate queue depth with downstream notifications. | ||||
|  | ||||
| @@ -0,0 +1,43 @@ | ||||
| # SCHED-WORKER-16-205 — Scheduler Worker Observability | ||||
|  | ||||
| _Sprint 16 · Scheduler Worker Guild_ | ||||
|  | ||||
| The scheduler worker now exposes first-class metrics covering planner latency, | ||||
| runner throughput, and backlog health. | ||||
|  | ||||
| ## Meter: `StellaOps.Scheduler.Worker` | ||||
|  | ||||
| | Metric | Type | Tags | Description | | ||||
| | --- | --- | --- | --- | | ||||
| | `scheduler_planner_runs_total` | Counter | `mode`, `status` | Planner outcomes (`enqueued`, `no_work`, `failed`). | | ||||
| | `scheduler_planner_latency_seconds` | Histogram | `mode`, `status` | Time between run creation and planner completion. | | ||||
| | `scheduler_runner_segments_total` | Counter | `mode`, `status` | Runner segments processed (`Completed`, `persist_failed`, `RunMissing`). | | ||||
| | `scheduler_runner_images_total` | Counter | `mode`, `delta` | Images processed per mode, split by whether a delta was observed. | | ||||
| | `scheduler_runner_delta_total` | Counter | `mode` | Total new findings observed. | | ||||
| | `scheduler_runner_delta_critical_total` | Counter | `mode` | Critical findings observed. | | ||||
| | `scheduler_runner_delta_high_total` | Counter | `mode` | High findings observed. | | ||||
| | `scheduler_runner_delta_kev_total` | Counter | `mode` | KEV hits surfaced across runner segments. | | ||||
| | `scheduler_run_duration_seconds` | Histogram | `mode`, `result` | End-to-end run durations (currently recorded for successful completions). | | ||||
| | `scheduler_runs_active` | Up/down counter | `mode` | Active runs in-flight. | | ||||
| | `scheduler_runner_backlog` | Observable gauge | `mode`, `scheduleId` | Remaining images awaiting runner processing per schedule. | | ||||
|  | ||||
| ## Instrumentation notes | ||||
|  | ||||
| - Planner records latency once a run transitions out of `Planning`. `no_work` | ||||
|   completions emit zero-duration runs without incrementing the active counter. | ||||
| - Runner updates backlog after every segment and decrements the active counter | ||||
|   when a run reaches `Completed`. | ||||
| - Delta counters aggregate per severity and KEV hit; they only increment when | ||||
|   `DeltaSummary` reports meaningful changes. | ||||
| - Metrics are emitted regardless of Notify availability so operators can track | ||||
|   queue pressure even in air-gapped deployments. | ||||
|  | ||||
| ## Dashboards & alerts | ||||
|  | ||||
| - **Grafana dashboard:** `docs/ops/scheduler-worker-grafana-dashboard.json` | ||||
|   (import into Prometheus-backed Grafana). Panels mirror the metrics above with | ||||
|   mode filters. | ||||
| - **Prometheus rules:** `docs/ops/scheduler-worker-prometheus-rules.yaml` | ||||
|   provides planner failure/latency, backlog, and stuck-run alerts. | ||||
| - **Operations guide:** see `docs/ops/scheduler-worker-operations.md` for | ||||
|   runbook steps, alert context, and dashboard wiring instructions. | ||||
		Reference in New Issue
	
	Block a user