up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Determinism;
|
||||
|
||||
@@ -19,30 +18,7 @@ public sealed record DeterminismReport(
|
||||
double ThresholdOverall,
|
||||
double ThresholdImage,
|
||||
IReadOnlyList<DeterminismImageReport> Images)
|
||||
{
|
||||
/// <summary>
/// Builds a <see cref="DeterminismReport"/> from a harness report combined with
/// caller-supplied release metadata.
/// </summary>
/// <param name="harnessReport">Harness output supplying the overall score, both thresholds and the per-image results.</param>
/// <param name="release">Value stored in <c>Release</c>.</param>
/// <param name="platform">Value stored in <c>Platform</c>.</param>
/// <param name="policySha">Optional value stored in <c>PolicySha</c>.</param>
/// <param name="feedsSha">Optional value stored in <c>FeedsSha</c>.</param>
/// <param name="scannerSha">Optional value stored in <c>ScannerSha</c>.</param>
/// <param name="version">Value stored in <c>Version</c>; defaults to <c>"1"</c>.</param>
/// <returns>A new report whose images are converted with <see cref="DeterminismImageReport.FromHarness"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="harnessReport"/> is <see langword="null"/>.</exception>
public static DeterminismReport FromHarness(
    Harness.DeterminismReport harnessReport,
    string release,
    string platform,
    string? policySha = null,
    string? feedsSha = null,
    string? scannerSha = null,
    string version = "1")
{
    ArgumentNullException.ThrowIfNull(harnessReport);

    // Convert every harness image entry up front so the constructor call below stays flat.
    IReadOnlyList<DeterminismImageReport> convertedImages = harnessReport.Images
        .Select(image => DeterminismImageReport.FromHarness(image))
        .ToList();

    return new DeterminismReport(
        Version: version,
        Release: release,
        Platform: platform,
        PolicySha: policySha,
        FeedsSha: feedsSha,
        ScannerSha: scannerSha,
        OverallScore: harnessReport.OverallScore,
        ThresholdOverall: harnessReport.OverallThreshold,
        ThresholdImage: harnessReport.ImageThreshold,
        Images: convertedImages);
}
|
||||
}
|
||||
;
|
||||
|
||||
public sealed record DeterminismImageReport(
|
||||
string Image,
|
||||
@@ -50,30 +26,9 @@ public sealed record DeterminismImageReport(
|
||||
int Identical,
|
||||
double Score,
|
||||
IReadOnlyDictionary<string, string> ArtifactHashes,
|
||||
IReadOnlyList<DeterminismRunReport> RunsDetail)
|
||||
{
|
||||
/// <summary>
/// Converts a harness image report into a <see cref="DeterminismImageReport"/>.
/// </summary>
/// <param name="report">Harness image report to convert.</param>
/// <returns>
/// A report carrying the image digest, run counters, score and baseline hashes of
/// <paramref name="report"/>, with each run converted via <see cref="DeterminismRunReport.FromHarness"/>.
/// </returns>
public static DeterminismImageReport FromHarness(Harness.DeterminismImageReport report)
{
    // Materialise the per-run details first; the harness baseline hashes are passed through as-is.
    var runDetails = report.RunReports
        .Select(run => DeterminismRunReport.FromHarness(run))
        .ToList();

    return new DeterminismImageReport(
        Image: report.ImageDigest,
        Runs: report.Runs,
        Identical: report.Identical,
        Score: report.Score,
        ArtifactHashes: report.BaselineHashes,
        RunsDetail: runDetails);
}
|
||||
}
|
||||
IReadOnlyList<DeterminismRunReport> RunsDetail);
|
||||
|
||||
public sealed record DeterminismRunReport(
|
||||
int RunIndex,
|
||||
IReadOnlyDictionary<string, string> ArtifactHashes,
|
||||
IReadOnlyList<string> NonDeterministic)
|
||||
{
|
||||
/// <summary>
/// Converts a harness run report into a <see cref="DeterminismRunReport"/>, copying the run index,
/// the artifact hashes and the list of non-deterministic artifacts.
/// </summary>
/// <param name="report">Harness run report to convert.</param>
/// <returns>The equivalent <see cref="DeterminismRunReport"/>.</returns>
public static DeterminismRunReport FromHarness(Harness.DeterminismRunReport report)
    => new(
        RunIndex: report.RunIndex,
        ArtifactHashes: report.ArtifactHashes,
        NonDeterministic: report.NonDeterministicArtifacts);
|
||||
}
|
||||
IReadOnlyList<string> NonDeterministic);
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
/// <summary>
/// Process-wide telemetry primitives for the scanner worker: one <see cref="System.Diagnostics.ActivitySource"/>
/// and one <see cref="System.Diagnostics.Metrics.Meter"/>, together with the names they are registered under.
/// </summary>
public static class ScannerWorkerInstrumentation
{
    /// <summary>Name given to <see cref="ActivitySource"/>.</summary>
    public const string ActivitySourceName = "StellaOps.Scanner.Worker.Job";

    /// <summary>Name given to <see cref="Meter"/>.</summary>
    public const string MeterName = "StellaOps.Scanner.Worker";

    /// <summary>Shared activity source created with <see cref="ActivitySourceName"/>.</summary>
    public static ActivitySource ActivitySource { get; } = new ActivitySource(ActivitySourceName);

    /// <summary>Shared meter created with <see cref="MeterName"/> and version <c>1.0.0</c>.</summary>
    public static Meter Meter { get; } = new Meter(MeterName, version: "1.0.0");
}
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
/// <summary>
/// Process-wide telemetry primitives for the scanner worker: one <see cref="System.Diagnostics.ActivitySource"/>
/// and one <see cref="System.Diagnostics.Metrics.Meter"/>, together with the names they are registered under.
/// </summary>
public static class ScannerWorkerInstrumentation
{
    /// <summary>Name given to <see cref="ActivitySource"/>.</summary>
    public const string ActivitySourceName = "StellaOps.Scanner.Worker.Job";

    /// <summary>Name given to <see cref="Meter"/>.</summary>
    public const string MeterName = "StellaOps.Scanner.Worker";

    /// <summary>Shared activity source created with <see cref="ActivitySourceName"/>.</summary>
    public static ActivitySource ActivitySource { get; } = new ActivitySource(ActivitySourceName);

    /// <summary>Shared meter created with <see cref="MeterName"/> and version <c>1.0.0</c>.</summary>
    public static Meter Meter { get; } = new Meter(MeterName, version: "1.0.0");
}
|
||||
|
||||
@@ -3,18 +3,20 @@ using System.Collections.Generic;
|
||||
using System.Diagnostics.Metrics;
|
||||
using StellaOps.Scanner.Surface.Secrets;
|
||||
using StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
public sealed class ScannerWorkerMetrics
|
||||
{
|
||||
private readonly Histogram<double> _queueLatencyMs;
|
||||
private readonly Histogram<double> _jobDurationMs;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
public sealed class ScannerWorkerMetrics
|
||||
{
|
||||
private readonly Histogram<double> _queueLatencyMs;
|
||||
private readonly Histogram<double> _jobDurationMs;
|
||||
private readonly Histogram<double> _stageDurationMs;
|
||||
private readonly Counter<long> _jobsCompleted;
|
||||
private readonly Counter<long> _jobsFailed;
|
||||
private readonly Counter<long> _languageCacheHits;
|
||||
private readonly Counter<long> _languageCacheMisses;
|
||||
private readonly Counter<long> _osCacheHits;
|
||||
private readonly Counter<long> _osCacheMisses;
|
||||
private readonly Counter<long> _registrySecretRequests;
|
||||
private readonly Histogram<double> _registrySecretTtlSeconds;
|
||||
private readonly Counter<long> _surfaceManifestsPublished;
|
||||
@@ -22,21 +24,21 @@ public sealed class ScannerWorkerMetrics
|
||||
private readonly Counter<long> _surfaceManifestFailures;
|
||||
private readonly Counter<long> _surfacePayloadPersisted;
|
||||
private readonly Histogram<double> _surfaceManifestPublishDurationMs;
|
||||
|
||||
public ScannerWorkerMetrics()
|
||||
{
|
||||
_queueLatencyMs = ScannerWorkerInstrumentation.Meter.CreateHistogram<double>(
|
||||
"scanner_worker_queue_latency_ms",
|
||||
unit: "ms",
|
||||
description: "Time from job enqueue to lease acquisition.");
|
||||
_jobDurationMs = ScannerWorkerInstrumentation.Meter.CreateHistogram<double>(
|
||||
"scanner_worker_job_duration_ms",
|
||||
unit: "ms",
|
||||
description: "Total processing duration per job.");
|
||||
_stageDurationMs = ScannerWorkerInstrumentation.Meter.CreateHistogram<double>(
|
||||
"scanner_worker_stage_duration_ms",
|
||||
unit: "ms",
|
||||
description: "Stage execution duration per job.");
|
||||
|
||||
public ScannerWorkerMetrics()
|
||||
{
|
||||
_queueLatencyMs = ScannerWorkerInstrumentation.Meter.CreateHistogram<double>(
|
||||
"scanner_worker_queue_latency_ms",
|
||||
unit: "ms",
|
||||
description: "Time from job enqueue to lease acquisition.");
|
||||
_jobDurationMs = ScannerWorkerInstrumentation.Meter.CreateHistogram<double>(
|
||||
"scanner_worker_job_duration_ms",
|
||||
unit: "ms",
|
||||
description: "Total processing duration per job.");
|
||||
_stageDurationMs = ScannerWorkerInstrumentation.Meter.CreateHistogram<double>(
|
||||
"scanner_worker_stage_duration_ms",
|
||||
unit: "ms",
|
||||
description: "Stage execution duration per job.");
|
||||
_jobsCompleted = ScannerWorkerInstrumentation.Meter.CreateCounter<long>(
|
||||
"scanner_worker_jobs_completed_total",
|
||||
description: "Number of successfully completed scan jobs.");
|
||||
@@ -49,6 +51,12 @@ public sealed class ScannerWorkerMetrics
|
||||
_languageCacheMisses = ScannerWorkerInstrumentation.Meter.CreateCounter<long>(
|
||||
"scanner_worker_language_cache_misses_total",
|
||||
description: "Number of language analyzer cache misses encountered by the worker.");
|
||||
_osCacheHits = ScannerWorkerInstrumentation.Meter.CreateCounter<long>(
|
||||
"scanner_worker_os_cache_hits_total",
|
||||
description: "Number of OS analyzer cache hits encountered by the worker.");
|
||||
_osCacheMisses = ScannerWorkerInstrumentation.Meter.CreateCounter<long>(
|
||||
"scanner_worker_os_cache_misses_total",
|
||||
description: "Number of OS analyzer cache misses encountered by the worker.");
|
||||
_registrySecretRequests = ScannerWorkerInstrumentation.Meter.CreateCounter<long>(
|
||||
"scanner_worker_registry_secret_requests_total",
|
||||
description: "Number of registry secret resolution attempts performed by the worker.");
|
||||
@@ -72,28 +80,28 @@ public sealed class ScannerWorkerMetrics
|
||||
"scanner_worker_surface_manifest_publish_duration_ms",
|
||||
unit: "ms",
|
||||
description: "Duration in milliseconds to persist and publish surface manifests.");
|
||||
}
|
||||
|
||||
/// <summary>
/// Records the queue latency, in milliseconds, on the queue-latency histogram.
/// Zero or negative latencies are ignored.
/// </summary>
/// <param name="context">Job context passed to <c>CreateTags</c> to build the measurement tags.</param>
/// <param name="latency">Latency to record.</param>
public void RecordQueueLatency(ScanJobContext context, TimeSpan latency)
{
    if (latency > TimeSpan.Zero)
    {
        var latencyMs = latency.TotalMilliseconds;
        _queueLatencyMs.Record(latencyMs, CreateTags(context));
    }
}
|
||||
|
||||
/// <summary>
/// Records the job duration, in milliseconds, on the job-duration histogram.
/// Zero or negative durations are ignored.
/// </summary>
/// <param name="context">Job context passed to <c>CreateTags</c> to build the measurement tags.</param>
/// <param name="duration">Duration to record.</param>
public void RecordJobDuration(ScanJobContext context, TimeSpan duration)
{
    if (duration > TimeSpan.Zero)
    {
        var durationMs = duration.TotalMilliseconds;
        _jobDurationMs.Record(durationMs, CreateTags(context));
    }
}
|
||||
|
||||
}
|
||||
|
||||
/// <summary>
/// Records the queue latency, in milliseconds, on the queue-latency histogram.
/// Zero or negative latencies are ignored.
/// </summary>
/// <param name="context">Job context passed to <c>CreateTags</c> to build the measurement tags.</param>
/// <param name="latency">Latency to record.</param>
public void RecordQueueLatency(ScanJobContext context, TimeSpan latency)
{
    if (latency > TimeSpan.Zero)
    {
        var latencyMs = latency.TotalMilliseconds;
        _queueLatencyMs.Record(latencyMs, CreateTags(context));
    }
}
|
||||
|
||||
/// <summary>
/// Records the job duration, in milliseconds, on the job-duration histogram.
/// Zero or negative durations are ignored.
/// </summary>
/// <param name="context">Job context passed to <c>CreateTags</c> to build the measurement tags.</param>
/// <param name="duration">Duration to record.</param>
public void RecordJobDuration(ScanJobContext context, TimeSpan duration)
{
    if (duration > TimeSpan.Zero)
    {
        var durationMs = duration.TotalMilliseconds;
        _jobDurationMs.Record(durationMs, CreateTags(context));
    }
}
|
||||
|
||||
public void RecordStageDuration(ScanJobContext context, string stage, TimeSpan duration)
|
||||
{
|
||||
if (duration <= TimeSpan.Zero)
|
||||
@@ -103,12 +111,12 @@ public sealed class ScannerWorkerMetrics
|
||||
|
||||
_stageDurationMs.Record(duration.TotalMilliseconds, CreateTags(context, stage: stage));
|
||||
}
|
||||
|
||||
/// <summary>Adds one to the completed-jobs counter, tagged via <c>CreateTags</c> for the given job.</summary>
/// <param name="context">Job context used to build the measurement tags.</param>
public void IncrementJobCompleted(ScanJobContext context)
    => _jobsCompleted.Add(1, CreateTags(context));
|
||||
|
||||
|
||||
/// <summary>Adds one to the completed-jobs counter, tagged via <c>CreateTags</c> for the given job.</summary>
/// <param name="context">Job context used to build the measurement tags.</param>
public void IncrementJobCompleted(ScanJobContext context)
    => _jobsCompleted.Add(1, CreateTags(context));
|
||||
|
||||
public void IncrementJobFailed(ScanJobContext context, string failureReason)
|
||||
{
|
||||
_jobsFailed.Add(1, CreateTags(context, failureReason: failureReason));
|
||||
@@ -124,6 +132,16 @@ public sealed class ScannerWorkerMetrics
|
||||
_languageCacheMisses.Add(1, CreateTags(context, analyzerId: analyzerId));
|
||||
}
|
||||
|
||||
/// <summary>Adds one to the OS analyzer cache-hit counter.</summary>
/// <param name="context">Job context used to build the measurement tags.</param>
/// <param name="analyzerId">Analyzer identifier forwarded to <c>CreateTags</c>.</param>
public void RecordOsCacheHit(ScanJobContext context, string analyzerId)
    => _osCacheHits.Add(1, CreateTags(context, analyzerId: analyzerId));
|
||||
|
||||
/// <summary>Adds one to the OS analyzer cache-miss counter.</summary>
/// <param name="context">Job context used to build the measurement tags.</param>
/// <param name="analyzerId">Analyzer identifier forwarded to <c>CreateTags</c>.</param>
public void RecordOsCacheMiss(ScanJobContext context, string analyzerId)
    => _osCacheMisses.Add(1, CreateTags(context, analyzerId: analyzerId));
|
||||
|
||||
public void RecordRegistrySecretResolved(
|
||||
ScanJobContext context,
|
||||
string secretName,
|
||||
@@ -253,18 +271,18 @@ public sealed class ScannerWorkerMetrics
|
||||
new("scan.id", context.ScanId),
|
||||
new("attempt", context.Lease.Attempt),
|
||||
};
|
||||
|
||||
if (context.Lease.Metadata.TryGetValue("queue", out var queueName) && !string.IsNullOrWhiteSpace(queueName))
|
||||
{
|
||||
tags.Add(new KeyValuePair<string, object?>("queue", queueName));
|
||||
}
|
||||
|
||||
if (context.Lease.Metadata.TryGetValue("job.kind", out var jobKind) && !string.IsNullOrWhiteSpace(jobKind))
|
||||
{
|
||||
tags.Add(new KeyValuePair<string, object?>("job.kind", jobKind));
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(stage))
|
||||
|
||||
if (context.Lease.Metadata.TryGetValue("queue", out var queueName) && !string.IsNullOrWhiteSpace(queueName))
|
||||
{
|
||||
tags.Add(new KeyValuePair<string, object?>("queue", queueName));
|
||||
}
|
||||
|
||||
if (context.Lease.Metadata.TryGetValue("job.kind", out var jobKind) && !string.IsNullOrWhiteSpace(jobKind))
|
||||
{
|
||||
tags.Add(new KeyValuePair<string, object?>("job.kind", jobKind));
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(stage))
|
||||
{
|
||||
tags.Add(new KeyValuePair<string, object?>("stage", stage));
|
||||
}
|
||||
|
||||
@@ -1,59 +1,59 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Reflection;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using OpenTelemetry.Metrics;
|
||||
using OpenTelemetry.Resources;
|
||||
using OpenTelemetry.Trace;
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
public static class TelemetryExtensions
|
||||
{
|
||||
public static void ConfigureScannerWorkerTelemetry(this IHostApplicationBuilder builder, ScannerWorkerOptions options)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(builder);
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
|
||||
var telemetry = options.Telemetry;
|
||||
if (!telemetry.EnableTelemetry)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var openTelemetry = builder.Services.AddOpenTelemetry();
|
||||
|
||||
openTelemetry.ConfigureResource(resource =>
|
||||
{
|
||||
var version = Assembly.GetExecutingAssembly().GetName().Version?.ToString() ?? "unknown";
|
||||
resource.AddService(telemetry.ServiceName, serviceVersion: version, serviceInstanceId: Environment.MachineName);
|
||||
resource.AddAttributes(new[]
|
||||
{
|
||||
new KeyValuePair<string, object>("deployment.environment", builder.Environment.EnvironmentName),
|
||||
});
|
||||
|
||||
foreach (var kvp in telemetry.ResourceAttributes)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(kvp.Key) || kvp.Value is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
resource.AddAttributes(new[] { new KeyValuePair<string, object>(kvp.Key, kvp.Value) });
|
||||
}
|
||||
});
|
||||
|
||||
if (telemetry.EnableTracing)
|
||||
{
|
||||
openTelemetry.WithTracing(tracing =>
|
||||
{
|
||||
tracing.AddSource(ScannerWorkerInstrumentation.ActivitySourceName);
|
||||
ConfigureExporter(tracing, telemetry);
|
||||
});
|
||||
}
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Reflection;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using OpenTelemetry.Metrics;
|
||||
using OpenTelemetry.Resources;
|
||||
using OpenTelemetry.Trace;
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
public static class TelemetryExtensions
|
||||
{
|
||||
public static void ConfigureScannerWorkerTelemetry(this IHostApplicationBuilder builder, ScannerWorkerOptions options)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(builder);
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
|
||||
var telemetry = options.Telemetry;
|
||||
if (!telemetry.EnableTelemetry)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var openTelemetry = builder.Services.AddOpenTelemetry();
|
||||
|
||||
openTelemetry.ConfigureResource(resource =>
|
||||
{
|
||||
var version = Assembly.GetExecutingAssembly().GetName().Version?.ToString() ?? "unknown";
|
||||
resource.AddService(telemetry.ServiceName, serviceVersion: version, serviceInstanceId: Environment.MachineName);
|
||||
resource.AddAttributes(new[]
|
||||
{
|
||||
new KeyValuePair<string, object>("deployment.environment", builder.Environment.EnvironmentName),
|
||||
});
|
||||
|
||||
foreach (var kvp in telemetry.ResourceAttributes)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(kvp.Key) || kvp.Value is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
resource.AddAttributes(new[] { new KeyValuePair<string, object>(kvp.Key, kvp.Value) });
|
||||
}
|
||||
});
|
||||
|
||||
if (telemetry.EnableTracing)
|
||||
{
|
||||
openTelemetry.WithTracing(tracing =>
|
||||
{
|
||||
tracing.AddSource(ScannerWorkerInstrumentation.ActivitySourceName);
|
||||
ConfigureExporter(tracing, telemetry);
|
||||
});
|
||||
}
|
||||
|
||||
if (telemetry.EnableMetrics)
|
||||
{
|
||||
openTelemetry.WithMetrics(metrics =>
|
||||
@@ -68,38 +68,38 @@ public static class TelemetryExtensions
|
||||
|
||||
ConfigureExporter(metrics, telemetry);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers trace exporters: the OTLP exporter when an endpoint is configured, and the console
/// exporter when it is explicitly requested or when no OTLP endpoint is configured.
/// </summary>
/// <param name="tracing">Tracer provider builder to add exporters to.</param>
/// <param name="telemetry">Telemetry options supplying <c>OtlpEndpoint</c> and <c>ExportConsole</c>.</param>
private static void ConfigureExporter(TracerProviderBuilder tracing, ScannerWorkerOptions.TelemetryOptions telemetry)
{
    // No endpoint configured: the console exporter is the only sink.
    if (string.IsNullOrWhiteSpace(telemetry.OtlpEndpoint))
    {
        tracing.AddConsoleExporter();
        return;
    }

    tracing.AddOtlpExporter(otlp =>
    {
        otlp.Endpoint = new Uri(telemetry.OtlpEndpoint);
    });

    // With an endpoint present, console output is opt-in.
    if (telemetry.ExportConsole)
    {
        tracing.AddConsoleExporter();
    }
}
|
||||
|
||||
/// <summary>
/// Registers metric exporters: the OTLP exporter when an endpoint is configured, and the console
/// exporter when it is explicitly requested or when no OTLP endpoint is configured.
/// </summary>
/// <param name="metrics">Meter provider builder to add exporters to.</param>
/// <param name="telemetry">Telemetry options supplying <c>OtlpEndpoint</c> and <c>ExportConsole</c>.</param>
private static void ConfigureExporter(MeterProviderBuilder metrics, ScannerWorkerOptions.TelemetryOptions telemetry)
{
    // No endpoint configured: the console exporter is the only sink.
    if (string.IsNullOrWhiteSpace(telemetry.OtlpEndpoint))
    {
        metrics.AddConsoleExporter();
        return;
    }

    metrics.AddOtlpExporter(otlp =>
    {
        otlp.Endpoint = new Uri(telemetry.OtlpEndpoint);
    });

    // With an endpoint present, console output is opt-in.
    if (telemetry.ExportConsole)
    {
        metrics.AddConsoleExporter();
    }
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers trace exporters: the OTLP exporter when an endpoint is configured, and the console
/// exporter when it is explicitly requested or when no OTLP endpoint is configured.
/// </summary>
/// <param name="tracing">Tracer provider builder to add exporters to.</param>
/// <param name="telemetry">Telemetry options supplying <c>OtlpEndpoint</c> and <c>ExportConsole</c>.</param>
private static void ConfigureExporter(TracerProviderBuilder tracing, ScannerWorkerOptions.TelemetryOptions telemetry)
{
    // No endpoint configured: the console exporter is the only sink.
    if (string.IsNullOrWhiteSpace(telemetry.OtlpEndpoint))
    {
        tracing.AddConsoleExporter();
        return;
    }

    tracing.AddOtlpExporter(otlp =>
    {
        otlp.Endpoint = new Uri(telemetry.OtlpEndpoint);
    });

    // With an endpoint present, console output is opt-in.
    if (telemetry.ExportConsole)
    {
        tracing.AddConsoleExporter();
    }
}
|
||||
|
||||
/// <summary>
/// Registers metric exporters: the OTLP exporter when an endpoint is configured, and the console
/// exporter when it is explicitly requested or when no OTLP endpoint is configured.
/// </summary>
/// <param name="metrics">Meter provider builder to add exporters to.</param>
/// <param name="telemetry">Telemetry options supplying <c>OtlpEndpoint</c> and <c>ExportConsole</c>.</param>
private static void ConfigureExporter(MeterProviderBuilder metrics, ScannerWorkerOptions.TelemetryOptions telemetry)
{
    // No endpoint configured: the console exporter is the only sink.
    if (string.IsNullOrWhiteSpace(telemetry.OtlpEndpoint))
    {
        metrics.AddConsoleExporter();
        return;
    }

    metrics.AddOtlpExporter(otlp =>
    {
        otlp.Endpoint = new Uri(telemetry.OtlpEndpoint);
    });

    // With an endpoint present, console output is opt-in.
    if (telemetry.ExportConsole)
    {
        metrics.AddConsoleExporter();
    }
}
|
||||
}
|
||||
|
||||
@@ -1,108 +1,111 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scanner.Worker.Diagnostics;
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
using StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Hosting;
|
||||
|
||||
public sealed partial class ScannerWorkerHostedService : BackgroundService
|
||||
{
|
||||
private readonly IScanJobSource _jobSource;
|
||||
private readonly ScanJobProcessor _processor;
|
||||
private readonly LeaseHeartbeatService _heartbeatService;
|
||||
private readonly ScannerWorkerMetrics _metrics;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IOptionsMonitor<ScannerWorkerOptions> _options;
|
||||
private readonly ILogger<ScannerWorkerHostedService> _logger;
|
||||
private readonly IDelayScheduler _delayScheduler;
|
||||
|
||||
/// <summary>
/// Creates the hosted service and stores its dependencies.
/// </summary>
/// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
public ScannerWorkerHostedService(
    IScanJobSource jobSource,
    ScanJobProcessor processor,
    LeaseHeartbeatService heartbeatService,
    ScannerWorkerMetrics metrics,
    TimeProvider timeProvider,
    IDelayScheduler delayScheduler,
    IOptionsMonitor<ScannerWorkerOptions> options,
    ILogger<ScannerWorkerHostedService> logger)
{
    // Validate in declaration order so the first null argument is the one reported.
    ArgumentNullException.ThrowIfNull(jobSource);
    ArgumentNullException.ThrowIfNull(processor);
    ArgumentNullException.ThrowIfNull(heartbeatService);
    ArgumentNullException.ThrowIfNull(metrics);
    ArgumentNullException.ThrowIfNull(timeProvider);
    ArgumentNullException.ThrowIfNull(delayScheduler);
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(logger);

    _jobSource = jobSource;
    _processor = processor;
    _heartbeatService = heartbeatService;
    _metrics = metrics;
    _timeProvider = timeProvider;
    _delayScheduler = delayScheduler;
    _options = options;
    _logger = logger;
}
|
||||
|
||||
/// <summary>
/// Main worker loop: leases jobs from the job source, runs them concurrently up to
/// <c>MaxConcurrentJobs</c>, backs off when no lease is available, and waits for in-flight
/// jobs before returning.
/// </summary>
/// <param name="stoppingToken">Token observed for shutdown; cancelling it ends the polling loop.</param>
/// <returns>A task that completes once polling has stopped and all started jobs have finished.</returns>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    var runningJobs = new HashSet<Task>();
    var delayStrategy = new PollDelayStrategy(_options.CurrentValue.Polling);

    WorkerStarted(_logger);

    while (!stoppingToken.IsCancellationRequested)
    {
        // Drop finished jobs so the concurrency check only counts work still in flight.
        runningJobs.RemoveWhere(static task => task.IsCompleted);

        // Options are re-read on every iteration, so the concurrency limit is always the current value.
        var options = _options.CurrentValue;
        if (runningJobs.Count >= options.MaxConcurrentJobs)
        {
            var completed = await Task.WhenAny(runningJobs).ConfigureAwait(false);
            runningJobs.Remove(completed);
            continue;
        }

        IScanJobLease? lease = null;
        try
        {
            lease = await _jobSource.TryAcquireAsync(stoppingToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            break;
        }
        catch (Exception ex)
        {
            // Lease stays null, so the back-off branch below runs before the next attempt.
            _logger.LogError(ex, "Scanner worker failed to acquire job lease; backing off.");
        }

        if (lease is null)
        {
            var delay = delayStrategy.NextDelay();
            try
            {
                await _delayScheduler.DelayAsync(delay, stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Shutdown arrived while idle. Leave the loop normally (as the lease-acquisition
                // path does) so in-flight jobs are still awaited and the stop message is logged.
                break;
            }

            continue;
        }

        delayStrategy.Reset();
        runningJobs.Add(RunJobAsync(lease, stoppingToken));
    }

    if (runningJobs.Count > 0)
    {
        await Task.WhenAll(runningJobs).ConfigureAwait(false);
    }

    WorkerStopping(_logger);
}
|
||||
|
||||
private async Task RunJobAsync(IScanJobLease lease, CancellationToken stoppingToken)
|
||||
{
|
||||
var options = _options.CurrentValue;
|
||||
var jobStart = _timeProvider.GetUtcNow();
|
||||
var queueLatency = jobStart - lease.EnqueuedAtUtc;
|
||||
var jobCts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken);
|
||||
var jobToken = jobCts.Token;
|
||||
var context = new ScanJobContext(lease, _timeProvider, jobStart, jobToken);
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scanner.Worker.Diagnostics;
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
using StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Hosting;
|
||||
|
||||
public sealed partial class ScannerWorkerHostedService : BackgroundService
|
||||
{
|
||||
private readonly IScanJobSource _jobSource;
|
||||
private readonly ScanJobProcessor _processor;
|
||||
private readonly LeaseHeartbeatService _heartbeatService;
|
||||
private readonly ScannerWorkerMetrics _metrics;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly DeterministicRandomService _randomService;
|
||||
private readonly IOptionsMonitor<ScannerWorkerOptions> _options;
|
||||
private readonly ILogger<ScannerWorkerHostedService> _logger;
|
||||
private readonly IDelayScheduler _delayScheduler;
|
||||
|
||||
/// <summary>
/// Creates the hosted service and stores its dependencies.
/// </summary>
/// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
public ScannerWorkerHostedService(
    IScanJobSource jobSource,
    ScanJobProcessor processor,
    LeaseHeartbeatService heartbeatService,
    ScannerWorkerMetrics metrics,
    TimeProvider timeProvider,
    DeterministicRandomService randomService,
    IDelayScheduler delayScheduler,
    IOptionsMonitor<ScannerWorkerOptions> options,
    ILogger<ScannerWorkerHostedService> logger)
{
    // Validate in declaration order so the first null argument is the one reported.
    ArgumentNullException.ThrowIfNull(jobSource);
    ArgumentNullException.ThrowIfNull(processor);
    ArgumentNullException.ThrowIfNull(heartbeatService);
    ArgumentNullException.ThrowIfNull(metrics);
    ArgumentNullException.ThrowIfNull(timeProvider);
    ArgumentNullException.ThrowIfNull(randomService);
    ArgumentNullException.ThrowIfNull(delayScheduler);
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(logger);

    _jobSource = jobSource;
    _processor = processor;
    _heartbeatService = heartbeatService;
    _metrics = metrics;
    _timeProvider = timeProvider;
    _randomService = randomService;
    _delayScheduler = delayScheduler;
    _options = options;
    _logger = logger;
}
|
||||
|
||||
/// <summary>
/// Main worker loop: leases jobs from the job source, runs them concurrently up to
/// <c>MaxConcurrentJobs</c>, backs off when no lease is available, and waits for in-flight
/// jobs before returning.
/// </summary>
/// <param name="stoppingToken">Token observed for shutdown; cancelling it ends the polling loop.</param>
/// <returns>A task that completes once polling has stopped and all started jobs have finished.</returns>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    var runningJobs = new HashSet<Task>();
    var delayStrategy = new PollDelayStrategy(_options.CurrentValue.Polling, _randomService);

    WorkerStarted(_logger);

    while (!stoppingToken.IsCancellationRequested)
    {
        // Drop finished jobs so the concurrency check only counts work still in flight.
        runningJobs.RemoveWhere(static task => task.IsCompleted);

        // Options are re-read on every iteration, so the concurrency limit is always the current value.
        var options = _options.CurrentValue;
        if (runningJobs.Count >= options.MaxConcurrentJobs)
        {
            var completed = await Task.WhenAny(runningJobs).ConfigureAwait(false);
            runningJobs.Remove(completed);
            continue;
        }

        IScanJobLease? lease = null;
        try
        {
            lease = await _jobSource.TryAcquireAsync(stoppingToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            break;
        }
        catch (Exception ex)
        {
            // Lease stays null, so the back-off branch below runs before the next attempt.
            _logger.LogError(ex, "Scanner worker failed to acquire job lease; backing off.");
        }

        if (lease is null)
        {
            var delay = delayStrategy.NextDelay();
            try
            {
                await _delayScheduler.DelayAsync(delay, stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Shutdown arrived while idle. Leave the loop normally (as the lease-acquisition
                // path does) so in-flight jobs are still awaited and the stop message is logged.
                break;
            }

            continue;
        }

        delayStrategy.Reset();
        runningJobs.Add(RunJobAsync(lease, stoppingToken));
    }

    if (runningJobs.Count > 0)
    {
        await Task.WhenAll(runningJobs).ConfigureAwait(false);
    }

    WorkerStopping(_logger);
}
|
||||
|
||||
private async Task RunJobAsync(IScanJobLease lease, CancellationToken stoppingToken)
|
||||
{
|
||||
var options = _options.CurrentValue;
|
||||
var jobStart = _timeProvider.GetUtcNow();
|
||||
var queueLatency = jobStart - lease.EnqueuedAtUtc;
|
||||
var jobCts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken);
|
||||
var jobToken = jobCts.Token;
|
||||
var context = new ScanJobContext(lease, _timeProvider, jobStart, jobToken);
|
||||
|
||||
_metrics.RecordQueueLatency(context, queueLatency);
|
||||
JobAcquired(_logger, lease.JobId, lease.ScanId, lease.Attempt, queueLatency.TotalMilliseconds);
|
||||
|
||||
@@ -118,85 +121,85 @@ public sealed partial class ScannerWorkerHostedService : BackgroundService
|
||||
await lease.CompleteAsync(stoppingToken).ConfigureAwait(false);
|
||||
var duration = _timeProvider.GetUtcNow() - jobStart;
|
||||
_metrics.RecordJobDuration(context, duration);
|
||||
_metrics.IncrementJobCompleted(context);
|
||||
JobCompleted(_logger, lease.JobId, lease.ScanId, duration.TotalMilliseconds);
|
||||
}
|
||||
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
processingException = null;
|
||||
await lease.AbandonAsync("host-stopping", CancellationToken.None).ConfigureAwait(false);
|
||||
JobAbandoned(_logger, lease.JobId, lease.ScanId);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
processingException = ex;
|
||||
var duration = _timeProvider.GetUtcNow() - jobStart;
|
||||
_metrics.RecordJobDuration(context, duration);
|
||||
|
||||
var reason = ex.GetType().Name;
|
||||
var maxAttempts = options.Queue.MaxAttempts;
|
||||
if (lease.Attempt >= maxAttempts)
|
||||
{
|
||||
await lease.PoisonAsync(reason, CancellationToken.None).ConfigureAwait(false);
|
||||
_metrics.IncrementJobFailed(context, reason);
|
||||
JobPoisoned(_logger, lease.JobId, lease.ScanId, lease.Attempt, maxAttempts, ex);
|
||||
}
|
||||
else
|
||||
{
|
||||
await lease.AbandonAsync(reason, CancellationToken.None).ConfigureAwait(false);
|
||||
JobAbandonedWithError(_logger, lease.JobId, lease.ScanId, lease.Attempt, maxAttempts, ex);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
jobCts.Cancel();
|
||||
try
|
||||
{
|
||||
await heartbeatTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex) when (processingException is null && ex is not OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Heartbeat loop ended with an exception for job {JobId}.", lease.JobId);
|
||||
}
|
||||
|
||||
await lease.DisposeAsync().ConfigureAwait(false);
|
||||
jobCts.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Logs, at Information level (event 2000), that the worker host has started.</summary>
[LoggerMessage(Level = LogLevel.Information, EventId = 2000, Message = "Scanner worker host started.")]
private static partial void WorkerStarted(ILogger logger);

/// <summary>Logs, at Information level (event 2001), that the worker host is stopping.</summary>
[LoggerMessage(Level = LogLevel.Information, EventId = 2001, Message = "Scanner worker host stopping.")]
private static partial void WorkerStopping(ILogger logger);

/// <summary>Logs, at Information level (event 2002), that a job was leased, with its attempt number and queue latency.</summary>
[LoggerMessage(
    Level = LogLevel.Information,
    EventId = 2002,
    Message = "Leased job {JobId} (scan {ScanId}) attempt {Attempt}; queue latency {LatencyMs:F0} ms.")]
private static partial void JobAcquired(ILogger logger, string jobId, string scanId, int attempt, double latencyMs);

/// <summary>Logs, at Information level (event 2003), that a job completed, with its duration.</summary>
[LoggerMessage(
    Level = LogLevel.Information,
    EventId = 2003,
    Message = "Job {JobId} (scan {ScanId}) completed in {DurationMs:F0} ms.")]
private static partial void JobCompleted(ILogger logger, string jobId, string scanId, double durationMs);

/// <summary>Logs, at Warning level (event 2004), that a job was abandoned because the host is shutting down.</summary>
[LoggerMessage(
    Level = LogLevel.Warning,
    EventId = 2004,
    Message = "Job {JobId} (scan {ScanId}) abandoned due to host shutdown.")]
private static partial void JobAbandoned(ILogger logger, string jobId, string scanId);

/// <summary>Logs, at Warning level (event 2005), that a failed attempt was abandoned and the job will be retried.</summary>
[LoggerMessage(
    Level = LogLevel.Warning,
    EventId = 2005,
    Message = "Job {JobId} (scan {ScanId}) attempt {Attempt}/{MaxAttempts} abandoned after failure; job will be retried.")]
private static partial void JobAbandonedWithError(ILogger logger, string jobId, string scanId, int attempt, int maxAttempts, Exception exception);

/// <summary>Logs, at Error level (event 2006), that a job exceeded its retry budget and is being quarantined.</summary>
[LoggerMessage(
    Level = LogLevel.Error,
    EventId = 2006,
    Message = "Job {JobId} (scan {ScanId}) attempt {Attempt}/{MaxAttempts} exceeded retry budget; quarantining job.")]
private static partial void JobPoisoned(ILogger logger, string jobId, string scanId, int attempt, int maxAttempts, Exception exception);
|
||||
}
|
||||
_metrics.IncrementJobCompleted(context);
|
||||
JobCompleted(_logger, lease.JobId, lease.ScanId, duration.TotalMilliseconds);
|
||||
}
|
||||
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
processingException = null;
|
||||
await lease.AbandonAsync("host-stopping", CancellationToken.None).ConfigureAwait(false);
|
||||
JobAbandoned(_logger, lease.JobId, lease.ScanId);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
processingException = ex;
|
||||
var duration = _timeProvider.GetUtcNow() - jobStart;
|
||||
_metrics.RecordJobDuration(context, duration);
|
||||
|
||||
var reason = ex.GetType().Name;
|
||||
var maxAttempts = options.Queue.MaxAttempts;
|
||||
if (lease.Attempt >= maxAttempts)
|
||||
{
|
||||
await lease.PoisonAsync(reason, CancellationToken.None).ConfigureAwait(false);
|
||||
_metrics.IncrementJobFailed(context, reason);
|
||||
JobPoisoned(_logger, lease.JobId, lease.ScanId, lease.Attempt, maxAttempts, ex);
|
||||
}
|
||||
else
|
||||
{
|
||||
await lease.AbandonAsync(reason, CancellationToken.None).ConfigureAwait(false);
|
||||
JobAbandonedWithError(_logger, lease.JobId, lease.ScanId, lease.Attempt, maxAttempts, ex);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
jobCts.Cancel();
|
||||
try
|
||||
{
|
||||
await heartbeatTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex) when (processingException is null && ex is not OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Heartbeat loop ended with an exception for job {JobId}.", lease.JobId);
|
||||
}
|
||||
|
||||
await lease.DisposeAsync().ConfigureAwait(false);
|
||||
jobCts.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Logs event 2000 (Information): the scanner worker host started.</summary>
[LoggerMessage(EventId = 2000, Level = LogLevel.Information, Message = "Scanner worker host started.")]
private static partial void WorkerStarted(ILogger logger);

/// <summary>Logs event 2001 (Information): the scanner worker host is stopping.</summary>
[LoggerMessage(EventId = 2001, Level = LogLevel.Information, Message = "Scanner worker host stopping.")]
private static partial void WorkerStopping(ILogger logger);

/// <summary>Logs event 2002 (Information): a job lease was acquired, with its attempt number and queue latency.</summary>
[LoggerMessage(EventId = 2002, Level = LogLevel.Information,
    Message = "Leased job {JobId} (scan {ScanId}) attempt {Attempt}; queue latency {LatencyMs:F0} ms.")]
private static partial void JobAcquired(ILogger logger, string jobId, string scanId, int attempt, double latencyMs);

/// <summary>Logs event 2003 (Information): a job completed, with its duration in milliseconds.</summary>
[LoggerMessage(EventId = 2003, Level = LogLevel.Information,
    Message = "Job {JobId} (scan {ScanId}) completed in {DurationMs:F0} ms.")]
private static partial void JobCompleted(ILogger logger, string jobId, string scanId, double durationMs);

/// <summary>Logs event 2004 (Warning): a job was abandoned because the host is shutting down.</summary>
[LoggerMessage(EventId = 2004, Level = LogLevel.Warning,
    Message = "Job {JobId} (scan {ScanId}) abandoned due to host shutdown.")]
private static partial void JobAbandoned(ILogger logger, string jobId, string scanId);

/// <summary>Logs event 2005 (Warning): a failed attempt was abandoned and the job will be retried.</summary>
[LoggerMessage(EventId = 2005, Level = LogLevel.Warning,
    Message = "Job {JobId} (scan {ScanId}) attempt {Attempt}/{MaxAttempts} abandoned after failure; job will be retried.")]
private static partial void JobAbandonedWithError(ILogger logger, string jobId, string scanId, int attempt, int maxAttempts, Exception exception);

/// <summary>Logs event 2006 (Error): the job exceeded its retry budget and is being quarantined.</summary>
[LoggerMessage(EventId = 2006, Level = LogLevel.Error,
    Message = "Job {JobId} (scan {ScanId}) attempt {Attempt}/{MaxAttempts} exceeded retry budget; quarantining job.")]
private static partial void JobPoisoned(ILogger logger, string jobId, string scanId, int attempt, int maxAttempts, Exception exception);
|
||||
}
|
||||
|
||||
@@ -5,19 +5,19 @@ using System.Collections.ObjectModel;
|
||||
using System.IO;
|
||||
using StellaOps.Configuration;
|
||||
using StellaOps.Scanner.Core.Contracts;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Options;
|
||||
|
||||
public sealed class ScannerWorkerOptions
|
||||
{
|
||||
public const string SectionName = "Scanner:Worker";
|
||||
|
||||
public int MaxConcurrentJobs { get; set; } = 2;
|
||||
|
||||
public QueueOptions Queue { get; } = new();
|
||||
|
||||
public PollingOptions Polling { get; } = new();
|
||||
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Options;
|
||||
|
||||
public sealed class ScannerWorkerOptions
|
||||
{
|
||||
public const string SectionName = "Scanner:Worker";
|
||||
|
||||
public int MaxConcurrentJobs { get; set; } = 2;
|
||||
|
||||
public QueueOptions Queue { get; } = new();
|
||||
|
||||
public PollingOptions Polling { get; } = new();
|
||||
|
||||
public AuthorityOptions Authority { get; } = new();
|
||||
|
||||
public TelemetryOptions Telemetry { get; } = new();
|
||||
@@ -31,121 +31,121 @@ public sealed class ScannerWorkerOptions
|
||||
public SigningOptions Signing { get; } = new();
|
||||
|
||||
public DeterminismOptions Determinism { get; } = new();
|
||||
|
||||
/// <summary>
/// Queue-related worker settings: the retry budget and lease heartbeat tuning.
/// </summary>
public sealed class QueueOptions
{
    // Declared first so the default is visible before the members that expose it.
    private IReadOnlyList<TimeSpan> _heartbeatRetryDelays = new ReadOnlyCollection<TimeSpan>(new[]
    {
        TimeSpan.FromSeconds(2),
        TimeSpan.FromSeconds(5),
        TimeSpan.FromSeconds(10),
    });

    /// <summary>
    /// Attempt count at which a failing job is poisoned instead of abandoned for retry.
    /// The options validator requires a value greater than zero.
    /// </summary>
    public int MaxAttempts { get; set; } = 5;

    /// <summary>
    /// Safety factor used for heartbeat scheduling; the validator's message requires at least 3.
    /// NOTE(review): the exact formula lives outside this section - confirm before relying on it.
    /// </summary>
    public double HeartbeatSafetyFactor { get; set; } = 3.0;

    /// <summary>
    /// Upper bound, in milliseconds, for heartbeat jitter.
    /// NOTE(review): the consumer is not visible in this section.
    /// </summary>
    public int MaxHeartbeatJitterMilliseconds { get; set; } = 750;

    /// <summary>Delays applied between lease-renewal retries, positive and sorted ascending once set via <see cref="SetHeartbeatRetryDelays"/>.</summary>
    public IReadOnlyList<TimeSpan> HeartbeatRetryDelays => _heartbeatRetryDelays;

    /// <summary>Lower clamp for the heartbeat interval; the validator requires a value greater than zero.</summary>
    public TimeSpan MinHeartbeatInterval { get; set; } = TimeSpan.FromSeconds(10);

    /// <summary>Upper clamp for the heartbeat interval; the validator requires it to exceed <see cref="MinHeartbeatInterval"/>.</summary>
    public TimeSpan MaxHeartbeatInterval { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Same list as <see cref="HeartbeatRetryDelays"/>, exposed for internal consumers.</summary>
    internal IReadOnlyList<TimeSpan> NormalizedHeartbeatRetryDelays => _heartbeatRetryDelays;

    /// <summary>
    /// Replaces the heartbeat retry delays. Zero and negative entries are dropped and the
    /// remainder is sorted ascending.
    /// </summary>
    /// <param name="delays">Candidate delays; may be empty, must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="delays"/> is null.</exception>
    public void SetHeartbeatRetryDelays(IEnumerable<TimeSpan> delays)
    {
        // Fail with a descriptive exception instead of a NullReferenceException inside the loop.
        ArgumentNullException.ThrowIfNull(delays);
        _heartbeatRetryDelays = NormalizeDelays(delays);
    }

    /// <summary>Filters out non-positive delays and returns the rest as a sorted read-only list.</summary>
    private static IReadOnlyList<TimeSpan> NormalizeDelays(IEnumerable<TimeSpan> delays)
    {
        var positive = new List<TimeSpan>();
        foreach (var delay in delays)
        {
            if (delay > TimeSpan.Zero)
            {
                positive.Add(delay);
            }
        }

        positive.Sort();
        return new ReadOnlyCollection<TimeSpan>(positive);
    }
}
|
||||
|
||||
/// <summary>
/// Polling back-off settings. The options validator requires <see cref="InitialDelay"/> to be
/// positive, <see cref="MaxDelay"/> to be at least <see cref="InitialDelay"/>, and
/// <see cref="JitterRatio"/> to lie between 0 and 1.
/// </summary>
public sealed class PollingOptions
{
    /// <summary>Initial polling delay (default 200 ms).</summary>
    public TimeSpan InitialDelay { get; set; } = TimeSpan.FromMilliseconds(200);

    /// <summary>Largest polling delay (default 5 s).</summary>
    public TimeSpan MaxDelay { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>Jitter ratio applied to polling delays (default 0.2).</summary>
    public double JitterRatio { get; set; } = 0.2;
}
|
||||
|
||||
/// <summary>
/// Authority client settings. When <see cref="Enabled"/> is true the options validator
/// requires <see cref="Issuer"/> and <see cref="ClientId"/>, a positive
/// <see cref="BackchannelTimeoutSeconds"/>, a non-negative <see cref="TokenClockSkewSeconds"/>
/// and strictly positive resilience retry delays.
/// </summary>
public sealed class AuthorityOptions
{
    /// <summary>Turns the authority integration on; off by default.</summary>
    public bool Enabled { get; set; }

    /// <summary>Issuer; required when <see cref="Enabled"/> is true.</summary>
    public string? Issuer { get; set; }

    /// <summary>Client identifier; required when <see cref="Enabled"/> is true.</summary>
    public string? ClientId { get; set; }

    /// <summary>Client secret, if any.</summary>
    public string? ClientSecret { get; set; }

    /// <summary>Whether metadata must be served over HTTPS (default true).</summary>
    public bool RequireHttpsMetadata { get; set; } = true;

    /// <summary>Optional explicit metadata address.</summary>
    public string? MetadataAddress { get; set; }

    /// <summary>Back-channel timeout in seconds (default 20).</summary>
    public int BackchannelTimeoutSeconds { get; set; } = 20;

    /// <summary>Token clock skew in seconds (default 30).</summary>
    public int TokenClockSkewSeconds { get; set; } = 30;

    /// <summary>Scopes list; starts with the single entry "scanner.scan".</summary>
    public IList<string> Scopes { get; } = new List<string> { "scanner.scan" };

    /// <summary>Retry and offline-fallback settings for the authority client.</summary>
    public ResilienceOptions Resilience { get; } = new();
}
|
||||
|
||||
/// <summary>
/// Retry and offline-cache settings. Nullable members are left unset by default.
/// </summary>
public sealed class ResilienceOptions
{
    /// <summary>Retry toggle; null when not configured.</summary>
    public bool? EnableRetries { get; set; }

    /// <summary>
    /// Retry delays, defaulting to 250 ms, 500 ms, 1 s and 5 s. The options validator
    /// rejects zero or negative entries when the authority is enabled.
    /// </summary>
    public IList<TimeSpan> RetryDelays { get; } = new List<TimeSpan>
    {
        TimeSpan.FromMilliseconds(250),
        TimeSpan.FromMilliseconds(500),
        TimeSpan.FromSeconds(1),
        TimeSpan.FromSeconds(5),
    };

    /// <summary>Offline cache fallback toggle; null when not configured.</summary>
    public bool? AllowOfflineCacheFallback { get; set; }

    /// <summary>Offline cache tolerance; null when not configured.</summary>
    public TimeSpan? OfflineCacheTolerance { get; set; }
}
|
||||
|
||||
/// <summary>
/// Telemetry switches and exporter settings for the scanner worker.
/// </summary>
public sealed class TelemetryOptions
{
    /// <summary>Logging toggle (default true).</summary>
    public bool EnableLogging { get; set; } = true;

    /// <summary>Overall telemetry toggle (default true).</summary>
    public bool EnableTelemetry { get; set; } = true;

    /// <summary>Tracing toggle (default false).</summary>
    public bool EnableTracing { get; set; }

    /// <summary>Metrics toggle (default true).</summary>
    public bool EnableMetrics { get; set; } = true;

    /// <summary>Service name reported by telemetry.</summary>
    public string ServiceName { get; set; } = "stellaops-scanner-worker";

    /// <summary>Optional OTLP endpoint; null when not configured.</summary>
    public string? OtlpEndpoint { get; set; }

    /// <summary>Console export toggle (default false).</summary>
    public bool ExportConsole { get; set; }

    /// <summary>Resource attributes; keys compare case-insensitively (ordinal).</summary>
    public IDictionary<string, string?> ResourceAttributes { get; } =
        new ConcurrentDictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
}
|
||||
|
||||
|
||||
/// <summary>
/// Queue-related worker settings: the retry budget and lease heartbeat tuning.
/// </summary>
public sealed class QueueOptions
{
    // Declared first so the default is visible before the members that expose it.
    private IReadOnlyList<TimeSpan> _heartbeatRetryDelays = new ReadOnlyCollection<TimeSpan>(new[]
    {
        TimeSpan.FromSeconds(2),
        TimeSpan.FromSeconds(5),
        TimeSpan.FromSeconds(10),
    });

    /// <summary>
    /// Attempt count at which a failing job is poisoned instead of abandoned for retry.
    /// The options validator requires a value greater than zero.
    /// </summary>
    public int MaxAttempts { get; set; } = 5;

    /// <summary>
    /// Safety factor used for heartbeat scheduling; the validator's message requires at least 3.
    /// NOTE(review): the exact formula lives outside this section - confirm before relying on it.
    /// </summary>
    public double HeartbeatSafetyFactor { get; set; } = 3.0;

    /// <summary>
    /// Upper bound, in milliseconds, for heartbeat jitter.
    /// NOTE(review): the consumer is not visible in this section.
    /// </summary>
    public int MaxHeartbeatJitterMilliseconds { get; set; } = 750;

    /// <summary>Delays applied between lease-renewal retries, positive and sorted ascending once set via <see cref="SetHeartbeatRetryDelays"/>.</summary>
    public IReadOnlyList<TimeSpan> HeartbeatRetryDelays => _heartbeatRetryDelays;

    /// <summary>Lower clamp for the heartbeat interval; the validator requires a value greater than zero.</summary>
    public TimeSpan MinHeartbeatInterval { get; set; } = TimeSpan.FromSeconds(10);

    /// <summary>Upper clamp for the heartbeat interval; the validator requires it to exceed <see cref="MinHeartbeatInterval"/>.</summary>
    public TimeSpan MaxHeartbeatInterval { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Same list as <see cref="HeartbeatRetryDelays"/>, exposed for internal consumers.</summary>
    internal IReadOnlyList<TimeSpan> NormalizedHeartbeatRetryDelays => _heartbeatRetryDelays;

    /// <summary>
    /// Replaces the heartbeat retry delays. Zero and negative entries are dropped and the
    /// remainder is sorted ascending.
    /// </summary>
    /// <param name="delays">Candidate delays; may be empty, must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="delays"/> is null.</exception>
    public void SetHeartbeatRetryDelays(IEnumerable<TimeSpan> delays)
    {
        // Fail with a descriptive exception instead of a NullReferenceException inside the loop.
        ArgumentNullException.ThrowIfNull(delays);
        _heartbeatRetryDelays = NormalizeDelays(delays);
    }

    /// <summary>Filters out non-positive delays and returns the rest as a sorted read-only list.</summary>
    private static IReadOnlyList<TimeSpan> NormalizeDelays(IEnumerable<TimeSpan> delays)
    {
        var positive = new List<TimeSpan>();
        foreach (var delay in delays)
        {
            if (delay > TimeSpan.Zero)
            {
                positive.Add(delay);
            }
        }

        positive.Sort();
        return new ReadOnlyCollection<TimeSpan>(positive);
    }
}
|
||||
|
||||
/// <summary>
/// Polling back-off settings. The options validator requires <see cref="InitialDelay"/> to be
/// positive, <see cref="MaxDelay"/> to be at least <see cref="InitialDelay"/>, and
/// <see cref="JitterRatio"/> to lie between 0 and 1.
/// </summary>
public sealed class PollingOptions
{
    /// <summary>Initial polling delay (default 200 ms).</summary>
    public TimeSpan InitialDelay { get; set; } = TimeSpan.FromMilliseconds(200);

    /// <summary>Largest polling delay (default 5 s).</summary>
    public TimeSpan MaxDelay { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>Jitter ratio applied to polling delays (default 0.2).</summary>
    public double JitterRatio { get; set; } = 0.2;
}
|
||||
|
||||
/// <summary>
/// Authority client settings. When <see cref="Enabled"/> is true the options validator
/// requires <see cref="Issuer"/> and <see cref="ClientId"/>, a positive
/// <see cref="BackchannelTimeoutSeconds"/>, a non-negative <see cref="TokenClockSkewSeconds"/>
/// and strictly positive resilience retry delays.
/// </summary>
public sealed class AuthorityOptions
{
    /// <summary>Turns the authority integration on; off by default.</summary>
    public bool Enabled { get; set; }

    /// <summary>Issuer; required when <see cref="Enabled"/> is true.</summary>
    public string? Issuer { get; set; }

    /// <summary>Client identifier; required when <see cref="Enabled"/> is true.</summary>
    public string? ClientId { get; set; }

    /// <summary>Client secret, if any.</summary>
    public string? ClientSecret { get; set; }

    /// <summary>Whether metadata must be served over HTTPS (default true).</summary>
    public bool RequireHttpsMetadata { get; set; } = true;

    /// <summary>Optional explicit metadata address.</summary>
    public string? MetadataAddress { get; set; }

    /// <summary>Back-channel timeout in seconds (default 20).</summary>
    public int BackchannelTimeoutSeconds { get; set; } = 20;

    /// <summary>Token clock skew in seconds (default 30).</summary>
    public int TokenClockSkewSeconds { get; set; } = 30;

    /// <summary>Scopes list; starts with the single entry "scanner.scan".</summary>
    public IList<string> Scopes { get; } = new List<string> { "scanner.scan" };

    /// <summary>Retry and offline-fallback settings for the authority client.</summary>
    public ResilienceOptions Resilience { get; } = new();
}
|
||||
|
||||
/// <summary>
/// Retry and offline-cache settings. Nullable members are left unset by default.
/// </summary>
public sealed class ResilienceOptions
{
    /// <summary>Retry toggle; null when not configured.</summary>
    public bool? EnableRetries { get; set; }

    /// <summary>
    /// Retry delays, defaulting to 250 ms, 500 ms, 1 s and 5 s. The options validator
    /// rejects zero or negative entries when the authority is enabled.
    /// </summary>
    public IList<TimeSpan> RetryDelays { get; } = new List<TimeSpan>
    {
        TimeSpan.FromMilliseconds(250),
        TimeSpan.FromMilliseconds(500),
        TimeSpan.FromSeconds(1),
        TimeSpan.FromSeconds(5),
    };

    /// <summary>Offline cache fallback toggle; null when not configured.</summary>
    public bool? AllowOfflineCacheFallback { get; set; }

    /// <summary>Offline cache tolerance; null when not configured.</summary>
    public TimeSpan? OfflineCacheTolerance { get; set; }
}
|
||||
|
||||
/// <summary>
/// Telemetry switches and exporter settings for the scanner worker.
/// </summary>
public sealed class TelemetryOptions
{
    /// <summary>Logging toggle (default true).</summary>
    public bool EnableLogging { get; set; } = true;

    /// <summary>Overall telemetry toggle (default true).</summary>
    public bool EnableTelemetry { get; set; } = true;

    /// <summary>Tracing toggle (default false).</summary>
    public bool EnableTracing { get; set; }

    /// <summary>Metrics toggle (default true).</summary>
    public bool EnableMetrics { get; set; } = true;

    /// <summary>Service name reported by telemetry.</summary>
    public string ServiceName { get; set; } = "stellaops-scanner-worker";

    /// <summary>Optional OTLP endpoint; null when not configured.</summary>
    public string? OtlpEndpoint { get; set; }

    /// <summary>Console export toggle (default false).</summary>
    public bool ExportConsole { get; set; }

    /// <summary>Resource attributes; keys compare case-insensitively (ordinal).</summary>
    public IDictionary<string, string?> ResourceAttributes { get; } =
        new ConcurrentDictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
}
|
||||
|
||||
public sealed class ShutdownOptions
|
||||
{
|
||||
public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
@@ -3,17 +3,17 @@ using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Options;
|
||||
|
||||
public sealed class ScannerWorkerOptionsValidator : IValidateOptions<ScannerWorkerOptions>
|
||||
{
|
||||
public ValidateOptionsResult Validate(string? name, ScannerWorkerOptions options)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
|
||||
var failures = new List<string>();
|
||||
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Options;
|
||||
|
||||
public sealed class ScannerWorkerOptionsValidator : IValidateOptions<ScannerWorkerOptions>
|
||||
{
|
||||
public ValidateOptionsResult Validate(string? name, ScannerWorkerOptions options)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
|
||||
var failures = new List<string>();
|
||||
|
||||
if (options.MaxConcurrentJobs <= 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:MaxConcurrentJobs must be greater than zero.");
|
||||
@@ -31,65 +31,65 @@ public sealed class ScannerWorkerOptionsValidator : IValidateOptions<ScannerWork
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:HeartbeatSafetyFactor must be at least 3.");
|
||||
}
|
||||
|
||||
if (options.Queue.MaxAttempts <= 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:MaxAttempts must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Queue.MinHeartbeatInterval <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:MinHeartbeatInterval must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Queue.MaxHeartbeatInterval <= options.Queue.MinHeartbeatInterval)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:MaxHeartbeatInterval must be greater than MinHeartbeatInterval.");
|
||||
}
|
||||
|
||||
if (options.Polling.InitialDelay <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Polling:InitialDelay must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Polling.MaxDelay < options.Polling.InitialDelay)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Polling:MaxDelay must be greater than or equal to InitialDelay.");
|
||||
}
|
||||
|
||||
if (options.Polling.JitterRatio is < 0 or > 1)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Polling:JitterRatio must be between 0 and 1.");
|
||||
}
|
||||
|
||||
if (options.Authority.Enabled)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(options.Authority.Issuer))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority requires Issuer when Enabled is true.");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(options.Authority.ClientId))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority requires ClientId when Enabled is true.");
|
||||
}
|
||||
|
||||
if (options.Authority.BackchannelTimeoutSeconds <= 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority:BackchannelTimeoutSeconds must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Authority.TokenClockSkewSeconds < 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority:TokenClockSkewSeconds cannot be negative.");
|
||||
}
|
||||
|
||||
if (options.Authority.Resilience.RetryDelays.Any(delay => delay <= TimeSpan.Zero))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority:Resilience:RetryDelays must be positive durations.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (options.Queue.MaxAttempts <= 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:MaxAttempts must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Queue.MinHeartbeatInterval <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:MinHeartbeatInterval must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Queue.MaxHeartbeatInterval <= options.Queue.MinHeartbeatInterval)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Queue:MaxHeartbeatInterval must be greater than MinHeartbeatInterval.");
|
||||
}
|
||||
|
||||
if (options.Polling.InitialDelay <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Polling:InitialDelay must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Polling.MaxDelay < options.Polling.InitialDelay)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Polling:MaxDelay must be greater than or equal to InitialDelay.");
|
||||
}
|
||||
|
||||
if (options.Polling.JitterRatio is < 0 or > 1)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Polling:JitterRatio must be between 0 and 1.");
|
||||
}
|
||||
|
||||
if (options.Authority.Enabled)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(options.Authority.Issuer))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority requires Issuer when Enabled is true.");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(options.Authority.ClientId))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority requires ClientId when Enabled is true.");
|
||||
}
|
||||
|
||||
if (options.Authority.BackchannelTimeoutSeconds <= 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority:BackchannelTimeoutSeconds must be greater than zero.");
|
||||
}
|
||||
|
||||
if (options.Authority.TokenClockSkewSeconds < 0)
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority:TokenClockSkewSeconds cannot be negative.");
|
||||
}
|
||||
|
||||
if (options.Authority.Resilience.RetryDelays.Any(delay => delay <= TimeSpan.Zero))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Authority:Resilience:RetryDelays must be positive durations.");
|
||||
}
|
||||
}
|
||||
|
||||
if (options.Shutdown.Timeout < TimeSpan.FromSeconds(5))
|
||||
{
|
||||
failures.Add("Scanner.Worker:Shutdown:Timeout must be at least 5 seconds to allow lease completion.");
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class AnalyzerStageExecutor : IScanStageExecutor
|
||||
{
|
||||
private readonly IScanAnalyzerDispatcher _dispatcher;
|
||||
|
||||
@@ -13,6 +13,7 @@ using StellaOps.Scanner.Analyzers.Lang.Internal;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Plugin;
|
||||
using StellaOps.Scanner.Analyzers.OS;
|
||||
using StellaOps.Scanner.Analyzers.OS.Abstractions;
|
||||
using StellaOps.Scanner.Analyzers.OS.Internal;
|
||||
using StellaOps.Scanner.Analyzers.OS.Mapping;
|
||||
using StellaOps.Scanner.Analyzers.OS.Plugin;
|
||||
using StellaOps.Scanner.Core.Contracts;
|
||||
@@ -126,6 +127,9 @@ internal sealed class CompositeScanAnalyzerDispatcher : IScanAnalyzerDispatcher
|
||||
|
||||
await validatorRunner.EnsureAsync(validationContext, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var cache = services.GetRequiredService<ISurfaceCache>();
|
||||
var cacheAdapter = new OsAnalyzerSurfaceCache(cache, surfaceEnvironment.Settings.Tenant);
|
||||
|
||||
foreach (var analyzer in analyzers)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
@@ -135,7 +139,46 @@ internal sealed class CompositeScanAnalyzerDispatcher : IScanAnalyzerDispatcher
|
||||
|
||||
try
|
||||
{
|
||||
var result = await analyzer.AnalyzeAsync(analyzerContext, cancellationToken).ConfigureAwait(false);
|
||||
string? fingerprint = null;
|
||||
try
|
||||
{
|
||||
fingerprint = OsRootfsFingerprint.TryCompute(analyzer.AnalyzerId, rootfsPath, cancellationToken);
|
||||
}
|
||||
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or ArgumentException)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
ex,
|
||||
"Failed to compute rootfs fingerprint for OS analyzer {AnalyzerId} (job {JobId}); bypassing cache.",
|
||||
analyzer.AnalyzerId,
|
||||
context.JobId);
|
||||
}
|
||||
|
||||
OSPackageAnalyzerResult result;
|
||||
if (fingerprint is not null)
|
||||
{
|
||||
var cacheEntry = await cacheAdapter.GetOrCreateEntryAsync(
|
||||
_logger,
|
||||
analyzer.AnalyzerId,
|
||||
fingerprint,
|
||||
token => analyzer.AnalyzeAsync(analyzerContext, token),
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
result = cacheEntry.Result;
|
||||
if (cacheEntry.IsHit)
|
||||
{
|
||||
_metrics.RecordOsCacheHit(context, analyzer.AnalyzerId);
|
||||
}
|
||||
else
|
||||
{
|
||||
_metrics.RecordOsCacheMiss(context, analyzer.AnalyzerId);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result = await analyzer.AnalyzeAsync(analyzerContext, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
results.Add(result);
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
@@ -3,7 +3,7 @@ using StellaOps.Scanner.Worker.Determinism;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
internal sealed class DeterministicRandomService
|
||||
public sealed class DeterministicRandomService
|
||||
{
|
||||
private readonly IDeterministicRandomProvider _provider;
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Abstraction over waiting for a period of time, so callers do not depend on a concrete timer.
/// </summary>
public interface IDelayScheduler
{
    /// <summary>Returns a task representing a wait of <paramref name="delay"/>.</summary>
    /// <param name="delay">How long to wait.</param>
    /// <param name="cancellationToken">
    /// Token for the wait. Callers in this code base treat an
    /// <see cref="OperationCanceledException"/> from this call as cancellation.
    /// </param>
    Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken);
}
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Abstraction over waiting for a period of time, so callers do not depend on a concrete timer.
/// </summary>
public interface IDelayScheduler
{
    /// <summary>Returns a task representing a wait of <paramref name="delay"/>.</summary>
    /// <param name="delay">How long to wait.</param>
    /// <param name="cancellationToken">
    /// Token for the wait. Callers in this code base treat an
    /// <see cref="OperationCanceledException"/> from this call as cancellation.
    /// </param>
    Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken);
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Service that performs entry-trace work for a scan job.
/// NOTE(review): the concrete behaviour is defined by implementations outside this section.
/// </summary>
public interface IEntryTraceExecutionService
{
    /// <summary>Runs the service for the given job.</summary>
    /// <param name="context">The scan job being processed.</param>
    /// <param name="cancellationToken">Token for the operation.</param>
    ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken);
}
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Service that performs entry-trace work for a scan job.
/// NOTE(review): the concrete behaviour is defined by implementations outside this section.
/// </summary>
public interface IEntryTraceExecutionService
{
    /// <summary>Runs the service for the given job.</summary>
    /// <param name="context">The scan job being processed.</param>
    /// <param name="cancellationToken">Token for the operation.</param>
    ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken);
}
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Dispatches analyzers for a scan job. <see cref="NullScanAnalyzerDispatcher"/> is the
/// do-nothing implementation.
/// </summary>
public interface IScanAnalyzerDispatcher
{
    /// <summary>Runs the dispatcher for the given job.</summary>
    /// <param name="context">The scan job being processed.</param>
    /// <param name="cancellationToken">Token for the operation.</param>
    ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken);
}
|
||||
|
||||
/// <summary>
/// <see cref="IScanAnalyzerDispatcher"/> implementation that does nothing.
/// </summary>
public sealed class NullScanAnalyzerDispatcher : IScanAnalyzerDispatcher
{
    /// <summary>Completes immediately without inspecting either argument.</summary>
    public ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken)
    {
        return ValueTask.CompletedTask;
    }
}
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Dispatches analyzers for a scan job. <see cref="NullScanAnalyzerDispatcher"/> is the
/// do-nothing implementation.
/// </summary>
public interface IScanAnalyzerDispatcher
{
    /// <summary>Runs the dispatcher for the given job.</summary>
    /// <param name="context">The scan job being processed.</param>
    /// <param name="cancellationToken">Token for the operation.</param>
    ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken);
}
|
||||
|
||||
/// <summary>
/// <see cref="IScanAnalyzerDispatcher"/> implementation that does nothing.
/// </summary>
public sealed class NullScanAnalyzerDispatcher : IScanAnalyzerDispatcher
{
    /// <summary>Completes immediately without inspecting either argument.</summary>
    public ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken)
    {
        return ValueTask.CompletedTask;
    }
}
|
||||
|
||||
@@ -1,31 +1,31 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// A leased scan job. Exposes the job's identity and timing data plus the operations
/// that renew or settle the lease. Disposed asynchronously by the worker when
/// processing ends.
/// </summary>
public interface IScanJobLease : IAsyncDisposable
{
    /// <summary>Identifier of the job.</summary>
    string JobId { get; }

    /// <summary>Identifier of the scan the job belongs to.</summary>
    string ScanId { get; }

    /// <summary>Attempt number; the worker compares it with the configured maximum to choose between abandon and poison.</summary>
    int Attempt { get; }

    /// <summary>When the job was enqueued (UTC).</summary>
    DateTimeOffset EnqueuedAtUtc { get; }

    /// <summary>When the lease was taken (UTC).</summary>
    DateTimeOffset LeasedAtUtc { get; }

    /// <summary>Duration of the lease.</summary>
    TimeSpan LeaseDuration { get; }

    /// <summary>String key/value metadata attached to the job.</summary>
    IReadOnlyDictionary<string, string> Metadata { get; }

    /// <summary>Renews the lease; called by the heartbeat loop.</summary>
    ValueTask RenewAsync(CancellationToken cancellationToken);

    /// <summary>Marks the job as completed.</summary>
    ValueTask CompleteAsync(CancellationToken cancellationToken);

    /// <summary>Abandons the job with a reason; the worker uses this on host shutdown and on failures that will be retried.</summary>
    ValueTask AbandonAsync(string reason, CancellationToken cancellationToken);

    /// <summary>Poisons the job with a reason; the worker uses this once the retry budget is exhausted.</summary>
    ValueTask PoisonAsync(string reason, CancellationToken cancellationToken);
}
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// A leased scan job. Exposes the job's identity and timing data plus the operations
/// that renew or settle the lease. Disposed asynchronously by the worker when
/// processing ends.
/// </summary>
public interface IScanJobLease : IAsyncDisposable
{
    /// <summary>Identifier of the job.</summary>
    string JobId { get; }

    /// <summary>Identifier of the scan the job belongs to.</summary>
    string ScanId { get; }

    /// <summary>Attempt number; the worker compares it with the configured maximum to choose between abandon and poison.</summary>
    int Attempt { get; }

    /// <summary>When the job was enqueued (UTC).</summary>
    DateTimeOffset EnqueuedAtUtc { get; }

    /// <summary>When the lease was taken (UTC).</summary>
    DateTimeOffset LeasedAtUtc { get; }

    /// <summary>Duration of the lease.</summary>
    TimeSpan LeaseDuration { get; }

    /// <summary>String key/value metadata attached to the job.</summary>
    IReadOnlyDictionary<string, string> Metadata { get; }

    /// <summary>Renews the lease; called by the heartbeat loop.</summary>
    ValueTask RenewAsync(CancellationToken cancellationToken);

    /// <summary>Marks the job as completed.</summary>
    ValueTask CompleteAsync(CancellationToken cancellationToken);

    /// <summary>Abandons the job with a reason; the worker uses this on host shutdown and on failures that will be retried.</summary>
    ValueTask AbandonAsync(string reason, CancellationToken cancellationToken);

    /// <summary>Poisons the job with a reason; the worker uses this once the retry budget is exhausted.</summary>
    ValueTask PoisonAsync(string reason, CancellationToken cancellationToken);
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Source of scan job leases for the worker.
/// </summary>
public interface IScanJobSource
{
    /// <summary>Attempts to lease a job.</summary>
    /// <param name="cancellationToken">Token for the operation.</param>
    /// <returns>The acquired lease, or <c>null</c> when none was obtained.</returns>
    Task<IScanJobLease?> TryAcquireAsync(CancellationToken cancellationToken);
}
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// Source of scan job leases for the worker.
/// </summary>
public interface IScanJobSource
{
    /// <summary>Attempts to lease a job.</summary>
    /// <param name="cancellationToken">Token for the operation.</param>
    /// <returns>The acquired lease, or <c>null</c> when none was obtained.</returns>
    Task<IScanJobLease?> TryAcquireAsync(CancellationToken cancellationToken);
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// One named stage of scan job processing.
/// </summary>
public interface IScanStageExecutor
{
    /// <summary>Name of the stage this executor handles.</summary>
    string StageName { get; }

    /// <summary>Runs the stage for the given job.</summary>
    /// <param name="context">The scan job being processed.</param>
    /// <param name="cancellationToken">Token for the operation.</param>
    ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken);
}
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
/// <summary>
/// One named stage of scan job processing.
/// </summary>
public interface IScanStageExecutor
{
    /// <summary>Name of the stage this executor handles.</summary>
    string StageName { get; }

    /// <summary>Runs the stage for the given job.</summary>
    /// <param name="context">The scan job being processed.</param>
    /// <param name="cancellationToken">Token for the operation.</param>
    ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken);
}
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class LeaseHeartbeatService
|
||||
{
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scanner.Worker.Determinism;
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class LeaseHeartbeatService
|
||||
{
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IOptionsMonitor<ScannerWorkerOptions> _options;
|
||||
private readonly IDelayScheduler _delayScheduler;
|
||||
@@ -28,7 +29,7 @@ public sealed class LeaseHeartbeatService
|
||||
_randomProvider = randomProvider ?? throw new ArgumentNullException(nameof(randomProvider));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
|
||||
public async Task RunAsync(IScanJobLease lease, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(lease);
|
||||
@@ -45,27 +46,27 @@ public sealed class LeaseHeartbeatService
|
||||
await _delayScheduler.DelayAsync(delay, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (await TryRenewAsync(options, lease, cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
_logger.LogError(
|
||||
"Job {JobId} (scan {ScanId}) lease renewal exhausted retries; cancelling processing.",
|
||||
lease.JobId,
|
||||
lease.ScanId);
|
||||
throw new InvalidOperationException("Lease renewal retries exhausted.");
|
||||
}
|
||||
}
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (await TryRenewAsync(options, lease, cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
_logger.LogError(
|
||||
"Job {JobId} (scan {ScanId}) lease renewal exhausted retries; cancelling processing.",
|
||||
lease.JobId,
|
||||
lease.ScanId);
|
||||
throw new InvalidOperationException("Lease renewal retries exhausted.");
|
||||
}
|
||||
}
|
||||
|
||||
private static TimeSpan ComputeInterval(ScannerWorkerOptions options, IScanJobLease lease)
|
||||
{
|
||||
@@ -77,9 +78,9 @@ public sealed class LeaseHeartbeatService
|
||||
recommended = options.Queue.MinHeartbeatInterval;
|
||||
}
|
||||
else if (recommended > options.Queue.MaxHeartbeatInterval)
|
||||
{
|
||||
recommended = options.Queue.MaxHeartbeatInterval;
|
||||
}
|
||||
{
|
||||
recommended = options.Queue.MaxHeartbeatInterval;
|
||||
}
|
||||
|
||||
return recommended;
|
||||
}
|
||||
@@ -108,55 +109,55 @@ public sealed class LeaseHeartbeatService
|
||||
await lease.RenewAsync(cancellationToken).ConfigureAwait(false);
|
||||
return true;
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
ex,
|
||||
"Job {JobId} (scan {ScanId}) heartbeat failed; retrying.",
|
||||
lease.JobId,
|
||||
lease.ScanId);
|
||||
}
|
||||
|
||||
foreach (var delay in options.Queue.NormalizedHeartbeatRetryDelays)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await _delayScheduler.DelayAsync(delay, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await lease.RenewAsync(cancellationToken).ConfigureAwait(false);
|
||||
return true;
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
ex,
|
||||
"Job {JobId} (scan {ScanId}) heartbeat retry failed; will retry after {Delay}.",
|
||||
lease.JobId,
|
||||
lease.ScanId,
|
||||
delay);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
ex,
|
||||
"Job {JobId} (scan {ScanId}) heartbeat failed; retrying.",
|
||||
lease.JobId,
|
||||
lease.ScanId);
|
||||
}
|
||||
|
||||
foreach (var delay in options.Queue.NormalizedHeartbeatRetryDelays)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await _delayScheduler.DelayAsync(delay, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await lease.RenewAsync(cancellationToken).ConfigureAwait(false);
|
||||
return true;
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
ex,
|
||||
"Job {JobId} (scan {ScanId}) heartbeat retry failed; will retry after {Delay}.",
|
||||
lease.JobId,
|
||||
lease.ScanId,
|
||||
delay);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class NoOpStageExecutor : IScanStageExecutor
|
||||
{
|
||||
public NoOpStageExecutor(string stageName)
|
||||
{
|
||||
StageName = stageName ?? throw new ArgumentNullException(nameof(stageName));
|
||||
}
|
||||
|
||||
public string StageName { get; }
|
||||
|
||||
public ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken)
|
||||
=> ValueTask.CompletedTask;
|
||||
}
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class NoOpStageExecutor : IScanStageExecutor
|
||||
{
|
||||
public NoOpStageExecutor(string stageName)
|
||||
{
|
||||
StageName = stageName ?? throw new ArgumentNullException(nameof(stageName));
|
||||
}
|
||||
|
||||
public string StageName { get; }
|
||||
|
||||
public ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken)
|
||||
=> ValueTask.CompletedTask;
|
||||
}
|
||||
|
||||
@@ -1,26 +1,26 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class NullScanJobSource : IScanJobSource
|
||||
{
|
||||
private readonly ILogger<NullScanJobSource> _logger;
|
||||
private int _logged;
|
||||
|
||||
public NullScanJobSource(ILogger<NullScanJobSource> logger)
|
||||
{
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public Task<IScanJobLease?> TryAcquireAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
if (Interlocked.Exchange(ref _logged, 1) == 0)
|
||||
{
|
||||
_logger.LogWarning("No queue provider registered. Scanner worker will idle until a queue adapter is configured.");
|
||||
}
|
||||
|
||||
return Task.FromResult<IScanJobLease?>(null);
|
||||
}
|
||||
}
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class NullScanJobSource : IScanJobSource
|
||||
{
|
||||
private readonly ILogger<NullScanJobSource> _logger;
|
||||
private int _logged;
|
||||
|
||||
public NullScanJobSource(ILogger<NullScanJobSource> logger)
|
||||
{
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public Task<IScanJobLease?> TryAcquireAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
if (Interlocked.Exchange(ref _logged, 1) == 0)
|
||||
{
|
||||
_logger.LogWarning("No queue provider registered. Scanner worker will idle until a queue adapter is configured.");
|
||||
}
|
||||
|
||||
return Task.FromResult<IScanJobLease?>(null);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
using System;
|
||||
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class PollDelayStrategy
|
||||
{
|
||||
using System;
|
||||
|
||||
using StellaOps.Scanner.Worker.Options;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class PollDelayStrategy
|
||||
{
|
||||
private readonly ScannerWorkerOptions.PollingOptions _options;
|
||||
private readonly DeterministicRandomService _randomService;
|
||||
private TimeSpan _currentDelay;
|
||||
@@ -15,35 +15,35 @@ public sealed class PollDelayStrategy
|
||||
_options = options ?? throw new ArgumentNullException(nameof(options));
|
||||
_randomService = randomService ?? throw new ArgumentNullException(nameof(randomService));
|
||||
}
|
||||
|
||||
public TimeSpan NextDelay()
|
||||
{
|
||||
if (_currentDelay == TimeSpan.Zero)
|
||||
{
|
||||
_currentDelay = _options.InitialDelay;
|
||||
return ApplyJitter(_currentDelay);
|
||||
}
|
||||
|
||||
var doubled = _currentDelay + _currentDelay;
|
||||
_currentDelay = doubled < _options.MaxDelay ? doubled : _options.MaxDelay;
|
||||
return ApplyJitter(_currentDelay);
|
||||
}
|
||||
|
||||
public void Reset() => _currentDelay = TimeSpan.Zero;
|
||||
|
||||
private TimeSpan ApplyJitter(TimeSpan duration)
|
||||
{
|
||||
if (_options.JitterRatio <= 0)
|
||||
{
|
||||
return duration;
|
||||
}
|
||||
|
||||
var maxOffset = duration.TotalMilliseconds * _options.JitterRatio;
|
||||
if (maxOffset <= 0)
|
||||
{
|
||||
return duration;
|
||||
}
|
||||
|
||||
|
||||
public TimeSpan NextDelay()
|
||||
{
|
||||
if (_currentDelay == TimeSpan.Zero)
|
||||
{
|
||||
_currentDelay = _options.InitialDelay;
|
||||
return ApplyJitter(_currentDelay);
|
||||
}
|
||||
|
||||
var doubled = _currentDelay + _currentDelay;
|
||||
_currentDelay = doubled < _options.MaxDelay ? doubled : _options.MaxDelay;
|
||||
return ApplyJitter(_currentDelay);
|
||||
}
|
||||
|
||||
public void Reset() => _currentDelay = TimeSpan.Zero;
|
||||
|
||||
private TimeSpan ApplyJitter(TimeSpan duration)
|
||||
{
|
||||
if (_options.JitterRatio <= 0)
|
||||
{
|
||||
return duration;
|
||||
}
|
||||
|
||||
var maxOffset = duration.TotalMilliseconds * _options.JitterRatio;
|
||||
if (maxOffset <= 0)
|
||||
{
|
||||
return duration;
|
||||
}
|
||||
|
||||
var rng = _randomService.Create();
|
||||
var offset = (rng.NextDouble() * 2.0 - 1.0) * maxOffset;
|
||||
var adjustedMs = Math.Max(0, duration.TotalMilliseconds + offset);
|
||||
|
||||
@@ -14,7 +14,7 @@ namespace StellaOps.Scanner.Worker.Processing.Replay;
|
||||
/// Fetches a sealed replay bundle from the configured object store, verifies its SHA-256 hash,
|
||||
/// and returns a local file path for downstream analyzers.
|
||||
/// </summary>
|
||||
internal sealed class ReplayBundleFetcher
|
||||
public sealed class ReplayBundleFetcher
|
||||
{
|
||||
private readonly IArtifactObjectStore _objectStore;
|
||||
private readonly ICryptoHash _cryptoHash;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using StellaOps.Scanner.Core.Contracts;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class ScanJobContext
|
||||
{
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class ScanJobContext
|
||||
{
|
||||
public ScanJobContext(IScanJobLease lease, TimeProvider timeProvider, DateTimeOffset startUtc, CancellationToken cancellationToken)
|
||||
{
|
||||
Lease = lease ?? throw new ArgumentNullException(nameof(lease));
|
||||
@@ -14,13 +14,13 @@ public sealed class ScanJobContext
|
||||
CancellationToken = cancellationToken;
|
||||
Analysis = new ScanAnalysisStore();
|
||||
}
|
||||
|
||||
public IScanJobLease Lease { get; }
|
||||
|
||||
public TimeProvider TimeProvider { get; }
|
||||
|
||||
public DateTimeOffset StartUtc { get; }
|
||||
|
||||
|
||||
public IScanJobLease Lease { get; }
|
||||
|
||||
public TimeProvider TimeProvider { get; }
|
||||
|
||||
public DateTimeOffset StartUtc { get; }
|
||||
|
||||
public CancellationToken CancellationToken { get; }
|
||||
|
||||
public string JobId => Lease.JobId;
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.Core.Contracts;
|
||||
using StellaOps.Scanner.Reachability;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class ScanJobProcessor
|
||||
{
|
||||
private readonly IReadOnlyDictionary<string, IScanStageExecutor> _executors;
|
||||
@@ -26,36 +27,36 @@ public sealed class ScanJobProcessor
|
||||
_reachabilityPublisher = reachabilityPublisher ?? throw new ArgumentNullException(nameof(reachabilityPublisher));
|
||||
_replayBundleFetcher = replayBundleFetcher ?? throw new ArgumentNullException(nameof(replayBundleFetcher));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
|
||||
var map = new Dictionary<string, IScanStageExecutor>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var executor in executors ?? Array.Empty<IScanStageExecutor>())
|
||||
{
|
||||
if (executor is null || string.IsNullOrWhiteSpace(executor.StageName))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
map[executor.StageName] = executor;
|
||||
}
|
||||
|
||||
foreach (var stage in ScanStageNames.Ordered)
|
||||
{
|
||||
if (map.ContainsKey(stage))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
map[stage] = new NoOpStageExecutor(stage);
|
||||
_logger.LogDebug("No executor registered for stage {Stage}; using no-op placeholder.", stage);
|
||||
}
|
||||
|
||||
_executors = map;
|
||||
}
|
||||
|
||||
|
||||
var map = new Dictionary<string, IScanStageExecutor>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var executor in executors ?? Array.Empty<IScanStageExecutor>())
|
||||
{
|
||||
if (executor is null || string.IsNullOrWhiteSpace(executor.StageName))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
map[executor.StageName] = executor;
|
||||
}
|
||||
|
||||
foreach (var stage in ScanStageNames.Ordered)
|
||||
{
|
||||
if (map.ContainsKey(stage))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
map[stage] = new NoOpStageExecutor(stage);
|
||||
_logger.LogDebug("No executor registered for stage {Stage}; using no-op placeholder.", stage);
|
||||
}
|
||||
|
||||
_executors = map;
|
||||
}
|
||||
|
||||
public async ValueTask ExecuteAsync(ScanJobContext context, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
await EnsureReplayBundleFetchedAsync(context, cancellationToken).ConfigureAwait(false);
|
||||
await EnsureReplayBundleFetchedAsync(context, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
foreach (var stage in ScanStageNames.Ordered)
|
||||
{
|
||||
|
||||
@@ -1,86 +1,86 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed partial class ScanProgressReporter
|
||||
{
|
||||
private readonly ScannerWorkerMetrics _metrics;
|
||||
private readonly ILogger<ScanProgressReporter> _logger;
|
||||
|
||||
public ScanProgressReporter(ScannerWorkerMetrics metrics, ILogger<ScanProgressReporter> logger)
|
||||
{
|
||||
_metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async ValueTask ExecuteStageAsync(
|
||||
ScanJobContext context,
|
||||
string stageName,
|
||||
Func<ScanJobContext, CancellationToken, ValueTask> stageWork,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(stageName);
|
||||
ArgumentNullException.ThrowIfNull(stageWork);
|
||||
|
||||
StageStarting(_logger, context.JobId, context.ScanId, stageName, context.Lease.Attempt);
|
||||
|
||||
var start = context.TimeProvider.GetUtcNow();
|
||||
using var activity = ScannerWorkerInstrumentation.ActivitySource.StartActivity(
|
||||
$"scanner.worker.{stageName}",
|
||||
ActivityKind.Internal);
|
||||
|
||||
activity?.SetTag("scanner.worker.job_id", context.JobId);
|
||||
activity?.SetTag("scanner.worker.scan_id", context.ScanId);
|
||||
activity?.SetTag("scanner.worker.stage", stageName);
|
||||
|
||||
try
|
||||
{
|
||||
await stageWork(context, cancellationToken).ConfigureAwait(false);
|
||||
var duration = context.TimeProvider.GetUtcNow() - start;
|
||||
_metrics.RecordStageDuration(context, stageName, duration);
|
||||
StageCompleted(_logger, context.JobId, context.ScanId, stageName, duration.TotalMilliseconds);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
StageCancelled(_logger, context.JobId, context.ScanId, stageName);
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
var duration = context.TimeProvider.GetUtcNow() - start;
|
||||
_metrics.RecordStageDuration(context, stageName, duration);
|
||||
StageFailed(_logger, context.JobId, context.ScanId, stageName, ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1000,
|
||||
Level = LogLevel.Information,
|
||||
Message = "Job {JobId} (scan {ScanId}) entering stage {Stage} (attempt {Attempt}).")]
|
||||
private static partial void StageStarting(ILogger logger, string jobId, string scanId, string stage, int attempt);
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1001,
|
||||
Level = LogLevel.Information,
|
||||
Message = "Job {JobId} (scan {ScanId}) finished stage {Stage} in {ElapsedMs:F0} ms.")]
|
||||
private static partial void StageCompleted(ILogger logger, string jobId, string scanId, string stage, double elapsedMs);
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1002,
|
||||
Level = LogLevel.Warning,
|
||||
Message = "Job {JobId} (scan {ScanId}) stage {Stage} cancelled by request.")]
|
||||
private static partial void StageCancelled(ILogger logger, string jobId, string scanId, string stage);
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1003,
|
||||
Level = LogLevel.Error,
|
||||
Message = "Job {JobId} (scan {ScanId}) stage {Stage} failed.")]
|
||||
private static partial void StageFailed(ILogger logger, string jobId, string scanId, string stage, Exception exception);
|
||||
}
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.Worker.Diagnostics;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed partial class ScanProgressReporter
|
||||
{
|
||||
private readonly ScannerWorkerMetrics _metrics;
|
||||
private readonly ILogger<ScanProgressReporter> _logger;
|
||||
|
||||
public ScanProgressReporter(ScannerWorkerMetrics metrics, ILogger<ScanProgressReporter> logger)
|
||||
{
|
||||
_metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async ValueTask ExecuteStageAsync(
|
||||
ScanJobContext context,
|
||||
string stageName,
|
||||
Func<ScanJobContext, CancellationToken, ValueTask> stageWork,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(stageName);
|
||||
ArgumentNullException.ThrowIfNull(stageWork);
|
||||
|
||||
StageStarting(_logger, context.JobId, context.ScanId, stageName, context.Lease.Attempt);
|
||||
|
||||
var start = context.TimeProvider.GetUtcNow();
|
||||
using var activity = ScannerWorkerInstrumentation.ActivitySource.StartActivity(
|
||||
$"scanner.worker.{stageName}",
|
||||
ActivityKind.Internal);
|
||||
|
||||
activity?.SetTag("scanner.worker.job_id", context.JobId);
|
||||
activity?.SetTag("scanner.worker.scan_id", context.ScanId);
|
||||
activity?.SetTag("scanner.worker.stage", stageName);
|
||||
|
||||
try
|
||||
{
|
||||
await stageWork(context, cancellationToken).ConfigureAwait(false);
|
||||
var duration = context.TimeProvider.GetUtcNow() - start;
|
||||
_metrics.RecordStageDuration(context, stageName, duration);
|
||||
StageCompleted(_logger, context.JobId, context.ScanId, stageName, duration.TotalMilliseconds);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
StageCancelled(_logger, context.JobId, context.ScanId, stageName);
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
var duration = context.TimeProvider.GetUtcNow() - start;
|
||||
_metrics.RecordStageDuration(context, stageName, duration);
|
||||
StageFailed(_logger, context.JobId, context.ScanId, stageName, ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1000,
|
||||
Level = LogLevel.Information,
|
||||
Message = "Job {JobId} (scan {ScanId}) entering stage {Stage} (attempt {Attempt}).")]
|
||||
private static partial void StageStarting(ILogger logger, string jobId, string scanId, string stage, int attempt);
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1001,
|
||||
Level = LogLevel.Information,
|
||||
Message = "Job {JobId} (scan {ScanId}) finished stage {Stage} in {ElapsedMs:F0} ms.")]
|
||||
private static partial void StageCompleted(ILogger logger, string jobId, string scanId, string stage, double elapsedMs);
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1002,
|
||||
Level = LogLevel.Warning,
|
||||
Message = "Job {JobId} (scan {ScanId}) stage {Stage} cancelled by request.")]
|
||||
private static partial void StageCancelled(ILogger logger, string jobId, string scanId, string stage);
|
||||
|
||||
[LoggerMessage(
|
||||
EventId = 1003,
|
||||
Level = LogLevel.Error,
|
||||
Message = "Job {JobId} (scan {ScanId}) stage {Stage} failed.")]
|
||||
private static partial void StageFailed(ILogger logger, string jobId, string scanId, string stage, Exception exception);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public static class ScanStageNames
|
||||
{
|
||||
public const string IngestReplay = "ingest-replay";
|
||||
|
||||
@@ -129,30 +129,14 @@ internal sealed class HmacDsseEnvelopeSigner : IDsseEnvelopeSigner
|
||||
try
|
||||
{
|
||||
var tenant = environment?.Settings.Tenant ?? "default";
|
||||
var request = new SurfaceSecretRequest(tenant, component: "scanner-worker", secretType: "attestation", name: "dsse-signing");
|
||||
var handle = provider.TryGetSecret(request, CancellationToken.None);
|
||||
if (handle is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (handle.Secret.TryGetProperty("privateKeyPem", out var privateKeyPem) && privateKeyPem.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
var pem = privateKeyPem.GetString();
|
||||
if (!string.IsNullOrWhiteSpace(pem))
|
||||
{
|
||||
return Encoding.UTF8.GetBytes(pem);
|
||||
}
|
||||
}
|
||||
|
||||
if (handle.Secret.TryGetProperty("token", out var token) && token.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
var value = token.GetString();
|
||||
if (!string.IsNullOrWhiteSpace(value))
|
||||
{
|
||||
return Encoding.UTF8.GetBytes(value);
|
||||
}
|
||||
}
|
||||
var request = new SurfaceSecretRequest(
|
||||
Tenant: tenant,
|
||||
Component: "scanner-worker",
|
||||
SecretType: "attestation",
|
||||
Name: "dsse-signing");
|
||||
using var handle = provider.GetAsync(request, CancellationToken.None).GetAwaiter().GetResult();
|
||||
var bytes = handle.AsBytes();
|
||||
return bytes.IsEmpty ? null : bytes.Span.ToArray();
|
||||
}
|
||||
catch
|
||||
{
|
||||
|
||||
@@ -85,11 +85,6 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
await PersistRubyPackagesAsync(context, cancellationToken).ConfigureAwait(false);
|
||||
await PersistBunPackagesAsync(context, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var determinismPayloads = BuildDeterminismPayloads(context, payloads, out var merkleRoot);
|
||||
if (determinismPayloads is not null && determinismPayloads.Count > 0)
|
||||
{
|
||||
payloads.AddRange(determinismPayloads);
|
||||
}
|
||||
if (payloads.Count == 0)
|
||||
{
|
||||
_metrics.RecordSurfaceManifestSkipped(context);
|
||||
@@ -97,6 +92,12 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
return;
|
||||
}
|
||||
|
||||
var determinismPayloads = BuildDeterminismPayloads(context, payloads, out var merkleRoot);
|
||||
if (determinismPayloads is not null && determinismPayloads.Count > 0)
|
||||
{
|
||||
payloads.AddRange(determinismPayloads);
|
||||
}
|
||||
|
||||
var tenant = _surfaceEnvironment.Settings?.Tenant ?? string.Empty;
|
||||
var stopwatch = Stopwatch.StartNew();
|
||||
|
||||
@@ -249,12 +250,6 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
}));
|
||||
}
|
||||
|
||||
var determinismPayload = BuildDeterminismPayload(context, payloads);
|
||||
if (determinismPayload is not null)
|
||||
{
|
||||
payloads.Add(determinismPayload);
|
||||
}
|
||||
|
||||
return payloads;
|
||||
}
|
||||
|
||||
@@ -326,7 +321,7 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
}));
|
||||
|
||||
// Attach DSSE envelope for layer fragments when present.
|
||||
foreach (var fragmentPayload in payloadList.Where(p => p.Kind == "layer.fragments"))
|
||||
foreach (var fragmentPayload in payloadList.Where(p => p.Kind == "layer.fragments").ToArray())
|
||||
{
|
||||
var dsse = _dsseSigner.SignAsync(
|
||||
payloadType: fragmentPayload.MediaType,
|
||||
@@ -362,10 +357,9 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
return payloadList.Skip(payloads.Count()).ToList();
|
||||
}
|
||||
|
||||
private static (Dictionary<string, string> Hashes, byte[] RecipeBytes, string RecipeSha256) BuildCompositionRecipe(IEnumerable<SurfaceManifestPayload> payloads)
|
||||
private (Dictionary<string, string> Hashes, byte[] RecipeBytes, string RecipeSha256) BuildCompositionRecipe(IEnumerable<SurfaceManifestPayload> payloads)
|
||||
{
|
||||
var map = new SortedDictionary<string, string>(StringComparer.Ordinal);
|
||||
using var sha = SHA256.Create();
|
||||
|
||||
foreach (var payload in payloads.OrderBy(p => p.Kind, StringComparer.Ordinal))
|
||||
{
|
||||
@@ -381,8 +375,7 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
|
||||
var recipeJson = JsonSerializer.Serialize(recipe, JsonOptions);
|
||||
var recipeBytes = Encoding.UTF8.GetBytes(recipeJson);
|
||||
var rootHash = sha.ComputeHash(recipeBytes);
|
||||
var merkleRoot = Convert.ToHexString(rootHash).ToLowerInvariant();
|
||||
var merkleRoot = _hash.ComputeHashHex(recipeBytes, HashAlgorithms.Sha256);
|
||||
|
||||
return (new Dictionary<string, string>(map, StringComparer.OrdinalIgnoreCase), recipeBytes, merkleRoot);
|
||||
}
|
||||
@@ -459,10 +452,10 @@ internal sealed class SurfaceManifestStageExecutor : IScanStageExecutor
|
||||
|
||||
if (bestPlan is not null)
|
||||
{
|
||||
metadata["best_terminal"] = bestPlan.Value.TerminalPath;
|
||||
metadata["best_confidence"] = bestPlan.Value.Confidence.ToString("F4", CultureInfoInvariant);
|
||||
metadata["best_user"] = bestPlan.Value.User;
|
||||
metadata["best_workdir"] = bestPlan.Value.WorkingDirectory;
|
||||
metadata["best_terminal"] = bestPlan.TerminalPath;
|
||||
metadata["best_confidence"] = bestPlan.Confidence.ToString("F4", CultureInfoInvariant);
|
||||
metadata["best_user"] = bestPlan.User;
|
||||
metadata["best_workdir"] = bestPlan.WorkingDirectory;
|
||||
}
|
||||
|
||||
return metadata;
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class SystemDelayScheduler : IDelayScheduler
|
||||
{
|
||||
public Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken)
|
||||
{
|
||||
if (delay <= TimeSpan.Zero)
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
return Task.Delay(delay, cancellationToken);
|
||||
}
|
||||
}
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scanner.Worker.Processing;
|
||||
|
||||
public sealed class SystemDelayScheduler : IDelayScheduler
|
||||
{
|
||||
public Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken)
|
||||
{
|
||||
if (delay <= TimeSpan.Zero)
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
return Task.Delay(delay, cancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("StellaOps.Scanner.Worker.Tests")]
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("StellaOps.Scanner.Worker.Tests")]
|
||||
|
||||
6
src/Scanner/StellaOps.Scanner.Worker/TASKS.md
Normal file
6
src/Scanner/StellaOps.Scanner.Worker/TASKS.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# Scanner Worker Tasks (Sprint 0409.0001.0001)
|
||||
|
||||
| Task ID | Status | Notes | Updated (UTC) |
|
||||
| --- | --- | --- | --- |
|
||||
| SCAN-NL-0409-002 | DONE | OS analyzer surface-cache wiring + hit/miss metrics + worker tests updated to current APIs. | 2025-12-12 |
|
||||
|
||||
Reference in New Issue
Block a user