save progress
This commit is contained in:
@@ -478,7 +478,7 @@ public sealed record ExportAlert(
|
||||
TenantId: tenantId,
|
||||
ExportType: exportType,
|
||||
Severity: severity,
|
||||
Message: $"Export job {exportType} failure rate is {failureRate:F1}%",
|
||||
Message: FormattableString.Invariant($"Export job {exportType} failure rate is {failureRate:F1}%"),
|
||||
FailedJobIds: recentFailedJobIds,
|
||||
ConsecutiveFailures: 0,
|
||||
FailureRate: failureRate,
|
||||
|
||||
@@ -523,8 +523,8 @@ public sealed record SloAlert(
|
||||
AlertBudgetThreshold threshold)
|
||||
{
|
||||
var message = threshold.BurnRateThreshold.HasValue && state.BurnRate >= threshold.BurnRateThreshold.Value
|
||||
? $"SLO '{slo.Name}' burn rate {state.BurnRate:F2}x exceeds threshold {threshold.BurnRateThreshold.Value:F2}x"
|
||||
: $"SLO '{slo.Name}' error budget {state.BudgetConsumed:P1} consumed exceeds threshold {threshold.BudgetConsumedThreshold:P1}";
|
||||
? FormattableString.Invariant($"SLO '{slo.Name}' burn rate {state.BurnRate:F2}x exceeds threshold {threshold.BurnRateThreshold.Value:F2}x")
|
||||
: FormattableString.Invariant($"SLO '{slo.Name}' error budget {state.BudgetConsumed:P1} consumed exceeds threshold {threshold.BudgetConsumedThreshold:P1}");
|
||||
|
||||
return new SloAlert(
|
||||
AlertId: Guid.NewGuid(),
|
||||
|
||||
@@ -7,6 +7,7 @@ public sealed class FirstSignalOptions
|
||||
public FirstSignalCacheOptions Cache { get; set; } = new();
|
||||
public FirstSignalColdPathOptions ColdPath { get; set; } = new();
|
||||
public FirstSignalSnapshotWriterOptions SnapshotWriter { get; set; } = new();
|
||||
public FirstSignalFailureSignatureOptions FailureSignatures { get; set; } = new();
|
||||
}
|
||||
|
||||
public sealed class FirstSignalCacheOptions
|
||||
@@ -30,3 +31,12 @@ public sealed class FirstSignalSnapshotWriterOptions
|
||||
public int MaxRunsPerTick { get; set; } = 50;
|
||||
public int LookbackMinutes { get; set; } = 60;
|
||||
}
|
||||
|
||||
/// <summary>
/// Settings for the optional failure-signature enrichment of a failed first signal,
/// which is resolved through the Scheduler failure-signature lookup client.
/// </summary>
public sealed class FirstSignalFailureSignatureOptions
{
    /// <summary>
    /// Turns the lookup on. Defaults to <c>false</c>, in which case no lookup is attempted.
    /// </summary>
    public bool Enabled { get; set; }

    /// <summary>
    /// Base URL of the Scheduler service. The lookup client yields no result when this is
    /// blank or cannot be parsed as an absolute URI.
    /// </summary>
    public string? SchedulerBaseUrl { get; set; }

    /// <summary>
    /// Per-lookup timeout in milliseconds (default 1000). The lookup client only arms the
    /// timeout when the value is positive.
    /// </summary>
    public int TimeoutMs { get; set; } = 1000;

    /// <summary>
    /// Occurrence count at or above which a match that carries no confidence score is rated
    /// "medium" (default 3). Ignored by the lookup client when not positive.
    /// </summary>
    public int MediumOccurrenceThreshold { get; set; } = 3;

    /// <summary>
    /// Occurrence count at or above which a match that carries no confidence score is rated
    /// "high" (default 10). Ignored by the lookup client when not positive.
    /// </summary>
    public int HighOccurrenceThreshold { get; set; } = 10;
}
|
||||
|
||||
@@ -73,6 +73,7 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
// First signal (TTFS) services
|
||||
services.Configure<FirstSignalOptions>(configuration.GetSection(FirstSignalOptions.SectionName));
|
||||
services.AddHttpClient<IFailureSignatureLookupClient, SchedulerFailureSignatureLookupClient>();
|
||||
services.AddSingleton<IFirstSignalCache, FirstSignalCache>();
|
||||
services.AddScoped<StellaOps.Orchestrator.Core.Services.IFirstSignalService, FirstSignalService>();
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ public sealed class FirstSignalService : CoreServices.IFirstSignalService
|
||||
private readonly IFirstSignalSnapshotRepository _snapshotRepository;
|
||||
private readonly IRunRepository _runRepository;
|
||||
private readonly IJobRepository _jobRepository;
|
||||
private readonly IFailureSignatureLookupClient _failureSignatureLookupClient;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly TimeToFirstSignalMetrics _ttfsMetrics;
|
||||
private readonly FirstSignalOptions _options;
|
||||
@@ -38,6 +39,7 @@ public sealed class FirstSignalService : CoreServices.IFirstSignalService
|
||||
IFirstSignalSnapshotRepository snapshotRepository,
|
||||
IRunRepository runRepository,
|
||||
IJobRepository jobRepository,
|
||||
IFailureSignatureLookupClient failureSignatureLookupClient,
|
||||
TimeProvider timeProvider,
|
||||
TimeToFirstSignalMetrics ttfsMetrics,
|
||||
IOptions<FirstSignalOptions> options,
|
||||
@@ -47,6 +49,7 @@ public sealed class FirstSignalService : CoreServices.IFirstSignalService
|
||||
_snapshotRepository = snapshotRepository ?? throw new ArgumentNullException(nameof(snapshotRepository));
|
||||
_runRepository = runRepository ?? throw new ArgumentNullException(nameof(runRepository));
|
||||
_jobRepository = jobRepository ?? throw new ArgumentNullException(nameof(jobRepository));
|
||||
_failureSignatureLookupClient = failureSignatureLookupClient ?? throw new ArgumentNullException(nameof(failureSignatureLookupClient));
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_ttfsMetrics = ttfsMetrics ?? throw new ArgumentNullException(nameof(ttfsMetrics));
|
||||
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value;
|
||||
@@ -241,13 +244,44 @@ public sealed class FirstSignalService : CoreServices.IFirstSignalService
|
||||
};
|
||||
}
|
||||
|
||||
var signalComputed = ComputeSignal(run, jobs, cacheHit: false, origin: "cold_start");
|
||||
var signalOrigin = "cold_start";
|
||||
var signalComputed = ComputeSignal(run, jobs, cacheHit: false, signalOrigin);
|
||||
|
||||
if (signalComputed.Kind == FirstSignalKind.Failed && _options.FailureSignatures.Enabled)
|
||||
{
|
||||
var lookup = TryBuildFailureSignatureLookup(run, jobs);
|
||||
if (lookup is not null)
|
||||
{
|
||||
var lastKnownOutcome = await _failureSignatureLookupClient
|
||||
.TryGetLastKnownOutcomeAsync(
|
||||
tenantId,
|
||||
lookup.Value.ScopeType,
|
||||
lookup.Value.ScopeId,
|
||||
lookup.Value.ToolchainHash,
|
||||
coldPathCts.Token)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (lastKnownOutcome is not null)
|
||||
{
|
||||
signalOrigin = "failure_index";
|
||||
signalComputed = signalComputed with
|
||||
{
|
||||
LastKnownOutcome = lastKnownOutcome,
|
||||
Diagnostics = signalComputed.Diagnostics with
|
||||
{
|
||||
Source = signalOrigin
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var computedEtag = GenerateEtag(signalComputed);
|
||||
|
||||
_ttfsMetrics.RecordColdPathComputation(
|
||||
coldStopwatch.Elapsed.TotalSeconds,
|
||||
surface: "api",
|
||||
signalSource: "cold_start",
|
||||
signalSource: signalOrigin,
|
||||
kind: MapKind(signalComputed.Kind),
|
||||
phase: MapPhase(signalComputed.Phase),
|
||||
tenantId: tenantId);
|
||||
@@ -261,30 +295,30 @@ public sealed class FirstSignalService : CoreServices.IFirstSignalService
|
||||
{
|
||||
Signal = signalComputed,
|
||||
ETag = computedEtag,
|
||||
Origin = "cold_start",
|
||||
Origin = signalOrigin,
|
||||
},
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (IsNotModified(ifNoneMatch, computedEtag))
|
||||
{
|
||||
RecordSignalRendered(overallStopwatch, cacheHit: false, origin: "cold_start", signalComputed.Kind, signalComputed.Phase, tenantId);
|
||||
RecordSignalRendered(overallStopwatch, cacheHit: false, origin: signalOrigin, signalComputed.Kind, signalComputed.Phase, tenantId);
|
||||
return new CoreServices.FirstSignalResult
|
||||
{
|
||||
Status = CoreServices.FirstSignalResultStatus.NotModified,
|
||||
CacheHit = false,
|
||||
Source = "cold_start",
|
||||
Source = signalOrigin,
|
||||
ETag = computedEtag,
|
||||
Signal = signalComputed,
|
||||
};
|
||||
}
|
||||
|
||||
RecordSignalRendered(overallStopwatch, cacheHit: false, origin: "cold_start", signalComputed.Kind, signalComputed.Phase, tenantId);
|
||||
RecordSignalRendered(overallStopwatch, cacheHit: false, origin: signalOrigin, signalComputed.Kind, signalComputed.Phase, tenantId);
|
||||
return new CoreServices.FirstSignalResult
|
||||
{
|
||||
Status = CoreServices.FirstSignalResultStatus.Found,
|
||||
CacheHit = false,
|
||||
Source = "cold_start",
|
||||
Source = signalOrigin,
|
||||
ETag = computedEtag,
|
||||
Signal = signalComputed,
|
||||
};
|
||||
@@ -409,6 +443,152 @@ public sealed class FirstSignalService : CoreServices.IFirstSignalService
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Key used for a failure-signature lookup: the scope type, the scope identifier, and the toolchain hash.
/// </summary>
private readonly record struct FailureSignatureLookup(string ScopeType, string ScopeId, string ToolchainHash);
|
||||
|
||||
/// <summary>
/// Derives a failure-signature lookup key from the representative job's JSON payload.
/// </summary>
/// <param name="run">Run the jobs belong to; forwarded to <c>SelectRepresentativeJob</c>.</param>
/// <param name="jobs">Jobs of the run.</param>
/// <returns>
/// A lookup scoped to the repository ("repository" or "repo" payload field) or, failing that, to a
/// digest found in the payload; <c>null</c> when there are no jobs, the payload is blank, is not a
/// JSON object, carries no usable scope, or cannot be processed.
/// </returns>
private static FailureSignatureLookup? TryBuildFailureSignatureLookup(Run run, IReadOnlyList<Job> jobs)
{
    if (jobs.Count == 0)
    {
        return null;
    }

    var representative = SelectRepresentativeJob(run, jobs);
    if (string.IsNullOrWhiteSpace(representative.Payload))
    {
        return null;
    }

    try
    {
        using var parsed = JsonDocument.Parse(representative.Payload);
        var root = parsed.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        string? resolvedType;
        string? resolvedId;

        // A repository scope takes precedence over any digest-based scope.
        if (TryGetPayloadString(root, "repository", out var repositoryName) ||
            TryGetPayloadString(root, "repo", out repositoryName))
        {
            resolvedType = "repo";
            resolvedId = repositoryName;
        }
        else if (!TryGetDigestScope(root, out resolvedType, out resolvedId))
        {
            return null;
        }

        var toolchain = ComputeToolchainHash(representative, root);
        return new FailureSignatureLookup(resolvedType!, resolvedId!, toolchain);
    }
    catch
    {
        // Best-effort enrichment: any failure while reading the payload simply means "no lookup".
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Finds the first property of <paramref name="payload"/> whose name equals <paramref name="key"/>
/// (ordinal, case-insensitive) and whose value is a non-blank JSON string.
/// </summary>
/// <param name="payload">JSON object to scan.</param>
/// <param name="key">Property name to look for.</param>
/// <param name="value">The trimmed string value on success; otherwise <c>null</c>.</param>
/// <returns><c>true</c> when a matching non-blank string property was found.</returns>
private static bool TryGetPayloadString(JsonElement payload, string key, out string? value)
{
    value = null;

    foreach (var member in payload.EnumerateObject())
    {
        var isCandidate =
            string.Equals(member.Name, key, StringComparison.OrdinalIgnoreCase) &&
            member.Value.ValueKind == JsonValueKind.String;
        if (!isCandidate)
        {
            continue;
        }

        // A blank match is not final: a later property with the same (case-insensitive) name may still qualify.
        var text = member.Value.GetString();
        if (!string.IsNullOrWhiteSpace(text))
        {
            value = text.Trim();
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Looks for a digest in <paramref name="payload"/> and reports it as a lookup scope.
/// </summary>
/// <param name="payload">JSON object to scan.</param>
/// <param name="scopeType">"artifact" or "image" on success; otherwise <c>null</c>.</param>
/// <param name="scopeId">The digest accepted by <c>NormalizeDigest</c> on success; otherwise <c>null</c>.</param>
/// <returns><c>true</c> when a digest was found.</returns>
private static bool TryGetDigestScope(JsonElement payload, out string? scopeType, out string? scopeId)
{
    // Well-known keys, probed in this order, each paired with the scope type it implies.
    (string Key, string Type)[] wellKnown =
    {
        ("artifactDigest", "artifact"),
        ("imageDigest", "image"),
        ("digest", "image"),
        ("artifact", "artifact"),
        ("image", "image"),
    };

    foreach (var entry in wellKnown)
    {
        if (TryGetPayloadString(payload, entry.Key, out var text) &&
            NormalizeDigest(text) is { } knownDigest)
        {
            scopeType = entry.Type;
            scopeId = knownDigest;
            return true;
        }
    }

    // Fallback: take the first string property that holds a digest, whatever its name is.
    foreach (var member in payload.EnumerateObject())
    {
        if (member.Value.ValueKind == JsonValueKind.String &&
            NormalizeDigest(member.Value.GetString()) is { } discoveredDigest)
        {
            scopeType = member.Name.Contains("artifact", StringComparison.OrdinalIgnoreCase) ? "artifact" : "image";
            scopeId = discoveredDigest;
            return true;
        }
    }

    scopeType = null;
    scopeId = null;
    return false;
}
|
||||
|
||||
/// <summary>
/// Accepts a value only when it is a <c>sha256:</c>-prefixed digest and returns it trimmed.
/// </summary>
/// <param name="value">Candidate digest text; may be <c>null</c> or blank.</param>
/// <returns>
/// The trimmed value (original casing preserved) when it starts with <c>sha256:</c>
/// (case-insensitive) and has at least one character after the prefix; otherwise <c>null</c>.
/// </returns>
private static string? NormalizeDigest(string? value)
{
    const string Prefix = "sha256:";

    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    var trimmed = value.Trim();

    // A bare prefix carries no digest. Rejecting it keeps an empty scope id out of the lookup
    // and lets the caller go on probing other payload fields for a real digest.
    var hasDigestPayload = trimmed.Length > Prefix.Length;

    return hasDigestPayload && trimmed.StartsWith(Prefix, StringComparison.OrdinalIgnoreCase)
        ? trimmed
        : null;
}
|
||||
|
||||
/// <summary>
/// Builds a short toolchain fingerprint from the job type and the payload's
/// "scannerVersion" / "runtimeVersion" fields ("unknown" when a field is absent).
/// </summary>
/// <param name="job">Job whose <c>JobType</c> seeds the fingerprint.</param>
/// <param name="payload">Parsed job payload (JSON object).</param>
/// <returns>The first 8 bytes of the SHA-256 of the fingerprint text, as lowercase hex.</returns>
private static string ComputeToolchainHash(Job job, JsonElement payload)
{
    var scannerPart = TryGetPayloadString(payload, "scannerVersion", out var scanner) && scanner is not null
        ? scanner
        : "unknown";
    var runtimePart = TryGetPayloadString(payload, "runtimeVersion", out var runtime) && runtime is not null
        ? runtime
        : "unknown";

    var fingerprint = Encoding.UTF8.GetBytes($"{job.JobType}|{scannerPart}|{runtimePart}");
    var digest = SHA256.HashData(fingerprint);

    // Only the leading 8 bytes are kept.
    return Convert.ToHexStringLower(digest.AsSpan(0, 8));
}
|
||||
|
||||
private static Job SelectRepresentativeJob(Run run, IReadOnlyList<Job> jobs)
|
||||
{
|
||||
// Prefer an in-flight job to surface "started" quickly, even if Run.Status hasn't transitioned yet.
|
||||
|
||||
@@ -0,0 +1,198 @@
|
||||
using System.Net;
|
||||
using System.Net.Http.Json;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Orchestrator.Core.Domain;
|
||||
using StellaOps.Orchestrator.Infrastructure.Options;
|
||||
|
||||
namespace StellaOps.Orchestrator.Infrastructure.Services;
|
||||
|
||||
/// <summary>
/// Looks up the last known failure outcome for a tenant, scope, and toolchain.
/// </summary>
public interface IFailureSignatureLookupClient
{
    /// <summary>
    /// Attempts to fetch the last known outcome for the given lookup key.
    /// </summary>
    /// <param name="tenantId">Tenant the lookup is performed for.</param>
    /// <param name="scopeType">Kind of scope being looked up.</param>
    /// <param name="scopeId">Identifier of the scope.</param>
    /// <param name="toolchainHash">Toolchain fingerprint.</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>The last known outcome, or <c>null</c> when none is available.</returns>
    Task<LastKnownOutcome?> TryGetLastKnownOutcomeAsync(
        string tenantId,
        string scopeType,
        string scopeId,
        string toolchainHash,
        CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// HTTP-backed <see cref="IFailureSignatureLookupClient"/> that asks the Scheduler service for the
/// best-matching failure signature. The lookup is best-effort: a disabled or incomplete configuration,
/// blank arguments, a non-success response, a cancellation or timeout, and any other exception raised
/// while sending the request or reading the response all yield <c>null</c>.
/// </summary>
public sealed class SchedulerFailureSignatureLookupClient : IFailureSignatureLookupClient
{
    private const string TenantHeader = "X-Tenant-Id";
    private const string ScopeHeader = "X-Scopes";
    private const string RequiredScope = "scheduler.runs.read";

    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);

    private readonly HttpClient _httpClient;
    private readonly IOptionsMonitor<FirstSignalOptions> _optionsMonitor;
    private readonly ILogger<SchedulerFailureSignatureLookupClient> _logger;

    /// <summary>Creates the client.</summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public SchedulerFailureSignatureLookupClient(
        HttpClient httpClient,
        IOptionsMonitor<FirstSignalOptions> optionsMonitor,
        ILogger<SchedulerFailureSignatureLookupClient> logger)
    {
        ArgumentNullException.ThrowIfNull(httpClient);
        ArgumentNullException.ThrowIfNull(optionsMonitor);
        ArgumentNullException.ThrowIfNull(logger);

        _httpClient = httpClient;
        _optionsMonitor = optionsMonitor;
        _logger = logger;
    }

    /// <summary>
    /// Queries the Scheduler "best-match" endpoint and maps the response to a <see cref="LastKnownOutcome"/>.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, sent in the <c>X-Tenant-Id</c> header.</param>
    /// <param name="scopeType">Scope type query parameter.</param>
    /// <param name="scopeId">Scope identifier query parameter.</param>
    /// <param name="toolchainHash">Toolchain hash query parameter.</param>
    /// <param name="cancellationToken">Caller token; linked with the configured timeout.</param>
    /// <returns>The mapped outcome, or <c>null</c> when no outcome could be obtained.</returns>
    public async Task<LastKnownOutcome?> TryGetLastKnownOutcomeAsync(
        string tenantId,
        string scopeType,
        string scopeId,
        string toolchainHash,
        CancellationToken cancellationToken = default)
    {
        // Options are re-read on every call, so configuration changes apply to the next lookup.
        var settings = _optionsMonitor.CurrentValue.FailureSignatures;
        if (!settings.Enabled || string.IsNullOrWhiteSpace(settings.SchedulerBaseUrl))
        {
            return null;
        }

        if (!Uri.TryCreate(settings.SchedulerBaseUrl.Trim(), UriKind.Absolute, out var schedulerUri))
        {
            return null;
        }

        var hasBlankArgument =
            string.IsNullOrWhiteSpace(tenantId) ||
            string.IsNullOrWhiteSpace(scopeType) ||
            string.IsNullOrWhiteSpace(scopeId) ||
            string.IsNullOrWhiteSpace(toolchainHash);
        if (hasBlankArgument)
        {
            return null;
        }

        var endpoint = BuildBestMatchUri(schedulerUri, scopeType, scopeId, toolchainHash);

        using var request = new HttpRequestMessage(HttpMethod.Get, endpoint);
        request.Headers.TryAddWithoutValidation(TenantHeader, tenantId);
        request.Headers.TryAddWithoutValidation(ScopeHeader, RequiredScope);

        try
        {
            using var deadline = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            if (settings.TimeoutMs > 0)
            {
                deadline.CancelAfter(settings.TimeoutMs);
            }

            using var response = await _httpClient
                .SendAsync(request, HttpCompletionOption.ResponseHeadersRead, deadline.Token)
                .ConfigureAwait(false);

            // 204 is treated as "no result" and is not logged.
            if (response.StatusCode == HttpStatusCode.NoContent)
            {
                return null;
            }

            if (!response.IsSuccessStatusCode)
            {
                _logger.LogDebug(
                    "Scheduler failure signature lookup returned status {StatusCode} for tenant {TenantId}.",
                    (int)response.StatusCode,
                    tenantId);
                return null;
            }

            var match = await response.Content
                .ReadFromJsonAsync<FailureSignatureBestMatchResponse>(JsonOptions, deadline.Token)
                .ConfigureAwait(false);

            return match is null ? null : ToOutcome(settings, match);
        }
        catch (Exception failure)
        {
            // Cancellation and timeouts are swallowed silently; every other failure is logged at debug level.
            if (failure is not OperationCanceledException)
            {
                _logger.LogDebug(failure, "Scheduler failure signature lookup failed for tenant {TenantId}.", tenantId);
            }

            return null;
        }
    }

    // Composes the best-match endpoint URI under the base URI, which is first given exactly one trailing slash.
    private static Uri BuildBestMatchUri(Uri schedulerUri, string scopeType, string scopeId, string toolchainHash)
    {
        var root = new Uri(schedulerUri.ToString().TrimEnd('/') + "/", UriKind.Absolute);
        var pathAndQuery =
            $"api/v1/scheduler/failure-signatures/best-match?scopeType={Uri.EscapeDataString(scopeType)}&scopeId={Uri.EscapeDataString(scopeId)}&toolchainHash={Uri.EscapeDataString(toolchainHash)}";

        return new Uri(root, pathAndQuery);
    }

    // Maps the Scheduler response onto the outcome attached to the first signal.
    private static LastKnownOutcome ToOutcome(
        FirstSignalFailureSignatureOptions settings,
        FailureSignatureBestMatchResponse match)
    {
        var token = NormalizeToken(match);
        var errorCode = string.IsNullOrWhiteSpace(match.ErrorCode) ? null : match.ErrorCode.Trim();

        return new LastKnownOutcome
        {
            SignatureId = match.SignatureId.ToString("D"),
            ErrorCode = errorCode,
            Token = token,
            Excerpt = null,
            Confidence = MapConfidence(settings, match),
            FirstSeenAt = match.FirstSeenAt,
            HitCount = match.OccurrenceCount,
        };
    }

    // Token preference: trimmed error code, then trimmed error category, then "unknown".
    private static string NormalizeToken(FailureSignatureBestMatchResponse payload) =>
        !string.IsNullOrWhiteSpace(payload.ErrorCode) ? payload.ErrorCode.Trim()
        : !string.IsNullOrWhiteSpace(payload.ErrorCategory) ? payload.ErrorCategory.Trim()
        : "unknown";

    // Uses the explicit confidence score when present; otherwise falls back to the configured occurrence thresholds.
    private static string MapConfidence(FirstSignalFailureSignatureOptions options, FailureSignatureBestMatchResponse payload)
    {
        if (payload.ConfidenceScore is decimal score)
        {
            if (score >= 0.8m)
            {
                return "high";
            }

            return score >= 0.6m ? "medium" : "low";
        }

        // A non-positive threshold is never reached.
        bool Reached(int threshold) => threshold > 0 && payload.OccurrenceCount >= threshold;

        if (Reached(options.HighOccurrenceThreshold))
        {
            return "high";
        }

        return Reached(options.MediumOccurrenceThreshold) ? "medium" : "low";
    }

    // Response body of the best-match endpoint, deserialized with web (camelCase) JSON defaults.
    private sealed record FailureSignatureBestMatchResponse
    {
        public Guid SignatureId { get; init; }
        public string ScopeType { get; init; } = string.Empty;
        public string ScopeId { get; init; } = string.Empty;
        public string ToolchainHash { get; init; } = string.Empty;
        public string? ErrorCode { get; init; }
        public string? ErrorCategory { get; init; }
        public string PredictedOutcome { get; init; } = string.Empty;
        public decimal? ConfidenceScore { get; init; }
        public int OccurrenceCount { get; init; }
        public DateTimeOffset FirstSeenAt { get; init; }
        public DateTimeOffset LastSeenAt { get; init; }
    }
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Messaging;
|
||||
@@ -81,6 +83,7 @@ public sealed class FirstSignalServiceTests
|
||||
snapshots,
|
||||
runs,
|
||||
jobs,
|
||||
new NullFailureSignatureLookupClient(),
|
||||
TimeProvider.System,
|
||||
ttfs,
|
||||
options,
|
||||
@@ -155,6 +158,7 @@ public sealed class FirstSignalServiceTests
|
||||
snapshotRepository: new FakeFirstSignalSnapshotRepository(),
|
||||
runRepository: new FakeRunRepository(run),
|
||||
jobRepository: new FakeJobRepository(job),
|
||||
failureSignatureLookupClient: new NullFailureSignatureLookupClient(),
|
||||
timeProvider: TimeProvider.System,
|
||||
ttfsMetrics: ttfs,
|
||||
options: Options.Create(new FirstSignalOptions()),
|
||||
@@ -176,6 +180,7 @@ public sealed class FirstSignalServiceTests
|
||||
snapshotRepository: new FakeFirstSignalSnapshotRepository(),
|
||||
runRepository: new FakeRunRepository(null),
|
||||
jobRepository: new FakeJobRepository(),
|
||||
failureSignatureLookupClient: new NullFailureSignatureLookupClient(),
|
||||
timeProvider: TimeProvider.System,
|
||||
ttfsMetrics: ttfs,
|
||||
options: Options.Create(new FirstSignalOptions()),
|
||||
@@ -213,6 +218,7 @@ public sealed class FirstSignalServiceTests
|
||||
snapshotRepository: new FakeFirstSignalSnapshotRepository(),
|
||||
runRepository: new FakeRunRepository(run),
|
||||
jobRepository: new FakeJobRepository(),
|
||||
failureSignatureLookupClient: new NullFailureSignatureLookupClient(),
|
||||
timeProvider: TimeProvider.System,
|
||||
ttfsMetrics: ttfs,
|
||||
options: Options.Create(new FirstSignalOptions()),
|
||||
@@ -275,6 +281,7 @@ public sealed class FirstSignalServiceTests
|
||||
snapshotRepo,
|
||||
runRepository: new FakeRunRepository(null),
|
||||
jobRepository: new FakeJobRepository(),
|
||||
failureSignatureLookupClient: new NullFailureSignatureLookupClient(),
|
||||
timeProvider: TimeProvider.System,
|
||||
ttfsMetrics: ttfs,
|
||||
options: Options.Create(new FirstSignalOptions()),
|
||||
@@ -290,6 +297,142 @@ public sealed class FirstSignalServiceTests
|
||||
Assert.True(second.CacheHit);
|
||||
}
|
||||
|
||||
// Verifies that a failed run is enriched with the outcome returned by the failure-signature client,
// that the result is attributed to "failure_index", and that the lookup key is derived from the job payload.
[Fact]
public async Task GetFirstSignalAsync_RunFailed_EnrichesLastKnownOutcome_WhenFailureSignatureAvailable()
{
    // Arrange: a failed run with one failed job whose payload names a repository and toolchain versions.
    var runId = Guid.NewGuid();
    var jobId = Guid.NewGuid();
    var createdAt = new DateTimeOffset(2025, 12, 18, 12, 0, 0, TimeSpan.Zero);

    var failedRun = new Run(
        RunId: runId,
        TenantId: TenantId,
        ProjectId: null,
        SourceId: Guid.NewGuid(),
        RunType: "scan",
        Status: RunStatus.Failed,
        CorrelationId: "corr-ttfs",
        TotalJobs: 1,
        CompletedJobs: 1,
        SucceededJobs: 0,
        FailedJobs: 1,
        CreatedAt: createdAt,
        StartedAt: createdAt.AddSeconds(5),
        CompletedAt: createdAt.AddMinutes(1),
        CreatedBy: "system",
        Metadata: null);

    var failedJob = new Job(
        JobId: jobId,
        TenantId: TenantId,
        ProjectId: null,
        RunId: runId,
        JobType: "scan.image",
        Status: JobStatus.Failed,
        Priority: 0,
        Attempt: 1,
        MaxAttempts: 1,
        PayloadDigest: new string('b', 64),
        Payload: """{"repository":"acme/repo","scannerVersion":"1.2.3","runtimeVersion":"7.0.0"}""",
        IdempotencyKey: "idem-ttfs",
        CorrelationId: null,
        LeaseId: Guid.NewGuid(),
        WorkerId: "worker-1",
        TaskRunnerId: null,
        LeaseUntil: null,
        CreatedAt: createdAt,
        ScheduledAt: createdAt,
        LeasedAt: createdAt.AddSeconds(10),
        CompletedAt: createdAt.AddMinutes(1),
        NotBefore: null,
        Reason: "failed",
        ReplayOf: null,
        CreatedBy: "system");

    // The service is expected to hash "<jobType>|<scannerVersion>|<runtimeVersion>" and keep the first 8 bytes.
    var toolchainBytes = SHA256.HashData(Encoding.UTF8.GetBytes($"{failedJob.JobType}|1.2.3|7.0.0"));
    var expectedToolchainHash = Convert.ToHexStringLower(toolchainBytes.AsSpan(0, 8));

    var knownOutcome = new LastKnownOutcome
    {
        SignatureId = "sig-1",
        ErrorCode = "E123",
        Token = "E123",
        Excerpt = null,
        Confidence = "high",
        FirstSeenAt = createdAt.AddDays(-2),
        HitCount = 7
    };
    var lookupClient = new CapturingFailureSignatureLookupClient(knownOutcome);

    var enabledOptions = Options.Create(new FirstSignalOptions
    {
        FailureSignatures = new FirstSignalFailureSignatureOptions { Enabled = true }
    });

    using var ttfs = new TimeToFirstSignalMetrics();
    var service = new FirstSignalService(
        cache: new FakeFirstSignalCache(),
        snapshotRepository: new FakeFirstSignalSnapshotRepository(),
        runRepository: new FakeRunRepository(failedRun),
        jobRepository: new FakeJobRepository(failedJob),
        failureSignatureLookupClient: lookupClient,
        timeProvider: TimeProvider.System,
        ttfsMetrics: ttfs,
        options: enabledOptions,
        logger: NullLogger<FirstSignalService>.Instance);

    // Act
    var result = await service.GetFirstSignalAsync(runId, TenantId);

    // Assert: the signal is enriched and attributed to the failure index.
    Assert.Equal(StellaOps.Orchestrator.Core.Services.FirstSignalResultStatus.Found, result.Status);
    Assert.Equal("failure_index", result.Source);
    Assert.NotNull(result.Signal);
    Assert.Equal(FirstSignalKind.Failed, result.Signal!.Kind);
    Assert.Equal("failure_index", result.Signal.Diagnostics.Source);
    Assert.NotNull(result.Signal.LastKnownOutcome);
    Assert.Equal("sig-1", result.Signal.LastKnownOutcome!.SignatureId);

    // Assert: the lookup was issued with the key derived from the job payload.
    Assert.NotNull(lookupClient.LastRequest);
    var captured = lookupClient.LastRequest!.Value;
    Assert.Equal(TenantId, captured.TenantId);
    Assert.Equal("repo", captured.ScopeType);
    Assert.Equal("acme/repo", captured.ScopeId);
    Assert.Equal(expectedToolchainHash, captured.ToolchainHash);
}
|
||||
|
||||
// Test double that never finds an outcome.
private sealed class NullFailureSignatureLookupClient : IFailureSignatureLookupClient
{
    public Task<LastKnownOutcome?> TryGetLastKnownOutcomeAsync(
        string tenantId,
        string scopeType,
        string scopeId,
        string toolchainHash,
        CancellationToken cancellationToken = default)
    {
        LastKnownOutcome? nothing = null;
        return Task.FromResult(nothing);
    }
}
|
||||
|
||||
// Test double that records the most recent lookup arguments and always answers with a fixed outcome.
private sealed class CapturingFailureSignatureLookupClient : IFailureSignatureLookupClient
{
    private readonly LastKnownOutcome _outcome;

    public CapturingFailureSignatureLookupClient(LastKnownOutcome outcome) => _outcome = outcome;

    // Arguments of the latest call; null until the client has been invoked.
    public (string TenantId, string ScopeType, string ScopeId, string ToolchainHash)? LastRequest { get; private set; }

    public Task<LastKnownOutcome?> TryGetLastKnownOutcomeAsync(
        string tenantId,
        string scopeType,
        string scopeId,
        string toolchainHash,
        CancellationToken cancellationToken = default)
    {
        LastRequest = (tenantId, scopeType, scopeId, toolchainHash);

        LastKnownOutcome? canned = _outcome;
        return Task.FromResult(canned);
    }
}
|
||||
|
||||
private sealed class FakeFirstSignalCache : IFirstSignalCache
|
||||
{
|
||||
private readonly Dictionary<(string TenantId, Guid RunId), FirstSignalCacheEntry> _entries = new();
|
||||
|
||||
@@ -18,6 +18,7 @@ public sealed record FirstSignalDto
|
||||
public required string Message { get; init; }
|
||||
public required DateTimeOffset At { get; init; }
|
||||
public FirstSignalArtifactDto? Artifact { get; init; }
|
||||
public FirstSignalLastKnownOutcomeDto? LastKnownOutcome { get; init; }
|
||||
}
|
||||
|
||||
public sealed record FirstSignalArtifactDto
|
||||
@@ -26,6 +27,17 @@ public sealed record FirstSignalArtifactDto
|
||||
public FirstSignalRangeDto? Range { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
/// API representation of a first signal's last known outcome. Each property is copied from the
/// same-named property of the signal's <c>LastKnownOutcome</c>.
/// </summary>
public sealed record FirstSignalLastKnownOutcomeDto
{
    /// <summary>Identifier of the matched failure signature.</summary>
    public required string SignatureId { get; init; }

    /// <summary>Error code of the outcome, when one is available.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Token of the outcome.</summary>
    public required string Token { get; init; }

    /// <summary>Excerpt of the outcome, when one is available.</summary>
    public string? Excerpt { get; init; }

    /// <summary>Confidence label of the outcome.</summary>
    public required string Confidence { get; init; }

    /// <summary>When the outcome was first seen.</summary>
    public required DateTimeOffset FirstSeenAt { get; init; }

    /// <summary>Hit count of the outcome.</summary>
    public required int HitCount { get; init; }
}
|
||||
|
||||
public sealed record FirstSignalRangeDto
|
||||
{
|
||||
public required int Start { get; init; }
|
||||
|
||||
@@ -97,7 +97,19 @@ public static class FirstSignalEndpoints
|
||||
{
|
||||
Kind = signal.Scope.Type,
|
||||
Range = null
|
||||
}
|
||||
},
|
||||
LastKnownOutcome = signal.LastKnownOutcome is null
|
||||
? null
|
||||
: new FirstSignalLastKnownOutcomeDto
|
||||
{
|
||||
SignatureId = signal.LastKnownOutcome.SignatureId,
|
||||
ErrorCode = signal.LastKnownOutcome.ErrorCode,
|
||||
Token = signal.LastKnownOutcome.Token,
|
||||
Excerpt = signal.LastKnownOutcome.Excerpt,
|
||||
Confidence = signal.LastKnownOutcome.Confidence,
|
||||
FirstSeenAt = signal.LastKnownOutcome.FirstSeenAt,
|
||||
HitCount = signal.LastKnownOutcome.HitCount
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -31,3 +31,13 @@ Status mirror for `docs/implplan/SPRINT_0339_0001_0001_first_signal_api.md`. Upd
|
||||
| 1 | ORCH-TTFS-0339-001 | DONE | First signal API delivered (service/repo/cache/endpoint/ETag/SSE/tests/docs). |
|
||||
|
||||
Last synced: 2025-12-15 (UTC).
|
||||
|
||||
## SPRINT_0341_0001_0001 TTFS Enhancements
|
||||
|
||||
Status mirror for `docs/implplan/SPRINT_0341_0001_0001_ttfs_enhancements.md`. Update alongside the sprint file to avoid drift.
|
||||
|
||||
| # | Task ID | Status | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| 1 | TTFS-T4 | DONE | Enrich FirstSignal with best-effort failure signature lookup via Scheduler WebService; surfaces `lastKnownOutcome` in API response. |
|
||||
|
||||
Last synced: 2025-12-18 (UTC).
|
||||
|
||||
Reference in New Issue
Block a user