up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-26 07:47:08 +02:00
parent 56e2f64d07
commit 1c782897f7
184 changed files with 8991 additions and 649 deletions

View File

@@ -0,0 +1,16 @@
using System.Diagnostics.Metrics;
namespace StellaOps.TaskRunner.Core.Execution;
/// <summary>
/// Holds the TaskRunner meter and the instruments created from it, so every
/// caller records against the same instances.
/// </summary>
internal static class TaskRunnerTelemetry
{
    /// <summary>Name of the meter; pass this to <c>AddMeter</c> when wiring up metrics export.</summary>
    internal const string MeterName = "stellaops.taskrunner";

    /// <summary>
    /// Meter that owns every instrument below. It must stay declared first:
    /// static field initializers run in textual order and the instruments depend on it.
    /// </summary>
    internal static readonly Meter Meter = new Meter(MeterName);

    /// <summary>Histogram of step durations, declared with unit <c>ms</c>.</summary>
    internal static readonly Histogram<double> StepDurationMs =
        Meter.CreateHistogram<double>("taskrunner.step.duration.ms", unit: "ms");

    /// <summary>Counter for step retries.</summary>
    internal static readonly Counter<long> StepRetryCount =
        Meter.CreateCounter<long>("taskrunner.step.retry.count");

    /// <summary>Up/down counter for steps currently running (callers add +1 / -1).</summary>
    internal static readonly UpDownCounter<long> RunningSteps =
        Meter.CreateUpDownCounter<long>("taskrunner.steps.running");
}

View File

@@ -13,8 +13,8 @@ public sealed class FilesystemPackRunDispatcher : IPackRunJobDispatcher, IPackRu
private readonly string archivePath;
private readonly TaskPackManifestLoader manifestLoader = new();
private readonly TaskPackPlanner planner;
private readonly JsonSerializerOptions serializerOptions = new(JsonSerializerDefaults.Web);
private readonly JsonSerializerOptions serializerOptions = new(JsonSerializerDefaults.Web);
public FilesystemPackRunDispatcher(string queuePath, string archivePath, IEgressPolicy? egressPolicy = null)
{
this.queuePath = queuePath ?? throw new ArgumentNullException(nameof(queuePath));
@@ -23,6 +23,8 @@ public sealed class FilesystemPackRunDispatcher : IPackRunJobDispatcher, IPackRu
Directory.CreateDirectory(queuePath);
Directory.CreateDirectory(archivePath);
}
/// <summary>
/// Gets the queue directory path that was passed to the constructor.
/// </summary>
public string QueuePath
{
    get { return queuePath; }
}
public async Task<PackRunExecutionContext?> TryDequeueAsync(CancellationToken cancellationToken)
{

View File

@@ -6,21 +6,30 @@ using System.Text.Json;
using System.Text.Json.Nodes;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using StellaOps.TaskRunner.Core.Execution;
using StellaOps.TaskRunner.Core.Execution.Simulation;
using StellaOps.TaskRunner.Core.Planning;
using StellaOps.TaskRunner.Core.TaskPacks;
using StellaOps.TaskRunner.Infrastructure.Execution;
using StellaOps.TaskRunner.WebService;
using Microsoft.Extensions.Options;
using StellaOps.TaskRunner.Core.Execution;
using StellaOps.TaskRunner.Core.Execution.Simulation;
using StellaOps.TaskRunner.Core.Planning;
using StellaOps.TaskRunner.Core.TaskPacks;
using StellaOps.TaskRunner.Infrastructure.Execution;
using StellaOps.TaskRunner.WebService;
using StellaOps.Telemetry.Core;
var builder = WebApplication.CreateBuilder(args);
builder.Services.Configure<TaskRunnerServiceOptions>(builder.Configuration.GetSection("TaskRunner"));
var builder = WebApplication.CreateBuilder(args);
builder.Services.Configure<TaskRunnerServiceOptions>(builder.Configuration.GetSection("TaskRunner"));
builder.Services.AddSingleton<TaskPackManifestLoader>();
builder.Services.AddSingleton<TaskPackPlanner>();
builder.Services.AddSingleton<PackRunSimulationEngine>();
builder.Services.AddSingleton<PackRunExecutionGraphBuilder>();
builder.Services.AddStellaOpsTelemetry(
builder.Configuration,
serviceName: "StellaOps.TaskRunner.WebService",
configureTracing: tracing => tracing.AddAspNetCoreInstrumentation()
.AddHttpClientInstrumentation(),
configureMetrics: metrics => metrics
.AddRuntimeInstrumentation()
.AddMeter(TaskRunnerTelemetry.MeterName));
var storageOptions = builder.Configuration.GetSection("TaskRunner:Storage").Get<TaskRunnerStorageOptions>() ?? new TaskRunnerStorageOptions();
builder.Services.AddSingleton(storageOptions);

View File

@@ -15,12 +15,12 @@
<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.0-rc.2.25502.107"/>
<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.0-rc.2.25502.107"/>
</ItemGroup>
@@ -29,13 +29,14 @@
<ProjectReference Include="..\StellaOps.TaskRunner.Core\StellaOps.TaskRunner.Core.csproj"/>
<ProjectReference Include="..\StellaOps.TaskRunner.Infrastructure\StellaOps.TaskRunner.Infrastructure.csproj"/>
</ItemGroup>
<ProjectReference Include="..\StellaOps.TaskRunner.Infrastructure\StellaOps.TaskRunner.Infrastructure.csproj"/>
<ProjectReference Include="..\..\Telemetry\StellaOps.Telemetry.Core\StellaOps.Telemetry.Core\StellaOps.Telemetry.Core.csproj"/>
</ItemGroup>
</Project>

View File

@@ -6,6 +6,7 @@ using StellaOps.TaskRunner.Core.Execution;
using StellaOps.TaskRunner.Core.Execution.Simulation;
using StellaOps.TaskRunner.Infrastructure.Execution;
using StellaOps.TaskRunner.Worker.Services;
using StellaOps.Telemetry.Core;
var builder = Host.CreateApplicationBuilder(args);
@@ -42,6 +43,13 @@ builder.Services.AddSingleton<IPackRunStepExecutor, NoopPackRunStepExecutor>();
builder.Services.AddSingleton<PackRunExecutionGraphBuilder>();
builder.Services.AddSingleton<PackRunSimulationEngine>();
builder.Services.AddSingleton<PackRunProcessor>();
builder.Services.AddStellaOpsTelemetry(
builder.Configuration,
serviceName: "StellaOps.TaskRunner.Worker",
configureTracing: tracing => tracing.AddHttpClientInstrumentation(),
configureMetrics: metrics => metrics
.AddRuntimeInstrumentation()
.AddMeter(TaskRunnerTelemetry.MeterName));
var workerStorageOptions = builder.Configuration.GetSection("Worker:Storage").Get<TaskRunnerStorageOptions>() ?? new TaskRunnerStorageOptions();
builder.Services.AddSingleton(workerStorageOptions);

View File

@@ -1,11 +1,13 @@
using System.Collections.Concurrent;
using System.Collections.ObjectModel;
using System.Globalization;
using System.Diagnostics;
using System.Diagnostics.Metrics;
using System.Text.Json.Nodes;
using Microsoft.Extensions.Options;
using StellaOps.TaskRunner.Core.Execution;
using StellaOps.TaskRunner.Core.Execution.Simulation;
using StellaOps.TaskRunner.Core.Planning;
using Microsoft.Extensions.Options;
using StellaOps.TaskRunner.Core.Execution;
using StellaOps.TaskRunner.Core.Execution.Simulation;
using StellaOps.TaskRunner.Core.Planning;
namespace StellaOps.TaskRunner.Worker.Services;
@@ -24,6 +26,7 @@ public sealed class PackRunWorkerService : BackgroundService
private readonly IPackRunArtifactUploader artifactUploader;
private readonly IPackRunLogStore logStore;
private readonly ILogger<PackRunWorkerService> logger;
private readonly UpDownCounter<long> runningSteps;
public PackRunWorkerService(
IPackRunJobDispatcher dispatcher,
@@ -47,7 +50,18 @@ public sealed class PackRunWorkerService : BackgroundService
this.logStore = logStore ?? throw new ArgumentNullException(nameof(logStore));
this.options = options?.Value ?? throw new ArgumentNullException(nameof(options));
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
runningSteps = TaskRunnerTelemetry.RunningSteps;

// Queue depth can only be observed for the filesystem dispatcher, which exposes its queue directory.
// NOTE(review): the gauge is registered on the shared static meter every time this constructor runs;
// confirm the service is constructed once per process so the instrument is not duplicated.
if (dispatcher is FilesystemPackRunDispatcher fsDispatcher)
{
    TaskRunnerTelemetry.Meter.CreateObservableGauge<long>(
        "taskrunner.queue.depth",
        () => new Measurement<long>(CountQueuedJobs(fsDispatcher.QueuePath)));

    // Counts the "*.json" files directly inside the queue directory.
    // Runs inside the metrics collection callback, so it must never throw: a directory that is
    // missing, removed mid-enumeration, or unreadable is reported as depth 0.
    static long CountQueuedJobs(string queuePath)
    {
        try
        {
            long count = 0;

            // Stream the entries instead of materialising every file name just to count them.
            foreach (var queuedFile in Directory.EnumerateFiles(queuePath, "*.json", SearchOption.TopDirectoryOnly))
            {
                count++;
            }

            return count;
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // DirectoryNotFoundException derives from IOException, so the missing-directory case lands here too.
            return 0;
        }
    }
}
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
@@ -314,14 +328,14 @@ public sealed class PackRunWorkerService : BackgroundService
}
}
private async Task<StepExecutionOutcome> ExecuteRunStepAsync(
PackRunExecutionStep step,
ExecutionContext executionContext)
{
var record = executionContext.Steps[step.Id];
var now = DateTimeOffset.UtcNow;
var currentState = new PackRunStepState(record.Status, record.Attempts, record.LastTransitionAt, record.NextAttemptAt);
private async Task<StepExecutionOutcome> ExecuteRunStepAsync(
PackRunExecutionStep step,
ExecutionContext executionContext)
{
var record = executionContext.Steps[step.Id];
var now = DateTimeOffset.UtcNow;
var currentState = new PackRunStepState(record.Status, record.Attempts, record.LastTransitionAt, record.NextAttemptAt);
if (currentState.Status == PackRunStepExecutionStatus.Pending)
{
currentState = PackRunStepStateMachine.Start(currentState, now);
@@ -347,7 +361,15 @@ public sealed class PackRunWorkerService : BackgroundService
startMetadata).ConfigureAwait(false);
}
// Mark the step as in-flight for exactly the duration of the executor call.
runningSteps.Add(1);
var stopwatch = Stopwatch.StartNew();

// The decrement lives in a finally block so the running-steps counter stays balanced when the
// executor throws or is cancelled. The call is wrapped in a lambda so that `result` keeps its
// inferred type while remaining visible to the code that follows.
var invokeExecutor = async () =>
{
    try
    {
        return await executor.ExecuteAsync(step, step.Parameters ?? PackRunExecutionStep.EmptyParameters, executionContext.CancellationToken).ConfigureAwait(false);
    }
    finally
    {
        stopwatch.Stop();
        runningSteps.Add(-1);
    }
};

var result = await invokeExecutor().ConfigureAwait(false);

// As before, a duration is recorded only when the executor returned a result (no exception).
TaskRunnerTelemetry.StepDurationMs.Record(
    stopwatch.Elapsed.TotalMilliseconds,
    new KeyValuePair<string, object?>("step_kind", step.Kind.ToString()));
if (result.Succeeded)
{
currentState = PackRunStepStateMachine.CompleteSuccess(currentState, DateTimeOffset.UtcNow);
@@ -422,6 +444,7 @@ public sealed class PackRunWorkerService : BackgroundService
if (failure.Outcome == PackRunStepFailureOutcome.Retry)
{
TaskRunnerTelemetry.StepRetryCount.Add(1, new KeyValuePair<string, object?>("step_kind", step.Kind.ToString()));
var retryMetadata = new Dictionary<string, string>(failureMetadata, StringComparer.Ordinal)
{
["outcome"] = "retry"

View File

@@ -32,12 +32,13 @@
<ProjectReference Include="..\StellaOps.TaskRunner.Core\StellaOps.TaskRunner.Core.csproj"/>
<ProjectReference Include="..\StellaOps.TaskRunner.Infrastructure\StellaOps.TaskRunner.Infrastructure.csproj"/>
</ItemGroup>
<ProjectReference Include="..\StellaOps.TaskRunner.Infrastructure\StellaOps.TaskRunner.Infrastructure.csproj"/>
<ProjectReference Include="..\..\Telemetry\StellaOps.Telemetry.Core\StellaOps.Telemetry.Core\StellaOps.Telemetry.Core.csproj"/>
</ItemGroup>
</Project>

View File

@@ -7,14 +7,14 @@
| TASKRUN-AIRGAP-56-002 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-56-001 | Bundle ingestion helpers; depends on 56-001. |
| TASKRUN-AIRGAP-57-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-56-002 | Sealed install enforcement; depends on 56-002. |
| TASKRUN-AIRGAP-58-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-57-001 | Evidence bundles for imports; depends on 57-001. |
| TASKRUN-42-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | — | Execution engine enhancements (loops/conditionals/maxParallel), simulation mode, policy gate integration. |
| TASKRUN-42-001 | BLOCKED (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | — | Execution engine enhancements (loops/conditionals/maxParallel), simulation mode, policy gate integration. Blocked: loop/conditional semantics and policy-gate evaluation contract not published. |
| TASKRUN-OAS-61-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-41-001 | Document APIs; depends on 41-001. |
| TASKRUN-OAS-61-002 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-61-001 | Well-known OpenAPI endpoint; depends on 61-001. |
| TASKRUN-OAS-62-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-61-002 | SDK examples; depends on 61-002. |
| TASKRUN-OAS-63-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-62-001 | Deprecation headers/notifications; depends on 62-001. |
| TASKRUN-OBS-50-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | — | Telemetry core adoption. |
| TASKRUN-OBS-51-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-50-001 | Metrics/SLOs; depends on 50-001. |
| TASKRUN-OBS-52-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-51-001 | Timeline events; depends on 51-001. |
| TASKRUN-OBS-53-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-52-001 | Evidence locker snapshots; depends on 52-001. |
| TASKRUN-OBS-50-001 | DONE (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | — | Telemetry core adoption. |
| TASKRUN-OBS-51-001 | DONE (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-50-001 | Metrics/SLOs; depends on 50-001. |
| TASKRUN-OBS-52-001 | BLOCKED (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-51-001 | Timeline events; blocked: schema/evidence-pointer contract not published. |
| TASKRUN-OBS-53-001 | BLOCKED (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-52-001 | Evidence locker snapshots; blocked: waiting on timeline schema/pointer contract. |
Status source of truth: `docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md`. Update both files together. Keep UTC dates when advancing status.