fix(infra): resolve fresh-build DB schema gaps, Kerberos warnings, and Dockerfile syntax

- Workflow: add PostgreSQL auto-migration (8 tables in schema `workflow`)
  with AddStartupMigrations wiring and embedded SQL migration
- Scheduler: add missing `schema_version` and `source` columns to
  `scheduler.schedules` table in both init script and migration
- Platform: delay analytics maintenance 15s to avoid race with migration
  020_AnalyticsRollups creating compute_daily_rollups()
- Docker: install libgssapi-krb5-2 in runtime image to eliminate Npgsql
  Kerberos probe warnings across all 59 services
- Docker: remove `# syntax=docker/dockerfile:1.7` directive from both
  Dockerfiles to avoid BuildKit frontend pull failures on flaky DNS
- Postgres init: add `workflow` schema to 01-create-schemas.sql

Verified: 75 containers, 0 unhealthy, 0 recurring errors after full
wipe-and-rebuild cycle.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
master
2026-04-08 11:40:08 +03:00
parent afbedf1c60
commit 53f294400f
10 changed files with 227 additions and 4 deletions

View File

@@ -15,3 +15,4 @@ CREATE SCHEMA IF NOT EXISTS issuer_directory;
CREATE SCHEMA IF NOT EXISTS analytics;
CREATE SCHEMA IF NOT EXISTS scheduler_app;
CREATE SCHEMA IF NOT EXISTS findings_ledger_app;
CREATE SCHEMA IF NOT EXISTS workflow;

View File

@@ -229,7 +229,9 @@ CREATE TABLE IF NOT EXISTS scheduler.schedules (
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_by TEXT NOT NULL,
deleted_at TIMESTAMPTZ,
deleted_by TEXT
deleted_by TEXT,
schema_version TEXT,
source TEXT NOT NULL DEFAULT 'user'
);
CREATE INDEX IF NOT EXISTS idx_schedules_tenant ON scheduler.schedules(tenant_id) WHERE deleted_at IS NULL;

View File

@@ -1,4 +1,3 @@
# syntax=docker/dockerfile:1.7
# Multi-stage Angular console image with non-root runtime (DOCKER-44-001)
ARG NODE_IMAGE=node:20-bookworm-slim
ARG NGINX_IMAGE=nginxinc/nginx-unprivileged:1.27-alpine

View File

@@ -1,4 +1,3 @@
# syntax=docker/dockerfile:1.7
# Hardened multi-stage template for StellaOps services
# Parameters are build-time ARGs so this file can be re-used across services.
@@ -34,6 +33,12 @@ ARG APP_GID=10001
ARG APP_PORT=8080
ARG APP_BINARY=StellaOps.Service
ARG PUBLISH_DIR=/app/publish
# Install GSSAPI/Kerberos client library required by Npgsql for GSSAPI-capable
# PostgreSQL connections. Even when password auth is used, Npgsql probes for this
# library on startup. Sovereign deployments may enable Kerberos-protected Postgres.
RUN apt-get update && \
apt-get install -y --no-install-recommends libgssapi-krb5-2 && \
rm -rf /var/lib/apt/lists/*
# Create non-root user/group with stable ids for auditability
RUN groupadd -r -g ${APP_GID} ${APP_USER} && \
useradd -r -u ${APP_UID} -g ${APP_GID} -d /var/lib/${APP_USER} ${APP_USER} && \

View File

@@ -252,7 +252,9 @@ CREATE TABLE IF NOT EXISTS scheduler.schedules (
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_by TEXT NOT NULL,
deleted_at TIMESTAMPTZ,
deleted_by TEXT
deleted_by TEXT,
schema_version TEXT,
source TEXT NOT NULL DEFAULT 'user'
);
CREATE INDEX IF NOT EXISTS idx_schedules_tenant ON scheduler.schedules(tenant_id) WHERE deleted_at IS NULL;

View File

@@ -0,0 +1,6 @@
-- Add the schema_version and source columns to scheduler.schedules on
-- databases whose table was created before these columns existed.
-- The runs and policy_run_jobs tables already have schema_version;
-- schedules was inadvertently omitted.
--
-- CREATE TABLE IF NOT EXISTS never alters an existing table, so both columns
-- must be added here. ADD COLUMN IF NOT EXISTS keeps the migration idempotent
-- (a no-op on fresh databases where the CREATE TABLE already includes them).
-- source is NOT NULL, so it carries DEFAULT 'user' to populate existing rows.
ALTER TABLE scheduler.schedules
    ADD COLUMN IF NOT EXISTS schema_version TEXT,
    ADD COLUMN IF NOT EXISTS source TEXT NOT NULL DEFAULT 'user';

View File

@@ -50,6 +50,18 @@ public sealed class PlatformAnalyticsMaintenanceService : BackgroundService
return;
}
// Allow time for startup migrations to complete before first run.
// Migration 020_AnalyticsRollups creates the compute_daily_rollups function;
// running before that migration completes produces a harmless but noisy error.
try
{
await Task.Delay(TimeSpan.FromSeconds(15), stoppingToken).ConfigureAwait(false);
}
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
{
return;
}
if (options.RunOnStartup)
{
await RunMaintenanceAsync(stoppingToken).ConfigureAwait(false);

View File

@@ -0,0 +1,169 @@
-- Workflow PostgreSQL Schema: Initial bootstrap
-- Creates the schema and all tables required by the PostgreSQL workflow data store.
-- Every statement is guarded with IF NOT EXISTS, so the script can be re-run.
-- NOTE(review): the schema name is hard-coded as `workflow` throughout this
-- script (the name used in compose deployments). A deployment configured with a
-- different schema name would still get its objects created in `workflow` —
-- confirm this is intended.
CREATE SCHEMA IF NOT EXISTS workflow;
-- wf_instances: one row per top-level workflow execution.
-- The primary key is declared as a table constraint using the name PostgreSQL
-- would generate by default, so the resulting catalog is unchanged.
CREATE TABLE IF NOT EXISTS workflow.wf_instances (
    workflow_instance_id    TEXT,
    workflow_name           TEXT        NOT NULL,
    workflow_version        TEXT        NOT NULL,
    business_reference_key  TEXT,
    business_reference_json JSONB,
    status                  TEXT        NOT NULL,
    state_json              JSONB       NOT NULL,
    created_on_utc          TIMESTAMPTZ NOT NULL,
    completed_on_utc        TIMESTAMPTZ,
    stale_after_utc         TIMESTAMPTZ,
    purge_after_utc         TIMESTAMPTZ,
    CONSTRAINT wf_instances_pkey PRIMARY KEY (workflow_instance_id)
);

-- Secondary indexes: workflow name + version, business reference key,
-- status, and the purge / stale timestamps.
CREATE INDEX IF NOT EXISTS wf_instances_wf_ix       ON workflow.wf_instances (workflow_name, workflow_version);
CREATE INDEX IF NOT EXISTS wf_instances_business_ix ON workflow.wf_instances (business_reference_key);
CREATE INDEX IF NOT EXISTS wf_instances_status_ix   ON workflow.wf_instances (status);
CREATE INDEX IF NOT EXISTS wf_instances_purge_ix    ON workflow.wf_instances (purge_after_utc);
CREATE INDEX IF NOT EXISTS wf_instances_stale_ix    ON workflow.wf_instances (stale_after_utc);
-- wf_tasks: tasks belonging to a workflow instance.
-- Rows are removed automatically when the owning wf_instances row is deleted
-- (ON DELETE CASCADE). Constraints carry the names PostgreSQL would generate
-- by default, so the resulting catalog is unchanged.
CREATE TABLE IF NOT EXISTS workflow.wf_tasks (
    workflow_task_id        TEXT,
    workflow_instance_id    TEXT        NOT NULL,
    workflow_name           TEXT        NOT NULL,
    workflow_version        TEXT        NOT NULL,
    task_name               TEXT        NOT NULL,
    task_type               TEXT        NOT NULL,
    route                   TEXT        NOT NULL,
    business_reference_key  TEXT,
    business_reference_json JSONB,
    assignee                TEXT,
    status                  TEXT        NOT NULL,
    workflow_roles_json     JSONB       NOT NULL,
    task_roles_json         JSONB       NOT NULL,
    runtime_roles_json      JSONB       NOT NULL,
    effective_roles_json    JSONB       NOT NULL,
    payload_json            JSONB       NOT NULL,
    created_on_utc          TIMESTAMPTZ NOT NULL,
    completed_on_utc        TIMESTAMPTZ,
    stale_after_utc         TIMESTAMPTZ,
    purge_after_utc         TIMESTAMPTZ,
    CONSTRAINT wf_tasks_pkey PRIMARY KEY (workflow_task_id),
    CONSTRAINT wf_tasks_workflow_instance_id_fkey
        FOREIGN KEY (workflow_instance_id)
        REFERENCES workflow.wf_instances (workflow_instance_id)
        ON DELETE CASCADE
);

-- Secondary indexes: instance + status, workflow name + version, business
-- reference key, assignee + status, and the purge / stale timestamps.
CREATE INDEX IF NOT EXISTS wf_tasks_instance_status_ix ON workflow.wf_tasks (workflow_instance_id, status);
CREATE INDEX IF NOT EXISTS wf_tasks_wf_ix              ON workflow.wf_tasks (workflow_name, workflow_version);
CREATE INDEX IF NOT EXISTS wf_tasks_business_ix        ON workflow.wf_tasks (business_reference_key);
CREATE INDEX IF NOT EXISTS wf_tasks_assignee_status_ix ON workflow.wf_tasks (assignee, status);
CREATE INDEX IF NOT EXISTS wf_tasks_purge_ix           ON workflow.wf_tasks (purge_after_utc);
CREATE INDEX IF NOT EXISTS wf_tasks_stale_ix           ON workflow.wf_tasks (stale_after_utc);
-- wf_task_events: audit events recorded against a task.
-- task_event_id is a database-generated identity value; events are removed
-- together with their task (ON DELETE CASCADE). Constraints carry the names
-- PostgreSQL would generate by default.
CREATE TABLE IF NOT EXISTS workflow.wf_task_events (
    task_event_id    BIGINT      GENERATED ALWAYS AS IDENTITY,
    workflow_task_id TEXT        NOT NULL,
    event_type       TEXT        NOT NULL,
    actor_id         TEXT,
    payload_json     JSONB       NOT NULL,
    created_on_utc   TIMESTAMPTZ NOT NULL,
    CONSTRAINT wf_task_events_pkey PRIMARY KEY (task_event_id),
    CONSTRAINT wf_task_events_workflow_task_id_fkey
        FOREIGN KEY (workflow_task_id)
        REFERENCES workflow.wf_tasks (workflow_task_id)
        ON DELETE CASCADE
);

-- Index on task id + creation time.
CREATE INDEX IF NOT EXISTS wf_task_events_task_time_ix ON workflow.wf_task_events (workflow_task_id, created_on_utc);
-- wf_runtime_states: runtime state tracking, keyed by workflow instance id.
-- No foreign key to wf_instances and no secondary indexes are declared.
-- NOTE(review): business_reference_json and state_json are TEXT here, whereas
-- wf_instances and wf_tasks declare their *_json columns as JSONB — confirm
-- this difference is deliberate.
CREATE TABLE IF NOT EXISTS workflow.wf_runtime_states (
    workflow_instance_id    TEXT,
    workflow_name           TEXT        NOT NULL,
    workflow_version        TEXT        NOT NULL,
    version_no              BIGINT      NOT NULL,
    business_reference_key  TEXT,
    business_reference_json TEXT,
    runtime_provider        TEXT        NOT NULL,
    runtime_instance_id     TEXT        NOT NULL,
    runtime_status          TEXT        NOT NULL,
    state_json              TEXT        NOT NULL,
    created_on_utc          TIMESTAMPTZ NOT NULL,
    completed_on_utc        TIMESTAMPTZ,
    stale_after_utc         TIMESTAMPTZ,
    purge_after_utc         TIMESTAMPTZ,
    last_updated_on_utc     TIMESTAMPTZ NOT NULL,
    CONSTRAINT wf_runtime_states_pkey PRIMARY KEY (workflow_instance_id)
);
-- wf_host_locks: distributed host locks, one row per lock name, recording the
-- current owner and the acquisition / expiry timestamps.
CREATE TABLE IF NOT EXISTS workflow.wf_host_locks (
    lock_name       TEXT,
    lock_owner      TEXT        NOT NULL,
    acquired_on_utc TIMESTAMPTZ NOT NULL,
    expires_on_utc  TIMESTAMPTZ NOT NULL,
    CONSTRAINT wf_host_locks_pkey PRIMARY KEY (lock_name)
);
-- wf_signal_queue: queue of signals awaiting delivery to workflows.
-- delivery_count starts at 0 and enqueued_on_utc defaults to the insert time.
-- workflow_instance_id is not declared as a foreign key.
CREATE TABLE IF NOT EXISTS workflow.wf_signal_queue (
    signal_id            TEXT,
    workflow_instance_id TEXT        NOT NULL,
    runtime_provider     TEXT        NOT NULL,
    signal_type          TEXT        NOT NULL,
    expected_version     BIGINT      NOT NULL,
    waiting_token        TEXT,
    occurred_at_utc      TIMESTAMPTZ NOT NULL,
    due_at_utc           TIMESTAMPTZ,
    payload_json         JSONB       NOT NULL,
    delivery_count       INTEGER     NOT NULL DEFAULT 0,
    enqueued_on_utc      TIMESTAMPTZ NOT NULL DEFAULT now(),
    claimed_by           TEXT,
    claimed_until_utc    TIMESTAMPTZ,
    last_error           TEXT,
    CONSTRAINT wf_signal_queue_pkey PRIMARY KEY (signal_id)
);

-- Indexes: due time + claim expiry, and workflow instance id.
CREATE INDEX IF NOT EXISTS wf_signal_queue_ready_ix    ON workflow.wf_signal_queue (due_at_utc, claimed_until_utc);
CREATE INDEX IF NOT EXISTS wf_signal_queue_instance_ix ON workflow.wf_signal_queue (workflow_instance_id);
-- wf_signal_dead_letters: dead-lettered signals.
-- Carries the same signal columns as wf_signal_queue minus the claim columns,
-- plus dead_lettered_on_utc (defaults to the insert time). Unlike the queue
-- table, enqueued_on_utc has no default and must be supplied.
CREATE TABLE IF NOT EXISTS workflow.wf_signal_dead_letters (
    signal_id            TEXT,
    workflow_instance_id TEXT        NOT NULL,
    runtime_provider     TEXT        NOT NULL,
    signal_type          TEXT        NOT NULL,
    expected_version     BIGINT      NOT NULL,
    waiting_token        TEXT,
    occurred_at_utc      TIMESTAMPTZ NOT NULL,
    due_at_utc           TIMESTAMPTZ,
    payload_json         JSONB       NOT NULL,
    delivery_count       INTEGER     NOT NULL DEFAULT 0,
    enqueued_on_utc      TIMESTAMPTZ NOT NULL,
    last_error           TEXT,
    dead_lettered_on_utc TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT wf_signal_dead_letters_pkey PRIMARY KEY (signal_id)
);

-- Indexes: workflow instance id, and signal type.
CREATE INDEX IF NOT EXISTS wf_signal_dead_letters_instance_ix ON workflow.wf_signal_dead_letters (workflow_instance_id);
CREATE INDEX IF NOT EXISTS wf_signal_dead_letters_type_ix     ON workflow.wf_signal_dead_letters (signal_type);
-- wf_signal_wake_outbox: outbox of signal wake entries.
-- created_on_utc defaults to the insert time. signal_id and
-- workflow_instance_id are not declared as foreign keys.
CREATE TABLE IF NOT EXISTS workflow.wf_signal_wake_outbox (
    outbox_id            TEXT,
    signal_id            TEXT        NOT NULL,
    workflow_instance_id TEXT        NOT NULL,
    runtime_provider     TEXT        NOT NULL,
    signal_type          TEXT        NOT NULL,
    due_at_utc           TIMESTAMPTZ,
    created_on_utc       TIMESTAMPTZ NOT NULL DEFAULT now(),
    claimed_by           TEXT,
    claimed_until_utc    TIMESTAMPTZ,
    CONSTRAINT wf_signal_wake_outbox_pkey PRIMARY KEY (outbox_id)
);

-- Index on claim expiry + creation time.
CREATE INDEX IF NOT EXISTS wf_signal_wake_outbox_claim_ix ON workflow.wf_signal_wake_outbox (claimed_until_utc, created_on_utc);

View File

@@ -1,6 +1,8 @@
using System;
using StellaOps.Workflow.Abstractions;
using StellaOps.Infrastructure.Postgres.Migrations;
using StellaOps.Infrastructure.Postgres.Options;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -28,6 +30,24 @@ public static class PostgresWorkflowDataStoreExtensions
StringComparison.OrdinalIgnoreCase);
services.Configure<PostgresWorkflowBackendOptions>(configuration.GetSection(PostgresWorkflowBackendOptions.SectionName));
// Resolve connection string and schema for auto-migration
var section = configuration.GetSection(PostgresWorkflowBackendOptions.SectionName);
var connStringName = section["ConnectionStringName"] ?? "WorkflowPostgres";
var schemaName = section["SchemaName"] ?? "srd_wfklw";
var connectionString = configuration.GetConnectionString(connStringName)
?? configuration.GetConnectionString("Default")
?? throw new InvalidOperationException(
$"Workflow auto-migration requires connection string '{connStringName}' or 'Default'.");
services.Configure<PostgresOptions>(opts =>
opts.ConnectionString = connectionString);
services.AddStartupMigrations<PostgresOptions>(
schemaName,
"Workflow.DataStore.PostgreSQL",
typeof(PostgresWorkflowDataStoreExtensions).Assembly,
opts => opts.ConnectionString);
services.AddScoped<PostgresWorkflowMutationSessionAccessor>();
services.AddScoped<PostgresWorkflowDatabase>();
services.AddScoped<PostgresWorkflowSignalStore>();

View File

@@ -6,11 +6,18 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<EmbeddedResource Include="Migrations\**\*.sql"
LogicalName="%(RecursiveDir)%(Filename)%(Extension)" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.1" />
<ProjectReference Include="..\StellaOps.Workflow.Abstractions\StellaOps.Workflow.Abstractions.csproj" />
<ProjectReference Include="..\StellaOps.Workflow.Contracts\StellaOps.Workflow.Contracts.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>