fix(infra): resolve fresh-build DB schema gaps, Kerberos warnings, and Dockerfile syntax

- Workflow: add PostgreSQL auto-migration (8 tables in schema `workflow`)
  with AddStartupMigrations wiring and embedded SQL migration
- Scheduler: add missing `schema_version` and `source` columns to
  `scheduler.schedules` table in both init script and migration
- Platform: delay analytics maintenance 15s to avoid race with migration
  020_AnalyticsRollups creating compute_daily_rollups()
- Docker: install libgssapi-krb5-2 in runtime image to eliminate Npgsql
  Kerberos probe warnings across all 59 services
- Docker: remove `# syntax=docker/dockerfile:1.7` directive from both
  Dockerfiles to avoid BuildKit frontend pull failures on flaky DNS
- Postgres init: add `workflow` schema to 01-create-schemas.sql

Verified: 75 containers, 0 unhealthy, 0 recurring errors after full
wipe-and-rebuild cycle.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
master
2026-04-08 11:40:08 +03:00
parent afbedf1c60
commit 53f294400f
10 changed files with 227 additions and 4 deletions

View File

@@ -0,0 +1,169 @@
-- Workflow PostgreSQL Schema: Initial bootstrap
-- Creates the `workflow` schema plus the tables and indexes used by the
-- PostgreSQL workflow data store. Every statement in this script uses
-- IF NOT EXISTS, so objects that already exist are left untouched on re-run.
-- NOTE(review): every object in this script is qualified with the literal
-- schema name `workflow`. If the data store is configured with a different
-- schema name, this script still creates everything under `workflow` --
-- confirm that is intended.
CREATE SCHEMA IF NOT EXISTS workflow;
-- wf_instances: one row per top-level workflow execution.
-- Optional columns (nullable): the business reference pair and the
-- completed / stale / purge timestamps. Everything else is required.
CREATE TABLE IF NOT EXISTS workflow.wf_instances (
    workflow_instance_id     TEXT         PRIMARY KEY,
    workflow_name            TEXT         NOT NULL,
    workflow_version         TEXT         NOT NULL,
    business_reference_key   TEXT,
    business_reference_json  JSONB,
    status                   TEXT         NOT NULL,
    state_json               JSONB        NOT NULL,
    created_on_utc           TIMESTAMPTZ  NOT NULL,
    completed_on_utc         TIMESTAMPTZ,
    stale_after_utc          TIMESTAMPTZ,
    purge_after_utc          TIMESTAMPTZ
);

-- Secondary indexes: workflow definition, business key, status, and the
-- two retention timestamps (purge / stale).
CREATE INDEX IF NOT EXISTS wf_instances_wf_ix
    ON workflow.wf_instances (workflow_name, workflow_version);

CREATE INDEX IF NOT EXISTS wf_instances_business_ix
    ON workflow.wf_instances (business_reference_key);

CREATE INDEX IF NOT EXISTS wf_instances_status_ix
    ON workflow.wf_instances (status);

CREATE INDEX IF NOT EXISTS wf_instances_purge_ix
    ON workflow.wf_instances (purge_after_utc);

CREATE INDEX IF NOT EXISTS wf_instances_stale_ix
    ON workflow.wf_instances (stale_after_utc);
-- wf_tasks: tasks belonging to a workflow instance.
-- Each task references its parent row in wf_instances; deleting the parent
-- instance removes its tasks as well (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS workflow.wf_tasks (
    workflow_task_id         TEXT         PRIMARY KEY,
    workflow_instance_id     TEXT         NOT NULL
        REFERENCES workflow.wf_instances (workflow_instance_id)
        ON DELETE CASCADE,
    workflow_name            TEXT         NOT NULL,
    workflow_version         TEXT         NOT NULL,
    task_name                TEXT         NOT NULL,
    task_type                TEXT         NOT NULL,
    route                    TEXT         NOT NULL,
    business_reference_key   TEXT,
    business_reference_json  JSONB,
    assignee                 TEXT,
    status                   TEXT         NOT NULL,
    -- Four role documents, all required.
    workflow_roles_json      JSONB        NOT NULL,
    task_roles_json          JSONB        NOT NULL,
    runtime_roles_json       JSONB        NOT NULL,
    effective_roles_json     JSONB        NOT NULL,
    payload_json             JSONB        NOT NULL,
    created_on_utc           TIMESTAMPTZ  NOT NULL,
    completed_on_utc         TIMESTAMPTZ,
    stale_after_utc          TIMESTAMPTZ,
    purge_after_utc          TIMESTAMPTZ
);

-- Secondary indexes: (instance, status), workflow definition, business key,
-- (assignee, status), and the two retention timestamps.
CREATE INDEX IF NOT EXISTS wf_tasks_instance_status_ix
    ON workflow.wf_tasks (workflow_instance_id, status);

CREATE INDEX IF NOT EXISTS wf_tasks_wf_ix
    ON workflow.wf_tasks (workflow_name, workflow_version);

CREATE INDEX IF NOT EXISTS wf_tasks_business_ix
    ON workflow.wf_tasks (business_reference_key);

CREATE INDEX IF NOT EXISTS wf_tasks_assignee_status_ix
    ON workflow.wf_tasks (assignee, status);

CREATE INDEX IF NOT EXISTS wf_tasks_purge_ix
    ON workflow.wf_tasks (purge_after_utc);

CREATE INDEX IF NOT EXISTS wf_tasks_stale_ix
    ON workflow.wf_tasks (stale_after_utc);
-- wf_task_events: audit events recorded against a task.
-- The key is a database-generated identity value (GENERATED ALWAYS), and
-- events are removed together with their task (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS workflow.wf_task_events (
    task_event_id     BIGINT       GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    workflow_task_id  TEXT         NOT NULL
        REFERENCES workflow.wf_tasks (workflow_task_id)
        ON DELETE CASCADE,
    event_type        TEXT         NOT NULL,
    actor_id          TEXT,
    payload_json      JSONB        NOT NULL,
    created_on_utc    TIMESTAMPTZ  NOT NULL
);

-- Composite index on task id followed by creation time.
CREATE INDEX IF NOT EXISTS wf_task_events_task_time_ix
    ON workflow.wf_task_events (workflow_task_id, created_on_utc);
-- wf_runtime_states: runtime state tracking, one row per workflow instance id.
-- No foreign key to wf_instances and no secondary indexes are declared here.
-- NOTE(review): business_reference_json and state_json are TEXT in this
-- table, whereas the same-named columns in wf_instances are JSONB. The types
-- are kept as-is; confirm the difference is deliberate before changing it.
CREATE TABLE IF NOT EXISTS workflow.wf_runtime_states (
    workflow_instance_id     TEXT         PRIMARY KEY,
    workflow_name            TEXT         NOT NULL,
    workflow_version         TEXT         NOT NULL,
    version_no               BIGINT       NOT NULL,
    business_reference_key   TEXT,
    business_reference_json  TEXT,
    runtime_provider         TEXT         NOT NULL,
    runtime_instance_id      TEXT         NOT NULL,
    runtime_status           TEXT         NOT NULL,
    state_json               TEXT         NOT NULL,
    created_on_utc           TIMESTAMPTZ  NOT NULL,
    completed_on_utc         TIMESTAMPTZ,
    stale_after_utc          TIMESTAMPTZ,
    purge_after_utc          TIMESTAMPTZ,
    last_updated_on_utc      TIMESTAMPTZ  NOT NULL
);
-- wf_host_locks: distributed host locks, keyed by lock name.
-- Each row records the owner plus when the lock was acquired and when it expires.
CREATE TABLE IF NOT EXISTS workflow.wf_host_locks (
    lock_name        TEXT         PRIMARY KEY,
    lock_owner       TEXT         NOT NULL,
    acquired_on_utc  TIMESTAMPTZ  NOT NULL,
    expires_on_utc   TIMESTAMPTZ  NOT NULL
);
-- wf_signal_queue: queue of workflow signals awaiting delivery.
-- delivery_count starts at 0 and enqueued_on_utc defaults to now() when the
-- inserting statement does not supply them. The claim columns are nullable.
CREATE TABLE IF NOT EXISTS workflow.wf_signal_queue (
    signal_id             TEXT         PRIMARY KEY,
    workflow_instance_id  TEXT         NOT NULL,
    runtime_provider      TEXT         NOT NULL,
    signal_type           TEXT         NOT NULL,
    expected_version      BIGINT       NOT NULL,
    waiting_token         TEXT,
    occurred_at_utc       TIMESTAMPTZ  NOT NULL,
    due_at_utc            TIMESTAMPTZ,
    payload_json          JSONB        NOT NULL,
    delivery_count        INTEGER      NOT NULL DEFAULT 0,
    enqueued_on_utc       TIMESTAMPTZ  NOT NULL DEFAULT now(),
    claimed_by            TEXT,
    claimed_until_utc     TIMESTAMPTZ,
    last_error            TEXT
);

-- Composite index on due time followed by claim expiry.
CREATE INDEX IF NOT EXISTS wf_signal_queue_ready_ix
    ON workflow.wf_signal_queue (due_at_utc, claimed_until_utc);

-- Index on the owning workflow instance id.
CREATE INDEX IF NOT EXISTS wf_signal_queue_instance_ix
    ON workflow.wf_signal_queue (workflow_instance_id);
-- wf_signal_dead_letters: dead-lettered signals.
-- Carries the same signal columns as wf_signal_queue minus the claim columns,
-- plus dead_lettered_on_utc (defaults to now()). Unlike the queue table,
-- enqueued_on_utc has no default here and must be supplied on insert.
CREATE TABLE IF NOT EXISTS workflow.wf_signal_dead_letters (
    signal_id             TEXT         PRIMARY KEY,
    workflow_instance_id  TEXT         NOT NULL,
    runtime_provider      TEXT         NOT NULL,
    signal_type           TEXT         NOT NULL,
    expected_version      BIGINT       NOT NULL,
    waiting_token         TEXT,
    occurred_at_utc       TIMESTAMPTZ  NOT NULL,
    due_at_utc            TIMESTAMPTZ,
    payload_json          JSONB        NOT NULL,
    delivery_count        INTEGER      NOT NULL DEFAULT 0,
    enqueued_on_utc       TIMESTAMPTZ  NOT NULL,
    last_error            TEXT,
    dead_lettered_on_utc  TIMESTAMPTZ  NOT NULL DEFAULT now()
);

-- Index on the owning workflow instance id.
CREATE INDEX IF NOT EXISTS wf_signal_dead_letters_instance_ix
    ON workflow.wf_signal_dead_letters (workflow_instance_id);

-- Index on signal type.
CREATE INDEX IF NOT EXISTS wf_signal_dead_letters_type_ix
    ON workflow.wf_signal_dead_letters (signal_type);
-- wf_signal_wake_outbox: signal wake outbox entries.
-- created_on_utc defaults to now(); the claim columns are nullable.
-- signal_id is stored as plain TEXT with no foreign key declared.
CREATE TABLE IF NOT EXISTS workflow.wf_signal_wake_outbox (
    outbox_id             TEXT         PRIMARY KEY,
    signal_id             TEXT         NOT NULL,
    workflow_instance_id  TEXT         NOT NULL,
    runtime_provider      TEXT         NOT NULL,
    signal_type           TEXT         NOT NULL,
    due_at_utc            TIMESTAMPTZ,
    created_on_utc        TIMESTAMPTZ  NOT NULL DEFAULT now(),
    claimed_by            TEXT,
    claimed_until_utc     TIMESTAMPTZ
);

-- Composite index on claim expiry followed by creation time.
CREATE INDEX IF NOT EXISTS wf_signal_wake_outbox_claim_ix
    ON workflow.wf_signal_wake_outbox (claimed_until_utc, created_on_utc);

View File

@@ -1,6 +1,8 @@
using System;
using StellaOps.Workflow.Abstractions;
using StellaOps.Infrastructure.Postgres.Migrations;
using StellaOps.Infrastructure.Postgres.Options;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -28,6 +30,24 @@ public static class PostgresWorkflowDataStoreExtensions
StringComparison.OrdinalIgnoreCase);
services.Configure<PostgresWorkflowBackendOptions>(configuration.GetSection(PostgresWorkflowBackendOptions.SectionName));
// Resolve connection string and schema for auto-migration.
// Both names are read from the workflow backend options section, with literal
// fallbacks used when the corresponding key is absent.
var section = configuration.GetSection(PostgresWorkflowBackendOptions.SectionName);
var connStringName = section["ConnectionStringName"] ?? "WorkflowPostgres";
// NOTE(review): the fallback schema name here is "srd_wfklw". If the embedded
// bootstrap migration qualifies its objects with a fixed schema name (e.g.
// `workflow`), the schema passed to AddStartupMigrations and the schema the
// tables actually land in can differ when "SchemaName" is not configured -- confirm.
var schemaName = section["SchemaName"] ?? "srd_wfklw";
// Connection string lookup order: the named connection string first, then
// "Default"; registration throws if neither is configured.
var connectionString = configuration.GetConnectionString(connStringName)
?? configuration.GetConnectionString("Default")
?? throw new InvalidOperationException(
$"Workflow auto-migration requires connection string '{connStringName}' or 'Default'.");
// NOTE(review): this configures the unnamed PostgresOptions instance. If other
// modules in the same host also configure PostgresOptions, the ConnectionString
// assignments would presumably overwrite one another -- verify.
services.Configure<PostgresOptions>(opts =>
opts.ConnectionString = connectionString);
// Registers startup migrations for this assembly under the resolved schema
// name, using the connection string taken from PostgresOptions.
services.AddStartupMigrations<PostgresOptions>(
schemaName,
"Workflow.DataStore.PostgreSQL",
typeof(PostgresWorkflowDataStoreExtensions).Assembly,
opts => opts.ConnectionString);
services.AddScoped<PostgresWorkflowMutationSessionAccessor>();
services.AddScoped<PostgresWorkflowDatabase>();
services.AddScoped<PostgresWorkflowSignalStore>();

View File

@@ -6,11 +6,18 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<EmbeddedResource Include="Migrations\**\*.sql"
LogicalName="%(RecursiveDir)%(Filename)%(Extension)" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.1" />
<ProjectReference Include="..\StellaOps.Workflow.Abstractions\StellaOps.Workflow.Abstractions.csproj" />
<ProjectReference Include="..\StellaOps.Workflow.Contracts\StellaOps.Workflow.Contracts.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>