From 6b7168ca3c7e4fc12630cf4e06abb2328f1a7bf0 Mon Sep 17 00:00:00 2001 From: master <> Date: Tue, 10 Mar 2026 01:37:02 +0200 Subject: [PATCH] Bind startup migrations to module schema search path --- ...atform_rebuild_runtime_contract_repairs.md | 75 +++++++++++++++++++ .../Migrations/StartupMigrationHost.cs | 30 ++++++-- ...tartupMigrationHostTests.Helpers.Schema.cs | 23 ++++++ .../StartupMigrationHostTests.SearchPath.cs | 53 +++++++++++++ 4 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 docs/implplan/SPRINT_20260309_006_Platform_rebuild_runtime_contract_repairs.md create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.SearchPath.cs diff --git a/docs/implplan/SPRINT_20260309_006_Platform_rebuild_runtime_contract_repairs.md b/docs/implplan/SPRINT_20260309_006_Platform_rebuild_runtime_contract_repairs.md new file mode 100644 index 000000000..f6b6bf6ea --- /dev/null +++ b/docs/implplan/SPRINT_20260309_006_Platform_rebuild_runtime_contract_repairs.md @@ -0,0 +1,75 @@ +# Sprint 20260309-006 - Platform Rebuild Runtime Contract Repairs + +## Topic & Scope +- Repair the post-rebuild JobEngine startup failure where shared startup migrations execute unqualified SQL on PostgreSQL's default search path instead of the module schema. +- Repair the gateway container binding drift where compose publishes explicit HTTP ports but the gateway container only comes up on the opportunistic HTTPS listener. +- Keep this iteration limited to runtime contract correctness exposed by the full-stack rebuild before the next Playwright sweep. +- Working directory: `src/__Libraries/StellaOps.Infrastructure.Postgres`. 
+- Allowed coordination edits: `src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/**`, `src/Router/StellaOps.Gateway.WebService/**`, `src/Router/__Tests/StellaOps.Gateway.WebService.Tests/**`, `docs/modules/jobengine/architecture.md`, `docs/modules/router/architecture.md`, `docs/implplan/SPRINT_20260309_006_Platform_rebuild_runtime_contract_repairs.md`. +- Expected evidence: focused Postgres migration-host integration tests, focused gateway binding tests, rebuilt/redeployed affected services, and direct live probes before returning to Playwright. + +## Dependencies & Concurrency +- Depends on `SPRINT_20260309_001_Platform_scratch_setup_bootstrap_restore.md` for the rebuilt stack baseline and `SPRINT_20260309_005_JobEngine_live_scratch_reset_and_ops_scope_repair.md` for the previously restored JobEngine startup-migration registration. +- Safe parallelism: avoid the dirty JobEngine migration SQL files already being edited elsewhere; this pass must solve the current failure in shared runtime code and gateway container binding only. + +## Documentation Prerequisites +- `AGENTS.md` +- `docs/code-of-conduct/CODE_OF_CONDUCT.md` +- `docs/qa/feature-checks/FLOW.md` +- `docs/modules/jobengine/architecture.md` +- `docs/modules/router/architecture.md` + +## Delivery Tracker + +### PLATFORM-RUNTIME-006-001 - Bind startup migrations to the module schema +Status: DOING +Dependency: none +Owners: Developer, Test Automation +Task description: +- Align `StartupMigrationHost` with the shared migration runner so startup-applied SQL executes with the target module schema on PostgreSQL's search path. +- Prove the fix with focused integration coverage that reproduces a collision in `public` while asserting the startup host still creates and mutates schema-local objects. + +Completion criteria: +- [ ] `StartupMigrationHost` sets the search path to the module schema before applying embedded SQL. 
+- [ ] Focused shared-library integration tests fail without the fix and pass with it. +- [ ] JobEngine no longer crashes on startup because `001_initial.sql` is applied against `orchestrator`, not `public`. + +### PLATFORM-RUNTIME-006-002 - Honor compose HTTP bindings in gateway container mode +Status: TODO +Dependency: PLATFORM-RUNTIME-006-001 +Owners: Developer, Test Automation +Task description: +- Repair the container binding helper in `StellaOps.Gateway.WebService` so explicit compose HTTP/HTTPS bindings are actually listened on when the gateway runs in-container with local certificates enabled. +- Add focused gateway tests for the binding-resolution logic instead of relying on manual container inspection only. + +Completion criteria: +- [ ] The gateway container binding helper derives listeners from `ASPNETCORE_URLS` and/or port env vars when present. +- [ ] Focused gateway tests cover explicit URL and port-env resolution. +- [ ] The live gateway container exposes the expected HTTP listener and reaches healthy state after redeploy. + +### PLATFORM-RUNTIME-006-003 - Redeploy and reverify the repaired stack slice +Status: TODO +Dependency: PLATFORM-RUNTIME-006-002 +Owners: QA +Task description: +- Rebuild the affected runtime images, redeploy the live compose slice, verify container health and direct service reachability, then resume the browser-based defect sweep from a clean baseline. + +Completion criteria: +- [ ] Updated runtime images are rebuilt and redeployed without touching unrelated dirty work. +- [ ] `jobengine` and `gateway` both reach healthy running state in the live compose stack. +- [ ] Direct probes for JobEngine summary and gateway frontdoor reachability succeed before the next Playwright pass. 
+ +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2026-03-09 | Sprint created after the full rebuild exposed two runtime contract failures: JobEngine startup migrations were executing unqualified SQL on the default PostgreSQL search path, and the gateway container was not honoring explicit compose HTTP bindings. | Developer | + +## Decisions & Risks +- Decision: fix the JobEngine failure in the shared startup migration host instead of rewriting active module SQL files owned by another agent. The runtime contract is that startup migrations target the declared module schema. +- Decision: repair gateway listener resolution in code instead of papering over the symptom with a healthcheck-only change. The compose contract already declares the intended container ports. +- Risk: the gateway still force-redirects browser traffic to HTTPS; after the binding repair, live route checks must confirm this does not break any frontdoor route that still proxies through the gateway service. + +## Next Checkpoints +- 2026-03-09: land shared startup-migration search-path fix with focused tests. +- 2026-03-09: land gateway container binding fix with focused tests. +- 2026-03-09: rebuild/redeploy affected services and resume Playwright verification. 
diff --git a/src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/StartupMigrationHost.cs b/src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/StartupMigrationHost.cs index 69f600ef7..ea7c21f2b 100644 --- a/src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/StartupMigrationHost.cs +++ b/src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/StartupMigrationHost.cs @@ -91,6 +91,7 @@ public abstract class StartupMigrationHost : IHostedService { // Step 2: Ensure schema and migrations table exist await EnsureSchemaAsync(connection, cancellationToken).ConfigureAwait(false); + await SetSearchPathAsync(connection, cancellationToken).ConfigureAwait(false); await EnsureMigrationsTableAsync(connection, cancellationToken).ConfigureAwait(false); // Step 3: Load and categorize migrations @@ -237,17 +238,28 @@ public abstract class StartupMigrationHost : IHostedService private async Task EnsureSchemaAsync(NpgsqlConnection connection, CancellationToken cancellationToken) { + var quotedSchema = QuoteIdentifier(_schemaName); await using var command = new NpgsqlCommand( - $"CREATE SCHEMA IF NOT EXISTS {_schemaName}", + $"CREATE SCHEMA IF NOT EXISTS {quotedSchema}", + connection); + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + private async Task SetSearchPathAsync(NpgsqlConnection connection, CancellationToken cancellationToken) + { + var quotedSchema = QuoteIdentifier(_schemaName); + await using var command = new NpgsqlCommand( + $"SET search_path TO {quotedSchema}, public", connection); await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); } private async Task EnsureMigrationsTableAsync(NpgsqlConnection connection, CancellationToken cancellationToken) { + var quotedSchema = QuoteIdentifier(_schemaName); await using var command = new NpgsqlCommand( $""" - CREATE TABLE IF NOT EXISTS {_schemaName}.schema_migrations ( + CREATE TABLE IF NOT EXISTS {quotedSchema}.schema_migrations ( 
migration_name TEXT PRIMARY KEY, category TEXT NOT NULL DEFAULT 'startup', checksum TEXT NOT NULL, @@ -258,7 +270,7 @@ public abstract class StartupMigrationHost : IHostedService ); CREATE INDEX IF NOT EXISTS idx_schema_migrations_applied_at - ON {_schemaName}.schema_migrations(applied_at DESC); + ON {quotedSchema}.schema_migrations(applied_at DESC); """, connection); await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); @@ -269,9 +281,10 @@ public abstract class StartupMigrationHost : IHostedService CancellationToken cancellationToken) { var result = new Dictionary(StringComparer.Ordinal); + var quotedSchema = QuoteIdentifier(_schemaName); await using var command = new NpgsqlCommand( - $"SELECT migration_name, category, checksum, applied_at FROM {_schemaName}.schema_migrations", + $"SELECT migration_name, category, checksum, applied_at FROM {quotedSchema}.schema_migrations", connection); await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); @@ -350,6 +363,7 @@ public abstract class StartupMigrationHost : IHostedService migration.Name, migration.Category); var sw = Stopwatch.StartNew(); + var quotedSchema = QuoteIdentifier(_schemaName); await using var transaction = await connection.BeginTransactionAsync(cancellationToken) .ConfigureAwait(false); @@ -366,7 +380,7 @@ public abstract class StartupMigrationHost : IHostedService // Record migration await using (var recordCommand = new NpgsqlCommand( $""" - INSERT INTO {_schemaName}.schema_migrations + INSERT INTO {quotedSchema}.schema_migrations (migration_name, category, checksum, duration_ms, applied_by) VALUES (@name, @category, @checksum, @duration, @applied_by) ON CONFLICT (migration_name) DO NOTHING @@ -434,6 +448,12 @@ public abstract class StartupMigrationHost : IHostedService return lastSlash >= 0 ? resourceName[(lastSlash + 1)..] 
: resourceName; } + private static string QuoteIdentifier(string identifier) + { + var escaped = identifier.Replace("\"", "\"\"", StringComparison.Ordinal); + return $"\"{escaped}\""; + } + private record AppliedMigration(string Name, string Category, string Checksum, DateTimeOffset AppliedAt); private record PendingMigration(string Name, string ResourceName, MigrationCategory Category, string Checksum, string Content); } diff --git a/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.Helpers.Schema.cs b/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.Helpers.Schema.cs index 84a2dc133..314bc74b2 100644 --- a/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.Helpers.Schema.cs +++ b/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.Helpers.Schema.cs @@ -33,6 +33,29 @@ public sealed partial class StartupMigrationHostTests return result is true; } + private async Task<bool> ColumnExistsAsync(string schemaName, string tableName, string columnName) + { + await using var conn = new NpgsqlConnection(ConnectionString); + await conn.OpenAsync(); + + await using var cmd = new NpgsqlCommand( + """ + SELECT EXISTS( + SELECT 1 + FROM information_schema.columns + WHERE table_schema = @schema + AND table_name = @table + AND column_name = @column) + """, + conn); + cmd.Parameters.AddWithValue("schema", schemaName); + cmd.Parameters.AddWithValue("table", tableName); + cmd.Parameters.AddWithValue("column", columnName); + + var result = await cmd.ExecuteScalarAsync(); + return result is true; + } + private async Task CorruptChecksumAsync(string schemaName, string migrationName) { await using var conn = new NpgsqlConnection(ConnectionString); diff --git a/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.SearchPath.cs 
b/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.SearchPath.cs new file mode 100644 index 000000000..9d914a677 --- /dev/null +++ b/src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.SearchPath.cs @@ -0,0 +1,53 @@ +using FluentAssertions; +using Npgsql; +using StellaOps.Infrastructure.Postgres.Migrations; +using Xunit; + +namespace StellaOps.Infrastructure.Postgres.Tests.Migrations; + +public sealed partial class StartupMigrationHostTests +{ + [Fact] + public async Task StartAsync_WithPublicNameCollision_AppliesMigrationsInTargetSchema() + { + // Arrange + var schemaName = $"test_{Guid.NewGuid():N}"[..20]; + var options = new StartupMigrationOptions { FailOnPendingReleaseMigrations = false }; + + await using (var conn = new NpgsqlConnection(ConnectionString)) + { + await conn.OpenAsync(); + await using var seed = new NpgsqlCommand( + """ + DROP TABLE IF EXISTS public.test_table; + CREATE TABLE public.test_table ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ); + """, + conn); + await seed.ExecuteNonQueryAsync(); + } + + try + { + var host = CreateTestHost(schemaName, options: options); + + // Act + await host.StartAsync(CancellationToken.None); + + // Assert + (await TableExistsAsync(schemaName, "test_table")).Should().BeTrue(); + (await ColumnExistsAsync(schemaName, "test_table", "description")).Should().BeTrue(); + (await ColumnExistsAsync("public", "test_table", "description")).Should().BeFalse(); + } + finally + { + await using var conn = new NpgsqlConnection(ConnectionString); + await conn.OpenAsync(); + await using var cleanup = new NpgsqlCommand("DROP TABLE IF EXISTS public.test_table;", conn); + await cleanup.ExecuteNonQueryAsync(); + } + } +}