From 08006100a5b609e53d9b3ff01aa12a180bc3d360 Mon Sep 17 00:00:00 2001 From: master <> Date: Wed, 11 Mar 2026 21:19:25 +0200 Subject: [PATCH] Repair scratch setup preflight for repo-local host processes --- docs/INSTALL_GUIDE.md | 2 +- docs/dev/DEV_ENVIRONMENT_SETUP.md | 5 +- ...010_Platform_scratch_setup_revalidation.md | 71 ++++++++++++++ scripts/build-all-solutions.ps1 | 98 +++++++++++++++++++ scripts/build-all-solutions.sh | 45 ++++++++- scripts/setup.ps1 | 2 +- scripts/setup.sh | 4 +- 7 files changed, 221 insertions(+), 6 deletions(-) create mode 100644 docs/implplan/SPRINT_20260311_010_Platform_scratch_setup_revalidation.md diff --git a/docs/INSTALL_GUIDE.md b/docs/INSTALL_GUIDE.md index 638de817a..5b4065f24 100755 --- a/docs/INSTALL_GUIDE.md +++ b/docs/INSTALL_GUIDE.md @@ -55,7 +55,7 @@ The scripts will: 3. Copy `env/stellaops.env.example` to `.env` if needed (works out of the box) 4. Start infrastructure and wait for healthy containers 5. Create or reuse the external frontdoor Docker network from `.env` (`FRONTDOOR_NETWORK`, default `stellaops_frontdoor`) -6. Build repo-owned .NET solutions, then publish backend services locally into small Docker contexts before building hardened runtime images (vendored dependency trees such as `node_modules` are excluded) +6. Stop repo-local host-run Stella services that would lock build outputs, then build repo-owned .NET solutions and publish backend services locally into small Docker contexts before building hardened runtime images (vendored dependency trees such as `node_modules` are excluded) 7. Launch the full platform with health checks Open **https://stella-ops.local** when setup completes. 
diff --git a/docs/dev/DEV_ENVIRONMENT_SETUP.md b/docs/dev/DEV_ENVIRONMENT_SETUP.md index c67499675..c943b3cb0 100644 --- a/docs/dev/DEV_ENVIRONMENT_SETUP.md +++ b/docs/dev/DEV_ENVIRONMENT_SETUP.md @@ -29,7 +29,7 @@ Setup scripts validate prerequisites, build solutions and Docker images, and lau ./scripts/setup.sh --images-only # only build Docker images ``` -The scripts will check for required tools (dotnet 10.x, node 20+, npm 10+, docker, git), warn about missing hosts file entries, and copy `.env` from the example if needed. See the manual steps below for details on each stage. +The scripts will check for required tools (dotnet 10.x, node 20+, npm 10+, docker, git), warn about missing hosts file entries, copy `.env` from the example if needed, and stop repo-local host-run Stella services before the solution build so scratch bootstraps do not fail on locked `bin/Debug` outputs. See the manual steps below for details on each stage. On Windows and Linux, the backend image builder now publishes each selected .NET service locally and builds the hardened runtime image from a small temporary context. That avoids repeatedly streaming the whole monorepo into Docker during scratch setup. 
@@ -195,6 +195,9 @@ dotnet test src\Scanner\StellaOps.Scanner.sln # Windows (PowerShell 7) .\scripts\build-all-solutions.ps1 +# Stop repo-local host-run Stella services first if a prior debug session left binaries locked +.\scripts\build-all-solutions.ps1 -StopRepoHostProcesses + # With tests .\scripts\build-all-solutions.ps1 -Test diff --git a/docs/implplan/SPRINT_20260311_010_Platform_scratch_setup_revalidation.md b/docs/implplan/SPRINT_20260311_010_Platform_scratch_setup_revalidation.md new file mode 100644 index 000000000..6dae5a085 --- /dev/null +++ b/docs/implplan/SPRINT_20260311_010_Platform_scratch_setup_revalidation.md @@ -0,0 +1,71 @@ +# Sprint 20260311_010 - Platform Scratch Setup Revalidation + +## Topic & Scope +- Validate the documented Stella Ops scratch setup path against a fully wiped local Docker state. +- Remove Stella-only containers, images, volumes, and networks, then rerun the repo setup path as a first-time operator would. +- If bootstrap defects surface, triage root cause and fix them cleanly before declaring the setup path healthy. +- Working directory: `.`. +- Expected evidence: scoped Docker wipe, setup-script execution evidence, root-cause notes for any bootstrap failures, and a local commit if code/docs change. + +## Dependencies & Concurrency +- Depends on no other agent actively using the local Stella Docker stack. +- Safe parallelism: none during the wipe/rebuild itself because the environment reset is global. + +## Documentation Prerequisites +- `AGENTS.md` +- `docs/dev/DEV_ENVIRONMENT_SETUP.md` +- `docs/INSTALL_GUIDE.md` +- `docs/implplan/SPRINT_20260309_001_Platform_scratch_setup_bootstrap_restore.md` + +## Delivery Tracker + +### PLATFORM-SCRATCH-001 - Wipe Stella Docker state only +Status: DONE +Dependency: none +Owners: QA, 3rd line support +Task description: +- Tear down the Stella compose stack and remove Stella-specific images, volumes, and networks without touching unrelated Docker assets on the machine. 
+ +Completion criteria: +- [x] Stella compose services are stopped. +- [x] Stella-specific images, volumes, and networks are removed. + +### PLATFORM-SCRATCH-002 - Re-run documented setup from zero state +Status: DONE +Dependency: PLATFORM-SCRATCH-001 +Owners: QA +Task description: +- Run the repo setup path from the documented entrypoint and capture the first blocking failure or a successful end-to-end bootstrap. + +Completion criteria: +- [x] The documented setup command is executed from wiped state. +- [x] The first-run result is captured with concrete evidence. + +### PLATFORM-SCRATCH-003 - Repair any bootstrap regression cleanly +Status: DONE +Dependency: PLATFORM-SCRATCH-002 +Owners: Product Manager, Architect, Developer +Task description: +- If the scratch setup exposes a real bootstrap defect, fix the root cause in scripts/docs/code, then rerun the setup path until the documented flow converges again. + +Completion criteria: +- [x] Any exposed setup regression has a root-cause fix. +- [x] Docs/sprint notes reflect the repaired bootstrap path. +- [x] The repair is committed locally if code/docs changed. + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2026-03-11 | Sprint created to revalidate the Stella scratch setup path after the latest runtime/action sweeps on the warm stack came back clean. | QA | +| 2026-03-11 | Completed a Stella-only Docker wipe and reran `scripts/setup.ps1` from zero state. The first real blocker was not Docker or compose; three leaked repo-local `StellaOps.Graph.Api.exe` processes from earlier host debugging locked `src/Graph/StellaOps.Graph.Api/bin/Debug/net10.0/*` and caused the Graph solution build to fail inside the documented setup path. | QA | +| 2026-03-11 | Root cause classified as a bootstrap-preflight gap, not a Graph runtime defect: the documented setup/build path did nothing to clear repo-local host-run Stella services before rebuilding all module solutions. 
Added repo-scoped host-process cleanup to the setup/build preflight and aligned the bash path with the same behavior. | Developer | +| 2026-03-11 | Revalidated the repaired bootstrap path on the rebuilt local stack and continued the live route/action sweeps from that clean baseline; the setup contract now converges without manual PID cleanup. | QA | + +## Decisions & Risks +- Decision: keep the wipe scoped to Stella-labeled compose resources and `stellaops/*` images so unrelated local Docker work is not disturbed. +- Risk: scratch rebuilds are long-running by nature; if a bootstrap failure appears, capture the first blocker and fix it before attempting to optimize further. +- Decision: scratch setup now stops only repo-local host-run Stella processes before the solution build, because lingering debug services invalidate the documented bootstrap contract but should not require the operator to hunt PIDs manually. +- Risk: forcibly terminating repo-local host services during setup would be surprising if applied to arbitrary processes, so the cleanup is scoped to commands or executables rooted under this repository and containing `StellaOps.`. + +## Next Checkpoints +- Archived by the follow-on local commit once the scoped setup repair is recorded. 
diff --git a/scripts/build-all-solutions.ps1 b/scripts/build-all-solutions.ps1
index 9347f571a..21c1f0b89 100644
--- a/scripts/build-all-solutions.ps1
+++ b/scripts/build-all-solutions.ps1
@@ -21,6 +21,7 @@
 [CmdletBinding()]
 param(
     [switch]$Test,
+    [switch]$StopRepoHostProcesses,
     [ValidateSet('Debug', 'Release')]
     [string]$Configuration = 'Debug'
 )
@@ -49,6 +50,103 @@ function Get-RepoRelativePath {
     return $normalizedPath
 }
 
+function Test-RepoOwnedText {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Root,
+        [AllowNull()]
+        [string]$Value
+    )
+
+    if ([string]::IsNullOrWhiteSpace($Value)) {
+        return $false
+    }
+
+    $normalizedRoot = [System.IO.Path]::GetFullPath($Root).TrimEnd('\', '/')
+    $normalizedValue = $Value.Replace('/', '\')
+
+    return $normalizedValue.IndexOf($normalizedRoot, [System.StringComparison]::OrdinalIgnoreCase) -ge 0
+}
+
+function Test-IsWindowsPlatform {
+    return [System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform(
+        [System.Runtime.InteropServices.OSPlatform]::Windows)
+}
+
+function Stop-RepoHostProcesses {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Root
+    )
+
+    if (-not (Test-IsWindowsPlatform)) {
+        return
+    }
+
+    $candidates = @(Get-CimInstance Win32_Process -Filter "Name = 'dotnet.exe' OR Name LIKE 'StellaOps.%'")
+    $staleProcesses = @()
+
+    foreach ($candidate in $candidates) {
+        if ($candidate.ProcessId -eq $PID) {
+            continue
+        }
+
+        $executablePath = "$($candidate.ExecutablePath)"
+        $commandLine = "$($candidate.CommandLine)"
+        $name = "$($candidate.Name)"
+
+        $repoOwned = (Test-RepoOwnedText -Root $Root -Value $executablePath) -or
+            (Test-RepoOwnedText -Root $Root -Value $commandLine)
+        if (-not $repoOwned) {
+            continue
+        }
+
+        $looksLikeService = $name -like 'StellaOps.*' -or $commandLine -match 'StellaOps\.[A-Za-z0-9_.-]+'
+        if (-not $looksLikeService) {
+            continue
+        }
+
+        $staleProcesses += [pscustomobject]@{
+            ProcessId = $candidate.ProcessId
+            Name = $name
+            ExecutablePath = $executablePath
+            CommandLine = $commandLine
+        }
+    }
+
+    $staleProcesses = @($staleProcesses | Sort-Object ProcessId -Unique)
+    if ($staleProcesses.Count -eq 0) {
+        Write-Host 'No repo-local Stella host processes detected.' -ForegroundColor DarkGray
+        return
+    }
+
+    Write-Host "Stopping $($staleProcesses.Count) repo-local Stella host process(es) before build." -ForegroundColor Yellow
+
+    foreach ($stale in $staleProcesses) {
+        $location = if (-not [string]::IsNullOrWhiteSpace($stale.ExecutablePath)) {
+            Get-RepoRelativePath -Root $Root -Path $stale.ExecutablePath
+        } else {
+            $stale.CommandLine
+        }
+
+        Write-Host " - [$($stale.ProcessId)] $($stale.Name) :: $location" -ForegroundColor DarkYellow
+        Stop-Process -Id $stale.ProcessId -Force -ErrorAction Stop
+    }
+
+    Start-Sleep -Seconds 1
+    $remaining = @($staleProcesses | Where-Object { Get-Process -Id $_.ProcessId -ErrorAction SilentlyContinue })
+    if ($remaining.Count -gt 0) {
+        $remainingIds = ($remaining | ForEach-Object { $_.ProcessId }) -join ', '
+        throw "Failed to stop repo-local Stella host processes: $remainingIds"
+    }
+
+    Write-Host 'Repo-local Stella host processes stopped.' -ForegroundColor Green
+}
+
+if ($StopRepoHostProcesses) {
+    Stop-RepoHostProcesses -Root $repoRoot
+}
+
 $solutions = Get-ChildItem -Path $srcDir -Filter '*.sln' -Recurse |
     Where-Object {
         $_.Name -ne 'StellaOps.sln' -and
diff --git a/scripts/build-all-solutions.sh b/scripts/build-all-solutions.sh
index 2fb26aab1..f1114985d 100644
--- a/scripts/build-all-solutions.sh
+++ b/scripts/build-all-solutions.sh
@@ -14,17 +14,60 @@ SRC_DIR="$REPO_ROOT/src"
 
 RUN_TESTS=false
 CONFIGURATION="Debug"
+STOP_REPO_HOST_PROCESSES=false
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --test|-t) RUN_TESTS=true; shift ;;
+    --stop-repo-host-processes) STOP_REPO_HOST_PROCESSES=true; shift ;;
     --configuration|-c) CONFIGURATION="$2"; shift 2 ;;
     *) echo "Unknown option: $1" >&2; exit 1 ;;
   esac
 done
 
+# Stop host-run StellaOps processes started from this checkout; a lingering
+# debug session keeps bin/ outputs locked and breaks the solution build.
+stop_repo_host_processes() {
+  local found=0 pid cmd
+
+  # ps right-aligns the PID column; `read` splits on whitespace and drops that padding.
+  while read -r pid cmd; do
+    [[ "$pid" =~ ^[0-9]+$ ]] || continue
+    [[ "$cmd" == *"$REPO_ROOT"* && "$cmd" == *"StellaOps."* ]] || continue
+    # Never signal this script or the shell that launched it.
+    [[ "$pid" == "$$" || "$pid" == "$PPID" ]] && continue
+
+    if (( found == 0 )); then
+      echo "Stopping repo-local Stella host processes before build."
+    fi
+    found=1
+
+    echo " - [$pid] $cmd"
+    # Ask politely first (SIGTERM); force-kill only if it survives the grace period.
+    kill "$pid" 2>/dev/null || true
+    sleep 1
+    if kill -0 "$pid" 2>/dev/null; then
+      kill -9 "$pid" 2>/dev/null || true
+    fi
+  done < <(ps -eo pid=,args=)
+
+  if (( found == 0 )); then
+    echo "No repo-local Stella host processes detected."
+  else
+    echo "Repo-local Stella host processes stopped."
+  fi
+}
+
+if $STOP_REPO_HOST_PROCESSES; then
+  stop_repo_host_processes
+fi
+
 # Discover solutions (exclude root StellaOps.sln)
-mapfile -t SOLUTIONS < <(find "$SRC_DIR" -name '*.sln' ! -name 'StellaOps.sln' | sort)
+mapfile -t SOLUTIONS < <(
+  find "$SRC_DIR" \
+    \( -path '*/node_modules/*' -o -path '*/bin/*' -o -path '*/obj/*' \) -prune -o \
+    -name '*.sln' ! -name 'StellaOps.sln' -print | sort
+)
 
 if [[ ${#SOLUTIONS[@]} -eq 0 ]]; then
   echo "ERROR: No solution files found under src/." >&2
diff --git a/scripts/setup.ps1 b/scripts/setup.ps1
index 98a0de6ce..323c48317 100644
--- a/scripts/setup.ps1
+++ b/scripts/setup.ps1
@@ -415,7 +415,7 @@ function Build-Solutions {
     Write-Step 'Building all .NET solutions'
     $buildScript = Join-Path $Root 'scripts/build-all-solutions.ps1'
     if (Test-Path $buildScript) {
-        & $buildScript
+        & $buildScript -StopRepoHostProcesses
         if ($LASTEXITCODE -ne 0) {
             Write-Fail '.NET solution build failed.'
             exit 1
diff --git a/scripts/setup.sh b/scripts/setup.sh
index 0cbf45f2b..940679a6b 100644
--- a/scripts/setup.sh
+++ b/scripts/setup.sh
@@ -294,10 +294,10 @@ build_solutions() {
   step 'Building all .NET solutions'
   local script="${ROOT}/scripts/build-all-solutions.sh"
   if [[ -x "$script" ]]; then
-    "$script"
+    "$script" --stop-repo-host-processes
     ok '.NET solutions built successfully'
   elif [[ -f "$script" ]]; then
-    bash "$script"
+    bash "$script" --stop-repo-host-processes
     ok '.NET solutions built successfully'
   else
     warn "Build script not found at $script. Skipping .NET build."