stabilize tests

2026-02-01 21:37:40 +02:00
parent 55744f6a39
commit 5d5e80b2e4
6435 changed files with 33984 additions and 13802 deletions
--- a/scripts/test-stabilization/run-batch.sh
+++ b/scripts/test-stabilization/run-batch.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Run each test project listed in a batch file under a per-project timeout and
+# append one CSV row per project.
+# Usage: ./run-batch.sh <batch-file> <output-csv> <timeout-seconds>
+#   <batch-file> is resolved against this script's directory, <output-csv> against
+#   <repo-root>/test-results. Defaults: batch-001.txt batch-001-results.csv 300
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+BATCH_FILE="$SCRIPT_DIR/${1:-batch-001.txt}"
+OUTPUT_CSV="$REPO_ROOT/test-results/${2:-batch-001-results.csv}"
+TIMEOUT_SECONDS="${3:-300}"
+
+[[ -r "$BATCH_FILE" ]] || { echo "Cannot read batch file: $BATCH_FILE" >&2; exit 1; }
+cd "$REPO_ROOT" || { echo "Cannot cd to repo root: $REPO_ROOT" >&2; exit 1; }
+
+# Create output directory
+mkdir -p "$(dirname "$OUTPUT_CSV")"
+
+# Private scratch file for the captured dotnet output; removed on any exit path.
+output_file="$(mktemp)" || { echo "mktemp failed" >&2; exit 1; }
+trap 'rm -f -- "$output_file"' EXIT
+
+# Print the first number after "<label>:" in the captured output, or 0 if absent.
+# (A "|| echo 0" after "grep | head" never fires: the pipeline status is head's.)
+first_count() {
+    local value; value=$(grep -oP "$1:\s*\K\d+" "$output_file" 2>/dev/null | head -n 1)
+    echo "${value:-0}"
+}
+
+# Print how many captured lines match the pattern. grep -c already prints 0 when
+# nothing matches (and exits 1), so "|| echo 0" would add a second line.
+count_lines() {
+    local value; value=$(grep -c -- "$1" "$output_file" 2>/dev/null)
+    echo "${value:-0}"
+}
+
+# CSV header
+echo "Project,Status,Errors,Warnings,Total,Passed,Failed,Skipped,Duration,ExitCode" > "$OUTPUT_CSV"
+
+echo "=========================================="
+echo "Running batch: $BATCH_FILE"
+echo "Output: $OUTPUT_CSV"
+echo "Timeout: ${TIMEOUT_SECONDS}s per project"
+echo "Repo root: $REPO_ROOT"
+echo "=========================================="
+
+total=0 passed=0 failed=0 build_errors=0 timeouts=0
+
+# "|| [[ -n ... ]]" also processes a final line that has no trailing newline.
+while IFS= read -r project || [[ -n "$project" ]]; do
+    project="${project%$'\r'}"         # drop the CR of a CRLF-terminated line
+    [[ -z "$project" ]] && continue    # skip empty lines
+    project_name=$(basename "$project" .csproj)
+    total=$((total + 1))
+    echo -n "[$total] Testing $project_name ... "
+    start_time=$(date +%s)
+
+    # stdout goes to the file first, then stderr is duplicated onto it, so both are
+    # captured; stdin is /dev/null so the child cannot consume the batch list.
+    timeout "${TIMEOUT_SECONDS}s" dotnet test "$REPO_ROOT/$project" \
+        --no-restore --verbosity minimal > "$output_file" 2>&1 < /dev/null
+    exit_code=$?
+    duration=$(( $(date +%s) - start_time ))
+
+    # Parse output for test counts
+    test_total=$(first_count Total)
+    test_passed=$(first_count Passed)
+    test_failed=$(first_count Failed)
+    test_skipped=$(first_count Skipped)
+
+    # Count build errors and warnings
+    errors=$(count_lines "error [A-Z]*[0-9]*:")
+    warnings=$(count_lines "warning [A-Z]*[0-9]*:")
+
+    # Determine status (124 is what timeout(1) returns when the limit was hit)
+    if [[ $exit_code -eq 124 ]]; then
+        status="Timeout"
+        timeouts=$((timeouts + 1))
+        echo "TIMEOUT (${duration}s)"
+    elif [[ $errors -gt 0 ]]; then
+        status="BuildError"
+        build_errors=$((build_errors + 1))
+        echo "BUILD ERROR ($errors errors, ${duration}s)"
+    elif [[ $exit_code -eq 0 ]]; then
+        status="Passed"
+        passed=$((passed + 1))
+        echo "PASSED ($test_passed/$test_total, ${duration}s)"
+    else
+        status="Failed"
+        failed=$((failed + 1))
+        echo "FAILED ($test_failed/$test_total failed, ${duration}s)"
+    fi
+
+    # Write to CSV
+    echo "$project_name,$status,$errors,$warnings,$test_total,$test_passed,$test_failed,$test_skipped,$duration,$exit_code" >> "$OUTPUT_CSV"
+done < "$BATCH_FILE"
+
+printf '%s\n' "" "==========================================" "BATCH SUMMARY" \
+    "==========================================" \
+    "Total: $total" "Passed: $passed" "Failed: $failed" \
+    "Build Errors: $build_errors" "Timeouts: $timeouts" "Results: $OUTPUT_CSV"
--- a/scripts/test-stabilization/run-tests-batch.ps1
+++ b/scripts/test-stabilization/run-tests-batch.ps1
@@ -0,0 +1,372 @@
+<#
+.SYNOPSIS
+    Runs .NET test projects in batches with timeout handling and binary search for hanging tests.
+
+.DESCRIPTION
+    This script:
+    1. Discovers all test projects (excluding EvidenceLocker.Tests)
+    2. Runs tests in batches of configurable size (default 50)
+    3. Applies a configurable timeout per batch (default 50 minutes)
+    4. Uses binary search to identify hanging test projects when timeout occurs
+    5. Logs results to CSV and detailed logs
+
+.PARAMETER BatchSize
+    Number of projects per batch. Default: 50
+
+.PARAMETER TimeoutMinutes
+    Timeout in minutes per batch. Default: 50
+
+.PARAMETER OutputDir
+    Directory for output files, resolved relative to the repository root. Default: ./test-results
+
+.PARAMETER StartBatch
+    Batch number to start from (0-indexed). Default: 0
+
+.PARAMETER ProjectList
+    Optional path to a file containing project paths (one per line)
+#>
+
+param(
+    [int]$BatchSize = 50,
+    [int]$TimeoutMinutes = 50,
+    [string]$OutputDir = "./test-results",
+    [int]$StartBatch = 0,
+    [string]$ProjectList = ""
+)
+
+$ErrorActionPreference = "Continue"
+# The repository root is two directory levels above the folder holding this script.
+$RepoRoot = $PSScriptRoot | Split-Path -Parent | Split-Path -Parent
+
+# Resolve the output directory against the repository root and create it if missing.
+$OutputDir = Join-Path -Path $RepoRoot -ChildPath $OutputDir
+if (!(Test-Path $OutputDir)) { $null = New-Item -ItemType Directory -Path $OutputDir -Force }
+
+# Per-run artifacts carry a timestamp; timeout-projects.txt keeps one fixed name.
+$Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
+$ResultsFile = Join-Path $OutputDir "test-results-$Timestamp.csv"
+$LogFile = Join-Path $OutputDir "test-log-$Timestamp.txt"
+$HangingProjectsFile = Join-Path $OutputDir "hanging-projects-$Timestamp.txt"
+$TimeoutProjectsFile = Join-Path $OutputDir "timeout-projects.txt"
+
+function Write-Log {
+    # Echo a timestamp-prefixed message to the host and append it to $LogFile.
+    param([string]$Message)
+    $entry = "[{0}] {1}" -f (Get-Date -Format "yyyy-MM-dd HH:mm:ss"), $Message
+    Write-Host $entry
+    $entry | Add-Content -Path $LogFile
+}
+
+function Get-TestProjects {
+    # Returns the project paths to test: the lines of -ProjectList ending in .csproj
+    # when that file exists, otherwise the test projects discovered under <RepoRoot>/src.
+    Write-Log "Discovering test projects..."
+    if ($ProjectList -and -not (Test-Path $ProjectList)) { Write-Log "WARNING: project list '$ProjectList' not found, discovering projects instead" }
+    if ($ProjectList -and (Test-Path $ProjectList)) {
+        $projects = Get-Content $ProjectList | Where-Object { $_ -match "\.csproj$" }
+    } else {
+        $projects = Get-ChildItem -Path (Join-Path $RepoRoot "src") -Recurse -Filter "*.csproj" |
+            Where-Object { $_.Name -match "\.Tests\.csproj$" -or $_.FullName -match "__Tests" } |
+            Where-Object {
+                # Exclude fixture projects and bin/obj directories; [\\/] matches either
+                # path separator, so the filter does not depend on "\" being used.
+                $_.FullName -notmatch "[\\/]bin[\\/]" -and
+                $_.FullName -notmatch "[\\/]obj[\\/]" -and
+                $_.FullName -notmatch "[\\/]Fixtures[\\/]" -and
+                $_.Name -notmatch "Sample\.App\.csproj" -and
+                # Exclude EvidenceLocker.Tests (requires 256GB RAM)
+                $_.Name -ne "StellaOps.EvidenceLocker.Tests.csproj"
+            } |
+            ForEach-Object { $_.FullName }
+    }
+
+    Write-Log "Found $($projects.Count) test projects"
+    return $projects
+}
+
+function Run-SingleTestProject {
+    # Runs "dotnet test" for one project with a time limit and returns a hashtable
+    # (Project, Path, Status, Errors, Warnings, Total, Passed, Failed, Skipped,
+    # Duration, Message). Status ends up as Passed, BuildError, Failed, Error,
+    # Timeout or Exception. stdout is written to <OutputDir>/logs/<project>.log
+    # and stderr to the same path with ".err" appended.
+    param(
+        [string]$ProjectPath,
+        [int]$TimeoutSeconds = 300
+    )
+
+    $projectName = [System.IO.Path]::GetFileNameWithoutExtension($ProjectPath)
+    $result = @{
+        Project = $projectName; Path = $ProjectPath; Status = "Unknown"; Message = ""
+        Errors = 0; Warnings = 0; Total = 0; Passed = 0; Failed = 0; Skipped = 0; Duration = 0
+    }
+    $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
+
+    try {
+        $logOutput = Join-Path $OutputDir "logs"
+        if (-not (Test-Path $logOutput)) {
+            New-Item -ItemType Directory -Path $logOutput -Force | Out-Null
+        }
+        $projectLog = Join-Path $logOutput "$projectName.log"
+
+        # NOTE(review): --no-build runs whatever binaries already exist and no
+        # --configuration is passed - confirm this matches how the projects were built.
+        $process = Start-Process -FilePath "dotnet" `
+            -ArgumentList "test", "`"$ProjectPath`"", "--no-build", "--logger", "trx", "--verbosity", "minimal" `
+            -NoNewWindow -PassThru -RedirectStandardOutput $projectLog -RedirectStandardError "$projectLog.err"
+        # Read Handle once so the Process object keeps the native handle; without
+        # this, ExitCode can come back empty once the process has exited.
+        $null = $process.Handle
+
+        $completed = $process.WaitForExit($TimeoutSeconds * 1000)
+        $stopwatch.Stop()
+        $result.Duration = [math]::Round($stopwatch.Elapsed.TotalSeconds, 2)
+
+        if (-not $completed) {
+            $process.Kill()
+            # Let the killed process go away before returning; the Boolean result is
+            # discarded so it does not leak into this function's output.
+            $null = $process.WaitForExit(5000)
+            $result.Status = "Timeout"
+            $result.Message = "Test timed out after $TimeoutSeconds seconds"
+            return $result
+        }
+
+        $exitCode = $process.ExitCode
+        if (Test-Path $projectLog) {
+            $output = Get-Content $projectLog -Raw -ErrorAction SilentlyContinue
+            # Parse test results from output
+            if ($output -match "Passed:\s*(\d+)") { $result.Passed = [int]$Matches[1] }
+            if ($output -match "Failed:\s*(\d+)") { $result.Failed = [int]$Matches[1] }
+            if ($output -match "Skipped:\s*(\d+)") { $result.Skipped = [int]$Matches[1] }
+            if ($output -match "Total:\s*(\d+)") { $result.Total = [int]$Matches[1] }
+
+            # Count errors and warnings
+            $result.Errors = ([regex]::Matches($output, "error [A-Z]+\d+:")).Count
+            $result.Warnings = ([regex]::Matches($output, "warning [A-Z]+\d+:")).Count
+        }
+
+        if ($exitCode -eq 0) {
+            $result.Status = "Passed"
+        } elseif ($result.Errors -gt 0) {
+            $result.Status = "BuildError"
+        } elseif ($result.Failed -gt 0) {
+            $result.Status = "Failed"
+        } else {
+            $result.Status = "Error"
+            $result.Message = "Exit code: $exitCode"
+        }
+    }
+    catch {
+        $stopwatch.Stop()
+        $result.Status = "Exception"
+        $result.Message = $_.Exception.Message
+        $result.Duration = [math]::Round($stopwatch.Elapsed.TotalSeconds, 2)
+    }
+    return $result
+}
+
+function Run-BatchWithTimeout {
+    # Runs each project in $Projects through Run-SingleTestProject and returns
+    # @{ Results; TimedOut; RemainingProjects }. The batch time limit is only checked
+    # between projects; when it is exceeded the projects not yet run are returned.
+    param(
+        [string[]]$Projects,
+        [int]$BatchNum,
+        [int]$TimeoutMinutes
+    )
+
+    Write-Log "Starting batch $BatchNum with $($Projects.Count) projects (timeout: $TimeoutMinutes minutes)"
+    $batchResults = @()
+    $batchStopwatch = [System.Diagnostics.Stopwatch]::StartNew()
+    $timeoutSeconds = $TimeoutMinutes * 60
+    # Even share of the batch budget per project, capped at 300 s. Max(1, Count) avoids
+    # dividing by zero for an empty batch; the outer Max keeps the limit at 1 s or more.
+    $perProjectTimeout = [math]::Max(1, [math]::Min(300, [math]::Floor($timeoutSeconds / [math]::Max(1, $Projects.Count))))
+
+    foreach ($project in $Projects) {
+        $projectName = [System.IO.Path]::GetFileNameWithoutExtension($project)
+
+        # Check if batch timeout exceeded
+        if ($batchStopwatch.Elapsed.TotalSeconds -gt $timeoutSeconds) {
+            Write-Log "BATCH TIMEOUT: Batch $BatchNum exceeded $TimeoutMinutes minutes"
+            return @{
+                Results = $batchResults
+                TimedOut = $true
+                RemainingProjects = $Projects | Where-Object { $batchResults.Path -notcontains $_ }
+            }
+        }
+
+        Write-Log "  Testing: $projectName"
+        $result = Run-SingleTestProject -ProjectPath $project -TimeoutSeconds $perProjectTimeout
+        $batchResults += $result
+
+        $statusIcon = switch ($result.Status) {
+            "Passed" { "[OK]" }
+            "Failed" { "[FAIL]" }
+            "BuildError" { "[BUILD]" }
+            "Timeout" { "[TIMEOUT]" }
+            default { "[?]" }
+        }
+
+        Write-Log "    $statusIcon $($result.Status) - $($result.Passed)/$($result.Total) passed, $($result.Duration)s"
+    }
+
+    $batchStopwatch.Stop()
+    Write-Log "Batch $BatchNum completed in $([math]::Round($batchStopwatch.Elapsed.TotalMinutes, 2)) minutes"
+
+    return @{ Results = $batchResults; TimedOut = $false; RemainingProjects = @() }
+}
+
+function Binary-SearchHangingProject {
+    # Recursively narrows $Projects to one hanging project: each half is re-run via
+    # Run-BatchWithTimeout with half the timeout (never below 5 minutes) and the half
+    # that times out is searched again. The project found is appended to
+    # $HangingProjectsFile and returned; $null is returned when neither half times out.
+    param(
+        [string[]]$Projects,
+        [int]$TimeoutMinutes
+    )
+    # Nothing to search: bail out before slicing and re-running an empty list.
+    if ($Projects.Count -eq 0) { return $null }
+    Write-Log "BINARY SEARCH: Starting binary search for hanging project in $($Projects.Count) projects"
+    if ($Projects.Count -eq 1) {
+        Write-Log "BINARY SEARCH: Found hanging project: $($Projects[0])"
+        Add-Content -Path $HangingProjectsFile -Value $Projects[0]
+        return $Projects[0]
+    }
+
+    $mid = [math]::Floor($Projects.Count / 2)
+    $firstHalf = $Projects[0..($mid-1)]
+    $secondHalf = $Projects[$mid..($Projects.Count-1)]
+    Write-Log "BINARY SEARCH: Testing first half ($($firstHalf.Count) projects)"
+    $reducedTimeout = [math]::Max(5, [math]::Floor($TimeoutMinutes / 2))
+    $firstResult = Run-BatchWithTimeout -Projects $firstHalf -BatchNum -1 -TimeoutMinutes $reducedTimeout
+    if ($firstResult.TimedOut) {
+        Write-Log "BINARY SEARCH: Timeout in first half, searching deeper"
+        return Binary-SearchHangingProject -Projects $firstHalf -TimeoutMinutes $reducedTimeout
+    }
+
+    Write-Log "BINARY SEARCH: First half completed, testing second half ($($secondHalf.Count) projects)"
+    $secondResult = Run-BatchWithTimeout -Projects $secondHalf -BatchNum -1 -TimeoutMinutes $reducedTimeout
+    if ($secondResult.TimedOut) {
+        Write-Log "BINARY SEARCH: Timeout in second half, searching deeper"
+        return Binary-SearchHangingProject -Projects $secondHalf -TimeoutMinutes $reducedTimeout
+    }
+
+    Write-Log "BINARY SEARCH: No timeout found in either half (possible intermittent issue)"
+    return $null
+}
+
+# Initialize CSV with headers
+"Project,Path,Status,Errors,Warnings,Total,Passed,Failed,Skipped,Duration,Message" | Out-File -FilePath $ResultsFile -Encoding UTF8
+
+Write-Log "=========================================="
+Write-Log "Test Stabilization Run"
+Write-Log "Batch Size: $BatchSize"
+Write-Log "Timeout: $TimeoutMinutes minutes per batch"
+Write-Log "Output: $OutputDir"
+Write-Log "=========================================="
+
+# Get all test projects (@() keeps a single result an array; a lone string would be sliced into characters below)
+$allProjects = @(Get-TestProjects)
+
+# Load previously identified timeout projects to skip
+$skipProjects = @()
+if (Test-Path $TimeoutProjectsFile) {
+    $skipProjects = @(Get-Content $TimeoutProjectsFile)
+    Write-Log "Loaded $($skipProjects.Count) known timeout projects to skip"
+}
+
+$allProjects = @($allProjects | Where-Object { $skipProjects -notcontains $_ })
+Write-Log "Running $($allProjects.Count) projects after exclusions"
+
+# Split into batches
+$batches = @()
+for ($i = 0; $i -lt $allProjects.Count; $i += $BatchSize) {
+    $end = [math]::Min($i + $BatchSize - 1, $allProjects.Count - 1)
+    $batches += ,($allProjects[$i..$end])
+}
+
+Write-Log "Created $($batches.Count) batches"
+
+# First, build all projects
+Write-Log "Building solution..."
+$buildProcess = Start-Process -FilePath "dotnet" `
+    -ArgumentList "build", (Join-Path $RepoRoot "src"), "--configuration", "Release", "--verbosity", "minimal" `
+    -NoNewWindow -PassThru -Wait
+
+if ($buildProcess.ExitCode -ne 0) {
+    Write-Log "WARNING: Solution build had errors, continuing with test execution"
+}
+
+# Run batches
+$allResults = @()
+$totalStats = @{
+    Passed = 0
+    Failed = 0
+    BuildError = 0
+    Timeout = 0
+    Total = 0
+}
+
+for ($batchNum = $StartBatch; $batchNum -lt $batches.Count; $batchNum++) {
+    $batch = $batches[$batchNum]
+    Write-Log ""
+    Write-Log "=========================================="
+    Write-Log "BATCH $($batchNum + 1) of $($batches.Count)"
+    Write-Log "=========================================="
+
+    $batchResult = Run-BatchWithTimeout -Projects $batch -BatchNum $batchNum -TimeoutMinutes $TimeoutMinutes
+
+    if ($batchResult.TimedOut) {
+        Write-Log "Batch $batchNum timed out, initiating binary search..."
+        $hangingProject = Binary-SearchHangingProject -Projects $batchResult.RemainingProjects -TimeoutMinutes $TimeoutMinutes
+
+        if ($hangingProject) {
+            Write-Log "Adding $hangingProject to timeout projects list"
+            Add-Content -Path $TimeoutProjectsFile -Value $hangingProject
+        }
+    }
+
+    # Record results (double quotes inside Message are doubled to keep the CSV row well-formed)
+    foreach ($result in $batchResult.Results) {
+        $message = $result.Message -replace '"', '""'
+        $csvLine = "$($result.Project),$($result.Path),$($result.Status),$($result.Errors),$($result.Warnings),$($result.Total),$($result.Passed),$($result.Failed),$($result.Skipped),$($result.Duration),`"$message`""
+        Add-Content -Path $ResultsFile -Value $csvLine
+        $totalStats.Total++
+        switch ($result.Status) {
+            "Passed" { $totalStats.Passed++ }
+            "Failed" { $totalStats.Failed++ }
+            "BuildError" { $totalStats.BuildError++ }
+            "Timeout" { $totalStats.Timeout++ }
+        }
+    }
+
+    $allResults += $batchResult.Results
+
+    # Progress summary
+    Write-Log ""
+    Write-Log "Progress: $($totalStats.Total) projects tested"
+    Write-Log "  Passed: $($totalStats.Passed)"
+    Write-Log "  Failed: $($totalStats.Failed)"
+    Write-Log "  Build Errors: $($totalStats.BuildError)"
+    Write-Log "  Timeouts: $($totalStats.Timeout)"
+}
+
+Write-Log ""
+Write-Log "=========================================="
+Write-Log "FINAL SUMMARY"
+Write-Log "=========================================="
+Write-Log "Total Projects: $($totalStats.Total)"
+Write-Log "Passed: $($totalStats.Passed) ($([math]::Round($totalStats.Passed / [math]::Max(1, $totalStats.Total) * 100, 1))%)"
+Write-Log "Failed: $($totalStats.Failed)"
+Write-Log "Build Errors: $($totalStats.BuildError)"
+Write-Log "Timeouts: $($totalStats.Timeout)"
+Write-Log ""
+Write-Log "Results saved to: $ResultsFile"
+Write-Log "Log saved to: $LogFile"
+
+if (Test-Path $HangingProjectsFile) {
+    Write-Log "Hanging projects saved to: $HangingProjectsFile"
+}
--- a/scripts/test-stabilization/run-tests.cmd
+++ b/scripts/test-stabilization/run-tests.cmd
@@ -0,0 +1,7 @@
+@echo off
+REM Test Stabilization Runner: runs run-tests-batch.ps1 from the repository root,
+REM forwarding all arguments. Defaults: batches of 50, 50-minute timeout per batch.
+REM Uses binary search to identify hanging tests
+REM %~dp0 is this file's folder; the repository root is two levels above it.
+cd /d "%~dp0..\.."
+powershell -ExecutionPolicy Bypass -File "%~dp0run-tests-batch.ps1" %*