stabilize tests
This commit is contained in:
102
scripts/test-stabilization/run-batch.sh
Normal file
102
scripts/test-stabilization/run-batch.sh
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/bin/bash
# Run tests from a batch file with per-project timeout
# Usage: ./run-batch.sh <batch-file> <output-csv> <timeout-seconds>
#
#   <batch-file>       file name relative to this script's directory; one
#                      project path (relative to the repo root) per line
#                      (default: batch-001.txt)
#   <output-csv>       file name relative to <repo-root>/test-results
#                      (default: batch-001-results.csv)
#   <timeout-seconds>  per-project timeout in seconds (default: 300)
#
# Requires GNU grep (-P) and coreutils `timeout`.

# Print an error message to stderr and abort the script.
die() {
  printf 'run-batch: %s\n' "$*" >&2
  exit 1
}

# Print the first number that follows "<label>:" in <file>, or 0 if none.
# Arguments: $1 - label (e.g. Total), $2 - file to scan
first_number_after() {
  local label=$1 file=$2 value
  # The exit status of a pipeline is that of `head`, so a "|| echo 0"
  # fallback would never fire; default the empty result explicitly instead.
  value=$(grep -oP "${label}:\s*\K\d+" "$file" 2>/dev/null | head -n 1)
  printf '%s\n' "${value:-0}"
}

# Print how many lines of <file> match <pattern> (0 if none / unreadable).
# Arguments: $1 - basic regex, $2 - file to scan
count_matching_lines() {
  local pattern=$1 file=$2 count
  # `grep -c` already prints 0 when nothing matches but exits 1; appending
  # another "0" via "|| echo 0" would produce a two-line value.
  count=$(grep -c -- "$pattern" "$file" 2>/dev/null) || count=${count:-0}
  printf '%s\n' "${count:-0}"
}

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" || die "cannot resolve script directory"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" || die "cannot resolve repository root"

BATCH_FILE="$SCRIPT_DIR/${1:-batch-001.txt}"
OUTPUT_CSV="$REPO_ROOT/test-results/${2:-batch-001-results.csv}"
TIMEOUT_SECONDS="${3:-300}"

[[ -r "$BATCH_FILE" ]] || die "batch file not readable: $BATCH_FILE"
[[ "$TIMEOUT_SECONDS" =~ ^[0-9]+([.][0-9]+)?$ ]] \
  || die "timeout must be a number of seconds, got: $TIMEOUT_SECONDS"
command -v timeout >/dev/null || die "'timeout' command not found"
command -v dotnet >/dev/null || die "'dotnet' command not found"

cd "$REPO_ROOT" || die "cannot cd to $REPO_ROOT"

# Create output directory
mkdir -p "$(dirname "$OUTPUT_CSV")" || die "cannot create output directory"

# Private, unpredictable capture file; removed on every exit path.
output_file=$(mktemp "${TMPDIR:-/tmp}/test-output.XXXXXX") || die "mktemp failed"
trap 'rm -f -- "$output_file"' EXIT

# CSV header
echo "Project,Status,Errors,Warnings,Total,Passed,Failed,Skipped,Duration,ExitCode" > "$OUTPUT_CSV"

echo "=========================================="
echo "Running batch: $BATCH_FILE"
echo "Output: $OUTPUT_CSV"
echo "Timeout: ${TIMEOUT_SECONDS}s per project"
echo "Repo root: $REPO_ROOT"
echo "=========================================="

total=0
passed=0
failed=0
build_errors=0
timeouts=0

# "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
while IFS= read -r project || [[ -n "$project" ]]; do
  # Tolerate batch files saved with CRLF line endings.
  project=${project%$'\r'}

  # Skip empty lines
  [[ -z "$project" ]] && continue

  project_name=$(basename -- "$project" .csproj)
  total=$((total + 1))

  printf '[%s] Testing %s ... ' "$total" "$project_name"

  start_time=$(date +%s)

  # Run test with timeout, capture stdout AND stderr in the capture file.
  # stdin comes from /dev/null so the test run cannot swallow the remaining
  # lines of the batch file that feeds this loop.
  exit_code=0
  timeout "${TIMEOUT_SECONDS}s" dotnet test "$REPO_ROOT/$project" \
    --no-restore --verbosity minimal > "$output_file" 2>&1 < /dev/null \
    || exit_code=$?

  end_time=$(date +%s)
  duration=$((end_time - start_time))

  # Parse output for test counts
  test_total=$(first_number_after 'Total' "$output_file")
  test_passed=$(first_number_after 'Passed' "$output_file")
  test_failed=$(first_number_after 'Failed' "$output_file")
  test_skipped=$(first_number_after 'Skipped' "$output_file")

  # Count build errors and warnings
  errors=$(count_matching_lines "error [A-Z]*[0-9]*:" "$output_file")
  warnings=$(count_matching_lines "warning [A-Z]*[0-9]*:" "$output_file")

  # Determine status (124 is the exit status `timeout` uses on expiry)
  if [[ $exit_code -eq 124 ]]; then
    status="Timeout"
    timeouts=$((timeouts + 1))
    echo "TIMEOUT (${duration}s)"
  elif [[ $errors -gt 0 ]]; then
    status="BuildError"
    build_errors=$((build_errors + 1))
    echo "BUILD ERROR ($errors errors, ${duration}s)"
  elif [[ $exit_code -eq 0 ]]; then
    status="Passed"
    passed=$((passed + 1))
    echo "PASSED ($test_passed/$test_total, ${duration}s)"
  else
    status="Failed"
    failed=$((failed + 1))
    echo "FAILED ($test_failed/$test_total failed, ${duration}s)"
  fi

  # Write to CSV
  echo "$project_name,$status,$errors,$warnings,$test_total,$test_passed,$test_failed,$test_skipped,$duration,$exit_code" >> "$OUTPUT_CSV"
done < "$BATCH_FILE"

echo ""
echo "=========================================="
echo "BATCH SUMMARY"
echo "=========================================="
echo "Total: $total"
echo "Passed: $passed"
echo "Failed: $failed"
echo "Build Errors: $build_errors"
echo "Timeouts: $timeouts"
echo "Results: $OUTPUT_CSV"
|
||||
372
scripts/test-stabilization/run-tests-batch.ps1
Normal file
372
scripts/test-stabilization/run-tests-batch.ps1
Normal file
@@ -0,0 +1,372 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Runs .NET test projects in batches with timeout handling and binary search for hanging tests.
|
||||
|
||||
.DESCRIPTION
|
||||
This script:
|
||||
1. Discovers all test projects (excluding EvidenceLocker.Tests)
|
||||
2. Runs tests in batches of configurable size (default 50)
|
||||
3. Applies a configurable timeout per batch (default: 50 minutes)
|
||||
4. Uses binary search to identify hanging test projects when timeout occurs
|
||||
5. Logs results to CSV and detailed logs
|
||||
|
||||
.PARAMETER BatchSize
|
||||
Number of projects per batch. Default: 50
|
||||
|
||||
.PARAMETER TimeoutMinutes
|
||||
Timeout in minutes per batch. Default: 50
|
||||
|
||||
.PARAMETER OutputDir
|
||||
Directory for output files. Default: ./test-results
|
||||
|
||||
.PARAMETER StartBatch
|
||||
Batch number to start from (0-indexed). Default: 0
|
||||
|
||||
.PARAMETER ProjectList
|
||||
Optional path to a file containing project paths (one per line)
|
||||
#>
|
||||
|
||||
param(
    # Projects per batch. Must be >= 1: the batching loop advances by this
    # amount, so 0 would never terminate.
    [ValidateRange(1, [int]::MaxValue)]
    [int]$BatchSize = 50,

    # Time budget per batch, in minutes.
    [ValidateRange(1, [int]::MaxValue)]
    [int]$TimeoutMinutes = 50,

    # Output directory; relative paths are resolved against the repo root.
    [string]$OutputDir = "./test-results",

    # Zero-based index of the first batch to run.
    [ValidateRange(0, [int]::MaxValue)]
    [int]$StartBatch = 0,

    # Optional file listing project paths, one per line.
    [string]$ProjectList = ""
)

$ErrorActionPreference = "Continue"

# This script lives two levels below the repository root.
$RepoRoot = Split-Path -Parent (Split-Path -Parent $PSScriptRoot)

# Ensure output directory exists. An absolute -OutputDir is used as given;
# joining it onto the repo root would produce an invalid path.
if (-not [System.IO.Path]::IsPathRooted($OutputDir)) {
    $OutputDir = Join-Path $RepoRoot $OutputDir
}
if (-not (Test-Path $OutputDir)) {
    New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null
}

# Per-run artifacts carry a timestamp; the timeout list is shared across runs
# so that known hanging projects can be skipped next time.
$Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$ResultsFile = Join-Path $OutputDir "test-results-$Timestamp.csv"
$LogFile = Join-Path $OutputDir "test-log-$Timestamp.txt"
$HangingProjectsFile = Join-Path $OutputDir "hanging-projects-$Timestamp.txt"
$TimeoutProjectsFile = Join-Path $OutputDir "timeout-projects.txt"
|
||||
|
||||
function Write-Log {
    # Writes a timestamped message to the host and appends the same line to
    # the run's log file ($LogFile, defined at script scope).
    param([string]$Message)

    $line = "[{0}] {1}" -f (Get-Date -Format "yyyy-MM-dd HH:mm:ss"), $Message
    Write-Host $line
    Add-Content -Path $LogFile -Value $line
}
|
||||
|
||||
function Get-TestProjects {
    # Returns the test project paths to run.
    #
    # When -ProjectList names an existing file, its non-empty lines ending in
    # ".csproj" are used as-is. Otherwise the "src" tree under $RepoRoot is
    # scanned for *.Tests.csproj files and any .csproj under a path containing
    # "__Tests", minus build output, fixtures, the sample app and
    # EvidenceLocker.Tests.
    Write-Log "Discovering test projects..."

    $useList = $false
    if ($ProjectList) {
        if (Test-Path $ProjectList) {
            $useList = $true
        } else {
            # Keep the historical fallback, but do not hide the typo.
            Write-Log "WARNING: Project list '$ProjectList' not found, falling back to discovery"
        }
    }

    if ($useList) {
        $projects = @(Get-Content $ProjectList |
            ForEach-Object { $_.Trim() } |
            Where-Object { $_ -match "\.csproj$" })
    } else {
        # Accept both separators so the exclusions also apply when running
        # under PowerShell on Linux/macOS, where FullName uses "/".
        $sep = '[\\/]'
        $projects = @(Get-ChildItem -Path (Join-Path $RepoRoot "src") -Recurse -Filter "*.csproj" |
            Where-Object {
                $_.Name -match "\.Tests\.csproj$" -or
                $_.FullName -match "__Tests"
            } |
            Where-Object {
                # Exclude fixture projects and bin/obj directories
                $_.FullName -notmatch "${sep}bin${sep}" -and
                $_.FullName -notmatch "${sep}obj${sep}" -and
                $_.FullName -notmatch "${sep}Fixtures${sep}" -and
                $_.Name -notmatch "Sample\.App\.csproj" -and
                # Exclude EvidenceLocker.Tests (requires 256GB RAM)
                $_.Name -ne "StellaOps.EvidenceLocker.Tests.csproj"
            } |
            ForEach-Object { $_.FullName })
    }

    Write-Log "Found $($projects.Count) test projects"
    return $projects
}
|
||||
|
||||
function Run-SingleTestProject {
    # Runs `dotnet test` for one project with a timeout and returns a
    # hashtable describing the outcome.
    #
    # Parameters:
    #   ProjectPath    - path of the .csproj to test.
    #   TimeoutSeconds - how long to wait before killing the run.
    #
    # Returns a hashtable with Project, Path, Status, Errors, Warnings, Total,
    # Passed, Failed, Skipped, Duration (seconds) and Message. Status is one
    # of Passed, BuildError, Failed, Error, Timeout or Exception.
    #
    # stdout goes to "<OutputDir>/logs/<project>.log" and stderr to the same
    # name with ".err" appended; counts are parsed from the stdout log.
    param(
        [string]$ProjectPath,
        [int]$TimeoutSeconds = 300
    )

    $projectName = [System.IO.Path]::GetFileNameWithoutExtension($ProjectPath)
    $result = @{
        Project  = $projectName
        Path     = $ProjectPath
        Status   = "Unknown"
        Errors   = 0
        Warnings = 0
        Total    = 0
        Passed   = 0
        Failed   = 0
        Skipped  = 0
        Duration = 0
        Message  = ""
    }

    $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()

    try {
        $logOutput = Join-Path $OutputDir "logs"
        if (-not (Test-Path $logOutput)) {
            New-Item -ItemType Directory -Path $logOutput -Force | Out-Null
        }
        $projectLog = Join-Path $logOutput "$projectName.log"

        # --no-build reuses existing binaries, so the configuration must match
        # the one the solution build in this script produces (Release);
        # without it `dotnet test` looks for Debug output.
        $process = Start-Process -FilePath "dotnet" `
            -ArgumentList "test", "`"$ProjectPath`"", "--no-build", "--configuration", "Release", "--logger", "trx", "--verbosity", "minimal" `
            -NoNewWindow -PassThru -RedirectStandardOutput $projectLog -RedirectStandardError "$projectLog.err"

        # NOTE(review): touching Handle right away is the usual workaround for
        # ExitCode coming back empty on Start-Process -PassThru objects;
        # harmless if not needed.
        $null = $process.Handle

        $completed = $process.WaitForExit($TimeoutSeconds * 1000)
        $stopwatch.Stop()
        $result.Duration = [math]::Round($stopwatch.Elapsed.TotalSeconds, 2)

        if (-not $completed) {
            # Kill the whole process tree where the runtime supports it, so
            # that a hung test host does not outlive the dotnet launcher. The
            # Kill(bool) overload is missing on .NET Framework (Windows
            # PowerShell), hence the fallback.
            try {
                $process.Kill($true)
            } catch {
                try { $process.Kill() } catch { } # already exited: nothing to kill
            }
            $null = $process.WaitForExit(5000)

            $result.Status = "Timeout"
            $result.Message = "Test timed out after $TimeoutSeconds seconds"
            return $result
        }

        $exitCode = $process.ExitCode

        if (Test-Path $projectLog) {
            $output = Get-Content $projectLog -Raw -ErrorAction SilentlyContinue

            # An empty log yields $null, which [regex]::Matches rejects;
            # leave the counters at 0 in that case.
            if ($output) {
                # Parse test results from output
                if ($output -match "Passed:\s*(\d+)") { $result.Passed = [int]$Matches[1] }
                if ($output -match "Failed:\s*(\d+)") { $result.Failed = [int]$Matches[1] }
                if ($output -match "Skipped:\s*(\d+)") { $result.Skipped = [int]$Matches[1] }
                if ($output -match "Total:\s*(\d+)") { $result.Total = [int]$Matches[1] }

                # Count errors and warnings
                $result.Errors = ([regex]::Matches($output, "error [A-Z]+\d+:")).Count
                $result.Warnings = ([regex]::Matches($output, "warning [A-Z]+\d+:")).Count
            }
        }

        if ($exitCode -eq 0) {
            $result.Status = "Passed"
        } elseif ($result.Errors -gt 0) {
            $result.Status = "BuildError"
        } elseif ($result.Failed -gt 0) {
            $result.Status = "Failed"
        } else {
            $result.Status = "Error"
            $result.Message = "Exit code: $exitCode"
        }
    }
    catch {
        $stopwatch.Stop()
        $result.Status = "Exception"
        $result.Message = $_.Exception.Message
        $result.Duration = [math]::Round($stopwatch.Elapsed.TotalSeconds, 2)
    }

    return $result
}
|
||||
|
||||
function Run-BatchWithTimeout {
    # Runs the given projects one after another within a batch time budget.
    #
    # Parameters:
    #   Projects       - project paths to test, in execution order.
    #   BatchNum       - batch number, used for log messages only.
    #   TimeoutMinutes - budget for the whole batch.
    #
    # Returns a hashtable:
    #   Results           - per-project results from Run-SingleTestProject.
    #   TimedOut          - $true when the budget ran out before all projects
    #                       were started.
    #   RemainingProjects - projects that were not started (empty otherwise).
    #
    # The budget is checked between projects; each project additionally gets
    # its own timeout of min(300s, budget / project count), at least 1s.
    param(
        [string[]]$Projects,
        [int]$BatchNum,
        [int]$TimeoutMinutes
    )

    if ($null -eq $Projects) { $Projects = @() }

    Write-Log "Starting batch $BatchNum with $($Projects.Count) projects (timeout: $TimeoutMinutes minutes)"

    # Nothing to run; also avoids dividing the budget by zero below.
    if ($Projects.Count -eq 0) {
        return @{
            Results = @()
            TimedOut = $false
            RemainingProjects = @()
        }
    }

    $batchResults = @()
    $batchStopwatch = [System.Diagnostics.Stopwatch]::StartNew()
    $timeoutSeconds = $TimeoutMinutes * 60
    # Floor can reach 0 when there are more projects than budget seconds,
    # which would make every project time out instantly.
    $perProjectTimeout = [math]::Max(1, [math]::Min(300, [math]::Floor($timeoutSeconds / $Projects.Count)))

    for ($index = 0; $index -lt $Projects.Count; $index++) {
        $project = $Projects[$index]
        $projectName = [System.IO.Path]::GetFileNameWithoutExtension($project)

        # Check if batch timeout exceeded
        if ($batchStopwatch.Elapsed.TotalSeconds -gt $timeoutSeconds) {
            Write-Log "BATCH TIMEOUT: Batch $BatchNum exceeded $TimeoutMinutes minutes"
            return @{
                Results = $batchResults
                TimedOut = $true
                # Projects run in order, so everything from here on is unrun.
                RemainingProjects = @($Projects[$index..($Projects.Count - 1)])
            }
        }

        Write-Log "  Testing: $projectName"
        $result = Run-SingleTestProject -ProjectPath $project -TimeoutSeconds $perProjectTimeout
        $batchResults += $result

        $statusIcon = switch ($result.Status) {
            "Passed" { "[OK]" }
            "Failed" { "[FAIL]" }
            "BuildError" { "[BUILD]" }
            "Timeout" { "[TIMEOUT]" }
            default { "[?]" }
        }

        Write-Log "    $statusIcon $($result.Status) - $($result.Passed)/$($result.Total) passed, $($result.Duration)s"
    }

    $batchStopwatch.Stop()
    Write-Log "Batch $BatchNum completed in $([math]::Round($batchStopwatch.Elapsed.TotalMinutes, 2)) minutes"

    return @{
        Results = $batchResults
        TimedOut = $false
        RemainingProjects = @()
    }
}
|
||||
|
||||
function Binary-SearchHangingProject {
    # Narrows a list of projects down to one that makes a batch time out.
    #
    # Parameters:
    #   Projects       - candidate project paths.
    #   TimeoutMinutes - budget of the enclosing batch; each half is re-run
    #                    with half of it (never less than 5 minutes).
    #
    # Returns the path of the suspected project, or $null when the list is
    # empty or neither half times out on the re-run. A single remaining
    # candidate is reported (and appended to $HangingProjectsFile) without
    # being re-run.
    param(
        [string[]]$Projects,
        [int]$TimeoutMinutes
    )

    if ($null -eq $Projects) { $Projects = @() }

    Write-Log "BINARY SEARCH: Starting binary search for hanging project in $($Projects.Count) projects"

    # Nothing to bisect; without this guard the slicing below would operate
    # on an empty array.
    if ($Projects.Count -eq 0) {
        Write-Log "BINARY SEARCH: No candidate projects to search"
        return $null
    }

    if ($Projects.Count -eq 1) {
        Write-Log "BINARY SEARCH: Found hanging project: $($Projects[0])"
        Add-Content -Path $HangingProjectsFile -Value $Projects[0]
        return $Projects[0]
    }

    $mid = [math]::Floor($Projects.Count / 2)
    $firstHalf = $Projects[0..($mid-1)]
    $secondHalf = $Projects[$mid..($Projects.Count-1)]

    Write-Log "BINARY SEARCH: Testing first half ($($firstHalf.Count) projects)"
    $reducedTimeout = [math]::Max(5, [math]::Floor($TimeoutMinutes / 2))

    $firstResult = Run-BatchWithTimeout -Projects $firstHalf -BatchNum -1 -TimeoutMinutes $reducedTimeout

    if ($firstResult.TimedOut) {
        Write-Log "BINARY SEARCH: Timeout in first half, searching deeper"
        return Binary-SearchHangingProject -Projects $firstHalf -TimeoutMinutes $reducedTimeout
    }

    Write-Log "BINARY SEARCH: First half completed, testing second half ($($secondHalf.Count) projects)"
    $secondResult = Run-BatchWithTimeout -Projects $secondHalf -BatchNum -1 -TimeoutMinutes $reducedTimeout

    if ($secondResult.TimedOut) {
        Write-Log "BINARY SEARCH: Timeout in second half, searching deeper"
        return Binary-SearchHangingProject -Projects $secondHalf -TimeoutMinutes $reducedTimeout
    }

    Write-Log "BINARY SEARCH: No timeout found in either half (possible intermittent issue)"
    return $null
}
|
||||
|
||||
# Initialize CSV with headers
"Project,Path,Status,Errors,Warnings,Total,Passed,Failed,Skipped,Duration,Message" | Out-File -FilePath $ResultsFile -Encoding UTF8

Write-Log "=========================================="
Write-Log "Test Stabilization Run"
Write-Log "Batch Size: $BatchSize"
Write-Log "Timeout: $TimeoutMinutes minutes per batch"
Write-Log "Output: $OutputDir"
Write-Log "=========================================="

# Get all test projects.
# @(...) keeps the result an array even when exactly one project comes back;
# a bare string would make the range indexing below slice characters.
$allProjects = @(Get-TestProjects)

# Load previously identified timeout projects to skip
$skipProjects = @()
if (Test-Path $TimeoutProjectsFile) {
    $skipProjects = @(Get-Content $TimeoutProjectsFile)
    Write-Log "Loaded $($skipProjects.Count) known timeout projects to skip"
}

$allProjects = @($allProjects | Where-Object { $skipProjects -notcontains $_ })
Write-Log "Running $($allProjects.Count) projects after exclusions"

# Split into batches; the leading comma appends each slice as ONE element
# instead of flattening it into $batches.
$batches = @()
for ($i = 0; $i -lt $allProjects.Count; $i += $BatchSize) {
    $end = [math]::Min($i + $BatchSize - 1, $allProjects.Count - 1)
    $batches += ,@($allProjects[$i..$end])
}

Write-Log "Created $($batches.Count) batches"
|
||||
|
||||
# First, build all projects.
# Tests are run against existing binaries, so everything is built once up
# front. A failed build is logged but does not stop the run.
Write-Log "Building solution..."

# Start-Process joins the argument list with spaces, so the path is quoted
# explicitly to survive a repository location that contains spaces.
$buildTarget = Join-Path $RepoRoot "src"
$buildProcess = Start-Process -FilePath "dotnet" `
    -ArgumentList "build", "`"$buildTarget`"", "--configuration", "Release", "--verbosity", "minimal" `
    -NoNewWindow -PassThru -Wait

if ($buildProcess.ExitCode -ne 0) {
    Write-Log "WARNING: Solution build had errors, continuing with test execution"
}
|
||||
|
||||
# Run batches

# Quotes a CSV field when it contains a comma, quote or line break, doubling
# embedded quotes; other values are returned unchanged.
function ConvertTo-CsvField {
    param([string]$Value)
    if ($Value -match '[",\r\n]') {
        return '"' + $Value.Replace('"', '""') + '"'
    }
    return $Value
}

$allResults = @()
$totalStats = @{
    Passed = 0
    Failed = 0
    BuildError = 0
    Timeout = 0
    Total = 0
}

for ($batchNum = $StartBatch; $batchNum -lt $batches.Count; $batchNum++) {
    $batch = $batches[$batchNum]
    Write-Log ""
    Write-Log "=========================================="
    Write-Log "BATCH $($batchNum + 1) of $($batches.Count)"
    Write-Log "=========================================="

    $batchResult = Run-BatchWithTimeout -Projects $batch -BatchNum $batchNum -TimeoutMinutes $TimeoutMinutes

    if ($batchResult.TimedOut) {
        $remaining = @($batchResult.RemainingProjects | Where-Object { $_ })

        # Only search when something is actually left to search through.
        if ($remaining.Count -gt 0) {
            Write-Log "Batch $batchNum timed out, initiating binary search..."
            $hangingProject = Binary-SearchHangingProject -Projects $remaining -TimeoutMinutes $TimeoutMinutes

            if ($hangingProject) {
                Write-Log "Adding $hangingProject to timeout projects list"
                Add-Content -Path $TimeoutProjectsFile -Value $hangingProject
            }
        } else {
            Write-Log "Batch $batchNum timed out with no remaining projects, skipping binary search"
        }
    }

    # Record results
    foreach ($result in $batchResult.Results) {
        # Leading fields are quoted only when needed (paths may contain
        # commas); Message stays always-quoted as before, now with embedded
        # quotes doubled so the row remains parseable.
        $leading = @(
            $result.Project, $result.Path, $result.Status,
            $result.Errors, $result.Warnings, $result.Total,
            $result.Passed, $result.Failed, $result.Skipped, $result.Duration
        ) | ForEach-Object { ConvertTo-CsvField -Value ([string]$_) }
        $message = '"' + ([string]$result.Message).Replace('"', '""') + '"'
        $csvLine = (@($leading) + $message) -join ","
        Add-Content -Path $ResultsFile -Value $csvLine

        $totalStats.Total++
        switch ($result.Status) {
            "Passed" { $totalStats.Passed++ }
            "Failed" { $totalStats.Failed++ }
            "BuildError" { $totalStats.BuildError++ }
            "Timeout" { $totalStats.Timeout++ }
        }
    }

    $allResults += $batchResult.Results

    # Progress summary
    Write-Log ""
    Write-Log "Progress: $($totalStats.Total) projects tested"
    Write-Log "  Passed: $($totalStats.Passed)"
    Write-Log "  Failed: $($totalStats.Failed)"
    Write-Log "  Build Errors: $($totalStats.BuildError)"
    Write-Log "  Timeouts: $($totalStats.Timeout)"
}
|
||||
|
||||
# Final report: totals, pass rate and where the artifacts were written.
# Max(1, Total) keeps the pass-rate division defined when nothing ran.
$passRate = [math]::Round($totalStats.Passed / [math]::Max(1, $totalStats.Total) * 100, 1)

$summaryLines = @(
    ""
    "=========================================="
    "FINAL SUMMARY"
    "=========================================="
    "Total Projects: $($totalStats.Total)"
    "Passed: $($totalStats.Passed) ($passRate%)"
    "Failed: $($totalStats.Failed)"
    "Build Errors: $($totalStats.BuildError)"
    "Timeouts: $($totalStats.Timeout)"
    ""
    "Results saved to: $ResultsFile"
    "Log saved to: $LogFile"
)

foreach ($summaryLine in $summaryLines) {
    Write-Log $summaryLine
}

# The hanging-projects file only exists when a binary search found something.
if (Test-Path $HangingProjectsFile) {
    Write-Log "Hanging projects saved to: $HangingProjectsFile"
}
|
||||
7
scripts/test-stabilization/run-tests.cmd
Normal file
7
scripts/test-stabilization/run-tests.cmd
Normal file
@@ -0,0 +1,7 @@
|
||||
@echo off
REM Test Stabilization Runner
REM Runs all backend tests in batches of 50 with 50-minute timeout
REM Uses binary search to identify hanging tests
REM
REM All arguments are forwarded to run-tests-batch.ps1.

setlocal

REM pushd/popd: run from the repository root without leaving the caller's
REM shell in a different directory afterwards.
pushd "%~dp0..\.." || exit /b 1

REM -NoProfile keeps the run independent of the user's PowerShell profile.
powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0run-tests-batch.ps1" %*
set "RC=%ERRORLEVEL%"

popd
exit /b %RC%
|
||||
Reference in New Issue
Block a user