sprints work.

This commit is contained in:
master
2026-01-20 00:45:38 +02:00
parent b34bde89fa
commit 4903395618
275 changed files with 52785 additions and 79 deletions

View File

@@ -0,0 +1,446 @@
// -----------------------------------------------------------------------------
// AirGapRebuildBundle.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-006 - Air-Gap Rebuild Bundle
// Description: Offline bundle format for reproducible rebuilds.
// -----------------------------------------------------------------------------
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Service for creating and importing air-gap rebuild bundles.
/// </summary>
public sealed class AirGapRebuildBundleService
{
private readonly ILogger<AirGapRebuildBundleService> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
WriteIndented = true,
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
/// <summary>
/// Creates the bundle service.
/// </summary>
/// <param name="logger">Logger that receives export and import progress and warning messages.</param>
public AirGapRebuildBundleService(ILogger<AirGapRebuildBundleService> logger)
    => _logger = logger;
/// <summary>
/// Exports an air-gap rebuild bundle.
/// </summary>
/// <remarks>
/// Stages <c>sources/</c>, <c>buildinfo/</c> and <c>environment/</c> under a timestamped
/// directory, writes <c>manifest.json</c> describing every staged file, and packs the
/// directory into a single archive next to it.
/// </remarks>
/// <param name="request">Packages and settings for the bundle; validated before any I/O.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Path of the created archive (<c>&lt;bundleDir&gt;.tar.gz</c>).</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
/// <exception cref="ArgumentException">The request contains no packages.</exception>
public async Task<string> ExportBundleAsync(
    AirGapBundleRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);
    request.Validate();

    // Read the clock once so the directory name and the manifest timestamp agree.
    var createdAt = DateTimeOffset.UtcNow;
    var bundleDir = Path.Combine(
        request.OutputDirectory ?? Path.GetTempPath(),
        $"rebuild-bundle-{createdAt:yyyyMMdd-HHmmss}");
    Directory.CreateDirectory(bundleDir);

    var sourcesDir = Path.Combine(bundleDir, "sources");
    var buildinfoDir = Path.Combine(bundleDir, "buildinfo");
    var environmentDir = Path.Combine(bundleDir, "environment");
    Directory.CreateDirectory(sourcesDir);
    Directory.CreateDirectory(buildinfoDir);
    Directory.CreateDirectory(environmentDir);

    var manifest = new AirGapBundleManifest
    {
        Version = "1.0",
        CreatedAt = createdAt,
        Packages = [],
        Files = []
    };

    _logger.LogInformation("Creating air-gap bundle for {Count} packages", request.Packages.Count);

    foreach (var pkg in request.Packages)
    {
        // Copy source files
        foreach (var sourceFile in pkg.SourceFiles)
        {
            if (!File.Exists(sourceFile))
            {
                _logger.LogWarning(
                    "Source file {File} for package {Package} not found; skipping",
                    sourceFile,
                    pkg.Name);
                continue;
            }

            var sourceName = Path.GetFileName(sourceFile);
            File.Copy(sourceFile, Path.Combine(sourcesDir, sourceName), overwrite: true);
            await AddManifestFileAsync(manifest, bundleDir, $"sources/{sourceName}", cancellationToken);
        }

        // Copy buildinfo. The manifest only references it when it was really copied, so
        // a rebuild never chases a path that does not exist inside the bundle.
        string? buildinfoEntry = null;
        if (pkg.BuildinfoPath is not null)
        {
            if (File.Exists(pkg.BuildinfoPath))
            {
                var buildinfoName = Path.GetFileName(pkg.BuildinfoPath);
                File.Copy(pkg.BuildinfoPath, Path.Combine(buildinfoDir, buildinfoName), overwrite: true);
                buildinfoEntry = $"buildinfo/{buildinfoName}";
                await AddManifestFileAsync(manifest, bundleDir, buildinfoEntry, cancellationToken);
            }
            else
            {
                _logger.LogWarning(
                    "Buildinfo {File} for package {Package} not found; skipping",
                    pkg.BuildinfoPath,
                    pkg.Name);
            }
        }

        manifest.Packages.Add(new BundlePackageEntry
        {
            Name = pkg.Name,
            Version = pkg.Version,
            Architecture = pkg.Architecture,
            BuildinfoFile = buildinfoEntry
        });
    }

    // Generate Dockerfile for build environment
    var dockerfilePath = Path.Combine(environmentDir, "Dockerfile");
    await File.WriteAllTextAsync(dockerfilePath, GenerateBundleDockerfile(request), cancellationToken);
    await AddManifestFileAsync(manifest, bundleDir, "environment/Dockerfile", cancellationToken);

    // Generate apt sources list; listed in the manifest so import verifies it like every other file.
    var aptSourcesPath = Path.Combine(environmentDir, "apt-sources.list");
    await File.WriteAllTextAsync(aptSourcesPath, GenerateAptSources(request), cancellationToken);
    await AddManifestFileAsync(manifest, bundleDir, "environment/apt-sources.list", cancellationToken);

    // Write manifest
    var manifestPath = Path.Combine(bundleDir, "manifest.json");
    var manifestJson = JsonSerializer.Serialize(manifest, JsonOptions);
    await File.WriteAllTextAsync(manifestPath, manifestJson, cancellationToken);

    // Create archive
    var archivePath = $"{bundleDir}.tar.gz";
    await CreateTarGzAsync(bundleDir, archivePath, cancellationToken);
    _logger.LogInformation("Created air-gap bundle: {Path}", archivePath);

    // Cleanup temp directory
    if (request.CleanupTempFiles)
    {
        Directory.Delete(bundleDir, recursive: true);
    }

    return archivePath;
}

/// <summary>
/// Records a staged file (hash and size) in the manifest under its bundle-relative path.
/// </summary>
/// <remarks>
/// Staged files are flattened to their file name, so two inputs can land on the same
/// relative path. The later copy overwrites the earlier one on disk; the earlier manifest
/// entry is dropped here so the manifest never carries a stale hash.
/// </remarks>
private async Task AddManifestFileAsync(
    AirGapBundleManifest manifest,
    string bundleDir,
    string relativePath,
    CancellationToken ct)
{
    var fullPath = Path.Combine(bundleDir, relativePath.Replace('/', Path.DirectorySeparatorChar));

    if (manifest.Files.RemoveAll(f => string.Equals(f.Path, relativePath, StringComparison.Ordinal)) > 0)
    {
        _logger.LogWarning("Bundle file {File} was added more than once; the last copy wins", relativePath);
    }

    manifest.Files.Add(new BundleFileEntry
    {
        Path = relativePath,
        Sha256 = await ComputeSha256Async(fullPath, ct),
        Size = new FileInfo(fullPath).Length
    });
}
/// <summary>
/// Imports an air-gap rebuild bundle.
/// </summary>
/// <remarks>
/// Extracts the archive into <paramref name="outputDirectory"/>, reads <c>manifest.json</c>
/// and compares the SHA-256 of every listed file with the manifest. Missing files and
/// checksum mismatches are logged as warnings; they do not fail the import.
/// </remarks>
/// <param name="bundlePath">Path of the bundle archive.</param>
/// <param name="outputDirectory">Directory the bundle is extracted into.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The manifest read from the extracted bundle.</returns>
/// <exception cref="FileNotFoundException">The bundle archive does not exist.</exception>
/// <exception cref="InvalidOperationException">The manifest is missing or deserializes to null.</exception>
public async Task<AirGapBundleManifest> ImportBundleAsync(
    string bundlePath,
    string outputDirectory,
    CancellationToken cancellationToken = default)
{
    if (!File.Exists(bundlePath))
    {
        throw new FileNotFoundException("Bundle not found", bundlePath);
    }

    _logger.LogInformation("Importing air-gap bundle from {Path}", bundlePath);

    // Extract archive
    await ExtractTarGzAsync(bundlePath, outputDirectory, cancellationToken);

    // Read manifest
    var manifestPath = Path.Combine(outputDirectory, "manifest.json");
    if (!File.Exists(manifestPath))
    {
        throw new InvalidOperationException("Invalid bundle: manifest.json not found");
    }

    var manifestJson = await File.ReadAllTextAsync(manifestPath, cancellationToken);
    var manifest = JsonSerializer.Deserialize<AirGapBundleManifest>(manifestJson, JsonOptions)
        ?? throw new InvalidOperationException("Failed to parse manifest");

    // The manifest comes from the bundle itself, so its paths are untrusted. Anything that
    // resolves outside the extraction directory (absolute paths, "..") is not probed or hashed.
    var bundleRoot = Path.TrimEndingDirectorySeparator(Path.GetFullPath(outputDirectory))
        + Path.DirectorySeparatorChar;

    // Verify checksums
    foreach (var file in manifest.Files)
    {
        var filePath = Path.GetFullPath(
            Path.Combine(outputDirectory, file.Path.Replace('/', Path.DirectorySeparatorChar)));

        if (!filePath.StartsWith(bundleRoot, StringComparison.Ordinal))
        {
            _logger.LogWarning("Ignoring manifest entry outside bundle directory: {File}", file.Path);
            continue;
        }

        if (!File.Exists(filePath))
        {
            _logger.LogWarning("Missing file: {File}", file.Path);
            continue;
        }

        var actualHash = await ComputeSha256Async(filePath, cancellationToken);
        if (!string.Equals(actualHash, file.Sha256, StringComparison.OrdinalIgnoreCase))
        {
            _logger.LogWarning("Checksum mismatch for {File}", file.Path);
        }
    }

    _logger.LogInformation("Imported bundle with {Count} packages", manifest.Packages.Count);
    return manifest;
}
/// <summary>
/// Rebuilds one package from an already extracted bundle directory.
/// </summary>
/// <param name="bundleDirectory">Directory containing the extracted bundle and its <c>manifest.json</c>.</param>
/// <param name="packageName">Name of the package to rebuild, matched exactly against the manifest.</param>
/// <param name="options">Rebuild options; a default instance is used when null.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// The local backend's result re-labelled as <see cref="RebuildBackend.AirGap"/>, or a failed
/// result when the package's buildinfo file is not present in the bundle.
/// </returns>
/// <exception cref="InvalidOperationException">The package is not listed in the manifest.</exception>
public async Task<RebuildResult> RebuildFromBundleAsync(
    string bundleDirectory,
    string packageName,
    LocalRebuildOptions? options = null,
    CancellationToken cancellationToken = default)
{
    options ??= new LocalRebuildOptions();

    // Load the manifest and locate the requested package.
    var manifestText = await File.ReadAllTextAsync(
        Path.Combine(bundleDirectory, "manifest.json"),
        cancellationToken);
    var manifest = JsonSerializer.Deserialize<AirGapBundleManifest>(manifestText, JsonOptions);

    var entry = manifest?.Packages.FirstOrDefault(p => p.Name == packageName);
    if (entry is null)
    {
        throw new InvalidOperationException($"Package {packageName} not found in bundle");
    }

    // Manifest paths use '/', so translate to the platform separator.
    string? buildinfoPath = null;
    if (entry.BuildinfoFile is not null)
    {
        buildinfoPath = Path.Combine(
            bundleDirectory,
            entry.BuildinfoFile.Replace('/', Path.DirectorySeparatorChar));
    }

    if (buildinfoPath is not null && File.Exists(buildinfoPath))
    {
        // Delegate to the local backend, then stamp the result with the air-gap backend type.
        var backend = new LocalRebuildBackend(
            Microsoft.Extensions.Options.Options.Create(new LocalRebuildBackendOptions()),
            new Microsoft.Extensions.Logging.Abstractions.NullLogger<LocalRebuildBackend>());
        var rebuilt = await backend.RebuildAsync(buildinfoPath, options, cancellationToken);
        return rebuilt with { Backend = RebuildBackend.AirGap };
    }

    return RebuildResult.Failed(
        Guid.NewGuid().ToString("N")[..12],
        "Buildinfo not found in bundle",
        backend: RebuildBackend.AirGap);
}
/// <summary>
/// Renders the Dockerfile stored in the bundle's <c>environment/</c> directory.
/// </summary>
/// <param name="request">Supplies the base image; <c>debian:bookworm</c> is used when none is set.</param>
/// <returns>The Dockerfile text.</returns>
private static string GenerateBundleDockerfile(AirGapBundleRequest request)
{
    string image = request.BaseImage is null ? "debian:bookworm" : request.BaseImage;

    return $"""
        FROM {image}
        # This is an air-gap rebuild environment
        # Sources are pre-fetched in the bundle
        RUN apt-get update && apt-get install -y \
        build-essential \
        devscripts \
        dpkg-dev \
        fakeroot \
        debhelper \
        && rm -rf /var/lib/apt/lists/*
        WORKDIR /build
        # Copy sources from bundle
        COPY sources/ /build/sources/
        COPY buildinfo/ /build/buildinfo/
        CMD ["/bin/bash"]
        """;
}
/// <summary>
/// Renders the <c>apt-sources.list</c> stored in the bundle's <c>environment/</c> directory.
/// </summary>
/// <param name="request">Supplies the Debian distribution; <c>bookworm</c> is used when none is set.</param>
/// <returns>The sources list text.</returns>
private static string GenerateAptSources(AirGapBundleRequest request)
{
    string suite = request.Distribution is null ? "bookworm" : request.Distribution;

    return $"""
        # Debian {suite} sources
        # For air-gap scenarios, these would point to local mirrors
        deb http://deb.debian.org/debian {suite} main
        deb-src http://deb.debian.org/debian {suite} main
        """;
}
/// <summary>
/// Packs the contents of <paramref name="sourceDir"/> into a single archive at
/// <paramref name="destPath"/>, replacing any existing file.
/// </summary>
/// <remarks>
/// NOTE: despite the method name and the ".tar.gz" extension chosen by the caller, the payload
/// is a ZIP archive written by <see cref="ZipFile"/>; ExtractTarGzAsync reads the same format.
/// The archive is built under a staging name and moved into place, so the destination is
/// never observed half-written. The work runs synchronously; the returned task is already complete.
/// </remarks>
/// <param name="sourceDir">Directory whose contents are archived (the directory itself is not included).</param>
/// <param name="destPath">Path of the archive to create.</param>
/// <param name="ct">Checked once before any work starts.</param>
private static Task CreateTarGzAsync(string sourceDir, string destPath, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    // Derive the staging name by appending rather than by string replacement: replacing
    // ".tar.gz" would also rewrite directory names that happen to contain it, and would be
    // a no-op (staging == destination) when the destination lacks that suffix.
    var stagingPath = destPath + ".tmp";
    if (File.Exists(stagingPath))
    {
        File.Delete(stagingPath);
    }

    ZipFile.CreateFromDirectory(sourceDir, stagingPath, CompressionLevel.Optimal, includeBaseDirectory: false);
    File.Move(stagingPath, destPath, overwrite: true);
    return Task.CompletedTask;
}
/// <summary>
/// Extracts a bundle archive into <paramref name="destDir"/>, creating the directory if
/// needed and overwriting files that already exist.
/// </summary>
/// <remarks>
/// NOTE: the archive is read as ZIP (the format CreateTarGzAsync writes), regardless of its
/// ".tar.gz" name. The work runs synchronously; the returned task is already complete.
/// </remarks>
/// <param name="archivePath">Path of the archive to extract.</param>
/// <param name="destDir">Directory to extract into.</param>
/// <param name="ct">Checked once before any work starts.</param>
private static Task ExtractTarGzAsync(string archivePath, string destDir, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();
    Directory.CreateDirectory(destDir);
    ZipFile.ExtractToDirectory(archivePath, destDir, overwriteFiles: true);
    return Task.CompletedTask;
}
/// <summary>
/// Hashes a file with SHA-256.
/// </summary>
/// <param name="filePath">File to read.</param>
/// <param name="ct">Cancels the hashing read loop.</param>
/// <returns>The digest as a lowercase hexadecimal string.</returns>
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
    byte[] digest;
    await using (var input = File.OpenRead(filePath))
    {
        digest = await SHA256.HashDataAsync(input, ct);
    }

    return Convert.ToHexString(digest).ToLowerInvariant();
}
}
/// <summary>
/// Describes what an exported air-gap rebuild bundle should contain and where it is written.
/// </summary>
public sealed record AirGapBundleRequest
{
    /// <summary>
    /// Gets the packages to include. At least one is required (see <see cref="Validate"/>).
    /// </summary>
    public required List<AirGapPackageSpec> Packages { get; init; }

    /// <summary>
    /// Gets the directory the bundle is created in; the system temp directory is used when null.
    /// </summary>
    public string? OutputDirectory { get; init; }

    /// <summary>
    /// Gets the container base image for the generated Dockerfile.
    /// </summary>
    public string? BaseImage { get; init; }

    /// <summary>
    /// Gets the Debian distribution used in the generated apt sources list.
    /// </summary>
    public string? Distribution { get; init; }

    /// <summary>
    /// Gets whether the staging directory is deleted once the archive exists. Defaults to true.
    /// </summary>
    public bool CleanupTempFiles { get; init; } = true;

    /// <summary>
    /// Checks that the request names at least one package.
    /// </summary>
    /// <exception cref="ArgumentException"><see cref="Packages"/> is null or empty.</exception>
    public void Validate()
    {
        if (Packages is null || Packages.Count == 0)
        {
            throw new ArgumentException("At least one package is required");
        }
    }
}
/// <summary>
/// Package specification for air-gap bundle: identifies one package and the local files
/// that the bundle export copies for it.
/// </summary>
public sealed record AirGapPackageSpec
{
/// <summary>
/// Gets the package name. Recorded in the bundle manifest and used to look the package up
/// when rebuilding from a bundle.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Gets the package version. Recorded in the bundle manifest as-is.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture. Recorded in the bundle manifest as-is.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the paths of the source files to copy into the bundle's <c>sources/</c> directory.
/// Only the file name of each path is kept in the bundle; paths that do not exist are not copied.
/// Defaults to an empty list.
/// </summary>
public List<string> SourceFiles { get; init; } = [];
/// <summary>
/// Gets the path of the package's .buildinfo file, or null when there is none. When the file
/// exists it is copied into the bundle's <c>buildinfo/</c> directory under its file name.
/// </summary>
public string? BuildinfoPath { get; init; }
}
/// <summary>
/// Air-gap bundle manifest. Serialized as <c>manifest.json</c> at the root of a bundle and
/// read back when a bundle is imported or rebuilt from.
/// </summary>
public sealed record AirGapBundleManifest
{
/// <summary>
/// Gets the manifest version. The bundle export in this file writes <c>"1.0"</c>.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets when the bundle was created (UTC at export time).
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Gets the packages in the bundle, one entry per package in the export request.
/// </summary>
public required List<BundlePackageEntry> Packages { get; init; }
/// <summary>
/// Gets the files in the bundle together with their SHA-256 and size; import recomputes
/// each file's hash and compares it with this list.
/// </summary>
public required List<BundleFileEntry> Files { get; init; }
}
/// <summary>
/// Package entry in bundle manifest.
/// </summary>
public sealed record BundlePackageEntry
{
/// <summary>
/// Gets the package name. Rebuild-from-bundle matches the requested package against this
/// value with an exact (case-sensitive) comparison.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Gets the version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the buildinfo file path in bundle, relative to the bundle root and written with
/// forward slashes (for example <c>buildinfo/&lt;file name&gt;</c>); null when the package
/// has no buildinfo.
/// </summary>
public string? BuildinfoFile { get; init; }
}
/// <summary>
/// File entry in bundle manifest: one file in the bundle plus the data used to verify it.
/// </summary>
public sealed record BundleFileEntry
{
/// <summary>
/// Gets the file path in bundle, relative to the bundle root and written with forward
/// slashes (for example <c>sources/&lt;file name&gt;</c>).
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Gets the SHA-256 hash as a hexadecimal string. Export writes lowercase hex; import
/// compares case-insensitively.
/// </summary>
public required string Sha256 { get; init; }
/// <summary>
/// Gets the file size in bytes, as measured at export time.
/// </summary>
public long Size { get; init; }
}

View File

@@ -0,0 +1,439 @@
// -----------------------------------------------------------------------------
// DeterminismValidator.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-004 - Determinism Validation
// Description: Validates determinism of rebuilt binaries.
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Validates determinism of rebuilt binaries.
/// </summary>
public sealed class DeterminismValidator
{
private readonly ILogger<DeterminismValidator> _logger;
/// <summary>
/// Creates the validator.
/// </summary>
/// <param name="logger">Logger that receives validation outcomes and deep-analysis failures.</param>
public DeterminismValidator(ILogger<DeterminismValidator> logger)
    => _logger = logger;
/// <summary>
/// Compares a rebuilt binary with the original and reports whether they are bit-for-bit identical.
/// </summary>
/// <param name="originalPath">Path of the original file.</param>
/// <param name="rebuiltPath">Path of the rebuilt file.</param>
/// <param name="options">Validation options; <see cref="DeterminismValidationOptions.Default"/> when null.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// A report carrying both hashes and every issue found. When either file is missing, a
/// failed report with <see cref="DeterminismReport.Error"/> set is returned instead.
/// </returns>
public async Task<DeterminismReport> ValidateAsync(
    string originalPath,
    string rebuiltPath,
    DeterminismValidationOptions? options = null,
    CancellationToken cancellationToken = default)
{
    var settings = options ?? DeterminismValidationOptions.Default;

    // Both inputs must exist before anything is measured.
    if (!File.Exists(originalPath))
    {
        return DeterminismReport.Failed("Original file not found", originalPath, rebuiltPath);
    }

    if (!File.Exists(rebuiltPath))
    {
        return DeterminismReport.Failed("Rebuilt file not found", originalPath, rebuiltPath);
    }

    var findings = new List<DeterminismIssue>();

    // Cheap signal first: differing lengths.
    var originalSize = new FileInfo(originalPath).Length;
    var rebuiltSize = new FileInfo(rebuiltPath).Length;
    if (originalSize != rebuiltSize)
    {
        findings.Add(new DeterminismIssue
        {
            Type = DeterminismIssueType.SizeMismatch,
            Description = $"Size mismatch: original={originalSize}, rebuilt={rebuiltSize}",
            Severity = IssueSeverity.Error
        });
    }

    // Authoritative signal: SHA-256 of both files.
    var originalHash = await ComputeSha256Async(originalPath, cancellationToken);
    var rebuiltHash = await ComputeSha256Async(rebuiltPath, cancellationToken);
    var sameHash = string.Equals(originalHash, rebuiltHash, StringComparison.OrdinalIgnoreCase);

    if (!sameHash)
    {
        findings.Add(new DeterminismIssue
        {
            Type = DeterminismIssueType.HashMismatch,
            Description = $"SHA-256 mismatch: original={originalHash}, rebuilt={rebuiltHash}",
            Severity = IssueSeverity.Error
        });

        // Only dig into the bytes when the hashes disagree and the caller asked for it.
        if (settings.PerformDeepAnalysis)
        {
            findings.AddRange(await PerformDeepAnalysisAsync(originalPath, rebuiltPath, cancellationToken));
        }
    }

    var isReproducible = sameHash && findings.All(f => f.Severity != IssueSeverity.Error);

    _logger.LogInformation(
        "Determinism validation for {Original} vs {Rebuilt}: {Result}",
        Path.GetFileName(originalPath),
        Path.GetFileName(rebuiltPath),
        isReproducible ? "REPRODUCIBLE" : "NOT REPRODUCIBLE");

    return new DeterminismReport
    {
        IsReproducible = isReproducible,
        OriginalPath = originalPath,
        RebuiltPath = rebuiltPath,
        OriginalSha256 = originalHash,
        RebuiltSha256 = rebuiltHash,
        Issues = findings,
        ValidatedAt = DateTimeOffset.UtcNow
    };
}
/// <summary>
/// Validates a list of original/rebuilt pairs one after another and summarizes the outcome.
/// </summary>
/// <param name="pairs">Original and rebuilt file paths, validated in list order.</param>
/// <param name="options">Validation options applied to every pair.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The per-pair reports plus total and reproducible counts.</returns>
public async Task<DeterminismBatchReport> ValidateBatchAsync(
    IReadOnlyList<(string Original, string Rebuilt)> pairs,
    DeterminismValidationOptions? options = null,
    CancellationToken cancellationToken = default)
{
    var results = new List<DeterminismReport>(pairs.Count);
    var reproducible = 0;

    for (var index = 0; index < pairs.Count; index++)
    {
        var (original, rebuilt) = pairs[index];
        var outcome = await ValidateAsync(original, rebuilt, options, cancellationToken);
        results.Add(outcome);

        if (outcome.IsReproducible)
        {
            reproducible++;
        }
    }

    return new DeterminismBatchReport
    {
        Reports = results,
        TotalCount = results.Count,
        ReproducibleCount = reproducible,
        ValidatedAt = DateTimeOffset.UtcNow
    };
}
/// <summary>
/// Byte-level comparison of two files whose hashes differ.
/// </summary>
/// <remarks>
/// Both files are read fully into memory. Only the overlapping prefix (the shorter length)
/// is compared byte by byte. Failures are reported as an
/// <see cref="DeterminismIssueType.AnalysisError"/> warning rather than thrown, except for
/// cancellation, which propagates to the caller.
/// </remarks>
/// <param name="originalPath">Path of the original file.</param>
/// <param name="rebuiltPath">Path of the rebuilt file.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Informational issues describing the differences that were found.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled while reading.</exception>
private async Task<IReadOnlyList<DeterminismIssue>> PerformDeepAnalysisAsync(
    string originalPath,
    string rebuiltPath,
    CancellationToken ct)
{
    var issues = new List<DeterminismIssue>();
    try
    {
        // Read both files
        var originalBytes = await File.ReadAllBytesAsync(originalPath, ct);
        var rebuiltBytes = await File.ReadAllBytesAsync(rebuiltPath, ct);

        // Find first difference offset
        var minLen = Math.Min(originalBytes.Length, rebuiltBytes.Length);
        var firstDiffOffset = -1;
        var diffCount = 0;
        for (var i = 0; i < minLen; i++)
        {
            if (originalBytes[i] != rebuiltBytes[i])
            {
                if (firstDiffOffset < 0)
                {
                    firstDiffOffset = i;
                }

                diffCount++;
            }
        }

        // firstDiffOffset >= 0 implies minLen > 0, so the percentage below cannot divide by zero.
        if (firstDiffOffset >= 0)
        {
            issues.Add(new DeterminismIssue
            {
                Type = DeterminismIssueType.ByteDifference,
                Description = $"First difference at offset 0x{firstDiffOffset:X}, total {diffCount} differing bytes",
                Severity = IssueSeverity.Info,
                Details = new Dictionary<string, object>
                {
                    ["firstDiffOffset"] = firstDiffOffset,
                    ["diffCount"] = diffCount,
                    ["diffPercentage"] = Math.Round(100.0 * diffCount / minLen, 2)
                }
            });
        }

        // Check for common non-determinism patterns
        issues.AddRange(DetectNonDeterminismPatterns(originalBytes, rebuiltBytes));
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Deep analysis is best-effort: an I/O or memory failure downgrades to a warning.
        // Cancellation is excluded by the filter so a cancelled validation is not reported
        // as a completed one with an "analysis failed" note.
        _logger.LogWarning(ex, "Deep analysis failed");
        issues.Add(new DeterminismIssue
        {
            Type = DeterminismIssueType.AnalysisError,
            Description = $"Deep analysis failed: {ex.Message}",
            Severity = IssueSeverity.Warning
        });
    }

    return issues;
}
/// <summary>
/// Looks for well-known sources of non-determinism in two differing byte arrays.
/// </summary>
/// <remarks>
/// Currently a single, simplified heuristic: scan 4-byte aligned words over the common
/// prefix and flag the first position where the two values differ and both fall between
/// one year ago and one day from now when read as Unix seconds. Words are decoded with
/// <see cref="BitConverter"/>, i.e. in the byte order of the machine running the check.
/// </remarks>
/// <param name="original">Bytes of the original file.</param>
/// <param name="rebuilt">Bytes of the rebuilt file.</param>
/// <returns>At most one <see cref="DeterminismIssueType.EmbeddedTimestamp"/> issue.</returns>
private static IEnumerable<DeterminismIssue> DetectNonDeterminismPatterns(
    byte[] original,
    byte[] rebuilt)
{
    var issues = new List<DeterminismIssue>();

    // Only the range present in both files can be compared word by word.
    var comparableLength = Math.Min(original.Length, rebuilt.Length);

    var now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    var windowStart = now - 365L * 24 * 3600; // one year back
    var windowEnd = now + 86400;              // one day ahead, tolerating clock skew

    // "i + 4 <= comparableLength" includes the final aligned word; the previous bound
    // ("i < length - 4") stopped one word early and never scanned a 4-byte input at all.
    for (var i = 0; i + 4 <= comparableLength; i += 4)
    {
        var origVal = BitConverter.ToUInt32(original, i);
        var rebuildVal = BitConverter.ToUInt32(rebuilt, i);

        if (origVal != rebuildVal &&
            origVal > windowStart && origVal < windowEnd &&
            rebuildVal > windowStart && rebuildVal < windowEnd)
        {
            issues.Add(new DeterminismIssue
            {
                Type = DeterminismIssueType.EmbeddedTimestamp,
                Description = $"Possible embedded timestamp at offset 0x{i:X}",
                Severity = IssueSeverity.Info,
                Details = new Dictionary<string, object>
                {
                    ["offset"] = i,
                    ["originalValue"] = origVal,
                    ["rebuiltValue"] = rebuildVal
                }
            });
            break; // Only report first occurrence
        }
    }

    return issues;
}
/// <summary>
/// Hashes a file with SHA-256.
/// </summary>
/// <param name="filePath">File to read.</param>
/// <param name="ct">Cancels the hashing read loop.</param>
/// <returns>The digest as a lowercase hexadecimal string.</returns>
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
    byte[] digest;
    await using (var input = File.OpenRead(filePath))
    {
        digest = await SHA256.HashDataAsync(input, ct);
    }

    return Convert.ToHexString(digest).ToLowerInvariant();
}
}
/// <summary>
/// Options for determinism validation.
/// </summary>
public sealed record DeterminismValidationOptions
{
/// <summary>
/// Gets whether to perform deep binary analysis. The validator only runs it when the
/// SHA-256 hashes of the two files differ. Defaults to true.
/// </summary>
public bool PerformDeepAnalysis { get; init; } = true;
/// <summary>
/// Gets whether to check for timestamp patterns. Defaults to true.
/// NOTE(review): the validator code in this file does not read this flag; the timestamp
/// heuristic runs whenever deep analysis runs. Confirm whether it should be wired in.
/// </summary>
public bool DetectTimestamps { get; init; } = true;
/// <summary>
/// Gets whether to check for build path patterns. Defaults to true.
/// NOTE(review): the validator code in this file does not read this flag and contains no
/// build-path detection. Confirm whether this is planned or handled elsewhere.
/// </summary>
public bool DetectBuildPaths { get; init; } = true;
/// <summary>
/// Gets the default options: a shared instance with every property at its default value.
/// </summary>
public static DeterminismValidationOptions Default { get; } = new();
}
/// <summary>
/// Outcome of comparing one rebuilt file with its original.
/// </summary>
public sealed record DeterminismReport
{
    /// <summary>
    /// Gets whether the rebuilt file matched the original.
    /// </summary>
    public required bool IsReproducible { get; init; }

    /// <summary>
    /// Gets the path of the original file.
    /// </summary>
    public required string OriginalPath { get; init; }

    /// <summary>
    /// Gets the path of the rebuilt file.
    /// </summary>
    public required string RebuiltPath { get; init; }

    /// <summary>
    /// Gets the SHA-256 of the original file; null when validation failed before hashing.
    /// </summary>
    public string? OriginalSha256 { get; init; }

    /// <summary>
    /// Gets the SHA-256 of the rebuilt file; null when validation failed before hashing.
    /// </summary>
    public string? RebuiltSha256 { get; init; }

    /// <summary>
    /// Gets the issues that were found; null on a report created by <see cref="Failed"/>.
    /// </summary>
    public IReadOnlyList<DeterminismIssue>? Issues { get; init; }

    /// <summary>
    /// Gets the time (UTC) at which the report was produced.
    /// </summary>
    public DateTimeOffset ValidatedAt { get; init; }

    /// <summary>
    /// Gets the reason validation could not be carried out; null otherwise.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Builds a non-reproducible report for a validation that could not be carried out.
    /// </summary>
    /// <param name="error">Reason validation failed.</param>
    /// <param name="original">Path of the original file.</param>
    /// <param name="rebuilt">Path of the rebuilt file.</param>
    /// <returns>A report with <see cref="Error"/> set and no hashes or issues.</returns>
    public static DeterminismReport Failed(string error, string original, string rebuilt)
    {
        var report = new DeterminismReport
        {
            IsReproducible = false,
            OriginalPath = original,
            RebuiltPath = rebuilt,
            Error = error,
            ValidatedAt = DateTimeOffset.UtcNow
        };
        return report;
    }
}
/// <summary>
/// Summary of a batch of determinism validations.
/// </summary>
public sealed record DeterminismBatchReport
{
    /// <summary>
    /// Gets the report for each validated pair.
    /// </summary>
    public required IReadOnlyList<DeterminismReport> Reports { get; init; }

    /// <summary>
    /// Gets the number of validated pairs.
    /// </summary>
    public required int TotalCount { get; init; }

    /// <summary>
    /// Gets how many of the pairs were reproducible.
    /// </summary>
    public required int ReproducibleCount { get; init; }

    /// <summary>
    /// Gets the fraction of reproducible pairs (<see cref="ReproducibleCount"/> divided by
    /// <see cref="TotalCount"/>), or 0 when <see cref="TotalCount"/> is not positive.
    /// </summary>
    public double ReproducibilityRate
    {
        get
        {
            if (TotalCount <= 0)
            {
                return 0;
            }

            return (double)ReproducibleCount / TotalCount;
        }
    }

    /// <summary>
    /// Gets the time (UTC) at which the batch report was produced.
    /// </summary>
    public DateTimeOffset ValidatedAt { get; init; }
}
/// <summary>
/// A determinism issue: one finding produced while comparing an original and a rebuilt file.
/// </summary>
public sealed record DeterminismIssue
{
/// <summary>
/// Gets the issue type.
/// </summary>
public required DeterminismIssueType Type { get; init; }
/// <summary>
/// Gets the issue description (human-readable text).
/// </summary>
public required string Description { get; init; }
/// <summary>
/// Gets the severity. Any issue with <see cref="IssueSeverity.Error"/> makes the
/// validator report the pair as not reproducible.
/// </summary>
public required IssueSeverity Severity { get; init; }
/// <summary>
/// Gets additional details, or null when there are none. The validator in this file fills
/// <c>firstDiffOffset</c>, <c>diffCount</c> and <c>diffPercentage</c> for byte differences, and
/// <c>offset</c>, <c>originalValue</c> and <c>rebuiltValue</c> for embedded timestamps.
/// </summary>
public IReadOnlyDictionary<string, object>? Details { get; init; }
}
/// <summary>
/// Type of determinism issue.
/// </summary>
public enum DeterminismIssueType
{
/// <summary>
/// File size mismatch: the original and rebuilt files have different lengths.
/// </summary>
SizeMismatch,
/// <summary>
/// Hash mismatch: the SHA-256 digests of the two files differ.
/// </summary>
HashMismatch,
/// <summary>
/// Byte-level difference found by deep analysis within the common prefix of the two files.
/// </summary>
ByteDifference,
/// <summary>
/// Embedded timestamp detected: a differing 4-byte word whose values both look like
/// recent Unix timestamps.
/// </summary>
EmbeddedTimestamp,
/// <summary>
/// Embedded build path detected.
/// NOTE(review): no code in this file produces this value; confirm where it is emitted.
/// </summary>
EmbeddedBuildPath,
/// <summary>
/// Analysis error: deep analysis itself failed; the failure message is in the description.
/// </summary>
AnalysisError
}
/// <summary>
/// Severity of an issue.
/// </summary>
public enum IssueSeverity
{
/// <summary>
/// Informational. Used by the validator for deep-analysis findings (byte differences,
/// possible timestamps).
/// </summary>
Info,
/// <summary>
/// Warning. Used by the validator when deep analysis itself fails.
/// </summary>
Warning,
/// <summary>
/// Error. Used by the validator for size and hash mismatches; any issue at this level
/// makes the result not reproducible.
/// </summary>
Error
}

View File

@@ -0,0 +1,93 @@
// -----------------------------------------------------------------------------
// IRebuildService.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-001 - Rebuild Service Abstractions
// Description: Main interface for reproducible rebuild orchestration.
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Service for orchestrating reproducible binary rebuilds.
/// </summary>
/// <remarks>
/// The members cover three flows: a job-based rebuild (request, poll status, download
/// artifacts), a local rebuild driven by a .buildinfo file, and a lookup of rebuild data
/// that already exists for a package.
/// </remarks>
public interface IRebuildService
{
/// <summary>
/// Requests a rebuild for a package.
/// </summary>
/// <param name="request">The rebuild request.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild job ID, to be passed to <see cref="GetStatusAsync"/> and <see cref="DownloadArtifactsAsync"/>.</returns>
Task<string> RequestRebuildAsync(
RebuildRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the status of a rebuild job.
/// </summary>
/// <param name="jobId">The job ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild status.</returns>
Task<RebuildStatus> GetStatusAsync(
string jobId,
CancellationToken cancellationToken = default);
/// <summary>
/// Downloads the artifacts from a completed rebuild.
/// </summary>
/// <param name="jobId">The job ID.</param>
/// <param name="outputDirectory">The directory to write artifacts.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild result with artifacts.</returns>
Task<RebuildResult> DownloadArtifactsAsync(
string jobId,
string outputDirectory,
CancellationToken cancellationToken = default);
/// <summary>
/// Performs a local rebuild using a .buildinfo file.
/// </summary>
/// <param name="buildinfoPath">Path to the .buildinfo file.</param>
/// <param name="options">Local rebuild options; may be omitted (null).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild result.</returns>
Task<RebuildResult> RebuildLocalAsync(
string buildinfoPath,
LocalRebuildOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Queries if a package has existing rebuild data.
/// </summary>
/// <param name="package">Package name.</param>
/// <param name="version">Package version.</param>
/// <param name="architecture">Target architecture.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Existing rebuild info if available; otherwise null.</returns>
Task<RebuildInfo?> QueryExistingRebuildAsync(
string package,
string version,
string architecture,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Rebuild backend type. Identifies which mechanism produced a rebuild result.
/// </summary>
public enum RebuildBackend
{
/// <summary>
/// Remote rebuild via reproduce.debian.net.
/// </summary>
ReproduceDebian,
/// <summary>
/// Local container-based rebuild.
/// </summary>
Local,
/// <summary>
/// Air-gapped rebuild from pre-fetched bundle.
/// </summary>
AirGap
}

View File

@@ -0,0 +1,459 @@
// -----------------------------------------------------------------------------
// LocalRebuildBackend.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-003 - Local Rebuild Backend
// Description: Container-based local rebuild using .buildinfo files.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Local container-based rebuild backend.
/// </summary>
public sealed partial class LocalRebuildBackend
{
private readonly LocalRebuildBackendOptions _options;
private readonly ILogger<LocalRebuildBackend> _logger;
/// <summary>
/// Creates the local rebuild backend.
/// </summary>
/// <param name="options">Backend options; the wrapped value is read once, here.</param>
/// <param name="logger">Logger that receives rebuild progress and failures.</param>
public LocalRebuildBackend(
    IOptions<LocalRebuildBackendOptions> options,
    ILogger<LocalRebuildBackend> logger)
    => (_options, _logger) = (options.Value, logger);
/// <summary>
/// Performs a local rebuild using a .buildinfo file.
/// </summary>
/// <remarks>
/// Steps: parse the buildinfo, generate a Dockerfile and build script in a per-job
/// directory, build the image, run it with <c>&lt;buildDir&gt;/output</c> mounted at
/// <c>/output</c>, collect the artifacts and compare their checksums with the buildinfo.
/// Exceptions are not propagated: any failure is logged and returned as a failed result.
/// </remarks>
/// <param name="buildinfoPath">Path to the .buildinfo file.</param>
/// <param name="options">Rebuild options; a default instance is used when null.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild result, including the build log and per-artifact checksum results.</returns>
public async Task<RebuildResult> RebuildAsync(
    string buildinfoPath,
    LocalRebuildOptions? options = null,
    CancellationToken cancellationToken = default)
{
    options ??= new LocalRebuildOptions();
    var jobId = Guid.NewGuid().ToString("N")[..12];
    var sw = Stopwatch.StartNew();
    var buildLog = new StringBuilder();

    try
    {
        // Parse .buildinfo file
        var buildinfo = await ParseBuildinfoAsync(buildinfoPath, cancellationToken);
        buildLog.AppendLine($"Parsed buildinfo: {buildinfo.Source} {buildinfo.Version}");
        _logger.LogInformation("Starting local rebuild for {Package} {Version}", buildinfo.Source, buildinfo.Version);

        // Create build directory
        var buildDir = Path.Combine(
            options.OutputDirectory ?? Path.GetTempPath(),
            $"rebuild-{jobId}");
        Directory.CreateDirectory(buildDir);

        // Generate Dockerfile
        var dockerfile = GenerateDockerfile(buildinfo, options);
        var dockerfilePath = Path.Combine(buildDir, "Dockerfile");
        await File.WriteAllTextAsync(dockerfilePath, dockerfile, cancellationToken);
        buildLog.AppendLine($"Generated Dockerfile at {dockerfilePath}");

        // Generate build script
        var buildScript = GenerateBuildScript(buildinfo);
        var buildScriptPath = Path.Combine(buildDir, "build.sh");
        await File.WriteAllTextAsync(buildScriptPath, buildScript, cancellationToken);

        // Build container. The source name comes from the buildinfo file and may contain
        // characters that are not legal in an image name (Debian source names allow '+'
        // and '.'), so it is normalized before being embedded.
        var containerName = $"stella-rebuild-{jobId}";
        var imageName = $"stella-rebuild-{ToImageNameComponent(buildinfo.Source)}-{jobId}";
        var runtime = options.ContainerRuntime == ContainerRuntime.Podman ? "podman" : "docker";

        // NOTE(review): buildDir is interpolated unquoted here and in the -v mount below; a
        // path containing spaces would split into several arguments. Confirm how
        // RunContainerCommandAsync tokenizes its argument string before adding quotes.
        buildLog.AppendLine("Building container image...");
        var buildImageResult = await RunContainerCommandAsync(
            runtime,
            $"build -t {imageName} {buildDir}",
            options.Timeout,
            cancellationToken);

        if (!buildImageResult.Success)
        {
            return RebuildResult.Failed(jobId, "Container image build failed", buildImageResult.Output, RebuildBackend.Local);
        }

        buildLog.AppendLine(buildImageResult.Output);

        // Run build container
        buildLog.AppendLine("Running rebuild in container...");
        var runArgs = new StringBuilder($"run --name {containerName} --rm");

        if (options.CpuLimit.HasValue)
        {
            // Invariant formatting: a fractional limit must render as "1.5", never "1,5",
            // whatever the current culture is.
            runArgs.Append(FormattableString.Invariant($" --cpus={options.CpuLimit.Value}"));
        }

        if (!string.IsNullOrEmpty(options.MemoryLimit))
        {
            runArgs.Append($" --memory={options.MemoryLimit}");
        }

        runArgs.Append($" -v {buildDir}/output:/output {imageName}");
        Directory.CreateDirectory(Path.Combine(buildDir, "output"));

        var runResult = await RunContainerCommandAsync(
            runtime,
            runArgs.ToString(),
            options.Timeout,
            cancellationToken);

        buildLog.AppendLine(runResult.Output);

        if (!runResult.Success)
        {
            return RebuildResult.Failed(jobId, "Build execution failed", buildLog.ToString(), RebuildBackend.Local);
        }

        // Collect artifacts
        var outputDir = Path.Combine(buildDir, "output");
        var artifacts = await CollectArtifactsAsync(outputDir, cancellationToken);

        // Verify checksums
        var checksumResults = await VerifyChecksumsAsync(artifacts, buildinfo, cancellationToken);
        var reproducible = checksumResults.All(c => c.Matches);

        sw.Stop();
        _logger.LogInformation(
            "Rebuild completed: {Package} {Version} - Reproducible: {Reproducible}",
            buildinfo.Source, buildinfo.Version, reproducible);

        return new RebuildResult
        {
            JobId = jobId,
            Success = true,
            Reproducible = reproducible,
            Artifacts = artifacts,
            BuildLog = buildLog.ToString(),
            Duration = sw.Elapsed,
            Backend = RebuildBackend.Local,
            ChecksumResults = checksumResults,
            BuildinfoPath = buildinfoPath
        };
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogError(ex, "Local rebuild failed for {BuildinfoPath}", buildinfoPath);
        return RebuildResult.Failed(jobId, ex.Message, buildLog.ToString(), RebuildBackend.Local);
    }
}

/// <summary>
/// Normalizes free text for use inside a container image name: ASCII letters are
/// lower-cased, digits are kept, and every other character becomes '-'.
/// </summary>
private static string ToImageNameComponent(string? value)
{
    if (string.IsNullOrEmpty(value))
    {
        return "unknown";
    }

    var builder = new StringBuilder(value.Length);
    foreach (var ch in value)
    {
        var lower = char.ToLowerInvariant(ch);
        var isAllowed = lower is (>= 'a' and <= 'z') or (>= '0' and <= '9');
        builder.Append(isAllowed ? lower : '-');
    }

    return builder.ToString();
}
/// <summary>
/// Reads the fields of a .buildinfo file that the rebuild needs.
/// </summary>
/// <remarks>
/// The file is treated as RFC 822-style control data: a line of the form <c>Key: value</c>
/// starts a field, and following lines that begin with a space or tab continue it.
/// Continuation lines are read for <c>Checksums-Sha256</c> (<c>hash size filename</c> per line)
/// and <c>Installed-Build-Depends</c> (comma-separated entries); for other fields only the
/// first line is used. Field names are matched case-sensitively. <c>Environment</c> is
/// recognized but not parsed.
/// </remarks>
/// <param name="path">Path of the .buildinfo file.</param>
/// <param name="ct">Cancellation token for the file read.</param>
/// <returns>The parsed data; fields absent from the file keep their default values.</returns>
private async Task<BuildinfoData> ParseBuildinfoAsync(string path, CancellationToken ct)
{
    var content = await File.ReadAllTextAsync(path, ct);
    var data = new BuildinfoData();

    List<string>? buildDepends = null;
    Dictionary<string, string>? checksums = null;
    string? currentField = null;

    foreach (var rawLine in content.Split('\n'))
    {
        var line = rawLine.TrimEnd('\r');

        // A blank line ends the current field.
        if (string.IsNullOrWhiteSpace(line))
        {
            currentField = null;
            continue;
        }

        // Continuation line: belongs to whichever field was opened last. These lines usually
        // contain no ':' at all, so they must be handled before the "Key: value" split.
        if (line[0] is ' ' or '\t')
        {
            var continuation = line.Trim();
            if (currentField == "Checksums-Sha256" && checksums is not null)
            {
                // "<sha256> <size> <filename>"
                var parts = continuation.Split(' ', StringSplitOptions.RemoveEmptyEntries);
                if (parts.Length >= 3)
                {
                    checksums[parts[2]] = parts[0];
                }
            }
            else if (currentField == "Installed-Build-Depends" && buildDepends is not null)
            {
                AddDependencies(buildDepends, continuation);
            }

            continue;
        }

        var colonIdx = line.IndexOf(':');
        if (colonIdx < 0)
        {
            // Not a field line (for example a signature armor line); ends the current field.
            currentField = null;
            continue;
        }

        var key = line[..colonIdx].Trim();
        var value = line[(colonIdx + 1)..].Trim();
        currentField = key;

        switch (key)
        {
            case "Source":
                data.Source = value;
                break;
            case "Version":
                data.Version = value;
                break;
            case "Architecture":
                data.Architecture = value;
                break;
            case "Build-Origin":
                data.BuildOrigin = value;
                break;
            case "Build-Architecture":
                data.BuildArchitecture = value;
                break;
            case "Build-Date":
                data.BuildDate = value;
                break;
            case "Build-Path":
                data.BuildPath = value;
                break;
            case "Installed-Build-Depends":
                // Entries may start on this line and/or follow on continuation lines.
                buildDepends = new List<string>();
                AddDependencies(buildDepends, value);
                data.InstalledBuildDepends = buildDepends;
                break;
            case "Checksums-Sha256":
                // Entries follow on continuation lines.
                checksums = new Dictionary<string, string>();
                data.Checksums = checksums;
                break;
            case "Environment":
                // Environment variables are not parsed.
                break;
        }
    }

    return data;

    // Splits a comma-separated dependency list, dropping blanks left by trailing commas.
    static void AddDependencies(List<string> target, string text)
    {
        target.AddRange(text.Split(
            ',',
            StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries));
    }
}
/// <summary>
/// Produces the Dockerfile text for the build container.
/// </summary>
/// <param name="buildinfo">Parsed buildinfo; supplies the dependency list and the build path.</param>
/// <param name="options">Per-rebuild options; <c>BaseImage</c> overrides the configured default image.</param>
/// <returns>The Dockerfile content, one instruction per line.</returns>
private string GenerateDockerfile(BuildinfoData buildinfo, LocalRebuildOptions options)
{
    var dockerfile = new StringBuilder();
    var image = options.BaseImage ?? _options.DefaultBaseImage;

    dockerfile.AppendLine($"FROM {image}");
    dockerfile.AppendLine();
    dockerfile.AppendLine("# Install build dependencies");
    dockerfile.AppendLine("RUN apt-get update && apt-get install -y \\");
    dockerfile.AppendLine(" build-essential \\");
    dockerfile.AppendLine(" devscripts \\");
    dockerfile.AppendLine(" dpkg-dev \\");
    dockerfile.AppendLine(" fakeroot \\");
    dockerfile.AppendLine(" debhelper \\");

    // Package-specific build dependencies: only the first 20 entries are considered
    // (keeps the Dockerfile short), and only the bare package name is emitted.
    var packageNames = (buildinfo.InstalledBuildDepends ?? Enumerable.Empty<string>())
        .Take(20)
        .Select(entry => PackageNameRegex().Match(entry))
        .Where(m => m.Success)
        .Select(m => m.Groups[1].Value);

    foreach (var packageName in packageNames)
    {
        dockerfile.AppendLine($" {packageName} \\");
    }

    dockerfile.AppendLine(" && rm -rf /var/lib/apt/lists/*");
    dockerfile.AppendLine();

    // Work in the recorded build path when the buildinfo has one, otherwise in /build.
    dockerfile.AppendLine(string.IsNullOrEmpty(buildinfo.BuildPath)
        ? "WORKDIR /build"
        : $"WORKDIR {buildinfo.BuildPath}");
    dockerfile.AppendLine();

    dockerfile.AppendLine("# Copy build script");
    dockerfile.AppendLine("COPY build.sh /build/build.sh");
    dockerfile.AppendLine("RUN chmod +x /build/build.sh");
    dockerfile.AppendLine();
    dockerfile.AppendLine("CMD [\"/build/build.sh\"]");

    return dockerfile.ToString();
}
/// <summary>
/// Produces the bash script that fetches the source package, builds it and copies the
/// results to <c>/output</c>.
/// </summary>
/// <param name="buildinfo">Parsed buildinfo; supplies the source package name and version.</param>
/// <returns>The script text.</returns>
/// <remarks>
/// The package name and version come from a <c>.buildinfo</c> file and are therefore
/// single-quoted before being placed in the script, so shell metacharacters in those
/// fields are passed as literal text instead of being interpreted by bash.
/// </remarks>
private static string GenerateBuildScript(BuildinfoData buildinfo)
{
    // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
    static string ShellQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    var quotedSource = ShellQuote(buildinfo.Source);
    var quotedSourceAndVersion = ShellQuote($"{buildinfo.Source}={buildinfo.Version}");

    var sb = new StringBuilder();
    sb.AppendLine("#!/bin/bash");
    sb.AppendLine("set -ex");
    sb.AppendLine();
    sb.AppendLine("# Fetch source package");
    sb.AppendLine($"apt-get source {quotedSourceAndVersion}");
    sb.AppendLine();
    // The glob stays outside the quotes so it still expands to the unpacked directory.
    sb.AppendLine($"cd {quotedSource}-*");
    sb.AppendLine();
    sb.AppendLine("# Build package");
    sb.AppendLine("dpkg-buildpackage -b -uc -us");
    sb.AppendLine();
    sb.AppendLine("# Copy artifacts to output");
    // "|| true": a missing artifact kind must not abort the script under "set -e".
    sb.AppendLine("cp ../*.deb /output/ || true");
    sb.AppendLine("cp ../*.buildinfo /output/ || true");
    sb.AppendLine("cp ../*.changes /output/ || true");
    return sb.ToString();
}
/// <summary>
/// Runs a container runtime command and captures its combined stdout/stderr.
/// </summary>
/// <param name="runtime">Executable to start.</param>
/// <param name="args">Command-line arguments passed to the executable.</param>
/// <param name="timeout">Maximum time to wait for the process to exit.</param>
/// <param name="ct">Token that cancels the wait.</param>
/// <returns>
/// <c>Success</c> is <c>true</c> only when the process exited with code 0. <c>Output</c> is the
/// captured text; when the wait was abandoned it ends with <c>[TIMEOUT]</c> or, if
/// <paramref name="ct"/> was cancelled, <c>[CANCELLED]</c>.
/// </returns>
private async Task<(bool Success, string Output)> RunContainerCommandAsync(
    string runtime,
    string args,
    TimeSpan timeout,
    CancellationToken ct)
{
    var psi = new ProcessStartInfo
    {
        FileName = runtime,
        Arguments = args,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true
    };

    using var process = new Process { StartInfo = psi };

    // stdout and stderr are delivered through separate callbacks that may run
    // concurrently; StringBuilder is not thread-safe, so every access is locked.
    var output = new StringBuilder();
    var outputGate = new object();

    void Append(string? data)
    {
        if (data is null)
        {
            return;
        }

        lock (outputGate)
        {
            output.AppendLine(data);
        }
    }

    string Snapshot()
    {
        lock (outputGate)
        {
            return output.ToString();
        }
    }

    process.OutputDataReceived += (_, e) => Append(e.Data);
    process.ErrorDataReceived += (_, e) => Append(e.Data);

    process.Start();
    process.BeginOutputReadLine();
    process.BeginErrorReadLine();

    using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    cts.CancelAfter(timeout);

    try
    {
        await process.WaitForExitAsync(cts.Token);
        return (process.ExitCode == 0, Snapshot());
    }
    catch (OperationCanceledException)
    {
        try
        {
            process.Kill(true);
        }
        catch (Exception ex) when (ex is InvalidOperationException
                                   or System.ComponentModel.Win32Exception
                                   or AggregateException)
        {
            // Best-effort cleanup: the failure result below must still be returned.
            _logger.LogWarning(ex, "Failed to terminate {Runtime} process tree", runtime);
        }

        // Distinguish a caller-requested cancellation from the timeout elapsing.
        var marker = ct.IsCancellationRequested ? "[CANCELLED]" : "[TIMEOUT]";
        return (false, Snapshot() + "\n" + marker);
    }
}
/// <summary>
/// Describes every file found directly inside the build output directory.
/// </summary>
/// <param name="outputDir">Directory to scan; a missing directory yields an empty list.</param>
/// <param name="ct">Token forwarded to the hashing and symbol-detection helpers.</param>
/// <returns>One <see cref="RebuildArtifact"/> per file, with size, SHA-256 and inferred type.</returns>
private static async Task<List<RebuildArtifact>> CollectArtifactsAsync(string outputDir, CancellationToken ct)
{
    var collected = new List<RebuildArtifact>();

    if (Directory.Exists(outputDir))
    {
        foreach (var path in Directory.GetFiles(outputDir))
        {
            var info = new FileInfo(path);
            var sha256 = await ComputeSha256Async(path, ct);
            var name = info.Name;

            var artifact = new RebuildArtifact
            {
                Filename = name,
                Path = path,
                Size = info.Length,
                Sha256 = sha256,
                Type = InferArtifactType(name),
                HasDwarfSymbols = await HasDwarfSymbolsAsync(path, ct)
            };
            collected.Add(artifact);
        }
    }

    return collected;
}
/// <summary>
/// Pairs each artifact's computed SHA-256 with the digest recorded for the same file name
/// in the buildinfo.
/// </summary>
/// <param name="artifacts">Artifacts produced by the rebuild.</param>
/// <param name="buildinfo">Parsed buildinfo whose <c>Checksums</c> map supplies the expected digests.</param>
/// <param name="ct">Not used by the current implementation.</param>
/// <returns>
/// One entry per artifact, in input order. When no digest is recorded for a file name the
/// expected value is the literal <c>unknown</c>.
/// </returns>
private static async Task<IReadOnlyList<ChecksumVerification>> VerifyChecksumsAsync(
    IReadOnlyList<RebuildArtifact> artifacts,
    BuildinfoData buildinfo,
    CancellationToken ct)
{
    return artifacts
        .Select(item => new ChecksumVerification
        {
            Filename = item.Filename,
            ExpectedSha256 = buildinfo.Checksums?.GetValueOrDefault(item.Filename) ?? "unknown",
            ActualSha256 = item.Sha256
        })
        .ToList();
}
/// <summary>
/// Classifies an artifact by its file name.
/// </summary>
/// <param name="filename">File name without directory.</param>
/// <returns>
/// <see cref="RebuildArtifactType.DebugSymbols"/> for <c>.deb</c> files whose package name ends
/// in <c>-dbgsym</c> or <c>-dbg</c>, <see cref="RebuildArtifactType.DebPackage"/> for other
/// <c>.deb</c> files, <see cref="RebuildArtifactType.BuildLog"/> for <c>.log</c> files, and
/// <see cref="RebuildArtifactType.Other"/> otherwise.
/// </returns>
private static RebuildArtifactType InferArtifactType(string filename)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    static bool IsDebugName(string name) =>
        name.EndsWith("-dbgsym", IgnoreCase) || name.EndsWith("-dbg", IgnoreCase);

    if (filename.EndsWith(".deb", IgnoreCase))
    {
        // Debian binary packages are named <package>_<version>_<arch>.deb, so the debug
        // suffix sits at the end of the package name, not directly before ".deb".
        var stem = filename[..^4];
        var underscore = stem.IndexOf('_');
        var packageName = underscore >= 0 ? stem[..underscore] : stem;

        // The whole stem is checked too so names such as "foo-dbgsym.deb" keep matching.
        return IsDebugName(packageName) || IsDebugName(stem)
            ? RebuildArtifactType.DebugSymbols
            : RebuildArtifactType.DebPackage;
    }

    if (filename.EndsWith(".log", IgnoreCase))
    {
        return RebuildArtifactType.BuildLog;
    }

    return RebuildArtifactType.Other;
}
/// <summary>
/// Computes the SHA-256 digest of a file.
/// </summary>
/// <param name="filePath">File to hash.</param>
/// <param name="ct">Token that cancels the hashing.</param>
/// <returns>The digest as lowercase hexadecimal text.</returns>
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
    byte[] digest;
    await using (var input = File.OpenRead(filePath))
    {
        digest = await SHA256.HashDataAsync(input, ct);
    }

    var upperHex = Convert.ToHexString(digest);
    return upperHex.ToLowerInvariant();
}
/// <summary>
/// Placeholder check for DWARF debug information.
/// </summary>
/// <param name="filePath">Path of the artifact.</param>
/// <param name="ct">Not used by the current implementation.</param>
/// <returns>
/// A completed task whose result is <c>true</c> when the path ends in <c>.deb</c>
/// (case-insensitive); the file content is not inspected.
/// </returns>
private static Task<bool> HasDwarfSymbolsAsync(string filePath, CancellationToken ct)
{
    // A real implementation would inspect the ELF sections (libelf / readelf).
    // Until then every .deb is treated as possibly carrying symbols.
    var looksLikeDeb = filePath.EndsWith(".deb", StringComparison.OrdinalIgnoreCase);
    return Task.FromResult(looksLikeDeb);
}
// Matches the leading package-name token of a dependency entry: anchored at the start of
// the input, one character from [a-z0-9] followed by one or more from [a-z0-9+.-].
// Capture group 1 holds the name; whatever follows it is not matched.
[GeneratedRegex(@"^([a-z0-9][a-z0-9+.-]+)")]
private static partial Regex PackageNameRegex();
}
/// <summary>
/// Options for local rebuild backend.
/// </summary>
/// <remarks>
/// All properties are init-only and carry defaults, so a parameterless instance is usable as-is.
/// </remarks>
public sealed record LocalRebuildBackendOptions
{
/// <summary>
/// Gets the default base image for builds. Defaults to <c>debian:bookworm</c>.
/// The generated Dockerfile uses it as the <c>FROM</c> image when
/// <see cref="LocalRebuildOptions.BaseImage"/> is <c>null</c>.
/// </summary>
public string DefaultBaseImage { get; init; } = "debian:bookworm";
/// <summary>
/// Gets the container runtime. Defaults to <see cref="ContainerRuntime.Docker"/>.
/// </summary>
public ContainerRuntime ContainerRuntime { get; init; } = ContainerRuntime.Docker;
/// <summary>
/// Gets the default timeout. Defaults to two hours.
/// </summary>
public TimeSpan DefaultTimeout { get; init; } = TimeSpan.FromHours(2);
}
/// <summary>
/// Parsed .buildinfo data.
/// </summary>
/// <remarks>
/// Mutable holder filled field by field while a <c>.buildinfo</c> file is parsed. String
/// properties hold the trimmed text after the colon of the field named in each comment.
/// </remarks>
internal sealed class BuildinfoData
{
/// <summary>Value of the <c>Source</c> field; empty when the field is absent.</summary>
public string Source { get; set; } = "";
/// <summary>Value of the <c>Version</c> field; empty when the field is absent.</summary>
public string Version { get; set; } = "";
/// <summary>Value of the <c>Architecture</c> field; empty when the field is absent.</summary>
public string Architecture { get; set; } = "";
/// <summary>Value of the <c>Build-Origin</c> field, or <c>null</c> when absent.</summary>
public string? BuildOrigin { get; set; }
/// <summary>Value of the <c>Build-Architecture</c> field, or <c>null</c> when absent.</summary>
public string? BuildArchitecture { get; set; }
/// <summary>Value of the <c>Build-Date</c> field, kept as unparsed text; <c>null</c> when absent.</summary>
public string? BuildDate { get; set; }
/// <summary>Value of the <c>Build-Path</c> field, or <c>null</c> when absent.</summary>
public string? BuildPath { get; set; }
/// <summary>
/// Entries of the <c>Installed-Build-Depends</c> field, split on commas and trimmed;
/// <c>null</c> when the field is absent.
/// </summary>
public List<string>? InstalledBuildDepends { get; set; }
/// <summary>
/// SHA-256 digests keyed by file name. The dictionary is created when a
/// <c>Checksums-Sha256</c> field is encountered and stays <c>null</c> otherwise.
/// </summary>
public Dictionary<string, string>? Checksums { get; set; }
}

View File

@@ -0,0 +1,458 @@
// -----------------------------------------------------------------------------
// RebuildModels.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-001 - Rebuild Service Abstractions
// Description: Request/response models for reproducible rebuilds.
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Describes one reproducible-rebuild request: which package to rebuild and how.
/// </summary>
public sealed record RebuildRequest
{
    /// <summary>
    /// Gets the name of the package to rebuild.
    /// </summary>
    public required string Package { get; init; }

    /// <summary>
    /// Gets the version of the package to rebuild.
    /// </summary>
    public required string Version { get; init; }

    /// <summary>
    /// Gets the architecture to build for.
    /// </summary>
    public required string Architecture { get; init; }

    /// <summary>
    /// Gets the distribution (e.g., "bookworm", "sid"), if one was specified.
    /// </summary>
    public string? Distribution { get; init; }

    /// <summary>
    /// Gets the backend the caller would like to use. Defaults to
    /// <see cref="RebuildBackend.ReproduceDebian"/>.
    /// </summary>
    public RebuildBackend PreferredBackend { get; init; } = RebuildBackend.ReproduceDebian;

    /// <summary>
    /// Gets the path to a .buildinfo file (for local rebuilds).
    /// </summary>
    public string? BuildinfoPath { get; init; }

    /// <summary>
    /// Gets additional environment variables for the build, if any.
    /// </summary>
    public IReadOnlyDictionary<string, string>? EnvironmentVariables { get; init; }

    /// <summary>
    /// Gets the time limit for the rebuild. Defaults to two hours.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromHours(2);

    /// <summary>
    /// Gets whether checksums are verified after the rebuild. Defaults to <c>true</c>.
    /// </summary>
    public bool VerifyChecksums { get; init; } = true;

    /// <summary>
    /// Checks that the mandatory text fields are present.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// <see cref="Package"/>, <see cref="Version"/> or <see cref="Architecture"/> is null,
    /// empty or whitespace. The fields are checked in that order.
    /// </exception>
    public void Validate()
    {
        static void Require(string? value, string message)
        {
            if (string.IsNullOrWhiteSpace(value))
            {
                throw new ArgumentException(message);
            }
        }

        Require(Package, "Package name is required");
        Require(Version, "Version is required");
        Require(Architecture, "Architecture is required");
    }
}
/// <summary>
/// Outcome of a reproducible rebuild, successful or not.
/// </summary>
public sealed record RebuildResult
{
    /// <summary>
    /// Gets the identifier of the rebuild job.
    /// </summary>
    public required string JobId { get; init; }

    /// <summary>
    /// Gets whether the rebuild completed successfully.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Gets whether the rebuild was byte-identical to the original; <c>null</c> when not set.
    /// </summary>
    public bool? Reproducible { get; init; }

    /// <summary>
    /// Gets the artifacts produced by the rebuild, if any.
    /// </summary>
    public IReadOnlyList<RebuildArtifact>? Artifacts { get; init; }

    /// <summary>
    /// Gets the captured build log, if any.
    /// </summary>
    public string? BuildLog { get; init; }

    /// <summary>
    /// Gets the error message of a failed rebuild.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Gets how long the build took, if measured.
    /// </summary>
    public TimeSpan? Duration { get; init; }

    /// <summary>
    /// Gets the backend that performed the rebuild.
    /// </summary>
    public RebuildBackend Backend { get; init; }

    /// <summary>
    /// Gets the per-file checksum comparison results, if any.
    /// </summary>
    public IReadOnlyList<ChecksumVerification>? ChecksumResults { get; init; }

    /// <summary>
    /// Gets the path of the .buildinfo file the rebuild was based on.
    /// </summary>
    public string? BuildinfoPath { get; init; }

    /// <summary>
    /// Creates a result with <see cref="Success"/> set to <c>true</c>.
    /// </summary>
    /// <param name="jobId">Identifier of the rebuild job.</param>
    /// <param name="artifacts">Artifacts produced by the rebuild.</param>
    /// <param name="reproducible">Whether the rebuild matched the original.</param>
    /// <param name="backend">Backend that performed the rebuild.</param>
    /// <returns>The populated result; all other properties keep their defaults.</returns>
    public static RebuildResult Successful(
        string jobId,
        IReadOnlyList<RebuildArtifact> artifacts,
        bool reproducible,
        RebuildBackend backend)
    {
        var result = new RebuildResult
        {
            JobId = jobId,
            Success = true,
            Reproducible = reproducible,
            Artifacts = artifacts,
            Backend = backend
        };
        return result;
    }

    /// <summary>
    /// Creates a result with <see cref="Success"/> set to <c>false</c>.
    /// </summary>
    /// <param name="jobId">Identifier of the rebuild job.</param>
    /// <param name="error">Description of what went wrong.</param>
    /// <param name="buildLog">Build log captured up to the failure, if any.</param>
    /// <param name="backend">Backend that attempted the rebuild.</param>
    /// <returns>The populated result; all other properties keep their defaults.</returns>
    public static RebuildResult Failed(
        string jobId,
        string error,
        string? buildLog = null,
        RebuildBackend backend = RebuildBackend.Local)
    {
        var result = new RebuildResult
        {
            JobId = jobId,
            Success = false,
            Error = error,
            BuildLog = buildLog,
            Backend = backend
        };
        return result;
    }
}
/// <summary>
/// A rebuilt artifact: one file produced by, or downloaded for, a rebuild.
/// </summary>
public sealed record RebuildArtifact
{
/// <summary>
/// Gets the artifact filename.
/// </summary>
public required string Filename { get; init; }
/// <summary>
/// Gets the local path to the artifact.
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Gets the artifact size in bytes.
/// </summary>
public required long Size { get; init; }
/// <summary>
/// Gets the SHA-256 hash of the artifact as hexadecimal text.
/// </summary>
public required string Sha256 { get; init; }
/// <summary>
/// Gets the artifact type. Defaults to the first enum member when not set.
/// </summary>
public RebuildArtifactType Type { get; init; }
/// <summary>
/// Gets whether DWARF symbols are present. Defaults to <c>false</c> when not set.
/// </summary>
public bool HasDwarfSymbols { get; init; }
}
/// <summary>
/// Type of rebuild artifact.
/// </summary>
/// <remarks>
/// <see cref="DebPackage"/> is the first member and therefore the default value of the enum.
/// </remarks>
public enum RebuildArtifactType
{
/// <summary>
/// Debian binary package (.deb).
/// </summary>
DebPackage,
/// <summary>
/// Debug symbols package (-dbgsym.deb).
/// </summary>
DebugSymbols,
/// <summary>
/// ELF binary.
/// </summary>
ElfBinary,
/// <summary>
/// Shared library.
/// </summary>
SharedLibrary,
/// <summary>
/// Build log.
/// </summary>
BuildLog,
/// <summary>
/// Other artifact type; used when no more specific type applies.
/// </summary>
Other
}
/// <summary>
/// Status of a rebuild job at one point in time.
/// </summary>
public sealed record RebuildStatus
{
/// <summary>
/// Gets the job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets the current state.
/// </summary>
public required RebuildState State { get; init; }
/// <summary>
/// Gets progress percentage (0-100), or <c>null</c> when not reported.
/// The range is a convention; the type does not enforce it.
/// </summary>
public int? Progress { get; init; }
/// <summary>
/// Gets the current stage description (free-form text).
/// </summary>
public string? CurrentStage { get; init; }
/// <summary>
/// Gets when the job was started, or <c>null</c> when not reported.
/// </summary>
public DateTimeOffset? StartedAt { get; init; }
/// <summary>
/// Gets estimated completion time, or <c>null</c> when not reported.
/// </summary>
public DateTimeOffset? EstimatedCompletion { get; init; }
/// <summary>
/// Gets error message if failed.
/// </summary>
public string? Error { get; init; }
}
/// <summary>
/// State of a rebuild job.
/// </summary>
/// <remarks>
/// <see cref="Queued"/> is the first member and therefore the default value of the enum.
/// <see cref="Completed"/>, <see cref="Failed"/> and <see cref="Cancelled"/> describe a job
/// that is no longer running.
/// </remarks>
public enum RebuildState
{
/// <summary>
/// Job is queued.
/// </summary>
Queued,
/// <summary>
/// Fetching source packages.
/// </summary>
FetchingSources,
/// <summary>
/// Setting up build environment.
/// </summary>
SettingUpEnvironment,
/// <summary>
/// Building.
/// </summary>
Building,
/// <summary>
/// Verifying checksums.
/// </summary>
Verifying,
/// <summary>
/// Extracting symbols.
/// </summary>
ExtractingSymbols,
/// <summary>
/// Completed successfully.
/// </summary>
Completed,
/// <summary>
/// Failed.
/// </summary>
Failed,
/// <summary>
/// Cancelled.
/// </summary>
Cancelled
}
/// <summary>
/// Existing rebuild information: a record of a rebuild that has already been performed.
/// </summary>
public sealed record RebuildInfo
{
/// <summary>
/// Gets the job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets the package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Gets the package version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets whether it was reproducible. Not nullable: defaults to <c>false</c> when not set.
/// </summary>
public bool Reproducible { get; init; }
/// <summary>
/// Gets when the rebuild was performed.
/// </summary>
public required DateTimeOffset BuiltAt { get; init; }
/// <summary>
/// Gets the backend that was used.
/// </summary>
public RebuildBackend Backend { get; init; }
/// <summary>
/// Gets the artifact checksums, or <c>null</c> when none were recorded.
/// NOTE(review): presumably keyed by artifact file name; confirm against the producer.
/// </summary>
public IReadOnlyDictionary<string, string>? ArtifactChecksums { get; init; }
}
/// <summary>
/// Comparison of one artifact's expected and actual SHA-256 digests.
/// </summary>
public sealed record ChecksumVerification
{
    /// <summary>
    /// Gets the file name of the artifact that was checked.
    /// </summary>
    public required string Filename { get; init; }

    /// <summary>
    /// Gets the digest recorded for the artifact in the .buildinfo file.
    /// </summary>
    public required string ExpectedSha256 { get; init; }

    /// <summary>
    /// Gets the digest computed from the rebuilt artifact.
    /// </summary>
    public required string ActualSha256 { get; init; }

    /// <summary>
    /// Gets whether the expected and actual digests are equal, ignoring letter case.
    /// </summary>
    public bool Matches
    {
        get
        {
            var sameDigest = string.Equals(
                ExpectedSha256,
                ActualSha256,
                StringComparison.OrdinalIgnoreCase);
            return sameDigest;
        }
    }
}
/// <summary>
/// Options for local rebuilds, supplied per rebuild call.
/// </summary>
public sealed record LocalRebuildOptions
{
/// <summary>
/// Gets the container runtime to use. Defaults to <see cref="ContainerRuntime.Docker"/>.
/// </summary>
public ContainerRuntime ContainerRuntime { get; init; } = ContainerRuntime.Docker;
/// <summary>
/// Gets the base image for the build container. When <c>null</c>, the backend's
/// configured default image is used instead.
/// </summary>
public string? BaseImage { get; init; }
/// <summary>
/// Gets the directory for build outputs, or <c>null</c> when not specified.
/// </summary>
public string? OutputDirectory { get; init; }
/// <summary>
/// Gets whether to keep the build container after completion. Defaults to <c>false</c>.
/// </summary>
public bool KeepContainer { get; init; } = false;
/// <summary>
/// Gets whether to extract debug symbols. Defaults to <c>true</c>.
/// </summary>
public bool ExtractSymbols { get; init; } = true;
/// <summary>
/// Gets the build timeout. Defaults to two hours.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromHours(2);
/// <summary>
/// Gets CPU limit for the container, or <c>null</c> for no explicit limit.
/// NOTE(review): the unit (presumably a CPU count) is not visible here; confirm against the backend.
/// </summary>
public int? CpuLimit { get; init; }
/// <summary>
/// Gets memory limit for the container, or <c>null</c> for no explicit limit.
/// NOTE(review): the accepted text format is not visible here; confirm against the backend.
/// </summary>
public string? MemoryLimit { get; init; }
}
/// <summary>
/// Container runtime for local builds.
/// </summary>
/// <remarks>
/// <see cref="Docker"/> is the first member and therefore the default value of the enum.
/// </remarks>
public enum ContainerRuntime
{
/// <summary>
/// Docker.
/// </summary>
Docker,
/// <summary>
/// Podman.
/// </summary>
Podman
}

View File

@@ -0,0 +1,173 @@
// -----------------------------------------------------------------------------
// RebuildService.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-001 through REPR-007 - Service Orchestration
// Description: Main rebuild service orchestrating all backends.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Main rebuild service implementation. Delegates to the reproduce.debian.net client and
/// the local rebuild backend.
/// </summary>
/// <remarks>
/// <see cref="RequestRebuildAsync"/> and <see cref="GetStatusAsync"/> are placeholders: no job
/// is started or persisted, and the status is always <see cref="RebuildState.Queued"/>.
/// </remarks>
public sealed class RebuildService : IRebuildService
{
    private readonly ReproduceDebianClient _reproduceDebianClient;
    private readonly LocalRebuildBackend _localBackend;
    private readonly AirGapRebuildBundleService _airGapService;
    private readonly RebuildServiceOptions _options;
    private readonly ILogger<RebuildService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="RebuildService"/> class.
    /// </summary>
    /// <param name="reproduceDebianClient">Client for reproduce.debian.net.</param>
    /// <param name="localBackend">Backend that performs local rebuilds.</param>
    /// <param name="airGapService">Air-gap bundle service.</param>
    /// <param name="options">Service configuration.</param>
    /// <param name="logger">Logger for this service.</param>
    public RebuildService(
        ReproduceDebianClient reproduceDebianClient,
        LocalRebuildBackend localBackend,
        AirGapRebuildBundleService airGapService,
        IOptions<RebuildServiceOptions> options,
        ILogger<RebuildService> logger)
    {
        _reproduceDebianClient = reproduceDebianClient;
        _localBackend = localBackend;
        _airGapService = airGapService;
        _options = options.Value;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<string> RequestRebuildAsync(
        RebuildRequest request,
        CancellationToken cancellationToken = default)
    {
        request.Validate();

        _logger.LogInformation(
            "Requesting rebuild for {Package} {Version} via {Backend}",
            request.Package,
            request.Version,
            request.PreferredBackend);

        // For now, only a job ID is generated: the first 12 hex digits of a new GUID.
        var jobId = Guid.NewGuid().ToString("N")[..12];

        // Store the request for status tracking
        // In production, would persist to database
        return jobId;
    }

    /// <inheritdoc />
    public async Task<RebuildStatus> GetStatusAsync(
        string jobId,
        CancellationToken cancellationToken = default)
    {
        // In production, would query from database/job queue
        return new RebuildStatus
        {
            JobId = jobId,
            State = RebuildState.Queued,
            CurrentStage = "Pending"
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// The returned result leaves <see cref="RebuildResult.Reproducible"/> unset (<c>null</c>):
    /// downloading artifacts establishes nothing about whether the build was byte-identical.
    /// </remarks>
    public async Task<RebuildResult> DownloadArtifactsAsync(
        string jobId,
        string outputDirectory,
        CancellationToken cancellationToken = default)
    {
        Directory.CreateDirectory(outputDirectory);

        var artifacts = await _reproduceDebianClient.DownloadArtifactsAsync(
            jobId,
            outputDirectory,
            cancellationToken);

        // Reproducibility is unknown at this point, so it is reported as null rather
        // than being derived from the number of downloaded files.
        return new RebuildResult
        {
            JobId = jobId,
            Success = true,
            Reproducible = null,
            Artifacts = artifacts,
            Backend = RebuildBackend.ReproduceDebian
        };
    }

    /// <inheritdoc />
    public async Task<RebuildResult> RebuildLocalAsync(
        string buildinfoPath,
        LocalRebuildOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        if (!File.Exists(buildinfoPath))
        {
            // A missing input is reported as a failed result, not as an exception.
            return RebuildResult.Failed(
                Guid.NewGuid().ToString("N")[..12],
                $"Buildinfo file not found: {buildinfoPath}",
                backend: RebuildBackend.Local);
        }

        return await _localBackend.RebuildAsync(buildinfoPath, options, cancellationToken);
    }

    /// <inheritdoc />
    public async Task<RebuildInfo?> QueryExistingRebuildAsync(
        string package,
        string version,
        string architecture,
        CancellationToken cancellationToken = default)
    {
        _logger.LogDebug(
            "Querying existing rebuild for {Package} {Version} {Arch}",
            package, version, architecture);

        var buildInfo = await _reproduceDebianClient.QueryBuildAsync(
            package,
            version,
            architecture,
            cancellationToken);

        if (buildInfo is null)
        {
            return null;
        }

        return new RebuildInfo
        {
            JobId = buildInfo.Id,
            Package = buildInfo.Package,
            Version = buildInfo.Version,
            Architecture = buildInfo.Architecture,
            Reproducible = buildInfo.Reproducible,
            // Prefer the completion time, then the start time; MinValue marks "unknown".
            BuiltAt = buildInfo.CompletedAt ?? buildInfo.StartedAt ?? DateTimeOffset.MinValue,
            Backend = RebuildBackend.ReproduceDebian
        };
    }
}
/// <summary>
/// Configuration for the rebuild service.
/// </summary>
/// <remarks>
/// All properties are init-only and carry defaults.
/// </remarks>
public sealed record RebuildServiceOptions
{
/// <summary>
/// Gets the default backend to use. Defaults to <see cref="RebuildBackend.ReproduceDebian"/>.
/// </summary>
public RebuildBackend DefaultBackend { get; init; } = RebuildBackend.ReproduceDebian;
/// <summary>
/// Gets the output directory for artifacts. Defaults to a <c>stella-rebuilds</c>
/// folder under the system temporary directory.
/// </summary>
public string OutputDirectory { get; init; } = Path.Combine(Path.GetTempPath(), "stella-rebuilds");
/// <summary>
/// Gets whether to prefer local rebuilds. Defaults to <c>false</c>.
/// </summary>
public bool PreferLocalRebuild { get; init; } = false;
/// <summary>
/// Gets the job retention period. Defaults to 30 days.
/// </summary>
public TimeSpan JobRetention { get; init; } = TimeSpan.FromDays(30);
}

View File

@@ -0,0 +1,332 @@
// -----------------------------------------------------------------------------
// ReproduceDebianClient.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-002 - Reproduce.debian.net Integration
// Description: HTTP client for reproduce.debian.net API.
// -----------------------------------------------------------------------------
using System.Net.Http.Json;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Client for the reproduce.debian.net API.
/// </summary>
/// <remarks>
/// Responses are deserialized with snake_case property names. HTTP responses are disposed
/// once their content has been consumed.
/// </remarks>
public sealed class ReproduceDebianClient
{
    private readonly HttpClient _httpClient;
    private readonly ReproduceDebianOptions _options;
    private readonly ILogger<ReproduceDebianClient> _logger;

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Initializes a new instance of the <see cref="ReproduceDebianClient"/> class.
    /// </summary>
    /// <param name="httpClient">HTTP client used for all requests.</param>
    /// <param name="options">Client configuration; supplies the base URL.</param>
    /// <param name="logger">Logger for this client.</param>
    public ReproduceDebianClient(
        HttpClient httpClient,
        IOptions<ReproduceDebianOptions> options,
        ILogger<ReproduceDebianClient> logger)
    {
        _httpClient = httpClient;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Root URL of the builds endpoint. A trailing slash on the configured base URL is
    /// removed so the result never contains a double slash.
    /// </summary>
    private string BuildsEndpoint => $"{_options.BaseUrl.TrimEnd('/')}/api/v1/builds";

    /// <summary>
    /// Queries for existing rebuild status of a package.
    /// </summary>
    /// <param name="package">Package name.</param>
    /// <param name="version">Package version.</param>
    /// <param name="architecture">Architecture.</param>
    /// <param name="cancellationToken">Token that cancels the request.</param>
    /// <returns>The build information, or <c>null</c> when the server answers 404.</returns>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status other than 404.</exception>
    public async Task<ReproduceDebianBuildInfo?> QueryBuildAsync(
        string package,
        string version,
        string architecture,
        CancellationToken cancellationToken = default)
    {
        var url = $"{BuildsEndpoint}/{Uri.EscapeDataString(package)}";
        var query = $"?version={Uri.EscapeDataString(version)}&arch={Uri.EscapeDataString(architecture)}";

        _logger.LogDebug("Querying reproduce.debian.net for {Package} {Version} {Arch}", package, version, architecture);

        try
        {
            using var response = await _httpClient.GetAsync(url + query, cancellationToken);
            if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
            {
                return null;
            }

            response.EnsureSuccessStatusCode();
            return await response.Content.ReadFromJsonAsync<ReproduceDebianBuildInfo>(JsonOptions, cancellationToken);
        }
        catch (HttpRequestException ex)
        {
            _logger.LogWarning(ex, "Failed to query reproduce.debian.net for {Package}", package);
            throw;
        }
    }

    /// <summary>
    /// Gets the build log for a completed build.
    /// </summary>
    /// <param name="buildId">Build identifier.</param>
    /// <param name="cancellationToken">Token that cancels the request.</param>
    /// <returns>The log text, or <c>null</c> when the server answers 404.</returns>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status other than 404.</exception>
    public async Task<string?> GetBuildLogAsync(
        string buildId,
        CancellationToken cancellationToken = default)
    {
        var url = $"{BuildsEndpoint}/{Uri.EscapeDataString(buildId)}/log";

        _logger.LogDebug("Fetching build log for {BuildId}", buildId);

        try
        {
            using var response = await _httpClient.GetAsync(url, cancellationToken);
            if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
            {
                return null;
            }

            response.EnsureSuccessStatusCode();
            return await response.Content.ReadAsStringAsync(cancellationToken);
        }
        catch (HttpRequestException ex)
        {
            _logger.LogWarning(ex, "Failed to fetch build log for {BuildId}", buildId);
            throw;
        }
    }

    /// <summary>
    /// Downloads artifacts from a completed build.
    /// </summary>
    /// <param name="buildId">Build identifier.</param>
    /// <param name="outputDirectory">Directory the files are written to; created when missing.</param>
    /// <param name="cancellationToken">Token that cancels the requests and file operations.</param>
    /// <returns>
    /// One entry per downloaded file. <c>Sha256</c> is always computed from the downloaded
    /// bytes. Entries whose advertised file name is not a plain file name are skipped.
    /// </returns>
    /// <exception cref="HttpRequestException">A request failed or returned a non-success status.</exception>
    public async Task<IReadOnlyList<RebuildArtifact>> DownloadArtifactsAsync(
        string buildId,
        string outputDirectory,
        CancellationToken cancellationToken = default)
    {
        var url = $"{BuildsEndpoint}/{Uri.EscapeDataString(buildId)}/artifacts";

        _logger.LogDebug("Fetching artifact list for {BuildId}", buildId);

        ReproduceDebianArtifactList? artifactList;
        using (var listResponse = await _httpClient.GetAsync(url, cancellationToken))
        {
            listResponse.EnsureSuccessStatusCode();
            artifactList = await listResponse.Content.ReadFromJsonAsync<ReproduceDebianArtifactList>(JsonOptions, cancellationToken);
        }

        if (artifactList?.Artifacts is null || artifactList.Artifacts.Count == 0)
        {
            _logger.LogWarning("No artifacts found for build {BuildId}", buildId);
            return [];
        }

        Directory.CreateDirectory(outputDirectory);
        var results = new List<RebuildArtifact>();

        foreach (var artifact in artifactList.Artifacts)
        {
            // The file name comes from the server. Accept it only when it is a plain file
            // name, so a value such as "../x" or an absolute path cannot escape outputDirectory.
            var safeName = Path.GetFileName(artifact.Filename);
            if (string.IsNullOrEmpty(safeName) ||
                safeName is "." or ".." ||
                !string.Equals(safeName, artifact.Filename, StringComparison.Ordinal))
            {
                _logger.LogWarning("Skipping artifact with unsafe filename {Filename}", artifact.Filename);
                continue;
            }

            var artifactUrl = $"{url}/{Uri.EscapeDataString(safeName)}";
            var outputPath = Path.Combine(outputDirectory, safeName);

            _logger.LogDebug("Downloading artifact {Filename}", safeName);

            using (var downloadResponse = await _httpClient.GetAsync(artifactUrl, cancellationToken))
            {
                downloadResponse.EnsureSuccessStatusCode();

                // The stream is closed before the file is measured and hashed; otherwise
                // the length can be stale and re-opening the file can fail.
                await using var fileStream = File.Create(outputPath);
                await downloadResponse.Content.CopyToAsync(fileStream, cancellationToken);
            }

            var actualSha256 = await ComputeSha256Async(outputPath, cancellationToken);
            if (artifact.Sha256 is not null &&
                !string.Equals(artifact.Sha256, actualSha256, StringComparison.OrdinalIgnoreCase))
            {
                _logger.LogWarning(
                    "Checksum mismatch for artifact {Filename}: advertised {Expected}, downloaded {Actual}",
                    safeName, artifact.Sha256, actualSha256);
            }

            results.Add(new RebuildArtifact
            {
                Filename = safeName,
                Path = outputPath,
                Size = new FileInfo(outputPath).Length,
                Sha256 = actualSha256,
                Type = InferArtifactType(safeName)
            });
        }

        _logger.LogInformation("Downloaded {Count} artifacts for build {BuildId}", results.Count, buildId);
        return results;
    }

    /// <summary>
    /// Lists all builds for a package.
    /// </summary>
    /// <param name="package">Package name.</param>
    /// <param name="limit">Value sent as the <c>limit</c> query parameter.</param>
    /// <param name="cancellationToken">Token that cancels the request.</param>
    /// <returns>The builds, or an empty list when the server answers 404 or returns none.</returns>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status other than 404.</exception>
    public async Task<IReadOnlyList<ReproduceDebianBuildInfo>> ListBuildsAsync(
        string package,
        int limit = 10,
        CancellationToken cancellationToken = default)
    {
        var url = $"{BuildsEndpoint}/{Uri.EscapeDataString(package)}?limit={limit}";

        using var response = await _httpClient.GetAsync(url, cancellationToken);
        if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            return [];
        }

        response.EnsureSuccessStatusCode();
        var result = await response.Content.ReadFromJsonAsync<ReproduceDebianBuildList>(JsonOptions, cancellationToken);
        return result?.Builds ?? [];
    }

    /// <summary>
    /// Classifies an artifact by its file name: debug-symbol package, binary package,
    /// shared library, build log, or other.
    /// </summary>
    private static RebuildArtifactType InferArtifactType(string filename)
    {
        const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

        static bool IsDebugName(string name) =>
            name.EndsWith("-dbgsym", IgnoreCase) || name.EndsWith("-dbg", IgnoreCase);

        if (filename.EndsWith(".deb", IgnoreCase))
        {
            // Debian binary packages are named <package>_<version>_<arch>.deb, so the debug
            // suffix sits at the end of the package name, not directly before ".deb".
            var stem = filename[..^4];
            var underscore = stem.IndexOf('_');
            var packageName = underscore >= 0 ? stem[..underscore] : stem;

            // The whole stem is checked too so names such as "foo-dbgsym.deb" keep matching.
            return IsDebugName(packageName) || IsDebugName(stem)
                ? RebuildArtifactType.DebugSymbols
                : RebuildArtifactType.DebPackage;
        }

        if (filename.EndsWith(".so", IgnoreCase) ||
            filename.Contains(".so.", IgnoreCase))
        {
            return RebuildArtifactType.SharedLibrary;
        }

        if (filename.EndsWith(".log", IgnoreCase))
        {
            return RebuildArtifactType.BuildLog;
        }

        return RebuildArtifactType.Other;
    }

    /// <summary>
    /// Computes the SHA-256 digest of a file as lowercase hexadecimal text.
    /// </summary>
    private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
    {
        await using var stream = File.OpenRead(filePath);
        var hash = await System.Security.Cryptography.SHA256.HashDataAsync(stream, ct);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
/// <summary>
/// Configuration for reproduce.debian.net client.
/// </summary>
/// <remarks>
/// All properties are init-only and carry defaults.
/// </remarks>
public sealed record ReproduceDebianOptions
{
/// <summary>
/// Gets the base URL for the API. Defaults to <c>https://reproduce.debian.net</c>.
/// </summary>
public string BaseUrl { get; init; } = "https://reproduce.debian.net";
/// <summary>
/// Gets the request timeout. Defaults to five minutes.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Gets the maximum retry count. Defaults to 3.
/// NOTE(review): no retry logic that reads this value is visible in this file; confirm where it is applied.
/// </summary>
public int MaxRetries { get; init; } = 3;
/// <summary>
/// Gets the delay between retries. Defaults to five seconds.
/// NOTE(review): no retry logic that reads this value is visible in this file; confirm where it is applied.
/// </summary>
public TimeSpan RetryDelay { get; init; } = TimeSpan.FromSeconds(5);
}
/// <summary>
/// Build info from reproduce.debian.net.
/// </summary>
/// <remarks>
/// Response model; deserialization fails when a <c>required</c> property is missing from the payload.
/// </remarks>
public sealed record ReproduceDebianBuildInfo
{
/// <summary>
/// Gets the build ID.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Gets the package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Gets the version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the build status as reported by the server (free-form text).
/// </summary>
public required string Status { get; init; }
/// <summary>
/// Gets whether the build was reproducible. Defaults to <c>false</c> when the payload omits it.
/// </summary>
public bool Reproducible { get; init; }
/// <summary>
/// Gets when the build was started, or <c>null</c> when not reported.
/// </summary>
public DateTimeOffset? StartedAt { get; init; }
/// <summary>
/// Gets when the build completed, or <c>null</c> when not reported.
/// </summary>
public DateTimeOffset? CompletedAt { get; init; }
/// <summary>
/// Gets the buildinfo file hash, or <c>null</c> when not reported.
/// </summary>
public string? BuildinfoSha256 { get; init; }
}
/// <summary>
/// Build list from reproduce.debian.net (response envelope around the individual builds).
/// </summary>
public sealed record ReproduceDebianBuildList
{
/// <summary>
/// Gets the list of builds, or <c>null</c> when the payload omits it.
/// </summary>
public IReadOnlyList<ReproduceDebianBuildInfo>? Builds { get; init; }
}
/// <summary>
/// Artifact from reproduce.debian.net (one entry of the artifact listing).
/// </summary>
public sealed record ReproduceDebianArtifact
{
/// <summary>
/// Gets the filename as advertised by the server.
/// </summary>
public required string Filename { get; init; }
/// <summary>
/// Gets the size as advertised by the server. Defaults to 0 when the payload omits it.
/// </summary>
public long Size { get; init; }
/// <summary>
/// Gets the SHA-256 hash as advertised by the server, or <c>null</c> when not provided.
/// </summary>
public string? Sha256 { get; init; }
}
/// <summary>
/// Artifact list from reproduce.debian.net (response envelope around the individual artifacts).
/// </summary>
public sealed record ReproduceDebianArtifactList
{
/// <summary>
/// Gets the artifacts, or <c>null</c> when the payload omits them.
/// </summary>
public IReadOnlyList<ReproduceDebianArtifact>? Artifacts { get; init; }
}

View File

@@ -0,0 +1,70 @@
// -----------------------------------------------------------------------------
// ServiceCollectionExtensions.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-007 - CLI Commands & DI
// Description: Dependency injection registration for rebuild services.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Dependency-injection registration for the reproducible rebuild services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the options, the typed HTTP client for reproduce.debian.net and the
    /// rebuild services.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <param name="configureReproduceDebian">Optional configuration of the reproduce.debian.net client options.</param>
    /// <param name="configureLocalBackend">Optional configuration of the local rebuild backend options.</param>
    /// <param name="configureService">Optional configuration of the rebuild service options.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddReproducibleRebuild(
        this IServiceCollection services,
        Action<ReproduceDebianOptions>? configureReproduceDebian = null,
        Action<LocalRebuildBackendOptions>? configureLocalBackend = null,
        Action<RebuildServiceOptions>? configureService = null)
    {
        // Options: each type is registered, then configured when a delegate was supplied.
        RegisterOptions(services, configureReproduceDebian);
        RegisterOptions(services, configureLocalBackend);
        RegisterOptions(services, configureService);

        // Typed HttpClient for reproduce.debian.net, set up from the resolved options.
        services.AddHttpClient<ReproduceDebianClient>((provider, http) =>
        {
            var resolved = provider.GetService<Microsoft.Extensions.Options.IOptions<ReproduceDebianOptions>>();
            var settings = resolved?.Value ?? new ReproduceDebianOptions();

            http.BaseAddress = new Uri(settings.BaseUrl);
            http.Timeout = settings.Timeout;
            http.DefaultRequestHeaders.Add("User-Agent", "StellaOps-BinaryIndex/1.0");
        });

        // Services: all singletons.
        services.AddSingleton<LocalRebuildBackend>();
        services.AddSingleton<AirGapRebuildBundleService>();
        services.AddSingleton<DeterminismValidator>();
        services.AddSingleton<SymbolExtractor>();
        services.AddSingleton<IRebuildService, RebuildService>();

        return services;
    }

    /// <summary>
    /// Registers <typeparamref name="TOptions"/> and applies <paramref name="configure"/> when it is not null.
    /// </summary>
    private static void RegisterOptions<TOptions>(IServiceCollection services, Action<TOptions>? configure)
        where TOptions : class
    {
        services.AddOptions<TOptions>();
        if (configure is not null)
        {
            services.Configure(configure);
        }
    }
}

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the StellaOps.BinaryIndex.GroundTruth.Reproducible project: .NET 10 target, preview C# language version, implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>StellaOps.BinaryIndex.GroundTruth.Reproducible</RootNamespace>
</PropertyGroup>
<!-- NOTE(review): these references carry no Version attribute; presumably versions are supplied centrally (for example via Directory.Packages.props). Confirm before building this project on its own. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,577 @@
// -----------------------------------------------------------------------------
// SymbolExtractor.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-005 - Symbol Extraction from Rebuilds
// Description: Extracts DWARF symbols from rebuilt binaries.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Extracts symbols from rebuilt binaries for ground-truth corpus.
/// </summary>
/// <remarks>
/// The symbol table is read by running <c>nm</c>; section headers and the decoded line table are
/// read by running <c>readelf</c>. Both tools are started by bare name, so they must be
/// resolvable through the process search path.
/// </remarks>
public sealed partial class SymbolExtractor
{
    // Row layout printed by "readelf --debug-dump=decodedline": <file name> <line> 0x<address> ...
    // Rows that close a sequence print "-" as the line and are intentionally not matched.
    private static readonly Regex ReadelfDecodedLineRegex = new(
        @"^(\S.*?)\s+(\d+)\s+0x([0-9a-f]+)",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

    // Address-first layout this parser originally looked for; kept as a fallback so any input
    // that matched before still matches.
    private static readonly Regex LegacyLineInfoRegex = new(
        @"0x([0-9a-f]+)\s+\d+\s+(\d+)\s+\d+\s+.*?([^\s/]+\.c(?:pp|xx)?)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private readonly ILogger<SymbolExtractor> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="SymbolExtractor"/> class.
    /// </summary>
    /// <param name="logger">Logger used for progress and failure messages.</param>
    public SymbolExtractor(ILogger<SymbolExtractor> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Extracts symbols from an ELF binary.
    /// </summary>
    /// <param name="binaryPath">Path of the binary to inspect.</param>
    /// <param name="options">Extraction switches; <see cref="SymbolExtractionOptions.Default"/> when null.</param>
    /// <param name="cancellationToken">Token that aborts the extraction and terminates any running tool.</param>
    /// <returns>
    /// A successful result carrying the symbols, or a failed result describing the problem.
    /// Exceptions raised while extracting (including cancellation) are logged and reported as a
    /// failed result rather than thrown. A failing <c>nm</c> run yields a successful result with
    /// no symbols.
    /// </returns>
    public async Task<SymbolExtractionResult> ExtractAsync(
        string binaryPath,
        SymbolExtractionOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        options ??= SymbolExtractionOptions.Default;
        var symbols = new List<ExtractedSymbol>();

        if (!File.Exists(binaryPath))
        {
            return SymbolExtractionResult.Failed($"File not found: {binaryPath}");
        }

        try
        {
            // Check if file is ELF
            if (!await IsElfBinaryAsync(binaryPath, cancellationToken))
            {
                return SymbolExtractionResult.Failed("Not an ELF binary");
            }

            // Extract symbols using nm
            var nmSymbols = await ExtractWithNmAsync(binaryPath, options.Demangle, cancellationToken);
            symbols.AddRange(nmSymbols);

            // Extract DWARF info using readelf if requested
            if (options.ExtractDwarf)
            {
                var dwarfInfo = await ExtractDwarfInfoAsync(binaryPath, cancellationToken);

                // Enrich symbols with DWARF source info
                EnrichWithDwarf(symbols, dwarfInfo);
            }

            // Needed by CreateObservations, which copies it into the observation provenance.
            var binarySha256 = await ComputeSha256Async(binaryPath, cancellationToken);

            _logger.LogInformation(
                "Extracted {Count} symbols from {Path}",
                symbols.Count,
                Path.GetFileName(binaryPath));

            return new SymbolExtractionResult
            {
                Success = true,
                BinaryPath = binaryPath,
                BinarySha256 = binarySha256,
                Symbols = symbols,
                HasDwarf = symbols.Any(s => s.SourceFile is not null),
                ExtractedAt = DateTimeOffset.UtcNow
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Symbol extraction failed for {Path}", binaryPath);
            return SymbolExtractionResult.Failed(ex.Message);
        }
    }

    /// <summary>
    /// Creates ground-truth observations from extracted symbols.
    /// </summary>
    /// <param name="extraction">Extraction result whose symbols are converted.</param>
    /// <param name="rebuild">Rebuild the binary came from; supplies buildinfo path and provenance.</param>
    /// <returns>
    /// One observation per extracted symbol, or an empty list when the extraction failed or
    /// carries no symbol list.
    /// </returns>
    public IReadOnlyList<GroundTruthObservation> CreateObservations(
        SymbolExtractionResult extraction,
        RebuildResult rebuild)
    {
        if (!extraction.Success || extraction.Symbols is null)
        {
            return [];
        }

        var observations = new List<GroundTruthObservation>(extraction.Symbols.Count);
        foreach (var symbol in extraction.Symbols)
        {
            observations.Add(new GroundTruthObservation
            {
                SymbolName = symbol.Name,
                DemangledName = symbol.DemangledName,
                Address = symbol.Address,
                Size = symbol.Size,
                Type = symbol.Type,
                SourceFile = symbol.SourceFile,
                SourceLine = symbol.SourceLine,
                SourceId = "reproducible-rebuild",
                BuildinfoPath = rebuild.BuildinfoPath,
                ExtractedAt = extraction.ExtractedAt,
                Provenance = new ObservationProvenance
                {
                    JobId = rebuild.JobId,
                    Backend = rebuild.Backend.ToString(),
                    // An unknown reproducibility verdict is recorded as "not reproducible".
                    Reproducible = rebuild.Reproducible ?? false,
                    BinaryHash = extraction.BinarySha256
                }
            });
        }

        return observations;
    }

    /// <summary>
    /// Checks whether the file starts with the four-byte ELF magic number.
    /// </summary>
    private static async Task<bool> IsElfBinaryAsync(string path, CancellationToken ct)
    {
        var magic = new byte[4];
        await using var stream = File.OpenRead(path);

        // A single ReadAsync may legally return fewer bytes than requested; keep reading until
        // four bytes arrive or the stream ends.
        var bytesRead = await stream.ReadAtLeastAsync(magic, magic.Length, throwOnEndOfStream: false, ct);

        // ELF magic: 0x7F 'E' 'L' 'F'
        return bytesRead == magic.Length &&
               magic[0] == 0x7F &&
               magic[1] == (byte)'E' &&
               magic[2] == (byte)'L' &&
               magic[3] == (byte)'F';
    }

    /// <summary>
    /// Computes the SHA-256 of the file as a lowercase hexadecimal string.
    /// </summary>
    private static async Task<string> ComputeSha256Async(string path, CancellationToken ct)
    {
        await using var stream = File.OpenRead(path);
        var digest = await System.Security.Cryptography.SHA256.HashDataAsync(stream, ct);

        // NOTE(review): lowercase hex without an algorithm prefix; confirm this matches the
        // digest format used by other consumers of the binary hash.
        return Convert.ToHexString(digest).ToLowerInvariant();
    }

    /// <summary>
    /// Runs <c>nm</c> over the binary and parses its defined symbols.
    /// </summary>
    /// <param name="binaryPath">Binary to inspect.</param>
    /// <param name="demangle">When true, <c>nm</c> is asked to demangle names (<c>-C</c>).</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The parsed symbols; empty when <c>nm</c> could not be run or failed.</returns>
    private async Task<IReadOnlyList<ExtractedSymbol>> ExtractWithNmAsync(
        string binaryPath,
        bool demangle,
        CancellationToken ct)
    {
        var symbols = new List<ExtractedSymbol>();

        var args = new List<string>();
        if (demangle)
        {
            args.Add("-C");
        }

        args.Add("-S");
        args.Add("--defined-only");
        args.Add("--"); // everything after this is a file name, even if it starts with '-'
        args.Add(binaryPath);

        var (success, output) = await RunToolAsync("nm", args, ct);
        if (!success)
        {
            _logger.LogWarning("nm failed for {Path}; no symbols extracted", binaryPath);
            return symbols;
        }

        // Parse nm output: address [size] type name
        foreach (var rawLine in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            var match = NmOutputRegex().Match(rawLine.TrimEnd('\r'));
            if (!match.Success)
            {
                continue;
            }

            // Skip rows whose numbers cannot be represented instead of failing the whole run.
            if (!TryParseHex(match.Groups[1].Value, out var address))
            {
                continue;
            }

            ulong size = 0;
            if (match.Groups[2].Success && !TryParseHex(match.Groups[2].Value, out size))
            {
                continue;
            }

            var name = match.Groups[4].Value;
            symbols.Add(new ExtractedSymbol
            {
                Name = name,
                // With -C the reported name is already demangled; without it no demangled form is known.
                DemangledName = demangle ? name : null,
                Address = address,
                Size = size,
                Type = MapNmType(match.Groups[3].Value)
            });
        }

        return symbols;
    }

    /// <summary>
    /// Uses <c>readelf</c> to detect DWARF sections and, when a line table is present, to read it.
    /// </summary>
    private async Task<DwarfInfo> ExtractDwarfInfoAsync(string binaryPath, CancellationToken ct)
    {
        var info = new DwarfInfo();

        // Use readelf to check for DWARF sections
        var (success, output) = await RunToolAsync("readelf", ["-S", "--", binaryPath], ct);
        if (success)
        {
            info.HasDebugInfo = output.Contains(".debug_info", StringComparison.Ordinal);
            info.HasDebugLine = output.Contains(".debug_line", StringComparison.Ordinal);
            info.HasDebugAbbrev = output.Contains(".debug_abbrev", StringComparison.Ordinal);
        }

        // Extract source line info if available
        if (info.HasDebugLine)
        {
            // --wide keeps readelf from truncating long file names in the decoded table.
            var (lineSuccess, lineOutput) = await RunToolAsync(
                "readelf",
                ["--wide", "--debug-dump=decodedline", "--", binaryPath],
                ct);

            if (lineSuccess)
            {
                info.LineInfo = ParseLineInfo(lineOutput);
            }
        }

        return info;
    }

    /// <summary>
    /// Parses decoded line-table text into an address to (file, line) map.
    /// </summary>
    /// <remarks>
    /// Each row is matched against the <c>readelf</c> layout first and against the legacy
    /// address-first layout second. When several rows share an address the last one wins.
    /// </remarks>
    private static Dictionary<ulong, (string File, int Line)> ParseLineInfo(string output)
    {
        var result = new Dictionary<ulong, (string, int)>();

        foreach (var rawLine in output.Split('\n'))
        {
            var line = rawLine.TrimEnd('\r');

            string file;
            string lineText;
            string addressText;

            var match = ReadelfDecodedLineRegex.Match(line);
            if (match.Success)
            {
                file = match.Groups[1].Value;
                lineText = match.Groups[2].Value;
                addressText = match.Groups[3].Value;
            }
            else
            {
                match = LegacyLineInfoRegex.Match(line);
                if (!match.Success)
                {
                    continue;
                }

                addressText = match.Groups[1].Value;
                lineText = match.Groups[2].Value;
                file = match.Groups[3].Value;
            }

            if (TryParseHex(addressText, out var address) &&
                int.TryParse(
                    lineText,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var lineNumber))
            {
                result[address] = (file, lineNumber);
            }
        }

        return result;
    }

    /// <summary>
    /// Copies source file and line onto every symbol whose address has a line-table entry.
    /// </summary>
    private static void EnrichWithDwarf(List<ExtractedSymbol> symbols, DwarfInfo dwarfInfo)
    {
        if (dwarfInfo.LineInfo is null)
        {
            return;
        }

        foreach (var symbol in symbols)
        {
            if (dwarfInfo.LineInfo.TryGetValue(symbol.Address, out var lineInfo))
            {
                symbol.SourceFile = lineInfo.File;
                symbol.SourceLine = lineInfo.Line;
            }
        }
    }

    /// <summary>
    /// Maps an <c>nm</c> type letter to a <see cref="SymbolType"/>.
    /// </summary>
    /// <remarks>
    /// The letter's case is significant, so the input must not be case-normalised before matching.
    /// </remarks>
    private static SymbolType MapNmType(string nmType)
    {
        return nmType switch
        {
            "T" => SymbolType.Function,
            "t" => SymbolType.LocalFunction,
            "D" => SymbolType.Data,
            "d" => SymbolType.LocalData,
            "B" => SymbolType.Bss,
            "b" => SymbolType.LocalBss,
            "R" => SymbolType.ReadOnly,
            "r" => SymbolType.LocalReadOnly,
            "W" => SymbolType.Weak,
            "w" => SymbolType.WeakUndefined,
            _ => SymbolType.Other
        };
    }

    /// <summary>
    /// Parses an unprefixed hexadecimal number without throwing on malformed or oversized input.
    /// </summary>
    private static bool TryParseHex(string text, out ulong value) =>
        ulong.TryParse(
            text,
            System.Globalization.NumberStyles.HexNumber,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);

    /// <summary>
    /// Runs an external tool and captures its standard output.
    /// </summary>
    /// <param name="tool">Executable name or path.</param>
    /// <param name="args">Arguments, passed one by one so no manual quoting is needed.</param>
    /// <param name="ct">Token that terminates the tool when cancelled.</param>
    /// <returns>
    /// Whether the tool exited with code 0, and its standard output. A tool that cannot be
    /// started yields <c>(false, "")</c>.
    /// </returns>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    private static async Task<(bool Success, string Output)> RunToolAsync(
        string tool,
        IReadOnlyList<string> args,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        try
        {
            var psi = new ProcessStartInfo
            {
                FileName = tool,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true
            };

            foreach (var arg in args)
            {
                psi.ArgumentList.Add(arg);
            }

            using var process = new Process { StartInfo = psi };
            process.Start();

            // Terminate the child when the caller cancels; this also closes its pipes so the
            // pending reads below finish.
            using var killOnCancel = ct.Register(static state => TryKill((Process)state!), process);

            // Both redirected streams must be drained: an unread stderr pipe can fill up and
            // block the child forever.
            var stdoutTask = process.StandardOutput.ReadToEndAsync();
            var stderrTask = process.StandardError.ReadToEndAsync();
            await Task.WhenAll(stdoutTask, stderrTask);

            await process.WaitForExitAsync(ct);
            ct.ThrowIfCancellationRequested();

            return (process.ExitCode == 0, await stdoutTask);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: a missing tool or a start failure is reported as "tool failed".
            return (false, string.Empty);
        }
    }

    /// <summary>
    /// Terminates the process and its children, ignoring failures.
    /// </summary>
    private static void TryKill(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception)
        {
            // The process may have exited in the meantime; this runs inside a cancellation
            // callback, which must not throw.
        }
    }

    [GeneratedRegex(@"^([0-9a-f]+)\s+(?:([0-9a-f]+)\s+)?([A-Za-z])\s+(.+)$")]
    private static partial Regex NmOutputRegex();
}
/// <summary>
/// Switches that control what a symbol extraction run collects.
/// </summary>
public sealed record SymbolExtractionOptions
{
    /// <summary>
    /// Gets a value indicating whether DWARF information should be extracted. Defaults to true.
    /// </summary>
    public bool ExtractDwarf { get; init; } = true;

    /// <summary>
    /// Gets a value indicating whether C++ names should be demangled. Defaults to true.
    /// </summary>
    public bool Demangle { get; init; } = true;

    /// <summary>
    /// Gets a shared instance with every switch at its default value.
    /// </summary>
    public static SymbolExtractionOptions Default { get; } = new SymbolExtractionOptions();
}
/// <summary>
/// Outcome of a symbol extraction run: either the symbols that were found or an error message.
/// </summary>
public sealed record SymbolExtractionResult
{
    /// <summary>
    /// Gets a value indicating whether the extraction completed.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Gets the path of the binary that was inspected, when known.
    /// </summary>
    public string? BinaryPath { get; init; }

    /// <summary>
    /// Gets the SHA-256 of the inspected binary, when available.
    /// </summary>
    public string? BinarySha256 { get; init; }

    /// <summary>
    /// Gets the symbols that were extracted; null when none were recorded.
    /// </summary>
    public IReadOnlyList<ExtractedSymbol>? Symbols { get; init; }

    /// <summary>
    /// Gets a value indicating whether DWARF information was found.
    /// </summary>
    public bool HasDwarf { get; init; }

    /// <summary>
    /// Gets the time at which the result was produced.
    /// </summary>
    public DateTimeOffset ExtractedAt { get; init; }

    /// <summary>
    /// Gets the error message of a failed extraction; null otherwise.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Builds a failed result stamped with the current UTC time.
    /// </summary>
    /// <param name="error">Message describing why the extraction failed.</param>
    /// <returns>A result with <see cref="Success"/> set to false and <see cref="Error"/> set.</returns>
    public static SymbolExtractionResult Failed(string error)
    {
        var failedAt = DateTimeOffset.UtcNow;
        return new SymbolExtractionResult
        {
            Success = false,
            Error = error,
            ExtractedAt = failedAt
        };
    }
}
/// <summary>
/// A single symbol read from a binary, optionally annotated with its source location.
/// </summary>
/// <remarks>
/// Identity fields are init-only; the two source-location properties are settable so they can
/// be filled in after construction, once line-table information has been read.
/// </remarks>
public sealed class ExtractedSymbol
{
/// <summary>
/// Gets the symbol name as reported by the extraction tool.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Gets the demangled name, or null when none is known.
/// </summary>
public string? DemangledName { get; init; }
/// <summary>
/// Gets the symbol address.
/// </summary>
public ulong Address { get; init; }
/// <summary>
/// Gets the symbol size; 0 when no size was reported.
/// </summary>
public ulong Size { get; init; }
/// <summary>
/// Gets the symbol type.
/// </summary>
public SymbolType Type { get; init; }
/// <summary>
/// Gets or sets the source file (from DWARF); null when no line-table entry matched the address.
/// </summary>
public string? SourceFile { get; set; }
/// <summary>
/// Gets or sets the source line (from DWARF); null when no line-table entry matched the address.
/// </summary>
public int? SourceLine { get; set; }
}
/// <summary>
/// Classification of an extracted symbol by section and binding.
/// </summary>
public enum SymbolType
{
    /// <summary>
    /// Global function.
    /// </summary>
    Function = 0,

    /// <summary>
    /// Function with local binding.
    /// </summary>
    LocalFunction = 1,

    /// <summary>
    /// Global initialised data.
    /// </summary>
    Data = 2,

    /// <summary>
    /// Initialised data with local binding.
    /// </summary>
    LocalData = 3,

    /// <summary>
    /// Global symbol in the BSS section.
    /// </summary>
    Bss = 4,

    /// <summary>
    /// BSS symbol with local binding.
    /// </summary>
    LocalBss = 5,

    /// <summary>
    /// Global read-only data.
    /// </summary>
    ReadOnly = 6,

    /// <summary>
    /// Read-only data with local binding.
    /// </summary>
    LocalReadOnly = 7,

    /// <summary>
    /// Weak symbol.
    /// </summary>
    Weak = 8,

    /// <summary>
    /// Weak symbol that is undefined.
    /// </summary>
    WeakUndefined = 9,

    /// <summary>
    /// Any type not covered by the other members.
    /// </summary>
    Other = 10
}
/// <summary>
/// Ground-truth observation from reproducible rebuild: one symbol together with the
/// rebuild it was taken from.
/// </summary>
public sealed record GroundTruthObservation
{
/// <summary>
/// Gets the symbol name.
/// </summary>
public required string SymbolName { get; init; }
/// <summary>
/// Gets the demangled name, or null when none is known.
/// </summary>
public string? DemangledName { get; init; }
/// <summary>
/// Gets the symbol address.
/// </summary>
public ulong Address { get; init; }
/// <summary>
/// Gets the symbol size.
/// </summary>
public ulong Size { get; init; }
/// <summary>
/// Gets the symbol type.
/// </summary>
public SymbolType Type { get; init; }
/// <summary>
/// Gets the source file, or null when no source location is known.
/// </summary>
public string? SourceFile { get; init; }
/// <summary>
/// Gets the source line, or null when no source location is known.
/// </summary>
public int? SourceLine { get; init; }
/// <summary>
/// Gets the identifier of the source that produced this observation.
/// </summary>
public required string SourceId { get; init; }
/// <summary>
/// Gets the path of the buildinfo file of the originating rebuild, when available.
/// </summary>
public string? BuildinfoPath { get; init; }
/// <summary>
/// Gets when the underlying symbols were extracted.
/// </summary>
public DateTimeOffset ExtractedAt { get; init; }
/// <summary>
/// Gets the provenance (rebuild job, backend, reproducibility, binary hash), when recorded.
/// </summary>
public ObservationProvenance? Provenance { get; init; }
}
/// <summary>
/// Provenance of a ground-truth observation: which rebuild job and backend produced the
/// binary the observation was taken from.
/// </summary>
public sealed record ObservationProvenance
{
/// <summary>
/// Gets the rebuild job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets the name of the backend used for the rebuild.
/// </summary>
public required string Backend { get; init; }
/// <summary>
/// Gets whether the rebuild was reproducible.
/// </summary>
/// <remarks>
/// This is a plain boolean, so a producer that has no verdict must pick a value;
/// SymbolExtractor.CreateObservations records an unknown verdict as false.
/// </remarks>
public bool Reproducible { get; init; }
/// <summary>
/// Gets the hash of the binary, or null when none was recorded.
/// </summary>
public string? BinaryHash { get; init; }
}
/// <summary>
/// DWARF debug information gathered for one binary: which debug sections were seen and,
/// when available, the decoded line table.
/// </summary>
internal sealed class DwarfInfo
{
/// <summary>Gets or sets whether a <c>.debug_info</c> section name was seen in the section listing.</summary>
public bool HasDebugInfo { get; set; }
/// <summary>Gets or sets whether a <c>.debug_line</c> section name was seen in the section listing.</summary>
public bool HasDebugLine { get; set; }
/// <summary>Gets or sets whether a <c>.debug_abbrev</c> section name was seen in the section listing.</summary>
public bool HasDebugAbbrev { get; set; }
/// <summary>
/// Gets or sets the line table as a map from address to source file and line;
/// null when no line table was read.
/// </summary>
public Dictionary<ulong, (string File, int Line)>? LineInfo { get; set; }
}