Fix build and code structure improvements. New but essential UI functionality. CI improvements. Documentation improvements. AI module improvements.

This commit is contained in:
StellaOps Bot
2025-12-26 21:54:17 +02:00
parent 335ff7da16
commit c2b9cd8d1f
3717 changed files with 264714 additions and 48202 deletions

View File

@@ -0,0 +1,175 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Settings for the reproducible builder infrastructure: image registry, working
/// directories, timeouts, concurrency, container runtime and per-distro builders.
/// </summary>
public sealed class BuilderServiceOptions
{
    /// <summary>
    /// Name of the configuration section that holds these options.
    /// </summary>
    public const string SectionName = "BinaryIndex:Builders";

    /// <summary>
    /// Base path (registry prefix) for builder Docker images.
    /// Defaults to <c>ghcr.io/stella-ops</c>.
    /// </summary>
    public string BuilderImageRegistry { get; set; } = "ghcr.io/stella-ops";

    /// <summary>
    /// Directory where build artifacts are stored temporarily.
    /// Defaults to <c>/tmp/binaryindex-builds</c>.
    /// </summary>
    public string ArtifactPath { get; set; } = "/tmp/binaryindex-builds";

    /// <summary>
    /// Directory where build logs are stored.
    /// Defaults to <c>/tmp/binaryindex-build-logs</c>.
    /// </summary>
    public string LogPath { get; set; } = "/tmp/binaryindex-build-logs";

    /// <summary>
    /// Build timeout used when nothing more specific is configured. Defaults to 30 minutes.
    /// </summary>
    public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromMinutes(30);

    /// <summary>
    /// Upper bound on builds running at the same time. Defaults to 4.
    /// </summary>
    public int MaxConcurrentBuilds { get; set; } = 4;

    /// <summary>
    /// When <c>true</c> (the default), artifacts of failed builds are kept for debugging.
    /// </summary>
    public bool KeepFailedArtifacts { get; set; } = true;

    /// <summary>
    /// Interval between cleanup passes over old artifacts. Defaults to 6 hours.
    /// </summary>
    public TimeSpan ArtifactCleanupInterval { get; set; } = TimeSpan.FromHours(6);

    /// <summary>
    /// Age after which an artifact becomes eligible for cleanup. Defaults to 1 day.
    /// </summary>
    public TimeSpan ArtifactMaxAge { get; set; } = TimeSpan.FromDays(1);

    /// <summary>
    /// Path of the Docker socket used for container builds.
    /// Defaults to <c>/var/run/docker.sock</c>.
    /// </summary>
    public string DockerSocketPath { get; set; } = "/var/run/docker.sock";

    /// <summary>
    /// Whether to use podman instead of docker. Defaults to <c>false</c>.
    /// </summary>
    public bool UsePodman { get; set; }

    /// <summary>
    /// Alpine builder configuration. Enabled by default with distro id <c>alpine</c>.
    /// </summary>
    public DistroBuilderOptions Alpine { get; set; } =
        new DistroBuilderOptions { Distro = "alpine", Enabled = true };

    /// <summary>
    /// Debian builder configuration. Enabled by default with distro id <c>debian</c>.
    /// </summary>
    public DistroBuilderOptions Debian { get; set; } =
        new DistroBuilderOptions { Distro = "debian", Enabled = true };

    /// <summary>
    /// RHEL/CentOS builder configuration. Enabled by default with distro id <c>rhel</c>.
    /// </summary>
    public DistroBuilderOptions Rhel { get; set; } =
        new DistroBuilderOptions { Distro = "rhel", Enabled = true };
}
/// <summary>
/// Per-distro builder settings: identity, enablement, target releases, image
/// naming and build-time customisation.
/// </summary>
public sealed class DistroBuilderOptions
{
    /// <summary>
    /// Distro identifier. Empty by default.
    /// </summary>
    public string Distro { get; set; } = string.Empty;

    /// <summary>
    /// Whether this builder is enabled. Defaults to <c>true</c>.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Releases of this distro that the builder supports. Empty by default.
    /// </summary>
    public List<string> SupportedReleases { get; set; } = new List<string>();

    /// <summary>
    /// Docker image template. The default value contains the <c>{distro}</c> and
    /// <c>{release}</c> placeholders.
    /// </summary>
    public string ImageTemplate { get; set; } = "repro-builder-{distro}:{release}";

    /// <summary>
    /// Custom environment variables for builds (name to value). Empty by default.
    /// </summary>
    public Dictionary<string, string> EnvironmentVariables { get; set; } =
        new Dictionary<string, string>();

    /// <summary>
    /// Custom build flags to add. Empty by default.
    /// </summary>
    public List<string> ExtraCFlags { get; set; } = new List<string>();

    /// <summary>
    /// Timeout override for this distro; <c>null</c> (the default) means no override is set.
    /// </summary>
    public TimeSpan? Timeout { get; set; }
}
/// <summary>
/// Options for function fingerprint extraction: size limits, symbol filtering,
/// call-graph construction and the external binutils tools to invoke.
/// </summary>
public sealed class FunctionExtractionOptions
{
    /// <summary>
    /// Configuration section name.
    /// </summary>
    public const string SectionName = "BinaryIndex:FunctionExtraction";

    /// <summary>
    /// Minimum function size to extract. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; set; } = 16;

    /// <summary>
    /// Maximum function size to extract. 0 (the default) = unlimited.
    /// </summary>
    public int MaxFunctionSize { get; set; } = 0;

    /// <summary>
    /// Whether to include internal (non-exported) functions. Defaults to <c>false</c>.
    /// </summary>
    public bool IncludeInternalFunctions { get; set; } = false;

    /// <summary>
    /// Whether to build call graphs. Defaults to <c>true</c>.
    /// </summary>
    public bool BuildCallGraph { get; set; } = true;

    /// <summary>
    /// Patterns to exclude from extraction (regex).
    /// </summary>
    /// <remarks>
    /// The leading dot of <c>.plt</c> and <c>.text</c> is escaped so it matches a
    /// literal '.'; an unescaped dot is the regex "any character" wildcard and
    /// would also exclude ordinary symbols such as <c>aplt_x</c> or <c>xtext_y</c>.
    /// </remarks>
    public List<string> ExcludePatterns { get; set; } = new()
    {
        "^__.*",        // Compiler-generated
        "^_GLOBAL_.*",  // Global constructors
        @"^\.plt.*",    // PLT stubs
        @"^\.text.*"    // Section markers
    };

    /// <summary>
    /// Path to objdump binary. Defaults to the bare command name <c>objdump</c>.
    /// </summary>
    public string ObjdumpPath { get; set; } = "objdump";

    /// <summary>
    /// Path to nm binary. Defaults to the bare command name <c>nm</c>.
    /// </summary>
    public string NmPath { get; set; } = "nm";

    /// <summary>
    /// Path to readelf binary. Defaults to the bare command name <c>readelf</c>.
    /// </summary>
    public string ReadelfPath { get; set; } = "readelf";
}

View File

@@ -0,0 +1,304 @@
using System.Text.Json.Serialization;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// A claim asserting a CVE verdict for a specific fingerprint.
/// Created when reproducible builds show a function was modified to fix a CVE.
/// </summary>
/// <remarks>
/// <see cref="FingerprintId"/>, <see cref="CveId"/>, <see cref="Verdict"/> and
/// <see cref="Evidence"/> are <c>required</c>; every other member is optional.
/// </remarks>
public sealed record FingerprintClaim
{
/// <summary>
/// Unique identifier for this claim.
/// Not marked <c>required</c>, so it is <see cref="Guid.Empty"/> unless set by the initializer.
/// </summary>
public Guid Id { get; init; }
/// <summary>
/// ID of the fingerprint this claim is about.
/// </summary>
public required Guid FingerprintId { get; init; }
/// <summary>
/// CVE identifier (e.g., "CVE-2023-12345").
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Verdict: whether this fingerprint is fixed, vulnerable, or unknown.
/// </summary>
public required ClaimVerdict Verdict { get; init; }
/// <summary>
/// Evidence supporting this claim.
/// </summary>
public required FingerprintClaimEvidence Evidence { get; init; }
/// <summary>
/// Hash of the DSSE attestation if signed; <c>null</c> otherwise.
/// </summary>
public string? AttestationDsseHash { get; init; }
/// <summary>
/// When this claim was created.
/// Defaults to <see cref="DateTimeOffset.UtcNow"/> evaluated when the instance is constructed.
/// </summary>
public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// When this claim was last updated; <c>null</c> by default.
/// </summary>
public DateTimeOffset? UpdatedAt { get; init; }
/// <summary>
/// Source that generated this claim (e.g., "repro-builder-alpine").
/// </summary>
public string? Source { get; init; }
/// <summary>
/// Confidence in this claim (0.0-1.0). Defaults to 1.0.
/// The range is not enforced by this type.
/// </summary>
public decimal Confidence { get; init; } = 1.0m;
}
/// <summary>
/// Verdict for a fingerprint claim.
/// </summary>
/// <remarks>
/// The <see cref="JsonStringEnumConverter"/> attribute makes System.Text.Json
/// read and write the members by name rather than by numeric value.
/// </remarks>
[JsonConverter(typeof(JsonStringEnumConverter))]
public enum ClaimVerdict
{
/// <summary>
/// The fingerprint is from a binary that contains the CVE fix.
/// </summary>
Fixed,
/// <summary>
/// The fingerprint is from a binary that is vulnerable to the CVE.
/// </summary>
Vulnerable,
/// <summary>
/// Unable to determine fix status.
/// </summary>
Unknown
}
/// <summary>
/// Evidence supporting a fingerprint claim.
/// </summary>
/// <remarks>
/// Only <see cref="PatchCommit"/> and <see cref="ChangedFunctions"/> are
/// <c>required</c>; the remaining members are nullable and default to <c>null</c>.
/// </remarks>
public sealed record FingerprintClaimEvidence
{
/// <summary>
/// Git commit or patch reference that introduced the fix.
/// </summary>
public required string PatchCommit { get; init; }
/// <summary>
/// List of function names that changed between vulnerable and fixed versions.
/// </summary>
public required IReadOnlyList<string> ChangedFunctions { get; init; }
/// <summary>
/// Similarity scores for modified functions (function name → score).
/// </summary>
public IReadOnlyDictionary<string, decimal>? FunctionSimilarities { get; init; }
/// <summary>
/// Reference to the vulnerable build artifacts.
/// </summary>
public string? VulnerableBuildRef { get; init; }
/// <summary>
/// Reference to the patched build artifacts.
/// </summary>
public string? PatchedBuildRef { get; init; }
/// <summary>
/// Source package name.
/// </summary>
public string? SourcePackage { get; init; }
/// <summary>
/// Vulnerable version string.
/// </summary>
public string? VulnerableVersion { get; init; }
/// <summary>
/// Patched version string.
/// </summary>
public string? PatchedVersion { get; init; }
/// <summary>
/// Distro and release this build was done for.
/// </summary>
public string? DistroRelease { get; init; }
/// <summary>
/// Builder image used for reproducible builds.
/// </summary>
public string? BuilderImage { get; init; }
/// <summary>
/// Timestamp of the vulnerable build.
/// </summary>
public DateTimeOffset? VulnerableBuildTimestamp { get; init; }
/// <summary>
/// Timestamp of the patched build.
/// </summary>
public DateTimeOffset? PatchedBuildTimestamp { get; init; }
/// <summary>
/// Diff statistics summary.
/// </summary>
public DiffStatistics? DiffStatistics { get; init; }
}
/// <summary>
/// Repository for managing fingerprint claims: create (single or batch),
/// look up by id / fingerprint / hash / CVE / verdict, update, delete and
/// existence check.
/// </summary>
public interface IFingerprintClaimRepository
{
/// <summary>
/// Creates a new fingerprint claim.
/// </summary>
/// <param name="claim">The claim to create.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The created claim ID.</returns>
Task<Guid> CreateClaimAsync(FingerprintClaim claim, CancellationToken ct = default);
/// <summary>
/// Creates multiple claims in a batch.
/// </summary>
/// <param name="claims">Claims to create.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the batch has been processed.</returns>
Task CreateClaimsBatchAsync(IEnumerable<FingerprintClaim> claims, CancellationToken ct = default);
/// <summary>
/// Gets a claim by ID.
/// </summary>
/// <param name="id">Claim ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The claim if found; the nullable return type indicates <c>null</c> otherwise.</returns>
Task<FingerprintClaim?> GetClaimByIdAsync(Guid id, CancellationToken ct = default);
/// <summary>
/// Gets all claims for a specific fingerprint.
/// </summary>
/// <param name="fingerprintId">Fingerprint ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of claims for the fingerprint.</returns>
Task<IReadOnlyList<FingerprintClaim>> GetClaimsByFingerprintAsync(
Guid fingerprintId,
CancellationToken ct = default);
/// <summary>
/// Gets all claims for a specific fingerprint hash.
/// </summary>
/// <param name="fingerprintHash">Fingerprint hash (hex-encoded).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of claims for the fingerprint.</returns>
Task<IReadOnlyList<FingerprintClaim>> GetClaimsByFingerprintHashAsync(
string fingerprintHash,
CancellationToken ct = default);
/// <summary>
/// Gets all claims for a specific CVE.
/// </summary>
/// <param name="cveId">CVE identifier.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of claims for the CVE.</returns>
Task<IReadOnlyList<FingerprintClaim>> GetClaimsByCveAsync(
string cveId,
CancellationToken ct = default);
/// <summary>
/// Gets claims with a specific verdict.
/// </summary>
/// <param name="verdict">Verdict to filter by.</param>
/// <param name="limit">Maximum results to return. Defaults to 100.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of claims with the verdict.</returns>
Task<IReadOnlyList<FingerprintClaim>> GetClaimsByVerdictAsync(
ClaimVerdict verdict,
int limit = 100,
CancellationToken ct = default);
/// <summary>
/// Updates an existing claim.
/// </summary>
/// <param name="claim">The updated claim.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the update has been processed.</returns>
Task UpdateClaimAsync(FingerprintClaim claim, CancellationToken ct = default);
/// <summary>
/// Deletes a claim by ID.
/// </summary>
/// <param name="id">Claim ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>True if deleted, false if not found.</returns>
Task<bool> DeleteClaimAsync(Guid id, CancellationToken ct = default);
/// <summary>
/// Checks if a claim already exists for a fingerprint+CVE combination.
/// </summary>
/// <param name="fingerprintId">Fingerprint ID.</param>
/// <param name="cveId">CVE identifier.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>True if a claim exists.</returns>
Task<bool> ClaimExistsAsync(Guid fingerprintId, string cveId, CancellationToken ct = default);
}
/// <summary>
/// Repository for managing function fingerprints (per-binary breakdown):
/// store, list by binary, search by name or basic-block hash, and delete.
/// </summary>
public interface IFunctionFingerprintRepository
{
/// <summary>
/// Stores function fingerprints for a binary.
/// </summary>
/// <param name="binaryFingerprintId">Parent binary fingerprint ID.</param>
/// <param name="functions">Function fingerprints to store.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the fingerprints have been processed.</returns>
Task StoreFunctionsAsync(
Guid binaryFingerprintId,
IEnumerable<FunctionFingerprint> functions,
CancellationToken ct = default);
/// <summary>
/// Gets all function fingerprints for a binary.
/// </summary>
/// <param name="binaryFingerprintId">Parent binary fingerprint ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of function fingerprints.</returns>
Task<IReadOnlyList<FunctionFingerprint>> GetFunctionsByBinaryAsync(
Guid binaryFingerprintId,
CancellationToken ct = default);
/// <summary>
/// Searches for functions by name pattern.
/// </summary>
/// <param name="namePattern">Function name pattern (SQL LIKE).</param>
/// <param name="limit">Maximum results. Defaults to 100.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Matching functions with their binary IDs.</returns>
Task<IReadOnlyList<(Guid BinaryId, FunctionFingerprint Function)>> SearchFunctionsByNameAsync(
string namePattern,
int limit = 100,
CancellationToken ct = default);
/// <summary>
/// Finds functions matching a specific basic block hash.
/// </summary>
/// <param name="basicBlockHash">Hash to search for.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Matching functions with their binary IDs.</returns>
Task<IReadOnlyList<(Guid BinaryId, FunctionFingerprint Function)>> FindByBasicBlockHashAsync(
byte[] basicBlockHash,
CancellationToken ct = default);
/// <summary>
/// Deletes all function fingerprints for a binary.
/// </summary>
/// <param name="binaryFingerprintId">Parent binary fingerprint ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the deletion has been processed.</returns>
Task DeleteFunctionsByBinaryAsync(Guid binaryFingerprintId, CancellationToken ct = default);
}

View File

@@ -0,0 +1,220 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Extracts function-level fingerprints from binary files.
/// Uses multiple hashing strategies for robust matching.
/// </summary>
public interface IFunctionFingerprintExtractor
{
/// <summary>
/// Extracts function fingerprints from a binary file.
/// </summary>
/// <param name="binaryPath">Path to the binary file.</param>
/// <param name="options">
/// Extraction options. May be <c>null</c> (the default); how a null value is
/// interpreted is up to the implementation — presumably default options, TODO confirm.
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of function fingerprints.</returns>
Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(
string binaryPath,
ExtractionOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Extracts function fingerprints from binary data in memory.
/// </summary>
/// <param name="binaryData">Binary file contents.</param>
/// <param name="options">
/// Extraction options. May be <c>null</c> (the default); how a null value is
/// interpreted is up to the implementation — presumably default options, TODO confirm.
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of function fingerprints.</returns>
Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(
ReadOnlyMemory<byte> binaryData,
ExtractionOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Gets supported binary formats for this extractor.
/// </summary>
IReadOnlyList<string> SupportedFormats { get; }
}
/// <summary>
/// Fingerprint data for a single function in a binary.
/// Uses multiple hash algorithms for robust cross-version matching.
/// </summary>
/// <remarks>
/// NOTE: the hash members are <c>byte[]</c>, so the compiler-generated record
/// equality compares them by reference, not by content. Two fingerprints with
/// identical hash bytes held in different arrays are therefore not equal under
/// <c>==</c> / <c>Equals</c>; compare the arrays explicitly when content equality is needed.
/// </remarks>
public sealed record FunctionFingerprint
{
/// <summary>
/// Function name (symbol name or synthesized from offset).
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Offset of the function within the .text section.
/// </summary>
public required long Offset { get; init; }
/// <summary>
/// Size of the function in bytes.
/// </summary>
public required int Size { get; init; }
/// <summary>
/// Hash of the basic block structure (opcode sequence, ignoring operands).
/// More stable across recompilation with different addresses.
/// </summary>
public required byte[] BasicBlockHash { get; init; }
/// <summary>
/// Hash of the control flow graph structure.
/// Captures branch patterns regardless of target addresses.
/// </summary>
public required byte[] CfgHash { get; init; }
/// <summary>
/// Hash of string references in the function.
/// Useful for identifying functions that use specific error messages or constants.
/// </summary>
public required byte[] StringRefsHash { get; init; }
/// <summary>
/// Combined fingerprint hash (all algorithms merged); <c>null</c> by default.
/// </summary>
public byte[]? CombinedHash { get; init; }
/// <summary>
/// List of functions called by this function; <c>null</c> by default.
/// </summary>
public IReadOnlyList<string>? Callees { get; init; }
/// <summary>
/// List of functions that call this function; <c>null</c> by default.
/// </summary>
public IReadOnlyList<string>? Callers { get; init; }
/// <summary>
/// Whether this is an exported (visible) symbol.
/// </summary>
public bool IsExported { get; init; }
/// <summary>
/// Whether this function has debug information available.
/// </summary>
public bool HasDebugInfo { get; init; }
/// <summary>
/// Source file path if debug info available.
/// </summary>
public string? SourceFile { get; init; }
/// <summary>
/// Source line number if debug info available.
/// </summary>
public int? SourceLine { get; init; }
}
/// <summary>
/// Per-call settings that control which functions are fingerprinted and how
/// much detail is collected for each.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// Include internal/static (non-exported) functions. Defaults to <c>false</c>.
    /// </summary>
    public bool IncludeInternalFunctions { get; init; }

    /// <summary>
    /// Build the call graph (callees/callers). Defaults to <c>true</c>.
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Smallest function size, in bytes, to include. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Largest function size, in bytes, to include. 0 (the default) = no limit.
    /// </summary>
    public int MaxFunctionSize { get; init; }

    /// <summary>
    /// Regex filter for function names to include. Null (the default) = all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex filter for function names to exclude. Null by default.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// Compute the combined hash. Defaults to <c>true</c>.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// Extract debug information (source file/line). Defaults to <c>false</c>.
    /// </summary>
    public bool ExtractDebugInfo { get; init; }
}
/// <summary>
/// Represents a change to a function between two binary versions
/// (vulnerable and patched).
/// </summary>
public sealed record FunctionChange
{
/// <summary>
/// Function name.
/// </summary>
public required string FunctionName { get; init; }
/// <summary>
/// Type of change detected.
/// </summary>
public required ChangeType Type { get; init; }
/// <summary>
/// Fingerprint from the vulnerable version (null if Added).
/// </summary>
public FunctionFingerprint? VulnerableFingerprint { get; init; }
/// <summary>
/// Fingerprint from the patched version (null if Removed).
/// </summary>
public FunctionFingerprint? PatchedFingerprint { get; init; }
/// <summary>
/// Similarity score between versions (0.0-1.0) for Modified changes; <c>null</c> by default.
/// </summary>
public decimal? SimilarityScore { get; init; }
/// <summary>
/// Which hash algorithms showed differences; <c>null</c> by default.
/// </summary>
public IReadOnlyList<string>? DifferingHashes { get; init; }
}
/// <summary>
/// Type of change to a function between versions.
/// </summary>
/// <remarks>
/// Members have no explicit values, so their numeric values follow declaration
/// order starting at 0; reordering them changes those values.
/// </remarks>
public enum ChangeType
{
/// <summary>
/// Function was added in the patched version.
/// </summary>
Added,
/// <summary>
/// Function was modified (fingerprint changed).
/// </summary>
Modified,
/// <summary>
/// Function was removed in the patched version.
/// </summary>
Removed,
/// <summary>
/// Function signature changed (size/callees differ significantly).
/// </summary>
SignatureChanged
}

View File

@@ -0,0 +1,216 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Computes diffs between function fingerprints of vulnerable and patched binaries.
/// Used to identify which functions were modified to fix a CVE.
/// </summary>
public interface IPatchDiffEngine
{
/// <summary>
/// Compares function fingerprints between vulnerable and patched builds.
/// </summary>
/// <param name="vulnerable">Functions from the vulnerable binary.</param>
/// <param name="patched">Functions from the patched binary.</param>
/// <param name="options">
/// Diff options. May be <c>null</c> (the default); how a null value is
/// interpreted is up to the implementation — presumably default options, TODO confirm.
/// </param>
/// <returns>Diff result with changes identified.</returns>
FunctionDiffResult ComputeDiff(
IReadOnlyList<FunctionFingerprint> vulnerable,
IReadOnlyList<FunctionFingerprint> patched,
DiffOptions? options = null);
/// <summary>
/// Computes similarity between two function fingerprints.
/// </summary>
/// <param name="a">First function fingerprint.</param>
/// <param name="b">Second function fingerprint.</param>
/// <returns>Similarity score (0.0-1.0).</returns>
decimal ComputeSimilarity(FunctionFingerprint a, FunctionFingerprint b);
/// <summary>
/// Identifies functions that likely correspond between versions despite name changes.
/// Uses fingerprint matching to find renamed or moved functions.
/// </summary>
/// <param name="vulnerable">Functions from the vulnerable binary.</param>
/// <param name="patched">Functions from the patched binary.</param>
/// <param name="threshold">Minimum similarity to consider a match. Defaults to 0.8.</param>
/// <returns>Mapping of vulnerable function names to patched function names.</returns>
IReadOnlyDictionary<string, string> FindFunctionMappings(
IReadOnlyList<FunctionFingerprint> vulnerable,
IReadOnlyList<FunctionFingerprint> patched,
decimal threshold = 0.8m);
}
/// <summary>
/// Outcome of diffing two function sets: the individual changes plus counters
/// derived from them on demand.
/// </summary>
/// <remarks>
/// Every counter is computed from <see cref="Changes"/> each time it is read;
/// nothing is cached.
/// </remarks>
public sealed record FunctionDiffResult
{
    /// <summary>
    /// Every function change that was detected.
    /// </summary>
    public required IReadOnlyList<FunctionChange> Changes { get; init; }

    /// <summary>
    /// Function count of the vulnerable version.
    /// </summary>
    public int TotalFunctionsVulnerable { get; init; }

    /// <summary>
    /// Function count of the patched version.
    /// </summary>
    public int TotalFunctionsPatched { get; init; }

    /// <summary>
    /// How many changes are of type <see cref="ChangeType.Added"/>.
    /// </summary>
    public int AddedCount => CountChanges(ChangeType.Added);

    /// <summary>
    /// How many changes are of type <see cref="ChangeType.Modified"/>.
    /// </summary>
    public int ModifiedCount => CountChanges(ChangeType.Modified);

    /// <summary>
    /// How many changes are of type <see cref="ChangeType.Removed"/>.
    /// </summary>
    public int RemovedCount => CountChanges(ChangeType.Removed);

    /// <summary>
    /// How many changes are of type <see cref="ChangeType.SignatureChanged"/>.
    /// </summary>
    public int SignatureChangedCount => CountChanges(ChangeType.SignatureChanged);

    /// <summary>
    /// Functions that stayed the same: the vulnerable total minus the modified,
    /// removed and signature-changed counts.
    /// </summary>
    public int UnchangedCount
    {
        get
        {
            var remaining = TotalFunctionsVulnerable;
            remaining -= ModifiedCount;
            remaining -= RemovedCount;
            remaining -= SignatureChangedCount;
            return remaining;
        }
    }

    /// <summary>
    /// Share of vulnerable-version functions that were modified or had their
    /// signature changed, as a percentage (0-100). Zero when the vulnerable
    /// total is not positive.
    /// </summary>
    public decimal ChangePercentage
    {
        get
        {
            if (TotalFunctionsVulnerable <= 0)
            {
                return 0m;
            }

            return 100m * (ModifiedCount + SignatureChangedCount) / TotalFunctionsVulnerable;
        }
    }

    /// <summary>
    /// Snapshot of all counters as a <see cref="DiffStatistics"/>; a new instance
    /// is built on every access.
    /// </summary>
    public DiffStatistics Statistics
    {
        get
        {
            return new DiffStatistics
            {
                TotalVulnerable = TotalFunctionsVulnerable,
                TotalPatched = TotalFunctionsPatched,
                Added = AddedCount,
                Modified = ModifiedCount,
                Removed = RemovedCount,
                SignatureChanged = SignatureChangedCount,
                Unchanged = UnchangedCount
            };
        }
    }

    // Counts the entries of Changes whose Type equals the requested kind.
    private int CountChanges(ChangeType kind) => Changes.Count(change => change.Type == kind);
}
/// <summary>
/// Summary statistics for a diff: totals for both versions plus one counter
/// per kind of change. All members default to 0.
/// </summary>
public sealed record DiffStatistics
{
/// <summary>
/// Total functions in vulnerable version.
/// </summary>
public int TotalVulnerable { get; init; }
/// <summary>
/// Total functions in patched version.
/// </summary>
public int TotalPatched { get; init; }
/// <summary>
/// Functions added.
/// </summary>
public int Added { get; init; }
/// <summary>
/// Functions modified.
/// </summary>
public int Modified { get; init; }
/// <summary>
/// Functions removed.
/// </summary>
public int Removed { get; init; }
/// <summary>
/// Functions with signature changes.
/// </summary>
public int SignatureChanged { get; init; }
/// <summary>
/// Functions unchanged.
/// </summary>
public int Unchanged { get; init; }
}
/// <summary>
/// Tuning knobs for diff computation: similarity and rename thresholds,
/// matching behaviour and hash weighting.
/// </summary>
public sealed record DiffOptions
{
    /// <summary>
    /// Minimum similarity score to consider two functions as the same
    /// (modified vs. different). Defaults to 0.5.
    /// </summary>
    public decimal SimilarityThreshold { get; init; } = 0.5m;

    /// <summary>
    /// Use fuzzy name matching for renamed functions. Defaults to <c>true</c>.
    /// </summary>
    public bool FuzzyNameMatching { get; init; } = true;

    /// <summary>
    /// Include unchanged functions in the result. Defaults to <c>false</c>.
    /// </summary>
    public bool IncludeUnchanged { get; init; }

    /// <summary>
    /// Weights for the different hash algorithms when computing similarity.
    /// Defaults to <see cref="HashWeights.Default"/>.
    /// </summary>
    public HashWeights Weights { get; init; } = HashWeights.Default;

    /// <summary>
    /// Detect renamed functions. Defaults to <c>true</c>.
    /// </summary>
    public bool DetectRenames { get; init; } = true;

    /// <summary>
    /// Minimum score to consider a function renamed (vs. added+removed).
    /// Defaults to 0.7.
    /// </summary>
    public decimal RenameThreshold { get; init; } = 0.7m;
}
/// <summary>
/// Relative weight given to each hash algorithm when computing similarity.
/// </summary>
public sealed record HashWeights
{
    // Largest deviation from 1.0 (exclusive) that IsValid still accepts.
    private const decimal SumTolerance = 0.001m;

    /// <summary>
    /// Weight of the basic block hash comparison. Defaults to 0.5.
    /// </summary>
    public decimal BasicBlockWeight { get; init; } = 0.5m;

    /// <summary>
    /// Weight of the CFG hash comparison. Defaults to 0.3.
    /// </summary>
    public decimal CfgWeight { get; init; } = 0.3m;

    /// <summary>
    /// Weight of the string refs hash comparison. Defaults to 0.2.
    /// </summary>
    public decimal StringRefsWeight { get; init; } = 0.2m;

    /// <summary>
    /// Default weights; a new instance is created on every access.
    /// </summary>
    public static HashWeights Default => new HashWeights();

    /// <summary>
    /// <c>true</c> when the three weights sum to 1.0 within a tolerance of 0.001 (exclusive).
    /// </summary>
    public bool IsValid
    {
        get
        {
            decimal deviation = BasicBlockWeight + CfgWeight + StringRefsWeight - 1.0m;
            return deviation > -SumTolerance && deviation < SumTolerance;
        }
    }
}

View File

@@ -0,0 +1,428 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Builds distro packages from source with reproducible settings.
/// Supports building both vulnerable and patched versions for fingerprint diffing.
/// </summary>
public interface IReproducibleBuilder
{
/// <summary>
/// Gets the distro identifier this builder supports (e.g., "alpine", "debian", "rhel").
/// </summary>
string Distro { get; }
/// <summary>
/// Gets the releases this builder can target (e.g., "3.18", "bookworm", "9").
/// </summary>
IReadOnlyList<string> SupportedReleases { get; }
/// <summary>
/// Builds a package from source with optional patches applied.
/// </summary>
/// <param name="request">Build request parameters.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Build result with output binaries and fingerprints.</returns>
Task<BuildResult> BuildAsync(BuildRequest request, CancellationToken ct = default);
/// <summary>
/// Builds both vulnerable and patched versions, returning the diff of function fingerprints.
/// This is the primary method for CVE fix attribution.
/// </summary>
/// <param name="request">Patch diff request parameters.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Diff result showing which functions changed between versions.</returns>
Task<PatchDiffResult> BuildAndDiffAsync(PatchDiffRequest request, CancellationToken ct = default);
/// <summary>
/// Validates that the build environment is correctly configured for the target release.
/// </summary>
/// <param name="release">Target release to validate.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Validation result with any issues found.</returns>
Task<BuildEnvironmentValidation> ValidateEnvironmentAsync(string release, CancellationToken ct = default);
}
/// <summary>
/// Request parameters for a reproducible build.
/// </summary>
/// <remarks>
/// <see cref="SourcePackage"/>, <see cref="Version"/> and <see cref="Release"/>
/// are <c>required</c>; the remaining members are nullable and default to <c>null</c>.
/// </remarks>
public sealed record BuildRequest
{
/// <summary>
/// Source package name (e.g., "openssl", "curl").
/// </summary>
public required string SourcePackage { get; init; }
/// <summary>
/// Package version to build.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Target distro release (e.g., "3.18", "bookworm").
/// </summary>
public required string Release { get; init; }
/// <summary>
/// Optional patches to apply before building.
/// </summary>
public IReadOnlyList<PatchReference>? Patches { get; init; }
/// <summary>
/// Target architecture (e.g., "x86_64", "aarch64"). Defaults to current arch.
/// </summary>
public string? Architecture { get; init; }
/// <summary>
/// Build options for reproducibility and normalization.
/// </summary>
public BuildOptions? Options { get; init; }
/// <summary>
/// Optional unique identifier for this build request (for tracking).
/// </summary>
public string? RequestId { get; init; }
}
/// <summary>
/// Points at a security patch: the CVE it fixes, where to fetch it, and
/// optional integrity and provenance details.
/// </summary>
public sealed record PatchReference
{
    /// <summary>
    /// Identifier of the CVE fixed by this patch.
    /// </summary>
    public required string CveId { get; init; }

    /// <summary>
    /// URL of the patch file.
    /// </summary>
    public required string PatchUrl { get; init; }

    /// <summary>
    /// Expected SHA-256 hash of the patch file, for integrity verification.
    /// Null by default.
    /// </summary>
    public string? PatchSha256 { get; init; }

    /// <summary>
    /// Git commit ID when the patch comes from a repository. Null by default.
    /// </summary>
    public string? CommitId { get; init; }

    /// <summary>
    /// Optional ordering hint for patch application (lower = earlier). Defaults to 0.
    /// </summary>
    public int Order { get; init; }
}
/// <summary>
/// Switches and limits that govern how a reproducible build is carried out.
/// </summary>
public sealed record BuildOptions
{
    /// <summary>
    /// SOURCE_DATE_EPOCH value. If null (the default), extracted from changelog/git.
    /// </summary>
    public DateTimeOffset? SourceDateEpoch { get; init; }

    /// <summary>
    /// Strip binaries after building. Defaults to <c>false</c>.
    /// </summary>
    public bool StripBinaries { get; init; }

    /// <summary>
    /// Extract function-level fingerprints. Defaults to <c>true</c>.
    /// </summary>
    public bool ExtractFunctionFingerprints { get; init; } = true;

    /// <summary>
    /// Minimum function size (bytes) to include in fingerprint extraction. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Build timeout. Defaults to 30 minutes.
    /// </summary>
    public TimeSpan Timeout { get; init; } = new TimeSpan(hours: 0, minutes: 30, seconds: 0);

    /// <summary>
    /// Keep build artifacts for debugging. Defaults to <c>false</c>.
    /// </summary>
    public bool KeepBuildArtifacts { get; init; }
}
/// <summary>
/// Outcome of a reproducible build: success flag, produced binaries and
/// diagnostic details.
/// </summary>
public sealed record BuildResult
{
    /// <summary>
    /// <c>true</c> when the build succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Built binaries with extracted fingerprints. Null by default.
    /// </summary>
    public IReadOnlyList<BuiltBinary>? Binaries { get; init; }

    /// <summary>
    /// Error message if the build failed. Null by default.
    /// </summary>
    public string? ErrorMessage { get; init; }

    /// <summary>
    /// Total build duration.
    /// </summary>
    public TimeSpan Duration { get; init; }

    /// <summary>
    /// Reference to the full build log (e.g., content-addressed storage ID).
    /// </summary>
    public string? BuildLogRef { get; init; }

    /// <summary>
    /// SOURCE_DATE_EPOCH used for this build.
    /// </summary>
    public DateTimeOffset? SourceDateEpoch { get; init; }

    /// <summary>
    /// Build container image used.
    /// </summary>
    public string? BuilderImage { get; init; }

    /// <summary>
    /// Creates a failed build result carrying only the error message and duration.
    /// </summary>
    /// <param name="message">Stored in <see cref="ErrorMessage"/>.</param>
    /// <param name="duration">Stored in <see cref="Duration"/>.</param>
    /// <returns>A result with <see cref="Success"/> set to <c>false</c>.</returns>
    public static BuildResult Failed(string message, TimeSpan duration)
    {
        var failure = new BuildResult
        {
            Success = false,
            Duration = duration,
            ErrorMessage = message
        };

        return failure;
    }
}
/// <summary>
/// A single binary produced by a build.
/// </summary>
/// <remarks>
/// NOTE: the hash members are <c>byte[]</c>, so the compiler-generated record
/// equality compares them by reference, not by content.
/// </remarks>
public sealed record BuiltBinary
{
/// <summary>
/// Relative path within the build output.
/// </summary>
public required string Path { get; init; }
/// <summary>
/// ELF Build-ID (hex-encoded).
/// </summary>
public required string BuildId { get; init; }
/// <summary>
/// SHA-256 of the .text section.
/// </summary>
public required byte[] TextSha256 { get; init; }
/// <summary>
/// Combined fingerprint hash.
/// </summary>
public required byte[] Fingerprint { get; init; }
/// <summary>
/// File-level SHA-256; <c>null</c> by default.
/// </summary>
public byte[]? FileSha256 { get; init; }
/// <summary>
/// Function-level fingerprints if extraction was enabled.
/// </summary>
public IReadOnlyList<FunctionFingerprint>? Functions { get; init; }
/// <summary>
/// Binary format (ELF, PE, Mach-O). Defaults to the lowercase string "elf".
/// </summary>
public string Format { get; init; } = "elf";
/// <summary>
/// Target architecture.
/// </summary>
public string? Architecture { get; init; }
/// <summary>
/// Whether the binary is stripped of debug symbols.
/// </summary>
public bool IsStripped { get; init; }
}
/// <summary>
/// Request for building and diffing vulnerable vs. patched versions of a source package.
/// </summary>
public sealed record PatchDiffRequest
{
/// <summary>
/// Source package name.
/// </summary>
public required string SourcePackage { get; init; }
/// <summary>
/// Vulnerable version to build first.
/// </summary>
public required string VulnerableVersion { get; init; }
/// <summary>
/// Describes the patched side of the comparison: either a patched version
/// or patches to apply to the vulnerable version.
/// </summary>
public required PatchTarget PatchTarget { get; init; }
/// <summary>
/// Target distro release.
/// </summary>
public required string Release { get; init; }
/// <summary>
/// CVE being fixed (for attribution).
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Build options. Optional; <c>null</c> when not supplied.
/// </summary>
public BuildOptions? Options { get; init; }
}
/// <summary>
/// Specifies how to get the patched version.
/// </summary>
/// <remarks>
/// Both members are optional and this type does not itself enforce that
/// exactly one of them is set.
/// </remarks>
public sealed record PatchTarget
{
/// <summary>
/// If set, build this version as the patched version (e.g., downstream fixed release).
/// </summary>
public string? PatchedVersion { get; init; }
/// <summary>
/// If set, apply these patches to the vulnerable version.
/// </summary>
public IReadOnlyList<PatchReference>? Patches { get; init; }
}
/// <summary>
/// Outcome of building a vulnerable and a patched version and comparing them.
/// </summary>
public sealed record PatchDiffResult
{
    /// <summary>
    /// <c>true</c> when both builds succeeded and the diff was computed.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Result of building the vulnerable version.
    /// </summary>
    public BuildResult? VulnerableBuild { get; init; }

    /// <summary>
    /// Result of building the patched version.
    /// </summary>
    public BuildResult? PatchedBuild { get; init; }

    /// <summary>
    /// Function-level changes, one entry per binary.
    /// </summary>
    public IReadOnlyList<BinaryDiff>? BinaryDiffs { get; init; }

    /// <summary>
    /// Human-readable error description when the diff failed.
    /// </summary>
    public string? ErrorMessage { get; init; }

    /// <summary>
    /// Creates a result describing a failed diff.
    /// </summary>
    /// <param name="message">Value stored in <see cref="ErrorMessage"/>.</param>
    /// <returns>
    /// A result whose <see cref="Success"/> is <c>false</c>; build results and
    /// binary diffs are left unset.
    /// </returns>
    public static PatchDiffResult Failed(string message)
    {
        return new PatchDiffResult
        {
            ErrorMessage = message,
            Success = false
        };
    }
}
/// <summary>
/// Diff results for a single binary between vulnerable and patched builds.
/// </summary>
public sealed record BinaryDiff
{
/// <summary>
/// Binary path (common between both builds).
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Function changes detected for this binary.
/// </summary>
public required IReadOnlyList<FunctionChange> Changes { get; init; }
/// <summary>
/// Build-ID of the vulnerable version. Optional; <c>null</c> when not set.
/// </summary>
public string? VulnerableBuildId { get; init; }
/// <summary>
/// Build-ID of the patched version. Optional; <c>null</c> when not set.
/// </summary>
public string? PatchedBuildId { get; init; }
/// <summary>
/// Total functions in vulnerable binary. Defaults to 0 when not set.
/// </summary>
public int TotalFunctionsVulnerable { get; init; }
/// <summary>
/// Total functions in patched binary. Defaults to 0 when not set.
/// </summary>
public int TotalFunctionsPatched { get; init; }
}
/// <summary>
/// Outcome of validating a build environment.
/// </summary>
public sealed record BuildEnvironmentValidation
{
    /// <summary>
    /// <c>true</c> when the environment can be used for building.
    /// </summary>
    public required bool IsValid { get; init; }

    /// <summary>
    /// Problems discovered while validating; unset for results created by <see cref="Valid"/>.
    /// </summary>
    public IReadOnlyList<string>? Issues { get; init; }

    /// <summary>
    /// Builder container image that is available; unset for results created by <see cref="Invalid"/>.
    /// </summary>
    public string? BuilderImage { get; init; }

    /// <summary>
    /// Detected toolchain versions.
    /// </summary>
    public IReadOnlyDictionary<string, string>? ToolchainVersions { get; init; }

    /// <summary>
    /// Creates a result describing a usable environment.
    /// </summary>
    /// <param name="image">Value stored in <see cref="BuilderImage"/>.</param>
    /// <param name="versions">Optional value stored in <see cref="ToolchainVersions"/>.</param>
    /// <returns>A result with <see cref="IsValid"/> set to <c>true</c>.</returns>
    public static BuildEnvironmentValidation Valid(string image, IReadOnlyDictionary<string, string>? versions = null)
    {
        return new BuildEnvironmentValidation
        {
            BuilderImage = image,
            ToolchainVersions = versions,
            IsValid = true
        };
    }

    /// <summary>
    /// Creates a result describing an unusable environment.
    /// </summary>
    /// <param name="issues">Value stored in <see cref="Issues"/>.</param>
    /// <returns>A result with <see cref="IsValid"/> set to <c>false</c>.</returns>
    public static BuildEnvironmentValidation Invalid(IReadOnlyList<string> issues)
    {
        return new BuildEnvironmentValidation
        {
            Issues = issues,
            IsValid = false
        };
    }
}

View File

@@ -0,0 +1,288 @@
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Computes diffs between function fingerprints of vulnerable and patched binaries.
/// </summary>
/// <remarks>
/// Functions are paired by name first and, optionally, by fingerprint similarity to
/// detect renames. When a list contains several fingerprints with the same name,
/// only the first occurrence of that name takes part in the comparison, so the
/// resulting changes never contain two entries for the same source function.
/// </remarks>
public sealed class PatchDiffEngine : IPatchDiffEngine
{
    // Relative weights of the individual hashes in the similarity score.
    private const decimal BasicBlockWeight = 0.5m;
    private const decimal CfgWeight = 0.3m;
    private const decimal StringRefsWeight = 0.2m;

    // Extra weight that comes into play only when both sizes are known and close.
    private const decimal SizeBonusWeight = 0.05m;
    private const decimal SizeBonusTolerance = 0.1m;

    private readonly ILogger<PatchDiffEngine> _logger;

    /// <summary>
    /// Initializes a new instance of <see cref="PatchDiffEngine"/>.
    /// </summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
    public PatchDiffEngine(ILogger<PatchDiffEngine> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public FunctionDiffResult ComputeDiff(
        IReadOnlyList<FunctionFingerprint> vulnerable,
        IReadOnlyList<FunctionFingerprint> patched,
        DiffOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(vulnerable);
        ArgumentNullException.ThrowIfNull(patched);
        options ??= new DiffOptions();

        _logger.LogDebug(
            "Computing diff: {VulnerableCount} vulnerable functions, {PatchedCount} patched functions",
            vulnerable.Count, patched.Count);

        // Index patched functions by name. TryAdd keeps the first fingerprint for a
        // name instead of throwing when the same name occurs more than once (which
        // happens when fingerprints of several binaries are merged into one list).
        var patchedByName = new Dictionary<string, FunctionFingerprint>(patched.Count);
        foreach (var candidate in patched)
        {
            patchedByName.TryAdd(candidate.Name, candidate);
        }

        // Names of patched functions that are already accounted for. Everything that
        // has a same-named vulnerable counterpart is reserved up front so that rename
        // detection cannot claim a function that will later be matched by name.
        var processedPatched = new HashSet<string>();
        foreach (var vulnFunc in vulnerable)
        {
            if (patchedByName.ContainsKey(vulnFunc.Name))
            {
                processedPatched.Add(vulnFunc.Name);
            }
        }

        var changes = new List<FunctionChange>();
        var seenVulnerable = new HashSet<string>();

        // Modifications, signature changes, renames and removals.
        foreach (var vulnFunc in vulnerable)
        {
            if (!seenVulnerable.Add(vulnFunc.Name))
            {
                continue; // duplicate name: first occurrence wins
            }

            if (patchedByName.TryGetValue(vulnFunc.Name, out var sameName))
            {
                var similarity = ComputeSimilarity(vulnFunc, sameName);
                if (similarity >= 1.0m)
                {
                    // Unchanged functions are never reported.
                    // NOTE(review): options.IncludeUnchanged is currently not honored.
                    continue;
                }

                changes.Add(new FunctionChange
                {
                    FunctionName = vulnFunc.Name,
                    // Below the threshold the function is considered too different
                    // to be "the same function, modified".
                    Type = similarity >= options.SimilarityThreshold
                        ? ChangeType.Modified
                        : ChangeType.SignatureChanged,
                    VulnerableFingerprint = vulnFunc,
                    PatchedFingerprint = sameName,
                    SimilarityScore = similarity,
                    DifferingHashes = GetDifferingHashes(vulnFunc, sameName)
                });
                continue;
            }

            // Not found by name - check whether it was renamed.
            if (options.DetectRenames)
            {
                var renamed = FindBestMatch(vulnFunc, patched, processedPatched, options.RenameThreshold);
                if (renamed != null)
                {
                    processedPatched.Add(renamed.Name);
                    changes.Add(new FunctionChange
                    {
                        FunctionName = $"{vulnFunc.Name} → {renamed.Name}",
                        Type = ChangeType.Modified,
                        VulnerableFingerprint = vulnFunc,
                        PatchedFingerprint = renamed,
                        SimilarityScore = ComputeSimilarity(vulnFunc, renamed),
                        DifferingHashes = GetDifferingHashes(vulnFunc, renamed)
                    });
                    continue;
                }
            }

            changes.Add(new FunctionChange
            {
                FunctionName = vulnFunc.Name,
                Type = ChangeType.Removed,
                VulnerableFingerprint = vulnFunc,
                PatchedFingerprint = null,
                SimilarityScore = null
            });
        }

        // Additions: patched functions that were neither matched by name nor by rename.
        foreach (var patchedFunc in patched)
        {
            // Add() returns false for names already handled, which also filters duplicates.
            if (!processedPatched.Add(patchedFunc.Name))
            {
                continue;
            }

            changes.Add(new FunctionChange
            {
                FunctionName = patchedFunc.Name,
                Type = ChangeType.Added,
                VulnerableFingerprint = null,
                PatchedFingerprint = patchedFunc,
                SimilarityScore = null
            });
        }

        _logger.LogInformation(
            "Diff computed: {Added} added, {Modified} modified, {Removed} removed, {SignatureChanged} signature changed",
            changes.Count(c => c.Type == ChangeType.Added),
            changes.Count(c => c.Type == ChangeType.Modified),
            changes.Count(c => c.Type == ChangeType.Removed),
            changes.Count(c => c.Type == ChangeType.SignatureChanged));

        return new FunctionDiffResult
        {
            Changes = changes,
            TotalFunctionsVulnerable = vulnerable.Count,
            TotalFunctionsPatched = patched.Count
        };
    }

    /// <inheritdoc />
    public decimal ComputeSimilarity(FunctionFingerprint a, FunctionFingerprint b)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);

        // Weighted share of matching hashes: basic blocks 0.5, CFG 0.3, string refs 0.2.
        decimal totalWeight = BasicBlockWeight + CfgWeight + StringRefsWeight;
        decimal matchedWeight = 0m;

        if (HashesEqual(a.BasicBlockHash, b.BasicBlockHash))
        {
            matchedWeight += BasicBlockWeight;
        }

        if (HashesEqual(a.CfgHash, b.CfgHash))
        {
            matchedWeight += CfgWeight;
        }

        if (HashesEqual(a.StringRefsHash, b.StringRefsHash))
        {
            matchedWeight += StringRefsWeight;
        }

        // Size component: only considered when both sizes are known and within 10%
        // of each other. It scales linearly from the full bonus (equal sizes) down
        // to zero (exactly 10% apart).
        if (a.Size > 0 && b.Size > 0)
        {
            var sizeDiff = Math.Abs(a.Size - b.Size) / (decimal)Math.Max(a.Size, b.Size);
            if (sizeDiff <= SizeBonusTolerance)
            {
                matchedWeight += SizeBonusWeight * (1m - sizeDiff * 10m);
                totalWeight += SizeBonusWeight;
            }
        }

        return matchedWeight / totalWeight;
    }

    /// <inheritdoc />
    public IReadOnlyDictionary<string, string> FindFunctionMappings(
        IReadOnlyList<FunctionFingerprint> vulnerable,
        IReadOnlyList<FunctionFingerprint> patched,
        decimal threshold = 0.8m)
    {
        ArgumentNullException.ThrowIfNull(vulnerable);
        ArgumentNullException.ThrowIfNull(patched);

        var mappings = new Dictionary<string, string>();
        var usedPatched = new HashSet<string>();

        // First pass: exact name matches, using a set lookup instead of scanning
        // the patched list once per vulnerable function.
        var patchedNames = new HashSet<string>(patched.Count);
        foreach (var patchedFunc in patched)
        {
            patchedNames.Add(patchedFunc.Name);
        }

        foreach (var vulnFunc in vulnerable)
        {
            if (patchedNames.Contains(vulnFunc.Name))
            {
                mappings[vulnFunc.Name] = vulnFunc.Name;
                usedPatched.Add(vulnFunc.Name);
            }
        }

        // Second pass: fingerprint-based matches for functions without a name match.
        var unmatchedPatched = patched.Where(p => !usedPatched.Contains(p.Name)).ToList();
        foreach (var vulnFunc in vulnerable)
        {
            if (mappings.ContainsKey(vulnFunc.Name))
            {
                continue;
            }

            var bestMatch = FindBestMatch(vulnFunc, unmatchedPatched, usedPatched, threshold);
            if (bestMatch != null)
            {
                mappings[vulnFunc.Name] = bestMatch.Name;
                usedPatched.Add(bestMatch.Name);
            }
        }

        return mappings;
    }

    /// <summary>
    /// Returns the candidate most similar to <paramref name="target"/>, or <c>null</c>
    /// when no eligible candidate reaches <paramref name="threshold"/>.
    /// </summary>
    /// <param name="target">Fingerprint to find a counterpart for.</param>
    /// <param name="candidates">Fingerprints to search.</param>
    /// <param name="excludeNames">Names of candidates that must not be returned.</param>
    /// <param name="threshold">Minimum similarity a candidate needs to qualify.</param>
    private FunctionFingerprint? FindBestMatch(
        FunctionFingerprint target,
        IReadOnlyList<FunctionFingerprint> candidates,
        HashSet<string> excludeNames,
        decimal threshold)
    {
        FunctionFingerprint? bestMatch = null;

        // Start marginally below the threshold so that a score equal to the
        // threshold still qualifies under the strict comparison below.
        var bestScore = threshold - 0.001m;

        foreach (var candidate in candidates)
        {
            if (excludeNames.Contains(candidate.Name))
            {
                continue;
            }

            var score = ComputeSimilarity(target, candidate);
            if (score > bestScore)
            {
                bestScore = score;
                bestMatch = candidate;
            }
        }

        return bestMatch;
    }

    /// <summary>
    /// Lists which of the three hashes ("basic_block", "cfg", "string_refs") differ
    /// between the two fingerprints.
    /// </summary>
    private static IReadOnlyList<string> GetDifferingHashes(FunctionFingerprint a, FunctionFingerprint b)
    {
        var differing = new List<string>();

        if (!HashesEqual(a.BasicBlockHash, b.BasicBlockHash))
        {
            differing.Add("basic_block");
        }

        if (!HashesEqual(a.CfgHash, b.CfgHash))
        {
            differing.Add("cfg");
        }

        if (!HashesEqual(a.StringRefsHash, b.StringRefsHash))
        {
            differing.Add("string_refs");
        }

        return differing;
    }

    /// <summary>
    /// Compares two hashes by content; two <c>null</c> hashes are equal, a <c>null</c>
    /// hash never equals a non-null one.
    /// </summary>
    private static bool HashesEqual(byte[]? a, byte[]? b)
    {
        if (a is null || b is null)
        {
            return a is null && b is null;
        }

        return a.AsSpan().SequenceEqual(b);
    }
}

View File

@@ -0,0 +1,371 @@
// -----------------------------------------------------------------------------
// ReproducibleBuildJobTypes.cs
// Types for the ReproducibleBuildJob orchestration
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Interface for the reproducible build job.
/// </summary>
public interface IReproducibleBuildJob
{
/// <summary>
/// Executes the build job, processing all pending CVEs.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the job run has finished.</returns>
Task ExecuteAsync(CancellationToken ct);
/// <summary>
/// Processes a single CVE attribution request.
/// </summary>
/// <param name="cve">CVE to process.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when processing of <paramref name="cve"/> has finished.</returns>
Task ProcessCveAsync(CveAttribution cve, CancellationToken ct);
}
/// <summary>
/// CVE attribution request: identifies a CVE together with the package, distro
/// release and the vulnerable/fixed versions to compare.
/// </summary>
public sealed record CveAttribution
{
/// <summary>
/// CVE identifier (e.g., "CVE-2024-0001").
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Source package name (e.g., "openssl", "curl").
/// </summary>
public required string SourcePackage { get; init; }
/// <summary>
/// Distribution identifier (e.g., "debian", "alpine", "rhel").
/// </summary>
public required string Distro { get; init; }
/// <summary>
/// Distribution release (e.g., "bookworm", "3.19", "9").
/// </summary>
public required string Release { get; init; }
/// <summary>
/// Vulnerable package version.
/// </summary>
public required string VulnerableVersion { get; init; }
/// <summary>
/// Fixed/patched package version.
/// </summary>
public required string FixedVersion { get; init; }
/// <summary>
/// Git commit that introduced the fix (optional; <c>null</c> when not known).
/// </summary>
public string? PatchCommit { get; init; }
/// <summary>
/// Advisory identifier (optional; <c>null</c> when not known).
/// </summary>
public string? AdvisoryId { get; init; }
}
/// <summary>
/// Advisory feed monitor interface.
/// Watches for new CVE advisories that need binary attribution.
/// </summary>
public interface IAdvisoryFeedMonitor
{
/// <summary>
/// Gets CVEs pending binary attribution.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of CVEs needing processing.</returns>
Task<IReadOnlyList<CveAttribution>> GetPendingCvesAsync(CancellationToken ct);
}
/// <summary>
/// Configuration options for reproducible builds.
/// </summary>
public sealed class ReproducibleBuildOptions
{
/// <summary>
/// Maximum time allowed for a single build. Default: 30 minutes.
/// </summary>
public TimeSpan BuildTimeout { get; set; } = TimeSpan.FromMinutes(30);
/// <summary>
/// Default target architecture. Default: "amd64".
/// </summary>
public string DefaultArchitecture { get; set; } = "amd64";
/// <summary>
/// Minimum function size to extract fingerprints for. Default: 16.
/// </summary>
public int MinFunctionSize { get; set; } = 16;
/// <summary>
/// Maximum concurrent builds. Default: 2.
/// </summary>
public int MaxConcurrentBuilds { get; set; } = 2;
/// <summary>
/// Directory for build cache storage. Default: "/var/cache/stellaops/builds".
/// </summary>
public string BuildCacheDirectory { get; set; } = "/var/cache/stellaops/builds";
}
/// <summary>
/// Background job that orchestrates reproducible builds for binary CVE attribution.
/// Monitors advisory feeds, triggers builds, extracts fingerprints, and creates claims.
/// </summary>
public sealed class ReproducibleBuildJob : IReproducibleBuildJob
{
    private readonly ILogger<ReproducibleBuildJob> _logger;
    private readonly ReproducibleBuildOptions _options;
    private readonly IEnumerable<IReproducibleBuilder> _builders;
    private readonly IFunctionFingerprintExtractor _fingerprintExtractor;
    private readonly IPatchDiffEngine _diffEngine;
    private readonly IFingerprintClaimRepository _claimRepository;
    private readonly IAdvisoryFeedMonitor _advisoryMonitor;

    /// <summary>
    /// Initializes a new instance of <see cref="ReproducibleBuildJob"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument (or the options value) is <c>null</c>.</exception>
    public ReproducibleBuildJob(
        ILogger<ReproducibleBuildJob> logger,
        Microsoft.Extensions.Options.IOptions<ReproducibleBuildOptions> options,
        IEnumerable<IReproducibleBuilder> builders,
        IFunctionFingerprintExtractor fingerprintExtractor,
        IPatchDiffEngine diffEngine,
        IFingerprintClaimRepository claimRepository,
        IAdvisoryFeedMonitor advisoryMonitor)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _builders = builders ?? throw new ArgumentNullException(nameof(builders));
        _fingerprintExtractor = fingerprintExtractor ?? throw new ArgumentNullException(nameof(fingerprintExtractor));
        _diffEngine = diffEngine ?? throw new ArgumentNullException(nameof(diffEngine));
        _claimRepository = claimRepository ?? throw new ArgumentNullException(nameof(claimRepository));
        _advisoryMonitor = advisoryMonitor ?? throw new ArgumentNullException(nameof(advisoryMonitor));
    }

    /// <inheritdoc />
    /// <remarks>
    /// A failure while processing one CVE is logged and does not stop the run.
    /// Cancellation requested through <paramref name="ct"/> is never treated as such
    /// a failure: it ends the run with an <see cref="OperationCanceledException"/>.
    /// </remarks>
    public async Task ExecuteAsync(CancellationToken ct)
    {
        _logger.LogInformation("Starting reproducible build job");

        try
        {
            // Step 1: Get pending CVEs that need binary attribution
            var pendingCves = await _advisoryMonitor.GetPendingCvesAsync(ct);
            _logger.LogInformation("Found {Count} CVEs pending binary attribution", pendingCves.Count);

            foreach (var cve in pendingCves)
            {
                ct.ThrowIfCancellationRequested();

                try
                {
                    await ProcessCveAsync(cve, ct);
                }
                catch (OperationCanceledException) when (ct.IsCancellationRequested)
                {
                    // The job itself was cancelled; do not log this as a CVE failure
                    // and do not move on to the next CVE.
                    throw;
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Failed to process CVE {CveId}", cve.CveId);
                    // Continue with next CVE
                }
            }

            _logger.LogInformation("Reproducible build job completed");
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Reproducible build job cancelled");
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Reproducible build job failed");
            throw;
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="cve"/> is <c>null</c>.</exception>
    public async Task ProcessCveAsync(CveAttribution cve, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(cve);

        _logger.LogDebug("Processing CVE {CveId} for package {Package}", cve.CveId, cve.SourcePackage);
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // Find appropriate builder for distro
        var builder = _builders.FirstOrDefault(b =>
            b.Distro.Equals(cve.Distro, StringComparison.OrdinalIgnoreCase));
        if (builder == null)
        {
            _logger.LogWarning("No builder available for distro {Distro}", cve.Distro);
            return;
        }

        // Build vulnerable version
        var vulnerableBuild = await BuildVersionAsync(builder, cve, cve.VulnerableVersion, ct);
        if (!vulnerableBuild.Success)
        {
            _logger.LogWarning("Failed to build vulnerable version {Version}", cve.VulnerableVersion);
            return;
        }

        // Build patched version
        var patchedBuild = await BuildVersionAsync(builder, cve, cve.FixedVersion, ct);
        if (!patchedBuild.Success)
        {
            _logger.LogWarning("Failed to build patched version {Version}", cve.FixedVersion);
            return;
        }

        // Extract function fingerprints from both builds
        var vulnerableFunctions = await ExtractFunctionsAsync(vulnerableBuild, ct);
        var patchedFunctions = await ExtractFunctionsAsync(patchedBuild, ct);

        // Compute diff to identify changed functions
        var diff = _diffEngine.ComputeDiff(vulnerableFunctions, patchedFunctions);
        _logger.LogDebug(
            "CVE {CveId}: {Modified} modified, {Added} added, {Removed} removed functions",
            cve.CveId, diff.ModifiedCount, diff.AddedCount, diff.RemovedCount);

        // Create fingerprint claims
        await CreateClaimsAsync(cve, diff, vulnerableBuild, patchedBuild, ct);

        stopwatch.Stop();
        _logger.LogInformation(
            "Processed CVE {CveId} in {Duration}ms",
            cve.CveId, stopwatch.ElapsedMilliseconds);
    }

    /// <summary>
    /// Builds one version of the CVE's source package with the given builder, using
    /// the configured default architecture, build timeout and minimum function size.
    /// </summary>
    private async Task<BuildResult> BuildVersionAsync(
        IReproducibleBuilder builder,
        CveAttribution cve,
        string version,
        CancellationToken ct)
    {
        var request = new BuildRequest
        {
            SourcePackage = cve.SourcePackage,
            Version = version,
            Release = cve.Release,
            Architecture = _options.DefaultArchitecture,
            Options = new BuildOptions
            {
                Timeout = _options.BuildTimeout,
                // Keep build-time extraction consistent with the fallback extraction
                // in ExtractFunctionsAsync, which uses the same setting.
                MinFunctionSize = _options.MinFunctionSize
            }
        };

        return await builder.BuildAsync(request, ct);
    }

    /// <summary>
    /// Collects the function fingerprints of every binary in <paramref name="build"/>,
    /// running the extractor for binaries that carry no fingerprints yet.
    /// </summary>
    private async Task<IReadOnlyList<FunctionFingerprint>> ExtractFunctionsAsync(
        BuildResult build,
        CancellationToken ct)
    {
        var allFunctions = new List<FunctionFingerprint>();

        foreach (var binary in build.Binaries ?? [])
        {
            if (binary.Functions != null)
            {
                allFunctions.AddRange(binary.Functions);
                continue;
            }

            // Extract if not already done during build
            var functions = await _fingerprintExtractor.ExtractAsync(
                binary.Path,
                new ExtractionOptions
                {
                    IncludeInternalFunctions = false,
                    IncludeCallGraph = true,
                    MinFunctionSize = _options.MinFunctionSize
                },
                ct);
            allFunctions.AddRange(functions);
        }

        return allFunctions;
    }

    /// <summary>
    /// Creates one "fixed" claim per patched binary and one "vulnerable" claim per
    /// vulnerable binary and stores them in a single batch.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the fingerprint ID is taken from the binary's Build-ID, which
    /// only works when that Build-ID is GUID-shaped. Binaries with any other Build-ID
    /// are skipped with a warning instead of failing the whole CVE; a proper
    /// Build-ID to fingerprint-ID lookup is still needed.
    /// </remarks>
    private async Task CreateClaimsAsync(
        CveAttribution cve,
        FunctionDiffResult diff,
        BuildResult vulnerableBuild,
        BuildResult patchedBuild,
        CancellationToken ct)
    {
        var claims = new List<FingerprintClaim>();
        var patchCommit = cve.PatchCommit ?? "unknown";

        // Create "fixed" claims for patched binaries
        foreach (var binary in patchedBuild.Binaries ?? [])
        {
            if (!TryGetFingerprintId(binary, cve, out var fingerprintId))
            {
                continue;
            }

            claims.Add(new FingerprintClaim
            {
                Id = Guid.NewGuid(),
                FingerprintId = fingerprintId,
                CveId = cve.CveId,
                Verdict = ClaimVerdict.Fixed,
                Evidence = new FingerprintClaimEvidence
                {
                    PatchCommit = patchCommit,
                    ChangedFunctions = diff.Changes
                        .Where(c => c.Type is ChangeType.Modified or ChangeType.Added)
                        .Select(c => c.FunctionName)
                        .ToList(),
                    FunctionSimilarities = BuildSimilarityMap(diff),
                    VulnerableBuildRef = vulnerableBuild.BuildLogRef,
                    PatchedBuildRef = patchedBuild.BuildLogRef
                },
                CreatedAt = DateTimeOffset.UtcNow
            });
        }

        // Create "vulnerable" claims for vulnerable binaries
        foreach (var binary in vulnerableBuild.Binaries ?? [])
        {
            if (!TryGetFingerprintId(binary, cve, out var fingerprintId))
            {
                continue;
            }

            claims.Add(new FingerprintClaim
            {
                Id = Guid.NewGuid(),
                FingerprintId = fingerprintId,
                CveId = cve.CveId,
                Verdict = ClaimVerdict.Vulnerable,
                Evidence = new FingerprintClaimEvidence
                {
                    PatchCommit = patchCommit,
                    ChangedFunctions = diff.Changes
                        .Where(c => c.Type == ChangeType.Modified)
                        .Select(c => c.FunctionName)
                        .ToList(),
                    VulnerableBuildRef = vulnerableBuild.BuildLogRef
                },
                CreatedAt = DateTimeOffset.UtcNow
            });
        }

        await _claimRepository.CreateClaimsBatchAsync(claims, ct);

        _logger.LogDebug(
            "Created {Count} fingerprint claims for CVE {CveId}",
            claims.Count, cve.CveId);
    }

    /// <summary>
    /// Maps function name to similarity score for every change that has a score.
    /// If several changes share a name, the first one is kept rather than throwing.
    /// </summary>
    private static Dictionary<string, decimal> BuildSimilarityMap(FunctionDiffResult diff)
    {
        var similarities = new Dictionary<string, decimal>();

        foreach (var change in diff.Changes)
        {
            if (change.SimilarityScore is { } score)
            {
                similarities.TryAdd(change.FunctionName, score);
            }
        }

        return similarities;
    }

    /// <summary>
    /// Tries to interpret the binary's Build-ID as a GUID; logs a warning and returns
    /// <c>false</c> when that is not possible.
    /// </summary>
    private bool TryGetFingerprintId(BuiltBinary binary, CveAttribution cve, out Guid fingerprintId)
    {
        if (Guid.TryParse(binary.BuildId, out fingerprintId))
        {
            return true;
        }

        _logger.LogWarning(
            "Skipping claim for binary {Path} (CVE {CveId}): Build-ID {BuildId} is not a valid GUID",
            binary.Path, cve.CveId, binary.BuildId);
        return false;
    }
}

View File

@@ -0,0 +1,62 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Extension methods for registering builder services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds the reproducible builder services to the DI container and binds
    /// <see cref="BuilderServiceOptions"/> to the
    /// <see cref="BuilderServiceOptions.SectionName"/> configuration section.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configuration">Configuration root.</param>
    /// <returns>Service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddBinaryIndexBuilders(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Bind the builder options from configuration. Values missing from the
        // section keep the defaults declared on BuilderServiceOptions.
        var builderSection = configuration.GetSection(BuilderServiceOptions.SectionName);
        services.AddOptions<BuilderServiceOptions>()
            .Configure(options => builderSection.Bind(options));

        // Registered with defaults only; no configuration section is bound here.
        services.Configure<FunctionExtractionOptions>(options => { });

        return AddCoreServices(services);
    }

    /// <summary>
    /// Adds the reproducible builder services with custom options.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configureOptions">Options configuration delegate.</param>
    /// <returns>Service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configureOptions"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddBinaryIndexBuilders(
        this IServiceCollection services,
        Action<BuilderServiceOptions> configureOptions)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configureOptions);

        services.Configure(configureOptions);

        return AddCoreServices(services);
    }

    /// <summary>
    /// Registers the services shared by both <c>AddBinaryIndexBuilders</c> overloads.
    /// </summary>
    private static IServiceCollection AddCoreServices(IServiceCollection services)
    {
        services.TryAddSingleton<IPatchDiffEngine, PatchDiffEngine>();

        // Not registered yet - to be added as they are implemented:
        //   IReproducibleBuilder implementations (Alpine, Debian, RHEL builders)
        //   IFunctionFingerprintExtractor implementation

        return services;
    }
}

View File

@@ -0,0 +1,24 @@
<!-- Project file for the reproducible-builder library (see Description below). -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<!-- Warnings are reported but do not fail the build for this project. -->
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<Description>Reproducible distro builders and function-level fingerprinting for StellaOps BinaryIndex.</Description>
</PropertyGroup>
<!-- NOTE(review): no Version attributes here; presumably versions come from central package management (e.g. Directory.Packages.props) - confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Docker.DotNet" />
</ItemGroup>
<!-- Sibling BinaryIndex projects this library builds on. -->
<ItemGroup>
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="../StellaOps.BinaryIndex.Fingerprints/StellaOps.BinaryIndex.Fingerprints.csproj" />
</ItemGroup>
</Project>