Fix build and code structure improvements. New but essential UI functionality. CI improvements. Documentation improvements. AI module improvements.

This commit is contained in:
StellaOps Bot
2025-12-26 21:54:17 +02:00
parent 335ff7da16
commit c2b9cd8d1f
3717 changed files with 264714 additions and 48202 deletions

View File

@@ -0,0 +1,175 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Settings for the reproducible builder infrastructure.
/// </summary>
public sealed class BuilderServiceOptions
{
    /// <summary>
    /// Name of the configuration section for these options.
    /// </summary>
    public const string SectionName = "BinaryIndex:Builders";

    /// <summary>
    /// Registry base path for the builder Docker images.
    /// </summary>
    public string BuilderImageRegistry { get; set; } = "ghcr.io/stella-ops";

    /// <summary>
    /// Directory where build artifacts are stored temporarily.
    /// </summary>
    public string ArtifactPath { get; set; } = "/tmp/binaryindex-builds";

    /// <summary>
    /// Directory where build logs are stored.
    /// </summary>
    public string LogPath { get; set; } = "/tmp/binaryindex-build-logs";

    /// <summary>
    /// Timeout applied to a build by default (30 minutes).
    /// </summary>
    public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromMinutes(30);

    /// <summary>
    /// Upper bound on the number of builds running concurrently.
    /// </summary>
    public int MaxConcurrentBuilds { get; set; } = 4;

    /// <summary>
    /// When true, artifacts of failed builds are kept for debugging.
    /// </summary>
    public bool KeepFailedArtifacts { get; set; } = true;

    /// <summary>
    /// Interval between cleanups of old artifacts (6 hours by default).
    /// </summary>
    public TimeSpan ArtifactCleanupInterval { get; set; } = TimeSpan.FromHours(6);

    /// <summary>
    /// Age after which artifacts become eligible for cleanup (1 day by default).
    /// </summary>
    public TimeSpan ArtifactMaxAge { get; set; } = TimeSpan.FromDays(1);

    /// <summary>
    /// Path of the Docker socket used for container builds.
    /// </summary>
    public string DockerSocketPath { get; set; } = "/var/run/docker.sock";

    /// <summary>
    /// When true, podman is used instead of docker. Off by default.
    /// </summary>
    public bool UsePodman { get; set; }

    /// <summary>
    /// Alpine builder configuration.
    /// </summary>
    public DistroBuilderOptions Alpine { get; set; } = new DistroBuilderOptions
    {
        Distro = "alpine",
        Enabled = true
    };

    /// <summary>
    /// Debian builder configuration.
    /// </summary>
    public DistroBuilderOptions Debian { get; set; } = new DistroBuilderOptions
    {
        Distro = "debian",
        Enabled = true
    };

    /// <summary>
    /// RHEL/CentOS builder configuration.
    /// </summary>
    public DistroBuilderOptions Rhel { get; set; } = new DistroBuilderOptions
    {
        Distro = "rhel",
        Enabled = true
    };
}
/// <summary>
/// Per-distro settings for a single builder.
/// </summary>
public sealed class DistroBuilderOptions
{
    /// <summary>
    /// Identifier of the distro. Empty by default.
    /// </summary>
    public string Distro { get; set; } = "";

    /// <summary>
    /// Whether this builder is switched on. Enabled by default.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Releases of the distro that are supported. Empty by default.
    /// </summary>
    public List<string> SupportedReleases { get; set; } = new List<string>();

    /// <summary>
    /// Template for the Docker image name. The default value contains the
    /// <c>{distro}</c> and <c>{release}</c> placeholders.
    /// </summary>
    public string ImageTemplate { get; set; } = "repro-builder-{distro}:{release}";

    /// <summary>
    /// Additional environment variables for builds. Empty by default.
    /// </summary>
    public Dictionary<string, string> EnvironmentVariables { get; set; } = new Dictionary<string, string>();

    /// <summary>
    /// Additional build flags to add. Empty by default.
    /// </summary>
    public List<string> ExtraCFlags { get; set; } = new List<string>();

    /// <summary>
    /// Timeout override for this distro; <c>null</c> when no override is set.
    /// </summary>
    public TimeSpan? Timeout { get; set; }
}
/// <summary>
/// Options for function fingerprint extraction.
/// </summary>
public sealed class FunctionExtractionOptions
{
    /// <summary>
    /// Configuration section name.
    /// </summary>
    public const string SectionName = "BinaryIndex:FunctionExtraction";

    /// <summary>
    /// Minimum function size to extract.
    /// </summary>
    public int MinFunctionSize { get; set; } = 16;

    /// <summary>
    /// Maximum function size to extract. 0 = unlimited.
    /// </summary>
    public int MaxFunctionSize { get; set; } = 0;

    /// <summary>
    /// Whether to include internal (non-exported) functions.
    /// </summary>
    public bool IncludeInternalFunctions { get; set; } = false;

    /// <summary>
    /// Whether to build call graphs.
    /// </summary>
    public bool BuildCallGraph { get; set; } = true;

    /// <summary>
    /// Patterns to exclude from extraction (regex).
    /// </summary>
    /// <remarks>
    /// The leading dot of the <c>.plt</c> and <c>.text</c> patterns is escaped so it
    /// matches a literal '.'; an unescaped dot matches any character and would also
    /// exclude ordinary symbols such as <c>utext_open</c>.
    /// </remarks>
    public List<string> ExcludePatterns { get; set; } = new()
    {
        "^__.*",        // Compiler-generated
        "^_GLOBAL_.*",  // Global constructors
        @"^\.plt.*",    // PLT stubs
        @"^\.text.*"    // Section markers
    };

    /// <summary>
    /// Path to objdump binary.
    /// </summary>
    public string ObjdumpPath { get; set; } = "objdump";

    /// <summary>
    /// Path to nm binary.
    /// </summary>
    public string NmPath { get; set; } = "nm";

    /// <summary>
    /// Path to readelf binary.
    /// </summary>
    public string ReadelfPath { get; set; } = "readelf";
}

View File

@@ -0,0 +1,304 @@
using System.Text.Json.Serialization;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Asserts a CVE verdict for one specific fingerprint.
/// Such a claim is created when reproducible builds show that a function was modified to fix a CVE.
/// </summary>
public sealed record FingerprintClaim
{
    /// <summary>
    /// Unique identifier of the claim.
    /// </summary>
    public Guid Id { get; init; }

    /// <summary>
    /// Identifier of the fingerprint the claim refers to.
    /// </summary>
    public required Guid FingerprintId { get; init; }

    /// <summary>
    /// CVE identifier, for example "CVE-2023-12345".
    /// </summary>
    public required string CveId { get; init; }

    /// <summary>
    /// The verdict: fixed, vulnerable, or unknown.
    /// </summary>
    public required ClaimVerdict Verdict { get; init; }

    /// <summary>
    /// Evidence that backs the claim.
    /// </summary>
    public required FingerprintClaimEvidence Evidence { get; init; }

    /// <summary>
    /// Hash of the DSSE attestation, when the claim is signed.
    /// </summary>
    public string? AttestationDsseHash { get; init; }

    /// <summary>
    /// Creation time of the claim. Initialized from <see cref="DateTimeOffset.UtcNow"/>
    /// when not set explicitly.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>
    /// Time of the last update, if any.
    /// </summary>
    public DateTimeOffset? UpdatedAt { get; init; }

    /// <summary>
    /// Source that produced the claim, for example "repro-builder-alpine".
    /// </summary>
    public string? Source { get; init; }

    /// <summary>
    /// Confidence in the claim on a 0.0-1.0 scale. Defaults to 1.0.
    /// </summary>
    public decimal Confidence { get; init; } = 1.0m;
}
/// <summary>
/// Possible verdicts of a fingerprint claim. Serialized to JSON by name.
/// </summary>
[JsonConverter(typeof(JsonStringEnumConverter))]
public enum ClaimVerdict
{
    /// <summary>
    /// The fingerprint comes from a binary that contains the CVE fix.
    /// </summary>
    Fixed = 0,

    /// <summary>
    /// The fingerprint comes from a binary that is vulnerable to the CVE.
    /// </summary>
    Vulnerable = 1,

    /// <summary>
    /// The fix status could not be determined.
    /// </summary>
    Unknown = 2
}
/// <summary>
/// Evidence that backs a fingerprint claim.
/// </summary>
public sealed record FingerprintClaimEvidence
{
    /// <summary>
    /// Git commit or patch reference that introduced the fix.
    /// </summary>
    public required string PatchCommit { get; init; }

    /// <summary>
    /// Names of the functions that differ between the vulnerable and the fixed version.
    /// </summary>
    public required IReadOnlyList<string> ChangedFunctions { get; init; }

    /// <summary>
    /// Similarity scores of modified functions, keyed by function name.
    /// </summary>
    public IReadOnlyDictionary<string, decimal>? FunctionSimilarities { get; init; }

    /// <summary>
    /// Reference to the artifacts of the vulnerable build.
    /// </summary>
    public string? VulnerableBuildRef { get; init; }

    /// <summary>
    /// Reference to the artifacts of the patched build.
    /// </summary>
    public string? PatchedBuildRef { get; init; }

    /// <summary>
    /// Name of the source package.
    /// </summary>
    public string? SourcePackage { get; init; }

    /// <summary>
    /// Version string of the vulnerable version.
    /// </summary>
    public string? VulnerableVersion { get; init; }

    /// <summary>
    /// Version string of the patched version.
    /// </summary>
    public string? PatchedVersion { get; init; }

    /// <summary>
    /// Distro and release the build was made for.
    /// </summary>
    public string? DistroRelease { get; init; }

    /// <summary>
    /// Builder image that was used for the reproducible builds.
    /// </summary>
    public string? BuilderImage { get; init; }

    /// <summary>
    /// Time of the vulnerable build.
    /// </summary>
    public DateTimeOffset? VulnerableBuildTimestamp { get; init; }

    /// <summary>
    /// Time of the patched build.
    /// </summary>
    public DateTimeOffset? PatchedBuildTimestamp { get; init; }

    /// <summary>
    /// Summary of the diff statistics.
    /// </summary>
    public DiffStatistics? DiffStatistics { get; init; }
}
/// <summary>
/// Storage abstraction for fingerprint claims.
/// </summary>
public interface IFingerprintClaimRepository
{
    /// <summary>
    /// Persists a new fingerprint claim.
    /// </summary>
    /// <param name="claim">Claim to create.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>ID of the created claim.</returns>
    Task<Guid> CreateClaimAsync(
        FingerprintClaim claim,
        CancellationToken ct = default);

    /// <summary>
    /// Persists several claims as one batch.
    /// </summary>
    /// <param name="claims">Claims to create.</param>
    /// <param name="ct">Cancellation token.</param>
    Task CreateClaimsBatchAsync(
        IEnumerable<FingerprintClaim> claims,
        CancellationToken ct = default);

    /// <summary>
    /// Looks up a claim by its ID.
    /// </summary>
    /// <param name="id">Claim ID.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The claim if found (the return type is nullable).</returns>
    Task<FingerprintClaim?> GetClaimByIdAsync(
        Guid id,
        CancellationToken ct = default);

    /// <summary>
    /// Returns every claim recorded for one fingerprint.
    /// </summary>
    /// <param name="fingerprintId">Fingerprint ID.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Claims for the fingerprint.</returns>
    Task<IReadOnlyList<FingerprintClaim>> GetClaimsByFingerprintAsync(Guid fingerprintId, CancellationToken ct = default);

    /// <summary>
    /// Returns every claim recorded for one fingerprint hash.
    /// </summary>
    /// <param name="fingerprintHash">Hex-encoded fingerprint hash.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Claims for the fingerprint.</returns>
    Task<IReadOnlyList<FingerprintClaim>> GetClaimsByFingerprintHashAsync(string fingerprintHash, CancellationToken ct = default);

    /// <summary>
    /// Returns every claim recorded for one CVE.
    /// </summary>
    /// <param name="cveId">CVE identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Claims for the CVE.</returns>
    Task<IReadOnlyList<FingerprintClaim>> GetClaimsByCveAsync(string cveId, CancellationToken ct = default);

    /// <summary>
    /// Returns claims that carry the given verdict.
    /// </summary>
    /// <param name="verdict">Verdict to filter by.</param>
    /// <param name="limit">Maximum number of results; 100 by default.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Claims with the verdict.</returns>
    Task<IReadOnlyList<FingerprintClaim>> GetClaimsByVerdictAsync(ClaimVerdict verdict, int limit = 100, CancellationToken ct = default);

    /// <summary>
    /// Updates a claim that already exists.
    /// </summary>
    /// <param name="claim">Claim carrying the updated values.</param>
    /// <param name="ct">Cancellation token.</param>
    Task UpdateClaimAsync(
        FingerprintClaim claim,
        CancellationToken ct = default);

    /// <summary>
    /// Removes a claim by its ID.
    /// </summary>
    /// <param name="id">Claim ID.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns><c>true</c> if the claim was deleted, <c>false</c> if it was not found.</returns>
    Task<bool> DeleteClaimAsync(
        Guid id,
        CancellationToken ct = default);

    /// <summary>
    /// Tells whether a claim already exists for a fingerprint and CVE pair.
    /// </summary>
    /// <param name="fingerprintId">Fingerprint ID.</param>
    /// <param name="cveId">CVE identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns><c>true</c> if such a claim exists.</returns>
    Task<bool> ClaimExistsAsync(
        Guid fingerprintId,
        string cveId,
        CancellationToken ct = default);
}
/// <summary>
/// Storage abstraction for function fingerprints, i.e. the per-binary breakdown.
/// </summary>
public interface IFunctionFingerprintRepository
{
    /// <summary>
    /// Saves the function fingerprints that belong to a binary.
    /// </summary>
    /// <param name="binaryFingerprintId">ID of the parent binary fingerprint.</param>
    /// <param name="functions">Function fingerprints to store.</param>
    /// <param name="ct">Cancellation token.</param>
    Task StoreFunctionsAsync(Guid binaryFingerprintId, IEnumerable<FunctionFingerprint> functions, CancellationToken ct = default);

    /// <summary>
    /// Returns all function fingerprints that belong to a binary.
    /// </summary>
    /// <param name="binaryFingerprintId">ID of the parent binary fingerprint.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Function fingerprints of the binary.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> GetFunctionsByBinaryAsync(Guid binaryFingerprintId, CancellationToken ct = default);

    /// <summary>
    /// Searches functions by a name pattern.
    /// </summary>
    /// <param name="namePattern">Pattern for the function name, in SQL LIKE form.</param>
    /// <param name="limit">Maximum number of results; 100 by default.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Matching functions, each paired with its binary ID.</returns>
    Task<IReadOnlyList<(Guid BinaryId, FunctionFingerprint Function)>> SearchFunctionsByNameAsync(string namePattern, int limit = 100, CancellationToken ct = default);

    /// <summary>
    /// Finds functions whose basic block hash equals the given one.
    /// </summary>
    /// <param name="basicBlockHash">Hash to look for.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Matching functions, each paired with its binary ID.</returns>
    Task<IReadOnlyList<(Guid BinaryId, FunctionFingerprint Function)>> FindByBasicBlockHashAsync(byte[] basicBlockHash, CancellationToken ct = default);

    /// <summary>
    /// Removes all function fingerprints that belong to a binary.
    /// </summary>
    /// <param name="binaryFingerprintId">ID of the parent binary fingerprint.</param>
    /// <param name="ct">Cancellation token.</param>
    Task DeleteFunctionsByBinaryAsync(
        Guid binaryFingerprintId,
        CancellationToken ct = default);
}

View File

@@ -0,0 +1,220 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Produces function-level fingerprints from binary files,
/// using several hashing strategies for robust matching.
/// </summary>
public interface IFunctionFingerprintExtractor
{
    /// <summary>
    /// Binary formats this extractor supports.
    /// </summary>
    IReadOnlyList<string> SupportedFormats { get; }

    /// <summary>
    /// Extracts the function fingerprints of a binary file on disk.
    /// </summary>
    /// <param name="binaryPath">Path of the binary file.</param>
    /// <param name="options">Extraction options; optional.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The extracted function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractAsync(string binaryPath, ExtractionOptions? options = null, CancellationToken ct = default);

    /// <summary>
    /// Extracts the function fingerprints of a binary held in memory.
    /// </summary>
    /// <param name="binaryData">Contents of the binary file.</param>
    /// <param name="options">Extraction options; optional.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The extracted function fingerprints.</returns>
    Task<IReadOnlyList<FunctionFingerprint>> ExtractFromMemoryAsync(ReadOnlyMemory<byte> binaryData, ExtractionOptions? options = null, CancellationToken ct = default);
}
/// <summary>
/// Fingerprint of one function inside a binary.
/// Several hash algorithms are stored to make matching across versions robust.
/// </summary>
/// <remarks>
/// The hash members are arrays, so the compiler-generated record equality
/// compares them by reference rather than by content.
/// </remarks>
public sealed record FunctionFingerprint
{
    /// <summary>
    /// Name of the function: the symbol name, or one synthesized from the offset.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Offset of the function inside the .text section.
    /// </summary>
    public required long Offset { get; init; }

    /// <summary>
    /// Function size in bytes.
    /// </summary>
    public required int Size { get; init; }

    /// <summary>
    /// Hash over the basic block structure (the opcode sequence, with operands ignored).
    /// More stable when recompilation changes addresses.
    /// </summary>
    public required byte[] BasicBlockHash { get; init; }

    /// <summary>
    /// Hash over the structure of the control flow graph.
    /// Reflects branch patterns independent of target addresses.
    /// </summary>
    public required byte[] CfgHash { get; init; }

    /// <summary>
    /// Hash over the string references of the function.
    /// Helps to identify functions that use specific error messages or constants.
    /// </summary>
    public required byte[] StringRefsHash { get; init; }

    /// <summary>
    /// Combined fingerprint hash that merges all algorithms.
    /// </summary>
    public byte[]? CombinedHash { get; init; }

    /// <summary>
    /// Functions that this function calls.
    /// </summary>
    public IReadOnlyList<string>? Callees { get; init; }

    /// <summary>
    /// Functions that call this function.
    /// </summary>
    public IReadOnlyList<string>? Callers { get; init; }

    /// <summary>
    /// True for an exported (visible) symbol.
    /// </summary>
    public bool IsExported { get; init; }

    /// <summary>
    /// True when debug information is available for the function.
    /// </summary>
    public bool HasDebugInfo { get; init; }

    /// <summary>
    /// Path of the source file, when debug info is available.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Line number in the source, when debug info is available.
    /// </summary>
    public int? SourceLine { get; init; }
}
/// <summary>
/// Per-call options for function fingerprint extraction.
/// </summary>
public sealed record ExtractionOptions
{
    /// <summary>
    /// Include internal/static (non-exported) functions. Off by default.
    /// </summary>
    public bool IncludeInternalFunctions { get; init; }

    /// <summary>
    /// Build the call graph (callees/callers). On by default.
    /// </summary>
    public bool IncludeCallGraph { get; init; } = true;

    /// <summary>
    /// Smallest function size, in bytes, that is included. Defaults to 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Largest function size, in bytes, that is included. 0 (the default) means no limit.
    /// </summary>
    public int MaxFunctionSize { get; init; }

    /// <summary>
    /// Regex selecting the function names to include. <c>null</c> means all functions.
    /// </summary>
    public string? SymbolFilter { get; init; }

    /// <summary>
    /// Regex selecting the function names to exclude.
    /// </summary>
    public string? ExcludeFilter { get; init; }

    /// <summary>
    /// Compute the combined hash. On by default.
    /// </summary>
    public bool ComputeCombinedHash { get; init; } = true;

    /// <summary>
    /// Extract debug information (source file/line). Off by default.
    /// </summary>
    public bool ExtractDebugInfo { get; init; }
}
/// <summary>
/// Describes how one function changed between two versions of a binary.
/// </summary>
public sealed record FunctionChange
{
    /// <summary>
    /// Name of the function.
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Kind of change that was detected.
    /// </summary>
    public required ChangeType Type { get; init; }

    /// <summary>
    /// Fingerprint taken from the vulnerable version; null for an Added change.
    /// </summary>
    public FunctionFingerprint? VulnerableFingerprint { get; init; }

    /// <summary>
    /// Fingerprint taken from the patched version; null for a Removed change.
    /// </summary>
    public FunctionFingerprint? PatchedFingerprint { get; init; }

    /// <summary>
    /// Similarity between the two versions (0.0-1.0), for Modified changes.
    /// </summary>
    public decimal? SimilarityScore { get; init; }

    /// <summary>
    /// The hash algorithms that showed differences.
    /// </summary>
    public IReadOnlyList<string>? DifferingHashes { get; init; }
}
/// <summary>
/// Kinds of change a function can undergo between versions.
/// </summary>
public enum ChangeType
{
    /// <summary>
    /// The function was added in the patched version.
    /// </summary>
    Added = 0,

    /// <summary>
    /// The function was modified (its fingerprint changed).
    /// </summary>
    Modified = 1,

    /// <summary>
    /// The function was removed in the patched version.
    /// </summary>
    Removed = 2,

    /// <summary>
    /// The function signature changed (size/callees differ significantly).
    /// </summary>
    SignatureChanged = 3
}

View File

@@ -0,0 +1,216 @@
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Diffs the function fingerprints of a vulnerable and a patched binary
/// in order to identify the functions that were modified to fix a CVE.
/// </summary>
public interface IPatchDiffEngine
{
    /// <summary>
    /// Compares the function fingerprints of the vulnerable and the patched build.
    /// </summary>
    /// <param name="vulnerable">Functions of the vulnerable binary.</param>
    /// <param name="patched">Functions of the patched binary.</param>
    /// <param name="options">Diff options; optional.</param>
    /// <returns>Diff result listing the identified changes.</returns>
    FunctionDiffResult ComputeDiff(IReadOnlyList<FunctionFingerprint> vulnerable, IReadOnlyList<FunctionFingerprint> patched, DiffOptions? options = null);

    /// <summary>
    /// Computes how similar two function fingerprints are.
    /// </summary>
    /// <param name="a">First function fingerprint.</param>
    /// <param name="b">Second function fingerprint.</param>
    /// <returns>Similarity score in the range 0.0-1.0.</returns>
    decimal ComputeSimilarity(
        FunctionFingerprint a,
        FunctionFingerprint b);

    /// <summary>
    /// Finds functions that likely correspond between versions even though their names changed.
    /// Fingerprint matching is used to detect renamed or moved functions.
    /// </summary>
    /// <param name="vulnerable">Functions of the vulnerable binary.</param>
    /// <param name="patched">Functions of the patched binary.</param>
    /// <param name="threshold">Minimum similarity for a match; 0.8 by default.</param>
    /// <returns>Map from vulnerable function names to patched function names.</returns>
    IReadOnlyDictionary<string, string> FindFunctionMappings(IReadOnlyList<FunctionFingerprint> vulnerable, IReadOnlyList<FunctionFingerprint> patched, decimal threshold = 0.8m);
}
/// <summary>
/// Result of computing a diff between function sets.
/// </summary>
/// <remarks>
/// All counts are derived from <see cref="Changes"/> on each access; nothing is cached.
/// </remarks>
public sealed record FunctionDiffResult
{
    /// <summary>
    /// All function changes detected.
    /// </summary>
    public required IReadOnlyList<FunctionChange> Changes { get; init; }

    /// <summary>
    /// Total functions in vulnerable version.
    /// </summary>
    public int TotalFunctionsVulnerable { get; init; }

    /// <summary>
    /// Total functions in patched version.
    /// </summary>
    public int TotalFunctionsPatched { get; init; }

    /// <summary>
    /// Number of functions added.
    /// </summary>
    public int AddedCount => Changes.Count(c => c.Type == ChangeType.Added);

    /// <summary>
    /// Number of functions modified.
    /// </summary>
    public int ModifiedCount => Changes.Count(c => c.Type == ChangeType.Modified);

    /// <summary>
    /// Number of functions removed.
    /// </summary>
    public int RemovedCount => Changes.Count(c => c.Type == ChangeType.Removed);

    /// <summary>
    /// Number of functions with signature changes.
    /// </summary>
    public int SignatureChangedCount => Changes.Count(c => c.Type == ChangeType.SignatureChanged);

    /// <summary>
    /// Number of functions that remained unchanged: the vulnerable total minus the
    /// modified, removed and signature-changed functions. Never negative, even when
    /// <see cref="TotalFunctionsVulnerable"/> was left unset or is smaller than the
    /// number of recorded changes.
    /// </summary>
    public int UnchangedCount => Tally().Unchanged;

    /// <summary>
    /// Percentage of functions that changed (0-100), relative to the vulnerable total.
    /// Zero when the vulnerable total is not positive.
    /// </summary>
    public decimal ChangePercentage
    {
        get
        {
            if (TotalFunctionsVulnerable <= 0)
            {
                return 0m;
            }

            var tally = Tally();
            return 100m * (tally.Modified + tally.SignatureChanged) / TotalFunctionsVulnerable;
        }
    }

    /// <summary>
    /// Summary statistics. A new <see cref="DiffStatistics"/> is built on every access.
    /// </summary>
    public DiffStatistics Statistics
    {
        get
        {
            var tally = Tally();
            return new DiffStatistics
            {
                TotalVulnerable = TotalFunctionsVulnerable,
                TotalPatched = TotalFunctionsPatched,
                Added = tally.Added,
                Modified = tally.Modified,
                Removed = tally.Removed,
                SignatureChanged = tally.SignatureChanged,
                Unchanged = tally.Unchanged
            };
        }
    }

    /// <summary>
    /// Counts every change type in a single pass over <see cref="Changes"/>.
    /// </summary>
    private (int Added, int Modified, int Removed, int SignatureChanged, int Unchanged) Tally()
    {
        int added = 0, modified = 0, removed = 0, signatureChanged = 0;

        foreach (var change in Changes)
        {
            switch (change.Type)
            {
                case ChangeType.Added:
                    added++;
                    break;
                case ChangeType.Modified:
                    modified++;
                    break;
                case ChangeType.Removed:
                    removed++;
                    break;
                case ChangeType.SignatureChanged:
                    signatureChanged++;
                    break;
            }
        }

        // Added functions do not exist in the vulnerable set, so they are not subtracted.
        var unchanged = TotalFunctionsVulnerable - modified - removed - signatureChanged;

        // Clamp: an inconsistent or unset total must not produce a negative count.
        return (added, modified, removed, signatureChanged, unchanged < 0 ? 0 : unchanged);
    }
}
/// <summary>
/// Aggregated counts that summarize a diff.
/// </summary>
public sealed record DiffStatistics
{
    /// <summary>
    /// Number of functions in the vulnerable version.
    /// </summary>
    public int TotalVulnerable { get; init; }

    /// <summary>
    /// Number of functions in the patched version.
    /// </summary>
    public int TotalPatched { get; init; }

    /// <summary>
    /// Number of added functions.
    /// </summary>
    public int Added { get; init; }

    /// <summary>
    /// Number of modified functions.
    /// </summary>
    public int Modified { get; init; }

    /// <summary>
    /// Number of removed functions.
    /// </summary>
    public int Removed { get; init; }

    /// <summary>
    /// Number of functions whose signature changed.
    /// </summary>
    public int SignatureChanged { get; init; }

    /// <summary>
    /// Number of unchanged functions.
    /// </summary>
    public int Unchanged { get; init; }
}
/// <summary>
/// Settings that control how a diff is computed.
/// </summary>
public sealed record DiffOptions
{
    /// <summary>
    /// Lowest similarity score at which two functions are treated as the same
    /// function (modified) rather than as different functions. Defaults to 0.5.
    /// </summary>
    public decimal SimilarityThreshold { get; init; } = 0.5m;

    /// <summary>
    /// Use fuzzy name matching for renamed functions. On by default.
    /// </summary>
    public bool FuzzyNameMatching { get; init; } = true;

    /// <summary>
    /// Include unchanged functions in the result. Off by default.
    /// </summary>
    public bool IncludeUnchanged { get; init; }

    /// <summary>
    /// Weights of the individual hash algorithms used when computing similarity.
    /// </summary>
    public HashWeights Weights { get; init; } = HashWeights.Default;

    /// <summary>
    /// Detect renamed functions. On by default.
    /// </summary>
    public bool DetectRenames { get; init; } = true;

    /// <summary>
    /// Lowest score at which a function counts as renamed instead of added plus removed.
    /// Defaults to 0.7.
    /// </summary>
    public decimal RenameThreshold { get; init; } = 0.7m;
}
/// <summary>
/// Weights for different hash algorithms when computing similarity.
/// </summary>
public sealed record HashWeights
{
    /// <summary>
    /// Weight for basic block hash comparison.
    /// </summary>
    public decimal BasicBlockWeight { get; init; } = 0.5m;

    /// <summary>
    /// Weight for CFG hash comparison.
    /// </summary>
    public decimal CfgWeight { get; init; } = 0.3m;

    /// <summary>
    /// Weight for string refs hash comparison.
    /// </summary>
    public decimal StringRefsWeight { get; init; } = 0.2m;

    /// <summary>
    /// Default weights (0.5 / 0.3 / 0.2). The record is immutable, so a single
    /// shared instance is returned instead of allocating one per access.
    /// </summary>
    public static HashWeights Default { get; } = new();

    /// <summary>
    /// Validates the weights: none may be negative and they must sum to 1.0
    /// (within a tolerance of 0.001).
    /// </summary>
    public bool IsValid =>
        BasicBlockWeight >= 0m
        && CfgWeight >= 0m
        && StringRefsWeight >= 0m
        && Math.Abs(BasicBlockWeight + CfgWeight + StringRefsWeight - 1.0m) < 0.001m;
}

View File

@@ -0,0 +1,428 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Builds distro packages from source using reproducible settings.
/// Both the vulnerable and the patched version can be built so their fingerprints can be diffed.
/// </summary>
public interface IReproducibleBuilder
{
    /// <summary>
    /// Identifier of the distro this builder supports, e.g. "alpine", "debian", "rhel".
    /// </summary>
    string Distro { get; }

    /// <summary>
    /// Releases this builder can target, e.g. "3.18", "bookworm", "9".
    /// </summary>
    IReadOnlyList<string> SupportedReleases { get; }

    /// <summary>
    /// Builds a package from source, optionally with patches applied.
    /// </summary>
    /// <param name="request">Parameters of the build request.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Build result containing the output binaries and fingerprints.</returns>
    Task<BuildResult> BuildAsync(
        BuildRequest request,
        CancellationToken ct = default);

    /// <summary>
    /// Builds the vulnerable and the patched version and returns the diff of their function fingerprints.
    /// This is the primary method for CVE fix attribution.
    /// </summary>
    /// <param name="request">Parameters of the patch diff request.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Diff result showing the functions that changed between the versions.</returns>
    Task<PatchDiffResult> BuildAndDiffAsync(
        PatchDiffRequest request,
        CancellationToken ct = default);

    /// <summary>
    /// Checks that the build environment is configured correctly for the target release.
    /// </summary>
    /// <param name="release">Target release to validate.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Validation result listing any issues found.</returns>
    Task<BuildEnvironmentValidation> ValidateEnvironmentAsync(
        string release,
        CancellationToken ct = default);
}
/// <summary>
/// Parameters that describe one reproducible build.
/// </summary>
public sealed record BuildRequest
{
    /// <summary>
    /// Name of the source package, e.g. "openssl" or "curl".
    /// </summary>
    public required string SourcePackage { get; init; }

    /// <summary>
    /// Version of the package to build.
    /// </summary>
    public required string Version { get; init; }

    /// <summary>
    /// Distro release to target, e.g. "3.18" or "bookworm".
    /// </summary>
    public required string Release { get; init; }

    /// <summary>
    /// Patches to apply before building; optional.
    /// </summary>
    public IReadOnlyList<PatchReference>? Patches { get; init; }

    /// <summary>
    /// Architecture to target, e.g. "x86_64" or "aarch64". Defaults to the current architecture.
    /// </summary>
    public string? Architecture { get; init; }

    /// <summary>
    /// Options for reproducibility and normalization of the build.
    /// </summary>
    public BuildOptions? Options { get; init; }

    /// <summary>
    /// Unique identifier of the request, used for tracking; optional.
    /// </summary>
    public string? RequestId { get; init; }
}
/// <summary>
/// Points to a security patch.
/// </summary>
public sealed record PatchReference
{
    /// <summary>
    /// Identifier of the CVE fixed by the patch.
    /// </summary>
    public required string CveId { get; init; }

    /// <summary>
    /// URL of the patch file.
    /// </summary>
    public required string PatchUrl { get; init; }

    /// <summary>
    /// Expected SHA-256 hash of the patch file, used for integrity verification.
    /// </summary>
    public string? PatchSha256 { get; init; }

    /// <summary>
    /// Git commit ID, when the patch originates from a repository.
    /// </summary>
    public string? CommitId { get; init; }

    /// <summary>
    /// Ordering hint for applying patches (lower = earlier). Defaults to 0.
    /// </summary>
    public int Order { get; init; }
}
/// <summary>
/// Settings that control the reproducibility of a build.
/// </summary>
public sealed record BuildOptions
{
    /// <summary>
    /// Value for SOURCE_DATE_EPOCH. When null, it is extracted from changelog/git.
    /// </summary>
    public DateTimeOffset? SourceDateEpoch { get; init; }

    /// <summary>
    /// Strip binaries after the build. Default: false.
    /// </summary>
    public bool StripBinaries { get; init; }

    /// <summary>
    /// Extract function-level fingerprints. Default: true.
    /// </summary>
    public bool ExtractFunctionFingerprints { get; init; } = true;

    /// <summary>
    /// Smallest function size, in bytes, included in fingerprint extraction. Default: 16.
    /// </summary>
    public int MinFunctionSize { get; init; } = 16;

    /// <summary>
    /// Timeout of the build. Default: 30 minutes.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);

    /// <summary>
    /// Keep the build artifacts for debugging. Default: false.
    /// </summary>
    public bool KeepBuildArtifacts { get; init; }
}
/// <summary>
/// Outcome of a reproducible build.
/// </summary>
public sealed record BuildResult
{
    /// <summary>
    /// True when the build succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// The built binaries together with their extracted fingerprints.
    /// </summary>
    public IReadOnlyList<BuiltBinary>? Binaries { get; init; }

    /// <summary>
    /// Error message, when the build failed.
    /// </summary>
    public string? ErrorMessage { get; init; }

    /// <summary>
    /// Total duration of the build.
    /// </summary>
    public TimeSpan Duration { get; init; }

    /// <summary>
    /// Reference to the full build log, e.g. a content-addressed storage ID.
    /// </summary>
    public string? BuildLogRef { get; init; }

    /// <summary>
    /// The SOURCE_DATE_EPOCH that was used for the build.
    /// </summary>
    public DateTimeOffset? SourceDateEpoch { get; init; }

    /// <summary>
    /// Build container image that was used.
    /// </summary>
    public string? BuilderImage { get; init; }

    /// <summary>
    /// Creates a result for a failed build.
    /// </summary>
    /// <param name="message">Stored as <see cref="ErrorMessage"/>.</param>
    /// <param name="duration">Stored as <see cref="Duration"/>.</param>
    /// <returns>A result with <see cref="Success"/> set to <c>false</c>.</returns>
    public static BuildResult Failed(string message, TimeSpan duration)
    {
        var failure = new BuildResult
        {
            Success = false,
            ErrorMessage = message,
            Duration = duration
        };
        return failure;
    }
}
/// <summary>
/// One binary that a build produced.
/// </summary>
public sealed record BuiltBinary
{
    /// <summary>
    /// Path relative to the build output.
    /// </summary>
    public required string Path { get; init; }

    /// <summary>
    /// Hex-encoded ELF Build-ID.
    /// </summary>
    public required string BuildId { get; init; }

    /// <summary>
    /// SHA-256 over the .text section.
    /// </summary>
    public required byte[] TextSha256 { get; init; }

    /// <summary>
    /// Combined fingerprint hash.
    /// </summary>
    public required byte[] Fingerprint { get; init; }

    /// <summary>
    /// SHA-256 over the whole file.
    /// </summary>
    public byte[]? FileSha256 { get; init; }

    /// <summary>
    /// Function-level fingerprints, when extraction was enabled.
    /// </summary>
    public IReadOnlyList<FunctionFingerprint>? Functions { get; init; }

    /// <summary>
    /// Binary format (ELF, PE, Mach-O). Defaults to "elf".
    /// </summary>
    public string Format { get; init; } = "elf";

    /// <summary>
    /// Architecture the binary targets.
    /// </summary>
    public string? Architecture { get; init; }

    /// <summary>
    /// True when the binary is stripped of debug symbols.
    /// </summary>
    public bool IsStripped { get; init; }
}
/// <summary>
/// Request for building and diffing vulnerable vs. patched versions.
/// </summary>
/// <remarks>
/// All members except <see cref="Options"/> are <c>required</c> and must be supplied
/// in the object initializer.
/// </remarks>
public sealed record PatchDiffRequest
{
/// <summary>
/// Source package name.
/// </summary>
public required string SourcePackage { get; init; }
/// <summary>
/// Vulnerable version to build first.
/// </summary>
public required string VulnerableVersion { get; init; }
/// <summary>
/// Patched version or patches to apply to vulnerable version.
/// </summary>
public required PatchTarget PatchTarget { get; init; }
/// <summary>
/// Target distro release.
/// </summary>
public required string Release { get; init; }
/// <summary>
/// CVE being fixed (for attribution).
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Build options. Optional; <c>null</c> when the caller supplies none
/// (how a builder treats <c>null</c> is not shown in this file).
/// </summary>
public BuildOptions? Options { get; init; }
}
/// <summary>
/// Specifies how to get the patched version.
/// </summary>
/// <remarks>
/// Both members are optional and nothing in this type enforces that exactly one is set.
/// NOTE(review): presumably consumers validate that at least one of them is non-null — confirm.
/// </remarks>
public sealed record PatchTarget
{
/// <summary>
/// If set, build this version as the patched version (e.g., downstream fixed release).
/// </summary>
public string? PatchedVersion { get; init; }
/// <summary>
/// If set, apply these patches to the vulnerable version.
/// </summary>
public IReadOnlyList<PatchReference>? Patches { get; init; }
}
/// <summary>
/// Outcome of building a vulnerable and a patched version and diffing the two.
/// </summary>
public sealed record PatchDiffResult
{
    /// <summary>
    /// <c>true</c> when both builds succeeded and the diff was computed.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Result of building the vulnerable version, if available.
    /// </summary>
    public BuildResult? VulnerableBuild { get; init; }

    /// <summary>
    /// Result of building the patched version, if available.
    /// </summary>
    public BuildResult? PatchedBuild { get; init; }

    /// <summary>
    /// Per-binary function-level changes, if a diff was produced.
    /// </summary>
    public IReadOnlyList<BinaryDiff>? BinaryDiffs { get; init; }

    /// <summary>
    /// Failure description; set by <see cref="Failed"/>.
    /// </summary>
    public string? ErrorMessage { get; init; }

    /// <summary>
    /// Builds the result object for a diff that could not be completed.
    /// </summary>
    /// <param name="message">Failure reason; stored in <see cref="ErrorMessage"/>.</param>
    /// <returns>
    /// A result with <see cref="Success"/> set to <c>false</c> and no build or diff data attached.
    /// </returns>
    public static PatchDiffResult Failed(string message)
    {
        var failure = new PatchDiffResult
        {
            Success = false,
            ErrorMessage = message
        };

        return failure;
    }
}
/// <summary>
/// Diff results for a single binary between vulnerable and patched builds.
/// </summary>
/// <remarks>
/// <see cref="Path"/> and <see cref="Changes"/> are <c>required</c>; the Build-IDs and
/// function totals are optional and default to <c>null</c> / <c>0</c> respectively.
/// </remarks>
public sealed record BinaryDiff
{
/// <summary>
/// Binary path (common between both builds).
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Function changes detected.
/// </summary>
public required IReadOnlyList<FunctionChange> Changes { get; init; }
/// <summary>
/// Build-ID of the vulnerable version.
/// </summary>
public string? VulnerableBuildId { get; init; }
/// <summary>
/// Build-ID of the patched version.
/// </summary>
public string? PatchedBuildId { get; init; }
/// <summary>
/// Total functions in vulnerable binary.
/// </summary>
public int TotalFunctionsVulnerable { get; init; }
/// <summary>
/// Total functions in patched binary.
/// </summary>
public int TotalFunctionsPatched { get; init; }
}
/// <summary>
/// Outcome of checking whether a build environment can be used.
/// </summary>
public sealed record BuildEnvironmentValidation
{
    /// <summary>
    /// <c>true</c> when the environment passed validation.
    /// </summary>
    public required bool IsValid { get; init; }

    /// <summary>
    /// Problems discovered during validation; populated by <see cref="Invalid"/>.
    /// </summary>
    public IReadOnlyList<string>? Issues { get; init; }

    /// <summary>
    /// Builder container image that was found; populated by <see cref="Valid"/>.
    /// </summary>
    public string? BuilderImage { get; init; }

    /// <summary>
    /// Detected toolchain versions, keyed by name; populated by <see cref="Valid"/> when supplied.
    /// </summary>
    public IReadOnlyDictionary<string, string>? ToolchainVersions { get; init; }

    /// <summary>
    /// Builds a passing validation result.
    /// </summary>
    /// <param name="image">Builder image to record in <see cref="BuilderImage"/>.</param>
    /// <param name="versions">Optional toolchain versions to record in <see cref="ToolchainVersions"/>.</param>
    /// <returns>A result with <see cref="IsValid"/> set to <c>true</c> and no issues.</returns>
    public static BuildEnvironmentValidation Valid(string image, IReadOnlyDictionary<string, string>? versions = null)
    {
        var passing = new BuildEnvironmentValidation
        {
            IsValid = true,
            BuilderImage = image,
            ToolchainVersions = versions
        };

        return passing;
    }

    /// <summary>
    /// Builds a failing validation result.
    /// </summary>
    /// <param name="issues">Problems to record in <see cref="Issues"/>.</param>
    /// <returns>A result with <see cref="IsValid"/> set to <c>false</c> and no image or toolchain data.</returns>
    public static BuildEnvironmentValidation Invalid(IReadOnlyList<string> issues)
    {
        var failing = new BuildEnvironmentValidation
        {
            IsValid = false,
            Issues = issues
        };

        return failing;
    }
}

View File

@@ -0,0 +1,288 @@
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Computes diffs between function fingerprints of vulnerable and patched binaries.
/// </summary>
/// <remarks>
/// <para>
/// Functions are paired by exact name first. A vulnerable function with no same-named
/// counterpart may optionally be paired with the most similar still-unpaired patched
/// function (rename detection).
/// </para>
/// <para>
/// Duplicate names are tolerated: when several patched functions share a name, the first
/// one in the input list is used for comparison; each same-named vulnerable function is
/// compared against it and may therefore yield several changes with the same name.
/// </para>
/// </remarks>
public sealed class PatchDiffEngine : IPatchDiffEngine
{
    // Weights of the individual signals used by ComputeSimilarity.
    private const decimal BasicBlockWeight = 0.5m;
    private const decimal CfgWeight = 0.3m;
    private const decimal StringRefsWeight = 0.2m;
    private const decimal SizeWeight = 0.05m;

    // Relative size difference up to which the size term participates in the score.
    private const decimal SizeTolerance = 0.1m;

    private readonly ILogger<PatchDiffEngine> _logger;

    /// <summary>
    /// Initializes a new instance of <see cref="PatchDiffEngine"/>.
    /// </summary>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
    public PatchDiffEngine(ILogger<PatchDiffEngine> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public FunctionDiffResult ComputeDiff(
        IReadOnlyList<FunctionFingerprint> vulnerable,
        IReadOnlyList<FunctionFingerprint> patched,
        DiffOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(vulnerable);
        ArgumentNullException.ThrowIfNull(patched);
        options ??= new DiffOptions();

        _logger.LogDebug(
            "Computing diff: {VulnerableCount} vulnerable functions, {PatchedCount} patched functions",
            vulnerable.Count, patched.Count);

        var changes = new List<FunctionChange>();

        // Index patched functions by name. TryAdd keeps the first occurrence, so inputs that
        // contain the same symbol name more than once (e.g. functions aggregated from several
        // binaries) do not throw the way ToDictionary would.
        var patchedByName = new Dictionary<string, FunctionFingerprint>(patched.Count);
        foreach (var patchedFunc in patched)
        {
            patchedByName.TryAdd(patchedFunc.Name, patchedFunc);
        }

        // Names of patched functions that are already spoken for. Every name that also exists
        // on the vulnerable side is reserved up front so that rename detection for an earlier
        // vulnerable function cannot claim a patched function whose exact-name counterpart
        // simply appears later in the list.
        var processedPatched = new HashSet<string>();
        foreach (var vulnFunc in vulnerable)
        {
            if (patchedByName.ContainsKey(vulnFunc.Name))
            {
                processedPatched.Add(vulnFunc.Name);
            }
        }

        // Modifications, signature changes, renames and removals.
        foreach (var vulnFunc in vulnerable)
        {
            if (patchedByName.TryGetValue(vulnFunc.Name, out var sameNamed))
            {
                var similarity = ComputeSimilarity(vulnFunc, sameNamed);
                if (similarity >= 1.0m)
                {
                    // Unchanged. DiffOptions.IncludeUnchanged is not honored yet:
                    // unchanged functions are never emitted.
                    continue;
                }

                changes.Add(new FunctionChange
                {
                    FunctionName = vulnFunc.Name,
                    // Below the threshold the two bodies are too different to be
                    // treated as the same function that was merely edited.
                    Type = similarity >= options.SimilarityThreshold
                        ? ChangeType.Modified
                        : ChangeType.SignatureChanged,
                    VulnerableFingerprint = vulnFunc,
                    PatchedFingerprint = sameNamed,
                    SimilarityScore = similarity,
                    DifferingHashes = GetDifferingHashes(vulnFunc, sameNamed)
                });
                continue;
            }

            // Not found by name - check if renamed.
            if (options.DetectRenames)
            {
                var bestMatch = FindBestMatch(vulnFunc, patched, processedPatched, options.RenameThreshold);
                if (bestMatch is not null)
                {
                    processedPatched.Add(bestMatch.Name);
                    changes.Add(new FunctionChange
                    {
                        FunctionName = $"{vulnFunc.Name} → {bestMatch.Name}",
                        Type = ChangeType.Modified,
                        VulnerableFingerprint = vulnFunc,
                        PatchedFingerprint = bestMatch,
                        SimilarityScore = ComputeSimilarity(vulnFunc, bestMatch),
                        DifferingHashes = GetDifferingHashes(vulnFunc, bestMatch)
                    });
                    continue;
                }
            }

            changes.Add(new FunctionChange
            {
                FunctionName = vulnFunc.Name,
                Type = ChangeType.Removed,
                VulnerableFingerprint = vulnFunc,
                PatchedFingerprint = null,
                SimilarityScore = null
            });
        }

        // Additions: patched functions that were paired with nothing above.
        foreach (var patchedFunc in patched)
        {
            if (processedPatched.Contains(patchedFunc.Name))
            {
                continue;
            }

            changes.Add(new FunctionChange
            {
                FunctionName = patchedFunc.Name,
                Type = ChangeType.Added,
                VulnerableFingerprint = null,
                PatchedFingerprint = patchedFunc,
                SimilarityScore = null
            });
        }

        // Single pass for the summary instead of one LINQ scan per change type.
        int added = 0, modified = 0, removed = 0, signatureChanged = 0;
        foreach (var change in changes)
        {
            switch (change.Type)
            {
                case ChangeType.Added: added++; break;
                case ChangeType.Modified: modified++; break;
                case ChangeType.Removed: removed++; break;
                case ChangeType.SignatureChanged: signatureChanged++; break;
            }
        }

        _logger.LogInformation(
            "Diff computed: {Added} added, {Modified} modified, {Removed} removed, {SignatureChanged} signature changed",
            added, modified, removed, signatureChanged);

        return new FunctionDiffResult
        {
            Changes = changes,
            TotalFunctionsVulnerable = vulnerable.Count,
            TotalFunctionsPatched = patched.Count
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// The score is <c>matchedWeight / totalWeight</c>, where the basic-block, CFG and
    /// string-reference hashes contribute 0.5, 0.3 and 0.2 when equal. When both sizes are
    /// positive and differ by at most 10%, a size term of weight 0.05 is added to the
    /// denominator and a linearly scaled share of it (full at equal size, zero at 10%) to the numerator.
    /// NOTE(review): because the size term also enlarges the denominator, it can lower the
    /// score of otherwise-identical functions whose sizes differ slightly, while a size
    /// difference above 10% leaves the score untouched. Left as is because changing the
    /// formula would shift scores against existing thresholds — confirm intent.
    /// </remarks>
    public decimal ComputeSimilarity(FunctionFingerprint a, FunctionFingerprint b)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);

        decimal totalWeight = BasicBlockWeight + CfgWeight + StringRefsWeight;
        decimal matchedWeight = 0m;

        if (HashesEqual(a.BasicBlockHash, b.BasicBlockHash))
        {
            matchedWeight += BasicBlockWeight;
        }

        if (HashesEqual(a.CfgHash, b.CfgHash))
        {
            matchedWeight += CfgWeight;
        }

        if (HashesEqual(a.StringRefsHash, b.StringRefsHash))
        {
            matchedWeight += StringRefsWeight;
        }

        if (a.Size > 0 && b.Size > 0)
        {
            var sizeDiff = Math.Abs(a.Size - b.Size) / (decimal)Math.Max(a.Size, b.Size);
            if (sizeDiff <= SizeTolerance)
            {
                matchedWeight += SizeWeight * (1m - sizeDiff * 10m);
                totalWeight += SizeWeight;
            }
        }

        return matchedWeight / totalWeight;
    }

    /// <inheritdoc />
    public IReadOnlyDictionary<string, string> FindFunctionMappings(
        IReadOnlyList<FunctionFingerprint> vulnerable,
        IReadOnlyList<FunctionFingerprint> patched,
        decimal threshold = 0.8m)
    {
        ArgumentNullException.ThrowIfNull(vulnerable);
        ArgumentNullException.ThrowIfNull(patched);

        var mappings = new Dictionary<string, string>();
        var usedPatched = new HashSet<string>();

        // First pass: exact name matches. A hash set replaces the per-function linear scan.
        var patchedNames = new HashSet<string>(patched.Count);
        foreach (var patchedFunc in patched)
        {
            patchedNames.Add(patchedFunc.Name);
        }

        foreach (var vulnFunc in vulnerable)
        {
            if (patchedNames.Contains(vulnFunc.Name))
            {
                mappings[vulnFunc.Name] = vulnFunc.Name;
                usedPatched.Add(vulnFunc.Name);
            }
        }

        // Second pass: fingerprint-based matches for whatever is still unpaired.
        var unmatchedPatched = patched.Where(p => !usedPatched.Contains(p.Name)).ToList();
        foreach (var vulnFunc in vulnerable)
        {
            if (mappings.ContainsKey(vulnFunc.Name))
            {
                continue;
            }

            var bestMatch = FindBestMatch(vulnFunc, unmatchedPatched, usedPatched, threshold);
            if (bestMatch is not null)
            {
                mappings[vulnFunc.Name] = bestMatch.Name;
                usedPatched.Add(bestMatch.Name);
            }
        }

        return mappings;
    }

    /// <summary>
    /// Returns the candidate most similar to <paramref name="target"/>, or <c>null</c> when
    /// no candidate reaches the threshold.
    /// </summary>
    /// <param name="target">Function to find a counterpart for.</param>
    /// <param name="candidates">Functions to search.</param>
    /// <param name="excludeNames">Names of candidates that must not be returned.</param>
    /// <param name="threshold">Minimum similarity; a candidate scoring exactly this value qualifies.</param>
    private FunctionFingerprint? FindBestMatch(
        FunctionFingerprint target,
        IReadOnlyList<FunctionFingerprint> candidates,
        HashSet<string> excludeNames,
        decimal threshold)
    {
        FunctionFingerprint? bestMatch = null;

        // Starting slightly below the threshold makes the strict '>' comparison below
        // accept scores equal to the threshold (with a 0.001 tolerance).
        var bestScore = threshold - 0.001m;

        foreach (var candidate in candidates)
        {
            if (excludeNames.Contains(candidate.Name))
            {
                continue;
            }

            var score = ComputeSimilarity(target, candidate);
            if (score > bestScore)
            {
                bestScore = score;
                bestMatch = candidate;
            }
        }

        return bestMatch;
    }

    /// <summary>
    /// Lists which of the three fingerprint hashes differ between two functions,
    /// using the labels <c>basic_block</c>, <c>cfg</c> and <c>string_refs</c>.
    /// </summary>
    private static IReadOnlyList<string> GetDifferingHashes(FunctionFingerprint a, FunctionFingerprint b)
    {
        var differing = new List<string>(3);

        if (!HashesEqual(a.BasicBlockHash, b.BasicBlockHash))
        {
            differing.Add("basic_block");
        }

        if (!HashesEqual(a.CfgHash, b.CfgHash))
        {
            differing.Add("cfg");
        }

        if (!HashesEqual(a.StringRefsHash, b.StringRefsHash))
        {
            differing.Add("string_refs");
        }

        return differing;
    }

    /// <summary>
    /// Byte-wise equality for optional hashes: two <c>null</c>s are equal, a <c>null</c>
    /// and a non-<c>null</c> are not.
    /// </summary>
    private static bool HashesEqual(byte[]? a, byte[]? b)
    {
        if (ReferenceEquals(a, b))
        {
            return true;
        }

        if (a is null || b is null)
        {
            return false;
        }

        // Span comparison avoids the LINQ enumerator-based SequenceEqual.
        return a.AsSpan().SequenceEqual(b);
    }
}

View File

@@ -0,0 +1,371 @@
// -----------------------------------------------------------------------------
// ReproducibleBuildJobTypes.cs
// Types for the ReproducibleBuildJob orchestration
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Interface for the reproducible build job.
/// </summary>
public interface IReproducibleBuildJob
{
/// <summary>
/// Executes the build job, processing all pending CVEs.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the run has finished.</returns>
Task ExecuteAsync(CancellationToken ct);
/// <summary>
/// Processes a single CVE attribution request.
/// </summary>
/// <param name="cve">CVE to process.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when processing of <paramref name="cve"/> has finished.</returns>
Task ProcessCveAsync(CveAttribution cve, CancellationToken ct);
}
/// <summary>
/// CVE attribution request.
/// </summary>
/// <remarks>
/// Identifies one CVE for one source package in one distro release, together with the
/// vulnerable and fixed versions to compare. <see cref="PatchCommit"/> and
/// <see cref="AdvisoryId"/> are optional; every other member is <c>required</c>.
/// </remarks>
public sealed record CveAttribution
{
/// <summary>
/// CVE identifier (e.g., "CVE-2024-0001").
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Source package name (e.g., "openssl", "curl").
/// </summary>
public required string SourcePackage { get; init; }
/// <summary>
/// Distribution identifier (e.g., "debian", "alpine", "rhel").
/// ReproducibleBuildJob in this file matches it case-insensitively against the builders' distro.
/// </summary>
public required string Distro { get; init; }
/// <summary>
/// Distribution release (e.g., "bookworm", "3.19", "9").
/// </summary>
public required string Release { get; init; }
/// <summary>
/// Vulnerable package version.
/// </summary>
public required string VulnerableVersion { get; init; }
/// <summary>
/// Fixed/patched package version.
/// </summary>
public required string FixedVersion { get; init; }
/// <summary>
/// Git commit that introduced the fix (optional).
/// </summary>
public string? PatchCommit { get; init; }
/// <summary>
/// Advisory identifier (optional).
/// </summary>
public string? AdvisoryId { get; init; }
}
/// <summary>
/// Advisory feed monitor interface.
/// Watches for new CVE advisories that need binary attribution.
/// </summary>
public interface IAdvisoryFeedMonitor
{
/// <summary>
/// Gets CVEs pending binary attribution.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// List of CVEs needing processing. ReproducibleBuildJob in this file reads
/// <c>Count</c> on the result without a null check, so implementations are expected
/// to return an empty list rather than <c>null</c> when nothing is pending.
/// </returns>
Task<IReadOnlyList<CveAttribution>> GetPendingCvesAsync(CancellationToken ct);
}
/// <summary>
/// Configuration options for reproducible builds.
/// </summary>
public sealed class ReproducibleBuildOptions
{
/// <summary>
/// Maximum time allowed for a single build. Default: 30 minutes.
/// </summary>
public TimeSpan BuildTimeout { get; set; } = TimeSpan.FromMinutes(30);
/// <summary>
/// Default target architecture. Default: "amd64".
/// </summary>
public string DefaultArchitecture { get; set; } = "amd64";
/// <summary>
/// Minimum function size to extract fingerprints for. Default: 16.
/// </summary>
public int MinFunctionSize { get; set; } = 16;
/// <summary>
/// Maximum concurrent builds. Default: 2.
/// </summary>
/// <remarks>
/// NOTE(review): ReproducibleBuildJob in this file processes CVEs one after another and
/// does not read this value — confirm whether another component enforces it.
/// </remarks>
public int MaxConcurrentBuilds { get; set; } = 2;
/// <summary>
/// Directory for build cache storage. Default: "/var/cache/stellaops/builds".
/// </summary>
/// <remarks>
/// NOTE(review): not read by ReproducibleBuildJob in this file; presumably consumed by the builders — confirm.
/// </remarks>
public string BuildCacheDirectory { get; set; } = "/var/cache/stellaops/builds";
}
/// <summary>
/// Background job that orchestrates reproducible builds for binary CVE attribution.
/// Monitors advisory feeds, triggers builds, extracts fingerprints, and creates claims.
/// </summary>
/// <remarks>
/// CVEs are processed sequentially. A failure while processing one CVE is logged and does
/// not stop the run; cancellation of the supplied token does.
/// </remarks>
public sealed class ReproducibleBuildJob : IReproducibleBuildJob
{
    private readonly ILogger<ReproducibleBuildJob> _logger;
    private readonly ReproducibleBuildOptions _options;
    private readonly IEnumerable<IReproducibleBuilder> _builders;
    private readonly IFunctionFingerprintExtractor _fingerprintExtractor;
    private readonly IPatchDiffEngine _diffEngine;
    private readonly IFingerprintClaimRepository _claimRepository;
    private readonly IAdvisoryFeedMonitor _advisoryMonitor;

    /// <summary>
    /// Initializes a new instance of <see cref="ReproducibleBuildJob"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument (or the options value) is <c>null</c>.</exception>
    public ReproducibleBuildJob(
        ILogger<ReproducibleBuildJob> logger,
        Microsoft.Extensions.Options.IOptions<ReproducibleBuildOptions> options,
        IEnumerable<IReproducibleBuilder> builders,
        IFunctionFingerprintExtractor fingerprintExtractor,
        IPatchDiffEngine diffEngine,
        IFingerprintClaimRepository claimRepository,
        IAdvisoryFeedMonitor advisoryMonitor)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _builders = builders ?? throw new ArgumentNullException(nameof(builders));
        _fingerprintExtractor = fingerprintExtractor ?? throw new ArgumentNullException(nameof(fingerprintExtractor));
        _diffEngine = diffEngine ?? throw new ArgumentNullException(nameof(diffEngine));
        _claimRepository = claimRepository ?? throw new ArgumentNullException(nameof(claimRepository));
        _advisoryMonitor = advisoryMonitor ?? throw new ArgumentNullException(nameof(advisoryMonitor));
    }

    /// <inheritdoc />
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled before or during the run.
    /// </exception>
    public async Task ExecuteAsync(CancellationToken ct)
    {
        _logger.LogInformation("Starting reproducible build job");

        try
        {
            // Step 1: Get pending CVEs that need binary attribution
            var pendingCves = await _advisoryMonitor.GetPendingCvesAsync(ct).ConfigureAwait(false);
            _logger.LogInformation("Found {Count} CVEs pending binary attribution", pendingCves.Count);

            foreach (var cve in pendingCves)
            {
                // Surface cancellation as an exception so the run is reported as
                // cancelled rather than as completed.
                ct.ThrowIfCancellationRequested();

                try
                {
                    await ProcessCveAsync(cve, ct).ConfigureAwait(false);
                }
                catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
                {
                    // Per-CVE failures (including timeouts not caused by our token)
                    // are isolated; continue with the next CVE.
                    _logger.LogError(ex, "Failed to process CVE {CveId}", cve.CveId);
                }
            }

            _logger.LogInformation("Reproducible build job completed");
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Reproducible build job cancelled");
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Reproducible build job failed");
            throw;
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns without creating claims (after logging a warning) when no builder matches the
    /// CVE's distro or when either build fails.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="cve"/> is <c>null</c>.</exception>
    public async Task ProcessCveAsync(CveAttribution cve, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(cve);

        _logger.LogDebug("Processing CVE {CveId} for package {Package}", cve.CveId, cve.SourcePackage);
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // Find appropriate builder for distro
        var builder = _builders.FirstOrDefault(b =>
            b.Distro.Equals(cve.Distro, StringComparison.OrdinalIgnoreCase));
        if (builder is null)
        {
            _logger.LogWarning("No builder available for distro {Distro}", cve.Distro);
            return;
        }

        // Build vulnerable version
        var vulnerableBuild = await BuildVersionAsync(builder, cve, cve.VulnerableVersion, ct).ConfigureAwait(false);
        if (!vulnerableBuild.Success)
        {
            _logger.LogWarning("Failed to build vulnerable version {Version}", cve.VulnerableVersion);
            return;
        }

        // Build patched version
        var patchedBuild = await BuildVersionAsync(builder, cve, cve.FixedVersion, ct).ConfigureAwait(false);
        if (!patchedBuild.Success)
        {
            _logger.LogWarning("Failed to build patched version {Version}", cve.FixedVersion);
            return;
        }

        // Extract function fingerprints from both builds
        var vulnerableFunctions = await ExtractFunctionsAsync(vulnerableBuild, ct).ConfigureAwait(false);
        var patchedFunctions = await ExtractFunctionsAsync(patchedBuild, ct).ConfigureAwait(false);

        // Compute diff to identify changed functions
        var diff = _diffEngine.ComputeDiff(vulnerableFunctions, patchedFunctions);
        _logger.LogDebug(
            "CVE {CveId}: {Modified} modified, {Added} added, {Removed} removed functions",
            cve.CveId, diff.ModifiedCount, diff.AddedCount, diff.RemovedCount);

        // Create fingerprint claims
        await CreateClaimsAsync(cve, diff, vulnerableBuild, patchedBuild, ct).ConfigureAwait(false);

        stopwatch.Stop();
        _logger.LogInformation(
            "Processed CVE {CveId} in {Duration}ms",
            cve.CveId, stopwatch.ElapsedMilliseconds);
    }

    /// <summary>
    /// Builds one version of the CVE's source package with the given builder.
    /// </summary>
    private async Task<BuildResult> BuildVersionAsync(
        IReproducibleBuilder builder,
        CveAttribution cve,
        string version,
        CancellationToken ct)
    {
        var request = new BuildRequest
        {
            SourcePackage = cve.SourcePackage,
            Version = version,
            Release = cve.Release,
            Architecture = _options.DefaultArchitecture,
            Options = new BuildOptions
            {
                Timeout = _options.BuildTimeout,
                // Keep build-time extraction on the same size threshold that
                // ExtractFunctionsAsync uses for post-build extraction.
                MinFunctionSize = _options.MinFunctionSize
            }
        };

        return await builder.BuildAsync(request, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Collects the function fingerprints of every binary in a build, running the extractor
    /// for binaries that carry no function list yet.
    /// </summary>
    private async Task<IReadOnlyList<FunctionFingerprint>> ExtractFunctionsAsync(
        BuildResult build,
        CancellationToken ct)
    {
        var allFunctions = new List<FunctionFingerprint>();

        foreach (var binary in build.Binaries ?? [])
        {
            if (binary.Functions is not null)
            {
                allFunctions.AddRange(binary.Functions);
                continue;
            }

            // Extract if not already done during build.
            // NOTE(review): BuiltBinary.Path is documented as relative to the build output;
            // presumably the extractor resolves it — confirm.
            var functions = await _fingerprintExtractor.ExtractAsync(
                binary.Path,
                new ExtractionOptions
                {
                    IncludeInternalFunctions = false,
                    IncludeCallGraph = true,
                    MinFunctionSize = _options.MinFunctionSize
                },
                ct).ConfigureAwait(false);
            allFunctions.AddRange(functions);
        }

        return allFunctions;
    }

    /// <summary>
    /// Persists one "fixed" claim per patched binary and one "vulnerable" claim per
    /// vulnerable binary. Binaries whose Build-ID cannot be parsed as a GUID are skipped.
    /// </summary>
    private async Task CreateClaimsAsync(
        CveAttribution cve,
        FunctionDiffResult diff,
        BuildResult vulnerableBuild,
        BuildResult patchedBuild,
        CancellationToken ct)
    {
        var claims = new List<FingerprintClaim>();
        var patchCommit = cve.PatchCommit ?? "unknown";

        // The evidence derived from the diff is identical for every binary,
        // so compute it once instead of once per binary.
        var fixedSideFunctions = diff.Changes
            .Where(c => c.Type is ChangeType.Modified or ChangeType.Added)
            .Select(c => c.FunctionName)
            .ToList();
        var vulnerableSideFunctions = diff.Changes
            .Where(c => c.Type == ChangeType.Modified)
            .Select(c => c.FunctionName)
            .ToList();

        // The indexer (last write wins) tolerates repeated function names,
        // where ToDictionary would throw.
        var similarities = new Dictionary<string, decimal>();
        foreach (var change in diff.Changes)
        {
            if (change.SimilarityScore is { } score)
            {
                similarities[change.FunctionName] = score;
            }
        }

        // Create "fixed" claims for patched binaries
        foreach (var binary in patchedBuild.Binaries ?? [])
        {
            if (!TryGetFingerprintId(binary, cve, out var fingerprintId))
            {
                continue;
            }

            claims.Add(new FingerprintClaim
            {
                Id = Guid.NewGuid(),
                FingerprintId = fingerprintId,
                CveId = cve.CveId,
                Verdict = ClaimVerdict.Fixed,
                Evidence = new FingerprintClaimEvidence
                {
                    PatchCommit = patchCommit,
                    ChangedFunctions = fixedSideFunctions,
                    FunctionSimilarities = similarities,
                    VulnerableBuildRef = vulnerableBuild.BuildLogRef,
                    PatchedBuildRef = patchedBuild.BuildLogRef
                },
                CreatedAt = DateTimeOffset.UtcNow
            });
        }

        // Create "vulnerable" claims for vulnerable binaries
        foreach (var binary in vulnerableBuild.Binaries ?? [])
        {
            if (!TryGetFingerprintId(binary, cve, out var fingerprintId))
            {
                continue;
            }

            claims.Add(new FingerprintClaim
            {
                Id = Guid.NewGuid(),
                FingerprintId = fingerprintId,
                CveId = cve.CveId,
                Verdict = ClaimVerdict.Vulnerable,
                Evidence = new FingerprintClaimEvidence
                {
                    PatchCommit = patchCommit,
                    ChangedFunctions = vulnerableSideFunctions,
                    VulnerableBuildRef = vulnerableBuild.BuildLogRef
                },
                CreatedAt = DateTimeOffset.UtcNow
            });
        }

        await _claimRepository.CreateClaimsBatchAsync(claims, ct).ConfigureAwait(false);

        _logger.LogDebug(
            "Created {Count} fingerprint claims for CVE {CveId}",
            claims.Count, cve.CveId);
    }

    /// <summary>
    /// Parses a binary's Build-ID as the fingerprint GUID. A Build-ID that is not in a
    /// GUID-compatible format is reported with a warning instead of an exception, so one
    /// unparsable binary does not discard the claims for every other binary of the CVE.
    /// </summary>
    private bool TryGetFingerprintId(BuiltBinary binary, CveAttribution cve, out Guid fingerprintId)
    {
        if (Guid.TryParse(binary.BuildId, out fingerprintId))
        {
            return true;
        }

        _logger.LogWarning(
            "Skipping claim for binary {Path} of CVE {CveId}: Build-ID {BuildId} is not a valid GUID",
            binary.Path, cve.CveId, binary.BuildId);
        return false;
    }
}

View File

@@ -0,0 +1,62 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
/// Extension methods for registering builder services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds the reproducible builder services to the DI container, binding
    /// <see cref="BuilderServiceOptions"/> from the
    /// <see cref="BuilderServiceOptions.SectionName"/> configuration section.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configuration">Configuration root.</param>
    /// <returns>Service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddBinaryIndexBuilders(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Bind the options from configuration; values absent from the section keep the
        // defaults declared on BuilderServiceOptions. Binding happens lazily, when the
        // options instance is first created.
        var section = configuration.GetSection(BuilderServiceOptions.SectionName);
        services.Configure<BuilderServiceOptions>(options => section.Bind(options));

        return AddCoreServices(services);
    }

    /// <summary>
    /// Adds the reproducible builder services with custom options.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configureOptions">Options configuration delegate.</param>
    /// <returns>Service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configureOptions"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddBinaryIndexBuilders(
        this IServiceCollection services,
        Action<BuilderServiceOptions> configureOptions)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configureOptions);

        services.Configure(configureOptions);

        return AddCoreServices(services);
    }

    /// <summary>
    /// Registrations shared by both overloads, so they cannot drift apart.
    /// </summary>
    private static IServiceCollection AddCoreServices(IServiceCollection services)
    {
        // Registered with defaults only; no configuration section is bound for these options here.
        services.Configure<FunctionExtractionOptions>(_ => { });

        // Core services
        services.TryAddSingleton<IPatchDiffEngine, PatchDiffEngine>();

        // TODO: register the IReproducibleBuilder implementations (Alpine, Debian, RHEL)
        // and the IFunctionFingerprintExtractor implementation once they exist.

        return services;
    }
}

View File

@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<!-- ImplicitUsings brings System, System.Linq, System.Threading.Tasks etc. into scope without using directives. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<!-- Warnings (including missing-XML-doc and nullable warnings) do not fail the build. -->
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<Description>Reproducible distro builders and function-level fingerprinting for StellaOps BinaryIndex.</Description>
</PropertyGroup>
<!-- No Version attributes below: presumably versions come from central package management (Directory.Packages.props) - verify. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Docker.DotNet" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="../StellaOps.BinaryIndex.Fingerprints/StellaOps.BinaryIndex.Fingerprints.csproj" />
</ItemGroup>
</Project>

View File

@@ -59,14 +59,14 @@ public sealed class CachedBinaryVulnerabilityService : IBinaryVulnerabilityServi
// Try cache first
var cached = await GetFromCacheAsync<ImmutableArray<BinaryVulnMatch>>(cacheKey, ct).ConfigureAwait(false);
if (cached.HasValue)
if (!cached.IsDefault)
{
sw.Stop();
_logger.LogDebug(
"Cache hit for identity {BinaryKey} in {ElapsedMs}ms",
identity.BinaryKey,
sw.Elapsed.TotalMilliseconds);
return cached.Value;
return cached;
}
// Cache miss - call inner service
@@ -186,14 +186,14 @@ public sealed class CachedBinaryVulnerabilityService : IBinaryVulnerabilityServi
var sw = Stopwatch.StartNew();
// Try cache first
var cached = await GetFromCacheAsync<FixStatusResult?>(cacheKey, ct).ConfigureAwait(false);
if (cached.HasValue)
var cached = await GetFromCacheAsync<FixStatusResult>(cacheKey, ct).ConfigureAwait(false);
if (cached is not null)
{
sw.Stop();
_logger.LogDebug(
"Cache hit for fix status {Distro}:{SourcePkg}:{CveId} in {ElapsedMs}ms",
distro, sourcePkg, cveId, sw.Elapsed.TotalMilliseconds);
return cached.Value;
return cached;
}
// Cache miss
@@ -296,11 +296,11 @@ public sealed class CachedBinaryVulnerabilityService : IBinaryVulnerabilityServi
// Try cache first
var cached = await GetFromCacheAsync<ImmutableArray<BinaryVulnMatch>>(cacheKey, ct).ConfigureAwait(false);
if (cached.HasValue)
if (!cached.IsDefault)
{
sw.Stop();
_logger.LogDebug("Cache hit for fingerprint in {ElapsedMs}ms", sw.Elapsed.TotalMilliseconds);
return cached.Value;
return cached;
}
// Cache miss

View File

@@ -0,0 +1,279 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StackExchange.Redis;
using StellaOps.BinaryIndex.Contracts.Resolution;
namespace StellaOps.BinaryIndex.Cache;
/// <summary>
/// Caching service for binary resolution results.
/// Uses Valkey/Redis for high-performance caching with configurable TTLs.
/// </summary>
public interface IResolutionCacheService
{
/// <summary>
/// Get cached resolution status.
/// </summary>
/// <param name="cacheKey">The cache key.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Cached resolution if found, null otherwise.</returns>
/// <remarks>
/// The ResolutionCacheService implementation in this file also returns <c>null</c> when
/// probabilistic early expiry triggers and when the cache lookup throws (the error is logged).
/// </remarks>
Task<CachedResolution?> GetAsync(string cacheKey, CancellationToken ct = default);
/// <summary>
/// Cache resolution result.
/// </summary>
/// <param name="cacheKey">The cache key.</param>
/// <param name="result">The resolution result to cache.</param>
/// <param name="ttl">Time-to-live for the cache entry.</param>
/// <param name="ct">Cancellation token.</param>
Task SetAsync(string cacheKey, CachedResolution result, TimeSpan ttl, CancellationToken ct = default);
/// <summary>
/// Invalidate cache entries by pattern.
/// </summary>
/// <param name="pattern">
/// Redis glob pattern (e.g., "resolution:*:*:CVE-2024-0001"). Keys produced by
/// ResolutionCacheService.GenerateCacheKey in this file have the shape
/// <c>{prefix}:{algorithm}:{hash}:{cveId-or-all}</c>.
/// </param>
/// <param name="ct">Cancellation token.</param>
Task InvalidateByPatternAsync(string pattern, CancellationToken ct = default);
/// <summary>
/// Generate cache key from resolution request.
/// </summary>
/// <param name="request">The resolution request.</param>
/// <returns>Deterministic cache key.</returns>
string GenerateCacheKey(VulnResolutionRequest request);
}
/// <summary>
/// Cached resolution entry.
/// </summary>
/// <remarks>
/// ResolutionCacheService in this file stores instances as JSON with camelCase property names.
/// Only <see cref="Status"/> is <c>required</c>; the other members keep their defaults when absent.
/// </remarks>
public sealed record CachedResolution
{
/// <summary>Resolution status.</summary>
public required ResolutionStatus Status { get; init; }
/// <summary>Fixed version if applicable.</summary>
public string? FixedVersion { get; init; }
/// <summary>Reference to evidence record.</summary>
public string? EvidenceRef { get; init; }
/// <summary>When this entry was cached.</summary>
public DateTimeOffset CachedAt { get; init; }
/// <summary>Version key for invalidation.</summary>
public string? VersionKey { get; init; }
/// <summary>Confidence score.</summary>
/// <remarks>NOTE(review): the valid range is not shown here; presumably 0.0 to 1.0 - confirm.</remarks>
public decimal Confidence { get; init; }
/// <summary>Match type used.</summary>
public string? MatchType { get; init; }
}
/// <summary>
/// Configuration options for resolution caching.
/// </summary>
public sealed class ResolutionCacheOptions
{
/// <summary>Configuration section name.</summary>
public const string SectionName = "ResolutionCache";
/// <summary>TTL for fixed (high confidence) results. Default: 24 hours.</summary>
public TimeSpan FixedTtl { get; set; } = TimeSpan.FromHours(24);
/// <summary>TTL for vulnerable results. Default: 4 hours.</summary>
public TimeSpan VulnerableTtl { get; set; } = TimeSpan.FromHours(4);
/// <summary>TTL for unknown results. Default: 1 hour.</summary>
public TimeSpan UnknownTtl { get; set; } = TimeSpan.FromHours(1);
/// <summary>Cache key prefix; the first segment of every generated key. Default: "resolution".</summary>
public string KeyPrefix { get; set; } = "resolution";
/// <summary>
/// Enable probabilistic early expiry to prevent stampedes. Default: enabled.
/// When enabled, each cache read performs an additional TTL lookup on the key.
/// </summary>
public bool EnableEarlyExpiry { get; set; } = true;
/// <summary>Early expiry factor (0.0-1.0). Default: 0.1.</summary>
/// <remarks>NOTE(review): how the factor enters the expiry decision is not visible in this part of the file - confirm before tuning.</remarks>
public double EarlyExpiryFactor { get; set; } = 0.1;
}
/// <summary>
/// Valkey/Redis implementation of resolution caching.
/// </summary>
public sealed class ResolutionCacheService : IResolutionCacheService
{
private readonly IConnectionMultiplexer _redis;
private readonly ResolutionCacheOptions _options;
private readonly ILogger<ResolutionCacheService> _logger;
private readonly JsonSerializerOptions _jsonOptions;
/// <summary>
/// Creates the cache service on top of an existing Redis connection.
/// </summary>
/// <param name="redis">Shared connection multiplexer.</param>
/// <param name="options">Cache options; the wrapped value must not be <c>null</c>.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException">
/// Any argument is <c>null</c>, or <paramref name="options"/> wraps a <c>null</c> value.
/// </exception>
public ResolutionCacheService(
    IConnectionMultiplexer redis,
    IOptions<ResolutionCacheOptions> options,
    ILogger<ResolutionCacheService> logger)
{
    // Validate in declaration order before touching any field.
    ArgumentNullException.ThrowIfNull(redis);
    var resolvedOptions = options?.Value ?? throw new ArgumentNullException(nameof(options));
    ArgumentNullException.ThrowIfNull(logger);

    _redis = redis;
    _options = resolvedOptions;
    _logger = logger;

    // Compact camelCase JSON for stored entries.
    _jsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false
    };
}
/// <inheritdoc />
/// <remarks>
/// Best-effort read: any cache failure is logged and reported as a miss (<c>null</c>).
/// A stored JSON <c>null</c> payload is treated as a miss as well.
/// </remarks>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> is already cancelled on entry.</exception>
public async Task<CachedResolution?> GetAsync(string cacheKey, CancellationToken ct = default)
{
    // The Redis calls below take no cancellation token, so the token is observed once
    // up front - outside the try block, so the catch-all cannot swallow it.
    ct.ThrowIfCancellationRequested();

    try
    {
        var db = _redis.GetDatabase();
        var value = await db.StringGetAsync(cacheKey).ConfigureAwait(false);

        if (value.IsNullOrEmpty)
        {
            _logger.LogDebug("Cache miss for key {CacheKey}", cacheKey);
            return null;
        }

        var cached = JsonSerializer.Deserialize<CachedResolution>(value.ToString(), _jsonOptions);
        if (cached is null)
        {
            // The stored payload was the JSON literal null; report it as a miss, not a hit.
            _logger.LogDebug("Cache miss for key {CacheKey}", cacheKey);
            return null;
        }

        // Check for probabilistic early expiry
        if (_options.EnableEarlyExpiry)
        {
            var ttl = await db.KeyTimeToLiveAsync(cacheKey).ConfigureAwait(false);
            if (ShouldExpireEarly(ttl))
            {
                _logger.LogDebug("Early expiry triggered for key {CacheKey}", cacheKey);
                return null;
            }
        }

        _logger.LogDebug("Cache hit for key {CacheKey}", cacheKey);
        return cached;
    }
    catch (Exception ex)
    {
        // The cache is an optimization; a failure must not break the caller.
        _logger.LogWarning(ex, "Failed to get cache entry for key {CacheKey}", cacheKey);
        return null;
    }
}
/// <inheritdoc />
public async Task SetAsync(string cacheKey, CachedResolution result, TimeSpan ttl, CancellationToken ct = default)
{
try
{
var db = _redis.GetDatabase();
var value = JsonSerializer.Serialize(result, _jsonOptions);
await db.StringSetAsync(cacheKey, value, ttl);
_logger.LogDebug("Cached resolution for key {CacheKey} with TTL {Ttl}", cacheKey, ttl);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to cache resolution for key {CacheKey}", cacheKey);
}
}
/// <inheritdoc />
public async Task InvalidateByPatternAsync(string pattern, CancellationToken ct = default)
{
try
{
var server = _redis.GetServer(_redis.GetEndPoints().First());
var db = _redis.GetDatabase();
var keys = server.Keys(pattern: pattern).ToArray();
if (keys.Length > 0)
{
await db.KeyDeleteAsync(keys);
_logger.LogInformation("Invalidated {Count} cache entries matching pattern {Pattern}",
keys.Length, pattern);
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to invalidate cache entries matching pattern {Pattern}", pattern);
}
}
/// <inheritdoc />
public string GenerateCacheKey(VulnResolutionRequest request)
{
ArgumentNullException.ThrowIfNull(request);
// Build deterministic cache key
// Format: resolution:{algorithm}:{hash}:{cve_id_or_all}
var algorithm = DetermineAlgorithm(request);
var hash = ComputeIdentityHash(request);
var cveId = request.CveId ?? "all";
return $"{_options.KeyPrefix}:{algorithm}:{hash}:{cveId}";
}
/// <summary>
/// Get appropriate TTL based on resolution status.
/// </summary>
public TimeSpan GetTtlForStatus(ResolutionStatus status)
{
return status switch
{
ResolutionStatus.Fixed => _options.FixedTtl,
ResolutionStatus.Vulnerable => _options.VulnerableTtl,
ResolutionStatus.NotAffected => _options.FixedTtl,
_ => _options.UnknownTtl
};
}
private static string DetermineAlgorithm(VulnResolutionRequest request)
{
if (!string.IsNullOrEmpty(request.BuildId))
return "build_id";
if (!string.IsNullOrEmpty(request.Fingerprint))
return request.FingerprintAlgorithm ?? "combined";
if (request.Hashes?.TextSha256 != null)
return "text_sha256";
if (request.Hashes?.FileSha256 != null)
return "file_sha256";
return "package";
}
private static string ComputeIdentityHash(VulnResolutionRequest request)
{
// Use the most specific identifier available
if (!string.IsNullOrEmpty(request.BuildId))
return request.BuildId;
if (!string.IsNullOrEmpty(request.Fingerprint))
return ComputeShortHash(request.Fingerprint);
if (request.Hashes?.TextSha256 != null)
return request.Hashes.TextSha256;
if (request.Hashes?.FileSha256 != null)
return request.Hashes.FileSha256;
// Fall back to package + distro
var key = $"{request.Package}:{request.DistroRelease ?? "unknown"}";
return ComputeShortHash(key);
}
private static string ComputeShortHash(string input)
{
var bytes = System.Text.Encoding.UTF8.GetBytes(input);
var hash = System.Security.Cryptography.SHA256.HashData(bytes);
return Convert.ToHexStringLower(hash)[..16];
}
private bool ShouldExpireEarly(TimeSpan? remainingTtl)
{
if (!remainingTtl.HasValue || remainingTtl.Value <= TimeSpan.Zero)
return true;
// Probabilistic early expiry using exponential decay
var random = Random.Shared.NextDouble();
var threshold = _options.EarlyExpiryFactor * Math.Exp(-remainingTtl.Value.TotalSeconds / 3600);
return random < threshold;
}
}

View File

@@ -13,14 +13,19 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="StackExchange.Redis" Version="2.8.37" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
<PackageReference Include="StackExchange.Redis" />
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" />
<PackageReference Include="Scrutor" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="../StellaOps.BinaryIndex.FixIndex/StellaOps.BinaryIndex.FixIndex.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,186 @@
using System.ComponentModel.DataAnnotations;
namespace StellaOps.BinaryIndex.Contracts.Resolution;
/// <summary>
/// Describes one binary whose vulnerability status should be resolved.
/// </summary>
/// <remarks>
/// Only <see cref="Package"/> is mandatory; every other member is optional.
/// </remarks>
public sealed record VulnResolutionRequest
{
    /// <summary>
    /// Package the binary belongs to, given as a Package URL (PURL) or a CPE identifier.
    /// </summary>
    [Required]
    public required string Package { get; init; }

    /// <summary>
    /// Location of the file inside the container or filesystem.
    /// </summary>
    public string? FilePath { get; init; }

    /// <summary>
    /// Build identifier of the binary: ELF Build-ID, PE CodeView GUID, or Mach-O UUID.
    /// </summary>
    public string? BuildId { get; init; }

    /// <summary>
    /// Hashes of the binary that can be used for matching.
    /// </summary>
    public ResolutionHashes? Hashes { get; init; }

    /// <summary>
    /// Fingerprint bytes, Base64-encoded.
    /// </summary>
    public string? Fingerprint { get; init; }

    /// <summary>
    /// Name of the algorithm that produced <see cref="Fingerprint"/>
    /// (e.g., "combined", "tlsh", "ssdeep").
    /// </summary>
    public string? FingerprintAlgorithm { get; init; }

    /// <summary>
    /// Single CVE to check for a targeted query. When omitted, all known CVEs are checked.
    /// </summary>
    public string? CveId { get; init; }

    /// <summary>
    /// Distribution and release used for fix status lookup (e.g., "debian:bookworm").
    /// </summary>
    public string? DistroRelease { get; init; }
}
/// <summary>
/// Set of optional hash values that identify a binary for matching.
/// </summary>
public sealed record ResolutionHashes
{
    /// <summary>
    /// SHA-256 computed over the whole file.
    /// </summary>
    public string? FileSha256 { get; init; }

    /// <summary>
    /// SHA-256 computed over the .text section only.
    /// </summary>
    public string? TextSha256 { get; init; }

    /// <summary>
    /// BLAKE3 hash (future-proof).
    /// </summary>
    public string? Blake3 { get; init; }
}
/// <summary>
/// Outcome of resolving the vulnerability status of one binary.
/// </summary>
public sealed record VulnResolutionResponse
{
    /// <summary>
    /// Package identifier, echoed from the request.
    /// </summary>
    public required string Package { get; init; }

    /// <summary>
    /// Resolved status for the binary.
    /// </summary>
    public required ResolutionStatus Status { get; init; }

    /// <summary>
    /// Version in which the fix was applied; meaningful when <see cref="Status"/> is Fixed.
    /// </summary>
    public string? FixedVersion { get; init; }

    /// <summary>
    /// Evidence backing the resolution, when available.
    /// </summary>
    public ResolutionEvidence? Evidence { get; init; }

    /// <summary>
    /// DSSE attestation envelope, as Base64-encoded JSON.
    /// </summary>
    public string? AttestationDsse { get; init; }

    /// <summary>
    /// Point in time at which the resolution was computed.
    /// </summary>
    public DateTimeOffset ResolvedAt { get; init; }

    /// <summary>
    /// <c>true</c> when the result was served from cache.
    /// </summary>
    public bool FromCache { get; init; }

    /// <summary>
    /// CVE identifier, set when a specific CVE was queried.
    /// </summary>
    public string? CveId { get; init; }
}
/// <summary>
/// Possible outcomes of a vulnerability resolution.
/// </summary>
public enum ResolutionStatus
{
    /// <summary>
    /// The vulnerability is fixed in this binary (backport detected).
    /// </summary>
    Fixed,

    /// <summary>
    /// The binary is vulnerable.
    /// </summary>
    Vulnerable,

    /// <summary>
    /// The CVE does not affect this binary.
    /// </summary>
    NotAffected,

    /// <summary>
    /// The status could not be determined.
    /// </summary>
    Unknown
}
/// <summary>
/// Details that back up a resolution decision.
/// </summary>
public sealed record ResolutionEvidence
{
    /// <summary>
    /// How the binary was matched (build_id, fingerprint, hash_exact).
    /// </summary>
    public required string MatchType { get; init; }

    /// <summary>
    /// Confidence of the decision, from 0.0 to 1.0.
    /// </summary>
    public decimal Confidence { get; init; }

    /// <summary>
    /// Identifier of the distro advisory (e.g., DSA-5343-1, RHSA-2024:1234).
    /// </summary>
    public string? DistroAdvisoryId { get; init; }

    /// <summary>
    /// SHA-256 of the security patch.
    /// </summary>
    public string? PatchHash { get; init; }

    /// <summary>
    /// Identifiers of the fingerprints that matched.
    /// </summary>
    public IReadOnlyList<string>? MatchedFingerprintIds { get; init; }

    /// <summary>
    /// Short description of the function-level differences.
    /// </summary>
    public string? FunctionDiffSummary { get; init; }

    /// <summary>
    /// Name of the source package.
    /// </summary>
    public string? SourcePackage { get; init; }

    /// <summary>
    /// How the fix was detected (security_feed, changelog, patch_header).
    /// </summary>
    public string? FixMethod { get; init; }
}
/// <summary>
/// Carries several resolution requests to be processed in one call.
/// </summary>
public sealed record BatchVulnResolutionRequest
{
    /// <summary>
    /// The individual resolution requests.
    /// </summary>
    [Required]
    public required IReadOnlyList<VulnResolutionRequest> Items { get; init; }

    /// <summary>
    /// Optional settings applied to the batch.
    /// </summary>
    public BatchResolutionOptions? Options { get; init; }
}
/// <summary>
/// Settings that apply to a batch resolution.
/// </summary>
public sealed record BatchResolutionOptions
{
    /// <summary>
    /// When <c>true</c>, skip the cache and perform fresh lookups. Defaults to <c>false</c>.
    /// </summary>
    public bool BypassCache { get; init; }

    /// <summary>
    /// When <c>true</c>, include a DSSE attestation in the responses. Defaults to <c>true</c>.
    /// </summary>
    public bool IncludeDsseAttestation { get; init; } = true;
}
/// <summary>
/// Aggregated outcome of a batch vulnerability resolution.
/// </summary>
public sealed record BatchVulnResolutionResponse
{
    /// <summary>
    /// The individual resolution results.
    /// </summary>
    public required IReadOnlyList<VulnResolutionResponse> Results { get; init; }

    /// <summary>
    /// Number of items that were processed.
    /// </summary>
    public int TotalCount { get; init; }

    /// <summary>
    /// How many of the items were served from cache.
    /// </summary>
    public int CacheHits { get; init; }

    /// <summary>
    /// Time spent processing the batch, in milliseconds.
    /// </summary>
    public long ProcessingTimeMs { get; init; }
}

View File

@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Contracts-only library (see Description): this project file declares no package or project references. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- Emit the XML documentation file generated from the /// comments in this project. -->
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<!-- Compiler warnings do not fail the build of this project. -->
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<Description>API contracts for BinaryIndex resolution endpoints</Description>
</PropertyGroup>
</Project>

View File

@@ -0,0 +1,360 @@
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.Contracts.Resolution;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.Core.Services;
namespace StellaOps.BinaryIndex.Core.Resolution;
/// <summary>
/// Resolves the vulnerability status of binaries, one at a time or in batches.
/// </summary>
public interface IResolutionService
{
    /// <summary>
    /// Resolves the vulnerability status of one binary.
    /// </summary>
    /// <param name="request">Binary to resolve.</param>
    /// <param name="options">Optional per-call settings.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The resolution result for <paramref name="request"/>.</returns>
    Task<VulnResolutionResponse> ResolveAsync(VulnResolutionRequest request, ResolutionOptions? options = null, CancellationToken ct = default);

    /// <summary>
    /// Resolves the vulnerability status of several binaries in one call.
    /// </summary>
    /// <param name="request">Batch of binaries to resolve.</param>
    /// <param name="options">Optional per-call settings.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The per-item results together with batch statistics.</returns>
    Task<BatchVulnResolutionResponse> ResolveBatchAsync(BatchVulnResolutionRequest request, ResolutionOptions? options = null, CancellationToken ct = default);
}
/// <summary>
/// Per-call settings for resolution operations.
/// </summary>
public sealed record ResolutionOptions
{
    /// <summary>
    /// When <c>true</c>, skip the cache and perform fresh lookups. Defaults to <c>false</c>.
    /// </summary>
    public bool BypassCache { get; init; }

    /// <summary>
    /// When <c>true</c>, include a DSSE attestation in the response. Defaults to <c>true</c>.
    /// </summary>
    public bool IncludeDsseAttestation { get; init; } = true;

    /// <summary>
    /// Custom time-to-live for cache entries; <c>null</c> when not specified.
    /// </summary>
    public TimeSpan? CacheTtl { get; init; }

    /// <summary>
    /// Tenant identifier for multi-tenancy.
    /// </summary>
    public string? TenantId { get; init; }
}
/// <summary>
/// Configuration for the default resolution service.
/// </summary>
public sealed class ResolutionServiceOptions
{
    /// <summary>
    /// Name of the configuration section these options are read from.
    /// </summary>
    public const string SectionName = "Resolution";

    /// <summary>
    /// Cache time-to-live used by default (4 hours unless configured).
    /// </summary>
    public TimeSpan DefaultCacheTtl { get; set; } = TimeSpan.FromHours(4);

    /// <summary>
    /// Largest number of items accepted in one batch (500 unless configured).
    /// </summary>
    public int MaxBatchSize { get; set; } = 500;

    /// <summary>
    /// Whether DSSE attestation is enabled by default.
    /// </summary>
    public bool EnableDsseByDefault { get; set; } = true;

    /// <summary>
    /// Lowest confidence accepted for a resolution (0.70 unless configured).
    /// </summary>
    public decimal MinConfidenceThreshold { get; set; } = 0.70m;
}
/// <summary>
/// Implementation of the resolution service.
/// </summary>
/// <remarks>
/// A request that names a CVE is answered from the fix-status lookup for that CVE; a request
/// without one is answered by a full binary-identity lookup. This class never reads or writes a
/// cache itself: every response it builds has <c>FromCache = false</c>.
/// </remarks>
public sealed class ResolutionService : IResolutionService
{
    /// <summary>Placeholder passed to the fix-status lookup when distro or release is not known.</summary>
    private const string UnknownHint = "unknown";

    /// <summary>Characters that start the qualifier ("?") and subpath ("#") parts of a PURL.</summary>
    private static readonly char[] PurlSuffixDelimiters = { '?', '#' };

    private readonly IBinaryVulnerabilityService _vulnerabilityService;
    private readonly ResolutionServiceOptions _options;
    private readonly ILogger<ResolutionService> _logger;

    /// <summary>
    /// Creates the resolution service.
    /// </summary>
    /// <param name="vulnerabilityService">Backend used for fix-status and identity lookups.</param>
    /// <param name="options">Service configuration (batch size limit, confidence threshold).</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument (or the options value) is null.</exception>
    public ResolutionService(
        IBinaryVulnerabilityService vulnerabilityService,
        IOptions<ResolutionServiceOptions> options,
        ILogger<ResolutionService> logger)
    {
        _vulnerabilityService = vulnerabilityService ?? throw new ArgumentNullException(nameof(vulnerabilityService));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<VulnResolutionResponse> ResolveAsync(
        VulnResolutionRequest request,
        ResolutionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var effectiveOptions = options ?? new ResolutionOptions();
        _logger.LogDebug("Resolving vulnerability for package {Package}", request.Package);

        var lookupOptions = new LookupOptions
        {
            DistroHint = ExtractDistro(request.DistroRelease),
            ReleaseHint = ExtractRelease(request.DistroRelease),
            TenantId = effectiveOptions.TenantId
        };

        // Targeted query: a specific CVE was requested.
        if (!string.IsNullOrEmpty(request.CveId))
        {
            return await ResolveSingleCveAsync(request, request.CveId, lookupOptions, ct);
        }

        // Full lookup - all CVEs
        return await ResolveAllCvesAsync(request, BuildBinaryIdentity(request), lookupOptions, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Items beyond <c>MaxBatchSize</c> are dropped (with a warning). When the request carries
    /// batch options, their cache-bypass and attestation flags replace the ones in
    /// <paramref name="options"/>. An item that fails to resolve yields an <c>Unknown</c> result
    /// instead of failing the batch; cancellation is the exception and always propagates.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<BatchVulnResolutionResponse> ResolveBatchAsync(
        BatchVulnResolutionRequest request,
        ResolutionOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        var effectiveOptions = options ?? new ResolutionOptions();

        var items = request.Items;
        if (items.Count > _options.MaxBatchSize)
        {
            _logger.LogWarning("Batch size {Count} exceeds maximum {Max}, truncating",
                items.Count, _options.MaxBatchSize);
            items = items.Take(_options.MaxBatchSize).ToList();
        }

        // Apply batch options
        if (request.Options is not null)
        {
            effectiveOptions = effectiveOptions with
            {
                BypassCache = request.Options.BypassCache,
                IncludeDsseAttestation = request.Options.IncludeDsseAttestation
            };
        }

        var results = new List<VulnResolutionResponse>(items.Count);
        var cacheHits = 0;

        foreach (var item in items)
        {
            ct.ThrowIfCancellationRequested();

            try
            {
                var result = await ResolveAsync(item, effectiveOptions, ct);
                results.Add(result);
                if (result.FromCache)
                {
                    cacheHits++;
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Cancellation is excluded by the filter above so that a cancelled batch stops
                // immediately instead of recording one more bogus "Unknown" entry.
                _logger.LogWarning(ex, "Failed to resolve item {Package}", item.Package);

                // Placeholder result keeps the output aligned with the input order.
                results.Add(new VulnResolutionResponse
                {
                    Package = item.Package,
                    Status = ResolutionStatus.Unknown,
                    CveId = item.CveId,
                    ResolvedAt = DateTimeOffset.UtcNow,
                    FromCache = false
                });
            }
        }

        return new BatchVulnResolutionResponse
        {
            Results = results,
            TotalCount = results.Count,
            CacheHits = cacheHits,
            ProcessingTimeMs = sw.ElapsedMilliseconds
        };
    }

    /// <summary>
    /// Resolves a single CVE through the fix-status lookup, keyed by distro, release and source package.
    /// </summary>
    private async Task<VulnResolutionResponse> ResolveSingleCveAsync(
        VulnResolutionRequest request,
        string cveId,
        LookupOptions lookupOptions,
        CancellationToken ct)
    {
        // The hints were already parsed from request.DistroRelease by the caller.
        var fixStatus = await _vulnerabilityService.GetFixStatusAsync(
            lookupOptions.DistroHint ?? UnknownHint,
            lookupOptions.ReleaseHint ?? UnknownHint,
            ExtractSourcePackage(request.Package) ?? request.Package,
            cveId,
            ct);

        var (status, evidence) = MapFixStatusToResolution(fixStatus);

        return new VulnResolutionResponse
        {
            Package = request.Package,
            Status = status,
            FixedVersion = fixStatus?.FixedVersion,
            Evidence = evidence,
            CveId = cveId,
            ResolvedAt = DateTimeOffset.UtcNow,
            FromCache = false
        };
    }

    /// <summary>
    /// Resolves a request without a CVE by looking the binary identity up against all known matches.
    /// </summary>
    private async Task<VulnResolutionResponse> ResolveAllCvesAsync(
        VulnResolutionRequest request,
        BinaryIdentity identity,
        LookupOptions lookupOptions,
        CancellationToken ct)
    {
        var matches = await _vulnerabilityService.LookupByIdentityAsync(identity, lookupOptions, ct);

        if (matches.IsEmpty)
        {
            _logger.LogDebug("No vulnerabilities found for {Package}", request.Package);
            return new VulnResolutionResponse
            {
                Package = request.Package,
                Status = ResolutionStatus.NotAffected,
                ResolvedAt = DateTimeOffset.UtcNow,
                FromCache = false
            };
        }

        // The highest-confidence match drives the reported status and evidence.
        var primaryMatch = matches.OrderByDescending(m => m.Confidence).First();

        var evidence = new ResolutionEvidence
        {
            MatchType = primaryMatch.Method.ToString().ToLowerInvariant(),
            Confidence = primaryMatch.Confidence,
            // NOTE(review): this field is documented as fingerprint IDs but is filled with the
            // CVE IDs of the matches - confirm which one consumers expect.
            MatchedFingerprintIds = matches.Select(m => m.CveId).ToList()
        };

        // NOTE(review): a build-ID or confident fingerprint match is reported as Fixed here;
        // confirm that matches returned by the lookup represent fixed builds.
        var status = primaryMatch.Method switch
        {
            MatchMethod.BuildIdCatalog => ResolutionStatus.Fixed,
            MatchMethod.FingerprintMatch when primaryMatch.Confidence >= _options.MinConfidenceThreshold
                => ResolutionStatus.Fixed,
            _ => ResolutionStatus.Unknown
        };

        return new VulnResolutionResponse
        {
            Package = request.Package,
            Status = status,
            Evidence = evidence,
            ResolvedAt = DateTimeOffset.UtcNow,
            FromCache = false
        };
    }

    /// <summary>
    /// Builds the lookup identity from the request. Empty identifiers are treated as missing so
    /// that an empty string can never become the binary key.
    /// </summary>
    private static BinaryIdentity BuildBinaryIdentity(VulnResolutionRequest request)
    {
        var buildId = string.IsNullOrEmpty(request.BuildId) ? null : request.BuildId;
        var fileSha256 = string.IsNullOrEmpty(request.Hashes?.FileSha256) ? null : request.Hashes.FileSha256;

        return new BinaryIdentity
        {
            // Most specific identifier wins: build ID, then file hash, then the package string.
            BinaryKey = buildId ?? fileSha256 ?? request.Package,
            BuildId = buildId,
            FileSha256 = fileSha256 ?? "sha256:unknown",
            TextSha256 = request.Hashes?.TextSha256,
            Blake3Hash = request.Hashes?.Blake3,
            // The request carries neither format nor architecture, so fixed values are used.
            Format = BinaryFormat.Elf,
            Architecture = "unknown"
        };
    }

    /// <summary>
    /// Translates a fix-status lookup result into a resolution status plus evidence.
    /// A missing result maps to <c>Unknown</c> with no evidence.
    /// </summary>
    private static (ResolutionStatus Status, ResolutionEvidence? Evidence) MapFixStatusToResolution(
        FixStatusResult? fixStatus)
    {
        if (fixStatus is null)
        {
            return (ResolutionStatus.Unknown, null);
        }

        var status = fixStatus.State switch
        {
            FixState.Fixed => ResolutionStatus.Fixed,
            FixState.Vulnerable => ResolutionStatus.Vulnerable,
            FixState.NotAffected => ResolutionStatus.NotAffected,
            FixState.Wontfix => ResolutionStatus.NotAffected,
            _ => ResolutionStatus.Unknown
        };

        var evidence = new ResolutionEvidence
        {
            MatchType = "fix_status",
            Confidence = fixStatus.Confidence,
            FixMethod = fixStatus.Method.ToString().ToLowerInvariant()
        };

        return (status, evidence);
    }

    /// <summary>
    /// Returns the distro part of a "distro:release" hint, or null when it is missing or empty.
    /// </summary>
    private static string? ExtractDistro(string? distroRelease)
    {
        if (string.IsNullOrEmpty(distroRelease))
        {
            return null;
        }

        var distro = distroRelease.Split(':')[0];

        // An empty segment (":bookworm") must become null so "?? unknown" fallbacks apply.
        return distro.Length > 0 ? distro : null;
    }

    /// <summary>
    /// Returns the release part of a "distro:release" hint, or null when it is missing or empty.
    /// </summary>
    private static string? ExtractRelease(string? distroRelease)
    {
        if (string.IsNullOrEmpty(distroRelease))
        {
            return null;
        }

        var parts = distroRelease.Split(':');

        // An empty segment ("debian:") must become null so "?? unknown" fallbacks apply.
        return parts.Length > 1 && parts[1].Length > 0 ? parts[1] : null;
    }

    /// <summary>
    /// Extracts the package name from a PURL of the form <c>pkg:type/namespace/name@version</c>.
    /// </summary>
    /// <returns>
    /// The name without version, qualifiers or subpath; null when the input does not have at
    /// least three '/'-separated segments or the name segment is empty.
    /// </returns>
    private static string? ExtractSourcePackage(string purl)
    {
        if (string.IsNullOrEmpty(purl))
        {
            return null;
        }

        // Qualifiers ("?arch=amd64") and subpath ("#dir") follow the name and may themselves
        // contain '/', so they are removed before the path is split.
        var suffixStart = purl.IndexOfAny(PurlSuffixDelimiters);
        var path = suffixStart >= 0 ? purl[..suffixStart] : purl;

        var parts = path.Split('/');
        if (parts.Length < 3)
        {
            return null;
        }

        var nameVersion = parts[^1];
        var atIndex = nameVersion.IndexOf('@');
        var name = atIndex > 0 ? nameVersion[..atIndex] : nameVersion;

        return name.Length > 0 ? name : null;
    }
}

View File

@@ -96,6 +96,9 @@ public sealed record FingerprintLookupOptions
/// <summary>Release hint for fix status lookup.</summary>
public string? ReleaseHint { get; init; }
/// <summary>Fingerprint algorithm to use (e.g., "combined", "tlsh", "ssdeep").</summary>
public string? Algorithm { get; init; }
}
public sealed record LookupOptions
@@ -103,6 +106,7 @@ public sealed record LookupOptions
public bool CheckFixIndex { get; init; } = true;
public string? DistroHint { get; init; }
public string? ReleaseHint { get; init; }
public string? TenantId { get; init; }
}
public sealed record BinaryVulnMatch

View File

@@ -8,7 +8,11 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Collections.Immutable" Version="9.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Contracts\StellaOps.BinaryIndex.Contracts.csproj" />
</ItemGroup>
</Project>

View File

@@ -78,7 +78,7 @@ public sealed class AlpinePackageExtractor
try
{
var identity = await _featureExtractor.ExtractIdentityAsync(ms, entry.Key ?? "", ct);
var identity = await _featureExtractor.ExtractIdentityAsync(ms, ct);
results.Add(new ExtractedBinaryInfo(identity, entry.Key ?? ""));
}
catch (Exception ex)
@@ -102,7 +102,7 @@ public sealed class AlpinePackageExtractor
// We need to skip to the data.tar.gz portion
// The structure is: signature.tar.gz + control.tar.gz + data.tar.gz
using var gzip = new GZipStream(apkStream, SharpCompress.Compressors.CompressionMode.Decompress, leaveOpen: true);
using var gzip = new GZipStream(apkStream, SharpCompress.Compressors.CompressionMode.Decompress);
using var ms = new MemoryStream();
await gzip.CopyToAsync(ms, ct);
ms.Position = 0;

View File

@@ -8,9 +8,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="SharpCompress" Version="0.38.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="SharpCompress" />
</ItemGroup>
<ItemGroup>

View File

@@ -8,9 +8,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="SharpCompress" Version="0.38.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="SharpCompress" />
</ItemGroup>
<ItemGroup>

View File

@@ -7,7 +7,7 @@
using Microsoft.Extensions.Logging;
using SharpCompress.Archives;
using SharpCompress.Compressors.Xz;
using SharpCompress.Readers.Cpio;
using SharpCompress.Readers;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.BinaryIndex.Corpus;
@@ -60,7 +60,7 @@ public sealed class RpmPackageExtractor
return results;
}
using var reader = CpioReader.Open(payloadStream);
using var reader = ReaderFactory.Open(payloadStream);
while (reader.MoveToNextEntry())
{
ct.ThrowIfCancellationRequested();
@@ -82,7 +82,7 @@ public sealed class RpmPackageExtractor
try
{
var identity = await _featureExtractor.ExtractIdentityAsync(ms, reader.Entry.Key ?? "", ct);
var identity = await _featureExtractor.ExtractIdentityAsync(ms, ct);
results.Add(new ExtractedBinaryInfo(identity, reader.Entry.Key ?? ""));
}
catch (Exception ex)

View File

@@ -8,9 +8,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="SharpCompress" Version="0.38.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="SharpCompress" />
</ItemGroup>
<ItemGroup>

View File

@@ -8,7 +8,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>

View File

@@ -8,7 +8,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>

View File

@@ -37,7 +37,8 @@ public sealed partial class AlpineSecfixesParser : ISecfixesParser
if (string.IsNullOrWhiteSpace(apkbuild))
yield break;
var lines = apkbuild.Split('\n');
// Normalize line endings to handle both Unix and Windows formats
var lines = apkbuild.ReplaceLineEndings("\n").Split('\n');
var inSecfixes = false;
string? currentVersion = null;

View File

@@ -30,7 +30,8 @@ public sealed partial class DebianChangelogParser : IChangelogParser
if (string.IsNullOrWhiteSpace(changelog))
yield break;
var lines = changelog.Split('\n');
// Normalize line endings to handle both Unix and Windows formats
var lines = changelog.ReplaceLineEndings("\n").Split('\n');
if (lines.Length == 0)
yield break;

View File

@@ -25,7 +25,8 @@ public sealed partial class PatchHeaderParser : IPatchParser
foreach (var (path, content, sha256) in patches)
{
// Read first 80 lines as header (typical patch header size)
var headerLines = content.Split('\n').Take(80);
// Normalize line endings to handle both Unix and Windows formats
var headerLines = content.ReplaceLineEndings("\n").Split('\n').Take(80);
var header = string.Join('\n', headerLines);
// Also check filename for CVE (e.g., "CVE-2024-1234.patch")

View File

@@ -39,7 +39,8 @@ public sealed partial class RpmChangelogParser : IChangelogParser
if (string.IsNullOrWhiteSpace(specContent))
yield break;
var lines = specContent.Split('\n');
// Normalize line endings to handle both Unix and Windows formats
var lines = specContent.ReplaceLineEndings("\n").Split('\n');
var inChangelog = false;
var inFirstEntry = false;
string? currentVersion = null;
@@ -128,7 +129,8 @@ public sealed partial class RpmChangelogParser : IChangelogParser
if (string.IsNullOrWhiteSpace(specContent))
yield break;
var lines = specContent.Split('\n');
// Normalize line endings to handle both Unix and Windows formats
var lines = specContent.ReplaceLineEndings("\n").Split('\n');
var inChangelog = false;
string? currentVersion = null;
var currentEntry = new List<string>();

View File

@@ -8,7 +8,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>

View File

@@ -0,0 +1,432 @@
-- =============================================================================
-- 001_initial_schema.sql
-- Consolidated initial schema for BinaryIndex module
-- Combines: 001_create_binaries_schema, 002_create_fingerprint_tables,
-- 003_create_fix_index_tables, 20251226_AddFingerprintTables
-- Date: 2025-12-27
-- Note: Transaction control handled by MigrationRunner, not this script
-- =============================================================================
-- =============================================================================
-- SCHEMA CREATION
-- =============================================================================
-- binaries:     holds every table created by this script.
-- binaries_app: holds the helper function used by the row-level security policies.
CREATE SCHEMA IF NOT EXISTS binaries;
CREATE SCHEMA IF NOT EXISTS binaries_app;
-- RLS helper function
-- Returns the tenant bound to the current session through the `app.tenant_id`
-- setting, and raises when that setting is missing or empty so that callers
-- never evaluate against "no tenant".
-- search_path is pinned because the function is SECURITY DEFINER: without it, a
-- caller-controlled search_path could shadow names resolved inside the body.
CREATE OR REPLACE FUNCTION binaries_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
SET search_path = pg_catalog, pg_temp
AS $$
DECLARE
    v_tenant TEXT;
BEGIN
    -- Second argument (missing_ok) = true: returns NULL instead of raising
    -- when the setting has never been defined in this session.
    v_tenant := current_setting('app.tenant_id', true);
    IF v_tenant IS NULL OR v_tenant = '' THEN
        RAISE EXCEPTION 'app.tenant_id session variable not set';
    END IF;
    RETURN v_tenant;
END;
$$;
-- =============================================================================
-- CORE TABLES
-- =============================================================================
-- binary_identity: identity record for a binary (hashes, build ID, format metadata).
-- binary_key is unique within a tenant.
CREATE TABLE IF NOT EXISTS binaries.binary_identity (
    id                     UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id              UUID        NOT NULL,
    binary_key             TEXT        NOT NULL,
    build_id               TEXT,
    build_id_type          TEXT
        CHECK (build_id_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
    file_sha256            TEXT        NOT NULL,
    text_sha256            TEXT,
    blake3_hash            TEXT,
    format                 TEXT        NOT NULL
        CHECK (format IN ('elf', 'pe', 'macho')),
    architecture           TEXT        NOT NULL,
    osabi                  TEXT,
    binary_type            TEXT
        CHECK (binary_type IN ('executable', 'shared_library', 'static_library', 'object')),
    is_stripped            BOOLEAN     DEFAULT false,
    first_seen_snapshot_id UUID,
    last_seen_snapshot_id  UUID,
    created_at             TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at             TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT binary_identity_key_unique UNIQUE (tenant_id, binary_key)
);
-- corpus_snapshots: one row per indexed distro/release/architecture snapshot,
-- carrying processing counters and a lifecycle status.
CREATE TABLE IF NOT EXISTS binaries.corpus_snapshots (
    id                   UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id            UUID        NOT NULL,
    distro               TEXT        NOT NULL,
    release              TEXT        NOT NULL,
    architecture         TEXT        NOT NULL,
    snapshot_id          TEXT        NOT NULL,
    packages_processed   INTEGER     NOT NULL DEFAULT 0,
    binaries_indexed     INTEGER     NOT NULL DEFAULT 0,
    repo_metadata_digest TEXT,
    signing_key_id       TEXT,
    dsse_envelope_ref    TEXT,
    status               TEXT        NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
    error                TEXT,
    started_at           TIMESTAMPTZ,
    completed_at         TIMESTAMPTZ,
    created_at           TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT corpus_snapshots_unique
        UNIQUE (tenant_id, distro, release, architecture, snapshot_id)
);
-- binary_package_map: links a binary identity to the package (and file path inside it)
-- where it was found in a given corpus snapshot.
-- Rows are removed automatically when the referenced binary_identity row is deleted.
CREATE TABLE IF NOT EXISTS binaries.binary_package_map (
    id                 UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id          UUID        NOT NULL,
    binary_identity_id UUID        NOT NULL
        REFERENCES binaries.binary_identity(id) ON DELETE CASCADE,
    binary_key         TEXT        NOT NULL,
    distro             TEXT        NOT NULL,
    release            TEXT        NOT NULL,
    source_pkg         TEXT        NOT NULL,
    binary_pkg         TEXT        NOT NULL,
    pkg_version        TEXT        NOT NULL,
    pkg_purl           TEXT,
    architecture       TEXT        NOT NULL,
    file_path_in_pkg   TEXT        NOT NULL,
    snapshot_id        UUID        NOT NULL
        REFERENCES binaries.corpus_snapshots(id),
    created_at         TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT binary_package_map_unique
        UNIQUE (binary_identity_id, snapshot_id, file_path_in_pkg)
);
-- vulnerable_buildids: catalog of build IDs tied to a package URL and version,
-- with a confidence label and optional provenance.
CREATE TABLE IF NOT EXISTS binaries.vulnerable_buildids (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id     UUID        NOT NULL,
    buildid_type  TEXT        NOT NULL
        CHECK (buildid_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
    buildid_value TEXT        NOT NULL,
    purl          TEXT        NOT NULL,
    pkg_version   TEXT        NOT NULL,
    distro        TEXT,
    release       TEXT,
    confidence    TEXT        NOT NULL DEFAULT 'exact'
        CHECK (confidence IN ('exact', 'inferred', 'heuristic')),
    provenance    JSONB       DEFAULT '{}',
    snapshot_id   UUID
        REFERENCES binaries.corpus_snapshots(id),
    indexed_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT vulnerable_buildids_unique
        UNIQUE (tenant_id, buildid_value, buildid_type, purl, pkg_version)
);
-- binary_vuln_assertion: at most one verdict per (tenant, binary_key, CVE),
-- recording how it was reached and where the evidence lives.
CREATE TABLE IF NOT EXISTS binaries.binary_vuln_assertion (
    id                 UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id          UUID         NOT NULL,
    binary_key         TEXT         NOT NULL,
    binary_identity_id UUID
        REFERENCES binaries.binary_identity(id),
    cve_id             TEXT         NOT NULL,
    advisory_id        UUID,
    status             TEXT         NOT NULL
        CHECK (status IN ('affected', 'not_affected', 'fixed', 'unknown')),
    method             TEXT         NOT NULL
        CHECK (method IN ('range_match', 'buildid_catalog', 'fingerprint_match', 'fix_index')),
    confidence         NUMERIC(3,2)
        CHECK (confidence BETWEEN 0 AND 1),
    evidence_ref       TEXT,
    evidence_digest    TEXT,
    evaluated_at       TIMESTAMPTZ  NOT NULL DEFAULT now(),
    created_at         TIMESTAMPTZ  NOT NULL DEFAULT now(),
    CONSTRAINT binary_vuln_assertion_unique UNIQUE (tenant_id, binary_key, cve_id)
);
-- =============================================================================
-- FIX INDEX TABLES
-- =============================================================================
-- fix_evidence: stored evidence (source file, digest, excerpt, metadata) behind a
-- fix determination. tenant_id defaults to the session tenant and is TEXT here.
CREATE TABLE IF NOT EXISTS binaries.fix_evidence (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id     TEXT        NOT NULL DEFAULT binaries_app.require_current_tenant(),
    evidence_type TEXT        NOT NULL
        CHECK (evidence_type IN ('changelog', 'patch_header', 'security_feed', 'upstream_match')),
    source_file   TEXT,
    source_sha256 TEXT,
    excerpt       TEXT,
    metadata      JSONB       NOT NULL DEFAULT '{}',
    snapshot_id   UUID,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- cve_fix_index: fix state of a CVE for a source package in a distro release,
-- optionally narrowed to one architecture.
-- NOTE(review): architecture is nullable and is part of cve_fix_index_unique. With
-- default NULL handling, rows whose architecture IS NULL never conflict with each
-- other, so duplicates are possible for architecture-independent entries. Confirm
-- that writers always supply a value or that this is intended.
CREATE TABLE IF NOT EXISTS binaries.cve_fix_index (
    id            UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id     TEXT         NOT NULL DEFAULT binaries_app.require_current_tenant(),
    distro        TEXT         NOT NULL,
    release       TEXT         NOT NULL,
    source_pkg    TEXT         NOT NULL,
    cve_id        TEXT         NOT NULL,
    architecture  TEXT,
    state         TEXT         NOT NULL
        CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
    fixed_version TEXT,
    method        TEXT         NOT NULL
        CHECK (method IN ('security_feed', 'changelog', 'patch_header', 'upstream_match')),
    confidence    NUMERIC(3,2) NOT NULL
        CHECK (confidence BETWEEN 0.00 AND 1.00),
    evidence_id   UUID
        REFERENCES binaries.fix_evidence(id),
    snapshot_id   UUID,
    indexed_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    CONSTRAINT cve_fix_index_unique
        UNIQUE (tenant_id, distro, release, source_pkg, cve_id, architecture)
);
-- fix_index_priority: per-tenant ordering of determination methods, one row per method.
CREATE TABLE IF NOT EXISTS binaries.fix_index_priority (
    id          UUID    PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id   TEXT    NOT NULL DEFAULT binaries_app.require_current_tenant(),
    priority    INT     NOT NULL,
    method      TEXT    NOT NULL,
    description TEXT,
    is_active   BOOLEAN NOT NULL DEFAULT TRUE,
    CONSTRAINT fix_index_priority_unique UNIQUE (tenant_id, method)
);
-- =============================================================================
-- FINGERPRINT TABLES
-- =============================================================================
-- vulnerable_fingerprints: fingerprint records keyed by (tenant, fingerprint_id),
-- each tied to a CVE and component, with optional validation data.
-- The algorithm CHECK accepts both 'cfg' and 'control_flow_graph'.
CREATE TABLE IF NOT EXISTS binaries.vulnerable_fingerprints (
    id                   UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id            TEXT         NOT NULL DEFAULT binaries_app.require_current_tenant(),
    cve_id               TEXT         NOT NULL,
    component            TEXT         NOT NULL,
    purl                 TEXT,
    algorithm            TEXT         NOT NULL
        CHECK (algorithm IN ('basic_block', 'cfg', 'control_flow_graph', 'string_refs', 'combined')),
    fingerprint_id       TEXT         NOT NULL,
    fingerprint_hash     BYTEA        NOT NULL,
    architecture         TEXT         NOT NULL,
    function_name        TEXT,
    source_file          TEXT,
    source_line          INTEGER,
    similarity_threshold NUMERIC(3,2) DEFAULT 0.95
        CHECK (similarity_threshold >= 0 AND similarity_threshold <= 1),
    confidence           NUMERIC(3,2)
        CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)),
    validated            BOOLEAN      DEFAULT FALSE,
    validation_stats     JSONB        DEFAULT '{}',
    vuln_build_ref       TEXT,
    fixed_build_ref      TEXT,
    notes                TEXT,
    evidence_ref         TEXT,
    indexed_at           TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    created_at           TIMESTAMPTZ  NOT NULL DEFAULT now(),
    CONSTRAINT vulnerable_fingerprints_unique UNIQUE (tenant_id, fingerprint_id)
);
-- fingerprint_corpus_metadata: one row per (tenant, purl, version, algorithm),
-- with counts of functions seen and fingerprints indexed.
CREATE TABLE IF NOT EXISTS binaries.fingerprint_corpus_metadata (
    id                   UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id            UUID        NOT NULL,
    purl                 TEXT        NOT NULL,
    version              TEXT        NOT NULL,
    algorithm            TEXT        NOT NULL,
    binary_digest        TEXT,
    function_count       INTEGER     NOT NULL DEFAULT 0,
    fingerprints_indexed INTEGER     NOT NULL DEFAULT 0,
    indexed_by           TEXT,
    indexed_at           TIMESTAMPTZ NOT NULL DEFAULT now(),
    created_at           TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT fingerprint_corpus_metadata_unique
        UNIQUE (tenant_id, purl, version, algorithm)
);
-- fingerprint_matches: match results recorded per scan. Each row may reference the
-- matched binary identity and the matched vulnerable fingerprint.
-- The match_type CHECK accepts both 'build_id' and 'buildid'.
CREATE TABLE IF NOT EXISTS binaries.fingerprint_matches (
    id                     UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id              TEXT         NOT NULL DEFAULT binaries_app.require_current_tenant(),
    scan_id                UUID         NOT NULL,
    match_type             TEXT         NOT NULL
        CHECK (match_type IN ('fingerprint', 'build_id', 'buildid', 'hash_exact')),
    binary_key             TEXT         NOT NULL,
    binary_identity_id     UUID
        REFERENCES binaries.binary_identity(id),
    vulnerable_purl        TEXT         NOT NULL,
    vulnerable_version     TEXT         NOT NULL,
    matched_fingerprint_id UUID
        REFERENCES binaries.vulnerable_fingerprints(id),
    matched_function       TEXT,
    similarity             NUMERIC(3,2)
        CHECK (similarity IS NULL OR (similarity >= 0 AND similarity <= 1)),
    advisory_ids           TEXT[],
    reachability_status    TEXT
        CHECK (reachability_status IN ('reachable', 'unreachable', 'unknown', 'partial')),
    evidence               JSONB        DEFAULT '{}',
    matched_at             TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    created_at             TIMESTAMPTZ  NOT NULL DEFAULT now()
);
-- =============================================================================
-- INDEXES - CORE TABLES
-- =============================================================================
-- binary_identity
CREATE INDEX IF NOT EXISTS idx_binary_identity_tenant
    ON binaries.binary_identity (tenant_id);
CREATE INDEX IF NOT EXISTS idx_binary_identity_buildid
    ON binaries.binary_identity (build_id)
    WHERE build_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_binary_identity_sha256
    ON binaries.binary_identity (file_sha256);
CREATE INDEX IF NOT EXISTS idx_binary_identity_key
    ON binaries.binary_identity (binary_key);

-- binary_package_map
CREATE INDEX IF NOT EXISTS idx_binary_package_map_tenant
    ON binaries.binary_package_map (tenant_id);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_binary
    ON binaries.binary_package_map (binary_identity_id);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_distro
    ON binaries.binary_package_map (distro, release, source_pkg);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_snapshot
    ON binaries.binary_package_map (snapshot_id);

-- corpus_snapshots (the status index only covers snapshots still in flight)
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_tenant
    ON binaries.corpus_snapshots (tenant_id);
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_distro
    ON binaries.corpus_snapshots (distro, release, architecture);
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_status
    ON binaries.corpus_snapshots (status)
    WHERE status IN ('pending', 'processing');

-- vulnerable_buildids
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_tenant
    ON binaries.vulnerable_buildids (tenant_id);
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_value
    ON binaries.vulnerable_buildids (buildid_type, buildid_value);
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_purl
    ON binaries.vulnerable_buildids (purl);

-- binary_vuln_assertion
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_tenant
    ON binaries.binary_vuln_assertion (tenant_id);
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_binary
    ON binaries.binary_vuln_assertion (binary_key);
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_cve
    ON binaries.binary_vuln_assertion (cve_id);
-- =============================================================================
-- INDEXES - FIX INDEX TABLES
-- =============================================================================
-- Every index in this group leads with tenant_id.
CREATE INDEX IF NOT EXISTS idx_fix_evidence_snapshot
    ON binaries.fix_evidence (tenant_id, snapshot_id);
CREATE INDEX IF NOT EXISTS idx_cve_fix_lookup
    ON binaries.cve_fix_index (tenant_id, distro, release, source_pkg, cve_id);
CREATE INDEX IF NOT EXISTS idx_cve_fix_by_cve
    ON binaries.cve_fix_index (tenant_id, cve_id, distro, release);
CREATE INDEX IF NOT EXISTS idx_cve_fix_by_version
    ON binaries.cve_fix_index (tenant_id, distro, release, source_pkg, fixed_version);
CREATE INDEX IF NOT EXISTS idx_cve_fix_snapshot
    ON binaries.cve_fix_index (tenant_id, snapshot_id);
CREATE INDEX IF NOT EXISTS idx_cve_fix_by_state
    ON binaries.cve_fix_index (tenant_id, distro, release, state);
-- =============================================================================
-- INDEXES - FINGERPRINT TABLES
-- =============================================================================
-- vulnerable_fingerprints (fingerprint_hash uses a hash index: equality lookups only)
CREATE INDEX IF NOT EXISTS idx_fingerprint_cve
    ON binaries.vulnerable_fingerprints (tenant_id, cve_id);
CREATE INDEX IF NOT EXISTS idx_fingerprint_component
    ON binaries.vulnerable_fingerprints (tenant_id, component);
CREATE INDEX IF NOT EXISTS idx_fingerprint_algorithm
    ON binaries.vulnerable_fingerprints (tenant_id, algorithm, architecture);
CREATE INDEX IF NOT EXISTS idx_fingerprint_hash
    ON binaries.vulnerable_fingerprints USING hash (fingerprint_hash);
CREATE INDEX IF NOT EXISTS idx_fingerprint_validated
    ON binaries.vulnerable_fingerprints (tenant_id, validated)
    WHERE validated = true;

-- fingerprint_corpus_metadata
CREATE INDEX IF NOT EXISTS idx_fingerprint_corpus_tenant
    ON binaries.fingerprint_corpus_metadata (tenant_id);
CREATE INDEX IF NOT EXISTS idx_fingerprint_corpus_purl
    ON binaries.fingerprint_corpus_metadata (purl, version);

-- fingerprint_matches
CREATE INDEX IF NOT EXISTS idx_match_scan
    ON binaries.fingerprint_matches (tenant_id, scan_id);
CREATE INDEX IF NOT EXISTS idx_match_fingerprint
    ON binaries.fingerprint_matches (matched_fingerprint_id);
CREATE INDEX IF NOT EXISTS idx_match_binary
    ON binaries.fingerprint_matches (tenant_id, binary_key);
CREATE INDEX IF NOT EXISTS idx_match_reachability
    ON binaries.fingerprint_matches (tenant_id, reachability_status);
-- =============================================================================
-- ROW-LEVEL SECURITY - CORE TABLES
-- =============================================================================
-- Tenant isolation for the core tables. tenant_id is UUID on these tables, so it is
-- cast to text before being compared with the session tenant.
-- FORCE makes the policies apply to the table owner as well.
-- CREATE POLICY has no IF NOT EXISTS form, so each policy is dropped first. This
-- keeps the script re-runnable on a database where the policy already exists,
-- matching the IF NOT EXISTS guards used for tables and indexes.
ALTER TABLE binaries.binary_identity ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_identity FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_identity_tenant_isolation ON binaries.binary_identity;
CREATE POLICY binary_identity_tenant_isolation ON binaries.binary_identity
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.corpus_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.corpus_snapshots FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots;
CREATE POLICY corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.binary_package_map ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_package_map FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_package_map_tenant_isolation ON binaries.binary_package_map;
CREATE POLICY binary_package_map_tenant_isolation ON binaries.binary_package_map
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.vulnerable_buildids ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_buildids FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids;
CREATE POLICY vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.binary_vuln_assertion ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_vuln_assertion FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion;
CREATE POLICY binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
-- =============================================================================
-- ROW-LEVEL SECURITY - FIX INDEX TABLES
-- =============================================================================
-- Tenant isolation for the fix-index tables. tenant_id is TEXT on these tables, so
-- no cast is needed.
-- Each policy is dropped before being recreated because CREATE POLICY has no
-- IF NOT EXISTS form; this keeps the script re-runnable.
-- FOR ALL / WITH CHECK are spelled out to match the core-table policies. The
-- behaviour is the same as the implicit defaults (a policy without WITH CHECK
-- reuses its USING expression for writes).
ALTER TABLE binaries.fix_evidence ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fix_evidence FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS fix_evidence_tenant_isolation ON binaries.fix_evidence;
CREATE POLICY fix_evidence_tenant_isolation ON binaries.fix_evidence
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id = binaries_app.require_current_tenant());

ALTER TABLE binaries.cve_fix_index ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_index FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS cve_fix_index_tenant_isolation ON binaries.cve_fix_index;
CREATE POLICY cve_fix_index_tenant_isolation ON binaries.cve_fix_index
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id = binaries_app.require_current_tenant());

ALTER TABLE binaries.fix_index_priority ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fix_index_priority FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS fix_index_priority_tenant_isolation ON binaries.fix_index_priority;
CREATE POLICY fix_index_priority_tenant_isolation ON binaries.fix_index_priority
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id = binaries_app.require_current_tenant());
-- =============================================================================
-- ROW-LEVEL SECURITY - FINGERPRINT TABLES
-- =============================================================================
-- Tenant isolation for the fingerprint tables.
-- vulnerable_fingerprints and fingerprint_matches store tenant_id as TEXT;
-- fingerprint_corpus_metadata stores it as UUID and therefore casts to text.
-- Each policy is dropped before being recreated because CREATE POLICY has no
-- IF NOT EXISTS form; this keeps the script re-runnable.
-- FOR ALL / WITH CHECK are spelled out on every policy for consistency. The
-- behaviour is the same as the implicit defaults.
ALTER TABLE binaries.vulnerable_fingerprints ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_fingerprints FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS vulnerable_fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints;
CREATE POLICY vulnerable_fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id = binaries_app.require_current_tenant());

ALTER TABLE binaries.fingerprint_corpus_metadata ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_corpus_metadata FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS fingerprint_corpus_metadata_tenant_isolation ON binaries.fingerprint_corpus_metadata;
CREATE POLICY fingerprint_corpus_metadata_tenant_isolation ON binaries.fingerprint_corpus_metadata
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.fingerprint_matches ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_matches FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS fingerprint_matches_tenant_isolation ON binaries.fingerprint_matches;
CREATE POLICY fingerprint_matches_tenant_isolation ON binaries.fingerprint_matches
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id = binaries_app.require_current_tenant());
-- =============================================================================
-- TABLE COMMENTS
-- =============================================================================
-- Catalog descriptions: core tables
COMMENT ON TABLE binaries.binary_identity
    IS 'Core binary identification table storing file hashes, build IDs, and metadata';
COMMENT ON TABLE binaries.corpus_snapshots
    IS 'Distribution corpus snapshots tracking package indexing progress';
COMMENT ON TABLE binaries.binary_package_map
    IS 'Maps binaries to their source and binary packages within distributions';
COMMENT ON TABLE binaries.vulnerable_buildids
    IS 'Known vulnerable build IDs for direct binary matching';
COMMENT ON TABLE binaries.binary_vuln_assertion
    IS 'Vulnerability assertions for specific binaries with evidence references';

-- Catalog descriptions: fix index tables
COMMENT ON TABLE binaries.fix_evidence
    IS 'Audit trail for CVE fix determinations, storing excerpts and metadata for traceability';
COMMENT ON TABLE binaries.cve_fix_index
    IS 'Patch-aware CVE fix index enabling accurate vulnerability status despite version pinning';
COMMENT ON COLUMN binaries.cve_fix_index.confidence
    IS 'Confidence score: security_feed=0.99, patch_header=0.90, changelog=0.80, upstream_match=0.85';
COMMENT ON COLUMN binaries.cve_fix_index.method
    IS 'How fix status was determined: security_feed (OVAL/DSA), changelog, patch_header (DEP-3), upstream_match';
COMMENT ON TABLE binaries.fix_index_priority
    IS 'Resolution priority when multiple sources conflict (lower priority number = higher precedence)';

-- Catalog descriptions: fingerprint tables
COMMENT ON TABLE binaries.vulnerable_fingerprints
    IS 'Function-level vulnerability fingerprints for detecting vulnerable code independent of package metadata';
COMMENT ON COLUMN binaries.vulnerable_fingerprints.algorithm
    IS 'Fingerprinting algorithm: basic_block, cfg (control flow graph), string_refs, or combined (ensemble)';
COMMENT ON COLUMN binaries.vulnerable_fingerprints.fingerprint_hash
    IS 'Binary fingerprint data (16-48 bytes depending on algorithm)';
COMMENT ON COLUMN binaries.vulnerable_fingerprints.validation_stats
    IS 'JSON object with tp, fp, tn, fn counts from validation corpus';
COMMENT ON TABLE binaries.fingerprint_corpus_metadata
    IS 'Metadata about fingerprinted packages including function counts and indexing status';
COMMENT ON TABLE binaries.fingerprint_matches
    IS 'Results of fingerprint matching operations during scans';
COMMENT ON COLUMN binaries.fingerprint_matches.similarity
    IS 'Similarity score (0.0-1.0) for fingerprint matches';

View File

@@ -0,0 +1,251 @@
-- Migration: 002_fingerprint_claims
-- Description: Adds tables for function-level fingerprints and CVE claims
-- Created: 2025-12-28
-- Sprint: SPRINT_1227_0002_0001 - Reproducible Builders
-- Ensure schema exists
-- NOTE(review): every object in this migration lives in `binary_index`, whereas the
-- initial schema migration creates its tables in `binaries`. Confirm the separate
-- schema is intentional.
CREATE SCHEMA IF NOT EXISTS binary_index;
-- ============================================================================
-- Function-level fingerprints (child of binary_fingerprints)
-- Stores per-function hashes for fine-grained CVE attribution
-- ============================================================================
-- One row per function of a fingerprinted binary; a function is identified by
-- (binary_fingerprint_id, function_name, function_offset).
-- NOTE(review): binary_fingerprint_id carries no foreign key, and the parent table is
-- not created in this migration. Confirm where referential integrity is enforced.
CREATE TABLE IF NOT EXISTS binary_index.function_fingerprints (
    id                    UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owning binary fingerprint
    binary_fingerprint_id UUID        NOT NULL,

    -- Where the function lives inside the binary
    function_name         TEXT        NOT NULL,
    function_offset       BIGINT      NOT NULL,
    function_size         INTEGER     NOT NULL,

    -- One hash per fingerprinting algorithm
    basic_block_hash      BYTEA       NOT NULL,  -- opcode sequences
    cfg_hash              BYTEA       NOT NULL,  -- control flow graph
    string_refs_hash      BYTEA       NOT NULL,  -- string references
    combined_hash         BYTEA,                 -- optional combined fingerprint

    -- Optional call-graph neighbours
    callees               TEXT[],                -- functions called by this one
    callers               TEXT[],                -- functions calling this one

    -- Descriptive metadata
    is_exported           BOOLEAN     NOT NULL DEFAULT FALSE,
    has_debug_info        BOOLEAN     NOT NULL DEFAULT FALSE,
    source_file           TEXT,
    source_line           INTEGER,

    created_at            TIMESTAMPTZ NOT NULL DEFAULT now(),

    CONSTRAINT uq_function_fingerprints_binary_func
        UNIQUE (binary_fingerprint_id, function_name, function_offset)
);
-- function_fingerprints lookups: by parent binary, by name, and by hash equality.
-- The combined_hash index skips rows where no combined hash was stored.
CREATE INDEX IF NOT EXISTS idx_function_fingerprints_binary
    ON binary_index.function_fingerprints (binary_fingerprint_id);

CREATE INDEX IF NOT EXISTS idx_function_fingerprints_name
    ON binary_index.function_fingerprints (function_name);

CREATE INDEX IF NOT EXISTS idx_function_fingerprints_basic_block
    ON binary_index.function_fingerprints USING hash (basic_block_hash);

CREATE INDEX IF NOT EXISTS idx_function_fingerprints_combined
    ON binary_index.function_fingerprints USING hash (combined_hash)
    WHERE combined_hash IS NOT NULL;
-- ============================================================================
-- Fingerprint CVE claims
-- Records assertions about whether a fingerprint contains a CVE fix
-- ============================================================================
-- One claim per (fingerprint_id, cve_id): the verdict, how confident it is, and the
-- evidence backing it.
CREATE TABLE IF NOT EXISTS binary_index.fingerprint_claims (
    id                    UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Target fingerprint (can be binary-level or function-level)
    fingerprint_id        UUID         NOT NULL,

    -- CVE identification
    cve_id                TEXT         NOT NULL,

    -- Verdict
    verdict               TEXT         NOT NULL
        CHECK (verdict IN ('fixed', 'vulnerable', 'unknown')),

    -- Confidence in this claim (0.0-1.0). NUMERIC(4,3) alone would accept values up
    -- to 9.999 and negatives, so the documented range is enforced explicitly.
    confidence            NUMERIC(4,3) NOT NULL DEFAULT 1.0
        CHECK (confidence >= 0 AND confidence <= 1),

    -- Evidence (JSONB for flexibility)
    evidence              JSONB        NOT NULL,

    -- Attestation reference (if signed)
    attestation_dsse_hash TEXT,

    -- Source/provenance, e.g. "repro-builder-alpine", "manual", "advisory-import"
    source                TEXT,

    -- Timestamps (updated_at stays NULL until something sets it; no default is defined)
    created_at            TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at            TIMESTAMPTZ,

    -- Unique constraint: one claim per fingerprint+CVE
    CONSTRAINT uq_fingerprint_claims_fingerprint_cve
        UNIQUE (fingerprint_id, cve_id)
);
-- fingerprint_claims lookups. Three of these are partial indexes: verdict covers only
-- 'fixed' rows, source only non-NULL values, confidence only rows below 1.0.
CREATE INDEX IF NOT EXISTS idx_fingerprint_claims_fingerprint
    ON binary_index.fingerprint_claims (fingerprint_id);

CREATE INDEX IF NOT EXISTS idx_fingerprint_claims_cve
    ON binary_index.fingerprint_claims (cve_id);

CREATE INDEX IF NOT EXISTS idx_fingerprint_claims_verdict
    ON binary_index.fingerprint_claims (verdict)
    WHERE verdict = 'fixed';

CREATE INDEX IF NOT EXISTS idx_fingerprint_claims_source
    ON binary_index.fingerprint_claims (source)
    WHERE source IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_fingerprint_claims_confidence
    ON binary_index.fingerprint_claims (confidence DESC)
    WHERE confidence < 1.0;

-- GIN index so individual keys inside the evidence document can be queried
CREATE INDEX IF NOT EXISTS idx_fingerprint_claims_evidence
    ON binary_index.fingerprint_claims USING gin (evidence);
-- ============================================================================
-- Reproducible build records
-- Tracks builds performed for fingerprint generation
-- ============================================================================
-- One row per build: what was built, its status and timing, and references to the
-- resulting log and artifacts.
CREATE TABLE IF NOT EXISTS binary_index.reproducible_builds (
    id                  UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Client-provided correlation ID (unique when present)
    request_id          TEXT        UNIQUE,

    -- Package being built
    source_package      TEXT        NOT NULL,
    version             TEXT        NOT NULL,
    distro              TEXT        NOT NULL,
    release             TEXT        NOT NULL,
    architecture        TEXT        NOT NULL DEFAULT 'x86_64',

    -- Lifecycle
    status              TEXT        NOT NULL
        CHECK (status IN ('pending', 'building', 'success', 'failed')),
    error_message       TEXT,

    -- Timing
    started_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    completed_at        TIMESTAMPTZ,
    duration_ms         BIGINT,

    -- Reproducibility inputs
    source_date_epoch   BIGINT,
    builder_image       TEXT,

    -- Content-addressed artifact references
    build_log_ref       TEXT,
    artifact_ref        TEXT,

    -- Applied patches, if any: array of {cve_id, patch_url, commit_id}
    patches             JSONB,

    -- Result counters
    binaries_produced   INTEGER,
    functions_extracted INTEGER,

    created_at          TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- reproducible_builds lookups; the status index covers only unfinished builds.
CREATE INDEX IF NOT EXISTS idx_reproducible_builds_package
    ON binary_index.reproducible_builds (source_package, version);

CREATE INDEX IF NOT EXISTS idx_reproducible_builds_distro
    ON binary_index.reproducible_builds (distro, release);

CREATE INDEX IF NOT EXISTS idx_reproducible_builds_status
    ON binary_index.reproducible_builds (status)
    WHERE status IN ('pending', 'building');

CREATE INDEX IF NOT EXISTS idx_reproducible_builds_created
    ON binary_index.reproducible_builds (created_at DESC);
-- ============================================================================
-- Build output binaries
-- Links reproducible builds to the fingerprints they generated
-- ============================================================================
-- One row per binary produced by a build; rows are deleted together with their build.
CREATE TABLE IF NOT EXISTS binary_index.build_outputs (
    id                   UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owning build
    build_id             UUID        NOT NULL
        REFERENCES binary_index.reproducible_builds(id) ON DELETE CASCADE,

    -- Produced binary
    binary_path          TEXT        NOT NULL,
    build_id_hash        TEXT        NOT NULL,  -- ELF Build-ID

    -- Fingerprint generated from this binary, once it exists (no FK is declared)
    fingerprint_id       UUID,

    -- Hashes (stored as raw bytes)
    file_sha256          BYTEA       NOT NULL,
    text_sha256          BYTEA       NOT NULL,
    combined_fingerprint BYTEA       NOT NULL,

    -- Descriptive metadata
    format               TEXT        NOT NULL DEFAULT 'elf',
    architecture         TEXT,
    is_stripped          BOOLEAN     NOT NULL DEFAULT FALSE,

    -- Function extraction stats
    functions_extracted  INTEGER,

    created_at           TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- build_outputs lookups; the fingerprint index skips rows with no fingerprint yet.
CREATE INDEX IF NOT EXISTS idx_build_outputs_build
    ON binary_index.build_outputs (build_id);

CREATE INDEX IF NOT EXISTS idx_build_outputs_build_id_hash
    ON binary_index.build_outputs (build_id_hash);

CREATE INDEX IF NOT EXISTS idx_build_outputs_fingerprint
    ON binary_index.build_outputs (fingerprint_id)
    WHERE fingerprint_id IS NOT NULL;
-- ============================================================================
-- Comments for documentation
-- ============================================================================
-- Table descriptions
COMMENT ON TABLE binary_index.function_fingerprints
    IS 'Per-function fingerprints for fine-grained CVE attribution. Generated from reproducible builds.';
COMMENT ON TABLE binary_index.fingerprint_claims
    IS 'CVE fix/vulnerability claims for fingerprints. Evidence from reproducible build diffing.';
COMMENT ON TABLE binary_index.reproducible_builds
    IS 'Records of reproducible builds performed for fingerprint corpus generation.';
COMMENT ON TABLE binary_index.build_outputs
    IS 'Binary artifacts produced by reproducible builds with their fingerprints.';

-- Column descriptions
COMMENT ON COLUMN binary_index.function_fingerprints.basic_block_hash
    IS 'SHA-256 of normalized opcode sequences (ignoring operand values)';
COMMENT ON COLUMN binary_index.function_fingerprints.cfg_hash
    IS 'SHA-256 of control flow graph structure (branch patterns)';
COMMENT ON COLUMN binary_index.function_fingerprints.string_refs_hash
    IS 'SHA-256 of string literals referenced by the function';
COMMENT ON COLUMN binary_index.fingerprint_claims.evidence
    IS 'JSONB containing: patch_commit, changed_functions[], function_similarities{}, build_refs';

View File

@@ -31,21 +31,21 @@ public sealed class CorpusSnapshotRepository : ICorpusSnapshotRepository
distro,
release,
architecture,
metadata_digest,
captured_at,
snapshot_id,
repo_metadata_digest,
created_at
)
VALUES (
@Id,
binaries_app.current_tenant()::uuid,
binaries_app.require_current_tenant()::uuid,
@Distro,
@Release,
@Architecture,
@SnapshotId,
@MetadataDigest,
@CapturedAt,
NOW()
)
RETURNING id, distro, release, architecture, metadata_digest, captured_at
RETURNING id, distro, release, architecture, repo_metadata_digest AS metadata_digest, created_at AS captured_at
""";
var row = await conn.QuerySingleAsync<CorpusSnapshotRow>(sql, new
@@ -54,8 +54,8 @@ public sealed class CorpusSnapshotRepository : ICorpusSnapshotRepository
snapshot.Distro,
snapshot.Release,
snapshot.Architecture,
snapshot.MetadataDigest,
snapshot.CapturedAt
SnapshotId = $"{snapshot.Distro}_{snapshot.Release}_{snapshot.Architecture}_{snapshot.CapturedAt:yyyyMMddHHmmss}",
snapshot.MetadataDigest
});
_logger.LogInformation(
@@ -74,12 +74,14 @@ public sealed class CorpusSnapshotRepository : ICorpusSnapshotRepository
await using var conn = await _dbContext.OpenConnectionAsync(ct);
const string sql = """
SELECT id, distro, release, architecture, metadata_digest, captured_at
SELECT id, distro, release, architecture,
repo_metadata_digest AS metadata_digest,
created_at AS captured_at
FROM binaries.corpus_snapshots
WHERE distro = @Distro
AND release = @Release
AND architecture = @Architecture
ORDER BY captured_at DESC
ORDER BY created_at DESC
LIMIT 1
""";
@@ -98,7 +100,9 @@ public sealed class CorpusSnapshotRepository : ICorpusSnapshotRepository
await using var conn = await _dbContext.OpenConnectionAsync(ct);
const string sql = """
SELECT id, distro, release, architecture, metadata_digest, captured_at
SELECT id, distro, release, architecture,
repo_metadata_digest AS metadata_digest,
created_at AS captured_at
FROM binaries.corpus_snapshots
WHERE id = @Id
""";

View File

@@ -8,9 +8,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="9.0.2" />
<PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Npgsql" />
<PackageReference Include="Dapper" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>

View File

@@ -0,0 +1,122 @@
using System.Text.Json.Nodes;
namespace StellaOps.BinaryIndex.VexBridge;
/// <summary>
/// Field names, well-known values, and a factory for the JSON evidence payload that
/// describes a binary match inside a VEX observation.
/// </summary>
public static class BinaryMatchEvidenceSchema
{
    /// <summary>Value written to the <c>type</c> field of every payload.</summary>
    public const string EvidenceType = "binary_fingerprint_match";

    /// <summary>Value written to the <c>schema_version</c> field of every payload.</summary>
    public const string SchemaVersion = "1.0";

    /// <summary>JSON property names used by the evidence payload.</summary>
    public static class Fields
    {
        public const string Type = "type";
        public const string SchemaVersion = "schema_version";
        public const string MatchType = "match_type";
        public const string BuildId = "build_id";
        public const string FileSha256 = "file_sha256";
        public const string TextSha256 = "text_sha256";
        public const string FingerprintAlgorithm = "fingerprint_algorithm";
        public const string Similarity = "similarity";
        public const string DistroRelease = "distro_release";
        public const string SourcePackage = "source_package";
        public const string FixedVersion = "fixed_version";
        public const string FixMethod = "fix_method";
        public const string FixConfidence = "fix_confidence";
        public const string EvidenceRef = "evidence_ref";
        public const string MatchedFunction = "matched_function";
        public const string BinaryKey = "binary_key";
        public const string Architecture = "architecture";
        public const string ResolvedAt = "resolved_at";
    }

    /// <summary>Known values for the <c>match_type</c> field.</summary>
    public static class MatchTypes
    {
        public const string BuildId = "build_id";
        public const string Fingerprint = "fingerprint";
        public const string HashExact = "hash_exact";
    }

    /// <summary>
    /// Builds an evidence object. <c>type</c>, <c>schema_version</c> and <c>match_type</c> are
    /// always written; every other field is written only when its argument carries a value
    /// (non-blank for strings, non-null for numbers and the timestamp).
    /// </summary>
    /// <returns>A new <see cref="JsonObject"/> with properties in the order of the parameters.</returns>
    public static JsonObject CreateEvidence(
        string matchType,
        string? buildId = null,
        string? fileSha256 = null,
        string? textSha256 = null,
        string? fingerprintAlgorithm = null,
        decimal? similarity = null,
        string? distroRelease = null,
        string? sourcePackage = null,
        string? fixedVersion = null,
        string? fixMethod = null,
        decimal? fixConfidence = null,
        string? evidenceRef = null,
        string? matchedFunction = null,
        string? binaryKey = null,
        string? architecture = null,
        DateTimeOffset? resolvedAt = null)
    {
        var payload = new JsonObject();
        payload[Fields.Type] = EvidenceType;
        payload[Fields.SchemaVersion] = SchemaVersion;
        payload[Fields.MatchType] = matchType;

        // Insertion order below defines property order in the serialized JSON.
        SetText(Fields.BuildId, buildId);
        SetText(Fields.FileSha256, fileSha256);
        SetText(Fields.TextSha256, textSha256);
        SetText(Fields.FingerprintAlgorithm, fingerprintAlgorithm);
        SetNumber(Fields.Similarity, similarity);
        SetText(Fields.DistroRelease, distroRelease);
        SetText(Fields.SourcePackage, sourcePackage);
        SetText(Fields.FixedVersion, fixedVersion);
        SetText(Fields.FixMethod, fixMethod);
        SetNumber(Fields.FixConfidence, fixConfidence);
        SetText(Fields.EvidenceRef, evidenceRef);
        SetText(Fields.MatchedFunction, matchedFunction);
        SetText(Fields.BinaryKey, binaryKey);
        SetText(Fields.Architecture, architecture);

        if (resolvedAt is { } timestamp)
        {
            // Round-trip ("O") format keeps the offset and full precision.
            payload[Fields.ResolvedAt] = timestamp.ToString("O");
        }

        return payload;

        // Writes a string field unless the value is null, empty, or whitespace.
        void SetText(string field, string? value)
        {
            if (!string.IsNullOrWhiteSpace(value))
            {
                payload[field] = value;
            }
        }

        // Writes a numeric field unless the value is null.
        void SetNumber(string field, decimal? value)
        {
            if (value is { } number)
            {
                payload[field] = number;
            }
        }
    }
}

View File

@@ -0,0 +1,59 @@
// -----------------------------------------------------------------------------
// IDsseSigningAdapter.cs
// Sprint: SPRINT_1227_0001_0001_LB_binary_vex_generator
// Task: T5 — DSSE signing integration
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.VexBridge;
/// <summary>
/// Adapter interface for DSSE signing operations.
/// Abstracts the Attestor signing service for VexBridge use.
/// </summary>
/// <remarks>
/// Callers are expected to consult <see cref="IsAvailable"/> before calling
/// <see cref="SignAsync"/>; the generator in this library only signs when it is <c>true</c>.
/// </remarks>
public interface IDsseSigningAdapter
{
/// <summary>
/// Sign a payload and return a DSSE envelope.
/// </summary>
/// <param name="payload">The payload bytes to sign.</param>
/// <param name="payloadType">
/// The DSSE payload type. NOTE(review): described as a URI, but the in-library caller passes
/// the media type <c>application/vnd.stellaops.binary-resolution+json</c> — confirm the expected form.
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <returns>DSSE envelope as JSON bytes.</returns>
Task<byte[]> SignAsync(byte[] payload, string payloadType, CancellationToken ct = default);
/// <summary>
/// Verify a DSSE envelope signature.
/// </summary>
/// <param name="envelope">The DSSE envelope bytes.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>True if signature is valid.</returns>
Task<bool> VerifyAsync(byte[] envelope, CancellationToken ct = default);
/// <summary>
/// Get the key ID used for signing.
/// The generator records this value as the key ID of the signature it attaches.
/// </summary>
string SigningKeyId { get; }
/// <summary>
/// Check if signing is available.
/// When <c>false</c>, the generator emits the observation unsigned instead of calling <see cref="SignAsync"/>.
/// </summary>
bool IsAvailable { get; }
}
/// <summary>
/// DSSE envelope result with metadata: the envelope text together with the key ID,
/// envelope hash, and signing time. All four members must be set at construction.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="IDsseSigningAdapter.SignAsync"/> as declared in this file returns raw
/// envelope bytes rather than this record — confirm which component produces it.
/// </remarks>
public sealed record DsseEnvelopeResult
{
/// <summary>The DSSE envelope as JSON string.</summary>
public required string Envelope { get; init; }
/// <summary>The signing key ID used.</summary>
public required string KeyId { get; init; }
/// <summary>
/// SHA-256 hash of the envelope.
/// NOTE(review): the string encoding (bare hex vs. <c>sha256:</c>-prefixed) is not fixed by this type — confirm with the producer.
/// </summary>
public required string EnvelopeHash { get; init; }
/// <summary>Timestamp when signed.</summary>
public required DateTimeOffset SignedAt { get; init; }
}

View File

@@ -0,0 +1,97 @@
using System.Collections.Immutable;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.Excititor.Core.Observations;
namespace StellaOps.BinaryIndex.VexBridge;
/// <summary>
/// Generates VEX observations from binary vulnerability match results.
/// Bridges the gap between binary fingerprint analysis and VEX decision flow.
/// </summary>
public interface IVexEvidenceGenerator
{
/// <summary>
/// Generate a VEX observation from a binary vulnerability match.
/// </summary>
/// <param name="match">The binary vulnerability match result.</param>
/// <param name="identity">The binary identity being analyzed.</param>
/// <param name="fixStatus">
/// Optional fix status from the fix index. When supplied, the bundled implementation uses its
/// confidence (instead of the match confidence) for the threshold check.
/// </param>
/// <param name="context">Generation context with tenant and scan metadata.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A VEX observation ready for Excititor persistence.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown by the bundled implementation when <paramref name="match"/>, <paramref name="identity"/>
/// or <paramref name="context"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown by the bundled implementation when the effective confidence is below
/// <c>VexBridgeOptions.MinConfidenceThreshold</c>.
/// </exception>
Task<VexObservation> GenerateFromBinaryMatchAsync(
BinaryVulnMatch match,
BinaryIdentity identity,
FixStatusResult? fixStatus,
VexGenerationContext context,
CancellationToken ct = default);
/// <summary>
/// Batch generation of VEX observations for scan performance.
/// </summary>
/// <remarks>
/// The bundled implementation processes at most <c>VexBridgeOptions.MaxBatchSize</c> items
/// (extra items are dropped with a warning) and silently skips items whose confidence is
/// below the threshold, so the result can be shorter than the input.
/// </remarks>
/// <param name="matches">Collection of matches with their context.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of VEX observations in deterministic order (ordinal order of observation ID in the bundled implementation).</returns>
Task<IReadOnlyList<VexObservation>> GenerateBatchAsync(
IEnumerable<BinaryMatchWithContext> matches,
CancellationToken ct = default);
/// <summary>
/// Generate observation ID deterministically for replay/idempotency.
/// </summary>
/// <remarks>
/// The bundled implementation lower-cases the tenant and upper-cases the CVE before hashing,
/// so those two inputs are case-insensitive; product key and scan ID are used verbatim.
/// </remarks>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="cveId">CVE identifier.</param>
/// <param name="productKey">PURL or product key.</param>
/// <param name="scanId">Scan identifier.</param>
/// <returns>Deterministic UUID5-based observation ID.</returns>
string GenerateObservationId(string tenantId, string cveId, string productKey, string scanId);
}
/// <summary>
/// Context for VEX observation generation.
/// Carries the per-call tenant, scan, and product identifiers plus provider/stream and signing choices.
/// </summary>
public sealed record VexGenerationContext
{
/// <summary>Tenant identifier. Used as the observation tenant and as an input to the observation ID.</summary>
public required string TenantId { get; init; }
/// <summary>Scan identifier for traceability. Also an input to the observation ID and written to the <c>scan_id</c> attribute.</summary>
public required string ScanId { get; init; }
/// <summary>Product key, typically a PURL. Also an input to the observation ID.</summary>
public required string ProductKey { get; init; }
/// <summary>Optional distro release identifier (e.g., "debian:bookworm"). Copied into the evidence payload when set.</summary>
public string? DistroRelease { get; init; }
/// <summary>
/// Whether to sign the observation with DSSE. Default true.
/// Signing additionally requires an available signer; otherwise the observation is produced unsigned.
/// </summary>
public bool SignWithDsse { get; init; } = true;
/// <summary>Provider ID for the VEX observation. Defaults to "stellaops.binaryindex".</summary>
public string ProviderId { get; init; } = "stellaops.binaryindex";
/// <summary>Stream ID for the VEX observation. Defaults to "binary_resolution".</summary>
public string StreamId { get; init; } = "binary_resolution";
/// <summary>
/// Optional version for the resolution evidence.
/// NOTE(review): not read by VexEvidenceGenerator in this library — confirm the intended consumer.
/// </summary>
public string? EvidenceVersion { get; init; }
}
/// <summary>
/// Wrapper for a binary match with its full context.
/// One instance is one work item for <see cref="IVexEvidenceGenerator.GenerateBatchAsync"/>;
/// its members correspond one-to-one to the arguments of the single-item generation method.
/// </summary>
public sealed record BinaryMatchWithContext
{
/// <summary>The binary vulnerability match.</summary>
public required BinaryVulnMatch Match { get; init; }
/// <summary>The binary identity being analyzed.</summary>
public required BinaryIdentity Identity { get; init; }
/// <summary>Optional fix status from the fix index. May be left null when no fix information is known.</summary>
public FixStatusResult? FixStatus { get; init; }
/// <summary>Generation context. Supplied per item, so items in one batch may target different tenants or scans.</summary>
public required VexGenerationContext Context { get; init; }
}

View File

@@ -0,0 +1,50 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
namespace StellaOps.BinaryIndex.VexBridge;
/// <summary>
/// Dependency-injection registration helpers for the VEX Bridge.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the VEX Bridge, binding <see cref="VexBridgeOptions"/> from the
    /// <see cref="VexBridgeOptions.SectionName"/> section of <paramref name="configuration"/>.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <param name="configuration">Configuration root that contains the VexBridge section.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static IServiceCollection AddBinaryVexBridge(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        var bridgeSection = configuration.GetSection(VexBridgeOptions.SectionName);

        return services
            .Configure<VexBridgeOptions>(bridgeSection)
            .AddSingleton<IVexEvidenceGenerator, VexEvidenceGenerator>();
    }

    /// <summary>
    /// Registers the VEX Bridge with options supplied in code.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <param name="configureOptions">Callback that populates <see cref="VexBridgeOptions"/>.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static IServiceCollection AddBinaryVexBridge(
        this IServiceCollection services,
        Action<VexBridgeOptions> configureOptions)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configureOptions);

        return services
            .Configure(configureOptions)
            .AddSingleton<IVexEvidenceGenerator, VexEvidenceGenerator>();
    }
}

View File

@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the VexBridge library. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- XML documentation is generated; warnings (including missing-doc warnings) do not fail the build. -->
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<Description>Bridges binary fingerprint matching to VEX observation generation for StellaOps.</Description>
</PropertyGroup>
<!-- No Version attributes here: versions are presumably supplied centrally (e.g. Directory.Packages.props) - confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
<!-- Sibling BinaryIndex projects plus the Excititor and Attestor libraries this bridge connects. -->
<ItemGroup>
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="../StellaOps.BinaryIndex.FixIndex/StellaOps.BinaryIndex.FixIndex.csproj" />
<ProjectReference Include="../../../Excititor/__Libraries/StellaOps.Excititor.Core/StellaOps.Excititor.Core.csproj" />
<ProjectReference Include="../../../Attestor/StellaOps.Attestor.Envelope/StellaOps.Attestor.Envelope.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,54 @@
namespace StellaOps.BinaryIndex.VexBridge;
/// <summary>
/// Configuration options for the VEX Bridge.
/// Bound from the <see cref="SectionName"/> configuration section or set in code at registration.
/// </summary>
public sealed class VexBridgeOptions
{
/// <summary>Configuration section name.</summary>
public const string SectionName = "VexBridge";
/// <summary>
/// Whether to sign generated VEX observations with DSSE.
/// Default: true
/// NOTE(review): VexEvidenceGenerator decides signing from VexGenerationContext.SignWithDsse
/// and does not read this option — confirm where it is applied.
/// </summary>
public bool SignWithDsse { get; set; } = true;
/// <summary>
/// Key ID to use for DSSE signing.
/// If null, uses the default attestor key.
/// NOTE(review): not read by VexEvidenceGenerator; presumably consumed by the signing adapter — confirm.
/// </summary>
public string? DsseKeyId { get; set; }
/// <summary>
/// Default provider ID for generated observations.
/// NOTE(review): VexEvidenceGenerator takes the provider ID from VexGenerationContext.ProviderId
/// (same default value) — confirm where this default is applied.
/// </summary>
public string DefaultProviderId { get; set; } = "stellaops.binaryindex";
/// <summary>
/// Default stream ID for generated observations.
/// NOTE(review): VexEvidenceGenerator takes the stream ID from VexGenerationContext.StreamId
/// (same default value) — confirm where this default is applied.
/// </summary>
public string DefaultStreamId { get; set; } = "binary_resolution";
/// <summary>
/// Minimum confidence threshold for creating observations.
/// Matches below this threshold will be skipped.
/// The value compared is the fix-status confidence when a fix status is supplied, otherwise the match confidence.
/// </summary>
public decimal MinConfidenceThreshold { get; set; } = 0.70m;
/// <summary>
/// Whether to include function-level evidence when available.
/// </summary>
public bool IncludeFunctionEvidence { get; set; } = true;
/// <summary>
/// Maximum number of observations to generate in a single batch.
/// Batch generation drops items beyond this count and logs a warning.
/// </summary>
public int MaxBatchSize { get; set; } = 1000;
/// <summary>
/// Namespace UUID for generating deterministic observation IDs.
/// Default: StellaOps BinaryIndex namespace.
/// Changing this value changes every generated observation ID.
/// </summary>
public Guid ObservationIdNamespace { get; set; } = new("d9e0a5f3-7b2c-4e8d-9a1f-6c3b5d8e2f0a");
}

View File

@@ -0,0 +1,468 @@
using System.Collections.Immutable;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Nodes;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.Excititor.Core;
using StellaOps.Excititor.Core.Observations;
namespace StellaOps.BinaryIndex.VexBridge;
/// <summary>
/// Generates VEX observations from binary vulnerability matches.
/// Maps FixState to VexClaimStatus with appropriate justifications.
/// Supports optional DSSE signing for attestable proofs.
/// </summary>
public sealed class VexEvidenceGenerator : IVexEvidenceGenerator
{
private readonly ILogger<VexEvidenceGenerator> _logger;
private readonly VexBridgeOptions _options;
private readonly IDsseSigningAdapter? _dsseSigner;
/// <summary>
/// Creates the generator.
/// </summary>
/// <param name="logger">Logger for diagnostics; required.</param>
/// <param name="options">Bridge options; both the wrapper and its value are required.</param>
/// <param name="dsseSigner">Optional DSSE signer; when null, observations are produced unsigned.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="logger"/> is null, or <paramref name="options"/> (or its value) is null.
/// </exception>
public VexEvidenceGenerator(
    ILogger<VexEvidenceGenerator> logger,
    IOptions<VexBridgeOptions> options,
    IDsseSigningAdapter? dsseSigner = null)
{
    ArgumentNullException.ThrowIfNull(logger);

    if (options?.Value is not { } resolvedOptions)
    {
        throw new ArgumentNullException(nameof(options));
    }

    _logger = logger;
    _options = resolvedOptions;
    _dsseSigner = dsseSigner;
}
/// <inheritdoc />
/// <exception cref="ArgumentNullException">
/// <paramref name="match"/>, <paramref name="identity"/> or <paramref name="context"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The effective confidence is below <see cref="VexBridgeOptions.MinConfidenceThreshold"/>.
/// </exception>
public async Task<VexObservation> GenerateFromBinaryMatchAsync(
    BinaryVulnMatch match,
    BinaryIdentity identity,
    FixStatusResult? fixStatus,
    VexGenerationContext context,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(match);
    ArgumentNullException.ThrowIfNull(identity);
    ArgumentNullException.ThrowIfNull(context);
    ct.ThrowIfCancellationRequested();

    // A single-item caller asked for exactly this observation, so a rejection is an error.
    if (GetThresholdRejection(match, fixStatus) is { } rejection)
    {
        throw new InvalidOperationException(rejection);
    }

    return await CreateObservationAsync(match, identity, fixStatus, context, ct);
}

/// <inheritdoc />
/// <remarks>
/// At most <see cref="VexBridgeOptions.MaxBatchSize"/> items are processed; the rest are dropped
/// with a warning. Items below the confidence threshold are skipped. The result is sorted by
/// observation ID using ordinal comparison.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="matches"/> is null, or an item (or its match, identity or context) is null.
/// </exception>
public async Task<IReadOnlyList<VexObservation>> GenerateBatchAsync(
    IEnumerable<BinaryMatchWithContext> matches,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(matches);

    var batchItems = matches.ToList();
    if (batchItems.Count > _options.MaxBatchSize)
    {
        _logger.LogWarning(
            "Batch size {Count} exceeds maximum {Max}, truncating",
            batchItems.Count, _options.MaxBatchSize);
        batchItems = batchItems.Take(_options.MaxBatchSize).ToList();
    }

    var results = new List<VexObservation>(batchItems.Count);
    foreach (var item in batchItems)
    {
        ct.ThrowIfCancellationRequested();

        // Same argument contract as the single-item entry point.
        ArgumentNullException.ThrowIfNull(item);
        ArgumentNullException.ThrowIfNull(item.Match);
        ArgumentNullException.ThrowIfNull(item.Identity);
        ArgumentNullException.ThrowIfNull(item.Context);

        // Decide up front instead of throwing and matching on the exception message:
        // skipping is the expected outcome here, and message matching could also hide
        // unrelated InvalidOperationExceptions raised while building the observation.
        if (GetThresholdRejection(item.Match, item.FixStatus) is { } rejection)
        {
            _logger.LogDebug("Skipping batch item: {Message}", rejection);
            continue;
        }

        results.Add(await CreateObservationAsync(
            item.Match,
            item.Identity,
            item.FixStatus,
            item.Context,
            ct));
    }

    // Return in deterministic order (by observation ID)
    return results.OrderBy(o => o.ObservationId, StringComparer.Ordinal).ToList();
}

/// <summary>
/// Applies the minimum-confidence rule shared by the single-item and batch paths.
/// </summary>
/// <returns>
/// <c>null</c> when the match may be turned into an observation; otherwise the human-readable
/// reason it was rejected.
/// </returns>
private string? GetThresholdRejection(BinaryVulnMatch match, FixStatusResult? fixStatus)
{
    // Fix-index confidence, when a fix status is supplied, takes precedence over the raw match confidence.
    var effectiveConfidence = fixStatus?.Confidence ?? match.Confidence;
    if (effectiveConfidence < _options.MinConfidenceThreshold)
    {
        _logger.LogDebug(
            "Skipping observation for {CveId}: confidence {Confidence} below threshold {Threshold}",
            match.CveId, effectiveConfidence, _options.MinConfidenceThreshold);
        return $"Match confidence {effectiveConfidence} is below minimum threshold {_options.MinConfidenceThreshold}";
    }

    return null;
}
/// <inheritdoc />
/// <exception cref="ArgumentException">Any argument is null, empty, or whitespace.</exception>
public string GenerateObservationId(string tenantId, string cveId, string productKey, string scanId)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentException.ThrowIfNullOrWhiteSpace(cveId);
    ArgumentException.ThrowIfNullOrWhiteSpace(productKey);
    ArgumentException.ThrowIfNullOrWhiteSpace(scanId);

    // Tenant and CVE are case-normalized so casing differences map to the same ID;
    // product key and scan ID are hashed verbatim.
    var normalizedTenant = tenantId.ToLowerInvariant();
    var normalizedCve = cveId.ToUpperInvariant();
    var uuidName = string.Join(':', normalizedTenant, normalizedCve, productKey, scanId);

    var observationGuid = GenerateUuid5(_options.ObservationIdNamespace, uuidName);
    return observationGuid.ToString();
}
/// <summary>
/// Assembles a complete observation for one match: ID, status mapping, evidence payload,
/// upstream descriptor (optionally signed), statement, content, linkset and attributes.
/// </summary>
private async Task<VexObservation> CreateObservationAsync(
    BinaryVulnMatch match,
    BinaryIdentity identity,
    FixStatusResult? fixStatus,
    VexGenerationContext context,
    CancellationToken ct)
{
    var id = GenerateObservationId(
        context.TenantId,
        match.CveId,
        context.ProductKey,
        context.ScanId);

    // One timestamp is shared by the evidence, the upstream descriptor and createdAt.
    var timestamp = DateTimeOffset.UtcNow;

    var (claimStatus, claimJustification) = MapToVexStatus(fixStatus);
    var evidencePayload = CreateEvidencePayload(match, identity, fixStatus, context, timestamp);
    var upstreamInfo = await CreateUpstreamAsync(id, evidencePayload, timestamp, context.SignWithDsse, ct);
    var statement = CreateStatement(match, context, claimStatus, claimJustification, fixStatus);
    var content = CreateContent(evidencePayload);
    var linkset = CreateLinkset(match, identity);

    var attributeBuilder = ImmutableDictionary.CreateBuilder<string, string>();
    attributeBuilder.Add("generator", "StellaOps.BinaryIndex.VexBridge");
    attributeBuilder.Add("generator_version", "1.0.0");
    attributeBuilder.Add("scan_id", context.ScanId);

    // Signature details are surfaced only when signing was requested and actually happened.
    var wasSigned = context.SignWithDsse && upstreamInfo.Signature.Present;
    if (wasSigned)
    {
        attributeBuilder.Add("dsse_signed", "true");
        attributeBuilder.Add("dsse_key_id", upstreamInfo.Signature.KeyId ?? "unknown");
    }

    return new VexObservation(
        observationId: id,
        tenant: context.TenantId,
        providerId: context.ProviderId,
        streamId: context.StreamId,
        upstream: upstreamInfo,
        statements: ImmutableArray.Create(statement),
        content: content,
        linkset: linkset,
        createdAt: timestamp,
        attributes: attributeBuilder.ToImmutable());
}
/// <summary>
/// Translates a fix-index result into a VEX claim status and optional justification.
/// A missing fix status, an unknown state, or any unlisted state maps to "under investigation".
/// </summary>
private static (VexClaimStatus Status, VexJustification? Justification) MapToVexStatus(FixStatusResult? fixStatus)
{
    if (fixStatus is null)
    {
        return (VexClaimStatus.UnderInvestigation, null);
    }

    switch (fixStatus.State)
    {
        case FixState.Fixed:
            return (VexClaimStatus.NotAffected, VexJustification.VulnerableCodeNotPresent);

        case FixState.Vulnerable:
            return (VexClaimStatus.Affected, null);

        case FixState.NotAffected:
            return (VexClaimStatus.NotAffected, VexJustification.ComponentNotPresent);

        case FixState.Wontfix:
            // NOTE(review): "won't fix" is reported as not-affected with an inline-mitigation
            // justification. Confirm this matches the fix index's meaning of Wontfix, since a
            // distro declining to patch does not by itself imply a mitigation exists.
            return (VexClaimStatus.NotAffected, VexJustification.InlineMitigationsAlreadyExist);

        case FixState.Unknown:
        default:
            return (VexClaimStatus.UnderInvestigation, null);
    }
}
/// <summary>
/// Builds the evidence JSON for one match from the binary identity, the match details,
/// the optional fix status and the generation context.
/// </summary>
/// <remarks>
/// Instance method (previously static) so it can honour
/// <see cref="VexBridgeOptions.IncludeFunctionEvidence"/>: when that option is false the
/// <c>matched_function</c> field is omitted.
/// </remarks>
/// <param name="match">Match whose method, confidence and evidence are recorded.</param>
/// <param name="identity">Identity of the analyzed binary (build ID, hashes, key, architecture).</param>
/// <param name="fixStatus">Optional fix status; contributes the fix-related fields when present.</param>
/// <param name="context">Generation context; contributes the distro release.</param>
/// <param name="resolvedAt">Timestamp written to <c>resolved_at</c>.</param>
private JsonObject CreateEvidencePayload(
    BinaryVulnMatch match,
    BinaryIdentity identity,
    FixStatusResult? fixStatus,
    VexGenerationContext context,
    DateTimeOffset resolvedAt)
{
    // Unlisted match methods are reported as fingerprint matches.
    // NOTE(review): RangeMatch is reported as "hash_exact" — confirm that is the intended label.
    var matchType = match.Method switch
    {
        MatchMethod.BuildIdCatalog => BinaryMatchEvidenceSchema.MatchTypes.BuildId,
        MatchMethod.FingerprintMatch => BinaryMatchEvidenceSchema.MatchTypes.Fingerprint,
        MatchMethod.RangeMatch => BinaryMatchEvidenceSchema.MatchTypes.HashExact,
        _ => BinaryMatchEvidenceSchema.MatchTypes.Fingerprint
    };

    var isFingerprint = matchType == BinaryMatchEvidenceSchema.MatchTypes.Fingerprint;

    // Function-level detail is opt-out via configuration.
    var matchedFunction = _options.IncludeFunctionEvidence
        ? match.Evidence?.MatchedFunction
        : null;

    return BinaryMatchEvidenceSchema.CreateEvidence(
        matchType: matchType,
        buildId: identity.BuildId,
        fileSha256: identity.FileSha256,
        textSha256: identity.TextSha256,
        fingerprintAlgorithm: isFingerprint ? "combined" : null,
        // Prefer the measured similarity; fall back to the overall match confidence.
        similarity: match.Evidence?.Similarity ?? match.Confidence,
        distroRelease: context.DistroRelease,
        sourcePackage: ExtractSourcePackage(match.VulnerablePurl),
        fixedVersion: fixStatus?.FixedVersion,
        fixMethod: fixStatus?.Method.ToString()?.ToLowerInvariant(),
        fixConfidence: fixStatus?.Confidence,
        evidenceRef: fixStatus?.EvidenceId?.ToString(),
        matchedFunction: matchedFunction,
        binaryKey: identity.BinaryKey,
        architecture: identity.Architecture,
        resolvedAt: resolvedAt);
}
/// <summary>
/// Builds the upstream descriptor for an observation: a SHA-256 content hash of the serialized
/// evidence and, when requested and possible, a DSSE signature over that same serialized text.
/// </summary>
/// <param name="observationId">Observation ID; used in the upstream ID and in log messages.</param>
/// <param name="evidence">Evidence payload to hash and sign.</param>
/// <param name="now">Timestamp recorded as both fetched-at and received-at.</param>
/// <param name="signWithDsse">Whether a DSSE signature was requested.</param>
/// <param name="ct">Cancellation token, forwarded to the signer.</param>
/// <exception cref="OperationCanceledException">
/// <paramref name="ct"/> was cancelled while signing. Other signing failures are logged and
/// the observation is produced unsigned.
/// </exception>
private async Task<VexObservationUpstream> CreateUpstreamAsync(
    string observationId,
    JsonObject evidence,
    DateTimeOffset now,
    bool signWithDsse,
    CancellationToken ct)
{
    // Default serialization is compact (not indented); the hash and the signature cover this exact text.
    var evidenceJson = evidence.ToJsonString();
    var contentHash = ComputeSha256(evidenceJson);

    var signature = signWithDsse
        ? await SignEvidenceAsync(observationId, evidenceJson, ct)
        : CreateUnsignedSignature();

    return new VexObservationUpstream(
        upstreamId: $"binary:{observationId}",
        documentVersion: "1.0",
        fetchedAt: now,
        receivedAt: now,
        contentHash: contentHash,
        signature: signature,
        metadata: ImmutableDictionary<string, string>.Empty
            .Add("source", "binary_fingerprint_analysis"));
}

/// <summary>
/// Attempts to DSSE-sign the serialized evidence. Signing is best-effort: if no signer is
/// usable or signing fails, an unsigned marker is returned; caller cancellation propagates.
/// </summary>
private async Task<VexObservationSignature> SignEvidenceAsync(
    string observationId,
    string evidenceJson,
    CancellationToken ct)
{
    if (_dsseSigner is null)
    {
        _logger.LogDebug(
            "DSSE signing requested but no signer configured for observation {ObservationId}",
            observationId);
        return CreateUnsignedSignature();
    }

    if (!_dsseSigner.IsAvailable)
    {
        _logger.LogDebug(
            "DSSE signing requested but signer is not available for observation {ObservationId}",
            observationId);
        return CreateUnsignedSignature();
    }

    try
    {
        var envelopeBytes = await _dsseSigner.SignAsync(
            Encoding.UTF8.GetBytes(evidenceJson),
            "application/vnd.stellaops.binary-resolution+json",
            ct);

        var signed = new VexObservationSignature(
            present: true,
            format: "dsse",
            keyId: _dsseSigner.SigningKeyId,
            signature: Convert.ToBase64String(envelopeBytes));

        _logger.LogDebug(
            "DSSE signature generated for observation {ObservationId} with key {KeyId}",
            observationId, _dsseSigner.SigningKeyId);

        return signed;
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // The caller asked to stop; do not downgrade that into an unsigned observation.
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Failed to generate DSSE signature for observation {ObservationId}, proceeding unsigned",
            observationId);
        return CreateUnsignedSignature();
    }
}

/// <summary>Creates the signature marker used when an observation carries no signature.</summary>
private static VexObservationSignature CreateUnsignedSignature() =>
    new(present: false, format: null, keyId: null, signature: null);
/// <summary>
/// Builds the single VEX statement for a match: vulnerability, product, mapped status and
/// justification, fixed version when known, and a human-readable impact statement.
/// </summary>
private static VexObservationStatement CreateStatement(
    BinaryVulnMatch match,
    VexGenerationContext context,
    VexClaimStatus status,
    VexJustification? justification,
    FixStatusResult? fixStatus)
{
    var statementMetadata = ImmutableDictionary<string, string>.Empty
        .Add("impact_statement", BuildStatementDetail(match, fixStatus));

    // NOTE(review): lastObserved is sampled here, separately from the timestamp
    // CreateObservationAsync uses for createdAt, so the two can differ slightly.
    return new VexObservationStatement(
        vulnerabilityId: match.CveId,
        productKey: context.ProductKey,
        status: status,
        lastObserved: DateTimeOffset.UtcNow,
        locator: null,
        justification: justification,
        introducedVersion: null,
        fixedVersion: fixStatus?.FixedVersion,
        purl: match.VulnerablePurl,
        cpe: null,
        evidence: null,
        metadata: statementMetadata);
}
/// <summary>
/// Produces the human-readable impact statement: a "patched" sentence (plus the fixed version
/// when known) for fixed binaries, a "vulnerable" sentence for vulnerable ones, and otherwise
/// a sentence quoting the match confidence.
/// </summary>
private static string BuildStatementDetail(BinaryVulnMatch match, FixStatusResult? fixStatus)
{
    if (fixStatus is { State: FixState.Vulnerable })
    {
        return "Binary fingerprint analysis indicates this binary contains vulnerable code.";
    }

    if (fixStatus is not { State: FixState.Fixed })
    {
        // NOTE(review): ":P0" formats with the current culture, so the text can vary by host locale.
        return $"Binary fingerprint match with confidence {match.Confidence:P0}.";
    }

    const string patchedSentence = "Binary fingerprint analysis indicates this binary contains the patched version.";

    return string.IsNullOrEmpty(fixStatus.FixedVersion)
        ? patchedSentence
        : patchedSentence + $" Fixed in version: {fixStatus.FixedVersion}.";
}
/// <summary>
/// Wraps the evidence payload as the observation's raw content, labelled as JSON, spec version 1.0.
/// </summary>
private static VexObservationContent CreateContent(JsonObject evidence) =>
    new(format: "application/json", specVersion: "1.0", raw: evidence);
/// <summary>
/// Builds the linkset for a match: the CVE as alias, the vulnerable PURL, and references to
/// the NVD entry, the package, and (when the binary has one) its build ID.
/// </summary>
private static VexObservationLinkset CreateLinkset(
    BinaryVulnMatch match,
    BinaryIdentity identity)
{
    var cve = match.CveId;
    var purl = match.VulnerablePurl;

    var references = new List<VexObservationReference>();
    references.Add(new VexObservationReference(type: "vulnerability", url: $"https://nvd.nist.gov/vuln/detail/{cve}"));
    references.Add(new VexObservationReference(type: "package", url: purl));

    // The build-ID reference is optional: only binaries that carry one get it.
    if (identity.BuildId is { Length: > 0 } buildId)
    {
        references.Add(new VexObservationReference(type: "build_id", url: $"urn:build-id:{buildId}"));
    }

    return new VexObservationLinkset(
        aliases: ImmutableArray.Create(cve),
        purls: ImmutableArray.Create(purl),
        cpes: null,
        references: references);
}
/// <summary>
/// Extracts the package name from a PURL, e.g. <c>pkg:deb/debian/openssl@3.0.7</c> → <c>openssl</c>.
/// </summary>
/// <remarks>
/// Qualifiers (<c>?…</c>) and subpath (<c>#…</c>) are removed first so they can neither leak into
/// the name nor, when they contain '/', be mistaken for the name segment. Only PURLs with at
/// least a type, a namespace and a name (three '/'-separated segments) are handled; anything
/// shorter yields null.
/// </remarks>
/// <param name="purl">Package URL; may be null or empty.</param>
/// <returns>The name segment without its version, or null when it cannot be determined.</returns>
private static string? ExtractSourcePackage(string purl)
{
    if (string.IsNullOrEmpty(purl))
    {
        return null;
    }

    // Keep only "scheme:type/namespace/name@version".
    var suffixStart = purl.AsSpan().IndexOfAny('?', '#');
    var coordinates = suffixStart >= 0 ? purl[..suffixStart] : purl;

    var segments = coordinates.Split('/');
    if (segments.Length < 3)
    {
        return null;
    }

    var nameAndVersion = segments[^1];
    var versionSeparator = nameAndVersion.IndexOf('@');

    // A leading '@' (index 0) is not treated as a version separator.
    return versionSeparator > 0 ? nameAndVersion[..versionSeparator] : nameAndVersion;
}
/// <summary>
/// Hashes the UTF-8 encoding of <paramref name="content"/> with SHA-256 and returns it as
/// <c>sha256:</c> followed by the lowercase hex digest.
/// </summary>
private static string ComputeSha256(string content)
{
    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(content));
    return "sha256:" + Convert.ToHexStringLower(digest);
}
/// <summary>
/// Generate a UUID v5 (name-based, SHA-1) from namespace and name.
/// </summary>
/// <remarks>
/// Uses the big-endian <see cref="Guid"/> APIs to convert to and from the UUID byte layout
/// instead of swapping bytes by hand; the produced values are unchanged.
/// </remarks>
/// <param name="namespaceId">Namespace UUID.</param>
/// <param name="name">Name to hash within the namespace; encoded as UTF-8.</param>
/// <returns>The version-5 UUID for the namespace/name pair.</returns>
private static Guid GenerateUuid5(Guid namespaceId, string name)
{
    // UUID v5 hashes the namespace in network (big-endian) byte order followed by the name.
    var namespaceBytes = namespaceId.ToByteArray(bigEndian: true);
    var nameBytes = Encoding.UTF8.GetBytes(name);

    var input = new byte[namespaceBytes.Length + nameBytes.Length];
    namespaceBytes.CopyTo(input, 0);
    nameBytes.CopyTo(input, namespaceBytes.Length);

    // SHA-1 is mandated by the UUID v5 definition; it is not used here for security.
    var hash = SHA1.HashData(input);

    // The UUID is the first 16 bytes of the digest with version and variant bits overwritten.
    var uuidBytes = hash.AsSpan(0, 16);
    uuidBytes[6] = (byte)((uuidBytes[6] & 0x0F) | 0x50); // Version 5
    uuidBytes[8] = (byte)((uuidBytes[8] & 0x3F) | 0x80); // Variant RFC 4122

    return new Guid(uuidBytes, bigEndian: true);
}
}