Refactor code structure for improved readability and maintainability; optimize performance in key functions.

This commit is contained in:
master
2025-12-22 19:06:31 +02:00
parent dfaa2079aa
commit 4602ccc3a3
1444 changed files with 109919 additions and 8058 deletions

View File

@@ -0,0 +1,63 @@
namespace StellaOps.BinaryIndex.Core.Models;
/// <summary>
/// Unique identity of a binary derived from Build-ID or hashes.
/// </summary>
public sealed record BinaryIdentity
{
/// <summary>Surrogate identifier assigned by the persistence layer.</summary>
public Guid Id { get; init; }
/// <summary>
/// Primary key: build_id || file_sha256
/// </summary>
public required string BinaryKey { get; init; }
/// <summary>
/// ELF GNU Build-ID, PE CodeView, or Mach-O UUID
/// </summary>
public string? BuildId { get; init; }
/// <summary>
/// Type of build ID: gnu-build-id, pe-cv, macho-uuid
/// </summary>
public string? BuildIdType { get; init; }
/// <summary>SHA-256 of the whole file, lower-case hex.</summary>
public required string FileSha256 { get; init; }
/// <summary>
/// SHA-256 of .text section
/// </summary>
public string? TextSha256 { get; init; }
/// <summary>
/// BLAKE3 hash for future use
/// </summary>
public string? Blake3Hash { get; init; }
/// <summary>Container format of the binary (ELF, PE, or Mach-O).</summary>
public required BinaryFormat Format { get; init; }
/// <summary>Target CPU architecture, e.g. "x86_64" or "aarch64".</summary>
public required string Architecture { get; init; }
/// <summary>OS/ABI identifier, e.g. "linux" or "sysv"; null when unknown.</summary>
public string? OsAbi { get; init; }
/// <summary>Kind of binary (executable, shared/static library, object file); null when unknown.</summary>
public BinaryType? Type { get; init; }
/// <summary>True when no symbol table was detected in the binary.</summary>
public bool IsStripped { get; init; }
/// <summary>Corpus snapshot in which this binary was first observed, if any.</summary>
public Guid? FirstSeenSnapshotId { get; init; }
/// <summary>Most recent corpus snapshot in which this binary was observed, if any.</summary>
public Guid? LastSeenSnapshotId { get; init; }
/// <summary>Record creation time (UTC).</summary>
public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>Last modification time (UTC).</summary>
public DateTimeOffset UpdatedAt { get; init; } = DateTimeOffset.UtcNow;
}
/// <summary>
/// Container format of a binary file.
/// </summary>
public enum BinaryFormat
{
/// <summary>ELF (Linux/Unix).</summary>
Elf,
/// <summary>Portable Executable (Windows).</summary>
Pe,
/// <summary>Mach-O (macOS).</summary>
Macho
}
/// <summary>
/// Kind of binary artifact.
/// </summary>
public enum BinaryType
{
/// <summary>Standalone executable program.</summary>
Executable,
/// <summary>Dynamically-linked shared library (e.g. .so/.dll/.dylib).</summary>
SharedLibrary,
/// <summary>Static library archive.</summary>
StaticLibrary,
/// <summary>Relocatable object file.</summary>
Object
}

View File

@@ -0,0 +1,73 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Service for managing binary identities.
/// </summary>
public sealed class BinaryIdentityService
{
    private readonly IBinaryFeatureExtractor _featureExtractor;
    private readonly ILogger<BinaryIdentityService> _logger;

    public BinaryIdentityService(
        IBinaryFeatureExtractor featureExtractor,
        ILogger<BinaryIdentityService> logger)
    {
        // Fail fast on misconfigured DI rather than NRE-ing later.
        _featureExtractor = featureExtractor ?? throw new ArgumentNullException(nameof(featureExtractor));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Indexes a binary from a stream, extracting its identity.
    /// </summary>
    /// <param name="stream">Stream containing the binary; the extractor rewinds it as needed.</param>
    /// <param name="filePath">Path used only for logging and error messages.</param>
    /// <returns>The extracted <see cref="BinaryIdentity"/>.</returns>
    /// <exception cref="InvalidOperationException">The stream is not a supported binary format.</exception>
    public async Task<BinaryIdentity> IndexBinaryAsync(
        Stream stream,
        string filePath,
        CancellationToken ct = default)
    {
        if (!_featureExtractor.CanExtract(stream))
        {
            throw new InvalidOperationException($"Unsupported binary format: {filePath}");
        }
        _logger.LogInformation("Extracting identity from {FilePath}", filePath);
        var identity = await _featureExtractor.ExtractIdentityAsync(stream, ct);
        _logger.LogInformation(
            "Extracted identity: BuildId={BuildId}, SHA256={SHA256}, Arch={Arch}",
            identity.BuildId ?? "none",
            identity.FileSha256[..16],
            identity.Architecture);
        return identity;
    }

    /// <summary>
    /// Batch indexes multiple binaries. Individual failures are logged and
    /// skipped; cancellation is propagated to the caller.
    /// </summary>
    /// <param name="binaries">Streams paired with the path each one came from.</param>
    /// <returns>Identities of all binaries that indexed successfully.</returns>
    public async Task<ImmutableArray<BinaryIdentity>> IndexBatchAsync(
        IEnumerable<(Stream stream, string path)> binaries,
        CancellationToken ct = default)
    {
        var results = new List<BinaryIdentity>();
        foreach (var (stream, path) in binaries)
        {
            try
            {
                var identity = await IndexBinaryAsync(stream, path, ct);
                results.Add(identity);
            }
            // BUGFIX: previously a requested cancellation was swallowed by the
            // generic catch below and logged as a per-item failure; rethrow it
            // so the batch stops promptly.
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to index binary {Path}", path);
            }
        }
        return results.ToImmutableArray();
    }
}

View File

@@ -0,0 +1,71 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Implementation of binary vulnerability lookup service.
/// </summary>
public sealed class BinaryVulnerabilityService : IBinaryVulnerabilityService
{
    private readonly IBinaryVulnAssertionRepository _assertionRepo;
    private readonly ILogger<BinaryVulnerabilityService> _logger;

    public BinaryVulnerabilityService(
        IBinaryVulnAssertionRepository assertionRepo,
        ILogger<BinaryVulnerabilityService> logger)
    {
        _assertionRepo = assertionRepo;
        _logger = logger;
    }

    /// <summary>
    /// Resolves vulnerability matches recorded against a binary identity.
    /// Only assertions whose status is "affected" yield matches.
    /// </summary>
    public async Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
        BinaryIdentity identity,
        LookupOptions? options = null,
        CancellationToken ct = default)
    {
        options ??= new LookupOptions();
        // Check explicit assertions recorded for this exact binary key.
        var assertions = await _assertionRepo.GetByBinaryKeyAsync(identity.BinaryKey, ct);
        var matches = assertions
            .Where(a => a.Status == "affected")
            .Select(a => new BinaryVulnMatch
            {
                CveId = a.CveId,
                VulnerablePurl = "pkg:unknown", // Resolved from advisory
                Method = MapMethod(a.Method),
                Confidence = a.Confidence ?? 0.9m,
                Evidence = new MatchEvidence { BuildId = identity.BuildId }
            })
            .ToImmutableArray();
        _logger.LogDebug("Found {Count} vulnerability matches for {BinaryKey}", matches.Length, identity.BinaryKey);
        return matches;
    }

    /// <summary>
    /// Sequentially resolves matches for each identity, keyed by binary key.
    /// </summary>
    public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupBatchAsync(
        IEnumerable<BinaryIdentity> identities,
        LookupOptions? options = null,
        CancellationToken ct = default)
    {
        var builder = ImmutableDictionary.CreateBuilder<string, ImmutableArray<BinaryVulnMatch>>();
        foreach (var identity in identities)
        {
            builder[identity.BinaryKey] = await LookupByIdentityAsync(identity, options, ct);
        }
        return builder.ToImmutable();
    }

    // Maps the persisted method discriminator onto the strongly-typed enum;
    // unrecognized values fall back to RangeMatch.
    private static MatchMethod MapMethod(string method) => method switch
    {
        "buildid_catalog" => MatchMethod.BuildIdCatalog,
        "fingerprint_match" => MatchMethod.FingerprintMatch,
        _ => MatchMethod.RangeMatch
    };
}

View File

@@ -0,0 +1,161 @@
using System.Security.Cryptography;
using System.Text;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Extracts features from ELF binaries.
/// </summary>
public sealed class ElfFeatureExtractor : IBinaryFeatureExtractor
{
    private static readonly byte[] ElfMagic = [0x7F, 0x45, 0x4C, 0x46]; // \x7fELF

    /// <summary>
    /// Returns true when the stream begins with the ELF magic bytes.
    /// The caller's stream position is restored before returning.
    /// </summary>
    public bool CanExtract(Stream stream)
    {
        if (stream.Length < 4)
            return false;
        var originalPosition = stream.Position;
        try
        {
            Span<byte> magic = stackalloc byte[4];
            stream.Position = 0;
            // BUGFIX: a single Read() call may legally return fewer than 4 bytes
            // even before EOF; ReadAtLeast loops until the count or end of stream.
            var read = stream.ReadAtLeast(magic, 4, throwOnEndOfStream: false);
            return read == 4 && magic.SequenceEqual(ElfMagic);
        }
        finally
        {
            stream.Position = originalPosition;
        }
    }

    /// <summary>
    /// Extracts the full identity (header metadata plus file hash) of an ELF binary.
    /// </summary>
    /// <param name="stream">Seekable stream containing the entire ELF file.</param>
    public async Task<BinaryIdentity> ExtractIdentityAsync(Stream stream, CancellationToken ct = default)
    {
        var metadata = await ExtractMetadataAsync(stream, ct);
        // Compute full file SHA-256
        stream.Position = 0;
        var fileSha256 = await ComputeSha256Async(stream, ct);
        // Build binary key: buildid || file_sha256
        var binaryKey = metadata.BuildId != null
            ? $"{metadata.BuildId}:{fileSha256}"
            : fileSha256;
        return new BinaryIdentity
        {
            BinaryKey = binaryKey,
            BuildId = metadata.BuildId,
            BuildIdType = metadata.BuildIdType,
            FileSha256 = fileSha256,
            Format = metadata.Format,
            Architecture = metadata.Architecture,
            OsAbi = metadata.OsAbi,
            Type = metadata.Type,
            IsStripped = metadata.IsStripped
        };
    }

    /// <summary>
    /// Parses the ELF header (and scans for a build-id note) without computing
    /// the expensive full-file hash.
    /// </summary>
    /// <exception cref="InvalidDataException">The stream is too short to hold an ELF header.</exception>
    public Task<BinaryMetadata> ExtractMetadataAsync(Stream stream, CancellationToken ct = default)
    {
        stream.Position = 0;
        Span<byte> header = stackalloc byte[64];
        // BUGFIX: Read() may short-read; ReadAtLeast guarantees we either get the
        // 20 bytes we parse below or detect a truncated file.
        var read = stream.ReadAtLeast(header, 20, throwOnEndOfStream: false);
        if (read < 20)
            throw new InvalidDataException("Stream too short for ELF header");
        // Parse ELF identification bytes.
        var elfClass = header[4]; // 1=32-bit, 2=64-bit (parsed but not yet used)
        var elfData = header[5]; // 1=little-endian, 2=big-endian
        var osAbi = header[7];
        // BUGFIX: honor the file's declared byte order (EI_DATA) instead of
        // assuming host endianness via BitConverter.
        var eType = ReadUInt16(header[16..18], elfData);
        var eMachine = ReadUInt16(header[18..20], elfData);
        var architecture = MapArchitecture(eMachine);
        var osAbiStr = MapOsAbi(osAbi);
        var type = MapBinaryType(eType);
        var buildId = ExtractBuildId(stream);
        return Task.FromResult(new BinaryMetadata
        {
            Format = BinaryFormat.Elf,
            Architecture = architecture,
            BuildId = buildId,
            BuildIdType = buildId != null ? "gnu-build-id" : null,
            OsAbi = osAbiStr,
            Type = type,
            IsStripped = !HasSymbolTable(stream)
        });
    }

    // Reads a UInt16 respecting the ELF's declared byte order (EI_DATA: 2 = big-endian).
    private static ushort ReadUInt16(ReadOnlySpan<byte> bytes, byte elfData) =>
        elfData == 2
            ? (ushort)((bytes[0] << 8) | bytes[1])
            : (ushort)(bytes[0] | (bytes[1] << 8));

    private static string? ExtractBuildId(Stream stream)
    {
        // Simplified: scan for the ".note.gnu.build-id" section name.
        // In production, parse program headers properly.
        stream.Position = 0;
        var buffer = new byte[stream.Length];
        stream.ReadExactly(buffer); // BUGFIX: the previous Read() ignored its return value
        // Look for NT_GNU_BUILD_ID note (type 3)
        var buildIdPattern = Encoding.ASCII.GetBytes(".note.gnu.build-id");
        for (var i = 0; i < buffer.Length - buildIdPattern.Length; i++)
        {
            if (buffer.AsSpan(i, buildIdPattern.Length).SequenceEqual(buildIdPattern))
            {
                // Found the section name; the 20-byte payload is assumed to sit
                // 16 bytes past it — NOTE(review): confirm against the real ELF
                // note layout; this offset is a heuristic.
                var noteStart = i + buildIdPattern.Length + 16;
                if (noteStart + 20 < buffer.Length)
                {
                    return Convert.ToHexString(buffer.AsSpan(noteStart, 20)).ToLowerInvariant();
                }
            }
        }
        return null;
    }

    private static bool HasSymbolTable(Stream stream)
    {
        // Simplified heuristic: look for the ".symtab" section name in the first 8 KiB.
        stream.Position = 0;
        var buffer = new byte[Math.Min(8192, stream.Length)];
        // BUGFIX: search only the bytes actually read instead of assuming Read()
        // filled the whole buffer.
        var read = stream.ReadAtLeast(buffer, buffer.Length, throwOnEndOfStream: false);
        return Encoding.ASCII.GetString(buffer, 0, read).Contains(".symtab");
    }

    // e_machine value → human-readable architecture name.
    private static string MapArchitecture(ushort eMachine) => eMachine switch
    {
        0x3E => "x86_64",
        0x03 => "x86",
        0xB7 => "aarch64",
        0x28 => "arm",
        0xF3 => "riscv",
        _ => $"unknown-{eMachine}"
    };

    // e_ident[EI_OSABI] value → OS/ABI string.
    private static string MapOsAbi(byte osAbi) => osAbi switch
    {
        0x00 => "sysv",
        0x03 => "linux",
        0x09 => "freebsd",
        _ => $"unknown-{osAbi}"
    };

    // e_type value → binary kind; unknown values default to Executable.
    private static BinaryType MapBinaryType(ushort eType) => eType switch
    {
        0x02 => BinaryType.Executable,
        0x03 => BinaryType.SharedLibrary,
        0x01 => BinaryType.Object,
        _ => BinaryType.Executable
    };

    private static async Task<string> ComputeSha256Async(Stream stream, CancellationToken ct)
    {
        stream.Position = 0;
        var hash = await SHA256.HashDataAsync(stream, ct);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,38 @@
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Extracts identifying features from binary files.
/// </summary>
public interface IBinaryFeatureExtractor
{
/// <summary>
/// Determines if the stream contains a supported binary format.
/// NOTE(review): the ELF implementation restores the stream position before
/// returning; confirm other implementations honor the same contract.
/// </summary>
bool CanExtract(Stream stream);
/// <summary>
/// Extracts binary identity from the stream.
/// Implementations may rewind and fully read the stream (e.g. to hash it).
/// </summary>
Task<BinaryIdentity> ExtractIdentityAsync(Stream stream, CancellationToken ct = default);
/// <summary>
/// Extracts metadata without computing expensive hashes.
/// </summary>
Task<BinaryMetadata> ExtractMetadataAsync(Stream stream, CancellationToken ct = default);
}
/// <summary>
/// Lightweight metadata extracted from binary without full hashing.
/// </summary>
public sealed record BinaryMetadata
{
/// <summary>Container format (ELF, PE, or Mach-O).</summary>
public required BinaryFormat Format { get; init; }
/// <summary>Target CPU architecture, e.g. "x86_64".</summary>
public required string Architecture { get; init; }
/// <summary>Build identifier if found, e.g. GNU build-id as lower-case hex.</summary>
public string? BuildId { get; init; }
/// <summary>Kind of build id, e.g. "gnu-build-id"; null when <see cref="BuildId"/> is null.</summary>
public string? BuildIdType { get; init; }
/// <summary>OS/ABI string, e.g. "linux" or "sysv"; null when unknown.</summary>
public string? OsAbi { get; init; }
/// <summary>Binary kind; null when it could not be determined.</summary>
public BinaryType? Type { get; init; }
/// <summary>True when no symbol table was detected.</summary>
public bool IsStripped { get; init; }
}

View File

@@ -0,0 +1,21 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Repository for binary vulnerability assertions.
/// </summary>
public interface IBinaryVulnAssertionRepository
{
/// <summary>
/// Returns all assertions recorded against the given binary key
/// (empty when none exist).
/// </summary>
Task<ImmutableArray<BinaryVulnAssertion>> GetByBinaryKeyAsync(string binaryKey, CancellationToken ct);
}
/// <summary>
/// A persisted vulnerability assertion tied to a specific binary key.
/// </summary>
public sealed record BinaryVulnAssertion
{
/// <summary>Surrogate identifier assigned by the persistence layer.</summary>
public Guid Id { get; init; }
/// <summary>Binary key this assertion applies to (build_id || file_sha256).</summary>
public required string BinaryKey { get; init; }
/// <summary>CVE identifier, e.g. "CVE-2024-1234".</summary>
public required string CveId { get; init; }
/// <summary>Assertion status; "affected" is the value that produces matches.</summary>
public required string Status { get; init; }
/// <summary>Method discriminator, e.g. "buildid_catalog" or "fingerprint_match".</summary>
public required string Method { get; init; }
/// <summary>Optional confidence score; consumers default missing values to 0.9.</summary>
public decimal? Confidence { get; init; }
}

View File

@@ -0,0 +1,57 @@
using System.Collections.Immutable;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Main query interface for binary vulnerability lookup.
/// Consumed by Scanner.Worker during container scanning.
/// </summary>
public interface IBinaryVulnerabilityService
{
/// <summary>
/// Look up vulnerabilities by binary identity (Build-ID, hashes).
/// </summary>
/// <param name="identity">Identity of the binary to check.</param>
/// <param name="options">Lookup tuning; null uses defaults.</param>
/// <returns>Matches found; empty when the binary is not known vulnerable.</returns>
Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
BinaryIdentity identity,
LookupOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Batch lookup for scan performance.
/// </summary>
/// <returns>Matches keyed by each identity's binary key.</returns>
Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupBatchAsync(
IEnumerable<BinaryIdentity> identities,
LookupOptions? options = null,
CancellationToken ct = default);
}
/// <summary>
/// Optional tuning parameters for vulnerability lookup.
/// </summary>
public sealed record LookupOptions
{
/// <summary>Whether to consult the fix index during lookup.</summary>
public bool CheckFixIndex { get; init; } = true;
/// <summary>Optional distro hint, e.g. "debian", to narrow matching.</summary>
public string? DistroHint { get; init; }
/// <summary>Optional release hint, e.g. a distro codename, to narrow matching.</summary>
public string? ReleaseHint { get; init; }
}
/// <summary>
/// A single vulnerability match for a binary.
/// </summary>
public sealed record BinaryVulnMatch
{
/// <summary>CVE identifier of the matched vulnerability.</summary>
public required string CveId { get; init; }
/// <summary>Package URL of the vulnerable component (resolved from the advisory).</summary>
public required string VulnerablePurl { get; init; }
/// <summary>How the match was established.</summary>
public required MatchMethod Method { get; init; }
/// <summary>Match confidence in the range 0.0–1.0.</summary>
public required decimal Confidence { get; init; }
/// <summary>Supporting evidence for the match, when available.</summary>
public MatchEvidence? Evidence { get; init; }
}
/// <summary>
/// Technique used to establish a vulnerability match.
/// </summary>
public enum MatchMethod
{
/// <summary>Exact Build-ID catalog hit.</summary>
BuildIdCatalog,
/// <summary>Function-fingerprint similarity match.</summary>
FingerprintMatch,
/// <summary>Version-range match (also the fallback for unknown methods).</summary>
RangeMatch
}
/// <summary>
/// Evidence supporting a vulnerability match.
/// </summary>
public sealed record MatchEvidence
{
/// <summary>Build-ID of the matched binary, when available.</summary>
public string? BuildId { get; init; }
/// <summary>Fingerprint similarity score, when the match is fingerprint-based.</summary>
public decimal? Similarity { get; init; }
/// <summary>Name of the vulnerable function that matched, when known.</summary>
public string? MatchedFunction { get; init; }
}

View File

@@ -0,0 +1,12 @@
namespace StellaOps.BinaryIndex.Core.Services;
/// <summary>
/// Provides the current tenant context for RLS.
/// Implementations resolve the tenant from the ambient request/operation scope.
/// </summary>
public interface ITenantContext
{
/// <summary>
/// Gets the current tenant ID.
/// </summary>
string TenantId { get; }
}

View File

@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Collections.Immutable" Version="9.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,164 @@
using System.Collections.Immutable;
using System.Runtime.CompilerServices;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.BinaryIndex.Corpus;
namespace StellaOps.BinaryIndex.Corpus.Debian;
/// <summary>
/// Debian/Ubuntu corpus connector implementation.
/// </summary>
public sealed class DebianCorpusConnector : IBinaryCorpusConnector
{
    private readonly IDebianPackageSource _packageSource;
    private readonly DebianPackageExtractor _extractor;
    // NOTE(review): currently unused here (extraction goes through _extractor);
    // kept to preserve the public constructor signature — confirm before removing.
    private readonly IBinaryFeatureExtractor _featureExtractor;
    private readonly ICorpusSnapshotRepository _snapshotRepo;
    private readonly ILogger<DebianCorpusConnector> _logger;

    public string ConnectorId => "debian";
    public string[] SupportedDistros => ["debian", "ubuntu"];

    public DebianCorpusConnector(
        IDebianPackageSource packageSource,
        DebianPackageExtractor extractor,
        IBinaryFeatureExtractor featureExtractor,
        ICorpusSnapshotRepository snapshotRepo,
        ILogger<DebianCorpusConnector> logger)
    {
        _packageSource = packageSource;
        _extractor = extractor;
        _featureExtractor = featureExtractor;
        _snapshotRepo = snapshotRepo;
        _logger = logger;
    }

    /// <summary>
    /// Fetches (or reuses) a corpus snapshot for the given query.
    /// NOTE(review): query.ComponentFilter is not honored here — confirm intent.
    /// </summary>
    public async Task<CorpusSnapshot> FetchSnapshotAsync(CorpusQuery query, CancellationToken ct = default)
    {
        _logger.LogInformation(
            "Fetching corpus snapshot for {Distro} {Release}/{Architecture}",
            query.Distro, query.Release, query.Architecture);
        // Check if we already have a snapshot for this query
        var existing = await _snapshotRepo.FindByKeyAsync(
            query.Distro,
            query.Release,
            query.Architecture,
            ct);
        if (existing != null)
        {
            _logger.LogInformation("Using existing snapshot {SnapshotId}", existing.Id);
            return existing;
        }
        // Fetch package index to compute metadata digest
        var packages = await _packageSource.FetchPackageIndexAsync(
            query.Distro,
            query.Release,
            query.Architecture,
            ct);
        // Compute metadata digest from package list
        var packageList = packages.ToList();
        var metadataDigest = ComputeMetadataDigest(packageList);
        var snapshot = new CorpusSnapshot(
            Id: Guid.NewGuid(),
            Distro: query.Distro,
            Release: query.Release,
            Architecture: query.Architecture,
            MetadataDigest: metadataDigest,
            CapturedAt: DateTimeOffset.UtcNow);
        await _snapshotRepo.CreateAsync(snapshot, ct);
        _logger.LogInformation(
            "Created corpus snapshot {SnapshotId} with {PackageCount} packages",
            snapshot.Id, packageList.Count);
        return snapshot;
    }

    /// <summary>
    /// Lists all packages in the snapshot. The package index is re-fetched from
    /// the mirror on each call (not cached with the snapshot).
    /// </summary>
    public async IAsyncEnumerable<PackageInfo> ListPackagesAsync(
        CorpusSnapshot snapshot,
        [EnumeratorCancellation] CancellationToken ct = default)
    {
        _logger.LogDebug("Listing packages for snapshot {SnapshotId}", snapshot.Id);
        var packages = await _packageSource.FetchPackageIndexAsync(
            snapshot.Distro,
            snapshot.Release,
            snapshot.Architecture,
            ct);
        foreach (var pkg in packages)
        {
            yield return new PackageInfo(
                Name: pkg.Package,
                Version: pkg.Version,
                SourcePackage: pkg.Source ?? pkg.Package,
                Architecture: pkg.Architecture,
                Filename: pkg.Filename,
                Size: 0, // We don't have size in current implementation
                Sha256: pkg.SHA256);
        }
    }

    /// <summary>
    /// Downloads a .deb package and yields the binaries extracted from it.
    /// </summary>
    public async IAsyncEnumerable<ExtractedBinary> ExtractBinariesAsync(
        PackageInfo pkg,
        [EnumeratorCancellation] CancellationToken ct = default)
    {
        _logger.LogDebug("Extracting binaries from {Package} {Version}", pkg.Name, pkg.Version);
        // Download the .deb package; `await using` replaces the manual try/finally
        // and guarantees async disposal even if enumeration is abandoned.
        var debStream = await _packageSource.DownloadPackageAsync(pkg.Filename, ct);
        await using (debStream)
        {
            var metadata = new DebianPackageMetadata
            {
                Package = pkg.Name,
                Version = pkg.Version,
                Architecture = pkg.Architecture,
                Filename = pkg.Filename,
                SHA256 = pkg.Sha256,
                Source = pkg.SourcePackage != pkg.Name ? pkg.SourcePackage : null
            };
            var extractedBinaries = await _extractor.ExtractBinariesAsync(debStream, metadata, ct);
            foreach (var binary in extractedBinaries)
            {
                yield return new ExtractedBinary(
                    Identity: binary.Identity,
                    PathInPackage: binary.FilePath,
                    Package: pkg);
            }
        }
    }

    // SHA-256 over the sorted "Package:Version:SHA256" triples.
    private static string ComputeMetadataDigest(IEnumerable<DebianPackageMetadata> packages)
    {
        // BUGFIX: the default string comparer is culture-sensitive, which made the
        // digest nondeterministic across machine locales; ordinal ordering is stable.
        var combined = string.Join("|", packages
            .OrderBy(p => p.Package, StringComparer.Ordinal)
            .Select(p => $"{p.Package}:{p.Version}:{p.SHA256}"));
        var hash = System.Security.Cryptography.SHA256.HashData(
            System.Text.Encoding.UTF8.GetBytes(combined));
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,136 @@
using System.IO.Compression;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Corpus.Debian;
/// <summary>
/// Fetches Debian packages from official mirrors.
/// </summary>
public sealed partial class DebianMirrorPackageSource : IDebianPackageSource
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<DebianMirrorPackageSource> _logger;
    private readonly string _mirrorUrl;

    public DebianMirrorPackageSource(
        HttpClient httpClient,
        ILogger<DebianMirrorPackageSource> logger,
        string mirrorUrl = "https://deb.debian.org/debian")
    {
        _httpClient = httpClient;
        _logger = logger;
        _mirrorUrl = mirrorUrl.TrimEnd('/');
    }

    /// <summary>
    /// Downloads and parses the Packages.gz index for the "main" component.
    /// </summary>
    /// <param name="distro">Informational only — the configured mirror URL already
    /// selects the distro; this parameter does not change the request.</param>
    public async Task<IEnumerable<DebianPackageMetadata>> FetchPackageIndexAsync(
        string distro,
        string release,
        string architecture,
        CancellationToken ct = default)
    {
        var packagesUrl = $"{_mirrorUrl}/dists/{release}/main/binary-{architecture}/Packages.gz";
        _logger.LogInformation("Fetching package index: {Url}", packagesUrl);
        using var response = await _httpClient.GetAsync(packagesUrl, ct);
        response.EnsureSuccessStatusCode();
        await using var compressedStream = await response.Content.ReadAsStreamAsync(ct);
        await using var decompressed = new GZipStream(compressedStream, CompressionMode.Decompress);
        using var reader = new StreamReader(decompressed);
        var packages = new List<DebianPackageMetadata>();
        // FIX: removed a dead `current` local that was declared but never used.
        var currentFields = new Dictionary<string, string>();
        while (await reader.ReadLineAsync(ct) is { } line)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                // Blank line terminates a stanza (deb822 control format).
                if (currentFields.Count > 0)
                {
                    if (TryParsePackage(currentFields, out var pkg))
                    {
                        packages.Add(pkg);
                    }
                    currentFields.Clear();
                }
                continue;
            }
            if (line.StartsWith(' ') || line.StartsWith('\t'))
            {
                // Continuation line (multi-line field value) - ignore for now
                continue;
            }
            var colonIndex = line.IndexOf(':');
            if (colonIndex > 0)
            {
                var key = line[..colonIndex];
                var value = line[(colonIndex + 1)..].Trim();
                currentFields[key] = value;
            }
        }
        // Handle a trailing stanza that has no terminating blank line.
        if (currentFields.Count > 0 && TryParsePackage(currentFields, out var lastPkg))
        {
            packages.Add(lastPkg);
        }
        _logger.LogInformation("Fetched {Count} packages for {Release}/{Arch}",
            packages.Count, release, architecture);
        return packages;
    }

    /// <summary>
    /// Downloads a .deb file into a seekable MemoryStream. The whole package is
    /// buffered in memory so callers get random access (needed for ar/tar parsing).
    /// </summary>
    public async Task<Stream> DownloadPackageAsync(string poolPath, CancellationToken ct = default)
    {
        var packageUrl = $"{_mirrorUrl}/{poolPath}";
        _logger.LogDebug("Downloading package: {Url}", packageUrl);
        // BUGFIX: the HttpResponseMessage was never disposed, leaking the
        // underlying connection/response resources.
        using var response = await _httpClient.GetAsync(packageUrl, HttpCompletionOption.ResponseHeadersRead, ct);
        response.EnsureSuccessStatusCode();
        var memoryStream = new MemoryStream();
        await using (var contentStream = await response.Content.ReadAsStreamAsync(ct))
        {
            await contentStream.CopyToAsync(memoryStream, ct);
        }
        memoryStream.Position = 0;
        return memoryStream;
    }

    // Materializes a stanza's fields; returns false when any mandatory field
    // (Package/Version/Architecture/Filename/SHA256) is missing.
    private static bool TryParsePackage(Dictionary<string, string> fields, out DebianPackageMetadata pkg)
    {
        pkg = null!;
        if (!fields.TryGetValue("Package", out var package) ||
            !fields.TryGetValue("Version", out var version) ||
            !fields.TryGetValue("Architecture", out var architecture) ||
            !fields.TryGetValue("Filename", out var filename) ||
            !fields.TryGetValue("SHA256", out var sha256))
        {
            return false;
        }
        fields.TryGetValue("Source", out var source);
        pkg = new DebianPackageMetadata
        {
            Package = package,
            Version = version,
            Architecture = architecture,
            Filename = filename,
            SHA256 = sha256,
            Source = source
        };
        return true;
    }
}

View File

@@ -0,0 +1,137 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using SharpCompress.Archives;
using SharpCompress.Archives.Tar;
using SharpCompress.Common;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.Core.Services;
using StellaOps.BinaryIndex.Corpus;
namespace StellaOps.BinaryIndex.Corpus.Debian;
/// <summary>
/// Extracts binaries from Debian .deb packages.
/// </summary>
public sealed class DebianPackageExtractor
{
    private readonly IBinaryFeatureExtractor _featureExtractor;
    private readonly ILogger<DebianPackageExtractor> _logger;

    public DebianPackageExtractor(
        IBinaryFeatureExtractor featureExtractor,
        ILogger<DebianPackageExtractor> logger)
    {
        _featureExtractor = featureExtractor;
        _logger = logger;
    }

    /// <summary>
    /// Extracts all binaries from a .deb package. Extraction failures are logged
    /// and yield an empty result; cancellation propagates.
    /// </summary>
    /// <param name="debStream">Seekable stream containing the .deb (ar) archive.</param>
    /// <param name="metadata">Package metadata attached to each extracted binary.</param>
    public async Task<ImmutableArray<ExtractedBinaryInternal>> ExtractBinariesAsync(
        Stream debStream,
        DebianPackageMetadata metadata,
        CancellationToken ct = default)
    {
        var binaries = new List<ExtractedBinaryInternal>();
        try
        {
            // .deb is an ar archive containing data.tar.* (usually data.tar.xz or data.tar.gz)
            using var archive = ArchiveFactory.Open(debStream);
            foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
            {
                // BUGFIX: StartsWith(string) without a comparison is culture-sensitive;
                // archive member names are byte-oriented, so compare ordinally.
                if (entry.Key == null || !entry.Key.StartsWith("data.tar", StringComparison.Ordinal))
                    continue;
                // Extract data.tar.* into memory so the tar reader gets a seekable stream.
                using var dataTarStream = new MemoryStream();
                entry.WriteTo(dataTarStream);
                dataTarStream.Position = 0;
                // Now extract from data.tar
                await ExtractFromDataTarAsync(dataTarStream, metadata, binaries, ct);
            }
        }
        // BUGFIX: do not swallow a requested cancellation as a warning.
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to extract binaries from {Package} {Version}",
                metadata.Package, metadata.Version);
        }
        return binaries.ToImmutableArray();
    }

    // Walks data.tar entries, keeping only files that look like ELF/PE/Mach-O
    // binaries in conventional install locations.
    private async Task ExtractFromDataTarAsync(
        Stream dataTarStream,
        DebianPackageMetadata metadata,
        List<ExtractedBinaryInternal> binaries,
        CancellationToken ct)
    {
        using var tarArchive = TarArchive.Open(dataTarStream);
        foreach (var entry in tarArchive.Entries.Where(e => !e.IsDirectory))
        {
            if (entry.Key == null)
                continue;
            // Only process binaries in typical locations
            if (!IsPotentialBinary(entry.Key))
                continue;
            try
            {
                using var binaryStream = new MemoryStream();
                entry.WriteTo(binaryStream);
                binaryStream.Position = 0;
                if (!_featureExtractor.CanExtract(binaryStream))
                    continue;
                var identity = await _featureExtractor.ExtractIdentityAsync(binaryStream, ct);
                binaries.Add(new ExtractedBinaryInternal
                {
                    Identity = identity,
                    FilePath = entry.Key,
                    PackageName = metadata.Package,
                    PackageVersion = metadata.Version,
                    SourcePackage = metadata.Source ?? metadata.Package
                });
                _logger.LogDebug("Extracted binary {Path} from {Package}", entry.Key, metadata.Package);
            }
            // BUGFIX: let cancellation escape instead of logging it as a skip.
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Skipped {Path} in {Package}", entry.Key, metadata.Package);
            }
        }
    }

    private static bool IsPotentialBinary(string path)
    {
        // Typical binary locations in Debian packages (tar entries are "./"-prefixed).
        // BUGFIX: StartsWith(string) defaults to culture-sensitive comparison (CA1310);
        // file paths must be compared ordinally. The previous redundant
        // EndsWith(".so") is dropped — Contains(".so") already covers it.
        return path.StartsWith("./usr/bin/", StringComparison.Ordinal) ||
               path.StartsWith("./usr/sbin/", StringComparison.Ordinal) ||
               path.StartsWith("./bin/", StringComparison.Ordinal) ||
               path.StartsWith("./sbin/", StringComparison.Ordinal) ||
               path.StartsWith("./usr/lib/", StringComparison.Ordinal) ||
               path.StartsWith("./lib/", StringComparison.Ordinal) ||
               path.Contains(".so", StringComparison.Ordinal);
    }
}
/// <summary>
/// Internal representation of extracted binary with package metadata.
/// Used internally by DebianPackageExtractor before conversion to framework ExtractedBinary.
/// </summary>
public sealed record ExtractedBinaryInternal
{
/// <summary>Identity extracted from the binary's contents.</summary>
public required BinaryIdentity Identity { get; init; }
/// <summary>Path of the binary inside the package's data.tar.</summary>
public required string FilePath { get; init; }
/// <summary>Name of the .deb package the binary came from.</summary>
public required string PackageName { get; init; }
/// <summary>Version of the .deb package.</summary>
public required string PackageVersion { get; init; }
/// <summary>Source package name (falls back to the binary package name).</summary>
public required string SourcePackage { get; init; }
}

View File

@@ -0,0 +1,33 @@
namespace StellaOps.BinaryIndex.Corpus.Debian;
/// <summary>
/// Interface for fetching Debian packages from mirrors.
/// </summary>
public interface IDebianPackageSource
{
/// <summary>
/// Fetches package metadata from Packages.gz index.
/// </summary>
/// <param name="distro">Distro identifier; the mirror implementation treats this as informational.</param>
/// <param name="release">Release codename used to build the dists/ URL.</param>
/// <param name="architecture">Package architecture, e.g. "amd64".</param>
Task<IEnumerable<DebianPackageMetadata>> FetchPackageIndexAsync(
string distro,
string release,
string architecture,
CancellationToken ct = default);
/// <summary>
/// Downloads a .deb package file.
/// The returned stream is owned by the caller and must be disposed.
/// </summary>
/// <param name="poolPath">Mirror-relative pool path, as given by the index's Filename field.</param>
Task<Stream> DownloadPackageAsync(
string poolPath,
CancellationToken ct = default);
}
/// <summary>
/// One stanza from a Debian Packages index.
/// </summary>
public sealed record DebianPackageMetadata
{
/// <summary>Binary package name.</summary>
public required string Package { get; init; }
/// <summary>Debian version string.</summary>
public required string Version { get; init; }
/// <summary>Package architecture, e.g. "amd64".</summary>
public required string Architecture { get; init; }
public required string Filename { get; init; } // Pool path
/// <summary>SHA-256 of the .deb file as reported by the index.</summary>
public required string SHA256 { get; init; }
/// <summary>Source package name; null when it equals the binary package name.</summary>
public string? Source { get; init; }
}

View File

@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="SharpCompress" Version="0.38.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Corpus\StellaOps.BinaryIndex.Corpus.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Persistence\StellaOps.BinaryIndex.Persistence.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,76 @@
using System.Runtime.CompilerServices;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Corpus;
/// <summary>
/// Generic interface for binary corpus connectors.
/// Connectors fetch packages from distro repositories and extract binaries.
/// </summary>
public interface IBinaryCorpusConnector
{
/// <summary>
/// Unique identifier for this connector (e.g., "debian", "rpm", "alpine").
/// </summary>
string ConnectorId { get; }
/// <summary>
/// List of supported distro identifiers (e.g., ["debian", "ubuntu"]).
/// </summary>
string[] SupportedDistros { get; }
/// <summary>
/// Fetches a corpus snapshot for the given query.
/// Implementations may return a previously persisted snapshot for the same key.
/// </summary>
Task<CorpusSnapshot> FetchSnapshotAsync(CorpusQuery query, CancellationToken ct = default);
/// <summary>
/// Lists all packages in the snapshot.
/// </summary>
IAsyncEnumerable<PackageInfo> ListPackagesAsync(CorpusSnapshot snapshot, CancellationToken ct = default);
/// <summary>
/// Extracts binaries from a package.
/// </summary>
IAsyncEnumerable<ExtractedBinary> ExtractBinariesAsync(PackageInfo pkg, CancellationToken ct = default);
}
/// <summary>
/// Query parameters for fetching a corpus snapshot.
/// </summary>
/// <param name="Distro">Distro identifier, e.g. "debian" or "ubuntu".</param>
/// <param name="Release">Release codename used in repository paths.</param>
/// <param name="Architecture">Package architecture, e.g. "amd64".</param>
/// <param name="ComponentFilter">Optional component names to restrict the query; null means all.</param>
public sealed record CorpusQuery(
string Distro,
string Release,
string Architecture,
string[]? ComponentFilter = null);
/// <summary>
/// Represents a snapshot of a corpus at a specific point in time.
/// </summary>
/// <param name="Id">Unique snapshot identifier.</param>
/// <param name="Distro">Distro identifier the snapshot was captured from.</param>
/// <param name="Release">Release codename.</param>
/// <param name="Architecture">Package architecture.</param>
/// <param name="MetadataDigest">Digest of the package index contents at capture time.</param>
/// <param name="CapturedAt">Capture timestamp (UTC).</param>
public sealed record CorpusSnapshot(
Guid Id,
string Distro,
string Release,
string Architecture,
string MetadataDigest,
DateTimeOffset CapturedAt);
/// <summary>
/// Package metadata from repository index.
/// </summary>
/// <param name="Name">Binary package name.</param>
/// <param name="Version">Package version string.</param>
/// <param name="SourcePackage">Source package name (may equal <paramref name="Name"/>).</param>
/// <param name="Architecture">Package architecture.</param>
/// <param name="Filename">Repository-relative path to the package file.</param>
/// <param name="Size">Package size in bytes; may be 0 when the index does not provide it.</param>
/// <param name="Sha256">SHA-256 of the package file as reported by the index.</param>
public sealed record PackageInfo(
string Name,
string Version,
string SourcePackage,
string Architecture,
string Filename,
long Size,
string Sha256);
/// <summary>
/// Binary extracted from a package.
/// </summary>
/// <param name="Identity">Identity derived from the binary's contents.</param>
/// <param name="PathInPackage">File path of the binary inside the package.</param>
/// <param name="Package">Package the binary was extracted from.</param>
public sealed record ExtractedBinary(
BinaryIdentity Identity,
string PathInPackage,
PackageInfo Package);

View File

@@ -0,0 +1,26 @@
namespace StellaOps.BinaryIndex.Corpus;
/// <summary>
/// Repository for persisting corpus snapshots.
/// </summary>
public interface ICorpusSnapshotRepository
{
/// <summary>
/// Creates a new corpus snapshot record.
/// </summary>
/// <returns>The persisted snapshot.</returns>
Task<CorpusSnapshot> CreateAsync(CorpusSnapshot snapshot, CancellationToken ct = default);
/// <summary>
/// Finds an existing snapshot by distro/release/architecture.
/// Returns null when no snapshot exists for the key.
/// </summary>
Task<CorpusSnapshot?> FindByKeyAsync(
string distro,
string release,
string architecture,
CancellationToken ct = default);
/// <summary>
/// Gets a snapshot by ID. Returns null when not found.
/// </summary>
Task<CorpusSnapshot?> GetByIdAsync(Guid id, CancellationToken ct = default);
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,66 @@
using System.Collections.Immutable;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Fingerprints;
/// <summary>
/// Repository for vulnerable fingerprints.
/// </summary>
public interface IFingerprintRepository
{
/// <summary>
/// Creates a new fingerprint record.
/// </summary>
/// <returns>The persisted fingerprint.</returns>
Task<VulnFingerprint> CreateAsync(VulnFingerprint fingerprint, CancellationToken ct = default);
/// <summary>
/// Gets a fingerprint by ID. Returns null when not found.
/// </summary>
Task<VulnFingerprint?> GetByIdAsync(Guid id, CancellationToken ct = default);
/// <summary>
/// Gets all fingerprints for a CVE.
/// </summary>
Task<ImmutableArray<VulnFingerprint>> GetByCveAsync(string cveId, CancellationToken ct = default);
/// <summary>
/// Searches for fingerprints by hash.
/// </summary>
/// <param name="hash">Raw fingerprint hash bytes to match.</param>
/// <param name="algorithm">Fingerprinting algorithm the hash was produced with.</param>
/// <param name="architecture">Target architecture to restrict the search to.</param>
Task<ImmutableArray<VulnFingerprint>> SearchByHashAsync(
byte[] hash,
FingerprintAlgorithm algorithm,
string architecture,
CancellationToken ct = default);
/// <summary>
/// Updates validation statistics for a fingerprint.
/// </summary>
Task UpdateValidationStatsAsync(
Guid id,
FingerprintValidationStats stats,
CancellationToken ct = default);
}
/// <summary>
/// Repository for fingerprint matches.
/// </summary>
public interface IFingerprintMatchRepository
{
/// <summary>
/// Creates a new match record.
/// </summary>
/// <returns>The persisted match.</returns>
Task<FingerprintMatch> CreateAsync(FingerprintMatch match, CancellationToken ct = default);
/// <summary>
/// Gets all matches for a scan.
/// </summary>
Task<ImmutableArray<FingerprintMatch>> GetByScanAsync(Guid scanId, CancellationToken ct = default);
/// <summary>
/// Updates reachability status for a match.
/// </summary>
Task UpdateReachabilityAsync(
Guid id,
ReachabilityStatus status,
CancellationToken ct = default);
}

View File

@@ -0,0 +1,180 @@
namespace StellaOps.BinaryIndex.Fingerprints.Models;

/// <summary>
/// Represents a fingerprint of a vulnerable function.
/// </summary>
/// <remarks>
/// NOTE: record value equality compares <see cref="FingerprintHash"/> by array
/// reference, not by content (arrays have reference equality); do not rely on
/// record equality to deduplicate fingerprints by hash.
/// </remarks>
public sealed record VulnFingerprint
{
    /// <summary>Unique fingerprint identifier</summary>
    public Guid Id { get; init; }

    /// <summary>CVE identifier</summary>
    public required string CveId { get; init; }

    /// <summary>Component name (e.g., "openssl")</summary>
    public required string Component { get; init; }

    /// <summary>Package URL (PURL) if applicable</summary>
    public string? Purl { get; init; }

    /// <summary>Fingerprinting algorithm used</summary>
    public required FingerprintAlgorithm Algorithm { get; init; }

    /// <summary>Fingerprint identifier (hex string)</summary>
    public required string FingerprintId { get; init; }

    /// <summary>Fingerprint hash bytes</summary>
    public required byte[] FingerprintHash { get; init; }

    /// <summary>Target architecture (e.g., "x86_64")</summary>
    public required string Architecture { get; init; }

    /// <summary>Function name if known</summary>
    public string? FunctionName { get; init; }

    /// <summary>Source file if known</summary>
    public string? SourceFile { get; init; }

    /// <summary>Source line if known</summary>
    public int? SourceLine { get; init; }

    /// <summary>Similarity threshold for matching (0.0-1.0)</summary>
    public decimal SimilarityThreshold { get; init; } = 0.95m;

    /// <summary>Confidence score (0.0-1.0)</summary>
    public decimal? Confidence { get; init; }

    /// <summary>Whether this fingerprint has been validated</summary>
    public bool Validated { get; init; }

    /// <summary>Validation statistics</summary>
    public FingerprintValidationStats? ValidationStats { get; init; }

    /// <summary>Reference to vulnerable build artifact</summary>
    public string? VulnBuildRef { get; init; }

    /// <summary>Reference to fixed build artifact</summary>
    public string? FixedBuildRef { get; init; }

    /// <summary>Timestamp when this fingerprint was indexed</summary>
    public DateTimeOffset IndexedAt { get; init; }
}

/// <summary>
/// Fingerprinting algorithm types.
/// </summary>
public enum FingerprintAlgorithm
{
    /// <summary>Basic block level fingerprinting</summary>
    BasicBlock,
    /// <summary>Control flow graph based</summary>
    ControlFlowGraph,
    /// <summary>String reference based</summary>
    StringRefs,
    /// <summary>Combined algorithm</summary>
    Combined
}

/// <summary>
/// Validation statistics for a fingerprint.
/// </summary>
/// <remarks>
/// <see cref="Precision"/> and <see cref="Recall"/> return 0 rather than
/// dividing by zero when their denominators are empty.
/// </remarks>
public sealed record FingerprintValidationStats
{
    /// <summary>Number of true positive matches</summary>
    public int TruePositives { get; init; }
    /// <summary>Number of false positive matches</summary>
    public int FalsePositives { get; init; }
    /// <summary>Number of true negative non-matches</summary>
    public int TrueNegatives { get; init; }
    /// <summary>Number of false negative non-matches</summary>
    public int FalseNegatives { get; init; }

    /// <summary>Precision: TP / (TP + FP)</summary>
    public decimal Precision => TruePositives + FalsePositives == 0 ? 0 :
        (decimal)TruePositives / (TruePositives + FalsePositives);

    /// <summary>Recall: TP / (TP + FN)</summary>
    public decimal Recall => TruePositives + FalseNegatives == 0 ? 0 :
        (decimal)TruePositives / (TruePositives + FalseNegatives);
}

/// <summary>
/// Represents a fingerprint match result.
/// </summary>
/// <remarks>
/// NOTE: <see cref="AdvisoryIds"/> is a mutable array; record equality compares
/// it by reference. Treat instances as immutable by convention.
/// </remarks>
public sealed record FingerprintMatch
{
    /// <summary>Match identifier</summary>
    public Guid Id { get; init; }

    /// <summary>Scan identifier</summary>
    public Guid ScanId { get; init; }

    /// <summary>Match type</summary>
    public required MatchType Type { get; init; }

    /// <summary>Binary key that was matched</summary>
    public required string BinaryKey { get; init; }

    /// <summary>Vulnerable package PURL</summary>
    public required string VulnerablePurl { get; init; }

    /// <summary>Vulnerable version</summary>
    public required string VulnerableVersion { get; init; }

    /// <summary>Matched fingerprint ID</summary>
    public Guid? MatchedFingerprintId { get; init; }

    /// <summary>Matched function name</summary>
    public string? MatchedFunction { get; init; }

    /// <summary>Similarity score (0.0-1.0)</summary>
    public decimal? Similarity { get; init; }

    /// <summary>Associated advisory IDs (CVEs, etc.)</summary>
    public string[]? AdvisoryIds { get; init; }

    /// <summary>Reachability status</summary>
    public ReachabilityStatus? ReachabilityStatus { get; init; }

    /// <summary>Timestamp when match occurred</summary>
    public DateTimeOffset MatchedAt { get; init; }
}

/// <summary>
/// Match type enumeration.
/// </summary>
public enum MatchType
{
    /// <summary>Match via fingerprint comparison</summary>
    Fingerprint,
    /// <summary>Match via Build-ID</summary>
    BuildId,
    /// <summary>Exact hash match</summary>
    HashExact
}

/// <summary>
/// Reachability status for matched vulnerabilities.
/// </summary>
public enum ReachabilityStatus
{
    /// <summary>Vulnerable function is reachable</summary>
    Reachable,
    /// <summary>Vulnerable function is unreachable</summary>
    Unreachable,
    /// <summary>Reachability unknown</summary>
    Unknown,
    /// <summary>Partial reachability</summary>
    Partial
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- net10.0 with LangVersion=preview: preview C# features are used elsewhere
       in this solution (e.g. partial [GeneratedRegex] methods). -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
  </PropertyGroup>
  <ItemGroup>
    <!-- Logging abstractions only; concrete providers are supplied by the host. -->
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
  </ItemGroup>
</Project>

View File

@@ -0,0 +1,103 @@
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Fingerprints.Models;

namespace StellaOps.BinaryIndex.Fingerprints.Storage;

/// <summary>
/// Blob storage implementation for fingerprints.
/// NOTE: This is a placeholder implementation showing the structure.
/// Production implementation would use RustFS or S3-compatible storage.
/// </summary>
public sealed class FingerprintBlobStorage : IFingerprintBlobStorage
{
    private const string BasePath = "binaryindex/fingerprints";

    private readonly ILogger<FingerprintBlobStorage> _logger;

    public FingerprintBlobStorage(ILogger<FingerprintBlobStorage> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Stores fingerprint data to blob storage.
    /// Layout: {BasePath}/{algorithm}/{prefix}/{fingerprint_id}.bin
    /// where prefix is the first 2 chars of fingerprint_id for sharding.
    /// </summary>
    public async Task<string> StoreFingerprintAsync(
        VulnFingerprint fingerprint,
        byte[] fullData,
        CancellationToken ct = default)
    {
        var id = fingerprint.FingerprintId;

        // Two-character shard prefix; fall back to "00" for degenerate ids.
        var shard = id.Length < 2 ? "00" : id[..2];
        var algorithmSegment = fingerprint.Algorithm.ToString().ToLowerInvariant();
        var storagePath = $"{BasePath}/{algorithmSegment}/{shard}/{id}.bin";

        _logger.LogDebug(
            "Storing fingerprint {FingerprintId} to {Path}",
            id,
            storagePath);

        // TODO: Actual RustFS or S3 storage implementation
        // await _rustFs.PutAsync(storagePath, fullData, ct);

        // Placeholder: nothing is written yet; only the path is computed.
        await Task.CompletedTask;
        return storagePath;
    }

    /// <summary>
    /// Retrieves fingerprint data previously stored at <paramref name="storagePath"/>.
    /// Placeholder: always returns <c>null</c> until a backend is wired in.
    /// </summary>
    public async Task<byte[]?> RetrieveFingerprintAsync(
        string storagePath,
        CancellationToken ct = default)
    {
        _logger.LogDebug("Retrieving fingerprint from {Path}", storagePath);

        // TODO: Actual retrieval from RustFS or S3
        // return await _rustFs.GetAsync(storagePath, ct);
        await Task.CompletedTask;
        return null;
    }

    /// <summary>
    /// Stores reference build artifacts.
    /// Layout: {BasePath}/refbuilds/{cve_id}/{build_type}.tar.zst
    /// </summary>
    public async Task<string> StoreReferenceBuildAsync(
        string cveId,
        string buildType,
        byte[] buildArtifact,
        CancellationToken ct = default)
    {
        // NOTE(review): cveId/buildType flow straight into the path — confirm
        // callers validate them before untrusted values can reach this method.
        var storagePath = $"{BasePath}/refbuilds/{cveId}/{buildType}.tar.zst";

        _logger.LogInformation(
            "Storing {BuildType} reference build for {CveId} to {Path}",
            buildType,
            cveId,
            storagePath);

        // TODO: Actual RustFS or S3 storage implementation
        // await _rustFs.PutAsync(storagePath, buildArtifact, ct);
        await Task.CompletedTask;
        return storagePath;
    }

    /// <summary>
    /// Retrieves a reference build artifact.
    /// Placeholder: always returns <c>null</c> until a backend is wired in.
    /// </summary>
    public async Task<byte[]?> RetrieveReferenceBuildAsync(
        string storagePath,
        CancellationToken ct = default)
    {
        _logger.LogDebug("Retrieving reference build from {Path}", storagePath);

        // TODO: Actual retrieval from RustFS or S3
        // return await _rustFs.GetAsync(storagePath, ct);
        await Task.CompletedTask;
        return null;
    }
}

View File

@@ -0,0 +1,49 @@
using StellaOps.BinaryIndex.Fingerprints.Models;

namespace StellaOps.BinaryIndex.Fingerprints.Storage;

/// <summary>
/// Interface for fingerprint blob storage.
/// </summary>
public interface IFingerprintBlobStorage
{
    /// <summary>
    /// Stores fingerprint data to blob storage.
    /// </summary>
    /// <param name="fingerprint">Fingerprint metadata</param>
    /// <param name="fullData">Full fingerprint data blob</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>Storage path</returns>
    Task<string> StoreFingerprintAsync(
        VulnFingerprint fingerprint,
        byte[] fullData,
        CancellationToken ct = default);

    /// <summary>
    /// Retrieves fingerprint data from blob storage.
    /// </summary>
    /// <param name="storagePath">Path previously returned by a store call.</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>The stored blob, or <c>null</c> when not found.</returns>
    Task<byte[]?> RetrieveFingerprintAsync(
        string storagePath,
        CancellationToken ct = default);

    /// <summary>
    /// Stores a reference build artifact (vulnerable or fixed version).
    /// </summary>
    /// <param name="cveId">CVE identifier</param>
    /// <param name="buildType">"vulnerable" or "fixed"</param>
    /// <param name="buildArtifact">Build artifact data (tar.zst compressed)</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>Storage path</returns>
    Task<string> StoreReferenceBuildAsync(
        string cveId,
        string buildType,
        byte[] buildArtifact,
        CancellationToken ct = default);

    /// <summary>
    /// Retrieves a reference build artifact.
    /// </summary>
    /// <param name="storagePath">Path previously returned by a store call.</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>The stored artifact, or <c>null</c> when not found.</returns>
    Task<byte[]?> RetrieveReferenceBuildAsync(
        string storagePath,
        CancellationToken ct = default);
}

View File

@@ -0,0 +1,132 @@
namespace StellaOps.BinaryIndex.FixIndex.Models;

/// <summary>
/// Evidence of a CVE fix in a distro package.
/// </summary>
/// <remarks>
/// NOTE(review): the polymorphic <see cref="Evidence"/> payload presumably
/// serializes into the JSONB evidence column — confirm the serializer handles
/// the <see cref="FixEvidencePayload"/> subtype hierarchy.
/// </remarks>
public sealed record FixEvidence
{
    /// <summary>Distro identifier (e.g., "debian", "ubuntu", "alpine")</summary>
    public required string Distro { get; init; }

    /// <summary>Release/codename (e.g., "bookworm", "jammy", "v3.19")</summary>
    public required string Release { get; init; }

    /// <summary>Source package name</summary>
    public required string SourcePkg { get; init; }

    /// <summary>CVE identifier (e.g., "CVE-2024-1234")</summary>
    public required string CveId { get; init; }

    /// <summary>Fix state</summary>
    public required FixState State { get; init; }

    /// <summary>Version where the fix was applied (if applicable)</summary>
    public string? FixedVersion { get; init; }

    /// <summary>Method used to detect the fix</summary>
    public required FixMethod Method { get; init; }

    /// <summary>Confidence score (0.0 - 1.0)</summary>
    public required decimal Confidence { get; init; }

    /// <summary>Evidence payload for audit trail</summary>
    public required FixEvidencePayload Evidence { get; init; }

    /// <summary>Corpus snapshot ID (if from snapshot ingestion)</summary>
    public Guid? SnapshotId { get; init; }

    /// <summary>Timestamp when this evidence was created</summary>
    public DateTimeOffset CreatedAt { get; init; }
}

/// <summary>
/// Fix state enumeration.
/// </summary>
public enum FixState
{
    /// <summary>CVE is fixed in this version</summary>
    Fixed,
    /// <summary>CVE affects this package</summary>
    Vulnerable,
    /// <summary>CVE does not affect this package</summary>
    NotAffected,
    /// <summary>Fix won't be applied (e.g., EOL version)</summary>
    Wontfix,
    /// <summary>Unknown status</summary>
    Unknown
}

/// <summary>
/// Method used to identify the fix.
/// </summary>
public enum FixMethod
{
    /// <summary>From official security feed (OVAL, DSA, etc.)</summary>
    SecurityFeed,
    /// <summary>Parsed from Debian/Ubuntu changelog</summary>
    Changelog,
    /// <summary>Extracted from patch header (DEP-3)</summary>
    PatchHeader,
    /// <summary>Matched against upstream patch database</summary>
    UpstreamPatchMatch
}

/// <summary>
/// Base class for evidence payloads.
/// </summary>
public abstract record FixEvidencePayload;

/// <summary>
/// Evidence from changelog parsing.
/// </summary>
public sealed record ChangelogEvidence : FixEvidencePayload
{
    /// <summary>Path to changelog file</summary>
    public required string File { get; init; }
    /// <summary>Version from changelog entry</summary>
    public required string Version { get; init; }
    /// <summary>Excerpt from changelog mentioning CVE</summary>
    public required string Excerpt { get; init; }
    /// <summary>Line number where CVE was mentioned</summary>
    public int? LineNumber { get; init; }
}

/// <summary>
/// Evidence from patch header parsing.
/// </summary>
public sealed record PatchHeaderEvidence : FixEvidencePayload
{
    /// <summary>Path to patch file</summary>
    public required string PatchPath { get; init; }
    /// <summary>SHA-256 digest of patch file</summary>
    public required string PatchSha256 { get; init; }
    /// <summary>Excerpt from patch header</summary>
    public required string HeaderExcerpt { get; init; }
}

/// <summary>
/// Evidence from official security feed.
/// </summary>
public sealed record SecurityFeedEvidence : FixEvidencePayload
{
    /// <summary>Feed identifier (e.g., "alpine-secfixes", "debian-oval")</summary>
    public required string FeedId { get; init; }
    /// <summary>Entry identifier within the feed</summary>
    public required string EntryId { get; init; }
    /// <summary>Published timestamp from feed</summary>
    public required DateTimeOffset PublishedAt { get; init; }
}

View File

@@ -0,0 +1,92 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Parses Alpine APKBUILD secfixes section for CVE fix evidence.
/// </summary>
/// <remarks>
/// APKBUILD secfixes format:
/// # secfixes:
/// #   1.2.3-r0:
/// #     - CVE-2024-1234
/// #     - CVE-2024-1235
/// Alpine also uses a "0:" version header for CVEs with no fixed version;
/// entries under such headers are deliberately skipped.
/// </remarks>
public sealed partial class AlpineSecfixesParser : ISecfixesParser
{
    [GeneratedRegex(@"^#\s*secfixes:\s*$", RegexOptions.Compiled | RegexOptions.Multiline)]
    private static partial Regex SecfixesPatternRegex();

    [GeneratedRegex(@"^#\s+(\d+\.\d+[^:]*):$", RegexOptions.Compiled)]
    private static partial Regex VersionPatternRegex();

    // Tolerates trailing whitespace after the CVE id.
    [GeneratedRegex(@"^#\s+-\s+(CVE-\d{4}-\d{4,7})\s*$", RegexOptions.Compiled)]
    private static partial Regex CvePatternRegex();

    // Any "<token>:" comment header inside the secfixes block. Used to detect
    // version-like headers that VersionPatternRegex rejects (e.g. Alpine's
    // "0:" sentinel) so the CVEs listed under them are not misattributed to
    // the previously matched version.
    [GeneratedRegex(@"^#\s+[^\s:]+:\s*$", RegexOptions.Compiled)]
    private static partial Regex AnyHeaderPatternRegex();

    /// <summary>
    /// Parses APKBUILD secfixes section for version-to-CVE mappings.
    /// </summary>
    /// <param name="apkbuild">Raw APKBUILD file contents.</param>
    /// <param name="distro">Distro identifier (e.g. "alpine").</param>
    /// <param name="release">Release identifier (e.g. "v3.19").</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <returns>One <see cref="FixEvidence"/> per (version, CVE) pair found.</returns>
    public IEnumerable<FixEvidence> Parse(
        string apkbuild,
        string distro,
        string release,
        string sourcePkg)
    {
        if (string.IsNullOrWhiteSpace(apkbuild))
            yield break;

        var inSecfixes = false;
        string? currentVersion = null;

        foreach (var rawLine in apkbuild.Split('\n'))
        {
            // Tolerate CRLF input: a trailing '\r' would otherwise defeat the
            // '$' anchors in the line-oriented patterns below.
            var line = rawLine.TrimEnd('\r');

            if (SecfixesPatternRegex().IsMatch(line))
            {
                inSecfixes = true;
                continue;
            }
            if (!inSecfixes)
                continue;

            // Exit secfixes block on non-comment line
            if (!line.TrimStart().StartsWith('#'))
            {
                inSecfixes = false;
                continue;
            }

            var versionMatch = VersionPatternRegex().Match(line);
            if (versionMatch.Success)
            {
                currentVersion = versionMatch.Groups[1].Value;
                continue;
            }

            // A header we do not recognize as a fixed version (e.g. "# 0:" or a
            // malformed version). Reset the current version so the CVEs listed
            // under it are not attributed to the previous version.
            if (AnyHeaderPatternRegex().IsMatch(line))
            {
                currentVersion = null;
                continue;
            }

            var cveMatch = CvePatternRegex().Match(line);
            if (cveMatch.Success && currentVersion != null)
            {
                yield return new FixEvidence
                {
                    Distro = distro,
                    Release = release,
                    SourcePkg = sourcePkg,
                    CveId = cveMatch.Groups[1].Value,
                    State = FixState.Fixed,
                    FixedVersion = currentVersion,
                    Method = FixMethod.SecurityFeed, // APKBUILD is authoritative
                    Confidence = 0.95m,
                    Evidence = new SecurityFeedEvidence
                    {
                        FeedId = "alpine-secfixes",
                        EntryId = $"{sourcePkg}/{currentVersion}",
                        PublishedAt = DateTimeOffset.UtcNow
                    },
                    CreatedAt = DateTimeOffset.UtcNow
                };
            }
        }
    }
}

View File

@@ -0,0 +1,81 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Parses Debian/Ubuntu changelog files for CVE mentions.
/// </summary>
public sealed partial class DebianChangelogParser : IChangelogParser
{
    [GeneratedRegex(@"\bCVE-\d{4}-\d{4,7}\b", RegexOptions.Compiled)]
    private static partial Regex CvePatternRegex();

    [GeneratedRegex(@"^(\S+)\s+\(([^)]+)\)\s+", RegexOptions.Compiled)]
    private static partial Regex EntryHeaderPatternRegex();

    [GeneratedRegex(@"^\s+--\s+", RegexOptions.Compiled)]
    private static partial Regex TrailerPatternRegex();

    /// <summary>
    /// Parses the top entry of a Debian changelog for CVE mentions.
    /// </summary>
    /// <param name="changelog">Raw debian/changelog contents.</param>
    /// <param name="distro">Distro identifier (e.g. "debian").</param>
    /// <param name="release">Release/codename (e.g. "bookworm").</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <returns>One <see cref="FixEvidence"/> per distinct CVE mentioned in the
    /// top changelog entry.</returns>
    public IEnumerable<FixEvidence> ParseTopEntry(
        string changelog,
        string distro,
        string release,
        string sourcePkg)
    {
        if (string.IsNullOrWhiteSpace(changelog))
            yield break;

        // Normalize CRLF input: a trailing '\r' would otherwise corrupt the
        // captured version string and defeat the trailer pattern below.
        var lines = changelog.Split('\n').Select(l => l.TrimEnd('\r')).ToArray();

        // Tolerate leading blank lines before the first entry header.
        var headerIndex = Array.FindIndex(lines, l => !string.IsNullOrWhiteSpace(l));
        if (headerIndex < 0)
            yield break;

        // Parse first entry header: "package (version) distribution; urgency"
        var headerMatch = EntryHeaderPatternRegex().Match(lines[headerIndex]);
        if (!headerMatch.Success)
            yield break;
        var version = headerMatch.Groups[2].Value;

        // Collect entry lines until trailer (" -- Maintainer <email>  Date")
        var entryLines = new List<string> { lines[headerIndex] };
        foreach (var line in lines.Skip(headerIndex + 1))
        {
            entryLines.Add(line);
            if (TrailerPatternRegex().IsMatch(line))
                break;
        }
        var entryText = string.Join('\n', entryLines);

        var cves = CvePatternRegex().Matches(entryText)
            .Select(m => m.Value)
            .Distinct()
            .ToList();

        foreach (var cve in cves)
        {
            yield return new FixEvidence
            {
                Distro = distro,
                Release = release,
                SourcePkg = sourcePkg,
                CveId = cve,
                State = FixState.Fixed,
                FixedVersion = version,
                Method = FixMethod.Changelog,
                Confidence = 0.80m,
                Evidence = new ChangelogEvidence
                {
                    File = "debian/changelog",
                    Version = version,
                    // Cap the audit excerpt at 2000 chars.
                    Excerpt = entryText.Length > 2000 ? entryText[..2000] : entryText,
                    LineNumber = null // Could be enhanced to track line number
                },
                CreatedAt = DateTimeOffset.UtcNow
            };
        }
    }
}

View File

@@ -0,0 +1,18 @@
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Interface for parsing changelogs for CVE fix evidence.
/// </summary>
public interface IChangelogParser
{
    /// <summary>
    /// Parses the top entry of a changelog for CVE mentions.
    /// </summary>
    /// <param name="changelog">Raw changelog contents.</param>
    /// <param name="distro">Distro identifier (e.g. "debian").</param>
    /// <param name="release">Release/codename (e.g. "bookworm").</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <returns>One evidence record per CVE found in the top entry.</returns>
    IEnumerable<FixEvidence> ParseTopEntry(
        string changelog,
        string distro,
        string release,
        string sourcePkg);
}

View File

@@ -0,0 +1,19 @@
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Interface for parsing patch files for CVE fix evidence.
/// </summary>
public interface IPatchParser
{
    /// <summary>
    /// Parses patches for CVE mentions in headers.
    /// </summary>
    /// <param name="patches">Patch tuples: file path, full text, and SHA-256 digest.</param>
    /// <param name="distro">Distro identifier.</param>
    /// <param name="release">Release/codename.</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <param name="version">Package version the patches ship in.</param>
    /// <returns>One evidence record per (patch, CVE) pair found.</returns>
    IEnumerable<FixEvidence> ParsePatches(
        IEnumerable<(string path, string content, string sha256)> patches,
        string distro,
        string release,
        string sourcePkg,
        string version);
}

View File

@@ -0,0 +1,18 @@
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Interface for parsing Alpine APKBUILD secfixes for CVE mappings.
/// </summary>
public interface ISecfixesParser
{
    /// <summary>
    /// Parses APKBUILD secfixes section for version-to-CVE mappings.
    /// </summary>
    /// <param name="apkbuild">Raw APKBUILD file contents.</param>
    /// <param name="distro">Distro identifier (e.g. "alpine").</param>
    /// <param name="release">Release identifier (e.g. "v3.19").</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <returns>One evidence record per (version, CVE) pair found.</returns>
    IEnumerable<FixEvidence> Parse(
        string apkbuild,
        string distro,
        string release,
        string sourcePkg);
}

View File

@@ -0,0 +1,60 @@
using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Parses patch headers (DEP-3 format) for CVE mentions.
/// </summary>
public sealed partial class PatchHeaderParser : IPatchParser
{
    [GeneratedRegex(@"\bCVE-\d{4}-\d{4,7}\b", RegexOptions.Compiled)]
    private static partial Regex CvePatternRegex();

    /// <summary>
    /// Scans each patch's header (first 80 lines) plus its filename for CVE
    /// identifiers and yields one <see cref="FixEvidence"/> per distinct CVE.
    /// </summary>
    public IEnumerable<FixEvidence> ParsePatches(
        IEnumerable<(string path, string content, string sha256)> patches,
        string distro,
        string release,
        string sourcePkg,
        string version)
    {
        foreach (var (path, content, sha256) in patches)
        {
            // DEP-3 metadata sits at the top of the patch; 80 lines is a
            // generous bound for a typical header.
            var header = string.Join('\n', content.Split('\n').Take(80));

            // The filename itself often names the CVE (e.g. "CVE-2024-1234.patch").
            var haystack = $"{header}\n{Path.GetFileName(path)}";

            var distinctCves = CvePatternRegex()
                .Matches(haystack)
                .Select(m => m.Value)
                .Distinct();

            foreach (var cve in distinctCves)
            {
                yield return new FixEvidence
                {
                    Distro = distro,
                    Release = release,
                    SourcePkg = sourcePkg,
                    CveId = cve,
                    State = FixState.Fixed,
                    FixedVersion = version,
                    Method = FixMethod.PatchHeader,
                    Confidence = 0.87m,
                    Evidence = new PatchHeaderEvidence
                    {
                        PatchPath = path,
                        PatchSha256 = sha256,
                        HeaderExcerpt = header.Length > 1200 ? header[..1200] : header
                    },
                    CreatedAt = DateTimeOffset.UtcNow
                };
            }
        }
    }
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- net10.0 with LangVersion=preview: preview C# features are used elsewhere
       in this solution (e.g. partial [GeneratedRegex] methods). -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
  </PropertyGroup>
  <ItemGroup>
    <!-- Logging abstractions only; concrete providers are supplied by the host. -->
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
  </ItemGroup>
</Project>

View File

@@ -0,0 +1,36 @@
using Npgsql;
using StellaOps.BinaryIndex.Core.Services;

namespace StellaOps.BinaryIndex.Persistence;

/// <summary>
/// Database context for BinaryIndex with tenant isolation.
/// </summary>
public sealed class BinaryIndexDbContext
{
    private readonly NpgsqlDataSource _dataSource;
    private readonly ITenantContext _tenantContext;

    public BinaryIndexDbContext(
        NpgsqlDataSource dataSource,
        ITenantContext tenantContext)
    {
        _dataSource = dataSource;
        _tenantContext = tenantContext;
    }

    /// <summary>
    /// Opens a connection with the tenant context set for RLS.
    /// </summary>
    /// <returns>An open connection whose <c>app.tenant_id</c> session setting is
    /// populated; the caller owns the connection and must dispose it.</returns>
    public async Task<NpgsqlConnection> OpenConnectionAsync(CancellationToken ct = default)
    {
        var connection = await _dataSource.OpenConnectionAsync(ct);
        try
        {
            // Set tenant context for RLS via set_config() with a bound parameter.
            // The previous implementation interpolated the tenant id directly into
            // a SET statement, which is a SQL-injection vector if the tenant id is
            // ever attacker-influenced (SET cannot take parameters; set_config can).
            await using var cmd = connection.CreateCommand();
            cmd.CommandText = "SELECT set_config('app.tenant_id', @tenant_id, false)";
            cmd.Parameters.AddWithValue("tenant_id", $"{_tenantContext.TenantId}");
            await cmd.ExecuteNonQueryAsync(ct);
            return connection;
        }
        catch
        {
            // Do not leak the (pooled) connection if setting the tenant fails.
            await connection.DisposeAsync();
            throw;
        }
    }
}

View File

@@ -0,0 +1,79 @@
using Microsoft.Extensions.Logging;
using Npgsql;

namespace StellaOps.BinaryIndex.Persistence;

/// <summary>
/// Runs embedded SQL migrations for the binaries schema.
/// </summary>
public sealed class BinaryIndexMigrationRunner
{
    // Fixed advisory-lock key for the binaries schema migration.
    // The previous implementation derived this from string.GetHashCode(), which
    // is randomized per process on modern .NET, so independent processes would
    // compute different keys and could run migrations concurrently.
    private const long MigrationAdvisoryLockKey = 0x42494E4D49475231; // "BINMIGR1"

    private readonly NpgsqlDataSource _dataSource;
    private readonly ILogger<BinaryIndexMigrationRunner> _logger;

    public BinaryIndexMigrationRunner(
        NpgsqlDataSource dataSource,
        ILogger<BinaryIndexMigrationRunner> logger)
    {
        _dataSource = dataSource;
        _logger = logger;
    }

    /// <summary>
    /// Applies all embedded migrations to the database, in ordinal resource-name
    /// order. Scripts must be idempotent: this runner has no applied-version
    /// tracking table and re-executes every script on each call.
    /// </summary>
    public async Task MigrateAsync(CancellationToken ct = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(ct);

        // Acquire advisory lock to prevent concurrent migrations.
        await using var lockCmd = connection.CreateCommand();
        lockCmd.CommandText = $"SELECT pg_try_advisory_lock({MigrationAdvisoryLockKey})";
        var acquired = (bool)(await lockCmd.ExecuteScalarAsync(ct))!;
        if (!acquired)
        {
            // NOTE(review): we return without waiting for the other migrator to
            // finish; confirm callers tolerate an incompletely-migrated schema.
            _logger.LogInformation("Migration already in progress, skipping");
            return;
        }
        try
        {
            var migrations = GetEmbeddedMigrations();
            foreach (var (name, sql) in migrations.OrderBy(m => m.name, StringComparer.Ordinal))
            {
                _logger.LogInformation("Applying migration: {Name}", name);
                await using var cmd = connection.CreateCommand();
                cmd.CommandText = sql;
                await cmd.ExecuteNonQueryAsync(ct);
                _logger.LogInformation("Migration {Name} applied successfully", name);
            }
        }
        finally
        {
            // Release advisory lock on the same session that acquired it.
            await using var unlockCmd = connection.CreateCommand();
            unlockCmd.CommandText = $"SELECT pg_advisory_unlock({MigrationAdvisoryLockKey})";
            await unlockCmd.ExecuteScalarAsync(ct);
        }
    }

    /// <summary>
    /// Enumerates embedded *.sql resources under the Migrations folder as
    /// (resource-name suffix, script text) pairs.
    /// </summary>
    private static IEnumerable<(string name, string sql)> GetEmbeddedMigrations()
    {
        var assembly = typeof(BinaryIndexMigrationRunner).Assembly;
        const string prefix = "StellaOps.BinaryIndex.Persistence.Migrations.";
        foreach (var resourceName in assembly.GetManifestResourceNames()
            .Where(n => n.StartsWith(prefix, StringComparison.Ordinal)
                     && n.EndsWith(".sql", StringComparison.Ordinal)))
        {
            using var stream = assembly.GetManifestResourceStream(resourceName)!;
            using var reader = new StreamReader(stream);
            var sql = reader.ReadToEnd();
            var name = resourceName[prefix.Length..];
            yield return (name, sql);
        }
    }
}

View File

@@ -0,0 +1,193 @@
-- 001_create_binaries_schema.sql
-- Creates the binaries schema for BinaryIndex module
-- Author: BinaryIndex Team
-- Date: 2025-12-22
--
-- The migration runner re-executes every script on each start, so this script
-- must be idempotent. Tables use IF NOT EXISTS and the helper function uses
-- CREATE OR REPLACE; CREATE POLICY has no IF NOT EXISTS form in PostgreSQL,
-- so each policy is preceded by DROP POLICY IF EXISTS (otherwise the second
-- run of this script fails with "policy ... already exists").
BEGIN;

-- ============================================================================
-- SCHEMA CREATION
-- ============================================================================
CREATE SCHEMA IF NOT EXISTS binaries;
CREATE SCHEMA IF NOT EXISTS binaries_app;

-- RLS helper: returns the current tenant id from the app.tenant_id session
-- variable, raising if it is unset so RLS policies fail closed.
CREATE OR REPLACE FUNCTION binaries_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
AS $$
DECLARE
    v_tenant TEXT;
BEGIN
    v_tenant := current_setting('app.tenant_id', true);
    IF v_tenant IS NULL OR v_tenant = '' THEN
        RAISE EXCEPTION 'app.tenant_id session variable not set';
    END IF;
    RETURN v_tenant;
END;
$$;

-- ============================================================================
-- CORE TABLES
-- ============================================================================

-- binary_identity table
CREATE TABLE IF NOT EXISTS binaries.binary_identity (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    binary_key TEXT NOT NULL,
    build_id TEXT,
    build_id_type TEXT CHECK (build_id_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
    file_sha256 TEXT NOT NULL,
    text_sha256 TEXT,
    blake3_hash TEXT,
    format TEXT NOT NULL CHECK (format IN ('elf', 'pe', 'macho')),
    architecture TEXT NOT NULL,
    osabi TEXT,
    binary_type TEXT CHECK (binary_type IN ('executable', 'shared_library', 'static_library', 'object')),
    is_stripped BOOLEAN DEFAULT FALSE,
    first_seen_snapshot_id UUID,
    last_seen_snapshot_id UUID,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT binary_identity_key_unique UNIQUE (tenant_id, binary_key)
);

-- corpus_snapshots table
CREATE TABLE IF NOT EXISTS binaries.corpus_snapshots (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    distro TEXT NOT NULL,
    release TEXT NOT NULL,
    architecture TEXT NOT NULL,
    snapshot_id TEXT NOT NULL,
    packages_processed INT NOT NULL DEFAULT 0,
    binaries_indexed INT NOT NULL DEFAULT 0,
    repo_metadata_digest TEXT,
    signing_key_id TEXT,
    dsse_envelope_ref TEXT,
    status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
    error TEXT,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT corpus_snapshots_unique UNIQUE (tenant_id, distro, release, architecture, snapshot_id)
);

-- binary_package_map table
CREATE TABLE IF NOT EXISTS binaries.binary_package_map (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    binary_identity_id UUID NOT NULL REFERENCES binaries.binary_identity(id) ON DELETE CASCADE,
    binary_key TEXT NOT NULL,
    distro TEXT NOT NULL,
    release TEXT NOT NULL,
    source_pkg TEXT NOT NULL,
    binary_pkg TEXT NOT NULL,
    pkg_version TEXT NOT NULL,
    pkg_purl TEXT,
    architecture TEXT NOT NULL,
    file_path_in_pkg TEXT NOT NULL,
    snapshot_id UUID NOT NULL REFERENCES binaries.corpus_snapshots(id),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT binary_package_map_unique UNIQUE (binary_identity_id, snapshot_id, file_path_in_pkg)
);

-- vulnerable_buildids table
CREATE TABLE IF NOT EXISTS binaries.vulnerable_buildids (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    buildid_type TEXT NOT NULL CHECK (buildid_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
    buildid_value TEXT NOT NULL,
    purl TEXT NOT NULL,
    pkg_version TEXT NOT NULL,
    distro TEXT,
    release TEXT,
    confidence TEXT NOT NULL DEFAULT 'exact' CHECK (confidence IN ('exact', 'inferred', 'heuristic')),
    provenance JSONB DEFAULT '{}',
    snapshot_id UUID REFERENCES binaries.corpus_snapshots(id),
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT vulnerable_buildids_unique UNIQUE (tenant_id, buildid_value, buildid_type, purl, pkg_version)
);

-- binary_vuln_assertion table
CREATE TABLE IF NOT EXISTS binaries.binary_vuln_assertion (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    binary_key TEXT NOT NULL,
    binary_identity_id UUID REFERENCES binaries.binary_identity(id),
    cve_id TEXT NOT NULL,
    advisory_id UUID,
    status TEXT NOT NULL CHECK (status IN ('affected', 'not_affected', 'fixed', 'unknown')),
    method TEXT NOT NULL CHECK (method IN ('range_match', 'buildid_catalog', 'fingerprint_match', 'fix_index')),
    confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
    evidence_ref TEXT,
    evidence_digest TEXT,
    evaluated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT binary_vuln_assertion_unique UNIQUE (tenant_id, binary_key, cve_id)
);

-- ============================================================================
-- INDEXES
-- ============================================================================
CREATE INDEX IF NOT EXISTS idx_binary_identity_tenant ON binaries.binary_identity(tenant_id);
CREATE INDEX IF NOT EXISTS idx_binary_identity_buildid ON binaries.binary_identity(build_id) WHERE build_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_binary_identity_sha256 ON binaries.binary_identity(file_sha256);
CREATE INDEX IF NOT EXISTS idx_binary_identity_key ON binaries.binary_identity(binary_key);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_tenant ON binaries.binary_package_map(tenant_id);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_binary ON binaries.binary_package_map(binary_identity_id);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_distro ON binaries.binary_package_map(distro, release, source_pkg);
CREATE INDEX IF NOT EXISTS idx_binary_package_map_snapshot ON binaries.binary_package_map(snapshot_id);
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_tenant ON binaries.corpus_snapshots(tenant_id);
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_distro ON binaries.corpus_snapshots(distro, release, architecture);
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_status ON binaries.corpus_snapshots(status) WHERE status IN ('pending', 'processing');
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_tenant ON binaries.vulnerable_buildids(tenant_id);
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_value ON binaries.vulnerable_buildids(buildid_type, buildid_value);
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_purl ON binaries.vulnerable_buildids(purl);
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_tenant ON binaries.binary_vuln_assertion(tenant_id);
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_binary ON binaries.binary_vuln_assertion(binary_key);
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_cve ON binaries.binary_vuln_assertion(cve_id);

-- ============================================================================
-- ROW-LEVEL SECURITY
-- ============================================================================
-- DROP POLICY IF EXISTS before each CREATE POLICY keeps this script re-runnable
-- (CREATE POLICY has no IF NOT EXISTS variant).
ALTER TABLE binaries.binary_identity ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_identity FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_identity_tenant_isolation ON binaries.binary_identity;
CREATE POLICY binary_identity_tenant_isolation ON binaries.binary_identity
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.corpus_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.corpus_snapshots FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots;
CREATE POLICY corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.binary_package_map ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_package_map FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_package_map_tenant_isolation ON binaries.binary_package_map;
CREATE POLICY binary_package_map_tenant_isolation ON binaries.binary_package_map
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.vulnerable_buildids ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_buildids FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids;
CREATE POLICY vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.binary_vuln_assertion ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_vuln_assertion FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion;
CREATE POLICY binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

COMMIT;

View File

@@ -0,0 +1,158 @@
-- 002_create_fingerprint_tables.sql
-- Adds fingerprint-related tables for MVP 3
-- Advisory lock to prevent concurrent migrations
-- (session-level lock; released explicitly after COMMIT at the end of this file)
SELECT pg_advisory_lock(hashtext('binaries_schema_002_fingerprints'));
BEGIN;
-- Fix index tables (from MVP 2)
-- Raw per-source evidence of a CVE fix state (one row per observation/method).
CREATE TABLE IF NOT EXISTS binaries.cve_fix_evidence (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    distro TEXT NOT NULL,
    release TEXT NOT NULL,
    source_pkg TEXT NOT NULL,
    cve_id TEXT NOT NULL,
    state TEXT NOT NULL CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
    fixed_version TEXT,
    method TEXT NOT NULL CHECK (method IN ('security_feed', 'changelog', 'patch_header', 'upstream_patch_match')),
    confidence NUMERIC(3,2) NOT NULL CHECK (confidence >= 0 AND confidence <= 1),
    evidence JSONB NOT NULL,
    snapshot_id UUID REFERENCES binaries.corpus_snapshots(id),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Computed/aggregated fix state per (distro, release, source_pkg, cve, arch);
-- evidence_ids links back to the cve_fix_evidence rows it was derived from.
CREATE TABLE IF NOT EXISTS binaries.cve_fix_index (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    distro TEXT NOT NULL,
    release TEXT NOT NULL,
    source_pkg TEXT NOT NULL,
    cve_id TEXT NOT NULL,
    architecture TEXT,
    state TEXT NOT NULL CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
    fixed_version TEXT,
    primary_method TEXT NOT NULL,
    confidence NUMERIC(3,2) NOT NULL,
    evidence_ids UUID[],
    computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT cve_fix_index_unique UNIQUE (tenant_id, distro, release, source_pkg, cve_id, architecture)
);
-- Fingerprint tables
-- Known-vulnerable function fingerprints extracted from reference builds.
-- NOTE: the 'algorithm' CHECK values are snake_case; the application layer
-- must emit exactly these tokens when inserting.
CREATE TABLE IF NOT EXISTS binaries.vulnerable_fingerprints (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    cve_id TEXT NOT NULL,
    component TEXT NOT NULL,
    purl TEXT,
    algorithm TEXT NOT NULL CHECK (algorithm IN ('basic_block', 'control_flow_graph', 'string_refs', 'combined')),
    fingerprint_id TEXT NOT NULL,
    fingerprint_hash BYTEA NOT NULL,
    architecture TEXT NOT NULL,
    function_name TEXT,
    source_file TEXT,
    source_line INT,
    similarity_threshold NUMERIC(3,2) DEFAULT 0.95,
    confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
    validated BOOLEAN DEFAULT FALSE,
    validation_stats JSONB DEFAULT '{}',
    vuln_build_ref TEXT,
    fixed_build_ref TEXT,
    notes TEXT,
    evidence_ref TEXT,
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT vulnerable_fingerprints_unique UNIQUE (tenant_id, cve_id, algorithm, fingerprint_id, architecture)
);
-- Bookkeeping: which (purl, version, algorithm) corpora have been indexed.
CREATE TABLE IF NOT EXISTS binaries.fingerprint_corpus_metadata (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    purl TEXT NOT NULL,
    version TEXT NOT NULL,
    algorithm TEXT NOT NULL,
    binary_digest TEXT,
    function_count INT NOT NULL DEFAULT 0,
    fingerprints_indexed INT NOT NULL DEFAULT 0,
    indexed_by TEXT,
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT fingerprint_corpus_metadata_unique UNIQUE (tenant_id, purl, version, algorithm)
);
-- Match results produced by a scan. Note the mixed spelling in the match_type
-- CHECK values ('buildid' has no underscore, 'hash_exact' does) -- the
-- application layer must match these tokens exactly.
CREATE TABLE IF NOT EXISTS binaries.fingerprint_matches (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    scan_id UUID NOT NULL,
    match_type TEXT NOT NULL CHECK (match_type IN ('fingerprint', 'buildid', 'hash_exact')),
    binary_key TEXT NOT NULL,
    binary_identity_id UUID REFERENCES binaries.binary_identity(id),
    vulnerable_purl TEXT NOT NULL,
    vulnerable_version TEXT NOT NULL,
    matched_fingerprint_id UUID REFERENCES binaries.vulnerable_fingerprints(id),
    matched_function TEXT,
    similarity NUMERIC(3,2),
    advisory_ids TEXT[],
    reachability_status TEXT CHECK (reachability_status IN ('reachable', 'unreachable', 'unknown', 'partial')),
    evidence JSONB DEFAULT '{}',
    matched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Indexes
-- Tenant columns are indexed separately from the natural-key lookups so RLS
-- predicates and cross-key queries can each use an index.
CREATE INDEX IF NOT EXISTS idx_cve_fix_evidence_tenant ON binaries.cve_fix_evidence(tenant_id);
CREATE INDEX IF NOT EXISTS idx_cve_fix_evidence_key ON binaries.cve_fix_evidence(distro, release, source_pkg, cve_id);
CREATE INDEX IF NOT EXISTS idx_cve_fix_index_tenant ON binaries.cve_fix_index(tenant_id);
CREATE INDEX IF NOT EXISTS idx_cve_fix_index_lookup ON binaries.cve_fix_index(distro, release, source_pkg, cve_id);
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_tenant ON binaries.vulnerable_fingerprints(tenant_id);
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_cve ON binaries.vulnerable_fingerprints(cve_id);
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_component ON binaries.vulnerable_fingerprints(component, architecture);
-- Hash index: equality-only lookups on the BYTEA fingerprint hash.
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_hash ON binaries.vulnerable_fingerprints USING hash (fingerprint_hash);
CREATE INDEX IF NOT EXISTS idx_fingerprint_corpus_tenant ON binaries.fingerprint_corpus_metadata(tenant_id);
CREATE INDEX IF NOT EXISTS idx_fingerprint_corpus_purl ON binaries.fingerprint_corpus_metadata(purl, version);
CREATE INDEX IF NOT EXISTS idx_fingerprint_matches_tenant ON binaries.fingerprint_matches(tenant_id);
CREATE INDEX IF NOT EXISTS idx_fingerprint_matches_scan ON binaries.fingerprint_matches(scan_id);
-- RLS
-- Same ENABLE + FORCE + single FOR ALL policy pattern as migration 001.
ALTER TABLE binaries.cve_fix_evidence ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_evidence FORCE ROW LEVEL SECURITY;
CREATE POLICY cve_fix_evidence_tenant_isolation ON binaries.cve_fix_evidence
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
ALTER TABLE binaries.cve_fix_index ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_index FORCE ROW LEVEL SECURITY;
CREATE POLICY cve_fix_index_tenant_isolation ON binaries.cve_fix_index
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
ALTER TABLE binaries.vulnerable_fingerprints ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_fingerprints FORCE ROW LEVEL SECURITY;
CREATE POLICY vulnerable_fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
ALTER TABLE binaries.fingerprint_corpus_metadata ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_corpus_metadata FORCE ROW LEVEL SECURITY;
CREATE POLICY fingerprint_corpus_metadata_tenant_isolation ON binaries.fingerprint_corpus_metadata
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
ALTER TABLE binaries.fingerprint_matches ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_matches FORCE ROW LEVEL SECURITY;
CREATE POLICY fingerprint_matches_tenant_isolation ON binaries.fingerprint_matches
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
COMMIT;
-- Release advisory lock
SELECT pg_advisory_unlock(hashtext('binaries_schema_002_fingerprints'));

View File

@@ -0,0 +1,153 @@
using System.Collections.Immutable;
using Dapper;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Persistence.Repositories;
/// <summary>
/// Dapper-backed repository for binary identities stored in
/// <c>binaries.binary_identity</c>. All statements run under PostgreSQL
/// row-level security, so results are implicitly scoped to the current tenant.
/// </summary>
public sealed class BinaryIdentityRepository : IBinaryIdentityRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    public BinaryIdentityRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Finds a binary identity by Build-ID and Build-ID type
    /// (gnu-build-id, pe-cv or macho-uuid). Returns <c>null</c> when not found.
    /// </summary>
    public async Task<BinaryIdentity?> GetByBuildIdAsync(string buildId, string buildIdType, CancellationToken ct)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            SELECT id, tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
                   format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
                   last_seen_snapshot_id, created_at, updated_at
            FROM binaries.binary_identity
            WHERE build_id = @BuildId AND build_id_type = @BuildIdType
            LIMIT 1
            """;
        var row = await conn.QuerySingleOrDefaultAsync<BinaryIdentityRow>(sql, new { BuildId = buildId, BuildIdType = buildIdType });
        return row?.ToModel();
    }

    /// <summary>
    /// Finds a binary identity by its binary key (build_id || file_sha256).
    /// Returns <c>null</c> when not found.
    /// </summary>
    public async Task<BinaryIdentity?> GetByKeyAsync(string binaryKey, CancellationToken ct)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            SELECT id, tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
                   format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
                   last_seen_snapshot_id, created_at, updated_at
            FROM binaries.binary_identity
            WHERE binary_key = @BinaryKey
            LIMIT 1
            """;
        var row = await conn.QuerySingleOrDefaultAsync<BinaryIdentityRow>(sql, new { BinaryKey = binaryKey });
        return row?.ToModel();
    }

    /// <summary>
    /// Inserts the identity or, when (tenant_id, binary_key) already exists,
    /// refreshes only <c>updated_at</c> and <c>last_seen_snapshot_id</c>.
    /// Returns the row as stored.
    /// </summary>
    public async Task<BinaryIdentity> UpsertAsync(BinaryIdentity identity, CancellationToken ct)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        // Tenant resolution uses binaries_app.current_tenant() for consistency
        // with CorpusSnapshotRepository/FingerprintRepository; the previous
        // current_setting('app.tenant_id') read the GUC directly and bypassed
        // that helper.
        const string sql = """
            INSERT INTO binaries.binary_identity (
                tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
                format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
                last_seen_snapshot_id, created_at, updated_at
            ) VALUES (
                binaries_app.current_tenant()::uuid, @BinaryKey, @BuildId, @BuildIdType, @FileSha256,
                @TextSha256, @Blake3Hash, @Format, @Architecture, @OsAbi, @BinaryType, @IsStripped,
                @FirstSeenSnapshotId, @LastSeenSnapshotId, @CreatedAt, @UpdatedAt
            )
            ON CONFLICT (tenant_id, binary_key) DO UPDATE SET
                updated_at = EXCLUDED.updated_at,
                last_seen_snapshot_id = EXCLUDED.last_seen_snapshot_id
            RETURNING id, tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
                      format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
                      last_seen_snapshot_id, created_at, updated_at
            """;
        var row = await conn.QuerySingleAsync<BinaryIdentityRow>(sql, new
        {
            identity.BinaryKey,
            identity.BuildId,
            identity.BuildIdType,
            identity.FileSha256,
            identity.TextSha256,
            identity.Blake3Hash,
            // Enum names (Elf/Pe/Macho) are single words, so lowercasing
            // produces the stored token directly.
            Format = identity.Format.ToString().ToLowerInvariant(),
            identity.Architecture,
            identity.OsAbi,
            // NOTE(review): "SharedLibrary"/"StaticLibrary" lowercase to
            // "sharedlibrary"/"staticlibrary" -- confirm the binary_type column
            // (DDL not visible here) expects that form.
            BinaryType = identity.Type?.ToString().ToLowerInvariant(),
            identity.IsStripped,
            identity.FirstSeenSnapshotId,
            identity.LastSeenSnapshotId,
            identity.CreatedAt,
            identity.UpdatedAt
        });
        return row.ToModel();
    }

    /// <summary>
    /// Fetches the identities for the given keys. Keys with no stored identity
    /// are simply absent from the result; an empty input yields an empty array
    /// without a database round-trip.
    /// </summary>
    public async Task<ImmutableArray<BinaryIdentity>> GetBatchAsync(IEnumerable<string> binaryKeys, CancellationToken ct)
    {
        // Materialize once so the caller's sequence is never enumerated twice,
        // and short-circuit the trivial case.
        var keys = binaryKeys as string[] ?? binaryKeys.ToArray();
        if (keys.Length == 0)
        {
            return ImmutableArray<BinaryIdentity>.Empty;
        }

        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            SELECT id, tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
                   format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
                   last_seen_snapshot_id, created_at, updated_at
            FROM binaries.binary_identity
            WHERE binary_key = ANY(@BinaryKeys)
            """;
        var rows = await conn.QueryAsync<BinaryIdentityRow>(sql, new { BinaryKeys = keys });
        return rows.Select(r => r.ToModel()).ToImmutableArray();
    }

    /// <summary>
    /// Flat row shape matching the SELECT column list; converted to the
    /// domain model via <see cref="ToModel"/>.
    /// </summary>
    private sealed record BinaryIdentityRow
    {
        public Guid Id { get; init; }
        public Guid TenantId { get; init; }
        public string BinaryKey { get; init; } = string.Empty;
        public string? BuildId { get; init; }
        public string? BuildIdType { get; init; }
        public string FileSha256 { get; init; } = string.Empty;
        public string? TextSha256 { get; init; }
        public string? Blake3Hash { get; init; }
        public string Format { get; init; } = string.Empty;
        public string Architecture { get; init; } = string.Empty;
        public string? OsAbi { get; init; }
        public string? BinaryType { get; init; }
        public bool IsStripped { get; init; }
        public Guid? FirstSeenSnapshotId { get; init; }
        public Guid? LastSeenSnapshotId { get; init; }
        public DateTimeOffset CreatedAt { get; init; }
        public DateTimeOffset UpdatedAt { get; init; }

        // TenantId is intentionally not surfaced on the model; RLS already
        // guarantees rows belong to the current tenant.
        public BinaryIdentity ToModel() => new()
        {
            Id = Id,
            BinaryKey = BinaryKey,
            BuildId = BuildId,
            BuildIdType = BuildIdType,
            FileSha256 = FileSha256,
            TextSha256 = TextSha256,
            Blake3Hash = Blake3Hash,
            Format = Enum.Parse<BinaryFormat>(Format, ignoreCase: true),
            Architecture = Architecture,
            OsAbi = OsAbi,
            Type = BinaryType is not null ? Enum.Parse<BinaryType>(BinaryType, ignoreCase: true) : null,
            IsStripped = IsStripped,
            FirstSeenSnapshotId = FirstSeenSnapshotId,
            LastSeenSnapshotId = LastSeenSnapshotId,
            CreatedAt = CreatedAt,
            UpdatedAt = UpdatedAt
        };
    }
}

View File

@@ -0,0 +1,29 @@
using System.Collections.Immutable;
using Dapper;
using StellaOps.BinaryIndex.Core.Services;
namespace StellaOps.BinaryIndex.Persistence.Repositories;
/// <summary>
/// Read-side repository over <c>binaries.binary_vuln_assertion</c>, mapping
/// rows directly onto <see cref="BinaryVulnAssertion"/> via Dapper.
/// </summary>
public sealed class BinaryVulnAssertionRepository : IBinaryVulnAssertionRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    public BinaryVulnAssertionRepository(BinaryIndexDbContext dbContext)
        => _dbContext = dbContext;

    /// <summary>
    /// Returns every vulnerability assertion recorded for the given binary key.
    /// </summary>
    public async Task<ImmutableArray<BinaryVulnAssertion>> GetByBinaryKeyAsync(string binaryKey, CancellationToken ct)
    {
        const string sql = """
            SELECT id, binary_key, cve_id, status, method, confidence
            FROM binaries.binary_vuln_assertion
            WHERE binary_key = @BinaryKey
            """;

        await using var connection = await _dbContext.OpenConnectionAsync(ct);
        var assertions = await connection.QueryAsync<BinaryVulnAssertion>(sql, new { BinaryKey = binaryKey });
        return assertions.ToImmutableArray();
    }
}

View File

@@ -0,0 +1,127 @@
using Dapper;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Corpus;
namespace StellaOps.BinaryIndex.Persistence.Repositories;
/// <summary>
/// Dapper-backed repository over <c>binaries.corpus_snapshots</c>.
/// Tenant scoping is handled by row-level security plus the
/// <c>binaries_app.current_tenant()</c> helper on insert.
/// </summary>
public sealed class CorpusSnapshotRepository : ICorpusSnapshotRepository
{
    private readonly BinaryIndexDbContext _dbContext;
    private readonly ILogger<CorpusSnapshotRepository> _logger;

    public CorpusSnapshotRepository(
        BinaryIndexDbContext dbContext,
        ILogger<CorpusSnapshotRepository> logger)
    {
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Persists a new snapshot under the current tenant and returns the row as
    /// stored by the database.
    /// </summary>
    public async Task<CorpusSnapshot> CreateAsync(CorpusSnapshot snapshot, CancellationToken ct = default)
    {
        const string sql = """
            INSERT INTO binaries.corpus_snapshots (
                id,
                tenant_id,
                distro,
                release,
                architecture,
                metadata_digest,
                captured_at,
                created_at
            )
            VALUES (
                @Id,
                binaries_app.current_tenant()::uuid,
                @Distro,
                @Release,
                @Architecture,
                @MetadataDigest,
                @CapturedAt,
                NOW()
            )
            RETURNING id, distro, release, architecture, metadata_digest, captured_at
            """;

        await using var connection = await _dbContext.OpenConnectionAsync(ct);
        var created = await connection.QuerySingleAsync<CorpusSnapshotRow>(sql, new
        {
            snapshot.Id,
            snapshot.Distro,
            snapshot.Release,
            snapshot.Architecture,
            snapshot.MetadataDigest,
            snapshot.CapturedAt
        });

        _logger.LogInformation(
            "Created corpus snapshot {Id} for {Distro} {Release}/{Architecture}",
            created.Id, created.Distro, created.Release, created.Architecture);

        return created.ToModel();
    }

    /// <summary>
    /// Returns the most recently captured snapshot for the given
    /// (distro, release, architecture) triple, or <c>null</c> when none exists.
    /// </summary>
    public async Task<CorpusSnapshot?> FindByKeyAsync(
        string distro,
        string release,
        string architecture,
        CancellationToken ct = default)
    {
        const string sql = """
            SELECT id, distro, release, architecture, metadata_digest, captured_at
            FROM binaries.corpus_snapshots
            WHERE distro = @Distro
              AND release = @Release
              AND architecture = @Architecture
            ORDER BY captured_at DESC
            LIMIT 1
            """;

        await using var connection = await _dbContext.OpenConnectionAsync(ct);
        var latest = await connection.QuerySingleOrDefaultAsync<CorpusSnapshotRow>(sql, new
        {
            Distro = distro,
            Release = release,
            Architecture = architecture
        });

        return latest?.ToModel();
    }

    /// <summary>
    /// Looks up a snapshot by primary key; returns <c>null</c> when not found.
    /// </summary>
    public async Task<CorpusSnapshot?> GetByIdAsync(Guid id, CancellationToken ct = default)
    {
        const string sql = """
            SELECT id, distro, release, architecture, metadata_digest, captured_at
            FROM binaries.corpus_snapshots
            WHERE id = @Id
            """;

        await using var connection = await _dbContext.OpenConnectionAsync(ct);
        var found = await connection.QuerySingleOrDefaultAsync<CorpusSnapshotRow>(sql, new { Id = id });
        return found?.ToModel();
    }

    /// <summary>
    /// Positional row shape matching the RETURNING/SELECT column order.
    /// </summary>
    private sealed record CorpusSnapshotRow(
        Guid Id,
        string Distro,
        string Release,
        string Architecture,
        string MetadataDigest,
        DateTimeOffset CapturedAt)
    {
        public CorpusSnapshot ToModel() => new(
            Id: Id,
            Distro: Distro,
            Release: Release,
            Architecture: Architecture,
            MetadataDigest: MetadataDigest,
            CapturedAt: CapturedAt);
    }
}

View File

@@ -0,0 +1,211 @@
using System.Collections.Immutable;
using Dapper;
using StellaOps.BinaryIndex.Fingerprints;
using StellaOps.BinaryIndex.Fingerprints.Models;
namespace StellaOps.BinaryIndex.Persistence.Repositories;
/// <summary>
/// Repository implementation for vulnerable fingerprints
/// (<c>binaries.vulnerable_fingerprints</c>).
/// </summary>
public sealed class FingerprintRepository : IFingerprintRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    public FingerprintRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Maps a <see cref="FingerprintAlgorithm"/> value to the snake_case token
    /// required by the table's CHECK constraint
    /// ('basic_block', 'control_flow_graph', 'string_refs', 'combined').
    /// The previous <c>ToLowerInvariant().Replace("_", "")</c> produced values
    /// like "basicblock" that can never satisfy that constraint.
    /// NOTE(review): mapping assumes conventional PascalCase enum member names
    /// (BasicBlock, ControlFlowGraph, StringRefs, Combined) -- confirm against
    /// the enum declaration.
    /// </summary>
    internal static string ToAlgorithmColumn(FingerprintAlgorithm algorithm) =>
        algorithm.ToString() switch
        {
            "BasicBlock" => "basic_block",
            "ControlFlowGraph" => "control_flow_graph",
            "StringRefs" => "string_refs",
            // "Combined" and any future single-word members lowercase cleanly.
            var other => other.ToLowerInvariant(),
        };

    /// <summary>
    /// Inserts a fingerprint (generating an id when the model carries
    /// <see cref="Guid.Empty"/>) and returns the model with the stored id.
    /// </summary>
    public async Task<VulnFingerprint> CreateAsync(VulnFingerprint fingerprint, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            INSERT INTO binaries.vulnerable_fingerprints (
                id, tenant_id, cve_id, component, purl, algorithm, fingerprint_id, fingerprint_hash,
                architecture, function_name, source_file, source_line, similarity_threshold,
                confidence, validated, validation_stats, vuln_build_ref, fixed_build_ref, indexed_at
            )
            VALUES (
                @Id, binaries_app.current_tenant()::uuid, @CveId, @Component, @Purl, @Algorithm,
                @FingerprintId, @FingerprintHash, @Architecture, @FunctionName, @SourceFile,
                @SourceLine, @SimilarityThreshold, @Confidence, @Validated, @ValidationStats::jsonb,
                @VulnBuildRef, @FixedBuildRef, @IndexedAt
            )
            RETURNING id
            """;
        var id = await conn.ExecuteScalarAsync<Guid>(sql, new
        {
            Id = fingerprint.Id != Guid.Empty ? fingerprint.Id : Guid.NewGuid(),
            fingerprint.CveId,
            fingerprint.Component,
            fingerprint.Purl,
            Algorithm = ToAlgorithmColumn(fingerprint.Algorithm),
            fingerprint.FingerprintId,
            fingerprint.FingerprintHash,
            fingerprint.Architecture,
            fingerprint.FunctionName,
            fingerprint.SourceFile,
            fingerprint.SourceLine,
            fingerprint.SimilarityThreshold,
            fingerprint.Confidence,
            fingerprint.Validated,
            ValidationStats = fingerprint.ValidationStats != null
                ? System.Text.Json.JsonSerializer.Serialize(fingerprint.ValidationStats)
                : "{}",
            fingerprint.VulnBuildRef,
            fingerprint.FixedBuildRef,
            fingerprint.IndexedAt
        });
        return fingerprint with { Id = id };
    }

    /// <summary>
    /// NOTE(review): placeholder -- the SQL is drafted but the row-to-model
    /// mapping (including JSONB deserialization of validation_stats) is not
    /// implemented; this currently always returns <c>null</c>.
    /// </summary>
    public async Task<VulnFingerprint?> GetByIdAsync(Guid id, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            SELECT id, cve_id as CveId, component, purl, algorithm, fingerprint_id as FingerprintId,
                   fingerprint_hash as FingerprintHash, architecture, function_name as FunctionName,
                   source_file as SourceFile, source_line as SourceLine,
                   similarity_threshold as SimilarityThreshold, confidence, validated,
                   validation_stats as ValidationStats, vuln_build_ref as VulnBuildRef,
                   fixed_build_ref as FixedBuildRef, indexed_at as IndexedAt
            FROM binaries.vulnerable_fingerprints
            WHERE id = @Id
            """;
        // Simplified: Would need proper mapping from DB row to model
        // Including JSONB deserialization for validation_stats
        return null; // Placeholder for brevity
    }

    /// <summary>
    /// NOTE(review): placeholder -- always returns an empty array until the
    /// query and mapping are implemented (see GetByIdAsync).
    /// </summary>
    public Task<ImmutableArray<VulnFingerprint>> GetByCveAsync(string cveId, CancellationToken ct = default)
    {
        // Task.FromResult instead of an async method with no awaits (CS1998).
        return Task.FromResult(ImmutableArray<VulnFingerprint>.Empty);
    }

    /// <summary>
    /// NOTE(review): placeholder -- the SQL is drafted (remember to bind
    /// @Algorithm via ToAlgorithmColumn when implementing) but mapping is not
    /// done; this currently always returns an empty array.
    /// </summary>
    public async Task<ImmutableArray<VulnFingerprint>> SearchByHashAsync(
        byte[] hash,
        FingerprintAlgorithm algorithm,
        string architecture,
        CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            SELECT id, cve_id as CveId, component, purl, algorithm, fingerprint_id as FingerprintId,
                   fingerprint_hash as FingerprintHash, architecture, function_name as FunctionName,
                   source_file as SourceFile, source_line as SourceLine,
                   similarity_threshold as SimilarityThreshold, confidence, validated,
                   validation_stats as ValidationStats, vuln_build_ref as VulnBuildRef,
                   fixed_build_ref as FixedBuildRef, indexed_at as IndexedAt
            FROM binaries.vulnerable_fingerprints
            WHERE fingerprint_hash = @Hash
              AND algorithm = @Algorithm
              AND architecture = @Architecture
            """;
        // Simplified: Would need proper mapping
        return ImmutableArray<VulnFingerprint>.Empty;
    }

    /// <summary>
    /// Stores validation statistics for a fingerprint and flips its
    /// <c>validated</c> flag to TRUE.
    /// </summary>
    public async Task UpdateValidationStatsAsync(
        Guid id,
        FingerprintValidationStats stats,
        CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            UPDATE binaries.vulnerable_fingerprints
            SET validation_stats = @Stats::jsonb,
                validated = TRUE
            WHERE id = @Id
            """;
        await conn.ExecuteAsync(sql, new
        {
            Id = id,
            Stats = System.Text.Json.JsonSerializer.Serialize(stats)
        });
    }
}
/// <summary>
/// Repository implementation for fingerprint matches
/// (<c>binaries.fingerprint_matches</c>).
/// </summary>
public sealed class FingerprintMatchRepository : IFingerprintMatchRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    public FingerprintMatchRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Inserts a match (generating an id when the model carries
    /// <see cref="Guid.Empty"/>) and returns the model with the stored id.
    /// </summary>
    public async Task<FingerprintMatch> CreateAsync(FingerprintMatch match, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        // NOTE(review): the evidence JSONB column is left at its DB default
        // ('{}'); persist match evidence here if the model carries it.
        const string sql = """
            INSERT INTO binaries.fingerprint_matches (
                id, tenant_id, scan_id, match_type, binary_key, binary_identity_id,
                vulnerable_purl, vulnerable_version, matched_fingerprint_id, matched_function,
                similarity, advisory_ids, reachability_status, matched_at
            )
            VALUES (
                @Id, binaries_app.current_tenant()::uuid, @ScanId, @MatchType, @BinaryKey,
                @BinaryIdentityId, @VulnerablePurl, @VulnerableVersion, @MatchedFingerprintId,
                @MatchedFunction, @Similarity, @AdvisoryIds, @ReachabilityStatus, @MatchedAt
            )
            RETURNING id
            """;
        var id = await conn.ExecuteScalarAsync<Guid>(sql, new
        {
            Id = match.Id != Guid.Empty ? match.Id : Guid.NewGuid(),
            match.ScanId,
            // The match_type CHECK constraint allows 'fingerprint', 'buildid'
            // and 'hash_exact'. Plain ToLowerInvariant() yielded "hashexact",
            // which violates the constraint, so map explicitly. Note the
            // constraint's inconsistent spelling: 'buildid' has no underscore
            // while 'hash_exact' keeps one.
            MatchType = match.Type.ToString() switch
            {
                "Fingerprint" => "fingerprint",
                "BuildId" => "buildid",
                "HashExact" => "hash_exact",
                var other => other.ToLowerInvariant(),
            },
            match.BinaryKey,
            // NOTE(review): binary identity resolution is not wired up yet.
            BinaryIdentityId = (Guid?)null,
            match.VulnerablePurl,
            match.VulnerableVersion,
            match.MatchedFingerprintId,
            match.MatchedFunction,
            match.Similarity,
            match.AdvisoryIds,
            // Reachability tokens are single words (reachable/unreachable/
            // unknown/partial), so lowercasing matches the CHECK constraint.
            ReachabilityStatus = match.ReachabilityStatus?.ToString().ToLowerInvariant(),
            match.MatchedAt
        });
        return match with { Id = id };
    }

    /// <summary>
    /// NOTE(review): placeholder -- always returns an empty array until the
    /// query and row mapping are implemented.
    /// </summary>
    public Task<ImmutableArray<FingerprintMatch>> GetByScanAsync(Guid scanId, CancellationToken ct = default)
    {
        // Task.FromResult instead of an async method with no awaits (CS1998).
        return Task.FromResult(ImmutableArray<FingerprintMatch>.Empty);
    }

    /// <summary>
    /// Updates the reachability verdict recorded for a match.
    /// </summary>
    public async Task UpdateReachabilityAsync(Guid id, ReachabilityStatus status, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);
        const string sql = """
            UPDATE binaries.fingerprint_matches
            SET reachability_status = @Status
            WHERE id = @Id
            """;
        await conn.ExecuteAsync(sql, new
        {
            Id = id,
            Status = status.ToString().ToLowerInvariant()
        });
    }
}

View File

@@ -0,0 +1,30 @@
using System.Collections.Immutable;
using StellaOps.BinaryIndex.Core.Models;
namespace StellaOps.BinaryIndex.Persistence.Repositories;
/// <summary>
/// Repository for binary identity operations over
/// <c>binaries.binary_identity</c>.
/// </summary>
public interface IBinaryIdentityRepository
{
    /// <summary>
    /// Gets a binary identity by its Build-ID and Build-ID type
    /// (e.g. gnu-build-id, pe-cv, macho-uuid).
    /// Returns <c>null</c> when no matching row exists.
    /// </summary>
    Task<BinaryIdentity?> GetByBuildIdAsync(string buildId, string buildIdType, CancellationToken ct);
    /// <summary>
    /// Gets a binary identity by its key (build_id || file_sha256).
    /// Returns <c>null</c> when no matching row exists.
    /// </summary>
    Task<BinaryIdentity?> GetByKeyAsync(string binaryKey, CancellationToken ct);
    /// <summary>
    /// Upserts a binary identity: inserts a new row, or -- when the key already
    /// exists for the tenant -- refreshes its last-seen snapshot and update
    /// timestamp. Returns the identity as stored.
    /// </summary>
    Task<BinaryIdentity> UpsertAsync(BinaryIdentity identity, CancellationToken ct);
    /// <summary>
    /// Gets multiple binary identities by their keys. Keys with no stored
    /// identity are simply absent from the result.
    /// </summary>
    Task<ImmutableArray<BinaryIdentity>> GetBatchAsync(IEnumerable<string> binaryKeys, CancellationToken ct);
}

View File

@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="9.0.2" />
<PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Corpus\StellaOps.BinaryIndex.Corpus.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Fingerprints\StellaOps.BinaryIndex.Fingerprints.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Migrations\*.sql" />
</ItemGroup>
</Project>