Refactor code structure for improved readability and maintainability; optimize performance in key functions.
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
namespace StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
/// <summary>
/// Identity of a binary, derived from its Build-ID and/or content hashes.
/// </summary>
public sealed record BinaryIdentity
{
    /// <summary>Identifier of this identity record.</summary>
    public Guid Id { get; init; }

    /// <summary>
    /// Primary lookup key. <c>ElfFeatureExtractor</c> composes it as
    /// <c>{BuildId}:{FileSha256}</c> when a Build-ID is available and uses the
    /// file SHA-256 on its own otherwise.
    /// </summary>
    public required string BinaryKey { get; init; }

    /// <summary>
    /// ELF GNU Build-ID, PE CodeView, or Mach-O UUID; <c>null</c> when none was found.
    /// </summary>
    public string? BuildId { get; init; }

    /// <summary>
    /// Kind of value stored in <see cref="BuildId"/>: gnu-build-id, pe-cv, macho-uuid.
    /// </summary>
    public string? BuildIdType { get; init; }

    /// <summary>SHA-256 of the complete file.</summary>
    public required string FileSha256 { get; init; }

    /// <summary>SHA-256 of the .text section, when computed.</summary>
    public string? TextSha256 { get; init; }

    /// <summary>BLAKE3 hash, reserved for future use.</summary>
    public string? Blake3Hash { get; init; }

    /// <summary>Container format of the binary.</summary>
    public required BinaryFormat Format { get; init; }

    /// <summary>Target architecture name (for example <c>x86_64</c>).</summary>
    public required string Architecture { get; init; }

    /// <summary>OS ABI name, when known.</summary>
    public string? OsAbi { get; init; }

    /// <summary>Kind of binary (executable, shared library, ...), when known.</summary>
    public BinaryType? Type { get; init; }

    /// <summary>Whether the binary was found to lack a symbol table.</summary>
    public bool IsStripped { get; init; }

    /// <summary>
    /// Identifier of the first snapshot that contained this binary
    /// (presumably a corpus snapshot; confirm against the persistence layer).
    /// </summary>
    public Guid? FirstSeenSnapshotId { get; init; }

    /// <summary>
    /// Identifier of the most recent snapshot that contained this binary
    /// (presumably a corpus snapshot; confirm against the persistence layer).
    /// </summary>
    public Guid? LastSeenSnapshotId { get; init; }

    /// <summary>Creation timestamp; defaults to the UTC time at which the instance is initialised.</summary>
    public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Last-update timestamp; defaults to the UTC time at which the instance is initialised.</summary>
    public DateTimeOffset UpdatedAt { get; init; } = DateTimeOffset.UtcNow;
}
|
||||
|
||||
/// <summary>
/// Container formats a binary can be stored in.
/// </summary>
public enum BinaryFormat
{
    /// <summary>ELF binary.</summary>
    Elf = 0,

    /// <summary>PE binary.</summary>
    Pe = 1,

    /// <summary>Mach-O binary.</summary>
    Macho = 2
}
|
||||
|
||||
/// <summary>
/// Kinds of binary artefact.
/// </summary>
public enum BinaryType
{
    /// <summary>Executable program.</summary>
    Executable = 0,

    /// <summary>Shared library.</summary>
    SharedLibrary = 1,

    /// <summary>Static library.</summary>
    StaticLibrary = 2,

    /// <summary>Object file.</summary>
    Object = 3
}
|
||||
@@ -0,0 +1,73 @@
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Derives <see cref="BinaryIdentity"/> records from binary streams by delegating to an
/// <see cref="IBinaryFeatureExtractor"/>.
/// </summary>
public sealed class BinaryIdentityService
{
    /// <summary>Maximum number of leading SHA-256 characters written to the log.</summary>
    private const int LoggedHashPrefixLength = 16;

    private readonly IBinaryFeatureExtractor _featureExtractor;
    private readonly ILogger<BinaryIdentityService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="featureExtractor">Extractor used to recognise and analyse binaries.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public BinaryIdentityService(
        IBinaryFeatureExtractor featureExtractor,
        ILogger<BinaryIdentityService> logger)
    {
        ArgumentNullException.ThrowIfNull(featureExtractor);
        ArgumentNullException.ThrowIfNull(logger);

        _featureExtractor = featureExtractor;
        _logger = logger;
    }

    /// <summary>
    /// Indexes a binary from a stream, extracting its identity.
    /// </summary>
    /// <param name="stream">Stream containing the binary.</param>
    /// <param name="filePath">Path used in log and error messages only.</param>
    /// <param name="ct">Token used to cancel the extraction.</param>
    /// <returns>The identity produced by the feature extractor.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">The extractor does not support the stream's format.</exception>
    public async Task<BinaryIdentity> IndexBinaryAsync(
        Stream stream,
        string filePath,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(stream);

        if (!_featureExtractor.CanExtract(stream))
        {
            throw new InvalidOperationException($"Unsupported binary format: {filePath}");
        }

        _logger.LogInformation("Extracting identity from {FilePath}", filePath);

        var identity = await _featureExtractor.ExtractIdentityAsync(stream, ct);

        // Only a prefix of the hash is logged. The length guard keeps logging from
        // throwing when an extractor returns a hash string shorter than the prefix.
        var sha256 = identity.FileSha256;
        var sha256Prefix = sha256.Length > LoggedHashPrefixLength
            ? sha256[..LoggedHashPrefixLength]
            : sha256;

        _logger.LogInformation(
            "Extracted identity: BuildId={BuildId}, SHA256={SHA256}, Arch={Arch}",
            identity.BuildId ?? "none",
            sha256Prefix,
            identity.Architecture);

        return identity;
    }

    /// <summary>
    /// Batch indexes multiple binaries. A binary that fails to index is logged as a
    /// warning and omitted from the result; the remaining binaries are still processed.
    /// </summary>
    /// <param name="binaries">Streams to index, each paired with the path used for logging.</param>
    /// <param name="ct">Token used to cancel the batch.</param>
    /// <returns>Identities of the binaries that were indexed successfully, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="binaries"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<ImmutableArray<BinaryIdentity>> IndexBatchAsync(
        IEnumerable<(Stream stream, string path)> binaries,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(binaries);

        var results = ImmutableArray.CreateBuilder<BinaryIdentity>();

        foreach (var (stream, path) in binaries)
        {
            try
            {
                results.Add(await IndexBinaryAsync(stream, path, ct));
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Cancellation requested by the caller is not a per-file failure:
                // stop the batch instead of logging a warning for every remaining item.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to index binary {Path}", path);
            }
        }

        return results.ToImmutable();
    }
}
|
||||
@@ -0,0 +1,71 @@
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Looks up vulnerabilities for binaries from explicit assertions held in an
/// <see cref="IBinaryVulnAssertionRepository"/>.
/// </summary>
public sealed class BinaryVulnerabilityService : IBinaryVulnerabilityService
{
    private readonly IBinaryVulnAssertionRepository _assertionRepo;
    private readonly ILogger<BinaryVulnerabilityService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="assertionRepo">Repository queried for assertions by binary key.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public BinaryVulnerabilityService(
        IBinaryVulnAssertionRepository assertionRepo,
        ILogger<BinaryVulnerabilityService> logger)
    {
        _assertionRepo = assertionRepo;
        _logger = logger;
    }

    /// <summary>
    /// Returns one match per assertion with status "affected" stored for the identity's binary key.
    /// </summary>
    /// <param name="identity">Identity whose <see cref="BinaryIdentity.BinaryKey"/> is looked up.</param>
    /// <param name="options">Lookup options; currently not consulted by this implementation.</param>
    /// <param name="ct">Token passed to the repository call.</param>
    public async Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
        BinaryIdentity identity,
        LookupOptions? options = null,
        CancellationToken ct = default)
    {
        options ??= new LookupOptions();

        // Check explicit assertions
        var assertions = await _assertionRepo.GetByBinaryKeyAsync(identity.BinaryKey, ct);

        var found = ImmutableArray.CreateBuilder<BinaryVulnMatch>();
        foreach (var assertion in assertions)
        {
            if (assertion.Status != "affected")
            {
                continue;
            }

            var evidence = new MatchEvidence { BuildId = identity.BuildId };
            var match = new BinaryVulnMatch
            {
                CveId = assertion.CveId,
                VulnerablePurl = "pkg:unknown", // Resolved from advisory
                Method = MapMethod(assertion.Method),
                Confidence = assertion.Confidence ?? 0.9m,
                Evidence = evidence
            };
            found.Add(match);
        }

        _logger.LogDebug("Found {Count} vulnerability matches for {BinaryKey}", found.Count, identity.BinaryKey);
        return found.ToImmutable();
    }

    /// <summary>
    /// Looks up every identity in turn and returns the matches keyed by binary key.
    /// When several identities share a key, the result of the last one wins.
    /// </summary>
    /// <param name="identities">Identities to look up.</param>
    /// <param name="options">Options forwarded to each individual lookup.</param>
    /// <param name="ct">Token forwarded to each individual lookup.</param>
    public async Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupBatchAsync(
        IEnumerable<BinaryIdentity> identities,
        LookupOptions? options = null,
        CancellationToken ct = default)
    {
        var byKey = new Dictionary<string, ImmutableArray<BinaryVulnMatch>>();

        foreach (var identity in identities)
        {
            var matchesForIdentity = await LookupByIdentityAsync(identity, options, ct);
            byKey[identity.BinaryKey] = matchesForIdentity;
        }

        return byKey.ToImmutableDictionary();
    }

    /// <summary>
    /// Translates the assertion's method string into a <see cref="MatchMethod"/>;
    /// unrecognised values map to <see cref="MatchMethod.RangeMatch"/>.
    /// </summary>
    private static MatchMethod MapMethod(string method)
    {
        switch (method)
        {
            case "buildid_catalog":
                return MatchMethod.BuildIdCatalog;
            case "fingerprint_match":
                return MatchMethod.FingerprintMatch;
            default:
                return MatchMethod.RangeMatch;
        }
    }
}
|
||||
@@ -0,0 +1,161 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Extracts identity and metadata features from ELF binaries.
/// </summary>
/// <remarks>
/// Streams passed to this type must be seekable: every operation sets
/// <see cref="Stream.Position"/> and reads <see cref="Stream.Length"/>.
/// </remarks>
public sealed class ElfFeatureExtractor : IBinaryFeatureExtractor
{
    private static readonly byte[] ElfMagic = [0x7F, 0x45, 0x4C, 0x46]; // \x7fELF

    private const byte ElfClass64 = 2;             // EI_CLASS value for 64-bit objects
    private const byte ElfDataBigEndian = 2;       // EI_DATA value for big-endian encoding

    private const int MinimumHeaderBytes = 20;     // e_ident (16) + e_type (2) + e_machine (2)
    private const int ElfHeaderSize32 = 52;
    private const int ElfHeaderSize64 = 64;

    private const uint SectionTypeSymbolTable = 2; // SHT_SYMTAB
    private const uint SectionTypeNote = 7;        // SHT_NOTE
    private const uint SegmentTypeNote = 4;        // PT_NOTE
    private const uint NoteTypeGnuBuildId = 3;     // NT_GNU_BUILD_ID

    /// <summary>Upper bound for any header table or note region read into memory.</summary>
    private const long MaxRegionBytes = 16 * 1024 * 1024;

    /// <summary>Type, file offset and file size of one section or program header.</summary>
    private readonly record struct HeaderTableEntry(uint Type, long Offset, long Size);

    /// <summary>
    /// Reports whether the stream starts with the ELF magic number. The stream position
    /// is restored before returning.
    /// </summary>
    /// <param name="stream">Seekable stream to probe.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    public bool CanExtract(Stream stream)
    {
        ArgumentNullException.ThrowIfNull(stream);

        if (stream.Length < ElfMagic.Length)
        {
            return false;
        }

        var originalPosition = stream.Position;
        try
        {
            Span<byte> magic = stackalloc byte[4];
            stream.Position = 0;

            // ReadAtLeast keeps reading until the buffer is full or the stream ends;
            // a single Read call is allowed to return fewer bytes than requested.
            var read = stream.ReadAtLeast(magic, magic.Length, throwOnEndOfStream: false);
            return read == magic.Length && magic.SequenceEqual(ElfMagic);
        }
        finally
        {
            stream.Position = originalPosition;
        }
    }

    /// <summary>
    /// Extracts the metadata and the SHA-256 of the whole stream and combines them into a
    /// <see cref="BinaryIdentity"/>.
    /// </summary>
    /// <param name="stream">Seekable stream containing the ELF file.</param>
    /// <param name="ct">Token used to cancel hashing.</param>
    /// <exception cref="InvalidDataException">The stream is too short to hold an ELF header.</exception>
    public async Task<BinaryIdentity> ExtractIdentityAsync(Stream stream, CancellationToken ct = default)
    {
        var metadata = await ExtractMetadataAsync(stream, ct);

        // Compute full file SHA-256
        var fileSha256 = await ComputeSha256Async(stream, ct);

        // Binary key: "<build-id>:<sha256>" when a build-id exists, otherwise the SHA-256 alone.
        var binaryKey = metadata.BuildId != null
            ? $"{metadata.BuildId}:{fileSha256}"
            : fileSha256;

        return new BinaryIdentity
        {
            BinaryKey = binaryKey,
            BuildId = metadata.BuildId,
            BuildIdType = metadata.BuildIdType,
            FileSha256 = fileSha256,
            Format = metadata.Format,
            Architecture = metadata.Architecture,
            OsAbi = metadata.OsAbi,
            Type = metadata.Type,
            IsStripped = metadata.IsStripped
        };
    }

    /// <summary>
    /// Parses the ELF header, locates the GNU build-id note and checks for a symbol table,
    /// without hashing the file.
    /// </summary>
    /// <param name="stream">Seekable stream containing the ELF file.</param>
    /// <param name="ct">Token checked before parsing starts.</param>
    /// <returns>A completed task holding the extracted metadata.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidDataException">Fewer than 20 header bytes are available.</exception>
    public Task<BinaryMetadata> ExtractMetadataAsync(Stream stream, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ct.ThrowIfCancellationRequested();

        stream.Position = 0;
        Span<byte> headerBuffer = stackalloc byte[ElfHeaderSize64];
        var read = stream.ReadAtLeast(headerBuffer, headerBuffer.Length, throwOnEndOfStream: false);

        if (read < MinimumHeaderBytes)
        {
            throw new InvalidDataException("Stream too short for ELF header");
        }

        ReadOnlySpan<byte> header = headerBuffer[..read];

        // Parse ELF header
        var is64Bit = header[4] == ElfClass64;         // EI_CLASS: 1=32-bit, 2=64-bit
        var bigEndian = header[5] == ElfDataBigEndian; // EI_DATA: 1=little-endian, 2=big-endian
        var osAbi = header[7];

        // Multi-byte fields use the file's own byte order, not the host's.
        var eType = ReadUInt16(header[16..], bigEndian);
        var eMachine = ReadUInt16(header[18..], bigEndian);

        var sections = ReadHeaderTable(stream, header, is64Bit, bigEndian, sectionTable: true);
        var segments = ReadHeaderTable(stream, header, is64Bit, bigEndian, sectionTable: false);

        // Prefer note sections; fall back to note segments for files without section headers.
        var buildId = FindBuildId(stream, sections.Where(s => s.Type == SectionTypeNote), bigEndian)
            ?? FindBuildId(stream, segments.Where(s => s.Type == SegmentTypeNote), bigEndian);

        return Task.FromResult(new BinaryMetadata
        {
            Format = BinaryFormat.Elf,
            Architecture = MapArchitecture(eMachine),
            BuildId = buildId,
            BuildIdType = buildId != null ? "gnu-build-id" : null,
            OsAbi = MapOsAbi(osAbi),
            Type = MapBinaryType(eType),
            // A binary counts as stripped when no SHT_SYMTAB section is present.
            IsStripped = !sections.Any(s => s.Type == SectionTypeSymbolTable)
        });
    }

    /// <summary>
    /// Reads the section header table or the program header table described by the ELF header.
    /// Returns an empty list when the table is absent, truncated or implausibly large.
    /// </summary>
    private static List<HeaderTableEntry> ReadHeaderTable(
        Stream stream,
        ReadOnlySpan<byte> header,
        bool is64Bit,
        bool bigEndian,
        bool sectionTable)
    {
        var entries = new List<HeaderTableEntry>();

        if (header.Length < (is64Bit ? ElfHeaderSize64 : ElfHeaderSize32))
        {
            return entries;
        }

        // Field offsets inside the ELF header differ between the 32- and 64-bit layouts:
        //   64-bit: e_phoff@32 e_shoff@40 e_phentsize@54 e_phnum@56 e_shentsize@58 e_shnum@60
        //   32-bit: e_phoff@28 e_shoff@32 e_phentsize@42 e_phnum@44 e_shentsize@46 e_shnum@48
        long tableOffset;
        int entrySize;
        int entryCount;
        int minimumEntrySize;

        if (is64Bit)
        {
            tableOffset = ToInt64(ReadUInt64(header[(sectionTable ? 40 : 32)..], bigEndian));
            entrySize = ReadUInt16(header[(sectionTable ? 58 : 54)..], bigEndian);
            entryCount = ReadUInt16(header[(sectionTable ? 60 : 56)..], bigEndian);
            minimumEntrySize = sectionTable ? 64 : 56;
        }
        else
        {
            tableOffset = ReadUInt32(header[(sectionTable ? 32 : 28)..], bigEndian);
            entrySize = ReadUInt16(header[(sectionTable ? 46 : 42)..], bigEndian);
            entryCount = ReadUInt16(header[(sectionTable ? 48 : 44)..], bigEndian);
            minimumEntrySize = sectionTable ? 40 : 32;
        }

        // Files that use extended section numbering (e_shnum == 0) are treated as having no table.
        if (tableOffset <= 0 || entryCount == 0 || entrySize < minimumEntrySize)
        {
            return entries;
        }

        var table = ReadRegion(stream, tableOffset, (long)entrySize * entryCount);
        if (table is null)
        {
            return entries;
        }

        for (var index = 0; index < entryCount; index++)
        {
            ReadOnlySpan<byte> entry = table.AsSpan(index * entrySize, entrySize);
            var type = ReadUInt32(entry, bigEndian); // sh_type or p_type

            long offset;
            long size;
            if (sectionTable)
            {
                // sh_offset / sh_size
                offset = is64Bit ? ToInt64(ReadUInt64(entry[24..], bigEndian)) : ReadUInt32(entry[16..], bigEndian);
                size = is64Bit ? ToInt64(ReadUInt64(entry[32..], bigEndian)) : ReadUInt32(entry[20..], bigEndian);
            }
            else
            {
                // p_offset / p_filesz
                offset = is64Bit ? ToInt64(ReadUInt64(entry[8..], bigEndian)) : ReadUInt32(entry[4..], bigEndian);
                size = is64Bit ? ToInt64(ReadUInt64(entry[32..], bigEndian)) : ReadUInt32(entry[16..], bigEndian);
            }

            entries.Add(new HeaderTableEntry(type, offset, size));
        }

        return entries;
    }

    /// <summary>
    /// Searches the given note regions for an NT_GNU_BUILD_ID note and returns its
    /// descriptor as lower-case hex, or <c>null</c> when none is found.
    /// </summary>
    private static string? FindBuildId(Stream stream, IEnumerable<HeaderTableEntry> noteRegions, bool bigEndian)
    {
        foreach (var region in noteRegions)
        {
            var notes = ReadRegion(stream, region.Offset, region.Size);
            if (notes is null)
            {
                continue;
            }

            var buildId = ParseBuildIdNote(notes, bigEndian);
            if (buildId is not null)
            {
                return buildId;
            }
        }

        return null;
    }

    /// <summary>
    /// Walks a sequence of ELF notes (namesz, descsz, type, name, desc; name and desc padded
    /// to 4 bytes) and returns the descriptor of the first note named "GNU" with type 3.
    /// </summary>
    private static string? ParseBuildIdNote(ReadOnlySpan<byte> notes, bool bigEndian)
    {
        const int NoteHeaderSize = 12;
        var cursor = 0;

        while (notes.Length - cursor >= NoteHeaderSize)
        {
            var nameSize = ReadUInt32(notes[cursor..], bigEndian);
            var descSize = ReadUInt32(notes[(cursor + 4)..], bigEndian);
            var type = ReadUInt32(notes[(cursor + 8)..], bigEndian);

            long nameStart = cursor + NoteHeaderSize;
            long descStart = nameStart + Align4(nameSize);
            long next = descStart + Align4(descSize);

            if (descStart + descSize > notes.Length)
            {
                return null; // truncated or corrupt note data
            }

            if (type == NoteTypeGnuBuildId
                && nameSize == 4
                && descSize > 0
                && notes.Slice((int)nameStart, 4).SequenceEqual("GNU\0"u8))
            {
                // The descriptor length comes from the note itself; build-ids are not always 20 bytes.
                return Convert.ToHexString(notes.Slice((int)descStart, (int)descSize)).ToLowerInvariant();
            }

            if (next >= notes.Length)
            {
                break;
            }

            cursor = (int)next;
        }

        return null;
    }

    /// <summary>
    /// Reads <paramref name="length"/> bytes at <paramref name="offset"/>; returns <c>null</c>
    /// when the range lies outside the stream, exceeds <see cref="MaxRegionBytes"/> or cannot be read in full.
    /// </summary>
    private static byte[]? ReadRegion(Stream stream, long offset, long length)
    {
        if (offset < 0 || length <= 0 || length > MaxRegionBytes || offset > stream.Length - length)
        {
            return null;
        }

        var buffer = new byte[length];
        stream.Position = offset;
        var read = stream.ReadAtLeast(buffer, buffer.Length, throwOnEndOfStream: false);
        return read == buffer.Length ? buffer : null;
    }

    /// <summary>Rounds a note field size up to the next multiple of four.</summary>
    private static long Align4(uint value) => ((long)value + 3) & ~3L;

    /// <summary>Converts to <see cref="long"/>, mapping values that do not fit to -1.</summary>
    private static long ToInt64(ulong value) => value > long.MaxValue ? -1 : (long)value;

    private static ushort ReadUInt16(ReadOnlySpan<byte> source, bool bigEndian) => bigEndian
        ? System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(source)
        : System.Buffers.Binary.BinaryPrimitives.ReadUInt16LittleEndian(source);

    private static uint ReadUInt32(ReadOnlySpan<byte> source, bool bigEndian) => bigEndian
        ? System.Buffers.Binary.BinaryPrimitives.ReadUInt32BigEndian(source)
        : System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(source);

    private static ulong ReadUInt64(ReadOnlySpan<byte> source, bool bigEndian) => bigEndian
        ? System.Buffers.Binary.BinaryPrimitives.ReadUInt64BigEndian(source)
        : System.Buffers.Binary.BinaryPrimitives.ReadUInt64LittleEndian(source);

    /// <summary>Maps the ELF e_machine value to an architecture name.</summary>
    private static string MapArchitecture(ushort eMachine) => eMachine switch
    {
        0x3E => "x86_64",
        0x03 => "x86",
        0xB7 => "aarch64",
        0x28 => "arm",
        0xF3 => "riscv",
        _ => $"unknown-{eMachine}"
    };

    /// <summary>Maps the ELF EI_OSABI byte to an OS ABI name.</summary>
    private static string MapOsAbi(byte osAbi) => osAbi switch
    {
        0x00 => "sysv",
        0x03 => "linux",
        0x09 => "freebsd",
        _ => $"unknown-{osAbi}"
    };

    /// <summary>
    /// Maps the ELF e_type value to a <see cref="BinaryType"/>; values other than
    /// 1, 2 and 3 are reported as <see cref="BinaryType.Executable"/>.
    /// </summary>
    private static BinaryType MapBinaryType(ushort eType) => eType switch
    {
        0x02 => BinaryType.Executable,
        0x03 => BinaryType.SharedLibrary,
        0x01 => BinaryType.Object,
        _ => BinaryType.Executable
    };

    /// <summary>Hashes the whole stream from position 0 and returns lower-case hex.</summary>
    private static async Task<string> ComputeSha256Async(Stream stream, CancellationToken ct)
    {
        stream.Position = 0;
        var hash = await SHA256.HashDataAsync(stream, ct);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
|
||||
@@ -0,0 +1,38 @@
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Recognises binary files and extracts their identifying features.
/// </summary>
public interface IBinaryFeatureExtractor
{
    /// <summary>
    /// Determines if the stream contains a binary format this extractor supports.
    /// </summary>
    /// <param name="stream">Stream to probe.</param>
    /// <returns><c>true</c> when the extractor can process the stream.</returns>
    bool CanExtract(Stream stream);

    /// <summary>
    /// Extracts the full binary identity from the stream.
    /// </summary>
    /// <param name="stream">Stream containing the binary.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The extracted identity.</returns>
    Task<BinaryIdentity> ExtractIdentityAsync(Stream stream, CancellationToken ct = default);

    /// <summary>
    /// Extracts metadata only, without computing expensive hashes.
    /// </summary>
    /// <param name="stream">Stream containing the binary.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The extracted metadata.</returns>
    Task<BinaryMetadata> ExtractMetadataAsync(Stream stream, CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Lightweight description of a binary, obtained without hashing the whole file.
/// </summary>
public sealed record BinaryMetadata
{
    /// <summary>Container format of the binary.</summary>
    public required BinaryFormat Format { get; init; }

    /// <summary>Target architecture name.</summary>
    public required string Architecture { get; init; }

    /// <summary>Build identifier, or <c>null</c> when none was found.</summary>
    public string? BuildId { get; init; }

    /// <summary>Kind of value stored in <see cref="BuildId"/>.</summary>
    public string? BuildIdType { get; init; }

    /// <summary>OS ABI name, when known.</summary>
    public string? OsAbi { get; init; }

    /// <summary>Kind of binary, when known.</summary>
    public BinaryType? Type { get; init; }

    /// <summary>Whether the binary was found to lack a symbol table.</summary>
    public bool IsStripped { get; init; }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Read access to stored binary vulnerability assertions.
/// </summary>
public interface IBinaryVulnAssertionRepository
{
    /// <summary>
    /// Retrieves the assertions stored for <paramref name="binaryKey"/>.
    /// </summary>
    /// <param name="binaryKey">Binary key to look up.</param>
    /// <param name="ct">Token used to cancel the query.</param>
    Task<ImmutableArray<BinaryVulnAssertion>> GetByBinaryKeyAsync(string binaryKey, CancellationToken ct);
}
|
||||
|
||||
/// <summary>
/// Statement about the vulnerability status of one binary with respect to one CVE.
/// </summary>
public sealed record BinaryVulnAssertion
{
    /// <summary>Identifier of the assertion record.</summary>
    public Guid Id { get; init; }

    /// <summary>Key of the binary the assertion applies to.</summary>
    public required string BinaryKey { get; init; }

    /// <summary>CVE identifier the assertion refers to.</summary>
    public required string CveId { get; init; }

    /// <summary>
    /// Status text. <c>BinaryVulnerabilityService</c> reports a match only for the value "affected".
    /// </summary>
    public required string Status { get; init; }

    /// <summary>
    /// Name of the method that produced the assertion. <c>BinaryVulnerabilityService</c> recognises
    /// "buildid_catalog" and "fingerprint_match"; any other value is treated as a range match.
    /// </summary>
    public required string Method { get; init; }

    /// <summary>
    /// Confidence of the assertion; <c>BinaryVulnerabilityService</c> substitutes 0.9 when this is <c>null</c>.
    /// </summary>
    public decimal? Confidence { get; init; }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Main query interface for binary vulnerability lookup.
/// Consumed by Scanner.Worker during container scanning.
/// </summary>
public interface IBinaryVulnerabilityService
{
    /// <summary>
    /// Looks up vulnerabilities for a single binary identity (Build-ID, hashes).
    /// </summary>
    /// <param name="identity">Identity of the binary to look up.</param>
    /// <param name="options">Optional lookup settings.</param>
    /// <param name="ct">Token used to cancel the lookup.</param>
    /// <returns>The vulnerability matches found for the identity.</returns>
    Task<ImmutableArray<BinaryVulnMatch>> LookupByIdentityAsync(
        BinaryIdentity identity,
        LookupOptions? options = null,
        CancellationToken ct = default);

    /// <summary>
    /// Looks up vulnerabilities for several identities in one call, for scan performance.
    /// </summary>
    /// <param name="identities">Identities of the binaries to look up.</param>
    /// <param name="options">Optional lookup settings applied to every identity.</param>
    /// <param name="ct">Token used to cancel the lookup.</param>
    /// <returns>The matches found, keyed by string (the binary key in the implementation in this codebase).</returns>
    Task<ImmutableDictionary<string, ImmutableArray<BinaryVulnMatch>>> LookupBatchAsync(
        IEnumerable<BinaryIdentity> identities,
        LookupOptions? options = null,
        CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Settings that tune a vulnerability lookup.
/// </summary>
public sealed record LookupOptions
{
    /// <summary>Whether the fix index should be checked; enabled by default.</summary>
    public bool CheckFixIndex { get; init; } = true;

    /// <summary>Optional hint naming the distribution.</summary>
    public string? DistroHint { get; init; }

    /// <summary>Optional hint naming the distribution release.</summary>
    public string? ReleaseHint { get; init; }
}
|
||||
|
||||
/// <summary>
/// One vulnerability found for a binary.
/// </summary>
public sealed record BinaryVulnMatch
{
    /// <summary>CVE identifier of the vulnerability.</summary>
    public required string CveId { get; init; }

    /// <summary>Package URL of the vulnerable package.</summary>
    public required string VulnerablePurl { get; init; }

    /// <summary>Method by which the match was established.</summary>
    public required MatchMethod Method { get; init; }

    /// <summary>Confidence attached to the match.</summary>
    public required decimal Confidence { get; init; }

    /// <summary>Optional evidence supporting the match.</summary>
    public MatchEvidence? Evidence { get; init; }
}
|
||||
|
||||
/// <summary>
/// Ways in which a vulnerability match can be established.
/// </summary>
public enum MatchMethod
{
    /// <summary>Match established through the Build-ID catalog.</summary>
    BuildIdCatalog = 0,

    /// <summary>Match established through a fingerprint comparison.</summary>
    FingerprintMatch = 1,

    /// <summary>Match established through a range comparison.</summary>
    RangeMatch = 2
}
|
||||
|
||||
/// <summary>
/// Supporting details attached to a vulnerability match.
/// </summary>
public sealed record MatchEvidence
{
    /// <summary>Build-ID of the matched binary, when available.</summary>
    public string? BuildId { get; init; }

    /// <summary>Similarity value, when available.</summary>
    public decimal? Similarity { get; init; }

    /// <summary>Name of the matched function, when available.</summary>
    public string? MatchedFunction { get; init; }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
/// <summary>
/// Supplies the tenant context that is current for the caller, for use with RLS.
/// </summary>
public interface ITenantContext
{
    /// <summary>
    /// Identifier of the current tenant.
    /// </summary>
    string TenantId { get; }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Compiler and language settings -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
  </PropertyGroup>

  <!-- NuGet dependencies -->
  <ItemGroup>
    <PackageReference Include="System.Collections.Immutable" Version="9.0.0" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
  </ItemGroup>
</Project>
|
||||
@@ -0,0 +1,164 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
using StellaOps.BinaryIndex.Corpus;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Corpus.Debian;
|
||||
|
||||
/// <summary>
/// Debian/Ubuntu corpus connector: creates corpus snapshots from a package index,
/// lists the packages of a snapshot and extracts binaries from individual packages.
/// </summary>
public sealed class DebianCorpusConnector : IBinaryCorpusConnector
{
    private readonly IDebianPackageSource _packageSource;
    private readonly DebianPackageExtractor _extractor;
    private readonly IBinaryFeatureExtractor _featureExtractor;
    private readonly ICorpusSnapshotRepository _snapshotRepo;
    private readonly ILogger<DebianCorpusConnector> _logger;

    // Not referenced inside this class.
    private const string DefaultMirror = "https://deb.debian.org/debian";

    /// <summary>Identifier of this connector.</summary>
    public string ConnectorId => "debian";

    /// <summary>Distributions this connector declares support for.</summary>
    public string[] SupportedDistros => ["debian", "ubuntu"];

    /// <summary>
    /// Creates the connector.
    /// </summary>
    /// <param name="packageSource">Source of package indexes and package files.</param>
    /// <param name="extractor">Extractor used to pull binaries out of .deb packages.</param>
    /// <param name="featureExtractor">Binary feature extractor (stored, not used directly by this class).</param>
    /// <param name="snapshotRepo">Repository in which snapshots are looked up and created.</param>
    /// <param name="logger">Logger for progress messages.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public DebianCorpusConnector(
        IDebianPackageSource packageSource,
        DebianPackageExtractor extractor,
        IBinaryFeatureExtractor featureExtractor,
        ICorpusSnapshotRepository snapshotRepo,
        ILogger<DebianCorpusConnector> logger)
    {
        ArgumentNullException.ThrowIfNull(packageSource);
        ArgumentNullException.ThrowIfNull(extractor);
        ArgumentNullException.ThrowIfNull(featureExtractor);
        ArgumentNullException.ThrowIfNull(snapshotRepo);
        ArgumentNullException.ThrowIfNull(logger);

        _packageSource = packageSource;
        _extractor = extractor;
        _featureExtractor = featureExtractor;
        _snapshotRepo = snapshotRepo;
        _logger = logger;
    }

    /// <summary>
    /// Returns the snapshot already stored for the query's distro/release/architecture, or
    /// fetches the package index, computes its digest and stores a new snapshot.
    /// </summary>
    /// <param name="query">Distro, release and architecture to snapshot.</param>
    /// <param name="ct">Token used to cancel repository and network calls.</param>
    public async Task<CorpusSnapshot> FetchSnapshotAsync(CorpusQuery query, CancellationToken ct = default)
    {
        _logger.LogInformation(
            "Fetching corpus snapshot for {Distro} {Release}/{Architecture}",
            query.Distro, query.Release, query.Architecture);

        // Check if we already have a snapshot for this query
        var existing = await _snapshotRepo.FindByKeyAsync(
            query.Distro,
            query.Release,
            query.Architecture,
            ct);

        if (existing is not null)
        {
            _logger.LogInformation("Using existing snapshot {SnapshotId}", existing.Id);
            return existing;
        }

        // Fetch package index to compute metadata digest
        var packages = await _packageSource.FetchPackageIndexAsync(
            query.Distro,
            query.Release,
            query.Architecture,
            ct);

        // Materialise once: the list is both hashed and counted.
        var packageList = packages.ToList();
        var metadataDigest = ComputeMetadataDigest(packageList);

        var snapshot = new CorpusSnapshot(
            Id: Guid.NewGuid(),
            Distro: query.Distro,
            Release: query.Release,
            Architecture: query.Architecture,
            MetadataDigest: metadataDigest,
            CapturedAt: DateTimeOffset.UtcNow);

        await _snapshotRepo.CreateAsync(snapshot, ct);

        _logger.LogInformation(
            "Created corpus snapshot {SnapshotId} with {PackageCount} packages",
            snapshot.Id, packageList.Count);

        return snapshot;
    }

    /// <summary>
    /// Fetches the package index for the snapshot's distro/release/architecture and yields
    /// one <see cref="PackageInfo"/> per index entry.
    /// </summary>
    /// <param name="snapshot">Snapshot whose packages are listed.</param>
    /// <param name="ct">Token used to cancel the index download.</param>
    public async IAsyncEnumerable<PackageInfo> ListPackagesAsync(
        CorpusSnapshot snapshot,
        [EnumeratorCancellation] CancellationToken ct = default)
    {
        _logger.LogDebug("Listing packages for snapshot {SnapshotId}", snapshot.Id);

        var packages = await _packageSource.FetchPackageIndexAsync(
            snapshot.Distro,
            snapshot.Release,
            snapshot.Architecture,
            ct);

        foreach (var pkg in packages)
        {
            yield return new PackageInfo(
                Name: pkg.Package,
                Version: pkg.Version,
                SourcePackage: pkg.Source ?? pkg.Package,
                Architecture: pkg.Architecture,
                Filename: pkg.Filename,
                Size: 0, // We don't have size in current implementation
                Sha256: pkg.SHA256);
        }
    }

    /// <summary>
    /// Downloads the package file and yields every binary the extractor finds in it.
    /// The downloaded stream is disposed when enumeration completes or is abandoned.
    /// </summary>
    /// <param name="pkg">Package to download and unpack.</param>
    /// <param name="ct">Token used to cancel download and extraction.</param>
    public async IAsyncEnumerable<ExtractedBinary> ExtractBinariesAsync(
        PackageInfo pkg,
        [EnumeratorCancellation] CancellationToken ct = default)
    {
        _logger.LogDebug("Extracting binaries from {Package} {Version}", pkg.Name, pkg.Version);

        // Download the .deb package
        await using var debStream = await _packageSource.DownloadPackageAsync(pkg.Filename, ct);

        var metadata = new DebianPackageMetadata
        {
            Package = pkg.Name,
            Version = pkg.Version,
            Architecture = pkg.Architecture,
            Filename = pkg.Filename,
            SHA256 = pkg.Sha256,
            // Source is only recorded when it differs from the binary package name.
            Source = pkg.SourcePackage != pkg.Name ? pkg.SourcePackage : null
        };

        var extractedBinaries = await _extractor.ExtractBinariesAsync(debStream, metadata, ct);

        foreach (var binary in extractedBinaries)
        {
            yield return new ExtractedBinary(
                Identity: binary.Identity,
                PathInPackage: binary.FilePath,
                Package: pkg);
        }
    }

    /// <summary>
    /// Computes a lower-case hex SHA-256 over the "name:version:sha256" entries of all packages.
    /// Entries are sorted with ordinal comparison on name, version and architecture so the
    /// digest does not depend on the process culture or on the order of the input.
    /// </summary>
    private static string ComputeMetadataDigest(IEnumerable<DebianPackageMetadata> packages)
    {
        var combined = string.Join("|", packages
            .OrderBy(p => p.Package, StringComparer.Ordinal)
            .ThenBy(p => p.Version, StringComparer.Ordinal)
            .ThenBy(p => p.Architecture, StringComparer.Ordinal)
            .Select(p => $"{p.Package}:{p.Version}:{p.SHA256}"));

        var hash = System.Security.Cryptography.SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(combined));
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
|
||||
@@ -0,0 +1,136 @@
|
||||
using System.IO.Compression;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Corpus.Debian;
|
||||
|
||||
/// <summary>
/// Fetches Debian package indexes and package files from a mirror over HTTP.
/// </summary>
public sealed partial class DebianMirrorPackageSource : IDebianPackageSource
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<DebianMirrorPackageSource> _logger;
    private readonly string _mirrorUrl;

    /// <summary>
    /// Creates the package source.
    /// </summary>
    /// <param name="httpClient">Client used for all requests.</param>
    /// <param name="logger">Logger for progress messages.</param>
    /// <param name="mirrorUrl">Base URL of the mirror; a trailing slash is removed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="httpClient"/>, <paramref name="logger"/> or <paramref name="mirrorUrl"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="mirrorUrl"/> is empty or white space.</exception>
    public DebianMirrorPackageSource(
        HttpClient httpClient,
        ILogger<DebianMirrorPackageSource> logger,
        string mirrorUrl = "https://deb.debian.org/debian")
    {
        ArgumentNullException.ThrowIfNull(httpClient);
        ArgumentNullException.ThrowIfNull(logger);
        ArgumentException.ThrowIfNullOrWhiteSpace(mirrorUrl);

        _httpClient = httpClient;
        _logger = logger;
        _mirrorUrl = mirrorUrl.TrimEnd('/');
    }

    /// <summary>
    /// Downloads and parses <c>dists/{release}/main/binary-{architecture}/Packages.gz</c>.
    /// Stanzas lacking any of Package, Version, Architecture, Filename or SHA256 are skipped.
    /// </summary>
    /// <param name="distro">Distribution name; not used when building the URL.</param>
    /// <param name="release">Release name used in the index URL.</param>
    /// <param name="architecture">Architecture used in the index URL.</param>
    /// <param name="ct">Token used to cancel the download and parsing.</param>
    /// <exception cref="HttpRequestException">The mirror returned a non-success status code.</exception>
    public async Task<IEnumerable<DebianPackageMetadata>> FetchPackageIndexAsync(
        string distro,
        string release,
        string architecture,
        CancellationToken ct = default)
    {
        var packagesUrl = $"{_mirrorUrl}/dists/{release}/main/binary-{architecture}/Packages.gz";

        _logger.LogInformation("Fetching package index: {Url}", packagesUrl);

        using var response = await _httpClient.GetAsync(packagesUrl, ct);
        response.EnsureSuccessStatusCode();

        await using var compressedStream = await response.Content.ReadAsStreamAsync(ct);
        await using var decompressed = new GZipStream(compressedStream, CompressionMode.Decompress);
        using var reader = new StreamReader(decompressed);

        var packages = new List<DebianPackageMetadata>();

        // Control-file field names are matched case-insensitively.
        var currentFields = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Turns the fields collected so far into a package (if complete) and starts a new stanza.
        void FlushStanza()
        {
            if (currentFields.Count == 0)
            {
                return;
            }

            if (TryParsePackage(currentFields, out var pkg))
            {
                packages.Add(pkg);
            }

            currentFields.Clear();
        }

        while (await reader.ReadLineAsync(ct) is { } line)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                // A blank line ends the current stanza.
                FlushStanza();
                continue;
            }

            if (line.StartsWith(' ') || line.StartsWith('\t'))
            {
                // Continuation line - ignore for now
                continue;
            }

            var colonIndex = line.IndexOf(':');
            if (colonIndex > 0)
            {
                var key = line[..colonIndex];
                var value = line[(colonIndex + 1)..].Trim();
                currentFields[key] = value;
            }
        }

        // The last stanza is not necessarily followed by a blank line.
        FlushStanza();

        _logger.LogInformation("Fetched {Count} packages for {Release}/{Arch}",
            packages.Count, release, architecture);

        return packages;
    }

    /// <summary>
    /// Downloads <c>{mirror}/{poolPath}</c> into memory.
    /// </summary>
    /// <param name="poolPath">Path of the package relative to the mirror root.</param>
    /// <param name="ct">Token used to cancel the download.</param>
    /// <returns>A seekable in-memory stream positioned at 0; the caller owns and disposes it.</returns>
    /// <exception cref="HttpRequestException">The mirror returned a non-success status code.</exception>
    public async Task<Stream> DownloadPackageAsync(string poolPath, CancellationToken ct = default)
    {
        var packageUrl = $"{_mirrorUrl}/{poolPath}";

        _logger.LogDebug("Downloading package: {Url}", packageUrl);

        // The response is disposed here: the content is fully copied into the memory stream
        // before returning, so nothing handed to the caller depends on it.
        using var response = await _httpClient.GetAsync(packageUrl, HttpCompletionOption.ResponseHeadersRead, ct);
        response.EnsureSuccessStatusCode();

        var memoryStream = new MemoryStream();
        await using (var contentStream = await response.Content.ReadAsStreamAsync(ct))
        {
            await contentStream.CopyToAsync(memoryStream, ct);
        }

        memoryStream.Position = 0;
        return memoryStream;
    }

    /// <summary>
    /// Builds package metadata from one stanza's fields.
    /// </summary>
    /// <returns>
    /// <c>false</c> (and a <c>null</c> <paramref name="pkg"/>) when Package, Version, Architecture,
    /// Filename or SHA256 is missing; Source is optional.
    /// </returns>
    private static bool TryParsePackage(
        Dictionary<string, string> fields,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out DebianPackageMetadata? pkg)
    {
        pkg = null;

        if (!fields.TryGetValue("Package", out var package) ||
            !fields.TryGetValue("Version", out var version) ||
            !fields.TryGetValue("Architecture", out var architecture) ||
            !fields.TryGetValue("Filename", out var filename) ||
            !fields.TryGetValue("SHA256", out var sha256))
        {
            return false;
        }

        fields.TryGetValue("Source", out var source);

        pkg = new DebianPackageMetadata
        {
            Package = package,
            Version = version,
            Architecture = architecture,
            Filename = filename,
            SHA256 = sha256,
            Source = source
        };

        return true;
    }
}
|
||||
@@ -0,0 +1,137 @@
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using SharpCompress.Archives;
|
||||
using SharpCompress.Archives.Tar;
|
||||
using SharpCompress.Common;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
using StellaOps.BinaryIndex.Corpus;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Corpus.Debian;
|
||||
|
||||
/// <summary>
/// Extracts binaries from Debian .deb packages and derives their identities.
/// </summary>
public sealed class DebianPackageExtractor
{
    private readonly IBinaryFeatureExtractor _featureExtractor;
    private readonly ILogger<DebianPackageExtractor> _logger;

    /// <summary>
    /// Creates the extractor.
    /// </summary>
    /// <param name="featureExtractor">Extractor used to recognise binaries and compute identities.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public DebianPackageExtractor(
        IBinaryFeatureExtractor featureExtractor,
        ILogger<DebianPackageExtractor> logger)
    {
        ArgumentNullException.ThrowIfNull(featureExtractor);
        ArgumentNullException.ThrowIfNull(logger);

        _featureExtractor = featureExtractor;
        _logger = logger;
    }

    /// <summary>
    /// Extracts all binaries from a .deb package. Failures other than cancellation are logged
    /// as a warning and whatever was extracted up to that point is returned.
    /// </summary>
    /// <param name="debStream">Stream containing the .deb file.</param>
    /// <param name="metadata">Package metadata copied onto every extracted binary.</param>
    /// <param name="ct">Token used to cancel extraction.</param>
    /// <exception cref="ArgumentNullException"><paramref name="debStream"/> or <paramref name="metadata"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<ImmutableArray<ExtractedBinaryInternal>> ExtractBinariesAsync(
        Stream debStream,
        DebianPackageMetadata metadata,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(debStream);
        ArgumentNullException.ThrowIfNull(metadata);

        var binaries = new List<ExtractedBinaryInternal>();

        try
        {
            // .deb is an ar archive containing data.tar.* (usually data.tar.xz or data.tar.gz)
            // NOTE(review): confirm that ArchiveFactory.Open recognises the ar container. If it
            // does not, this call throws for every package and the method returns an empty array.
            using var archive = ArchiveFactory.Open(debStream);

            foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
            {
                ct.ThrowIfCancellationRequested();

                if (entry.Key == null || !entry.Key.StartsWith("data.tar", StringComparison.Ordinal))
                {
                    continue;
                }

                // Extract data.tar.*
                using var dataTarStream = new MemoryStream();
                entry.WriteTo(dataTarStream);
                dataTarStream.Position = 0;

                // Now extract from data.tar
                await ExtractFromDataTarAsync(dataTarStream, metadata, binaries, ct);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Caller-requested cancellation must propagate instead of being logged as a failure.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to extract binaries from {Package} {Version}",
                metadata.Package, metadata.Version);
        }

        return binaries.ToImmutableArray();
    }

    /// <summary>
    /// Reads every non-directory entry of the data archive that sits at a path accepted by
    /// <see cref="IsPotentialBinary"/>, and appends an identity for each one the feature
    /// extractor supports. Entries that fail are logged at debug level and skipped.
    /// </summary>
    private async Task ExtractFromDataTarAsync(
        Stream dataTarStream,
        DebianPackageMetadata metadata,
        List<ExtractedBinaryInternal> binaries,
        CancellationToken ct)
    {
        // NOTE(review): the stream holds the raw data.tar.* member, which is usually compressed
        // (xz/gz); confirm that TarArchive.Open handles compressed input.
        using var tarArchive = TarArchive.Open(dataTarStream);

        foreach (var entry in tarArchive.Entries.Where(e => !e.IsDirectory))
        {
            ct.ThrowIfCancellationRequested();

            if (entry.Key == null)
            {
                continue;
            }

            // Only process binaries in typical locations
            if (!IsPotentialBinary(entry.Key))
            {
                continue;
            }

            try
            {
                using var binaryStream = new MemoryStream();
                entry.WriteTo(binaryStream);
                binaryStream.Position = 0;

                if (!_featureExtractor.CanExtract(binaryStream))
                {
                    continue;
                }

                var identity = await _featureExtractor.ExtractIdentityAsync(binaryStream, ct);

                binaries.Add(new ExtractedBinaryInternal
                {
                    Identity = identity,
                    FilePath = entry.Key,
                    PackageName = metadata.Package,
                    PackageVersion = metadata.Version,
                    SourcePackage = metadata.Source ?? metadata.Package
                });

                _logger.LogDebug("Extracted binary {Path} from {Package}", entry.Key, metadata.Package);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Do not treat cancellation as a skippable per-entry failure.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Skipped {Path} in {Package}", entry.Key, metadata.Package);
            }
        }
    }

    /// <summary>
    /// Returns <c>true</c> for paths under the usual binary/library directories or containing ".so".
    /// All comparisons are ordinal.
    /// </summary>
    private static bool IsPotentialBinary(string path)
    {
        // Typical binary locations in Debian packages
        return path.StartsWith("./usr/bin/", StringComparison.Ordinal) ||
               path.StartsWith("./usr/sbin/", StringComparison.Ordinal) ||
               path.StartsWith("./bin/", StringComparison.Ordinal) ||
               path.StartsWith("./sbin/", StringComparison.Ordinal) ||
               path.StartsWith("./usr/lib/", StringComparison.Ordinal) ||
               path.StartsWith("./lib/", StringComparison.Ordinal) ||
               // Covers both "libfoo.so" and versioned names such as "libfoo.so.1".
               path.Contains(".so", StringComparison.Ordinal);
    }
}
|
||||
|
||||
/// <summary>
/// Extracted binary together with the metadata of the package it came from.
/// Produced by DebianPackageExtractor before conversion to the framework's ExtractedBinary.
/// </summary>
public sealed record ExtractedBinaryInternal
{
    /// <summary>Identity computed for the binary.</summary>
    public required BinaryIdentity Identity { get; init; }

    /// <summary>Path of the binary inside the package's data archive.</summary>
    public required string FilePath { get; init; }

    /// <summary>Name of the package that contained the binary.</summary>
    public required string PackageName { get; init; }

    /// <summary>Version of the package that contained the binary.</summary>
    public required string PackageVersion { get; init; }

    /// <summary>Source package name; the extractor falls back to the package name when none is recorded.</summary>
    public required string SourcePackage { get; init; }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
namespace StellaOps.BinaryIndex.Corpus.Debian;
|
||||
|
||||
/// <summary>
/// Abstraction over a Debian mirror: lists package metadata and downloads package files.
/// </summary>
public interface IDebianPackageSource
{
    /// <summary>
    /// Retrieves the package metadata listed in the Packages.gz index.
    /// </summary>
    /// <param name="distro">Distro identifier.</param>
    /// <param name="release">Release/codename within the distro.</param>
    /// <param name="architecture">Target architecture of the index.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The metadata entries found in the index.</returns>
    Task<IEnumerable<DebianPackageMetadata>> FetchPackageIndexAsync(
        string distro, string release, string architecture, CancellationToken ct = default);

    /// <summary>
    /// Downloads a single .deb package file.
    /// </summary>
    /// <param name="poolPath">Pool path of the package.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A stream over the package contents.</returns>
    Task<Stream> DownloadPackageAsync(string poolPath, CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// One package entry as described by a Debian package index.
/// </summary>
public sealed record DebianPackageMetadata
{
    /// <summary>Binary package name.</summary>
    public required string Package { get; init; }

    /// <summary>Package version string.</summary>
    public required string Version { get; init; }

    /// <summary>Architecture the package was built for.</summary>
    public required string Architecture { get; init; }

    /// <summary>Pool path of the package file.</summary>
    public required string Filename { get; init; }

    /// <summary>SHA-256 value recorded for the package (presumably hex of the .deb file; confirm against the index parser).</summary>
    public required string SHA256 { get; init; }

    /// <summary>Source package name, when the index provides one.</summary>
    public string? Source { get; init; }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="SharpCompress" Version="0.38.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Corpus\StellaOps.BinaryIndex.Corpus.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Persistence\StellaOps.BinaryIndex.Persistence.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,76 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Corpus;
|
||||
|
||||
/// <summary>
/// Contract shared by all binary corpus connectors.
/// A connector fetches packages from a distro repository and extracts the binaries they contain.
/// </summary>
public interface IBinaryCorpusConnector
{
    /// <summary>
    /// Identifier that uniquely names this connector (e.g., "debian", "rpm", "alpine").
    /// </summary>
    string ConnectorId { get; }

    /// <summary>
    /// Distro identifiers this connector can serve (e.g., ["debian", "ubuntu"]).
    /// </summary>
    string[] SupportedDistros { get; }

    /// <summary>
    /// Fetches the corpus snapshot described by <paramref name="query"/>.
    /// </summary>
    /// <param name="query">Distro, release and architecture to snapshot.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<CorpusSnapshot> FetchSnapshotAsync(CorpusQuery query, CancellationToken ct = default);

    /// <summary>
    /// Enumerates every package contained in <paramref name="snapshot"/>.
    /// </summary>
    /// <param name="snapshot">Snapshot whose packages are listed.</param>
    /// <param name="ct">Cancellation token.</param>
    IAsyncEnumerable<PackageInfo> ListPackagesAsync(CorpusSnapshot snapshot, CancellationToken ct = default);

    /// <summary>
    /// Extracts the binaries contained in <paramref name="pkg"/>.
    /// </summary>
    /// <param name="pkg">Package to extract binaries from.</param>
    /// <param name="ct">Cancellation token.</param>
    IAsyncEnumerable<ExtractedBinary> ExtractBinariesAsync(PackageInfo pkg, CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Selects which corpus snapshot to fetch.
/// </summary>
/// <param name="Distro">Distro identifier.</param>
/// <param name="Release">Release/codename within the distro.</param>
/// <param name="Architecture">Target architecture.</param>
/// <param name="ComponentFilter">
/// Optional component filter; <c>null</c> by default.
/// NOTE(review): exact filtering semantics are defined by the connector — confirm.
/// </param>
public sealed record CorpusQuery(
    string Distro,
    string Release,
    string Architecture,
    string[]? ComponentFilter = null);
|
||||
|
||||
/// <summary>
/// A corpus as it was captured at one specific moment.
/// </summary>
/// <param name="Id">Snapshot identifier.</param>
/// <param name="Distro">Distro identifier.</param>
/// <param name="Release">Release/codename within the distro.</param>
/// <param name="Architecture">Target architecture.</param>
/// <param name="MetadataDigest">Digest of the repository metadata (algorithm not visible here — confirm with the connector).</param>
/// <param name="CapturedAt">Moment the snapshot was captured.</param>
public sealed record CorpusSnapshot(
    Guid Id,
    string Distro,
    string Release,
    string Architecture,
    string MetadataDigest,
    DateTimeOffset CapturedAt);
|
||||
|
||||
/// <summary>
/// Metadata describing one package, as read from a repository index.
/// </summary>
/// <param name="Name">Package name.</param>
/// <param name="Version">Package version string.</param>
/// <param name="SourcePackage">Name of the source package.</param>
/// <param name="Architecture">Architecture the package targets.</param>
/// <param name="Filename">File name/path of the package in the repository.</param>
/// <param name="Size">Package size (presumably bytes — confirm).</param>
/// <param name="Sha256">SHA-256 value recorded for the package.</param>
public sealed record PackageInfo(
    string Name,
    string Version,
    string SourcePackage,
    string Architecture,
    string Filename,
    long Size,
    string Sha256);
|
||||
|
||||
/// <summary>
/// A binary that was extracted from a package.
/// </summary>
/// <param name="Identity">Identity of the extracted binary.</param>
/// <param name="PathInPackage">Location of the binary inside the package.</param>
/// <param name="Package">Package the binary was extracted from.</param>
public sealed record ExtractedBinary(
    BinaryIdentity Identity,
    string PathInPackage,
    PackageInfo Package);
|
||||
@@ -0,0 +1,26 @@
|
||||
namespace StellaOps.BinaryIndex.Corpus;
|
||||
|
||||
/// <summary>
/// Persistence contract for corpus snapshots.
/// </summary>
public interface ICorpusSnapshotRepository
{
    /// <summary>
    /// Persists a new corpus snapshot record.
    /// </summary>
    /// <param name="snapshot">Snapshot to store.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The snapshot as returned by the store.</returns>
    Task<CorpusSnapshot> CreateAsync(CorpusSnapshot snapshot, CancellationToken ct = default);

    /// <summary>
    /// Looks up an existing snapshot by its distro/release/architecture key.
    /// </summary>
    /// <param name="distro">Distro identifier.</param>
    /// <param name="release">Release/codename.</param>
    /// <param name="architecture">Target architecture.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The matching snapshot, or <c>null</c>.</returns>
    Task<CorpusSnapshot?> FindByKeyAsync(
        string distro, string release, string architecture, CancellationToken ct = default);

    /// <summary>
    /// Looks up a snapshot by its identifier.
    /// </summary>
    /// <param name="id">Snapshot identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The matching snapshot, or <c>null</c>.</returns>
    Task<CorpusSnapshot?> GetByIdAsync(Guid id, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,66 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints;
|
||||
|
||||
/// <summary>
/// Persistence contract for vulnerable-function fingerprints.
/// </summary>
public interface IFingerprintRepository
{
    /// <summary>
    /// Persists a new fingerprint record.
    /// </summary>
    /// <param name="fingerprint">Fingerprint to store.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<VulnFingerprint> CreateAsync(VulnFingerprint fingerprint, CancellationToken ct = default);

    /// <summary>
    /// Looks up a fingerprint by its identifier.
    /// </summary>
    /// <param name="id">Fingerprint identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The fingerprint, or <c>null</c>.</returns>
    Task<VulnFingerprint?> GetByIdAsync(Guid id, CancellationToken ct = default);

    /// <summary>
    /// Returns every fingerprint recorded for a CVE.
    /// </summary>
    /// <param name="cveId">CVE identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<ImmutableArray<VulnFingerprint>> GetByCveAsync(string cveId, CancellationToken ct = default);

    /// <summary>
    /// Finds fingerprints by hash.
    /// </summary>
    /// <param name="hash">Hash bytes to search for.</param>
    /// <param name="algorithm">Algorithm that produced the hash.</param>
    /// <param name="architecture">Target architecture.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<ImmutableArray<VulnFingerprint>> SearchByHashAsync(
        byte[] hash, FingerprintAlgorithm algorithm, string architecture, CancellationToken ct = default);

    /// <summary>
    /// Replaces the validation statistics stored for a fingerprint.
    /// </summary>
    /// <param name="id">Fingerprint identifier.</param>
    /// <param name="stats">New validation statistics.</param>
    /// <param name="ct">Cancellation token.</param>
    Task UpdateValidationStatsAsync(Guid id, FingerprintValidationStats stats, CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Persistence contract for fingerprint matches.
/// </summary>
public interface IFingerprintMatchRepository
{
    /// <summary>
    /// Persists a new match record.
    /// </summary>
    /// <param name="match">Match to store.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<FingerprintMatch> CreateAsync(FingerprintMatch match, CancellationToken ct = default);

    /// <summary>
    /// Returns every match recorded for a scan.
    /// </summary>
    /// <param name="scanId">Scan identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<ImmutableArray<FingerprintMatch>> GetByScanAsync(Guid scanId, CancellationToken ct = default);

    /// <summary>
    /// Sets the reachability status of a match.
    /// </summary>
    /// <param name="id">Match identifier.</param>
    /// <param name="status">New reachability status.</param>
    /// <param name="ct">Cancellation token.</param>
    Task UpdateReachabilityAsync(Guid id, ReachabilityStatus status, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,180 @@
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
/// <summary>
/// Fingerprint of a vulnerable function, together with its provenance and validation data.
/// </summary>
public sealed record VulnFingerprint
{
    /// <summary>Identifier of this fingerprint record.</summary>
    public Guid Id { get; init; }

    /// <summary>CVE this fingerprint belongs to.</summary>
    public required string CveId { get; init; }

    /// <summary>Name of the affected component (e.g., "openssl").</summary>
    public required string Component { get; init; }

    /// <summary>Package URL (PURL), when one applies.</summary>
    public string? Purl { get; init; }

    /// <summary>Algorithm that produced the fingerprint.</summary>
    public required FingerprintAlgorithm Algorithm { get; init; }

    /// <summary>Fingerprint identifier as a hex string.</summary>
    public required string FingerprintId { get; init; }

    /// <summary>Raw bytes of the fingerprint hash.</summary>
    public required byte[] FingerprintHash { get; init; }

    /// <summary>Architecture the fingerprint targets (e.g., "x86_64").</summary>
    public required string Architecture { get; init; }

    /// <summary>Name of the function, when known.</summary>
    public string? FunctionName { get; init; }

    /// <summary>Source file of the function, when known.</summary>
    public string? SourceFile { get; init; }

    /// <summary>Source line of the function, when known.</summary>
    public int? SourceLine { get; init; }

    /// <summary>Similarity threshold used for matching (0.0-1.0); defaults to 0.95.</summary>
    public decimal SimilarityThreshold { get; init; } = 0.95m;

    /// <summary>Confidence score (0.0-1.0), when assigned.</summary>
    public decimal? Confidence { get; init; }

    /// <summary>Whether the fingerprint has been validated.</summary>
    public bool Validated { get; init; }

    /// <summary>Validation statistics, when available.</summary>
    public FingerprintValidationStats? ValidationStats { get; init; }

    /// <summary>Reference to the vulnerable build artifact.</summary>
    public string? VulnBuildRef { get; init; }

    /// <summary>Reference to the fixed build artifact.</summary>
    public string? FixedBuildRef { get; init; }

    /// <summary>Moment the fingerprint was indexed.</summary>
    public DateTimeOffset IndexedAt { get; init; }
}
|
||||
|
||||
/// <summary>
/// Kinds of fingerprinting algorithm.
/// </summary>
public enum FingerprintAlgorithm
{
    /// <summary>Fingerprinting at basic-block level.</summary>
    BasicBlock = 0,

    /// <summary>Fingerprinting based on the control flow graph.</summary>
    ControlFlowGraph = 1,

    /// <summary>Fingerprinting based on string references.</summary>
    StringRefs = 2,

    /// <summary>Combined algorithm.</summary>
    Combined = 3
}
|
||||
|
||||
/// <summary>
/// Confusion-matrix counts gathered while validating a fingerprint, plus derived ratios.
/// </summary>
public sealed record FingerprintValidationStats
{
    /// <summary>Number of true positive matches</summary>
    public int TruePositives { get; init; }

    /// <summary>Number of false positive matches</summary>
    public int FalsePositives { get; init; }

    /// <summary>Number of true negative non-matches</summary>
    public int TrueNegatives { get; init; }

    /// <summary>Number of false negative non-matches</summary>
    public int FalseNegatives { get; init; }

    /// <summary>Precision: TP / (TP + FP); 0 when the denominator is 0.</summary>
    public decimal Precision => Ratio(TruePositives, FalsePositives);

    /// <summary>Recall: TP / (TP + FN); 0 when the denominator is 0.</summary>
    public decimal Recall => Ratio(TruePositives, FalseNegatives);

    /// <summary>Computes hits / (hits + misses), returning 0 for an empty denominator.</summary>
    private static decimal Ratio(int hits, int misses)
    {
        // Widen before adding: an int sum silently wraps for large counts and
        // would produce a negative or otherwise meaningless ratio.
        var total = (long)hits + misses;
        return total == 0 ? 0m : hits / (decimal)total;
    }
}
|
||||
|
||||
/// <summary>
/// Outcome of matching a binary against known vulnerable fingerprints.
/// </summary>
public sealed record FingerprintMatch
{
    /// <summary>Identifier of this match record.</summary>
    public Guid Id { get; init; }

    /// <summary>Identifier of the scan that produced the match.</summary>
    public Guid ScanId { get; init; }

    /// <summary>Kind of match.</summary>
    public required MatchType Type { get; init; }

    /// <summary>Key of the binary that was matched.</summary>
    public required string BinaryKey { get; init; }

    /// <summary>PURL of the vulnerable package.</summary>
    public required string VulnerablePurl { get; init; }

    /// <summary>Version that is vulnerable.</summary>
    public required string VulnerableVersion { get; init; }

    /// <summary>Identifier of the fingerprint that matched, if any.</summary>
    public Guid? MatchedFingerprintId { get; init; }

    /// <summary>Name of the function that matched, if any.</summary>
    public string? MatchedFunction { get; init; }

    /// <summary>Similarity score (0.0-1.0), if any.</summary>
    public decimal? Similarity { get; init; }

    /// <summary>Advisory identifiers (CVEs, etc.) associated with the match.</summary>
    public string[]? AdvisoryIds { get; init; }

    /// <summary>Reachability status, if determined.</summary>
    public ReachabilityStatus? ReachabilityStatus { get; init; }

    /// <summary>Moment the match occurred.</summary>
    public DateTimeOffset MatchedAt { get; init; }
}
|
||||
|
||||
/// <summary>
/// Ways in which a binary can be matched.
/// </summary>
public enum MatchType
{
    /// <summary>Matched by comparing fingerprints.</summary>
    Fingerprint = 0,

    /// <summary>Matched by Build-ID.</summary>
    BuildId = 1,

    /// <summary>Matched by an exact hash.</summary>
    HashExact = 2
}
|
||||
|
||||
/// <summary>
/// Reachability of a matched vulnerability.
/// </summary>
public enum ReachabilityStatus
{
    /// <summary>The vulnerable function is reachable.</summary>
    Reachable = 0,

    /// <summary>The vulnerable function is unreachable.</summary>
    Unreachable = 1,

    /// <summary>Reachability is unknown.</summary>
    Unknown = 2,

    /// <summary>Reachability is partial.</summary>
    Partial = 3
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,103 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Storage;
|
||||
|
||||
/// <summary>
/// Blob storage implementation for fingerprints.
/// NOTE: This is a placeholder implementation showing the structure.
/// Production implementation would use RustFS or S3-compatible storage.
/// Nothing is written or read yet: store methods only compute and return the
/// target path, and retrieve methods always return <c>null</c>.
/// </summary>
public sealed class FingerprintBlobStorage : IFingerprintBlobStorage
{
    private const string BasePath = "binaryindex/fingerprints";

    private readonly ILogger<FingerprintBlobStorage> _logger;

    /// <summary>Creates the storage with the logger used for diagnostics.</summary>
    /// <param name="logger">Logger for storage operations.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public FingerprintBlobStorage(ILogger<FingerprintBlobStorage> logger)
    {
        ArgumentNullException.ThrowIfNull(logger);
        _logger = logger;
    }

    /// <summary>
    /// Stores fingerprint data to blob storage.
    /// Layout: {BasePath}/{algorithm}/{prefix}/{fingerprint_id}.bin
    /// where prefix is first 2 chars of fingerprint_id for sharding
    /// ("00" when the id is shorter than two characters).
    /// </summary>
    /// <param name="fingerprint">Fingerprint metadata; supplies the algorithm and id used in the path.</param>
    /// <param name="fullData">Full fingerprint data blob.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The storage path computed for the fingerprint.</returns>
    /// <exception cref="ArgumentNullException">An argument or the fingerprint id is null.</exception>
    public async Task<string> StoreFingerprintAsync(
        VulnFingerprint fingerprint,
        byte[] fullData,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(fingerprint);
        ArgumentNullException.ThrowIfNull(fullData);
        ArgumentNullException.ThrowIfNull(fingerprint.FingerprintId, nameof(fingerprint));
        ct.ThrowIfCancellationRequested();

        var fingerprintId = fingerprint.FingerprintId;
        var prefix = fingerprintId.Length >= 2 ? fingerprintId[..2] : "00";
        var algorithm = fingerprint.Algorithm.ToString().ToLowerInvariant();

        // NOTE(review): the id is interpolated into the path as-is; confirm it is
        // always a hex string before wiring a real backend.
        var storagePath = $"{BasePath}/{algorithm}/{prefix}/{fingerprintId}.bin";

        _logger.LogDebug(
            "Storing fingerprint {FingerprintId} to {Path}",
            fingerprintId,
            storagePath);

        // TODO: Actual RustFS or S3 storage implementation
        // await _rustFs.PutAsync(storagePath, fullData, ct);

        // Placeholder: Would write to actual blob storage
        await Task.CompletedTask;

        return storagePath;
    }

    /// <summary>
    /// Retrieves fingerprint data from blob storage.
    /// </summary>
    /// <param name="storagePath">Path previously returned by <see cref="StoreFingerprintAsync"/>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Currently always <c>null</c> (placeholder).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="storagePath"/> is null.</exception>
    public async Task<byte[]?> RetrieveFingerprintAsync(
        string storagePath,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(storagePath);
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug("Retrieving fingerprint from {Path}", storagePath);

        // TODO: Actual retrieval from RustFS or S3
        // return await _rustFs.GetAsync(storagePath, ct);

        await Task.CompletedTask;
        return null;
    }

    /// <summary>
    /// Stores reference build artifacts.
    /// Layout: {BasePath}/refbuilds/{cve_id}/{build_type}.tar.zst
    /// </summary>
    /// <param name="cveId">CVE identifier; becomes a path segment.</param>
    /// <param name="buildType">Build type; becomes the file name.</param>
    /// <param name="buildArtifact">Build artifact data.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The storage path computed for the artifact.</returns>
    /// <exception cref="ArgumentException"><paramref name="cveId"/> or <paramref name="buildType"/> is null or blank.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="buildArtifact"/> is null.</exception>
    public async Task<string> StoreReferenceBuildAsync(
        string cveId,
        string buildType,
        byte[] buildArtifact,
        CancellationToken ct = default)
    {
        // A blank segment would silently yield a malformed path such as
        // "binaryindex/fingerprints/refbuilds//.tar.zst".
        ArgumentException.ThrowIfNullOrWhiteSpace(cveId);
        ArgumentException.ThrowIfNullOrWhiteSpace(buildType);
        ArgumentNullException.ThrowIfNull(buildArtifact);
        ct.ThrowIfCancellationRequested();

        // NOTE(review): cveId/buildType are not checked for '/' or ".."; confirm
        // callers pass trusted values before wiring a real backend.
        var storagePath = $"{BasePath}/refbuilds/{cveId}/{buildType}.tar.zst";

        _logger.LogInformation(
            "Storing {BuildType} reference build for {CveId} to {Path}",
            buildType,
            cveId,
            storagePath);

        // TODO: Actual RustFS or S3 storage implementation
        // await _rustFs.PutAsync(storagePath, buildArtifact, ct);

        await Task.CompletedTask;

        return storagePath;
    }

    /// <summary>
    /// Retrieves a reference build artifact.
    /// </summary>
    /// <param name="storagePath">Path previously returned by <see cref="StoreReferenceBuildAsync"/>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Currently always <c>null</c> (placeholder).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="storagePath"/> is null.</exception>
    public async Task<byte[]?> RetrieveReferenceBuildAsync(
        string storagePath,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(storagePath);
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug("Retrieving reference build from {Path}", storagePath);

        // TODO: Actual retrieval from RustFS or S3
        // return await _rustFs.GetAsync(storagePath, ct);

        await Task.CompletedTask;
        return null;
    }
}
|
||||
@@ -0,0 +1,49 @@
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Fingerprints.Storage;
|
||||
|
||||
/// <summary>
/// Contract for storing and retrieving fingerprint blobs and reference builds.
/// </summary>
public interface IFingerprintBlobStorage
{
    /// <summary>
    /// Writes fingerprint data to blob storage.
    /// </summary>
    /// <param name="fingerprint">Fingerprint metadata</param>
    /// <param name="fullData">Full fingerprint data blob</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>Storage path</returns>
    Task<string> StoreFingerprintAsync(VulnFingerprint fingerprint, byte[] fullData, CancellationToken ct = default);

    /// <summary>
    /// Reads fingerprint data back from blob storage.
    /// </summary>
    /// <param name="storagePath">Storage path of the fingerprint blob</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>The blob bytes, or <c>null</c></returns>
    Task<byte[]?> RetrieveFingerprintAsync(string storagePath, CancellationToken ct = default);

    /// <summary>
    /// Writes a reference build artifact (vulnerable or fixed version).
    /// </summary>
    /// <param name="cveId">CVE identifier</param>
    /// <param name="buildType">"vulnerable" or "fixed"</param>
    /// <param name="buildArtifact">Build artifact data (tar.zst compressed)</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>Storage path</returns>
    Task<string> StoreReferenceBuildAsync(
        string cveId, string buildType, byte[] buildArtifact, CancellationToken ct = default);

    /// <summary>
    /// Reads a reference build artifact back from storage.
    /// </summary>
    /// <param name="storagePath">Storage path of the artifact</param>
    /// <param name="ct">Cancellation token</param>
    /// <returns>The artifact bytes, or <c>null</c></returns>
    Task<byte[]?> RetrieveReferenceBuildAsync(string storagePath, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,132 @@
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
/// <summary>
/// One piece of evidence about the fix status of a CVE in a distro package.
/// </summary>
public sealed record FixEvidence
{
    /// <summary>Identifier of the distro (e.g., "debian", "ubuntu", "alpine").</summary>
    public required string Distro { get; init; }

    /// <summary>Release or codename (e.g., "bookworm", "jammy", "v3.19").</summary>
    public required string Release { get; init; }

    /// <summary>Name of the source package.</summary>
    public required string SourcePkg { get; init; }

    /// <summary>Identifier of the CVE (e.g., "CVE-2024-1234").</summary>
    public required string CveId { get; init; }

    /// <summary>State of the fix.</summary>
    public required FixState State { get; init; }

    /// <summary>Version in which the fix was applied, if applicable.</summary>
    public string? FixedVersion { get; init; }

    /// <summary>How the fix was detected.</summary>
    public required FixMethod Method { get; init; }

    /// <summary>Confidence score (0.0 - 1.0).</summary>
    public required decimal Confidence { get; init; }

    /// <summary>Payload backing this evidence, kept for the audit trail.</summary>
    public required FixEvidencePayload Evidence { get; init; }

    /// <summary>Identifier of the corpus snapshot, when the evidence came from snapshot ingestion.</summary>
    public Guid? SnapshotId { get; init; }

    /// <summary>Moment this evidence was created.</summary>
    public DateTimeOffset CreatedAt { get; init; }
}
|
||||
|
||||
/// <summary>
/// Possible fix states of a CVE for a package.
/// </summary>
public enum FixState
{
    /// <summary>The CVE is fixed in this version.</summary>
    Fixed = 0,

    /// <summary>The CVE affects this package.</summary>
    Vulnerable = 1,

    /// <summary>The CVE does not affect this package.</summary>
    NotAffected = 2,

    /// <summary>The fix won't be applied (e.g., EOL version).</summary>
    Wontfix = 3,

    /// <summary>The status is unknown.</summary>
    Unknown = 4
}
|
||||
|
||||
/// <summary>
/// How a fix was identified.
/// </summary>
public enum FixMethod
{
    /// <summary>Taken from an official security feed (OVAL, DSA, etc.).</summary>
    SecurityFeed = 0,

    /// <summary>Parsed from a Debian/Ubuntu changelog.</summary>
    Changelog = 1,

    /// <summary>Extracted from a patch header (DEP-3).</summary>
    PatchHeader = 2,

    /// <summary>Matched against an upstream patch database.</summary>
    UpstreamPatchMatch = 3
}
|
||||
|
||||
/// <summary>
/// Common base type of all evidence payloads; it declares no members of its own.
/// </summary>
public abstract record FixEvidencePayload
{
}
|
||||
|
||||
/// <summary>
/// Evidence payload produced by changelog parsing.
/// </summary>
public sealed record ChangelogEvidence : FixEvidencePayload
{
    /// <summary>Path of the changelog file.</summary>
    public required string File { get; init; }

    /// <summary>Version taken from the changelog entry.</summary>
    public required string Version { get; init; }

    /// <summary>Excerpt of the changelog that mentions the CVE.</summary>
    public required string Excerpt { get; init; }

    /// <summary>Line number at which the CVE was mentioned, if tracked.</summary>
    public int? LineNumber { get; init; }
}
|
||||
|
||||
/// <summary>
/// Evidence payload produced by patch header parsing.
/// </summary>
public sealed record PatchHeaderEvidence : FixEvidencePayload
{
    /// <summary>Path of the patch file.</summary>
    public required string PatchPath { get; init; }

    /// <summary>SHA-256 digest of the patch file.</summary>
    public required string PatchSha256 { get; init; }

    /// <summary>Excerpt of the patch header.</summary>
    public required string HeaderExcerpt { get; init; }
}
|
||||
|
||||
/// <summary>
/// Evidence payload taken from an official security feed.
/// </summary>
public sealed record SecurityFeedEvidence : FixEvidencePayload
{
    /// <summary>Identifier of the feed (e.g., "alpine-secfixes", "debian-oval").</summary>
    public required string FeedId { get; init; }

    /// <summary>Identifier of the entry within the feed.</summary>
    public required string EntryId { get; init; }

    /// <summary>Published timestamp as reported for the feed entry.</summary>
    public required DateTimeOffset PublishedAt { get; init; }
}
|
||||
@@ -0,0 +1,92 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
/// <summary>
/// Parses Alpine APKBUILD secfixes section for CVE fix evidence.
/// </summary>
/// <remarks>
/// APKBUILD secfixes format:
/// # secfixes:
/// #   1.2.3-r0:
/// #     - CVE-2024-1234
/// #     - CVE-2024-1235
/// </remarks>
public sealed partial class AlpineSecfixesParser : ISecfixesParser
{
    // RegexOptions.Compiled is omitted on purpose: the regex source generator ignores it.
    [GeneratedRegex(@"^#\s*secfixes:\s*$", RegexOptions.Multiline)]
    private static partial Regex SecfixesPatternRegex();

    [GeneratedRegex(@"^#\s+(\d+\.\d+[^:]*):$")]
    private static partial Regex VersionPatternRegex();

    [GeneratedRegex(@"^#\s+-\s+(CVE-\d{4}-\d{4,7})$")]
    private static partial Regex CvePatternRegex();

    /// <summary>
    /// Parses APKBUILD secfixes section for version-to-CVE mappings.
    /// </summary>
    /// <param name="apkbuild">Full APKBUILD text; LF and CRLF line endings are both accepted.</param>
    /// <param name="distro">Distro identifier copied into every result.</param>
    /// <param name="release">Release identifier copied into every result.</param>
    /// <param name="sourcePkg">Source package name copied into every result.</param>
    /// <returns>
    /// One <see cref="FixEvidence"/> per CVE line that follows a version line inside a
    /// secfixes block, produced lazily. Empty when <paramref name="apkbuild"/> is null or blank.
    /// </returns>
    public IEnumerable<FixEvidence> Parse(
        string apkbuild,
        string distro,
        string release,
        string sourcePkg)
    {
        if (string.IsNullOrWhiteSpace(apkbuild))
        {
            yield break;
        }

        // Read the clock once so every result of a parse carries the same timestamp.
        // NOTE(review): PublishedAt is set to parse time, not a real publication date — confirm this is intended.
        var timestamp = DateTimeOffset.UtcNow;

        var inSecfixes = false;
        string? currentVersion = null;

        foreach (var rawLine in apkbuild.Split('\n'))
        {
            // Strip the '\r' left behind by CRLF files (and other trailing whitespace);
            // otherwise the '$'-anchored version/CVE patterns never match.
            var line = rawLine.TrimEnd();

            if (SecfixesPatternRegex().IsMatch(line))
            {
                inSecfixes = true;

                // A new block must not inherit the version of a previous one.
                currentVersion = null;
                continue;
            }

            if (!inSecfixes)
            {
                continue;
            }

            // Exit secfixes block on non-comment line
            if (!line.TrimStart().StartsWith('#'))
            {
                inSecfixes = false;
                continue;
            }

            var versionMatch = VersionPatternRegex().Match(line);
            if (versionMatch.Success)
            {
                currentVersion = versionMatch.Groups[1].Value;
                continue;
            }

            var cveMatch = CvePatternRegex().Match(line);
            if (!cveMatch.Success || currentVersion is null)
            {
                continue;
            }

            yield return new FixEvidence
            {
                Distro = distro,
                Release = release,
                SourcePkg = sourcePkg,
                CveId = cveMatch.Groups[1].Value,
                State = FixState.Fixed,
                FixedVersion = currentVersion,
                Method = FixMethod.SecurityFeed, // APKBUILD is authoritative
                Confidence = 0.95m,
                Evidence = new SecurityFeedEvidence
                {
                    FeedId = "alpine-secfixes",
                    EntryId = $"{sourcePkg}/{currentVersion}",
                    PublishedAt = timestamp
                },
                CreatedAt = timestamp
            };
        }
    }
}
|
||||
@@ -0,0 +1,81 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
/// <summary>
/// Parses Debian/Ubuntu changelog files for CVE mentions.
/// </summary>
public sealed partial class DebianChangelogParser : IChangelogParser
{
    // Upper bound on the changelog excerpt stored with each piece of evidence.
    private const int MaxExcerptLength = 2000;

    // RegexOptions.Compiled is omitted on purpose: the regex source generator ignores it.
    [GeneratedRegex(@"\bCVE-\d{4}-\d{4,7}\b")]
    private static partial Regex CvePatternRegex();

    [GeneratedRegex(@"^(\S+)\s+\(([^)]+)\)\s+")]
    private static partial Regex EntryHeaderPatternRegex();

    [GeneratedRegex(@"^\s+--\s+")]
    private static partial Regex TrailerPatternRegex();

    /// <summary>
    /// Parses the top entry of a Debian changelog for CVE mentions.
    /// </summary>
    /// <param name="changelog">Full changelog text; leading blank lines and CRLF line endings are tolerated.</param>
    /// <param name="distro">Distro identifier copied into every result.</param>
    /// <param name="release">Release identifier copied into every result.</param>
    /// <param name="sourcePkg">Source package name copied into every result.</param>
    /// <returns>
    /// One <see cref="FixEvidence"/> per distinct CVE id mentioned in the top entry, produced lazily.
    /// Empty when the text is blank or its first non-blank line is not an entry header.
    /// </returns>
    public IEnumerable<FixEvidence> ParseTopEntry(
        string changelog,
        string distro,
        string release,
        string sourcePkg)
    {
        if (string.IsNullOrWhiteSpace(changelog))
        {
            yield break;
        }

        var lines = changelog.Split('\n');

        // The text is not blank, so at least one non-blank line exists. Skipping
        // leading blank lines keeps a stray newline from hiding the whole entry.
        var headerIndex = Array.FindIndex(lines, static l => !string.IsNullOrWhiteSpace(l));
        var headerLine = lines[headerIndex].TrimEnd('\r');

        // Parse first entry header: "package (version) distribution; urgency"
        var headerMatch = EntryHeaderPatternRegex().Match(headerLine);
        if (!headerMatch.Success)
        {
            yield break;
        }

        var version = headerMatch.Groups[2].Value;

        // Collect entry lines until trailer (" -- Maintainer <email>  Date")
        var entryLines = new List<string> { headerLine };
        for (var i = headerIndex + 1; i < lines.Length; i++)
        {
            var line = lines[i].TrimEnd('\r');

            // A missing trailer must not let older entries leak into the top one:
            // stop as soon as the next entry header starts.
            if (EntryHeaderPatternRegex().IsMatch(line))
            {
                break;
            }

            entryLines.Add(line);
            if (TrailerPatternRegex().IsMatch(line))
            {
                break;
            }
        }

        var entryText = string.Join('\n', entryLines);
        var excerpt = entryText.Length > MaxExcerptLength ? entryText[..MaxExcerptLength] : entryText;
        var cves = CvePatternRegex().Matches(entryText)
            .Select(m => m.Value)
            .Distinct()
            .ToList();

        // Read the clock once so every result of a parse carries the same timestamp.
        var createdAt = DateTimeOffset.UtcNow;

        foreach (var cve in cves)
        {
            yield return new FixEvidence
            {
                Distro = distro,
                Release = release,
                SourcePkg = sourcePkg,
                CveId = cve,
                State = FixState.Fixed,
                FixedVersion = version,
                Method = FixMethod.Changelog,
                Confidence = 0.80m,
                Evidence = new ChangelogEvidence
                {
                    File = "debian/changelog",
                    Version = version,
                    Excerpt = excerpt,
                    LineNumber = null // Could be enhanced to track line number
                },
                CreatedAt = createdAt
            };
        }
    }
}
|
||||
@@ -0,0 +1,18 @@
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
/// <summary>
/// Contract for extracting CVE fix evidence from changelogs.
/// </summary>
public interface IChangelogParser
{
    /// <summary>
    /// Scans the top entry of a changelog for CVE mentions.
    /// </summary>
    /// <param name="changelog">Changelog text.</param>
    /// <param name="distro">Distro identifier.</param>
    /// <param name="release">Release/codename.</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <returns>The fix evidence found.</returns>
    IEnumerable<FixEvidence> ParseTopEntry(string changelog, string distro, string release, string sourcePkg);
}
|
||||
@@ -0,0 +1,19 @@
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
/// <summary>
/// Contract for extracting CVE fix evidence from patch files.
/// </summary>
public interface IPatchParser
{
    /// <summary>
    /// Scans patches for CVE mentions in their headers.
    /// </summary>
    /// <param name="patches">Patches to scan, each given as (path, content, sha256).</param>
    /// <param name="distro">Distro identifier.</param>
    /// <param name="release">Release/codename.</param>
    /// <param name="sourcePkg">Source package name.</param>
    /// <param name="version">Package version the patches belong to.</param>
    /// <returns>The fix evidence found.</returns>
    IEnumerable<FixEvidence> ParsePatches(
        IEnumerable<(string path, string content, string sha256)> patches,
        string distro, string release, string sourcePkg, string version);
}
|
||||
@@ -0,0 +1,18 @@
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
/// <summary>
/// Contract for parsers that read the secfixes section of an Alpine APKBUILD
/// and report CVE mappings as fix evidence.
/// </summary>
public interface ISecfixesParser
{
    /// <summary>
    /// Parses APKBUILD secfixes section for version-to-CVE mappings.
    /// </summary>
    /// <param name="apkbuild">APKBUILD text to inspect.</param>
    /// <param name="distro">Distribution the APKBUILD belongs to.</param>
    /// <param name="release">Distribution release the APKBUILD belongs to.</param>
    /// <param name="sourcePkg">Source package the APKBUILD belongs to.</param>
    /// <returns>The fix evidence derived from the secfixes section.</returns>
    IEnumerable<FixEvidence> Parse(string apkbuild, string distro, string release, string sourcePkg);
}
|
||||
@@ -0,0 +1,60 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.BinaryIndex.FixIndex.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
|
||||
|
||||
/// <summary>
/// Parses patch headers (DEP-3 format) for CVE mentions.
/// </summary>
/// <remarks>
/// Only the first <see cref="MaxHeaderLines"/> lines of each patch, plus the patch file name,
/// are searched. Every distinct CVE identifier found yields one <see cref="FixEvidence"/>.
/// </remarks>
public sealed partial class PatchHeaderParser : IPatchParser
{
    /// <summary>Number of leading patch lines treated as the header.</summary>
    private const int MaxHeaderLines = 80;

    /// <summary>Maximum number of header characters kept in the evidence excerpt.</summary>
    private const int MaxExcerptLength = 1200;

    // Case-insensitive so lower-case mentions (for example a file named "cve-2024-1234.patch")
    // are recognised too. RegexOptions.Compiled is omitted because the regex source generator
    // ignores it.
    [GeneratedRegex(@"\bCVE-\d{4}-\d{4,7}\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex CvePatternRegex();

    /// <summary>
    /// Parses patches for CVE mentions in headers.
    /// </summary>
    /// <param name="patches">Patches to inspect, each given as (path, content, sha256).</param>
    /// <param name="distro">Value stored in <c>Distro</c> of every emitted evidence item.</param>
    /// <param name="release">Value stored in <c>Release</c> of every emitted evidence item.</param>
    /// <param name="sourcePkg">Value stored in <c>SourcePkg</c> of every emitted evidence item.</param>
    /// <param name="version">Value stored in <c>FixedVersion</c> of every emitted evidence item.</param>
    /// <returns>
    /// A lazily evaluated sequence with one item per distinct CVE identifier per patch.
    /// Identifiers are reported in upper case.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="patches"/> is null.</exception>
    public IEnumerable<FixEvidence> ParsePatches(
        IEnumerable<(string path, string content, string sha256)> patches,
        string distro,
        string release,
        string sourcePkg,
        string version)
    {
        // Validate here rather than inside the iterator so the failure surfaces at the call
        // site instead of on first enumeration.
        ArgumentNullException.ThrowIfNull(patches);
        return ParsePatchesCore(patches, distro, release, sourcePkg, version);
    }

    /// <summary>Iterator behind <see cref="ParsePatches"/>; arguments are already validated.</summary>
    private static IEnumerable<FixEvidence> ParsePatchesCore(
        IEnumerable<(string path, string content, string sha256)> patches,
        string distro,
        string release,
        string sourcePkg,
        string version)
    {
        foreach (var (path, content, sha256) in patches)
        {
            var header = ExtractHeader(content);

            // Also check filename for CVE (e.g., "CVE-2024-1234.patch")
            var searchText = header + "\n" + Path.GetFileName(path);

            var cves = CvePatternRegex().Matches(searchText)
                .Select(m => m.Value.ToUpperInvariant())
                .Distinct(StringComparer.Ordinal)
                .ToList();

            foreach (var cve in cves)
            {
                yield return new FixEvidence
                {
                    Distro = distro,
                    Release = release,
                    SourcePkg = sourcePkg,
                    CveId = cve,
                    State = FixState.Fixed,
                    FixedVersion = version,
                    Method = FixMethod.PatchHeader,
                    Confidence = 0.87m,
                    Evidence = new PatchHeaderEvidence
                    {
                        PatchPath = path,
                        PatchSha256 = sha256,
                        HeaderExcerpt = header.Length > MaxExcerptLength ? header[..MaxExcerptLength] : header
                    },
                    CreatedAt = DateTimeOffset.UtcNow
                };
            }
        }
    }

    /// <summary>
    /// Returns the first <see cref="MaxHeaderLines"/> lines of <paramref name="content"/>
    /// (split on '\n', joined back with '\n'), or the whole text when it has fewer lines.
    /// Scans only as far as needed instead of splitting the entire patch.
    /// </summary>
    private static string ExtractHeader(string content)
    {
        var position = 0;
        for (var line = 1; line <= MaxHeaderLines; line++)
        {
            var newline = content.IndexOf('\n', position);
            if (newline < 0)
            {
                return content;
            }

            if (line == MaxHeaderLines)
            {
                // The header ends just before the newline that terminates the last header line.
                return content[..newline];
            }

            position = newline + 1;
        }

        return content;
    }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,36 @@
|
||||
using Npgsql;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence;
|
||||
|
||||
/// <summary>
/// Database context for BinaryIndex with tenant isolation.
/// </summary>
/// <remarks>
/// Every connection handed out has the <c>app.tenant_id</c> session setting populated from
/// the injected <see cref="ITenantContext"/>.
/// </remarks>
public sealed class BinaryIndexDbContext
{
    private readonly NpgsqlDataSource _dataSource;
    private readonly ITenantContext _tenantContext;

    /// <summary>
    /// Creates a context that opens connections from <paramref name="dataSource"/> on behalf of
    /// the tenant described by <paramref name="tenantContext"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public BinaryIndexDbContext(
        NpgsqlDataSource dataSource,
        ITenantContext tenantContext)
    {
        ArgumentNullException.ThrowIfNull(dataSource);
        ArgumentNullException.ThrowIfNull(tenantContext);

        _dataSource = dataSource;
        _tenantContext = tenantContext;
    }

    /// <summary>
    /// Opens a connection with the tenant context set for RLS.
    /// </summary>
    /// <param name="ct">Token used to cancel opening the connection or setting the tenant.</param>
    /// <returns>An open connection; the caller owns it and must dispose it.</returns>
    public async Task<NpgsqlConnection> OpenConnectionAsync(CancellationToken ct = default)
    {
        var connection = await _dataSource.OpenConnectionAsync(ct);

        try
        {
            // SET cannot take bind parameters, so use set_config() and pass the tenant id as a
            // parameter instead of splicing it into the SQL text. The third argument (false)
            // makes the value session-scoped, like the SET statement it replaces.
            await using var cmd = connection.CreateCommand();
            cmd.CommandText = "SELECT set_config('app.tenant_id', @tenant_id, false)";
            cmd.Parameters.AddWithValue("tenant_id", $"{_tenantContext.TenantId}");
            await cmd.ExecuteNonQueryAsync(ct);

            return connection;
        }
        catch
        {
            // The caller never receives the connection on failure, so release it here.
            await connection.DisposeAsync();
            throw;
        }
    }
}
|
||||
@@ -0,0 +1,79 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence;
|
||||
|
||||
/// <summary>
/// Runs embedded SQL migrations for the binaries schema.
/// </summary>
/// <remarks>
/// Each call executes every embedded script again, in ordinal name order. No record of
/// previously applied scripts is kept here, so every script has to be safe to re-run.
/// </remarks>
public sealed class BinaryIndexMigrationRunner
{
    private const string MigrationLockName = "binaries_schema_migration";
    private const string MigrationResourcePrefix = "StellaOps.BinaryIndex.Persistence.Migrations.";

    // string.GetHashCode() is not stable across processes, so it cannot be used as a lock key
    // shared between application instances. Derive the key from a SHA-256 digest instead.
    private static readonly long MigrationLockId = ComputeStableLockId(MigrationLockName);

    private readonly NpgsqlDataSource _dataSource;
    private readonly ILogger<BinaryIndexMigrationRunner> _logger;

    /// <summary>
    /// Creates a runner that migrates the database reachable through <paramref name="dataSource"/>.
    /// </summary>
    public BinaryIndexMigrationRunner(
        NpgsqlDataSource dataSource,
        ILogger<BinaryIndexMigrationRunner> logger)
    {
        _dataSource = dataSource;
        _logger = logger;
    }

    /// <summary>
    /// Applies all embedded migrations to the database.
    /// </summary>
    /// <param name="ct">Token used to cancel opening the connection and running the scripts.</param>
    /// <remarks>
    /// Returns without applying anything when another session already holds the migration
    /// advisory lock.
    /// </remarks>
    public async Task MigrateAsync(CancellationToken ct = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(ct);

        // Acquire advisory lock to prevent concurrent migrations
        if (!await TryAcquireLockAsync(connection, ct))
        {
            _logger.LogInformation("Migration already in progress, skipping");
            return;
        }

        try
        {
            foreach (var (name, sql) in GetEmbeddedMigrations().OrderBy(m => m.name, StringComparer.Ordinal))
            {
                _logger.LogInformation("Applying migration: {Name}", name);

                await using var cmd = connection.CreateCommand();
                cmd.CommandText = sql;
                await cmd.ExecuteNonQueryAsync(ct);

                _logger.LogInformation("Migration {Name} applied successfully", name);
            }
        }
        finally
        {
            await ReleaseLockAsync(connection);
        }
    }

    /// <summary>Tries to take the session-level migration advisory lock without waiting.</summary>
    private static async Task<bool> TryAcquireLockAsync(NpgsqlConnection connection, CancellationToken ct)
    {
        await using var lockCmd = connection.CreateCommand();
        lockCmd.CommandText = "SELECT pg_try_advisory_lock(@lock_id)";
        lockCmd.Parameters.AddWithValue("lock_id", MigrationLockId);

        return await lockCmd.ExecuteScalarAsync(ct) is true;
    }

    /// <summary>
    /// Releases the migration advisory lock. Runs without the caller's token so a cancelled
    /// migration still unlocks, and never throws so it cannot hide the migration's own failure.
    /// </summary>
    private async Task ReleaseLockAsync(NpgsqlConnection connection)
    {
        try
        {
            await using var unlockCmd = connection.CreateCommand();
            unlockCmd.CommandText = "SELECT pg_advisory_unlock(@lock_id)";
            unlockCmd.Parameters.AddWithValue("lock_id", MigrationLockId);
            await unlockCmd.ExecuteScalarAsync(CancellationToken.None);
        }
        catch (Exception ex)
        {
            // PostgreSQL drops session-level advisory locks when the session ends, so a failed
            // explicit unlock is logged rather than rethrown.
            _logger.LogWarning(ex, "Failed to release migration advisory lock");
        }
    }

    /// <summary>Maps a lock name to a 64-bit key that is identical in every process.</summary>
    private static long ComputeStableLockId(string key)
    {
        var digest = System.Security.Cryptography.SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(key));
        return System.Buffers.Binary.BinaryPrimitives.ReadInt64BigEndian(digest);
    }

    /// <summary>
    /// Reads every embedded resource under <see cref="MigrationResourcePrefix"/> that ends in
    /// ".sql" and yields its name (prefix removed) together with its text.
    /// </summary>
    private static IEnumerable<(string name, string sql)> GetEmbeddedMigrations()
    {
        var assembly = typeof(BinaryIndexMigrationRunner).Assembly;

        var resourceNames = assembly.GetManifestResourceNames()
            .Where(n => n.StartsWith(MigrationResourcePrefix, StringComparison.Ordinal)
                     && n.EndsWith(".sql", StringComparison.Ordinal));

        foreach (var resourceName in resourceNames)
        {
            using var stream = assembly.GetManifestResourceStream(resourceName)
                ?? throw new InvalidOperationException($"Embedded migration '{resourceName}' could not be opened.");
            using var reader = new StreamReader(stream);

            yield return (resourceName[MigrationResourcePrefix.Length..], reader.ReadToEnd());
        }
    }
}
|
||||
@@ -0,0 +1,193 @@
|
||||
-- 001_create_binaries_schema.sql
|
||||
-- Creates the binaries schema for BinaryIndex module
|
||||
-- Author: BinaryIndex Team
|
||||
-- Date: 2025-12-22
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- ============================================================================
|
||||
-- SCHEMA CREATION
|
||||
-- ============================================================================
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS binaries;
|
||||
CREATE SCHEMA IF NOT EXISTS binaries_app;
|
||||
|
||||
-- RLS helper function
-- Returns the value of the app.tenant_id session setting and raises an exception
-- when the setting is missing or empty.
-- search_path is pinned because the function is SECURITY DEFINER: name resolution
-- inside it must not depend on the caller's search_path.
CREATE OR REPLACE FUNCTION binaries_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
SET search_path = pg_catalog, pg_temp
AS $$
DECLARE
    v_tenant TEXT;
BEGIN
    -- missing_ok = true: an unset setting yields NULL instead of an error,
    -- so the check below can raise a descriptive message.
    v_tenant := current_setting('app.tenant_id', true);
    IF v_tenant IS NULL OR v_tenant = '' THEN
        RAISE EXCEPTION 'app.tenant_id session variable not set';
    END IF;
    RETURN v_tenant;
END;
$$;
|
||||
|
||||
-- ============================================================================
|
||||
-- CORE TABLES
|
||||
-- ============================================================================
|
||||
|
||||
-- binary_identity table
|
||||
CREATE TABLE IF NOT EXISTS binaries.binary_identity (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
binary_key TEXT NOT NULL,
|
||||
build_id TEXT,
|
||||
build_id_type TEXT CHECK (build_id_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
|
||||
file_sha256 TEXT NOT NULL,
|
||||
text_sha256 TEXT,
|
||||
blake3_hash TEXT,
|
||||
format TEXT NOT NULL CHECK (format IN ('elf', 'pe', 'macho')),
|
||||
architecture TEXT NOT NULL,
|
||||
osabi TEXT,
|
||||
binary_type TEXT CHECK (binary_type IN ('executable', 'shared_library', 'static_library', 'object')),
|
||||
is_stripped BOOLEAN DEFAULT FALSE,
|
||||
first_seen_snapshot_id UUID,
|
||||
last_seen_snapshot_id UUID,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT binary_identity_key_unique UNIQUE (tenant_id, binary_key)
|
||||
);
|
||||
|
||||
-- corpus_snapshots table
|
||||
CREATE TABLE IF NOT EXISTS binaries.corpus_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
distro TEXT NOT NULL,
|
||||
release TEXT NOT NULL,
|
||||
architecture TEXT NOT NULL,
|
||||
snapshot_id TEXT NOT NULL,
|
||||
packages_processed INT NOT NULL DEFAULT 0,
|
||||
binaries_indexed INT NOT NULL DEFAULT 0,
|
||||
repo_metadata_digest TEXT,
|
||||
signing_key_id TEXT,
|
||||
dsse_envelope_ref TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
|
||||
error TEXT,
|
||||
started_at TIMESTAMPTZ,
|
||||
completed_at TIMESTAMPTZ,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT corpus_snapshots_unique UNIQUE (tenant_id, distro, release, architecture, snapshot_id)
|
||||
);
|
||||
|
||||
-- binary_package_map table
|
||||
CREATE TABLE IF NOT EXISTS binaries.binary_package_map (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
binary_identity_id UUID NOT NULL REFERENCES binaries.binary_identity(id) ON DELETE CASCADE,
|
||||
binary_key TEXT NOT NULL,
|
||||
distro TEXT NOT NULL,
|
||||
release TEXT NOT NULL,
|
||||
source_pkg TEXT NOT NULL,
|
||||
binary_pkg TEXT NOT NULL,
|
||||
pkg_version TEXT NOT NULL,
|
||||
pkg_purl TEXT,
|
||||
architecture TEXT NOT NULL,
|
||||
file_path_in_pkg TEXT NOT NULL,
|
||||
snapshot_id UUID NOT NULL REFERENCES binaries.corpus_snapshots(id),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT binary_package_map_unique UNIQUE (binary_identity_id, snapshot_id, file_path_in_pkg)
|
||||
);
|
||||
|
||||
-- vulnerable_buildids table
|
||||
CREATE TABLE IF NOT EXISTS binaries.vulnerable_buildids (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
buildid_type TEXT NOT NULL CHECK (buildid_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
|
||||
buildid_value TEXT NOT NULL,
|
||||
purl TEXT NOT NULL,
|
||||
pkg_version TEXT NOT NULL,
|
||||
distro TEXT,
|
||||
release TEXT,
|
||||
confidence TEXT NOT NULL DEFAULT 'exact' CHECK (confidence IN ('exact', 'inferred', 'heuristic')),
|
||||
provenance JSONB DEFAULT '{}',
|
||||
snapshot_id UUID REFERENCES binaries.corpus_snapshots(id),
|
||||
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT vulnerable_buildids_unique UNIQUE (tenant_id, buildid_value, buildid_type, purl, pkg_version)
|
||||
);
|
||||
|
||||
-- binary_vuln_assertion table
|
||||
CREATE TABLE IF NOT EXISTS binaries.binary_vuln_assertion (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
binary_key TEXT NOT NULL,
|
||||
binary_identity_id UUID REFERENCES binaries.binary_identity(id),
|
||||
cve_id TEXT NOT NULL,
|
||||
advisory_id UUID,
|
||||
status TEXT NOT NULL CHECK (status IN ('affected', 'not_affected', 'fixed', 'unknown')),
|
||||
method TEXT NOT NULL CHECK (method IN ('range_match', 'buildid_catalog', 'fingerprint_match', 'fix_index')),
|
||||
confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
|
||||
evidence_ref TEXT,
|
||||
evidence_digest TEXT,
|
||||
evaluated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT binary_vuln_assertion_unique UNIQUE (tenant_id, binary_key, cve_id)
|
||||
);
|
||||
|
||||
-- ============================================================================
|
||||
-- INDEXES
|
||||
-- ============================================================================
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_identity_tenant ON binaries.binary_identity(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_identity_buildid ON binaries.binary_identity(build_id) WHERE build_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_identity_sha256 ON binaries.binary_identity(file_sha256);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_identity_key ON binaries.binary_identity(binary_key);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_package_map_tenant ON binaries.binary_package_map(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_package_map_binary ON binaries.binary_package_map(binary_identity_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_package_map_distro ON binaries.binary_package_map(distro, release, source_pkg);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_package_map_snapshot ON binaries.binary_package_map(snapshot_id);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_tenant ON binaries.corpus_snapshots(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_distro ON binaries.corpus_snapshots(distro, release, architecture);
|
||||
CREATE INDEX IF NOT EXISTS idx_corpus_snapshots_status ON binaries.corpus_snapshots(status) WHERE status IN ('pending', 'processing');
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_tenant ON binaries.vulnerable_buildids(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_value ON binaries.vulnerable_buildids(buildid_type, buildid_value);
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_buildids_purl ON binaries.vulnerable_buildids(purl);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_tenant ON binaries.binary_vuln_assertion(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_binary ON binaries.binary_vuln_assertion(binary_key);
|
||||
CREATE INDEX IF NOT EXISTS idx_binary_vuln_assertion_cve ON binaries.binary_vuln_assertion(cve_id);
|
||||
|
||||
-- ============================================================================
|
||||
-- ROW-LEVEL SECURITY
|
||||
-- ============================================================================
|
||||
|
||||
-- Each table gets one tenant-isolation policy: a row is visible and writable only when
-- its tenant_id matches binaries_app.require_current_tenant().
-- CREATE POLICY has no IF NOT EXISTS form, so each policy is dropped first; this keeps
-- the script re-runnable like the IF NOT EXISTS statements above.

ALTER TABLE binaries.binary_identity ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_identity FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_identity_tenant_isolation ON binaries.binary_identity;
CREATE POLICY binary_identity_tenant_isolation ON binaries.binary_identity
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.corpus_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.corpus_snapshots FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots;
CREATE POLICY corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.binary_package_map ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_package_map FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_package_map_tenant_isolation ON binaries.binary_package_map;
CREATE POLICY binary_package_map_tenant_isolation ON binaries.binary_package_map
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.vulnerable_buildids ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_buildids FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids;
CREATE POLICY vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.binary_vuln_assertion ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_vuln_assertion FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion;
CREATE POLICY binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,158 @@
|
||||
-- 002_create_fingerprint_tables.sql
|
||||
-- Adds fingerprint-related tables for MVP 3
|
||||
|
||||
-- Advisory lock to prevent concurrent migrations
|
||||
SELECT pg_advisory_lock(hashtext('binaries_schema_002_fingerprints'));
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Fix index tables (from MVP 2)
|
||||
CREATE TABLE IF NOT EXISTS binaries.cve_fix_evidence (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
distro TEXT NOT NULL,
|
||||
release TEXT NOT NULL,
|
||||
source_pkg TEXT NOT NULL,
|
||||
cve_id TEXT NOT NULL,
|
||||
state TEXT NOT NULL CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
|
||||
fixed_version TEXT,
|
||||
method TEXT NOT NULL CHECK (method IN ('security_feed', 'changelog', 'patch_header', 'upstream_patch_match')),
|
||||
confidence NUMERIC(3,2) NOT NULL CHECK (confidence >= 0 AND confidence <= 1),
|
||||
evidence JSONB NOT NULL,
|
||||
snapshot_id UUID REFERENCES binaries.corpus_snapshots(id),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS binaries.cve_fix_index (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
distro TEXT NOT NULL,
|
||||
release TEXT NOT NULL,
|
||||
source_pkg TEXT NOT NULL,
|
||||
cve_id TEXT NOT NULL,
|
||||
architecture TEXT,
|
||||
state TEXT NOT NULL CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
|
||||
fixed_version TEXT,
|
||||
primary_method TEXT NOT NULL,
|
||||
confidence NUMERIC(3,2) NOT NULL,
|
||||
evidence_ids UUID[],
|
||||
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT cve_fix_index_unique UNIQUE (tenant_id, distro, release, source_pkg, cve_id, architecture)
|
||||
);
|
||||
|
||||
-- Fingerprint tables
|
||||
CREATE TABLE IF NOT EXISTS binaries.vulnerable_fingerprints (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
cve_id TEXT NOT NULL,
|
||||
component TEXT NOT NULL,
|
||||
purl TEXT,
|
||||
algorithm TEXT NOT NULL CHECK (algorithm IN ('basic_block', 'control_flow_graph', 'string_refs', 'combined')),
|
||||
fingerprint_id TEXT NOT NULL,
|
||||
fingerprint_hash BYTEA NOT NULL,
|
||||
architecture TEXT NOT NULL,
|
||||
function_name TEXT,
|
||||
source_file TEXT,
|
||||
source_line INT,
|
||||
similarity_threshold NUMERIC(3,2) DEFAULT 0.95,
|
||||
confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
|
||||
validated BOOLEAN DEFAULT FALSE,
|
||||
validation_stats JSONB DEFAULT '{}',
|
||||
vuln_build_ref TEXT,
|
||||
fixed_build_ref TEXT,
|
||||
notes TEXT,
|
||||
evidence_ref TEXT,
|
||||
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT vulnerable_fingerprints_unique UNIQUE (tenant_id, cve_id, algorithm, fingerprint_id, architecture)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS binaries.fingerprint_corpus_metadata (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
purl TEXT NOT NULL,
|
||||
version TEXT NOT NULL,
|
||||
algorithm TEXT NOT NULL,
|
||||
binary_digest TEXT,
|
||||
function_count INT NOT NULL DEFAULT 0,
|
||||
fingerprints_indexed INT NOT NULL DEFAULT 0,
|
||||
indexed_by TEXT,
|
||||
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT fingerprint_corpus_metadata_unique UNIQUE (tenant_id, purl, version, algorithm)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS binaries.fingerprint_matches (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
scan_id UUID NOT NULL,
|
||||
match_type TEXT NOT NULL CHECK (match_type IN ('fingerprint', 'buildid', 'hash_exact')),
|
||||
binary_key TEXT NOT NULL,
|
||||
binary_identity_id UUID REFERENCES binaries.binary_identity(id),
|
||||
vulnerable_purl TEXT NOT NULL,
|
||||
vulnerable_version TEXT NOT NULL,
|
||||
matched_fingerprint_id UUID REFERENCES binaries.vulnerable_fingerprints(id),
|
||||
matched_function TEXT,
|
||||
similarity NUMERIC(3,2),
|
||||
advisory_ids TEXT[],
|
||||
reachability_status TEXT CHECK (reachability_status IN ('reachable', 'unreachable', 'unknown', 'partial')),
|
||||
evidence JSONB DEFAULT '{}',
|
||||
matched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Indexes
|
||||
CREATE INDEX IF NOT EXISTS idx_cve_fix_evidence_tenant ON binaries.cve_fix_evidence(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_cve_fix_evidence_key ON binaries.cve_fix_evidence(distro, release, source_pkg, cve_id);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cve_fix_index_tenant ON binaries.cve_fix_index(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_cve_fix_index_lookup ON binaries.cve_fix_index(distro, release, source_pkg, cve_id);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_tenant ON binaries.vulnerable_fingerprints(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_cve ON binaries.vulnerable_fingerprints(cve_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_component ON binaries.vulnerable_fingerprints(component, architecture);
|
||||
CREATE INDEX IF NOT EXISTS idx_vulnerable_fingerprints_hash ON binaries.vulnerable_fingerprints USING hash (fingerprint_hash);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_fingerprint_corpus_tenant ON binaries.fingerprint_corpus_metadata(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_fingerprint_corpus_purl ON binaries.fingerprint_corpus_metadata(purl, version);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_fingerprint_matches_tenant ON binaries.fingerprint_matches(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_fingerprint_matches_scan ON binaries.fingerprint_matches(scan_id);
|
||||
|
||||
-- RLS
-- Each table gets one tenant-isolation policy: a row is visible and writable only when
-- its tenant_id matches binaries_app.require_current_tenant().
-- CREATE POLICY has no IF NOT EXISTS form, so each policy is dropped first; this keeps
-- the script re-runnable like the IF NOT EXISTS statements above.

ALTER TABLE binaries.cve_fix_evidence ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_evidence FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS cve_fix_evidence_tenant_isolation ON binaries.cve_fix_evidence;
CREATE POLICY cve_fix_evidence_tenant_isolation ON binaries.cve_fix_evidence
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.cve_fix_index ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_index FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS cve_fix_index_tenant_isolation ON binaries.cve_fix_index;
CREATE POLICY cve_fix_index_tenant_isolation ON binaries.cve_fix_index
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.vulnerable_fingerprints ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_fingerprints FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS vulnerable_fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints;
CREATE POLICY vulnerable_fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.fingerprint_corpus_metadata ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_corpus_metadata FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS fingerprint_corpus_metadata_tenant_isolation ON binaries.fingerprint_corpus_metadata;
CREATE POLICY fingerprint_corpus_metadata_tenant_isolation ON binaries.fingerprint_corpus_metadata
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());

ALTER TABLE binaries.fingerprint_matches ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_matches FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS fingerprint_matches_tenant_isolation ON binaries.fingerprint_matches;
CREATE POLICY fingerprint_matches_tenant_isolation ON binaries.fingerprint_matches
    FOR ALL USING (tenant_id::text = binaries_app.require_current_tenant())
    WITH CHECK (tenant_id::text = binaries_app.require_current_tenant());
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- Release advisory lock
|
||||
SELECT pg_advisory_unlock(hashtext('binaries_schema_002_fingerprints'));
|
||||
@@ -0,0 +1,153 @@
|
||||
using System.Collections.Immutable;
|
||||
using Dapper;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
|
||||
/// <summary>
/// Repository implementation for binary identity operations.
/// </summary>
/// <remarks>
/// All statements run on connections obtained from <see cref="BinaryIndexDbContext"/>.
/// </remarks>
public sealed class BinaryIdentityRepository : IBinaryIdentityRepository
{
    /// <summary>Column list shared by every statement that returns identity rows.</summary>
    private const string IdentityColumns = """
        id, tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
        format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
        last_seen_snapshot_id, created_at, updated_at
        """;

    private readonly BinaryIndexDbContext _dbContext;

    /// <summary>Creates a repository that reads and writes through <paramref name="dbContext"/>.</summary>
    public BinaryIdentityRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Looks up one identity by build id and build id type.
    /// </summary>
    /// <returns>The matching identity, or null when no row matches.</returns>
    public async Task<BinaryIdentity?> GetByBuildIdAsync(string buildId, string buildIdType, CancellationToken ct)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = $"""
            SELECT {IdentityColumns}
            FROM binaries.binary_identity
            WHERE build_id = @BuildId AND build_id_type = @BuildIdType
            LIMIT 1
            """;

        // CommandDefinition is used so the cancellation token actually reaches the query.
        var command = new CommandDefinition(
            sql,
            new { BuildId = buildId, BuildIdType = buildIdType },
            cancellationToken: ct);

        var row = await conn.QuerySingleOrDefaultAsync<BinaryIdentityRow>(command);
        return row?.ToModel();
    }

    /// <summary>
    /// Looks up one identity by its binary key.
    /// </summary>
    /// <returns>The matching identity, or null when no row matches.</returns>
    public async Task<BinaryIdentity?> GetByKeyAsync(string binaryKey, CancellationToken ct)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = $"""
            SELECT {IdentityColumns}
            FROM binaries.binary_identity
            WHERE binary_key = @BinaryKey
            LIMIT 1
            """;

        var command = new CommandDefinition(sql, new { BinaryKey = binaryKey }, cancellationToken: ct);

        var row = await conn.QuerySingleOrDefaultAsync<BinaryIdentityRow>(command);
        return row?.ToModel();
    }

    /// <summary>
    /// Inserts the identity, or, when a row with the same tenant and binary key already exists,
    /// updates only that row's <c>updated_at</c> and <c>last_seen_snapshot_id</c>.
    /// </summary>
    /// <returns>The row as stored after the statement.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="identity"/> is null.</exception>
    public async Task<BinaryIdentity> UpsertAsync(BinaryIdentity identity, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(identity);

        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = $"""
            INSERT INTO binaries.binary_identity (
                tenant_id, binary_key, build_id, build_id_type, file_sha256, text_sha256, blake3_hash,
                format, architecture, osabi, binary_type, is_stripped, first_seen_snapshot_id,
                last_seen_snapshot_id, created_at, updated_at
            ) VALUES (
                current_setting('app.tenant_id')::uuid, @BinaryKey, @BuildId, @BuildIdType, @FileSha256,
                @TextSha256, @Blake3Hash, @Format, @Architecture, @OsAbi, @BinaryType, @IsStripped,
                @FirstSeenSnapshotId, @LastSeenSnapshotId, @CreatedAt, @UpdatedAt
            )
            ON CONFLICT (tenant_id, binary_key) DO UPDATE SET
                updated_at = EXCLUDED.updated_at,
                last_seen_snapshot_id = EXCLUDED.last_seen_snapshot_id
            RETURNING {IdentityColumns}
            """;

        var parameters = new
        {
            identity.BinaryKey,
            identity.BuildId,
            identity.BuildIdType,
            identity.FileSha256,
            identity.TextSha256,
            identity.Blake3Hash,
            Format = identity.Format.ToString().ToLowerInvariant(),
            identity.Architecture,
            identity.OsAbi,
            BinaryType = ToDbBinaryType(identity.Type),
            identity.IsStripped,
            identity.FirstSeenSnapshotId,
            identity.LastSeenSnapshotId,
            identity.CreatedAt,
            identity.UpdatedAt
        };

        var row = await conn.QuerySingleAsync<BinaryIdentityRow>(
            new CommandDefinition(sql, parameters, cancellationToken: ct));

        return row.ToModel();
    }

    /// <summary>
    /// Fetches every identity whose binary key is in <paramref name="binaryKeys"/>.
    /// </summary>
    /// <returns>The matching identities; empty when no keys are supplied or none match.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="binaryKeys"/> is null.</exception>
    public async Task<ImmutableArray<BinaryIdentity>> GetBatchAsync(IEnumerable<string> binaryKeys, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(binaryKeys);

        var keys = binaryKeys.ToArray();
        if (keys.Length == 0)
        {
            // Nothing can match an empty key set, so skip the round trip.
            return ImmutableArray<BinaryIdentity>.Empty;
        }

        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = $"""
            SELECT {IdentityColumns}
            FROM binaries.binary_identity
            WHERE binary_key = ANY(@BinaryKeys)
            """;

        var rows = await conn.QueryAsync<BinaryIdentityRow>(
            new CommandDefinition(sql, new { BinaryKeys = keys }, cancellationToken: ct));

        return rows.Select(r => r.ToModel()).ToImmutableArray();
    }

    /// <summary>
    /// Converts a <see cref="BinaryType"/> to the snake_case text accepted by the
    /// <c>binary_type</c> CHECK constraint ('executable', 'shared_library', 'static_library',
    /// 'object'). Plain <c>ToString().ToLowerInvariant()</c> would yield "sharedlibrary", which
    /// the constraint rejects.
    /// </summary>
    private static string? ToDbBinaryType(BinaryType? type) => type switch
    {
        null => null,
        BinaryType.Executable => "executable",
        BinaryType.SharedLibrary => "shared_library",
        BinaryType.StaticLibrary => "static_library",
        BinaryType.Object => "object",
        _ => throw new ArgumentOutOfRangeException(nameof(type), type, "Unsupported binary type.")
    };

    /// <summary>
    /// Converts stored <c>binary_type</c> text back to <see cref="BinaryType"/>. Underscores are
    /// removed first so "shared_library" parses as <see cref="BinaryType.SharedLibrary"/>.
    /// </summary>
    private static BinaryType? FromDbBinaryType(string? value) =>
        value is null
            ? null
            : Enum.Parse<BinaryType>(value.Replace("_", string.Empty, StringComparison.Ordinal), ignoreCase: true);

    /// <summary>
    /// Row shape materialised by Dapper for the identity statements.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the statements return snake_case column names; Dapper binds those to these
    /// PascalCase properties only when <c>DefaultTypeMap.MatchNamesWithUnderscores</c> is
    /// enabled — confirm that is configured at startup.
    /// </remarks>
    private sealed record BinaryIdentityRow
    {
        public Guid Id { get; init; }
        public Guid TenantId { get; init; }
        public string BinaryKey { get; init; } = string.Empty;
        public string? BuildId { get; init; }
        public string? BuildIdType { get; init; }
        public string FileSha256 { get; init; } = string.Empty;
        public string? TextSha256 { get; init; }
        public string? Blake3Hash { get; init; }
        public string Format { get; init; } = string.Empty;
        public string Architecture { get; init; } = string.Empty;
        public string? OsAbi { get; init; }
        public string? BinaryType { get; init; }
        public bool IsStripped { get; init; }
        public Guid? FirstSeenSnapshotId { get; init; }
        public Guid? LastSeenSnapshotId { get; init; }
        public DateTimeOffset CreatedAt { get; init; }
        public DateTimeOffset UpdatedAt { get; init; }

        /// <summary>Converts the row to the domain model, parsing the text enum columns.</summary>
        public BinaryIdentity ToModel() => new()
        {
            Id = Id,
            BinaryKey = BinaryKey,
            BuildId = BuildId,
            BuildIdType = BuildIdType,
            FileSha256 = FileSha256,
            TextSha256 = TextSha256,
            Blake3Hash = Blake3Hash,
            Format = Enum.Parse<BinaryFormat>(Format, ignoreCase: true),
            Architecture = Architecture,
            OsAbi = OsAbi,
            Type = FromDbBinaryType(BinaryType),
            IsStripped = IsStripped,
            FirstSeenSnapshotId = FirstSeenSnapshotId,
            LastSeenSnapshotId = LastSeenSnapshotId,
            CreatedAt = CreatedAt,
            UpdatedAt = UpdatedAt
        };
    }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
using System.Collections.Immutable;
|
||||
using Dapper;
|
||||
using StellaOps.BinaryIndex.Core.Services;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
|
||||
/// <summary>
/// Dapper-backed repository that reads vulnerability assertions from
/// <c>binaries.binary_vuln_assertion</c>.
/// </summary>
public sealed class BinaryVulnAssertionRepository : IBinaryVulnAssertionRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    /// <summary>
    /// Creates the repository.
    /// </summary>
    /// <param name="dbContext">Context used to open database connections.</param>
    public BinaryVulnAssertionRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Returns every assertion row whose <c>binary_key</c> equals <paramref name="binaryKey"/>.
    /// </summary>
    /// <param name="binaryKey">Binary key to filter on.</param>
    /// <param name="ct">Token that cancels both opening the connection and the query itself.</param>
    /// <returns>The matching assertions; empty when no row matches.</returns>
    public async Task<ImmutableArray<BinaryVulnAssertion>> GetByBinaryKeyAsync(string binaryKey, CancellationToken ct)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            SELECT id, binary_key, cve_id, status, method, confidence
            FROM binaries.binary_vuln_assertion
            WHERE binary_key = @BinaryKey
            """;

        // Dapper only observes cancellation when the token travels inside a CommandDefinition;
        // the plain (sql, param) overload would ignore ct.
        // NOTE(review): snake_case columns (binary_key, cve_id) map onto the model only if
        // Dapper's MatchNamesWithUnderscores is enabled elsewhere - confirm.
        var command = new CommandDefinition(sql, new { BinaryKey = binaryKey }, cancellationToken: ct);

        var rows = await conn.QueryAsync<BinaryVulnAssertion>(command);
        return rows.ToImmutableArray();
    }
}
|
||||
@@ -0,0 +1,127 @@
|
||||
using Dapper;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Corpus;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
|
||||
/// <summary>
/// Repository for corpus snapshots stored in <c>binaries.corpus_snapshots</c>.
/// </summary>
public sealed class CorpusSnapshotRepository : ICorpusSnapshotRepository
{
    private readonly BinaryIndexDbContext _dbContext;
    private readonly ILogger<CorpusSnapshotRepository> _logger;

    /// <summary>
    /// Creates the repository.
    /// </summary>
    /// <param name="dbContext">Context used to open database connections.</param>
    /// <param name="logger">Logger that records snapshot creation.</param>
    public CorpusSnapshotRepository(
        BinaryIndexDbContext dbContext,
        ILogger<CorpusSnapshotRepository> logger)
    {
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Inserts a snapshot row and returns the values the database reports back.
    /// </summary>
    /// <remarks>
    /// <c>tenant_id</c> is taken from <c>binaries_app.current_tenant()</c> and
    /// <c>created_at</c> from <c>NOW()</c>; neither comes from <paramref name="snapshot"/>.
    /// </remarks>
    /// <param name="snapshot">Snapshot whose id, distro, release, architecture, digest and capture time are stored.</param>
    /// <param name="ct">Token that cancels both opening the connection and the insert.</param>
    /// <returns>The snapshot rebuilt from the <c>RETURNING</c> clause.</returns>
    public async Task<CorpusSnapshot> CreateAsync(CorpusSnapshot snapshot, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            INSERT INTO binaries.corpus_snapshots (
            id,
            tenant_id,
            distro,
            release,
            architecture,
            metadata_digest,
            captured_at,
            created_at
            )
            VALUES (
            @Id,
            binaries_app.current_tenant()::uuid,
            @Distro,
            @Release,
            @Architecture,
            @MetadataDigest,
            @CapturedAt,
            NOW()
            )
            RETURNING id, distro, release, architecture, metadata_digest, captured_at
            """;

        var parameters = new
        {
            snapshot.Id,
            snapshot.Distro,
            snapshot.Release,
            snapshot.Architecture,
            snapshot.MetadataDigest,
            snapshot.CapturedAt
        };

        // CommandDefinition is the only way to hand the cancellation token to Dapper.
        var row = await conn.QuerySingleAsync<CorpusSnapshotRow>(
            new CommandDefinition(sql, parameters, cancellationToken: ct));

        _logger.LogInformation(
            "Created corpus snapshot {Id} for {Distro} {Release}/{Architecture}",
            row.Id, row.Distro, row.Release, row.Architecture);

        return row.ToModel();
    }

    /// <summary>
    /// Finds the most recently captured snapshot for a distro/release/architecture triple.
    /// </summary>
    /// <param name="distro">Value matched against the <c>distro</c> column.</param>
    /// <param name="release">Value matched against the <c>release</c> column.</param>
    /// <param name="architecture">Value matched against the <c>architecture</c> column.</param>
    /// <param name="ct">Token that cancels both opening the connection and the query.</param>
    /// <returns>The newest matching snapshot by <c>captured_at</c>, or <c>null</c> when none matches.</returns>
    public async Task<CorpusSnapshot?> FindByKeyAsync(
        string distro,
        string release,
        string architecture,
        CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        // LIMIT 1 guarantees at most one row, so QuerySingleOrDefaultAsync cannot throw on duplicates.
        const string sql = """
            SELECT id, distro, release, architecture, metadata_digest, captured_at
            FROM binaries.corpus_snapshots
            WHERE distro = @Distro
            AND release = @Release
            AND architecture = @Architecture
            ORDER BY captured_at DESC
            LIMIT 1
            """;

        var parameters = new
        {
            Distro = distro,
            Release = release,
            Architecture = architecture
        };

        var row = await conn.QuerySingleOrDefaultAsync<CorpusSnapshotRow>(
            new CommandDefinition(sql, parameters, cancellationToken: ct));

        return row?.ToModel();
    }

    /// <summary>
    /// Loads a snapshot by its identifier.
    /// </summary>
    /// <param name="id">Snapshot identifier.</param>
    /// <param name="ct">Token that cancels both opening the connection and the query.</param>
    /// <returns>The snapshot, or <c>null</c> when no row has that id.</returns>
    public async Task<CorpusSnapshot?> GetByIdAsync(Guid id, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            SELECT id, distro, release, architecture, metadata_digest, captured_at
            FROM binaries.corpus_snapshots
            WHERE id = @Id
            """;

        var row = await conn.QuerySingleOrDefaultAsync<CorpusSnapshotRow>(
            new CommandDefinition(sql, new { Id = id }, cancellationToken: ct));

        return row?.ToModel();
    }

    /// <summary>
    /// Row shape for the columns selected/returned by the queries in this repository.
    /// </summary>
    private sealed record CorpusSnapshotRow(
        Guid Id,
        string Distro,
        string Release,
        string Architecture,
        string MetadataDigest,
        DateTimeOffset CapturedAt)
    {
        /// <summary>Copies the row values into a <see cref="CorpusSnapshot"/>.</summary>
        public CorpusSnapshot ToModel() => new(
            Id: Id,
            Distro: Distro,
            Release: Release,
            Architecture: Architecture,
            MetadataDigest: MetadataDigest,
            CapturedAt: CapturedAt);
    }
}
|
||||
@@ -0,0 +1,211 @@
|
||||
using System.Collections.Immutable;
|
||||
using Dapper;
|
||||
using StellaOps.BinaryIndex.Fingerprints;
|
||||
using StellaOps.BinaryIndex.Fingerprints.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
|
||||
/// <summary>
/// Repository implementation for vulnerable fingerprints
/// (<c>binaries.vulnerable_fingerprints</c>).
/// </summary>
/// <remarks>
/// The read methods (<see cref="GetByIdAsync"/>, <see cref="GetByCveAsync"/>,
/// <see cref="SearchByHashAsync"/>) are still placeholders: they never query the
/// database and always report "nothing found".
/// </remarks>
public sealed class FingerprintRepository : IFingerprintRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    /// <summary>
    /// Creates the repository.
    /// </summary>
    /// <param name="dbContext">Context used to open database connections.</param>
    public FingerprintRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Inserts a fingerprint row.
    /// </summary>
    /// <remarks>
    /// A new id is generated when <paramref name="fingerprint"/> carries <see cref="Guid.Empty"/>.
    /// <c>tenant_id</c> is taken from <c>binaries_app.current_tenant()</c>.
    /// </remarks>
    /// <param name="fingerprint">Fingerprint to persist.</param>
    /// <param name="ct">Token that cancels both opening the connection and the insert.</param>
    /// <returns>A copy of <paramref name="fingerprint"/> whose <c>Id</c> is the id returned by the database.</returns>
    public async Task<VulnFingerprint> CreateAsync(VulnFingerprint fingerprint, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            INSERT INTO binaries.vulnerable_fingerprints (
            id, tenant_id, cve_id, component, purl, algorithm, fingerprint_id, fingerprint_hash,
            architecture, function_name, source_file, source_line, similarity_threshold,
            confidence, validated, validation_stats, vuln_build_ref, fixed_build_ref, indexed_at
            )
            VALUES (
            @Id, binaries_app.current_tenant()::uuid, @CveId, @Component, @Purl, @Algorithm,
            @FingerprintId, @FingerprintHash, @Architecture, @FunctionName, @SourceFile,
            @SourceLine, @SimilarityThreshold, @Confidence, @Validated, @ValidationStats::jsonb,
            @VulnBuildRef, @FixedBuildRef, @IndexedAt
            )
            RETURNING id
            """;

        var parameters = new
        {
            Id = fingerprint.Id != Guid.Empty ? fingerprint.Id : Guid.NewGuid(),
            fingerprint.CveId,
            fingerprint.Component,
            fingerprint.Purl,
            // Stored form of the algorithm: enum name lower-cased with underscores removed.
            Algorithm = fingerprint.Algorithm.ToString().ToLowerInvariant().Replace("_", ""),
            fingerprint.FingerprintId,
            fingerprint.FingerprintHash,
            fingerprint.Architecture,
            fingerprint.FunctionName,
            fingerprint.SourceFile,
            fingerprint.SourceLine,
            fingerprint.SimilarityThreshold,
            fingerprint.Confidence,
            fingerprint.Validated,
            // Sent as JSON text and cast to jsonb in SQL; missing stats become an empty object.
            ValidationStats = fingerprint.ValidationStats != null
                ? System.Text.Json.JsonSerializer.Serialize(fingerprint.ValidationStats)
                : "{}",
            fingerprint.VulnBuildRef,
            fingerprint.FixedBuildRef,
            fingerprint.IndexedAt
        };

        // CommandDefinition carries the cancellation token into Dapper; the plain overload ignores it.
        var id = await conn.ExecuteScalarAsync<Guid>(
            new CommandDefinition(sql, parameters, cancellationToken: ct));

        return fingerprint with { Id = id };
    }

    /// <summary>
    /// Placeholder: opens a connection but does not execute the query; always returns <c>null</c>.
    /// </summary>
    /// <param name="id">Fingerprint identifier (currently unused).</param>
    /// <param name="ct">Token used while opening the connection.</param>
    /// <returns>Always <c>null</c> until row-to-model mapping is implemented.</returns>
    public async Task<VulnFingerprint?> GetByIdAsync(Guid id, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            SELECT id, cve_id as CveId, component, purl, algorithm, fingerprint_id as FingerprintId,
            fingerprint_hash as FingerprintHash, architecture, function_name as FunctionName,
            source_file as SourceFile, source_line as SourceLine,
            similarity_threshold as SimilarityThreshold, confidence, validated,
            validation_stats as ValidationStats, vuln_build_ref as VulnBuildRef,
            fixed_build_ref as FixedBuildRef, indexed_at as IndexedAt
            FROM binaries.vulnerable_fingerprints
            WHERE id = @Id
            """;

        // TODO: execute the query above and map the row to VulnFingerprint,
        // including JSONB deserialization for validation_stats.
        return null; // Placeholder: callers cannot distinguish "not found" from "not implemented".
    }

    /// <summary>
    /// Placeholder: performs no database access; always returns an empty array.
    /// </summary>
    /// <param name="cveId">CVE identifier (currently unused).</param>
    /// <param name="ct">Cancellation token (currently unused).</param>
    /// <returns>Always an empty array until implemented.</returns>
    public async Task<ImmutableArray<VulnFingerprint>> GetByCveAsync(string cveId, CancellationToken ct = default)
    {
        // TODO: similar implementation to GetByIdAsync but for multiple records.
        return ImmutableArray<VulnFingerprint>.Empty;
    }

    /// <summary>
    /// Placeholder: opens a connection but does not execute the query; always returns an empty array.
    /// </summary>
    /// <param name="hash">Fingerprint hash to look up (currently unused).</param>
    /// <param name="algorithm">Fingerprint algorithm (currently unused).</param>
    /// <param name="architecture">Target architecture (currently unused).</param>
    /// <param name="ct">Token used while opening the connection.</param>
    /// <returns>Always an empty array until row-to-model mapping is implemented.</returns>
    public async Task<ImmutableArray<VulnFingerprint>> SearchByHashAsync(
        byte[] hash,
        FingerprintAlgorithm algorithm,
        string architecture,
        CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            SELECT id, cve_id as CveId, component, purl, algorithm, fingerprint_id as FingerprintId,
            fingerprint_hash as FingerprintHash, architecture, function_name as FunctionName,
            source_file as SourceFile, source_line as SourceLine,
            similarity_threshold as SimilarityThreshold, confidence, validated,
            validation_stats as ValidationStats, vuln_build_ref as VulnBuildRef,
            fixed_build_ref as FixedBuildRef, indexed_at as IndexedAt
            FROM binaries.vulnerable_fingerprints
            WHERE fingerprint_hash = @Hash
            AND algorithm = @Algorithm
            AND architecture = @Architecture
            """;

        // TODO: execute the query above with proper row-to-model mapping.
        return ImmutableArray<VulnFingerprint>.Empty;
    }

    /// <summary>
    /// Replaces the stored validation statistics of a fingerprint and marks it as validated.
    /// </summary>
    /// <param name="id">Identifier of the fingerprint row to update.</param>
    /// <param name="stats">Statistics serialized to JSON and stored in <c>validation_stats</c>.</param>
    /// <param name="ct">Token that cancels both opening the connection and the update.</param>
    public async Task UpdateValidationStatsAsync(
        Guid id,
        FingerprintValidationStats stats,
        CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            UPDATE binaries.vulnerable_fingerprints
            SET validation_stats = @Stats::jsonb,
            validated = TRUE
            WHERE id = @Id
            """;

        var parameters = new
        {
            Id = id,
            Stats = System.Text.Json.JsonSerializer.Serialize(stats)
        };

        await conn.ExecuteAsync(new CommandDefinition(sql, parameters, cancellationToken: ct));
    }
}
|
||||
|
||||
/// <summary>
/// Repository implementation for fingerprint matches
/// (<c>binaries.fingerprint_matches</c>).
/// </summary>
public sealed class FingerprintMatchRepository : IFingerprintMatchRepository
{
    private readonly BinaryIndexDbContext _dbContext;

    /// <summary>
    /// Creates the repository.
    /// </summary>
    /// <param name="dbContext">Context used to open database connections.</param>
    public FingerprintMatchRepository(BinaryIndexDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Inserts a fingerprint match row.
    /// </summary>
    /// <remarks>
    /// A new id is generated when <paramref name="match"/> carries <see cref="Guid.Empty"/>.
    /// <c>tenant_id</c> is taken from <c>binaries_app.current_tenant()</c> and
    /// <c>binary_identity_id</c> is always written as NULL.
    /// </remarks>
    /// <param name="match">Match to persist.</param>
    /// <param name="ct">Token that cancels both opening the connection and the insert.</param>
    /// <returns>A copy of <paramref name="match"/> whose <c>Id</c> is the id returned by the database.</returns>
    public async Task<FingerprintMatch> CreateAsync(FingerprintMatch match, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            INSERT INTO binaries.fingerprint_matches (
            id, tenant_id, scan_id, match_type, binary_key, binary_identity_id,
            vulnerable_purl, vulnerable_version, matched_fingerprint_id, matched_function,
            similarity, advisory_ids, reachability_status, matched_at
            )
            VALUES (
            @Id, binaries_app.current_tenant()::uuid, @ScanId, @MatchType, @BinaryKey,
            @BinaryIdentityId, @VulnerablePurl, @VulnerableVersion, @MatchedFingerprintId,
            @MatchedFunction, @Similarity, @AdvisoryIds, @ReachabilityStatus, @MatchedAt
            )
            RETURNING id
            """;

        var parameters = new
        {
            Id = match.Id != Guid.Empty ? match.Id : Guid.NewGuid(),
            match.ScanId,
            // Enum values are stored as their lower-cased names.
            MatchType = match.Type.ToString().ToLowerInvariant(),
            match.BinaryKey,
            BinaryIdentityId = (Guid?)null,
            match.VulnerablePurl,
            match.VulnerableVersion,
            match.MatchedFingerprintId,
            match.MatchedFunction,
            match.Similarity,
            match.AdvisoryIds,
            ReachabilityStatus = match.ReachabilityStatus?.ToString().ToLowerInvariant(),
            match.MatchedAt
        };

        // CommandDefinition carries the cancellation token into Dapper; the plain overload ignores it.
        var id = await conn.ExecuteScalarAsync<Guid>(
            new CommandDefinition(sql, parameters, cancellationToken: ct));

        return match with { Id = id };
    }

    /// <summary>
    /// Placeholder: performs no database access; always returns an empty array.
    /// </summary>
    /// <param name="scanId">Scan identifier (currently unused).</param>
    /// <param name="ct">Cancellation token (currently unused).</param>
    /// <returns>Always an empty array until implemented.</returns>
    public async Task<ImmutableArray<FingerprintMatch>> GetByScanAsync(Guid scanId, CancellationToken ct = default)
    {
        // TODO: needs a proper implementation with row-to-model mapping.
        return ImmutableArray<FingerprintMatch>.Empty;
    }

    /// <summary>
    /// Overwrites the reachability status of a stored match.
    /// </summary>
    /// <param name="id">Identifier of the match row to update.</param>
    /// <param name="status">New status; stored as its lower-cased enum name.</param>
    /// <param name="ct">Token that cancels both opening the connection and the update.</param>
    public async Task UpdateReachabilityAsync(Guid id, ReachabilityStatus status, CancellationToken ct = default)
    {
        await using var conn = await _dbContext.OpenConnectionAsync(ct);

        const string sql = """
            UPDATE binaries.fingerprint_matches
            SET reachability_status = @Status
            WHERE id = @Id
            """;

        var parameters = new
        {
            Id = id,
            Status = status.ToString().ToLowerInvariant()
        };

        await conn.ExecuteAsync(new CommandDefinition(sql, parameters, cancellationToken: ct));
    }
}
|
||||
@@ -0,0 +1,30 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.BinaryIndex.Core.Models;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Persistence.Repositories;
|
||||
|
||||
/// <summary>
/// Persistence contract for <see cref="BinaryIdentity"/> records: single lookups,
/// batch lookup and upsert.
/// </summary>
public interface IBinaryIdentityRepository
{
    /// <summary>
    /// Looks up a binary identity by Build-ID.
    /// </summary>
    /// <param name="buildId">Build-ID value to search for.</param>
    /// <param name="buildIdType">Kind of Build-ID supplied in <paramref name="buildId"/>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The identity, or <c>null</c> (presumably when no record matches).</returns>
    Task<BinaryIdentity?> GetByBuildIdAsync(
        string buildId,
        string buildIdType,
        CancellationToken ct);

    /// <summary>
    /// Looks up a binary identity by its binary key.
    /// </summary>
    /// <param name="binaryKey">Binary key to search for.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The identity, or <c>null</c> (presumably when no record matches).</returns>
    Task<BinaryIdentity?> GetByKeyAsync(
        string binaryKey,
        CancellationToken ct);

    /// <summary>
    /// Inserts or updates a binary identity.
    /// </summary>
    /// <param name="identity">Identity to store.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The identity produced by the upsert.</returns>
    Task<BinaryIdentity> UpsertAsync(
        BinaryIdentity identity,
        CancellationToken ct);

    /// <summary>
    /// Looks up several binary identities in one call.
    /// </summary>
    /// <param name="binaryKeys">Binary keys to search for.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The identities found for the supplied keys.</returns>
    Task<ImmutableArray<BinaryIdentity>> GetBatchAsync(
        IEnumerable<string> binaryKeys,
        CancellationToken ct);
}
|
||||
@@ -0,0 +1,26 @@
|
||||
<!-- Persistence project for the binary index: Dapper/Npgsql repositories plus embedded SQL migrations. -->
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
  </PropertyGroup>

  <!-- NuGet dependencies. -->
  <ItemGroup>
    <PackageReference Include="Dapper" Version="2.1.35" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
    <PackageReference Include="Npgsql" Version="9.0.2" />
  </ItemGroup>

  <!-- Sibling BinaryIndex projects and the shared Postgres infrastructure library. -->
  <ItemGroup>
    <ProjectReference Include="..\StellaOps.BinaryIndex.Core\StellaOps.BinaryIndex.Core.csproj" />
    <ProjectReference Include="..\StellaOps.BinaryIndex.Corpus\StellaOps.BinaryIndex.Corpus.csproj" />
    <ProjectReference Include="..\StellaOps.BinaryIndex.Fingerprints\StellaOps.BinaryIndex.Fingerprints.csproj" />
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
  </ItemGroup>

  <!-- Every .sql file under Migrations is embedded into the assembly as a resource. -->
  <ItemGroup>
    <EmbeddedResource Include="Migrations\*.sql" />
  </ItemGroup>

</Project>
|
||||
Reference in New Issue
Block a user