// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.

using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
using StellaOps.BinaryIndex.Semantic;
using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;

namespace StellaOps.BinaryIndex.DeltaSig;

// NOTE(review): the reviewed text of this file had lost its generic type arguments and XML doc
// tags. They have been reconstructed from usage. The names DisassembledInstruction and
// SemanticFingerprint, and the string element type of the applied-step / API-call collections,
// are inferred rather than visible — confirm against the project declarations.

/// <summary>
/// Generates delta signatures from binaries for CVE detection.
/// </summary>
/// <remarks>
/// A signature is built per symbol from the symbol's normalized instruction bytes: a whole-function
/// hash, optional fixed-size chunk hashes, optional CFG metrics and, when the semantic services were
/// supplied, an optional semantic fingerprint.
/// </remarks>
public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
{
    /// <summary>Scope recorded for a symbol whose <c>Section</c> is null.</summary>
    private const string DefaultScope = ".text";

    /// <summary>Size of the scratch buffer used for digests; SHA-512 (64 bytes) is the largest supported.</summary>
    private const int MaxHashSizeBytes = 64;

    private readonly DisassemblyService _disassemblyService;
    private readonly NormalizationService _normalizationService;
    private readonly IIrLiftingService? _irLiftingService;
    private readonly ISemanticGraphExtractor? _graphExtractor;
    private readonly ISemanticFingerprintGenerator? _fingerprintGenerator;
    private readonly ILogger<DeltaSignatureGenerator> _logger;

    /// <summary>
    /// Creates a new delta signature generator without semantic analysis support.
    /// </summary>
    /// <param name="disassemblyService">Service used to load binaries.</param>
    /// <param name="normalizationService">Service used to normalize disassembled instructions.</param>
    /// <param name="logger">Logger for progress and diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public DeltaSignatureGenerator(
        DisassemblyService disassemblyService,
        NormalizationService normalizationService,
        ILogger<DeltaSignatureGenerator> logger)
        : this(disassemblyService, normalizationService, null, null, null, logger)
    {
    }

    /// <summary>
    /// Creates a new delta signature generator with optional semantic analysis support.
    /// </summary>
    /// <param name="disassemblyService">Service used to load binaries.</param>
    /// <param name="normalizationService">Service used to normalize disassembled instructions.</param>
    /// <param name="irLiftingService">Optional IR lifting service.</param>
    /// <param name="graphExtractor">Optional semantic graph extractor.</param>
    /// <param name="fingerprintGenerator">Optional semantic fingerprint generator.</param>
    /// <param name="logger">Logger for progress and diagnostics.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="disassemblyService"/>, <paramref name="normalizationService"/> or
    /// <paramref name="logger"/> is null.
    /// </exception>
    public DeltaSignatureGenerator(
        DisassemblyService disassemblyService,
        NormalizationService normalizationService,
        IIrLiftingService? irLiftingService,
        ISemanticGraphExtractor? graphExtractor,
        ISemanticFingerprintGenerator? fingerprintGenerator,
        ILogger<DeltaSignatureGenerator> logger)
    {
        _disassemblyService = disassemblyService ?? throw new ArgumentNullException(nameof(disassemblyService));
        _normalizationService = normalizationService ?? throw new ArgumentNullException(nameof(normalizationService));
        _irLiftingService = irLiftingService;
        _graphExtractor = graphExtractor;
        _fingerprintGenerator = fingerprintGenerator;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Gets a value indicating whether semantic analysis is available, i.e. whether all three
    /// semantic services (IR lifting, graph extraction, fingerprint generation) were supplied.
    /// </summary>
    public bool SemanticAnalysisAvailable =>
        _irLiftingService is not null &&
        _graphExtractor is not null &&
        _fingerprintGenerator is not null;

    /// <inheritdoc />
    /// <remarks>
    /// An empty target list means "all symbols", processed in ordinal name order. Target symbols
    /// that are missing from the binary, or that disassemble to no instructions, are skipped with
    /// a warning. When several symbols share a name, the first one reported is used.
    /// </remarks>
    public async Task<DeltaSignature> GenerateSignaturesAsync(
        Stream binaryStream,
        DeltaSignatureRequest request,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(binaryStream);
        ArgumentNullException.ThrowIfNull(request);

        _logger.LogInformation(
            "Generating delta signatures for {Cve} ({Package}) with {SymbolCount} target symbols",
            request.Cve,
            request.Package,
            request.TargetSymbols.Count);

        var options = request.Options ?? new SignatureOptions();

        // Load and analyze the binary off the calling thread.
        var (binary, plugin) = await Task.Run(
            () => _disassemblyService.LoadBinary(binaryStream),
            ct);

        _logger.LogDebug(
            "Loaded binary: format={Format}, arch={Arch}",
            binary.Format,
            binary.Architecture);

        // Index symbols by name. Symbol names are not guaranteed to be unique, so group first and
        // keep the first definition instead of letting ToDictionary throw on a duplicate key.
        var symbolGroups = plugin.GetSymbols(binary)
            .GroupBy(s => s.Name, StringComparer.Ordinal)
            .ToList();

        foreach (var group in symbolGroups.Where(g => g.Skip(1).Any()))
        {
            _logger.LogDebug(
                "Symbol {Symbol} is defined more than once; using the first definition",
                group.Key);
        }

        var symbols = symbolGroups.ToDictionary(g => g.Key, g => g.First(), StringComparer.Ordinal);

        // Empty target list means "all symbols"; ordinal ordering keeps the output deterministic.
        var targetSymbols = request.TargetSymbols.Count == 0
            ? symbols.Keys.OrderBy(v => v, StringComparer.Ordinal).ToArray()
            : request.TargetSymbols;

        var symbolSignatures = new List<SymbolSignature>();

        // Applied steps are reported in first-seen order; the set makes the duplicate check O(1).
        var appliedSteps = new List<string>();
        var seenSteps = new HashSet<string>();

        foreach (var symbolName in targetSymbols)
        {
            ct.ThrowIfCancellationRequested();

            if (!symbols.TryGetValue(symbolName, out var symbolInfo))
            {
                _logger.LogWarning("Symbol {Symbol} not found in binary", symbolName);
                continue;
            }

            // Disassemble the symbol.
            var instructions = plugin.DisassembleSymbol(binary, symbolInfo).ToList();
            if (instructions.Count == 0)
            {
                _logger.LogWarning("No instructions for symbol {Symbol}", symbolName);
                continue;
            }

            // Normalize the instructions.
            var normalized = _normalizationService.Normalize(
                instructions,
                binary.Architecture);

            foreach (var step in normalized.AppliedSteps)
            {
                if (seenSteps.Add(step))
                {
                    appliedSteps.Add(step);
                }
            }

            // Generate the signature from the normalized bytes.
            var signature = await GenerateSymbolSignatureAsync(
                normalized,
                symbolName,
                symbolInfo.Section ?? DefaultScope,
                instructions,
                binary.Architecture,
                options,
                ct);

            symbolSignatures.Add(signature);

            _logger.LogDebug(
                "Generated signature for {Symbol}: {Hash} ({Size} bytes)",
                symbolName,
                signature.HashHex,
                signature.SizeBytes);
        }

        // Get the pipeline used for normalization reference.
        var pipeline = _normalizationService.GetPipeline(binary.Architecture);

        return new DeltaSignature
        {
            Cve = request.Cve,
            Package = new PackageRef(request.Package, request.Soname),
            Target = new TargetRef(request.Arch, request.Abi),
            Normalization = new NormalizationRef(
                pipeline.RecipeId,
                pipeline.RecipeVersion,
                [.. appliedSteps]),
            SignatureState = request.SignatureState,
            Symbols = [.. symbolSignatures],
            GeneratedAt = DateTimeOffset.UtcNow
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// This byte-only overload has no instruction stream, so it cannot build a CFG: the basic
    /// block count is a heuristic estimate and the CFG edge hash is always null.
    /// </remarks>
    public SymbolSignature GenerateSymbolSignature(
        ReadOnlySpan<byte> normalizedBytes,
        string symbolName,
        string scope,
        SignatureOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(symbolName);
        ArgumentNullException.ThrowIfNull(scope);

        options ??= new SignatureOptions();

        var (hashHex, chunks) = ComputeByteHashes(normalizedBytes, options);

        int? bbCount = options.IncludeCfg
            ? EstimateBasicBlockCount(normalizedBytes)
            : null;

        return new SymbolSignature
        {
            Name = symbolName,
            Scope = scope,
            HashAlg = options.HashAlgorithm,
            HashHex = hashHex,
            SizeBytes = normalizedBytes.Length,
            CfgBbCount = bbCount,
            CfgEdgeHash = null,
            Chunks = chunks
        };
    }

    /// <inheritdoc />
    public SymbolSignature GenerateSymbolSignature(
        NormalizedFunction normalized,
        string symbolName,
        string scope,
        SignatureOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(normalized);
        ArgumentNullException.ThrowIfNull(symbolName);
        ArgumentNullException.ThrowIfNull(scope);

        options ??= new SignatureOptions();

        var normalizedBytes = GetNormalizedBytes(normalized);
        var (hashHex, chunks) = ComputeByteHashes(normalizedBytes, options);
        var (bbCount, cfgEdgeHash) = ComputeCfgMetrics(normalized, options);

        return new SymbolSignature
        {
            Name = symbolName,
            Scope = scope,
            HashAlg = options.HashAlgorithm,
            HashHex = hashHex,
            SizeBytes = normalizedBytes.Length,
            CfgBbCount = bbCount,
            CfgEdgeHash = cfgEdgeHash,
            Chunks = chunks
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Semantic fingerprinting is best-effort: it runs only when enabled in the options, when all
    /// semantic services are available and when there are instructions; any failure other than
    /// cancellation of <paramref name="ct"/> is logged and the signature is returned without
    /// semantic data.
    /// </remarks>
    public async Task<SymbolSignature> GenerateSymbolSignatureAsync(
        NormalizedFunction normalized,
        string symbolName,
        string scope,
        IReadOnlyList<DisassembledInstruction> originalInstructions,
        CpuArchitecture architecture,
        SignatureOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(normalized);
        ArgumentNullException.ThrowIfNull(symbolName);
        ArgumentNullException.ThrowIfNull(scope);
        ArgumentNullException.ThrowIfNull(originalInstructions);

        options ??= new SignatureOptions();

        var normalizedBytes = GetNormalizedBytes(normalized);
        var (hashHex, chunks) = ComputeByteHashes(normalizedBytes, options);
        var (bbCount, cfgEdgeHash) = ComputeCfgMetrics(normalized, options);

        // Compute semantic fingerprint if enabled and services available.
        string? semanticHashHex = null;
        ImmutableArray<string>? semanticApiCalls = null;

        if (options.IncludeSemantic && SemanticAnalysisAvailable && originalInstructions.Count > 0)
        {
            try
            {
                var semanticFingerprint = await ComputeSemanticFingerprintAsync(
                    originalInstructions,
                    symbolName,
                    architecture,
                    ct);

                if (semanticFingerprint is not null)
                {
                    semanticHashHex = semanticFingerprint.GraphHashHex;
                    semanticApiCalls = semanticFingerprint.ApiCalls;
                }
            }
            // Let a caller-requested cancellation propagate; every other failure stays best-effort.
            catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
            {
                _logger.LogWarning(
                    ex,
                    "Failed to compute semantic fingerprint for {Symbol}, continuing without semantic data",
                    symbolName);
            }
        }

        return new SymbolSignature
        {
            Name = symbolName,
            Scope = scope,
            HashAlg = options.HashAlgorithm,
            HashHex = hashHex,
            SizeBytes = normalizedBytes.Length,
            CfgBbCount = bbCount,
            CfgEdgeHash = cfgEdgeHash,
            Chunks = chunks,
            SemanticHashHex = semanticHashHex,
            SemanticApiCalls = semanticApiCalls
        };
    }

    /// <summary>
    /// Lifts the instructions to IR, extracts the semantic graph and generates a fingerprint.
    /// </summary>
    /// <returns>
    /// The fingerprint, or null when a semantic service is missing or the lifting service does not
    /// support <paramref name="architecture"/>.
    /// </returns>
    private async Task<SemanticFingerprint?> ComputeSemanticFingerprintAsync(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName,
        CpuArchitecture architecture,
        CancellationToken ct)
    {
        if (_irLiftingService is null || _graphExtractor is null || _fingerprintGenerator is null)
        {
            return null;
        }

        // Check if architecture is supported.
        if (!_irLiftingService.SupportsArchitecture(architecture))
        {
            _logger.LogDebug(
                "Architecture {Arch} not supported for semantic analysis",
                architecture);
            return null;
        }

        // Lift to IR; the first instruction's address (0 for an empty list) is the start address.
        var startAddress = instructions.Count > 0 ? instructions[0].Address : 0UL;
        var lifted = await _irLiftingService.LiftToIrAsync(
            instructions,
            functionName,
            startAddress,
            architecture,
            ct: ct);

        // Extract semantic graph.
        var graph = await _graphExtractor.ExtractGraphAsync(lifted, ct: ct);

        // Generate fingerprint.
        var fingerprint = await _fingerprintGenerator.GenerateAsync(
            graph,
            startAddress,
            ct: ct);

        return fingerprint;
    }

    /// <summary>
    /// Computes the whole-function hash and, when requested and the data is at least one chunk
    /// long, the per-chunk hashes.
    /// </summary>
    private static (string HashHex, ImmutableArray<ChunkHash>? Chunks) ComputeByteHashes(
        ReadOnlySpan<byte> normalizedBytes,
        SignatureOptions options)
    {
        var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

        ImmutableArray<ChunkHash>? chunks = null;
        if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
        {
            chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
        }

        return (hashHex, chunks);
    }

    /// <summary>
    /// Computes CFG metrics via <c>CfgExtractor</c> when requested and the function has
    /// instructions; otherwise both values are null.
    /// </summary>
    private static (int? BasicBlockCount, string? EdgeHash) ComputeCfgMetrics(
        NormalizedFunction normalized,
        SignatureOptions options)
    {
        if (!options.IncludeCfg || normalized.Instructions.Length == 0)
        {
            return (null, null);
        }

        // Use first instruction's address as start address.
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        return (cfgMetrics.BasicBlockCount, cfgMetrics.EdgeHash);
    }

    /// <summary>
    /// Concatenates the normalized bytes of every instruction, in instruction order.
    /// </summary>
    private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
    {
        var totalSize = normalized.Instructions.Sum(i => i.NormalizedBytes.Length);
        var result = new byte[totalSize];
        var offset = 0;

        foreach (var instruction in normalized.Instructions)
        {
            instruction.NormalizedBytes.CopyTo(result.AsSpan(offset));
            offset += instruction.NormalizedBytes.Length;
        }

        return result;
    }

    /// <summary>
    /// Hashes <paramref name="data"/> and returns the digest as lowercase hex.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <param name="algorithm">"sha256", "sha384" or "sha512" (case-insensitive).</param>
    /// <exception cref="ArgumentNullException"><paramref name="algorithm"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="algorithm"/> is not supported.</exception>
    private static string ComputeHash(ReadOnlySpan<byte> data, string algorithm)
    {
        ArgumentNullException.ThrowIfNull(algorithm);

        Span<byte> hash = stackalloc byte[MaxHashSizeBytes];
        int bytesWritten;

        if (string.Equals(algorithm, "sha256", StringComparison.OrdinalIgnoreCase))
        {
            bytesWritten = SHA256.HashData(data, hash);
        }
        else if (string.Equals(algorithm, "sha384", StringComparison.OrdinalIgnoreCase))
        {
            bytesWritten = SHA384.HashData(data, hash);
        }
        else if (string.Equals(algorithm, "sha512", StringComparison.OrdinalIgnoreCase))
        {
            bytesWritten = SHA512.HashData(data, hash);
        }
        else
        {
            throw new ArgumentException($"Unsupported hash algorithm: {algorithm}", nameof(algorithm));
        }

        return Convert.ToHexString(hash[..bytesWritten]).ToLowerInvariant();
    }

    /// <summary>
    /// Splits <paramref name="data"/> into consecutive chunks of <paramref name="chunkSize"/> bytes
    /// (the last chunk may be shorter) and hashes each one.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="chunkSize"/> is zero or negative; a zero size would never advance the offset.
    /// </exception>
    private static ImmutableArray<ChunkHash> ComputeChunkHashes(
        ReadOnlySpan<byte> data,
        int chunkSize,
        string algorithm)
    {
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkSize),
                chunkSize,
                "Chunk size must be greater than zero.");
        }

        var chunks = new List<ChunkHash>();
        var offset = 0;

        while (offset < data.Length)
        {
            var size = Math.Min(chunkSize, data.Length - offset);
            var chunkData = data.Slice(offset, size);
            var hash = ComputeHash(chunkData, algorithm);

            chunks.Add(new ChunkHash(offset, size, hash));
            offset += size;
        }

        return [.. chunks];
    }

    /// <summary>
    /// Estimates the basic block count by scanning raw bytes for values that look like x64
    /// control-transfer opcodes. This is a byte-level heuristic, not instruction decoding, so
    /// operand or immediate bytes with the same values are counted too.
    /// </summary>
    private static int EstimateBasicBlockCount(ReadOnlySpan<byte> data)
    {
        // Simplified heuristic: count potential block terminators.
        // Real implementation would use proper CFG analysis.
        var count = 1; // At least one block

        for (var i = 0; i < data.Length; i++)
        {
            var b = data[i];

            // Common x64 block terminators.
            if (b is 0xC3 or 0xE8 or 0xE9 or 0xEB or (>= 0x70 and <= 0x7F))
            {
                count++;
            }
            // 0F 8x = conditional jumps.
            else if (i + 1 < data.Length && b == 0x0F && data[i + 1] >= 0x80 && data[i + 1] <= 0x8F)
            {
                count++;
                i++; // Skip next byte
            }
        }

        return count;
    }

    /// <summary>
    /// Maps an architecture name (case-insensitive) to the corresponding <c>CpuArchitecture</c> value.
    /// </summary>
    /// <exception cref="ArgumentException">The name is not a recognized architecture.</exception>
    private static CpuArchitecture ParseArch(string arch)
    {
        return arch.ToLowerInvariant() switch
        {
            "x86_64" or "amd64" or "x64" => CpuArchitecture.X86_64,
            "x86" or "i386" or "i686" => CpuArchitecture.X86,
            "aarch64" or "arm64" => CpuArchitecture.ARM64,
            "arm" or "armv7" => CpuArchitecture.ARM32,
            "mips" or "mips32" => CpuArchitecture.MIPS32,
            "mips64" => CpuArchitecture.MIPS64,
            "riscv64" => CpuArchitecture.RISCV64,
            "ppc" or "ppc32" or "powerpc" => CpuArchitecture.PPC32,
            "ppc64" or "powerpc64" => CpuArchitecture.PPC64,
            _ => throw new ArgumentException($"Unknown architecture: {arch}", nameof(arch))
        };
    }
}