// -----------------------------------------------------------------------------
// NativeAnalyzerExecutor.cs
// Sprint: SPRINT_3500_0014_0001_native_analyzer_integration
// Task: NAI-001
// Description: Executes native binary analysis during container scans.
// Note: NUC-004 (unknown classification) deferred - requires project reference.
// -----------------------------------------------------------------------------

using System.Buffers.Binary;
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scanner.Analyzers.Native;
using StellaOps.Scanner.Core.Contracts;
using StellaOps.Scanner.Emit.Native;
using StellaOps.Scanner.Worker.Diagnostics;
using StellaOps.Scanner.Worker.Options;

namespace StellaOps.Scanner.Worker.Processing;

/// <summary>
/// Executes native binary analysis during container scans.
/// Discovers binaries, extracts metadata, correlates with Build-ID index,
/// and emits SBOM components.
/// </summary>
public sealed class NativeAnalyzerExecutor
{
    // Layout of the leading bytes of an ELF header (identical for ELF32 and ELF64).
    private const int ElfDataEncodingOffset = 5;   // e_ident[EI_DATA]
    private const byte ElfDataBigEndian = 2;       // ELFDATA2MSB
    private const int ElfMachineOffset = 18;       // e_machine (2 bytes)
    private const int ElfHeaderPrefixLength = 20;  // bytes needed to reach the end of e_machine

    private readonly NativeBinaryDiscovery _discovery;
    private readonly INativeComponentEmitter _emitter;
    private readonly IElfSectionHashExtractor _sectionHashExtractor;
    private readonly NativeAnalyzerOptions _options;
    private readonly ILogger<NativeAnalyzerExecutor> _logger;
    private readonly ScannerWorkerMetrics _metrics;

    /// <summary>
    /// Creates the executor.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any dependency (or the options value) is null.</exception>
    public NativeAnalyzerExecutor(
        NativeBinaryDiscovery discovery,
        INativeComponentEmitter emitter,
        IElfSectionHashExtractor sectionHashExtractor,
        IOptions<NativeAnalyzerOptions> options,
        ILogger<NativeAnalyzerExecutor> logger,
        ScannerWorkerMetrics metrics)
    {
        _discovery = discovery ?? throw new ArgumentNullException(nameof(discovery));
        _emitter = emitter ?? throw new ArgumentNullException(nameof(emitter));
        _sectionHashExtractor = sectionHashExtractor ?? throw new ArgumentNullException(nameof(sectionHashExtractor));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
    }

    /// <summary>
    /// Analyzes native binaries in the container filesystem.
    /// </summary>
    /// <param name="rootPath">Path to the extracted container filesystem.</param>
    /// <param name="context">Scan job context.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// Analysis result with discovered components. Returns <see cref="NativeAnalysisResult.Empty"/>
    /// when the analyzer is disabled or no binaries are discovered, and a result with
    /// <see cref="NativeAnalysisResult.TimedOut"/> set when the total analysis timeout elapses.
    /// </returns>
    /// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
    public async Task<NativeAnalysisResult> ExecuteAsync(
        string rootPath,
        ScanJobContext context,
        CancellationToken cancellationToken = default)
    {
        if (!_options.Enabled)
        {
            _logger.LogDebug("Native analyzer is disabled");
            return NativeAnalysisResult.Empty;
        }

        var sw = Stopwatch.StartNew();

        try
        {
            // The linked source fires on either caller cancellation or the total timeout.
            using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            cts.CancelAfter(_options.TotalAnalysisTimeout);

            // Discover binaries
            var discovered = await _discovery.DiscoverAsync(rootPath, cts.Token).ConfigureAwait(false);

            if (discovered.Count == 0)
            {
                _logger.LogDebug("No native binaries discovered in {RootPath}", rootPath);
                return NativeAnalysisResult.Empty;
            }

            _logger.LogInformation(
                "Starting native analysis of {Count} binaries for job {JobId}",
                discovered.Count,
                context.JobId);

            // Convert to metadata and emit
            var metadataList = new List<NativeBinaryMetadata>(discovered.Count);

            foreach (var binary in discovered)
            {
                // Stop promptly once the overall budget is spent instead of
                // walking the remaining binaries with a dead token.
                cts.Token.ThrowIfCancellationRequested();

                var metadata = await ExtractMetadataAsync(binary, cts.Token).ConfigureAwait(false);
                if (metadata is not null)
                {
                    metadataList.Add(metadata);
                }
            }

            // Batch emit components
            var emitResults = await _emitter.EmitBatchAsync(metadataList, cts.Token).ConfigureAwait(false);

            sw.Stop();

            var result = new NativeAnalysisResult
            {
                DiscoveredCount = discovered.Count,
                AnalyzedCount = metadataList.Count,
                ResolvedCount = emitResults.Count(r => r.IndexMatch),
                UnresolvedCount = emitResults.Count(r => !r.IndexMatch),
                Components = emitResults,
                ElapsedMs = sw.ElapsedMilliseconds
            };

            _metrics.RecordNativeAnalysis(result);

            _logger.LogInformation(
                "Native analysis complete for job {JobId}: {Resolved}/{Analyzed} resolved in {ElapsedMs}ms",
                context.JobId,
                result.ResolvedCount,
                result.AnalyzedCount,
                result.ElapsedMs);

            return result;
        }
        catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
        {
            // The caller did not cancel, so the linked source fired because of the total timeout.
            _logger.LogWarning(
                "Native analysis timed out for job {JobId} after {ElapsedMs}ms",
                context.JobId,
                sw.ElapsedMilliseconds);

            return new NativeAnalysisResult
            {
                TimedOut = true,
                ElapsedMs = sw.ElapsedMilliseconds
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Caller-requested cancellation is not a failure; it propagates without an error log.
            _logger.LogError(ex, "Native analysis failed for job {JobId}", context.JobId);
            throw;
        }
    }

    /// <summary>
    /// Builds the metadata record for a single discovered binary, bounded by the
    /// per-binary timeout.
    /// </summary>
    /// <param name="binary">The discovered binary to inspect.</param>
    /// <param name="cancellationToken">Token governing the overall analysis.</param>
    /// <returns>
    /// The extracted metadata, or <c>null</c> when extraction fails or exceeds the
    /// per-binary timeout.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled; this is propagated so the
    /// caller can stop the whole analysis rather than skipping binaries one by one.
    /// </exception>
    private async Task<NativeBinaryMetadata?> ExtractMetadataAsync(
        DiscoveredBinary binary,
        CancellationToken cancellationToken)
    {
        try
        {
            using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            cts.CancelAfter(_options.SingleBinaryTimeout);

            // Section hashes are only extracted for ELF binaries.
            var sectionHashes = binary.Format == BinaryFormat.Elf
                ? await _sectionHashExtractor.ExtractAsync(binary.AbsolutePath, cts.Token).ConfigureAwait(false)
                : null;

            cts.Token.ThrowIfCancellationRequested();

            // Prefer a locally extracted Build-ID, falling back to the extractor's value.
            var buildId = ExtractBuildId(binary) ?? sectionHashes?.BuildId;

            return new NativeBinaryMetadata
            {
                Format = binary.Format.ToString().ToLowerInvariant(),
                FilePath = binary.RelativePath,
                BuildId = buildId,
                Architecture = DetectArchitecture(binary),
                Platform = DetectPlatform(binary),
                FileDigest = sectionHashes?.FileHash,
                FileSize = binary.SizeBytes,
                ElfSectionHashes = sectionHashes
            };
        }
        catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
        {
            // Only the per-binary timeout is treated as "skip this binary".
            _logger.LogDebug("Extraction timed out for binary: {Path}", binary.RelativePath);
            return null;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogDebug(ex, "Failed to extract metadata from: {Path}", binary.RelativePath);
            return null;
        }
    }

    /// <summary>
    /// Placeholder for reading the Build-ID directly from the binary.
    /// </summary>
    /// <param name="binary">The discovered binary.</param>
    /// <returns>
    /// Always <c>null</c>, so <see cref="ExtractMetadataAsync"/> falls back to the
    /// Build-ID reported by the section hash extractor.
    /// </returns>
    /// <remarks>
    /// Parsing of the .note.gnu.build-id section is not implemented here; the full
    /// implementation is in the Analyzers.Native project. The binary is deliberately
    /// not opened, since no path through this method produces a value.
    /// </remarks>
    private static string? ExtractBuildId(DiscoveredBinary binary) => null;

    /// <summary>
    /// Maps the ELF header's <c>e_machine</c> field to an architecture name.
    /// </summary>
    /// <param name="binary">The discovered binary.</param>
    /// <returns>
    /// One of "i386", "x86_64", "arm", "aarch64" or "riscv"; <c>null</c> for non-ELF
    /// binaries, unrecognized machine values, files without a complete ELF header
    /// prefix, or when the file cannot be read.
    /// </returns>
    private static string? DetectArchitecture(DiscoveredBinary binary)
    {
        if (binary.Format != BinaryFormat.Elf)
        {
            return null;
        }

        try
        {
            using var fs = File.OpenRead(binary.AbsolutePath);
            Span<byte> header = stackalloc byte[ElfHeaderPrefixLength];

            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the prefix is complete or the file ends.
            var filled = 0;
            while (filled < header.Length)
            {
                var read = fs.Read(header[filled..]);
                if (read == 0)
                {
                    return null;
                }

                filled += read;
            }

            // Verify the ELF magic (0x7F 'E' 'L' 'F') before trusting the offsets below.
            if (header[0] != 0x7F || header[1] != 0x45 || header[2] != 0x4C || header[3] != 0x46)
            {
                return null;
            }

            // e_machine is stored in the file's own byte order, declared by
            // e_ident[EI_DATA]. Anything other than big-endian is read as little-endian.
            var machineBytes = header.Slice(ElfMachineOffset, sizeof(ushort));
            var machine = header[ElfDataEncodingOffset] == ElfDataBigEndian
                ? BinaryPrimitives.ReadUInt16BigEndian(machineBytes)
                : BinaryPrimitives.ReadUInt16LittleEndian(machineBytes);

            return machine switch
            {
                0x03 => "i386",
                0x3E => "x86_64",
                0x28 => "arm",
                0xB7 => "aarch64",
                0xF3 => "riscv",
                _ => null
            };
        }
        catch (Exception)
        {
            // Best effort: an unreadable file must not discard the rest of the metadata.
            return null;
        }
    }

    /// <summary>
    /// Maps the binary format to a platform name.
    /// </summary>
    /// <param name="binary">The discovered binary.</param>
    /// <returns>"linux", "windows" or "darwin"; <c>null</c> for any other format.</returns>
    private static string? DetectPlatform(DiscoveredBinary binary) => binary.Format switch
    {
        BinaryFormat.Elf => "linux",
        BinaryFormat.Pe => "windows",
        BinaryFormat.MachO => "darwin",
        _ => null
    };
}

/// <summary>
/// Result of native binary analysis.
/// </summary>
public sealed record NativeAnalysisResult
{
    /// <summary>Shared result with zero counts and no components.</summary>
    public static readonly NativeAnalysisResult Empty = new();

    /// <summary>Number of binaries discovered in filesystem.</summary>
    public int DiscoveredCount { get; init; }

    /// <summary>Number of binaries successfully analyzed.</summary>
    public int AnalyzedCount { get; init; }

    /// <summary>Number of binaries resolved via Build-ID index.</summary>
    public int ResolvedCount { get; init; }

    /// <summary>Number of binaries not found in Build-ID index.</summary>
    public int UnresolvedCount { get; init; }

    /// <summary>Whether the analysis timed out.</summary>
    public bool TimedOut { get; init; }

    /// <summary>Total elapsed time in milliseconds.</summary>
    public long ElapsedMs { get; init; }

    // NOTE(review): element type presumed to be the emitter's batch result type - confirm against INativeComponentEmitter.
    /// <summary>Emitted component results.</summary>
    public IReadOnlyList<NativeComponentEmitResult> Components { get; init; } = Array.Empty<NativeComponentEmitResult>();

    // NOTE(review): element type presumed to be LayerComponentFragment from Scanner.Core.Contracts - confirm.
    /// <summary>Layer component fragments for SBOM merging.</summary>
    public IReadOnlyList<LayerComponentFragment> LayerFragments { get; init; } = Array.Empty<LayerComponentFragment>();
}