15 KiB
15 KiB
Extending Binary Analysis
This guide explains how to add support for new binary formats or custom section extractors to the binary diff attestation system.
Overview
The binary analysis system is designed for extensibility. You can add support for:
- New binary formats (PE, Mach-O, WebAssembly)
- Custom section extractors (additional ELF sections, custom hash algorithms)
- Verdict classifiers (custom backport detection logic)
Architecture
Core Interfaces
+---------------------------+ +----------------------+ +-------------------+
| IElfSectionHashExtractor |--->| BinaryDiffService |--->| BinaryDiffFinding |
+---------------------------+ +----------------------+ +-------------------+
Key Interfaces
/// <summary>
/// Contract for computing per-section hashes of an ELF binary.
/// </summary>
public interface IElfSectionHashExtractor
{
    /// <summary>
    /// Extracts section hashes from the ELF binary located at <paramref name="elfPath"/>.
    /// </summary>
    /// <param name="elfPath">Path of the ELF binary.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// The extracted hash set, or <see langword="null"/>; the conditions that yield
    /// <see langword="null"/> are defined by the implementation.
    /// </returns>
    Task<ElfSectionHashSet?> ExtractAsync(string elfPath, CancellationToken cancellationToken = default);

    /// <summary>
    /// Extracts section hashes from an ELF binary that is already held in memory.
    /// </summary>
    /// <param name="elfBytes">Raw bytes of the ELF binary.</param>
    /// <param name="virtualPath">
    /// Path to associate with <paramref name="elfBytes"/>
    /// (presumably used for reporting only; confirm against the implementation).
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// The extracted hash set, or <see langword="null"/>; the conditions that yield
    /// <see langword="null"/> are defined by the implementation.
    /// </returns>
    Task<ElfSectionHashSet?> ExtractFromBytesAsync(
        ReadOnlyMemory<byte> elfBytes,
        string virtualPath,
        CancellationToken cancellationToken = default);
}
Future multi-format support (PE, Mach-O) will introduce format detection and dedicated extractors similar to the ELF interface above.
Adding a New Binary Format
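The current implementation is ELF-only. The steps below describe the intended shape for adding PE or Mach-O support. Types that appear in the examples but are not part of the ELF interface shown above (for example `ISectionHashExtractor<TConfig>`, `SectionHashSet`, and `BinaryFormat`) should be read as illustrative of that intended design; adjust the interfaces as they are introduced.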
Step 1: Define Configuration
// src/Scanner/__Libraries/StellaOps.Scanner.Contracts/PeSectionConfig.cs
namespace StellaOps.Scanner.Contracts;
/// <summary>
/// Options that control PE section hash extraction.
/// </summary>
public sealed record PeSectionConfig
{
    // 100 MiB, expressed in bytes.
    private const long DefaultMaxSectionSize = 100L * 1024 * 1024;

    /// <summary>Names of the sections whose contents are hashed.</summary>
    public ImmutableArray<string> Sections { get; init; } =
        ImmutableArray.Create(".text", ".rdata", ".data", ".rsrc");

    /// <summary>Names of the hash algorithms to apply.</summary>
    public ImmutableArray<string> HashAlgorithms { get; init; } =
        ImmutableArray.Create("sha256");

    /// <summary>Largest section, in bytes, that will be processed.</summary>
    public long MaxSectionSize { get; init; } = DefaultMaxSectionSize;

    /// <summary>Whether version resources are extracted as well.</summary>
    public bool ExtractVersionInfo { get; init; } = true;
}
Step 2: Implement the Extractor
// src/Scanner/StellaOps.Scanner.Analyzers.Native/Hardening/PeSectionHashExtractor.cs
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Extracts SHA-256 hashes of selected sections from PE (Portable Executable) binaries.
/// </summary>
/// <remarks>
/// Extraction is best-effort: input that is not a PE file, or that fails while being parsed,
/// yields <see langword="null"/> instead of an exception. Cancellation is the exception to
/// that rule and always propagates to the caller.
/// Only SHA-256 is computed; <see cref="PeSectionConfig.HashAlgorithms"/> and
/// <see cref="PeSectionConfig.ExtractVersionInfo"/> are not consulted by this implementation.
/// </remarks>
public sealed class PeSectionHashExtractor : ISectionHashExtractor<PeSectionConfig>
{
    // Size of the fixed DOS header; its last four bytes (offset 0x3C) hold e_lfanew,
    // the file offset of the "PE\0\0" signature.
    private const int DosHeaderSize = 64;
    private const int PeHeaderPointerOffset = 0x3C;
    private const int PeSignatureSize = 4;

    private readonly TimeProvider _timeProvider;
    private readonly ILogger<PeSectionHashExtractor> _logger;

    /// <summary>Creates the extractor.</summary>
    /// <param name="timeProvider">Clock used to stamp <c>ExtractedAt</c>.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
    public PeSectionHashExtractor(
        TimeProvider timeProvider,
        ILogger<PeSectionHashExtractor> logger)
    {
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>The binary format handled by this extractor.</summary>
    public BinaryFormat SupportedFormat => BinaryFormat.Pe;

    /// <summary>
    /// Reads <paramref name="filePath"/> fully into memory and extracts its section hashes.
    /// </summary>
    /// <param name="filePath">Path of the file to read; also recorded as the result's file path.</param>
    /// <param name="config">Extraction options; defaults to a new <see cref="PeSectionConfig"/>.</param>
    /// <param name="cancellationToken">Token used to cancel reading and extraction.</param>
    /// <returns>The extracted hashes, or <see langword="null"/> when extraction is not possible.</returns>
    public async Task<SectionHashSet?> ExtractAsync(
        string filePath,
        PeSectionConfig? config = null,
        CancellationToken cancellationToken = default)
    {
        var bytes = await File.ReadAllBytesAsync(filePath, cancellationToken);

        // A null config is defaulted by ExtractFromBytesAsync.
        return await ExtractFromBytesAsync(bytes, filePath, config, cancellationToken);
    }

    /// <summary>
    /// Extracts section hashes from a PE image held in memory.
    /// </summary>
    /// <param name="bytes">Raw bytes of the candidate PE file.</param>
    /// <param name="virtualPath">Path recorded in the result and used in log messages.</param>
    /// <param name="config">Extraction options; defaults to a new <see cref="PeSectionConfig"/>.</param>
    /// <param name="cancellationToken">Token checked before parsing and once per section.</param>
    /// <returns>
    /// The extracted hashes, or <see langword="null"/> when <paramref name="bytes"/> is not a
    /// PE file or parsing fails.
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public async Task<SectionHashSet?> ExtractFromBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string virtualPath,
        PeSectionConfig? config = null,
        CancellationToken cancellationToken = default)
    {
        config ??= new PeSectionConfig();
        cancellationToken.ThrowIfCancellationRequested();

        if (!IsPeFile(bytes.Span))
        {
            _logger.LogDebug("Not a PE file: {Path}", virtualPath);
            return null;
        }

        try
        {
            var sections = new Dictionary<string, SectionInfo>();

            // PEReader closes the stream it is given when it is disposed.
            using var peReader = new PEReader(CreateReadOnlyStream(bytes));
            foreach (var sectionHeader in peReader.PEHeaders.SectionHeaders)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var sectionName = sectionHeader.Name;
                if (!config.Sections.Contains(sectionName))
                {
                    continue;
                }

                if (sectionHeader.SizeOfRawData > config.MaxSectionSize)
                {
                    _logger.LogWarning(
                        "Section {Section} exceeds max size ({Size} > {Max})",
                        sectionName, sectionHeader.SizeOfRawData, config.MaxSectionSize);
                    continue;
                }

                var sectionData = peReader.GetSectionData(sectionName);
                if (sectionData.Length == 0)
                {
                    continue;
                }

                // GetContent() returns ImmutableArray<byte>, which has no implicit
                // conversion to ReadOnlySpan<byte>; AsSpan() is required.
                var content = sectionData.GetContent();
                sections[sectionName] = new SectionInfo
                {
                    Sha256 = ComputeSha256(content.AsSpan()),
                    Size = sectionData.Length,
                    Offset = sectionHeader.PointerToRawData
                };
            }

            var fileHash = ComputeSha256(bytes.Span);
            return new SectionHashSet
            {
                FilePath = virtualPath,
                FileHash = fileHash,
                Sections = sections.ToImmutableDictionary(),
                ExtractedAt = _timeProvider.GetUtcNow(),
                ExtractorVersion = GetType().Assembly.GetName().Version?.ToString() ?? "1.0.0"
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Malformed input must not crash the caller, but cancellation must not be
            // disguised as "could not extract".
            _logger.LogError(ex, "Failed to extract PE sections from {Path}", virtualPath);
            return null;
        }
    }

    /// <summary>
    /// Wraps <paramref name="bytes"/> in a read-only stream, reusing the backing array when
    /// there is one so that large binaries are not duplicated in memory.
    /// </summary>
    private static MemoryStream CreateReadOnlyStream(ReadOnlyMemory<byte> bytes)
    {
        if (System.Runtime.InteropServices.MemoryMarshal.TryGetArray(bytes, out var segment)
            && segment.Array is not null)
        {
            return new MemoryStream(segment.Array, segment.Offset, segment.Count, writable: false);
        }

        // Memory that is not array-backed (e.g. native memory) has to be copied.
        return new MemoryStream(bytes.ToArray(), writable: false);
    }

    /// <summary>
    /// Returns <see langword="true"/> when <paramref name="bytes"/> starts with a DOS header
    /// ("MZ") whose e_lfanew field points at a "PE\0\0" signature inside the buffer.
    /// </summary>
    private static bool IsPeFile(ReadOnlySpan<byte> bytes)
    {
        if (bytes.Length < DosHeaderSize || bytes[0] != 0x4D || bytes[1] != 0x5A) // "MZ"
        {
            return false;
        }

        // e_lfanew is a little-endian 32-bit file offset.
        int peHeaderOffset =
            bytes[PeHeaderPointerOffset]
            | (bytes[PeHeaderPointerOffset + 1] << 8)
            | (bytes[PeHeaderPointerOffset + 2] << 16)
            | (bytes[PeHeaderPointerOffset + 3] << 24);

        if (peHeaderOffset < 0 || peHeaderOffset > bytes.Length - PeSignatureSize)
        {
            return false;
        }

        return bytes[peHeaderOffset] == 0x50          // 'P'
            && bytes[peHeaderOffset + 1] == 0x45      // 'E'
            && bytes[peHeaderOffset + 2] == 0x00
            && bytes[peHeaderOffset + 3] == 0x00;
    }

    /// <summary>Computes the SHA-256 of <paramref name="data"/> as lowercase hexadecimal.</summary>
    private static string ComputeSha256(ReadOnlySpan<byte> data)
    {
        Span<byte> hash = stackalloc byte[32];
        SHA256.HashData(data, hash);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
Step 3: Register Services
// src/Scanner/StellaOps.Scanner.Analyzers.Native/ServiceCollectionExtensions.cs
/// <summary>
/// Dependency-injection registration for the native binary analyzers.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the ELF and PE section hash extractors, the composite detector/extractor,
    /// and the validated <see cref="PeSectionConfig"/> options.
    /// </summary>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <param name="configuration">
    /// Configuration root; PE options are bound from the <c>Scanner:Native:PeSections</c> section.
    /// </param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
    public static IServiceCollection AddNativeAnalyzers(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // NOTE(review): PeSectionHashExtractor takes a TimeProvider in its constructor;
        // this method does not register one, so it is assumed to be registered elsewhere.

        // Existing ELF extractor
        services.AddSingleton<IElfSectionHashExtractor, ElfSectionHashExtractor>();

        // New PE extractor
        services.AddSingleton<ISectionHashExtractor<PeSectionConfig>, PeSectionHashExtractor>();

        // Register in composite
        services.AddSingleton<IBinaryFormatDetector, CompositeBinaryFormatDetector>();
        services.AddSingleton<ICompositeSectionHashExtractor>(sp =>
        {
            var extractors = new Dictionary<BinaryFormat, object>
            {
                [BinaryFormat.Elf] = sp.GetRequiredService<IElfSectionHashExtractor>(),
                [BinaryFormat.Pe] = sp.GetRequiredService<ISectionHashExtractor<PeSectionConfig>>()
            };
            return new CompositeSectionHashExtractor(extractors);
        });

        // Configuration.
        // PeSectionConfig carries no data-annotation attributes, so ValidateDataAnnotations()
        // alone checks nothing. Registering the custom validator is what makes
        // ValidateOnStart() reject an empty section list or a non-positive size limit.
        services.AddSingleton<Microsoft.Extensions.Options.IValidateOptions<PeSectionConfig>, PeSectionConfigValidator>();
        services.AddOptions<PeSectionConfig>()
            .Bind(configuration.GetSection("Scanner:Native:PeSections"))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        return services;
    }
}
Step 4: Add Tests
// src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Native.Tests/PeSectionHashExtractorTests.cs
namespace StellaOps.Scanner.Analyzers.Native.Tests;
/// <summary>
/// Unit tests for <see cref="PeSectionHashExtractor"/>, run against fixture binaries
/// under <c>TestData/</c> with a fixed clock.
/// </summary>
public class PeSectionHashExtractorTests
{
    private const string SamplePePath = "TestData/sample.exe";
    private const string SampleElfPath = "TestData/sample.elf";

    private readonly PeSectionHashExtractor _extractor;
    private readonly FakeTimeProvider _timeProvider;

    public PeSectionHashExtractorTests()
    {
        var fixedNow = new DateTimeOffset(2026, 1, 13, 12, 0, 0, TimeSpan.Zero);
        _timeProvider = new FakeTimeProvider(fixedNow);
        _extractor = new PeSectionHashExtractor(_timeProvider, NullLogger<PeSectionHashExtractor>.Instance);
    }

    /// <summary>A valid PE yields a result containing the code and read-only data sections.</summary>
    [Fact]
    public async Task ExtractAsync_ValidPe_ReturnsAllSections()
    {
        var hashes = await _extractor.ExtractAsync(SamplePePath);

        Assert.NotNull(hashes);
        Assert.Contains(".text", hashes.Sections.Keys);
        Assert.Contains(".rdata", hashes.Sections.Keys);
        Assert.NotEmpty(hashes.FileHash);
    }

    /// <summary>A non-PE input (an ELF fixture) is rejected with a null result.</summary>
    [Fact]
    public async Task ExtractAsync_NotPeFile_ReturnsNull()
    {
        var hashes = await _extractor.ExtractAsync(SampleElfPath);

        Assert.Null(hashes);
    }

    /// <summary>Extracting the same file twice produces identical hashes.</summary>
    [Fact]
    public async Task ExtractAsync_Deterministic_SameOutput()
    {
        var first = await _extractor.ExtractAsync(SamplePePath);
        var second = await _extractor.ExtractAsync(SamplePePath);

        Assert.Equal(first!.FileHash, second!.FileHash);

        var firstText = first.Sections[".text"];
        var secondText = second.Sections[".text"];
        Assert.Equal(firstText.Sha256, secondText.Sha256);
    }
}
Adding Custom Section Analysis
Custom Hash Algorithm
/// <summary>
/// Pluggable hash algorithm used when hashing binary data.
/// </summary>
public interface IHashAlgorithmProvider
{
    /// <summary>Identifier of the algorithm.</summary>
    string Name { get; }

    /// <summary>Computes the hash of <paramref name="data"/>.</summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The hash rendered as a string.</returns>
    string ComputeHash(ReadOnlySpan<byte> data);
}
/// <summary>
/// <see cref="IHashAlgorithmProvider"/> implementation backed by the Blake3 library.
/// </summary>
public sealed class Blake3HashProvider : IHashAlgorithmProvider
{
    private const string AlgorithmName = "blake3";

    /// <summary>Always <c>"blake3"</c>.</summary>
    public string Name => AlgorithmName;

    /// <summary>Hashes <paramref name="data"/> with Blake3.</summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The digest as lowercase hexadecimal.</returns>
    public string ComputeHash(ReadOnlySpan<byte> data)
    {
        // Keep the digest in a local: AsSpan() is taken over the stored value.
        var digest = Blake3.Hasher.Hash(data);
        var upperHex = Convert.ToHexString(digest.AsSpan());
        return upperHex.ToLowerInvariant();
    }
}
Custom Verdict Classifier
/// <summary>
/// Verdict classifier that compares the <c>.text</c> and <c>.symtab</c> section hashes
/// of a base binary and a target binary.
/// </summary>
public sealed class EnhancedVerdictClassifier : IVerdictClassifier
{
    private const string TextSection = ".text";
    private const string SymbolTableSection = ".symtab";

    // NOTE(review): injected but not yet consulted by Classify/ComputeConfidence.
    private readonly ISymbolAnalyzer _symbolAnalyzer;

    /// <summary>Creates the classifier.</summary>
    /// <param name="symbolAnalyzer">Symbol analyzer dependency.</param>
    /// <exception cref="ArgumentNullException"><paramref name="symbolAnalyzer"/> is <see langword="null"/>.</exception>
    public EnhancedVerdictClassifier(ISymbolAnalyzer symbolAnalyzer)
    {
        // Without this assignment the readonly field could never hold a value.
        _symbolAnalyzer = symbolAnalyzer ?? throw new ArgumentNullException(nameof(symbolAnalyzer));
    }

    /// <summary>
    /// Classifies the difference between two section hash sets.
    /// </summary>
    /// <param name="baseHashes">Hashes of the base binary.</param>
    /// <param name="targetHashes">Hashes of the target binary.</param>
    /// <returns>
    /// <see cref="Verdict.Patched"/> when <c>.text</c> changed and <c>.symtab</c> did not;
    /// <see cref="Verdict.Vanilla"/> when both changed;
    /// <see cref="Verdict.Unknown"/> otherwise, including when either input is <see langword="null"/>.
    /// </returns>
    public Verdict Classify(SectionHashSet? baseHashes, SectionHashSet? targetHashes)
    {
        if (baseHashes is null || targetHashes is null)
        {
            return Verdict.Unknown;
        }

        var textChanged = HasSectionChanged(baseHashes, targetHashes, TextSection);
        var symbolsChanged = HasSectionChanged(baseHashes, targetHashes, SymbolTableSection);

        // Without a code change there is nothing to classify.
        if (!textChanged)
        {
            return Verdict.Unknown;
        }

        // Code changed but the symbol table did not: likely a patch.
        // Code and symbol table both changed: treated as a vanilla update.
        return symbolsChanged ? Verdict.Vanilla : Verdict.Patched;
    }

    /// <summary>
    /// Computes the fraction of base sections whose hash is identical in the target.
    /// </summary>
    /// <param name="baseHashes">Hashes of the base binary.</param>
    /// <param name="targetHashes">Hashes of the target binary.</param>
    /// <returns>
    /// A value in [0, 1] truncated to four decimal places; <c>0.0</c> when either input is
    /// <see langword="null"/> or the base set has no sections. Sections that exist only in
    /// the target do not affect the result.
    /// </returns>
    public double ComputeConfidence(SectionHashSet? baseHashes, SectionHashSet? targetHashes)
    {
        if (baseHashes is null || targetHashes is null)
        {
            return 0.0;
        }

        var matchingSections = 0;
        var totalSections = 0;
        foreach (var (name, baseInfo) in baseHashes.Sections)
        {
            totalSections++;
            if (targetHashes.Sections.TryGetValue(name, out var targetInfo)
                && baseInfo.Sha256 == targetInfo.Sha256)
            {
                matchingSections++;
            }
        }

        if (totalSections == 0)
        {
            return 0.0;
        }

        // Truncate (rather than round to nearest) so the reported value never overstates similarity.
        return Math.Round((double)matchingSections / totalSections, 4, MidpointRounding.ToZero);
    }

    /// <summary>
    /// Reports whether <paramref name="section"/> differs between the two sets.
    /// A section absent from the base is reported as unchanged; a section present in the
    /// base but absent from the target is reported as changed.
    /// </summary>
    private static bool HasSectionChanged(SectionHashSet baseHashes, SectionHashSet targetHashes, string section)
    {
        if (!baseHashes.Sections.TryGetValue(section, out var baseInfo))
        {
            return false;
        }

        if (!targetHashes.Sections.TryGetValue(section, out var targetInfo))
        {
            return true;
        }

        return baseInfo.Sha256 != targetInfo.Sha256;
    }
}
Best Practices
1. Determinism
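Always ensure deterministic output. Take timestamps from an injected `TimeProvider` instead of reading the system clock directly, so that tests can pin the clock: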
// BAD - Non-deterministic: the timestamp is read straight from the system clock,
// so two otherwise identical extractions produce different results and a test
// cannot pin the value.
public SectionHashSet Extract(string path)
{
return new SectionHashSet
{
ExtractedAt = DateTimeOffset.UtcNow, // Non-deterministic! Wall-clock read.
// ... (remaining members elided in this sample)
};
}
// GOOD - Injected time provider: the timestamp comes from a TimeProvider field,
// which a test can replace with a fake that returns a fixed instant.
public SectionHashSet Extract(string path)
{
return new SectionHashSet
{
ExtractedAt = _timeProvider.GetUtcNow(), // Deterministic when the provider is a fixed/fake clock
// ... (remaining members elided in this sample)
};
}
2. Error Handling
Handle malformed binaries gracefully:
/// <summary>
/// Sample error-handling shape for an extractor: expected failures are logged and
/// reported as a <see langword="null"/> result instead of being thrown.
/// </summary>
/// <param name="path">Path of the binary; included in the log messages.</param>
/// <param name="ct">Cancellation token (not used by the code shown in this sample).</param>
/// <returns>
/// <see langword="null"/> when a <see cref="BadImageFormatException"/> or an
/// <see cref="IOException"/> is caught; the success path is elided in this sample.
/// </returns>
public async Task<SectionHashSet?> ExtractAsync(string path, CancellationToken ct)
{
try
{
// ... extraction logic
}
catch (BadImageFormatException ex)
{
// Logged at Debug level, unlike the I/O failure below which is a Warning.
_logger.LogDebug(ex, "Invalid binary format: {Path}", path);
return null; // Return null, don't throw
}
catch (IOException ex)
{
_logger.LogWarning(ex, "I/O error reading: {Path}", path);
return null;
}
}
3. Memory Management
Stream large binaries instead of loading them entirely into memory:
/// <summary>
/// Sample shape for extracting from a large binary: the file is opened as a
/// read-only asynchronous stream rather than being read into a byte array.
/// </summary>
/// <param name="path">Path of the binary to open.</param>
/// <param name="ct">Cancellation token (not used by the code shown in this sample).</param>
/// <returns>The extraction result; the code that produces it is elided in this sample.</returns>
public async Task<SectionHashSet?> ExtractLargeBinaryAsync(
string path,
CancellationToken ct)
{
// "await using" disposes the stream asynchronously when the method exits.
await using var stream = new FileStream(
path,
FileMode.Open,
FileAccess.Read,
FileShare.Read, // other readers may open the file concurrently
bufferSize: 81920, // 80 KiB
useAsync: true);
// Stream section data instead of loading all at once
// ...
}
4. Configuration Validation
Validate configuration at startup:
/// <summary>
/// Validates <see cref="PeSectionConfig"/> values.
/// </summary>
public sealed class PeSectionConfigValidator : IValidateOptions<PeSectionConfig>
{
    /// <summary>
    /// Checks that at least one section is configured and that the size limit is positive.
    /// </summary>
    /// <param name="name">Name of the options instance being validated (not used).</param>
    /// <param name="options">Options instance to validate.</param>
    /// <returns>
    /// <see cref="ValidateOptionsResult.Success"/> when valid; otherwise a failure that
    /// describes the first violated rule.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <see langword="null"/>.</exception>
    public ValidateOptionsResult Validate(string? name, PeSectionConfig options)
    {
        ArgumentNullException.ThrowIfNull(options);

        // IsDefaultOrEmpty instead of Length == 0: reading Length on a default
        // (uninitialized) ImmutableArray throws instead of reporting a validation failure.
        if (options.Sections.IsDefaultOrEmpty)
        {
            return ValidateOptionsResult.Fail("At least one section must be specified");
        }

        if (options.MaxSectionSize <= 0)
        {
            return ValidateOptionsResult.Fail("MaxSectionSize must be positive");
        }

        return ValidateOptionsResult.Success;
    }
}
Testing Guidelines
Golden File Tests
/// <summary>
/// Extracts a known binary and compares the file hash and every section hash with
/// the values stored in the golden JSON file.
/// </summary>
[Fact]
public async Task Extract_KnownBinary_MatchesGolden()
{
    // Arrange
    var binaryPath = "TestData/known-binary.exe";
    var goldenPath = "TestData/known-binary.golden.json";
    var expected = JsonSerializer.Deserialize<SectionHashSet>(
        await File.ReadAllTextAsync(goldenPath));
    Assert.NotNull(expected);

    // Act
    var result = await _extractor.ExtractAsync(binaryPath);

    // Assert
    Assert.NotNull(result);
    Assert.Equal(expected.FileHash, result.FileHash);
    Assert.Equal(expected.Sections.Count, result.Sections.Count);

    // Equal counts alone would not notice a section whose hash (or name) changed.
    foreach (var (name, expectedSection) in expected.Sections)
    {
        Assert.True(
            result.Sections.TryGetValue(name, out var actualSection),
            $"Section '{name}' is missing from the extraction result");
        Assert.Equal(expectedSection.Sha256, actualSection!.Sha256);
    }
}
Fuzz Testing
/// <summary>
/// Feeds malformed inputs to the extractor and requires either a null result or a
/// usable one.
/// </summary>
/// <remarks>
/// NOTE(review): despite the "OrThrows" suffix in the name, an exception escaping the
/// extractor fails this test; the name is kept so existing test filters keep working.
/// </remarks>
[Theory]
[MemberData(nameof(MalformedBinaries))]
public async Task Extract_MalformedBinary_ReturnsNullOrThrows(byte[] malformedData)
{
    // Act - should not crash; any exception thrown here fails the test.
    var result = await _extractor.ExtractFromBytesAsync(
        malformedData,
        "test.bin");

    // Assert - either null or a valid result, never an exception.
    Assert.True(
        result is null || !string.IsNullOrEmpty(result.FileHash),
        "A non-null result must carry a file hash");
}