# Extending Binary Analysis
|
|
|
|
This guide explains how to add support for new binary formats or custom section extractors to the binary diff attestation system.
|
|
|
|
## Overview
|
|
|
|
The binary analysis system is designed for extensibility. You can add support for:
|
|
|
|
- **New binary formats** (PE, Mach-O, WebAssembly)
|
|
- **Custom section extractors** (additional ELF sections, custom hash algorithms)
|
|
- **Verdict classifiers** (custom backport detection logic)
|
|
|
|
## Architecture
|
|
|
|
### Core Interfaces
|
|
|
|
```
┌─────────────────────────────────────────────────────────────────┐
│                    Binary Analysis Pipeline                     │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  IBinaryFormatDetector ──▶ ISectionHashExtractor<TConfig>       │
│            │                             │                      │
│            ▼                             ▼                      │
│  BinaryFormat enum                SectionHashSet                │
│  (elf, pe, macho)                  (per-format)                 │
│                                          │                      │
│                                          ▼                      │
│                                 IVerdictClassifier              │
│                                          │                      │
│                                          ▼                      │
│                                  BinaryDiffFinding              │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘
```
|
|
|
|
### Key Interfaces
|
|
|
|
```csharp
|
|
/// <summary>
/// Identifies the binary format of a file from its magic number / headers.
/// </summary>
public interface IBinaryFormatDetector
{
    /// <summary>Detects the format from bytes that are already in memory.</summary>
    /// <param name="header">Header bytes of the candidate binary.</param>
    /// <returns>The detected <see cref="BinaryFormat"/>.</returns>
    BinaryFormat Detect(ReadOnlySpan<byte> header);

    /// <summary>Detects the format of the file identified by a path.</summary>
    /// <param name="filePath">Path of the candidate binary.</param>
    /// <returns>The detected <see cref="BinaryFormat"/>.</returns>
    BinaryFormat DetectFromPath(string filePath);
}
|
|
|
|
/// <summary>
/// Produces per-section hashes for one specific binary format.
/// </summary>
/// <typeparam name="TConfig">Format-specific extraction options.</typeparam>
public interface ISectionHashExtractor<TConfig>
    where TConfig : class
{
    /// <summary>The binary format this extractor handles.</summary>
    BinaryFormat SupportedFormat { get; }

    /// <summary>Extracts section hashes from a file identified by path.</summary>
    /// <param name="filePath">Path of the binary to analyze.</param>
    /// <param name="config">Extraction options; may be omitted.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// The extracted hash set, or <c>null</c>. The PE extractor in this guide
    /// returns <c>null</c> for input it cannot parse.
    /// </returns>
    Task<SectionHashSet?> ExtractAsync(
        string filePath,
        TConfig? config = null,
        CancellationToken cancellationToken = default);

    /// <summary>Extracts section hashes from a binary already held in memory.</summary>
    /// <param name="bytes">Complete contents of the binary.</param>
    /// <param name="virtualPath">
    /// Path label for the data. The PE extractor in this guide uses it for
    /// logging and as the reported file path.
    /// </param>
    /// <param name="config">Extraction options; may be omitted.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The extracted hash set, or <c>null</c>.</returns>
    Task<SectionHashSet?> ExtractFromBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string virtualPath,
        TConfig? config = null,
        CancellationToken cancellationToken = default);
}
|
|
|
|
/// <summary>
/// Decides whether the difference between two binaries looks patched,
/// vanilla, or cannot be determined.
/// </summary>
public interface IVerdictClassifier
{
    /// <summary>Classifies the change between a base and a target hash set.</summary>
    /// <param name="baseHashes">Section hashes of the base binary; may be <c>null</c>.</param>
    /// <param name="targetHashes">Section hashes of the target binary; may be <c>null</c>.</param>
    /// <returns>The resulting <see cref="Verdict"/>.</returns>
    Verdict Classify(SectionHashSet? baseHashes, SectionHashSet? targetHashes);

    /// <summary>Computes a confidence score for the same pair of hash sets.</summary>
    /// <param name="baseHashes">Section hashes of the base binary; may be <c>null</c>.</param>
    /// <param name="targetHashes">Section hashes of the target binary; may be <c>null</c>.</param>
    /// <returns>The confidence score as a <see cref="double"/>.</returns>
    double ComputeConfidence(SectionHashSet? baseHashes, SectionHashSet? targetHashes);
}
|
|
```
|
|
|
|
## Adding a New Binary Format
|
|
|
|
### Step 1: Define Configuration
|
|
|
|
```csharp
|
|
// src/Scanner/__Libraries/StellaOps.Scanner.Contracts/PeSectionConfig.cs
|
|
|
|
namespace StellaOps.Scanner.Contracts;
|
|
|
|
/// <summary>
/// Options that control PE section hash extraction.
/// </summary>
public sealed record PeSectionConfig
{
    /// <summary>
    /// Names of the sections to hash.
    /// Defaults to <c>.text</c>, <c>.rdata</c>, <c>.data</c> and <c>.rsrc</c>.
    /// </summary>
    public ImmutableArray<string> Sections { get; init; } =
        ImmutableArray.Create(".text", ".rdata", ".data", ".rsrc");

    /// <summary>
    /// Names of the hash algorithms to use. Defaults to <c>sha256</c>.
    /// </summary>
    public ImmutableArray<string> HashAlgorithms { get; init; } =
        ImmutableArray.Create("sha256");

    /// <summary>
    /// Largest section, in bytes, that will be processed.
    /// Defaults to 100 * 1024 * 1024 bytes.
    /// </summary>
    public long MaxSectionSize { get; init; } = 100L * 1024L * 1024L;

    /// <summary>
    /// Whether version resources should be extracted. Defaults to <c>true</c>.
    /// </summary>
    public bool ExtractVersionInfo { get; init; } = true;
}
|
|
```
|
|
|
|
### Step 2: Implement the Extractor
|
|
|
|
```csharp
|
|
// src/Scanner/StellaOps.Scanner.Analyzers.Native/Hardening/PeSectionHashExtractor.cs
|
|
|
|
namespace StellaOps.Scanner.Analyzers.Native;
|
|
|
|
/// <summary>
/// Computes SHA-256 hashes of selected sections of a PE (Portable Executable) image.
/// </summary>
/// <remarks>
/// Only SHA-256 is computed. <see cref="PeSectionConfig.HashAlgorithms"/> and
/// <see cref="PeSectionConfig.ExtractVersionInfo"/> are not consulted by this extractor.
/// </remarks>
public sealed class PeSectionHashExtractor : ISectionHashExtractor<PeSectionConfig>
{
    // The DOS header is 64 bytes long; its last four bytes (offset 0x3C, "e_lfanew")
    // hold the file offset of the "PE\0\0" signature.
    private const int DosHeaderSize = 64;
    private const int PeSignatureOffsetField = 0x3C;
    private const int PeSignatureSize = 4;

    private readonly TimeProvider _timeProvider;
    private readonly ILogger<PeSectionHashExtractor> _logger;

    /// <summary>Creates the extractor.</summary>
    /// <param name="timeProvider">Clock used to stamp <c>ExtractedAt</c>.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public PeSectionHashExtractor(
        TimeProvider timeProvider,
        ILogger<PeSectionHashExtractor> logger)
    {
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public BinaryFormat SupportedFormat => BinaryFormat.Pe;

    /// <summary>
    /// Reads the whole file into memory and delegates to
    /// <see cref="ExtractFromBytesAsync"/>.
    /// </summary>
    /// <param name="filePath">Path of the PE file to analyze.</param>
    /// <param name="config">Extraction options; defaults are used when <c>null</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the read and the extraction.</param>
    /// <returns>
    /// The extracted hashes, or <c>null</c> when the file cannot be read, is not a
    /// PE image, or cannot be parsed.
    /// </returns>
    public async Task<SectionHashSet?> ExtractAsync(
        string filePath,
        PeSectionConfig? config = null,
        CancellationToken cancellationToken = default)
    {
        byte[] bytes;
        try
        {
            bytes = await File.ReadAllBytesAsync(filePath, cancellationToken);
        }
        catch (IOException ex)
        {
            // Follows the guide's error-handling rule: I/O failures yield null, not an exception.
            _logger.LogWarning(ex, "I/O error reading: {Path}", filePath);
            return null;
        }

        return await ExtractFromBytesAsync(bytes, filePath, config, cancellationToken);
    }

    /// <summary>
    /// Hashes the configured sections of an in-memory PE image.
    /// </summary>
    /// <param name="bytes">Complete contents of the binary.</param>
    /// <param name="virtualPath">Path used for logging and reported as <c>FilePath</c>.</param>
    /// <param name="config">Extraction options; defaults are used when <c>null</c>.</param>
    /// <param name="cancellationToken">Checked on entry and before each section.</param>
    /// <returns>
    /// A completed task holding the extracted hashes, or <c>null</c> when the data is
    /// not a PE image or parsing fails; a canceled task when cancellation was requested.
    /// </returns>
    public Task<SectionHashSet?> ExtractFromBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string virtualPath,
        PeSectionConfig? config = null,
        CancellationToken cancellationToken = default)
    {
        // The work is CPU-bound and synchronous, so there is nothing to await;
        // results are wrapped in already-completed tasks.
        if (cancellationToken.IsCancellationRequested)
        {
            return Task.FromCanceled<SectionHashSet?>(cancellationToken);
        }

        try
        {
            var result = ExtractCore(bytes, virtualPath, config ?? new PeSectionConfig(), cancellationToken);
            return Task.FromResult(result);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            return Task.FromCanceled<SectionHashSet?>(cancellationToken);
        }
    }

    // Synchronous extraction. Only OperationCanceledException escapes; every other
    // failure is logged and mapped to null.
    private SectionHashSet? ExtractCore(
        ReadOnlyMemory<byte> bytes,
        string virtualPath,
        PeSectionConfig config,
        CancellationToken cancellationToken)
    {
        if (!IsPeFile(bytes.Span))
        {
            _logger.LogDebug("Not a PE file: {Path}", virtualPath);
            return null;
        }

        try
        {
            var sections = new Dictionary<string, SectionInfo>();

            using var peReader = new PEReader(OpenReadOnlyStream(bytes));

            foreach (var sectionHeader in peReader.PEHeaders.SectionHeaders)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var sectionName = sectionHeader.Name;

                if (!config.Sections.Contains(sectionName))
                {
                    continue;
                }

                if (sectionHeader.SizeOfRawData > config.MaxSectionSize)
                {
                    _logger.LogWarning(
                        "Section {Section} exceeds max size ({Size} > {Max})",
                        sectionName, sectionHeader.SizeOfRawData, config.MaxSectionSize);
                    continue;
                }

                var sectionData = peReader.GetSectionData(sectionName);
                if (sectionData.Length == 0)
                {
                    continue;
                }

                // GetContent() returns ImmutableArray<byte>; AsSpan() is needed because
                // there is no implicit conversion to ReadOnlySpan<byte>.
                var sha256 = ComputeSha256(sectionData.GetContent().AsSpan());

                sections[sectionName] = new SectionInfo
                {
                    Sha256 = sha256,
                    Size = sectionData.Length,
                    Offset = sectionHeader.PointerToRawData
                };
            }

            return new SectionHashSet
            {
                FilePath = virtualPath,
                FileHash = ComputeSha256(bytes.Span),
                Sections = sections.ToImmutableDictionary(),
                ExtractedAt = _timeProvider.GetUtcNow(),
                ExtractorVersion = GetType().Assembly.GetName().Version?.ToString() ?? "1.0.0"
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Malformed input must not crash the caller; cancellation is allowed through.
            _logger.LogError(ex, "Failed to extract PE sections from {Path}", virtualPath);
            return null;
        }
    }

    // Wraps the data in a read-only stream, reusing the backing array when the memory
    // is array-backed so the image is not copied a second time.
    private static MemoryStream OpenReadOnlyStream(ReadOnlyMemory<byte> bytes)
    {
        if (System.Runtime.InteropServices.MemoryMarshal.TryGetArray(bytes, out ArraySegment<byte> segment)
            && segment.Array is not null)
        {
            return new MemoryStream(segment.Array, segment.Offset, segment.Count, writable: false);
        }

        return new MemoryStream(bytes.ToArray(), writable: false);
    }

    // Returns true when the data starts with a DOS header ("MZ") whose e_lfanew field
    // points at a "PE\0\0" signature that lies inside the buffer.
    private static bool IsPeFile(ReadOnlySpan<byte> bytes)
    {
        if (bytes.Length < DosHeaderSize)
        {
            return false;
        }

        if (bytes[0] != 0x4D || bytes[1] != 0x5A) // "MZ"
        {
            return false;
        }

        var signatureOffset = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(
            bytes.Slice(PeSignatureOffsetField, sizeof(uint)));

        // bytes.Length >= 64 here, so the subtraction cannot go negative.
        if (signatureOffset > (uint)(bytes.Length - PeSignatureSize))
        {
            return false;
        }

        var signature = bytes.Slice((int)signatureOffset, PeSignatureSize);
        return signature[0] == 0x50    // 'P'
            && signature[1] == 0x45    // 'E'
            && signature[2] == 0x00
            && signature[3] == 0x00;
    }

    // Returns the SHA-256 digest of the data as lower-case hexadecimal.
    private static string ComputeSha256(ReadOnlySpan<byte> data)
    {
        Span<byte> hash = stackalloc byte[32];
        SHA256.HashData(data, hash);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
|
|
```
|
|
|
|
### Step 3: Register Services
|
|
|
|
```csharp
|
|
// src/Scanner/StellaOps.Scanner.Analyzers.Native/ServiceCollectionExtensions.cs
|
|
|
|
/// <summary>
/// Dependency-injection registration for the native binary analyzers.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the ELF and PE section hash extractors, the composite detector and
    /// extractor, and the bound and validated <see cref="PeSectionConfig"/> options.
    /// </summary>
    /// <param name="services">Service collection to add registrations to.</param>
    /// <param name="configuration">
    /// Configuration root; PE options are read from <c>Scanner:Native:PeSections</c>.
    /// </param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddNativeAnalyzers(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Existing ELF extractor
        services.AddSingleton<IElfSectionHashExtractor, ElfSectionHashExtractor>();

        // New PE extractor
        services.AddSingleton<ISectionHashExtractor<PeSectionConfig>, PeSectionHashExtractor>();

        // Register in composite
        services.AddSingleton<IBinaryFormatDetector, CompositeBinaryFormatDetector>();
        services.AddSingleton<ICompositeSectionHashExtractor>(sp =>
        {
            var extractors = new Dictionary<BinaryFormat, object>
            {
                [BinaryFormat.Elf] = sp.GetRequiredService<IElfSectionHashExtractor>(),
                [BinaryFormat.Pe] = sp.GetRequiredService<ISectionHashExtractor<PeSectionConfig>>()
            };
            return new CompositeSectionHashExtractor(extractors);
        });

        // Configuration. The validator must be registered for ValidateOnStart() to have
        // anything to run: PeSectionConfig carries no data-annotation attributes.
        services.AddSingleton<IValidateOptions<PeSectionConfig>, PeSectionConfigValidator>();
        services.AddOptions<PeSectionConfig>()
            .Bind(configuration.GetSection("Scanner:Native:PeSections"))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        return services;
    }
}
|
|
```
|
|
|
|
### Step 4: Add Tests
|
|
|
|
```csharp
|
|
// src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Native.Tests/PeSectionHashExtractorTests.cs
|
|
|
|
namespace StellaOps.Scanner.Analyzers.Native.Tests;
|
|
|
|
/// <summary>
/// Unit tests for <see cref="PeSectionHashExtractor"/> using a fixed clock and
/// sample binaries under <c>TestData/</c>.
/// </summary>
public class PeSectionHashExtractorTests
{
    private const string SamplePePath = "TestData/sample.exe";
    private const string SampleElfPath = "TestData/sample.elf";

    private readonly PeSectionHashExtractor _extractor;
    private readonly FakeTimeProvider _timeProvider;

    public PeSectionHashExtractorTests()
    {
        _timeProvider = new FakeTimeProvider(new DateTimeOffset(2026, 1, 13, 12, 0, 0, TimeSpan.Zero));
        _extractor = new PeSectionHashExtractor(
            _timeProvider,
            NullLogger<PeSectionHashExtractor>.Instance);
    }

    /// <summary>A valid PE yields the expected sections, a file hash and the fake clock's timestamp.</summary>
    [Fact]
    public async Task ExtractAsync_ValidPe_ReturnsAllSections()
    {
        // Act
        var result = await _extractor.ExtractAsync(SamplePePath);

        // Assert
        Assert.NotNull(result);
        Assert.Contains(".text", result.Sections.Keys);
        Assert.Contains(".rdata", result.Sections.Keys);
        Assert.NotEmpty(result.FileHash);

        // The timestamp must come from the injected provider, not the system clock.
        Assert.Equal(_timeProvider.GetUtcNow(), result.ExtractedAt);
    }

    /// <summary>Input that is not a PE image yields <c>null</c> rather than an exception.</summary>
    [Fact]
    public async Task ExtractAsync_NotPeFile_ReturnsNull()
    {
        // Act
        var result = await _extractor.ExtractAsync(SampleElfPath);

        // Assert
        Assert.Null(result);
    }

    /// <summary>Two extractions of the same file produce identical output.</summary>
    [Fact]
    public async Task ExtractAsync_Deterministic_SameOutput()
    {
        // Act
        var result1 = await _extractor.ExtractAsync(SamplePePath);
        var result2 = await _extractor.ExtractAsync(SamplePePath);

        // Assert - fail with a clear message instead of a NullReferenceException.
        Assert.NotNull(result1);
        Assert.NotNull(result2);

        Assert.Equal(result1.FileHash, result2.FileHash);
        Assert.Equal(result1.ExtractedAt, result2.ExtractedAt);
        Assert.Equal(result1.Sections.Count, result2.Sections.Count);

        // Every section must hash identically, not just .text.
        foreach (var (name, section) in result1.Sections)
        {
            Assert.Contains(name, result2.Sections.Keys);
            Assert.Equal(section.Sha256, result2.Sections[name].Sha256);
        }
    }
}
|
|
```
|
|
|
|
## Adding Custom Section Analysis
|
|
|
|
### Custom Hash Algorithm
|
|
|
|
```csharp
|
|
/// <summary>
/// Pluggable hash algorithm that can be applied to section data.
/// </summary>
public interface IHashAlgorithmProvider
{
    /// <summary>Name identifying the algorithm (for example <c>"blake3"</c>).</summary>
    string Name { get; }

    /// <summary>Hashes the given bytes.</summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The hash rendered as a string.</returns>
    string ComputeHash(ReadOnlySpan<byte> data);
}
|
|
|
|
/// <summary>
/// <see cref="IHashAlgorithmProvider"/> backed by the Blake3 library.
/// </summary>
public sealed class Blake3HashProvider : IHashAlgorithmProvider
{
    /// <inheritdoc />
    public string Name => "blake3";

    /// <summary>Hashes the data with Blake3.</summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The digest as lower-case hexadecimal.</returns>
    public string ComputeHash(ReadOnlySpan<byte> data)
    {
        var digest = Blake3.Hasher.Hash(data);

        // Convert.ToHexString produces upper-case hex, so lower-case it afterwards.
        var upperHex = Convert.ToHexString(digest.AsSpan());
        return upperHex.ToLowerInvariant();
    }
}
|
|
```
|
|
|
|
### Custom Verdict Classifier
|
|
|
|
```csharp
|
|
/// <summary>
/// Example <see cref="IVerdictClassifier"/> that bases its verdict on whether the
/// <c>.text</c> and <c>.symtab</c> sections changed between two binaries.
/// </summary>
public sealed class EnhancedVerdictClassifier : IVerdictClassifier
{
    private const string TextSection = ".text";
    private const string SymbolTableSection = ".symtab";

    /// <summary>Classifies the change between two hash sets.</summary>
    /// <param name="baseHashes">Section hashes of the base binary.</param>
    /// <param name="targetHashes">Section hashes of the target binary.</param>
    /// <returns>
    /// <see cref="Verdict.Unknown"/> when either input is <c>null</c> or <c>.text</c>
    /// did not change; <see cref="Verdict.Patched"/> when <c>.text</c> changed but
    /// <c>.symtab</c> did not; <see cref="Verdict.Vanilla"/> when both changed.
    /// </returns>
    public Verdict Classify(SectionHashSet? baseHashes, SectionHashSet? targetHashes)
    {
        if (baseHashes is null || targetHashes is null)
        {
            return Verdict.Unknown;
        }

        // Without a code change there is nothing to classify.
        if (!HasSectionChanged(baseHashes, targetHashes, TextSection))
        {
            return Verdict.Unknown;
        }

        // Code changed: stable symbols suggest a targeted patch, changed symbols a vanilla update.
        return HasSectionChanged(baseHashes, targetHashes, SymbolTableSection)
            ? Verdict.Vanilla
            : Verdict.Patched;
    }

    /// <summary>
    /// Computes the fraction of base sections whose hash is identical in the target.
    /// </summary>
    /// <param name="baseHashes">Section hashes of the base binary.</param>
    /// <param name="targetHashes">Section hashes of the target binary.</param>
    /// <returns>
    /// A value between 0.0 and 1.0, truncated to four decimal places; 0.0 when either
    /// input is <c>null</c> or the base has no sections. Sections present only in the
    /// target do not affect the score.
    /// </returns>
    public double ComputeConfidence(SectionHashSet? baseHashes, SectionHashSet? targetHashes)
    {
        if (baseHashes is null || targetHashes is null)
        {
            return 0.0;
        }

        var totalSections = baseHashes.Sections.Count;
        if (totalSections == 0)
        {
            return 0.0;
        }

        var matchingSections = 0;
        foreach (var (name, baseInfo) in baseHashes.Sections)
        {
            if (targetHashes.Sections.TryGetValue(name, out var targetInfo)
                && DigestsEqual(baseInfo.Sha256, targetInfo.Sha256))
            {
                matchingSections++;
            }
        }

        // Higher similarity = higher confidence in classification
        return Math.Round((double)matchingSections / totalSections, 4, MidpointRounding.ToZero);
    }

    // Reports whether a section differs between base and target. A section missing from
    // the base counts as unchanged (even if the target has it); a section present in the
    // base but missing from the target counts as changed.
    private static bool HasSectionChanged(SectionHashSet baseHashes, SectionHashSet targetHashes, string section)
    {
        if (!baseHashes.Sections.TryGetValue(section, out var baseInfo))
        {
            return false;
        }

        if (!targetHashes.Sections.TryGetValue(section, out var targetInfo))
        {
            return true;
        }

        return !DigestsEqual(baseInfo.Sha256, targetInfo.Sha256);
    }

    // Hex digests that differ only in letter case encode the same bytes, so compare
    // them case-insensitively.
    private static bool DigestsEqual(string? left, string? right) =>
        string.Equals(left, right, StringComparison.OrdinalIgnoreCase);
}
|
|
```
|
|
|
|
## Best Practices
|
|
|
|
### 1. Determinism
|
|
|
|
Always ensure deterministic output:
|
|
|
|
```csharp
|
|
// BAD - Non-deterministic: the timestamp is read straight from the system clock,
// so ExtractedAt differs between otherwise identical runs.
|
|
public SectionHashSet Extract(string path)
|
|
{
|
|
return new SectionHashSet
|
|
{
|
|
ExtractedAt = DateTimeOffset.UtcNow, // Non-deterministic!
|
|
// ...
|
|
};
|
|
}
|
|
|
|
// GOOD - Injected time provider: the timestamp comes from _timeProvider, so a test
// can substitute a fixed clock and get the same ExtractedAt on every run.
|
|
public SectionHashSet Extract(string path)
|
|
{
|
|
return new SectionHashSet
|
|
{
|
|
ExtractedAt = _timeProvider.GetUtcNow(), // Deterministic when the provider is a fixed clock
|
|
// ...
|
|
};
|
|
}
|
|
```
|
|
|
|
### 2. Error Handling
|
|
|
|
Handle malformed binaries gracefully:
|
|
|
|
```csharp
|
|
// Sketch of the recommended error-handling shape: the extraction logic itself is
// elided. Each handled failure is logged and mapped to a null result.
public async Task<SectionHashSet?> ExtractAsync(string path, CancellationToken ct)
|
|
{
|
|
try
|
|
{
|
|
// ... extraction logic
|
|
}
|
|
// Malformed input: logged at Debug level.
catch (BadImageFormatException ex)
|
|
{
|
|
_logger.LogDebug(ex, "Invalid binary format: {Path}", path);
|
|
return null; // Return null, don't throw
|
|
}
|
|
// Read failure: logged at Warning level.
catch (IOException ex)
|
|
{
|
|
_logger.LogWarning(ex, "I/O error reading: {Path}", path);
|
|
return null;
|
|
}
|
|
}
|
|
```
|
|
|
|
### 3. Memory Management
|
|
|
|
Stream large binaries instead of loading them entirely into memory:
|
|
|
|
```csharp
|
|
// Sketch: opens the file as a stream rather than reading it into a byte array.
// The section-streaming logic itself is elided.
public async Task<SectionHashSet?> ExtractLargeBinaryAsync(
|
|
string path,
|
|
CancellationToken ct)
|
|
{
|
|
// Read-only access; FileShare.Read lets other readers open the file concurrently.
await using var stream = new FileStream(
|
|
path,
|
|
FileMode.Open,
|
|
FileAccess.Read,
|
|
FileShare.Read,
|
|
bufferSize: 81920, // 80 KiB buffer
|
|
useAsync: true); // open for asynchronous I/O
|
|
|
|
// Stream section data instead of loading all at once
|
|
// ...
|
|
}
|
|
```
|
|
|
|
### 4. Configuration Validation
|
|
|
|
Validate configuration at startup:
|
|
|
|
```csharp
|
|
/// <summary>
/// Validates <see cref="PeSectionConfig"/> values.
/// </summary>
public sealed class PeSectionConfigValidator : IValidateOptions<PeSectionConfig>
{
    /// <summary>Checks that the options are usable.</summary>
    /// <param name="name">Name of the options instance; not used.</param>
    /// <param name="options">Options to validate.</param>
    /// <returns>
    /// A failure when no sections are configured or <c>MaxSectionSize</c> is not
    /// positive; otherwise success.
    /// </returns>
    public ValidateOptionsResult Validate(string? name, PeSectionConfig options)
    {
        // IsDefaultOrEmpty also covers an uninitialized ImmutableArray, on which
        // reading Length would throw instead of producing a validation failure.
        if (options.Sections.IsDefaultOrEmpty)
        {
            return ValidateOptionsResult.Fail("At least one section must be specified");
        }

        if (options.MaxSectionSize <= 0)
        {
            return ValidateOptionsResult.Fail("MaxSectionSize must be positive");
        }

        return ValidateOptionsResult.Success;
    }
}
|
|
```
|
|
|
|
## Testing Guidelines
|
|
|
|
### Golden File Tests
|
|
|
|
```csharp
|
|
/// <summary>
/// Compares the extractor's output for a known binary against a checked-in golden
/// JSON file: file hash, section count, and every section's SHA-256.
/// </summary>
[Fact]
public async Task Extract_KnownBinary_MatchesGolden()
{
    // Arrange
    var binaryPath = "TestData/known-binary.exe";
    var goldenPath = "TestData/known-binary.golden.json";

    // Act
    var result = await _extractor.ExtractAsync(binaryPath);

    // Assert
    var expected = JsonSerializer.Deserialize<SectionHashSet>(
        await File.ReadAllTextAsync(goldenPath));

    // Fail with a clear message instead of a NullReferenceException.
    Assert.NotNull(expected);
    Assert.NotNull(result);

    Assert.Equal(expected.FileHash, result.FileHash);
    Assert.Equal(expected.Sections.Count, result.Sections.Count);

    // Equal counts alone would miss a changed section hash, so compare each one.
    foreach (var (name, expectedSection) in expected.Sections)
    {
        Assert.Contains(name, result.Sections.Keys);
        Assert.Equal(expectedSection.Sha256, result.Sections[name].Sha256);
    }
}
|
|
```
|
|
|
|
### Fuzz Testing
|
|
|
|
```csharp
|
|
/// <summary>
/// Feeds malformed inputs to the extractor. Despite the "OrThrows" in the name, the
/// test passes only when no exception escapes: the result may be <c>null</c> or a
/// valid hash set.
/// </summary>
[Theory]
[MemberData(nameof(MalformedBinaries))]
public async Task Extract_MalformedBinary_ReturnsNullOrThrows(byte[] malformedData)
{
    // Act - capture any exception instead of letting it abort the test.
    var exception = await Record.ExceptionAsync(
        () => _extractor.ExtractFromBytesAsync(malformedData, "test.bin"));

    // Assert - either null or a valid result is acceptable; an exception is not.
    Assert.Null(exception);
}
|
|
```
|
|
|
|
## References
|
|
|
|
- [PE Format Specification](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format)
|
|
- [Mach-O Format Reference](https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/MachORuntime/)
|
|
- [ELF Specification](https://refspecs.linuxfoundation.org/elf/elf.pdf)
|
|
- [Binary Diff Attestation Architecture](../modules/scanner/binary-diff-attestation.md)
|