# Extending Binary Analysis

This guide explains how to add support for new binary formats or custom section extractors to the binary diff attestation system.

## Overview

The binary analysis system is designed for extensibility. You can add support for:

- **New binary formats** (PE, Mach-O, WebAssembly)
- **Custom section extractors** (additional ELF sections, custom hash algorithms)
- **Verdict classifiers** (custom backport detection logic)

## Architecture

### Core Interfaces

```
┌─────────────────────────────────────────────────────────────────┐
│ Binary Analysis Pipeline │
├─────────────────────────────────────────────────────────────────┤
│ │
│ IBinaryFormatDetector ──▶ ISectionHashExtractor<TConfig> │
│ │ │ │
│ ▼ ▼ │
│ BinaryFormat enum SectionHashSet │
│ (elf, pe, macho) (per-format) │
│ │ │
│ ▼ │
│ IVerdictClassifier │
│ │ │
│ ▼ │
│ BinaryDiffFinding │
│ │
└─────────────────────────────────────────────────────────────────┘
```
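
The boxes in the diagram exchange a small set of data contracts: `BinaryFormat`, `SectionHashSet`, `SectionInfo`, and `Verdict`. Their canonical definitions live in `StellaOps.Scanner.Contracts`; the sketch below only mirrors how this guide uses them, so treat the exact members as illustrative rather than authoritative.

```csharp
using System.Collections.Immutable;

// Illustrative shapes only - the real contracts may carry additional members.
public enum BinaryFormat { Unknown, Elf, Pe, MachO }

public enum Verdict { Unknown, Patched, Vanilla }

/// <summary>Hash and location details for a single section.</summary>
public sealed record SectionInfo
{
    public string Sha256 { get; init; } = "";
    public long Size { get; init; }
    public long Offset { get; init; }
}

/// <summary>All section hashes extracted from one binary.</summary>
public sealed record SectionHashSet
{
    public string FilePath { get; init; } = "";
    public string FileHash { get; init; } = "";
    public ImmutableDictionary<string, SectionInfo> Sections { get; init; }
        = ImmutableDictionary<string, SectionInfo>.Empty;
    public DateTimeOffset ExtractedAt { get; init; }
    public string? ExtractorVersion { get; init; }
}
```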

### Key Interfaces

```csharp
/// <summary>
/// Detects binary format from file magic/headers.
/// </summary>
public interface IBinaryFormatDetector
{
    BinaryFormat Detect(ReadOnlySpan<byte> header);
    BinaryFormat DetectFromPath(string filePath);
}

/// <summary>
/// Extracts section hashes for a specific binary format.
/// </summary>
public interface ISectionHashExtractor<TConfig> where TConfig : class
{
    BinaryFormat SupportedFormat { get; }

    Task<SectionHashSet?> ExtractAsync(
        string filePath,
        TConfig? config = null,
        CancellationToken cancellationToken = default);

    Task<SectionHashSet?> ExtractFromBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string virtualPath,
        TConfig? config = null,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Classifies binary changes as patched/vanilla/unknown.
/// </summary>
public interface IVerdictClassifier
{
    Verdict Classify(SectionHashSet? baseHashes, SectionHashSet? targetHashes);
    double ComputeConfidence(SectionHashSet? baseHashes, SectionHashSet? targetHashes);
}
```
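
Put together, the three interfaces form a detect → extract → classify flow. The sketch below shows one way to wire them; the `BinaryDiffAnalyzer` type, its tuple return value, and the base/target comparison are illustrative conveniences, not part of the existing codebase.

```csharp
// Hypothetical glue code: detect the format, extract hashes for both sides, classify the diff.
public sealed class BinaryDiffAnalyzer<TConfig> where TConfig : class
{
    private readonly IBinaryFormatDetector _detector;
    private readonly ISectionHashExtractor<TConfig> _extractor;
    private readonly IVerdictClassifier _classifier;

    public BinaryDiffAnalyzer(
        IBinaryFormatDetector detector,
        ISectionHashExtractor<TConfig> extractor,
        IVerdictClassifier classifier)
    {
        _detector = detector;
        _extractor = extractor;
        _classifier = classifier;
    }

    public async Task<(Verdict Verdict, double Confidence)> CompareAsync(
        string basePath,
        string targetPath,
        CancellationToken ct = default)
    {
        // Skip files this extractor does not understand.
        if (_detector.DetectFromPath(basePath) != _extractor.SupportedFormat)
            return (Verdict.Unknown, 0.0);

        // Extract per-section hashes for both sides of the diff.
        var baseHashes = await _extractor.ExtractAsync(basePath, cancellationToken: ct);
        var targetHashes = await _extractor.ExtractAsync(targetPath, cancellationToken: ct);

        // Classify the change and attach a confidence score.
        return (_classifier.Classify(baseHashes, targetHashes),
                _classifier.ComputeConfidence(baseHashes, targetHashes));
    }
}
```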

## Adding a New Binary Format

### Step 1: Define Configuration

```csharp
// src/Scanner/__Libraries/StellaOps.Scanner.Contracts/PeSectionConfig.cs

namespace StellaOps.Scanner.Contracts;

/// <summary>
/// Configuration for PE section hash extraction.
/// </summary>
public sealed record PeSectionConfig
{
    /// <summary>Sections to extract hashes from.</summary>
    public ImmutableArray<string> Sections { get; init; } = [".text", ".rdata", ".data", ".rsrc"];

    /// <summary>Hash algorithms to use.</summary>
    public ImmutableArray<string> HashAlgorithms { get; init; } = ["sha256"];

    /// <summary>Maximum section size to process (bytes).</summary>
    public long MaxSectionSize { get; init; } = 100 * 1024 * 1024; // 100MB

    /// <summary>Whether to extract version resources.</summary>
    public bool ExtractVersionInfo { get; init; } = true;
}
```
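
The defaults are chosen so that no configuration is strictly required. Callers can override individual values with an object initializer or a `with` expression (the values below are arbitrary examples); Step 3 shows how the same record is bound from the `Scanner:Native:PeSections` configuration section.

```csharp
// Override only what differs from the defaults; everything else keeps its initializer value.
var config = new PeSectionConfig
{
    Sections = [".text", ".rsrc"],       // hash fewer sections
    MaxSectionSize = 16 * 1024 * 1024,   // tighter 16 MB cap
};

// Records make derived variants cheap:
var withoutVersionInfo = config with { ExtractVersionInfo = false };
```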

### Step 2: Implement the Extractor

```csharp
// src/Scanner/StellaOps.Scanner.Analyzers.Native/Hardening/PeSectionHashExtractor.cs

using System.Collections.Immutable;
using System.Reflection.PortableExecutable;
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Contracts;

namespace StellaOps.Scanner.Analyzers.Native;

public sealed class PeSectionHashExtractor : ISectionHashExtractor<PeSectionConfig>
{
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<PeSectionHashExtractor> _logger;

    public PeSectionHashExtractor(
        TimeProvider timeProvider,
        ILogger<PeSectionHashExtractor> logger)
    {
        _timeProvider = timeProvider;
        _logger = logger;
    }

    public BinaryFormat SupportedFormat => BinaryFormat.Pe;

    public async Task<SectionHashSet?> ExtractAsync(
        string filePath,
        PeSectionConfig? config = null,
        CancellationToken cancellationToken = default)
    {
        config ??= new PeSectionConfig();

        // Read file
        var bytes = await File.ReadAllBytesAsync(filePath, cancellationToken);
        return await ExtractFromBytesAsync(bytes, filePath, config, cancellationToken);
    }

    public async Task<SectionHashSet?> ExtractFromBytesAsync(
        ReadOnlyMemory<byte> bytes,
        string virtualPath,
        PeSectionConfig? config = null,
        CancellationToken cancellationToken = default)
    {
        config ??= new PeSectionConfig();

        // Validate PE magic
        if (!IsPeFile(bytes.Span))
        {
            _logger.LogDebug("Not a PE file: {Path}", virtualPath);
            return null;
        }

        try
        {
            var sections = new Dictionary<string, SectionInfo>();

            // Parse PE headers
            using var peReader = new PEReader(new MemoryStream(bytes.ToArray()));

            foreach (var sectionHeader in peReader.PEHeaders.SectionHeaders)
            {
                var sectionName = sectionHeader.Name;

                if (!config.Sections.Contains(sectionName))
                    continue;

                if (sectionHeader.SizeOfRawData > config.MaxSectionSize)
                {
                    _logger.LogWarning(
                        "Section {Section} exceeds max size ({Size} > {Max})",
                        sectionName, sectionHeader.SizeOfRawData, config.MaxSectionSize);
                    continue;
                }

                // Get section data
                var sectionData = peReader.GetSectionData(sectionName);
                if (sectionData.Length == 0)
                    continue;

                // Compute hash
                var sha256 = ComputeSha256(sectionData.GetContent().AsSpan());

                sections[sectionName] = new SectionInfo
                {
                    Sha256 = sha256,
                    Size = sectionData.Length,
                    Offset = sectionHeader.PointerToRawData
                };
            }

            // Compute file hash
            var fileHash = ComputeSha256(bytes.Span);

            return new SectionHashSet
            {
                FilePath = virtualPath,
                FileHash = fileHash,
                Sections = sections.ToImmutableDictionary(),
                ExtractedAt = _timeProvider.GetUtcNow(),
                ExtractorVersion = GetType().Assembly.GetName().Version?.ToString() ?? "1.0.0"
            };
        }
        catch (OperationCanceledException)
        {
            // Cancellation is not an extraction failure; let it propagate.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to extract PE sections from {Path}", virtualPath);
            return null;
        }
    }

    private static bool IsPeFile(ReadOnlySpan<byte> bytes)
    {
        // Check DOS header magic (MZ)
        if (bytes.Length < 64)
            return false;

        return bytes[0] == 0x4D && bytes[1] == 0x5A; // "MZ"
    }

    private static string ComputeSha256(ReadOnlySpan<byte> data)
    {
        Span<byte> hash = stackalloc byte[32];
        SHA256.HashData(data, hash);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}
```
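
For a quick local check outside the DI container, the extractor can also be constructed directly; `TimeProvider.System` and a null logger stand in for the injected services, and the sample path is a placeholder.

```csharp
var extractor = new PeSectionHashExtractor(
    TimeProvider.System,
    NullLogger<PeSectionHashExtractor>.Instance);

var hashes = await extractor.ExtractAsync("bin/sample.exe");
if (hashes is not null)
{
    // Print each configured section with its hash and raw size.
    foreach (var (name, info) in hashes.Sections)
        Console.WriteLine($"{name}: {info.Sha256} ({info.Size} bytes)");
}
```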

### Step 3: Register Services

```csharp
// src/Scanner/StellaOps.Scanner.Analyzers.Native/ServiceCollectionExtensions.cs

public static class ServiceCollectionExtensions
{
    public static IServiceCollection AddNativeAnalyzers(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        // Existing ELF extractor
        services.AddSingleton<IElfSectionHashExtractor, ElfSectionHashExtractor>();

        // New PE extractor
        services.AddSingleton<ISectionHashExtractor<PeSectionConfig>, PeSectionHashExtractor>();

        // Register in composite
        services.AddSingleton<IBinaryFormatDetector, CompositeBinaryFormatDetector>();
        services.AddSingleton<ICompositeSectionHashExtractor>(sp =>
        {
            var extractors = new Dictionary<BinaryFormat, object>
            {
                [BinaryFormat.Elf] = sp.GetRequiredService<IElfSectionHashExtractor>(),
                [BinaryFormat.Pe] = sp.GetRequiredService<ISectionHashExtractor<PeSectionConfig>>()
            };
            return new CompositeSectionHashExtractor(extractors);
        });

        // Configuration
        services.AddOptions<PeSectionConfig>()
            .Bind(configuration.GetSection("Scanner:Native:PeSections"))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        return services;
    }
}
```
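
The registration above refers to `ICompositeSectionHashExtractor` and `CompositeSectionHashExtractor`, which are not shown in this guide. One possible shape, matching the `Dictionary<BinaryFormat, object>` passed in the lambda, is sketched below; the real interface in the codebase may differ.

```csharp
// Illustrative only: routes extraction requests to the extractor registered for the format.
public interface ICompositeSectionHashExtractor
{
    Task<SectionHashSet?> ExtractAsync(
        BinaryFormat format,
        string filePath,
        CancellationToken cancellationToken = default);
}

public sealed class CompositeSectionHashExtractor : ICompositeSectionHashExtractor
{
    private readonly IReadOnlyDictionary<BinaryFormat, object> _extractors;

    public CompositeSectionHashExtractor(IReadOnlyDictionary<BinaryFormat, object> extractors)
        => _extractors = extractors;

    public Task<SectionHashSet?> ExtractAsync(
        BinaryFormat format,
        string filePath,
        CancellationToken cancellationToken = default)
    {
        // Unknown formats yield no hashes instead of failing the scan.
        if (!_extractors.TryGetValue(format, out var extractor))
            return Task.FromResult<SectionHashSet?>(null);

        return extractor switch
        {
            ISectionHashExtractor<PeSectionConfig> pe =>
                pe.ExtractAsync(filePath, cancellationToken: cancellationToken),
            // ELF (and any future formats) would be matched the same way.
            _ => Task.FromResult<SectionHashSet?>(null),
        };
    }
}
```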

### Step 4: Add Tests

```csharp
// src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Native.Tests/PeSectionHashExtractorTests.cs

namespace StellaOps.Scanner.Analyzers.Native.Tests;

public class PeSectionHashExtractorTests
{
    private readonly PeSectionHashExtractor _extractor;
    private readonly FakeTimeProvider _timeProvider;

    public PeSectionHashExtractorTests()
    {
        _timeProvider = new FakeTimeProvider(new DateTimeOffset(2026, 1, 13, 12, 0, 0, TimeSpan.Zero));
        _extractor = new PeSectionHashExtractor(
            _timeProvider,
            NullLogger<PeSectionHashExtractor>.Instance);
    }

    [Fact]
    public async Task ExtractAsync_ValidPe_ReturnsAllSections()
    {
        // Arrange
        var pePath = "TestData/sample.exe";

        // Act
        var result = await _extractor.ExtractAsync(pePath);

        // Assert
        Assert.NotNull(result);
        Assert.Contains(".text", result.Sections.Keys);
        Assert.Contains(".rdata", result.Sections.Keys);
        Assert.NotEmpty(result.FileHash);
    }

    [Fact]
    public async Task ExtractAsync_NotPeFile_ReturnsNull()
    {
        // Arrange
        var elfPath = "TestData/sample.elf";

        // Act
        var result = await _extractor.ExtractAsync(elfPath);

        // Assert
        Assert.Null(result);
    }

    [Fact]
    public async Task ExtractAsync_Deterministic_SameOutput()
    {
        // Arrange
        var pePath = "TestData/sample.exe";

        // Act
        var result1 = await _extractor.ExtractAsync(pePath);
        var result2 = await _extractor.ExtractAsync(pePath);

        // Assert
        Assert.Equal(result1!.FileHash, result2!.FileHash);
        Assert.Equal(result1.Sections[".text"].Sha256, result2.Sections[".text"].Sha256);
    }
}
```

## Adding Custom Section Analysis

### Custom Hash Algorithm

```csharp
public interface IHashAlgorithmProvider
{
    string Name { get; }
    string ComputeHash(ReadOnlySpan<byte> data);
}

public sealed class Blake3HashProvider : IHashAlgorithmProvider
{
    public string Name => "blake3";

    public string ComputeHash(ReadOnlySpan<byte> data)
    {
        // Using the Blake3 library
        var hash = Blake3.Hasher.Hash(data);
        return Convert.ToHexString(hash.AsSpan()).ToLowerInvariant();
    }
}
```
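
To make providers selectable through `PeSectionConfig.HashAlgorithms`, they can be registered in DI and resolved by name. The registry type below is a minimal sketch (its name and shape are assumptions, not existing code):

```csharp
public sealed class HashAlgorithmRegistry
{
    private readonly Dictionary<string, IHashAlgorithmProvider> _providers;

    public HashAlgorithmRegistry(IEnumerable<IHashAlgorithmProvider> providers)
    {
        // Case-insensitive so "SHA256" and "sha256" resolve to the same provider.
        _providers = providers.ToDictionary(p => p.Name, StringComparer.OrdinalIgnoreCase);
    }

    public IHashAlgorithmProvider Resolve(string name) =>
        _providers.TryGetValue(name, out var provider)
            ? provider
            : throw new KeyNotFoundException($"No hash provider registered for '{name}'.");
}

// Registration: every provider added to DI becomes resolvable by name.
// services.AddSingleton<IHashAlgorithmProvider, Blake3HashProvider>();
// services.AddSingleton<HashAlgorithmRegistry>();
```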

### Custom Verdict Classifier

```csharp
public sealed class EnhancedVerdictClassifier : IVerdictClassifier
{
    private readonly ISymbolAnalyzer _symbolAnalyzer;

    public EnhancedVerdictClassifier(ISymbolAnalyzer symbolAnalyzer)
    {
        // Available for deeper symbol-level analysis; unused in this simplified example.
        _symbolAnalyzer = symbolAnalyzer;
    }

    public Verdict Classify(SectionHashSet? baseHashes, SectionHashSet? targetHashes)
    {
        if (baseHashes == null || targetHashes == null)
            return Verdict.Unknown;

        // Check .text section change
        var textChanged = HasSectionChanged(baseHashes, targetHashes, ".text");
        var symbolsChanged = HasSectionChanged(baseHashes, targetHashes, ".symtab");

        // Custom logic: if .text changed but the symbols did not, this is likely a backported patch
        if (textChanged && !symbolsChanged)
        {
            return Verdict.Patched;
        }

        // If everything changed significantly, it's a vanilla upstream update
        if (textChanged && symbolsChanged)
        {
            return Verdict.Vanilla;
        }

        return Verdict.Unknown;
    }

    public double ComputeConfidence(SectionHashSet? baseHashes, SectionHashSet? targetHashes)
    {
        if (baseHashes == null || targetHashes == null)
            return 0.0;

        // Compute similarity score
        var matchingSections = 0;
        var totalSections = 0;

        foreach (var (name, baseInfo) in baseHashes.Sections)
        {
            totalSections++;
            if (targetHashes.Sections.TryGetValue(name, out var targetInfo))
            {
                if (baseInfo.Sha256 == targetInfo.Sha256)
                    matchingSections++;
            }
        }

        if (totalSections == 0)
            return 0.0;

        // Higher similarity = higher confidence in the classification
        return Math.Round((double)matchingSections / totalSections, 4, MidpointRounding.ToZero);
    }

    private static bool HasSectionChanged(SectionHashSet baseHashes, SectionHashSet targetHashes, string section)
    {
        if (!baseHashes.Sections.TryGetValue(section, out var baseInfo))
            return false;
        if (!targetHashes.Sections.TryGetValue(section, out var targetInfo))
            return true;

        return baseInfo.Sha256 != targetInfo.Sha256;
    }
}
```
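
Swapping in the custom classifier is a DI registration like any other; `DefaultSymbolAnalyzer` below is a hypothetical implementation of the `ISymbolAnalyzer` dependency, named here only for illustration.

```csharp
// Replace the built-in classifier with the enhanced one and provide its dependency.
services.AddSingleton<ISymbolAnalyzer, DefaultSymbolAnalyzer>(); // hypothetical implementation
services.AddSingleton<IVerdictClassifier, EnhancedVerdictClassifier>();
```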

## Best Practices

### 1. Determinism

Always ensure deterministic output:

```csharp
// BAD - Non-deterministic
public SectionHashSet Extract(string path)
{
    return new SectionHashSet
    {
        ExtractedAt = DateTimeOffset.UtcNow, // Non-deterministic!
        // ...
    };
}

// GOOD - Injected time provider
public SectionHashSet Extract(string path)
{
    return new SectionHashSet
    {
        ExtractedAt = _timeProvider.GetUtcNow(), // Deterministic
        // ...
    };
}
```
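
Timestamps are not the only source of non-determinism. Hash-based dictionaries enumerate in an unspecified order, so any code that serializes `Sections` (golden files, attestation payloads) should sort the keys first. A sketch, assuming `hashes` is a `SectionHashSet`:

```csharp
// Sort sections by name so repeated runs serialize to byte-identical JSON.
var orderedSections = hashes.Sections.ToImmutableSortedDictionary(
    kvp => kvp.Key, kvp => kvp.Value, StringComparer.Ordinal);

var json = JsonSerializer.Serialize(
    orderedSections,
    new JsonSerializerOptions { WriteIndented = true });
```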

### 2. Error Handling

Handle malformed binaries gracefully:

```csharp
public async Task<SectionHashSet?> ExtractAsync(string path, CancellationToken ct)
{
    try
    {
        // ... extraction logic
    }
    catch (BadImageFormatException ex)
    {
        _logger.LogDebug(ex, "Invalid binary format: {Path}", path);
        return null; // Return null, don't throw
    }
    catch (IOException ex)
    {
        _logger.LogWarning(ex, "I/O error reading: {Path}", path);
        return null;
    }
}
```

### 3. Memory Management

Stream large binaries instead of loading them into memory in one piece:

```csharp
public async Task<SectionHashSet?> ExtractLargeBinaryAsync(
    string path,
    CancellationToken ct)
{
    await using var stream = new FileStream(
        path,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        bufferSize: 81920,
        useAsync: true);

    // Stream section data instead of loading all at once
    // ...
}
```
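
When a section is too large to buffer, its hash can be computed incrementally from the stream. The helper below is a sketch; `sectionOffset` and `sectionLength` are assumed to come from the parsed section header.

```csharp
private static async Task<string> HashSectionAsync(
    FileStream stream, long sectionOffset, int sectionLength, CancellationToken ct)
{
    using var hash = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
    var buffer = new byte[81920];

    stream.Seek(sectionOffset, SeekOrigin.Begin);
    var remaining = sectionLength;

    while (remaining > 0)
    {
        // Read at most one buffer's worth and never past the end of the section.
        var read = await stream.ReadAsync(buffer.AsMemory(0, Math.Min(buffer.Length, remaining)), ct);
        if (read == 0)
            break; // truncated file: hash whatever was actually present

        hash.AppendData(buffer, 0, read);
        remaining -= read;
    }

    return Convert.ToHexString(hash.GetHashAndReset()).ToLowerInvariant();
}
```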

### 4. Configuration Validation

Validate configuration at startup:

```csharp
public sealed class PeSectionConfigValidator : IValidateOptions<PeSectionConfig>
{
    public ValidateOptionsResult Validate(string? name, PeSectionConfig options)
    {
        if (options.Sections.IsDefaultOrEmpty)
            return ValidateOptionsResult.Fail("At least one section must be specified");

        if (options.MaxSectionSize <= 0)
            return ValidateOptionsResult.Fail("MaxSectionSize must be positive");

        return ValidateOptionsResult.Success;
    }
}
```
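
`ValidateDataAnnotations()` in Step 3 only covers attribute-based rules; a custom `IValidateOptions<>` implementation such as the one above must be registered explicitly so that `ValidateOnStart()` runs it when the host starts:

```csharp
// Register the validator next to the options binding from Step 3.
services.AddSingleton<IValidateOptions<PeSectionConfig>, PeSectionConfigValidator>();
```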

## Testing Guidelines

### Golden File Tests

```csharp
[Fact]
public async Task Extract_KnownBinary_MatchesGolden()
{
    // Arrange
    var binaryPath = "TestData/known-binary.exe";
    var goldenPath = "TestData/known-binary.golden.json";

    // Act
    var result = await _extractor.ExtractAsync(binaryPath);

    // Assert
    var expected = JsonSerializer.Deserialize<SectionHashSet>(
        await File.ReadAllTextAsync(goldenPath));

    Assert.Equal(expected!.FileHash, result!.FileHash);
    Assert.Equal(expected.Sections.Count, result.Sections.Count);
}
```

### Fuzz Testing

```csharp
[Theory]
[MemberData(nameof(MalformedBinaries))]
public async Task Extract_MalformedBinary_DoesNotThrow(byte[] malformedData)
{
    // Act & Assert - should not crash
    var result = await _extractor.ExtractFromBytesAsync(
        malformedData,
        "test.bin");

    // Either null or a valid result, never an exception
    // (an unhandled exception fails the test)
}
```
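
The `MalformedBinaries` member referenced by `[MemberData]` is not shown above; one way to define it is with small, hand-built inputs so the test stays deterministic (the specific byte patterns below are just examples):

```csharp
public static TheoryData<byte[]> MalformedBinaries => new()
{
    Array.Empty<byte>(),                          // empty file
    new byte[] { 0x4D, 0x5A },                    // "MZ" magic only, truncated DOS header
    Enumerable.Repeat((byte)0xFF, 128).ToArray(), // wrong magic, plausible length
    new byte[] { 0x7F, 0x45, 0x4C, 0x46 },        // ELF magic handed to the PE extractor
};
```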

## References

- [PE Format Specification](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format)
- [Mach-O Format Reference](https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/MachORuntime/)
- [ELF Specification](https://refspecs.linuxfoundation.org/elf/elf.pdf)
- [Binary Diff Attestation Architecture](../modules/scanner/binary-diff-attestation.md)