Refactor code structure for improved readability and maintainability; optimize performance in key functions.

This commit is contained in:
master
2025-12-22 19:06:31 +02:00
parent dfaa2079aa
commit 4602ccc3a3
1444 changed files with 109919 additions and 8058 deletions

View File

@@ -0,0 +1,35 @@
# AGENTS - Scanner CallGraph Library
## Mission
Provide deterministic call graph extraction for supported languages and native binaries, producing stable node/edge outputs for reachability analysis.
## Roles
- Backend/analyzer engineer (.NET 10, C# preview).
- QA engineer (unit + deterministic fixtures).
## Required Reading
- `docs/README.md`
- `docs/07_HIGH_LEVEL_ARCHITECTURE.md`
- `docs/modules/platform/architecture-overview.md`
- `docs/modules/scanner/architecture.md`
- `docs/reachability/DELIVERY_GUIDE.md`
- `docs/reachability/binary-reachability-schema.md`
## Working Directory & Boundaries
- Primary scope: `src/Scanner/__Libraries/StellaOps.Scanner.CallGraph/`
- Tests: `src/Scanner/__Tests/StellaOps.Scanner.CallGraph.Tests/`
- Avoid cross-module edits unless the sprint explicitly calls them out.
## Determinism & Offline Rules
- Stable ordering for nodes/edges; avoid wall-clock timestamps in outputs.
- No network access or external binaries at runtime.
- Normalize paths and symbol names consistently.
## Testing Expectations
- Add/extend unit tests for new extractors and edge kinds.
- Use deterministic fixtures/golden outputs; document inputs in test comments when needed.
- Run `dotnet test src/Scanner/StellaOps.Scanner.sln` when feasible.
## Workflow
- Update sprint status on start/finish (`TODO -> DOING -> DONE/BLOCKED`).
- Record notable decisions in the sprint Execution Log.

View File

@@ -0,0 +1,128 @@
using System.Collections.Immutable;
using StellaOps.Scanner.CallGraph;
using StellaOps.Scanner.CallGraph.Binary;
namespace StellaOps.Scanner.CallGraph.Binary.Analysis;
/// <summary>
/// Detects dynamic library loading (dlopen / LoadLibrary style APIs) in a native binary and
/// emits <see cref="CallKind.Dynamic"/> edges from every loader call site to every
/// library-like string literal found in the binary.
/// </summary>
internal sealed class BinaryDynamicLoadDetector
{
    private static readonly string[] LoaderSymbols =
    [
        "dlopen",
        "dlsym",
        "dlmopen",
        "LoadLibraryA",
        "LoadLibraryW",
        "LoadLibraryExA",
        "LoadLibraryExW",
        "GetProcAddress"
    ];

    // Built once and only ever read afterwards, so it can be shared by all calls
    // instead of being rebuilt on every ExtractAsync invocation.
    private static readonly HashSet<string> LoaderNames =
        new(LoaderSymbols, StringComparer.OrdinalIgnoreCase);

    private readonly BinaryStringLiteralScanner _stringScanner;

    /// <summary>
    /// Creates a detector.
    /// </summary>
    /// <param name="stringScanner">Scanner used to find library-like literals; a default instance is created when null.</param>
    public BinaryDynamicLoadDetector(BinaryStringLiteralScanner? stringScanner = null)
    {
        _stringScanner = stringScanner ?? new BinaryStringLiteralScanner();
    }

    /// <summary>
    /// Produces dynamic-load edges for the binary at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the binary whose string sections are scanned.</param>
    /// <param name="format">Container format of the binary.</param>
    /// <param name="binaryName">Name used to build synthetic source node ids when no caller is known.</param>
    /// <param name="directEdges">Already extracted call edges; edges targeting a loader symbol supply the sources.</param>
    /// <param name="relocations">Relocations/imports; those targeting a loader symbol mark the binary as a loader user.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// One edge per (loader source, library candidate) pair, ordered ordinally by source and then
    /// candidate; empty when the binary references no loader symbol or contains no candidate string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="directEdges"/> or <paramref name="relocations"/> is null.</exception>
    public async Task<ImmutableArray<CallGraphEdge>> ExtractAsync(
        string path,
        BinaryFormat format,
        string binaryName,
        IReadOnlyCollection<CallGraphEdge> directEdges,
        IReadOnlyCollection<BinaryRelocation> relocations,
        CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(directEdges);
        ArgumentNullException.ThrowIfNull(relocations);
        ct.ThrowIfCancellationRequested();

        var loaderSources = new HashSet<string>(StringComparer.Ordinal);
        var loaderTargets = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var edge in directEdges)
        {
            if (TryGetSymbol(edge.TargetId, out var targetSymbol)
                && LoaderNames.Contains(targetSymbol))
            {
                loaderSources.Add(edge.SourceId);
                loaderTargets.Add(targetSymbol);
            }
        }

        foreach (var reloc in relocations)
        {
            if (string.IsNullOrWhiteSpace(reloc.TargetSymbol))
            {
                continue;
            }

            if (LoaderNames.Contains(reloc.TargetSymbol))
            {
                loaderTargets.Add(reloc.TargetSymbol);
            }
        }

        // A non-empty source set implies a non-empty target set, so this is the
        // "binary never references a loader API" exit.
        if (loaderSources.Count == 0 && loaderTargets.Count == 0)
        {
            return ImmutableArray<CallGraphEdge>.Empty;
        }

        if (loaderSources.Count == 0)
        {
            // Loader is imported but no calling function is known: attribute the
            // loads to a synthetic node named after the loader symbol itself.
            foreach (var target in loaderTargets)
            {
                loaderSources.Add($"native:{binaryName}/{target}");
            }
        }

        var candidates = await _stringScanner.ExtractLibraryCandidatesAsync(path, format, ct);
        if (candidates.IsDefaultOrEmpty)
        {
            return ImmutableArray<CallGraphEdge>.Empty;
        }

        // Sort both dimensions so the emitted edge order is stable across runs.
        var orderedSources = loaderSources.OrderBy(value => value, StringComparer.Ordinal).ToArray();
        var orderedCandidates = candidates.OrderBy(value => value, StringComparer.Ordinal).ToArray();

        var edges = ImmutableArray.CreateBuilder<CallGraphEdge>(orderedSources.Length * orderedCandidates.Length);
        foreach (var source in orderedSources)
        {
            foreach (var candidate in orderedCandidates)
            {
                var targetId = $"native:external/{candidate}";
                edges.Add(new CallGraphEdge(
                    SourceId: source,
                    TargetId: targetId,
                    CallKind: CallKind.Dynamic,
                    CallSite: $"string:{candidate}"));
            }
        }

        return edges.ToImmutable();
    }

    /// <summary>
    /// Extracts the symbol part of a node id shaped like <c>native:{package}/{symbol}</c>.
    /// </summary>
    /// <returns>True when the id has that shape and a non-blank symbol.</returns>
    private static bool TryGetSymbol(string nodeId, out string symbol)
    {
        symbol = string.Empty;
        if (string.IsNullOrWhiteSpace(nodeId))
        {
            return false;
        }

        const string prefix = "native:";
        if (!nodeId.StartsWith(prefix, StringComparison.Ordinal))
        {
            return false;
        }

        var remainder = nodeId.Substring(prefix.Length);
        var slashIndex = remainder.IndexOf('/');
        if (slashIndex < 0 || slashIndex == remainder.Length - 1)
        {
            return false;
        }

        symbol = remainder[(slashIndex + 1)..];
        return !string.IsNullOrWhiteSpace(symbol);
    }
}

View File

@@ -0,0 +1,464 @@
using System.Collections.Immutable;
using System.Text;
using StellaOps.Scanner.CallGraph.Binary;
namespace StellaOps.Scanner.CallGraph.Binary.Analysis;
internal sealed class BinaryStringLiteralScanner
{
private const int MinStringLength = 4;
/// <summary>
/// Scans the string-bearing sections of a binary and returns the distinct literals that look
/// like shared-library names.
/// </summary>
/// <param name="path">Path of the binary to scan.</param>
/// <param name="format">Container format used to pick the section reader.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Case-insensitively distinct candidates in ordinal order; empty when no section was read.</returns>
public async Task<ImmutableArray<string>> ExtractLibraryCandidatesAsync(
    string path,
    BinaryFormat format,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    var rawSections = await ReadStringSectionsAsync(path, format, ct);
    if (rawSections.Count == 0)
    {
        return ImmutableArray<string>.Empty;
    }

    // Case-insensitive de-duplication: the first spelling encountered wins.
    var unique = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var literal in rawSections.SelectMany(section => ExtractStrings(section)))
    {
        var cleaned = NormalizeCandidate(literal);
        if (!string.IsNullOrWhiteSpace(cleaned) && IsLibraryCandidate(cleaned))
        {
            unique.Add(cleaned);
        }
    }

    var sorted = unique.ToArray();
    Array.Sort(sorted, StringComparer.Ordinal);
    return ImmutableArray.Create(sorted);
}
/// <summary>
/// Lazily yields every run of printable ASCII bytes (0x20-0x7E) that is at least
/// <c>MinStringLength</c> characters long.
/// </summary>
private static IEnumerable<string> ExtractStrings(byte[] bytes)
{
    var runStart = -1;

    // Iterate one step past the end so a run touching the last byte is flushed
    // by the same code path as a run ended by a non-printable byte.
    for (var index = 0; index <= bytes.Length; index++)
    {
        var isPrintable = index < bytes.Length && bytes[index] is >= 0x20 and <= 0x7E;
        if (isPrintable)
        {
            if (runStart < 0)
            {
                runStart = index;
            }

            continue;
        }

        if (runStart >= 0 && index - runStart >= MinStringLength)
        {
            yield return Encoding.ASCII.GetString(bytes, runStart, index - runStart);
        }

        runStart = -1;
    }
}
/// <summary>
/// Strips surrounding whitespace and quote characters from a literal and converts
/// backslashes to forward slashes.
/// </summary>
/// <returns>The normalized text, or an empty string when nothing is left after trimming.</returns>
private static string NormalizeCandidate(string value)
{
    var unquoted = value.Trim().Trim('"', '\'');
    return unquoted.Length == 0
        ? string.Empty
        : unquoted.Replace('\\', '/');
}
/// <summary>
/// Decides whether a normalized string literal looks like a shared-library name.
/// </summary>
/// <remarks>
/// Accepts values ending in <c>.dll</c> or <c>.dylib</c>, and values containing <c>.so</c> as a
/// complete extension: at the end of the string or followed by a character that is not a letter,
/// digit or underscore (for example <c>libssl.so.3</c>). Strings where <c>.so</c> is only the
/// start of a longer word, such as <c>app.sock</c> or <c>www.sony.com</c>, are rejected.
/// Matching is case-insensitive.
/// </remarks>
private static bool IsLibraryCandidate(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return false;
    }

    if (value.EndsWith(".dll", StringComparison.OrdinalIgnoreCase)
        || value.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    const string Marker = ".so";
    var searchFrom = 0;
    while (searchFrom < value.Length)
    {
        var hit = value.IndexOf(Marker, searchFrom, StringComparison.OrdinalIgnoreCase);
        if (hit < 0)
        {
            return false;
        }

        var after = hit + Marker.Length;
        if (after == value.Length)
        {
            return true;
        }

        var next = value[after];
        if (!char.IsLetterOrDigit(next) && next != '_')
        {
            return true;
        }

        // ".so" was only a prefix of a longer word; keep looking for a later occurrence.
        searchFrom = hit + 1;
    }

    return false;
}
/// <summary>
/// Dispatches to the format-specific reader that loads the raw bytes of string-bearing sections.
/// </summary>
/// <returns>The section contents, or an empty list for formats without a reader.</returns>
private static async Task<List<byte[]>> ReadStringSectionsAsync(
    string path,
    BinaryFormat format,
    CancellationToken ct)
{
    switch (format)
    {
        case BinaryFormat.Elf:
            return await ReadElfStringSectionsAsync(path, ct);
        case BinaryFormat.Pe:
            return await ReadPeStringSectionsAsync(path, ct);
        case BinaryFormat.MachO:
            return await ReadMachOStringSectionsAsync(path, ct);
        default:
            return [];
    }
}
/// <summary>
/// Reads the contents of the string-bearing sections of a little-endian ELF file.
/// </summary>
/// <param name="path">Path of the ELF file.</param>
/// <param name="ct">Cancellation token, checked once per section header.</param>
/// <returns>
/// Raw bytes of every section selected by <c>ShouldReadSection</c>; empty when the file is not a
/// little-endian ELF or its section header table / section name table is missing or out of range.
/// </returns>
private static async Task<List<byte[]>> ReadElfStringSectionsAsync(string path, CancellationToken ct)
{
    const uint SectionTypeNoBits = 8; // SHT_NOBITS: section occupies no bytes in the file

    using var stream = File.OpenRead(path);
    using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);

    var ident = reader.ReadBytes(16);
    var hasElfMagic = ident.Length == 16
        && ident[0] == 0x7F
        && ident[1] == (byte)'E'
        && ident[2] == (byte)'L'
        && ident[3] == (byte)'F';
    if (!hasElfMagic)
    {
        return [];
    }

    var is64Bit = ident[4] == 2;
    var isLittleEndian = ident[5] == 1;
    if (!isLittleEndian)
    {
        return [];
    }

    // The ELF header is 64 bytes (ELF64) or 52 bytes (ELF32); bail out on truncated files
    // instead of failing with EndOfStreamException below.
    if (stream.Length < (is64Bit ? 64 : 52))
    {
        return [];
    }

    stream.Seek(is64Bit ? 40 : 32, SeekOrigin.Begin); // e_shoff
    long sectionHeaderOffset = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();

    stream.Seek(is64Bit ? 58 : 46, SeekOrigin.Begin); // e_shentsize, e_shnum, e_shstrndx
    var sectionHeaderSize = reader.ReadUInt16();
    var sectionHeaderCount = reader.ReadUInt16();
    var strTabIndex = reader.ReadUInt16();

    if (sectionHeaderOffset <= 0 || sectionHeaderCount == 0 || strTabIndex >= sectionHeaderCount)
    {
        return [];
    }

    // Each header must be at least as large as the fields read from it, and the whole table
    // must lie inside the file. The product is computed in 64-bit to avoid int overflow.
    var bytesReadPerHeader = is64Bit ? 40 : 24;
    var tableLength = (long)sectionHeaderCount * sectionHeaderSize;
    if (sectionHeaderSize < bytesReadPerHeader || sectionHeaderOffset > stream.Length - tableLength)
    {
        return [];
    }

    var nameTableOffset = ReadElfSectionOffset(reader, stream, sectionHeaderOffset, sectionHeaderSize, strTabIndex, is64Bit);
    var nameTableSize = ReadElfSectionSize(reader, stream, sectionHeaderOffset, sectionHeaderSize, strTabIndex, is64Bit);
    if (nameTableOffset <= 0
        || nameTableSize <= 0
        || nameTableSize > int.MaxValue
        || nameTableOffset > stream.Length - nameTableSize)
    {
        return [];
    }

    stream.Seek(nameTableOffset, SeekOrigin.Begin);
    var nameTable = reader.ReadBytes((int)nameTableSize);

    var sections = new List<byte[]>();
    for (var i = 0; i < sectionHeaderCount; i++)
    {
        ct.ThrowIfCancellationRequested();

        stream.Seek(sectionHeaderOffset + (long)i * sectionHeaderSize, SeekOrigin.Begin);
        var nameIndex = reader.ReadUInt32();
        var sectionType = reader.ReadUInt32();

        long offset;
        long size;
        if (is64Bit)
        {
            reader.ReadUInt64(); // sh_flags
            reader.ReadUInt64(); // sh_addr
            offset = reader.ReadInt64();
            size = reader.ReadInt64();
        }
        else
        {
            reader.ReadUInt32(); // sh_flags
            reader.ReadUInt32(); // sh_addr
            offset = reader.ReadUInt32();
            size = reader.ReadUInt32();
        }

        if (sectionType == SectionTypeNoBits || offset <= 0 || size <= 0 || size > int.MaxValue)
        {
            continue;
        }

        if (ShouldReadSection(nameTable, nameIndex))
        {
            sections.Add(ReadSection(reader, stream, offset, size));
        }
    }

    await Task.CompletedTask;
    return sections;
}
/// <summary>
/// Tells whether the ELF section whose name starts at <paramref name="nameIndex"/> in the
/// section name table should be scanned for string literals.
/// </summary>
private static bool ShouldReadSection(byte[] nameTable, uint nameIndex)
{
    var sectionName = ReadNullTerminatedString(nameTable, (int)nameIndex);
    if (string.IsNullOrWhiteSpace(sectionName))
    {
        return false;
    }

    // "rodata" and "rdata" both contain "data", so a single "data" test covers all three.
    var mentionsData = sectionName.Contains("data", StringComparison.Ordinal);
    return mentionsData || sectionName.Contains("cstring", StringComparison.Ordinal);
}
/// <summary>
/// Reads the contents of the string-bearing sections of a PE image.
/// </summary>
/// <param name="path">Path of the PE file.</param>
/// <param name="ct">Cancellation token, checked once per section header.</param>
/// <returns>
/// Raw bytes of every section accepted by <c>IsPeStringSection</c>; empty when the DOS/PE headers
/// are missing or out of range, or when the image has no optional header.
/// </returns>
private static async Task<List<byte[]>> ReadPeStringSectionsAsync(string path, CancellationToken ct)
{
    const int DosHeaderSize = 0x40;   // e_lfanew (at 0x3C) is the last DOS header field
    const int PeHeaderSize = 24;      // PE signature (4) + COFF file header (20)
    const int SectionHeaderSize = 40;

    using var stream = File.OpenRead(path);
    using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);

    if (stream.Length < DosHeaderSize || reader.ReadUInt16() != 0x5A4D) // "MZ"
    {
        return [];
    }

    stream.Seek(0x3C, SeekOrigin.Begin);
    var peOffset = reader.ReadInt32();

    // Reject a negative or out-of-file e_lfanew instead of letting Seek/Read throw.
    if (peOffset <= 0 || peOffset > stream.Length - PeHeaderSize)
    {
        return [];
    }

    stream.Seek(peOffset, SeekOrigin.Begin);
    var signature = reader.ReadUInt32();
    if (signature != 0x00004550)
    {
        return [];
    }

    reader.ReadUInt16(); // machine
    var numberOfSections = reader.ReadUInt16();
    reader.ReadUInt32(); // timestamp
    reader.ReadUInt32(); // symbol table ptr
    reader.ReadUInt32(); // number of symbols
    var optionalHeaderSize = reader.ReadUInt16();
    reader.ReadUInt16(); // characteristics

    if (optionalHeaderSize == 0)
    {
        return [];
    }

    // The section table starts right after the optional header.
    stream.Seek(optionalHeaderSize, SeekOrigin.Current);

    var sections = new List<byte[]>();
    for (var i = 0; i < numberOfSections; i++)
    {
        ct.ThrowIfCancellationRequested();

        // Stop at a truncated section table and keep what was collected so far.
        if (stream.Length - stream.Position < SectionHeaderSize)
        {
            break;
        }

        var nameBytes = reader.ReadBytes(8);
        var name = Encoding.ASCII.GetString(nameBytes).TrimEnd('\0');
        reader.ReadUInt32(); // virtual size
        reader.ReadUInt32(); // virtual address
        var sizeOfRawData = reader.ReadUInt32();
        var pointerToRawData = reader.ReadUInt32();
        reader.ReadUInt32(); // pointer to relocations
        reader.ReadUInt32(); // pointer to line numbers
        reader.ReadUInt16(); // number of relocations
        reader.ReadUInt16(); // number of line numbers
        reader.ReadUInt32(); // characteristics

        if (!IsPeStringSection(name) || pointerToRawData == 0 || sizeOfRawData == 0)
        {
            continue;
        }

        sections.Add(ReadSection(reader, stream, pointerToRawData, sizeOfRawData));
    }

    await Task.CompletedTask;
    return sections;
}
/// <summary>
/// Tells whether a PE section name is one of the sections scanned for string literals
/// (<c>.rdata</c>, <c>.data</c>, <c>.rodata</c>; exact, case-sensitive match).
/// </summary>
private static bool IsPeStringSection(string name)
    => name is ".rdata" or ".data" or ".rodata";
/// <summary>
/// Reads the contents of the string-bearing sections of a thin, native-endian Mach-O file.
/// </summary>
/// <param name="path">Path of the Mach-O file.</param>
/// <param name="ct">Cancellation token, checked once per load command.</param>
/// <returns>
/// Raw bytes of every section accepted by <c>IsMachOStringSection</c>; empty when the magic is not
/// a native-endian 32-bit or 64-bit Mach-O magic (byte-swapped and fat files are not handled).
/// Parsing stops early, keeping what was collected, when a load command is malformed or runs
/// past the end of the file.
/// </returns>
private static async Task<List<byte[]>> ReadMachOStringSectionsAsync(string path, CancellationToken ct)
{
    const uint Magic32 = 0xFEEDFACE;
    const uint Magic64 = 0xFEEDFACF;
    const uint SegmentCommand32 = 0x1;
    const uint SegmentCommand64 = 0x19;
    const int LoadCommandHeaderSize = 8; // cmd + cmdsize

    using var stream = File.OpenRead(path);
    using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);

    var sections = new List<byte[]>();
    if (stream.Length < 28) // size of the 32-bit mach_header
    {
        return sections;
    }

    // Only the two native-endian magics are parsed; anything else would be misread as a
    // 32-bit header and produce garbage command counts.
    var magic = reader.ReadUInt32();
    if (magic != Magic32 && magic != Magic64)
    {
        return sections;
    }

    var is64Bit = magic == Magic64;
    reader.ReadInt32(); // cputype
    reader.ReadInt32(); // cpusubtype
    reader.ReadUInt32(); // filetype
    var ncmds = reader.ReadUInt32();
    reader.ReadUInt32(); // sizeofcmds
    reader.ReadUInt32(); // flags
    if (is64Bit)
    {
        reader.ReadUInt32(); // reserved
    }

    var segmentCommand = is64Bit ? SegmentCommand64 : SegmentCommand32;
    var segmentHeaderSize = is64Bit ? 72 : 56; // segment_command(_64) including cmd/cmdsize
    var sectionHeaderSize = is64Bit ? 80 : 68; // section(_64)

    for (uint i = 0; i < ncmds; i++)
    {
        ct.ThrowIfCancellationRequested();

        var cmdStart = stream.Position;
        if (stream.Length - cmdStart < LoadCommandHeaderSize)
        {
            break;
        }

        var cmd = reader.ReadUInt32();
        var cmdsize = reader.ReadUInt32();
        var cmdEnd = cmdStart + cmdsize;

        // A command smaller than its own header would never advance the cursor, and one
        // extending past the file cannot be parsed; both indicate a malformed file.
        if (cmdsize < LoadCommandHeaderSize || cmdEnd > stream.Length)
        {
            break;
        }

        if (cmd == segmentCommand && cmdsize >= segmentHeaderSize)
        {
            reader.ReadBytes(16); // segname
            if (is64Bit)
            {
                reader.ReadUInt64(); // vmaddr
                reader.ReadUInt64(); // vmsize
                reader.ReadUInt64(); // fileoff
                reader.ReadUInt64(); // filesize
            }
            else
            {
                reader.ReadUInt32(); // vmaddr
                reader.ReadUInt32(); // vmsize
                reader.ReadUInt32(); // fileoff
                reader.ReadUInt32(); // filesize
            }

            reader.ReadInt32(); // maxprot
            reader.ReadInt32(); // initprot
            var nsects = reader.ReadUInt32();
            reader.ReadUInt32(); // flags

            // Section headers must fit inside the enclosing segment command.
            for (uint s = 0; s < nsects && cmdEnd - stream.Position >= sectionHeaderSize; s++)
            {
                var sectName = ReadFixedString(reader, 16);
                reader.ReadBytes(16); // segname

                ulong size;
                if (is64Bit)
                {
                    reader.ReadUInt64(); // addr
                    size = reader.ReadUInt64();
                }
                else
                {
                    reader.ReadUInt32(); // addr
                    size = reader.ReadUInt32();
                }

                var offset = reader.ReadUInt32();
                reader.ReadUInt32(); // align
                reader.ReadUInt32(); // reloff
                reader.ReadUInt32(); // nreloc
                reader.ReadUInt32(); // flags
                reader.ReadUInt32(); // reserved1
                reader.ReadUInt32(); // reserved2
                if (is64Bit)
                {
                    reader.ReadUInt32(); // reserved3
                }

                if (IsMachOStringSection(sectName) && offset > 0 && size > 0 && size <= int.MaxValue)
                {
                    sections.Add(ReadSection(reader, stream, (long)offset, (long)size));
                }
            }
        }

        stream.Seek(cmdEnd, SeekOrigin.Begin);
    }

    await Task.CompletedTask;
    return sections;
}
/// <summary>
/// Tells whether a Mach-O section name is one of the sections scanned for string literals
/// (<c>__cstring</c>, <c>__const</c>, <c>__data</c>; exact, case-sensitive match).
/// </summary>
private static bool IsMachOStringSection(string sectName)
    => sectName is "__cstring" or "__const" or "__data";
/// <summary>
/// Reads <paramref name="size"/> bytes at <paramref name="offset"/> and restores the stream
/// position afterwards.
/// </summary>
/// <returns>
/// The bytes read, or an empty array when the range is negative, empty, larger than
/// <see cref="int.MaxValue"/> bytes, or not fully inside the stream.
/// </returns>
private static byte[] ReadSection(BinaryReader reader, Stream stream, long offset, long size)
{
    if (offset < 0 || size <= 0 || size > int.MaxValue)
    {
        return Array.Empty<byte>();
    }

    // Compare against the remaining length rather than computing offset + size,
    // which can overflow for hostile header values and slip past the bounds check.
    var length = stream.Length;
    if (offset > length || size > length - offset)
    {
        return Array.Empty<byte>();
    }

    var current = stream.Position;
    stream.Seek(offset, SeekOrigin.Begin);
    try
    {
        return reader.ReadBytes((int)size);
    }
    finally
    {
        stream.Seek(current, SeekOrigin.Begin);
    }
}
/// <summary>
/// Convenience overload for 32-bit file offsets and sizes; widens both values and forwards
/// to the 64-bit overload.
/// </summary>
private static byte[] ReadSection(BinaryReader reader, Stream stream, uint offset, uint size)
{
    long wideOffset = offset;
    long wideSize = size;
    return ReadSection(reader, stream, wideOffset, wideSize);
}
/// <summary>
/// Reads the file offset (<c>sh_offset</c>) of the section header at <paramref name="index"/>
/// in the section header table.
/// </summary>
private static long ReadElfSectionOffset(BinaryReader reader, Stream stream, long sectionHeaderOffset, ushort entrySize, ushort index, bool is64Bit)
{
    // Widen before multiplying: ushort * ushort is evaluated as int and can overflow.
    var position = sectionHeaderOffset + (long)index * entrySize;
    return ReadElfSectionOffset(reader, stream, position, is64Bit);
}
/// <summary>
/// Reads the <c>sh_offset</c> field of the ELF section header that starts at
/// <paramref name="position"/> (field at +24 for 64-bit headers, +16 for 32-bit headers).
/// </summary>
private static long ReadElfSectionOffset(BinaryReader reader, Stream stream, long position, bool is64Bit)
{
    var fieldOffset = is64Bit ? 24 : 16;
    stream.Seek(position + fieldOffset, SeekOrigin.Begin);

    if (is64Bit)
    {
        return reader.ReadInt64();
    }

    return reader.ReadInt32();
}
/// <summary>
/// Reads the size (<c>sh_size</c>) of the section header at <paramref name="index"/>
/// in the section header table.
/// </summary>
private static long ReadElfSectionSize(BinaryReader reader, Stream stream, long sectionHeaderOffset, ushort entrySize, ushort index, bool is64Bit)
{
    // Widen before multiplying: ushort * ushort is evaluated as int and can overflow.
    var position = sectionHeaderOffset + (long)index * entrySize;
    return ReadElfSectionSize(reader, stream, position, is64Bit);
}
/// <summary>
/// Reads the <c>sh_size</c> field of the ELF section header that starts at
/// <paramref name="position"/> (field at +32 for 64-bit headers, +20 for 32-bit headers).
/// </summary>
private static long ReadElfSectionSize(BinaryReader reader, Stream stream, long position, bool is64Bit)
{
    var fieldOffset = is64Bit ? 32 : 20;
    stream.Seek(position + fieldOffset, SeekOrigin.Begin);

    if (is64Bit)
    {
        return reader.ReadInt64();
    }

    return reader.ReadInt32();
}
/// <summary>
/// Reads a fixed-width, NUL-padded ASCII field of <paramref name="length"/> bytes and returns
/// the text before the first NUL (or the whole field when it has none).
/// </summary>
private static string ReadFixedString(BinaryReader reader, int length)
{
    ReadOnlySpan<byte> field = reader.ReadBytes(length);

    var terminator = field.IndexOf((byte)0);
    if (terminator >= 0)
    {
        field = field[..terminator];
    }

    return Encoding.ASCII.GetString(field);
}
/// <summary>
/// Decodes the UTF-8 string that starts at <paramref name="offset"/> and ends at the next NUL
/// byte (or at the end of the buffer).
/// </summary>
/// <returns>The decoded text, or an empty string when the offset is outside the buffer.</returns>
private static string ReadNullTerminatedString(byte[] buffer, int offset)
{
    var inRange = offset >= 0 && offset < buffer.Length;
    if (!inRange)
    {
        return string.Empty;
    }

    var terminator = Array.IndexOf(buffer, (byte)0, offset);
    var stop = terminator < 0 ? buffer.Length : terminator;
    return Encoding.UTF8.GetString(buffer, offset, stop - offset);
}
}

View File

@@ -6,6 +6,8 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.CallGraph.Binary.Analysis;
using StellaOps.Scanner.CallGraph.Binary.Disassembly;
using StellaOps.Scanner.Reachability;
namespace StellaOps.Scanner.CallGraph.Binary;
@@ -19,6 +21,8 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
private readonly ILogger<BinaryCallGraphExtractor> _logger;
private readonly TimeProvider _timeProvider;
private readonly BinaryEntrypointClassifier _entrypointClassifier;
private readonly DirectCallExtractor _directCallExtractor;
private readonly BinaryDynamicLoadDetector _dynamicLoadDetector;
public BinaryCallGraphExtractor(
ILogger<BinaryCallGraphExtractor> logger,
@@ -27,6 +31,8 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
_entrypointClassifier = new BinaryEntrypointClassifier();
_directCallExtractor = new DirectCallExtractor();
_dynamicLoadDetector = new BinaryDynamicLoadDetector();
}
/// <inheritdoc />
@@ -70,7 +76,18 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
_ => []
};
return BuildSnapshot(request.ScanId, targetPath, symbols, relocations);
var directEdges = await ExtractDirectCallEdgesAsync(targetPath, format, symbols, cancellationToken);
var dynamicEdges = await _dynamicLoadDetector.ExtractAsync(
targetPath,
format,
Path.GetFileName(targetPath),
directEdges,
relocations,
cancellationToken);
var extraEdges = directEdges.Concat(dynamicEdges).ToArray();
return BuildSnapshot(request.ScanId, targetPath, symbols, relocations, extraEdges);
}
private async Task<BinaryFormat> DetectBinaryFormatAsync(string path, CancellationToken ct)
@@ -107,6 +124,31 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
throw new NotSupportedException($"Unknown binary format: {path}");
}
/// <summary>
/// Reads the text section of the binary and extracts direct call edges from it.
/// </summary>
/// <returns>
/// The extracted edges, or an empty collection when no text section could be read or its
/// architecture is unknown.
/// </returns>
private async Task<IReadOnlyCollection<CallGraphEdge>> ExtractDirectCallEdgesAsync(
    string path,
    BinaryFormat format,
    List<BinarySymbol> symbols,
    CancellationToken ct)
{
    var code = await BinaryTextSectionReader.TryReadAsync(path, format, ct);
    if (code is null)
    {
        return Array.Empty<CallGraphEdge>();
    }

    var architectureKnown = code.Architecture != BinaryArchitecture.Unknown;
    if (!architectureKnown)
    {
        _logger.LogDebug("Skipping disassembly; unknown architecture for {Path}", path);
        return Array.Empty<CallGraphEdge>();
    }

    var extracted = _directCallExtractor.Extract(code, symbols, Path.GetFileName(path));
    _logger.LogDebug("Extracted {Count} direct call edges from .text", extracted.Length);
    return extracted;
}
private async Task<List<BinarySymbol>> ExtractElfSymbolsAsync(string path, CancellationToken ct)
{
var symbols = new List<BinarySymbol>();
@@ -255,6 +297,7 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
reader.ReadUInt16(); // characteristics
var is64Bit = machine == 0x8664; // AMD64
var sectionBases = new ulong[numberOfSections + 1];
// Read optional header to get export directory
if (optionalHeaderSize > 0)
@@ -271,6 +314,28 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
// For now, just log that exports exist
_logger.LogDebug("PE has export directory at RVA 0x{Rva:X}", exportRva);
}
var sectionHeadersStart = optionalHeaderStart + optionalHeaderSize;
var currentPos = stream.Position;
stream.Seek(sectionHeadersStart, SeekOrigin.Begin);
for (int i = 0; i < numberOfSections; i++)
{
reader.ReadBytes(8); // name
reader.ReadUInt32(); // virtual size
var virtualAddress = reader.ReadUInt32();
reader.ReadUInt32(); // size of raw data
reader.ReadUInt32(); // pointer to raw data
reader.ReadUInt32(); // pointer to relocations
reader.ReadUInt32(); // pointer to line numbers
reader.ReadUInt16(); // number of relocations
reader.ReadUInt16(); // number of line numbers
reader.ReadUInt32(); // characteristics
sectionBases[i + 1] = virtualAddress;
}
stream.Seek(currentPos, SeekOrigin.Begin);
}
// Read COFF symbol table if present
@@ -310,10 +375,15 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
name = System.Text.Encoding.ASCII.GetString(nameBytes).TrimEnd('\0');
}
var baseAddress = section > 0 && section < sectionBases.Length
? sectionBases[section]
: 0;
var resolvedAddress = baseAddress + value;
symbols.Add(new BinarySymbol
{
Name = name,
Address = value,
Address = resolvedAddress,
Size = 0, // PE doesn't store function size in symbol table
IsGlobal = storageClass == 2, // IMAGE_SYM_CLASS_EXTERNAL
IsExported = false // Would need to check export directory
@@ -476,6 +546,7 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
{
// Process relocation section
var isRela = shType == 4;
var isPltReloc = sectionName.Contains(".plt", StringComparison.Ordinal);
var entrySize = is64Bit
? (isRela ? 24 : 16)
: (isRela ? 12 : 8);
@@ -511,9 +582,10 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
{
Address = relocOffset,
SymbolIndex = (int)symIndex,
SourceSymbol = "", // Will be resolved later
SourceSymbol = isPltReloc ? "__plt__" : "",
TargetSymbol = "", // Will be resolved later
IsExternal = true
IsExternal = true,
CallKind = isPltReloc ? CallKind.Plt : CallKind.Direct
});
}
}
@@ -593,13 +665,20 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
var magic = reader.ReadUInt16();
var is64Bit = magic == 0x20b; // PE32+
// Skip to data directories
stream.Seek(optionalHeaderStart + (is64Bit ? 112 : 96), SeekOrigin.Begin);
// Read import table RVA and size (directory entry 1)
stream.Seek(8, SeekOrigin.Current); // Skip export table
// Read data directories
var dataDirectoryOffset = optionalHeaderStart + (is64Bit ? 112 : 96);
stream.Seek(dataDirectoryOffset, SeekOrigin.Begin);
var exportTableRva = reader.ReadUInt32();
var exportTableSize = reader.ReadUInt32();
var importTableRva = reader.ReadUInt32();
var importTableSize = reader.ReadUInt32();
stream.Seek(dataDirectoryOffset + 13 * 8, SeekOrigin.Begin); // delay import entry
var delayImportRva = reader.ReadUInt32();
var delayImportSize = reader.ReadUInt32();
_ = exportTableRva;
_ = exportTableSize;
_ = importTableSize;
_ = delayImportSize;
if (importTableRva == 0)
{
@@ -618,6 +697,25 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
// Parse import directory
stream.Seek(importTableOffset, SeekOrigin.Begin);
ReadPeImportTable(stream, reader, sectionHeadersStart, numberOfSections, is64Bit, importTableOffset, relocations);
ReadPeDelayImportTable(stream, reader, sectionHeadersStart, numberOfSections, is64Bit, delayImportRva, relocations);
await Task.CompletedTask;
_logger.LogDebug("Extracted {Count} imports from PE", relocations.Count);
return relocations;
}
private static void ReadPeImportTable(
Stream stream,
BinaryReader reader,
long sectionHeadersStart,
int numberOfSections,
bool is64Bit,
long importTableOffset,
List<BinaryRelocation> relocations)
{
stream.Seek(importTableOffset, SeekOrigin.Begin);
while (true)
{
var importLookupTableRva = reader.ReadUInt32();
@@ -631,66 +729,151 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
break; // End of import directory
}
// Read DLL name
var nameOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, nameRva);
var currentPos = stream.Position;
stream.Seek(nameOffset, SeekOrigin.Begin);
var dllName = ReadCString(reader);
stream.Seek(currentPos, SeekOrigin.Begin);
var dllName = ReadPeDllName(stream, reader, sectionHeadersStart, numberOfSections, nameRva);
if (string.IsNullOrWhiteSpace(dllName))
{
continue;
}
// Parse import lookup table
var lookupOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, importLookupTableRva);
if (lookupOffset > 0)
{
var lookupPos = stream.Position;
stream.Seek(lookupOffset, SeekOrigin.Begin);
ParseImportLookupTable(stream, reader, sectionHeadersStart, numberOfSections, is64Bit, lookupOffset, dllName, relocations);
}
}
}
while (true)
/// <summary>
/// Walks the PE delay-import descriptor table and appends one relocation per delay-imported
/// function name to <paramref name="relocations"/>.
/// </summary>
/// <remarks>
/// Reading stops at the first descriptor whose name RVA is zero, or after 256 descriptors.
/// Descriptors whose DLL name or name table cannot be resolved are skipped.
/// </remarks>
private static void ReadPeDelayImportTable(
    Stream stream,
    BinaryReader reader,
    long sectionHeadersStart,
    int numberOfSections,
    bool is64Bit,
    uint delayImportRva,
    List<BinaryRelocation> relocations)
{
    const int MaxDescriptors = 256;

    if (delayImportRva == 0)
    {
        return;
    }

    var tableOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, delayImportRva);
    if (tableOffset == 0)
    {
        return;
    }

    stream.Seek(tableOffset, SeekOrigin.Begin);

    for (var descriptor = 0; descriptor < MaxDescriptors; descriptor++)
    {
        _ = reader.ReadUInt32(); // attributes
        var dllNameRva = reader.ReadUInt32();
        reader.ReadUInt32(); // module handle
        reader.ReadUInt32(); // delay import address table
        var importNameTableRva = reader.ReadUInt32();
        reader.ReadUInt32(); // bound delay import table
        reader.ReadUInt32(); // unload delay import table
        reader.ReadUInt32(); // timestamp

        if (dllNameRva == 0)
        {
            break;
        }

        var library = ReadPeDllName(stream, reader, sectionHeadersStart, numberOfSections, dllNameRva);
        if (!string.IsNullOrWhiteSpace(library) && importNameTableRva != 0)
        {
            var importNamesOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, importNameTableRva);
            if (importNamesOffset != 0)
            {
                ParseImportLookupTable(stream, reader, sectionHeadersStart, numberOfSections, is64Bit, importNamesOffset, library, relocations);
            }
        }
    }
}
/// <summary>
/// Reads the C string stored at <paramref name="nameRva"/> and puts the stream back where it was.
/// </summary>
/// <returns>The string, or null when the RVA does not map to a file offset.</returns>
private static string? ReadPeDllName(
    Stream stream,
    BinaryReader reader,
    long sectionHeadersStart,
    int numberOfSections,
    uint nameRva)
{
    var fileOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, nameRva);
    if (fileOffset != 0)
    {
        var resumeAt = stream.Position;
        stream.Seek(fileOffset, SeekOrigin.Begin);
        var text = ReadCString(reader);
        stream.Seek(resumeAt, SeekOrigin.Begin);
        return text;
    }

    return null;
}
private static void ParseImportLookupTable(
Stream stream,
BinaryReader reader,
long sectionHeadersStart,
int numberOfSections,
bool is64Bit,
long lookupOffset,
string dllName,
List<BinaryRelocation> relocations)
{
var lookupPos = stream.Position;
stream.Seek(lookupOffset, SeekOrigin.Begin);
while (true)
{
var entry = is64Bit ? reader.ReadUInt64() : reader.ReadUInt32();
if (entry == 0)
{
break;
}
var isOrdinal = is64Bit
? (entry & 0x8000000000000000) != 0
: (entry & 0x80000000) != 0;
if (!isOrdinal)
{
var hintNameRva = (uint)(entry & 0x7FFFFFFF);
var hintNameOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, hintNameRva);
if (hintNameOffset > 0)
{
var entry = is64Bit ? reader.ReadUInt64() : reader.ReadUInt32();
if (entry == 0)
var entryPos = stream.Position;
stream.Seek(hintNameOffset + 2, SeekOrigin.Begin); // Skip hint
var funcName = ReadCString(reader);
stream.Seek(entryPos, SeekOrigin.Begin);
if (!string.IsNullOrWhiteSpace(funcName))
{
break;
}
var isOrdinal = is64Bit
? (entry & 0x8000000000000000) != 0
: (entry & 0x80000000) != 0;
if (!isOrdinal)
{
var hintNameRva = (uint)(entry & 0x7FFFFFFF);
var hintNameOffset = RvaToFileOffset(stream, reader, sectionHeadersStart, numberOfSections, hintNameRva);
if (hintNameOffset > 0)
relocations.Add(new BinaryRelocation
{
var entryPos = stream.Position;
stream.Seek(hintNameOffset + 2, SeekOrigin.Begin); // Skip hint
var funcName = ReadCString(reader);
stream.Seek(entryPos, SeekOrigin.Begin);
relocations.Add(new BinaryRelocation
{
Address = 0,
SymbolIndex = 0,
SourceSymbol = dllName,
TargetSymbol = funcName,
IsExternal = true
});
}
Address = 0,
SymbolIndex = 0,
SourceSymbol = dllName,
TargetSymbol = funcName,
IsExternal = true,
CallKind = CallKind.Iat
});
}
}
stream.Seek(lookupPos, SeekOrigin.Begin);
}
}
await Task.CompletedTask;
_logger.LogDebug("Extracted {Count} imports from PE", relocations.Count);
return relocations;
stream.Seek(lookupPos, SeekOrigin.Begin);
}
private long RvaToFileOffset(
private static long RvaToFileOffset(
Stream stream,
BinaryReader reader,
long sectionHeadersStart,
@@ -797,7 +980,8 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
string scanId,
string binaryPath,
List<BinarySymbol> symbols,
List<BinaryRelocation> relocations)
List<BinaryRelocation> relocations,
IReadOnlyCollection<CallGraphEdge> extraEdges)
{
var nodesById = new Dictionary<string, CallGraphNode>(StringComparer.Ordinal);
var edges = new HashSet<CallGraphEdge>(CallGraphEdgeComparer.Instance);
@@ -826,7 +1010,10 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
// Add edges from relocations
foreach (var reloc in relocations)
{
var sourceId = $"native:{binaryName}/{reloc.SourceSymbol}";
var sourceSymbol = string.IsNullOrWhiteSpace(reloc.SourceSymbol)
? (reloc.CallKind == CallKind.Plt ? "__plt__" : "__reloc__")
: reloc.SourceSymbol;
var sourceId = $"native:{binaryName}/{sourceSymbol}";
var targetId = reloc.IsExternal
? $"native:external/{reloc.TargetSymbol}"
: $"native:{binaryName}/{reloc.TargetSymbol}";
@@ -834,10 +1021,20 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
edges.Add(new CallGraphEdge(
SourceId: sourceId,
TargetId: targetId,
CallKind: CallKind.Direct,
CallKind: reloc.CallKind,
CallSite: $"0x{reloc.Address:X}"));
}
if (extraEdges.Count > 0)
{
foreach (var edge in extraEdges)
{
edges.Add(edge);
}
}
EnsureNodesForEdges(nodesById, edges, binaryPath, binaryName);
var nodes = nodesById.Values
.Select(n => n.Trimmed())
.OrderBy(n => n.NodeId, StringComparer.Ordinal)
@@ -876,6 +1073,70 @@ public sealed class BinaryCallGraphExtractor : ICallGraphExtractor
return provisional with { GraphDigest = digest };
}
/// <summary>
/// Makes sure both endpoints of every edge have an entry in <paramref name="nodesById"/>.
/// </summary>
private static void EnsureNodesForEdges(
    Dictionary<string, CallGraphNode> nodesById,
    IEnumerable<CallGraphEdge> edges,
    string binaryPath,
    string binaryName)
{
    void Register(string endpointId) => EnsureNode(nodesById, endpointId, binaryPath, binaryName);

    foreach (var link in edges)
    {
        Register(link.SourceId);
        Register(link.TargetId);
    }
}
/// <summary>
/// Adds a placeholder node for <paramref name="nodeId"/> unless one is already registered.
/// </summary>
/// <remarks>
/// External nodes get an empty file path and public visibility; all other nodes point at
/// <paramref name="binaryPath"/> and are private. Placeholders are never entrypoints or sinks.
/// </remarks>
private static void EnsureNode(
    Dictionary<string, CallGraphNode> nodesById,
    string nodeId,
    string binaryPath,
    string binaryName)
{
    if (nodesById.ContainsKey(nodeId))
    {
        return;
    }

    var parsed = ParseNodeId(nodeId, binaryName);

    nodesById[nodeId] = new CallGraphNode(
        NodeId: nodeId,
        Symbol: parsed.Symbol,
        File: parsed.IsExternal ? string.Empty : binaryPath,
        Line: 0,
        Package: parsed.Package,
        Visibility: parsed.IsExternal ? Visibility.Public : Visibility.Private,
        IsEntrypoint: false,
        EntrypointType: null,
        IsSink: false,
        SinkCategory: null);
}
/// <summary>
/// Splits a node id of the form <c>native:{package}/{symbol}</c> into its parts.
/// </summary>
/// <returns>
/// The package, the symbol, and whether the package is exactly <c>external</c>. Ids without the
/// <c>native:</c> prefix or without a slash fall back to <paramref name="binaryName"/> as the
/// package and are never external.
/// </returns>
private static (string Package, string Symbol, bool IsExternal) ParseNodeId(string nodeId, string binaryName)
{
    const string Prefix = "native:";

    if (!nodeId.StartsWith(Prefix, StringComparison.Ordinal))
    {
        return (binaryName, nodeId, false);
    }

    var body = nodeId[Prefix.Length..];
    var separator = body.IndexOf('/');
    if (separator < 0)
    {
        return (binaryName, body, false);
    }

    var owner = body[..separator];
    return (owner, body[(separator + 1)..], string.Equals(owner, "external", StringComparison.Ordinal));
}
private static string ReadNullTerminatedString(byte[] buffer, int offset)
{
if (offset < 0 || offset >= buffer.Length)
@@ -917,4 +1178,5 @@ internal sealed class BinaryRelocation
public ulong Address { get; init; }
public bool IsExternal { get; init; }
public int SymbolIndex { get; init; }
public CallKind CallKind { get; init; } = CallKind.Direct;
}

View File

@@ -0,0 +1,100 @@
using System.Collections.Immutable;
using Gee.External.Capstone;
using Gee.External.Capstone.Arm64;
using StellaOps.Scanner.CallGraph;
namespace StellaOps.Scanner.CallGraph.Binary.Disassembly;
/// <summary>
/// Finds call instructions (<c>BL</c> / <c>BLR</c>) in ARM64 machine code using Capstone.
/// </summary>
internal sealed class Arm64Disassembler
{
    /// <summary>
    /// Disassembles <paramref name="code"/> and returns one entry per call instruction.
    /// </summary>
    /// <param name="code">Machine code to disassemble.</param>
    /// <param name="baseAddress">Address assigned to the first byte of <paramref name="code"/>.</param>
    /// <returns>
    /// Calls with an immediate operand are reported as <see cref="CallKind.Direct"/> with that
    /// target; calls without one are reported as <see cref="CallKind.Dynamic"/> with target 0.
    /// Empty when the input is empty, ARM64 support is unavailable, nothing was decoded, or the
    /// native Capstone library fails to load.
    /// </returns>
    public ImmutableArray<BinaryCallInstruction> ExtractDirectCalls(
        ReadOnlySpan<byte> code,
        ulong baseAddress)
    {
        var none = ImmutableArray<BinaryCallInstruction>.Empty;
        if (code.IsEmpty || !CapstoneDisassembler.IsArm64Supported)
        {
            return none;
        }

        try
        {
            using var engine = CapstoneDisassembler.CreateArm64Disassembler(
                Arm64DisassembleMode.Arm | Arm64DisassembleMode.LittleEndian);
            engine.EnableInstructionDetails = true;

            var decoded = engine.Disassemble(code.ToArray(), (long)baseAddress);
            if (decoded.Length == 0)
            {
                return none;
            }

            var found = ImmutableArray.CreateBuilder<BinaryCallInstruction>();
            foreach (var insn in decoded)
            {
                if (insn.IsSkippedData)
                {
                    continue;
                }

                if (insn.Id is not (Arm64InstructionId.ARM64_INS_BL or Arm64InstructionId.ARM64_INS_BLR))
                {
                    continue;
                }

                if (!insn.HasDetails || insn.Details is null)
                {
                    continue;
                }

                var site = (ulong)insn.Address;
                found.Add(TryResolveTarget(insn) is { } destination
                    ? new BinaryCallInstruction(site, destination, CallKind.Direct)
                    : new BinaryCallInstruction(site, 0, CallKind.Dynamic));
            }

            return found.ToImmutable();
        }
        catch (Exception ex) when (ex is DllNotFoundException or TypeInitializationException or BadImageFormatException)
        {
            // Native Capstone library missing or unusable: report no calls.
            return none;
        }
    }

    /// <summary>
    /// Returns the first immediate operand of the instruction as the call target, or null when
    /// the instruction has no immediate operand.
    /// </summary>
    private static ulong? TryResolveTarget(Arm64Instruction instruction)
    {
        foreach (var candidate in instruction.Details!.Operands)
        {
            if (candidate.Type != Arm64OperandType.Immediate)
            {
                continue;
            }

            return (ulong)candidate.Immediate;
        }

        return null;
    }
}

View File

@@ -0,0 +1,26 @@
using StellaOps.Scanner.CallGraph;
namespace StellaOps.Scanner.CallGraph.Binary.Disassembly;
/// <summary>
/// Instruction-set architecture of a native binary's code, as derived from the machine/CPU type
/// field of its header. Used to choose which disassembler processes the text section.
/// </summary>
internal enum BinaryArchitecture
{
/// <summary>The header's machine type is not one of the recognised values.</summary>
Unknown,
/// <summary>32-bit x86.</summary>
X86,
/// <summary>64-bit x86 (x86-64).</summary>
X64,
/// <summary>64-bit ARM (AArch64).</summary>
Arm64
}
/// <summary>
/// Contents of a binary's code section together with the metadata needed to disassemble it.
/// </summary>
/// <param name="Bytes">Section contents as read from the file.</param>
/// <param name="VirtualAddress">Address of the first byte, as stored in the section header.</param>
/// <param name="Bitness">Word size of the binary in bits (32 or 64).</param>
/// <param name="Architecture">Instruction set the bytes are encoded in.</param>
/// <param name="SectionName">Name of the section that was read, e.g. <c>.text</c> or <c>__text</c>.</param>
internal sealed record BinaryTextSection(
byte[] Bytes,
ulong VirtualAddress,
int Bitness,
BinaryArchitecture Architecture,
string SectionName)
{
/// <summary>
/// Exclusive end of the section: the address one past its last byte
/// (<see cref="VirtualAddress"/> plus the number of bytes held).
/// </summary>
public ulong EndAddress => VirtualAddress + (ulong)Bytes.Length;
}
/// <summary>
/// A single call-like instruction found while disassembling a text section.
/// </summary>
/// <param name="InstructionAddress">Address of the instruction itself.</param>
/// <param name="TargetAddress">
/// Address the instruction transfers control to. The ARM64 disassembler stores 0 here when the
/// instruction has no immediate target.
/// </param>
/// <param name="CallKind">How the target is reached (e.g. direct or dynamic).</param>
internal sealed record BinaryCallInstruction(
ulong InstructionAddress,
ulong TargetAddress,
CallKind CallKind);

View File

@@ -0,0 +1,395 @@
using System.Text;
using StellaOps.Scanner.CallGraph.Binary;
namespace StellaOps.Scanner.CallGraph.Binary.Disassembly;
internal static class BinaryTextSectionReader
{
/// <summary>
/// Reads the code section of the binary at <paramref name="path"/> using the parser that matches
/// <paramref name="format"/>.
/// </summary>
/// <param name="path">Path of the binary file.</param>
/// <param name="format">Container format the file is expected to be in.</param>
/// <param name="ct">Cancellation token; checked before any work is done.</param>
/// <returns>The text section, or <see langword="null"/> when the format is unsupported or no section was found.</returns>
public static async Task<BinaryTextSection?> TryReadAsync(
    string path,
    BinaryFormat format,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    switch (format)
    {
        case BinaryFormat.Elf:
            return await TryReadElfTextSectionAsync(path, ct);

        case BinaryFormat.Pe:
            return await TryReadPeTextSectionAsync(path, ct);

        case BinaryFormat.MachO:
            return await TryReadMachOTextSectionAsync(path, ct);

        default:
            return null;
    }
}
/// <summary>
/// Reads the <c>.text</c> section of a little-endian ELF32 or ELF64 file.
/// </summary>
/// <param name="path">Path of the ELF file to open.</param>
/// <param name="ct">Cancellation token; checked before the file is opened and once per section header.</param>
/// <returns>
/// The section bytes with their address, bitness and architecture, or <see langword="null"/> when
/// the file is not a little-endian ELF image, is truncated or malformed, or has no usable
/// <c>.text</c> section. Section contents that extend past the end of the file are returned
/// truncated to what the file actually holds.
/// </returns>
/// <remarks>
/// File access is synchronous; the method returns an already-completed task. Exceptions raised by
/// opening the file (for example a missing file) are not swallowed.
/// </remarks>
private static Task<BinaryTextSection?> TryReadElfTextSectionAsync(string path, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    using var stream = File.OpenRead(path);
    using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);

    BinaryTextSection? section;
    try
    {
        section = ReadTextSection();
    }
    catch (EndOfStreamException)
    {
        // A header or table ran past the end of the file: treat the image as unreadable.
        section = null;
    }

    return Task.FromResult(section);

    BinaryTextSection? ReadTextSection()
    {
        var fileLength = stream.Length;

        var ident = reader.ReadBytes(16);
        if (ident.Length < 16
            || ident[0] != 0x7F
            || ident[1] != (byte)'E'
            || ident[2] != (byte)'L'
            || ident[3] != (byte)'F')
        {
            return null;
        }

        // EI_CLASS: 1 = ELF32, 2 = ELF64. EI_DATA: 1 = little-endian, the only byte order
        // BinaryReader decodes.
        var is64Bit = ident[4] == 2;
        if ((!is64Bit && ident[4] != 1) || ident[5] != 1)
        {
            return null;
        }

        reader.ReadUInt16(); // e_type (unused)
        var eMachine = reader.ReadUInt16();
        var architecture = eMachine switch
        {
            3 => BinaryArchitecture.X86,
            62 => BinaryArchitecture.X64,
            183 => BinaryArchitecture.Arm64,
            _ => BinaryArchitecture.Unknown
        };

        // e_shoff
        stream.Seek(is64Bit ? 40 : 32, SeekOrigin.Begin);
        long sectionHeaderOffset = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();

        // e_shentsize, e_shnum, e_shstrndx
        stream.Seek(is64Bit ? 58 : 46, SeekOrigin.Begin);
        var sectionHeaderSize = reader.ReadUInt16();
        var sectionHeaderCount = reader.ReadUInt16();
        var sectionNameIndex = reader.ReadUInt16();

        // e_shentsize must equal sizeof(Elf64_Shdr) / sizeof(Elf32_Shdr); any other value means the
        // fixed field offsets used below would read the wrong bytes.
        var expectedHeaderSize = is64Bit ? 64 : 40;
        if (sectionHeaderOffset <= 0
            || sectionHeaderOffset >= fileLength
            || sectionHeaderCount == 0
            || sectionHeaderSize != expectedHeaderSize
            || sectionNameIndex >= sectionHeaderCount)
        {
            return null;
        }

        // Read section name string table
        var nameTableOffset = ReadElfSectionOffset(reader, stream, sectionHeaderOffset, sectionHeaderSize, sectionNameIndex, is64Bit);
        var nameTableSize = ReadElfSectionSize(reader, stream, sectionHeaderOffset, sectionHeaderSize, sectionNameIndex, is64Bit);
        if (nameTableOffset <= 0 || nameTableSize <= 0 || nameTableOffset >= fileLength)
        {
            return null;
        }

        stream.Seek(nameTableOffset, SeekOrigin.Begin);
        var nameTable = reader.ReadBytes(ClampLength(nameTableSize, fileLength - nameTableOffset));

        for (var i = 0; i < sectionHeaderCount; i++)
        {
            ct.ThrowIfCancellationRequested();

            stream.Seek(sectionHeaderOffset + ((long)i * sectionHeaderSize), SeekOrigin.Begin);
            var nameIndex = reader.ReadUInt32();
            reader.ReadUInt32(); // sh_type

            ulong sectionAddress;
            long sectionOffset;
            long sectionSize;
            if (is64Bit)
            {
                reader.ReadUInt64(); // sh_flags
                sectionAddress = reader.ReadUInt64();
                sectionOffset = reader.ReadInt64();
                sectionSize = reader.ReadInt64();
            }
            else
            {
                reader.ReadUInt32(); // sh_flags
                sectionAddress = reader.ReadUInt32();
                sectionOffset = reader.ReadUInt32();
                sectionSize = reader.ReadUInt32();
            }

            // An index beyond int range wraps negative and is rejected by the string reader.
            var name = ReadNullTerminatedString(nameTable, unchecked((int)nameIndex));
            if (!string.Equals(name, ".text", StringComparison.Ordinal))
            {
                continue;
            }

            if (sectionOffset <= 0 || sectionSize <= 0 || sectionOffset >= fileLength)
            {
                return null;
            }

            stream.Seek(sectionOffset, SeekOrigin.Begin);
            var bytes = reader.ReadBytes(ClampLength(sectionSize, fileLength - sectionOffset));

            return new BinaryTextSection(
                bytes,
                sectionAddress,
                is64Bit ? 64 : 32,
                architecture,
                name);
        }

        return null;
    }

    // Limits a size taken from the file to the bytes actually available and to the largest array
    // the runtime can allocate, so a corrupt size can neither overflow the int cast nor trigger a
    // huge allocation.
    static int ClampLength(long requested, long available)
        => (int)Math.Min(Math.Min(requested, available), Array.MaxLength);
}
/// <summary>
/// Reads the <c>sh_offset</c> field of the section header at <paramref name="index"/> in the
/// section header table.
/// </summary>
/// <param name="reader">Reader positioned over <paramref name="stream"/>.</param>
/// <param name="stream">Seekable stream containing the ELF image.</param>
/// <param name="sectionHeaderOffset">File offset of the section header table.</param>
/// <param name="entrySize">Size in bytes of one section header.</param>
/// <param name="index">Zero-based index of the section header to read.</param>
/// <param name="is64Bit"><see langword="true"/> for ELF64 layout, <see langword="false"/> for ELF32.</param>
/// <returns>The section's file offset.</returns>
private static long ReadElfSectionOffset(BinaryReader reader, Stream stream, long sectionHeaderOffset, ushort entrySize, ushort index, bool is64Bit)
{
    // Widen before multiplying: ushort * ushort is evaluated as int and can exceed int.MaxValue.
    var position = sectionHeaderOffset + ((long)index * entrySize);
    return ReadElfSectionOffset(reader, stream, position, is64Bit);
}

/// <summary>
/// Reads the <c>sh_offset</c> field of the section header that starts at file offset
/// <paramref name="position"/>.
/// </summary>
private static long ReadElfSectionOffset(BinaryReader reader, Stream stream, long position, bool is64Bit)
{
    // sh_offset follows sh_name, sh_type, sh_flags and sh_addr: 24 bytes into an ELF64 header,
    // 16 bytes into an ELF32 header.
    stream.Seek(position + (is64Bit ? 24 : 16), SeekOrigin.Begin);
    return is64Bit ? reader.ReadInt64() : reader.ReadInt32();
}
/// <summary>
/// Reads the <c>sh_size</c> field of the section header at <paramref name="index"/> in the
/// section header table.
/// </summary>
/// <param name="reader">Reader positioned over <paramref name="stream"/>.</param>
/// <param name="stream">Seekable stream containing the ELF image.</param>
/// <param name="sectionHeaderOffset">File offset of the section header table.</param>
/// <param name="entrySize">Size in bytes of one section header.</param>
/// <param name="index">Zero-based index of the section header to read.</param>
/// <param name="is64Bit"><see langword="true"/> for ELF64 layout, <see langword="false"/> for ELF32.</param>
/// <returns>The section's size in bytes.</returns>
private static long ReadElfSectionSize(BinaryReader reader, Stream stream, long sectionHeaderOffset, ushort entrySize, ushort index, bool is64Bit)
{
    // Widen before multiplying: ushort * ushort is evaluated as int and can exceed int.MaxValue.
    var position = sectionHeaderOffset + ((long)index * entrySize);
    return ReadElfSectionSize(reader, stream, position, is64Bit);
}

/// <summary>
/// Reads the <c>sh_size</c> field of the section header that starts at file offset
/// <paramref name="position"/>.
/// </summary>
private static long ReadElfSectionSize(BinaryReader reader, Stream stream, long position, bool is64Bit)
{
    // sh_size follows sh_offset: 32 bytes into an ELF64 header, 20 bytes into an ELF32 header.
    stream.Seek(position + (is64Bit ? 32 : 20), SeekOrigin.Begin);
    return is64Bit ? reader.ReadInt64() : reader.ReadInt32();
}
/// <summary>
/// Reads the <c>.text</c> section of a PE/COFF image.
/// </summary>
/// <param name="path">Path of the PE file to open.</param>
/// <param name="ct">Cancellation token; checked before the file is opened and once per section header.</param>
/// <returns>
/// The raw section bytes with the <c>VirtualAddress</c> value stored in the section header (the
/// image base is not added), or <see langword="null"/> when the PE signature is missing, the file
/// is truncated or malformed, or there is no usable <c>.text</c> section. Raw data that extends
/// past the end of the file is returned truncated to what the file actually holds.
/// </returns>
/// <remarks>
/// File access is synchronous; the method returns an already-completed task. Exceptions raised by
/// opening the file (for example a missing file) are not swallowed.
/// </remarks>
private static Task<BinaryTextSection?> TryReadPeTextSectionAsync(string path, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    using var stream = File.OpenRead(path);
    using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);

    BinaryTextSection? section;
    try
    {
        section = ReadTextSection();
    }
    catch (EndOfStreamException)
    {
        // A header or table ran past the end of the file: treat the image as unreadable.
        section = null;
    }

    return Task.FromResult(section);

    BinaryTextSection? ReadTextSection()
    {
        var fileLength = stream.Length;

        // e_lfanew: file offset of the "PE\0\0" signature.
        stream.Seek(0x3C, SeekOrigin.Begin);
        var peOffset = reader.ReadInt32();
        if (peOffset < 0 || peOffset >= fileLength)
        {
            // A negative offset would make Seek throw; one past the end cannot hold a signature.
            return null;
        }

        stream.Seek(peOffset, SeekOrigin.Begin);
        var signature = reader.ReadUInt32();
        if (signature != 0x00004550)
        {
            return null;
        }

        var machine = reader.ReadUInt16();
        var numberOfSections = reader.ReadUInt16();
        reader.ReadUInt32(); // timestamp
        reader.ReadUInt32(); // symbol table ptr
        reader.ReadUInt32(); // number of symbols
        var optionalHeaderSize = reader.ReadUInt16();
        reader.ReadUInt16(); // characteristics

        var architecture = machine switch
        {
            0x014c => BinaryArchitecture.X86,
            0x8664 => BinaryArchitecture.X64,
            0xaa64 => BinaryArchitecture.Arm64,
            _ => BinaryArchitecture.Unknown
        };

        if (optionalHeaderSize == 0)
        {
            return null;
        }

        var optionalHeaderStart = stream.Position;
        var magic = reader.ReadUInt16();
        var is64Bit = magic == 0x20b; // PE32+ optional header

        // The section table starts immediately after the optional header.
        stream.Seek(optionalHeaderStart + optionalHeaderSize, SeekOrigin.Begin);

        for (var i = 0; i < numberOfSections; i++)
        {
            ct.ThrowIfCancellationRequested();

            var nameBytes = reader.ReadBytes(8);
            var name = Encoding.ASCII.GetString(nameBytes).TrimEnd('\0');
            reader.ReadUInt32(); // virtual size
            var virtualAddress = reader.ReadUInt32();
            var sizeOfRawData = reader.ReadUInt32();
            var pointerToRawData = reader.ReadUInt32();
            reader.ReadUInt32(); // pointer to relocations
            reader.ReadUInt32(); // pointer to line numbers
            reader.ReadUInt16(); // number of relocations
            reader.ReadUInt16(); // number of line numbers
            reader.ReadUInt32(); // characteristics

            if (!string.Equals(name, ".text", StringComparison.Ordinal))
            {
                continue;
            }

            if (pointerToRawData == 0 || sizeOfRawData == 0 || pointerToRawData >= fileLength)
            {
                return null;
            }

            // Clamp the header-supplied size to the bytes actually present and to the largest
            // allocatable array, so a corrupt size cannot overflow the int cast.
            var available = fileLength - pointerToRawData;
            var length = (int)Math.Min(Math.Min(sizeOfRawData, available), Array.MaxLength);

            stream.Seek(pointerToRawData, SeekOrigin.Begin);
            var bytes = reader.ReadBytes(length);

            return new BinaryTextSection(
                bytes,
                virtualAddress,
                is64Bit ? 64 : 32,
                architecture,
                name);
        }

        return null;
    }
}
/// <summary>
/// Reads the first <c>__text</c> section of a thin, native-byte-order Mach-O file.
/// </summary>
/// <param name="path">Path of the Mach-O file to open.</param>
/// <param name="ct">Cancellation token; checked before the file is opened and once per load command.</param>
/// <returns>
/// The section bytes with their address, bitness and architecture, or <see langword="null"/> when
/// the magic is not a 32- or 64-bit Mach-O magic in the reader's byte order (this includes
/// byte-swapped and fat/universal files), the file is truncated or malformed, or no usable
/// <c>__text</c> section exists. Section contents that extend past the end of the file are
/// returned truncated to what the file actually holds.
/// </returns>
/// <remarks>
/// File access is synchronous; the method returns an already-completed task. Exceptions raised by
/// opening the file (for example a missing file) are not swallowed.
/// </remarks>
private static Task<BinaryTextSection?> TryReadMachOTextSectionAsync(string path, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    using var stream = File.OpenRead(path);
    using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);

    BinaryTextSection? section;
    try
    {
        section = ReadTextSection();
    }
    catch (EndOfStreamException)
    {
        // A header or load command ran past the end of the file: treat the image as unreadable.
        section = null;
    }

    return Task.FromResult(section);

    BinaryTextSection? ReadTextSection()
    {
        const uint Magic32 = 0xFEEDFACE;
        const uint Magic64 = 0xFEEDFACF;
        const uint LoadCommandHeaderSize = 8; // cmd + cmdsize

        var fileLength = stream.Length;

        var magic = reader.ReadUInt32();
        if (magic != Magic32 && magic != Magic64)
        {
            // Anything else would otherwise be misparsed as a 32-bit image.
            return null;
        }

        var is64Bit = magic == Magic64;

        var cpuType = reader.ReadInt32();
        reader.ReadInt32(); // cpusubtype
        reader.ReadUInt32(); // filetype
        var ncmds = reader.ReadUInt32();
        reader.ReadUInt32(); // sizeofcmds
        reader.ReadUInt32(); // flags
        if (is64Bit)
        {
            reader.ReadUInt32(); // reserved
        }

        var architecture = cpuType switch
        {
            7 => BinaryArchitecture.X86,
            0x01000007 => BinaryArchitecture.X64,
            0x0100000C => BinaryArchitecture.Arm64,
            _ => BinaryArchitecture.Unknown
        };

        var segmentCommand = is64Bit ? 0x19u : 0x1u; // LC_SEGMENT_64 / LC_SEGMENT
        var sectionHeaderSize = is64Bit ? 80 : 68;

        for (uint i = 0; i < ncmds; i++)
        {
            ct.ThrowIfCancellationRequested();

            var cmdStart = stream.Position;
            var cmd = reader.ReadUInt32();
            var cmdsize = reader.ReadUInt32();
            if (cmdsize < LoadCommandHeaderSize)
            {
                // A command smaller than its own header would never advance the cursor.
                return null;
            }

            var cmdEnd = cmdStart + cmdsize;

            if (cmd == segmentCommand)
            {
                ReadFixedString(reader, 16); // segname
                ReadWord(); // vmaddr
                ReadWord(); // vmsize
                ReadWord(); // fileoff
                ReadWord(); // filesize
                reader.ReadInt32(); // maxprot
                reader.ReadInt32(); // initprot
                var nsects = reader.ReadUInt32();
                reader.ReadUInt32(); // flags

                // Only walk section headers that lie inside this load command, whatever nsects claims.
                for (uint s = 0; s < nsects && stream.Position + sectionHeaderSize <= cmdEnd; s++)
                {
                    var sectName = ReadFixedString(reader, 16);
                    ReadFixedString(reader, 16); // segname
                    var addr = ReadWord();
                    var size = ReadWord();
                    var offset = reader.ReadUInt32();
                    reader.ReadUInt32(); // align
                    reader.ReadUInt32(); // reloff
                    reader.ReadUInt32(); // nreloc
                    reader.ReadUInt32(); // flags
                    reader.ReadUInt32(); // reserved1
                    reader.ReadUInt32(); // reserved2
                    if (is64Bit)
                    {
                        reader.ReadUInt32(); // reserved3
                    }

                    if (!string.Equals(sectName, "__text", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    if (size == 0 || offset == 0 || offset >= fileLength)
                    {
                        return null;
                    }

                    // Clamp the header-supplied size to the bytes actually present and to the
                    // largest allocatable array, so a corrupt size cannot overflow the int cast.
                    var available = (ulong)(fileLength - offset);
                    var length = (int)Math.Min(Math.Min(size, available), (ulong)Array.MaxLength);

                    stream.Seek(offset, SeekOrigin.Begin);
                    var bytes = reader.ReadBytes(length);

                    return new BinaryTextSection(
                        bytes,
                        addr,
                        is64Bit ? 64 : 32,
                        architecture,
                        sectName);
                }
            }

            stream.Seek(cmdEnd, SeekOrigin.Begin);
        }

        return null;

        // Address-sized field: 8 bytes in 64-bit images, 4 bytes in 32-bit images.
        ulong ReadWord() => is64Bit ? reader.ReadUInt64() : reader.ReadUInt32();
    }
}
/// <summary>
/// Reads a fixed-width field of <paramref name="length"/> bytes and returns the ASCII text that
/// precedes the first NUL byte, or the whole field when it contains none.
/// </summary>
/// <param name="reader">Reader to consume the field from; it always advances by the bytes read.</param>
/// <param name="length">Width of the field in bytes.</param>
private static string ReadFixedString(BinaryReader reader, int length)
{
    var raw = reader.ReadBytes(length);

    var terminator = 0;
    while (terminator < raw.Length && raw[terminator] != 0)
    {
        terminator++;
    }

    return Encoding.ASCII.GetString(raw, 0, terminator);
}
/// <summary>
/// Decodes the UTF-8 string that starts at <paramref name="offset"/> and runs up to the next NUL
/// byte, or to the end of <paramref name="buffer"/> when no NUL follows.
/// </summary>
/// <param name="buffer">String table bytes.</param>
/// <param name="offset">Index of the first byte of the string.</param>
/// <returns>The decoded string, or an empty string when <paramref name="offset"/> is outside the buffer.</returns>
private static string ReadNullTerminatedString(byte[] buffer, int offset)
{
    var insideBuffer = offset >= 0 && offset < buffer.Length;
    if (!insideBuffer)
    {
        return string.Empty;
    }

    var terminator = Array.IndexOf(buffer, (byte)0, offset);
    var length = (terminator < 0 ? buffer.Length : terminator) - offset;

    return Encoding.UTF8.GetString(buffer, offset, length);
}
}

View File

@@ -0,0 +1,146 @@
using System.Collections.Immutable;
using StellaOps.Scanner.CallGraph;
using StellaOps.Scanner.CallGraph.Binary;
namespace StellaOps.Scanner.CallGraph.Binary.Disassembly;
/// <summary>
/// Converts call instructions disassembled from a binary's text section into call graph edges
/// between symbols.
/// </summary>
internal sealed class DirectCallExtractor
{
    private readonly X86Disassembler _x86Disassembler;
    private readonly Arm64Disassembler _arm64Disassembler;

    /// <summary>
    /// Creates an extractor, using default disassemblers for any argument left <see langword="null"/>.
    /// </summary>
    public DirectCallExtractor(
        X86Disassembler? x86Disassembler = null,
        Arm64Disassembler? arm64Disassembler = null)
    {
        _x86Disassembler = x86Disassembler ?? new X86Disassembler();
        _arm64Disassembler = arm64Disassembler ?? new Arm64Disassembler();
    }

    /// <summary>
    /// Disassembles <paramref name="textSection"/> and returns one edge per call instruction found.
    /// </summary>
    /// <param name="textSection">Code bytes plus address and architecture metadata.</param>
    /// <param name="symbols">Symbols used to name call sites and targets; null entries are ignored.</param>
    /// <param name="binaryName">Package segment used in ids of nodes that belong to this binary.</param>
    /// <returns>
    /// Edges sorted ordinally by source id, target id, call kind and call site. A target whose
    /// address lies outside the text section is placed under the <c>external</c> package. Addresses
    /// that no symbol covers are named <c>addr_&lt;hex&gt;</c>. Empty when the section has no bytes,
    /// the architecture is not supported, or no calls are found.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="textSection"/> or <paramref name="symbols"/> is <see langword="null"/>.
    /// </exception>
    public ImmutableArray<CallGraphEdge> Extract(
        BinaryTextSection textSection,
        IReadOnlyList<BinarySymbol> symbols,
        string binaryName)
    {
        ArgumentNullException.ThrowIfNull(textSection);
        ArgumentNullException.ThrowIfNull(symbols);

        if (textSection.Bytes.Length == 0)
        {
            return ImmutableArray<CallGraphEdge>.Empty;
        }

        // Sorted by address (then name) so ResolveSymbol can binary-search and so that aliases at
        // one address always resolve to the same, ordinally-first, name.
        var orderedSymbols = symbols
            .Where(symbol => symbol is not null)
            .OrderBy(symbol => symbol.Address)
            .ThenBy(symbol => symbol.Name, StringComparer.Ordinal)
            .ToArray();

        var calls = textSection.Architecture switch
        {
            BinaryArchitecture.X86 => _x86Disassembler.ExtractDirectCalls(
                textSection.Bytes,
                textSection.VirtualAddress,
                32),
            BinaryArchitecture.X64 => _x86Disassembler.ExtractDirectCalls(
                textSection.Bytes,
                textSection.VirtualAddress,
                64),
            BinaryArchitecture.Arm64 => _arm64Disassembler.ExtractDirectCalls(
                textSection.Bytes,
                textSection.VirtualAddress),
            _ => ImmutableArray<BinaryCallInstruction>.Empty
        };

        if (calls.IsDefaultOrEmpty)
        {
            return ImmutableArray<CallGraphEdge>.Empty;
        }

        var edges = ImmutableArray.CreateBuilder<CallGraphEdge>(calls.Length);
        foreach (var call in calls)
        {
            var sourceSymbol = ResolveSymbol(orderedSymbols, call.InstructionAddress);
            var targetSymbol = ResolveSymbol(orderedSymbols, call.TargetAddress);

            var targetIsInternal = call.TargetAddress >= textSection.VirtualAddress
                && call.TargetAddress < textSection.EndAddress;

            var sourceId = BuildNodeId(binaryName, sourceSymbol, call.InstructionAddress, isExternal: false);
            var targetId = BuildNodeId(
                targetIsInternal ? binaryName : "external",
                targetSymbol,
                call.TargetAddress,
                isExternal: !targetIsInternal);

            edges.Add(new CallGraphEdge(
                SourceId: sourceId,
                TargetId: targetId,
                CallKind: call.CallKind,
                CallSite: $"0x{call.InstructionAddress:X}"));
        }

        return edges
            .OrderBy(edge => edge.SourceId, StringComparer.Ordinal)
            .ThenBy(edge => edge.TargetId, StringComparer.Ordinal)
            .ThenBy(edge => edge.CallKind.ToString(), StringComparer.Ordinal)
            .ThenBy(edge => edge.CallSite ?? string.Empty, StringComparer.Ordinal)
            .ToImmutableArray();
    }

    /// <summary>
    /// Finds the name of the symbol that contains <paramref name="address"/>.
    /// </summary>
    /// <param name="symbols">Symbols sorted by ascending address, then by name.</param>
    /// <param name="address">Address to look up.</param>
    /// <returns>
    /// The name of the nearest symbol at or below <paramref name="address"/>; when several symbols
    /// share that start address the first one in sort order is used, for exact and interior
    /// addresses alike. Returns <see langword="null"/> when no symbol starts at or below the
    /// address, or when the chosen symbol has a non-zero size and the address lies beyond its end.
    /// </returns>
    private static string? ResolveSymbol(IReadOnlyList<BinarySymbol> symbols, ulong address)
    {
        // Upper bound: index of the first symbol whose address is greater than the query.
        var low = 0;
        var high = symbols.Count;
        while (low < high)
        {
            var mid = low + ((high - low) / 2);
            if (symbols[mid].Address <= address)
            {
                low = mid + 1;
            }
            else
            {
                high = mid;
            }
        }

        if (low == 0)
        {
            return null;
        }

        // Step back to the first alias at the chosen start address, so that a call targeting the
        // function entry and a call made from inside the function name the same node.
        var index = low - 1;
        var start = symbols[index].Address;
        while (index > 0 && symbols[index - 1].Address == start)
        {
            index--;
        }

        var symbol = symbols[index];
        if (start != address && symbol.Size > 0)
        {
            var end = symbol.Address + symbol.Size;
            if (address >= end)
            {
                // The address falls in a gap after the symbol rather than inside it.
                return null;
            }
        }

        return symbol.Name;
    }

    /// <summary>
    /// Builds a node id of the form <c>native:&lt;package&gt;/&lt;symbol&gt;</c>.
    /// </summary>
    /// <remarks>
    /// The package is <c>external</c> when <paramref name="isExternal"/> is set, otherwise
    /// <paramref name="binaryName"/>. A missing or blank symbol is replaced by <c>addr_&lt;hex&gt;</c>.
    /// </remarks>
    private static string BuildNodeId(
        string binaryName,
        string? symbol,
        ulong address,
        bool isExternal)
    {
        var safeSymbol = string.IsNullOrWhiteSpace(symbol) ? $"addr_{address:X}" : symbol;
        return isExternal
            ? $"native:external/{safeSymbol}"
            : $"native:{binaryName}/{safeSymbol}";
    }
}

View File

@@ -0,0 +1,53 @@
using System.Collections.Immutable;
using Iced.Intel;
using StellaOps.Scanner.CallGraph;
namespace StellaOps.Scanner.CallGraph.Binary.Disassembly;
/// <summary>
/// Extracts near call and near jump instructions with an immediate target from x86 machine code
/// using the Iced decoder.
/// </summary>
internal sealed class X86Disassembler
{
    /// <summary>
    /// Decodes <paramref name="code"/> sequentially and records every near call or near jump whose
    /// first operand is a near branch target.
    /// </summary>
    /// <param name="code">Raw machine code.</param>
    /// <param name="baseAddress">Address assigned to the first byte of <paramref name="code"/>.</param>
    /// <param name="bitness">Decoder mode: 16, 32 or 64.</param>
    /// <returns>
    /// The branches found, in decode order, each reported as <see cref="CallKind.Direct"/>.
    /// Decoding stops at the first invalid instruction.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bitness"/> is not 16, 32 or 64.</exception>
    public ImmutableArray<BinaryCallInstruction> ExtractDirectCalls(
        ReadOnlySpan<byte> code,
        ulong baseAddress,
        int bitness)
    {
        switch (bitness)
        {
            case 16:
            case 32:
            case 64:
                break;
            default:
                throw new ArgumentOutOfRangeException(nameof(bitness), "Bitness must be 16, 32, or 64.");
        }

        if (code.IsEmpty)
        {
            return ImmutableArray<BinaryCallInstruction>.Empty;
        }

        var codeReader = new ByteArrayCodeReader(code.ToArray());
        var decoder = Decoder.Create(bitness, codeReader);
        decoder.IP = baseAddress;

        var found = ImmutableArray.CreateBuilder<BinaryCallInstruction>();
        while (codeReader.CanReadByte)
        {
            decoder.Decode(out var decoded);
            if (decoded.IsInvalid)
            {
                break;
            }

            if (!HasNearBranchTarget(decoded))
            {
                continue;
            }

            found.Add(new BinaryCallInstruction(
                decoded.IP,
                decoded.NearBranchTarget,
                CallKind.Direct));
        }

        return found.ToImmutable();
    }

    /// <summary>
    /// Tells whether the instruction is a near call or near jump whose first operand is a 16-, 32-
    /// or 64-bit near branch target.
    /// </summary>
    private static bool HasNearBranchTarget(in Instruction instruction)
    {
        if (!instruction.IsCallNear && !instruction.IsJmpNear)
        {
            return false;
        }

        return instruction.Op0Kind switch
        {
            OpKind.NearBranch16 => true,
            OpKind.NearBranch32 => true,
            OpKind.NearBranch64 => true,
            _ => false
        };
    }
}

View File

@@ -123,7 +123,9 @@ public enum CallKind
Virtual,
Delegate,
Reflection,
Dynamic
Dynamic,
Plt,
Iat
}
[JsonConverter(typeof(JsonStringEnumConverter<EntrypointType>))]

View File

@@ -12,6 +12,8 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="Gee.External.Capstone" Version="2.3.0" />
<PackageReference Include="Iced" Version="1.21.0" />
<PackageReference Include="Microsoft.Build.Locator" Version="1.10.0" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="4.14.0" />
<PackageReference Include="Microsoft.CodeAnalysis.Workspaces.MSBuild" Version="4.14.0" />