using System.Buffers.Binary;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scanner.Analyzers.Native;
using StellaOps.Scanner.Reachability;
using StellaOps.Scanner.Reachability.Lifters;
using StellaOps.TestKit;
using Xunit;

namespace StellaOps.Scanner.Reachability.Tests;

/// <summary>
/// Tests for <see cref="BinaryReachabilityLifter"/>. Each test writes a small synthetic
/// ELF or PE image into a temporary directory, runs the lifter over that directory and
/// inspects the nodes and edges of the resulting graph.
/// </summary>
public class BinaryReachabilityLifterTests
{
    /// <summary>
    /// A minimal ELF yields exactly one binary node whose symbol id and <c>code_id</c>
    /// attribute are derived from the SHA-256 of the file contents, and the rich graph
    /// carries the same code id.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task EmitsSymbolAndCodeIdForBinary()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "sample.so");
        var bytes = CreateMinimalElf();
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-42"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var graph = builder.ToUnionGraph(SymbolId.Lang.Binary);

        var node = Assert.Single(graph.Nodes);
        Assert.Equal(SymbolId.Lang.Binary, node.Lang);

        var shaHex = Convert.ToHexString(SHA256.HashData(bytes)).ToLowerInvariant();
        var expectedSymbolId = SymbolId.ForBinaryAddressed($"sha256:{shaHex}", ".text", "0x0", "sample.so", "static");
        Assert.Equal(expectedSymbolId, node.SymbolId);

        Assert.NotNull(node.Attributes);
        var expectedCodeId = CodeId.ForBinarySegment("elf", $"sha256:{shaHex}", "0x0", bytes.LongLength, ".text");
        Assert.Equal(expectedCodeId, node.Attributes!["code_id"]);

        var rich = RichGraphBuilder.FromUnion(graph, "test-analyzer", "1.0.0");
        var richNode = Assert.Single(rich.Nodes);
        Assert.Equal(expectedCodeId, richNode.CodeId);
    }

    /// <summary>
    /// An ELF with a non-zero <c>e_entry</c> yields a synthetic <c>_start</c> entry-point
    /// node in addition to the binary node, plus a call edge targeting the binary node.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task EmitsEntryPointForElfWithNonZeroEntryAddress()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "sample.so");
        var bytes = CreateElfWithEntryPoint(0x401000);
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-entry"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var graph = builder.ToUnionGraph(SymbolId.Lang.Binary);

        // Should have binary node + entry point node.
        Assert.Equal(2, graph.Nodes.Count);

        var entryNode = graph.Nodes.FirstOrDefault(n =>
            n.Kind == "entry_point" && n.Attributes?.ContainsKey("is_synthetic_root") == true);
        Assert.NotNull(entryNode);
        Assert.Equal("_start", entryNode!.Display);

        // Should have a call edge whose target is the binary node. The lookup is done
        // once up front instead of once per inspected edge.
        var binaryNode = graph.Nodes.First(n => n.Kind == "binary");
        var entryEdge = graph.Edges.FirstOrDefault(e =>
            e.EdgeType == EdgeTypes.Call && e.To == binaryNode.SymbolId);
        Assert.NotNull(entryEdge);
    }

    /// <summary>
    /// A versioned shared-library file name (<c>libssl.so.3</c>) produces a
    /// <c>purl</c> attribute of <c>pkg:generic/libssl@3</c> on the binary node.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task EmitsPurlForLibrary()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "libssl.so.3");
        var bytes = CreateMinimalElf();
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-purl"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var graph = builder.ToUnionGraph(SymbolId.Lang.Binary);

        var node = Assert.Single(graph.Nodes);
        Assert.NotNull(node.Attributes);
        Assert.True(node.Attributes!.ContainsKey("purl"));
        Assert.Equal("pkg:generic/libssl@3", node.Attributes["purl"]);
    }

    /// <summary>
    /// An ELF whose <c>e_entry</c> is zero yields only the binary node and no
    /// entry-point node.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task DoesNotEmitEntryPointForElfWithZeroEntry()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "noop.so");
        var bytes = CreateMinimalElf(); // Entry is 0x0
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-noentry"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var graph = builder.ToUnionGraph(SymbolId.Lang.Binary);

        // Should only have the binary node, no entry point.
        Assert.Single(graph.Nodes);
        Assert.DoesNotContain(graph.Nodes, n => n.Kind == "entry_point");
    }

    /// <summary>
    /// An undefined global function symbol in <c>.dynsym</c> yields an <c>unknown</c>
    /// node displayed as <c>?name</c>, tagged with reason <c>elf-dynsym-undef</c>, and a
    /// call edge from the binary node to it.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task EmitsUnknownsForElfUndefinedDynsymSymbols()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "sample.so");
        var bytes = CreateElfWithDynsymUndefinedSymbol("puts");
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-unknowns"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var graph = builder.ToUnionGraph(SymbolId.Lang.Binary);

        var binaryNode = Assert.Single(graph.Nodes, n => n.Kind == "binary");
        var unknownNode = Assert.Single(graph.Nodes, n => n.Kind == "unknown" && n.Display == "?puts");
        Assert.NotNull(unknownNode.Attributes);
        Assert.Equal("true", unknownNode.Attributes!["is_unknown"]);
        Assert.Equal("elf-dynsym-undef", unknownNode.Attributes["reason"]);

        Assert.Contains(graph.Edges, e =>
            e.EdgeType == EdgeTypes.Call
            && e.From == binaryNode.SymbolId
            && e.To == unknownNode.SymbolId);
    }

    /// <summary>
    /// A <c>DT_NEEDED</c> dependency on <c>libc.so.6</c> surfaces in the rich graph as a
    /// single import edge carrying a purl and a <c>sha256:</c> symbol digest.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task RichGraphIncludesPurlAndSymbolDigestForElfDependencies()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "sample.elf");
        var bytes = CreateElf64WithDependencies(["libc.so.6"]);
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-elf-deps"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var union = builder.ToUnionGraph(SymbolId.Lang.Binary);
        var rich = RichGraphBuilder.FromUnion(union, "test-analyzer", "1.0.0");

        var edge = Assert.Single(rich.Edges);
        Assert.Equal(EdgeTypes.Import, edge.Kind);
        Assert.Equal("pkg:generic/libc@6", edge.Purl);
        Assert.NotNull(edge.SymbolDigest);
        Assert.StartsWith("sha256:", edge.SymbolDigest, StringComparison.Ordinal);
    }

    /// <summary>
    /// A PE import of <c>KERNEL32.dll</c> surfaces in the rich graph as a single import
    /// edge carrying a purl and a <c>sha256:</c> symbol digest.
    /// </summary>
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public async Task RichGraphIncludesPurlAndSymbolDigestForPeImports()
    {
        using var temp = new TempDir();
        var binaryPath = System.IO.Path.Combine(temp.Path, "sample.exe");
        var bytes = CreatePe64WithImports(["KERNEL32.dll"]);
        await System.IO.File.WriteAllBytesAsync(binaryPath, bytes);

        var context = new ReachabilityLifterContext
        {
            RootPath = temp.Path,
            AnalysisId = "analysis-pe-imports"
        };
        var builder = new ReachabilityGraphBuilder();
        var lifter = new BinaryReachabilityLifter();

        await lifter.LiftAsync(context, builder, CancellationToken.None);
        var union = builder.ToUnionGraph(SymbolId.Lang.Binary);
        var rich = RichGraphBuilder.FromUnion(union, "test-analyzer", "1.0.0");

        var edge = Assert.Single(rich.Edges);
        Assert.Equal(EdgeTypes.Import, edge.Kind);
        Assert.Equal("pkg:generic/KERNEL32", edge.Purl);
        Assert.NotNull(edge.SymbolDigest);
        Assert.StartsWith("sha256:", edge.SymbolDigest, StringComparison.Ordinal);
    }

    /// <summary>
    /// Builds a 64-byte ELF64 little-endian x86-64 header whose entry point is zero.
    /// </summary>
    /// <returns>The header bytes; identical to <see cref="CreateElfWithEntryPoint"/> with an entry of 0.</returns>
    private static byte[] CreateMinimalElf() => CreateElfWithEntryPoint(0);

    /// <summary>
    /// Builds a 64-byte ELF64 little-endian x86-64 header with the given entry point.
    /// Only the identification bytes, <c>e_machine</c> and <c>e_entry</c> are populated;
    /// every other field is left zero.
    /// </summary>
    /// <param name="entryAddr">Value stored in <c>e_entry</c> (offset 24).</param>
    /// <returns>The header bytes.</returns>
    private static byte[] CreateElfWithEntryPoint(ulong entryAddr)
    {
        var data = new byte[64];
        data[0] = 0x7F;
        data[1] = (byte)'E';
        data[2] = (byte)'L';
        data[3] = (byte)'F';
        data[4] = 2; // 64-bit
        data[5] = 1; // little endian
        data[7] = 0; // System V ABI

        WriteU16LE(data, 18, 0x3E); // e_machine = EM_X86_64

        // e_entry at offset 24. Written explicitly little-endian so the fixture matches
        // the byte order declared in data[5] regardless of the host architecture.
        WriteU64LE(data, 24, entryAddr);
        return data;
    }

    /// <summary>
    /// Builds an ELF64 shared object containing <c>.shstrtab</c>, <c>.dynstr</c> and
    /// <c>.dynsym</c> sections, where <c>.dynsym</c> holds the reserved null symbol plus
    /// one undefined global function symbol.
    /// </summary>
    /// <param name="symbolName">Name of the undefined symbol, encoded as ASCII into <c>.dynstr</c>.</param>
    /// <returns>The complete image: header, section payloads, then the section header table.</returns>
    private static byte[] CreateElfWithDynsymUndefinedSymbol(string symbolName)
    {
        // Name offsets inside shstr: 1 = ".shstrtab", 11 = ".dynstr", 19 = ".dynsym".
        var shstr = Encoding.ASCII.GetBytes("\0.shstrtab\0.dynstr\0.dynsym\0");
        var dynstr = Encoding.ASCII.GetBytes("\0" + symbolName + "\0");

        const int elfHeaderSize = 64;
        const int shEntrySize = 64;
        const int dynsymEntrySize = 24;
        const int dynsymEntries = 2;
        const int shnum = 4;

        // Lay the payloads out back to back after the header, each 8-byte aligned.
        var offset = elfHeaderSize;
        var shstrOffset = offset;
        offset = Align(offset + shstr.Length, 8);
        var dynstrOffset = offset;
        offset = Align(offset + dynstr.Length, 8);
        var dynsymOffset = offset;
        var dynsymSize = dynsymEntrySize * dynsymEntries;
        offset = Align(offset + dynsymSize, 8);
        var shoff = offset;

        var totalSize = shoff + shnum * shEntrySize;
        var buffer = new byte[totalSize];

        // ELF header (64-bit LE) with section headers.
        buffer[0] = 0x7F;
        buffer[1] = (byte)'E';
        buffer[2] = (byte)'L';
        buffer[3] = (byte)'F';
        buffer[4] = 2; // 64-bit
        buffer[5] = 1; // little endian
        buffer[6] = 1; // version
        buffer[7] = 0; // System V ABI
        WriteU16LE(buffer, 16, 3); // e_type = ET_DYN
        WriteU16LE(buffer, 18, 0x3E); // e_machine = EM_X86_64
        WriteU32LE(buffer, 20, 1); // e_version
        WriteU64LE(buffer, 24, 0); // e_entry
        WriteU64LE(buffer, 32, 0); // e_phoff
        WriteU64LE(buffer, 40, (ulong)shoff); // e_shoff
        WriteU32LE(buffer, 48, 0); // e_flags
        WriteU16LE(buffer, 52, elfHeaderSize); // e_ehsize
        WriteU16LE(buffer, 54, 0); // e_phentsize
        WriteU16LE(buffer, 56, 0); // e_phnum
        WriteU16LE(buffer, 58, shEntrySize); // e_shentsize
        WriteU16LE(buffer, 60, shnum); // e_shnum
        WriteU16LE(buffer, 62, 1); // e_shstrndx

        shstr.CopyTo(buffer, shstrOffset);
        dynstr.CopyTo(buffer, dynstrOffset);

        // .dynsym with one undefined global function symbol. Entry 0 stays all-zero.
        var sym1 = dynsymOffset + dynsymEntrySize;
        WriteU32LE(buffer, sym1 + 0, 1u); // st_name (offset into dynstr)
        buffer[sym1 + 4] = 0x12; // st_info = STB_GLOBAL(1) | STT_FUNC(2)
        buffer[sym1 + 5] = 0x00; // st_other
        WriteU16LE(buffer, sym1 + 6, 0); // st_shndx = SHN_UNDEF

        // Section headers. Section 0 stays all-zero.
        // Section 1: .shstrtab
        var sh1 = shoff + shEntrySize;
        WriteU32LE(buffer, sh1 + 0, 1u); // sh_name
        WriteU32LE(buffer, sh1 + 4, 3u); // sh_type = SHT_STRTAB
        WriteU64LE(buffer, sh1 + 24, (ulong)shstrOffset); // sh_offset
        WriteU64LE(buffer, sh1 + 32, (ulong)shstr.Length); // sh_size
        WriteU64LE(buffer, sh1 + 48, 1u); // sh_addralign

        // Section 2: .dynstr
        var sh2 = shoff + shEntrySize * 2;
        WriteU32LE(buffer, sh2 + 0, 11u); // sh_name
        WriteU32LE(buffer, sh2 + 4, 3u); // sh_type = SHT_STRTAB
        WriteU64LE(buffer, sh2 + 24, (ulong)dynstrOffset); // sh_offset
        WriteU64LE(buffer, sh2 + 32, (ulong)dynstr.Length); // sh_size
        WriteU64LE(buffer, sh2 + 48, 1u); // sh_addralign

        // Section 3: .dynsym
        var sh3 = shoff + shEntrySize * 3;
        WriteU32LE(buffer, sh3 + 0, 19u); // sh_name
        WriteU32LE(buffer, sh3 + 4, 11u); // sh_type = SHT_DYNSYM
        WriteU64LE(buffer, sh3 + 24, (ulong)dynsymOffset); // sh_offset
        WriteU64LE(buffer, sh3 + 32, (ulong)dynsymSize); // sh_size
        WriteU32LE(buffer, sh3 + 40, 2u); // sh_link = dynstr
        WriteU32LE(buffer, sh3 + 44, 1u); // sh_info (one local symbol)
        WriteU64LE(buffer, sh3 + 48, 8u); // sh_addralign
        WriteU64LE(buffer, sh3 + 56, dynsymEntrySize); // sh_entsize

        return buffer;
    }

    /// <summary>Rounds <paramref name="value"/> up to the next multiple of <paramref name="alignment"/>.</summary>
    private static int Align(int value, int alignment)
        => (value + (alignment - 1)) / alignment * alignment;

    /// <summary>Writes the low 16 bits of <paramref name="value"/> little-endian at <paramref name="offset"/>.</summary>
    private static void WriteU16LE(byte[] buffer, int offset, int value)
        => BinaryPrimitives.WriteUInt16LittleEndian(buffer.AsSpan(offset, 2), (ushort)value);

    /// <summary>Writes <paramref name="value"/> little-endian at <paramref name="offset"/>.</summary>
    private static void WriteU16LE(byte[] buffer, int offset, ushort value)
        => BinaryPrimitives.WriteUInt16LittleEndian(buffer.AsSpan(offset, 2), value);

    /// <summary>Writes <paramref name="value"/> little-endian at <paramref name="offset"/>.</summary>
    private static void WriteU32LE(byte[] buffer, int offset, uint value)
        => BinaryPrimitives.WriteUInt32LittleEndian(buffer.AsSpan(offset, 4), value);

    /// <summary>Writes <paramref name="value"/> little-endian at <paramref name="offset"/>.</summary>
    private static void WriteU64LE(byte[] buffer, int offset, ulong value)
        => BinaryPrimitives.WriteUInt64LittleEndian(buffer.AsSpan(offset, 8), value);

    /// <summary>
    /// Builds an ELF64 executable image made of a header, three program headers
    /// (PT_INTERP, PT_LOAD, PT_DYNAMIC), an interpreter string, a dynamic section with one
    /// <c>DT_NEEDED</c> entry per dependency, and a string table.
    /// </summary>
    /// <param name="dependencies">Library names to record as <c>DT_NEEDED</c>; null is treated as empty.</param>
    /// <returns>The complete image bytes.</returns>
    private static byte[] CreateElf64WithDependencies(IReadOnlyList<string> dependencies)
    {
        dependencies ??= [];

        const string interpreter = "/lib64/ld-linux-x86-64.so.2";
        const int elfHeaderSize = 64;
        const int phdrSize = 56;
        const int phdrCount = 3; // PT_INTERP, PT_LOAD, PT_DYNAMIC
        const int dynEntrySize = 16;
        const int fixedDynEntries = 3; // DT_STRTAB, DT_STRSZ, DT_NULL

        // String table: leading NUL, then each distinct string NUL-terminated. Offsets are
        // recorded in characters, which equals bytes as long as the names are ASCII.
        var stringTable = new StringBuilder();
        stringTable.Append('\0');
        var stringOffsets = new Dictionary<string, int>(StringComparer.Ordinal);

        void AddString(string s)
        {
            if (stringOffsets.TryAdd(s, stringTable.Length))
            {
                stringTable.Append(s);
                stringTable.Append('\0');
            }
        }

        AddString(interpreter);
        foreach (var dep in dependencies)
        {
            AddString(dep);
        }

        var stringTableBytes = Encoding.UTF8.GetBytes(stringTable.ToString());

        // File layout: header | program headers | interpreter | dynamic | string table.
        var phdrOffset = elfHeaderSize;
        var interpOffset = phdrOffset + (phdrSize * phdrCount);
        var interpSize = Encoding.UTF8.GetByteCount(interpreter) + 1;
        var dynamicOffset = interpOffset + interpSize;
        var dynamicSize = (dependencies.Count + fixedDynEntries) * dynEntrySize;
        var stringTableOffset = dynamicOffset + dynamicSize;
        var totalSize = stringTableOffset + stringTableBytes.Length;

        // The dynamic section size is known up front, so DT_STRTAB can be emitted with
        // its final value instead of being patched afterwards.
        var dynEntries = new List<(ulong Tag, ulong Value)>(dependencies.Count + fixedDynEntries);
        foreach (var dep in dependencies)
        {
            dynEntries.Add((1UL, (ulong)stringOffsets[dep])); // DT_NEEDED
        }

        dynEntries.Add((5UL, (ulong)stringTableOffset)); // DT_STRTAB
        dynEntries.Add((10UL, (ulong)stringTableBytes.Length)); // DT_STRSZ
        dynEntries.Add((0UL, 0UL)); // DT_NULL

        using var ms = new MemoryStream();
        using var writer = new BinaryWriter(ms);

        // Every segment in this fixture uses offset == vaddr == paddr and filesz == memsz.
        void WriteProgramHeader(uint type, uint flags, ulong segmentOffset, ulong segmentSize, ulong alignment)
        {
            writer.Write(type); // p_type
            writer.Write(flags); // p_flags
            writer.Write(segmentOffset); // p_offset
            writer.Write(segmentOffset); // p_vaddr
            writer.Write(segmentOffset); // p_paddr
            writer.Write(segmentSize); // p_filesz
            writer.Write(segmentSize); // p_memsz
            writer.Write(alignment); // p_align
        }

        // ELF header.
        writer.Write(new byte[] { 0x7f, 0x45, 0x4c, 0x46 }); // Magic
        writer.Write((byte)2); // 64-bit
        writer.Write((byte)1); // Little endian
        writer.Write((byte)1); // ELF version
        writer.Write((byte)0); // OS ABI
        writer.Write(new byte[8]); // Padding
        writer.Write((ushort)2); // ET_EXEC
        writer.Write((ushort)0x3e); // x86_64
        writer.Write(1u); // Version
        writer.Write(0ul); // Entry point
        writer.Write((ulong)phdrOffset); // Program header offset
        writer.Write(0ul); // Section header offset
        writer.Write(0u); // Flags
        writer.Write((ushort)elfHeaderSize); // ELF header size
        writer.Write((ushort)phdrSize); // Program header entry size
        writer.Write((ushort)phdrCount); // Number of program headers
        writer.Write((ushort)0); // Section header entry size
        writer.Write((ushort)0); // Number of section headers
        writer.Write((ushort)0); // Section name string table index

        WriteProgramHeader(3u, 4u, (ulong)interpOffset, (ulong)interpSize, 1ul); // PT_INTERP
        WriteProgramHeader(1u, 5u, 0ul, (ulong)totalSize, 0x1000ul); // PT_LOAD
        WriteProgramHeader(2u, 6u, (ulong)dynamicOffset, (ulong)dynamicSize, 8ul); // PT_DYNAMIC

        // Interpreter path, NUL-terminated.
        writer.Write(Encoding.UTF8.GetBytes(interpreter));
        writer.Write((byte)0);

        // Dynamic section: (tag, value) pairs of 8 bytes each.
        foreach (var (tag, value) in dynEntries)
        {
            writer.Write(tag);
            writer.Write(value);
        }

        writer.Write(stringTableBytes);

        // Make sure everything has reached the stream before snapshotting it.
        writer.Flush();
        return ms.ToArray();
    }

    /// <summary>
    /// Builds a PE32+ image with a single <c>.rdata</c> section holding an import
    /// directory that names one DLL. Only <c>imports[0]</c> is written; no imported
    /// functions are described.
    /// </summary>
    /// <param name="imports">DLL names; null is treated as empty. Only the first entry is used.</param>
    /// <returns>The complete image bytes.</returns>
    /// <exception cref="ArgumentException"><paramref name="imports"/> is null or empty.</exception>
    private static byte[] CreatePe64WithImports(IReadOnlyList<string> imports)
    {
        imports ??= [];
        if (imports.Count == 0)
        {
            throw new ArgumentException("Must provide at least one import.", nameof(imports));
        }

        const int peHeaderOffset = 0x80;
        const int optionalHeaderSize = 240;
        const uint sectionVirtualAddress = 0x1000;
        const uint sectionVirtualSize = 0x200;
        const uint sectionRawSize = 0x200;
        const uint sectionRawOffset = 0x200;
        const uint importDirRva = sectionVirtualAddress;
        const uint importDirSize = 40; // 2 descriptors
        const uint nameRva = sectionVirtualAddress + 0x100;

        var dllNameBytes = Encoding.ASCII.GetBytes(imports[0] + "\0");

        // File offset of the DLL name: the section's raw offset plus the name's
        // displacement from the section's virtual address.
        var nameOffset = (int)(sectionRawOffset + (nameRva - sectionVirtualAddress));

        // Normally the image ends with the section; grow it if the name runs past that.
        var totalSize = Math.Max((int)(sectionRawOffset + sectionRawSize), nameOffset + dllNameBytes.Length);
        var buffer = new byte[totalSize];

        // DOS stub: magic plus e_lfanew pointing at the PE signature.
        buffer[0] = (byte)'M';
        buffer[1] = (byte)'Z';
        BinaryPrimitives.WriteInt32LittleEndian(buffer.AsSpan(0x3C, 4), peHeaderOffset);
        WriteU32LE(buffer, peHeaderOffset, 0x00004550); // PE\0\0

        // COFF file header.
        var coff = peHeaderOffset + 4;
        WriteU16LE(buffer, coff + 0, 0x8664); // Machine
        WriteU16LE(buffer, coff + 2, 1); // NumberOfSections
        WriteU32LE(buffer, coff + 12, 0); // NumberOfSymbols
        WriteU16LE(buffer, coff + 16, (ushort)optionalHeaderSize); // SizeOfOptionalHeader
        WriteU16LE(buffer, coff + 18, 0x22); // Characteristics

        // Optional header.
        var opt = peHeaderOffset + 24;
        WriteU16LE(buffer, opt + 0, 0x20b); // PE32+
        WriteU16LE(buffer, opt + 68, (ushort)PeSubsystem.WindowsConsole); // Subsystem
        WriteU32LE(buffer, opt + 108, 16); // NumberOfRvaAndSizes

        // Data directories: import directory is entry #1 (8 bytes per entry).
        var dataDir = opt + 112;
        WriteU32LE(buffer, dataDir + 8, importDirRva);
        WriteU32LE(buffer, dataDir + 12, importDirSize);

        // Single section header, immediately after the optional header.
        var sectionHeader = opt + optionalHeaderSize;
        var sectionName = Encoding.ASCII.GetBytes(".rdata\0\0");
        sectionName.CopyTo(buffer, sectionHeader);
        WriteU32LE(buffer, sectionHeader + 8, sectionVirtualSize);
        WriteU32LE(buffer, sectionHeader + 12, sectionVirtualAddress);
        WriteU32LE(buffer, sectionHeader + 16, sectionRawSize);
        WriteU32LE(buffer, sectionHeader + 20, sectionRawOffset);

        // Import descriptor #1 at RVA 0x1000 -> file offset 0x200.
        var importOffset = (int)sectionRawOffset;
        WriteU32LE(buffer, importOffset + 0, 0); // OriginalFirstThunk (skip function parsing)
        WriteU32LE(buffer, importOffset + 12, nameRva); // Name RVA

        // Import descriptor #2 is the terminator (zeros), already zero-initialized.

        // DLL name string.
        dllNameBytes.CopyTo(buffer, nameOffset);

        return buffer;
    }
}