up the blocking tasks
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Risk Bundle CI / risk-bundle-build (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Risk Bundle CI / risk-bundle-offline-kit (push) Has been cancelled
Risk Bundle CI / publish-checksums (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-11 02:32:18 +02:00
parent 92bc4d3a07
commit 49922dff5a
474 changed files with 76071 additions and 12411 deletions

View File

@@ -0,0 +1,5 @@
global using System.Collections.Immutable;
global using System.Security.Cryptography;
global using System.Text;
global using System.Text.Json;
global using System.Text.Json.Serialization;

View File

@@ -0,0 +1,337 @@
using StellaOps.Scanner.Analyzers.Native.Internal.Elf;
using StellaOps.Scanner.Analyzers.Native.Internal.Graph;
namespace StellaOps.Scanner.Analyzers.Native.Internal.Callgraph;
/// <summary>
/// Builds native reachability graphs from ELF files.
/// Extracts functions, call edges, synthetic roots, and emits unknowns.
/// </summary>
internal sealed class NativeCallgraphBuilder
{
// Function nodes keyed by symbol id; AddFunction uses TryAdd, so the first node registered for an id wins.
private readonly Dictionary<string, NativeFunctionNode> _functions = new();
// Call edges collected from relocations and init arrays, in insertion order (sorted later by Build).
private readonly List<NativeCallEdge> _edges = new();
// Synthetic roots for _start/_init/_fini/main and for preinit/init array entries.
private readonly List<NativeSyntheticRoot> _roots = new();
// Unresolved targets recorded while adding roots and relocation edges.
private readonly List<NativeUnknown> _unknowns = new();
// Maps a symbol address to the first symbol id registered at that address (filled by AddFunction).
// NOTE(review): nothing in the visible part of this class reads this map — confirm it is still needed.
private readonly Dictionary<ulong, string> _addressToSymbolId = new();
// Layer digest supplied to the constructor; Build copies it into the graph and its metadata.
private readonly string _layerDigest;
// Number of AddElfFile calls made so far; reported as BinaryCount by Build.
private int _binaryCount;
/// <summary>
/// Creates an empty builder whose graph will be tagged with <paramref name="layerDigest"/>.
/// </summary>
/// <param name="layerDigest">Digest stored on the resulting graph and its metadata.</param>
public NativeCallgraphBuilder(string layerDigest) => _layerDigest = layerDigest;
/// <summary>
/// Adds one parsed ELF file to the graph: its named function symbols, its synthetic
/// roots, and the edges derived from relocations and init arrays.
/// </summary>
/// <param name="elf">The parsed ELF file to register.</param>
public void AddElfFile(ElfFile elf)
{
    _binaryCount++;

    // Function nodes: every named FUNC symbol from both the static and dynamic tables.
    var functionSymbols = elf.Symbols
        .Concat(elf.DynamicSymbols)
        .Where(s => s.Type == ElfSymbolType.Func && !string.IsNullOrEmpty(s.Name));
    foreach (var symbol in functionSymbols)
    {
        AddFunction(symbol, elf);
    }

    // Synthetic roots (_start, _init, _fini, main, preinit/init array entries).
    AddSyntheticRoots(elf);

    // Call edges inferred from relocation entries.
    AddRelocationEdges(elf);

    // Call edges from _init to the init array entries.
    AddInitArrayEdges(elf);
}
/// <summary>
/// Builds the final reachability graph from everything added so far.
/// </summary>
/// <remarks>
/// Functions, edges, roots and unknowns are sorted before the content hash is computed.
/// All string keys are compared with <see cref="StringComparer.Ordinal"/> so that the
/// ordering, and therefore the hash computed from it, does not depend on the current culture.
/// </remarks>
/// <returns>The assembled graph together with its metadata and content hash.</returns>
public NativeReachabilityGraph Build()
{
    // The default string comparer used by OrderBy is culture-sensitive; ordinal is stable everywhere.
    var ordinal = StringComparer.Ordinal;

    var functions = _functions.Values
        .OrderBy(f => f.BinaryPath, ordinal)
        .ThenBy(f => f.Address)
        .ToImmutableArray();
    var edges = _edges
        .OrderBy(e => e.CallerId, ordinal)
        .ThenBy(e => e.CallSiteOffset)
        .ToImmutableArray();
    var roots = _roots
        .OrderBy(r => r.BinaryPath, ordinal)
        .ThenBy(r => r.Phase, ordinal)
        .ThenBy(r => r.Order)
        .ToImmutableArray();
    var unknowns = _unknowns
        .OrderBy(u => u.BinaryPath, ordinal)
        .ThenBy(u => u.SourceId, ordinal)
        .ToImmutableArray();

    // The hash covers functions, edges and roots only; unknowns are not part of it.
    var contentHash = NativeGraphIdentifiers.ComputeGraphHash(functions, edges, roots);

    var metadata = new NativeGraphMetadata(
        GeneratedAt: DateTimeOffset.UtcNow,
        GeneratorVersion: NativeGraphIdentifiers.GetGeneratorVersion(),
        LayerDigest: _layerDigest,
        BinaryCount: _binaryCount,
        FunctionCount: functions.Length,
        EdgeCount: edges.Length,
        UnknownCount: unknowns.Length,
        SyntheticRootCount: roots.Length);

    return new NativeReachabilityGraph(
        _layerDigest,
        functions,
        edges,
        roots,
        unknowns,
        metadata,
        contentHash);
}
/// <summary>
/// Registers a function node for <paramref name="sym"/> and indexes it by address.
/// An already-registered symbol id or address is left untouched (first one wins).
/// </summary>
/// <param name="sym">The function symbol to register.</param>
/// <param name="elf">The ELF file the symbol came from.</param>
private void AddFunction(ElfSymbol sym, ElfFile elf)
{
    var bindingName = sym.Binding.ToString().ToLowerInvariant();
    var visibilityName = sym.Visibility.ToString().ToLowerInvariant();
    var id = NativeGraphIdentifiers.ComputeSymbolId(sym.Name, sym.Value, sym.Size, bindingName);
    var digest = NativeGraphIdentifiers.ComputeSymbolDigest(sym.Name, sym.Value, sym.Size, bindingName);

    // PURL is derived from the binary's file name only (simplified; no package mapping here).
    var packageUrl = GeneratePurl(elf.Path, sym.Name);

    // Exported means globally bound with default visibility.
    var exported = sym is { Binding: ElfSymbolBinding.Global, Visibility: ElfSymbolVisibility.Default };

    var node = new NativeFunctionNode(
        SymbolId: id,
        Name: sym.Name,
        Purl: packageUrl,
        BinaryPath: elf.Path,
        BuildId: elf.BuildId,
        Address: sym.Value,
        Size: sym.Size,
        SymbolDigest: digest,
        Binding: bindingName,
        Visibility: visibilityName,
        IsExported: exported);

    _functions.TryAdd(id, node);
    _addressToSymbolId.TryAdd(sym.Value, id);
}
/// <summary>
/// Adds synthetic roots for the well-known entry symbols (_start, _init, _fini, main)
/// and for every preinit_array and init_array entry of <paramref name="elf"/>.
/// </summary>
/// <param name="elf">The ELF file whose entry points are turned into roots.</param>
private void AddSyntheticRoots(ElfFile elf)
{
    // Well-known entry symbols, each registered at order 0 of its phase when present.
    var wellKnown = new (string Symbol, NativeRootType Type, string Phase)[]
    {
        ("_start", NativeRootType.Start, "load"),
        ("_init", NativeRootType.Init, "init"),
        ("_fini", NativeRootType.Fini, "fini"),
        ("main", NativeRootType.Main, "main"),
    };
    foreach (var (symbol, type, phase) in wellKnown)
    {
        AddRootIfExists(elf, symbol, type, phase, 0);
    }

    // preinit_array entries, ordered by their position in the array.
    var position = 0;
    foreach (var entry in elf.PreInitArraySymbols)
    {
        AddRootByName(elf, entry, NativeRootType.PreInitArray, "preinit", position++);
    }

    // init_array entries, ordered by their position in the array.
    position = 0;
    foreach (var entry in elf.InitArraySymbols)
    {
        AddRootByName(elf, entry, NativeRootType.InitArray, "init", position++);
    }
}
/// <summary>
/// Adds a synthetic root targeting the first function symbol named
/// <paramref name="symbolName"/>; does nothing when no such symbol exists.
/// </summary>
/// <param name="elf">The ELF file to search (static symbols first, then dynamic).</param>
/// <param name="symbolName">Exact symbol name to look for.</param>
/// <param name="rootType">Root type recorded on the new root.</param>
/// <param name="phase">Phase label recorded on the new root.</param>
/// <param name="order">Order within the phase recorded on the new root.</param>
private void AddRootIfExists(ElfFile elf, string symbolName, NativeRootType rootType, string phase, int order)
{
    ElfSymbol? target = null;
    foreach (var candidate in elf.Symbols.Concat(elf.DynamicSymbols))
    {
        if (candidate.Name == symbolName && candidate.Type == ElfSymbolType.Func)
        {
            target = candidate;
            break;
        }
    }

    if (target is null)
    {
        return;
    }

    var targetId = NativeGraphIdentifiers.ComputeSymbolId(
        target.Name,
        target.Value,
        target.Size,
        target.Binding.ToString().ToLowerInvariant());

    _roots.Add(new NativeSyntheticRoot(
        RootId: NativeGraphIdentifiers.ComputeRootId(targetId, rootType, order),
        TargetId: targetId,
        RootType: rootType,
        BinaryPath: elf.Path,
        Phase: phase,
        Order: order));
}
/// <summary>
/// Adds a root for an init/preinit array entry. Entries named with the
/// <c>func_0x</c> placeholder prefix are recorded as unknowns instead of roots.
/// </summary>
/// <param name="elf">The ELF file the entry belongs to.</param>
/// <param name="symbolName">Resolved symbol name, or a <c>func_0x…</c> placeholder.</param>
/// <param name="rootType">Root type recorded on the new root.</param>
/// <param name="phase">Phase label; also used in the unknown's source id.</param>
/// <param name="order">Index of the entry within its array.</param>
private void AddRootByName(ElfFile elf, string symbolName, NativeRootType rootType, string phase, int order)
{
    var isPlaceholder = symbolName.StartsWith("func_0x", StringComparison.Ordinal);
    if (!isPlaceholder)
    {
        AddRootIfExists(elf, symbolName, rootType, phase, order);
        return;
    }

    // The entry only has a hex-address placeholder name: emit an unknown for it.
    _unknowns.Add(new NativeUnknown(
        UnknownId: NativeGraphIdentifiers.ComputeUnknownId(symbolName, NativeUnknownType.UnresolvedTarget, symbolName),
        UnknownType: NativeUnknownType.UnresolvedTarget,
        SourceId: $"{elf.Path}:{phase}:{order}",
        Name: symbolName,
        Reason: "Init array entry could not be resolved to a symbol",
        BinaryPath: elf.Path));
}
/// <summary>
/// Adds one call edge per relocation whose target is a named function symbol and whose
/// offset falls inside a sized function symbol. Targets with neither an address nor a
/// section index are marked unresolved and also recorded as unknowns.
/// </summary>
/// <param name="elf">The ELF file whose relocations are scanned.</param>
private void AddRelocationEdges(ElfFile elf)
{
    // NOTE(review): relocation symbol indices are looked up in Symbols followed by DynamicSymbols.
    // When both tables are non-empty, an index meant for the dynamic table would presumably land
    // in the static one — confirm against how the relocation sections are linked.
    var symbolTable = elf.Symbols.Concat(elf.DynamicSymbols).ToList();

    foreach (var relocation in elf.Relocations)
    {
        var index = relocation.SymbolIndex;
        if (index == 0 || index >= symbolTable.Count)
        {
            continue;
        }

        var callee = symbolTable[(int)index];
        if (callee.Type != ElfSymbolType.Func || string.IsNullOrEmpty(callee.Name))
        {
            continue;
        }

        // The caller is the function whose address range contains the relocation offset.
        var caller = FindFunctionContainingAddress(symbolTable, relocation.Offset);
        if (caller is null)
        {
            continue;
        }

        var callerBindingName = caller.Binding.ToString().ToLowerInvariant();
        var calleeBindingName = callee.Binding.ToString().ToLowerInvariant();
        var callerId = NativeGraphIdentifiers.ComputeSymbolId(caller.Name, caller.Value, caller.Size, callerBindingName);
        var calleeId = NativeGraphIdentifiers.ComputeSymbolId(callee.Name, callee.Value, callee.Size, calleeBindingName);
        var calleeDigest = NativeGraphIdentifiers.ComputeSymbolDigest(callee.Name, callee.Value, callee.Size, calleeBindingName);
        var edgeId = NativeGraphIdentifiers.ComputeEdgeId(callerId, calleeId, relocation.Offset);

        // Resolved means the target has an address or lives in some section.
        var resolved = !(callee.Value == 0 && callee.SectionIndex == 0);
        string? calleePurl = null;
        var confidence = 0.5;
        if (resolved)
        {
            calleePurl = GeneratePurl(elf.Path, callee.Name);
            confidence = 1.0;
        }

        _edges.Add(new NativeCallEdge(
            EdgeId: edgeId,
            CallerId: callerId,
            CalleeId: calleeId,
            CalleePurl: calleePurl,
            CalleeSymbolDigest: calleeDigest,
            EdgeType: NativeEdgeType.Relocation,
            CallSiteOffset: relocation.Offset,
            IsResolved: resolved,
            Confidence: confidence));

        if (resolved)
        {
            continue;
        }

        // Unresolved external symbol: record it as an unknown tied to the edge.
        _unknowns.Add(new NativeUnknown(
            UnknownId: NativeGraphIdentifiers.ComputeUnknownId(edgeId, NativeUnknownType.UnresolvedTarget, callee.Name),
            UnknownType: NativeUnknownType.UnresolvedTarget,
            SourceId: edgeId,
            Name: callee.Name,
            Reason: "External symbol not resolved within this layer",
            BinaryPath: elf.Path));
    }
}
/// <summary>
/// Adds an edge from the <c>_init</c> function to every init_array entry that resolves
/// to a function symbol. Nothing is added when the file has no <c>_init</c> function.
/// </summary>
/// <param name="elf">The ELF file whose init array is processed.</param>
private void AddInitArrayEdges(ElfFile elf)
{
    var symbolTable = elf.Symbols.Concat(elf.DynamicSymbols).ToList();

    var init = symbolTable.Find(s => s.Name == "_init" && s.Type == ElfSymbolType.Func);
    if (init is null)
    {
        return;
    }

    var initId = NativeGraphIdentifiers.ComputeSymbolId(
        init.Name, init.Value, init.Size, init.Binding.ToString().ToLowerInvariant());

    var slot = -1;
    foreach (var entryName in elf.InitArraySymbols)
    {
        slot++;

        // Placeholder entries were already reported as unknowns when roots were added.
        if (entryName.StartsWith("func_0x", StringComparison.Ordinal))
        {
            continue;
        }

        var target = symbolTable.Find(s => s.Name == entryName && s.Type == ElfSymbolType.Func);
        if (target is null)
        {
            continue;
        }

        var targetBindingName = target.Binding.ToString().ToLowerInvariant();
        var targetId = NativeGraphIdentifiers.ComputeSymbolId(target.Name, target.Value, target.Size, targetBindingName);
        var targetDigest = NativeGraphIdentifiers.ComputeSymbolDigest(target.Name, target.Value, target.Size, targetBindingName);

        // The array index doubles as the call-site offset for these edges.
        var callSite = (ulong)slot;

        _edges.Add(new NativeCallEdge(
            EdgeId: NativeGraphIdentifiers.ComputeEdgeId(initId, targetId, callSite),
            CallerId: initId,
            CalleeId: targetId,
            CalleePurl: GeneratePurl(elf.Path, target.Name),
            CalleeSymbolDigest: targetDigest,
            EdgeType: NativeEdgeType.InitArray,
            CallSiteOffset: callSite,
            IsResolved: true,
            Confidence: 1.0));
    }
}
/// <summary>
/// Returns the first function symbol with a non-zero size whose address range
/// [Value, Value + Size) contains <paramref name="address"/>, or <c>null</c> if none does.
/// </summary>
/// <param name="symbols">Symbols to search, in priority order.</param>
/// <param name="address">Address to locate.</param>
private static ElfSymbol? FindFunctionContainingAddress(IList<ElfSymbol> symbols, ulong address)
{
    foreach (var candidate in symbols)
    {
        if (candidate.Type != ElfSymbolType.Func || candidate.Size == 0)
        {
            continue;
        }

        // "address - Value < Size" is the overflow-safe form of "address < Value + Size":
        // Value + Size can wrap around for a symbol near the top of the address space.
        if (address >= candidate.Value && address - candidate.Value < candidate.Size)
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Builds a simplified <c>pkg:elf/…#symbol</c> identifier from the binary's file name.
/// For names of the form <c>lib&lt;name&gt;.so…</c> the package part is <c>&lt;name&gt;</c>;
/// otherwise it is the whole file name.
/// </summary>
/// <param name="binaryPath">Path of the binary; only its file name is used.</param>
/// <param name="symbolName">Symbol name appended after <c>#</c>.</param>
private static string? GeneratePurl(string binaryPath, string symbolName)
{
    var package = Path.GetFileName(binaryPath);

    // libfoo.so.1.2.3 -> foo (requires at least one character between "lib" and ".so").
    if (package.StartsWith("lib", StringComparison.Ordinal))
    {
        var suffixStart = package.IndexOf(".so", StringComparison.Ordinal);
        if (suffixStart > 3)
        {
            package = package.Substring(3, suffixStart - 3);
        }
    }

    return "pkg:elf/" + package + "#" + symbolName;
}
}

View File

@@ -0,0 +1,515 @@
using System.Buffers.Binary;
namespace StellaOps.Scanner.Analyzers.Native.Internal.Elf;
/// <summary>
/// Reads and parses ELF (Executable and Linkable Format) files.
/// Extracts build-id, symbols, relocations, and init arrays for reachability analysis.
/// </summary>
internal static class ElfReader
{
/// <summary>
/// Returns <c>true</c> when <paramref name="data"/> holds at least a full identification
/// block and its first four bytes equal the ELF magic.
/// </summary>
/// <param name="data">Leading bytes of the candidate file.</param>
public static bool IsElf(ReadOnlySpan<byte> data)
{
    if (data.Length < ElfMagic.IdentSize)
    {
        return false;
    }

    return data.Slice(0, 4).SequenceEqual(ElfMagic.Bytes);
}
/// <summary>
/// Parses an ELF file from a stream.
/// </summary>
/// <param name="stream">Seekable stream positioned at the start of the file; the whole stream is read into memory.</param>
/// <param name="path">Path recorded on the resulting <c>ElfFile</c>.</param>
/// <param name="layerDigest">Layer digest recorded on the resulting <c>ElfFile</c>.</param>
/// <returns>
/// The parsed file, or <c>null</c> when the stream is shorter than the identification block,
/// does not start with the ELF magic, or declares an unsupported class or data encoding.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public static ElfFile? Parse(Stream stream, string path, string layerDigest)
{
    ArgumentNullException.ThrowIfNull(stream);

    Span<byte> ident = stackalloc byte[ElfMagic.IdentSize];

    // Stream.Read may return fewer bytes than requested before end-of-stream;
    // ReadAtLeast keeps reading until the block is full or the stream ends.
    var identRead = stream.ReadAtLeast(ident, ident.Length, throwOnEndOfStream: false);
    if (identRead < ident.Length || !IsElf(ident))
    {
        return null;
    }

    var elfClass = (ElfClass)ident[4];
    var elfData = (ElfData)ident[5];
    if (elfClass is not (ElfClass.Elf32 or ElfClass.Elf64))
    {
        return null;
    }

    // An unknown encoding byte must not be silently decoded as big-endian.
    if (elfData is not (ElfData.Lsb or ElfData.Msb))
    {
        return null;
    }

    var isLittleEndian = elfData == ElfData.Lsb;
    var is64Bit = elfClass == ElfClass.Elf64;

    // Rewind and load the whole file; the parser works on an in-memory buffer.
    stream.Position = 0;
    var fileData = new byte[stream.Length];
    stream.ReadExactly(fileData);

    return Parse(fileData, path, layerDigest, is64Bit, isLittleEndian);
}
/// <summary>
/// Parses an in-memory ELF image: header, sections, symbol tables, notes, relocations,
/// init arrays and needed libraries.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="path">Path recorded on the result.</param>
/// <param name="layerDigest">Layer digest recorded on the result.</param>
/// <param name="is64Bit">Whether the file uses the 64-bit layouts.</param>
/// <param name="isLittleEndian">Whether multi-byte fields are little-endian.</param>
private static ElfFile Parse(byte[] data, string path, string layerDigest, bool is64Bit, bool isLittleEndian)
{
    // Fills in symbol names from the string table that belongs to the symbol table.
    static ImmutableArray<ElfSymbol> WithNames(ImmutableArray<ElfSymbol> table, byte[] strings) =>
        table.Select(sym => sym with { Name = GetString(strings, sym.NameIndex) }).ToImmutableArray();

    var reader = new ElfDataReader(data, isLittleEndian);
    var header = ParseHeader(reader, is64Bit);

    // Section headers are read without names first; names come from the section-name string table.
    var unnamedSections = ParseSectionHeaders(reader, header, is64Bit);
    var sectionNames = GetStringTable(data, unnamedSections, header.SectionNameStringTableIndex);
    var sections = unnamedSections
        .Select(sec => sec with { Name = GetString(sectionNames, sec.NameIndex) })
        .ToImmutableArray();

    // Static and dynamic symbol tables, each with its own string table.
    var (rawSymbols, symbolStrings) = ParseSymbolTable(data, sections, ".symtab", is64Bit, isLittleEndian);
    var (rawDynSymbols, dynSymbolStrings) = ParseSymbolTable(data, sections, ".dynsym", is64Bit, isLittleEndian);
    var symbols = WithNames(rawSymbols, symbolStrings);
    var dynSymbols = WithNames(rawDynSymbols, dynSymbolStrings);

    // Build-id comes from the notes; the code-id is derived from it when present.
    var notes = ParseNotes(data, sections, isLittleEndian);
    var buildId = ExtractBuildId(notes);
    string? codeId = null;
    if (buildId is not null)
    {
        codeId = FormatCodeId(buildId);
    }

    // Hash of .text serves as a fallback identifier.
    var textSectionHash = ComputeTextSectionHash(data, sections);

    var relocations = ParseRelocations(data, sections, is64Bit, isLittleEndian);
    var initArraySymbols = ExtractInitArraySymbols(data, sections, symbols, dynSymbols, is64Bit, isLittleEndian);
    var preInitArraySymbols = ExtractPreInitArraySymbols(data, sections, symbols, dynSymbols, is64Bit, isLittleEndian);
    var neededLibraries = ExtractNeededLibraries(data, sections, is64Bit, isLittleEndian);

    return new ElfFile(
        path,
        layerDigest,
        header,
        sections,
        symbols,
        dynSymbols,
        notes,
        relocations,
        buildId,
        codeId,
        textSectionHash,
        initArraySymbols,
        preInitArraySymbols,
        neededLibraries);
}
/// <summary>
/// Reads the ELF header fields that follow the identification block.
/// </summary>
/// <param name="reader">Reader over the whole file; its position is moved.</param>
/// <param name="is64Bit">Selects 64-bit or 32-bit widths for the entry point and table offsets.</param>
private static ElfHeader ParseHeader(ElfDataReader reader, bool is64Bit)
{
    // Entry point and table offsets are 8 bytes wide in 64-bit files, 4 bytes otherwise.
    ulong ReadWord() => is64Bit ? reader.ReadUInt64() : reader.ReadUInt32();

    // Start right after the identification block, which the caller has already validated.
    reader.Position = ElfMagic.IdentSize;

    var fileType = (ElfType)reader.ReadUInt16();
    var architecture = (ElfMachine)reader.ReadUInt16();
    _ = reader.ReadUInt32(); // version: read only to advance
    var entryPoint = ReadWord();
    var programHeaderOffset = ReadWord();
    var sectionHeaderOffset = ReadWord();
    _ = reader.ReadUInt32(); // flags: read only to advance
    _ = reader.ReadUInt16(); // header size: read only to advance
    var programHeaderEntrySize = reader.ReadUInt16();
    var programHeaderCount = reader.ReadUInt16();
    var sectionHeaderEntrySize = reader.ReadUInt16();
    var sectionHeaderCount = reader.ReadUInt16();
    var sectionNameIndex = reader.ReadUInt16();

    ElfClass fileClass;
    if (is64Bit)
    {
        fileClass = ElfClass.Elf64;
    }
    else
    {
        fileClass = ElfClass.Elf32;
    }

    return new ElfHeader(
        Class: fileClass,
        Data: reader.IsLittleEndian ? ElfData.Lsb : ElfData.Msb,
        OsAbi: (ElfOsAbi)reader.Data[7],
        Type: fileType,
        Machine: architecture,
        EntryPoint: entryPoint,
        ProgramHeaderOffset: programHeaderOffset,
        SectionHeaderOffset: sectionHeaderOffset,
        ProgramHeaderEntrySize: programHeaderEntrySize,
        ProgramHeaderCount: programHeaderCount,
        SectionHeaderEntrySize: sectionHeaderEntrySize,
        SectionHeaderCount: sectionHeaderCount,
        SectionNameStringTableIndex: sectionNameIndex);
}
/// <summary>
/// Reads every section header described by <paramref name="header"/>. Names are left
/// empty; the caller resolves them from the section-name string table.
/// </summary>
/// <param name="reader">Reader over the whole file; its position is moved.</param>
/// <param name="header">Parsed ELF header supplying the table offset and entry count.</param>
/// <param name="is64Bit">Selects the 64-byte or 40-byte entry layout.</param>
private static ImmutableArray<ElfSectionHeader> ParseSectionHeaders(ElfDataReader reader, ElfHeader header, bool is64Bit)
{
    // Both layouts list the same fields in the same order; only the word width differs.
    ulong ReadWord() => is64Bit ? reader.ReadUInt64() : reader.ReadUInt32();

    var stride = is64Bit ? 64 : 40;
    var tableStart = (int)header.SectionHeaderOffset;
    var result = ImmutableArray.CreateBuilder<ElfSectionHeader>(header.SectionHeaderCount);

    for (var index = 0; index < header.SectionHeaderCount; index++)
    {
        reader.Position = tableStart + index * stride;

        var nameIndex = reader.ReadUInt32();
        var sectionType = (ElfSectionType)reader.ReadUInt32();
        var flags = ReadWord();
        var address = ReadWord();
        var fileOffset = ReadWord();
        var size = ReadWord();
        var link = reader.ReadUInt32();
        var info = reader.ReadUInt32();
        var addressAlign = ReadWord();
        var entrySize = ReadWord();

        result.Add(new ElfSectionHeader(
            nameIndex, string.Empty, sectionType, flags, address, fileOffset, size, link, info, addressAlign, entrySize));
    }

    return result.ToImmutable();
}
/// <summary>
/// Reads the symbol table stored in the section named <paramref name="tableName"/>
/// together with the raw bytes of its linked string table.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers with resolved names.</param>
/// <param name="tableName">Section name of the symbol table to read.</param>
/// <param name="is64Bit">Selects the 24-byte or 16-byte entry layout.</param>
/// <param name="isLittleEndian">Byte order of multi-byte fields.</param>
/// <returns>
/// Symbols with empty names plus the string table bytes; both are empty when the section
/// is missing or is not a symbol table.
/// </returns>
private static (ImmutableArray<ElfSymbol> Symbols, byte[] StringTable) ParseSymbolTable(
    byte[] data, ImmutableArray<ElfSectionHeader> sections, string tableName, bool is64Bit, bool isLittleEndian)
{
    var table = sections.FirstOrDefault(s => s.Name == tableName);
    if (table is not { Type: ElfSectionType.SymTab or ElfSectionType.DynSym })
    {
        return (ImmutableArray<ElfSymbol>.Empty, Array.Empty<byte>());
    }

    // The section's link field names the section holding the symbol names.
    byte[] stringBytes;
    var stringSection = sections.ElementAtOrDefault((int)table.Link);
    if (stringSection is null)
    {
        stringBytes = Array.Empty<byte>();
    }
    else
    {
        stringBytes = data.AsSpan((int)stringSection.Offset, (int)stringSection.Size).ToArray();
    }

    var stride = is64Bit ? 24 : 16;
    var total = (int)(table.Size / (ulong)stride);
    var result = ImmutableArray.CreateBuilder<ElfSymbol>(total);
    var reader = new ElfDataReader(data, isLittleEndian) { Position = (int)table.Offset };

    for (var remaining = total; remaining > 0; remaining--)
    {
        // Both layouts start with the name index; the remaining fields are ordered differently.
        var nameIndex = reader.ReadUInt32();
        ulong value, size;
        byte info, other;
        ushort sectionIndex;
        if (is64Bit)
        {
            info = reader.ReadByte();
            other = reader.ReadByte();
            sectionIndex = reader.ReadUInt16();
            value = reader.ReadUInt64();
            size = reader.ReadUInt64();
        }
        else
        {
            value = reader.ReadUInt32();
            size = reader.ReadUInt32();
            info = reader.ReadByte();
            other = reader.ReadByte();
            sectionIndex = reader.ReadUInt16();
        }

        // info packs binding (high nibble) and type (low nibble); visibility is the low two bits of other.
        result.Add(new ElfSymbol(
            nameIndex,
            string.Empty,
            value,
            size,
            (ElfSymbolBinding)(info >> 4),
            (ElfSymbolType)(info & 0xF),
            (ElfSymbolVisibility)(other & 0x3),
            sectionIndex));
    }

    return (result.ToImmutable(), stringBytes);
}
/// <summary>
/// Reads all note entries from every section of type <c>Note</c>.
/// </summary>
/// <remarks>
/// Each entry is three 32-bit fields (name size, descriptor size, type) followed by the
/// name and the descriptor, each padded to 4 bytes. Parsing of a section stops quietly at
/// the first entry that does not fit inside the section or the file, and a zero-length
/// name yields an empty string.
/// </remarks>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers to scan.</param>
/// <param name="isLittleEndian">Byte order of multi-byte fields.</param>
private static ImmutableArray<ElfNote> ParseNotes(byte[] data, ImmutableArray<ElfSectionHeader> sections, bool isLittleEndian)
{
    const int NoteHeaderSize = 12; // three 32-bit fields

    var notes = ImmutableArray.CreateBuilder<ElfNote>();
    foreach (var section in sections.Where(s => s.Type == ElfSectionType.Note))
    {
        if (section.Offset >= (ulong)data.Length)
        {
            continue; // section lies entirely outside the file
        }

        // Clamp the section to the file so an overstated size cannot walk off the buffer.
        var available = (ulong)data.Length - section.Offset;
        var end = (int)(section.Offset + Math.Min(section.Size, available));
        var reader = new ElfDataReader(data, isLittleEndian) { Position = (int)section.Offset };

        while (end - reader.Position >= NoteHeaderSize)
        {
            var namesz = reader.ReadUInt32();
            var descsz = reader.ReadUInt32();
            var type = (ElfGnuNoteType)reader.ReadUInt32();

            var remaining = (ulong)(end - reader.Position);
            var paddedNameSize = ((ulong)namesz + 3) & ~3UL;
            if (paddedNameSize > remaining || descsz > remaining - paddedNameSize)
            {
                break; // truncated or corrupt entry
            }

            // The stored name includes one terminator byte; a zero-length name has nothing to strip.
            var name = namesz == 0
                ? string.Empty
                : Encoding.ASCII.GetString(data, reader.Position, (int)namesz - 1);
            reader.Position += (int)paddedNameSize;

            var desc = data.AsMemory(reader.Position, (int)descsz);
            // Trailing padding may be absent on the last entry; overshooting simply ends the loop.
            reader.Position += Align4((int)descsz);

            notes.Add(new ElfNote(name, type, desc));
        }
    }

    return notes.ToImmutable();
}
/// <summary>
/// Reads every entry of every <c>Rela</c> and <c>Rel</c> section. Entries from
/// <c>Rel</c> sections get an addend of 0.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers to scan.</param>
/// <param name="is64Bit">Selects the 64-bit or 32-bit entry layout.</param>
/// <param name="isLittleEndian">Byte order of multi-byte fields.</param>
private static ImmutableArray<ElfRelocation> ParseRelocations(
    byte[] data, ImmutableArray<ElfSectionHeader> sections, bool is64Bit, bool isLittleEndian)
{
    var result = ImmutableArray.CreateBuilder<ElfRelocation>();

    foreach (var section in sections)
    {
        if (section.Type is not (ElfSectionType.Rela or ElfSectionType.Rel))
        {
            continue;
        }

        var withAddend = section.Type == ElfSectionType.Rela;

        int stride;
        if (is64Bit)
        {
            stride = withAddend ? 24 : 16;
        }
        else
        {
            stride = withAddend ? 12 : 8;
        }

        var remaining = (int)(section.Size / (ulong)stride);
        var reader = new ElfDataReader(data, isLittleEndian) { Position = (int)section.Offset };

        while (remaining-- > 0)
        {
            if (is64Bit)
            {
                // 64-bit info: relocation type in the low 32 bits, symbol index in the high 32 bits.
                var where = reader.ReadUInt64();
                var packed = reader.ReadUInt64();
                var addend = withAddend ? reader.ReadInt64() : 0L;
                result.Add(new ElfRelocation(where, (uint)(packed & 0xFFFFFFFF), (uint)(packed >> 32), addend));
            }
            else
            {
                // 32-bit info: relocation type in the low 8 bits, symbol index in the rest.
                ulong where = reader.ReadUInt32();
                var packed = reader.ReadUInt32();
                long addend = withAddend ? reader.ReadInt32() : 0;
                result.Add(new ElfRelocation(where, packed & 0xFF, packed >> 8, addend));
            }
        }
    }

    return result.ToImmutable();
}
/// <summary>
/// Returns the descriptor of the first note named <c>GNU</c> with type <c>BuildId</c>
/// as a lowercase hex string, or <c>null</c> when there is no such note.
/// </summary>
/// <param name="notes">Notes parsed from the file.</param>
private static string? ExtractBuildId(ImmutableArray<ElfNote> notes)
{
    foreach (var note in notes)
    {
        if (note.Name == "GNU" && note.Type == ElfGnuNoteType.BuildId)
        {
            var hex = Convert.ToHexString(note.Descriptor.Span);
            return hex.ToLowerInvariant();
        }
    }

    return null;
}
/// <summary>
/// Formats a build-id as a code-id. The two are identical here, so the input is returned unchanged.
/// </summary>
/// <param name="buildId">Lowercase hex build-id.</param>
private static string FormatCodeId(string buildId) => buildId;
/// <summary>
/// Returns the lowercase hex SHA-256 of the <c>.text</c> section bytes, or an empty
/// string when the section is missing or empty.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers with resolved names.</param>
private static string ComputeTextSectionHash(byte[] data, ImmutableArray<ElfSectionHeader> sections)
{
    var text = sections.FirstOrDefault(s => s.Name == ".text");
    if (text is not { Size: not 0 })
    {
        return string.Empty;
    }

    ReadOnlySpan<byte> code = data.AsSpan((int)text.Offset, (int)text.Size);
    var digest = SHA256.HashData(code);
    var hex = Convert.ToHexString(digest);
    return hex.ToLowerInvariant();
}
/// <summary>
/// Resolves the entries of the <c>.init_array</c> section to symbol names.
/// </summary>
private static ImmutableArray<string> ExtractInitArraySymbols(
    byte[] data, ImmutableArray<ElfSectionHeader> sections,
    ImmutableArray<ElfSymbol> symbols, ImmutableArray<ElfSymbol> dynSymbols,
    bool is64Bit, bool isLittleEndian) =>
    ExtractArraySymbols(data, sections, symbols, dynSymbols, ".init_array", is64Bit, isLittleEndian);
/// <summary>
/// Resolves the entries of the <c>.preinit_array</c> section to symbol names.
/// </summary>
private static ImmutableArray<string> ExtractPreInitArraySymbols(
    byte[] data, ImmutableArray<ElfSectionHeader> sections,
    ImmutableArray<ElfSymbol> symbols, ImmutableArray<ElfSymbol> dynSymbols,
    bool is64Bit, bool isLittleEndian) =>
    ExtractArraySymbols(data, sections, symbols, dynSymbols, ".preinit_array", is64Bit, isLittleEndian);
/// <summary>
/// Reads the pointer array stored in the section named <paramref name="sectionName"/> and
/// maps each pointer to the name of the first function symbol with that value. Pointers
/// without a matching symbol become <c>func_0x&lt;hex&gt;</c> placeholders.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers with resolved names.</param>
/// <param name="symbols">Static symbols (searched first).</param>
/// <param name="dynSymbols">Dynamic symbols (searched second).</param>
/// <param name="sectionName">Name of the array section.</param>
/// <param name="is64Bit">Selects 8-byte or 4-byte pointers.</param>
/// <param name="isLittleEndian">Byte order of the pointers.</param>
private static ImmutableArray<string> ExtractArraySymbols(
    byte[] data, ImmutableArray<ElfSectionHeader> sections,
    ImmutableArray<ElfSymbol> symbols, ImmutableArray<ElfSymbol> dynSymbols,
    string sectionName, bool is64Bit, bool isLittleEndian)
{
    var array = sections.FirstOrDefault(s => s.Name == sectionName);
    if (array is not { Size: not 0 })
    {
        return ImmutableArray<string>.Empty;
    }

    var candidates = symbols.Concat(dynSymbols).ToList();
    var pointerSize = is64Bit ? 8 : 4;
    var total = (int)(array.Size / (ulong)pointerSize);
    var names = ImmutableArray.CreateBuilder<string>(total);
    var reader = new ElfDataReader(data, isLittleEndian) { Position = (int)array.Offset };

    for (var remaining = total; remaining > 0; remaining--)
    {
        ulong pointer;
        if (is64Bit)
        {
            pointer = reader.ReadUInt64();
        }
        else
        {
            pointer = reader.ReadUInt32();
        }

        var match = candidates.Find(s => s.Value == pointer && s.Type == ElfSymbolType.Func);
        if (match?.Name is { } resolved)
        {
            names.Add(resolved);
        }
        else
        {
            names.Add($"func_0x{pointer:x}");
        }
    }

    return names.ToImmutable();
}
/// <summary>
/// Collects the names referenced by tag-1 (needed) entries of the <c>.dynamic</c> section,
/// resolving them in <c>.dynstr</c>. Scanning stops at the first tag-0 entry. Returns an
/// empty array when either section is missing.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers with resolved names.</param>
/// <param name="is64Bit">Selects 16-byte or 8-byte entries.</param>
/// <param name="isLittleEndian">Byte order of multi-byte fields.</param>
private static ImmutableArray<string> ExtractNeededLibraries(
    byte[] data, ImmutableArray<ElfSectionHeader> sections, bool is64Bit, bool isLittleEndian)
{
    const ulong TagEnd = 0;
    const ulong TagNeeded = 1;

    var dynamic = sections.FirstOrDefault(s => s.Name == ".dynamic");
    if (dynamic is null)
    {
        return ImmutableArray<string>.Empty;
    }

    var dynamicStrings = sections.FirstOrDefault(s => s.Name == ".dynstr");
    if (dynamicStrings is null)
    {
        return ImmutableArray<string>.Empty;
    }

    var strings = data.AsSpan((int)dynamicStrings.Offset, (int)dynamicStrings.Size).ToArray();
    var stride = is64Bit ? 16 : 8;
    var total = (int)(dynamic.Size / (ulong)stride);
    var libraries = ImmutableArray.CreateBuilder<string>();
    var reader = new ElfDataReader(data, isLittleEndian) { Position = (int)dynamic.Offset };

    // Each entry is a (tag, value) pair of file-class-sized words.
    ulong ReadWord() => is64Bit ? reader.ReadUInt64() : reader.ReadUInt32();

    for (var index = 0; index < total; index++)
    {
        var tag = ReadWord();
        var value = ReadWord();

        if (tag == TagEnd)
        {
            break;
        }

        if (tag == TagNeeded)
        {
            libraries.Add(GetString(strings, (uint)value));
        }
    }

    return libraries.ToImmutable();
}
/// <summary>
/// Copies the bytes of the section at <paramref name="index"/>, or returns an empty
/// array when the index is past the end of the section list.
/// </summary>
/// <param name="data">Complete file contents.</param>
/// <param name="sections">Section headers.</param>
/// <param name="index">Index of the string-table section.</param>
private static byte[] GetStringTable(byte[] data, ImmutableArray<ElfSectionHeader> sections, ushort index)
{
    if (index < sections.Length)
    {
        var table = sections[index];
        return data.AsSpan((int)table.Offset, (int)table.Size).ToArray();
    }

    return Array.Empty<byte>();
}
/// <summary>
/// Decodes the UTF-8 string that starts at <paramref name="offset"/> and runs to the next
/// zero byte (or to the end of the table). Offsets past the table yield an empty string.
/// </summary>
/// <param name="strTab">Raw string-table bytes.</param>
/// <param name="offset">Start offset of the string inside the table.</param>
private static string GetString(byte[] strTab, uint offset)
{
    if (offset >= strTab.Length)
    {
        return string.Empty;
    }

    var start = (int)offset;
    var length = 0;
    while (start + length < strTab.Length && strTab[start + length] != 0)
    {
        length++;
    }

    return Encoding.UTF8.GetString(strTab, start, length);
}
/// <summary>
/// Rounds <paramref name="value"/> up to the next multiple of 4.
/// </summary>
private static int Align4(int value)
{
    var bumped = value + 3;
    // Dropping the low two bits is the same as masking with ~3.
    return bumped - (bumped & 3);
}
/// <summary>
/// Sequential reader over a byte array that decodes fixed-width integers using the
/// configured byte order and advances <see cref="Position"/> after each successful read.
/// </summary>
private sealed class ElfDataReader(byte[] data, bool isLittleEndian)
{
    /// <summary>The buffer being read.</summary>
    public byte[] Data { get; } = data;

    /// <summary>Whether multi-byte values are decoded as little-endian.</summary>
    public bool IsLittleEndian { get; } = isLittleEndian;

    /// <summary>Index of the next byte to read.</summary>
    public int Position { get; set; }

    /// <summary>Reads one byte.</summary>
    public byte ReadByte() => Data[Position++];

    /// <summary>Reads an unsigned 16-bit value.</summary>
    public ushort ReadUInt16()
    {
        var source = Data.AsSpan(Position);
        ushort result;
        if (IsLittleEndian)
        {
            result = BinaryPrimitives.ReadUInt16LittleEndian(source);
        }
        else
        {
            result = BinaryPrimitives.ReadUInt16BigEndian(source);
        }

        Position += sizeof(ushort);
        return result;
    }

    /// <summary>Reads an unsigned 32-bit value.</summary>
    public uint ReadUInt32()
    {
        var source = Data.AsSpan(Position);
        uint result;
        if (IsLittleEndian)
        {
            result = BinaryPrimitives.ReadUInt32LittleEndian(source);
        }
        else
        {
            result = BinaryPrimitives.ReadUInt32BigEndian(source);
        }

        Position += sizeof(uint);
        return result;
    }

    /// <summary>Reads an unsigned 64-bit value.</summary>
    public ulong ReadUInt64()
    {
        var source = Data.AsSpan(Position);
        ulong result;
        if (IsLittleEndian)
        {
            result = BinaryPrimitives.ReadUInt64LittleEndian(source);
        }
        else
        {
            result = BinaryPrimitives.ReadUInt64BigEndian(source);
        }

        Position += sizeof(ulong);
        return result;
    }

    /// <summary>Reads a signed 32-bit value.</summary>
    public int ReadInt32()
    {
        var source = Data.AsSpan(Position);
        int result;
        if (IsLittleEndian)
        {
            result = BinaryPrimitives.ReadInt32LittleEndian(source);
        }
        else
        {
            result = BinaryPrimitives.ReadInt32BigEndian(source);
        }

        Position += sizeof(int);
        return result;
    }

    /// <summary>Reads a signed 64-bit value.</summary>
    public long ReadInt64()
    {
        var source = Data.AsSpan(Position);
        long result;
        if (IsLittleEndian)
        {
            result = BinaryPrimitives.ReadInt64LittleEndian(source);
        }
        else
        {
            result = BinaryPrimitives.ReadInt64BigEndian(source);
        }

        Position += sizeof(long);
        return result;
    }
}
}

View File

@@ -0,0 +1,220 @@
namespace StellaOps.Scanner.Analyzers.Native.Internal.Elf;
/// <summary>
/// ELF file class (32-bit or 64-bit).
/// </summary>
/// <remarks>
/// <c>ElfReader.Parse</c> casts identification byte 4 to this enum and returns <c>null</c>
/// for any value other than <see cref="Elf32"/> or <see cref="Elf64"/>.
/// </remarks>
internal enum ElfClass : byte
{
None = 0,
Elf32 = 1,
Elf64 = 2,
}
/// <summary>
/// ELF data encoding (endianness).
/// </summary>
/// <remarks>
/// <c>ElfReader.Parse</c> casts identification byte 5 to this enum and decodes multi-byte
/// fields as little-endian when the value is <see cref="Lsb"/>.
/// </remarks>
internal enum ElfData : byte
{
None = 0,
Lsb = 1, // Little-endian
Msb = 2, // Big-endian
}
/// <summary>
/// ELF OS/ABI.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseHeader</c> fills this from byte 7 of the file without validation, so
/// values other than the named members can occur.
/// </remarks>
internal enum ElfOsAbi : byte
{
None = 0,
Linux = 3,
FreeBsd = 9,
}
/// <summary>
/// ELF file type.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseHeader</c> casts the first 16-bit field after the identification
/// block to this enum without validation.
/// </remarks>
internal enum ElfType : ushort
{
None = 0,
Rel = 1, // Relocatable
Exec = 2, // Executable
Dyn = 3, // Shared object
Core = 4, // Core dump
}
/// <summary>
/// ELF machine architecture.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseHeader</c> casts the second 16-bit field after the identification
/// block to this enum without validation; only the architectures listed here are named.
/// </remarks>
internal enum ElfMachine : ushort
{
None = 0,
I386 = 3,
X86_64 = 62,
Arm = 40,
Aarch64 = 183,
RiscV = 243,
LoongArch = 258,
}
/// <summary>
/// ELF section type.
/// </summary>
/// <remarks>
/// <c>ElfReader</c> selects sections by type for notes (<see cref="Note"/>) and relocations
/// (<see cref="Rela"/>, <see cref="Rel"/>), and only accepts a symbol-table section whose
/// type is <see cref="SymTab"/> or <see cref="DynSym"/>. Other sections are located by name.
/// </remarks>
internal enum ElfSectionType : uint
{
Null = 0,
ProgBits = 1,
SymTab = 2,
StrTab = 3,
Rela = 4,
Hash = 5,
Dynamic = 6,
Note = 7,
NoBits = 8,
Rel = 9,
ShLib = 10,
DynSym = 11,
InitArray = 14,
FiniArray = 15,
PreInitArray = 16,
Group = 17,
SymTabShndx = 18,
}
/// <summary>
/// ELF symbol binding.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseSymbolTable</c> takes this from the high four bits of the symbol's
/// info byte, so values outside the named members can occur.
/// </remarks>
internal enum ElfSymbolBinding : byte
{
Local = 0,
Global = 1,
Weak = 2,
}
/// <summary>
/// ELF symbol type.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseSymbolTable</c> takes this from the low four bits of the symbol's
/// info byte. The call-graph builder only treats <see cref="Func"/> symbols as functions.
/// </remarks>
internal enum ElfSymbolType : byte
{
NoType = 0,
Object = 1,
Func = 2,
Section = 3,
File = 4,
Common = 5,
Tls = 6,
}
/// <summary>
/// ELF symbol visibility.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseSymbolTable</c> takes this from the low two bits of the symbol's
/// "other" byte, so every possible value maps to a named member.
/// </remarks>
internal enum ElfSymbolVisibility : byte
{
Default = 0,
Internal = 1,
Hidden = 2,
Protected = 3,
}
/// <summary>
/// ELF note type for GNU notes.
/// </summary>
/// <remarks>
/// <c>ElfReader.ParseNotes</c> casts every note's type field to this enum regardless of the
/// note's name; <c>ElfReader.ExtractBuildId</c> only uses <see cref="BuildId"/> on notes
/// named <c>GNU</c>.
/// </remarks>
internal enum ElfGnuNoteType : uint
{
AbiTag = 1,
Hwcap = 2,
BuildId = 3,
GoldVersion = 4,
Property = 5,
}
/// <summary>
/// Parsed ELF header information.
/// </summary>
/// <param name="Class">32-bit or 64-bit file class.</param>
/// <param name="Data">Byte order used to decode the file.</param>
/// <param name="OsAbi">OS/ABI byte taken from offset 7 of the file.</param>
/// <param name="Type">File type field.</param>
/// <param name="Machine">Machine architecture field.</param>
/// <param name="EntryPoint">Entry point address (widened from 32 bits for 32-bit files).</param>
/// <param name="ProgramHeaderOffset">File offset of the program header table.</param>
/// <param name="SectionHeaderOffset">File offset of the section header table.</param>
/// <param name="ProgramHeaderEntrySize">Size of one program header entry as stored in the file.</param>
/// <param name="ProgramHeaderCount">Number of program header entries.</param>
/// <param name="SectionHeaderEntrySize">Size of one section header entry as stored in the file.</param>
/// <param name="SectionHeaderCount">Number of section header entries.</param>
/// <param name="SectionNameStringTableIndex">Index of the section that holds section names.</param>
internal sealed record ElfHeader(
ElfClass Class,
ElfData Data,
ElfOsAbi OsAbi,
ElfType Type,
ElfMachine Machine,
ulong EntryPoint,
ulong ProgramHeaderOffset,
ulong SectionHeaderOffset,
ushort ProgramHeaderEntrySize,
ushort ProgramHeaderCount,
ushort SectionHeaderEntrySize,
ushort SectionHeaderCount,
ushort SectionNameStringTableIndex);
/// <summary>
/// Parsed ELF section header.
/// </summary>
/// <param name="NameIndex">Offset of the section name inside the section-name string table.</param>
/// <param name="Name">Resolved section name; empty until <c>ElfReader</c> fills it in from <paramref name="NameIndex"/>.</param>
/// <param name="Type">Section type.</param>
/// <param name="Flags">Raw section flags.</param>
/// <param name="Address">Address field of the section.</param>
/// <param name="Offset">File offset of the section contents.</param>
/// <param name="Size">Size of the section in bytes.</param>
/// <param name="Link">Link field; for symbol tables <c>ElfReader</c> uses it as the index of the string-table section.</param>
/// <param name="Info">Raw info field.</param>
/// <param name="AddressAlign">Raw address alignment field.</param>
/// <param name="EntrySize">Raw entry size field.</param>
internal sealed record ElfSectionHeader(
uint NameIndex,
string Name,
ElfSectionType Type,
ulong Flags,
ulong Address,
ulong Offset,
ulong Size,
uint Link,
uint Info,
ulong AddressAlign,
ulong EntrySize);
/// <summary>
/// Parsed ELF symbol.
/// </summary>
/// <param name="NameIndex">Offset of the symbol name inside the symbol table's string table.</param>
/// <param name="Name">Resolved symbol name; empty until <c>ElfReader</c> fills it in from <paramref name="NameIndex"/>.</param>
/// <param name="Value">Symbol value; the call-graph builder uses it as the function address.</param>
/// <param name="Size">Symbol size in bytes.</param>
/// <param name="Binding">Binding decoded from the high four bits of the info byte.</param>
/// <param name="Type">Type decoded from the low four bits of the info byte.</param>
/// <param name="Visibility">Visibility decoded from the low two bits of the "other" byte.</param>
/// <param name="SectionIndex">Index of the section the symbol is defined in; 0 together with a zero value is treated as unresolved by the call-graph builder.</param>
internal sealed record ElfSymbol(
uint NameIndex,
string Name,
ulong Value,
ulong Size,
ElfSymbolBinding Binding,
ElfSymbolType Type,
ElfSymbolVisibility Visibility,
ushort SectionIndex);
/// <summary>
/// Parsed ELF note.
/// </summary>
/// <param name="Name">Note name decoded as ASCII without its final terminator byte.</param>
/// <param name="Type">Note type field, cast to <see cref="ElfGnuNoteType"/> whatever the name is.</param>
/// <param name="Descriptor">Descriptor bytes; a view over the file buffer rather than a copy.</param>
internal sealed record ElfNote(
string Name,
ElfGnuNoteType Type,
ReadOnlyMemory<byte> Descriptor);
/// <summary>
/// ELF relocation entry.
/// </summary>
/// <param name="Offset">Offset field of the entry; the call-graph builder matches it against function address ranges.</param>
/// <param name="Type">Relocation type: low 32 bits of the info field for 64-bit files, low 8 bits for 32-bit files.</param>
/// <param name="SymbolIndex">Symbol index: high 32 bits of the info field for 64-bit files, the bits above the low 8 for 32-bit files.</param>
/// <param name="Addend">Explicit addend; 0 for entries read from sections without addends.</param>
internal sealed record ElfRelocation(
ulong Offset,
uint Type,
uint SymbolIndex,
long Addend);
/// <summary>
/// Parsed ELF file summary.
/// </summary>
/// <param name="Path">Path passed to <c>ElfReader.Parse</c>.</param>
/// <param name="LayerDigest">Layer digest passed to <c>ElfReader.Parse</c>.</param>
/// <param name="Header">Parsed ELF header.</param>
/// <param name="Sections">Section headers with resolved names.</param>
/// <param name="Symbols">Symbols from the <c>.symtab</c> section (empty when absent).</param>
/// <param name="DynamicSymbols">Symbols from the <c>.dynsym</c> section (empty when absent).</param>
/// <param name="Notes">Entries of all note sections.</param>
/// <param name="Relocations">Entries of all relocation sections, in section order.</param>
/// <param name="BuildId">Lowercase hex GNU build-id, or <c>null</c> when the file has none.</param>
/// <param name="CodeId">Code-id derived from <paramref name="BuildId"/>; <c>null</c> when there is no build-id.</param>
/// <param name="TextSectionHash">Lowercase hex SHA-256 of the <c>.text</c> section, or empty when it is missing or empty.</param>
/// <param name="InitArraySymbols">Names (or <c>func_0x…</c> placeholders) for the <c>.init_array</c> entries, in array order.</param>
/// <param name="PreInitArraySymbols">Names (or <c>func_0x…</c> placeholders) for the <c>.preinit_array</c> entries, in array order.</param>
/// <param name="NeededLibraries">Library names listed as needed in the <c>.dynamic</c> section.</param>
internal sealed record ElfFile(
string Path,
string LayerDigest,
ElfHeader Header,
ImmutableArray<ElfSectionHeader> Sections,
ImmutableArray<ElfSymbol> Symbols,
ImmutableArray<ElfSymbol> DynamicSymbols,
ImmutableArray<ElfNote> Notes,
ImmutableArray<ElfRelocation> Relocations,
string? BuildId,
string? CodeId,
string TextSectionHash,
ImmutableArray<string> InitArraySymbols,
ImmutableArray<string> PreInitArraySymbols,
ImmutableArray<string> NeededLibraries);
/// <summary>
/// Magic bytes for ELF identification.
/// </summary>
internal static class ElfMagic
{
    /// <summary>
    /// The four magic bytes 0x7F 'E' 'L' 'F' that open every ELF file.
    /// </summary>
    /// <remarks>
    /// Written with the fixed-width <c>\u007F</c> escape on purpose: the variable-length
    /// <c>\x</c> escape would also consume the following 'E' (a hex digit) and produce
    /// U+07FE, whose UTF-8 bytes are not the ELF magic.
    /// </remarks>
    public static ReadOnlySpan<byte> Bytes => "\u007FELF"u8;

    /// <summary>
    /// Size in bytes of the identification block at the start of the file.
    /// </summary>
    public const int IdentSize = 16;
}

View File

@@ -0,0 +1,300 @@
namespace StellaOps.Scanner.Analyzers.Native.Internal.Graph;
/// <summary>
/// Writes native reachability graphs as DSSE bundles (NDJSON format).
/// Per reachability spec: deterministic ordering, UTC timestamps, stable hashes.
/// </summary>
internal static class NativeGraphDsseWriter
{
// Shared serializer settings: snake_case property names, null-valued properties omitted,
// and compact (non-indented) output.
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
WriteIndented = false,
};
/// <summary>
/// Writes the graph as NDJSON to a stream: one header record followed by function, edge,
/// synthetic-root and unknown records.
/// </summary>
/// <remarks>
/// The output is UTF-8 without a byte-order mark, and each record group is sorted by its id
/// using ordinal comparison so the bytes do not depend on the current culture. The stream
/// is left open.
/// </remarks>
/// <param name="graph">The graph to serialize.</param>
/// <param name="stream">Destination stream.</param>
/// <param name="cancellationToken">Checked before every record is written.</param>
public static async Task WriteNdjsonAsync(NativeReachabilityGraph graph, Stream stream, CancellationToken cancellationToken = default)
{
    // Encoding.UTF8 would make StreamWriter emit a BOM at the start of the stream.
    var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
    await using var writer = new StreamWriter(stream, utf8NoBom, leaveOpen: true);

    // The default string comparer used by OrderBy is culture-sensitive.
    var ordinal = StringComparer.Ordinal;

    // Write metadata header
    var header = new NdjsonGraphHeader(
        Type: "native.reachability.graph",
        Version: "1.0.0",
        LayerDigest: graph.LayerDigest,
        ContentHash: graph.ContentHash,
        GeneratedAt: graph.Metadata.GeneratedAt.ToString("O"),
        GeneratorVersion: graph.Metadata.GeneratorVersion,
        BinaryCount: graph.Metadata.BinaryCount,
        FunctionCount: graph.Metadata.FunctionCount,
        EdgeCount: graph.Metadata.EdgeCount,
        UnknownCount: graph.Metadata.UnknownCount,
        SyntheticRootCount: graph.Metadata.SyntheticRootCount);
    await WriteLineAsync(writer, header, cancellationToken);

    // Write functions (sorted by symbol_id for determinism)
    foreach (var func in graph.Functions.OrderBy(f => f.SymbolId, ordinal))
    {
        cancellationToken.ThrowIfCancellationRequested();
        var record = new NdjsonFunctionRecord(
            RecordType: "function",
            SymbolId: func.SymbolId,
            Name: func.Name,
            Purl: func.Purl,
            BinaryPath: func.BinaryPath,
            BuildId: func.BuildId,
            Address: $"0x{func.Address:x}",
            Size: func.Size,
            SymbolDigest: func.SymbolDigest,
            Binding: func.Binding,
            Visibility: func.Visibility,
            IsExported: func.IsExported);
        await WriteLineAsync(writer, record, cancellationToken);
    }

    // Write edges (sorted by edge_id for determinism)
    foreach (var edge in graph.Edges.OrderBy(e => e.EdgeId, ordinal))
    {
        cancellationToken.ThrowIfCancellationRequested();
        var record = new NdjsonEdgeRecord(
            RecordType: "edge",
            EdgeId: edge.EdgeId,
            CallerId: edge.CallerId,
            CalleeId: edge.CalleeId,
            CalleePurl: edge.CalleePurl,
            CalleeSymbolDigest: edge.CalleeSymbolDigest,
            EdgeType: edge.EdgeType.ToString().ToLowerInvariant(),
            CallSiteOffset: $"0x{edge.CallSiteOffset:x}",
            IsResolved: edge.IsResolved,
            Confidence: edge.Confidence);
        await WriteLineAsync(writer, record, cancellationToken);
    }

    // Write synthetic roots (sorted by root_id for determinism)
    foreach (var root in graph.SyntheticRoots.OrderBy(r => r.RootId, ordinal))
    {
        cancellationToken.ThrowIfCancellationRequested();
        var record = new NdjsonRootRecord(
            RecordType: "synthetic_root",
            RootId: root.RootId,
            TargetId: root.TargetId,
            RootType: root.RootType.ToString().ToLowerInvariant(),
            BinaryPath: root.BinaryPath,
            Phase: root.Phase,
            Order: root.Order);
        await WriteLineAsync(writer, record, cancellationToken);
    }

    // Write unknowns (sorted by unknown_id for determinism)
    foreach (var unknown in graph.Unknowns.OrderBy(u => u.UnknownId, ordinal))
    {
        cancellationToken.ThrowIfCancellationRequested();
        var record = new NdjsonUnknownRecord(
            RecordType: "unknown",
            UnknownId: unknown.UnknownId,
            UnknownType: unknown.UnknownType.ToString().ToLowerInvariant(),
            SourceId: unknown.SourceId,
            Name: unknown.Name,
            Reason: unknown.Reason,
            BinaryPath: unknown.BinaryPath);
        await WriteLineAsync(writer, record, cancellationToken);
    }

    await writer.FlushAsync(cancellationToken);
}
/// <summary>
/// Writes the graph as a JSON object (for DSSE payload).
/// </summary>
public static string WriteJson(NativeReachabilityGraph graph)
{
var payload = new NdjsonGraphPayload(
Type: "native.reachability.graph",
Version: "1.0.0",
LayerDigest: graph.LayerDigest,
ContentHash: graph.ContentHash,
Metadata: new NdjsonMetadataPayload(
GeneratedAt: graph.Metadata.GeneratedAt.ToString("O"),
GeneratorVersion: graph.Metadata.GeneratorVersion,
BinaryCount: graph.Metadata.BinaryCount,
FunctionCount: graph.Metadata.FunctionCount,
EdgeCount: graph.Metadata.EdgeCount,
UnknownCount: graph.Metadata.UnknownCount,
SyntheticRootCount: graph.Metadata.SyntheticRootCount),
Functions: graph.Functions.OrderBy(f => f.SymbolId).Select(f => new NdjsonFunctionPayload(
SymbolId: f.SymbolId,
Name: f.Name,
Purl: f.Purl,
BinaryPath: f.BinaryPath,
BuildId: f.BuildId,
Address: $"0x{f.Address:x}",
Size: f.Size,
SymbolDigest: f.SymbolDigest,
Binding: f.Binding,
Visibility: f.Visibility,
IsExported: f.IsExported)).ToArray(),
Edges: graph.Edges.OrderBy(e => e.EdgeId).Select(e => new NdjsonEdgePayload(
EdgeId: e.EdgeId,
CallerId: e.CallerId,
CalleeId: e.CalleeId,
CalleePurl: e.CalleePurl,
CalleeSymbolDigest: e.CalleeSymbolDigest,
EdgeType: e.EdgeType.ToString().ToLowerInvariant(),
CallSiteOffset: $"0x{e.CallSiteOffset:x}",
IsResolved: e.IsResolved,
Confidence: e.Confidence)).ToArray(),
SyntheticRoots: graph.SyntheticRoots.OrderBy(r => r.RootId).Select(r => new NdjsonRootPayload(
RootId: r.RootId,
TargetId: r.TargetId,
RootType: r.RootType.ToString().ToLowerInvariant(),
BinaryPath: r.BinaryPath,
Phase: r.Phase,
Order: r.Order)).ToArray(),
Unknowns: graph.Unknowns.OrderBy(u => u.UnknownId).Select(u => new NdjsonUnknownPayload(
UnknownId: u.UnknownId,
UnknownType: u.UnknownType.ToString().ToLowerInvariant(),
SourceId: u.SourceId,
Name: u.Name,
Reason: u.Reason,
BinaryPath: u.BinaryPath)).ToArray());
return JsonSerializer.Serialize(payload, JsonOptions);
}
private static async Task WriteLineAsync<T>(StreamWriter writer, T record, CancellationToken ct)
{
var json = JsonSerializer.Serialize(record, JsonOptions);
await writer.WriteLineAsync(json.AsMemory(), ct);
}
// NDJSON record types
private sealed record NdjsonGraphHeader(
[property: JsonPropertyName("type")] string Type,
[property: JsonPropertyName("version")] string Version,
[property: JsonPropertyName("layer_digest")] string LayerDigest,
[property: JsonPropertyName("content_hash")] string ContentHash,
[property: JsonPropertyName("generated_at")] string GeneratedAt,
[property: JsonPropertyName("generator_version")] string GeneratorVersion,
[property: JsonPropertyName("binary_count")] int BinaryCount,
[property: JsonPropertyName("function_count")] int FunctionCount,
[property: JsonPropertyName("edge_count")] int EdgeCount,
[property: JsonPropertyName("unknown_count")] int UnknownCount,
[property: JsonPropertyName("synthetic_root_count")] int SyntheticRootCount);
private sealed record NdjsonFunctionRecord(
[property: JsonPropertyName("record_type")] string RecordType,
[property: JsonPropertyName("symbol_id")] string SymbolId,
[property: JsonPropertyName("name")] string Name,
[property: JsonPropertyName("purl")] string? Purl,
[property: JsonPropertyName("binary_path")] string BinaryPath,
[property: JsonPropertyName("build_id")] string? BuildId,
[property: JsonPropertyName("address")] string Address,
[property: JsonPropertyName("size")] ulong Size,
[property: JsonPropertyName("symbol_digest")] string SymbolDigest,
[property: JsonPropertyName("binding")] string Binding,
[property: JsonPropertyName("visibility")] string Visibility,
[property: JsonPropertyName("is_exported")] bool IsExported);
private sealed record NdjsonEdgeRecord(
[property: JsonPropertyName("record_type")] string RecordType,
[property: JsonPropertyName("edge_id")] string EdgeId,
[property: JsonPropertyName("caller_id")] string CallerId,
[property: JsonPropertyName("callee_id")] string CalleeId,
[property: JsonPropertyName("callee_purl")] string? CalleePurl,
[property: JsonPropertyName("callee_symbol_digest")] string? CalleeSymbolDigest,
[property: JsonPropertyName("edge_type")] string EdgeType,
[property: JsonPropertyName("call_site_offset")] string CallSiteOffset,
[property: JsonPropertyName("is_resolved")] bool IsResolved,
[property: JsonPropertyName("confidence")] double Confidence);
private sealed record NdjsonRootRecord(
[property: JsonPropertyName("record_type")] string RecordType,
[property: JsonPropertyName("root_id")] string RootId,
[property: JsonPropertyName("target_id")] string TargetId,
[property: JsonPropertyName("root_type")] string RootType,
[property: JsonPropertyName("binary_path")] string BinaryPath,
[property: JsonPropertyName("phase")] string Phase,
[property: JsonPropertyName("order")] int Order);
private sealed record NdjsonUnknownRecord(
[property: JsonPropertyName("record_type")] string RecordType,
[property: JsonPropertyName("unknown_id")] string UnknownId,
[property: JsonPropertyName("unknown_type")] string UnknownType,
[property: JsonPropertyName("source_id")] string SourceId,
[property: JsonPropertyName("name")] string? Name,
[property: JsonPropertyName("reason")] string Reason,
[property: JsonPropertyName("binary_path")] string BinaryPath);
// JSON payload types (for DSSE envelope)
private sealed record NdjsonGraphPayload(
[property: JsonPropertyName("type")] string Type,
[property: JsonPropertyName("version")] string Version,
[property: JsonPropertyName("layer_digest")] string LayerDigest,
[property: JsonPropertyName("content_hash")] string ContentHash,
[property: JsonPropertyName("metadata")] NdjsonMetadataPayload Metadata,
[property: JsonPropertyName("functions")] NdjsonFunctionPayload[] Functions,
[property: JsonPropertyName("edges")] NdjsonEdgePayload[] Edges,
[property: JsonPropertyName("synthetic_roots")] NdjsonRootPayload[] SyntheticRoots,
[property: JsonPropertyName("unknowns")] NdjsonUnknownPayload[] Unknowns);
private sealed record NdjsonMetadataPayload(
[property: JsonPropertyName("generated_at")] string GeneratedAt,
[property: JsonPropertyName("generator_version")] string GeneratorVersion,
[property: JsonPropertyName("binary_count")] int BinaryCount,
[property: JsonPropertyName("function_count")] int FunctionCount,
[property: JsonPropertyName("edge_count")] int EdgeCount,
[property: JsonPropertyName("unknown_count")] int UnknownCount,
[property: JsonPropertyName("synthetic_root_count")] int SyntheticRootCount);
private sealed record NdjsonFunctionPayload(
[property: JsonPropertyName("symbol_id")] string SymbolId,
[property: JsonPropertyName("name")] string Name,
[property: JsonPropertyName("purl")] string? Purl,
[property: JsonPropertyName("binary_path")] string BinaryPath,
[property: JsonPropertyName("build_id")] string? BuildId,
[property: JsonPropertyName("address")] string Address,
[property: JsonPropertyName("size")] ulong Size,
[property: JsonPropertyName("symbol_digest")] string SymbolDigest,
[property: JsonPropertyName("binding")] string Binding,
[property: JsonPropertyName("visibility")] string Visibility,
[property: JsonPropertyName("is_exported")] bool IsExported);
private sealed record NdjsonEdgePayload(
[property: JsonPropertyName("edge_id")] string EdgeId,
[property: JsonPropertyName("caller_id")] string CallerId,
[property: JsonPropertyName("callee_id")] string CalleeId,
[property: JsonPropertyName("callee_purl")] string? CalleePurl,
[property: JsonPropertyName("callee_symbol_digest")] string? CalleeSymbolDigest,
[property: JsonPropertyName("edge_type")] string EdgeType,
[property: JsonPropertyName("call_site_offset")] string CallSiteOffset,
[property: JsonPropertyName("is_resolved")] bool IsResolved,
[property: JsonPropertyName("confidence")] double Confidence);
private sealed record NdjsonRootPayload(
[property: JsonPropertyName("root_id")] string RootId,
[property: JsonPropertyName("target_id")] string TargetId,
[property: JsonPropertyName("root_type")] string RootType,
[property: JsonPropertyName("binary_path")] string BinaryPath,
[property: JsonPropertyName("phase")] string Phase,
[property: JsonPropertyName("order")] int Order);
private sealed record NdjsonUnknownPayload(
[property: JsonPropertyName("unknown_id")] string UnknownId,
[property: JsonPropertyName("unknown_type")] string UnknownType,
[property: JsonPropertyName("source_id")] string SourceId,
[property: JsonPropertyName("name")] string? Name,
[property: JsonPropertyName("reason")] string Reason,
[property: JsonPropertyName("binary_path")] string BinaryPath);
}

View File

@@ -0,0 +1,293 @@
namespace StellaOps.Scanner.Analyzers.Native.Internal.Graph;
/// <summary>
/// Native reachability graph containing functions, call edges, and metadata.
/// Per SCAN-NATIVE-REACH-0146-13 requirements.
/// </summary>
/// <param name="LayerDigest">Digest of the layer.</param>
/// <param name="Functions">Function nodes of the call graph.</param>
/// <param name="Edges">Call edges between function nodes.</param>
/// <param name="SyntheticRoots">Synthetic roots (entry points that don't have callers).</param>
/// <param name="Unknowns">Unknown/unresolved references recorded for the graph.</param>
/// <param name="Metadata">Generation metadata and element counts.</param>
/// <param name="ContentHash">
/// Content hash of the graph. Presumably the value produced by
/// <c>NativeGraphIdentifiers.ComputeGraphHash</c> — TODO confirm against the builder.
/// </param>
public sealed record NativeReachabilityGraph(
string LayerDigest,
ImmutableArray<NativeFunctionNode> Functions,
ImmutableArray<NativeCallEdge> Edges,
ImmutableArray<NativeSyntheticRoot> SyntheticRoots,
ImmutableArray<NativeUnknown> Unknowns,
NativeGraphMetadata Metadata,
string ContentHash);
/// <summary>
/// A function node in the native call graph.
/// </summary>
/// <remarks>
/// NOTE(review): <c>NativeGraphIdentifiers.ComputeSymbolId</c> hashes name, address, size and
/// binding and takes no PURL, which differs from the derivation described for
/// <paramref name="SymbolId"/> below — confirm which one the graph builder actually uses.
/// </remarks>
/// <param name="SymbolId">Deterministic symbol identifier (sha256 of purl+name+binding).</param>
/// <param name="Name">Demangled or raw symbol name.</param>
/// <param name="Purl">Package URL if resolvable (e.g., pkg:elf/libc.so.6).</param>
/// <param name="BinaryPath">Path to the containing binary.</param>
/// <param name="BuildId">ELF build-id if available.</param>
/// <param name="Address">Virtual address of the function.</param>
/// <param name="Size">Size of the function in bytes.</param>
/// <param name="SymbolDigest">SHA-256 of (name + addr + size + binding).</param>
/// <param name="Binding">Symbol binding (local/global/weak).</param>
/// <param name="Visibility">Symbol visibility.</param>
/// <param name="IsExported">Whether the symbol is exported (visible externally).</param>
public sealed record NativeFunctionNode(
string SymbolId,
string Name,
string? Purl,
string BinaryPath,
string? BuildId,
ulong Address,
ulong Size,
string SymbolDigest,
string Binding,
string Visibility,
bool IsExported);
/// <summary>
/// A call edge in the native call graph.
/// </summary>
/// <param name="EdgeId">Deterministic edge identifier.</param>
/// <param name="CallerId">SymbolId of the calling function.</param>
/// <param name="CalleeId">SymbolId of the called function (or Unknown placeholder).</param>
/// <param name="CalleePurl">PURL of the callee if resolvable.</param>
/// <param name="CalleeSymbolDigest">Symbol digest of the callee.</param>
/// <param name="EdgeType">Type of edge; one of the <see cref="NativeEdgeType"/> members.</param>
/// <param name="CallSiteOffset">Offset within caller where call occurs.</param>
/// <param name="IsResolved">Whether the callee was successfully resolved.</param>
/// <param name="Confidence">Confidence level (1.0 for resolved, lower for heuristic).</param>
public sealed record NativeCallEdge(
string EdgeId,
string CallerId,
string CalleeId,
string? CalleePurl,
string? CalleeSymbolDigest,
NativeEdgeType EdgeType,
ulong CallSiteOffset,
bool IsResolved,
double Confidence);
/// <summary>
/// Type of call edge.
/// </summary>
/// <remarks>
/// <c>NativeGraphDsseWriter</c> serializes the value as the lower-cased member name
/// (for example <c>initarray</c>), so renaming a member changes the emitted output.
/// </remarks>
public enum NativeEdgeType
{
/// <summary>Direct function call.</summary>
Direct,
/// <summary>Call through PLT (Procedure Linkage Table).</summary>
Plt,
/// <summary>Call through GOT (Global Offset Table).</summary>
Got,
/// <summary>Relocation-based call.</summary>
Relocation,
/// <summary>Indirect call (target unknown).</summary>
Indirect,
/// <summary>Init/preinit array entry.</summary>
InitArray,
/// <summary>Fini array entry.</summary>
FiniArray,
}
/// <summary>
/// A synthetic root in the call graph (entry points that don't have callers).
/// </summary>
/// <remarks>
/// <c>NativeGraphIdentifiers.ComputeRootId</c> derives an identifier from the target ID,
/// root type and order; presumably that is the source of <paramref name="RootId"/> —
/// TODO confirm against the builder.
/// </remarks>
/// <param name="RootId">Deterministic root identifier.</param>
/// <param name="TargetId">SymbolId of the target function.</param>
/// <param name="RootType">Type of synthetic root.</param>
/// <param name="BinaryPath">Path to the containing binary.</param>
/// <param name="Phase">Execution phase (load, init, main, fini).</param>
/// <param name="Order">Order within the phase (for init arrays).</param>
public sealed record NativeSyntheticRoot(
string RootId,
string TargetId,
NativeRootType RootType,
string BinaryPath,
string Phase,
int Order);
/// <summary>
/// Type of synthetic root.
/// </summary>
/// <remarks>
/// <c>NativeGraphDsseWriter</c> serializes the value as the lower-cased member name
/// (for example <c>preinitarray</c>), and <c>NativeGraphIdentifiers.ComputeRootId</c>
/// includes the member name in its hash input, so renaming a member changes both.
/// </remarks>
public enum NativeRootType
{
/// <summary>_start entry point.</summary>
Start,
/// <summary>_init function.</summary>
Init,
/// <summary>.preinit_array entry.</summary>
PreInitArray,
/// <summary>.init_array entry.</summary>
InitArray,
/// <summary>.fini_array entry.</summary>
FiniArray,
/// <summary>_fini function.</summary>
Fini,
/// <summary>main function.</summary>
Main,
/// <summary>Constructor (C++).</summary>
Constructor,
/// <summary>Destructor (C++).</summary>
Destructor,
}
/// <summary>
/// An unknown/unresolved reference in the call graph.
/// Per docs/signals/unknowns-registry.md specification.
/// </summary>
/// <remarks>
/// <c>NativeGraphDsseWriter</c> emits each instance as an <c>unknown</c> record; because the
/// writer ignores null values, <paramref name="Name"/> is omitted from the JSON when it is null.
/// </remarks>
/// <param name="UnknownId">Deterministic identifier.</param>
/// <param name="UnknownType">Type of unknown reference.</param>
/// <param name="SourceId">SymbolId or EdgeId that references this unknown.</param>
/// <param name="Name">Symbol name if available.</param>
/// <param name="Reason">Why resolution failed.</param>
/// <param name="BinaryPath">Binary where the reference occurs.</param>
public sealed record NativeUnknown(
string UnknownId,
NativeUnknownType UnknownType,
string SourceId,
string? Name,
string Reason,
string BinaryPath);
/// <summary>
/// Type of unknown reference.
/// </summary>
/// <remarks>
/// <c>NativeGraphDsseWriter</c> serializes the value as the lower-cased member name
/// (for example <c>unresolvedpurl</c>), and <c>NativeGraphIdentifiers.ComputeUnknownId</c>
/// includes the member name in its hash input, so renaming a member changes both.
/// </remarks>
public enum NativeUnknownType
{
/// <summary>Symbol could not be resolved to a PURL.</summary>
UnresolvedPurl,
/// <summary>Call target could not be determined.</summary>
UnresolvedTarget,
/// <summary>Symbol hash could not be computed.</summary>
UnresolvedHash,
/// <summary>Binary could not be identified.</summary>
UnresolvedBinary,
/// <summary>Indirect call target is ambiguous.</summary>
AmbiguousTarget,
}
/// <summary>
/// Metadata for the native reachability graph.
/// </summary>
/// <remarks>
/// <see cref="DateTimeOffset"/> does not itself enforce a zero offset, so producers are
/// responsible for supplying <paramref name="GeneratedAt"/> in UTC.
/// <c>NativeGraphDsseWriter</c> formats it with the round-trip ("O") specifier.
/// </remarks>
/// <param name="GeneratedAt">UTC timestamp of generation.</param>
/// <param name="GeneratorVersion">Version of the generator.</param>
/// <param name="LayerDigest">Digest of the layer.</param>
/// <param name="BinaryCount">Number of binaries analyzed.</param>
/// <param name="FunctionCount">Number of functions discovered.</param>
/// <param name="EdgeCount">Number of edges discovered.</param>
/// <param name="UnknownCount">Number of unknown references.</param>
/// <param name="SyntheticRootCount">Number of synthetic roots.</param>
public sealed record NativeGraphMetadata(
DateTimeOffset GeneratedAt,
string GeneratorVersion,
string LayerDigest,
int BinaryCount,
int FunctionCount,
int EdgeCount,
int UnknownCount,
int SyntheticRootCount);
/// <summary>
/// Helper methods for creating deterministic identifiers.
/// </summary>
/// <remarks>
/// Every identifier is derived from a SHA-256 hash of a colon-separated UTF-8 string.
/// Hash inputs are formatted with the invariant culture and collections are ordered
/// ordinally, so results do not depend on the culture of the running process.
/// </remarks>
internal static class NativeGraphIdentifiers
{
    private const string GeneratorVersion = "1.0.0";

    // Short identifiers keep only the first 8 bytes (16 hex characters) of the hash.
    private const int ShortIdByteCount = 8;

    /// <summary>
    /// Computes a deterministic symbol ID from name, address, size, and binding.
    /// </summary>
    /// <param name="name">Symbol name.</param>
    /// <param name="address">Symbol address; hashed in lower-case hexadecimal.</param>
    /// <param name="size">Symbol size; hashed in decimal.</param>
    /// <param name="binding">Symbol binding.</param>
    /// <returns><c>sym:</c> followed by 16 lower-case hex characters.</returns>
    public static string ComputeSymbolId(string name, ulong address, ulong size, string binding)
        => ShortId("sym", SymbolInput(name, address, size, binding));

    /// <summary>
    /// Computes a deterministic symbol digest.
    /// </summary>
    /// <remarks>Uses the same hash input as <see cref="ComputeSymbolId"/>, untruncated and unprefixed.</remarks>
    /// <returns>The full SHA-256 hash as 64 lower-case hex characters.</returns>
    public static string ComputeSymbolDigest(string name, ulong address, ulong size, string binding)
        => Convert.ToHexString(HashUtf8(SymbolInput(name, address, size, binding))).ToLowerInvariant();

    /// <summary>
    /// Computes a deterministic edge ID.
    /// </summary>
    /// <param name="callerId">Identifier of the caller.</param>
    /// <param name="calleeId">Identifier of the callee.</param>
    /// <param name="callSiteOffset">Call-site offset; hashed in lower-case hexadecimal.</param>
    /// <returns><c>edge:</c> followed by 16 lower-case hex characters.</returns>
    public static string ComputeEdgeId(string callerId, string calleeId, ulong callSiteOffset)
        => ShortId("edge", FormattableString.Invariant($"{callerId}:{calleeId}:{callSiteOffset:x}"));

    /// <summary>
    /// Computes a deterministic root ID.
    /// </summary>
    /// <param name="targetId">Identifier of the target function.</param>
    /// <param name="rootType">Root type; its member name is part of the hash input.</param>
    /// <param name="order">Order value; hashed in decimal.</param>
    /// <returns><c>root:</c> followed by 16 lower-case hex characters.</returns>
    public static string ComputeRootId(string targetId, NativeRootType rootType, int order)
        => ShortId("root", FormattableString.Invariant($"{targetId}:{rootType}:{order}"));

    /// <summary>
    /// Computes a deterministic unknown ID.
    /// </summary>
    /// <param name="sourceId">Identifier of the referencing element.</param>
    /// <param name="unknownType">Unknown type; its member name is part of the hash input.</param>
    /// <param name="name">Optional name; null is hashed as the empty string.</param>
    /// <returns><c>unk:</c> followed by 16 lower-case hex characters.</returns>
    public static string ComputeUnknownId(string sourceId, NativeUnknownType unknownType, string? name)
        => ShortId("unk", FormattableString.Invariant($"{sourceId}:{unknownType}:{name ?? ""}"));

    /// <summary>
    /// Computes content hash for the entire graph.
    /// </summary>
    /// <remarks>
    /// Feeds, in this order, every function's symbol ID and symbol digest, every edge ID and
    /// every root ID into one SHA-256 hash. Each group is sorted ordinally by its identifier
    /// first, so the result does not depend on the order of the input arrays.
    /// </remarks>
    /// <returns>The hash as 64 lower-case hex characters.</returns>
    public static string ComputeGraphHash(
        ImmutableArray<NativeFunctionNode> functions,
        ImmutableArray<NativeCallEdge> edges,
        ImmutableArray<NativeSyntheticRoot> roots)
    {
        using var sha = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);

        foreach (var f in functions.OrderBy(f => f.SymbolId, StringComparer.Ordinal))
        {
            sha.AppendData(Encoding.UTF8.GetBytes(f.SymbolId));
            sha.AppendData(Encoding.UTF8.GetBytes(f.SymbolDigest));
        }

        foreach (var e in edges.OrderBy(e => e.EdgeId, StringComparer.Ordinal))
        {
            sha.AppendData(Encoding.UTF8.GetBytes(e.EdgeId));
        }

        foreach (var r in roots.OrderBy(r => r.RootId, StringComparer.Ordinal))
        {
            sha.AppendData(Encoding.UTF8.GetBytes(r.RootId));
        }

        return Convert.ToHexString(sha.GetCurrentHash()).ToLowerInvariant();
    }

    /// <summary>
    /// Gets the current generator version.
    /// </summary>
    public static string GetGeneratorVersion() => GeneratorVersion;

    /// <summary>Builds the hash input shared by the symbol ID and the symbol digest.</summary>
    private static string SymbolInput(string name, ulong address, ulong size, string binding)
        => FormattableString.Invariant($"{name}:{address:x}:{size}:{binding}");

    /// <summary>Hashes the UTF-8 bytes of <paramref name="input"/> with SHA-256.</summary>
    private static byte[] HashUtf8(string input) => SHA256.HashData(Encoding.UTF8.GetBytes(input));

    /// <summary>Returns <c>prefix:</c> plus the first 8 hash bytes of <paramref name="input"/> in lower-case hex.</summary>
    private static string ShortId(string prefix, string input)
        => $"{prefix}:{Convert.ToHexString(HashUtf8(input), 0, ShortIdByteCount).ToLowerInvariant()}";
}

View File

@@ -0,0 +1,248 @@
using StellaOps.Scanner.Analyzers.Native.Internal.Callgraph;
using StellaOps.Scanner.Analyzers.Native.Internal.Elf;
using StellaOps.Scanner.Analyzers.Native.Internal.Graph;
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Analyzes native ELF binaries for reachability graphs.
/// Implements SCAN-NATIVE-REACH-0146-13 requirements:
/// - Call-graph extraction from ELF binaries
/// - Synthetic roots (_init, .init_array, .preinit_array, entry points)
/// - Build-id capture
/// - PURL/symbol digests
/// - Unknowns emission
/// - DSSE graph bundles
/// </summary>
public sealed class NativeReachabilityAnalyzer
{
    // Number of leading bytes handed to ElfReader.IsElf when probing a file.
    private const int ElfMagicLength = 4;

    /// <summary>
    /// Analyzes a directory of ELF binaries and produces a reachability graph.
    /// </summary>
    /// <remarks>
    /// Files that cannot be opened or read (I/O error, missing permission) are skipped.
    /// Paths handed to the ELF reader are relative to <paramref name="layerPath"/> and use
    /// forward slashes.
    /// </remarks>
    /// <param name="layerPath">Path to the layer directory.</param>
    /// <param name="layerDigest">Digest of the layer.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The native reachability graph.</returns>
    /// <exception cref="ArgumentException">An argument is null or empty.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public async Task<NativeReachabilityGraph> AnalyzeLayerAsync(
        string layerPath,
        string layerDigest,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrEmpty(layerPath);
        ArgumentException.ThrowIfNullOrEmpty(layerDigest);

        var builder = new NativeCallgraphBuilder(layerDigest);

        // Find all potential ELF files in the layer
        await foreach (var filePath in FindElfFilesAsync(layerPath, cancellationToken))
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                await using var stream = File.OpenRead(filePath);
                var relativePath = Path.GetRelativePath(layerPath, filePath).Replace('\\', '/');
                var elf = ElfReader.Parse(stream, relativePath, layerDigest);
                if (elf is not null)
                {
                    builder.AddElfFile(elf);
                }
            }
            catch (IOException)
            {
                // Skip files that can't be read
            }
            catch (UnauthorizedAccessException)
            {
                // Skip files without permission
            }
        }

        return builder.Build();
    }

    /// <summary>
    /// Analyzes a single ELF file and produces a reachability graph.
    /// </summary>
    /// <remarks>Unlike <see cref="AnalyzeLayerAsync"/>, errors opening the file are not swallowed.</remarks>
    /// <param name="filePath">Path of the file to analyze; also passed to the ELF reader as the file's path.</param>
    /// <param name="layerDigest">Digest of the layer.</param>
    /// <param name="cancellationToken">Cancellation token, checked before the file is opened.</param>
    /// <returns>The native reachability graph; it has no ELF file added when the reader returns null.</returns>
    public async Task<NativeReachabilityGraph> AnalyzeFileAsync(
        string filePath,
        string layerDigest,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrEmpty(filePath);
        ArgumentException.ThrowIfNullOrEmpty(layerDigest);
        cancellationToken.ThrowIfCancellationRequested();

        var builder = new NativeCallgraphBuilder(layerDigest);
        await using var stream = File.OpenRead(filePath);
        var elf = ElfReader.Parse(stream, filePath, layerDigest);
        if (elf is not null)
        {
            builder.AddElfFile(elf);
        }

        return builder.Build();
    }

    /// <summary>
    /// Analyzes an ELF file from a stream.
    /// </summary>
    /// <param name="stream">Stream to read from; it is not disposed by this method.</param>
    /// <param name="filePath">Path passed to the ELF reader for the content.</param>
    /// <param name="layerDigest">Digest of the layer.</param>
    /// <returns>The native reachability graph; it has no ELF file added when the reader returns null.</returns>
    public NativeReachabilityGraph AnalyzeStream(
        Stream stream,
        string filePath,
        string layerDigest)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ArgumentException.ThrowIfNullOrEmpty(filePath);
        ArgumentException.ThrowIfNullOrEmpty(layerDigest);

        var builder = new NativeCallgraphBuilder(layerDigest);
        var elf = ElfReader.Parse(stream, filePath, layerDigest);
        if (elf is not null)
        {
            builder.AddElfFile(elf);
        }

        return builder.Build();
    }

    /// <summary>
    /// Writes the graph as NDJSON to a stream.
    /// </summary>
    public static Task WriteNdjsonAsync(
        NativeReachabilityGraph graph,
        Stream stream,
        CancellationToken cancellationToken = default)
    {
        return NativeGraphDsseWriter.WriteNdjsonAsync(graph, stream, cancellationToken);
    }

    /// <summary>
    /// Writes the graph as JSON (for DSSE payload).
    /// </summary>
    public static string WriteJson(NativeReachabilityGraph graph)
    {
        return NativeGraphDsseWriter.WriteJson(graph);
    }

    /// <summary>
    /// Walks the directory tree under <paramref name="rootPath"/> depth-first and yields every
    /// file whose leading bytes are accepted by <c>ElfReader.IsElf</c>.
    /// </summary>
    /// <remarks>
    /// Directories that cannot be listed are skipped. Files with a known non-binary extension
    /// and directories on the skip list are never opened.
    /// NOTE(review): directory symlinks are followed as reported by the file system; a link
    /// cycle inside a layer would not terminate — confirm whether layers can contain them.
    /// </remarks>
    private static async IAsyncEnumerable<string> FindElfFilesAsync(
        string rootPath,
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
    {
        var pending = new Stack<string>();
        pending.Push(rootPath);

        while (pending.Count > 0)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var currentDir = pending.Pop();

            // Listings are materialized inside the try blocks so that failures raised while
            // enumerating are covered by the filter as well.
            string[] files;
            try
            {
                files = Directory.GetFiles(currentDir);
            }
            catch (Exception ex) when (IsIgnorableException(ex))
            {
                continue;
            }

            foreach (var file in files)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Quick check: skip obvious non-ELF files
                if (IsSkippableExtension(Path.GetExtension(file)))
                {
                    continue;
                }

                // Check if file starts with ELF magic
                if (await IsElfFileAsync(file, cancellationToken))
                {
                    yield return file;
                }
            }

            // Recurse into subdirectories
            string[] subdirs;
            try
            {
                subdirs = Directory.GetDirectories(currentDir);
            }
            catch (Exception ex) when (IsIgnorableException(ex))
            {
                continue;
            }

            foreach (var subdir in subdirs)
            {
                // Skip common non-binary directories
                if (IsSkippableDirectory(Path.GetFileName(subdir)))
                {
                    continue;
                }

                pending.Push(subdir);
            }
        }
    }

    /// <summary>
    /// Reads the first four bytes of a file and asks <c>ElfReader.IsElf</c> whether they match.
    /// </summary>
    /// <returns>
    /// <c>false</c> when the file is shorter than four bytes, does not match, or cannot be
    /// opened or read. Cancellation is not swallowed.
    /// </returns>
    private static async Task<bool> IsElfFileAsync(string filePath, CancellationToken ct)
    {
        try
        {
            var buffer = new byte[ElfMagicLength];
            await using var stream = File.OpenRead(filePath);

            // A single ReadAsync may legally return fewer bytes than requested.
            var bytesRead = await stream.ReadAtLeastAsync(buffer, buffer.Length, throwOnEndOfStream: false, ct);
            return bytesRead >= buffer.Length && ElfReader.IsElf(buffer);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: an unreadable file is treated as "not ELF".
            return false;
        }
    }

    /// <summary>Returns true for file extensions that are never probed for the ELF magic (case-sensitive match).</summary>
    private static bool IsSkippableExtension(string ext)
    {
        return ext is ".txt" or ".md" or ".json" or ".xml" or ".yaml" or ".yml"
            or ".html" or ".css" or ".js" or ".ts" or ".py" or ".rb" or ".php"
            or ".java" or ".class" or ".jar" or ".war" or ".ear"
            or ".png" or ".jpg" or ".jpeg" or ".gif" or ".svg" or ".ico"
            or ".zip" or ".tar" or ".gz" or ".bz2" or ".xz" or ".7z"
            or ".deb" or ".rpm" or ".apk"
            or ".pem" or ".crt" or ".key" or ".pub"
            or ".log" or ".pid" or ".lock";
    }

    /// <summary>Returns true for directory names that are not descended into, at any depth of the tree.</summary>
    private static bool IsSkippableDirectory(string dirName)
    {
        return dirName is "." or ".."
            or "proc" or "sys" or "dev" or "run" or "tmp" or "var"
            or "home" or "root" or "etc" or "boot" or "media" or "mnt"
            or "node_modules" or ".git" or ".svn" or ".hg"
            or "__pycache__" or ".cache" or ".npm" or ".cargo"
            or "share" or "doc" or "man" or "info" or "locale";
    }

    /// <summary>Returns true for file-system failures after which the scan should simply move on.</summary>
    private static bool IsIgnorableException(Exception ex)
    {
        return ex is IOException or UnauthorizedAccessException or DirectoryNotFoundException;
    }
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<!-- Default item globbing is turned off; the ItemGroup below lists the items explicitly. -->
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<!-- Explicit items: C# sources are compiled, JSON files are embedded as resources, and every other file is a None item. Build output folders (bin, obj) are excluded from all three. -->
<ItemGroup>
<Compile Include="**\*.cs" Exclude="obj\**;bin\**" />
<EmbeddedResource Include="**\*.json" Exclude="obj\**;bin\**" />
<None Include="**\*" Exclude="**\*.cs;**\*.json;bin\**;obj\**" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Core\StellaOps.Scanner.Core.csproj" />
</ItemGroup>
</Project>