using System.Buffers.Binary;
using System.Text;
using System.Text.RegularExpressions;

namespace StellaOps.Scanner.Analyzers.Native;

/// <summary>
/// Scans native binaries for heuristic dependency indicators.
/// Detects dlopen/LoadLibrary strings, plugin configs, and ecosystem-specific hints.
/// </summary>
public static partial class HeuristicScanner
{
    // Common shared library patterns
    private static readonly Regex ElfSonamePattern = SonameRegex();
    private static readonly Regex WindowsDllPattern = DllRegex();
    private static readonly Regex MacOsDylibPattern = DylibRegex();

    // Plugin config patterns (matched case-insensitively as substrings)
    private static readonly string[] PluginConfigPatterns =
    [
        "plugins.conf",
        "plugin.conf",
        "plugins.json",
        "plugin.json",
        "plugins.xml",
        "plugin.xml",
        ".so.conf",
        "modules.conf",
        "extensions.conf",
    ];

    // Directory separators used when reducing a matched path to its file name
    private static readonly char[] PathSeparators = ['/', '\\'];

    // Upper bound checked while reading the token that follows a cgo marker
    private const int FollowingStringSanityLimit = 256;

    // Go-specific patterns
    private static readonly byte[] GoCgoImportMarker = "cgo_import_dynamic"u8.ToArray();
    private static readonly byte[] GoCgoImportStatic = "cgo_import_static"u8.ToArray();

    // Rust-specific patterns
    private static readonly byte[] RustPanicPrefix = "panicked at"u8.ToArray();

    // Not consulted by any scan in this file at present; kept because the class is partial.
    private static readonly byte[] RustCratePattern = ".rlib"u8.ToArray();

    /// <summary>
    /// Scans a binary stream for heuristic dependency indicators.
    /// </summary>
    /// <param name="stream">Stream containing the binary; it is read to the end from its current position.</param>
    /// <param name="format">Container format used to select the library-name pattern.</param>
    /// <returns>
    /// The detected edges, reduced to one per (library name, reason code) pair, together with
    /// the distinct plugin configuration file names that were found.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    public static HeuristicScanResult Scan(Stream stream, NativeFormat format)
    {
        ArgumentNullException.ThrowIfNull(stream);

        using var buffer = new MemoryStream();
        stream.CopyTo(buffer);
        var data = buffer.ToArray();

        var edges = new List<HeuristicEdge>();
        var pluginConfigs = new List<string>();

        // Extract printable strings once and feed them to every string-based analysis.
        var strings = ExtractStrings(data, minLength: 4);
        foreach (var (str, offset) in strings)
        {
            AnalyzeDynamicLoadingString(str, offset, format, edges);
            AnalyzePluginConfig(str, pluginConfigs);
        }

        ScanForGoPatterns(data, edges);

        // Reuse the strings extracted above instead of walking the binary a second time.
        ScanForRustPatterns(data, edges, strings);

        // Deduplicate per (library name, reason code), keeping the entry with the highest Confidence value.
        var uniqueEdges = edges
            .GroupBy(e => (e.LibraryName, e.ReasonCode))
            .Select(g => g.OrderByDescending(e => e.Confidence).First())
            .ToList();

        return new HeuristicScanResult(uniqueEdges, pluginConfigs.Distinct().ToList());
    }

    /// <summary>
    /// Scans specifically for dlopen/LoadLibrary style strings.
    /// </summary>
    /// <param name="data">Raw bytes of the binary.</param>
    /// <param name="format">Container format used to select the library-name pattern.</param>
    /// <returns>One edge per library name, keeping the entry with the highest Confidence value.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    public static IReadOnlyList<HeuristicEdge> ScanForDynamicLoading(byte[] data, NativeFormat format)
    {
        ArgumentNullException.ThrowIfNull(data);

        var edges = new List<HeuristicEdge>();
        foreach (var (str, offset) in ExtractStrings(data, minLength: 4))
        {
            AnalyzeDynamicLoadingString(str, offset, format, edges);
        }

        return edges
            .GroupBy(e => e.LibraryName)
            .Select(g => g.OrderByDescending(e => e.Confidence).First())
            .ToList();
    }

    /// <summary>
    /// Scans for plugin configuration file references.
    /// </summary>
    /// <param name="data">Raw bytes of the binary.</param>
    /// <returns>The distinct plugin configuration file names found in <paramref name="data"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    public static IReadOnlyList<string> ScanForPluginConfigs(byte[] data)
    {
        ArgumentNullException.ThrowIfNull(data);

        var configs = new List<string>();
        foreach (var (str, _) in ExtractStrings(data, minLength: 6))
        {
            AnalyzePluginConfig(str, configs);
        }

        return configs.Distinct().ToList();
    }

    /// <summary>
    /// Appends an edge to <paramref name="edges"/> when <paramref name="str"/> looks like a
    /// dynamically loaded library name for <paramref name="format"/>, or like a generic
    /// <c>lib*.so</c> name regardless of format.
    /// </summary>
    private static void AnalyzeDynamicLoadingString(
        string str,
        long offset,
        NativeFormat format,
        List<HeuristicEdge> edges)
    {
        // Format-appropriate library patterns
        switch (format)
        {
            case NativeFormat.Elf when ElfSonamePattern.IsMatch(str):
                edges.Add(new HeuristicEdge(
                    str,
                    HeuristicReasonCodes.StringDlopen,
                    DetermineConfidence(str, isPathLike: str.Contains('/')),
                    "ELF soname pattern",
                    offset));
                break;

            case NativeFormat.Pe when WindowsDllPattern.IsMatch(str):
                edges.Add(new HeuristicEdge(
                    str,
                    HeuristicReasonCodes.StringLoadLibrary,
                    DetermineConfidence(str, isPathLike: str.Contains('\\') || str.Contains('/')),
                    "PE DLL pattern",
                    offset));
                break;

            case NativeFormat.MachO
                when MacOsDylibPattern.IsMatch(str)
                     || str.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase):
                edges.Add(new HeuristicEdge(
                    str,
                    HeuristicReasonCodes.StringDlopen,
                    DetermineConfidence(str, isPathLike: str.Contains('/')),
                    "Mach-O dylib pattern",
                    offset));
                break;
        }

        // Cross-platform dlopen-style pattern: "lib" + at least one character + ".so"
        // (e.g. "libx.so" minimum), hence the first ".so" must sit past index 3.
        if (!str.StartsWith("lib", StringComparison.Ordinal))
        {
            return;
        }

        var soIndex = str.IndexOf(".so", StringComparison.Ordinal);
        if (soIndex <= 3)
        {
            return;
        }

        // Skip names that already produced an edge (from the format-specific branch above
        // or from an earlier string with the same text).
        if (edges.Any(e => e.LibraryName == str))
        {
            return;
        }

        edges.Add(new HeuristicEdge(
            str,
            HeuristicReasonCodes.StringDlopen,
            DetermineConfidence(str, isPathLike: str.Contains('/')),
            "Generic soname pattern",
            offset));
    }

    /// <summary>
    /// Rates how likely <paramref name="libraryName"/> is a real load argument: path-like
    /// strings rate High, conventional names (<c>lib*</c>, <c>*.dll</c>, <c>*.dylib</c>)
    /// rate Medium, everything else rates Low.
    /// </summary>
    private static HeuristicConfidence DetermineConfidence(string libraryName, bool isPathLike)
    {
        // Path-like strings are more likely to be actual dlopen arguments.
        if (isPathLike)
        {
            return HeuristicConfidence.High;
        }

        var followsNamingConvention =
            libraryName.StartsWith("lib", StringComparison.Ordinal)
            || libraryName.EndsWith(".dll", StringComparison.OrdinalIgnoreCase)
            || libraryName.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase);

        return followsNamingConvention ? HeuristicConfidence.Medium : HeuristicConfidence.Low;
    }

    /// <summary>
    /// Adds the file-name portion of <paramref name="str"/> to <paramref name="configs"/> when
    /// the string contains any known plugin configuration pattern (case-insensitive).
    /// At most one entry is added per call.
    /// </summary>
    private static void AnalyzePluginConfig(string str, List<string> configs)
    {
        foreach (var pattern in PluginConfigPatterns)
        {
            // A substring test also covers the "ends with" case.
            if (!str.Contains(pattern, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Keep only the text after the last separator, unless the separator is the final character.
            var lastSlash = str.LastIndexOfAny(PathSeparators);
            var filename = lastSlash >= 0 && lastSlash < str.Length - 1
                ? str[(lastSlash + 1)..]
                : str;

            configs.Add(filename);
            return;
        }
    }

    /// <summary>
    /// Looks for <c>cgo_import_dynamic</c> and then <c>cgo_import_static</c> markers and records
    /// the token following each marker as a high-confidence edge.
    /// </summary>
    private static void ScanForGoPatterns(byte[] data, List<HeuristicEdge> edges)
    {
        AddCgoImportEdges(data, GoCgoImportMarker, "Go CGO import directive", edges);
        AddCgoImportEdges(data, GoCgoImportStatic, "Go CGO static import", edges);
    }

    /// <summary>
    /// Adds one edge per occurrence of <paramref name="marker"/> whose following token passes
    /// <see cref="IsValidLibraryName"/>. The recorded offset is the marker's offset.
    /// </summary>
    private static void AddCgoImportEdges(
        byte[] data,
        byte[] marker,
        string description,
        List<HeuristicEdge> edges)
    {
        foreach (var offset in FindAllOccurrences(data, marker))
        {
            var libraryName = ExtractFollowingString(data, offset + marker.Length);
            if (string.IsNullOrEmpty(libraryName) || !IsValidLibraryName(libraryName))
            {
                continue;
            }

            edges.Add(new HeuristicEdge(
                libraryName,
                HeuristicReasonCodes.GoCgoImport,
                HeuristicConfidence.High,
                description,
                offset));
        }
    }

    /// <summary>
    /// When the binary contains the "panicked at" marker, records native library names
    /// (<c>lib*.so*</c> or <c>*.dll</c>) as medium-confidence Rust FFI edges.
    /// </summary>
    /// <param name="data">Raw bytes of the binary.</param>
    /// <param name="edges">List that receives the detected edges.</param>
    /// <param name="strings">
    /// Printable strings already extracted from <paramref name="data"/> with a minimum length of 4;
    /// when <c>null</c> they are extracted here.
    /// </param>
    private static void ScanForRustPatterns(
        byte[] data,
        List<HeuristicEdge> edges,
        IReadOnlyList<(string Value, long Offset)>? strings = null)
    {
        // Only the presence of a panic message matters, so stop at the first hit.
        if (!ContainsSequence(data, RustPanicPrefix))
        {
            return;
        }

        strings ??= ExtractStrings(data, minLength: 4);

        foreach (var (str, offset) in strings)
        {
            // Skip the Rust standard library.
            if (str.Contains("libstd-", StringComparison.Ordinal)
                || str.Contains("libcore-", StringComparison.Ordinal))
            {
                continue;
            }

            var looksLikeNativeLibrary =
                (str.StartsWith("lib", StringComparison.Ordinal) && str.Contains(".so", StringComparison.Ordinal))
                || str.EndsWith(".dll", StringComparison.OrdinalIgnoreCase);
            if (!looksLikeNativeLibrary)
            {
                continue;
            }

            // Names mentioning "rust" or "std" are excluded from the FFI candidates.
            if (str.Contains("rust", StringComparison.Ordinal) || str.Contains("std", StringComparison.Ordinal))
            {
                continue;
            }

            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.RustFfi,
                HeuristicConfidence.Medium,
                "Rust FFI library reference",
                offset));
        }
    }

    /// <summary>
    /// Extracts every maximal run of printable ASCII bytes (0x20–0x7E) that is at least
    /// <paramref name="minLength"/> bytes long, paired with the offset of its first byte.
    /// </summary>
    private static List<(string Value, long Offset)> ExtractStrings(byte[] data, int minLength)
    {
        var results = new List<(string Value, long Offset)>();
        var runStart = -1;

        // Iterate one step past the end so a run that reaches the last byte is flushed too.
        for (var i = 0; i <= data.Length; i++)
        {
            var printable = i < data.Length && data[i] is >= 0x20 and < 0x7F;
            if (printable)
            {
                if (runStart < 0)
                {
                    runStart = i;
                }

                continue;
            }

            if (runStart < 0)
            {
                continue;
            }

            var length = i - runStart;
            if (length >= minLength)
            {
                results.Add((Encoding.ASCII.GetString(data, runStart, length), (long)runStart));
            }

            runStart = -1;
        }

        return results;
    }

    /// <summary>
    /// Reads the printable ASCII token that follows <paramref name="startOffset"/>, after skipping
    /// NUL, space and tab bytes. Returns <c>null</c> when the trimmed token is empty.
    /// </summary>
    private static string? ExtractFollowingString(byte[] data, int startOffset)
    {
        // Skip whitespace and null bytes
        var i = startOffset;
        while (i < data.Length && (data[i] == 0 || data[i] == ' ' || data[i] == '\t'))
        {
            i++;
        }

        var sb = new StringBuilder();
        while (i < data.Length && data[i] >= 0x20 && data[i] < 0x7F)
        {
            sb.Append((char)data[i]);
            i++;

            // Sanity limit: stop reading once the token grows past the limit.
            if (sb.Length > FollowingStringSanityLimit)
            {
                break;
            }
        }

        var result = sb.ToString().Trim();
        return string.IsNullOrEmpty(result) ? null : result;
    }

    /// <summary>
    /// Returns the start offset of every occurrence of <paramref name="pattern"/> in
    /// <paramref name="data"/>, including overlapping ones, in ascending order.
    /// An empty pattern yields no results.
    /// </summary>
    private static List<int> FindAllOccurrences(byte[] data, byte[] pattern)
    {
        var results = new List<int>();
        if (pattern.Length == 0 || data.Length < pattern.Length)
        {
            return results;
        }

        ReadOnlySpan<byte> haystack = data;
        ReadOnlySpan<byte> needle = pattern;

        var searchStart = 0;
        while (searchStart <= haystack.Length - needle.Length)
        {
            var relative = haystack[searchStart..].IndexOf(needle);
            if (relative < 0)
            {
                break;
            }

            var absolute = searchStart + relative;
            results.Add(absolute);

            // Advance by one byte only, so overlapping occurrences are still reported.
            searchStart = absolute + 1;
        }

        return results;
    }

    /// <summary>
    /// Returns <c>true</c> when a non-empty <paramref name="pattern"/> occurs anywhere in <paramref name="data"/>.
    /// </summary>
    private static bool ContainsSequence(byte[] data, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return false;
        }

        ReadOnlySpan<byte> haystack = data;
        ReadOnlySpan<byte> needle = pattern;
        return haystack.IndexOf(needle) >= 0;
    }

    /// <summary>
    /// Accepts names of at least three characters made only of letters, digits,
    /// '.', '_', '-' and '/'.
    /// </summary>
    private static bool IsValidLibraryName(string name)
    {
        if (string.IsNullOrWhiteSpace(name) || name.Length < 3)
        {
            return false;
        }

        // Basic validation - should contain alphanumeric and common separators
        return name.All(c => char.IsLetterOrDigit(c) || c == '.' || c == '_' || c == '-' || c == '/');
    }

    [GeneratedRegex(@"^(/[a-zA-Z0-9_/.-]+/)?lib[a-zA-Z0-9_+-]+\.so(\.[0-9]+)*$", RegexOptions.Compiled)]
    private static partial Regex SonameRegex();

    [GeneratedRegex(@"^[a-zA-Z0-9_+-]+\.dll$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex DllRegex();

    [GeneratedRegex(@"^(@rpath/|@loader_path/|@executable_path/|/)?[a-zA-Z0-9_+-]+\.dylib$", RegexOptions.Compiled)]
    private static partial Regex DylibRegex();
}