Files
git.stella-ops.org/src/Scanner/StellaOps.Scanner.Analyzers.Native/HeuristicScanner.cs
StellaOps Bot ea970ead2a
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
up
2025-11-27 07:46:56 +02:00

411 lines
14 KiB
C#

using System.Buffers.Binary;
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Native;
/// <summary>
/// Scans native binaries for heuristic dependency indicators.
/// Detects dlopen/LoadLibrary strings, plugin configs, and ecosystem-specific hints.
/// </summary>
public static partial class HeuristicScanner
{
// Shared-library name matchers, one per native binary format.
// Each instance comes from the matching [GeneratedRegex] partial method declared on this class.
private static readonly Regex ElfSonamePattern = SonameRegex();
private static readonly Regex WindowsDllPattern = DllRegex();
private static readonly Regex MacOsDylibPattern = DylibRegex();
// File-name fragments that mark a string as a plugin configuration reference.
// AnalyzePluginConfig looks for them case-insensitively anywhere inside an extracted string,
// so ".so.conf" also matches names such as "ld.so.conf".
private static readonly string[] PluginConfigPatterns =
[
"plugins.conf",
"plugin.conf",
"plugins.json",
"plugin.json",
"plugins.xml",
"plugin.xml",
".so.conf",
"modules.conf",
"extensions.conf",
];
// Go-specific byte markers located by ScanForGoPatterns; the printable text that
// immediately follows a marker is taken as the candidate library name.
private static readonly byte[] GoCgoImportMarker = "cgo_import_dynamic"u8.ToArray();
private static readonly byte[] GoCgoImportStatic = "cgo_import_static"u8.ToArray();
// Rust-specific byte markers. "panicked at" is the signal ScanForRustPatterns uses to
// decide that a binary is likely Rust; ".rlib" is the extension of Rust library archives.
private static readonly byte[] RustPanicPrefix = "panicked at"u8.ToArray();
private static readonly byte[] RustCratePattern = ".rlib"u8.ToArray();
/// <summary>
/// Reads the stream fully into memory and collects heuristic dependency hints from it:
/// library-name strings, plugin configuration references, and Go/Rust specific markers.
/// </summary>
/// <param name="stream">Binary content to scan; it is copied from its current position to the end.</param>
/// <param name="format">Native format that selects the format-specific library-name pattern.</param>
/// <returns>
/// The strongest edge per (library name, reason code) pair, plus the distinct plugin config names.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public static HeuristicScanResult Scan(Stream stream, NativeFormat format)
{
    ArgumentNullException.ThrowIfNull(stream);

    // Materialise the whole binary once; every scanner below works on the byte array.
    byte[] data;
    using (var copy = new MemoryStream())
    {
        stream.CopyTo(copy);
        data = copy.ToArray();
    }

    var candidateEdges = new List<HeuristicEdge>();
    var configNames = new List<string>();

    // Printable strings feed both the dynamic-loading and the plugin-config heuristics.
    foreach (var entry in ExtractStrings(data, minLength: 4))
    {
        AnalyzeDynamicLoadingString(entry.Value, entry.Offset, format, candidateEdges);
        AnalyzePluginConfig(entry.Value, configNames);
    }

    // Ecosystem-specific byte markers.
    ScanForGoPatterns(data, candidateEdges);
    ScanForRustPatterns(data, candidateEdges);

    // Keep one edge per (library, reason) pair: the first one with the highest confidence.
    var strongestPerKey = new List<HeuristicEdge>();
    foreach (var group in candidateEdges.GroupBy(edge => (edge.LibraryName, edge.ReasonCode)))
    {
        strongestPerKey.Add(group.OrderByDescending(edge => edge.Confidence).First());
    }

    return new HeuristicScanResult(strongestPerKey, configNames.Distinct().ToList());
}
/// <summary>
/// Scans specifically for dlopen/LoadLibrary style strings.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <param name="format">Native format that selects the format-specific library-name pattern.</param>
/// <returns>One edge per distinct library name, keeping the first edge with the highest confidence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
public static IReadOnlyList<HeuristicEdge> ScanForDynamicLoading(byte[] data, NativeFormat format)
{
    // Validate at the public boundary so callers get an ArgumentNullException naming the
    // parameter instead of a NullReferenceException raised inside string extraction.
    ArgumentNullException.ThrowIfNull(data);

    var edges = new List<HeuristicEdge>();
    foreach (var (str, offset) in ExtractStrings(data, minLength: 4))
    {
        AnalyzeDynamicLoadingString(str, offset, format, edges);
    }

    // Collapse duplicates by library name only (reason code is not part of the key here).
    return edges
        .GroupBy(e => e.LibraryName)
        .Select(g => g.OrderByDescending(e => e.Confidence).First())
        .ToList();
}
/// <summary>
/// Scans for plugin configuration file references.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <returns>
/// Distinct config names in first-seen order; for path-like strings only the part after the
/// last path separator is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
public static IReadOnlyList<string> ScanForPluginConfigs(byte[] data)
{
    // Validate at the public boundary so callers get an ArgumentNullException naming the
    // parameter instead of a NullReferenceException raised inside string extraction.
    ArgumentNullException.ThrowIfNull(data);

    var configs = new List<string>();

    // Strings shorter than 6 characters are not considered by this scan.
    foreach (var (str, _) in ExtractStrings(data, minLength: 6))
    {
        AnalyzePluginConfig(str, configs);
    }

    return configs.Distinct().ToList();
}
/// <summary>
/// Tests one extracted string against the library-name pattern of <paramref name="format"/>
/// and against a generic "lib….so" shape, appending an edge to <paramref name="edges"/> for each hit.
/// </summary>
/// <param name="str">Printable string extracted from the binary.</param>
/// <param name="offset">Offset of <paramref name="str"/> inside the binary; stored on the edge.</param>
/// <param name="format">Native format that selects which format-specific pattern is tried.</param>
/// <param name="edges">Accumulator that receives new edges.</param>
private static void AnalyzeDynamicLoadingString(
    string str,
    long offset,
    NativeFormat format,
    List<HeuristicEdge> edges)
{
    var hasForwardSlash = str.Contains('/');

    // Format-specific pattern: at most one of these branches can apply.
    if (format is NativeFormat.Elf)
    {
        if (ElfSonamePattern.IsMatch(str))
        {
            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.StringDlopen,
                DetermineConfidence(str, isPathLike: hasForwardSlash),
                "ELF soname pattern",
                offset));
        }
    }
    else if (format is NativeFormat.Pe)
    {
        if (WindowsDllPattern.IsMatch(str))
        {
            // Windows paths may use either separator.
            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.StringLoadLibrary,
                DetermineConfidence(str, isPathLike: str.Contains('\\') || hasForwardSlash),
                "PE DLL pattern",
                offset));
        }
    }
    else if (format is NativeFormat.MachO)
    {
        // The suffix test catches dylib paths the stricter regex does not accept.
        if (MacOsDylibPattern.IsMatch(str) || str.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase))
        {
            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.StringDlopen,
                DetermineConfidence(str, isPathLike: hasForwardSlash),
                "Mach-O dylib pattern",
                offset));
        }
    }

    // Cross-platform fallback: "lib" + at least one character + ".so" somewhere in the string.
    if (!str.StartsWith("lib", StringComparison.Ordinal))
    {
        return;
    }

    // An index above 3 means there is at least one character between "lib" and ".so".
    var extensionAt = str.IndexOf(".so", StringComparison.Ordinal);
    if (extensionAt <= 3)
    {
        return;
    }

    // Skip when any edge for this exact string is already recorded (including one added above).
    if (edges.Exists(existing => existing.LibraryName == str))
    {
        return;
    }

    edges.Add(new HeuristicEdge(
        str,
        HeuristicReasonCodes.StringDlopen,
        DetermineConfidence(str, isPathLike: hasForwardSlash),
        "Generic soname pattern",
        offset));
}
/// <summary>
/// Rates how likely a matched string is a real dynamic-loading argument.
/// </summary>
/// <param name="libraryName">The matched string.</param>
/// <param name="isPathLike">Whether the caller found a path separator in the string.</param>
/// <returns>
/// High for path-like strings; Medium when the name starts with "lib" or ends with
/// ".dll"/".dylib" (case-insensitive); Low otherwise.
/// </returns>
private static HeuristicConfidence DetermineConfidence(string libraryName, bool isPathLike)
{
    // A path is the strongest hint that the string is passed to a loader call.
    if (isPathLike)
    {
        return HeuristicConfidence.High;
    }

    var followsNamingConvention =
        libraryName.StartsWith("lib", StringComparison.Ordinal)
        || libraryName.EndsWith(".dll", StringComparison.OrdinalIgnoreCase)
        || libraryName.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase);

    return followsNamingConvention ? HeuristicConfidence.Medium : HeuristicConfidence.Low;
}
/// <summary>
/// Records <paramref name="str"/> as a plugin configuration reference when it mentions any
/// known config file-name fragment (case-insensitive).
/// </summary>
/// <param name="str">Printable string extracted from the binary.</param>
/// <param name="configs">Accumulator; receives at most one entry per call.</param>
private static void AnalyzePluginConfig(string str, List<string> configs)
{
    // A suffix match is just a special case of containment, so one ordinal containment
    // test per fragment is sufficient.
    var mentionsConfig = Array.Exists(
        PluginConfigPatterns,
        fragment => str.Contains(fragment, StringComparison.OrdinalIgnoreCase));
    if (!mentionsConfig)
    {
        return;
    }

    // For path-like strings keep only what follows the last separator; a string that
    // ends with a separator is recorded unchanged.
    var separatorAt = str.LastIndexOfAny(['/', '\\']);
    var hasTrailingName = separatorAt >= 0 && separatorAt < str.Length - 1;
    configs.Add(hasTrailingName ? str[(separatorAt + 1)..] : str);
}
/// <summary>
/// Finds Go cgo import markers in the binary and records the text following each marker
/// as a high-confidence library edge when it passes library-name validation.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <param name="edges">Accumulator that receives new edges.</param>
private static void ScanForGoPatterns(byte[] data, List<HeuristicEdge> edges)
{
    // Dynamic imports are processed before static ones, so edge order is stable.
    (byte[] Marker, string Context)[] directives =
    [
        (GoCgoImportMarker, "Go CGO import directive"),
        (GoCgoImportStatic, "Go CGO static import"),
    ];

    foreach (var (marker, context) in directives)
    {
        foreach (var markerOffset in FindAllOccurrences(data, marker))
        {
            // The candidate name is whatever printable text follows the marker.
            var candidate = ExtractFollowingString(data, markerOffset + marker.Length);
            if (string.IsNullOrEmpty(candidate) || !IsValidLibraryName(candidate))
            {
                continue;
            }

            edges.Add(new HeuristicEdge(
                candidate,
                HeuristicReasonCodes.GoCgoImport,
                HeuristicConfidence.High,
                context,
                markerOffset));
        }
    }
}
/// <summary>
/// Adds Rust FFI edges for native library names found in a binary that looks like Rust.
/// </summary>
/// <remarks>
/// The binary is treated as Rust only when it contains the "panicked at" marker. Detection
/// stops at the first occurrence because only presence matters, and no further byte scans
/// are performed once the strings have been analysed.
/// </remarks>
/// <param name="data">Raw bytes of the binary.</param>
/// <param name="edges">Accumulator that receives new edges.</param>
private static void ScanForRustPatterns(byte[] data, List<HeuristicEdge> edges)
{
    ReadOnlySpan<byte> haystack = data;
    ReadOnlySpan<byte> rustMarker = RustPanicPrefix;
    if (haystack.IndexOf(rustMarker) < 0)
    {
        // No Rust panic message: nothing to attribute to Rust FFI.
        return;
    }

    foreach (var (str, offset) in ExtractStrings(data, minLength: 4))
    {
        // Rust's own standard-library artefacts are not external dependencies.
        if (str.Contains("libstd-", StringComparison.Ordinal) ||
            str.Contains("libcore-", StringComparison.Ordinal))
        {
            continue;
        }

        var looksLikeNativeLibrary =
            (str.StartsWith("lib", StringComparison.Ordinal) && str.Contains(".so", StringComparison.Ordinal)) ||
            str.EndsWith(".dll", StringComparison.OrdinalIgnoreCase);
        if (!looksLikeNativeLibrary)
        {
            continue;
        }

        // Names mentioning "rust" or "std" are assumed to belong to the Rust toolchain.
        // NOTE(review): this also filters names such as "libstdc++.so" — confirm that is intended.
        if (str.Contains("rust", StringComparison.Ordinal) || str.Contains("std", StringComparison.Ordinal))
        {
            continue;
        }

        edges.Add(new HeuristicEdge(
            str,
            HeuristicReasonCodes.RustFfi,
            HeuristicConfidence.Medium,
            "Rust FFI library reference",
            offset));
    }
}
/// <summary>
/// Collects runs of printable ASCII bytes (0x20 through 0x7E) together with the offset
/// at which each run starts.
/// </summary>
/// <param name="data">Raw bytes to scan.</param>
/// <param name="minLength">Minimum number of characters a run needs in order to be reported.</param>
/// <returns>The runs in the order they occur in <paramref name="data"/>.</returns>
private static List<(string Value, long Offset)> ExtractStrings(byte[] data, int minLength)
{
    var found = new List<(string Value, long Offset)>();
    var run = new StringBuilder();
    long runStart = -1;

    // Emits the pending run when it is long enough, then resets the accumulator.
    void FlushRun()
    {
        if (run.Length >= minLength)
        {
            found.Add((run.ToString(), runStart));
        }

        run.Clear();
    }

    for (var index = 0; index < data.Length; index++)
    {
        var current = data[index];
        if (current is < 0x20 or >= 0x7F)
        {
            // Any non-printable byte terminates the current run.
            FlushRun();
            continue;
        }

        if (run.Length == 0)
        {
            runStart = index;
        }

        run.Append((char)current);
    }

    // A run that reaches the end of the buffer has no terminator byte to flush it.
    FlushRun();
    return found;
}
/// <summary>
/// Reads the printable ASCII text that follows <paramref name="startOffset"/>, after skipping
/// any leading NUL, space and tab bytes.
/// </summary>
/// <param name="data">Raw bytes to read from.</param>
/// <param name="startOffset">Index at which to start looking.</param>
/// <returns>The trimmed text, or <c>null</c> when no printable text is found.</returns>
private static string? ExtractFollowingString(byte[] data, int startOffset)
{
    // Sanity limit: reading stops once 257 characters have been collected.
    const int MaxChars = 257;

    // Skip padding (NUL, space, tab) in front of the text.
    var cursor = startOffset;
    while (cursor < data.Length && data[cursor] is 0x00 or 0x20 or 0x09)
    {
        cursor++;
    }

    // Consume printable ASCII (0x20 through 0x7E) up to the sanity limit.
    var begin = cursor;
    while (cursor < data.Length && cursor - begin < MaxChars && data[cursor] is >= 0x20 and < 0x7F)
    {
        cursor++;
    }

    if (cursor == begin)
    {
        return null;
    }

    var text = Encoding.ASCII.GetString(data, begin, cursor - begin).Trim();
    return text.Length == 0 ? null : text;
}
/// <summary>
/// Returns the start index of every occurrence of <paramref name="pattern"/> in
/// <paramref name="data"/>, in ascending order. Overlapping occurrences are all reported.
/// </summary>
/// <param name="data">Bytes to search.</param>
/// <param name="pattern">Byte sequence to look for; an empty pattern yields no results.</param>
/// <returns>The matching start indexes, or an empty list when there are none.</returns>
private static List<int> FindAllOccurrences(byte[] data, byte[] pattern)
{
    var hits = new List<int>();
    if (pattern.Length == 0 || data.Length < pattern.Length)
    {
        return hits;
    }

    ReadOnlySpan<byte> haystack = data;
    ReadOnlySpan<byte> needle = pattern;

    var searchFrom = 0;
    while (searchFrom <= haystack.Length - needle.Length)
    {
        var relative = haystack[searchFrom..].IndexOf(needle);
        if (relative < 0)
        {
            break;
        }

        hits.Add(searchFrom + relative);

        // Resume one byte after the hit so that overlapping matches are found too.
        searchFrom += relative + 1;
    }

    return hits;
}
/// <summary>
/// Checks whether a candidate string is plausible as a library name or path.
/// </summary>
/// <param name="name">Candidate text.</param>
/// <returns>
/// <c>true</c> when the name has at least three characters and consists only of letters,
/// digits, '.', '_', '-', '+' and '/'; otherwise <c>false</c>.
/// </returns>
private static bool IsValidLibraryName(string name)
{
    if (string.IsNullOrWhiteSpace(name) || name.Length < 3)
    {
        return false;
    }

    foreach (var c in name)
    {
        // '+' is accepted so that names such as "libstdc++.so.6" pass, in line with the
        // character class used by the library-name regexes of this scanner.
        var allowed = char.IsLetterOrDigit(c) || c is '.' or '_' or '-' or '+' or '/';
        if (!allowed)
        {
            return false;
        }
    }

    return true;
}
// ELF soname, matched against the whole string: an optional absolute directory prefix
// (starting and ending with '/'), then "lib" + one or more of [A-Za-z0-9_+-], then ".so",
// then zero or more numeric version components (".1", ".1.2", ...). Case-sensitive.
[GeneratedRegex(@"^(/[a-zA-Z0-9_/.-]+/)?lib[a-zA-Z0-9_+-]+\.so(\.[0-9]+)*$", RegexOptions.Compiled)]
private static partial Regex SonameRegex();
// Windows DLL file name, matched against the whole string, case-insensitive: one or more of
// [A-Za-z0-9_+-] followed by ".dll". Directory separators and additional dots do not match.
[GeneratedRegex(@"^[a-zA-Z0-9_+-]+\.dll$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex DllRegex();
// macOS dylib name, matched against the whole string: an optional "@rpath/", "@loader_path/",
// "@executable_path/" or single leading "/" prefix, then one or more of [A-Za-z0-9_+-],
// then ".dylib". Nested directories and additional dots do not match. Case-sensitive.
[GeneratedRegex(@"^(@rpath/|@loader_path/|@executable_path/|/)?[a-zA-Z0-9_+-]+\.dylib$", RegexOptions.Compiled)]
private static partial Regex DylibRegex();
}