Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
411 lines
14 KiB
C#
411 lines
14 KiB
C#
using System.Buffers.Binary;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace StellaOps.Scanner.Analyzers.Native;
|
|
|
|
/// <summary>
|
|
/// Scans native binaries for heuristic dependency indicators.
|
|
/// Detects dlopen/LoadLibrary strings, plugin configs, and ecosystem-specific hints.
|
|
/// </summary>
|
|
public static partial class HeuristicScanner
|
|
{
|
|
// Library-name matchers, one per native format, obtained from the generated-regex methods of this class.
private static readonly Regex ElfSonamePattern = SonameRegex();
private static readonly Regex WindowsDllPattern = DllRegex();
private static readonly Regex MacOsDylibPattern = DylibRegex();

// File-name fragments that mark a string as a reference to a plugin configuration file.
private static readonly string[] PluginConfigPatterns =
{
    "plugins.conf",
    "plugin.conf",
    "plugins.json",
    "plugin.json",
    "plugins.xml",
    "plugin.xml",
    ".so.conf",
    "modules.conf",
    "extensions.conf",
};

// ASCII byte sequences searched for when looking for Go cgo import markers.
private static readonly byte[] GoCgoImportMarker = Encoding.ASCII.GetBytes("cgo_import_dynamic");
private static readonly byte[] GoCgoImportStatic = Encoding.ASCII.GetBytes("cgo_import_static");

// ASCII byte sequences searched for when looking for Rust-specific content.
private static readonly byte[] RustPanicPrefix = Encoding.ASCII.GetBytes("panicked at");
private static readonly byte[] RustCratePattern = Encoding.ASCII.GetBytes(".rlib");
|
|
|
|
/// <summary>
/// Copies the stream into memory and collects heuristic dependency indicators from its bytes.
/// </summary>
/// <param name="stream">Binary content to scan; it is copied in full before analysis.</param>
/// <param name="format">Native format that selects the format-specific library-name pattern.</param>
/// <returns>
/// The edges found, reduced to the entry with the highest <c>Confidence</c> value for each
/// (library name, reason code) pair, together with the distinct plugin configuration names.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public static HeuristicScanResult Scan(Stream stream, NativeFormat format)
{
    ArgumentNullException.ThrowIfNull(stream);

    byte[] data;
    using (var copy = new MemoryStream())
    {
        stream.CopyTo(copy);
        data = copy.ToArray();
    }

    var edges = new List<HeuristicEdge>();
    var pluginConfigs = new List<string>();

    // Every printable run is checked both as a library name and as a plugin-config reference.
    foreach (var entry in ExtractStrings(data, minLength: 4))
    {
        AnalyzeDynamicLoadingString(entry.Value, entry.Offset, format, edges);
        AnalyzePluginConfig(entry.Value, pluginConfigs);
    }

    // Ecosystem-specific passes work on the raw bytes and append to the same edge list.
    ScanForGoPatterns(data, edges);
    ScanForRustPatterns(data, edges);

    // The same library may be reported under several reason codes; only repeats of the
    // same (name, reason) pair are collapsed.
    var strongestPerKey = new List<HeuristicEdge>();
    foreach (var group in edges.GroupBy(e => (e.LibraryName, e.ReasonCode)))
    {
        strongestPerKey.Add(group.OrderByDescending(e => e.Confidence).First());
    }

    return new HeuristicScanResult(strongestPerKey, pluginConfigs.Distinct().ToList());
}
|
|
|
|
/// <summary>
/// Scans specifically for dlopen/LoadLibrary style strings.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <param name="format">Native format that selects the format-specific library-name pattern.</param>
/// <returns>
/// One edge per distinct library name: the entry with the highest <c>Confidence</c> value.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
public static IReadOnlyList<HeuristicEdge> ScanForDynamicLoading(byte[] data, NativeFormat format)
{
    // Fail at the API boundary instead of with a NullReferenceException inside a helper.
    ArgumentNullException.ThrowIfNull(data);

    var edges = new List<HeuristicEdge>();
    foreach (var (text, offset) in ExtractStrings(data, minLength: 4))
    {
        AnalyzeDynamicLoadingString(text, offset, format, edges);
    }

    return edges
        .GroupBy(e => e.LibraryName)
        .Select(g => g.OrderByDescending(e => e.Confidence).First())
        .ToList();
}
|
|
|
|
/// <summary>
/// Scans for plugin configuration file references.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <returns>The distinct configuration names found, in first-seen order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
public static IReadOnlyList<string> ScanForPluginConfigs(byte[] data)
{
    // Fail at the API boundary instead of with a NullReferenceException inside a helper.
    ArgumentNullException.ThrowIfNull(data);

    var configs = new List<string>();
    foreach (var (text, _) in ExtractStrings(data, minLength: 6))
    {
        AnalyzePluginConfig(text, configs);
    }

    return configs.Distinct().ToList();
}
|
|
|
|
/// <summary>
/// Appends an edge to <paramref name="edges"/> when <paramref name="str"/> looks like a
/// shared-library name, first by the pattern for <paramref name="format"/> and then by a
/// format-independent "lib….so" check.
/// </summary>
/// <param name="str">Candidate string extracted from the binary.</param>
/// <param name="offset">Offset of the string, recorded on any edge that is added.</param>
/// <param name="format">Native format of the binary being scanned.</param>
/// <param name="edges">Accumulator that receives the edges.</param>
private static void AnalyzeDynamicLoadingString(
    string str,
    long offset,
    NativeFormat format,
    List<HeuristicEdge> edges)
{
    var hasForwardSlash = str.Contains('/');

    // Format-specific pattern: at most one of these branches applies to a given format.
    if (format == NativeFormat.Elf)
    {
        if (ElfSonamePattern.IsMatch(str))
        {
            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.StringDlopen,
                DetermineConfidence(str, isPathLike: hasForwardSlash),
                "ELF soname pattern",
                offset));
        }
    }
    else if (format == NativeFormat.Pe)
    {
        if (WindowsDllPattern.IsMatch(str))
        {
            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.StringLoadLibrary,
                DetermineConfidence(str, isPathLike: hasForwardSlash || str.Contains('\\')),
                "PE DLL pattern",
                offset));
        }
    }
    else if (format == NativeFormat.MachO)
    {
        var endsWithDylib = str.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase);
        if (MacOsDylibPattern.IsMatch(str) || endsWithDylib)
        {
            edges.Add(new HeuristicEdge(
                str,
                HeuristicReasonCodes.StringDlopen,
                DetermineConfidence(str, isPathLike: hasForwardSlash),
                "Mach-O dylib pattern",
                offset));
        }
    }

    // Cross-platform fallback: "lib" + at least one character + ".so" (e.g. "libx.so").
    if (!str.StartsWith("lib", StringComparison.Ordinal))
    {
        return;
    }

    var soIndex = str.IndexOf(".so", StringComparison.Ordinal);
    if (soIndex <= 3)
    {
        return;
    }

    // Skip the fallback when any edge for this exact name is already recorded,
    // including one added by the format-specific branch above.
    foreach (var existing in edges)
    {
        if (existing.LibraryName == str)
        {
            return;
        }
    }

    edges.Add(new HeuristicEdge(
        str,
        HeuristicReasonCodes.StringDlopen,
        DetermineConfidence(str, isPathLike: hasForwardSlash),
        "Generic soname pattern",
        offset));
}
|
|
|
|
/// <summary>
/// Rates how likely a matched string is to be a real library reference.
/// </summary>
/// <param name="libraryName">The matched string.</param>
/// <param name="isPathLike">Whether the caller found a path separator in the string.</param>
/// <returns>
/// <c>High</c> for path-like strings; <c>Medium</c> for names that start with "lib" or end
/// with ".dll"/".dylib" (suffixes compared case-insensitively); <c>Low</c> otherwise.
/// </returns>
private static HeuristicConfidence DetermineConfidence(string libraryName, bool isPathLike)
{
    if (isPathLike)
    {
        return HeuristicConfidence.High;
    }

    var followsNamingConvention =
        libraryName.StartsWith("lib", StringComparison.Ordinal)
        || libraryName.EndsWith(".dll", StringComparison.OrdinalIgnoreCase)
        || libraryName.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase);

    return followsNamingConvention ? HeuristicConfidence.Medium : HeuristicConfidence.Low;
}
|
|
|
|
/// <summary>
/// Adds <paramref name="str"/> to <paramref name="configs"/> when it mentions one of the
/// known plugin configuration file names (case-insensitive substring match).
/// </summary>
/// <param name="str">Candidate string extracted from the binary.</param>
/// <param name="configs">Accumulator; receives at most one entry per call.</param>
private static void AnalyzePluginConfig(string str, List<string> configs)
{
    // A suffix match is a special case of a substring match, so one Contains check covers both.
    var mentionsConfig = Array.Exists(
        PluginConfigPatterns,
        pattern => str.Contains(pattern, StringComparison.OrdinalIgnoreCase));

    if (!mentionsConfig)
    {
        return;
    }

    // Keep only the part after the last '/' or '\' unless that separator is the final character.
    var separator = Math.Max(str.LastIndexOf('/'), str.LastIndexOf('\\'));
    var hasTrailingName = separator >= 0 && separator < str.Length - 1;

    configs.Add(hasTrailingName ? str[(separator + 1)..] : str);
}
|
|
|
|
/// <summary>
/// Searches the raw bytes for the two cgo import markers and records the text that follows
/// each occurrence as a high-confidence edge when it passes <c>IsValidLibraryName</c>.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <param name="edges">Accumulator that receives the edges.</param>
private static void ScanForGoPatterns(byte[] data, List<HeuristicEdge> edges)
{
    // Dynamic-import hits are appended before static-import hits.
    CollectAfterMarker(GoCgoImportMarker, "Go CGO import directive");
    CollectAfterMarker(GoCgoImportStatic, "Go CGO static import");

    // Records one edge per marker occurrence whose trailing text is a plausible library name.
    void CollectAfterMarker(byte[] marker, string description)
    {
        foreach (var markerOffset in FindAllOccurrences(data, marker))
        {
            var candidate = ExtractFollowingString(data, markerOffset + marker.Length);
            if (string.IsNullOrEmpty(candidate) || !IsValidLibraryName(candidate))
            {
                continue;
            }

            edges.Add(new HeuristicEdge(
                candidate,
                HeuristicReasonCodes.GoCgoImport,
                HeuristicConfidence.High,
                description,
                markerOffset));
        }
    }
}
|
|
|
|
/// <summary>
/// When the bytes contain the Rust panic marker, records native-looking library names
/// ("lib….so" or "….dll") as medium-confidence Rust FFI edges.
/// </summary>
/// <param name="data">Raw bytes of the binary.</param>
/// <param name="edges">Accumulator that receives the edges.</param>
private static void ScanForRustPatterns(byte[] data, List<HeuristicEdge> edges)
{
    // Only the presence of the marker matters, so stop at the first hit instead of
    // collecting every occurrence.
    ReadOnlySpan<byte> haystack = data;
    ReadOnlySpan<byte> panicMarker = RustPanicPrefix;
    if (haystack.IndexOf(panicMarker) < 0)
    {
        return;
    }

    foreach (var (text, offset) in ExtractStrings(data, minLength: 4))
    {
        // Skip the Rust standard library.
        if (text.Contains("libstd-", StringComparison.Ordinal) ||
            text.Contains("libcore-", StringComparison.Ordinal))
        {
            continue;
        }

        var looksLikeNativeLibrary =
            (text.StartsWith("lib", StringComparison.Ordinal) && text.Contains(".so", StringComparison.Ordinal))
            || text.EndsWith(".dll", StringComparison.OrdinalIgnoreCase);
        if (!looksLikeNativeLibrary)
        {
            continue;
        }

        // Names mentioning "rust" or "std" are treated as Rust's own artifacts.
        // NOTE(review): the "std" test also filters names such as "libstdc++.so.6" —
        // confirm whether that exclusion is intended.
        if (text.Contains("rust", StringComparison.Ordinal) ||
            text.Contains("std", StringComparison.Ordinal))
        {
            continue;
        }

        edges.Add(new HeuristicEdge(
            text,
            HeuristicReasonCodes.RustFfi,
            HeuristicConfidence.Medium,
            "Rust FFI library reference",
            offset));
    }
}
|
|
|
|
/// <summary>
/// Extracts runs of printable ASCII bytes (0x20–0x7E) from <paramref name="data"/>.
/// </summary>
/// <param name="data">Bytes to scan.</param>
/// <param name="minLength">
/// Minimum run length to report. Values below 1 behave like 1: empty runs are never reported.
/// </param>
/// <returns>Each qualifying run with the offset of its first byte, in ascending offset order.</returns>
private static List<(string Value, long Offset)> ExtractStrings(byte[] data, int minLength)
{
    var found = new List<(string Value, long Offset)>();
    var runStart = -1; // -1 means "not inside a printable run"

    // Iterate one position past the end so a run that reaches the last byte is flushed
    // by the same code path as a run ended by a non-printable byte.
    for (var index = 0; index <= data.Length; index++)
    {
        var isPrintable = index < data.Length && data[index] >= 0x20 && data[index] < 0x7F;
        if (isPrintable)
        {
            if (runStart < 0)
            {
                runStart = index;
            }

            continue;
        }

        if (runStart < 0)
        {
            // No open run: nothing to report, even when minLength is zero or negative.
            continue;
        }

        var runLength = index - runStart;
        if (runLength >= minLength)
        {
            // All bytes in the run are 7-bit, so ASCII decoding maps each byte to one char.
            found.Add((Encoding.ASCII.GetString(data, runStart, runLength), (long)runStart));
        }

        runStart = -1;
    }

    return found;
}
|
|
|
|
/// <summary>
/// Reads the printable ASCII text that follows <paramref name="startOffset"/>, after skipping
/// any leading NUL, space and tab bytes.
/// </summary>
/// <param name="data">Bytes to read from.</param>
/// <param name="startOffset">Index at which to start skipping and reading.</param>
/// <returns>
/// The text with surrounding whitespace trimmed, capped at 256 characters before trimming;
/// <c>null</c> when nothing printable follows.
/// </returns>
private static string? ExtractFollowingString(byte[] data, int startOffset)
{
    const int MaxLength = 256; // sanity limit on how much text is taken

    var start = startOffset;
    while (start < data.Length && (data[start] == 0 || data[start] == ' ' || data[start] == '\t'))
    {
        start++;
    }

    // Enforce the cap before consuming a byte so at most MaxLength bytes are taken.
    var end = start;
    while (end < data.Length && end - start < MaxLength && data[end] >= 0x20 && data[end] < 0x7F)
    {
        end++;
    }

    if (end == start)
    {
        return null;
    }

    // All bytes in [start, end) are 7-bit, so ASCII decoding maps each byte to one char.
    var text = Encoding.ASCII.GetString(data, start, end - start).Trim();
    return text.Length == 0 ? null : text;
}
|
|
|
|
/// <summary>
/// Returns the start index of every occurrence of <paramref name="pattern"/> in
/// <paramref name="data"/>, including occurrences that overlap one another.
/// </summary>
/// <param name="data">Bytes to search.</param>
/// <param name="pattern">Byte sequence to look for.</param>
/// <returns>
/// Ascending start indices; empty when the pattern is empty or longer than the data.
/// </returns>
private static List<int> FindAllOccurrences(byte[] data, byte[] pattern)
{
    var hits = new List<int>();
    if (pattern.Length == 0 || pattern.Length > data.Length)
    {
        return hits;
    }

    ReadOnlySpan<byte> haystack = data;
    ReadOnlySpan<byte> needle = pattern;

    var cursor = 0;
    while (cursor <= haystack.Length - needle.Length)
    {
        var relative = haystack[cursor..].IndexOf(needle);
        if (relative < 0)
        {
            break;
        }

        hits.Add(cursor + relative);

        // Advance by one byte past the hit's start, not past its end, so overlaps are found.
        cursor += relative + 1;
    }

    return hits;
}
|
|
|
|
/// <summary>
/// Checks whether <paramref name="name"/> is plausible as a library name or path.
/// </summary>
/// <param name="name">Candidate text.</param>
/// <returns>
/// <c>true</c> when the text is at least three characters long and consists only of letters,
/// digits and the separators '.', '_', '-', '+' and '/'; otherwise <c>false</c>.
/// </returns>
private static bool IsValidLibraryName(string name)
{
    if (string.IsNullOrWhiteSpace(name) || name.Length < 3)
    {
        return false;
    }

    foreach (var c in name)
    {
        // '+' is accepted to agree with the library-name regexes in this class,
        // which allow it (e.g. "libstdc++.so.6").
        var allowed = char.IsLetterOrDigit(c) || c is '.' or '_' or '-' or '+' or '/';
        if (!allowed)
        {
            return false;
        }
    }

    return true;
}
|
|
|
|
// RegexOptions.Compiled is not passed: these regexes are emitted by the source generator,
// which ignores that option.

/// <summary>
/// Matches an ELF shared-object name: an optional absolute directory prefix, then "lib",
/// a name made of letters, digits, '_', '+' or '-', ".so", and any number of numeric
/// ".N" version suffixes.
/// </summary>
[GeneratedRegex(@"^(/[a-zA-Z0-9_/.-]+/)?lib[a-zA-Z0-9_+-]+\.so(\.[0-9]+)*$")]
private static partial Regex SonameRegex();

/// <summary>
/// Matches a bare DLL file name (no directory part), case-insensitively.
/// </summary>
[GeneratedRegex(@"^[a-zA-Z0-9_+-]+\.dll$", RegexOptions.IgnoreCase)]
private static partial Regex DllRegex();

/// <summary>
/// Matches a dylib file name, optionally prefixed by "@rpath/", "@loader_path/",
/// "@executable_path/" or a single leading '/'.
/// </summary>
[GeneratedRegex(@"^(@rpath/|@loader_path/|@executable_path/|/)?[a-zA-Z0-9_+-]+\.dylib$")]
private static partial Regex DylibRegex();
|
|
}
|