using System.Buffers;
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Linq;
using System.Text;

namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;

/// <summary>
/// Finds ELF binaries under a root directory and extracts candidate crate names
/// from <c>_ZN&lt;length&gt;&lt;name&gt;</c> mangled-symbol fragments embedded in them.
/// </summary>
internal static class RustBinaryClassifier
{
    /// <summary>First four bytes of an ELF file: 0x7F 'E' 'L' 'F'.</summary>
    private static readonly ReadOnlyMemory<byte> ElfMagic = new byte[] { 0x7F, (byte)'E', (byte)'L', (byte)'F' };

    /// <summary>Byte sequence that introduces a mangled symbol: "_ZN".</summary>
    private static readonly ReadOnlyMemory<byte> SymbolPrefix = new byte[] { (byte)'_', (byte)'Z', (byte)'N' };

    /// <summary>Number of bytes requested from the stream per read.</summary>
    private const int ChunkSize = 64 * 1024;

    /// <summary>Largest length prefix <see cref="TryParseCrate"/> accepts.</summary>
    private const int MaxCrateNameLength = 256;

    /// <summary>Number of decimal digits needed to spell <see cref="MaxCrateNameLength"/>.</summary>
    private const int MaxLengthDigits = 3;

    /// <summary>
    /// Bytes carried from the end of one chunk to the start of the next. Sized to hold a
    /// complete "_ZN" + length digits + name fragment so that a symbol straddling a chunk
    /// boundary is always seen whole in the following scan.
    /// </summary>
    private const int OverlapSize = 3 + MaxLengthDigits + MaxCrateNameLength;

    /// <summary>Files larger than this (128 MiB) are not considered.</summary>
    private const long MaxBinarySize = 128L * 1024L * 1024L;

    /// <summary>Names that are filtered out of the candidate list.</summary>
    private static readonly HashSet<string> StandardCrates = new(StringComparer.Ordinal)
    {
        "core",
        "alloc",
        "std",
        "panic_unwind",
        "panic_abort",
    };

    private static readonly EnumerationOptions Enumeration = new()
    {
        MatchCasing = MatchCasing.CaseSensitive,
        IgnoreInaccessible = true,
        RecurseSubdirectories = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
    };

    /// <summary>Process-wide cache of extracted candidates, keyed by file cache key.</summary>
    private static readonly ConcurrentDictionary<RustFileCacheKey, ImmutableArray<string>> CandidateCache = new();

    /// <summary>
    /// Recursively enumerates <paramref name="rootPath"/> and returns one entry per eligible
    /// ELF binary together with the crate-name candidates found in it.
    /// </summary>
    /// <param name="rootPath">Directory to enumerate.</param>
    /// <param name="cancellationToken">Checked once per enumerated file and once per chunk read.</param>
    /// <returns>The eligible binaries, in enumeration order.</returns>
    /// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public static IReadOnlyList<RustBinaryInfo> Scan(string rootPath, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(rootPath))
        {
            throw new ArgumentException("Root path is required", nameof(rootPath));
        }

        var binaries = new List<RustBinaryInfo>();
        foreach (var path in Directory.EnumerateFiles(rootPath, "*", Enumeration))
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!IsEligibleBinary(path))
            {
                continue;
            }

            if (!RustFileCacheKey.TryCreate(path, out var key))
            {
                continue;
            }

            // State is passed explicitly so the factory lambda can stay static (no closure).
            var candidates = CandidateCache.GetOrAdd(
                key,
                static (_, state) => ExtractCrateNames(state.Path, state.CancellationToken),
                (Path: path, CancellationToken: cancellationToken));

            binaries.Add(new RustBinaryInfo(path, candidates));
        }

        return binaries;
    }

    /// <summary>
    /// Returns true when the file exists, is non-empty, is no larger than
    /// <see cref="MaxBinarySize"/> and starts with <see cref="ElfMagic"/>.
    /// I/O and access errors are treated as "not eligible".
    /// </summary>
    private static bool IsEligibleBinary(string path)
    {
        try
        {
            var info = new FileInfo(path);
            if (!info.Exists || info.Length == 0 || info.Length > MaxBinarySize)
            {
                return false;
            }

            using var stream = info.OpenRead();
            Span<byte> buffer = stackalloc byte[4];
            var read = stream.Read(buffer);
            if (read != buffer.Length)
            {
                return false;
            }

            return buffer.SequenceEqual(ElfMagic.Span);
        }
        catch (IOException)
        {
            return false;
        }
        catch (UnauthorizedAccessException)
        {
            return false;
        }
    }

    /// <summary>
    /// Streams the file in chunks, collects every name found by <see cref="ScanForSymbols"/>,
    /// then trims, filters (length &gt; 1, not in <see cref="StandardCrates"/>), de-duplicates
    /// and ordinally sorts the result. Returns an empty array on I/O or access errors.
    /// </summary>
    private static ImmutableArray<string> ExtractCrateNames(string path, CancellationToken cancellationToken)
    {
        var names = new HashSet<string>(StringComparer.Ordinal);
        var buffer = ArrayPool<byte>.Shared.Rent(ChunkSize + OverlapSize);
        var overlap = new byte[OverlapSize];
        var overlapLength = 0;

        try
        {
            using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
            while (true)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Prepend the tail of the previous window so boundary-straddling symbols are seen whole.
                if (overlapLength > 0)
                {
                    Array.Copy(overlap, 0, buffer, 0, overlapLength);
                }

                var read = stream.Read(buffer, overlapLength, ChunkSize);
                if (read <= 0)
                {
                    break;
                }

                var span = new ReadOnlySpan<byte>(buffer, 0, overlapLength + read);
                ScanForSymbols(span, names);

                // Keep the last OverlapSize bytes of the whole window (not just of this read),
                // so short reads cannot shrink the carried context.
                overlapLength = Math.Min(OverlapSize, span.Length);
                if (overlapLength > 0)
                {
                    span[^overlapLength..].CopyTo(overlap);
                }
            }
        }
        catch (IOException)
        {
            return ImmutableArray<string>.Empty;
        }
        catch (UnauthorizedAccessException)
        {
            return ImmutableArray<string>.Empty;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(buffer);
        }

        if (names.Count == 0)
        {
            return ImmutableArray<string>.Empty;
        }

        // Distinct is still required: Trim() can collapse two raw names into the same string.
        return names
            .Where(static name => !string.IsNullOrWhiteSpace(name))
            .Select(static name => name.Trim())
            .Where(static name => name.Length > 1)
            .Where(static name => !StandardCrates.Contains(name))
            .Distinct(StringComparer.Ordinal)
            .OrderBy(static name => name, StringComparer.Ordinal)
            .ToImmutableArray();
    }

    /// <summary>
    /// Finds every occurrence of <see cref="SymbolPrefix"/> in <paramref name="span"/> and adds
    /// the length-prefixed name that follows it (when one parses) to <paramref name="names"/>.
    /// </summary>
    private static void ScanForSymbols(ReadOnlySpan<byte> span, HashSet<string> names)
    {
        var prefix = SymbolPrefix.Span;
        var index = 0;

        while (index < span.Length)
        {
            var offset = span[index..].IndexOf(prefix);
            if (offset < 0)
            {
                break;
            }

            // Step past the prefix; this alone guarantees forward progress on every iteration.
            index += offset + prefix.Length;
            if (index >= span.Length)
            {
                break;
            }

            if (!TryParseCrate(span[index..], out var crate, out var consumed))
            {
                // Resume exactly here: the byte at index may itself begin another prefix
                // (e.g. "_ZN_ZN5serde"), so it must not be skipped.
                continue;
            }

            if (!string.IsNullOrWhiteSpace(crate))
            {
                names.Add(crate);
            }

            // A successful parse always consumes at least one digit and one name byte.
            index += consumed;
        }
    }

    /// <summary>
    /// Parses a decimal length followed by that many bytes from the start of
    /// <paramref name="span"/> and decodes those bytes as ASCII.
    /// </summary>
    /// <param name="span">Bytes immediately following a symbol prefix.</param>
    /// <param name="crate">The decoded name on success; otherwise null.</param>
    /// <param name="consumed">Digits plus name bytes consumed on success; otherwise 0.</param>
    /// <returns>
    /// False when there are no leading digits, the length is zero or exceeds
    /// <see cref="MaxCrateNameLength"/>, or the name would run past the end of the span.
    /// </returns>
    private static bool TryParseCrate(ReadOnlySpan<byte> span, out string? crate, out int consumed)
    {
        crate = null;
        consumed = 0;

        var i = 0;
        var length = 0;

        while (i < span.Length && span[i] is >= (byte)'0' and <= (byte)'9')
        {
            length = (length * 10) + (span[i] - (byte)'0');
            i++;

            // Bail out early; this also keeps the accumulator far away from int overflow.
            if (length > MaxCrateNameLength)
            {
                return false;
            }
        }

        if (i == 0 || length <= 0 || i + length > span.Length)
        {
            return false;
        }

        crate = Encoding.ASCII.GetString(span.Slice(i, length));
        consumed = i + length;
        return true;
    }
}

/// <summary>
/// An ELF binary found by <see cref="RustBinaryClassifier.Scan"/> and the crate-name
/// candidates extracted from it.
/// </summary>
/// <param name="AbsolutePath">Path of the binary as yielded by directory enumeration.</param>
/// <param name="CrateCandidates">Filtered, ordinally sorted candidate crate names.</param>
internal sealed record RustBinaryInfo(string AbsolutePath, ImmutableArray<string> CrateCandidates)
{
    /// <summary>
    /// Returns the SHA-256 string that <c>RustFileHashCache</c> reports for
    /// <see cref="AbsolutePath"/>, or an empty string when none is available.
    /// </summary>
    public string ComputeSha256()
    {
        if (RustFileHashCache.TryGetSha256(AbsolutePath, out var sha256) && !string.IsNullOrEmpty(sha256))
        {
            return sha256;
        }

        return string.Empty;
    }
}