using Microsoft.Extensions.Logging;
using StellaOps.Signals.Models;
using System.Globalization;
using System.Reflection.Metadata;
using System.Reflection.PortableExecutable;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.RegularExpressions;

namespace StellaOps.Zastava.Observer.Runtime.ProcSnapshot;

/// <summary>
/// Collects loaded .NET assembly information from a running process.
/// Parses /proc/{pid}/maps for loaded DLLs and correlates with deps.json for NuGet metadata.
/// </summary>
/// <remarks>
/// NOTE(review): paths taken from maps/cmdline are checked with <see cref="File.Exists(string)"/>
/// as-is, i.e. against the observer's own filesystem view. If the observed process lives in a
/// different mount namespace these may need resolving via /proc/{pid}/root - confirm with callers.
/// </remarks>
internal sealed partial class DotNetAssemblyCollector
{
    private static readonly Regex DotNetProcessRegex = GenerateDotNetRegex();
    private static readonly Regex MapsLineRegex = GenerateMapsRegex();

    private const int MaxAssemblies = 512;
    private const long MaxFileSize = 50 * 1024 * 1024; // 50 MiB
    private const long MaxTotalHashBytes = 100_000_000; // ~95 MiB
    private const long MaxDepsJsonSize = 10 * 1024 * 1024; // 10 MiB

    private readonly string _procRoot;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates a collector that reads process information below <paramref name="procRoot"/>.
    /// </summary>
    /// <param name="procRoot">Root of the proc filesystem; trailing directory separators are trimmed.</param>
    /// <param name="logger">Logger used for debug diagnostics.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="procRoot"/> or <paramref name="logger"/> is <see langword="null"/>.
    /// </exception>
    public DotNetAssemblyCollector(string procRoot, ILogger logger)
    {
        _procRoot = procRoot?.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)
            ?? throw new ArgumentNullException(nameof(procRoot));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Check if a process appears to be a .NET process.
    /// </summary>
    /// <param name="pid">Process id whose cmdline is inspected.</param>
    /// <param name="cancellationToken">Token used to cancel the cmdline read.</param>
    /// <returns>
    /// <see langword="true"/> when the first cmdline argument is a <c>dotnet</c> executable or any
    /// argument ends with <c>.dll</c>; <see langword="false"/> otherwise, including when the cmdline
    /// is missing, empty or unreadable.
    /// </returns>
    public async Task<bool> IsDotNetProcessAsync(int pid, CancellationToken cancellationToken)
    {
        var cmdline = await ReadCmdlineAsync(pid, cancellationToken).ConfigureAwait(false);
        return IsDotNetCommandLine(cmdline);
    }

    /// <summary>
    /// Collect loaded assemblies from a .NET process.
    /// </summary>
    /// <param name="pid">Process id to inspect.</param>
    /// <param name="cancellationToken">Token checked before each assembly and passed to file reads.</param>
    /// <returns>
    /// One entry per loaded assembly path (at most <c>MaxAssemblies</c>); empty when the process has
    /// no readable cmdline, does not look like a .NET process, or maps no matching files.
    /// </returns>
    public async Task<IReadOnlyList<LoadedAssemblyEntry>> CollectAsync(int pid, CancellationToken cancellationToken)
    {
        var cmdline = await ReadCmdlineAsync(pid, cancellationToken).ConfigureAwait(false);
        if (cmdline.Count == 0)
        {
            return Array.Empty<LoadedAssemblyEntry>();
        }

        // Decide on the cmdline we already hold instead of re-reading it from /proc.
        if (!IsDotNetCommandLine(cmdline))
        {
            _logger.LogDebug("Process {Pid} is not a .NET process", pid);
            return Array.Empty<LoadedAssemblyEntry>();
        }

        // Find deps.json for NuGet metadata correlation
        var depsJson = FindDepsJson(pid, cmdline);
        var depsMetadata = depsJson is not null
            ? await ParseDepsJsonAsync(depsJson, cancellationToken).ConfigureAwait(false)
            : new DepsJsonMetadata();

        // Parse /proc/{pid}/maps for loaded assemblies
        var loadedPaths = await GetLoadedAssemblyPathsAsync(pid, cancellationToken).ConfigureAwait(false);
        if (loadedPaths.Count == 0)
        {
            _logger.LogDebug("No loaded assemblies found for .NET process {Pid}", pid);
            return Array.Empty<LoadedAssemblyEntry>();
        }

        var entries = new List<LoadedAssemblyEntry>();
        var totalBytesHashed = 0L;

        foreach (var path in loadedPaths.Take(MaxAssemblies))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var (entry, bytesHashed) = await ProcessAssemblyAsync(
                path,
                depsMetadata,
                totalBytesHashed,
                cancellationToken).ConfigureAwait(false);

            if (entry is not null)
            {
                entries.Add(entry);
            }

            // Running total feeds the MaxTotalHashBytes budget of the following iterations.
            totalBytesHashed += bytesHashed;
        }

        _logger.LogDebug("Collected {Count} assembly entries for .NET process {Pid}", entries.Count, pid);
        return entries;
    }

    /// <summary>
    /// Decides from an already-read cmdline whether the process looks like a .NET process.
    /// </summary>
    private static bool IsDotNetCommandLine(IReadOnlyList<string> cmdline)
    {
        if (cmdline.Count == 0)
        {
            return false;
        }

        // The executable itself is dotnet / dotnet.exe
        if (DotNetProcessRegex.IsMatch(cmdline[0]))
        {
            return true;
        }

        // Also accept any .dll argument (dotnet MyApp.dll)
        return cmdline.Any(arg => arg.EndsWith(".dll", StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Reads the NUL-separated cmdline file of a process and splits it into arguments.
    /// Returns an empty list when the file is missing, empty or unreadable.
    /// </summary>
    private async Task<IReadOnlyList<string>> ReadCmdlineAsync(int pid, CancellationToken cancellationToken)
    {
        var cmdlinePath = Path.Combine(_procRoot, pid.ToString(CultureInfo.InvariantCulture), "cmdline");
        if (!File.Exists(cmdlinePath))
        {
            return new List<string>();
        }

        try
        {
            var content = await File.ReadAllBytesAsync(cmdlinePath, cancellationToken).ConfigureAwait(false);
            if (content.Length == 0)
            {
                return new List<string>();
            }

            return System.Text.Encoding.UTF8.GetString(content)
                .Split('\0', StringSplitOptions.RemoveEmptyEntries)
                .ToList();
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to read cmdline for PID {Pid}", pid);
            return new List<string>();
        }
    }

    /// <summary>
    /// Extracts the distinct file paths from the process maps file that end in <c>.dll</c>, or that
    /// end in <c>.so</c> and contain <c>/dotnet/</c>. Paths are de-duplicated case-insensitively.
    /// </summary>
    private async Task<IReadOnlyCollection<string>> GetLoadedAssemblyPathsAsync(int pid, CancellationToken cancellationToken)
    {
        var mapsPath = Path.Combine(_procRoot, pid.ToString(CultureInfo.InvariantCulture), "maps");
        var paths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        if (!File.Exists(mapsPath))
        {
            return paths;
        }

        try
        {
            var lines = await File.ReadAllLinesAsync(mapsPath, cancellationToken).ConfigureAwait(false);
            foreach (var line in lines)
            {
                var match = MapsLineRegex.Match(line);
                if (!match.Success)
                {
                    continue;
                }

                var path = match.Groups["path"].Value;

                // Skip blank names and bracketed pseudo-entries such as [heap] / [stack].
                if (string.IsNullOrWhiteSpace(path) || path.StartsWith('['))
                {
                    continue;
                }

                // Include .dll files and .NET native libraries (.so)
                var isManagedDll = path.EndsWith(".dll", StringComparison.OrdinalIgnoreCase);
                var isDotNetNative = path.Contains("/dotnet/", StringComparison.OrdinalIgnoreCase)
                    && path.EndsWith(".so", StringComparison.OrdinalIgnoreCase);

                if (isManagedDll || isDotNetNative)
                {
                    paths.Add(path);
                }
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to read maps for PID {Pid}", pid);
        }

        return paths;
    }

    /// <summary>
    /// Locates a deps.json for the process: first next to the first <c>.dll</c> argument of the
    /// cmdline, then any <c>*.deps.json</c> in the process working directory.
    /// </summary>
    /// <returns>The deps.json path, or <see langword="null"/> when none was found.</returns>
    private string? FindDepsJson(int pid, IReadOnlyList<string> cmdline)
    {
        // 1. The directory of the main assembly (first .dll on the cmdline)
        var mainDll = cmdline.FirstOrDefault(arg => arg.EndsWith(".dll", StringComparison.OrdinalIgnoreCase));
        if (!string.IsNullOrEmpty(mainDll) && File.Exists(mainDll))
        {
            // MyApp.dll -> MyApp.deps.json
            var depsPath = Path.ChangeExtension(mainDll, ".deps.json");
            if (File.Exists(depsPath))
            {
                return depsPath;
            }
        }

        // 2. Working directory of the process
        var cwdPath = Path.Combine(_procRoot, pid.ToString(CultureInfo.InvariantCulture), "cwd");
        try
        {
            if (Directory.Exists(cwdPath))
            {
                var cwd = Path.GetFullPath(cwdPath);
                var depsFiles = Directory.GetFiles(cwd, "*.deps.json", SearchOption.TopDirectoryOnly);
                if (depsFiles.Length > 0)
                {
                    // Enumeration order is not guaranteed; sort so the choice is stable across runs.
                    Array.Sort(depsFiles, StringComparer.Ordinal);
                    return depsFiles[0];
                }
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to search for deps.json in process working directory");
        }

        return null;
    }

    /// <summary>
    /// Parses a deps.json file into the runtime identifier, the library table and an
    /// assembly-name to package map. Files above <c>MaxDepsJsonSize</c> are skipped. Malformed or
    /// unreadable files are logged at debug level; whatever was parsed up to that point is returned.
    /// </summary>
    private async Task<DepsJsonMetadata> ParseDepsJsonAsync(string depsJsonPath, CancellationToken cancellationToken)
    {
        var metadata = new DepsJsonMetadata();

        try
        {
            var fileInfo = new FileInfo(depsJsonPath);
            if (fileInfo.Length > MaxDepsJsonSize)
            {
                _logger.LogDebug("deps.json too large: {Path} ({Size} bytes)", depsJsonPath, fileInfo.Length);
                return metadata;
            }

            await using var stream = new FileStream(depsJsonPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
            using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);

            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                // TryGetProperty throws on non-object elements; nothing usable in such a document.
                return metadata;
            }

            ReadRuntimeIdentifier(root, metadata);
            ReadLibraries(root, metadata);
            ReadTargets(root, metadata);
        }
        catch (Exception ex) when (ex is JsonException or IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to parse deps.json: {Path}", depsJsonPath);
        }

        return metadata;
    }

    /// <summary>Extracts the RID from <c>runtimeTarget.name</c> (text after the last '/').</summary>
    private static void ReadRuntimeIdentifier(JsonElement root, DepsJsonMetadata metadata)
    {
        if (!TryGetObject(root, "runtimeTarget", out var runtimeTarget)
            || !runtimeTarget.TryGetProperty("name", out var targetName))
        {
            return;
        }

        // Format: ".NETCoreApp,Version=v8.0/linux-x64"
        var target = GetStringOrNull(targetName) ?? string.Empty;
        var slashIndex = target.LastIndexOf('/');
        if (slashIndex > 0)
        {
            metadata.RuntimeIdentifier = target[(slashIndex + 1)..];
        }
    }

    /// <summary>Fills <see cref="DepsJsonMetadata.Libraries"/> from the <c>libraries</c> object.</summary>
    private static void ReadLibraries(JsonElement root, DepsJsonMetadata metadata)
    {
        if (!TryGetObject(root, "libraries", out var libraries))
        {
            return;
        }

        foreach (var library in libraries.EnumerateObject())
        {
            if (!TrySplitNameVersion(library.Name, out var packageName, out var packageVersion)
                || library.Value.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var libInfo = new LibraryInfo
            {
                PackageName = packageName,
                PackageVersion = packageVersion
            };

            if (library.Value.TryGetProperty("type", out var typeElement))
            {
                libInfo.Type = GetStringOrNull(typeElement);
            }

            if (library.Value.TryGetProperty("sha512", out var sha512Element))
            {
                libInfo.Sha512 = GetStringOrNull(sha512Element);
            }

            // The dictionary comparer is case-insensitive, so no key normalisation is needed.
            metadata.Libraries[packageName] = libInfo;
        }
    }

    /// <summary>
    /// Fills <see cref="DepsJsonMetadata.AssemblyPackages"/> from the <c>targets</c> object, mapping
    /// each file listed under a package's runtime/compile/native section to that package.
    /// When the same assembly name occurs more than once, the last occurrence wins.
    /// </summary>
    private static void ReadTargets(JsonElement root, DepsJsonMetadata metadata)
    {
        if (!TryGetObject(root, "targets", out var targets))
        {
            return;
        }

        string[] sections = ["runtime", "compile", "native"];

        foreach (var target in targets.EnumerateObject())
        {
            if (target.Value.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            foreach (var package in target.Value.EnumerateObject())
            {
                if (!TrySplitNameVersion(package.Name, out var packageName, out var packageVersion)
                    || package.Value.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                foreach (var section in sections)
                {
                    if (!TryGetObject(package.Value, section, out var assemblies))
                    {
                        continue;
                    }

                    foreach (var assembly in assemblies.EnumerateObject())
                    {
                        var assemblyName = Path.GetFileNameWithoutExtension(assembly.Name);
                        metadata.AssemblyPackages[assemblyName] = new AssemblyPackageInfo
                        {
                            PackageName = packageName,
                            PackageVersion = packageVersion,
                            DepsSource = section
                        };
                    }
                }
            }
        }
    }

    /// <summary>Gets a property only when it exists and is a JSON object.</summary>
    private static bool TryGetObject(JsonElement parent, string propertyName, out JsonElement value)
    {
        if (parent.ValueKind == JsonValueKind.Object
            && parent.TryGetProperty(propertyName, out value)
            && value.ValueKind == JsonValueKind.Object)
        {
            return true;
        }

        value = default;
        return false;
    }

    /// <summary>Returns the string value of an element, or null when it is not a JSON string.</summary>
    private static string? GetStringOrNull(JsonElement element)
        => element.ValueKind == JsonValueKind.String ? element.GetString() : null;

    /// <summary>Splits a "Name/Version" key at its first '/'; fails when there is no name part.</summary>
    private static bool TrySplitNameVersion(string nameVersion, out string name, out string version)
    {
        var slashIndex = nameVersion.IndexOf('/');
        if (slashIndex <= 0)
        {
            name = string.Empty;
            version = string.Empty;
            return false;
        }

        name = nameVersion[..slashIndex];
        version = nameVersion[(slashIndex + 1)..];
        return true;
    }

    /// <summary>
    /// Builds the entry for one mapped file: framework flag, NuGet correlation, SHA-256 (when within
    /// the per-file and cumulative byte limits) and assembly version.
    /// </summary>
    /// <param name="path">File path as it appears in the maps file.</param>
    /// <param name="depsMetadata">Parsed deps.json data used for package lookup.</param>
    /// <param name="currentTotalBytesHashed">Bytes hashed so far in this collection run.</param>
    /// <param name="cancellationToken">Token passed to the hashing read.</param>
    /// <returns>The entry and the number of bytes hashed for it (0 when the file was not hashed).</returns>
    private async Task<(LoadedAssemblyEntry? Entry, long BytesHashed)> ProcessAssemblyAsync(
        string path,
        DepsJsonMetadata depsMetadata,
        long currentTotalBytesHashed,
        CancellationToken cancellationToken)
    {
        var name = Path.GetFileNameWithoutExtension(path);

        if (!File.Exists(path))
        {
            // File not reachable from here: report name and path only.
            return (new LoadedAssemblyEntry { Name = name, Path = path }, 0);
        }

        var entry = new LoadedAssemblyEntry
        {
            Name = name,
            Path = path,
            Rid = depsMetadata.RuntimeIdentifier,
            IsFrameworkAssembly = IsFrameworkPath(path)
        };

        // Look up NuGet package info from deps.json (case-insensitive dictionary)
        if (depsMetadata.AssemblyPackages.TryGetValue(name, out var packageInfo))
        {
            entry = entry with
            {
                NuGetPackage = packageInfo.PackageName,
                NuGetVersion = packageInfo.PackageVersion,
                DepsSource = packageInfo.DepsSource,
                Purl = $"pkg:nuget/{packageInfo.PackageName}@{packageInfo.PackageVersion}"
            };
        }

        long bytesHashed = 0;
        try
        {
            var length = new FileInfo(path).Length;
            var withinFileLimit = length <= MaxFileSize;

            // Hash the file if within the per-file and cumulative limits
            if (withinFileLimit && currentTotalBytesHashed + length <= MaxTotalHashBytes)
            {
                var hash = await ComputeFileHashAsync(path, cancellationToken).ConfigureAwait(false);
                entry = entry with { Sha256 = hash };
                bytesHashed = length;
            }

            // Only managed candidates of bounded size are opened for metadata.
            if (withinFileLimit && path.EndsWith(".dll", StringComparison.OrdinalIgnoreCase))
            {
                var version = TryGetAssemblyVersion(path);
                if (!string.IsNullOrWhiteSpace(version))
                {
                    entry = entry with { Version = version };
                }
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to process assembly: {Path}", path);
        }

        return (entry, bytesHashed);
    }

    /// <summary>
    /// Returns true when the path contains one of the known shared-framework directory fragments
    /// (compared case-insensitively).
    /// </summary>
    private static bool IsFrameworkPath(string path)
    {
        return path.Contains("/dotnet/shared/", StringComparison.OrdinalIgnoreCase)
            || path.Contains("/usr/share/dotnet/", StringComparison.OrdinalIgnoreCase)
            || path.Contains("\\dotnet\\shared\\", StringComparison.OrdinalIgnoreCase)
            || path.Contains("\\program files\\dotnet\\", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Computes the SHA-256 of a file, formatted as <c>sha256:</c> plus lowercase hex.</summary>
    private static async Task<string> ComputeFileHashAsync(string path, CancellationToken cancellationToken)
    {
        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        var hash = await SHA256.HashDataAsync(stream, cancellationToken).ConfigureAwait(false);
        return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
    }

    /// <summary>
    /// Reads the assembly version from the file's managed metadata without loading the assembly.
    /// </summary>
    /// <returns>
    /// The version as a string, or <see langword="null"/> when the file is not a PE image,
    /// carries no managed metadata, has no assembly manifest, or cannot be read.
    /// </returns>
    private static string? TryGetAssemblyVersion(string path)
    {
        try
        {
            using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
            using var peReader = new PEReader(stream);

            if (!peReader.HasMetadata)
            {
                return null;
            }

            var metadataReader = peReader.GetMetadataReader();
            if (!metadataReader.IsAssembly)
            {
                return null;
            }

            return metadataReader.GetAssemblyDefinition().Version.ToString();
        }
        catch (Exception ex) when (ex is BadImageFormatException
            or IOException
            or UnauthorizedAccessException
            or InvalidOperationException
            or NotSupportedException)
        {
            // Best effort: version stays unknown and callers fall back to deps.json data.
            return null;
        }
    }

    [GeneratedRegex(@"(^|/)(dotnet)(\.(exe))?$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex GenerateDotNetRegex();

    [GeneratedRegex(@"^[0-9a-f]+-[0-9a-f]+\s+[r-][w-][x-][ps-]\s+[0-9a-f]+\s+[0-9a-f]+:[0-9a-f]+\s+\d+\s+(?<path>.+)$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex GenerateMapsRegex();

    /// <summary>Data extracted from a deps.json file.</summary>
    private sealed class DepsJsonMetadata
    {
        public string? RuntimeIdentifier { get; set; }

        /// <summary>Libraries keyed by package name (case-insensitive).</summary>
        public Dictionary<string, LibraryInfo> Libraries { get; } = new(StringComparer.OrdinalIgnoreCase);

        /// <summary>Owning package keyed by assembly file name without extension (case-insensitive).</summary>
        public Dictionary<string, AssemblyPackageInfo> AssemblyPackages { get; } = new(StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>One entry of the deps.json <c>libraries</c> object.</summary>
    private sealed class LibraryInfo
    {
        public string PackageName { get; init; } = string.Empty;
        public string PackageVersion { get; init; } = string.Empty;
        public string? Type { get; set; }
        public string? Sha512 { get; set; }
    }

    /// <summary>Package that an assembly was listed under, and the deps.json section it came from.</summary>
    private sealed class AssemblyPackageInfo
    {
        public string PackageName { get; init; } = string.Empty;
        public string PackageVersion { get; init; } = string.Empty;
        public string? DepsSource { get; set; }
    }
}