Files
git.stella-ops.org/src/Zastava/StellaOps.Zastava.Observer/Runtime/ProcSnapshot/DotNetAssemblyCollector.cs
2026-02-01 21:37:40 +02:00

497 lines
18 KiB
C#

using Microsoft.Extensions.Logging;
using StellaOps.Signals.Models;
using System.Globalization;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace StellaOps.Zastava.Observer.Runtime.ProcSnapshot;
/// <summary>
/// Collects loaded .NET assembly information from a running process.
/// Parses /proc/&lt;pid&gt;/maps for loaded DLLs and correlates with deps.json for NuGet metadata.
/// </summary>
internal sealed partial class DotNetAssemblyCollector
{
// Matches an executable whose final path segment is "dotnet" or "dotnet.exe" (pattern on GenerateDotNetRegex).
private static readonly Regex DotNetProcessRegex = GenerateDotNetRegex();
// Matches one line of a process "maps" file and captures the trailing pathname in the "path" group.
private static readonly Regex MapsLineRegex = GenerateMapsRegex();
// Upper bound on how many loaded paths are processed in a single collection.
private const int MaxAssemblies = 512;
// Files larger than this are never hashed.
private const long MaxFileSize = 50 * 1024 * 1024; // 50 MiB
// Cumulative number of bytes that may be hashed during a single collection.
private const long MaxTotalHashBytes = 100_000_000; // ~95 MiB
// Root of the proc filesystem, stored without trailing directory separators.
private readonly string _procRoot;
// Receives debug-level diagnostics about skipped or unreadable inputs.
private readonly ILogger<DotNetAssemblyCollector> _logger;
/// <summary>
/// Creates a collector that reads per-process files beneath <paramref name="procRoot"/>.
/// </summary>
/// <param name="procRoot">Root of the proc filesystem; trailing directory separators are trimmed.</param>
/// <param name="logger">Logger used for debug diagnostics.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public DotNetAssemblyCollector(string procRoot, ILogger<DotNetAssemblyCollector> logger)
{
    if (procRoot is null)
    {
        throw new ArgumentNullException(nameof(procRoot));
    }

    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    char[] separators = { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
    _procRoot = procRoot.TrimEnd(separators);
    _logger = logger;
}
/// <summary>
/// Check if a process appears to be a .NET process.
/// </summary>
/// <param name="pid">Id of the process whose command line is inspected.</param>
/// <param name="cancellationToken">Cancels the read of the command line.</param>
/// <returns>
/// <c>true</c> when the executable is <c>dotnet</c>/<c>dotnet.exe</c> or any argument ends in <c>.dll</c>;
/// <c>false</c> otherwise, including when the command line is empty or unreadable.
/// </returns>
public async Task<bool> IsDotNetProcessAsync(int pid, CancellationToken cancellationToken)
{
    var cmdline = await ReadCmdlineAsync(pid, cancellationToken).ConfigureAwait(false);
    return LooksLikeDotNetCommandLine(cmdline);
}

/// <summary>
/// Collect loaded assemblies from a .NET process.
/// </summary>
/// <param name="pid">Id of the process to inspect.</param>
/// <param name="cancellationToken">Checked before each assembly is processed and passed to all I/O.</param>
/// <returns>
/// One entry per loaded assembly path (at most <c>MaxAssemblies</c>), or an empty list when the
/// process has no readable command line, does not look like a .NET process, or maps no assemblies.
/// </returns>
public async Task<IReadOnlyList<LoadedAssemblyEntry>> CollectAsync(int pid, CancellationToken cancellationToken)
{
    var cmdline = await ReadCmdlineAsync(pid, cancellationToken).ConfigureAwait(false);
    if (cmdline.Count == 0)
    {
        return Array.Empty<LoadedAssemblyEntry>();
    }

    // Decide on the command line that was already read instead of reading it a second time;
    // the process may exit between two reads and yield inconsistent answers.
    if (!LooksLikeDotNetCommandLine(cmdline))
    {
        _logger.LogDebug("Process {Pid} is not a .NET process", pid);
        return Array.Empty<LoadedAssemblyEntry>();
    }

    // Find deps.json for NuGet metadata correlation
    var depsJson = await FindDepsJsonAsync(pid, cmdline, cancellationToken).ConfigureAwait(false);
    var depsMetadata = depsJson != null
        ? await ParseDepsJsonAsync(depsJson, cancellationToken).ConfigureAwait(false)
        : new DepsJsonMetadata();

    // Parse the process "maps" file for loaded assemblies
    var loadedPaths = await GetLoadedAssemblyPathsAsync(pid, cancellationToken).ConfigureAwait(false);
    if (loadedPaths.Count == 0)
    {
        _logger.LogDebug("No loaded assemblies found for .NET process {Pid}", pid);
        return Array.Empty<LoadedAssemblyEntry>();
    }

    var entries = new List<LoadedAssemblyEntry>(Math.Min(loadedPaths.Count, MaxAssemblies));
    var totalBytesHashed = 0L;
    foreach (var path in loadedPaths.Take(MaxAssemblies))
    {
        cancellationToken.ThrowIfCancellationRequested();

        // The running total lets ProcessAssemblyAsync enforce the cumulative hashing budget.
        var (entry, bytesHashed) = await ProcessAssemblyAsync(
            path,
            depsMetadata,
            totalBytesHashed,
            cancellationToken).ConfigureAwait(false);
        if (entry != null)
        {
            entries.Add(entry);
        }

        totalBytesHashed += bytesHashed;
    }

    _logger.LogDebug("Collected {Count} assembly entries for .NET process {Pid}", entries.Count, pid);
    return entries;
}

/// <summary>
/// Applies the .NET detection heuristic to an already-read command line.
/// </summary>
/// <param name="cmdline">Arguments of the process; element 0 is the executable.</param>
/// <returns><c>true</c> when the executable is dotnet or any argument ends in <c>.dll</c>.</returns>
private static bool LooksLikeDotNetCommandLine(IReadOnlyList<string> cmdline)
{
    if (cmdline.Count == 0)
    {
        return false;
    }

    // Check if it's dotnet being executed
    if (DotNetProcessRegex.IsMatch(cmdline[0]))
    {
        return true;
    }

    // Also check if cmdline contains a .dll argument (dotnet MyApp.dll)
    return cmdline.Any(arg => arg.EndsWith(".dll", StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Reads the NUL-separated command line of a process from its <c>cmdline</c> file under the proc root.
/// </summary>
/// <param name="pid">Id of the process.</param>
/// <param name="cancellationToken">Cancels the file read.</param>
/// <returns>
/// The non-empty arguments in order; an empty list when the file is missing, empty, or cannot be read.
/// </returns>
private async Task<List<string>> ReadCmdlineAsync(int pid, CancellationToken cancellationToken)
{
    var arguments = new List<string>();
    var pidSegment = pid.ToString(CultureInfo.InvariantCulture);
    var cmdlineFile = Path.Combine(_procRoot, pidSegment, "cmdline");

    if (!File.Exists(cmdlineFile))
    {
        return arguments;
    }

    try
    {
        var raw = await File.ReadAllBytesAsync(cmdlineFile, cancellationToken).ConfigureAwait(false);
        if (raw.Length > 0)
        {
            // Arguments are separated by NUL bytes; empty segments are dropped.
            var decoded = System.Text.Encoding.UTF8.GetString(raw);
            arguments.AddRange(decoded.Split('\0', StringSplitOptions.RemoveEmptyEntries));
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Nothing has been added to the list at this point, so the caller sees an empty result.
        _logger.LogDebug(ex, "Failed to read cmdline for PID {Pid}", pid);
    }

    return arguments;
}
/// <summary>
/// Extracts the distinct assembly paths mapped into a process from its <c>maps</c> file.
/// </summary>
/// <param name="pid">Id of the process.</param>
/// <param name="cancellationToken">Cancels the file read.</param>
/// <returns>
/// Paths ending in <c>.dll</c>, plus <c>.so</c> files located under a <c>/dotnet/</c> directory.
/// The set is empty when the maps file is missing or cannot be read.
/// </returns>
private async Task<HashSet<string>> GetLoadedAssemblyPathsAsync(int pid, CancellationToken cancellationToken)
{
    // The kernel appends this marker to the pathname of a mapped file that has since been unlinked.
    const string DeletedSuffix = " (deleted)";

    var mapsPath = Path.Combine(_procRoot, pid.ToString(CultureInfo.InvariantCulture), "maps");

    // Paths come from a Linux proc filesystem, where file names are case-sensitive, so two paths
    // that differ only by case are different files and must not be merged.
    var paths = new HashSet<string>(StringComparer.Ordinal);
    if (!File.Exists(mapsPath))
    {
        return paths;
    }

    try
    {
        var lines = await File.ReadAllLinesAsync(mapsPath, cancellationToken).ConfigureAwait(false);
        foreach (var line in lines)
        {
            var match = MapsLineRegex.Match(line);
            if (!match.Success)
            {
                continue;
            }

            var path = match.Groups["path"].Value;

            // Strip the marker so an assembly replaced on disk while still loaded is reported
            // rather than silently failing the extension checks below.
            if (path.EndsWith(DeletedSuffix, StringComparison.Ordinal))
            {
                path = path[..^DeletedSuffix.Length];
            }

            // Skip blank names and pseudo-mappings such as [heap] or [stack].
            if (string.IsNullOrWhiteSpace(path) || path.StartsWith('['))
            {
                continue;
            }

            // Include .dll files and .NET native libraries (.so)
            if (path.EndsWith(".dll", StringComparison.OrdinalIgnoreCase) ||
                (path.Contains("/dotnet/", StringComparison.OrdinalIgnoreCase) &&
                 path.EndsWith(".so", StringComparison.OrdinalIgnoreCase)))
            {
                paths.Add(path);
            }
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        _logger.LogDebug(ex, "Failed to read maps for PID {Pid}", pid);
    }

    return paths;
}
/// <summary>
/// Locates the deps.json file that belongs to a process.
/// </summary>
/// <remarks>
/// Two locations are tried in order: next to the first <c>.dll</c> argument of the command line,
/// then any <c>*.deps.json</c> directly inside the process working directory.
/// The work is synchronous file probing, so the returned task is always already completed.
/// </remarks>
/// <param name="pid">Id of the process; used to reach its <c>cwd</c> link under the proc root.</param>
/// <param name="cmdline">Command line of the process.</param>
/// <param name="cancellationToken">Not observed; the method performs no asynchronous work.</param>
/// <returns>Path of the deps.json file, or <c>null</c> when none is found.</returns>
private Task<string?> FindDepsJsonAsync(int pid, IReadOnlyList<string> cmdline, CancellationToken cancellationToken)
{
    var cwdPath = Path.Combine(_procRoot, pid.ToString(CultureInfo.InvariantCulture), "cwd");

    // Look for the main .dll in cmdline
    var mainDll = cmdline.FirstOrDefault(arg => arg.EndsWith(".dll", StringComparison.OrdinalIgnoreCase));
    if (!string.IsNullOrEmpty(mainDll))
    {
        // A relative argument ("dotnet MyApp.dll") is relative to the target process's working
        // directory, not to the working directory of this observer.
        var resolvedDll = Path.IsPathRooted(mainDll) ? mainDll : Path.Combine(cwdPath, mainDll);
        if (File.Exists(resolvedDll))
        {
            var depsPath = Path.ChangeExtension(resolvedDll, ".deps.json");
            if (File.Exists(depsPath))
            {
                return Task.FromResult<string?>(depsPath);
            }
        }
    }

    // Try working directory
    try
    {
        if (Directory.Exists(cwdPath))
        {
            var cwd = Path.GetFullPath(cwdPath);
            var depsFiles = Directory.GetFiles(cwd, "*.deps.json", SearchOption.TopDirectoryOnly);
            if (depsFiles.Length > 0)
            {
                // Enumeration order is unspecified; sort so the same file is chosen on every run.
                Array.Sort(depsFiles, StringComparer.Ordinal);
                return Task.FromResult<string?>(depsFiles[0]);
            }
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        _logger.LogDebug(ex, "Failed to search for deps.json in process working directory");
    }

    return Task.FromResult<string?>(null);
}
/// <summary>
/// Parses a deps.json file into the runtime identifier, library list and assembly-to-package map.
/// </summary>
/// <remarks>
/// Parsing is best-effort: an oversized, unreadable or malformed file produces a debug log entry
/// and whatever metadata was gathered up to that point (possibly none). JSON nodes of an
/// unexpected kind are skipped instead of aborting the whole parse.
/// </remarks>
/// <param name="depsJsonPath">Path of the deps.json file.</param>
/// <param name="cancellationToken">Cancels the asynchronous JSON parse.</param>
/// <returns>The extracted metadata; never <c>null</c>.</returns>
private async Task<DepsJsonMetadata> ParseDepsJsonAsync(string depsJsonPath, CancellationToken cancellationToken)
{
    const long MaxDepsJsonSize = 10 * 1024 * 1024; // 10 MiB max for deps.json

    var metadata = new DepsJsonMetadata();
    try
    {
        var fileInfo = new FileInfo(depsJsonPath);
        if (fileInfo.Length > MaxDepsJsonSize)
        {
            _logger.LogDebug("deps.json too large: {Path} ({Size} bytes)", depsJsonPath, fileInfo.Length);
            return metadata;
        }

        await using var stream = new FileStream(depsJsonPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);

        var root = doc.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            // TryGetProperty throws on non-object elements, so reject such documents up front.
            _logger.LogDebug("deps.json root is not an object: {Path}", depsJsonPath);
            return metadata;
        }

        ReadRuntimeIdentifier(root, metadata);
        ReadLibraries(root, metadata);
        ReadTargets(root, metadata);
    }
    catch (Exception ex) when (ex is JsonException or IOException or UnauthorizedAccessException)
    {
        // UnauthorizedAccessException is included so that an unreadable deps.json is handled the
        // same way as every other unreadable file in this collector.
        _logger.LogDebug(ex, "Failed to parse deps.json: {Path}", depsJsonPath);
    }

    return metadata;
}

/// <summary>Extracts the RID from <c>runtimeTarget.name</c> (text after the last '/').</summary>
private static void ReadRuntimeIdentifier(JsonElement root, DepsJsonMetadata metadata)
{
    if (!TryGetObjectProperty(root, "runtimeTarget", out var runtimeTarget) ||
        !runtimeTarget.TryGetProperty("name", out var targetName))
    {
        return;
    }

    // Format: ".NETCoreApp,Version=v8.0/linux-x64"
    var target = GetStringOrNull(targetName) ?? string.Empty;
    var slashIndex = target.LastIndexOf('/');
    if (slashIndex > 0)
    {
        metadata.RuntimeIdentifier = target[(slashIndex + 1)..];
    }
}

/// <summary>Reads the <c>libraries</c> object ("name/version" keys) into <c>metadata.Libraries</c>.</summary>
private static void ReadLibraries(JsonElement root, DepsJsonMetadata metadata)
{
    if (!TryGetObjectProperty(root, "libraries", out var libraries))
    {
        return;
    }

    foreach (var library in libraries.EnumerateObject())
    {
        if (!TrySplitNameVersion(library.Name, out var packageName, out var packageVersion))
        {
            continue;
        }

        var libInfo = new LibraryInfo
        {
            PackageName = packageName,
            PackageVersion = packageVersion
        };

        if (library.Value.ValueKind == JsonValueKind.Object)
        {
            if (library.Value.TryGetProperty("type", out var typeElement))
            {
                libInfo.Type = GetStringOrNull(typeElement);
            }

            if (library.Value.TryGetProperty("sha512", out var sha512Element))
            {
                libInfo.Sha512 = GetStringOrNull(sha512Element);
            }
        }

        metadata.Libraries[packageName.ToLowerInvariant()] = libInfo;
    }
}

/// <summary>Reads the <c>targets</c> object into the assembly-name-to-package map.</summary>
private static void ReadTargets(JsonElement root, DepsJsonMetadata metadata)
{
    if (!TryGetObjectProperty(root, "targets", out var targets))
    {
        return;
    }

    // Sections are visited in this order; when an assembly name appears in several, the last wins.
    string[] sections = { "runtime", "compile", "native" };

    foreach (var target in targets.EnumerateObject())
    {
        if (target.Value.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        foreach (var package in target.Value.EnumerateObject())
        {
            if (!TrySplitNameVersion(package.Name, out var packageName, out var packageVersion))
            {
                continue;
            }

            // Map assemblies to packages
            foreach (var section in sections)
            {
                if (!TryGetObjectProperty(package.Value, section, out var assemblies))
                {
                    continue;
                }

                foreach (var assembly in assemblies.EnumerateObject())
                {
                    var assemblyName = Path.GetFileNameWithoutExtension(assembly.Name);
                    metadata.AssemblyPackages[assemblyName.ToLowerInvariant()] = new AssemblyPackageInfo
                    {
                        PackageName = packageName,
                        PackageVersion = packageVersion,
                        DepsSource = section
                    };
                }
            }
        }
    }
}

/// <summary>Splits a "name/version" key at its first '/'; fails when the name part would be empty.</summary>
private static bool TrySplitNameVersion(string nameVersion, out string name, out string version)
{
    var slashIndex = nameVersion.IndexOf('/');
    if (slashIndex <= 0)
    {
        name = string.Empty;
        version = string.Empty;
        return false;
    }

    name = nameVersion[..slashIndex];
    version = nameVersion[(slashIndex + 1)..];
    return true;
}

/// <summary>Gets a property only when both the parent and the property value are JSON objects.</summary>
private static bool TryGetObjectProperty(JsonElement parent, string propertyName, out JsonElement value)
{
    if (parent.ValueKind == JsonValueKind.Object &&
        parent.TryGetProperty(propertyName, out value) &&
        value.ValueKind == JsonValueKind.Object)
    {
        return true;
    }

    value = default;
    return false;
}

/// <summary>Returns the string value of an element, or <c>null</c> when it is not a JSON string.</summary>
private static string? GetStringOrNull(JsonElement element) =>
    element.ValueKind == JsonValueKind.String ? element.GetString() : null;
/// <summary>
/// Builds the entry for one loaded assembly path: identity, framework flag, NuGet correlation
/// and, when the file is readable and within the hashing limits, its SHA-256 and version.
/// </summary>
/// <remarks>
/// Everything derived from the path and from deps.json is filled in even when the file cannot be
/// found on disk; only hashing and version extraction need the file itself.
/// </remarks>
/// <param name="path">Assembly path as it appeared in the process maps file.</param>
/// <param name="depsMetadata">Metadata parsed from deps.json (may be empty).</param>
/// <param name="currentTotalBytesHashed">Bytes already hashed in this collection.</param>
/// <param name="cancellationToken">Cancels hashing.</param>
/// <returns>The entry, and the number of bytes hashed for it (0 when the file was not hashed).</returns>
private async Task<(LoadedAssemblyEntry? Entry, long BytesHashed)> ProcessAssemblyAsync(
    string path,
    DepsJsonMetadata depsMetadata,
    long currentTotalBytesHashed,
    CancellationToken cancellationToken)
{
    var name = Path.GetFileNameWithoutExtension(path);
    var entry = new LoadedAssemblyEntry
    {
        Name = name,
        Path = path,
        Rid = depsMetadata.RuntimeIdentifier,
        IsFrameworkAssembly = IsFrameworkPath(path)
    };

    // Look up NuGet package info from deps.json
    if (depsMetadata.AssemblyPackages.TryGetValue(name.ToLowerInvariant(), out var packageInfo))
    {
        entry = entry with
        {
            NuGetPackage = packageInfo.PackageName,
            NuGetVersion = packageInfo.PackageVersion,
            DepsSource = packageInfo.DepsSource,
            Purl = $"pkg:nuget/{packageInfo.PackageName}@{packageInfo.PackageVersion}"
        };
    }

    // Without the file there is nothing to hash or inspect; keep the metadata gathered so far.
    if (!File.Exists(path))
    {
        return (entry, 0);
    }

    long bytesHashed = 0;
    try
    {
        // Hash the file if within the per-file and cumulative limits
        var fileInfo = new FileInfo(path);
        if (fileInfo.Length <= MaxFileSize && currentTotalBytesHashed + fileInfo.Length <= MaxTotalHashBytes)
        {
            var hash = await ComputeFileHashAsync(path, cancellationToken).ConfigureAwait(false);
            entry = entry with { Sha256 = hash };
            bytesHashed = fileInfo.Length;
        }

        // Try to extract version from assembly
        var version = await TryGetAssemblyVersionAsync(path, cancellationToken).ConfigureAwait(false);
        if (!string.IsNullOrWhiteSpace(version))
        {
            entry = entry with { Version = version };
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        _logger.LogDebug(ex, "Failed to process assembly: {Path}", path);
    }

    return (entry, bytesHashed);
}
/// <summary>
/// Tells whether a path lies inside a shared .NET framework directory.
/// </summary>
/// <param name="path">Assembly path, using either '/' or '\' separators.</param>
/// <returns><c>true</c> when the lower-cased path contains one of the known framework directory markers.</returns>
private static bool IsFrameworkPath(string path)
{
    string[] frameworkMarkers =
    {
        "/dotnet/shared/",
        "/usr/share/dotnet/",
        "\\dotnet\\shared\\",
        "\\program files\\dotnet\\"
    };

    // Markers are lower-case, so the path is lower-cased once and compared ordinally.
    var normalized = path.ToLowerInvariant();
    foreach (var marker in frameworkMarkers)
    {
        if (normalized.Contains(marker, StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Computes the SHA-256 digest of a file.
/// </summary>
/// <param name="path">File to hash; opened read-only and shared for reading and writing.</param>
/// <param name="cancellationToken">Cancels the hashing.</param>
/// <returns>The digest formatted as <c>sha256:</c> followed by lower-case hexadecimal.</returns>
private static async Task<string> ComputeFileHashAsync(string path, CancellationToken cancellationToken)
{
    await using var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    byte[] digest = await SHA256.HashDataAsync(input, cancellationToken).ConfigureAwait(false);
    var hex = Convert.ToHexString(digest).ToLowerInvariant();
    return "sha256:" + hex;
}
/// <summary>
/// Reads the assembly version from the metadata of a managed PE file, without loading the assembly.
/// </summary>
/// <param name="path">File to inspect; opened read-only and shared for reading and writing.</param>
/// <param name="cancellationToken">Not observed; the metadata is read synchronously.</param>
/// <returns>
/// The version from the assembly definition (for example <c>8.0.0.0</c>), or <c>null</c> when the
/// file is not a PE image, carries no managed metadata, is not an assembly, or cannot be read.
/// </returns>
private static async Task<string?> TryGetAssemblyVersionAsync(string path, CancellationToken cancellationToken)
{
    try
    {
        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);

        // LeaveOpen: the stream is owned and disposed by the declaration above.
        using var peReader = new System.Reflection.PortableExecutable.PEReader(
            stream,
            System.Reflection.PortableExecutable.PEStreamOptions.LeaveOpen);

        // Native libraries (and PE files without a CLI header) have no metadata to read.
        if (!peReader.HasMetadata)
        {
            return null;
        }

        var metadataReader = System.Reflection.Metadata.PEReaderExtensions.GetMetadataReader(peReader);
        if (!metadataReader.IsAssembly)
        {
            return null;
        }

        return metadataReader.GetAssemblyDefinition().Version.ToString();
    }
    catch (Exception)
    {
        // Version extraction is best-effort: a non-PE file, a truncated image or an unreadable
        // file must not fail the collection, so every failure maps to "version unknown".
        return null;
    }
}
// Source-generated regex for spotting the dotnet host: matches a string that is exactly
// "dotnet" or "dotnet.exe", or that ends with "/dotnet" or "/dotnet.exe". Case-insensitive.
[GeneratedRegex(@"(^|/)(dotnet)(\.(exe))?$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex GenerateDotNetRegex();
// Source-generated regex for one line of a process "maps" file. In order it expects:
// address range (hex-hex), four permission characters, hex offset, device (hex:hex),
// decimal inode, and then at least one character of trailing text captured as "path".
// A line that ends right after the inode field therefore does not match.
[GeneratedRegex(@"^[0-9a-f]+-[0-9a-f]+\s+[r-][w-][x-][ps-]\s+[0-9a-f]+\s+[0-9a-f]+:[0-9a-f]+\s+\d+\s+(?<path>.+)$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex GenerateMapsRegex();
/// <summary>
/// Data extracted from a deps.json file. Both dictionaries compare keys case-insensitively.
/// </summary>
private sealed class DepsJsonMetadata
{
    /// <summary>Runtime identifier, or <c>null</c> when none was set.</summary>
    public string? RuntimeIdentifier { get; set; }

    /// <summary>Library entries keyed by package name.</summary>
    public Dictionary<string, LibraryInfo> Libraries { get; } =
        new Dictionary<string, LibraryInfo>(StringComparer.OrdinalIgnoreCase);

    /// <summary>Owning package keyed by assembly name.</summary>
    public Dictionary<string, AssemblyPackageInfo> AssemblyPackages { get; } =
        new Dictionary<string, AssemblyPackageInfo>(StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// One library entry: package identity plus its optional type and SHA-512 values.
/// </summary>
private sealed class LibraryInfo
{
    /// <summary>Package name; empty when not set.</summary>
    public string PackageName { get; init; } = "";

    /// <summary>Package version; empty when not set.</summary>
    public string PackageVersion { get; init; } = "";

    /// <summary>Value of the entry's "type" field, if any.</summary>
    public string? Type { get; set; }

    /// <summary>Value of the entry's "sha512" field, if any.</summary>
    public string? Sha512 { get; set; }
}
/// <summary>
/// The package that an assembly was attributed to, and the section it was listed under.
/// </summary>
private sealed class AssemblyPackageInfo
{
    /// <summary>Package name; empty when not set.</summary>
    public string PackageName { get; init; } = "";

    /// <summary>Package version; empty when not set.</summary>
    public string PackageVersion { get; init; } = "";

    /// <summary>Name of the section the assembly was found in, if recorded.</summary>
    public string? DepsSource { get; set; }
}
}