Restructure solution layout by module

This commit is contained in:
master
2025-10-28 15:10:40 +02:00
parent 95daa159c4
commit d870da18ce
4103 changed files with 192899 additions and 187024 deletions

View File

@@ -0,0 +1,29 @@
# StellaOps.Scanner.Analyzers.Lang.DotNet — Agent Charter
## Role
Create the .NET analyzer plug-in that inspects `*.deps.json`, `*.runtimeconfig.json`, assemblies, and RID-specific assets to deliver accurate NuGet components with signing metadata.
## Scope
- Parse dependency graphs from `*.deps.json` and merge them with `*.runtimeconfig.json` and bundle manifests.
- Capture assembly metadata (strong name, file version, Authenticode) and correlate with packages.
- Handle RID-specific asset selection, self-contained apps, and crossgen/native dependency hints.
- Package plug-in manifest, determinism fixtures, benchmarks, and Offline Kit documentation.
## Out of Scope
- Policy evaluation or Signer integration (handled elsewhere).
- Native dependency resolution outside RID mapping.
- Windows-specific MSI/SxS analyzers (covered by native analyzer roadmap).
## Expectations
- Performance target: multi-target app fixture <1.2s, memory <250MB.
- Deterministic RID collapsing to reduce component duplication by 40% vs naive approach.
- Offline-first; support air-gapped strong-name/Authenticode validation using cached root store.
- Rich telemetry (components per RID, strong-name validations) conforming to Scanner metrics.
## Dependencies
- Shared language analyzer infrastructure; Worker dispatcher; optional security key store for signature verification.
## Testing & Artifacts
- Fixtures for framework-dependent and self-contained apps (linux-musl, win-x64).
- Golden outputs capturing signature metadata and RID grouping.
- Benchmark comparing analyzer fidelity vs market competitors.

View File

@@ -0,0 +1,17 @@
using System;
using StellaOps.Scanner.Analyzers.Lang.Plugin;
namespace StellaOps.Scanner.Analyzers.Lang.DotNet;
/// <summary>
/// Plug-in entry point that hands out <see cref="DotNetLanguageAnalyzer"/> instances.
/// </summary>
public sealed class DotNetAnalyzerPlugin : ILanguageAnalyzerPlugin
{
    /// <summary>Gets the plug-in name.</summary>
    public string Name
    {
        get { return "StellaOps.Scanner.Analyzers.Lang.DotNet"; }
    }

    /// <summary>
    /// Reports the plug-in as available whenever a service provider is supplied.
    /// </summary>
    /// <param name="services">Service provider offered by the host.</param>
    /// <returns><c>true</c> when <paramref name="services"/> is not <c>null</c>.</returns>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>
    /// Creates a new .NET analyzer.
    /// </summary>
    /// <param name="services">Service provider offered by the host; only validated, not otherwise used.</param>
    /// <returns>A fresh <see cref="DotNetLanguageAnalyzer"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
    {
        if (services is null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        return new DotNetLanguageAnalyzer();
    }
}

View File

@@ -0,0 +1,37 @@
using StellaOps.Scanner.Analyzers.Lang.DotNet.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.DotNet;
/// <summary>
/// Language analyzer that forwards the packages found by <c>DotNetDependencyCollector</c>
/// to the component writer as <c>nuget</c> components.
/// </summary>
public sealed class DotNetLanguageAnalyzer : ILanguageAnalyzer
{
    /// <summary>Gets the analyzer identifier.</summary>
    public string Id
    {
        get { return "dotnet"; }
    }

    /// <summary>Gets the human-readable analyzer name.</summary>
    public string DisplayName
    {
        get { return ".NET Analyzer (preview)"; }
    }

    /// <summary>
    /// Collects the packages for <paramref name="context"/> and writes one component per package.
    /// </summary>
    /// <param name="context">Analysis context handed to the dependency collector.</param>
    /// <param name="writer">Sink that receives the discovered components.</param>
    /// <param name="cancellationToken">Token checked before each component is written.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="context"/> or <paramref name="writer"/> is <c>null</c>.
    /// </exception>
    public async ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(writer);

        var collected = await DotNetDependencyCollector
            .CollectAsync(context, cancellationToken)
            .ConfigureAwait(false);

        // Nothing to report when the collector found no packages.
        if (collected.Count > 0)
        {
            foreach (var item in collected)
            {
                cancellationToken.ThrowIfCancellationRequested();

                writer.AddFromPurl(
                    analyzerId: Id,
                    purl: item.Purl,
                    name: item.Name,
                    version: item.Version,
                    type: "nuget",
                    metadata: item.Metadata,
                    evidence: item.Evidence,
                    usedByEntrypoint: item.UsedByEntrypoint);
            }
        }
    }
}

View File

@@ -0,0 +1,7 @@
global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Threading;
global using System.Threading.Tasks;
global using StellaOps.Scanner.Analyzers.Lang;

View File

@@ -0,0 +1,14 @@
namespace StellaOps.Scanner.Analyzers.Lang.DotNet;
/// <summary>
/// Abstraction for obtaining <see cref="DotNetAuthenticodeMetadata"/> for an assembly file.
/// </summary>
public interface IDotNetAuthenticodeInspector
{
/// <summary>
/// Attempts to inspect the assembly at <paramref name="assemblyPath"/>.
/// </summary>
/// <param name="assemblyPath">Path of the assembly to inspect.</param>
/// <param name="cancellationToken">Token that can be used to cancel the inspection.</param>
/// <returns>
/// The metadata that was found, or <c>null</c>.
/// NOTE(review): presumably <c>null</c> means "unsigned or not inspectable" — confirm against the implementations.
/// </returns>
DotNetAuthenticodeMetadata? TryInspect(string assemblyPath, CancellationToken cancellationToken);
}
/// <summary>
/// Result returned by <see cref="IDotNetAuthenticodeInspector.TryInspect"/>; every field is optional.
/// NOTE(review): the fields presumably describe the signing certificate (subject, issuer,
/// validity window, thumbprint, serial number) — confirm against the inspector implementation.
/// </summary>
public sealed record DotNetAuthenticodeMetadata(
string? Subject,
string? Issuer,
DateTimeOffset? NotBefore,
DateTimeOffset? NotAfter,
string? Thumbprint,
string? SerialNumber);

View File

@@ -0,0 +1,518 @@
using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.DotNet.Internal;
/// <summary>
/// Parsed view of a deps file: the entries of its <c>libraries</c> section, enriched with
/// what the <c>targets</c> section says about each of them.
/// </summary>
internal sealed class DotNetDepsFile
{
    private DotNetDepsFile(string relativePath, IReadOnlyDictionary<string, DotNetLibrary> libraries)
    {
        RelativePath = relativePath;
        Libraries = libraries;
    }

    /// <summary>Gets the relative path that was passed to <see cref="Load"/>.</summary>
    public string RelativePath { get; }

    /// <summary>Gets the parsed libraries keyed by their <c>libraries</c> property name.</summary>
    public IReadOnlyDictionary<string, DotNetLibrary> Libraries { get; }

    /// <summary>
    /// Reads and parses the deps file at <paramref name="absolutePath"/>.
    /// </summary>
    /// <param name="absolutePath">Location of the file on disk.</param>
    /// <param name="relativePath">Path recorded on the returned instance.</param>
    /// <param name="cancellationToken">Token checked while iterating libraries and targets.</param>
    /// <returns>
    /// The parsed file, or <c>null</c> when the root is not a JSON object or no library could be parsed.
    /// </returns>
    public static DotNetDepsFile? Load(string absolutePath, string relativePath, CancellationToken cancellationToken)
    {
        var parseOptions = new JsonDocumentOptions
        {
            AllowTrailingCommas = true,
            CommentHandling = JsonCommentHandling.Skip
        };

        using var input = File.OpenRead(absolutePath);
        using var json = JsonDocument.Parse(input, parseOptions);

        var rootElement = json.RootElement;
        if (rootElement.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var parsed = ParseLibraries(rootElement, cancellationToken);
        if (parsed.Count > 0)
        {
            PopulateTargets(rootElement, parsed, cancellationToken);
            return new DotNetDepsFile(relativePath, parsed);
        }

        return null;
    }

    // Builds the key -> library map from the "libraries" object; entries that fail to parse are dropped.
    private static Dictionary<string, DotNetLibrary> ParseLibraries(JsonElement root, CancellationToken cancellationToken)
    {
        var byKey = new Dictionary<string, DotNetLibrary>(StringComparer.Ordinal);

        var hasSection = root.TryGetProperty("libraries", out var section)
            && section.ValueKind == JsonValueKind.Object;

        if (hasSection)
        {
            foreach (var entry in section.EnumerateObject())
            {
                cancellationToken.ThrowIfCancellationRequested();

                if (!DotNetLibrary.TryCreate(entry.Name, entry.Value, out var created))
                {
                    continue;
                }

                byKey[entry.Name] = created;
            }
        }

        return byKey;
    }

    // Walks "targets" and records, per known library, the TFM/RID of the target plus its target-level metadata.
    private static void PopulateTargets(JsonElement root, IDictionary<string, DotNetLibrary> libraries, CancellationToken cancellationToken)
    {
        if (!root.TryGetProperty("targets", out var targets))
        {
            return;
        }

        if (targets.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        foreach (var target in targets.EnumerateObject())
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (target.Value.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var (framework, runtime) = ParseTargetKey(target.Name);

            foreach (var entry in target.Value.EnumerateObject())
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Only libraries that were declared in the "libraries" section are enriched.
                if (libraries.TryGetValue(entry.Name, out var known))
                {
                    if (!string.IsNullOrEmpty(framework))
                    {
                        known.AddTargetFramework(framework);
                    }

                    if (!string.IsNullOrEmpty(runtime))
                    {
                        known.AddRuntimeIdentifier(runtime);
                    }

                    known.MergeTargetMetadata(entry.Value, framework, runtime);
                }
            }
        }
    }

    // Splits a target key at its first '/' into a trimmed TFM and an optional trimmed RID.
    private static (string tfm, string? rid) ParseTargetKey(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return (string.Empty, null);
        }

        var segments = value.Split('/', 2);
        var framework = segments[0].Trim();
        if (segments.Length == 1)
        {
            return (framework, null);
        }

        var runtime = segments[1].Trim();
        return (framework, runtime.Length == 0 ? null : runtime);
    }
}
/// <summary>
/// One entry of a deps file's <c>libraries</c> section, together with the dependencies,
/// target frameworks, runtime identifiers and assets merged in from the <c>targets</c> section.
/// </summary>
internal sealed class DotNetLibrary
{
    private readonly HashSet<string> _dependencies = new(StringComparer.OrdinalIgnoreCase);
    private readonly HashSet<string> _runtimeIdentifiers = new(StringComparer.Ordinal);
    private readonly List<DotNetLibraryAsset> _runtimeAssets = new();
    private readonly HashSet<string> _targetFrameworks = new(StringComparer.Ordinal);

    private DotNetLibrary(
        string key,
        string id,
        string version,
        string type,
        bool? serviceable,
        string? sha512,
        string? path,
        string? hashPath)
    {
        Key = key;
        Id = id;
        Version = version;
        Type = type;
        Serviceable = serviceable;
        Sha512 = NormalizeValue(sha512);
        PackagePath = NormalizePath(path);
        HashPath = NormalizePath(hashPath);
    }

    /// <summary>Gets the original <c>name/version</c> key of the library.</summary>
    public string Key { get; }

    /// <summary>Gets the name part of <see cref="Key"/>.</summary>
    public string Id { get; }

    /// <summary>Gets the version part of <see cref="Key"/>.</summary>
    public string Version { get; }

    /// <summary>Gets the <c>type</c> value, or an empty string when it is absent.</summary>
    public string Type { get; }

    /// <summary>Gets the <c>serviceable</c> flag, or <c>null</c> when it is absent or not a boolean.</summary>
    public bool? Serviceable { get; }

    /// <summary>Gets the trimmed <c>sha512</c> value, or <c>null</c> when absent or blank.</summary>
    public string? Sha512 { get; }

    /// <summary>Gets the <c>path</c> value with forward slashes, or <c>null</c> when absent or blank.</summary>
    public string? PackagePath { get; }

    /// <summary>Gets the <c>hashPath</c> value with forward slashes, or <c>null</c> when absent or blank.</summary>
    public string? HashPath { get; }

    /// <summary>Gets a value indicating whether <see cref="Type"/> is <c>package</c> (case-insensitive).</summary>
    public bool IsPackage => string.Equals(Type, "package", StringComparison.OrdinalIgnoreCase);

    /// <summary>Gets the dependency names (versions stripped, case-insensitive set).</summary>
    public IReadOnlyCollection<string> Dependencies => _dependencies;

    /// <summary>Gets the target frameworks under which this library was listed.</summary>
    public IReadOnlyCollection<string> TargetFrameworks => _targetFrameworks;

    /// <summary>Gets the runtime identifiers under which this library was listed.</summary>
    public IReadOnlyCollection<string> RuntimeIdentifiers => _runtimeIdentifiers;

    /// <summary>Gets the assets collected from <c>runtime</c> and <c>runtimeTargets</c> sections.</summary>
    public IReadOnlyCollection<DotNetLibraryAsset> RuntimeAssets => _runtimeAssets;

    /// <summary>
    /// Attempts to build a library from one property of the <c>libraries</c> section.
    /// </summary>
    /// <param name="key">Property name, expected in <c>name/version</c> form.</param>
    /// <param name="element">Property value describing the library.</param>
    /// <param name="library">The created library when the method returns <c>true</c>.</param>
    /// <returns>
    /// <c>false</c> when <paramref name="element"/> is not a JSON object or <paramref name="key"/>
    /// cannot be split into a name and a version; otherwise <c>true</c>.
    /// </returns>
    public static bool TryCreate(string key, JsonElement element, [NotNullWhen(true)] out DotNetLibrary? library)
    {
        library = null;

        // JsonElement.TryGetProperty throws InvalidOperationException on non-object values,
        // so a malformed entry must be rejected here rather than crash the whole parse.
        if (element.ValueKind is not JsonValueKind.Object)
        {
            return false;
        }

        if (!TrySplitNameAndVersion(key, out var id, out var version))
        {
            return false;
        }

        var type = ReadString(element, "type") ?? string.Empty;

        bool? serviceable = null;
        if (element.TryGetProperty("serviceable", out var serviceableElement))
        {
            if (serviceableElement.ValueKind is JsonValueKind.True)
            {
                serviceable = true;
            }
            else if (serviceableElement.ValueKind is JsonValueKind.False)
            {
                serviceable = false;
            }
        }

        library = new DotNetLibrary(
            key,
            id,
            version,
            type,
            serviceable,
            ReadString(element, "sha512"),
            ReadString(element, "path"),
            ReadString(element, "hashPath"));

        library.MergeLibraryMetadata(element);
        return true;
    }

    /// <summary>Records a target framework; blank values are ignored.</summary>
    /// <param name="tfm">Target framework moniker to record.</param>
    public void AddTargetFramework(string tfm)
    {
        if (!string.IsNullOrWhiteSpace(tfm))
        {
            _targetFrameworks.Add(tfm);
        }
    }

    /// <summary>Records a runtime identifier; blank values are ignored.</summary>
    /// <param name="rid">Runtime identifier to record.</param>
    public void AddRuntimeIdentifier(string rid)
    {
        if (!string.IsNullOrWhiteSpace(rid))
        {
            _runtimeIdentifiers.Add(rid);
        }
    }

    /// <summary>
    /// Merges dependencies and assets from this library's entry under one target.
    /// Non-object values are ignored.
    /// </summary>
    /// <param name="element">The library's value inside a <c>targets</c> entry.</param>
    /// <param name="tfm">Target framework of the enclosing target, if any.</param>
    /// <param name="rid">Runtime identifier of the enclosing target, if any.</param>
    public void MergeTargetMetadata(JsonElement element, string? tfm, string? rid)
    {
        MergeMetadata(element, tfm, rid);
    }

    /// <summary>
    /// Merges dependencies and assets found directly on the library's <c>libraries</c> entry.
    /// Non-object values are ignored.
    /// </summary>
    /// <param name="element">The library's value inside the <c>libraries</c> section.</param>
    public void MergeLibraryMetadata(JsonElement element)
    {
        MergeMetadata(element, tfm: null, rid: null);
    }

    // Shared implementation of the two public merge methods.
    private void MergeMetadata(JsonElement element, string? tfm, string? rid)
    {
        // Guard first: TryGetProperty is only valid on JSON objects.
        if (element.ValueKind is not JsonValueKind.Object)
        {
            return;
        }

        if (element.TryGetProperty("dependencies", out var dependenciesElement) && dependenciesElement.ValueKind is JsonValueKind.Object)
        {
            foreach (var dependencyProperty in dependenciesElement.EnumerateObject())
            {
                AddDependency(dependencyProperty.Name);
            }
        }

        AddRuntimeAssetsFromRuntime(element, tfm, rid);
        AddRuntimeAssetsFromRuntimeTargets(element, tfm, rid);
    }

    // Adds every acceptable asset listed under the "runtime" object.
    private void AddRuntimeAssetsFromRuntime(JsonElement element, string? tfm, string? rid)
    {
        if (!element.TryGetProperty("runtime", out var runtimeElement) || runtimeElement.ValueKind is not JsonValueKind.Object)
        {
            return;
        }

        foreach (var assetProperty in runtimeElement.EnumerateObject())
        {
            if (DotNetLibraryAsset.TryCreateFromRuntime(assetProperty.Name, assetProperty.Value, tfm, rid, out var asset))
            {
                _runtimeAssets.Add(asset);
            }
        }
    }

    // Adds every acceptable asset listed under the "runtimeTargets" object.
    private void AddRuntimeAssetsFromRuntimeTargets(JsonElement element, string? tfm, string? rid)
    {
        if (!element.TryGetProperty("runtimeTargets", out var runtimeTargetsElement) || runtimeTargetsElement.ValueKind is not JsonValueKind.Object)
        {
            return;
        }

        foreach (var assetProperty in runtimeTargetsElement.EnumerateObject())
        {
            if (DotNetLibraryAsset.TryCreateFromRuntimeTarget(assetProperty.Name, assetProperty.Value, tfm, rid, out var asset))
            {
                _runtimeAssets.Add(asset);
            }
        }
    }

    // Stores the dependency name; a "name/version" form is reduced to its name part.
    private void AddDependency(string name)
    {
        if (string.IsNullOrWhiteSpace(name))
        {
            return;
        }

        var dependencyId = TrySplitNameAndVersion(name, out var parsedName, out _)
            ? parsedName
            : name;

        if (!string.IsNullOrWhiteSpace(dependencyId))
        {
            _dependencies.Add(dependencyId);
        }
    }

    // Returns the string value of a property, or null when it is missing or not a JSON string.
    private static string? ReadString(JsonElement element, string propertyName)
        => element.TryGetProperty(propertyName, out var property) && property.ValueKind == JsonValueKind.String
            ? property.GetString()
            : null;

    // Splits "name/version" at the LAST '/', requiring non-empty trimmed parts on both sides.
    private static bool TrySplitNameAndVersion(string key, out string name, out string version)
    {
        name = string.Empty;
        version = string.Empty;

        if (string.IsNullOrWhiteSpace(key))
        {
            return false;
        }

        var separatorIndex = key.LastIndexOf('/');
        if (separatorIndex <= 0 || separatorIndex >= key.Length - 1)
        {
            return false;
        }

        name = key[..separatorIndex].Trim();
        version = key[(separatorIndex + 1)..].Trim();
        return name.Length > 0 && version.Length > 0;
    }

    // Blank -> null; otherwise backslashes become forward slashes.
    private static string? NormalizePath(string? path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            return null;
        }

        return path.Replace('\\', '/');
    }

    // Blank -> null; otherwise the trimmed value.
    private static string? NormalizeValue(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        return value.Trim();
    }
}
/// <summary>Classifies an asset recorded for a library.</summary>
internal enum DotNetLibraryAssetKind
{
/// <summary>Asset whose path ends in <c>.dll</c> or <c>.exe</c> and is treated as a managed assembly.</summary>
Runtime,
/// <summary>Asset declared with an <c>assetType</c> of <c>native</c>.</summary>
Native
}
/// <summary>
/// A single asset of a library, as listed under a <c>runtime</c> or <c>runtimeTargets</c> object.
/// </summary>
/// <param name="RelativePath">Asset path, trimmed and using forward slashes.</param>
/// <param name="TargetFramework">Target framework the asset was listed under, if any.</param>
/// <param name="RuntimeIdentifier">Runtime identifier of the asset, if any.</param>
/// <param name="AssemblyVersion">Declared <c>assemblyVersion</c>, if any.</param>
/// <param name="FileVersion">Declared <c>fileVersion</c>, if any.</param>
/// <param name="Kind">Whether the asset is a managed runtime assembly or a native file.</param>
internal sealed record DotNetLibraryAsset(
    string RelativePath,
    string? TargetFramework,
    string? RuntimeIdentifier,
    string? AssemblyVersion,
    string? FileVersion,
    DotNetLibraryAssetKind Kind)
{
    /// <summary>
    /// Attempts to create a runtime asset from one property of a <c>runtime</c> object.
    /// </summary>
    /// <param name="name">Property name, i.e. the asset path.</param>
    /// <param name="element">Property value; version details are read only when it is a JSON object.</param>
    /// <param name="tfm">Target framework of the enclosing target, if any.</param>
    /// <param name="rid">Runtime identifier of the enclosing target, if any.</param>
    /// <param name="asset">The created asset when the method returns <c>true</c>.</param>
    /// <returns><c>false</c> when the path is blank or does not end in <c>.dll</c>/<c>.exe</c>.</returns>
    public static bool TryCreateFromRuntime(string name, JsonElement element, string? tfm, string? rid, [NotNullWhen(true)] out DotNetLibraryAsset? asset)
    {
        asset = null;

        if (string.IsNullOrWhiteSpace(name))
        {
            return false;
        }

        var normalizedPath = NormalizePath(name);
        if (string.IsNullOrEmpty(normalizedPath) || !IsManagedAssembly(normalizedPath))
        {
            return false;
        }

        string? assemblyVersion = null;
        string? fileVersion = null;

        // TryGetProperty is only valid on objects; other value kinds simply carry no version details.
        if (element.ValueKind == JsonValueKind.Object)
        {
            assemblyVersion = ReadString(element, "assemblyVersion");
            fileVersion = ReadString(element, "fileVersion");
        }

        asset = new DotNetLibraryAsset(
            RelativePath: normalizedPath,
            TargetFramework: NormalizeValue(tfm),
            RuntimeIdentifier: NormalizeValue(rid),
            AssemblyVersion: assemblyVersion,
            FileVersion: fileVersion,
            Kind: DotNetLibraryAssetKind.Runtime);
        return true;
    }

    /// <summary>
    /// Attempts to create an asset from one property of a <c>runtimeTargets</c> object.
    /// </summary>
    /// <param name="name">Property name, i.e. the asset path.</param>
    /// <param name="element">Property value; must be a JSON object.</param>
    /// <param name="tfm">Target framework of the enclosing target, if any.</param>
    /// <param name="rid">
    /// Runtime identifier of the enclosing target; used when the asset does not declare a
    /// non-blank <c>rid</c> of its own.
    /// </param>
    /// <param name="asset">The created asset when the method returns <c>true</c>.</param>
    /// <returns>
    /// <c>false</c> when the path is blank, the value is not an object, the <c>assetType</c> is
    /// neither <c>runtime</c> (or absent) nor <c>native</c>, or a runtime asset does not end in
    /// <c>.dll</c>/<c>.exe</c>.
    /// </returns>
    public static bool TryCreateFromRuntimeTarget(string name, JsonElement element, string? tfm, string? rid, [NotNullWhen(true)] out DotNetLibraryAsset? asset)
    {
        asset = null;

        if (string.IsNullOrWhiteSpace(name) || element.ValueKind is not JsonValueKind.Object)
        {
            return false;
        }

        var assetType = ReadString(element, "assetType");

        var normalizedPath = NormalizePath(name);
        if (string.IsNullOrEmpty(normalizedPath))
        {
            return false;
        }

        DotNetLibraryAssetKind kind;
        if (assetType is null || string.Equals(assetType, "runtime", StringComparison.OrdinalIgnoreCase))
        {
            if (!IsManagedAssembly(normalizedPath))
            {
                return false;
            }

            kind = DotNetLibraryAssetKind.Runtime;
        }
        else if (string.Equals(assetType, "native", StringComparison.OrdinalIgnoreCase))
        {
            kind = DotNetLibraryAssetKind.Native;
        }
        else
        {
            return false;
        }

        // Version details are only recorded for managed runtime assets.
        var isRuntime = kind == DotNetLibraryAssetKind.Runtime;
        var assemblyVersion = isRuntime ? ReadString(element, "assemblyVersion") : null;
        var fileVersion = isRuntime ? ReadString(element, "fileVersion") : null;

        // An asset-level "rid" wins, but a missing or blank one must not erase the target-level RID.
        var runtimeIdentifier = ReadString(element, "rid") ?? rid;

        asset = new DotNetLibraryAsset(
            RelativePath: normalizedPath,
            TargetFramework: NormalizeValue(tfm),
            RuntimeIdentifier: NormalizeValue(runtimeIdentifier),
            AssemblyVersion: assemblyVersion,
            FileVersion: fileVersion,
            Kind: kind);
        return true;
    }

    // Returns the trimmed string value of a property, or null when it is missing, not a string, or blank.
    private static string? ReadString(JsonElement element, string propertyName)
        => element.TryGetProperty(propertyName, out var property) && property.ValueKind == JsonValueKind.String
            ? NormalizeValue(property.GetString())
            : null;

    // Trims the value and converts backslashes to forward slashes; blank input yields an empty string.
    private static string NormalizePath(string value)
    {
        var normalized = NormalizeValue(value);
        if (string.IsNullOrEmpty(normalized))
        {
            return string.Empty;
        }

        return normalized.Replace('\\', '/');
    }

    // Managed assemblies are recognised purely by file extension.
    private static bool IsManagedAssembly(string path)
        => path.EndsWith(".dll", StringComparison.OrdinalIgnoreCase) ||
           path.EndsWith(".exe", StringComparison.OrdinalIgnoreCase);

    // Blank -> null; otherwise the trimmed value.
    private static string? NormalizeValue(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        return value.Trim();
    }
}

View File

@@ -0,0 +1,332 @@
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Security;
using System.Xml;
namespace StellaOps.Scanner.Analyzers.Lang.DotNet.Internal;
/// <summary>
/// Process-wide caches for per-file SHA-256 digests, assembly names and file version info.
/// Entries are keyed by full path, length and last-write time of the file.
/// </summary>
internal static class DotNetFileMetadataCache
{
    private static readonly ConcurrentDictionary<DotNetFileCacheKey, Optional<string>> Sha256Cache = new();
    private static readonly ConcurrentDictionary<DotNetFileCacheKey, Optional<AssemblyName>> AssemblyCache = new();
    private static readonly ConcurrentDictionary<DotNetFileCacheKey, Optional<FileVersionInfo>> VersionCache = new();

    /// <summary>Gets the lower-case hex SHA-256 of the file, if it can be computed.</summary>
    /// <param name="path">File to hash.</param>
    /// <param name="sha256">The digest when the method returns <c>true</c>.</param>
    /// <returns><c>true</c> when a digest is available.</returns>
    public static bool TryGetSha256(string path, out string? sha256)
    {
        return TryGet(path, Sha256Cache, ComputeSha256, out sha256);
    }

    /// <summary>Gets the assembly name of the file, if it can be read.</summary>
    /// <param name="path">Assembly file to read.</param>
    /// <param name="assemblyName">The assembly name when the method returns <c>true</c>.</param>
    /// <returns><c>true</c> when an assembly name is available.</returns>
    public static bool TryGetAssemblyName(string path, out AssemblyName? assemblyName)
    {
        return TryGet(path, AssemblyCache, TryReadAssemblyName, out assemblyName);
    }

    /// <summary>Gets the file version info of the file, if it can be read.</summary>
    /// <param name="path">File to read.</param>
    /// <param name="versionInfo">The version info when the method returns <c>true</c>.</param>
    /// <returns><c>true</c> when version info is available.</returns>
    public static bool TryGetFileVersionInfo(string path, out FileVersionInfo? versionInfo)
    {
        return TryGet(path, VersionCache, TryReadFileVersionInfo, out versionInfo);
    }

    // Looks the file up in the given cache, resolving (and caching) the value on first use.
    private static bool TryGet<T>(string path, ConcurrentDictionary<DotNetFileCacheKey, Optional<T>> cache, Func<string, T?> resolver, out T? value)
        where T : class
    {
        value = null;

        if (!TryCreateKey(path, out var key))
        {
            return false;
        }

        // The resolver is passed as factory state so the lambda can stay static (no closure).
        var entry = cache.GetOrAdd(
            key,
            static (cacheKey, resolve) => CreateOptional(cacheKey.Path, resolve),
            resolver);

        if (entry.HasValue)
        {
            value = entry.Value;
        }

        return value is not null;
    }

    // Builds the cache key from the file's current metadata; missing or inaccessible files yield false.
    private static bool TryCreateKey(string path, out DotNetFileCacheKey key)
    {
        key = default;

        try
        {
            var file = new FileInfo(path);
            if (!file.Exists)
            {
                return false;
            }

            key = new DotNetFileCacheKey(file.FullName, file.Length, file.LastWriteTimeUtc.Ticks);
            return true;
        }
        catch (Exception ex) when (ex is IOException
            or UnauthorizedAccessException
            or SecurityException
            or ArgumentException
            or NotSupportedException)
        {
            return false;
        }
    }

    // Runs the resolver and maps the listed I/O, image-format and access failures to an empty optional.
    private static Optional<T> CreateOptional<T>(string path, Func<string, T?> resolver) where T : class
    {
        try
        {
            return Optional<T>.From(resolver(path));
        }
        catch (Exception ex) when (ex is IOException
            or BadImageFormatException
            or UnauthorizedAccessException
            or SecurityException)
        {
            // FileNotFoundException and FileLoadException are covered by IOException.
            return Optional<T>.None;
        }
    }

    // Hashes the whole file and renders the digest as lower-case hexadecimal.
    private static string? ComputeSha256(string path)
    {
        using var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var algorithm = System.Security.Cryptography.SHA256.Create();

        var digest = algorithm.ComputeHash(input);
        var hex = Convert.ToHexString(digest);
        return hex.ToLowerInvariant();
    }

    // Reads the assembly name; unreadable or non-assembly files yield null.
    private static AssemblyName? TryReadAssemblyName(string path)
    {
        try
        {
            return AssemblyName.GetAssemblyName(path);
        }
        catch (Exception ex) when (ex is IOException or BadImageFormatException)
        {
            return null;
        }
    }

    // Reads the file version info; missing or inaccessible files yield null.
    private static FileVersionInfo? TryReadFileVersionInfo(string path)
    {
        try
        {
            return FileVersionInfo.GetVersionInfo(path);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            return null;
        }
    }
}
/// <summary>
/// Process-wide cache of license information parsed from nuspec files, keyed by the file's
/// full path, length and last-write time.
/// </summary>
internal static class DotNetLicenseCache
{
    private static readonly ConcurrentDictionary<DotNetFileCacheKey, Optional<DotNetLicenseInfo>> Licenses = new();

    /// <summary>
    /// Gets the license information declared in the nuspec at <paramref name="nuspecPath"/>.
    /// </summary>
    /// <param name="nuspecPath">Path of the nuspec file.</param>
    /// <param name="info">The license information when the method returns <c>true</c>.</param>
    /// <returns>
    /// <c>false</c> when the file is missing, inaccessible, has an invalid path, is not
    /// well-formed XML, or declares no license details; otherwise <c>true</c>.
    /// </returns>
    public static bool TryGetLicenseInfo(string nuspecPath, out DotNetLicenseInfo? info)
    {
        info = null;

        DotNetFileCacheKey key;
        try
        {
            var fileInfo = new FileInfo(nuspecPath);
            if (!fileInfo.Exists)
            {
                return false;
            }

            key = new DotNetFileCacheKey(fileInfo.FullName, fileInfo.Length, fileInfo.LastWriteTimeUtc.Ticks);
        }
        catch (Exception ex) when (ex is IOException
            or UnauthorizedAccessException
            or SecurityException
            or ArgumentException
            or NotSupportedException)
        {
            // Invalid paths (ArgumentException / NotSupportedException) are treated like any other
            // unreadable file, matching DotNetFileMetadataCache.
            return false;
        }

        var optional = Licenses.GetOrAdd(key, static (cacheKey, path) => CreateOptional(path), nuspecPath);
        if (!optional.HasValue)
        {
            return false;
        }

        info = optional.Value;
        return info is not null;
    }

    // Parses the nuspec and maps I/O, access and XML failures to an empty optional.
    private static Optional<DotNetLicenseInfo> CreateOptional(string nuspecPath)
    {
        try
        {
            var info = Parse(nuspecPath);
            return Optional<DotNetLicenseInfo>.From(info);
        }
        catch (IOException)
        {
            return Optional<DotNetLicenseInfo>.None;
        }
        catch (UnauthorizedAccessException)
        {
            return Optional<DotNetLicenseInfo>.None;
        }
        catch (SecurityException)
        {
            return Optional<DotNetLicenseInfo>.None;
        }
        catch (XmlException)
        {
            return Optional<DotNetLicenseInfo>.None;
        }
    }

    // Collects every <license> and <licenseUrl> element of the document; returns null when none carry a value.
    private static DotNetLicenseInfo? Parse(string path)
    {
        using var stream = File.OpenRead(path);
        using var reader = XmlReader.Create(stream, new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore,
            IgnoreComments = true,
            IgnoreWhitespace = true,
        });

        var expressions = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
        var files = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
        var urls = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);

        var hasNode = reader.Read();
        while (hasNode)
        {
            if (reader.NodeType == XmlNodeType.Element)
            {
                if (string.Equals(reader.LocalName, "license", StringComparison.OrdinalIgnoreCase))
                {
                    var type = reader.GetAttribute("type");
                    var value = reader.ReadElementContentAsString().Trim();

                    if (value.Length > 0)
                    {
                        if (string.Equals(type, "file", StringComparison.OrdinalIgnoreCase))
                        {
                            files.Add(NormalizeLicensePath(value));
                        }
                        else
                        {
                            // type="expression" and any unknown/missing type are both recorded as expressions.
                            expressions.Add(value);
                        }
                    }

                    // ReadElementContentAsString already moved the reader onto the node AFTER the
                    // element. Calling Read() here as well would skip that node, e.g. a <licenseUrl>
                    // directly following <license> when whitespace nodes are ignored.
                    hasNode = !reader.EOF;
                    continue;
                }

                if (string.Equals(reader.LocalName, "licenseUrl", StringComparison.OrdinalIgnoreCase))
                {
                    var value = reader.ReadElementContentAsString().Trim();
                    if (value.Length > 0)
                    {
                        urls.Add(value);
                    }

                    // Same as above: the reader is already positioned on the next node.
                    hasNode = !reader.EOF;
                    continue;
                }
            }

            hasNode = reader.Read();
        }

        if (expressions.Count == 0 && files.Count == 0 && urls.Count == 0)
        {
            return null;
        }

        return new DotNetLicenseInfo(
            expressions.ToArray(),
            files.ToArray(),
            urls.ToArray());
    }

    // License file paths are stored with forward slashes and without surrounding whitespace.
    private static string NormalizeLicensePath(string value)
        => value.Replace('\\', '/').Trim();
}
/// <summary>
/// License details gathered from a nuspec file: values of <c>license</c> elements that are not
/// of type <c>file</c> (<c>Expressions</c>), normalized paths from <c>license</c> elements of
/// type <c>file</c> (<c>Files</c>), and values of <c>licenseUrl</c> elements (<c>Urls</c>).
/// </summary>
internal sealed record DotNetLicenseInfo(
IReadOnlyList<string> Expressions,
IReadOnlyList<string> Files,
IReadOnlyList<string> Urls);
/// <summary>
/// Cache key identifying one version of a file by path, length and last-write ticks.
/// Paths compare case-insensitively on Windows and case-sensitively elsewhere.
/// </summary>
/// <param name="Path">Path of the file.</param>
/// <param name="Length">File length in bytes.</param>
/// <param name="LastWriteTicks">Ticks of the file's last-write timestamp.</param>
internal readonly record struct DotNetFileCacheKey(string Path, long Length, long LastWriteTicks)
{
    // Comparison form of the path, computed once at construction.
    private readonly string _normalizedPath = NormalizeForComparison(Path);

    /// <summary>Compares length, last-write ticks and the normalized path.</summary>
    /// <param name="other">Key to compare with.</param>
    /// <returns><c>true</c> when all three components match.</returns>
    public bool Equals(DotNetFileCacheKey other)
    {
        if (Length != other.Length || LastWriteTicks != other.LastWriteTicks)
        {
            return false;
        }

        return string.Equals(_normalizedPath, other._normalizedPath, StringComparison.Ordinal);
    }

    /// <summary>Hashes the same components that <see cref="Equals(DotNetFileCacheKey)"/> compares.</summary>
    /// <returns>The combined hash code.</returns>
    public override int GetHashCode()
    {
        return HashCode.Combine(_normalizedPath, Length, LastWriteTicks);
    }

    // Lower-cases the path on Windows only.
    private static string NormalizeForComparison(string path)
    {
        if (OperatingSystem.IsWindows())
        {
            return path.ToLowerInvariant();
        }

        return path;
    }
}
/// <summary>
/// Minimal optional wrapper for reference types; the default value represents "no value".
/// </summary>
/// <typeparam name="T">Wrapped reference type.</typeparam>
internal readonly struct Optional<T> where T : class
{
    private Optional(bool present, T? payload)
    {
        HasValue = present;
        Value = payload;
    }

    /// <summary>Gets a value indicating whether a value is present.</summary>
    public bool HasValue { get; }

    /// <summary>Gets the wrapped value, or <c>null</c> when none is present.</summary>
    public T? Value { get; }

    /// <summary>Wraps <paramref name="value"/>; <c>null</c> maps to <see cref="None"/>.</summary>
    /// <param name="value">Value to wrap.</param>
    /// <returns>An optional holding the value, or <see cref="None"/>.</returns>
    public static Optional<T> From(T? value)
    {
        if (value is null)
        {
            return None;
        }

        return new Optional<T>(true, value);
    }

    /// <summary>Gets the empty optional.</summary>
    public static Optional<T> None
    {
        get { return default(Optional<T>); }
    }
}

View File

@@ -0,0 +1,158 @@
using System.Linq;
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.DotNet.Internal;
/// <summary>
/// Parsed view of the <c>runtimeOptions</c> object of a runtime configuration file.
/// </summary>
internal sealed class DotNetRuntimeConfig
{
    private DotNetRuntimeConfig(
        string relativePath,
        IReadOnlyCollection<string> tfms,
        IReadOnlyCollection<string> frameworks,
        IReadOnlyCollection<RuntimeGraphEntry> runtimeGraph)
    {
        RelativePath = relativePath;
        Tfms = tfms;
        Frameworks = frameworks;
        RuntimeGraph = runtimeGraph;
    }

    /// <summary>Gets the relative path that was passed to <see cref="Load"/>.</summary>
    public string RelativePath { get; }

    /// <summary>Gets the trimmed <c>tfm</c> values, sorted case-insensitively.</summary>
    public IReadOnlyCollection<string> Tfms { get; }

    /// <summary>
    /// Gets the frameworks from <c>framework</c>, <c>frameworks</c> and <c>includedFrameworks</c>,
    /// formatted as <c>name@version</c> (or just <c>name</c>) and sorted case-insensitively.
    /// </summary>
    public IReadOnlyCollection<string> Frameworks { get; }

    /// <summary>Gets the entries of <c>runtimeGraph.runtimes</c> in document order.</summary>
    public IReadOnlyCollection<RuntimeGraphEntry> RuntimeGraph { get; }

    /// <summary>
    /// Reads and parses the runtime configuration at <paramref name="absolutePath"/>.
    /// </summary>
    /// <param name="absolutePath">Location of the file on disk.</param>
    /// <param name="relativePath">Path recorded on the returned instance.</param>
    /// <param name="cancellationToken">Token checked while iterating arrays and the runtime graph.</param>
    /// <returns>
    /// The parsed configuration, or <c>null</c> when the root is not a JSON object or has no
    /// <c>runtimeOptions</c> object.
    /// </returns>
    public static DotNetRuntimeConfig? Load(string absolutePath, string relativePath, CancellationToken cancellationToken)
    {
        using var stream = File.OpenRead(absolutePath);
        using var document = JsonDocument.Parse(stream, new JsonDocumentOptions
        {
            AllowTrailingCommas = true,
            CommentHandling = JsonCommentHandling.Skip
        });

        var root = document.RootElement;

        // TryGetProperty throws InvalidOperationException on non-object values, so a file whose
        // root is an array or scalar must be rejected before it is probed.
        if (root.ValueKind is not JsonValueKind.Object ||
            !root.TryGetProperty("runtimeOptions", out var runtimeOptions) ||
            runtimeOptions.ValueKind is not JsonValueKind.Object)
        {
            return null;
        }

        var tfms = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
        var frameworks = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);

        if (runtimeOptions.TryGetProperty("tfm", out var tfmElement) && tfmElement.ValueKind == JsonValueKind.String)
        {
            AddIfPresent(tfms, tfmElement.GetString());
        }

        if (runtimeOptions.TryGetProperty("framework", out var frameworkElement) && frameworkElement.ValueKind == JsonValueKind.Object)
        {
            AddIfPresent(frameworks, FormatFramework(frameworkElement));
        }

        AddFrameworksFromArray(runtimeOptions, "frameworks", frameworks, cancellationToken);
        AddFrameworksFromArray(runtimeOptions, "includedFrameworks", frameworks, cancellationToken);

        var runtimeGraph = ReadRuntimeGraph(runtimeOptions, cancellationToken);

        return new DotNetRuntimeConfig(
            relativePath,
            tfms.ToArray(),
            frameworks.ToArray(),
            runtimeGraph);
    }

    // Adds every framework object of the named array property to the set.
    private static void AddFrameworksFromArray(JsonElement runtimeOptions, string propertyName, ISet<string> frameworks, CancellationToken cancellationToken)
    {
        if (!runtimeOptions.TryGetProperty(propertyName, out var arrayElement) || arrayElement.ValueKind != JsonValueKind.Array)
        {
            return;
        }

        foreach (var item in arrayElement.EnumerateArray())
        {
            cancellationToken.ThrowIfCancellationRequested();
            AddIfPresent(frameworks, FormatFramework(item));
        }
    }

    // Reads runtimeGraph.runtimes into RID entries with their trimmed string fallbacks.
    private static List<RuntimeGraphEntry> ReadRuntimeGraph(JsonElement runtimeOptions, CancellationToken cancellationToken)
    {
        var runtimeGraph = new List<RuntimeGraphEntry>();

        if (!runtimeOptions.TryGetProperty("runtimeGraph", out var runtimeGraphElement) ||
            runtimeGraphElement.ValueKind != JsonValueKind.Object ||
            !runtimeGraphElement.TryGetProperty("runtimes", out var runtimesElement) ||
            runtimesElement.ValueKind != JsonValueKind.Object)
        {
            return runtimeGraph;
        }

        foreach (var ridProperty in runtimesElement.EnumerateObject())
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (string.IsNullOrWhiteSpace(ridProperty.Name))
            {
                continue;
            }

            var fallbacks = new List<string>();
            if (ridProperty.Value.ValueKind == JsonValueKind.Object &&
                ridProperty.Value.TryGetProperty("fallbacks", out var fallbacksElement) &&
                fallbacksElement.ValueKind == JsonValueKind.Array)
            {
                foreach (var fallback in fallbacksElement.EnumerateArray())
                {
                    if (fallback.ValueKind != JsonValueKind.String)
                    {
                        continue;
                    }

                    var fallbackValue = fallback.GetString();
                    if (!string.IsNullOrWhiteSpace(fallbackValue))
                    {
                        fallbacks.Add(fallbackValue.Trim());
                    }
                }
            }

            runtimeGraph.Add(new RuntimeGraphEntry(ridProperty.Name.Trim(), fallbacks));
        }

        return runtimeGraph;
    }

    // Adds the trimmed value unless it is null or blank.
    private static void AddIfPresent(ISet<string> set, string? value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            set.Add(value.Trim());
        }
    }

    // Formats a framework object as "name@version", or "name" when no version is given;
    // returns null for non-objects and for objects without a name.
    private static string? FormatFramework(JsonElement element)
    {
        if (element.ValueKind is not JsonValueKind.Object)
        {
            return null;
        }

        var name = element.TryGetProperty("name", out var nameElement) && nameElement.ValueKind == JsonValueKind.String
            ? nameElement.GetString()
            : null;

        var version = element.TryGetProperty("version", out var versionElement) && versionElement.ValueKind == JsonValueKind.String
            ? versionElement.GetString()
            : null;

        if (string.IsNullOrWhiteSpace(name))
        {
            return null;
        }

        if (string.IsNullOrWhiteSpace(version))
        {
            return name.Trim();
        }

        return $"{name.Trim()}@{version.Trim()}";
    }

    /// <summary>One runtime identifier of the runtime graph together with its fallback RIDs.</summary>
    /// <param name="Rid">Trimmed runtime identifier.</param>
    /// <param name="Fallbacks">Trimmed fallback identifiers in document order.</param>
    internal sealed record RuntimeGraphEntry(string Rid, IReadOnlyList<string> Fallbacks);
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the .NET language analyzer plug-in project. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<!-- Default item globs are switched off; the item group below lists the project items explicitly. -->
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<!-- Explicit items: all C# sources are compiled, all JSON files are embedded as resources, -->
<!-- and every remaining file is tracked as a None item. Build output folders are excluded. -->
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
</ItemGroup>
<!-- Reference to the StellaOps.Scanner.Analyzers.Lang project in the sibling folder. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,19 @@
# .NET Analyzer Task Flow
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|-----|----|--------|------------|-------------|---------------|
| 1 | SCANNER-ANALYZERS-LANG-10-305A | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-307 | Parse `*.deps.json` + `runtimeconfig.json`, build RID graph, and normalize to `pkg:nuget` components. | RID graph deterministic; fixtures confirm consistent component ordering; fallback to `bin:{sha256}` documented. |
| 2 | SCANNER-ANALYZERS-LANG-10-305B | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-305A | Extract assembly metadata (strong name, file/product info) and optional Authenticode details when offline cert bundle provided. | Signing metadata captured for signed assemblies; offline trust store documented; hash validations deterministic. |
| 3 | SCANNER-ANALYZERS-LANG-10-305C | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-305B | Handle self-contained apps and native assets; merge with EntryTrace usage hints. | Self-contained fixtures map to components with RID flags; usage hints propagate; tests cover linux/win variants. |
| 4 | SCANNER-ANALYZERS-LANG-10-307D | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-305C | Integrate shared helpers (license mapping, quiet provenance) and concurrency-safe caches. | Shared helpers reused; concurrency tests for parallel layer scans pass; no redundant allocations. |
| 5 | SCANNER-ANALYZERS-LANG-10-308D | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-307D | Determinism fixtures + benchmark harness; compare to competitor scanners for accuracy/perf. | Fixtures in `Fixtures/lang/dotnet/`; determinism CI guard; benchmark demonstrates lower duplication + faster runtime. |
| 6 | SCANNER-ANALYZERS-LANG-10-309D | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-308D | Package plug-in (manifest, DI registration) and update Offline Kit instructions. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |
## .NET Entry-Point & Dependency Resolver (Sprint 11)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-LANG-11-001 | TODO | StellaOps.Scanner EPDR Guild, Language Analyzer Guild | - | Build entrypoint resolver that maps project/publish artifacts to entrypoint identities (assembly name, MVID, TFM, RID) and environment profiles (publish mode, host kind, probing paths). Output normalized `entrypoints[]` records with deterministic IDs. | Entrypoint records produced for fixtures (framework-dependent, self-contained, single-file, multi-TFM/RID); determinism check passes; docs updated. |
| SCANNER-ANALYZERS-LANG-11-002 | TODO | StellaOps.Scanner EPDR Guild | SCANNER-ANALYZERS-LANG-11-001 | Implement static analyzer (IL + reflection heuristics) capturing AssemblyRef, ModuleRef/PInvoke, DynamicDependency, reflection literals, DI patterns, and custom AssemblyLoadContext probing hints. Emit dependency edges with reason codes and confidence. | Static analysis coverage demonstrated on fixtures; edges carry reason codes (`il-assemblyref`, `il-moduleref`, `reflection-literal`, `alc-probing`); tests cover trimmed/single-file cases. |
| SCANNER-ANALYZERS-LANG-11-003 | TODO | StellaOps.Scanner EPDR Guild, Signals Guild | SCANNER-ANALYZERS-LANG-11-002 | Ingest optional runtime evidence (AssemblyLoad, Resolving, P/Invoke) via event listener harness; merge runtime edges with static/declared ones and attach reason codes/confidence. | Runtime listener service pluggable; fixtures record runtime edges; merged output shows combined reason set with confidence per edge. |
| SCANNER-ANALYZERS-LANG-11-004 | TODO | StellaOps.Scanner EPDR Guild, SBOM Service Guild | SCANNER-ANALYZERS-LANG-11-002 | Produce normalized observation export to Scanner writer: entrypoints + dependency edges + environment profiles (AOC compliant). Wire to SBOM service entrypoint tagging. | Analyzer writes observation records consumed by SBOM service tests; AOC compliance docs updated; determinism checked. |
| SCANNER-ANALYZERS-LANG-11-005 | TODO | StellaOps.Scanner EPDR Guild, QA Guild | SCANNER-ANALYZERS-LANG-11-004 | Add comprehensive fixtures/benchmarks covering framework-dependent, self-contained, single-file, trimmed, NativeAOT, multi-RID scenarios; include explain traces and perf benchmarks vs previous analyzer. | Fixtures stored under `fixtures/lang/dotnet/epdr`; determinism + perf thresholds validated; benchmark results documented. |

View File

@@ -0,0 +1,23 @@
{
"schemaVersion": "1.0",
"id": "stellaops.analyzer.lang.dotnet",
"displayName": "StellaOps .NET Analyzer (preview)",
"version": "0.1.0",
"requiresRestart": true,
"entryPoint": {
"type": "dotnet",
"assembly": "StellaOps.Scanner.Analyzers.Lang.DotNet.dll",
"typeName": "StellaOps.Scanner.Analyzers.Lang.DotNet.DotNetAnalyzerPlugin"
},
"capabilities": [
"language-analyzer",
"dotnet",
"nuget"
],
"metadata": {
"org.stellaops.analyzer.language": "dotnet",
"org.stellaops.analyzer.kind": "language",
"org.stellaops.restart.required": "true",
"org.stellaops.analyzer.status": "preview"
}
}

View File

@@ -0,0 +1,31 @@
# StellaOps.Scanner.Analyzers.Lang.Go — Agent Charter
## Role
Build the Go analyzer plug-in that reads Go build info, module metadata, and DWARF notes to attribute binaries with rich provenance inside Scanner.
## Scope
- Inspect binaries for build info (`.note.go.buildid`, Go build info blob) and extract module, version, VCS metadata.
- Parse DWARF-lite sections for commit hash / dirty flag and map to components.
- Manage shared hash cache to dedupe identical binaries across layers.
- Provide benchmarks and determinism fixtures; package plug-in manifest.
## Out of Scope
- Native library link analysis (belongs to native analyzer).
- VCS remote fetching or symbol download.
- Policy decisions or vulnerability joins.
## Expectations
- Latency targets: ≤400µs (hot) / ≤2ms (cold) per binary; minimal allocations via buffer pooling.
- Shared buffer pooling via `ArrayPool<byte>` for build-info/DWARF reads; safe for concurrent scans.
- Deterministic fallback to `bin:{sha256}` when metadata absent; heuristics clearly identified.
- Offline-first: rely solely on embedded metadata.
- Telemetry for binaries processed, metadata coverage, heuristics usage.
- Heuristic fallback metrics: `scanner_analyzer_golang_heuristic_total{indicator,version_hint}` increments whenever stripped binaries are classified via fallbacks.
## Dependencies
- Shared language analyzer core; Worker dispatcher; caching infrastructure (layer cache + file CAS).
## Testing & Artifacts
- Golden fixtures for modules with/without VCS info, stripped binaries, cross-compiled variants.
- Benchmark comparison with competitor scanners to demonstrate speed/fidelity advantages (captured in `src/Bench/StellaOps.Bench/Scanner.Analyzers/lang/go/`).
- ADR documenting heuristics and risk mitigation.

View File

@@ -0,0 +1,7 @@
global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Threading;
global using System.Threading.Tasks;
global using StellaOps.Scanner.Analyzers.Lang;

View File

@@ -0,0 +1,17 @@
using System;
using StellaOps.Scanner.Analyzers.Lang.Plugin;
namespace StellaOps.Scanner.Analyzers.Lang.Go;
/// <summary>
/// Plug-in entry point that exposes <see cref="GoLanguageAnalyzer"/> through
/// <see cref="ILanguageAnalyzerPlugin"/>.
/// </summary>
public sealed class GoAnalyzerPlugin : ILanguageAnalyzerPlugin
{
    /// <summary>Name reported by this plug-in.</summary>
    public string Name
    {
        get { return "StellaOps.Scanner.Analyzers.Lang.Go"; }
    }

    /// <summary>Reports the plug-in as available whenever a service provider instance is supplied.</summary>
    /// <param name="services">Service provider offered by the caller.</param>
    /// <returns><c>true</c> when <paramref name="services"/> is not null.</returns>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>Creates a fresh Go analyzer instance.</summary>
    /// <param name="services">Service provider offered by the caller; only checked for null here.</param>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var analyzer = new GoLanguageAnalyzer();
        return analyzer;
    }
}

View File

@@ -0,0 +1,385 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;
using System.Linq;
using StellaOps.Scanner.Analyzers.Lang.Go.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Go;
public sealed class GoLanguageAnalyzer : ILanguageAnalyzer
{
/// <summary>Analyzer identifier; passed as <c>analyzerId</c> on every component this analyzer writes.</summary>
public string Id => "golang";
/// <summary>Display name of this analyzer.</summary>
public string DisplayName => "Go Analyzer";
/// <summary>
/// Walks every candidate file under the context root in ordinal path order. Files with decodable
/// Go build info are emitted immediately as module components; files that are only heuristically
/// recognised as Go are collected and emitted afterwards as fallback binary components.
/// </summary>
/// <param name="context">Scan context supplying the root path, relative-path mapping and usage hints.</param>
/// <param name="writer">Sink receiving the discovered components.</param>
/// <param name="cancellationToken">Checked before each file and before each fallback emission.</param>
/// <returns>An already-completed task; all work is done synchronously.</returns>
/// <exception cref="ArgumentNullException"><paramref name="context"/> or <paramref name="writer"/> is null.</exception>
/// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(context);
    ArgumentNullException.ThrowIfNull(writer);

    // Ordinal sorting keeps the emission order independent of file-system enumeration order.
    var orderedFiles = new List<string>(GoBinaryScanner.EnumerateCandidateFiles(context.RootPath));
    orderedFiles.Sort(StringComparer.Ordinal);

    var strippedBinaries = new List<GoStrippedBinaryClassification>();
    foreach (var file in orderedFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (GoBuildInfoProvider.TryGetBuildInfo(file, out var buildInfo) && buildInfo is not null)
        {
            EmitComponents(buildInfo, context, writer);
        }
        else if (GoBinaryScanner.TryClassifyStrippedBinary(file, out var stripped))
        {
            strippedBinaries.Add(stripped);
        }
    }

    // Heuristic components are written only after all build-info components.
    for (var i = 0; i < strippedBinaries.Count; i++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        EmitFallbackComponent(strippedBinaries[i], context, writer);
    }

    return ValueTask.CompletedTask;
}
/// <summary>
/// Writes one component for the main module and one per dependency of a binary. Modules that
/// yield a package URL are keyed by it; the rest are written under an explicit fallback key.
/// </summary>
/// <param name="buildInfo">Decoded build info of the binary.</param>
/// <param name="context">Scan context used for relative paths and usage hints.</param>
/// <param name="writer">Sink receiving the components.</param>
private void EmitComponents(GoBuildInfo buildInfo, LanguageAnalyzerContext context, LanguageComponentWriter writer)
{
    var orderedDependencies = buildInfo.Dependencies
        .OrderBy(static dependency => dependency.Path, StringComparer.Ordinal)
        .ThenBy(static dependency => dependency.Version, StringComparer.Ordinal);

    // Main module first, then dependencies ordered by path and version.
    var modules = new List<GoModule> { buildInfo.MainModule };
    modules.AddRange(orderedDependencies);

    // The hash is computed lazily by the helpers and shared across all modules of this binary.
    string? sharedBinaryHash = null;
    var relativeBinaryPath = context.GetRelativePath(buildInfo.AbsoluteBinaryPath);

    foreach (var current in modules)
    {
        var metadata = BuildMetadata(buildInfo, current, relativeBinaryPath);
        var evidence = BuildEvidence(buildInfo, current, relativeBinaryPath, context, ref sharedBinaryHash);
        var usedByEntrypoint = current.IsMain && context.UsageHints.IsPathUsed(buildInfo.AbsoluteBinaryPath);
        var purl = BuildPurl(current.Path, current.Version);

        if (string.IsNullOrEmpty(purl))
        {
            var componentKey = BuildFallbackComponentKey(current, buildInfo, relativeBinaryPath, ref sharedBinaryHash);
            writer.AddFromExplicitKey(
                analyzerId: Id,
                componentKey: componentKey,
                purl: null,
                name: current.Path,
                version: current.Version,
                type: "golang",
                metadata: metadata,
                evidence: evidence,
                usedByEntrypoint: usedByEntrypoint);
            continue;
        }

        writer.AddFromPurl(
            analyzerId: Id,
            purl: purl,
            name: current.Path,
            version: current.Version,
            type: "golang",
            metadata: metadata,
            evidence: evidence,
            usedByEntrypoint: usedByEntrypoint);
    }
}
/// <summary>
/// Builds the key-sorted metadata list for one module of a binary. The main module additionally
/// carries the Go version, the main package path, every build setting (prefixed with
/// <c>build.</c>) and any VCS values from the DWARF metadata not already supplied by a setting.
/// </summary>
/// <param name="buildInfo">Build info of the binary that contains the module.</param>
/// <param name="module">Module being described.</param>
/// <param name="binaryRelativePath">Binary path relative to the scan root; empty is reported as <c>.</c>.</param>
/// <returns>Metadata pairs ordered by ordinal key comparison.</returns>
private static IEnumerable<KeyValuePair<string, string?>> BuildMetadata(GoBuildInfo buildInfo, GoModule module, string binaryRelativePath)
{
    var pairs = new List<KeyValuePair<string, string?>>(16);

    void Append(string key, string? value)
    {
        pairs.Add(new KeyValuePair<string, string?>(key, value));
    }

    void AppendWhenPresent(string key, string? value)
    {
        if (!string.IsNullOrEmpty(value))
        {
            Append(key, value);
        }
    }

    Append("modulePath", module.Path);
    Append("binaryPath", string.IsNullOrEmpty(binaryRelativePath) ? "." : binaryRelativePath);
    AppendWhenPresent("moduleVersion", module.Version);
    AppendWhenPresent("moduleSum", module.Sum);

    if (module.Replacement is { } replacement)
    {
        Append("replacedBy.path", replacement.Path);
        AppendWhenPresent("replacedBy.version", replacement.Version);
        AppendWhenPresent("replacedBy.sum", replacement.Sum);
    }

    if (module.IsMain)
    {
        Append("go.version", buildInfo.GoVersion);
        Append("modulePath.main", buildInfo.ModulePath);

        foreach (var (settingKey, settingValue) in buildInfo.Settings)
        {
            var metadataKey = $"build.{settingKey}";
            var alreadyPresent = pairs.Exists(existing => string.Equals(existing.Key, metadataKey, StringComparison.Ordinal));
            if (!alreadyPresent)
            {
                Append(metadataKey, settingValue);
            }
        }

        var dwarf = buildInfo.DwarfMetadata;
        if (dwarf is not null)
        {
            // Build settings take precedence; DWARF values only fill keys that are still absent.
            var modifiedText = dwarf.Modified switch
            {
                true => "true",
                false => "false",
                null => null,
            };

            AddIfMissing(pairs, "build.vcs", dwarf.VcsSystem);
            AddIfMissing(pairs, "build.vcs.revision", dwarf.Revision);
            AddIfMissing(pairs, "build.vcs.modified", modifiedText);
            AddIfMissing(pairs, "build.vcs.time", dwarf.TimestampUtc);
        }
    }

    pairs.Sort(static (first, second) => string.CompareOrdinal(first.Key, second.Key));
    return pairs;
}
/// <summary>
/// Writes a <c>bin</c> component for a binary that was only heuristically classified as Go, and
/// records the heuristic hit in <see cref="GoAnalyzerMetrics"/>. The component is keyed by the
/// binary's SHA-256 when it can be computed, otherwise by its relative path.
/// </summary>
/// <param name="strippedBinary">Classification produced by the binary scanner.</param>
/// <param name="context">Scan context used for relative paths and usage hints.</param>
/// <param name="writer">Sink receiving the component.</param>
private void EmitFallbackComponent(GoStrippedBinaryClassification strippedBinary, LanguageAnalyzerContext context, LanguageComponentWriter writer)
{
    var absolutePath = strippedBinary.AbsolutePath;
    var relativePath = context.GetRelativePath(absolutePath);
    var binaryPath = string.IsNullOrEmpty(relativePath) ? "." : relativePath;
    var usedByEntrypoint = context.UsageHints.IsPathUsed(absolutePath);

    var sha256 = ComputeBinaryHash(absolutePath);
    var hasHash = !string.IsNullOrEmpty(sha256);
    var versionHint = strippedBinary.GoVersionHint;
    var hasVersionHint = !string.IsNullOrEmpty(versionHint);

    var metadata = new List<KeyValuePair<string, string?>>();
    metadata.Add(new KeyValuePair<string, string?>("binaryPath", binaryPath));
    metadata.Add(new KeyValuePair<string, string?>("languageHint", "golang"));
    metadata.Add(new KeyValuePair<string, string?>("provenance", "binary"));
    if (hasHash)
    {
        metadata.Add(new KeyValuePair<string, string?>("binary.sha256", sha256));
    }

    if (hasVersionHint)
    {
        metadata.Add(new KeyValuePair<string, string?>("go.version.hint", versionHint));
    }

    metadata.Sort(static (first, second) => string.CompareOrdinal(first.Key, second.Key));

    var evidence = new List<LanguageComponentEvidence>();
    evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "binary",
        binaryPath,
        null,
        hasHash ? sha256 : null));

    // Only the two known indicators produce a classification evidence entry.
    string? detectionSource = null;
    if (strippedBinary.Indicator == GoStrippedBinaryIndicator.BuildId)
    {
        detectionSource = "build-id";
    }
    else if (strippedBinary.Indicator == GoStrippedBinaryIndicator.GoRuntimeMarkers)
    {
        detectionSource = "runtime-markers";
    }

    if (!string.IsNullOrEmpty(detectionSource))
    {
        evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.Metadata,
            "go.heuristic",
            "classification",
            detectionSource,
            null));
    }

    evidence.Sort(static (first, second) => string.CompareOrdinal(first.ComparisonKey, second.ComparisonKey));

    var fileName = Path.GetFileName(absolutePath);
    var componentName = string.IsNullOrWhiteSpace(fileName) ? "golang-binary" : fileName;

    var componentKey = hasHash
        ? $"golang::bin::sha256:{sha256}"
        : $"golang::bin::{binaryPath}";

    writer.AddFromExplicitKey(
        analyzerId: Id,
        componentKey: componentKey,
        purl: null,
        name: componentName,
        version: null,
        type: "bin",
        metadata: metadata,
        evidence: evidence,
        usedByEntrypoint: usedByEntrypoint);

    GoAnalyzerMetrics.RecordHeuristic(strippedBinary.Indicator, hasVersionHint);
}
/// <summary>
/// Builds the evidence list for one module: the build-info record itself, plus (for the main
/// module) every build setting and every populated DWARF VCS value, plus a file-hash record for
/// modules that have no version.
/// </summary>
/// <param name="buildInfo">Build info of the binary that contains the module.</param>
/// <param name="module">Module being described.</param>
/// <param name="binaryRelativePath">Binary path relative to the scan root; empty is reported as <c>.</c>.</param>
/// <param name="context">Scan context (not read by this method).</param>
/// <param name="binaryHash">Hash cache shared across the modules of one binary; filled on first use.</param>
/// <returns>Evidence entries ordered by ordinal comparison of their comparison keys.</returns>
private static IEnumerable<LanguageComponentEvidence> BuildEvidence(GoBuildInfo buildInfo, GoModule module, string binaryRelativePath, LanguageAnalyzerContext context, ref string? binaryHash)
{
    static LanguageComponentEvidence MetadataEntry(string source, string locator, string? value)
        => new LanguageComponentEvidence(LanguageEvidenceKind.Metadata, source, locator, value, null);

    var items = new List<LanguageComponentEvidence>();
    items.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.Metadata,
        "go.buildinfo",
        $"module:{module.Path}",
        module.Version ?? string.Empty,
        module.Sum));

    if (module.IsMain)
    {
        foreach (var setting in buildInfo.Settings)
        {
            items.Add(MetadataEntry("go.buildinfo.setting", setting.Key, setting.Value));
        }

        var dwarf = buildInfo.DwarfMetadata;
        if (dwarf is not null)
        {
            if (!string.IsNullOrWhiteSpace(dwarf.VcsSystem))
            {
                items.Add(MetadataEntry("go.dwarf", "vcs", dwarf.VcsSystem));
            }

            if (!string.IsNullOrWhiteSpace(dwarf.Revision))
            {
                items.Add(MetadataEntry("go.dwarf", "vcs.revision", dwarf.Revision));
            }

            if (dwarf.Modified is bool modified)
            {
                items.Add(MetadataEntry("go.dwarf", "vcs.modified", modified ? "true" : "false"));
            }

            if (!string.IsNullOrWhiteSpace(dwarf.TimestampUtc))
            {
                items.Add(MetadataEntry("go.dwarf", "vcs.time", dwarf.TimestampUtc));
            }
        }
    }

    // Version-less modules get the binary hash as file evidence.
    if (string.IsNullOrEmpty(module.Version))
    {
        binaryHash ??= ComputeBinaryHash(buildInfo.AbsoluteBinaryPath);
        if (!string.IsNullOrEmpty(binaryHash))
        {
            var locator = string.IsNullOrEmpty(binaryRelativePath) ? "." : binaryRelativePath;
            items.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                "binary",
                locator,
                null,
                binaryHash));
        }
    }

    items.Sort(static (first, second) => string.CompareOrdinal(first.ComparisonKey, second.ComparisonKey));
    return items;
}
/// <summary>
/// Formats a <c>pkg:golang/…@…</c> package URL from a module path and version.
/// </summary>
/// <param name="path">Module path; surrounding whitespace is trimmed and the rest is used verbatim.</param>
/// <param name="version">Module version; trimmed and then escaped with <see cref="Uri.EscapeDataString(string)"/>.</param>
/// <returns>The package URL, or <c>null</c> when either input is null, empty or whitespace.</returns>
private static string? BuildPurl(string path, string? version)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return null;
    }

    if (string.IsNullOrWhiteSpace(version))
    {
        return null;
    }

    return string.Concat("pkg:golang/", path.Trim(), "@", Uri.EscapeDataString(version.Trim()));
}
/// <summary>
/// Builds the explicit component key used for a module that has no package URL:
/// <c>golang::module:{path}::{binary}</c>, suffixed with <c>::{hash}</c> when the binary hash is available.
/// </summary>
/// <param name="module">Module being keyed.</param>
/// <param name="buildInfo">Build info supplying the absolute binary path for hashing.</param>
/// <param name="binaryRelativePath">Binary path relative to the scan root; empty is reported as <c>.</c>.</param>
/// <param name="binaryHash">Hash cache shared across the modules of one binary; filled on first use.</param>
private static string BuildFallbackComponentKey(GoModule module, GoBuildInfo buildInfo, string binaryRelativePath, ref string? binaryHash)
{
    binaryHash ??= ComputeBinaryHash(buildInfo.AbsoluteBinaryPath);

    var location = string.IsNullOrEmpty(binaryRelativePath) ? "." : binaryRelativePath;
    var keyWithoutHash = $"golang::module:{module.Path}::{location}";

    return string.IsNullOrEmpty(binaryHash)
        ? keyWithoutHash
        : $"{keyWithoutHash}::{binaryHash}";
}
/// <summary>
/// Appends <paramref name="key"/>/<paramref name="value"/> to <paramref name="entries"/> unless
/// either is null, empty or whitespace, or an entry with the same key (ordinal comparison) already exists.
/// </summary>
private static void AddIfMissing(List<KeyValuePair<string, string?>> entries, string key, string? value)
{
    var hasContent = !string.IsNullOrWhiteSpace(key) && !string.IsNullOrWhiteSpace(value);
    if (!hasContent)
    {
        return;
    }

    foreach (var existing in entries)
    {
        if (string.Equals(existing.Key, key, StringComparison.Ordinal))
        {
            return;
        }
    }

    entries.Add(new KeyValuePair<string, string?>(key, value));
}
/// <summary>
/// Computes the SHA-256 of the file at <paramref name="path"/>.
/// </summary>
/// <returns>
/// The digest as lowercase hexadecimal, or <c>null</c> when opening or reading the file raises an
/// <see cref="IOException"/> or <see cref="UnauthorizedAccessException"/>.
/// </returns>
private static string? ComputeBinaryHash(string path)
{
    try
    {
        using var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var hasher = SHA256.Create();
        var digest = hasher.ComputeHash(input);
        var hex = Convert.ToHexString(digest);
        return hex.ToLowerInvariant();
    }
    catch (Exception error) when (error is IOException or UnauthorizedAccessException)
    {
        // Hashing is best effort: an unreadable binary simply has no hash.
        return null;
    }
}
}

View File

@@ -0,0 +1,30 @@
using System.Collections.Generic;
using System.Diagnostics.Metrics;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Metrics emitted by the Go analyzer. Currently a single counter that tracks components
/// produced through heuristic classification.
/// </summary>
internal static class GoAnalyzerMetrics
{
    private static readonly Meter Meter = new("StellaOps.Scanner.Analyzers.Lang.Go", "1.0.0");

    private static readonly Counter<long> HeuristicCounter = Meter.CreateCounter<long>(
        "scanner_analyzer_golang_heuristic_total",
        unit: "components",
        description: "Counts Go components emitted via heuristic fallbacks when build metadata is missing.");

    /// <summary>
    /// Increments the heuristic counter by one, tagged with the indicator that fired and whether
    /// a Go version hint was found.
    /// </summary>
    /// <param name="indicator">Indicator that classified the binary.</param>
    /// <param name="hasVersionHint">Whether a version hint accompanied the classification.</param>
    public static void RecordHeuristic(GoStrippedBinaryIndicator indicator, bool hasVersionHint)
    {
        var indicatorTag = new KeyValuePair<string, object?>("indicator", NormalizeIndicator(indicator));
        var versionHintTag = new KeyValuePair<string, object?>("version_hint", hasVersionHint ? "present" : "absent");

        HeuristicCounter.Add(1, indicatorTag, versionHintTag);
    }

    /// <summary>Maps an indicator to its tag value; unrecognised values become <c>unknown</c>.</summary>
    private static string NormalizeIndicator(GoStrippedBinaryIndicator indicator)
    {
        switch (indicator)
        {
            case GoStrippedBinaryIndicator.BuildId:
                return "build-id";
            case GoStrippedBinaryIndicator.GoRuntimeMarkers:
                return "runtime-markers";
            default:
                return "unknown";
        }
    }
}

View File

@@ -0,0 +1,264 @@
using System;
using System.Collections.Generic;
using System.Buffers;
using System.IO;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
internal static class GoBinaryScanner
{
// Signature bytes that TryReadBuildInfo searches for to locate the start of the Go build info header.
private static readonly ReadOnlyMemory<byte> BuildInfoMagic = new byte[]
{
0xFF, (byte)' ', (byte)'G', (byte)'o', (byte)' ', (byte)'b', (byte)'u', (byte)'i', (byte)'l', (byte)'d', (byte)'i', (byte)'n', (byte)'f', (byte)':'
};
// ASCII marker whose presence makes TryClassifyStrippedBinary report the BuildId indicator.
private static readonly ReadOnlyMemory<byte> BuildIdMarker = Encoding.ASCII.GetBytes("Go build ID:");
// ASCII marker that, combined with a detected Go version string, yields the GoRuntimeMarkers indicator.
private static readonly ReadOnlyMemory<byte> GoPclnTabMarker = Encoding.ASCII.GetBytes(".gopclntab");
// Prefix that ExtractGoVersion looks for when scanning raw bytes for a Go version string.
private static readonly ReadOnlyMemory<byte> GoVersionPrefix = Encoding.ASCII.GetBytes("go1.");
/// <summary>
/// Lazily enumerates every file beneath <paramref name="rootPath"/>, recursing into
/// subdirectories while skipping inaccessible entries, devices and reparse points.
/// </summary>
/// <param name="rootPath">Directory to walk.</param>
/// <returns>File paths as produced by <see cref="Directory.EnumerateFiles(string, string, EnumerationOptions)"/>, in enumeration order.</returns>
public static IEnumerable<string> EnumerateCandidateFiles(string rootPath)
{
    var options = new EnumerationOptions();
    options.RecurseSubdirectories = true;
    options.IgnoreInaccessible = true;
    options.AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint;
    options.MatchCasing = MatchCasing.CaseSensitive;

    using var files = Directory.EnumerateFiles(rootPath, "*", options).GetEnumerator();
    while (files.MoveNext())
    {
        yield return files.Current;
    }
}
/// <summary>
/// Loads a file into a pooled buffer, locates the Go build info signature and decodes the Go
/// version and raw module data that follow it.
/// </summary>
/// <remarks>
/// Files smaller than 64 bytes or larger than 128 MiB are rejected without being read. Every
/// occurrence of the signature is tried in file order until one decodes, so a stray copy of the
/// signature bytes ahead of the real header no longer causes a false negative.
/// </remarks>
/// <param name="filePath">Path of the file to inspect.</param>
/// <param name="goVersion">Decoded Go version on success; otherwise <c>null</c>.</param>
/// <param name="moduleData">Decoded module data on success; otherwise <c>null</c>.</param>
/// <returns>
/// <c>true</c> when build info was decoded; <c>false</c> when the file is missing, out of the size
/// bounds, unreadable, or contains no decodable build info.
/// </returns>
public static bool TryReadBuildInfo(string filePath, out string? goVersion, out string? moduleData)
{
    goVersion = null;
    moduleData = null;

    FileInfo info;
    try
    {
        info = new FileInfo(filePath);
        if (!info.Exists || info.Length < 64 || info.Length > 128 * 1024 * 1024)
        {
            return false;
        }
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }
    catch (System.Security.SecurityException)
    {
        return false;
    }

    var length = info.Length;
    if (length <= 0)
    {
        return false;
    }

    var inspectLength = (int)Math.Min(length, int.MaxValue);
    var buffer = ArrayPool<byte>.Shared.Rent(inspectLength);

    try
    {
        using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);

        // Stream.Read may return fewer bytes than requested, so keep reading until the window is full.
        var totalRead = 0;
        while (totalRead < inspectLength)
        {
            var read = stream.Read(buffer, totalRead, inspectLength - totalRead);
            if (read <= 0)
            {
                break;
            }

            totalRead += read;
        }

        if (totalRead < 64)
        {
            return false;
        }

        var magic = BuildInfoMagic.Span;
        var remaining = new ReadOnlySpan<byte>(buffer, 0, totalRead);
        while (true)
        {
            var offset = remaining.IndexOf(magic);
            if (offset < 0)
            {
                // A failed decode attempt may have left partial values behind; report none.
                goVersion = null;
                moduleData = null;
                return false;
            }

            var candidate = remaining[offset..];
            if (GoBuildInfoDecoder.TryDecode(candidate, out goVersion, out moduleData))
            {
                return true;
            }

            // Not a decodable header at this position: resume the search one byte further on.
            remaining = candidate[1..];
        }
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }
    finally
    {
        Array.Clear(buffer, 0, inspectLength);
        ArrayPool<byte>.Shared.Return(buffer);
    }
}
/// <summary>
/// Heuristically decides whether a file is a Go binary by scanning a window of up to 128 KiB at
/// its head and, for larger files, at its tail for Go-specific markers.
/// </summary>
/// <remarks>
/// A build-ID marker alone is sufficient (<see cref="GoStrippedBinaryIndicator.BuildId"/>).
/// Otherwise the pclntab marker must be accompanied by a Go version string
/// (<see cref="GoStrippedBinaryIndicator.GoRuntimeMarkers"/>). Files shorter than 128 bytes are
/// never classified. I/O and access errors make the method return <c>false</c> instead of
/// throwing, matching <c>TryReadBuildInfo</c>, so one unreadable file cannot abort a scan.
/// </remarks>
/// <param name="filePath">Path of the file to inspect.</param>
/// <param name="classification">The classification on success; otherwise the default value.</param>
/// <returns><c>true</c> when the file was classified as a Go binary.</returns>
public static bool TryClassifyStrippedBinary(string filePath, out GoStrippedBinaryClassification classification)
{
    // Fills target[0..count) from the stream; Stream.Read alone may return fewer bytes than requested.
    static int FillBuffer(Stream source, byte[] target, int count)
    {
        var total = 0;
        while (total < count)
        {
            var read = source.Read(target, total, count - total);
            if (read <= 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    classification = default;

    long length;
    try
    {
        var fileInfo = new FileInfo(filePath);
        if (!fileInfo.Exists)
        {
            return false;
        }

        length = fileInfo.Length;
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }
    catch (System.Security.SecurityException)
    {
        return false;
    }

    if (length < 128)
    {
        return false;
    }

    const int WindowSize = 128 * 1024;
    var readSize = (int)Math.Min(length, WindowSize);
    var buffer = ArrayPool<byte>.Shared.Rent(readSize);

    try
    {
        using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);

        var headRead = FillBuffer(stream, buffer, readSize);
        if (headRead <= 0)
        {
            return false;
        }

        var headSpan = new ReadOnlySpan<byte>(buffer, 0, headRead);
        var hasBuildId = headSpan.IndexOf(BuildIdMarker.Span) >= 0;
        var hasPcln = headSpan.IndexOf(GoPclnTabMarker.Span) >= 0;
        var goVersion = ExtractGoVersion(headSpan);

        // Use the stream's own length for the tail so the seek can never land before the start,
        // and do the min() in 64-bit so files larger than 2 GiB are handled correctly.
        var streamLength = stream.Length;
        if (streamLength > headRead)
        {
            var tailSize = (int)Math.Min(readSize, streamLength);
            stream.Seek(streamLength - tailSize, SeekOrigin.Begin);

            var tailRead = FillBuffer(stream, buffer, tailSize);
            if (tailRead > 0)
            {
                var tailSpan = new ReadOnlySpan<byte>(buffer, 0, tailRead);
                hasBuildId |= tailSpan.IndexOf(BuildIdMarker.Span) >= 0;
                hasPcln |= tailSpan.IndexOf(GoPclnTabMarker.Span) >= 0;
                goVersion ??= ExtractGoVersion(tailSpan);
            }
        }

        if (hasBuildId)
        {
            classification = new GoStrippedBinaryClassification(
                filePath,
                GoStrippedBinaryIndicator.BuildId,
                goVersion);
            return true;
        }

        if (hasPcln && !string.IsNullOrEmpty(goVersion))
        {
            classification = new GoStrippedBinaryClassification(
                filePath,
                GoStrippedBinaryIndicator.GoRuntimeMarkers,
                goVersion);
            return true;
        }

        return false;
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }
    finally
    {
        Array.Clear(buffer, 0, readSize);
        ArrayPool<byte>.Shared.Return(buffer);
    }
}
/// <summary>
/// Scans raw bytes for the first token that starts with the Go version prefix, is not directly
/// preceded by a letter or digit, and continues with at least one version character.
/// </summary>
/// <param name="data">Bytes to scan.</param>
/// <returns>The token decoded as ASCII, or <c>null</c> when no occurrence qualifies.</returns>
private static string? ExtractGoVersion(ReadOnlySpan<byte> data)
{
    var marker = GoVersionPrefix.Span;
    var searchFrom = 0;

    while (searchFrom < data.Length)
    {
        var relative = data[searchFrom..].IndexOf(marker);
        if (relative < 0)
        {
            return null;
        }

        var start = searchFrom + relative;

        // Whatever happens with this hit, the next search resumes one byte after it.
        searchFrom = start + 1;

        // Each byte is widened to a char for the letter/digit test.
        var precededByWordCharacter = start > 0 && char.IsLetterOrDigit((char)data[start - 1]);
        if (precededByWordCharacter)
        {
            continue;
        }

        var end = start + marker.Length;
        while (end < data.Length && IsVersionCharacter((char)data[end]))
        {
            end++;
        }

        // The bare prefix with nothing after it is not a version.
        if (end - start > marker.Length)
        {
            return Encoding.ASCII.GetString(data[start..end]);
        }
    }

    return null;
}
/// <summary>
/// Returns <c>true</c> for ASCII letters, ASCII digits and the punctuation <c>.</c>, <c>-</c>,
/// <c>+</c> and <c>_</c>.
/// </summary>
private static bool IsVersionCharacter(char value)
{
    return value is (>= '0' and <= '9')
        or (>= 'a' and <= 'z')
        or (>= 'A' and <= 'Z')
        or '.'
        or '-'
        or '+'
        or '_';
}
}

View File

@@ -0,0 +1,80 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Immutable description of the build info found in one Go binary: toolchain version, main
/// module, dependencies, build settings and optional DWARF-derived VCS metadata.
/// </summary>
internal sealed class GoBuildInfo
{
    /// <summary>
    /// Creates an instance from arbitrary sequences. Null sequences become empty; null modules
    /// and settings with a null key are dropped.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="goVersion"/>, <paramref name="absoluteBinaryPath"/>,
    /// <paramref name="modulePath"/> or <paramref name="mainModule"/> is null.
    /// </exception>
    public GoBuildInfo(
        string goVersion,
        string absoluteBinaryPath,
        string modulePath,
        GoModule mainModule,
        IEnumerable<GoModule> dependencies,
        IEnumerable<KeyValuePair<string, string?>> settings,
        GoDwarfMetadata? dwarfMetadata = null)
        : this(
            goVersion,
            absoluteBinaryPath,
            modulePath,
            mainModule,
            FreezeDependencies(dependencies),
            FreezeSettings(settings),
            dwarfMetadata)
    {
    }

    private GoBuildInfo(
        string goVersion,
        string absoluteBinaryPath,
        string modulePath,
        GoModule mainModule,
        ImmutableArray<GoModule> dependencies,
        ImmutableArray<KeyValuePair<string, string?>> settings,
        GoDwarfMetadata? dwarfMetadata)
    {
        ArgumentNullException.ThrowIfNull(goVersion);
        ArgumentNullException.ThrowIfNull(absoluteBinaryPath);
        ArgumentNullException.ThrowIfNull(modulePath);
        ArgumentNullException.ThrowIfNull(mainModule);

        GoVersion = goVersion;
        AbsoluteBinaryPath = absoluteBinaryPath;
        ModulePath = modulePath;
        MainModule = mainModule;
        Dependencies = dependencies;
        Settings = settings;
        DwarfMetadata = dwarfMetadata;
    }

    /// <summary>Go toolchain version string.</summary>
    public string GoVersion { get; }

    /// <summary>Absolute path of the binary the info was read from.</summary>
    public string AbsoluteBinaryPath { get; }

    /// <summary>Main package path.</summary>
    public string ModulePath { get; }

    /// <summary>Main module of the binary.</summary>
    public GoModule MainModule { get; }

    /// <summary>Dependency modules, in the order supplied to the constructor.</summary>
    public ImmutableArray<GoModule> Dependencies { get; }

    /// <summary>Build settings, in the order supplied to the constructor.</summary>
    public ImmutableArray<KeyValuePair<string, string?>> Settings { get; }

    /// <summary>VCS metadata attached through <see cref="WithDwarf"/> or the constructor, if any.</summary>
    public GoDwarfMetadata? DwarfMetadata { get; }

    /// <summary>Returns a copy of this instance with <paramref name="metadata"/> attached.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="metadata"/> is null.</exception>
    public GoBuildInfo WithDwarf(GoDwarfMetadata metadata)
    {
        ArgumentNullException.ThrowIfNull(metadata);

        return new GoBuildInfo(
            GoVersion,
            AbsoluteBinaryPath,
            ModulePath,
            MainModule,
            Dependencies,
            Settings,
            metadata);
    }

    // Materialises the dependency sequence, treating null as empty and skipping null modules.
    private static ImmutableArray<GoModule> FreezeDependencies(IEnumerable<GoModule>? source)
    {
        if (source is null)
        {
            return ImmutableArray<GoModule>.Empty;
        }

        return source
            .Where(static module => module is not null)
            .ToImmutableArray();
    }

    // Materialises the settings sequence, treating null as empty and skipping pairs with a null key.
    private static ImmutableArray<KeyValuePair<string, string?>> FreezeSettings(IEnumerable<KeyValuePair<string, string?>>? source)
    {
        if (source is null)
        {
            return ImmutableArray<KeyValuePair<string, string?>>.Empty;
        }

        return source
            .Where(static pair => pair.Key is not null)
            .Select(static pair => new KeyValuePair<string, string?>(pair.Key, pair.Value))
            .ToImmutableArray();
    }
}

View File

@@ -0,0 +1,159 @@
using System;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Decodes the inline, varint-length-prefixed Go build info blob: a 32-byte header that starts
/// with the build info signature, followed by the Go version string and the module data string.
/// </summary>
internal static class GoBuildInfoDecoder
{
    private const string BuildInfoMagic = "\xff Go buildinf:";
    private const int HeaderSize = 32;
    private const byte VarintEncodingFlag = 0x02;

    // Header bytes that immediately follow the 14-byte signature.
    private const int PointerSizeOffset = 14;
    private const int FlagsOffset = 15;

    // A 64-bit value needs at most ten 7-bit groups.
    private const int MaxVarintLength = 10;

    /// <summary>
    /// Attempts to decode the Go version and module data from a span that begins at the signature.
    /// </summary>
    /// <param name="data">Bytes starting at the build info signature.</param>
    /// <param name="goVersion">Decoded Go version on success; otherwise <c>null</c>.</param>
    /// <param name="moduleData">Decoded module data (sentinels stripped) on success; otherwise <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when both strings were decoded and are non-blank. Malformed or truncated input,
    /// including hostile length prefixes, yields <c>false</c> rather than an exception.
    /// </returns>
    public static bool TryDecode(ReadOnlySpan<byte> data, out string? goVersion, out string? moduleData)
    {
        goVersion = null;
        moduleData = null;

        if (data.Length < HeaderSize)
        {
            return false;
        }

        if (!IsMagicMatch(data))
        {
            return false;
        }

        var pointerSize = data[PointerSizeOffset];
        var flags = data[FlagsOffset];

        if (pointerSize != 4 && pointerSize != 8)
        {
            return false;
        }

        if ((flags & VarintEncodingFlag) == 0)
        {
            // Older Go toolchains encode pointers to strings instead of inline data.
            // The Sprint 10 scope targets Go 1.18+, which always sets the varint flag.
            return false;
        }

        var payload = data.Slice(HeaderSize);

        if (!TryReadVarString(payload, out var version, out var consumed))
        {
            return false;
        }

        payload = payload.Slice(consumed);

        if (!TryReadVarString(payload, out var modules, out _))
        {
            return false;
        }

        if (string.IsNullOrWhiteSpace(version))
        {
            return false;
        }

        modules = StripSentinel(modules);
        if (string.IsNullOrWhiteSpace(modules))
        {
            // Leave both out values null so a failed decode never exposes partial results.
            return false;
        }

        goVersion = version;
        moduleData = modules;
        return true;
    }

    /// <summary>Checks that <paramref name="data"/> starts with the build info signature.</summary>
    private static bool IsMagicMatch(ReadOnlySpan<byte> data)
    {
        if (data.Length < BuildInfoMagic.Length)
        {
            return false;
        }

        for (var i = 0; i < BuildInfoMagic.Length; i++)
        {
            if (data[i] != BuildInfoMagic[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Reads a string prefixed by its byte length encoded as an unsigned varint.
    /// </summary>
    /// <param name="data">Bytes starting at the length prefix.</param>
    /// <param name="result">The UTF-8 decoded string on success; otherwise empty.</param>
    /// <param name="consumed">Bytes consumed by the prefix and the string on success; otherwise 0.</param>
    /// <returns><c>false</c> when the prefix is malformed, the length is zero, or the string does not fit in <paramref name="data"/>.</returns>
    private static bool TryReadVarString(ReadOnlySpan<byte> data, out string result, out int consumed)
    {
        result = string.Empty;
        consumed = 0;

        if (!TryReadUVarint(data, out var length, out var lengthBytes))
        {
            return false;
        }

        // Compare against the remaining byte count instead of computing lengthBytes + length:
        // the sum overflows int for lengths near int.MaxValue and would slip past the bounds check.
        var available = data.Length - lengthBytes;
        if (length == 0 || length > (ulong)available)
        {
            return false;
        }

        var stringLength = (int)length;
        var slice = data.Slice(lengthBytes, stringLength);
        result = Encoding.UTF8.GetString(slice);
        consumed = lengthBytes + stringLength;
        return true;
    }

    /// <summary>
    /// Reads an unsigned base-128 varint (least significant group first, high bit set on every
    /// byte except the last).
    /// </summary>
    /// <param name="data">Bytes starting at the varint.</param>
    /// <param name="value">Decoded value on success; otherwise 0.</param>
    /// <param name="bytesRead">Number of bytes the varint occupies on success; otherwise 0.</param>
    /// <returns><c>false</c> when the input ends before the varint terminates or the value does not fit in 64 bits.</returns>
    private static bool TryReadUVarint(ReadOnlySpan<byte> data, out ulong value, out int bytesRead)
    {
        value = 0;
        bytesRead = 0;

        ulong accumulated = 0;
        var shift = 0;

        // No valid encoding is longer than MaxVarintLength bytes, so stop scanning there.
        var limit = Math.Min(data.Length, MaxVarintLength);
        for (var i = 0; i < limit; i++)
        {
            var current = data[i];
            if (current < 0x80)
            {
                // The tenth byte may only contribute the single remaining bit of a 64-bit value.
                if (i == MaxVarintLength - 1 && current > 1)
                {
                    return false;
                }

                value = accumulated | ((ulong)current << shift);
                bytesRead = i + 1;
                return true;
            }

            accumulated |= (ulong)(current & 0x7F) << shift;
            shift += 7;
        }

        return false;
    }

    /// <summary>
    /// Removes the 16-character framing at both ends of the module data when the value is at
    /// least 33 characters long and the character just before the trailing frame is a newline;
    /// otherwise returns the value unchanged.
    /// </summary>
    private static string StripSentinel(string value)
    {
        // NOTE(review): this works on decoded characters, not raw bytes. It presumably relies on
        // each framing byte decoding to exactly one character - confirm against real binaries.
        if (string.IsNullOrEmpty(value) || value.Length < 33)
        {
            return value;
        }

        var sentinelIndex = value.Length - 17;
        if (value[sentinelIndex] != '\n')
        {
            return value;
        }

        return value[16..^16];
    }
}

View File

@@ -0,0 +1,234 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
internal static class GoBuildInfoParser
{
// Line prefixes that TryParse dispatches on; each is a keyword followed by a tab.
// "path": the main package path.
private const string PathPrefix = "path\t";
// "mod": the main module.
private const string ModulePrefix = "mod\t";
// "dep": a dependency module.
private const string DependencyPrefix = "dep\t";
// "=>": a replacement for the most recently parsed module.
private const string ReplacementPrefix = "=>\t";
// "build": a key=value build setting.
private const string BuildPrefix = "build\t";
/// <summary>
/// Parses the line-oriented module data of a Go binary into a <see cref="GoBuildInfo"/>.
/// </summary>
/// <param name="goVersion">Go toolchain version reported by the binary.</param>
/// <param name="absoluteBinaryPath">Absolute path of the binary, stored on the result.</param>
/// <param name="rawModuleData">Module data text; lines that match no known prefix are ignored.</param>
/// <param name="info">The parsed build info on success; otherwise <c>null</c>.</param>
/// <returns>
/// <c>false</c> when <paramref name="goVersion"/> or <paramref name="rawModuleData"/> is blank, or
/// when no main module line could be parsed.
/// </returns>
public static bool TryParse(string goVersion, string absoluteBinaryPath, string rawModuleData, out GoBuildInfo? info)
{
    info = null;

    if (string.IsNullOrWhiteSpace(goVersion) || string.IsNullOrWhiteSpace(rawModuleData))
    {
        return false;
    }

    string? declaredPath = null;
    GoModule? main = null;
    GoModule? replacementTarget = null; // module that a following "=>" line applies to
    var dependencyList = new List<GoModule>();
    var buildSettings = new SortedDictionary<string, string?>(StringComparer.Ordinal);

    using var lines = new StringReader(rawModuleData);
    for (var line = lines.ReadLine(); line is not null; line = lines.ReadLine())
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        if (line.StartsWith(PathPrefix, StringComparison.Ordinal))
        {
            declaredPath = line[PathPrefix.Length..].Trim();
        }
        else if (line.StartsWith(ModulePrefix, StringComparison.Ordinal))
        {
            main = ParseModule(line.AsSpan(ModulePrefix.Length), isMain: true);
            replacementTarget = main;
        }
        else if (line.StartsWith(DependencyPrefix, StringComparison.Ordinal))
        {
            if (ParseModule(line.AsSpan(DependencyPrefix.Length), isMain: false) is { } dependency)
            {
                dependencyList.Add(dependency);
                replacementTarget = dependency;
            }
        }
        else if (line.StartsWith(ReplacementPrefix, StringComparison.Ordinal))
        {
            // A replacement line with no preceding module is ignored without being parsed.
            if (replacementTarget is not null
                && ParseReplacement(line.AsSpan(ReplacementPrefix.Length)) is { } replacement)
            {
                replacementTarget.SetReplacement(replacement);
            }
        }
        else if (line.StartsWith(BuildPrefix, StringComparison.Ordinal))
        {
            var setting = ParseBuildSetting(line.AsSpan(BuildPrefix.Length));
            if (!string.IsNullOrEmpty(setting.Key))
            {
                // Later occurrences of the same key overwrite earlier ones.
                buildSettings[setting.Key] = setting.Value;
            }
        }
    }

    if (main is null)
    {
        return false;
    }

    // Without an explicit "path" line the main module path stands in for it.
    var effectivePath = string.IsNullOrEmpty(declaredPath) ? main.Path : declaredPath;

    info = new GoBuildInfo(
        goVersion,
        absoluteBinaryPath,
        effectivePath,
        main,
        dependencyList,
        buildSettings);

    return true;
}
/// <summary>
/// Parses the tab-separated fields of a module line (path, then optional version and sum).
/// </summary>
/// <param name="span">Line content after the prefix.</param>
/// <param name="isMain">Whether the resulting module is flagged as the main module.</param>
/// <returns>The module, or <c>null</c> when the path field is blank.</returns>
private static GoModule? ParseModule(ReadOnlySpan<char> span, bool isMain)
{
    var parts = SplitFields(span, expected: 4);
    if (parts.Count == 0 || string.IsNullOrWhiteSpace(parts[0]))
    {
        return null;
    }

    string? version = null;
    string? sum = null;
    if (parts.Count > 1)
    {
        version = parts[1];
    }

    if (parts.Count > 2)
    {
        sum = parts[2];
    }

    return new GoModule(parts[0], version, sum, isMain);
}
/// <summary>
/// Parses the tab-separated fields of a replacement line (path, then optional version and sum).
/// </summary>
/// <param name="span">Line content after the prefix.</param>
/// <returns>The replacement, or <c>null</c> when the path field is blank.</returns>
private static GoModuleReplacement? ParseReplacement(ReadOnlySpan<char> span)
{
    var parts = SplitFields(span, expected: 3);
    if (parts.Count == 0 || string.IsNullOrWhiteSpace(parts[0]))
    {
        return null;
    }

    string? version = null;
    string? sum = null;
    if (parts.Count > 1)
    {
        version = parts[1];
    }

    if (parts.Count > 2)
    {
        sum = parts[2];
    }

    return new GoModuleReplacement(parts[0], version, sum);
}
/// <summary>
/// Splits a build-setting line at its first <c>=</c> into a key and a value, trimming and
/// unquoting each side.
/// </summary>
/// <param name="span">Line content after the prefix.</param>
/// <returns>
/// The setting, or the default pair (null key) when the line is blank, has no <c>=</c>, starts
/// with <c>=</c>, or the unquoted key is blank.
/// </returns>
private static KeyValuePair<string, string?> ParseBuildSetting(ReadOnlySpan<char> span)
{
    var text = span.Trim();

    // IndexOf returns -1 for blank input too, so one test covers "empty", "no '='" and "leading '='".
    var equalsAt = text.IndexOf('=');
    if (equalsAt <= 0)
    {
        return default;
    }

    var key = Unquote(text[..equalsAt].Trim().ToString());
    if (string.IsNullOrWhiteSpace(key))
    {
        return default;
    }

    var value = Unquote(text[(equalsAt + 1)..].Trim().ToString());
    return new KeyValuePair<string, string?>(key, value);
}
/// <summary>
/// Splits <paramref name="span"/> on tab characters, keeping empty fields. The result always
/// contains at least one element.
/// </summary>
/// <param name="span">Text to split.</param>
/// <param name="expected">Initial capacity hint for the result list.</param>
private static List<string> SplitFields(ReadOnlySpan<char> span, int expected)
{
    var result = new List<string>(expected);
    var remaining = span;

    while (true)
    {
        var tabAt = remaining.IndexOf('\t');
        if (tabAt < 0)
        {
            // Whatever follows the last tab (possibly nothing) is the final field.
            result.Add(remaining.ToString());
            return result;
        }

        result.Add(remaining[..tabAt].ToString());
        remaining = remaining[(tabAt + 1)..];
    }
}
/// <summary>
/// Trims a value and removes one layer of quoting: double-quoted text is decoded as a JSON
/// string literal, back-quoted text has its delimiters removed.
/// </summary>
/// <param name="value">Raw text.</param>
/// <returns>
/// The unquoted text. Unquoted input is returned trimmed; a double-quoted value that is not a
/// valid JSON string is returned trimmed with its quotes intact.
/// </returns>
private static string Unquote(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    var trimmed = value.Trim();
    if (trimmed.Length < 2)
    {
        return trimmed;
    }

    var first = trimmed[0];
    var last = trimmed[^1];

    if (first == '`' && last == '`')
    {
        return trimmed[1..^1];
    }

    if (first != '"' || last != '"')
    {
        return trimmed;
    }

    try
    {
        return JsonSerializer.Deserialize<string>(trimmed) ?? trimmed;
    }
    catch (JsonException)
    {
        return trimmed;
    }
}
}

View File

@@ -0,0 +1,82 @@
using System;
using System.Collections.Concurrent;
using System.IO;
using System.Security;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Resolves <see cref="GoBuildInfo"/> for binaries and caches the outcome - including "no build
/// info" - in a process-wide dictionary keyed by path, file length and last-write time.
/// </summary>
internal static class GoBuildInfoProvider
{
    private static readonly ConcurrentDictionary<GoBinaryCacheKey, GoBuildInfo?> Cache = new();

    /// <summary>Returns the cached or freshly computed build info for a file.</summary>
    /// <param name="absolutePath">Absolute path of the candidate binary.</param>
    /// <param name="info">The build info when available; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when build info is available for the file.</returns>
    public static bool TryGetBuildInfo(string absolutePath, out GoBuildInfo? info)
    {
        info = null;

        FileInfo probe;
        try
        {
            probe = new FileInfo(absolutePath);
            if (!probe.Exists)
            {
                return false;
            }
        }
        catch (Exception error) when (error is IOException or UnauthorizedAccessException or System.Security.SecurityException)
        {
            return false;
        }

        var cacheKey = new GoBinaryCacheKey(absolutePath, probe.Length, probe.LastWriteTimeUtc.Ticks);
        var resolved = Cache.GetOrAdd(cacheKey, static (_, path) => CreateBuildInfo(path), absolutePath);

        info = resolved;
        return resolved is not null;
    }

    // Reads, parses and (when available) enriches build info; null means the file has none.
    private static GoBuildInfo? CreateBuildInfo(string absolutePath)
    {
        var decoded = GoBinaryScanner.TryReadBuildInfo(absolutePath, out var goVersion, out var moduleData);
        if (!decoded || string.IsNullOrWhiteSpace(goVersion) || string.IsNullOrWhiteSpace(moduleData))
        {
            return null;
        }

        var parsed = GoBuildInfoParser.TryParse(goVersion!, absolutePath, moduleData!, out var buildInfo);
        if (!parsed || buildInfo is null)
        {
            return null;
        }

        var hasDwarf = GoDwarfReader.TryRead(absolutePath, out var dwarf) && dwarf is not null;
        return hasDwarf ? buildInfo.WithDwarf(dwarf!) : buildInfo;
    }

    /// <summary>
    /// Cache key identifying one file state. On Windows the path is compared in lower-cased
    /// form; elsewhere it is compared as given.
    /// </summary>
    private readonly record struct GoBinaryCacheKey(string Path, long Length, long LastWriteTicks)
    {
        private readonly string _normalizedPath = OperatingSystem.IsWindows()
            ? Path.ToLowerInvariant()
            : Path;

        public bool Equals(GoBinaryCacheKey other)
        {
            if (Length != other.Length || LastWriteTicks != other.LastWriteTicks)
            {
                return false;
            }

            return string.Equals(_normalizedPath, other._normalizedPath, StringComparison.Ordinal);
        }

        public override int GetHashCode()
        {
            return HashCode.Combine(_normalizedPath, Length, LastWriteTicks);
        }
    }
}

View File

@@ -0,0 +1,33 @@
using System;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// VCS details recovered from a Go binary. String values are trimmed, and blank values are
/// stored as <c>null</c>.
/// </summary>
internal sealed class GoDwarfMetadata
{
    /// <summary>Creates the metadata, normalising every string argument.</summary>
    public GoDwarfMetadata(string? vcsSystem, string? revision, bool? modified, string? timestampUtc)
    {
        (VcsSystem, Revision, Modified, TimestampUtc) =
            (Normalize(vcsSystem), Normalize(revision), modified, Normalize(timestampUtc));
    }

    /// <summary>Version-control system name, if known.</summary>
    public string? VcsSystem { get; }

    /// <summary>Revision identifier, if known.</summary>
    public string? Revision { get; }

    /// <summary>Modified flag as supplied by the caller, if known.</summary>
    public bool? Modified { get; }

    /// <summary>Timestamp text as supplied by the caller, if known.</summary>
    public string? TimestampUtc { get; }

    // Blank input becomes null; anything else is returned trimmed.
    private static string? Normalize(string? value)
        => string.IsNullOrWhiteSpace(value) ? null : value.Trim();
}

View File

@@ -0,0 +1,120 @@
using System;
using System.Buffers;
using System.IO;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
internal static class GoDwarfReader
{
// UTF-8 byte sequences that TryRead searches for; the text following a token, up to a NUL or line break, is its value.
private static readonly byte[] VcsSystemToken = Encoding.UTF8.GetBytes("vcs=");
private static readonly byte[] VcsRevisionToken = Encoding.UTF8.GetBytes("vcs.revision=");
private static readonly byte[] VcsModifiedToken = Encoding.UTF8.GetBytes("vcs.modified=");
private static readonly byte[] VcsTimeToken = Encoding.UTF8.GetBytes("vcs.time=");
/// <summary>
/// Scans the raw bytes of the file at <paramref name="path"/> for the VCS tokens
/// <c>vcs=</c>, <c>vcs.revision=</c>, <c>vcs.modified=</c> and <c>vcs.time=</c>
/// and packages whatever is found into a <see cref="GoDwarfMetadata"/>.
/// </summary>
/// <param name="path">Path of the binary to inspect.</param>
/// <param name="metadata">Receives the extracted metadata on success; <c>null</c> otherwise.</param>
/// <returns>
/// <c>true</c> when at least one of the four values was found. <c>false</c> when the file is missing,
/// empty, larger than 256 MiB, cannot be read (I/O or access error), or contains none of the tokens.
/// </returns>
public static bool TryRead(string path, out GoDwarfMetadata? metadata)
{
    metadata = null;

    FileInfo fileInfo;
    try
    {
        fileInfo = new FileInfo(path);
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }

    // Skip missing, empty and oversized (> 256 MiB) files; the whole file is buffered below.
    if (!fileInfo.Exists || fileInfo.Length == 0 || fileInfo.Length > 256 * 1024 * 1024)
    {
        return false;
    }

    var length = fileInfo.Length;
    var readLength = (int)Math.Min(length, int.MaxValue);
    var buffer = ArrayPool<byte>.Shared.Rent(readLength);
    var bytesRead = 0;

    try
    {
        using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);

        // A single Stream.Read call may return fewer bytes than requested, which would
        // truncate the scan. ReadAtLeast keeps reading until the requested amount is
        // available or the stream ends (the file may have shrunk since it was measured).
        bytesRead = stream.ReadAtLeast(buffer.AsSpan(0, readLength), readLength, throwOnEndOfStream: false);
        if (bytesRead <= 0)
        {
            return false;
        }

        var data = new ReadOnlySpan<byte>(buffer, 0, bytesRead);
        var revision = ExtractValue(data, VcsRevisionToken);
        var modifiedText = ExtractValue(data, VcsModifiedToken);
        var timestamp = ExtractValue(data, VcsTimeToken);
        var system = ExtractValue(data, VcsSystemToken);

        // bool.TryParse rejects null/blank text, so an absent or unparsable flag stays null.
        bool? modified = bool.TryParse(modifiedText, out var parsed) ? parsed : null;

        if (string.IsNullOrWhiteSpace(revision)
            && string.IsNullOrWhiteSpace(system)
            && modified is null
            && string.IsNullOrWhiteSpace(timestamp))
        {
            return false;
        }

        metadata = new GoDwarfMetadata(system, revision, modified, timestamp);
        return true;
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }
    finally
    {
        // Wipe only the part that was written before handing the array back to the shared pool.
        Array.Clear(buffer, 0, bytesRead);
        ArrayPool<byte>.Shared.Return(buffer);
    }
}
/// <summary>
/// Returns the UTF-8 text that follows the first occurrence of <paramref name="token"/> in
/// <paramref name="data"/>, up to (not including) the next NUL, LF or CR byte or the end of the data.
/// </summary>
/// <returns>The decoded value, or <c>null</c> when the token is absent or the value is empty.</returns>
private static string? ExtractValue(ReadOnlySpan<byte> data, ReadOnlySpan<byte> token)
{
    var tokenIndex = data.IndexOf(token);
    if (tokenIndex < 0)
    {
        return null;
    }

    var valueStart = tokenIndex + token.Length;
    var valueLength = 0;

    // Count bytes until a terminator (NUL / LF / CR) or the end of the buffer.
    for (var cursor = valueStart; cursor < data.Length; cursor++)
    {
        var current = data[cursor];
        if (current == 0 || current == (byte)'\n' || current == (byte)'\r')
        {
            break;
        }

        valueLength++;
    }

    return valueLength == 0
        ? null
        : Encoding.UTF8.GetString(data.Slice(valueStart, valueLength));
}
}

View File

@@ -0,0 +1,67 @@
using System;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// A Go module reference: module path, optional version and checksum, a main-module flag
/// and an optional replacement that can be attached after construction.
/// </summary>
internal sealed class GoModule
{
    /// <summary>Creates the module; version and sum are trimmed and blank values become <c>null</c>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
    public GoModule(string path, string? version, string? sum, bool isMain)
    {
        ArgumentNullException.ThrowIfNull(path);

        Path = path;
        Version = Normalize(version);
        Sum = Normalize(sum);
        IsMain = isMain;
    }

    public string Path { get; }

    public string? Version { get; }

    public string? Sum { get; }

    public GoModuleReplacement? Replacement { get; private set; }

    public bool IsMain { get; }

    /// <summary>Attaches (or overwrites) the replacement for this module.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="replacement"/> is <c>null</c>.</exception>
    public void SetReplacement(GoModuleReplacement replacement)
    {
        ArgumentNullException.ThrowIfNull(replacement);
        Replacement = replacement;
    }

    // A value that is not all-whitespace always trims to a non-empty string.
    private static string? Normalize(string? value)
        => string.IsNullOrWhiteSpace(value) ? null : value.Trim();
}
/// <summary>
/// Replacement target for a <see cref="GoModule"/>: replacement path plus optional version and checksum.
/// </summary>
internal sealed class GoModuleReplacement
{
    /// <summary>Creates the replacement; version and sum are trimmed and blank values become <c>null</c>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
    public GoModuleReplacement(string path, string? version, string? sum)
    {
        ArgumentNullException.ThrowIfNull(path);

        Path = path;
        Version = Normalize(version);
        Sum = Normalize(sum);
    }

    public string Path { get; }

    public string? Version { get; }

    public string? Sum { get; }

    // A value that is not all-whitespace always trims to a non-empty string.
    private static string? Normalize(string? value)
        => string.IsNullOrWhiteSpace(value) ? null : value.Trim();
}

View File

@@ -0,0 +1,13 @@
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Value pairing a binary's path with the indicator that flagged it and an optional Go version hint.
/// NOTE(review): presumably produced by the stripped-binary fallback heuristics — confirm against the code that constructs it.
/// </summary>
/// <param name="AbsolutePath">Path of the classified binary (absolute, going by the name; not validated by this type).</param>
/// <param name="Indicator">Which kind of evidence led to the classification.</param>
/// <param name="GoVersionHint">Optional Go version hint; <c>null</c> when none is available.</param>
internal readonly record struct GoStrippedBinaryClassification(
string AbsolutePath,
GoStrippedBinaryIndicator Indicator,
string? GoVersionHint);
/// <summary>Kind of evidence recorded in a <see cref="GoStrippedBinaryClassification"/>.</summary>
internal enum GoStrippedBinaryIndicator
{
// Default value (0).
None = 0,
// NOTE(review): presumably a Go build ID was found in the binary — confirm against the classifier.
BuildId,
// NOTE(review): presumably Go runtime marker strings were found — confirm against the classifier.
GoRuntimeMarkers,
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<!-- SDK default item globbing is switched off; every item type is listed explicitly in the ItemGroup below. -->
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<!-- Explicit item includes: C# sources are compiled, JSON files are embedded as resources, and all remaining files are None items. bin/ and obj/ are excluded from every glob. -->
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
</ItemGroup>
<!-- Reference to the shared language-analyzer project in the sibling directory. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
# Go Analyzer Task Flow
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|-----|----|--------|------------|-------------|---------------|
| 1 | SCANNER-ANALYZERS-LANG-10-304A | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-307 | Parse Go build info blob (`runtime/debug` format) and `.note.go.buildid`; map to module/version and evidence. | Build info extracted across Go 1.18–1.23 fixtures; evidence includes VCS, module path, and build settings. |
| 2 | SCANNER-ANALYZERS-LANG-10-304B | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-304A | Implement DWARF-lite reader for VCS metadata + dirty flag; add cache to avoid re-reading identical binaries. | DWARF reader supplies commit hash for ≥95% fixtures; cache reduces duplicated IO by ≥70%. |
| 3 | SCANNER-ANALYZERS-LANG-10-304C | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-304B | Fallback heuristics for stripped binaries with deterministic `bin:{sha256}` labeling and quiet provenance. | Heuristic labels clearly separated; tests ensure no false “observed” provenance; documentation updated. |
| 4 | SCANNER-ANALYZERS-LANG-10-307G | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-304C | Wire shared helpers (license mapping, usage flags) and ensure concurrency-safe buffer reuse. | Analyzer reuses shared infrastructure; concurrency tests with parallel scans pass; no data races. |
| 5 | SCANNER-ANALYZERS-LANG-10-308G | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-307G | Determinism fixtures + benchmark harness (vs. competitor). | Fixtures under `Fixtures/lang/go/`; CI determinism check; benchmark runs showing ≥20% speed advantage. |
| 6 | SCANNER-ANALYZERS-LANG-10-309G | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-308G | Package plug-in manifest + Offline Kit notes; ensure Worker DI registration. | Manifest copied; Worker loads analyzer; Offline Kit docs updated with Go analyzer presence. |
| 7 | SCANNER-ANALYZERS-LANG-10-304D | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-304C | Emit telemetry counters for stripped-binary heuristics and document metrics wiring. | New `scanner_analyzer_golang_heuristic_total` counter recorded; docs updated with offline aggregation notes. |
| 8 | SCANNER-ANALYZERS-LANG-10-304E | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-304D | Plumb Go heuristic counter into Scanner metrics pipeline and alerting. | Counter emitted through Worker telemetry/export pipeline; dashboard & alert rule documented; smoke test proves metric visibility. |

View File

@@ -0,0 +1,23 @@
{
"schemaVersion": "1.0",
"id": "stellaops.analyzer.lang.go",
"displayName": "StellaOps Go Analyzer (preview)",
"version": "0.1.0",
"requiresRestart": true,
"entryPoint": {
"type": "dotnet",
"assembly": "StellaOps.Scanner.Analyzers.Lang.Go.dll",
"typeName": "StellaOps.Scanner.Analyzers.Lang.Go.GoAnalyzerPlugin"
},
"capabilities": [
"language-analyzer",
"golang",
"go"
],
"metadata": {
"org.stellaops.analyzer.language": "go",
"org.stellaops.analyzer.kind": "language",
"org.stellaops.restart.required": "true",
"org.stellaops.analyzer.status": "preview"
}
}

View File

@@ -0,0 +1,10 @@
global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.IO.Compression;
global using System.Security.Cryptography;
global using System.Text;
global using System.Threading;
global using System.Threading.Tasks;
global using StellaOps.Scanner.Analyzers.Lang;

View File

@@ -0,0 +1,62 @@
using System.IO.Compression;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
/// <summary>Tells <see cref="JavaClassLocation"/> how to open the class bytes.</summary>
internal enum JavaClassLocationKind
{
// The class is the archive entry itself; it is opened directly from the archive.
ArchiveEntry,
// The archive entry is itself an archive; the class is read from inside it via the nested class path.
EmbeddedArchiveEntry,
}
/// <summary>
/// Describes where the bytes of a class live: either directly in an archive entry, or inside an
/// archive that is itself stored as an entry of the outer archive.
/// </summary>
/// <param name="Kind">How the class has to be opened.</param>
/// <param name="Archive">Outer archive that owns <paramref name="Entry"/>.</param>
/// <param name="Entry">Entry holding the class itself, or the embedded archive containing it.</param>
/// <param name="NestedClassPath">Entry name of the class inside the embedded archive; <c>null</c> for direct entries.</param>
internal sealed record JavaClassLocation(
    JavaClassLocationKind Kind,
    JavaArchive Archive,
    JavaArchiveEntry Entry,
    string? NestedClassPath)
{
    /// <summary>Creates a location for a class stored directly as an archive entry.</summary>
    public static JavaClassLocation ForArchive(JavaArchive archive, JavaArchiveEntry entry)
    {
        return new JavaClassLocation(JavaClassLocationKind.ArchiveEntry, archive, entry, NestedClassPath: null);
    }

    /// <summary>Creates a location for a class stored inside an embedded archive entry.</summary>
    public static JavaClassLocation ForEmbedded(JavaArchive archive, JavaArchiveEntry entry, string nestedClassPath)
    {
        return new JavaClassLocation(JavaClassLocationKind.EmbeddedArchiveEntry, archive, entry, nestedClassPath);
    }

    /// <summary>Opens a readable stream over the class bytes.</summary>
    /// <exception cref="InvalidOperationException">The kind is not supported, or an embedded location has no nested class path.</exception>
    /// <exception cref="FileNotFoundException">The nested class is missing from the embedded archive.</exception>
    public Stream OpenClassStream(CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        switch (Kind)
        {
            case JavaClassLocationKind.ArchiveEntry:
                return Archive.OpenEntry(Entry);
            case JavaClassLocationKind.EmbeddedArchiveEntry:
                return OpenEmbeddedEntryStream(cancellationToken);
            default:
                throw new InvalidOperationException($"Unsupported class location kind '{Kind}'.");
        }
    }

    // Copies the embedded archive into memory, opens it as a zip and returns the requested
    // class as an independent in-memory stream positioned at the start.
    private Stream OpenEmbeddedEntryStream(CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        using (var containerStream = Archive.OpenEntry(Entry))
        using (var containerCopy = new MemoryStream())
        {
            containerStream.CopyTo(containerCopy);
            containerCopy.Position = 0;

            using (var nestedArchive = new ZipArchive(containerCopy, ZipArchiveMode.Read, leaveOpen: true))
            {
                if (NestedClassPath is null)
                {
                    throw new InvalidOperationException($"Nested class path not specified for embedded entry '{Entry.OriginalPath}'.");
                }

                var classEntry = nestedArchive.GetEntry(NestedClassPath)
                    ?? throw new FileNotFoundException($"Class '{NestedClassPath}' not found inside embedded archive entry '{Entry.OriginalPath}'.");

                using (var classStream = classEntry.Open())
                {
                    // The returned stream must outlive the archive, so the class is copied out.
                    var output = new MemoryStream();
                    classStream.CopyTo(output);
                    output.Position = 0;
                    return output;
                }
            }
        }
    }
}

View File

@@ -0,0 +1,102 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
/// <summary>
/// Result of a class-path analysis: segments, module descriptors, duplicate classes and split packages.
/// Null elements are dropped and every collection is stored in a fixed order.
/// </summary>
internal sealed class JavaClassPathAnalysis
{
    /// <summary>Filters out null elements and sorts each input before storing it.</summary>
    public JavaClassPathAnalysis(
        IEnumerable<JavaClassPathSegment> segments,
        IEnumerable<JavaModuleDescriptor> modules,
        IEnumerable<JavaClassDuplicate> duplicateClasses,
        IEnumerable<JavaSplitPackage> splitPackages)
    {
        // Segments: by order, then identifier (ordinal).
        var orderedSegments = segments
            .Where(static segment => segment is not null)
            .OrderBy(static segment => segment.Order)
            .ThenBy(static segment => segment.Identifier, StringComparer.Ordinal);
        Segments = ImmutableArray.CreateRange(orderedSegments);

        // Modules: by name, then source (both ordinal).
        var orderedModules = modules
            .Where(static module => module is not null)
            .OrderBy(static module => module.Name, StringComparer.Ordinal)
            .ThenBy(static module => module.Source, StringComparer.Ordinal);
        Modules = ImmutableArray.CreateRange(orderedModules);

        // Duplicate classes: by class name (ordinal).
        var orderedDuplicates = duplicateClasses
            .Where(static duplicate => duplicate is not null)
            .OrderBy(static duplicate => duplicate.ClassName, StringComparer.Ordinal);
        DuplicateClasses = ImmutableArray.CreateRange(orderedDuplicates);

        // Split packages: by package name (ordinal).
        var orderedSplits = splitPackages
            .Where(static split => split is not null)
            .OrderBy(static split => split.PackageName, StringComparer.Ordinal);
        SplitPackages = ImmutableArray.CreateRange(orderedSplits);
    }

    public ImmutableArray<JavaClassPathSegment> Segments { get; }

    public ImmutableArray<JavaModuleDescriptor> Modules { get; }

    public ImmutableArray<JavaClassDuplicate> DuplicateClasses { get; }

    public ImmutableArray<JavaSplitPackage> SplitPackages { get; }
}
/// <summary>
/// One entry of the analyzed class path: an archive, or a class directory inside an archive,
/// together with the classes, packages, service definitions and class locations found in it.
/// </summary>
internal sealed class JavaClassPathSegment
{
    /// <summary>Creates the segment. Null collection arguments are replaced by empty collections.</summary>
    /// <param name="identifier">Identifier of the segment.</param>
    /// <param name="displayPath">Path shown for the segment.</param>
    /// <param name="kind">Whether the segment is an archive or a directory.</param>
    /// <param name="packaging">Packaging kind of the segment.</param>
    /// <param name="order">Position of the segment in discovery order.</param>
    /// <param name="module">Module descriptor of the segment, if any.</param>
    /// <param name="classes">Class names contained in the segment.</param>
    /// <param name="packages">Package fingerprints keyed by package name.</param>
    /// <param name="serviceDefinitions">Service providers keyed by service identifier.</param>
    /// <param name="classLocations">Locations of the class bytes keyed by class name.</param>
    /// <exception cref="ArgumentNullException"><paramref name="identifier"/> or <paramref name="displayPath"/> is <c>null</c>.</exception>
    public JavaClassPathSegment(
        string identifier,
        string displayPath,
        JavaClassPathSegmentKind kind,
        JavaPackagingKind packaging,
        int order,
        JavaModuleDescriptor? module,
        ImmutableSortedSet<string> classes,
        ImmutableDictionary<string, JavaPackageFingerprint> packages,
        ImmutableDictionary<string, ImmutableArray<string>> serviceDefinitions,
        ImmutableDictionary<string, JavaClassLocation> classLocations)
    {
        Identifier = identifier ?? throw new ArgumentNullException(nameof(identifier));
        DisplayPath = displayPath ?? throw new ArgumentNullException(nameof(displayPath));
        Kind = kind;
        Packaging = packaging;
        Order = order;
        Module = module;

        // Fall back to an empty set, like the other collections, so the non-nullable
        // property never holds null.
        Classes = classes ?? ImmutableSortedSet<string>.Empty;
        Packages = packages ?? ImmutableDictionary<string, JavaPackageFingerprint>.Empty;
        ServiceDefinitions = serviceDefinitions ?? ImmutableDictionary<string, ImmutableArray<string>>.Empty;
        ClassLocations = classLocations ?? ImmutableDictionary<string, JavaClassLocation>.Empty;
    }

    public string Identifier { get; }

    public string DisplayPath { get; }

    public JavaClassPathSegmentKind Kind { get; }

    public JavaPackagingKind Packaging { get; }

    public int Order { get; }

    public JavaModuleDescriptor? Module { get; }

    public ImmutableSortedSet<string> Classes { get; }

    public ImmutableDictionary<string, JavaPackageFingerprint> Packages { get; }

    public ImmutableDictionary<string, ImmutableArray<string>> ServiceDefinitions { get; }

    public ImmutableDictionary<string, JavaClassLocation> ClassLocations { get; }
}
/// <summary>Shape of a <see cref="JavaClassPathSegment"/>.</summary>
internal enum JavaClassPathSegmentKind
{
// Used by the class-path builder for an archive itself and for library jars embedded in it.
Archive,
// Used by the class-path builder for the BOOT-INF/classes/ and WEB-INF/classes/ trees inside an archive.
Directory,
}
// Per-package summary of a segment: the package name, the number of classes recorded for it, and a
// fingerprint string (the class-path builder fills it with a lowercase hex SHA-256 of the sorted simple class names).
internal sealed record JavaPackageFingerprint(string PackageName, int ClassCount, string Fingerprint);
// A class name together with the identifiers of the segments that contain it
// (the class-path builder only emits these for classes recorded more than once).
internal sealed record JavaClassDuplicate(string ClassName, ImmutableArray<string> SegmentIdentifiers);
// A package name together with the identifiers of the segments that contribute classes to it
// (the class-path builder only emits these for packages present in more than one segment).
internal sealed record JavaSplitPackage(string PackageName, ImmutableArray<string> SegmentIdentifiers);

View File

@@ -0,0 +1,660 @@
using System.Collections.Immutable;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
internal static class JavaClassPathBuilder
{
private const string ClassFileSuffix = ".class";
/// <summary>
/// Walks every archive of <paramref name="workspace"/>, collects class-path segments and module
/// descriptors, then derives the classes recorded more than once and the packages that
/// span several segments.
/// </summary>
/// <param name="workspace">Workspace whose archives are analyzed.</param>
/// <param name="cancellationToken">Checked before each archive is processed.</param>
/// <exception cref="ArgumentNullException"><paramref name="workspace"/> is <c>null</c>.</exception>
public static JavaClassPathAnalysis Build(JavaWorkspace workspace, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(workspace);

    var segments = new List<JavaClassPathSegment>();
    var modules = new List<JavaModuleDescriptor>();
    var classOwners = new Dictionary<string, List<string>>(StringComparer.Ordinal);
    var packageOwners = new Dictionary<string, HashSet<string>>(StringComparer.Ordinal);
    var order = 0;

    foreach (var archive in workspace.Archives)
    {
        cancellationToken.ThrowIfCancellationRequested();
        ProcessArchive(archive, segments, modules, classOwners, packageOwners, ref order, cancellationToken);
    }

    // A class is a duplicate when it was recorded for more than one segment entry.
    var duplicateClasses = ImmutableArray.CreateBuilder<JavaClassDuplicate>();
    foreach (var (className, owners) in classOwners)
    {
        if (owners.Count <= 1)
        {
            continue;
        }

        var ownerIdentifiers = owners
            .Distinct(StringComparer.Ordinal)
            .OrderBy(static identifier => identifier, StringComparer.Ordinal)
            .ToImmutableArray();
        duplicateClasses.Add(new JavaClassDuplicate(className, ownerIdentifiers));
    }

    // A package is split when more than one segment contributes classes to it.
    var splitPackages = ImmutableArray.CreateBuilder<JavaSplitPackage>();
    foreach (var (packageName, owners) in packageOwners)
    {
        if (owners.Count <= 1)
        {
            continue;
        }

        var ownerIdentifiers = owners
            .OrderBy(static identifier => identifier, StringComparer.Ordinal)
            .ToImmutableArray();
        splitPackages.Add(new JavaSplitPackage(packageName, ownerIdentifiers));
    }

    return new JavaClassPathAnalysis(segments, modules, duplicateClasses.ToImmutable(), splitPackages.ToImmutable());
}
// Scans a single archive and appends the segments it yields, in this order:
//   1. the archive itself (classes outside BOOT-INF/classes/ and WEB-INF/classes/, plus its module descriptor),
//   2. "<archive>!BOOT-INF/classes/",
//   3. "<archive>!WEB-INF/classes/",
//   4. one segment per embedded library jar, visited in ordinal path order.
// Entries are routed by their effective path; the order of the checks below decides where an entry lands.
// NOTE(review): the base segment is only emitted when it has classes or a module descriptor, and the
// BOOT-INF/WEB-INF segments only when they have classes, so service descriptors collected without
// accompanying classes are not reported for those segments — confirm this is intended.
private static void ProcessArchive(
JavaArchive archive,
List<JavaClassPathSegment> segments,
List<JavaModuleDescriptor> modules,
Dictionary<string, List<string>> duplicateMap,
Dictionary<string, HashSet<string>> packageMap,
ref int order,
CancellationToken cancellationToken)
{
var identifier = NormalizeArchivePath(archive.RelativePath);
// Separate buckets for the three class roots an archive can contain.
var baseClasses = new List<JavaClassRecord>();
var baseServices = new Dictionary<string, List<string>>(StringComparer.Ordinal);
var bootClasses = new List<JavaClassRecord>();
var bootServices = new Dictionary<string, List<string>>(StringComparer.Ordinal);
var webClasses = new List<JavaClassRecord>();
var webServices = new Dictionary<string, List<string>>(StringComparer.Ordinal);
var embeddedEntries = new List<JavaArchiveEntry>();
JavaModuleDescriptor? moduleDescriptor = ParseModuleDescriptor(archive, identifier, cancellationToken);
foreach (var entry in archive.Entries)
{
cancellationToken.ThrowIfCancellationRequested();
var path = entry.EffectivePath;
if (path.Length == 0)
{
continue;
}
// The module descriptor was already parsed above; it is not recorded as a class.
if (string.Equals(path, "module-info.class", StringComparison.OrdinalIgnoreCase))
{
continue;
}
// Entries under BOOT-INF/classes/ are handled relative to that prefix.
if (path.StartsWith("BOOT-INF/classes/", StringComparison.OrdinalIgnoreCase))
{
var relative = path["BOOT-INF/classes/".Length..];
if (TryHandleServiceDescriptor(archive, entry, relative, bootServices, cancellationToken))
{
continue;
}
if (relative.EndsWith(ClassFileSuffix, StringComparison.OrdinalIgnoreCase))
{
AddClassRecord(bootClasses, archive, entry, relative);
}
continue;
}
// Entries under WEB-INF/classes/ are handled relative to that prefix.
if (path.StartsWith("WEB-INF/classes/", StringComparison.OrdinalIgnoreCase))
{
var relative = path["WEB-INF/classes/".Length..];
if (TryHandleServiceDescriptor(archive, entry, relative, webServices, cancellationToken))
{
continue;
}
if (relative.EndsWith(ClassFileSuffix, StringComparison.OrdinalIgnoreCase))
{
AddClassRecord(webClasses, archive, entry, relative);
}
continue;
}
if (TryHandleServiceDescriptor(archive, entry, path, baseServices, cancellationToken))
{
continue;
}
if (path.EndsWith(ClassFileSuffix, StringComparison.OrdinalIgnoreCase))
{
// For JMod packaging the "classes/" prefix is stripped before the class name is derived.
if (archive.Packaging == JavaPackagingKind.JMod && path.StartsWith("classes/", StringComparison.OrdinalIgnoreCase))
{
var relative = path["classes/".Length..];
AddClassRecord(baseClasses, archive, entry, relative);
}
else
{
AddClassRecord(baseClasses, archive, entry, path);
}
}
// Jars in a recognized library folder are analyzed as embedded archives after the loop.
else if (path.EndsWith(".jar", StringComparison.OrdinalIgnoreCase) && IsEmbeddedLibrary(path))
{
embeddedEntries.Add(entry);
}
}
// Base archive segment.
if (baseClasses.Count > 0 || moduleDescriptor is not null)
{
AddSegment(
segments,
modules,
duplicateMap,
packageMap,
ref order,
identifier,
identifier,
JavaClassPathSegmentKind.Archive,
archive.Packaging,
moduleDescriptor,
baseClasses,
ToImmutableServiceMap(baseServices));
}
// BOOT-INF/classes/ segment, identified as "<archive>!BOOT-INF/classes/".
if (bootClasses.Count > 0)
{
var bootIdentifier = string.Concat(identifier, "!BOOT-INF/classes/");
AddSegment(
segments,
modules,
duplicateMap,
packageMap,
ref order,
bootIdentifier,
bootIdentifier,
JavaClassPathSegmentKind.Directory,
JavaPackagingKind.Unknown,
module: null,
bootClasses,
ToImmutableServiceMap(bootServices));
}
// WEB-INF/classes/ segment, identified as "<archive>!WEB-INF/classes/".
if (webClasses.Count > 0)
{
var webIdentifier = string.Concat(identifier, "!WEB-INF/classes/");
AddSegment(
segments,
modules,
duplicateMap,
packageMap,
ref order,
webIdentifier,
webIdentifier,
JavaClassPathSegmentKind.Directory,
JavaPackagingKind.Unknown,
module: null,
webClasses,
ToImmutableServiceMap(webServices));
}
// Embedded jars are visited in ordinal path order so segment order does not depend on entry order.
foreach (var embedded in embeddedEntries.OrderBy(static entry => entry.EffectivePath, StringComparer.Ordinal))
{
cancellationToken.ThrowIfCancellationRequested();
var childIdentifier = string.Concat(identifier, "!", embedded.EffectivePath.Replace('\\', '/'));
var analysis = AnalyzeEmbeddedArchive(archive, embedded, childIdentifier, cancellationToken);
// A null analysis means the embedded jar had no classes, module descriptor or services.
if (analysis is null)
{
continue;
}
AddSegment(
segments,
modules,
duplicateMap,
packageMap,
ref order,
childIdentifier,
childIdentifier,
JavaClassPathSegmentKind.Archive,
JavaPackagingKind.Jar,
analysis.Module,
analysis.Classes,
analysis.Services);
}
}
/// <summary>
/// Parses the archive's <c>module-info.class</c> entry, if it has one.
/// </summary>
/// <returns>The parser's result, or <c>null</c> when the archive has no such entry.</returns>
private static JavaModuleDescriptor? ParseModuleDescriptor(JavaArchive archive, string identifier, CancellationToken cancellationToken)
{
    if (archive.TryGetEntry("module-info.class", out var moduleInfoEntry))
    {
        using var moduleInfoStream = archive.OpenEntry(moduleInfoEntry);
        return JavaModuleInfoParser.TryParse(moduleInfoStream, identifier, cancellationToken);
    }

    return null;
}
/// <summary>
/// Creates a segment from the supplied classes, module descriptor and service definitions,
/// appends it to <paramref name="segments"/> and records its classes and packages in the
/// duplicate and split-package maps. Does nothing when all three inputs are empty.
/// </summary>
/// <param name="order">Running segment counter; consumed and incremented when a segment is created.</param>
private static void AddSegment(
    List<JavaClassPathSegment> segments,
    List<JavaModuleDescriptor> modules,
    Dictionary<string, List<string>> duplicateMap,
    Dictionary<string, HashSet<string>> packageMap,
    ref int order,
    string identifier,
    string displayPath,
    JavaClassPathSegmentKind kind,
    JavaPackagingKind packaging,
    JavaModuleDescriptor? module,
    IReadOnlyCollection<JavaClassRecord> classes,
    ImmutableDictionary<string, ImmutableArray<string>> serviceDefinitions)
{
    var hasClasses = classes is { Count: > 0 };
    var hasServices = serviceDefinitions is { Count: > 0 };
    if (!hasClasses && module is null && !hasServices)
    {
        return;
    }

    var records = classes ?? Array.Empty<JavaClassRecord>();

    ImmutableSortedSet<string> classNames;
    ImmutableDictionary<string, JavaClassLocation> locations;
    if (records.Count == 0)
    {
        classNames = ImmutableSortedSet<string>.Empty;
    }
    else
    {
        classNames = ImmutableSortedSet.CreateRange(StringComparer.Ordinal, records.Select(static record => record.ClassName));
    }

    var fingerprints = BuildPackageFingerprints(records);

    if (records.Count == 0)
    {
        locations = ImmutableDictionary<string, JavaClassLocation>.Empty;
    }
    else
    {
        // NOTE(review): two records sharing a class name but pointing at different locations
        // would make this call throw — confirm callers never supply such duplicates.
        locations = records.ToImmutableDictionary(
            static record => record.ClassName,
            static record => record.Location,
            StringComparer.Ordinal);
    }

    segments.Add(new JavaClassPathSegment(
        identifier,
        displayPath,
        kind,
        packaging,
        order++,
        module,
        classNames,
        fingerprints,
        serviceDefinitions ?? ImmutableDictionary<string, ImmutableArray<string>>.Empty,
        locations));

    if (module is not null)
    {
        modules.Add(module);
    }

    // Remember which segments contain each class.
    foreach (var className in classNames)
    {
        if (duplicateMap.TryGetValue(className, out var owners))
        {
            owners.Add(identifier);
        }
        else
        {
            duplicateMap[className] = new List<string> { identifier };
        }
    }

    // Remember which segments contribute to each package.
    foreach (var fingerprint in fingerprints.Values)
    {
        if (packageMap.TryGetValue(fingerprint.PackageName, out var owners))
        {
            owners.Add(identifier);
        }
        else
        {
            packageMap[fingerprint.PackageName] = new HashSet<string>(StringComparer.Ordinal) { identifier };
        }
    }
}
/// <summary>
/// If <paramref name="relativePath"/> names a <c>META-INF/services/</c> descriptor, reads its
/// provider list and merges it into <paramref name="target"/> under the service identifier.
/// </summary>
/// <returns>
/// <c>true</c> when the path is a service descriptor (even if it is empty or unreadable);
/// <c>false</c> when it is not.
/// </returns>
private static bool TryHandleServiceDescriptor(
    JavaArchive archive,
    JavaArchiveEntry entry,
    string relativePath,
    Dictionary<string, List<string>> target,
    CancellationToken cancellationToken)
{
    if (!TryGetServiceId(relativePath, out var serviceId))
    {
        return false;
    }

    try
    {
        var providers = ReadServiceProviders(() => archive.OpenEntry(entry), cancellationToken);
        if (providers.Count > 0)
        {
            if (target.TryGetValue(serviceId, out var known))
            {
                known.AddRange(providers);
            }
            else
            {
                target[serviceId] = new List<string>(providers);
            }
        }
    }
    catch (Exception ex) when (ex is IOException or InvalidDataException)
    {
        // Ignore malformed service descriptor.
    }

    return true;
}
/// <summary>
/// Extracts the service identifier from a path that starts with <c>META-INF/services/</c>
/// (prefix matched case-insensitively).
/// </summary>
/// <param name="relativePath">Entry path to inspect.</param>
/// <param name="serviceId">Trimmed remainder after the prefix on success; empty string otherwise.</param>
/// <returns><c>true</c> when the prefix matches and the remainder is not blank.</returns>
private static bool TryGetServiceId(string relativePath, out string serviceId)
{
    const string Prefix = "META-INF/services/";

    serviceId = string.Empty;
    if (relativePath.StartsWith(Prefix, StringComparison.OrdinalIgnoreCase))
    {
        var candidate = relativePath[Prefix.Length..].Trim();
        if (candidate.Length > 0)
        {
            serviceId = candidate;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reads a service descriptor line by line and returns its provider names. Everything from
/// <c>#</c> to the end of a line is a comment; surrounding whitespace is removed and blank lines are skipped.
/// </summary>
/// <param name="streamFactory">Opens the descriptor stream; the stream is disposed before returning.</param>
/// <param name="cancellationToken">Checked once per line read.</param>
private static List<string> ReadServiceProviders(Func<Stream> streamFactory, CancellationToken cancellationToken)
{
    using var stream = streamFactory();
    using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: false);

    var providers = new List<string>();
    string? rawLine;
    while ((rawLine = reader.ReadLine()) is not null)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Drop the trailing comment, if any, then the surrounding whitespace.
        var hashIndex = rawLine.IndexOf('#');
        var content = (hashIndex >= 0 ? rawLine[..hashIndex] : rawLine).Trim();
        if (content.Length != 0)
        {
            providers.Add(content);
        }
    }

    return providers;
}
/// <summary>
/// Converts the mutable service map into an immutable one keyed with ordinal comparison.
/// Provider names are trimmed, null/empty names are dropped, and services left without
/// any provider are omitted.
/// </summary>
private static ImmutableDictionary<string, ImmutableArray<string>> ToImmutableServiceMap(Dictionary<string, List<string>> source)
{
    if (source.Count == 0)
    {
        return ImmutableDictionary<string, ImmutableArray<string>>.Empty;
    }

    var result = ImmutableDictionary.CreateBuilder<string, ImmutableArray<string>>(StringComparer.Ordinal);
    foreach (var (serviceId, rawProviders) in source.OrderBy(static entry => entry.Key, StringComparer.Ordinal))
    {
        var providers = rawProviders
            .Select(static provider => provider?.Trim())
            .Where(static provider => !string.IsNullOrEmpty(provider))
            .Select(static provider => provider!)
            .ToImmutableArray();

        if (!providers.IsEmpty)
        {
            result[serviceId] = providers;
        }
    }

    return result.ToImmutable();
}
/// <summary>
/// Groups the class records by package and produces, per package, the class count and a
/// fingerprint computed over the ordinally sorted simple class names.
/// </summary>
/// <returns>Fingerprints keyed by package name (ordinal); empty when there are no classes.</returns>
private static ImmutableDictionary<string, JavaPackageFingerprint> BuildPackageFingerprints(IReadOnlyCollection<JavaClassRecord> classes)
{
    if (classes is not { Count: > 0 })
    {
        return ImmutableDictionary<string, JavaPackageFingerprint>.Empty;
    }

    var result = ImmutableDictionary.CreateBuilder<string, JavaPackageFingerprint>(StringComparer.Ordinal);

    var byPackage = classes
        .GroupBy(static record => record.PackageName, StringComparer.Ordinal)
        .OrderBy(static package => package.Key, StringComparer.Ordinal);
    foreach (var package in byPackage)
    {
        // Sorting first makes the fingerprint independent of the order the classes were recorded in.
        var sortedNames = package
            .Select(static record => record.SimpleName)
            .OrderBy(static name => name, StringComparer.Ordinal)
            .ToArray();

        result[package.Key] = new JavaPackageFingerprint(
            package.Key,
            sortedNames.Length,
            ComputeFingerprint(sortedNames));
    }

    return result.ToImmutable();
}
/// <summary>
/// Joins the values with <c>'\n'</c>, hashes the UTF-8 bytes with SHA-256 and returns the
/// digest as lowercase hexadecimal.
/// </summary>
private static string ComputeFingerprint(IEnumerable<string> values)
{
    var joined = string.Join('\n', values);
    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(joined));
    return Convert.ToHexString(digest).ToLowerInvariant();
}
/// <summary>
/// Copies an embedded archive entry into memory, opens it as a zip and collects its classes,
/// service definitions and module descriptor.
/// </summary>
/// <remarks>
/// When several entries map to the same effective path (multi-release variants), the highest
/// version wins, and ties are broken by the ordinally smallest original entry name. The module
/// descriptor follows the same precedence, and a <c>module-info.class</c> that fails to parse
/// never replaces one that parsed, so the result does not depend on zip entry order.
/// </remarks>
/// <param name="parentArchive">Archive that contains the embedded archive.</param>
/// <param name="entry">Entry of <paramref name="parentArchive"/> holding the embedded archive.</param>
/// <param name="identifier">Identifier passed to the module parser as the descriptor source.</param>
/// <param name="cancellationToken">Checked once per zip entry.</param>
/// <returns>The analysis, or <c>null</c> when no classes, module descriptor or services were found.</returns>
private static EmbeddedArchiveAnalysis? AnalyzeEmbeddedArchive(JavaArchive parentArchive, JavaArchiveEntry entry, string identifier, CancellationToken cancellationToken)
{
    using var sourceStream = parentArchive.OpenEntry(entry);
    using var buffer = new MemoryStream();
    sourceStream.CopyTo(buffer);
    buffer.Position = 0;
    using var zip = new ZipArchive(buffer, ZipArchiveMode.Read, leaveOpen: true);

    var candidates = new Dictionary<string, EmbeddedClassCandidate>(StringComparer.Ordinal);
    var services = new Dictionary<string, EmbeddedServiceCandidate>(StringComparer.Ordinal);
    JavaModuleDescriptor? moduleDescriptor = null;
    var moduleVersion = 0;
    string? moduleOriginalPath = null;

    // Parses a module-info.class entry and keeps it only if it parsed and outranks the current one.
    void ConsiderModuleDescriptor(ZipArchiveEntry moduleEntry, int version)
    {
        using var moduleStream = moduleEntry.Open();
        var parsed = JavaModuleInfoParser.TryParse(moduleStream, identifier, cancellationToken);
        if (parsed is null)
        {
            return;
        }

        var outranks = moduleDescriptor is null
            || version > moduleVersion
            || (version == moduleVersion && string.CompareOrdinal(moduleEntry.FullName, moduleOriginalPath) < 0);
        if (outranks)
        {
            moduleDescriptor = parsed;
            moduleVersion = version;
            moduleOriginalPath = moduleEntry.FullName;
        }
    }

    foreach (var zipEntry in zip.Entries)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var normalized = JavaZipEntryUtilities.NormalizeEntryName(zipEntry.FullName);
        if (normalized.Length == 0)
        {
            continue;
        }

        // Root-level module descriptor (version 0).
        if (string.Equals(normalized, "module-info.class", StringComparison.OrdinalIgnoreCase))
        {
            ConsiderModuleDescriptor(zipEntry, version: 0);
            continue;
        }

        var effectivePath = normalized;
        var version = 0;
        if (JavaZipEntryUtilities.TryParseMultiReleasePath(normalized, out var candidatePath, out var candidateVersion))
        {
            effectivePath = candidatePath;
            version = candidateVersion;
        }

        // Versioned module descriptor.
        if (string.Equals(effectivePath, "module-info.class", StringComparison.OrdinalIgnoreCase))
        {
            ConsiderModuleDescriptor(zipEntry, version);
            continue;
        }

        if (TryGetServiceId(effectivePath, out var serviceId))
        {
            try
            {
                var providers = ReadServiceProviders(() => zipEntry.Open(), cancellationToken);
                if (providers.Count == 0)
                {
                    continue;
                }

                var providerArray = ImmutableArray.CreateRange(providers);
                if (!services.TryGetValue(serviceId, out var existingService)
                    || version > existingService.Version
                    || (version == existingService.Version && string.CompareOrdinal(zipEntry.FullName, existingService.OriginalPath) < 0))
                {
                    services[serviceId] = new EmbeddedServiceCandidate(zipEntry.FullName, version, providerArray);
                }
            }
            catch (IOException)
            {
                // Ignore malformed descriptor.
            }
            catch (InvalidDataException)
            {
                // Ignore malformed descriptor.
            }

            continue;
        }

        if (!effectivePath.EndsWith(ClassFileSuffix, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        if (!candidates.TryGetValue(effectivePath, out var existing)
            || version > existing.Version
            || (version == existing.Version && string.CompareOrdinal(zipEntry.FullName, existing.OriginalPath) < 0))
        {
            candidates[effectivePath] = new EmbeddedClassCandidate(zipEntry.FullName, version);
        }
    }

    var classes = new List<JavaClassRecord>(candidates.Count);
    foreach (var pair in candidates)
    {
        // The original entry name is kept as the nested path used to reopen the class later.
        AddClassRecord(classes, parentArchive, entry, pair.Key, pair.Value.OriginalPath);
    }

    var serviceMap = services.Count == 0
        ? ImmutableDictionary<string, ImmutableArray<string>>.Empty
        : services
            .OrderBy(static pair => pair.Key, StringComparer.Ordinal)
            .ToImmutableDictionary(
                static pair => pair.Key,
                static pair => pair.Value.Providers,
                StringComparer.Ordinal);

    if (classes.Count == 0 && moduleDescriptor is null && serviceMap.Count == 0)
    {
        return null;
    }

    return new EmbeddedArchiveAnalysis(classes, moduleDescriptor, serviceMap);
}
/// <summary>
/// Tells whether <paramref name="path"/> lies in one of the recognized library folders:
/// <c>BOOT-INF/lib/</c>, <c>WEB-INF/lib/</c>, <c>lib/</c> or <c>APP-INF/lib/</c> (case-insensitive prefix match).
/// </summary>
private static bool IsEmbeddedLibrary(string path)
    => path.StartsWith("BOOT-INF/lib/", StringComparison.OrdinalIgnoreCase)
        || path.StartsWith("WEB-INF/lib/", StringComparison.OrdinalIgnoreCase)
        || path.StartsWith("lib/", StringComparison.OrdinalIgnoreCase)
        || path.StartsWith("APP-INF/lib/", StringComparison.OrdinalIgnoreCase);
/// <summary>
/// Derives the class, package and simple names from a <c>.class</c> path and appends a record
/// for it to <paramref name="target"/>. Paths that are empty, lack the suffix, or consist of the
/// suffix alone are ignored.
/// </summary>
/// <param name="target">Collection receiving the record.</param>
/// <param name="archive">Archive owning <paramref name="entry"/>.</param>
/// <param name="entry">Entry holding the class, or the embedded archive that contains it.</param>
/// <param name="path">Slash-separated class path used to derive the names.</param>
/// <param name="nestedClassPath">Entry name inside an embedded archive; <c>null</c> for a direct entry.</param>
private static void AddClassRecord(
    ICollection<JavaClassRecord> target,
    JavaArchive archive,
    JavaArchiveEntry entry,
    string path,
    string? nestedClassPath = null)
{
    if (string.IsNullOrEmpty(path) || !path.EndsWith(ClassFileSuffix, StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    var stem = path[..^ClassFileSuffix.Length];
    if (stem.Length == 0)
    {
        return;
    }

    // "com/example/Foo" -> "com.example.Foo"; the last dot splits package from simple name.
    var className = stem.Replace('/', '.');
    var separator = className.LastIndexOf('.');
    var packageName = separator < 0 ? string.Empty : className[..separator];
    var simpleName = separator < 0 ? className : className[(separator + 1)..];

    JavaClassLocation location;
    if (nestedClassPath is null)
    {
        location = JavaClassLocation.ForArchive(archive, entry);
    }
    else
    {
        location = JavaClassLocation.ForEmbedded(archive, entry, nestedClassPath);
    }

    target.Add(new JavaClassRecord(className, packageName, simpleName, location));
}
/// <summary>
/// Returns the archive path with backslashes replaced by forward slashes, or <c>"."</c> when
/// the path is null or empty.
/// </summary>
private static string NormalizeArchivePath(string relativePath)
    // Replacing characters never shortens a non-empty string, so a single emptiness check is enough.
    => string.IsNullOrEmpty(relativePath) ? "." : relativePath.Replace('\\', '/');
// One discovered class: dotted class name, package part (empty when the name has no dot),
// simple name after the last dot, and where the class bytes can be opened.
private readonly record struct JavaClassRecord(
string ClassName,
string PackageName,
string SimpleName,
JavaClassLocation Location);
// Result of analyzing an embedded archive: its classes, optional module descriptor and service providers keyed by service id.
private sealed record EmbeddedArchiveAnalysis(
IReadOnlyCollection<JavaClassRecord> Classes,
JavaModuleDescriptor? Module,
ImmutableDictionary<string, ImmutableArray<string>> Services);
// Currently preferred zip entry for a class path: its original entry name and multi-release version (0 when unversioned).
private readonly record struct EmbeddedClassCandidate(string OriginalPath, int Version);
// Currently preferred service descriptor: original entry name, multi-release version (0 when unversioned) and its providers.
private readonly record struct EmbeddedServiceCandidate(string OriginalPath, int Version, ImmutableArray<string> Providers);
}

View File

@@ -0,0 +1,22 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
// Module description read from the "Module" attribute of a module-info.class: name, optional version,
// raw flag bits, and the requires / exports / opens / uses / provides tables.
// NOTE(review): Source presumably carries the identifier of the archive the descriptor was read from — confirm against the parser.
internal sealed record JavaModuleDescriptor(
string Name,
string? Version,
ushort Flags,
ImmutableArray<JavaModuleRequires> Requires,
ImmutableArray<JavaModuleExports> Exports,
ImmutableArray<JavaModuleOpens> Opens,
ImmutableArray<string> Uses,
ImmutableArray<JavaModuleProvides> Provides,
string Source);
// One "requires" row: required module name, raw flag bits and optional version.
internal sealed record JavaModuleRequires(string Name, ushort Flags, string? Version);
// One "exports" row: exported package, raw flag bits and the target module names read for it.
internal sealed record JavaModuleExports(string Package, ushort Flags, ImmutableArray<string> Targets);
// One "opens" row: same shape as an exports row.
internal sealed record JavaModuleOpens(string Package, ushort Flags, ImmutableArray<string> Targets);
// One "provides" row: a service and its implementations.
internal sealed record JavaModuleProvides(string Service, ImmutableArray<string> Implementations);

View File

@@ -0,0 +1,367 @@
using System.Buffers.Binary;
using System.Collections.Immutable;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
internal static class JavaModuleInfoParser
{
/// <summary>
/// Reads a class file from <paramref name="stream"/> and returns the descriptor built from its
/// <c>Module</c> attribute.
/// </summary>
/// <param name="stream">Stream positioned at the start of the class file; it is left open.</param>
/// <param name="sourceIdentifier">Identifier forwarded to the module-attribute parser.</param>
/// <param name="cancellationToken">Checked after the constant pool is read and before each class attribute.</param>
/// <returns>
/// The parsed descriptor, or <c>null</c> when the magic number cannot be read or is not
/// <c>0xCAFEBABE</c>, or when the class has no <c>Module</c> attribute. If several
/// <c>Module</c> attributes are present, the last one wins.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public static JavaModuleDescriptor? TryParse(Stream stream, string sourceIdentifier, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(stream);
using var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true);
if (!TryReadMagic(reader, out var magic) || magic != 0xCAFEBABE)
{
return null;
}
// NOTE(review): the read helpers used from here on are defined outside this method; how they
// react to a truncated stream is not visible here — confirm they cannot throw out of a "Try" method.
// Skip minor/major version.
_ = ReadUInt16(reader);
_ = ReadUInt16(reader);
var constantPool = ReadConstantPool(reader);
cancellationToken.ThrowIfCancellationRequested();
// access_flags, this_class, super_class
_ = ReadUInt16(reader);
_ = ReadUInt16(reader);
_ = ReadUInt16(reader);
// interfaces
var interfacesCount = ReadUInt16(reader);
// Each interface entry is a 2-byte index.
SkipBytes(reader, interfacesCount * 2);
// fields
var fieldsCount = ReadUInt16(reader);
SkipMembers(reader, fieldsCount);
// methods
var methodsCount = ReadUInt16(reader);
SkipMembers(reader, methodsCount);
// Class-level attributes: only "Module" is parsed, every other attribute is skipped by its declared length.
var attributesCount = ReadUInt16(reader);
JavaModuleDescriptor? descriptor = null;
for (var i = 0; i < attributesCount; i++)
{
cancellationToken.ThrowIfCancellationRequested();
var nameIndex = ReadUInt16(reader);
var length = ReadUInt32(reader);
var attributeName = GetUtf8(constantPool, nameIndex);
if (string.Equals(attributeName, "Module", StringComparison.Ordinal))
{
// NOTE(review): the declared length is not used on this path; this relies on the module
// parser consuming exactly the attribute's bytes — confirm.
descriptor = ParseModuleAttribute(reader, constantPool, sourceIdentifier);
}
else
{
// NOTE(review): the 32-bit unsigned length is narrowed to int here — confirm SkipBytes copes with very large values.
SkipBytes(reader, (int)length);
}
}
return descriptor;
}
private static JavaModuleDescriptor ParseModuleAttribute(BinaryReader reader, ConstantPoolEntry[] constantPool, string sourceIdentifier)
{
var moduleNameIndex = ReadUInt16(reader);
var moduleFlags = ReadUInt16(reader);
var moduleVersionIndex = ReadUInt16(reader);
var moduleName = GetModuleName(constantPool, moduleNameIndex);
var moduleVersion = moduleVersionIndex != 0 ? GetUtf8(constantPool, moduleVersionIndex) : null;
var requiresCount = ReadUInt16(reader);
var requiresBuilder = ImmutableArray.CreateBuilder<JavaModuleRequires>(requiresCount);
for (var i = 0; i < requiresCount; i++)
{
var requiresIndex = ReadUInt16(reader);
var requiresFlags = ReadUInt16(reader);
var requiresVersionIndex = ReadUInt16(reader);
var requiresName = GetModuleName(constantPool, requiresIndex);
var requiresVersion = requiresVersionIndex != 0 ? GetUtf8(constantPool, requiresVersionIndex) : null;
requiresBuilder.Add(new JavaModuleRequires(requiresName, requiresFlags, requiresVersion));
}
var exportsCount = ReadUInt16(reader);
var exportsBuilder = ImmutableArray.CreateBuilder<JavaModuleExports>(exportsCount);
for (var i = 0; i < exportsCount; i++)
{
var exportsIndex = ReadUInt16(reader);
var exportsFlags = ReadUInt16(reader);
var exportsToCount = ReadUInt16(reader);
var targetsBuilder = ImmutableArray.CreateBuilder<string>(exportsToCount);
for (var j = 0; j < exportsToCount; j++)
{
var targetIndex = ReadUInt16(reader);
targetsBuilder.Add(GetModuleName(constantPool, targetIndex));
}
var packageName = GetPackageName(constantPool, exportsIndex);
exportsBuilder.Add(new JavaModuleExports(packageName, exportsFlags, targetsBuilder.ToImmutable()));
}
var opensCount = ReadUInt16(reader);
var opensBuilder = ImmutableArray.CreateBuilder<JavaModuleOpens>(opensCount);
for (var i = 0; i < opensCount; i++)
{
var opensIndex = ReadUInt16(reader);
var opensFlags = ReadUInt16(reader);
var opensToCount = ReadUInt16(reader);
var targetsBuilder = ImmutableArray.CreateBuilder<string>(opensToCount);
for (var j = 0; j < opensToCount; j++)
{
var targetIndex = ReadUInt16(reader);
targetsBuilder.Add(GetModuleName(constantPool, targetIndex));
}
var packageName = GetPackageName(constantPool, opensIndex);
opensBuilder.Add(new JavaModuleOpens(packageName, opensFlags, targetsBuilder.ToImmutable()));
}
var usesCount = ReadUInt16(reader);
var usesBuilder = ImmutableArray.CreateBuilder<string>(usesCount);
for (var i = 0; i < usesCount; i++)
{
var classIndex = ReadUInt16(reader);
usesBuilder.Add(GetClassName(constantPool, classIndex));
}
var providesCount = ReadUInt16(reader);
var providesBuilder = ImmutableArray.CreateBuilder<JavaModuleProvides>(providesCount);
for (var i = 0; i < providesCount; i++)
{
var serviceIndex = ReadUInt16(reader);
var providesWithCount = ReadUInt16(reader);
var implementationsBuilder = ImmutableArray.CreateBuilder<string>(providesWithCount);
for (var j = 0; j < providesWithCount; j++)
{
var implIndex = ReadUInt16(reader);
implementationsBuilder.Add(GetClassName(constantPool, implIndex));
}
var serviceName = GetClassName(constantPool, serviceIndex);
providesBuilder.Add(new JavaModuleProvides(serviceName, implementationsBuilder.ToImmutable()));
}
return new JavaModuleDescriptor(
moduleName,
moduleVersion,
moduleFlags,
requiresBuilder.ToImmutable(),
exportsBuilder.ToImmutable(),
opensBuilder.ToImmutable(),
usesBuilder.ToImmutable(),
providesBuilder.ToImmutable(),
sourceIdentifier);
}
private static ConstantPoolEntry[] ReadConstantPool(BinaryReader reader)
{
var count = ReadUInt16(reader);
var pool = new ConstantPoolEntry[count];
var index = 1;
while (index < count)
{
var tag = reader.ReadByte();
switch (tag)
{
case 1: // Utf8
{
var length = ReadUInt16(reader);
var bytes = reader.ReadBytes(length);
var value = Encoding.UTF8.GetString(bytes);
pool[index] = new Utf8Entry(value);
break;
}
case 7: // Class
{
var nameIndex = ReadUInt16(reader);
pool[index] = new ClassEntry(nameIndex);
break;
}
case 8: // String
pool[index] = new SimpleEntry(tag);
reader.BaseStream.Seek(2, SeekOrigin.Current);
break;
case 3: // Integer
case 4: // Float
case 9: // Fieldref
case 10: // Methodref
case 11: // InterfaceMethodref
case 12: // NameAndType
case 17: // Dynamic
case 18: // InvokeDynamic
pool[index] = new SimpleEntry(tag);
reader.BaseStream.Seek(4, SeekOrigin.Current);
break;
case 5: // Long
case 6: // Double
pool[index] = new SimpleEntry(tag);
reader.BaseStream.Seek(8, SeekOrigin.Current);
index++;
break;
case 15: // MethodHandle
pool[index] = new SimpleEntry(tag);
reader.BaseStream.Seek(3, SeekOrigin.Current);
break;
case 16: // MethodType
pool[index] = new SimpleEntry(tag);
reader.BaseStream.Seek(2, SeekOrigin.Current);
break;
case 19: // Module
{
var nameIndex = ReadUInt16(reader);
pool[index] = new ModuleEntry(nameIndex);
break;
}
case 20: // Package
{
var nameIndex = ReadUInt16(reader);
pool[index] = new PackageEntry(nameIndex);
break;
}
default:
throw new InvalidDataException($"Unsupported constant pool tag {tag}.");
}
index++;
}
return pool;
}
private static string GetUtf8(ConstantPoolEntry[] pool, int index)
{
if (index <= 0 || index >= pool.Length)
{
return string.Empty;
}
if (pool[index] is Utf8Entry utf8)
{
return utf8.Value;
}
return string.Empty;
}
private static string GetModuleName(ConstantPoolEntry[] pool, int index)
{
if (index <= 0 || index >= pool.Length)
{
return string.Empty;
}
if (pool[index] is ModuleEntry module)
{
var utf8 = GetUtf8(pool, module.NameIndex);
return NormalizeBinaryName(utf8);
}
return string.Empty;
}
private static string GetPackageName(ConstantPoolEntry[] pool, int index)
{
if (index <= 0 || index >= pool.Length)
{
return string.Empty;
}
if (pool[index] is PackageEntry package)
{
var utf8 = GetUtf8(pool, package.NameIndex);
return NormalizeBinaryName(utf8);
}
return string.Empty;
}
private static string GetClassName(ConstantPoolEntry[] pool, int index)
{
if (index <= 0 || index >= pool.Length)
{
return string.Empty;
}
if (pool[index] is ClassEntry classEntry)
{
var utf8 = GetUtf8(pool, classEntry.NameIndex);
return NormalizeBinaryName(utf8);
}
return string.Empty;
}
private static string NormalizeBinaryName(string value)
=> string.IsNullOrEmpty(value) ? string.Empty : value.Replace('/', '.');
private static bool TryReadMagic(BinaryReader reader, out uint magic)
{
if (reader.BaseStream.Length - reader.BaseStream.Position < 4)
{
magic = 0;
return false;
}
magic = ReadUInt32(reader);
return true;
}
private static void SkipMembers(BinaryReader reader, int count)
{
for (var i = 0; i < count; i++)
{
// access_flags, name_index, descriptor_index
reader.BaseStream.Seek(6, SeekOrigin.Current);
var attributesCount = ReadUInt16(reader);
SkipAttributes(reader, attributesCount);
}
}
private static void SkipAttributes(BinaryReader reader, int count)
{
for (var i = 0; i < count; i++)
{
reader.BaseStream.Seek(2, SeekOrigin.Current); // name_index
var length = ReadUInt32(reader);
SkipBytes(reader, (int)length);
}
}
private static void SkipBytes(BinaryReader reader, int count)
{
if (count <= 0)
{
return;
}
reader.BaseStream.Seek(count, SeekOrigin.Current);
}
private static ushort ReadUInt16(BinaryReader reader)
=> BinaryPrimitives.ReadUInt16BigEndian(reader.ReadBytes(sizeof(ushort)));
private static uint ReadUInt32(BinaryReader reader)
=> BinaryPrimitives.ReadUInt32BigEndian(reader.ReadBytes(sizeof(uint)));
private abstract record ConstantPoolEntry(byte Tag);
private sealed record Utf8Entry(string Value) : ConstantPoolEntry(1);
private sealed record ClassEntry(ushort NameIndex) : ConstantPoolEntry(7);
private sealed record ModuleEntry(ushort NameIndex) : ConstantPoolEntry(19);
private sealed record PackageEntry(ushort NameIndex) : ConstantPoolEntry(20);
private sealed record SimpleEntry(byte Tag) : ConstantPoolEntry(Tag);
}

View File

@@ -0,0 +1,264 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// Index of the entries of one Java archive on disk. Multi-release entries
/// (<c>META-INF/versions/N/...</c>) are collapsed onto their effective path, keeping the
/// highest-versioned candidate for each path.
/// </summary>
internal sealed class JavaArchive
{
    private readonly ImmutableDictionary<string, JavaArchiveEntry> _entryMap;

    private JavaArchive(
        string absolutePath,
        string relativePath,
        JavaPackagingKind packaging,
        ImmutableArray<string> layeredDirectories,
        bool isMultiRelease,
        bool hasModuleInfo,
        ImmutableArray<JavaArchiveEntry> entries)
    {
        AbsolutePath = absolutePath ?? throw new ArgumentNullException(nameof(absolutePath));
        RelativePath = relativePath ?? throw new ArgumentNullException(nameof(relativePath));
        Packaging = packaging;
        LayeredDirectories = layeredDirectories;
        IsMultiRelease = isMultiRelease;
        HasModuleInfo = hasModuleInfo;
        Entries = entries;
        _entryMap = entries.ToImmutableDictionary(static entry => entry.EffectivePath, static entry => entry, StringComparer.Ordinal);
    }

    /// <summary>Path the archive was opened from.</summary>
    public string AbsolutePath { get; }

    /// <summary>Caller-supplied relative path of the archive.</summary>
    public string RelativePath { get; }

    /// <summary>Packaging kind derived from the file extension and archive layout.</summary>
    public JavaPackagingKind Packaging { get; }

    /// <summary>Sorted subset of <c>BOOT-INF</c> / <c>WEB-INF</c> that the archive contains entries under.</summary>
    public ImmutableArray<string> LayeredDirectories { get; }

    /// <summary><c>true</c> when at least one entry lives under a valid <c>META-INF/versions/N/</c> directory.</summary>
    public bool IsMultiRelease { get; }

    /// <summary><c>true</c> when an entry's effective path is exactly <c>module-info.class</c>.</summary>
    public bool HasModuleInfo { get; }

    /// <summary>Selected entries, sorted by effective path (ordinal).</summary>
    public ImmutableArray<JavaArchiveEntry> Entries { get; }

    /// <summary>
    /// Opens the archive, indexes its file entries, and closes it again.
    /// </summary>
    /// <param name="absolutePath">File to open.</param>
    /// <param name="relativePath">Relative path recorded on the result.</param>
    /// <exception cref="ArgumentException">Either path is <c>null</c> or empty.</exception>
    public static JavaArchive Load(string absolutePath, string relativePath)
    {
        ArgumentException.ThrowIfNullOrEmpty(absolutePath);
        ArgumentException.ThrowIfNullOrEmpty(relativePath);

        using var fileStream = new FileStream(absolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var zip = new ZipArchive(fileStream, ZipArchiveMode.Read, leaveOpen: false);

        var layeredDirectories = new HashSet<string>(StringComparer.Ordinal);
        var candidates = new Dictionary<string, List<EntryCandidate>>(StringComparer.Ordinal);
        var isMultiRelease = false;
        var hasModuleInfo = false;
        var hasBootInf = false;
        var hasWebInf = false;

        foreach (var entry in zip.Entries)
        {
            var normalized = JavaZipEntryUtilities.NormalizeEntryName(entry.FullName);
            if (string.IsNullOrEmpty(normalized) || normalized.EndsWith('/'))
            {
                // Directory placeholders carry no content.
                continue;
            }

            if (normalized.StartsWith("BOOT-INF/", StringComparison.OrdinalIgnoreCase))
            {
                hasBootInf = true;
                layeredDirectories.Add("BOOT-INF");
            }

            if (normalized.StartsWith("WEB-INF/", StringComparison.OrdinalIgnoreCase))
            {
                hasWebInf = true;
                layeredDirectories.Add("WEB-INF");
            }

            var version = 0;
            var effectivePath = normalized;
            if (JavaZipEntryUtilities.TryParseMultiReleasePath(normalized, out var candidatePath, out var candidateVersion))
            {
                effectivePath = candidatePath;
                version = candidateVersion;
                isMultiRelease = true;
            }

            if (string.IsNullOrEmpty(effectivePath))
            {
                continue;
            }

            if (string.Equals(effectivePath, "module-info.class", StringComparison.Ordinal))
            {
                hasModuleInfo = true;
            }

            var candidate = new EntryCandidate(
                effectivePath,
                entry.FullName,
                version,
                entry.Length,
                entry.LastWriteTime.ToUniversalTime());

            if (!candidates.TryGetValue(effectivePath, out var bucket))
            {
                bucket = new List<EntryCandidate>();
                candidates[effectivePath] = bucket;
            }

            bucket.Add(candidate);
        }

        var entries = new List<JavaArchiveEntry>(candidates.Count);
        foreach (var pair in candidates)
        {
            // Highest version wins; the original path breaks ties so the choice is deterministic.
            var selected = pair.Value
                .OrderByDescending(static candidate => candidate.Version)
                .ThenBy(static candidate => candidate.OriginalPath, StringComparer.Ordinal)
                .First();

            entries.Add(new JavaArchiveEntry(
                pair.Key,
                selected.OriginalPath,
                selected.Version,
                selected.Length,
                selected.LastWriteTime));
        }

        entries.Sort(static (left, right) => StringComparer.Ordinal.Compare(left.EffectivePath, right.EffectivePath));

        var packaging = DeterminePackaging(absolutePath, hasBootInf, hasWebInf);

        return new JavaArchive(
            absolutePath,
            relativePath,
            packaging,
            layeredDirectories
                .OrderBy(static directory => directory, StringComparer.Ordinal)
                .ToImmutableArray(),
            isMultiRelease,
            hasModuleInfo,
            entries.ToImmutableArray());
    }

    /// <summary>Looks up an entry by its effective (multi-release-collapsed) path.</summary>
    public bool TryGetEntry(string effectivePath, out JavaArchiveEntry entry)
    {
        ArgumentNullException.ThrowIfNull(effectivePath);
        return _entryMap.TryGetValue(effectivePath, out entry!);
    }

    /// <summary>
    /// Re-opens the archive file and returns a stream over the given entry. Disposing the returned
    /// stream also disposes the underlying archive and file handle.
    /// </summary>
    /// <exception cref="FileNotFoundException">The entry's original path is no longer present in the archive.</exception>
    public Stream OpenEntry(JavaArchiveEntry entry)
    {
        ArgumentNullException.ThrowIfNull(entry);

        var fileStream = new FileStream(AbsolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);
        ZipArchive? archive = null;
        Stream? entryStream = null;

        try
        {
            archive = new ZipArchive(fileStream, ZipArchiveMode.Read, leaveOpen: false);
            var zipEntry = archive.GetEntry(entry.OriginalPath);
            if (zipEntry is null)
            {
                throw new FileNotFoundException($"Entry '{entry.OriginalPath}' not found in archive '{AbsolutePath}'.");
            }

            entryStream = zipEntry.Open();
            return new ZipEntryStream(fileStream, archive, entryStream);
        }
        catch
        {
            // Ownership was never handed to a ZipEntryStream, so release everything here.
            entryStream?.Dispose();
            archive?.Dispose();
            fileStream.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Maps the file extension (and, for <c>.jar</c>, the presence of <c>BOOT-INF/</c>) to a packaging kind.
    /// </summary>
    /// <remarks>
    /// The extension is compared case-insensitively so that files such as <c>APP.WAR</c>, which the
    /// workspace normalizer accepts, are not classified as <see cref="JavaPackagingKind.Unknown"/>.
    /// <paramref name="hasWebInf"/> is accepted but not consulted.
    /// </remarks>
    private static JavaPackagingKind DeterminePackaging(string absolutePath, bool hasBootInf, bool hasWebInf)
    {
        var extension = Path.GetExtension(absolutePath).ToLowerInvariant();
        return extension switch
        {
            ".war" => JavaPackagingKind.War,
            ".ear" => JavaPackagingKind.Ear,
            ".jmod" => JavaPackagingKind.JMod,
            ".jimage" => JavaPackagingKind.JImage,
            ".jar" => hasBootInf ? JavaPackagingKind.SpringBootFatJar : JavaPackagingKind.Jar,
            _ => JavaPackagingKind.Unknown,
        };
    }

    /// <summary>Raw zip entry considered for an effective path before version selection.</summary>
    private sealed record EntryCandidate(
        string EffectivePath,
        string OriginalPath,
        int Version,
        long Length,
        DateTimeOffset LastWriteTime);

    /// <summary>
    /// Pass-through stream over a zip entry that also owns the archive and file stream backing it.
    /// </summary>
    private sealed class ZipEntryStream : Stream
    {
        private readonly Stream _fileStream;
        private readonly ZipArchive _archive;
        private readonly Stream _entryStream;

        public ZipEntryStream(Stream fileStream, ZipArchive archive, Stream entryStream)
        {
            _fileStream = fileStream;
            _archive = archive;
            _entryStream = entryStream;
        }

        public override bool CanRead => _entryStream.CanRead;

        public override bool CanSeek => _entryStream.CanSeek;

        public override bool CanWrite => _entryStream.CanWrite;

        public override long Length => _entryStream.Length;

        public override long Position
        {
            get => _entryStream.Position;
            set => _entryStream.Position = value;
        }

        public override void Flush() => _entryStream.Flush();

        public override int Read(byte[] buffer, int offset, int count)
            => _entryStream.Read(buffer, offset, count);

        public override ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
            => _entryStream.ReadAsync(buffer, cancellationToken);

        public override long Seek(long offset, SeekOrigin origin)
            => _entryStream.Seek(offset, origin);

        public override void SetLength(long value)
            => _entryStream.SetLength(value);

        public override void Write(byte[] buffer, int offset, int count)
            => _entryStream.Write(buffer, offset, count);

        public override ValueTask DisposeAsync()
        {
            // Innermost first: entry stream, then archive, then file handle.
            _entryStream.Dispose();
            _archive.Dispose();
            _fileStream.Dispose();
            return ValueTask.CompletedTask;
        }

        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                _entryStream.Dispose();
                _archive.Dispose();
                _fileStream.Dispose();
            }

            base.Dispose(disposing);
        }
    }
}

View File

@@ -0,0 +1,8 @@
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// One file inside a <see cref="JavaArchive"/>.
/// </summary>
/// <param name="EffectivePath">Entry path with any <c>META-INF/versions/N/</c> prefix removed; used as the lookup key.</param>
/// <param name="OriginalPath">Entry name exactly as stored in the zip; used to re-open the entry.</param>
/// <param name="Version">Multi-release version the entry came from, or 0 for a non-versioned entry.</param>
/// <param name="Length">Uncompressed size reported by the zip entry.</param>
/// <param name="LastWriteTime">Entry timestamp converted to UTC.</param>
internal sealed record JavaArchiveEntry(
string EffectivePath,
string OriginalPath,
int Version,
long Length,
DateTimeOffset LastWriteTime);

View File

@@ -0,0 +1,12 @@
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// Packaging classification assigned to an archive by <c>JavaArchive.DeterminePackaging</c>,
/// based on the file extension and, for <c>.jar</c>, on whether <c>BOOT-INF/</c> entries exist.
/// </summary>
internal enum JavaPackagingKind
{
// .jar without BOOT-INF/ entries
Jar,
// .jar containing BOOT-INF/ entries
SpringBootFatJar,
// .war
War,
// .ear
Ear,
// .jmod
JMod,
// .jimage
JImage,
// any other extension
Unknown,
}

View File

@@ -0,0 +1,68 @@
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// Reads a <c>KEY=value</c> style <c>release</c> file and extracts the Java version and vendor.
/// </summary>
internal static class JavaReleaseFileParser
{
    /// <summary>
    /// Parses the file at <paramref name="filePath"/>.
    /// </summary>
    /// <remarks>
    /// Blank lines, lines starting with <c>#</c>, and lines without a key before <c>=</c> are ignored.
    /// Keys are matched case-insensitively and a later duplicate overwrites an earlier one.
    /// The version comes from <c>JAVA_VERSION</c>, falling back to <c>JAVA_RUNTIME_VERSION</c>;
    /// the vendor comes from <c>IMPLEMENTOR</c>, falling back to <c>IMPLEMENTOR_VERSION</c>.
    /// Missing values are returned as empty strings.
    /// </remarks>
    public static JavaReleaseMetadata Parse(string filePath)
    {
        ArgumentException.ThrowIfNullOrEmpty(filePath);

        using var input = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var text = new StreamReader(input, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);

        var properties = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        for (var raw = text.ReadLine(); raw is not null; raw = text.ReadLine())
        {
            var entry = raw.Trim();
            if (entry.Length == 0 || entry[0] == '#')
            {
                continue;
            }

            var equalsAt = entry.IndexOf('=');
            if (equalsAt <= 0)
            {
                continue;
            }

            var name = entry[..equalsAt].Trim();
            if (name.Length == 0)
            {
                continue;
            }

            properties[name] = TrimQuotes(entry[(equalsAt + 1)..].Trim());
        }

        var version = Resolve("JAVA_VERSION", "JAVA_RUNTIME_VERSION");
        var vendor = Resolve("IMPLEMENTOR", "IMPLEMENTOR_VERSION");

        return new JavaReleaseMetadata(
            version?.Trim() ?? string.Empty,
            vendor?.Trim() ?? string.Empty);

        // Primary key wins unless it is missing or blank and the fallback key exists.
        string? Resolve(string primaryKey, string fallbackKey)
        {
            properties.TryGetValue(primaryKey, out var value);
            if (string.IsNullOrWhiteSpace(value) && properties.TryGetValue(fallbackKey, out var alternate))
            {
                value = alternate;
            }

            return value;
        }
    }

    /// <summary>Removes one pair of surrounding double quotes, if present.</summary>
    private static string TrimQuotes(string value)
    {
        var quoted = value.Length >= 2 && value[0] == '"' && value[^1] == '"';
        return quoted ? value[1..^1] : value;
    }

    /// <summary>Version and vendor extracted from a release file; both are empty when absent.</summary>
    public sealed record JavaReleaseMetadata(string Version, string Vendor);
}

View File

@@ -0,0 +1,7 @@
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// A directory recognised as a Java runtime image.
/// </summary>
/// <param name="AbsolutePath">Directory path as enumerated on disk.</param>
/// <param name="RelativePath">Path relative to the scan root; used for ordering in <c>JavaWorkspace</c>.</param>
/// <param name="JavaVersion">Version string taken from the image's <c>release</c> file (may be empty).</param>
/// <param name="Vendor">Vendor string taken from the image's <c>release</c> file (may be empty).</param>
internal sealed record JavaRuntimeImage(
string AbsolutePath,
string RelativePath,
string JavaVersion,
string Vendor);

View File

@@ -0,0 +1,28 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// Immutable snapshot of the archives and runtime images found in a scan, each ordered by
/// relative path (ordinal) with <c>null</c> items dropped.
/// </summary>
internal sealed class JavaWorkspace
{
    public JavaWorkspace(
        IEnumerable<JavaArchive> archives,
        IEnumerable<JavaRuntimeImage> runtimeImages)
    {
        ArgumentNullException.ThrowIfNull(archives);
        ArgumentNullException.ThrowIfNull(runtimeImages);

        Archives = Canonicalize(archives, static archive => archive.RelativePath);
        RuntimeImages = Canonicalize(runtimeImages, static image => image.RelativePath);
    }

    /// <summary>Archives sorted by <c>RelativePath</c>.</summary>
    public ImmutableArray<JavaArchive> Archives { get; }

    /// <summary>Runtime images sorted by <c>RelativePath</c>.</summary>
    public ImmutableArray<JavaRuntimeImage> RuntimeImages { get; }

    // Shared pipeline: drop nulls, stable-sort by the selected path, freeze.
    private static ImmutableArray<T> Canonicalize<T>(IEnumerable<T> items, Func<T, string> pathSelector)
        where T : class
    {
        var ordered = items
            .Where(static item => item is not null)
            .OrderBy(pathSelector, StringComparer.Ordinal);

        return ordered.ToImmutableArray();
    }
}

View File

@@ -0,0 +1,101 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// Walks a scan root and collects the Java archives and runtime images beneath it.
/// </summary>
internal static class JavaWorkspaceNormalizer
{
    // Extensions treated as candidate archives; matched case-insensitively.
    private static readonly HashSet<string> SupportedExtensions = new(StringComparer.OrdinalIgnoreCase)
    {
        ".jar",
        ".war",
        ".ear",
        ".jmod",
        ".jimage",
    };

    // Recursive walk that skips inaccessible directories, devices and reparse points.
    private static readonly EnumerationOptions EnumerationOptions = new()
    {
        RecurseSubdirectories = true,
        IgnoreInaccessible = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
        ReturnSpecialDirectories = false,
    };

    /// <summary>
    /// Builds a <see cref="JavaWorkspace"/> from everything under <c>context.RootPath</c>.
    /// </summary>
    /// <remarks>
    /// Archives that cannot be opened or read (I/O failure, not a zip, access denied) are skipped,
    /// as are runtime-image directories whose probing fails for the same reasons. Sub-directories
    /// are probed for runtime images; the root directory itself is not.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested during the walk.</exception>
    public static JavaWorkspace Normalize(LanguageAnalyzerContext context, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);

        var archives = new List<JavaArchive>();
        var runtimeImages = new List<JavaRuntimeImage>();

        foreach (var filePath in Directory.EnumerateFiles(context.RootPath, "*", EnumerationOptions))
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!SupportedExtensions.Contains(Path.GetExtension(filePath)))
            {
                continue;
            }

            try
            {
                var relative = context.GetRelativePath(filePath);
                var archive = JavaArchive.Load(filePath, relative);
                archives.Add(archive);
            }
            catch (IOException)
            {
                // Corrupt archives should not abort the scan.
            }
            catch (InvalidDataException)
            {
                // Skip non-zip payloads despite the extension.
            }
            catch (UnauthorizedAccessException)
            {
                // The file was enumerated but cannot be opened for reading; treat it like any
                // other unreadable archive instead of failing the whole scan.
            }
        }

        foreach (var directory in Directory.EnumerateDirectories(context.RootPath, "*", EnumerationOptions))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                if (!LooksLikeRuntimeImage(directory))
                {
                    continue;
                }

                var releasePath = Path.Combine(directory, "release");
                if (!File.Exists(releasePath))
                {
                    continue;
                }

                var metadata = JavaReleaseFileParser.Parse(releasePath);
                runtimeImages.Add(new JavaRuntimeImage(
                    AbsolutePath: directory,
                    RelativePath: context.GetRelativePath(directory),
                    JavaVersion: metadata.Version,
                    Vendor: metadata.Vendor));
            }
            catch (IOException)
            {
                // Skip directories we cannot access.
            }
            catch (UnauthorizedAccessException)
            {
                // Access denied is not an IOException; skip it the same way.
            }
        }

        return new JavaWorkspace(archives, runtimeImages);
    }

    /// <summary>
    /// Heuristic: a directory is a runtime image candidate when it contains <c>lib/modules</c> or a
    /// <c>bin/java</c> launcher. Both <c>java</c> and <c>java.exe</c> are probed because the scanned
    /// tree need not target the operating system the scanner runs on.
    /// </summary>
    private static bool LooksLikeRuntimeImage(string directory)
    {
        if (!Directory.Exists(directory))
        {
            return false;
        }

        var binDirectory = Path.Combine(directory, "bin");
        return File.Exists(Path.Combine(directory, "lib", "modules"))
            || File.Exists(Path.Combine(binDirectory, "java"))
            || File.Exists(Path.Combine(binDirectory, "java.exe"));
    }
}

View File

@@ -0,0 +1,52 @@
using System.Globalization;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal;
/// <summary>
/// Helpers for interpreting zip entry names inside Java archives.
/// </summary>
internal static class JavaZipEntryUtilities
{
    /// <summary>
    /// Converts backslashes to forward slashes and removes leading slashes.
    /// </summary>
    public static string NormalizeEntryName(string entryName)
    {
        var normalized = entryName.Replace('\\', '/');
        return normalized.TrimStart('/');
    }

    /// <summary>
    /// Recognises <c>META-INF/versions/&lt;N&gt;/&lt;path&gt;</c> and splits it into the versioned
    /// path and the release number.
    /// </summary>
    /// <param name="normalizedPath">Entry name already passed through <see cref="NormalizeEntryName"/>.</param>
    /// <param name="effectivePath">
    /// On success, the path below the version directory; otherwise <paramref name="normalizedPath"/> unchanged.
    /// </param>
    /// <param name="version">On success, the parsed release number; otherwise 0.</param>
    /// <returns>
    /// <c>true</c> only when the version segment consists solely of decimal digits and is followed by
    /// a non-empty path. Segments with a sign or surrounding whitespace (for example <c>-1</c> or
    /// <c>+9</c>) are not version directories and are rejected.
    /// </returns>
    public static bool TryParseMultiReleasePath(string normalizedPath, out string effectivePath, out int version)
    {
        const string Prefix = "META-INF/versions/";

        effectivePath = normalizedPath;
        version = 0;

        if (!normalizedPath.StartsWith(Prefix, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        var remainder = normalizedPath.AsSpan(Prefix.Length);
        var separatorIndex = remainder.IndexOf('/');
        if (separatorIndex <= 0)
        {
            return false;
        }

        // NumberStyles.None: digits only, so "-1", "+9" and " 9" do not count as release numbers.
        var versionSpan = remainder[..separatorIndex];
        if (!int.TryParse(versionSpan, NumberStyles.None, CultureInfo.InvariantCulture, out var parsedVersion))
        {
            return false;
        }

        var relativeSpan = remainder[(separatorIndex + 1)..];
        if (relativeSpan.IsEmpty)
        {
            return false;
        }

        effectivePath = relativeSpan.ToString();
        version = parsedVersion;
        return true;
    }
}

View File

@@ -0,0 +1,44 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.Reflection;
/// <summary>
/// Result of the reflection scan: the reflective edges found and the warnings raised while scanning.
/// </summary>
internal sealed record JavaReflectionAnalysis(
ImmutableArray<JavaReflectionEdge> Edges,
ImmutableArray<JavaReflectionWarning> Warnings)
{
/// <summary>Shared instance with no edges and no warnings.</summary>
public static readonly JavaReflectionAnalysis Empty = new(ImmutableArray<JavaReflectionEdge>.Empty, ImmutableArray<JavaReflectionWarning>.Empty);
}
/// <summary>
/// A reflective lookup found at a call site.
/// </summary>
/// <param name="SourceClass">Dot-separated name of the class containing the call site.</param>
/// <param name="SegmentIdentifier">Identifier of the class-path segment the class was loaded from.</param>
/// <param name="TargetType">Literal class or resource name feeding the call, or <c>null</c> when none was tracked.</param>
/// <param name="Reason">Which reflective API was called.</param>
/// <param name="Confidence">High when a literal operand was tracked for the call, Low otherwise.</param>
/// <param name="MethodName">Name of the method containing the call site.</param>
/// <param name="MethodDescriptor">Descriptor of the method containing the call site.</param>
/// <param name="InstructionOffset">Byte offset of the invoke instruction within the method's code.</param>
/// <param name="Details">Optional free-form detail; the analyzer in this file always passes <c>null</c>.</param>
internal sealed record JavaReflectionEdge(
string SourceClass,
string SegmentIdentifier,
string? TargetType,
JavaReflectionReason Reason,
JavaReflectionConfidence Confidence,
string MethodName,
string MethodDescriptor,
int InstructionOffset,
string? Details);
/// <summary>
/// A non-edge finding raised during the reflection scan (for example the <c>tccl</c> warning for
/// thread context class loader access).
/// </summary>
/// <param name="SourceClass">Dot-separated name of the class containing the finding.</param>
/// <param name="SegmentIdentifier">Identifier of the class-path segment the class was loaded from.</param>
/// <param name="WarningCode">Short machine-readable code.</param>
/// <param name="Message">Human-readable description.</param>
/// <param name="MethodName">Name of the method the finding was raised in.</param>
/// <param name="MethodDescriptor">Descriptor of the method the finding was raised in.</param>
internal sealed record JavaReflectionWarning(
string SourceClass,
string SegmentIdentifier,
string WarningCode,
string Message,
string MethodName,
string MethodDescriptor);
/// <summary>Reflective API that produced a <see cref="JavaReflectionEdge"/>.</summary>
internal enum JavaReflectionReason
{
// java/lang/Class.forName
ClassForName,
// java/lang/ClassLoader.loadClass
ClassLoaderLoadClass,
// java/util/ServiceLoader methods whose name starts with "load"
ServiceLoaderLoad,
// java/lang/ClassLoader.getResource / getResourceAsStream / getResources
ResourceLookup,
}
/// <summary>
/// Confidence attached to a reflection edge. The analyzer in this file emits only
/// <see cref="Low"/> (no literal operand tracked) and <see cref="High"/> (literal operand tracked).
/// </summary>
internal enum JavaReflectionConfidence
{
Low = 1,
Medium = 2,
High = 3,
}

View File

@@ -0,0 +1,716 @@
using System.Buffers.Binary;
using System.Collections.Immutable;
using System.Text;
using StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.Reflection;
internal static class JavaReflectionAnalyzer
{
/// <summary>
/// Parses every class in every class-path segment and records the reflective call sites and
/// warnings found in their method bodies.
/// </summary>
/// <param name="classPath">Class-path analysis whose segments are scanned.</param>
/// <param name="cancellationToken">Checked once per segment and once per method.</param>
/// <returns>
/// <see cref="JavaReflectionAnalysis.Empty"/> when there are no segments or nothing was found;
/// otherwise a new analysis holding the collected edges and warnings.
/// </returns>
public static JavaReflectionAnalysis Analyze(JavaClassPathAnalysis classPath, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(classPath);

    if (classPath.Segments.IsDefaultOrEmpty)
    {
        return JavaReflectionAnalysis.Empty;
    }

    var collectedEdges = new List<JavaReflectionEdge>();
    var collectedWarnings = new List<JavaReflectionWarning>();

    foreach (var segment in classPath.Segments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        foreach (var classLocation in segment.ClassLocations)
        {
            using var classStream = classLocation.Value.OpenClassStream(cancellationToken);
            var parsed = JavaClassFile.Parse(classStream, cancellationToken);

            foreach (var method in parsed.Methods)
            {
                cancellationToken.ThrowIfCancellationRequested();
                AnalyzeMethod(parsed, method, segment.Identifier, classLocation.Key, collectedEdges, collectedWarnings);
            }
        }
    }

    var nothingFound = collectedEdges.Count == 0 && collectedWarnings.Count == 0;
    return nothingFound
        ? JavaReflectionAnalysis.Empty
        : new JavaReflectionAnalysis(collectedEdges.ToImmutableArray(), collectedWarnings.ToImmutableArray());
}
/// <summary>
/// Walks one method's bytecode, tracking the most recent string/class literal pushed by an
/// <c>ldc</c> instruction and handing every invoke instruction to <c>HandleInvocation</c>.
/// </summary>
/// <remarks>
/// Every instruction is advanced by its real encoded length (including <c>tableswitch</c>,
/// <c>lookupswitch</c> and <c>wide</c>), so operand bytes are never decoded as opcodes. The walk
/// stops at the first instruction whose operands would run past the end of the code array.
/// Pending literals are cleared by store instructions, stack-manipulation instructions and invokes.
/// </remarks>
private static void AnalyzeMethod(
    JavaClassFile classFile,
    JavaMethod method,
    string segmentIdentifier,
    string className,
    List<JavaReflectionEdge> edges,
    List<JavaReflectionWarning> warnings)
{
    var pool = classFile.ConstantPool;
    string? pendingStringLiteral = null;
    string? pendingClassLiteral = null;
    var sawCurrentThread = false;
    var emittedTcclWarning = false;

    var code = method.Code;
    var length = code.Length;
    var offset = 0;

    while (offset < length)
    {
        var instructionOffset = offset;
        var opcode = code[instructionOffset];
        var operandStart = instructionOffset + 1;

        var operandLength = GetOperandLength(instructionOffset);
        if (operandLength < 0 || operandLength > length - operandStart)
        {
            // Truncated or malformed instruction: stop rather than read past the array.
            break;
        }

        offset = operandStart + operandLength;

        switch (opcode)
        {
            case 0x12: // LDC
                HandleLdc(code[operandStart], pool, ref pendingStringLiteral, ref pendingClassLiteral);
                break;

            case 0x13: // LDC_W
            case 0x14: // LDC2_W
                HandleLdc(ReadUInt16At(operandStart), pool, ref pendingStringLiteral, ref pendingClassLiteral);
                break;

            case 0xB8: // invokestatic
            case 0xB6: // invokevirtual
            case 0xB7: // invokespecial
            case 0xB9: // invokeinterface (index, count, zero)
                HandleInvocation(
                    pool,
                    method,
                    segmentIdentifier,
                    className,
                    instructionOffset,
                    ReadUInt16At(operandStart),
                    opcode,
                    ref pendingStringLiteral,
                    ref pendingClassLiteral,
                    ref sawCurrentThread,
                    ref emittedTcclWarning,
                    edges,
                    warnings);
                pendingStringLiteral = null;
                pendingClassLiteral = null;
                break;

            case 0xC4: // wide: the modified opcode is the first operand byte
                if (IsStoreWithExplicitIndex(code[operandStart]))
                {
                    pendingStringLiteral = null;
                    pendingClassLiteral = null;
                }

                break;

            default:
                if (IsStoreInstruction(opcode) || IsStackMutation(opcode))
                {
                    pendingStringLiteral = null;
                    pendingClassLiteral = null;
                }

                break;
        }
    }

    // When the method calls Thread.currentThread without accessing the context loader, we do not emit warnings.

    int ReadUInt16At(int position)
        => (code[position] << 8) | code[position + 1];

    int ReadInt32At(int position)
        => (code[position] << 24) | (code[position + 1] << 16) | (code[position + 2] << 8) | code[position + 3];

    // Number of operand bytes following the opcode at 'position', or -1 when it cannot be determined.
    int GetOperandLength(int position)
    {
        var current = code[position];
        switch (current)
        {
            case 0xAA: // tableswitch: padding to 4-byte alignment, default, low, high, then (high - low + 1) offsets
            {
                var padding = (4 - ((position + 1) % 4)) % 4;
                var header = position + 1 + padding;
                if (header > length - 12)
                {
                    return -1;
                }

                long low = ReadInt32At(header + 4);
                long high = ReadInt32At(header + 8);
                if (high < low)
                {
                    return -1;
                }

                var total = padding + 12 + ((high - low + 1) * 4);
                return total > int.MaxValue ? -1 : (int)total;
            }

            case 0xAB: // lookupswitch: padding, default, npairs, then npairs (match, offset) pairs
            {
                var padding = (4 - ((position + 1) % 4)) % 4;
                var header = position + 1 + padding;
                if (header > length - 8)
                {
                    return -1;
                }

                long pairs = ReadInt32At(header + 4);
                if (pairs < 0)
                {
                    return -1;
                }

                var total = padding + 8 + (pairs * 8);
                return total > int.MaxValue ? -1 : (int)total;
            }

            case 0xC4: // wide: opcode + u2 index, plus a u2 constant when the opcode is iinc
                if (position + 1 >= length)
                {
                    return -1;
                }

                return code[position + 1] == 0x84 ? 5 : 3;

            default:
                return current switch
                {
                    // bipush, ldc, xload, xstore, ret, newarray
                    0x10 or 0x12 or (>= 0x15 and <= 0x19) or (>= 0x36 and <= 0x3A) or 0xA9 or 0xBC => 1,

                    // sipush, ldc_w, ldc2_w, iinc, if*/goto/jsr, field access, invokevirtual/special/static,
                    // new, anewarray, checkcast, instanceof, ifnull, ifnonnull
                    0x11 or 0x13 or 0x14 or 0x84
                        or (>= 0x99 and <= 0xA8)
                        or (>= 0xB2 and <= 0xB8)
                        or 0xBB or 0xBD or 0xC0 or 0xC1 or 0xC6 or 0xC7 => 2,

                    // multianewarray
                    0xC5 => 3,

                    // invokeinterface, invokedynamic, goto_w, jsr_w
                    0xB9 or 0xBA or 0xC8 or 0xC9 => 4,

                    _ => 0,
                };
        }
    }
}
/// <summary>
/// Updates the pending-literal state for an <c>ldc</c>-family instruction: a String constant becomes
/// the pending string, a Class constant becomes the pending class literal, and any other constant
/// clears both.
/// </summary>
private static void HandleLdc(
    int constantIndex,
    JavaConstantPool pool,
    ref string? pendingString,
    ref string? pendingClassLiteral)
{
    var kind = pool.GetConstantKind(constantIndex);

    if (kind == JavaConstantKind.String)
    {
        pendingString = pool.GetString(constantIndex);
        pendingClassLiteral = null;
        return;
    }

    if (kind == JavaConstantKind.Class)
    {
        pendingClassLiteral = pool.GetClassName(constantIndex);
        pendingString = null;
        return;
    }

    pendingString = null;
    pendingClassLiteral = null;
}
/// <summary>
/// Classifies one invoke instruction. Calls to <c>Class.forName</c>, <c>ClassLoader.loadClass</c>,
/// <c>ServiceLoader.load*</c> and the <c>ClassLoader.getResource*</c> family produce an edge whose
/// target is the pending literal (High confidence) or <c>null</c> (Low confidence).
/// <c>Thread.currentThread</c> followed by <c>Thread.getContextClassLoader</c> produces a single
/// <c>tccl</c> warning per method. Both pending literals are cleared before returning.
/// </summary>
private static void HandleInvocation(
    JavaConstantPool pool,
    JavaMethod method,
    string segmentIdentifier,
    string className,
    int instructionOffset,
    int methodIndex,
    byte opcode,
    ref string? pendingString,
    ref string? pendingClassLiteral,
    ref bool sawCurrentThread,
    ref bool emittedTcclWarning,
    List<JavaReflectionEdge> edges,
    List<JavaReflectionWarning> warnings)
{
    var methodRef = pool.GetMethodReference(methodIndex);
    var owner = methodRef.OwnerInternalName;
    var name = methodRef.Name;
    var descriptor = methodRef.Descriptor;

    var normalizedOwner = owner ?? string.Empty;
    var normalizedSource = NormalizeClassName(className) ?? className ?? string.Empty;

    switch (normalizedOwner)
    {
        case "java/lang/Class" when name == "forName":
            AddEdge(JavaReflectionReason.ClassForName, NormalizeClassName(pendingString), pendingString is not null);
            break;

        case "java/lang/ClassLoader" when name == "loadClass":
            AddEdge(JavaReflectionReason.ClassLoaderLoadClass, NormalizeClassName(pendingString), pendingString is not null);
            break;

        case "java/util/ServiceLoader" when name.StartsWith("load", StringComparison.Ordinal):
            AddEdge(JavaReflectionReason.ServiceLoaderLoad, NormalizeClassName(pendingClassLiteral), pendingClassLiteral is not null);
            break;

        case "java/lang/ClassLoader" when name == "getResource" || name == "getResourceAsStream" || name == "getResources":
            // Resource names are kept verbatim; they are paths, not class names.
            AddEdge(JavaReflectionReason.ResourceLookup, pendingString, pendingString is not null);
            break;

        case "java/lang/Thread" when name == "currentThread":
            sawCurrentThread = true;
            break;

        case "java/lang/Thread" when name == "getContextClassLoader":
            if (sawCurrentThread && !emittedTcclWarning)
            {
                warnings.Add(new JavaReflectionWarning(
                    normalizedSource,
                    segmentIdentifier,
                    "tccl",
                    "Thread context class loader access detected.",
                    method.Name,
                    method.Descriptor));
                emittedTcclWarning = true;
            }

            break;
    }

    pendingString = null;
    pendingClassLiteral = null;

    // Values derived from the ref parameters are passed in as arguments because local functions
    // cannot capture ref parameters.
    void AddEdge(JavaReflectionReason reason, string? target, bool literalTracked)
    {
        edges.Add(new JavaReflectionEdge(
            normalizedSource,
            segmentIdentifier,
            target,
            reason,
            literalTracked ? JavaReflectionConfidence.High : JavaReflectionConfidence.Low,
            method.Name,
            method.Descriptor,
            instructionOffset,
            null));
    }
}
/// <summary>
/// Converts a '/'-separated internal name to its '.'-separated form.
/// Returns <c>null</c> for <c>null</c>, empty or whitespace-only input.
/// </summary>
private static string? NormalizeClassName(string? internalName)
    => string.IsNullOrWhiteSpace(internalName) ? null : internalName.Replace('/', '.');
/// <summary>
/// True for every store opcode: indexed stores (0x36-0x3A), implicit-slot stores (0x3B-0x4E) and
/// array stores (0x4F-0x56). The three ranges are contiguous, so a single range check suffices.
/// </summary>
private static bool IsStoreInstruction(byte opcode)
    => opcode is >= 0x36 and <= 0x56;
/// <summary>True for the store opcodes that carry a local-variable index operand (0x36-0x3A).</summary>
private static bool IsStoreWithExplicitIndex(byte opcode)
    => opcode is >= 0x36 and <= 0x3A;
/// <summary>True for the load opcodes that carry a local-variable index operand (0x15-0x19).</summary>
private static bool IsLoadInstructionWithIndex(byte opcode)
    => opcode is >= 0x15 and <= 0x19;
/// <summary>True for the operand-stack manipulation opcodes 0x57 through 0x5F.</summary>
private static bool IsStackMutation(byte opcode)
    => opcode >= 0x57 && opcode <= 0x5F;
private sealed class JavaClassFile
{
public JavaClassFile(string thisClassName, JavaConstantPool constantPool, ImmutableArray<JavaMethod> methods)
{
ThisClassName = thisClassName;
ConstantPool = constantPool;
Methods = methods;
}
public string ThisClassName { get; }
public JavaConstantPool ConstantPool { get; }
public ImmutableArray<JavaMethod> Methods { get; }
/// <summary>
/// Reads a Java class file from <paramref name="stream"/>: validates the magic number, loads the
/// constant pool, skips interfaces and fields, and collects every method that has a <c>Code</c> attribute.
/// </summary>
/// <param name="stream">Stream positioned at the first byte of the class file. It is not closed.</param>
/// <param name="cancellationToken">Checked once per constant-pool entry and once per method.</param>
/// <returns>The parsed class with its name, constant pool and the methods that have bytecode.</returns>
/// <exception cref="InvalidDataException">
/// The magic number is wrong, a constant-pool tag is unknown, or a length field is implausible.
/// </exception>
/// <exception cref="EndOfStreamException">The stream ends before the structure is complete.</exception>
public static JavaClassFile Parse(Stream stream, CancellationToken cancellationToken)
{
    var reader = new BigEndianReader(stream, leaveOpen: true);
    if (reader.ReadUInt32() != 0xCAFEBABE)
    {
        throw new InvalidDataException("Invalid Java class file magic header.");
    }

    _ = reader.ReadUInt16(); // minor
    _ = reader.ReadUInt16(); // major

    var constantPoolCount = reader.ReadUInt16();
    var pool = new JavaConstantPool(constantPoolCount);

    // Constant-pool indices are 1-based; slot 0 is never populated.
    var index = 1;
    while (index < constantPoolCount)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var tag = reader.ReadByte();
        switch ((JavaConstantTag)tag)
        {
            case JavaConstantTag.Utf8:
                pool.Set(index, JavaConstantPoolEntry.Utf8(reader.ReadUtf8()));
                index++;
                break;

            case JavaConstantTag.Integer:
            case JavaConstantTag.Float:
                reader.Skip(4);
                pool.Set(index, JavaConstantPoolEntry.Other(tag));
                index++;
                break;

            case JavaConstantTag.Long:
            case JavaConstantTag.Double:
                reader.Skip(8);
                pool.Set(index, JavaConstantPoolEntry.Other(tag));
                index += 2; // 8-byte constants occupy two pool slots
                break;

            case JavaConstantTag.Class:
            case JavaConstantTag.String:
            case JavaConstantTag.MethodType:
                pool.Set(index, JavaConstantPoolEntry.Indexed(tag, reader.ReadUInt16()));
                index++;
                break;

            case JavaConstantTag.Fieldref:
            case JavaConstantTag.Methodref:
            case JavaConstantTag.InterfaceMethodref:
            case JavaConstantTag.NameAndType:
            case JavaConstantTag.InvokeDynamic:
                pool.Set(index, JavaConstantPoolEntry.IndexedPair(tag, reader.ReadUInt16(), reader.ReadUInt16()));
                index++;
                break;

            case JavaConstantTag.MethodHandle:
                reader.Skip(1); // reference kind
                pool.Set(index, JavaConstantPoolEntry.Indexed(tag, reader.ReadUInt16()));
                index++;
                break;

            case (JavaConstantTag)17:
                // CONSTANT_Dynamic (Java 11+): u2 bootstrap method index + u2 name-and-type index.
                // Opaque to this parser, so it is stored like the other unclassified constants.
                reader.Skip(4);
                pool.Set(index, JavaConstantPoolEntry.Other(tag));
                index++;
                break;

            case (JavaConstantTag)19:
            case (JavaConstantTag)20:
                // CONSTANT_Module / CONSTANT_Package (Java 9+, module-info.class): u2 name index.
                reader.Skip(2);
                pool.Set(index, JavaConstantPoolEntry.Other(tag));
                index++;
                break;

            default:
                throw new InvalidDataException($"Unsupported constant pool tag {tag}.");
        }
    }

    _ = reader.ReadUInt16(); // class access flags
    var thisClassIndex = reader.ReadUInt16();
    _ = reader.ReadUInt16(); // super

    var interfacesCount = reader.ReadUInt16();
    reader.Skip(interfacesCount * 2);

    var fieldsCount = reader.ReadUInt16();
    for (var i = 0; i < fieldsCount; i++)
    {
        SkipMember(reader);
    }

    var methodsCount = reader.ReadUInt16();
    var methods = ImmutableArray.CreateBuilder<JavaMethod>(methodsCount);
    for (var i = 0; i < methodsCount; i++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        _ = reader.ReadUInt16(); // method access flags
        var nameIndex = reader.ReadUInt16();
        var descriptorIndex = reader.ReadUInt16();
        var attributesCount = reader.ReadUInt16();
        byte[]? code = null;

        for (var attr = 0; attr < attributesCount; attr++)
        {
            var attributeNameIndex = reader.ReadUInt16();
            var attributeLength = reader.ReadUInt32();
            var attributeName = pool.GetUtf8(attributeNameIndex) ?? string.Empty;
            if (attributeName == "Code")
            {
                _ = reader.ReadUInt16(); // max_stack
                _ = reader.ReadUInt16(); // max_locals
                var codeLength = reader.ReadUInt32();

                // The bytecode is stored inside the attribute, so it can never be longer than it.
                if (codeLength > attributeLength)
                {
                    throw new InvalidDataException("Code attribute declares more bytecode than the attribute contains.");
                }

                code = reader.ReadBytes(CheckedLength(codeLength));
                var exceptionTableLength = reader.ReadUInt16();
                reader.Skip(exceptionTableLength * 8);
                SkipAttributes(reader, reader.ReadUInt16());
            }
            else
            {
                reader.Skip(CheckedLength(attributeLength));
            }
        }

        // Methods without a Code attribute (abstract/native) are not reported.
        if (code is not null)
        {
            var name = pool.GetUtf8(nameIndex) ?? string.Empty;
            var descriptor = pool.GetUtf8(descriptorIndex) ?? string.Empty;
            methods.Add(new JavaMethod(name, descriptor, code));
        }
    }

    SkipAttributes(reader, reader.ReadUInt16()); // class-level attributes

    var thisClassName = pool.GetClassName(thisClassIndex) ?? string.Empty;
    return new JavaClassFile(thisClassName, pool, methods.ToImmutable());

    // Converts a u4 length to int; a plain cast would turn values above int.MaxValue negative,
    // and Skip ignores negative counts, which would silently desynchronise the parse.
    static int CheckedLength(uint value)
        => value <= int.MaxValue
            ? (int)value
            : throw new InvalidDataException($"Class file length field {value} exceeds the supported maximum.");

    // Skips `count` attribute_info structures (u2 name index, u4 length, payload).
    static void SkipAttributes(BigEndianReader source, int count)
    {
        for (var k = 0; k < count; k++)
        {
            source.Skip(2); // name index
            source.Skip(CheckedLength(source.ReadUInt32()));
        }
    }
}
/// <summary>
/// Skips one field_info/method_info structure: the 6-byte header (access flags, name index,
/// descriptor index) followed by all of its attributes.
/// </summary>
/// <param name="reader">Reader positioned at the start of the member.</param>
/// <exception cref="InvalidDataException">An attribute declares a length above <see cref="int.MaxValue"/>.</exception>
private static void SkipMember(BigEndianReader reader)
{
    reader.Skip(6); // access, name, descriptor
    var attributeCount = reader.ReadUInt16();
    for (var i = 0; i < attributeCount; i++)
    {
        reader.Skip(2); // attribute name index
        var length = reader.ReadUInt32();

        // A plain (int) cast would go negative here and Skip would then skip nothing,
        // leaving the reader in the middle of the attribute payload.
        if (length > int.MaxValue)
        {
            throw new InvalidDataException($"Class file length field {length} exceeds the supported maximum.");
        }

        reader.Skip((int)length);
    }
}
}
/// <summary>
/// A method that has bytecode: its simple name, its JVM descriptor and the raw bytes of its code array.
/// </summary>
private sealed class JavaMethod(string name, string descriptor, byte[] code)
{
    /// <summary>Method name as passed to the constructor.</summary>
    public string Name { get; } = name;

    /// <summary>Method descriptor as passed to the constructor.</summary>
    public string Descriptor { get; } = descriptor;

    /// <summary>Bytecode buffer as passed to the constructor (not copied).</summary>
    public byte[] Code { get; } = code;
}
/// <summary>
/// Index-addressable view of a class file's constant pool. Slots that were never set
/// (including slot 0) read as "no entry".
/// </summary>
private sealed class JavaConstantPool
{
    private readonly JavaConstantPoolEntry?[] _entries;

    /// <summary>Creates a pool with <paramref name="count"/> slots, all initially empty.</summary>
    public JavaConstantPool(int count)
    {
        _entries = new JavaConstantPoolEntry?[count];
    }

    /// <summary>Stores <paramref name="entry"/> at <paramref name="index"/>.</summary>
    public void Set(int index, JavaConstantPoolEntry entry)
    {
        _entries[index] = entry;
    }

    /// <summary>
    /// Returns the kind of the entry at <paramref name="index"/>, or <see cref="JavaConstantKind.Other"/>
    /// when the slot is empty or the index is outside the pool.
    /// </summary>
    public JavaConstantKind GetConstantKind(int index)
        => TryGetEntry(index)?.Kind ?? JavaConstantKind.Other;

    /// <summary>Returns the text of the Utf8 entry at <paramref name="index"/>, or null if there is none.</summary>
    public string? GetUtf8(int index)
    {
        if (index <= 0 || index >= _entries.Length)
        {
            return null;
        }

        return _entries[index] is JavaConstantPoolEntry.Utf8Entry utf8 ? utf8.Value : null;
    }

    /// <summary>Resolves a String entry to its text, or null if the index does not denote a String entry.</summary>
    public string? GetString(int index)
    {
        if (TryGetEntry(index) is JavaConstantPoolEntry.IndexedEntry { Kind: JavaConstantKind.String, Index: var utf8Index })
        {
            return GetUtf8(utf8Index);
        }

        return null;
    }

    /// <summary>Resolves a Class entry to its name, or null if the index does not denote a Class entry.</summary>
    public string? GetClassName(int index)
    {
        if (TryGetEntry(index) is JavaConstantPoolEntry.IndexedEntry { Kind: JavaConstantKind.Class, Index: var nameIndex })
        {
            return GetUtf8(nameIndex);
        }

        return null;
    }

    /// <summary>
    /// Resolves a Methodref/InterfaceMethodref entry to owner, name and descriptor.
    /// Missing owner/name/descriptor strings resolve to the empty string.
    /// </summary>
    /// <exception cref="InvalidDataException">
    /// The index is outside the pool, the entry is not a method reference, or its NameAndType link is invalid.
    /// </exception>
    public JavaMethodReference GetMethodReference(int index)
    {
        if (TryGetEntry(index) is not JavaConstantPoolEntry.IndexedPairEntry pair || pair.Kind is not (JavaConstantKind.Methodref or JavaConstantKind.InterfaceMethodref))
        {
            throw new InvalidDataException($"Constant pool entry {index} is not a method reference.");
        }

        var owner = GetClassName(pair.FirstIndex) ?? string.Empty;
        if (TryGetEntry(pair.SecondIndex) is not JavaConstantPoolEntry.IndexedPairEntry { Kind: JavaConstantKind.NameAndType } nameAndType)
        {
            throw new InvalidDataException("Invalid NameAndType entry for method reference.");
        }

        var name = GetUtf8(nameAndType.FirstIndex) ?? string.Empty;
        var descriptor = GetUtf8(nameAndType.SecondIndex) ?? string.Empty;
        return new JavaMethodReference(owner, name, descriptor);
    }

    // Bounds-checked slot read: indices come from untrusted class-file bytes, so an
    // out-of-range value is treated as "no entry" instead of raising IndexOutOfRangeException.
    private JavaConstantPoolEntry? TryGetEntry(int index)
        => (uint)index < (uint)_entries.Length ? _entries[index] : null;
}
/// <summary>
/// A resolved method reference: the owning class name plus the method's name and descriptor.
/// GetMethodReference substitutes the empty string for any part it cannot resolve.
/// </summary>
private readonly record struct JavaMethodReference(string OwnerInternalName, string Name, string Descriptor);
/// <summary>
/// Base type for the constant-pool entries kept by the parser. Each entry carries a
/// <see cref="JavaConstantKind"/>; the nested records add the payload for that shape.
/// </summary>
private abstract record JavaConstantPoolEntry(JavaConstantKind Kind)
{
    /// <summary>Entry holding decoded text.</summary>
    public sealed record Utf8Entry(string Value) : JavaConstantPoolEntry(JavaConstantKind.Utf8);

    /// <summary>Entry that points at one other pool slot.</summary>
    public sealed record IndexedEntry(JavaConstantKind Kind, ushort Index) : JavaConstantPoolEntry(Kind);

    /// <summary>Entry that points at two other pool slots.</summary>
    public sealed record IndexedPairEntry(JavaConstantKind Kind, ushort FirstIndex, ushort SecondIndex) : JavaConstantPoolEntry(Kind);

    /// <summary>Entry whose payload is not retained; only the raw tag byte is kept.</summary>
    public sealed record OtherEntry(byte Tag) : JavaConstantPoolEntry(JavaConstantKind.Other);

    /// <summary>Creates a <see cref="Utf8Entry"/>.</summary>
    public static JavaConstantPoolEntry Utf8(string value)
    {
        return new Utf8Entry(value);
    }

    /// <summary>Creates an <see cref="IndexedEntry"/> whose kind is derived from the raw tag.</summary>
    public static JavaConstantPoolEntry Indexed(byte tag, ushort index)
    {
        var kind = ToKind(tag);
        return new IndexedEntry(kind, index);
    }

    /// <summary>Creates an <see cref="IndexedPairEntry"/> whose kind is derived from the raw tag.</summary>
    public static JavaConstantPoolEntry IndexedPair(byte tag, ushort first, ushort second)
    {
        var kind = ToKind(tag);
        return new IndexedPairEntry(kind, first, second);
    }

    /// <summary>Creates an <see cref="OtherEntry"/> that remembers only the tag.</summary>
    public static JavaConstantPoolEntry Other(byte tag)
    {
        return new OtherEntry(tag);
    }

    // Maps a raw tag byte to the kind used for reference-style entries; any tag not listed is Other.
    private static JavaConstantKind ToKind(byte tag)
    {
        switch (tag)
        {
            case 7:
                return JavaConstantKind.Class;
            case 8:
                return JavaConstantKind.String;
            case 9:
                return JavaConstantKind.Fieldref;
            case 10:
                return JavaConstantKind.Methodref;
            case 11:
                return JavaConstantKind.InterfaceMethodref;
            case 12:
                return JavaConstantKind.NameAndType;
            case 15:
                return JavaConstantKind.MethodHandle;
            case 16:
                return JavaConstantKind.MethodType;
            case 18:
                return JavaConstantKind.InvokeDynamic;
            default:
                return JavaConstantKind.Other;
        }
    }
}
/// <summary>Classification attached to every constant-pool entry record.</summary>
private enum JavaConstantKind
{
// Kind carried by Utf8Entry records.
Utf8,
// NOTE(review): Integer, Float, Long and Double are declared, but the visible parser stores those
// constants as OtherEntry (kind Other) and ToKind never returns these four values — confirm
// whether any code outside this view produces them.
Integer,
Float,
Long,
Double,
// The kinds below are the ones ToKind produces from the raw tag byte.
Class,
String,
Fieldref,
Methodref,
InterfaceMethodref,
NameAndType,
MethodHandle,
MethodType,
InvokeDynamic,
// Fallback for entries whose payload is not retained and for unmapped tags.
Other,
}
/// <summary>
/// Raw tag byte that introduces each constant-pool entry in a class file; the numeric values are
/// the ones read from the stream.
/// NOTE(review): the JVM specification also defines tags 17 (Dynamic), 19 (Module) and 20 (Package),
/// which have no named member here.
/// </summary>
private enum JavaConstantTag : byte
{
Utf8 = 1,
Integer = 3,
Float = 4,
Long = 5,
Double = 6,
Class = 7,
String = 8,
Fieldref = 9,
Methodref = 10,
InterfaceMethodref = 11,
NameAndType = 12,
MethodHandle = 15,
MethodType = 16,
InvokeDynamic = 18,
}
/// <summary>
/// Minimal forward-only reader for big-endian binary data (the byte order used by class files).
/// Every read throws <see cref="EndOfStreamException"/> when the stream ends early.
/// </summary>
private sealed class BigEndianReader
{
    private readonly Stream _stream;
    private readonly BinaryReader _reader;

    /// <summary>Wraps <paramref name="stream"/>; <paramref name="leaveOpen"/> is forwarded to the inner <see cref="BinaryReader"/>.</summary>
    public BigEndianReader(Stream stream, bool leaveOpen)
    {
        _stream = stream;
        _reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen);
    }

    /// <summary>Reads an unsigned 16-bit big-endian integer.</summary>
    public ushort ReadUInt16()
    {
        Span<byte> buffer = stackalloc byte[2];
        Fill(buffer);
        return BinaryPrimitives.ReadUInt16BigEndian(buffer);
    }

    /// <summary>Reads an unsigned 32-bit big-endian integer.</summary>
    public uint ReadUInt32()
    {
        Span<byte> buffer = stackalloc byte[4];
        Fill(buffer);
        return BinaryPrimitives.ReadUInt32BigEndian(buffer);
    }

    /// <summary>Reads a signed 32-bit big-endian integer.</summary>
    public int ReadInt32()
    {
        Span<byte> buffer = stackalloc byte[4];
        Fill(buffer);
        return BinaryPrimitives.ReadInt32BigEndian(buffer);
    }

    /// <summary>Reads a single byte.</summary>
    public byte ReadByte() => _reader.ReadByte();

    /// <summary>Reads a u2 length prefix followed by that many bytes, decoded as UTF-8.</summary>
    public string ReadUtf8()
    {
        var length = ReadUInt16();
        var bytes = ReadBytes(length);
        return Encoding.UTF8.GetString(bytes);
    }

    /// <summary>Reads exactly <paramref name="count"/> bytes.</summary>
    /// <exception cref="EndOfStreamException">Fewer than <paramref name="count"/> bytes remain.</exception>
    public byte[] ReadBytes(int count)
    {
        var bytes = _reader.ReadBytes(count);
        if (bytes.Length != count)
        {
            throw new EndOfStreamException();
        }

        return bytes;
    }

    /// <summary>Advances the stream by <paramref name="count"/> bytes; zero or negative counts are a no-op.</summary>
    /// <exception cref="EndOfStreamException">The stream ends before <paramref name="count"/> bytes were skipped.</exception>
    public void Skip(int count)
    {
        if (count <= 0)
        {
            return;
        }

        var buffer = new byte[Math.Min(count, 4096)];
        var remaining = count;
        while (remaining > 0)
        {
            var read = _stream.Read(buffer, 0, Math.Min(buffer.Length, remaining));
            if (read == 0)
            {
                throw new EndOfStreamException();
            }

            remaining -= read;
        }
    }

    // Stream.Read may legally return fewer bytes than requested without being at the end
    // (common for decompression streams), so a single Read followed by a length check would
    // report a bogus end-of-stream. ReadExactly loops until the span is full and throws
    // EndOfStreamException only on a genuine early end.
    private void Fill(Span<byte> buffer) => _stream.ReadExactly(buffer);
}
}

View File

@@ -0,0 +1,160 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Threading;
using StellaOps.Scanner.Analyzers.Lang.Java.Internal.ClassPath;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ServiceProviders;
/// <summary>
/// Aggregates the service definitions of every class-path segment into one record per service,
/// listing all provider candidates in class-path order and marking the first one as selected.
/// </summary>
internal static class JavaServiceProviderScanner
{
    /// <summary>
    /// Builds the service-provider analysis for <paramref name="classPath"/>.
    /// </summary>
    /// <param name="classPath">Class path whose segments are visited in ascending <c>Order</c>.</param>
    /// <param name="catalog">Catalog used to look up display name and category per service id.</param>
    /// <param name="cancellationToken">Checked per segment and per service definition.</param>
    /// <returns>Records sorted by service id (ordinal); services without any usable provider are omitted.</returns>
    public static JavaServiceProviderAnalysis Scan(JavaClassPathAnalysis classPath, JavaSpiCatalog catalog, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(classPath);
        ArgumentNullException.ThrowIfNull(catalog);

        var byService = new Dictionary<string, ServiceAccumulator>(StringComparer.Ordinal);

        foreach (var segment in classPath.Segments.OrderBy(static s => s.Order))
        {
            cancellationToken.ThrowIfCancellationRequested();

            foreach (var definition in segment.ServiceDefinitions)
            {
                cancellationToken.ThrowIfCancellationRequested();

                var providers = definition.Value;
                if (providers.IsDefaultOrEmpty)
                {
                    continue;
                }

                if (!byService.TryGetValue(definition.Key, out var bucket))
                {
                    bucket = new ServiceAccumulator();
                    byService[definition.Key] = bucket;
                }

                // Position counts every listed provider, including blank ones that are dropped.
                var position = -1;
                foreach (var rawProvider in providers)
                {
                    position++;

                    var providerClass = rawProvider?.Trim();
                    if (string.IsNullOrEmpty(providerClass))
                    {
                        continue;
                    }

                    bucket.AddCandidate(new JavaServiceProviderCandidateRecord(
                        ProviderClass: providerClass,
                        SegmentIdentifier: segment.Identifier,
                        SegmentOrder: segment.Order,
                        ProviderIndex: position,
                        IsSelected: false));
                }
            }
        }

        var results = new List<JavaServiceProviderRecord>(byService.Count);

        foreach (var (serviceId, bucket) in byService.OrderBy(static entry => entry.Key, StringComparer.Ordinal))
        {
            var descriptor = catalog.Resolve(serviceId);

            var ordered = bucket.GetOrderedCandidates();
            if (ordered.Count == 0)
            {
                continue;
            }

            var selectedIndex = bucket.DetermineSelection(ordered);
            var warnings = bucket.BuildWarnings();

            var candidates = ImmutableArray.CreateBuilder<JavaServiceProviderCandidateRecord>(ordered.Count);
            for (var position = 0; position < ordered.Count; position++)
            {
                candidates.Add(ordered[position] with { IsSelected = position == selectedIndex });
            }

            ImmutableArray<string> warningsArray;
            if (warnings.Count == 0)
            {
                warningsArray = ImmutableArray<string>.Empty;
            }
            else
            {
                warningsArray = ImmutableArray.CreateRange(warnings);
            }

            results.Add(new JavaServiceProviderRecord(
                ServiceId: serviceId,
                DisplayName: descriptor.DisplayName,
                Category: descriptor.Category,
                Candidates: candidates.ToImmutable(),
                SelectedIndex: selectedIndex,
                Warnings: warningsArray));
        }

        return new JavaServiceProviderAnalysis(results.ToImmutableArray());
    }

    /// <summary>Collects the candidates of one service and remembers which segments named each provider.</summary>
    private sealed class ServiceAccumulator
    {
        private readonly List<JavaServiceProviderCandidateRecord> _candidates = new();
        private readonly Dictionary<string, HashSet<string>> _providerSources = new(StringComparer.Ordinal);

        /// <summary>Records a candidate and the segment it came from.</summary>
        public void AddCandidate(JavaServiceProviderCandidateRecord candidate)
        {
            _candidates.Add(candidate);

            if (_providerSources.TryGetValue(candidate.ProviderClass, out var segments))
            {
                segments.Add(candidate.SegmentIdentifier);
                return;
            }

            _providerSources[candidate.ProviderClass] = new HashSet<string>(StringComparer.Ordinal)
            {
                candidate.SegmentIdentifier,
            };
        }

        /// <summary>Candidates sorted by segment order, then position in the definition, then class name.</summary>
        public IReadOnlyList<JavaServiceProviderCandidateRecord> GetOrderedCandidates()
        {
            var ordered = _candidates
                .OrderBy(static c => c.SegmentOrder)
                .ThenBy(static c => c.ProviderIndex)
                .ThenBy(static c => c.ProviderClass, StringComparer.Ordinal);
            return ordered.ToList();
        }

        /// <summary>First-wins selection: index 0 when there is at least one candidate, otherwise -1.</summary>
        public int DetermineSelection(IReadOnlyList<JavaServiceProviderCandidateRecord> orderedCandidates)
        {
            return orderedCandidates.Count > 0 ? 0 : -1;
        }

        /// <summary>One warning per provider class that was named by more than one segment, sorted by class name.</summary>
        public List<string> BuildWarnings()
        {
            return _providerSources
                .Where(static entry => entry.Value.Count > 1)
                .OrderBy(static entry => entry.Key, StringComparer.Ordinal)
                .Select(static entry =>
                {
                    var locations = entry.Value.OrderBy(static value => value, StringComparer.Ordinal);
                    return $"duplicate-provider: {entry.Key} ({string.Join(", ", locations)})";
                })
                .ToList();
        }
    }
}
/// <summary>Result of a service-provider scan: one record per service that had at least one candidate.</summary>
/// <param name="Services">Service records; the scanner emits them sorted by service id (ordinal).</param>
internal sealed record JavaServiceProviderAnalysis(ImmutableArray<JavaServiceProviderRecord> Services)
{
/// <summary>Shared instance with no services.</summary>
public static readonly JavaServiceProviderAnalysis Empty = new(ImmutableArray<JavaServiceProviderRecord>.Empty);
}
/// <summary>All provider candidates found for one service, with the selected one and any warnings.</summary>
/// <param name="ServiceId">Service identifier (the key of the service definition).</param>
/// <param name="DisplayName">Display name taken from the SPI catalog descriptor.</param>
/// <param name="Category">Category taken from the SPI catalog descriptor.</param>
/// <param name="Candidates">Candidates in selection order; exactly the one at <paramref name="SelectedIndex"/> has IsSelected set.</param>
/// <param name="SelectedIndex">Index into <paramref name="Candidates"/> of the selected provider.</param>
/// <param name="Warnings">Warning strings such as duplicate-provider notices; empty when there are none.</param>
internal sealed record JavaServiceProviderRecord(
string ServiceId,
string DisplayName,
string Category,
ImmutableArray<JavaServiceProviderCandidateRecord> Candidates,
int SelectedIndex,
ImmutableArray<string> Warnings);
/// <summary>One provider class named by one class-path segment for a service.</summary>
/// <param name="ProviderClass">Trimmed provider class name.</param>
/// <param name="SegmentIdentifier">Identifier of the segment that listed the provider.</param>
/// <param name="SegmentOrder">Order value of that segment; lower values sort first.</param>
/// <param name="ProviderIndex">Zero-based position of the provider within the segment's definition (blank entries still count).</param>
/// <param name="IsSelected">True for the candidate the scanner picked for the service.</param>
internal sealed record JavaServiceProviderCandidateRecord(
string ProviderClass,
string SegmentIdentifier,
int SegmentOrder,
int ProviderIndex,
bool IsSelected);

View File

@@ -0,0 +1,103 @@
using System.Collections.Immutable;
using System.Reflection;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.ServiceProviders;
/// <summary>
/// Lookup table from service id to <see cref="JavaSpiDescriptor"/>, loaded once from the
/// JSON catalog embedded in this assembly.
/// </summary>
internal sealed class JavaSpiCatalog
{
    private static readonly Lazy<JavaSpiCatalog> LazyDefault = new(LoadDefaultCore);

    private readonly ImmutableDictionary<string, JavaSpiDescriptor> _descriptors;

    private JavaSpiCatalog(ImmutableDictionary<string, JavaSpiDescriptor> descriptors)
        => _descriptors = descriptors;

    /// <summary>Catalog built from the embedded resource; created on first access.</summary>
    public static JavaSpiCatalog Default => LazyDefault.Value;

    /// <summary>
    /// Returns the descriptor registered for <paramref name="serviceId"/> (after trimming), or an
    /// "unknown" descriptor for that id. Null/blank ids yield an unknown descriptor with an empty id.
    /// </summary>
    public JavaSpiDescriptor Resolve(string serviceId)
    {
        var key = string.IsNullOrWhiteSpace(serviceId) ? string.Empty : serviceId.Trim();

        if (key.Length > 0 && _descriptors.TryGetValue(key, out var known))
        {
            return known;
        }

        return JavaSpiDescriptor.CreateUnknown(key);
    }

    // Reads and normalizes the embedded catalog; entries without a service id are dropped.
    private static JavaSpiCatalog LoadDefaultCore()
    {
        const string resourceName = "StellaOps.Scanner.Analyzers.Lang.Java.Internal.ServiceProviders.java-spi-catalog.json";

        var assembly = typeof(JavaSpiCatalog).GetTypeInfo().Assembly;
        using var stream = assembly.GetManifestResourceStream(resourceName);
        if (stream is null)
        {
            throw new InvalidOperationException($"Embedded SPI catalog '{resourceName}' not found.");
        }

        using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: false);
        var json = reader.ReadToEnd();

        var options = new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true,
            ReadCommentHandling = JsonCommentHandling.Skip,
        };
        var parsed = JsonSerializer.Deserialize<List<JavaSpiDescriptor>>(json, options);
        parsed ??= new List<JavaSpiDescriptor>();

        var usable = parsed
            .Select(Normalize)
            .Where(static item => !string.IsNullOrWhiteSpace(item.ServiceId));

        var table = usable.ToImmutableDictionary(
            static item => item.ServiceId,
            static item => item,
            StringComparer.Ordinal);

        return new JavaSpiCatalog(table);
    }

    // Trims all fields, lower-cases the category, and falls back to "unknown" / the service id
    // when category / display name are blank.
    private static JavaSpiDescriptor Normalize(JavaSpiDescriptor descriptor)
    {
        var serviceId = descriptor.ServiceId?.Trim() ?? string.Empty;

        string category;
        if (string.IsNullOrWhiteSpace(descriptor.Category))
        {
            category = "unknown";
        }
        else
        {
            category = descriptor.Category.Trim().ToLowerInvariant();
        }

        string displayName;
        if (string.IsNullOrWhiteSpace(descriptor.DisplayName))
        {
            displayName = serviceId;
        }
        else
        {
            displayName = descriptor.DisplayName.Trim();
        }

        return descriptor with
        {
            ServiceId = serviceId,
            Category = category,
            DisplayName = displayName,
        };
    }
}
/// <summary>One entry of the SPI catalog, as serialized in the catalog JSON.</summary>
internal sealed record class JavaSpiDescriptor
{
    /// <summary>Service identifier; empty when not supplied.</summary>
    [JsonPropertyName("serviceId")]
    public string ServiceId { get; init; } = string.Empty;

    /// <summary>Grouping label; defaults to "unknown".</summary>
    [JsonPropertyName("category")]
    public string Category { get; init; } = "unknown";

    /// <summary>Human-readable name; empty when not supplied.</summary>
    [JsonPropertyName("displayName")]
    public string DisplayName { get; init; } = string.Empty;

    /// <summary>Optional free-form notes.</summary>
    [JsonPropertyName("notes")]
    public string? Notes { get; init; }

    /// <summary>
    /// Creates a placeholder descriptor for a service that is not in the catalog: category
    /// "unknown" and the service id doubling as display name.
    /// </summary>
    public static JavaSpiDescriptor CreateUnknown(string serviceId)
    {
        var placeholder = new JavaSpiDescriptor
        {
            ServiceId = serviceId,
            Category = "unknown",
            DisplayName = serviceId,
        };
        return placeholder;
    }
}

View File

@@ -0,0 +1,52 @@
[
{
"serviceId": "java.sql.Driver",
"category": "jdk",
"displayName": "JDBC Driver"
},
{
"serviceId": "javax.annotation.processing.Processor",
"category": "jdk",
"displayName": "Annotation Processor"
},
{
"serviceId": "org.slf4j.spi.SLF4JServiceProvider",
"category": "logging",
"displayName": "SLF4J Service Provider"
},
{
"serviceId": "ch.qos.logback.classic.spi.Configurator",
"category": "logging",
"displayName": "Logback Configurator"
},
{
"serviceId": "com.fasterxml.jackson.core.TokenStreamFactory",
"category": "jackson",
"displayName": "Jackson Token Stream Factory"
},
{
"serviceId": "com.fasterxml.jackson.databind.Module",
"category": "jackson",
"displayName": "Jackson Module"
},
{
"serviceId": "org.springframework.boot.Bootstrapper",
"category": "spring",
"displayName": "Spring Boot Bootstrapper"
},
{
"serviceId": "org.springframework.boot.SpringApplicationRunListener",
"category": "spring",
"displayName": "Spring Application Run Listener"
},
{
"serviceId": "org.eclipse.microprofile.config.spi.ConfigSourceProvider",
"category": "microprofile",
"displayName": "MicroProfile Config Source Provider"
},
{
"serviceId": "org.eclipse.microprofile.config.spi.Converter",
"category": "microprofile",
"displayName": "MicroProfile Converter"
}
]

View File

@@ -0,0 +1,344 @@
using StellaOps.Scanner.Analyzers.Lang.Java.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Java;
/// <summary>
/// Language analyzer that reports Maven components found in Java archives. Each
/// <c>META-INF/maven/**/pom.properties</c> entry of an archive becomes one component, enriched
/// with the <c>Implementation-*</c> attributes of the archive's <c>MANIFEST.MF</c> when present.
/// </summary>
public sealed class JavaLanguageAnalyzer : ILanguageAnalyzer
{
    /// <summary>Analyzer identifier used when writing components.</summary>
    public string Id => "java";

    /// <summary>Human-readable analyzer name.</summary>
    public string DisplayName => "Java/Maven Analyzer";

    /// <summary>
    /// Walks every archive of the normalized workspace and writes the Maven components it finds.
    /// Archives that fail with <see cref="IOException"/> or <see cref="InvalidDataException"/> are skipped.
    /// </summary>
    /// <param name="context">Analyzer context; also supplies the entrypoint usage hints.</param>
    /// <param name="writer">Receives one component per valid pom.properties entry.</param>
    /// <param name="cancellationToken">Checked per archive, per entry and per parsed line.</param>
    public async ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(writer);

        var workspace = JavaWorkspaceNormalizer.Normalize(context, cancellationToken);
        foreach (var archive in workspace.Archives)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                await ProcessArchiveAsync(archive, context, writer, cancellationToken).ConfigureAwait(false);
            }
            catch (IOException)
            {
                // Corrupt archives should not abort the scan.
            }
            catch (InvalidDataException)
            {
                // Skip non-zip payloads despite supported extensions.
            }
        }
    }

    // Emits one component per pom.properties entry of the archive; manifest metadata, when
    // available, is attached to every component of that archive.
    private async ValueTask ProcessArchiveAsync(JavaArchive archive, LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ManifestMetadata? manifestMetadata = null;
        if (archive.TryGetEntry("META-INF/MANIFEST.MF", out var manifestEntry))
        {
            manifestMetadata = await ParseManifestAsync(archive, manifestEntry, cancellationToken).ConfigureAwait(false);
        }

        foreach (var entry in archive.Entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (IsManifestEntry(entry.EffectivePath))
            {
                continue;
            }

            if (!IsPomPropertiesEntry(entry.EffectivePath))
            {
                continue;
            }

            var artifact = await ParsePomPropertiesAsync(archive, entry, cancellationToken).ConfigureAwait(false);
            if (artifact is null)
            {
                continue;
            }

            var metadata = new Dictionary<string, string?>(StringComparer.Ordinal)
            {
                ["groupId"] = artifact.GroupId,
                ["artifactId"] = artifact.ArtifactId,
                ["jarPath"] = NormalizeArchivePath(archive.RelativePath),
            };

            if (!string.IsNullOrEmpty(artifact.Packaging))
            {
                metadata["packaging"] = artifact.Packaging;
            }

            if (!string.IsNullOrEmpty(artifact.Name))
            {
                metadata["displayName"] = artifact.Name;
            }

            manifestMetadata?.ApplyMetadata(metadata);

            var evidence = new List<LanguageComponentEvidence>
            {
                new(LanguageEvidenceKind.File, "pom.properties", BuildLocator(archive, entry.OriginalPath), null, artifact.PomSha256),
            };

            if (manifestMetadata is not null)
            {
                evidence.Add(manifestMetadata.CreateEvidence(archive));
            }

            var usedByEntrypoint = context.UsageHints.IsPathUsed(archive.AbsolutePath);

            writer.AddFromPurl(
                analyzerId: Id,
                purl: artifact.Purl,
                name: artifact.ArtifactId,
                version: artifact.Version,
                type: "maven",
                metadata: metadata,
                evidence: evidence,
                usedByEntrypoint: usedByEntrypoint);
        }
    }

    // Locator format: "<archive path>!<entry path>", or just the entry path when the archive is the root (".").
    private static string BuildLocator(JavaArchive archive, string entryPath)
    {
        var relativeArchive = NormalizeArchivePath(archive.RelativePath);
        var normalizedEntry = NormalizeEntry(entryPath);

        if (string.Equals(relativeArchive, ".", StringComparison.Ordinal) || string.IsNullOrEmpty(relativeArchive))
        {
            return normalizedEntry;
        }

        return string.Concat(relativeArchive, "!", normalizedEntry);
    }

    // Entry paths always use forward slashes.
    private static string NormalizeEntry(string entryPath)
        => entryPath.Replace('\\', '/');

    // Empty and "." both mean "the root"; everything else gets forward slashes.
    private static string NormalizeArchivePath(string relativePath)
    {
        if (string.IsNullOrEmpty(relativePath) || string.Equals(relativePath, ".", StringComparison.Ordinal))
        {
            return ".";
        }

        return relativePath.Replace('\\', '/');
    }

    private static bool IsPomPropertiesEntry(string entryName)
        => entryName.StartsWith("META-INF/maven/", StringComparison.OrdinalIgnoreCase)
            && entryName.EndsWith("/pom.properties", StringComparison.OrdinalIgnoreCase);

    private static bool IsManifestEntry(string entryName)
        => string.Equals(entryName, "META-INF/MANIFEST.MF", StringComparison.OrdinalIgnoreCase);

    // Parses a pom.properties entry into Maven coordinates. Returns null unless groupId,
    // artifactId and version are all present and non-blank. The SHA-256 covers the raw entry bytes.
    private static async ValueTask<MavenArtifact?> ParsePomPropertiesAsync(JavaArchive archive, JavaArchiveEntry entry, CancellationToken cancellationToken)
    {
        await using var entryStream = archive.OpenEntry(entry);
        using var buffer = new MemoryStream();
        await entryStream.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
        buffer.Position = 0;

        using var reader = new StreamReader(buffer, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: true);
        var properties = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        while (await reader.ReadLineAsync().ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            line = line.Trim();

            // Java .properties files allow both '#' and '!' as comment markers.
            if (line.Length == 0 || line.StartsWith('#') || line.StartsWith('!'))
            {
                continue;
            }

            var separatorIndex = line.IndexOf('=');
            if (separatorIndex <= 0)
            {
                continue;
            }

            var key = line[..separatorIndex].Trim();
            var value = line[(separatorIndex + 1)..].Trim();
            if (key.Length == 0)
            {
                continue;
            }

            properties[key] = value; // last occurrence wins
        }

        if (!properties.TryGetValue("groupId", out var groupId) || string.IsNullOrWhiteSpace(groupId))
        {
            return null;
        }

        if (!properties.TryGetValue("artifactId", out var artifactId) || string.IsNullOrWhiteSpace(artifactId))
        {
            return null;
        }

        if (!properties.TryGetValue("version", out var version) || string.IsNullOrWhiteSpace(version))
        {
            return null;
        }

        var packaging = properties.TryGetValue("packaging", out var packagingValue) ? packagingValue : "jar";
        var name = properties.TryGetValue("name", out var nameValue) ? nameValue : null;

        var purl = BuildPurl(groupId, artifactId, version, packaging);

        // Rewind so the hash covers the whole entry, not just what the reader left unread.
        buffer.Position = 0;
        var pomSha = Convert.ToHexString(SHA256.HashData(buffer)).ToLowerInvariant();

        return new MavenArtifact(
            GroupId: groupId.Trim(),
            ArtifactId: artifactId.Trim(),
            Version: version.Trim(),
            Packaging: packaging?.Trim(),
            Name: name?.Trim(),
            Purl: purl,
            PomSha256: pomSha);
    }

    // Extracts Implementation-Title/-Version/-Vendor from a manifest (first occurrence of each wins).
    // Returns null when none of the three attributes is present.
    private static async ValueTask<ManifestMetadata?> ParseManifestAsync(JavaArchive archive, JavaArchiveEntry entry, CancellationToken cancellationToken)
    {
        await using var entryStream = archive.OpenEntry(entry);
        using var reader = new StreamReader(entryStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: false);

        var attributes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Manifest lines are limited in length; longer values continue on following lines that
        // start with a single space. `pending` holds the logical header assembled so far.
        string? pending = null;

        while (await reader.ReadLineAsync().ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (pending is not null && line.Length > 0 && line[0] == ' ')
            {
                pending += line[1..];
                continue;
            }

            RecordManifestHeader(pending, attributes);
            pending = string.IsNullOrWhiteSpace(line) ? null : line;
        }

        RecordManifestHeader(pending, attributes);

        var title = attributes.GetValueOrDefault("Implementation-Title");
        var version = attributes.GetValueOrDefault("Implementation-Version");
        var vendor = attributes.GetValueOrDefault("Implementation-Vendor");

        if (title is null && version is null && vendor is null)
        {
            return null;
        }

        return new ManifestMetadata(title, version, vendor);
    }

    // Splits one logical "Name: value" header and stores it unless that name was already seen.
    private static void RecordManifestHeader(string? header, Dictionary<string, string> attributes)
    {
        if (header is null)
        {
            return;
        }

        var separatorIndex = header.IndexOf(':');
        if (separatorIndex <= 0)
        {
            return;
        }

        var key = header[..separatorIndex].Trim();
        var value = header[(separatorIndex + 1)..].Trim();
        attributes.TryAdd(key, value);
    }

    // Builds "pkg:maven/<group>/<artifact>@<version>" and appends "?type=<packaging>" for non-jar packaging.
    // NOTE(review): dots in the groupId are turned into '/', whereas the package-url examples for
    // Maven keep the groupId dotted (pkg:maven/org.apache.commons/...). Left unchanged because
    // downstream consumers may rely on the current form — confirm which is intended.
    private static string BuildPurl(string groupId, string artifactId, string version, string? packaging)
    {
        var normalizedGroup = groupId.Replace('.', '/');
        var builder = new StringBuilder();
        builder.Append("pkg:maven/");
        builder.Append(normalizedGroup);
        builder.Append('/');
        builder.Append(artifactId);
        builder.Append('@');
        builder.Append(version);

        if (!string.IsNullOrWhiteSpace(packaging) && !packaging.Equals("jar", StringComparison.OrdinalIgnoreCase))
        {
            builder.Append("?type=");
            builder.Append(packaging);
        }

        return builder.ToString();
    }

    // Maven coordinates read from one pom.properties entry, plus the derived purl and entry hash.
    private sealed record MavenArtifact(
        string GroupId,
        string ArtifactId,
        string Version,
        string? Packaging,
        string? Name,
        string Purl,
        string PomSha256);

    // Implementation-* attributes of an archive's manifest; any of them may be missing.
    private sealed record ManifestMetadata(string? ImplementationTitle, string? ImplementationVersion, string? ImplementationVendor)
    {
        // Copies every non-blank attribute into the component metadata.
        public void ApplyMetadata(IDictionary<string, string?> target)
        {
            if (!string.IsNullOrWhiteSpace(ImplementationTitle))
            {
                target["manifestTitle"] = ImplementationTitle;
            }

            if (!string.IsNullOrWhiteSpace(ImplementationVersion))
            {
                target["manifestVersion"] = ImplementationVersion;
            }

            if (!string.IsNullOrWhiteSpace(ImplementationVendor))
            {
                target["manifestVendor"] = ImplementationVendor;
            }
        }

        // Evidence value is "title=..;version=..;vendor=.." with blank attributes left out, or null if all are blank.
        public LanguageComponentEvidence CreateEvidence(JavaArchive archive)
        {
            var locator = BuildLocator(archive, "META-INF/MANIFEST.MF");

            var parts = new List<string>(3);
            if (!string.IsNullOrWhiteSpace(ImplementationTitle))
            {
                parts.Add("title=" + ImplementationTitle);
            }

            if (!string.IsNullOrWhiteSpace(ImplementationVersion))
            {
                parts.Add("version=" + ImplementationVersion);
            }

            if (!string.IsNullOrWhiteSpace(ImplementationVendor))
            {
                parts.Add("vendor=" + ImplementationVendor);
            }

            var value = parts.Count > 0 ? string.Join(';', parts) : null;
            return new LanguageComponentEvidence(LanguageEvidenceKind.File, "MANIFEST.MF", locator, value, null);
        }
    }
}

View File

@@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("StellaOps.Scanner.Analyzers.Lang.Java.Tests")]

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<!-- SDK default item globbing is switched off; the ItemGroup below lists the items explicitly. -->
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<ItemGroup>
<!-- All C# sources outside obj/ and bin/ are compiled. -->
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<!-- Every *.json file outside obj/ and bin/ is embedded into the assembly as a manifest resource. -->
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
<!-- Remaining files are tracked as None items. -->
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,31 @@
# Java Analyzer Task Board
> **Imposed rule:** work of this type, or tasks of this type on this component, must also be applied everywhere else they should be applied.
## Java Static Core (Sprint 39)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-JAVA-21-001 | DONE (2025-10-27) | Java Analyzer Guild | SCANNER-CORE-09-501 | Build input normalizer and virtual file system for JAR/WAR/EAR/fat-jar/JMOD/jimage/container roots. Detect packaging type, layered dirs (BOOT-INF/WEB-INF), multi-release overlays, and jlink runtime metadata. | Normalizer walks fixtures without extraction, classifies packaging, selects MR overlays deterministically, records java version + vendor from runtime images. |
| SCANNER-ANALYZERS-JAVA-21-002 | DONE (2025-10-27) | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-001 | Implement module/classpath builder: JPMS graph parser (`module-info.class`), classpath order rules (fat jar, war, ear), duplicate & split-package detection, package fingerprinting. | Classpath order reproduced for fixtures; module graph serialized; duplicate provider + split-package warnings emitted deterministically. |
| SCANNER-ANALYZERS-JAVA-21-003 | DONE (2025-10-27) | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-002 | SPI scanner covering META-INF/services, provider selection, and warning generation. Include configurable SPI corpus (JDK, Spring, logging, Jackson, MicroProfile). | SPI tables produced with selected provider + candidates; fixtures show first-wins behaviour; warnings recorded for duplicate providers. |
| SCANNER-ANALYZERS-JAVA-21-004 | DOING (2025-10-27) | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-002 | Reflection/dynamic loader heuristics: scan constant pools, bytecode sites (Class.forName, loadClass, TCCL usage), resource-based plugin hints, manifest loader hints. Emit edges with reason codes + confidence. | Reflection edges generated for fixtures (classpath, boot, war); includes call site metadata and confidence scoring; TCCL warning emitted where detected. |
| SCANNER-ANALYZERS-JAVA-21-005 | TODO | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-002 | Framework config extraction: Spring Boot imports, spring.factories, application properties/yaml, Jakarta web.xml & fragments, JAX-RS/JPA/CDI/JAXB configs, logging files, Graal native-image configs. | Framework fixtures parsed; relevant class FQCNs surfaced with reasons (`config-spring`, `config-jaxrs`, etc.); non-class config ignored; determinism guard passes. |
| SCANNER-ANALYZERS-JAVA-21-006 | TODO | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-002 | JNI/native hint scanner: detect native methods, System.load/Library literals, bundled native libs, Graal JNI configs; emit `jni-load` edges for native analyzer correlation. | JNI fixtures produce hint edges pointing at embedded libs; metadata includes candidate paths and reason `jni`. |
| SCANNER-ANALYZERS-JAVA-21-007 | TODO | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-003 | Signature and manifest metadata collector: verify JAR signature structure, capture signers, manifest loader attributes (Main-Class, Agent-Class, Start-Class, Class-Path). | Signed jar fixture reports signer info and structural validation result; manifest metadata attached to entrypoints. |
> 2025-10-27 — SCANNER-ANALYZERS-JAVA-21-001 implemented `JavaWorkspaceNormalizer` + fixtures covering packaging, layered directories, multi-release overlays, and runtime image metadata.
>
> 2025-10-27 — SCANNER-ANALYZERS-JAVA-21-002 delivered `JavaClassPathBuilder` producing ordered segments (jar/war/boot fat, embedded libs), JPMS descriptors via `JavaModuleInfoParser`, and duplicate/split-package detection with package fingerprints + unit tests.
>
> 2025-10-27 — SCANNER-ANALYZERS-JAVA-21-004 in progress: added bytecode-driven `JavaReflectionAnalyzer` covering `Class.forName`, `ClassLoader.loadClass`, `ServiceLoader.load`, resource lookups, and TCCL warnings with unit fixtures (boot jar, embedded jar, synthetic classes).
>
> 2025-10-27 — SCANNER-ANALYZERS-JAVA-21-003 added SPI catalog + `JavaServiceProviderScanner`, capturing META-INF/services across layered jars, selecting first-wins providers, and emitting duplicate warnings with coverage tests (fat-jar, duplicates, simple jars).
## Java Observation & Runtime (Sprint 40)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-JAVA-21-008 | BLOCKED (2025-10-27) | Java Analyzer Guild | SCANNER-ANALYZERS-JAVA-21-003, SCANNER-ANALYZERS-JAVA-21-004, SCANNER-ANALYZERS-JAVA-21-005 | Implement resolver + AOC writer: produce entrypoints (env profiles, warnings), components (jar_id + semantic ids), edges (jpms, cp, spi, reflect, jni) with reason codes/confidence. | Observation JSON for fixtures deterministic; includes entrypoints, edges, warnings; passes AOC compliance lint. |
| SCANNER-ANALYZERS-JAVA-21-009 | TODO | Java Analyzer Guild, QA Guild | SCANNER-ANALYZERS-JAVA-21-008 | Author comprehensive fixtures (modular app, boot fat jar, war, ear, MR-jar, jlink image, JNI, reflection heavy, signed jar, microprofile) with golden outputs and perf benchmarks. | Fixture suite committed under `fixtures/lang/java/ep`; determinism + benchmark gates (<300ms fat jar) configured in CI. |
| SCANNER-ANALYZERS-JAVA-21-010 | TODO | Java Analyzer Guild, Signals Guild | SCANNER-ANALYZERS-JAVA-21-008 | Optional runtime ingestion: Java agent + JFR reader capturing class load, ServiceLoader, and System.load events with path scrubbing. Emit append-only runtime edges `runtime-class`/`runtime-spi`/`runtime-load`. | Runtime harness produces scrubbed events for sample app; edges merge with static output; docs describe sandbox & privacy. |
| SCANNER-ANALYZERS-JAVA-21-011 | TODO | Java Analyzer Guild, DevOps Guild | SCANNER-ANALYZERS-JAVA-21-008 | Package analyzer as restart-time plug-in (manifest/DI), update Offline Kit docs, add CLI/worker hooks for Java inspection commands. | Plugin manifest deployed to `plugins/scanner/analyzers/lang/`; Worker loads new analyzer; Offline Kit + CLI instructions updated; smoke test verifies packaging. |
> 2025-10-27 — SCANNER-ANALYZERS-JAVA-21-008 blocked pending upstream completion of tasks 003–005 (module/classpath resolver, SPI scanner, reflection/config extraction). Observation writer needs their outputs for components/edges/warnings per exit criteria.

View File

@@ -0,0 +1,22 @@
{
"schemaVersion": "1.0",
"id": "stellaops.analyzer.lang.java",
"displayName": "StellaOps Java / Maven Analyzer",
"version": "0.1.0",
"requiresRestart": true,
"entryPoint": {
"type": "dotnet",
"assembly": "StellaOps.Scanner.Analyzers.Lang.Java.dll",
"typeName": "StellaOps.Scanner.Analyzers.Lang.Java.JavaLanguageAnalyzer"
},
"capabilities": [
"language-analyzer",
"java",
"maven"
],
"metadata": {
"org.stellaops.analyzer.language": "java",
"org.stellaops.analyzer.kind": "language",
"org.stellaops.restart.required": "true"
}
}

View File

@@ -0,0 +1,39 @@
# StellaOps.Scanner.Analyzers.Lang.Node — Agent Charter
## Role
Deliver the Node.js / npm / Yarn / PNPM analyzer plug-in that resolves workspace graphs, symlinks, and script metadata for Scanner Workers.
## Scope
- Deterministic filesystem walker for `node_modules`, PNPM store, Yarn Plug'n'Play, and workspace roots.
- Component identity normalization to `pkg:npm` with provenance evidence (manifest path, integrity hashes, lockfile references).
- Workspace + symlink attribution, script metadata (postinstall, lifecycle), and policy hints for risky scripts.
- Plug-in manifest authoring, DI bootstrap, and benchmark harness integration.
## Out of Scope
- OS package detection, native library linkage, or vulnerability joins.
- Language analyzers for other ecosystems (Python, Go, .NET, Rust).
- CLI/UI surfacing of analyzer diagnostics (handed to UI guild post-gate).
## Expectations
- Deterministic output across Yarn/NPM/PNPM variations; normalized casing and path separators.
- Performance targets: 10k-module fixture <1.8s, <220MB RSS on 4vCPU runner.
- Offline-first; no network dependency to resolve registries.
- Emit structured metrics + logs (`analyzer=node`) compatible with Scanner telemetry model.
- Update `TASKS.md`, `SPRINTS_LANG_IMPLEMENTATION_PLAN.md`, and corresponding fixtures as progress occurs.
## Dependencies
- Shared language analyzer core (`StellaOps.Scanner.Analyzers.Lang`).
- Worker dispatcher for plug-in discovery.
- EntryTrace usage hints (for script usage classification).
## Testing & Artifacts
- Determinism golden fixtures under `Fixtures/lang/node/`.
- Benchmark CSV + flamegraph stored in `src/Bench/StellaOps.Bench/Scanner.Analyzers/`.
- Plug-in manifest + cosign workflow added to Offline Kit instructions once analyzer is production-ready.
## Telemetry & Policy Hints
- Metrics: `scanner_analyzer_node_scripts_total{script}` increments for each install lifecycle script discovered.
- Metadata keys:
- `policyHint.installLifecycle` lists lifecycle scripts (`preinstall;install;postinstall`) observed for a package.
- `script.<name>` stores the canonical command string for each lifecycle script.
- Evidence: lifecycle script entries emit `LanguageEvidenceKind.Metadata` pointing to `package.json#scripts.<name>` with SHA-256 hashes for determinism.

View File

@@ -0,0 +1,9 @@
global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Linq;
global using System.Text.Json;
global using System.Threading;
global using System.Threading.Tasks;
global using StellaOps.Scanner.Analyzers.Lang;

View File

@@ -0,0 +1,31 @@
using System.Collections.Generic;
using System.Diagnostics.Metrics;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Metrics published by the Node.js language analyzer through the
/// <c>StellaOps.Scanner.Analyzers.Lang.Node</c> meter.
/// </summary>
internal static class NodeAnalyzerMetrics
{
    // Tag value reported when the caller supplies a null/blank script name.
    private const string UnknownScript = "unknown";

    private static readonly Meter Meter = new("StellaOps.Scanner.Analyzers.Lang.Node", "1.0.0");

    private static readonly Counter<long> LifecycleScriptsCounter = Meter.CreateCounter<long>(
        "scanner_analyzer_node_scripts_total",
        unit: "scripts",
        description: "Counts Node.js install lifecycle scripts discovered by the language analyzer.");

    /// <summary>
    /// Increments <c>scanner_analyzer_node_scripts_total</c> by one, tagged with the
    /// trimmed, lower-cased script name (<c>script=&lt;name&gt;</c>).
    /// </summary>
    /// <param name="scriptName">Lifecycle script name; blank values are reported as <c>unknown</c>.</param>
    public static void RecordLifecycleScript(string scriptName)
    {
        var scriptTag = new KeyValuePair<string, object?>("script", Normalize(scriptName));
        LifecycleScriptsCounter.Add(1, scriptTag);
    }

    /// <summary>Canonicalises a script name for use as a metric tag value.</summary>
    private static string Normalize(string? scriptName)
        => string.IsNullOrWhiteSpace(scriptName)
            ? UnknownScript
            : scriptName.Trim().ToLowerInvariant();
}

View File

@@ -0,0 +1,37 @@
using System.Diagnostics.CodeAnalysis;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// An install lifecycle script declared in a <c>package.json</c>, together with the
/// SHA-256 of its trimmed command text.
/// </summary>
internal sealed record NodeLifecycleScript
{
    /// <summary>
    /// Creates a script descriptor; both values are trimmed before being stored.
    /// </summary>
    /// <param name="name">Script name.</param>
    /// <param name="command">Command text.</param>
    /// <exception cref="ArgumentException">Either argument is null, empty or whitespace.</exception>
    public NodeLifecycleScript(string name, string command)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(name);
        ArgumentException.ThrowIfNullOrWhiteSpace(command);

        (Name, Command) = (name.Trim(), command.Trim());

        // The digest covers the trimmed command, so surrounding whitespace never changes it.
        Sha256 = ComputeSha256(Command);
    }

    /// <summary>Trimmed script name.</summary>
    public string Name { get; }

    /// <summary>Trimmed command text.</summary>
    public string Command { get; }

    /// <summary>Lower-case hexadecimal SHA-256 of the UTF-8 bytes of <see cref="Command"/>.</summary>
    public string Sha256 { get; }

    /// <summary>Hashes <paramref name="value"/> as UTF-8; an empty input yields an empty string.</summary>
    [SuppressMessage("Security", "CA5350:Do Not Use Weak Cryptographic Algorithms", Justification = "SHA256 is required for deterministic evidence hashing.")]
    private static string ComputeSha256(string value)
        => string.IsNullOrEmpty(value)
            ? string.Empty
            : Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(value))).ToLowerInvariant();
}

View File

@@ -0,0 +1,446 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
internal sealed class NodeLockData
{
private static readonly NodeLockData Empty = new(new Dictionary<string, NodeLockEntry>(StringComparer.Ordinal), new Dictionary<string, NodeLockEntry>(StringComparer.OrdinalIgnoreCase));
private readonly Dictionary<string, NodeLockEntry> _byPath;
private readonly Dictionary<string, NodeLockEntry> _byName;
/// <summary>
/// Wraps the two lookup tables: entries keyed by normalised lock path and entries keyed
/// by package name.
/// </summary>
private NodeLockData(Dictionary<string, NodeLockEntry> byPath, Dictionary<string, NodeLockEntry> byName)
    => (_byPath, _byName) = (byPath, byName);
/// <summary>
/// Builds lock data for <paramref name="rootPath"/> by reading <c>package-lock.json</c>,
/// <c>yarn.lock</c> and <c>pnpm-lock.yaml</c>, in that order.
/// </summary>
/// <param name="rootPath">Directory that may contain the lock files.</param>
/// <param name="cancellationToken">Forwarded to the package-lock reader.</param>
/// <returns>
/// An already-completed task; the work is done synchronously. The shared empty instance
/// is returned when no lock file contributed an entry.
/// </returns>
public static ValueTask<NodeLockData> LoadAsync(string rootPath, CancellationToken cancellationToken)
{
    var pathIndex = new Dictionary<string, NodeLockEntry>(StringComparer.Ordinal);
    var nameIndex = new Dictionary<string, NodeLockEntry>(StringComparer.OrdinalIgnoreCase);

    // Later readers overwrite by-name entries written by earlier ones.
    LoadPackageLockJson(rootPath, pathIndex, nameIndex, cancellationToken);
    LoadYarnLock(rootPath, nameIndex);
    LoadPnpmLock(rootPath, nameIndex);

    var nothingLoaded = pathIndex.Count == 0 && nameIndex.Count == 0;
    var lockData = nothingLoaded ? Empty : new NodeLockData(pathIndex, nameIndex);
    return ValueTask.FromResult(lockData);
}
/// <summary>
/// Looks up the lock entry for a package, first by its normalised relative path and then,
/// as a fallback, by package name (case-insensitive).
/// </summary>
/// <param name="relativePath">Package directory relative to the scan root.</param>
/// <param name="packageName">Package name used when the path has no entry; may be empty.</param>
/// <param name="entry">The matching entry, or <c>null</c> when nothing matched.</param>
/// <returns><c>true</c> when an entry was found.</returns>
public bool TryGet(string relativePath, string packageName, out NodeLockEntry? entry)
{
    if (_byPath.TryGetValue(NormalizeLockPath(relativePath), out entry))
    {
        return true;
    }

    // Scoped and unscoped names are stored verbatim, so the name is used as the key as-is.
    if (!string.IsNullOrEmpty(packageName) && _byName.TryGetValue(packageName, out entry))
    {
        return true;
    }

    entry = null;
    return false;
}
/// <summary>
/// Builds a lock entry from the string-valued <c>version</c>, <c>resolved</c> and
/// <c>integrity</c> properties of a JSON object.
/// </summary>
/// <param name="element">JSON value describing one locked package.</param>
/// <returns>
/// The entry, or <c>null</c> when <paramref name="element"/> is not an object or carries
/// none of the three properties as strings.
/// </returns>
private static NodeLockEntry? CreateEntry(JsonElement element)
{
    // TryGetProperty throws InvalidOperationException on non-object values; a malformed
    // lock file must be skipped rather than abort the scan.
    if (element.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    static string? ReadString(JsonElement owner, string propertyName)
        => owner.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;

    var version = ReadString(element, "version");
    var resolved = ReadString(element, "resolved");
    var integrity = ReadString(element, "integrity");

    if (version is null && resolved is null && integrity is null)
    {
        return null;
    }

    return new NodeLockEntry(version, resolved, integrity);
}
/// <summary>
/// Walks a nested <c>dependencies</c> object (the package-lock layout that has no
/// <c>packages</c> map), registering every dependency by path and by name.
/// </summary>
/// <param name="currentPath">Path prefix of the dependencies at this level.</param>
/// <param name="dependenciesElement">JSON object whose properties are the dependencies.</param>
/// <param name="byPath">Receives entries keyed by normalised path.</param>
/// <param name="byName">Receives entries keyed by dependency name; later entries overwrite earlier ones.</param>
private static void TraverseLegacyDependencies(
    string currentPath,
    JsonElement dependenciesElement,
    IDictionary<string, NodeLockEntry> byPath,
    IDictionary<string, NodeLockEntry> byName)
{
    foreach (var dependency in dependenciesElement.EnumerateObject())
    {
        var depValue = dependency.Value;

        // Property lookups throw InvalidOperationException on non-object values, so a
        // malformed dependency (string, number, null, ...) is skipped.
        if (depValue.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var path = $"{currentPath}/{dependency.Name}";
        var entry = CreateEntry(depValue);
        if (entry is not null)
        {
            byPath[NormalizeLockPath(path)] = entry;
            byName[dependency.Name] = entry;
        }

        if (depValue.TryGetProperty("dependencies", out var childDependencies) && childDependencies.ValueKind == JsonValueKind.Object)
        {
            // Children live under "<parent>/node_modules".
            TraverseLegacyDependencies(path + "/node_modules", childDependencies, byPath, byName);
        }
    }
}
/// <summary>
/// Reads <c>package-lock.json</c> from <paramref name="rootPath"/> when it exists.
/// </summary>
/// <remarks>
/// When the document has a <c>packages</c> object, each property is registered by its
/// normalised key, by the name derived from that key, and by its explicit <c>name</c>
/// property. Otherwise a top-level <c>dependencies</c> object is walked recursively.
/// Unreadable or malformed files contribute nothing.
/// </remarks>
/// <param name="rootPath">Directory expected to contain the lock file.</param>
/// <param name="byPath">Receives entries keyed by normalised path.</param>
/// <param name="byName">Receives entries keyed by package name.</param>
/// <param name="cancellationToken">Checked once after the document has been parsed.</param>
private static void LoadPackageLockJson(string rootPath, IDictionary<string, NodeLockEntry> byPath, IDictionary<string, NodeLockEntry> byName, CancellationToken cancellationToken)
{
    var packageLockPath = Path.Combine(rootPath, "package-lock.json");
    if (!File.Exists(packageLockPath))
    {
        return;
    }

    try
    {
        using var stream = File.OpenRead(packageLockPath);
        using var document = JsonDocument.Parse(stream);
        cancellationToken.ThrowIfCancellationRequested();

        var root = document.RootElement;

        // Valid JSON that is not an object (e.g. "[]") would make TryGetProperty throw.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        if (root.TryGetProperty("packages", out var packagesElement) && packagesElement.ValueKind == JsonValueKind.Object)
        {
            foreach (var packageProperty in packagesElement.EnumerateObject())
            {
                var packageElement = packageProperty.Value;
                if (packageElement.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var entry = CreateEntry(packageElement);
                if (entry is null)
                {
                    continue;
                }

                var key = NormalizeLockPath(packageProperty.Name);
                byPath[key] = entry;

                var derivedName = ExtractNameFromPath(key);
                if (!string.IsNullOrEmpty(derivedName))
                {
                    byName[derivedName] = entry;
                }

                // An explicit "name" is registered in addition to the path-derived one.
                if (packageElement.TryGetProperty("name", out var explicitNameElement) && explicitNameElement.ValueKind == JsonValueKind.String)
                {
                    var explicitName = explicitNameElement.GetString();
                    if (!string.IsNullOrWhiteSpace(explicitName))
                    {
                        byName[explicitName] = entry;
                    }
                }
            }
        }
        else if (root.TryGetProperty("dependencies", out var dependenciesElement) && dependenciesElement.ValueKind == JsonValueKind.Object)
        {
            TraverseLegacyDependencies("node_modules", dependenciesElement, byPath, byName);
        }
    }
    catch (IOException)
    {
        // Ignore unreadable package-lock.
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore package-lock we are not permitted to read.
    }
    catch (JsonException)
    {
        // Ignore malformed package-lock.
    }
}
/// <summary>
/// Reads <c>yarn.lock</c> from <paramref name="rootPath"/> when it exists and registers one
/// entry per block, keyed by the package name taken from the block header.
/// </summary>
/// <remarks>
/// The file is scanned line by line: an unindented line ending in ':' starts a block, a
/// blank line ends it, and <c>version</c>/<c>resolved</c>/<c>integrity</c> lines inside the
/// block fill the entry. A field name must be followed by whitespace or ':' so that other
/// lines that merely start with the same letters (for example a dependency called
/// <c>integrity-checker</c>) do not overwrite the block's values.
/// </remarks>
/// <param name="rootPath">Directory expected to contain the lock file.</param>
/// <param name="byName">Receives entries keyed by package name; existing keys are overwritten.</param>
private static void LoadYarnLock(string rootPath, IDictionary<string, NodeLockEntry> byName)
{
    var yarnLockPath = Path.Combine(rootPath, "yarn.lock");
    if (!File.Exists(yarnLockPath))
    {
        return;
    }

    // True when the trimmed line is exactly "<fieldName>" followed by a separator.
    static bool IsField(string trimmedLine, string fieldName)
        => trimmedLine.Length > fieldName.Length
            && trimmedLine.StartsWith(fieldName, StringComparison.OrdinalIgnoreCase)
            && (trimmedLine[fieldName.Length] is ' ' or '\t' or ':');

    try
    {
        var lines = File.ReadAllLines(yarnLockPath);
        string? currentName = null;
        string? version = null;
        string? resolved = null;
        string? integrity = null;

        // Stores the block collected so far (if it has a usable name) and clears the fields.
        void Flush()
        {
            if (!string.IsNullOrWhiteSpace(currentName))
            {
                var simpleName = ExtractPackageNameFromYarnKey(currentName!);
                if (!string.IsNullOrEmpty(simpleName))
                {
                    byName[simpleName] = new NodeLockEntry(version, resolved, integrity);
                }
            }

            version = null;
            resolved = null;
            integrity = null;
        }

        foreach (var line in lines)
        {
            var trimmed = line.Trim();
            if (string.IsNullOrEmpty(trimmed))
            {
                Flush();
                currentName = null;
                continue;
            }

            // Block header: starts in column 0 and ends with ':'.
            if (!char.IsWhiteSpace(line, 0) && trimmed.EndsWith(':'))
            {
                Flush();
                currentName = trimmed.TrimEnd(':').Trim('"');
                continue;
            }

            if (IsField(trimmed, "version"))
            {
                version = ExtractQuotedValue(trimmed);
            }
            else if (IsField(trimmed, "resolved"))
            {
                resolved = ExtractQuotedValue(trimmed);
            }
            else if (IsField(trimmed, "integrity"))
            {
                integrity = ExtractQuotedValue(trimmed);
            }
        }

        // The last block is not necessarily followed by a blank line.
        Flush();
    }
    catch (IOException)
    {
        // Ignore unreadable yarn.lock
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore yarn.lock we are not permitted to read.
    }
}
/// <summary>
/// Reads <c>pnpm-lock.yaml</c> from <paramref name="rootPath"/> when it exists and registers
/// an entry for every package block that carries an integrity value.
/// </summary>
/// <remarks>
/// This is a line scanner, not a YAML parser. Lines before the <c>packages:</c> line are
/// ignored; after it, a line starting with two spaces and '/' opens a package block.
/// Inside a block the scanner reads <c>resolution:</c> (inline map with <c>integrity</c>
/// and/or <c>tarball</c>), and the standalone <c>integrity:</c>, <c>tarball:</c> and
/// <c>version:</c> lines. Values inside an inline map end at the next comma, so one value
/// no longer swallows the ones that follow it.
/// </remarks>
/// <param name="rootPath">Directory expected to contain the lock file.</param>
/// <param name="byName">Receives entries keyed by package name; existing keys are overwritten.</param>
private static void LoadPnpmLock(string rootPath, IDictionary<string, NodeLockEntry> byName)
{
    var pnpmLockPath = Path.Combine(rootPath, "pnpm-lock.yaml");
    if (!File.Exists(pnpmLockPath))
    {
        return;
    }

    // Returns the value that follows "key" inside an inline map such as
    // "resolution: {integrity: sha512-..., tarball: https://...}", or null when absent.
    static string? ReadInlineValue(string resolutionLine, string key)
    {
        var keyIndex = resolutionLine.IndexOf(key, StringComparison.OrdinalIgnoreCase);
        if (keyIndex < 0)
        {
            return null;
        }

        var rest = resolutionLine[(keyIndex + key.Length)..];
        var comma = rest.IndexOf(',');
        if (comma >= 0)
        {
            rest = rest[..comma];
        }

        return rest.Trim(' ', ':', '{', '}', '"');
    }

    try
    {
        using var reader = new StreamReader(pnpmLockPath);
        string? currentPackage = null;
        string? version = null;
        string? resolved = null;
        string? integrity = null;
        var inPackages = false;

        // Registers the block collected so far; blocks without integrity are dropped.
        void Commit()
        {
            if (string.IsNullOrEmpty(currentPackage) || string.IsNullOrEmpty(integrity))
            {
                return;
            }

            var name = ExtractNameFromPnpmKey(currentPackage);
            if (!string.IsNullOrEmpty(name))
            {
                byName[name] = new NodeLockEntry(version, resolved, integrity);
            }
        }

        while (reader.ReadLine() is { } line)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            if (!inPackages)
            {
                if (line.StartsWith("packages:", StringComparison.Ordinal))
                {
                    inPackages = true;
                }

                continue;
            }

            if (line.StartsWith("  /", StringComparison.Ordinal))
            {
                Commit();
                currentPackage = line.Trim().TrimEnd(':').TrimStart('/');
                version = null;
                resolved = null;
                integrity = null;
                continue;
            }

            if (string.IsNullOrEmpty(currentPackage))
            {
                continue;
            }

            var trimmed = line.Trim();
            if (trimmed.StartsWith("resolution:", StringComparison.Ordinal))
            {
                if (ReadInlineValue(trimmed, "integrity") is { } inlineIntegrity)
                {
                    integrity = inlineIntegrity;
                }

                if (ReadInlineValue(trimmed, "tarball") is { } inlineTarball)
                {
                    resolved = inlineTarball;
                }
            }
            else if (trimmed.StartsWith("integrity:", StringComparison.Ordinal))
            {
                integrity = trimmed[("integrity:".Length)..].Trim();
            }
            else if (trimmed.StartsWith("tarball:", StringComparison.Ordinal))
            {
                resolved = trimmed[("tarball:".Length)..].Trim();
            }
            else if (trimmed.StartsWith("version:", StringComparison.Ordinal))
            {
                version = trimmed[("version:".Length)..].Trim();
            }
        }

        // The final block has no following header to trigger its commit.
        Commit();
    }
    catch (IOException)
    {
        // Ignore unreadable pnpm lock file.
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore pnpm lock file we are not permitted to read.
    }
}
/// <summary>
/// Extracts the value of a <c>key value</c> lock-file line.
/// </summary>
/// <remarks>
/// When the line contains double quotes, the text between the first and the last quote is
/// returned. When it contains none, the text after the first whitespace is returned; yarn
/// writes some values (notably <c>integrity sha512-...</c>) without quotes, and those were
/// previously dropped.
/// </remarks>
/// <param name="line">A trimmed line starting with the field name.</param>
/// <returns>The value, or <c>null</c> when the line has no value or an unbalanced quote.</returns>
private static string? ExtractQuotedValue(string line)
{
    var quoteStart = line.IndexOf('"');
    if (quoteStart < 0)
    {
        // Unquoted form: "<key> <value>" or "<key>: <value>".
        var separator = line.IndexOfAny(new[] { ' ', '\t' });
        if (separator < 0)
        {
            return null;
        }

        var bare = line[(separator + 1)..].Trim();
        return bare.Length == 0 ? null : bare;
    }

    var quoteEnd = line.LastIndexOf('"');
    if (quoteEnd <= quoteStart)
    {
        // A single quote character: treat as malformed.
        return null;
    }

    return line.Substring(quoteStart + 1, quoteEnd - quoteStart - 1);
}
/// <summary>
/// Derives the package name from a yarn.lock block header such as
/// <c>lodash@^4.17.0</c> or <c>"@babel/core@^7.0.0", "@babel/core@^7.1.0"</c>.
/// </summary>
/// <param name="key">Header text without the trailing ':'.</param>
/// <returns>
/// The first descriptor with its <c>@range</c> suffix removed; a leading '@' (scope marker)
/// is kept. Returns an empty string when the first descriptor is empty.
/// </returns>
private static string ExtractPackageNameFromYarnKey(string key)
{
    var commaIndex = key.IndexOf(',');
    var firstDescriptor = (commaIndex > 0 ? key[..commaIndex] : key).Trim('"');

    // IndexOf(char, 1) throws on an empty string, and a one-character descriptor has
    // nothing after a potential scope marker, so both are returned unchanged.
    if (firstDescriptor.Length < 2)
    {
        return firstDescriptor;
    }

    // Search from index 1 so the '@' that introduces a scope is not mistaken for the
    // name/range separator.
    var atIndex = firstDescriptor.IndexOf('@', 1);
    return atIndex > 0 ? firstDescriptor[..atIndex] : firstDescriptor;
}
/// <summary>
/// Derives the package name from a pnpm lock key with the leading '/' already removed.
/// </summary>
/// <remarks>
/// Handles both <c>name/version</c> keys (<c>lodash/4.17.21</c>, <c>@scope/pkg/1.0.0</c>)
/// and <c>name@version</c> keys (<c>lodash@4.17.21</c>,
/// <c>@scope/pkg@1.0.0(react@18.2.0)</c>). For the second form the version used to be left
/// attached to the name.
/// </remarks>
/// <param name="key">Package key.</param>
/// <returns>The package name, or an empty string when the key has no segments.</returns>
private static string ExtractNameFromPnpmKey(string key)
{
    // A '@' after the first character separates the name from the version; npm names
    // contain '@' only as the scope prefix.
    static string StripVersion(string segment)
    {
        var at = segment.IndexOf('@');
        return at > 0 ? segment[..at] : segment;
    }

    var parts = key.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (parts.Length == 0)
    {
        return string.Empty;
    }

    if (parts[0].StartsWith('@'))
    {
        return parts.Length >= 2 ? $"{parts[0]}/{StripVersion(parts[1])}" : parts[0];
    }

    return StripVersion(parts[0]);
}
/// <summary>
/// Canonicalises a path for use as a lock-table key: backslashes become '/', and any
/// leading '.' and '/' characters are removed.
/// </summary>
/// <param name="path">Path to normalise.</param>
/// <returns>The normalised path, or an empty string for null/blank input.</returns>
private static string NormalizeLockPath(string path)
    => string.IsNullOrWhiteSpace(path)
        ? string.Empty
        : path.Replace('\\', '/').TrimStart('.', '/');
/// <summary>
/// Derives a package name from a normalised lock path.
/// </summary>
/// <remarks>
/// The name is taken from the segment(s) after the <em>last</em> <c>node_modules</c>
/// segment, so <c>node_modules/a/node_modules/b</c> yields <c>b</c> (it used to yield
/// <c>a</c>, overwriting <c>a</c>'s own by-name entry). For paths without
/// <c>node_modules</c> the last segment is used, prefixed with the preceding segment when
/// that one is a scope (<c>packages/@scope/name</c> yields <c>@scope/name</c>).
/// </remarks>
/// <param name="normalizedPath">Path using '/' separators.</param>
/// <returns>The derived name, or an empty string when none can be derived.</returns>
private static string ExtractNameFromPath(string normalizedPath)
{
    if (string.IsNullOrEmpty(normalizedPath))
    {
        return string.Empty;
    }

    var segments = normalizedPath.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (segments.Length == 0)
    {
        return string.Empty;
    }

    var lastNodeModules = Array.LastIndexOf(segments, "node_modules");
    if (lastNodeModules >= 0)
    {
        var nameIndex = lastNodeModules + 1;
        if (nameIndex >= segments.Length)
        {
            // Path ends in "node_modules": there is no package segment.
            return string.Empty;
        }

        var first = segments[nameIndex];
        if (first.StartsWith('@') && nameIndex + 1 < segments.Length)
        {
            return $"{first}/{segments[nameIndex + 1]}";
        }

        return first;
    }

    var last = segments[^1];
    if (segments.Length >= 2 && segments[^2].StartsWith('@'))
    {
        return $"{segments[^2]}/{last}";
    }

    return last;
}
}

View File

@@ -0,0 +1,3 @@
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Values recorded for one package in a lock file. Any of them may be absent.
/// </summary>
/// <param name="Version">Value of the lock entry's version field, when present.</param>
/// <param name="Resolved">Value of the lock entry's resolved/tarball field, when present.</param>
/// <param name="Integrity">Value of the lock entry's integrity field, when present.</param>
internal sealed record NodeLockEntry(
    string? Version,
    string? Resolved,
    string? Integrity);

View File

@@ -0,0 +1,179 @@
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// A Node.js package found on disk, with the lock, workspace and lifecycle-script facts
/// gathered for it, and helpers that turn those facts into component evidence/metadata.
/// </summary>
internal sealed class NodePackage
{
    /// <summary>Creates a package descriptor from already-collected values.</summary>
    /// <param name="name">Package name.</param>
    /// <param name="version">Package version.</param>
    /// <param name="relativePath">Package directory relative to the scan root (empty for the root).</param>
    /// <param name="packageJsonLocator">Locator of the package's <c>package.json</c>.</param>
    /// <param name="isPrivate">Value of the <c>private</c> flag, or <c>null</c> when not declared.</param>
    /// <param name="lockEntry">Matching lock entry, if any.</param>
    /// <param name="isWorkspaceMember">Whether the package is a workspace member.</param>
    /// <param name="workspaceRoot">Workspace path reported for a member.</param>
    /// <param name="workspaceTargets">Workspace paths this package depends on.</param>
    /// <param name="workspaceLink">Workspace link value, if any.</param>
    /// <param name="lifecycleScripts">Install lifecycle scripts; <c>null</c> is treated as empty.</param>
    /// <param name="usedByEntrypoint">Whether usage hints mark the package as used.</param>
    public NodePackage(
        string name,
        string version,
        string relativePath,
        string packageJsonLocator,
        bool? isPrivate,
        NodeLockEntry? lockEntry,
        bool isWorkspaceMember,
        string? workspaceRoot,
        IReadOnlyList<string> workspaceTargets,
        string? workspaceLink,
        IReadOnlyList<NodeLifecycleScript> lifecycleScripts,
        bool usedByEntrypoint)
    {
        // Identity
        Name = name;
        Version = version;
        RelativePath = relativePath;
        PackageJsonLocator = packageJsonLocator;
        IsPrivate = isPrivate;
        LockEntry = lockEntry;

        // Workspace facts
        IsWorkspaceMember = isWorkspaceMember;
        WorkspaceRoot = workspaceRoot;
        WorkspaceTargets = workspaceTargets;
        WorkspaceLink = workspaceLink;

        // Scripts and usage
        LifecycleScripts = lifecycleScripts ?? Array.Empty<NodeLifecycleScript>();
        IsUsedByEntrypoint = usedByEntrypoint;
    }

    public string Name { get; }

    public string Version { get; }

    public string RelativePath { get; }

    public string PackageJsonLocator { get; }

    public bool? IsPrivate { get; }

    public NodeLockEntry? LockEntry { get; }

    public bool IsWorkspaceMember { get; }

    public string? WorkspaceRoot { get; }

    public IReadOnlyList<string> WorkspaceTargets { get; }

    public string? WorkspaceLink { get; }

    public IReadOnlyList<NodeLifecycleScript> LifecycleScripts { get; }

    /// <summary><c>true</c> when at least one lifecycle script was recorded.</summary>
    public bool HasInstallScripts => LifecycleScripts.Count > 0;

    public bool IsUsedByEntrypoint { get; }

    /// <summary><see cref="RelativePath"/> with the platform separator replaced by '/'.</summary>
    public string RelativePathNormalized
        => string.IsNullOrEmpty(RelativePath)
            ? string.Empty
            : RelativePath.Replace(Path.DirectorySeparatorChar, '/');

    /// <summary>Component key: the purl prefixed with <c>purl::</c>.</summary>
    public string ComponentKey => $"purl::{Purl}";

    /// <summary>Package URL of the form <c>pkg:npm/&lt;name&gt;@&lt;version&gt;</c>.</summary>
    public string Purl => BuildPurl(Name, Version);

    /// <summary>
    /// Produces one file evidence item for <c>package.json</c> followed by one metadata
    /// evidence item per lifecycle script (command as value, plus its SHA-256).
    /// </summary>
    public IReadOnlyCollection<LanguageComponentEvidence> CreateEvidence()
    {
        // Script locators hang off the package.json locator, or the bare file name when
        // no locator is known.
        var locatorBase = string.IsNullOrEmpty(PackageJsonLocator) ? "package.json" : PackageJsonLocator;

        var evidence = new List<LanguageComponentEvidence>
        {
            new LanguageComponentEvidence(LanguageEvidenceKind.File, "package.json", PackageJsonLocator, Value: null, Sha256: null)
        };

        evidence.AddRange(LifecycleScripts.Select(script => new LanguageComponentEvidence(
            LanguageEvidenceKind.Metadata,
            "package.json:scripts",
            $"{locatorBase}#scripts.{script.Name}",
            script.Command,
            script.Sha256)));

        return evidence;
    }

    /// <summary>
    /// Produces the metadata key/value pairs for this package, sorted by key (ordinal).
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> CreateMetadata()
    {
        var entries = new List<KeyValuePair<string, string?>>(8);

        void Add(string key, string? value) => entries.Add(new KeyValuePair<string, string?>(key, value));

        var normalizedPath = RelativePathNormalized;
        Add("path", string.IsNullOrEmpty(normalizedPath) ? "." : normalizedPath);

        if (IsPrivate.HasValue)
        {
            Add("private", IsPrivate.Value ? "true" : "false");
        }

        if (LockEntry is { } lockEntry)
        {
            if (!string.IsNullOrWhiteSpace(lockEntry.Resolved))
            {
                Add("resolved", lockEntry.Resolved);
            }

            if (!string.IsNullOrWhiteSpace(lockEntry.Integrity))
            {
                Add("integrity", lockEntry.Integrity);
            }
        }

        if (IsWorkspaceMember)
        {
            Add("workspaceMember", "true");
            if (!string.IsNullOrWhiteSpace(WorkspaceRoot))
            {
                Add("workspaceRoot", WorkspaceRoot);
            }
        }

        if (!string.IsNullOrWhiteSpace(WorkspaceLink))
        {
            Add("workspaceLink", WorkspaceLink);
        }

        if (WorkspaceTargets.Count > 0)
        {
            Add("workspaceTargets", string.Join(';', WorkspaceTargets));
        }

        if (HasInstallScripts)
        {
            Add("installScripts", "true");

            var lifecycleNames = LifecycleScripts
                .Select(static script => script.Name)
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .OrderBy(static name => name, StringComparer.OrdinalIgnoreCase)
                .ToArray();

            if (lifecycleNames.Length > 0)
            {
                Add("policyHint.installLifecycle", string.Join(';', lifecycleNames));
            }

            foreach (var script in LifecycleScripts.OrderBy(static script => script.Name, StringComparer.OrdinalIgnoreCase))
            {
                Add($"script.{script.Name}", script.Command);
            }
        }

        // Final ordering is by key so the output does not depend on insertion order.
        return entries
            .OrderBy(static pair => pair.Key, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>Formats the npm purl for the given name and version.</summary>
    private static string BuildPurl(string name, string version)
        => $"pkg:npm/{NormalizeName(name)}@{version}";

    /// <summary>
    /// Percent-encodes the leading '@' of a scoped name as <c>%40</c>; other names, and
    /// null/blank input, are returned unchanged.
    /// </summary>
    private static string NormalizeName(string name)
    {
        if (string.IsNullOrWhiteSpace(name) || name[0] != '@')
        {
            return name;
        }

        return $"%40{name[1..]}";
    }
}

View File

@@ -0,0 +1,378 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
internal static class NodePackageCollector
{
private static readonly string[] IgnoredDirectories =
{
".bin",
".cache",
".store",
"__pycache__"
};
/// <summary>
/// Collects packages under the scan root: the root <c>package.json</c>, every workspace
/// member, the root <c>node_modules</c> tree, and finally each workspace member's own
/// <c>node_modules</c> tree (in ordinal path order).
/// </summary>
/// <param name="context">Analyzer context supplying the root path.</param>
/// <param name="lockData">Lock data used to enrich packages.</param>
/// <param name="cancellationToken">Propagated to the directory walkers.</param>
/// <returns>Packages in discovery order.</returns>
public static IReadOnlyList<NodePackage> CollectPackages(LanguageAnalyzerContext context, NodeLockData lockData, CancellationToken cancellationToken)
{
    var collected = new List<NodePackage>();
    var seenDirectories = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var workspaceNodeModules = new List<string>();

    var rootManifest = Path.Combine(context.RootPath, "package.json");
    var workspaceIndex = NodeWorkspaceIndex.Create(context.RootPath);

    if (File.Exists(rootManifest)
        && TryCreatePackage(context, rootManifest, string.Empty, lockData, workspaceIndex, cancellationToken) is { } rootPackage)
    {
        collected.Add(rootPackage);
        seenDirectories.Add(rootPackage.RelativePathNormalized);
    }

    foreach (var member in workspaceIndex.GetMembers())
    {
        var memberDirectory = Path.Combine(context.RootPath, member.Replace('/', Path.DirectorySeparatorChar));
        if (!Directory.Exists(memberDirectory))
        {
            continue;
        }

        ProcessPackageDirectory(context, memberDirectory, lockData, workspaceIndex, includeNestedNodeModules: false, collected, seenDirectories, cancellationToken);

        // Member-level node_modules are deferred until the root tree has been walked.
        var memberNodeModules = Path.Combine(memberDirectory, "node_modules");
        if (Directory.Exists(memberNodeModules))
        {
            workspaceNodeModules.Add(memberNodeModules);
        }
    }

    TraverseDirectory(context, Path.Combine(context.RootPath, "node_modules"), lockData, workspaceIndex, collected, seenDirectories, cancellationToken);

    workspaceNodeModules.Sort(StringComparer.Ordinal);
    foreach (var deferredRoot in workspaceNodeModules)
    {
        TraverseDirectory(context, deferredRoot, lockData, workspaceIndex, collected, seenDirectories, cancellationToken);
    }

    return collected;
}
/// <summary>
/// Walks the immediate children of a <c>node_modules</c>-style directory: skipped names
/// are ignored, <c>.pnpm</c> is handed to the store walker, <c>@scope</c> directories are
/// expanded one level, and everything else is processed as a package directory.
/// </summary>
/// <param name="context">Analyzer context.</param>
/// <param name="directory">Directory to walk; nothing happens when it does not exist.</param>
/// <param name="lockData">Lock data used to enrich packages.</param>
/// <param name="workspaceIndex">Workspace index.</param>
/// <param name="packages">Receives discovered packages.</param>
/// <param name="visited">Relative directories already processed.</param>
/// <param name="cancellationToken">Checked once per child directory.</param>
private static void TraverseDirectory(
    LanguageAnalyzerContext context,
    string directory,
    NodeLockData lockData,
    NodeWorkspaceIndex workspaceIndex,
    List<NodePackage> packages,
    HashSet<string> visited,
    CancellationToken cancellationToken)
{
    if (!Directory.Exists(directory))
    {
        return;
    }

    foreach (var childPath in Directory.EnumerateDirectories(directory))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var childName = Path.GetFileName(childPath);
        if (string.IsNullOrEmpty(childName) || ShouldSkipDirectory(childName))
        {
            continue;
        }

        if (string.Equals(childName, ".pnpm", StringComparison.OrdinalIgnoreCase))
        {
            TraversePnpmStore(context, childPath, lockData, workspaceIndex, packages, visited, cancellationToken);
        }
        else if (childName.StartsWith('@'))
        {
            // Scope directory: the packages are its immediate children.
            foreach (var scopedPackage in Directory.EnumerateDirectories(childPath))
            {
                ProcessPackageDirectory(context, scopedPackage, lockData, workspaceIndex, includeNestedNodeModules: true, packages, visited, cancellationToken);
            }
        }
        else
        {
            ProcessPackageDirectory(context, childPath, lockData, workspaceIndex, includeNestedNodeModules: true, packages, visited, cancellationToken);
        }
    }
}
/// <summary>
/// Walks a <c>.pnpm</c> directory: for each child that contains a <c>node_modules</c>
/// folder, that folder is traversed like any other module root.
/// </summary>
/// <param name="context">Analyzer context.</param>
/// <param name="pnpmDirectory">Path of the <c>.pnpm</c> directory.</param>
/// <param name="lockData">Lock data used to enrich packages.</param>
/// <param name="workspaceIndex">Workspace index.</param>
/// <param name="packages">Receives discovered packages.</param>
/// <param name="visited">Relative directories already processed.</param>
/// <param name="cancellationToken">Checked once per store entry.</param>
private static void TraversePnpmStore(
    LanguageAnalyzerContext context,
    string pnpmDirectory,
    NodeLockData lockData,
    NodeWorkspaceIndex workspaceIndex,
    List<NodePackage> packages,
    HashSet<string> visited,
    CancellationToken cancellationToken)
{
    foreach (var storeEntry in Directory.EnumerateDirectories(pnpmDirectory))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var entryModules = Path.Combine(storeEntry, "node_modules");
        if (!Directory.Exists(entryModules))
        {
            continue;
        }

        TraverseDirectory(context, entryModules, lockData, workspaceIndex, packages, visited, cancellationToken);
    }
}
/// <summary>
/// Registers the package in <paramref name="directory"/> the first time that relative
/// directory is seen, then (optionally) descends into its nested <c>node_modules</c>.
/// </summary>
/// <param name="context">Analyzer context.</param>
/// <param name="directory">Candidate package directory.</param>
/// <param name="lockData">Lock data used to enrich packages.</param>
/// <param name="workspaceIndex">Workspace index.</param>
/// <param name="includeNestedNodeModules">Whether to walk <c>node_modules</c> below the directory.</param>
/// <param name="packages">Receives discovered packages.</param>
/// <param name="visited">Relative directories already processed.</param>
/// <param name="cancellationToken">Propagated to nested traversal.</param>
private static void ProcessPackageDirectory(
    LanguageAnalyzerContext context,
    string directory,
    NodeLockData lockData,
    NodeWorkspaceIndex workspaceIndex,
    bool includeNestedNodeModules,
    List<NodePackage> packages,
    HashSet<string> visited,
    CancellationToken cancellationToken)
{
    var manifestPath = Path.Combine(directory, "package.json");
    var relativeDirectory = NormalizeRelativeDirectory(context, directory);

    // Only the first visit may add a package; repeat visits still descend (below).
    var firstVisit = visited.Add(relativeDirectory);
    if (firstVisit
        && File.Exists(manifestPath)
        && TryCreatePackage(context, manifestPath, relativeDirectory, lockData, workspaceIndex, cancellationToken) is { } package)
    {
        packages.Add(package);
    }

    if (includeNestedNodeModules)
    {
        TraverseNestedNodeModules(context, directory, lockData, workspaceIndex, packages, visited, cancellationToken);
    }
}
/// <summary>
/// Traverses the <c>node_modules</c> folder directly below <paramref name="directory"/>.
/// </summary>
private static void TraverseNestedNodeModules(
    LanguageAnalyzerContext context,
    string directory,
    NodeLockData lockData,
    NodeWorkspaceIndex workspaceIndex,
    List<NodePackage> packages,
    HashSet<string> visited,
    CancellationToken cancellationToken)
    => TraverseDirectory(
        context,
        Path.Combine(directory, "node_modules"),
        lockData,
        workspaceIndex,
        packages,
        visited,
        cancellationToken);
/// <summary>
/// Parses a <c>package.json</c> and builds the corresponding <see cref="NodePackage"/>.
/// </summary>
/// <param name="context">Analyzer context (root path, usage hints).</param>
/// <param name="packageJsonPath">Absolute path of the manifest to read.</param>
/// <param name="relativeDirectory">Package directory relative to the scan root (empty for the root).</param>
/// <param name="lockData">Lock data consulted by path and name.</param>
/// <param name="workspaceIndex">Workspace index.</param>
/// <param name="cancellationToken">Not observed by this method.</param>
/// <returns>
/// The package, or <c>null</c> when the manifest cannot be read, is not valid JSON, is not
/// a JSON object, or lacks a non-blank string <c>name</c> or <c>version</c>.
/// </returns>
private static NodePackage? TryCreatePackage(
    LanguageAnalyzerContext context,
    string packageJsonPath,
    string relativeDirectory,
    NodeLockData lockData,
    NodeWorkspaceIndex workspaceIndex,
    CancellationToken cancellationToken)
{
    // Reads a string property; anything that is missing, non-string or blank is null.
    // Checking ValueKind first avoids the InvalidOperationException GetString() raises
    // for numbers, objects, arrays and booleans.
    static string? ReadNonBlankString(JsonElement owner, string propertyName)
    {
        if (!owner.TryGetProperty(propertyName, out var value) || value.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        var text = value.GetString();
        return string.IsNullOrWhiteSpace(text) ? null : text;
    }

    try
    {
        using var stream = File.OpenRead(packageJsonPath);
        using var document = JsonDocument.Parse(stream);
        var root = document.RootElement;

        // Property lookups throw on non-object roots (e.g. a manifest containing "[]").
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var name = ReadNonBlankString(root, "name");
        if (name is null)
        {
            return null;
        }

        var version = ReadNonBlankString(root, "version");
        if (version is null)
        {
            return null;
        }

        bool? isPrivate = null;
        if (root.TryGetProperty("private", out var privateElement) && privateElement.ValueKind is JsonValueKind.True or JsonValueKind.False)
        {
            isPrivate = privateElement.GetBoolean();
        }

        var lockEntry = lockData.TryGet(relativeDirectory, name, out var entry) ? entry : null;
        var locator = BuildLocator(relativeDirectory);
        var usedByEntrypoint = context.UsageHints.IsPathUsed(packageJsonPath);
        var isWorkspaceMember = workspaceIndex.TryGetMember(relativeDirectory, out var workspaceRoot);
        var workspaceTargets = ExtractWorkspaceTargets(relativeDirectory, root, workspaceIndex);

        // NOTE(review): the workspace path found by name is discarded and the link is set
        // to this package's own relative directory - confirm this is the intended value.
        var workspaceLink = !isWorkspaceMember && workspaceIndex.TryGetWorkspacePathByName(name, out _)
            ? NormalizeRelativeDirectory(context, Path.Combine(context.RootPath, relativeDirectory))
            : null;

        var lifecycleScripts = ExtractLifecycleScripts(root);

        return new NodePackage(
            name: name.Trim(),
            version: version.Trim(),
            relativePath: relativeDirectory,
            packageJsonLocator: locator,
            isPrivate: isPrivate,
            lockEntry: lockEntry,
            isWorkspaceMember: isWorkspaceMember,
            workspaceRoot: workspaceRoot,
            workspaceTargets: workspaceTargets,
            workspaceLink: workspaceLink,
            lifecycleScripts: lifecycleScripts,
            usedByEntrypoint: usedByEntrypoint);
    }
    catch (IOException)
    {
        return null;
    }
    catch (UnauthorizedAccessException)
    {
        // A manifest we are not permitted to read is treated like an unreadable one.
        return null;
    }
    catch (JsonException)
    {
        return null;
    }
}
/// <summary>
/// Converts <paramref name="directory"/> to a root-relative path with '/' separators.
/// </summary>
/// <returns>The relative path, or an empty string when it is null, empty or ".".</returns>
private static string NormalizeRelativeDirectory(LanguageAnalyzerContext context, string directory)
{
    var relative = context.GetRelativePath(directory);
    return relative is null or "" or "."
        ? string.Empty
        : relative.Replace(Path.DirectorySeparatorChar, '/');
}
/// <summary>
/// Builds the <c>package.json</c> locator for a relative package directory.
/// </summary>
/// <param name="relativeDirectory">Relative directory; empty or null denotes the root.</param>
/// <returns><c>package.json</c> for the root, otherwise <c>&lt;dir&gt;/package.json</c>.</returns>
private static string BuildLocator(string relativeDirectory)
    => string.IsNullOrEmpty(relativeDirectory)
        ? "package.json"
        : $"{relativeDirectory}/package.json";
/// <summary>
/// Decides whether a directory name is excluded from traversal: empty names, dot-prefixed
/// names other than <c>.pnpm</c>, and names listed in <c>IgnoredDirectories</c>
/// (compared case-insensitively).
/// </summary>
private static bool ShouldSkipDirectory(string name)
{
    if (name.Length == 0)
    {
        return true;
    }

    return name[0] == '.'
        ? !string.Equals(name, ".pnpm", StringComparison.OrdinalIgnoreCase)
        : Array.Exists(IgnoredDirectories, ignored => string.Equals(name, ignored, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Resolves the workspace targets referenced from the manifest's <c>dependencies</c>,
/// <c>devDependencies</c> and <c>peerDependencies</c> sections.
/// </summary>
/// <returns>The distinct targets in ordinal order, or an empty array when there are none.</returns>
private static IReadOnlyList<string> ExtractWorkspaceTargets(string relativeDirectory, JsonElement root, NodeWorkspaceIndex workspaceIndex)
{
    var perSection = new[] { "dependencies", "devDependencies", "peerDependencies" }
        .Select(section => workspaceIndex.ResolveWorkspaceTargets(relativeDirectory, TryGetProperty(root, section)))
        .ToArray();

    if (perSection.All(static targets => targets.Count == 0))
    {
        return Array.Empty<string>();
    }

    return perSection
        .SelectMany(static targets => targets)
        .Distinct(StringComparer.Ordinal)
        .OrderBy(static target => target, StringComparer.Ordinal)
        .ToArray();
}
/// <summary>
/// Returns the named property of <paramref name="element"/>, or <c>null</c> when the
/// property does not exist.
/// </summary>
private static JsonElement? TryGetProperty(JsonElement element, string propertyName)
{
    if (element.TryGetProperty(propertyName, out var property))
    {
        return property;
    }

    return null;
}
/// <summary>
/// Collects the install lifecycle scripts declared in the manifest's <c>scripts</c>
/// object, recording a metric the first time each script name is seen.
/// </summary>
/// <returns>
/// The scripts ordered by name (ordinal), or an empty array when there is no
/// <c>scripts</c> object or no lifecycle script with a non-blank string command.
/// </returns>
private static IReadOnlyList<NodeLifecycleScript> ExtractLifecycleScripts(JsonElement root)
{
    if (!root.TryGetProperty("scripts", out var scriptsElement) || scriptsElement.ValueKind != JsonValueKind.Object)
    {
        return Array.Empty<NodeLifecycleScript>();
    }

    var byName = new Dictionary<string, NodeLifecycleScript>(StringComparer.OrdinalIgnoreCase);

    foreach (var property in scriptsElement.EnumerateObject())
    {
        if (!IsLifecycleScriptName(property.Name) || property.Value.ValueKind != JsonValueKind.String)
        {
            continue;
        }

        var command = property.Value.GetString();
        if (string.IsNullOrWhiteSpace(command))
        {
            continue;
        }

        var canonicalName = property.Name.Trim().ToLowerInvariant();
        var lifecycleScript = new NodeLifecycleScript(canonicalName, command);

        if (byName.TryAdd(canonicalName, lifecycleScript))
        {
            // Counted once per script name, even when the name is declared twice.
            NodeAnalyzerMetrics.RecordLifecycleScript(canonicalName);
        }
        else
        {
            // A later declaration of the same name replaces the earlier one.
            byName[canonicalName] = lifecycleScript;
        }
    }

    if (byName.Count == 0)
    {
        return Array.Empty<NodeLifecycleScript>();
    }

    return byName.Values
        .OrderBy(static script => script.Name, StringComparer.Ordinal)
        .ToArray();
}
/// <summary>
/// Returns <c>true</c> when <paramref name="name"/> is <c>preinstall</c>, <c>install</c>
/// or <c>postinstall</c>, ignoring case.
/// </summary>
private static bool IsLifecycleScriptName(string name)
{
    if (name.Equals("preinstall", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (name.Equals("install", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    return name.Equals("postinstall", StringComparison.OrdinalIgnoreCase);
}
}

View File

@@ -0,0 +1,278 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
internal sealed class NodeWorkspaceIndex
{
private readonly string _rootPath;
private readonly HashSet<string> _workspacePaths;
private readonly Dictionary<string, string> _workspaceByName;
/// <summary>
/// Stores the root path, the set of workspace member paths, and the name-to-path map.
/// </summary>
private NodeWorkspaceIndex(string rootPath, HashSet<string> workspacePaths, Dictionary<string, string> workspaceByName)
    => (_rootPath, _workspacePaths, _workspaceByName) = (rootPath, workspacePaths, workspaceByName);
/// <summary>
/// Builds the workspace index for <paramref name="rootPath"/> from the <c>workspaces</c>
/// property of the root <c>package.json</c>.
/// </summary>
/// <remarks>
/// Every path produced by expanding the workspace patterns becomes a member; members whose
/// own <c>package.json</c> declares a non-blank string <c>name</c> are also indexed by that
/// name. A missing, unreadable or malformed root manifest (including one whose root is not
/// a JSON object) yields an empty index; unreadable or malformed member manifests are
/// skipped individually.
/// </remarks>
/// <param name="rootPath">Scan root; converted to a full path.</param>
/// <returns>The index (possibly empty).</returns>
public static NodeWorkspaceIndex Create(string rootPath)
{
    var normalizedRoot = Path.GetFullPath(rootPath);
    var workspacePaths = new HashSet<string>(StringComparer.Ordinal);
    var workspaceByName = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    // Reads the "name" of a member manifest; any read/parse problem yields null.
    static string? ReadPackageName(string packagePath)
    {
        try
        {
            using var workspaceStream = File.OpenRead(packagePath);
            using var workspaceDoc = JsonDocument.Parse(workspaceStream);
            var workspaceRoot = workspaceDoc.RootElement;

            // ValueKind checks avoid InvalidOperationException on non-object roots and
            // non-string names.
            if (workspaceRoot.ValueKind == JsonValueKind.Object
                && workspaceRoot.TryGetProperty("name", out var nameElement)
                && nameElement.ValueKind == JsonValueKind.String)
            {
                return nameElement.GetString();
            }
        }
        catch (IOException)
        {
            // Ignore unreadable workspace package definitions.
        }
        catch (UnauthorizedAccessException)
        {
            // Ignore workspace package definitions we are not permitted to read.
        }
        catch (JsonException)
        {
            // Ignore malformed workspace package definitions.
        }

        return null;
    }

    var packageJsonPath = Path.Combine(normalizedRoot, "package.json");
    if (!File.Exists(packageJsonPath))
    {
        return new NodeWorkspaceIndex(normalizedRoot, workspacePaths, workspaceByName);
    }

    try
    {
        using var stream = File.OpenRead(packageJsonPath);
        using var document = JsonDocument.Parse(stream);
        var root = document.RootElement;
        if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("workspaces", out var workspacesElement))
        {
            return new NodeWorkspaceIndex(normalizedRoot, workspacePaths, workspaceByName);
        }

        var patterns = ExtractPatterns(workspacesElement);
        foreach (var pattern in patterns)
        {
            foreach (var workspacePath in ExpandPattern(normalizedRoot, pattern))
            {
                if (string.IsNullOrWhiteSpace(workspacePath))
                {
                    continue;
                }

                workspacePaths.Add(workspacePath);

                var packagePath = Path.Combine(normalizedRoot, workspacePath.Replace('/', Path.DirectorySeparatorChar), "package.json");
                if (!File.Exists(packagePath))
                {
                    continue;
                }

                var name = ReadPackageName(packagePath);
                if (!string.IsNullOrWhiteSpace(name))
                {
                    workspaceByName[name] = workspacePath!;
                }
            }
        }
    }
    catch (IOException)
    {
        // If the root package.json is unreadable we treat as no workspaces.
    }
    catch (UnauthorizedAccessException)
    {
        // Same treatment when the root package.json may not be read.
    }
    catch (JsonException)
    {
        // Malformed root package.json: treat as no workspaces.
    }

    return new NodeWorkspaceIndex(normalizedRoot, workspacePaths, workspaceByName);
}
public IEnumerable<string> GetMembers()
=> _workspacePaths.OrderBy(static path => path, StringComparer.Ordinal);
public bool TryGetMember(string relativePath, out string normalizedPath)
{
if (string.IsNullOrEmpty(relativePath))
{
normalizedPath = string.Empty;
return false;
}
var normalized = NormalizeRelative(relativePath);
if (_workspacePaths.Contains(normalized))
{
normalizedPath = normalized;
return true;
}
normalizedPath = string.Empty;
return false;
}
public bool TryGetWorkspacePathByName(string packageName, out string? relativePath)
=> _workspaceByName.TryGetValue(packageName, out relativePath);
public IReadOnlyList<string> ResolveWorkspaceTargets(string relativeDirectory, JsonElement? dependencies)
{
if (dependencies is null || dependencies.Value.ValueKind != JsonValueKind.Object)
{
return Array.Empty<string>();
}
var result = new HashSet<string>(StringComparer.Ordinal);
foreach (var property in dependencies.Value.EnumerateObject())
{
var value = property.Value;
if (value.ValueKind != JsonValueKind.String)
{
continue;
}
var targetSpec = value.GetString();
if (string.IsNullOrWhiteSpace(targetSpec))
{
continue;
}
const string workspacePrefix = "workspace:";
if (!targetSpec.StartsWith(workspacePrefix, StringComparison.OrdinalIgnoreCase))
{
continue;
}
var descriptor = targetSpec[workspacePrefix.Length..].Trim();
if (string.IsNullOrEmpty(descriptor) || descriptor is "*" or "^")
{
if (_workspaceByName.TryGetValue(property.Name, out var workspaceByName))
{
result.Add(workspaceByName);
}
continue;
}
if (TryResolveWorkspaceTarget(relativeDirectory, descriptor, out var resolved))
{
result.Add(resolved);
}
}
if (result.Count == 0)
{
return Array.Empty<string>();
}
return result.OrderBy(static x => x, StringComparer.Ordinal).ToArray();
}
public bool TryResolveWorkspaceTarget(string relativeDirectory, string descriptor, out string normalized)
{
normalized = string.Empty;
var baseDirectory = string.IsNullOrEmpty(relativeDirectory) ? string.Empty : relativeDirectory;
var baseAbsolute = Path.GetFullPath(Path.Combine(_rootPath, baseDirectory));
var candidate = Path.GetFullPath(Path.Combine(baseAbsolute, descriptor.Replace('/', Path.DirectorySeparatorChar)));
if (!IsUnderRoot(_rootPath, candidate))
{
return false;
}
var relative = NormalizeRelative(Path.GetRelativePath(_rootPath, candidate));
if (_workspacePaths.Contains(relative))
{
normalized = relative;
return true;
}
return false;
}
private static IEnumerable<string> ExtractPatterns(JsonElement workspacesElement)
{
if (workspacesElement.ValueKind == JsonValueKind.Array)
{
foreach (var item in workspacesElement.EnumerateArray())
{
if (item.ValueKind == JsonValueKind.String)
{
var value = item.GetString();
if (!string.IsNullOrWhiteSpace(value))
{
yield return value.Trim();
}
}
}
}
else if (workspacesElement.ValueKind == JsonValueKind.Object)
{
if (workspacesElement.TryGetProperty("packages", out var packagesElement) && packagesElement.ValueKind == JsonValueKind.Array)
{
foreach (var pattern in ExtractPatterns(packagesElement))
{
yield return pattern;
}
}
}
}
private static IEnumerable<string> ExpandPattern(string rootPath, string pattern)
{
var cleanedPattern = pattern.Replace('\\', '/').Trim();
if (cleanedPattern.EndsWith("/*", StringComparison.Ordinal))
{
var baseSegment = cleanedPattern[..^2];
var baseAbsolute = CombineAndNormalize(rootPath, baseSegment);
if (baseAbsolute is null || !Directory.Exists(baseAbsolute))
{
yield break;
}
foreach (var directory in Directory.EnumerateDirectories(baseAbsolute))
{
var normalized = NormalizeRelative(Path.GetRelativePath(rootPath, directory));
yield return normalized;
}
}
else
{
var absolute = CombineAndNormalize(rootPath, cleanedPattern);
if (absolute is null || !Directory.Exists(absolute))
{
yield break;
}
var normalized = NormalizeRelative(Path.GetRelativePath(rootPath, absolute));
yield return normalized;
}
}
private static string? CombineAndNormalize(string rootPath, string relative)
{
var candidate = Path.GetFullPath(Path.Combine(rootPath, relative.Replace('/', Path.DirectorySeparatorChar)));
return IsUnderRoot(rootPath, candidate) ? candidate : null;
}
private static string NormalizeRelative(string relativePath)
{
if (string.IsNullOrEmpty(relativePath) || relativePath == ".")
{
return string.Empty;
}
var normalized = relativePath.Replace('\\', '/');
normalized = normalized.TrimStart('.', '/');
return normalized;
}
private static bool IsUnderRoot(string rootPath, string absolutePath)
{
if (OperatingSystem.IsWindows())
{
return absolutePath.StartsWith(rootPath, StringComparison.OrdinalIgnoreCase);
}
return absolutePath.StartsWith(rootPath, StringComparison.Ordinal);
}
}

View File

@@ -0,0 +1,18 @@
using System;
using StellaOps.Scanner.Analyzers.Lang;
using StellaOps.Scanner.Analyzers.Lang.Plugin;
namespace StellaOps.Scanner.Analyzers.Lang.Node;
/// <summary>
/// Plug-in entry point that hands out the Node.js language analyzer.
/// </summary>
public sealed class NodeAnalyzerPlugin : ILanguageAnalyzerPlugin
{
    /// <summary>Name reported by this plug-in.</summary>
    public string Name
    {
        get { return "StellaOps.Scanner.Analyzers.Lang.Node"; }
    }

    /// <summary>The plug-in is available whenever a service provider is supplied.</summary>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>Creates a new <see cref="NodeLanguageAnalyzer"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
    {
        if (services is null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        ILanguageAnalyzer analyzer = new NodeLanguageAnalyzer();
        return analyzer;
    }
}

View File

@@ -0,0 +1,37 @@
using StellaOps.Scanner.Analyzers.Lang.Node.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Node;
/// <summary>
/// Language analyzer that reports the collected Node.js packages as npm components.
/// </summary>
public sealed class NodeLanguageAnalyzer : ILanguageAnalyzer
{
    /// <summary>Analyzer identifier attached to every emitted component.</summary>
    public string Id
    {
        get { return "node"; }
    }

    /// <summary>Human-readable analyzer name.</summary>
    public string DisplayName
    {
        get { return "Node.js Analyzer"; }
    }

    /// <summary>
    /// Loads lock data from the context root, collects packages, and writes them to
    /// <paramref name="writer"/> ordered by component key (ordinal).
    /// </summary>
    public async ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(writer);

        var lockData = await NodeLockData
            .LoadAsync(context.RootPath, cancellationToken)
            .ConfigureAwait(false);

        // Ordinal ordering keeps the emitted component sequence stable across runs.
        var ordered = NodePackageCollector
            .CollectPackages(context, lockData, cancellationToken)
            .OrderBy(static candidate => candidate.ComponentKey, StringComparer.Ordinal);

        foreach (var entry in ordered)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var entryMetadata = entry.CreateMetadata();
            var entryEvidence = entry.CreateEvidence();

            writer.AddFromPurl(
                analyzerId: Id,
                purl: entry.Purl,
                name: entry.Name,
                version: entry.Version,
                type: "npm",
                metadata: entryMetadata,
                evidence: entryEvidence,
                usedByEntrypoint: entry.IsUsedByEntrypoint);
        }
    }
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the Node analyzer project: nullable annotations on, warnings fail the build. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<!-- Default item globbing is switched off; every item is declared explicitly in the group below. -->
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<!-- Explicit items: C# sources are compiled, JSON files are embedded, everything else is a None item. -->
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
</ItemGroup>
<!-- Reference to the shared language analyzer project. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,31 @@
# Node Analyzer Task Flow
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|-----|----|--------|------------|-------------|---------------|
| 1 | SCANNER-ANALYZERS-LANG-10-302A | DONE (2025-10-19) | SCANNER-ANALYZERS-LANG-10-307 | Build deterministic module graph walker covering npm, Yarn, and PNPM; capture package.json provenance and integrity metadata. | Walker indexes >100k modules in <1.5s (hot cache); golden fixtures verify deterministic ordering and path normalization. |
| 2 | SCANNER-ANALYZERS-LANG-10-302B | DONE (2025-10-19) | SCANNER-ANALYZERS-LANG-10-302A | Resolve workspaces/symlinks and attribute components to originating package with usage hints; guard against directory traversal. | Workspace attribution accurate on multi-workspace fixture; symlink resolver proves canonical path; security tests ensure no traversal. |
| 3 | SCANNER-ANALYZERS-LANG-10-302C | DONE (2025-10-19) | SCANNER-ANALYZERS-LANG-10-302B | Surface script metadata (postinstall/preinstall) and policy hints; emit telemetry counters and evidence records. | Analyzer output includes script metadata + evidence; metrics `scanner_analyzer_node_scripts_total` recorded; policy hints documented. |
| 4 | SCANNER-ANALYZERS-LANG-10-307N | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-302C | Integrate shared helpers for license/licence evidence, canonical JSON serialization, and usage flag propagation. | Reuse shared helpers without duplication; unit tests confirm stable metadata merge; no analyzer-specific serializer drift. |
| 5 | SCANNER-ANALYZERS-LANG-10-308N | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-307N | Author determinism harness + fixtures for Node analyzer; add benchmark suite. | Fixtures committed under `Fixtures/lang/node/`; determinism CI job compares JSON snapshots; benchmark CSV published. |
| 6 | SCANNER-ANALYZERS-LANG-10-309N | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-308N | Package Node analyzer as restart-time plug-in (manifest, DI registration, Offline Kit notes). | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer after restart; Offline Kit docs updated. |
## Node Entry-Point Analyzer (Sprint 41)
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-NODE-22-001 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-LANG-10-309N | Build input normalizer + VFS for Node projects: dirs, tgz, container layers, pnpm store, Yarn PnP zips; detect Node version targets (`.nvmrc`, `.node-version`, Dockerfile) and workspace roots deterministically. | Normalizer handles fixtures (npm, pnpm, Yarn classic/PnP, container) without extraction; records node_version, workspace list, and symlink mode with golden outputs. |
| SCANNER-ANALYZERS-NODE-22-002 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-001 | Implement entrypoint discovery (bin/main/module/exports/imports, workers, electron, shebang scripts) and condition set builder per entrypoint. | Entrypoint inventory generated for fixtures (library, CLI, electron, worker); each entrypoint includes kind, start file, conditions; determinism harness updated. |
| SCANNER-ANALYZERS-NODE-22-003 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-001 | Parse JS/TS sources for static `import`, `require`, `import()` and string concat cases; flag dynamic patterns with confidence levels; support source map de-bundling. | Static edges + dynamic-specifier warnings emitted for fixtures (ESM, CJS, bundles); source map fixture rewrites concatenated modules with `confidence` metadata. |
| SCANNER-ANALYZERS-NODE-22-004 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-002 | Implement Node resolver engine for CJS + ESM (core modules, exports/imports maps, conditions, extension priorities, self-references) parameterised by node_version. | Resolver reproduces Node 18/20 semantics across fixture matrix; includes explain trace per edge; unit tests cover exports/conditions/extension ordering. |
| SCANNER-ANALYZERS-NODE-22-005 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-004 | Add package manager adapters: Yarn PnP (.pnp.data/.pnp.cjs), pnpm virtual store, npm/Yarn classic hoists; operate entirely in virtual FS. | PnP fixture resolves via `.pnp.data.json`/cache zips; pnpm fixture follows `.pnpm` symlinks; classic hoist fixture maintains first-wins ordering; warnings emitted for unreadable PnP. |
| SCANNER-ANALYZERS-NODE-22-006 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-004 | Detect bundles + source maps, reconstruct module specifiers, and correlate to original paths; support dual CJS/ESM graphs with conditions. | Bundle fixture using source maps produces `bundle-map` edges with medium confidence and original source paths; dual package fixture yields separate import/require graphs. |
| SCANNER-ANALYZERS-NODE-22-007 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-002 | Scan for native addons (.node), WASM modules, and core capability signals (child_process, vm, worker_threads); emit hint edges and native metadata. | Fixtures with native addon/WASM produce `native-addon`/`wasm` edges plus capability hints; metadata captures ABI, N-API, OS/arch where available. |
## Node Observation & Runtime (Sprint 42)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-NODE-22-008 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-004 | Produce AOC-compliant observations: entrypoints, components (pkg/native/wasm), edges (esm-import, cjs-require, exports, json, native-addon, wasm, worker) with reason codes/confidence and resolver traces. | Observation JSON for fixtures deterministic; edges include reason codes + confidence; resolved edges attach trace data; passes AOC lint. |
| SCANNER-ANALYZERS-NODE-22-009 | TODO | Node Analyzer Guild, QA Guild | SCANNER-ANALYZERS-NODE-22-008 | Author fixture suite + performance benchmarks (npm, pnpm, PnP, bundle, electron, worker) with golden outputs and latency budgets. | Fixtures stored under `fixtures/lang/node/ep`; determinism + perf (<350ms npm, <900ms PnP) enforced via CI benchmarks. |
| SCANNER-ANALYZERS-NODE-22-010 | TODO | Node Analyzer Guild, Signals Guild | SCANNER-ANALYZERS-NODE-22-008 | Implement optional runtime evidence hooks (ESM loader, CJS require hook) with path scrubbing and loader ID hashing; emit runtime-* edges. | Sandbox harness records runtime resolve events for sample app; paths hashed; runtime edges merge with static graph without altering first-wins selection. |
| SCANNER-ANALYZERS-NODE-22-011 | TODO | Node Analyzer Guild, DevOps Guild | SCANNER-ANALYZERS-NODE-22-008 | Package updated analyzer as restart-time plug-in, expose Scanner CLI (`stella node *`) commands, refresh Offline Kit documentation. | Plugins folder updated; worker loads analyzer; CLI commands documented and smoke-tested; Offline Kit instructions include Node analyzer usage. |
| SCANNER-ANALYZERS-NODE-22-012 | TODO | Node Analyzer Guild | SCANNER-ANALYZERS-NODE-22-001 | Integrate container filesystem adapter (OCI layers, Dockerfile hints) and record NODE_OPTIONS/env warnings. | Container fixture parsed: Node base image + NODE_OPTIONS captured; entrypoints resolved relative to image root; warnings emitted for loader flags. |

View File

@@ -0,0 +1,22 @@
{
"schemaVersion": "1.0",
"id": "stellaops.analyzer.lang.node",
"displayName": "StellaOps Node.js Analyzer",
"version": "0.1.0",
"requiresRestart": true,
"entryPoint": {
"type": "dotnet",
"assembly": "StellaOps.Scanner.Analyzers.Lang.Node.dll",
"typeName": "StellaOps.Scanner.Analyzers.Lang.Node.NodeAnalyzerPlugin"
},
"capabilities": [
"language-analyzer",
"node",
"npm"
],
"metadata": {
"org.stellaops.analyzer.language": "node",
"org.stellaops.analyzer.kind": "language",
"org.stellaops.restart.required": "true"
}
}

View File

@@ -0,0 +1,32 @@
# StellaOps.Scanner.Analyzers.Lang.Python — Agent Charter
## Role
Implement the Python analyzer plug-in that inspects installed distributions, RECORD hashes, entry points, and editable installs to feed Scanner SBOM views.
## Scope
- Parse `*.dist-info` and `*.data` directories, validating `METADATA`, `RECORD`, and `entry_points.txt`.
- Detect editable installs and pip caches, reconciling metadata with actual files.
- Integrate EntryTrace usage hints for runtime entry points and flag missing RECORD hashes.
- Package plug-in manifest and ensure deterministic fixtures + benchmarks.
## Out of Scope
- Language analyzers for other ecosystems.
- Policy evaluation, vulnerability correlation, or packaging into UI flows.
- Building Python interpreters or executing scripts (analysis is static only).
## Expectations
- Deterministic RECORD hashing with streaming IO; fallback heuristics clearly flagged.
- Performance target: ≥75MB/s RECORD verification, end-to-end fixture <2.0s.
- Offline-first: no PyPI calls; relies on local metadata only.
- Rich telemetry (components counted, hash mismatches) following Scanner metrics schema.
- Keep `TASKS.md` and `SPRINTS_LANG_IMPLEMENTATION_PLAN.md` in sync.
## Dependencies
- Shared language analyzer infrastructure.
- EntryTrace usage hints (for script activation).
- Worker dispatcher for plug-in loading.
## Testing & Artifacts
- Golden fixtures for venv, virtualenv, pipx, and editable installs.
- Benchmark results comparing hash-check throughput against competitor tools.
- Offline Kit guidance for bundling standard library metadata if required.

View File

@@ -0,0 +1,8 @@
global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Linq;
global using System.Threading;
global using System.Threading.Tasks;
global using StellaOps.Scanner.Analyzers.Lang;

View File

@@ -0,0 +1,17 @@
using System;
using StellaOps.Scanner.Analyzers.Lang.Plugin;
namespace StellaOps.Scanner.Analyzers.Lang.Python;
/// <summary>
/// Plug-in entry point that hands out the Python language analyzer.
/// </summary>
public sealed class PythonAnalyzerPlugin : ILanguageAnalyzerPlugin
{
    /// <summary>Name reported by this plug-in.</summary>
    public string Name
    {
        get { return "StellaOps.Scanner.Analyzers.Lang.Python"; }
    }

    /// <summary>The plug-in is available whenever a service provider is supplied.</summary>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>Creates a new <see cref="PythonLanguageAnalyzer"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
    {
        if (services is null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        ILanguageAnalyzer analyzer = new PythonLanguageAnalyzer();
        return analyzer;
    }
}

View File

@@ -0,0 +1,72 @@
using System.Text.Json;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Python;
/// <summary>
/// Language analyzer that reports every loadable <c>*.dist-info</c> directory under the
/// context root as a PyPI component.
/// </summary>
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
{
    // Recursive search that skips inaccessible entries, devices, and reparse points.
    private static readonly EnumerationOptions Enumeration = new()
    {
        RecurseSubdirectories = true,
        IgnoreInaccessible = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
    };

    /// <summary>Analyzer identifier.</summary>
    public string Id
    {
        get { return "python"; }
    }

    /// <summary>Human-readable analyzer name.</summary>
    public string DisplayName
    {
        get { return "Python Analyzer"; }
    }

    /// <summary>
    /// Validates the arguments synchronously, then runs the asynchronous scan.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="context"/> or <paramref name="writer"/> is null.
    /// </exception>
    public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(writer);

        return AnalyzeInternalAsync(context, writer, cancellationToken);
    }

    /// <summary>
    /// Visits the <c>*.dist-info</c> directories in ordinal path order and writes one
    /// component per distribution that loads successfully.
    /// </summary>
    private static async ValueTask AnalyzeInternalAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        var candidates = Directory
            .EnumerateDirectories(context.RootPath, "*.dist-info", Enumeration)
            .ToArray();
        Array.Sort(candidates, StringComparer.Ordinal);

        foreach (var candidate in candidates)
        {
            cancellationToken.ThrowIfCancellationRequested();

            PythonDistribution? loaded = null;
            try
            {
                loaded = await PythonDistributionLoader
                    .LoadAsync(context, candidate, cancellationToken)
                    .ConfigureAwait(false);
            }
            catch (Exception ex) when (ex is IOException or JsonException or UnauthorizedAccessException)
            {
                // Unreadable or malformed distribution: leave `loaded` null so it is skipped.
            }

            if (loaded is null)
            {
                continue;
            }

            writer.AddFromPurl(
                analyzerId: "python",
                purl: loaded.Purl,
                name: loaded.Name,
                version: loaded.Version,
                type: "pypi",
                metadata: loaded.SortedMetadata,
                evidence: loaded.SortedEvidence,
                usedByEntrypoint: loaded.UsedByEntrypoint);
        }
    }
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the Python analyzer project: nullable annotations on, warnings fail the build. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<!-- Default item globbing is switched off; every item is declared explicitly in the group below. -->
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<!-- Explicit items: C# sources are compiled, JSON files are embedded, everything else is a None item. -->
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
</ItemGroup>
<!-- Reference to the shared language analyzer project. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,31 @@
# Python Analyzer Task Flow
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|-----|----|--------|------------|-------------|---------------|
| 1 | SCANNER-ANALYZERS-LANG-10-303A | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-307 | STREAM-based parser for `*.dist-info` (`METADATA`, `WHEEL`, `entry_points.txt`) with normalization + evidence capture. | Parser handles CPython 3.8–3.12 metadata variations; fixtures confirm canonical ordering and UTF-8 handling. |
| 2 | SCANNER-ANALYZERS-LANG-10-303B | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-303A | RECORD hash verifier with chunked hashing, Zip64 support, and mismatch diagnostics. | Verifier processes 5GB RECORD fixture without allocations >2MB; mismatches produce deterministic evidence records. |
| 3 | SCANNER-ANALYZERS-LANG-10-303C | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-303B | Editable install + pip cache detection; integrate EntryTrace hints for runtime usage flags. | Editable installs resolved to source path; usage flags propagated; regression tests cover mixed editable + wheel installs. |
| 4 | SCANNER-ANALYZERS-LANG-10-307P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-303C | Shared helper integration (license metadata, quiet provenance, component merging). | Shared helpers reused; analyzer-specific metadata minimal; deterministic merge tests pass. |
| 5 | SCANNER-ANALYZERS-LANG-10-308P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-307P | Golden fixtures + determinism harness for Python analyzer; add benchmark and hash throughput reporting. | Fixtures under `Fixtures/lang/python/`; determinism CI guard; benchmark CSV added with threshold alerts. |
| 6 | SCANNER-ANALYZERS-LANG-10-309P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-308P | Package plug-in (manifest, DI registration) and document Offline Kit bundling of Python stdlib metadata if needed. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |
## Python Entry-Point Analyzer (Sprint 43)
> **Imposed rule:** Work of this type or tasks of this type on this component must also be applied everywhere else it should be applied.
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-PYTHON-23-001 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-LANG-10-309P | Build input normalizer & virtual filesystem for wheels, sdists, editable installs, zipapps, site-packages trees, and container roots. Detect Python version targets (`pyproject.toml`, `runtime.txt`, Dockerfile) + virtualenv layout deterministically. | Normalizer ingests fixtures (venv, wheel, sdist, zipapp, container layer) without extraction; records python_version, root metadata, and namespace resolution hints; determinism harness updated. |
| SCANNER-ANALYZERS-PYTHON-23-002 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-001 | Entrypoint discovery: module `__main__`, console_scripts entry points, `scripts`, zipapp main, `manage.py`/gunicorn/celery patterns. Capture invocation context (module vs package, argv wrappers). | Fixtures produce entrypoint list with kind (console, module, package, zipapp, framework) and deterministic ordering; warnings for missing targets recorded. |
| SCANNER-ANALYZERS-PYTHON-23-003 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-001 | Static import graph builder using AST and bytecode fallback. Support `import`, `from ... import`, relative imports, `importlib.import_module`, `__import__` with literal args, `pkgutil.extend_path`. | AST scanner emits edges for explicit imports; literal importlib calls covered; unresolved/dynamic patterns yield `dynamic-import` warnings with candidate prefixes; regression fixtures pass. |
| SCANNER-ANALYZERS-PYTHON-23-004 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-003 | Python resolver engine (importlib semantics) handling namespace packages (PEP 420), package discovery order, `.pth` files, `sys.path` composition, zipimport, and site-packages precedence across virtualenv/container roots. | Resolver reproduces importlib behaviour on fixture matrix (namespace pkg, zipimport, multi-site-dir); includes explain traces; determinism tests for path ordering succeed. |
| SCANNER-ANALYZERS-PYTHON-23-005 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-004 | Packaging adapters: pip editable (`.egg-link`), Poetry/Flit layout, Conda prefix, `.dist-info/RECORD` cross-check, container layer overlays. | Adapters resolve editable links, conda pkgs, layered site-packages; edges capture provider path + metadata; warnings emitted for missing RECORD entries. |
| SCANNER-ANALYZERS-PYTHON-23-006 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-003 | Detect native extensions (`*.so`, `*.pyd`), CFFI modules, ctypes loaders, embedded WASM, and runtime capability signals (subprocess, multiprocessing, ctypes, eval). | Fixtures with native/CFFI/ctypes emit `native-extension`, `cffi`, `ctypes` hints; capability flags recorded; metadata captures ABI/platform info. |
| SCANNER-ANALYZERS-PYTHON-23-007 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-002 | Framework/config heuristics: Django, Flask, FastAPI, Celery, AWS Lambda handlers, Gunicorn, Click/Typer CLIs, logging configs, pyproject optional dependencies. Tagged as hints only. | Framework fixtures produce hint records with source files (settings.py, pyproject extras, celery app); no resolver impact; determinism maintained. |
## Python Observation & Runtime (Sprint 44)
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCANNER-ANALYZERS-PYTHON-23-008 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-004 | Produce AOC-compliant observations: entrypoints, components (modules/packages/native), edges (import, namespace, dynamic-hint, native-extension) with reason codes/confidence and resolver traces. | Observation JSON for fixtures deterministic; includes explain trace per edge and namespace resolution metadata; passes AOC compliance lint. |
| SCANNER-ANALYZERS-PYTHON-23-009 | TODO | Python Analyzer Guild, QA Guild | SCANNER-ANALYZERS-PYTHON-23-008 | Fixture suite + perf benchmarks covering virtualenv, namespace packages, zipapp, editable installs, containers, lambda handler. | Fixture set committed under `fixtures/lang/python/ep`; determinism CI and perf (<250ms medium project) gates enabled. |
| SCANNER-ANALYZERS-PYTHON-23-010 | TODO | Python Analyzer Guild, Signals Guild | SCANNER-ANALYZERS-PYTHON-23-008 | Optional runtime evidence: import hook capturing module load events with path scrubbing, optional bytecode instrumentation for `importlib` hooks, multiprocessing tracer. | Runtime harness records module loads for sample app; paths hashed; runtime edges merge without altering resolver precedence; privacy doc updated. |
| SCANNER-ANALYZERS-PYTHON-23-011 | TODO | Python Analyzer Guild, DevOps Guild | SCANNER-ANALYZERS-PYTHON-23-008 | Package analyzer plug-in, add CLI commands (`stella python inspect|resolve|trace`), update Offline Kit guidance. | Plugin manifest deployed; CLI commands documented & smoke tested; Offline Kit instructions cover Python analyzer usage; worker restart verified. |
| SCANNER-ANALYZERS-PYTHON-23-012 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-001 | Container/zipapp adapter enhancements: parse OCI layers for Python runtime, detect `PYTHONPATH`/`PYTHONHOME` env, record warnings for sitecustomize/startup hooks. | Container fixtures output runtime metadata (python binary, env vars) and warnings for startup hooks; zipapp fixture resolves internal modules; determinism retained. |

View File

@@ -0,0 +1,23 @@
{
"schemaVersion": "1.0",
"id": "stellaops.analyzer.lang.python",
"displayName": "StellaOps Python Analyzer (preview)",
"version": "0.1.0",
"requiresRestart": true,
"entryPoint": {
"type": "dotnet",
"assembly": "StellaOps.Scanner.Analyzers.Lang.Python.dll",
"typeName": "StellaOps.Scanner.Analyzers.Lang.Python.PythonAnalyzerPlugin"
},
"capabilities": [
"language-analyzer",
"python",
"pypi"
],
"metadata": {
"org.stellaops.analyzer.language": "python",
"org.stellaops.analyzer.kind": "language",
"org.stellaops.restart.required": "true",
"org.stellaops.analyzer.status": "preview"
}
}

View File

@@ -0,0 +1,29 @@
# StellaOps.Scanner.Analyzers.Lang.Rust — Agent Charter
## Role
Develop the Rust analyzer plug-in that resolves crates from metadata (`.fingerprint`, Cargo.lock, embedded markers) and provides deterministic fallbacks for stripped binaries.
## Scope
- Locate Cargo metadata in container layers (registry cache, target fingerprints, embedded Git info).
- Parse symbol tables / section data to heuristically identify crates when metadata missing, tagging provenance appropriately.
- Integrate binary hash fallback with quiet provenance classification.
- Package plug-in manifest, determinism fixtures, and performance/coverage benchmarks.
## Out of Scope
- Native linker analysis beyond crate attribution.
- Fetching Cargo registry metadata from the network.
- Policy decisions or UI surfacing.
## Expectations
- Accurate crate attribution (≥85% on curated fixtures) with explicit heuristic labeling.
- Analyzer runtime <1s over a 500-binary corpus; minimal allocations through pooling.
- Offline-first; rely on local Cargo data.
- Telemetry capturing heuristic vs verified evidence ratios.
## Dependencies
- Shared language analyzer infrastructure; Worker dispatcher; optionally EntryTrace hints for runtime coverage.
## Testing & Artifacts
- Fixtures for cargo workspaces, release builds, stripped binaries, vendor caches.
- Determinism + benchmark artifacts comparing to competitor scanners.
- ADR documenting heuristic boundaries + risk mitigations.

View File

@@ -0,0 +1,7 @@
global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Threading;
global using System.Threading.Tasks;
global using StellaOps.Scanner.Analyzers.Lang;

View File

@@ -0,0 +1,650 @@
using System.Collections.Immutable;
using System.Linq;
using System.Security.Cryptography;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
internal static class RustAnalyzerCollector
{
/// <summary>
/// Runs a fresh <c>Collector</c> over <paramref name="context"/> and returns what it built.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
public static RustAnalyzerCollection Collect(LanguageAnalyzerContext context, CancellationToken cancellationToken)
{
    if (context is null)
    {
        throw new ArgumentNullException(nameof(context));
    }

    var worker = new Collector(context);
    worker.Execute(cancellationToken);

    var collection = worker.Build();
    return collection;
}
private sealed class Collector
{
private static readonly EnumerationOptions LockEnumeration = new()
{
MatchCasing = MatchCasing.CaseSensitive,
IgnoreInaccessible = true,
RecurseSubdirectories = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
};
private readonly LanguageAnalyzerContext _context;
private readonly Dictionary<RustCrateKey, RustCrateBuilder> _crates = new();
private readonly Dictionary<string, List<RustCrateBuilder>> _cratesByName = new(StringComparer.Ordinal);
private readonly Dictionary<string, RustHeuristicBuilder> _heuristics = new(StringComparer.Ordinal);
private readonly Dictionary<string, RustBinaryRecord> _binaries = new(StringComparer.Ordinal);
/// <summary>Stores the analyzer context that every collection pass reads from.</summary>
public Collector(LanguageAnalyzerContext context) => _context = context;
public void Execute(CancellationToken cancellationToken)
{
CollectCargoLocks(cancellationToken);
CollectFingerprints(cancellationToken);
CollectBinaries(cancellationToken);
}
public RustAnalyzerCollection Build()
{
var crateRecords = _crates.Values
.Select(static builder => builder.Build())
.OrderBy(static record => record.ComponentKey, StringComparer.Ordinal)
.ToImmutableArray();
var heuristicRecords = _heuristics.Values
.Select(static builder => builder.Build())
.OrderBy(static record => record.ComponentKey, StringComparer.Ordinal)
.ToImmutableArray();
var fallbackRecords = _binaries.Values
.Where(static record => !record.HasMatches)
.Select(BuildFallback)
.OrderBy(static record => record.ComponentKey, StringComparer.Ordinal)
.ToImmutableArray();
return new RustAnalyzerCollection(crateRecords, heuristicRecords, fallbackRecords);
}
private void CollectCargoLocks(CancellationToken cancellationToken)
{
foreach (var lockPath in Directory.EnumerateFiles(_context.RootPath, "Cargo.lock", LockEnumeration))
{
cancellationToken.ThrowIfCancellationRequested();
var packages = RustCargoLockParser.Parse(lockPath, cancellationToken);
if (packages.Count == 0)
{
continue;
}
var relativePath = NormalizeRelative(_context.GetRelativePath(lockPath));
foreach (var package in packages)
{
var builder = GetOrCreateCrate(package.Name, package.Version);
builder.ApplyCargoPackage(package, relativePath);
}
}
}
private void CollectFingerprints(CancellationToken cancellationToken)
{
var records = RustFingerprintScanner.Scan(_context.RootPath, cancellationToken);
foreach (var record in records)
{
cancellationToken.ThrowIfCancellationRequested();
var builder = GetOrCreateCrate(record.Name, record.Version);
var relative = NormalizeRelative(_context.GetRelativePath(record.AbsolutePath));
builder.ApplyFingerprint(record, relative);
}
}
private void CollectBinaries(CancellationToken cancellationToken)
{
var binaries = RustBinaryClassifier.Scan(_context.RootPath, cancellationToken);
foreach (var binary in binaries)
{
cancellationToken.ThrowIfCancellationRequested();
var relative = NormalizeRelative(_context.GetRelativePath(binary.AbsolutePath));
var usage = _context.UsageHints.IsPathUsed(binary.AbsolutePath);
var hash = binary.ComputeSha256();
if (!_binaries.TryGetValue(relative, out var record))
{
record = new RustBinaryRecord(binary.AbsolutePath, relative, usage, hash);
_binaries[relative] = record;
}
else
{
record.MergeUsage(usage);
record.EnsureHash(hash);
}
if (binary.CrateCandidates.IsDefaultOrEmpty || binary.CrateCandidates.Length == 0)
{
continue;
}
foreach (var candidate in binary.CrateCandidates)
{
if (string.IsNullOrWhiteSpace(candidate))
{
continue;
}
var crateBuilder = FindCrateByName(candidate);
if (crateBuilder is not null)
{
crateBuilder.AddBinaryEvidence(relative, record.Hash, usage);
record.MarkCrateMatch();
continue;
}
var heuristic = GetOrCreateHeuristic(candidate);
heuristic.AddBinary(relative, record.Hash, usage);
record.MarkHeuristicMatch();
}
}
}
private RustCrateBuilder GetOrCreateCrate(string name, string? version)
{
var key = new RustCrateKey(name, version);
if (_crates.TryGetValue(key, out var existing))
{
existing.EnsureVersion(version);
return existing;
}
var builder = new RustCrateBuilder(name, version);
_crates[key] = builder;
if (!_cratesByName.TryGetValue(builder.Name, out var list))
{
list = new List<RustCrateBuilder>();
_cratesByName[builder.Name] = list;
}
list.Add(builder);
return builder;
}
private RustCrateBuilder? FindCrateByName(string candidate)
{
var normalized = RustCrateBuilder.NormalizeName(candidate);
if (!_cratesByName.TryGetValue(normalized, out var builders) || builders.Count == 0)
{
return null;
}
return builders
.OrderBy(static builder => builder.Version ?? string.Empty, StringComparer.Ordinal)
.FirstOrDefault();
}
private RustHeuristicBuilder GetOrCreateHeuristic(string crateName)
{
var normalized = RustCrateBuilder.NormalizeName(crateName);
if (_heuristics.TryGetValue(normalized, out var existing))
{
return existing;
}
var builder = new RustHeuristicBuilder(normalized);
_heuristics[normalized] = builder;
return builder;
}
private RustComponentRecord BuildFallback(RustBinaryRecord record)
{
var metadata = new List<KeyValuePair<string, string?>>
{
new("binary.path", record.RelativePath),
new("provenance", "binary"),
};
if (!string.IsNullOrEmpty(record.Hash))
{
metadata.Add(new KeyValuePair<string, string?>("binary.sha256", record.Hash));
}
metadata.Sort(static (left, right) => string.CompareOrdinal(left.Key, right.Key));
var evidence = new List<LanguageComponentEvidence>
{
new(
LanguageEvidenceKind.File,
"binary",
record.RelativePath,
null,
string.IsNullOrEmpty(record.Hash) ? null : record.Hash)
};
var componentName = Path.GetFileName(record.RelativePath);
if (string.IsNullOrWhiteSpace(componentName))
{
componentName = "binary";
}
var key = string.IsNullOrEmpty(record.Hash)
? $"bin::{record.RelativePath}"
: $"bin::sha256:{record.Hash}";
return new RustComponentRecord(
Name: componentName,
Version: null,
Type: "bin",
Purl: null,
ComponentKey: key,
Metadata: metadata,
Evidence: evidence,
UsedByEntrypoint: record.UsedByEntrypoint);
}
private static string NormalizeRelative(string relativePath)
{
if (string.IsNullOrWhiteSpace(relativePath) || relativePath == ".")
{
return ".";
}
return relativePath.Replace('\\', '/');
}
}
}
/// <summary>
/// Result of one collection run, split into three groups of component records.
/// </summary>
/// <param name="Crates">Records produced by the crate builders.</param>
/// <param name="Heuristics">Records produced by the heuristic builders.</param>
/// <param name="Fallbacks">Records for binaries that matched neither a crate nor a heuristic.</param>
internal sealed record RustAnalyzerCollection(
ImmutableArray<RustComponentRecord> Crates,
ImmutableArray<RustComponentRecord> Heuristics,
ImmutableArray<RustComponentRecord> Fallbacks);
/// <summary>
/// Flat description of one component as handed to the component writer.
/// </summary>
/// <param name="Name">Component name (crate name, or file name for fallback binaries).</param>
/// <param name="Version">Component version, or <c>null</c> when unknown.</param>
/// <param name="Type">Component type string (the builders in this file use "cargo" and "bin").</param>
/// <param name="Purl">Package URL, or <c>null</c> when the component is identified by <paramref name="ComponentKey"/> only.</param>
/// <param name="ComponentKey">Key used to order and identify the record.</param>
/// <param name="Metadata">Key/value metadata pairs.</param>
/// <param name="Evidence">Evidence entries backing the component.</param>
/// <param name="UsedByEntrypoint">Whether any contributing binary was flagged as used.</param>
internal sealed record RustComponentRecord(
string Name,
string? Version,
string Type,
string? Purl,
string ComponentKey,
IReadOnlyList<KeyValuePair<string, string?>> Metadata,
IReadOnlyCollection<LanguageComponentEvidence> Evidence,
bool UsedByEntrypoint);
/// <summary>
/// Accumulates everything learned about a single crate (lock-file entry, fingerprint data and
/// binaries that reference it) and turns it into a <see cref="RustComponentRecord"/>.
/// </summary>
internal sealed class RustCrateBuilder
{
    private readonly SortedDictionary<string, string?> _metadata = new(StringComparer.Ordinal);
    private readonly HashSet<LanguageComponentEvidence> _evidence = new(new LanguageComponentEvidenceComparer());
    private readonly SortedSet<string> _binaryPaths = new(StringComparer.Ordinal);
    private readonly SortedSet<string> _binaryHashes = new(StringComparer.Ordinal);
    private string? _version;
    private string? _source;
    private string? _checksum;
    private bool _usedByEntrypoint;

    public RustCrateBuilder(string name, string? version)
    {
        Name = NormalizeName(name);
        EnsureVersion(version);
    }

    /// <summary>Normalized crate name.</summary>
    public string Name { get; }

    /// <summary>First non-blank version supplied, or <c>null</c> if none has been seen.</summary>
    public string? Version => _version;

    /// <summary>Trims <paramref name="value"/>; null or blank input becomes the empty string.</summary>
    public static string NormalizeName(string value)
    {
        return string.IsNullOrWhiteSpace(value) ? string.Empty : value.Trim();
    }

    /// <summary>Records <paramref name="version"/> unless it is blank or a version is already set.</summary>
    public void EnsureVersion(string? version)
    {
        if (!string.IsNullOrWhiteSpace(version))
        {
            _version ??= version.Trim();
        }
    }

    /// <summary>
    /// Merges a Cargo.lock package entry: source and checksum are first-wins, the lock path is
    /// overwritten on every call, and one "cargo.lock" evidence entry is added.
    /// </summary>
    public void ApplyCargoPackage(RustCargoPackage package, string relativePath)
    {
        EnsureVersion(package.Version);

        if (!string.IsNullOrWhiteSpace(package.Source))
        {
            _source ??= package.Source.Trim();
            _metadata["source"] = _source;
        }

        var hasChecksum = !string.IsNullOrWhiteSpace(package.Checksum);
        if (hasChecksum)
        {
            _checksum ??= package.Checksum!.Trim();
            _metadata["checksum"] = _checksum;
        }

        _metadata["cargo.lock.path"] = relativePath;

        _evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "cargo.lock",
            relativePath,
            $"{package.Name} {package.Version}",
            hasChecksum ? package.Checksum : null));
    }

    /// <summary>
    /// Merges a fingerprint record: source is first-wins, profile and target kind are only stored
    /// when not already present, and one "cargo.fingerprint" evidence entry is added.
    /// </summary>
    public void ApplyFingerprint(RustFingerprintRecord record, string relativePath)
    {
        EnsureVersion(record.Version);

        if (!string.IsNullOrWhiteSpace(record.Source))
        {
            _source ??= record.Source.Trim();
            _metadata["source"] = _source;
        }

        AddMetadataIfEmpty("fingerprint.profile", record.Profile);
        AddMetadataIfEmpty("fingerprint.targetKind", record.TargetKind);

        var evidenceValue = record.TargetKind ?? record.Profile ?? "fingerprint";
        _evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "cargo.fingerprint",
            relativePath,
            evidenceValue,
            null));
    }

    /// <summary>
    /// Notes that a binary references this crate. Blank hashes and blank paths are ignored
    /// individually; usage is sticky once set.
    /// </summary>
    public void AddBinaryEvidence(string relativePath, string? hash, bool usedByEntrypoint)
    {
        var recordedHash = string.IsNullOrWhiteSpace(hash) ? null : hash;
        if (recordedHash is not null)
        {
            _binaryHashes.Add(recordedHash);
        }

        _usedByEntrypoint |= usedByEntrypoint;

        if (string.IsNullOrWhiteSpace(relativePath))
        {
            return;
        }

        _binaryPaths.Add(relativePath);
        _evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "binary",
            relativePath,
            null,
            recordedHash));
    }

    /// <summary>
    /// Produces the component record. The key is "purl::…" when a package URL can be built and
    /// "cargo::name::version" otherwise.
    /// </summary>
    public RustComponentRecord Build()
    {
        if (_binaryPaths.Count > 0)
        {
            _metadata["binary.paths"] = string.Join(';', _binaryPaths);
        }

        if (_binaryHashes.Count > 0)
        {
            _metadata["binary.sha256"] = string.Join(';', _binaryHashes);
        }

        // The sorted dictionary already enumerates in ordinal key order.
        var metadata = new List<KeyValuePair<string, string?>>(_metadata);

        var evidence = _evidence
            .OrderBy(static item => item.ComparisonKey, StringComparer.Ordinal)
            .ToImmutableArray();

        var purl = BuildPurl(Name, _version);
        var componentKey = string.IsNullOrEmpty(purl)
            ? $"cargo::{Name}::{_version ?? "unknown"}"
            : $"purl::{purl}";

        return new RustComponentRecord(
            Name: Name,
            Version: _version,
            Type: "cargo",
            Purl: purl,
            ComponentKey: componentKey,
            Metadata: metadata,
            Evidence: evidence,
            UsedByEntrypoint: _usedByEntrypoint);
    }

    private void AddMetadataIfEmpty(string key, string? value)
    {
        var usable = !string.IsNullOrWhiteSpace(key) && !string.IsNullOrWhiteSpace(value);
        if (usable && !_metadata.ContainsKey(key))
        {
            _metadata[key] = value!.Trim();
        }
    }

    private static string? BuildPurl(string name, string? version)
    {
        if (string.IsNullOrWhiteSpace(name))
        {
            return null;
        }

        var purl = $"pkg:cargo/{Uri.EscapeDataString(name.Trim())}";
        if (string.IsNullOrWhiteSpace(version))
        {
            return purl;
        }

        return $"{purl}@{Uri.EscapeDataString(version.Trim())}";
    }
}
/// <summary>
/// Accumulates the binaries in which a crate name was seen without any matching crate record,
/// and turns them into a heuristic <see cref="RustComponentRecord"/>.
/// </summary>
internal sealed class RustHeuristicBuilder
{
    private readonly HashSet<LanguageComponentEvidence> _evidence = new(new LanguageComponentEvidenceComparer());
    private readonly SortedSet<string> _binaryPaths = new(StringComparer.Ordinal);
    private readonly SortedSet<string> _binaryHashes = new(StringComparer.Ordinal);
    private bool _usedByEntrypoint;

    public RustHeuristicBuilder(string crateName)
    {
        CrateName = RustCrateBuilder.NormalizeName(crateName);
    }

    /// <summary>Normalized crate name this builder stands for.</summary>
    public string CrateName { get; }

    /// <summary>
    /// Registers a binary that mentions the crate. Blank hashes and blank paths are ignored
    /// individually; usage is sticky once set.
    /// </summary>
    public void AddBinary(string relativePath, string? hash, bool usedByEntrypoint)
    {
        var recordedHash = string.IsNullOrWhiteSpace(hash) ? null : hash;
        if (recordedHash is not null)
        {
            _binaryHashes.Add(recordedHash);
        }

        _usedByEntrypoint |= usedByEntrypoint;

        if (string.IsNullOrWhiteSpace(relativePath))
        {
            return;
        }

        _binaryPaths.Add(relativePath);
        _evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.Derived,
            "rust.heuristic",
            relativePath,
            CrateName,
            recordedHash));
    }

    /// <summary>
    /// Produces the heuristic record. It has no version or purl, and its key embeds the crate
    /// name plus every binary path joined with '|'.
    /// </summary>
    public RustComponentRecord Build()
    {
        var metadata = new List<KeyValuePair<string, string?>>
        {
            new("crate", CrateName),
            new("provenance", "heuristic"),
            new("binary.paths", string.Join(';', _binaryPaths)),
        };

        if (_binaryHashes.Count > 0)
        {
            metadata.Add(new KeyValuePair<string, string?>("binary.sha256", string.Join(';', _binaryHashes)));
        }

        metadata.Sort(static (left, right) => string.CompareOrdinal(left.Key, right.Key));

        var evidence = _evidence
            .OrderBy(static item => item.ComparisonKey, StringComparer.Ordinal)
            .ToImmutableArray();

        var joinedPaths = string.Join("|", _binaryPaths);

        return new RustComponentRecord(
            Name: CrateName,
            Version: null,
            Type: "cargo",
            Purl: null,
            ComponentKey: $"rust::heuristic::{CrateName}::{joinedPaths}",
            Metadata: metadata,
            Evidence: evidence,
            UsedByEntrypoint: _usedByEntrypoint);
    }
}
/// <summary>
/// Mutable bookkeeping for one scanned binary: where it lives, whether it is used, its SHA-256
/// (if known) and whether any crate or heuristic claimed it.
/// </summary>
internal sealed class RustBinaryRecord
{
    private string? _hash;

    /// <param name="absolutePath">Full path of the binary; must not be <c>null</c>.</param>
    /// <param name="relativePath">Path relative to the analysis root; blank becomes ".".</param>
    /// <param name="usedByEntrypoint">Initial usage flag.</param>
    /// <param name="hash">Known SHA-256, or blank/<c>null</c> when not yet available.</param>
    /// <exception cref="ArgumentNullException"><paramref name="absolutePath"/> is <c>null</c>.</exception>
    public RustBinaryRecord(string absolutePath, string relativePath, bool usedByEntrypoint, string? hash)
    {
        ArgumentNullException.ThrowIfNull(absolutePath);

        AbsolutePath = absolutePath;
        RelativePath = string.IsNullOrWhiteSpace(relativePath) ? "." : relativePath;
        UsedByEntrypoint = usedByEntrypoint;
        _hash = string.IsNullOrWhiteSpace(hash) ? null : hash;
    }

    public string AbsolutePath { get; }

    public string RelativePath { get; }

    public bool UsedByEntrypoint { get; private set; }

    public bool HasCrateMatch { get; private set; }

    public bool HasHeuristicMatch { get; private set; }

    /// <summary>True once either kind of match has been recorded.</summary>
    public bool HasMatches => HasCrateMatch || HasHeuristicMatch;

    public string? Hash => _hash;

    public void MarkCrateMatch()
    {
        HasCrateMatch = true;
    }

    public void MarkHeuristicMatch()
    {
        HasHeuristicMatch = true;
    }

    /// <summary>Turns the usage flag on when <paramref name="used"/> is true; never turns it off.</summary>
    public void MergeUsage(bool used)
    {
        UsedByEntrypoint |= used;
    }

    /// <summary>
    /// Keeps an existing hash. Otherwise adopts <paramref name="hash"/> when it is non-blank,
    /// and as a last resort hashes the file at <see cref="AbsolutePath"/>.
    /// </summary>
    public void EnsureHash(string? hash)
    {
        _hash ??= string.IsNullOrWhiteSpace(hash) ? ComputeHashSafely() : hash;
    }

    // Returns the lowercase hex SHA-256 of the file, or null when it cannot be read.
    private string? ComputeHashSafely()
    {
        try
        {
            using var file = new FileStream(AbsolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);
            using var algorithm = SHA256.Create();
            return Convert.ToHexString(algorithm.ComputeHash(file)).ToLowerInvariant();
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            return null;
        }
    }
}
/// <summary>
/// Dictionary key for a crate: normalized name plus trimmed version (<c>null</c> when blank).
/// </summary>
internal readonly record struct RustCrateKey
{
    public string Name { get; }

    public string? Version { get; }

    public RustCrateKey(string name, string? version)
    {
        Name = RustCrateBuilder.NormalizeName(name);

        if (string.IsNullOrWhiteSpace(version))
        {
            Version = null;
        }
        else
        {
            Version = version.Trim();
        }
    }
}
/// <summary>
/// Compares evidence entries field by field (kind, source, locator, value, SHA-256) using
/// ordinal string comparison, so duplicates collapse in hash sets.
/// </summary>
internal sealed class LanguageComponentEvidenceComparer : IEqualityComparer<LanguageComponentEvidence>
{
    public bool Equals(LanguageComponentEvidence? x, LanguageComponentEvidence? y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (x is null || y is null)
        {
            return false;
        }

        if (x.Kind != y.Kind)
        {
            return false;
        }

        return SameText(x.Source, y.Source)
            && SameText(x.Locator, y.Locator)
            && SameText(x.Value, y.Value)
            && SameText(x.Sha256, y.Sha256);

        static bool SameText(string? left, string? right) => string.Equals(left, right, StringComparison.Ordinal);
    }

    public int GetHashCode(LanguageComponentEvidence obj)
    {
        var ordinal = StringComparer.Ordinal;
        var accumulator = new HashCode();
        accumulator.Add(obj.Kind);
        accumulator.Add(obj.Source, ordinal);
        accumulator.Add(obj.Locator, ordinal);
        accumulator.Add(obj.Value, ordinal);
        accumulator.Add(obj.Sha256, ordinal);
        return accumulator.ToHashCode();
    }
}

View File

@@ -0,0 +1,250 @@
using System.Buffers;
using System.Collections.Immutable;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
/// <summary>
/// Finds ELF binaries beneath a root directory and extracts candidate crate names from
/// legacy-mangled symbols ("_ZN" followed by a length-prefixed first path segment).
/// </summary>
internal static class RustBinaryClassifier
{
    private static readonly ReadOnlyMemory<byte> ElfMagic = new byte[] { 0x7F, (byte)'E', (byte)'L', (byte)'F' };
    private static readonly ReadOnlyMemory<byte> SymbolPrefix = new byte[] { (byte)'_', (byte)'Z', (byte)'N' };

    // Files are read in chunks of this size.
    private const int ChunkSize = 64 * 1024;

    // Tail bytes of each chunk that are replayed at the start of the next one, so a symbol that
    // straddles a chunk boundary is still seen as long as it fits inside this window.
    private const int OverlapSize = 48;

    // Larger files are not considered at all.
    private const long MaxBinarySize = 128L * 1024L * 1024L;

    // Longest crate-name segment that will be accepted.
    private const int MaxCrateNameLength = 256;

    // Crates that ship with the toolchain; never reported as candidates.
    private static readonly HashSet<string> StandardCrates = new(StringComparer.Ordinal)
    {
        "core",
        "alloc",
        "std",
        "panic_unwind",
        "panic_abort",
    };

    private static readonly EnumerationOptions Enumeration = new()
    {
        MatchCasing = MatchCasing.CaseSensitive,
        IgnoreInaccessible = true,
        RecurseSubdirectories = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
    };

    /// <summary>
    /// Enumerates every file under <paramref name="rootPath"/>, keeps those that start with the
    /// ELF magic and are at most 128 MiB, and returns them with their crate candidates.
    /// </summary>
    /// <param name="rootPath">Directory to scan recursively.</param>
    /// <param name="cancellationToken">Checked per file and per chunk read.</param>
    /// <returns>One entry per eligible binary, in enumeration order.</returns>
    /// <exception cref="ArgumentException"><paramref name="rootPath"/> is null or blank.</exception>
    public static IReadOnlyList<RustBinaryInfo> Scan(string rootPath, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(rootPath))
        {
            throw new ArgumentException("Root path is required", nameof(rootPath));
        }

        var binaries = new List<RustBinaryInfo>();
        foreach (var path in Directory.EnumerateFiles(rootPath, "*", Enumeration))
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!IsEligibleBinary(path))
            {
                continue;
            }

            var candidates = ExtractCrateNames(path, cancellationToken);
            binaries.Add(new RustBinaryInfo(path, candidates));
        }

        return binaries;
    }

    // True when the file exists, is non-empty, is within the size cap and starts with the ELF magic.
    // Unreadable files are treated as not eligible.
    private static bool IsEligibleBinary(string path)
    {
        try
        {
            var info = new FileInfo(path);
            if (!info.Exists || info.Length == 0 || info.Length > MaxBinarySize)
            {
                return false;
            }

            using var stream = info.OpenRead();
            Span<byte> header = stackalloc byte[4];
            var read = stream.Read(header);
            return read == 4 && header.SequenceEqual(ElfMagic.Span);
        }
        catch (IOException)
        {
            return false;
        }
        catch (UnauthorizedAccessException)
        {
            return false;
        }
    }

    // Streams the file through a pooled buffer and collects crate names. A read failure part-way
    // through discards everything found so far and yields an empty result.
    private static ImmutableArray<string> ExtractCrateNames(string path, CancellationToken cancellationToken)
    {
        var names = new HashSet<string>(StringComparer.Ordinal);
        var buffer = ArrayPool<byte>.Shared.Rent(ChunkSize + OverlapSize);
        var overlap = new byte[OverlapSize];
        var overlapLength = 0;

        try
        {
            using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
            while (true)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Replay the tail of the previous chunk in front of the new data.
                if (overlapLength > 0)
                {
                    Array.Copy(overlap, 0, buffer, 0, overlapLength);
                }

                var read = stream.Read(buffer, overlapLength, ChunkSize);
                if (read <= 0)
                {
                    break;
                }

                var window = new ReadOnlySpan<byte>(buffer, 0, overlapLength + read);
                ScanForSymbols(window, names);

                overlapLength = Math.Min(OverlapSize, window.Length);
                if (overlapLength > 0)
                {
                    window[^overlapLength..].CopyTo(overlap);
                }
            }
        }
        catch (IOException)
        {
            return ImmutableArray<string>.Empty;
        }
        catch (UnauthorizedAccessException)
        {
            return ImmutableArray<string>.Empty;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(buffer);
        }

        if (names.Count == 0)
        {
            return ImmutableArray<string>.Empty;
        }

        // Names are already validated identifiers, so only the length and standard-crate
        // filters remain. Ordinal ordering keeps the output deterministic.
        return names
            .Where(static name => name.Length > 1)
            .Where(static name => !StandardCrates.Contains(name))
            .OrderBy(static name => name, StringComparer.Ordinal)
            .ToImmutableArray();
    }

    // Finds every "_ZN" occurrence in the window and records the crate name that follows, if any.
    private static void ScanForSymbols(ReadOnlySpan<byte> span, HashSet<string> names)
    {
        var prefix = SymbolPrefix.Span;
        var index = 0;

        while (index < span.Length)
        {
            var offset = span[index..].IndexOf(prefix);
            if (offset < 0)
            {
                break;
            }

            index += offset + prefix.Length;
            if (index >= span.Length)
            {
                break;
            }

            if (!TryParseCrate(span[index..], out var crate, out var consumed))
            {
                index += 1;
                continue;
            }

            if (!string.IsNullOrWhiteSpace(crate))
            {
                names.Add(crate);
            }

            index += Math.Max(consumed, 1);
        }
    }

    // Parses "<decimal length><name>" at the start of the span. The name must consist solely of
    // ASCII letters, digits and underscores. Anything else (NUL padding, "$LT$"-style escapes
    // from generic impl paths, arbitrary binary data) is rejected rather than reported as a crate.
    private static bool TryParseCrate(ReadOnlySpan<byte> span, out string? crate, out int consumed)
    {
        crate = null;
        consumed = 0;

        var i = 0;
        var length = 0;
        while (i < span.Length && span[i] is >= (byte)'0' and <= (byte)'9')
        {
            length = (length * 10) + (span[i] - (byte)'0');
            i++;

            if (length > MaxCrateNameLength)
            {
                return false;
            }
        }

        if (i == 0 || length <= 0 || i + length > span.Length)
        {
            return false;
        }

        var name = span.Slice(i, length);
        if (!IsIdentifier(name))
        {
            return false;
        }

        crate = Encoding.ASCII.GetString(name);
        consumed = i + length;
        return true;
    }

    private static bool IsIdentifier(ReadOnlySpan<byte> name)
    {
        foreach (var value in name)
        {
            var isLower = value >= (byte)'a' && value <= (byte)'z';
            var isUpper = value >= (byte)'A' && value <= (byte)'Z';
            var isDigit = value >= (byte)'0' && value <= (byte)'9';
            if (!isLower && !isUpper && !isDigit && value != (byte)'_')
            {
                return false;
            }
        }

        return true;
    }
}

/// <summary>
/// An eligible binary and the crate names found in its symbols.
/// </summary>
/// <param name="AbsolutePath">Full path of the binary.</param>
/// <param name="CrateCandidates">Distinct candidate crate names in ordinal order; may be empty.</param>
internal sealed record RustBinaryInfo(string AbsolutePath, ImmutableArray<string> CrateCandidates)
{
    private string? _sha256;

    /// <summary>
    /// Returns the lowercase hex SHA-256 of the file, computed once and cached. Returns the
    /// empty string when the file cannot be read.
    /// </summary>
    public string ComputeSha256()
    {
        if (_sha256 is not null)
        {
            return _sha256;
        }

        try
        {
            using var stream = new FileStream(AbsolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);
            using var sha = SHA256.Create();
            var hash = sha.ComputeHash(stream);
            _sha256 = Convert.ToHexString(hash).ToLowerInvariant();
        }
        catch (IOException)
        {
            _sha256 = string.Empty;
        }
        catch (UnauthorizedAccessException)
        {
            _sha256 = string.Empty;
        }

        return _sha256 ?? string.Empty;
    }
}

View File

@@ -0,0 +1,298 @@
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
/// <summary>
/// Minimal line-oriented reader for Cargo.lock files. Only <c>[[package]]</c> tables are
/// interpreted; within them the name, version, source, checksum and dependencies keys are kept.
/// </summary>
internal static class RustCargoLockParser
{
    /// <summary>
    /// Parses the lock file at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the Cargo.lock file.</param>
    /// <param name="cancellationToken">Checked once per line.</param>
    /// <returns>
    /// The packages in file order. Empty when the file does not exist or cannot be read,
    /// matching the best-effort behaviour of the other Rust scanners.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or blank.</exception>
    public static IReadOnlyList<RustCargoPackage> Parse(string path, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            throw new ArgumentException("Lock path is required", nameof(path));
        }

        var info = new FileInfo(path);
        if (!info.Exists)
        {
            return Array.Empty<RustCargoPackage>();
        }

        try
        {
            return ParseCore(path, cancellationToken);
        }
        catch (IOException)
        {
            // The file vanished or is locked; one bad lock file must not abort the analysis.
            return Array.Empty<RustCargoPackage>();
        }
        catch (UnauthorizedAccessException)
        {
            return Array.Empty<RustCargoPackage>();
        }
    }

    private static List<RustCargoPackage> ParseCore(string path, CancellationToken cancellationToken)
    {
        var packages = new List<RustCargoPackage>();

        using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var reader = new StreamReader(stream);

        RustCargoPackageBuilder? builder = null;
        string? currentArrayKey = null;
        var arrayValues = new List<string>();

        string? line;
        while ((line = reader.ReadLine()) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var trimmed = TrimComments(line.AsSpan());
            if (trimmed.Length == 0)
            {
                continue;
            }

            if (IsPackageHeader(trimmed))
            {
                FlushCurrent(builder, packages);
                builder = new RustCargoPackageBuilder();
                currentArrayKey = null;
                arrayValues.Clear();
                continue;
            }

            // Anything before the first [[package]] or inside another table is ignored.
            if (builder is null)
            {
                continue;
            }

            // Inside a multi-line array: collect entries until the closing bracket.
            if (currentArrayKey is not null)
            {
                if (trimmed[0] == ']')
                {
                    builder.SetArray(currentArrayKey, arrayValues);
                    currentArrayKey = null;
                    arrayValues.Clear();
                    continue;
                }

                // Entries end with a separator comma (`"serde",`), so split the line the same
                // way as an inline array instead of treating it as one raw value.
                AppendArrayItems(trimmed, arrayValues);
                continue;
            }

            if (trimmed[0] == '[')
            {
                // Entering a new table; finish any pending package and skip section.
                FlushCurrent(builder, packages);
                builder = null;
                continue;
            }

            var equalsIndex = trimmed.IndexOf('=');
            if (equalsIndex < 0)
            {
                continue;
            }

            var key = trimmed[..equalsIndex].Trim();
            var valuePart = trimmed[(equalsIndex + 1)..].Trim();
            if (valuePart.Length == 0)
            {
                continue;
            }

            if (valuePart[0] == '[')
            {
                currentArrayKey = key.ToString();
                arrayValues.Clear();

                // Array opened and closed on the same line.
                if (valuePart.Length > 1 && valuePart[^1] == ']')
                {
                    AppendArrayItems(valuePart[1..^1].Trim(), arrayValues);
                    builder.SetArray(currentArrayKey, arrayValues);
                    currentArrayKey = null;
                    arrayValues.Clear();
                }

                continue;
            }

            var parsed = ExtractString(valuePart);
            if (parsed is not null)
            {
                builder.SetField(key, parsed);
            }
        }

        // Tolerate a file that ends in the middle of an array.
        if (currentArrayKey is not null && arrayValues.Count > 0)
        {
            builder?.SetArray(currentArrayKey, arrayValues);
        }

        FlushCurrent(builder, packages);
        return packages;
    }

    // Removes a trailing '#' comment and surrounding whitespace. A '#' inside a double-quoted
    // string is data (git sources look like "git+https://host/repo?branch=main#<rev>") and is kept.
    private static ReadOnlySpan<char> TrimComments(ReadOnlySpan<char> line)
    {
        var inString = false;
        for (var i = 0; i < line.Length; i++)
        {
            var current = line[i];
            if (inString)
            {
                if (current == '\\')
                {
                    i++; // skip the escaped character
                }
                else if (current == '"')
                {
                    inString = false;
                }
            }
            else if (current == '"')
            {
                inString = true;
            }
            else if (current == '#')
            {
                return line[..i].Trim();
            }
        }

        return line.Trim();
    }

    private static bool IsPackageHeader(ReadOnlySpan<char> value)
        => value.SequenceEqual("[[package]]".AsSpan());

    // Splits comma-separated array content, unquotes each item and appends the non-empty ones.
    private static void AppendArrayItems(ReadOnlySpan<char> content, List<string> target)
    {
        if (content.Length == 0)
        {
            return;
        }

        foreach (var token in SplitInlineArray(content.ToString()))
        {
            var item = ExtractString(token.AsSpan());
            if (!string.IsNullOrEmpty(item))
            {
                target.Add(item);
            }
        }
    }

    // Splits on commas that are outside double quotes; empty pieces are dropped.
    private static IEnumerable<string> SplitInlineArray(string value)
    {
        var start = 0;
        var inString = false;

        for (var i = 0; i < value.Length; i++)
        {
            var current = value[i];
            if (current == '"')
            {
                inString = !inString;
            }

            if (current == ',' && !inString)
            {
                var item = value.Substring(start, i - start).Trim();
                if (item.Length > 0)
                {
                    yield return item;
                }

                start = i + 1;
            }
        }

        if (start < value.Length)
        {
            var item = value.Substring(start).Trim();
            if (item.Length > 0)
            {
                yield return item;
            }
        }
    }

    // Returns the text between surrounding double quotes, or the trimmed raw text for unquoted
    // values; null when nothing is left.
    private static string? ExtractString(ReadOnlySpan<char> value)
    {
        if (value.Length == 0)
        {
            return null;
        }

        if (value[0] == '"' && value[^1] == '"')
        {
            var inner = value[1..^1];
            return inner.ToString();
        }

        var trimmed = value.Trim();
        return trimmed.Length == 0 ? null : trimmed.ToString();
    }

    private static void FlushCurrent(RustCargoPackageBuilder? builder, List<RustCargoPackage> packages)
    {
        if (builder is null || !builder.HasData)
        {
            return;
        }

        if (builder.TryBuild(out var package))
        {
            packages.Add(package);
        }
    }

    // Collects the fields of one [[package]] table. Scalar fields are first-wins.
    private sealed class RustCargoPackageBuilder
    {
        private readonly SortedSet<string> _dependencies = new(StringComparer.Ordinal);
        private string? _name;
        private string? _version;
        private string? _source;
        private string? _checksum;

        public bool HasData => !string.IsNullOrWhiteSpace(_name);

        public void SetField(ReadOnlySpan<char> key, string value)
        {
            if (key.SequenceEqual("name".AsSpan()))
            {
                _name ??= value.Trim();
            }
            else if (key.SequenceEqual("version".AsSpan()))
            {
                _version ??= value.Trim();
            }
            else if (key.SequenceEqual("source".AsSpan()))
            {
                _source ??= value.Trim();
            }
            else if (key.SequenceEqual("checksum".AsSpan()))
            {
                _checksum ??= value.Trim();
            }
        }

        // Only the "dependencies" array is retained; other arrays are ignored.
        public void SetArray(string key, IEnumerable<string> values)
        {
            if (!string.Equals(key, "dependencies", StringComparison.Ordinal))
            {
                return;
            }

            foreach (var entry in values)
            {
                if (string.IsNullOrWhiteSpace(entry))
                {
                    continue;
                }

                var normalized = entry.Trim();
                if (normalized.Length > 0)
                {
                    _dependencies.Add(normalized);
                }
            }
        }

        public bool TryBuild(out RustCargoPackage package)
        {
            if (string.IsNullOrWhiteSpace(_name))
            {
                package = null!;
                return false;
            }

            package = new RustCargoPackage(
                _name!,
                _version ?? string.Empty,
                _source,
                _checksum,
                _dependencies.ToArray());
            return true;
        }
    }
}

/// <summary>
/// One <c>[[package]]</c> entry read from a Cargo.lock file.
/// </summary>
/// <param name="Name">Package name.</param>
/// <param name="Version">Package version; empty when the entry had none.</param>
/// <param name="Source">Value of the source key, if present.</param>
/// <param name="Checksum">Value of the checksum key, if present.</param>
/// <param name="Dependencies">Dependency entries as written in the file, in ordinal order.</param>
internal sealed record RustCargoPackage(
    string Name,
    string Version,
    string? Source,
    string? Checksum,
    IReadOnlyList<string> Dependencies);

View File

@@ -0,0 +1,178 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
/// <summary>
/// Locates JSON files that live under a <c>.fingerprint</c> directory and derives a crate
/// identity from each one.
/// </summary>
internal static class RustFingerprintScanner
{
    private static readonly EnumerationOptions Enumeration = new()
    {
        MatchCasing = MatchCasing.CaseSensitive,
        IgnoreInaccessible = true,
        RecurseSubdirectories = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
    };

    // Path fragment that marks a file as living under a ".fingerprint" directory.
    private static readonly string FingerprintSegment = $"{Path.DirectorySeparatorChar}.fingerprint{Path.DirectorySeparatorChar}";

    /// <summary>
    /// Scans <paramref name="rootPath"/> recursively for <c>*.json</c> files under a
    /// <c>.fingerprint</c> directory and parses each one.
    /// </summary>
    /// <param name="rootPath">Directory to scan.</param>
    /// <param name="cancellationToken">Checked once per enumerated file.</param>
    /// <returns>One record per file that yielded a crate name, in enumeration order.</returns>
    /// <exception cref="ArgumentException"><paramref name="rootPath"/> is null or blank.</exception>
    public static IReadOnlyList<RustFingerprintRecord> Scan(string rootPath, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(rootPath))
        {
            throw new ArgumentException("Root path is required", nameof(rootPath));
        }

        var results = new List<RustFingerprintRecord>();
        foreach (var path in Directory.EnumerateFiles(rootPath, "*.json", Enumeration))
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!path.Contains(FingerprintSegment, StringComparison.Ordinal))
            {
                continue;
            }

            if (TryParse(path, out var record))
            {
                results.Add(record);
            }
        }

        return results;
    }

    // Reads one fingerprint file. Unreadable files, malformed JSON and documents whose root is
    // not an object are all reported as "not a fingerprint" instead of throwing.
    private static bool TryParse(string path, out RustFingerprintRecord record)
    {
        record = default!;

        try
        {
            using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
            using var document = JsonDocument.Parse(stream);
            var root = document.RootElement;

            // JsonElement.TryGetProperty throws InvalidOperationException on anything but an
            // object, so a stray array or scalar would otherwise abort the whole scan.
            if (root.ValueKind != JsonValueKind.Object)
            {
                return false;
            }

            var pkgId = TryGetString(root, "pkgid")
                ?? TryGetString(root, "package_id")
                ?? TryGetString(root, "packageId");

            var (name, version, source) = ParseIdentity(pkgId, path);
            if (string.IsNullOrWhiteSpace(name))
            {
                return false;
            }

            var profile = TryGetString(root, "profile");
            var targetKind = TryGetKind(root);

            record = new RustFingerprintRecord(
                Name: name!,
                Version: version,
                Source: source,
                TargetKind: targetKind,
                Profile: profile,
                AbsolutePath: path);
            return true;
        }
        catch (JsonException)
        {
            return false;
        }
        catch (IOException)
        {
            return false;
        }
        catch (UnauthorizedAccessException)
        {
            return false;
        }
    }

    // Derives (name, version, source) from a "name version (source)" package id. When the id is
    // missing or has no space-separated parts, falls back to the parent directory name with
    // everything from the last '-' onwards removed; version and source are then unknown.
    private static (string? Name, string? Version, string? Source) ParseIdentity(string? pkgId, string filePath)
    {
        if (!string.IsNullOrWhiteSpace(pkgId))
        {
            var span = pkgId.AsSpan().Trim();
            var firstSpace = span.IndexOf(' ');
            if (firstSpace > 0 && firstSpace < span.Length - 1)
            {
                var name = span[..firstSpace].ToString();
                var remaining = span[(firstSpace + 1)..].Trim();

                var secondSpace = remaining.IndexOf(' ');
                if (secondSpace < 0)
                {
                    return (name, remaining.ToString(), null);
                }

                var version = remaining[..secondSpace].ToString();
                var potentialSource = remaining[(secondSpace + 1)..].Trim();

                // Strip the parentheses that wrap the source, if present.
                if (potentialSource.Length > 1 && potentialSource[0] == '(' && potentialSource[^1] == ')')
                {
                    potentialSource = potentialSource[1..^1].Trim();
                }

                var source = potentialSource.Length == 0 ? null : potentialSource.ToString();
                return (name, version, source);
            }
        }

        var directory = Path.GetDirectoryName(filePath);
        if (string.IsNullOrEmpty(directory))
        {
            return (null, null, null);
        }

        var crateDirectory = Path.GetFileName(directory);
        if (string.IsNullOrWhiteSpace(crateDirectory))
        {
            return (null, null, null);
        }

        var dashIndex = crateDirectory.LastIndexOf('-');
        if (dashIndex <= 0)
        {
            return (crateDirectory, null, null);
        }

        var maybeName = crateDirectory[..dashIndex];
        return (maybeName, null, null);
    }

    // First string in the "target_kind" array, else a string-valued "target" property, else null.
    private static string? TryGetKind(JsonElement root)
    {
        if (root.TryGetProperty("target_kind", out var array) && array.ValueKind == JsonValueKind.Array && array.GetArrayLength() > 0)
        {
            var first = array[0];
            if (first.ValueKind == JsonValueKind.String)
            {
                return first.GetString();
            }
        }

        if (root.TryGetProperty("target", out var target) && target.ValueKind == JsonValueKind.String)
        {
            return target.GetString();
        }

        return null;
    }

    // Returns the property value when it exists and is a JSON string; otherwise null.
    // Callers must pass an object element.
    private static string? TryGetString(JsonElement element, string propertyName)
    {
        if (element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString();
        }

        return null;
    }
}

/// <summary>
/// Crate identity derived from one fingerprint JSON file.
/// </summary>
/// <param name="Name">Crate name.</param>
/// <param name="Version">Version from the package id, or <c>null</c> when unknown.</param>
/// <param name="Source">Source from the package id, or <c>null</c> when unknown.</param>
/// <param name="TargetKind">Target kind string, if the file provided one.</param>
/// <param name="Profile">Profile string, if the file provided one.</param>
/// <param name="AbsolutePath">Path of the fingerprint file.</param>
internal sealed record RustFingerprintRecord(
    string Name,
    string? Version,
    string? Source,
    string? TargetKind,
    string? Profile,
    string AbsolutePath);

View File

@@ -0,0 +1,17 @@
using System;
using StellaOps.Scanner.Analyzers.Lang.Plugin;
namespace StellaOps.Scanner.Analyzers.Lang.Rust;
/// <summary>
/// Plug-in entry point that exposes <see cref="RustLanguageAnalyzer"/> to the host.
/// </summary>
public sealed class RustAnalyzerPlugin : ILanguageAnalyzerPlugin
{
    /// <summary>Plug-in name reported to the host.</summary>
    public string Name
    {
        get { return "StellaOps.Scanner.Analyzers.Lang.Rust"; }
    }

    /// <summary>Available whenever a service provider is supplied.</summary>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>Creates a new analyzer instance.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);

        return new RustLanguageAnalyzer();
    }
}

View File

@@ -0,0 +1,66 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Rust;
/// <summary>
/// Language analyzer that collects Rust components and forwards them to the component writer.
/// </summary>
public sealed class RustLanguageAnalyzer : ILanguageAnalyzer
{
    public string Id => "rust";

    public string DisplayName => "Rust Analyzer (preview)";

    /// <summary>
    /// Collects all Rust records synchronously and writes crates first, then heuristics, then
    /// fallbacks.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="context"/> or <paramref name="writer"/> is <c>null</c>.
    /// </exception>
    public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(writer);
        cancellationToken.ThrowIfCancellationRequested();

        var collected = RustAnalyzerCollector.Collect(context, cancellationToken);

        EmitRecords(Id, writer, collected.Crates);
        EmitRecords(Id, writer, collected.Heuristics);
        EmitRecords(Id, writer, collected.Fallbacks);

        return ValueTask.CompletedTask;
    }

    // Records without a purl are registered under their explicit component key; the rest are
    // registered by purl.
    private static void EmitRecords(string analyzerId, LanguageComponentWriter writer, IReadOnlyList<RustComponentRecord> records)
    {
        foreach (var record in records)
        {
            if (record is null)
            {
                continue;
            }

            if (string.IsNullOrEmpty(record.Purl))
            {
                writer.AddFromExplicitKey(
                    analyzerId: analyzerId,
                    componentKey: record.ComponentKey,
                    purl: null,
                    name: record.Name,
                    version: record.Version,
                    type: record.Type,
                    metadata: record.Metadata,
                    evidence: record.Evidence,
                    usedByEntrypoint: record.UsedByEntrypoint);
                continue;
            }

            writer.AddFromPurl(
                analyzerId: analyzerId,
                purl: record.Purl!,
                name: record.Name,
                version: record.Version,
                type: record.Type,
                metadata: record.Metadata,
                evidence: record.Evidence,
                usedByEntrypoint: record.UsedByEntrypoint);
        }
    }
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: warnings are errors, nullable annotations are on, and SDK default item globs are turned off. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<!-- Because default items are disabled above, sources, embedded JSON resources and remaining content are listed explicitly. -->
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
</ItemGroup>
<!-- Shared language-analyzer project this plug-in builds against. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,10 @@
# Rust Analyzer Task Flow
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|-----|----|--------|------------|-------------|---------------|
| 1 | SCANNER-ANALYZERS-LANG-10-306A | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-307 | Parse Cargo metadata (`Cargo.lock`, `.fingerprint`, `.metadata`) and map crates to components with evidence. | Fixtures confirm crate attribution ≥85% coverage; metadata normalized; evidence includes path + hash. |
| 2 | SCANNER-ANALYZERS-LANG-10-306B | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-306A | Implement heuristic classifier using ELF section names, symbol mangling, and `.comment` data for stripped binaries. | Heuristic output flagged as `heuristic`; regression tests ensure no false “observed” classifications. |
| 3 | SCANNER-ANALYZERS-LANG-10-306C | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-306B | Integrate binary hash fallback (`bin:{sha256}`) and tie into shared quiet provenance helpers. | Fallback path deterministic; shared helpers reused; tests verify consistent hashing. |
| 4 | SCANNER-ANALYZERS-LANG-10-307R | DOING (2025-10-23) | SCANNER-ANALYZERS-LANG-10-306C | Finalize shared helper usage (license, usage flags) and concurrency-safe caches. | Analyzer uses shared utilities; concurrency tests pass; no race conditions. |
| 5 | SCANNER-ANALYZERS-LANG-10-308R | TODO | SCANNER-ANALYZERS-LANG-10-307R | Determinism fixtures + performance benchmarks; compare against competitor heuristic coverage. | Fixtures `Fixtures/lang/rust/` committed; determinism guard; benchmark shows ≥15% better coverage vs competitor. |
| 6 | SCANNER-ANALYZERS-LANG-10-309R | TODO | SCANNER-ANALYZERS-LANG-10-308R | Package plug-in manifest + Offline Kit documentation; ensure Worker integration. | Manifest copied; Worker loads analyzer; Offline Kit doc updated. |

View File

@@ -0,0 +1,23 @@
{
"schemaVersion": "1.0",
"id": "stellaops.analyzer.lang.rust",
"displayName": "StellaOps Rust Analyzer (preview)",
"version": "0.1.0",
"requiresRestart": true,
"entryPoint": {
"type": "dotnet",
"assembly": "StellaOps.Scanner.Analyzers.Lang.Rust.dll",
"typeName": "StellaOps.Scanner.Analyzers.Lang.Rust.RustAnalyzerPlugin"
},
"capabilities": [
"language-analyzer",
"rust",
"cargo"
],
"metadata": {
"org.stellaops.analyzer.language": "rust",
"org.stellaops.analyzer.kind": "language",
"org.stellaops.restart.required": "true",
"org.stellaops.analyzer.status": "preview"
}
}

View File

@@ -0,0 +1,33 @@
# StellaOps.Scanner.Analyzers.Lang — Agent Charter
## Role
Deliver deterministic language ecosystem analyzers that run inside Scanner Workers, emit component evidence for SBOM assembly, and package as restart-time plug-ins.
## Scope
- Shared analyzer abstractions for installed application ecosystems (Java, Node.js, Python, Go, .NET, Rust).
- Evidence helpers that map on-disk artefacts to canonical component identities (purl/bin sha) with provenance and usage flags.
- File-system traversal, metadata parsing, and normalization for language-specific package formats.
- Plug-in bootstrap, manifest authoring, and DI registration so Workers load analyzers at start-up.
## Out of Scope
- OS package analyzers, native link graph, or EntryTrace plug-ins (handled by other guilds).
- SBOM composition, diffing, or signing (owned by Emit/Diff/Signer groups).
- Policy adjudication or vulnerability joins.
## Expectations
- Deterministic output: identical inputs → identical component ordering and hashes.
- Memory discipline: streaming walkers, avoid loading entire trees; reuse buffers.
- Cancellation-aware and timeboxed per layer.
- Enrich telemetry (counters + timings) via Scanner.Core primitives.
- Update `TASKS.md` as work progresses (TODO → DOING → DONE/BLOCKED).
## Dependencies
- Scanner.Core contracts + observability helpers.
- Scanner.Worker analyzer dispatcher.
- Upcoming Scanner.Emit models for SBOM assembly.
- Plugin host infrastructure under `StellaOps.Plugin`.
## Testing & Artifacts
- Determinism harness with golden fixtures under `Fixtures/`.
- Microbench benchmarks recorded per language where feasible.
- Plugin manifests stored under `plugins/scanner/analyzers/lang/` with cosign workflow documented.

View File

@@ -0,0 +1,24 @@
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Contract implemented by language ecosystem analyzers. Analyzers must be deterministic,
/// cancellation-aware, and refrain from mutating shared state.
/// </summary>
public interface ILanguageAnalyzer
{
/// <summary>
/// Stable identifier (e.g., <c>java</c>, <c>node</c>).
/// </summary>
/// <remarks>
/// <see cref="LanguageAnalyzerEngine"/> de-duplicates analyzers by this value and runs them
/// in ordinal order of it.
/// </remarks>
string Id { get; }
/// <summary>
/// Human-readable display name for diagnostics.
/// </summary>
string DisplayName { get; }
/// <summary>
/// Executes the analyzer against the resolved filesystem.
/// </summary>
/// <param name="context">Root path, time provider, usage hints and optional services for the scan.</param>
/// <param name="writer">Sink that receives the components the analyzer discovers.</param>
/// <param name="cancellationToken">Token used to abort the analysis.</param>
/// <returns>A task-like value that completes when the analyzer has finished.</returns>
ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,16 @@
namespace StellaOps.Scanner.Analyzers.Lang.Internal;
/// <summary>
/// Factory for the JSON serializer settings used when rendering analyzer output.
/// </summary>
internal static class LanguageAnalyzerJson
{
    /// <summary>
    /// Builds a fresh options instance based on the web defaults: null values are omitted
    /// and enums are written as camel-cased strings.
    /// </summary>
    /// <param name="indent">When <c>true</c>, the output is pretty-printed.</param>
    /// <returns>A new, caller-owned <see cref="JsonSerializerOptions"/>.</returns>
    public static JsonSerializerOptions CreateDefault(bool indent = false)
        => new(JsonSerializerDefaults.Web)
        {
            WriteIndented = indent,
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.CamelCase) },
        };
}

View File

@@ -0,0 +1,67 @@
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Per-scan context handed to language analyzers: the absolute root directory being analysed,
/// a time provider, usage hints, and an optional service provider.
/// </summary>
public sealed class LanguageAnalyzerContext
{
    /// <summary>
    /// Creates a context rooted at <paramref name="rootPath"/>.
    /// </summary>
    /// <param name="rootPath">Directory to analyse; converted to an absolute path.</param>
    /// <param name="timeProvider">Time source exposed to analyzers.</param>
    /// <param name="usageHints">Optional usage hints; <see cref="LanguageUsageHints.Empty"/> when omitted.</param>
    /// <param name="services">Optional service provider backing <see cref="TryGetService{T}"/>.</param>
    /// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException">The resolved root directory does not exist.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="timeProvider"/> is <c>null</c>.</exception>
    public LanguageAnalyzerContext(string rootPath, TimeProvider timeProvider, LanguageUsageHints? usageHints = null, IServiceProvider? services = null)
    {
        if (string.IsNullOrWhiteSpace(rootPath))
        {
            throw new ArgumentException("Root path is required", nameof(rootPath));
        }

        var absoluteRoot = Path.GetFullPath(rootPath);
        if (!Directory.Exists(absoluteRoot))
        {
            throw new DirectoryNotFoundException($"Root path '{absoluteRoot}' does not exist.");
        }

        ArgumentNullException.ThrowIfNull(timeProvider);

        RootPath = absoluteRoot;
        TimeProvider = timeProvider;
        UsageHints = usageHints ?? LanguageUsageHints.Empty;
        Services = services;
    }

    /// <summary>Absolute path of the directory being analysed.</summary>
    public string RootPath { get; }

    /// <summary>Time source supplied at construction.</summary>
    public TimeProvider TimeProvider { get; }

    /// <summary>Usage hints supplied at construction, never <c>null</c>.</summary>
    public LanguageUsageHints UsageHints { get; }

    /// <summary>Optional service provider; <c>null</c> when none was supplied.</summary>
    public IServiceProvider? Services { get; }

    /// <summary>
    /// Looks up a service of type <typeparamref name="T"/> in <see cref="Services"/>.
    /// </summary>
    /// <param name="service">The resolved service, or <c>null</c> when unavailable.</param>
    /// <returns><c>true</c> when a service instance was found.</returns>
    public bool TryGetService<T>([NotNullWhen(true)] out T? service) where T : class
    {
        service = Services?.GetService(typeof(T)) as T;
        return service is not null;
    }

    /// <summary>
    /// Combines <paramref name="relative"/> with <see cref="RootPath"/> and returns the
    /// resulting absolute path; an empty input yields the root itself.
    /// </summary>
    /// <param name="relative">Path to combine with the root.</param>
    /// <returns>The absolute form of the combined path.</returns>
    public string ResolvePath(ReadOnlySpan<char> relative)
        => relative.IsEmpty
            ? RootPath
            : Path.GetFullPath(Path.Combine(RootPath, relative.ToString()));

    /// <summary>
    /// Expresses <paramref name="absolutePath"/> relative to <see cref="RootPath"/>.
    /// On Windows, backslashes in the result are replaced by forward slashes.
    /// </summary>
    /// <param name="absolutePath">Path to convert.</param>
    /// <returns>The relative path, or an empty string for blank input.</returns>
    public string GetRelativePath(string absolutePath)
    {
        if (string.IsNullOrWhiteSpace(absolutePath))
        {
            return string.Empty;
        }

        var fromRoot = Path.GetRelativePath(RootPath, absolutePath);
        if (OperatingSystem.IsWindows())
        {
            fromRoot = fromRoot.Replace('\\', '/');
        }

        return fromRoot;
    }
}

View File

@@ -0,0 +1,59 @@
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Runs a fixed set of language analyzers one after another and gathers their output
/// into a single <see cref="LanguageAnalyzerResult"/>.
/// </summary>
public sealed class LanguageAnalyzerEngine
{
    private readonly IReadOnlyList<ILanguageAnalyzer> _analyzers;

    /// <summary>
    /// Builds the engine from <paramref name="analyzers"/>. Null entries are dropped, the
    /// first analyzer per <see cref="ILanguageAnalyzer.Id"/> is kept, and the survivors
    /// are ordered by id using ordinal comparison.
    /// </summary>
    /// <param name="analyzers">Candidate analyzers.</param>
    /// <exception cref="ArgumentNullException"><paramref name="analyzers"/> is <c>null</c>.</exception>
    public LanguageAnalyzerEngine(IEnumerable<ILanguageAnalyzer> analyzers)
    {
        ArgumentNullException.ThrowIfNull(analyzers);

        var uniqueById = analyzers
            .Where(static candidate => candidate is not null)
            .Distinct(new AnalyzerIdComparer());

        _analyzers = uniqueById
            .OrderBy(static candidate => candidate.Id, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>The analyzers the engine will run, in execution order.</summary>
    public IReadOnlyList<ILanguageAnalyzer> Analyzers => _analyzers;

    /// <summary>
    /// Executes every analyzer sequentially against <paramref name="context"/>.
    /// </summary>
    /// <param name="context">Context passed to each analyzer.</param>
    /// <param name="cancellationToken">Checked before each analyzer and passed through to it.</param>
    /// <returns>The combined result of all analyzers.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
    public async ValueTask<LanguageAnalyzerResult> AnalyzeAsync(LanguageAnalyzerContext context, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(context);

        var resultBuilder = new LanguageAnalyzerResultBuilder();
        var componentWriter = new LanguageComponentWriter(resultBuilder);

        for (var index = 0; index < _analyzers.Count; index++)
        {
            cancellationToken.ThrowIfCancellationRequested();
            await _analyzers[index].AnalyzeAsync(context, componentWriter, cancellationToken).ConfigureAwait(false);
        }

        return resultBuilder.Build();
    }

    /// <summary>
    /// Treats two analyzers as equal when their ids match ordinally.
    /// </summary>
    private sealed class AnalyzerIdComparer : IEqualityComparer<ILanguageAnalyzer>
    {
        public bool Equals(ILanguageAnalyzer? x, ILanguageAnalyzer? y)
            => ReferenceEquals(x, y)
               || (x is not null && y is not null && string.Equals(x.Id, y.Id, StringComparison.Ordinal));

        public int GetHashCode(ILanguageAnalyzer obj)
        {
            var id = obj?.Id;
            return id is null ? 0 : StringComparer.Ordinal.GetHashCode(id);
        }
    }
}

View File

@@ -0,0 +1,111 @@
using StellaOps.Scanner.Core.Contracts;
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Immutable outcome of a language analysis run. Components are held sorted by component
/// key and then by analyzer id, both compared ordinally.
/// </summary>
public sealed class LanguageAnalyzerResult
{
    private readonly ImmutableArray<LanguageComponentRecord> _components;

    internal LanguageAnalyzerResult(IEnumerable<LanguageComponentRecord> components)
    {
        var sorted = components
            .OrderBy(static item => item.ComponentKey, StringComparer.Ordinal)
            .ThenBy(static item => item.AnalyzerId, StringComparer.Ordinal);

        _components = ImmutableArray.CreateRange(sorted);
    }

    /// <summary>The sorted component records.</summary>
    public IReadOnlyList<LanguageComponentRecord> Components => _components;

    /// <summary>
    /// Projects the components into scanner component records via <see cref="LanguageComponentMapper"/>.
    /// </summary>
    /// <param name="analyzerId">Analyzer identifier forwarded to the mapper.</param>
    /// <param name="layerDigest">Optional layer digest forwarded to the mapper.</param>
    public ImmutableArray<ComponentRecord> ToComponentRecords(string analyzerId, string? layerDigest = null)
    {
        return LanguageComponentMapper.ToComponentRecords(analyzerId, _components, layerDigest);
    }

    /// <summary>
    /// Projects the components into a layer fragment via <see cref="LanguageComponentMapper"/>.
    /// </summary>
    /// <param name="analyzerId">Analyzer identifier forwarded to the mapper.</param>
    /// <param name="layerDigest">Optional layer digest forwarded to the mapper.</param>
    public LayerComponentFragment ToLayerFragment(string analyzerId, string? layerDigest = null)
    {
        return LanguageComponentMapper.ToLayerFragment(analyzerId, _components, layerDigest);
    }

    /// <summary>
    /// Creates one serializable snapshot per component, in the stored order.
    /// </summary>
    public IReadOnlyList<LanguageComponentSnapshot> ToSnapshots()
    {
        var snapshots = ImmutableArray.CreateBuilder<LanguageComponentSnapshot>(_components.Length);
        foreach (var component in _components)
        {
            snapshots.Add(component.ToSnapshot());
        }

        return snapshots.MoveToImmutable();
    }

    /// <summary>
    /// Serializes the snapshots to JSON using the shared analyzer serializer options.
    /// </summary>
    /// <param name="indent">Pretty-print the output; defaults to <c>true</c>.</param>
    public string ToJson(bool indent = true)
    {
        var serializerOptions = Internal.LanguageAnalyzerJson.CreateDefault(indent);
        return JsonSerializer.Serialize(ToSnapshots(), serializerOptions);
    }
}
/// <summary>
/// Accumulates component records keyed by component key (ordinal). Records that share a key
/// are merged into the first one seen. All access to the map is serialized by a private lock.
/// </summary>
internal sealed class LanguageAnalyzerResultBuilder
{
    private readonly Dictionary<string, LanguageComponentRecord> _records = new(StringComparer.Ordinal);
    private readonly object _sync = new();

    /// <summary>
    /// Adds <paramref name="record"/>, or merges it into the record already stored under the same key.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="record"/> is <c>null</c>.</exception>
    public void Add(LanguageComponentRecord record)
    {
        ArgumentNullException.ThrowIfNull(record);

        lock (_sync)
        {
            if (!_records.TryGetValue(record.ComponentKey, out var current))
            {
                _records.Add(record.ComponentKey, record);
                return;
            }

            current.Merge(record);
        }
    }

    /// <summary>
    /// Adds each record in turn; a <c>null</c> sequence is treated as empty.
    /// </summary>
    public void AddRange(IEnumerable<LanguageComponentRecord> records)
    {
        if (records is null)
        {
            return;
        }

        foreach (var item in records)
        {
            Add(item);
        }
    }

    /// <summary>
    /// Produces a result from a copy of the records accumulated so far.
    /// </summary>
    public LanguageAnalyzerResult Build()
    {
        lock (_sync)
        {
            var copy = new LanguageComponentRecord[_records.Count];
            _records.Values.CopyTo(copy, 0);
            return new LanguageAnalyzerResult(copy);
        }
    }
}
/// <summary>
/// Write-only facade handed to analyzers; every call is forwarded to the underlying result builder.
/// </summary>
public sealed class LanguageComponentWriter
{
    private readonly LanguageAnalyzerResultBuilder _builder;

    internal LanguageComponentWriter(LanguageAnalyzerResultBuilder builder)
    {
        ArgumentNullException.ThrowIfNull(builder);
        _builder = builder;
    }

    /// <summary>Adds a single, already-built record.</summary>
    public void Add(LanguageComponentRecord record)
    {
        _builder.Add(record);
    }

    /// <summary>Adds a sequence of already-built records.</summary>
    public void AddRange(IEnumerable<LanguageComponentRecord> records)
    {
        _builder.AddRange(records);
    }

    /// <summary>
    /// Builds a purl-keyed record with <see cref="LanguageComponentRecord.FromPurl"/> and adds it.
    /// </summary>
    public void AddFromPurl(
        string analyzerId,
        string purl,
        string name,
        string? version,
        string type,
        IEnumerable<KeyValuePair<string, string?>>? metadata = null,
        IEnumerable<LanguageComponentEvidence>? evidence = null,
        bool usedByEntrypoint = false)
    {
        var component = LanguageComponentRecord.FromPurl(analyzerId, purl, name, version, type, metadata, evidence, usedByEntrypoint);
        Add(component);
    }

    /// <summary>
    /// Builds an explicitly keyed record with <see cref="LanguageComponentRecord.FromExplicitKey"/> and adds it.
    /// </summary>
    public void AddFromExplicitKey(
        string analyzerId,
        string componentKey,
        string? purl,
        string name,
        string? version,
        string type,
        IEnumerable<KeyValuePair<string, string?>>? metadata = null,
        IEnumerable<LanguageComponentEvidence>? evidence = null,
        bool usedByEntrypoint = false)
    {
        var component = LanguageComponentRecord.FromExplicitKey(analyzerId, componentKey, purl, name, version, type, metadata, evidence, usedByEntrypoint);
        Add(component);
    }
}

View File

@@ -0,0 +1,18 @@
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Category attached to a piece of component evidence.
/// </summary>
/// <remarks>
/// <see cref="LanguageComponentMapper"/> translates the members to the lowercase strings
/// <c>file</c>, <c>metadata</c> and <c>derived</c>.
/// </remarks>
public enum LanguageEvidenceKind
{
// NOTE(review): the member descriptions below are inferred from the names only — confirm.
// Presumably evidence backed by a file found on disk.
File,
// Presumably evidence taken from package or manifest metadata.
Metadata,
// Presumably evidence computed or inferred by the analyzer.
Derived,
}
/// <summary>
/// A single piece of evidence supporting a component.
/// </summary>
/// <param name="Kind">Evidence category.</param>
/// <param name="Source">Source label of the evidence.</param>
/// <param name="Locator">Locator of the evidence.</param>
/// <param name="Value">Optional evidence value.</param>
/// <param name="Sha256">Optional SHA-256 value associated with the evidence.</param>
public sealed record LanguageComponentEvidence(
    LanguageEvidenceKind Kind,
    string Source,
    string Locator,
    string? Value,
    string? Sha256)
{
    /// <summary>
    /// All five fields joined with <c>|</c>; null fields contribute an empty segment.
    /// <see cref="LanguageComponentRecord"/> uses this as the de-duplication key for evidence.
    /// </summary>
    public string ComparisonKey => $"{Kind}|{Source}|{Locator}|{Value}|{Sha256}";
}

View File

@@ -0,0 +1,223 @@
using System.Collections.Immutable;
using System.Security.Cryptography;
using System.Text;
using StellaOps.Scanner.Core.Contracts;
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Helpers converting language analyzer component records into canonical scanner component models.
/// </summary>
public static class LanguageComponentMapper
{
    private const string LayerHashPrefix = "stellaops:lang:";
    private const string MetadataPrefix = "stellaops.lang";

    // Separators accepted between several licenses inside one metadata value.
    private static readonly char[] LicenseSeparators = { ',', ';' };

    /// <summary>
    /// Computes a deterministic synthetic layer digest for the supplied analyzer identifier.
    /// </summary>
    /// <param name="analyzerId">Analyzer identifier; trimmed and lower-cased before hashing.</param>
    /// <returns>A <c>sha256:</c>-prefixed lowercase hex digest.</returns>
    /// <exception cref="ArgumentException"><paramref name="analyzerId"/> is null, empty or whitespace.</exception>
    public static string ComputeLayerDigest(string analyzerId)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(analyzerId);

        var payload = $"{LayerHashPrefix}{analyzerId.Trim().ToLowerInvariant()}";
        var bytes = Encoding.UTF8.GetBytes(payload);
        var hash = SHA256.HashData(bytes);
        return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
    }

    /// <summary>
    /// Projects language component records into a deterministic set of component records.
    /// </summary>
    /// <param name="analyzerId">Analyzer identifier stamped into each record's metadata.</param>
    /// <param name="components">Records to project; emitted ordered by component key (ordinal).</param>
    /// <param name="layerDigest">Layer digest to assign; when blank, the synthetic digest for the analyzer is used.</param>
    /// <returns>The projected component records.</returns>
    /// <exception cref="ArgumentException"><paramref name="analyzerId"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="components"/> is <c>null</c>.</exception>
    public static ImmutableArray<ComponentRecord> ToComponentRecords(
        string analyzerId,
        IEnumerable<LanguageComponentRecord> components,
        string? layerDigest = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(analyzerId);
        ArgumentNullException.ThrowIfNull(components);

        var effectiveLayer = ResolveLayerDigest(analyzerId, layerDigest);

        var builder = ImmutableArray.CreateBuilder<ComponentRecord>();
        foreach (var record in components.OrderBy(static component => component.ComponentKey, StringComparer.Ordinal))
        {
            builder.Add(CreateComponentRecord(analyzerId, effectiveLayer, record));
        }

        return builder.ToImmutable();
    }

    /// <summary>
    /// Creates a layer component fragment using the supplied component records.
    /// </summary>
    /// <param name="analyzerId">Analyzer identifier stamped into each record's metadata.</param>
    /// <param name="components">Records to project into the fragment.</param>
    /// <param name="layerDigest">Layer digest for the fragment; when blank, the synthetic digest for the analyzer is used.</param>
    /// <returns>A fragment whose digest matches the digest assigned to its component records.</returns>
    /// <exception cref="ArgumentException"><paramref name="analyzerId"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="components"/> is <c>null</c>.</exception>
    public static LayerComponentFragment ToLayerFragment(
        string analyzerId,
        IEnumerable<LanguageComponentRecord> components,
        string? layerDigest = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(analyzerId);
        ArgumentNullException.ThrowIfNull(components);

        // Resolve the digest once so an explicitly supplied layer digest is honoured even when
        // there are no components; previously the empty case always used the synthetic digest.
        var effectiveLayer = ResolveLayerDigest(analyzerId, layerDigest);
        var componentRecords = ToComponentRecords(analyzerId, components, effectiveLayer);
        return LayerComponentFragment.Create(effectiveLayer, componentRecords);
    }

    /// <summary>
    /// Returns the supplied digest, or the synthetic analyzer digest when it is blank.
    /// </summary>
    private static string ResolveLayerDigest(string analyzerId, string? layerDigest)
        => string.IsNullOrWhiteSpace(layerDigest)
            ? ComputeLayerDigest(analyzerId)
            : layerDigest!;

    /// <summary>
    /// Builds one scanner component record (identity, evidence, metadata, usage) from a language record.
    /// </summary>
    private static ComponentRecord CreateComponentRecord(
        string analyzerId,
        string layerDigest,
        LanguageComponentRecord record)
    {
        ArgumentNullException.ThrowIfNull(record);

        var identity = ComponentIdentity.Create(
            key: ResolveIdentityKey(record),
            name: record.Name,
            version: record.Version,
            purl: record.Purl,
            componentType: record.Type);

        var evidence = MapEvidence(record);
        var metadata = BuildMetadata(analyzerId, record);
        var usage = record.UsedByEntrypoint
            ? ComponentUsage.Create(usedByEntrypoint: true)
            : ComponentUsage.Unused;

        return new ComponentRecord
        {
            Identity = identity,
            LayerDigest = layerDigest,
            Evidence = evidence,
            Dependencies = ImmutableArray<string>.Empty,
            Metadata = metadata,
            Usage = usage,
        };
    }

    /// <summary>
    /// Converts the record's evidence items. The evidence value is the locator, falling back
    /// to the source and finally to the kind string when both are blank.
    /// </summary>
    private static ImmutableArray<ComponentEvidence> MapEvidence(LanguageComponentRecord record)
    {
        var builder = ImmutableArray.CreateBuilder<ComponentEvidence>();
        foreach (var item in record.Evidence)
        {
            if (item is null)
            {
                continue;
            }

            var kind = item.Kind switch
            {
                LanguageEvidenceKind.File => "file",
                LanguageEvidenceKind.Metadata => "metadata",
                LanguageEvidenceKind.Derived => "derived",
                _ => "unknown",
            };

            var value = string.IsNullOrWhiteSpace(item.Locator) ? item.Source : item.Locator;
            if (string.IsNullOrWhiteSpace(value))
            {
                value = kind;
            }

            builder.Add(new ComponentEvidence
            {
                Kind = kind,
                Value = value,
                Source = string.IsNullOrWhiteSpace(item.Source) ? null : item.Source,
            });
        }

        return builder.Count == 0
            ? ImmutableArray<ComponentEvidence>.Empty
            : builder.ToImmutable();
    }

    /// <summary>
    /// Flattens analyzer id, record metadata and per-evidence value/sha256 into prefixed
    /// properties, and extracts a license list from metadata keys that mention "license".
    /// </summary>
    private static ComponentMetadata? BuildMetadata(string analyzerId, LanguageComponentRecord record)
    {
        var properties = new SortedDictionary<string, string>(StringComparer.Ordinal)
        {
            [$"{MetadataPrefix}.analyzerId"] = analyzerId
        };

        var licenseList = new List<string>();

        foreach (var pair in record.Metadata)
        {
            if (string.IsNullOrWhiteSpace(pair.Key))
            {
                continue;
            }

            if (!string.IsNullOrWhiteSpace(pair.Value))
            {
                var value = pair.Value.Trim();
                properties[$"{MetadataPrefix}.meta.{pair.Key}"] = value;

                if (IsLicenseKey(pair.Key) && value.Length > 0)
                {
                    foreach (var candidate in value.Split(LicenseSeparators, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries))
                    {
                        if (candidate.Length > 0)
                        {
                            licenseList.Add(candidate);
                        }
                    }
                }
            }
        }

        // The index advances for every non-null evidence item, including items that contribute
        // no property, so property names line up with the item's position in the evidence list.
        var evidenceIndex = 0;
        foreach (var evidence in record.Evidence)
        {
            if (evidence is null)
            {
                continue;
            }

            var prefix = $"{MetadataPrefix}.evidence.{evidenceIndex}";
            if (!string.IsNullOrWhiteSpace(evidence.Value))
            {
                properties[$"{prefix}.value"] = evidence.Value.Trim();
            }

            if (!string.IsNullOrWhiteSpace(evidence.Sha256))
            {
                properties[$"{prefix}.sha256"] = evidence.Sha256.Trim();
            }

            evidenceIndex++;
        }

        IReadOnlyList<string>? licenses = null;
        if (licenseList.Count > 0)
        {
            // De-duplicate case-insensitively, then sort ordinally for stable output.
            licenses = licenseList
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .OrderBy(static license => license, StringComparer.Ordinal)
                .ToArray();
        }

        return new ComponentMetadata
        {
            Licenses = licenses,
            Properties = properties.Count == 0 ? null : properties,
        };
    }

    /// <summary>
    /// Returns the component key with a leading <c>purl::</c> marker removed, if present.
    /// </summary>
    private static string ResolveIdentityKey(LanguageComponentRecord record)
    {
        var key = record.ComponentKey;
        if (key.StartsWith("purl::", StringComparison.Ordinal))
        {
            return key[6..];
        }

        return key;
    }

    /// <summary>
    /// A metadata key is license-related when it contains "license", ignoring case.
    /// </summary>
    private static bool IsLicenseKey(string key)
        => key.Contains("license", StringComparison.OrdinalIgnoreCase);
}

View File

@@ -0,0 +1,219 @@
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// A component discovered by a language analyzer, identified by <see cref="ComponentKey"/>.
/// Metadata and evidence are kept in ordinally sorted maps so enumeration order is stable.
/// </summary>
public sealed class LanguageComponentRecord
{
    private readonly SortedDictionary<string, string?> _metadata;
    private readonly SortedDictionary<string, LanguageComponentEvidence> _evidence;

    private LanguageComponentRecord(
        string analyzerId,
        string componentKey,
        string? purl,
        string name,
        string? version,
        string type,
        IEnumerable<KeyValuePair<string, string?>> metadata,
        IEnumerable<LanguageComponentEvidence> evidence,
        bool usedByEntrypoint)
    {
        AnalyzerId = analyzerId ?? throw new ArgumentNullException(nameof(analyzerId));
        ComponentKey = componentKey ?? throw new ArgumentNullException(nameof(componentKey));
        Purl = string.IsNullOrWhiteSpace(purl) ? null : purl.Trim();
        Name = name ?? throw new ArgumentNullException(nameof(name));
        Version = string.IsNullOrWhiteSpace(version) ? null : version.Trim();
        Type = string.IsNullOrWhiteSpace(type) ? throw new ArgumentException("Type is required", nameof(type)) : type.Trim();
        UsedByEntrypoint = usedByEntrypoint;

        // Blank keys are dropped; for duplicate (trimmed) keys the last entry wins.
        _metadata = new SortedDictionary<string, string?>(StringComparer.Ordinal);
        foreach (var entry in metadata ?? Array.Empty<KeyValuePair<string, string?>>())
        {
            if (string.IsNullOrWhiteSpace(entry.Key))
            {
                continue;
            }

            _metadata[entry.Key.Trim()] = entry.Value;
        }

        // Evidence is de-duplicated by its comparison key.
        _evidence = new SortedDictionary<string, LanguageComponentEvidence>(StringComparer.Ordinal);
        foreach (var evidenceItem in evidence ?? Array.Empty<LanguageComponentEvidence>())
        {
            if (evidenceItem is null)
            {
                continue;
            }

            _evidence[evidenceItem.ComparisonKey] = evidenceItem;
        }
    }

    /// <summary>Identifier of the analyzer that produced the record.</summary>
    public string AnalyzerId { get; }

    /// <summary>Key under which the record is de-duplicated and merged.</summary>
    public string ComponentKey { get; }

    /// <summary>Trimmed package URL, or <c>null</c> when none was supplied.</summary>
    public string? Purl { get; }

    /// <summary>Component name.</summary>
    public string Name { get; }

    /// <summary>Trimmed version, or <c>null</c> when none was supplied.</summary>
    public string? Version { get; }

    /// <summary>Trimmed component type.</summary>
    public string Type { get; }

    /// <summary>Entrypoint usage flag; becomes <c>true</c> if any merged record sets it.</summary>
    public bool UsedByEntrypoint { get; private set; }

    /// <summary>Metadata entries ordered by key (ordinal).</summary>
    public IReadOnlyDictionary<string, string?> Metadata => _metadata;

    /// <summary>Evidence items ordered by comparison key (ordinal).</summary>
    public IReadOnlyCollection<LanguageComponentEvidence> Evidence => _evidence.Values;

    /// <summary>
    /// Creates a record whose component key is <c>purl::</c> followed by the trimmed purl.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="purl"/> is null, empty or whitespace, or <paramref name="type"/> is blank.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="analyzerId"/> or <paramref name="name"/> is <c>null</c>.</exception>
    public static LanguageComponentRecord FromPurl(
        string analyzerId,
        string purl,
        string name,
        string? version,
        string type,
        IEnumerable<KeyValuePair<string, string?>>? metadata = null,
        IEnumerable<LanguageComponentEvidence>? evidence = null,
        bool usedByEntrypoint = false)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            throw new ArgumentException("purl is required", nameof(purl));
        }

        var key = $"purl::{purl.Trim()}";
        return new LanguageComponentRecord(
            analyzerId,
            key,
            purl,
            name,
            version,
            type,
            metadata ?? Array.Empty<KeyValuePair<string, string?>>(),
            evidence ?? Array.Empty<LanguageComponentEvidence>(),
            usedByEntrypoint);
    }

    /// <summary>
    /// Creates a record keyed by the trimmed <paramref name="componentKey"/>; the purl is optional.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="componentKey"/> is null, empty or whitespace, or <paramref name="type"/> is blank.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="analyzerId"/> or <paramref name="name"/> is <c>null</c>.</exception>
    public static LanguageComponentRecord FromExplicitKey(
        string analyzerId,
        string componentKey,
        string? purl,
        string name,
        string? version,
        string type,
        IEnumerable<KeyValuePair<string, string?>>? metadata = null,
        IEnumerable<LanguageComponentEvidence>? evidence = null,
        bool usedByEntrypoint = false)
    {
        if (string.IsNullOrWhiteSpace(componentKey))
        {
            throw new ArgumentException("Component key is required", nameof(componentKey));
        }

        return new LanguageComponentRecord(
            analyzerId,
            componentKey.Trim(),
            purl,
            name,
            version,
            type,
            metadata ?? Array.Empty<KeyValuePair<string, string?>>(),
            evidence ?? Array.Empty<LanguageComponentEvidence>(),
            usedByEntrypoint);
    }

    /// <summary>
    /// Folds <paramref name="other"/> into this record: the usage flag is OR-ed, metadata
    /// values fill only keys that are missing or null/empty here, and evidence is unioned
    /// by comparison key.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">The component keys differ.</exception>
    internal void Merge(LanguageComponentRecord other)
    {
        ArgumentNullException.ThrowIfNull(other);

        if (!ComponentKey.Equals(other.ComponentKey, StringComparison.Ordinal))
        {
            throw new InvalidOperationException($"Cannot merge component '{ComponentKey}' with '{other.ComponentKey}'.");
        }

        UsedByEntrypoint |= other.UsedByEntrypoint;

        foreach (var entry in other._metadata)
        {
            if (!_metadata.TryGetValue(entry.Key, out var existing) || string.IsNullOrEmpty(existing))
            {
                _metadata[entry.Key] = entry.Value;
            }
        }

        foreach (var evidenceItem in other._evidence)
        {
            _evidence[evidenceItem.Key] = evidenceItem.Value;
        }
    }

    /// <summary>
    /// Copies the record into a serializable snapshot. Metadata and evidence keep their
    /// ordinal ordering.
    /// </summary>
    public LanguageComponentSnapshot ToSnapshot()
    {
        return new LanguageComponentSnapshot
        {
            AnalyzerId = AnalyzerId,
            ComponentKey = ComponentKey,
            Purl = Purl,
            Name = Name,
            Version = Version,
            Type = Type,
            UsedByEntrypoint = UsedByEntrypoint,
            // A sorted copy guarantees key order when the snapshot is serialized; a plain
            // Dictionary does not specify its enumeration order.
            Metadata = new SortedDictionary<string, string?>(_metadata, StringComparer.Ordinal),
            Evidence = _evidence.Values.Select(static item => new LanguageComponentEvidenceSnapshot
            {
                Kind = item.Kind,
                Source = item.Source,
                Locator = item.Locator,
                Value = item.Value,
                Sha256 = item.Sha256,
            }).ToArray(),
        };
    }
}
/// <summary>
/// Mutable, JSON-serializable copy of a <see cref="LanguageComponentRecord"/>.
/// Each property carries an explicit camel-cased JSON name.
/// </summary>
public sealed class LanguageComponentSnapshot
{
[JsonPropertyName("analyzerId")]
public string AnalyzerId { get; set; } = string.Empty;
[JsonPropertyName("componentKey")]
public string ComponentKey { get; set; } = string.Empty;
[JsonPropertyName("purl")]
public string? Purl { get; set; }
[JsonPropertyName("name")]
public string Name { get; set; } = string.Empty;
[JsonPropertyName("version")]
public string? Version { get; set; }
[JsonPropertyName("type")]
public string Type { get; set; } = string.Empty;
[JsonPropertyName("usedByEntrypoint")]
public bool UsedByEntrypoint { get; set; }
// Defaults to an empty map with ordinal key comparison.
[JsonPropertyName("metadata")]
public IDictionary<string, string?> Metadata { get; set; } = new Dictionary<string, string?>(StringComparer.Ordinal);
// Defaults to an empty list.
[JsonPropertyName("evidence")]
public IReadOnlyList<LanguageComponentEvidenceSnapshot> Evidence { get; set; } = Array.Empty<LanguageComponentEvidenceSnapshot>();
}
/// <summary>
/// Mutable, JSON-serializable copy of a <see cref="LanguageComponentEvidence"/> item.
/// </summary>
public sealed class LanguageComponentEvidenceSnapshot
{
[JsonPropertyName("kind")]
public LanguageEvidenceKind Kind { get; set; }
[JsonPropertyName("source")]
public string Source { get; set; } = string.Empty;
[JsonPropertyName("locator")]
public string Locator { get; set; } = string.Empty;
[JsonPropertyName("value")]
public string? Value { get; set; }
[JsonPropertyName("sha256")]
public string? Sha256 { get; set; }
}

View File

@@ -0,0 +1,49 @@
namespace StellaOps.Scanner.Analyzers.Lang;
/// <summary>
/// Immutable set of normalized paths that are known to be used, queried by analyzers
/// to flag components as used.
/// </summary>
/// <remarks>
/// Paths are made absolute before comparison. Comparison ignores case on Windows and is
/// case-sensitive elsewhere. A trailing directory separator never affects a match.
/// </remarks>
public sealed class LanguageUsageHints
{
    // Declared before Empty: static initializers run in textual order and Empty's
    // constructor call reads this comparer.
    private static readonly StringComparer Comparer = OperatingSystem.IsWindows()
        ? StringComparer.OrdinalIgnoreCase
        : StringComparer.Ordinal;

    private readonly ImmutableHashSet<string> _usedPaths;

    /// <summary>Hints containing no paths.</summary>
    public static LanguageUsageHints Empty { get; } = new(Array.Empty<string>());

    /// <summary>
    /// Creates hints from <paramref name="usedPaths"/>; blank entries are ignored.
    /// </summary>
    /// <param name="usedPaths">Paths to record as used.</param>
    /// <exception cref="ArgumentNullException"><paramref name="usedPaths"/> is <c>null</c>.</exception>
    public LanguageUsageHints(IEnumerable<string> usedPaths)
    {
        ArgumentNullException.ThrowIfNull(usedPaths);

        _usedPaths = usedPaths
            .Select(Normalize)
            .Where(static path => path.Length > 0)
            .ToImmutableHashSet(Comparer);
    }

    /// <summary>
    /// Reports whether <paramref name="path"/>, once normalized, is one of the recorded paths.
    /// </summary>
    /// <param name="path">Path to test; blank input is never considered used.</param>
    public bool IsPathUsed(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            return false;
        }

        return _usedPaths.Contains(Normalize(path));
    }

    /// <summary>
    /// Converts a path to its absolute form without a trailing directory separator.
    /// On Windows, backslashes are also replaced by forward slashes.
    /// Blank input yields an empty string.
    /// </summary>
    private static string Normalize(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            return string.Empty;
        }

        var full = Path.GetFullPath(path);
        if (OperatingSystem.IsWindows())
        {
            return full.Replace('\\', '/').TrimEnd('/');
        }

        // Previously only Windows trimmed the trailing separator, so "/a/b/" and "/a/b"
        // were different entries elsewhere. TrimEndingDirectorySeparator keeps a bare root intact.
        return Path.TrimEndingDirectorySeparator(full);
    }
}

View File

@@ -0,0 +1,11 @@
global using System;
global using System.Collections.Concurrent;
global using System.Collections.Generic;
global using System.Collections.Immutable;
global using System.Diagnostics.CodeAnalysis;
global using System.IO;
global using System.Linq;
global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading;
global using System.Threading.Tasks;

View File

@@ -0,0 +1,15 @@
using System;
using StellaOps.Plugin;
namespace StellaOps.Scanner.Analyzers.Lang.Plugin;
/// <summary>
/// Represents a restart-time plug-in that exposes a language analyzer.
/// </summary>
public interface ILanguageAnalyzerPlugin : IAvailabilityPlugin
{
/// <summary>
/// Creates the analyzer instance bound to the service provider.
/// </summary>
/// <param name="services">Service provider the analyzer may resolve its dependencies from.</param>
/// <returns>
/// The analyzer instance. <see cref="LanguageAnalyzerPluginCatalog"/> skips a <c>null</c> result
/// and logs any exception thrown here.
/// </returns>
ILanguageAnalyzer CreateAnalyzer(IServiceProvider services);
}

View File

@@ -0,0 +1,147 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Reflection;
using Microsoft.Extensions.Logging;
using StellaOps.Plugin;
using StellaOps.Plugin.Hosting;
using StellaOps.Scanner.Core.Security;
namespace StellaOps.Scanner.Analyzers.Lang.Plugin;
/// <summary>
/// Catalog of language analyzer plug-ins: loads plug-ins from a directory and creates
/// analyzers from them.
/// </summary>
public interface ILanguageAnalyzerPluginCatalog
{
/// <summary>
/// Plug-ins currently known to the catalog.
/// </summary>
IReadOnlyCollection<ILanguageAnalyzerPlugin> Plugins { get; }
/// <summary>
/// Discovers and registers the plug-ins found in <paramref name="directory"/>.
/// </summary>
/// <param name="directory">Directory to search for plug-in assemblies.</param>
/// <param name="seal">
/// Defaults to <c>true</c>. In <see cref="LanguageAnalyzerPluginCatalog"/> a <c>true</c> value seals
/// the catalog guard once loading finishes. NOTE(review): presumably sealing blocks later
/// registrations — confirm against the guard implementation.
/// </param>
void LoadFromDirectory(string directory, bool seal = true);
/// <summary>
/// Creates analyzer instances from the loaded plug-ins.
/// </summary>
/// <param name="services">Service provider handed to each plug-in.</param>
/// <returns>The created analyzers.</returns>
IReadOnlyList<ILanguageAnalyzer> CreateAnalyzers(IServiceProvider services);
}
/// <summary>
/// Default plug-in catalog. Loads analyzer plug-in assemblies through the plug-in host,
/// registers the ones the guard allows, and creates analyzers from the resulting plug-ins.
/// </summary>
public sealed class LanguageAnalyzerPluginCatalog : ILanguageAnalyzerPluginCatalog
{
    private readonly ILogger<LanguageAnalyzerPluginCatalog> _logger;
    private readonly IPluginCatalogGuard _guard;

    // Registered assemblies keyed by assembly path (case-insensitive).
    private readonly ConcurrentDictionary<string, Assembly> _assemblies = new(StringComparer.OrdinalIgnoreCase);
    private IReadOnlyList<ILanguageAnalyzerPlugin> _plugins = Array.Empty<ILanguageAnalyzerPlugin>();

    /// <summary>
    /// Creates the catalog.
    /// </summary>
    /// <param name="guard">Guard consulted before each assembly registration and sealed after loading.</param>
    /// <param name="logger">Logger for registration and creation diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public LanguageAnalyzerPluginCatalog(IPluginCatalogGuard guard, ILogger<LanguageAnalyzerPluginCatalog> logger)
    {
        _guard = guard ?? throw new ArgumentNullException(nameof(guard));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public IReadOnlyCollection<ILanguageAnalyzerPlugin> Plugins => _plugins;

    /// <summary>
    /// Loads assemblies matching <c>StellaOps.Scanner.Analyzers.*.dll</c> from the top level of
    /// <paramref name="directory"/>, registers those the guard allows, and rebuilds the plug-in list.
    /// A registration failure is logged and does not stop the remaining assemblies.
    /// </summary>
    /// <param name="directory">Directory to search; converted to an absolute path.</param>
    /// <param name="seal">When <c>true</c> (the default), the guard is sealed after loading.</param>
    /// <exception cref="ArgumentException"><paramref name="directory"/> is null, empty or whitespace.</exception>
    public void LoadFromDirectory(string directory, bool seal = true)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(directory);

        var fullDirectory = Path.GetFullPath(directory);
        var options = new PluginHostOptions
        {
            PluginsDirectory = fullDirectory,
            EnsureDirectoryExists = false,
            RecursiveSearch = false,
        };
        options.SearchPatterns.Add("StellaOps.Scanner.Analyzers.*.dll");

        var result = PluginHost.LoadPlugins(options, _logger);
        if (result.Plugins.Count == 0)
        {
            _logger.LogWarning("No language analyzer plug-ins discovered under '{Directory}'.", fullDirectory);
        }

        foreach (var descriptor in result.Plugins)
        {
            try
            {
                _guard.EnsureRegistrationAllowed(descriptor.AssemblyPath);
                _assemblies[descriptor.AssemblyPath] = descriptor.Assembly;
                _logger.LogInformation(
                    "Registered language analyzer plug-in assembly '{Assembly}' from '{Path}'.",
                    descriptor.Assembly.FullName,
                    descriptor.AssemblyPath);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to register language analyzer plug-in '{Path}'.", descriptor.AssemblyPath);
            }
        }

        RefreshPluginList();

        if (seal)
        {
            _guard.Seal();
        }
    }

    /// <summary>
    /// Creates one analyzer per available plug-in. Plug-ins that are unavailable, return
    /// <c>null</c>, or throw are skipped. The result is ordered by analyzer id (ordinal).
    /// </summary>
    /// <param name="services">Service provider handed to each plug-in.</param>
    /// <returns>The created analyzers, or an empty list when none could be created.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public IReadOnlyList<ILanguageAnalyzer> CreateAnalyzers(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);

        if (_plugins.Count == 0)
        {
            _logger.LogWarning("No language analyzer plug-ins available; skipping language analysis.");
            return Array.Empty<ILanguageAnalyzer>();
        }

        var analyzers = new List<ILanguageAnalyzer>(_plugins.Count);
        foreach (var plugin in _plugins)
        {
            if (!IsPluginAvailable(plugin, services))
            {
                continue;
            }

            try
            {
                var analyzer = plugin.CreateAnalyzer(services);
                if (analyzer is null)
                {
                    continue;
                }

                analyzers.Add(analyzer);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Language analyzer plug-in '{Plugin}' failed to create analyzer instance.", plugin.Name);
            }
        }

        if (analyzers.Count == 0)
        {
            _logger.LogWarning("All language analyzer plug-ins were unavailable.");
            return Array.Empty<ILanguageAnalyzer>();
        }

        analyzers.Sort(static (a, b) => string.CompareOrdinal(a.Id, b.Id));
        return new ReadOnlyCollection<ILanguageAnalyzer>(analyzers);
    }

    /// <summary>
    /// Rebuilds the plug-in list from all registered assemblies.
    /// </summary>
    private void RefreshPluginList()
    {
        // ConcurrentDictionary enumeration order is unspecified; order by path so the
        // plug-in list is built from the same assembly sequence on every run.
        var assemblies = _assemblies
            .OrderBy(static pair => pair.Key, StringComparer.OrdinalIgnoreCase)
            .Select(static pair => pair.Value)
            .ToArray();

        var plugins = PluginLoader.LoadPlugins<ILanguageAnalyzerPlugin>(assemblies);
        _plugins = plugins is IReadOnlyList<ILanguageAnalyzerPlugin> list
            ? list
            : new ReadOnlyCollection<ILanguageAnalyzerPlugin>(plugins.ToArray());
    }

    /// <summary>
    /// Asks the plug-in whether it is available. A throwing availability check is treated
    /// as "unavailable" and logged instead of being silently discarded.
    /// </summary>
    private bool IsPluginAvailable(ILanguageAnalyzerPlugin plugin, IServiceProvider services)
    {
        try
        {
            return plugin.IsAvailable(services);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Language analyzer plug-in '{Plugin}' availability check failed; treating it as unavailable.", plugin.Name);
            return false;
        }
    }
}

View File

@@ -0,0 +1,119 @@
# StellaOps Scanner — Language Analyzer Implementation Plan (2025Q4)
> **Goal.** Deliver best-in-class language analyzers that outperform competitors on fidelity, determinism, and offline readiness while integrating tightly with Scanner Worker orchestration and SBOM composition.
All sprints below assume prerequisites from SP10-G2 (core scaffolding + Java analyzer) are complete. Each sprint is sized for a focused guild (≈1–1.5 weeks) and produces definitive gates for downstream teams (Emit, Policy, Scheduler).
---
## Sprint LA1 — Node Analyzer & Workspace Intelligence (Tasks 10-302, 10-307, 10-308, 10-309 subset) *(DOING — 2025-10-19)*
- **Scope:** Resolve hoisted `node_modules`, PNPM structures, Yarn Berry Plug'n'Play, symlinked workspaces, and detect security-sensitive scripts.
- **Deliverables:**
- `StellaOps.Scanner.Analyzers.Lang.Node` plug-in with manifest + DI registration.
- Deterministic walker supporting >100k modules with streaming JSON parsing.
- Workspace graph persisted as analyzer metadata (`package.json` provenance + symlink target proofs).
- **Acceptance Metrics:**
- 10k module fixture scans <1.8s on 4vCPU (p95).
- Memory ceiling <220MB (tracked via deterministic benchmark harness).
- All symlink targets canonicalized; path traversal guarded.
- **Gate Artifacts:**
- `Fixtures/lang/node/**` golden outputs.
- Analyzer benchmark CSV + flamegraph (commit under `src/Bench/StellaOps.Bench/Scanner.Analyzers`).
- Worker integration sample enabling Node analyzer via manifest.
- **Progress (2025-10-21):** Module walker with package-lock/yarn/pnpm resolution, workspace attribution, integrity metadata, and deterministic fixture harness committed; Node tasks 10-302A/B remain green. Shared component mapper + canonical result harness landed, closing tasks 10-307/308. Script metadata & telemetry (10-302C) emit policy hints, hashed evidence, and feed `scanner_analyzer_node_scripts_total` into Worker OpenTelemetry pipeline. Restart-time packaging closed (10-309): manifest added, Worker language catalog loads the Node analyzer, integration tests cover dispatch + layer fragments, and Offline Kit docs call out bundled language plug-ins.
## Sprint LA2 — Python Analyzer & Entry Point Attribution (Tasks 10-303, 10-307, 10-308, 10-309 subset)
- **Scope:** Parse `*.dist-info`, `RECORD` hashes, entry points, and pip-installed editable packages; integrate usage hints from EntryTrace.
- **Deliverables:**
- `StellaOps.Scanner.Analyzers.Lang.Python` plug-in.
- RECORD hash validation with optional Zip64 support for `.whl` caches.
- Entry-point mapping into `UsageFlags` for Emit stage.
- **Acceptance Metrics:**
- Hash verification throughput ≥75 MB/s sustained with streaming reader.
- False-positive rate for editable installs <1% on curated fixtures.
- Determinism check across CPython 3.8–3.12 generated metadata.
- **Gate Artifacts:**
- Golden fixtures for `site-packages`, virtualenv, and layered pip caches.
- Usage hint propagation tests (EntryTrace → analyzer → SBOM).
- Metrics counters (`scanner_analyzer_python_components_total`) documented.
- **Progress (2025-10-21):** Python analyzer landed; Tasks 10-303A/B/C are DONE with dist-info parsing, RECORD verification, editable install detection, and deterministic `simple-venv` fixture + benchmark hooks recorded.
- **Progress (2025-10-23):** Closed Tasks 10-307P/308P/309P — the Python analyzer now emits quiet-provenance metadata via shared helpers, the determinism harness covers `simple-venv`, `pip-cache`, and `layered-editable` fixtures, the bench suite reports hash throughput (`python/hash-throughput-20251023.csv`), and the Offline Kit docs list the Python plug-in in the language bundle manifest.
## Sprint LA3 — Go Analyzer & Build Info Synthesis (Tasks 10-304, 10-307, 10-308, 10-309 subset)
- **Scope:** Extract Go build metadata from `.note.go.buildid`, embedded module info, and fallback to `bin:{sha256}`; surface VCS provenance.
- **Deliverables:**
- `StellaOps.Scanner.Analyzers.Lang.Go` plug-in.
- DWARF-lite parser to enrich component origin (commit hash + dirty flag) when available.
- Shared hash cache to dedupe repeated binaries across layers.
- **Acceptance Metrics:**
- Analyzer latency ≤400µs per binary (hot cache) / ≤2ms (cold).
- Provenance coverage ≥95% on representative Go fixture suite.
- Zero allocations in happy path beyond pooled buffers (validated via BenchmarkDotNet).
- **Gate Artifacts:**
- Benchmarks vs competitor open-source tool (Trivy or Syft) demonstrating faster metadata extraction.
- Documentation snippet explaining VCS metadata fields for Policy team.
- **Progress (2025-10-22):** Build-info decoder shipped with DWARF-string fallback for `vcs.*` markers, plus cached metadata keyed by binary length/timestamp. Added Go test fixtures covering build-info and DWARF-only binaries with deterministic goldens; analyzer now emits `go.dwarf` evidence alongside `go.buildinfo` metadata to feed downstream provenance rules. Completed stripped-binary heuristics with deterministic `golang::bin::sha256` components and a new `stripped` fixture to guard quiet-provenance behaviour. Heuristic fallbacks now emit `scanner_analyzer_golang_heuristic_total{indicator,version_hint}` counters, and shared buffer pooling (`ArrayPool<byte>`) keeps concurrent scans allocation-lite. Bench harness (`src/Bench/StellaOps.Bench/Scanner.Analyzers/config.json`) gained a dedicated Go scenario with baseline mean 4.02ms; comparison against Syft v1.29.1 on the same fixture shows a 22% speed advantage (see `src/Bench/StellaOps.Bench/Scanner.Analyzers/lang/go/syft-comparison-20251021.csv`).
## Sprint LA4 — .NET Analyzer & RID Variants (Tasks 10-305, 10-307, 10-308, 10-309 subset)
- **Scope:** Parse `*.deps.json`, `runtimeconfig.json`, assembly metadata, and RID-specific assets; correlate with native dependencies.
- **Deliverables:**
- `StellaOps.Scanner.Analyzers.Lang.DotNet` plug-in.
- Strong-name + Authenticode optional verification when offline cert bundle provided.
- RID-aware component grouping with fallback to `bin:{sha256}` for self-contained apps.
- **Acceptance Metrics:**
- Multi-target app fixture processed <1.2s; memory <250MB.
- RID variant collapse reduces component explosion by 40% vs naive listing.
- All security metadata (signing Publisher, timestamp) surfaced deterministically.
- **Gate Artifacts:**
- Signed .NET sample apps (framework-dependent & self-contained) under `samples/scanner/lang/dotnet/`.
- Tests verifying dual runtimeconfig merge logic.
- Guidance for Policy on license propagation from NuGet metadata.
- **Progress (2025-10-22):** Completed task 10-305A with a deterministic deps/runtimeconfig ingest pipeline producing `pkg:nuget` components across RID targets. Added dotnet fixture + golden output to the shared harness, wired analyzer plugin availability, and surfaced RID metadata in component records for downstream emit/diff work. License provenance and quiet flagging now ride through the shared helpers (task 10-307D), including nuspec license expression/file ingestion, manifest provenance tagging, and concurrency-safe file metadata caching with new parallel tests.
- **Progress (2025-10-23):** Landed determinism + benchmark coverage (task 10-308D) via the new `multi` fixture, golden outputs, and bench scenario wired into `baseline.csv`, plus Syft comparison data. Packaged the .NET plug-in for restart-only distribution (task 10-309D), verified manifest copy into `plugins/scanner/analyzers/lang/`, and refreshed `docs/24_OFFLINE_KIT.md` with updated Offline Kit instructions.
## Sprint LA5 — Rust Analyzer & Binary Fingerprinting (Tasks 10-306, 10-307, 10-308, 10-309 subset)
- **Scope:** Detect crates via metadata in `.fingerprint`, Cargo.lock fragments, or embedded `rustc` markers; robust fallback to binary hash classification.
- **Deliverables:**
- `StellaOps.Scanner.Analyzers.Lang.Rust` plug-in.
- Symbol table heuristics capable of attributing stripped binaries by leveraging `.comment` and section names without violating determinism.
- Quiet-provenance flags to differentiate heuristics from hard evidence.
- **Acceptance Metrics:**
- Accurate crate attribution ≥85% on curated Cargo workspace fixtures.
- Heuristic fallback clearly labeled; no false certain claims.
- Analyzer completes <1s on 500 binary corpus.
- **Gate Artifacts:**
- Fixtures covering cargo workspaces, binaries with embedded metadata stripped.
- ADR documenting heuristic boundaries + risk mitigations.
## Sprint LA6 — Shared Evidence Enhancements & Worker Integration (Tasks 10-307, 10-308, 10-309 finalization)
- **Scope:** Finalize shared helpers, deterministic harness expansion, Worker/Emit wiring, and macro benchmarks.
- **Deliverables:**
- Consolidated `LanguageComponentWriter` extensions for license, vulnerability hints, and usage propagation.
- Worker dispatcher loading plug-ins via manifest registry + health checks.
- Combined analyzer benchmark suite executed in CI with regression thresholds.
- **Acceptance Metrics:**
- Worker executes mixed analyzer suite (Java+Node+Python+Go+.NET+Rust) within SLA: warm scan <6s, cold <25s.
- CI determinism guard catches output drift (>0 diff tolerance) across all fixtures.
- Telemetry coverage: each analyzer emits timing + component counters.
- **Gate Artifacts:**
- `SPRINTS_LANG_IMPLEMENTATION_PLAN.md` progress log updated (this file).
- `src/Bench/StellaOps.Bench/Scanner.Analyzers/lang-matrix.csv` recorded + referenced in docs.
- Ops notes for packaging plug-ins into Offline Kit.
---
## Cross-Sprint Considerations
- **Security:** All analyzers must enforce path canonicalization, guard against zip-slip, and expose provenance classifications (`observed`, `heuristic`, `attested`).
- **Offline-first:** No network calls; rely on cached metadata and optional offline bundles (license texts, signature roots).
- **Determinism:** Normalise timestamps to `0001-01-01T00:00:00Z` when persisting synthetic data; sort collections by stable keys.
- **Benchmarking:** Extend `src/Bench/StellaOps.Bench/Scanner.Analyzers` to compare against open-source scanners (Syft/Trivy) and document performance wins.
- **Hand-offs:** Emit guild requires consistent component schemas; Policy needs license + provenance metadata; Scheduler depends on usage flags for ImpactIndex.
## Tracking & Reporting
- Update `TASKS.md` per sprint (TODO → DOING → DONE) with date stamps.
- Log sprint summaries in `docs/updates/` once each sprint lands.
- Use module-specific CI pipeline to run analyzer suites nightly (determinism + perf).
---
**Next Action:** Start Sprint LA1 (Node Analyzer) — move tasks 10-302, 10-307, 10-308, 10-309 → DOING and spin up fixtures + benchmarks.

Some files were not shown because too many files have changed in this diff Show More