// Source: git.stella-ops.org/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Java/JavaLanguageAnalyzer.cs
// Snapshot: 2025-11-18 23:45:25 +02:00 (792 lines, 28 KiB, C#)

using System.Collections.Generic;
using System.IO;
using System.Text;
using StellaOps.Scanner.Analyzers.Lang.Java.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Java;
public sealed class JavaLanguageAnalyzer : ILanguageAnalyzer
{
/// <summary>Analyzer identifier; passed as <c>analyzerId</c> for every component this analyzer writes.</summary>
public string Id => "java";
/// <summary>Human-readable analyzer name.</summary>
public string DisplayName => "Java/Maven Analyzer";
/// <summary>
/// Processes every archive in the normalized workspace, then emits a declared-only
/// component for each lock-file entry that no archive matched.
/// </summary>
/// <param name="context">Scan context handed to the workspace normalizer and the lock-file collector.</param>
/// <param name="writer">Sink that receives the discovered components.</param>
/// <param name="cancellationToken">Checked before each archive and flowed into the helpers.</param>
/// <exception cref="ArgumentNullException"><paramref name="context"/> or <paramref name="writer"/> is null.</exception>
public async ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(context);
    ArgumentNullException.ThrowIfNull(writer);

    var workspace = JavaWorkspaceNormalizer.Normalize(context, cancellationToken);
    var lockData = await JavaLockFileCollector.LoadAsync(context, cancellationToken).ConfigureAwait(false);
    var hasLockEntries = lockData.HasEntries;

    // Keys of lock entries that were matched by a component found inside an archive.
    var matchedLocks = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var archive in workspace.Archives)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            await ProcessArchiveAsync(archive, context, writer, lockData, matchedLocks, hasLockEntries, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is IOException or InvalidDataException)
        {
            // IOException: corrupt archives should not abort the scan.
            // InvalidDataException: skip non-zip payloads despite supported extensions.
        }
    }

    if (lockData.Entries.Count == 0)
    {
        return;
    }

    // Whatever the lock file declares but no archive provided is reported as declared-only.
    foreach (var declared in lockData.Entries)
    {
        if (!matchedLocks.Contains(declared.Key))
        {
            var declaredMetadata = CreateDeclaredMetadata(declared);
            var declaredEvidence = new[] { CreateDeclaredEvidence(declared) };
            var declaredPurl = BuildPurl(declared.GroupId, declared.ArtifactId, declared.Version, packaging: null);

            writer.AddFromPurl(
                analyzerId: Id,
                purl: declaredPurl,
                name: declared.ArtifactId,
                version: declared.Version,
                type: "maven",
                metadata: declaredMetadata,
                evidence: declaredEvidence,
                usedByEntrypoint: false);
        }
    }
}
/// <summary>
/// Emits one component per parsable <c>META-INF/maven/**/pom.properties</c> entry in
/// <paramref name="archive"/>, attaching manifest, lock-file, framework-config and JNI details.
/// </summary>
/// <param name="archive">Archive whose entries are inspected.</param>
/// <param name="context">Scan context; its usage hints decide the <c>usedByEntrypoint</c> flag.</param>
/// <param name="writer">Sink that receives the components.</param>
/// <param name="lockData">Lock-file data used to look up each artifact.</param>
/// <param name="matchedLocks">Receives the key of every lock entry matched here.</param>
/// <param name="hasLockEntries">When true, artifacts without a lock match are tagged <c>lockMissing</c>.</param>
/// <param name="cancellationToken">Checked before each entry and flowed into the parsers.</param>
private async ValueTask ProcessArchiveAsync(
    JavaArchive archive,
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    JavaLockData lockData,
    HashSet<string> matchedLocks,
    bool hasLockEntries,
    CancellationToken cancellationToken)
{
    // Manifest attributes, when present, are shared by every component found in this archive.
    ManifestMetadata? manifestMetadata = archive.TryGetEntry("META-INF/MANIFEST.MF", out var manifestEntry)
        ? await ParseManifestAsync(archive, manifestEntry, cancellationToken).ConfigureAwait(false)
        : null;

    // Archive-wide hints are computed once and copied onto each component below.
    var frameworkConfig = ScanFrameworkConfigs(archive, cancellationToken);
    var jniHints = ScanJniHints(archive, cancellationToken);

    foreach (var entry in archive.Entries)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var entryPath = entry.EffectivePath;
        if (IsManifestEntry(entryPath) || !IsPomPropertiesEntry(entryPath))
        {
            continue;
        }

        var artifact = await ParsePomPropertiesAsync(archive, entry, cancellationToken).ConfigureAwait(false);
        if (artifact is null)
        {
            continue;
        }

        var metadata = CreateInstalledMetadata(artifact, archive, manifestMetadata);

        var lockFound = lockData.TryGet(artifact.GroupId, artifact.ArtifactId, artifact.Version, out var lockEntry);
        if (lockFound)
        {
            matchedLocks.Add(lockEntry!.Key);
            AppendLockMetadata(metadata, lockEntry);
        }
        else if (hasLockEntries)
        {
            AddMetadata(metadata, "lockMissing", "true");
        }

        foreach (var (hintKey, hintValue) in frameworkConfig.Metadata)
        {
            AddMetadata(metadata, hintKey, hintValue);
        }

        foreach (var (hintKey, hintValue) in jniHints.Metadata)
        {
            AddMetadata(metadata, hintKey, hintValue);
        }

        var evidence = new List<LanguageComponentEvidence>();
        evidence.Add(new LanguageComponentEvidence(LanguageEvidenceKind.File, "pom.properties", BuildLocator(archive, entry.OriginalPath), null, artifact.PomSha256));
        if (manifestMetadata is not null)
        {
            evidence.Add(manifestMetadata.CreateEvidence(archive));
        }

        evidence.AddRange(frameworkConfig.Evidence);
        evidence.AddRange(jniHints.Evidence);

        var usedByEntrypoint = context.UsageHints.IsPathUsed(archive.AbsolutePath);

        writer.AddFromPurl(
            analyzerId: Id,
            purl: artifact.Purl,
            name: artifact.ArtifactId,
            version: artifact.Version,
            type: "maven",
            metadata: SortMetadata(metadata),
            evidence: evidence,
            usedByEntrypoint: usedByEntrypoint);
    }
}
/// <summary>
/// Builds an evidence locator of the form <c>archive!entry</c>; when the archive's
/// relative path is the root (<c>"."</c>) only the normalized entry path is returned.
/// </summary>
/// <param name="archive">Archive whose relative path prefixes the locator.</param>
/// <param name="entryPath">Entry path inside the archive; backslashes are normalized to slashes.</param>
private static string BuildLocator(JavaArchive archive, string entryPath)
{
    var archivePath = NormalizeArchivePath(archive.RelativePath);
    var entry = NormalizeEntry(entryPath);

    var archiveIsRoot = string.IsNullOrEmpty(archivePath)
        || string.Equals(archivePath, ".", StringComparison.Ordinal);

    return archiveIsRoot ? entry : string.Concat(archivePath, "!", entry);
}
/// <summary>Returns <paramref name="entryPath"/> with every backslash replaced by a forward slash.</summary>
private static string NormalizeEntry(string entryPath)
{
    return entryPath.Replace('\\', '/');
}
/// <summary>
/// Normalizes an archive's relative path: null, empty or <c>"."</c> becomes <c>"."</c>;
/// anything else has its backslashes replaced by forward slashes.
/// </summary>
private static string NormalizeArchivePath(string relativePath)
    => string.IsNullOrEmpty(relativePath) || string.Equals(relativePath, ".", StringComparison.Ordinal)
        ? "."
        : relativePath.Replace('\\', '/');
/// <summary>
/// Classifies every archive entry against the known framework configuration patterns.
/// For each hint key the result holds the comma-joined, ordinally sorted locators,
/// alongside one evidence item per match.
/// </summary>
/// <param name="archive">Archive to inspect.</param>
/// <param name="cancellationToken">Checked before each entry.</param>
/// <exception cref="ArgumentNullException"><paramref name="archive"/> is null.</exception>
private static FrameworkConfigSummary ScanFrameworkConfigs(JavaArchive archive, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(archive);

    var locatorsByKey = new Dictionary<string, SortedSet<string>>(StringComparer.Ordinal);
    var evidence = new List<LanguageComponentEvidence>();

    foreach (var entry in archive.Entries)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var path = entry.EffectivePath;

        // The Spring categories are mutually exclusive: the first matching one wins.
        string? springKey =
            IsSpringFactories(path) ? "config.spring.factories"
            : IsSpringImports(path) ? "config.spring.imports"
            : IsSpringApplicationConfig(path) ? "config.spring.properties"
            : IsSpringBootstrapConfig(path) ? "config.spring.bootstrap"
            : null;

        if (springKey is not null)
        {
            AddConfigHint(locatorsByKey, evidence, springKey, archive, entry);
        }

        // The remaining categories are evaluated independently of each other.
        if (IsWebXml(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.web.xml", archive, entry);
        }

        if (IsWebFragment(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.web.fragment", archive, entry);
        }

        if (IsJpaConfig(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.jpa", archive, entry);
        }

        if (IsCdiConfig(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.cdi", archive, entry);
        }

        if (IsJaxbConfig(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.jaxb", archive, entry);
        }

        if (IsJaxRsConfig(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.jaxrs", archive, entry);
        }

        if (IsLoggingConfig(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.logging", archive, entry);
        }

        if (IsGraalConfig(path))
        {
            AddConfigHint(locatorsByKey, evidence, "config.graal", archive, entry);
        }
    }

    var flattened = new Dictionary<string, string>(StringComparer.Ordinal);
    foreach (var (key, locators) in locatorsByKey)
    {
        flattened.Add(key, string.Join(",", locators));
    }

    return new FrameworkConfigSummary(flattened, evidence);
}
/// <summary>
/// Collects JNI-related hints from an archive: native library file names, GraalVM
/// <c>jni-config.json</c> locators, and class files flagged by the load-call scan.
/// Values per key are ordinally sorted and comma-joined.
/// </summary>
/// <param name="archive">Archive to inspect.</param>
/// <param name="cancellationToken">Checked before each entry.</param>
/// <exception cref="ArgumentNullException"><paramref name="archive"/> is null.</exception>
private static JniHintSummary ScanJniHints(JavaArchive archive, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(archive);

    var valuesByKey = new Dictionary<string, SortedSet<string>>(StringComparer.Ordinal);
    var evidence = new List<LanguageComponentEvidence>();

    foreach (var candidate in archive.Entries)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var entryPath = candidate.EffectivePath;
        var locator = BuildLocator(archive, candidate.OriginalPath);

        if (IsNativeLibrary(entryPath))
        {
            AddHint(valuesByKey, evidence, "jni.nativeLibs", Path.GetFileName(entryPath), locator, "jni-native");
        }

        if (IsGraalJniConfig(entryPath))
        {
            AddHint(valuesByKey, evidence, "jni.graalConfig", locator, locator, "jni-graal");
        }

        // Only non-empty class files below 1,000,000 bytes are read into memory for scanning.
        var scannable = IsClassFile(entryPath) && candidate.Length is > 0 and < 1_000_000;
        if (scannable)
        {
            TryScanClassForLoadCalls(archive, candidate, locator, valuesByKey, evidence, cancellationToken);
        }
    }

    var flattened = new Dictionary<string, string>(StringComparer.Ordinal);
    foreach (var (key, values) in valuesByKey)
    {
        flattened.Add(key, string.Join(",", values));
    }

    return new JniHintSummary(flattened, evidence);
}
/// <summary>
/// Best-effort scan of a class entry's raw bytes for the ASCII text <c>System.load</c>
/// (which also covers <c>System.loadLibrary</c>); a hit is recorded under <c>jni.loadCalls</c>.
/// Any failure while reading the entry is swallowed and the entry is skipped.
/// </summary>
/// <param name="archive">Archive that owns <paramref name="entry"/>.</param>
/// <param name="entry">Class entry to read fully into memory.</param>
/// <param name="locator">Locator recorded as both hint value and evidence locator.</param>
/// <param name="metadata">Hint map updated on a hit.</param>
/// <param name="evidence">Evidence collection updated on a hit.</param>
/// <param name="cancellationToken">Not used by this method; kept so the signature stays unchanged.</param>
private static void TryScanClassForLoadCalls(
    JavaArchive archive,
    JavaArchiveEntry entry,
    string locator,
    IDictionary<string, SortedSet<string>> metadata,
    ICollection<LanguageComponentEvidence> evidence,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = archive.OpenEntry(entry);
        using var buffer = new MemoryStream();
        stream.CopyTo(buffer);
        var bytes = buffer.ToArray();

        // "System.loadLibrary" starts with "System.load", so one search covers both
        // spellings and the buffer is scanned once instead of twice.
        // NOTE(review): compiled class files normally keep the owner class and the method
        // name in separate constant-pool entries, so this literal may only match string
        // constants — confirm this detects what is intended.
        if (ContainsAscii(bytes, "System.load"))
        {
            AddHint(metadata, evidence, "jni.loadCalls", locator, locator, "jni-load");
        }
    }
    catch
    {
        // best effort; skip unreadable class entries
    }
}
/// <summary>
/// Reports whether <paramref name="buffer"/> contains the ASCII encoding of
/// <paramref name="ascii"/>. An empty buffer or a null/empty search text yields false.
/// </summary>
private static bool ContainsAscii(byte[] buffer, string ascii)
    => buffer.Length != 0
        && !string.IsNullOrEmpty(ascii)
        && SpanSearch(buffer, Encoding.ASCII.GetBytes(ascii)) >= 0;
/// <summary>
/// Finds the first occurrence of <paramref name="needle"/> in <paramref name="haystack"/>.
/// </summary>
/// <param name="haystack">Bytes to search.</param>
/// <param name="needle">Byte sequence to look for.</param>
/// <returns>
/// Zero-based index of the first match, or -1 when there is none, when
/// <paramref name="needle"/> is empty, or when it is longer than <paramref name="haystack"/>.
/// </returns>
private static int SpanSearch(byte[] haystack, byte[] needle)
{
    // Span IndexOf reports 0 for an empty needle; this method's contract is -1, so guard first.
    if (needle.Length == 0 || haystack.Length < needle.Length)
    {
        return -1;
    }

    // Use the framework's span search instead of a hand-written nested loop.
    ReadOnlySpan<byte> source = haystack;
    ReadOnlySpan<byte> pattern = needle;
    return source.IndexOf(pattern);
}
/// <summary>
/// Records <paramref name="value"/> under <paramref name="key"/> (creating the ordinally
/// sorted set on first use) and appends a file-evidence item for <paramref name="locator"/>.
/// </summary>
/// <param name="metadata">Hint map keyed by hint name.</param>
/// <param name="evidence">Evidence collection to append to.</param>
/// <param name="key">Hint key.</param>
/// <param name="value">Hint value added to the key's set.</param>
/// <param name="locator">Locator stored on the evidence item.</param>
/// <param name="evidenceSource">Source label stored on the evidence item.</param>
private static void AddHint(
    IDictionary<string, SortedSet<string>> metadata,
    ICollection<LanguageComponentEvidence> evidence,
    string key,
    string value,
    string locator,
    string evidenceSource)
{
    if (!metadata.TryGetValue(key, out var items))
    {
        items = new SortedSet<string>(StringComparer.Ordinal);
        metadata[key] = items;
    }

    items.Add(value);

    // Positional arguments (kind, source, locator, value, sha256), as in the other
    // construction sites in this file. Named arguments are case-sensitive and this file
    // spells the last parameter two different ways, so names are avoided here.
    evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        evidenceSource,
        locator,
        null,
        null));
}
/// <summary>
/// Records the locator of <paramref name="entry"/> under <paramref name="key"/> (creating
/// the ordinally sorted set on first use) and appends a <c>framework-config</c> evidence
/// item carrying the entry's SHA-256 when it could be computed.
/// </summary>
/// <param name="metadata">Hint map keyed by hint name.</param>
/// <param name="evidence">Evidence collection to append to.</param>
/// <param name="key">Hint key.</param>
/// <param name="archive">Archive that owns <paramref name="entry"/>.</param>
/// <param name="entry">Matched configuration entry.</param>
private static void AddConfigHint(
    IDictionary<string, SortedSet<string>> metadata,
    ICollection<LanguageComponentEvidence> evidence,
    string key,
    JavaArchive archive,
    JavaArchiveEntry entry)
{
    if (!metadata.TryGetValue(key, out var locators))
    {
        locators = new SortedSet<string>(StringComparer.Ordinal);
        metadata[key] = locators;
    }

    var locator = BuildLocator(archive, entry.OriginalPath);
    locators.Add(locator);

    var sha256 = TryComputeSha256(archive, entry);

    // Positional arguments (kind, source, locator, value, sha256), as in the other
    // construction sites in this file. Named arguments are case-sensitive and this file
    // spells the last parameter two different ways, so names are avoided here.
    evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "framework-config",
        locator,
        null,
        sha256));
}
/// <summary>
/// Hashes the content of <paramref name="entry"/> with SHA-256.
/// </summary>
/// <returns>Lower-case hex digest, or null when the entry could not be opened or read.</returns>
private static string? TryComputeSha256(JavaArchive archive, JavaArchiveEntry entry)
{
    try
    {
        using var content = archive.OpenEntry(entry);
        var digest = SHA256.HashData(content);
        return Convert.ToHexString(digest).ToLowerInvariant();
    }
    catch
    {
        // Best effort: a missing hash must not fail the scan.
        return null;
    }
}
/// <summary>True when the path is exactly <c>META-INF/spring.factories</c> (case-insensitive).</summary>
private static bool IsSpringFactories(string path)
{
    return string.Equals(path, "META-INF/spring.factories", StringComparison.OrdinalIgnoreCase);
}

/// <summary>True for <c>*.imports</c> files under <c>META-INF/spring/</c> (case-insensitive).</summary>
private static bool IsSpringImports(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (!path.StartsWith("META-INF/spring/", ignoreCase))
    {
        return false;
    }

    return path.EndsWith(".imports", ignoreCase);
}

/// <summary>True when the path ends with <c>application.properties</c>, <c>.yml</c> or <c>.yaml</c> (case-insensitive).</summary>
private static bool IsSpringApplicationConfig(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (path.EndsWith("application.properties", ignoreCase))
    {
        return true;
    }

    if (path.EndsWith("application.yml", ignoreCase))
    {
        return true;
    }

    return path.EndsWith("application.yaml", ignoreCase);
}

/// <summary>True when the path ends with <c>bootstrap.properties</c>, <c>.yml</c> or <c>.yaml</c> (case-insensitive).</summary>
private static bool IsSpringBootstrapConfig(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (path.EndsWith("bootstrap.properties", ignoreCase))
    {
        return true;
    }

    if (path.EndsWith("bootstrap.yml", ignoreCase))
    {
        return true;
    }

    return path.EndsWith("bootstrap.yaml", ignoreCase);
}
/// <summary>True when the path ends with <c>WEB-INF/web.xml</c> (case-insensitive).</summary>
private static bool IsWebXml(string path)
{
    return path.EndsWith("WEB-INF/web.xml", StringComparison.OrdinalIgnoreCase);
}

/// <summary>True when the path ends with <c>META-INF/web-fragment.xml</c> (case-insensitive).</summary>
private static bool IsWebFragment(string path)
{
    return path.EndsWith("META-INF/web-fragment.xml", StringComparison.OrdinalIgnoreCase);
}

/// <summary>True when the path ends with <c>META-INF/persistence.xml</c> (case-insensitive).</summary>
private static bool IsJpaConfig(string path)
{
    return path.EndsWith("META-INF/persistence.xml", StringComparison.OrdinalIgnoreCase);
}

/// <summary>True when the path ends with <c>META-INF/beans.xml</c> (case-insensitive).</summary>
private static bool IsCdiConfig(string path)
{
    return path.EndsWith("META-INF/beans.xml", StringComparison.OrdinalIgnoreCase);
}

/// <summary>True when the path ends with <c>META-INF/jaxb.index</c> (case-insensitive).</summary>
private static bool IsJaxbConfig(string path)
{
    return path.EndsWith("META-INF/jaxb.index", StringComparison.OrdinalIgnoreCase);
}

/// <summary>True for paths under <c>META-INF/services/</c> that contain <c>ws.rs</c> (case-insensitive).</summary>
private static bool IsJaxRsConfig(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (!path.StartsWith("META-INF/services/", ignoreCase))
    {
        return false;
    }

    return path.Contains("ws.rs", ignoreCase);
}
/// <summary>True when the path ends with <c>log4j2.xml</c>, <c>logback.xml</c> or <c>logging.properties</c> (case-insensitive).</summary>
private static bool IsLoggingConfig(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (path.EndsWith("log4j2.xml", ignoreCase))
    {
        return true;
    }

    if (path.EndsWith("logback.xml", ignoreCase))
    {
        return true;
    }

    return path.EndsWith("logging.properties", ignoreCase);
}

/// <summary>
/// True for reflect/resource/proxy configuration JSON files under
/// <c>META-INF/native-image/</c> (case-insensitive).
/// </summary>
private static bool IsGraalConfig(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (!path.StartsWith("META-INF/native-image/", ignoreCase))
    {
        return false;
    }

    if (path.EndsWith("reflect-config.json", ignoreCase))
    {
        return true;
    }

    if (path.EndsWith("resource-config.json", ignoreCase))
    {
        return true;
    }

    return path.EndsWith("proxy-config.json", ignoreCase);
}

/// <summary>True for <c>jni-config.json</c> files under <c>META-INF/native-image/</c> (case-insensitive).</summary>
private static bool IsGraalJniConfig(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (!path.StartsWith("META-INF/native-image/", ignoreCase))
    {
        return false;
    }

    return path.EndsWith("jni-config.json", ignoreCase);
}
/// <summary>
/// True when the path's extension is <c>.so</c>, <c>.dll</c>, <c>.dylib</c> or
/// <c>.jnilib</c> (case-insensitive).
/// </summary>
private static bool IsNativeLibrary(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    var extension = Path.GetExtension(path);

    if (extension.Equals(".so", ignoreCase))
    {
        return true;
    }

    if (extension.Equals(".dll", ignoreCase))
    {
        return true;
    }

    if (extension.Equals(".dylib", ignoreCase))
    {
        return true;
    }

    return extension.Equals(".jnilib", ignoreCase);
}

/// <summary>True when the path ends with <c>.class</c> (case-insensitive).</summary>
private static bool IsClassFile(string path)
{
    return path.EndsWith(".class", StringComparison.OrdinalIgnoreCase);
}
/// <summary>True for <c>pom.properties</c> files located under <c>META-INF/maven/</c> (case-insensitive).</summary>
private static bool IsPomPropertiesEntry(string entryName)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (!entryName.StartsWith("META-INF/maven/", ignoreCase))
    {
        return false;
    }

    return entryName.EndsWith("/pom.properties", ignoreCase);
}

/// <summary>True when the entry name is exactly <c>META-INF/MANIFEST.MF</c> (case-insensitive).</summary>
private static bool IsManifestEntry(string entryName)
{
    return string.Equals(entryName, "META-INF/MANIFEST.MF", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Copies the lock entry's configuration, repository and resolved URL into
/// <paramref name="metadata"/> under the <c>lock*</c> keys; blank values are dropped by
/// <c>AddMetadata</c>.
/// </summary>
private static void AppendLockMetadata(ICollection<KeyValuePair<string, string?>> metadata, JavaLockEntry entry)
{
    var lockFields = new (string Key, string? Value)[]
    {
        ("lockConfiguration", entry.Configuration),
        ("lockRepository", entry.Repository),
        ("lockResolved", entry.ResolvedUrl),
    };

    foreach (var (key, value) in lockFields)
    {
        AddMetadata(metadata, key, value);
    }
}
/// <summary>
/// Reads a <c>pom.properties</c> entry, parses its <c>key=value</c> lines and builds the
/// Maven artifact description, including the SHA-256 of the raw entry bytes.
/// </summary>
/// <param name="archive">Archive that owns <paramref name="entry"/>.</param>
/// <param name="entry">The <c>pom.properties</c> entry to parse.</param>
/// <param name="cancellationToken">Flowed into the copy and checked per line.</param>
/// <returns>The artifact, or null when <c>groupId</c>, <c>artifactId</c> or <c>version</c> is missing or blank.</returns>
private static async ValueTask<MavenArtifact?> ParsePomPropertiesAsync(JavaArchive archive, JavaArchiveEntry entry, CancellationToken cancellationToken)
{
    // Buffer the entry so the same bytes can be parsed first and hashed afterwards.
    await using var entryStream = archive.OpenEntry(entry);
    using var content = new MemoryStream();
    await entryStream.CopyToAsync(content, cancellationToken).ConfigureAwait(false);
    content.Position = 0;

    using var reader = new StreamReader(content, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: true);
    var properties = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    string? rawLine;
    while ((rawLine = await reader.ReadLineAsync().ConfigureAwait(false)) is not null)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var line = rawLine.Trim();
        if (line.Length == 0 || line[0] == '#')
        {
            // Blank line or comment.
            continue;
        }

        var equalsAt = line.IndexOf('=');
        if (equalsAt <= 0)
        {
            // No separator, or nothing in front of it.
            continue;
        }

        var key = line[..equalsAt].Trim();
        if (key.Length == 0)
        {
            continue;
        }

        // Later occurrences of a key overwrite earlier ones.
        properties[key] = line[(equalsAt + 1)..].Trim();
    }

    // All three coordinates are mandatory.
    if (!properties.TryGetValue("groupId", out var groupId) || string.IsNullOrWhiteSpace(groupId)
        || !properties.TryGetValue("artifactId", out var artifactId) || string.IsNullOrWhiteSpace(artifactId)
        || !properties.TryGetValue("version", out var version) || string.IsNullOrWhiteSpace(version))
    {
        return null;
    }

    var packaging = properties.TryGetValue("packaging", out var declaredPackaging) ? declaredPackaging : "jar";
    var displayName = properties.TryGetValue("name", out var declaredName) ? declaredName : null;
    var purl = BuildPurl(groupId, artifactId, version, packaging);

    // Rewind so the digest covers the whole buffered entry.
    content.Position = 0;
    var pomSha = Convert.ToHexString(SHA256.HashData(content)).ToLowerInvariant();

    return new MavenArtifact(
        GroupId: groupId.Trim(),
        ArtifactId: artifactId.Trim(),
        Version: version.Trim(),
        Packaging: packaging?.Trim(),
        Name: displayName?.Trim(),
        Purl: purl,
        PomSha256: pomSha);
}
/// <summary>
/// Reads <c>META-INF/MANIFEST.MF</c> line by line and captures the first occurrence of
/// <c>Implementation-Title</c>, <c>Implementation-Version</c> and <c>Implementation-Vendor</c>
/// (keys compared case-insensitively).
/// </summary>
/// <param name="archive">Archive that owns <paramref name="entry"/>.</param>
/// <param name="entry">The manifest entry to read.</param>
/// <param name="cancellationToken">Forwarded to each line read and checked per line.</param>
/// <returns>The captured attributes, or null when none of the three was present.</returns>
private static async ValueTask<ManifestMetadata?> ParseManifestAsync(JavaArchive archive, JavaArchiveEntry entry, CancellationToken cancellationToken)
{
    await using var entryStream = archive.OpenEntry(entry);
    using var reader = new StreamReader(entryStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: false);

    string? title = null;
    string? version = null;
    string? vendor = null;

    // NOTE(review): every line is parsed on its own; manifest continuation lines (a long
    // value wrapped onto a following line that starts with a space) are not unfolded here —
    // confirm whether long attribute values need that.
    while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var separatorIndex = line.IndexOf(':');
        if (separatorIndex <= 0)
        {
            continue;
        }

        var key = line[..separatorIndex].Trim();
        var value = line[(separatorIndex + 1)..].Trim();

        // `??=` keeps the first value seen for each attribute.
        if (key.Equals("Implementation-Title", StringComparison.OrdinalIgnoreCase))
        {
            title ??= value;
        }
        else if (key.Equals("Implementation-Version", StringComparison.OrdinalIgnoreCase))
        {
            version ??= value;
        }
        else if (key.Equals("Implementation-Vendor", StringComparison.OrdinalIgnoreCase))
        {
            vendor ??= value;
        }
    }

    if (title is null && version is null && vendor is null)
    {
        return null;
    }

    return new ManifestMetadata(title, version, vendor);
}

/// <summary>Result of the framework-configuration scan of one archive.</summary>
/// <param name="Metadata">Hint key mapped to its comma-joined locators.</param>
/// <param name="Evidence">One evidence item per matched configuration entry.</param>
internal sealed record FrameworkConfigSummary(
    IReadOnlyDictionary<string, string> Metadata,
    IReadOnlyCollection<LanguageComponentEvidence> Evidence);

/// <summary>Result of the JNI hint scan of one archive.</summary>
/// <param name="Metadata">Hint key mapped to its comma-joined values.</param>
/// <param name="Evidence">One evidence item per recorded hint.</param>
internal sealed record JniHintSummary(
    IReadOnlyDictionary<string, string> Metadata,
    IReadOnlyCollection<LanguageComponentEvidence> Evidence);
/// <summary>
/// Builds the <c>pkg:maven/…</c> identifier for an artifact. Dots in the group id are
/// replaced by slashes, and a <c>?type=</c> qualifier is appended for any non-blank
/// packaging other than <c>jar</c> (case-insensitive).
/// </summary>
/// <param name="groupId">Maven group id.</param>
/// <param name="artifactId">Maven artifact id.</param>
/// <param name="version">Artifact version.</param>
/// <param name="packaging">Packaging; null, blank or <c>jar</c> adds no qualifier.</param>
// NOTE(review): the package-url examples for Maven keep the dotted group id as the
// namespace; the dot-to-slash conversion here looks project-specific — confirm that
// consumers expect this shape before changing it.
private static string BuildPurl(string groupId, string artifactId, string version, string? packaging)
{
    var coordinates = $"pkg:maven/{groupId.Replace('.', '/')}/{artifactId}@{version}";

    var isDefaultPackaging = string.IsNullOrWhiteSpace(packaging)
        || packaging.Equals("jar", StringComparison.OrdinalIgnoreCase);

    return isDefaultPackaging
        ? coordinates
        : string.Concat(coordinates, "?type=", packaging);
}
/// <summary>Maven coordinates and derived identifiers parsed from one <c>pom.properties</c> entry.</summary>
/// <param name="GroupId">Trimmed <c>groupId</c> property.</param>
/// <param name="ArtifactId">Trimmed <c>artifactId</c> property.</param>
/// <param name="Version">Trimmed <c>version</c> property.</param>
/// <param name="Packaging">Trimmed <c>packaging</c> property; the parser substitutes <c>jar</c> when the property is absent.</param>
/// <param name="Name">Trimmed <c>name</c> property, or null when absent.</param>
/// <param name="Purl">Package URL produced by <c>BuildPurl</c> for these coordinates.</param>
/// <param name="PomSha256">Lower-case hex SHA-256 of the <c>pom.properties</c> bytes.</param>
private sealed record MavenArtifact(
string GroupId,
string ArtifactId,
string Version,
string? Packaging,
string? Name,
string Purl,
string PomSha256);
/// <summary>The <c>Implementation-*</c> attributes captured from an archive manifest.</summary>
/// <param name="ImplementationTitle">Value of <c>Implementation-Title</c>, if present.</param>
/// <param name="ImplementationVersion">Value of <c>Implementation-Version</c>, if present.</param>
/// <param name="ImplementationVendor">Value of <c>Implementation-Vendor</c>, if present.</param>
private sealed record ManifestMetadata(string? ImplementationTitle, string? ImplementationVersion, string? ImplementationVendor)
{
    /// <summary>Adds the manifest attributes to <paramref name="target"/>; blank values are dropped by <c>AddMetadata</c>.</summary>
    public void ApplyMetadata(ICollection<KeyValuePair<string, string?>> target)
    {
        AddMetadata(target, "manifestTitle", ImplementationTitle);
        AddMetadata(target, "manifestVersion", ImplementationVersion);
        AddMetadata(target, "manifestVendor", ImplementationVendor);
    }

    /// <summary>
    /// Creates the manifest evidence item. Its value is the non-blank attributes rendered as
    /// <c>title=…;version=…;vendor=…</c>, or null when all of them are blank.
    /// </summary>
    public LanguageComponentEvidence CreateEvidence(JavaArchive archive)
    {
        var parts = new List<string>(3);

        if (!string.IsNullOrWhiteSpace(ImplementationTitle))
        {
            parts.Add("title=" + ImplementationTitle);
        }

        if (!string.IsNullOrWhiteSpace(ImplementationVersion))
        {
            parts.Add("version=" + ImplementationVersion);
        }

        if (!string.IsNullOrWhiteSpace(ImplementationVendor))
        {
            parts.Add("vendor=" + ImplementationVendor);
        }

        var summary = parts.Count == 0 ? null : string.Join(';', parts);

        return new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "MANIFEST.MF",
            BuildLocator(archive, "META-INF/MANIFEST.MF"),
            summary,
            null);
    }
}
/// <summary>
/// Builds the base metadata list for a component found inside an archive: coordinates,
/// normalized jar path, packaging, display name and, when available, manifest attributes.
/// </summary>
/// <param name="artifact">Artifact parsed from <c>pom.properties</c>.</param>
/// <param name="archive">Archive the artifact was found in.</param>
/// <param name="manifestMetadata">Manifest attributes of the archive, or null.</param>
private static List<KeyValuePair<string, string?>> CreateInstalledMetadata(
    MavenArtifact artifact,
    JavaArchive archive,
    ManifestMetadata? manifestMetadata)
{
    var jarPath = NormalizeArchivePath(archive.RelativePath);
    var result = new List<KeyValuePair<string, string?>>(8);

    AddMetadata(result, "groupId", artifact.GroupId);
    AddMetadata(result, "artifactId", artifact.ArtifactId);
    // The jar path is recorded even when blank.
    AddMetadata(result, "jarPath", jarPath, allowEmpty: true);
    AddMetadata(result, "packaging", artifact.Packaging);
    AddMetadata(result, "displayName", artifact.Name);

    if (manifestMetadata is not null)
    {
        manifestMetadata.ApplyMetadata(result);
    }

    return result;
}
/// <summary>
/// Builds the sorted metadata for a lock entry that no archive matched: the
/// <c>declaredOnly</c> marker, lock source and locator, plus the lock details.
/// </summary>
/// <param name="entry">Unmatched lock entry.</param>
private static IReadOnlyList<KeyValuePair<string, string?>> CreateDeclaredMetadata(JavaLockEntry entry)
{
    var source = NormalizeLockSource(entry.Source);

    // A blank locator falls back to the (normalized) lock source.
    var locator = string.IsNullOrWhiteSpace(entry.Locator) ? source : entry.Locator;

    var result = new List<KeyValuePair<string, string?>>(6);
    AddMetadata(result, "declaredOnly", "true");
    AddMetadata(result, "lockSource", source);
    AddMetadata(result, "lockLocator", locator, allowEmpty: true);
    AppendLockMetadata(result, entry);

    return SortMetadata(result);
}
/// <summary>
/// Creates the metadata-kind evidence item for a lock entry that no archive matched.
/// The resolved URL is stored as the evidence value and no hash is attached.
/// </summary>
/// <param name="entry">Unmatched lock entry.</param>
private static LanguageComponentEvidence CreateDeclaredEvidence(JavaLockEntry entry)
{
    var lockSource = NormalizeLockSource(entry.Source);

    // A blank locator falls back to the (normalized) lock source.
    var lockLocator = string.IsNullOrWhiteSpace(entry.Locator) ? lockSource : entry.Locator;

    // Positional arguments (kind, source, locator, value, sha256), as in the other
    // construction sites in this file. Named arguments are case-sensitive and this file
    // spells the last parameter two different ways, so names are avoided here.
    return new LanguageComponentEvidence(
        LanguageEvidenceKind.Metadata,
        lockSource,
        lockLocator,
        entry.ResolvedUrl,
        null);
}
/// <summary>
/// Sorts <paramref name="metadata"/> in place, ordinally by key and then by value
/// (null values compare as empty strings), and returns the same list instance.
/// </summary>
private static IReadOnlyList<KeyValuePair<string, string?>> SortMetadata(List<KeyValuePair<string, string?>> metadata)
{
    metadata.Sort(static (a, b) =>
    {
        var byKey = string.CompareOrdinal(a.Key, b.Key);
        return byKey != 0
            ? byKey
            : string.CompareOrdinal(a.Value ?? string.Empty, b.Value ?? string.Empty);
    });

    return metadata;
}
/// <summary>
/// Appends a key/value pair to <paramref name="metadata"/>. Nothing is added when the key
/// is blank, or when the value is blank and <paramref name="allowEmpty"/> is false.
/// </summary>
/// <param name="metadata">Collection to append to.</param>
/// <param name="key">Metadata key.</param>
/// <param name="value">Metadata value.</param>
/// <param name="allowEmpty">When true, null/blank values are stored as given.</param>
private static void AddMetadata(
    ICollection<KeyValuePair<string, string?>> metadata,
    string key,
    string? value,
    bool allowEmpty = false)
{
    var keyMissing = string.IsNullOrWhiteSpace(key);
    var valueRejected = !allowEmpty && string.IsNullOrWhiteSpace(value);

    if (keyMissing || valueRejected)
    {
        return;
    }

    metadata.Add(new KeyValuePair<string, string?>(key, value));
}
/// <summary>Returns <paramref name="source"/>, or <c>lockfile</c> when it is null or blank.</summary>
private static string NormalizeLockSource(string? source)
{
    if (string.IsNullOrWhiteSpace(source))
    {
        return "lockfile";
    }

    return source;
}
}