using System.Collections.Generic;
using System.IO;
using System.Text;
using StellaOps.Scanner.Analyzers.Lang.Java.Internal;

namespace StellaOps.Scanner.Analyzers.Lang.Java;

/// <summary>
/// Language analyzer that walks the archives of the normalized Java workspace and emits one
/// Maven component per embedded <c>pom.properties</c>, plus a "declared only" component for
/// every lock-file entry that was not matched by any archive.
/// </summary>
public sealed class JavaLanguageAnalyzer : ILanguageAnalyzer
{
    /// <summary>Analyzer identifier passed to the component writer.</summary>
    public string Id => "java";

    /// <summary>Human-readable analyzer name.</summary>
    public string DisplayName => "Java/Maven Analyzer";

    /// <summary>
    /// Scans every archive of the workspace, then reports lock entries that no archive matched.
    /// </summary>
    /// <param name="context">Analyzer context; must not be null.</param>
    /// <param name="writer">Sink receiving the discovered components; must not be null.</param>
    /// <param name="cancellationToken">Checked before each archive and inside the per-archive loops.</param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> or <paramref name="writer"/> is null.</exception>
    public async ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(writer);

        var workspace = JavaWorkspaceNormalizer.Normalize(context, cancellationToken);
        var lockData = await JavaLockFileCollector.LoadAsync(context, cancellationToken).ConfigureAwait(false);

        // Keys of lock entries that were matched by an installed artifact.
        var matchedLocks = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var hasLockEntries = lockData.HasEntries;

        foreach (var archive in workspace.Archives)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await ProcessArchiveAsync(archive, context, writer, lockData, matchedLocks, hasLockEntries, cancellationToken).ConfigureAwait(false);
            }
            catch (IOException)
            {
                // Corrupt archives should not abort the scan.
            }
            catch (InvalidDataException)
            {
                // Skip non-zip payloads despite supported extensions.
            }
        }

        if (lockData.Entries.Count > 0)
        {
            foreach (var entry in lockData.Entries)
            {
                if (matchedLocks.Contains(entry.Key))
                {
                    continue;
                }

                // Lock entry without a matching archive: emit it as a declared-only component.
                var metadata = CreateDeclaredMetadata(entry);
                var evidence = new[] { CreateDeclaredEvidence(entry) };
                var purl = BuildPurl(entry.GroupId, entry.ArtifactId, entry.Version, packaging: null);

                writer.AddFromPurl(
                    analyzerId: Id,
                    purl: purl,
                    name: entry.ArtifactId,
                    version: entry.Version,
                    type: "maven",
                    metadata: metadata,
                    evidence: evidence,
                    usedByEntrypoint: false);
            }
        }
    }

    /// <summary>
    /// Emits one component for each parseable <c>pom.properties</c> entry of <paramref name="archive"/>,
    /// enriched with manifest, lock, framework-config and JNI metadata and evidence.
    /// </summary>
    private async ValueTask ProcessArchiveAsync(
        JavaArchive archive,
        LanguageAnalyzerContext context,
        LanguageComponentWriter writer,
        JavaLockData lockData,
        HashSet<string> matchedLocks,
        bool hasLockEntries,
        CancellationToken cancellationToken)
    {
        ManifestMetadata? manifestMetadata = null;
        if (archive.TryGetEntry("META-INF/MANIFEST.MF", out var manifestEntry))
        {
            manifestMetadata = await ParseManifestAsync(archive, manifestEntry, cancellationToken).ConfigureAwait(false);
        }

        // Archive-wide hints are computed once and attached to every artifact found in the archive.
        var frameworkConfig = ScanFrameworkConfigs(archive, cancellationToken);
        var jniHints = ScanJniHints(archive, cancellationToken);

        foreach (var entry in archive.Entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (IsManifestEntry(entry.EffectivePath))
            {
                continue;
            }

            if (!IsPomPropertiesEntry(entry.EffectivePath))
            {
                continue;
            }

            var artifact = await ParsePomPropertiesAsync(archive, entry, cancellationToken).ConfigureAwait(false);
            if (artifact is null)
            {
                continue;
            }

            var metadata = CreateInstalledMetadata(artifact, archive, manifestMetadata);

            if (lockData.TryGet(artifact.GroupId, artifact.ArtifactId, artifact.Version, out var lockEntry))
            {
                matchedLocks.Add(lockEntry!.Key);
                AppendLockMetadata(metadata, lockEntry);
            }
            else if (hasLockEntries)
            {
                // A lock file exists but does not list this installed artifact.
                AddMetadata(metadata, "lockMissing", "true");
            }

            foreach (var hint in frameworkConfig.Metadata)
            {
                AddMetadata(metadata, hint.Key, hint.Value);
            }

            foreach (var hint in jniHints.Metadata)
            {
                AddMetadata(metadata, hint.Key, hint.Value);
            }

            var evidence = new List<LanguageComponentEvidence>
            {
                new(LanguageEvidenceKind.File, "pom.properties", BuildLocator(archive, entry.OriginalPath), null, artifact.PomSha256),
            };

            if (manifestMetadata is not null)
            {
                evidence.Add(manifestMetadata.CreateEvidence(archive));
            }

            evidence.AddRange(frameworkConfig.Evidence);
            evidence.AddRange(jniHints.Evidence);

            var usedByEntrypoint = context.UsageHints.IsPathUsed(archive.AbsolutePath);

            writer.AddFromPurl(
                analyzerId: Id,
                purl: artifact.Purl,
                name: artifact.ArtifactId,
                version: artifact.Version,
                type: "maven",
                metadata: SortMetadata(metadata),
                evidence: evidence,
                usedByEntrypoint: usedByEntrypoint);
        }
    }

    /// <summary>
    /// Builds an <c>archive!entry</c> locator with forward slashes; returns only the entry path
    /// when the archive's relative path is empty or <c>"."</c>.
    /// </summary>
    private static string BuildLocator(JavaArchive archive, string entryPath)
    {
        var relativeArchive = NormalizeArchivePath(archive.RelativePath);
        var normalizedEntry = NormalizeEntry(entryPath);

        if (string.Equals(relativeArchive, ".", StringComparison.Ordinal) || string.IsNullOrEmpty(relativeArchive))
        {
            return normalizedEntry;
        }

        return string.Concat(relativeArchive, "!", normalizedEntry);
    }

    /// <summary>Replaces backslashes in an entry path with forward slashes.</summary>
    private static string NormalizeEntry(string entryPath)
        => entryPath.Replace('\\', '/');

    /// <summary>
    /// Normalizes an archive's relative path to forward slashes; null, empty and <c>"."</c> all map to <c>"."</c>.
    /// </summary>
    private static string NormalizeArchivePath(string relativePath)
    {
        if (string.IsNullOrEmpty(relativePath) || string.Equals(relativePath, ".", StringComparison.Ordinal))
        {
            return ".";
        }

        return relativePath.Replace('\\', '/');
    }

    /// <summary>
    /// Collects locators (and SHA-256 evidence) of well-known framework configuration files in the archive.
    /// Each metadata value is the comma-joined, ordinally sorted set of locators for its key.
    /// </summary>
    private static FrameworkConfigSummary ScanFrameworkConfigs(JavaArchive archive, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(archive);

        var metadata = new Dictionary<string, SortedSet<string>>(StringComparer.Ordinal);
        var evidence = new List<LanguageComponentEvidence>();

        foreach (var entry in archive.Entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var path = entry.EffectivePath;

            // The Spring categories are mutually exclusive; the remaining checks are independent.
            if (IsSpringFactories(path))
            {
                AddConfigHint(metadata, evidence, "config.spring.factories", archive, entry);
            }
            else if (IsSpringImports(path))
            {
                AddConfigHint(metadata, evidence, "config.spring.imports", archive, entry);
            }
            else if (IsSpringApplicationConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.spring.properties", archive, entry);
            }
            else if (IsSpringBootstrapConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.spring.bootstrap", archive, entry);
            }

            if (IsWebXml(path))
            {
                AddConfigHint(metadata, evidence, "config.web.xml", archive, entry);
            }

            if (IsWebFragment(path))
            {
                AddConfigHint(metadata, evidence, "config.web.fragment", archive, entry);
            }

            if (IsJpaConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.jpa", archive, entry);
            }

            if (IsCdiConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.cdi", archive, entry);
            }

            if (IsJaxbConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.jaxb", archive, entry);
            }

            if (IsJaxRsConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.jaxrs", archive, entry);
            }

            if (IsLoggingConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.logging", archive, entry);
            }

            if (IsGraalConfig(path))
            {
                AddConfigHint(metadata, evidence, "config.graal", archive, entry);
            }
        }

        var flattened = metadata.ToDictionary(
            static pair => pair.Key,
            static pair => string.Join(",", pair.Value),
            StringComparer.Ordinal);

        return new FrameworkConfigSummary(flattened, evidence);
    }

    /// <summary>
    /// Collects JNI-related hints: native library entries, GraalVM JNI configs and class files
    /// (non-empty, under 1,000,000 bytes) whose bytes contain a <c>System.load</c> literal.
    /// </summary>
    private static JniHintSummary ScanJniHints(JavaArchive archive, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(archive);

        var metadata = new Dictionary<string, SortedSet<string>>(StringComparer.Ordinal);
        var evidence = new List<LanguageComponentEvidence>();

        foreach (var entry in archive.Entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var path = entry.EffectivePath;
            var locator = BuildLocator(archive, entry.OriginalPath);

            if (IsNativeLibrary(path))
            {
                AddHint(metadata, evidence, "jni.nativeLibs", Path.GetFileName(path), locator, "jni-native");
            }

            if (IsGraalJniConfig(path))
            {
                AddHint(metadata, evidence, "jni.graalConfig", locator, locator, "jni-graal");
            }

            if (IsClassFile(path) && entry.Length is > 0 and < 1_000_000)
            {
                TryScanClassForLoadCalls(archive, entry, locator, metadata, evidence, cancellationToken);
            }
        }

        var flattened = metadata.ToDictionary(
            static pair => pair.Key,
            static pair => string.Join(",", pair.Value),
            StringComparer.Ordinal);

        return new JniHintSummary(flattened, evidence);
    }

    /// <summary>
    /// Best-effort scan of a class entry's raw bytes for the ASCII text <c>System.load</c>
    /// (which also covers <c>System.loadLibrary</c>); any failure to read the entry is ignored.
    /// </summary>
    private static void TryScanClassForLoadCalls(
        JavaArchive archive,
        JavaArchiveEntry entry,
        string locator,
        IDictionary<string, SortedSet<string>> metadata,
        ICollection<LanguageComponentEvidence> evidence,
        CancellationToken cancellationToken)
    {
        try
        {
            using var stream = archive.OpenEntry(entry);
            using var buffer = new MemoryStream();
            stream.CopyTo(buffer);
            var bytes = buffer.ToArray();

            // "System.load" is a prefix of "System.loadLibrary", so one search covers both spellings.
            // NOTE(review): compiled classes usually keep the class name and method name in separate
            // constant-pool entries, so this contiguous literal may rarely occur - confirm intent.
            if (ContainsAscii(bytes, "System.load"))
            {
                AddHint(metadata, evidence, "jni.loadCalls", locator, locator, "jni-load");
            }
        }
        catch
        {
            // best effort; skip unreadable class entries
        }
    }

    /// <summary>Returns true when <paramref name="buffer"/> contains the ASCII encoding of <paramref name="ascii"/>.</summary>
    private static bool ContainsAscii(byte[] buffer, string ascii)
    {
        if (buffer.Length == 0 || string.IsNullOrEmpty(ascii))
        {
            return false;
        }

        var needle = Encoding.ASCII.GetBytes(ascii);
        return SpanSearch(buffer, needle) >= 0;
    }

    /// <summary>
    /// Returns the index of the first occurrence of <paramref name="needle"/> in <paramref name="haystack"/>,
    /// or -1 when the needle is empty, longer than the haystack, or absent.
    /// </summary>
    private static int SpanSearch(byte[] haystack, byte[] needle)
    {
        // Explicit guard: span IndexOf reports 0 for an empty needle, callers expect -1.
        if (needle.Length == 0 || haystack.Length < needle.Length)
        {
            return -1;
        }

        ReadOnlySpan<byte> haystackSpan = haystack;
        ReadOnlySpan<byte> needleSpan = needle;
        return haystackSpan.IndexOf(needleSpan);
    }

    /// <summary>
    /// Adds <paramref name="value"/> to the sorted set stored under <paramref name="key"/> and appends a
    /// file evidence record (no value, no hash) for <paramref name="locator"/>.
    /// </summary>
    private static void AddHint(
        IDictionary<string, SortedSet<string>> metadata,
        ICollection<LanguageComponentEvidence> evidence,
        string key,
        string value,
        string locator,
        string evidenceSource)
    {
        if (!metadata.TryGetValue(key, out var items))
        {
            items = new SortedSet<string>(StringComparer.Ordinal);
            metadata[key] = items;
        }

        items.Add(value);

        // Positional arguments (kind, source, locator, value, sha256), as used elsewhere in this class.
        evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            evidenceSource,
            locator,
            null,
            null));
    }

    /// <summary>
    /// Records the entry's locator under <paramref name="key"/> and appends "framework-config"
    /// evidence carrying the entry's SHA-256 (null when the entry cannot be hashed).
    /// </summary>
    private static void AddConfigHint(
        IDictionary<string, SortedSet<string>> metadata,
        ICollection<LanguageComponentEvidence> evidence,
        string key,
        JavaArchive archive,
        JavaArchiveEntry entry)
    {
        if (!metadata.TryGetValue(key, out var locators))
        {
            locators = new SortedSet<string>(StringComparer.Ordinal);
            metadata[key] = locators;
        }

        var locator = BuildLocator(archive, entry.OriginalPath);
        locators.Add(locator);

        var sha256 = TryComputeSha256(archive, entry);

        // Positional arguments (kind, source, locator, value, sha256), as used elsewhere in this class.
        evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "framework-config",
            locator,
            null,
            sha256));
    }

    /// <summary>Returns the lowercase hex SHA-256 of the entry content, or null if reading or hashing fails.</summary>
    private static string? TryComputeSha256(JavaArchive archive, JavaArchiveEntry entry)
    {
        try
        {
            using var stream = archive.OpenEntry(entry);
            return Convert.ToHexString(SHA256.HashData(stream)).ToLowerInvariant();
        }
        catch
        {
            // Hash is optional evidence; an unreadable entry simply has none.
            return null;
        }
    }

    private static bool IsSpringFactories(string path)
        => string.Equals(path, "META-INF/spring.factories", StringComparison.OrdinalIgnoreCase);

    private static bool IsSpringImports(string path)
        => path.StartsWith("META-INF/spring/", StringComparison.OrdinalIgnoreCase)
            && path.EndsWith(".imports", StringComparison.OrdinalIgnoreCase);

    private static bool IsSpringApplicationConfig(string path)
        => path.EndsWith("application.properties", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith("application.yml", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith("application.yaml", StringComparison.OrdinalIgnoreCase);

    private static bool IsSpringBootstrapConfig(string path)
        => path.EndsWith("bootstrap.properties", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith("bootstrap.yml", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith("bootstrap.yaml", StringComparison.OrdinalIgnoreCase);

    private static bool IsWebXml(string path)
        => path.EndsWith("WEB-INF/web.xml", StringComparison.OrdinalIgnoreCase);

    private static bool IsWebFragment(string path)
        => path.EndsWith("META-INF/web-fragment.xml", StringComparison.OrdinalIgnoreCase);

    private static bool IsJpaConfig(string path)
        => path.EndsWith("META-INF/persistence.xml", StringComparison.OrdinalIgnoreCase);

    private static bool IsCdiConfig(string path)
        => path.EndsWith("META-INF/beans.xml", StringComparison.OrdinalIgnoreCase);

    private static bool IsJaxbConfig(string path)
        => path.EndsWith("META-INF/jaxb.index", StringComparison.OrdinalIgnoreCase);

    private static bool IsJaxRsConfig(string path)
        => path.StartsWith("META-INF/services/", StringComparison.OrdinalIgnoreCase)
            && path.Contains("ws.rs", StringComparison.OrdinalIgnoreCase);

    private static bool IsLoggingConfig(string path)
        => path.EndsWith("log4j2.xml", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith("logback.xml", StringComparison.OrdinalIgnoreCase)
            || path.EndsWith("logging.properties", StringComparison.OrdinalIgnoreCase);

    private static bool IsGraalConfig(string path)
        => path.StartsWith("META-INF/native-image/", StringComparison.OrdinalIgnoreCase)
            && (path.EndsWith("reflect-config.json", StringComparison.OrdinalIgnoreCase)
                || path.EndsWith("resource-config.json", StringComparison.OrdinalIgnoreCase)
                || path.EndsWith("proxy-config.json", StringComparison.OrdinalIgnoreCase));

    private static bool IsGraalJniConfig(string path)
        => path.StartsWith("META-INF/native-image/", StringComparison.OrdinalIgnoreCase)
            && path.EndsWith("jni-config.json", StringComparison.OrdinalIgnoreCase);

    private static bool IsNativeLibrary(string path)
    {
        var extension = Path.GetExtension(path);
        return extension.Equals(".so", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".dll", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".dylib", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".jnilib", StringComparison.OrdinalIgnoreCase);
    }

    private static bool IsClassFile(string path)
        => path.EndsWith(".class", StringComparison.OrdinalIgnoreCase);

    private static bool IsPomPropertiesEntry(string entryName)
        => entryName.StartsWith("META-INF/maven/", StringComparison.OrdinalIgnoreCase)
            && entryName.EndsWith("/pom.properties", StringComparison.OrdinalIgnoreCase);

    private static bool IsManifestEntry(string entryName)
        => string.Equals(entryName, "META-INF/MANIFEST.MF", StringComparison.OrdinalIgnoreCase);

    /// <summary>Appends the lock entry's configuration, repository and resolved URL (blank values are skipped).</summary>
    private static void AppendLockMetadata(ICollection<KeyValuePair<string, string?>> metadata, JavaLockEntry entry)
    {
        AddMetadata(metadata, "lockConfiguration", entry.Configuration);
        AddMetadata(metadata, "lockRepository", entry.Repository);
        AddMetadata(metadata, "lockResolved", entry.ResolvedUrl);
    }

    /// <summary>
    /// Parses a <c>pom.properties</c> entry as <c>key=value</c> lines (blank lines and <c>#</c> comments skipped).
    /// Returns null unless groupId, artifactId and version are all present and non-blank.
    /// </summary>
    private static async ValueTask<MavenArtifact?> ParsePomPropertiesAsync(JavaArchive archive, JavaArchiveEntry entry, CancellationToken cancellationToken)
    {
        await using var entryStream = archive.OpenEntry(entry);

        // Buffer the entry so it can be both parsed as text and hashed afterwards.
        using var buffer = new MemoryStream();
        await entryStream.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
        buffer.Position = 0;

        using var reader = new StreamReader(buffer, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: true);
        var properties = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        while (await reader.ReadLineAsync().ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            line = line.Trim();
            if (line.Length == 0 || line.StartsWith('#'))
            {
                continue;
            }

            var separatorIndex = line.IndexOf('=');
            if (separatorIndex <= 0)
            {
                continue;
            }

            var key = line[..separatorIndex].Trim();
            var value = line[(separatorIndex + 1)..].Trim();
            if (key.Length == 0)
            {
                continue;
            }

            // Later duplicates overwrite earlier ones.
            properties[key] = value;
        }

        if (!properties.TryGetValue("groupId", out var groupId) || string.IsNullOrWhiteSpace(groupId))
        {
            return null;
        }

        if (!properties.TryGetValue("artifactId", out var artifactId) || string.IsNullOrWhiteSpace(artifactId))
        {
            return null;
        }

        if (!properties.TryGetValue("version", out var version) || string.IsNullOrWhiteSpace(version))
        {
            return null;
        }

        var packaging = properties.TryGetValue("packaging", out var packagingValue) ? packagingValue : "jar";
        var name = properties.TryGetValue("name", out var nameValue) ? nameValue : null;

        var purl = BuildPurl(groupId, artifactId, version, packaging);

        // Rewind: the reader consumed the buffer, and the hash must cover the full content.
        buffer.Position = 0;
        var pomSha = Convert.ToHexString(SHA256.HashData(buffer)).ToLowerInvariant();

        return new MavenArtifact(
            GroupId: groupId.Trim(),
            ArtifactId: artifactId.Trim(),
            Version: version.Trim(),
            Packaging: packaging?.Trim(),
            Name: name?.Trim(),
            Purl: purl,
            PomSha256: pomSha);
    }

    /// <summary>
    /// Reads the Implementation-Title/-Version/-Vendor headers from a manifest entry, keeping the
    /// first occurrence of each. Returns null when none of the three is present.
    /// </summary>
    private static async ValueTask<ManifestMetadata?> ParseManifestAsync(JavaArchive archive, JavaArchiveEntry entry, CancellationToken cancellationToken)
    {
        await using var entryStream = archive.OpenEntry(entry);
        using var reader = new StreamReader(entryStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: false);

        string? title = null;
        string? version = null;
        string? vendor = null;

        // NOTE(review): each physical line is parsed on its own; manifest continuation lines
        // (leading space) are not folded into the previous header value.
        while (await reader.ReadLineAsync().ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var separatorIndex = line.IndexOf(':');
            if (separatorIndex <= 0)
            {
                continue;
            }

            var key = line[..separatorIndex].Trim();
            var value = line[(separatorIndex + 1)..].Trim();

            if (key.Equals("Implementation-Title", StringComparison.OrdinalIgnoreCase))
            {
                title ??= value;
            }
            else if (key.Equals("Implementation-Version", StringComparison.OrdinalIgnoreCase))
            {
                version ??= value;
            }
            else if (key.Equals("Implementation-Vendor", StringComparison.OrdinalIgnoreCase))
            {
                vendor ??= value;
            }
        }

        if (title is null && version is null && vendor is null)
        {
            return null;
        }

        return new ManifestMetadata(title, version, vendor);
    }

    /// <summary>
    /// Builds <c>pkg:maven/{group with '.' replaced by '/'}/{artifact}@{version}</c>, appending
    /// <c>?type={packaging}</c> when packaging is non-blank and not "jar" (case-insensitive).
    /// </summary>
    private static string BuildPurl(string groupId, string artifactId, string version, string? packaging)
    {
        var normalizedGroup = groupId.Replace('.', '/');
        var builder = new StringBuilder();
        builder.Append("pkg:maven/");
        builder.Append(normalizedGroup);
        builder.Append('/');
        builder.Append(artifactId);
        builder.Append('@');
        builder.Append(version);

        if (!string.IsNullOrWhiteSpace(packaging) && !packaging.Equals("jar", StringComparison.OrdinalIgnoreCase))
        {
            builder.Append("?type=");
            builder.Append(packaging);
        }

        return builder.ToString();
    }

    /// <summary>Metadata and evidence produced by <see cref="ScanFrameworkConfigs"/>.</summary>
    internal sealed record FrameworkConfigSummary(
        IReadOnlyDictionary<string, string> Metadata,
        IReadOnlyCollection<LanguageComponentEvidence> Evidence);

    /// <summary>Metadata and evidence produced by <see cref="ScanJniHints"/>.</summary>
    internal sealed record JniHintSummary(
        IReadOnlyDictionary<string, string> Metadata,
        IReadOnlyCollection<LanguageComponentEvidence> Evidence);

    /// <summary>Coordinates parsed from a <c>pom.properties</c> entry, with its purl and content hash.</summary>
    private sealed record MavenArtifact(
        string GroupId,
        string ArtifactId,
        string Version,
        string? Packaging,
        string? Name,
        string Purl,
        string PomSha256);

    /// <summary>Implementation-* headers read from a manifest.</summary>
    private sealed record ManifestMetadata(string? ImplementationTitle, string? ImplementationVersion, string? ImplementationVendor)
    {
        /// <summary>Adds the non-blank manifest values to <paramref name="target"/>.</summary>
        public void ApplyMetadata(ICollection<KeyValuePair<string, string?>> target)
        {
            AddMetadata(target, "manifestTitle", ImplementationTitle);
            AddMetadata(target, "manifestVersion", ImplementationVersion);
            AddMetadata(target, "manifestVendor", ImplementationVendor);
        }

        /// <summary>
        /// Creates manifest evidence whose value is a <c>;</c>-joined list of the non-blank
        /// <c>title=</c>, <c>version=</c> and <c>vendor=</c> pairs (null when all are blank).
        /// </summary>
        public LanguageComponentEvidence CreateEvidence(JavaArchive archive)
        {
            var locator = BuildLocator(archive, "META-INF/MANIFEST.MF");
            var valueBuilder = new StringBuilder();

            if (!string.IsNullOrWhiteSpace(ImplementationTitle))
            {
                valueBuilder.Append("title=").Append(ImplementationTitle);
            }

            if (!string.IsNullOrWhiteSpace(ImplementationVersion))
            {
                if (valueBuilder.Length > 0)
                {
                    valueBuilder.Append(';');
                }

                valueBuilder.Append("version=").Append(ImplementationVersion);
            }

            if (!string.IsNullOrWhiteSpace(ImplementationVendor))
            {
                if (valueBuilder.Length > 0)
                {
                    valueBuilder.Append(';');
                }

                valueBuilder.Append("vendor=").Append(ImplementationVendor);
            }

            var value = valueBuilder.Length > 0 ? valueBuilder.ToString() : null;
            return new LanguageComponentEvidence(LanguageEvidenceKind.File, "MANIFEST.MF", locator, value, null);
        }
    }

    /// <summary>Builds the (unsorted) metadata list for an artifact found inside an archive.</summary>
    private static List<KeyValuePair<string, string?>> CreateInstalledMetadata(
        MavenArtifact artifact,
        JavaArchive archive,
        ManifestMetadata? manifestMetadata)
    {
        var metadata = new List<KeyValuePair<string, string?>>(8);

        AddMetadata(metadata, "groupId", artifact.GroupId);
        AddMetadata(metadata, "artifactId", artifact.ArtifactId);
        AddMetadata(metadata, "jarPath", NormalizeArchivePath(archive.RelativePath), allowEmpty: true);
        AddMetadata(metadata, "packaging", artifact.Packaging);
        AddMetadata(metadata, "displayName", artifact.Name);

        manifestMetadata?.ApplyMetadata(metadata);

        return metadata;
    }

    /// <summary>Builds the sorted metadata list for a lock entry that matched no archive.</summary>
    private static IReadOnlyList<KeyValuePair<string, string?>> CreateDeclaredMetadata(JavaLockEntry entry)
    {
        var metadata = new List<KeyValuePair<string, string?>>(6);
        var lockSource = NormalizeLockSource(entry.Source);
        var lockLocator = string.IsNullOrWhiteSpace(entry.Locator) ? lockSource : entry.Locator;

        AddMetadata(metadata, "declaredOnly", "true");
        AddMetadata(metadata, "lockSource", lockSource);
        AddMetadata(metadata, "lockLocator", lockLocator, allowEmpty: true);
        AppendLockMetadata(metadata, entry);

        return SortMetadata(metadata);
    }

    /// <summary>Creates metadata-kind evidence pointing at the lock source/locator, with the resolved URL as value.</summary>
    private static LanguageComponentEvidence CreateDeclaredEvidence(JavaLockEntry entry)
    {
        var lockSource = NormalizeLockSource(entry.Source);
        var lockLocator = string.IsNullOrWhiteSpace(entry.Locator) ? lockSource : entry.Locator;

        // Positional arguments (kind, source, locator, value, sha256), as used elsewhere in this class.
        return new LanguageComponentEvidence(
            LanguageEvidenceKind.Metadata,
            lockSource,
            lockLocator,
            entry.ResolvedUrl,
            null);
    }

    /// <summary>Sorts <paramref name="metadata"/> in place by key, then value (ordinal; null treated as empty) and returns it.</summary>
    private static IReadOnlyList<KeyValuePair<string, string?>> SortMetadata(List<KeyValuePair<string, string?>> metadata)
    {
        metadata.Sort(static (left, right) =>
        {
            var keyComparison = string.CompareOrdinal(left.Key, right.Key);
            if (keyComparison != 0)
            {
                return keyComparison;
            }

            return string.CompareOrdinal(left.Value ?? string.Empty, right.Value ?? string.Empty);
        });

        return metadata;
    }

    /// <summary>
    /// Appends a key/value pair. Blank keys are always ignored; blank values are ignored unless
    /// <paramref name="allowEmpty"/> is true.
    /// </summary>
    private static void AddMetadata(
        ICollection<KeyValuePair<string, string?>> metadata,
        string key,
        string? value,
        bool allowEmpty = false)
    {
        if (string.IsNullOrWhiteSpace(key))
        {
            return;
        }

        if (!allowEmpty && string.IsNullOrWhiteSpace(value))
        {
            return;
        }

        metadata.Add(new KeyValuePair<string, string?>(key, value));
    }

    /// <summary>Returns <paramref name="source"/>, or "lockfile" when it is null or blank.</summary>
    private static string NormalizeLockSource(string? source)
        => string.IsNullOrWhiteSpace(source) ? "lockfile" : source;
}