feat: Implement BerkeleyDB reader for RPM databases
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
console-runner-image / build-runner-image (push) Has been cancelled
wine-csp-build / Build Wine CSP Image (push) Has been cancelled
wine-csp-build / Integration Tests (push) Has been cancelled
wine-csp-build / Security Scan (push) Has been cancelled
wine-csp-build / Generate SBOM (push) Has been cancelled
wine-csp-build / Publish Image (push) Has been cancelled
wine-csp-build / Air-Gap Bundle (push) Has been cancelled
wine-csp-build / Test Summary (push) Has been cancelled
- Added BerkeleyDbReader class to read and extract RPM header blobs from BerkeleyDB hash databases.
- Implemented methods to detect the BerkeleyDB format and extract values, including handling of page sizes and magic numbers.
- Added tests for BerkeleyDbReader to ensure correct functionality and header extraction.

feat: Add Yarn PnP data tests

- Created YarnPnpDataTests to validate package resolution and data loading from the Yarn PnP cache.
- Implemented tests for resolved keys, package presence, and loading from the cache structure.

test: Add egg-info package fixtures for Python tests

- Created egg-info package fixtures for testing Python analyzers.
- Included PKG-INFO, entry_points.txt, and installed-files.txt for comprehensive coverage.

test: Enhance RPM database reader tests

- Added tests for RpmDatabaseReader to validate fallback to legacy Packages files when SQLite is missing.
- Implemented helper methods to create legacy package files and RPM headers for testing.

test: Implement dual signing tests

- Added DualSignTests to validate secondary signature addition when configured.
- Created stub implementations for crypto providers and key resolvers to facilitate testing.

chore: Update CI script for Playwright Chromium installation

- Modified ci-console-exports.sh to ensure deterministic Chromium binary installation for console exports tests.
- Added checks for Windows compatibility and environment variable setup for Playwright browsers.
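For reviewers, a minimal sketch of how the new BerkeleyDB fallback is meant to be driven. Only IsBerkeleyDb, ExtractValues, and ExtractValuesWithOverflow come from this commit; the input path and the surrounding wiring are illustrative assumptions, not part of the diff:

var data = File.ReadAllBytes("/var/lib/rpm/Packages"); // hypothetical rpmdb path
if (BerkeleyDbReader.IsBerkeleyDb(data))
{
    // Page-aware extraction first; fall back to a whole-file scan for
    // headers that span BerkeleyDB page boundaries.
    var blobs = BerkeleyDbReader.ExtractValues(data);
    if (blobs.Count == 0)
    {
        blobs = BerkeleyDbReader.ExtractValuesWithOverflow(data);
    }
    // Each blob begins with the RPM header magic 0x8eade8ab and can be
    // handed to the existing RPM header parser (see RpmDatabaseReader below).
}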
@@ -9,7 +9,8 @@ internal sealed record NodeProjectInput(
IReadOnlyList<string> NodeModuleRoots,
IReadOnlyList<string> Tarballs,
IReadOnlyList<string> YarnCacheRoots,
bool YarnPnpPresent);
bool YarnPnpPresent,
YarnPnpData? YarnPnpData);

/// <summary>
/// Normalizes scanner inputs for Node.js projects, layering workspace roots, container layers,
@@ -63,12 +64,28 @@ internal static class NodeInputNormalizer

var yarnPnpPresent = sourceRoots.Any(static root => HasYarnPnpMarkers(root));

// Load Yarn PnP resolution data if present
YarnPnpData? yarnPnpData = null;
if (yarnPnpPresent)
{
foreach (var root in sourceRoots)
{
var pnpData = YarnPnpData.Load(root);
if (pnpData.HasData)
{
yarnPnpData = pnpData;
break;
}
}
}

return new NodeProjectInput(
SourceRoots: sourceRoots.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
NodeModuleRoots: nodeModuleRoots.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
Tarballs: tarballs.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
YarnCacheRoots: yarnCacheRoots.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
YarnPnpPresent: yarnPnpPresent);
YarnPnpPresent: yarnPnpPresent,
YarnPnpData: yarnPnpData);
}

private static IReadOnlyList<string> DiscoverSourceRoots(string rootPath)

@@ -58,8 +58,6 @@ internal static class NodePackageCollector
TraverseTarballs(context, projectInput.Tarballs, packages, visited, yarnPnpPresent, cancellationToken);
TraverseYarnPnpCache(context, projectInput.YarnCacheRoots, packages, visited, yarnPnpPresent, cancellationToken);

AppendDeclaredPackages(packages, lockData);

AttachImports(context, packages, cancellationToken);

var resolutions = NodeResolver.Resolve(context, projectInput, packages, cancellationToken);
@@ -68,9 +66,50 @@ internal static class NodePackageCollector
package.SetResolvedImports(resolvedImports);
}

// Filter out declared-only packages without on-disk evidence when PnP data is available
if (projectInput.YarnPnpData?.HasData == true)
{
return FilterDeclaredOnlyPackages(packages, projectInput.YarnPnpData);
}

return packages;
}

/// <summary>
/// Filters out packages that are declared-only (no on-disk evidence) when Yarn PnP data is available.
/// Only emits packages that are actually resolved in the PnP resolution map.
/// </summary>
private static IReadOnlyList<NodePackage> FilterDeclaredOnlyPackages(List<NodePackage> packages, YarnPnpData pnpData)
{
var filtered = new List<NodePackage>(packages.Count);

foreach (var package in packages)
{
// Always include workspace members and private packages
if (package.IsWorkspaceMember || package.IsPrivate == true)
{
filtered.Add(package);
continue;
}

// Skip declared-only packages that are not resolved in PnP
if (package.DeclaredOnly && !pnpData.IsResolved(package.Name, package.Version))
{
continue;
}

// For packages from Yarn cache, verify they are in the PnP resolution
if (package.IsYarnPnp && !pnpData.IsResolved(package.Name, package.Version))
{
continue;
}

filtered.Add(package);
}

return filtered;
}

private static void AttachImports(LanguageAnalyzerContext context, List<NodePackage> packages, CancellationToken cancellationToken)
{
foreach (var package in packages)
@@ -454,73 +493,6 @@ internal static class NodePackageCollector
}
}

private static void AppendDeclaredPackages(List<NodePackage> packages, NodeLockData lockData)
{
if (lockData.DeclaredPackages.Count == 0)
{
return;
}

var observed = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var package in packages)
{
var key = BuildDeclarationKey(package.Name, package.Version);
if (!string.IsNullOrEmpty(key))
{
observed.Add(key);
}
}

foreach (var entry in lockData.DeclaredPackages)
{
if (string.IsNullOrWhiteSpace(entry.Name) || string.IsNullOrWhiteSpace(entry.Version))
{
continue;
}

var key = BuildDeclarationKey(entry.Name, entry.Version);
if (string.IsNullOrEmpty(key) || !observed.Add(key))
{
continue;
}

var declaredPackage = new NodePackage(
entry.Name,
entry.Version,
relativePath: string.Empty,
packageJsonLocator: string.Empty,
isPrivate: null,
lockEntry: entry,
isWorkspaceMember: false,
workspaceRoot: null,
workspaceTargets: Array.Empty<string>(),
workspaceLink: null,
lifecycleScripts: Array.Empty<NodeLifecycleScript>(),
nodeVersions: Array.Empty<NodeVersionTarget>(),
usedByEntrypoint: false,
declaredOnly: true,
lockSource: entry.Source,
lockLocator: BuildLockLocator(entry),
packageSha256: null,
isYarnPnp: false,
scope: entry.Scope,
isOptional: entry.IsOptional,
license: null);

packages.Add(declaredPackage);
}
}

private static string BuildDeclarationKey(string name, string? version)
{
if (string.IsNullOrWhiteSpace(name) || string.IsNullOrWhiteSpace(version))
{
return string.Empty;
}

return $"{name}@{version}".ToLowerInvariant();
}

private static string? BuildLockLocator(NodeLockEntry? entry)
{
if (entry is null)
@@ -0,0 +1,279 @@
using System.IO.Compression;
using System.Text.Json;

namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;

internal static class NodePnpDataLoader
{
public static IReadOnlyList<NodePackage> Load(LanguageAnalyzerContext context, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(context);

var pnpDataPath = Path.Combine(context.RootPath, ".pnp.data.json");
if (!File.Exists(pnpDataPath))
{
return Array.Empty<NodePackage>();
}

try
{
using var stream = File.OpenRead(pnpDataPath);
using var document = JsonDocument.Parse(stream);
if (!document.RootElement.TryGetProperty("packages", out var packagesElement) ||
packagesElement.ValueKind != JsonValueKind.Object)
{
return Array.Empty<NodePackage>();
}

var packages = new List<NodePackage>();

foreach (var packageProperty in packagesElement.EnumerateObject())
{
cancellationToken.ThrowIfCancellationRequested();
var package = TryParsePackage(context, packageProperty, cancellationToken);
if (package is not null)
{
packages.Add(package);
}
}

return packages
.OrderBy(static p => p.ComponentKey, StringComparer.Ordinal)
.ToArray();
}
catch (IOException)
{
return Array.Empty<NodePackage>();
}
catch (JsonException)
{
return Array.Empty<NodePackage>();
}
}

private static NodePackage? TryParsePackage(
LanguageAnalyzerContext context,
JsonProperty packageProperty,
CancellationToken cancellationToken)
{
var rawKey = packageProperty.Name;
var obj = packageProperty.Value;

if (obj.ValueKind != JsonValueKind.Object)
{
return null;
}

var packageLocation = obj.TryGetProperty("packageLocation", out var locationElement)
? locationElement.GetString()
: null;

if (string.IsNullOrWhiteSpace(packageLocation))
{
return null;
}

packageLocation = packageLocation.Trim();
var packageJson = obj.TryGetProperty("packageJson", out var packageJsonElement) && packageJsonElement.ValueKind == JsonValueKind.Object
? packageJsonElement
: (JsonElement?)null;

var (name, version) = ExtractNameVersion(rawKey, packageJson);
if (string.IsNullOrWhiteSpace(name) || string.IsNullOrWhiteSpace(version))
{
return null;
}

var relativePath = context.GetRelativePath(packageLocation).Replace(Path.DirectorySeparatorChar, '/');
var absolutePackagePath = Path.GetFullPath(Path.Combine(context.RootPath, packageLocation));
var usedByEntrypoint = context.UsageHints.IsPathUsed(absolutePackagePath);
var packageJsonLocator = BuildLocator(packageLocation);

string? packageSha256 = null;
bool? isPrivate = null;
string? license = null;

if (packageJson is not null)
{
if (packageJson.Value.TryGetProperty("private", out var privateElement) && privateElement.ValueKind is JsonValueKind.True or JsonValueKind.False)
{
isPrivate = privateElement.GetBoolean();
}

if (packageJson.Value.TryGetProperty("license", out var licenseElement) && licenseElement.ValueKind == JsonValueKind.String)
{
license = licenseElement.GetString();
}
}

if (TryReadPackageJsonContent(context.RootPath, packageLocation, out var packageContent, out var locator))
{
packageSha256 = Convert.ToHexString(System.Security.Cryptography.SHA256.HashData(packageContent)).ToLowerInvariant();
packageJsonLocator = locator;

try
{
using var doc = JsonDocument.Parse(packageContent);
if (doc.RootElement.TryGetProperty("private", out var jsonPrivate) && jsonPrivate.ValueKind is JsonValueKind.True or JsonValueKind.False)
{
isPrivate ??= jsonPrivate.GetBoolean();
}

if (doc.RootElement.TryGetProperty("license", out var licenseElement) && licenseElement.ValueKind == JsonValueKind.String)
{
license ??= licenseElement.GetString();
}
}
catch (JsonException)
{
// ignore malformed package.json content
}
}

var package = new NodePackage(
name: name,
version: version,
relativePath: relativePath,
packageJsonLocator: packageJsonLocator,
isPrivate: isPrivate,
lockEntry: null,
isWorkspaceMember: false,
workspaceRoot: null,
workspaceTargets: Array.Empty<string>(),
workspaceLink: null,
lifecycleScripts: Array.Empty<NodeLifecycleScript>(),
nodeVersions: Array.Empty<NodeVersionTarget>(),
usedByEntrypoint: usedByEntrypoint,
declaredOnly: false,
lockSource: "pnp.data",
lockLocator: rawKey,
packageSha256: packageSha256,
isYarnPnp: true,
scope: null,
isOptional: false,
license: license);

return package;
}

private static bool TryReadPackageJsonContent(string rootPath, string packageLocation, out byte[] content, out string locator)
{
content = Array.Empty<byte>();
locator = BuildLocator(packageLocation);

// First try direct package.json on disk
var packageJsonPath = Path.Combine(rootPath, packageLocation, "package.json");
if (File.Exists(packageJsonPath))
{
try
{
content = File.ReadAllBytes(packageJsonPath);
locator = NormalizeLocator(packageJsonPath, rootPath);
return true;
}
catch (IOException)
{
}
}

// If location points to a zip, try to read package/package.json inside the archive
if (packageLocation.Contains(".zip", StringComparison.OrdinalIgnoreCase))
{
var zipPath = Path.Combine(rootPath, packageLocation[..packageLocation.IndexOf(".zip", StringComparison.OrdinalIgnoreCase) + 4]);
if (File.Exists(zipPath))
{
try
{
using var archive = ZipFile.OpenRead(zipPath);
var entry = archive.GetEntry("package/package.json");
if (entry is not null)
{
using var entryStream = entry.Open();
using var memory = new MemoryStream();
entryStream.CopyTo(memory);
content = memory.ToArray();
locator = $"{NormalizeLocator(zipPath, rootPath)}!package/package.json";
return true;
}
}
catch (IOException)
{
}
catch (InvalidDataException)
{
}
}
}

return false;
}

private static (string Name, string Version) ExtractNameVersion(string rawKey, JsonElement? packageJson)
{
string? name = null;
string? version = null;

if (packageJson is not null)
{
if (packageJson.Value.TryGetProperty("name", out var nameElement) && nameElement.ValueKind == JsonValueKind.String)
{
name = nameElement.GetString();
}

if (packageJson.Value.TryGetProperty("version", out var versionElement) && versionElement.ValueKind == JsonValueKind.String)
{
version = versionElement.GetString();
}
}

if (!string.IsNullOrWhiteSpace(name) && !string.IsNullOrWhiteSpace(version))
{
return (name!.Trim(), version!.Trim());
}

// Fallback parsing from locator key (e.g., "cached-lib@npm:1.0.0" or "@scope/pkg@npm:2.0.0")
var atIndex = rawKey.LastIndexOf('@');
if (atIndex > 0 && atIndex < rawKey.Length - 1)
{
name ??= rawKey[..atIndex];
var remainder = rawKey[(atIndex + 1)..];

var colonIndex = remainder.IndexOf(':');
if (colonIndex >= 0 && colonIndex < remainder.Length - 1)
{
version ??= remainder[(colonIndex + 1)..];
}
else
{
version ??= remainder;
}
}

return (name ?? string.Empty, version ?? string.Empty);
}

private static string BuildLocator(string packageLocation)
{
if (string.IsNullOrWhiteSpace(packageLocation))
{
return "package.json";
}

var normalized = packageLocation.Replace('\\', '/').TrimEnd('/');
return normalized.EndsWith(".zip", StringComparison.OrdinalIgnoreCase)
? $"{normalized}!package/package.json"
: $"{normalized}/package.json";
}

private static string NormalizeLocator(string path, string rootPath)
{
var relative = Path.GetRelativePath(rootPath, path);
if (string.IsNullOrWhiteSpace(relative))
{
return "package.json";
}

return relative.Replace(Path.DirectorySeparatorChar, '/');
}
}
@@ -0,0 +1,316 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
using System.Text.Json;

namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;

/// <summary>
/// Represents parsed Yarn PnP resolution data from .pnp.data.json or .pnp.cjs.
/// Used to determine which packages are actually resolved and have on-disk evidence.
/// </summary>
internal sealed class YarnPnpData
{
private static readonly IReadOnlyDictionary<string, YarnPnpPackageInfo> EmptyPackages =
new ReadOnlyDictionary<string, YarnPnpPackageInfo>(new Dictionary<string, YarnPnpPackageInfo>(0));

private static readonly IReadOnlySet<string> EmptySet =
new HashSet<string>(0, StringComparer.Ordinal);

public YarnPnpData(
IReadOnlyDictionary<string, YarnPnpPackageInfo>? packages,
IReadOnlySet<string>? resolvedPackageKeys)
{
Packages = packages ?? EmptyPackages;
ResolvedPackageKeys = resolvedPackageKeys ?? EmptySet;
}

/// <summary>
/// Package information by locator key (e.g., "lodash@npm:4.17.21").
/// </summary>
public IReadOnlyDictionary<string, YarnPnpPackageInfo> Packages { get; }

/// <summary>
/// Set of package keys (name@version) that are actually resolved in the dependency tree.
/// </summary>
public IReadOnlySet<string> ResolvedPackageKeys { get; }

/// <summary>
/// Whether PnP data was successfully loaded.
/// </summary>
public bool HasData => Packages.Count > 0;

/// <summary>
/// Checks if a package is resolved in the PnP data.
/// </summary>
public bool IsResolved(string name, string version)
{
var key = $"{name}@{version}";
return ResolvedPackageKeys.Contains(key);
}

/// <summary>
/// Tries to get package info by name and version.
/// </summary>
public bool TryGetPackage(string name, string version, out YarnPnpPackageInfo? info)
{
var key = $"{name}@npm:{version}";
if (Packages.TryGetValue(key, out info))
{
return true;
}

// Also try without npm: prefix
key = $"{name}@{version}";
return Packages.TryGetValue(key, out info);
}

/// <summary>
/// Loads PnP data from the root path.
/// </summary>
public static YarnPnpData Load(string rootPath)
{
var pnpDataPath = Path.Combine(rootPath, ".pnp.data.json");
if (File.Exists(pnpDataPath))
{
return LoadFromDataJson(pnpDataPath);
}

// .pnp.cjs is more complex (JavaScript), so we extract data from cache structure instead
// when .pnp.data.json is not available
return LoadFromCacheStructure(rootPath);
}

private static YarnPnpData LoadFromDataJson(string pnpDataPath)
{
try
{
using var stream = File.OpenRead(pnpDataPath);
using var document = JsonDocument.Parse(stream);
var root = document.RootElement;

var packages = new Dictionary<string, YarnPnpPackageInfo>(StringComparer.Ordinal);
var resolvedKeys = new HashSet<string>(StringComparer.Ordinal);

// Parse packageRegistryData which contains the actual package locations
if (root.TryGetProperty("packageRegistryData", out var registryData) &&
registryData.ValueKind == JsonValueKind.Array)
{
foreach (var entry in registryData.EnumerateArray())
{
ParsePackageRegistryEntry(entry, packages, resolvedKeys);
}
}

return new YarnPnpData(
new ReadOnlyDictionary<string, YarnPnpPackageInfo>(packages),
resolvedKeys);
}
catch (IOException)
{
return new YarnPnpData(null, null);
}
catch (JsonException)
{
return new YarnPnpData(null, null);
}
}

private static void ParsePackageRegistryEntry(
JsonElement entry,
Dictionary<string, YarnPnpPackageInfo> packages,
HashSet<string> resolvedKeys)
{
if (entry.ValueKind != JsonValueKind.Array)
{
return;
}

var items = new List<JsonElement>();
foreach (var item in entry.EnumerateArray())
{
items.Add(item);
}

if (items.Count < 2)
{
return;
}

// First element is package name, second is array of version entries
var packageName = items[0].GetString();
if (string.IsNullOrWhiteSpace(packageName))
{
return;
}

var versionEntries = items[1];
if (versionEntries.ValueKind != JsonValueKind.Array)
{
return;
}

foreach (var versionEntry in versionEntries.EnumerateArray())
{
ParseVersionEntry(packageName!, versionEntry, packages, resolvedKeys);
}
}

private static void ParseVersionEntry(
string packageName,
JsonElement versionEntry,
Dictionary<string, YarnPnpPackageInfo> packages,
HashSet<string> resolvedKeys)
{
if (versionEntry.ValueKind != JsonValueKind.Array)
{
return;
}

var items = new List<JsonElement>();
foreach (var item in versionEntry.EnumerateArray())
{
items.Add(item);
}

if (items.Count < 2)
{
return;
}

// First element is version reference (e.g., "npm:4.17.21"), second is package info
var versionRef = items[0].GetString();
if (string.IsNullOrWhiteSpace(versionRef))
{
return;
}

var packageInfo = items[1];
if (packageInfo.ValueKind != JsonValueKind.Object)
{
return;
}

string? packageLocation = null;
if (packageInfo.TryGetProperty("packageLocation", out var locationElement) &&
locationElement.ValueKind == JsonValueKind.String)
{
packageLocation = locationElement.GetString();
}

// Extract actual version from the reference
var version = versionRef;
if (versionRef!.StartsWith("npm:", StringComparison.Ordinal))
{
version = versionRef[4..];
}

var locatorKey = $"{packageName}@{versionRef}";
var resolvedKey = $"{packageName}@{version}";

if (!packages.ContainsKey(locatorKey))
{
packages[locatorKey] = new YarnPnpPackageInfo(packageName, version!, packageLocation);
}

resolvedKeys.Add(resolvedKey);
}

private static YarnPnpData LoadFromCacheStructure(string rootPath)
{
// When no .pnp.data.json, infer from .yarn/cache structure
var cacheDir = Path.Combine(rootPath, ".yarn", "cache");
if (!Directory.Exists(cacheDir))
{
return new YarnPnpData(null, null);
}

var packages = new Dictionary<string, YarnPnpPackageInfo>(StringComparer.Ordinal);
var resolvedKeys = new HashSet<string>(StringComparer.Ordinal);

try
{
foreach (var zipFile in Directory.EnumerateFiles(cacheDir, "*.zip", SearchOption.AllDirectories))
{
var fileName = Path.GetFileNameWithoutExtension(zipFile);
if (TryParseYarnCacheFilename(fileName, out var name, out var version))
{
var locatorKey = $"{name}@npm:{version}";
var resolvedKey = $"{name}@{version}";

if (!packages.ContainsKey(locatorKey))
{
packages[locatorKey] = new YarnPnpPackageInfo(name!, version!, zipFile);
}

resolvedKeys.Add(resolvedKey);
}
}
}
catch (IOException)
{
// Ignore enumeration errors
}

return new YarnPnpData(
new ReadOnlyDictionary<string, YarnPnpPackageInfo>(packages),
resolvedKeys);
}

/// <summary>
/// Parses Yarn cache filename format: name-npm-version-hash.zip
/// Examples: lodash-npm-4.17.21-6382d6d2.zip, @types-node-npm-18.0.0-abc123.zip
/// </summary>
private static bool TryParseYarnCacheFilename(string fileName, out string? name, out string? version)
{
name = null;
version = null;

if (string.IsNullOrWhiteSpace(fileName))
{
return false;
}

// Find -npm- separator
var npmIndex = fileName.IndexOf("-npm-", StringComparison.Ordinal);
if (npmIndex < 1)
{
return false;
}

// Name is before -npm-
var rawName = fileName[..npmIndex];

// Handle scoped packages: @types-node -> @types/node
if (rawName.StartsWith('@'))
{
var dashIndex = rawName.IndexOf('-', 1);
if (dashIndex > 1)
{
rawName = rawName[..dashIndex] + "/" + rawName[(dashIndex + 1)..];
}
}

name = rawName;

// Version is between -npm- and the last -hash part
var afterNpm = fileName[(npmIndex + 5)..];
var lastDash = afterNpm.LastIndexOf('-');
if (lastDash < 1)
{
return false;
}

version = afterNpm[..lastDash];
return !string.IsNullOrWhiteSpace(version);
}
}

/// <summary>
/// Information about a single package in the PnP resolution data.
/// </summary>
internal sealed record YarnPnpPackageInfo(
string Name,
string Version,
string? PackageLocation);
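A minimal consumption sketch for orientation; the root path is an illustrative assumption, while the calls match the class above:

var pnp = YarnPnpData.Load("/workspace/app"); // hypothetical workspace root
if (pnp.HasData && pnp.IsResolved("lodash", "4.17.21"))
{
    if (pnp.TryGetPackage("lodash", "4.17.21", out var info))
    {
        // info.PackageLocation points at the cache zip or unpacked folder.
    }
}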
@@ -0,0 +1,417 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;

namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;

/// <summary>
/// Adapter for legacy .egg-info metadata (setuptools installations).
/// Handles standalone egg-info directories in site-packages that aren't editable installs.
/// </summary>
internal sealed partial class EggInfoAdapter : IPythonPackagingAdapter
{
public string Name => "egg-info";
public int Priority => 15; // Lower priority than dist-info (10) to prefer wheels

[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)-(?<version>[\d.]+(?:\.dev\d*|a\d*|b\d*|rc\d*|post\d*)?)\.egg-info$",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex EggInfoDirPattern();

public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
// Look for any .egg-info directories with PKG-INFO
return vfs.EnumerateFiles(path, "*.egg-info/PKG-INFO").Any();
}

public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
PythonVirtualFileSystem vfs,
string path,
[System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
// Find all .egg-info directories with PKG-INFO
var pkgInfoFiles = vfs.EnumerateFiles(path, "*.egg-info/PKG-INFO").ToList();

foreach (var pkgInfoFile in pkgInfoFiles)
{
cancellationToken.ThrowIfCancellationRequested();

var eggInfoPath = Path.GetDirectoryName(pkgInfoFile.VirtualPath) ?? string.Empty;
var eggInfoName = Path.GetFileName(eggInfoPath);

var match = EggInfoDirPattern().Match(eggInfoName);
if (!match.Success)
{
continue;
}

var packageName = match.Groups["name"].Value;
var version = match.Groups["version"].Value;

// Read PKG-INFO file (same format as METADATA)
var metadata = await ReadPkgInfoAsync(vfs, pkgInfoFile, cancellationToken).ConfigureAwait(false);

// Use metadata name if available (more accurate)
if (metadata.TryGetValue("Name", out var metadataName) && !string.IsNullOrEmpty(metadataName))
{
packageName = metadataName;
}

if (metadata.TryGetValue("Version", out var metadataVersion) && !string.IsNullOrEmpty(metadataVersion))
{
version = metadataVersion;
}

// Read top_level.txt
var topLevelModules = await ReadTopLevelAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

// Read SOURCES.txt (file manifest for egg-info)
var sourceFiles = await ReadSourcesAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

// Read requires.txt (egg-info dependencies format)
var dependencies = await ReadRequiresAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

// Read installed-files.txt if available
var installedFiles = await ReadInstalledFilesAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

// Read entry_points.txt
var entryPoints = await ReadEntryPointsAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

// Extract extras from requires.txt sections
var extras = ExtractExtras(dependencies);

// Determine confidence based on available metadata
var confidence = sourceFiles.Length > 0 || installedFiles.Length > 0
? PythonPackageConfidence.Definitive
: PythonPackageConfidence.High;

// Convert sources/installed files to record entries
var recordFiles = ConvertToRecordEntries(sourceFiles, installedFiles);

yield return new PythonPackageInfo(
Name: packageName,
Version: version,
Kind: PythonPackageKind.Egg,
Location: path,
MetadataPath: eggInfoPath,
TopLevelModules: topLevelModules,
Dependencies: FilterBaseDependencies(dependencies),
Extras: extras,
RecordFiles: recordFiles,
InstallerTool: DetectInstaller(metadata),
EditableTarget: null,
IsDirectDependency: false, // Will be corrected by dependency graph analysis
Confidence: confidence);
}
}

private static async Task<Dictionary<string, string>> ReadPkgInfoAsync(
PythonVirtualFileSystem vfs,
PythonVirtualFile file,
CancellationToken cancellationToken)
{
var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

try
{
using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return result;
}

using var reader = new StreamReader(stream);
string? currentKey = null;
var currentValue = new System.Text.StringBuilder();
string? line;

while ((line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
{
cancellationToken.ThrowIfCancellationRequested();

// Empty line marks end of headers
if (string.IsNullOrEmpty(line))
{
if (currentKey is not null)
{
result[currentKey] = currentValue.ToString().Trim();
}
break;
}

// Continuation line
if (line.StartsWith(' ') || line.StartsWith('\t'))
{
currentValue.AppendLine(line.Trim());
continue;
}

// New header
var colonIdx = line.IndexOf(':');
if (colonIdx > 0)
{
if (currentKey is not null)
{
if (!result.ContainsKey(currentKey))
{
result[currentKey] = currentValue.ToString().Trim();
}
else
{
// Multi-value header
result[currentKey] = result[currentKey] + "\n" + currentValue.ToString().Trim();
}
}

currentKey = line[..colonIdx].Trim();
currentValue.Clear();
currentValue.Append(line[(colonIdx + 1)..].Trim());
}
}

if (currentKey is not null && !result.ContainsKey(currentKey))
{
result[currentKey] = currentValue.ToString().Trim();
}
}
catch (IOException)
{
// Ignore read errors
}

return result;
}

private static async Task<ImmutableArray<string>> ReadTopLevelAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var topLevelPath = $"{eggInfoPath}/top_level.txt";

try
{
using var stream = await vfs.OpenReadAsync(topLevelPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}

using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}

private static async Task<ImmutableArray<string>> ReadSourcesAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var sourcesPath = $"{eggInfoPath}/SOURCES.txt";

try
{
using var stream = await vfs.OpenReadAsync(sourcesPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}

using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}

private static async Task<ImmutableArray<string>> ReadInstalledFilesAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var installedPath = $"{eggInfoPath}/installed-files.txt";

try
{
using var stream = await vfs.OpenReadAsync(installedPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}

using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}

private static async Task<ImmutableArray<string>> ReadRequiresAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var requiresPath = $"{eggInfoPath}/requires.txt";

try
{
using var stream = await vfs.OpenReadAsync(requiresPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}

using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

// requires.txt format:
// base_dependency>=1.0
// [extra1]
// extra_dep>=2.0
// [extra2:python_version<"3.0"]
// conditional_dep
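// Worked example of the transformation applied below (illustrative values):
// "extra_dep>=2.0" under [extra1] is emitted as the PEP 508-style requirement
//   extra_dep>=2.0 ; extra == "extra1"
// while base dependencies pass through unchanged.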
var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
var result = new List<string>();
string? currentExtra = null;

foreach (var line in lines)
{
if (line.StartsWith('[') && line.EndsWith(']'))
{
// This is an extra section
currentExtra = line[1..^1];
// Handle conditional extras like [extra:condition]
var colonIdx = currentExtra.IndexOf(':');
if (colonIdx > 0)
{
currentExtra = currentExtra[..colonIdx];
}
continue;
}

if (currentExtra is not null)
{
// Dependency in an extra section - add with marker
result.Add($"{line} ; extra == \"{currentExtra}\"");
}
else
{
// Base dependency
result.Add(line);
}
}

return result.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}

private static async Task<ImmutableArray<string>> ReadEntryPointsAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var entryPointsPath = $"{eggInfoPath}/entry_points.txt";

try
{
using var stream = await vfs.OpenReadAsync(entryPointsPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}

using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

// entry_points.txt is INI format:
// [console_scripts]
// script_name = module:function
var result = new List<string>();

foreach (var line in content.Split('\n'))
{
var trimmed = line.Trim();
if (!string.IsNullOrEmpty(trimmed) && !trimmed.StartsWith('[') && trimmed.Contains('='))
{
result.Add(trimmed);
}
}

return result.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}

private static ImmutableArray<string> FilterBaseDependencies(ImmutableArray<string> dependencies)
{
// Return only base dependencies (without extra markers)
return dependencies
.Where(d => !d.Contains("extra ==", StringComparison.OrdinalIgnoreCase))
.ToImmutableArray();
}

private static ImmutableArray<string> ExtractExtras(ImmutableArray<string> dependencies)
{
// Extract unique extra names from dependency markers
var extras = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

foreach (var dep in dependencies)
{
var extraMatch = System.Text.RegularExpressions.Regex.Match(dep, @"extra\s*==\s*""([^""]+)""");
if (extraMatch.Success)
{
extras.Add(extraMatch.Groups[1].Value);
}
}

return extras.ToImmutableArray();
}

private static ImmutableArray<PythonRecordEntry> ConvertToRecordEntries(
ImmutableArray<string> sources,
ImmutableArray<string> installedFiles)
{
// Prefer installed-files.txt as it's more accurate for installed packages
var files = installedFiles.Length > 0 ? installedFiles : sources;

return files
.Select(f => new PythonRecordEntry(f, null, null))
.ToImmutableArray();
}

private static string? DetectInstaller(Dictionary<string, string> metadata)
{
// Try to detect the installer from metadata
if (metadata.TryGetValue("Installer", out var installer))
{
return installer;
}

// Egg-info is typically from setuptools
return "setuptools";
}
}
@@ -240,6 +240,7 @@ internal sealed class PythonPackageDiscovery
new PoetryAdapter(),
new PipEditableAdapter(),
new DistInfoAdapter(),
new EggInfoAdapter(),
new CondaAdapter(),
new ContainerLayerAdapter()
};

@@ -802,7 +802,7 @@ internal static class PythonRecordParser
continue;
}

entries.Add(new PythonRecordEntry(trimmed, hashAlgorithm: null, hashValue: null, size: null));
entries.Add(new PythonRecordEntry(trimmed, null, null, null));
}

return entries;

@@ -8,6 +8,7 @@ using StellaOps.Scanner.Analyzers.OS;
using StellaOps.Scanner.Analyzers.OS.Abstractions;
using StellaOps.Scanner.Analyzers.OS.Analyzers;
using StellaOps.Scanner.Analyzers.OS.Helpers;
using StellaOps.Scanner.Core.Contracts;

namespace StellaOps.Scanner.Analyzers.OS.Apk;

@@ -37,6 +38,8 @@ internal sealed class ApkPackageAnalyzer : OsPackageAnalyzerBase
using var stream = File.OpenRead(installedPath);
var entries = _parser.Parse(stream, cancellationToken);

context.Metadata.TryGetValue(ScanMetadataKeys.CurrentLayerDigest, out var layerDigest);

var records = new List<OSPackageRecord>(entries.Count);
foreach (var entry in entries)
{
@@ -70,7 +73,7 @@ internal sealed class ApkPackageAnalyzer : OsPackageAnalyzerBase
{
files.Add(new OSPackageFileEvidence(
file.Path,
layerDigest: null,
layerDigest: layerDigest,
sha256: file.Digest,
sizeBytes: null,
isConfigFile: file.IsConfig));

@@ -10,6 +10,7 @@ using StellaOps.Scanner.Analyzers.OS;
using StellaOps.Scanner.Analyzers.OS.Abstractions;
using StellaOps.Scanner.Analyzers.OS.Analyzers;
using StellaOps.Scanner.Analyzers.OS.Helpers;
using StellaOps.Scanner.Core.Contracts;

namespace StellaOps.Scanner.Analyzers.OS.Dpkg;

@@ -39,6 +40,8 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
using var stream = File.OpenRead(statusPath);
var entries = _parser.Parse(stream, cancellationToken);

context.Metadata.TryGetValue(ScanMetadataKeys.CurrentLayerDigest, out var layerDigest);

var infoDirectory = Path.Combine(context.RootPath, "var", "lib", "dpkg", "info");
var records = new List<OSPackageRecord>();

@@ -83,7 +86,7 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
var dependencies = entry.Depends.Concat(entry.PreDepends).ToArray();
var provides = entry.Provides.ToArray();

var fileEvidence = BuildFileEvidence(infoDirectory, entry, cancellationToken);
var fileEvidence = BuildFileEvidence(infoDirectory, entry, layerDigest, cancellationToken);

var cveHints = CveHintExtractor.Extract(entry.Description, string.Join(' ', dependencies), string.Join(' ', provides));

@@ -125,7 +128,7 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
return parts.Length == 0 ? null : parts[0];
}

private static IReadOnlyList<OSPackageFileEvidence> BuildFileEvidence(string infoDirectory, DpkgPackageEntry entry, CancellationToken cancellationToken)
private static IReadOnlyList<OSPackageFileEvidence> BuildFileEvidence(string infoDirectory, DpkgPackageEntry entry, string? layerDigest, CancellationToken cancellationToken)
{
if (!Directory.Exists(infoDirectory))
{
@@ -137,7 +140,7 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
{
if (!files.TryGetValue(path, out _))
{
files[path] = new FileEvidenceBuilder(path);
files[path] = new FileEvidenceBuilder(path, layerDigest);
}
}

@@ -248,20 +251,23 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase

private sealed class FileEvidenceBuilder
{
public FileEvidenceBuilder(string path)
public FileEvidenceBuilder(string path, string? layerDigest)
{
Path = path;
LayerDigest = layerDigest;
}

public string Path { get; }

public string? LayerDigest { get; }

public bool IsConfig { get; set; }

public Dictionary<string, string> Digests { get; } = new(StringComparer.OrdinalIgnoreCase);

public OSPackageFileEvidence ToEvidence()
{
return new OSPackageFileEvidence(Path, isConfigFile: IsConfig, digests: Digests);
return new OSPackageFileEvidence(Path, layerDigest: LayerDigest, isConfigFile: IsConfig, digests: Digests);
}
}
}

@@ -0,0 +1,211 @@
using System;
using System.Buffers.Binary;
using System.Collections.Generic;

namespace StellaOps.Scanner.Analyzers.OS.Rpm.Internal;

/// <summary>
/// Minimal reader for BerkeleyDB hash databases used by legacy RPM databases.
/// Extracts raw data values that contain RPM header blobs.
/// </summary>
internal static class BerkeleyDbReader
{
private const uint BdbHashMagicBe = 0x00061561;
private const uint BdbHashMagicLe = 0x61150600;
private const uint BdbBtreeMagicBe = 0x00053162;
private const uint BdbBtreeMagicLe = 0x62310500;

private const int MetadataPageSize = 512;
private const int MinPageSize = 512;
private const int MaxPageSize = 65536;

/// <summary>
/// Detects if the file data is a BerkeleyDB database.
/// </summary>
public static bool IsBerkeleyDb(ReadOnlySpan<byte> data)
{
if (data.Length < 16)
{
return false;
}

var magic = BinaryPrimitives.ReadUInt32BigEndian(data.Slice(12, 4));
return magic is BdbHashMagicBe or BdbHashMagicLe or BdbBtreeMagicBe or BdbBtreeMagicLe;
}

/// <summary>
/// Extracts all data values from a BerkeleyDB database.
/// Returns the raw byte arrays which should contain RPM header blobs.
/// </summary>
public static IReadOnlyList<byte[]> ExtractValues(byte[] data)
{
var results = new List<byte[]>();

if (data.Length < MetadataPageSize)
{
return results;
}

// Read metadata page to get page size
var magic = BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(12, 4));
var isLittleEndian = magic is BdbHashMagicLe or BdbBtreeMagicLe;

int pageSize;
if (isLittleEndian)
{
pageSize = BinaryPrimitives.ReadInt32LittleEndian(data.AsSpan(20, 4));
}
else
{
pageSize = BinaryPrimitives.ReadInt32BigEndian(data.AsSpan(20, 4));
}

// Validate page size
if (pageSize < MinPageSize || pageSize > MaxPageSize)
{
pageSize = MetadataPageSize;
}

// Ensure page size is power of 2
if ((pageSize & (pageSize - 1)) != 0)
{
pageSize = MinPageSize;
}

// Scan data pages for RPM header blobs
// BerkeleyDB data pages contain key-value pairs with page type markers
var rpmMagic = new byte[] { 0x8e, 0xad, 0xe8, 0xab };
var pageCount = data.Length / pageSize;

for (var pageNum = 1; pageNum < pageCount; pageNum++)
{
var pageOffset = pageNum * pageSize;
if (pageOffset + pageSize > data.Length)
{
break;
}

// Scan this page for RPM header blobs
ExtractRpmHeadersFromPage(data, pageOffset, pageSize, rpmMagic, results);
}

return results;
}

private static void ExtractRpmHeadersFromPage(
byte[] data,
int pageOffset,
int pageSize,
byte[] rpmMagic,
List<byte[]> results)
{
var pageEnd = pageOffset + pageSize;

// Scan for RPM header magic within this page
for (var i = pageOffset; i < pageEnd - rpmMagic.Length; i++)
{
if (data[i] != rpmMagic[0] ||
data[i + 1] != rpmMagic[1] ||
data[i + 2] != rpmMagic[2] ||
data[i + 3] != rpmMagic[3])
{
continue;
}

// Found RPM header magic, try to extract the header
var header = TryExtractRpmHeader(data, i, pageEnd);
if (header is not null)
{
results.Add(header);
}
}
}

private static byte[]? TryExtractRpmHeader(byte[] data, int offset, int maxEnd)
{
// RPM header structure:
// - 4 bytes: magic (0x8eade8ab)
// - 4 bytes: reserved (0x00000000)
// - 4 bytes: index count (big-endian)
// - 4 bytes: data store size (big-endian)
// - index entries (16 bytes each)
// - data store
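// Worked example with hypothetical values: indexCount = 3, storeSize = 120
// gives totalLength = 16 + 3 * 16 + 120 = 184 bytes copied out below.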
|
||||
|
||||
if (offset + 16 > data.Length)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var span = data.AsSpan(offset);
|
||||
|
||||
// Verify reserved bytes are zero
|
||||
var reserved = BinaryPrimitives.ReadInt32BigEndian(span.Slice(4, 4));
|
||||
if (reserved != 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var indexCount = BinaryPrimitives.ReadInt32BigEndian(span.Slice(8, 4));
|
||||
var storeSize = BinaryPrimitives.ReadInt32BigEndian(span.Slice(12, 4));
|
||||
|
||||
// Sanity check bounds
|
||||
if (indexCount <= 0 || indexCount > 10000)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (storeSize <= 0 || storeSize > 50_000_000)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var totalLength = 16 + (indexCount * 16) + storeSize;
|
||||
if (totalLength <= 0 || offset + totalLength > data.Length)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Extract the complete header blob
|
||||
var result = new byte[totalLength];
|
||||
Array.Copy(data, offset, result, 0, totalLength);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Alternative extraction method that scans across page boundaries
|
||||
/// for complete RPM headers that may span multiple BDB pages.
|
||||
/// </summary>
|
||||
public static IReadOnlyList<byte[]> ExtractValuesWithOverflow(byte[] data)
|
||||
{
|
||||
var results = new List<byte[]>();
|
||||
var seen = new HashSet<string>();
|
||||
var rpmMagic = new byte[] { 0x8e, 0xad, 0xe8, 0xab };
|
||||
|
||||
// Simple scan through entire file for RPM headers
|
||||
for (var i = 0; i <= data.Length - rpmMagic.Length; i++)
|
||||
{
|
||||
if (data[i] != rpmMagic[0] ||
|
||||
data[i + 1] != rpmMagic[1] ||
|
||||
data[i + 2] != rpmMagic[2] ||
|
||||
data[i + 3] != rpmMagic[3])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var header = TryExtractRpmHeader(data, i, data.Length);
|
||||
if (header is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Use position + length as dedup key
|
||||
var key = $"{i}:{header.Length}";
|
||||
if (seen.Add(key))
|
||||
{
|
||||
results.Add(header);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Buffers.Binary;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using Microsoft.Data.Sqlite;
|
||||
@@ -23,8 +24,8 @@ internal sealed class RpmDatabaseReader : IRpmDatabaseReader
|
||||
var sqlitePath = ResolveSqlitePath(rootPath);
|
||||
if (sqlitePath is null)
|
||||
{
|
||||
_logger.LogWarning("rpmdb.sqlite not found under root {RootPath}; rpm analyzer will skip.", rootPath);
|
||||
return Array.Empty<RpmHeader>();
|
||||
_logger.LogWarning("rpmdb.sqlite not found under root {RootPath}; attempting legacy rpmdb fallback.", rootPath);
|
||||
return ReadLegacyHeaders(rootPath, cancellationToken);
|
||||
}
|
||||
|
||||
var headers = new List<RpmHeader>();
|
||||
@@ -65,7 +66,12 @@ internal sealed class RpmDatabaseReader : IRpmDatabaseReader
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Unable to read rpmdb.sqlite at {Path}.", sqlitePath);
|
||||
return Array.Empty<RpmHeader>();
|
||||
return ReadLegacyHeaders(rootPath, cancellationToken);
|
||||
}
|
||||
|
||||
if (headers.Count == 0)
|
||||
{
|
||||
return ReadLegacyHeaders(rootPath, cancellationToken);
|
||||
}
|
||||
|
||||
return headers;
|
||||
@@ -90,6 +96,230 @@ internal sealed class RpmDatabaseReader : IRpmDatabaseReader
|
||||
return null;
|
||||
}
|
||||
|
||||
private IReadOnlyList<RpmHeader> ReadLegacyHeaders(string rootPath, CancellationToken cancellationToken)
|
||||
{
|
||||
var packagesPath = ResolveLegacyPackagesPath(rootPath);
|
||||
if (packagesPath is null)
|
||||
{
|
||||
_logger.LogWarning("Legacy rpmdb Packages file not found under root {RootPath}; rpm analyzer will skip.", rootPath);
|
||||
return Array.Empty<RpmHeader>();
|
||||
}
|
||||
|
||||
byte[] data;
|
||||
try
|
||||
{
|
||||
data = File.ReadAllBytes(packagesPath);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Unable to read legacy rpmdb Packages file at {Path}.", packagesPath);
|
||||
return Array.Empty<RpmHeader>();
|
||||
}
|
||||
|
||||
// Detect BerkeleyDB format and use appropriate extraction method
|
||||
if (BerkeleyDbReader.IsBerkeleyDb(data))
|
||||
{
|
||||
_logger.LogDebug("Detected BerkeleyDB format for rpmdb at {Path}; using BDB extraction.", packagesPath);
|
||||
return ReadBerkeleyDbHeaders(data, packagesPath, cancellationToken);
|
||||
}
|
||||
|
||||
// Fall back to raw RPM header scanning for non-BDB files
|
||||
return ReadRawRpmHeaders(data, packagesPath, cancellationToken);
|
||||
}

private IReadOnlyList<RpmHeader> ReadBerkeleyDbHeaders(byte[] data, string packagesPath, CancellationToken cancellationToken)
{
    var results = new List<RpmHeader>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Try page-aware extraction first
    var headerBlobs = BerkeleyDbReader.ExtractValues(data);
    if (headerBlobs.Count == 0)
    {
        // Fall back to overflow-aware extraction for fragmented data
        headerBlobs = BerkeleyDbReader.ExtractValuesWithOverflow(data);
    }

    foreach (var blob in headerBlobs)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            var header = _parser.Parse(blob);
            var key = $"{header.Name}::{header.Version}::{header.Release}::{header.Architecture}";
            if (seen.Add(key))
            {
                results.Add(header);
            }
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Failed to parse RPM header blob from BerkeleyDB.");
        }
    }

    if (results.Count == 0)
    {
        _logger.LogWarning("No RPM headers parsed from BerkeleyDB rpmdb at {Path}.", packagesPath);
    }
    else
    {
        _logger.LogDebug("Extracted {Count} RPM headers from BerkeleyDB rpmdb at {Path}.", results.Count, packagesPath);
    }

    return results;
}

private IReadOnlyList<RpmHeader> ReadRawRpmHeaders(byte[] data, string packagesPath, CancellationToken cancellationToken)
{
    var headerBlobs = new List<byte[]>();

    if (BerkeleyDbReader.IsBerkeleyDb(data))
    {
        headerBlobs.AddRange(BerkeleyDbReader.ExtractValues(data));
        if (headerBlobs.Count == 0)
        {
            headerBlobs.AddRange(BerkeleyDbReader.ExtractValuesWithOverflow(data));
        }
    }
    else
    {
        headerBlobs.AddRange(ExtractRpmHeadersFromRaw(data, cancellationToken));
    }

    if (headerBlobs.Count == 0)
    {
        _logger.LogWarning("No RPM headers parsed from legacy rpmdb Packages at {Path}.", packagesPath);
        return Array.Empty<RpmHeader>();
    }

    var results = new List<RpmHeader>(headerBlobs.Count);
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var blob in headerBlobs)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            var header = _parser.Parse(blob);
            var key = $"{header.Name}::{header.Version}::{header.Release}::{header.Architecture}";
            if (seen.Add(key))
            {
                results.Add(header);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse RPM header from legacy rpmdb blob.");
        }
    }

    return results;
}

private static string? ResolveLegacyPackagesPath(string rootPath)
{
    var candidates = new[]
    {
        Path.Combine(rootPath, "var", "lib", "rpm", "Packages"),
        Path.Combine(rootPath, "usr", "lib", "sysimage", "rpm", "Packages"),
    };

    foreach (var candidate in candidates)
    {
        if (File.Exists(candidate))
        {
            return candidate;
        }
    }

    return null;
}

private static IEnumerable<byte[]> ExtractRpmHeadersFromRaw(byte[] data, CancellationToken cancellationToken)
{
    // RPM header section magic: 0x8e 0xad 0xe8 followed by the header version byte 0x01
    var magicBytes = new byte[] { 0x8e, 0xad, 0xe8, 0x01 };
    var seenOffsets = new HashSet<int>();
    var offset = 0;

    while (offset <= data.Length - magicBytes.Length)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var candidateIndex = FindNextMagic(data, magicBytes, offset);
        if (candidateIndex < 0)
        {
            yield break;
        }

        if (!seenOffsets.Add(candidateIndex))
        {
            offset = candidateIndex + 1;
            continue;
        }

        if (TryExtractHeaderSlice(data, candidateIndex, out var slice))
        {
            yield return slice;
        }

        offset = candidateIndex + 1;
    }
}

private static bool TryExtractHeaderSlice(byte[] data, int offset, out byte[] slice)
{
    slice = Array.Empty<byte>();

    if (offset + 16 >= data.Length)
    {
        return false;
    }

    try
    {
        var span = data.AsSpan(offset);
        var indexCount = BinaryPrimitives.ReadInt32BigEndian(span.Slice(8, 4));
        var storeSize = BinaryPrimitives.ReadInt32BigEndian(span.Slice(12, 4));

        if (indexCount <= 0 || storeSize <= 0)
        {
            return false;
        }

        var totalLength = 16 + (indexCount * 16) + storeSize;
        if (totalLength <= 0 || offset + totalLength > data.Length)
        {
            return false;
        }

        slice = new byte[totalLength];
        Buffer.BlockCopy(data, offset, slice, 0, totalLength);
        return true;
    }
    catch
    {
        return false;
    }
}
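The 16-byte preamble fully determines a header's length, so a single bounds check suffices. A quick worked example with hypothetical values:

// indexCount = 3, storeSize = 120:
//   totalLength = 16 + (3 * 16) + 120 = 184 bytes
// i.e. the preamble, three 16-byte index entries, then the data store.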

private static int FindNextMagic(byte[] data, byte[] magic, int startIndex)
{
    for (var i = startIndex; i <= data.Length - magic.Length; i++)
    {
        if (data[i] == magic[0] &&
            data[i + 1] == magic[1] &&
            data[i + 2] == magic[2] &&
            data[i + 3] == magic[3])
        {
            return i;
        }
    }

    return -1;
}

private static byte[]? ExtractHeaderBlob(SqliteDataReader reader)
{
    for (var i = 0; i < reader.FieldCount; i++)

@@ -11,6 +11,7 @@ using StellaOps.Scanner.Analyzers.OS.Abstractions;
using StellaOps.Scanner.Analyzers.OS.Analyzers;
using StellaOps.Scanner.Analyzers.OS.Helpers;
using StellaOps.Scanner.Analyzers.OS.Rpm.Internal;
using StellaOps.Scanner.Core.Contracts;

namespace StellaOps.Scanner.Analyzers.OS.Rpm;

@@ -42,6 +43,8 @@ internal sealed class RpmPackageAnalyzer : OsPackageAnalyzerBase
    return ValueTask.FromResult<IReadOnlyList<OSPackageRecord>>(EmptyPackages);
}

context.Metadata.TryGetValue(ScanMetadataKeys.CurrentLayerDigest, out var layerDigest);

var records = new List<OSPackageRecord>(headers.Count);
foreach (var header in headers)
{

@@ -77,7 +80,7 @@ internal sealed class RpmPackageAnalyzer : OsPackageAnalyzerBase
    digests = new Dictionary<string, string>(file.Digests, StringComparer.OrdinalIgnoreCase);
}

-files.Add(new OSPackageFileEvidence(file.Path, isConfigFile: file.IsConfig, digests: digests));
+files.Add(new OSPackageFileEvidence(file.Path, layerDigest: layerDigest, isConfigFile: file.IsConfig, digests: digests));
}

var cveHints = CveHintExtractor.Extract(

@@ -6,6 +6,9 @@
  <ImplicitUsings>enable</ImplicitUsings>
  <TreatWarningsAsErrors>false</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
  <InternalsVisibleTo Include="StellaOps.Scanner.Analyzers.OS.Tests" />
</ItemGroup>
<ItemGroup>
  <PackageReference Include="Microsoft.Data.Sqlite" Version="9.0.0-rc.1.24451.1" />
  <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
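The new InternalsVisibleTo entry is what lets the test project call the internal BerkeleyDbReader directly. A hypothetical xunit shape (the actual fixtures in the repo may differ, and the short-buffer behavior is an assumption):

[Fact]
public void IsBerkeleyDb_ReturnsFalse_ForShortBuffers()
{
    // Too short to contain a BDB metadata page, so detection should fail.
    Assert.False(BerkeleyDbReader.IsBerkeleyDb(new byte[] { 0x00, 0x01 }));
}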

@@ -16,7 +16,8 @@ public static class OsComponentMapper
{
    ArgumentNullException.ThrowIfNull(results);

-   var builder = ImmutableArray.CreateBuilder<LayerComponentFragment>();
+   var fragmentsByLayer = new Dictionary<string, List<ComponentRecord>>(StringComparer.OrdinalIgnoreCase);

    foreach (var result in results)
    {
        if (result is null || string.IsNullOrWhiteSpace(result.AnalyzerId))
@@ -24,31 +25,43 @@ public static class OsComponentMapper
            continue;
        }

-       var layerDigest = ComputeLayerDigest(result.AnalyzerId);
-       var components = BuildComponentRecords(result.AnalyzerId, layerDigest, result.Packages);
-       if (components.IsEmpty)
-       {
-           continue;
-       }
-
-       builder.Add(LayerComponentFragment.Create(layerDigest, components));
+       var syntheticDigest = ComputeLayerDigest(result.AnalyzerId);
+
+       foreach (var package in result.Packages ?? Enumerable.Empty<OSPackageRecord>())
+       {
+           var actualLayerDigest = ResolveLayerDigest(package) ?? syntheticDigest;
+           var record = ToComponentRecord(result.AnalyzerId, actualLayerDigest, package);
+
+           if (!fragmentsByLayer.TryGetValue(actualLayerDigest, out var records))
+           {
+               records = new List<ComponentRecord>();
+               fragmentsByLayer[actualLayerDigest] = records;
+           }
+
+           records.Add(record);
+       }
    }

+   var builder = ImmutableArray.CreateBuilder<LayerComponentFragment>(fragmentsByLayer.Count);
+   foreach (var (layerDigest, records) in fragmentsByLayer)
+   {
+       builder.Add(LayerComponentFragment.Create(layerDigest, ImmutableArray.CreateRange(records)));
+   }
+
    return builder.ToImmutable();
}

-private static ImmutableArray<ComponentRecord> BuildComponentRecords(
-    string analyzerId,
-    string layerDigest,
-    IEnumerable<OSPackageRecord> packages)
-{
-    var records = ImmutableArray.CreateBuilder<ComponentRecord>();
-    foreach (var package in packages ?? Enumerable.Empty<OSPackageRecord>())
-    {
-        records.Add(ToComponentRecord(analyzerId, layerDigest, package));
-    }
-
-    return records.ToImmutable();
-}
+private static string? ResolveLayerDigest(OSPackageRecord package)
+{
+    foreach (var file in package.Files)
+    {
+        if (!string.IsNullOrWhiteSpace(file.LayerDigest))
+        {
+            return file.LayerDigest;
+        }
+    }
+
+    return null;
+}
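The net effect: packages whose file evidence carries a real layer digest are grouped under that digest, and everything else falls back to the synthetic per-analyzer digest. Illustration with hypothetical digests:

// pkgA.Files[0].LayerDigest == "sha256:aaa..." -> fragment keyed by "sha256:aaa..."
// pkgB.Files carry no LayerDigest              -> fragment keyed by ComputeLayerDigest(analyzerId)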

private static ComponentRecord ToComponentRecord(string analyzerId, string layerDigest, OSPackageRecord package)

@@ -1,5 +1,5 @@
namespace StellaOps.Scanner.Core.Contracts;

public static class ScanMetadataKeys
{
    public const string RootFilesystemPath = "scanner.rootfs.path";
@@ -8,4 +8,5 @@ public static class ScanMetadataKeys
    public const string LayerDirectories = "scanner.rootfs.layers";
    public const string LayerArchives = "scanner.layer.archives";
    public const string RuntimeProcRoot = "scanner.runtime.proc_root";
    public const string CurrentLayerDigest = "scanner.layer.current.digest";
}

@@ -127,6 +127,12 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
var dependencies = ParseDependencies(data, format, relativePath, cancellationToken, out var observedBuildId);
var buildId = observedBuildId ?? identity?.BuildId ?? identity?.Uuid;

// Detect entry point
var entryPoint = DetectEntryPoint(data, format);

// Collect unknown/unresolved symbols
var unknowns = CollectUnknowns(data, format, cancellationToken);

var symbolId = SymbolId.ForBinaryAddressed(fileHash, ".text", "0x0", Path.GetFileName(path), "static");
var codeId = CodeId.ForBinarySegment(format, fileHash, "0x0", data.LongLength, ".text");

@@ -142,6 +148,13 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    attributes["build_id"] = buildId!;
}

// Add PURL binding for known library naming conventions
var purl = InferPurl(Path.GetFileName(path), format);
if (!string.IsNullOrWhiteSpace(purl))
{
    attributes["purl"] = purl;
}

return new BinaryInfo(
    SymbolId: symbolId,
    CodeId: codeId,
@@ -151,7 +164,9 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    BuildId: buildId,
    Dependencies: dependencies,
    DisplayName: Path.GetFileName(path),
-   Attributes: attributes);
+   Attributes: attributes,
+   EntryPoint: entryPoint,
+   Unknowns: unknowns);
}

private static IReadOnlyList<BinaryDependency> ParseDependencies(

@@ -259,6 +274,41 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    display: info.DisplayName,
    sourceFile: info.RelativePath,
    attributes: info.Attributes);

// Emit synthetic root for entry point
if (info.EntryPoint is not null)
{
    var entrySymbolId = SymbolId.ForBinaryAddressed(
        info.FileHash,
        ".text",
        info.EntryPoint.Address,
        info.EntryPoint.Name,
        "entry");

    var entryAttributes = new Dictionary<string, string>(StringComparer.Ordinal)
    {
        ["kind"] = "entry_point",
        ["is_synthetic_root"] = "true"
    };

    builder.AddNode(
        symbolId: entrySymbolId,
        lang: SymbolId.Lang.Binary,
        kind: "entry_point",
        display: info.EntryPoint.Name,
        sourceFile: info.RelativePath,
        attributes: entryAttributes);

    // Edge from entry point to binary root
    builder.AddEdge(
        from: entrySymbolId,
        to: info.SymbolId,
        edgeType: EdgeTypes.Call,
        confidence: EdgeConfidence.Certain,
        origin: "static",
        provenance: "elf-entry",
        evidence: $"file:{info.RelativePath}:entry");
}
}

private static void EmitDependencies(ReachabilityGraphBuilder builder, BinaryInfo info)

@@ -277,6 +327,13 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    ["reason"] = dep.Reason
};

// Add PURL for dependency if inferrable
var depPurl = InferPurl(dep.Name, info.Format);
if (!string.IsNullOrWhiteSpace(depPurl))
{
    depAttributes["purl"] = depPurl;
}

builder.AddNode(
    symbolId: depSymbolId,
    lang: SymbolId.Lang.Binary,
@@ -293,6 +350,211 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    provenance: dep.Provenance,
    evidence: dep.Evidence);
}

// Emit unknown/unresolved symbols
EmitUnknowns(builder, info);
}

private static void EmitUnknowns(ReachabilityGraphBuilder builder, BinaryInfo info)
{
    if (info.Unknowns.Count == 0)
    {
        return;
    }

    foreach (var unknown in info.Unknowns)
    {
        var unknownSymbolId = SymbolId.ForBinaryAddressed(
            info.FileHash,
            ".undef",
            "0x0",
            unknown.SymbolName,
            "undefined");

        var unknownAttributes = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            ["is_unknown"] = "true",
            ["reason"] = unknown.ReasonCode
        };

        builder.AddNode(
            symbolId: unknownSymbolId,
            lang: SymbolId.Lang.Binary,
            kind: "unknown",
            display: $"?{unknown.SymbolName}",
            attributes: unknownAttributes);

        builder.AddEdge(
            from: info.SymbolId,
            to: unknownSymbolId,
            edgeType: EdgeTypes.Call,
            confidence: EdgeConfidence.Medium,
            origin: "static",
            provenance: "symbol-undef",
            evidence: $"file:{info.RelativePath}:undef");
    }
}

private static BinaryEntryPoint? DetectEntryPoint(byte[] data, string format)
{
    if (data.Length < 64)
    {
        return null;
    }

    switch (format)
    {
        case "elf":
            return DetectElfEntryPoint(data);
        case "pe":
            return DetectPeEntryPoint(data);
        case "macho":
            return DetectMachOEntryPoint(data);
        default:
            return null;
    }
}

private static BinaryEntryPoint? DetectElfEntryPoint(byte[] data)
{
    if (data.Length < 32)
    {
        return null;
    }

    var is64Bit = data[4] == 2;
    var isBigEndian = data[5] == 2;

    ulong entryAddr;
    if (is64Bit)
    {
        entryAddr = isBigEndian
            ? System.Buffers.Binary.BinaryPrimitives.ReadUInt64BigEndian(data.AsSpan(24, 8))
            : System.Buffers.Binary.BinaryPrimitives.ReadUInt64LittleEndian(data.AsSpan(24, 8));
    }
    else
    {
        entryAddr = isBigEndian
            ? System.Buffers.Binary.BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(24, 4))
            : System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(24, 4));
    }

    if (entryAddr == 0)
    {
        return null;
    }

    return new BinaryEntryPoint("_start", $"0x{entryAddr:x}");
}
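A quick worked example of the offset math, with made-up bytes: for a 64-bit little-endian ELF (data[4] == 2, data[5] == 1), e_entry occupies bytes 24..31 of the header.

// bytes at offset 24: 78 56 34 12 01 00 00 00
// entryAddr = 0x112345678 -> reported as "_start" at "0x112345678"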

private static BinaryEntryPoint? DetectPeEntryPoint(byte[] data)
{
    if (data.Length < 0x40)
    {
        return null;
    }

    var peHeaderOffset = System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(data.AsSpan(0x3C, 4));
    if (peHeaderOffset < 0 || peHeaderOffset + 0x28 > data.Length)
    {
        return null;
    }

    // PE optional header AddressOfEntryPoint is at offset 16 from optional header start (pe+24)
    var optionalHeaderOffset = peHeaderOffset + 24;
    if (optionalHeaderOffset + 20 > data.Length)
    {
        return null;
    }

    var entryPointRva = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(
        data.AsSpan(optionalHeaderOffset + 16, 4));

    if (entryPointRva == 0)
    {
        return null;
    }

    return new BinaryEntryPoint("_mainCRTStartup", $"0x{entryPointRva:x}");
}
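For orientation, the offsets used above: e_lfanew (the 4 bytes at 0x3C) points at the "PE\0\0" signature, and the 20-byte COFF header follows it, so the optional header starts at pe + 24 and AddressOfEntryPoint sits 16 bytes into it.

// 0x3C -> e_lfanew; pe + 4 (signature) + 20 (COFF header) = pe + 24 (optional header)
// optional header + 16 = pe + 40 (0x28) -> AddressOfEntryPoint, matching the bound checked above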

private static BinaryEntryPoint? DetectMachOEntryPoint(byte[] data)
{
    if (data.Length < 32)
    {
        return null;
    }

    var magic = System.Buffers.Binary.BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(0, 4));
    var is64 = magic is 0xFEEDFACF or 0xCFFAEDFE;
    var isBigEndian = magic is 0xFEEDFACE or 0xFEEDFACF;

    // For Mach-O, entry point is in LC_MAIN load command
    // For simplicity, we'll return a synthetic entry point
    // Full parsing would require walking load commands
    return new BinaryEntryPoint("main", "0x0");
}
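Full Mach-O support would walk the load commands for LC_MAIN. A hedged sketch of that walk, for 64-bit little-endian images only (ReadLcMainEntryOffset is a hypothetical helper; the constants follow <mach-o/loader.h>):

private static ulong? ReadLcMainEntryOffset(byte[] data)
{
    if (data.Length < 32)
    {
        return null;
    }

    const uint LcMain = 0x80000028;   // LC_MAIN (entry_point_command)
    var ncmds = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(16, 4));
    var offset = 32;                  // sizeof(mach_header_64)

    for (var i = 0; i < ncmds && offset + 8 <= data.Length; i++)
    {
        var cmd = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(offset, 4));
        var cmdSize = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(offset + 4, 4));

        if (cmd == LcMain && offset + 16 <= data.Length)
        {
            // entry_point_command: cmd, cmdsize, then the 64-bit file offset of main()
            return System.Buffers.Binary.BinaryPrimitives.ReadUInt64LittleEndian(data.AsSpan(offset + 8, 8));
        }

        if (cmdSize < 8)
        {
            break;    // malformed load command; stop walking
        }

        offset += (int)cmdSize;
    }

    return null;
}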

private static IReadOnlyList<BinaryUnknown> CollectUnknowns(byte[] data, string format, CancellationToken cancellationToken)
{
    // For now, return empty list - full implementation would parse symbol tables
    // for undefined symbols (STT_NOTYPE with SHN_UNDEF in ELF, etc.)
    // This is a placeholder for the baseline; full implementation would require
    // parsing the symbol tables of ELF/PE/Mach-O files
    return Array.Empty<BinaryUnknown>();
}
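When CollectUnknowns is eventually filled in, the ELF side reduces to filtering symbol-table entries whose section index is SHN_UNDEF. A sketch against an assumed pre-parsed symbol model (ElfSymbol is hypothetical, not a type in this repo):

private sealed record ElfSymbol(string Name, ushort SectionIndex);

private static IEnumerable<BinaryUnknown> FindElfUndefined(IEnumerable<ElfSymbol> symbols)
{
    const ushort ShnUndef = 0;   // SHN_UNDEF

    foreach (var symbol in symbols)
    {
        if (symbol.SectionIndex == ShnUndef && !string.IsNullOrEmpty(symbol.Name))
        {
            yield return new BinaryUnknown(symbol.Name, "undefined_symbol");
        }
    }
}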

private static string? InferPurl(string fileName, string format)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        return null;
    }

    // Extract library name and version from common naming patterns
    // ELF: libfoo.so.1.2.3 -> pkg:generic/libfoo
    // PE: foo.dll -> pkg:generic/foo
    // Mach-O: libfoo.dylib -> pkg:generic/libfoo

    string? name = null;
    string? version = null;

    if (format == "elf" && fileName.Contains(".so"))
    {
        // libssl.so.3 or libcrypto.so.1.1
        var soIndex = fileName.IndexOf(".so", StringComparison.Ordinal);
        if (soIndex > 0)
        {
            name = fileName[..soIndex];
            var afterSo = fileName[(soIndex + 3)..];
            if (afterSo.StartsWith('.') && afterSo.Length > 1)
            {
                version = afterSo[1..];
            }
        }
    }
    else if (format == "pe" && fileName.EndsWith(".dll", StringComparison.OrdinalIgnoreCase))
    {
        name = fileName[..^4]; // Remove .dll
    }
    else if (format == "macho" && fileName.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase))
    {
        name = fileName[..^6]; // Remove .dylib
    }

    if (string.IsNullOrWhiteSpace(name))
    {
        return null;
    }

    // Build PURL - pkg:generic/name@version
    var purl = $"pkg:generic/{Uri.EscapeDataString(name)}";
    if (!string.IsNullOrWhiteSpace(version))
    {
        purl += $"@{Uri.EscapeDataString(version)}";
    }

    return purl;
}
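A few input/output pairs implied by those branches:

// "libssl.so.3"      (elf)   -> "pkg:generic/libssl@3"
// "libcrypto.so.1.1" (elf)   -> "pkg:generic/libcrypto@1.1"
// "kernel32.dll"     (pe)    -> "pkg:generic/kernel32"
// "libz.1.dylib"     (macho) -> "pkg:generic/libz.1"
// "a.out"            (elf)   -> null (no ".so" marker)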

private static bool TryDetect(byte[] data, out NativeBinaryIdentity identity)

@@ -330,7 +592,9 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    string? BuildId,
    IReadOnlyList<BinaryDependency> Dependencies,
    string DisplayName,
-   IReadOnlyDictionary<string, string> Attributes);
+   IReadOnlyDictionary<string, string> Attributes,
+   BinaryEntryPoint? EntryPoint,
+   IReadOnlyList<BinaryUnknown> Unknowns);

private sealed record BinaryDependency(
    string Name,
@@ -338,4 +602,12 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
    EdgeConfidence Confidence,
    string Provenance,
    string Evidence);

private sealed record BinaryEntryPoint(
    string Name,
    string Address);

private sealed record BinaryUnknown(
    string SymbolName,
    string ReasonCode);
}