feat: Implement BerkeleyDB reader for RPM databases
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
console-runner-image / build-runner-image (push) Has been cancelled
wine-csp-build / Build Wine CSP Image (push) Has been cancelled
wine-csp-build / Integration Tests (push) Has been cancelled
wine-csp-build / Security Scan (push) Has been cancelled
wine-csp-build / Generate SBOM (push) Has been cancelled
wine-csp-build / Publish Image (push) Has been cancelled
wine-csp-build / Air-Gap Bundle (push) Has been cancelled
wine-csp-build / Test Summary (push) Has been cancelled

- Added BerkeleyDbReader class to read and extract RPM header blobs from BerkeleyDB hash databases.
- Implemented methods to detect BerkeleyDB format and extract values, including handling of page sizes and magic numbers.
- Added tests for BerkeleyDbReader to ensure correct functionality and header extraction.

feat: Add Yarn PnP data tests

- Created YarnPnpDataTests to validate package resolution and data loading from Yarn PnP cache.
- Implemented tests for resolved keys, package presence, and loading from cache structure.

test: Add egg-info package fixtures for Python tests

- Created egg-info package fixtures for testing Python analyzers.
- Included PKG-INFO, entry_points.txt, and installed-files.txt for comprehensive coverage.

test: Enhance RPM database reader tests

- Added tests for RpmDatabaseReader to validate fallback to legacy packages when SQLite is missing.
- Implemented helper methods to create legacy package files and RPM headers for testing.

test: Implement dual signing tests

- Added DualSignTests to validate secondary signature addition when configured.
- Created stub implementations for crypto providers and key resolvers to facilitate testing.

chore: Update CI script for Playwright Chromium installation

- Modified ci-console-exports.sh to ensure deterministic Chromium binary installation for console exports tests.
- Added checks for Windows compatibility and environment variable setups for Playwright browsers.
This commit is contained in:
StellaOps Bot
2025-12-07 16:24:45 +02:00
parent e3f28a21ab
commit 11597679ed
199 changed files with 9809 additions and 4404 deletions

View File

@@ -9,7 +9,8 @@ internal sealed record NodeProjectInput(
IReadOnlyList<string> NodeModuleRoots,
IReadOnlyList<string> Tarballs,
IReadOnlyList<string> YarnCacheRoots,
bool YarnPnpPresent);
bool YarnPnpPresent,
YarnPnpData? YarnPnpData);
/// <summary>
/// Normalizes scanner inputs for Node.js projects, layering workspace roots, container layers,
@@ -63,12 +64,28 @@ internal static class NodeInputNormalizer
var yarnPnpPresent = sourceRoots.Any(static root => HasYarnPnpMarkers(root));
// Load Yarn PnP resolution data if present
YarnPnpData? yarnPnpData = null;
if (yarnPnpPresent)
{
foreach (var root in sourceRoots)
{
var pnpData = YarnPnpData.Load(root);
if (pnpData.HasData)
{
yarnPnpData = pnpData;
break;
}
}
}
return new NodeProjectInput(
SourceRoots: sourceRoots.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
NodeModuleRoots: nodeModuleRoots.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
Tarballs: tarballs.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
YarnCacheRoots: yarnCacheRoots.OrderBy(static p => p, StringComparer.Ordinal).ToArray(),
YarnPnpPresent: yarnPnpPresent);
YarnPnpPresent: yarnPnpPresent,
YarnPnpData: yarnPnpData);
}
private static IReadOnlyList<string> DiscoverSourceRoots(string rootPath)

View File

@@ -58,8 +58,6 @@ internal static class NodePackageCollector
TraverseTarballs(context, projectInput.Tarballs, packages, visited, yarnPnpPresent, cancellationToken);
TraverseYarnPnpCache(context, projectInput.YarnCacheRoots, packages, visited, yarnPnpPresent, cancellationToken);
AppendDeclaredPackages(packages, lockData);
AttachImports(context, packages, cancellationToken);
var resolutions = NodeResolver.Resolve(context, projectInput, packages, cancellationToken);
@@ -68,9 +66,50 @@ internal static class NodePackageCollector
package.SetResolvedImports(resolvedImports);
}
// Filter out declared-only packages without on-disk evidence when PnP data is available
if (projectInput.YarnPnpData?.HasData == true)
{
return FilterDeclaredOnlyPackages(packages, projectInput.YarnPnpData);
}
return packages;
}
/// <summary>
/// Removes declared-only packages lacking on-disk evidence once Yarn PnP data is available.
/// Workspace members and private packages always survive; every other package must either
/// avoid the declared-only/PnP-cache flags or appear in the PnP resolution map.
/// </summary>
private static IReadOnlyList<NodePackage> FilterDeclaredOnlyPackages(List<NodePackage> packages, YarnPnpData pnpData)
{
    var surviving = new List<NodePackage>(packages.Count);
    foreach (var candidate in packages)
    {
        if (ShouldEmit(candidate))
        {
            surviving.Add(candidate);
        }
    }
    return surviving;

    // Local predicate keeps the per-package rules in one readable place.
    bool ShouldEmit(NodePackage candidate)
    {
        // Workspace members and private packages are always emitted.
        if (candidate.IsWorkspaceMember || candidate.IsPrivate == true)
        {
            return true;
        }
        // Declared-only entries need PnP resolution evidence to survive.
        if (candidate.DeclaredOnly && !pnpData.IsResolved(candidate.Name, candidate.Version))
        {
            return false;
        }
        // Yarn-cache-sourced entries must also be present in the PnP resolution.
        if (candidate.IsYarnPnp && !pnpData.IsResolved(candidate.Name, candidate.Version))
        {
            return false;
        }
        return true;
    }
}
private static void AttachImports(LanguageAnalyzerContext context, List<NodePackage> packages, CancellationToken cancellationToken)
{
foreach (var package in packages)
@@ -454,73 +493,6 @@ internal static class NodePackageCollector
}
}
/// <summary>
/// Appends lock-file declared packages that were not already observed on disk,
/// marking each synthesized entry as declared-only. Duplicate name@version pairs
/// (case-insensitive) are added at most once.
/// </summary>
private static void AppendDeclaredPackages(List<NodePackage> packages, NodeLockData lockData)
{
    if (lockData.DeclaredPackages.Count == 0)
    {
        return;
    }

    // Index everything already collected so lock entries are only added when unseen.
    var seenKeys = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var existing in packages)
    {
        var existingKey = BuildDeclarationKey(existing.Name, existing.Version);
        if (!string.IsNullOrEmpty(existingKey))
        {
            seenKeys.Add(existingKey);
        }
    }

    foreach (var entry in lockData.DeclaredPackages)
    {
        // Entries without both a name and a version cannot be keyed; skip them.
        if (string.IsNullOrWhiteSpace(entry.Name) || string.IsNullOrWhiteSpace(entry.Version))
        {
            continue;
        }

        var entryKey = BuildDeclarationKey(entry.Name, entry.Version);
        // Add() doubles as the dedupe check: false means we already emitted this key.
        if (string.IsNullOrEmpty(entryKey) || !seenKeys.Add(entryKey))
        {
            continue;
        }

        // Synthesize a declared-only package carrying just the lock-file evidence.
        packages.Add(new NodePackage(
            entry.Name,
            entry.Version,
            relativePath: string.Empty,
            packageJsonLocator: string.Empty,
            isPrivate: null,
            lockEntry: entry,
            isWorkspaceMember: false,
            workspaceRoot: null,
            workspaceTargets: Array.Empty<string>(),
            workspaceLink: null,
            lifecycleScripts: Array.Empty<NodeLifecycleScript>(),
            nodeVersions: Array.Empty<NodeVersionTarget>(),
            usedByEntrypoint: false,
            declaredOnly: true,
            lockSource: entry.Source,
            lockLocator: BuildLockLocator(entry),
            packageSha256: null,
            isYarnPnp: false,
            scope: entry.Scope,
            isOptional: entry.IsOptional,
            license: null));
    }
}
/// <summary>
/// Builds a lowercase "name@version" dedupe key; returns an empty string when
/// either component is missing or whitespace.
/// </summary>
private static string BuildDeclarationKey(string name, string? version)
{
    var hasName = !string.IsNullOrWhiteSpace(name);
    var hasVersion = !string.IsNullOrWhiteSpace(version);
    if (!hasName || !hasVersion)
    {
        return string.Empty;
    }
    var key = string.Concat(name, "@", version);
    return key.ToLowerInvariant();
}
private static string? BuildLockLocator(NodeLockEntry? entry)
{
if (entry is null)

View File

@@ -0,0 +1,279 @@
using System.IO.Compression;
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Loads Node packages directly from a Yarn PnP ".pnp.data.json" manifest at the
/// context root. Each entry in the top-level "packages" object becomes a
/// <see cref="NodePackage"/> with lockSource "pnp.data"; the result is sorted by
/// ComponentKey for deterministic output.
/// </summary>
internal static class NodePnpDataLoader
{
/// <summary>
/// Reads ".pnp.data.json" under <paramref name="context"/>.RootPath and parses its
/// "packages" object. Returns an empty list when the file is absent, unreadable
/// (IOException), malformed (JsonException), or has no "packages" object.
/// </summary>
public static IReadOnlyList<NodePackage> Load(LanguageAnalyzerContext context, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(context);
var pnpDataPath = Path.Combine(context.RootPath, ".pnp.data.json");
if (!File.Exists(pnpDataPath))
{
return Array.Empty<NodePackage>();
}
try
{
using var stream = File.OpenRead(pnpDataPath);
using var document = JsonDocument.Parse(stream);
if (!document.RootElement.TryGetProperty("packages", out var packagesElement) ||
packagesElement.ValueKind != JsonValueKind.Object)
{
return Array.Empty<NodePackage>();
}
var packages = new List<NodePackage>();
foreach (var packageProperty in packagesElement.EnumerateObject())
{
cancellationToken.ThrowIfCancellationRequested();
var package = TryParsePackage(context, packageProperty, cancellationToken);
if (package is not null)
{
packages.Add(package);
}
}
// Ordinal sort keeps the emitted package list deterministic across runs.
return packages
.OrderBy(static p => p.ComponentKey, StringComparer.Ordinal)
.ToArray();
}
catch (IOException)
{
return Array.Empty<NodePackage>();
}
catch (JsonException)
{
return Array.Empty<NodePackage>();
}
}
/// <summary>
/// Converts one "packages" entry (locator key -> object) into a NodePackage.
/// Returns null when the value is not an object, "packageLocation" is missing/blank,
/// or a name/version pair cannot be derived from the embedded packageJson or the key.
/// </summary>
private static NodePackage? TryParsePackage(
LanguageAnalyzerContext context,
JsonProperty packageProperty,
CancellationToken cancellationToken)
{
var rawKey = packageProperty.Name;
var obj = packageProperty.Value;
if (obj.ValueKind != JsonValueKind.Object)
{
return null;
}
var packageLocation = obj.TryGetProperty("packageLocation", out var locationElement)
? locationElement.GetString()
: null;
if (string.IsNullOrWhiteSpace(packageLocation))
{
return null;
}
packageLocation = packageLocation.Trim();
// Optional inline "packageJson" object embedded in the PnP manifest entry.
var packageJson = obj.TryGetProperty("packageJson", out var packageJsonElement) && packageJsonElement.ValueKind == JsonValueKind.Object
? packageJsonElement
: (JsonElement?)null;
var (name, version) = ExtractNameVersion(rawKey, packageJson);
if (string.IsNullOrWhiteSpace(name) || string.IsNullOrWhiteSpace(version))
{
return null;
}
var relativePath = context.GetRelativePath(packageLocation).Replace(Path.DirectorySeparatorChar, '/');
var absolutePackagePath = Path.GetFullPath(Path.Combine(context.RootPath, packageLocation));
var usedByEntrypoint = context.UsageHints.IsPathUsed(absolutePackagePath);
var packageJsonLocator = BuildLocator(packageLocation);
string? packageSha256 = null;
bool? isPrivate = null;
string? license = null;
// Prefer metadata from the inline packageJson object when present.
if (packageJson is not null)
{
if (packageJson.Value.TryGetProperty("private", out var privateElement) && privateElement.ValueKind is JsonValueKind.True or JsonValueKind.False)
{
isPrivate = privateElement.GetBoolean();
}
if (packageJson.Value.TryGetProperty("license", out var licenseElement) && licenseElement.ValueKind == JsonValueKind.String)
{
license = licenseElement.GetString();
}
}
// On-disk (or in-zip) package.json supplies the sha256 and fills any metadata
// still missing (??= keeps the inline values when both sources are present).
if (TryReadPackageJsonContent(context.RootPath, packageLocation, out var packageContent, out var locator))
{
packageSha256 = Convert.ToHexString(System.Security.Cryptography.SHA256.HashData(packageContent)).ToLowerInvariant();
packageJsonLocator = locator;
try
{
using var doc = JsonDocument.Parse(packageContent);
if (doc.RootElement.TryGetProperty("private", out var jsonPrivate) && jsonPrivate.ValueKind is JsonValueKind.True or JsonValueKind.False)
{
isPrivate ??= jsonPrivate.GetBoolean();
}
if (doc.RootElement.TryGetProperty("license", out var licenseElement) && licenseElement.ValueKind == JsonValueKind.String)
{
license ??= licenseElement.GetString();
}
}
catch (JsonException)
{
// ignore malformed package.json content
}
}
var package = new NodePackage(
name: name,
version: version,
relativePath: relativePath,
packageJsonLocator: packageJsonLocator,
isPrivate: isPrivate,
lockEntry: null,
isWorkspaceMember: false,
workspaceRoot: null,
workspaceTargets: Array.Empty<string>(),
workspaceLink: null,
lifecycleScripts: Array.Empty<NodeLifecycleScript>(),
nodeVersions: Array.Empty<NodeVersionTarget>(),
usedByEntrypoint: usedByEntrypoint,
declaredOnly: false,
lockSource: "pnp.data",
lockLocator: rawKey,
packageSha256: packageSha256,
isYarnPnp: true,
scope: null,
isOptional: false,
license: license);
return package;
}
/// <summary>
/// Tries to read the raw bytes of the entry's package.json: first directly on disk
/// under packageLocation, then from "package/package.json" inside a zip archive when
/// the location path contains ".zip". Sets <paramref name="locator"/> to the source
/// actually read; on failure leaves the default BuildLocator value and returns false.
/// </summary>
private static bool TryReadPackageJsonContent(string rootPath, string packageLocation, out byte[] content, out string locator)
{
content = Array.Empty<byte>();
locator = BuildLocator(packageLocation);
// First try direct package.json on disk
var packageJsonPath = Path.Combine(rootPath, packageLocation, "package.json");
if (File.Exists(packageJsonPath))
{
try
{
content = File.ReadAllBytes(packageJsonPath);
locator = NormalizeLocator(packageJsonPath, rootPath);
return true;
}
catch (IOException)
{
// Fall through to the zip probe below on read failure.
}
}
// If location points to a zip, try to read package/package.json inside the archive
if (packageLocation.Contains(".zip", StringComparison.OrdinalIgnoreCase))
{
// Truncate the location at the ".zip" suffix to get the archive path itself.
var zipPath = Path.Combine(rootPath, packageLocation[..packageLocation.IndexOf(".zip", StringComparison.OrdinalIgnoreCase) + 4]);
if (File.Exists(zipPath))
{
try
{
using var archive = ZipFile.OpenRead(zipPath);
var entry = archive.GetEntry("package/package.json");
if (entry is not null)
{
using var entryStream = entry.Open();
using var memory = new MemoryStream();
entryStream.CopyTo(memory);
content = memory.ToArray();
// "!" separates the archive path from the entry path inside it.
locator = $"{NormalizeLocator(zipPath, rootPath)}!package/package.json";
return true;
}
}
catch (IOException)
{
// Unreadable archive: report no content.
}
catch (InvalidDataException)
{
// Corrupt/non-zip data: report no content.
}
}
}
return false;
}
/// <summary>
/// Derives (name, version) preferring the inline packageJson's "name"/"version"
/// fields, then falling back to parsing the locator key. Either component may come
/// back empty when neither source provides it.
/// </summary>
private static (string Name, string Version) ExtractNameVersion(string rawKey, JsonElement? packageJson)
{
string? name = null;
string? version = null;
if (packageJson is not null)
{
if (packageJson.Value.TryGetProperty("name", out var nameElement) && nameElement.ValueKind == JsonValueKind.String)
{
name = nameElement.GetString();
}
if (packageJson.Value.TryGetProperty("version", out var versionElement) && versionElement.ValueKind == JsonValueKind.String)
{
version = versionElement.GetString();
}
}
if (!string.IsNullOrWhiteSpace(name) && !string.IsNullOrWhiteSpace(version))
{
return (name!.Trim(), version!.Trim());
}
// Fallback parsing from locator key (e.g., "cached-lib@npm:1.0.0" or "@scope/pkg@npm:2.0.0")
// LastIndexOf('@') keeps a leading "@scope/" prefix intact as part of the name.
var atIndex = rawKey.LastIndexOf('@');
if (atIndex > 0 && atIndex < rawKey.Length - 1)
{
name ??= rawKey[..atIndex];
var remainder = rawKey[(atIndex + 1)..];
// Strip a protocol prefix such as "npm:" from the version portion when present.
var colonIndex = remainder.IndexOf(':');
if (colonIndex >= 0 && colonIndex < remainder.Length - 1)
{
version ??= remainder[(colonIndex + 1)..];
}
else
{
version ??= remainder;
}
}
return (name ?? string.Empty, version ?? string.Empty);
}
/// <summary>
/// Produces the default package.json locator for a location: "loc/package.json" for
/// directories, "loc.zip!package/package.json" for zip locations, or plain
/// "package.json" when the location is blank.
/// </summary>
private static string BuildLocator(string packageLocation)
{
if (string.IsNullOrWhiteSpace(packageLocation))
{
return "package.json";
}
var normalized = packageLocation.Replace('\\', '/').TrimEnd('/');
return normalized.EndsWith(".zip", StringComparison.OrdinalIgnoreCase)
? $"{normalized}!package/package.json"
: $"{normalized}/package.json";
}
/// <summary>
/// Rewrites an absolute path as a forward-slash path relative to <paramref name="rootPath"/>;
/// falls back to "package.json" when the relative path comes back blank.
/// </summary>
private static string NormalizeLocator(string path, string rootPath)
{
var relative = Path.GetRelativePath(rootPath, path);
if (string.IsNullOrWhiteSpace(relative))
{
return "package.json";
}
return relative.Replace(Path.DirectorySeparatorChar, '/');
}
}

View File

@@ -0,0 +1,316 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Represents parsed Yarn PnP resolution data from .pnp.data.json or .pnp.cjs.
/// Used to determine which packages are actually resolved and have on-disk evidence.
/// Loading never throws for I/O or JSON failures; it yields an instance with no data.
/// </summary>
internal sealed class YarnPnpData
{
// Shared empty sentinels so the null-coalescing constructor never allocates per instance.
private static readonly IReadOnlyDictionary<string, YarnPnpPackageInfo> EmptyPackages =
new ReadOnlyDictionary<string, YarnPnpPackageInfo>(new Dictionary<string, YarnPnpPackageInfo>(0));
private static readonly IReadOnlySet<string> EmptySet =
new HashSet<string>(0, StringComparer.Ordinal);
/// <summary>
/// Creates an instance from pre-built lookup structures; null arguments are
/// replaced with shared empty collections (producing HasData == false).
/// </summary>
public YarnPnpData(
IReadOnlyDictionary<string, YarnPnpPackageInfo>? packages,
IReadOnlySet<string>? resolvedPackageKeys)
{
Packages = packages ?? EmptyPackages;
ResolvedPackageKeys = resolvedPackageKeys ?? EmptySet;
}
/// <summary>
/// Package information by locator key (e.g., "lodash@npm:4.17.21").
/// </summary>
public IReadOnlyDictionary<string, YarnPnpPackageInfo> Packages { get; }
/// <summary>
/// Set of package keys (name@version) that are actually resolved in the dependency tree.
/// Comparison is ordinal and case-sensitive.
/// </summary>
public IReadOnlySet<string> ResolvedPackageKeys { get; }
/// <summary>
/// Whether PnP data was successfully loaded.
/// </summary>
public bool HasData => Packages.Count > 0;
/// <summary>
/// Checks if a package is resolved in the PnP data.
/// The lookup key is the bare "name@version" form (no "npm:" prefix).
/// </summary>
public bool IsResolved(string name, string version)
{
var key = $"{name}@{version}";
return ResolvedPackageKeys.Contains(key);
}
/// <summary>
/// Tries to get package info by name and version.
/// Probes the "name@npm:version" locator form first, then the bare "name@version" form.
/// </summary>
public bool TryGetPackage(string name, string version, out YarnPnpPackageInfo? info)
{
var key = $"{name}@npm:{version}";
if (Packages.TryGetValue(key, out info))
{
return true;
}
// Also try without npm: prefix
key = $"{name}@{version}";
return Packages.TryGetValue(key, out info);
}
/// <summary>
/// Loads PnP data from the root path.
/// Prefers ".pnp.data.json"; otherwise infers data from the ".yarn/cache" layout.
/// </summary>
public static YarnPnpData Load(string rootPath)
{
var pnpDataPath = Path.Combine(rootPath, ".pnp.data.json");
if (File.Exists(pnpDataPath))
{
return LoadFromDataJson(pnpDataPath);
}
// .pnp.cjs is more complex (JavaScript), so we extract data from cache structure instead
// when .pnp.data.json is not available
return LoadFromCacheStructure(rootPath);
}
/// <summary>
/// Parses the "packageRegistryData" array of a .pnp.data.json file.
/// Returns an empty-data instance on IOException or JsonException.
/// </summary>
private static YarnPnpData LoadFromDataJson(string pnpDataPath)
{
try
{
using var stream = File.OpenRead(pnpDataPath);
using var document = JsonDocument.Parse(stream);
var root = document.RootElement;
var packages = new Dictionary<string, YarnPnpPackageInfo>(StringComparer.Ordinal);
var resolvedKeys = new HashSet<string>(StringComparer.Ordinal);
// Parse packageRegistryData which contains the actual package locations
if (root.TryGetProperty("packageRegistryData", out var registryData) &&
registryData.ValueKind == JsonValueKind.Array)
{
foreach (var entry in registryData.EnumerateArray())
{
ParsePackageRegistryEntry(entry, packages, resolvedKeys);
}
}
return new YarnPnpData(
new ReadOnlyDictionary<string, YarnPnpPackageInfo>(packages),
resolvedKeys);
}
catch (IOException)
{
return new YarnPnpData(null, null);
}
catch (JsonException)
{
return new YarnPnpData(null, null);
}
}
/// <summary>
/// Parses one registry entry of the form [packageName, [versionEntry, ...]].
/// Entries that are not two-element arrays or lack a package name are ignored.
/// </summary>
private static void ParsePackageRegistryEntry(
JsonElement entry,
Dictionary<string, YarnPnpPackageInfo> packages,
HashSet<string> resolvedKeys)
{
if (entry.ValueKind != JsonValueKind.Array)
{
return;
}
var items = new List<JsonElement>();
foreach (var item in entry.EnumerateArray())
{
items.Add(item);
}
if (items.Count < 2)
{
return;
}
// First element is package name, second is array of version entries
var packageName = items[0].GetString();
if (string.IsNullOrWhiteSpace(packageName))
{
return;
}
var versionEntries = items[1];
if (versionEntries.ValueKind != JsonValueKind.Array)
{
return;
}
foreach (var versionEntry in versionEntries.EnumerateArray())
{
ParseVersionEntry(packageName!, versionEntry, packages, resolvedKeys);
}
}
/// <summary>
/// Parses one version entry of the form [versionRef, infoObject]; records the
/// "name@versionRef" locator in <paramref name="packages"/> (first occurrence wins)
/// and the "name@version" key (with any "npm:" prefix stripped) in <paramref name="resolvedKeys"/>.
/// </summary>
private static void ParseVersionEntry(
string packageName,
JsonElement versionEntry,
Dictionary<string, YarnPnpPackageInfo> packages,
HashSet<string> resolvedKeys)
{
if (versionEntry.ValueKind != JsonValueKind.Array)
{
return;
}
var items = new List<JsonElement>();
foreach (var item in versionEntry.EnumerateArray())
{
items.Add(item);
}
if (items.Count < 2)
{
return;
}
// First element is version reference (e.g., "npm:4.17.21"), second is package info
var versionRef = items[0].GetString();
if (string.IsNullOrWhiteSpace(versionRef))
{
return;
}
var packageInfo = items[1];
if (packageInfo.ValueKind != JsonValueKind.Object)
{
return;
}
string? packageLocation = null;
if (packageInfo.TryGetProperty("packageLocation", out var locationElement) &&
locationElement.ValueKind == JsonValueKind.String)
{
packageLocation = locationElement.GetString();
}
// Extract actual version from the reference
var version = versionRef;
if (versionRef!.StartsWith("npm:", StringComparison.Ordinal))
{
version = versionRef[4..];
}
var locatorKey = $"{packageName}@{versionRef}";
var resolvedKey = $"{packageName}@{version}";
if (!packages.ContainsKey(locatorKey))
{
packages[locatorKey] = new YarnPnpPackageInfo(packageName, version!, packageLocation);
}
resolvedKeys.Add(resolvedKey);
}
/// <summary>
/// Builds PnP data by scanning ".yarn/cache" for "*.zip" entries and parsing
/// name/version from each filename. Missing cache directory or enumeration
/// IOExceptions yield an empty-data instance. Note: PackageLocation here is the
/// absolute zip path as enumerated, not a root-relative location.
/// </summary>
private static YarnPnpData LoadFromCacheStructure(string rootPath)
{
// When no .pnp.data.json, infer from .yarn/cache structure
var cacheDir = Path.Combine(rootPath, ".yarn", "cache");
if (!Directory.Exists(cacheDir))
{
return new YarnPnpData(null, null);
}
var packages = new Dictionary<string, YarnPnpPackageInfo>(StringComparer.Ordinal);
var resolvedKeys = new HashSet<string>(StringComparer.Ordinal);
try
{
foreach (var zipFile in Directory.EnumerateFiles(cacheDir, "*.zip", SearchOption.AllDirectories))
{
var fileName = Path.GetFileNameWithoutExtension(zipFile);
if (TryParseYarnCacheFilename(fileName, out var name, out var version))
{
var locatorKey = $"{name}@npm:{version}";
var resolvedKey = $"{name}@{version}";
if (!packages.ContainsKey(locatorKey))
{
packages[locatorKey] = new YarnPnpPackageInfo(name!, version!, zipFile);
}
resolvedKeys.Add(resolvedKey);
}
}
}
catch (IOException)
{
// Ignore enumeration errors
}
return new YarnPnpData(
new ReadOnlyDictionary<string, YarnPnpPackageInfo>(packages),
resolvedKeys);
}
/// <summary>
/// Parses Yarn cache filename format: name-npm-version-hash.zip
/// Examples: lodash-npm-4.17.21-6382d6d2.zip, @types-node-npm-18.0.0-abc123.zip
/// Returns false when the "-npm-" separator or the trailing hash segment is missing.
/// </summary>
private static bool TryParseYarnCacheFilename(string fileName, out string? name, out string? version)
{
name = null;
version = null;
if (string.IsNullOrWhiteSpace(fileName))
{
return false;
}
// Find -npm- separator
var npmIndex = fileName.IndexOf("-npm-", StringComparison.Ordinal);
if (npmIndex < 1)
{
return false;
}
// Name is before -npm-
var rawName = fileName[..npmIndex];
// Handle scoped packages: @types-node -> @types/node
if (rawName.StartsWith('@'))
{
var dashIndex = rawName.IndexOf('-', 1);
if (dashIndex > 1)
{
rawName = rawName[..dashIndex] + "/" + rawName[(dashIndex + 1)..];
}
}
name = rawName;
// Version is between -npm- and the last -hash part
var afterNpm = fileName[(npmIndex + 5)..];
var lastDash = afterNpm.LastIndexOf('-');
if (lastDash < 1)
{
return false;
}
version = afterNpm[..lastDash];
return !string.IsNullOrWhiteSpace(version);
}
}
/// <summary>
/// Information about a single package in the PnP resolution data.
/// </summary>
/// <param name="Name">Package name, possibly scoped (e.g. "@types/node").</param>
/// <param name="Version">Resolved version string with any "npm:" prefix removed.</param>
/// <param name="PackageLocation">Package location when known (manifest location or cache zip path); otherwise null.</param>
internal sealed record YarnPnpPackageInfo(
string Name,
string Version,
string? PackageLocation);

View File

@@ -0,0 +1,417 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for legacy .egg-info metadata (setuptools installations).
/// Handles standalone egg-info directories in site-packages that aren't editable installs.
/// </summary>
internal sealed partial class EggInfoAdapter : IPythonPackagingAdapter
{
public string Name => "egg-info";
public int Priority => 15; // Lower priority than dist-info (10) to prefer wheels
[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)-(?<version>[\d.]+(?:\.dev\d*|a\d*|b\d*|rc\d*|post\d*)?)\.egg-info$",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex EggInfoDirPattern();
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
// Look for any .egg-info directories with PKG-INFO
return vfs.EnumerateFiles(path, "*.egg-info/PKG-INFO").Any();
}
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
PythonVirtualFileSystem vfs,
string path,
[System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
// Find all .egg-info directories with PKG-INFO
var pkgInfoFiles = vfs.EnumerateFiles(path, "*.egg-info/PKG-INFO").ToList();
foreach (var pkgInfoFile in pkgInfoFiles)
{
cancellationToken.ThrowIfCancellationRequested();
var eggInfoPath = Path.GetDirectoryName(pkgInfoFile.VirtualPath) ?? string.Empty;
var eggInfoName = Path.GetFileName(eggInfoPath);
var match = EggInfoDirPattern().Match(eggInfoName);
if (!match.Success)
{
continue;
}
var packageName = match.Groups["name"].Value;
var version = match.Groups["version"].Value;
// Read PKG-INFO file (same format as METADATA)
var metadata = await ReadPkgInfoAsync(vfs, pkgInfoFile, cancellationToken).ConfigureAwait(false);
// Use metadata name if available (more accurate)
if (metadata.TryGetValue("Name", out var metadataName) && !string.IsNullOrEmpty(metadataName))
{
packageName = metadataName;
}
if (metadata.TryGetValue("Version", out var metadataVersion) && !string.IsNullOrEmpty(metadataVersion))
{
version = metadataVersion;
}
// Read top_level.txt
var topLevelModules = await ReadTopLevelAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);
// Read SOURCES.txt (file manifest for egg-info)
var sourceFiles = await ReadSourcesAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);
// Read requires.txt (egg-info dependencies format)
var dependencies = await ReadRequiresAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);
// Read installed-files.txt if available
var installedFiles = await ReadInstalledFilesAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);
// Read entry_points.txt
var entryPoints = await ReadEntryPointsAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);
// Extract extras from requires.txt sections
var extras = ExtractExtras(dependencies);
// Determine confidence based on available metadata
var confidence = sourceFiles.Length > 0 || installedFiles.Length > 0
? PythonPackageConfidence.Definitive
: PythonPackageConfidence.High;
// Convert sources/installed files to record entries
var recordFiles = ConvertToRecordEntries(sourceFiles, installedFiles);
yield return new PythonPackageInfo(
Name: packageName,
Version: version,
Kind: PythonPackageKind.Egg,
Location: path,
MetadataPath: eggInfoPath,
TopLevelModules: topLevelModules,
Dependencies: FilterBaseDependencies(dependencies),
Extras: extras,
RecordFiles: recordFiles,
InstallerTool: DetectInstaller(metadata),
EditableTarget: null,
IsDirectDependency: false, // Will be corrected by dependency graph analysis
Confidence: confidence);
}
}
private static async Task<Dictionary<string, string>> ReadPkgInfoAsync(
PythonVirtualFileSystem vfs,
PythonVirtualFile file,
CancellationToken cancellationToken)
{
var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
try
{
using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return result;
}
using var reader = new StreamReader(stream);
string? currentKey = null;
var currentValue = new System.Text.StringBuilder();
string? line;
while ((line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
{
cancellationToken.ThrowIfCancellationRequested();
// Empty line marks end of headers
if (string.IsNullOrEmpty(line))
{
if (currentKey is not null)
{
result[currentKey] = currentValue.ToString().Trim();
}
break;
}
// Continuation line
if (line.StartsWith(' ') || line.StartsWith('\t'))
{
currentValue.AppendLine(line.Trim());
continue;
}
// New header
var colonIdx = line.IndexOf(':');
if (colonIdx > 0)
{
if (currentKey is not null)
{
if (!result.ContainsKey(currentKey))
{
result[currentKey] = currentValue.ToString().Trim();
}
else
{
// Multi-value header
result[currentKey] = result[currentKey] + "\n" + currentValue.ToString().Trim();
}
}
currentKey = line[..colonIdx].Trim();
currentValue.Clear();
currentValue.Append(line[(colonIdx + 1)..].Trim());
}
}
if (currentKey is not null && !result.ContainsKey(currentKey))
{
result[currentKey] = currentValue.ToString().Trim();
}
}
catch (IOException)
{
// Ignore read errors
}
return result;
}
private static async Task<ImmutableArray<string>> ReadTopLevelAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var topLevelPath = $"{eggInfoPath}/top_level.txt";
try
{
using var stream = await vfs.OpenReadAsync(topLevelPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}
using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}
private static async Task<ImmutableArray<string>> ReadSourcesAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var sourcesPath = $"{eggInfoPath}/SOURCES.txt";
try
{
using var stream = await vfs.OpenReadAsync(sourcesPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}
using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}
private static async Task<ImmutableArray<string>> ReadInstalledFilesAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var installedPath = $"{eggInfoPath}/installed-files.txt";
try
{
using var stream = await vfs.OpenReadAsync(installedPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}
using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}
private static async Task<ImmutableArray<string>> ReadRequiresAsync(
PythonVirtualFileSystem vfs,
string eggInfoPath,
CancellationToken cancellationToken)
{
var requiresPath = $"{eggInfoPath}/requires.txt";
try
{
using var stream = await vfs.OpenReadAsync(requiresPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return ImmutableArray<string>.Empty;
}
using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
// requires.txt format:
// base_dependency>=1.0
// [extra1]
// extra_dep>=2.0
// [extra2:python_version<"3.0"]
// conditional_dep
var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
var result = new List<string>();
string? currentExtra = null;
foreach (var line in lines)
{
if (line.StartsWith('[') && line.EndsWith(']'))
{
// This is an extra section
currentExtra = line[1..^1];
// Handle conditional extras like [extra:condition]
var colonIdx = currentExtra.IndexOf(':');
if (colonIdx > 0)
{
currentExtra = currentExtra[..colonIdx];
}
continue;
}
if (currentExtra is not null)
{
// Dependency in an extra section - add with marker
result.Add($"{line} ; extra == \"{currentExtra}\"");
}
else
{
// Base dependency
result.Add(line);
}
}
return result.ToImmutableArray();
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
}
/// <summary>
/// Loads entry point declarations from the egg-info's entry_points.txt.
/// Returns the raw "name = module:attr" lines; section headers and blank
/// lines are dropped. Missing file or I/O failure yields an empty array.
/// </summary>
private static async Task<ImmutableArray<string>> ReadEntryPointsAsync(
    PythonVirtualFileSystem vfs,
    string eggInfoPath,
    CancellationToken cancellationToken)
{
    var entryPointsPath = $"{eggInfoPath}/entry_points.txt";
    try
    {
        using var stream = await vfs.OpenReadAsync(entryPointsPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return ImmutableArray<string>.Empty;
        }

        using var reader = new StreamReader(stream);
        var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        // entry_points.txt is INI-style:
        // [console_scripts]
        // script_name = module:function
        var builder = ImmutableArray.CreateBuilder<string>();
        foreach (var rawLine in content.Split('\n'))
        {
            var candidate = rawLine.Trim();
            if (candidate.Length == 0 || candidate.StartsWith('[') || !candidate.Contains('='))
            {
                continue;
            }

            builder.Add(candidate);
        }

        return builder.ToImmutable();
    }
    catch (IOException)
    {
        return ImmutableArray<string>.Empty;
    }
}
/// <summary>
/// Returns only the dependencies that apply unconditionally, i.e. those
/// without an "extra == ..." environment marker.
/// </summary>
private static ImmutableArray<string> FilterBaseDependencies(ImmutableArray<string> dependencies)
{
    var baseDeps = ImmutableArray.CreateBuilder<string>();
    foreach (var dependency in dependencies)
    {
        // "extra ==" marks a dependency that only applies when an extra is requested.
        if (dependency.Contains("extra ==", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        baseDeps.Add(dependency);
    }

    return baseDeps.ToImmutable();
}
/// <summary>
/// Collects the distinct extra names referenced by dependency markers of the
/// form: dep ; extra == "name". Comparison is case-insensitive.
/// </summary>
private static ImmutableArray<string> ExtractExtras(ImmutableArray<string> dependencies)
{
    var names = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var dependency in dependencies)
    {
        var match = System.Text.RegularExpressions.Regex.Match(dependency, @"extra\s*==\s*""([^""]+)""");
        if (!match.Success)
        {
            continue;
        }

        names.Add(match.Groups[1].Value);
    }

    return names.ToImmutableArray();
}
/// <summary>
/// Builds record entries from the best available file listing. No hashes or
/// sizes are recorded (egg-info does not provide them).
/// </summary>
private static ImmutableArray<PythonRecordEntry> ConvertToRecordEntries(
    ImmutableArray<string> sources,
    ImmutableArray<string> installedFiles)
{
    // installed-files.txt reflects what actually landed on disk, so it wins
    // over the source listing when both are present.
    var chosen = installedFiles.IsEmpty ? sources : installedFiles;

    var entries = ImmutableArray.CreateBuilder<PythonRecordEntry>(chosen.Length);
    foreach (var path in chosen)
    {
        entries.Add(new PythonRecordEntry(path, null, null));
    }

    return entries.MoveToImmutable();
}
/// <summary>
/// Determines which tool installed the package. An explicit "Installer"
/// metadata entry takes precedence; otherwise egg-info layouts are produced
/// by setuptools, so that is reported as the default.
/// </summary>
private static string? DetectInstaller(Dictionary<string, string> metadata)
{
    return metadata.TryGetValue("Installer", out var recorded)
        ? recorded
        : "setuptools";
}
}

View File

@@ -240,6 +240,7 @@ internal sealed class PythonPackageDiscovery
new PoetryAdapter(),
new PipEditableAdapter(),
new DistInfoAdapter(),
new EggInfoAdapter(),
new CondaAdapter(),
new ContainerLayerAdapter()
};

View File

@@ -802,7 +802,7 @@ internal static class PythonRecordParser
continue;
}
entries.Add(new PythonRecordEntry(trimmed, hashAlgorithm: null, hashValue: null, size: null));
entries.Add(new PythonRecordEntry(trimmed, null, null, null));
}
return entries;

View File

@@ -8,6 +8,7 @@ using StellaOps.Scanner.Analyzers.OS;
using StellaOps.Scanner.Analyzers.OS.Abstractions;
using StellaOps.Scanner.Analyzers.OS.Analyzers;
using StellaOps.Scanner.Analyzers.OS.Helpers;
using StellaOps.Scanner.Core.Contracts;
namespace StellaOps.Scanner.Analyzers.OS.Apk;
@@ -37,6 +38,8 @@ internal sealed class ApkPackageAnalyzer : OsPackageAnalyzerBase
using var stream = File.OpenRead(installedPath);
var entries = _parser.Parse(stream, cancellationToken);
context.Metadata.TryGetValue(ScanMetadataKeys.CurrentLayerDigest, out var layerDigest);
var records = new List<OSPackageRecord>(entries.Count);
foreach (var entry in entries)
{
@@ -70,7 +73,7 @@ internal sealed class ApkPackageAnalyzer : OsPackageAnalyzerBase
{
files.Add(new OSPackageFileEvidence(
file.Path,
layerDigest: null,
layerDigest: layerDigest,
sha256: file.Digest,
sizeBytes: null,
isConfigFile: file.IsConfig));

View File

@@ -10,6 +10,7 @@ using StellaOps.Scanner.Analyzers.OS;
using StellaOps.Scanner.Analyzers.OS.Abstractions;
using StellaOps.Scanner.Analyzers.OS.Analyzers;
using StellaOps.Scanner.Analyzers.OS.Helpers;
using StellaOps.Scanner.Core.Contracts;
namespace StellaOps.Scanner.Analyzers.OS.Dpkg;
@@ -39,6 +40,8 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
using var stream = File.OpenRead(statusPath);
var entries = _parser.Parse(stream, cancellationToken);
context.Metadata.TryGetValue(ScanMetadataKeys.CurrentLayerDigest, out var layerDigest);
var infoDirectory = Path.Combine(context.RootPath, "var", "lib", "dpkg", "info");
var records = new List<OSPackageRecord>();
@@ -83,7 +86,7 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
var dependencies = entry.Depends.Concat(entry.PreDepends).ToArray();
var provides = entry.Provides.ToArray();
var fileEvidence = BuildFileEvidence(infoDirectory, entry, cancellationToken);
var fileEvidence = BuildFileEvidence(infoDirectory, entry, layerDigest, cancellationToken);
var cveHints = CveHintExtractor.Extract(entry.Description, string.Join(' ', dependencies), string.Join(' ', provides));
@@ -125,7 +128,7 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
return parts.Length == 0 ? null : parts[0];
}
private static IReadOnlyList<OSPackageFileEvidence> BuildFileEvidence(string infoDirectory, DpkgPackageEntry entry, CancellationToken cancellationToken)
private static IReadOnlyList<OSPackageFileEvidence> BuildFileEvidence(string infoDirectory, DpkgPackageEntry entry, string? layerDigest, CancellationToken cancellationToken)
{
if (!Directory.Exists(infoDirectory))
{
@@ -137,7 +140,7 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
{
if (!files.TryGetValue(path, out _))
{
files[path] = new FileEvidenceBuilder(path);
files[path] = new FileEvidenceBuilder(path, layerDigest);
}
}
@@ -248,20 +251,23 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
private sealed class FileEvidenceBuilder
{
public FileEvidenceBuilder(string path)
public FileEvidenceBuilder(string path, string? layerDigest)
{
Path = path;
LayerDigest = layerDigest;
}
public string Path { get; }
public string? LayerDigest { get; }
public bool IsConfig { get; set; }
public Dictionary<string, string> Digests { get; } = new(StringComparer.OrdinalIgnoreCase);
public OSPackageFileEvidence ToEvidence()
{
return new OSPackageFileEvidence(Path, isConfigFile: IsConfig, digests: Digests);
return new OSPackageFileEvidence(Path, layerDigest: LayerDigest, isConfigFile: IsConfig, digests: Digests);
}
}
}

View File

@@ -0,0 +1,211 @@
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
namespace StellaOps.Scanner.Analyzers.OS.Rpm.Internal;
/// <summary>
/// Minimal reader for BerkeleyDB hash databases used by legacy RPM databases.
/// Extracts raw data values that contain RPM header blobs.
/// </summary>
/// <summary>
/// Minimal reader for the BerkeleyDB hash/btree databases used by legacy RPM
/// databases (/var/lib/rpm/Packages). It does not implement the full BDB page
/// format; instead it locates RPM header blobs embedded in the data pages.
/// </summary>
internal static class BerkeleyDbReader
{
    // BerkeleyDB stores its magic at byte offset 12 of the metadata page:
    // hash magic 0x00061561, btree magic 0x00053162, in either byte order.
    private const uint BdbHashMagicBe = 0x00061561;
    private const uint BdbHashMagicLe = 0x61150600;
    private const uint BdbBtreeMagicBe = 0x00053162;
    private const uint BdbBtreeMagicLe = 0x62310500;

    private const int MetadataPageSize = 512;
    private const int MinPageSize = 512;
    private const int MaxPageSize = 65536;

    // RPM header section magic (0x8eade8ab) that prefixes every header blob.
    private static readonly byte[] RpmMagic = { 0x8e, 0xad, 0xe8, 0xab };

    /// <summary>
    /// Detects whether <paramref name="data"/> looks like a BerkeleyDB hash or
    /// btree database by inspecting the magic number at offset 12.
    /// </summary>
    public static bool IsBerkeleyDb(ReadOnlySpan<byte> data)
    {
        if (data.Length < 16)
        {
            return false;
        }

        var magic = BinaryPrimitives.ReadUInt32BigEndian(data.Slice(12, 4));
        return magic is BdbHashMagicBe or BdbHashMagicLe or BdbBtreeMagicBe or BdbBtreeMagicLe;
    }

    /// <summary>
    /// Extracts all RPM header blobs found on the database's data pages.
    /// Returns raw byte arrays; each should parse as a complete RPM header.
    /// </summary>
    public static IReadOnlyList<byte[]> ExtractValues(byte[] data)
    {
        var results = new List<byte[]>();
        if (data.Length < MetadataPageSize)
        {
            return results;
        }

        // The metadata page records the page size at offset 20 in the
        // database's native byte order; the magic tells us which order.
        var magic = BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(12, 4));
        var isLittleEndian = magic is BdbHashMagicLe or BdbBtreeMagicLe;
        var pageSize = isLittleEndian
            ? BinaryPrimitives.ReadInt32LittleEndian(data.AsSpan(20, 4))
            : BinaryPrimitives.ReadInt32BigEndian(data.AsSpan(20, 4));

        // Guard against corrupt metadata: keep the page size within the range
        // BerkeleyDB supports and force it to a power of two.
        if (pageSize < MinPageSize || pageSize > MaxPageSize)
        {
            pageSize = MetadataPageSize;
        }

        if ((pageSize & (pageSize - 1)) != 0)
        {
            pageSize = MinPageSize;
        }

        // Page 0 is metadata; scan every subsequent page for header blobs.
        var pageCount = data.Length / pageSize;
        for (var pageNum = 1; pageNum < pageCount; pageNum++)
        {
            var pageOffset = pageNum * pageSize;
            if (pageOffset + pageSize > data.Length)
            {
                break;
            }

            ExtractRpmHeadersFromPage(data, pageOffset, pageSize, results);
        }

        return results;
    }

    /// <summary>
    /// Scans a single page for RPM header magics and appends every header that
    /// validates. Headers may extend past the page boundary (BDB overflow
    /// pages), so extraction is bounded by the file, not the page.
    /// </summary>
    private static void ExtractRpmHeadersFromPage(
        byte[] data,
        int pageOffset,
        int pageSize,
        List<byte[]> results)
    {
        var pageEnd = pageOffset + pageSize;

        // Note: <= so a magic whose 4 bytes end exactly at the page boundary
        // is still considered (the previous scan missed that position).
        for (var i = pageOffset; i <= pageEnd - RpmMagic.Length; i++)
        {
            if (!HasRpmMagic(data, i))
            {
                continue;
            }

            var header = TryExtractRpmHeader(data, i);
            if (header is not null)
            {
                results.Add(header);
            }
        }
    }

    // True when the 4 RPM magic bytes appear at the given offset.
    private static bool HasRpmMagic(byte[] data, int offset)
        => data[offset] == RpmMagic[0]
            && data[offset + 1] == RpmMagic[1]
            && data[offset + 2] == RpmMagic[2]
            && data[offset + 3] == RpmMagic[3];

    /// <summary>
    /// Validates and copies the RPM header starting at <paramref name="offset"/>.
    /// Layout: 4-byte magic, 4-byte reserved (zero), 4-byte index count,
    /// 4-byte store size (all big-endian), then 16-byte index entries and the
    /// data store. Returns null when the candidate fails sanity checks.
    /// </summary>
    private static byte[]? TryExtractRpmHeader(byte[] data, int offset)
    {
        if (offset + 16 > data.Length)
        {
            return null;
        }

        var span = data.AsSpan(offset);

        // Genuine headers always have a zeroed reserved word.
        if (BinaryPrimitives.ReadInt32BigEndian(span.Slice(4, 4)) != 0)
        {
            return null;
        }

        var indexCount = BinaryPrimitives.ReadInt32BigEndian(span.Slice(8, 4));
        var storeSize = BinaryPrimitives.ReadInt32BigEndian(span.Slice(12, 4));

        // Sanity bounds reject random data that happens to contain the magic.
        if (indexCount <= 0 || indexCount > 10000)
        {
            return null;
        }

        if (storeSize <= 0 || storeSize > 50_000_000)
        {
            return null;
        }

        // With the bounds above this cannot overflow Int32.
        var totalLength = 16 + (indexCount * 16) + storeSize;
        if (offset + totalLength > data.Length)
        {
            return null;
        }

        var result = new byte[totalLength];
        Array.Copy(data, offset, result, 0, totalLength);
        return result;
    }

    /// <summary>
    /// Fallback extraction that ignores page structure entirely and scans the
    /// whole file for RPM headers. Useful when the page-aware scan finds
    /// nothing (e.g. heavily fragmented or truncated databases).
    /// </summary>
    public static IReadOnlyList<byte[]> ExtractValuesWithOverflow(byte[] data)
    {
        var results = new List<byte[]>();
        for (var i = 0; i <= data.Length - RpmMagic.Length; i++)
        {
            if (!HasRpmMagic(data, i))
            {
                continue;
            }

            var header = TryExtractRpmHeader(data, i);
            if (header is not null)
            {
                results.Add(header);
            }
        }

        return results;
    }
}

View File

@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Buffers.Binary;
using System.IO;
using System.Threading;
using Microsoft.Data.Sqlite;
@@ -23,8 +24,8 @@ internal sealed class RpmDatabaseReader : IRpmDatabaseReader
var sqlitePath = ResolveSqlitePath(rootPath);
if (sqlitePath is null)
{
_logger.LogWarning("rpmdb.sqlite not found under root {RootPath}; rpm analyzer will skip.", rootPath);
return Array.Empty<RpmHeader>();
_logger.LogWarning("rpmdb.sqlite not found under root {RootPath}; attempting legacy rpmdb fallback.", rootPath);
return ReadLegacyHeaders(rootPath, cancellationToken);
}
var headers = new List<RpmHeader>();
@@ -65,7 +66,12 @@ internal sealed class RpmDatabaseReader : IRpmDatabaseReader
catch (Exception ex)
{
_logger.LogWarning(ex, "Unable to read rpmdb.sqlite at {Path}.", sqlitePath);
return Array.Empty<RpmHeader>();
return ReadLegacyHeaders(rootPath, cancellationToken);
}
if (headers.Count == 0)
{
return ReadLegacyHeaders(rootPath, cancellationToken);
}
return headers;
@@ -90,6 +96,230 @@ internal sealed class RpmDatabaseReader : IRpmDatabaseReader
return null;
}
/// <summary>
/// Reads package headers from a pre-sqlite rpmdb (the "Packages" file).
/// Returns an empty list when no legacy database exists or it cannot be read.
/// </summary>
private IReadOnlyList<RpmHeader> ReadLegacyHeaders(string rootPath, CancellationToken cancellationToken)
{
    var packagesPath = ResolveLegacyPackagesPath(rootPath);
    if (packagesPath is null)
    {
        _logger.LogWarning("Legacy rpmdb Packages file not found under root {RootPath}; rpm analyzer will skip.", rootPath);
        return Array.Empty<RpmHeader>();
    }

    byte[] data;
    try
    {
        data = File.ReadAllBytes(packagesPath);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Unable to read legacy rpmdb Packages file at {Path}.", packagesPath);
        return Array.Empty<RpmHeader>();
    }

    if (!BerkeleyDbReader.IsBerkeleyDb(data))
    {
        // Not a BDB file; scan the raw bytes for RPM header blobs instead.
        return ReadRawRpmHeaders(data, packagesPath, cancellationToken);
    }

    _logger.LogDebug("Detected BerkeleyDB format for rpmdb at {Path}; using BDB extraction.", packagesPath);
    return ReadBerkeleyDbHeaders(data, packagesPath, cancellationToken);
}
/// <summary>
/// Parses RPM headers out of a BerkeleyDB Packages file, de-duplicating by
/// name/version/release/architecture. Unparseable blobs are logged and skipped.
/// </summary>
private IReadOnlyList<RpmHeader> ReadBerkeleyDbHeaders(byte[] data, string packagesPath, CancellationToken cancellationToken)
{
    // Prefer the page-aware scan; fall back to the whole-file scan when it
    // yields nothing (fragmented or overflow-heavy databases).
    var headerBlobs = BerkeleyDbReader.ExtractValues(data);
    if (headerBlobs.Count == 0)
    {
        headerBlobs = BerkeleyDbReader.ExtractValuesWithOverflow(data);
    }

    var results = new List<RpmHeader>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var blob in headerBlobs)
    {
        cancellationToken.ThrowIfCancellationRequested();
        try
        {
            var header = _parser.Parse(blob);
            var key = $"{header.Name}::{header.Version}::{header.Release}::{header.Architecture}";
            if (seen.Add(key))
            {
                results.Add(header);
            }
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Failed to parse RPM header blob from BerkeleyDB.");
        }
    }

    if (results.Count == 0)
    {
        _logger.LogWarning("No RPM headers parsed from BerkeleyDB rpmdb at {Path}.", packagesPath);
    }
    else
    {
        _logger.LogDebug("Extracted {Count} RPM headers from BerkeleyDB rpmdb at {Path}.", results.Count, packagesPath);
    }

    return results;
}
/// <summary>
/// Extracts and parses RPM headers from a legacy Packages file, handling both
/// BerkeleyDB files (defensive re-check) and raw concatenated header layouts.
/// Duplicate packages (same name/version/release/arch) are reported once.
/// </summary>
private IReadOnlyList<RpmHeader> ReadRawRpmHeaders(byte[] data, string packagesPath, CancellationToken cancellationToken)
{
    List<byte[]> headerBlobs;
    if (BerkeleyDbReader.IsBerkeleyDb(data))
    {
        headerBlobs = new List<byte[]>(BerkeleyDbReader.ExtractValues(data));
        if (headerBlobs.Count == 0)
        {
            headerBlobs.AddRange(BerkeleyDbReader.ExtractValuesWithOverflow(data));
        }
    }
    else
    {
        headerBlobs = new List<byte[]>(ExtractRpmHeadersFromRaw(data, cancellationToken));
    }

    if (headerBlobs.Count == 0)
    {
        _logger.LogWarning("No RPM headers parsed from legacy rpmdb Packages at {Path}.", packagesPath);
        return Array.Empty<RpmHeader>();
    }

    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var results = new List<RpmHeader>(headerBlobs.Count);
    foreach (var blob in headerBlobs)
    {
        cancellationToken.ThrowIfCancellationRequested();
        try
        {
            var header = _parser.Parse(blob);
            var key = $"{header.Name}::{header.Version}::{header.Release}::{header.Architecture}";
            if (seen.Add(key))
            {
                results.Add(header);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse RPM header from legacy rpmdb blob.");
        }
    }

    return results;
}
/// <summary>
/// Locates the legacy BerkeleyDB Packages file under the scanned root,
/// checking the conventional rpmdb locations in priority order:
/// var/lib/rpm first, then usr/lib/sysimage/rpm. Returns null when absent.
/// </summary>
private static string? ResolveLegacyPackagesPath(string rootPath)
{
    var primary = Path.Combine(rootPath, "var", "lib", "rpm", "Packages");
    if (File.Exists(primary))
    {
        return primary;
    }

    var sysimage = Path.Combine(rootPath, "usr", "lib", "sysimage", "rpm", "Packages");
    return File.Exists(sysimage) ? sysimage : null;
}
/// <summary>
/// Lazily yields candidate RPM header blobs found by scanning the file for
/// the header magic (0x8eade8ab). The scan advances one byte past each magic
/// rather than past the extracted blob, so nested candidates are also seen.
/// </summary>
private static IEnumerable<byte[]> ExtractRpmHeadersFromRaw(byte[] data, CancellationToken cancellationToken)
{
    var magicBytes = new byte[] { 0x8e, 0xad, 0xe8, 0xab };
    var seenOffsets = new HashSet<int>();

    for (var offset = 0; offset <= data.Length - magicBytes.Length;)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var candidateIndex = FindNextMagic(data, magicBytes, offset);
        if (candidateIndex < 0)
        {
            yield break;
        }

        // Resume just past the magic regardless of the outcome below.
        offset = candidateIndex + 1;
        if (!seenOffsets.Add(candidateIndex))
        {
            continue;
        }

        if (TryExtractHeaderSlice(data, candidateIndex, out var slice))
        {
            yield return slice;
        }
    }
}
/// <summary>
/// Attempts to copy a complete RPM header starting at <paramref name="offset"/>.
/// Layout after the 4-byte magic: 4-byte reserved word, 4-byte index count and
/// 4-byte store size (big-endian), then 16-byte index entries plus the store.
/// Returns false when the candidate is truncated or fails sanity checks.
/// </summary>
private static bool TryExtractHeaderSlice(byte[] data, int offset, out byte[] slice)
{
    slice = Array.Empty<byte>();
    if (offset + 16 >= data.Length)
    {
        return false;
    }

    var span = data.AsSpan(offset);
    var indexCount = BinaryPrimitives.ReadInt32BigEndian(span.Slice(8, 4));
    var storeSize = BinaryPrimitives.ReadInt32BigEndian(span.Slice(12, 4));
    if (indexCount <= 0 || storeSize <= 0)
    {
        return false;
    }

    // Compute in 64-bit: indexCount/storeSize come straight from file bytes,
    // and 16 + indexCount * 16 + storeSize can overflow Int32, which would
    // wrap to a small positive length and produce a bogus slice.
    var totalLength = 16L + ((long)indexCount * 16) + storeSize;
    if (totalLength > int.MaxValue || offset + totalLength > data.Length)
    {
        return false;
    }

    slice = new byte[(int)totalLength];
    Buffer.BlockCopy(data, offset, slice, 0, (int)totalLength);
    return true;
}
/// <summary>
/// Returns the index of the next position at or after <paramref name="startIndex"/>
/// whose first four bytes match <paramref name="magic"/>, or -1 when no match
/// remains. (Callers always pass the 4-byte RPM header magic.)
/// </summary>
private static int FindNextMagic(byte[] data, byte[] magic, int startIndex)
{
    var limit = data.Length - magic.Length;
    for (var i = startIndex; i <= limit; i++)
    {
        if (data[i] != magic[0] || data[i + 1] != magic[1] || data[i + 2] != magic[2] || data[i + 3] != magic[3])
        {
            continue;
        }

        return i;
    }

    return -1;
}
private static byte[]? ExtractHeaderBlob(SqliteDataReader reader)
{
for (var i = 0; i < reader.FieldCount; i++)

View File

@@ -11,6 +11,7 @@ using StellaOps.Scanner.Analyzers.OS.Abstractions;
using StellaOps.Scanner.Analyzers.OS.Analyzers;
using StellaOps.Scanner.Analyzers.OS.Helpers;
using StellaOps.Scanner.Analyzers.OS.Rpm.Internal;
using StellaOps.Scanner.Core.Contracts;
namespace StellaOps.Scanner.Analyzers.OS.Rpm;
@@ -42,6 +43,8 @@ internal sealed class RpmPackageAnalyzer : OsPackageAnalyzerBase
return ValueTask.FromResult<IReadOnlyList<OSPackageRecord>>(EmptyPackages);
}
context.Metadata.TryGetValue(ScanMetadataKeys.CurrentLayerDigest, out var layerDigest);
var records = new List<OSPackageRecord>(headers.Count);
foreach (var header in headers)
{
@@ -77,7 +80,7 @@ internal sealed class RpmPackageAnalyzer : OsPackageAnalyzerBase
digests = new Dictionary<string, string>(file.Digests, StringComparer.OrdinalIgnoreCase);
}
files.Add(new OSPackageFileEvidence(file.Path, isConfigFile: file.IsConfig, digests: digests));
files.Add(new OSPackageFileEvidence(file.Path, layerDigest: layerDigest, isConfigFile: file.IsConfig, digests: digests));
}
var cveHints = CveHintExtractor.Extract(

View File

@@ -6,6 +6,9 @@
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
<InternalsVisibleTo Include="StellaOps.Scanner.Analyzers.OS.Tests" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Data.Sqlite" Version="9.0.0-rc.1.24451.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />

View File

@@ -16,7 +16,8 @@ public static class OsComponentMapper
{
ArgumentNullException.ThrowIfNull(results);
var builder = ImmutableArray.CreateBuilder<LayerComponentFragment>();
var fragmentsByLayer = new Dictionary<string, List<ComponentRecord>>(StringComparer.OrdinalIgnoreCase);
foreach (var result in results)
{
if (result is null || string.IsNullOrWhiteSpace(result.AnalyzerId))
@@ -24,31 +25,43 @@ public static class OsComponentMapper
continue;
}
var layerDigest = ComputeLayerDigest(result.AnalyzerId);
var components = BuildComponentRecords(result.AnalyzerId, layerDigest, result.Packages);
if (components.IsEmpty)
{
continue;
}
var syntheticDigest = ComputeLayerDigest(result.AnalyzerId);
builder.Add(LayerComponentFragment.Create(layerDigest, components));
foreach (var package in result.Packages ?? Enumerable.Empty<OSPackageRecord>())
{
var actualLayerDigest = ResolveLayerDigest(package) ?? syntheticDigest;
var record = ToComponentRecord(result.AnalyzerId, actualLayerDigest, package);
if (!fragmentsByLayer.TryGetValue(actualLayerDigest, out var records))
{
records = new List<ComponentRecord>();
fragmentsByLayer[actualLayerDigest] = records;
}
records.Add(record);
}
}
var builder = ImmutableArray.CreateBuilder<LayerComponentFragment>(fragmentsByLayer.Count);
foreach (var (layerDigest, records) in fragmentsByLayer)
{
builder.Add(LayerComponentFragment.Create(layerDigest, ImmutableArray.CreateRange(records)));
}
return builder.ToImmutable();
}
private static ImmutableArray<ComponentRecord> BuildComponentRecords(
string analyzerId,
string layerDigest,
IEnumerable<OSPackageRecord> packages)
private static string? ResolveLayerDigest(OSPackageRecord package)
{
var records = ImmutableArray.CreateBuilder<ComponentRecord>();
foreach (var package in packages ?? Enumerable.Empty<OSPackageRecord>())
foreach (var file in package.Files)
{
records.Add(ToComponentRecord(analyzerId, layerDigest, package));
if (!string.IsNullOrWhiteSpace(file.LayerDigest))
{
return file.LayerDigest;
}
}
return records.ToImmutable();
return null;
}
private static ComponentRecord ToComponentRecord(string analyzerId, string layerDigest, OSPackageRecord package)

View File

@@ -1,5 +1,5 @@
namespace StellaOps.Scanner.Core.Contracts;
namespace StellaOps.Scanner.Core.Contracts;
public static class ScanMetadataKeys
{
public const string RootFilesystemPath = "scanner.rootfs.path";
@@ -8,4 +8,5 @@ public static class ScanMetadataKeys
public const string LayerDirectories = "scanner.rootfs.layers";
public const string LayerArchives = "scanner.layer.archives";
public const string RuntimeProcRoot = "scanner.runtime.proc_root";
public const string CurrentLayerDigest = "scanner.layer.current.digest";
}

View File

@@ -127,6 +127,12 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
var dependencies = ParseDependencies(data, format, relativePath, cancellationToken, out var observedBuildId);
var buildId = observedBuildId ?? identity?.BuildId ?? identity?.Uuid;
// Detect entry point
var entryPoint = DetectEntryPoint(data, format);
// Collect unknown/unresolved symbols
var unknowns = CollectUnknowns(data, format, cancellationToken);
var symbolId = SymbolId.ForBinaryAddressed(fileHash, ".text", "0x0", Path.GetFileName(path), "static");
var codeId = CodeId.ForBinarySegment(format, fileHash, "0x0", data.LongLength, ".text");
@@ -142,6 +148,13 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
attributes["build_id"] = buildId!;
}
// Add PURL binding for known library naming conventions
var purl = InferPurl(Path.GetFileName(path), format);
if (!string.IsNullOrWhiteSpace(purl))
{
attributes["purl"] = purl;
}
return new BinaryInfo(
SymbolId: symbolId,
CodeId: codeId,
@@ -151,7 +164,9 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
BuildId: buildId,
Dependencies: dependencies,
DisplayName: Path.GetFileName(path),
Attributes: attributes);
Attributes: attributes,
EntryPoint: entryPoint,
Unknowns: unknowns);
}
private static IReadOnlyList<BinaryDependency> ParseDependencies(
@@ -259,6 +274,41 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
display: info.DisplayName,
sourceFile: info.RelativePath,
attributes: info.Attributes);
// Emit synthetic root for entry point
if (info.EntryPoint is not null)
{
var entrySymbolId = SymbolId.ForBinaryAddressed(
info.FileHash,
".text",
info.EntryPoint.Address,
info.EntryPoint.Name,
"entry");
var entryAttributes = new Dictionary<string, string>(StringComparer.Ordinal)
{
["kind"] = "entry_point",
["is_synthetic_root"] = "true"
};
builder.AddNode(
symbolId: entrySymbolId,
lang: SymbolId.Lang.Binary,
kind: "entry_point",
display: info.EntryPoint.Name,
sourceFile: info.RelativePath,
attributes: entryAttributes);
// Edge from entry point to binary root
builder.AddEdge(
from: entrySymbolId,
to: info.SymbolId,
edgeType: EdgeTypes.Call,
confidence: EdgeConfidence.Certain,
origin: "static",
provenance: "elf-entry",
evidence: $"file:{info.RelativePath}:entry");
}
}
private static void EmitDependencies(ReachabilityGraphBuilder builder, BinaryInfo info)
@@ -277,6 +327,13 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
["reason"] = dep.Reason
};
// Add PURL for dependency if inferrable
var depPurl = InferPurl(dep.Name, info.Format);
if (!string.IsNullOrWhiteSpace(depPurl))
{
depAttributes["purl"] = depPurl;
}
builder.AddNode(
symbolId: depSymbolId,
lang: SymbolId.Lang.Binary,
@@ -293,6 +350,211 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
provenance: dep.Provenance,
evidence: dep.Evidence);
}
// Emit unknown/unresolved symbols
EmitUnknowns(builder, info);
}
/// <summary>
/// Adds one node per unresolved symbol plus a medium-confidence call edge from
/// the binary to it, so downstream analysis can see unresolved references.
/// No-op when the binary reported no unknowns.
/// </summary>
private static void EmitUnknowns(ReachabilityGraphBuilder builder, BinaryInfo info)
{
    foreach (var unknown in info.Unknowns)
    {
        var undefId = SymbolId.ForBinaryAddressed(
            info.FileHash,
            ".undef",
            "0x0",
            unknown.SymbolName,
            "undefined");

        var attrs = new Dictionary<string, string>(StringComparer.Ordinal)
        {
            ["is_unknown"] = "true",
            ["reason"] = unknown.ReasonCode
        };

        builder.AddNode(
            symbolId: undefId,
            lang: SymbolId.Lang.Binary,
            kind: "unknown",
            display: $"?{unknown.SymbolName}",
            attributes: attrs);

        builder.AddEdge(
            from: info.SymbolId,
            to: undefId,
            edgeType: EdgeTypes.Call,
            confidence: EdgeConfidence.Medium,
            origin: "static",
            provenance: "symbol-undef",
            evidence: $"file:{info.RelativePath}:undef");
    }
}
/// <summary>
/// Dispatches entry-point detection by binary format. Returns null for
/// unknown formats or inputs too small to contain the relevant headers.
/// </summary>
private static BinaryEntryPoint? DetectEntryPoint(byte[] data, string format)
{
    if (data.Length < 64)
    {
        return null;
    }

    return format switch
    {
        "elf" => DetectElfEntryPoint(data),
        "pe" => DetectPeEntryPoint(data),
        "macho" => DetectMachOEntryPoint(data),
        _ => null,
    };
}
/// <summary>
/// Reads e_entry from an ELF header (offset 24 for both ELF32 and ELF64;
/// field width comes from EI_CLASS at byte 4, byte order from EI_DATA at
/// byte 5). Returns a synthetic "_start" entry, or null when e_entry is zero.
/// </summary>
private static BinaryEntryPoint? DetectElfEntryPoint(byte[] data)
{
    if (data.Length < 32)
    {
        return null;
    }

    var bigEndian = data[5] == 2;
    ulong entryAddr;
    if (data[4] == 2)
    {
        // ELF64: e_entry is an 8-byte field at offset 24.
        var bytes = data.AsSpan(24, 8);
        entryAddr = bigEndian
            ? System.Buffers.Binary.BinaryPrimitives.ReadUInt64BigEndian(bytes)
            : System.Buffers.Binary.BinaryPrimitives.ReadUInt64LittleEndian(bytes);
    }
    else
    {
        // ELF32: e_entry is a 4-byte field at offset 24.
        var bytes = data.AsSpan(24, 4);
        entryAddr = bigEndian
            ? System.Buffers.Binary.BinaryPrimitives.ReadUInt32BigEndian(bytes)
            : System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(bytes);
    }

    return entryAddr == 0 ? null : new BinaryEntryPoint("_start", $"0x{entryAddr:x}");
}
/// <summary>
/// Reads AddressOfEntryPoint from a PE file. The PE header offset comes from
/// e_lfanew at 0x3C; AddressOfEntryPoint sits 16 bytes into the optional
/// header, which starts 24 bytes past the PE signature (signature + COFF
/// header). Returns null when the offsets are out of range or the RVA is zero.
/// </summary>
private static BinaryEntryPoint? DetectPeEntryPoint(byte[] data)
{
    if (data.Length < 0x40)
    {
        return null;
    }

    var peHeaderOffset = System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(data.AsSpan(0x3C, 4));
    if (peHeaderOffset < 0 || peHeaderOffset + 0x28 > data.Length)
    {
        return null;
    }

    var optionalHeaderOffset = peHeaderOffset + 24;
    if (optionalHeaderOffset + 20 > data.Length)
    {
        return null;
    }

    var entryPointRva = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(
        data.AsSpan(optionalHeaderOffset + 16, 4));
    return entryPointRva == 0
        ? null
        : new BinaryEntryPoint("_mainCRTStartup", $"0x{entryPointRva:x}");
}
/// <summary>
/// Placeholder Mach-O entry-point detection. A faithful implementation would
/// walk the load commands for LC_MAIN / LC_UNIXTHREAD; for now any input
/// large enough to hold the fixed Mach-O header gets a synthetic "main" entry
/// at address 0x0 so the graph still has a root for the binary.
/// (Previous version computed magic/width/endianness locals and never used
/// them; they are removed here without changing the returned value.)
/// </summary>
private static BinaryEntryPoint? DetectMachOEntryPoint(byte[] data)
{
    // Too small to hold even the fixed Mach-O header.
    if (data.Length < 32)
    {
        return null;
    }

    return new BinaryEntryPoint("main", "0x0");
}
/// <summary>
/// Collects unresolved (undefined) symbols from the binary. Currently a
/// placeholder that always returns an empty list; the parameters are unused
/// but keep the signature ready for a real symbol-table walk.
/// </summary>
private static IReadOnlyList<BinaryUnknown> CollectUnknowns(byte[] data, string format, CancellationToken cancellationToken)
{
    // For now, return empty list - full implementation would parse symbol tables
    // for undefined symbols (STT_NOTYPE with SHN_UNDEF in ELF, etc.)
    // This is a placeholder for the baseline; full implementation would require
    // parsing the symbol tables of ELF/PE/Mach-O files
    return Array.Empty<BinaryUnknown>();
}
/// <summary>
/// Infers a generic PURL from a shared-library file name using common
/// platform naming conventions:
///   elf:   libfoo.so.1.2  -> pkg:generic/libfoo@1.2
///   pe:    foo.dll        -> pkg:generic/foo
///   macho: libfoo.dylib   -> pkg:generic/libfoo
/// Returns null when no convention matches.
/// </summary>
private static string? InferPurl(string fileName, string format)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        return null;
    }

    string? name = null;
    string? version = null;

    switch (format)
    {
        case "elf" when fileName.Contains(".so"):
        {
            var soIndex = fileName.IndexOf(".so", StringComparison.Ordinal);
            if (soIndex > 0)
            {
                name = fileName[..soIndex];
                // Anything after ".so." is treated as the library version.
                var suffix = fileName[(soIndex + 3)..];
                if (suffix.StartsWith('.') && suffix.Length > 1)
                {
                    version = suffix[1..];
                }
            }

            break;
        }

        case "pe" when fileName.EndsWith(".dll", StringComparison.OrdinalIgnoreCase):
            name = fileName[..^4];
            break;

        case "macho" when fileName.EndsWith(".dylib", StringComparison.OrdinalIgnoreCase):
            name = fileName[..^6];
            break;
    }

    if (string.IsNullOrWhiteSpace(name))
    {
        return null;
    }

    var purl = $"pkg:generic/{Uri.EscapeDataString(name)}";
    return string.IsNullOrWhiteSpace(version)
        ? purl
        : $"{purl}@{Uri.EscapeDataString(version)}";
}
private static bool TryDetect(byte[] data, out NativeBinaryIdentity identity)
@@ -330,7 +592,9 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
string? BuildId,
IReadOnlyList<BinaryDependency> Dependencies,
string DisplayName,
IReadOnlyDictionary<string, string> Attributes);
IReadOnlyDictionary<string, string> Attributes,
BinaryEntryPoint? EntryPoint,
IReadOnlyList<BinaryUnknown> Unknowns);
private sealed record BinaryDependency(
string Name,
@@ -338,4 +602,12 @@ public sealed class BinaryReachabilityLifter : IReachabilityLifter
EdgeConfidence Confidence,
string Provenance,
string Evidence);
private sealed record BinaryEntryPoint(
string Name,
string Address);
private sealed record BinaryUnknown(
string SymbolName,
string ReasonCode);
}