feat: Implement BerkeleyDB reader for RPM databases
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
console-runner-image / build-runner-image (push) Has been cancelled
wine-csp-build / Build Wine CSP Image (push) Has been cancelled
wine-csp-build / Integration Tests (push) Has been cancelled
wine-csp-build / Security Scan (push) Has been cancelled
wine-csp-build / Generate SBOM (push) Has been cancelled
wine-csp-build / Publish Image (push) Has been cancelled
wine-csp-build / Air-Gap Bundle (push) Has been cancelled
wine-csp-build / Test Summary (push) Has been cancelled

- Added BerkeleyDbReader class to read and extract RPM header blobs from BerkeleyDB hash databases.
- Implemented methods to detect BerkeleyDB format and extract values, including handling of page sizes and magic numbers.
- Added tests for BerkeleyDbReader to ensure correct functionality and header extraction.

feat: Add Yarn PnP data tests

- Created YarnPnpDataTests to validate package resolution and data loading from Yarn PnP cache.
- Implemented tests for resolved keys, package presence, and loading from cache structure.

test: Add egg-info package fixtures for Python tests

- Created egg-info package fixtures for testing Python analyzers.
- Included PKG-INFO, entry_points.txt, and installed-files.txt for comprehensive coverage.

test: Enhance RPM database reader tests

- Added tests for RpmDatabaseReader to validate fallback to legacy packages when SQLite is missing.
- Implemented helper methods to create legacy package files and RPM headers for testing.

test: Implement dual signing tests

- Added DualSignTests to validate secondary signature addition when configured.
- Created stub implementations for crypto providers and key resolvers to facilitate testing.

chore: Update CI script for Playwright Chromium installation

- Modified ci-console-exports.sh to ensure deterministic Chromium binary installation for console exports tests.
- Added checks for Windows compatibility and environment variable setup for Playwright browsers.
This commit is contained in:
StellaOps Bot
2025-12-07 16:24:45 +02:00
parent e3f28a21ab
commit 11597679ed
199 changed files with 9809 additions and 4404 deletions

View File

@@ -0,0 +1,417 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for legacy .egg-info metadata (setuptools installations).
/// Handles standalone egg-info directories in site-packages that aren't editable installs.
/// </summary>
internal sealed partial class EggInfoAdapter : IPythonPackagingAdapter
{
/// <summary>Identifier of this adapter; always the literal <c>"egg-info"</c>.</summary>
public string Name => "egg-info";
/// <summary>
/// Ordering value for this adapter, fixed at 15.
/// NOTE(review): how the value is consumed is not visible here — presumably a smaller number wins; confirm against the discovery code.
/// </summary>
public int Priority => 15; // Lower priority than dist-info (10) to prefer wheels
// Matches a directory name of the form "<name>-<version>.egg-info" (case-insensitive) and
// exposes the two parts through the named groups "name" and "version".
//   name    : starts with a letter or digit, followed by word characters, dots or hyphens.
//   version : digits and dots, optionally followed by one .dev / a / b / rc / post segment.
// NOTE(review): names carrying an interpreter tag such as "foo-1.0-py3.8.egg-info" do not match,
// so DiscoverPackagesAsync skips those directories — confirm that this is intended.
// NOTE(review): RegexOptions.Compiled is presumably redundant on a [GeneratedRegex] — confirm.
[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)-(?<version>[\d.]+(?:\.dev\d*|a\d*|b\d*|rc\d*|post\d*)?)\.egg-info$",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex EggInfoDirPattern();
/// <summary>
/// Reports whether <paramref name="path"/> contains at least one file matching
/// <c>*.egg-info/PKG-INFO</c>.
/// </summary>
/// <param name="vfs">Virtual file system to query.</param>
/// <param name="path">Directory whose egg-info entries are probed.</param>
/// <returns><c>true</c> when at least one matching PKG-INFO file is enumerated.</returns>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
    => vfs.EnumerateFiles(path, "*.egg-info/PKG-INFO").Any();
/// <summary>
/// Enumerates the packages described by <c>*.egg-info/PKG-INFO</c> files under
/// <paramref name="path"/>.
/// </summary>
/// <remarks>
/// Name and version are first taken from the egg-info directory name and then replaced by the
/// <c>Name</c> / <c>Version</c> headers of PKG-INFO when those are present and non-empty.
/// Directories whose name does not match <c>EggInfoDirPattern</c> are skipped.
/// </remarks>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="path">Directory to search; also reported as the package location.</param>
/// <param name="cancellationToken">Checked before each package and passed to every read.</param>
/// <returns>One <see cref="PythonPackageInfo"/> per matching egg-info directory.</returns>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    // Snapshot every .egg-info directory that carries a PKG-INFO before doing any reads.
    var pkgInfoFiles = vfs.EnumerateFiles(path, "*.egg-info/PKG-INFO").ToList();

    foreach (var pkgInfoFile in pkgInfoFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var eggInfoPath = Path.GetDirectoryName(pkgInfoFile.VirtualPath) ?? string.Empty;
        var eggInfoName = Path.GetFileName(eggInfoPath);

        var match = EggInfoDirPattern().Match(eggInfoName);
        if (!match.Success)
        {
            continue;
        }

        var packageName = match.Groups["name"].Value;
        var version = match.Groups["version"].Value;

        // Read PKG-INFO file (same format as METADATA)
        var metadata = await ReadPkgInfoAsync(vfs, pkgInfoFile, cancellationToken).ConfigureAwait(false);

        // Use metadata name/version if available (more accurate than the directory name)
        if (metadata.TryGetValue("Name", out var metadataName) && !string.IsNullOrEmpty(metadataName))
        {
            packageName = metadataName;
        }

        if (metadata.TryGetValue("Version", out var metadataVersion) && !string.IsNullOrEmpty(metadataVersion))
        {
            version = metadataVersion;
        }

        // Read top_level.txt
        var topLevelModules = await ReadTopLevelAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

        // Read SOURCES.txt (file manifest for egg-info)
        var sourceFiles = await ReadSourcesAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

        // Read requires.txt (egg-info dependencies format)
        var dependencies = await ReadRequiresAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

        // Read installed-files.txt if available
        var installedFiles = await ReadInstalledFilesAsync(vfs, eggInfoPath, cancellationToken).ConfigureAwait(false);

        // entry_points.txt is deliberately not read: none of the arguments passed to
        // PythonPackageInfo below receives entry points, so the read would be wasted I/O.

        // Extract extras from requires.txt sections
        var extras = ExtractExtras(dependencies);

        // A file manifest of either kind raises the confidence from High to Definitive.
        var confidence = sourceFiles.Length > 0 || installedFiles.Length > 0
            ? PythonPackageConfidence.Definitive
            : PythonPackageConfidence.High;

        // Convert sources/installed files to record entries
        var recordFiles = ConvertToRecordEntries(sourceFiles, installedFiles);

        yield return new PythonPackageInfo(
            Name: packageName,
            Version: version,
            Kind: PythonPackageKind.Egg,
            Location: path,
            MetadataPath: eggInfoPath,
            TopLevelModules: topLevelModules,
            Dependencies: FilterBaseDependencies(dependencies),
            Extras: extras,
            RecordFiles: recordFiles,
            InstallerTool: DetectInstaller(metadata),
            EditableTarget: null,
            IsDirectDependency: false, // Will be corrected by dependency graph analysis
            Confidence: confidence);
    }
}
/// <summary>
/// Parses the header section of a PKG-INFO file into a case-insensitive dictionary.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>Parsing stops at the first empty line or at end of file.</item>
/// <item>A line starting with a space or tab continues the current header; continuation
/// lines are trimmed and joined to the value with <c>'\n'</c>.</item>
/// <item>A header that occurs more than once has its values joined with <c>'\n'</c>,
/// regardless of whether the repeat is followed by another header, a blank line or end of file.</item>
/// <item>Lines without a colon after the first character are ignored.</item>
/// </list>
/// </remarks>
/// <param name="vfs">Virtual file system used to open the file.</param>
/// <param name="file">The PKG-INFO file to read.</param>
/// <param name="cancellationToken">Checked for every line read.</param>
/// <returns>
/// The parsed headers. Empty when the file cannot be opened; when an
/// <see cref="IOException"/> occurs mid-read, the headers committed so far are returned.
/// </returns>
private static async Task<Dictionary<string, string>> ReadPkgInfoAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    var headers = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return headers;
        }

        using var reader = new StreamReader(stream);
        string? currentKey = null;
        var currentValue = new System.Text.StringBuilder();

        string? line;
        while ((line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Empty line marks end of headers
            if (string.IsNullOrEmpty(line))
            {
                break;
            }

            // Continuation line: only meaningful once a header has been seen.
            if (line.StartsWith(' ') || line.StartsWith('\t'))
            {
                if (currentKey is not null)
                {
                    // Explicit separator so the continuation is not glued onto the previous text.
                    currentValue.Append('\n').Append(line.Trim());
                }

                continue;
            }

            var colonIdx = line.IndexOf(':');
            if (colonIdx <= 0)
            {
                // Not a "Key: value" line; skip it.
                continue;
            }

            // New header: store the one collected so far, then start the next.
            CommitCurrentHeader();
            currentKey = line[..colonIdx].Trim();
            currentValue.Clear();
            currentValue.Append(line[(colonIdx + 1)..].Trim());
        }

        // Store the last header, whether the loop ended on a blank line or at end of file.
        CommitCurrentHeader();

        // Adds the pending header to the result, merging with earlier occurrences of the same key.
        void CommitCurrentHeader()
        {
            if (currentKey is null)
            {
                return;
            }

            var value = currentValue.ToString().Trim();
            headers[currentKey] = headers.TryGetValue(currentKey, out var existing)
                ? existing + "\n" + value
                : value;
            currentKey = null;
        }
    }
    catch (IOException)
    {
        // Best effort: keep whatever was parsed before the read failed.
    }

    return headers;
}
/// <summary>
/// Reads <c>top_level.txt</c> from the egg-info directory.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="eggInfoPath">Virtual path of the egg-info directory.</param>
/// <param name="cancellationToken">Passed to the underlying reads.</param>
/// <returns>The trimmed, non-empty lines of the file, or an empty array when it cannot be read.</returns>
private static Task<ImmutableArray<string>> ReadTopLevelAsync(
    PythonVirtualFileSystem vfs,
    string eggInfoPath,
    CancellationToken cancellationToken)
    => ReadNonEmptyLinesAsync(vfs, $"{eggInfoPath}/top_level.txt", cancellationToken);

/// <summary>
/// Reads <c>SOURCES.txt</c> from the egg-info directory.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="eggInfoPath">Virtual path of the egg-info directory.</param>
/// <param name="cancellationToken">Passed to the underlying reads.</param>
/// <returns>The trimmed, non-empty lines of the file, or an empty array when it cannot be read.</returns>
private static Task<ImmutableArray<string>> ReadSourcesAsync(
    PythonVirtualFileSystem vfs,
    string eggInfoPath,
    CancellationToken cancellationToken)
    => ReadNonEmptyLinesAsync(vfs, $"{eggInfoPath}/SOURCES.txt", cancellationToken);

/// <summary>
/// Reads <c>installed-files.txt</c> from the egg-info directory.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="eggInfoPath">Virtual path of the egg-info directory.</param>
/// <param name="cancellationToken">Passed to the underlying reads.</param>
/// <returns>The trimmed, non-empty lines of the file, or an empty array when it cannot be read.</returns>
private static Task<ImmutableArray<string>> ReadInstalledFilesAsync(
    PythonVirtualFileSystem vfs,
    string eggInfoPath,
    CancellationToken cancellationToken)
    => ReadNonEmptyLinesAsync(vfs, $"{eggInfoPath}/installed-files.txt", cancellationToken);

/// <summary>
/// Shared reader for the line-oriented egg-info text files: splits the content on
/// <c>'\n'</c>, trims each entry and drops empty ones.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="virtualPath">Virtual path of the file to read.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>
/// The resulting lines, or an empty array when the file cannot be opened or an
/// <see cref="IOException"/> occurs while reading.
/// </returns>
private static async Task<ImmutableArray<string>> ReadNonEmptyLinesAsync(
    PythonVirtualFileSystem vfs,
    string virtualPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return ImmutableArray<string>.Empty;
        }

        using var reader = new StreamReader(stream);
        var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        // TrimEntries also strips the '\r' left behind by CRLF line endings.
        return content
            .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToImmutableArray();
    }
    catch (IOException)
    {
        return ImmutableArray<string>.Empty;
    }
}
/// <summary>
/// Reads <c>requires.txt</c> from the egg-info directory and flattens its sections into
/// one dependency string per requirement line.
/// </summary>
/// <remarks>
/// Lines before the first section header are returned unchanged. Lines inside a section
/// get a marker appended after <c>" ; "</c>:
/// <list type="bullet">
/// <item><c>[extra]</c> gives <c>extra == "extra"</c></item>
/// <item><c>[:condition]</c> gives <c>condition</c> (no extra, so the line stays a base dependency)</item>
/// <item><c>[extra:condition]</c> gives <c>(condition) and extra == "extra"</c></item>
/// <item><c>[]</c> gives no marker</item>
/// </list>
/// </remarks>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="eggInfoPath">Virtual path of the egg-info directory.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>
/// The dependency strings in file order, or an empty array when the file cannot be opened
/// or an <see cref="IOException"/> occurs while reading.
/// </returns>
private static async Task<ImmutableArray<string>> ReadRequiresAsync(
    PythonVirtualFileSystem vfs,
    string eggInfoPath,
    CancellationToken cancellationToken)
{
    var requiresPath = $"{eggInfoPath}/requires.txt";

    try
    {
        using var stream = await vfs.OpenReadAsync(requiresPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return ImmutableArray<string>.Empty;
        }

        using var reader = new StreamReader(stream);
        var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        // requires.txt format:
        // base_dependency>=1.0
        // [extra1]
        // extra_dep>=2.0
        // [extra2:python_version<"3.0"]
        // conditional_dep
        // [:python_version<"3.0"]
        // conditional_base_dep
        var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        var result = new List<string>(lines.Length);

        // Marker of the section currently being read; null while no marker applies.
        string? sectionMarker = null;

        foreach (var line in lines)
        {
            if (line.StartsWith('[') && line.EndsWith(']'))
            {
                sectionMarker = BuildSectionMarker(line[1..^1]);
                continue;
            }

            result.Add(sectionMarker is null ? line : $"{line} ; {sectionMarker}");
        }

        return result.ToImmutableArray();
    }
    catch (IOException)
    {
        return ImmutableArray<string>.Empty;
    }
}

/// <summary>
/// Turns the text between the brackets of a requires.txt section header into the marker
/// to append to every requirement of that section.
/// </summary>
/// <param name="header">Section header without the surrounding brackets, e.g. <c>extra:condition</c>.</param>
/// <returns>The marker text, or <c>null</c> when the header names neither an extra nor a condition.</returns>
private static string? BuildSectionMarker(string header)
{
    // The first colon separates the extra name from the condition; either side may be empty.
    var colonIdx = header.IndexOf(':');
    var extra = (colonIdx >= 0 ? header[..colonIdx] : header).Trim();
    var condition = colonIdx >= 0 ? header[(colonIdx + 1)..].Trim() : string.Empty;

    if (extra.Length == 0)
    {
        return condition.Length == 0 ? null : condition;
    }

    return condition.Length == 0
        ? $"extra == \"{extra}\""
        : $"({condition}) and extra == \"{extra}\"";
}
/// <summary>
/// Reads <c>entry_points.txt</c> from the egg-info directory and returns its
/// <c>name = target</c> lines.
/// </summary>
/// <remarks>
/// The file is INI-like (<c>[console_scripts]</c> followed by <c>script_name = module:function</c>).
/// Section headers are dropped, so the returned lines carry no group information.
/// </remarks>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="eggInfoPath">Virtual path of the egg-info directory.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>
/// Every trimmed, non-empty line that does not start with <c>'['</c> and contains <c>'='</c>;
/// an empty array when the file cannot be opened or an <see cref="IOException"/> occurs.
/// </returns>
private static async Task<ImmutableArray<string>> ReadEntryPointsAsync(
    PythonVirtualFileSystem vfs,
    string eggInfoPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = await vfs
            .OpenReadAsync($"{eggInfoPath}/entry_points.txt", cancellationToken)
            .ConfigureAwait(false);
        if (stream is null)
        {
            return ImmutableArray<string>.Empty;
        }

        using var reader = new StreamReader(stream);
        var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        return text
            .Split('\n')
            .Select(static raw => raw.Trim())
            .Where(static entry => entry.Length > 0 && !entry.StartsWith('[') && entry.Contains('='))
            .ToImmutableArray();
    }
    catch (IOException)
    {
        return ImmutableArray<string>.Empty;
    }
}
/// <summary>
/// Keeps only the dependency strings that do not contain the text <c>extra ==</c>
/// (compared case-insensitively).
/// </summary>
/// <param name="dependencies">Dependency strings, possibly carrying extra markers.</param>
/// <returns>The entries without an extra marker, in their original order.</returns>
private static ImmutableArray<string> FilterBaseDependencies(ImmutableArray<string> dependencies)
{
    var baseOnly = ImmutableArray.CreateBuilder<string>();

    foreach (var dependency in dependencies)
    {
        if (dependency.Contains("extra ==", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        baseOnly.Add(dependency);
    }

    return baseOnly.ToImmutable();
}
/// <summary>
/// Collects the distinct extra names referenced by <c>extra == "name"</c> markers.
/// </summary>
/// <param name="dependencies">Dependency strings, possibly carrying extra markers.</param>
/// <returns>
/// The extra names, de-duplicated case-insensitively. Only the first marker of each
/// dependency string is considered.
/// </returns>
private static ImmutableArray<string> ExtractExtras(ImmutableArray<string> dependencies)
{
    var extraNames = dependencies
        .Select(static dependency => System.Text.RegularExpressions.Regex.Match(dependency, @"extra\s*==\s*""([^""]+)"""))
        .Where(static marker => marker.Success)
        .Select(static marker => marker.Groups[1].Value);

    // The set removes case-insensitive duplicates before the result is frozen.
    var unique = new HashSet<string>(extraNames, StringComparer.OrdinalIgnoreCase);
    return unique.ToImmutableArray();
}
/// <summary>
/// Wraps file paths in <see cref="PythonRecordEntry"/> values, with <c>null</c> for the
/// remaining constructor arguments.
/// </summary>
/// <param name="sources">Paths listed in SOURCES.txt; used only when <paramref name="installedFiles"/> is empty.</param>
/// <param name="installedFiles">Paths listed in installed-files.txt; preferred when non-empty.</param>
/// <returns>One record entry per path of the chosen list, in list order.</returns>
private static ImmutableArray<PythonRecordEntry> ConvertToRecordEntries(
    ImmutableArray<string> sources,
    ImmutableArray<string> installedFiles)
{
    // installed-files.txt is more accurate for installed packages, so it wins when present.
    var chosen = sources;
    if (installedFiles.Length > 0)
    {
        chosen = installedFiles;
    }

    var entries = ImmutableArray.CreateBuilder<PythonRecordEntry>(chosen.Length);
    foreach (var filePath in chosen)
    {
        entries.Add(new PythonRecordEntry(filePath, null, null));
    }

    return entries.ToImmutable();
}
/// <summary>
/// Determines which tool installed the package.
/// </summary>
/// <param name="metadata">Parsed PKG-INFO headers.</param>
/// <returns>
/// The value stored under the <c>Installer</c> key when that key exists; otherwise
/// <c>"setuptools"</c>, since egg-info is typically produced by setuptools.
/// </returns>
private static string? DetectInstaller(Dictionary<string, string> metadata)
    => metadata.TryGetValue("Installer", out var declaredInstaller)
        ? declaredInstaller
        : "setuptools";
}

View File

@@ -240,6 +240,7 @@ internal sealed class PythonPackageDiscovery
new PoetryAdapter(),
new PipEditableAdapter(),
new DistInfoAdapter(),
new EggInfoAdapter(),
new CondaAdapter(),
new ContainerLayerAdapter()
};

View File

@@ -802,7 +802,7 @@ internal static class PythonRecordParser
continue;
}
entries.Add(new PythonRecordEntry(trimmed, hashAlgorithm: null, hashValue: null, size: null));
entries.Add(new PythonRecordEntry(trimmed, null, null, null));
}
return entries;