feat: Add native binary analyzer test utilities and implement SM2 signing tests
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled

- Introduced `NativeTestBase` class for ELF, PE, and Mach-O binary parsing helpers and assertions.
- Created `TestCryptoFactory` for SM2 cryptographic provider setup and key generation.
- Implemented `Sm2SigningTests` to validate signing functionality with environment gate checks.
- Developed console export service and store with comprehensive unit tests for export status management.
This commit is contained in:
StellaOps Bot
2025-12-07 13:12:41 +02:00
parent d907729778
commit e53a282fbe
387 changed files with 21941 additions and 1518 deletions

View File

@@ -0,0 +1,558 @@
using System.Collections.Immutable;
using System.Text;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
/// <summary>
/// Analyzes native extension binaries for shared library dependencies.
/// </summary>
internal sealed partial class NativeLibraryAnalyzer
{
// ELF magic number: 0x7F followed by ASCII "ELF".
private static ReadOnlySpan<byte> ElfMagic => [0x7F, 0x45, 0x4C, 0x46];
// PE magic number: ASCII "MZ" (the DOS header signature at the start of the file).
private static ReadOnlySpan<byte> PeMagic => [0x4D, 0x5A];
// Mach-O magic numbers for 32-bit and 64-bit images. The "Swap" variants are the same
// values with their bytes reversed, i.e. what the reader sees when the file's byte order
// is the opposite of the order used to decode the magic.
private const uint MachOMagic32 = 0xFEEDFACE;
private const uint MachOMagic64 = 0xFEEDFACF;
private const uint MachOMagic32Swap = 0xCEFAEDFE;
private const uint MachOMagic64Swap = 0xCFFAEDFE;
// ELF dynamic section tags consumed by ParseElfDependenciesAsync:
// DT_NULL ends the table, DT_NEEDED carries a string-table offset of a required
// library name, DT_STRTAB locates the string table those offsets refer to.
private const int DT_NULL = 0;
private const int DT_NEEDED = 1;
private const int DT_STRTAB = 5;
// Mach-O load command types that name a dynamic library; ParseMachODependenciesAsync
// treats all four the same way and records the referenced library.
private const uint LC_LOAD_DYLIB = 0x0C;
private const uint LC_LOAD_WEAK_DYLIB = 0x18;
private const uint LC_REEXPORT_DYLIB = 0x1F;
private const uint LC_LAZY_LOAD_DYLIB = 0x20;
// Pattern for ctypes library loading. Matches an optional "ctypes." prefix followed by
// CDLL, cdll.LoadLibrary, windll.LoadLibrary or WinDLL (case-insensitively), an opening
// parenthesis and an unprefixed quoted string literal; capture group 1 is the text
// between the quotes. Calls whose first argument is not such a literal are not matched.
// NOTE(review): RegexOptions.Compiled looks redundant alongside [GeneratedRegex] — confirm
// against the .NET source-generator documentation before removing.
[GeneratedRegex(
@"(?:ctypes\.)?(?:CDLL|cdll\.LoadLibrary|windll\.LoadLibrary|WinDLL)\s*\(\s*['""]([^'""]+)['""]",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex CtypesLoadPattern();
// Pattern for cffi "ffi.dlopen(...)" called with an unprefixed quoted string literal;
// capture group 1 is the text between the quotes. This pattern is case-sensitive.
[GeneratedRegex(
@"ffi\.dlopen\s*\(\s*['""]([^'""]+)['""]",
RegexOptions.Compiled)]
private static partial Regex CffiDlopenPattern();
/// <summary>
/// Opens <paramref name="path"/> in the virtual file system, sniffs the container format
/// from its leading bytes and returns the native libraries the binary declares.
/// </summary>
/// <param name="vfs">Virtual file system used to open the binary.</param>
/// <param name="path">Virtual path of the binary to inspect.</param>
/// <param name="cancellationToken">Token observed by the underlying reads.</param>
/// <returns>
/// The dependency names reported by the ELF, PE or Mach-O parser, or an empty array when
/// the file cannot be opened, yields fewer than four bytes, or has an unrecognised magic.
/// </returns>
public async Task<ImmutableArray<string>> AnalyzeBinaryAsync(
    PythonVirtualFileSystem vfs,
    string path,
    CancellationToken cancellationToken = default)
{
    await using var binary = await vfs.OpenReadAsync(path, cancellationToken).ConfigureAwait(false);
    if (binary is null)
    {
        return ImmutableArray<string>.Empty;
    }

    // 64 bytes is more than any of the magic checks needs.
    var probe = new byte[64];
    var probedLength = await binary.ReadAsync(probe, cancellationToken).ConfigureAwait(false);
    if (probedLength < 4)
    {
        return ImmutableArray<string>.Empty;
    }

    // The format parsers expect to start reading from the beginning of the file.
    binary.Position = 0;

    Task<ImmutableArray<string>>? parsing = null;
    if (IsElf(probe))
    {
        parsing = ParseElfDependenciesAsync(binary, cancellationToken);
    }
    else if (IsPe(probe))
    {
        parsing = ParsePeDependenciesAsync(binary, cancellationToken);
    }
    else if (IsMachO(probe))
    {
        parsing = ParseMachODependenciesAsync(binary, cancellationToken);
    }

    return parsing is null
        ? ImmutableArray<string>.Empty
        : await parsing.ConfigureAwait(false);
}
/// <summary>
/// Scans the "*.py" files under <paramref name="packagePath"/> for ctypes and cffi calls
/// that load a library by string literal.
/// </summary>
/// <param name="vfs">Virtual file system to enumerate and read from.</param>
/// <param name="packagePath">Root under which Python files are enumerated.</param>
/// <param name="cancellationToken">Token observed while opening and reading files.</param>
/// <returns>
/// The normalized library names, de-duplicated case-insensitively and sorted with the
/// default string ordering.
/// </returns>
public async Task<ImmutableArray<string>> DetectSourceDependenciesAsync(
    PythonVirtualFileSystem vfs,
    string packagePath,
    CancellationToken cancellationToken = default)
{
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Records capture group 1 of every match, skipping blank captures.
    void Collect(MatchCollection hits)
    {
        foreach (Match hit in hits)
        {
            var captured = hit.Groups[1].Value;
            if (string.IsNullOrWhiteSpace(captured))
            {
                continue;
            }

            found.Add(NormalizeLibraryName(captured));
        }
    }

    // The file list is materialized before any file is opened.
    var sources = vfs.EnumerateFiles(packagePath, "*.py").ToArray();
    foreach (var source in sources)
    {
        await using var input = await vfs.OpenReadAsync(source.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (input is null)
        {
            continue;
        }

        using var text = new StreamReader(input);
        var code = await text.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        // ctypes matches are recorded before cffi matches for each file.
        Collect(CtypesLoadPattern().Matches(code));
        Collect(CffiDlopenPattern().Matches(code));
    }

    return [.. found.OrderBy(static library => library)];
}
// True when the buffer begins with the four ELF magic bytes.
private static bool IsElf(ReadOnlySpan<byte> header)
{
    return header.StartsWith(ElfMagic);
}
// True when the buffer begins with the two-byte "MZ" signature.
private static bool IsPe(ReadOnlySpan<byte> header)
{
    return header.StartsWith(PeMagic);
}
// True when the first four bytes, decoded in host byte order, equal one of the four
// Mach-O magic constants (32/64-bit, native or byte-swapped).
private static bool IsMachO(ReadOnlySpan<byte> header)
{
    if (header.Length >= 4)
    {
        return BitConverter.ToUInt32(header) switch
        {
            MachOMagic32 or MachOMagic64 or MachOMagic32Swap or MachOMagic64Swap => true,
            _ => false,
        };
    }

    return false;
}
/// <summary>
/// Extracts the DT_NEEDED library names from an ELF image.
/// </summary>
/// <remarks>
/// Walks the program headers to find the PT_DYNAMIC segment and every PT_LOAD segment,
/// reads the dynamic table, then resolves each DT_NEEDED string through the string table.
/// DT_STRTAB holds a virtual address, so it is translated to a file offset via the PT_LOAD
/// segment that contains it; when no segment contains it the value is used as a file
/// offset directly.
/// </remarks>
/// <param name="stream">Seekable stream over the ELF file; its position is overwritten.</param>
/// <param name="cancellationToken">Token observed by the reads.</param>
/// <returns>
/// The needed library names in table order, or an empty array when the image has no
/// usable dynamic section or is malformed/truncated.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled during a read.</exception>
private static async Task<ImmutableArray<string>> ParseElfDependenciesAsync(
    Stream stream,
    CancellationToken cancellationToken)
{
    const uint PtLoad = 1;
    const uint PtDynamic = 2;

    // Maps a virtual address to a file offset using the loadable segments; falls back to
    // the address itself when no segment covers it.
    static ulong ToFileOffset(
        List<(ulong VirtualAddress, ulong FileOffset, ulong FileSize)> segments,
        ulong address)
    {
        foreach (var segment in segments)
        {
            if (address >= segment.VirtualAddress &&
                address - segment.VirtualAddress < segment.FileSize)
            {
                return segment.FileOffset + (address - segment.VirtualAddress);
            }
        }

        return address;
    }

    try
    {
        var dependencies = new List<string>();

        // Read ELF header
        var headerBytes = new byte[64];
        await stream.ReadExactlyAsync(headerBytes, cancellationToken).ConfigureAwait(false);
        var is64Bit = headerBytes[4] == 2; // EI_CLASS
        var isLittleEndian = headerBytes[5] == 1; // EI_DATA

        // Get program header info (e_phoff, e_phentsize, e_phnum)
        ulong phOffset;
        ushort phEntSize, phNum;
        if (is64Bit)
        {
            phOffset = ReadUInt64(headerBytes.AsSpan(32), isLittleEndian);
            phEntSize = ReadUInt16(headerBytes.AsSpan(54), isLittleEndian);
            phNum = ReadUInt16(headerBytes.AsSpan(56), isLittleEndian);
        }
        else
        {
            phOffset = ReadUInt32(headerBytes.AsSpan(28), isLittleEndian);
            phEntSize = ReadUInt16(headerBytes.AsSpan(42), isLittleEndian);
            phNum = ReadUInt16(headerBytes.AsSpan(44), isLittleEndian);
        }

        // Each entry must be large enough for the furthest field read below
        // (p_filesz ends at byte 40 for 64-bit and byte 20 for 32-bit entries).
        if (phEntSize < (is64Bit ? 40 : 20))
        {
            return ImmutableArray<string>.Empty;
        }

        // Collect PT_LOAD segments and the first PT_DYNAMIC segment
        var loadSegments = new List<(ulong VirtualAddress, ulong FileOffset, ulong FileSize)>();
        ulong dynamicOffset = 0;
        ulong dynamicSize = 0;
        var dynamicFound = false;

        stream.Position = (long)phOffset;
        var phBuffer = new byte[phEntSize];
        for (int i = 0; i < phNum; i++)
        {
            await stream.ReadExactlyAsync(phBuffer, cancellationToken).ConfigureAwait(false);
            uint pType = ReadUInt32(phBuffer.AsSpan(0), isLittleEndian);
            if (pType != PtLoad && pType != PtDynamic)
            {
                continue;
            }

            ulong segmentOffset, segmentAddress, segmentFileSize;
            if (is64Bit)
            {
                segmentOffset = ReadUInt64(phBuffer.AsSpan(8), isLittleEndian);
                segmentAddress = ReadUInt64(phBuffer.AsSpan(16), isLittleEndian);
                segmentFileSize = ReadUInt64(phBuffer.AsSpan(32), isLittleEndian);
            }
            else
            {
                segmentOffset = ReadUInt32(phBuffer.AsSpan(4), isLittleEndian);
                segmentAddress = ReadUInt32(phBuffer.AsSpan(8), isLittleEndian);
                segmentFileSize = ReadUInt32(phBuffer.AsSpan(16), isLittleEndian);
            }

            if (pType == PtLoad)
            {
                loadSegments.Add((segmentAddress, segmentOffset, segmentFileSize));
            }
            else if (!dynamicFound)
            {
                dynamicOffset = segmentOffset;
                dynamicSize = segmentFileSize;
                dynamicFound = true;
            }
        }

        if (dynamicOffset == 0)
        {
            return ImmutableArray<string>.Empty;
        }

        // Parse dynamic section
        stream.Position = (long)dynamicOffset;
        var dynEntrySize = is64Bit ? 16 : 8;
        var dynBuffer = new byte[dynEntrySize];
        var neededOffsets = new List<ulong>();
        ulong strTabAddress = 0;
        while (stream.Position < (long)(dynamicOffset + dynamicSize))
        {
            await stream.ReadExactlyAsync(dynBuffer, cancellationToken).ConfigureAwait(false);
            long tag;
            ulong val;
            if (is64Bit)
            {
                tag = (long)ReadUInt64(dynBuffer.AsSpan(0), isLittleEndian);
                val = ReadUInt64(dynBuffer.AsSpan(8), isLittleEndian);
            }
            else
            {
                tag = (int)ReadUInt32(dynBuffer.AsSpan(0), isLittleEndian);
                val = ReadUInt32(dynBuffer.AsSpan(4), isLittleEndian);
            }

            if (tag == DT_NULL)
            {
                break;
            }

            if (tag == DT_NEEDED)
            {
                neededOffsets.Add(val);
            }
            else if (tag == DT_STRTAB)
            {
                strTabAddress = val;
            }
        }

        // Without a string table the DT_NEEDED offsets cannot be resolved.
        if (neededOffsets.Count == 0 || strTabAddress == 0)
        {
            return ImmutableArray<string>.Empty;
        }

        var strTabFileOffset = ToFileOffset(loadSegments, strTabAddress);

        // Read library names from string table
        foreach (var offset in neededOffsets)
        {
            stream.Position = (long)(strTabFileOffset + offset);
            var name = await ReadNullTerminatedStringAsync(stream, cancellationToken).ConfigureAwait(false);
            if (!string.IsNullOrWhiteSpace(name))
            {
                dependencies.Add(name);
            }
        }

        return [.. dependencies];
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: a malformed or truncated image simply yields no dependencies.
        return ImmutableArray<string>.Empty;
    }
}
/// <summary>
/// Extracts the imported DLL names from a PE image's import directory.
/// </summary>
/// <remarks>
/// The data directory array starts at offset 96 (PE32) or 112 (PE32+) of the optional
/// header, immediately after the NumberOfRvaAndSizes field; the import table is entry 1.
/// RVAs are translated to file offsets through the full section table, so DLL names that
/// live in a different section than the import descriptors are still resolved.
/// </remarks>
/// <param name="stream">Seekable stream over the PE file; its position is overwritten.</param>
/// <param name="cancellationToken">Token observed by the reads.</param>
/// <returns>
/// The imported DLL names in descriptor order, or an empty array when the image has no
/// import directory or is malformed/truncated.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled during a read.</exception>
private static async Task<ImmutableArray<string>> ParsePeDependenciesAsync(
    Stream stream,
    CancellationToken cancellationToken)
{
    try
    {
        var dependencies = new List<string>();

        // Read DOS header; e_lfanew at offset 60 points at the PE signature
        var dosHeader = new byte[64];
        await stream.ReadExactlyAsync(dosHeader, cancellationToken).ConfigureAwait(false);
        var peOffset = BitConverter.ToInt32(dosHeader, 60);

        // Read PE signature and COFF header
        stream.Position = peOffset;
        var peSignature = new byte[4];
        await stream.ReadExactlyAsync(peSignature, cancellationToken).ConfigureAwait(false);
        if (peSignature[0] != 'P' || peSignature[1] != 'E')
        {
            return ImmutableArray<string>.Empty;
        }

        var coffHeader = new byte[20];
        await stream.ReadExactlyAsync(coffHeader, cancellationToken).ConfigureAwait(false);
        var numberOfSections = BitConverter.ToUInt16(coffHeader, 2);
        var sizeOfOptionalHeader = BitConverter.ToUInt16(coffHeader, 16);

        // The optional header follows the 4-byte signature and 20-byte COFF header
        var optionalHeaderStart = (long)peOffset + 24;

        // Read optional header magic
        var optionalMagic = new byte[2];
        await stream.ReadExactlyAsync(optionalMagic, cancellationToken).ConfigureAwait(false);
        var is64Bit = BitConverter.ToUInt16(optionalMagic, 0) == 0x20B;

        // Read NumberOfRvaAndSizes (4 bytes) plus the Export and Import directory entries
        // (8 bytes each) in one go.
        var dataDirectoryOffset = is64Bit ? 112 : 96;
        if (sizeOfOptionalHeader < dataDirectoryOffset + 16)
        {
            return ImmutableArray<string>.Empty;
        }

        stream.Position = optionalHeaderStart + dataDirectoryOffset - 4;
        var directoryBytes = new byte[20];
        await stream.ReadExactlyAsync(directoryBytes, cancellationToken).ConfigureAwait(false);
        var numberOfRvaAndSizes = BitConverter.ToUInt32(directoryBytes, 0);
        var importVirtualAddress = BitConverter.ToUInt32(directoryBytes, 12);
        if (numberOfRvaAndSizes < 2 || importVirtualAddress == 0)
        {
            return ImmutableArray<string>.Empty;
        }

        // Read the section table so RVAs can be mapped to file offsets
        stream.Position = optionalHeaderStart + sizeOfOptionalHeader;
        var sections = new List<(uint VirtualAddress, uint Size, uint RawPointer)>(numberOfSections);
        var sectionHeader = new byte[40];
        for (int i = 0; i < numberOfSections; i++)
        {
            await stream.ReadExactlyAsync(sectionHeader, cancellationToken).ConfigureAwait(false);
            var virtSize = BitConverter.ToUInt32(sectionHeader, 8);
            var virtAddr = BitConverter.ToUInt32(sectionHeader, 12);
            var rawSize = BitConverter.ToUInt32(sectionHeader, 16);
            var rawPtr = BitConverter.ToUInt32(sectionHeader, 20);

            // Fall back to the raw size when the virtual size is recorded as zero.
            sections.Add((virtAddr, virtSize != 0 ? virtSize : rawSize, rawPtr));
        }

        // Returns the file offset for an RVA, or -1 when no section contains it.
        long RvaToFileOffset(uint rva)
        {
            foreach (var section in sections)
            {
                if (rva >= section.VirtualAddress && rva - section.VirtualAddress < section.Size)
                {
                    return (long)section.RawPointer + (rva - section.VirtualAddress);
                }
            }

            return -1;
        }

        var importFileOffset = RvaToFileOffset(importVirtualAddress);
        if (importFileOffset <= 0)
        {
            return ImmutableArray<string>.Empty;
        }

        // Parse import directory: 20-byte descriptors, ended by one whose Name RVA is zero
        var importEntry = new byte[20];
        var descriptorPosition = importFileOffset;
        while (true)
        {
            stream.Position = descriptorPosition;
            await stream.ReadExactlyAsync(importEntry, cancellationToken).ConfigureAwait(false);
            descriptorPosition += importEntry.Length;

            var nameRva = BitConverter.ToUInt32(importEntry, 12);
            if (nameRva == 0)
            {
                break;
            }

            var nameFileOffset = RvaToFileOffset(nameRva);
            if (nameFileOffset <= 0)
            {
                continue;
            }

            stream.Position = nameFileOffset;
            var dllName = await ReadNullTerminatedStringAsync(stream, cancellationToken).ConfigureAwait(false);
            if (!string.IsNullOrWhiteSpace(dllName))
            {
                dependencies.Add(dllName);
            }
        }

        return [.. dependencies];
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: a malformed or truncated image simply yields no dependencies.
        return ImmutableArray<string>.Empty;
    }
}
/// <summary>
/// Extracts the file names of the dynamic libraries referenced by a thin Mach-O image.
/// </summary>
/// <remarks>
/// Iterates the load commands and records the library named by every LC_LOAD_DYLIB,
/// LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB and LC_LAZY_LOAD_DYLIB command. Iteration stops
/// at the first command that is shorter than its own 8-byte header or that would run past
/// the sizeofcmds area declared in the Mach-O header, so a corrupt command size cannot
/// stall the loop.
/// </remarks>
/// <param name="stream">Seekable stream over the Mach-O file; its position is overwritten.</param>
/// <param name="cancellationToken">Token observed by the reads.</param>
/// <returns>
/// The referenced library file names (directory part removed) in command order, or an
/// empty array when reading fails.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled during a read.</exception>
private static async Task<ImmutableArray<string>> ParseMachODependenciesAsync(
    Stream stream,
    CancellationToken cancellationToken)
{
    try
    {
        var dependencies = new List<string>();

        // Read Mach-O header
        var headerBytes = new byte[32];
        await stream.ReadExactlyAsync(headerBytes, cancellationToken).ConfigureAwait(false);
        var magic = BitConverter.ToUInt32(headerBytes, 0);
        var isSwapped = magic is MachOMagic32Swap or MachOMagic64Swap;
        var is64Bit = magic is MachOMagic64 or MachOMagic64Swap;
        var ncmds = ReadUInt32Macho(headerBytes.AsSpan(16), isSwapped);
        var sizeofcmds = ReadUInt32Macho(headerBytes.AsSpan(20), isSwapped);

        // Load commands follow the header (32 bytes for 64-bit, 28 bytes for 32-bit)
        var loadCommandOffset = is64Bit ? 32 : 28;
        var commandsEnd = loadCommandOffset + (long)sizeofcmds;
        stream.Position = loadCommandOffset;

        var cmdBuffer = new byte[8];
        var nameOffsetBuffer = new byte[4];
        for (uint i = 0; i < ncmds; i++)
        {
            var cmdStart = stream.Position;
            if (cmdStart + cmdBuffer.Length > commandsEnd)
            {
                break;
            }

            await stream.ReadExactlyAsync(cmdBuffer, cancellationToken).ConfigureAwait(false);
            var cmd = ReadUInt32Macho(cmdBuffer.AsSpan(0), isSwapped);
            var cmdsize = ReadUInt32Macho(cmdBuffer.AsSpan(4), isSwapped);

            // A command smaller than its own header would never advance the cursor.
            if (cmdsize < cmdBuffer.Length || cmdStart + cmdsize > commandsEnd)
            {
                break;
            }

            var isDylibCommand =
                cmd is LC_LOAD_DYLIB or LC_LOAD_WEAK_DYLIB or LC_REEXPORT_DYLIB or LC_LAZY_LOAD_DYLIB;
            if (isDylibCommand && cmdsize >= cmdBuffer.Length + nameOffsetBuffer.Length)
            {
                // dylib_command: the name offset (relative to the command start) comes
                // right after cmd/cmdsize
                await stream.ReadExactlyAsync(nameOffsetBuffer, cancellationToken).ConfigureAwait(false);
                var nameOffset = ReadUInt32Macho(nameOffsetBuffer.AsSpan(0), isSwapped);
                if (nameOffset < cmdsize)
                {
                    stream.Position = cmdStart + nameOffset;
                    var libName = await ReadNullTerminatedStringAsync(stream, cancellationToken).ConfigureAwait(false);
                    if (!string.IsNullOrWhiteSpace(libName))
                    {
                        // Extract just the filename from paths like /usr/lib/libSystem.B.dylib
                        dependencies.Add(Path.GetFileName(libName));
                    }
                }
            }

            stream.Position = cmdStart + cmdsize;
        }

        return [.. dependencies];
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: a malformed or truncated image simply yields no dependencies.
        return ImmutableArray<string>.Empty;
    }
}
// Reads bytes one at a time from the current stream position until a zero byte, the end
// of the stream, or 257 collected bytes, and decodes what was collected as UTF-8.
// A terminating zero byte is consumed but not included in the result.
private static async Task<string> ReadNullTerminatedStringAsync(
    Stream stream,
    CancellationToken cancellationToken)
{
    const int MaxBytes = 257; // sanity limit on the collected length

    var collected = new byte[MaxBytes];
    var length = 0;
    var single = new byte[1];

    while (length < MaxBytes)
    {
        var read = await stream.ReadAsync(single, cancellationToken).ConfigureAwait(false);
        if (read != 1 || single[0] == 0)
        {
            break;
        }

        collected[length] = single[0];
        length++;
    }

    return Encoding.UTF8.GetString(collected, 0, length);
}
// Reads a 16-bit unsigned integer from the start of the span in the requested byte order.
// Decoding is explicit about the data's byte order, so the result does not depend on the
// byte order of the host (the previous implementation only swapped on little-endian hosts).
private static ushort ReadUInt16(ReadOnlySpan<byte> span, bool littleEndian) =>
    littleEndian
        ? global::System.Buffers.Binary.BinaryPrimitives.ReadUInt16LittleEndian(span)
        : global::System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(span);
// Reads a 32-bit unsigned integer from the start of the span in the requested byte order.
// Decoding is explicit about the data's byte order, so the result does not depend on the
// byte order of the host (the previous implementation only swapped on little-endian hosts).
private static uint ReadUInt32(ReadOnlySpan<byte> span, bool littleEndian) =>
    littleEndian
        ? global::System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(span)
        : global::System.Buffers.Binary.BinaryPrimitives.ReadUInt32BigEndian(span);
// Reads a 64-bit unsigned integer from the start of the span in the requested byte order.
// Decoding is explicit about the data's byte order, so the result does not depend on the
// byte order of the host (the previous implementation only swapped on little-endian hosts).
private static ulong ReadUInt64(ReadOnlySpan<byte> span, bool littleEndian) =>
    littleEndian
        ? global::System.Buffers.Binary.BinaryPrimitives.ReadUInt64LittleEndian(span)
        : global::System.Buffers.Binary.BinaryPrimitives.ReadUInt64BigEndian(span);
// Decodes a 32-bit value in host byte order and reverses its bytes when the Mach-O image
// was detected as byte-swapped relative to the way its magic was read.
private static uint ReadUInt32Macho(ReadOnlySpan<byte> span, bool isSwapped)
{
    var hostOrder = BitConverter.ToUInt32(span);
    return isSwapped ? BinaryPrimitives.ReverseEndianness(hostOrder) : hostOrder;
}
// Reduces a library reference found in source code to a bare library name:
// trims whitespace, drops any directory part, and cuts version suffixes from names of the
// form "lib<name>.so|dylib|dll..." (for example "libssl.so.1.1" becomes "libssl.so").
// Names that do not match that form are returned as-is after trimming/path removal.
private static string NormalizeLibraryName(string name)
{
    var normalized = name.Trim();

    // Split on both separators explicitly: Path.GetFileName only treats '\' as a
    // separator on Windows, so Windows-style paths were left intact on other hosts.
    var lastSeparator = normalized.LastIndexOfAny(['/', '\\']);
    if (lastSeparator >= 0)
    {
        normalized = normalized[(lastSeparator + 1)..];
    }

    // Remove version suffixes like .so.1.2.3
    var match = Regex.Match(normalized, @"^(lib[^.]+\.(?:so|dylib|dll))");
    return match.Success ? match.Groups[1].Value : normalized;
}
}
/// <summary>
/// File-local endianness helper. Each overload returns <c>value</c> with its bytes in
/// reverse order by delegating to the framework's
/// <c>System.Buffers.Binary.BinaryPrimitives</c>, which this type shares a simple name with
/// (hence the <c>global::</c>-qualified calls).
/// </summary>
file static class BinaryPrimitives
{
    /// <summary>Reverses the two bytes of a 16-bit value.</summary>
    public static ushort ReverseEndianness(ushort value) =>
        global::System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value);

    /// <summary>Reverses the four bytes of a 32-bit value.</summary>
    public static uint ReverseEndianness(uint value) =>
        global::System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value);

    /// <summary>Reverses the eight bytes of a 64-bit value.</summary>
    public static ulong ReverseEndianness(ulong value) =>
        global::System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value);
}

View File

@@ -78,6 +78,38 @@ internal sealed record PythonNativeExtension(
}
}
/// <summary>
/// Aggregated outcome of analysing a package's native extensions.
/// </summary>
/// <param name="Extensions">Every native extension that was detected.</param>
/// <param name="BinaryDependencies">Native libraries discovered by inspecting extension binaries.</param>
/// <param name="SourceDependencies">Native libraries discovered from ctypes/cffi patterns in source code.</param>
/// <param name="AllDependencies">Union of the binary and source dependencies without duplicates.</param>
internal sealed record NativeExtensionAnalysis(
    ImmutableArray<PythonNativeExtension> Extensions,
    ImmutableArray<string> BinaryDependencies,
    ImmutableArray<string> SourceDependencies,
    ImmutableArray<string> AllDependencies)
{
    /// <summary>
    /// True when at least one native extension was detected.
    /// </summary>
    public bool HasNativeExtensions => !Extensions.IsEmpty;

    /// <summary>
    /// True when at least one native library dependency was detected.
    /// </summary>
    public bool HasNativeDependencies => !AllDependencies.IsEmpty;

    /// <summary>
    /// The distinct, non-null platform values across all extensions (evaluated lazily).
    /// </summary>
    public IEnumerable<string> TargetedPlatforms
    {
        get
        {
            return Extensions
                .Select(static extension => extension.Platform)
                .OfType<string>()
                .Distinct();
        }
    }
}
/// <summary>
/// The type of native extension.
/// </summary>

View File

@@ -9,6 +9,7 @@ namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
/// </summary>
internal sealed partial class PythonNativeExtensionScanner
{
private readonly NativeLibraryAnalyzer _libraryAnalyzer = new();
// Pattern to extract module name and platform info from extension filenames
// Examples: numpy.core._multiarray_umath.cpython-311-x86_64-linux-gnu.so
// _ssl.cpython-311-darwin.so
@@ -39,7 +40,7 @@ internal sealed partial class PythonNativeExtensionScanner
private static partial Regex PyO3Pattern();
/// <summary>
/// Scans the VFS for native extensions.
/// Scans the VFS for native extensions (without dependency analysis).
/// </summary>
public IEnumerable<PythonNativeExtension> Scan(PythonVirtualFileSystem vfs)
{
@@ -51,7 +52,7 @@ internal sealed partial class PythonNativeExtensionScanner
foreach (var file in extensionFiles)
{
var extension = ParseExtensionFile(file);
var extension = ParseExtensionFile(file, ImmutableArray<string>.Empty);
if (extension is not null)
{
yield return extension;
@@ -77,6 +78,97 @@ internal sealed partial class PythonNativeExtensionScanner
}
}
/// <summary>
/// Scans the VFS for native extensions and, for each ".so"/".pyd" file, also analyses
/// the binary for the native libraries it depends on.
/// </summary>
/// <param name="vfs">Virtual file system to scan.</param>
/// <param name="cancellationToken">Token passed to the binary analysis.</param>
/// <returns>
/// The parsed ".so"/".pyd" extensions followed by one entry per ".wasm" file; WASM
/// entries carry no dependencies.
/// </returns>
public async Task<ImmutableArray<PythonNativeExtension>> ScanWithDependenciesAsync(
    PythonVirtualFileSystem vfs,
    CancellationToken cancellationToken = default)
{
    static bool HasSuffix(string virtualPath, string suffix) =>
        virtualPath.EndsWith(suffix, StringComparison.OrdinalIgnoreCase);

    var found = ImmutableArray.CreateBuilder<PythonNativeExtension>();

    // Shared objects and Windows extension modules
    var nativeBinaries = vfs.Files
        .Where(candidate => HasSuffix(candidate.VirtualPath, ".so") || HasSuffix(candidate.VirtualPath, ".pyd"))
        .ToArray();
    foreach (var binary in nativeBinaries)
    {
        var needed = await _libraryAnalyzer
            .AnalyzeBinaryAsync(vfs, binary.VirtualPath, cancellationToken)
            .ConfigureAwait(false);

        // Files whose name cannot be parsed as an extension are skipped.
        if (ParseExtensionFile(binary, needed) is { } parsed)
        {
            found.Add(parsed);
        }
    }

    // WASM modules (no native dependencies to analyze)
    var wasmModules = vfs.Files
        .Where(candidate => HasSuffix(candidate.VirtualPath, ".wasm"))
        .ToArray();
    foreach (var module in wasmModules)
    {
        var wasmExtension = new PythonNativeExtension(
            ModuleName: Path.GetFileNameWithoutExtension(module.VirtualPath),
            Path: module.VirtualPath,
            Kind: PythonNativeExtensionKind.Wasm,
            Platform: null,
            Architecture: "wasm32",
            Source: module.Source,
            PackageName: ExtractPackageName(module.VirtualPath),
            Dependencies: ImmutableArray<string>.Empty);
        found.Add(wasmExtension);
    }

    return found.ToImmutable();
}
/// <summary>
/// Runs the dependency-aware extension scan and, when a package path is supplied, the
/// source-pattern scan, then merges both sets of native library names.
/// </summary>
/// <param name="vfs">Virtual file system to scan.</param>
/// <param name="packagePath">
/// Root for the ctypes/cffi source scan; the source scan is skipped when null or empty.
/// </param>
/// <param name="cancellationToken">Token passed to both scans.</param>
/// <returns>
/// The extensions, the binary-derived names, the source-derived names, and their
/// case-insensitively de-duplicated, sorted union.
/// </returns>
public async Task<NativeExtensionAnalysis> AnalyzeAsync(
    PythonVirtualFileSystem vfs,
    string? packagePath = null,
    CancellationToken cancellationToken = default)
{
    var ignoreCase = StringComparer.OrdinalIgnoreCase;

    var scanned = await ScanWithDependenciesAsync(vfs, cancellationToken).ConfigureAwait(false);

    // Libraries named by the extension binaries themselves
    var fromBinaries = scanned
        .SelectMany(static extension => extension.Dependencies)
        .Distinct(ignoreCase)
        .ToImmutableArray();

    // Libraries loaded from Python source (ctypes, cffi patterns)
    var fromSource = string.IsNullOrEmpty(packagePath)
        ? ImmutableArray<string>.Empty
        : await _libraryAnalyzer
            .DetectSourceDependenciesAsync(vfs, packagePath, cancellationToken)
            .ConfigureAwait(false);

    var merged = fromBinaries
        .Concat(fromSource)
        .Distinct(ignoreCase)
        .OrderBy(static dependency => dependency)
        .ToImmutableArray();

    return new NativeExtensionAnalysis(
        Extensions: scanned,
        BinaryDependencies: fromBinaries,
        SourceDependencies: fromSource,
        AllDependencies: merged);
}
/// <summary>
/// Detects the kind of native extension from source files in the package.
/// </summary>
@@ -139,7 +231,9 @@ internal sealed partial class PythonNativeExtensionScanner
return PythonNativeExtensionKind.CExtension;
}
private static PythonNativeExtension? ParseExtensionFile(PythonVirtualFile file)
private static PythonNativeExtension? ParseExtensionFile(
PythonVirtualFile file,
ImmutableArray<string> dependencies)
{
var fileName = Path.GetFileName(file.VirtualPath);
var match = ExtensionFilePattern().Match(fileName);
@@ -187,7 +281,7 @@ internal sealed partial class PythonNativeExtensionScanner
Architecture: architecture,
Source: file.Source,
PackageName: ExtractPackageName(file.VirtualPath),
Dependencies: ImmutableArray<string>.Empty);
Dependencies: dependencies);
}
private static (string? Platform, string? Architecture) ParsePlatformString(string platformStr)

View File

@@ -0,0 +1,338 @@
using System.Collections.Frozen;
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Dependencies;
/// <summary>
/// Represents a dependency graph of Python packages.
/// </summary>
internal sealed class DependencyGraph
{
// Nodes keyed by normalized package name (see NormalizeName); key lookups ignore case.
private readonly Dictionary<string, DependencyNode> _nodes = new(StringComparer.OrdinalIgnoreCase);
// Edges in insertion order; AddEdge skips an edge equal to one already stored.
private readonly List<DependencyEdge> _edges = [];
/// <summary>
/// All nodes in the graph, keyed by normalized name.
/// This exposes the internal dictionary directly, so later changes to the graph are visible through it.
/// </summary>
public IReadOnlyDictionary<string, DependencyNode> Nodes => _nodes;
/// <summary>
/// All edges in the graph, in the order they were added.
/// This exposes the internal list directly, so later changes to the graph are visible through it.
/// </summary>
public IReadOnlyList<DependencyEdge> Edges => _edges;
/// <summary>
/// Root packages (those not depended upon by any other package).
/// </summary>
/// <remarks>
/// Evaluated lazily on enumeration. The set of edge targets is built once per enumeration
/// instead of rescanning every edge for every node. A node that depends on itself is
/// not a root.
/// </remarks>
public IEnumerable<DependencyNode> RootNodes
{
    get
    {
        var targets = new HashSet<string>(StringComparer.Ordinal);
        foreach (var edge in _edges)
        {
            targets.Add(edge.To);
        }

        foreach (var node in _nodes.Values)
        {
            if (!targets.Contains(node.NormalizedName))
            {
                yield return node;
            }
        }
    }
}
/// <summary>
/// Adds a node for <paramref name="name"/>, or merges new information into the node that
/// already exists under the same normalized name.
/// </summary>
/// <remarks>
/// Merging never loses information: a known version is kept (a version is only filled in
/// when the node had none), and a node that was recorded as installed stays installed even
/// if a later call passes <c>isInstalled: false</c>. Passing <c>isInstalled: true</c> marks
/// an existing node as installed whether or not a version is supplied.
/// </remarks>
/// <param name="name">Package name as written; it is normalized for the lookup key.</param>
/// <param name="version">Package version, if known.</param>
/// <param name="isInstalled">Whether the package is known to be installed.</param>
/// <returns>The node stored in the graph after the add or merge.</returns>
public DependencyNode AddNode(string name, string? version = null, bool isInstalled = false)
{
    var normalizedName = NormalizeName(name);
    if (!_nodes.TryGetValue(normalizedName, out var node))
    {
        node = new DependencyNode(
            Name: name,
            NormalizedName: normalizedName,
            Version: version,
            IsInstalled: isInstalled,
            Depth: -1, // Will be calculated later
            TransitiveDependencyCount: 0);
        _nodes[normalizedName] = node;
        return node;
    }

    var mergedVersion = node.Version ?? version;
    var mergedInstalled = node.IsInstalled || isInstalled;
    if (mergedVersion != node.Version || mergedInstalled != node.IsInstalled)
    {
        node = node with { Version = mergedVersion, IsInstalled = mergedInstalled };
        _nodes[normalizedName] = node;
    }

    return node;
}
/// <summary>
/// Records that <paramref name="from"/> depends on <paramref name="to"/>, creating
/// either node if it is not in the graph yet.
/// </summary>
/// <param name="from">The depending package.</param>
/// <param name="to">The package depended upon.</param>
/// <param name="versionConstraint">Version constraint attached to the dependency, if any.</param>
/// <param name="isOptional">Whether the dependency is optional.</param>
public void AddEdge(string from, string to, string? versionConstraint = null, bool isOptional = false)
{
    var candidate = new DependencyEdge(
        From: NormalizeName(from),
        To: NormalizeName(to),
        VersionConstraint: versionConstraint,
        IsOptional: isOptional);

    // Both endpoints must exist as nodes
    AddNode(from);
    AddNode(to);

    // An edge equal in every field to a stored one is not added again
    if (_edges.Contains(candidate))
    {
        return;
    }

    _edges.Add(candidate);
}
/// <summary>
/// Returns the normalized names that <paramref name="name"/> depends on directly,
/// one per outgoing edge (evaluated lazily over the edge list).
/// </summary>
public IEnumerable<string> GetDirectDependencies(string name)
{
    var source = NormalizeName(name);
    return from edge in _edges
           where string.Equals(edge.From, source, StringComparison.OrdinalIgnoreCase)
           select edge.To;
}
/// <summary>
/// Returns the normalized names of the packages that depend directly on
/// <paramref name="name"/>, one per incoming edge (evaluated lazily over the edge list).
/// </summary>
public IEnumerable<string> GetDependents(string name)
{
    var target = NormalizeName(name);
    return from edge in _edges
           where string.Equals(edge.To, target, StringComparison.OrdinalIgnoreCase)
           select edge.From;
}
/// <summary>
/// Computes, for every node, the set of packages reachable by following dependency edges.
/// </summary>
/// <returns>
/// A case-insensitive map from normalized package name to the case-insensitive set of its
/// direct and transitive dependencies.
/// </returns>
public ImmutableDictionary<string, ImmutableHashSet<string>> CalculateTransitiveClosure()
{
    var result = ImmutableDictionary.CreateBuilder<string, ImmutableHashSet<string>>(
        StringComparer.OrdinalIgnoreCase);

    foreach (var package in _nodes.Keys)
    {
        // Each package gets its own traversal state
        var reachable = new HashSet<string>();
        CalculateTransitiveClosureRecursive(package, reachable, new HashSet<string>());
        result[package] = reachable.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
    }

    return result.ToImmutable();
}
// Adds every package reachable from `node` to `closure`. `visited` records the packages
// whose outgoing edges have already been expanded, which also stops cycles from looping.
// Uses an explicit work stack; the resulting sets are the same as a depth-first recursion.
private void CalculateTransitiveClosureRecursive(
    string node,
    HashSet<string> closure,
    HashSet<string> visited)
{
    var pending = new Stack<string>();
    pending.Push(node);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        if (!visited.Add(current))
        {
            continue; // Already expanded, or reached again through a cycle
        }

        foreach (var dependency in GetDirectDependencies(current))
        {
            closure.Add(dependency);
            pending.Push(dependency);
        }
    }
}
/// <summary>
/// Finds circular dependencies with a depth-first search started from every node that an
/// earlier search has not reached yet.
/// </summary>
/// <returns>
/// One entry per back edge found; each entry lists the packages on the cycle and repeats
/// the first package at the end.
/// </returns>
public ImmutableArray<ImmutableArray<string>> DetectCycles()
{
    var found = new List<ImmutableArray<string>>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var onStack = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var trail = new List<string>();

    foreach (var start in _nodes.Keys)
    {
        if (seen.Contains(start))
        {
            continue;
        }

        DetectCyclesRecursive(start, seen, onStack, trail, found);
    }

    return found.ToImmutableArray();
}
// Depth-first step of DetectCycles. `path` holds the packages between the search start
// and `node`; `recursionStack` holds the same packages as a set. Meeting a dependency that
// is still on the stack means the path from that dependency to `node` forms a cycle.
private void DetectCyclesRecursive(
    string node,
    HashSet<string> visited,
    HashSet<string> recursionStack,
    List<string> path,
    List<ImmutableArray<string>> cycles)
{
    visited.Add(node);
    recursionStack.Add(node);
    path.Add(node);

    foreach (var next in GetDirectDependencies(node))
    {
        // Anything on the recursion stack has necessarily been visited already.
        if (recursionStack.Contains(next))
        {
            var loopStart = path.IndexOf(next);
            if (loopStart >= 0)
            {
                // Cycle = path from the repeated package onwards, closed with that package
                cycles.Add([.. path.Skip(loopStart), next]);
            }

            continue;
        }

        if (!visited.Contains(next))
        {
            DetectCyclesRecursive(next, visited, recursionStack, path, cycles);
        }
    }

    // Leave the node: it is no longer on the current path
    path.RemoveAt(path.Count - 1);
    recursionStack.Remove(node);
}
/// <summary>
/// Assigns each node its breadth-first distance from the root nodes.
/// Roots get depth 0; nodes that no root reaches keep depth -1.
/// </summary>
public void CalculateDepths()
{
    // Roots are the packages nothing else depends on; capture them before touching nodes
    var rootNames = RootNodes.Select(static root => root.NormalizedName).ToArray();

    // Start every node at "unknown"
    foreach (var (key, existing) in _nodes.ToArray())
    {
        _nodes[key] = existing with { Depth = -1 };
    }

    var frontier = new Queue<(string Node, int Depth)>();
    foreach (var rootName in rootNames)
    {
        frontier.Enqueue((rootName, 0));
    }

    // Breadth-first: the first time a node is dequeued is at its smallest depth
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    while (frontier.TryDequeue(out var entry))
    {
        var (current, level) = entry;
        if (!seen.Add(current))
        {
            continue;
        }

        if (_nodes.TryGetValue(current, out var record))
        {
            _nodes[current] = record with { Depth = level };
        }

        foreach (var child in GetDirectDependencies(current))
        {
            if (seen.Contains(child))
            {
                continue;
            }

            frontier.Enqueue((child, level + 1));
        }
    }
}
/// <summary>
/// Stores on every node the size of its transitive closure
/// (the number of distinct packages reachable from it).
/// </summary>
public void CalculateTransitiveCounts()
{
    foreach (var entry in CalculateTransitiveClosure())
    {
        if (!_nodes.TryGetValue(entry.Key, out var existing))
        {
            continue;
        }

        _nodes[entry.Key] = existing with { TransitiveDependencyCount = entry.Value.Count };
    }
}
/// <summary>
/// Orders the packages so that every package appears after the packages it depends on.
/// </summary>
/// <returns>
/// The normalized names in dependency-first order, or an empty array when the graph
/// contains a cycle.
/// </returns>
public ImmutableArray<string> TopologicalSort()
{
    var ordered = new List<string>(_nodes.Count);
    var finished = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var inProgress = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var package in _nodes.Keys)
    {
        if (TopologicalSortVisit(package, finished, inProgress, ordered))
        {
            continue;
        }

        // A cycle makes a topological order impossible
        return ImmutableArray<string>.Empty;
    }

    // Post-order DFS already lists dependencies before their dependents
    return ordered.ToImmutableArray();
}
// Post-order DFS step of TopologicalSort. `temp` marks packages on the current DFS path,
// `visited` marks packages already emitted to `result`. Returns false as soon as a cycle
// is found, true otherwise.
private bool TopologicalSortVisit(
    string node,
    HashSet<string> visited,
    HashSet<string> temp,
    List<string> result)
{
    if (temp.Contains(node))
    {
        return false; // Back on the current path: cycle
    }

    if (visited.Contains(node))
    {
        return true; // Already emitted
    }

    temp.Add(node);

    // All() stops at the first dependency that reports a cycle
    var acyclic = GetDirectDependencies(node)
        .All(dependency => TopologicalSortVisit(dependency, visited, temp, result));
    if (!acyclic)
    {
        return false;
    }

    temp.Remove(node);
    visited.Add(node);
    result.Add(node);
    return true;
}
/// <summary>
/// Produces the comparison key for a package name: lower-cased (invariant culture) with
/// every '-' and '.' replaced by '_'.
/// </summary>
private static string NormalizeName(string name)
{
    var characters = name.ToLowerInvariant().ToCharArray();
    for (var index = 0; index < characters.Length; index++)
    {
        if (characters[index] is '-' or '.')
        {
            characters[index] = '_';
        }
    }

    return new string(characters);
}
}
/// <summary>
/// Represents a package node in the dependency graph.
/// </summary>
/// <param name="Name">Package name as it was first supplied to the graph.</param>
/// <param name="NormalizedName">Normalized form of the name; this is the key the graph stores the node under.</param>
/// <param name="Version">Package version, or null when it is not known.</param>
/// <param name="IsInstalled">Whether the package was added as an installed package.</param>
/// <param name="Depth">Distance from the root nodes as assigned by DependencyGraph.CalculateDepths; -1 until assigned.</param>
/// <param name="TransitiveDependencyCount">Number of transitive dependencies as assigned by DependencyGraph.CalculateTransitiveCounts; 0 until assigned.</param>
internal sealed record DependencyNode(
string Name,
string NormalizedName,
string? Version,
bool IsInstalled,
int Depth,
int TransitiveDependencyCount);
/// <summary>
/// Represents a dependency edge in the graph.
/// </summary>
/// <param name="From">Normalized name of the depending package.</param>
/// <param name="To">Normalized name of the package depended upon.</param>
/// <param name="VersionConstraint">Version constraint attached to the dependency, or null when none was given.</param>
/// <param name="IsOptional">Whether the dependency was marked optional.</param>
internal sealed record DependencyEdge(
string From,
string To,
string? VersionConstraint,
bool IsOptional);

View File

@@ -0,0 +1,254 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Dependencies;
/// <summary>
/// Resolves transitive dependencies for Python packages.
/// </summary>
internal sealed partial class TransitiveDependencyResolver
{
// Pattern to parse PEP 508 dependency specification
// Examples: requests>=2.0, flask[async]<3.0, django>=3.2,<4.0; python_version>='3.8'
// Named groups:
//   name       - the package name (letters, digits, '.', '_', '-'; must start and end alphanumeric)
//   extras     - the text inside "[...]" after the name, if present
//   constraint - one or more comma-separated "<operator><version>" clauses, if present
//   marker     - everything after a ';', if present
// The whole input must match (anchored with ^ and $).
[GeneratedRegex(
@"^(?<name>[a-zA-Z0-9](?:[a-zA-Z0-9._-]*[a-zA-Z0-9])?)(?:\s*\[(?<extras>[^\]]+)\])?\s*(?<constraint>(?:[<>=!~]=?\s*\S+(?:\s*,\s*[<>=!~]=?\s*\S+)*))?(?:\s*;\s*(?<marker>.+))?$",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex DependencyPattern();
/// <summary>
/// Builds a dependency graph from installed packages.
/// </summary>
/// <remarks>
/// Every package becomes an installed node, and every dependency string that
/// <c>ParseDependency</c> can parse becomes an edge (creating a non-installed node for a
/// dependency that is not among <paramref name="packages"/>). Packages that share a
/// normalized name are merged into a single node instead of causing a failure.
/// Depths and transitive counts are calculated before the graph is returned.
/// </remarks>
/// <param name="packages">The installed packages; enumerated once.</param>
/// <returns>The populated graph.</returns>
/// <exception cref="ArgumentNullException"><paramref name="packages"/> is null.</exception>
public DependencyGraph BuildGraph(IEnumerable<PythonPackageInfo> packages)
{
    ArgumentNullException.ThrowIfNull(packages);

    // Materialize once: the sequence is walked twice below and may be lazy.
    var installed = packages as IReadOnlyCollection<PythonPackageInfo> ?? packages.ToList();

    var graph = new DependencyGraph();

    // Add all packages as nodes first so edges never downgrade them to placeholders
    foreach (var package in installed)
    {
        graph.AddNode(package.Name, package.Version, isInstalled: true);
    }

    // Add dependency edges
    foreach (var package in installed)
    {
        foreach (var depString in package.Dependencies)
        {
            var parsed = ParseDependency(depString);
            if (parsed is not null)
            {
                graph.AddEdge(
                    package.Name,
                    parsed.Name,
                    parsed.Constraint,
                    parsed.IsOptional);
            }
        }
    }

    // Calculate depths and transitive counts
    graph.CalculateDepths();
    graph.CalculateTransitiveCounts();
    return graph;
}
/// <summary>
/// Builds the dependency graph for <paramref name="packages"/> and derives summary
/// information from it: transitive closure, cycles, topological order, maximum depth,
/// root count, the ten most depended-upon packages and the packages that are referenced
/// but not installed.
/// </summary>
public TransitiveDependencyAnalysis Analyze(IEnumerable<PythonPackageInfo> packages)
{
    var graph = BuildGraph(packages);
    var closure = graph.CalculateTransitiveClosure();
    var cycles = graph.DetectCycles();
    var sortedOrder = graph.TopologicalSort();

    // Deepest assigned depth; 0 when no node has a depth
    var maxDepth = 0;
    foreach (var node in graph.Nodes.Values)
    {
        if (node.Depth > maxDepth)
        {
            maxDepth = node.Depth;
        }
    }

    var directDependencyCount = graph.RootNodes.Count();

    // Distinct packages that appear in any package's closure
    var everyTransitive = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var reachable in closure.Values)
    {
        everyTransitive.UnionWith(reachable);
    }

    var totalTransitiveDependencies = everyTransitive.Count;

    // Count incoming edges per package
    var dependentCounts = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    foreach (var node in graph.Nodes.Values)
    {
        foreach (var dependency in graph.GetDirectDependencies(node.NormalizedName))
        {
            dependentCounts[dependency] = dependentCounts.GetValueOrDefault(dependency) + 1;
        }
    }

    var mostDepended = dependentCounts
        .OrderByDescending(static pair => pair.Value)
        .Take(10)
        .Select(static pair => (pair.Key, pair.Value))
        .ToImmutableArray();

    // Referenced by some package but never added as installed
    var missingDependencies =
        (from node in graph.Nodes.Values
         where !node.IsInstalled
         select node.Name).ToImmutableArray();

    return new TransitiveDependencyAnalysis(
        Graph: graph,
        TransitiveClosure: closure,
        Cycles: cycles,
        TopologicalOrder: sortedOrder,
        MaxDepth: maxDepth,
        DirectDependencyCount: directDependencyCount,
        TotalTransitiveDependencies: totalTransitiveDependencies,
        MostDependedUpon: mostDepended,
        MissingDependencies: missingDependencies,
        HasCircularDependencies: !cycles.IsEmpty);
}
/// <summary>
/// Gets all transitive dependencies for a specific package.
/// </summary>
/// <param name="graph">Graph to walk.</param>
/// <param name="packageName">Name of the starting package; it is normalized before the walk.</param>
/// <returns>
/// Names of every node reachable from the package (the package itself excluded), sorted ordinally.
/// Reachable names that have no node in the graph are omitted.
/// </returns>
/// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
public ImmutableArray<string> GetTransitiveDependencies(
    DependencyGraph graph,
    string packageName)
{
    ArgumentNullException.ThrowIfNull(graph);
    ArgumentNullException.ThrowIfNull(packageName);

    var visited = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var result = new List<string>();

    CollectTransitive(graph, NormalizeName(packageName), visited, result, excludeRoot: true);

    // Ordinal ordering keeps the output identical regardless of the process culture or
    // globalization mode; the default string comparer is culture-sensitive.
    result.Sort(StringComparer.Ordinal);
    return [.. result];
}
// Depth-first, pre-order walk from packageName. Each name is handled at most once (tracked in
// `visited`); the display name of every handled node that exists in the graph is appended to
// `result`, except the starting node when excludeRoot is true.
private static void CollectTransitive(
    DependencyGraph graph,
    string packageName,
    HashSet<string> visited,
    List<string> result,
    bool excludeRoot)
{
    // Explicit stack instead of recursion; only the first entry can carry the "suppress" flag.
    var pending = new Stack<(string Name, bool Suppress)>();
    pending.Push((packageName, excludeRoot));

    while (pending.TryPop(out var current))
    {
        var alreadySeen = !visited.Add(current.Name);
        if (alreadySeen)
        {
            continue;
        }

        if (!current.Suppress && graph.Nodes.TryGetValue(current.Name, out var found))
        {
            result.Add(found.Name);
        }

        // Push children in reverse so they are popped in their original order, which keeps
        // the visit sequence the same as a recursive pre-order traversal.
        foreach (var child in Enumerable.Reverse(graph.GetDirectDependencies(current.Name)))
        {
            pending.Push((child, false));
        }
    }
}
/// <summary>
/// Parses a single PEP 508 dependency specification into its parts.
/// </summary>
/// <param name="spec">Raw specification text; surrounding whitespace is ignored.</param>
/// <returns>
/// The parsed dependency, or <c>null</c> when <paramref name="spec"/> is null, blank, or does
/// not match the dependency pattern.
/// </returns>
public static ParsedDependency? ParseDependency(string spec)
{
    if (string.IsNullOrWhiteSpace(spec))
    {
        return null;
    }

    var parsed = DependencyPattern().Match(spec.Trim());
    if (!parsed.Success)
    {
        return null;
    }

    // Trimmed text of an optional group, or null when the group did not participate.
    static string? TextOrNull(Group group) => group.Success ? group.Value.Trim() : null;

    var groups = parsed.Groups;
    var packageName = groups["name"].Value;

    string[] requestedExtras = [];
    if (groups["extras"].Success)
    {
        requestedExtras = groups["extras"].Value.Split(',').Select(e => e.Trim()).ToArray();
    }

    var versionSpec = TextOrNull(groups["constraint"]);
    var environmentMarker = TextOrNull(groups["marker"]);

    // Flagged optional when extras are listed, or when the marker text mentions "extra".
    var mentionsExtra = environmentMarker is not null
        && environmentMarker.Contains("extra", StringComparison.OrdinalIgnoreCase);
    var optional = requestedExtras.Length > 0 || mentionsExtra;

    return new ParsedDependency(
        Name: packageName,
        NormalizedName: NormalizeName(packageName),
        Extras: [.. requestedExtras],
        Constraint: versionSpec,
        Marker: environmentMarker,
        IsOptional: optional);
}
// Canonical lookup form of a package name: lower-cased, with '-' and '.' each replaced by '_'.
private static string NormalizeName(string name)
{
    var lowered = name.ToLowerInvariant();
    var withoutHyphens = lowered.Replace('-', '_');
    return withoutHyphens.Replace('.', '_');
}
}
/// <summary>
/// Result of transitive dependency analysis.
/// The parameter descriptions below reflect how <c>TransitiveDependencyResolver.Analyze</c> fills them in.
/// </summary>
/// <param name="Graph">The dependency graph that was analysed.</param>
/// <param name="TransitiveClosure">
/// Transitive closure calculated by the graph. <see cref="GetReverseDependencies"/> treats each
/// value as the set of packages the key depends on.
/// </param>
/// <param name="Cycles">Cycles detected in the graph; empty when there are none.</param>
/// <param name="TopologicalOrder">Topological ordering produced by the graph.</param>
/// <param name="MaxDepth">Largest non-negative node depth, or 0 when no node has one.</param>
/// <param name="DirectDependencyCount">Number of root nodes in the graph.</param>
/// <param name="TotalTransitiveDependencies">Number of distinct names (ignoring case) across all closure sets.</param>
/// <param name="MostDependedUpon">Up to ten packages with the most direct dependents, highest first.</param>
/// <param name="MissingDependencies">Names of graph nodes that are not marked as installed.</param>
/// <param name="HasCircularDependencies">Whether <paramref name="Cycles"/> is non-empty.</param>
internal sealed record TransitiveDependencyAnalysis(
    DependencyGraph Graph,
    ImmutableDictionary<string, ImmutableHashSet<string>> TransitiveClosure,
    ImmutableArray<ImmutableArray<string>> Cycles,
    ImmutableArray<string> TopologicalOrder,
    int MaxDepth,
    int DirectDependencyCount,
    int TotalTransitiveDependencies,
    ImmutableArray<(string Package, int DependentCount)> MostDependedUpon,
    ImmutableArray<string> MissingDependencies,
    bool HasCircularDependencies)
{
    /// <summary>
    /// Gets all packages that transitively depend on a given package.
    /// </summary>
    /// <param name="packageName">Package to look for; lower-cased with '-' and '.' replaced by '_' before the lookup.</param>
    /// <returns>Keys of <see cref="TransitiveClosure"/> whose set contains the package, sorted ordinally.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="packageName"/> is <c>null</c>.</exception>
    public ImmutableArray<string> GetReverseDependencies(string packageName)
    {
        ArgumentNullException.ThrowIfNull(packageName);

        var normalized = packageName.ToLowerInvariant().Replace('-', '_').Replace('.', '_');

        // Ordinal ordering: the default string comparer is culture-sensitive, which would let
        // the result order vary with the process culture or globalization mode.
        return [.. TransitiveClosure
            .Where(entry => entry.Value.Contains(normalized))
            .Select(entry => entry.Key)
            .OrderBy(name => name, StringComparer.Ordinal)];
    }

    /// <summary>
    /// Gets packages at a specific depth in the dependency tree.
    /// </summary>
    /// <param name="depth">Depth value to match exactly against each node's depth.</param>
    /// <returns>Names of the matching nodes, sorted ordinally.</returns>
    public ImmutableArray<string> GetPackagesAtDepth(int depth) =>
        [.. Graph.Nodes.Values
            .Where(n => n.Depth == depth)
            .Select(n => n.Name)
            .OrderBy(n => n, StringComparer.Ordinal)];
}
/// <summary>
/// A parsed PEP 508 dependency specification.
/// The parameter descriptions reflect how <c>TransitiveDependencyResolver.ParseDependency</c> fills them in.
/// </summary>
/// <param name="Name">Package name exactly as written in the specification.</param>
/// <param name="NormalizedName">The name lower-cased, with '-' and '.' replaced by '_'.</param>
/// <param name="Extras">Extras listed in square brackets, individually trimmed; empty when none were given.</param>
/// <param name="Constraint">Trimmed version constraint text, or <c>null</c> when the specification has none.</param>
/// <param name="Marker">Trimmed environment marker text following ';', or <c>null</c> when absent.</param>
/// <param name="IsOptional">True when extras are listed or the marker text mentions "extra".</param>
internal sealed record ParsedDependency(
string Name,
string NormalizedName,
ImmutableArray<string> Extras,
string? Constraint,
string? Marker,
bool IsOptional);

View File

@@ -0,0 +1,100 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
/// <summary>
/// Classifies the scope/purpose of a Python package dependency.
/// Similar to Maven's compile/runtime/test/provided scopes.
/// </summary>
/// <remarks>
/// <see cref="Unknown"/> is the zero value, so it is what <c>default(PythonPackageScope)</c> yields.
/// </remarks>
internal enum PythonPackageScope
{
/// <summary>
/// Unknown or unclassified scope. This is the zero/default value.
/// </summary>
Unknown = 0,
/// <summary>
/// Production dependency - required for the application to run.
/// Equivalent to Maven's "compile" scope.
/// </summary>
Production = 1,
/// <summary>
/// Development dependency - used during development only.
/// Includes testing frameworks, linters, formatters, type checkers.
/// Equivalent to Maven's "test" scope.
/// </summary>
Development = 2,
/// <summary>
/// Documentation dependency - used only for building docs.
/// </summary>
Documentation = 3,
/// <summary>
/// Build dependency - used only during package building.
/// Equivalent to Maven's "provided" scope.
/// </summary>
Build = 4,
/// <summary>
/// Optional dependency - installed via extras.
/// </summary>
Optional = 5
}
/// <summary>
/// Risk level associated with a package scope.
/// Production dependencies are higher risk than development dependencies.
/// </summary>
/// <remarks>
/// The numeric values increase with risk (Low &lt; Medium &lt; High), so levels can be compared
/// numerically. <see cref="Unknown"/> is 0 and therefore sorts below <see cref="Low"/>.
/// </remarks>
internal enum ScopeRiskLevel
{
/// <summary>
/// Unknown risk level. This is the zero/default value.
/// </summary>
Unknown = 0,
/// <summary>
/// Low risk - documentation or build-only dependencies.
/// </summary>
Low = 1,
/// <summary>
/// Medium risk - development/test dependencies, and optional dependencies installed via extras.
/// </summary>
Medium = 2,
/// <summary>
/// High risk - production dependencies.
/// </summary>
High = 3
}
/// <summary>
/// Helpers that answer questions about a <see cref="PythonPackageScope"/> value.
/// </summary>
internal static class PythonPackageScopeExtensions
{
    /// <summary>
    /// Maps a scope to its risk level: production is high; development and optional are medium;
    /// documentation and build are low; anything else is unknown.
    /// </summary>
    public static ScopeRiskLevel GetRiskLevel(this PythonPackageScope scope)
    {
        switch (scope)
        {
            case PythonPackageScope.Production:
                return ScopeRiskLevel.High;

            case PythonPackageScope.Development:
            case PythonPackageScope.Optional:
                return ScopeRiskLevel.Medium;

            case PythonPackageScope.Documentation:
            case PythonPackageScope.Build:
                return ScopeRiskLevel.Low;

            default:
                return ScopeRiskLevel.Unknown;
        }
    }

    /// <summary>
    /// True for the scopes counted as runtime dependencies: production and optional.
    /// </summary>
    public static bool IsRuntime(this PythonPackageScope scope)
    {
        return scope == PythonPackageScope.Production
            || scope == PythonPackageScope.Optional;
    }

    /// <summary>
    /// True for the scopes counted as development-only: development, documentation and build.
    /// </summary>
    public static bool IsDevelopmentOnly(this PythonPackageScope scope)
    {
        return scope == PythonPackageScope.Development
            || scope == PythonPackageScope.Documentation
            || scope == PythonPackageScope.Build;
    }
}

View File

@@ -0,0 +1,360 @@
using System.Collections.Frozen;
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
/// <summary>
/// Classifies Python packages into scope categories (production, development, etc.).
/// </summary>
internal static class PythonScopeClassifier
{
// NOTE: ToFrozenSet() without an argument uses the default (case-sensitive) comparer and does
// not inherit the comparer of the source HashSet. The comparer is therefore passed explicitly
// to every set below so that lookups really are case-insensitive.

/// <summary>
/// Well-known development/test packages (case-insensitive lookup).
/// </summary>
private static readonly FrozenSet<string> DevelopmentPackages =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        // Testing frameworks
        "pytest", "pytest-cov", "pytest-asyncio", "pytest-mock", "pytest-xdist",
        "pytest-timeout", "pytest-benchmark", "pytest-django", "pytest-flask",
        "unittest2", "nose", "nose2", "tox", "nox", "hypothesis",
        "coverage", "codecov", "coveralls",
        "mock", "mockito", "responses", "httpretty", "vcrpy", "freezegun",
        "factory-boy", "faker", "mimesis",
        // Type checkers
        "mypy", "pyright", "pyre-check", "pytype",
        "types-requests", "types-pyyaml", "types-setuptools",
        // Linters and formatters
        "flake8", "pylint", "pyflakes", "pycodestyle", "pydocstyle",
        "black", "autopep8", "yapf", "isort", "autoflake",
        "ruff", "blue",
        "bandit", "safety", "pip-audit",
        // Code quality
        "pre-commit", "commitizen",
        "radon", "xenon", "mccabe",
        "vulture", "dead",
        // Debugging
        "ipdb", "pdb++", "pudb", "debugpy",
        "snoop", "icecream", "devtools",
        // Profiling
        "py-spy", "memory-profiler", "line-profiler", "scalene",
        "pyinstrument", "yappi",
        // Development tools
        "ipython", "jupyter", "notebook", "jupyterlab",
        "bpython", "ptpython",
        "watchdog", "watchfiles", "hupper",
        "rope", "jedi", "python-lsp-server",
        // Build tools (often dev-only)
        "build", "twine", "flit", "poetry", "hatch", "pdm",
        "setuptools-scm", "versioneer", "bump2version", "bumpversion",
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Well-known documentation packages (case-insensitive lookup).
/// </summary>
private static readonly FrozenSet<string> DocumentationPackages =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "sphinx", "sphinx-rtd-theme", "sphinx-autodoc-typehints",
        "sphinxcontrib-napoleon", "sphinxcontrib-apidoc",
        "mkdocs", "mkdocs-material", "mkdocstrings",
        "pdoc", "pdoc3", "pydoc-markdown",
        "docutils", "recommonmark", "myst-parser",
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Well-known build-only packages (case-insensitive lookup).
/// </summary>
private static readonly FrozenSet<string> BuildPackages =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "wheel", "setuptools", "pip",
        "cython", "mypyc",
        "pybind11", "cffi", "swig",
        "meson", "cmake", "ninja",
        "scikit-build", "scikit-build-core",
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Extra names that indicate development scope (case-insensitive lookup).
/// </summary>
private static readonly FrozenSet<string> DevelopmentExtras =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "dev", "develop", "development",
        "test", "tests", "testing",
        "lint", "linting",
        "check", "checks",
        "quality",
        "typing", "types",
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Extra names that indicate documentation scope (case-insensitive lookup).
/// </summary>
private static readonly FrozenSet<string> DocumentationExtras =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "doc", "docs", "documentation",
        "sphinx",
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// Determines the scope of a package by consulting, in order, the lock file section, the
/// requirements file name, the extras that pulled it in, and finally its name. The first
/// source that yields something other than <see cref="PythonPackageScope.Unknown"/> wins.
/// </summary>
/// <param name="package">The package to classify.</param>
/// <param name="lockFileSection">The lock file section (e.g., "default", "develop", "main", "dev").</param>
/// <param name="requirementsFile">The requirements file name (e.g., "requirements-dev.txt").</param>
/// <param name="installedExtras">Extras that triggered this package's installation.</param>
/// <returns>The classified scope, or <see cref="PythonPackageScope.Unknown"/> when no source decides.</returns>
/// <exception cref="ArgumentNullException"><paramref name="package"/> is <c>null</c>.</exception>
public static PythonPackageScope Classify(
    PythonPackageInfo package,
    string? lockFileSection = null,
    string? requirementsFile = null,
    IEnumerable<string>? installedExtras = null)
{
    ArgumentNullException.ThrowIfNull(package);

    // Each later source is consulted only while the answer is still Unknown.
    var resolved = ClassifyFromLockFileSection(lockFileSection);

    if (resolved == PythonPackageScope.Unknown)
    {
        resolved = ClassifyFromRequirementsFile(requirementsFile);
    }

    if (resolved == PythonPackageScope.Unknown)
    {
        resolved = ClassifyFromExtras(installedExtras);
    }

    if (resolved == PythonPackageScope.Unknown)
    {
        resolved = ClassifyFromPackageName(package.NormalizedName);
    }

    return resolved;
}
/// <summary>
/// Classifies every package and returns the scopes keyed by normalized package name
/// (case-insensitive). The optional maps are looked up by the same normalized name.
/// </summary>
/// <param name="packages">Packages to classify.</param>
/// <param name="lockFileSections">Optional map from normalized name to lock file section.</param>
/// <param name="requirementsFiles">Optional map from normalized name to requirements file name.</param>
/// <param name="packageExtras">Optional map from normalized name to the extras that installed the package.</param>
/// <returns>One entry per distinct normalized name; a later duplicate replaces an earlier one.</returns>
public static ImmutableDictionary<string, PythonPackageScope> ClassifyAll(
    IEnumerable<PythonPackageInfo> packages,
    IReadOnlyDictionary<string, string>? lockFileSections = null,
    IReadOnlyDictionary<string, string>? requirementsFiles = null,
    IReadOnlyDictionary<string, IEnumerable<string>>? packageExtras = null)
{
    // Value stored under `key`, or null when the map is absent or has no such entry.
    static T? Find<T>(IReadOnlyDictionary<string, T>? map, string key) where T : class =>
        map is not null && map.TryGetValue(key, out var hit) ? hit : null;

    var scopes = new Dictionary<string, PythonPackageScope>(StringComparer.OrdinalIgnoreCase);

    foreach (var package in packages)
    {
        var key = package.NormalizedName;

        scopes[key] = Classify(
            package,
            Find(lockFileSections, key),
            Find(requirementsFiles, key),
            Find(packageExtras, key));
    }

    return scopes.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Maps a lock file section or dependency group name to a scope.
/// </summary>
/// <param name="section">Section or group name; matched case-insensitively and without trimming.</param>
/// <returns>
/// The scope for a recognised name; <see cref="PythonPackageScope.Unknown"/> for null, blank or
/// unrecognised input.
/// </returns>
public static PythonPackageScope ClassifyFromLockFileSection(string? section)
{
    if (string.IsNullOrWhiteSpace(section))
    {
        return PythonPackageScope.Unknown;
    }

    var key = section.ToLowerInvariant();

    return key switch
    {
        // Production: Pipfile.lock "default", poetry "main", pdm "production", Pipfile "packages"
        "default" or "main" or "production" or "packages"
            => PythonPackageScope.Production,

        // Development: Pipfile.lock "develop", poetry "dev"/"test", pdm "development", Pipfile "dev-packages"
        "develop" or "dev" or "test" or "development" or "dev-packages"
            => PythonPackageScope.Development,

        // Documentation: poetry "docs" group
        "docs" => PythonPackageScope.Documentation,

        _ => PythonPackageScope.Unknown
    };
}
/// <summary>
/// Classifies based on requirements file name.
/// </summary>
/// <remarks>
/// The directory and final extension are removed and the name is lower-cased. Known production
/// names are matched exactly. Otherwise the name is split on '-', '_' and '.' and whole tokens
/// are compared, so names such as <c>requirements-docker.txt</c>, <c>requirements-scipy.txt</c>
/// or <c>requirements-latest.txt</c> are not mistaken for documentation, CI or test files merely
/// because they contain "doc", "ci" or "test" as a substring. A name that matches nothing stays
/// <see cref="PythonPackageScope.Unknown"/>.
/// </remarks>
/// <param name="fileName">File name or path of the requirements file.</param>
/// <returns>The scope implied by the name, or <see cref="PythonPackageScope.Unknown"/>.</returns>
public static PythonPackageScope ClassifyFromRequirementsFile(string? fileName)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        return PythonPackageScope.Unknown;
    }

    var name = Path.GetFileNameWithoutExtension(fileName).ToLowerInvariant();

    // Production files
    if (name is "requirements" or "requirements.prod" or "requirements-prod" or
        "requirements.production" or "requirements-production" or
        "requirements.main" or "requirements-main" or
        "requirements.lock" or "requirements-lock")
    {
        return PythonPackageScope.Production;
    }

    static bool IsDevelopmentToken(string token) =>
        token is "dev" or "devel" or "develop" or "development"
            or "test" or "tests" or "testing"
            or "lint" or "linting"
            or "check" or "checks"
            or "ci";

    static bool IsDocumentationToken(string token) =>
        token is "doc" or "docs" or "documentation" or "sphinx";

    static bool IsBuildToken(string token) =>
        token is "build" or "wheel" or "wheels";

    var tokens = name.Split(new[] { '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries);

    // Precedence: development, then documentation, then build.
    if (tokens.Any(IsDevelopmentToken))
    {
        return PythonPackageScope.Development;
    }

    if (tokens.Any(IsDocumentationToken))
    {
        return PythonPackageScope.Documentation;
    }

    if (tokens.Any(IsBuildToken))
    {
        return PythonPackageScope.Build;
    }

    return PythonPackageScope.Unknown;
}
/// <summary>
/// Classifies based on extras that triggered the installation.
/// </summary>
/// <param name="extras">Extra names; null or blank entries are ignored. The sequence is enumerated once.</param>
/// <returns>
/// The scope of the first entry that is a known development or documentation extra;
/// <see cref="PythonPackageScope.Optional"/> when at least one non-blank extra is present but
/// none is known; otherwise <see cref="PythonPackageScope.Unknown"/>.
/// </returns>
public static PythonPackageScope ClassifyFromExtras(IEnumerable<string>? extras)
{
    if (extras is null)
    {
        return PythonPackageScope.Unknown;
    }

    // Tracked during the single pass so the sequence does not have to be enumerated again.
    var sawExtra = false;

    foreach (var extra in extras)
    {
        if (string.IsNullOrWhiteSpace(extra))
        {
            continue;
        }

        sawExtra = true;

        // Lower-cased explicitly so the lookup does not depend on the sets' comparer.
        var normalizedExtra = extra.Trim().ToLowerInvariant();

        if (DevelopmentExtras.Contains(normalizedExtra))
        {
            return PythonPackageScope.Development;
        }

        if (DocumentationExtras.Contains(normalizedExtra))
        {
            return PythonPackageScope.Documentation;
        }
    }

    // Installed via an extra, but not a known dev/doc one: treat it as optional.
    return sawExtra ? PythonPackageScope.Optional : PythonPackageScope.Unknown;
}
/// <summary>
/// Classifies based on well-known package names.
/// </summary>
/// <param name="normalizedName">Package name; '-' and '_' are treated as interchangeable and case is ignored.</param>
/// <returns>
/// Development, documentation or build scope for a recognised name or name prefix;
/// <see cref="PythonPackageScope.Unknown"/> otherwise (including null or blank input).
/// </returns>
public static PythonPackageScope ClassifyFromPackageName(string normalizedName)
{
    if (string.IsNullOrWhiteSpace(normalizedName))
    {
        return PythonPackageScope.Unknown;
    }

    // Package names are seen with both '-' and '_' as separator, while the known-name sets use
    // hyphens. Both spellings are lower-cased and checked.
    var lowered = normalizedName.ToLowerInvariant();
    var nameWithUnderscores = lowered.Replace('-', '_');
    var nameWithHyphens = lowered.Replace('_', '-');

    if (DevelopmentPackages.Contains(nameWithUnderscores) ||
        DevelopmentPackages.Contains(nameWithHyphens))
    {
        return PythonPackageScope.Development;
    }

    if (DocumentationPackages.Contains(nameWithUnderscores) ||
        DocumentationPackages.Contains(nameWithHyphens))
    {
        return PythonPackageScope.Documentation;
    }

    if (BuildPackages.Contains(nameWithUnderscores) ||
        BuildPackages.Contains(nameWithHyphens))
    {
        return PythonPackageScope.Build;
    }

    // Plugin-style prefixes, matched ordinally on the hyphenated form so the result does not
    // depend on the process culture.
    bool HasPrefix(string prefix) => nameWithHyphens.StartsWith(prefix, StringComparison.Ordinal);

    if (HasPrefix("sphinx-") || HasPrefix("sphinxcontrib-"))
    {
        return PythonPackageScope.Documentation;
    }

    if (HasPrefix("pytest-") || HasPrefix("flake8-") ||
        HasPrefix("pylint-") || HasPrefix("mypy-") ||
        HasPrefix("types-"))
    {
        return PythonPackageScope.Development;
    }

    // Default to unknown - will typically be treated as production
    return PythonPackageScope.Unknown;
}
/// <summary>
/// Determines if a package should be included in vulnerability scanning.
/// </summary>
/// <remarks>
/// Only scopes whose risk level is positively <see cref="ScopeRiskLevel.Low"/> (documentation and
/// build) are excluded. A scope of unknown risk is scanned: an unclassified package is typically
/// treated as production, so skipping it would hide the very packages that matter most.
/// </remarks>
/// <param name="scope">Scope of the package.</param>
/// <returns><c>true</c> unless the scope's risk level is <see cref="ScopeRiskLevel.Low"/>.</returns>
public static bool ShouldScanForVulnerabilities(PythonPackageScope scope) =>
    scope.GetRiskLevel() != ScopeRiskLevel.Low;
/// <summary>
/// Returns the display text for a scope; unrecognised values yield "Unknown scope".
/// </summary>
/// <param name="scope">Scope to describe.</param>
/// <returns>A short English description of the scope.</returns>
public static string GetDescription(PythonPackageScope scope)
{
    switch (scope)
    {
        case PythonPackageScope.Production:
            return "Production dependency";
        case PythonPackageScope.Development:
            return "Development/test dependency";
        case PythonPackageScope.Documentation:
            return "Documentation dependency";
        case PythonPackageScope.Build:
            return "Build-time dependency";
        case PythonPackageScope.Optional:
            return "Optional dependency (via extras)";
        default:
            return "Unknown scope";
    }
}
}

View File

@@ -4,7 +4,7 @@
<LangVersion>preview</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>