Add comprehensive tests for Go and Python version conflict detection and licensing normalization
- Implemented GoVersionConflictDetectorTests to validate pseudo-version detection, conflict analysis, and conflict retrieval for Go modules.
- Created VersionConflictDetectorTests for Python to assess conflict detection across various version scenarios, including major, minor, and patch differences.
- Added SpdxLicenseNormalizerTests to ensure accurate normalization of SPDX license strings and classifiers.
- Developed VendoredPackageDetectorTests to identify vendored packages and extract embedded packages from Python packages, including handling of vendor directories and known vendored packages.
This commit is contained in:
@@ -0,0 +1,389 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Conflicts;
|
||||
|
||||
/// <summary>
/// Detects version conflicts where the same Python package appears with multiple versions.
/// Common in containers with multiple virtualenvs or conflicting requirements.
/// </summary>
internal static partial class VersionConflictDetector
{
    /// <summary>
    /// Analyzes discovered packages for version conflicts.
    /// </summary>
    /// <param name="packages">Packages to inspect; entries without a version are ignored.</param>
    /// <returns>
    /// The conflicts found, ordered by normalized package name, or
    /// <see cref="VersionConflictAnalysis.Empty"/> when no package has more than one distinct version.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="packages"/> is null.</exception>
    public static VersionConflictAnalysis Analyze(IEnumerable<PythonPackageInfo> packages)
    {
        ArgumentNullException.ThrowIfNull(packages);

        // A package is in conflict when its group (by normalized name) carries more than
        // one distinct version string, compared case-insensitively.
        var conflicts = packages
            .Where(p => !string.IsNullOrWhiteSpace(p.Version))
            .GroupBy(p => p.NormalizedName, StringComparer.OrdinalIgnoreCase)
            .Where(g => g.Select(p => p.Version).Distinct(StringComparer.OrdinalIgnoreCase).Count() > 1)
            .Select(g => CreateConflict(g.Key, g))
            .OrderBy(c => c.NormalizedName, StringComparer.Ordinal)
            .ToImmutableArray();

        if (conflicts.IsEmpty)
        {
            return VersionConflictAnalysis.Empty;
        }

        return new VersionConflictAnalysis(
            conflicts,
            conflicts.Length,
            conflicts.Max(c => c.Severity));
    }

    /// <summary>
    /// Analyzes packages from discovery result for version conflicts.
    /// </summary>
    /// <param name="discoveryResult">Discovery result whose <c>Packages</c> are analyzed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="discoveryResult"/> is null.</exception>
    public static VersionConflictAnalysis Analyze(PythonPackageDiscoveryResult discoveryResult)
    {
        ArgumentNullException.ThrowIfNull(discoveryResult);
        return Analyze(discoveryResult.Packages);
    }

    /// <summary>
    /// Checks if a specific package has version conflicts in the given package set.
    /// </summary>
    /// <param name="packages">Packages to inspect.</param>
    /// <param name="packageName">Package name; it is normalized before the lookup.</param>
    /// <returns>The conflict for the package, or null when it has none.</returns>
    public static PythonVersionConflict? GetConflict(
        IEnumerable<PythonPackageInfo> packages,
        string packageName)
    {
        var normalizedName = PythonPackageInfo.NormalizeName(packageName);
        return Analyze(packages).GetConflict(normalizedName);
    }

    /// <summary>
    /// Builds the conflict record for one group of same-named packages.
    /// </summary>
    private static PythonVersionConflict CreateConflict(
        string normalizedName,
        IEnumerable<PythonPackageInfo> members)
    {
        var packages = members.ToList();

        var occurrences = packages
            .Select(p => new PythonVersionOccurrence(
                p.Version!,
                p.Location,
                p.MetadataPath ?? p.Location,
                p.Kind.ToString(),
                p.InstallerTool))
            .OrderBy(o => o.Version, PythonVersionComparer.Instance)
            .ToImmutableArray();

        return new PythonVersionConflict(
            normalizedName,
            packages[0].Name, // Original non-normalized name of the first occurrence
            occurrences,
            CalculateSeverity(occurrences));
    }

    /// <summary>
    /// Rates a conflict by how far apart the versions are: epoch or major differences are
    /// <see cref="ConflictSeverity.High"/>, minor differences <see cref="ConflictSeverity.Medium"/>,
    /// anything smaller <see cref="ConflictSeverity.Low"/>.
    /// </summary>
    private static ConflictSeverity CalculateSeverity(ImmutableArray<PythonVersionOccurrence> versions)
    {
        // Same case-insensitive de-duplication as Analyze, so both agree on what "distinct" means.
        var versionStrings = versions
            .Select(v => v.Version)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToList();

        if (versionStrings.Count <= 1)
        {
            return ConflictSeverity.None;
        }

        var parsedVersions = versionStrings
            .Select(TryParsePep440Version)
            .OfType<Pep440Version>()
            .ToList();

        if (parsedVersions.Count < 2)
        {
            // Can't determine severity without parseable versions
            return ConflictSeverity.Medium;
        }

        ConflictSeverity severity;
        if (parsedVersions.Select(v => v.Epoch).Distinct().Count() > 1
            || parsedVersions.Select(v => v.Major).Distinct().Count() > 1)
        {
            // Different epochs are different version schemes; different majors are likely incompatible.
            severity = ConflictSeverity.High;
        }
        else if (parsedVersions.Select(v => v.Minor).Distinct().Count() > 1)
        {
            severity = ConflictSeverity.Medium;
        }
        else
        {
            // Only patch/micro (or pre/post/dev/local) differences
            severity = ConflictSeverity.Low;
        }

        // A version that could not be parsed has an unknown distance from the others, so it
        // must not be allowed to hide behind a "Low" rating derived from the parseable ones.
        if (parsedVersions.Count < versionStrings.Count && severity < ConflictSeverity.Medium)
        {
            severity = ConflictSeverity.Medium;
        }

        return severity;
    }

    /// <summary>
    /// Parses a PEP 440 version string.
    /// Handles: epoch, release segments, pre/post/dev releases, local versions, and the
    /// non-normalized spellings PEP 440 permits (leading "v", "-", "_" or "." separators,
    /// "alpha"/"beta"/"pre"/"preview"/"rev"/"r").
    /// </summary>
    /// <returns>The parsed version, or null when the text is not a PEP 440 version.</returns>
    private static Pep440Version? TryParsePep440Version(string version)
    {
        if (string.IsNullOrWhiteSpace(version))
        {
            return null;
        }

        // PEP 440 pattern:
        // [N!]N(.N)*[{a|b|rc}N][.postN][.devN][+local]
        var match = Pep440VersionPattern().Match(version.Trim());
        if (!match.Success)
        {
            return null;
        }

        var epoch = 0;
        if (match.Groups["epoch"].Success && !int.TryParse(match.Groups["epoch"].Value, out epoch))
        {
            // Epoch too large to represent; treat the version as unparseable rather than epoch 0.
            return null;
        }

        var releaseParts = match.Groups["release"].Value.Split('.');

        // Segments are ASCII digits (guaranteed by the pattern); TryParse only fails on overflow.
        // An overflowing segment makes the version unparseable instead of silently becoming 0.
        if (!int.TryParse(releaseParts[0], out var major))
        {
            return null;
        }

        var minor = 0;
        if (releaseParts.Length > 1 && !int.TryParse(releaseParts[1], out minor))
        {
            return null;
        }

        var micro = 0;
        if (releaseParts.Length > 2 && !int.TryParse(releaseParts[2], out micro))
        {
            return null;
        }

        return new Pep440Version(
            epoch,
            major,
            minor,
            micro,
            ValueOrNull(match.Groups["pre"]),
            ValueOrNull(match.Groups["post"]),
            ValueOrNull(match.Groups["dev"]),
            ValueOrNull(match.Groups["local"]));

        static string? ValueOrNull(Group group) => group.Success ? group.Value : null;
    }

    // PEP 440 version pattern (based on the reference pattern in PEP 440, Appendix B).
    // RegexOptions.Compiled is omitted because the regex source generator ignores it.
    [GeneratedRegex(
        @"^v?(?:(?<epoch>[0-9]+)!)?(?<release>[0-9]+(?:\.[0-9]+)*)(?:[-_.]?(?<pre>(?:alpha|a|beta|b|preview|pre|rc|c)[-_.]?[0-9]*))?(?:[-_.]?(?<post>(?:post|rev|r)[-_.]?[0-9]*))?(?:[-_.]?(?<dev>dev[-_.]?[0-9]*))?(?:\+(?<local>[a-z0-9]+(?:[-_.][a-z0-9]+)*))?$",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex Pep440VersionPattern();
}
|
||||
|
||||
/// <summary>
/// Outcome of a version conflict scan.
/// </summary>
/// <param name="Conflicts">Every conflict that was found.</param>
/// <param name="TotalConflicts">Number of conflicts reported by the scan.</param>
/// <param name="MaxSeverity">Highest severity among the conflicts.</param>
internal sealed record VersionConflictAnalysis(
    ImmutableArray<PythonVersionConflict> Conflicts,
    int TotalConflicts,
    ConflictSeverity MaxSeverity)
{
    /// <summary>
    /// Shared instance representing "no conflicts".
    /// </summary>
    public static readonly VersionConflictAnalysis Empty = new([], 0, ConflictSeverity.None);

    /// <summary>
    /// True when at least one conflict was reported.
    /// </summary>
    public bool HasConflicts
    {
        get { return TotalConflicts > 0; }
    }

    /// <summary>
    /// Looks up the conflict recorded for a package.
    /// </summary>
    /// <param name="normalizedName">Normalized package name; compared case-insensitively.</param>
    /// <returns>The first matching conflict, or null when the package has none.</returns>
    public PythonVersionConflict? GetConflict(string normalizedName)
    {
        foreach (var candidate in Conflicts)
        {
            if (string.Equals(candidate.NormalizedName, normalizedName, StringComparison.OrdinalIgnoreCase))
            {
                return candidate;
            }
        }

        return null;
    }

    /// <summary>
    /// The subset of conflicts rated <see cref="ConflictSeverity.High"/>, in their original order.
    /// </summary>
    public ImmutableArray<PythonVersionConflict> HighSeverityConflicts
    {
        get
        {
            var selected = ImmutableArray.CreateBuilder<PythonVersionConflict>();
            foreach (var conflict in Conflicts)
            {
                if (conflict.Severity == ConflictSeverity.High)
                {
                    selected.Add(conflict);
                }
            }

            return selected.ToImmutable();
        }
    }
}
|
||||
|
||||
/// <summary>
/// A single Python package that was found with more than one version.
/// </summary>
/// <param name="NormalizedName">Normalized package name the occurrences were grouped by.</param>
/// <param name="OriginalName">Package name as it appeared before normalization.</param>
/// <param name="Versions">Every occurrence of the package that took part in the conflict.</param>
/// <param name="Severity">Severity assigned to the conflict.</param>
internal sealed record PythonVersionConflict(
    string NormalizedName,
    string OriginalName,
    ImmutableArray<PythonVersionOccurrence> Versions,
    ConflictSeverity Severity)
{
    /// <summary>
    /// Version-less PURL of the package; underscores in the name are written as hyphens.
    /// </summary>
    public string Purl
    {
        get
        {
            var purlName = NormalizedName.Replace('_', '-');
            return $"pkg:pypi/{purlName}";
        }
    }

    /// <summary>
    /// Distinct version strings, in order of first appearance in <see cref="Versions"/>.
    /// </summary>
    public IEnumerable<string> UniqueVersions
    {
        get { return Versions.Select(occurrence => occurrence.Version).Distinct(); }
    }

    /// <summary>
    /// The distinct versions joined with commas.
    /// </summary>
    public string VersionsString
    {
        get { return string.Join(",", UniqueVersions); }
    }

    /// <summary>
    /// Number of distinct locations among the occurrences.
    /// </summary>
    public int LocationCount
    {
        get
        {
            var locations = new HashSet<string>();
            foreach (var occurrence in Versions)
            {
                locations.Add(occurrence.Location);
            }

            return locations.Count;
        }
    }
}
|
||||
|
||||
/// <summary>
/// One place where a particular version of a package was found.
/// </summary>
/// <param name="Version">Version string of this occurrence.</param>
/// <param name="Location">Location of the package.</param>
/// <param name="MetadataPath">Path of the metadata the occurrence was read from.</param>
/// <param name="PackageKind">Textual form of the package kind.</param>
/// <param name="InstallerTool">Installer tool recorded for the package, if any.</param>
internal sealed record PythonVersionOccurrence(
    string Version,
    string Location,
    string MetadataPath,
    string PackageKind,
    string? InstallerTool);
|
||||
|
||||
/// <summary>
/// How serious a version conflict is; higher values are more severe.
/// </summary>
internal enum ConflictSeverity
{
    /// <summary>There is no conflict.</summary>
    None = 0,

    /// <summary>Versions differ only in the micro/patch part (likely compatible).</summary>
    Low = 1,

    /// <summary>Versions differ in the minor part (may have API changes).</summary>
    Medium = 2,

    /// <summary>Versions differ in the major part or the epoch (likely incompatible).</summary>
    High = 3
}
|
||||
|
||||
/// <summary>
/// The components of a parsed PEP 440 version.
/// </summary>
/// <param name="Epoch">Version epoch.</param>
/// <param name="Major">First release segment.</param>
/// <param name="Minor">Second release segment.</param>
/// <param name="Micro">Third release segment.</param>
/// <param name="PreRelease">Pre-release marker text, or null when absent.</param>
/// <param name="PostRelease">Post-release marker text, or null when absent.</param>
/// <param name="DevRelease">Development-release marker text, or null when absent.</param>
/// <param name="LocalVersion">Local version label, or null when absent.</param>
internal sealed record Pep440Version(
    int Epoch,
    int Major,
    int Minor,
    int Micro,
    string? PreRelease,
    string? PostRelease,
    string? DevRelease,
    string? LocalVersion)
{
    /// <summary>
    /// True when the version carries a pre-release or a development-release marker.
    /// </summary>
    public bool IsPreRelease
    {
        get { return !(PreRelease is null && DevRelease is null); }
    }

    /// <summary>
    /// Epoch and release segments rendered as "epoch!major.minor.micro".
    /// </summary>
    public string ReleaseTuple
    {
        get { return $"{Epoch}!{Major}.{Minor}.{Micro}"; }
    }
}
|
||||
|
||||
/// <summary>
/// Comparer for PEP 440 version strings.
/// Orders by epoch, then release segments (numerically, trailing zeros ignored), then
/// development/pre-release/final/post-release phase. Local version labels ("+...") and
/// letter case are ignored. Text that is not a PEP 440 modifier is used only as a final,
/// ordinal tie-breaker, so any input still gets a consistent position.
/// </summary>
internal sealed class PythonVersionComparer : IComparer<string>
{
    public static readonly PythonVersionComparer Instance = new();

    /// <summary>
    /// Compares two version strings; null sorts before any non-null value.
    /// </summary>
    /// <returns>A negative value, zero, or a positive value when x is lower than, equal to, or higher than y.</returns>
    public int Compare(string? x, string? y)
    {
        if (x is null && y is null) return 0;
        if (x is null) return -1;
        if (y is null) return 1;

        var left = SortKey.Parse(x);
        var right = SortKey.Parse(y);

        // 1. Epoch outranks everything else.
        var result = CompareDigits(left.Epoch, right.Epoch);
        if (result != 0) return result;

        // 2. Release segments; a missing segment counts as zero ("1.0" == "1.0.0").
        var segmentCount = Math.Max(left.Release.Count, right.Release.Count);
        for (var i = 0; i < segmentCount; i++)
        {
            var leftSegment = i < left.Release.Count ? left.Release[i] : string.Empty;
            var rightSegment = i < right.Release.Count ? right.Release[i] : string.Empty;

            result = CompareDigits(leftSegment, rightSegment);
            if (result != 0) return result;
        }

        // 3. Phase: dev-only < pre-release < final/post.
        result = left.Phase.CompareTo(right.Phase);
        if (result != 0) return result;

        if (left.PreRank >= 0 && right.PreRank >= 0)
        {
            result = left.PreRank.CompareTo(right.PreRank);
            if (result != 0) return result;

            result = CompareDigits(left.PreNumber, right.PreNumber);
            if (result != 0) return result;
        }

        // 4. A post-release sorts after the same version without one.
        result = CompareOptional(left.Post, right.Post, -1);
        if (result != 0) return result;

        // 5. A dev-release sorts before the same version without one.
        result = CompareOptional(left.Dev, right.Dev, 1);
        if (result != 0) return result;

        // 6. Anything unrecognized only breaks ties.
        return string.CompareOrdinal(left.Tail, right.Tail);
    }

    /// <summary>
    /// Compares two optional numbers; <paramref name="whenMissing"/> is the result when only
    /// the left one is absent (-1: absent sorts first, 1: absent sorts last).
    /// </summary>
    private static int CompareOptional(string? left, string? right, int whenMissing)
    {
        if (left is null || right is null)
        {
            if (left is null && right is null) return 0;
            return left is null ? whenMissing : -whenMissing;
        }

        return CompareDigits(left, right);
    }

    /// <summary>
    /// Numerically compares two digit strings whose leading zeros were removed
    /// (the empty string is zero). Works for numbers of any length.
    /// </summary>
    private static int CompareDigits(string left, string right)
        => left.Length != right.Length
            ? left.Length.CompareTo(right.Length)
            : string.CompareOrdinal(left, right);

    private static bool IsDigit(char c) => c is >= '0' and <= '9';

    private static bool IsSeparator(char c) => c is '.' or '-' or '_';

    private static string TrimZeros(string digits) => digits.TrimStart('0');

    /// <summary>
    /// The parts of a version string that take part in ordering.
    /// </summary>
    private sealed class SortKey
    {
        /// <summary>Epoch digits without leading zeros (empty = 0).</summary>
        public string Epoch { get; private set; } = string.Empty;

        /// <summary>Release segments without leading zeros; trailing zero segments removed.</summary>
        public List<string> Release { get; } = [];

        /// <summary>-1 when there is no pre-release; otherwise 0 (alpha), 1 (beta) or 2 (rc).</summary>
        public int PreRank { get; private set; } = -1;

        /// <summary>Pre-release number without leading zeros.</summary>
        public string PreNumber { get; private set; } = string.Empty;

        /// <summary>Post-release number, or null when there is no post-release.</summary>
        public string? Post { get; private set; }

        /// <summary>Dev-release number, or null when there is no dev-release.</summary>
        public string? Dev { get; private set; }

        /// <summary>Unrecognized trailing text, kept for tie-breaking.</summary>
        public string Tail { get; private set; } = string.Empty;

        /// <summary>
        /// -1 for a dev release of a plain version, 0 for a pre-release, 1 otherwise.
        /// </summary>
        public int Phase
        {
            get
            {
                if (PreRank >= 0) return 0;
                return Post is null && Dev is not null ? -1 : 1;
            }
        }

        /// <summary>
        /// Splits a version string into its ordering parts. Never fails: text that does not
        /// fit the PEP 440 shape ends up in <see cref="Tail"/>.
        /// </summary>
        public static SortKey Parse(string version)
        {
            var key = new SortKey();
            var text = version.Trim().ToLowerInvariant();

            var epochEnd = text.IndexOf('!');
            if (epochEnd >= 0)
            {
                var epoch = text[..epochEnd];
                if (epoch.Length > 0 && epoch.All(IsDigit))
                {
                    key.Epoch = TrimZeros(epoch);
                }

                text = text[(epochEnd + 1)..];
            }

            // The local version label does not take part in ordering here.
            var localStart = text.IndexOf('+');
            if (localStart >= 0)
            {
                text = text[..localStart];
            }

            var pos = 0;
            if (text.Length > 1 && text[0] == 'v' && IsDigit(text[1]))
            {
                pos = 1; // PEP 440 permits a leading "v"
            }

            // Release: digit runs joined by single separators.
            while (pos < text.Length && IsDigit(text[pos]))
            {
                var start = pos;
                while (pos < text.Length && IsDigit(text[pos])) pos++;
                key.Release.Add(TrimZeros(text[start..pos]));

                if (pos + 1 < text.Length && IsSeparator(text[pos]) && IsDigit(text[pos + 1]))
                {
                    pos++;
                }
                else
                {
                    break;
                }
            }

            while (key.Release.Count > 0 && key.Release[^1].Length == 0)
            {
                key.Release.RemoveAt(key.Release.Count - 1);
            }

            // Modifiers: a word with an optional number ("rc1", ".post2", "-dev.3").
            while (pos < text.Length)
            {
                while (pos < text.Length && IsSeparator(text[pos])) pos++;

                var wordStart = pos;
                while (pos < text.Length && !IsDigit(text[pos]) && !IsSeparator(text[pos])) pos++;
                var word = text[wordStart..pos];

                var probe = pos;
                while (probe < text.Length && IsSeparator(text[probe])) probe++;
                var digitsStart = probe;
                while (probe < text.Length && IsDigit(text[probe])) probe++;

                var hasNumber = probe > digitsStart;
                var number = hasNumber ? TrimZeros(text[digitsStart..probe]) : string.Empty;
                if (hasNumber)
                {
                    pos = probe;
                }

                if (word.Length == 0 && !hasNumber)
                {
                    break; // only trailing separators were left
                }

                key.Apply(word, number);
            }

            return key;
        }

        /// <summary>
        /// Records one modifier; the first occurrence of each kind wins, the rest go to the tail.
        /// </summary>
        private void Apply(string word, string number)
        {
            switch (word)
            {
                case "a" or "alpha" when PreRank < 0:
                    PreRank = 0;
                    PreNumber = number;
                    break;
                case "b" or "beta" when PreRank < 0:
                    PreRank = 1;
                    PreNumber = number;
                    break;
                case "c" or "rc" or "pre" or "preview" when PreRank < 0:
                    PreRank = 2;
                    PreNumber = number;
                    break;
                case "post" or "rev" or "r" when Post is null:
                    Post = number;
                    break;
                case "dev" when Dev is null:
                    Dev = number;
                    break;
                default:
                    Tail += $"{word}{number}.";
                    break;
            }
        }
    }
}
|
||||
@@ -0,0 +1,447 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Licensing;
|
||||
|
||||
/// <summary>
/// Normalizes Python license classifiers and license strings to SPDX expressions.
/// </summary>
internal static partial class SpdxLicenseNormalizer
{
    /// <summary>
    /// Maps PyPI classifiers to SPDX identifiers. Lookups are case-insensitive.
    /// </summary>
    /// <remarks>
    /// The comparer is passed to <c>ToFrozenDictionary</c> explicitly: without it the frozen
    /// copy uses the default (case-sensitive) comparer, regardless of the source dictionary's.
    /// </remarks>
    private static readonly FrozenDictionary<string, string> ClassifierToSpdx =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            // OSI Approved licenses
            ["License :: OSI Approved :: MIT License"] = "MIT",
            ["License :: OSI Approved :: MIT No Attribution License (MIT-0)"] = "MIT-0",
            ["License :: OSI Approved :: Apache Software License"] = "Apache-2.0",
            ["License :: OSI Approved :: BSD License"] = "BSD-3-Clause",
            ["License :: OSI Approved :: GNU General Public License (GPL)"] = "GPL-3.0-only",
            ["License :: OSI Approved :: GNU General Public License v2 (GPLv2)"] = "GPL-2.0-only",
            ["License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)"] = "GPL-2.0-or-later",
            ["License :: OSI Approved :: GNU General Public License v3 (GPLv3)"] = "GPL-3.0-only",
            ["License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)"] = "GPL-3.0-or-later",
            ["License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)"] = "LGPL-2.0-only",
            ["License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)"] = "LGPL-2.0-or-later",
            ["License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)"] = "LGPL-3.0-only",
            ["License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)"] = "LGPL-3.0-or-later",
            ["License :: OSI Approved :: GNU Affero General Public License v3"] = "AGPL-3.0-only",
            ["License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)"] = "AGPL-3.0-or-later",
            ["License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"] = "MPL-2.0",
            ["License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)"] = "MPL-1.1",
            ["License :: OSI Approved :: ISC License (ISCL)"] = "ISC",
            ["License :: OSI Approved :: Python Software Foundation License"] = "PSF-2.0",
            ["License :: OSI Approved :: Zope Public License"] = "ZPL-2.1",
            ["License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)"] = "EPL-1.0",
            ["License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)"] = "EPL-2.0",
            ["License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)"] = "EUPL-1.2",
            ["License :: OSI Approved :: Academic Free License (AFL)"] = "AFL-3.0",
            ["License :: OSI Approved :: Artistic License"] = "Artistic-2.0",
            ["License :: OSI Approved :: Boost Software License 1.0 (BSL-1.0)"] = "BSL-1.0",
            ["License :: OSI Approved :: Common Development and Distribution License 1.0 (CDDL-1.0)"] = "CDDL-1.0",
            ["License :: OSI Approved :: Historical Permission Notice and Disclaimer (HPND)"] = "HPND",
            ["License :: OSI Approved :: IBM Public License"] = "IPL-1.0",
            ["License :: OSI Approved :: Intel Open Source License"] = "Intel",
            ["License :: OSI Approved :: Jabber Open Source License"] = "JOSL-1.0",
            ["License :: OSI Approved :: Open Software License 3.0 (OSL-3.0)"] = "OSL-3.0",
            ["License :: OSI Approved :: PostgreSQL License"] = "PostgreSQL",
            ["License :: OSI Approved :: The Unlicense (Unlicense)"] = "Unlicense",
            ["License :: OSI Approved :: Universal Permissive License (UPL)"] = "UPL-1.0",
            ["License :: OSI Approved :: W3C License"] = "W3C",
            ["License :: OSI Approved :: zlib/libpng License"] = "Zlib",

            // BSD variants (common on PyPI)
            ["License :: OSI Approved :: BSD 2-Clause License"] = "BSD-2-Clause",
            ["License :: OSI Approved :: BSD 3-Clause License"] = "BSD-3-Clause",
            ["License :: OSI Approved :: BSD-2-Clause"] = "BSD-2-Clause",
            ["License :: OSI Approved :: BSD-3-Clause"] = "BSD-3-Clause",

            // Public domain and CC0
            ["License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"] = "CC0-1.0",
            ["License :: Public Domain"] = "Unlicense",

            // Other common ones
            ["License :: Other/Proprietary License"] = "LicenseRef-Proprietary",
            ["License :: Freeware"] = "LicenseRef-Freeware",
            ["License :: Freely Distributable"] = "LicenseRef-FreelyDistributable",

            // DFSG Free licenses
            ["License :: DFSG approved"] = "LicenseRef-DFSG-Approved",
        }.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Maps common license strings to SPDX identifiers. Lookups are case-insensitive,
    /// so spellings that differ only in case are listed once.
    /// </summary>
    private static readonly FrozenDictionary<string, string> LicenseStringToSpdx =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            // MIT variations
            ["MIT"] = "MIT",
            ["MIT License"] = "MIT",
            ["The MIT License"] = "MIT",
            ["MIT-0"] = "MIT-0",

            // Apache variations
            ["Apache"] = "Apache-2.0",
            ["Apache 2"] = "Apache-2.0",
            ["Apache 2.0"] = "Apache-2.0",
            ["Apache-2"] = "Apache-2.0",
            ["Apache-2.0"] = "Apache-2.0",
            ["Apache License"] = "Apache-2.0",
            ["Apache License 2.0"] = "Apache-2.0",
            ["Apache License, Version 2.0"] = "Apache-2.0",
            ["Apache Software License"] = "Apache-2.0",
            ["ASL 2.0"] = "Apache-2.0",

            // BSD variations
            ["BSD"] = "BSD-3-Clause",
            ["BSD License"] = "BSD-3-Clause",
            ["BSD-2"] = "BSD-2-Clause",
            ["BSD 2-Clause"] = "BSD-2-Clause",
            ["BSD-2-Clause"] = "BSD-2-Clause",
            ["BSD-3"] = "BSD-3-Clause",
            ["BSD 3-Clause"] = "BSD-3-Clause",
            ["BSD-3-Clause"] = "BSD-3-Clause",
            ["Simplified BSD"] = "BSD-2-Clause",
            ["New BSD"] = "BSD-3-Clause",
            ["Modified BSD"] = "BSD-3-Clause",

            // GPL variations
            ["GPL"] = "GPL-3.0-only",
            ["GPLv2"] = "GPL-2.0-only",
            ["GPL v2"] = "GPL-2.0-only",
            ["GPL-2"] = "GPL-2.0-only",
            ["GPL-2.0"] = "GPL-2.0-only",
            ["GPL-2.0-only"] = "GPL-2.0-only",
            ["GPL-2.0+"] = "GPL-2.0-or-later",
            ["GPL-2.0-or-later"] = "GPL-2.0-or-later",
            ["GPLv3"] = "GPL-3.0-only",
            ["GPL v3"] = "GPL-3.0-only",
            ["GPL-3"] = "GPL-3.0-only",
            ["GPL-3.0"] = "GPL-3.0-only",
            ["GPL-3.0-only"] = "GPL-3.0-only",
            ["GPL-3.0+"] = "GPL-3.0-or-later",
            ["GPL-3.0-or-later"] = "GPL-3.0-or-later",
            ["GNU General Public License"] = "GPL-3.0-only",
            ["GNU General Public License v3"] = "GPL-3.0-only",

            // LGPL variations
            ["LGPL"] = "LGPL-3.0-only",
            ["LGPLv2"] = "LGPL-2.0-only",
            ["LGPL-2.0"] = "LGPL-2.0-only",
            ["LGPL-2.1"] = "LGPL-2.1-only",
            ["LGPLv3"] = "LGPL-3.0-only",
            ["LGPL-3.0"] = "LGPL-3.0-only",
            ["GNU Lesser General Public License"] = "LGPL-3.0-only",

            // AGPL variations
            ["AGPL"] = "AGPL-3.0-only",
            ["AGPLv3"] = "AGPL-3.0-only",
            ["AGPL-3.0"] = "AGPL-3.0-only",

            // MPL variations
            ["MPL"] = "MPL-2.0",
            ["MPL 2.0"] = "MPL-2.0",
            ["MPL-2.0"] = "MPL-2.0",
            ["Mozilla Public License 2.0"] = "MPL-2.0",

            // ISC
            ["ISC"] = "ISC",
            ["ISC License"] = "ISC",

            // Other common licenses
            ["PSF"] = "PSF-2.0",
            ["Python Software Foundation License"] = "PSF-2.0",
            ["PSFL"] = "PSF-2.0",
            ["Unlicense"] = "Unlicense",
            ["The Unlicense"] = "Unlicense",
            ["CC0"] = "CC0-1.0",
            ["CC0 1.0"] = "CC0-1.0",
            ["CC0-1.0"] = "CC0-1.0",
            ["Public Domain"] = "Unlicense",
            ["Zlib"] = "Zlib",
            ["Boost"] = "BSL-1.0",
            ["BSL-1.0"] = "BSL-1.0",
            ["EPL"] = "EPL-2.0",
            ["EPL-1.0"] = "EPL-1.0",
            ["EPL-2.0"] = "EPL-2.0",
            ["Eclipse"] = "EPL-2.0",
            ["Eclipse Public License"] = "EPL-2.0",
            ["Artistic"] = "Artistic-2.0",
            ["Artistic License"] = "Artistic-2.0",
            ["PostgreSQL"] = "PostgreSQL",
            ["W3C"] = "W3C",
            ["WTFPL"] = "WTFPL",
        }.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Normalizes a Python package's license information to an SPDX expression.
    /// </summary>
    /// <param name="license">The license field from METADATA.</param>
    /// <param name="classifiers">The classifiers from METADATA.</param>
    /// <param name="licenseExpression">PEP 639 license-expression field (if present).</param>
    /// <returns>The normalized SPDX expression or null if not determinable.</returns>
    public static string? Normalize(
        string? license,
        IEnumerable<string>? classifiers,
        string? licenseExpression = null)
    {
        // PEP 639 license expression takes precedence, provided it looks like SPDX.
        if (!string.IsNullOrWhiteSpace(licenseExpression) && IsValidSpdxExpression(licenseExpression))
        {
            return licenseExpression.Trim();
        }

        // Try classifiers next (most reliable)
        if (classifiers is not null && NormalizeFromClassifiers(classifiers) is { } fromClassifiers)
        {
            return fromClassifiers;
        }

        // Finally the free-text license string
        return string.IsNullOrWhiteSpace(license) ? null : NormalizeFromString(license);
    }

    /// <summary>
    /// Normalizes license classifiers to SPDX.
    /// </summary>
    /// <param name="classifiers">Classifiers to map; unknown, null and blank entries are skipped.</param>
    /// <returns>
    /// The single mapped identifier, an "OR" expression of all mapped identifiers
    /// (ordinally sorted) when there are several, or null when none is recognized.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="classifiers"/> is null.</exception>
    public static string? NormalizeFromClassifiers(IEnumerable<string> classifiers)
    {
        ArgumentNullException.ThrowIfNull(classifiers);

        var spdxIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var classifier in classifiers)
        {
            if (string.IsNullOrWhiteSpace(classifier))
            {
                continue;
            }

            if (ClassifierToSpdx.TryGetValue(classifier.Trim(), out var spdxId))
            {
                spdxIds.Add(spdxId);
            }
        }

        if (spdxIds.Count == 0)
        {
            return null;
        }

        // Multiple licenses - create OR expression (dual licensing)
        return string.Join(" OR ", spdxIds.OrderBy(s => s, StringComparer.Ordinal));
    }

    /// <summary>
    /// Normalizes a license string to SPDX.
    /// </summary>
    /// <param name="license">Free-text license name.</param>
    /// <returns>
    /// A known SPDX identifier, a <c>LicenseRef-</c> identifier for a plausible but unknown
    /// license name, or null when the text is blank or does not look like a license name.
    /// </returns>
    public static string? NormalizeFromString(string license)
    {
        if (string.IsNullOrWhiteSpace(license))
        {
            return null;
        }

        var trimmed = license.Trim();

        // Direct lookup
        if (LicenseStringToSpdx.TryGetValue(trimmed, out var spdxId))
        {
            return spdxId;
        }

        // Try normalized lookup (remove common suffixes/prefixes)
        if (LicenseStringToSpdx.TryGetValue(NormalizeLicenseString(trimmed), out spdxId))
        {
            return spdxId;
        }

        // Try pattern matching for known patterns
        if (TryPatternMatch(trimmed) is { } matched)
        {
            return matched;
        }

        // Can't normalize - return as LicenseRef
        if (!IsPlausibleLicenseName(trimmed))
        {
            return null;
        }

        var reference = SanitizeForSpdx(trimmed);

        // Nothing usable left (e.g. the text was only punctuation): "LicenseRef-" alone is invalid.
        return reference.Length == 0 ? null : $"LicenseRef-{reference}";
    }

    /// <summary>
    /// Recognizes well-known license families in free text.
    /// </summary>
    private static string? TryPatternMatch(string license)
    {
        if (MitPattern().IsMatch(license))
        {
            return "MIT";
        }

        var apacheMatch = ApachePattern().Match(license);
        if (apacheMatch.Success)
        {
            // Apache 1.0 and 1.1 are distinct licenses; everything else defaults to 2.0.
            return apacheMatch.Groups["version"].Value switch
            {
                "1" or "1.0" => "Apache-1.0",
                "1.1" => "Apache-1.1",
                _ => "Apache-2.0"
            };
        }

        var bsdMatch = BsdPattern().Match(license);
        if (bsdMatch.Success)
        {
            return bsdMatch.Groups["clauses"].Value switch
            {
                "2" => "BSD-2-Clause",
                "3" => "BSD-3-Clause",
                "4" => "BSD-4-Clause",
                _ => "BSD-3-Clause"
            };
        }

        // LGPL and AGPL must be tested before GPL: their names contain "General Public License".
        var lgplMatch = LgplPattern().Match(license);
        if (lgplMatch.Success)
        {
            var suffix = lgplMatch.Groups["orlater"].Success ? "or-later" : "only";
            return lgplMatch.Groups["version"].Value switch
            {
                "2" or "2.0" => $"LGPL-2.0-{suffix}",
                "2.1" => $"LGPL-2.1-{suffix}",
                _ => $"LGPL-3.0-{suffix}"
            };
        }

        var agplMatch = AgplPattern().Match(license);
        if (agplMatch.Success)
        {
            return agplMatch.Groups["orlater"].Success ? "AGPL-3.0-or-later" : "AGPL-3.0-only";
        }

        var gplMatch = GplPattern().Match(license);
        if (gplMatch.Success)
        {
            var suffix = gplMatch.Groups["orlater"].Success ? "or-later" : "only";
            return gplMatch.Groups["version"].Value switch
            {
                "2" or "2.0" => $"GPL-2.0-{suffix}",
                _ => $"GPL-3.0-{suffix}"
            };
        }

        return null;
    }

    /// <summary>
    /// Strips common noise ("the ", " license", parentheses) before a table lookup.
    /// </summary>
    private static string NormalizeLicenseString(string license)
    {
        return license
            .Replace("the ", "", StringComparison.OrdinalIgnoreCase)
            .Replace(" license", "", StringComparison.OrdinalIgnoreCase)
            .Replace("(", "")
            .Replace(")", "")
            .Trim();
    }

    /// <summary>
    /// Basic shape check for an SPDX expression: only identifier characters, whitespace and
    /// parentheses, at least one identifier character, and balanced parentheses.
    /// It does not verify that the identifiers are known SPDX licenses.
    /// </summary>
    private static bool IsValidSpdxExpression(string expression)
    {
        if (string.IsNullOrWhiteSpace(expression) || !SpdxExpressionPattern().IsMatch(expression))
        {
            return false;
        }

        var depth = 0;
        var hasIdentifier = false;

        foreach (var c in expression)
        {
            if (c == '(')
            {
                depth++;
            }
            else if (c == ')')
            {
                depth--;
                if (depth < 0)
                {
                    return false;
                }
            }
            else if (char.IsAsciiLetterOrDigit(c))
            {
                hasIdentifier = true;
            }
        }

        return depth == 0 && hasIdentifier;
    }

    /// <summary>
    /// Filters out text that is clearly not a license name (too short or long, a URL, a paragraph).
    /// </summary>
    private static bool IsPlausibleLicenseName(string text)
    {
        if (text.Length > 100 || text.Length < 2)
        {
            return false;
        }

        // Skip if it looks like a URL
        if (text.Contains("://") || text.Contains("www."))
        {
            return false;
        }

        // Skip if it's a full paragraph
        if (text.Contains('\n') || text.Split(' ').Length > 10)
        {
            return false;
        }

        return true;
    }

    /// <summary>
    /// Turns text into the id part of a LicenseRef: every character other than an ASCII
    /// letter, ASCII digit, "." or "-" becomes "-", and leading/trailing "-" are removed.
    /// </summary>
    private static string SanitizeForSpdx(string text)
    {
        var sanitized = new char[text.Length];

        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];

            // ASCII only: SPDX idstrings do not allow non-ASCII letters or digits.
            sanitized[i] = char.IsAsciiLetterOrDigit(c) || c == '.' || c == '-' ? c : '-';
        }

        return new string(sanitized).Trim('-');
    }

    // RegexOptions.Compiled is omitted on all patterns: the regex source generator ignores it.

    [GeneratedRegex(@"^MIT(\s|$)", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex MitPattern();

    [GeneratedRegex(
        @"Apache[\s\-]*(Software[\s\-]*)?(License[\s,\-]*)?(Version[\s\-]*|v)?(?<version>[0-9](\.[0-9])?)?",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex ApachePattern();

    [GeneratedRegex(
        @"BSD[\s\-]?(?<clauses>[234])?\s*[\-]?\s*Clause",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex BsdPattern();

    // Requires "General": a bare "Public License" also appears in MPL, EPL, ZPL, IPL, ...
    [GeneratedRegex(
        @"(GNU\s*)?General\s*Public\s*License[\s,]*(v|version)?[\s]*(?<version>[23](\.0)?)?(?<orlater>\+|\s*or\s*later)?",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex GplPattern();

    [GeneratedRegex(
        @"(GNU\s*)?(Lesser|Library)\s*(General\s*)?Public\s*License[\s,]*(v|version)?[\s]*(?<version>[23](\.0|\.1)?)?(?<orlater>\+|\s*or\s*later)?",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex LgplPattern();

    [GeneratedRegex(
        @"(GNU\s*)?Affero\s*(General\s*)?Public\s*License[\s,]*(v|version)?[\s]*(?<version>3(\.0)?)?(?<orlater>\+|\s*or\s*later)?",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex AgplPattern();

    // A single character class keeps matching linear; the previous nested form could
    // backtrack exponentially on long inputs taken from package metadata.
    [GeneratedRegex(@"^[A-Za-z0-9.\-\+:\(\)\s]+$")]
    private static partial Regex SpdxExpressionPattern();
}
|
||||
@@ -0,0 +1,524 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Vendoring;
|
||||
|
||||
/// <summary>
|
||||
/// Detects vendored (bundled) packages inside Python packages.
|
||||
/// Python's equivalent of Java's shaded JAR detection.
|
||||
/// Common patterns: pip._vendor, requests.packages, certifi bundled certs.
|
||||
/// </summary>
|
||||
internal static partial class VendoredPackageDetector
|
||||
{
|
||||
/// <summary>
/// Directory names that commonly hold vendored code, in the order they are scanned.
/// </summary>
private static readonly string[] VendorDirectoryPatterns =
[
    // Explicit "vendor" spellings
    "_vendor", "_vendored", "vendor", "vendored",

    // "External" / third-party spellings
    "extern", "external", "third_party", "thirdparty",

    "packages", // Old requests pattern
    "lib",      // Sometimes used for bundled libs
    "bundled"
];
|
||||
|
||||
/// <summary>
/// Packages that are known to ship vendored copies of other packages.
/// Keyed by the parent package (case-insensitive); the value lists the packages
/// expected to be found vendored inside it.
/// </summary>
private static readonly IReadOnlyDictionary<string, string[]> KnownVendoredPackages =
    new Dictionary<string, string[]>(StringComparer.OrdinalIgnoreCase)
    {
        ["pip"] =
        [
            "certifi", "chardet", "colorama", "distlib",
            "html5lib", "idna", "msgpack", "packaging",
            "pep517", "pkg_resources", "platformdirs", "pygments",
            "pyparsing", "requests", "resolvelib", "rich",
            "setuptools", "six", "tenacity", "tomli",
            "truststore", "typing_extensions", "urllib3", "webencodings"
        ],
        ["setuptools"] = ["more_itertools", "ordered_set", "packaging", "pyparsing"],
        ["requests"] = ["urllib3", "chardet", "idna", "certifi"],
        ["urllib3"] = ["six"],
        ["virtualenv"] = ["distlib", "filelock", "platformdirs", "six"],
    };
|
||||
|
||||
/// <summary>
/// Inspects one package for evidence that it bundles (vendors) other packages.
/// </summary>
/// <param name="vfs">Virtual file system whose file list is scanned for vendor directories.</param>
/// <param name="package">The package to inspect.</param>
/// <param name="cancellationToken">Checked before each vendor-directory pattern is scanned.</param>
/// <returns>
/// The collected markers, embedded packages and vendor paths together with a confidence level.
/// When no package directory can be derived, <see cref="VendoringAnalysis.NotVendored"/> is returned.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="vfs"/> or <paramref name="package"/> is <see langword="null"/>.
/// </exception>
public static async Task<VendoringAnalysis> AnalyzeAsync(
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(package);

    // Without a package directory there is nothing to scan.
    var packageRoot = GetPackageDirectory(package);
    if (string.IsNullOrEmpty(packageRoot))
    {
        return VendoringAnalysis.NotVendored(package.Name);
    }

    List<string> evidence = [];
    List<EmbeddedPackage> bundled = [];
    List<string> vendorRoots = [];

    // Pass 1: one scan per known vendor directory name.
    foreach (var pattern in VendorDirectoryPatterns)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var hits = await FindVendorDirectoriesAsync(vfs, packageRoot, pattern, cancellationToken)
            .ConfigureAwait(false);

        foreach (var hit in hits)
        {
            // One marker per directory found; duplicates are intentional here
            // because the confidence score counts them.
            evidence.Add($"vendor-directory:{pattern}");
            vendorRoots.Add(hit);

            var found = await ExtractEmbeddedPackagesAsync(vfs, hit, package.Name, cancellationToken)
                .ConfigureAwait(false);
            bundled.AddRange(found);
        }
    }

    // Pass 2: does any embedded package match what this parent is known to vendor?
    if (KnownVendoredPackages.TryGetValue(package.NormalizedName, out var expectedVendored)
        && bundled.Any(e => expectedVendored.Contains(e.Name, StringComparer.OrdinalIgnoreCase)))
    {
        evidence.Add("known-vendored-package");
    }

    // Pass 3: RECORD entries that sit inside a vendor directory (either separator style).
    bool IsUnderVendorDirectory(string path)
    {
        return VendorDirectoryPatterns.Any(p =>
            path.Contains($"/{p}/", StringComparison.OrdinalIgnoreCase) ||
            path.Contains($"\\{p}\\", StringComparison.OrdinalIgnoreCase));
    }

    if (package.RecordFiles.Length > 0 && package.RecordFiles.Any(r => IsUnderVendorDirectory(r.Path)))
    {
        evidence.Add("record-vendor-entries");
    }

    var confidence = CalculateConfidence(evidence, bundled.Count);

    // Any confidence above None counts as vendored.
    var isVendored = confidence > VendoringConfidence.None;

    return new VendoringAnalysis(
        package.Name,
        isVendored,
        confidence,
        [.. evidence.Distinct().OrderBy(m => m, StringComparer.Ordinal)],
        [.. bundled.OrderBy(e => e.Name, StringComparer.Ordinal)],
        [.. vendorRoots.Distinct().OrderBy(p => p, StringComparer.Ordinal)]);
}
|
||||
|
||||
/// <summary>
/// Runs <see cref="AnalyzeAsync"/> over every package in a discovery result and
/// keeps only the analyses that report vendoring.
/// </summary>
/// <param name="vfs">Virtual file system passed through to each per-package analysis.</param>
/// <param name="discoveryResult">Discovery result whose <c>Packages</c> are analyzed in order.</param>
/// <param name="cancellationToken">Checked before each package is analyzed.</param>
/// <returns>Analyses with <c>IsVendored == true</c>, in package enumeration order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="vfs"/> or <paramref name="discoveryResult"/> is <see langword="null"/>.
/// </exception>
public static async Task<ImmutableArray<VendoringAnalysis>> AnalyzeAllAsync(
    PythonVirtualFileSystem vfs,
    PythonPackageDiscoveryResult discoveryResult,
    CancellationToken cancellationToken = default)
{
    // Validate up front, like AnalyzeAsync does, so a null discovery result fails
    // with a descriptive ArgumentNullException instead of a NullReferenceException.
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(discoveryResult);

    var results = new List<VendoringAnalysis>();

    foreach (var package in discoveryResult.Packages)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var analysis = await AnalyzeAsync(vfs, package, cancellationToken).ConfigureAwait(false);
        if (analysis.IsVendored)
        {
            results.Add(analysis);
        }
    }

    return [.. results];
}
|
||||
|
||||
/// <summary>
/// Derives the directory that holds the package's importable module.
/// </summary>
/// <remarks>
/// The base directory is the directory containing <c>MetadataPath</c>
/// (e.g. dist-info at "site-packages/pip-23.0.dist-info" gives "site-packages");
/// when that yields nothing, <c>Location</c> is used instead. The module name is the
/// first entry of <c>TopLevelModules</c>, or the normalized package name when none is listed.
/// </remarks>
/// <param name="package">Package whose module directory is wanted.</param>
/// <returns>
/// A forward-slash path of the form "&lt;base&gt;/&lt;module&gt;", or <see langword="null"/>
/// when no base directory can be determined.
/// </returns>
private static string? GetPackageDirectory(PythonPackageInfo package)
{
    string? baseDir = null;

    if (!string.IsNullOrEmpty(package.MetadataPath))
    {
        // Directory containing the dist-info (usually site-packages).
        baseDir = Path.GetDirectoryName(package.MetadataPath);
    }

    // Fall back to Location whenever the metadata path gave no usable directory,
    // not only when the metadata path itself is missing.
    if (string.IsNullOrEmpty(baseDir))
    {
        baseDir = package.Location;
    }

    if (string.IsNullOrEmpty(baseDir))
    {
        return null;
    }

    // A module directory is a Python identifier and so cannot contain '-'.
    // NOTE(review): assumes NormalizedName may be in hyphenated form; the replacement
    // is a no-op if it already uses underscores - confirm against PythonPackageInfo.
    var moduleName = package.TopLevelModules.Length > 0
        ? package.TopLevelModules[0]
        : package.NormalizedName.Replace('-', '_');

    return Path.Combine(baseDir, moduleName).Replace('\\', '/');
}
|
||||
|
||||
/// <summary>
/// Finds directories named <paramref name="vendorPattern"/> underneath <paramref name="baseDir"/>.
/// </summary>
/// <remarks>
/// Directories are inferred from the paths in <c>vfs.Files</c>. For each file only the
/// outermost matching path segment is reported, and the final segment (the file name)
/// is never considered. Errors while scanning are ignored on a best-effort basis, but
/// cancellation is propagated to the caller.
/// </remarks>
/// <param name="vfs">Virtual file system whose files are enumerated.</param>
/// <param name="baseDir">Package directory; only files below it are considered.</param>
/// <param name="vendorPattern">Directory name to look for (case-insensitive).</param>
/// <param name="cancellationToken">Checked once per enumerated file.</param>
/// <returns>Distinct forward-slash vendor directory paths, in discovery order.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled during the scan.</exception>
private static async Task<List<string>> FindVendorDirectoriesAsync(
    PythonVirtualFileSystem vfs,
    string baseDir,
    string vendorPattern,
    CancellationToken cancellationToken)
{
    var results = new List<string>();

    // Set-based de-duplication; the list preserves discovery order for the caller.
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        foreach (var file in vfs.Files)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var relativePath = GetRelativePath(baseDir, file.VirtualPath);
            if (string.IsNullOrEmpty(relativePath))
            {
                continue;
            }

            var parts = relativePath.Split(['/', '\\'], StringSplitOptions.RemoveEmptyEntries);

            // Stop before the last segment: it is the file name, not a directory.
            for (var i = 0; i < parts.Length - 1; i++)
            {
                if (!string.Equals(parts[i], vendorPattern, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                var vendorPath = string.Join("/", parts.Take(i + 1));
                var fullVendorPath = Path.Combine(baseDir, vendorPath).Replace('\\', '/');

                if (seen.Add(fullVendorPath))
                {
                    results.Add(fullVendorPath);
                }

                // Only the outermost match per file is reported.
                break;
            }
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: ignore scanning errors, but never swallow cancellation.
    }

    await Task.CompletedTask.ConfigureAwait(false); // Keep async signature for future enhancements
    return results;
}
|
||||
|
||||
/// <summary>
/// Lists the packages and single-file modules that sit directly inside a vendor directory.
/// </summary>
/// <remarks>
/// The name is taken from the first path segment below <paramref name="vendorPath"/>,
/// with a trailing ".py" removed. Names starting with "__" or "." are skipped, and each
/// name is reported once (case-insensitive). Errors are ignored on a best-effort basis;
/// cancellation is propagated.
/// </remarks>
/// <param name="vfs">Virtual file system whose files are enumerated and read.</param>
/// <param name="vendorPath">Forward-slash path of the vendor directory.</param>
/// <param name="parentPackage">Name of the package that owns the vendor directory.</param>
/// <param name="cancellationToken">Checked once per enumerated file.</param>
/// <returns>The embedded packages found so far, in discovery order.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled during the scan.</exception>
private static async Task<List<EmbeddedPackage>> ExtractEmbeddedPackagesAsync(
    PythonVirtualFileSystem vfs,
    string vendorPath,
    string parentPackage,
    CancellationToken cancellationToken)
{
    var packages = new List<EmbeddedPackage>();
    var seenPackages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        foreach (var file in vfs.Files)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // GetRelativePath matches only on a whole path segment, so a sibling such as
            // ".../_vendored/x.py" is not mistaken for content of ".../_vendor". It also
            // returns null for the vendor path itself, avoiding an out-of-range slice.
            var relativePath = GetRelativePath(vendorPath, file.VirtualPath);
            if (string.IsNullOrEmpty(relativePath))
            {
                continue;
            }

            var parts = relativePath.Split('/', StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length == 0)
            {
                continue;
            }

            // First segment is either a package directory or a single-file module.
            var packageName = parts[0];
            if (packageName.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
            {
                packageName = packageName[..^3];
            }

            // Skip empty names, __pycache__/__init__ and hidden entries.
            if (packageName.Length == 0
                || packageName.StartsWith("__", StringComparison.Ordinal)
                || packageName.StartsWith('.'))
            {
                continue;
            }

            if (!seenPackages.Add(packageName))
            {
                continue;
            }

            var version = await ExtractVersionAsync(vfs, vendorPath, packageName, cancellationToken)
                .ConfigureAwait(false);

            var license = await ExtractLicenseAsync(vfs, vendorPath, packageName, cancellationToken)
                .ConfigureAwait(false);

            packages.Add(new EmbeddedPackage(
                packageName,
                version,
                license,
                Path.Combine(vendorPath, packageName).Replace('\\', '/'),
                parentPackage));
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: keep what was collected, but never swallow cancellation.
    }

    return packages;
}
|
||||
|
||||
/// <summary>
/// Tries to read a version string for an embedded package from its source files.
/// </summary>
/// <remarks>
/// Candidate files are probed in order and the first <c>VersionPattern</c> match wins.
/// Besides the usual files inside a package directory, the single-file module
/// "&lt;vendorPath&gt;/&lt;packageName&gt;.py" is probed as well, because such modules are
/// reported as embedded packages too. Unreadable files are skipped; cancellation is propagated.
/// </remarks>
/// <param name="vfs">Virtual file system used to open the candidate files.</param>
/// <param name="vendorPath">Forward-slash path of the vendor directory.</param>
/// <param name="packageName">Embedded package or module name.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>The captured version text, or <see langword="null"/> when none is found.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled while reading.</exception>
private static async Task<string?> ExtractVersionAsync(
    PythonVirtualFileSystem vfs,
    string vendorPath,
    string packageName,
    CancellationToken cancellationToken)
{
    string[] versionFiles =
    [
        $"{vendorPath}/{packageName}/__init__.py",
        $"{vendorPath}/{packageName}/_version.py",
        $"{vendorPath}/{packageName}/version.py",
        $"{vendorPath}/{packageName}/__version__.py",
        $"{vendorPath}/{packageName}.py", // single-file module, e.g. six.py
    ];

    foreach (var versionFile in versionFiles)
    {
        try
        {
            using var stream = await vfs.OpenReadAsync(versionFile, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                continue;
            }

            using var reader = new StreamReader(stream);
            var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

            var match = VersionPattern().Match(content);
            if (match.Success)
            {
                return match.Groups["version"].Value;
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Unreadable candidate: try the next file. Cancellation is not swallowed.
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Makes a coarse guess at an embedded package's license from its license file.
/// </summary>
/// <remarks>
/// The first candidate file that can be opened decides the result. Its first non-blank
/// line is inspected: the whole word "MIT" gives "MIT", "Apache" gives "Apache-2.0" and
/// "BSD" gives "BSD-3-Clause". This is a heuristic and does not verify the license text
/// or its version. Unreadable files are skipped; cancellation is propagated.
/// </remarks>
/// <param name="vfs">Virtual file system used to open the candidate files.</param>
/// <param name="vendorPath">Forward-slash path of the vendor directory.</param>
/// <param name="packageName">Embedded package name.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>
/// A license identifier, "Unknown (license file present)" when a file exists but is not
/// recognised, or <see langword="null"/> when no license file could be opened.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled while reading.</exception>
private static async Task<string?> ExtractLicenseAsync(
    PythonVirtualFileSystem vfs,
    string vendorPath,
    string packageName,
    CancellationToken cancellationToken)
{
    string[] licenseFiles =
    [
        $"{vendorPath}/{packageName}/LICENSE",
        $"{vendorPath}/{packageName}/LICENSE.txt",
        $"{vendorPath}/{packageName}/LICENSE.md",
        $"{vendorPath}/{packageName}/COPYING",
    ];

    foreach (var licenseFile in licenseFiles)
    {
        try
        {
            using var stream = await vfs.OpenReadAsync(licenseFile, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                continue;
            }

            using var reader = new StreamReader(stream);

            // Skip leading blank lines: some license texts (e.g. the Apache license)
            // start with an empty line before the title.
            string? heading;
            do
            {
                heading = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
            }
            while (heading is not null && string.IsNullOrWhiteSpace(heading));

            if (heading is not null)
            {
                // Whole-word match so names/words such as "Smith" or "Limited"
                // are not reported as the MIT license.
                if (Regex.IsMatch(heading, @"\bMIT\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
                {
                    return "MIT";
                }

                if (heading.Contains("Apache", StringComparison.OrdinalIgnoreCase))
                {
                    return "Apache-2.0";
                }

                if (heading.Contains("BSD", StringComparison.OrdinalIgnoreCase))
                {
                    return "BSD-3-Clause";
                }
            }

            return "Unknown (license file present)";
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Unreadable candidate: try the next file. Cancellation is not swallowed.
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Returns the part of <paramref name="fullPath"/> that lies below <paramref name="basePath"/>.
/// </summary>
/// <remarks>
/// Backslashes are treated as forward slashes, trailing slashes on the base are ignored,
/// and the prefix comparison is case-insensitive. The base must be followed by a "/" in
/// the full path, so "a/b" is not considered a parent of "a/bc/x".
/// </remarks>
/// <param name="basePath">Directory the result should be relative to.</param>
/// <param name="fullPath">Path to make relative.</param>
/// <returns>
/// The forward-slash remainder after "&lt;basePath&gt;/", or <see langword="null"/>
/// when <paramref name="fullPath"/> is not below <paramref name="basePath"/>.
/// </returns>
private static string? GetRelativePath(string basePath, string fullPath)
{
    var prefix = basePath.Replace('\\', '/').TrimEnd('/') + "/";
    var candidate = fullPath.Replace('\\', '/');

    return candidate.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
        ? candidate.Substring(prefix.Length)
        : null;
}
|
||||
|
||||
/// <summary>
/// Converts the collected markers and the embedded-package count into a confidence level.
/// </summary>
/// <remarks>
/// Scoring: "known-vendored-package" +3, "record-vendor-entries" +2, +1 for every
/// "vendor-directory:" marker (duplicates count), +2 for more than five embedded packages
/// or +1 for more than one. A score of 4 or more is High, 2-3 Medium, 1 Low, otherwise None.
/// </remarks>
/// <param name="markers">Markers gathered during analysis; may contain duplicates.</param>
/// <param name="embeddedCount">Number of embedded packages that were found.</param>
/// <returns>The confidence level for the computed score.</returns>
private static VendoringConfidence CalculateConfidence(List<string> markers, int embeddedCount)
{
    var score = 0;

    // Strong indicators
    if (markers.Contains("known-vendored-package"))
    {
        score += 3;
    }

    if (markers.Contains("record-vendor-entries"))
    {
        score += 2;
    }

    // One point per vendor directory. Markers are machine-generated identifiers,
    // so compare ordinally rather than with the current culture.
    score += markers.Count(m => m.StartsWith("vendor-directory:", StringComparison.Ordinal));

    // Embedded package count
    score += embeddedCount switch
    {
        > 5 => 2,
        > 1 => 1,
        _ => 0,
    };

    return score switch
    {
        >= 4 => VendoringConfidence.High,
        >= 2 => VendoringConfidence.Medium,
        >= 1 => VendoringConfidence.Low,
        _ => VendoringConfidence.None,
    };
}
|
||||
|
||||
// Matches an assignment of a quoted string to __version__, __VERSION__ or VERSION and
// captures the text between the quotes in the "version" group.
// The \b anchors require the name to stand alone as an identifier, so assignments such as
// API_VERSION = "2" or MIN_PYTHON_VERSION = "3.8" are not mistaken for the package version.
// RegexOptions.Compiled is omitted because the regex source generator ignores it.
[GeneratedRegex(
    @"\b(?:__version__|__VERSION__|VERSION)\b\s*=\s*['""](?<version>[^'""]+)['""]")]
private static partial Regex VersionPattern();
|
||||
}
|
||||
|
||||
/// <summary>
/// Outcome of checking one package for vendored (bundled) dependencies.
/// </summary>
/// <param name="PackageName">Name of the analyzed package.</param>
/// <param name="IsVendored">Whether the package is considered to contain vendored code.</param>
/// <param name="Confidence">Confidence level of the detection.</param>
/// <param name="Markers">Evidence markers collected during analysis.</param>
/// <param name="EmbeddedPackages">Packages found inside vendor directories.</param>
/// <param name="VendorPaths">Vendor directory paths that were found.</param>
internal sealed record VendoringAnalysis(
    string PackageName,
    bool IsVendored,
    VendoringConfidence Confidence,
    ImmutableArray<string> Markers,
    ImmutableArray<EmbeddedPackage> EmbeddedPackages,
    ImmutableArray<string> VendorPaths)
{
    /// <summary>
    /// Creates the result for a package with no vendoring evidence:
    /// not vendored, confidence None, and empty collections.
    /// </summary>
    public static VendoringAnalysis NotVendored(string packageName)
    {
        return new VendoringAnalysis(
            packageName,
            IsVendored: false,
            Confidence: VendoringConfidence.None,
            Markers: ImmutableArray<string>.Empty,
            EmbeddedPackages: ImmutableArray<EmbeddedPackage>.Empty,
            VendorPaths: ImmutableArray<string>.Empty);
    }

    /// <summary>
    /// Number of embedded packages.
    /// </summary>
    public int EmbeddedCount
    {
        get { return EmbeddedPackages.Length; }
    }

    /// <summary>
    /// Joins each embedded package's name (with version when known) using commas.
    /// </summary>
    public string GetEmbeddedPackageList()
    {
        var labels = EmbeddedPackages.Select(p => p.NameWithVersion);
        return string.Join(",", labels);
    }

    /// <summary>
    /// Enumerates the PURL of every embedded package.
    /// </summary>
    public IEnumerable<string> GetEmbeddedPurls()
    {
        return EmbeddedPackages.Select(p => p.Purl);
    }
}
|
||||
|
||||
/// <summary>
/// A package found bundled inside another package's vendor directory.
/// </summary>
/// <param name="Name">Name of the embedded package as found on disk.</param>
/// <param name="Version">Detected version, or <see langword="null"/> when unknown.</param>
/// <param name="License">Detected license, or <see langword="null"/> when unknown.</param>
/// <param name="Path">Path of the embedded package.</param>
/// <param name="ParentPackage">Name of the package that contains this one.</param>
internal sealed record EmbeddedPackage(
    string Name,
    string? Version,
    string? License,
    string Path,
    string ParentPackage)
{
    /// <summary>
    /// "name@version" when a version is known, otherwise just the name.
    /// </summary>
    public string NameWithVersion
    {
        get
        {
            if (Version is null)
            {
                return Name;
            }

            return $"{Name}@{Version}";
        }
    }

    /// <summary>
    /// The pkg:pypi PURL for this package; the name is lower-cased with '_' replaced
    /// by '-', and the version is appended after '@' when known.
    /// </summary>
    public string Purl
    {
        get
        {
            var normalized = NormalizeName(Name);
            return Version is null
                ? $"pkg:pypi/{normalized}"
                : $"pkg:pypi/{normalized}@{Version}";
        }
    }

    /// <summary>
    /// Dotted name of the form "parent._vendor.name".
    /// </summary>
    public string QualifiedName
    {
        get { return $"{ParentPackage}._vendor.{Name}"; }
    }

    // Lower-case the name and use '-' in place of '_'.
    private static string NormalizeName(string name)
    {
        var lowered = name.ToLowerInvariant();
        return lowered.Replace('_', '-');
    }
}
|
||||
|
||||
/// <summary>
/// How confident the detector is that a package contains vendored code.
/// Values are ordered so that a higher number means higher confidence.
/// </summary>
internal enum VendoringConfidence
{
    /// <summary>No evidence of vendoring.</summary>
    None = 0,

    /// <summary>Weak evidence of vendoring.</summary>
    Low = 1,

    /// <summary>Moderate evidence of vendoring.</summary>
    Medium = 2,

    /// <summary>Strong evidence of vendoring.</summary>
    High = 3,
}
|
||||
Reference in New Issue
Block a user