Add comprehensive tests for Go and Python version conflict detection and licensing normalization

- Implemented GoVersionConflictDetectorTests to validate pseudo-version detection, conflict analysis, and conflict retrieval for Go modules.
- Created VersionConflictDetectorTests for Python to assess conflict detection across various version scenarios, including major, minor, and patch differences.
- Added SpdxLicenseNormalizerTests to ensure accurate normalization of SPDX license strings and classifiers.
- Developed VendoredPackageDetectorTests to identify vendored packages and extract embedded packages from Python packages, including handling of vendor directories and known vendored packages.
This commit is contained in:
StellaOps Bot
2025-12-07 01:51:37 +02:00
parent 98934170ca
commit e0f6efecce
66 changed files with 7591 additions and 451 deletions

View File

@@ -134,7 +134,7 @@ internal static partial class BunConfigHelper
scopeRegistries.ToImmutableDictionary(StringComparer.Ordinal));
}
private static string StripQuotes(string value)
internal static string StripQuotes(string value)
{
if (value.Length >= 2)
{
@@ -148,7 +148,7 @@ internal static partial class BunConfigHelper
return value;
}
private static string? ExtractRegistryUrl(string value)
internal static string? ExtractRegistryUrl(string value)
{
// Simple case: just a URL string
if (value.StartsWith("http", StringComparison.OrdinalIgnoreCase))

View File

@@ -104,7 +104,7 @@ internal static class BunLockParser
}
}
private static (string Name, string Version) ParsePackageKey(string key)
internal static (string Name, string Version) ParsePackageKey(string key)
{
// Format: name@version or @scope/name@version
// Need to find the last @ that is not at position 0 (for scoped packages)
@@ -219,7 +219,7 @@ internal static class BunLockParser
/// <summary>
/// Classifies the resolved URL to detect git, tarball, file, or npm sources.
/// </summary>
private static (string SourceType, string? GitCommit, string? Specifier) ClassifyResolvedUrl(string? resolved)
internal static (string SourceType, string? GitCommit, string? Specifier) ClassifyResolvedUrl(string? resolved)
{
if (string.IsNullOrEmpty(resolved))
{
@@ -277,7 +277,7 @@ internal static class BunLockParser
/// <summary>
/// Extracts git commit hash from a git URL (after # or @).
/// </summary>
private static string? ExtractGitCommit(string url)
internal static string? ExtractGitCommit(string url)
{
// Format: git+https://github.com/user/repo#commit
// or: github:user/repo#tag

View File

@@ -8,6 +8,10 @@
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<ItemGroup>
<InternalsVisibleTo Include="StellaOps.Scanner.Analyzers.Lang.Bun.Tests" />
</ItemGroup>
<ItemGroup>
<Compile Include="**\*.cs" Exclude="obj\**;bin\**" />
<EmbeddedResource Include="**\*.json" Exclude="obj\**;bin\**" />

View File

@@ -145,7 +145,7 @@ internal static class NuGetConfigParser
case "username":
username = value;
break;
case "clearTextPassword":
case "cleartextpassword":
password = value;
isClearTextPassword = true;
break;

View File

@@ -175,6 +175,47 @@ public sealed class GoLanguageAnalyzer : ILanguageAnalyzer
metadata["workspace"] = "true";
}
// Add license metadata
if (!string.IsNullOrEmpty(inventory.License))
{
metadata["license"] = inventory.License;
}
// Add CGO metadata
if (!inventory.CgoAnalysis.IsEmpty)
{
metadata["cgo.enabled"] = inventory.CgoAnalysis.HasCgoImport ? "true" : "false";
var cflags = inventory.CgoAnalysis.GetCFlags();
if (!string.IsNullOrEmpty(cflags))
{
metadata["cgo.cflags"] = cflags;
}
var ldflags = inventory.CgoAnalysis.GetLdFlags();
if (!string.IsNullOrEmpty(ldflags))
{
metadata["cgo.ldflags"] = ldflags;
}
if (inventory.CgoAnalysis.NativeLibraries.Length > 0)
{
metadata["cgo.nativeLibs"] = string.Join(",", inventory.CgoAnalysis.NativeLibraries.Take(10));
}
if (inventory.CgoAnalysis.IncludedHeaders.Length > 0)
{
metadata["cgo.headers"] = string.Join(",", inventory.CgoAnalysis.IncludedHeaders.Take(10));
}
}
// Add conflict summary for main module
if (inventory.ConflictAnalysis.HasConflicts)
{
metadata["conflict.count"] = inventory.ConflictAnalysis.Conflicts.Length.ToString();
metadata["conflict.maxSeverity"] = inventory.ConflictAnalysis.MaxSeverity.ToString().ToLowerInvariant();
}
var evidence = new List<LanguageComponentEvidence>();
if (!string.IsNullOrEmpty(goModRelative))
@@ -187,6 +228,17 @@ public sealed class GoLanguageAnalyzer : ILanguageAnalyzer
null));
}
// Add CGO file evidence
foreach (var cgoFile in inventory.CgoAnalysis.CgoFiles.Take(5))
{
evidence.Add(new LanguageComponentEvidence(
LanguageEvidenceKind.File,
"cgo-source",
cgoFile,
"import \"C\"",
null));
}
evidence.Sort(static (l, r) => string.CompareOrdinal(l.ComparisonKey, r.ComparisonKey));
// Main module typically has (devel) as version in source context
@@ -281,6 +333,37 @@ public sealed class GoLanguageAnalyzer : ILanguageAnalyzer
metadata["excluded"] = "true";
}
// Add license metadata
if (!string.IsNullOrEmpty(module.License))
{
metadata["license"] = module.License;
if (module.LicenseConfidence != GoLicenseDetector.LicenseConfidence.None)
{
metadata["license.confidence"] = module.LicenseConfidence.ToString().ToLowerInvariant();
}
}
// Add pseudo-version indicator
if (module.IsPseudoVersion)
{
metadata["pseudoVersion"] = "true";
}
// Add conflict metadata for this specific module
var conflict = inventory.ConflictAnalysis.GetConflict(module.Path);
if (conflict is not null)
{
metadata["conflict.detected"] = "true";
metadata["conflict.severity"] = conflict.Severity.ToString().ToLowerInvariant();
metadata["conflict.type"] = conflict.ConflictType.ToString();
var otherVersions = conflict.OtherVersions.Take(5).ToList();
if (otherVersions.Count > 0)
{
metadata["conflict.otherVersions"] = string.Join(",", otherVersions);
}
}
var evidence = new List<LanguageComponentEvidence>();
// Evidence from go.mod
@@ -428,6 +511,28 @@ public sealed class GoLanguageAnalyzer : ILanguageAnalyzer
AddIfMissing(entries, "build.vcs.modified", dwarf.Modified?.ToString()?.ToLowerInvariant());
AddIfMissing(entries, "build.vcs.time", dwarf.TimestampUtc);
}
// Extract explicit CGO metadata from build settings
var cgoSettings = GoCgoDetector.ExtractFromBuildSettings(buildInfo.Settings);
if (cgoSettings.CgoEnabled)
{
AddIfMissing(entries, "cgo.enabled", "true");
AddIfMissing(entries, "cgo.cflags", cgoSettings.CgoFlags);
AddIfMissing(entries, "cgo.ldflags", cgoSettings.CgoLdFlags);
AddIfMissing(entries, "cgo.cc", cgoSettings.CCompiler);
AddIfMissing(entries, "cgo.cxx", cgoSettings.CxxCompiler);
}
// Scan for native libraries alongside the binary
var binaryDir = Path.GetDirectoryName(buildInfo.AbsoluteBinaryPath);
if (!string.IsNullOrEmpty(binaryDir))
{
var nativeLibs = GoCgoDetector.ScanForNativeLibraries(binaryDir);
if (nativeLibs.Count > 0)
{
AddIfMissing(entries, "cgo.nativeLibs", string.Join(",", nativeLibs.Take(10)));
}
}
}
entries.Sort(static (left, right) => string.CompareOrdinal(left.Key, right.Key));

View File

@@ -0,0 +1,398 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Detects CGO usage in Go modules and binaries.
/// Equivalent to Java's JNI detection for native code integration.
/// </summary>
internal static partial class GoCgoDetector
{
/// <summary>
/// Native library file extensions.
/// Each entry includes the leading dot and is compared case-insensitively against
/// <see cref="Path.GetExtension(string)"/> of candidate file names in this class.
/// </summary>
private static readonly string[] NativeLibraryExtensions =
[
".so", // Linux shared library
".dll", // Windows dynamic link library
".dylib", // macOS dynamic library
".a", // Static library (archive)
".lib", // Windows static library
];
/// <summary>
/// Result of CGO analysis for a Go module.
/// </summary>
public sealed record CgoAnalysisResult
{
/// <summary>
/// Shared "nothing detected" instance: no cgo import and every collection empty.
/// </summary>
public static readonly CgoAnalysisResult Empty = new(
false,
ImmutableArray<string>.Empty,
ImmutableArray<CgoDirective>.Empty,
ImmutableArray<string>.Empty,
ImmutableArray<string>.Empty);
/// <summary>
/// Creates a result from already-collected analysis data; the values are stored as given.
/// </summary>
public CgoAnalysisResult(
bool hasCgoImport,
ImmutableArray<string> cgoFiles,
ImmutableArray<CgoDirective> directives,
ImmutableArray<string> nativeLibraries,
ImmutableArray<string> includedHeaders)
{
HasCgoImport = hasCgoImport;
CgoFiles = cgoFiles;
Directives = directives;
NativeLibraries = nativeLibraries;
IncludedHeaders = includedHeaders;
}
/// <summary>
/// True if any Go file imports "C".
/// </summary>
public bool HasCgoImport { get; }
/// <summary>
/// List of Go files containing CGO imports.
/// </summary>
public ImmutableArray<string> CgoFiles { get; }
/// <summary>
/// Parsed #cgo directives from source files.
/// </summary>
public ImmutableArray<CgoDirective> Directives { get; }
/// <summary>
/// Native libraries found alongside Go source/binary.
/// </summary>
public ImmutableArray<string> NativeLibraries { get; }
/// <summary>
/// C headers included in cgo preamble.
/// </summary>
public ImmutableArray<string> IncludedHeaders { get; }
/// <summary>
/// Returns true when NO CGO usage was detected: there is no cgo import, no cgo file
/// and no native library. <see cref="Directives"/> and <see cref="IncludedHeaders"/>
/// are not consulted.
/// </summary>
public bool IsEmpty => !HasCgoImport && CgoFiles.IsEmpty && NativeLibraries.IsEmpty;
/// <summary>
/// Gets CFLAGS from directives.
/// </summary>
/// <returns>The space-joined CFLAGS values, or null when there are none.</returns>
public string? GetCFlags()
=> GetDirectiveValues("CFLAGS");
/// <summary>
/// Gets LDFLAGS from directives.
/// </summary>
/// <returns>The space-joined LDFLAGS values, or null when there are none.</returns>
public string? GetLdFlags()
=> GetDirectiveValues("LDFLAGS");
/// <summary>
/// Gets pkg-config packages from directives.
/// </summary>
/// <returns>The space-joined pkg-config values, or null when there are none.</returns>
public string? GetPkgConfig()
=> GetDirectiveValues("pkg-config");
/// <summary>
/// Collects the values of every directive whose type equals <paramref name="directiveType"/>
/// (case-insensitive), drops blank values and exact duplicates, and joins the remainder
/// with single spaces in directive order.
/// </summary>
/// <returns>The joined values, or null when no directive contributes a value.</returns>
private string? GetDirectiveValues(string directiveType)
{
var values = Directives
.Where(d => d.Type.Equals(directiveType, StringComparison.OrdinalIgnoreCase))
.Select(d => d.Value)
.Where(v => !string.IsNullOrWhiteSpace(v))
.Distinct(StringComparer.Ordinal)
.ToList();
return values.Count > 0 ? string.Join(" ", values) : null;
}
}
/// <summary>
/// Represents a parsed #cgo directive.
/// </summary>
/// <param name="Type">Directive name written before the colon; this class queries it as CFLAGS, LDFLAGS and pkg-config.</param>
/// <param name="Value">Trimmed text following the colon.</param>
/// <param name="Constraint">Trimmed build constraint written between <c>#cgo</c> and the directive name, or null when none was captured.</param>
/// <param name="SourceFile">The file path that was supplied to the analyzer for the file containing the directive.</param>
public sealed record CgoDirective(
string Type,
string Value,
string? Constraint,
string SourceFile);
/// <summary>
/// Analyzes a Go module directory for CGO usage.
/// </summary>
/// <param name="modulePath">Root directory of the module to scan.</param>
/// <returns>
/// The aggregated analysis, or <see cref="CgoAnalysisResult.Empty"/> when the directory
/// does not exist. File names, native libraries and headers are sorted ordinally, and
/// directives are listed in ordinal order of the files that declare them, so the result
/// does not depend on file-system enumeration order.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="modulePath"/> is null, empty or whitespace.</exception>
public static CgoAnalysisResult AnalyzeModule(string modulePath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(modulePath);

    if (!Directory.Exists(modulePath))
    {
        return CgoAnalysisResult.Empty;
    }

    var cgoFiles = new List<string>();
    var directives = new List<CgoDirective>();
    var headers = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Directory enumeration order is unspecified. Visiting files in ordinal order keeps the
    // directive list (and the CFLAGS/LDFLAGS strings joined from it) reproducible between
    // runs and platforms.
    var goFiles = EnumerateGoFiles(modulePath).OrderBy(static f => f, StringComparer.Ordinal);

    foreach (var goFile in goFiles)
    {
        var result = AnalyzeGoFile(goFile);
        if (!result.HasCgoImport)
        {
            continue;
        }

        cgoFiles.Add(Path.GetRelativePath(modulePath, goFile));
        directives.AddRange(result.Directives);
        foreach (var header in result.Headers)
        {
            headers.Add(header);
        }
    }

    // Native libraries that sit directly in the module root.
    var nativeLibs = ScanForNativeLibraries(modulePath);

    return new CgoAnalysisResult(
        cgoFiles.Count > 0,
        [.. cgoFiles.OrderBy(f => f, StringComparer.Ordinal)],
        [.. directives],
        [.. nativeLibs.Distinct().OrderBy(l => l, StringComparer.Ordinal)],
        [.. headers.OrderBy(h => h, StringComparer.Ordinal)]);
}
/// <summary>
/// Builds a <see cref="CgoBuildSettings"/> from the key/value settings of a binary's build info.
/// </summary>
/// <param name="settings">Build settings; when a key occurs more than once the last value wins.</param>
/// <returns>
/// Settings populated from the CGO_ENABLED, CGO_CFLAGS, CGO_LDFLAGS, CC and CXX keys.
/// CGO counts as enabled only when CGO_ENABLED is exactly "1".
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
public static CgoBuildSettings ExtractFromBuildSettings(
    IEnumerable<KeyValuePair<string, string?>> settings)
{
    ArgumentNullException.ThrowIfNull(settings);

    string? enabledRaw = null;
    string? cFlags = null;
    string? ldFlags = null;
    string? cCompiler = null;
    string? cxxCompiler = null;

    foreach (var (key, value) in settings)
    {
        if (key == "CGO_ENABLED")
        {
            enabledRaw = value;
        }
        else if (key == "CGO_CFLAGS")
        {
            cFlags = value;
        }
        else if (key == "CGO_LDFLAGS")
        {
            ldFlags = value;
        }
        else if (key == "CC")
        {
            cCompiler = value;
        }
        else if (key == "CXX")
        {
            cxxCompiler = value;
        }
    }

    var isEnabled = string.Equals(enabledRaw, "1", StringComparison.Ordinal);
    return new CgoBuildSettings(isEnabled, cFlags, ldFlags, cCompiler, cxxCompiler);
}
/// <summary>
/// Scans for native libraries in a directory (alongside a binary).
/// Only files directly inside the directory are considered; subdirectories are not searched.
/// </summary>
/// <param name="directoryPath">Directory to inspect.</param>
/// <returns>
/// File names (without directory) of the native libraries found, sorted ordinally so that
/// callers which truncate the list get a reproducible subset. Empty when the directory does
/// not exist. If enumeration fails part-way, the names collected so far are returned.
/// </returns>
public static IReadOnlyList<string> ScanForNativeLibraries(string directoryPath)
{
    if (!Directory.Exists(directoryPath))
    {
        return [];
    }

    var libraries = new List<string>();

    try
    {
        foreach (var file in Directory.EnumerateFiles(directoryPath, "*", SearchOption.TopDirectoryOnly))
        {
            var fileName = Path.GetFileName(file);
            if (IsNativeLibraryFileName(fileName))
            {
                libraries.Add(fileName);
            }
        }
    }
    catch (IOException)
    {
        // Best effort: an unreadable directory yields whatever was collected so far.
    }
    catch (UnauthorizedAccessException)
    {
        // Best effort: an inaccessible directory yields whatever was collected so far.
    }

    // Enumeration order is file-system dependent; sort for deterministic output.
    libraries.Sort(StringComparer.Ordinal);
    return libraries;
}

/// <summary>
/// Decides whether a file name denotes a native library: either its extension is one of
/// <see cref="NativeLibraryExtensions"/>, or it is a versioned shared object such as
/// <c>libssl.so.3</c> or <c>libfoo.so.1.2.3</c>, whose extension is a version number.
/// </summary>
private static bool IsNativeLibraryFileName(string fileName)
{
    var extension = Path.GetExtension(fileName);
    if (NativeLibraryExtensions.Any(ext => extension.Equals(ext, StringComparison.OrdinalIgnoreCase)))
    {
        return true;
    }

    const string marker = ".so.";
    var markerIndex = fileName.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
    if (markerIndex <= 0)
    {
        return false;
    }

    // Everything after ".so." must look like a version: starts with a digit, then digits and dots.
    var version = fileName.AsSpan(markerIndex + marker.Length);
    if (version.IsEmpty || !char.IsAsciiDigit(version[0]))
    {
        return false;
    }

    foreach (var c in version)
    {
        if (!char.IsAsciiDigit(c) && c != '.')
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Lazily enumerates the <c>.go</c> files below <paramref name="rootPath"/> (at most 10
/// directory levels deep, skipping inaccessible entries), leaving out <c>_test.go</c> files
/// and anything inside a <c>vendor</c> directory of the module.
/// </summary>
/// <param name="rootPath">Module root; must be an existing directory.</param>
/// <returns>Full paths of the matching files, in file-system enumeration order.</returns>
private static IEnumerable<string> EnumerateGoFiles(string rootPath)
{
    var options = new EnumerationOptions
    {
        RecurseSubdirectories = true,
        IgnoreInaccessible = true,
        MaxRecursionDepth = 10,
    };

    char[] separators = [Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar];

    foreach (var file in Directory.EnumerateFiles(rootPath, "*.go", options))
    {
        // Skip test files
        if (file.EndsWith("_test.go", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Skip vendored code. The check runs on the path RELATIVE to the root: testing the
        // absolute path would discard every file of a module that itself lives beneath a
        // directory named "vendor".
        var segments = Path.GetRelativePath(rootPath, file)
            .Split(separators, StringSplitOptions.RemoveEmptyEntries);

        var insideVendor = false;
        for (var i = 0; i < segments.Length - 1; i++) // the last segment is the file name
        {
            if (string.Equals(segments[i], "vendor", StringComparison.Ordinal))
            {
                insideVendor = true;
                break;
            }
        }

        if (insideVendor)
        {
            continue;
        }

        yield return file;
    }
}
/// <summary>
/// Loads a Go source file from disk and analyzes its text for cgo usage.
/// </summary>
/// <param name="filePath">Path of the file to read.</param>
/// <returns>
/// The analysis of the file's content, or <see cref="GoFileAnalysisResult.Empty"/> when an
/// <see cref="IOException"/> or <see cref="UnauthorizedAccessException"/> is raised.
/// </returns>
private static GoFileAnalysisResult AnalyzeGoFile(string filePath)
{
    try
    {
        return AnalyzeGoFileContent(File.ReadAllText(filePath), filePath);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Unreadable files are treated as containing no cgo.
        return GoFileAnalysisResult.Empty;
    }
}
/// <summary>
/// Inspects Go source text for an <c>import "C"</c> statement and, when one is present,
/// parses the cgo preamble (the comment directly preceding the import) for <c>#cgo</c>
/// directives and <c>#include</c> headers.
/// </summary>
/// <param name="content">Go source text.</param>
/// <param name="filePath">Path recorded as <see cref="CgoDirective.SourceFile"/> on every directive.</param>
/// <returns>
/// <see cref="GoFileAnalysisResult.Empty"/> when the text is blank or has no cgo import;
/// otherwise a result with <c>HasCgoImport</c> set and whatever directives/headers the
/// preamble declares (both may be empty).
/// </returns>
/// <remarks>
/// The preamble is located with plain string scanning rather than a single regular
/// expression: a pattern with nested lazy quantifiers under Singleline can backtrack
/// exponentially on files that contain a block comment but no comment-then-import sequence.
/// Both preamble forms accepted by cgo are supported: a <c>/* ... */</c> block and a run of
/// <c>//</c> line comments.
/// </remarks>
internal static GoFileAnalysisResult AnalyzeGoFileContent(string content, string filePath)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return GoFileAnalysisResult.Empty;
    }

    // Check for import "C"
    var importMatches = CgoImportPattern().Matches(content);
    if (importMatches.Count == 0)
    {
        return GoFileAnalysisResult.Empty;
    }

    var directives = new List<CgoDirective>();
    var headers = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Use the first cgo import that actually has a comment in front of it.
    string? preamble = null;
    foreach (Match importMatch in importMatches)
    {
        preamble = ExtractCgoPreamble(content, importMatch.Index);
        if (preamble is not null)
        {
            break;
        }
    }

    if (preamble is not null)
    {
        // Parse #cgo directives
        foreach (Match directiveMatch in CgoDirectivePattern().Matches(preamble))
        {
            var constraint = directiveMatch.Groups[1].Success
                ? directiveMatch.Groups[1].Value.Trim()
                : null;
            var directiveType = directiveMatch.Groups[2].Value.Trim();
            var directiveValue = directiveMatch.Groups[3].Value.Trim();

            directives.Add(new CgoDirective(
                directiveType,
                directiveValue,
                constraint,
                filePath));
        }

        // Parse #include directives for headers
        foreach (Match includeMatch in CIncludePattern().Matches(preamble))
        {
            var header = includeMatch.Groups[1].Value;
            if (!string.IsNullOrWhiteSpace(header))
            {
                headers.Add(header);
            }
        }
    }

    return new GoFileAnalysisResult(true, directives, headers.ToList());
}

/// <summary>
/// Returns the text of the comment that ends right before <paramref name="importIndex"/>
/// (only whitespace may separate them), with the comment markers removed, or null when the
/// import is not preceded by a comment.
/// </summary>
private static string? ExtractCgoPreamble(string content, int importIndex)
{
    var before = content[..importIndex].TrimEnd();
    if (before.Length == 0)
    {
        return null;
    }

    if (before.EndsWith("*/", StringComparison.Ordinal))
    {
        // Block comment: it opens at the first "/*" after the previous comment's "*/".
        var body = before[..^2];
        var previousClose = body.LastIndexOf("*/", StringComparison.Ordinal);
        var searchFrom = previousClose < 0 ? 0 : previousClose + 2;
        var open = body.IndexOf("/*", searchFrom, StringComparison.Ordinal);
        return open < 0 ? null : body[(open + 2)..];
    }

    // Line comments: walk upwards while lines start with "//".
    var commentLines = new List<string>();
    var end = before.Length;
    while (end > 0)
    {
        var start = before.LastIndexOf('\n', end - 1) + 1;
        var line = before[start..end].Trim();
        if (!line.StartsWith("//", StringComparison.Ordinal))
        {
            break;
        }

        commentLines.Add(line[2..]);
        end = start - 1; // step over the newline that precedes this line
    }

    if (commentLines.Count == 0)
    {
        return null;
    }

    commentLines.Reverse();
    return string.Join('\n', commentLines);
}
/// <summary>
/// Outcome of analyzing a single Go source file.
/// </summary>
/// <param name="HasCgoImport">True when the file text contains a cgo import.</param>
/// <param name="Directives">#cgo directives parsed from the file's preamble; may be empty.</param>
/// <param name="Headers">Header names taken from #include lines in the preamble; may be empty.</param>
internal sealed record GoFileAnalysisResult(
bool HasCgoImport,
List<CgoDirective> Directives,
List<string> Headers)
{
/// <summary>Shared instance for files without a cgo import (or that could not be read).</summary>
public static readonly GoFileAnalysisResult Empty = new(false, [], []);
}
/// <summary>
/// CGO build settings extracted from binary build info.
/// </summary>
/// <param name="CgoEnabled">True when the CGO_ENABLED setting was exactly "1".</param>
/// <param name="CgoFlags">Value of the CGO_CFLAGS setting, if present.</param>
/// <param name="CgoLdFlags">Value of the CGO_LDFLAGS setting, if present.</param>
/// <param name="CCompiler">Value of the CC setting, if present.</param>
/// <param name="CxxCompiler">Value of the CXX setting, if present.</param>
public sealed record CgoBuildSettings(
bool CgoEnabled,
string? CgoFlags,
string? CgoLdFlags,
string? CCompiler,
string? CxxCompiler)
{
/// <summary>Shared instance with CGO disabled and no flags or compilers.</summary>
public static readonly CgoBuildSettings Empty = new(false, null, null, null, null);
/// <summary>
/// Returns true when nothing CGO-related was recorded: CGO is not enabled and both
/// <see cref="CgoFlags"/> and <see cref="CgoLdFlags"/> are null or empty.
/// The compiler properties are not consulted.
/// </summary>
public bool IsEmpty => !CgoEnabled &&
string.IsNullOrEmpty(CgoFlags) &&
string.IsNullOrEmpty(CgoLdFlags);
}
// Regex patterns
/// <summary>
/// Matches <c>import "C"</c>, and a grouped import whose FIRST entry is <c>"C"</c>
/// (<c>import ( "C"</c>). A "C" entry later in a grouped import is not matched.
/// The pattern is purely textual, so occurrences inside comments or string literals match too.
/// </summary>
[GeneratedRegex(@"import\s*(?:\(\s*)?""C""", RegexOptions.Multiline)]
private static partial Regex CgoImportPattern();
/// <summary>
/// Matches the cgo preamble comment block before import "C".
/// Only <c>/* ... */</c> block comments are recognized; group 1 captures the comment body.
/// NOTE(review): under Singleline the nested lazy quantifiers in group 1 look prone to
/// heavy backtracking on inputs where no comment is directly followed by the import —
/// confirm, and consider a match timeout.
/// </summary>
[GeneratedRegex(@"/\*\s*((?:#.*?\n|.*?\n)*?)\s*\*/\s*import\s*""C""", RegexOptions.Singleline)]
private static partial Regex CgoPreamblePattern();
/// <summary>
/// Matches #cgo directives with optional build constraints.
/// Format: #cgo [GOOS GOARCH] DIRECTIVE: value
/// Group 1 = constraint (optional), group 2 = directive name, group 3 = value up to end of line.
/// </summary>
[GeneratedRegex(@"#cgo\s+(?:([a-z0-9_,!\s]+)\s+)?(\w+):\s*(.+?)(?=\n|$)", RegexOptions.Multiline | RegexOptions.IgnoreCase)]
private static partial Regex CgoDirectivePattern();
/// <summary>
/// Matches C #include directives in either form (<c>&lt;header&gt;</c> or <c>"header"</c>).
/// Group 1 = header name without the delimiters.
/// </summary>
[GeneratedRegex(@"#include\s*[<""]([^>""]+)[>""]", RegexOptions.Multiline)]
private static partial Regex CIncludePattern();
}

View File

@@ -0,0 +1,336 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Detects and normalizes licenses for Go modules.
/// Scans LICENSE files and converts to SPDX identifiers.
/// </summary>
internal static partial class GoLicenseDetector
{
/// <summary>
/// Common license file names to scan.
/// Names are probed in array order by combining them with a directory path, so an earlier
/// entry takes precedence when several files exist.
/// NOTE(review): the spellings are matched as written; whether "License" or "license.md"
/// is found presumably depends on the file system's case sensitivity — confirm.
/// </summary>
private static readonly string[] LicenseFileNames =
[
"LICENSE",
"LICENSE.txt",
"LICENSE.md",
"LICENSE.rst",
"LICENCE", // British spelling
"LICENCE.txt",
"LICENCE.md",
"COPYING",
"COPYING.txt",
"COPYING.md",
"MIT-LICENSE",
"MIT-LICENSE.txt",
"APACHE-LICENSE",
"APACHE-LICENSE.txt",
"APACHE-2.0.txt",
"UNLICENSE",
"UNLICENSE.txt",
];
/// <summary>
/// License patterns mapped to SPDX identifiers.
/// The table is scanned top to bottom and the FIRST matching entry wins, so an entry must
/// come before any broader entry that would also match the same text:
/// MIT-0 before MIT, Clear BSD before the generic BSD entries, "or later" before "only",
/// and AGPL/LGPL before GPL (their texts mention the GNU General Public License by name).
/// </summary>
private static readonly LicensePattern[] LicensePatterns =
[
    // Apache variants
    new("Apache-2.0", @"Apache License.*?(?:Version 2\.0|v2\.0)", "Apache License, Version 2.0"),
    new("Apache-1.1", @"Apache License.*?(?:Version 1\.1|v1\.1)", "Apache License, Version 1.1"),
    new("Apache-1.0", @"Apache License.*?(?:Version 1\.0|v1\.0)", "Apache License, Version 1.0"),

    // MIT variants (MIT-0 first: its text also contains the generic MIT grant sentence)
    new("MIT-0", @"MIT No Attribution", "MIT No Attribution"),
    new("MIT", @"(?:MIT License|Permission is hereby granted, free of charge)", "MIT License"),

    // BSD variants (most specific first: Clear, then 3-clause, then 2-clause)
    new("BSD-3-Clause-Clear", @"BSD-3-Clause-Clear|clear BSD", "BSD 3-Clause Clear License"),
    // The third clause is matched with or without a "3." number: many BSD-3 texts
    // (including Go's own LICENSE) use bullets, and would otherwise fall through to BSD-2.
    new("BSD-3-Clause", @"BSD 3-Clause|Redistribution and use.*?Neither the name", "BSD 3-Clause License"),
    new("BSD-2-Clause", @"BSD 2-Clause|Redistribution and use.*?provided that the following conditions", "BSD 2-Clause License"),
    new("0BSD", @"Zero-Clause BSD|BSD Zero Clause", "BSD Zero Clause License"),

    // AGPL / LGPL variants. These precede GPL because their texts also name the plain
    // "GNU General Public License". The gap between title and version is bounded so that
    // a GPL text which merely mentions the Affero/Lesser/Library license is not captured here.
    new("AGPL-3.0-only", @"GNU AFFERO GENERAL PUBLIC LICENSE.{0,120}?Version 3", "GNU Affero General Public License v3.0 only"),
    new("LGPL-3.0-only", @"GNU LESSER GENERAL PUBLIC LICENSE.{0,120}?Version 3", "GNU Lesser General Public License v3.0 only"),
    new("LGPL-2.1-only", @"GNU LESSER GENERAL PUBLIC LICENSE.{0,120}?Version 2\.1", "GNU Lesser General Public License v2.1 only"),
    new("LGPL-2.0-only", @"GNU LIBRARY GENERAL PUBLIC LICENSE.{0,120}?Version 2", "GNU Library General Public License v2 only"),

    // GPL variants ("or later" first: the "only" patterns match the same text)
    new("GPL-3.0-or-later", @"GNU GENERAL PUBLIC LICENSE.*?Version 3.*?or \(at your option\) any later", "GNU General Public License v3.0 or later"),
    new("GPL-3.0-only", @"GNU GENERAL PUBLIC LICENSE.*?Version 3", "GNU General Public License v3.0 only"),
    new("GPL-2.0-or-later", @"GNU GENERAL PUBLIC LICENSE.*?Version 2.*?or \(at your option\) any later", "GNU General Public License v2.0 or later"),
    new("GPL-2.0-only", @"GNU GENERAL PUBLIC LICENSE.*?Version 2(?!.*or later)", "GNU General Public License v2.0 only"),

    // Mozilla
    new("MPL-2.0", @"Mozilla Public License.*?(?:Version 2\.0|v2\.0|2\.0)", "Mozilla Public License 2.0"),
    new("MPL-1.1", @"Mozilla Public License.*?(?:Version 1\.1|v1\.1|1\.1)", "Mozilla Public License 1.1"),

    // Creative Commons
    new("CC-BY-4.0", @"Creative Commons Attribution 4\.0", "Creative Commons Attribution 4.0"),
    new("CC-BY-SA-4.0", @"Creative Commons Attribution-ShareAlike 4\.0", "Creative Commons Attribution ShareAlike 4.0"),
    new("CC0-1.0", @"CC0 1\.0|Creative Commons Zero", "Creative Commons Zero v1.0 Universal"),

    // Other common licenses
    new("ISC", @"ISC License|Permission to use, copy, modify, and/or distribute", "ISC License"),
    new("Unlicense", @"This is free and unencumbered software released into the public domain", "The Unlicense"),
    new("WTFPL", @"DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE", "Do What The F*ck You Want To Public License"),
    new("Zlib", @"zlib License|This software is provided 'as-is'", "zlib License"),
    new("BSL-1.0", @"Boost Software License", "Boost Software License 1.0"),
    new("PostgreSQL", @"PostgreSQL License", "PostgreSQL License"),
    new("BlueOak-1.0.0", @"Blue Oak Model License", "Blue Oak Model License 1.0.0"),

    // Dual/multiple license indicators
    new("MIT OR Apache-2.0", @"(?:MIT|Apache)[/\s]+(?:OR|AND|/)[/\s]+(?:Apache|MIT)", "MIT OR Apache-2.0"),
];
/// <summary>
/// Result of license detection for a module.
/// </summary>
/// <param name="SpdxIdentifier">Detected SPDX identifier, or null when nothing was detected.</param>
/// <param name="LicenseFile">Source file the detection was based on, when one was supplied.</param>
/// <param name="RawLicenseName">Name associated with the match (pattern display name, or the identifier itself).</param>
/// <param name="Confidence">How the identifier was obtained.</param>
public sealed record LicenseInfo(
string? SpdxIdentifier,
string? LicenseFile,
string? RawLicenseName,
LicenseConfidence Confidence)
{
/// <summary>Shared "no license detected" instance: all values null, confidence None.</summary>
public static readonly LicenseInfo Unknown = new(null, null, null, LicenseConfidence.None);
/// <summary>
/// Returns true if a license was detected, i.e. <see cref="SpdxIdentifier"/> is neither null nor empty.
/// </summary>
public bool IsDetected => !string.IsNullOrEmpty(SpdxIdentifier);
}
/// <summary>
/// Confidence level for license detection.
/// </summary>
public enum LicenseConfidence
{
/// <summary>No license detected.</summary>
None = 0,
/// <summary>Matched by heuristic or partial match (in this file: the keyword fallback).</summary>
Low = 1,
/// <summary>Matched by pattern with good confidence (in this file: a license-text pattern).</summary>
Medium = 2,
/// <summary>Exact SPDX identifier found or strong pattern match (in this file: an SPDX-License-Identifier tag).</summary>
High = 3
}
/// <summary>
/// Detects the license of the Go module rooted at <paramref name="modulePath"/>.
/// </summary>
/// <param name="modulePath">Module root directory.</param>
/// <returns>
/// The first successful detection among the known license file names, looking in the module
/// root first and then in its <c>docs</c> subdirectory; <see cref="LicenseInfo.Unknown"/>
/// when the directory is missing or no file yields a license.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="modulePath"/> is null, empty or whitespace.</exception>
public static LicenseInfo DetectLicense(string modulePath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(modulePath);

    if (!Directory.Exists(modulePath))
    {
        return LicenseInfo.Unknown;
    }

    // Module root takes precedence.
    if (FindInDirectory(modulePath) is { } rootLicense)
    {
        return rootLicense;
    }

    // Fall back to <module>/docs when it exists.
    var docsDirectory = Path.Combine(modulePath, "docs");
    if (Directory.Exists(docsDirectory) && FindInDirectory(docsDirectory) is { } docsLicense)
    {
        return docsLicense;
    }

    return LicenseInfo.Unknown;

    // Tries every known license file name inside one directory, in table order.
    static LicenseInfo? FindInDirectory(string directory)
    {
        foreach (var candidateName in LicenseFileNames)
        {
            var candidatePath = Path.Combine(directory, candidateName);
            if (!File.Exists(candidatePath))
            {
                continue;
            }

            var info = AnalyzeLicenseFile(candidatePath);
            if (info.IsDetected)
            {
                return info;
            }
        }

        return null;
    }
}
/// <summary>
/// Detects the license of a module vendored under <c>vendor/&lt;module-path&gt;</c>.
/// </summary>
/// <param name="vendorPath">Path of the vendor directory.</param>
/// <param name="modulePath">Go module path using '/' separators.</param>
/// <returns>
/// The detection result for the vendored module directory, or
/// <see cref="LicenseInfo.Unknown"/> when that directory does not exist.
/// </returns>
/// <exception cref="ArgumentException">Either argument is null, empty or whitespace.</exception>
public static LicenseInfo DetectVendoredLicense(string vendorPath, string modulePath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(vendorPath);
    ArgumentException.ThrowIfNullOrWhiteSpace(modulePath);

    // Module paths use '/', which is mapped onto the platform's directory separator.
    var relativeModuleDirectory = modulePath.Replace('/', Path.DirectorySeparatorChar);
    var vendoredDirectory = Path.Combine(vendorPath, relativeModuleDirectory);

    return Directory.Exists(vendoredDirectory)
        ? DetectLicense(vendoredDirectory)
        : LicenseInfo.Unknown;
}
/// <summary>
/// Reads the beginning of a license file and classifies it.
/// </summary>
/// <param name="filePath">Path of the license file.</param>
/// <returns>
/// The detected license, or <see cref="LicenseInfo.Unknown"/> when the file head is blank or
/// an <see cref="IOException"/> / <see cref="UnauthorizedAccessException"/> occurs.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or whitespace.</exception>
public static LicenseInfo AnalyzeLicenseFile(string filePath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(filePath);

    try
    {
        // Only the first 8KB is inspected.
        var head = ReadFileHead(filePath, 8192);
        return string.IsNullOrWhiteSpace(head)
            ? LicenseInfo.Unknown
            : AnalyzeLicenseContent(head, filePath);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return LicenseInfo.Unknown;
    }
}
/// <summary>
/// Classifies license text, trying three strategies in decreasing order of confidence:
/// an explicit SPDX-License-Identifier tag (High), the license pattern table (Medium),
/// and a keyword fallback (Low).
/// </summary>
/// <param name="content">License text to classify.</param>
/// <param name="sourceFile">Optional path recorded in the result.</param>
/// <returns>The detected license, or <see cref="LicenseInfo.Unknown"/> when nothing matches or the text is blank.</returns>
internal static LicenseInfo AnalyzeLicenseContent(string content, string? sourceFile = null)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return LicenseInfo.Unknown;
    }

    // 1. Explicit SPDX tag.
    if (SpdxIdentifierPattern().Match(content) is { Success: true } tag)
    {
        var identifier = tag.Groups[1].Value.Trim();
        return new LicenseInfo(identifier, sourceFile, identifier, LicenseConfidence.High);
    }

    // 2. First entry of the pattern table that matches.
    var matchedPattern = Array.Find(LicensePatterns, candidate => candidate.CompiledRegex.IsMatch(content));
    if (matchedPattern is not null)
    {
        return new LicenseInfo(
            matchedPattern.SpdxId,
            sourceFile,
            matchedPattern.DisplayName,
            LicenseConfidence.Medium);
    }

    // 3. Keyword fallback.
    return DetectByKeywords(content) is { } keywordId
        ? new LicenseInfo(keywordId, sourceFile, keywordId, LicenseConfidence.Low)
        : LicenseInfo.Unknown;
}
/// <summary>
/// Last-resort license guess based on the presence of well-known license names.
/// </summary>
/// <param name="content">License text to inspect.</param>
/// <returns>
/// A default SPDX identifier for the first keyword family found (MIT, Apache, BSD, GPL,
/// public domain/Unlicense — checked in that order), or null when none is present.
/// </returns>
/// <remarks>
/// Keywords are matched case-insensitively at word boundaries. Plain substring matching
/// would report "MIT" for any text containing "permitted" or "limited", and "GPL" for
/// "LGPL"/"AGPL" texts.
/// </remarks>
private static string? DetectByKeywords(string content)
{
    // Very basic keyword detection as fallback
    if (HasKeyword(content, @"\bMIT\b"))
    {
        return "MIT";
    }

    // Leading boundary only for the rest, so suffixed forms ("GPLv3", "BSD-style") still count.
    if (HasKeyword(content, @"\bAPACHE"))
    {
        return "Apache-2.0"; // Default to 2.0
    }

    if (HasKeyword(content, @"\bBSD"))
    {
        return "BSD-3-Clause"; // Default to 3-clause
    }

    if (HasKeyword(content, @"\bGPL"))
    {
        return "GPL-3.0-only"; // Default to 3.0
    }

    if (content.Contains("PUBLIC DOMAIN", StringComparison.OrdinalIgnoreCase) ||
        content.Contains("UNLICENSE", StringComparison.OrdinalIgnoreCase))
    {
        return "Unlicense";
    }

    return null;

    static bool HasKeyword(string text, string pattern)
        => Regex.IsMatch(text, pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
}
/// <summary>
/// Reads at most <paramref name="maxBytes"/> bytes from the start of a file and decodes
/// them as UTF-8.
/// </summary>
/// <param name="filePath">File to read.</param>
/// <param name="maxBytes">Upper bound on the number of bytes read.</param>
/// <returns>
/// The decoded text. Invalid byte sequences (including a multi-byte character cut off at
/// the limit) decode to U+FFFD rather than raising an error.
/// </returns>
private static string ReadFileHead(string filePath, int maxBytes)
{
    using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
    var buffer = new byte[Math.Min(maxBytes, stream.Length)];

    // A single Read call may legitimately return fewer bytes than requested;
    // ReadAtLeast keeps reading until the buffer is full or the stream ends.
    var bytesRead = stream.ReadAtLeast(buffer, buffer.Length, throwOnEndOfStream: false);

    // Encoding.UTF8 substitutes U+FFFD for invalid input instead of throwing, so no
    // fallback decoding is needed.
    return System.Text.Encoding.UTF8.GetString(buffer, 0, bytesRead);
}
/// <summary>
/// Matches SPDX-License-Identifier comments (case-insensitive).
/// Group 1 captures the expression: an identifier made of letters, digits, '-', '.' and '+',
/// optionally followed by further identifiers joined with OR / AND.
/// Parentheses and WITH clauses are not part of the captured expression.
/// </summary>
[GeneratedRegex(@"SPDX-License-Identifier:\s*([A-Za-z0-9\-\.+]+(?:\s+(?:OR|AND)\s+[A-Za-z0-9\-\.+]+)*)", RegexOptions.IgnoreCase)]
private static partial Regex SpdxIdentifierPattern();
/// <summary>
/// Internal record for license patterns: pairs an SPDX identifier and display name with
/// the regular expression that recognizes the license text.
/// </summary>
private sealed record LicensePattern
{
/// <summary>
/// Compiles <paramref name="pattern"/> once, with IgnoreCase and Singleline
/// (so '.' also matches line breaks and a pattern can span several lines of text).
/// </summary>
public LicensePattern(string spdxId, string pattern, string displayName)
{
SpdxId = spdxId;
DisplayName = displayName;
CompiledRegex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
}
/// <summary>SPDX identifier reported when the pattern matches.</summary>
public string SpdxId { get; }
/// <summary>Human-readable license name reported alongside the identifier.</summary>
public string DisplayName { get; }
/// <summary>Compiled form of the pattern passed to the constructor.</summary>
public Regex CompiledRegex { get; }
}
}

View File

@@ -27,6 +27,21 @@ internal static class GoSourceInventory
public string Source { get; init; } = "go.mod";
public string ModuleCategory { get; init; } = "public";
public string? Registry { get; init; }
/// <summary>
/// SPDX license identifier if detected.
/// </summary>
public string? License { get; init; }
/// <summary>
/// License detection confidence.
/// </summary>
public GoLicenseDetector.LicenseConfidence LicenseConfidence { get; init; }
/// <summary>
/// True if this is a pseudo-version (unreleased code).
/// </summary>
public bool IsPseudoVersion { get; init; }
}
/// <summary>
@@ -38,18 +53,27 @@ internal static class GoSourceInventory
null,
null,
ImmutableArray<GoSourceModule>.Empty,
ImmutableArray<string>.Empty);
ImmutableArray<string>.Empty,
GoVersionConflictDetector.GoConflictAnalysis.Empty,
GoCgoDetector.CgoAnalysisResult.Empty,
null);
public SourceInventoryResult(
string? modulePath,
string? goVersion,
ImmutableArray<GoSourceModule> modules,
ImmutableArray<string> retractedVersions)
ImmutableArray<string> retractedVersions,
GoVersionConflictDetector.GoConflictAnalysis conflictAnalysis,
GoCgoDetector.CgoAnalysisResult cgoAnalysis,
string? license)
{
ModulePath = modulePath;
GoVersion = goVersion;
Modules = modules;
RetractedVersions = retractedVersions;
ConflictAnalysis = conflictAnalysis;
CgoAnalysis = cgoAnalysis;
License = license;
}
public string? ModulePath { get; }
@@ -57,6 +81,21 @@ internal static class GoSourceInventory
public ImmutableArray<GoSourceModule> Modules { get; }
public ImmutableArray<string> RetractedVersions { get; }
/// <summary>
/// Version conflict analysis for this inventory.
/// </summary>
public GoVersionConflictDetector.GoConflictAnalysis ConflictAnalysis { get; }
/// <summary>
/// CGO usage analysis for this module.
/// </summary>
public GoCgoDetector.CgoAnalysisResult CgoAnalysis { get; }
/// <summary>
/// Main module license (SPDX identifier).
/// </summary>
public string? License { get; }
public bool IsEmpty => Modules.IsEmpty && string.IsNullOrEmpty(ModulePath);
}
@@ -114,6 +153,7 @@ internal static class GoSourceInventory
var isPrivate = GoPrivateModuleDetector.IsLikelyPrivate(req.Path);
var moduleCategory = GoPrivateModuleDetector.GetModuleCategory(req.Path);
var registry = GoPrivateModuleDetector.GetRegistry(req.Path);
var isPseudoVersion = GoVersionConflictDetector.IsPseudoVersion(req.Version);
// Check for replacement
GoModParser.GoModReplace? replacement = null;
@@ -127,6 +167,20 @@ internal static class GoSourceInventory
// Check if excluded
var isExcluded = excludes.Contains(versionedKey);
// Detect license for vendored modules
string? license = null;
var licenseConfidence = GoLicenseDetector.LicenseConfidence.None;
if (isVendored && project.HasVendor)
{
var vendorDir = Path.GetDirectoryName(project.VendorModulesPath);
if (!string.IsNullOrEmpty(vendorDir))
{
var licenseInfo = GoLicenseDetector.DetectVendoredLicense(vendorDir, req.Path);
license = licenseInfo.SpdxIdentifier;
licenseConfidence = licenseInfo.Confidence;
}
}
var module = new GoSourceModule
{
Path = req.Path,
@@ -143,7 +197,10 @@ internal static class GoSourceInventory
ReplacementVersion = replacement?.NewVersion,
Source = isVendored ? "vendor" : "go.mod",
ModuleCategory = moduleCategory,
Registry = registry
Registry = registry,
License = license,
LicenseConfidence = licenseConfidence,
IsPseudoVersion = isPseudoVersion
};
modules.Add(module);
@@ -162,6 +219,21 @@ internal static class GoSourceInventory
{
var isPrivate = GoPrivateModuleDetector.IsLikelyPrivate(vendorMod.Path);
var moduleCategory = GoPrivateModuleDetector.GetModuleCategory(vendorMod.Path);
var isPseudoVersion = GoVersionConflictDetector.IsPseudoVersion(vendorMod.Version);
// Detect license for vendored module
string? license = null;
var licenseConfidence = GoLicenseDetector.LicenseConfidence.None;
if (project.HasVendor)
{
var vendorDir = Path.GetDirectoryName(project.VendorModulesPath);
if (!string.IsNullOrEmpty(vendorDir))
{
var licenseInfo = GoLicenseDetector.DetectVendoredLicense(vendorDir, vendorMod.Path);
license = licenseInfo.SpdxIdentifier;
licenseConfidence = licenseInfo.Confidence;
}
}
modules.Add(new GoSourceModule
{
@@ -176,17 +248,36 @@ internal static class GoSourceInventory
IsRetracted = false,
IsPrivate = isPrivate,
Source = "vendor",
ModuleCategory = moduleCategory
ModuleCategory = moduleCategory,
License = license,
LicenseConfidence = licenseConfidence,
IsPseudoVersion = isPseudoVersion
});
}
}
}
// Perform conflict analysis
var conflictAnalysis = GoVersionConflictDetector.Analyze(
modules,
goMod.Replaces.ToList(),
goMod.Excludes.ToList(),
retractedVersions);
// Analyze CGO usage in the module
var cgoAnalysis = GoCgoDetector.AnalyzeModule(project.RootPath);
// Detect main module license
var mainLicense = GoLicenseDetector.DetectLicense(project.RootPath);
return new SourceInventoryResult(
goMod.ModulePath,
goMod.GoVersion,
modules.ToImmutableArray(),
retractedVersions);
retractedVersions,
conflictAnalysis,
cgoAnalysis,
mainLicense.SpdxIdentifier);
}
/// <summary>

View File

@@ -0,0 +1,442 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Go.Internal;
/// <summary>
/// Detects version conflicts in Go module dependencies.
/// Similar to Java's VersionConflictDetector for Maven artifacts.
/// </summary>
internal static partial class GoVersionConflictDetector
{
/// <summary>
/// Conflict severity levels.
/// Numeric values increase with severity (None = 0 up to High = 3).
/// </summary>
public enum GoConflictSeverity
{
/// <summary>No conflict detected.</summary>
None = 0,
/// <summary>Minor version mismatch or informational.</summary>
Low = 1,
/// <summary>Potential compatibility issue.</summary>
Medium = 2,
/// <summary>Likely breaking change or security concern.</summary>
High = 3
}
/// <summary>
/// Classifies why a module was reported by the conflict detector.
/// </summary>
public enum GoConflictType
{
    /// <summary>Nothing to report.</summary>
    None = 0,

    /// <summary>The module is replaced and the replacement is not a local path.</summary>
    ReplaceOverride = 1,

    /// <summary>The module is replaced by a local filesystem path.</summary>
    LocalReplacement = 2,

    /// <summary>The selected version is a pseudo-version (unreleased code).</summary>
    PseudoVersion = 3,

    /// <summary>Several major versions of the same base module path are present.</summary>
    MajorVersionMismatch = 4,

    /// <summary>Workspace members require different versions of the same module.</summary>
    WorkspaceConflict = 5,

    /// <summary>The selected version is listed in an exclude directive.</summary>
    ExcludedVersion = 6,

    /// <summary>The selected version is flagged or listed as retracted.</summary>
    RetractedVersion = 7
}
/// <summary>
/// One conflict finding for a single module.
/// </summary>
/// <param name="ModulePath">Path of the module the finding is about.</param>
/// <param name="SelectedVersion">Version the finding treats as the one in effect.</param>
/// <param name="RequestedVersions">Every version that was seen for the module, including the selected one.</param>
/// <param name="Severity">How serious the finding is.</param>
/// <param name="ConflictType">Which check produced the finding.</param>
/// <param name="Description">Human-readable explanation, if any.</param>
public sealed record GoVersionConflict(
    string ModulePath,
    string SelectedVersion,
    ImmutableArray<string> RequestedVersions,
    GoConflictSeverity Severity,
    GoConflictType ConflictType,
    string? Description)
{
    /// <summary>
    /// Gets the requested versions that differ (ordinal comparison) from
    /// <see cref="SelectedVersion"/>. The sequence is evaluated lazily.
    /// </summary>
    public IEnumerable<string> OtherVersions
    {
        get
        {
            return from requested in RequestedVersions
                   where !requested.Equals(SelectedVersion, StringComparison.Ordinal)
                   select requested;
        }
    }
}
/// <summary>
/// Result of conflict analysis for a module inventory: the full list of findings plus
/// a per-module lookup.
/// </summary>
public sealed record GoConflictAnalysis
{
    /// <summary>Shared instance representing "no conflicts".</summary>
    public static readonly GoConflictAnalysis Empty = new(
        ImmutableArray<GoVersionConflict>.Empty,
        ImmutableDictionary<string, GoVersionConflict>.Empty);

    private readonly ImmutableDictionary<string, GoVersionConflict> _byModule;

    /// <summary>
    /// Creates an analysis result.
    /// </summary>
    /// <param name="conflicts">All findings. An uninitialized (default) array is treated as empty.</param>
    /// <param name="byModule">Lookup from module path to the finding returned by <see cref="GetConflict"/>. Null is treated as empty.</param>
    public GoConflictAnalysis(
        ImmutableArray<GoVersionConflict> conflicts,
        ImmutableDictionary<string, GoVersionConflict> byModule)
    {
        // default(ImmutableArray<T>) throws on Length/enumeration; normalise it here so
        // every member below can be used without further checks.
        Conflicts = conflicts.IsDefault ? ImmutableArray<GoVersionConflict>.Empty : conflicts;
        _byModule = byModule ?? ImmutableDictionary<string, GoVersionConflict>.Empty;
    }

    /// <summary>
    /// All detected conflicts.
    /// </summary>
    public ImmutableArray<GoVersionConflict> Conflicts { get; }

    /// <summary>
    /// Returns true if any conflicts were detected.
    /// </summary>
    public bool HasConflicts => !Conflicts.IsEmpty;

    /// <summary>
    /// Gets the highest severity among all conflicts, or
    /// <see cref="GoConflictSeverity.None"/> when there are none.
    /// </summary>
    public GoConflictSeverity MaxSeverity
        => Conflicts.IsEmpty ? GoConflictSeverity.None : Conflicts.Max(c => c.Severity);

    /// <summary>
    /// Gets the conflict recorded for a specific module, or null if none exists.
    /// </summary>
    /// <param name="modulePath">Module path to look up; must not be null.</param>
    public GoVersionConflict? GetConflict(string modulePath)
        => _byModule.TryGetValue(modulePath, out var conflict) ? conflict : null;
}
/// <summary>
/// Analyzes a module inventory for version conflicts.
/// </summary>
/// <param name="modules">Modules to inspect. An empty list yields <c>GoConflictAnalysis.Empty</c>.</param>
/// <param name="replaces">
/// Replace directives of the go.mod. Validated for null only: replacement findings are
/// derived from each module's own <c>IsReplaced</c>/<c>ReplacementPath</c> values.
/// </param>
/// <param name="excludes">Exclude directives; a module whose path and version match one is reported.</param>
/// <param name="retractedVersions">
/// Retracted version strings. An uninitialized (default) array is treated as empty.
/// </param>
/// <returns>
/// All findings ordered by module path, plus a lookup that keeps the highest-severity
/// finding per module.
/// </returns>
/// <exception cref="ArgumentNullException">A list argument is null.</exception>
public static GoConflictAnalysis Analyze(
    IReadOnlyList<GoSourceInventory.GoSourceModule> modules,
    IReadOnlyList<GoModParser.GoModReplace> replaces,
    IReadOnlyList<GoModParser.GoModExclude> excludes,
    ImmutableArray<string> retractedVersions)
{
    ArgumentNullException.ThrowIfNull(modules);
    ArgumentNullException.ThrowIfNull(replaces);
    ArgumentNullException.ThrowIfNull(excludes);

    if (modules.Count == 0)
    {
        return GoConflictAnalysis.Empty;
    }

    var conflicts = new List<GoVersionConflict>();

    // Build exclude set for quick lookup
    var excludeSet = excludes
        .Select(e => $"{e.Path}@{e.Version}")
        .ToImmutableHashSet(StringComparer.Ordinal);

    // No lookup is built from `replaces`: nothing below reads one, and building a
    // dictionary keyed by old path/version would throw on duplicate directives.

    // default(ImmutableArray<T>).Contains throws, so remember whether there is
    // anything to search at all.
    var hasRetractions = !retractedVersions.IsDefaultOrEmpty;

    foreach (var module in modules)
    {
        // Check for pseudo-version
        if (IsPseudoVersion(module.Version))
        {
            conflicts.Add(new GoVersionConflict(
                module.Path,
                module.Version,
                [module.Version],
                GoConflictSeverity.Medium,
                GoConflictType.PseudoVersion,
                "Using pseudo-version indicates unreleased or unstable code"));
        }

        // Check for replace directive conflicts
        if (module.IsReplaced)
        {
            var severity = GoConflictSeverity.Low;
            var conflictType = GoConflictType.ReplaceOverride;
            var description = $"Module replaced with {module.ReplacementPath}";

            // Local path replacement is higher risk
            if (IsLocalPath(module.ReplacementPath))
            {
                severity = GoConflictSeverity.High;
                conflictType = GoConflictType.LocalReplacement;
                description = "Module replaced with local path - may not be reproducible";
            }

            conflicts.Add(new GoVersionConflict(
                module.Path,
                module.Version,
                [module.Version],
                severity,
                conflictType,
                description));
        }

        // Check for excluded version being required
        var versionedKey = $"{module.Path}@{module.Version}";
        if (excludeSet.Contains(versionedKey))
        {
            conflicts.Add(new GoVersionConflict(
                module.Path,
                module.Version,
                [module.Version],
                GoConflictSeverity.High,
                GoConflictType.ExcludedVersion,
                "Required version is explicitly excluded"));
        }

        // Check for retracted versions.
        // NOTE(review): the version is matched against retractedVersions without
        // looking at the module path - confirm that is intended for dependencies.
        if (module.IsRetracted || (hasRetractions && retractedVersions.Contains(module.Version)))
        {
            conflicts.Add(new GoVersionConflict(
                module.Path,
                module.Version,
                [module.Version],
                GoConflictSeverity.High,
                GoConflictType.RetractedVersion,
                "Using a retracted version - may have known issues"));
        }
    }

    // Check for major version mismatches: modules whose paths differ only by a
    // trailing /vN element share a base path.
    var modulesByBasePath = modules
        .GroupBy(m => ExtractBasePath(m.Path), StringComparer.OrdinalIgnoreCase)
        .Where(g => g.Count() > 1);

    foreach (var group in modulesByBasePath)
    {
        var versions = group.Select(m => ExtractMajorVersion(m.Path)).Distinct().ToList();
        if (versions.Count > 1)
        {
            foreach (var module in group)
            {
                var otherVersions = group
                    .Where(m => !m.Path.Equals(module.Path, StringComparison.Ordinal))
                    .Select(m => m.Version)
                    .ToImmutableArray();

                conflicts.Add(new GoVersionConflict(
                    module.Path,
                    module.Version,
                    [module.Version, .. otherVersions],
                    GoConflictSeverity.Medium,
                    GoConflictType.MajorVersionMismatch,
                    $"Multiple major versions of same module: {string.Join(", ", versions)}"));
            }
        }
    }

    // A module can collect several findings; the lookup keeps the most severe one.
    var byModule = conflicts
        .GroupBy(c => c.ModulePath, StringComparer.Ordinal)
        .Select(g => g.OrderByDescending(c => c.Severity).First())
        .ToImmutableDictionary(c => c.ModulePath, c => c, StringComparer.Ordinal);

    return new GoConflictAnalysis(
        [.. conflicts.OrderBy(c => c.ModulePath, StringComparer.Ordinal)],
        byModule);
}
/// <summary>
/// Looks for modules that workspace members pin to different versions.
/// </summary>
/// <param name="inventories">One inventory per workspace member.</param>
/// <returns>
/// One low-severity <c>WorkspaceConflict</c> per module path that appears with more than
/// one distinct version; the empty analysis when fewer than two inventories are given.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="inventories"/> is null.</exception>
public static GoConflictAnalysis AnalyzeWorkspace(
    IReadOnlyList<GoSourceInventory.SourceInventoryResult> inventories)
{
    ArgumentNullException.ThrowIfNull(inventories);

    // A single member has nothing to disagree with.
    if (inventories.Count < 2)
    {
        return GoConflictAnalysis.Empty;
    }

    var found = inventories
        .SelectMany(inv => inv.Modules)
        .GroupBy(m => m.Path, StringComparer.Ordinal)
        .Select(sameModule => (
            Path: sameModule.Key,
            Versions: sameModule
                .Select(m => m.Version)
                .Distinct(StringComparer.Ordinal)
                .ToList()))
        .Where(candidate => candidate.Versions.Count > 1)
        .Select(candidate => new GoVersionConflict(
            candidate.Path,
            // The highest requested version is reported as the selected one.
            SelectMvsVersion(candidate.Versions),
            [.. candidate.Versions],
            GoConflictSeverity.Low,
            GoConflictType.WorkspaceConflict,
            $"Workspace modules require different versions: {string.Join(", ", candidate.Versions)}"))
        .ToList();

    var lookup = found.ToImmutableDictionary(c => c.ModulePath, c => c, StringComparer.Ordinal);

    return new GoConflictAnalysis([.. found], lookup);
}
/// <summary>
/// Tells whether a version string is recognised as a Go pseudo-version.
/// </summary>
/// <param name="version">Version string to test.</param>
/// <returns>
/// False for null, empty or whitespace-only input; otherwise whether the string matches
/// <c>PseudoVersionPattern</c>.
/// </returns>
public static bool IsPseudoVersion(string version)
    => !string.IsNullOrWhiteSpace(version) && PseudoVersionPattern().IsMatch(version);
/// <summary>
/// Tells whether a replacement target looks like a filesystem path rather than a
/// module path.
/// </summary>
/// <param name="path">Replacement target; may be null.</param>
/// <returns>
/// True when the text starts with <c>.</c>, <c>/</c> or <c>\</c>, or with a letter followed
/// by <c>:</c> (drive-qualified path). False for null, empty or whitespace-only input.
/// </returns>
private static bool IsLocalPath(string? path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return false;
    }

    var first = path[0];

    // "./x", "../x", "/abs" and "\abs" all announce themselves with the first character.
    var relativeOrRooted = first is '.' or '/' or '\\';

    // "C:\x" style paths.
    var driveQualified = path.Length >= 2 && char.IsLetter(first) && path[1] == ':';

    return relativeOrRooted || driveQualified;
}
/// <summary>
/// Removes a trailing major-version element from a module path.
/// Example: "github.com/user/repo/v2" -> "github.com/user/repo".
/// </summary>
/// <param name="modulePath">Module path to trim.</param>
/// <returns>The path without the matched suffix, or the input unchanged when there is none.</returns>
private static string ExtractBasePath(string modulePath)
{
    var suffix = MajorVersionSuffixPattern().Match(modulePath);
    if (!suffix.Success)
    {
        return modulePath;
    }

    // Drop exactly as many trailing characters as the suffix matched.
    return modulePath.Substring(0, modulePath.Length - suffix.Length);
}
/// <summary>
/// Extracts the major-version label from a module path.
/// Example: "github.com/user/repo/v2" -> "v2".
/// </summary>
/// <param name="modulePath">Module path to inspect.</param>
/// <returns>
/// The trailing version element without its leading slash, or the label "v0/v1" when
/// the path carries no major-version suffix.
/// </returns>
private static string ExtractMajorVersion(string modulePath)
{
    var match = MajorVersionSuffixPattern().Match(modulePath);

    // The pattern includes the path separator ("/v2"); strip it so the label reads
    // "v2" in conflict descriptions, alongside the "v0/v1" fallback.
    return match.Success ? match.Value[1..] : "v0/v1";
}
/// <summary>
/// Picks the highest of the given versions, as ordered by <c>SemVerComparer</c>.
/// </summary>
/// <param name="versions">Candidate versions; must contain at least one element.</param>
/// <returns>The greatest version.</returns>
/// <exception cref="InvalidOperationException"><paramref name="versions"/> is empty.</exception>
private static string SelectMvsVersion(IEnumerable<string> versions)
{
    // Sort ascending and take the tail: the element every other one is <= to.
    var ascending = versions.OrderBy(candidate => candidate, SemVerComparer.Instance);
    return ascending.Last();
}
/// <summary>
/// Matches the three Go pseudo-version shapes:
/// <c>vX.0.0-yyyymmddhhmmss-abcdefabcdef</c> (no base version),
/// <c>vX.Y.Z-pre.0.yyyymmddhhmmss-abcdefabcdef</c> (base is a pre-release) and
/// <c>vX.Y.(Z+1)-0.yyyymmddhhmmss-abcdefabcdef</c> (base is a release),
/// each optionally followed by build metadata such as <c>+incompatible</c>.
/// </summary>
/// <remarks>
/// The timestamp is exactly 14 digits and the revision exactly 12 hex characters.
/// In the second and third shapes the timestamp must be preceded by a literal <c>0.</c>;
/// ordinary pre-release tags such as <c>v1.2.3-rc.1</c> do not match.
/// </remarks>
[GeneratedRegex(@"^v\d+\.(?:0\.0-|\d+\.\d+-(?:[^+]*\.)?0\.)\d{14}-[a-f0-9]{12}(?:\+[0-9a-z-]+(?:\.[0-9a-z-]+)*)?$", RegexOptions.IgnoreCase)]
private static partial Regex PseudoVersionPattern();
/// <summary>
/// Matches a trailing major-version path element: /v2, /v3, etc.
/// </summary>
/// <remarks>
/// The match includes the leading slash and is anchored at the end of the input.
/// Any digit run is accepted after <c>/v</c>, so <c>/v0</c> and <c>/v1</c> match as well.
/// NOTE(review): suffixes written with a dot (for example a path ending in ".v2") are
/// not matched by this pattern - confirm whether they should be.
/// </remarks>
[GeneratedRegex(@"/v\d+$")]
private static partial Regex MajorVersionSuffixPattern();
/// <summary>
/// Orders Go module version strings by semantic-version precedence.
/// </summary>
/// <remarks>
/// <para>
/// Precedence: major, minor and patch are compared numerically; a version without a
/// pre-release ranks above the same version with one; pre-release identifiers are
/// compared field by field (numeric fields numerically and below alphanumeric ones,
/// fewer fields below more). Build metadata (everything from <c>+</c>) is ignored
/// for precedence.
/// </para>
/// <para>
/// Strings that tie on precedence are finally ordered by ordinal comparison so the
/// comparer stays a deterministic total order. Null sorts before any string.
/// Unparseable numeric parts count as 0.
/// </para>
/// </remarks>
private sealed class SemVerComparer : IComparer<string>
{
    public static readonly SemVerComparer Instance = new();

    /// <inheritdoc />
    public int Compare(string? x, string? y)
    {
        if (x is null && y is null)
        {
            return 0;
        }

        if (x is null)
        {
            return -1;
        }

        if (y is null)
        {
            return 1;
        }

        var (coreX, prereleaseX) = Parse(x);
        var (coreY, prereleaseY) = Parse(y);

        // Compare major.minor.patch
        for (var i = 0; i < coreX.Length; i++)
        {
            var byCore = coreX[i].CompareTo(coreY[i]);
            if (byCore != 0)
            {
                return byCore;
            }
        }

        var byPrerelease = ComparePrerelease(prereleaseX, prereleaseY);
        if (byPrerelease != 0)
        {
            return byPrerelease;
        }

        // Same precedence (differences only in build metadata, prefix casing or
        // malformed parts): fall back to a stable textual order.
        return string.CompareOrdinal(x, y);
    }

    /// <summary>
    /// Splits a version into its numeric core and its pre-release text.
    /// The pre-release is null when the version has no '-' section.
    /// </summary>
    private static (long[] Core, string? Prerelease) Parse(string version)
    {
        var text = version;

        // Strip 'v' prefix
        if (text.StartsWith('v') || text.StartsWith('V'))
        {
            text = text[1..];
        }

        // Build metadata never takes part in precedence. Remove it first because it
        // may itself contain '-' characters.
        var plusIndex = text.IndexOf('+');
        if (plusIndex >= 0)
        {
            text = text[..plusIndex];
        }

        string? prerelease = null;
        var dashIndex = text.IndexOf('-');
        if (dashIndex >= 0)
        {
            prerelease = text[(dashIndex + 1)..];
            text = text[..dashIndex];
        }

        var core = new long[3]; // major, minor, patch
        var parts = text.Split('.');
        for (var i = 0; i < Math.Min(parts.Length, core.Length); i++)
        {
            // Digits only, culture-independent; anything else leaves the part at 0.
            if (long.TryParse(
                    parts[i],
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var number))
            {
                core[i] = number;
            }
        }

        return (core, prerelease);
    }

    /// <summary>
    /// Compares two pre-release sections; null means "no pre-release" and ranks highest.
    /// </summary>
    private static int ComparePrerelease(string? x, string? y)
    {
        if (x is null)
        {
            return y is null ? 0 : 1;
        }

        if (y is null)
        {
            return -1;
        }

        var fieldsX = x.Split('.');
        var fieldsY = y.Split('.');
        var shared = Math.Min(fieldsX.Length, fieldsY.Length);

        for (var i = 0; i < shared; i++)
        {
            var byField = CompareIdentifier(fieldsX[i], fieldsY[i]);
            if (byField != 0)
            {
                return byField;
            }
        }

        // All shared fields equal: the longer list has higher precedence.
        return fieldsX.Length.CompareTo(fieldsY.Length);
    }

    /// <summary>
    /// Compares one pair of pre-release identifiers.
    /// </summary>
    private static int CompareIdentifier(string x, string y)
    {
        var numericX = IsNumeric(x);
        var numericY = IsNumeric(y);

        if (numericX && numericY)
        {
            // Compare as numbers without parsing, so arbitrarily long digit runs
            // (for example timestamps) cannot overflow.
            var digitsX = x.TrimStart('0');
            var digitsY = y.TrimStart('0');
            return digitsX.Length != digitsY.Length
                ? digitsX.Length.CompareTo(digitsY.Length)
                : Math.Sign(string.CompareOrdinal(digitsX, digitsY));
        }

        if (numericX != numericY)
        {
            // Numeric identifiers rank below alphanumeric ones.
            return numericX ? -1 : 1;
        }

        return Math.Sign(string.CompareOrdinal(x, y));
    }

    private static bool IsNumeric(string identifier)
    {
        if (identifier.Length == 0)
        {
            return false;
        }

        foreach (var c in identifier)
        {
            if (c is < '0' or > '9')
            {
                return false;
            }
        }

        return true;
    }
}
}

View File

@@ -8,6 +8,10 @@
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<ItemGroup>
<InternalsVisibleTo Include="StellaOps.Scanner.Analyzers.Lang.Go.Tests" />
</ItemGroup>
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />

View File

@@ -0,0 +1,175 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Tracks dependency declarations from package.json files, mapping package names to their
/// declared scopes and version ranges. Used to classify dependencies during collection.
/// </summary>
/// <remarks>
/// Package names are looked up case-insensitively. When a name appears in several
/// sections, only the first section parsed keeps it (see <see cref="CreateFromJson"/>).
/// </remarks>
internal sealed class NodeDependencyIndex
{
    // Shared "nothing declared" instance; its dictionary is never handed out for mutation.
    private static readonly NodeDependencyIndex Empty = new(
        new Dictionary<string, NodeDependencyDeclaration>(StringComparer.OrdinalIgnoreCase));

    private readonly Dictionary<string, NodeDependencyDeclaration> _declarations;

    private NodeDependencyIndex(Dictionary<string, NodeDependencyDeclaration> declarations)
    {
        _declarations = declarations;
    }

    /// <summary>
    /// Gets all declared dependencies.
    /// </summary>
    public IReadOnlyCollection<NodeDependencyDeclaration> Declarations => _declarations.Values;

    /// <summary>
    /// Creates a dependency index from the root package.json file.
    /// </summary>
    /// <param name="rootPath">The project root directory.</param>
    /// <returns>
    /// A dependency index with all declared dependencies and their scopes. The empty index
    /// is returned when <paramref name="rootPath"/> is null or blank, when no package.json
    /// exists there, or when the file cannot be read or parsed.
    /// </returns>
    public static NodeDependencyIndex Create(string rootPath)
    {
        // A blank root would make Path.Combine yield a bare "package.json", which is
        // resolved against the process working directory - never what a caller asking
        // for "no project" wants.
        if (string.IsNullOrWhiteSpace(rootPath))
        {
            return Empty;
        }

        var packageJsonPath = Path.Combine(rootPath, "package.json");
        if (!File.Exists(packageJsonPath))
        {
            return Empty;
        }

        try
        {
            using var stream = File.OpenRead(packageJsonPath);
            using var document = JsonDocument.Parse(stream);
            return CreateFromJson(document.RootElement);
        }
        catch (IOException)
        {
            return Empty;
        }
        catch (UnauthorizedAccessException)
        {
            // Not readable by this process: same outcome as any other unreadable file.
            return Empty;
        }
        catch (JsonException)
        {
            return Empty;
        }
    }

    /// <summary>
    /// Creates a dependency index from a parsed package.json JSON element.
    /// </summary>
    /// <param name="root">The root JSON element of package.json.</param>
    /// <returns>
    /// A dependency index with all declared dependencies and their scopes, or the empty
    /// index when <paramref name="root"/> is not a JSON object or declares nothing.
    /// </returns>
    public static NodeDependencyIndex CreateFromJson(JsonElement root)
    {
        // JsonElement.TryGetProperty throws for anything but an object (for example a
        // package.json whose content is an array or a string).
        if (root.ValueKind != JsonValueKind.Object)
        {
            return Empty;
        }

        var declarations = new Dictionary<string, NodeDependencyDeclaration>(StringComparer.OrdinalIgnoreCase);

        // Order matters: the first section to declare a name wins.
        ParseDependencySection(root, "dependencies", NodeDependencyScope.Production, declarations);
        ParseDependencySection(root, "devDependencies", NodeDependencyScope.Development, declarations);
        ParseDependencySection(root, "peerDependencies", NodeDependencyScope.Peer, declarations);
        ParseDependencySection(root, "optionalDependencies", NodeDependencyScope.Optional, declarations);

        return declarations.Count == 0 ? Empty : new NodeDependencyIndex(declarations);
    }

    /// <summary>
    /// Tries to get the scope for a dependency by name.
    /// </summary>
    /// <param name="packageName">The package name to look up.</param>
    /// <param name="scope">The scope if found; otherwise the enum's default value.</param>
    /// <returns>True if the dependency was found in the index.</returns>
    public bool TryGetScope(string packageName, out NodeDependencyScope scope)
    {
        if (_declarations.TryGetValue(packageName, out var declaration))
        {
            scope = declaration.Scope;
            return true;
        }

        scope = default;
        return false;
    }

    /// <summary>
    /// Tries to get the full declaration for a dependency by name.
    /// </summary>
    /// <param name="packageName">The package name to look up.</param>
    /// <param name="declaration">The declaration if found; otherwise null.</param>
    /// <returns>True if the dependency was found in the index.</returns>
    public bool TryGetDeclaration(string packageName, out NodeDependencyDeclaration? declaration)
    {
        if (_declarations.TryGetValue(packageName, out var found))
        {
            declaration = found;
            return true;
        }

        declaration = null;
        return false;
    }

    /// <summary>
    /// Returns true if the dependency is recorded with the optional scope.
    /// </summary>
    /// <param name="packageName">The package name to look up.</param>
    public bool IsOptional(string packageName)
    {
        return _declarations.TryGetValue(packageName, out var declaration)
            && declaration.Scope == NodeDependencyScope.Optional;
    }

    /// <summary>
    /// Adds every entry of one package.json section to <paramref name="declarations"/>,
    /// skipping names that an earlier section already declared.
    /// </summary>
    /// <param name="root">The package.json root object.</param>
    /// <param name="sectionName">Property name of the section, e.g. "dependencies".</param>
    /// <param name="scope">Scope to record for entries of this section.</param>
    /// <param name="declarations">Accumulator shared by all sections.</param>
    private static void ParseDependencySection(
        JsonElement root,
        string sectionName,
        NodeDependencyScope scope,
        Dictionary<string, NodeDependencyDeclaration> declarations)
    {
        if (!root.TryGetProperty(sectionName, out var section) ||
            section.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        foreach (var property in section.EnumerateObject())
        {
            var packageName = property.Name;
            if (string.IsNullOrWhiteSpace(packageName))
            {
                continue;
            }

            // Non-string values (objects, numbers, ...) are kept without a range.
            var versionRange = property.Value.ValueKind == JsonValueKind.String
                ? property.Value.GetString()
                : null;

            // Only the first declaration is kept (sections are parsed in the order
            // Production > Development > Peer > Optional); TryAdd leaves an existing
            // entry untouched.
            // NOTE(review): npm documents that optionalDependencies entries override
            // same-named dependencies entries - confirm first-wins is intended here.
            declarations.TryAdd(
                packageName,
                new NodeDependencyDeclaration(packageName, versionRange, scope, sectionName));
        }
    }
}
/// <summary>
/// Represents a single dependency declaration read from one section of package.json.
/// </summary>
/// <param name="Name">The package name, exactly as written as the property name in package.json.</param>
/// <param name="VersionRange">
/// The declared version range (e.g., "^1.2.3", "~1.0.0", ">=1.0.0"), or null when the
/// declared value is not a JSON string.
/// </param>
/// <param name="Scope">The scope derived from which section the dependency was declared in.</param>
/// <param name="Section">The original section name (e.g., "dependencies", "devDependencies").</param>
internal sealed record NodeDependencyDeclaration(
string Name,
string? VersionRange,
NodeDependencyScope Scope,
string Section);

View File

@@ -0,0 +1,32 @@
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Identifies which package.json section a Node.js dependency was declared in.
/// </summary>
/// <remarks>
/// <see cref="Production"/> is the zero value, so it is also what <c>default</c> yields.
/// </remarks>
internal enum NodeDependencyScope
{
    /// <summary>
    /// Declared under "dependencies": needed at runtime.
    /// </summary>
    Production = 0,

    /// <summary>
    /// Declared under "devDependencies": needed only for development or builds.
    /// </summary>
    Development = 1,

    /// <summary>
    /// Declared under "peerDependencies": expected to be supplied by the consuming package.
    /// </summary>
    Peer = 2,

    /// <summary>
    /// Declared under "optionalDependencies": a failed install does not fail npm install.
    /// </summary>
    Optional = 3
}

View File

@@ -1,7 +1,7 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
internal sealed class NodeLockData
{
private const string PackageLockSource = "package-lock.json";
@@ -11,33 +11,48 @@ internal sealed class NodeLockData
private static readonly NodeLockData Empty = new(
new Dictionary<string, NodeLockEntry>(StringComparer.Ordinal),
new Dictionary<string, NodeLockEntry>(StringComparer.OrdinalIgnoreCase),
Array.Empty<NodeLockEntry>());
Array.Empty<NodeLockEntry>(),
NodeDependencyIndex.Create(string.Empty));
private readonly Dictionary<string, NodeLockEntry> _byPath;
private readonly Dictionary<string, NodeLockEntry> _byName;
private readonly IReadOnlyCollection<NodeLockEntry> _declared;
private readonly NodeDependencyIndex _dependencyIndex;
private NodeLockData(
Dictionary<string, NodeLockEntry> byPath,
Dictionary<string, NodeLockEntry> byName,
IReadOnlyCollection<NodeLockEntry> declared)
IReadOnlyCollection<NodeLockEntry> declared,
NodeDependencyIndex dependencyIndex)
{
_byPath = byPath;
_byName = byName;
_declared = declared;
_dependencyIndex = dependencyIndex;
}
public IReadOnlyCollection<NodeLockEntry> DeclaredPackages => _declared;
/// <summary>
/// Gets the dependency index built from package.json.
/// </summary>
public NodeDependencyIndex DependencyIndex => _dependencyIndex;
public static ValueTask<NodeLockData> LoadAsync(string rootPath, CancellationToken cancellationToken)
{
var byPath = new Dictionary<string, NodeLockEntry>(StringComparer.Ordinal);
var byName = new Dictionary<string, NodeLockEntry>(StringComparer.OrdinalIgnoreCase);
var declared = new Dictionary<string, NodeLockEntry>(StringComparer.OrdinalIgnoreCase);
LoadPackageLockJson(rootPath, byPath, byName, declared, cancellationToken);
LoadYarnLock(rootPath, byName, declared);
LoadPnpmLock(rootPath, byName, declared);
// Build dependency index from package.json first
var dependencyIndex = NodeDependencyIndex.Create(rootPath);
LoadPackageLockJson(rootPath, byPath, byName, declared, dependencyIndex, cancellationToken);
LoadYarnLock(rootPath, byName, declared, dependencyIndex);
LoadPnpmLock(rootPath, byName, declared, dependencyIndex);
// Add declared-only entries for packages in package.json but not in any lockfile
AddDeclaredOnlyFromPackageJson(declared, dependencyIndex);
if (byPath.Count == 0 && byName.Count == 0 && declared.Count == 0)
{
@@ -51,37 +66,78 @@ internal sealed class NodeLockData
.ThenBy(static entry => entry.Locator ?? string.Empty, StringComparer.OrdinalIgnoreCase)
.ToArray();
return ValueTask.FromResult(new NodeLockData(byPath, byName, declaredList));
return ValueTask.FromResult(new NodeLockData(byPath, byName, declaredList, dependencyIndex));
}
public bool TryGet(string relativePath, string packageName, out NodeLockEntry? entry)
{
var normalizedPath = NormalizeLockPath(relativePath);
if (_byPath.TryGetValue(normalizedPath, out var byPathEntry))
{
entry = byPathEntry;
return true;
}
if (!string.IsNullOrEmpty(packageName))
{
var normalizedName = packageName.StartsWith('@') ? packageName : packageName;
if (_byName.TryGetValue(normalizedName, out var byNameEntry))
{
entry = byNameEntry;
return true;
}
}
entry = null;
return false;
}
/// <summary>
/// Adds declared-only entries for packages in package.json that are not in any lockfile.
/// </summary>
/// <param name="declared">
/// Declarations collected so far, keyed by "name@version"; new entries are added in place.
/// </param>
/// <param name="dependencyIndex">Declarations read from package.json.</param>
/// <remarks>
/// A package is skipped when any key for it is already present, whatever its version.
/// Added entries carry the declared version range as their version and have no
/// resolved URL or integrity value.
/// </remarks>
private static void AddDeclaredOnlyFromPackageJson(
    IDictionary<string, NodeLockEntry> declared,
    NodeDependencyIndex dependencyIndex)
{
    foreach (var declaration in dependencyIndex.Declarations)
    {
        var key = $"{declaration.Name}@{declaration.VersionRange ?? "*"}".ToLowerInvariant();

        // Only add if not already present from lockfiles
        if (declared.ContainsKey(key))
        {
            continue;
        }

        // Check if we have any version of this package. The prefix is built once per
        // declaration rather than once per existing key.
        var namePrefix = $"{declaration.Name}@";
        var hasAnyVersion = declared.Keys.Any(existingKey =>
            existingKey.StartsWith(namePrefix, StringComparison.OrdinalIgnoreCase));
        if (hasAnyVersion)
        {
            continue;
        }

        declared[key] = new NodeLockEntry(
            Source: "package.json",
            Locator: $"package.json#{declaration.Section}",
            Name: declaration.Name,
            Version: declaration.VersionRange,
            Resolved: null,
            Integrity: null,
            Scope: declaration.Scope,
            IsOptional: declaration.Scope == NodeDependencyScope.Optional);
    }
}
/// <summary>
/// Looks up the lock entry for an installed package.
/// </summary>
/// <param name="relativePath">Package location; normalized before the path lookup.</param>
/// <param name="packageName">Package name used as a fallback when the path is unknown.</param>
/// <param name="entry">The matching entry, or null when nothing matched.</param>
/// <returns>True when an entry was found by path or, failing that, by name.</returns>
public bool TryGet(string relativePath, string packageName, out NodeLockEntry? entry)
{
    // The path index is the more specific one, so it is consulted first.
    if (_byPath.TryGetValue(NormalizeLockPath(relativePath), out var byPathEntry))
    {
        entry = byPathEntry;
        return true;
    }

    // Scoped ("@scope/name") and unscoped names are looked up as given.
    if (!string.IsNullOrEmpty(packageName) && _byName.TryGetValue(packageName, out var byNameEntry))
    {
        entry = byNameEntry;
        return true;
    }

    entry = null;
    return false;
}
private static NodeLockEntry? CreateEntry(
string source,
string? locator,
string? inferredName,
JsonElement element)
JsonElement element,
NodeDependencyIndex? dependencyIndex = null)
{
string? name = inferredName;
string? version = null;
@@ -101,17 +157,17 @@ internal sealed class NodeLockData
{
version = versionElement.GetString();
}
if (element.TryGetProperty("resolved", out var resolvedElement) && resolvedElement.ValueKind == JsonValueKind.String)
{
resolved = resolvedElement.GetString();
}
if (element.TryGetProperty("integrity", out var integrityElement) && integrityElement.ValueKind == JsonValueKind.String)
{
integrity = integrityElement.GetString();
}
if (element.TryGetProperty("resolved", out var resolvedElement) && resolvedElement.ValueKind == JsonValueKind.String)
{
resolved = resolvedElement.GetString();
}
if (element.TryGetProperty("integrity", out var integrityElement) && integrityElement.ValueKind == JsonValueKind.String)
{
integrity = integrityElement.GetString();
}
if (version is null && resolved is null && integrity is null)
{
return null;
@@ -123,22 +179,33 @@ internal sealed class NodeLockData
}
var locatorValue = string.IsNullOrWhiteSpace(locator) ? null : locator;
return new NodeLockEntry(source, locatorValue, name!, version, resolved, integrity);
// Look up scope from dependency index
NodeDependencyScope? scope = null;
var isOptional = false;
if (dependencyIndex is not null && dependencyIndex.TryGetScope(name!, out var foundScope))
{
scope = foundScope;
isOptional = foundScope == NodeDependencyScope.Optional;
}
return new NodeLockEntry(source, locatorValue, name!, version, resolved, integrity, scope, isOptional);
}
private static void TraverseLegacyDependencies(
string currentPath,
JsonElement dependenciesElement,
IDictionary<string, NodeLockEntry> byPath,
IDictionary<string, NodeLockEntry> byName,
IDictionary<string, NodeLockEntry> declared)
IDictionary<string, NodeLockEntry> declared,
NodeDependencyIndex dependencyIndex)
{
foreach (var dependency in dependenciesElement.EnumerateObject())
{
var depValue = dependency.Value;
var path = $"{currentPath}/{dependency.Name}";
var normalizedPath = NormalizeLockPath(path);
var entry = CreateEntry(PackageLockSource, normalizedPath, dependency.Name, depValue);
var entry = CreateEntry(PackageLockSource, normalizedPath, dependency.Name, depValue, dependencyIndex);
if (entry is not null)
{
byPath[normalizedPath] = entry;
@@ -148,40 +215,41 @@ internal sealed class NodeLockData
if (depValue.TryGetProperty("dependencies", out var childDependencies) && childDependencies.ValueKind == JsonValueKind.Object)
{
TraverseLegacyDependencies(path + "/node_modules", childDependencies, byPath, byName, declared);
TraverseLegacyDependencies(path + "/node_modules", childDependencies, byPath, byName, declared, dependencyIndex);
}
}
}
private static void LoadPackageLockJson(
string rootPath,
IDictionary<string, NodeLockEntry> byPath,
IDictionary<string, NodeLockEntry> byName,
IDictionary<string, NodeLockEntry> declared,
NodeDependencyIndex dependencyIndex,
CancellationToken cancellationToken)
{
var packageLockPath = Path.Combine(rootPath, "package-lock.json");
if (!File.Exists(packageLockPath))
{
return;
}
try
{
using var stream = File.OpenRead(packageLockPath);
using var document = JsonDocument.Parse(stream);
cancellationToken.ThrowIfCancellationRequested();
var root = document.RootElement;
if (root.TryGetProperty("packages", out var packagesElement) && packagesElement.ValueKind == JsonValueKind.Object)
{
{
return;
}
try
{
using var stream = File.OpenRead(packageLockPath);
using var document = JsonDocument.Parse(stream);
cancellationToken.ThrowIfCancellationRequested();
var root = document.RootElement;
if (root.TryGetProperty("packages", out var packagesElement) && packagesElement.ValueKind == JsonValueKind.Object)
{
foreach (var packageProperty in packagesElement.EnumerateObject())
{
var key = NormalizeLockPath(packageProperty.Name);
var inferredName = ExtractNameFromPath(key);
var entry = CreateEntry(PackageLockSource, key, inferredName, packageProperty.Value);
var entry = CreateEntry(PackageLockSource, key, inferredName, packageProperty.Value, dependencyIndex);
if (entry is null)
{
continue;
@@ -199,38 +267,39 @@ internal sealed class NodeLockData
}
else if (root.TryGetProperty("dependencies", out var dependenciesElement) && dependenciesElement.ValueKind == JsonValueKind.Object)
{
TraverseLegacyDependencies("node_modules", dependenciesElement, byPath, byName, declared);
TraverseLegacyDependencies("node_modules", dependenciesElement, byPath, byName, declared, dependencyIndex);
}
}
catch (IOException)
{
// Ignore unreadable package-lock.
}
catch (JsonException)
{
// Ignore malformed package-lock.
}
}
catch (IOException)
{
// Ignore unreadable package-lock.
}
catch (JsonException)
{
// Ignore malformed package-lock.
}
}
private static void LoadYarnLock(
string rootPath,
IDictionary<string, NodeLockEntry> byName,
IDictionary<string, NodeLockEntry> declared)
{
var yarnLockPath = Path.Combine(rootPath, "yarn.lock");
if (!File.Exists(yarnLockPath))
{
return;
}
try
{
var lines = File.ReadAllLines(yarnLockPath);
string? currentName = null;
string? version = null;
string? resolved = null;
string? integrity = null;
IDictionary<string, NodeLockEntry> declared,
NodeDependencyIndex dependencyIndex)
{
var yarnLockPath = Path.Combine(rootPath, "yarn.lock");
if (!File.Exists(yarnLockPath))
{
return;
}
try
{
var lines = File.ReadAllLines(yarnLockPath);
string? currentName = null;
string? version = null;
string? resolved = null;
string? integrity = null;
void Flush()
{
if (string.IsNullOrWhiteSpace(currentName))
@@ -250,64 +319,74 @@ internal sealed class NodeLockData
return;
}
var entry = new NodeLockEntry(YarnLockSource, currentName, simpleName, version, resolved, integrity);
// Look up scope from dependency index
NodeDependencyScope? scope = null;
var isOptional = false;
if (dependencyIndex.TryGetScope(simpleName, out var foundScope))
{
scope = foundScope;
isOptional = foundScope == NodeDependencyScope.Optional;
}
var entry = new NodeLockEntry(YarnLockSource, currentName, simpleName, version, resolved, integrity, scope, isOptional);
byName[simpleName] = entry;
AddDeclaration(declared, entry);
version = null;
resolved = null;
integrity = null;
}
foreach (var line in lines)
{
var trimmed = line.Trim();
if (string.IsNullOrEmpty(trimmed))
{
Flush();
currentName = null;
continue;
}
if (!char.IsWhiteSpace(line, 0) && trimmed.EndsWith(':'))
{
Flush();
currentName = trimmed.TrimEnd(':').Trim('"');
continue;
}
if (trimmed.StartsWith("version", StringComparison.OrdinalIgnoreCase))
{
version = ExtractQuotedValue(trimmed);
}
else if (trimmed.StartsWith("resolved", StringComparison.OrdinalIgnoreCase))
{
resolved = ExtractQuotedValue(trimmed);
}
else if (trimmed.StartsWith("integrity", StringComparison.OrdinalIgnoreCase))
{
integrity = ExtractQuotedValue(trimmed);
}
}
Flush();
}
catch (IOException)
{
// Ignore unreadable yarn.lock
}
}
foreach (var line in lines)
{
var trimmed = line.Trim();
if (string.IsNullOrEmpty(trimmed))
{
Flush();
currentName = null;
continue;
}
if (!char.IsWhiteSpace(line, 0) && trimmed.EndsWith(':'))
{
Flush();
currentName = trimmed.TrimEnd(':').Trim('"');
continue;
}
if (trimmed.StartsWith("version", StringComparison.OrdinalIgnoreCase))
{
version = ExtractQuotedValue(trimmed);
}
else if (trimmed.StartsWith("resolved", StringComparison.OrdinalIgnoreCase))
{
resolved = ExtractQuotedValue(trimmed);
}
else if (trimmed.StartsWith("integrity", StringComparison.OrdinalIgnoreCase))
{
integrity = ExtractQuotedValue(trimmed);
}
}
Flush();
}
catch (IOException)
{
// Ignore unreadable yarn.lock
}
}
private static void LoadPnpmLock(
string rootPath,
IDictionary<string, NodeLockEntry> byName,
IDictionary<string, NodeLockEntry> declared)
{
var pnpmLockPath = Path.Combine(rootPath, "pnpm-lock.yaml");
if (!File.Exists(pnpmLockPath))
{
return;
}
IDictionary<string, NodeLockEntry> declared,
NodeDependencyIndex dependencyIndex)
{
var pnpmLockPath = Path.Combine(rootPath, "pnpm-lock.yaml");
if (!File.Exists(pnpmLockPath))
{
return;
}
try
{
using var reader = new StreamReader(pnpmLockPath);
@@ -336,7 +415,16 @@ internal sealed class NodeLockData
return;
}
var entry = new NodeLockEntry(PnpmLockSource, currentPackage, name, version, resolved, integrity);
// Look up scope from dependency index
NodeDependencyScope? scope = null;
var isOptional = false;
if (dependencyIndex.TryGetScope(name, out var foundScope))
{
scope = foundScope;
isOptional = foundScope == NodeDependencyScope.Optional;
}
var entry = new NodeLockEntry(PnpmLockSource, currentPackage, name, version, resolved, integrity, scope, isOptional);
byName[name] = entry;
AddDeclaration(declared, entry);
version = null;
@@ -409,57 +497,57 @@ internal sealed class NodeLockData
Flush();
}
catch (IOException)
{
// Ignore unreadable pnpm lock file.
}
}
private static string? ExtractQuotedValue(string line)
{
var quoteStart = line.IndexOf('"');
if (quoteStart < 0)
{
return null;
}
var quoteEnd = line.LastIndexOf('"');
if (quoteEnd <= quoteStart)
{
return null;
}
return line.Substring(quoteStart + 1, quoteEnd - quoteStart - 1);
}
private static string ExtractPackageNameFromYarnKey(string key)
{
var commaIndex = key.IndexOf(',');
var trimmed = commaIndex > 0 ? key[..commaIndex] : key;
trimmed = trimmed.Trim('"');
var atIndex = trimmed.IndexOf('@', 1);
if (atIndex > 0)
{
return trimmed[..atIndex];
}
return trimmed;
}
catch (IOException)
{
// Ignore unreadable pnpm lock file.
}
}
/// <summary>
/// Returns the text between the first and the last double quote of a line.
/// </summary>
/// <param name="line">Line to inspect.</param>
/// <returns>
/// The enclosed text (possibly empty), or null when the line holds fewer than two
/// double quotes.
/// </returns>
/// <remarks>
/// NOTE(review): a value written without quotes yields null - confirm every caller
/// only passes lines whose value is quoted.
/// </remarks>
private static string? ExtractQuotedValue(string line)
{
    var open = line.IndexOf('"');
    var close = open < 0 ? -1 : line.LastIndexOf('"');

    // close == open means a single quote; close < open cannot yield a span either.
    return close > open ? line[(open + 1)..close] : null;
}
/// <summary>
/// Extracts the package name from a yarn.lock entry key such as
/// <c>lodash@^4.17.21</c> or <c>"@babel/core@^7.0.0, @babel/core@^7.1.0"</c>.
/// </summary>
/// <param name="key">Entry key; only the part before the first comma is used.</param>
/// <returns>
/// The text before the version separator, with surrounding double quotes removed.
/// When no separator is present the trimmed specifier is returned unchanged, which
/// may be the empty string.
/// </returns>
private static string ExtractPackageNameFromYarnKey(string key)
{
    var commaIndex = key.IndexOf(',');
    var firstSpecifier = (commaIndex > 0 ? key[..commaIndex] : key).Trim('"');

    // A separator needs at least one character before it. Bailing out here also keeps
    // IndexOf(char, 1) from throwing on an empty string.
    if (firstSpecifier.Length < 2)
    {
        return firstSpecifier;
    }

    // Start at index 1 so the leading '@' of a scoped name is not taken as the separator.
    var separatorIndex = firstSpecifier.IndexOf('@', 1);
    return separatorIndex > 0 ? firstSpecifier[..separatorIndex] : firstSpecifier;
}
private static string ExtractNameFromPnpmKey(string key)
{
var parts = key.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
if (parts.Length == 0)
{
return string.Empty;
}
if (parts[0].StartsWith('@'))
{
return parts.Length >= 2 ? $"{parts[0]}/{parts[1]}" : parts[0];
}
if (parts.Length == 0)
{
return string.Empty;
}
if (parts[0].StartsWith('@'))
{
return parts.Length >= 2 ? $"{parts[0]}/{parts[1]}" : parts[0];
}
return parts[0];
}
@@ -481,48 +569,48 @@ internal sealed class NodeLockData
declared[key] = entry;
}
}
private static string NormalizeLockPath(string path)
{
if (string.IsNullOrWhiteSpace(path))
{
return string.Empty;
}
var normalized = path.Replace('\\', '/');
normalized = normalized.TrimStart('.', '/');
return normalized;
}
private static string ExtractNameFromPath(string normalizedPath)
{
if (string.IsNullOrEmpty(normalizedPath))
{
return string.Empty;
}
var segments = normalizedPath.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
if (segments.Length == 0)
{
return string.Empty;
}
if (segments[0] == "node_modules")
{
if (segments.Length >= 3 && segments[1].StartsWith('@'))
{
return $"{segments[1]}/{segments[2]}";
}
return segments.Length >= 2 ? segments[1] : string.Empty;
}
var last = segments[^1];
if (last.StartsWith('@') && segments.Length >= 2)
{
return $"{segments[^2]}/{last}";
}
return last;
}
}
/// <summary>
/// Brings a lockfile path into a comparable form: forward slashes only, with every leading
/// <c>.</c> and <c>/</c> character removed.
/// </summary>
/// <param name="path">The path as written in the lockfile.</param>
/// <returns>The normalized path, or an empty string when <paramref name="path"/> is null or blank.</returns>
private static string NormalizeLockPath(string path)
    => string.IsNullOrWhiteSpace(path)
        ? string.Empty
        : path.Replace('\\', '/').TrimStart('.', '/');
/// <summary>
/// Derives a package name from a normalized lockfile path.
/// </summary>
/// <param name="normalizedPath">A path that uses forward slashes, e.g. <c>node_modules/@scope/pkg</c>.</param>
/// <returns>
/// <list type="bullet">
/// <item>For paths containing <c>node_modules</c>: the package that directly follows the innermost
/// <c>node_modules</c> segment, so <c>node_modules/a/node_modules/@s/b</c> yields <c>@s/b</c>.</item>
/// <item>For other paths: the last segment, prefixed with the preceding segment when that segment is
/// a scope (<c>packages/@scope/app</c> yields <c>@scope/app</c>).</item>
/// <item>An empty string for an empty path or a bare <c>node_modules</c>.</item>
/// </list>
/// </returns>
private static string ExtractNameFromPath(string normalizedPath)
{
    if (string.IsNullOrEmpty(normalizedPath))
    {
        return string.Empty;
    }

    var segments = normalizedPath.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (segments.Length == 0)
    {
        return string.Empty;
    }

    // Walk backwards to the innermost "node_modules" that still has a package segment after it.
    // A nested dependency is named by that segment, not by the top-level package that hosts it.
    for (var index = segments.Length - 2; index >= 0; index--)
    {
        if (segments[index] != "node_modules")
        {
            continue;
        }

        var head = segments[index + 1];
        return head.StartsWith('@') && index + 2 < segments.Length
            ? $"{head}/{segments[index + 2]}"
            : head;
    }

    if (segments[0] == "node_modules")
    {
        // A bare "node_modules" names no package.
        return string.Empty;
    }

    // Outside node_modules (e.g. workspace folders) the scope, when present, is the segment
    // in front of the package folder.
    var last = segments[^1];
    return segments.Length >= 2 && segments[^2].StartsWith('@')
        ? $"{segments[^2]}/{last}"
        : last;
}
}

View File

@@ -1,12 +1,25 @@
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Represents an entry from a Node.js lockfile (package-lock.json, yarn.lock, or pnpm-lock.yaml).
/// </summary>
/// <param name="Source">The lockfile source (e.g., "package-lock.json", "yarn.lock").</param>
/// <param name="Locator">The locator within the lockfile (path or key).</param>
/// <param name="Name">The package name.</param>
/// <param name="Version">The resolved version.</param>
/// <param name="Resolved">The URL where the package was resolved from.</param>
/// <param name="Integrity">The integrity hash (e.g., "sha512-...").</param>
/// <param name="Scope">The dependency scope from package.json (Production, Development, Peer, Optional).</param>
/// <param name="IsOptional">Whether this is an optional dependency.</param>
internal sealed record NodeLockEntry(
string Source,
string? Locator,
string Name,
string? Version,
string? Resolved,
string? Integrity);
string? Integrity,
NodeDependencyScope? Scope = null,
bool IsOptional = false);
internal static class NodeLockEntryExtensions
{

View File

@@ -3,16 +3,16 @@ using System.Globalization;
using System.Linq;
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
internal sealed class NodePackage
{
public NodePackage(
string name,
string version,
string relativePath,
string packageJsonLocator,
bool? isPrivate,
NodeLockEntry? lockEntry,
internal sealed class NodePackage
{
public NodePackage(
string name,
string version,
string relativePath,
string packageJsonLocator,
bool? isPrivate,
NodeLockEntry? lockEntry,
bool isWorkspaceMember,
string? workspaceRoot,
IReadOnlyList<string> workspaceTargets,
@@ -24,15 +24,18 @@ internal sealed class NodePackage
string? lockSource = null,
string? lockLocator = null,
string? packageSha256 = null,
bool isYarnPnp = false)
bool isYarnPnp = false,
NodeDependencyScope? scope = null,
bool isOptional = false,
string? license = null)
{
Name = name;
Version = version;
RelativePath = relativePath;
PackageJsonLocator = packageJsonLocator;
IsPrivate = isPrivate;
LockEntry = lockEntry;
IsWorkspaceMember = isWorkspaceMember;
IsPrivate = isPrivate;
LockEntry = lockEntry;
IsWorkspaceMember = isWorkspaceMember;
WorkspaceRoot = workspaceRoot;
WorkspaceTargets = workspaceTargets;
WorkspaceLink = workspaceLink;
@@ -44,28 +47,31 @@ internal sealed class NodePackage
LockLocator = lockLocator;
PackageSha256 = packageSha256;
IsYarnPnp = isYarnPnp;
Scope = scope;
IsOptional = isOptional;
License = license;
}
public string Name { get; }
public string Version { get; }
public string RelativePath { get; }
public string PackageJsonLocator { get; }
public bool? IsPrivate { get; }
public NodeLockEntry? LockEntry { get; }
public bool IsWorkspaceMember { get; }
public string? WorkspaceRoot { get; }
public IReadOnlyList<string> WorkspaceTargets { get; }
public string? WorkspaceLink { get; }
public string Name { get; }
public string Version { get; }
public string RelativePath { get; }
public string PackageJsonLocator { get; }
public bool? IsPrivate { get; }
public NodeLockEntry? LockEntry { get; }
public bool IsWorkspaceMember { get; }
public string? WorkspaceRoot { get; }
public IReadOnlyList<string> WorkspaceTargets { get; }
public string? WorkspaceLink { get; }
public IReadOnlyList<NodeLifecycleScript> LifecycleScripts { get; }
public IReadOnlyList<NodeVersionTarget> NodeVersions { get; }
@@ -84,6 +90,26 @@ internal sealed class NodePackage
public bool IsYarnPnp { get; }
/// <summary>
/// The dependency scope from package.json (Production, Development, Peer, Optional).
/// </summary>
public NodeDependencyScope? Scope { get; }
/// <summary>
/// The risk level derived from scope: "production", "development", "peer", or "optional".
/// </summary>
public string RiskLevel => NodeScopeClassifier.GetRiskLevel(Scope);
/// <summary>
/// Whether this is an optional dependency (declared in optionalDependencies).
/// </summary>
public bool IsOptional { get; }
/// <summary>
/// The license declared in package.json (e.g., "MIT", "Apache-2.0").
/// </summary>
public string? License { get; }
private readonly List<NodeEntrypoint> _entrypoints = new();
private readonly List<NodeImportEdge> _imports = new();
private readonly List<NodeImportResolution> _resolvedImports = new();
@@ -93,13 +119,13 @@ internal sealed class NodePackage
public IReadOnlyList<NodeImportResolution> ResolvedImports => _resolvedImports;
public string RelativePathNormalized => string.IsNullOrEmpty(RelativePath) ? string.Empty : RelativePath.Replace(Path.DirectorySeparatorChar, '/');
public string ComponentKey => $"purl::{Purl}";
public string Purl => BuildPurl(Name, Version);
public IReadOnlyCollection<LanguageComponentEvidence> CreateEvidence()
{
public string ComponentKey => $"purl::{Purl}";
public string Purl => BuildPurl(Name, Version);
public IReadOnlyCollection<LanguageComponentEvidence> CreateEvidence()
{
var evidence = new List<LanguageComponentEvidence>
{
CreateRootEvidence()
@@ -121,10 +147,10 @@ internal sealed class NodePackage
? $"package.json#scripts.{script.Name}"
: $"{PackageJsonLocator}#scripts.{script.Name}";
evidence.Add(new LanguageComponentEvidence(
LanguageEvidenceKind.Metadata,
"package.json:scripts",
locator,
evidence.Add(new LanguageComponentEvidence(
LanguageEvidenceKind.Metadata,
"package.json:scripts",
locator,
script.Command,
script.Sha256));
}
@@ -185,46 +211,46 @@ internal sealed class NodePackage
.OrderBy(static e => e.ComparisonKey, StringComparer.Ordinal)
.ToArray();
}
public IReadOnlyCollection<KeyValuePair<string, string?>> CreateMetadata()
{
var entries = new List<KeyValuePair<string, string?>>(8)
{
new("path", string.IsNullOrEmpty(RelativePathNormalized) ? "." : RelativePathNormalized)
};
if (IsPrivate is bool isPrivate)
{
entries.Add(new KeyValuePair<string, string?>("private", isPrivate ? "true" : "false"));
}
if (LockEntry is not null)
{
if (!string.IsNullOrWhiteSpace(LockEntry.Resolved))
{
entries.Add(new KeyValuePair<string, string?>("resolved", LockEntry.Resolved));
}
if (!string.IsNullOrWhiteSpace(LockEntry.Integrity))
{
entries.Add(new KeyValuePair<string, string?>("integrity", LockEntry.Integrity));
}
}
public IReadOnlyCollection<KeyValuePair<string, string?>> CreateMetadata()
{
var entries = new List<KeyValuePair<string, string?>>(8)
{
new("path", string.IsNullOrEmpty(RelativePathNormalized) ? "." : RelativePathNormalized)
};
if (IsPrivate is bool isPrivate)
{
entries.Add(new KeyValuePair<string, string?>("private", isPrivate ? "true" : "false"));
}
if (LockEntry is not null)
{
if (!string.IsNullOrWhiteSpace(LockEntry.Resolved))
{
entries.Add(new KeyValuePair<string, string?>("resolved", LockEntry.Resolved));
}
if (!string.IsNullOrWhiteSpace(LockEntry.Integrity))
{
entries.Add(new KeyValuePair<string, string?>("integrity", LockEntry.Integrity));
}
}
if (IsWorkspaceMember)
{
entries.Add(new KeyValuePair<string, string?>("workspaceMember", "true"));
if (!string.IsNullOrWhiteSpace(WorkspaceRoot))
{
entries.Add(new KeyValuePair<string, string?>("workspaceRoot", WorkspaceRoot));
}
}
if (!string.IsNullOrWhiteSpace(WorkspaceLink))
{
entries.Add(new KeyValuePair<string, string?>("workspaceLink", WorkspaceLink));
}
entries.Add(new KeyValuePair<string, string?>("workspaceRoot", WorkspaceRoot));
}
}
if (!string.IsNullOrWhiteSpace(WorkspaceLink))
{
entries.Add(new KeyValuePair<string, string?>("workspaceLink", WorkspaceLink));
}
if (WorkspaceTargets.Count > 0)
{
entries.Add(new KeyValuePair<string, string?>("workspaceTargets", string.Join(';', WorkspaceTargets)));
@@ -282,19 +308,19 @@ internal sealed class NodePackage
entries.Add(new KeyValuePair<string, string?>("installScripts", "true"));
var lifecycleNames = LifecycleScripts
.Select(static script => script.Name)
.Distinct(StringComparer.OrdinalIgnoreCase)
.OrderBy(static name => name, StringComparer.OrdinalIgnoreCase)
.ToArray();
if (lifecycleNames.Length > 0)
{
entries.Add(new KeyValuePair<string, string?>("policyHint.installLifecycle", string.Join(';', lifecycleNames)));
}
foreach (var script in LifecycleScripts.OrderBy(static script => script.Name, StringComparer.OrdinalIgnoreCase))
{
entries.Add(new KeyValuePair<string, string?>($"script.{script.Name}", script.Command));
}
.Distinct(StringComparer.OrdinalIgnoreCase)
.OrderBy(static name => name, StringComparer.OrdinalIgnoreCase)
.ToArray();
if (lifecycleNames.Length > 0)
{
entries.Add(new KeyValuePair<string, string?>("policyHint.installLifecycle", string.Join(';', lifecycleNames)));
}
foreach (var script in LifecycleScripts.OrderBy(static script => script.Name, StringComparer.OrdinalIgnoreCase))
{
entries.Add(new KeyValuePair<string, string?>($"script.{script.Name}", script.Command));
}
}
if (DeclaredOnly)
@@ -317,6 +343,23 @@ internal sealed class NodePackage
entries.Add(new KeyValuePair<string, string?>("yarnPnp", "true"));
}
// Scope classification metadata
if (Scope is not null)
{
entries.Add(new KeyValuePair<string, string?>("scope", Scope.Value.ToString().ToLowerInvariant()));
entries.Add(new KeyValuePair<string, string?>("riskLevel", RiskLevel));
}
if (IsOptional)
{
entries.Add(new KeyValuePair<string, string?>("optional", "true"));
}
if (!string.IsNullOrWhiteSpace(License))
{
entries.Add(new KeyValuePair<string, string?>("license", License));
}
return entries
.OrderBy(static pair => pair.Key, StringComparer.Ordinal)
.ToArray();
@@ -398,26 +441,26 @@ internal sealed class NodePackage
return conditionSet.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
}
private static string BuildPurl(string name, string version)
{
var normalizedName = NormalizeName(name);
return $"pkg:npm/{normalizedName}@{version}";
}
private static string NormalizeName(string name)
{
if (string.IsNullOrWhiteSpace(name))
{
return name;
}
if (name[0] == '@')
{
var scopeAndName = name[1..];
return $"%40{scopeAndName}";
}
/// <summary>
/// Composes the npm package URL <c>pkg:npm/{name}@{version}</c>, passing the name through
/// <c>NormalizeName</c> first.
/// </summary>
private static string BuildPurl(string name, string version)
    => string.Concat("pkg:npm/", NormalizeName(name), "@", version);
/// <summary>
/// Percent-encodes the leading <c>@</c> of a scoped package name as <c>%40</c>.
/// </summary>
/// <param name="name">The package name.</param>
/// <returns>
/// The name with its leading <c>@</c> replaced by <c>%40</c>; any other name, including a null
/// or blank one, is returned unchanged.
/// </returns>
private static string NormalizeName(string name)
{
    var isScoped = !string.IsNullOrWhiteSpace(name) && name[0] == '@';
    return isScoped ? "%40" + name[1..] : name;
}

View File

@@ -500,7 +500,12 @@ internal static class NodePackageCollector
usedByEntrypoint: false,
declaredOnly: true,
lockSource: entry.Source,
lockLocator: BuildLockLocator(entry));
lockLocator: BuildLockLocator(entry),
packageSha256: null,
isYarnPnp: false,
scope: entry.Scope,
isOptional: entry.IsOptional,
license: null);
packages.Add(declaredPackage);
}
@@ -614,6 +619,22 @@ internal static class NodePackageCollector
var lockLocator = BuildLockLocator(lockEntry);
var lockSource = lockEntry?.Source;
// Get scope from lock entry (populated by NodeLockData from package.json)
// or from the dependency index directly if this is a root package
NodeDependencyScope? scope = lockEntry?.Scope;
var isOptional = lockEntry?.IsOptional ?? false;
if (scope is null && lockData?.DependencyIndex is { } dependencyIndex)
{
if (dependencyIndex.TryGetScope(name, out var foundScope))
{
scope = foundScope;
isOptional = foundScope == NodeDependencyScope.Optional;
}
}
// Extract license from package.json
var license = ExtractLicense(root);
string? workspaceRoot = null;
var isWorkspaceMember = workspaceIndex?.TryGetMember(relativeDirectory, out workspaceRoot) == true;
var workspaceRootValue = isWorkspaceMember && workspaceIndex is not null ? workspaceRoot : null;
@@ -642,7 +663,10 @@ internal static class NodePackageCollector
lockSource: lockSource,
lockLocator: lockLocator,
packageSha256: packageSha256,
isYarnPnp: yarnPnpPresent);
isYarnPnp: yarnPnpPresent,
scope: scope,
isOptional: isOptional,
license: license);
AttachEntrypoints(context, package, root, relativeDirectory);
@@ -813,6 +837,76 @@ internal static class NodePackageCollector
|| name.Equals("install", StringComparison.OrdinalIgnoreCase)
|| name.Equals("postinstall", StringComparison.OrdinalIgnoreCase);
/// <summary>
/// Extracts the declared license from a parsed package.json document.
/// </summary>
/// <remarks>
/// Sources are consulted in this order, and the first one that yields a non-blank value wins:
/// <list type="number">
/// <item>the <c>license</c> property as a string (<c>"license": "MIT"</c>);</item>
/// <item>the <c>license</c> property as an object with a string <c>type</c>
/// (<c>"license": { "type": "MIT" }</c>);</item>
/// <item>the legacy <c>licenses</c> array, whose items may be strings or objects with a string <c>type</c>.</item>
/// </list>
/// </remarks>
/// <param name="root">The root element of package.json.</param>
/// <returns>
/// The trimmed license text; for a legacy array with several distinct entries an expression of the
/// form <c>(A OR B)</c>; <c>null</c> when nothing usable is declared or the root is not a JSON object.
/// </returns>
private static string? ExtractLicense(JsonElement root)
{
    // TryGetProperty throws for anything but a JSON object (for example a package.json whose
    // root is an array), so treat such documents as declaring no license.
    if (root.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    if (root.TryGetProperty("license", out var licenseElement)
        && ReadLicenseIdentifier(licenseElement) is { } declared)
    {
        return declared;
    }

    if (!root.TryGetProperty("licenses", out var licensesElement)
        || licensesElement.ValueKind != JsonValueKind.Array)
    {
        return null;
    }

    // Keep first-seen order but drop repeats, so ["MIT", { "type": "MIT" }] yields "MIT"
    // rather than the redundant "(MIT OR MIT)".
    var licenses = new List<string>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var item in licensesElement.EnumerateArray())
    {
        if (ReadLicenseIdentifier(item) is { } identifier && seen.Add(identifier))
        {
            licenses.Add(identifier);
        }
    }

    return licenses.Count switch
    {
        0 => null,
        1 => licenses[0],
        // Combine multiple licenses with OR (SPDX expression style)
        _ => $"({string.Join(" OR ", licenses)})",
    };
}

/// <summary>
/// Reads one license value that is either a JSON string or an object carrying a string <c>type</c>.
/// </summary>
/// <returns>The trimmed text, or <c>null</c> when the element has another shape or the text is blank.</returns>
private static string? ReadLicenseIdentifier(JsonElement element)
{
    if (element.ValueKind == JsonValueKind.Object)
    {
        if (!element.TryGetProperty("type", out var typeElement))
        {
            return null;
        }

        element = typeElement;
    }

    if (element.ValueKind != JsonValueKind.String)
    {
        return null;
    }

    var text = element.GetString();
    return string.IsNullOrWhiteSpace(text) ? null : text.Trim();
}
private static void AttachEntrypoints(LanguageAnalyzerContext context, NodePackage package, JsonElement root, string relativeDirectory)
{
static string NormalizePath(string relativeDirectory, string? path)

View File

@@ -0,0 +1,72 @@
namespace StellaOps.Scanner.Analyzers.Lang.Node.Internal;
/// <summary>
/// Translates Node.js dependency scopes into the labels and flags used for risk reporting.
/// Modeled after <c>JavaScopeClassifier</c> for consistency across language analyzers.
/// </summary>
internal static class NodeScopeClassifier
{
    /// <summary>
    /// Returns the risk label for a dependency scope.
    /// </summary>
    /// <param name="scope">The dependency scope, or <c>null</c> when it is unknown.</param>
    /// <returns>
    /// <c>"development"</c>, <c>"peer"</c> or <c>"optional"</c> for the matching scope;
    /// <c>"production"</c> for the production scope, for <c>null</c> and for any other value.
    /// </returns>
    public static string GetRiskLevel(NodeDependencyScope? scope)
    {
        switch (scope)
        {
            case NodeDependencyScope.Development:
                return "development";
            case NodeDependencyScope.Peer:
                return "peer";
            case NodeDependencyScope.Optional:
                return "optional";
            default:
                // Production, unknown (null) and unrecognised values are all reported as production.
                return "production";
        }
    }

    /// <summary>
    /// Tells whether a scope is treated as a direct dependency.
    /// </summary>
    /// <param name="scope">The dependency scope, or <c>null</c> when it is unknown.</param>
    /// <returns>
    /// <c>false</c> for the peer and optional scopes; <c>true</c> for everything else,
    /// including <c>null</c>.
    /// </returns>
    public static bool IsDirect(NodeDependencyScope? scope)
        => scope is not (NodeDependencyScope.Peer or NodeDependencyScope.Optional);

    /// <summary>
    /// Tells whether a scope is treated as part of the production runtime.
    /// </summary>
    /// <param name="scope">The dependency scope, or <c>null</c> when it is unknown.</param>
    /// <returns>
    /// <c>false</c> for the development, peer (provided by the consumer) and optional
    /// (may not be installed) scopes; <c>true</c> for everything else, including <c>null</c>.
    /// </returns>
    public static bool IsProductionRuntime(NodeDependencyScope? scope)
        => scope is not (NodeDependencyScope.Development or NodeDependencyScope.Peer or NodeDependencyScope.Optional);

    /// <summary>
    /// Maps a package.json section name to its dependency scope, ignoring letter case.
    /// </summary>
    /// <param name="sectionName">The section name, e.g. <c>"dependencies"</c> or <c>"devDependencies"</c>.</param>
    /// <returns>The matching scope, or <c>null</c> when the name is <c>null</c> or not a dependency section.</returns>
    public static NodeDependencyScope? ParseSection(string? sectionName)
    {
        if (sectionName is null)
        {
            return null;
        }

        switch (sectionName.ToLowerInvariant())
        {
            case "dependencies":
                return NodeDependencyScope.Production;
            case "devdependencies":
                return NodeDependencyScope.Development;
            case "peerdependencies":
                return NodeDependencyScope.Peer;
            case "optionaldependencies":
                return NodeDependencyScope.Optional;
            default:
                return null;
        }
    }
}

View File

@@ -0,0 +1,389 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Conflicts;
/// <summary>
/// Detects version conflicts where the same Python package appears with multiple versions.
/// Common in containers with multiple virtualenvs or conflicting requirements.
/// </summary>
internal static partial class VersionConflictDetector
{
/// <summary>
/// Scans <paramref name="packages"/> for packages that are present in more than one version.
/// </summary>
/// <remarks>
/// Packages without a version are ignored. The remaining ones are grouped by normalized name
/// (case-insensitively); a group is a conflict when it holds more than one distinct version string.
/// </remarks>
/// <param name="packages">The discovered packages.</param>
/// <returns>
/// The conflicts ordered by normalized name together with their count and highest severity,
/// or <see cref="VersionConflictAnalysis.Empty"/> when there are none.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="packages"/> is <c>null</c>.</exception>
public static VersionConflictAnalysis Analyze(IEnumerable<PythonPackageInfo> packages)
{
    ArgumentNullException.ThrowIfNull(packages);

    var byName = packages
        .ToList()
        .Where(candidate => !string.IsNullOrWhiteSpace(candidate.Version))
        .GroupBy(candidate => candidate.NormalizedName, StringComparer.OrdinalIgnoreCase);

    var conflicts = new List<PythonVersionConflict>();
    foreach (var sameName in byName)
    {
        var distinctVersions = sameName
            .Select(candidate => candidate.Version)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Count();
        if (distinctVersions <= 1)
        {
            continue;
        }

        var occurrences = sameName
            .Select(candidate => new PythonVersionOccurrence(
                candidate.Version!,
                candidate.Location,
                candidate.MetadataPath ?? candidate.Location,
                candidate.Kind.ToString(),
                candidate.InstallerTool))
            .OrderBy(occurrence => occurrence.Version, PythonVersionComparer.Instance)
            .ToImmutableArray();

        // Severity reflects how far apart the conflicting versions are.
        var severity = CalculateSeverity(occurrences);

        conflicts.Add(new PythonVersionConflict(
            sameName.Key,
            sameName.First().Name, // display name as declared by the first package of the group
            occurrences,
            severity));
    }

    if (conflicts.Count == 0)
    {
        return VersionConflictAnalysis.Empty;
    }

    var ordered = conflicts
        .OrderBy(conflict => conflict.NormalizedName, StringComparer.Ordinal)
        .ToImmutableArray();
    var worst = conflicts.Max(conflict => conflict.Severity);

    return new VersionConflictAnalysis(ordered, conflicts.Count, worst);
}
/// <summary>
/// Runs the conflict analysis over the packages of a discovery result.
/// </summary>
/// <param name="discoveryResult">The discovery result whose <c>Packages</c> are analyzed.</param>
/// <exception cref="ArgumentNullException"><paramref name="discoveryResult"/> is <c>null</c>.</exception>
public static VersionConflictAnalysis Analyze(PythonPackageDiscoveryResult discoveryResult)
{
    if (discoveryResult is null)
    {
        throw new ArgumentNullException(nameof(discoveryResult));
    }

    var discovered = discoveryResult.Packages;
    return Analyze(discovered);
}
/// <summary>
/// Looks up the version conflict, if any, that <paramref name="packageName"/> has within
/// <paramref name="packages"/>.
/// </summary>
/// <param name="packages">The package set to analyze.</param>
/// <param name="packageName">The package name; it is normalized before the lookup.</param>
/// <returns>The conflict for that package, or <c>null</c> when it has none.</returns>
public static PythonVersionConflict? GetConflict(
    IEnumerable<PythonPackageInfo> packages,
    string packageName)
{
    // Normalize first so the lookup key has the same form as the analysis keys.
    var lookupKey = PythonPackageInfo.NormalizeName(packageName);
    return Analyze(packages).GetConflict(lookupKey);
}
/// <summary>
/// Rates a conflict by how far apart the conflicting versions are.
/// </summary>
/// <param name="versions">All occurrences of one package.</param>
/// <returns>
/// <list type="bullet">
/// <item><see cref="ConflictSeverity.None"/> when there are fewer than two distinct version strings.</item>
/// <item><see cref="ConflictSeverity.High"/> when the parseable versions differ in epoch or major number.</item>
/// <item><see cref="ConflictSeverity.Medium"/> when they differ in minor number, or when at least one
/// version cannot be parsed and nothing more severe was found.</item>
/// <item><see cref="ConflictSeverity.Low"/> when every version parses and only later parts differ.</item>
/// </list>
/// </returns>
private static ConflictSeverity CalculateSeverity(ImmutableArray<PythonVersionOccurrence> versions)
{
    var versionStrings = versions.Select(v => v.Version).Distinct().ToList();
    if (versionStrings.Count <= 1)
    {
        return ConflictSeverity.None;
    }

    // Try to parse as PEP 440 versions
    var parsedVersions = new List<Pep440Version>(versionStrings.Count);
    foreach (var versionString in versionStrings)
    {
        if (TryParsePep440Version(versionString) is { } parsed)
        {
            parsedVersions.Add(parsed);
        }
    }

    if (parsedVersions.Count < 2)
    {
        // Can't determine severity without parseable versions
        return ConflictSeverity.Medium;
    }

    bool Differs(Func<Pep440Version, int> part) => parsedVersions.Select(part).Distinct().Count() > 1;

    ConflictSeverity severity;
    if (Differs(v => v.Epoch) || Differs(v => v.Major))
    {
        // Different epochs mean different version schemes; different majors are likely incompatible.
        severity = ConflictSeverity.High;
    }
    else if (Differs(v => v.Minor))
    {
        severity = ConflictSeverity.Medium;
    }
    else
    {
        // Only patch/micro (or pre/post/dev/local) differences remain.
        severity = ConflictSeverity.Low;
    }

    // A version that could not be parsed was left out of the comparison above, so its distance
    // to the others is unknown. Apply the same Medium floor used when too few versions parse,
    // instead of letting the parseable ones report Low.
    var hasUnparseable = parsedVersions.Count != versionStrings.Count;
    return hasUnparseable && severity < ConflictSeverity.Medium
        ? ConflictSeverity.Medium
        : severity;
}
/// <summary>
/// Splits a version string into its PEP 440 parts using <c>Pep440VersionPattern</c>.
/// </summary>
/// <param name="version">The version text, e.g. <c>1!2.3.4rc1.post2.dev3+local</c>.</param>
/// <returns>
/// The parsed version, or <c>null</c> when the text is blank, does not match the pattern, or its
/// first release number does not fit an <see cref="int"/>. A missing epoch, minor or micro number
/// is reported as 0, as is one that does not fit an <see cref="int"/>.
/// </returns>
private static Pep440Version? TryParsePep440Version(string version)
{
    if (string.IsNullOrWhiteSpace(version))
    {
        return null;
    }

    // Shape accepted by the pattern: [N!]N(.N)*[{a|b|rc}N][.postN][.devN][+local]
    var match = Pep440VersionPattern().Match(version);
    if (!match.Success)
    {
        return null;
    }

    var segments = match.Groups["release"].Value.Split('.');
    if (!int.TryParse(segments[0], out var major))
    {
        return null;
    }

    // TryParse leaves 0 behind both for an absent epoch (null input) and for one that overflows.
    _ = int.TryParse(Capture("epoch"), out var epoch);

    return new Pep440Version(
        epoch,
        major,
        NumberOrZero(segments, 1),
        NumberOrZero(segments, 2),
        Capture("pre"),
        Capture("post"),
        Capture("dev"),
        Capture("local"));

    // Text of a named group, or null when the group did not take part in the match.
    string? Capture(string groupName)
    {
        var group = match.Groups[groupName];
        return group.Success ? group.Value : null;
    }

    // The release number at the given position, or 0 when it is absent or unparseable.
    static int NumberOrZero(string[] parts, int index)
        => index < parts.Length && int.TryParse(parts[index], out var number) ? number : 0;
}
// PEP 440 version pattern, matched case-insensitively against the whole string (anchored by ^ and $).
// Named groups, in the order they have to appear:
//   epoch   - digits in front of '!' (optional)
//   release - one or more dot-separated digit runs (required)
//   pre     - a / alpha / b / beta / c / rc plus optional digits, written directly after the release
//   post    - "post" plus optional digits, optionally preceded by '.'
//   dev     - "dev" plus optional digits, optionally preceded by '.'
//   local   - letters, digits and dots following '+'
// The pattern has no alternative for a leading "v", for '-' or '_' separators or for the
// "pre" / "preview" / "rev" spellings, so version strings written that way do not match.
[GeneratedRegex(
@"^((?<epoch>\d+)!)?(?<release>\d+(\.\d+)*)((?<pre>(a|alpha|b|beta|c|rc)\d*))?(\.?(?<post>post\d*))?(\.?(?<dev>dev\d*))?(\+(?<local>[a-z0-9.]+))?$",
RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex Pep440VersionPattern();
}
/// <summary>
/// Outcome of a version-conflict scan.
/// </summary>
/// <param name="Conflicts">The conflicts that were found.</param>
/// <param name="TotalConflicts">The number of conflicts.</param>
/// <param name="MaxSeverity">The highest severity among the conflicts.</param>
internal sealed record VersionConflictAnalysis(
    ImmutableArray<PythonVersionConflict> Conflicts,
    int TotalConflicts,
    ConflictSeverity MaxSeverity)
{
    /// <summary>
    /// The result used when nothing conflicts: no entries, count 0, severity <see cref="ConflictSeverity.None"/>.
    /// </summary>
    public static readonly VersionConflictAnalysis Empty = new([], 0, ConflictSeverity.None);

    /// <summary>
    /// <c>true</c> when <see cref="TotalConflicts"/> is greater than zero.
    /// </summary>
    public bool HasConflicts
    {
        get { return TotalConflicts > 0; }
    }

    /// <summary>
    /// Finds the conflict recorded for a package.
    /// </summary>
    /// <param name="normalizedName">The normalized package name; letter case is ignored.</param>
    /// <returns>The first matching conflict, or <c>null</c> when the package has none.</returns>
    public PythonVersionConflict? GetConflict(string normalizedName)
    {
        bool IsMatch(PythonVersionConflict candidate)
            => string.Equals(candidate.NormalizedName, normalizedName, StringComparison.OrdinalIgnoreCase);

        return Conflicts.FirstOrDefault(IsMatch);
    }

    /// <summary>
    /// The conflicts rated <see cref="ConflictSeverity.High"/>, in their original order.
    /// </summary>
    public ImmutableArray<PythonVersionConflict> HighSeverityConflicts
    {
        get
        {
            var selected = ImmutableArray.CreateBuilder<PythonVersionConflict>();
            foreach (var conflict in Conflicts)
            {
                if (conflict.Severity == ConflictSeverity.High)
                {
                    selected.Add(conflict);
                }
            }

            return selected.ToImmutable();
        }
    }
}
/// <summary>
/// One Python package that was found in several versions.
/// </summary>
/// <param name="NormalizedName">The normalized package name the occurrences were grouped by.</param>
/// <param name="OriginalName">The package name as originally declared.</param>
/// <param name="Versions">Every occurrence of the package.</param>
/// <param name="Severity">How serious the conflict is.</param>
internal sealed record PythonVersionConflict(
    string NormalizedName,
    string OriginalName,
    ImmutableArray<PythonVersionOccurrence> Versions,
    ConflictSeverity Severity)
{
    /// <summary>
    /// The version-less package URL: <c>pkg:pypi/</c> followed by the normalized name with
    /// underscores replaced by hyphens.
    /// </summary>
    public string Purl
    {
        get { return "pkg:pypi/" + NormalizedName.Replace('_', '-'); }
    }

    /// <summary>
    /// The distinct version strings, in order of first appearance in <see cref="Versions"/>.
    /// </summary>
    public IEnumerable<string> UniqueVersions
    {
        get
        {
            return (from occurrence in Versions
                    select occurrence.Version).Distinct();
        }
    }

    /// <summary>
    /// <see cref="UniqueVersions"/> joined with commas.
    /// </summary>
    public string VersionsString
    {
        get { return string.Join(',', UniqueVersions); }
    }

    /// <summary>
    /// The number of distinct locations across all occurrences.
    /// </summary>
    public int LocationCount
    {
        get
        {
            var locations = new HashSet<string>(Versions.Select(occurrence => occurrence.Location));
            return locations.Count;
        }
    }
}
/// <summary>
/// Represents a single occurrence of a version.
/// </summary>
/// <param name="Version">The version string of the package at this occurrence.</param>
/// <param name="Location">The location reported for the package.</param>
/// <param name="MetadataPath">
/// The path of the package metadata; <c>VersionConflictDetector.Analyze</c> passes the location
/// here when the package has no metadata path.
/// </param>
/// <param name="PackageKind">The package kind as text (<c>Analyze</c> passes <c>Kind.ToString()</c>).</param>
/// <param name="InstallerTool">The installer tool recorded for the package, if any.</param>
internal sealed record PythonVersionOccurrence(
string Version,
string Location,
string MetadataPath,
string PackageKind,
string? InstallerTool);
/// <summary>
/// Severity level of a version conflict.
/// </summary>
/// <remarks>
/// The numeric values ascend with severity. <c>VersionConflictDetector.Analyze</c> reports the
/// maximum over all conflicts, so the relative order of the members is significant.
/// </remarks>
internal enum ConflictSeverity
{
/// <summary>
/// No conflict.
/// </summary>
None = 0,
/// <summary>
/// Only micro/patch version differences (likely compatible).
/// </summary>
Low = 1,
/// <summary>
/// Minor version differences (may have API changes).
/// </summary>
Medium = 2,
/// <summary>
/// Major version or epoch differences (likely incompatible).
/// </summary>
High = 3
}
/// <summary>
/// The parts of a version string parsed according to PEP 440.
/// </summary>
/// <param name="Epoch">The epoch number.</param>
/// <param name="Major">The first release number.</param>
/// <param name="Minor">The second release number.</param>
/// <param name="Micro">The third release number.</param>
/// <param name="PreRelease">The pre-release marker, or <c>null</c> when absent.</param>
/// <param name="PostRelease">The post-release marker, or <c>null</c> when absent.</param>
/// <param name="DevRelease">The development-release marker, or <c>null</c> when absent.</param>
/// <param name="LocalVersion">The local version label, or <c>null</c> when absent.</param>
internal sealed record Pep440Version(
    int Epoch,
    int Major,
    int Minor,
    int Micro,
    string? PreRelease,
    string? PostRelease,
    string? DevRelease,
    string? LocalVersion)
{
    /// <summary>
    /// <c>true</c> when the version carries a pre-release or a development-release marker.
    /// </summary>
    public bool IsPreRelease
    {
        get { return !(PreRelease is null && DevRelease is null); }
    }

    /// <summary>
    /// The epoch and the first three release numbers rendered as <c>epoch!major.minor.micro</c>.
    /// </summary>
    public string ReleaseTuple
    {
        get { return Epoch + "!" + Major + "." + Minor + "." + Micro; }
    }
}
/// <summary>
/// Orders Python version strings by PEP 440 precedence: epoch first, then the release numbers,
/// then the pre-release, post-release and development markers.
/// </summary>
/// <remarks>
/// <para>
/// Numbers are compared by value without a size limit, a missing release number counts as zero
/// (<c>1.0</c> equals <c>1.0.0</c>), letter case is ignored and the local version label after
/// <c>+</c> does not take part in the comparison.
/// </para>
/// <para>
/// Input that does not follow PEP 440 is still ordered deterministically: strings that do not start
/// with a release number sort before all strings that do, and any text left over after the
/// recognised parts is used as a final ordinal tie-break.
/// </para>
/// </remarks>
internal sealed class PythonVersionComparer : IComparer<string>
{
    public static readonly PythonVersionComparer Instance = new();

    // Ranks for the pre-release slot of the sort key, lowest first.
    private const int DevelopmentOnlyRank = 0; // "1.0.dev1": precedes every pre-release of 1.0
    private const int AlphaRank = 1;
    private const int BetaRank = 2;
    private const int CandidateRank = 3;
    private const int FinalRank = 4;           // no pre-release marker at all

    // Longer spellings are listed first so "alpha" is never read as "a" followed by junk.
    private static readonly string[] PreReleaseLabels = ["preview", "alpha", "beta", "pre", "rc", "a", "b", "c"];
    private static readonly string[] PostReleaseLabels = ["post", "rev", "r"];
    private static readonly string[] DevReleaseLabels = ["dev"];

    /// <summary>
    /// Compares two version strings.
    /// </summary>
    /// <returns>
    /// A negative number when <paramref name="x"/> precedes <paramref name="y"/>, zero when they
    /// are equivalent, a positive number otherwise. <c>null</c> precedes every non-null value.
    /// </returns>
    public int Compare(string? x, string? y)
    {
        if (x is null && y is null) return 0;
        if (x is null) return -1;
        if (y is null) return 1;

        return VersionKey.Parse(x).CompareTo(VersionKey.Parse(y));
    }

    /// <summary>
    /// Sort key of one version string. Every component is totally ordered, so comparing the
    /// components in sequence gives a consistent ordering for any input.
    /// </summary>
    private sealed class VersionKey
    {
        private string _epoch = "0";
        private readonly List<string> _release = new();
        private int _preRank = FinalRank;
        private string _preNumber = "0";
        private string? _postNumber;
        private string? _devNumber;
        private string _rest = string.Empty;

        public static VersionKey Parse(string version)
        {
            var text = version.Trim().ToLowerInvariant();
            var key = new VersionKey();

            // Epoch: digits in front of '!'. A non-numeric prefix is dropped and counts as epoch 0.
            var epochEnd = text.IndexOf('!');
            if (epochEnd >= 0)
            {
                var candidate = text[..epochEnd];
                if (IsNumber(candidate))
                {
                    key._epoch = candidate;
                }

                text = text[(epochEnd + 1)..];
            }

            // The local version label does not take part in the ordering.
            var localStart = text.IndexOf('+');
            if (localStart >= 0)
            {
                text = text[..localStart];
            }

            // PEP 440 tolerates a leading "v" in front of the release.
            if (text.Length > 1 && text[0] == 'v' && IsDigit(text[1]))
            {
                text = text[1..];
            }

            var position = 0;
            while (position < text.Length && IsDigit(text[position]))
            {
                var start = position;
                position = SkipDigits(text, position);
                key._release.Add(text[start..position]);

                // Only step over a dot that introduces another release number.
                if (position + 1 < text.Length && text[position] == '.' && IsDigit(text[position + 1]))
                {
                    position++;
                }
                else
                {
                    break;
                }
            }

            if (key._release.Count > 0)
            {
                if (TryReadSegment(text, ref position, PreReleaseLabels, out var label, out var number))
                {
                    key._preRank = label[0] switch
                    {
                        'a' => AlphaRank,
                        'b' => BetaRank,
                        _ => CandidateRank, // c, rc, pre, preview
                    };
                    key._preNumber = number;
                }

                if (TryReadSegment(text, ref position, PostReleaseLabels, out _, out number))
                {
                    key._postNumber = number;
                }
                else if (position + 1 < text.Length && text[position] == '-' && IsDigit(text[position + 1]))
                {
                    // Implicit post release: "1.0-1" is another spelling of "1.0.post1".
                    var start = position + 1;
                    position = SkipDigits(text, start);
                    key._postNumber = text[start..position];
                }

                if (TryReadSegment(text, ref position, DevReleaseLabels, out _, out number))
                {
                    key._devNumber = number;
                }

                // A development release of the final version sorts before all of its pre-releases.
                if (key._preRank == FinalRank && key._postNumber is null && key._devNumber is not null)
                {
                    key._preRank = DevelopmentOnlyRank;
                }
            }

            key._rest = text[position..];
            return key;
        }

        public int CompareTo(VersionKey other)
        {
            // Strings without any release number sort first.
            var result = (_release.Count > 0).CompareTo(other._release.Count > 0);
            if (result != 0) return result;

            result = CompareNumbers(_epoch, other._epoch);
            if (result != 0) return result;

            var length = Math.Max(_release.Count, other._release.Count);
            for (var i = 0; i < length; i++)
            {
                var left = i < _release.Count ? _release[i] : "0";
                var right = i < other._release.Count ? other._release[i] : "0";
                result = CompareNumbers(left, right);
                if (result != 0) return result;
            }

            result = _preRank.CompareTo(other._preRank);
            if (result != 0) return result;

            result = CompareNumbers(_preNumber, other._preNumber);
            if (result != 0) return result;

            // No post release sorts before any post release ...
            result = CompareOptional(_postNumber, other._postNumber, absentOrder: -1);
            if (result != 0) return result;

            // ... whereas no development marker sorts after any development release.
            result = CompareOptional(_devNumber, other._devNumber, absentOrder: 1);
            if (result != 0) return result;

            return string.CompareOrdinal(_rest, other._rest);
        }

        /// <summary>
        /// Reads "[separator]label[separator][digits]" at <paramref name="position"/> and advances
        /// past it. A label without a number gets the number "0".
        /// </summary>
        private static bool TryReadSegment(string text, ref int position, string[] labels, out string label, out string number)
        {
            label = string.Empty;
            number = "0";

            var start = position;
            if (start < text.Length && IsSeparator(text[start]))
            {
                start++;
            }

            foreach (var candidate in labels)
            {
                if (string.CompareOrdinal(text, start, candidate, 0, candidate.Length) != 0)
                {
                    continue;
                }

                var labelEnd = start + candidate.Length;
                var digitsStart = labelEnd;
                if (digitsStart + 1 < text.Length && IsSeparator(text[digitsStart]) && IsDigit(text[digitsStart + 1]))
                {
                    digitsStart++;
                }

                var digitsEnd = SkipDigits(text, digitsStart);
                if (digitsEnd > digitsStart)
                {
                    label = candidate;
                    number = text[digitsStart..digitsEnd];
                    position = digitsEnd;
                    return true;
                }

                // Without a number the label has to end at a boundary; otherwise it is merely the
                // start of an unrelated word such as the "c" of "custom".
                if (labelEnd < text.Length && !IsSeparator(text[labelEnd]))
                {
                    continue;
                }

                label = candidate;
                position = labelEnd;
                return true;
            }

            return false;
        }

        // Compares two digit strings by numeric value, whatever their length.
        private static int CompareNumbers(string left, string right)
        {
            var a = left.TrimStart('0');
            var b = right.TrimStart('0');
            return a.Length != b.Length
                ? a.Length.CompareTo(b.Length)
                : string.CompareOrdinal(a, b);
        }

        // absentOrder is the result reported when only the left value is missing.
        private static int CompareOptional(string? left, string? right, int absentOrder)
        {
            if (left is null || right is null)
            {
                if (left is null && right is null) return 0;
                return left is null ? absentOrder : -absentOrder;
            }

            return CompareNumbers(left, right);
        }

        private static int SkipDigits(string text, int position)
        {
            while (position < text.Length && IsDigit(text[position]))
            {
                position++;
            }

            return position;
        }

        private static bool IsNumber(string text)
            => text.Length > 0 && SkipDigits(text, 0) == text.Length;

        private static bool IsDigit(char c) => c is >= '0' and <= '9';

        private static bool IsSeparator(char c) => c is '.' or '-' or '_';
    }
}

View File

@@ -0,0 +1,447 @@
using System.Collections.Frozen;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Licensing;
/// <summary>
/// Normalizes Python license classifiers and license strings to SPDX expressions.
/// </summary>
internal static partial class SpdxLicenseNormalizer
{
/// <summary>
/// Maps PyPI classifiers to SPDX identifiers.
/// </summary>
private static readonly FrozenDictionary<string, string> ClassifierToSpdx =
new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
{
// OSI Approved licenses
["License :: OSI Approved :: MIT License"] = "MIT",
["License :: OSI Approved :: MIT No Attribution License (MIT-0)"] = "MIT-0",
["License :: OSI Approved :: Apache Software License"] = "Apache-2.0",
["License :: OSI Approved :: BSD License"] = "BSD-3-Clause",
["License :: OSI Approved :: GNU General Public License (GPL)"] = "GPL-3.0-only",
["License :: OSI Approved :: GNU General Public License v2 (GPLv2)"] = "GPL-2.0-only",
["License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)"] = "GPL-2.0-or-later",
["License :: OSI Approved :: GNU General Public License v3 (GPLv3)"] = "GPL-3.0-only",
["License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)"] = "GPL-3.0-or-later",
["License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)"] = "LGPL-2.0-only",
["License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)"] = "LGPL-2.0-or-later",
["License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)"] = "LGPL-3.0-only",
["License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)"] = "LGPL-3.0-or-later",
["License :: OSI Approved :: GNU Affero General Public License v3"] = "AGPL-3.0-only",
["License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)"] = "AGPL-3.0-or-later",
["License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"] = "MPL-2.0",
["License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)"] = "MPL-1.1",
["License :: OSI Approved :: ISC License (ISCL)"] = "ISC",
["License :: OSI Approved :: Python Software Foundation License"] = "PSF-2.0",
["License :: OSI Approved :: Zope Public License"] = "ZPL-2.1",
["License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)"] = "EPL-1.0",
["License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)"] = "EPL-2.0",
["License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)"] = "EUPL-1.2",
["License :: OSI Approved :: Academic Free License (AFL)"] = "AFL-3.0",
["License :: OSI Approved :: Artistic License"] = "Artistic-2.0",
["License :: OSI Approved :: Boost Software License 1.0 (BSL-1.0)"] = "BSL-1.0",
["License :: OSI Approved :: Common Development and Distribution License 1.0 (CDDL-1.0)"] = "CDDL-1.0",
["License :: OSI Approved :: Historical Permission Notice and Disclaimer (HPND)"] = "HPND",
["License :: OSI Approved :: IBM Public License"] = "IPL-1.0",
["License :: OSI Approved :: Intel Open Source License"] = "Intel",
["License :: OSI Approved :: Jabber Open Source License"] = "JOSL-1.0",
["License :: OSI Approved :: Open Software License 3.0 (OSL-3.0)"] = "OSL-3.0",
["License :: OSI Approved :: PostgreSQL License"] = "PostgreSQL",
["License :: OSI Approved :: The Unlicense (Unlicense)"] = "Unlicense",
["License :: OSI Approved :: Universal Permissive License (UPL)"] = "UPL-1.0",
["License :: OSI Approved :: W3C License"] = "W3C",
["License :: OSI Approved :: zlib/libpng License"] = "Zlib",
// BSD variants (common on PyPI)
["License :: OSI Approved :: BSD 2-Clause License"] = "BSD-2-Clause",
["License :: OSI Approved :: BSD 3-Clause License"] = "BSD-3-Clause",
["License :: OSI Approved :: BSD-2-Clause"] = "BSD-2-Clause",
["License :: OSI Approved :: BSD-3-Clause"] = "BSD-3-Clause",
// Public domain and CC0
["License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"] = "CC0-1.0",
["License :: Public Domain"] = "Unlicense",
// Other common ones
["License :: Other/Proprietary License"] = "LicenseRef-Proprietary",
["License :: Freeware"] = "LicenseRef-Freeware",
["License :: Freely Distributable"] = "LicenseRef-FreelyDistributable",
// DFSG Free licenses
["License :: DFSG approved"] = "LicenseRef-DFSG-Approved",
}.ToFrozenDictionary();
/// <summary>
/// Maps common free-form license strings to SPDX identifiers.
/// </summary>
/// <remarks>
/// Lookups are case-insensitive. The comparer must be passed to
/// <c>ToFrozenDictionary</c> explicitly: when it is omitted the frozen copy is built with
/// the default (case-sensitive) comparer, regardless of the comparer used by the source
/// dictionary. Because of that, keys differing only by case are listed once.
/// </remarks>
private static readonly FrozenDictionary<string, string> LicenseStringToSpdx =
    new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        // MIT variations
        ["MIT"] = "MIT",
        ["MIT License"] = "MIT",
        ["The MIT License"] = "MIT",
        ["MIT-0"] = "MIT-0",
        // Apache variations
        ["Apache"] = "Apache-2.0",
        ["Apache 2"] = "Apache-2.0",
        ["Apache 2.0"] = "Apache-2.0",
        ["Apache-2"] = "Apache-2.0",
        ["Apache-2.0"] = "Apache-2.0",
        ["Apache License"] = "Apache-2.0",
        ["Apache License 2.0"] = "Apache-2.0",
        ["Apache License, Version 2.0"] = "Apache-2.0",
        ["Apache Software License"] = "Apache-2.0",
        ["ASL 2.0"] = "Apache-2.0",
        // BSD variations
        ["BSD"] = "BSD-3-Clause",
        ["BSD License"] = "BSD-3-Clause",
        ["BSD-2"] = "BSD-2-Clause",
        ["BSD 2-Clause"] = "BSD-2-Clause",
        ["BSD-2-Clause"] = "BSD-2-Clause",
        ["BSD-3"] = "BSD-3-Clause",
        ["BSD 3-Clause"] = "BSD-3-Clause",
        ["BSD-3-Clause"] = "BSD-3-Clause",
        ["Simplified BSD"] = "BSD-2-Clause",
        ["New BSD"] = "BSD-3-Clause",
        ["Modified BSD"] = "BSD-3-Clause",
        // GPL variations
        ["GPL"] = "GPL-3.0-only",
        ["GPLv2"] = "GPL-2.0-only",
        ["GPL v2"] = "GPL-2.0-only",
        ["GPL-2"] = "GPL-2.0-only",
        ["GPL-2.0"] = "GPL-2.0-only",
        ["GPL-2.0-only"] = "GPL-2.0-only",
        ["GPL-2.0+"] = "GPL-2.0-or-later",
        ["GPL-2.0-or-later"] = "GPL-2.0-or-later",
        ["GPLv3"] = "GPL-3.0-only",
        ["GPL v3"] = "GPL-3.0-only",
        ["GPL-3"] = "GPL-3.0-only",
        ["GPL-3.0"] = "GPL-3.0-only",
        ["GPL-3.0-only"] = "GPL-3.0-only",
        ["GPL-3.0+"] = "GPL-3.0-or-later",
        ["GPL-3.0-or-later"] = "GPL-3.0-or-later",
        ["GNU General Public License"] = "GPL-3.0-only",
        ["GNU General Public License v3"] = "GPL-3.0-only",
        // LGPL variations
        ["LGPL"] = "LGPL-3.0-only",
        ["LGPLv2"] = "LGPL-2.0-only",
        ["LGPL-2.0"] = "LGPL-2.0-only",
        ["LGPL-2.1"] = "LGPL-2.1-only",
        ["LGPLv3"] = "LGPL-3.0-only",
        ["LGPL-3.0"] = "LGPL-3.0-only",
        ["GNU Lesser General Public License"] = "LGPL-3.0-only",
        // AGPL variations
        ["AGPL"] = "AGPL-3.0-only",
        ["AGPLv3"] = "AGPL-3.0-only",
        ["AGPL-3.0"] = "AGPL-3.0-only",
        // MPL variations
        ["MPL"] = "MPL-2.0",
        ["MPL 2.0"] = "MPL-2.0",
        ["MPL-2.0"] = "MPL-2.0",
        ["Mozilla Public License 2.0"] = "MPL-2.0",
        // ISC
        ["ISC"] = "ISC",
        ["ISC License"] = "ISC",
        // Other common licenses
        ["PSF"] = "PSF-2.0",
        ["Python Software Foundation License"] = "PSF-2.0",
        ["PSFL"] = "PSF-2.0",
        ["Unlicense"] = "Unlicense",
        ["The Unlicense"] = "Unlicense",
        ["CC0"] = "CC0-1.0",
        ["CC0 1.0"] = "CC0-1.0",
        ["CC0-1.0"] = "CC0-1.0",
        ["Public Domain"] = "Unlicense",
        ["Zlib"] = "Zlib",
        ["Boost"] = "BSL-1.0",
        ["BSL-1.0"] = "BSL-1.0",
        ["EPL"] = "EPL-2.0",
        ["EPL-1.0"] = "EPL-1.0",
        ["EPL-2.0"] = "EPL-2.0",
        ["Eclipse"] = "EPL-2.0",
        ["Eclipse Public License"] = "EPL-2.0",
        ["Artistic"] = "Artistic-2.0",
        ["Artistic License"] = "Artistic-2.0",
        ["PostgreSQL"] = "PostgreSQL",
        ["W3C"] = "W3C",
        ["WTFPL"] = "WTFPL",
    }.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// Resolves a Python package's license metadata to a single SPDX expression.
/// </summary>
/// <remarks>
/// Sources are consulted in priority order: the PEP 639 license expression (used verbatim,
/// trimmed, when it passes the expression shape check), then the trove classifiers, then
/// the free-form license string.
/// </remarks>
/// <param name="license">The license field from METADATA.</param>
/// <param name="classifiers">The classifiers from METADATA.</param>
/// <param name="licenseExpression">PEP 639 license-expression field (if present).</param>
/// <returns>The normalized SPDX expression, or null when none of the sources yields one.</returns>
public static string? Normalize(
    string? license,
    IEnumerable<string>? classifiers,
    string? licenseExpression = null)
{
    // 1. An explicit, well-formed license expression wins outright.
    if (!string.IsNullOrWhiteSpace(licenseExpression) && IsValidSpdxExpression(licenseExpression))
    {
        return licenseExpression.Trim();
    }

    // 2. Classifiers come from a fixed vocabulary, so they are tried before free text.
    if (classifiers is not null && NormalizeFromClassifiers(classifiers) is { } fromClassifiers)
    {
        return fromClassifiers;
    }

    // 3. Fall back to the free-form license string (which itself may yield null).
    return string.IsNullOrWhiteSpace(license) ? null : NormalizeFromString(license);
}
/// <summary>
/// Translates license classifiers into an SPDX identifier or expression.
/// </summary>
/// <param name="classifiers">Classifier strings; each is trimmed before lookup and unknown ones are ignored.</param>
/// <returns>
/// Null when no classifier is recognised, the single identifier when exactly one distinct
/// identifier is found, otherwise the distinct identifiers sorted ordinally and joined
/// with " OR ".
/// </returns>
public static string? NormalizeFromClassifiers(IEnumerable<string> classifiers)
{
    // Distinct keeps the first occurrence, matching a "skip if already collected" loop.
    var distinctIds = classifiers
        .Select(entry => ClassifierToSpdx.TryGetValue(entry.Trim(), out var mapped) ? mapped : null)
        .Where(mapped => mapped is not null)
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToList();

    return distinctIds.Count switch
    {
        0 => null,
        1 => distinctIds[0],
        // Several licenses are reported as alternatives (dual licensing).
        _ => string.Join(" OR ", distinctIds.OrderBy(id => id, StringComparer.Ordinal)),
    };
}
/// <summary>
/// Normalizes a free-form license string to SPDX.
/// </summary>
/// <remarks>
/// Resolution order: exact table lookup, table lookup after stripping common noise words,
/// regex pattern matching, and finally a synthesized <c>LicenseRef-</c> identifier for
/// short, name-like strings.
/// </remarks>
/// <param name="license">The raw license text; surrounding whitespace is ignored.</param>
/// <returns>
/// An SPDX identifier, a <c>LicenseRef-*</c> identifier, or null when the text is blank,
/// does not look like a license name, or contains nothing usable for an identifier.
/// </returns>
public static string? NormalizeFromString(string license)
{
    if (string.IsNullOrWhiteSpace(license))
    {
        return null;
    }

    var trimmed = license.Trim();

    // Direct lookup
    if (LicenseStringToSpdx.TryGetValue(trimmed, out var spdxId))
    {
        return spdxId;
    }

    // Try normalized lookup (remove common suffixes/prefixes)
    var normalized = NormalizeLicenseString(trimmed);
    if (LicenseStringToSpdx.TryGetValue(normalized, out spdxId))
    {
        return spdxId;
    }

    // Try pattern matching for known patterns
    spdxId = TryPatternMatch(trimmed);
    if (spdxId is not null)
    {
        return spdxId;
    }

    // Can't normalize - return as LicenseRef
    if (!IsPlausibleLicenseName(trimmed))
    {
        return null;
    }

    // Punctuation-only input (e.g. "??") sanitizes to an empty string; a bare
    // "LicenseRef-" is not a valid identifier, so report "not determinable" instead.
    var sanitized = SanitizeForSpdx(trimmed);
    return sanitized.Length == 0 ? null : $"LicenseRef-{sanitized}";
}
/// <summary>
/// Attempts to recognise a license family (MIT, Apache, BSD, LGPL, AGPL, GPL) in free text
/// using the regex patterns declared in this class.
/// </summary>
/// <param name="license">The trimmed license text.</param>
/// <returns>The SPDX identifier for the first family that matches, or null.</returns>
private static string? TryPatternMatch(string license)
{
    // MIT pattern
    if (MitPattern().IsMatch(license))
    {
        return "MIT";
    }

    // Apache pattern
    if (ApachePattern().IsMatch(license))
    {
        return "Apache-2.0";
    }

    // BSD pattern
    var bsdMatch = BsdPattern().Match(license);
    if (bsdMatch.Success)
    {
        return bsdMatch.Groups["clauses"].Value switch
        {
            "2" => "BSD-2-Clause",
            "3" => "BSD-3-Clause",
            "4" => "BSD-4-Clause",
            _ => "BSD-3-Clause"
        };
    }

    // LGPL must be tested before GPL: every "Lesser General Public License" string also
    // contains "General Public License", so the GPL pattern would otherwise claim it and
    // the LGPL branch could never run.
    var lgplMatch = LgplPattern().Match(license);
    if (lgplMatch.Success)
    {
        // The LGPL pattern has no "or later" group, so inspect the text right after the match.
        var remainder = license[(lgplMatch.Index + lgplMatch.Length)..].TrimStart();
        var lgplOrLater = remainder.StartsWith('+')
            || remainder.StartsWith("or later", StringComparison.OrdinalIgnoreCase);
        var lgplBase = lgplMatch.Groups["version"].Value switch
        {
            "2" or "2.0" => "LGPL-2.0",
            "2.1" => "LGPL-2.1",
            _ => "LGPL-3.0"
        };
        return lgplOrLater ? $"{lgplBase}-or-later" : $"{lgplBase}-only";
    }

    // GPL pattern
    var gplMatch = GplPattern().Match(license);
    if (gplMatch.Success)
    {
        var orLater = gplMatch.Groups["orlater"].Success;

        // "GNU Affero General Public License" also satisfies the GPL pattern; it is a
        // different license and must not be reported as plain GPL.
        if (license.Contains("Affero", StringComparison.OrdinalIgnoreCase))
        {
            return orLater ? "AGPL-3.0-or-later" : "AGPL-3.0-only";
        }

        return gplMatch.Groups["version"].Value switch
        {
            "2" or "2.0" => orLater ? "GPL-2.0-or-later" : "GPL-2.0-only",
            "3" or "3.0" => orLater ? "GPL-3.0-or-later" : "GPL-3.0-only",
            _ => "GPL-3.0-only"
        };
    }

    return null;
}
/// <summary>
/// Strips common noise from a license string before a table lookup: every "the " and
/// " license" occurrence (any casing), both parenthesis characters, and surrounding whitespace.
/// </summary>
private static string NormalizeLicenseString(string license)
{
    const StringComparison AnyCase = StringComparison.OrdinalIgnoreCase;

    var withoutArticle = license.Replace("the ", "", AnyCase);
    var withoutLicenseWord = withoutArticle
        .Replace(" license", "", AnyCase)
        .Replace(" License", "", AnyCase);
    var withoutParentheses = withoutLicenseWord.Replace("(", "").Replace(")", "");

    return withoutParentheses.Trim();
}
/// <summary>
/// Cheap shape check for a license expression: it must be non-blank and match
/// <c>SpdxExpressionPattern</c>. Identifiers are not validated against the SPDX license list.
/// </summary>
private static bool IsValidSpdxExpression(string expression)
    => !string.IsNullOrWhiteSpace(expression) && SpdxExpressionPattern().IsMatch(expression);
/// <summary>
/// Heuristic filter that rejects text which is clearly not a license name: too short or
/// too long, URL-like, multi-line, or more than ten space-separated pieces.
/// </summary>
private static bool IsPlausibleLicenseName(string text)
{
    // Accept only 2..100 characters.
    if (text.Length is < 2 or > 100)
    {
        return false;
    }

    var resemblesUrl = text.Contains("://") || text.Contains("www.");
    if (resemblesUrl)
    {
        return false;
    }

    // Ten or more spaces means more than ten pieces, i.e. a sentence or paragraph.
    var isParagraph = text.Contains('\n') || text.Count(ch => ch == ' ') >= 10;
    return !isParagraph;
}
/// <summary>
/// Converts arbitrary text into the identifier part of an SPDX <c>LicenseRef-</c>.
/// </summary>
/// <remarks>
/// SPDX identifiers allow only ASCII letters, ASCII digits, "." and "-". Every other
/// character, including non-ASCII letters and digits that <c>char.IsLetterOrDigit</c>
/// would accept, is replaced by "-". Leading and trailing "-" are trimmed, so the
/// result may be empty.
/// </remarks>
private static string SanitizeForSpdx(string text)
{
    var sanitized = new char[text.Length];
    for (var i = 0; i < text.Length; i++)
    {
        var c = text[i];
        sanitized[i] = char.IsAsciiLetterOrDigit(c) || c is '.' or '-' ? c : '-';
    }

    return new string(sanitized).Trim('-');
}
// Matches text that starts with the word "MIT".
[GeneratedRegex(@"^MIT(\s|$)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex MitPattern();

// Matches "Apache" anywhere, optionally followed by "Software", "License", "Version" and a 2.0-style number.
[GeneratedRegex(@"Apache\s*(Software\s*)?(License\s*)?(Version\s*)?(2\.?0?)?", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex ApachePattern();

// Matches "BSD ... Clause" and captures an optional clause count (2, 3 or 4) in "clauses".
[GeneratedRegex(@"BSD[\s\-]?(?<clauses>[234])?\s*[\-]?\s*Clause", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex BsdPattern();

// Matches "GNU [General] Public License" or "General Public License", capturing an optional
// version and an optional "+" / "or later" marker. The name must start with "GNU" or
// "General" so that other "... Public License" families (Mozilla, Eclipse, IBM, ...) are not
// mistaken for the GPL. "version" is listed before "v" in the alternation: with "v" first
// the regex consumed only the "v" of "version 2" and the version number was lost.
[GeneratedRegex(@"(?:GNU\s*(?:General\s*)?|General\s*)Public\s*License[\s,]*(?:version|v)?[\s]*(?<version>[23](\.0)?)?(?<orlater>\+|\s*or\s*later)?", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex GplPattern();

// Matches "[GNU] Lesser [General] Public License" and captures an optional version (2, 2.0, 2.1, 3, 3.0).
[GeneratedRegex(@"(GNU\s*)?Lesser\s*(General\s*)?Public\s*License[\s,]*(?:version|v)?[\s]*(?<version>[23](\.0|\.1)?)?", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex LgplPattern();

// Shape check for license expressions: identifier-like runs optionally joined by AND / OR / WITH.
[GeneratedRegex(@"^[A-Za-z0-9.\-\+ ]+(\s+(AND|OR|WITH)\s+[A-Za-z0-9.\-\+ ]+)*$", RegexOptions.Compiled)]
private static partial Regex SpdxExpressionPattern();
}

View File

@@ -0,0 +1,524 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Vendoring;
/// <summary>
/// Detects vendored (bundled) packages inside Python packages.
/// Python's equivalent of Java's shaded JAR detection.
/// Common patterns: pip._vendor, requests.packages, certifi bundled certs.
/// </summary>
internal static partial class VendoredPackageDetector
{
/// <summary>
/// Directory names that are treated as vendoring containers.
/// </summary>
/// <remarks>
/// Each name is compared case-insensitively against individual path segments below the
/// package directory (see FindVendorDirectoriesAsync) and searched for as a
/// "/name/" or "\name\" substring of RECORD paths (see AnalyzeAsync).
/// </remarks>
private static readonly string[] VendorDirectoryPatterns =
[
"_vendor",
"_vendored",
"vendor",
"vendored",
"extern",
"external",
"third_party",
"thirdparty",
"packages", // Old requests pattern
"lib", // Sometimes used for bundled libs
"bundled"
];
/// <summary>
/// Well-known vendored packages in the Python ecosystem.
/// Maps parent package to expected vendored packages.
/// </summary>
/// <remarks>
/// Keys are matched case-insensitively; AnalyzeAsync looks a package up by its
/// NormalizedName and then compares the listed names, also case-insensitively,
/// with the names of the embedded packages it discovered.
/// </remarks>
private static readonly IReadOnlyDictionary<string, string[]> KnownVendoredPackages =
new Dictionary<string, string[]>(StringComparer.OrdinalIgnoreCase)
{
["pip"] = ["certifi", "chardet", "colorama", "distlib", "html5lib", "idna", "msgpack",
"packaging", "pep517", "pkg_resources", "platformdirs", "pygments", "pyparsing",
"requests", "resolvelib", "rich", "setuptools", "six", "tenacity", "tomli",
"truststore", "typing_extensions", "urllib3", "webencodings"],
["setuptools"] = ["more_itertools", "ordered_set", "packaging", "pyparsing"],
["requests"] = ["urllib3", "chardet", "idna", "certifi"],
["urllib3"] = ["six"],
["virtualenv"] = ["distlib", "filelock", "platformdirs", "six"],
};
/// <summary>
/// Inspects a single package for vendored (bundled) dependencies.
/// </summary>
/// <param name="vfs">Virtual file system whose files are scanned.</param>
/// <param name="package">The package to inspect.</param>
/// <param name="cancellationToken">Checked before each vendor-directory pattern is scanned.</param>
/// <returns>
/// The analysis with markers, embedded packages and vendor paths sorted ordinally, or
/// <c>VendoringAnalysis.NotVendored</c> when no package directory can be derived.
/// </returns>
public static async Task<VendoringAnalysis> AnalyzeAsync(
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(package);

    var packageRoot = GetPackageDirectory(package);
    if (string.IsNullOrEmpty(packageRoot))
    {
        return VendoringAnalysis.NotVendored(package.Name);
    }

    var evidence = new List<string>();
    var embedded = new List<EmbeddedPackage>();
    var vendorRoots = new List<string>();

    // Evidence 1: vendor-style directories below the package, plus what they contain.
    foreach (var directoryName in VendorDirectoryPatterns)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var matches = await FindVendorDirectoriesAsync(vfs, packageRoot, directoryName, cancellationToken)
            .ConfigureAwait(false);

        foreach (var vendorRoot in matches)
        {
            // One marker per directory found; duplicates are collapsed in the result but
            // still counted by the confidence calculation.
            evidence.Add($"vendor-directory:{directoryName}");
            vendorRoots.Add(vendorRoot);

            var found = await ExtractEmbeddedPackagesAsync(vfs, vendorRoot, package.Name, cancellationToken)
                .ConfigureAwait(false);
            embedded.AddRange(found);
        }
    }

    // Evidence 2: the package is a known vendoring parent and at least one expected name was found.
    if (KnownVendoredPackages.TryGetValue(package.NormalizedName, out var expectedNames)
        && embedded.Any(e => expectedNames.Contains(e.Name, StringComparer.OrdinalIgnoreCase)))
    {
        evidence.Add("known-vendored-package");
    }

    // Evidence 3: RECORD lists at least one path that passes through a vendor-style directory.
    if (package.RecordFiles.Length > 0
        && package.RecordFiles.Any(record => VendorDirectoryPatterns.Any(name =>
            record.Path.Contains($"/{name}/", StringComparison.OrdinalIgnoreCase) ||
            record.Path.Contains($"\\{name}\\", StringComparison.OrdinalIgnoreCase))))
    {
        evidence.Add("record-vendor-entries");
    }

    var confidence = CalculateConfidence(evidence, embedded.Count);

    return new VendoringAnalysis(
        PackageName: package.Name,
        IsVendored: confidence >= VendoringConfidence.Low, // anything above None counts
        Confidence: confidence,
        Markers: [.. evidence.Distinct().OrderBy(m => m, StringComparer.Ordinal)],
        EmbeddedPackages: [.. embedded.OrderBy(e => e.Name, StringComparer.Ordinal)],
        VendorPaths: [.. vendorRoots.Distinct().OrderBy(p => p, StringComparer.Ordinal)]);
}
/// <summary>
/// Runs <see cref="AnalyzeAsync"/> for every package of a discovery result and keeps only
/// the packages that show signs of vendoring.
/// </summary>
/// <param name="vfs">Virtual file system whose files are scanned.</param>
/// <param name="discoveryResult">The discovered packages to analyze.</param>
/// <param name="cancellationToken">Checked before each package is analyzed.</param>
/// <returns>The analyses whose <c>IsVendored</c> flag is set, in discovery order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="vfs"/> or <paramref name="discoveryResult"/> is null.
/// </exception>
public static async Task<ImmutableArray<VendoringAnalysis>> AnalyzeAllAsync(
    PythonVirtualFileSystem vfs,
    PythonPackageDiscoveryResult discoveryResult,
    CancellationToken cancellationToken = default)
{
    // Validate up front, like AnalyzeAsync does, instead of failing later with a
    // NullReferenceException on discoveryResult.Packages.
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(discoveryResult);

    var vendored = ImmutableArray.CreateBuilder<VendoringAnalysis>();

    foreach (var package in discoveryResult.Packages)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var analysis = await AnalyzeAsync(vfs, package, cancellationToken).ConfigureAwait(false);
        if (analysis.IsVendored)
        {
            vendored.Add(analysis);
        }
    }

    return vendored.ToImmutable();
}
/// <summary>
/// Derives the directory expected to hold the package's importable module.
/// </summary>
/// <remarks>
/// The base directory is the parent of <c>MetadataPath</c> when that is set, otherwise
/// <c>Location</c>. The module name is the first top-level module when one is recorded,
/// otherwise the normalized package name. Presumably the base is the site-packages
/// directory next to the dist-info; verify against how MetadataPath is populated.
/// </remarks>
/// <returns>The combined path with forward slashes, or null when no base directory is known.</returns>
private static string? GetPackageDirectory(PythonPackageInfo package)
{
    // MetadataPath takes priority; Location is only consulted when MetadataPath is absent
    // (not when its parent directory turns out to be empty).
    var baseDir = !string.IsNullOrEmpty(package.MetadataPath)
        ? Path.GetDirectoryName(package.MetadataPath)
        : !string.IsNullOrEmpty(package.Location)
            ? package.Location
            : null;

    if (string.IsNullOrEmpty(baseDir))
    {
        return null;
    }

    string moduleName;
    if (package.TopLevelModules.Length > 0)
    {
        moduleName = package.TopLevelModules[0];
    }
    else
    {
        moduleName = package.NormalizedName;
    }

    var combined = Path.Combine(baseDir, moduleName);
    return combined.Replace('\\', '/');
}
/// <summary>
/// Finds directories named <paramref name="vendorPattern"/> (case-insensitive) on the path
/// of any file located below <paramref name="baseDir"/>.
/// </summary>
/// <param name="vfs">Virtual file system whose file list is scanned.</param>
/// <param name="baseDir">Package directory; only files below it are considered.</param>
/// <param name="vendorPattern">A single directory name to look for.</param>
/// <param name="cancellationToken">Checked once per file.</param>
/// <returns>
/// Distinct vendor directory paths (forward slashes) in first-seen order. For each file
/// only the outermost matching directory is reported.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled during the scan.</exception>
private static async Task<List<string>> FindVendorDirectoriesAsync(
    PythonVirtualFileSystem vfs,
    string baseDir,
    string vendorPattern,
    CancellationToken cancellationToken)
{
    var results = new List<string>();
    // The set makes the duplicate check O(1); the list preserves discovery order.
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        foreach (var file in vfs.Files)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var relativePath = GetRelativePath(baseDir, file.VirtualPath);
            if (string.IsNullOrEmpty(relativePath))
            {
                continue;
            }

            var parts = relativePath.Split(['/', '\\'], StringSplitOptions.RemoveEmptyEntries);

            // The last segment is the file name itself, so it is never a vendor directory.
            for (var i = 0; i < parts.Length - 1; i++)
            {
                if (!string.Equals(parts[i], vendorPattern, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                var vendorPath = string.Join("/", parts, 0, i + 1);
                var fullVendorPath = Path.Combine(baseDir, vendorPath).Replace('\\', '/');
                if (seen.Add(fullVendorPath))
                {
                    results.Add(fullVendorPath);
                }

                break;
            }
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Scanning is best-effort: keep what was found so far. Cancellation is excluded
        // from this handler so that a cancelled scan stops instead of returning a
        // partial result as if it were complete.
    }

    await Task.CompletedTask.ConfigureAwait(false); // Keep async signature for future enhancements
    return results;
}
/// <summary>
/// Lists the packages found directly inside a vendor directory.
/// </summary>
/// <remarks>
/// A package is either a sub-directory of <paramref name="vendorPath"/> or a top-level
/// <c>.py</c> file (single-file module). Names starting with "__" or "." are skipped, as are
/// other top-level files such as <c>vendor.txt</c> or <c>README.rst</c>, which are data and
/// not importable modules. Each name is reported once (case-insensitive).
/// </remarks>
/// <param name="vfs">Virtual file system whose file list is scanned.</param>
/// <param name="vendorPath">Vendor directory path as produced by FindVendorDirectoriesAsync.</param>
/// <param name="parentPackage">Name of the package that owns the vendor directory.</param>
/// <param name="cancellationToken">Checked once per file and passed to the version/license probes.</param>
/// <returns>The embedded packages in first-seen order.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled during the scan.</exception>
private static async Task<List<EmbeddedPackage>> ExtractEmbeddedPackagesAsync(
    PythonVirtualFileSystem vfs,
    string vendorPath,
    string parentPackage,
    CancellationToken cancellationToken)
{
    var packages = new List<EmbeddedPackage>();
    var seenPackages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        foreach (var file in vfs.Files)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // GetRelativePath requires a "/" right after the vendor path. A bare prefix test
            // would also accept siblings such as "_vendor_extra/..." and slicing at
            // Length + 1 would throw for a path equal to the vendor path.
            var relativePath = GetRelativePath(vendorPath, file.VirtualPath);
            if (string.IsNullOrEmpty(relativePath))
            {
                continue;
            }

            var parts = relativePath.Split(['/', '\\'], StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length == 0)
            {
                continue;
            }

            // Get the package name (first directory or .py file)
            var packageName = parts[0];
            var isTopLevelFile = parts.Length == 1;

            if (isTopLevelFile)
            {
                if (!packageName.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
                {
                    // Plain data file next to the packages, not a module.
                    continue;
                }

                // Single-file module: drop the extension.
                packageName = packageName[..^3];
            }

            // Skip __pycache__, __init__ and hidden entries.
            if (packageName.StartsWith("__", StringComparison.Ordinal)
                || packageName.StartsWith('.'))
            {
                continue;
            }

            if (!seenPackages.Add(packageName))
            {
                continue;
            }

            // Try to extract version from __init__.py or version.py
            var version = await ExtractVersionAsync(vfs, vendorPath, packageName, cancellationToken)
                .ConfigureAwait(false);

            // Try to find license
            var license = await ExtractLicenseAsync(vfs, vendorPath, packageName, cancellationToken)
                .ConfigureAwait(false);

            packages.Add(new EmbeddedPackage(
                packageName,
                version,
                license,
                Path.Combine(vendorPath, packageName).Replace('\\', '/'),
                parentPackage));
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Extraction is best-effort: keep what was collected so far. Cancellation is
        // deliberately not swallowed.
    }

    return packages;
}
/// <summary>
/// Tries to read a version string for an embedded package from its usual version files.
/// </summary>
/// <remarks>
/// Probes <c>__init__.py</c>, <c>_version.py</c>, <c>version.py</c> and <c>__version__.py</c>
/// inside the package directory, in that order, and returns the first match of
/// <c>VersionPattern</c>. Files that cannot be opened or read are skipped.
/// </remarks>
/// <returns>The captured version text, or null when no probed file yields one.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled while reading.</exception>
private static async Task<string?> ExtractVersionAsync(
    PythonVirtualFileSystem vfs,
    string vendorPath,
    string packageName,
    CancellationToken cancellationToken)
{
    // Common locations for version information
    var versionFiles = new[]
    {
        $"{vendorPath}/{packageName}/__init__.py",
        $"{vendorPath}/{packageName}/_version.py",
        $"{vendorPath}/{packageName}/version.py",
        $"{vendorPath}/{packageName}/__version__.py"
    };

    foreach (var versionFile in versionFiles)
    {
        try
        {
            using var stream = await vfs.OpenReadAsync(versionFile, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                continue;
            }

            using var reader = new StreamReader(stream);
            var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

            // Look for __version__ = "x.y.z"
            var match = VersionPattern().Match(content);
            if (match.Success)
            {
                return match.Groups["version"].Value;
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Unreadable file: continue with the next candidate. Cancellation must
            // propagate, otherwise a cancelled scan would keep opening the remaining files.
        }
    }

    return null;
}
/// <summary>
/// Tries to identify the license of an embedded package from its license file.
/// </summary>
/// <remarks>
/// Probes <c>LICENSE</c>, <c>LICENSE.txt</c>, <c>LICENSE.md</c> and <c>COPYING</c> in the
/// package directory, in that order, and classifies the first file that can be opened by
/// its first line only. "MIT" is matched as a whole word: a plain substring test also
/// fires on names such as "Smith" or words such as "Limited" in a copyright line.
/// </remarks>
/// <returns>
/// "MIT", "Apache-2.0" or "BSD-3-Clause" when the first line names that family,
/// "Unknown (license file present)" when a file exists but is not recognised, or null
/// when no license file is found.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled while reading.</exception>
private static async Task<string?> ExtractLicenseAsync(
    PythonVirtualFileSystem vfs,
    string vendorPath,
    string packageName,
    CancellationToken cancellationToken)
{
    // Common license file locations
    var licenseFiles = new[]
    {
        $"{vendorPath}/{packageName}/LICENSE",
        $"{vendorPath}/{packageName}/LICENSE.txt",
        $"{vendorPath}/{packageName}/LICENSE.md",
        $"{vendorPath}/{packageName}/COPYING"
    };

    foreach (var licenseFile in licenseFiles)
    {
        try
        {
            using var stream = await vfs.OpenReadAsync(licenseFile, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                continue;
            }

            using var reader = new StreamReader(stream);
            var firstLine = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);

            // Try to identify license from content
            if (firstLine is not null)
            {
                if (Regex.IsMatch(firstLine, @"\bMIT\b", RegexOptions.IgnoreCase))
                {
                    return "MIT";
                }

                if (firstLine.Contains("Apache", StringComparison.OrdinalIgnoreCase))
                {
                    return "Apache-2.0";
                }

                if (firstLine.Contains("BSD", StringComparison.OrdinalIgnoreCase))
                {
                    return "BSD-3-Clause";
                }
            }

            return "Unknown (license file present)";
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Unreadable file: continue with the next candidate. Cancellation must propagate.
        }
    }

    return null;
}
/// <summary>
/// Returns the part of <paramref name="fullPath"/> below <paramref name="basePath"/>.
/// </summary>
/// <remarks>
/// Both paths are compared with forward slashes and without regard to case. The base must
/// be followed by a "/" in the full path, so a path equal to the base, or one that merely
/// shares a name prefix with it, yields null.
/// </remarks>
private static string? GetRelativePath(string basePath, string fullPath)
{
    var normalizedBase = basePath.Replace('\\', '/').TrimEnd('/');
    var normalizedFull = fullPath.Replace('\\', '/');
    var requiredPrefix = normalizedBase + "/";

    return normalizedFull.StartsWith(requiredPrefix, StringComparison.OrdinalIgnoreCase)
        ? normalizedFull[requiredPrefix.Length..]
        : null;
}
/// <summary>
/// Turns the collected evidence into a confidence level using an additive score.
/// </summary>
/// <remarks>
/// Score: +3 for "known-vendored-package", +2 for "record-vendor-entries", +1 for every
/// "vendor-directory:" marker (duplicates included), +2 for more than five embedded
/// packages or +1 for more than one. A score of 4 or more is High, 2 or more Medium,
/// 1 Low, otherwise None.
/// </remarks>
private static VendoringConfidence CalculateConfidence(List<string> markers, int embeddedCount)
{
    var knownPackagePoints = markers.Contains("known-vendored-package") ? 3 : 0;
    var recordPoints = markers.Contains("record-vendor-entries") ? 2 : 0;
    var directoryPoints = markers.Count(m => m.StartsWith("vendor-directory:"));
    var embeddedPoints = embeddedCount switch
    {
        > 5 => 2,
        > 1 => 1,
        _ => 0
    };

    var total = knownPackagePoints + recordPoints + directoryPoints + embeddedPoints;

    if (total >= 4)
    {
        return VendoringConfidence.High;
    }

    if (total >= 2)
    {
        return VendoringConfidence.Medium;
    }

    return total >= 1 ? VendoringConfidence.Low : VendoringConfidence.None;
}
// Matches __version__ = "x.y.z" or VERSION = "x.y.z" (single or double quotes) and captures
// the quoted text in the "version" group. The look-behind rejects a preceding word
// character so that longer identifiers such as API_VERSION or MIN_VERSION, which may appear
// earlier in the file, are not mistaken for the package version.
[GeneratedRegex(
    @"(?<!\w)(?:__version__|VERSION)\s*=\s*['""](?<version>[^'""]+)['""]",
    RegexOptions.Compiled)]
private static partial Regex VersionPattern();
}
/// <summary>
/// Outcome of inspecting one package for vendored dependencies.
/// </summary>
/// <param name="PackageName">Name of the inspected package.</param>
/// <param name="IsVendored">Whether any vendoring evidence was found.</param>
/// <param name="Confidence">Confidence derived from the collected evidence.</param>
/// <param name="Markers">Evidence markers that were recorded.</param>
/// <param name="EmbeddedPackages">Packages found inside vendor directories.</param>
/// <param name="VendorPaths">Vendor directories that were found.</param>
internal sealed record VendoringAnalysis(
    string PackageName,
    bool IsVendored,
    VendoringConfidence Confidence,
    ImmutableArray<string> Markers,
    ImmutableArray<EmbeddedPackage> EmbeddedPackages,
    ImmutableArray<string> VendorPaths)
{
    /// <summary>
    /// Creates the "nothing found" result for a package: not vendored, confidence None,
    /// all collections empty.
    /// </summary>
    public static VendoringAnalysis NotVendored(string packageName)
    {
        return new VendoringAnalysis(
            PackageName: packageName,
            IsVendored: false,
            Confidence: VendoringConfidence.None,
            Markers: [],
            EmbeddedPackages: [],
            VendorPaths: []);
    }

    /// <summary>
    /// Number of embedded packages.
    /// </summary>
    public int EmbeddedCount
    {
        get { return EmbeddedPackages.Length; }
    }

    /// <summary>
    /// Joins the embedded packages' name-with-version labels with commas (no spaces).
    /// </summary>
    public string GetEmbeddedPackageList()
    {
        var labels = EmbeddedPackages.Select(embedded => embedded.NameWithVersion);
        return string.Join(",", labels);
    }

    /// <summary>
    /// Projects every embedded package to its PURL.
    /// </summary>
    public IEnumerable<string> GetEmbeddedPurls()
    {
        return EmbeddedPackages.Select(embedded => embedded.Purl);
    }
}
/// <summary>
/// A package that is bundled (vendored) inside another package.
/// </summary>
/// <param name="Name">Package or module name as found in the vendor directory.</param>
/// <param name="Version">Detected version, or null when none was found.</param>
/// <param name="License">Detected license label, or null when none was found.</param>
/// <param name="Path">Path of the embedded package.</param>
/// <param name="ParentPackage">Name of the package that embeds this one.</param>
internal sealed record EmbeddedPackage(
    string Name,
    string? Version,
    string? License,
    string Path,
    string ParentPackage)
{
    /// <summary>
    /// "name@version" when a version is known, otherwise just the name.
    /// </summary>
    public string NameWithVersion => Version is null ? Name : $"{Name}@{Version}";

    /// <summary>
    /// The pkg:pypi PURL, using the lower-cased name with underscores replaced by dashes
    /// and an "@version" suffix only when a version is known.
    /// </summary>
    public string Purl
    {
        get
        {
            var identity = $"pkg:pypi/{NormalizeName(Name)}";
            return Version is null ? identity : $"{identity}@{Version}";
        }
    }

    /// <summary>
    /// Dotted name of the form parent._vendor.name. The "_vendor" segment is fixed and does
    /// not reflect the actual vendor directory; see <c>Path</c> for the real location.
    /// </summary>
    public string QualifiedName => string.Concat(ParentPackage, "._vendor.", Name);

    // Lower-case the name and use dashes instead of underscores.
    private static string NormalizeName(string name)
    {
        return name.Replace('_', '-').ToLowerInvariant();
    }
}
/// <summary>
/// Confidence level for vendoring detection.
/// </summary>
/// <remarks>
/// Values are ordered so that levels can be compared numerically; AnalyzeAsync treats
/// anything at or above <see cref="Low"/> as vendored.
/// </remarks>
internal enum VendoringConfidence
{
// No evidence (score 0 in CalculateConfidence).
None = 0,
// Score of 1.
Low = 1,
// Score of 2 or 3.
Medium = 2,
// Score of 4 or more.
High = 3
}