tests fixes and sprints work

This commit is contained in:
master
2026-01-22 19:08:46 +02:00
parent c32fff8f86
commit 726d70dc7f
881 changed files with 134434 additions and 6228 deletions

View File

@@ -0,0 +1,652 @@
// -----------------------------------------------------------------------------
// DotNetLicenseDetector.cs
// Sprint: SPRINT_20260119_024_Scanner_license_detection_enhancements
// Task: TASK-024-010 - Add .NET/NuGet license detector
// Description: Enhanced .NET license detection returning LicenseDetectionResult
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml.Linq;
using StellaOps.Scanner.Analyzers.Lang.Core.Licensing;
using StellaOps.Scanner.Analyzers.Lang.DotNet.Internal.BuildMetadata;
namespace StellaOps.Scanner.Analyzers.Lang.DotNet.Internal.Licensing;
/// <summary>
/// Enhanced .NET/NuGet license detector that returns full LicenseDetectionResult.
/// Supports .csproj, .nuspec, AssemblyInfo, and LICENSE file extraction.
/// </summary>
internal sealed partial class DotNetLicenseDetector
{
private readonly ILicenseCategorizationService _categorizationService;
private readonly ILicenseTextExtractor _textExtractor;
private readonly ICopyrightExtractor _copyrightExtractor;
/// <summary>
/// Initializes the detector with caller-supplied collaborators.
/// </summary>
/// <param name="categorizationService">Service whose <c>Enrich</c> method post-processes every result built by this class.</param>
/// <param name="textExtractor">Extractor used to read license files and scan directories for them.</param>
/// <param name="copyrightExtractor">Copyright extractor; stored as-is in a field.</param>
public DotNetLicenseDetector(
    ILicenseCategorizationService categorizationService,
    ILicenseTextExtractor textExtractor,
    ICopyrightExtractor copyrightExtractor)
    => (_categorizationService, _textExtractor, _copyrightExtractor)
        = (categorizationService, textExtractor, copyrightExtractor);
/// <summary>
/// Initializes the detector with freshly constructed default implementations
/// of the categorization, text-extraction and copyright-extraction services.
/// </summary>
public DotNetLicenseDetector()
    : this(
        new LicenseCategorizationService(),
        new LicenseTextExtractor(),
        new CopyrightExtractor())
{
}
/// <summary>
/// Builds a license detection result for a .NET project from its parsed metadata.
/// </summary>
/// <remarks>
/// Only the first entry of <c>projectMetadata.Licenses</c> is considered. When the
/// metadata declares no license, detection falls back to scanning
/// <paramref name="projectDirectory"/> for a license file.
/// </remarks>
/// <param name="projectMetadata">Parsed project metadata; <c>null</c> yields <c>null</c>.</param>
/// <param name="projectDirectory">
/// Optional project directory. When supplied it is used to load the license text
/// and to look for a copyright string in <c>AssemblyInfo.cs</c>.
/// </param>
/// <param name="ct">Cancellation token passed to the extraction calls.</param>
/// <returns>
/// The result returned by the categorization service, or <c>null</c> when no
/// license information could be found.
/// </returns>
public async Task<LicenseDetectionResult?> DetectFromProjectAsync(
    DotNetProjectMetadata projectMetadata,
    string? projectDirectory = null,
    CancellationToken ct = default)
{
    if (projectMetadata is null)
    {
        return null;
    }

    var declared = projectMetadata.Licenses.Length > 0
        ? projectMetadata.Licenses[0]
        : null;

    if (declared is null)
    {
        // Nothing declared in the project file: the only remaining source is a
        // license file on disk, which requires a directory to look in.
        return string.IsNullOrWhiteSpace(projectDirectory)
            ? null
            : await DetectFromDirectoryAsync(projectDirectory, ct);
    }

    LicenseTextExtractionResult? extractedText = null;
    string? assemblyInfoCopyright = null;

    if (!string.IsNullOrWhiteSpace(projectDirectory))
    {
        if (string.IsNullOrWhiteSpace(declared.File))
        {
            // No explicit file declared: take the first license file the extractor finds.
            var discovered = await _textExtractor.ExtractFromDirectoryAsync(projectDirectory, ct);
            extractedText = discovered.FirstOrDefault();
        }
        else
        {
            // An explicit file was declared: use it only if it actually exists.
            var declaredFilePath = Path.Combine(projectDirectory, declared.File);
            if (File.Exists(declaredFilePath))
            {
                extractedText = await _textExtractor.ExtractAsync(declaredFilePath, ct);
            }
        }

        assemblyInfoCopyright = await TryExtractAssemblyInfoCopyrightAsync(projectDirectory, ct);
    }

    var spdxId = DetermineSpdxId(declared);

    // Copyright notices found in the license text take precedence over the
    // AssemblyInfo attribute; only the first available notice is reported.
    string? primaryCopyright = null;
    if (extractedText?.CopyrightNotices.Length > 0)
    {
        primaryCopyright = extractedText.CopyrightNotices[0].FullText;
    }
    else if (!string.IsNullOrWhiteSpace(assemblyInfoCopyright))
    {
        primaryCopyright = assemblyInfoCopyright;
    }

    var isCompound = IsExpression(spdxId);

    return _categorizationService.Enrich(new LicenseDetectionResult
    {
        SpdxId = spdxId,
        OriginalText = GetOriginalText(declared),
        LicenseUrl = declared.Url,
        Confidence = MapConfidence(declared.Confidence),
        Method = DetermineDetectionMethod(declared),
        SourceFile = projectMetadata.SourcePath ?? "*.csproj",
        Category = LicenseCategory.Unknown,
        Obligations = [],
        LicenseText = extractedText?.FullText,
        LicenseTextHash = extractedText?.TextHash,
        CopyrightNotice = primaryCopyright,
        IsExpression = isCompound,
        ExpressionComponents = isCompound ? ParseExpressionComponents(spdxId) : []
    });
}
/// <summary>
/// Detects license information from a <c>.nuspec</c> file on disk.
/// </summary>
/// <param name="nuspecPath">Path to the nuspec file. Blank or non-existent paths yield <c>null</c>.</param>
/// <param name="ct">Cancellation token for the file read and the downstream detection.</param>
/// <returns>
/// The detection result, or <c>null</c> when the file is missing, unreadable,
/// or contains no usable license information.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="ct"/> is cancelled; cancellation is not reported as "no license".
/// </exception>
public async Task<LicenseDetectionResult?> DetectFromNuspecAsync(
    string nuspecPath,
    CancellationToken ct = default)
{
    if (string.IsNullOrWhiteSpace(nuspecPath) || !File.Exists(nuspecPath))
    {
        return null;
    }

    try
    {
        var content = await File.ReadAllTextAsync(nuspecPath, ct);

        // The nuspec's own directory is used to locate any license file it references.
        return await DetectFromNuspecContentAsync(content, Path.GetDirectoryName(nuspecPath), ct);
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: an unreadable nuspec means "no license detected".
        // Cancellation is deliberately excluded so callers can observe it.
        return null;
    }
}
/// <summary>
/// Detects license information from the XML content of a <c>.nuspec</c> file.
/// </summary>
/// <remarks>
/// The <c>&lt;license&gt;</c> element is consulted first: <c>type="expression"</c>
/// is used verbatim as the SPDX identifier, and <c>type="file"</c> is resolved
/// against <paramref name="packageDirectory"/>. If neither produces a result,
/// the <c>&lt;licenseUrl&gt;</c> element is mapped to an identifier by URL matching.
/// </remarks>
/// <param name="nuspecContent">Raw nuspec XML. Blank content yields <c>null</c>.</param>
/// <param name="packageDirectory">Optional directory used to locate license files.</param>
/// <param name="ct">Cancellation token passed to the extraction calls.</param>
/// <returns>
/// The detection result, or <c>null</c> when the XML is malformed, has no
/// <c>metadata</c> element, or carries no usable license information.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="ct"/> is cancelled; cancellation is not reported as "no license".
/// </exception>
public async Task<LicenseDetectionResult?> DetectFromNuspecContentAsync(
    string nuspecContent,
    string? packageDirectory = null,
    CancellationToken ct = default)
{
    if (string.IsNullOrWhiteSpace(nuspecContent))
    {
        return null;
    }

    try
    {
        var doc = XDocument.Parse(nuspecContent);
        var ns = doc.Root?.GetDefaultNamespace() ?? XNamespace.None;
        var metadata = doc.Root?.Element(ns + "metadata");
        if (metadata is null)
        {
            return null;
        }

        // Try license element (NuGet 4.9+)
        var licenseElement = metadata.Element(ns + "license");
        if (licenseElement is not null)
        {
            var licenseType = licenseElement.Attribute("type")?.Value;
            var licenseValue = licenseElement.Value.Trim();

            if (string.Equals(licenseType, "expression", StringComparison.OrdinalIgnoreCase))
            {
                return await CreateNuspecLicenseResultAsync(
                    licenseValue,
                    null,
                    LicenseDetectionMethod.PackageMetadata,
                    LicenseDetectionConfidence.High,
                    packageDirectory,
                    ct);
            }
            else if (string.Equals(licenseType, "file", StringComparison.OrdinalIgnoreCase)
                && !string.IsNullOrWhiteSpace(packageDirectory))
            {
                // The file name comes from the (untrusted) package. Refuse rooted
                // paths and ".." traversal so only files inside the package
                // directory can ever be read and surfaced as license text.
                var licensePath = Path.Combine(packageDirectory, licenseValue);
                if (IsInsideDirectory(packageDirectory, licensePath) && File.Exists(licensePath))
                {
                    return await DetectFromLicenseFileAsync(licensePath, ct);
                }
            }
        }

        // Try licenseUrl (deprecated but common)
        var licenseUrl = metadata.Element(ns + "licenseUrl")?.Value;
        if (!string.IsNullOrWhiteSpace(licenseUrl))
        {
            var spdxId = NormalizeFromUrl(licenseUrl);

            // A "LicenseRef-" result means the URL was not recognised, so the
            // match is reported with low rather than medium confidence.
            return await CreateNuspecLicenseResultAsync(
                spdxId,
                licenseUrl,
                LicenseDetectionMethod.UrlMatching,
                spdxId.StartsWith("LicenseRef-", StringComparison.Ordinal)
                    ? LicenseDetectionConfidence.Low
                    : LicenseDetectionConfidence.Medium,
                packageDirectory,
                ct);
        }

        return null;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: malformed XML or I/O failures mean "no license detected".
        // Cancellation is deliberately excluded so callers can observe it.
        return null;
    }

    // Returns true when the fully-resolved path lies strictly beneath root.
    static bool IsInsideDirectory(string root, string path)
    {
        try
        {
            var rootFull = Path.GetFullPath(root);
            if (!Path.EndsInDirectorySeparator(rootFull))
            {
                rootFull += Path.DirectorySeparatorChar;
            }

            return Path.GetFullPath(path).StartsWith(rootFull, StringComparison.Ordinal);
        }
        catch (Exception ex) when (ex is ArgumentException or NotSupportedException or PathTooLongException)
        {
            // A path that cannot even be resolved is treated as outside the package.
            return false;
        }
    }
}
/// <summary>
/// Detects a license by scanning a directory for license files and using the
/// first one the text extractor returns.
/// </summary>
/// <param name="directory">Directory to scan. Blank or non-existent directories yield <c>null</c>.</param>
/// <param name="ct">Cancellation token passed to the extractor.</param>
/// <returns>
/// The result returned by the categorization service, or <c>null</c> when no
/// license file was found or the extractor could not identify its license.
/// </returns>
public async Task<LicenseDetectionResult?> DetectFromDirectoryAsync(
    string directory,
    CancellationToken ct = default)
{
    var directoryUsable = !string.IsNullOrWhiteSpace(directory) && Directory.Exists(directory);
    if (!directoryUsable)
    {
        return null;
    }

    var candidates = await _textExtractor.ExtractFromDirectoryAsync(directory, ct);
    var best = candidates.FirstOrDefault();
    var detectedId = best?.DetectedLicenseId;

    // Unlike single-file detection, an unidentified license file is not reported at all.
    if (best is null || string.IsNullOrWhiteSpace(detectedId))
    {
        return null;
    }

    var notices = best.CopyrightNotices;
    var firstNotice = notices.Length == 0 ? null : notices[0].FullText;

    return _categorizationService.Enrich(new LicenseDetectionResult
    {
        SpdxId = detectedId,
        Confidence = best.Confidence,
        Method = LicenseDetectionMethod.LicenseFile,
        SourceFile = best.SourceFile ?? "LICENSE",
        Category = LicenseCategory.Unknown,
        Obligations = [],
        LicenseText = best.FullText,
        LicenseTextHash = best.TextHash,
        CopyrightNotice = firstNotice
    });
}
/// <summary>
/// Detects a license from one specific license file.
/// </summary>
/// <param name="licenseFilePath">Path to the license file. Blank or non-existent paths yield <c>null</c>.</param>
/// <param name="ct">Cancellation token passed to the extractor.</param>
/// <returns>
/// The result returned by the categorization service, or <c>null</c> when the
/// extractor produced nothing. A file whose license could not be identified is
/// still reported, as <c>LicenseRef-Unknown</c> with low confidence.
/// </returns>
public async Task<LicenseDetectionResult?> DetectFromLicenseFileAsync(
    string licenseFilePath,
    CancellationToken ct = default)
{
    var fileUsable = !string.IsNullOrWhiteSpace(licenseFilePath) && File.Exists(licenseFilePath);
    if (!fileUsable)
    {
        return null;
    }

    var extracted = await _textExtractor.ExtractAsync(licenseFilePath, ct);
    if (extracted is null)
    {
        return null;
    }

    string resolvedId;
    LicenseDetectionConfidence resolvedConfidence;
    if (extracted.DetectedLicenseId is { } detectedId)
    {
        resolvedId = detectedId;
        resolvedConfidence = extracted.Confidence;
    }
    else
    {
        // The text was read but not recognised: report a placeholder identifier.
        resolvedId = "LicenseRef-Unknown";
        resolvedConfidence = LicenseDetectionConfidence.Low;
    }

    var notices = extracted.CopyrightNotices;

    return _categorizationService.Enrich(new LicenseDetectionResult
    {
        SpdxId = resolvedId,
        Confidence = resolvedConfidence,
        Method = LicenseDetectionMethod.LicenseFile,
        SourceFile = Path.GetFileName(licenseFilePath),
        Category = LicenseCategory.Unknown,
        Obligations = [],
        LicenseText = extracted.FullText,
        LicenseTextHash = extracted.TextHash,
        CopyrightNotice = notices.Length == 0 ? null : notices[0].FullText
    });
}
/// <summary>
/// Builds a detection result from a single project license entry without
/// touching the file system.
/// </summary>
/// <param name="licenseInfo">License entry from the project file; <c>null</c> yields <c>null</c>.</param>
/// <returns>
/// The result returned by the categorization service, with no license text or
/// copyright notice attached, or <c>null</c> for a <c>null</c> input.
/// </returns>
public LicenseDetectionResult? Detect(DotNetProjectLicenseInfo licenseInfo)
{
    if (licenseInfo is null)
    {
        return null;
    }

    var id = DetermineSpdxId(licenseInfo);
    var isCompound = IsExpression(id);

    return _categorizationService.Enrich(new LicenseDetectionResult
    {
        SpdxId = id,
        OriginalText = GetOriginalText(licenseInfo),
        LicenseUrl = licenseInfo.Url,
        Confidence = MapConfidence(licenseInfo.Confidence),
        Method = DetermineDetectionMethod(licenseInfo),
        // The originating project file is not known here, so a placeholder is used.
        SourceFile = "*.csproj",
        Category = LicenseCategory.Unknown,
        Obligations = [],
        IsExpression = isCompound,
        ExpressionComponents = isCompound ? ParseExpressionComponents(id) : []
    });
}
/// <summary>
/// Runs <see cref="Detect"/> over every supplied license entry.
/// </summary>
/// <param name="licenseInfos">License entries to process, in order.</param>
/// <returns>
/// The non-null detection results in input order; entries for which
/// <see cref="Detect"/> returns <c>null</c> are omitted.
/// </returns>
public IReadOnlyList<LicenseDetectionResult> DetectMultiple(
    IEnumerable<DotNetProjectLicenseInfo> licenseInfos)
{
    var detected = new List<LicenseDetectionResult>();

    foreach (var entry in licenseInfos)
    {
        if (Detect(entry) is { } hit)
        {
            detected.Add(hit);
        }
    }

    return detected;
}
/// <summary>
/// Assembles a nuspec-sourced detection result, attaching license text and a
/// copyright notice when a license file is found in the package directory.
/// </summary>
/// <param name="spdxId">Identifier or expression to report.</param>
/// <param name="url">License URL to report, if any.</param>
/// <param name="method">How the identifier was obtained.</param>
/// <param name="confidence">Confidence to report.</param>
/// <param name="packageDirectory">Optional directory scanned for license files.</param>
/// <param name="ct">Cancellation token passed to the extractor.</param>
/// <returns>The result returned by the categorization service.</returns>
private async Task<LicenseDetectionResult?> CreateNuspecLicenseResultAsync(
    string spdxId,
    string? url,
    LicenseDetectionMethod method,
    LicenseDetectionConfidence confidence,
    string? packageDirectory,
    CancellationToken ct)
{
    // Without a directory there is nowhere to look for accompanying license text.
    LicenseTextExtractionResult? extracted = string.IsNullOrWhiteSpace(packageDirectory)
        ? null
        : (await _textExtractor.ExtractFromDirectoryAsync(packageDirectory, ct)).FirstOrDefault();

    string? firstNotice = null;
    if (extracted is not null && extracted.CopyrightNotices.Length > 0)
    {
        firstNotice = extracted.CopyrightNotices[0].FullText;
    }

    var isCompound = IsExpression(spdxId);

    return _categorizationService.Enrich(new LicenseDetectionResult
    {
        SpdxId = spdxId,
        LicenseUrl = url,
        Confidence = confidence,
        Method = method,
        SourceFile = "*.nuspec",
        Category = LicenseCategory.Unknown,
        Obligations = [],
        LicenseText = extracted?.FullText,
        LicenseTextHash = extracted?.TextHash,
        CopyrightNotice = firstNotice,
        IsExpression = isCompound,
        ExpressionComponents = isCompound ? ParseExpressionComponents(spdxId) : []
    });
}
/// <summary>
/// Looks for an <c>AssemblyCopyright</c> attribute in the project's
/// <c>AssemblyInfo.cs</c> and returns its string argument.
/// </summary>
/// <param name="projectDirectory">Project directory to search.</param>
/// <param name="ct">Cancellation token for the file reads.</param>
/// <returns>
/// The copyright text from the first candidate file that contains the
/// attribute, or <c>null</c> when no candidate exists, is readable, or matches.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="ct"/> is cancelled; cancellation is not treated as a read failure.
/// </exception>
private static async Task<string?> TryExtractAssemblyInfoCopyrightAsync(
    string projectDirectory,
    CancellationToken ct)
{
    // Look for AssemblyInfo.cs in the Properties folder first, then the project root.
    string[] candidatePaths =
    [
        Path.Combine(projectDirectory, "Properties", "AssemblyInfo.cs"),
        Path.Combine(projectDirectory, "AssemblyInfo.cs")
    ];

    foreach (var path in candidatePaths)
    {
        if (!File.Exists(path))
        {
            continue;
        }

        try
        {
            var content = await File.ReadAllTextAsync(path, ct);
            var match = AssemblyCopyrightRegex().Match(content);
            if (match.Success)
            {
                return match.Groups["copyright"].Value;
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: an unreadable file is skipped and the next candidate
            // is tried. Cancellation is deliberately allowed to propagate.
        }
    }

    return null;
}
/// <summary>
/// Picks the identifier to report for a project license entry.
/// </summary>
/// <remarks>
/// Sources are tried in order: pre-normalized SPDX ID, license expression,
/// license URL, license file. A file-only entry gets the placeholder
/// <c>LicenseRef-File</c> because its content is not inspected here.
/// </remarks>
/// <param name="licenseInfo">License entry to inspect.</param>
/// <returns>An SPDX identifier/expression or a <c>LicenseRef-</c> placeholder; never <c>null</c>.</returns>
private static string DetermineSpdxId(DotNetProjectLicenseInfo licenseInfo)
{
    if (licenseInfo.NormalizedSpdxId is { } normalized && !string.IsNullOrWhiteSpace(normalized))
    {
        return normalized;
    }

    if (licenseInfo.Expression is { } expression && !string.IsNullOrWhiteSpace(expression))
    {
        return NormalizeSpdxExpression(expression);
    }

    if (licenseInfo.Url is { } url && !string.IsNullOrWhiteSpace(url))
    {
        return NormalizeFromUrl(url);
    }

    return string.IsNullOrWhiteSpace(licenseInfo.File)
        ? "LicenseRef-Unknown"
        : "LicenseRef-File";
}
/// <summary>
/// Returns the raw license declaration as written in the project file.
/// </summary>
/// <param name="licenseInfo">License entry to inspect.</param>
/// <returns>
/// The expression, else the URL, else <c>File: &lt;name&gt;</c>; <c>null</c>
/// when all three are blank.
/// </returns>
private static string? GetOriginalText(DotNetProjectLicenseInfo licenseInfo)
    => !string.IsNullOrWhiteSpace(licenseInfo.Expression) ? licenseInfo.Expression
        : !string.IsNullOrWhiteSpace(licenseInfo.Url) ? licenseInfo.Url
        : !string.IsNullOrWhiteSpace(licenseInfo.File) ? $"File: {licenseInfo.File}"
        : null;
/// <summary>
/// Translates the project-level confidence enum into the shared detection
/// confidence enum.
/// </summary>
/// <param name="confidence">Confidence recorded on the project license entry.</param>
/// <returns>
/// The same-named detection confidence for High, Medium and Low;
/// <c>LicenseDetectionConfidence.None</c> for any other value.
/// </returns>
private static LicenseDetectionConfidence MapConfidence(DotNetProjectLicenseConfidence confidence)
{
    switch (confidence)
    {
        case DotNetProjectLicenseConfidence.High:
            return LicenseDetectionConfidence.High;
        case DotNetProjectLicenseConfidence.Medium:
            return LicenseDetectionConfidence.Medium;
        case DotNetProjectLicenseConfidence.Low:
            return LicenseDetectionConfidence.Low;
        default:
            return LicenseDetectionConfidence.None;
    }
}
/// <summary>
/// Reports which kind of declaration a project license entry was taken from.
/// </summary>
/// <remarks>
/// The precedence is expression, then URL, then file. This matches the order in
/// which <c>DetermineSpdxId</c> and <c>GetOriginalText</c> consult those fields,
/// so an entry with both a URL and a file, whose identifier is derived from the
/// URL, is reported as URL matching rather than as a license file.
/// </remarks>
/// <param name="licenseInfo">License entry to inspect.</param>
/// <returns>
/// The detection method for the first non-blank field, or
/// <c>LicenseDetectionMethod.KeywordFallback</c> when all are blank.
/// </returns>
private static LicenseDetectionMethod DetermineDetectionMethod(DotNetProjectLicenseInfo licenseInfo)
{
    if (!string.IsNullOrWhiteSpace(licenseInfo.Expression))
    {
        return LicenseDetectionMethod.PackageMetadata;
    }

    if (!string.IsNullOrWhiteSpace(licenseInfo.Url))
    {
        return LicenseDetectionMethod.UrlMatching;
    }

    if (!string.IsNullOrWhiteSpace(licenseInfo.File))
    {
        return LicenseDetectionMethod.LicenseFile;
    }

    return LicenseDetectionMethod.KeywordFallback;
}
/// <summary>
/// Normalizes a declared SPDX expression by removing leading and trailing
/// whitespace. The expression is otherwise returned unchanged.
/// </summary>
/// <param name="expression">SPDX expression as declared.</param>
/// <returns>The trimmed expression.</returns>
private static string NormalizeSpdxExpression(string expression) => expression.Trim();
/// <summary>
/// Lower-case URL fragments and the SPDX identifier each one implies.
/// Order matters: the first fragment contained in the URL wins, so the
/// LGPL/AGPL fragments are listed before the GPL fragments they contain.
/// </summary>
private static readonly (string Marker, string SpdxId)[] UrlLicenseMarkers =
[
    ("opensource.org/licenses/mit", "MIT"),
    ("mit-license", "MIT"),
    ("apache.org/licenses/license-2.0", "Apache-2.0"),
    ("apache-2.0", "Apache-2.0"),
    ("bsd-3-clause", "BSD-3-Clause"),
    ("bsd-2-clause", "BSD-2-Clause"),
    ("opensource.org/licenses/isc", "ISC"),
    ("agpl-3.0", "AGPL-3.0-only"),
    ("lgpl-3.0", "LGPL-3.0-only"),
    ("lgpl-2.1", "LGPL-2.1-only"),
    ("lgpl-2.0", "LGPL-2.0-only"),
    ("gpl-3.0", "GPL-3.0-only"),
    ("gpl-2.0", "GPL-2.0-only"),
    ("mozilla.org/mpl/2.0", "MPL-2.0"),
    ("mpl-2.0", "MPL-2.0"),
    ("creativecommons.org/publicdomain/zero/1.0", "CC0-1.0"),
    ("cc0", "CC0-1.0"),
    ("unlicense", "Unlicense"),
];

/// <summary>
/// Maps a license URL to an SPDX identifier.
/// </summary>
/// <remarks>
/// URLs on <c>licenses.nuget.org</c> carry the identifier or expression in their
/// last path segment, so they are decoded first and returned as written; the
/// keyword table would otherwise misread them. All other URLs are matched
/// case-insensitively against known fragments.
/// </remarks>
/// <param name="url">License URL to classify.</param>
/// <returns>
/// The matched SPDX identifier or expression; <c>LicenseRef-Unknown</c> for a
/// blank URL; <c>LicenseRef-Url</c> when the URL is not recognised.
/// </returns>
private static string NormalizeFromUrl(string url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return "LicenseRef-Unknown";
    }

    // NuGet.org license URLs, e.g. https://licenses.nuget.org/MIT or
    // https://licenses.nuget.org/MIT%20OR%20Apache-2.0
    const string NuGetLicenseHost = "licenses.nuget.org/";
    var hostIndex = url.IndexOf(NuGetLicenseHost, StringComparison.OrdinalIgnoreCase);
    if (hostIndex >= 0)
    {
        var tail = url[(hostIndex + NuGetLicenseHost.Length)..];

        // Drop any query string or fragment before reading the path.
        var cut = tail.IndexOfAny(['?', '#']);
        if (cut >= 0)
        {
            tail = tail[..cut];
        }

        // Ignore trailing slashes, then keep only the last path segment.
        tail = tail.TrimEnd('/');
        tail = tail[(tail.LastIndexOf('/') + 1)..];

        var identifier = Uri.UnescapeDataString(tail).Trim();
        if (identifier.Length > 0)
        {
            return identifier;
        }
    }

    var lower = url.ToLowerInvariant();
    foreach (var (marker, spdxId) in UrlLicenseMarkers)
    {
        if (lower.Contains(marker, StringComparison.Ordinal))
        {
            return spdxId;
        }
    }

    return "LicenseRef-Url";
}
/// <summary>
/// Tells whether an identifier is a compound SPDX expression rather than a
/// single license ID.
/// </summary>
/// <param name="spdxId">Identifier or expression to test.</param>
/// <returns>
/// <c>true</c> when the text contains a space-delimited <c>OR</c>, <c>AND</c>
/// or <c>WITH</c> operator (any casing), or both an opening and a closing
/// parenthesis; otherwise <c>false</c>.
/// </returns>
private static bool IsExpression(string spdxId)
{
    string[] spacedOperators = [" OR ", " AND ", " WITH "];

    foreach (var spacedOperator in spacedOperators)
    {
        if (spdxId.Contains(spacedOperator, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    var hasOpening = spdxId.Contains('(');
    return hasOpening && spdxId.Contains(')');
}
/// <summary>
/// Splits an SPDX expression into its distinct identifiers.
/// </summary>
/// <remarks>
/// Parentheses are treated as separators and the operators <c>OR</c>,
/// <c>AND</c> and <c>WITH</c> (any casing) are dropped. Every other token is
/// kept, including an exception name that follows <c>WITH</c>. Duplicates are
/// removed case-insensitively, keeping the first spelling seen.
/// </remarks>
/// <param name="expression">Expression to split.</param>
/// <returns>The distinct identifiers in ordinal order.</returns>
private static ImmutableArray<string> ParseExpressionComponents(string expression)
{
    static bool IsOperator(string token)
        => token.ToUpperInvariant() is "OR" or "AND" or "WITH";

    var identifiers = expression
        .Replace('(', ' ')
        .Replace(')', ' ')
        .Split(' ', StringSplitOptions.RemoveEmptyEntries)
        .Where(token => !IsOperator(token))
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .OrderBy(identifier => identifier, StringComparer.Ordinal);

    return [.. identifiers];
}
/// <summary>
/// Source-generated regex matching an assembly-level copyright attribute of the
/// form <c>[assembly: AssemblyCopyright("...")]</c>, case-insensitively and
/// with optional whitespace between tokens.
/// </summary>
/// <remarks>
/// The quoted text is captured in the named group <c>copyright</c> and must be
/// one or more characters that are not double quotes, so an empty string
/// argument does not match. The opening quote must directly follow the
/// parenthesis (after optional whitespace), so a verbatim <c>@"..."</c>
/// argument does not match either.
/// </remarks>
[GeneratedRegex(@"\[assembly:\s*AssemblyCopyright\s*\(\s*""(?<copyright>[^""]+)""\s*\)\s*\]", RegexOptions.IgnoreCase)]
private static partial Regex AssemblyCopyrightRegex();
}