tests fixes and sprints work

This commit is contained in:
master
2026-01-22 19:08:46 +02:00
parent c32fff8f86
commit 726d70dc7f
881 changed files with 134434 additions and 6228 deletions

View File

@@ -0,0 +1,265 @@
// -----------------------------------------------------------------------------
// EnhancedRustLicenseDetector.cs
// Sprint: SPRINT_20260119_024_Scanner_license_detection_enhancements
// Task: TASK-024-008 - Upgrade Rust license detector
// Description: Enhanced Rust license detection returning LicenseDetectionResult
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
using StellaOps.Scanner.Analyzers.Lang.Core.Licensing;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
/// <summary>
/// Enhanced Rust license detector that returns full LicenseDetectionResult.
/// </summary>
/// <remarks>
/// License expressions declared in the crate manifest take precedence. The text of the first
/// listed license file is only used to identify the license when the manifest declares no
/// usable expression. Every result is passed through the categorization service's
/// <c>Enrich</c> method before it is returned.
/// </remarks>
internal sealed class EnhancedRustLicenseDetector
{
    private readonly ILicenseCategorizationService _categorizationService;
    private readonly ILicenseTextExtractor _textExtractor;

    // Not read by any method of this class; kept so the constructor signature stays stable.
    private readonly ICopyrightExtractor _copyrightExtractor;

    /// <summary>
    /// Creates a new enhanced Rust license detector with the specified services.
    /// </summary>
    /// <param name="categorizationService">Service used to enrich every detection result.</param>
    /// <param name="textExtractor">Extractor used to read and analyze license file text.</param>
    /// <param name="copyrightExtractor">Copyright extractor (stored, currently unused here).</param>
    public EnhancedRustLicenseDetector(
        ILicenseCategorizationService categorizationService,
        ILicenseTextExtractor textExtractor,
        ICopyrightExtractor copyrightExtractor)
    {
        _categorizationService = categorizationService;
        _textExtractor = textExtractor;
        _copyrightExtractor = copyrightExtractor;
    }

    /// <summary>
    /// Creates a new enhanced Rust license detector with default services.
    /// </summary>
    public EnhancedRustLicenseDetector()
        : this(new LicenseCategorizationService(), new LicenseTextExtractor(), new CopyrightExtractor())
    {
    }

    /// <summary>
    /// Detects license from Rust license info.
    /// </summary>
    /// <param name="licenseInfo">The license info from Cargo.toml parsing.</param>
    /// <param name="rootPath">Root path for resolving license files.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// The full license detection result, or <c>null</c> when <paramref name="licenseInfo"/> is
    /// <c>null</c> or neither the manifest nor the license file yields a license identifier.
    /// </returns>
    public async Task<LicenseDetectionResult?> DetectAsync(
        RustLicenseInfo licenseInfo,
        string? rootPath = null,
        CancellationToken ct = default)
    {
        if (licenseInfo is null)
        {
            return null;
        }

        var declaredInManifest = TryGetManifestExpression(
            licenseInfo,
            out var originalExpression,
            out var normalizedExpression);

        // The license file is read even when the manifest declares an expression, because its
        // text, hash and copyright notices are attached to the result either way.
        var textResult = await TryExtractLicenseTextAsync(licenseInfo, rootPath, ct).ConfigureAwait(false);

        if (!declaredInManifest)
        {
            // Fall back to whatever the license file text was identified as.
            var detectedId = textResult?.DetectedLicenseId;
            if (string.IsNullOrWhiteSpace(detectedId))
            {
                // No license info found
                return null;
            }

            originalExpression = detectedId;
            normalizedExpression = NormalizeExpression(detectedId);
            if (normalizedExpression.Length == 0)
            {
                return null;
            }
        }

        // Method and confidence follow the source that actually supplied the expression, not
        // merely whether the manifest array was non-empty.
        var confidence = declaredInManifest
            ? LicenseDetectionConfidence.High
            : textResult?.Confidence ?? LicenseDetectionConfidence.None;
        var method = declaredInManifest
            ? LicenseDetectionMethod.PackageMetadata
            : LicenseDetectionMethod.LicenseFile;

        return BuildResult(licenseInfo, originalExpression, normalizedExpression, confidence, method, textResult);
    }

    /// <summary>
    /// Detects license for a crate from the license index.
    /// </summary>
    /// <param name="index">The license index to look the crate up in.</param>
    /// <param name="crateName">Name of the crate.</param>
    /// <param name="version">Crate version, if known.</param>
    /// <param name="rootPath">Root path for resolving license files.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The detection result, or <c>null</c> when the index has no entry or no license is found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="index"/> is <c>null</c>.</exception>
    public async Task<LicenseDetectionResult?> DetectFromIndexAsync(
        RustLicenseIndex index,
        string crateName,
        string? version,
        string? rootPath = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(index);

        var info = index.Find(crateName, version);
        if (info is null)
        {
            return null;
        }

        return await DetectAsync(info, rootPath, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Detects license synchronously without file reading.
    /// </summary>
    /// <param name="licenseInfo">The license info from Cargo.toml parsing.</param>
    /// <returns>
    /// The detection result built from the manifest expressions only, or <c>null</c> when
    /// <paramref name="licenseInfo"/> is <c>null</c> or declares no usable expression.
    /// </returns>
    public LicenseDetectionResult? Detect(RustLicenseInfo licenseInfo)
    {
        if (licenseInfo is null)
        {
            return null;
        }

        if (!TryGetManifestExpression(licenseInfo, out var originalExpression, out var normalizedExpression))
        {
            return null;
        }

        return BuildResult(
            licenseInfo,
            originalExpression,
            normalizedExpression,
            LicenseDetectionConfidence.High,
            LicenseDetectionMethod.PackageMetadata,
            textResult: null);
    }

    /// <summary>
    /// Detects license from license file content.
    /// </summary>
    /// <param name="content">Raw license file text.</param>
    /// <param name="sourceFile">Path reported as the source; <c>"LICENSE"</c> is used when omitted.</param>
    /// <returns>
    /// The detection result, or <c>null</c> when <paramref name="content"/> is blank. Text that
    /// is not recognized is reported as <c>LicenseRef-Unknown</c>.
    /// </returns>
    public LicenseDetectionResult? DetectFromContent(string content, string? sourceFile = null)
    {
        if (string.IsNullOrWhiteSpace(content))
        {
            return null;
        }

        var textResult = _textExtractor.Extract(content, sourceFile);

        var result = new LicenseDetectionResult
        {
            SpdxId = textResult.DetectedLicenseId ?? "LicenseRef-Unknown",
            Confidence = textResult.Confidence,
            Method = LicenseDetectionMethod.LicenseFile,
            SourceFile = sourceFile ?? "LICENSE",
            Category = LicenseCategory.Unknown,
            Obligations = [],
            LicenseText = content,
            LicenseTextHash = textResult.TextHash,
            CopyrightNotice = GetPrimaryCopyright(textResult)
        };

        return _categorizationService.Enrich(result);
    }

    /// <summary>
    /// Reads and analyzes the first license file listed for the crate, if it exists on disk.
    /// </summary>
    /// <returns>The extraction result, or <c>null</c> when there is no file, no root path, or the file is missing.</returns>
    private async Task<LicenseTextExtractionResult?> TryExtractLicenseTextAsync(
        RustLicenseInfo licenseInfo,
        string? rootPath,
        CancellationToken ct)
    {
        if (licenseInfo.Files.Length == 0 || string.IsNullOrWhiteSpace(rootPath))
        {
            return null;
        }

        // NOTE(review): RelativePath originates from the crate's manifest and GetFullPath resolves
        // ".." segments, so the resulting path may lie outside rootPath - confirm whether callers
        // validate it before this point.
        var relativePath = licenseInfo.Files[0].RelativePath.Replace('/', Path.DirectorySeparatorChar);
        var absolutePath = Path.GetFullPath(Path.Combine(rootPath, relativePath));
        if (!File.Exists(absolutePath))
        {
            return null;
        }

        return await _textExtractor.ExtractAsync(absolutePath, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Joins the manifest's license expressions into a single expression.
    /// </summary>
    /// <param name="licenseInfo">The license info whose expressions are combined.</param>
    /// <param name="original">The non-blank expressions joined with <c>" OR "</c>, as declared.</param>
    /// <param name="normalized"><paramref name="original"/> with Rust-style <c>/</c> separators rewritten to <c>OR</c>.</param>
    /// <returns><c>true</c> when the manifest yields a non-empty expression.</returns>
    private static bool TryGetManifestExpression(
        RustLicenseInfo licenseInfo,
        out string original,
        out string normalized)
    {
        // Blank entries are skipped so they cannot produce dangling operators such as "MIT OR ".
        original = string.Join(" OR ", licenseInfo.Expressions.Where(e => !string.IsNullOrWhiteSpace(e)));
        normalized = NormalizeExpression(original);
        return normalized.Length > 0;
    }

    /// <summary>
    /// Normalizes Rust-style expressions to SPDX (/ -> OR).
    /// </summary>
    /// <remarks>
    /// Each alternative is trimmed and empty alternatives are dropped, so <c>"MIT / Apache-2.0"</c>
    /// becomes <c>"MIT OR Apache-2.0"</c> and a stray leading or trailing slash adds no operator.
    /// </remarks>
    private static string NormalizeExpression(string expression)
    {
        var alternatives = expression.Split(
            '/',
            StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        return string.Join(" OR ", alternatives);
    }

    /// <summary>
    /// Builds and enriches the detection result shared by <see cref="DetectAsync"/> and <see cref="Detect"/>.
    /// </summary>
    /// <param name="licenseInfo">The license info the result is derived from.</param>
    /// <param name="originalExpression">The expression before normalization.</param>
    /// <param name="normalizedExpression">The normalized expression reported as the SPDX id.</param>
    /// <param name="confidence">Confidence to report.</param>
    /// <param name="method">Detection method to report.</param>
    /// <param name="textResult">License file extraction result, or <c>null</c> when no file was read.</param>
    private LicenseDetectionResult? BuildResult(
        RustLicenseInfo licenseInfo,
        string originalExpression,
        string normalizedExpression,
        LicenseDetectionConfidence confidence,
        LicenseDetectionMethod method,
        LicenseTextExtractionResult? textResult)
    {
        // Slashes were already rewritten to " OR ", so checking the operators is sufficient.
        var isExpression =
            normalizedExpression.Contains(" OR ", StringComparison.OrdinalIgnoreCase) ||
            normalizedExpression.Contains(" AND ", StringComparison.OrdinalIgnoreCase);

        var sourceFile = licenseInfo.Files.Length > 0
            ? licenseInfo.Files[0].RelativePath
            : licenseInfo.CargoTomlRelativePath;

        var result = new LicenseDetectionResult
        {
            SpdxId = normalizedExpression,
            // Only recorded when normalization actually changed the declared text.
            OriginalText = string.Equals(originalExpression, normalizedExpression, StringComparison.Ordinal)
                ? null
                : originalExpression,
            Confidence = confidence,
            Method = method,
            SourceFile = sourceFile,
            Category = LicenseCategory.Unknown,
            Obligations = [],
            LicenseText = textResult?.FullText,
            LicenseTextHash = textResult?.TextHash ?? GetFileHash(licenseInfo),
            CopyrightNotice = GetPrimaryCopyright(textResult),
            IsExpression = isExpression,
            ExpressionComponents = isExpression ? ParseExpressionComponents(normalizedExpression) : []
        };

        return _categorizationService.Enrich(result);
    }

    /// <summary>
    /// Returns the full text of the first copyright notice in the extraction result, if any.
    /// </summary>
    private static string? GetPrimaryCopyright(LicenseTextExtractionResult? textResult)
    {
        var notices = textResult?.CopyrightNotices ?? [];
        return notices.Length > 0 ? notices[0].FullText : null;
    }

    /// <summary>
    /// Returns the first license file's SHA-256 as <c>sha256:&lt;value&gt;</c>, or <c>null</c> when unavailable.
    /// </summary>
    private static string? GetFileHash(RustLicenseInfo licenseInfo)
    {
        if (licenseInfo.Files.Length > 0 && licenseInfo.Files[0].Sha256 is not null)
        {
            return $"sha256:{licenseInfo.Files[0].Sha256}";
        }

        return null;
    }

    /// <summary>
    /// Extracts the distinct identifiers from an expression, dropping parentheses and the
    /// <c>OR</c>, <c>AND</c> and <c>WITH</c> operators.
    /// </summary>
    /// <returns>Identifiers de-duplicated case-insensitively and sorted ordinally.</returns>
    private static ImmutableArray<string> ParseExpressionComponents(string expression)
    {
        var components = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var tokens = expression
            .Replace("(", " ")
            .Replace(")", " ")
            .Split([' ', '/'], StringSplitOptions.RemoveEmptyEntries);

        foreach (var token in tokens)
        {
            if (!IsOperator(token))
            {
                components.Add(token);
            }
        }

        return [.. components.OrderBy(c => c, StringComparer.Ordinal)];

        static bool IsOperator(string token) =>
            token.Equals("OR", StringComparison.OrdinalIgnoreCase) ||
            token.Equals("AND", StringComparison.OrdinalIgnoreCase) ||
            token.Equals("WITH", StringComparison.OrdinalIgnoreCase);
    }
}