Refactor code structure for improved readability and maintainability
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-06 21:48:12 +02:00
parent f6c22854a4
commit dd0067ea0b
105 changed files with 12662 additions and 427 deletions

View File

@@ -0,0 +1,352 @@
using System.Collections.Frozen;
using System.Text.Json;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Java.Internal.BuildMetadata;
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.License;
/// <summary>
/// Normalizes license names and URLs to SPDX identifiers.
/// </summary>
/// <remarks>
/// Resolution order is: known URL, then exact (normalized) name, then a
/// keyword-based fuzzy match on the name. URL and exact-name hits are reported
/// with <see cref="SpdxConfidence.High"/>; fuzzy hits with
/// <see cref="SpdxConfidence.Medium"/>.
/// </remarks>
internal sealed partial class SpdxLicenseNormalizer
{
    private static readonly Lazy<SpdxLicenseNormalizer> LazyInstance = new(() => new SpdxLicenseNormalizer());

    // Document extensions dropped from the end of a URL so that
    // ".../LICENSE-2.0.txt" and ".../LICENSE-2.0" compare equal.
    private static readonly string[] DocumentExtensions = [".txt", ".html", ".htm", ".php"];

    private readonly FrozenDictionary<string, SpdxLicenseMapping> _nameIndex;
    private readonly FrozenDictionary<string, SpdxLicenseMapping> _urlIndex;

    // Keyed by SPDX identifier; lets the fuzzy matcher resolve its result
    // without depending on how license names happen to be normalized.
    private readonly FrozenDictionary<string, SpdxLicenseMapping> _spdxIdIndex;

    /// <summary>
    /// Gets the singleton instance.
    /// </summary>
    public static SpdxLicenseNormalizer Instance => LazyInstance.Value;

    private SpdxLicenseNormalizer()
    {
        var nameDict = new Dictionary<string, SpdxLicenseMapping>(StringComparer.OrdinalIgnoreCase);
        var urlDict = new Dictionary<string, SpdxLicenseMapping>(StringComparer.OrdinalIgnoreCase);
        var idDict = new Dictionary<string, SpdxLicenseMapping>(StringComparer.OrdinalIgnoreCase);

        foreach (var mapping in LoadMappings())
        {
            idDict.TryAdd(mapping.SpdxId, mapping);

            // Index by normalized name; the first mapping to claim a key wins.
            foreach (var name in mapping.Names)
            {
                nameDict.TryAdd(NormalizeName(name), mapping);
            }

            // Index by normalized URL; the first mapping to claim a key wins.
            foreach (var url in mapping.Urls)
            {
                urlDict.TryAdd(NormalizeUrl(url), mapping);
            }
        }

        _nameIndex = nameDict.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
        _urlIndex = urlDict.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
        _spdxIdIndex = idDict.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Normalizes a license name and/or URL to an SPDX identifier.
    /// </summary>
    /// <param name="name">License name as declared by the package; may be null or blank.</param>
    /// <param name="url">License URL as declared by the package; may be null or blank.</param>
    /// <returns>
    /// A <see cref="JavaLicenseInfo"/> carrying the original <paramref name="name"/> and
    /// <paramref name="url"/>. When a mapping is found, <c>SpdxId</c> and <c>SpdxConfidence</c>
    /// are also set; otherwise they are left at their defaults.
    /// </returns>
    public JavaLicenseInfo Normalize(string? name, string? url)
    {
        var result = new JavaLicenseInfo
        {
            Name = name,
            Url = url
        };

        // Try URL first (higher confidence)
        if (!string.IsNullOrWhiteSpace(url)
            && _urlIndex.TryGetValue(NormalizeUrl(url), out var urlMapping))
        {
            return result with
            {
                SpdxId = urlMapping.SpdxId,
                SpdxConfidence = SpdxConfidence.High
            };
        }

        if (string.IsNullOrWhiteSpace(name))
        {
            return result;
        }

        var normalizedName = NormalizeName(name);

        // Exact match
        if (_nameIndex.TryGetValue(normalizedName, out var nameMapping))
        {
            return result with
            {
                SpdxId = nameMapping.SpdxId,
                SpdxConfidence = SpdxConfidence.High
            };
        }

        // Fuzzy match
        var fuzzyMatch = TryFuzzyMatch(normalizedName);
        if (fuzzyMatch is not null)
        {
            return result with
            {
                SpdxId = fuzzyMatch.SpdxId,
                SpdxConfidence = SpdxConfidence.Medium
            };
        }

        return result;
    }

    /// <summary>
    /// Lower-cases a license name, drops the noise words "the", "license",
    /// "licence" and "version", removes commas and parentheses, and collapses
    /// whitespace.
    /// </summary>
    private static string NormalizeName(string name)
    {
        // Noise words are removed only as whole words so that names such as
        // "Unlicense" or "Other" are not mangled into "un" / "or".
        var withoutNoise = NoiseWordPattern().Replace(name.ToLowerInvariant(), string.Empty);

        var withoutPunctuation = withoutNoise
            .Replace(",", string.Empty, StringComparison.Ordinal)
            .Replace("(", string.Empty, StringComparison.Ordinal)
            .Replace(")", string.Empty, StringComparison.Ordinal);

        return WhitespacePattern().Replace(withoutPunctuation, " ").Trim();
    }

    /// <summary>
    /// Reduces a URL to a comparison key: trimmed, lower-cased, without a leading
    /// scheme or "www.", without trailing slashes and without a trailing
    /// document extension (.txt, .html, .htm, .php).
    /// </summary>
    private static string NormalizeUrl(string url)
    {
        var normalized = url.Trim().ToLowerInvariant();

        // Strip scheme and "www." only when they lead the URL; removing them
        // anywhere would corrupt URLs that embed another URL or host name.
        normalized = StripPrefix(normalized, "https://");
        normalized = StripPrefix(normalized, "http://");
        normalized = StripPrefix(normalized, "www.");
        normalized = normalized.TrimEnd('/');

        foreach (var extension in DocumentExtensions)
        {
            if (normalized.EndsWith(extension, StringComparison.Ordinal))
            {
                normalized = normalized[..^extension.Length];
                break;
            }
        }

        return normalized;
    }

    // Returns value without prefix when value starts with it; otherwise value unchanged.
    private static string StripPrefix(string value, string prefix)
        => value.StartsWith(prefix, StringComparison.Ordinal) ? value[prefix.Length..] : value;

    /// <summary>
    /// Keyword-based fallback for names that did not match the name index exactly.
    /// </summary>
    /// <param name="normalizedName">A name already passed through <see cref="NormalizeName"/>.</param>
    /// <returns>The guessed mapping, or <c>null</c> when no pattern applies.</returns>
    private SpdxLicenseMapping? TryFuzzyMatch(string normalizedName)
    {
        if (normalizedName.Contains("apache", StringComparison.Ordinal) && normalizedName.Contains('2'))
        {
            return FindBySpdxId("Apache-2.0");
        }

        // Short acronyms are matched as standalone tokens: a plain substring test
        // would see "mit" in "limited", "mpl" in "simple" and "gpl" in "lgpl"/"agpl".
        if (ContainsAcronym(normalizedName, "mit"))
        {
            return FindBySpdxId("MIT");
        }

        if (normalizedName.Contains("bsd", StringComparison.Ordinal))
        {
            if (normalizedName.Contains('3'))
            {
                return FindBySpdxId("BSD-3-Clause");
            }

            if (normalizedName.Contains('2'))
            {
                return FindBySpdxId("BSD-2-Clause");
            }
        }

        if (ContainsAcronym(normalizedName, "gpl"))
        {
            if (normalizedName.Contains('3'))
            {
                return FindBySpdxId("GPL-3.0-only");
            }

            if (normalizedName.Contains('2'))
            {
                return FindBySpdxId("GPL-2.0-only");
            }
        }

        if (ContainsAcronym(normalizedName, "lgpl"))
        {
            if (normalizedName.Contains("2.1", StringComparison.Ordinal))
            {
                return FindBySpdxId("LGPL-2.1-only");
            }

            if (normalizedName.Contains('3'))
            {
                return FindBySpdxId("LGPL-3.0-only");
            }
        }

        if (ContainsAcronym(normalizedName, "agpl") && normalizedName.Contains('3'))
        {
            return FindBySpdxId("AGPL-3.0-only");
        }

        if (ContainsAcronym(normalizedName, "mpl") && normalizedName.Contains('2'))
        {
            return FindBySpdxId("MPL-2.0");
        }

        if (normalizedName.Contains("cddl", StringComparison.Ordinal))
        {
            return normalizedName.Contains("1.1", StringComparison.Ordinal)
                ? FindBySpdxId("CDDL-1.1")
                : FindBySpdxId("CDDL-1.0");
        }

        if (ContainsAcronym(normalizedName, "epl"))
        {
            if (normalizedName.Contains('2'))
            {
                return FindBySpdxId("EPL-2.0");
            }

            if (normalizedName.Contains('1'))
            {
                return FindBySpdxId("EPL-1.0");
            }
        }

        return null;
    }

    // Looks up a mapping by its SPDX identifier; null when the identifier is not in the table.
    private SpdxLicenseMapping? FindBySpdxId(string spdxId)
        => _spdxIdIndex.GetValueOrDefault(spdxId);

    /// <summary>
    /// Returns true when <paramref name="acronym"/> occurs in <paramref name="text"/>
    /// as its own token: not preceded by a letter, and followed by end of text,
    /// a non-letter, or a version suffix such as "v2".
    /// </summary>
    private static bool ContainsAcronym(string text, string acronym)
    {
        for (var start = text.IndexOf(acronym, StringComparison.Ordinal);
             start >= 0;
             start = text.IndexOf(acronym, start + 1, StringComparison.Ordinal))
        {
            var end = start + acronym.Length;
            var opensToken = start == 0 || !char.IsLetter(text[start - 1]);
            var closesToken = end == text.Length
                || !char.IsLetter(text[end])
                || (text[end] == 'v' && end + 1 < text.Length && char.IsDigit(text[end + 1]));

            if (opensToken && closesToken)
            {
                return true;
            }
        }

        return false;
    }

    private static IEnumerable<SpdxLicenseMapping> LoadMappings()
    {
        // High-confidence SPDX mappings for common licenses
        // This list focuses on licenses commonly found in Java/Maven projects
        return
        [
            // Apache
            new SpdxLicenseMapping("Apache-2.0",
                ["Apache License 2.0", "Apache License, Version 2.0", "Apache 2.0", "Apache-2.0", "ASL 2.0", "AL 2.0"],
                ["apache.org/licenses/LICENSE-2.0", "opensource.org/licenses/Apache-2.0"]),
            new SpdxLicenseMapping("Apache-1.1",
                ["Apache License 1.1", "Apache Software License 1.1"],
                ["apache.org/licenses/LICENSE-1.1"]),
            // MIT
            new SpdxLicenseMapping("MIT",
                ["MIT License", "MIT", "The MIT License", "Expat License"],
                ["opensource.org/licenses/MIT", "mit-license.org"]),
            // BSD
            new SpdxLicenseMapping("BSD-2-Clause",
                ["BSD 2-Clause License", "BSD-2-Clause", "Simplified BSD License", "FreeBSD License"],
                ["opensource.org/licenses/BSD-2-Clause"]),
            new SpdxLicenseMapping("BSD-3-Clause",
                ["BSD 3-Clause License", "BSD-3-Clause", "New BSD License", "Modified BSD License"],
                ["opensource.org/licenses/BSD-3-Clause"]),
            // GPL
            new SpdxLicenseMapping("GPL-2.0-only",
                ["GNU General Public License v2.0", "GPL 2.0", "GPL-2.0", "GPLv2"],
                ["gnu.org/licenses/old-licenses/gpl-2.0", "opensource.org/licenses/GPL-2.0"]),
            new SpdxLicenseMapping("GPL-2.0-or-later",
                ["GNU General Public License v2.0 or later", "GPL 2.0+", "GPL-2.0+", "GPLv2+"],
                []),
            new SpdxLicenseMapping("GPL-3.0-only",
                ["GNU General Public License v3.0", "GPL 3.0", "GPL-3.0", "GPLv3"],
                ["gnu.org/licenses/gpl-3.0", "opensource.org/licenses/GPL-3.0"]),
            new SpdxLicenseMapping("GPL-3.0-or-later",
                ["GNU General Public License v3.0 or later", "GPL 3.0+", "GPL-3.0+", "GPLv3+"],
                []),
            // LGPL
            new SpdxLicenseMapping("LGPL-2.1-only",
                ["GNU Lesser General Public License v2.1", "LGPL 2.1", "LGPL-2.1", "LGPLv2.1"],
                ["gnu.org/licenses/old-licenses/lgpl-2.1", "opensource.org/licenses/LGPL-2.1"]),
            new SpdxLicenseMapping("LGPL-3.0-only",
                ["GNU Lesser General Public License v3.0", "LGPL 3.0", "LGPL-3.0", "LGPLv3"],
                ["gnu.org/licenses/lgpl-3.0", "opensource.org/licenses/LGPL-3.0"]),
            // MPL
            new SpdxLicenseMapping("MPL-2.0",
                ["Mozilla Public License 2.0", "MPL 2.0", "MPL-2.0"],
                ["mozilla.org/MPL/2.0", "opensource.org/licenses/MPL-2.0"]),
            new SpdxLicenseMapping("MPL-1.1",
                ["Mozilla Public License 1.1", "MPL 1.1", "MPL-1.1"],
                ["mozilla.org/MPL/1.1"]),
            // Eclipse
            new SpdxLicenseMapping("EPL-1.0",
                ["Eclipse Public License 1.0", "EPL 1.0", "EPL-1.0"],
                ["eclipse.org/legal/epl-v10", "opensource.org/licenses/EPL-1.0"]),
            new SpdxLicenseMapping("EPL-2.0",
                ["Eclipse Public License 2.0", "EPL 2.0", "EPL-2.0"],
                ["eclipse.org/legal/epl-2.0", "opensource.org/licenses/EPL-2.0"]),
            // CDDL
            new SpdxLicenseMapping("CDDL-1.0",
                ["Common Development and Distribution License 1.0", "CDDL 1.0", "CDDL-1.0"],
                ["opensource.org/licenses/CDDL-1.0"]),
            new SpdxLicenseMapping("CDDL-1.1",
                ["Common Development and Distribution License 1.1", "CDDL 1.1", "CDDL-1.1"],
                ["glassfish.dev.java.net/public/CDDL+GPL_1_1"]),
            // Creative Commons
            new SpdxLicenseMapping("CC0-1.0",
                ["CC0 1.0 Universal", "CC0", "Public Domain"],
                ["creativecommons.org/publicdomain/zero/1.0"]),
            new SpdxLicenseMapping("CC-BY-4.0",
                ["Creative Commons Attribution 4.0", "CC BY 4.0"],
                ["creativecommons.org/licenses/by/4.0"]),
            // Unlicense
            new SpdxLicenseMapping("Unlicense",
                ["The Unlicense", "Unlicense"],
                ["unlicense.org"]),
            // ISC
            new SpdxLicenseMapping("ISC",
                ["ISC License", "ISC"],
                ["opensource.org/licenses/ISC"]),
            // Zlib
            new SpdxLicenseMapping("Zlib",
                ["zlib License", "zlib/libpng License"],
                ["opensource.org/licenses/Zlib"]),
            // WTFPL
            new SpdxLicenseMapping("WTFPL",
                ["Do What The F*ck You Want To Public License", "WTFPL"],
                ["wtfpl.net"]),
            // Boost Software License (SPDX id BSL-1.0)
            new SpdxLicenseMapping("BSL-1.0",
                ["Boost Software License 1.0", "BSL-1.0", "Boost License"],
                ["boost.org/LICENSE_1_0.txt", "opensource.org/licenses/BSL-1.0"]),
            // JSON License
            new SpdxLicenseMapping("JSON",
                ["The JSON License", "JSON License"],
                ["json.org/license"]),
            // AGPL
            new SpdxLicenseMapping("AGPL-3.0-only",
                ["GNU Affero General Public License v3.0", "AGPL 3.0", "AGPL-3.0", "AGPLv3"],
                ["gnu.org/licenses/agpl-3.0", "opensource.org/licenses/AGPL-3.0"]),
            // PostgreSQL
            new SpdxLicenseMapping("PostgreSQL",
                ["PostgreSQL License", "The PostgreSQL License"],
                ["opensource.org/licenses/PostgreSQL"]),
            // Unicode
            new SpdxLicenseMapping("Unicode-DFS-2016",
                ["Unicode License Agreement", "Unicode DFS 2016"],
                ["unicode.org/copyright"]),
            // W3C
            new SpdxLicenseMapping("W3C",
                ["W3C Software Notice and License", "W3C License"],
                ["w3.org/Consortium/Legal/2015/copyright-software-and-document"])
        ];
    }

    [GeneratedRegex(@"\s+")]
    private static partial Regex WhitespacePattern();

    // Whole-word noise terms removed from (already lower-cased) license names.
    [GeneratedRegex(@"\b(?:the|license|licence|version)\b")]
    private static partial Regex NoiseWordPattern();
}
/// <summary>
/// Represents a mapping from license names/URLs to an SPDX identifier.
/// </summary>
/// <param name="SpdxId">The SPDX identifier the names and URLs map to (for example "Apache-2.0").</param>
/// <param name="Names">License names that map to <paramref name="SpdxId"/>.</param>
/// <param name="Urls">License URLs that map to <paramref name="SpdxId"/>; may be empty.</param>
internal sealed record SpdxLicenseMapping(
string SpdxId,
IReadOnlyList<string> Names,
IReadOnlyList<string> Urls);