Refactor code structure for improved readability and maintainability
This commit is contained in:
@@ -0,0 +1,352 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Java.Internal.BuildMetadata;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Java.Internal.License;
|
||||
|
||||
/// <summary>
|
||||
/// Normalizes license names and URLs to SPDX identifiers.
|
||||
/// </summary>
|
||||
internal sealed partial class SpdxLicenseNormalizer
|
||||
{
|
||||
// Defers construction of the shared normalizer until Instance is first read.
private static readonly Lazy<SpdxLicenseNormalizer> LazyInstance =
    new Lazy<SpdxLicenseNormalizer>(static () => new SpdxLicenseNormalizer());

// Lookup tables filled once by the constructor; keys are the outputs of
// NormalizeName and NormalizeUrl respectively.
private readonly FrozenDictionary<string, SpdxLicenseMapping> _nameIndex;
private readonly FrozenDictionary<string, SpdxLicenseMapping> _urlIndex;

/// <summary>
/// Gets the shared normalizer, creating it on first access.
/// </summary>
public static SpdxLicenseNormalizer Instance
{
    get { return LazyInstance.Value; }
}
|
||||
|
||||
/// <summary>
/// Builds the name and URL lookup tables from the built-in mapping list.
/// </summary>
private SpdxLicenseNormalizer()
{
    var byName = new Dictionary<string, SpdxLicenseMapping>(StringComparer.OrdinalIgnoreCase);
    var byUrl = new Dictionary<string, SpdxLicenseMapping>(StringComparer.OrdinalIgnoreCase);

    foreach (var entry in LoadMappings())
    {
        // TryAdd keeps the first mapping registered for a key, so earlier
        // entries win when two aliases normalize to the same string.
        for (var i = 0; i < entry.Names.Count; i++)
        {
            byName.TryAdd(NormalizeName(entry.Names[i]), entry);
        }

        for (var i = 0; i < entry.Urls.Count; i++)
        {
            byUrl.TryAdd(NormalizeUrl(entry.Urls[i]), entry);
        }
    }

    _nameIndex = byName.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
    _urlIndex = byUrl.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Resolves a declared license name and/or URL to an SPDX identifier.
/// </summary>
/// <param name="name">The declared license name; may be null or blank.</param>
/// <param name="url">The declared license URL; may be null or blank.</param>
/// <returns>
/// A <see cref="JavaLicenseInfo"/> carrying the supplied name and URL. When a URL or
/// exact name lookup succeeds, the SPDX id is set with high confidence; when only the
/// fuzzy name heuristics succeed, it is set with medium confidence; otherwise the SPDX
/// fields are left as initialized.
/// </returns>
public JavaLicenseInfo Normalize(string? name, string? url)
{
    var info = new JavaLicenseInfo
    {
        Name = name,
        Url = url
    };

    // The URL is consulted before the name.
    if (!string.IsNullOrWhiteSpace(url) && _urlIndex.TryGetValue(NormalizeUrl(url), out var byUrl))
    {
        return info with { SpdxId = byUrl.SpdxId, SpdxConfidence = SpdxConfidence.High };
    }

    if (string.IsNullOrWhiteSpace(name))
    {
        return info;
    }

    var key = NormalizeName(name);

    // An exact hit on a known alias.
    if (_nameIndex.TryGetValue(key, out var byName))
    {
        return info with { SpdxId = byName.SpdxId, SpdxConfidence = SpdxConfidence.High };
    }

    // Last resort: keyword heuristics.
    if (TryFuzzyMatch(key) is { } approximate)
    {
        return info with { SpdxId = approximate.SpdxId, SpdxConfidence = SpdxConfidence.Medium };
    }

    return info;
}
|
||||
|
||||
/// <summary>
/// Reduces a license name to a comparison key: lower-cased, hyphens treated as
/// spaces, the noise words "the", "license", "licence" and "version" dropped,
/// commas and parentheses removed, and whitespace collapsed.
/// </summary>
/// <param name="name">The license name to normalize.</param>
/// <returns>The normalized key; may be empty when the name consists only of noise.</returns>
/// <remarks>
/// Noise words are removed only when they stand alone (not glued to other letters).
/// Removing them as raw substrings mangled unrelated words: "Unlicense" collapsed to
/// "un" and "Other" to "or".
/// </remarks>
private static string NormalizeName(string name)
{
    // SPDX-style identifiers separate words with hyphens ("BSD-3-Clause"); treating
    // them as spaces lets "BSD-3-Clause" and "BSD 3 Clause" share one key.
    var separated = name.ToLowerInvariant().Replace('-', ' ');

    // Replace with a space (not nothing) so neighbouring words stay apart.
    var withoutNoise = NoiseWordPattern().Replace(separated, " ");

    var withoutPunctuation = withoutNoise
        .Replace(",", "", StringComparison.Ordinal)
        .Replace("(", "", StringComparison.Ordinal)
        .Replace(")", "", StringComparison.Ordinal);

    return WhitespacePattern().Replace(withoutPunctuation, " ").Trim();
}

// Matches a noise word that is not directly adjacent to another letter. Digits and
// punctuation count as boundaries, so "version2.0" still loses "version".
[GeneratedRegex(@"(?<!\p{L})(?:the|license|licence|version)(?!\p{L})")]
private static partial Regex NoiseWordPattern();
|
||||
|
||||
// Prefixes that do not identify the license document; stripped in this order.
private static readonly string[] IgnoredUrlPrefixes = ["https://", "http://", "www."];

// Document extensions that license hosts append to the same canonical page.
private static readonly string[] IgnoredUrlSuffixes = [".txt", ".html", ".htm"];

/// <summary>
/// Reduces a license URL to a comparison key: trimmed, lower-cased, without scheme,
/// leading "www.", trailing slashes, or a trailing ".txt"/".html"/".htm" extension.
/// </summary>
/// <param name="url">The URL to normalize.</param>
/// <returns>The normalized key.</returns>
/// <remarks>
/// Extension stripping lets the widely used
/// "http://www.apache.org/licenses/LICENSE-2.0.txt" form resolve to the same key as
/// "apache.org/licenses/LICENSE-2.0". Prefixes are removed only at the start of the
/// string, so text later in the URL is never rewritten.
/// </remarks>
private static string NormalizeUrl(string url)
{
    var key = url.Trim().ToLowerInvariant();

    foreach (var prefix in IgnoredUrlPrefixes)
    {
        if (key.StartsWith(prefix, StringComparison.Ordinal))
        {
            key = key[prefix.Length..];
        }
    }

    key = key.TrimEnd('/');

    foreach (var suffix in IgnoredUrlSuffixes)
    {
        if (key.EndsWith(suffix, StringComparison.Ordinal))
        {
            key = key[..^suffix.Length];
            break;
        }
    }

    return key;
}
|
||||
|
||||
/// <summary>
/// Keyword-based fallback used when a normalized name has no exact entry in the name index.
/// </summary>
/// <param name="normalizedName">A name already processed by <see cref="NormalizeName"/> (lower-case).</param>
/// <returns>The mapping for the recognized license family and version, or null when nothing is recognized.</returns>
/// <remarks>
/// The index key is derived by running a known alias through <see cref="NormalizeName"/>,
/// so the lookup always uses the same key format the index was built with. The previous
/// hard-coded "bsd 3 clause" / "bsd 2 clause" keys never existed in the index.
/// </remarks>
private SpdxLicenseMapping? TryFuzzyMatch(string normalizedName)
{
    var alias = ResolveFuzzyAlias(normalizedName);
    return alias is null ? null : _nameIndex.GetValueOrDefault(NormalizeName(alias));
}

// Maps a normalized name to one of the aliases registered in LoadMappings, or null.
private static string? ResolveFuzzyAlias(string name)
{
    bool Has(string fragment) => name.Contains(fragment, StringComparison.Ordinal);

    if (Has("apache") && Has("2"))
    {
        return "Apache 2.0";
    }

    // Whole word only: a plain substring test also fires on "limited" or "permitted".
    if (ContainsAcronym(name, "mit", allowSuffix: false))
    {
        return "MIT";
    }

    if (Has("bsd"))
    {
        if (Has("3"))
        {
            return "BSD-3-Clause";
        }

        if (Has("2"))
        {
            return "BSD-2-Clause";
        }
    }

    // "gpl" must start a word so that "lgpl" and "agpl" are not reported as plain GPL.
    if (ContainsAcronym(name, "gpl", allowSuffix: true))
    {
        if (Has("3"))
        {
            return "GPL 3.0";
        }

        if (Has("2"))
        {
            return "GPL 2.0";
        }
    }

    if (Has("lgpl"))
    {
        if (Has("2.1"))
        {
            return "LGPL 2.1";
        }

        if (Has("3"))
        {
            return "LGPL 3.0";
        }
    }

    if (Has("agpl") && Has("3"))
    {
        return "AGPL 3.0";
    }

    // Word-start match keeps "compliance", "example" etc. from looking like MPL.
    if (ContainsAcronym(name, "mpl", allowSuffix: true) && Has("2"))
    {
        return "MPL 2.0";
    }

    if (Has("cddl"))
    {
        return Has("1.1") ? "CDDL 1.1" : "CDDL 1.0";
    }

    // Word-start match keeps "deploy", "replace" etc. from looking like EPL.
    if (ContainsAcronym(name, "epl", allowSuffix: true))
    {
        if (Has("2"))
        {
            return "EPL 2.0";
        }

        if (Has("1"))
        {
            return "EPL 1.0";
        }
    }

    return null;
}

// True when the acronym occurs with no letter directly before it and, unless
// allowSuffix is set, no letter directly after it.
private static bool ContainsAcronym(string text, string acronym, bool allowSuffix)
{
    var index = text.IndexOf(acronym, StringComparison.Ordinal);
    while (index >= 0)
    {
        var end = index + acronym.Length;
        var cleanStart = index == 0 || !char.IsLetter(text[index - 1]);
        var cleanEnd = allowSuffix || end == text.Length || !char.IsLetter(text[end]);
        if (cleanStart && cleanEnd)
        {
            return true;
        }

        index = text.IndexOf(acronym, index + 1, StringComparison.Ordinal);
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Returns the built-in table of SPDX identifiers with the names and URLs that map to each.
/// </summary>
/// <remarks>
/// The table concentrates on licenses commonly found in Java/Maven projects. Order is
/// significant: the constructor keeps the first mapping registered for any key.
/// </remarks>
private static IEnumerable<SpdxLicenseMapping> LoadMappings()
{
    SpdxLicenseMapping[] table =
    [
        // ---- Apache ----
        new(
            SpdxId: "Apache-2.0",
            Names: ["Apache License 2.0", "Apache License, Version 2.0", "Apache 2.0", "Apache-2.0", "ASL 2.0", "AL 2.0"],
            Urls: ["apache.org/licenses/LICENSE-2.0", "opensource.org/licenses/Apache-2.0"]),
        new(
            SpdxId: "Apache-1.1",
            Names: ["Apache License 1.1", "Apache Software License 1.1"],
            Urls: ["apache.org/licenses/LICENSE-1.1"]),

        // ---- MIT ----
        new(
            SpdxId: "MIT",
            Names: ["MIT License", "MIT", "The MIT License", "Expat License"],
            Urls: ["opensource.org/licenses/MIT", "mit-license.org"]),

        // ---- BSD ----
        new(
            SpdxId: "BSD-2-Clause",
            Names: ["BSD 2-Clause License", "BSD-2-Clause", "Simplified BSD License", "FreeBSD License"],
            Urls: ["opensource.org/licenses/BSD-2-Clause"]),
        new(
            SpdxId: "BSD-3-Clause",
            Names: ["BSD 3-Clause License", "BSD-3-Clause", "New BSD License", "Modified BSD License"],
            Urls: ["opensource.org/licenses/BSD-3-Clause"]),

        // ---- GPL ----
        new(
            SpdxId: "GPL-2.0-only",
            Names: ["GNU General Public License v2.0", "GPL 2.0", "GPL-2.0", "GPLv2"],
            Urls: ["gnu.org/licenses/old-licenses/gpl-2.0", "opensource.org/licenses/GPL-2.0"]),
        new(
            SpdxId: "GPL-2.0-or-later",
            Names: ["GNU General Public License v2.0 or later", "GPL 2.0+", "GPL-2.0+", "GPLv2+"],
            Urls: []),
        new(
            SpdxId: "GPL-3.0-only",
            Names: ["GNU General Public License v3.0", "GPL 3.0", "GPL-3.0", "GPLv3"],
            Urls: ["gnu.org/licenses/gpl-3.0", "opensource.org/licenses/GPL-3.0"]),
        new(
            SpdxId: "GPL-3.0-or-later",
            Names: ["GNU General Public License v3.0 or later", "GPL 3.0+", "GPL-3.0+", "GPLv3+"],
            Urls: []),

        // ---- LGPL ----
        new(
            SpdxId: "LGPL-2.1-only",
            Names: ["GNU Lesser General Public License v2.1", "LGPL 2.1", "LGPL-2.1", "LGPLv2.1"],
            Urls: ["gnu.org/licenses/old-licenses/lgpl-2.1", "opensource.org/licenses/LGPL-2.1"]),
        new(
            SpdxId: "LGPL-3.0-only",
            Names: ["GNU Lesser General Public License v3.0", "LGPL 3.0", "LGPL-3.0", "LGPLv3"],
            Urls: ["gnu.org/licenses/lgpl-3.0", "opensource.org/licenses/LGPL-3.0"]),

        // ---- MPL ----
        new(
            SpdxId: "MPL-2.0",
            Names: ["Mozilla Public License 2.0", "MPL 2.0", "MPL-2.0"],
            Urls: ["mozilla.org/MPL/2.0", "opensource.org/licenses/MPL-2.0"]),
        new(
            SpdxId: "MPL-1.1",
            Names: ["Mozilla Public License 1.1", "MPL 1.1", "MPL-1.1"],
            Urls: ["mozilla.org/MPL/1.1"]),

        // ---- Eclipse ----
        new(
            SpdxId: "EPL-1.0",
            Names: ["Eclipse Public License 1.0", "EPL 1.0", "EPL-1.0"],
            Urls: ["eclipse.org/legal/epl-v10", "opensource.org/licenses/EPL-1.0"]),
        new(
            SpdxId: "EPL-2.0",
            Names: ["Eclipse Public License 2.0", "EPL 2.0", "EPL-2.0"],
            Urls: ["eclipse.org/legal/epl-2.0", "opensource.org/licenses/EPL-2.0"]),

        // ---- CDDL ----
        new(
            SpdxId: "CDDL-1.0",
            Names: ["Common Development and Distribution License 1.0", "CDDL 1.0", "CDDL-1.0"],
            Urls: ["opensource.org/licenses/CDDL-1.0"]),
        new(
            SpdxId: "CDDL-1.1",
            Names: ["Common Development and Distribution License 1.1", "CDDL 1.1", "CDDL-1.1"],
            Urls: ["glassfish.dev.java.net/public/CDDL+GPL_1_1"]),

        // ---- Creative Commons ----
        new(
            SpdxId: "CC0-1.0",
            Names: ["CC0 1.0 Universal", "CC0", "Public Domain"],
            Urls: ["creativecommons.org/publicdomain/zero/1.0"]),
        new(
            SpdxId: "CC-BY-4.0",
            Names: ["Creative Commons Attribution 4.0", "CC BY 4.0"],
            Urls: ["creativecommons.org/licenses/by/4.0"]),

        // ---- Unlicense ----
        new(
            SpdxId: "Unlicense",
            Names: ["The Unlicense", "Unlicense"],
            Urls: ["unlicense.org"]),

        // ---- ISC ----
        new(
            SpdxId: "ISC",
            Names: ["ISC License", "ISC"],
            Urls: ["opensource.org/licenses/ISC"]),

        // ---- Zlib ----
        new(
            SpdxId: "Zlib",
            Names: ["zlib License", "zlib/libpng License"],
            Urls: ["opensource.org/licenses/Zlib"]),

        // ---- WTFPL ----
        new(
            SpdxId: "WTFPL",
            Names: ["Do What The F*ck You Want To Public License", "WTFPL"],
            Urls: ["wtfpl.net"]),

        // ---- Boost Software License (SPDX id BSL-1.0) ----
        new(
            SpdxId: "BSL-1.0",
            Names: ["Boost Software License 1.0", "BSL-1.0", "Boost License"],
            Urls: ["boost.org/LICENSE_1_0.txt", "opensource.org/licenses/BSL-1.0"]),

        // ---- JSON License ----
        new(
            SpdxId: "JSON",
            Names: ["The JSON License", "JSON License"],
            Urls: ["json.org/license"]),

        // ---- AGPL ----
        new(
            SpdxId: "AGPL-3.0-only",
            Names: ["GNU Affero General Public License v3.0", "AGPL 3.0", "AGPL-3.0", "AGPLv3"],
            Urls: ["gnu.org/licenses/agpl-3.0", "opensource.org/licenses/AGPL-3.0"]),

        // ---- PostgreSQL ----
        new(
            SpdxId: "PostgreSQL",
            Names: ["PostgreSQL License", "The PostgreSQL License"],
            Urls: ["opensource.org/licenses/PostgreSQL"]),

        // ---- Unicode ----
        new(
            SpdxId: "Unicode-DFS-2016",
            Names: ["Unicode License Agreement", "Unicode DFS 2016"],
            Urls: ["unicode.org/copyright"]),

        // ---- W3C ----
        new(
            SpdxId: "W3C",
            Names: ["W3C Software Notice and License", "W3C License"],
            Urls: ["w3.org/Consortium/Legal/2015/copyright-software-and-document"]),
    ];

    return table;
}
|
||||
|
||||
// Source-generated regex matching one or more consecutive whitespace characters.
[GeneratedRegex(@"\s+", RegexOptions.None)]
private static partial Regex WhitespacePattern();
|
||||
}
|
||||
|
||||
/// <summary>
/// Associates one SPDX identifier with the license names and URLs that resolve to it.
/// </summary>
/// <param name="SpdxId">The SPDX identifier the names and URLs map to.</param>
/// <param name="Names">License names that map to <paramref name="SpdxId"/>.</param>
/// <param name="Urls">License URLs that map to <paramref name="SpdxId"/>.</param>
internal sealed record SpdxLicenseMapping(string SpdxId, IReadOnlyList<string> Names, IReadOnlyList<string> Urls);
|
||||
Reference in New Issue
Block a user