up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-13 00:20:26 +02:00
parent e1f1bef4c1
commit 564df71bfb
2376 changed files with 334389 additions and 328032 deletions

View File

@@ -1,8 +1,8 @@
using System.Reflection;
// Assembly-level metadata for StellaOps.Concelier.Normalization: company, product, title,
// and the assembly, file, and informational version attributes.
[assembly: AssemblyCompany("StellaOps")]
[assembly: AssemblyProduct("StellaOps.Concelier.Normalization")]
[assembly: AssemblyTitle("StellaOps.Concelier.Normalization")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
[assembly: AssemblyInformationalVersion("1.0.0")]
using System.Reflection;
// Assembly-level metadata for StellaOps.Concelier.Normalization: company, product, title,
// and the assembly, file, and informational version attributes.
[assembly: AssemblyCompany("StellaOps")]
[assembly: AssemblyProduct("StellaOps.Concelier.Normalization")]
[assembly: AssemblyTitle("StellaOps.Concelier.Normalization")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
[assembly: AssemblyInformationalVersion("1.0.0")]

View File

@@ -1,127 +1,127 @@
using System.Globalization;
namespace StellaOps.Concelier.Normalization.Distro;
/// <summary>
/// Represents a Debian epoch:version-revision tuple and exposes parsing/formatting helpers.
/// </summary>
/// <remarks>
/// Instances are immutable and can only be created through <see cref="TryParse"/> or <see cref="Parse"/>.
/// </remarks>
public sealed class DebianEvr
{
    private DebianEvr(int epoch, bool hasExplicitEpoch, string version, string revision, string original)
    {
        Epoch = epoch;
        HasExplicitEpoch = hasExplicitEpoch;
        Version = version;
        Revision = revision;
        Original = original;
    }

    /// <summary>
    /// Epoch segment (defaults to <c>0</c> when omitted). Never negative.
    /// </summary>
    public int Epoch { get; }

    /// <summary>
    /// Indicates whether an epoch segment was present explicitly.
    /// </summary>
    public bool HasExplicitEpoch { get; }

    /// <summary>
    /// Version portion (without epoch or revision).
    /// </summary>
    public string Version { get; }

    /// <summary>
    /// Revision portion (after the last dash). Empty when omitted.
    /// </summary>
    public string Revision { get; }

    /// <summary>
    /// Trimmed EVR string supplied to <see cref="TryParse"/>.
    /// </summary>
    public string Original { get; }

    /// <summary>
    /// Attempts to parse the provided value into a <see cref="DebianEvr"/> instance.
    /// </summary>
    /// <param name="value">Candidate EVR string; surrounding whitespace is ignored.</param>
    /// <param name="result">The parsed instance on success; otherwise <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="value"/> has a non-empty version and, if a colon is present,
    /// a non-negative integer epoch before the first colon; otherwise <c>false</c>.
    /// </returns>
    public static bool TryParse(string? value, out DebianEvr? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();
        var epoch = 0;
        var hasExplicitEpoch = false;
        var remainder = trimmed;

        // Everything before the first colon is the epoch.
        var colonIndex = trimmed.IndexOf(':');
        if (colonIndex >= 0)
        {
            if (colonIndex == 0)
            {
                // ":1.0" - a colon with no epoch in front of it.
                return false;
            }

            var epochPart = trimmed[..colonIndex];
            if (!int.TryParse(epochPart, NumberStyles.Integer, CultureInfo.InvariantCulture, out epoch))
            {
                return false;
            }

            // NumberStyles.Integer admits a leading sign; an epoch is an unsigned number,
            // so a negative value is malformed input rather than a valid epoch.
            if (epoch < 0)
            {
                return false;
            }

            hasExplicitEpoch = true;
            remainder = trimmed[(colonIndex + 1)..];
        }

        if (remainder.Length == 0)
        {
            return false;
        }

        // The revision is whatever follows the LAST dash. A dash at index 0 is not treated as a
        // separator, which also guarantees the version part is never empty.
        var version = remainder;
        var revision = string.Empty;
        var dashIndex = remainder.LastIndexOf('-');
        if (dashIndex > 0)
        {
            version = remainder[..dashIndex];
            revision = remainder[(dashIndex + 1)..];
        }

        result = new DebianEvr(epoch, hasExplicitEpoch, version, revision, trimmed);
        return true;
    }

    /// <summary>
    /// Parses the provided value into a <see cref="DebianEvr"/> or throws <see cref="FormatException"/>.
    /// </summary>
    /// <param name="value">EVR string to parse.</param>
    /// <returns>The parsed instance.</returns>
    /// <exception cref="FormatException"><paramref name="value"/> is rejected by <see cref="TryParse"/>.</exception>
    public static DebianEvr Parse(string value)
    {
        if (!TryParse(value, out var evr))
        {
            throw new FormatException($"Input '{value}' is not a valid Debian EVR string.");
        }

        return evr!;
    }

    /// <summary>
    /// Returns a canonical EVR string: the epoch is emitted when it was explicit or is greater than zero,
    /// and the revision (with its dash) is emitted only when non-empty.
    /// </summary>
    public string ToCanonicalString()
    {
        var epochSegment = HasExplicitEpoch || Epoch > 0 ? $"{Epoch}:" : string.Empty;
        var revisionSegment = string.IsNullOrEmpty(Revision) ? string.Empty : $"-{Revision}";
        return $"{epochSegment}{Version}{revisionSegment}";
    }

    /// <inheritdoc />
    public override string ToString() => Original;
}
using System.Globalization;
namespace StellaOps.Concelier.Normalization.Distro;
/// <summary>
/// Represents a Debian epoch:version-revision tuple and exposes parsing/formatting helpers.
/// </summary>
/// <remarks>
/// Instances are immutable and can only be created through <see cref="TryParse"/> or <see cref="Parse"/>.
/// </remarks>
public sealed class DebianEvr
{
    private DebianEvr(int epoch, bool hasExplicitEpoch, string version, string revision, string original)
    {
        Epoch = epoch;
        HasExplicitEpoch = hasExplicitEpoch;
        Version = version;
        Revision = revision;
        Original = original;
    }

    /// <summary>
    /// Epoch segment (defaults to <c>0</c> when omitted). Never negative.
    /// </summary>
    public int Epoch { get; }

    /// <summary>
    /// Indicates whether an epoch segment was present explicitly.
    /// </summary>
    public bool HasExplicitEpoch { get; }

    /// <summary>
    /// Version portion (without epoch or revision).
    /// </summary>
    public string Version { get; }

    /// <summary>
    /// Revision portion (after the last dash). Empty when omitted.
    /// </summary>
    public string Revision { get; }

    /// <summary>
    /// Trimmed EVR string supplied to <see cref="TryParse"/>.
    /// </summary>
    public string Original { get; }

    /// <summary>
    /// Attempts to parse the provided value into a <see cref="DebianEvr"/> instance.
    /// </summary>
    /// <param name="value">Candidate EVR string; surrounding whitespace is ignored.</param>
    /// <param name="result">The parsed instance on success; otherwise <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="value"/> has a non-empty version and, if a colon is present,
    /// a non-negative integer epoch before the first colon; otherwise <c>false</c>.
    /// </returns>
    public static bool TryParse(string? value, out DebianEvr? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();
        var epoch = 0;
        var hasExplicitEpoch = false;
        var remainder = trimmed;

        // Everything before the first colon is the epoch.
        var colonIndex = trimmed.IndexOf(':');
        if (colonIndex >= 0)
        {
            if (colonIndex == 0)
            {
                // ":1.0" - a colon with no epoch in front of it.
                return false;
            }

            var epochPart = trimmed[..colonIndex];
            if (!int.TryParse(epochPart, NumberStyles.Integer, CultureInfo.InvariantCulture, out epoch))
            {
                return false;
            }

            // NumberStyles.Integer admits a leading sign; an epoch is an unsigned number,
            // so a negative value is malformed input rather than a valid epoch.
            if (epoch < 0)
            {
                return false;
            }

            hasExplicitEpoch = true;
            remainder = trimmed[(colonIndex + 1)..];
        }

        if (remainder.Length == 0)
        {
            return false;
        }

        // The revision is whatever follows the LAST dash. A dash at index 0 is not treated as a
        // separator, which also guarantees the version part is never empty.
        var version = remainder;
        var revision = string.Empty;
        var dashIndex = remainder.LastIndexOf('-');
        if (dashIndex > 0)
        {
            version = remainder[..dashIndex];
            revision = remainder[(dashIndex + 1)..];
        }

        result = new DebianEvr(epoch, hasExplicitEpoch, version, revision, trimmed);
        return true;
    }

    /// <summary>
    /// Parses the provided value into a <see cref="DebianEvr"/> or throws <see cref="FormatException"/>.
    /// </summary>
    /// <param name="value">EVR string to parse.</param>
    /// <returns>The parsed instance.</returns>
    /// <exception cref="FormatException"><paramref name="value"/> is rejected by <see cref="TryParse"/>.</exception>
    public static DebianEvr Parse(string value)
    {
        if (!TryParse(value, out var evr))
        {
            throw new FormatException($"Input '{value}' is not a valid Debian EVR string.");
        }

        return evr!;
    }

    /// <summary>
    /// Returns a canonical EVR string: the epoch is emitted when it was explicit or is greater than zero,
    /// and the revision (with its dash) is emitted only when non-empty.
    /// </summary>
    public string ToCanonicalString()
    {
        var epochSegment = HasExplicitEpoch || Epoch > 0 ? $"{Epoch}:" : string.Empty;
        var revisionSegment = string.IsNullOrEmpty(Revision) ? string.Empty : $"-{Revision}";
        return $"{epochSegment}{Version}{revisionSegment}";
    }

    /// <inheritdoc />
    public override string ToString() => Original;
}

View File

@@ -1,192 +1,192 @@
using System.Globalization;
namespace StellaOps.Concelier.Normalization.Distro;
/// <summary>
/// Parsed form of a NEVRA (Name-Epoch:Version-Release.Architecture) identifier, with helpers for canonical formatting.
/// </summary>
public sealed class Nevra
{
    // Suffixes recognised as an architecture when they follow the last '.' of the release segment.
    // Matching ignores case.
    private static readonly ISet<string> KnownArchitectures = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "noarch", "src", "nosrc",
        "x86_64", "aarch64", "armv7hl", "armhfp",
        "ppc64", "ppc64le", "ppc",
        "s390", "s390x",
        "i386", "i486", "i586", "i686",
        "amd64", "arm64",
        "armv7l", "armv6l", "armv8l", "armel", "armhf",
        "ia32e", "loongarch64",
        "mips", "mips64", "mips64le", "mipsel",
        "ppc32", "ppc64p7",
        "riscv64",
        "sparc", "sparc64"
    };

    private Nevra(string name, int epoch, bool hasExplicitEpoch, string version, string release, string? architecture, string original)
    {
        Name = name;
        Epoch = epoch;
        HasExplicitEpoch = hasExplicitEpoch;
        Version = version;
        Release = release;
        Architecture = architecture;
        Original = original;
    }

    /// <summary>Package name segment.</summary>
    public string Name { get; }

    /// <summary>Epoch taken from the NEVRA string; <c>0</c> when no epoch was supplied.</summary>
    public int Epoch { get; }

    /// <summary>Whether the input carried an epoch separator (e.g. <c>0:</c>).</summary>
    public bool HasExplicitEpoch { get; }

    /// <summary>Version component, without epoch or release.</summary>
    public string Version { get; }

    /// <summary>Release component, without a recognised architecture suffix.</summary>
    public string Release { get; }

    /// <summary>Architecture suffix (e.g. <c>x86_64</c>, <c>noarch</c>), or <c>null</c> when none was recognised.</summary>
    public string? Architecture { get; }

    /// <summary>Trimmed NEVRA string that was handed to <see cref="TryParse"/>.</summary>
    public string Original { get; }

    /// <summary>
    /// Attempts to parse the provided value into a <see cref="Nevra"/> instance.
    /// </summary>
    /// <param name="value">Candidate NEVRA string; surrounding whitespace is ignored.</param>
    /// <param name="result">The parsed instance on success; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when parsing succeeded; otherwise <c>false</c>.</returns>
    public static bool TryParse(string? value, out Nevra? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var text = value.Trim();

        // The last dash splits "name-[epoch:]version" from "release[.arch]"; both sides must be non-empty.
        var lastDash = text.LastIndexOf('-');
        if (lastDash <= 0 || lastDash == text.Length - 1)
        {
            return false;
        }

        var head = text[..lastDash];
        var tail = text[(lastDash + 1)..];

        // The last dash of the head splits the name from "[epoch:]version".
        var nameDash = head.LastIndexOf('-');
        if (nameDash <= 0)
        {
            return false;
        }

        var name = head[..nameDash];
        if (string.IsNullOrWhiteSpace(name))
        {
            return false;
        }

        var epochAndVersion = head[(nameDash + 1)..];

        // Peel a trailing ".arch" off the release only when the suffix is a known architecture.
        var release = tail;
        string? architecture = null;
        var lastDot = tail.LastIndexOf('.');
        if (lastDot > 0 && lastDot < tail.Length - 1)
        {
            var suffix = tail[(lastDot + 1)..];
            if (KnownArchitectures.Contains(suffix))
            {
                release = tail[..lastDot];
                architecture = suffix;
            }
        }

        var colon = epochAndVersion.IndexOf(':');
        var hasExplicitEpoch = colon >= 0;
        var epoch = 0;
        var version = epochAndVersion;
        if (hasExplicitEpoch)
        {
            version = epochAndVersion[(colon + 1)..];

            // An empty epoch (":1.0") is tolerated and leaves the epoch at 0.
            var epochText = epochAndVersion[..colon];
            if (epochText.Length > 0
                && !int.TryParse(epochText, NumberStyles.Integer, CultureInfo.InvariantCulture, out epoch))
            {
                return false;
            }
        }

        if (string.IsNullOrWhiteSpace(version))
        {
            return false;
        }

        result = new Nevra(name, epoch, hasExplicitEpoch, version, release, architecture, text);
        return true;
    }

    /// <summary>
    /// Parses the provided value into a <see cref="Nevra"/> or throws <see cref="FormatException"/>.
    /// </summary>
    /// <param name="value">NEVRA string to parse.</param>
    /// <returns>The parsed instance.</returns>
    /// <exception cref="FormatException"><paramref name="value"/> is rejected by <see cref="TryParse"/>.</exception>
    public static Nevra Parse(string value)
        => TryParse(value, out var parsed)
            ? parsed!
            : throw new FormatException($"Input '{value}' is not a valid NEVRA string.");

    /// <summary>
    /// Builds the canonical <c>name-[epoch:]version-release[.arch]</c> form. The epoch is written when it was
    /// explicit or is greater than zero; the architecture is written only when one is present.
    /// </summary>
    public string ToCanonicalString()
    {
        var epochPrefix = string.Empty;
        if (HasExplicitEpoch || Epoch > 0)
        {
            epochPrefix = $"{Epoch}:";
        }

        var archSuffix = string.IsNullOrWhiteSpace(Architecture) ? string.Empty : $".{Architecture}";
        return $"{Name}-{epochPrefix}{Version}-{Release}{archSuffix}";
    }

    /// <inheritdoc />
    public override string ToString() => Original;
}
using System.Globalization;
namespace StellaOps.Concelier.Normalization.Distro;
/// <summary>
/// Parsed form of a NEVRA (Name-Epoch:Version-Release.Architecture) identifier, with helpers for canonical formatting.
/// </summary>
public sealed class Nevra
{
    // Suffixes recognised as an architecture when they follow the last '.' of the release segment.
    // Matching ignores case.
    private static readonly ISet<string> KnownArchitectures = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "noarch", "src", "nosrc",
        "x86_64", "aarch64", "armv7hl", "armhfp",
        "ppc64", "ppc64le", "ppc",
        "s390", "s390x",
        "i386", "i486", "i586", "i686",
        "amd64", "arm64",
        "armv7l", "armv6l", "armv8l", "armel", "armhf",
        "ia32e", "loongarch64",
        "mips", "mips64", "mips64le", "mipsel",
        "ppc32", "ppc64p7",
        "riscv64",
        "sparc", "sparc64"
    };

    private Nevra(string name, int epoch, bool hasExplicitEpoch, string version, string release, string? architecture, string original)
    {
        Name = name;
        Epoch = epoch;
        HasExplicitEpoch = hasExplicitEpoch;
        Version = version;
        Release = release;
        Architecture = architecture;
        Original = original;
    }

    /// <summary>Package name segment.</summary>
    public string Name { get; }

    /// <summary>Epoch taken from the NEVRA string; <c>0</c> when no epoch was supplied.</summary>
    public int Epoch { get; }

    /// <summary>Whether the input carried an epoch separator (e.g. <c>0:</c>).</summary>
    public bool HasExplicitEpoch { get; }

    /// <summary>Version component, without epoch or release.</summary>
    public string Version { get; }

    /// <summary>Release component, without a recognised architecture suffix.</summary>
    public string Release { get; }

    /// <summary>Architecture suffix (e.g. <c>x86_64</c>, <c>noarch</c>), or <c>null</c> when none was recognised.</summary>
    public string? Architecture { get; }

    /// <summary>Trimmed NEVRA string that was handed to <see cref="TryParse"/>.</summary>
    public string Original { get; }

    /// <summary>
    /// Attempts to parse the provided value into a <see cref="Nevra"/> instance.
    /// </summary>
    /// <param name="value">Candidate NEVRA string; surrounding whitespace is ignored.</param>
    /// <param name="result">The parsed instance on success; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when parsing succeeded; otherwise <c>false</c>.</returns>
    public static bool TryParse(string? value, out Nevra? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var text = value.Trim();

        // The last dash splits "name-[epoch:]version" from "release[.arch]"; both sides must be non-empty.
        var lastDash = text.LastIndexOf('-');
        if (lastDash <= 0 || lastDash == text.Length - 1)
        {
            return false;
        }

        var head = text[..lastDash];
        var tail = text[(lastDash + 1)..];

        // The last dash of the head splits the name from "[epoch:]version".
        var nameDash = head.LastIndexOf('-');
        if (nameDash <= 0)
        {
            return false;
        }

        var name = head[..nameDash];
        if (string.IsNullOrWhiteSpace(name))
        {
            return false;
        }

        var epochAndVersion = head[(nameDash + 1)..];

        // Peel a trailing ".arch" off the release only when the suffix is a known architecture.
        var release = tail;
        string? architecture = null;
        var lastDot = tail.LastIndexOf('.');
        if (lastDot > 0 && lastDot < tail.Length - 1)
        {
            var suffix = tail[(lastDot + 1)..];
            if (KnownArchitectures.Contains(suffix))
            {
                release = tail[..lastDot];
                architecture = suffix;
            }
        }

        var colon = epochAndVersion.IndexOf(':');
        var hasExplicitEpoch = colon >= 0;
        var epoch = 0;
        var version = epochAndVersion;
        if (hasExplicitEpoch)
        {
            version = epochAndVersion[(colon + 1)..];

            // An empty epoch (":1.0") is tolerated and leaves the epoch at 0.
            var epochText = epochAndVersion[..colon];
            if (epochText.Length > 0
                && !int.TryParse(epochText, NumberStyles.Integer, CultureInfo.InvariantCulture, out epoch))
            {
                return false;
            }
        }

        if (string.IsNullOrWhiteSpace(version))
        {
            return false;
        }

        result = new Nevra(name, epoch, hasExplicitEpoch, version, release, architecture, text);
        return true;
    }

    /// <summary>
    /// Parses the provided value into a <see cref="Nevra"/> or throws <see cref="FormatException"/>.
    /// </summary>
    /// <param name="value">NEVRA string to parse.</param>
    /// <returns>The parsed instance.</returns>
    /// <exception cref="FormatException"><paramref name="value"/> is rejected by <see cref="TryParse"/>.</exception>
    public static Nevra Parse(string value)
        => TryParse(value, out var parsed)
            ? parsed!
            : throw new FormatException($"Input '{value}' is not a valid NEVRA string.");

    /// <summary>
    /// Builds the canonical <c>name-[epoch:]version-release[.arch]</c> form. The epoch is written when it was
    /// explicit or is greater than zero; the architecture is written only when one is present.
    /// </summary>
    public string ToCanonicalString()
    {
        var epochPrefix = string.Empty;
        if (HasExplicitEpoch || Epoch > 0)
        {
            epochPrefix = $"{Epoch}:";
        }

        var archSuffix = string.IsNullOrWhiteSpace(Architecture) ? string.Empty : $".{Architecture}";
        return $"{Name}-{epochPrefix}{Version}-{Release}{archSuffix}";
    }

    /// <inheritdoc />
    public override string ToString() => Original;
}

View File

@@ -1,352 +1,352 @@
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace StellaOps.Concelier.Normalization.Identifiers;
/// <summary>
/// Implements canonical normalization for CPE 2.3 identifiers (and URI binding conversion).
/// </summary>
/// <remarks>
/// Input may be a CPE 2.3 formatted string (<c>cpe:2.3:...</c>, exactly 13 components) or a CPE 2.2 URI
/// (<c>cpe:/part:vendor:...</c>). Output is always a 13-component <c>cpe:2.3:</c> formatted string.
/// </remarks>
internal static class Cpe23
{
    // Number of colon-separated components in a CPE 2.3 formatted string, including "cpe" and "2.3".
    private const int FormattedStringComponentCount = 13;

    private static readonly HashSet<char> CharactersRequiringEscape = new(new[]
    {
        '\\', ':', '/', '?', '#', '[', ']', '@', '!', '$', '&', '"', '\'', '(', ')', '+', ',', ';', '=', '%', '*',
        '<', '>', '|', '^', '`', '{', '}', '~'
    });

    /// <summary>
    /// Attempts to normalize a CPE 2.3 formatted string or CPE 2.2 URI into a canonical CPE 2.3 formatted string.
    /// </summary>
    /// <param name="value">Candidate identifier; surrounding whitespace is ignored.</param>
    /// <param name="normalized">The canonical string on success; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="value"/> was recognised and normalized; otherwise <c>false</c>.</returns>
    public static bool TryNormalize(string? value, out string? normalized)
    {
        normalized = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var components = SplitComponents(value.Trim());
        if (components.Count < 2 || !components[0].Equals("cpe", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (components[1].Equals("2.3", StringComparison.OrdinalIgnoreCase))
        {
            return TryNormalizeFrom23(components, out normalized);
        }

        // A 2.2 URI looks like "cpe:/a:vendor:...", so the second component starts with '/'.
        if (components[1].Length > 0 && components[1][0] == '/')
        {
            return TryNormalizeFrom22(components, out normalized);
        }

        return false;
    }

    /// <summary>
    /// Normalizes an already-split CPE 2.3 formatted string. Vendor and product are lower-cased;
    /// every other attribute keeps its case.
    /// </summary>
    private static bool TryNormalizeFrom23(IReadOnlyList<string> components, out string? normalized)
    {
        normalized = null;
        if (components.Count != FormattedStringComponentCount)
        {
            return false;
        }

        var part = NormalizePart(components[2]);
        if (part is null)
        {
            return false;
        }

        var normalizedComponents = new string[FormattedStringComponentCount];
        normalizedComponents[0] = "cpe";
        normalizedComponents[1] = "2.3";
        normalizedComponents[2] = part;
        for (var i = 3; i < FormattedStringComponentCount; i++)
        {
            // Indices 3 and 4 are vendor and product.
            normalizedComponents[i] = NormalizeField(components[i], lower: i is 3 or 4, decodeUri: false);
        }

        normalized = string.Join(':', normalizedComponents);
        return true;
    }

    /// <summary>
    /// Converts an already-split CPE 2.2 URI into a CPE 2.3 formatted string. Missing trailing
    /// components become <c>*</c>; components beyond the language are ignored.
    /// </summary>
    private static bool TryNormalizeFrom22(IReadOnlyList<string> components, out string? normalized)
    {
        normalized = null;
        if (components.Count < 2)
        {
            return false;
        }

        var partComponent = components[1];
        if (partComponent.Length < 2 || partComponent[0] != '/')
        {
            return false;
        }

        var part = NormalizePart(partComponent[1..]);
        if (part is null)
        {
            return false;
        }

        string? At(int index) => index < components.Count ? components[index] : null;

        var vendor = NormalizeField(At(2), lower: true, decodeUri: true);
        var product = NormalizeField(At(3), lower: true, decodeUri: true);
        var version = NormalizeField(At(4), lower: false, decodeUri: true);
        var update = NormalizeField(At(5), lower: false, decodeUri: true);
        var (edition, swEdition, targetSw, targetHw, other) = ExpandEdition(At(6));
        var language = NormalizeField(At(7), lower: true, decodeUri: true);

        normalized = string.Join(':', new[]
        {
            "cpe",
            "2.3",
            part,
            vendor,
            product,
            version,
            update,
            edition,
            language,
            swEdition,
            targetSw,
            targetHw,
            other,
        });
        return true;
    }

    /// <summary>
    /// Returns the lower-cased part token when it is <c>a</c>, <c>o</c> or <c>h</c>; otherwise <c>null</c>.
    /// </summary>
    private static string? NormalizePart(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        var token = value.Trim().ToLowerInvariant();
        return token is "a" or "o" or "h" ? token : null;
    }

    /// <summary>
    /// Normalizes a single attribute value: blank becomes <c>*</c>, <c>*</c>/<c>-</c> pass through, and anything
    /// else is decoded (percent-decoding or backslash-unescaping), optionally lower-cased, then re-escaped.
    /// </summary>
    private static string NormalizeField(string? value, bool lower, bool decodeUri)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "*";
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        var decoded = decodeUri ? DecodeUriComponent(trimmed) : UnescapeComponent(trimmed);
        if (decoded is "*" or "-")
        {
            return decoded;
        }

        if (decoded.Length == 0)
        {
            return "*";
        }

        var normalized = lower ? decoded.ToLowerInvariant() : decoded;
        return EscapeComponent(normalized);
    }

    /// <summary>
    /// Expands the CPE 2.2 URI edition component into the five CPE 2.3 attributes it can carry.
    /// </summary>
    /// <remarks>
    /// A value starting with <c>~</c> is the packed form <c>~edition~sw_edition~target_sw~target_hw~other</c>;
    /// any other value is a plain legacy edition. The packed form is split on the raw text BEFORE
    /// percent-decoding so that an encoded tilde (<c>%7E</c>) inside a field is not mistaken for a delimiter.
    /// </remarks>
    private static (string Edition, string SwEdition, string TargetSw, string TargetHw, string Other) ExpandEdition(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return ("*", "*", "*", "*", "*");
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return (trimmed, "*", "*", "*", "*");
        }

        if (!trimmed.StartsWith("~", StringComparison.Ordinal))
        {
            return (NormalizeDecodedField(DecodeUriComponent(trimmed), lower: false), "*", "*", "*", "*");
        }

        // segments[0] is the empty text before the leading '~'; fields occupy indices 1..5.
        var segments = trimmed.Split('~');
        string Segment(int index) => index < segments.Length
            ? NormalizeDecodedField(DecodeUriComponent(segments[index]), lower: false)
            : "*";

        return (Segment(1), Segment(2), Segment(3), Segment(4), Segment(5));
    }

    /// <summary>
    /// Same as <see cref="NormalizeField"/> but for text that has already been decoded.
    /// </summary>
    private static string NormalizeDecodedField(string? value, bool lower)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "*";
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        var normalized = lower ? trimmed.ToLowerInvariant() : trimmed;
        return EscapeComponent(normalized);
    }

    /// <summary>
    /// Removes backslash escapes: each backslash is dropped and the character after it is kept verbatim.
    /// A trailing lone backslash is preserved.
    /// </summary>
    private static string UnescapeComponent(string value)
    {
        var builder = new StringBuilder(value.Length);
        var escape = false;
        foreach (var ch in value)
        {
            if (escape)
            {
                builder.Append(ch);
                escape = false;
                continue;
            }

            if (ch == '\\')
            {
                escape = true;
                continue;
            }

            builder.Append(ch);
        }

        if (escape)
        {
            builder.Append('\\');
        }

        return builder.ToString();
    }

    /// <summary>
    /// Prefixes every character for which <see cref="RequiresEscape"/> is true with a backslash.
    /// </summary>
    private static string EscapeComponent(string value)
    {
        if (value.Length == 0)
        {
            return value;
        }

        var builder = new StringBuilder(value.Length * 2);
        foreach (var ch in value)
        {
            if (RequiresEscape(ch))
            {
                builder.Append('\\');
            }

            builder.Append(ch);
        }

        return builder.ToString();
    }

    /// <summary>
    /// Decides whether a character must be backslash-escaped in the output. Letters, digits,
    /// <c>_</c>, <c>-</c> and <c>.</c> are never escaped; whitespace and the listed punctuation always are.
    /// </summary>
    private static bool RequiresEscape(char ch)
    {
        if (char.IsLetterOrDigit(ch))
        {
            return false;
        }

        if (char.IsWhiteSpace(ch))
        {
            return true;
        }

        return ch switch
        {
            '_' or '-' or '.' => false,
            // Keep wildcard markers literal only when entire component is wildcard handled earlier.
            '*' => true,
            _ => CharactersRequiringEscape.Contains(ch)
        };
    }

    /// <summary>
    /// Replaces each <c>%XX</c> sequence (two hex digits) with the character whose code is <c>0xXX</c>.
    /// Malformed or truncated sequences are copied through unchanged.
    /// </summary>
    private static string DecodeUriComponent(string value)
    {
        var builder = new StringBuilder(value.Length);
        for (var i = 0; i < value.Length; i++)
        {
            var ch = value[i];
            if (ch == '%' && i + 2 < value.Length && IsHex(value[i + 1]) && IsHex(value[i + 2]))
            {
                var hex = new string(new[] { value[i + 1], value[i + 2] });
                var decoded = (char)int.Parse(hex, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                builder.Append(decoded);
                i += 2; // skip the two hex digits just consumed
            }
            else
            {
                builder.Append(ch);
            }
        }

        return builder.ToString();
    }

    private static bool IsHex(char ch)
        => ch is >= '0' and <= '9' or >= 'A' and <= 'F' or >= 'a' and <= 'f';

    /// <summary>
    /// Splits on unescaped colons. Backslash escapes are kept in the output so that later stages can
    /// still tell an escaped colon from a separator. Always returns at least one element.
    /// </summary>
    private static List<string> SplitComponents(string value)
    {
        var results = new List<string>();
        var builder = new StringBuilder();
        var escape = false;
        foreach (var ch in value)
        {
            if (escape)
            {
                builder.Append(ch);
                escape = false;
                continue;
            }

            if (ch == '\\')
            {
                builder.Append(ch);
                escape = true;
                continue;
            }

            if (ch == ':')
            {
                results.Add(builder.ToString());
                builder.Clear();
                continue;
            }

            builder.Append(ch);
        }

        results.Add(builder.ToString());
        return results;
    }
}
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace StellaOps.Concelier.Normalization.Identifiers;
/// <summary>
/// Implements canonical normalization for CPE 2.3 identifiers (and URI binding conversion).
/// </summary>
/// <remarks>
/// Input may be a CPE 2.3 formatted string (<c>cpe:2.3:...</c>, exactly 13 components) or a CPE 2.2 URI
/// (<c>cpe:/part:vendor:...</c>). Output is always a 13-component <c>cpe:2.3:</c> formatted string.
/// </remarks>
internal static class Cpe23
{
    // Number of colon-separated components in a CPE 2.3 formatted string, including "cpe" and "2.3".
    private const int FormattedStringComponentCount = 13;

    private static readonly HashSet<char> CharactersRequiringEscape = new(new[]
    {
        '\\', ':', '/', '?', '#', '[', ']', '@', '!', '$', '&', '"', '\'', '(', ')', '+', ',', ';', '=', '%', '*',
        '<', '>', '|', '^', '`', '{', '}', '~'
    });

    /// <summary>
    /// Attempts to normalize a CPE 2.3 formatted string or CPE 2.2 URI into a canonical CPE 2.3 formatted string.
    /// </summary>
    /// <param name="value">Candidate identifier; surrounding whitespace is ignored.</param>
    /// <param name="normalized">The canonical string on success; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="value"/> was recognised and normalized; otherwise <c>false</c>.</returns>
    public static bool TryNormalize(string? value, out string? normalized)
    {
        normalized = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var components = SplitComponents(value.Trim());
        if (components.Count < 2 || !components[0].Equals("cpe", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (components[1].Equals("2.3", StringComparison.OrdinalIgnoreCase))
        {
            return TryNormalizeFrom23(components, out normalized);
        }

        // A 2.2 URI looks like "cpe:/a:vendor:...", so the second component starts with '/'.
        if (components[1].Length > 0 && components[1][0] == '/')
        {
            return TryNormalizeFrom22(components, out normalized);
        }

        return false;
    }

    /// <summary>
    /// Normalizes an already-split CPE 2.3 formatted string. Vendor and product are lower-cased;
    /// every other attribute keeps its case.
    /// </summary>
    private static bool TryNormalizeFrom23(IReadOnlyList<string> components, out string? normalized)
    {
        normalized = null;
        if (components.Count != FormattedStringComponentCount)
        {
            return false;
        }

        var part = NormalizePart(components[2]);
        if (part is null)
        {
            return false;
        }

        var normalizedComponents = new string[FormattedStringComponentCount];
        normalizedComponents[0] = "cpe";
        normalizedComponents[1] = "2.3";
        normalizedComponents[2] = part;
        for (var i = 3; i < FormattedStringComponentCount; i++)
        {
            // Indices 3 and 4 are vendor and product.
            normalizedComponents[i] = NormalizeField(components[i], lower: i is 3 or 4, decodeUri: false);
        }

        normalized = string.Join(':', normalizedComponents);
        return true;
    }

    /// <summary>
    /// Converts an already-split CPE 2.2 URI into a CPE 2.3 formatted string. Missing trailing
    /// components become <c>*</c>; components beyond the language are ignored.
    /// </summary>
    private static bool TryNormalizeFrom22(IReadOnlyList<string> components, out string? normalized)
    {
        normalized = null;
        if (components.Count < 2)
        {
            return false;
        }

        var partComponent = components[1];
        if (partComponent.Length < 2 || partComponent[0] != '/')
        {
            return false;
        }

        var part = NormalizePart(partComponent[1..]);
        if (part is null)
        {
            return false;
        }

        string? At(int index) => index < components.Count ? components[index] : null;

        var vendor = NormalizeField(At(2), lower: true, decodeUri: true);
        var product = NormalizeField(At(3), lower: true, decodeUri: true);
        var version = NormalizeField(At(4), lower: false, decodeUri: true);
        var update = NormalizeField(At(5), lower: false, decodeUri: true);
        var (edition, swEdition, targetSw, targetHw, other) = ExpandEdition(At(6));
        var language = NormalizeField(At(7), lower: true, decodeUri: true);

        normalized = string.Join(':', new[]
        {
            "cpe",
            "2.3",
            part,
            vendor,
            product,
            version,
            update,
            edition,
            language,
            swEdition,
            targetSw,
            targetHw,
            other,
        });
        return true;
    }

    /// <summary>
    /// Returns the lower-cased part token when it is <c>a</c>, <c>o</c> or <c>h</c>; otherwise <c>null</c>.
    /// </summary>
    private static string? NormalizePart(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        var token = value.Trim().ToLowerInvariant();
        return token is "a" or "o" or "h" ? token : null;
    }

    /// <summary>
    /// Normalizes a single attribute value: blank becomes <c>*</c>, <c>*</c>/<c>-</c> pass through, and anything
    /// else is decoded (percent-decoding or backslash-unescaping), optionally lower-cased, then re-escaped.
    /// </summary>
    private static string NormalizeField(string? value, bool lower, bool decodeUri)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "*";
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        var decoded = decodeUri ? DecodeUriComponent(trimmed) : UnescapeComponent(trimmed);
        if (decoded is "*" or "-")
        {
            return decoded;
        }

        if (decoded.Length == 0)
        {
            return "*";
        }

        var normalized = lower ? decoded.ToLowerInvariant() : decoded;
        return EscapeComponent(normalized);
    }

    /// <summary>
    /// Expands the CPE 2.2 URI edition component into the five CPE 2.3 attributes it can carry.
    /// </summary>
    /// <remarks>
    /// A value starting with <c>~</c> is the packed form <c>~edition~sw_edition~target_sw~target_hw~other</c>;
    /// any other value is a plain legacy edition. The packed form is split on the raw text BEFORE
    /// percent-decoding so that an encoded tilde (<c>%7E</c>) inside a field is not mistaken for a delimiter.
    /// </remarks>
    private static (string Edition, string SwEdition, string TargetSw, string TargetHw, string Other) ExpandEdition(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return ("*", "*", "*", "*", "*");
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return (trimmed, "*", "*", "*", "*");
        }

        if (!trimmed.StartsWith("~", StringComparison.Ordinal))
        {
            return (NormalizeDecodedField(DecodeUriComponent(trimmed), lower: false), "*", "*", "*", "*");
        }

        // segments[0] is the empty text before the leading '~'; fields occupy indices 1..5.
        var segments = trimmed.Split('~');
        string Segment(int index) => index < segments.Length
            ? NormalizeDecodedField(DecodeUriComponent(segments[index]), lower: false)
            : "*";

        return (Segment(1), Segment(2), Segment(3), Segment(4), Segment(5));
    }

    /// <summary>
    /// Same as <see cref="NormalizeField"/> but for text that has already been decoded.
    /// </summary>
    private static string NormalizeDecodedField(string? value, bool lower)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "*";
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        var normalized = lower ? trimmed.ToLowerInvariant() : trimmed;
        return EscapeComponent(normalized);
    }

    /// <summary>
    /// Removes backslash escapes: each backslash is dropped and the character after it is kept verbatim.
    /// A trailing lone backslash is preserved.
    /// </summary>
    private static string UnescapeComponent(string value)
    {
        var builder = new StringBuilder(value.Length);
        var escape = false;
        foreach (var ch in value)
        {
            if (escape)
            {
                builder.Append(ch);
                escape = false;
                continue;
            }

            if (ch == '\\')
            {
                escape = true;
                continue;
            }

            builder.Append(ch);
        }

        if (escape)
        {
            builder.Append('\\');
        }

        return builder.ToString();
    }

    /// <summary>
    /// Prefixes every character for which <see cref="RequiresEscape"/> is true with a backslash.
    /// </summary>
    private static string EscapeComponent(string value)
    {
        if (value.Length == 0)
        {
            return value;
        }

        var builder = new StringBuilder(value.Length * 2);
        foreach (var ch in value)
        {
            if (RequiresEscape(ch))
            {
                builder.Append('\\');
            }

            builder.Append(ch);
        }

        return builder.ToString();
    }

    /// <summary>
    /// Decides whether a character must be backslash-escaped in the output. Letters, digits,
    /// <c>_</c>, <c>-</c> and <c>.</c> are never escaped; whitespace and the listed punctuation always are.
    /// </summary>
    private static bool RequiresEscape(char ch)
    {
        if (char.IsLetterOrDigit(ch))
        {
            return false;
        }

        if (char.IsWhiteSpace(ch))
        {
            return true;
        }

        return ch switch
        {
            '_' or '-' or '.' => false,
            // Keep wildcard markers literal only when entire component is wildcard handled earlier.
            '*' => true,
            _ => CharactersRequiringEscape.Contains(ch)
        };
    }

    /// <summary>
    /// Replaces each <c>%XX</c> sequence (two hex digits) with the character whose code is <c>0xXX</c>.
    /// Malformed or truncated sequences are copied through unchanged.
    /// </summary>
    private static string DecodeUriComponent(string value)
    {
        var builder = new StringBuilder(value.Length);
        for (var i = 0; i < value.Length; i++)
        {
            var ch = value[i];
            if (ch == '%' && i + 2 < value.Length && IsHex(value[i + 1]) && IsHex(value[i + 2]))
            {
                var hex = new string(new[] { value[i + 1], value[i + 2] });
                var decoded = (char)int.Parse(hex, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                builder.Append(decoded);
                i += 2; // skip the two hex digits just consumed
            }
            else
            {
                builder.Append(ch);
            }
        }

        return builder.ToString();
    }

    private static bool IsHex(char ch)
        => ch is >= '0' and <= '9' or >= 'A' and <= 'F' or >= 'a' and <= 'f';

    /// <summary>
    /// Splits on unescaped colons. Backslash escapes are kept in the output so that later stages can
    /// still tell an escaped colon from a separator. Always returns at least one element.
    /// </summary>
    private static List<string> SplitComponents(string value)
    {
        var results = new List<string>();
        var builder = new StringBuilder();
        var escape = false;
        foreach (var ch in value)
        {
            if (escape)
            {
                builder.Append(ch);
                escape = false;
                continue;
            }

            if (ch == '\\')
            {
                builder.Append(ch);
                escape = true;
                continue;
            }

            if (ch == ':')
            {
                results.Add(builder.ToString());
                builder.Clear();
                continue;
            }

            builder.Append(ch);
        }

        results.Add(builder.ToString());
        return results;
    }
}

View File

@@ -1,32 +1,32 @@
namespace StellaOps.Concelier.Normalization.Identifiers;
/// <summary>
/// Entry points for bringing package identifiers (Package URLs and CPE names) into canonical form.
/// </summary>
public static class IdentifierNormalizer
{
    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL and returns both its canonical string and the parsed object.
    /// </summary>
    /// <param name="value">Candidate Package URL; may be <c>null</c>.</param>
    /// <param name="normalized">Canonical string on success; <c>null</c> on failure.</param>
    /// <param name="packageUrl">Parsed Package URL on success; <c>null</c> on failure.</param>
    /// <returns><c>true</c> when <see cref="PackageUrl.TryParse"/> accepts the input.</returns>
    public static bool TryNormalizePackageUrl(string? value, out string? normalized, out PackageUrl? packageUrl)
    {
        normalized = null;
        packageUrl = null;

        if (PackageUrl.TryParse(value, out var parsed))
        {
            normalized = parsed!.ToCanonicalString();
            packageUrl = parsed;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL and returns only its canonical string.
    /// </summary>
    /// <param name="value">Candidate Package URL; may be <c>null</c>.</param>
    /// <param name="normalized">Canonical string on success; <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the input parses as a Package URL.</returns>
    public static bool TryNormalizePackageUrl(string? value, out string? normalized)
        => TryNormalizePackageUrl(value, out normalized, out _);

    /// <summary>
    /// Normalizes a CPE identifier by delegating to <c>Cpe23.TryNormalize</c>.
    /// </summary>
    /// <param name="value">Candidate CPE string; may be <c>null</c>.</param>
    /// <param name="normalized">Result produced by <c>Cpe23.TryNormalize</c>.</param>
    /// <returns>The value returned by <c>Cpe23.TryNormalize</c>.</returns>
    public static bool TryNormalizeCpe(string? value, out string? normalized)
        => Cpe23.TryNormalize(value, out normalized);
}
namespace StellaOps.Concelier.Normalization.Identifiers;
/// <summary>
/// Entry points for bringing package identifiers (Package URLs and CPE names) into canonical form.
/// </summary>
public static class IdentifierNormalizer
{
    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL and returns both its canonical string and the parsed object.
    /// </summary>
    /// <param name="value">Candidate Package URL; may be <c>null</c>.</param>
    /// <param name="normalized">Canonical string on success; <c>null</c> on failure.</param>
    /// <param name="packageUrl">Parsed Package URL on success; <c>null</c> on failure.</param>
    /// <returns><c>true</c> when <see cref="PackageUrl.TryParse"/> accepts the input.</returns>
    public static bool TryNormalizePackageUrl(string? value, out string? normalized, out PackageUrl? packageUrl)
    {
        normalized = null;
        packageUrl = null;

        if (PackageUrl.TryParse(value, out var parsed))
        {
            normalized = parsed!.ToCanonicalString();
            packageUrl = parsed;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL and returns only its canonical string.
    /// </summary>
    /// <param name="value">Candidate Package URL; may be <c>null</c>.</param>
    /// <param name="normalized">Canonical string on success; <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the input parses as a Package URL.</returns>
    public static bool TryNormalizePackageUrl(string? value, out string? normalized)
        => TryNormalizePackageUrl(value, out normalized, out _);

    /// <summary>
    /// Normalizes a CPE identifier by delegating to <c>Cpe23.TryNormalize</c>.
    /// </summary>
    /// <param name="value">Candidate CPE string; may be <c>null</c>.</param>
    /// <param name="normalized">Result produced by <c>Cpe23.TryNormalize</c>.</param>
    /// <returns>The value returned by <c>Cpe23.TryNormalize</c>.</returns>
    public static bool TryNormalizeCpe(string? value, out string? normalized)
        => Cpe23.TryNormalize(value, out normalized);
}

View File

@@ -1,299 +1,299 @@
using System.Collections.Immutable;
using System.Linq;
using System.Text;
namespace StellaOps.Concelier.Normalization.Identifiers;
/// <summary>
/// Represents a parsed Package URL (purl) identifier with canonical string rendering.
/// </summary>
/// <remarks>
/// Instances are created through <see cref="TryParse"/> or <see cref="Parse"/>. Namespace segments, name,
/// version, qualifier values and subpath segments are stored percent-encoded
/// (<see cref="Uri.EscapeDataString(string)"/>), so <see cref="ToCanonicalString"/> only has to join them.
/// </remarks>
public sealed class PackageUrl
{
    private PackageUrl(
        string type,
        ImmutableArray<string> namespaceSegments,
        string name,
        string? version,
        ImmutableArray<KeyValuePair<string, string>> qualifiers,
        ImmutableArray<string> subpathSegments,
        string original)
    {
        Type = type;
        NamespaceSegments = namespaceSegments;
        Name = name;
        Version = version;
        Qualifiers = qualifiers;
        SubpathSegments = subpathSegments;
        Original = original;
    }

    /// <summary>Package type, lower-cased (for example <c>npm</c>).</summary>
    public string Type { get; }

    /// <summary>Percent-encoded namespace segments in order; empty when the purl has no namespace.</summary>
    public ImmutableArray<string> NamespaceSegments { get; }

    /// <summary>Percent-encoded package name.</summary>
    public string Name { get; }

    /// <summary>Percent-encoded version; an empty string when the purl carries no version.</summary>
    public string? Version { get; }

    /// <summary>Qualifiers sorted by key (ordinal); keys are lower-cased, values percent-encoded.</summary>
    public ImmutableArray<KeyValuePair<string, string>> Qualifiers { get; }

    /// <summary>Percent-encoded subpath segments in order; empty when the purl has no subpath.</summary>
    public ImmutableArray<string> SubpathSegments { get; }

    /// <summary>The input string as given to the parser, with surrounding whitespace trimmed.</summary>
    public string Original { get; }

    // Package types whose namespace is lower-cased during parsing.
    private static readonly HashSet<string> LowerCaseNamespaceTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "maven",
        "npm",
        "pypi",
        "nuget",
        "composer",
        "gem",
        "apk",
        "deb",
        "rpm",
        "oci",
    };

    // Package types whose name is lower-cased during parsing (note: no "maven").
    private static readonly HashSet<string> LowerCaseNameTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "npm",
        "pypi",
        "nuget",
        "composer",
        "gem",
        "apk",
        "deb",
        "rpm",
        "oci",
    };

    /// <summary>
    /// Attempts to parse <paramref name="value"/> as a Package URL.
    /// </summary>
    /// <param name="value">Candidate purl; may be <c>null</c>.</param>
    /// <param name="packageUrl">The parsed purl on success; <c>null</c> on failure.</param>
    /// <returns>
    /// <c>true</c> when the input starts with <c>pkg:</c> (case-insensitive), has a non-blank type
    /// followed by <c>'/'</c>, and has a non-empty name.
    /// </returns>
    public static bool TryParse(string? value, out PackageUrl? packageUrl)
    {
        packageUrl = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();
        if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        var remainder = trimmed[4..];
        var firstSlash = remainder.IndexOf('/');
        if (firstSlash <= 0)
        {
            return false;
        }

        var type = remainder[..firstSlash].Trim().ToLowerInvariant();
        if (type.Length == 0)
        {
            // "pkg: /name" would otherwise be accepted and rendered as "pkg:/name".
            return false;
        }

        remainder = remainder[(firstSlash + 1)..];

        // Peel the optional parts off from the right: "#subpath", then "?qualifiers", then "@version".
        var subpathPart = string.Empty;
        var subpathIndex = remainder.IndexOf('#');
        if (subpathIndex >= 0)
        {
            subpathPart = remainder[(subpathIndex + 1)..];
            remainder = remainder[..subpathIndex];
        }

        var qualifierPart = string.Empty;
        var qualifierIndex = remainder.IndexOf('?');
        if (qualifierIndex >= 0)
        {
            qualifierPart = remainder[(qualifierIndex + 1)..];
            remainder = remainder[..qualifierIndex];
        }

        string? version = null;
        var versionIndex = remainder.LastIndexOf('@');

        // An '@' that opens a path segment (start of the path or right after '/') has no name in front
        // of it, so it cannot be a version separator. This is the unencoded npm scope form,
        // e.g. "pkg:npm/@angular/core", which would otherwise lose its whole path to the "version".
        if (versionIndex > 0 && remainder[versionIndex - 1] != '/')
        {
            version = remainder[(versionIndex + 1)..];
            remainder = remainder[..versionIndex];
        }

        if (string.IsNullOrWhiteSpace(remainder))
        {
            return false;
        }

        var rawSegments = remainder.Split('/', StringSplitOptions.RemoveEmptyEntries);
        if (rawSegments.Length == 0)
        {
            return false;
        }

        var shouldLowerNamespace = LowerCaseNamespaceTypes.Contains(type);
        var shouldLowerName = LowerCaseNameTypes.Contains(type);

        // Every segment except the last belongs to the namespace.
        var namespaceBuilder = ImmutableArray.CreateBuilder<string>(Math.Max(0, rawSegments.Length - 1));
        for (var i = 0; i < rawSegments.Length - 1; i++)
        {
            var segment = Uri.UnescapeDataString(rawSegments[i].Trim());
            if (segment.Length == 0)
            {
                continue;
            }

            if (shouldLowerNamespace)
            {
                segment = segment.ToLowerInvariant();
            }

            namespaceBuilder.Add(EscapePathSegment(segment));
        }

        var nameSegment = Uri.UnescapeDataString(rawSegments[^1].Trim());
        if (nameSegment.Length == 0)
        {
            return false;
        }

        if (shouldLowerName)
        {
            nameSegment = nameSegment.ToLowerInvariant();
        }

        packageUrl = new PackageUrl(
            type,
            namespaceBuilder.ToImmutable(),
            EscapePathSegment(nameSegment),
            NormalizeComponent(version, escape: true, lowerCase: false),
            ParseQualifiers(qualifierPart),
            ParseSubpath(subpathPart),
            trimmed);
        return true;
    }

    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL.
    /// </summary>
    /// <param name="value">Candidate purl.</param>
    /// <returns>The parsed purl.</returns>
    /// <exception cref="FormatException">The input is not accepted by <see cref="TryParse"/>.</exception>
    public static PackageUrl Parse(string value)
    {
        if (!TryParse(value, out var parsed))
        {
            throw new FormatException($"Input '{value}' is not a valid Package URL.");
        }

        return parsed!;
    }

    /// <summary>
    /// Renders the purl as <c>pkg:type/namespace/name@version?qualifiers#subpath</c>, omitting absent parts.
    /// </summary>
    public string ToCanonicalString()
    {
        var builder = new StringBuilder("pkg:");
        builder.Append(Type);
        builder.Append('/');

        if (!NamespaceSegments.IsDefaultOrEmpty)
        {
            builder.Append(string.Join('/', NamespaceSegments));
            builder.Append('/');
        }

        builder.Append(Name);

        if (!string.IsNullOrEmpty(Version))
        {
            builder.Append('@');
            builder.Append(Version);
        }

        if (!Qualifiers.IsDefaultOrEmpty)
        {
            builder.Append('?');
            builder.Append(string.Join('&', Qualifiers.Select(static kvp => $"{kvp.Key}={kvp.Value}")));
        }

        if (!SubpathSegments.IsDefaultOrEmpty)
        {
            builder.Append('#');
            builder.Append(string.Join('/', SubpathSegments));
        }

        return builder.ToString();
    }

    /// <inheritdoc />
    public override string ToString() => ToCanonicalString();

    // Parses "k1=v1&k2=v2" into key-sorted pairs. Entries without '=', with an empty key, or with an
    // empty value are dropped, so "pkg:x/y?a=" and "pkg:x/y" canonicalize identically. When a key
    // repeats, the last occurrence wins.
    private static ImmutableArray<KeyValuePair<string, string>> ParseQualifiers(string qualifierPart)
    {
        if (string.IsNullOrEmpty(qualifierPart))
        {
            return ImmutableArray<KeyValuePair<string, string>>.Empty;
        }

        var map = new SortedDictionary<string, string>(StringComparer.Ordinal);
        foreach (var entry in qualifierPart.Split('&', StringSplitOptions.RemoveEmptyEntries))
        {
            var trimmed = entry.Trim();
            var equalsIndex = trimmed.IndexOf('=');
            if (equalsIndex <= 0)
            {
                continue;
            }

            var key = Uri.UnescapeDataString(trimmed[..equalsIndex]).Trim().ToLowerInvariant();
            if (key.Length == 0)
            {
                continue;
            }

            var value = NormalizeComponent(trimmed[(equalsIndex + 1)..], escape: true, lowerCase: false);
            if (value.Length == 0)
            {
                continue;
            }

            map[key] = value;
        }

        return map.ToImmutableArray();
    }

    // Splits the subpath on '/', dropping empty, "." and ".." segments, and percent-encodes the rest.
    private static ImmutableArray<string> ParseSubpath(string subpathPart)
    {
        if (string.IsNullOrEmpty(subpathPart))
        {
            return ImmutableArray<string>.Empty;
        }

        var segments = subpathPart.Split('/', StringSplitOptions.RemoveEmptyEntries);
        var builder = ImmutableArray.CreateBuilder<string>(segments.Length);
        foreach (var raw in segments)
        {
            var segment = Uri.UnescapeDataString(raw.Trim());
            if (segment.Length == 0 || segment is "." or "..")
            {
                continue;
            }

            builder.Add(EscapePathSegment(segment));
        }

        return builder.ToImmutable();
    }

    // Percent-decodes a trimmed component, optionally lower-cases it, and optionally re-encodes it.
    // Null or blank input yields an empty string.
    private static string NormalizeComponent(string? value, bool escape, bool lowerCase)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var unescaped = Uri.UnescapeDataString(value.Trim());
        if (lowerCase)
        {
            unescaped = unescaped.ToLowerInvariant();
        }

        return escape ? Uri.EscapeDataString(unescaped) : unescaped;
    }

    // Percent-encodes one already-decoded path segment.
    private static string EscapePathSegment(string value) => Uri.EscapeDataString(value);
}
using System.Collections.Immutable;
using System.Linq;
using System.Text;
namespace StellaOps.Concelier.Normalization.Identifiers;
/// <summary>
/// Represents a parsed Package URL (purl) identifier with canonical string rendering.
/// </summary>
/// <remarks>
/// Instances are created through <see cref="TryParse"/> or <see cref="Parse"/>. Namespace segments, name,
/// version, qualifier values and subpath segments are stored percent-encoded
/// (<see cref="Uri.EscapeDataString(string)"/>), so <see cref="ToCanonicalString"/> only has to join them.
/// </remarks>
public sealed class PackageUrl
{
    private PackageUrl(
        string type,
        ImmutableArray<string> namespaceSegments,
        string name,
        string? version,
        ImmutableArray<KeyValuePair<string, string>> qualifiers,
        ImmutableArray<string> subpathSegments,
        string original)
    {
        Type = type;
        NamespaceSegments = namespaceSegments;
        Name = name;
        Version = version;
        Qualifiers = qualifiers;
        SubpathSegments = subpathSegments;
        Original = original;
    }

    /// <summary>Package type, lower-cased (for example <c>npm</c>).</summary>
    public string Type { get; }

    /// <summary>Percent-encoded namespace segments in order; empty when the purl has no namespace.</summary>
    public ImmutableArray<string> NamespaceSegments { get; }

    /// <summary>Percent-encoded package name.</summary>
    public string Name { get; }

    /// <summary>Percent-encoded version; an empty string when the purl carries no version.</summary>
    public string? Version { get; }

    /// <summary>Qualifiers sorted by key (ordinal); keys are lower-cased, values percent-encoded.</summary>
    public ImmutableArray<KeyValuePair<string, string>> Qualifiers { get; }

    /// <summary>Percent-encoded subpath segments in order; empty when the purl has no subpath.</summary>
    public ImmutableArray<string> SubpathSegments { get; }

    /// <summary>The input string as given to the parser, with surrounding whitespace trimmed.</summary>
    public string Original { get; }

    // Package types whose namespace is lower-cased during parsing.
    private static readonly HashSet<string> LowerCaseNamespaceTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "maven",
        "npm",
        "pypi",
        "nuget",
        "composer",
        "gem",
        "apk",
        "deb",
        "rpm",
        "oci",
    };

    // Package types whose name is lower-cased during parsing (note: no "maven").
    private static readonly HashSet<string> LowerCaseNameTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "npm",
        "pypi",
        "nuget",
        "composer",
        "gem",
        "apk",
        "deb",
        "rpm",
        "oci",
    };

    /// <summary>
    /// Attempts to parse <paramref name="value"/> as a Package URL.
    /// </summary>
    /// <param name="value">Candidate purl; may be <c>null</c>.</param>
    /// <param name="packageUrl">The parsed purl on success; <c>null</c> on failure.</param>
    /// <returns>
    /// <c>true</c> when the input starts with <c>pkg:</c> (case-insensitive), has a non-blank type
    /// followed by <c>'/'</c>, and has a non-empty name.
    /// </returns>
    public static bool TryParse(string? value, out PackageUrl? packageUrl)
    {
        packageUrl = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();
        if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        var remainder = trimmed[4..];
        var firstSlash = remainder.IndexOf('/');
        if (firstSlash <= 0)
        {
            return false;
        }

        var type = remainder[..firstSlash].Trim().ToLowerInvariant();
        if (type.Length == 0)
        {
            // "pkg: /name" would otherwise be accepted and rendered as "pkg:/name".
            return false;
        }

        remainder = remainder[(firstSlash + 1)..];

        // Peel the optional parts off from the right: "#subpath", then "?qualifiers", then "@version".
        var subpathPart = string.Empty;
        var subpathIndex = remainder.IndexOf('#');
        if (subpathIndex >= 0)
        {
            subpathPart = remainder[(subpathIndex + 1)..];
            remainder = remainder[..subpathIndex];
        }

        var qualifierPart = string.Empty;
        var qualifierIndex = remainder.IndexOf('?');
        if (qualifierIndex >= 0)
        {
            qualifierPart = remainder[(qualifierIndex + 1)..];
            remainder = remainder[..qualifierIndex];
        }

        string? version = null;
        var versionIndex = remainder.LastIndexOf('@');

        // An '@' that opens a path segment (start of the path or right after '/') has no name in front
        // of it, so it cannot be a version separator. This is the unencoded npm scope form,
        // e.g. "pkg:npm/@angular/core", which would otherwise lose its whole path to the "version".
        if (versionIndex > 0 && remainder[versionIndex - 1] != '/')
        {
            version = remainder[(versionIndex + 1)..];
            remainder = remainder[..versionIndex];
        }

        if (string.IsNullOrWhiteSpace(remainder))
        {
            return false;
        }

        var rawSegments = remainder.Split('/', StringSplitOptions.RemoveEmptyEntries);
        if (rawSegments.Length == 0)
        {
            return false;
        }

        var shouldLowerNamespace = LowerCaseNamespaceTypes.Contains(type);
        var shouldLowerName = LowerCaseNameTypes.Contains(type);

        // Every segment except the last belongs to the namespace.
        var namespaceBuilder = ImmutableArray.CreateBuilder<string>(Math.Max(0, rawSegments.Length - 1));
        for (var i = 0; i < rawSegments.Length - 1; i++)
        {
            var segment = Uri.UnescapeDataString(rawSegments[i].Trim());
            if (segment.Length == 0)
            {
                continue;
            }

            if (shouldLowerNamespace)
            {
                segment = segment.ToLowerInvariant();
            }

            namespaceBuilder.Add(EscapePathSegment(segment));
        }

        var nameSegment = Uri.UnescapeDataString(rawSegments[^1].Trim());
        if (nameSegment.Length == 0)
        {
            return false;
        }

        if (shouldLowerName)
        {
            nameSegment = nameSegment.ToLowerInvariant();
        }

        packageUrl = new PackageUrl(
            type,
            namespaceBuilder.ToImmutable(),
            EscapePathSegment(nameSegment),
            NormalizeComponent(version, escape: true, lowerCase: false),
            ParseQualifiers(qualifierPart),
            ParseSubpath(subpathPart),
            trimmed);
        return true;
    }

    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL.
    /// </summary>
    /// <param name="value">Candidate purl.</param>
    /// <returns>The parsed purl.</returns>
    /// <exception cref="FormatException">The input is not accepted by <see cref="TryParse"/>.</exception>
    public static PackageUrl Parse(string value)
    {
        if (!TryParse(value, out var parsed))
        {
            throw new FormatException($"Input '{value}' is not a valid Package URL.");
        }

        return parsed!;
    }

    /// <summary>
    /// Renders the purl as <c>pkg:type/namespace/name@version?qualifiers#subpath</c>, omitting absent parts.
    /// </summary>
    public string ToCanonicalString()
    {
        var builder = new StringBuilder("pkg:");
        builder.Append(Type);
        builder.Append('/');

        if (!NamespaceSegments.IsDefaultOrEmpty)
        {
            builder.Append(string.Join('/', NamespaceSegments));
            builder.Append('/');
        }

        builder.Append(Name);

        if (!string.IsNullOrEmpty(Version))
        {
            builder.Append('@');
            builder.Append(Version);
        }

        if (!Qualifiers.IsDefaultOrEmpty)
        {
            builder.Append('?');
            builder.Append(string.Join('&', Qualifiers.Select(static kvp => $"{kvp.Key}={kvp.Value}")));
        }

        if (!SubpathSegments.IsDefaultOrEmpty)
        {
            builder.Append('#');
            builder.Append(string.Join('/', SubpathSegments));
        }

        return builder.ToString();
    }

    /// <inheritdoc />
    public override string ToString() => ToCanonicalString();

    // Parses "k1=v1&k2=v2" into key-sorted pairs. Entries without '=', with an empty key, or with an
    // empty value are dropped, so "pkg:x/y?a=" and "pkg:x/y" canonicalize identically. When a key
    // repeats, the last occurrence wins.
    private static ImmutableArray<KeyValuePair<string, string>> ParseQualifiers(string qualifierPart)
    {
        if (string.IsNullOrEmpty(qualifierPart))
        {
            return ImmutableArray<KeyValuePair<string, string>>.Empty;
        }

        var map = new SortedDictionary<string, string>(StringComparer.Ordinal);
        foreach (var entry in qualifierPart.Split('&', StringSplitOptions.RemoveEmptyEntries))
        {
            var trimmed = entry.Trim();
            var equalsIndex = trimmed.IndexOf('=');
            if (equalsIndex <= 0)
            {
                continue;
            }

            var key = Uri.UnescapeDataString(trimmed[..equalsIndex]).Trim().ToLowerInvariant();
            if (key.Length == 0)
            {
                continue;
            }

            var value = NormalizeComponent(trimmed[(equalsIndex + 1)..], escape: true, lowerCase: false);
            if (value.Length == 0)
            {
                continue;
            }

            map[key] = value;
        }

        return map.ToImmutableArray();
    }

    // Splits the subpath on '/', dropping empty, "." and ".." segments, and percent-encodes the rest.
    private static ImmutableArray<string> ParseSubpath(string subpathPart)
    {
        if (string.IsNullOrEmpty(subpathPart))
        {
            return ImmutableArray<string>.Empty;
        }

        var segments = subpathPart.Split('/', StringSplitOptions.RemoveEmptyEntries);
        var builder = ImmutableArray.CreateBuilder<string>(segments.Length);
        foreach (var raw in segments)
        {
            var segment = Uri.UnescapeDataString(raw.Trim());
            if (segment.Length == 0 || segment is "." or "..")
            {
                continue;
            }

            builder.Add(EscapePathSegment(segment));
        }

        return builder.ToImmutable();
    }

    // Percent-decodes a trimmed component, optionally lower-cases it, and optionally re-encodes it.
    // Null or blank input yields an empty string.
    private static string NormalizeComponent(string? value, bool escape, bool lowerCase)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var unescaped = Uri.UnescapeDataString(value.Trim());
        if (lowerCase)
        {
            unescaped = unescaped.ToLowerInvariant();
        }

        return escape ? Uri.EscapeDataString(unescaped) : unescaped;
    }

    // Percent-encodes one already-decoded path segment.
    private static string EscapePathSegment(string value) => Uri.EscapeDataString(value);
}

View File

@@ -1,118 +1,118 @@
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Normalization.Text;
/// <summary>
/// Produces one plain-text advisory description from a set of localized candidates: HTML entities are
/// decoded, tags are removed, whitespace is collapsed, and an English candidate is preferred when present.
/// </summary>
public static class DescriptionNormalizer
{
    private static readonly Regex HtmlTagRegex = new("<[^>]+>", RegexOptions.Compiled | RegexOptions.CultureInvariant);
    private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // Tried in order; the first language with a usable candidate wins.
    private static readonly string[] PreferredLanguages = { "en", "en-us", "en-gb" };

    /// <summary>
    /// Picks the best description among <paramref name="candidates"/> and returns it sanitized.
    /// </summary>
    /// <param name="candidates">Localized texts, in source order.</param>
    /// <returns>
    /// The chosen text with its primary language tag. When no candidate has usable text the result is an
    /// empty text tagged <c>en</c>; a chosen candidate without a language is also tagged <c>en</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="candidates"/> is <c>null</c>.</exception>
    public static NormalizedDescription Normalize(IEnumerable<LocalizedText> candidates)
    {
        if (candidates is null)
        {
            throw new ArgumentNullException(nameof(candidates));
        }

        var usable = new List<(string Text, string Language, int Index)>();
        var position = 0;
        foreach (var candidate in candidates)
        {
            // Every candidate consumes a position, including the ones skipped below.
            var slot = position++;
            if (string.IsNullOrWhiteSpace(candidate.Text))
            {
                continue;
            }

            var cleaned = Sanitize(candidate.Text);
            if (!string.IsNullOrWhiteSpace(cleaned))
            {
                usable.Add((cleaned, NormalizeLanguage(candidate.Language), slot));
            }
        }

        if (usable.Count == 0)
        {
            return new NormalizedDescription(string.Empty, "en");
        }

        var (text, language) = SelectBest(usable);
        return new NormalizedDescription(text, language.Length > 0 ? language : "en");
    }

    // Returns the first entry in a preferred language, or the entry with the lowest index otherwise.
    private static (string Text, string Language) SelectBest(List<(string Text, string Language, int Index)> processed)
    {
        foreach (var preferred in PreferredLanguages)
        {
            var wanted = NormalizeLanguage(preferred);
            foreach (var entry in processed)
            {
                if (!entry.Language.Equals(wanted, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (!string.IsNullOrEmpty(entry.Text))
                {
                    return (entry.Text, wanted);
                }

                // Only the first entry in this language is considered; move on to the next preference.
                break;
            }
        }

        var earliest = processed.OrderBy(entry => entry.Index).First();
        return (earliest.Text, earliest.Language);
    }

    // Decodes HTML entities, replaces each tag with a space, then collapses whitespace runs and trims.
    private static string Sanitize(string text)
    {
        var decoded = WebUtility.HtmlDecode(text) ?? string.Empty;
        var untagged = HtmlTagRegex.Replace(decoded, " ");
        return WhitespaceRegex.Replace(untagged, " ").Trim();
    }

    // Reduces a language tag to its lower-cased primary subtag ("en-US" -> "en"); blank input gives "".
    private static string NormalizeLanguage(string? language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return string.Empty;
        }

        var tag = language.Trim();
        try
        {
            var cultureName = CultureInfo.GetCultureInfo(tag).Name;
            if (!string.IsNullOrEmpty(cultureName))
            {
                // Split always yields at least one element, so index 0 is safe.
                var head = cultureName.Split('-')[0];
                if (!string.IsNullOrWhiteSpace(head))
                {
                    return head.ToLowerInvariant();
                }
            }
        }
        catch (CultureNotFoundException)
        {
            // Unknown culture name: fall through to splitting the tag by hand.
        }

        var primary = tag.Split(new[] { '-', '_' }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault();
        return string.IsNullOrWhiteSpace(primary) ? string.Empty : primary.ToLowerInvariant();
    }
}
/// <summary>
/// A description candidate together with the language it is written in.
/// </summary>
/// <param name="Text">Raw description text, possibly containing markup; may be <c>null</c>.</param>
/// <param name="Language">Language tag of the text (for example <c>en-US</c>); may be <c>null</c>.</param>
public readonly record struct LocalizedText(string? Text, string? Language);
/// <summary>
/// The outcome of description normalization.
/// </summary>
/// <param name="Text">Sanitized plain text.</param>
/// <param name="Language">Lower-cased primary language tag of the text.</param>
public readonly record struct NormalizedDescription(string Text, string Language);
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
namespace StellaOps.Concelier.Normalization.Text;
/// <summary>
/// Produces one plain-text advisory description from a set of localized candidates: HTML entities are
/// decoded, tags are removed, whitespace is collapsed, and an English candidate is preferred when present.
/// </summary>
public static class DescriptionNormalizer
{
    private static readonly Regex HtmlTagRegex = new("<[^>]+>", RegexOptions.Compiled | RegexOptions.CultureInvariant);
    private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // Tried in order; the first language with a usable candidate wins.
    private static readonly string[] PreferredLanguages = { "en", "en-us", "en-gb" };

    /// <summary>
    /// Picks the best description among <paramref name="candidates"/> and returns it sanitized.
    /// </summary>
    /// <param name="candidates">Localized texts, in source order.</param>
    /// <returns>
    /// The chosen text with its primary language tag. When no candidate has usable text the result is an
    /// empty text tagged <c>en</c>; a chosen candidate without a language is also tagged <c>en</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="candidates"/> is <c>null</c>.</exception>
    public static NormalizedDescription Normalize(IEnumerable<LocalizedText> candidates)
    {
        if (candidates is null)
        {
            throw new ArgumentNullException(nameof(candidates));
        }

        var usable = new List<(string Text, string Language, int Index)>();
        var position = 0;
        foreach (var candidate in candidates)
        {
            // Every candidate consumes a position, including the ones skipped below.
            var slot = position++;
            if (string.IsNullOrWhiteSpace(candidate.Text))
            {
                continue;
            }

            var cleaned = Sanitize(candidate.Text);
            if (!string.IsNullOrWhiteSpace(cleaned))
            {
                usable.Add((cleaned, NormalizeLanguage(candidate.Language), slot));
            }
        }

        if (usable.Count == 0)
        {
            return new NormalizedDescription(string.Empty, "en");
        }

        var (text, language) = SelectBest(usable);
        return new NormalizedDescription(text, language.Length > 0 ? language : "en");
    }

    // Returns the first entry in a preferred language, or the entry with the lowest index otherwise.
    private static (string Text, string Language) SelectBest(List<(string Text, string Language, int Index)> processed)
    {
        foreach (var preferred in PreferredLanguages)
        {
            var wanted = NormalizeLanguage(preferred);
            foreach (var entry in processed)
            {
                if (!entry.Language.Equals(wanted, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (!string.IsNullOrEmpty(entry.Text))
                {
                    return (entry.Text, wanted);
                }

                // Only the first entry in this language is considered; move on to the next preference.
                break;
            }
        }

        var earliest = processed.OrderBy(entry => entry.Index).First();
        return (earliest.Text, earliest.Language);
    }

    // Decodes HTML entities, replaces each tag with a space, then collapses whitespace runs and trims.
    private static string Sanitize(string text)
    {
        var decoded = WebUtility.HtmlDecode(text) ?? string.Empty;
        var untagged = HtmlTagRegex.Replace(decoded, " ");
        return WhitespaceRegex.Replace(untagged, " ").Trim();
    }

    // Reduces a language tag to its lower-cased primary subtag ("en-US" -> "en"); blank input gives "".
    private static string NormalizeLanguage(string? language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return string.Empty;
        }

        var tag = language.Trim();
        try
        {
            var cultureName = CultureInfo.GetCultureInfo(tag).Name;
            if (!string.IsNullOrEmpty(cultureName))
            {
                // Split always yields at least one element, so index 0 is safe.
                var head = cultureName.Split('-')[0];
                if (!string.IsNullOrWhiteSpace(head))
                {
                    return head.ToLowerInvariant();
                }
            }
        }
        catch (CultureNotFoundException)
        {
            // Unknown culture name: fall through to splitting the tag by hand.
        }

        var primary = tag.Split(new[] { '-', '_' }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault();
        return string.IsNullOrWhiteSpace(primary) ? string.Empty : primary.ToLowerInvariant();
    }
}
/// <summary>
/// A description candidate together with the language it is written in.
/// </summary>
/// <param name="Text">Raw description text, possibly containing markup; may be <c>null</c>.</param>
/// <param name="Language">Language tag of the text (for example <c>en-US</c>); may be <c>null</c>.</param>
public readonly record struct LocalizedText(string? Text, string? Language);
/// <summary>
/// The outcome of description normalization.
/// </summary>
/// <param name="Text">Sanitized plain text.</param>
/// <param name="Language">Lower-cased primary language tag of the text.</param>
public readonly record struct NormalizedDescription(string Text, string Language);