Initial commit (history squashed)

This commit is contained in:
2025-10-07 10:14:21 +03:00
committed by Vladimir Moushkov
commit 6cbfd47ecd
621 changed files with 54480 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
using System.Reflection;
// Assembly-level identity and version metadata for the StellaOps.Feedser.Normalization assembly.
[assembly: AssemblyCompany("StellaOps")]
[assembly: AssemblyProduct("StellaOps.Feedser.Normalization")]
[assembly: AssemblyTitle("StellaOps.Feedser.Normalization")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
[assembly: AssemblyInformationalVersion("1.0.0")]

View File

@@ -0,0 +1,529 @@
using System.Collections.Immutable;
using System.Linq;
using StellaOps.Feedser.Models;
namespace StellaOps.Feedser.Normalization.Cvss;
/// <summary>
/// Provides helpers to canonicalize CVSS vectors and fill in derived score/severity information.
/// </summary>
/// <remarks>
/// Handles CVSS v2.0, v3.0 and v3.1 base metrics. The canonical vector always carries a
/// <c>CVSS:&lt;version&gt;/</c> prefix, lists the required base metrics in specification order and
/// appends any additional metrics sorted by key (ordinal).
/// </remarks>
public static class CvssMetricNormalizer
{
    private static readonly string[] Cvss3BaseMetrics = { "AV", "AC", "PR", "UI", "S", "C", "I", "A" };
    private static readonly string[] Cvss2BaseMetrics = { "AV", "AC", "AU", "C", "I", "A" };

    /// <summary>
    /// Largest difference between a supplied and a computed base score for which the supplied score is kept.
    /// </summary>
    private const double ScoreTolerance = 0.2;

    /// <summary>
    /// Attempts to canonicalize a CVSS vector and derive its base score and severity.
    /// </summary>
    /// <param name="version">Optional version hint (for example <c>3.1</c> or <c>v2</c>); a <c>CVSS:x.y/</c> prefix in <paramref name="vector"/> takes precedence.</param>
    /// <param name="vector">The CVSS vector string, with or without a <c>CVSS:x.y/</c> prefix.</param>
    /// <param name="baseScore">Optional supplied base score. It is kept (rounded to one decimal) only when it is a finite value in [0, 10] within 0.2 of the computed score; otherwise the computed score is used.</param>
    /// <param name="baseSeverity">Optional supplied severity label; when it is not recognized the severity is derived from the score.</param>
    /// <param name="metric">Receives the normalized metric on success; <c>default</c> otherwise.</param>
    /// <returns><c>true</c> when the vector could be parsed and scored; otherwise <c>false</c>.</returns>
    public static bool TryNormalize(
        string? version,
        string? vector,
        double? baseScore,
        string? baseSeverity,
        out CvssNormalizedMetric metric)
    {
        metric = default;
        if (string.IsNullOrWhiteSpace(vector))
        {
            return false;
        }

        var rawVector = vector.Trim();
        if (!TryDetermineVersion(version, rawVector, out var parsedVersion, out var vectorWithoutPrefix))
        {
            return false;
        }

        if (!TryParseMetrics(vectorWithoutPrefix, parsedVersion, out var canonicalVector, out var metrics))
        {
            return false;
        }

        if (!TryComputeBaseScore(parsedVersion, metrics, out var computedScore))
        {
            return false;
        }

        // Start from the computed score and only prefer the supplied one when it is plausible.
        // NaN, infinities and out-of-range values are ignored: NaN in particular compares false
        // against every threshold and would otherwise be reported as the highest severity.
        var normalizedScore = computedScore;
        if (baseScore is { } supplied && double.IsFinite(supplied) && supplied is >= 0.0 and <= 10.0)
        {
            var rounded = Math.Round(supplied, 1, MidpointRounding.AwayFromZero);
            if (Math.Abs(rounded - computedScore) <= ScoreTolerance)
            {
                normalizedScore = rounded;
            }
        }

        var severity = NormalizeSeverity(baseSeverity, parsedVersion)
            ?? DetermineSeverity(normalizedScore, parsedVersion);

        metric = new CvssNormalizedMetric(
            ToVersionString(parsedVersion),
            canonicalVector,
            normalizedScore,
            severity);
        return true;
    }

    /// <summary>
    /// Resolves the CVSS version and strips any <c>CVSS:x.y/</c> prefix from the vector.
    /// Precedence: recognized vector prefix, then the explicit version token, then a heuristic
    /// (<c>PR:</c> implies 3.1, otherwise <c>AU:</c> implies 2.0, otherwise 3.1). Always succeeds.
    /// </summary>
    private static bool TryDetermineVersion(string? versionToken, string vector, out CvssVersion version, out string withoutPrefix)
    {
        if (TryExtractVersionFromVector(vector, out version, out withoutPrefix))
        {
            return true;
        }

        withoutPrefix = StripPrefix(vector);
        if (!string.IsNullOrWhiteSpace(versionToken) && TryMapVersion(versionToken, out version))
        {
            return true;
        }

        var upper = vector.ToUpperInvariant();
        var looksLikeV2 = !upper.Contains("PR:", StringComparison.Ordinal)
            && upper.Contains("AU:", StringComparison.Ordinal);
        version = looksLikeV2 ? CvssVersion.V20 : CvssVersion.V31;
        return true;
    }

    /// <summary>
    /// Removes a leading <c>CVSS:&lt;anything&gt;/</c> prefix. Returns the vector unchanged when it has no
    /// <c>CVSS:</c> prefix and an empty string when nothing follows the prefix.
    /// </summary>
    private static string StripPrefix(string vector)
    {
        if (!vector.StartsWith("CVSS:", StringComparison.OrdinalIgnoreCase))
        {
            return vector;
        }

        var remainder = vector[5..];
        var slashIndex = remainder.IndexOf('/');
        return slashIndex >= 0 && slashIndex < remainder.Length - 1
            ? remainder[(slashIndex + 1)..]
            : string.Empty;
    }

    /// <summary>
    /// Reads the version from a <c>CVSS:x.y/</c> prefix. Returns <c>false</c> when the prefix is
    /// missing, malformed or names an unsupported version.
    /// </summary>
    private static bool TryExtractVersionFromVector(string vector, out CvssVersion version, out string withoutPrefix)
    {
        withoutPrefix = vector;
        if (!vector.StartsWith("CVSS:", StringComparison.OrdinalIgnoreCase))
        {
            version = default;
            return false;
        }

        var remainder = vector[5..];
        var slashIndex = remainder.IndexOf('/');
        if (slashIndex <= 0 || slashIndex >= remainder.Length - 1)
        {
            // Either the version token or the metric list is empty.
            version = CvssVersion.V31;
            withoutPrefix = string.Empty;
            return false;
        }

        withoutPrefix = remainder[(slashIndex + 1)..];
        if (TryMapVersion(remainder[..slashIndex], out version))
        {
            return true;
        }

        version = CvssVersion.V31;
        return false;
    }

    /// <summary>
    /// Maps a version token such as <c>3</c>, <c>v3.1</c> or <c>2.0.0</c> to a <see cref="CvssVersion"/>.
    /// A bare <c>3</c> is treated as 3.1.
    /// </summary>
    private static bool TryMapVersion(string token, out CvssVersion version)
    {
        var trimmed = token.Trim();
        if (trimmed.Length == 0)
        {
            version = default;
            return false;
        }

        if (trimmed.StartsWith("v", StringComparison.OrdinalIgnoreCase))
        {
            trimmed = trimmed[1..];
        }

        version = trimmed switch
        {
            "2" or "2.0" or "2.0.0" => CvssVersion.V20,
            "3.0" or "3.0.0" => CvssVersion.V30,
            "3" or "3.1" or "3.1.0" => CvssVersion.V31,
            _ => CvssVersion.Unknown,
        };
        return version != CvssVersion.Unknown;
    }

    /// <summary>
    /// Splits a prefix-less vector into upper-cased metric/value pairs, verifies that every required
    /// base metric for <paramref name="version"/> is present and builds the canonical vector string.
    /// When a metric appears more than once the last occurrence wins.
    /// </summary>
    private static bool TryParseMetrics(
        string vector,
        CvssVersion version,
        out string canonicalVector,
        out ImmutableDictionary<string, string> metrics)
    {
        canonicalVector = string.Empty;
        metrics = ImmutableDictionary<string, string>.Empty;

        var parsed = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (var segment in vector.Split('/', StringSplitOptions.RemoveEmptyEntries))
        {
            var trimmed = segment.Trim();
            if (trimmed.Length == 0)
            {
                continue;
            }

            var index = trimmed.IndexOf(':');
            if (index <= 0 || index == trimmed.Length - 1)
            {
                return false;
            }

            var key = trimmed[..index].Trim().ToUpperInvariant();
            var value = trimmed[(index + 1)..].Trim().ToUpperInvariant();
            if (key.Length == 0 || value.Length == 0)
            {
                return false;
            }

            parsed[key] = value;
        }

        var required = version == CvssVersion.V20 ? Cvss2BaseMetrics : Cvss3BaseMetrics;
        foreach (var name in required)
        {
            if (!parsed.ContainsKey(name))
            {
                return false;
            }
        }

        // Required base metrics first (specification order), then everything else sorted by key.
        var canonicalSegments = new List<string>(parsed.Count + 1);
        foreach (var name in required)
        {
            canonicalSegments.Add($"{name}:{parsed[name]}");
        }

        foreach (var entry in parsed.OrderBy(static pair => pair.Key, StringComparer.Ordinal))
        {
            if (required.Contains(entry.Key))
            {
                continue;
            }

            canonicalSegments.Add($"{entry.Key}:{entry.Value}");
        }

        canonicalVector = $"CVSS:{ToVersionString(version)}/{string.Join('/', canonicalSegments)}";
        metrics = parsed.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase);
        return true;
    }

    /// <summary>
    /// Dispatches to the scoring routine for <paramref name="version"/>; fails for an unknown version.
    /// </summary>
    private static bool TryComputeBaseScore(CvssVersion version, IReadOnlyDictionary<string, string> metrics, out double score)
    {
        switch (version)
        {
            case CvssVersion.V20:
                return TryComputeCvss2(metrics, out score);
            case CvssVersion.V30:
            case CvssVersion.V31:
                return TryComputeCvss3(metrics, out score);
            default:
                score = 0;
                return false;
        }
    }

    /// <summary>
    /// Returns the value of a metric, or <c>null</c> when the metric is absent.
    /// </summary>
    private static string? GetMetric(IReadOnlyDictionary<string, string> metrics, string name)
        => metrics.TryGetValue(name, out var value) ? value : null;

    /// <summary>
    /// Maps a CVSS v3 impact value (C/I/A) to its weight; <see cref="double.NaN"/> when unrecognized.
    /// </summary>
    private static double Cvss3ImpactWeight(string? value) => value switch
    {
        "N" => 0.0,
        "L" => 0.22,
        "H" => 0.56,
        _ => double.NaN,
    };

    /// <summary>
    /// Maps a CVSS v2 impact value (C/I/A) to its weight; <see cref="double.NaN"/> when unrecognized.
    /// </summary>
    private static double Cvss2ImpactWeight(string? value) => value switch
    {
        "N" => 0.0,
        "P" => 0.275,
        "C" => 0.660,
        _ => double.NaN,
    };

    /// <summary>
    /// Computes the CVSS v3.0/v3.1 base score. Returns <c>false</c> (score 0) when any base metric
    /// is missing or carries an unrecognized value.
    /// </summary>
    private static bool TryComputeCvss3(IReadOnlyDictionary<string, string> metrics, out double score)
    {
        score = 0;

        bool? scope = GetMetric(metrics, "S") switch
        {
            "U" => false,
            "C" => true,
            _ => null,
        };
        if (scope is null)
        {
            return false;
        }

        var scopeChanged = scope.Value;

        // Unrecognized or missing values map to NaN, which is checked once below.
        var av = GetMetric(metrics, "AV") switch
        {
            "N" => 0.85,
            "A" => 0.62,
            "L" => 0.55,
            "P" => 0.2,
            _ => double.NaN,
        };
        var ac = GetMetric(metrics, "AC") switch
        {
            "L" => 0.77,
            "H" => 0.44,
            _ => double.NaN,
        };
        var pr = GetMetric(metrics, "PR") switch
        {
            "N" => 0.85,
            "L" => scopeChanged ? 0.68 : 0.62,
            "H" => scopeChanged ? 0.5 : 0.27,
            _ => double.NaN,
        };
        var ui = GetMetric(metrics, "UI") switch
        {
            "N" => 0.85,
            "R" => 0.62,
            _ => double.NaN,
        };
        var confidentiality = Cvss3ImpactWeight(GetMetric(metrics, "C"));
        var integrity = Cvss3ImpactWeight(GetMetric(metrics, "I"));
        var availability = Cvss3ImpactWeight(GetMetric(metrics, "A"));

        // NaN propagates through addition, so one check covers every weight.
        if (double.IsNaN(av + ac + pr + ui + confidentiality + integrity + availability))
        {
            return false;
        }

        var impactSub = 1 - (1 - confidentiality) * (1 - integrity) * (1 - availability);
        impactSub = Math.Clamp(impactSub, 0, 1);
        var impact = scopeChanged
            ? 7.52 * (impactSub - 0.029) - 3.25 * Math.Pow(impactSub - 0.02, 15)
            : 6.42 * impactSub;
        var exploitability = 8.22 * av * ac * pr * ui;

        if (impact <= 0)
        {
            score = 0;
            return true;
        }

        var baseScore = scopeChanged
            ? Math.Min(1.08 * (impact + exploitability), 10)
            : Math.Min(impact + exploitability, 10);
        score = RoundUp(baseScore);
        return true;
    }

    /// <summary>
    /// Computes the CVSS v2.0 base score. Returns <c>false</c> (score 0) when any base metric is
    /// missing or carries an unrecognized value. The authentication metric is read from <c>AU</c>,
    /// falling back to <c>AUTH</c>.
    /// </summary>
    private static bool TryComputeCvss2(IReadOnlyDictionary<string, string> metrics, out double score)
    {
        score = 0;

        var av = GetMetric(metrics, "AV") switch
        {
            "L" => 0.395,
            "A" => 0.646,
            "N" => 1.0,
            _ => double.NaN,
        };
        var ac = GetMetric(metrics, "AC") switch
        {
            "H" => 0.35,
            "M" => 0.61,
            "L" => 0.71,
            _ => double.NaN,
        };
        var authentication = (GetMetric(metrics, "AU") ?? GetMetric(metrics, "AUTH")) switch
        {
            "M" => 0.45,
            "S" => 0.56,
            "N" => 0.704,
            _ => double.NaN,
        };
        var confidentiality = Cvss2ImpactWeight(GetMetric(metrics, "C"));
        var integrity = Cvss2ImpactWeight(GetMetric(metrics, "I"));
        var availability = Cvss2ImpactWeight(GetMetric(metrics, "A"));

        // NaN propagates through addition, so one check covers every weight.
        if (double.IsNaN(av + ac + authentication + confidentiality + integrity + availability))
        {
            return false;
        }

        var impact = 10.41 * (1 - (1 - confidentiality) * (1 - integrity) * (1 - availability));
        var exploitability = 20 * av * ac * authentication;
        var fImpact = impact == 0 ? 0.0 : 1.176;
        var baseScore = ((0.6 * impact) + (0.4 * exploitability) - 1.5) * fImpact;
        score = Math.Round(Math.Max(baseScore, 0), 1, MidpointRounding.AwayFromZero);
        return true;
    }

    /// <summary>
    /// Derives the lower-case severity label from a score. CVSS v2.0 tops out at <c>high</c>;
    /// v3.x adds <c>critical</c> for scores of 9.0 and above.
    /// </summary>
    private static string DetermineSeverity(double score, CvssVersion version)
    {
        if (score <= 0)
        {
            return "none";
        }

        if (score < 4.0)
        {
            return "low";
        }

        if (score < 7.0)
        {
            return "medium";
        }

        return version == CvssVersion.V20 || score < 9.0 ? "high" : "critical";
    }

    /// <summary>
    /// Normalizes a supplied severity label to one of <c>none</c>, <c>low</c>, <c>medium</c>,
    /// <c>high</c> or <c>critical</c>; returns <c>null</c> when the label is blank or unrecognized.
    /// For CVSS v2.0, <c>critical</c> is reported as <c>high</c>.
    /// </summary>
    private static string? NormalizeSeverity(string? severity, CvssVersion version)
    {
        if (string.IsNullOrWhiteSpace(severity))
        {
            return null;
        }

        return severity.Trim().ToLowerInvariant() switch
        {
            "none" or "informational" or "info" => "none",
            "critical" => version == CvssVersion.V20 ? "high" : "critical",
            "high" => "high",
            "medium" or "moderate" => "medium",
            "low" => "low",
            _ => null,
        };
    }

    /// <summary>
    /// Rounds up to one decimal place using the integer-based procedure from the CVSS v3.1
    /// specification (Appendix A). A plain <c>Ceiling(value * 10) / 10</c> can be pushed one tenth too
    /// high by binary floating-point noise (for example 4.000000000000001 becoming 4.1).
    /// </summary>
    private static double RoundUp(double value)
    {
        var scaled = (long)Math.Round(value * 100000.0, MidpointRounding.AwayFromZero);
        return scaled % 10000 == 0
            ? scaled / 100000.0
            : (Math.Floor(scaled / 10000.0) + 1) / 10.0;
    }

    /// <summary>
    /// Renders the version as used in the canonical vector prefix; anything other than 2.0 or 3.0
    /// is rendered as <c>3.1</c>.
    /// </summary>
    private static string ToVersionString(CvssVersion version)
        => version switch
        {
            CvssVersion.V20 => "2.0",
            CvssVersion.V30 => "3.0",
            _ => "3.1",
        };

    /// <summary>
    /// CVSS specification versions understood by the normalizer.
    /// </summary>
    private enum CvssVersion
    {
        Unknown = 0,
        V20,
        V30,
        V31,
    }
}
/// <summary>
/// Immutable value holding a canonicalized CVSS metric: version, vector, base score and severity.
/// </summary>
public readonly record struct CvssNormalizedMetric(string Version, string Vector, double BaseScore, string BaseSeverity)
{
    /// <summary>
    /// Builds a <see cref="CvssMetric"/> from this value's fields and the supplied provenance.
    /// </summary>
    public CvssMetric ToModel(AdvisoryProvenance provenance)
    {
        return new CvssMetric(Version, Vector, BaseScore, BaseSeverity, provenance);
    }
}

View File

@@ -0,0 +1,127 @@
using System.Globalization;
namespace StellaOps.Feedser.Normalization.Distro;
/// <summary>
/// Represents a Debian epoch:version-revision tuple and exposes parsing/formatting helpers.
/// </summary>
public sealed class DebianEvr
{
    private DebianEvr(int epoch, bool hasExplicitEpoch, string version, string revision, string original)
    {
        Epoch = epoch;
        HasExplicitEpoch = hasExplicitEpoch;
        Version = version;
        Revision = revision;
        Original = original;
    }

    /// <summary>
    /// Epoch segment (defaults to <c>0</c> when omitted).
    /// </summary>
    public int Epoch { get; }

    /// <summary>
    /// Indicates whether an epoch segment was present explicitly.
    /// </summary>
    public bool HasExplicitEpoch { get; }

    /// <summary>
    /// Version portion (without revision).
    /// </summary>
    public string Version { get; }

    /// <summary>
    /// Revision portion (after the last dash). Empty when omitted.
    /// </summary>
    public string Revision { get; }

    /// <summary>
    /// Trimmed EVR string supplied to <see cref="TryParse"/>.
    /// </summary>
    public string Original { get; }

    /// <summary>
    /// Attempts to parse the provided value into a <see cref="DebianEvr"/> instance.
    /// </summary>
    /// <param name="value">Text of the form <c>[epoch:]version[-revision]</c>; surrounding whitespace is ignored.</param>
    /// <param name="result">The parsed instance on success; <c>null</c> otherwise.</param>
    /// <returns>
    /// <c>true</c> on success. <c>false</c> when the value is blank, the epoch is not a plain
    /// non-negative integer, or nothing follows the epoch.
    /// </returns>
    public static bool TryParse(string? value, [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out DebianEvr? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();
        var epoch = 0;
        var hasExplicitEpoch = false;
        var remainder = trimmed;

        // Everything before the first colon is the epoch.
        var colonIndex = trimmed.IndexOf(':');
        if (colonIndex >= 0)
        {
            // NumberStyles.None accepts digits only: an epoch is an unsigned integer, so signs,
            // embedded whitespace and an empty epoch (leading colon) are all rejected.
            if (!int.TryParse(trimmed[..colonIndex], NumberStyles.None, CultureInfo.InvariantCulture, out epoch))
            {
                return false;
            }

            hasExplicitEpoch = true;
            remainder = trimmed[(colonIndex + 1)..];
        }

        if (remainder.Length == 0)
        {
            return false;
        }

        // The revision is whatever follows the last dash; a leading dash is not a separator.
        var version = remainder;
        var revision = string.Empty;
        var dashIndex = remainder.LastIndexOf('-');
        if (dashIndex > 0)
        {
            version = remainder[..dashIndex];
            revision = remainder[(dashIndex + 1)..];
        }

        result = new DebianEvr(epoch, hasExplicitEpoch, version, revision, trimmed);
        return true;
    }

    /// <summary>
    /// Parses the provided value into a <see cref="DebianEvr"/> or throws <see cref="FormatException"/>.
    /// </summary>
    /// <exception cref="FormatException">The value is not accepted by <see cref="TryParse"/>.</exception>
    public static DebianEvr Parse(string value)
    {
        if (!TryParse(value, out var evr))
        {
            throw new FormatException($"Input '{value}' is not a valid Debian EVR string.");
        }

        return evr;
    }

    /// <summary>
    /// Returns a canonical EVR string with trimmed components and normalized epoch/revision placement.
    /// The epoch is emitted when it was explicit or is greater than zero; the revision only when non-empty.
    /// </summary>
    public string ToCanonicalString()
    {
        var epochSegment = HasExplicitEpoch || Epoch > 0 ? $"{Epoch}:" : string.Empty;
        var revisionSegment = string.IsNullOrEmpty(Revision) ? string.Empty : $"-{Revision}";
        return $"{epochSegment}{Version}{revisionSegment}";
    }

    /// <inheritdoc />
    public override string ToString() => Original;
}

View File

@@ -0,0 +1,192 @@
using System.Globalization;
namespace StellaOps.Feedser.Normalization.Distro;
/// <summary>
/// Represents a parsed NEVRA (Name-Epoch:Version-Release.Architecture) identifier and exposes helpers for canonical formatting.
/// </summary>
public sealed class Nevra
{
    private Nevra(string name, int epoch, bool hasExplicitEpoch, string version, string release, string? architecture, string original)
    {
        Name = name;
        Epoch = epoch;
        HasExplicitEpoch = hasExplicitEpoch;
        Version = version;
        Release = release;
        Architecture = architecture;
        Original = original;
    }

    /// <summary>
    /// Package name segment.
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// Epoch extracted from the NEVRA string (defaults to <c>0</c> when omitted).
    /// </summary>
    public int Epoch { get; }

    /// <summary>
    /// Indicates whether an epoch segment was present explicitly (e.g. <c>0:</c>).
    /// </summary>
    public bool HasExplicitEpoch { get; }

    /// <summary>
    /// Version component (without epoch or release).
    /// </summary>
    public string Version { get; }

    /// <summary>
    /// Release component (without architecture suffix).
    /// </summary>
    public string Release { get; }

    /// <summary>
    /// Optional architecture suffix (e.g. <c>x86_64</c>, <c>noarch</c>).
    /// </summary>
    public string? Architecture { get; }

    /// <summary>
    /// Trimmed NEVRA string supplied to <see cref="TryParse"/>.
    /// </summary>
    public string Original { get; }

    /// <summary>
    /// Suffixes recognized as an architecture (case-insensitive). A trailing <c>.xyz</c> on the
    /// release that is not listed here is kept as part of the release.
    /// </summary>
    private static readonly ISet<string> KnownArchitectures = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "noarch",
        "src",
        "nosrc",
        "x86_64",
        "aarch64",
        "armv7hl",
        "armhfp",
        "ppc64",
        "ppc64le",
        "ppc",
        "s390",
        "s390x",
        "i386",
        "i486",
        "i586",
        "i686",
        "amd64",
        "arm64",
        "armv7l",
        "armv6l",
        "armv8l",
        "armel",
        "armhf",
        "ia32e",
        "loongarch64",
        "mips",
        "mips64",
        "mips64le",
        "mipsel",
        "ppc32",
        "ppc64p7",
        "riscv64",
        "sparc",
        "sparc64"
    };

    /// <summary>
    /// Attempts to parse the provided value into a <see cref="Nevra"/> instance.
    /// </summary>
    /// <param name="value">Text of the form <c>name-[epoch:]version-release[.arch]</c>; surrounding whitespace is ignored.</param>
    /// <param name="result">The parsed instance on success; <c>null</c> otherwise.</param>
    /// <returns>
    /// <c>true</c> on success. <c>false</c> when the name, version or release is missing, or when a
    /// non-empty epoch is not a plain non-negative integer.
    /// </returns>
    public static bool TryParse(string? value, [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out Nevra? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();

        // The last dash separates the release (plus optional architecture) from name-version.
        var releaseSeparator = trimmed.LastIndexOf('-');
        if (releaseSeparator <= 0 || releaseSeparator >= trimmed.Length - 1)
        {
            return false;
        }

        var releasePart = trimmed[(releaseSeparator + 1)..];
        var nameVersionPart = trimmed[..releaseSeparator];

        // The next-to-last dash separates the name from [epoch:]version; both sides must be non-empty.
        var versionSeparator = nameVersionPart.LastIndexOf('-');
        if (versionSeparator <= 0 || versionSeparator >= nameVersionPart.Length - 1)
        {
            return false;
        }

        var versionPart = nameVersionPart[(versionSeparator + 1)..];
        var namePart = nameVersionPart[..versionSeparator];
        if (string.IsNullOrWhiteSpace(namePart))
        {
            return false;
        }

        // Split off the architecture only when the trailing dotted token is a known architecture.
        string? architecture = null;
        var release = releasePart;
        var architectureSeparator = releasePart.LastIndexOf('.');
        if (architectureSeparator > 0 && architectureSeparator < releasePart.Length - 1)
        {
            var possibleArch = releasePart[(architectureSeparator + 1)..];
            if (KnownArchitectures.Contains(possibleArch))
            {
                architecture = possibleArch;
                release = releasePart[..architectureSeparator];
            }
        }

        var version = versionPart;
        var epoch = 0;
        var hasExplicitEpoch = false;
        var epochSeparator = versionPart.IndexOf(':');
        if (epochSeparator >= 0)
        {
            hasExplicitEpoch = true;
            var epochPart = versionPart[..epochSeparator];
            version = versionPart[(epochSeparator + 1)..];

            // An empty epoch (":1.0") is tolerated and read as 0. Otherwise only digits are
            // accepted: NumberStyles.None rejects signs and whitespace.
            if (epochPart.Length > 0 && !int.TryParse(epochPart, NumberStyles.None, CultureInfo.InvariantCulture, out epoch))
            {
                return false;
            }
        }

        if (string.IsNullOrWhiteSpace(version))
        {
            return false;
        }

        result = new Nevra(namePart, epoch, hasExplicitEpoch, version, release, architecture, trimmed);
        return true;
    }

    /// <summary>
    /// Parses the provided value into a <see cref="Nevra"/> or throws <see cref="FormatException"/>.
    /// </summary>
    /// <exception cref="FormatException">The value is not accepted by <see cref="TryParse"/>.</exception>
    public static Nevra Parse(string value)
    {
        if (!TryParse(value, out var nevra))
        {
            throw new FormatException($"Input '{value}' is not a valid NEVRA string.");
        }

        return nevra;
    }

    /// <summary>
    /// Returns a canonical NEVRA string with trimmed components and normalized epoch/architecture placement.
    /// The epoch is emitted when it was explicit or is greater than zero; the architecture only when present.
    /// </summary>
    public string ToCanonicalString()
    {
        var epochSegment = HasExplicitEpoch || Epoch > 0 ? $"{Epoch}:" : string.Empty;
        var archSegment = string.IsNullOrWhiteSpace(Architecture) ? string.Empty : $".{Architecture}";
        return $"{Name}-{epochSegment}{Version}-{Release}{archSegment}";
    }

    /// <inheritdoc />
    public override string ToString() => Original;
}

View File

@@ -0,0 +1,352 @@
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace StellaOps.Feedser.Normalization.Identifiers;
/// <summary>
/// Implements canonical normalization for CPE 2.3 identifiers (and URI binding conversion).
/// </summary>
/// <remarks>
/// Output is always a 13-component CPE 2.3 formatted string. Field values are unescaped and then
/// re-escaped, so a <c>*</c> or <c>?</c> embedded in a longer value is emitted in escaped form;
/// only a field consisting solely of <c>*</c> or <c>-</c> is passed through unchanged.
/// </remarks>
internal static class Cpe23
{
    private static readonly HashSet<char> CharactersRequiringEscape = new(new[]
    {
        '\\', ':', '/', '?', '#', '[', ']', '@', '!', '$', '&', '"', '\'', '(', ')', '+', ',', ';', '=', '%', '*',
        '<', '>', '|', '^', '`', '{', '}', '~'
    });

    /// <summary>
    /// Attempts to normalize a CPE 2.3 formatted string or a CPE 2.2 URI (<c>cpe:/...</c>) into a
    /// canonical CPE 2.3 formatted string.
    /// </summary>
    /// <param name="value">The identifier to normalize; surrounding whitespace is ignored.</param>
    /// <param name="normalized">The canonical string on success; <c>null</c> otherwise.</param>
    /// <returns><c>true</c> when the value was recognized and normalized; otherwise <c>false</c>.</returns>
    public static bool TryNormalize(string? value, out string? normalized)
    {
        normalized = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var components = SplitComponents(value.Trim());
        if (components.Count < 2 || !components[0].Equals("cpe", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (components[1].Equals("2.3", StringComparison.OrdinalIgnoreCase))
        {
            return TryNormalizeFrom23(components, out normalized);
        }

        if (components[1].Length > 0 && components[1][0] == '/')
        {
            return TryNormalizeFrom22(components, out normalized);
        }

        return false;
    }

    /// <summary>
    /// Normalizes a CPE 2.3 formatted string, which must have exactly 13 components. Vendor and
    /// product are lower-cased; the remaining attributes keep their case.
    /// </summary>
    private static bool TryNormalizeFrom23(IReadOnlyList<string> components, out string? normalized)
    {
        normalized = null;
        if (components.Count != 13)
        {
            return false;
        }

        var part = NormalizePart(components[2]);
        if (part is null)
        {
            return false;
        }

        var normalizedComponents = new string[13];
        normalizedComponents[0] = "cpe";
        normalizedComponents[1] = "2.3";
        normalizedComponents[2] = part;

        // Indices 3 (vendor) and 4 (product) are lower-cased; 5..12 are not.
        // NOTE(review): the 2.2 conversion path lower-cases the language field while this path
        // does not - confirm whether that difference is intended.
        for (var i = 3; i < normalizedComponents.Length; i++)
        {
            normalizedComponents[i] = NormalizeField(components[i], lower: i <= 4, decodeUri: false);
        }

        normalized = string.Join(':', normalizedComponents);
        return true;
    }

    /// <summary>
    /// Converts a CPE 2.2 URI (<c>cpe:/part:vendor:product:version:update:edition:language</c>) into
    /// a CPE 2.3 formatted string. Missing trailing components become <c>*</c>.
    /// </summary>
    private static bool TryNormalizeFrom22(IReadOnlyList<string> components, out string? normalized)
    {
        normalized = null;
        if (components.Count < 2)
        {
            return false;
        }

        var partComponent = components[1];
        if (partComponent.Length < 2 || partComponent[0] != '/')
        {
            return false;
        }

        var part = NormalizePart(partComponent[1..]);
        if (part is null)
        {
            return false;
        }

        var vendor = NormalizeField(components.Count > 2 ? components[2] : null, lower: true, decodeUri: true);
        var product = NormalizeField(components.Count > 3 ? components[3] : null, lower: true, decodeUri: true);
        var version = NormalizeField(components.Count > 4 ? components[4] : null, lower: false, decodeUri: true);
        var update = NormalizeField(components.Count > 5 ? components[5] : null, lower: false, decodeUri: true);
        var (edition, swEdition, targetSw, targetHw, other) = ExpandEdition(components.Count > 6 ? components[6] : null);
        var language = NormalizeField(components.Count > 7 ? components[7] : null, lower: true, decodeUri: true);

        // CPE 2.3 attribute order: edition, language, sw_edition, target_sw, target_hw, other.
        normalized = string.Join(':', new[]
        {
            "cpe",
            "2.3",
            part,
            vendor,
            product,
            version,
            update,
            edition,
            language,
            swEdition,
            targetSw,
            targetHw,
            other,
        });
        return true;
    }

    /// <summary>
    /// Validates the part attribute; returns the lower-cased token for <c>a</c>, <c>o</c> or <c>h</c>
    /// and <c>null</c> for anything else.
    /// </summary>
    private static string? NormalizePart(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        var token = value.Trim().ToLowerInvariant();
        return token is "a" or "o" or "h" ? token : null;
    }

    /// <summary>
    /// Normalizes one attribute value: blank becomes <c>*</c>, <c>*</c>/<c>-</c> pass through, and
    /// anything else is decoded (percent-decoding or backslash-unescaping), optionally lower-cased
    /// and re-escaped.
    /// </summary>
    private static string NormalizeField(string? value, bool lower, bool decodeUri)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "*";
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        var decoded = decodeUri ? DecodeUriComponent(trimmed) : UnescapeComponent(trimmed);
        if (decoded is "*" or "-")
        {
            return decoded;
        }

        if (decoded.Length == 0)
        {
            return "*";
        }

        var normalized = lower ? decoded.ToLowerInvariant() : decoded;
        return EscapeComponent(normalized);
    }

    /// <summary>
    /// Expands the CPE 2.2 edition component into the five CPE 2.3 attributes it may carry.
    /// </summary>
    /// <remarks>
    /// A value that starts with <c>~</c> is the packed form
    /// <c>~edition~sw_edition~target_sw~target_hw~other</c> defined by the CPE 2.3 naming
    /// specification; splitting on <c>~</c> yields an empty segment 0 followed by the five fields in
    /// that order. Any other value is a plain legacy edition. The raw text is split before
    /// percent-decoding so that an encoded tilde inside a field is not mistaken for a separator.
    /// </remarks>
    private static (string Edition, string SwEdition, string TargetSw, string TargetHw, string Other) ExpandEdition(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return ("*", "*", "*", "*", "*");
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return (trimmed, "*", "*", "*", "*");
        }

        if (!trimmed.StartsWith("~", StringComparison.Ordinal))
        {
            return (NormalizeDecodedField(DecodeUriComponent(trimmed), lower: false), "*", "*", "*", "*");
        }

        var segments = trimmed.Split('~');

        // Missing or empty packed fields become "*".
        string Field(int index) => segments.Length > index
            ? NormalizeDecodedField(DecodeUriComponent(segments[index]), lower: false)
            : "*";

        return (Field(1), Field(2), Field(3), Field(4), Field(5));
    }

    /// <summary>
    /// Normalizes an already-decoded attribute value: blank becomes <c>*</c>, <c>*</c>/<c>-</c>
    /// pass through, and anything else is optionally lower-cased and escaped.
    /// </summary>
    private static string NormalizeDecodedField(string? value, bool lower)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "*";
        }

        var trimmed = value.Trim();
        if (trimmed is "*" or "-")
        {
            return trimmed;
        }

        var normalized = lower ? trimmed.ToLowerInvariant() : trimmed;
        return EscapeComponent(normalized);
    }

    /// <summary>
    /// Removes backslash escapes, keeping each escaped character. A dangling trailing backslash is
    /// preserved as a literal backslash.
    /// </summary>
    private static string UnescapeComponent(string value)
    {
        var builder = new StringBuilder(value.Length);
        var escape = false;
        foreach (var ch in value)
        {
            if (escape)
            {
                builder.Append(ch);
                escape = false;
                continue;
            }

            if (ch == '\\')
            {
                escape = true;
                continue;
            }

            builder.Append(ch);
        }

        if (escape)
        {
            builder.Append('\\');
        }

        return builder.ToString();
    }

    /// <summary>
    /// Prefixes every character for which <see cref="RequiresEscape"/> is true with a backslash.
    /// </summary>
    private static string EscapeComponent(string value)
    {
        if (value.Length == 0)
        {
            return value;
        }

        var builder = new StringBuilder(value.Length * 2);
        foreach (var ch in value)
        {
            if (RequiresEscape(ch))
            {
                builder.Append('\\');
            }

            builder.Append(ch);
        }

        return builder.ToString();
    }

    /// <summary>
    /// Decides whether a character must be backslash-escaped: letters, digits, <c>_</c>, <c>-</c>
    /// and <c>.</c> are never escaped; whitespace, <c>*</c> and the characters in
    /// <see cref="CharactersRequiringEscape"/> always are.
    /// </summary>
    private static bool RequiresEscape(char ch)
    {
        if (char.IsLetterOrDigit(ch))
        {
            return false;
        }

        if (char.IsWhiteSpace(ch))
        {
            return true;
        }

        return ch switch
        {
            '_' or '-' or '.' => false,
            // Keep wildcard markers literal only when entire component is wildcard handled earlier.
            '*' => true,
            _ => CharactersRequiringEscape.Contains(ch)
        };
    }

    /// <summary>
    /// Decodes <c>%XX</c> sequences (two hex digits) into the corresponding character; anything
    /// that is not a complete, valid sequence is copied through unchanged.
    /// </summary>
    private static string DecodeUriComponent(string value)
    {
        var builder = new StringBuilder(value.Length);
        for (var i = 0; i < value.Length; i++)
        {
            var ch = value[i];
            if (ch == '%' && i + 2 < value.Length && IsHex(value[i + 1]) && IsHex(value[i + 2]))
            {
                var hex = new string(new[] { value[i + 1], value[i + 2] });
                var decoded = (char)int.Parse(hex, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                builder.Append(decoded);
                i += 2;
            }
            else
            {
                builder.Append(ch);
            }
        }

        return builder.ToString();
    }

    /// <summary>
    /// Returns <c>true</c> for an ASCII hexadecimal digit.
    /// </summary>
    private static bool IsHex(char ch)
        => ch is >= '0' and <= '9' or >= 'A' and <= 'F' or >= 'a' and <= 'f';

    /// <summary>
    /// Splits on unescaped colons. Backslash escapes are kept in the output so that later
    /// unescaping still sees them; an escaped colon does not split.
    /// </summary>
    private static List<string> SplitComponents(string value)
    {
        var results = new List<string>();
        var builder = new StringBuilder();
        var escape = false;
        foreach (var ch in value)
        {
            if (escape)
            {
                builder.Append(ch);
                escape = false;
                continue;
            }

            if (ch == '\\')
            {
                builder.Append(ch);
                escape = true;
                continue;
            }

            if (ch == ':')
            {
                results.Add(builder.ToString());
                builder.Clear();
                continue;
            }

            builder.Append(ch);
        }

        results.Add(builder.ToString());
        return results;
    }
}

View File

@@ -0,0 +1,32 @@
namespace StellaOps.Feedser.Normalization.Identifiers;
/// <summary>
/// Entry points for turning package identifiers (Package URLs and CPE names) into canonical strings.
/// </summary>
public static class IdentifierNormalizer
{
    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL and returns both its canonical string and
    /// the parsed instance. Both outputs are <c>null</c> when parsing fails.
    /// </summary>
    public static bool TryNormalizePackageUrl(string? value, out string? normalized, out PackageUrl? packageUrl)
    {
        if (PackageUrl.TryParse(value, out var candidate))
        {
            normalized = candidate!.ToCanonicalString();
            packageUrl = candidate;
            return true;
        }

        normalized = null;
        packageUrl = null;
        return false;
    }

    /// <summary>
    /// Parses <paramref name="value"/> as a Package URL and returns only its canonical string.
    /// </summary>
    public static bool TryNormalizePackageUrl(string? value, out string? normalized)
        => TryNormalizePackageUrl(value, out normalized, out _);

    /// <summary>
    /// Normalizes a CPE identifier by delegating to <see cref="Cpe23.TryNormalize"/>.
    /// </summary>
    public static bool TryNormalizeCpe(string? value, out string? normalized)
        => Cpe23.TryNormalize(value, out normalized);
}

View File

@@ -0,0 +1,299 @@
using System.Collections.Immutable;
using System.Linq;
using System.Text;
namespace StellaOps.Feedser.Normalization.Identifiers;
/// <summary>
/// Represents a parsed Package URL (purl) identifier with canonical string rendering.
/// </summary>
public sealed class PackageUrl
{
private PackageUrl(
string type,
ImmutableArray<string> namespaceSegments,
string name,
string? version,
ImmutableArray<KeyValuePair<string, string>> qualifiers,
ImmutableArray<string> subpathSegments,
string original)
{
Type = type;
NamespaceSegments = namespaceSegments;
Name = name;
Version = version;
Qualifiers = qualifiers;
SubpathSegments = subpathSegments;
Original = original;
}
public string Type { get; }
public ImmutableArray<string> NamespaceSegments { get; }
public string Name { get; }
public string? Version { get; }
public ImmutableArray<KeyValuePair<string, string>> Qualifiers { get; }
public ImmutableArray<string> SubpathSegments { get; }
public string Original { get; }
private static readonly HashSet<string> LowerCaseNamespaceTypes = new(StringComparer.OrdinalIgnoreCase)
{
"maven",
"npm",
"pypi",
"nuget",
"composer",
"gem",
"apk",
"deb",
"rpm",
"oci",
};
private static readonly HashSet<string> LowerCaseNameTypes = new(StringComparer.OrdinalIgnoreCase)
{
"npm",
"pypi",
"nuget",
"composer",
"gem",
"apk",
"deb",
"rpm",
"oci",
};
public static bool TryParse(string? value, out PackageUrl? packageUrl)
{
packageUrl = null;
if (string.IsNullOrWhiteSpace(value))
{
return false;
}
var trimmed = value.Trim();
if (!trimmed.StartsWith("pkg:", StringComparison.OrdinalIgnoreCase))
{
return false;
}
var remainder = trimmed[4..];
var firstSlash = remainder.IndexOf('/');
if (firstSlash <= 0)
{
return false;
}
var type = remainder[..firstSlash].Trim().ToLowerInvariant();
remainder = remainder[(firstSlash + 1)..];
var subpathPart = string.Empty;
var subpathIndex = remainder.IndexOf('#');
if (subpathIndex >= 0)
{
subpathPart = remainder[(subpathIndex + 1)..];
remainder = remainder[..subpathIndex];
}
var qualifierPart = string.Empty;
var qualifierIndex = remainder.IndexOf('?');
if (qualifierIndex >= 0)
{
qualifierPart = remainder[(qualifierIndex + 1)..];
remainder = remainder[..qualifierIndex];
}
string? version = null;
var versionIndex = remainder.LastIndexOf('@');
if (versionIndex >= 0)
{
version = remainder[(versionIndex + 1)..];
remainder = remainder[..versionIndex];
}
if (string.IsNullOrWhiteSpace(remainder))
{
return false;
}
var rawSegments = remainder.Split('/', StringSplitOptions.RemoveEmptyEntries);
if (rawSegments.Length == 0)
{
return false;
}
var shouldLowerNamespace = LowerCaseNamespaceTypes.Contains(type);
var shouldLowerName = LowerCaseNameTypes.Contains(type);
var namespaceBuilder = ImmutableArray.CreateBuilder<string>(Math.Max(0, rawSegments.Length - 1));
for (var i = 0; i < rawSegments.Length - 1; i++)
{
var segment = Uri.UnescapeDataString(rawSegments[i].Trim());
if (segment.Length == 0)
{
continue;
}
if (shouldLowerNamespace)
{
segment = segment.ToLowerInvariant();
}
namespaceBuilder.Add(EscapePathSegment(segment));
}
var nameSegment = Uri.UnescapeDataString(rawSegments[^1].Trim());
if (nameSegment.Length == 0)
{
return false;
}
if (shouldLowerName)
{
nameSegment = nameSegment.ToLowerInvariant();
}
var canonicalName = EscapePathSegment(nameSegment);
var canonicalVersion = NormalizeComponent(version, escape: true, lowerCase: false);
var qualifiers = ParseQualifiers(qualifierPart);
var subpath = ParseSubpath(subpathPart);
packageUrl = new PackageUrl(
type,
namespaceBuilder.ToImmutable(),
canonicalName,
canonicalVersion,
qualifiers,
subpath,
trimmed);
return true;
}
public static PackageUrl Parse(string value)
{
if (!TryParse(value, out var parsed))
{
throw new FormatException($"Input '{value}' is not a valid Package URL.");
}
return parsed!;
}
public string ToCanonicalString()
{
var builder = new StringBuilder("pkg:");
builder.Append(Type);
builder.Append('/');
if (!NamespaceSegments.IsDefaultOrEmpty)
{
builder.Append(string.Join('/', NamespaceSegments));
builder.Append('/');
}
builder.Append(Name);
if (!string.IsNullOrEmpty(Version))
{
builder.Append('@');
builder.Append(Version);
}
if (!Qualifiers.IsDefaultOrEmpty && Qualifiers.Length > 0)
{
builder.Append('?');
builder.Append(string.Join('&', Qualifiers.Select(static kvp => $"{kvp.Key}={kvp.Value}")));
}
if (!SubpathSegments.IsDefaultOrEmpty && SubpathSegments.Length > 0)
{
builder.Append('#');
builder.Append(string.Join('/', SubpathSegments));
}
return builder.ToString();
}
public override string ToString() => ToCanonicalString();
/// <summary>
/// Parses the qualifier portion of a Package URL (the text after <c>?</c>) into key-ordered pairs.
/// </summary>
/// <param name="qualifierPart">Raw <c>key=value</c> entries joined by <c>&amp;</c>; may be null or empty.</param>
/// <returns>
/// Pairs sorted by key with ordinal comparison. Keys are percent-decoded, trimmed and lower-cased; values are
/// percent-decoded and re-encoded. Entries with no <c>=</c>, an empty key, or an empty value are dropped.
/// When a key occurs more than once, the last occurrence that carries a value wins.
/// </returns>
private static ImmutableArray<KeyValuePair<string, string>> ParseQualifiers(string qualifierPart)
{
    if (string.IsNullOrEmpty(qualifierPart))
    {
        return ImmutableArray<KeyValuePair<string, string>>.Empty;
    }

    // SortedDictionary gives both de-duplication and the deterministic key order of the result.
    var map = new SortedDictionary<string, string>(StringComparer.Ordinal);
    foreach (var entry in qualifierPart.Split('&', StringSplitOptions.RemoveEmptyEntries))
    {
        var trimmed = entry.Trim();
        var equalsIndex = trimmed.IndexOf('=');
        if (equalsIndex <= 0)
        {
            // Covers blank entries, entries without '=', and entries that start with '='.
            continue;
        }

        var key = Uri.UnescapeDataString(trimmed[..equalsIndex]).Trim().ToLowerInvariant();
        if (key.Length == 0)
        {
            // A key such as "%20" decodes to whitespace only; keeping it would emit "?=value".
            continue;
        }

        var value = NormalizeComponent(trimmed[(equalsIndex + 1)..], escape: true, lowerCase: false);
        if (value.Length == 0)
        {
            // Per the purl specification a pair with an empty value is equivalent to the pair being absent.
            continue;
        }

        map[key] = value;
    }

    return map.ToImmutableArray();
}
/// <summary>
/// Parses the subpath portion of a Package URL (the text after <c>#</c>) into escaped segments.
/// </summary>
/// <param name="subpathPart">Raw subpath with <c>/</c>-separated segments; may be null or empty.</param>
/// <returns>
/// The segments in their original order, each percent-decoded and then re-encoded. Empty segments and the
/// relative segments <c>.</c> and <c>..</c> are discarded.
/// </returns>
private static ImmutableArray<string> ParseSubpath(string subpathPart)
{
    if (string.IsNullOrEmpty(subpathPart))
    {
        return ImmutableArray<string>.Empty;
    }

    var rawSegments = subpathPart.Split('/', StringSplitOptions.RemoveEmptyEntries);
    var builder = ImmutableArray.CreateBuilder<string>(rawSegments.Length);
    foreach (var raw in rawSegments)
    {
        var segment = Uri.UnescapeDataString(raw.Trim());

        // The purl specification forbids "." and ".." subpath segments; the check runs on the decoded
        // text so that encoded forms such as "%2E%2E" are discarded as well.
        if (segment.Length == 0 || segment is "." or "..")
        {
            continue;
        }

        builder.Add(EscapePathSegment(segment));
    }

    return builder.ToImmutable();
}
/// <summary>
/// Trims and percent-decodes a component, then optionally lower-cases and re-encodes it.
/// </summary>
/// <param name="value">Raw component text; null or whitespace-only input yields an empty string.</param>
/// <param name="escape">When true, the result is passed through <see cref="Uri.EscapeDataString(string)"/>.</param>
/// <param name="lowerCase">When true, the decoded text is lower-cased with the invariant culture before escaping.</param>
/// <returns>The normalized component, or <see cref="string.Empty"/> when there is nothing to normalize.</returns>
private static string NormalizeComponent(string? value, bool escape, bool lowerCase)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    var decoded = Uri.UnescapeDataString(value.Trim());
    var cased = lowerCase ? decoded.ToLowerInvariant() : decoded;

    if (!escape)
    {
        return cased;
    }

    return Uri.EscapeDataString(cased);
}
/// <summary>
/// Percent-encodes a single, already-decoded path segment via <see cref="Uri.EscapeDataString(string)"/>.
/// </summary>
/// <param name="value">The decoded segment text.</param>
/// <returns>The encoded segment.</returns>
private static string EscapePathSegment(string value) => Uri.EscapeDataString(value);
}

View File

@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Turns off SDK-generated assembly attributes. NOTE(review): presumably because they are declared by hand in an AssemblyInfo source file; confirm. -->
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
</PropertyGroup>
<ItemGroup>
<!-- Keep everything under Tests/ out of this project's Compile and None items. -->
<Compile Remove="Tests\**\*.cs" />
<None Remove="Tests\**\*" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="../StellaOps.Feedser.Models/StellaOps.Feedser.Models.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,8 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|Canonical NEVRA/EVR parsing helpers|BE-Norm (Distro WG)|Models|DONE: `Normalization.Distro` exposes parsers + canonical formatters consumed by Merge comparers/tests.|
|PURL/CPE identifier normalization|BE-Norm (OSS WG)|Models|DONE: canonical PURL/CPE helpers feed connectors and exporter tooling.|
|CPE normalization escape handling|BE-Norm (OSS WG)|Normalization identifiers|DONE: percent-decoding, edition sub-field expansion, and deterministic escaping landed in `Cpe23` with new tests covering boundary cases.|
|CVSS metric normalization & severity bands|BE-Norm (Risk WG)|Models|DONE: `CvssMetricNormalizer` unifies vectors, recomputes scores/severities, and is wired through NVD/RedHat/JVN mappers with unit coverage.|
|Description and locale normalization pipeline|BE-Norm (I18N)|Source connectors|DONE: `DescriptionNormalizer` strips markup, collapses whitespace, and provides locale fallback used by core mappers.|

View File

@@ -0,0 +1,118 @@
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
namespace StellaOps.Feedser.Normalization.Text;
/// <summary>
/// Normalizes advisory descriptions by stripping markup, collapsing whitespace, and selecting the best locale fallback.
/// </summary>
public static class DescriptionNormalizer
{
    /// <summary>Language reported when no candidate is usable or the chosen candidate has no language.</summary>
    private const string DefaultLanguage = "en";

    private static readonly Regex HtmlTagRegex = new("<[^>]+>", RegexOptions.Compiled | RegexOptions.CultureInvariant);
    private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // Must be declared before PreferredPrimaryLanguages: static initializers run in textual order and
    // NormalizeLanguage reads this field on its fallback path.
    private static readonly char[] LanguageTagSeparators = { '-', '_' };

    private static readonly string[] PreferredLanguages = { "en", "en-us", "en-gb" };

    // Candidate languages are stored as primary subtags, so the preferred tags are reduced the same way
    // once, and de-duplicated, instead of being re-normalized and re-scanned on every call.
    private static readonly string[] PreferredPrimaryLanguages = PreferredLanguages
        .Select(NormalizeLanguage)
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToArray();

    /// <summary>
    /// Picks one description out of <paramref name="candidates"/> and returns it in sanitized form.
    /// </summary>
    /// <param name="candidates">Localized texts in priority order; enumerated exactly once.</param>
    /// <returns>
    /// The sanitized text of the first candidate in a preferred language, or else of the first usable candidate,
    /// together with its lower-cased primary language subtag (<c>"en"</c> when the language is unknown).
    /// Returns an empty text with language <c>"en"</c> when no candidate has any text left after sanitizing.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="candidates"/> is null.</exception>
    public static NormalizedDescription Normalize(IEnumerable<LocalizedText> candidates)
    {
        ArgumentNullException.ThrowIfNull(candidates);

        // Insertion order is the candidates' original order, so no separate index is needed.
        var usable = new List<(string Text, string Language)>();
        foreach (var candidate in candidates)
        {
            if (string.IsNullOrWhiteSpace(candidate.Text))
            {
                continue;
            }

            var sanitized = Sanitize(candidate.Text);
            if (sanitized.Length == 0)
            {
                // Nothing but markup and/or whitespace.
                continue;
            }

            usable.Add((sanitized, NormalizeLanguage(candidate.Language)));
        }

        if (usable.Count == 0)
        {
            return new NormalizedDescription(string.Empty, DefaultLanguage);
        }

        var best = SelectBest(usable);
        var languageTag = best.Language.Length > 0 ? best.Language : DefaultLanguage;
        return new NormalizedDescription(best.Text, languageTag);
    }

    /// <summary>
    /// Returns the first entry whose language is a preferred one, or the first entry overall when none matches.
    /// </summary>
    /// <param name="processed">Non-empty list of sanitized candidates in their original order.</param>
    private static (string Text, string Language) SelectBest(List<(string Text, string Language)> processed)
    {
        foreach (var preferred in PreferredPrimaryLanguages)
        {
            foreach (var entry in processed)
            {
                if (entry.Language.Equals(preferred, StringComparison.OrdinalIgnoreCase))
                {
                    return (entry.Text, preferred);
                }
            }
        }

        return processed[0];
    }

    /// <summary>
    /// Decodes HTML entities, replaces anything shaped like a tag with a space, and collapses whitespace runs.
    /// </summary>
    /// <param name="text">Raw description text.</param>
    /// <returns>The cleaned text with no leading or trailing whitespace; may be empty.</returns>
    private static string Sanitize(string text)
    {
        // NOTE(review): entities are decoded before tags are stripped, so entity-escaped angle brackets
        // (e.g. "a &lt; b and c &gt; d") become tag-shaped and are removed too. This handles feeds that
        // entity-encode their markup, but it also drops literal "<...>" text; confirm that is intended.
        var decoded = WebUtility.HtmlDecode(text) ?? string.Empty;
        var withoutTags = HtmlTagRegex.Replace(decoded, " ");
        return WhitespaceRegex.Replace(withoutTags, " ").Trim();
    }

    /// <summary>
    /// Reduces a language tag to its lower-cased primary subtag (for example <c>en-US</c> becomes <c>en</c>).
    /// </summary>
    /// <param name="language">Language tag as supplied by the source; may be null or blank.</param>
    /// <returns>The primary subtag, or an empty string when <paramref name="language"/> has none.</returns>
    private static string NormalizeLanguage(string? language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return string.Empty;
        }

        var trimmed = language.Trim();
        try
        {
            // Prefer the runtime's own culture name for the tag when it recognizes one.
            var culture = CultureInfo.GetCultureInfo(trimmed);
            if (!string.IsNullOrEmpty(culture.Name))
            {
                var parts = culture.Name.Split('-');
                if (parts.Length > 0 && !string.IsNullOrWhiteSpace(parts[0]))
                {
                    return parts[0].ToLowerInvariant();
                }
            }
        }
        catch (CultureNotFoundException)
        {
            // Unknown to the runtime: fall back to splitting the tag manually below.
        }

        var primary = trimmed.Split(LanguageTagSeparators, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault();
        return string.IsNullOrWhiteSpace(primary) ? string.Empty : primary.ToLowerInvariant();
    }
}
/// <summary>
/// Represents a localized text candidate.
/// </summary>
/// <param name="Text">Raw description text; <see cref="DescriptionNormalizer.Normalize"/> skips candidates whose text is null or whitespace.</param>
/// <param name="Language">Language tag of <paramref name="Text"/>; null or blank is treated as an unknown language.</param>
public readonly record struct LocalizedText(string? Text, string? Language);
/// <summary>
/// Represents a normalized description result.
/// </summary>
/// <param name="Text">Sanitized description text; empty when <see cref="DescriptionNormalizer.Normalize"/> found no usable candidate.</param>
/// <param name="Language">Lower-cased primary language subtag of <paramref name="Text"/>; <c>"en"</c> when the language is unknown.</param>
public readonly record struct NormalizedDescription(string Text, string Language);