Files
git.stella-ops.org/src/StellaOps.Feedser.Source.Ru.Nkcki/Internal/RuNkckiJsonParser.cs
Vladimir Moushkov ea1106ce7c up
2025-10-15 10:03:56 +03:00

647 lines
23 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace StellaOps.Feedser.Source.Ru.Nkcki.Internal;
internal static class RuNkckiJsonParser
{
// "<name> <operator> <version>", where the operator is one of <=, >=, <, >, == or =.
private static readonly Regex ComparatorRegex = new Regex(
    @"^(?<name>.+?)\s*(?<operator><=|>=|<|>|==|=)\s*(?<version>.+?)$",
    RegexOptions.CultureInvariant | RegexOptions.Compiled);

// "<name> <start> - <end>": a product name followed by a hyphen-separated version window.
private static readonly Regex RangeRegex = new Regex(
    @"^(?<name>.+?)\s+(?<start>[\p{L}\p{N}\._-]+)\s*[-]\s*(?<end>[\p{L}\p{N}\._-]+)$",
    RegexOptions.CultureInvariant | RegexOptions.Compiled);

// "<name> <version> <qualifier>" with an English or Russian trailing qualifier; case-insensitive.
private static readonly Regex QualifierRegex = new Regex(
    @"^(?<name>.+?)\s+(?<version>[\p{L}\p{N}\._-]+)\s+(?<qualifier>(and\s+earlier|and\s+later|and\s+newer|до\s+и\s+включительно|и\s+ниже|и\s+выше|и\s+старше|и\s+позже))$",
    RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Unanchored Russian "версии/версия <version> <qualifier>" fragment anywhere in the text.
private static readonly Regex QualifierInlineRegex = new Regex(
    @"верс(ии|ия)\s+(?<version>[\p{L}\p{N}\._-]+)\s+(?<qualifier>и\s+ниже|и\s+выше|и\s+старше)",
    RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Unanchored Russian "версии/версия <start> по <end>" fragment anywhere in the text.
private static readonly Regex VersionWindowRegex = new Regex(
    @"верс(ии|ия)\s+(?<start>[\p{L}\p{N}\._-]+)\s+по\s+(?<end>[\p{L}\p{N}\._-]+)",
    RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Characters that separate individual products inside a free-text software list.
private static readonly char[] SoftwareSplitDelimiters = new[] { '\n', ';', '\u2022', '\u2023', '\r' };

// Single comparer instance shared by every de-duplication and grouping step in this class.
private static readonly StringComparer OrdinalIgnoreCase = StringComparer.OrdinalIgnoreCase;
/// <summary>
/// Maps one bulletin JSON object onto a <see cref="RuNkckiVulnerabilityDto"/>.
/// Missing or unexpectedly typed fields become null/empty values instead of exceptions.
/// </summary>
/// <param name="element">The JSON object describing a single vulnerability.</param>
/// <returns>The populated DTO; optional fields are null or empty when absent.</returns>
public static RuNkckiVulnerabilityDto Parse(JsonElement element)
{
    // JsonElement.TryGetProperty throws InvalidOperationException on anything that is not an
    // object, and GetString throws on anything that is not a string or null. The feed is
    // external input, so every lookup below goes through these kind-checked helpers.
    static JsonElement? Child(JsonElement? parent, string name)
    {
        if (parent is { ValueKind: JsonValueKind.Object } owner && owner.TryGetProperty(name, out var child))
        {
            return child;
        }

        return null;
    }

    static string? Text(JsonElement? node)
    {
        return node is { ValueKind: JsonValueKind.String } value ? value.GetString() : null;
    }

    static bool? Flag(JsonElement? node)
    {
        if (node is not { } value)
        {
            return null;
        }

        return value.ValueKind switch
        {
            JsonValueKind.True => true,
            JsonValueKind.False => false,
            _ => (bool?)null,
        };
    }

    var vulnId = Child(element, "vuln_id");
    var fstecId = Normalize(Text(Child(vulnId, "FSTEC")));
    var mitreId = Normalize(Text(Child(vulnId, "MITRE")));

    var datePublished = ParseDate(Text(Child(element, "date_published")));
    var dateUpdated = ParseDate(Text(Child(element, "date_updated")));
    var cvssRating = Normalize(Text(Child(element, "cvss_rating")));
    var patchAvailable = Flag(Child(element, "patch_available"));
    var userInteraction = Flag(Child(element, "user_interaction"));

    var description = ReadJoinedString(element, "description");
    var mitigation = ReadJoinedString(element, "mitigation");
    var productCategories = ReadStringCollection(element, "product_category");
    var impact = ReadJoinedString(element, "impact");
    var method = ReadJoinedString(element, "method_of_exploitation");

    var (softwareText, softwareHasCpe, softwareEntries) = ParseVulnerableSoftware(element);

    RuNkckiCweDto? cweDto = null;
    if (Child(element, "cwe") is { } cweElement)
    {
        int? number = null;
        string? cweDescription;

        if (cweElement.ValueKind == JsonValueKind.Object)
        {
            if (Child(cweElement, "cwe_number") is { } numberElement)
            {
                if (numberElement.ValueKind == JsonValueKind.Number && numberElement.TryGetInt32(out var parsed))
                {
                    number = parsed;
                }
                else if (numberElement.ValueKind == JsonValueKind.String
                    && int.TryParse(numberElement.GetString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedInt))
                {
                    number = parsedInt;
                }
            }

            cweDescription = ReadJoinedString(cweElement, "cwe_description");
        }
        else
        {
            // "cwe" given as a bare string: keep it as the description.
            // Any other scalar kind yields null here.
            cweDescription = Normalize(Text(cweElement));
        }

        if (number.HasValue || !string.IsNullOrWhiteSpace(cweDescription))
        {
            cweDto = new RuNkckiCweDto(number, cweDescription);
        }
    }

    var cvss = Child(element, "cvss");
    double? cvssScore = Child(cvss, "cvss_score") is { } score ? ParseDouble(score) : null;
    var cvssVector = Normalize(Text(Child(cvss, "cvss_vector")));
    double? cvssScoreV4 = Child(cvss, "cvss_score_v4") is { } scoreV4 ? ParseDouble(scoreV4) : null;
    var cvssVectorV4 = Normalize(Text(Child(cvss, "cvss_vector_v4")));

    var urls = ReadUrls(element);
    var tags = ReadStringCollection(element, "tags");

    return new RuNkckiVulnerabilityDto(
        fstecId,
        mitreId,
        datePublished,
        dateUpdated,
        cvssRating,
        patchAvailable,
        description,
        cweDto,
        productCategories,
        mitigation,
        softwareText,
        softwareHasCpe,
        softwareEntries,
        cvssScore,
        cvssVector,
        cvssScoreV4,
        cvssVectorV4,
        impact,
        method,
        userInteraction,
        urls,
        tags);
}
/// <summary>
/// Gathers every URL found under the "urls" property, normalizes them, removes
/// case-insensitive duplicates (first occurrence wins) and returns them sorted.
/// </summary>
private static ImmutableArray<string> ReadUrls(JsonElement element)
{
    var rawUrls = new List<string>();
    if (element.TryGetProperty("urls", out var urlsNode))
    {
        CollectUrls(urlsNode, rawUrls);
    }

    if (rawUrls.Count == 0)
    {
        return ImmutableArray<string>.Empty;
    }

    var seen = new HashSet<string>(OrdinalIgnoreCase);
    var unique = new List<string>(rawUrls.Count);
    foreach (var raw in rawUrls)
    {
        var url = Normalize(raw);
        if (!string.IsNullOrWhiteSpace(url) && seen.Add(url!))
        {
            unique.Add(url!);
        }
    }

    // Entries are distinct under the same comparer, so the sort has no ties to break.
    unique.Sort(StringComparer.OrdinalIgnoreCase);
    return unique.ToImmutableArray();
}
/// <summary>
/// Walks a JSON value and appends every non-blank URL string it finds to <paramref name="results"/>.
/// Strings are taken as-is, arrays are walked element by element, and objects contribute
/// their "url" and "href" properties first, then any "value" / "link" members in document order.
/// </summary>
private static void CollectUrls(JsonElement element, ICollection<string> results)
{
    var kind = element.ValueKind;

    if (kind == JsonValueKind.String)
    {
        var text = element.GetString();
        if (!string.IsNullOrWhiteSpace(text))
        {
            results.Add(text);
        }

        return;
    }

    if (kind == JsonValueKind.Array)
    {
        foreach (var item in element.EnumerateArray())
        {
            CollectUrls(item, results);
        }

        return;
    }

    if (kind != JsonValueKind.Object)
    {
        return;
    }

    // Direct lookups come first so their results precede the enumerated members.
    if (element.TryGetProperty("url", out var urlNode))
    {
        CollectUrls(urlNode, results);
    }

    if (element.TryGetProperty("href", out var hrefNode))
    {
        CollectUrls(hrefNode, results);
    }

    foreach (var member in element.EnumerateObject())
    {
        var isLinkLike = member.NameEquals("value") || member.NameEquals("link");
        if (isLinkLike)
        {
            CollectUrls(member.Value, results);
        }
    }
}
/// <summary>
/// Reads <paramref name="property"/> and flattens it to one string: collected values are
/// joined with "; ", otherwise the property's own text is normalized.
/// </summary>
/// <returns>The flattened text, or null when the property is missing or blank.</returns>
private static string? ReadJoinedString(JsonElement element, string property)
{
    if (element.TryGetProperty(property, out var node))
    {
        var parts = ReadStringCollection(node);
        if (parts.IsDefaultOrEmpty)
        {
            // Nothing collectable: fall back to the node's own text representation.
            var raw = node.ValueKind == JsonValueKind.String ? node.GetString() : node.ToString();
            return Normalize(raw);
        }

        return string.Join("; ", parts);
    }

    return null;
}
/// <summary>
/// Collects the string values stored under <paramref name="property"/>;
/// returns an empty array when the property does not exist.
/// </summary>
private static ImmutableArray<string> ReadStringCollection(JsonElement element, string property)
    => element.TryGetProperty(property, out var node)
        ? ReadStringCollection(node)
        : ImmutableArray<string>.Empty;
/// <summary>
/// Collects every scalar reachable from <paramref name="element"/> as text and
/// returns the de-duplicated, sorted result.
/// </summary>
private static ImmutableArray<string> ReadStringCollection(JsonElement element)
{
    var collected = ImmutableArray.CreateBuilder<string>();
    CollectStrings(element, collected);

    var unique = Deduplicate(collected);
    return unique;
}
/// <summary>
/// Recursively appends the textual form of every scalar under <paramref name="element"/>
/// to <paramref name="builder"/>. Strings and numbers are normalized and skipped when blank,
/// booleans become "true"/"false", arrays and objects are walked, and null is ignored.
/// </summary>
private static void CollectStrings(JsonElement element, ImmutableArray<string>.Builder builder)
{
    var kind = element.ValueKind;

    if (kind == JsonValueKind.Array)
    {
        foreach (var item in element.EnumerateArray())
        {
            CollectStrings(item, builder);
        }

        return;
    }

    if (kind == JsonValueKind.Object)
    {
        foreach (var member in element.EnumerateObject())
        {
            CollectStrings(member.Value, builder);
        }

        return;
    }

    if (kind == JsonValueKind.True || kind == JsonValueKind.False)
    {
        builder.Add(kind == JsonValueKind.True ? "true" : "false");
        return;
    }

    if (kind == JsonValueKind.String || kind == JsonValueKind.Number)
    {
        var text = kind == JsonValueKind.String ? element.GetString() : element.ToString();
        AddIfPresent(builder, Normalize(text));
    }
}
/// <summary>
/// Drops blank entries and case-insensitive duplicates (keeping the first spelling seen)
/// and returns the remaining values sorted case-insensitively.
/// </summary>
private static ImmutableArray<string> Deduplicate(ImmutableArray<string>.Builder builder)
{
    if (builder.Count == 0)
    {
        return ImmutableArray<string>.Empty;
    }

    var seen = new HashSet<string>(OrdinalIgnoreCase);
    var kept = new List<string>(builder.Count);
    foreach (var candidate in builder)
    {
        if (!string.IsNullOrWhiteSpace(candidate) && seen.Add(candidate))
        {
            kept.Add(candidate);
        }
    }

    // Entries are distinct under the same comparer, so the sort has no ties to break.
    kept.Sort(StringComparer.OrdinalIgnoreCase);
    return kept.ToImmutableArray();
}
/// <summary>Appends <paramref name="value"/> to <paramref name="builder"/> unless it is null or blank.</summary>
private static void AddIfPresent(ImmutableArray<string>.Builder builder, string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return;
    }

    builder.Add(value);
}
/// <summary>
/// Reads the "vulnerable_software" node: its free text, its CPE flag and the list of
/// affected products merged per identifier (case-insensitive).
/// </summary>
/// <param name="element">The vulnerability JSON object.</param>
/// <returns>
/// The normalized free text, the CPE flag (null when absent or not a boolean) and the
/// merged entries sorted by identifier. All three are empty when the node is missing.
/// </returns>
private static (string? Text, bool? HasCpe, ImmutableArray<RuNkckiSoftwareEntry> Entries) ParseVulnerableSoftware(JsonElement element)
{
    if (!element.TryGetProperty("vulnerable_software", out var softwareElement))
    {
        return (null, null, ImmutableArray<RuNkckiSoftwareEntry>.Empty);
    }

    string? rawText = null;
    bool? softwareHasCpe = null;
    var entries = new List<RuNkckiSoftwareEntry>();

    // TryGetProperty throws on non-objects, so branch on the kind: a null node yields
    // nothing, a bare string is treated as the free text, a bare array as the entry list.
    switch (softwareElement.ValueKind)
    {
        case JsonValueKind.Object:
            if (softwareElement.TryGetProperty("software_text", out var textElement))
            {
                rawText = textElement.ValueKind == JsonValueKind.String ? textElement.GetString() : textElement.ToString();
            }

            if (softwareElement.TryGetProperty("cpe", out var cpeElement))
            {
                softwareHasCpe = cpeElement.ValueKind switch
                {
                    JsonValueKind.True => true,
                    JsonValueKind.False => false,
                    _ => (bool?)null,
                };
            }

            if (softwareElement.TryGetProperty("software", out var softwareNodes))
            {
                entries.AddRange(ParseSoftwareEntries(softwareNodes));
            }

            break;

        case JsonValueKind.String:
            rawText = softwareElement.GetString();
            break;

        case JsonValueKind.Array:
            entries.AddRange(ParseSoftwareEntries(softwareElement));
            break;
    }

    var softwareText = Normalize(rawText);

    if (entries.Count == 0 && !string.IsNullOrWhiteSpace(rawText))
    {
        // Split the RAW text: Normalize turns line breaks into spaces, which would hide the
        // '\n' / '\r' delimiters and collapse a multi-line list into a single entry.
        entries.AddRange(SplitSoftwareTextIntoEntries(rawText!));
    }

    if (entries.Count == 0 && softwareElement.ValueKind == JsonValueKind.Object)
    {
        foreach (var fallbackProperty in new[] { "items", "aliases", "software_lines" })
        {
            if (softwareElement.TryGetProperty(fallbackProperty, out var fallbackNodes))
            {
                entries.AddRange(ParseSoftwareEntries(fallbackNodes));
            }
        }
    }

    if (entries.Count == 0)
    {
        return (softwareText, softwareHasCpe, ImmutableArray<RuNkckiSoftwareEntry>.Empty);
    }

    // Merge entries that share an identifier: evidence strings are joined, range hints unioned.
    var grouped = entries
        .GroupBy(static entry => entry.Identifier, OrdinalIgnoreCase)
        .Select(static group =>
        {
            var evidence = string.Join(
                "; ",
                group.Select(static entry => entry.Evidence)
                    .Where(static evidence => !string.IsNullOrWhiteSpace(evidence))
                    .Distinct(OrdinalIgnoreCase));

            var ranges = group
                .SelectMany(static entry => entry.RangeExpressions)
                .Where(static range => !string.IsNullOrWhiteSpace(range))
                .Distinct(OrdinalIgnoreCase)
                .OrderBy(static range => range, StringComparer.OrdinalIgnoreCase)
                .ToImmutableArray();

            return new RuNkckiSoftwareEntry(
                group.Key,
                string.IsNullOrWhiteSpace(evidence) ? group.Key : evidence,
                ranges);
        })
        .OrderBy(static entry => entry.Identifier, StringComparer.OrdinalIgnoreCase)
        .ToImmutableArray();

    return (softwareText, softwareHasCpe, grouped);
}
/// <summary>
/// Lazily converts a JSON node into software entries: objects map to one entry, strings are
/// split as free text, arrays are flattened recursively, and other kinds yield nothing.
/// </summary>
private static IEnumerable<RuNkckiSoftwareEntry> ParseSoftwareEntries(JsonElement element)
{
    var kind = element.ValueKind;

    if (kind == JsonValueKind.Object)
    {
        yield return CreateEntryFromObject(element);
    }
    else if (kind == JsonValueKind.String)
    {
        var text = element.GetString() ?? string.Empty;
        foreach (var parsed in SplitSoftwareTextIntoEntries(text))
        {
            yield return parsed;
        }
    }
    else if (kind == JsonValueKind.Array)
    {
        foreach (var item in element.EnumerateArray())
        {
            foreach (var nested in ParseSoftwareEntries(item))
            {
                yield return nested;
            }
        }
    }
}
/// <summary>
/// Builds a software entry from a structured JSON object. The identifier is "vendor name"
/// when either is present, otherwise the first available of identifier / name / version /
/// comment, falling back to "unknown". The evidence is the identifier parts plus version
/// and comment; range hints come from the version text and the "range" property.
/// </summary>
private static RuNkckiSoftwareEntry CreateEntryFromObject(JsonElement element)
{
    var vendor = ReadFirstString(element, "vendor", "manufacturer", "organisation");
    var name = ReadFirstString(element, "name", "product", "title");
    var rawVersion = ReadFirstString(element, "version", "versions", "range");
    var comment = ReadFirstString(element, "comment", "notes", "summary");

    var identifierParts = new[] { vendor, name }
        .Where(static part => !string.IsNullOrWhiteSpace(part))
        .Select(static part => part!)
        .ToList();

    string? identifier;
    if (identifierParts.Count > 0)
    {
        identifier = string.Join(" ", identifierParts);
    }
    else
    {
        identifier = ReadFirstString(element, "identifier") ?? name ?? rawVersion ?? comment ?? "unknown";
    }

    var hasVersion = !string.IsNullOrWhiteSpace(rawVersion);

    var evidenceParts = new List<string>(identifierParts);
    if (hasVersion)
    {
        evidenceParts.Add(rawVersion!);
    }

    if (!string.IsNullOrWhiteSpace(comment))
    {
        evidenceParts.Add(comment!);
    }

    var evidence = string.Join(" ", evidenceParts.Where(static part => !string.IsNullOrWhiteSpace(part))).Trim();

    var rangeHints = new List<string?>();
    if (hasVersion)
    {
        rangeHints.Add(rawVersion);
    }

    if (element.TryGetProperty("range", out var rangeNode))
    {
        rangeHints.Add(Normalize(rangeNode.ToString()));
    }

    return CreateSoftwareEntry(identifier!, evidence, rangeHints);
}
/// <summary>
/// Lazily splits a free-text software list on the configured delimiters and turns every
/// non-blank segment into an entry, using the normalized segment itself as the evidence.
/// </summary>
private static IEnumerable<RuNkckiSoftwareEntry> SplitSoftwareTextIntoEntries(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        yield break;
    }

    const StringSplitOptions SplitOptions = StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;
    var pieces = text.Split(SoftwareSplitDelimiters, SplitOptions);

    // If splitting left nothing, treat the whole text as a single segment.
    IEnumerable<string> candidates = pieces.Length > 0 ? pieces : new[] { text };

    foreach (var candidate in candidates)
    {
        var cleaned = Normalize(candidate);
        if (!string.IsNullOrWhiteSpace(cleaned))
        {
            var (identifier, hints) = ExtractIdentifierAndRangeHints(cleaned!);
            yield return CreateSoftwareEntry(identifier, cleaned!, hints);
        }
    }
}
/// <summary>
/// Creates an entry from raw parts: the identifier falls back to "unknown", the evidence
/// falls back to the identifier, and hints are normalized, de-duplicated (case-insensitive,
/// first spelling wins) and sorted.
/// </summary>
private static RuNkckiSoftwareEntry CreateSoftwareEntry(string identifier, string evidence, IEnumerable<string?> hints)
{
    var cleanIdentifier = Normalize(identifier) ?? "unknown";
    var cleanEvidence = Normalize(evidence) ?? cleanIdentifier;

    var seen = new HashSet<string>(OrdinalIgnoreCase);
    var ranges = new List<string>();
    foreach (var hint in hints)
    {
        var cleaned = NormalizeRangeHint(hint);
        if (!string.IsNullOrWhiteSpace(cleaned) && seen.Add(cleaned!))
        {
            ranges.Add(cleaned!);
        }
    }

    // Entries are distinct under the same comparer, so the sort has no ties to break.
    ranges.Sort(StringComparer.OrdinalIgnoreCase);

    return new RuNkckiSoftwareEntry(cleanIdentifier, cleanEvidence, ranges.ToImmutableArray());
}
/// <summary>
/// Normalizes a range hint and rewrites the comparator spellings "≤", "≥", "=&gt;" and
/// "=&lt;" to "&lt;=" / "&gt;=".
/// </summary>
/// <returns>The cleaned hint, or null when the input is null or blank.</returns>
private static string? NormalizeRangeHint(string? hint)
{
    // Normalize yields null for null or blank input, so one call covers the empty-input guard.
    var cleaned = Normalize(hint);
    if (cleaned is null)
    {
        return null;
    }

    // Applied in this exact order; every replacement is ordinal.
    var rewrites = new[]
    {
        ("≤", "<="),
        ("≥", ">="),
        ("=>", ">="),
        ("=<", "<="),
    };

    foreach (var (from, to) in rewrites)
    {
        cleaned = cleaned.Replace(from, to, StringComparison.Ordinal);
    }

    return string.IsNullOrWhiteSpace(cleaned) ? null : cleaned;
}
/// <summary>
/// Splits one software line into a product identifier and zero or more version-range hints
/// such as "&lt;= 1.2" or "&gt;= 3.0".
/// </summary>
/// <param name="value">A single, already normalized software line.</param>
/// <returns>
/// The identifier (the whole line when no product name can be isolated, "unknown" for blank
/// input) and the hints in the order they were derived; empty when no pattern matches.
/// </returns>
private static (string Identifier, IReadOnlyList<string?> RangeHints) ExtractIdentifierAndRangeHints(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return ("unknown", Array.Empty<string>());
    }

    // Canonicalize comparator spellings BEFORE matching. The comparator pattern only knows
    // <=, >=, <, >, == and =, so "≤"/"≥" never matched at all, and "=>"/"=<" matched as "="
    // followed by a version starting with ">"/"<", producing hints like "= > 1.0".
    var probe = value
        .Replace("≤", "<=", StringComparison.Ordinal)
        .Replace("≥", ">=", StringComparison.Ordinal)
        .Replace("=>", ">=", StringComparison.Ordinal)
        .Replace("=<", "<=", StringComparison.Ordinal);

    // When no usable product name is captured, fall back to the original line.
    string NameOrWholeLine(string? candidate) => string.IsNullOrWhiteSpace(candidate) ? value : candidate!;

    var comparatorMatch = ComparatorRegex.Match(probe);
    if (comparatorMatch.Success)
    {
        var name = Normalize(comparatorMatch.Groups["name"].Value);
        var version = Normalize(comparatorMatch.Groups["version"].Value);
        var op = comparatorMatch.Groups["operator"].Value;
        return (NameOrWholeLine(name), new[] { $"{op} {version}" });
    }

    var rangeMatch = RangeRegex.Match(probe);
    if (rangeMatch.Success)
    {
        var name = Normalize(rangeMatch.Groups["name"].Value);
        var start = Normalize(rangeMatch.Groups["start"].Value);
        var end = Normalize(rangeMatch.Groups["end"].Value);
        return (NameOrWholeLine(name), new[] { $">= {start}", $"<= {end}" });
    }

    var qualifierMatch = QualifierRegex.Match(probe);
    if (qualifierMatch.Success)
    {
        var name = Normalize(qualifierMatch.Groups["name"].Value);
        var version = Normalize(qualifierMatch.Groups["version"].Value);
        var qualifier = qualifierMatch.Groups["qualifier"].Value.ToLowerInvariant();

        // "и ниже", "and earlier" and "до и включительно" are upper bounds; every other
        // qualifier the pattern accepts is a lower bound.
        var isUpperBound = qualifier.Contains("ниж") || qualifier.Contains("earlier") || qualifier.Contains("включ");
        var hint = isUpperBound ? $"<= {version}" : $">= {version}";
        return (NameOrWholeLine(name), new[] { hint });
    }

    var inlineQualifierMatch = QualifierInlineRegex.Match(probe);
    if (inlineQualifierMatch.Success)
    {
        var version = Normalize(inlineQualifierMatch.Groups["version"].Value);
        var qualifier = inlineQualifierMatch.Groups["qualifier"].Value.ToLowerInvariant();
        var hint = qualifier.Contains("ниж") ? $"<= {version}" : $">= {version}";

        // The product name is whatever remains once the version phrase is removed.
        var name = Normalize(QualifierInlineRegex.Replace(probe, string.Empty));
        return (NameOrWholeLine(name), new[] { hint });
    }

    var windowMatch = VersionWindowRegex.Match(probe);
    if (windowMatch.Success)
    {
        var start = Normalize(windowMatch.Groups["start"].Value);
        var end = Normalize(windowMatch.Groups["end"].Value);
        var name = Normalize(VersionWindowRegex.Replace(probe, string.Empty));
        return (NameOrWholeLine(name), new[] { $">= {start}", $"<= {end}" });
    }

    return (value, Array.Empty<string>());
}
/// <summary>
/// Returns the normalized text of the first property in <paramref name="names"/> that
/// exists, is a string or a number, and is not blank.
/// </summary>
/// <returns>The first usable value, or null when none of the names qualifies.</returns>
private static string? ReadFirstString(JsonElement element, params string[] names)
{
    foreach (var name in names)
    {
        if (!element.TryGetProperty(name, out var candidate))
        {
            continue;
        }

        string? raw;
        if (candidate.ValueKind == JsonValueKind.String)
        {
            raw = candidate.GetString();
        }
        else if (candidate.ValueKind == JsonValueKind.Number)
        {
            raw = candidate.ToString();
        }
        else
        {
            // Booleans, nulls, arrays and objects are not usable here.
            continue;
        }

        var cleaned = Normalize(raw);
        if (!string.IsNullOrWhiteSpace(cleaned))
        {
            return cleaned;
        }
    }

    return null;
}
/// <summary>
/// Reads a JSON number, or a numeric string, as a finite double. Strings may use either a
/// decimal point or a decimal comma.
/// </summary>
/// <param name="element">A JSON number or string node.</param>
/// <returns>The parsed value, or null when the node is not a finite number.</returns>
private static double? ParseDouble(JsonElement element)
{
    if (element.ValueKind == JsonValueKind.Number)
    {
        if (element.TryGetDouble(out var number) && double.IsFinite(number))
        {
            return number;
        }

        return null;
    }

    if (element.ValueKind != JsonValueKind.String)
    {
        return null;
    }

    var text = element.GetString()?.Trim();
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    // NumberStyles.Any includes AllowThousands, and the invariant group separator is ',',
    // so "7,5" used to parse as 75. Treat a lone comma as the decimal separator, and only
    // accept commas as group separators when a decimal point is also present ("1,234.5").
    var styles = NumberStyles.Float;
    if (text.Contains(','))
    {
        if (text.Contains('.'))
        {
            styles |= NumberStyles.AllowThousands;
        }
        else
        {
            text = text.Replace(',', '.');
        }
    }

    if (double.TryParse(text, styles, CultureInfo.InvariantCulture, out var parsed) && double.IsFinite(parsed))
    {
        return parsed;
    }

    return null;
}
/// <summary>
/// Parses a date string into a UTC <see cref="DateTimeOffset"/>. Values without an explicit
/// offset are taken as UTC.
/// </summary>
/// <param name="value">The raw date text; may be null or blank.</param>
/// <returns>The parsed instant, or null when the text is blank or unparseable.</returns>
private static DateTimeOffset? ParseDate(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    const DateTimeStyles Styles = DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal;

    // Dotted dates are day-first. They must be tried before the invariant parse, which
    // accepts '.' as a separator but reads month-first and would silently turn
    // 01.02.2024 (1 February) into 2 January.
    var dayFirstFormats = new[] { "d.M.yyyy", "d.M.yyyy H:mm", "d.M.yyyy H:mm:ss" };
    if (DateTimeOffset.TryParseExact(value.Trim(), dayFirstFormats, CultureInfo.InvariantCulture, Styles, out var dayFirst))
    {
        return dayFirst;
    }

    if (DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, Styles, out var parsed))
    {
        return parsed;
    }

    try
    {
        if (DateTimeOffset.TryParse(value, CultureInfo.GetCultureInfo("ru-RU"), Styles, out var ruParsed))
        {
            return ruParsed;
        }
    }
    catch (CultureNotFoundException)
    {
        // The ru-RU culture is unavailable (e.g. invariant-globalization mode): treat the
        // value as unparseable instead of failing the whole record.
    }

    return null;
}
/// <summary>
/// Flattens text onto one line: CR/LF become spaces, surrounding whitespace is trimmed and
/// runs of spaces collapse to a single space.
/// </summary>
/// <returns>The cleaned text, or null when the input is null, empty or whitespace only.</returns>
private static string? Normalize(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    var singleLine = value.Replace('\r', ' ').Replace('\n', ' ').Trim();

    // Splitting on the space character and dropping empty pieces collapses every run of
    // spaces; other whitespace (tabs, etc.) inside the text is deliberately left untouched.
    var words = singleLine.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    var collapsed = string.Join(" ", words);

    return collapsed.Length > 0 ? collapsed : null;
}
}