173 lines
5.7 KiB
C#
173 lines
5.7 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.Collections.Immutable;
|
||
using System.Linq;
|
||
using System.Text;
|
||
using System.Text.RegularExpressions;
|
||
using System.Threading.Tasks;
|
||
|
||
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
|
||
|
||
internal static class KasperskyAdvisoryParser
|
||
{
|
||
// Matches CVE identifiers of the form "CVE-" + four digits + "-" + one or more digits, ignoring case.
private static readonly Regex CveRegex = new("CVE-\\d{4}-\\d+", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||
// Matches any run of one or more whitespace characters; used to collapse such runs into a single space.
private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled);
|
||
|
||
/// <summary>
/// Builds a <see cref="KasperskyAdvisoryDto"/> from the supplied advisory metadata and raw HTML bytes.
/// The HTML is reduced to plain text; that text, together with the title and summary,
/// is then scanned for CVE identifiers and vendor-name candidates.
/// </summary>
/// <param name="advisoryKey">Advisory key, passed through to the DTO unchanged.</param>
/// <param name="title">Advisory title; passed through and also scanned for CVEs and vendors.</param>
/// <param name="link">Advisory link, passed through to the DTO unchanged.</param>
/// <param name="published">Publication timestamp, passed through to the DTO unchanged.</param>
/// <param name="summary">Optional summary; passed through and also scanned when not blank.</param>
/// <param name="rawHtml">Raw advisory page bytes, decoded as UTF-8 before tag stripping.</param>
/// <returns>The DTO carrying the inputs plus the extracted text, CVE list and vendor list.</returns>
public static KasperskyAdvisoryDto Parse(
    string advisoryKey,
    string title,
    string link,
    DateTimeOffset published,
    string? summary,
    byte[] rawHtml)
{
    var plainText = ExtractText(rawHtml);
    var cveIds = ExtractCves(title, summary, plainText);
    var vendorNames = ExtractVendors(title, summary, plainText);

    return new KasperskyAdvisoryDto(
        advisoryKey, title, link, published,
        summary, plainText, cveIds, vendorNames);
}
|
||
|
||
/// <summary>
/// Converts raw HTML bytes into whitespace-normalized plain text.
/// </summary>
/// <param name="rawHtml">HTML document bytes, decoded as UTF-8.</param>
/// <returns>
/// The visible text with script/style blocks, comments and tags removed, entities decoded
/// and whitespace runs collapsed to single spaces; an empty string when nothing remains.
/// </returns>
private static string ExtractText(byte[] rawHtml)
{
    if (rawHtml.Length == 0)
    {
        return string.Empty;
    }

    var markup = Encoding.UTF8.GetString(rawHtml);

    // Drop whole <script> and <style> elements first so their bodies never reach the output.
    var withoutScripts = Regex.Replace(markup, "<script[\\s\\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
    var withoutStyles = Regex.Replace(withoutScripts, "<style[\\s\\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);

    // Singleline lets '.' cross line breaks, so multi-line comments are removed too.
    var withoutComments = Regex.Replace(withoutStyles, "<!--.*?-->", string.Empty, RegexOptions.Singleline);

    // Each remaining tag becomes a space so text on either side of it does not fuse together.
    var withoutTags = Regex.Replace(withoutComments, "<[^>]+>", " ");

    // Entities are decoded only after tag stripping, so an encoded "&lt;" survives as literal text.
    var text = System.Net.WebUtility.HtmlDecode(withoutTags);
    if (string.IsNullOrWhiteSpace(text))
    {
        return string.Empty;
    }

    return WhitespaceRegex.Replace(text, " ").Trim();
}
|
||
|
||
/// <summary>
/// Collects the distinct CVE identifiers mentioned in the title, summary and content.
/// </summary>
/// <param name="title">Advisory title.</param>
/// <param name="summary">Optional advisory summary; skipped when null or blank.</param>
/// <param name="content">Plain-text advisory body.</param>
/// <returns>Upper-cased identifiers, de-duplicated and sorted ordinally; empty when none are found.</returns>
private static ImmutableArray<string> ExtractCves(string title, string? summary, string content)
{
    var sources = new[] { title, summary, content };

    return sources
        .Where(static source => !string.IsNullOrWhiteSpace(source))
        .SelectMany(static source => CveRegex.Matches(source!).Cast<Match>())
        .Where(static hit => hit.Success)
        // Normalize casing before de-duplication so "cve-..." and "CVE-..." collapse to one entry.
        .Select(static hit => hit.Value.ToUpperInvariant())
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .OrderBy(static id => id, StringComparer.Ordinal)
        .ToImmutableArray();
}
|
||
|
||
/// <summary>
/// Derives vendor-name candidates from the title, summary and content.
/// Each non-blank source is split into segments, every segment is cleaned,
/// and the surviving values are de-duplicated case-insensitively.
/// </summary>
/// <param name="title">Advisory title.</param>
/// <param name="summary">Optional advisory summary; skipped when null or blank.</param>
/// <param name="content">Plain-text advisory body.</param>
/// <returns>The candidates sorted ordinally; an empty array when nothing survives cleaning.</returns>
private static ImmutableArray<string> ExtractVendors(string title, string? summary, string content)
{
    // Case-insensitive set: the first-seen casing of a vendor is the one that is kept.
    var vendors = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var source in new[] { title, summary, content })
    {
        if (string.IsNullOrWhiteSpace(source))
        {
            continue;
        }

        foreach (var piece in SplitSegments(source))
        {
            if (CleanVendorSegment(piece) is { } vendor && !string.IsNullOrWhiteSpace(vendor))
            {
                vendors.Add(vendor);
            }
        }
    }

    if (vendors.Count == 0)
    {
        return ImmutableArray<string>.Empty;
    }

    return vendors
        .OrderBy(static name => name, StringComparer.Ordinal)
        .ToImmutableArray();
}
|
||
|
||
/// <summary>
/// Breaks text into trimmed, non-empty segments by splitting successively on
/// ".", "-", the en dash, the em dash and ":".
/// </summary>
/// <param name="text">Text to split.</param>
/// <returns>The segments in their original left-to-right order; empty when the text is blank.</returns>
private static IEnumerable<string> SplitSegments(string text)
{
    const StringSplitOptions options = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;

    var pieces = new List<string> { text };

    // One pass per delimiter: every piece produced so far is split again by the next delimiter.
    foreach (var delimiter in new[] { ".", "-", "–", "—", ":" })
    {
        var next = new List<string>();
        foreach (var piece in pieces)
        {
            next.AddRange(piece.Split(delimiter, options));
        }

        pieces = next;
    }

    return pieces;
}
|
||
|
||
/// <summary>
/// Reduces one text segment to a vendor-name candidate.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>Keep only the first non-empty part around the word "vulnerability"/"vulnerabilities" (any casing).</item>
/// <item>If the text contains "provided by X", keep X.</item>
/// <item>If the text starts with a name followed by a product descriptor ("controllers", "firmware", ...), keep the name.</item>
/// <item>Normalize the typographic apostrophe, drop double quotes, cap the length at 200 characters and trim.</item>
/// </list>
/// </remarks>
/// <param name="value">Segment to clean.</param>
/// <returns>The cleaned candidate, or <c>null</c> when only whitespace remains.</returns>
private static string? CleanVendorSegment(string value)
{
    const int MaxVendorLength = 200;

    // CultureInvariant keeps case-insensitive matching stable under cultures with special casing rules (e.g. tr-TR).
    const RegexOptions IgnoreCaseInvariant = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    var candidate = value.Trim();
    if (candidate.Length == 0)
    {
        return null;
    }

    // The split must be as case-insensitive as the check that guards it, and must cover the plural form;
    // otherwise "Vulnerability" or "vulnerabilities" passes through without being cut.
    if (candidate.Contains("vulnerabilit", StringComparison.OrdinalIgnoreCase))
    {
        candidate = Regex.Split(candidate, "vulnerabilit(?:y|ies)", IgnoreCaseInvariant)
            .Select(static part => part.Trim())
            .FirstOrDefault(static part => part.Length > 0) ?? candidate;
    }

    var providedMatch = Regex.Match(candidate, "provided by\\s+(?<vendor>[A-Za-z0-9&.,' ]+)", IgnoreCaseInvariant);
    if (providedMatch.Success)
    {
        // The character class admits spaces, so the capture can end in whitespace (e.g. before "(").
        candidate = providedMatch.Groups["vendor"].Value.Trim();
    }

    var descriptorMatch = Regex.Match(candidate, "^(?<vendor>[A-Z][A-Za-z0-9&.,' ]{1,80}?)(?:\\s+(?:controllers?|devices?|modules?|products?|gateways?|routers?|appliances?|systems?|solutions?|firmware))\\b", IgnoreCaseInvariant);
    if (descriptorMatch.Success)
    {
        candidate = descriptorMatch.Groups["vendor"].Value;
    }

    candidate = candidate.Replace("’", "'", StringComparison.Ordinal);
    candidate = candidate.Replace("\"", string.Empty, StringComparison.Ordinal);

    if (candidate.Length > MaxVendorLength)
    {
        // Do not cut between the two halves of a surrogate pair.
        var cut = char.IsHighSurrogate(candidate[MaxVendorLength - 1]) ? MaxVendorLength - 1 : MaxVendorLength;
        candidate = candidate[..cut];
    }

    // Quote removal and truncation can expose leading/trailing whitespace; trim so that
    // "Siemens" and "Siemens " do not end up as two distinct candidates.
    candidate = candidate.Trim();

    return candidate.Length == 0 ? null : candidate;
}
|
||
}
|