Rename Concelier Source modules to Connector
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
|
||||
|
||||
/// <summary>
/// Turns advisory metadata plus the raw HTML bytes of an advisory page into a
/// <see cref="KasperskyAdvisoryDto"/>, extracting plain text, CVE identifiers and
/// heuristic vendor-name candidates along the way.
/// </summary>
internal static class KasperskyAdvisoryParser
{
    // Shared options for the case-insensitive patterns below. CultureInvariant keeps
    // matching independent of the process culture (e.g. Turkish dotted/dotless "i").
    private const RegexOptions IgnoreCaseOptions =
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled;

    // Upper bound on the length of a single vendor candidate.
    private const int MaxVendorLength = 200;

    private static readonly Regex CveRegex = new("CVE-\\d{4}-\\d+", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled);

    private static readonly Regex ScriptBlockRegex = new("<script[\\s\\S]*?</script>", IgnoreCaseOptions);
    private static readonly Regex StyleBlockRegex = new("<style[\\s\\S]*?</style>", IgnoreCaseOptions);
    private static readonly Regex HtmlCommentRegex = new("<!--.*?-->", RegexOptions.Singleline | RegexOptions.Compiled);
    private static readonly Regex HtmlTagRegex = new("<[^>]+>", RegexOptions.Compiled);

    private static readonly Regex VulnerabilityWordRegex = new("vulnerabilit(?:y|ies)", IgnoreCaseOptions);
    private static readonly Regex ProvidedByRegex = new("provided by\\s+(?<vendor>[A-Za-z0-9&.,' ]+)", IgnoreCaseOptions);
    private static readonly Regex DescriptorRegex = new(
        "^(?<vendor>[A-Z][A-Za-z0-9&.,' ]{1,80}?)(?:\\s+(?:controllers?|devices?|modules?|products?|gateways?|routers?|appliances?|systems?|solutions?|firmware))\\b",
        IgnoreCaseOptions);

    // Characters that delimit candidate segments: full stop, hyphen, en dash, em dash, colon.
    private static readonly string[] SegmentSeparators = { ".", "-", "–", "—", ":" };

    /// <summary>
    /// Builds the advisory DTO: the metadata arguments are passed through unchanged, while the
    /// text content, CVE list and vendor list are derived from the title, summary and HTML.
    /// </summary>
    /// <param name="advisoryKey">Advisory key, passed through to the DTO.</param>
    /// <param name="title">Advisory title; also scanned for CVEs and vendors.</param>
    /// <param name="link">Advisory link, passed through to the DTO.</param>
    /// <param name="published">Publication timestamp, passed through to the DTO.</param>
    /// <param name="summary">Optional summary; also scanned for CVEs and vendors.</param>
    /// <param name="rawHtml">UTF-8 encoded HTML of the advisory page; null or empty yields empty content.</param>
    /// <returns>The populated <see cref="KasperskyAdvisoryDto"/>.</returns>
    public static KasperskyAdvisoryDto Parse(
        string advisoryKey,
        string title,
        string link,
        DateTimeOffset published,
        string? summary,
        byte[] rawHtml)
    {
        var content = ExtractText(rawHtml);
        var cves = ExtractCves(title, summary, content);
        var vendors = ExtractVendors(title, summary, content);

        return new KasperskyAdvisoryDto(
            advisoryKey,
            title,
            link,
            published,
            summary,
            content,
            cves,
            vendors);
    }

    /// <summary>
    /// Decodes the HTML bytes as UTF-8 and reduces them to whitespace-normalised plain text:
    /// script blocks, style blocks and comments are dropped, remaining tags become spaces,
    /// HTML entities are decoded and whitespace runs collapse to a single space.
    /// </summary>
    /// <returns>The extracted text, or an empty string when there is nothing to extract.</returns>
    private static string ExtractText(byte[] rawHtml)
    {
        // Treat a missing payload like an empty one instead of failing with a NullReferenceException.
        if (rawHtml is null || rawHtml.Length == 0)
        {
            return string.Empty;
        }

        var html = Encoding.UTF8.GetString(rawHtml);

        // GetString keeps a leading byte-order mark as U+FEFF; neither \s nor Trim() removes it,
        // so strip it here to keep it out of the extracted content.
        if (html.Length > 0 && html[0] == '\uFEFF')
        {
            html = html[1..];
        }

        html = ScriptBlockRegex.Replace(html, string.Empty);
        html = StyleBlockRegex.Replace(html, string.Empty);
        html = HtmlCommentRegex.Replace(html, string.Empty);
        // Tags are replaced by a space so that adjacent text nodes do not run together.
        html = HtmlTagRegex.Replace(html, " ");

        var decoded = System.Net.WebUtility.HtmlDecode(html);
        return string.IsNullOrWhiteSpace(decoded)
            ? string.Empty
            : WhitespaceRegex.Replace(decoded, " ").Trim();
    }

    /// <summary>
    /// Collects every CVE identifier found in the title, summary and content.
    /// </summary>
    /// <returns>Distinct, upper-cased identifiers in ordinal order.</returns>
    private static ImmutableArray<string> ExtractCves(string title, string? summary, string content)
    {
        var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        void Capture(string? text)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                return;
            }

            foreach (Match match in CveRegex.Matches(text))
            {
                found.Add(match.Value.ToUpperInvariant());
            }
        }

        Capture(title);
        Capture(summary);
        Capture(content);

        return found.OrderBy(static cve => cve, StringComparer.Ordinal).ToImmutableArray();
    }

    /// <summary>
    /// Collects vendor-name candidates from the title, summary and content. Each text is split
    /// into segments and every segment is cleaned via <see cref="CleanVendorSegment"/>.
    /// </summary>
    /// <returns>
    /// Candidates de-duplicated case-insensitively (first-seen casing wins) in ordinal order.
    /// </returns>
    private static ImmutableArray<string> ExtractVendors(string title, string? summary, string content)
    {
        var candidates = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        void AddCandidate(string? text)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                return;
            }

            foreach (var segment in SplitSegments(text))
            {
                var cleaned = CleanVendorSegment(segment);
                if (!string.IsNullOrWhiteSpace(cleaned))
                {
                    candidates.Add(cleaned);
                }
            }
        }

        AddCandidate(title);
        AddCandidate(summary);
        AddCandidate(content);

        return candidates.Count == 0
            ? ImmutableArray<string>.Empty
            : candidates
                .OrderBy(static vendor => vendor, StringComparer.Ordinal)
                .ToImmutableArray();
    }

    /// <summary>
    /// Splits text on full stops, hyphens, en/em dashes and colons, trimming each piece and
    /// dropping empty ones. Pieces are returned in their original order.
    /// </summary>
    /// <remarks>
    /// NOTE(review): because "-" is a separator, identifiers such as "CVE-2024-1234" are broken
    /// into "CVE", "2024" and "1234", which then flow on as vendor candidates. Confirm whether
    /// that is intended before relying on the vendor list.
    /// </remarks>
    private static IEnumerable<string> SplitSegments(string text)
    {
        // A single multi-separator split yields the same pieces, in the same order, as
        // splitting by each separator in turn, because the separators never overlap.
        return text.Split(
            SegmentSeparators,
            StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>
    /// Reduces a text segment to a vendor-name candidate.
    /// </summary>
    /// <remarks>
    /// Steps, in order: keep only the text before/around the word "vulnerability" or
    /// "vulnerabilities" (first non-empty piece, any casing); prefer the name following
    /// "provided by"; prefer the leading name in front of a product descriptor such as
    /// "controllers" or "firmware"; normalise the typographic apostrophe and drop double
    /// quotes; cap the length; trim.
    /// </remarks>
    /// <returns>The cleaned candidate, or <c>null</c> when nothing usable remains.</returns>
    private static string? CleanVendorSegment(string value)
    {
        var trimmed = value.Trim();
        if (string.IsNullOrEmpty(trimmed))
        {
            return null;
        }

        // Case-insensitive on purpose: titles commonly capitalise "Vulnerability", and the plural
        // form must be recognised on its own. With no match the split returns the input unchanged.
        var leadingPart = VulnerabilityWordRegex
            .Split(trimmed)
            .Select(static part => part.Trim())
            .FirstOrDefault(static part => part.Length > 0);
        if (leadingPart is not null)
        {
            trimmed = leadingPart;
        }

        var providedMatch = ProvidedByRegex.Match(trimmed);
        if (providedMatch.Success)
        {
            // The capture may end in spaces when followed by a character outside the class, e.g. "(".
            trimmed = providedMatch.Groups["vendor"].Value.Trim();
        }

        var descriptorMatch = DescriptorRegex.Match(trimmed);
        if (descriptorMatch.Success)
        {
            trimmed = descriptorMatch.Groups["vendor"].Value.Trim();
        }

        trimmed = trimmed.Replace("’", "'", StringComparison.Ordinal);
        trimmed = trimmed.Replace("\"", string.Empty, StringComparison.Ordinal);

        if (trimmed.Length > MaxVendorLength)
        {
            // Back off by one char rather than cutting a surrogate pair in half.
            var cut = char.IsHighSurrogate(trimmed[MaxVendorLength - 1])
                ? MaxVendorLength - 1
                : MaxVendorLength;
            trimmed = trimmed[..cut];
        }

        // Quote removal and truncation can expose leading/trailing whitespace; trimming here
        // avoids near-duplicate candidates that differ only by padding.
        trimmed = trimmed.Trim();

        return trimmed.Length == 0 ? null : trimmed;
    }
}
|
||||
Reference in New Issue
Block a user