UP
This commit is contained in:
@@ -0,0 +1,326 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using AngleSharp.Html.Dom;
|
||||
using AngleSharp.Html.Parser;
|
||||
|
||||
namespace StellaOps.Feedser.Source.Distro.Debian.Internal;
|
||||
|
||||
internal static class DebianHtmlParser
|
||||
{
|
||||
/// <summary>
/// Parses the HTML of a Debian advisory detail page and merges the extracted
/// description, references and package states with the supplied metadata.
/// </summary>
/// <param name="html">Raw HTML of the detail page.</param>
/// <param name="metadata">Advisory metadata (id, source package, title, CVE ids, publish time, detail URI).</param>
/// <returns>The assembled advisory DTO.</returns>
/// <exception cref="ArgumentNullException"><paramref name="html"/> or <paramref name="metadata"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="html"/> is empty.</exception>
public static DebianAdvisoryDto Parse(string html, DebianDetailMetadata metadata)
{
    ArgumentException.ThrowIfNullOrEmpty(html);
    ArgumentNullException.ThrowIfNull(metadata);

    var dom = new HtmlParser().ParseDocument(html);

    // The advisory title stands in when the page yields no description.
    var summary = ExtractDescription(dom);
    summary ??= metadata.Title;

    var links = ExtractReferences(dom, metadata);
    var packageStates = ExtractPackages(dom, metadata.SourcePackage, metadata.Published);

    return new DebianAdvisoryDto(
        metadata.AdvisoryId,
        metadata.SourcePackage,
        metadata.Title,
        summary,
        metadata.CveIds,
        packageStates,
        links);
}
|
||||
|
||||
/// <summary>
/// Reads the advisory description from the "Description" row of the first
/// table in the document.
/// </summary>
/// <param name="document">Parsed advisory detail page.</param>
/// <returns>
/// The whitespace-normalized description, or <c>null</c> when the first table has
/// no "Description" row with a non-empty value. Returning <c>null</c> (rather than
/// an empty string) lets callers apply a <c>??</c> fallback.
/// </returns>
private static string? ExtractDescription(IHtmlDocument document)
{
    // Only the first table contains the metadata rows we need.
    var metadataTable = document.QuerySelectorAll("table")
        .OfType<IHtmlTableElement>()
        .FirstOrDefault();
    if (metadataTable is null)
    {
        return null;
    }

    foreach (var row in metadataTable.Rows)
    {
        // A metadata row needs a label cell and a value cell.
        if (row.Cells.Length < 2)
        {
            continue;
        }

        var header = row.Cells[0].TextContent?.Trim();
        if (!string.Equals(header, "Description", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // A blank value counts as "no description" so it does not mask the fallback.
        var description = NormalizeWhitespace(row.Cells[1].TextContent);
        if (description.Length > 0)
        {
            return description;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Collects the reference links of an advisory page: the canonical Debian advisory
/// URL first, followed by every distinct anchor target found in the document.
/// </summary>
/// <param name="document">Parsed advisory detail page.</param>
/// <param name="metadata">Supplies the advisory id, title and the detail URI used to resolve relative links.</param>
/// <returns>
/// References in document order, de-duplicated case-insensitively by URL. The kind is
/// "cve" when the link text starts with "CVE-", "advisory" when the URL contains
/// "debian.org/security", and <c>null</c> otherwise.
/// </returns>
private static IReadOnlyList<DebianReferenceDto> ExtractReferences(IHtmlDocument document, DebianDetailMetadata metadata)
{
    var references = new List<DebianReferenceDto>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Add canonical Debian advisory page.
    var canonical = new Uri($"https://www.debian.org/security/{metadata.AdvisoryId.ToLowerInvariant()}").ToString();
    references.Add(new DebianReferenceDto(canonical, "advisory", metadata.Title));
    seen.Add(canonical);

    foreach (var link in document.QuerySelectorAll("a"))
    {
        var href = link.GetAttribute("href");
        if (string.IsNullOrWhiteSpace(href))
        {
            continue;
        }

        string resolved;
        if (Uri.TryCreate(href, UriKind.Absolute, out var absolute) && !IsImplicitFilePath(absolute, href))
        {
            resolved = absolute.ToString();
        }
        else if (Uri.TryCreate(metadata.DetailUri, href, out var relative))
        {
            // Site-relative and document-relative links are resolved against the detail page.
            resolved = relative.ToString();
        }
        else
        {
            continue;
        }

        if (!seen.Add(resolved))
        {
            continue;
        }

        var text = NormalizeWhitespace(link.TextContent);
        string? kind = null;
        if (text.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            kind = "cve";
        }
        else if (resolved.Contains("debian.org/security", StringComparison.OrdinalIgnoreCase))
        {
            kind = "advisory";
        }

        references.Add(new DebianReferenceDto(resolved, kind, text));
    }

    return references;

    // On Unix, a rooted path such as "/security/x" parses as an absolute file:// URI.
    // Such hrefs are really site-relative links, so they must not be taken as absolute
    // unless the author explicitly wrote a file: scheme.
    static bool IsImplicitFilePath(Uri candidate, string raw)
        => candidate.IsFile && !raw.TrimStart().StartsWith("file:", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Reads the per-release package states from the packages table of an advisory page.
/// </summary>
/// <param name="document">Parsed advisory detail page.</param>
/// <param name="defaultPackage">Package name used for rows until a row names a package itself.</param>
/// <param name="published">Publish timestamp stamped on every emitted state.</param>
/// <returns>
/// One state per (package, release) pair that is worth emitting, ordered by release and
/// then by package (both case-insensitive). Empty when the page has no packages table.
/// </returns>
private static IReadOnlyList<DebianPackageStateDto> ExtractPackages(IHtmlDocument document, string defaultPackage, DateTimeOffset published)
{
    var table = FindPackagesTable(document);
    if (table is null)
    {
        return Array.Empty<DebianPackageStateDto>();
    }

    var accumulators = new Dictionary<string, PackageAccumulator>(StringComparer.OrdinalIgnoreCase);
    string currentPackage = defaultPackage;

    foreach (var body in table.Bodies)
    {
        foreach (var row in body.Rows)
        {
            if (row.Cells.Length < 4)
            {
                continue;
            }

            var packageCell = NormalizeWhitespace(row.Cells[0].TextContent);
            var releaseRaw = NormalizeWhitespace(row.Cells[1].TextContent);
            var versionRaw = NormalizeWhitespace(row.Cells[2].TextContent);
            var statusRaw = NormalizeWhitespace(row.Cells[3].TextContent);

            // Without a <thead>, the HTML parser places the header row inside the table
            // body; it must not be mistaken for a package row.
            if (IsHeaderRow(packageCell, releaseRaw, versionRaw))
            {
                continue;
            }

            // An empty package cell means "same package as the previous row".
            if (!string.IsNullOrWhiteSpace(packageCell))
            {
                currentPackage = ExtractPackageName(packageCell);
            }

            if (string.IsNullOrWhiteSpace(currentPackage) || string.IsNullOrWhiteSpace(releaseRaw))
            {
                continue;
            }

            var release = NormalizeRelease(releaseRaw);
            var key = $"{currentPackage}|{release}";
            if (!accumulators.TryGetValue(key, out var accumulator))
            {
                accumulator = new PackageAccumulator(currentPackage, release, published);
                accumulators[key] = accumulator;
            }

            accumulator.Apply(statusRaw, versionRaw);
        }
    }

    return accumulators.Values
        .Where(static acc => acc.ShouldEmit)
        .Select(static acc => acc.ToDto())
        .OrderBy(static dto => dto.Release, StringComparer.OrdinalIgnoreCase)
        .ThenBy(static dto => dto.Package, StringComparer.OrdinalIgnoreCase)
        .ToArray();

    // Matches the same header captions that identify the packages table.
    static bool IsHeaderRow(string first, string second, string third)
        => string.Equals(first, "Source Package", StringComparison.OrdinalIgnoreCase)
            && string.Equals(second, "Release", StringComparison.OrdinalIgnoreCase)
            && string.Equals(third, "Version", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Locates the packages table: the first table whose first row has at least four cells
/// and whose first three captions read "Source Package", "Release" and "Version"
/// (compared case-insensitively after whitespace normalization).
/// </summary>
/// <param name="document">Parsed advisory detail page.</param>
/// <returns>The matching table, or <c>null</c> when no table qualifies.</returns>
private static IHtmlTableElement? FindPackagesTable(IHtmlDocument document)
{
    var expectedCaptions = new[] { "Source Package", "Release", "Version" };

    foreach (var candidate in document.QuerySelectorAll("table").OfType<IHtmlTableElement>())
    {
        var headerCells = candidate.Rows.FirstOrDefault()?.Cells;
        if (headerCells is null || headerCells.Length < 4)
        {
            continue;
        }

        var captionsMatch = Enumerable.Range(0, expectedCaptions.Length).All(index =>
            string.Equals(
                NormalizeWhitespace(headerCells[index].TextContent),
                expectedCaptions[index],
                StringComparison.OrdinalIgnoreCase));

        if (captionsMatch)
        {
            return candidate;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Trims a release label and drops a trailing parenthesized suffix, e.g.
/// "bookworm (security)" becomes "bookworm".
/// </summary>
/// <param name="release">Raw release cell text.</param>
/// <returns>
/// The trimmed label up to (not including) the first '('. A label that starts with
/// '(' is returned trimmed but otherwise unchanged.
/// </returns>
private static string NormalizeRelease(string release)
{
    var label = release.Trim();
    var openParen = label.IndexOf('(');

    // Index 0 is deliberately excluded so a label is never reduced to nothing.
    return openParen > 0
        ? label[..openParen].Trim()
        : label;
}
|
||||
|
||||
/// <summary>
/// Extracts the bare package name from a package cell: the first space-separated
/// token, minus a parenthesized suffix attached directly to it (e.g. "curl(1)").
/// </summary>
/// <param name="value">Text of the package cell.</param>
/// <returns>
/// The package name, or the trimmed input when it contains no usable token.
/// </returns>
private static string ExtractPackageName(string value)
{
    var tokens = value.Split(' ', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length == 0 || string.IsNullOrWhiteSpace(tokens[0]))
    {
        return value.Trim();
    }

    var name = tokens[0];
    var openParen = name.IndexOf('(');

    // Only strip when the token both contains '(' and ends with ')'.
    if (openParen >= 0 && name.EndsWith(")", StringComparison.Ordinal))
    {
        name = name[..openParen];
    }

    return name.Trim();
}
|
||||
|
||||
/// <summary>
/// Collapses every run of whitespace into a single space and removes leading and
/// trailing whitespace.
/// </summary>
/// <param name="value">Text to normalize.</param>
/// <returns>The normalized text, or an empty string for null or whitespace-only input.</returns>
private static string NormalizeWhitespace(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    // A null separator array splits on any Unicode whitespace character.
    var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    return string.Join(' ', words);
}
|
||||
|
||||
/// <summary>
/// Folds the status rows of one (package, release) pair into a single package state.
/// </summary>
private sealed class PackageAccumulator
{
    private readonly DateTimeOffset _published;

    /// <summary>Creates an accumulator whose status starts as "unknown".</summary>
    /// <param name="package">Source package name.</param>
    /// <param name="release">Normalized release name.</param>
    /// <param name="published">Publish timestamp copied onto the resulting DTO.</param>
    public PackageAccumulator(string package, string release, DateTimeOffset published)
    {
        Package = package;
        Release = release;
        _published = published;
        Status = "unknown";
    }

    public string Package { get; }

    public string Release { get; }

    /// <summary>
    /// One of "unknown", "open", "resolved", "not_affected", "end_of_life", or the raw
    /// status text when it matches none of the recognized keywords.
    /// </summary>
    public string Status { get; private set; }

    public string? IntroducedVersion { get; private set; }

    public string? FixedVersion { get; private set; }

    public string? LastAffectedVersion { get; private set; }

    /// <summary>
    /// False only for a "not_affected" state that carries neither an introduced nor a fixed version.
    /// </summary>
    public bool ShouldEmit =>
        !string.Equals(Status, "not_affected", StringComparison.OrdinalIgnoreCase)
        || IntroducedVersion is not null
        || FixedVersion is not null;

    /// <summary>
    /// Applies one table row to the accumulated state.
    /// </summary>
    /// <param name="statusRaw">Status cell text; matched case-insensitively by keyword.</param>
    /// <param name="versionRaw">Version cell text; blank is treated as "no version".</param>
    public void Apply(string statusRaw, string versionRaw)
    {
        var status = statusRaw.ToLowerInvariant();
        var version = string.IsNullOrWhiteSpace(versionRaw) ? null : versionRaw.Trim();

        if (status.Contains("fixed", StringComparison.OrdinalIgnoreCase))
        {
            FixedVersion = version;

            // A previously recorded "open" wins over a later "fixed" row.
            if (!string.Equals(Status, "open", StringComparison.OrdinalIgnoreCase))
            {
                Status = "resolved";
            }

            return;
        }

        // Must be tested before the "vulnerable" keyword: "not vulnerable" contains
        // "vulnerable" and would otherwise be classified as open.
        if (status.Contains("not affected", StringComparison.OrdinalIgnoreCase)
            || status.Contains("not vulnerable", StringComparison.OrdinalIgnoreCase))
        {
            Status = "not_affected";
            IntroducedVersion = null;
            FixedVersion = null;
            LastAffectedVersion = null;
            return;
        }

        if (status.Contains("vulnerable", StringComparison.OrdinalIgnoreCase)
            || status.Contains("open", StringComparison.OrdinalIgnoreCase))
        {
            // Keep the first version seen as the introduced version.
            IntroducedVersion ??= version;

            // A previously recorded "resolved" wins over a later "open" row.
            if (!string.Equals(Status, "resolved", StringComparison.OrdinalIgnoreCase))
            {
                Status = "open";
            }

            LastAffectedVersion = null;
            return;
        }

        if (status.Contains("end-of-life", StringComparison.OrdinalIgnoreCase)
            || status.Contains("end of life", StringComparison.OrdinalIgnoreCase))
        {
            Status = "end_of_life";
            return;
        }

        // Unrecognized status: pass the original text through unchanged.
        Status = statusRaw;
    }

    /// <summary>Creates the DTO for the accumulated state.</summary>
    public DebianPackageStateDto ToDto()
        => new(
            Package: Package,
            Release: Release,
            Status: Status,
            IntroducedVersion: IntroducedVersion,
            FixedVersion: FixedVersion,
            LastAffectedVersion: LastAffectedVersion,
            Published: _published);
}
|
||||
}
|
||||
Reference in New Issue
Block a user