327 lines
11 KiB
C#
327 lines
11 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Globalization;
|
|
using System.Linq;
|
|
using AngleSharp.Html.Dom;
|
|
using AngleSharp.Html.Parser;
|
|
|
|
namespace StellaOps.Feedser.Source.Distro.Debian.Internal;
|
|
|
|
internal static class DebianHtmlParser
|
|
{
|
|
/// <summary>
/// Parses an advisory HTML page and combines the extracted content with the supplied metadata.
/// </summary>
/// <param name="html">Raw HTML markup of the advisory page; must be neither null nor empty.</param>
/// <param name="metadata">
/// Advisory metadata providing the advisory id, source package, title, CVE ids and publication
/// timestamp that are copied into (or used while building) the result.
/// </param>
/// <returns>
/// A <see cref="DebianAdvisoryDto"/> whose description falls back to the metadata title when the
/// page yields no description.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="html"/> or <paramref name="metadata"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="html"/> is empty.</exception>
public static DebianAdvisoryDto Parse(string html, DebianDetailMetadata metadata)
{
    ArgumentException.ThrowIfNullOrEmpty(html);
    ArgumentNullException.ThrowIfNull(metadata);

    var page = new HtmlParser().ParseDocument(html);

    // Prefer the description found in the page; otherwise reuse the advisory title.
    var summary = ExtractDescription(page);
    summary ??= metadata.Title;

    var links = ExtractReferences(page, metadata);
    var packageStates = ExtractPackages(page, metadata.SourcePackage, metadata.Published);

    return new DebianAdvisoryDto(
        metadata.AdvisoryId,
        metadata.SourcePackage,
        metadata.Title,
        summary,
        metadata.CveIds,
        packageStates,
        links);
}
|
|
|
|
/// <summary>
/// Reads the value of the "Description" row from the first table of the document.
/// </summary>
/// <param name="document">Parsed advisory page.</param>
/// <returns>
/// The whitespace-normalised description text, or <c>null</c> when the document has no table,
/// the first table has no "Description" row, or that row's value is blank.
/// </returns>
private static string? ExtractDescription(IHtmlDocument document)
{
    // Only the first table contains the metadata rows we need.
    var metadataTable = document
        .QuerySelectorAll("table")
        .OfType<IHtmlTableElement>()
        .FirstOrDefault();

    if (metadataTable is null)
    {
        return null;
    }

    foreach (var row in metadataTable.Rows)
    {
        // A metadata row needs a label cell and a value cell.
        if (row.Cells.Length < 2)
        {
            continue;
        }

        var label = row.Cells[0].TextContent?.Trim();
        if (!string.Equals(label, "Description", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var description = NormalizeWhitespace(row.Cells[1].TextContent);

        // A blank cell is reported as "no description" (null) rather than an empty string,
        // so that a caller using the null-coalescing operator gets its fallback value.
        return description.Length == 0 ? null : description;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Builds the reference list for an advisory: the canonical debian.org security page first,
/// followed by every distinct hyperlink found in the document.
/// </summary>
/// <param name="document">Parsed advisory page whose <c>&lt;a href&gt;</c> elements are collected.</param>
/// <param name="metadata">
/// Supplies the advisory id and title for the canonical entry and the detail URI against which
/// relative links are resolved.
/// </param>
/// <returns>
/// References in document order, de-duplicated case-insensitively by resolved URL. Each link is
/// tagged "cve" when its text starts with "CVE-", "advisory" when its URL contains
/// "debian.org/security", and left without a kind otherwise.
/// </returns>
private static IReadOnlyList<DebianReferenceDto> ExtractReferences(IHtmlDocument document, DebianDetailMetadata metadata)
{
    var references = new List<DebianReferenceDto>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Add canonical Debian advisory page.
    var canonical = new Uri($"https://www.debian.org/security/{metadata.AdvisoryId.ToLowerInvariant()}");
    references.Add(new DebianReferenceDto(canonical.ToString(), "advisory", metadata.Title));
    seen.Add(canonical.ToString());

    foreach (var link in document.QuerySelectorAll("a"))
    {
        var href = link.GetAttribute("href");
        if (string.IsNullOrWhiteSpace(href))
        {
            continue;
        }

        string resolved;

        // On Unix, Uri.TryCreate(..., UriKind.Absolute, ...) accepts a rooted path such as
        // "/security/foo" as an implicit file:// URI. Such values are site-relative links, so
        // file-scheme results are not taken as-is and instead fall through to resolution
        // against the detail page URI (an explicit "file:" href resolves to itself there).
        if (Uri.TryCreate(href, UriKind.Absolute, out var absolute) && !absolute.IsFile)
        {
            resolved = absolute.ToString();
        }
        else if (Uri.TryCreate(metadata.DetailUri, href, out var relative))
        {
            resolved = relative.ToString();
        }
        else
        {
            // Neither a usable absolute URI nor resolvable against the detail page.
            continue;
        }

        // Skip URLs that were already recorded (including the canonical page).
        if (!seen.Add(resolved))
        {
            continue;
        }

        var text = NormalizeWhitespace(link.TextContent);
        string? kind = null;
        if (text.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
        {
            kind = "cve";
        }
        else if (resolved.Contains("debian.org/security", StringComparison.OrdinalIgnoreCase))
        {
            kind = "advisory";
        }

        references.Add(new DebianReferenceDto(resolved, kind, text));
    }

    return references;
}
|
|
|
|
/// <summary>
/// Reads the package/release status table of an advisory page and folds its rows into one
/// state per (package, release) pair.
/// </summary>
/// <param name="document">Parsed advisory page.</param>
/// <param name="defaultPackage">
/// Package name used for rows that appear before any row with a non-blank package cell.
/// </param>
/// <param name="published">Publication timestamp handed to every resulting package state.</param>
/// <returns>
/// The package states that should be emitted, ordered by release and then by package
/// (both case-insensitively); an empty list when no packages table is found.
/// </returns>
private static IReadOnlyList<DebianPackageStateDto> ExtractPackages(IHtmlDocument document, string defaultPackage, DateTimeOffset published)
{
    var table = FindPackagesTable(document);
    if (table is null)
    {
        return Array.Empty<DebianPackageStateDto>();
    }

    // FindPackagesTable selects the table by the labels in its first row, so that row is the
    // column header. When the markup has no <thead>, the header row is part of the table
    // bodies and would otherwise be read as a package named "Source" with release "Release".
    var headerRow = table.Rows.FirstOrDefault();

    var accumulators = new Dictionary<string, PackageAccumulator>(StringComparer.OrdinalIgnoreCase);
    string currentPackage = defaultPackage;

    foreach (var body in table.Bodies)
    {
        foreach (var row in body.Rows)
        {
            if (ReferenceEquals(row, headerRow))
            {
                continue;
            }

            // Data rows carry package, release, version and status columns.
            if (row.Cells.Length < 4)
            {
                continue;
            }

            // A blank package cell means the row belongs to the package named by an earlier row.
            var packageCell = NormalizeWhitespace(row.Cells[0].TextContent);
            if (!string.IsNullOrWhiteSpace(packageCell))
            {
                currentPackage = ExtractPackageName(packageCell);
            }

            if (string.IsNullOrWhiteSpace(currentPackage))
            {
                continue;
            }

            var releaseRaw = NormalizeWhitespace(row.Cells[1].TextContent);
            var versionRaw = NormalizeWhitespace(row.Cells[2].TextContent);
            var statusRaw = NormalizeWhitespace(row.Cells[3].TextContent);
            if (string.IsNullOrWhiteSpace(releaseRaw))
            {
                continue;
            }

            var release = NormalizeRelease(releaseRaw);
            var key = $"{currentPackage}|{release}";
            if (!accumulators.TryGetValue(key, out var accumulator))
            {
                accumulator = new PackageAccumulator(currentPackage, release, published);
                accumulators[key] = accumulator;
            }

            accumulator.Apply(statusRaw, versionRaw);
        }
    }

    return accumulators.Values
        .Where(static acc => acc.ShouldEmit)
        .Select(static acc => acc.ToDto())
        .OrderBy(static dto => dto.Release, StringComparer.OrdinalIgnoreCase)
        .ThenBy(static dto => dto.Package, StringComparer.OrdinalIgnoreCase)
        .ToArray();
}
|
|
|
|
/// <summary>
/// Locates the table whose first row starts with the column labels
/// "Source Package", "Release" and "Version" (compared case-insensitively).
/// </summary>
/// <param name="document">Parsed advisory page.</param>
/// <returns>
/// The first matching table in document order, or <c>null</c> when no table has at least four
/// cells in its first row with those three leading labels.
/// </returns>
private static IHtmlTableElement? FindPackagesTable(IHtmlDocument document)
{
    static bool HasLabel(string cellText, string expected)
        => string.Equals(NormalizeWhitespace(cellText), expected, StringComparison.OrdinalIgnoreCase);

    foreach (var candidate in document.QuerySelectorAll("table").OfType<IHtmlTableElement>())
    {
        var firstRow = candidate.Rows.FirstOrDefault();
        if (firstRow is null)
        {
            continue;
        }

        var cells = firstRow.Cells;
        if (cells.Length < 4)
        {
            continue;
        }

        var isPackagesTable =
            HasLabel(cells[0].TextContent, "Source Package")
            && HasLabel(cells[1].TextContent, "Release")
            && HasLabel(cells[2].TextContent, "Version");

        if (isPackagesTable)
        {
            return candidate;
        }
    }

    return null;
}
|
|
|
|
/// <summary>
/// Trims a release label and drops a trailing parenthesised suffix, e.g.
/// "bookworm (security)" becomes "bookworm".
/// </summary>
/// <param name="release">Release cell text.</param>
/// <returns>
/// The trimmed text before the first '('; the whole trimmed text when there is no '(' or when
/// the '(' is the very first character.
/// </returns>
private static string NormalizeRelease(string release)
{
    var label = release.Trim();
    var cut = label.IndexOf('(');

    // A '(' at position 0 would leave nothing in front of it, so the label is kept whole.
    return cut > 0
        ? label[..cut].Trim()
        : label;
}
|
|
|
|
/// <summary>
/// Extracts the package name from a package cell: the first space-separated token, with a
/// directly attached "(...)" suffix removed.
/// </summary>
/// <param name="value">Package cell text, e.g. "openssl (PTS)".</param>
/// <returns>
/// The trimmed first token up to its first '(' when the token ends with ')'; the trimmed first
/// token otherwise; or the trimmed input when it contains no tokens.
/// </returns>
private static string ExtractPackageName(string value)
{
    var tokens = value.Split(' ', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    var name = tokens.Length > 0 ? tokens[0] : null;

    if (string.IsNullOrWhiteSpace(name))
    {
        return value.Trim();
    }

    // Handles tokens such as "pkg(PTS)" where the annotation is not separated by a space.
    var open = name.IndexOf('(');
    if (open >= 0 && name.EndsWith(")", StringComparison.Ordinal))
    {
        name = name[..open];
    }

    return name.Trim();
}
|
|
|
|
/// <summary>
/// Collapses every run of whitespace into a single space and removes leading and trailing
/// whitespace.
/// </summary>
/// <param name="value">Text to normalise.</param>
/// <returns>The normalised text, or <see cref="string.Empty"/> when the input is null or blank.</returns>
private static string NormalizeWhitespace(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    // A null separator array makes Split break on any whitespace character.
    var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    return string.Join(' ', words);
}
|
|
|
|
/// <summary>
/// Mutable accumulator that folds the status rows of one (package, release) pair into a single
/// package state.
/// </summary>
private sealed class PackageAccumulator
{
    private readonly DateTimeOffset _published;

    /// <summary>Creates an accumulator whose status starts as "unknown".</summary>
    /// <param name="package">Package name.</param>
    /// <param name="release">Normalised release name.</param>
    /// <param name="published">Publication timestamp copied into the resulting DTO.</param>
    public PackageAccumulator(string package, string release, DateTimeOffset published)
    {
        Package = package;
        Release = release;
        _published = published;
        Status = "unknown";
    }

    /// <summary>Package name this accumulator belongs to.</summary>
    public string Package { get; }

    /// <summary>Release this accumulator belongs to.</summary>
    public string Release { get; }

    /// <summary>
    /// Current status: "unknown", "open", "resolved", "not_affected", "end_of_life", or the raw
    /// status text when it matched none of the recognised keywords.
    /// </summary>
    public string Status { get; private set; }

    /// <summary>Version taken from the first row reported as vulnerable/open, if any.</summary>
    public string? IntroducedVersion { get; private set; }

    /// <summary>Version taken from the most recent row reported as fixed, if any.</summary>
    public string? FixedVersion { get; private set; }

    /// <summary>Last affected version; this class only ever resets it to null.</summary>
    public string? LastAffectedVersion { get; private set; }

    /// <summary>
    /// False only for a "not_affected" state that carries neither an introduced nor a fixed version.
    /// </summary>
    public bool ShouldEmit =>
        !string.Equals(Status, "not_affected", StringComparison.OrdinalIgnoreCase)
        || IntroducedVersion is not null
        || FixedVersion is not null;

    /// <summary>
    /// Folds one table row into the accumulated state, classifying it by keywords found in the
    /// status text (case-insensitively).
    /// </summary>
    /// <param name="statusRaw">Status cell text.</param>
    /// <param name="versionRaw">Version cell text; blank is treated as "no version".</param>
    public void Apply(string statusRaw, string versionRaw)
    {
        var status = statusRaw.ToLowerInvariant();
        var version = string.IsNullOrWhiteSpace(versionRaw) ? null : versionRaw.Trim();

        if (status.Contains("fixed", StringComparison.OrdinalIgnoreCase))
        {
            FixedVersion = version;

            // An earlier "open" row wins over a later "fixed" row.
            if (!string.Equals(Status, "open", StringComparison.OrdinalIgnoreCase))
            {
                Status = "resolved";
            }

            return;
        }

        // The negated forms must be tested before the plain "vulnerable" keyword:
        // "not vulnerable" contains "vulnerable" and would otherwise be classified as open.
        if (status.Contains("not affected", StringComparison.OrdinalIgnoreCase)
            || status.Contains("not vulnerable", StringComparison.OrdinalIgnoreCase))
        {
            Status = "not_affected";
            IntroducedVersion = null;
            FixedVersion = null;
            LastAffectedVersion = null;
            return;
        }

        if (status.Contains("vulnerable", StringComparison.OrdinalIgnoreCase)
            || status.Contains("open", StringComparison.OrdinalIgnoreCase))
        {
            // Keep the version of the first vulnerable row.
            IntroducedVersion ??= version;

            // An earlier "resolved" row wins over a later "open" row.
            if (!string.Equals(Status, "resolved", StringComparison.OrdinalIgnoreCase))
            {
                Status = "open";
            }

            LastAffectedVersion = null;
            return;
        }

        if (status.Contains("end-of-life", StringComparison.OrdinalIgnoreCase)
            || status.Contains("end of life", StringComparison.OrdinalIgnoreCase))
        {
            Status = "end_of_life";
            return;
        }

        // Unrecognised status text is passed through unchanged.
        Status = statusRaw;
    }

    /// <summary>Creates the DTO describing the accumulated state.</summary>
    public DebianPackageStateDto ToDto()
        => new(
            Package: Package,
            Release: Release,
            Status: Status,
            IntroducedVersion: IntroducedVersion,
            FixedVersion: FixedVersion,
            LastAffectedVersion: LastAffectedVersion,
            Published: _published);
}
|
|
}
|