Files
git.stella-ops.org/src/Concelier/__Libraries/StellaOps.Concelier.SourceIntel/ChangelogParser.cs
StellaOps Bot 3f197814c5 save progress
2026-01-02 21:06:27 +02:00

513 lines
18 KiB
C#

namespace StellaOps.Concelier.SourceIntel;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
/// <summary>
/// Parses source package changelogs for CVE mentions (Tier 2).
/// </summary>
public static partial class ChangelogParser
{
/// <summary>
/// Parses a Debian changelog and collects, per stanza, the CVE identifiers and bug tracker
/// references mentioned in it.
/// </summary>
/// <param name="changelogContent">Raw changelog text. It is split into lines on <c>'\n'</c>.</param>
/// <returns>
/// A result with one <see cref="ChangelogEntry"/> for every stanza that mentions at least one CVE
/// or bug reference; stanzas with neither are dropped. Entries listing a CVE get confidence 0.80,
/// bug-only entries 0.75. An entry whose trailer date is missing is stamped with the current UTC time.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="changelogContent"/> is <c>null</c>.</exception>
public static ChangelogParseResult ParseDebianChangelog(string changelogContent)
{
    ArgumentNullException.ThrowIfNull(changelogContent);

    var entries = new List<ChangelogEntry>();
    string? currentPackage = null;
    string? currentVersion = null;
    DateTimeOffset? currentDate = null;
    var currentCves = new List<string>();
    var currentBugs = new List<BugReference>();
    var currentDescription = new List<string>();

    foreach (var line in changelogContent.Split('\n'))
    {
        // Stanza header: "package (version) distribution; urgency=..."
        var headerMatch = DebianHeaderRegex().Match(line);
        if (headerMatch.Success)
        {
            // A new header closes the stanza collected so far.
            FlushCurrentEntry();

            currentPackage = headerMatch.Groups[1].Value;
            currentVersion = headerMatch.Groups[2].Value;
            currentCves.Clear();
            currentBugs.Clear();
            currentDescription.Clear();
            currentDate = null;
            continue;
        }

        // Trailer line: " -- Author <email>  Date". It comes last in a stanza, so the date is
        // attached to the stanza that is currently open.
        var dateMatch = DebianDateRegex().Match(line);
        if (dateMatch.Success)
        {
            currentDate = ParseDebianDate(dateMatch.Groups[1].Value);
            continue;
        }

        // Content line: collect CVE identifiers, keeping first-seen order without duplicates.
        foreach (Match match in CvePatternRegex().Matches(line))
        {
            var cveId = match.Groups[0].Value;
            if (!currentCves.Contains(cveId))
            {
                currentCves.Add(cveId);
            }
        }

        // Content line: collect bug tracker references, one per tracker/id pair.
        foreach (var bug in ExtractBugReferences(line))
        {
            if (!currentBugs.Any(b => b.Tracker == bug.Tracker && b.BugId == bug.BugId))
            {
                currentBugs.Add(bug);
            }
        }

        // Ordinal comparison: the " --" trailer marker is a fixed token, not linguistic text.
        if (!string.IsNullOrWhiteSpace(line) && !line.StartsWith(" --", StringComparison.Ordinal))
        {
            currentDescription.Add(line.Trim());
        }
    }

    // The final stanza has no following header to trigger a flush.
    FlushCurrentEntry();

    return new ChangelogParseResult
    {
        Entries = entries,
        ParsedAt = DateTimeOffset.UtcNow
    };

    // Emits the stanza accumulated so far, but only if it has a header and carries at least one
    // CVE or bug reference. Lists are copied because the accumulators are reused for the next stanza.
    void FlushCurrentEntry()
    {
        if (currentPackage is null || currentVersion is null)
        {
            return;
        }

        if (currentCves.Count == 0 && currentBugs.Count == 0)
        {
            return;
        }

        entries.Add(new ChangelogEntry
        {
            PackageName = currentPackage,
            Version = currentVersion,
            CveIds = currentCves.ToList(),
            BugReferences = currentBugs.ToList(),
            Description = string.Join(" ", currentDescription),
            Date = currentDate ?? DateTimeOffset.UtcNow,
            Confidence = currentCves.Count > 0 ? 0.80 : 0.75 // Bug-only entries have lower confidence
        });
    }
}
/// <summary>
/// Parses an RPM <c>%changelog</c> section and collects, per entry, the CVE identifiers and bug
/// tracker references mentioned in it.
/// </summary>
/// <param name="changelogContent">Raw changelog text. It is split into lines on <c>'\n'</c>.</param>
/// <returns>
/// A result with one <see cref="ChangelogEntry"/> for every entry that mentions at least one CVE or
/// bug reference. The package name is the fixed placeholder <c>"rpm-package"</c> because the
/// changelog text itself does not carry the package name. Entries listing a CVE get confidence
/// 0.80, bug-only entries 0.75.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="changelogContent"/> is <c>null</c>.</exception>
public static ChangelogParseResult ParseRpmChangelog(string changelogContent)
{
    ArgumentNullException.ThrowIfNull(changelogContent);

    var entries = new List<ChangelogEntry>();
    string? currentVersion = null;
    DateTimeOffset? currentDate = null;
    var currentCves = new List<string>();
    var currentBugs = new List<BugReference>();
    var currentDescription = new List<string>();

    foreach (var line in changelogContent.Split('\n'))
    {
        // Entry header: "* Day Mon DD YYYY Author <email> - version-release"
        var headerMatch = RpmHeaderRegex().Match(line);
        if (headerMatch.Success)
        {
            // A new header closes the entry collected so far.
            FlushCurrentEntry();

            // Group 1 is the header text before the last " - " (date followed by author).
            currentDate = ParseRpmDate(headerMatch.Groups[1].Value);
            currentVersion = headerMatch.Groups[2].Value;
            currentCves.Clear();
            currentBugs.Clear();
            currentDescription.Clear();
            continue;
        }

        // Content line: collect CVE identifiers, keeping first-seen order without duplicates.
        foreach (Match match in CvePatternRegex().Matches(line))
        {
            var cveId = match.Groups[0].Value;
            if (!currentCves.Contains(cveId))
            {
                currentCves.Add(cveId);
            }
        }

        // Content line: collect bug tracker references, one per tracker/id pair.
        foreach (var bug in ExtractBugReferences(line))
        {
            if (!currentBugs.Any(b => b.Tracker == bug.Tracker && b.BugId == bug.BugId))
            {
                currentBugs.Add(bug);
            }
        }

        // Lines starting with '*' that did not match the header pattern are still scanned above,
        // but are kept out of the description. The char overload compares ordinally.
        if (!string.IsNullOrWhiteSpace(line) && !line.StartsWith('*'))
        {
            currentDescription.Add(line.Trim());
        }
    }

    // The final entry has no following header to trigger a flush.
    FlushCurrentEntry();

    return new ChangelogParseResult
    {
        Entries = entries,
        ParsedAt = DateTimeOffset.UtcNow
    };

    // Emits the entry accumulated so far, but only if a header was seen and the entry carries at
    // least one CVE or bug reference. Lists are copied because the accumulators are reused.
    void FlushCurrentEntry()
    {
        if (currentVersion is null)
        {
            return;
        }

        if (currentCves.Count == 0 && currentBugs.Count == 0)
        {
            return;
        }

        entries.Add(new ChangelogEntry
        {
            PackageName = "rpm-package", // Placeholder: the changelog text does not name the package
            Version = currentVersion,
            CveIds = currentCves.ToList(),
            BugReferences = currentBugs.ToList(),
            Description = string.Join(" ", currentDescription),
            Date = currentDate ?? DateTimeOffset.UtcNow,
            Confidence = currentCves.Count > 0 ? 0.80 : 0.75 // Bug-only entries have lower confidence
        });
    }
}
/// <summary>
/// Parses the <c>secfixes</c> listing of an Alpine APKBUILD and collects the CVE identifiers
/// recorded under each version. Bug tracker references are rare in this format but are collected
/// too, for consistency with the Debian and RPM parsers.
/// </summary>
/// <param name="secfixesContent">Raw secfixes text. It is split into lines on <c>'\n'</c>.</param>
/// <returns>
/// A result with one <see cref="ChangelogEntry"/> for every version line that is followed by at
/// least one CVE or bug reference. The package name is the fixed placeholder
/// <c>"alpine-package"</c>, the date is the current UTC time (the format carries no dates), and the
/// confidence is 0.85.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="secfixesContent"/> is <c>null</c>.</exception>
public static ChangelogParseResult ParseAlpineSecfixes(string secfixesContent)
{
    ArgumentNullException.ThrowIfNull(secfixesContent);

    var entries = new List<ChangelogEntry>();
    string? currentVersion = null;
    var currentCves = new List<string>();
    var currentBugs = new List<BugReference>();

    foreach (var line in secfixesContent.Split('\n'))
    {
        // Version line: "  version-release:"
        var versionMatch = AlpineVersionRegex().Match(line);
        if (versionMatch.Success)
        {
            // A new version line closes the version collected so far.
            FlushCurrentEntry();

            currentVersion = versionMatch.Groups[1].Value;
            currentCves.Clear();
            currentBugs.Clear();
            continue;
        }

        // CVE line: "    - CVE-XXXX-YYYY"
        foreach (Match match in CvePatternRegex().Matches(line))
        {
            var cveId = match.Groups[0].Value;
            if (!currentCves.Contains(cveId))
            {
                currentCves.Add(cveId);
            }
        }

        // Bug references (rare in Alpine secfixes, but possible)
        foreach (var bug in ExtractBugReferences(line))
        {
            if (!currentBugs.Any(b => b.Tracker == bug.Tracker && b.BugId == bug.BugId))
            {
                currentBugs.Add(bug);
            }
        }
    }

    // The final version has no following version line to trigger a flush.
    FlushCurrentEntry();

    return new ChangelogParseResult
    {
        Entries = entries,
        ParsedAt = DateTimeOffset.UtcNow
    };

    // Emits the version accumulated so far, but only if a version line was seen and it carries at
    // least one CVE or bug reference. Lists are copied because the accumulators are reused.
    void FlushCurrentEntry()
    {
        if (currentVersion is null)
        {
            return;
        }

        if (currentCves.Count == 0 && currentBugs.Count == 0)
        {
            return;
        }

        entries.Add(new ChangelogEntry
        {
            PackageName = "alpine-package",
            Version = currentVersion,
            CveIds = currentCves.ToList(),
            BugReferences = currentBugs.ToList(),
            Description = $"Security fixes for {string.Join(", ", currentCves)}",
            Date = DateTimeOffset.UtcNow,
            Confidence = 0.85 // Alpine secfixes are explicit
        });
    }
}
/// <summary>
/// Parses the date of a Debian changelog trailer, e.g. "Mon, 15 Jan 2024 10:30:00 +0000".
/// </summary>
/// <param name="dateStr">The text following the maintainer e-mail on the trailer line.</param>
/// <returns>
/// The parsed date, or the current UTC time when <paramref name="dateStr"/> cannot be parsed.
/// </returns>
private static DateTimeOffset ParseDebianDate(string dateStr)
{
    // The trailer format is machine-readable, so parse it with the invariant culture: the result
    // must not depend on the host's culture (date order, default calendar). Input without an
    // explicit offset is taken as UTC rather than as the host's local time zone.
    const System.Globalization.DateTimeStyles Styles =
        System.Globalization.DateTimeStyles.AllowWhiteSpaces |
        System.Globalization.DateTimeStyles.AssumeUniversal;

    return DateTimeOffset.TryParse(
        dateStr,
        System.Globalization.CultureInfo.InvariantCulture,
        Styles,
        out var date)
        ? date
        : DateTimeOffset.UtcNow;
}
/// <summary>
/// Parses the date at the start of an RPM changelog header, e.g. "Mon Jan 15 2024".
/// </summary>
/// <param name="dateStr">
/// Header text beginning with "Day Mon DD YYYY". Anything after those four tokens (the header
/// pattern in this file also captures the author and e-mail) is ignored.
/// </param>
/// <returns>
/// The parsed date at midnight UTC, or the current UTC time when no date can be recognised.
/// </returns>
private static DateTimeOffset ParseRpmDate(string dateStr)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.DateTimeStyles Styles =
        System.Globalization.DateTimeStyles.AllowWhiteSpaces |
        System.Globalization.DateTimeStyles.AssumeUniversal;

    if (string.IsNullOrWhiteSpace(dateStr))
    {
        return DateTimeOffset.UtcNow;
    }

    // Keep the first four tokens apart; the fifth slot swallows the rest (author, e-mail).
    var tokens = dateStr.Split(new[] { ' ', '\t', '\r' }, 5, StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length >= 4)
    {
        // The weekday token is skipped on purpose: parsing it would reject entries whose weekday
        // does not match the calendar date, and it adds no information.
        var monthDayYear = $"{tokens[1]} {tokens[2]} {tokens[3]}";
        if (DateTimeOffset.TryParseExact(monthDayYear, "MMM d yyyy", invariant, Styles, out var headerDate))
        {
            return headerDate;
        }
    }

    // Fall back to general parsing for inputs that do not follow the header layout.
    if (DateTimeOffset.TryParse(dateStr, invariant, Styles, out var date))
    {
        return date;
    }

    return DateTimeOffset.UtcNow;
}
/// <summary>
/// Debian stanza header, e.g. "package (version) distribution; urgency=...".
/// Group 1 is the package name, group 2 the version inside the parentheses.
/// </summary>
[GeneratedRegex(@"^(\S+) \(([^)]+)\)")]
private static partial Regex DebianHeaderRegex();
/// <summary>
/// Debian stanza trailer, " -- Author &lt;email&gt; Date". Group 1 is the text after the e-mail part.
/// The pattern is not anchored, so it matches anywhere in a line.
/// </summary>
[GeneratedRegex(@" -- .+ <.+> (.+)")]
private static partial Regex DebianDateRegex();
/// <summary>
/// RPM entry header, "* Day Mon DD YYYY Author &lt;email&gt; - version-release".
/// Group 1 is greedy and captures everything between "* " and the last " - " (date plus author);
/// group 2 is the text after that separator.
/// </summary>
[GeneratedRegex(@"^\* (.+) - (.+)")]
private static partial Regex RpmHeaderRegex();
/// <summary>
/// Alpine secfixes version line: exactly two leading whitespace characters, a version token and a
/// colon. Group 1 is the version token.
/// </summary>
[GeneratedRegex(@"^\s{2}([0-9A-Za-z\.\-_+]+):")]
private static partial Regex AlpineVersionRegex();
/// <summary>
/// CVE identifier: "CVE-", a four-digit year, "-", then at least four alphanumeric characters.
/// Matching is case-sensitive.
/// </summary>
// NOTE(review): the sequence part also accepts letters, which official CVE IDs do not contain —
// confirm whether that is intentional (e.g. placeholder IDs) before tightening it to digits.
[GeneratedRegex(@"CVE-\d{4}-[0-9A-Za-z]{4,}")]
private static partial Regex CvePatternRegex();
// Bug tracker patterns for BP-401, BP-402, BP-403
/// <summary>
/// Debian BTS pattern: matches a "Closes:" or "Fixes:" section. Group 1 is the section text up to
/// (not including) an opening parenthesis, the end of input, or a following ", Closes:" / ", Fixes:".
/// The actual bug numbers are extracted separately using DebianBugNumberRegex.
/// </summary>
[GeneratedRegex(@"(?:Closes|Fixes):\s*(.+?)(?=\s*(?:\(|$|,\s*(?:Closes|Fixes):))", RegexOptions.IgnoreCase)]
private static partial Regex DebianBugSectionRegex();
/// <summary>
/// Extract individual bug numbers from a Debian bug section (after "Closes:" or "Fixes:").
/// Group 1 is a run of at least four digits, with an optional leading '#'.
/// </summary>
[GeneratedRegex(@"#?(\d{4,})", RegexOptions.IgnoreCase)]
private static partial Regex DebianBugNumberRegex();
/// <summary>
/// Red Hat Bugzilla pattern: "RHBZ#123456", "rhbz#123456", "bz#123456", "Bug 123456".
/// Group 1 is the 6-8 digit bug id. The leading word boundary keeps the keyword from matching
/// inside a longer word (for example "debug: 1234567").
/// </summary>
[GeneratedRegex(@"\b(?:RHBZ|rhbz|bz|Bug|BZ)[\s#:]+(\d{6,8})", RegexOptions.IgnoreCase)]
private static partial Regex RedHatBugRegex();
/// <summary>
/// Launchpad pattern: "LP: #123456" or "LP #123456". Group 1 is the bug id.
/// The leading word boundary keeps the case-insensitive "LP" from matching inside a longer word
/// (for example "help 2024").
/// </summary>
[GeneratedRegex(@"\bLP[\s:#]+(\d+)", RegexOptions.IgnoreCase)]
private static partial Regex LaunchpadBugRegex();
/// <summary>
/// GitHub pattern: "Fixes #123", "GH-123", "#123" in commit context. Group 1 is the issue number.
/// </summary>
// NOTE(review): ExtractBugReferences in this file does not use this pattern; it would also match
// the "#123456" of a Debian "Closes: #123456", so wiring it in needs disambiguation first.
[GeneratedRegex(@"(?:Fixes|Closes|Resolves)?\s*(?:GH-|#)(\d+)", RegexOptions.IgnoreCase)]
private static partial Regex GitHubBugRegex();
/// <summary>
/// Extracts Debian BTS, Red Hat Bugzilla and Launchpad bug references from changelog text.
/// </summary>
/// <param name="line">The changelog text to scan, normally a single line.</param>
/// <returns>
/// The references in discovery order (Debian first, then Red Hat, then Launchpad). Each
/// tracker/bug-id pair is reported once; the first occurrence supplies the raw reference text.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="line"/> is <c>null</c>.</exception>
public static ImmutableArray<BugReference> ExtractBugReferences(string line)
{
    ArgumentNullException.ThrowIfNull(line);

    var bugs = ImmutableArray.CreateBuilder<BugReference>();
    var seen = new HashSet<(BugTracker Tracker, string BugId)>();

    // Debian BTS - find every "Closes:" / "Fixes:" section and extract all numbers in it.
    var hasCloses = line.Contains("Closes:", StringComparison.OrdinalIgnoreCase);
    if (hasCloses || line.Contains("Fixes:", StringComparison.OrdinalIgnoreCase))
    {
        // Matches (not Match): a line such as "(Closes: #111111, Closes: #222222)" holds several
        // sections, and the section pattern stops each one before the next keyword.
        var sections = DebianBugSectionRegex().Matches(line);
        if (sections.Count > 0)
        {
            foreach (Match section in sections)
            {
                var rawReference = section.Value.Trim();
                foreach (var bugId in DebianBugIds(section.Groups[1].Value))
                {
                    AddUnique(BugTracker.Debian, bugId, rawReference);
                }
            }
        }
        else
        {
            // Fallback: take any bug numbers found after the keyword. "Closes:" wins when both
            // keywords are present.
            var keyword = hasCloses ? "Closes:" : "Fixes:";
            var idx = line.IndexOf(keyword, StringComparison.OrdinalIgnoreCase);
            if (idx >= 0)
            {
                foreach (var bugId in DebianBugIds(line[(idx + keyword.Length)..]))
                {
                    AddUnique(BugTracker.Debian, bugId, $"{keyword} #{bugId}");
                }
            }
        }
    }

    // Red Hat Bugzilla
    foreach (Match match in RedHatBugRegex().Matches(line))
    {
        AddUnique(BugTracker.RedHat, match.Groups[1].Value, match.Value);
    }

    // Launchpad
    foreach (Match match in LaunchpadBugRegex().Matches(line))
    {
        AddUnique(BugTracker.Launchpad, match.Groups[1].Value, match.Value);
    }

    return bugs.ToImmutable();

    // Records a reference unless the same tracker/id pair was already recorded.
    void AddUnique(BugTracker tracker, string bugId, string rawReference)
    {
        if (seen.Add((tracker, bugId)))
        {
            bugs.Add(new BugReference
            {
                Tracker = tracker,
                BugId = bugId,
                RawReference = rawReference
            });
        }
    }

    // Yields the bug numbers of a Debian section. CVE identifiers are blanked out first so that
    // "Fixes: CVE-2024-12345" does not report "2024" and "12345" as bug numbers.
    static IEnumerable<string> DebianBugIds(string sectionText)
    {
        var withoutCves = CvePatternRegex().Replace(sectionText, " ");
        foreach (Match number in DebianBugNumberRegex().Matches(withoutCves))
        {
            yield return number.Groups[1].Value;
        }
    }
}
}
/// <summary>
/// Outcome of parsing one changelog: the extracted entries and the time the parse finished.
/// </summary>
public sealed record ChangelogParseResult
{
/// <summary>
/// Entries extracted from the changelog, in the order they appear in the input.
/// The parsers in this file only emit entries that mention a CVE or a bug reference.
/// </summary>
public required IReadOnlyList<ChangelogEntry> Entries { get; init; }
/// <summary>
/// When the result was produced; the parsers in this file set it to the current UTC time.
/// </summary>
public required DateTimeOffset ParsedAt { get; init; }
}
/// <summary>
/// One changelog entry (a Debian stanza, an RPM entry or an Alpine secfixes version) together with
/// the security evidence found in it.
/// </summary>
public sealed record ChangelogEntry
{
/// <summary>
/// Package the entry belongs to. The RPM and Alpine parsers in this file fill in the placeholders
/// "rpm-package" and "alpine-package" instead of a real name.
/// </summary>
public required string PackageName { get; init; }
/// <summary>
/// Version string as captured from the entry header.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Distinct CVE identifiers mentioned in the entry, in first-seen order.
/// </summary>
public required IReadOnlyList<string> CveIds { get; init; }
/// <summary>
/// Distinct bug tracker references mentioned in the entry, in first-seen order.
/// </summary>
public required IReadOnlyList<BugReference> BugReferences { get; init; }
/// <summary>
/// Free-text description of the entry. The Debian and RPM parsers join the trimmed content lines
/// with spaces; the Alpine parser generates "Security fixes for ...".
/// </summary>
public required string Description { get; init; }
/// <summary>
/// Date of the entry. The parsers in this file substitute the current UTC time when the input
/// carries no date or the date cannot be parsed.
/// </summary>
public required DateTimeOffset Date { get; init; }
/// <summary>
/// Confidence assigned by the parser. The parsers in this file use 0.80 for Debian/RPM entries
/// that list a CVE, 0.75 for bug-only Debian/RPM entries and 0.85 for Alpine secfixes.
/// </summary>
public required double Confidence { get; init; }
}
/// <summary>
/// A single bug tracker reference extracted from changelog text: which tracker, which bug, and the
/// text it was recognised from.
/// </summary>
public sealed record BugReference
{
/// <summary>
/// The bug tracker system the reference points to.
/// </summary>
public required BugTracker Tracker { get; init; }
/// <summary>
/// The bug ID within that tracker, kept as the digit string captured from the text.
/// </summary>
public required string BugId { get; init; }
/// <summary>
/// The reference text as found in the changelog. For Debian references this is the whole matched
/// "Closes:"/"Fixes:" section, so several bugs from one section share the same value.
/// </summary>
public required string RawReference { get; init; }
}
/// <summary>
/// Supported bug tracker systems for CVE mapping.
/// ChangelogParser.ExtractBugReferences, as written in this file, only produces
/// <see cref="Debian"/>, <see cref="RedHat"/> and <see cref="Launchpad"/>.
/// </summary>
public enum BugTracker
{
/// <summary>
/// Debian BTS - "Closes: #123456" or "(Closes: #123)"
/// </summary>
Debian,
/// <summary>
/// Red Hat Bugzilla - "RHBZ#123456", "rhbz#123456", "bz#123456"
/// </summary>
RedHat,
/// <summary>
/// Launchpad - "LP: #123456"
/// </summary>
Launchpad,
/// <summary>
/// GitHub Issues - "Fixes #123", "GH-123".
/// Not produced by the extraction code in this file.
/// </summary>
GitHub,
/// <summary>
/// GitLab Issues - "gitlab#123".
/// Not produced by the extraction code in this file.
/// </summary>
GitLab,
/// <summary>
/// Unknown tracker type.
/// Not produced by the extraction code in this file.
/// </summary>
Unknown
}