513 lines
18 KiB
C#
513 lines
18 KiB
C#
namespace StellaOps.Concelier.SourceIntel;
|
|
|
|
using System.Collections.Immutable;
|
|
using System.Text.RegularExpressions;
|
|
|
|
/// <summary>
|
|
/// Parses source package changelogs for CVE mentions (Tier 2).
|
|
/// </summary>
|
|
public static partial class ChangelogParser
|
|
{
|
|
/// <summary>
/// Parse Debian changelog for CVE mentions and bug references.
/// </summary>
/// <remarks>
/// The text is scanned line by line. A header line ("package (version) distribution; urgency=...")
/// starts a new stanza, a trailer line (" -- Author &lt;email&gt;  Date") supplies the stanza date, and
/// every other line is searched for CVE identifiers and bug tracker references. Only stanzas that
/// mention at least one CVE or bug reference produce a <see cref="ChangelogEntry"/>.
/// </remarks>
/// <param name="changelogContent">Raw changelog text; lines are separated by '\n'.</param>
/// <returns>
/// The matching entries in file order. Entries mentioning a CVE get confidence 0.80, bug-only entries
/// 0.75. An entry without a recognized trailer date uses the current UTC time as its date.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="changelogContent"/> is <c>null</c>.</exception>
public static ChangelogParseResult ParseDebianChangelog(string changelogContent)
{
    ArgumentNullException.ThrowIfNull(changelogContent);

    var entries = new List<ChangelogEntry>();

    string? currentPackage = null;
    string? currentVersion = null;
    DateTimeOffset? currentDate = null;
    var currentCves = new List<string>();
    var currentBugs = new List<BugReference>();
    var currentDescription = new List<string>();

    // Emits the stanza collected so far. Stanzas without a header, or without any CVE or bug
    // reference, are dropped. The lists are copied because they are cleared for the next stanza.
    void FlushCurrentEntry()
    {
        if (currentPackage is null || currentVersion is null)
        {
            return;
        }

        if (currentCves.Count == 0 && currentBugs.Count == 0)
        {
            return;
        }

        entries.Add(new ChangelogEntry
        {
            PackageName = currentPackage,
            Version = currentVersion,
            CveIds = currentCves.ToList(),
            BugReferences = currentBugs.ToList(),
            Description = string.Join(" ", currentDescription),
            Date = currentDate ?? DateTimeOffset.UtcNow,
            Confidence = currentCves.Count > 0 ? 0.80 : 0.75 // Bug-only entries have lower confidence
        });
    }

    foreach (var line in changelogContent.Split('\n'))
    {
        // Package header: "package (version) distribution; urgency=..."
        var headerMatch = DebianHeaderRegex().Match(line);
        if (headerMatch.Success)
        {
            // A new header closes the previous stanza.
            FlushCurrentEntry();

            currentPackage = headerMatch.Groups[1].Value;
            currentVersion = headerMatch.Groups[2].Value;
            currentCves.Clear();
            currentBugs.Clear();
            currentDescription.Clear();
            currentDate = null;
            continue;
        }

        // Date line: " -- Author <email>  Date"
        var dateMatch = DebianDateRegex().Match(line);
        if (dateMatch.Success)
        {
            currentDate = ParseDebianDate(dateMatch.Groups[1].Value);
            continue;
        }

        // Content lines: look for CVE mentions
        foreach (Match cveMatch in CvePatternRegex().Matches(line))
        {
            var cveId = cveMatch.Value;
            if (!currentCves.Contains(cveId))
            {
                currentCves.Add(cveId);
            }
        }

        // Content lines: look for bug references
        foreach (var bug in ExtractBugReferences(line))
        {
            if (!currentBugs.Any(b => b.Tracker == bug.Tracker && b.BugId == bug.BugId))
            {
                currentBugs.Add(bug);
            }
        }

        // Trailer-like lines that did not match the date pattern are still kept out of the description.
        // Ordinal comparison: this is a format marker, not linguistic text.
        if (!string.IsNullOrWhiteSpace(line) && !line.StartsWith(" --", StringComparison.Ordinal))
        {
            currentDescription.Add(line.Trim());
        }
    }

    // The last stanza has no following header to trigger the flush.
    FlushCurrentEntry();

    return new ChangelogParseResult
    {
        Entries = entries,
        ParsedAt = DateTimeOffset.UtcNow
    };
}
|
|
|
|
/// <summary>
/// Parse RPM changelog for CVE mentions and bug references.
/// </summary>
/// <remarks>
/// A header line ("* Day Mon DD YYYY Author &lt;email&gt; - version-release") starts a new entry; every
/// other line is searched for CVE identifiers and bug tracker references. Only entries that mention
/// at least one CVE or bug reference are returned. The changelog text carries no package name, so
/// every entry is reported with the placeholder name "rpm-package".
/// </remarks>
/// <param name="changelogContent">Raw changelog text; lines are separated by '\n' (a trailing '\r' is tolerated).</param>
/// <returns>
/// The matching entries in file order. Entries mentioning a CVE get confidence 0.80, bug-only entries 0.75.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="changelogContent"/> is <c>null</c>.</exception>
public static ChangelogParseResult ParseRpmChangelog(string changelogContent)
{
    ArgumentNullException.ThrowIfNull(changelogContent);

    const string PlaceholderPackageName = "rpm-package";

    var entries = new List<ChangelogEntry>();

    string? currentVersion = null;
    DateTimeOffset? currentDate = null;
    var currentCves = new List<string>();
    var currentBugs = new List<BugReference>();
    var currentDescription = new List<string>();

    // Emits the entry collected so far. Entries without a header, or without any CVE or bug
    // reference, are dropped. The lists are copied because they are cleared for the next entry.
    void FlushCurrentEntry()
    {
        if (currentVersion is null)
        {
            return;
        }

        if (currentCves.Count == 0 && currentBugs.Count == 0)
        {
            return;
        }

        entries.Add(new ChangelogEntry
        {
            PackageName = PlaceholderPackageName,
            Version = currentVersion,
            CveIds = currentCves.ToList(),
            BugReferences = currentBugs.ToList(),
            Description = string.Join(" ", currentDescription),
            Date = currentDate ?? DateTimeOffset.UtcNow,
            Confidence = currentCves.Count > 0 ? 0.80 : 0.75
        });
    }

    foreach (var line in changelogContent.Split('\n'))
    {
        // Entry header: "* Day Mon DD YYYY Author <email> - version-release"
        var headerMatch = RpmHeaderRegex().Match(line);
        if (headerMatch.Success)
        {
            // A new header closes the previous entry.
            FlushCurrentEntry();

            currentDate = ParseRpmDate(headerMatch.Groups[1].Value);

            // The version group is greedy to end of line, so with CRLF input it would otherwise
            // keep the trailing '\r' (or other trailing whitespace).
            currentVersion = headerMatch.Groups[2].Value.Trim();
            currentCves.Clear();
            currentBugs.Clear();
            currentDescription.Clear();
            continue;
        }

        // Content lines: look for CVE mentions
        foreach (Match cveMatch in CvePatternRegex().Matches(line))
        {
            var cveId = cveMatch.Value;
            if (!currentCves.Contains(cveId))
            {
                currentCves.Add(cveId);
            }
        }

        // Content lines: look for bug references
        foreach (var bug in ExtractBugReferences(line))
        {
            if (!currentBugs.Any(b => b.Tracker == bug.Tracker && b.BugId == bug.BugId))
            {
                currentBugs.Add(bug);
            }
        }

        // Header-like lines that did not match the header pattern are kept out of the description.
        // The char overload compares ordinally.
        if (!string.IsNullOrWhiteSpace(line) && !line.StartsWith('*'))
        {
            currentDescription.Add(line.Trim());
        }
    }

    // The last entry has no following header to trigger the flush.
    FlushCurrentEntry();

    return new ChangelogParseResult
    {
        Entries = entries,
        ParsedAt = DateTimeOffset.UtcNow
    };
}
|
|
|
|
/// <summary>
/// Parse Alpine APKBUILD secfixes for CVE mentions.
/// Alpine secfixes typically don't contain bug tracker references, but we include
/// the functionality for consistency.
/// </summary>
/// <remarks>
/// A version line ("  version-release:") starts a new entry; every other line is searched for CVE
/// identifiers and bug tracker references. Only versions with at least one CVE or bug reference are
/// returned. The secfixes text carries neither a package name nor a date, so entries use the
/// placeholder name "alpine-package" and the current UTC time.
/// </remarks>
/// <param name="secfixesContent">Secfixes text; lines are separated by '\n'.</param>
/// <returns>The matching entries in file order, each with confidence 0.85.</returns>
/// <exception cref="ArgumentNullException"><paramref name="secfixesContent"/> is <c>null</c>.</exception>
public static ChangelogParseResult ParseAlpineSecfixes(string secfixesContent)
{
    ArgumentNullException.ThrowIfNull(secfixesContent);

    const string PlaceholderPackageName = "alpine-package";

    var entries = new List<ChangelogEntry>();

    string? currentVersion = null;
    var currentCves = new List<string>();
    var currentBugs = new List<BugReference>();

    // Emits the version collected so far. Versions without any CVE or bug reference are dropped.
    // The lists are copied because they are cleared for the next version.
    void FlushCurrentEntry()
    {
        if (currentVersion is null)
        {
            return;
        }

        if (currentCves.Count == 0 && currentBugs.Count == 0)
        {
            return;
        }

        entries.Add(new ChangelogEntry
        {
            PackageName = PlaceholderPackageName,
            Version = currentVersion,
            CveIds = currentCves.ToList(),
            BugReferences = currentBugs.ToList(),
            Description = $"Security fixes for {string.Join(", ", currentCves)}",
            Date = DateTimeOffset.UtcNow,
            Confidence = 0.85 // Alpine secfixes are explicit
        });
    }

    foreach (var line in secfixesContent.Split('\n'))
    {
        // Version line: "  version-release:"
        var versionMatch = AlpineVersionRegex().Match(line);
        if (versionMatch.Success)
        {
            // A new version line closes the previous entry.
            FlushCurrentEntry();

            currentVersion = versionMatch.Groups[1].Value;
            currentCves.Clear();
            currentBugs.Clear();
            continue;
        }

        // CVE line: "    - CVE-XXXX-YYYY"
        foreach (Match cveMatch in CvePatternRegex().Matches(line))
        {
            var cveId = cveMatch.Value;
            if (!currentCves.Contains(cveId))
            {
                currentCves.Add(cveId);
            }
        }

        // Bug references (rare in Alpine secfixes, but possible)
        foreach (var bug in ExtractBugReferences(line))
        {
            if (!currentBugs.Any(b => b.Tracker == bug.Tracker && b.BugId == bug.BugId))
            {
                currentBugs.Add(bug);
            }
        }
    }

    // The last version has no following version line to trigger the flush.
    FlushCurrentEntry();

    return new ChangelogParseResult
    {
        Entries = entries,
        ParsedAt = DateTimeOffset.UtcNow
    };
}
|
|
|
|
/// <summary>
/// Parses the date portion of a Debian changelog trailer line.
/// </summary>
/// <param name="dateStr">Date text such as "Mon, 15 Jan 2024 10:30:00 +0000".</param>
/// <returns>
/// The parsed timestamp, or the current UTC time when <paramref name="dateStr"/> cannot be parsed.
/// </returns>
private static DateTimeOffset ParseDebianDate(string dateStr)
{
    // The day and month names in the trailer are English abbreviations, so parse with the invariant
    // culture. Parsing with the host's current culture would fail on non-English systems and
    // silently replace the real date with "now".
    var parsedOk = DateTimeOffset.TryParse(
        dateStr,
        System.Globalization.CultureInfo.InvariantCulture,
        System.Globalization.DateTimeStyles.None,
        out var parsed);

    return parsedOk ? parsed : DateTimeOffset.UtcNow;
}
|
|
|
|
/// <summary>
/// Parses the date at the start of an RPM changelog header.
/// </summary>
/// <param name="dateStr">
/// Text beginning with a date such as "Mon Jan 15 2024". It may be followed by further header text
/// (for example "Author &lt;email&gt;"), which is ignored.
/// </param>
/// <returns>
/// Midnight UTC of the parsed day, or the current UTC time when no date can be recognized.
/// </returns>
private static DateTimeOffset ParseRpmDate(string dateStr)
{
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    // RPM dates carry no time zone; assuming UTC keeps the result independent of the host's zone.
    const System.Globalization.DateTimeStyles Styles =
        System.Globalization.DateTimeStyles.AllowWhiteSpaces |
        System.Globalization.DateTimeStyles.AssumeUniversal;

    // Only the first four whitespace-separated tokens ("Day Mon DD YYYY") form the date. The header
    // capture handed to this method also contains the author and e-mail, which would make a
    // whole-string parse fail.
    var tokens = dateStr.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length >= 4)
    {
        var withWeekday = string.Join(' ', tokens, 0, 4);
        if (DateTimeOffset.TryParseExact(withWeekday, "ddd MMM d yyyy", culture, Styles, out var strict))
        {
            return strict;
        }

        // "ddd" also requires the weekday to agree with the date; hand-written changelogs sometimes
        // get it wrong, so retry without the weekday token.
        var withoutWeekday = string.Join(' ', tokens, 1, 3);
        if (DateTimeOffset.TryParseExact(withoutWeekday, "MMM d yyyy", culture, Styles, out var lenient))
        {
            return lenient;
        }
    }

    // Last resort: let the general parser try the whole string.
    if (DateTimeOffset.TryParse(dateStr, culture, Styles, out var date))
    {
        return date;
    }

    return DateTimeOffset.UtcNow;
}
|
|
|
|
/// <summary>
/// Debian stanza header, e.g. "package (version) distribution; urgency=...".
/// Group 1 is the package name, group 2 the version text inside the parentheses.
/// </summary>
[GeneratedRegex(@"^(\S+) \(([^)]+)\)")]
private static partial Regex DebianHeaderRegex();

/// <summary>
/// Debian stanza trailer, " -- Author &lt;email&gt;  Date". Group 1 is the date text without
/// surrounding whitespace.
/// </summary>
/// <remarks>
/// Anchored at the start of the line so that changelog prose which merely contains
/// " -- name &lt;x&gt; ..." is not mistaken for a trailer. The trailing <c>\s*$</c> keeps a stray '\r'
/// from CRLF input out of the capture.
/// </remarks>
[GeneratedRegex(@"^ -- .+ <.+>\s+(.+?)\s*$")]
private static partial Regex DebianDateRegex();

/// <summary>
/// RPM changelog entry header, "* Day Mon DD YYYY Author &lt;email&gt; - version-release".
/// Group 1 is everything between "* " and the last " - " (date followed by author and e-mail);
/// group 2 is the remainder of the line (the version-release).
/// </summary>
[GeneratedRegex(@"^\* (.+) - (.+)")]
private static partial Regex RpmHeaderRegex();

/// <summary>
/// Alpine secfixes version line: exactly two leading whitespace characters, a version token, then ':'.
/// Group 1 is the version token.
/// </summary>
[GeneratedRegex(@"^\s{2}([0-9A-Za-z\.\-_+]+):")]
private static partial Regex AlpineVersionRegex();

/// <summary>
/// CVE identifier: "CVE-", a four-digit year, "-", then four or more ASCII letters or digits.
/// The match is case-sensitive.
/// </summary>
[GeneratedRegex(@"CVE-\d{4}-[0-9A-Za-z]{4,}")]
private static partial Regex CvePatternRegex();
|
|
|
|
// Bug tracker patterns for BP-401, BP-402, BP-403
|
|
|
|
/// <summary>
/// Debian BTS section: a "Closes:" or "Fixes:" keyword (any case) followed by the text it applies to.
/// Group 1 is that text, ending before an opening parenthesis, before a following
/// ", Closes:" / ", Fixes:" keyword, or at the end of the line.
/// The actual bug numbers are extracted separately using DebianBugNumberRegex.
/// </summary>
[GeneratedRegex(@"(?:Closes|Fixes):\s*(.+?)(?=\s*(?:\(|$|,\s*(?:Closes|Fixes):))", RegexOptions.IgnoreCase)]
private static partial Regex DebianBugSectionRegex();

/// <summary>
/// Extract individual bug numbers from a Debian bug section (after "Closes:" or "Fixes:").
/// Group 1 is a run of four or more digits; a leading '#' is optional.
/// </summary>
[GeneratedRegex(@"#?(\d{4,})", RegexOptions.IgnoreCase)]
private static partial Regex DebianBugNumberRegex();

/// <summary>
/// Red Hat Bugzilla pattern: "RHBZ#123456", "rhbz#123456", "bz#123456", "Bug 123456".
/// Group 1 is the 6 to 8 digit bug ID.
/// </summary>
/// <remarks>
/// The leading <c>\b</c> stops the prefix from matching inside a longer word (for example the
/// "bug" in "Debug 1234567"). The trailing <c>(?!\d)</c> rejects numbers longer than eight digits
/// instead of reporting their first eight digits as an ID.
/// </remarks>
[GeneratedRegex(@"\b(?:RHBZ|rhbz|bz|Bug|BZ)[\s#:]+(\d{6,8})(?!\d)", RegexOptions.IgnoreCase)]
private static partial Regex RedHatBugRegex();

/// <summary>
/// Launchpad pattern: "LP: #123456" or "LP #123456". Group 1 is the bug ID.
/// </summary>
/// <remarks>
/// The leading <c>\b</c> stops "LP" from matching at the end of a longer word (for example
/// "gulp 4" or "help: 5").
/// </remarks>
[GeneratedRegex(@"\bLP[\s:#]+(\d+)", RegexOptions.IgnoreCase)]
private static partial Regex LaunchpadBugRegex();

/// <summary>
/// GitHub pattern: "Fixes #123", "GH-123", "#123" in commit context. Group 1 is the issue number.
/// </summary>
/// <remarks>
/// NOTE(review): this pattern is not referenced by ExtractBugReferences in the part of the class
/// visible here; confirm whether another part of this partial class uses it.
/// </remarks>
[GeneratedRegex(@"(?:Fixes|Closes|Resolves)?\s*(?:GH-|#)(\d+)", RegexOptions.IgnoreCase)]
private static partial Regex GitHubBugRegex();
|
|
|
|
/// <summary>
/// Extract all bug references from a changelog line.
/// </summary>
/// <remarks>
/// Three trackers are recognized, and results are returned in this order: Debian BTS numbers found
/// in "Closes:" / "Fixes:" sections, Red Hat Bugzilla references, then Launchpad references.
/// A given tracker/ID pair is reported once per line. The same number can still be reported for
/// more than one tracker when several patterns match it (for example "Fixes: rhbz#123456").
/// </remarks>
/// <param name="line">A single changelog line.</param>
/// <returns>The references found; empty when the line contains none.</returns>
/// <exception cref="ArgumentNullException"><paramref name="line"/> is <c>null</c>.</exception>
public static ImmutableArray<BugReference> ExtractBugReferences(string line)
{
    ArgumentNullException.ThrowIfNull(line);

    var bugs = ImmutableArray.CreateBuilder<BugReference>();
    var seen = new HashSet<(BugTracker Tracker, string BugId)>();

    // Adds a reference unless the same tracker/ID pair was already found on this line.
    void AddUnique(BugTracker tracker, string bugId, string rawReference)
    {
        if (seen.Add((tracker, bugId)))
        {
            bugs.Add(new BugReference
            {
                Tracker = tracker,
                BugId = bugId,
                RawReference = rawReference
            });
        }
    }

    // Debian BTS - find every "Closes:" or "Fixes:" section and extract all numbers from each.
    // The substring test is a cheap pre-filter so most lines skip the look-ahead heavy regex.
    if (line.Contains("Closes:", StringComparison.OrdinalIgnoreCase) ||
        line.Contains("Fixes:", StringComparison.OrdinalIgnoreCase))
    {
        foreach (Match section in DebianBugSectionRegex().Matches(line))
        {
            // All numbers of one section share the section text as their raw reference.
            var rawReference = section.Value.Trim();
            foreach (Match number in DebianBugNumberRegex().Matches(section.Groups[1].Value))
            {
                AddUnique(BugTracker.Debian, number.Groups[1].Value, rawReference);
            }
        }

        // No fallback is needed when the section pattern finds nothing: that only happens when
        // nothing follows the keyword, in which case there are no numbers to extract.
    }

    // Red Hat Bugzilla
    foreach (Match match in RedHatBugRegex().Matches(line))
    {
        AddUnique(BugTracker.RedHat, match.Groups[1].Value, match.Value);
    }

    // Launchpad
    foreach (Match match in LaunchpadBugRegex().Matches(line))
    {
        AddUnique(BugTracker.Launchpad, match.Groups[1].Value, match.Value);
    }

    return bugs.ToImmutable();
}
|
|
}
|
|
|
|
/// <summary>
/// Result of parsing one changelog or secfixes document.
/// </summary>
public sealed record ChangelogParseResult
{
    /// <summary>
    /// Entries extracted from the document. The parsers in this file add them in the order the
    /// corresponding sections appear in the input.
    /// </summary>
    public required IReadOnlyList<ChangelogEntry> Entries { get; init; }

    /// <summary>
    /// Time the result was produced; the parsers in this file set it to the current UTC time.
    /// </summary>
    public required DateTimeOffset ParsedAt { get; init; }
}
|
|
|
|
/// <summary>
/// One changelog section (a Debian stanza, an RPM entry, or an Alpine secfixes version) that
/// mentions at least one CVE or bug reference.
/// </summary>
public sealed record ChangelogEntry
{
    /// <summary>
    /// Package name. The Debian parser takes it from the stanza header; the RPM and Alpine parsers
    /// in this file use the placeholders "rpm-package" and "alpine-package".
    /// </summary>
    public required string PackageName { get; init; }

    /// <summary>
    /// Version text as captured from the section header.
    /// </summary>
    public required string Version { get; init; }

    /// <summary>
    /// Distinct CVE identifiers mentioned in the section, in order of first appearance.
    /// </summary>
    public required IReadOnlyList<string> CveIds { get; init; }

    /// <summary>
    /// Bug tracker references mentioned in the section, distinct by tracker and bug ID.
    /// </summary>
    public required IReadOnlyList<BugReference> BugReferences { get; init; }

    /// <summary>
    /// For Debian and RPM, the trimmed content lines of the section joined with single spaces;
    /// for Alpine, a generated "Security fixes for ..." sentence listing the CVE IDs.
    /// </summary>
    public required string Description { get; init; }

    /// <summary>
    /// Date of the section. The parsers in this file substitute the current UTC time when the input
    /// provides no date or the date cannot be parsed.
    /// </summary>
    public required DateTimeOffset Date { get; init; }

    /// <summary>
    /// Confidence score assigned by the parser: 0.80 for Debian/RPM entries that mention a CVE,
    /// 0.75 for Debian/RPM entries with bug references only, and 0.85 for Alpine secfixes entries.
    /// </summary>
    public required double Confidence { get; init; }
}
|
|
|
|
/// <summary>
/// Represents a bug tracker reference extracted from a changelog.
/// </summary>
public sealed record BugReference
{
    /// <summary>
    /// The bug tracker system.
    /// </summary>
    public required BugTracker Tracker { get; init; }

    /// <summary>
    /// The bug ID within that tracker, as the digit string captured from the changelog
    /// (without any '#' or tracker prefix).
    /// </summary>
    public required string BugId { get; init; }

    /// <summary>
    /// The full reference string as found in the changelog. For Debian references the parser in
    /// this file stores the matched "Closes:" / "Fixes:" section text, so several bug IDs taken
    /// from one section can share the same value.
    /// </summary>
    public required string RawReference { get; init; }
}
|
|
|
|
/// <summary>
/// Supported bug tracker systems for CVE mapping.
/// </summary>
/// <remarks>
/// NOTE(review): the extraction code visible in this file only produces <see cref="Debian"/>,
/// <see cref="RedHat"/> and <see cref="Launchpad"/> references; confirm where the remaining
/// members are produced.
/// </remarks>
public enum BugTracker
{
    /// <summary>
    /// Debian BTS - "Closes: #123456" or "(Closes: #123)"
    /// </summary>
    Debian,

    /// <summary>
    /// Red Hat Bugzilla - "RHBZ#123456", "rhbz#123456", "bz#123456"
    /// </summary>
    RedHat,

    /// <summary>
    /// Launchpad - "LP: #123456"
    /// </summary>
    Launchpad,

    /// <summary>
    /// GitHub Issues - "Fixes #123", "GH-123"
    /// </summary>
    GitHub,

    /// <summary>
    /// GitLab Issues - "gitlab#123"
    /// </summary>
    GitLab,

    /// <summary>
    /// Unknown tracker type.
    /// </summary>
    Unknown
}
|