Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.FixIndex/Parsers/RpmChangelogParser.cs

226 lines
7.1 KiB
C#

using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;
namespace StellaOps.BinaryIndex.FixIndex.Parsers;
/// <summary>
/// Parses RPM spec file changelog sections for CVE mentions.
/// </summary>
/// <remarks>
/// RPM changelog format:
/// %changelog
/// * Mon Jan 01 2024 Packager &lt;email&gt; - 1.2.3-4
/// - Fix CVE-2024-1234
/// <para>
/// Every line of the form <c>* Day Mon DD YYYY ...</c> starts a new entry. Only entries whose
/// header ends in <c> - version</c> produce evidence; an entry without a version still terminates
/// the preceding entry, so its CVE mentions are never attributed to another entry's version.
/// </para>
/// </remarks>
public sealed partial class RpmChangelogParser : IChangelogParser
{
    /// <summary>Maximum number of characters of entry text kept in an evidence excerpt.</summary>
    private const int MaxExcerptLength = 2000;

    /// <summary>Confidence assigned to changelog-derived evidence.</summary>
    private const decimal ChangelogConfidence = 0.75m; // RPM changelogs are less structured than Debian

    // RegexOptions.Compiled is omitted on purpose: the regex source generator ignores it.
    [GeneratedRegex(@"\bCVE-\d{4}-\d{4,7}\b")]
    private static partial Regex CvePatternRegex();

    // Entry header carrying a version: "* Day Mon DD YYYY Author <email> - version" (group 2 = version).
    [GeneratedRegex(@"^\*\s+\w{3}\s+\w{3}\s+\d{1,2}\s+\d{4}\s+(.+?)\s+-\s+(\S+)")]
    private static partial Regex EntryHeaderPatternRegex();

    // Any entry header, with or without a trailing version. Used purely as an entry boundary.
    [GeneratedRegex(@"^\*\s+\w{3}\s+\w{3}\s+\d{1,2}\s+\d{4}\b")]
    private static partial Regex EntryBoundaryPatternRegex();

    [GeneratedRegex(@"^%changelog\s*$", RegexOptions.IgnoreCase)]
    private static partial Regex ChangelogStartPatternRegex();

    [GeneratedRegex(@"^%\w+")]
    private static partial Regex SectionStartPatternRegex();

    /// <summary>
    /// Parses the top entry of an RPM spec changelog for CVE mentions.
    /// </summary>
    /// <param name="specContent">Full text of the spec file; null, empty or whitespace yields nothing.</param>
    /// <param name="distro">Value copied into <c>FixEvidence.Distro</c>.</param>
    /// <param name="release">Value copied into <c>FixEvidence.Release</c>.</param>
    /// <param name="sourcePkg">Value copied into <c>FixEvidence.SourcePkg</c>.</param>
    /// <returns>
    /// One evidence item per distinct CVE id mentioned in the first changelog entry whose header
    /// carries a version. Entries preceding it that have no version are skipped. The sequence is
    /// lazily evaluated.
    /// </returns>
    public IEnumerable<FixEvidence> ParseTopEntry(
        string specContent,
        string distro,
        string release,
        string sourcePkg)
    {
        foreach (var (version, entryLines) in EnumerateEntries(specContent))
        {
            if (version is null)
            {
                continue;
            }

            foreach (var fix in ExtractCvesFromEntry(entryLines, version, distro, release, sourcePkg))
            {
                yield return fix;
            }

            // Only the first versioned entry is of interest.
            yield break;
        }
    }

    /// <summary>
    /// Parses the full RPM spec changelog for all CVE mentions with their versions.
    /// </summary>
    /// <param name="specContent">Full text of the spec file; null, empty or whitespace yields nothing.</param>
    /// <param name="distro">Value copied into <c>FixEvidence.Distro</c>.</param>
    /// <param name="release">Value copied into <c>FixEvidence.Release</c>.</param>
    /// <param name="sourcePkg">Value copied into <c>FixEvidence.SourcePkg</c>.</param>
    /// <returns>
    /// One evidence item per distinct CVE id per versioned changelog entry, in file order.
    /// Entries whose header has no version are skipped. The sequence is lazily evaluated.
    /// </returns>
    public IEnumerable<FixEvidence> ParseAllEntries(
        string specContent,
        string distro,
        string release,
        string sourcePkg)
    {
        foreach (var (version, entryLines) in EnumerateEntries(specContent))
        {
            if (version is null)
            {
                continue;
            }

            foreach (var fix in ExtractCvesFromEntry(entryLines, version, distro, release, sourcePkg))
            {
                yield return fix;
            }
        }
    }

    /// <summary>
    /// Splits the <c>%changelog</c> section of a spec file into entries (header line plus body lines).
    /// </summary>
    /// <returns>
    /// Each entry in file order with the version taken from its header, or <c>null</c> when the
    /// header has no <c> - version</c> suffix. Scanning stops at the next <c>%section</c> line.
    /// </returns>
    private static IEnumerable<(string? Version, List<string> Lines)> EnumerateEntries(string specContent)
    {
        if (string.IsNullOrWhiteSpace(specContent))
        {
            yield break;
        }

        var inChangelog = false;
        string? currentVersion = null;
        List<string>? currentLines = null;

        foreach (var rawLine in specContent.Split('\n'))
        {
            // Tolerate CRLF input so excerpts do not carry stray carriage returns.
            var line = rawLine.TrimEnd('\r');

            if (ChangelogStartPatternRegex().IsMatch(line))
            {
                inChangelog = true;
                continue;
            }

            if (!inChangelog)
            {
                continue;
            }

            // Any other section directive (e.g. %files, %prep) ends the changelog.
            if (SectionStartPatternRegex().IsMatch(line))
            {
                break;
            }

            if (EntryBoundaryPatternRegex().IsMatch(line))
            {
                if (currentLines is not null)
                {
                    yield return (currentVersion, currentLines);
                }

                var header = EntryHeaderPatternRegex().Match(line);
                currentVersion = header.Success ? header.Groups[2].Value : null;
                currentLines = [line];
                continue;
            }

            // Lines before the first entry header belong to no entry and are dropped.
            currentLines?.Add(line);
        }

        if (currentLines is not null)
        {
            yield return (currentVersion, currentLines);
        }
    }

    /// <summary>
    /// Builds one evidence item per distinct CVE id mentioned in a single changelog entry.
    /// </summary>
    private static IEnumerable<FixEvidence> ExtractCvesFromEntry(
        List<string> entryLines,
        string version,
        string distro,
        string release,
        string sourcePkg)
    {
        var entryText = string.Join('\n', entryLines);
        var excerpt = TruncateExcerpt(entryText);

        // Captured once so every item produced from this entry shares the same timestamp.
        var createdAt = DateTimeOffset.UtcNow;

        var cves = CvePatternRegex().Matches(entryText)
            .Select(m => m.Value)
            .Distinct();

        foreach (var cve in cves)
        {
            yield return new FixEvidence
            {
                Distro = distro,
                Release = release,
                SourcePkg = sourcePkg,
                CveId = cve,
                State = FixState.Fixed,
                FixedVersion = version,
                Method = FixMethod.Changelog,
                Confidence = ChangelogConfidence,
                Evidence = new ChangelogEvidence
                {
                    File = "*.spec",
                    Version = version,
                    Excerpt = excerpt,
                    LineNumber = null
                },
                CreatedAt = createdAt
            };
        }
    }

    /// <summary>
    /// Limits <paramref name="text"/> to <see cref="MaxExcerptLength"/> characters without
    /// cutting a surrogate pair in half.
    /// </summary>
    private static string TruncateExcerpt(string text)
    {
        if (text.Length <= MaxExcerptLength)
        {
            return text;
        }

        var cut = char.IsHighSurrogate(text[MaxExcerptLength - 1])
            ? MaxExcerptLength - 1
            : MaxExcerptLength;

        return text[..cut];
    }
}