using System.Text.RegularExpressions;
using StellaOps.BinaryIndex.Core.Models;
using StellaOps.BinaryIndex.FixIndex.Models;

namespace StellaOps.BinaryIndex.FixIndex.Parsers;

/// <summary>
/// Parses RPM spec file changelog sections for CVE mentions.
/// </summary>
/// <remarks>
/// RPM changelog format:
/// <code>
/// %changelog
/// * Mon Jan 01 2024 Packager &lt;email&gt; - 1.2.3-4
/// - Fix CVE-2024-1234
/// </code>
/// Only header lines that end in <c> - version</c> start a new entry. Any other line
/// inside the changelog (including a header without that suffix) is appended to the
/// entry currently being collected, or ignored if no entry has started yet.
/// </remarks>
public sealed partial class RpmChangelogParser : IChangelogParser
{
    /// <summary>Confidence assigned to changelog-derived evidence; RPM changelogs are less structured than Debian ones.</summary>
    private const decimal ChangelogConfidence = 0.75m;

    /// <summary>Maximum number of characters of entry text kept in the evidence excerpt.</summary>
    private const int MaxExcerptLength = 2000;

    /// <summary>Value recorded as the evidence file; the parser only receives spec content, not a path.</summary>
    private const string SpecFilePattern = "*.spec";

    // RegexOptions.Compiled is intentionally not passed: the regex source generator ignores it.

    /// <summary>Matches a CVE identifier such as <c>CVE-2024-1234</c> (case-sensitive).</summary>
    [GeneratedRegex(@"\bCVE-\d{4}-\d{4,7}\b")]
    private static partial Regex CvePatternRegex();

    /// <summary>
    /// Matches an entry header (<c>* Day Mon DD YYYY Author - version</c>).
    /// Group 1 is the author text, group 2 is the version.
    /// </summary>
    [GeneratedRegex(@"^\*\s+\w{3}\s+\w{3}\s+\d{1,2}\s+\d{4}\s+(.+?)\s+-\s+(\S+)")]
    private static partial Regex EntryHeaderPatternRegex();

    /// <summary>Matches the <c>%changelog</c> section marker on a line of its own (case-insensitive).</summary>
    [GeneratedRegex(@"^%changelog\s*$", RegexOptions.IgnoreCase)]
    private static partial Regex ChangelogStartPatternRegex();

    /// <summary>Matches the start of any spec section or directive (a line beginning with <c>%name</c>).</summary>
    [GeneratedRegex(@"^%\w+")]
    private static partial Regex SectionStartPatternRegex();

    /// <summary>
    /// Parses the top entry of an RPM spec changelog for CVE mentions.
    /// </summary>
    /// <param name="specContent">Full text of the spec file. Null, empty or whitespace yields nothing.</param>
    /// <param name="distro">Copied to <c>Distro</c> on every result.</param>
    /// <param name="release">Copied to <c>Release</c> on every result.</param>
    /// <param name="sourcePkg">Copied to <c>SourcePkg</c> on every result.</param>
    /// <returns>
    /// One <see cref="FixEvidence"/> per distinct CVE identifier found in the first
    /// changelog entry, lazily evaluated. Empty when there is no changelog entry.
    /// </returns>
    public IEnumerable<FixEvidence> ParseTopEntry(
        string specContent,
        string distro,
        string release,
        string sourcePkg)
    {
        if (string.IsNullOrWhiteSpace(specContent))
        {
            yield break;
        }

        foreach (var (version, entryLines) in EnumerateEntries(specContent))
        {
            foreach (var fix in ExtractCvesFromEntry(entryLines, version, distro, release, sourcePkg))
            {
                yield return fix;
            }

            // Only the first entry in the changelog is wanted; stop scanning the rest.
            yield break;
        }
    }

    /// <summary>
    /// Parses the full RPM spec changelog for all CVE mentions with their versions.
    /// </summary>
    /// <param name="specContent">Full text of the spec file. Null, empty or whitespace yields nothing.</param>
    /// <param name="distro">Copied to <c>Distro</c> on every result.</param>
    /// <param name="release">Copied to <c>Release</c> on every result.</param>
    /// <param name="sourcePkg">Copied to <c>SourcePkg</c> on every result.</param>
    /// <returns>
    /// One <see cref="FixEvidence"/> per distinct CVE identifier per changelog entry,
    /// in file order, lazily evaluated. Each entry is reported exactly once, whether
    /// the changelog ends at end-of-file or at the next <c>%section</c>.
    /// </returns>
    public IEnumerable<FixEvidence> ParseAllEntries(
        string specContent,
        string distro,
        string release,
        string sourcePkg)
    {
        if (string.IsNullOrWhiteSpace(specContent))
        {
            yield break;
        }

        foreach (var (version, entryLines) in EnumerateEntries(specContent))
        {
            foreach (var fix in ExtractCvesFromEntry(entryLines, version, distro, release, sourcePkg))
            {
                yield return fix;
            }
        }
    }

    /// <summary>
    /// Walks the spec text and yields each changelog entry as its version plus its raw
    /// lines (header line first), in file order.
    /// </summary>
    /// <param name="specContent">Non-null spec file text.</param>
    /// <returns>A lazy sequence of entries; enumeration can be abandoned early.</returns>
    private static IEnumerable<(string Version, List<string> Lines)> EnumerateEntries(string specContent)
    {
        var inChangelog = false;
        string? version = null;
        List<string> entryLines = [];

        foreach (var line in specContent.Split('\n'))
        {
            // A %changelog marker switches scanning on; a repeated marker is simply skipped.
            if (ChangelogStartPatternRegex().IsMatch(line))
            {
                inChangelog = true;
                continue;
            }

            if (!inChangelog)
            {
                continue;
            }

            // Any other %section (e.g. %files, %prep) ends the changelog. The pending
            // entry is flushed once, by the code after the loop.
            if (SectionStartPatternRegex().IsMatch(line))
            {
                break;
            }

            var header = EntryHeaderPatternRegex().Match(line);
            if (header.Success)
            {
                // A new header closes the entry collected so far.
                if (version is not null)
                {
                    yield return (version, entryLines);
                }

                version = header.Groups[2].Value;
                entryLines = [line];
                continue;
            }

            // Body lines before the first recognised header belong to no entry and are dropped.
            if (version is not null)
            {
                entryLines.Add(line);
            }
        }

        if (version is not null)
        {
            yield return (version, entryLines);
        }
    }

    /// <summary>
    /// Builds one <see cref="FixEvidence"/> for each distinct CVE identifier mentioned
    /// in a single changelog entry.
    /// </summary>
    /// <param name="entryLines">Raw lines of the entry, header included.</param>
    /// <param name="version">Version taken from the entry header; used as the fixed version.</param>
    /// <param name="distro">Copied to <c>Distro</c>.</param>
    /// <param name="release">Copied to <c>Release</c>.</param>
    /// <param name="sourcePkg">Copied to <c>SourcePkg</c>.</param>
    /// <returns>Evidence records in order of first mention; empty if the entry names no CVE.</returns>
    private static IEnumerable<FixEvidence> ExtractCvesFromEntry(
        List<string> entryLines,
        string version,
        string distro,
        string release,
        string sourcePkg)
    {
        var entryText = string.Join('\n', entryLines);

        // The excerpt is capped so very long entries do not bloat the stored evidence.
        var excerpt = entryText.Length > MaxExcerptLength
            ? entryText[..MaxExcerptLength]
            : entryText;

        var cves = CvePatternRegex().Matches(entryText)
            .Select(m => m.Value)
            .Distinct();

        foreach (var cve in cves)
        {
            yield return new FixEvidence
            {
                Distro = distro,
                Release = release,
                SourcePkg = sourcePkg,
                CveId = cve,
                State = FixState.Fixed,
                FixedVersion = version,
                Method = FixMethod.Changelog,
                Confidence = ChangelogConfidence,
                Evidence = new ChangelogEvidence
                {
                    File = SpecFilePattern,
                    Version = version,
                    Excerpt = excerpt,
                    LineNumber = null
                },
                CreatedAt = DateTimeOffset.UtcNow
            };
        }
    }
}