using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Analyzers.OS.Rpm.Internal;

namespace StellaOps.Scanner.Analyzers.OS.Rpm;

/// <summary>
/// Reads RPM package headers from an rpm database found under a root directory.
/// The SQLite database (<c>rpmdb.sqlite</c>) is tried first; the legacy <c>Packages</c>
/// file (BerkeleyDB or raw header scan) is used as a fallback.
/// </summary>
// NOTE(review): the generic type arguments were missing from the reviewed text. RpmHeader is
// assumed to be the type returned by RpmHeaderParser.Parse and declared by IRpmDatabaseReader —
// confirm against those declarations.
internal sealed class RpmDatabaseReader : IRpmDatabaseReader
{
    /// <summary>Bytes preceding the index entries: 4 magic bytes, 4 skipped bytes, index count, store size.</summary>
    private const int HeaderPreambleLength = 16;

    /// <summary>Size in bytes of one index entry, as used by the total-length computation.</summary>
    private const int IndexEntryLength = 16;

    // NOTE(review): rpm.org documents the header magic as 8E AD E8 followed by a version byte of
    // 0x01. The 0xAB value is kept exactly as found — confirm it matches what RpmHeaderParser expects.
    private static readonly byte[] HeaderMagic = { 0x8e, 0xad, 0xe8, 0xab };

    private readonly ILogger _logger;
    private readonly RpmHeaderParser _parser = new();

    /// <summary>Creates a reader that reports problems through <paramref name="logger"/>.</summary>
    /// <param name="logger">Logger used for warnings and debug diagnostics.</param>
    public RpmDatabaseReader(ILogger logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Reads every parseable RPM header from the rpm database under <paramref name="rootPath"/>.
    /// </summary>
    /// <param name="rootPath">Root directory that contains <c>var/lib/rpm</c> or <c>usr/lib/sysimage/rpm</c>.</param>
    /// <param name="cancellationToken">Token checked once per database row / header blob.</param>
    /// <returns>
    /// Headers parsed from <c>rpmdb.sqlite</c>. When that file is missing, cannot be read, or yields
    /// no headers, the result of the legacy <c>Packages</c> reader is returned instead (possibly empty).
    /// </returns>
    /// <exception cref="OperationCanceledException">The token was cancelled while reading.</exception>
    public IReadOnlyList<RpmHeader> ReadHeaders(string rootPath, CancellationToken cancellationToken)
    {
        var sqlitePath = ResolveSqlitePath(rootPath);
        if (sqlitePath is null)
        {
            _logger.LogWarning("rpmdb.sqlite not found under root {RootPath}; attempting legacy rpmdb fallback.", rootPath);
            return ReadLegacyHeaders(rootPath, cancellationToken);
        }

        var headers = new List<RpmHeader>();
        try
        {
            var connectionString = new SqliteConnectionStringBuilder
            {
                DataSource = sqlitePath,
                Mode = SqliteOpenMode.ReadOnly,
            }.ToString();

            using var connection = new SqliteConnection(connectionString);
            connection.Open();

            using var command = connection.CreateCommand();
            command.CommandText = "SELECT * FROM Packages";

            using var reader = command.ExecuteReader();
            while (reader.Read())
            {
                cancellationToken.ThrowIfCancellationRequested();

                var blob = ExtractHeaderBlob(reader);
                if (blob is null)
                {
                    continue;
                }

                try
                {
                    headers.Add(_parser.Parse(blob));
                }
                catch (Exception ex)
                {
                    // A single bad row must not abort the scan; log it and keep going.
                    _logger.LogWarning(ex, "Failed to parse RPM header record (pkgKey={PkgKey}).", TryGetPkgKey(reader));
                }
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is excluded by the filter: it must reach the caller instead of being
            // reported as a read failure and answered with a fallback scan.
            _logger.LogWarning(ex, "Unable to read rpmdb.sqlite at {Path}.", sqlitePath);
            return ReadLegacyHeaders(rootPath, cancellationToken);
        }

        if (headers.Count == 0)
        {
            return ReadLegacyHeaders(rootPath, cancellationToken);
        }

        return headers;
    }

    /// <summary>Returns the first existing <c>rpmdb.sqlite</c> candidate under <paramref name="rootPath"/>, or <c>null</c>.</summary>
    private static string? ResolveSqlitePath(string rootPath)
        => ResolveRpmDbFile(rootPath, "rpmdb.sqlite");

    /// <summary>Returns the first existing legacy <c>Packages</c> candidate under <paramref name="rootPath"/>, or <c>null</c>.</summary>
    private static string? ResolveLegacyPackagesPath(string rootPath)
        => ResolveRpmDbFile(rootPath, "Packages");

    /// <summary>
    /// Probes <c>var/lib/rpm</c> and then <c>usr/lib/sysimage/rpm</c> under <paramref name="rootPath"/>
    /// for <paramref name="fileName"/> and returns the first path that exists, or <c>null</c>.
    /// </summary>
    private static string? ResolveRpmDbFile(string rootPath, string fileName)
    {
        var candidates = new[]
        {
            Path.Combine(rootPath, "var", "lib", "rpm", fileName),
            Path.Combine(rootPath, "usr", "lib", "sysimage", "rpm", fileName),
        };

        foreach (var candidate in candidates)
        {
            if (File.Exists(candidate))
            {
                return candidate;
            }
        }

        return null;
    }

    /// <summary>
    /// Loads the legacy <c>Packages</c> file into memory and extracts headers from it, using
    /// BerkeleyDB extraction when <see cref="BerkeleyDbReader.IsBerkeleyDb"/> recognises the data
    /// and a raw magic-byte scan otherwise. Returns an empty list when the file is missing or unreadable.
    /// </summary>
    private IReadOnlyList<RpmHeader> ReadLegacyHeaders(string rootPath, CancellationToken cancellationToken)
    {
        var packagesPath = ResolveLegacyPackagesPath(rootPath);
        if (packagesPath is null)
        {
            _logger.LogWarning("Legacy rpmdb Packages file not found under root {RootPath}; rpm analyzer will skip.", rootPath);
            return Array.Empty<RpmHeader>();
        }

        byte[] data;
        try
        {
            data = File.ReadAllBytes(packagesPath);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Unable to read legacy rpmdb Packages file at {Path}.", packagesPath);
            return Array.Empty<RpmHeader>();
        }

        // Detect BerkeleyDB format and use appropriate extraction method
        if (BerkeleyDbReader.IsBerkeleyDb(data))
        {
            _logger.LogDebug("Detected BerkeleyDB format for rpmdb at {Path}; using BDB extraction.", packagesPath);
            return ReadBerkeleyDbHeaders(data, packagesPath, cancellationToken);
        }

        // Fall back to raw RPM header scanning for non-BDB files
        return ReadRawRpmHeaders(data, packagesPath, cancellationToken);
    }

    /// <summary>
    /// Extracts header blobs from BerkeleyDB data and parses them, keeping one header per
    /// name/version/release/architecture combination. Blobs that fail to parse are logged at debug level.
    /// </summary>
    private IReadOnlyList<RpmHeader> ReadBerkeleyDbHeaders(byte[] data, string packagesPath, CancellationToken cancellationToken)
    {
        // Try page-aware extraction first
        var headerBlobs = BerkeleyDbReader.ExtractValues(data);
        if (headerBlobs.Count == 0)
        {
            // Fall back to overflow-aware extraction for fragmented data
            headerBlobs = BerkeleyDbReader.ExtractValuesWithOverflow(data);
        }

        var results = ParseDistinctHeaders(
            headerBlobs,
            ex => _logger.LogDebug(ex, "Failed to parse RPM header blob from BerkeleyDB."),
            cancellationToken);

        if (results.Count == 0)
        {
            _logger.LogWarning("No RPM headers parsed from BerkeleyDB rpmdb at {Path}.", packagesPath);
        }
        else
        {
            _logger.LogDebug("Extracted {Count} RPM headers from BerkeleyDB rpmdb at {Path}.", results.Count, packagesPath);
        }

        return results;
    }

    /// <summary>
    /// Scans non-BerkeleyDB data for header magic bytes and parses every candidate slice, keeping one
    /// header per name/version/release/architecture combination. Parse failures are logged as warnings.
    /// </summary>
    /// <remarks>
    /// The only caller sends BerkeleyDB data to <see cref="ReadBerkeleyDbHeaders"/>, so this method
    /// handles the raw scan only.
    /// </remarks>
    private IReadOnlyList<RpmHeader> ReadRawRpmHeaders(byte[] data, string packagesPath, CancellationToken cancellationToken)
    {
        var headerBlobs = new List<byte[]>(ExtractRpmHeadersFromRaw(data, cancellationToken));
        if (headerBlobs.Count == 0)
        {
            _logger.LogWarning("No RPM headers parsed from legacy rpmdb Packages at {Path}.", packagesPath);
            return Array.Empty<RpmHeader>();
        }

        return ParseDistinctHeaders(
            headerBlobs,
            ex => _logger.LogWarning(ex, "Failed to parse RPM header from legacy rpmdb blob."),
            cancellationToken);
    }

    /// <summary>
    /// Parses each blob and returns the headers in encounter order, dropping any header whose
    /// name/version/release/architecture key (compared case-insensitively) was already seen.
    /// </summary>
    /// <param name="headerBlobs">Candidate header blobs.</param>
    /// <param name="onParseFailure">Invoked with the exception when a blob cannot be parsed; the blob is then skipped.</param>
    /// <param name="cancellationToken">Checked before each blob.</param>
    private List<RpmHeader> ParseDistinctHeaders(
        IEnumerable<byte[]> headerBlobs,
        Action<Exception> onParseFailure,
        CancellationToken cancellationToken)
    {
        var results = new List<RpmHeader>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var blob in headerBlobs)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var header = _parser.Parse(blob);
                var key = $"{header.Name}::{header.Version}::{header.Release}::{header.Architecture}";
                if (seen.Add(key))
                {
                    results.Add(header);
                }
            }
            catch (Exception ex)
            {
                onParseFailure(ex);
            }
        }

        return results;
    }

    /// <summary>
    /// Lazily yields a copy of every byte range that starts with the header magic and whose declared
    /// length fits inside <paramref name="data"/>. Scanning resumes one byte after each magic hit,
    /// so candidates may overlap.
    /// </summary>
    private static IEnumerable<byte[]> ExtractRpmHeadersFromRaw(byte[] data, CancellationToken cancellationToken)
    {
        var offset = 0;
        while (offset <= data.Length - HeaderMagic.Length)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var candidateIndex = FindNextMagic(data, HeaderMagic, offset);
            if (candidateIndex < 0)
            {
                yield break;
            }

            if (TryExtractHeaderSlice(data, candidateIndex, out var slice))
            {
                yield return slice;
            }

            // Offsets only ever move forward, so no candidate is visited twice.
            offset = candidateIndex + 1;
        }
    }

    /// <summary>
    /// Copies the header that starts at <paramref name="offset"/> into <paramref name="slice"/>.
    /// The length is derived from the big-endian 32-bit index count (bytes 8-11) and store size
    /// (bytes 12-15): <c>16 + indexCount * 16 + storeSize</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when both counts are positive and the whole header lies inside <paramref name="data"/>;
    /// otherwise <c>false</c> with <paramref name="slice"/> set to an empty array.
    /// </returns>
    private static bool TryExtractHeaderSlice(byte[] data, int offset, out byte[] slice)
    {
        slice = Array.Empty<byte>();

        if (offset < 0 || (long)offset + HeaderPreambleLength >= data.Length)
        {
            return false;
        }

        var span = data.AsSpan(offset);
        var indexCount = BinaryPrimitives.ReadInt32BigEndian(span.Slice(8, 4));
        var storeSize = BinaryPrimitives.ReadInt32BigEndian(span.Slice(12, 4));
        if (indexCount <= 0 || storeSize <= 0)
        {
            return false;
        }

        // The counts come straight from file bytes; 64-bit arithmetic keeps a huge index count from
        // wrapping around into a small, plausible-looking length.
        var totalLength = HeaderPreambleLength + ((long)indexCount * IndexEntryLength) + storeSize;
        if (totalLength > data.Length - offset)
        {
            return false;
        }

        // totalLength is bounded by the remaining array length here, so the narrowing cast is safe.
        var length = (int)totalLength;
        slice = new byte[length];
        Buffer.BlockCopy(data, offset, slice, 0, length);
        return true;
    }

    /// <summary>
    /// Returns the index of the first occurrence of <paramref name="magic"/> in <paramref name="data"/>
    /// at or after <paramref name="startIndex"/>, or <c>-1</c> when there is none.
    /// </summary>
    private static int FindNextMagic(byte[] data, byte[] magic, int startIndex)
    {
        if (startIndex < 0 || startIndex > data.Length)
        {
            return -1;
        }

        ReadOnlySpan<byte> haystack = data.AsSpan(startIndex);
        ReadOnlySpan<byte> needle = magic;

        var relativeIndex = haystack.IndexOf(needle);
        return relativeIndex < 0 ? -1 : startIndex + relativeIndex;
    }

    /// <summary>
    /// Returns the value of the first non-NULL column of the current row whose field type is
    /// <c>byte[]</c>, or <c>null</c> when the row has no such column.
    /// </summary>
    private static byte[]? ExtractHeaderBlob(SqliteDataReader reader)
    {
        for (var i = 0; i < reader.FieldCount; i++)
        {
            if (reader.GetFieldType(i) == typeof(byte[]) && !reader.IsDBNull(i))
            {
                return reader.GetFieldValue<byte[]>(i);
            }
        }

        return null;
    }

    /// <summary>
    /// Best-effort lookup of the <c>pkgKey</c> column of the current row, used only to enrich a log
    /// message. Returns <c>null</c> when the column is absent or cannot be read.
    /// </summary>
    private static object? TryGetPkgKey(SqliteDataReader reader)
    {
        try
        {
            var ordinal = reader.GetOrdinal("pkgKey");
            if (ordinal >= 0)
            {
                return reader.GetValue(ordinal);
            }
        }
        catch (Exception)
        {
            // Deliberately ignored: this runs while another failure is being logged and must not
            // replace that failure with its own.
        }

        return null;
    }
}