Files
git.stella-ops.org/src/StellaOps.Concelier.Connector.Distro.Suse/SuseConnector.cs
2025-10-18 20:47:13 +03:00

574 lines
24 KiB
C#

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Distro.Suse.Configuration;
using StellaOps.Concelier.Connector.Distro.Suse.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Distro.Suse;
/// <summary>
/// Concelier feed connector for SUSE security advisories. Advisories are CSAF
/// JSON documents discovered through a changes.csv manifest. The connector
/// implements the three-stage pipeline:
/// <see cref="FetchAsync"/> (download raw documents with conditional GETs),
/// <see cref="ParseAsync"/> (CSAF JSON -> <c>SuseAdvisoryDto</c> stored as BSON), and
/// <see cref="MapAsync"/> (DTO -> canonical advisory upserted into the advisory store).
/// Progress between stages is persisted in a <c>SuseCursor</c> via
/// <see cref="ISourceStateRepository"/> so each stage can resume after restarts.
/// </summary>
public sealed class SuseConnector : IFeedConnector
{
// Pre-compiled structured-logging delegate (LoggerMessage.Define) emitted once
// per successfully mapped advisory; avoids re-parsing the message template.
private static readonly Action<ILogger, string, int, Exception?> LogMapped =
LoggerMessage.Define<string, int>(
LogLevel.Information,
new EventId(1, "SuseMapped"),
"SUSE advisory {AdvisoryId} mapped with {AffectedCount} affected packages");
private readonly SourceFetchService _fetchService;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly SuseOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<SuseConnector> _logger;
/// <summary>
/// Wires up the connector's collaborators. All dependencies are required except
/// <paramref name="timeProvider"/>, which defaults to <see cref="TimeProvider.System"/>.
/// </summary>
/// <exception cref="ArgumentNullException">
/// Thrown when any required dependency, the options wrapper, or its resolved
/// <c>Value</c> is null (both options failures report <c>nameof(options)</c>).
/// </exception>
public SuseConnector(
SourceFetchService fetchService,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<SuseOptions> options,
TimeProvider? timeProvider,
ILogger<SuseConnector> logger)
{
_fetchService = fetchService ?? throw new ArgumentNullException(nameof(fetchService));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
// Guards both the IOptions wrapper and its Value; options are validated eagerly
// so misconfiguration surfaces at construction rather than first fetch.
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
// Stable source identifier shared with the plugin registration.
public string SourceName => SuseConnectorPlugin.SourceName;
/// <summary>
/// Stage 1: downloads changes.csv (conditional GET via cached ETag/Last-Modified),
/// selects candidate advisory files inside the resume window, fetches each CSAF
/// detail document, and records newly fetched document ids as pending for
/// <see cref="ParseAsync"/>. Fetch failures mark the source state failed with a
/// 5-minute backoff and rethrow.
/// </summary>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
// Working copies of cursor state; the cursor itself is immutable until rewritten.
var pendingDocuments = new HashSet<Guid>(cursor.PendingDocuments);
var pendingMappings = new HashSet<Guid>(cursor.PendingMappings);
var fetchCache = new Dictionary<string, SuseFetchCacheEntry>(cursor.FetchCache, StringComparer.OrdinalIgnoreCase);
// Tracks every resource URI seen this run so stale cache entries can be evicted below.
var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var changesUri = _options.ChangesEndpoint;
var changesKey = changesUri.ToString();
touchedResources.Add(changesKey);
cursor.TryGetCache(changesKey, out var cachedChanges);
var changesRequest = new SourceFetchRequest(SuseOptions.HttpClientName, SourceName, changesUri)
{
Metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["suse.type"] = "changes"
},
AcceptHeaders = new[] { "text/csv", "text/plain" },
TimeoutOverride = _options.FetchTimeout,
// Conditional GET: server returns 304 when the manifest is unchanged.
ETag = cachedChanges?.ETag,
LastModified = cachedChanges?.LastModified,
};
SourceFetchResult changesResult;
try
{
changesResult = await _fetchService.FetchAsync(changesRequest, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "SUSE changes.csv fetch failed from {Uri}", changesUri);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
// Watermark bookkeeping: maxModified advances only when a detail fetch succeeds.
var maxModified = cursor.LastModified ?? DateTimeOffset.MinValue;
var processedUpdated = false;
var processedIds = new HashSet<string>(cursor.ProcessedIds, StringComparer.OrdinalIgnoreCase);
var currentWindowIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
IReadOnlyList<SuseChangeRecord> changeRecords = Array.Empty<SuseChangeRecord>();
if (changesResult.IsNotModified)
{
// 304: keep the prior cache entry so the next run can still send conditional headers.
if (cursor.FetchCache.TryGetValue(changesKey, out var existingCache))
{
fetchCache[changesKey] = existingCache;
}
}
else if (changesResult.IsSuccess && changesResult.Document is not null)
{
fetchCache[changesKey] = SuseFetchCacheEntry.FromDocument(changesResult.Document);
if (changesResult.Document.GridFsId.HasValue)
{
byte[] changesBytes;
try
{
changesBytes = await _rawDocumentStorage.DownloadAsync(changesResult.Document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to download SUSE changes.csv document {DocumentId}", changesResult.Document.Id);
throw;
}
var csv = Encoding.UTF8.GetString(changesBytes);
changeRecords = SuseChangesParser.Parse(csv);
}
}
if (changeRecords.Count > 0)
{
// Resume window: last watermark (or initial backfill on first run) widened
// backwards by ResumeOverlap to re-examine records near the boundary.
var baseline = (cursor.LastModified ?? (now - _options.InitialBackfill)) - _options.ResumeOverlap;
if (baseline < DateTimeOffset.UnixEpoch)
{
baseline = DateTimeOffset.UnixEpoch;
}
ProvenanceDiagnostics.ReportResumeWindow(SourceName, baseline, _logger);
var candidates = changeRecords
.Where(record => record.ModifiedAt >= baseline)
.OrderBy(record => record.ModifiedAt)
.ThenBy(record => record.FileName, StringComparer.OrdinalIgnoreCase)
.ToList();
if (candidates.Count == 0)
{
// Nothing inside the window: fall back to the newest MaxAdvisoriesPerFetch
// records overall, then re-sort oldest-first for deterministic processing order.
candidates = changeRecords
.OrderByDescending(record => record.ModifiedAt)
.ThenBy(record => record.FileName, StringComparer.OrdinalIgnoreCase)
.Take(_options.MaxAdvisoriesPerFetch)
.OrderBy(record => record.ModifiedAt)
.ThenBy(record => record.FileName, StringComparer.OrdinalIgnoreCase)
.ToList();
}
else if (candidates.Count > _options.MaxAdvisoriesPerFetch)
{
// Cap the batch, keeping the newest records; still processed oldest-first.
candidates = candidates
.OrderByDescending(record => record.ModifiedAt)
.ThenBy(record => record.FileName, StringComparer.OrdinalIgnoreCase)
.Take(_options.MaxAdvisoriesPerFetch)
.OrderBy(record => record.ModifiedAt)
.ThenBy(record => record.FileName, StringComparer.OrdinalIgnoreCase)
.ToList();
}
foreach (var record in candidates)
{
cancellationToken.ThrowIfCancellationRequested();
var detailUri = new Uri(_options.AdvisoryBaseUri, record.FileName);
var cacheKey = detailUri.AbsoluteUri;
touchedResources.Add(cacheKey);
cursor.TryGetCache(cacheKey, out var cachedEntry);
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, cacheKey, cancellationToken).ConfigureAwait(false);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["suse.file"] = record.FileName,
["suse.modified"] = record.ModifiedAt.ToString("O", CultureInfo.InvariantCulture)
};
// Carry forward the advisory id stamped by a previous ParseAsync run, if any.
if (!metadata.ContainsKey("suse.id") && existing?.Metadata?.TryGetValue("suse.id", out var existingId) == true)
{
metadata["suse.id"] = existingId;
}
var request = new SourceFetchRequest(SuseOptions.HttpClientName, SourceName, detailUri)
{
Metadata = metadata,
AcceptHeaders = new[] { "application/json", "text/json" },
TimeoutOverride = _options.FetchTimeout,
// Prefer validators from the stored document; fall back to the cursor cache.
ETag = existing?.Etag ?? cachedEntry?.ETag,
LastModified = existing?.LastModified ?? cachedEntry?.LastModified,
};
SourceFetchResult result;
try
{
result = await _fetchService.FetchAsync(request, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to fetch SUSE advisory {FileName}", record.FileName);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(5), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
if (result.IsNotModified)
{
if (existing is not null)
{
// Unchanged on the server: refresh the cache entry and, if the stored
// document is already fully mapped, drop it from both pending sets.
fetchCache[cacheKey] = SuseFetchCacheEntry.FromDocument(existing);
if (string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
{
pendingDocuments.Remove(existing.Id);
pendingMappings.Remove(existing.Id);
}
}
continue;
}
if (!result.IsSuccess || result.Document is null)
{
continue;
}
// Fresh payload: queue for parsing and reset any stale mapping entry.
fetchCache[cacheKey] = SuseFetchCacheEntry.FromDocument(result.Document);
pendingDocuments.Add(result.Document.Id);
pendingMappings.Remove(result.Document.Id);
currentWindowIds.Add(record.FileName);
if (record.ModifiedAt > maxModified)
{
maxModified = record.ModifiedAt;
processedUpdated = true;
}
}
}
// Evict cache entries for resources not referenced this run so the cursor
// cache does not grow without bound.
if (fetchCache.Count > 0 && touchedResources.Count > 0)
{
var staleKeys = fetchCache.Keys.Where(key => !touchedResources.Contains(key)).ToArray();
foreach (var key in staleKeys)
{
fetchCache.Remove(key);
}
}
var updatedCursor = cursor
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(pendingMappings)
.WithFetchCache(fetchCache);
// Advance the processed watermark only when at least one strictly newer
// document was actually fetched in this run.
if (processedUpdated && currentWindowIds.Count > 0)
{
updatedCursor = updatedCursor.WithProcessed(maxModified, currentWindowIds);
}
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Stage 2: for each pending document, downloads the raw CSAF payload from
/// GridFS, parses it into a <c>SuseAdvisoryDto</c>, stamps the advisory id into
/// the document metadata, stores the DTO as BSON, and marks the document
/// PendingMap. Parse failures mark the document Failed and are skipped;
/// storage download failures rethrow (document stays pending for retry).
/// </summary>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
// Iterate the cursor's snapshot; mutate only the working copies.
var remaining = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remaining.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("SUSE document {DocumentId} missing GridFS payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
continue;
}
byte[] bytes;
try
{
bytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
// Transient storage failure: rethrow so the document remains pending.
_logger.LogError(ex, "Failed to download SUSE document {DocumentId}", document.Id);
throw;
}
SuseAdvisoryDto dto;
try
{
var json = Encoding.UTF8.GetString(bytes);
dto = SuseCsafParser.Parse(json);
}
catch (Exception ex)
{
// Malformed payload: mark Failed and move on, retrying would not help.
_logger.LogWarning(ex, "Failed to parse SUSE advisory {Uri}", document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
continue;
}
// Persist the parsed advisory id on the document so future fetches can
// propagate it (see the "suse.id" handling in FetchAsync).
var metadata = document.Metadata is null
? new Dictionary<string, string>(StringComparer.Ordinal)
: new Dictionary<string, string>(document.Metadata, StringComparer.Ordinal);
metadata["suse.id"] = dto.AdvisoryId;
var updatedDocument = document with { Metadata = metadata };
await _documentStore.UpsertAsync(updatedDocument, cancellationToken).ConfigureAwait(false);
var payload = ToBson(dto);
var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "suse.csaf.v1", payload, _timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remaining.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remaining)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Stage 3: for each pending mapping, rehydrates the stored DTO, maps it to a
/// canonical advisory via <c>SuseMapper</c>, upserts it into the advisory store,
/// and marks the document Mapped. DTO deserialization failures mark the
/// document Failed; missing DTO/document records are silently dropped.
/// </summary>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dtoRecord = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dtoRecord is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
SuseAdvisoryDto dto;
try
{
dto = FromBson(dtoRecord.Payload);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize SUSE DTO for document {DocumentId}", documentId);
await _documentStore.UpdateStatusAsync(documentId, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var advisory = SuseMapper.Map(dto, document, _timeProvider.GetUtcNow());
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(documentId, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
LogMapped(_logger, dto.AdvisoryId, advisory.AffectedPackages.Length, null);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
// Loads the persisted cursor for this source, or an empty cursor on first run.
private async Task<SuseCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? SuseCursor.Empty : SuseCursor.FromBson(state.Cursor);
}
// Persists the cursor back to source state, stamped with the current time.
private async Task UpdateCursorAsync(SuseCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Serializes a <c>SuseAdvisoryDto</c> to the BSON layout read back by
/// <see cref="FromBson"/>. Optional per-package/per-reference fields are
/// omitted entirely when blank rather than stored as nulls.
/// </summary>
private static BsonDocument ToBson(SuseAdvisoryDto dto)
{
var packages = new BsonArray();
foreach (var package in dto.Packages)
{
var packageDoc = new BsonDocument
{
["package"] = package.Package,
["platform"] = package.Platform,
["canonical"] = package.CanonicalNevra,
["status"] = package.Status
};
if (!string.IsNullOrWhiteSpace(package.Architecture))
{
packageDoc["arch"] = package.Architecture;
}
if (!string.IsNullOrWhiteSpace(package.IntroducedVersion))
{
packageDoc["introduced"] = package.IntroducedVersion;
}
if (!string.IsNullOrWhiteSpace(package.FixedVersion))
{
packageDoc["fixed"] = package.FixedVersion;
}
if (!string.IsNullOrWhiteSpace(package.LastAffectedVersion))
{
packageDoc["last"] = package.LastAffectedVersion;
}
packages.Add(packageDoc);
}
var references = new BsonArray();
foreach (var reference in dto.References)
{
var referenceDoc = new BsonDocument
{
["url"] = reference.Url
};
if (!string.IsNullOrWhiteSpace(reference.Kind))
{
referenceDoc["kind"] = reference.Kind;
}
if (!string.IsNullOrWhiteSpace(reference.Title))
{
referenceDoc["title"] = reference.Title;
}
references.Add(referenceDoc);
}
return new BsonDocument
{
["advisoryId"] = dto.AdvisoryId,
// Title/summary are normalized to empty strings so FromBson never sees BsonNull.
["title"] = dto.Title ?? string.Empty,
["summary"] = dto.Summary ?? string.Empty,
["published"] = dto.Published.UtcDateTime,
["cves"] = new BsonArray(dto.CveIds ?? Array.Empty<string>()),
["packages"] = packages,
["references"] = references
};
}
/// <summary>
/// Rehydrates a <c>SuseAdvisoryDto</c> from the BSON layout written by
/// <see cref="ToBson"/>. Defensive against missing fields: title falls back to
/// the advisory id, published falls back to the current UTC time, and
/// references without a URL are dropped.
/// </summary>
private static SuseAdvisoryDto FromBson(BsonDocument document)
{
var advisoryId = document.GetValue("advisoryId", string.Empty).AsString;
var title = document.GetValue("title", advisoryId).AsString;
var summary = document.TryGetValue("summary", out var summaryValue) ? summaryValue.AsString : null;
// Accept either a native BSON DateTime or an ISO string; anything else
// degrades to "now" rather than failing the whole mapping.
var published = document.TryGetValue("published", out var publishedValue)
? publishedValue.BsonType switch
{
BsonType.DateTime => DateTime.SpecifyKind(publishedValue.ToUniversalTime(), DateTimeKind.Utc),
BsonType.String when DateTimeOffset.TryParse(publishedValue.AsString, out var parsed) => parsed.ToUniversalTime(),
_ => DateTimeOffset.UtcNow
}
: DateTimeOffset.UtcNow;
// CVE ids are de-duplicated case-insensitively; blanks are discarded.
var cves = document.TryGetValue("cves", out var cveArray) && cveArray is BsonArray bsonCves
? bsonCves.OfType<BsonValue>()
.Select(static value => value?.ToString())
.Where(static value => !string.IsNullOrWhiteSpace(value))
.Select(static value => value!)
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToArray()
: Array.Empty<string>();
var packageList = new List<SusePackageStateDto>();
if (document.TryGetValue("packages", out var packageArray) && packageArray is BsonArray bsonPackages)
{
foreach (var element in bsonPackages.OfType<BsonDocument>())
{
var package = element.GetValue("package", string.Empty).AsString;
var platform = element.GetValue("platform", string.Empty).AsString;
var canonical = element.GetValue("canonical", string.Empty).AsString;
var status = element.GetValue("status", "unknown").AsString;
var architecture = element.TryGetValue("arch", out var archValue) ? archValue.AsString : null;
var introduced = element.TryGetValue("introduced", out var introducedValue) ? introducedValue.AsString : null;
var fixedVersion = element.TryGetValue("fixed", out var fixedValue) ? fixedValue.AsString : null;
var last = element.TryGetValue("last", out var lastValue) ? lastValue.AsString : null;
packageList.Add(new SusePackageStateDto(
package,
platform,
architecture,
canonical,
introduced,
fixedVersion,
last,
status));
}
}
var referenceList = new List<SuseReferenceDto>();
if (document.TryGetValue("references", out var referenceArray) && referenceArray is BsonArray bsonReferences)
{
foreach (var element in bsonReferences.OfType<BsonDocument>())
{
var url = element.GetValue("url", string.Empty).AsString;
if (string.IsNullOrWhiteSpace(url))
{
continue;
}
referenceList.Add(new SuseReferenceDto(
url,
element.TryGetValue("kind", out var kindValue) ? kindValue.AsString : null,
element.TryGetValue("title", out var titleValue) ? titleValue.AsString : null));
}
}
return new SuseAdvisoryDto(
advisoryId,
string.IsNullOrWhiteSpace(title) ? advisoryId : title,
string.IsNullOrWhiteSpace(summary) ? null : summary,
published,
cves,
packageList,
referenceList);
}
}