Implement Advisory Canonicalization and Backfill Migration
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled

- Added AdvisoryCanonicalizer for canonicalizing advisory identifiers.
- Created EnsureAdvisoryCanonicalKeyBackfillMigration to populate advisory_key and links in advisory_raw documents.
- Introduced FileSurfaceManifestStore for managing surface manifests with file system backing.
- Developed ISurfaceManifestReader and ISurfaceManifestWriter interfaces for reading and writing manifests.
- Implemented SurfaceManifestPathBuilder for constructing paths and URIs for surface manifests.
- Added tests for FileSurfaceManifestStore to ensure correct functionality and deterministic behavior.
- Updated documentation for new features and migration steps.
This commit is contained in:
master
2025-11-07 19:54:02 +02:00
parent a1ce3f74fa
commit 515975edc5
42 changed files with 1893 additions and 336 deletions

View File

@@ -0,0 +1,166 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using MongoDB.Bson;
using MongoDB.Driver;
using StellaOps.Concelier.Core.Raw;
using StellaOps.Concelier.RawModels;
namespace StellaOps.Concelier.Storage.Mongo.Migrations;
/// <summary>
/// Backfill migration for the advisory_raw collection: for every document that lacks a usable
/// <c>advisory_key</c> or has no <c>links</c> field, re-derives both values through
/// <see cref="AdvisoryCanonicalizer"/> from the stored <c>identifiers</c>, <c>source</c> and
/// <c>upstream</c> sub-documents and writes them back.
/// </summary>
/// <remarks>
/// Parsing is deliberately tolerant: sub-documents or fields that are missing, BSON null or of an
/// unexpected type fall back to empty/default values so that a single malformed legacy document
/// does not abort the whole backfill with a cast exception.
/// </remarks>
public sealed class EnsureAdvisoryCanonicalKeyBackfillMigration : IMongoMigration
{
    /// <summary>Identifier of this migration.</summary>
    public string Id => "2025-11-07-advisory-canonical-key";

    /// <summary>Human-readable description of this migration.</summary>
    public string Description => "Populate advisory_key and links for advisory_raw documents.";

    /// <summary>
    /// Scans advisory_raw for documents whose <c>advisory_key</c> is missing, null or empty, or whose
    /// <c>links</c> field is missing or null, and sets both fields from the canonicalization result.
    /// </summary>
    /// <param name="database">Database that contains the advisory_raw collection.</param>
    /// <param name="cancellationToken">Token observed between documents and passed to every driver call.</param>
    /// <exception cref="ArgumentNullException"><paramref name="database"/> is null.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested while iterating.</exception>
    public async Task ApplyAsync(IMongoDatabase database, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(database);

        var collection = database.GetCollection<BsonDocument>(MongoStorageDefaults.Collections.AdvisoryRaw);

        // A document needs backfilling when either field is absent or unusable.
        var filter = Builders<BsonDocument>.Filter.Or(
            Builders<BsonDocument>.Filter.Exists("advisory_key", false),
            Builders<BsonDocument>.Filter.Type("advisory_key", BsonType.Null),
            Builders<BsonDocument>.Filter.Eq("advisory_key", string.Empty),
            Builders<BsonDocument>.Filter.Exists("links", false),
            Builders<BsonDocument>.Filter.Type("links", BsonType.Null));

        using var cursor = await collection.Find(filter).ToCursorAsync(cancellationToken).ConfigureAwait(false);
        while (await cursor.MoveNextAsync(cancellationToken).ConfigureAwait(false))
        {
            foreach (var document in cursor.Current)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Without an _id there is nothing to address the update with.
                if (!document.TryGetValue("_id", out var idValue) || idValue.IsBsonNull)
                {
                    continue;
                }

                var source = ParseSource(GetDocumentOrEmpty(document, "source"));
                var upstream = ParseUpstream(GetDocumentOrEmpty(document, "upstream"));
                var identifiers = ParseIdentifiers(GetDocumentOrEmpty(document, "identifiers"));

                var canonical = AdvisoryCanonicalizer.Canonicalize(identifiers, source, upstream);

                // A default (uninitialized) ImmutableArray cannot be enumerated; treat it as empty.
                var links = canonical.Links.IsDefaultOrEmpty ? ImmutableArray<RawLink>.Empty : canonical.Links;
                var linksArray = new BsonArray(links.Select(link => new BsonDocument
                {
                    { "scheme", link.Scheme },
                    { "value", link.Value }
                }));

                var update = Builders<BsonDocument>.Update
                    .Set("advisory_key", canonical.AdvisoryKey)
                    .Set("links", linksArray);

                await collection.UpdateOneAsync(
                    Builders<BsonDocument>.Filter.Eq("_id", idValue),
                    update,
                    cancellationToken: cancellationToken).ConfigureAwait(false);
            }
        }
    }

    /// <summary>Builds source metadata from the stored <c>source</c> sub-document.</summary>
    private static RawSourceMetadata ParseSource(BsonDocument source)
    {
        return new RawSourceMetadata(
            GetRequiredString(source, "vendor"),
            GetOptionalString(source, "connector") ?? string.Empty,
            GetOptionalString(source, "version") ?? "unknown",
            GetOptionalString(source, "stream"));
    }

    /// <summary>
    /// Builds upstream metadata (including signature and provenance) from the stored
    /// <c>upstream</c> sub-document.
    /// </summary>
    private static RawUpstreamMetadata ParseUpstream(BsonDocument upstream)
    {
        var provenance = ImmutableDictionary.CreateBuilder<string, string>(StringComparer.Ordinal);
        if (upstream.TryGetValue("provenance", out var provenanceValue) && provenanceValue.IsBsonDocument)
        {
            foreach (var element in provenanceValue.AsBsonDocument)
            {
                provenance[element.Name] = BsonValueToString(element.Value);
            }
        }

        var signature = GetDocumentOrEmpty(upstream, "signature");

        // Only a real BSON boolean counts; anything else is treated as "no signature present".
        var signaturePresent = signature.TryGetValue("present", out var presentValue)
            && presentValue.IsBoolean
            && presentValue.AsBoolean;

        var signatureMetadata = new RawSignatureMetadata(
            signaturePresent,
            GetOptionalString(signature, "format"),
            GetOptionalString(signature, "key_id"),
            GetOptionalString(signature, "sig"),
            GetOptionalString(signature, "certificate"),
            GetOptionalString(signature, "digest"));

        return new RawUpstreamMetadata(
            GetRequiredString(upstream, "upstream_id"),
            GetOptionalString(upstream, "document_version"),
            GetDateTimeOffset(upstream, "retrieved_at", DateTimeOffset.UtcNow),
            GetRequiredString(upstream, "content_hash"),
            signatureMetadata,
            provenance.ToImmutable());
    }

    /// <summary>Builds identifiers (aliases and primary id) from the stored <c>identifiers</c> sub-document.</summary>
    private static RawIdentifiers ParseIdentifiers(BsonDocument identifiers)
    {
        var aliases = identifiers.TryGetValue("aliases", out var aliasesValue) && aliasesValue.IsBsonArray
            ? aliasesValue.AsBsonArray.Select(BsonValueToString).ToImmutableArray()
            : ImmutableArray<string>.Empty;

        return new RawIdentifiers(
            aliases,
            GetRequiredString(identifiers, "primary"));
    }

    /// <summary>
    /// Returns the named field as a document, or a new empty document when the field is missing,
    /// BSON null, or any other non-document value.
    /// </summary>
    private static BsonDocument GetDocumentOrEmpty(BsonDocument parent, string name)
    {
        return parent.TryGetValue(name, out var value) && value.IsBsonDocument
            ? value.AsBsonDocument
            : new BsonDocument();
    }

    /// <summary>Returns the named field as a string, or an empty string when it is missing or BSON null.</summary>
    private static string GetRequiredString(BsonDocument document, string name)
    {
        return GetOptionalString(document, name) ?? string.Empty;
    }

    /// <summary>
    /// Returns the named field as a string, or null when it is missing or BSON null.
    /// Non-string values are rendered through <c>ToString()</c>.
    /// </summary>
    private static string? GetOptionalString(BsonDocument document, string name)
    {
        if (!document.TryGetValue(name, out var value) || value.IsBsonNull)
        {
            return null;
        }

        return value.IsString ? value.AsString : value.ToString();
    }

    /// <summary>Renders a BSON value as a culture-invariant string.</summary>
    private static string BsonValueToString(BsonValue value)
    {
        return value switch
        {
            // BsonNull is an object, not a CLR null; without this arm it would become the text "BsonNull".
            null or BsonNull => string.Empty,
            BsonString s => s.AsString,
            BsonBoolean b => b.AsBoolean.ToString(),
            BsonDateTime dateTime => dateTime.ToUniversalTime().ToString("O", CultureInfo.InvariantCulture),
            BsonInt32 i => i.AsInt32.ToString(CultureInfo.InvariantCulture),
            BsonInt64 l => l.AsInt64.ToString(CultureInfo.InvariantCulture),
            BsonDouble d => d.AsDouble.ToString(CultureInfo.InvariantCulture),
            _ => value.ToString()
        };
    }

    /// <summary>
    /// Reads the named field as a UTC timestamp. Falls back to <paramref name="defaultValue"/> when the
    /// field is missing, BSON null, an out-of-range date, an unparsable string, or any other type.
    /// </summary>
    private static DateTimeOffset GetDateTimeOffset(BsonDocument document, string name, DateTimeOffset defaultValue)
    {
        if (!document.TryGetValue(name, out var value))
        {
            return defaultValue;
        }

        return value switch
        {
            BsonDateTime { IsValidDateTime: true } dateTime => (DateTimeOffset)dateTime.ToUniversalTime(),
            BsonString text when DateTimeOffset.TryParse(
                text.Value,
                CultureInfo.InvariantCulture,
                DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                out var parsed) => parsed,
            _ => defaultValue
        };
    }
}

View File

@@ -223,12 +223,69 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
? EncodeCursor(records[^1].IngestedAt.UtcDateTime, records[^1].Id)
: null;
return new AdvisoryRawQueryResult(records, nextCursor, hasMore);
}
public async Task<IReadOnlyList<AdvisoryRawRecord>> ListForVerificationAsync(
string tenant,
DateTimeOffset since,
return new AdvisoryRawQueryResult(records, nextCursor, hasMore);
}
/// <summary>
/// Looks up raw advisory records for a tenant whose <c>advisory_key</c> equals one of the search
/// values, or whose <c>links</c> array contains an element whose <c>value</c> equals one of them.
/// </summary>
/// <param name="tenant">Tenant to query; must not be null or whitespace.</param>
/// <param name="searchValues">Candidate keys; blank entries are dropped, the rest are trimmed and de-duplicated (ordinal).</param>
/// <param name="sourceVendors">Optional vendor restriction; entries are trimmed and lower-cased before matching <c>source.vendor</c>.</param>
/// <param name="cancellationToken">Token passed to the driver query.</param>
/// <returns>Matching records ordered by <c>created_at</c> then <c>_id</c>, both descending; empty when there is nothing to search for.</returns>
public async Task<IReadOnlyList<AdvisoryRawRecord>> FindByAdvisoryKeyAsync(
    string tenant,
    IReadOnlyCollection<string> searchValues,
    IReadOnlyCollection<string> sourceVendors,
    CancellationToken cancellationToken)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenant);

    if (searchValues is not { Count: > 0 })
    {
        return Array.Empty<AdvisoryRawRecord>();
    }

    var keys = searchValues
        .Where(static candidate => !string.IsNullOrWhiteSpace(candidate))
        .Select(static candidate => candidate.Trim())
        .Distinct(StringComparer.Ordinal)
        .ToArray();

    if (keys.Length == 0)
    {
        return Array.Empty<AdvisoryRawRecord>();
    }

    var builder = Builders<BsonDocument>.Filter;
    var matchesKey = builder.In("advisory_key", keys);
    var matchesLink = builder.ElemMatch("links", builder.In("value", keys));
    var filter = builder.Eq("tenant", tenant) & builder.Or(matchesKey, matchesLink);

    if (sourceVendors is { Count: > 0 })
    {
        var vendors = sourceVendors
            .Where(static name => !string.IsNullOrWhiteSpace(name))
            .Select(static name => name.Trim().ToLowerInvariant())
            .Distinct(StringComparer.Ordinal)
            .ToArray();

        // An all-blank vendor list adds no restriction.
        if (vendors.Length > 0)
        {
            filter = filter & builder.In("source.vendor", vendors);
        }
    }

    var newestFirst = Builders<BsonDocument>.Sort
        .Descending("created_at")
        .Descending("_id");

    var matches = await _collection
        .Find(filter)
        .Sort(newestFirst)
        .ToListAsync(cancellationToken)
        .ConfigureAwait(false);

    return matches.Select(MapToRecord).ToArray();
}
public async Task<IReadOnlyList<AdvisoryRawRecord>> ListForVerificationAsync(
string tenant,
DateTimeOffset since,
DateTimeOffset until,
IReadOnlyCollection<string> sourceVendors,
CancellationToken cancellationToken)
@@ -368,29 +425,39 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
}
}
var linkset = new BsonDocument
{
{ "aliases", new BsonArray(document.Linkset.Aliases) },
{ "purls", new BsonArray(document.Linkset.PackageUrls) },
{ "cpes", new BsonArray(document.Linkset.Cpes) },
{ "references", references },
{ "reconciled_from", new BsonArray(document.Linkset.ReconciledFrom) },
{ "notes", notes }
};
var bson = new BsonDocument
{
{ "_id", id },
{ "tenant", document.Tenant },
{ "source", source },
{ "upstream", upstream },
{ "content", content },
{ "identifiers", identifiers },
{ "linkset", linkset },
{ "supersedes", supersedesValue is null ? BsonNull.Value : supersedesValue },
{ "created_at", document.Upstream.RetrievedAt.UtcDateTime },
{ "ingested_at", now }
};
var linkset = new BsonDocument
{
{ "aliases", new BsonArray(document.Linkset.Aliases) },
{ "purls", new BsonArray(document.Linkset.PackageUrls) },
{ "cpes", new BsonArray(document.Linkset.Cpes) },
{ "references", references },
{ "reconciled_from", new BsonArray(document.Linkset.ReconciledFrom) },
{ "notes", notes }
};
var linksArray = new BsonArray(
(document.Links.IsDefaultOrEmpty ? ImmutableArray<RawLink>.Empty : document.Links)
.Select(link => new BsonDocument
{
{ "scheme", link.Scheme },
{ "value", link.Value }
}));
var bson = new BsonDocument
{
{ "_id", id },
{ "tenant", document.Tenant },
{ "source", source },
{ "upstream", upstream },
{ "content", content },
{ "identifiers", identifiers },
{ "linkset", linkset },
{ "advisory_key", document.AdvisoryKey },
{ "links", linksArray },
{ "supersedes", supersedesValue is null ? BsonNull.Value : supersedesValue },
{ "created_at", document.Upstream.RetrievedAt.UtcDateTime },
{ "ingested_at", now }
};
return bson;
}
@@ -402,17 +469,53 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
var upstream = MapUpstream(document["upstream"].AsBsonDocument);
var content = MapContent(document["content"].AsBsonDocument);
var identifiers = MapIdentifiers(document["identifiers"].AsBsonDocument);
var linkset = MapLinkset(document["linkset"].AsBsonDocument);
var supersedes = document.GetValue("supersedes", BsonNull.Value);
var rawDocument = new AdvisoryRawDocument(
tenant,
source,
upstream,
content,
identifiers,
linkset,
supersedes.IsBsonNull ? null : supersedes.AsString);
var linkset = MapLinkset(document["linkset"].AsBsonDocument);
var supersedes = document.GetValue("supersedes", BsonNull.Value);
var advisoryKey = document.TryGetValue("advisory_key", out var advisoryKeyValue) && advisoryKeyValue.IsString
? advisoryKeyValue.AsString
: string.Empty;
var links = MapLinks(document);
AdvisoryCanonicalizationResult? canonical = null;
if (string.IsNullOrWhiteSpace(advisoryKey) || links.IsDefaultOrEmpty)
{
canonical = AdvisoryCanonicalizer.Canonicalize(identifiers, source, upstream);
if (string.IsNullOrWhiteSpace(advisoryKey))
{
advisoryKey = canonical.AdvisoryKey;
}
if (links.IsDefaultOrEmpty)
{
links = canonical.Links.IsDefault ? ImmutableArray<RawLink>.Empty : canonical.Links;
}
}
if (string.IsNullOrWhiteSpace(advisoryKey))
{
canonical ??= AdvisoryCanonicalizer.Canonicalize(identifiers, source, upstream);
advisoryKey = canonical.AdvisoryKey;
}
var normalizedLinks = links.IsDefaultOrEmpty
? (canonical?.Links ?? ImmutableArray<RawLink>.Empty)
: links;
if (normalizedLinks.IsDefault)
{
normalizedLinks = ImmutableArray<RawLink>.Empty;
}
var rawDocument = new AdvisoryRawDocument(
tenant,
source,
upstream,
content,
identifiers,
linkset,
advisoryKey,
normalizedLinks,
supersedes.IsBsonNull ? null : supersedes.AsString);
var ingestedAt = GetDateTimeOffset(document, "ingested_at", rawDocument.Upstream.RetrievedAt);
var createdAt = GetDateTimeOffset(document, "created_at", rawDocument.Upstream.RetrievedAt);
@@ -499,7 +602,7 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
GetRequiredString(identifiers, "primary"));
}
private static RawLinkset MapLinkset(BsonDocument linkset)
private static RawLinkset MapLinkset(BsonDocument linkset)
{
var aliases = linkset.TryGetValue("aliases", out var aliasesValue) && aliasesValue.IsBsonArray
? aliasesValue.AsBsonArray.Select(BsonValueToString).ToImmutableArray()
@@ -549,7 +652,36 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
ReconciledFrom = reconciledFrom,
Notes = notesBuilder.ToImmutable()
};
}
}
/// <summary>
/// Reads the <c>links</c> array of a stored document into <see cref="RawLink"/> values.
/// Elements that are not documents, or whose <c>value</c> is missing or whitespace, are ignored.
/// Returns an empty array when <c>links</c> is missing or is not an array.
/// </summary>
private static ImmutableArray<RawLink> MapLinks(BsonDocument document)
{
    var hasLinksArray = document.TryGetValue("links", out var rawLinks) && rawLinks.IsBsonArray;
    if (!hasLinksArray)
    {
        return ImmutableArray<RawLink>.Empty;
    }

    return rawLinks.AsBsonArray
        .Where(static entry => entry.IsBsonDocument)
        .Select(static entry => entry.AsBsonDocument)
        .Select(static entry => (
            Scheme: GetOptionalString(entry, "scheme") ?? string.Empty,
            Value: GetOptionalString(entry, "value") ?? string.Empty))
        // A link without a usable value carries no information; drop it.
        .Where(static pair => !string.IsNullOrWhiteSpace(pair.Value))
        .Select(static pair => new RawLink(pair.Scheme, pair.Value))
        .ToImmutableArray();
}
private static DateTimeOffset GetDateTimeOffset(BsonDocument document, string field, DateTimeOffset fallback)
{

View File

@@ -108,6 +108,7 @@ public static class ServiceCollectionExtensions
services.AddSingleton<IMongoMigration, EnsureGridFsExpiryIndexesMigration>();
services.AddSingleton<IMongoMigration, EnsureAdvisoryRawIdempotencyIndexMigration>();
services.AddSingleton<IMongoMigration, EnsureAdvisorySupersedesBackfillMigration>();
services.AddSingleton<IMongoMigration, EnsureAdvisoryCanonicalKeyBackfillMigration>();
services.AddSingleton<IMongoMigration, EnsureAdvisoryRawValidatorMigration>();
services.AddSingleton<IMongoMigration, EnsureAdvisoryObservationsRawLinksetMigration>();
services.AddSingleton<IMongoMigration, EnsureAdvisoryEventCollectionsMigration>();