Implement Advisory Canonicalization and Backfill Migration
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Added AdvisoryCanonicalizer for canonicalizing advisory identifiers.
- Created EnsureAdvisoryCanonicalKeyBackfillMigration to populate advisory_key and links in advisory_raw documents.
- Introduced FileSurfaceManifestStore for managing surface manifests with file system backing.
- Developed ISurfaceManifestReader and ISurfaceManifestWriter interfaces for reading and writing manifests.
- Implemented SurfaceManifestPathBuilder for constructing paths and URIs for surface manifests.
- Added tests for FileSurfaceManifestStore to ensure correct functionality and deterministic behavior.
- Updated documentation for new features and migration steps.
This commit is contained in:
@@ -0,0 +1,166 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using MongoDB.Bson;
|
||||
using MongoDB.Driver;
|
||||
using StellaOps.Concelier.Core.Raw;
|
||||
using StellaOps.Concelier.RawModels;
|
||||
|
||||
namespace StellaOps.Concelier.Storage.Mongo.Migrations;
|
||||
|
||||
/// <summary>
/// Backfill migration that populates the <c>advisory_key</c> and <c>links</c> fields on
/// advisory raw documents that do not have them yet.
/// </summary>
/// <remarks>
/// A document is a candidate when its <c>advisory_key</c> is absent, null or empty, or when its
/// <c>links</c> field is absent or null. For each candidate the <c>source</c>, <c>upstream</c>
/// and <c>identifiers</c> sub-documents are parsed leniently (missing or wrongly typed fields
/// fall back to defaults instead of throwing) and passed to
/// <c>AdvisoryCanonicalizer.Canonicalize</c>; the resulting key and links are written back with
/// one update per document.
/// </remarks>
public sealed class EnsureAdvisoryCanonicalKeyBackfillMigration : IMongoMigration
{
    /// <summary>Identifier of this migration.</summary>
    public string Id => "2025-11-07-advisory-canonical-key";

    /// <summary>Human-readable description of this migration.</summary>
    public string Description => "Populate advisory_key and links for advisory_raw documents.";

    /// <summary>
    /// Scans the advisory raw collection for documents lacking canonical data and updates each one.
    /// </summary>
    /// <param name="database">Database that holds the advisory raw collection.</param>
    /// <param name="cancellationToken">Token observed between documents and passed to every driver call.</param>
    /// <exception cref="ArgumentNullException"><paramref name="database"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested while iterating.</exception>
    public async Task ApplyAsync(IMongoDatabase database, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(database);

        var collection = database.GetCollection<BsonDocument>(MongoStorageDefaults.Collections.AdvisoryRaw);

        using var cursor = await collection
            .Find(BuildCandidateFilter())
            .ToCursorAsync(cancellationToken)
            .ConfigureAwait(false);

        while (await cursor.MoveNextAsync(cancellationToken).ConfigureAwait(false))
        {
            foreach (var document in cursor.Current)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Without a usable _id there is nothing to target the update with.
                if (!document.TryGetValue("_id", out var idValue) || idValue.IsBsonNull)
                {
                    continue;
                }

                await collection.UpdateOneAsync(
                    Builders<BsonDocument>.Filter.Eq("_id", idValue),
                    BuildUpdate(document),
                    cancellationToken: cancellationToken).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Builds the filter selecting documents whose <c>advisory_key</c> is missing, null or empty,
    /// or whose <c>links</c> field is missing or null.
    /// </summary>
    private static FilterDefinition<BsonDocument> BuildCandidateFilter()
    {
        return Builders<BsonDocument>.Filter.Or(
            Builders<BsonDocument>.Filter.Exists("advisory_key", false),
            Builders<BsonDocument>.Filter.Type("advisory_key", BsonType.Null),
            Builders<BsonDocument>.Filter.Eq("advisory_key", string.Empty),
            Builders<BsonDocument>.Filter.Or(
                Builders<BsonDocument>.Filter.Exists("links", false),
                Builders<BsonDocument>.Filter.Type("links", BsonType.Null)));
    }

    /// <summary>
    /// Canonicalizes one stored document and returns the update that sets <c>advisory_key</c> and <c>links</c>.
    /// </summary>
    private static UpdateDefinition<BsonDocument> BuildUpdate(BsonDocument document)
    {
        var source = ParseSource(GetSubDocument(document, "source"));
        var upstream = ParseUpstream(GetSubDocument(document, "upstream"));
        var identifiers = ParseIdentifiers(GetSubDocument(document, "identifiers"));

        var canonical = AdvisoryCanonicalizer.Canonicalize(identifiers, source, upstream);

        // A default (uninitialised) ImmutableArray cannot be enumerated, so normalise it first.
        var links = canonical.Links.IsDefaultOrEmpty ? ImmutableArray<RawLink>.Empty : canonical.Links;
        var linksArray = new BsonArray(links.Select(link => new BsonDocument
        {
            { "scheme", link.Scheme },
            { "value", link.Value }
        }));

        return Builders<BsonDocument>.Update
            .Set("advisory_key", canonical.AdvisoryKey)
            .Set("links", linksArray);
    }

    /// <summary>
    /// Returns the named sub-document, or an empty document when the field is missing, null or
    /// not a document. This keeps one malformed legacy record from aborting the whole migration.
    /// </summary>
    private static BsonDocument GetSubDocument(BsonDocument document, string name)
    {
        return document.TryGetValue(name, out var value) && value.IsBsonDocument
            ? value.AsBsonDocument
            : new BsonDocument();
    }

    /// <summary>
    /// Reads source metadata; a missing vendor or connector becomes an empty string and a missing
    /// version becomes <c>"unknown"</c>.
    /// </summary>
    private static RawSourceMetadata ParseSource(BsonDocument source)
    {
        return new RawSourceMetadata(
            GetRequiredString(source, "vendor"),
            GetOptionalString(source, "connector") ?? string.Empty,
            GetOptionalString(source, "version") ?? "unknown",
            GetOptionalString(source, "stream"));
    }

    /// <summary>
    /// Reads upstream metadata, including the optional <c>provenance</c> map and <c>signature</c> block.
    /// </summary>
    private static RawUpstreamMetadata ParseUpstream(BsonDocument upstream)
    {
        var provenance = ImmutableDictionary.CreateBuilder<string, string>(StringComparer.Ordinal);
        if (upstream.TryGetValue("provenance", out var provenanceValue) && provenanceValue.IsBsonDocument)
        {
            foreach (var element in provenanceValue.AsBsonDocument)
            {
                provenance[element.Name] = BsonValueToString(element.Value);
            }
        }

        var signature = GetSubDocument(upstream, "signature");

        // Only a genuine BSON boolean counts as "present"; any other type is treated as false.
        var signaturePresent = signature.TryGetValue("present", out var presentValue)
            && presentValue.IsBoolean
            && presentValue.AsBoolean;

        var signatureMetadata = new RawSignatureMetadata(
            signaturePresent,
            GetOptionalString(signature, "format"),
            GetOptionalString(signature, "key_id"),
            GetOptionalString(signature, "sig"),
            GetOptionalString(signature, "certificate"),
            GetOptionalString(signature, "digest"));

        // NOTE(review): the retrieved_at fallback is the current time; presumably it does not
        // influence the canonical key or links - confirm against AdvisoryCanonicalizer.
        return new RawUpstreamMetadata(
            GetRequiredString(upstream, "upstream_id"),
            GetOptionalString(upstream, "document_version"),
            GetDateTimeOffset(upstream, "retrieved_at", DateTimeOffset.UtcNow),
            GetRequiredString(upstream, "content_hash"),
            signatureMetadata,
            provenance.ToImmutable());
    }

    /// <summary>
    /// Reads the identifier block: the <c>aliases</c> array (empty when missing or not an array)
    /// and the <c>primary</c> identifier (empty string when missing).
    /// </summary>
    private static RawIdentifiers ParseIdentifiers(BsonDocument identifiers)
    {
        var aliases = identifiers.TryGetValue("aliases", out var aliasesValue) && aliasesValue.IsBsonArray
            ? aliasesValue.AsBsonArray.Select(BsonValueToString).ToImmutableArray()
            : ImmutableArray<string>.Empty;

        return new RawIdentifiers(
            aliases,
            GetRequiredString(identifiers, "primary"));
    }

    /// <summary>
    /// Returns the field as a string, or an empty string when it is missing or null.
    /// </summary>
    private static string GetRequiredString(BsonDocument document, string name)
    {
        return GetOptionalString(document, name) ?? string.Empty;
    }

    /// <summary>
    /// Returns the field as a string, or <c>null</c> when it is missing or null. Non-string
    /// values are converted with <c>ToString()</c>.
    /// </summary>
    private static string? GetOptionalString(BsonDocument document, string name)
    {
        if (!document.TryGetValue(name, out var value) || value.IsBsonNull)
        {
            return null;
        }

        return value.IsString ? value.AsString : value.ToString();
    }

    /// <summary>
    /// Converts a BSON value to text, formatting numbers with the invariant culture and dates in
    /// round-trip ("O") format.
    /// </summary>
    private static string BsonValueToString(BsonValue value)
    {
        return value switch
        {
            null => string.Empty,
            BsonString s => s.AsString,
            BsonBoolean b => b.AsBoolean.ToString(),
            BsonDateTime dateTime => dateTime.ToUniversalTime().ToString("O"),
            BsonInt32 i => i.AsInt32.ToString(CultureInfo.InvariantCulture),
            BsonInt64 l => l.AsInt64.ToString(CultureInfo.InvariantCulture),
            BsonDouble d => d.AsDouble.ToString(CultureInfo.InvariantCulture),
            _ => value.ToString()
        };
    }

    /// <summary>
    /// Reads a UTC timestamp from the field. BSON dates are used directly and strings are parsed
    /// with the invariant culture (assumed UTC when no offset is given). A missing, null or
    /// unreadable value yields <paramref name="defaultValue"/>.
    /// </summary>
    private static DateTimeOffset GetDateTimeOffset(BsonDocument document, string name, DateTimeOffset defaultValue)
    {
        if (!document.TryGetValue(name, out var value) || value.IsBsonNull)
        {
            return defaultValue;
        }

        if (value is BsonDateTime dateTime)
        {
            return new DateTimeOffset(dateTime.ToUniversalTime());
        }

        if (value.IsString
            && DateTimeOffset.TryParse(
                value.AsString,
                CultureInfo.InvariantCulture,
                DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                out var parsed))
        {
            return parsed;
        }

        return defaultValue;
    }
}
|
||||
@@ -223,12 +223,69 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
|
||||
? EncodeCursor(records[^1].IngestedAt.UtcDateTime, records[^1].Id)
|
||||
: null;
|
||||
|
||||
return new AdvisoryRawQueryResult(records, nextCursor, hasMore);
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<AdvisoryRawRecord>> ListForVerificationAsync(
|
||||
string tenant,
|
||||
DateTimeOffset since,
|
||||
return new AdvisoryRawQueryResult(records, nextCursor, hasMore);
|
||||
}
|
||||
|
||||
/// <summary>
/// Finds raw advisory records for a tenant whose <c>advisory_key</c> equals one of the given
/// values, or that have a <c>links</c> element whose <c>value</c> equals one of them.
/// </summary>
/// <param name="tenant">Tenant to match; must not be null, empty or whitespace.</param>
/// <param name="searchValues">Candidate values; blank entries are dropped, the rest are trimmed and de-duplicated (ordinal).</param>
/// <param name="sourceVendors">Optional vendor restriction; entries are trimmed and lower-cased before matching <c>source.vendor</c>.</param>
/// <param name="cancellationToken">Token passed to the database query.</param>
/// <returns>
/// Matching records ordered by <c>created_at</c> descending, then <c>_id</c> descending; an empty
/// list when no usable search value was supplied.
/// </returns>
public async Task<IReadOnlyList<AdvisoryRawRecord>> FindByAdvisoryKeyAsync(
    string tenant,
    IReadOnlyCollection<string> searchValues,
    IReadOnlyCollection<string> sourceVendors,
    CancellationToken cancellationToken)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenant);

    if (searchValues is not { Count: > 0 })
    {
        return Array.Empty<AdvisoryRawRecord>();
    }

    var lookupValues = searchValues
        .Where(static candidate => !string.IsNullOrWhiteSpace(candidate))
        .Select(static candidate => candidate.Trim())
        .Distinct(StringComparer.Ordinal)
        .ToArray();

    if (lookupValues.Length == 0)
    {
        return Array.Empty<AdvisoryRawRecord>();
    }

    // A record matches when either its canonical key or any of its link values is in the lookup set.
    var keyMatch = Builders<BsonDocument>.Filter.In("advisory_key", lookupValues);
    var linkMatch = Builders<BsonDocument>.Filter.ElemMatch(
        "links",
        Builders<BsonDocument>.Filter.In("value", lookupValues));

    var criteria = Builders<BsonDocument>.Filter.Eq("tenant", tenant)
        & Builders<BsonDocument>.Filter.Or(keyMatch, linkMatch);

    var vendorValues = sourceVendors is { Count: > 0 }
        ? sourceVendors
            .Where(static name => !string.IsNullOrWhiteSpace(name))
            .Select(static name => name.Trim().ToLowerInvariant())
            .Distinct(StringComparer.Ordinal)
            .ToArray()
        : Array.Empty<string>();

    // The vendor restriction is applied only when at least one non-blank vendor remains.
    if (vendorValues.Length > 0)
    {
        criteria &= Builders<BsonDocument>.Filter.In("source.vendor", vendorValues);
    }

    var newestFirst = Builders<BsonDocument>.Sort
        .Descending("created_at")
        .Descending("_id");

    var matches = await _collection
        .Find(criteria)
        .Sort(newestFirst)
        .ToListAsync(cancellationToken)
        .ConfigureAwait(false);

    return matches.Select(MapToRecord).ToArray();
}
|
||||
|
||||
public async Task<IReadOnlyList<AdvisoryRawRecord>> ListForVerificationAsync(
|
||||
string tenant,
|
||||
DateTimeOffset since,
|
||||
DateTimeOffset until,
|
||||
IReadOnlyCollection<string> sourceVendors,
|
||||
CancellationToken cancellationToken)
|
||||
@@ -368,29 +425,39 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
|
||||
}
|
||||
}
|
||||
|
||||
var linkset = new BsonDocument
|
||||
{
|
||||
{ "aliases", new BsonArray(document.Linkset.Aliases) },
|
||||
{ "purls", new BsonArray(document.Linkset.PackageUrls) },
|
||||
{ "cpes", new BsonArray(document.Linkset.Cpes) },
|
||||
{ "references", references },
|
||||
{ "reconciled_from", new BsonArray(document.Linkset.ReconciledFrom) },
|
||||
{ "notes", notes }
|
||||
};
|
||||
|
||||
var bson = new BsonDocument
|
||||
{
|
||||
{ "_id", id },
|
||||
{ "tenant", document.Tenant },
|
||||
{ "source", source },
|
||||
{ "upstream", upstream },
|
||||
{ "content", content },
|
||||
{ "identifiers", identifiers },
|
||||
{ "linkset", linkset },
|
||||
{ "supersedes", supersedesValue is null ? BsonNull.Value : supersedesValue },
|
||||
{ "created_at", document.Upstream.RetrievedAt.UtcDateTime },
|
||||
{ "ingested_at", now }
|
||||
};
|
||||
var linkset = new BsonDocument
|
||||
{
|
||||
{ "aliases", new BsonArray(document.Linkset.Aliases) },
|
||||
{ "purls", new BsonArray(document.Linkset.PackageUrls) },
|
||||
{ "cpes", new BsonArray(document.Linkset.Cpes) },
|
||||
{ "references", references },
|
||||
{ "reconciled_from", new BsonArray(document.Linkset.ReconciledFrom) },
|
||||
{ "notes", notes }
|
||||
};
|
||||
|
||||
var linksArray = new BsonArray(
|
||||
(document.Links.IsDefaultOrEmpty ? ImmutableArray<RawLink>.Empty : document.Links)
|
||||
.Select(link => new BsonDocument
|
||||
{
|
||||
{ "scheme", link.Scheme },
|
||||
{ "value", link.Value }
|
||||
}));
|
||||
|
||||
var bson = new BsonDocument
|
||||
{
|
||||
{ "_id", id },
|
||||
{ "tenant", document.Tenant },
|
||||
{ "source", source },
|
||||
{ "upstream", upstream },
|
||||
{ "content", content },
|
||||
{ "identifiers", identifiers },
|
||||
{ "linkset", linkset },
|
||||
{ "advisory_key", document.AdvisoryKey },
|
||||
{ "links", linksArray },
|
||||
{ "supersedes", supersedesValue is null ? BsonNull.Value : supersedesValue },
|
||||
{ "created_at", document.Upstream.RetrievedAt.UtcDateTime },
|
||||
{ "ingested_at", now }
|
||||
};
|
||||
|
||||
return bson;
|
||||
}
|
||||
@@ -402,17 +469,53 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
|
||||
var upstream = MapUpstream(document["upstream"].AsBsonDocument);
|
||||
var content = MapContent(document["content"].AsBsonDocument);
|
||||
var identifiers = MapIdentifiers(document["identifiers"].AsBsonDocument);
|
||||
var linkset = MapLinkset(document["linkset"].AsBsonDocument);
|
||||
var supersedes = document.GetValue("supersedes", BsonNull.Value);
|
||||
|
||||
var rawDocument = new AdvisoryRawDocument(
|
||||
tenant,
|
||||
source,
|
||||
upstream,
|
||||
content,
|
||||
identifiers,
|
||||
linkset,
|
||||
supersedes.IsBsonNull ? null : supersedes.AsString);
|
||||
var linkset = MapLinkset(document["linkset"].AsBsonDocument);
|
||||
var supersedes = document.GetValue("supersedes", BsonNull.Value);
|
||||
|
||||
var advisoryKey = document.TryGetValue("advisory_key", out var advisoryKeyValue) && advisoryKeyValue.IsString
|
||||
? advisoryKeyValue.AsString
|
||||
: string.Empty;
|
||||
|
||||
var links = MapLinks(document);
|
||||
AdvisoryCanonicalizationResult? canonical = null;
|
||||
if (string.IsNullOrWhiteSpace(advisoryKey) || links.IsDefaultOrEmpty)
|
||||
{
|
||||
canonical = AdvisoryCanonicalizer.Canonicalize(identifiers, source, upstream);
|
||||
if (string.IsNullOrWhiteSpace(advisoryKey))
|
||||
{
|
||||
advisoryKey = canonical.AdvisoryKey;
|
||||
}
|
||||
|
||||
if (links.IsDefaultOrEmpty)
|
||||
{
|
||||
links = canonical.Links.IsDefault ? ImmutableArray<RawLink>.Empty : canonical.Links;
|
||||
}
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(advisoryKey))
|
||||
{
|
||||
canonical ??= AdvisoryCanonicalizer.Canonicalize(identifiers, source, upstream);
|
||||
advisoryKey = canonical.AdvisoryKey;
|
||||
}
|
||||
|
||||
var normalizedLinks = links.IsDefaultOrEmpty
|
||||
? (canonical?.Links ?? ImmutableArray<RawLink>.Empty)
|
||||
: links;
|
||||
if (normalizedLinks.IsDefault)
|
||||
{
|
||||
normalizedLinks = ImmutableArray<RawLink>.Empty;
|
||||
}
|
||||
|
||||
var rawDocument = new AdvisoryRawDocument(
|
||||
tenant,
|
||||
source,
|
||||
upstream,
|
||||
content,
|
||||
identifiers,
|
||||
linkset,
|
||||
advisoryKey,
|
||||
normalizedLinks,
|
||||
supersedes.IsBsonNull ? null : supersedes.AsString);
|
||||
|
||||
var ingestedAt = GetDateTimeOffset(document, "ingested_at", rawDocument.Upstream.RetrievedAt);
|
||||
var createdAt = GetDateTimeOffset(document, "created_at", rawDocument.Upstream.RetrievedAt);
|
||||
@@ -499,7 +602,7 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
|
||||
GetRequiredString(identifiers, "primary"));
|
||||
}
|
||||
|
||||
private static RawLinkset MapLinkset(BsonDocument linkset)
|
||||
private static RawLinkset MapLinkset(BsonDocument linkset)
|
||||
{
|
||||
var aliases = linkset.TryGetValue("aliases", out var aliasesValue) && aliasesValue.IsBsonArray
|
||||
? aliasesValue.AsBsonArray.Select(BsonValueToString).ToImmutableArray()
|
||||
@@ -549,7 +652,36 @@ internal sealed class MongoAdvisoryRawRepository : IAdvisoryRawRepository
|
||||
ReconciledFrom = reconciledFrom,
|
||||
Notes = notesBuilder.ToImmutable()
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the <c>links</c> array of a stored document into <c>RawLink</c> values.
/// </summary>
/// <param name="document">Stored document that may contain a <c>links</c> array.</param>
/// <returns>
/// One link per array element that is a document with a non-blank <c>value</c>; a missing
/// <c>scheme</c> becomes an empty string. Returns an empty array when <c>links</c> is missing,
/// is not an array, or contains no usable element.
/// </returns>
private static ImmutableArray<RawLink> MapLinks(BsonDocument document)
{
    var hasLinksArray = document.TryGetValue("links", out var linksValue) && linksValue.IsBsonArray;
    if (!hasLinksArray)
    {
        return ImmutableArray<RawLink>.Empty;
    }

    return linksValue.AsBsonArray
        .Where(static entry => entry.IsBsonDocument)
        .Select(static entry => entry.AsBsonDocument)
        .Select(static entry => (
            Scheme: GetOptionalString(entry, "scheme") ?? string.Empty,
            Value: GetOptionalString(entry, "value") ?? string.Empty))
        // Entries without a meaningful value are dropped.
        .Where(static pair => !string.IsNullOrWhiteSpace(pair.Value))
        .Select(static pair => new RawLink(pair.Scheme, pair.Value))
        .ToImmutableArray();
}
|
||||
|
||||
private static DateTimeOffset GetDateTimeOffset(BsonDocument document, string field, DateTimeOffset fallback)
|
||||
{
|
||||
|
||||
@@ -108,6 +108,7 @@ public static class ServiceCollectionExtensions
|
||||
services.AddSingleton<IMongoMigration, EnsureGridFsExpiryIndexesMigration>();
|
||||
services.AddSingleton<IMongoMigration, EnsureAdvisoryRawIdempotencyIndexMigration>();
|
||||
services.AddSingleton<IMongoMigration, EnsureAdvisorySupersedesBackfillMigration>();
|
||||
services.AddSingleton<IMongoMigration, EnsureAdvisoryCanonicalKeyBackfillMigration>();
|
||||
services.AddSingleton<IMongoMigration, EnsureAdvisoryRawValidatorMigration>();
|
||||
services.AddSingleton<IMongoMigration, EnsureAdvisoryObservationsRawLinksetMigration>();
|
||||
services.AddSingleton<IMongoMigration, EnsureAdvisoryEventCollectionsMigration>();
|
||||
|
||||
Reference in New Issue
Block a user