/**
 * Aggregation helper that surfaces advisory_raw duplicate candidates prior to enabling the
 * idempotency unique index. Intended for staging/offline snapshots.
 *
 * Documents are grouped by (source.vendor, upstream.upstream_id, upstream.content_hash, tenant);
 * every group containing more than one document is printed together with its _id values.
 *
 * Usage:
 *   mongo concelier ops/devops/scripts/check-advisory-raw-duplicates.js
 *
 * Configuration:
 *   LIMIT - optional cap on number of duplicate groups to print (default 50).
 *           Read from a shell global named LIMIT when one is defined
 *           (e.g. --eval "var LIMIT=100" before the script path), otherwise from the
 *           LIMIT environment variable when the shell exposes process.env.
 *           Values that do not parse to a positive integer fall back to the default.
 */
(function () {
  var DEFAULT_LIMIT = 50;

  /**
   * Parses a base-10 integer, returning `fallback` unless the result is a finite number > 0.
   */
  function toInt(value, fallback) {
    var parsed = parseInt(value, 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
  }

  /**
   * Returns the raw LIMIT setting, or undefined when none is supplied.
   * A shell global takes precedence; process.env is consulted only when `process` exists.
   */
  function readLimitSetting() {
    if (typeof LIMIT !== "undefined") {
      return LIMIT;
    }
    if (typeof process !== "undefined" && process && process.env) {
      return process.env.LIMIT;
    }
    return undefined;
  }

  var limit = toInt(readLimitSetting(), DEFAULT_LIMIT);

  // Check the handle before dereferencing it so a missing `db` produces the intended
  // error message instead of a ReferenceError/TypeError.
  if (typeof db === "undefined" || !db) {
    throw new Error("Unable to resolve database handle");
  }

  var database = db.getName ? db.getSiblingDB(db.getName()) : db;
  if (!database) {
    throw new Error("Unable to resolve database handle");
  }

  print("");
  print("== advisory_raw duplicate audit ==");
  print("Database: " + database.getName());
  print("Limit : " + limit);
  print("");

  var pipeline = [
    {
      // Bucket documents by the candidate idempotency key and collect their ids.
      $group: {
        _id: {
          vendor: "$source.vendor",
          upstreamId: "$upstream.upstream_id",
          contentHash: "$upstream.content_hash",
          tenant: "$tenant"
        },
        ids: { $addToSet: "$_id" },
        count: { $sum: 1 }
      }
    },
    // Keep only keys shared by more than one document.
    { $match: { count: { $gt: 1 } } },
    {
      $project: {
        _id: 0,
        vendor: "$_id.vendor",
        upstreamId: "$_id.upstreamId",
        contentHash: "$_id.contentHash",
        tenant: "$_id.tenant",
        count: 1,
        ids: 1
      }
    },
    // Largest groups first. tenant and contentHash complete the group key so the order
    // is total and the $limit cut-off is the same on every run.
    { $sort: { count: -1, vendor: 1, upstreamId: 1, tenant: 1, contentHash: 1 } },
    { $limit: limit }
  ];

  var cursor = database
    .getCollection("advisory_raw")
    .aggregate(pipeline, { allowDiskUse: true });

  var printed = 0;
  while (cursor.hasNext()) {
    var doc = cursor.next();
    printed += 1;
    print("---");
    print("vendor : " + doc.vendor);
    print("upstream_id : " + doc.upstreamId);
    print("tenant : " + doc.tenant);
    print("content_hash: " + doc.contentHash);
    print("count : " + doc.count);
    print("ids : " + doc.ids.join(", "));
  }

  if (printed === 0) {
    print("No duplicate advisory_raw documents detected.");
  } else if (printed === limit) {
    // The pipeline stops at `limit` groups, so reaching it means the listing may be partial.
    print("---");
    print("Output capped at " + limit + " duplicate groups; more may exist. Raise LIMIT to see further groups.");
  }

  print("");
})();