78 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			78 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * Aggregation helper that surfaces advisory_raw duplicate candidates prior to enabling the
 * idempotency unique index. Intended for staging/offline snapshots.
 *
 * Usage:
 *   mongo concelier ops/devops/scripts/check-advisory-raw-duplicates.js
 *   mongo concelier --eval "var LIMIT=100" ops/devops/scripts/check-advisory-raw-duplicates.js
 *
 * Variables:
 *   LIMIT - optional cap on number of duplicate groups to print (default 50).
 *           The script reads it as a shell global named LIMIT, so define it
 *           (for example with --eval) before the script is loaded.
 */
(function () {
  /**
   * Parses `value` as a base-10 integer.
   *
   * @param {*} value - Raw value to parse (string or number).
   * @param {number} fallback - Returned when `value` does not parse to a
   *   positive safe integer.
   * @returns {number} The parsed positive integer, or `fallback`.
   */
  function toInt(value, fallback) {
    var parsed = parseInt(value, 10);
    // isSafeInteger also rejects NaN/Infinity and values too large to be
    // represented exactly, which $limit would otherwise be handed as-is.
    return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
  }

  /**
   * Resolves the cap on printed duplicate groups.
   *
   * Order of precedence:
   *   1. a shell global named LIMIT (e.g. --eval "var LIMIT=100");
   *   2. the LIMIT environment variable, when the shell exposes `process.env`;
   *   3. `defaultLimit`.
   *
   * @param {number} defaultLimit - Value used when no valid override exists.
   * @returns {number} A positive integer.
   */
  function resolveLimit(defaultLimit) {
    if (typeof LIMIT !== "undefined") {
      return toInt(LIMIT, defaultLimit);
    }
    if (
      typeof process !== "undefined" &&
      process &&
      process.env &&
      typeof process.env.LIMIT !== "undefined"
    ) {
      return toInt(process.env.LIMIT, defaultLimit);
    }
    return defaultLimit;
  }

  var limit = resolveLimit(50);

  // Re-resolve the current database through getSiblingDB when the handle
  // exposes getName; otherwise use the shell-provided `db` unchanged.
  var database = db.getName ? db.getSiblingDB(db.getName()) : db;
  if (!database) {
    throw new Error("Unable to resolve database handle");
  }

  print("");
  print("== advisory_raw duplicate audit ==");
  print("Database: " + database.getName());
  print("Limit   : " + limit);
  print("");

  var pipeline = [
    // Bucket documents by the (vendor, upstream id, content hash, tenant)
    // tuple, collecting the _id values and a count per bucket.
    {
      $group: {
        _id: {
          vendor: "$source.vendor",
          upstreamId: "$upstream.upstream_id",
          contentHash: "$upstream.content_hash",
          tenant: "$tenant"
        },
        ids: { $addToSet: "$_id" },
        count: { $sum: 1 }
      }
    },
    // Only buckets holding more than one document are duplicates.
    { $match: { count: { $gt: 1 } } },
    // Flatten the compound _id into top-level fields for printing.
    {
      $project: {
        _id: 0,
        vendor: "$_id.vendor",
        upstreamId: "$_id.upstreamId",
        contentHash: "$_id.contentHash",
        tenant: "$_id.tenant",
        count: 1,
        ids: 1
      }
    },
    // Largest groups first. tenant/contentHash break remaining ties so the
    // groups that survive $limit are the same from run to run.
    { $sort: { count: -1, vendor: 1, upstreamId: 1, tenant: 1, contentHash: 1 } },
    { $limit: limit }
  ];

  var cursor = database.getCollection("advisory_raw").aggregate(pipeline, { allowDiskUse: true });
  var printed = 0;
  while (cursor.hasNext()) {
    var doc = cursor.next();
    printed += 1;
    print("---");
    print("vendor      : " + doc.vendor);
    print("upstream_id : " + doc.upstreamId);
    print("tenant      : " + doc.tenant);
    print("content_hash: " + doc.contentHash);
    print("count       : " + doc.count);
    print("ids         : " + doc.ids.join(", "));
  }

  if (printed === 0) {
    print("No duplicate advisory_raw documents detected.");
  } else if (printed === limit) {
    // The pipeline stops at `limit` groups, so a full page cannot be
    // distinguished from a truncated result; tell the operator.
    print("---");
    print("Reached limit of " + limit + " groups; additional duplicate groups may exist.");
  }

  print("");
})();