using System.Text.Json;
using MongoDB.Bson;
using MongoDB.Bson.Serialization;
using MongoDB.Driver;
using Npgsql;
using Scheduler.Backfill;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Options;

// One-shot migration tool: copies scheduler documents (schedules and runs) out of
// MongoDB and upserts them into PostgreSQL in batches. --dry-run counts rows
// without writing anything to Postgres.
var parsed = ParseArgs(args);
var options = BackfillOptions.From(
    parsed.MongoConnection,
    parsed.MongoDatabase,
    parsed.PostgresConnection,
    parsed.BatchSize,
    parsed.DryRun);

// FIX: dispose the runner (and its NpgsqlDataSource) so pooled connections are
// released before process exit; the original leaked the data source.
await using var runner = new BackfillRunner(options);
await runner.RunAsync();
return 0;

// Parses command-line arguments. Unknown switches are ignored; a missing or
// unparseable --batch value falls back to 500; connection settings left null
// here are resolved from environment variables in BackfillOptions.From.
static BackfillCliOptions ParseArgs(string[] args)
{
    string? mongo = null;
    string? mongoDb = null;
    string? pg = null;
    var batch = 500;
    var dryRun = false;

    for (var i = 0; i < args.Length; i++)
    {
        switch (args[i])
        {
            case "--mongo" or "-m":
                mongo = NextValue(args, ref i);
                break;
            case "--mongo-db":
                mongoDb = NextValue(args, ref i);
                break;
            case "--pg" or "-p":
                pg = NextValue(args, ref i);
                break;
            case "--batch":
                batch = int.TryParse(NextValue(args, ref i), out var b) ? b : 500;
                break;
            case "--dry-run":
                dryRun = true;
                break;
            default:
                // Unknown token: skip silently (tool is forgiving about extra args).
                break;
        }
    }

    return new BackfillCliOptions(mongo, mongoDb, pg, batch, dryRun);
}

// Returns the token following <paramref name="index"/> and advances the cursor.
// Returns empty string when the switch is the last token; callers treat
// empty/whitespace as "not provided".
static string NextValue(string[] args, ref int index)
{
    if (index + 1 >= args.Length)
    {
        return string.Empty;
    }

    index++;
    return args[index];
}

/// <summary>Raw command-line values before defaulting/validation.</summary>
internal sealed record BackfillCliOptions(
    string? MongoConnection,
    string? MongoDatabase,
    string? PostgresConnection,
    int BatchSize,
    bool DryRun);

/// <summary>Fully-resolved, validated options for the backfill run.</summary>
internal sealed record BackfillOptions(
    string MongoConnectionString,
    string MongoDatabase,
    string PostgresConnectionString,
    int BatchSize,
    bool DryRun)
{
    /// <summary>
    /// Resolves options with precedence: explicit CLI value, then environment
    /// variable, then (for Mongo only) the SchedulerMongoOptions default.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// No PostgreSQL connection string was supplied via --pg or POSTGRES_CONNECTION_STRING.
    /// </exception>
    public static BackfillOptions From(string? mongoConn, string? mongoDb, string? pgConn, int batchSize, bool dryRun)
    {
        var mongoOptions = new SchedulerMongoOptions();

        var conn = string.IsNullOrWhiteSpace(mongoConn)
            ? Environment.GetEnvironmentVariable("MONGO_CONNECTION_STRING") ?? mongoOptions.ConnectionString
            : mongoConn;

        var database = string.IsNullOrWhiteSpace(mongoDb)
            ? Environment.GetEnvironmentVariable("MONGO_DATABASE") ?? mongoOptions.Database
            : mongoDb;

        // BUG FIX: the original threw as soon as --pg was absent, so the
        // POSTGRES_CONNECTION_STRING fallback below it was unreachable dead code.
        // Check the environment variable first, then fail.
        var pg = string.IsNullOrWhiteSpace(pgConn)
            ? Environment.GetEnvironmentVariable("POSTGRES_CONNECTION_STRING")
            : pgConn;

        if (string.IsNullOrWhiteSpace(pg))
        {
            throw new ArgumentException(
                "PostgreSQL connection string is required (--pg or POSTGRES_CONNECTION_STRING)");
        }

        // Clamp the batch size to a sane floor so tiny/negative values don't
        // degenerate into per-row transactions.
        return new BackfillOptions(conn, database, pg, Math.Max(50, batchSize), dryRun);
    }
}

/// <summary>
/// Streams schedule and run documents from MongoDB and upserts them into the
/// scheduler schema in PostgreSQL, one transaction per batch.
/// </summary>
/// <remarks>
/// NOTE(review): generic type arguments were missing from the source text
/// (likely stripped during extraction). They have been restored as
/// BsonDocument / Schedule / Run — confirm these model type names against
/// StellaOps.Scheduler.Models.
/// </remarks>
internal sealed class BackfillRunner : IAsyncDisposable
{
    private readonly BackfillOptions _options;
    private readonly IMongoDatabase _mongo;
    private readonly NpgsqlDataSource _pg;

    public BackfillRunner(BackfillOptions options)
    {
        _options = options;
        _mongo = new MongoClient(options.MongoConnectionString).GetDatabase(options.MongoDatabase);
        _pg = NpgsqlDataSource.Create(options.PostgresConnectionString);
    }

    // FIX: the data source owns the connection pool; dispose it so the tool
    // shuts down cleanly instead of leaking sockets.
    public ValueTask DisposeAsync() => _pg.DisposeAsync();

    /// <summary>Runs both backfill phases (schedules, then runs) sequentially.</summary>
    public async Task RunAsync()
    {
        Console.WriteLine($"Mongo -> Postgres backfill starting (dry-run={_options.DryRun})");
        await BackfillSchedulesAsync();
        await BackfillRunsAsync();
        Console.WriteLine("Backfill complete.");
    }

    // Streams every schedule document through a cursor, buffering up to
    // BatchSize items before flushing them in a single transaction.
    private async Task BackfillSchedulesAsync()
    {
        var collection = _mongo.GetCollection<BsonDocument>(new SchedulerMongoOptions().SchedulesCollection);
        using var cursor = await collection.Find(FilterDefinition<BsonDocument>.Empty).ToCursorAsync();

        var batch = new List<Schedule>(_options.BatchSize);
        long total = 0;

        while (await cursor.MoveNextAsync())
        {
            foreach (var doc in cursor.Current)
            {
                batch.Add(BsonSerializer.Deserialize<Schedule>(doc));
                if (batch.Count >= _options.BatchSize)
                {
                    total += await PersistSchedulesAsync(batch);
                    batch.Clear();
                }
            }
        }

        // Flush the final partial batch.
        if (batch.Count > 0)
        {
            total += await PersistSchedulesAsync(batch);
        }

        Console.WriteLine($"Schedules backfilled: {total}");
    }

    // Upserts one batch of schedules inside a single transaction.
    // Returns the number of affected rows (or the batch size in dry-run mode).
    private async Task<long> PersistSchedulesAsync(IEnumerable<Schedule> schedules)
    {
        if (_options.DryRun)
        {
            return schedules.LongCount();
        }

        await using var conn = await _pg.OpenConnectionAsync();
        await using var tx = await conn.BeginTransactionAsync();

        // Idempotent upsert keyed on id; created_at keeps the earliest value so
        // re-running the backfill never moves creation timestamps forward.
        const string sql = @"
INSERT INTO scheduler.schedules (
    id, tenant_id, name, description, enabled, cron_expression, timezone, mode,
    selection, only_if, notify, limits, subscribers,
    created_at, created_by, updated_at, updated_by, deleted_at, deleted_by)
VALUES (
    @id, @tenant_id, @name, @description, @enabled, @cron_expression, @timezone, @mode,
    @selection, @only_if, @notify, @limits, @subscribers,
    @created_at, @created_by, @updated_at, @updated_by, @deleted_at, @deleted_by)
ON CONFLICT (id) DO UPDATE SET
    tenant_id = EXCLUDED.tenant_id,
    name = EXCLUDED.name,
    description = EXCLUDED.description,
    enabled = EXCLUDED.enabled,
    cron_expression = EXCLUDED.cron_expression,
    timezone = EXCLUDED.timezone,
    mode = EXCLUDED.mode,
    selection = EXCLUDED.selection,
    only_if = EXCLUDED.only_if,
    notify = EXCLUDED.notify,
    limits = EXCLUDED.limits,
    subscribers = EXCLUDED.subscribers,
    created_at = LEAST(scheduler.schedules.created_at, EXCLUDED.created_at),
    created_by = EXCLUDED.created_by,
    updated_at = EXCLUDED.updated_at,
    updated_by = EXCLUDED.updated_by,
    deleted_at = EXCLUDED.deleted_at,
    deleted_by = EXCLUDED.deleted_by;";

        var affected = 0;
        foreach (var schedule in schedules)
        {
            await using var cmd = new NpgsqlCommand(sql, conn, tx);
            cmd.Parameters.AddWithValue("id", schedule.Id);
            cmd.Parameters.AddWithValue("tenant_id", schedule.TenantId);
            cmd.Parameters.AddWithValue("name", schedule.Name);
            // NOTE(review): description is always NULL — the model appears not to
            // expose one; confirm against the Schedule type.
            cmd.Parameters.AddWithValue("description", DBNull.Value);
            cmd.Parameters.AddWithValue("enabled", schedule.Enabled);
            cmd.Parameters.AddWithValue("cron_expression", schedule.CronExpression);
            cmd.Parameters.AddWithValue("timezone", schedule.Timezone);
            cmd.Parameters.AddWithValue("mode", BackfillMappings.ToScheduleMode(schedule.Mode));
            cmd.Parameters.AddWithValue("selection", CanonicalJsonSerializer.Serialize(schedule.Selection));
            cmd.Parameters.AddWithValue("only_if", CanonicalJsonSerializer.Serialize(schedule.OnlyIf));
            cmd.Parameters.AddWithValue("notify", CanonicalJsonSerializer.Serialize(schedule.Notify));
            cmd.Parameters.AddWithValue("limits", CanonicalJsonSerializer.Serialize(schedule.Limits));
            cmd.Parameters.AddWithValue("subscribers", schedule.Subscribers.ToArray());
            cmd.Parameters.AddWithValue("created_at", schedule.CreatedAt.UtcDateTime);
            cmd.Parameters.AddWithValue("created_by", schedule.CreatedBy);
            cmd.Parameters.AddWithValue("updated_at", schedule.UpdatedAt.UtcDateTime);
            // ROBUSTNESS: Npgsql throws on CLR null parameter values; map a null
            // UpdatedBy to SQL NULL explicitly.
            cmd.Parameters.AddWithValue("updated_by", (object?)schedule.UpdatedBy ?? DBNull.Value);
            cmd.Parameters.AddWithValue("deleted_at", DBNull.Value);
            cmd.Parameters.AddWithValue("deleted_by", DBNull.Value);
            affected += await cmd.ExecuteNonQueryAsync();
        }

        await tx.CommitAsync();
        return affected;
    }

    // Streams every run document through a cursor, buffering up to BatchSize
    // items before flushing them in a single transaction.
    private async Task BackfillRunsAsync()
    {
        var collection = _mongo.GetCollection<BsonDocument>(new SchedulerMongoOptions().RunsCollection);
        using var cursor = await collection.Find(FilterDefinition<BsonDocument>.Empty).ToCursorAsync();

        var batch = new List<Run>(_options.BatchSize);
        long total = 0;

        while (await cursor.MoveNextAsync())
        {
            foreach (var doc in cursor.Current)
            {
                batch.Add(BsonSerializer.Deserialize<Run>(doc));
                if (batch.Count >= _options.BatchSize)
                {
                    total += await PersistRunsAsync(batch);
                    batch.Clear();
                }
            }
        }

        // Flush the final partial batch.
        if (batch.Count > 0)
        {
            total += await PersistRunsAsync(batch);
        }

        Console.WriteLine($"Runs backfilled: {total}");
    }

    // Upserts one batch of runs inside a single transaction.
    // Returns the number of affected rows (or the batch size in dry-run mode).
    private async Task<long> PersistRunsAsync(IEnumerable<Run> runs)
    {
        if (_options.DryRun)
        {
            return runs.LongCount();
        }

        await using var conn = await _pg.OpenConnectionAsync();
        await using var tx = await conn.BeginTransactionAsync();

        // Idempotent upsert keyed on id; created_at keeps the earliest value and
        // created_by is preserved when the incoming row has none.
        const string sql = @"
INSERT INTO scheduler.runs (
    id, tenant_id, schedule_id, state, trigger, stats, deltas, reason, retry_of,
    created_at, started_at, finished_at, error, created_by, updated_at, metadata)
VALUES (
    @id, @tenant_id, @schedule_id, @state, @trigger, @stats, @deltas, @reason, @retry_of,
    @created_at, @started_at, @finished_at, @error, @created_by, @updated_at, @metadata)
ON CONFLICT (id) DO UPDATE SET
    tenant_id = EXCLUDED.tenant_id,
    schedule_id = EXCLUDED.schedule_id,
    state = EXCLUDED.state,
    trigger = EXCLUDED.trigger,
    stats = EXCLUDED.stats,
    deltas = EXCLUDED.deltas,
    reason = EXCLUDED.reason,
    retry_of = EXCLUDED.retry_of,
    created_at = LEAST(scheduler.runs.created_at, EXCLUDED.created_at),
    started_at = EXCLUDED.started_at,
    finished_at = EXCLUDED.finished_at,
    error = EXCLUDED.error,
    created_by = COALESCE(EXCLUDED.created_by, scheduler.runs.created_by),
    updated_at = EXCLUDED.updated_at,
    metadata = EXCLUDED.metadata;";

        var affected = 0;
        foreach (var run in runs)
        {
            await using var cmd = new NpgsqlCommand(sql, conn, tx);
            cmd.Parameters.AddWithValue("id", run.Id);
            cmd.Parameters.AddWithValue("tenant_id", run.TenantId);
            cmd.Parameters.AddWithValue("schedule_id", (object?)run.ScheduleId ?? DBNull.Value);
            cmd.Parameters.AddWithValue("state", BackfillMappings.ToRunState(run.State));
            cmd.Parameters.AddWithValue("trigger", BackfillMappings.ToRunTrigger(run.Trigger));
            cmd.Parameters.AddWithValue("stats", CanonicalJsonSerializer.Serialize(run.Stats));
            cmd.Parameters.AddWithValue("deltas", CanonicalJsonSerializer.Serialize(run.Deltas));
            cmd.Parameters.AddWithValue("reason", CanonicalJsonSerializer.Serialize(run.Reason));
            cmd.Parameters.AddWithValue("retry_of", (object?)run.RetryOf ?? DBNull.Value);
            cmd.Parameters.AddWithValue("created_at", run.CreatedAt.UtcDateTime);
            cmd.Parameters.AddWithValue("started_at", (object?)run.StartedAt?.UtcDateTime ?? DBNull.Value);
            cmd.Parameters.AddWithValue("finished_at", (object?)run.FinishedAt?.UtcDateTime ?? DBNull.Value);
            cmd.Parameters.AddWithValue("error", (object?)run.Error ?? DBNull.Value);
            // Mongo runs carry no author; fall back to the manual-trigger reason, then "system".
            cmd.Parameters.AddWithValue("created_by", (object?)run.Reason?.ManualReason ?? "system");
            cmd.Parameters.AddWithValue("updated_at", DateTime.UtcNow);
            cmd.Parameters.AddWithValue("metadata", JsonSerializer.Serialize(new { schema = run.SchemaVersion }));
            affected += await cmd.ExecuteNonQueryAsync();
        }

        await tx.CommitAsync();
        return affected;
    }
}