Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
316 lines
12 KiB
C#
316 lines
12 KiB
C#
using System.Text.Json;
|
|
using MongoDB.Bson;
|
|
using MongoDB.Bson.Serialization;
|
|
using MongoDB.Driver;
|
|
using Npgsql;
|
|
using Scheduler.Backfill;
|
|
using StellaOps.Scheduler.Models;
|
|
using StellaOps.Scheduler.Storage.Mongo.Options;
|
|
|
|
// Entry point: parse the CLI flags, resolve the effective connection settings,
// then run the Mongo -> Postgres backfill.
BackfillOptions options;
try
{
    var parsed = ParseArgs(args);
    options = BackfillOptions.From(
        parsed.MongoConnection,
        parsed.MongoDatabase,
        parsed.PostgresConnection,
        parsed.BatchSize,
        parsed.DryRun);
}
catch (ArgumentException ex)
{
    // Missing or invalid configuration is a user error: report it on stderr and
    // exit non-zero instead of crashing with an unhandled-exception stack trace.
    Console.Error.WriteLine($"error: {ex.Message}");
    return 1;
}

// Failures while actually running the backfill still propagate unchanged.
var runner = new BackfillRunner(options);
await runner.RunAsync();
return 0;
|
|
|
|
// Walks the raw command-line tokens and collects the recognised flags into a
// BackfillCliOptions value.
//
// Recognised flags:
//   --mongo | -m <value>   Mongo connection string
//   --mongo-db <value>     Mongo database name
//   --pg | -p <value>      PostgreSQL connection string
//   --batch <int>          batch size (500 when absent or not an integer)
//   --dry-run              count documents without writing
//
// Unrecognised tokens are skipped silently.
static BackfillCliOptions ParseArgs(string[] args)
{
    const int defaultBatchSize = 500;

    string? mongoConnection = null;
    string? mongoDatabase = null;
    string? postgresConnection = null;
    var batchSize = defaultBatchSize;
    var isDryRun = false;

    var position = 0;
    while (position < args.Length)
    {
        var flag = args[position];

        if (flag is "--mongo" or "-m")
        {
            mongoConnection = NextValue(args, ref position);
        }
        else if (flag is "--mongo-db")
        {
            mongoDatabase = NextValue(args, ref position);
        }
        else if (flag is "--pg" or "-p")
        {
            postgresConnection = NextValue(args, ref position);
        }
        else if (flag is "--batch")
        {
            // A value that is not an integer resets the batch size to the default.
            if (!int.TryParse(NextValue(args, ref position), out batchSize))
            {
                batchSize = defaultBatchSize;
            }
        }
        else if (flag is "--dry-run")
        {
            isDryRun = true;
        }

        position++;
    }

    return new BackfillCliOptions(mongoConnection, mongoDatabase, postgresConnection, batchSize, isDryRun);
}
|
|
|
|
// Returns the token that follows args[index] and advances index onto it.
// When there is no following token, index is left untouched and an empty
// string is returned.
static string NextValue(string[] args, ref int index)
{
    var following = index + 1;
    if (following < args.Length)
    {
        index = following;
        return args[following];
    }

    return string.Empty;
}
|
|
|
|
/// <summary>
/// Raw values collected from the command line, before environment-variable
/// fallbacks and defaults are applied.
/// </summary>
/// <param name="MongoConnection">Value of <c>--mongo</c>/<c>-m</c>, or <c>null</c> when not supplied.</param>
/// <param name="MongoDatabase">Value of <c>--mongo-db</c>, or <c>null</c> when not supplied.</param>
/// <param name="PostgresConnection">Value of <c>--pg</c>/<c>-p</c>, or <c>null</c> when not supplied.</param>
/// <param name="BatchSize">Value of <c>--batch</c>.</param>
/// <param name="DryRun"><c>true</c> when <c>--dry-run</c> was passed.</param>
internal sealed record BackfillCliOptions(string? MongoConnection, string? MongoDatabase, string? PostgresConnection, int BatchSize, bool DryRun);
|
|
|
|
/// <summary>
/// Fully resolved settings used by <see cref="BackfillRunner"/>.
/// </summary>
/// <param name="MongoConnectionString">Connection string for the source MongoDB server.</param>
/// <param name="MongoDatabase">Name of the source MongoDB database.</param>
/// <param name="PostgresConnectionString">Connection string for the target PostgreSQL server.</param>
/// <param name="BatchSize">Number of documents persisted per transaction.</param>
/// <param name="DryRun">When <c>true</c>, documents are counted but nothing is written.</param>
internal sealed record BackfillOptions(
    string MongoConnectionString,
    string MongoDatabase,
    string PostgresConnectionString,
    int BatchSize,
    bool DryRun)
{
    /// <summary>
    /// Builds the effective options from command-line values, falling back to
    /// environment variables and then to the <see cref="SchedulerMongoOptions"/> defaults.
    /// </summary>
    /// <param name="mongoConn">Mongo connection string; when blank, <c>MONGO_CONNECTION_STRING</c> or the default is used.</param>
    /// <param name="mongoDb">Mongo database name; when blank, <c>MONGO_DATABASE</c> or the default is used.</param>
    /// <param name="pgConn">PostgreSQL connection string; when blank, <c>POSTGRES_CONNECTION_STRING</c> is used.</param>
    /// <param name="batchSize">Requested batch size; values below 50 are raised to 50.</param>
    /// <param name="dryRun">Whether writes should be skipped.</param>
    /// <returns>The resolved options.</returns>
    /// <exception cref="ArgumentException">
    /// Neither <paramref name="pgConn"/> nor <c>POSTGRES_CONNECTION_STRING</c> supplies a non-blank value.
    /// </exception>
    public static BackfillOptions From(string? mongoConn, string? mongoDb, string? pgConn, int batchSize, bool dryRun)
    {
        var mongoOptions = new SchedulerMongoOptions();

        var conn = string.IsNullOrWhiteSpace(mongoConn)
            ? Environment.GetEnvironmentVariable("MONGO_CONNECTION_STRING") ?? mongoOptions.ConnectionString
            : mongoConn;

        var database = string.IsNullOrWhiteSpace(mongoDb)
            ? Environment.GetEnvironmentVariable("MONGO_DATABASE") ?? mongoOptions.Database
            : mongoDb;

        // Consult the environment BEFORE failing: previously a blank --pg threw
        // immediately, so the documented POSTGRES_CONNECTION_STRING fallback
        // could never be reached.
        var pg = string.IsNullOrWhiteSpace(pgConn)
            ? Environment.GetEnvironmentVariable("POSTGRES_CONNECTION_STRING")
            : pgConn;

        if (string.IsNullOrWhiteSpace(pg))
        {
            throw new ArgumentException("PostgreSQL connection string is required (--pg or POSTGRES_CONNECTION_STRING)");
        }

        return new BackfillOptions(conn, database, pg, Math.Max(50, batchSize), dryRun);
    }
}
|
|
|
|
/// <summary>
/// Reads scheduler documents from MongoDB and upserts them into PostgreSQL,
/// one transaction per batch. In dry-run mode documents are read and counted
/// but nothing is written.
/// </summary>
/// <remarks>
/// The runner owns the <see cref="NpgsqlDataSource"/> it creates; dispose the
/// runner (<c>await using</c>) to release it.
/// </remarks>
internal sealed class BackfillRunner : IAsyncDisposable
{
    // Upsert keyed on id. On conflict every column is overwritten with the
    // incoming value, except created_at which keeps the earlier timestamp.
    private const string UpsertScheduleSql = @"
INSERT INTO scheduler.schedules (
    id, tenant_id, name, description, enabled, cron_expression, timezone, mode,
    selection, only_if, notify, limits, subscribers, created_at, created_by, updated_at, updated_by, deleted_at, deleted_by)
VALUES (
    @id, @tenant_id, @name, @description, @enabled, @cron_expression, @timezone, @mode,
    @selection, @only_if, @notify, @limits, @subscribers, @created_at, @created_by, @updated_at, @updated_by, @deleted_at, @deleted_by)
ON CONFLICT (id) DO UPDATE SET
    tenant_id = EXCLUDED.tenant_id,
    name = EXCLUDED.name,
    description = EXCLUDED.description,
    enabled = EXCLUDED.enabled,
    cron_expression = EXCLUDED.cron_expression,
    timezone = EXCLUDED.timezone,
    mode = EXCLUDED.mode,
    selection = EXCLUDED.selection,
    only_if = EXCLUDED.only_if,
    notify = EXCLUDED.notify,
    limits = EXCLUDED.limits,
    subscribers = EXCLUDED.subscribers,
    created_at = LEAST(scheduler.schedules.created_at, EXCLUDED.created_at),
    created_by = EXCLUDED.created_by,
    updated_at = EXCLUDED.updated_at,
    updated_by = EXCLUDED.updated_by,
    deleted_at = EXCLUDED.deleted_at,
    deleted_by = EXCLUDED.deleted_by;";

    // Upsert keyed on id. created_at keeps the earlier timestamp and created_by
    // keeps the stored value when the incoming one is NULL.
    private const string UpsertRunSql = @"
INSERT INTO scheduler.runs (
    id, tenant_id, schedule_id, state, trigger, stats, deltas, reason, retry_of,
    created_at, started_at, finished_at, error, created_by, updated_at, metadata)
VALUES (
    @id, @tenant_id, @schedule_id, @state, @trigger, @stats, @deltas, @reason, @retry_of,
    @created_at, @started_at, @finished_at, @error, @created_by, @updated_at, @metadata)
ON CONFLICT (id) DO UPDATE SET
    tenant_id = EXCLUDED.tenant_id,
    schedule_id = EXCLUDED.schedule_id,
    state = EXCLUDED.state,
    trigger = EXCLUDED.trigger,
    stats = EXCLUDED.stats,
    deltas = EXCLUDED.deltas,
    reason = EXCLUDED.reason,
    retry_of = EXCLUDED.retry_of,
    created_at = LEAST(scheduler.runs.created_at, EXCLUDED.created_at),
    started_at = EXCLUDED.started_at,
    finished_at = EXCLUDED.finished_at,
    error = EXCLUDED.error,
    created_by = COALESCE(EXCLUDED.created_by, scheduler.runs.created_by),
    updated_at = EXCLUDED.updated_at,
    metadata = EXCLUDED.metadata;";

    private readonly BackfillOptions _options;
    private readonly IMongoDatabase _mongo;
    private readonly NpgsqlDataSource _pg;

    /// <summary>
    /// Creates the Mongo database handle and the PostgreSQL data source from
    /// the supplied options.
    /// </summary>
    /// <param name="options">Resolved connection and batching settings.</param>
    public BackfillRunner(BackfillOptions options)
    {
        _options = options;
        _mongo = new MongoClient(options.MongoConnectionString).GetDatabase(options.MongoDatabase);
        _pg = NpgsqlDataSource.Create(options.PostgresConnectionString);
    }

    /// <summary>
    /// Backfills schedules first, then runs, writing progress to the console.
    /// </summary>
    public async Task RunAsync()
    {
        Console.WriteLine($"Mongo -> Postgres backfill starting (dry-run={_options.DryRun})");
        await BackfillSchedulesAsync();
        await BackfillRunsAsync();
        Console.WriteLine("Backfill complete.");
    }

    /// <summary>
    /// Disposes the PostgreSQL data source owned by this runner.
    /// </summary>
    public ValueTask DisposeAsync() => _pg.DisposeAsync();

    /// <summary>Backfills the schedules collection and prints the total.</summary>
    private async Task BackfillSchedulesAsync()
    {
        var total = await BackfillCollectionAsync<Schedule>(
            new SchedulerMongoOptions().SchedulesCollection,
            PersistSchedulesAsync);

        Console.WriteLine($"Schedules backfilled: {total}");
    }

    /// <summary>Backfills the runs collection and prints the total.</summary>
    private async Task BackfillRunsAsync()
    {
        var total = await BackfillCollectionAsync<Run>(
            new SchedulerMongoOptions().RunsCollection,
            PersistRunsAsync);

        Console.WriteLine($"Runs backfilled: {total}");
    }

    /// <summary>
    /// Iterates every document of a Mongo collection, deserializes each one to
    /// <typeparamref name="TDocument"/> and hands them to
    /// <paramref name="persistAsync"/> in groups of at most
    /// <see cref="BackfillOptions.BatchSize"/>.
    /// </summary>
    /// <param name="collectionName">Name of the Mongo collection to read.</param>
    /// <param name="persistAsync">Callback that stores one batch and returns the number it reports as processed.</param>
    /// <returns>The sum of the values returned by <paramref name="persistAsync"/>.</returns>
    private async Task<long> BackfillCollectionAsync<TDocument>(
        string collectionName,
        Func<IEnumerable<TDocument>, Task<long>> persistAsync)
    {
        var collection = _mongo.GetCollection<BsonDocument>(collectionName);
        using var cursor = await collection.Find(FilterDefinition<BsonDocument>.Empty).ToCursorAsync();

        var batch = new List<TDocument>(_options.BatchSize);
        long total = 0;

        while (await cursor.MoveNextAsync())
        {
            foreach (var doc in cursor.Current)
            {
                batch.Add(BsonSerializer.Deserialize<TDocument>(doc));

                if (batch.Count >= _options.BatchSize)
                {
                    // The callback is awaited before the buffer is reused, so
                    // clearing it here cannot race with the persist step.
                    total += await persistAsync(batch);
                    batch.Clear();
                }
            }
        }

        // Flush the final, partially filled batch.
        if (batch.Count > 0)
        {
            total += await persistAsync(batch);
        }

        return total;
    }

    /// <summary>
    /// Upserts a batch of schedules inside a single transaction.
    /// </summary>
    /// <param name="schedules">Schedules to store.</param>
    /// <returns>
    /// In dry-run mode, the number of schedules supplied; otherwise the sum of
    /// the row counts reported by each upsert command.
    /// </returns>
    private async Task<long> PersistSchedulesAsync(IEnumerable<Schedule> schedules)
    {
        if (_options.DryRun)
        {
            return schedules.LongCount();
        }

        await using var conn = await _pg.OpenConnectionAsync();
        await using var tx = await conn.BeginTransactionAsync();

        long affected = 0;
        foreach (var schedule in schedules)
        {
            await using var cmd = new NpgsqlCommand(UpsertScheduleSql, conn, tx);
            cmd.Parameters.AddWithValue("id", schedule.Id);
            cmd.Parameters.AddWithValue("tenant_id", schedule.TenantId);
            cmd.Parameters.AddWithValue("name", schedule.Name);
            // description, deleted_at and deleted_by are always written as NULL by this backfill.
            cmd.Parameters.AddWithValue("description", DBNull.Value);
            cmd.Parameters.AddWithValue("enabled", schedule.Enabled);
            cmd.Parameters.AddWithValue("cron_expression", schedule.CronExpression);
            cmd.Parameters.AddWithValue("timezone", schedule.Timezone);
            cmd.Parameters.AddWithValue("mode", BackfillMappings.ToScheduleMode(schedule.Mode));
            // NOTE(review): the serialized JSON is bound as a plain string; confirm the
            // target columns accept text parameters (a jsonb column would need an explicit type).
            cmd.Parameters.AddWithValue("selection", CanonicalJsonSerializer.Serialize(schedule.Selection));
            cmd.Parameters.AddWithValue("only_if", CanonicalJsonSerializer.Serialize(schedule.OnlyIf));
            cmd.Parameters.AddWithValue("notify", CanonicalJsonSerializer.Serialize(schedule.Notify));
            cmd.Parameters.AddWithValue("limits", CanonicalJsonSerializer.Serialize(schedule.Limits));
            cmd.Parameters.AddWithValue("subscribers", schedule.Subscribers.ToArray());
            cmd.Parameters.AddWithValue("created_at", schedule.CreatedAt.UtcDateTime);
            cmd.Parameters.AddWithValue("created_by", schedule.CreatedBy);
            cmd.Parameters.AddWithValue("updated_at", schedule.UpdatedAt.UtcDateTime);
            cmd.Parameters.AddWithValue("updated_by", schedule.UpdatedBy);
            cmd.Parameters.AddWithValue("deleted_at", DBNull.Value);
            cmd.Parameters.AddWithValue("deleted_by", DBNull.Value);

            affected += await cmd.ExecuteNonQueryAsync();
        }

        await tx.CommitAsync();
        return affected;
    }

    /// <summary>
    /// Upserts a batch of runs inside a single transaction.
    /// </summary>
    /// <param name="runs">Runs to store.</param>
    /// <returns>
    /// In dry-run mode, the number of runs supplied; otherwise the sum of the
    /// row counts reported by each upsert command.
    /// </returns>
    private async Task<long> PersistRunsAsync(IEnumerable<Run> runs)
    {
        if (_options.DryRun)
        {
            return runs.LongCount();
        }

        await using var conn = await _pg.OpenConnectionAsync();
        await using var tx = await conn.BeginTransactionAsync();

        long affected = 0;
        foreach (var run in runs)
        {
            await using var cmd = new NpgsqlCommand(UpsertRunSql, conn, tx);
            cmd.Parameters.AddWithValue("id", run.Id);
            cmd.Parameters.AddWithValue("tenant_id", run.TenantId);
            cmd.Parameters.AddWithValue("schedule_id", (object?)run.ScheduleId ?? DBNull.Value);
            cmd.Parameters.AddWithValue("state", BackfillMappings.ToRunState(run.State));
            cmd.Parameters.AddWithValue("trigger", BackfillMappings.ToRunTrigger(run.Trigger));
            // NOTE(review): JSON values are bound as plain strings; confirm the column types accept that.
            cmd.Parameters.AddWithValue("stats", CanonicalJsonSerializer.Serialize(run.Stats));
            cmd.Parameters.AddWithValue("deltas", CanonicalJsonSerializer.Serialize(run.Deltas));
            cmd.Parameters.AddWithValue("reason", CanonicalJsonSerializer.Serialize(run.Reason));
            cmd.Parameters.AddWithValue("retry_of", (object?)run.RetryOf ?? DBNull.Value);
            cmd.Parameters.AddWithValue("created_at", run.CreatedAt.UtcDateTime);
            cmd.Parameters.AddWithValue("started_at", (object?)run.StartedAt?.UtcDateTime ?? DBNull.Value);
            cmd.Parameters.AddWithValue("finished_at", (object?)run.FinishedAt?.UtcDateTime ?? DBNull.Value);
            cmd.Parameters.AddWithValue("error", (object?)run.Error ?? DBNull.Value);
            // created_by is taken from the run's manual reason, falling back to "system".
            cmd.Parameters.AddWithValue("created_by", (object?)run.Reason?.ManualReason ?? "system");
            // updated_at is stamped with the time of the backfill, not a value from the source document.
            cmd.Parameters.AddWithValue("updated_at", DateTime.UtcNow);
            cmd.Parameters.AddWithValue("metadata", JsonSerializer.Serialize(new { schema = run.SchemaVersion }));

            affected += await cmd.ExecuteNonQueryAsync();
        }

        await tx.CommitAsync();
        return affected;
    }
}
|