Files
git.stella-ops.org/src/Scheduler/Tools/Scheduler.Backfill/Program.cs
StellaOps Bot 37cba83708
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
up
2025-12-03 00:10:19 +02:00

316 lines
12 KiB
C#

using System.Text.Json;
using MongoDB.Bson;
using MongoDB.Bson.Serialization;
using MongoDB.Driver;
using Npgsql;
using Scheduler.Backfill;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Storage.Mongo.Options;
// Entry point: turn the command line into resolved options, then run the backfill once.
var cli = ParseArgs(args);
var backfillOptions = BackfillOptions.From(
    cli.MongoConnection,
    cli.MongoDatabase,
    cli.PostgresConnection,
    cli.BatchSize,
    cli.DryRun);

await new BackfillRunner(backfillOptions).RunAsync();

// Only reached when RunAsync completed without throwing.
return 0;
// Parses the command-line arguments into a BackfillCliOptions value.
//
// Recognized options (values are read with NextValue):
//   --mongo | -m <conn>   MongoDB connection string
//   --mongo-db <name>     MongoDB database name
//   --pg | -p <conn>      PostgreSQL connection string
//   --batch <n>           batch size; 500 is used when the value is missing or not an integer
//   --dry-run             sets DryRun to true
//
// Options that are not supplied stay null (strings), 500 (batch) or false (dry-run).
// Unrecognized arguments and unusable --batch values are still tolerated, but are now
// reported on stderr so that a typo such as "--dryrun" does not go unnoticed.
static BackfillCliOptions ParseArgs(string[] args)
{
    const int defaultBatchSize = 500;

    string? mongo = null;
    string? mongoDb = null;
    string? pg = null;
    int batch = defaultBatchSize;
    bool dryRun = false;

    for (var i = 0; i < args.Length; i++)
    {
        var current = args[i];
        switch (current)
        {
            case "--mongo" or "-m":
                mongo = NextValue(args, ref i);
                break;

            case "--mongo-db":
                mongoDb = NextValue(args, ref i);
                break;

            case "--pg" or "-p":
                pg = NextValue(args, ref i);
                break;

            case "--batch":
            {
                var rawBatch = NextValue(args, ref i);

                // Parse with the invariant culture: the value is machine input, not localized text.
                if (int.TryParse(
                        rawBatch,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var parsedBatch))
                {
                    batch = parsedBatch;
                }
                else
                {
                    batch = defaultBatchSize;
                    Console.Error.WriteLine(
                        $"Warning: invalid --batch value '{rawBatch}'; using {defaultBatchSize}.");
                }

                break;
            }

            case "--dry-run":
                dryRun = true;
                break;

            default:
                // Keep going (same tolerance as before), but make the ignored token visible.
                Console.Error.WriteLine($"Warning: ignoring unrecognized argument '{current}'.");
                break;
        }
    }

    return new BackfillCliOptions(mongo, mongoDb, pg, batch, dryRun);
}
// Returns the value that follows the option at args[index] and advances index onto it.
//
// Returns string.Empty and leaves index unchanged when:
//   - there is no following argument, or
//   - the following argument is itself an option ("--name", or a two-character "-x"
//     with a letter). Consuming it as a value would silently drop that option, e.g.
//     "--mongo --dry-run" would lose --dry-run.
//
// Tokens such as "-5" are still treated as values.
static string NextValue(string[] args, ref int index)
{
    var next = index + 1;
    if (next >= args.Length)
    {
        return string.Empty;
    }

    var candidate = args[next];
    var looksLikeOption =
        candidate.StartsWith("--", StringComparison.Ordinal)
        || (candidate.Length == 2 && candidate[0] == '-' && char.IsLetter(candidate[1]));

    if (looksLikeOption)
    {
        // Leave the token in place so the caller's loop sees it as an option.
        return string.Empty;
    }

    index = next;
    return candidate;
}
/// <summary>
/// Raw values collected from the command line by <c>ParseArgs</c>, before any
/// environment-variable or default fallback is applied.
/// </summary>
/// <param name="MongoConnection">Value given with <c>--mongo</c> / <c>-m</c>; <c>null</c> when the option was not supplied.</param>
/// <param name="MongoDatabase">Value given with <c>--mongo-db</c>; <c>null</c> when the option was not supplied.</param>
/// <param name="PostgresConnection">Value given with <c>--pg</c> / <c>-p</c>; <c>null</c> when the option was not supplied.</param>
/// <param name="BatchSize">Value given with <c>--batch</c>; <c>ParseArgs</c> uses 500 when the option is absent or not an integer.</param>
/// <param name="DryRun"><c>true</c> when <c>--dry-run</c> was supplied.</param>
internal sealed record BackfillCliOptions(
string? MongoConnection,
string? MongoDatabase,
string? PostgresConnection,
int BatchSize,
bool DryRun);
/// <summary>
/// Fully resolved settings for a backfill run.
/// </summary>
/// <param name="MongoConnectionString">Connection string used to create the Mongo client.</param>
/// <param name="MongoDatabase">Name of the Mongo database to read from.</param>
/// <param name="PostgresConnectionString">Connection string used to create the Npgsql data source.</param>
/// <param name="BatchSize">Number of documents persisted per transaction.</param>
/// <param name="DryRun">When <c>true</c>, documents are read and counted but nothing is written.</param>
internal sealed record BackfillOptions(
    string MongoConnectionString,
    string MongoDatabase,
    string PostgresConnectionString,
    int BatchSize,
    bool DryRun)
{
    /// <summary>Smallest batch size <see cref="From"/> will produce.</summary>
    private const int MinimumBatchSize = 50;

    /// <summary>
    /// Resolves the effective options from explicit values, environment variables and defaults.
    /// </summary>
    /// <param name="mongoConn">
    /// Explicit Mongo connection string. When null/blank, <c>MONGO_CONNECTION_STRING</c> is used,
    /// then the default from <see cref="SchedulerMongoOptions"/>.
    /// </param>
    /// <param name="mongoDb">
    /// Explicit Mongo database name. When null/blank, <c>MONGO_DATABASE</c> is used,
    /// then the default from <see cref="SchedulerMongoOptions"/>.
    /// </param>
    /// <param name="pgConn">
    /// Explicit PostgreSQL connection string. When null/blank, <c>POSTGRES_CONNECTION_STRING</c> is used.
    /// </param>
    /// <param name="batchSize">Requested batch size; values below 50 are raised to 50.</param>
    /// <param name="dryRun">Passed through unchanged.</param>
    /// <returns>The resolved options.</returns>
    /// <exception cref="ArgumentException">
    /// No PostgreSQL connection string was supplied, either explicitly or through the environment.
    /// </exception>
    public static BackfillOptions From(string? mongoConn, string? mongoDb, string? pgConn, int batchSize, bool dryRun)
    {
        var mongoOptions = new SchedulerMongoOptions();

        var conn = string.IsNullOrWhiteSpace(mongoConn)
            ? Environment.GetEnvironmentVariable("MONGO_CONNECTION_STRING") ?? mongoOptions.ConnectionString
            : mongoConn;

        var database = string.IsNullOrWhiteSpace(mongoDb)
            ? Environment.GetEnvironmentVariable("MONGO_DATABASE") ?? mongoOptions.Database
            : mongoDb!;

        // Consult the environment BEFORE rejecting a missing value. Previously the throw
        // happened first, which made the POSTGRES_CONNECTION_STRING fallback unreachable.
        var pg = string.IsNullOrWhiteSpace(pgConn)
            ? Environment.GetEnvironmentVariable("POSTGRES_CONNECTION_STRING")
            : pgConn;

        if (string.IsNullOrWhiteSpace(pg))
        {
            throw new ArgumentException("PostgreSQL connection string is required (--pg or POSTGRES_CONNECTION_STRING)");
        }

        return new BackfillOptions(conn, database, pg, Math.Max(MinimumBatchSize, batchSize), dryRun);
    }
}
/// <summary>
/// Copies scheduler schedules and runs from MongoDB into the PostgreSQL <c>scheduler</c> schema.
/// Documents are read through a cursor, grouped into batches of <see cref="BackfillOptions.BatchSize"/>,
/// and each batch is upserted inside its own transaction.
/// </summary>
/// <remarks>
/// The runner owns an <see cref="NpgsqlDataSource"/>; dispose the runner (<c>await using</c>)
/// to release it.
/// </remarks>
internal sealed class BackfillRunner : IAsyncDisposable
{
    // Upsert keyed on id. On conflict every column is overwritten with the incoming value,
    // except created_at, which keeps the earlier of the stored and incoming timestamps.
    private const string UpsertScheduleSql = @"
INSERT INTO scheduler.schedules (
    id, tenant_id, name, description, enabled, cron_expression, timezone, mode,
    selection, only_if, notify, limits, subscribers, created_at, created_by, updated_at, updated_by, deleted_at, deleted_by)
VALUES (
    @id, @tenant_id, @name, @description, @enabled, @cron_expression, @timezone, @mode,
    @selection, @only_if, @notify, @limits, @subscribers, @created_at, @created_by, @updated_at, @updated_by, @deleted_at, @deleted_by)
ON CONFLICT (id) DO UPDATE SET
    tenant_id = EXCLUDED.tenant_id,
    name = EXCLUDED.name,
    description = EXCLUDED.description,
    enabled = EXCLUDED.enabled,
    cron_expression = EXCLUDED.cron_expression,
    timezone = EXCLUDED.timezone,
    mode = EXCLUDED.mode,
    selection = EXCLUDED.selection,
    only_if = EXCLUDED.only_if,
    notify = EXCLUDED.notify,
    limits = EXCLUDED.limits,
    subscribers = EXCLUDED.subscribers,
    created_at = LEAST(scheduler.schedules.created_at, EXCLUDED.created_at),
    created_by = EXCLUDED.created_by,
    updated_at = EXCLUDED.updated_at,
    updated_by = EXCLUDED.updated_by,
    deleted_at = EXCLUDED.deleted_at,
    deleted_by = EXCLUDED.deleted_by;";

    // Upsert keyed on id. created_at keeps the earlier timestamp; created_by keeps the stored
    // value only when the incoming one is NULL; every other column is overwritten.
    private const string UpsertRunSql = @"
INSERT INTO scheduler.runs (
    id, tenant_id, schedule_id, state, trigger, stats, deltas, reason, retry_of,
    created_at, started_at, finished_at, error, created_by, updated_at, metadata)
VALUES (
    @id, @tenant_id, @schedule_id, @state, @trigger, @stats, @deltas, @reason, @retry_of,
    @created_at, @started_at, @finished_at, @error, @created_by, @updated_at, @metadata)
ON CONFLICT (id) DO UPDATE SET
    tenant_id = EXCLUDED.tenant_id,
    schedule_id = EXCLUDED.schedule_id,
    state = EXCLUDED.state,
    trigger = EXCLUDED.trigger,
    stats = EXCLUDED.stats,
    deltas = EXCLUDED.deltas,
    reason = EXCLUDED.reason,
    retry_of = EXCLUDED.retry_of,
    created_at = LEAST(scheduler.runs.created_at, EXCLUDED.created_at),
    started_at = EXCLUDED.started_at,
    finished_at = EXCLUDED.finished_at,
    error = EXCLUDED.error,
    created_by = COALESCE(EXCLUDED.created_by, scheduler.runs.created_by),
    updated_at = EXCLUDED.updated_at,
    metadata = EXCLUDED.metadata;";

    private readonly BackfillOptions _options;
    private readonly IMongoDatabase _mongo;
    private readonly NpgsqlDataSource _pg;

    /// <summary>
    /// Creates the Mongo database handle and the PostgreSQL data source from <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Resolved backfill settings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public BackfillRunner(BackfillOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        _options = options;
        _mongo = new MongoClient(options.MongoConnectionString).GetDatabase(options.MongoDatabase);
        _pg = NpgsqlDataSource.Create(options.PostgresConnectionString);
    }

    /// <summary>
    /// Backfills schedules first, then runs, writing progress to the console.
    /// </summary>
    public async Task RunAsync()
    {
        Console.WriteLine($"Mongo -> Postgres backfill starting (dry-run={_options.DryRun})");
        await BackfillSchedulesAsync();
        await BackfillRunsAsync();
        Console.WriteLine("Backfill complete.");
    }

    /// <summary>Releases the PostgreSQL data source owned by this runner.</summary>
    public ValueTask DisposeAsync() => _pg.DisposeAsync();

    /// <summary>Backfills every document of the schedules collection.</summary>
    private Task BackfillSchedulesAsync() =>
        BackfillCollectionAsync<Schedule>(
            new SchedulerMongoOptions().SchedulesCollection,
            "Schedules",
            PersistSchedulesAsync);

    /// <summary>Backfills every document of the runs collection.</summary>
    private Task BackfillRunsAsync() =>
        BackfillCollectionAsync<Run>(
            new SchedulerMongoOptions().RunsCollection,
            "Runs",
            PersistRunsAsync);

    /// <summary>
    /// Streams all documents of <paramref name="collectionName"/>, deserializes each one to
    /// <typeparamref name="T"/>, and hands them to <paramref name="persistAsync"/> in batches.
    /// Prints "<paramref name="label"/> backfilled: N" when finished, where N is the sum of the
    /// values returned by <paramref name="persistAsync"/>.
    /// </summary>
    private async Task BackfillCollectionAsync<T>(
        string collectionName,
        string label,
        Func<IReadOnlyCollection<T>, Task<long>> persistAsync)
    {
        var collection = _mongo.GetCollection<BsonDocument>(collectionName);
        using var cursor = await collection.Find(FilterDefinition<BsonDocument>.Empty).ToCursorAsync();

        var batch = new List<T>(_options.BatchSize);
        long total = 0;

        while (await cursor.MoveNextAsync())
        {
            foreach (var doc in cursor.Current)
            {
                batch.Add(BsonSerializer.Deserialize<T>(doc));
                if (batch.Count >= _options.BatchSize)
                {
                    // The list is reused: persistAsync must finish with it before it is cleared.
                    total += await persistAsync(batch);
                    batch.Clear();
                }
            }
        }

        // Flush the final, partially filled batch.
        if (batch.Count > 0)
        {
            total += await persistAsync(batch);
        }

        Console.WriteLine($"{label} backfilled: {total}");
    }

    /// <summary>
    /// Upserts one batch of schedules in a single transaction.
    /// </summary>
    /// <returns>
    /// In dry-run mode, the number of schedules in the batch (nothing is written);
    /// otherwise the sum of rows affected as reported by the server.
    /// </returns>
    private async Task<long> PersistSchedulesAsync(IReadOnlyCollection<Schedule> schedules)
    {
        if (_options.DryRun)
        {
            return schedules.Count;
        }

        await using var conn = await _pg.OpenConnectionAsync();
        await using var tx = await conn.BeginTransactionAsync();

        long affected = 0;
        foreach (var schedule in schedules)
        {
            await using var cmd = new NpgsqlCommand(UpsertScheduleSql, conn, tx);
            var p = cmd.Parameters;

            p.AddWithValue("id", schedule.Id);
            p.AddWithValue("tenant_id", schedule.TenantId);
            p.AddWithValue("name", schedule.Name);
            // NOTE(review): description, deleted_at and deleted_by are always written as NULL,
            // and the ON CONFLICT clause overwrites existing values with them - confirm intended.
            p.AddWithValue("description", DBNull.Value);
            p.AddWithValue("enabled", schedule.Enabled);
            p.AddWithValue("cron_expression", schedule.CronExpression);
            p.AddWithValue("timezone", schedule.Timezone);
            p.AddWithValue("mode", BackfillMappings.ToScheduleMode(schedule.Mode));
            // NOTE(review): JSON payloads are bound as plain strings; if the target columns are
            // json/jsonb, confirm the server accepts them without an explicit parameter type.
            p.AddWithValue("selection", CanonicalJsonSerializer.Serialize(schedule.Selection));
            p.AddWithValue("only_if", CanonicalJsonSerializer.Serialize(schedule.OnlyIf));
            p.AddWithValue("notify", CanonicalJsonSerializer.Serialize(schedule.Notify));
            p.AddWithValue("limits", CanonicalJsonSerializer.Serialize(schedule.Limits));
            p.AddWithValue("subscribers", schedule.Subscribers.ToArray());
            p.AddWithValue("created_at", schedule.CreatedAt.UtcDateTime);
            p.AddWithValue("created_by", schedule.CreatedBy);
            p.AddWithValue("updated_at", schedule.UpdatedAt.UtcDateTime);
            p.AddWithValue("updated_by", schedule.UpdatedBy);
            p.AddWithValue("deleted_at", DBNull.Value);
            p.AddWithValue("deleted_by", DBNull.Value);

            affected += await cmd.ExecuteNonQueryAsync();
        }

        await tx.CommitAsync();
        return affected;
    }

    /// <summary>
    /// Upserts one batch of runs in a single transaction.
    /// </summary>
    /// <returns>
    /// In dry-run mode, the number of runs in the batch (nothing is written);
    /// otherwise the sum of rows affected as reported by the server.
    /// </returns>
    private async Task<long> PersistRunsAsync(IReadOnlyCollection<Run> runs)
    {
        if (_options.DryRun)
        {
            return runs.Count;
        }

        await using var conn = await _pg.OpenConnectionAsync();
        await using var tx = await conn.BeginTransactionAsync();

        long affected = 0;
        foreach (var run in runs)
        {
            await using var cmd = new NpgsqlCommand(UpsertRunSql, conn, tx);
            var p = cmd.Parameters;

            p.AddWithValue("id", run.Id);
            p.AddWithValue("tenant_id", run.TenantId);
            p.AddWithValue("schedule_id", (object?)run.ScheduleId ?? DBNull.Value);
            p.AddWithValue("state", BackfillMappings.ToRunState(run.State));
            p.AddWithValue("trigger", BackfillMappings.ToRunTrigger(run.Trigger));
            p.AddWithValue("stats", CanonicalJsonSerializer.Serialize(run.Stats));
            p.AddWithValue("deltas", CanonicalJsonSerializer.Serialize(run.Deltas));
            p.AddWithValue("reason", CanonicalJsonSerializer.Serialize(run.Reason));
            p.AddWithValue("retry_of", (object?)run.RetryOf ?? DBNull.Value);
            p.AddWithValue("created_at", run.CreatedAt.UtcDateTime);
            p.AddWithValue("started_at", (object?)run.StartedAt?.UtcDateTime ?? DBNull.Value);
            p.AddWithValue("finished_at", (object?)run.FinishedAt?.UtcDateTime ?? DBNull.Value);
            p.AddWithValue("error", (object?)run.Error ?? DBNull.Value);
            // created_by is taken from the manual reason when present, otherwise "system".
            p.AddWithValue("created_by", (object?)run.Reason?.ManualReason ?? "system");
            // updated_at records the time of the backfill, not a value from the source document.
            p.AddWithValue("updated_at", DateTime.UtcNow);
            p.AddWithValue("metadata", JsonSerializer.Serialize(new { schema = run.SchemaVersion }));

            affected += await cmd.ExecuteNonQueryAsync();
        }

        await tx.CommitAsync();
        return affected;
    }
}