using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using MongoDB.Driver;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Common.State;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Documents;

namespace SourceStateSeeder;

/// <summary>
/// Command-line entry point: reads a JSON seed file, loads the content files it references,
/// and passes the resulting specification to a <see cref="SourceStateSeedProcessor"/> wired to
/// the MongoDB database named on the command line.
/// </summary>
internal static class Program
{
    // Lenient reader settings so hand-edited seed files (comments, trailing commas,
    // arbitrary property casing) are accepted.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        ReadCommentHandling = JsonCommentHandling.Skip,
        AllowTrailingCommas = true,
    };

    /// <summary>
    /// Parses the arguments, builds the seed specification and runs the seed processor.
    /// </summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>
    /// <c>0</c> on success; <c>1</c> when the arguments are unusable, no source name is
    /// available, or any exception is raised while seeding.
    /// </returns>
    public static async Task<int> Main(string[] args)
    {
        try
        {
            var options = SeedOptions.Parse(args);
            if (options is null)
            {
                SeedOptions.PrintUsage();
                return 1;
            }

            var seed = await LoadSpecificationAsync(options.InputPath).ConfigureAwait(false);

            // The seed file wins when it names a source; a blank value falls back to --source.
            var sourceName = string.IsNullOrWhiteSpace(seed.Source) ? options.SourceName : seed.Source;
            if (string.IsNullOrWhiteSpace(sourceName))
            {
                Console.Error.WriteLine("Source name must be supplied via --source or the seed file.");
                return 1;
            }

            var specification = await BuildSpecificationAsync(seed, sourceName, options.InputPath, CancellationToken.None).ConfigureAwait(false);

            var client = new MongoClient(options.ConnectionString);
            var database = client.GetDatabase(options.DatabaseName);
            var loggerFactory = NullLoggerFactory.Instance;

            // NOTE(review): logger category types are inferred from the consuming constructors -
            // confirm against the StellaOps.Concelier signatures.
            var documentStore = new DocumentStore(database, loggerFactory.CreateLogger<DocumentStore>());
            var rawStorage = new RawDocumentStorage(database);
            var stateRepository = new MongoSourceStateRepository(database, loggerFactory.CreateLogger<MongoSourceStateRepository>());

            var processor = new SourceStateSeedProcessor(
                documentStore,
                rawStorage,
                stateRepository,
                TimeProvider.System,
                loggerFactory.CreateLogger<SourceStateSeedProcessor>());

            var result = await processor.ProcessAsync(specification, CancellationToken.None).ConfigureAwait(false);

            Console.WriteLine(
                $"Seeded {result.DocumentsProcessed} document(s) for {sourceName} " +
                $"(pendingDocuments+= {result.PendingDocumentsAdded}, pendingMappings+= {result.PendingMappingsAdded}, knownAdvisories+= {result.KnownAdvisoriesAdded.Count}).");
            return 0;
        }
        catch (Exception ex)
        {
            // Top-level boundary: report the failure and signal it through the exit code.
            Console.Error.WriteLine($"Error: {ex.Message}");
            return 1;
        }
    }

    /// <summary>Deserializes the seed file at <paramref name="inputPath"/>.</summary>
    /// <param name="inputPath">Path of the JSON seed file.</param>
    /// <returns>The deserialized seed.</returns>
    /// <exception cref="InvalidOperationException">The file content deserialized to <c>null</c>.</exception>
    private static async Task<StateSeed> LoadSpecificationAsync(string inputPath)
    {
        await using var stream = File.OpenRead(inputPath);
        var seed = await JsonSerializer.DeserializeAsync<StateSeed>(stream, JsonOptions).ConfigureAwait(false)
            ?? throw new InvalidOperationException("Input file deserialized to null.");
        return seed;
    }

    /// <summary>
    /// Converts the deserialized seed into the specification consumed by the seed processor.
    /// </summary>
    /// <param name="seed">Deserialized seed file content.</param>
    /// <param name="sourceName">Source name to record on the specification.</param>
    /// <param name="inputPath">Seed file path; relative content files resolve against its directory.</param>
    /// <param name="cancellationToken">Token passed to the content file reads.</param>
    /// <returns>The populated specification.</returns>
    private static async Task<SourceStateSeedSpecification> BuildSpecificationAsync(
        StateSeed seed,
        string sourceName,
        string inputPath,
        CancellationToken cancellationToken)
    {
        var baseDirectory = Path.GetDirectoryName(Path.GetFullPath(inputPath)) ?? Directory.GetCurrentDirectory();

        // An explicit "documents": null in the JSON overrides the property initializer.
        var documentSeeds = seed.Documents ?? new List<DocumentSeed>();
        var documents = new List<SourceStateSeedDocument>(documentSeeds.Count);

        foreach (var documentSeed in documentSeeds)
        {
            documents.Add(await BuildDocumentAsync(documentSeed, baseDirectory, cancellationToken).ConfigureAwait(false));
        }

        return new SourceStateSeedSpecification
        {
            Source = sourceName,
            Documents = documents.AsReadOnly(),
            Cursor = BuildCursor(seed.Cursor),
            KnownAdvisories = NormalizeStrings(seed.KnownAdvisories),
            CompletedAt = seed.CompletedAt,
        };
    }

    /// <summary>
    /// Validates a single document entry, reads its content file and maps it to a
    /// <see cref="SourceStateSeedDocument"/>.
    /// </summary>
    /// <param name="seed">Document entry from the seed file.</param>
    /// <param name="baseDirectory">Directory used to resolve a relative <c>contentFile</c>.</param>
    /// <param name="cancellationToken">Token passed to the file read.</param>
    /// <returns>The mapped document including its raw content bytes.</returns>
    /// <exception cref="InvalidOperationException">The entry is null or lacks <c>uri</c> / <c>contentFile</c>.</exception>
    /// <exception cref="FileNotFoundException">The resolved content file does not exist.</exception>
    private static async Task<SourceStateSeedDocument> BuildDocumentAsync(
        DocumentSeed seed,
        string baseDirectory,
        CancellationToken cancellationToken)
    {
        // A literal null inside the JSON "documents" array arrives here as a null entry.
        if (seed is null)
        {
            throw new InvalidOperationException("Seed entry is null.");
        }

        if (string.IsNullOrWhiteSpace(seed.Uri))
        {
            throw new InvalidOperationException("Seed entry missing 'uri'.");
        }

        if (string.IsNullOrWhiteSpace(seed.ContentFile))
        {
            throw new InvalidOperationException($"Seed entry for '{seed.Uri}' missing 'contentFile'.");
        }

        var contentPath = ResolvePath(seed.ContentFile, baseDirectory);
        if (!File.Exists(contentPath))
        {
            throw new FileNotFoundException($"Content file not found for '{seed.Uri}'.", contentPath);
        }

        var contentBytes = await File.ReadAllBytesAsync(contentPath, cancellationToken).ConfigureAwait(false);

        // Copy into case-insensitive dictionaries so key lookups ignore casing.
        var metadata = seed.Metadata is null
            ? null
            : new Dictionary<string, string>(seed.Metadata, StringComparer.OrdinalIgnoreCase);

        var headers = seed.Headers is null
            ? null
            : new Dictionary<string, string>(seed.Headers, StringComparer.OrdinalIgnoreCase);

        if (!string.IsNullOrWhiteSpace(seed.ContentType))
        {
            headers ??= new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

            // An explicitly seeded content-type header takes precedence over the shortcut property.
            headers.TryAdd("content-type", seed.ContentType);
        }

        return new SourceStateSeedDocument
        {
            Uri = seed.Uri,
            DocumentId = seed.DocumentId,
            Content = contentBytes,
            ContentType = seed.ContentType,
            Status = string.IsNullOrWhiteSpace(seed.Status) ? DocumentStatuses.PendingParse : seed.Status,
            Headers = headers,
            Metadata = metadata,
            Etag = seed.Etag,
            LastModified = ParseOptionalDate(seed.LastModified),
            ExpiresAt = seed.ExpiresAt,
            FetchedAt = ParseOptionalDate(seed.FetchedAt),
            AddToPendingDocuments = seed.AddToPendingDocuments,
            AddToPendingMappings = seed.AddToPendingMappings,
            KnownIdentifiers = NormalizeStrings(seed.KnownIdentifiers),
        };
    }

    /// <summary>Maps the optional cursor section of the seed file.</summary>
    /// <param name="cursorSeed">Cursor section, or <c>null</c> when the seed file has none.</param>
    /// <returns>The mapped cursor, or <c>null</c> when <paramref name="cursorSeed"/> is <c>null</c>.</returns>
    private static SourceStateSeedCursor? BuildCursor(CursorSeed? cursorSeed)
    {
        if (cursorSeed is null)
        {
            return null;
        }

        return new SourceStateSeedCursor
        {
            PendingDocuments = NormalizeGuids(cursorSeed.PendingDocuments),
            PendingMappings = NormalizeGuids(cursorSeed.PendingMappings),
            KnownAdvisories = NormalizeStrings(cursorSeed.KnownAdvisories),
            LastModifiedCursor = cursorSeed.LastModifiedCursor,
            LastFetchAt = cursorSeed.LastFetchAt,
            Additional = cursorSeed.Additional is null
                ? null
                : new Dictionary<string, string>(cursorSeed.Additional, StringComparer.OrdinalIgnoreCase),
        };
    }

    /// <summary>Removes <see cref="Guid.Empty"/> and duplicate values.</summary>
    /// <param name="values">Input identifiers; may be <c>null</c>.</param>
    /// <returns>The distinct non-empty identifiers, or <c>null</c> when none remain.</returns>
    private static IReadOnlyCollection<Guid>? NormalizeGuids(IEnumerable<Guid>? values)
    {
        if (values is null)
        {
            return null;
        }

        var set = new HashSet<Guid>();
        foreach (var guid in values)
        {
            if (guid != Guid.Empty)
            {
                set.Add(guid);
            }
        }

        return set.Count == 0 ? null : set.ToList();
    }

    /// <summary>Trims values and removes blank entries and case-insensitive duplicates.</summary>
    /// <param name="values">Input strings; may be <c>null</c>.</param>
    /// <returns>The distinct trimmed strings, or <c>null</c> when none remain.</returns>
    private static IReadOnlyCollection<string>? NormalizeStrings(IEnumerable<string>? values)
    {
        if (values is null)
        {
            return null;
        }

        var set = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var value in values)
        {
            if (!string.IsNullOrWhiteSpace(value))
            {
                set.Add(value.Trim());
            }
        }

        return set.Count == 0 ? null : set.ToList();
    }

    /// <summary>
    /// Parses an optional timestamp with the invariant culture; values without an offset are
    /// treated as UTC and the result is adjusted to UTC.
    /// </summary>
    /// <param name="value">Timestamp text, or <c>null</c>/blank.</param>
    /// <returns>The parsed value, or <c>null</c> when <paramref name="value"/> is blank.</returns>
    /// <exception cref="FormatException">The text is not a valid date/time.</exception>
    private static DateTimeOffset? ParseOptionalDate(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        return DateTimeOffset.Parse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
    }

    /// <summary>Returns rooted paths unchanged and resolves relative ones against <paramref name="baseDirectory"/>.</summary>
    private static string ResolvePath(string path, string baseDirectory)
        => Path.IsPathRooted(path) ? path : Path.GetFullPath(Path.Combine(baseDirectory, path));
}

/// <summary>Parsed command-line options.</summary>
internal sealed record SeedOptions
{
    /// <summary>MongoDB connection string (<c>--connection-string</c> / <c>-c</c>).</summary>
    public required string ConnectionString { get; init; }

    /// <summary>Target database name (<c>--database</c> / <c>-d</c>).</summary>
    public required string DatabaseName { get; init; }

    /// <summary>Path of the JSON seed file (<c>--input</c> / <c>-i</c>).</summary>
    public required string InputPath { get; init; }

    /// <summary>Optional source name (<c>--source</c> / <c>-s</c>).</summary>
    public string? SourceName { get; init; }

    /// <summary>Parses the raw command-line arguments.</summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>
    /// The parsed options, or <c>null</c> when help was requested, an argument was not
    /// recognized, or a required option is missing or blank.
    /// </returns>
    /// <exception cref="ArgumentException">An option was given without a following value.</exception>
    public static SeedOptions? Parse(string[] args)
    {
        string? connectionString = null;
        string? database = null;
        string? input = null;
        string? source = null;

        for (var i = 0; i < args.Length; i++)
        {
            var arg = args[i];
            switch (arg)
            {
                case "--connection-string":
                case "-c":
                    connectionString = TakeValue(args, ref i, arg);
                    break;

                case "--database":
                case "-d":
                    database = TakeValue(args, ref i, arg);
                    break;

                case "--input":
                case "-i":
                    input = TakeValue(args, ref i, arg);
                    break;

                case "--source":
                case "-s":
                    source = TakeValue(args, ref i, arg);
                    break;

                case "--help":
                case "-h":
                    return null;

                default:
                    Console.Error.WriteLine($"Unrecognized argument '{arg}'.");
                    return null;
            }
        }

        if (string.IsNullOrWhiteSpace(connectionString) || string.IsNullOrWhiteSpace(database) || string.IsNullOrWhiteSpace(input))
        {
            return null;
        }

        return new SeedOptions
        {
            ConnectionString = connectionString,
            DatabaseName = database,
            InputPath = input,
            SourceName = source,
        };
    }

    /// <summary>Writes the usage line to standard output.</summary>
    public static void PrintUsage()
    {
        Console.WriteLine("Usage: dotnet run --project tools/SourceStateSeeder -- --connection-string <connection> --database <name> --input <seed.json> [--source <source>]");
    }

    /// <summary>Advances <paramref name="index"/> to the value that follows option <paramref name="arg"/> and returns it.</summary>
    /// <exception cref="ArgumentException">No argument follows the option.</exception>
    private static string TakeValue(string[] args, ref int index, string arg)
    {
        if (index + 1 >= args.Length)
        {
            throw new ArgumentException($"Missing value for {arg}.");
        }

        index++;
        return args[index];
    }
}

/// <summary>Root object of the JSON seed file.</summary>
internal sealed record StateSeed
{
    /// <summary>Source name; when non-blank it takes precedence over <c>--source</c>.</summary>
    public string? Source { get; init; }

    /// <summary>Documents to seed; <c>null</c> only when the JSON sets it to <c>null</c> explicitly.</summary>
    public List<DocumentSeed>? Documents { get; init; } = new();

    /// <summary>Optional cursor section.</summary>
    public CursorSeed? Cursor { get; init; }

    /// <summary>Optional advisory identifiers copied (normalized) onto the specification.</summary>
    public List<string>? KnownAdvisories { get; init; }

    /// <summary>Optional completion timestamp copied onto the specification.</summary>
    public DateTimeOffset? CompletedAt { get; init; }
}

/// <summary>One document entry of the seed file.</summary>
// NOTE(review): dictionary value types (string) are inferred from usage - confirm against
// SourceStateSeedDocument.
internal sealed record DocumentSeed
{
    /// <summary>Document URI; required.</summary>
    public string Uri { get; init; } = string.Empty;

    /// <summary>Path of the file holding the document content; required, relative paths resolve against the seed file directory.</summary>
    public string ContentFile { get; init; } = string.Empty;

    public Guid? DocumentId { get; init; }

    /// <summary>Optional content type; also added as a <c>content-type</c> header when none is seeded.</summary>
    public string? ContentType { get; init; }

    public Dictionary<string, string>? Metadata { get; init; }

    public Dictionary<string, string>? Headers { get; init; }

    /// <summary>Document status; a blank value is replaced by <see cref="DocumentStatuses.PendingParse"/>.</summary>
    public string Status { get; init; } = DocumentStatuses.PendingParse;

    public bool AddToPendingDocuments { get; init; } = true;

    public bool AddToPendingMappings { get; init; }

    /// <summary>Optional timestamp text; parsed with the invariant culture and treated as UTC when no offset is given.</summary>
    public string? LastModified { get; init; }

    /// <summary>Optional timestamp text; parsed with the invariant culture and treated as UTC when no offset is given.</summary>
    public string? FetchedAt { get; init; }

    public string? Etag { get; init; }

    public DateTimeOffset? ExpiresAt { get; init; }

    /// <summary>Optional identifiers; trimmed and de-duplicated case-insensitively before use.</summary>
    public List<string>? KnownIdentifiers { get; init; }
}

/// <summary>Cursor section of the seed file.</summary>
// NOTE(review): element/value types are inferred from usage - confirm against SourceStateSeedCursor.
internal sealed record CursorSeed
{
    /// <summary>Document ids; empty GUIDs and duplicates are dropped before use.</summary>
    public List<Guid>? PendingDocuments { get; init; }

    /// <summary>Document ids; empty GUIDs and duplicates are dropped before use.</summary>
    public List<Guid>? PendingMappings { get; init; }

    /// <summary>Advisory identifiers; trimmed and de-duplicated case-insensitively before use.</summary>
    public List<string>? KnownAdvisories { get; init; }

    public DateTimeOffset? LastModifiedCursor { get; init; }

    public DateTimeOffset? LastFetchAt { get; init; }

    /// <summary>Extra cursor entries; copied into a case-insensitive dictionary.</summary>
    public Dictionary<string, string>? Additional { get; init; }
}