using System.Collections.Immutable;
using System.Net;
using System.Runtime.CompilerServices;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;

namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod;

/// <summary>
/// Debuginfod symbol source connector for Fedora/RHEL debuginfod services.
/// Implements the three-phase pipeline: Fetch → Parse → Map.
/// </summary>
/// <remarks>
/// Progress between phases is tracked in the source state returned by
/// <see cref="ISymbolSourceStateRepository"/>: fetched documents are queued for parse,
/// parsed documents are queued for map, and mapped documents are removed from the queue.
/// </remarks>
public sealed class DebuginfodConnector : SymbolSourceConnectorBase, ISymbolSourceCapability
{
    /// <summary>
    /// Source ID for this connector.
    /// </summary>
    public const string SourceName = "debuginfod-fedora";

    // A fetch run is aborted once the number of non-404 errors exceeds this value.
    private const int MaxTolerableFetchErrors = 5;

    // Version string stamped into the provenance of every observation built here.
    private const string ConnectorVersion = "1.0.0";

    // Backoff passed to MarkFailedAsync when a fetch run is aborted.
    private static readonly TimeSpan FetchFailureBackoff = TimeSpan.FromMinutes(15);

    private readonly IHttpClientFactory _httpClientFactory;
    private readonly ISymbolRawDocumentRepository _documentRepository;
    private readonly ISymbolObservationRepository _observationRepository;
    private readonly ISymbolSourceStateRepository _stateRepository;
    private readonly ISymbolObservationWriteGuard _writeGuard;
    private readonly DebuginfodOptions _options;
    private readonly DebuginfodDiagnostics _diagnostics;

    /// <summary>
    /// Creates the connector and validates the supplied options.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any required dependency is <c>null</c>.</exception>
    public DebuginfodConnector(
        IHttpClientFactory httpClientFactory,
        ISymbolRawDocumentRepository documentRepository,
        ISymbolObservationRepository observationRepository,
        ISymbolSourceStateRepository stateRepository,
        ISymbolObservationWriteGuard writeGuard,
        IOptions<DebuginfodOptions> options,
        DebuginfodDiagnostics diagnostics,
        ILogger<DebuginfodConnector> logger,
        TimeProvider? timeProvider = null)
        : base(logger, timeProvider)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _documentRepository = documentRepository ?? throw new ArgumentNullException(nameof(documentRepository));
        _observationRepository = observationRepository ?? throw new ArgumentNullException(nameof(observationRepository));
        _stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
        _writeGuard = writeGuard ?? throw new ArgumentNullException(nameof(writeGuard));
        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
    }

    /// <inheritdoc/>
    public override string SourceId => SourceName;

    /// <inheritdoc/>
    public override string DisplayName => "Fedora debuginfod";

    /// <inheritdoc/>
    public override IReadOnlyList<string> SupportedDistros => ["fedora", "rhel", "centos", "rocky", "alma"];

    /// <inheritdoc/>
    /// <remarks>
    /// Downloads debuginfo for every debug ID listed in the state cursor and queues each new
    /// document for parsing. 404 responses are recorded and skipped. When more than
    /// <see cref="MaxTolerableFetchErrors"/> other errors occur, the run is aborted and the
    /// source is marked failed instead of recording a successful run.
    /// </remarks>
    public override async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

        // Check backoff
        if (state.BackoffUntil.HasValue && state.BackoffUntil.Value > UtcNow)
        {
            Logger.LogInformation(
                "Debuginfod fetch skipped due to backoff until {BackoffUntil}",
                state.BackoffUntil.Value);
            return;
        }

        // Get pending debug IDs from the cursor
        var debugIds = GetPendingDebugIds(state);
        if (debugIds.Length == 0)
        {
            Logger.LogDebug("No pending debug IDs to fetch from debuginfod");
            return;
        }

        var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
        var fetchedCount = 0;
        var errorCount = 0;
        string? abortReason = null;

        foreach (var debugId in debugIds)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var document = await FetchDebugInfoAsync(httpClient, debugId, cancellationToken);
                if (document is not null)
                {
                    await _documentRepository.UpsertAsync(document, cancellationToken);
                    state = state.AddPendingParse(document.Digest);
                    fetchedCount++;
                    _diagnostics.RecordFetchSuccess();
                }
            }
            catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
            {
                Logger.LogDebug("Debug ID {DebugId} not found in debuginfod", debugId);
                _diagnostics.RecordFetchNotFound();
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                // The filter lets exceptions propagate once cancellation was requested,
                // so a shutdown is not counted as a fetch error.
                LogError(ex, "Fetch", $"Failed to fetch debug ID {debugId}");
                errorCount++;
                _diagnostics.RecordFetchError();

                if (errorCount > MaxTolerableFetchErrors)
                {
                    abortReason = $"Too many fetch errors: {ex.Message}";
                    break;
                }
            }
        }

        if (abortReason is not null)
        {
            // Persist the documents queued so far BEFORE marking the source failed, so the
            // failure marker is not overwritten by this (older) state snapshot.
            await _stateRepository.UpdateAsync(state, cancellationToken);
            await _stateRepository.MarkFailedAsync(
                SourceId,
                abortReason,
                FetchFailureBackoff,
                cancellationToken);

            Logger.LogWarning(
                "Debuginfod fetch aborted: {FetchedCount} fetched, {ErrorCount} errors",
                fetchedCount,
                errorCount);
            return;
        }

        state = state with { LastSuccessAt = UtcNow };
        await _stateRepository.UpdateAsync(state, cancellationToken);

        Logger.LogInformation(
            "Debuginfod fetch completed: {FetchedCount} fetched, {ErrorCount} errors",
            fetchedCount,
            errorCount);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Parses every document queued for parse. Successfully parsed documents move to the map
    /// queue; missing documents are dropped from the queue; documents that fail to parse are
    /// marked <see cref="DocumentStatus.Failed"/> and dropped from the queue.
    /// </remarks>
    public override async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

        if (state.PendingParse.Length == 0)
        {
            Logger.LogDebug("No documents pending parse for debuginfod");
            return;
        }

        // NOTE(review): the generic type argument was not recoverable from the source text;
        // IDwarfParser is assumed - confirm against the Internal namespace.
        var dwParser = services.GetRequiredService<IDwarfParser>();
        var parsedCount = 0;

        foreach (var digest in state.PendingParse)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
            if (document is null)
            {
                Logger.LogWarning("Document {Digest} not found for parse", digest);
                state = state.RemovePendingParse(digest);
                continue;
            }

            try
            {
                // A document without a stored payload cannot be parsed; fail it with a clear message.
                var payloadId = document.PayloadId
                    ?? throw new InvalidOperationException($"Document {digest} has no stored payload");

                // Parse DWARF symbols
                var symbols = await dwParser.ParseSymbolsAsync(payloadId, cancellationToken);

                LogParse(digest, symbols.Count);

                // Update document status and move to map phase
                await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.PendingMap, cancellationToken);
                state = state.MoveToPendingMap(digest);
                parsedCount++;
                _diagnostics.RecordParseSuccess(symbols.Count);
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                // Cancellation propagates instead of permanently marking the document as failed.
                LogError(ex, "Parse", $"Failed to parse document {digest}");
                await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
                state = state.RemovePendingParse(digest);
                _diagnostics.RecordParseError();
            }
        }

        await _stateRepository.UpdateAsync(state, cancellationToken);

        Logger.LogInformation("Debuginfod parse completed: {ParsedCount} documents parsed", parsedCount);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Builds an observation for every document queued for map, validates it with the write
    /// guard and inserts it unless an observation with the same content hash already exists.
    /// Guard violations quarantine the document; other errors mark it failed. In every case
    /// the document is removed from the map queue.
    /// </remarks>
    public override async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

        if (state.PendingMap.Length == 0)
        {
            Logger.LogDebug("No documents pending map for debuginfod");
            return;
        }

        // NOTE(review): the generic type argument was not recoverable from the source text;
        // IDwarfParser is assumed - confirm against the Internal namespace.
        var dwParser = services.GetRequiredService<IDwarfParser>();
        var mappedCount = 0;

        foreach (var digest in state.PendingMap)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
            if (document is null)
            {
                Logger.LogWarning("Document {Digest} not found for map", digest);
                state = state.MarkMapped(digest);
                continue;
            }

            try
            {
                var payloadId = document.PayloadId
                    ?? throw new InvalidOperationException($"Document {digest} has no stored payload");

                // Parse symbols from stored payload
                var symbols = await dwParser.ParseSymbolsAsync(payloadId, cancellationToken);

                // Build observation
                var observation = await BuildObservationAsync(document, symbols, cancellationToken);

                // Validate against AOC
                _writeGuard.EnsureValid(observation);

                // Check for existing observation with same content
                var existingId = await _observationRepository.FindByContentHashAsync(
                    SourceId,
                    observation.DebugId,
                    observation.ContentHash,
                    cancellationToken);

                if (existingId is not null)
                {
                    Logger.LogDebug(
                        "Observation already exists with hash {Hash}, skipping",
                        observation.ContentHash);
                }
                else
                {
                    // Insert new observation
                    await _observationRepository.InsertAsync(observation, cancellationToken);
                    LogMap(observation.ObservationId);
                    _diagnostics.RecordMapSuccess(symbols.Count);
                }

                await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Mapped, cancellationToken);
                state = state.MarkMapped(digest);
                mappedCount++;
            }
            catch (GroundTruthAocGuardException ex)
            {
                Logger.LogError(
                    ex,
                    "AOC violation mapping document {Digest}: {Violations}",
                    digest,
                    string.Join(", ", ex.Violations.Select(v => v.Code)));
                await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Quarantined, cancellationToken);
                state = state.MarkMapped(digest);
                _diagnostics.RecordMapAocViolation();
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                // Cancellation propagates instead of permanently marking the document as failed.
                LogError(ex, "Map", $"Failed to map document {digest}");
                await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
                state = state.MarkMapped(digest);
                _diagnostics.RecordMapError();
            }
        }

        await _stateRepository.UpdateAsync(state, cancellationToken);

        Logger.LogInformation("Debuginfod map completed: {MappedCount} documents mapped", mappedCount);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Issues a GET for <c>/metrics</c>. Any exception is reported as a failed probe with the
    /// exception message rather than being rethrown.
    /// </remarks>
    public async Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default)
    {
        var startTime = UtcNow;
        try
        {
            var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
            using var response = await httpClient.GetAsync("/metrics", ct);
            response.EnsureSuccessStatusCode();

            var latency = UtcNow - startTime;
            return new SymbolSourceConnectivityResult(
                IsConnected: true,
                Latency: latency,
                ErrorMessage: null,
                TestedAt: UtcNow);
        }
        catch (Exception ex)
        {
            var latency = UtcNow - startTime;
            return new SymbolSourceConnectivityResult(
                IsConnected: false,
                Latency: latency,
                ErrorMessage: ex.Message,
                TestedAt: UtcNow);
        }
    }

    /// <inheritdoc/>
    public async Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default)
    {
        var stats = await _observationRepository.GetStatsAsync(ct);
        return new SymbolSourceMetadata(
            SourceId: SourceId,
            DisplayName: DisplayName,
            BaseUrl: _options.BaseUrl.ToString(),
            LastSyncAt: stats.NewestObservation,
            ObservationCount: (int)stats.TotalObservations,
            DebugIdCount: (int)stats.UniqueDebugIds,
            AdditionalInfo: new Dictionary<string, string>
            {
                ["total_symbols"] = stats.TotalSymbols.ToString()
            });
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Returns <c>null</c> when the downloaded document is already present in the document
    /// repository. The returned <see cref="SymbolData"/> carries provenance only; its symbol
    /// list is empty because symbols are not parsed inline here.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="debugId"/> is null, empty or whitespace.</exception>
    public async Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(debugId);

        var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
        var document = await FetchDebugInfoAsync(httpClient, debugId, ct);

        if (document is null)
        {
            return null;
        }

        // For direct fetch, we need to parse symbols inline
        // This is a simplified version - full implementation would use stored payload
        return new SymbolData(
            DebugId: debugId,
            BinaryName: document.Metadata.GetValueOrDefault("binary_name", "unknown"),
            Architecture: document.Metadata.GetValueOrDefault("architecture", "unknown"),
            Symbols: [],
            BuildInfo: null,
            Provenance: new SymbolDataProvenance(
                SourceId: SourceId,
                DocumentUri: document.DocumentUri,
                FetchedAt: document.FetchedAt,
                ContentHash: document.Digest,
                SignatureState: SignatureState.None,
                SignatureDetails: null));
    }

    /// <summary>
    /// Reads the comma-separated <c>pending_debug_ids</c> cursor entry.
    /// </summary>
    /// <returns>The trimmed, non-empty debug IDs, or an empty array when the entry is absent or blank.</returns>
    private ImmutableArray<string> GetPendingDebugIds(SymbolSourceState state)
    {
        // In production, this would come from a work queue or scheduled list.
        // Without a cursor entry the connector is query-driven via FetchByDebugIdAsync.
        if (state.Cursor.TryGetValue("pending_debug_ids", out var pending) &&
            !string.IsNullOrWhiteSpace(pending))
        {
            // TrimEntries is applied before RemoveEmptyEntries, so whitespace-only
            // segments ("a, ,b") are dropped instead of surviving as empty IDs.
            return pending
                .Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries)
                .ToImmutableArray();
        }

        return ImmutableArray<string>.Empty;
    }

    /// <summary>
    /// Downloads the debuginfo for <paramref name="debugId"/> and wraps it in a raw document.
    /// </summary>
    /// <returns>
    /// The new document, or <c>null</c> when a document with the same digest already exists.
    /// </returns>
    /// <exception cref="HttpRequestException">The server returned a non-success status code.</exception>
    private async Task<SymbolRawDocument?> FetchDebugInfoAsync(
        HttpClient httpClient,
        string debugId,
        CancellationToken ct)
    {
        // Debuginfod URL pattern: /buildid/{buildid}/debuginfo
        // The ID is escaped so that a malformed value cannot alter the request path.
        var requestUri = $"/buildid/{Uri.EscapeDataString(debugId)}/debuginfo";

        LogFetch(requestUri, debugId);

        using var response = await httpClient.GetAsync(requestUri, ct);
        response.EnsureSuccessStatusCode();

        var content = await response.Content.ReadAsByteArrayAsync(ct);
        var digest = ComputeDocumentDigest(content);

        // Check if we already have this document
        var existing = await _documentRepository.FindByDigestAsync(digest, ct);
        if (existing is not null)
        {
            Logger.LogDebug("Document {Digest} already exists, skipping", digest);
            return null;
        }

        var contentType = response.Content.Headers.ContentType?.MediaType ?? "application/x-elf";
        var etag = response.Headers.ETag?.Tag;
        var now = UtcNow;

        // requestUri starts with '/', so drop any trailing slash from the base URL
        // to avoid a double slash in the recorded document URI.
        var baseUrl = _options.BaseUrl.ToString().TrimEnd('/');

        return new SymbolRawDocument
        {
            Digest = digest,
            SourceId = SourceId,
            DocumentUri = $"{baseUrl}{requestUri}",
            FetchedAt = now,
            RecordedAt = now,
            ContentType = contentType,
            ContentSize = content.Length,
            ETag = etag,
            Status = DocumentStatus.PendingParse,
            PayloadId = null, // Will be set by blob storage
            Metadata = ImmutableDictionary<string, string>.Empty
                .Add("debug_id", debugId)
                .Add("binary_name", "unknown") // Would extract from ELF headers
        };
    }

    /// <summary>
    /// Builds the observation for a document, numbering it one past the count of existing
    /// observations for the same debug ID and stamping it with its content hash.
    /// </summary>
    /// <remarks>
    /// Missing metadata falls back to <c>"unknown"</c> for the debug ID and binary name and to
    /// <c>"x86_64"</c> for the architecture.
    /// </remarks>
    // NOTE(review): the element type of the symbol list was not recoverable from the source
    // text; ObservedSymbol is assumed - confirm against SymbolObservation.Symbols.
    private async Task<SymbolObservation> BuildObservationAsync(
        SymbolRawDocument document,
        IReadOnlyList<ObservedSymbol> symbols,
        CancellationToken ct)
    {
        var debugId = document.Metadata.GetValueOrDefault("debug_id", "unknown");
        var binaryName = document.Metadata.GetValueOrDefault("binary_name", "unknown");
        var architecture = document.Metadata.GetValueOrDefault("architecture", "x86_64");

        // Determine revision number (awaited rather than blocking on the repository call)
        var existingObservations = await _observationRepository.FindByDebugIdAsync(debugId, ct);
        var revision = existingObservations.Length + 1;
        var now = UtcNow;

        var observation = new SymbolObservation
        {
            ObservationId = GenerateObservationId(debugId, revision),
            SourceId = SourceId,
            DebugId = debugId,
            BinaryName = binaryName,
            Architecture = architecture,
            Symbols = symbols.ToImmutableArray(),
            SymbolCount = symbols.Count,
            Provenance = new ObservationProvenance
            {
                SourceId = SourceId,
                DocumentUri = document.DocumentUri,
                FetchedAt = document.FetchedAt,
                RecordedAt = now,
                DocumentHash = document.Digest,
                SignatureState = SignatureState.None,
                ConnectorVersion = ConnectorVersion
            },
            ContentHash = "", // Placeholder; replaced with the computed hash below
            CreatedAt = now
        };

        // Compute content hash
        var contentHash = ComputeContentHash(observation);
        return observation with { ContentHash = contentHash };
    }
}