using System.Collections.Immutable;
using System.Net;
using System.Runtime.CompilerServices;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod;
/// <summary>
/// Debuginfod symbol source connector for Fedora/RHEL debuginfod services.
/// Implements the three-phase pipeline: Fetch → Parse → Map.
/// </summary>
public sealed class DebuginfodConnector : SymbolSourceConnectorBase, ISymbolSourceCapability
{
// Creates the named HttpClient (DebuginfodOptions.HttpClientName) used for every debuginfod request.
private readonly IHttpClientFactory _httpClientFactory;
// Stores fetched raw documents and tracks their per-document status.
private readonly ISymbolRawDocumentRepository _documentRepository;
// Stores mapped observations; also queried for duplicates, revisions and statistics.
private readonly ISymbolObservationRepository _observationRepository;
// Persists the connector state for this source (backoff, cursor, pending parse/map lists).
private readonly ISymbolSourceStateRepository _stateRepository;
// Validates each observation before it is inserted.
private readonly ISymbolObservationWriteGuard _writeGuard;
// Connector options; validated once in the constructor.
private readonly DebuginfodOptions _options;
// Receives the fetch/parse/map outcome counters.
private readonly DebuginfodDiagnostics _diagnostics;
/// <summary>
/// Source ID for this connector.
/// </summary>
public const string SourceName = "debuginfod-fedora";
/// <summary>
/// Creates the connector, rejecting missing dependencies and validating the options.
/// </summary>
/// <param name="httpClientFactory">Factory for the named debuginfod HTTP client.</param>
/// <param name="documentRepository">Repository for fetched raw documents.</param>
/// <param name="observationRepository">Repository for mapped observations.</param>
/// <param name="stateRepository">Repository for the connector state.</param>
/// <param name="writeGuard">Guard that validates observations before insertion.</param>
/// <param name="options">Connector options; <c>Validate()</c> is called on the resolved value.</param>
/// <param name="diagnostics">Sink for fetch/parse/map counters.</param>
/// <param name="logger">Logger handed to the base class.</param>
/// <param name="timeProvider">Optional time provider handed to the base class.</param>
/// <exception cref="ArgumentNullException">
/// A required dependency is null, or <paramref name="options"/> resolves to a null value.
/// </exception>
public DebuginfodConnector(
    IHttpClientFactory httpClientFactory,
    ISymbolRawDocumentRepository documentRepository,
    ISymbolObservationRepository observationRepository,
    ISymbolSourceStateRepository stateRepository,
    ISymbolObservationWriteGuard writeGuard,
    IOptions options,
    DebuginfodDiagnostics diagnostics,
    ILogger logger,
    TimeProvider? timeProvider = null)
    : base(logger, timeProvider)
{
    // Guards run in the same order as the original assignments so the first
    // reported problem is unchanged.
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(documentRepository);
    ArgumentNullException.ThrowIfNull(observationRepository);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(writeGuard);

    var resolvedOptions = options?.Value;
    if (resolvedOptions is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    resolvedOptions.Validate();
    ArgumentNullException.ThrowIfNull(diagnostics);

    _httpClientFactory = httpClientFactory;
    _documentRepository = documentRepository;
    _observationRepository = observationRepository;
    _stateRepository = stateRepository;
    _writeGuard = writeGuard;
    _options = resolvedOptions;
    _diagnostics = diagnostics;
}
/// <inheritdoc />
public override string SourceId
{
    // Same value as the public SourceName constant.
    get { return SourceName; }
}
/// <inheritdoc />
public override string DisplayName
{
    // Human-readable name for this source.
    get { return "Fedora debuginfod"; }
}
/// <inheritdoc />
public override IReadOnlyList SupportedDistros
{
    // Distro identifiers this connector declares support for.
    get => ["fedora", "rhel", "centos", "rocky", "alma"];
}
/// <inheritdoc />
/// <remarks>
/// Fetch phase. Skips the run while a backoff window is active, then downloads debug info
/// for every debug ID listed in the state cursor. Each new document is upserted and queued
/// for parsing. A 404 is recorded as "not found" and does not count as an error. Once more
/// than five other errors have occurred the run is aborted and the source is marked failed
/// with a 15 minute backoff; in that case <c>LastSuccessAt</c> is left untouched.
/// Caller-requested cancellation propagates as <see cref="OperationCanceledException"/>.
/// </remarks>
public override async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    const int MaxFetchErrors = 5;

    var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

    // Check backoff
    if (state.BackoffUntil.HasValue && state.BackoffUntil.Value > UtcNow)
    {
        Logger.LogInformation(
            "Debuginfod fetch skipped due to backoff until {BackoffUntil}",
            state.BackoffUntil.Value);
        return;
    }

    // Get pending debug IDs from the cursor.
    // NOTE(review): nothing in this method removes processed IDs from the cursor, so the
    // same list is presumably requested again on the next run unless another component
    // rewrites it - confirm.
    var debugIds = GetPendingDebugIds(state);
    if (debugIds.Length == 0)
    {
        Logger.LogDebug("No pending debug IDs to fetch from debuginfod");
        return;
    }

    var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
    var fetchedCount = 0;
    var errorCount = 0;
    string? abortReason = null;

    foreach (var debugId in debugIds)
    {
        cancellationToken.ThrowIfCancellationRequested();
        try
        {
            var document = await FetchDebugInfoAsync(httpClient, debugId, cancellationToken);
            if (document is not null)
            {
                await _documentRepository.UpsertAsync(document, cancellationToken);
                state = state.AddPendingParse(document.Digest);
                fetchedCount++;
                _diagnostics.RecordFetchSuccess();
            }
        }
        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
        {
            Logger.LogDebug("Debug ID {DebugId} not found in debuginfod", debugId);
            _diagnostics.RecordFetchNotFound();
        }
        // Cancellation requested by the caller is not a fetch error; let it propagate.
        // Cancellation exceptions raised while the token is NOT cancelled (for example an
        // HTTP client timeout) are still counted as errors.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            LogError(ex, "Fetch", $"Failed to fetch debug ID {debugId}");
            errorCount++;
            _diagnostics.RecordFetchError();
            if (errorCount > MaxFetchErrors)
            {
                abortReason = $"Too many fetch errors: {ex.Message}";
                break;
            }
        }
    }

    // Only a run that was not aborted counts as a success.
    if (abortReason is null)
    {
        state = state with { LastSuccessAt = UtcNow };
    }

    // Persist the documents queued so far before recording any failure. Writing this
    // snapshot after MarkFailedAsync would presumably overwrite the failure/backoff it
    // just stored, because the snapshot was loaded before the failure happened.
    await _stateRepository.UpdateAsync(state, cancellationToken);

    if (abortReason is not null)
    {
        await _stateRepository.MarkFailedAsync(
            SourceId,
            abortReason,
            TimeSpan.FromMinutes(15),
            cancellationToken);
    }

    Logger.LogInformation(
        "Debuginfod fetch completed: {FetchedCount} fetched, {ErrorCount} errors",
        fetchedCount, errorCount);
}
/// <inheritdoc />
/// <remarks>
/// Parse phase. For every digest queued for parsing, loads the stored document, runs the
/// DWARF parser resolved from <paramref name="services"/> and moves the document to the map
/// phase. Digests whose document is missing are dropped from the queue; documents that fail
/// to parse are marked <c>Failed</c> and dropped. The parsed symbols are only counted here;
/// the map phase parses the payload again. Caller-requested cancellation propagates and
/// never marks a document as failed.
/// </remarks>
public override async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
    if (state.PendingParse.Length == 0)
    {
        Logger.LogDebug("No documents pending parse for debuginfod");
        return;
    }

    var dwParser = services.GetRequiredService();
    var parsedCount = 0;

    // The loop walks the snapshot taken above; the updated value assigned to `state`
    // inside the loop is what gets persisted at the end.
    foreach (var digest in state.PendingParse)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
        if (document is null)
        {
            Logger.LogWarning("Document {Digest} not found for parse", digest);
            state = state.RemovePendingParse(digest);
            continue;
        }

        try
        {
            // Parse DWARF symbols. A document without a payload id raises here and is
            // handled as a parse failure by the catch below.
            var symbols = await dwParser.ParseSymbolsAsync(
                document.PayloadId!.Value,
                cancellationToken);
            LogParse(digest, symbols.Count);

            // Update document status and move to map phase
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.PendingMap, cancellationToken);
            state = state.MoveToPendingMap(digest);
            parsedCount++;
            _diagnostics.RecordParseSuccess(symbols.Count);
        }
        // A cancelled run is not a parse failure: let caller-requested cancellation
        // propagate so the document stays queued instead of being marked Failed.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            LogError(ex, "Parse", $"Failed to parse document {digest}");
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
            state = state.RemovePendingParse(digest);
            _diagnostics.RecordParseError();
        }
    }

    await _stateRepository.UpdateAsync(state, cancellationToken);
    Logger.LogInformation("Debuginfod parse completed: {ParsedCount} documents parsed", parsedCount);
}
/// <inheritdoc />
/// <remarks>
/// Map phase. For every digest queued for mapping, re-parses the stored payload, builds an
/// observation, validates it with the write guard and inserts it unless an observation with
/// the same content hash already exists. Every handled outcome removes the digest from the
/// queue: success marks the document <c>Mapped</c>, a guard violation marks it
/// <c>Quarantined</c>, any other error marks it <c>Failed</c>. Caller-requested cancellation
/// propagates and never marks a document as failed.
/// </remarks>
public override async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
    if (state.PendingMap.Length == 0)
    {
        Logger.LogDebug("No documents pending map for debuginfod");
        return;
    }

    var dwParser = services.GetRequiredService();
    var mappedCount = 0;

    // The loop walks the snapshot taken above; the updated value assigned to `state`
    // inside the loop is what gets persisted at the end.
    foreach (var digest in state.PendingMap)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
        if (document is null)
        {
            Logger.LogWarning("Document {Digest} not found for map", digest);
            state = state.MarkMapped(digest);
            continue;
        }

        try
        {
            // Parse symbols from stored payload
            var symbols = await dwParser.ParseSymbolsAsync(
                document.PayloadId!.Value,
                cancellationToken);

            // Build observation
            var observation = BuildObservation(document, symbols);

            // Validate against AOC
            _writeGuard.EnsureValid(observation);

            // Check for existing observation with same content
            var existingId = await _observationRepository.FindByContentHashAsync(
                SourceId,
                observation.DebugId,
                observation.ContentHash,
                cancellationToken);

            if (existingId is not null)
            {
                Logger.LogDebug(
                    "Observation already exists with hash {Hash}, skipping",
                    observation.ContentHash);
            }
            else
            {
                // Insert new observation
                await _observationRepository.InsertAsync(observation, cancellationToken);
                LogMap(observation.ObservationId);
                _diagnostics.RecordMapSuccess(symbols.Count);
            }

            // Duplicates count as mapped too: the document needs no further work.
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Mapped, cancellationToken);
            state = state.MarkMapped(digest);
            mappedCount++;
        }
        catch (GroundTruthAocGuardException ex)
        {
            Logger.LogError(
                "AOC violation mapping document {Digest}: {Violations}",
                digest,
                string.Join(", ", ex.Violations.Select(v => v.Code)));
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Quarantined, cancellationToken);
            state = state.MarkMapped(digest);
            _diagnostics.RecordMapAocViolation();
        }
        // A cancelled run is not a mapping failure: let caller-requested cancellation
        // propagate so the document stays queued instead of being marked Failed.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            LogError(ex, "Map", $"Failed to map document {digest}");
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
            state = state.MarkMapped(digest);
            _diagnostics.RecordMapError();
        }
    }

    await _stateRepository.UpdateAsync(state, cancellationToken);
    Logger.LogInformation("Debuginfod map completed: {MappedCount} documents mapped", mappedCount);
}
/// <summary>
/// Probes the debuginfod service by requesting <c>/metrics</c> with the named HTTP client.
/// </summary>
/// <param name="ct">Token passed to the HTTP request.</param>
/// <returns>
/// A result with <c>IsConnected</c> true when the request returns a success status code.
/// Any exception (including cancellation) is reported as a failed result carrying the
/// exception message; this method does not throw.
/// </returns>
public async Task TestConnectivityAsync(CancellationToken ct = default)
{
    var startTime = UtcNow;
    try
    {
        var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);

        // Dispose the response so its content and connection are released promptly.
        using var response = await httpClient.GetAsync("/metrics", ct);
        response.EnsureSuccessStatusCode();

        var latency = UtcNow - startTime;
        return new SymbolSourceConnectivityResult(
            IsConnected: true,
            Latency: latency,
            ErrorMessage: null,
            TestedAt: UtcNow);
    }
    catch (Exception ex)
    {
        var latency = UtcNow - startTime;
        return new SymbolSourceConnectivityResult(
            IsConnected: false,
            Latency: latency,
            ErrorMessage: ex.Message,
            TestedAt: UtcNow);
    }
}
/// <summary>
/// Builds the metadata for this source from the observation repository statistics.
/// </summary>
/// <param name="ct">Token passed to the statistics query.</param>
/// <returns>
/// Metadata carrying the source id, display name, configured base URL, newest observation
/// time, observation and debug ID counts, and the total symbol count under the
/// <c>total_symbols</c> key of the additional info.
/// </returns>
public async Task GetMetadataAsync(CancellationToken ct = default)
{
    var stats = await _observationRepository.GetStatsAsync(ct);

    return new SymbolSourceMetadata(
        SourceId: SourceId,
        DisplayName: DisplayName,
        BaseUrl: _options.BaseUrl.ToString(),
        LastSyncAt: stats.NewestObservation,
        ObservationCount: ClampToInt32(stats.TotalObservations),
        DebugIdCount: ClampToInt32(stats.UniqueDebugIds),
        AdditionalInfo: new Dictionary
        {
            ["total_symbols"] = stats.TotalSymbols.ToString()
        });

    // A plain (int) cast silently wraps on overflow and could report a negative count;
    // saturate at the Int32 bounds instead.
    // Assumes the statistics counters convert implicitly to long - TODO confirm.
    static int ClampToInt32(long value)
    {
        if (value > int.MaxValue)
        {
            return int.MaxValue;
        }

        return value < int.MinValue ? int.MinValue : (int)value;
    }
}
/// <summary>
/// Fetches the debug info document for a single debug ID and wraps it as symbol data.
/// </summary>
/// <param name="debugId">Debug ID (build ID) to request from debuginfod.</param>
/// <param name="ct">Token passed to the fetch.</param>
/// <returns>
/// Symbol data with provenance taken from the fetched document, or null when the fetch
/// helper returns no document (it does so when a document with the same digest is already
/// stored). The symbol list is always empty: symbols are not parsed on this path.
/// </returns>
public async Task FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
{
    var client = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
    var fetched = await FetchDebugInfoAsync(client, debugId, ct);

    if (fetched is not null)
    {
        // Simplified direct fetch: only metadata and provenance are filled in; a full
        // implementation would parse the stored payload.
        var metadata = fetched.Metadata;
        return new SymbolData(
            DebugId: debugId,
            BinaryName: metadata.GetValueOrDefault("binary_name", "unknown"),
            Architecture: metadata.GetValueOrDefault("architecture", "unknown"),
            Symbols: [],
            BuildInfo: null,
            Provenance: new SymbolDataProvenance(
                SourceId: SourceId,
                DocumentUri: fetched.DocumentUri,
                FetchedAt: fetched.FetchedAt,
                ContentHash: fetched.Digest,
                SignatureState: SignatureState.None,
                SignatureDetails: null));
    }

    return null;
}
/// <summary>
/// Reads the debug IDs to fetch from the <c>pending_debug_ids</c> cursor entry of the state.
/// </summary>
/// <param name="state">Connector state whose cursor is inspected.</param>
/// <returns>
/// The comma-separated IDs from the cursor entry, trimmed, with empty and whitespace-only
/// entries removed; empty when the entry is missing or blank.
/// </returns>
private ImmutableArray GetPendingDebugIds(SymbolSourceState state)
{
    // In production, this would come from a work queue or scheduled list.
    // For now the list is taken from the state cursor; without that entry the scheduled
    // fetch has nothing to do and the connector is only used via FetchByDebugIdAsync.
    if (state.Cursor.TryGetValue("pending_debug_ids", out var pending) &&
        !string.IsNullOrWhiteSpace(pending))
    {
        // TrimEntries trims before empties are removed, so an entry such as " " is dropped
        // instead of becoming an empty debug ID.
        return pending
            .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToImmutableArray();
    }

    return ImmutableArray.Empty;
}
/// <summary>
/// Downloads the debuginfo artifact for one debug ID and describes it as a raw document.
/// </summary>
/// <param name="httpClient">Client used for the request; the request URI is root-relative.</param>
/// <param name="debugId">Debug ID (build ID); escaped before it is placed in the URL path.</param>
/// <param name="ct">Token passed to the HTTP call and the repository lookup.</param>
/// <returns>
/// A new document in <c>PendingParse</c> status, or null when a document with the same
/// digest is already stored.
/// </returns>
/// <exception cref="HttpRequestException">The server returned a non-success status code.</exception>
private async Task FetchDebugInfoAsync(
    HttpClient httpClient,
    string debugId,
    CancellationToken ct)
{
    // Debuginfod URL pattern: /buildid/{buildid}/debuginfo
    // The ID can come from callers of FetchByDebugIdAsync, so escape it to keep characters
    // such as '/', '?' or '#' from changing the request path. Hex build IDs are unaffected.
    var requestUri = $"/buildid/{Uri.EscapeDataString(debugId)}/debuginfo";
    LogFetch(requestUri, debugId);

    // Dispose the response once the body has been read.
    using var response = await httpClient.GetAsync(requestUri, ct);
    response.EnsureSuccessStatusCode();

    var content = await response.Content.ReadAsByteArrayAsync(ct);
    var digest = ComputeDocumentDigest(content);

    // Check if we already have this document
    var existing = await _documentRepository.FindByDigestAsync(digest, ct);
    if (existing is not null)
    {
        Logger.LogDebug("Document {Digest} already exists, skipping", digest);
        return null;
    }

    var contentType = response.Content.Headers.ContentType?.MediaType ?? "application/x-elf";
    var etag = response.Headers.ETag?.Tag;

    // requestUri already starts with '/', so drop any trailing '/' from the base URL to
    // avoid a "//" in the recorded document URI.
    var baseUrl = _options.BaseUrl.ToString().TrimEnd('/');

    // NOTE(review): in this method the downloaded bytes are used only for the digest and
    // the size; they are not stored here and PayloadId stays null - confirm where the
    // payload is persisted before the parse phase reads it.
    return new SymbolRawDocument
    {
        Digest = digest,
        SourceId = SourceId,
        DocumentUri = $"{baseUrl}{requestUri}",
        FetchedAt = UtcNow,
        RecordedAt = UtcNow,
        ContentType = contentType,
        ContentSize = content.Length,
        ETag = etag,
        Status = DocumentStatus.PendingParse,
        PayloadId = null, // Will be set by blob storage
        Metadata = ImmutableDictionary.Empty
            .Add("debug_id", debugId)
            .Add("binary_name", "unknown") // Would extract from ELF headers
    };
}
/// <summary>
/// Builds the observation for a parsed document, including its revision-based id and
/// content hash.
/// </summary>
/// <param name="document">Raw document the symbols were parsed from.</param>
/// <param name="symbols">Symbols parsed from the document payload.</param>
/// <returns>The observation with <c>ContentHash</c> computed over the populated record.</returns>
private SymbolObservation BuildObservation(
    SymbolRawDocument document,
    IReadOnlyList symbols)
{
    var metadata = document.Metadata;
    var debugId = metadata.GetValueOrDefault("debug_id", "unknown");
    var binaryName = metadata.GetValueOrDefault("binary_name", "unknown");
    var architecture = metadata.GetValueOrDefault("architecture", "x86_64");

    // The revision is one more than the number of observations already stored for this
    // debug ID.
    // NOTE(review): this blocks synchronously on an async repository call and ignores the
    // caller's cancellation token; making it awaitable needs a signature change together
    // with the caller.
    var priorObservations = _observationRepository
        .FindByDebugIdAsync(debugId, CancellationToken.None)
        .GetAwaiter()
        .GetResult();
    var revision = priorObservations.Length + 1;

    // First pass: everything except the hash, which is derived from the record itself.
    var unhashed = new SymbolObservation
    {
        ObservationId = GenerateObservationId(debugId, revision),
        SourceId = SourceId,
        DebugId = debugId,
        BinaryName = binaryName,
        Architecture = architecture,
        Symbols = symbols.ToImmutableArray(),
        SymbolCount = symbols.Count,
        Provenance = new ObservationProvenance
        {
            SourceId = SourceId,
            DocumentUri = document.DocumentUri,
            FetchedAt = document.FetchedAt,
            RecordedAt = UtcNow,
            DocumentHash = document.Digest,
            SignatureState = SignatureState.None,
            ConnectorVersion = "1.0.0"
        },
        ContentHash = "",
        CreatedAt = UtcNow
    };

    // Second pass: copy of the record carrying the hash computed from the first pass.
    return unhashed with { ContentHash = ComputeContentHash(unhashed) };
}
}