450 lines
17 KiB
C#
450 lines
17 KiB
C#
using System.Collections.Immutable;
|
|
using System.Net;
|
|
using System.Runtime.CompilerServices;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
|
|
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
|
|
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
|
|
|
|
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod;
|
|
|
|
/// <summary>
|
|
/// Debuginfod symbol source connector for Fedora/RHEL debuginfod services.
|
|
/// Implements the three-phase pipeline: Fetch → Parse → Map.
|
|
/// </summary>
|
|
public sealed class DebuginfodConnector : SymbolSourceConnectorBase, ISymbolSourceCapability
|
|
{
|
|
/// <summary>
/// Source ID for this connector.
/// </summary>
public const string SourceName = "debuginfod-fedora";

// Creates the named HttpClient (DebuginfodOptions.HttpClientName) used for debuginfod requests.
private readonly IHttpClientFactory _httpClientFactory;

// Raw document store; documents are looked up and updated by digest.
private readonly ISymbolRawDocumentRepository _documentRepository;

// Observation store used for de-duplication, inserts and statistics.
private readonly ISymbolObservationRepository _observationRepository;

// Per-source pipeline state (backoff, cursor, pending parse/map lists).
private readonly ISymbolSourceStateRepository _stateRepository;

// Validates observations before they are written during the map phase.
private readonly ISymbolObservationWriteGuard _writeGuard;

// Connector options, validated once at construction time.
private readonly DebuginfodOptions _options;

// Counters for fetch/parse/map outcomes.
private readonly DebuginfodDiagnostics _diagnostics;

/// <summary>
/// Creates the connector and validates its options.
/// </summary>
/// <exception cref="ArgumentNullException">
/// A required dependency is <c>null</c>, or <paramref name="options"/> wraps a <c>null</c> value.
/// </exception>
public DebuginfodConnector(
    IHttpClientFactory httpClientFactory,
    ISymbolRawDocumentRepository documentRepository,
    ISymbolObservationRepository observationRepository,
    ISymbolSourceStateRepository stateRepository,
    ISymbolObservationWriteGuard writeGuard,
    IOptions<DebuginfodOptions> options,
    DebuginfodDiagnostics diagnostics,
    ILogger<DebuginfodConnector> logger,
    TimeProvider? timeProvider = null)
    : base(logger, timeProvider)
{
    // Guards run in declaration order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(documentRepository);
    ArgumentNullException.ThrowIfNull(observationRepository);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(writeGuard);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value ?? throw new ArgumentNullException(nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(diagnostics);

    _httpClientFactory = httpClientFactory;
    _documentRepository = documentRepository;
    _observationRepository = observationRepository;
    _stateRepository = stateRepository;
    _writeGuard = writeGuard;
    _options = resolvedOptions;
    _diagnostics = diagnostics;
}
|
|
|
|
/// <inheritdoc/>
public override string SourceId
{
    get { return SourceName; }
}

/// <inheritdoc/>
public override string DisplayName
{
    get { return "Fedora debuginfod"; }
}

/// <inheritdoc/>
public override IReadOnlyList<string> SupportedDistros
{
    // A fresh list is produced on every access, exactly like the expression-bodied form.
    get { return ["fedora", "rhel", "centos", "rocky", "alma"]; }
}
|
|
|
|
/// <summary>
/// Number of unexpected fetch failures tolerated in a single run; one more aborts the run
/// and puts the source into backoff.
/// </summary>
private const int MaxFetchErrorsPerRun = 5;

/// <inheritdoc/>
/// <remarks>
/// Fetch phase: downloads debuginfo for every debug ID listed in the state cursor, upserts each
/// new raw document and queues its digest for parsing.
/// <list type="bullet">
/// <item>Skipped entirely while the stored backoff deadline is in the future.</item>
/// <item>HTTP 404 is recorded as "not found" and does not count as an error.</item>
/// <item>Cancellation is rethrown immediately; it is never counted as a fetch error.</item>
/// <item>
/// When more than <see cref="MaxFetchErrorsPerRun"/> errors occur, the run stops, progress is
/// saved without a success timestamp, and the source is marked failed with a 15 minute backoff.
/// </item>
/// </list>
/// </remarks>
public override async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

    // Check backoff
    if (state.BackoffUntil.HasValue && state.BackoffUntil.Value > UtcNow)
    {
        Logger.LogInformation(
            "Debuginfod fetch skipped due to backoff until {BackoffUntil}",
            state.BackoffUntil.Value);
        return;
    }

    // Get pending debug IDs from the cursor
    var debugIds = GetPendingDebugIds(state);
    if (debugIds.Length == 0)
    {
        Logger.LogDebug("No pending debug IDs to fetch from debuginfod");
        return;
    }

    var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
    var fetchedCount = 0;
    var errorCount = 0;
    string? abortReason = null;

    foreach (var debugId in debugIds)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            var document = await FetchDebugInfoAsync(httpClient, debugId, cancellationToken);
            if (document is not null)
            {
                await _documentRepository.UpsertAsync(document, cancellationToken);
                state = state.AddPendingParse(document.Digest);
                fetchedCount++;
                _diagnostics.RecordFetchSuccess();
            }
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // A shutdown/cancel request is not a fetch failure: it must not be logged as an
            // error, counted towards the abort threshold, or be able to trigger a backoff.
            throw;
        }
        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
        {
            Logger.LogDebug("Debug ID {DebugId} not found in debuginfod", debugId);
            _diagnostics.RecordFetchNotFound();
        }
        catch (Exception ex)
        {
            LogError(ex, "Fetch", $"Failed to fetch debug ID {debugId}");
            errorCount++;
            _diagnostics.RecordFetchError();

            if (errorCount > MaxFetchErrorsPerRun)
            {
                abortReason = $"Too many fetch errors: {ex.Message}";
                break;
            }
        }
    }

    if (abortReason is not null)
    {
        // Save the digests queued so far first, WITHOUT stamping LastSuccessAt, and only then
        // mark the source failed. Writing the locally held state snapshot after MarkFailedAsync
        // (as the previous code did) could overwrite the failure/backoff that was just recorded.
        await _stateRepository.UpdateAsync(state, cancellationToken);
        await _stateRepository.MarkFailedAsync(
            SourceId,
            abortReason,
            TimeSpan.FromMinutes(15),
            cancellationToken);

        Logger.LogWarning(
            "Debuginfod fetch aborted: {FetchedCount} fetched, {ErrorCount} errors",
            fetchedCount, errorCount);
        return;
    }

    // NOTE(review): processed IDs are not removed from the "pending_debug_ids" cursor here, so
    // the same list is requested again on the next run — confirm the cursor is advanced elsewhere.
    state = state with { LastSuccessAt = UtcNow };
    await _stateRepository.UpdateAsync(state, cancellationToken);

    Logger.LogInformation(
        "Debuginfod fetch completed: {FetchedCount} fetched, {ErrorCount} errors",
        fetchedCount, errorCount);
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Parse phase: for every digest queued in <c>PendingParse</c>, loads the raw document, runs the
/// DWARF parser over its payload and, on success, moves the document to the map phase.
/// <list type="bullet">
/// <item>Digests whose document no longer exists are dropped from the queue with a warning.</item>
/// <item>Parser failures mark the document <c>Failed</c> and drop it from the queue.</item>
/// <item>Cancellation is rethrown and never marks a document as failed.</item>
/// </list>
/// The updated state is persisted once, after the loop.
/// </remarks>
public override async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

    if (state.PendingParse.Length == 0)
    {
        Logger.LogDebug("No documents pending parse for debuginfod");
        return;
    }

    var dwParser = services.GetRequiredService<IDwarfParser>();
    var parsedCount = 0;

    // foreach evaluates state.PendingParse once, so reassigning `state` inside the loop
    // does not disturb the enumeration.
    foreach (var digest in state.PendingParse)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
        if (document is null)
        {
            Logger.LogWarning("Document {Digest} not found for parse", digest);
            state = state.RemovePendingParse(digest);
            continue;
        }

        try
        {
            // Parse DWARF symbols. Only the count is used here; the map phase parses the
            // payload again to obtain the symbols themselves.
            // NOTE(review): FetchDebugInfoAsync creates documents with PayloadId = null; unless
            // something else assigns it, this dereference throws and the document is marked Failed.
            var symbols = await dwParser.ParseSymbolsAsync(
                document.PayloadId!.Value,
                cancellationToken);

            LogParse(digest, symbols.Count);

            // Update document status and move to map phase
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.PendingMap, cancellationToken);
            state = state.MoveToPendingMap(digest);
            parsedCount++;
            _diagnostics.RecordParseSuccess(symbols.Count);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation is not a parse failure; without this filter the generic handler
            // below would log an error and try to mark a healthy document as Failed.
            throw;
        }
        catch (Exception ex)
        {
            LogError(ex, "Parse", $"Failed to parse document {digest}");
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
            state = state.RemovePendingParse(digest);
            _diagnostics.RecordParseError();
        }
    }

    await _stateRepository.UpdateAsync(state, cancellationToken);

    Logger.LogInformation("Debuginfod parse completed: {ParsedCount} documents parsed", parsedCount);
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Map phase: for every digest queued in <c>PendingMap</c>, re-parses the stored payload, builds
/// an observation, validates it with the write guard and inserts it unless an observation with
/// the same content hash already exists for this source and debug ID.
/// <list type="bullet">
/// <item>Write-guard violations mark the document <c>Quarantined</c>.</item>
/// <item>Any other failure marks the document <c>Failed</c>.</item>
/// <item>Cancellation is rethrown and never changes a document's status.</item>
/// </list>
/// In every handled case the digest is removed from the map queue; the updated state is
/// persisted once, after the loop.
/// </remarks>
public override async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);

    if (state.PendingMap.Length == 0)
    {
        Logger.LogDebug("No documents pending map for debuginfod");
        return;
    }

    var dwParser = services.GetRequiredService<IDwarfParser>();
    var mappedCount = 0;

    // foreach evaluates state.PendingMap once, so reassigning `state` inside the loop
    // does not disturb the enumeration.
    foreach (var digest in state.PendingMap)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
        if (document is null)
        {
            Logger.LogWarning("Document {Digest} not found for map", digest);
            state = state.MarkMapped(digest);
            continue;
        }

        try
        {
            // Parse symbols from stored payload
            var symbols = await dwParser.ParseSymbolsAsync(
                document.PayloadId!.Value,
                cancellationToken);

            // Build observation
            var observation = BuildObservation(document, symbols);

            // Validate against AOC
            _writeGuard.EnsureValid(observation);

            // Check for existing observation with same content
            var existingId = await _observationRepository.FindByContentHashAsync(
                SourceId,
                observation.DebugId,
                observation.ContentHash,
                cancellationToken);

            if (existingId is not null)
            {
                Logger.LogDebug(
                    "Observation already exists with hash {Hash}, skipping",
                    observation.ContentHash);
            }
            else
            {
                // Insert new observation
                await _observationRepository.InsertAsync(observation, cancellationToken);
                LogMap(observation.ObservationId);
                _diagnostics.RecordMapSuccess(symbols.Count);
            }

            // Duplicates are also marked Mapped and counted: the document needs no further work.
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Mapped, cancellationToken);
            state = state.MarkMapped(digest);
            mappedCount++;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation is not a mapping failure; without this filter the generic handler
            // below would log an error and try to mark a healthy document as Failed.
            throw;
        }
        catch (GroundTruthAocGuardException ex)
        {
            // The exception is passed to the logger so its stack trace and message are kept.
            Logger.LogError(
                ex,
                "AOC violation mapping document {Digest}: {Violations}",
                digest,
                string.Join(", ", ex.Violations.Select(v => v.Code)));
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Quarantined, cancellationToken);
            state = state.MarkMapped(digest);
            _diagnostics.RecordMapAocViolation();
        }
        catch (Exception ex)
        {
            LogError(ex, "Map", $"Failed to map document {digest}");
            await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
            state = state.MarkMapped(digest);
            _diagnostics.RecordMapError();
        }
    }

    await _stateRepository.UpdateAsync(state, cancellationToken);

    Logger.LogInformation("Debuginfod map completed: {MappedCount} documents mapped", mappedCount);
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Issues a GET for <c>/metrics</c> on the configured debuginfod client and reports whether a
/// success status came back. This method does not throw: any exception (including one raised
/// while creating the client) is converted into a "not connected" result carrying its message.
/// </remarks>
public async Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default)
{
    var startTime = UtcNow;
    var isConnected = false;
    string? errorMessage = null;

    try
    {
        var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);

        // Only the status line matters for a reachability probe, so stop after the headers
        // instead of buffering the whole metrics body, and dispose the response to release
        // the underlying connection promptly.
        using var response = await httpClient.GetAsync(
            "/metrics",
            HttpCompletionOption.ResponseHeadersRead,
            ct);
        response.EnsureSuccessStatusCode();

        isConnected = true;
    }
    catch (Exception ex)
    {
        errorMessage = ex.Message;
    }

    // One timestamp serves both fields so Latency and TestedAt describe the same instant.
    var finishedAt = UtcNow;
    return new SymbolSourceConnectivityResult(
        IsConnected: isConnected,
        Latency: finishedAt - startTime,
        ErrorMessage: errorMessage,
        TestedAt: finishedAt);
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Builds the metadata from the observation repository's statistics and the configured base URL.
/// Observation and debug-ID counts are saturated at <see cref="int.MaxValue"/> rather than being
/// narrowed with an unchecked cast, which could wrap to a negative number for large stores.
/// </remarks>
public async Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default)
{
    var stats = await _observationRepository.GetStatsAsync(ct);

    // The statistics are wider than the Int32 fields of the metadata record; clamp before casting.
    var observationCount = (int)Math.Min(stats.TotalObservations, int.MaxValue);
    var debugIdCount = (int)Math.Min(stats.UniqueDebugIds, int.MaxValue);

    return new SymbolSourceMetadata(
        SourceId: SourceId,
        DisplayName: DisplayName,
        BaseUrl: _options.BaseUrl.ToString(),
        LastSyncAt: stats.NewestObservation,
        ObservationCount: observationCount,
        DebugIdCount: debugIdCount,
        AdditionalInfo: new Dictionary<string, string>
        {
            ["total_symbols"] = stats.TotalSymbols.ToString()
        });
}
|
|
|
|
/// <inheritdoc/>
/// <remarks>
/// Fetches debuginfo for a single debug ID directly from the server. The returned data carries
/// provenance only: the symbol list is always empty and no build info is attached, because the
/// payload is not parsed here.
/// <para>
/// Returns <c>null</c> when the server answers 404 for the ID, and also when the fetched
/// document's digest is already present in the document repository (the fetch helper skips
/// known documents).
/// </para>
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="debugId"/> is null, empty or whitespace.</exception>
/// <exception cref="HttpRequestException">The server returned a non-success status other than 404.</exception>
public async Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(debugId);

    var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);

    SymbolRawDocument? document;
    try
    {
        document = await FetchDebugInfoAsync(httpClient, debugId, ct);
    }
    catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
    {
        // An unknown build ID is an expected outcome for a lookup with a nullable result;
        // handle it the same way the batch fetch does instead of surfacing an exception.
        Logger.LogDebug("Debug ID {DebugId} not found in debuginfod", debugId);
        _diagnostics.RecordFetchNotFound();
        return null;
    }

    if (document is null)
    {
        return null;
    }

    // For direct fetch, we need to parse symbols inline
    // This is a simplified version - full implementation would use stored payload
    return new SymbolData(
        DebugId: debugId,
        BinaryName: document.Metadata.GetValueOrDefault("binary_name", "unknown"),
        Architecture: document.Metadata.GetValueOrDefault("architecture", "unknown"),
        Symbols: [],
        BuildInfo: null,
        Provenance: new SymbolDataProvenance(
            SourceId: SourceId,
            DocumentUri: document.DocumentUri,
            FetchedAt: document.FetchedAt,
            ContentHash: document.Digest,
            SignatureState: SignatureState.None,
            SignatureDetails: null));
}
|
|
|
|
/// <summary>
/// Reads the debug IDs to fetch from the <c>pending_debug_ids</c> cursor entry, which holds a
/// comma-separated list.
/// </summary>
/// <param name="state">Source state whose cursor is inspected.</param>
/// <returns>
/// The trimmed, non-empty IDs in cursor order, or an empty array when the entry is missing
/// or blank.
/// </returns>
private ImmutableArray<string> GetPendingDebugIds(SymbolSourceState state)
{
    // In production, this would come from a work queue or scheduled list.
    // Without a cursor entry nothing is pending and the connector is purely
    // query-driven via FetchByDebugIdAsync.
    if (!state.Cursor.TryGetValue("pending_debug_ids", out var pending) ||
        string.IsNullOrWhiteSpace(pending))
    {
        return ImmutableArray<string>.Empty;
    }

    // TrimEntries trims each token BEFORE empty entries are removed, so a list such as
    // "a, ,b" no longer yields an empty debug ID (trimming after the split let it through).
    return pending
        .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .ToImmutableArray();
}
|
|
|
|
/// <summary>
/// Downloads the debuginfo artifact for one build ID and describes it as a raw document.
/// </summary>
/// <param name="httpClient">Client used for the request; the request URI is relative.</param>
/// <param name="debugId">Build ID to look up; it is URL-escaped before being placed in the path.</param>
/// <param name="ct">Cancellation token for the HTTP call and the repository lookup.</param>
/// <returns>
/// A new document in <c>PendingParse</c> status, or <c>null</c> when a document with the same
/// digest is already stored.
/// </returns>
/// <exception cref="HttpRequestException">The server returned a non-success status code.</exception>
private async Task<SymbolRawDocument?> FetchDebugInfoAsync(
    HttpClient httpClient,
    string debugId,
    CancellationToken ct)
{
    // Debuginfod URL pattern: /buildid/{buildid}/debuginfo
    // The ID is escaped so a value containing '/', '?', '#' or ".." cannot change the request
    // path; plain hex build IDs are unaffected by escaping.
    var requestUri = $"/buildid/{Uri.EscapeDataString(debugId)}/debuginfo";
    LogFetch(requestUri, debugId);

    // Dispose the response so the connection and content buffers are released promptly.
    using var response = await httpClient.GetAsync(requestUri, ct);
    response.EnsureSuccessStatusCode();

    var content = await response.Content.ReadAsByteArrayAsync(ct);
    var digest = ComputeDocumentDigest(content);

    // Check if we already have this document
    var existing = await _documentRepository.FindByDigestAsync(digest, ct);
    if (existing is not null)
    {
        Logger.LogDebug("Document {Digest} already exists, skipping", digest);
        return null;
    }

    var contentType = response.Content.Headers.ContentType?.MediaType ?? "application/x-elf";
    var etag = response.Headers.ETag?.Tag;

    // requestUri already starts with '/', so drop any trailing slash from the base URL to
    // avoid producing "host//buildid/..." in the recorded document URI.
    var baseUrl = $"{_options.BaseUrl}".TrimEnd('/');

    // One timestamp for both fields: they describe the same fetch.
    var now = UtcNow;

    // NOTE(review): the downloaded bytes are used only for the digest and size; they are not
    // stored here and PayloadId stays null — confirm where the payload is persisted.
    return new SymbolRawDocument
    {
        Digest = digest,
        SourceId = SourceId,
        DocumentUri = $"{baseUrl}{requestUri}",
        FetchedAt = now,
        RecordedAt = now,
        ContentType = contentType,
        ContentSize = content.Length,
        ETag = etag,
        Status = DocumentStatus.PendingParse,
        PayloadId = null, // Will be set by blob storage
        Metadata = ImmutableDictionary<string, string>.Empty
            .Add("debug_id", debugId)
            .Add("binary_name", "unknown") // Would extract from ELF headers
    };
}
|
|
|
|
/// <summary>
/// Assembles the observation for a parsed document: identity fields come from the document
/// metadata, the revision is one more than the number of observations already stored for the
/// debug ID, and the content hash is computed over the finished record.
/// </summary>
/// <param name="document">Raw document the symbols were parsed from.</param>
/// <param name="symbols">Symbols to embed in the observation.</param>
/// <returns>The observation with its <c>ContentHash</c> populated.</returns>
private SymbolObservation BuildObservation(
    SymbolRawDocument document,
    IReadOnlyList<ObservedSymbol> symbols)
{
    var metadata = document.Metadata;
    var debugId = metadata.GetValueOrDefault("debug_id", "unknown");
    var binaryName = metadata.GetValueOrDefault("binary_name", "unknown");
    var architecture = metadata.GetValueOrDefault("architecture", "x86_64");

    // Determine revision number.
    // NOTE(review): this blocks on an async repository call and ignores the caller's
    // cancellation token; making it awaitable requires changing the caller as well.
    var priorObservations = _observationRepository
        .FindByDebugIdAsync(debugId, CancellationToken.None)
        .GetAwaiter()
        .GetResult();
    var revision = priorObservations.Length + 1;

    var observationId = GenerateObservationId(debugId, revision);
    var symbolArray = symbols.ToImmutableArray();

    var provenance = new ObservationProvenance
    {
        SourceId = SourceId,
        DocumentUri = document.DocumentUri,
        FetchedAt = document.FetchedAt,
        RecordedAt = UtcNow,
        DocumentHash = document.Digest,
        SignatureState = SignatureState.None,
        ConnectorVersion = "1.0.0"
    };

    // The hash is derived from the record itself, so build it with a placeholder first.
    var draft = new SymbolObservation
    {
        ObservationId = observationId,
        SourceId = SourceId,
        DebugId = debugId,
        BinaryName = binaryName,
        Architecture = architecture,
        Symbols = symbolArray,
        SymbolCount = symbols.Count,
        Provenance = provenance,
        ContentHash = "",
        CreatedAt = UtcNow
    };

    return draft with { ContentHash = ComputeContentHash(draft) };
}
|
|
}
|