sprints work.

This commit is contained in: master
2026-01-20 00:45:38 +02:00
parent b34bde89fa
commit 4903395618
275 changed files with 52785 additions and 79 deletions


@@ -0,0 +1,59 @@
// -----------------------------------------------------------------------------
// IPredicateTimestampMetadata.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-004 - Predicate Writer Extensions
// Description: RFC-3161 timestamp metadata for embedding in predicates.
// -----------------------------------------------------------------------------
namespace StellaOps.Attestor.StandardPredicates;
/// <summary>
/// RFC-3161 timestamp metadata for embedding in predicates.
/// </summary>
public sealed record Rfc3161TimestampMetadata
{
/// <summary>
/// Gets the TSA URL that issued the timestamp.
/// </summary>
public required string TsaUrl { get; init; }
/// <summary>
/// Gets the digest of the timestamp token (base64 or hex).
/// </summary>
public required string TokenDigest { get; init; }
/// <summary>
/// Gets the digest algorithm used for the token digest.
/// </summary>
public string DigestAlgorithm { get; init; } = "SHA256";
/// <summary>
/// Gets the generation time from the TST.
/// </summary>
public required DateTimeOffset GenerationTime { get; init; }
/// <summary>
/// Gets the TSA policy OID.
/// </summary>
public string? PolicyOid { get; init; }
/// <summary>
/// Gets the TST serial number.
/// </summary>
public string? SerialNumber { get; init; }
/// <summary>
/// Gets the TSA name from the TSTInfo.
/// </summary>
public string? TsaName { get; init; }
/// <summary>
/// Gets whether the timestamp has stapled revocation data.
/// </summary>
public bool HasStapledRevocation { get; init; }
/// <summary>
/// Gets whether this is a qualified timestamp (eIDAS).
/// </summary>
public bool IsQualified { get; init; }
}
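
For orientation, a minimal construction sketch of this record; every value below is an illustrative placeholder, not output from a real TSA:

using StellaOps.Attestor.StandardPredicates;

var metadata = new Rfc3161TimestampMetadata
{
    TsaUrl = "https://tsa.example.com/tsr", // hypothetical endpoint
    TokenDigest = "abc123",                 // placeholder digest value
    GenerationTime = DateTimeOffset.UtcNow,
    PolicyOid = "1.2.3.4",                  // placeholder OID
    HasStapledRevocation = true
};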


@@ -0,0 +1,133 @@
// -----------------------------------------------------------------------------
// CycloneDxTimestampExtension.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-004 - Predicate Writer Extensions
// Description: CycloneDX signature.timestamp extension for RFC-3161 timestamps.
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.Json.Serialization;
namespace StellaOps.Attestor.StandardPredicates.Writers;
/// <summary>
/// Extension for adding RFC-3161 timestamp metadata to CycloneDX documents.
/// Adds a signature.timestamp field to CycloneDX 1.5+ documents.
/// </summary>
public static class CycloneDxTimestampExtension
{
/// <summary>
/// Adds RFC-3161 timestamp metadata to a CycloneDX JSON document.
/// </summary>
/// <param name="cycloneDxJson">The CycloneDX JSON bytes.</param>
/// <param name="timestampMetadata">The timestamp metadata to add.</param>
/// <returns>The modified JSON bytes with timestamp metadata.</returns>
public static byte[] AddTimestampMetadata(
byte[] cycloneDxJson,
Rfc3161TimestampMetadata timestampMetadata)
{
var jsonNode = JsonNode.Parse(cycloneDxJson)
?? throw new InvalidOperationException("Failed to parse CycloneDX JSON");
// Create the signature.timestamp structure
var timestampNode = new JsonObject
{
["rfc3161"] = new JsonObject
{
["tsaUrl"] = timestampMetadata.TsaUrl,
["tokenDigest"] = $"{timestampMetadata.DigestAlgorithm.ToLowerInvariant()}:{timestampMetadata.TokenDigest}",
["generationTime"] = timestampMetadata.GenerationTime.ToString("yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture)
}
};
// Add optional fields
var rfc3161Node = timestampNode["rfc3161"]!.AsObject();
if (timestampMetadata.PolicyOid is not null)
{
rfc3161Node["policyOid"] = timestampMetadata.PolicyOid;
}
if (timestampMetadata.SerialNumber is not null)
{
rfc3161Node["serialNumber"] = timestampMetadata.SerialNumber;
}
if (timestampMetadata.TsaName is not null)
{
rfc3161Node["tsaName"] = timestampMetadata.TsaName;
}
if (timestampMetadata.HasStapledRevocation)
{
rfc3161Node["stapledRevocation"] = true;
}
if (timestampMetadata.IsQualified)
{
rfc3161Node["qualified"] = true;
}
// Add or extend signature object
if (jsonNode["signature"] is JsonObject signatureNode)
{
signatureNode["timestamp"] = timestampNode;
}
else
{
jsonNode["signature"] = new JsonObject
{
["timestamp"] = timestampNode
};
}
// Serialize compactly; JsonNode preserves the source document's property order, so output stays deterministic for canonical inputs
var options = new JsonSerializerOptions
{
WriteIndented = false,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
return JsonSerializer.SerializeToUtf8Bytes(jsonNode, options);
}
/// <summary>
/// Extracts RFC-3161 timestamp metadata from a CycloneDX JSON document.
/// </summary>
/// <param name="cycloneDxJson">The CycloneDX JSON bytes.</param>
/// <returns>The timestamp metadata if present, null otherwise.</returns>
public static Rfc3161TimestampMetadata? ExtractTimestampMetadata(byte[] cycloneDxJson)
{
var jsonNode = JsonNode.Parse(cycloneDxJson);
var timestampNode = jsonNode?["signature"]?["timestamp"]?["rfc3161"];
if (timestampNode is null)
{
return null;
}
var tokenDigest = timestampNode["tokenDigest"]?.GetValue<string>() ?? "";
var digestAlgorithm = "SHA256";
var digestValue = tokenDigest;
// Parse "sha256:abc123" format
if (tokenDigest.Contains(':'))
{
var parts = tokenDigest.Split(':', 2);
digestAlgorithm = parts[0].ToUpperInvariant();
digestValue = parts[1];
}
return new Rfc3161TimestampMetadata
{
TsaUrl = timestampNode["tsaUrl"]?.GetValue<string>() ?? "",
TokenDigest = digestValue,
DigestAlgorithm = digestAlgorithm,
GenerationTime = DateTimeOffset.Parse(
timestampNode["generationTime"]?.GetValue<string>() ?? DateTimeOffset.MinValue.ToString("O"),
CultureInfo.InvariantCulture),
PolicyOid = timestampNode["policyOid"]?.GetValue<string>(),
SerialNumber = timestampNode["serialNumber"]?.GetValue<string>(),
TsaName = timestampNode["tsaName"]?.GetValue<string>(),
HasStapledRevocation = timestampNode["stapledRevocation"]?.GetValue<bool>() ?? false,
IsQualified = timestampNode["qualified"]?.GetValue<bool>() ?? false
};
}
}
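
A round-trip sketch of the two methods above, assuming they compile as shown; the document and digest are toy values:

using System.Text;
using StellaOps.Attestor.StandardPredicates;
using StellaOps.Attestor.StandardPredicates.Writers;

byte[] bom = Encoding.UTF8.GetBytes("{\"bomFormat\":\"CycloneDX\",\"specVersion\":\"1.5\"}");
var metadata = new Rfc3161TimestampMetadata
{
    TsaUrl = "https://tsa.example.com/tsr",
    TokenDigest = "abc123",
    GenerationTime = DateTimeOffset.UtcNow
};
byte[] stamped = CycloneDxTimestampExtension.AddTimestampMetadata(bom, metadata);
Rfc3161TimestampMetadata? readBack = CycloneDxTimestampExtension.ExtractTimestampMetadata(stamped);
// readBack.TokenDigest == "abc123": the "sha256:" prefix added on write is split off on read.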


@@ -50,27 +50,28 @@ public sealed class CycloneDxWriter : ISbomWriter
 }
 /// <inheritdoc />
-public byte[] Write(SbomDocument document)
+public SbomWriteResult Write(SbomDocument document)
 {
 var cdx = ConvertToCycloneDx(document);
-return _canonicalizer.Canonicalize(cdx);
+var canonicalBytes = _canonicalizer.Canonicalize(cdx);
+var goldenHash = _canonicalizer.ComputeGoldenHash(canonicalBytes);
+return new SbomWriteResult
+{
+Format = SbomFormat.CycloneDx,
+CanonicalBytes = canonicalBytes,
+GoldenHash = goldenHash,
+DocumentId = cdx.SerialNumber
+};
 }
 /// <inheritdoc />
-public Task<byte[]> WriteAsync(SbomDocument document, CancellationToken ct = default)
+public Task<SbomWriteResult> WriteAsync(SbomDocument document, CancellationToken ct = default)
 {
 ct.ThrowIfCancellationRequested();
 return Task.FromResult(Write(document));
 }
-/// <inheritdoc />
-public string ComputeContentHash(SbomDocument document)
-{
-var bytes = Write(document);
-var hash = SHA256.HashData(bytes);
-return Convert.ToHexString(hash).ToLowerInvariant();
-}
 private CycloneDxBom ConvertToCycloneDx(SbomDocument document)
 {
 // Sort components by bom-ref


@@ -7,6 +7,32 @@
 namespace StellaOps.Attestor.StandardPredicates.Writers;
+/// <summary>
+/// Result of SBOM write operation.
+/// </summary>
+public sealed record SbomWriteResult
+{
+/// <summary>
+/// The format of the generated SBOM.
+/// </summary>
+public required Canonicalization.SbomFormat Format { get; init; }
+/// <summary>
+/// The canonical bytes of the SBOM.
+/// </summary>
+public required byte[] CanonicalBytes { get; init; }
+/// <summary>
+/// The golden hash of the canonical bytes.
+/// </summary>
+public required string GoldenHash { get; init; }
+/// <summary>
+/// Document ID.
+/// </summary>
+public string? DocumentId { get; init; }
+}
 /// <summary>
 /// Writes SBOM documents in deterministic, canonical format.
 /// </summary>
@@ -18,26 +44,19 @@ public interface ISbomWriter
 Canonicalization.SbomFormat Format { get; }
 /// <summary>
-/// Writes an SBOM to canonical bytes.
+/// Writes an SBOM to canonical format.
 /// </summary>
 /// <param name="document">The SBOM document model.</param>
-/// <returns>Canonical JSON bytes.</returns>
-byte[] Write(SbomDocument document);
+/// <returns>Write result containing canonical bytes and hash.</returns>
+SbomWriteResult Write(SbomDocument document);
 /// <summary>
-/// Writes an SBOM to canonical bytes asynchronously.
+/// Writes an SBOM asynchronously.
 /// </summary>
 /// <param name="document">The SBOM document model.</param>
 /// <param name="ct">Cancellation token.</param>
-/// <returns>Canonical JSON bytes.</returns>
-Task<byte[]> WriteAsync(SbomDocument document, CancellationToken ct = default);
-/// <summary>
-/// Computes the content hash of the canonical SBOM.
-/// </summary>
-/// <param name="document">The SBOM document.</param>
-/// <returns>SHA-256 hash in hex format.</returns>
-string ComputeContentHash(SbomDocument document);
+/// <returns>Write result containing canonical bytes and hash.</returns>
+Task<SbomWriteResult> WriteAsync(SbomDocument document, CancellationToken ct = default);
 }
/// <summary>

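A consumer sketch for the revised contract (the method and file name here are illustrative assumptions): the golden hash now travels with the canonical bytes, replacing the removed ComputeContentHash round trip.

public static async Task<string> WriteAndPersistAsync(
    ISbomWriter writer, SbomDocument document, CancellationToken ct = default)
{
    SbomWriteResult result = await writer.WriteAsync(document, ct);
    await File.WriteAllBytesAsync($"sbom-{result.GoldenHash}.json", result.CanonicalBytes, ct);
    return result.GoldenHash;
}
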

@@ -0,0 +1,207 @@
// -----------------------------------------------------------------------------
// SpdxTimestampExtension.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-004 - Predicate Writer Extensions
// Description: SPDX 2.x JSON annotation extension for RFC-3161 timestamps.
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.Json.Serialization;
namespace StellaOps.Attestor.StandardPredicates.Writers;
/// <summary>
/// Extension for adding RFC-3161 timestamp metadata to SPDX documents.
/// Uses SPDX 2.x-style document annotations for timestamp references.
/// </summary>
public static class SpdxTimestampExtension
{
/// <summary>
/// The annotation type for RFC-3161 timestamps.
/// </summary>
public const string TimestampAnnotationType = "OTHER";
/// <summary>
/// The annotator prefix for Stella timestamp annotations.
/// </summary>
public const string TimestampAnnotator = "Tool: stella-attestor";
/// <summary>
/// Adds RFC-3161 timestamp annotation to an SPDX JSON document.
/// </summary>
/// <param name="spdxJson">The SPDX JSON bytes.</param>
/// <param name="timestampMetadata">The timestamp metadata to add.</param>
/// <returns>The modified JSON bytes with timestamp annotation.</returns>
public static byte[] AddTimestampAnnotation(
byte[] spdxJson,
Rfc3161TimestampMetadata timestampMetadata)
{
var jsonNode = JsonNode.Parse(spdxJson)
?? throw new InvalidOperationException("Failed to parse SPDX JSON");
// Build the comment field with RFC3161 reference
var commentParts = new List<string>
{
$"RFC3161-TST:{timestampMetadata.DigestAlgorithm.ToLowerInvariant()}:{timestampMetadata.TokenDigest}",
$"TSA:{timestampMetadata.TsaUrl}"
};
if (timestampMetadata.TsaName is not null)
{
commentParts.Add($"TSAName:{timestampMetadata.TsaName}");
}
if (timestampMetadata.PolicyOid is not null)
{
commentParts.Add($"Policy:{timestampMetadata.PolicyOid}");
}
if (timestampMetadata.HasStapledRevocation)
{
commentParts.Add("Stapled:true");
}
if (timestampMetadata.IsQualified)
{
commentParts.Add("Qualified:true");
}
var comment = string.Join("; ", commentParts);
// Create the annotation
var annotation = new JsonObject
{
["annotationType"] = TimestampAnnotationType,
["annotator"] = TimestampAnnotator,
["annotationDate"] = timestampMetadata.GenerationTime.ToString("yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture),
["comment"] = comment
};
// Add to annotations array
if (jsonNode["annotations"] is JsonArray annotationsArray)
{
annotationsArray.Add(annotation);
}
else
{
jsonNode["annotations"] = new JsonArray { annotation };
}
// Serialize compactly; JsonNode preserves the source document's property order, so output stays deterministic for canonical inputs
var options = new JsonSerializerOptions
{
WriteIndented = false,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
return JsonSerializer.SerializeToUtf8Bytes(jsonNode, options);
}
/// <summary>
/// Extracts RFC-3161 timestamp metadata from an SPDX JSON document.
/// </summary>
/// <param name="spdxJson">The SPDX JSON bytes.</param>
/// <returns>The timestamp metadata if present, null otherwise.</returns>
public static Rfc3161TimestampMetadata? ExtractTimestampMetadata(byte[] spdxJson)
{
var jsonNode = JsonNode.Parse(spdxJson);
var annotationsNode = jsonNode?["annotations"]?.AsArray();
if (annotationsNode is null)
{
return null;
}
// Find the timestamp annotation
foreach (var annotation in annotationsNode)
{
var annotator = annotation?["annotator"]?.GetValue<string>();
var comment = annotation?["comment"]?.GetValue<string>();
if (annotator == TimestampAnnotator && comment?.StartsWith("RFC3161-TST:") == true)
{
return ParseTimestampComment(
comment,
annotation?["annotationDate"]?.GetValue<string>());
}
}
return null;
}
private static Rfc3161TimestampMetadata? ParseTimestampComment(string comment, string? annotationDate)
{
var parts = comment.Split("; ");
if (parts.Length == 0)
{
return null;
}
string? digestAlgorithm = null;
string? tokenDigest = null;
string? tsaUrl = null;
string? tsaName = null;
string? policyOid = null;
bool hasStapledRevocation = false;
bool isQualified = false;
foreach (var part in parts)
{
if (part.StartsWith("RFC3161-TST:"))
{
var digestPart = part.Substring("RFC3161-TST:".Length);
var colonIdx = digestPart.IndexOf(':');
if (colonIdx > 0)
{
digestAlgorithm = digestPart.Substring(0, colonIdx).ToUpperInvariant();
tokenDigest = digestPart.Substring(colonIdx + 1);
}
}
else if (part.StartsWith("TSA:"))
{
tsaUrl = part.Substring("TSA:".Length);
}
else if (part.StartsWith("TSAName:"))
{
tsaName = part.Substring("TSAName:".Length);
}
else if (part.StartsWith("Policy:"))
{
policyOid = part.Substring("Policy:".Length);
}
else if (part == "Stapled:true")
{
hasStapledRevocation = true;
}
else if (part == "Qualified:true")
{
isQualified = true;
}
}
if (tokenDigest is null || tsaUrl is null)
{
return null;
}
DateTimeOffset generationTime = DateTimeOffset.MinValue;
if (annotationDate is not null)
{
DateTimeOffset.TryParse(annotationDate, CultureInfo.InvariantCulture, DateTimeStyles.None, out generationTime);
}
return new Rfc3161TimestampMetadata
{
TsaUrl = tsaUrl,
TokenDigest = tokenDigest,
DigestAlgorithm = digestAlgorithm ?? "SHA256",
GenerationTime = generationTime,
PolicyOid = policyOid,
TsaName = tsaName,
HasStapledRevocation = hasStapledRevocation,
IsQualified = isQualified
};
}
}
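
The same round trip for SPDX, under the same assumptions as the CycloneDX example (metadata as constructed there); the annotation comment wire format is "RFC3161-TST:&lt;alg&gt;:&lt;digest&gt;; TSA:&lt;url&gt;; ...":

using System.Text;

byte[] spdx = Encoding.UTF8.GetBytes("{\"spdxVersion\":\"SPDX-2.3\"}");
byte[] stamped = SpdxTimestampExtension.AddTimestampAnnotation(spdx, metadata);
Rfc3161TimestampMetadata? readBack = SpdxTimestampExtension.ExtractTimestampMetadata(stamped);
// readBack is null unless an annotation from "Tool: stella-attestor" carries an RFC3161-TST comment.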


@@ -0,0 +1,234 @@
// -----------------------------------------------------------------------------
// AttestationTimestampPolicyContext.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-003 - Policy Integration
// Description: Policy context for timestamp assertions.
// -----------------------------------------------------------------------------
namespace StellaOps.Attestor.Timestamping;
/// <summary>
/// Context for timestamp-related policy assertions.
/// </summary>
public sealed record AttestationTimestampPolicyContext
{
/// <summary>
/// Gets whether a valid TST is present.
/// </summary>
public bool HasValidTst { get; init; }
/// <summary>
/// Gets the TST generation time.
/// </summary>
public DateTimeOffset? TstTime { get; init; }
/// <summary>
/// Gets the TSA name.
/// </summary>
public string? TsaName { get; init; }
/// <summary>
/// Gets the TSA policy OID.
/// </summary>
public string? TsaPolicyOid { get; init; }
/// <summary>
/// Gets whether the TSA certificate is valid.
/// </summary>
public bool TsaCertificateValid { get; init; }
/// <summary>
/// Gets the TSA certificate expiration.
/// </summary>
public DateTimeOffset? TsaCertificateExpires { get; init; }
/// <summary>
/// Gets the OCSP status.
/// </summary>
public string? OcspStatus { get; init; }
/// <summary>
/// Gets whether CRL was checked.
/// </summary>
public bool CrlChecked { get; init; }
/// <summary>
/// Gets the Rekor integrated time.
/// </summary>
public DateTimeOffset? RekorTime { get; init; }
/// <summary>
/// Gets the time skew between TST and Rekor.
/// </summary>
public TimeSpan? TimeSkew { get; init; }
/// <summary>
/// Creates an empty context.
/// </summary>
public static AttestationTimestampPolicyContext Empty { get; } = new();
/// <summary>
/// Creates a context from a verification result.
/// </summary>
public static AttestationTimestampPolicyContext FromVerification(
TimestampedAttestation attestation,
AttestationTimestampVerificationResult result)
{
return new AttestationTimestampPolicyContext
{
HasValidTst = result.IsValid,
TstTime = attestation.TimestampTime,
TsaName = attestation.TsaName,
TsaPolicyOid = attestation.TsaPolicyOid,
TsaCertificateValid = result.TsaCertificateStatus?.IsValid ?? false,
TsaCertificateExpires = result.TsaCertificateStatus?.ExpiresAt,
OcspStatus = result.TsaCertificateStatus?.RevocationStatus,
CrlChecked = result.TsaCertificateStatus?.RevocationSource?.Contains("CRL") ?? false,
RekorTime = attestation.RekorReceipt?.IntegratedTime,
TimeSkew = result.TimeConsistency?.Skew
};
}
}
/// <summary>
/// Policy evaluator for timestamp requirements.
/// </summary>
public sealed class TimestampPolicyEvaluator
{
/// <summary>
/// Evaluates whether an attestation meets timestamp policy requirements.
/// </summary>
/// <param name="context">The timestamp policy context.</param>
/// <param name="policy">The policy to evaluate.</param>
/// <returns>The evaluation result.</returns>
public TimestampPolicyResult Evaluate(
AttestationTimestampPolicyContext context,
TimestampPolicy policy)
{
var violations = new List<PolicyViolation>();
// Check RFC-3161 requirement
if (policy.RequireRfc3161 && !context.HasValidTst)
{
violations.Add(new PolicyViolation(
"require-rfc3161",
"Valid RFC-3161 timestamp is required but not present"));
}
// Check time skew
if (policy.MaxTimeSkew.HasValue && context.TimeSkew.HasValue)
{
if (context.TimeSkew.Value.Duration() > policy.MaxTimeSkew.Value)
{
violations.Add(new PolicyViolation(
"time-skew",
$"Time skew {context.TimeSkew.Value} exceeds maximum {policy.MaxTimeSkew}"));
}
}
// Check certificate freshness
if (policy.MinCertificateFreshness.HasValue && context.TsaCertificateExpires.HasValue)
{
var remaining = context.TsaCertificateExpires.Value - DateTimeOffset.UtcNow;
if (remaining < policy.MinCertificateFreshness.Value)
{
violations.Add(new PolicyViolation(
"freshness",
$"TSA certificate expires in {remaining.TotalDays:F0} days, minimum required is {policy.MinCertificateFreshness.Value.TotalDays:F0} days"));
}
}
// Check revocation stapling
if (policy.RequireRevocationStapling)
{
var hasOcsp = context.OcspStatus is "Good" or "Unknown"; // any recorded OCSP status counts as evidence that a check ran
var hasCrl = context.CrlChecked;
if (!hasOcsp && !hasCrl)
{
violations.Add(new PolicyViolation(
"revocation-staple",
"OCSP or CRL revocation evidence is required"));
}
}
// Check trusted TSAs
if (policy.TrustedTsas is { Count: > 0 } && context.TsaName is not null)
{
if (!policy.TrustedTsas.Any(t => context.TsaName.Contains(t, StringComparison.OrdinalIgnoreCase)))
{
violations.Add(new PolicyViolation(
"trusted-tsa",
$"TSA '{context.TsaName}' is not in the trusted TSA list"));
}
}
return new TimestampPolicyResult
{
IsCompliant = violations.Count == 0,
Violations = violations
};
}
}
/// <summary>
/// Timestamp policy definition.
/// </summary>
public sealed record TimestampPolicy
{
/// <summary>
/// Gets whether RFC-3161 timestamp is required.
/// </summary>
public bool RequireRfc3161 { get; init; }
/// <summary>
/// Gets the maximum allowed time skew.
/// </summary>
public TimeSpan? MaxTimeSkew { get; init; }
/// <summary>
/// Gets the minimum TSA certificate freshness.
/// </summary>
public TimeSpan? MinCertificateFreshness { get; init; }
/// <summary>
/// Gets whether revocation stapling is required.
/// </summary>
public bool RequireRevocationStapling { get; init; }
/// <summary>
/// Gets the list of trusted TSAs.
/// </summary>
public IReadOnlyList<string>? TrustedTsas { get; init; }
/// <summary>
/// Gets the default policy.
/// </summary>
public static TimestampPolicy Default { get; } = new()
{
RequireRfc3161 = true,
MaxTimeSkew = TimeSpan.FromMinutes(5),
MinCertificateFreshness = TimeSpan.FromDays(180),
RequireRevocationStapling = true
};
}
/// <summary>
/// Result of timestamp policy evaluation.
/// </summary>
public sealed record TimestampPolicyResult
{
/// <summary>
/// Gets whether the policy is met.
/// </summary>
public required bool IsCompliant { get; init; }
/// <summary>
/// Gets the list of violations.
/// </summary>
public required IReadOnlyList<PolicyViolation> Violations { get; init; }
}
/// <summary>
/// A policy violation.
/// </summary>
public sealed record PolicyViolation(string RuleId, string Message);
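
An evaluation sketch: given a context produced by AttestationTimestampPolicyContext.FromVerification (assumed in scope here), check it against the default policy and report violations.

var evaluator = new TimestampPolicyEvaluator();
TimestampPolicyResult verdict = evaluator.Evaluate(context, TimestampPolicy.Default);
if (!verdict.IsCompliant)
{
    foreach (PolicyViolation violation in verdict.Violations)
    {
        Console.WriteLine($"{violation.RuleId}: {violation.Message}");
    }
}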


@@ -0,0 +1,276 @@
// -----------------------------------------------------------------------------
// AttestationTimestampService.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-001 - Attestation Signing Pipeline Extension
// Description: Service implementation for timestamping attestations.
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Attestor.Timestamping;
/// <summary>
/// Implementation of <see cref="IAttestationTimestampService"/>.
/// </summary>
public sealed class AttestationTimestampService : IAttestationTimestampService
{
private readonly AttestationTimestampServiceOptions _options;
private readonly ILogger<AttestationTimestampService> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="AttestationTimestampService"/> class.
/// </summary>
public AttestationTimestampService(
IOptions<AttestationTimestampServiceOptions> options,
ILogger<AttestationTimestampService> logger)
{
_options = options.Value;
_logger = logger;
}
/// <inheritdoc />
public async Task<TimestampedAttestation> TimestampAsync(
ReadOnlyMemory<byte> envelope,
AttestationTimestampOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= AttestationTimestampOptions.Default;
// Hash the envelope
var algorithm = options.HashAlgorithm switch
{
"SHA256" => HashAlgorithmName.SHA256,
"SHA384" => HashAlgorithmName.SHA384,
"SHA512" => HashAlgorithmName.SHA512,
_ => HashAlgorithmName.SHA256
};
var hash = ComputeHash(envelope.Span, algorithm);
var digestHex = Convert.ToHexString(hash).ToLowerInvariant();
_logger.LogDebug(
"Timestamping attestation envelope with {Algorithm} digest: {Digest}",
options.HashAlgorithm,
digestHex);
// Call TSA client (placeholder - would integrate with ITimeStampAuthorityClient)
var tstBytes = await RequestTimestampAsync(hash, options, cancellationToken);
var (genTime, tsaName, policyOid) = ParseTstInfo(tstBytes);
_logger.LogInformation(
"Attestation timestamped at {Time} by {TSA}",
genTime,
tsaName);
return new TimestampedAttestation
{
Envelope = envelope.ToArray(),
EnvelopeDigest = $"{options.HashAlgorithm.ToLowerInvariant()}:{digestHex}",
TimeStampToken = tstBytes,
TimestampTime = genTime,
TsaName = tsaName,
TsaPolicyOid = policyOid
};
}
/// <inheritdoc />
public async Task<AttestationTimestampVerificationResult> VerifyAsync(
TimestampedAttestation attestation,
AttestationTimestampVerificationOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= AttestationTimestampVerificationOptions.Default;
var warnings = new List<string>();
try
{
// Step 1: Verify message imprint
var expectedHash = ComputeEnvelopeHash(attestation.Envelope, attestation.EnvelopeDigest);
var imprintValid = await VerifyImprintAsync(attestation.TimeStampToken, expectedHash, cancellationToken);
if (!imprintValid)
{
return AttestationTimestampVerificationResult.Failure(
TstVerificationStatus.ImprintMismatch,
"TST message imprint does not match attestation hash");
}
// Step 2: Verify TST signature (placeholder)
var signatureValid = await VerifyTstSignatureAsync(attestation.TimeStampToken, cancellationToken);
if (!signatureValid)
{
return AttestationTimestampVerificationResult.Failure(
TstVerificationStatus.InvalidSignature,
"TST signature verification failed");
}
// Step 3: Check time consistency with Rekor if present
TimeConsistencyResult? timeConsistency = null;
if (attestation.RekorReceipt is not null && options.RequireRekorConsistency)
{
timeConsistency = CheckTimeConsistency(
attestation.TimestampTime,
attestation.RekorReceipt.IntegratedTime,
options.MaxTimeSkew);
if (!timeConsistency.IsValid)
{
return AttestationTimestampVerificationResult.Failure(
TstVerificationStatus.TimeInconsistency,
$"TST time inconsistent with Rekor: skew={timeConsistency.Skew}");
}
}
// Step 4: Check TSA certificate revocation
TsaCertificateStatus? certStatus = null;
if (options.VerifyTsaRevocation)
{
certStatus = await CheckTsaCertificateAsync(attestation.TimeStampToken, options.AllowOffline, cancellationToken);
if (certStatus is { IsValid: false })
{
if (certStatus.RevocationStatus == "Revoked")
{
return AttestationTimestampVerificationResult.Failure(
TstVerificationStatus.CertificateRevoked,
"TSA certificate has been revoked");
}
warnings.Add($"TSA certificate status: {certStatus.RevocationStatus}");
}
// Warn if certificate is near expiration
if (certStatus?.ExpiresAt is not null)
{
var daysUntilExpiry = (certStatus.ExpiresAt.Value - DateTimeOffset.UtcNow).TotalDays;
if (daysUntilExpiry < 90)
{
warnings.Add($"TSA certificate expires in {daysUntilExpiry:F0} days");
}
}
}
return AttestationTimestampVerificationResult.Success(
timeConsistency,
certStatus,
warnings.Count > 0 ? warnings : null);
}
catch (Exception ex)
{
_logger.LogError(ex, "Attestation timestamp verification failed");
return AttestationTimestampVerificationResult.Failure(
TstVerificationStatus.Unknown,
ex.Message);
}
}
/// <inheritdoc />
public TimeConsistencyResult CheckTimeConsistency(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
TimeSpan? tolerance = null)
{
tolerance ??= _options.DefaultTimeSkewTolerance;
var skew = rekorTime - tstTime;
return new TimeConsistencyResult
{
TstTime = tstTime,
RekorTime = rekorTime,
WithinTolerance = Math.Abs(skew.TotalSeconds) <= tolerance.Value.TotalSeconds,
ConfiguredTolerance = tolerance.Value
};
}
private static byte[] ComputeHash(ReadOnlySpan<byte> data, HashAlgorithmName algorithm)
{
return algorithm.Name switch
{
"SHA256" => SHA256.HashData(data),
"SHA384" => SHA384.HashData(data),
"SHA512" => SHA512.HashData(data),
_ => SHA256.HashData(data)
};
}
private static byte[] ComputeEnvelopeHash(byte[] envelope, string digestSpec)
{
// Parse algorithm from digest spec (e.g., "sha256:abc...")
var colonIdx = digestSpec.IndexOf(':');
var algorithmName = colonIdx > 0 ? digestSpec[..colonIdx].ToUpperInvariant() : "SHA256";
var algorithm = algorithmName switch
{
"SHA256" => HashAlgorithmName.SHA256,
"SHA384" => HashAlgorithmName.SHA384,
"SHA512" => HashAlgorithmName.SHA512,
_ => HashAlgorithmName.SHA256
};
return ComputeHash(envelope, algorithm);
}
// Placeholder implementations - would integrate with actual TSA client
private Task<byte[]> RequestTimestampAsync(byte[] hash, AttestationTimestampOptions options, CancellationToken ct)
{
// This would call ITimeStampAuthorityClient.GetTimeStampAsync
// For now, return placeholder
_logger.LogDebug("Would request timestamp from TSA");
return Task.FromResult(Array.Empty<byte>());
}
private static (DateTimeOffset genTime, string tsaName, string policyOid) ParseTstInfo(byte[] tstBytes)
{
// This would parse the TST and extract TSTInfo
// For now, return placeholder values
return (DateTimeOffset.UtcNow, "Placeholder TSA", "1.2.3.4");
}
private Task<bool> VerifyImprintAsync(byte[] tst, byte[] expectedHash, CancellationToken ct)
{
// This would verify the messageImprint in the TST matches
return Task.FromResult(true);
}
private Task<bool> VerifyTstSignatureAsync(byte[] tst, CancellationToken ct)
{
// This would verify the CMS signature
return Task.FromResult(true);
}
private Task<TsaCertificateStatus> CheckTsaCertificateAsync(byte[] tst, bool allowOffline, CancellationToken ct)
{
// This would check the TSA certificate revocation status
return Task.FromResult(new TsaCertificateStatus
{
IsValid = true,
Subject = "Placeholder TSA",
RevocationStatus = "Good",
RevocationSource = "OCSP"
});
}
}
/// <summary>
/// Configuration options for <see cref="AttestationTimestampService"/>.
/// </summary>
public sealed record AttestationTimestampServiceOptions
{
/// <summary>
/// Gets the default time skew tolerance.
/// </summary>
public TimeSpan DefaultTimeSkewTolerance { get; init; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Gets whether timestamping is enabled by default.
/// </summary>
public bool EnabledByDefault { get; init; } = true;
/// <summary>
/// Gets whether to fail on TSA errors.
/// </summary>
public bool FailOnTsaError { get; init; } = false;
/// <summary>
/// Gets the minimum days before TSA cert expiry to warn.
/// </summary>
public int CertExpiryWarningDays { get; init; } = 90;
}
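
A wiring sketch for hosts that consume this service; the extension class and method names are assumptions for illustration, not part of this commit:

using Microsoft.Extensions.DependencyInjection;
using StellaOps.Attestor.Timestamping;

public static class TimestampingServiceCollectionExtensions
{
    public static IServiceCollection AddAttestationTimestamping(this IServiceCollection services)
    {
        services.Configure<AttestationTimestampServiceOptions>(o =>
            o.DefaultTimeSkewTolerance = TimeSpan.FromMinutes(5));
        return services.AddSingleton<IAttestationTimestampService, AttestationTimestampService>();
    }
}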


@@ -0,0 +1,267 @@
// -----------------------------------------------------------------------------
// IAttestationTimestampService.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-001 - Attestation Signing Pipeline Extension
// Description: Service interface for timestamping attestations.
// -----------------------------------------------------------------------------
namespace StellaOps.Attestor.Timestamping;
/// <summary>
/// Service for timestamping attestations.
/// </summary>
public interface IAttestationTimestampService
{
/// <summary>
/// Timestamps a signed attestation envelope.
/// </summary>
/// <param name="envelope">The signed DSSE envelope bytes.</param>
/// <param name="options">Timestamping options.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The timestamped attestation.</returns>
Task<TimestampedAttestation> TimestampAsync(
ReadOnlyMemory<byte> envelope,
AttestationTimestampOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies an attestation's timestamp.
/// </summary>
/// <param name="attestation">The timestamped attestation to verify.</param>
/// <param name="options">Verification options.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The verification result.</returns>
Task<AttestationTimestampVerificationResult> VerifyAsync(
TimestampedAttestation attestation,
AttestationTimestampVerificationOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Checks time consistency between TST and Rekor.
/// </summary>
/// <param name="tstTime">The TST generation time.</param>
/// <param name="rekorTime">The Rekor integrated time.</param>
/// <param name="tolerance">Tolerance for time skew.</param>
/// <returns>The consistency result.</returns>
TimeConsistencyResult CheckTimeConsistency(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
TimeSpan? tolerance = null);
}
/// <summary>
/// Options for timestamping attestations.
/// </summary>
public sealed record AttestationTimestampOptions
{
/// <summary>
/// Gets the hash algorithm to use.
/// </summary>
public string HashAlgorithm { get; init; } = "SHA256";
/// <summary>
/// Gets whether to include nonce.
/// </summary>
public bool IncludeNonce { get; init; } = true;
/// <summary>
/// Gets whether to request certificates.
/// </summary>
public bool RequestCertificates { get; init; } = true;
/// <summary>
/// Gets the preferred TSA provider.
/// </summary>
public string? PreferredProvider { get; init; }
/// <summary>
/// Gets whether to store evidence.
/// </summary>
public bool StoreEvidence { get; init; } = true;
/// <summary>
/// Gets whether to fetch revocation data for stapling.
/// </summary>
public bool FetchRevocationData { get; init; } = true;
/// <summary>
/// Gets the default options.
/// </summary>
public static AttestationTimestampOptions Default { get; } = new();
}
/// <summary>
/// Options for verifying attestation timestamps.
/// </summary>
public sealed record AttestationTimestampVerificationOptions
{
/// <summary>
/// Gets whether TST signature verification is required.
/// </summary>
public bool RequireTstSignature { get; init; } = true;
/// <summary>
/// Gets whether Rekor consistency check is required.
/// </summary>
public bool RequireRekorConsistency { get; init; } = true;
/// <summary>
/// Gets the maximum allowed time skew.
/// </summary>
public TimeSpan MaxTimeSkew { get; init; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Gets whether to verify TSA certificate revocation.
/// </summary>
public bool VerifyTsaRevocation { get; init; } = true;
/// <summary>
/// Gets whether to allow offline verification.
/// </summary>
public bool AllowOffline { get; init; } = true;
/// <summary>
/// Gets the default options.
/// </summary>
public static AttestationTimestampVerificationOptions Default { get; } = new();
}
/// <summary>
/// Result of attestation timestamp verification.
/// </summary>
public sealed record AttestationTimestampVerificationResult
{
/// <summary>
/// Gets whether the overall verification passed.
/// </summary>
public bool IsValid { get; init; }
/// <summary>
/// Gets the TST verification result.
/// </summary>
public TstVerificationStatus TstStatus { get; init; }
/// <summary>
/// Gets the time consistency result.
/// </summary>
public TimeConsistencyResult? TimeConsistency { get; init; }
/// <summary>
/// Gets the TSA certificate status.
/// </summary>
public TsaCertificateStatus? TsaCertificateStatus { get; init; }
/// <summary>
/// Gets any error message.
/// </summary>
public string? Error { get; init; }
/// <summary>
/// Gets warnings from verification.
/// </summary>
public IReadOnlyList<string>? Warnings { get; init; }
/// <summary>
/// Creates a successful result.
/// </summary>
public static AttestationTimestampVerificationResult Success(
TimeConsistencyResult? timeConsistency = null,
TsaCertificateStatus? certStatus = null,
IReadOnlyList<string>? warnings = null) => new()
{
IsValid = true,
TstStatus = TstVerificationStatus.Valid,
TimeConsistency = timeConsistency,
TsaCertificateStatus = certStatus,
Warnings = warnings
};
/// <summary>
/// Creates a failure result.
/// </summary>
public static AttestationTimestampVerificationResult Failure(
TstVerificationStatus status,
string error) => new()
{
IsValid = false,
TstStatus = status,
Error = error
};
}
/// <summary>
/// Status of TST verification.
/// </summary>
public enum TstVerificationStatus
{
/// <summary>
/// TST is valid.
/// </summary>
Valid,
/// <summary>
/// TST signature is invalid.
/// </summary>
InvalidSignature,
/// <summary>
/// Message imprint does not match.
/// </summary>
ImprintMismatch,
/// <summary>
/// TST has expired.
/// </summary>
Expired,
/// <summary>
/// TSA certificate is revoked.
/// </summary>
CertificateRevoked,
/// <summary>
/// Time consistency check failed.
/// </summary>
TimeInconsistency,
/// <summary>
/// TST is missing.
/// </summary>
Missing,
/// <summary>
/// Unknown error.
/// </summary>
Unknown
}
/// <summary>
/// Status of TSA certificate.
/// </summary>
public sealed record TsaCertificateStatus
{
/// <summary>
/// Gets whether the certificate is valid.
/// </summary>
public bool IsValid { get; init; }
/// <summary>
/// Gets the certificate subject.
/// </summary>
public string? Subject { get; init; }
/// <summary>
/// Gets the certificate expiration.
/// </summary>
public DateTimeOffset? ExpiresAt { get; init; }
/// <summary>
/// Gets the revocation status.
/// </summary>
public string? RevocationStatus { get; init; }
/// <summary>
/// Gets the source of revocation information.
/// </summary>
public string? RevocationSource { get; init; }
}
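
An end-to-end caller sketch; service is an injected IAttestationTimestampService and envelopeBytes is an assumed DSSE payload. Since the TSA call is still a placeholder in this commit, the results are illustrative:

TimestampedAttestation stamped = await service.TimestampAsync(envelopeBytes);
AttestationTimestampVerificationResult verdict = await service.VerifyAsync(stamped);
if (!verdict.IsValid)
{
    Console.WriteLine($"{verdict.TstStatus}: {verdict.Error}");
}
foreach (string warning in verdict.Warnings ?? Array.Empty<string>())
{
    Console.WriteLine($"warning: {warning}");
}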


@@ -0,0 +1,194 @@
// -----------------------------------------------------------------------------
// ITimeCorrelationValidator.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-006 - Rekor Time Correlation
// Description: Interface for validating time correlation between TST and Rekor.
// -----------------------------------------------------------------------------
namespace StellaOps.Attestor.Timestamping;
/// <summary>
/// Validates time correlation between RFC-3161 timestamps and Rekor transparency log entries.
/// Prevents backdating attacks where a TST is obtained for malicious content and submitted
/// to Rekor much later.
/// </summary>
public interface ITimeCorrelationValidator
{
/// <summary>
/// Validates the time correlation between a TST generation time and Rekor integration time.
/// </summary>
/// <param name="tstTime">The generation time from the TST (TSTInfo.genTime).</param>
/// <param name="rekorTime">The integrated time from Rekor (IntegratedTime).</param>
/// <param name="policy">The correlation policy to apply.</param>
/// <returns>The validation result with details.</returns>
TimeCorrelationResult Validate(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
TimeCorrelationPolicy? policy = null);
/// <summary>
/// Validates time correlation asynchronously with metrics recording.
/// </summary>
/// <param name="tstTime">The generation time from the TST.</param>
/// <param name="rekorTime">The integrated time from Rekor.</param>
/// <param name="artifactDigest">The artifact digest for audit logging.</param>
/// <param name="policy">The correlation policy to apply.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The validation result with details.</returns>
Task<TimeCorrelationResult> ValidateAsync(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
string artifactDigest,
TimeCorrelationPolicy? policy = null,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Policy for time correlation validation.
/// </summary>
public sealed record TimeCorrelationPolicy
{
/// <summary>
/// Gets the maximum allowed gap between TST and Rekor times.
/// Default is 5 minutes.
/// </summary>
public TimeSpan MaximumGap { get; init; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Gets the gap threshold that triggers a suspicious warning.
/// Default is 1 minute.
/// </summary>
public TimeSpan SuspiciousGap { get; init; } = TimeSpan.FromMinutes(1);
/// <summary>
/// Gets whether to fail validation on suspicious (but not maximum) gaps.
/// Default is false (warning only).
/// </summary>
public bool FailOnSuspicious { get; init; } = false;
/// <summary>
/// Gets whether TST time must be before or equal to Rekor time.
/// Default is true (TST should come first).
/// </summary>
public bool RequireTstBeforeRekor { get; init; } = true;
/// <summary>
/// Gets the allowed clock skew tolerance for time comparison.
/// Default is 30 seconds.
/// </summary>
public TimeSpan ClockSkewTolerance { get; init; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Gets the default policy.
/// </summary>
public static TimeCorrelationPolicy Default { get; } = new();
/// <summary>
/// Gets a strict policy with no tolerance for gaps.
/// </summary>
public static TimeCorrelationPolicy Strict { get; } = new()
{
MaximumGap = TimeSpan.FromMinutes(2),
SuspiciousGap = TimeSpan.FromSeconds(30),
FailOnSuspicious = true,
ClockSkewTolerance = TimeSpan.FromSeconds(10)
};
}
/// <summary>
/// Result of time correlation validation.
/// </summary>
public sealed record TimeCorrelationResult
{
/// <summary>Gets whether the validation passed.</summary>
public required bool Valid { get; init; }
/// <summary>Gets whether the gap is suspicious but within limits.</summary>
public required bool Suspicious { get; init; }
/// <summary>Gets the actual gap between TST and Rekor times.</summary>
public required TimeSpan Gap { get; init; }
/// <summary>Gets the TST generation time.</summary>
public required DateTimeOffset TstTime { get; init; }
/// <summary>Gets the Rekor integration time.</summary>
public required DateTimeOffset RekorTime { get; init; }
/// <summary>Gets any error message if validation failed.</summary>
public string? ErrorMessage { get; init; }
/// <summary>Gets any warning message for suspicious gaps.</summary>
public string? WarningMessage { get; init; }
/// <summary>Gets the correlation status.</summary>
public TimeCorrelationStatus Status { get; init; }
/// <summary>
/// Creates a valid result.
/// </summary>
public static TimeCorrelationResult CreateValid(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
TimeSpan gap,
bool suspicious = false,
string? warningMessage = null)
{
return new TimeCorrelationResult
{
Valid = true,
Suspicious = suspicious,
Gap = gap,
TstTime = tstTime,
RekorTime = rekorTime,
WarningMessage = warningMessage,
Status = suspicious ? TimeCorrelationStatus.ValidWithWarning : TimeCorrelationStatus.Valid
};
}
/// <summary>
/// Creates an invalid result.
/// </summary>
public static TimeCorrelationResult CreateInvalid(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
TimeSpan gap,
string errorMessage,
TimeCorrelationStatus status)
{
return new TimeCorrelationResult
{
Valid = false,
Suspicious = true,
Gap = gap,
TstTime = tstTime,
RekorTime = rekorTime,
ErrorMessage = errorMessage,
Status = status
};
}
}
/// <summary>
/// Status of time correlation validation.
/// </summary>
public enum TimeCorrelationStatus
{
/// <summary>Times are properly correlated.</summary>
Valid,
/// <summary>Valid but gap is suspicious.</summary>
ValidWithWarning,
/// <summary>Gap exceeds maximum allowed.</summary>
GapExceeded,
/// <summary>TST time is after Rekor time (potential backdating).</summary>
TstAfterRekor,
/// <summary>Time order is suspicious.</summary>
SuspiciousTimeOrder,
/// <summary>Gap is suspicious and policy requires failure.</summary>
SuspiciousGapFailed
}
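
A worked example of the Default policy semantics, assuming a validator instance resolved from DI:

using System.Globalization;

var tst   = DateTimeOffset.Parse("2026-01-19T12:00:00Z", CultureInfo.InvariantCulture);
var rekor = DateTimeOffset.Parse("2026-01-19T12:03:00Z", CultureInfo.InvariantCulture);
TimeCorrelationResult result = validator.Validate(tst, rekor);
// Gap is 3 minutes: within MaximumGap (5 min) but above SuspiciousGap (1 min),
// so result.Valid == true, result.Suspicious == true, Status == ValidWithWarning.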


@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>StellaOps.Attestor.Timestamping</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
</Project>


@@ -0,0 +1,200 @@
// -----------------------------------------------------------------------------
// TimeCorrelationValidator.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-006 - Rekor Time Correlation
// Description: Implementation of time correlation validator.
// -----------------------------------------------------------------------------
using System.Diagnostics.Metrics;
using Microsoft.Extensions.Logging;
namespace StellaOps.Attestor.Timestamping;
/// <summary>
/// Validates time correlation between RFC-3161 timestamps and Rekor transparency log entries.
/// </summary>
public sealed class TimeCorrelationValidator : ITimeCorrelationValidator
{
private readonly ILogger<TimeCorrelationValidator> _logger;
private readonly Histogram<double>? _timeSkewHistogram;
private readonly Counter<long>? _validationCounter;
/// <summary>
/// Initializes a new instance of the <see cref="TimeCorrelationValidator"/> class.
/// </summary>
public TimeCorrelationValidator(
ILogger<TimeCorrelationValidator> logger,
IMeterFactory? meterFactory = null)
{
_logger = logger;
if (meterFactory is not null)
{
var meter = meterFactory.Create("StellaOps.Attestor.Timestamping");
_timeSkewHistogram = meter.CreateHistogram<double>(
"attestation_time_skew_seconds",
unit: "seconds",
description: "Time skew between TST and Rekor in seconds");
_validationCounter = meter.CreateCounter<long>(
"attestation_time_correlation_total",
description: "Total time correlation validations");
}
}
/// <inheritdoc />
public TimeCorrelationResult Validate(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
TimeCorrelationPolicy? policy = null)
{
policy ??= TimeCorrelationPolicy.Default;
// Calculate the gap (positive if Rekor is after TST, negative if TST is after Rekor)
var gap = rekorTime - tstTime;
var absGap = gap.Duration();
// Record metrics
_timeSkewHistogram?.Record(gap.TotalSeconds);
_validationCounter?.Add(1, new KeyValuePair<string, object?>("result", "attempted"));
// Check if TST is after Rekor (potential backdating attack)
if (policy.RequireTstBeforeRekor && gap < -policy.ClockSkewTolerance)
{
_logger.LogWarning(
"TST time {TstTime} is after Rekor time {RekorTime} by {Gap} - potential backdating",
tstTime,
rekorTime,
gap.Negate());
_validationCounter?.Add(1, new KeyValuePair<string, object?>("result", "tst_after_rekor"));
return TimeCorrelationResult.CreateInvalid(
tstTime,
rekorTime,
gap,
$"TST generation time ({tstTime:O}) is after Rekor integration time ({rekorTime:O}) by {gap.Negate()}. This may indicate a backdating attack.",
TimeCorrelationStatus.TstAfterRekor);
}
// Check if gap exceeds maximum
if (absGap > policy.MaximumGap)
{
_logger.LogWarning(
"Time gap {Gap} between TST {TstTime} and Rekor {RekorTime} exceeds maximum {MaxGap}",
absGap,
tstTime,
rekorTime,
policy.MaximumGap);
_validationCounter?.Add(1, new KeyValuePair<string, object?>("result", "gap_exceeded"));
return TimeCorrelationResult.CreateInvalid(
tstTime,
rekorTime,
gap,
$"Time gap ({absGap}) between TST and Rekor exceeds maximum allowed ({policy.MaximumGap}).",
TimeCorrelationStatus.GapExceeded);
}
// Check if gap is suspicious
var suspicious = absGap > policy.SuspiciousGap;
if (suspicious)
{
_logger.LogInformation(
"Suspicious time gap {Gap} between TST {TstTime} and Rekor {RekorTime}",
absGap,
tstTime,
rekorTime);
if (policy.FailOnSuspicious)
{
_validationCounter?.Add(1, new KeyValuePair<string, object?>("result", "suspicious_failed"));
return TimeCorrelationResult.CreateInvalid(
tstTime,
rekorTime,
gap,
$"Suspicious time gap ({absGap}) between TST and Rekor. Policy requires failure on suspicious gaps.",
TimeCorrelationStatus.SuspiciousGapFailed);
}
_validationCounter?.Add(1, new KeyValuePair<string, object?>("result", "suspicious_warning"));
return TimeCorrelationResult.CreateValid(
tstTime,
rekorTime,
gap,
suspicious: true,
warningMessage: $"Time gap ({absGap}) is larger than typical ({policy.SuspiciousGap}). This may indicate delayed Rekor submission.");
}
// Valid correlation
_logger.LogDebug(
"Time correlation valid: TST {TstTime}, Rekor {RekorTime}, gap {Gap}",
tstTime,
rekorTime,
gap);
_validationCounter?.Add(1, new KeyValuePair<string, object?>("result", "valid"));
return TimeCorrelationResult.CreateValid(tstTime, rekorTime, gap);
}
/// <inheritdoc />
public async Task<TimeCorrelationResult> ValidateAsync(
DateTimeOffset tstTime,
DateTimeOffset rekorTime,
string artifactDigest,
TimeCorrelationPolicy? policy = null,
CancellationToken cancellationToken = default)
{
// Perform validation
var result = Validate(tstTime, rekorTime, policy);
// Audit logging for security-relevant events
if (!result.Valid || result.Suspicious)
{
await LogAuditEventAsync(result, artifactDigest, cancellationToken);
}
return result;
}
private Task LogAuditEventAsync(
TimeCorrelationResult result,
string artifactDigest,
CancellationToken cancellationToken)
{
var auditRecord = new
{
EventType = "TimeCorrelationCheck",
Timestamp = DateTimeOffset.UtcNow,
ArtifactDigest = artifactDigest,
TstTime = result.TstTime,
RekorTime = result.RekorTime,
Gap = result.Gap,
Status = result.Status.ToString(),
Valid = result.Valid,
Suspicious = result.Suspicious,
ErrorMessage = result.ErrorMessage,
WarningMessage = result.WarningMessage
};
if (!result.Valid)
{
_logger.LogWarning(
"[AUDIT] Time correlation validation FAILED for {ArtifactDigest}: {@AuditRecord}",
artifactDigest,
auditRecord);
}
else if (result.Suspicious)
{
_logger.LogWarning(
"[AUDIT] Time correlation SUSPICIOUS for {ArtifactDigest}: {@AuditRecord}",
artifactDigest,
auditRecord);
}
return Task.CompletedTask;
}
}
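
A construction sketch outside DI, with tstTime and rekorTime as in the previous example; NullLogger stands in for a real logger, and metrics are skipped because no IMeterFactory is supplied:

using Microsoft.Extensions.Logging.Abstractions;

var validator = new TimeCorrelationValidator(NullLogger<TimeCorrelationValidator>.Instance);
TimeCorrelationResult audited = await validator.ValidateAsync(
    tstTime, rekorTime, artifactDigest: "sha256:abc123", TimeCorrelationPolicy.Strict);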


@@ -0,0 +1,126 @@
// -----------------------------------------------------------------------------
// TimestampedAttestation.cs
// Sprint: SPRINT_20260119_010 Attestor TST Integration
// Task: ATT-001 - Attestation Signing Pipeline Extension
// Description: Models for timestamped attestations.
// -----------------------------------------------------------------------------
namespace StellaOps.Attestor.Timestamping;
/// <summary>
/// An attestation with its associated timestamp evidence.
/// </summary>
public sealed record TimestampedAttestation
{
/// <summary>
/// Gets the signed DSSE envelope.
/// </summary>
public required byte[] Envelope { get; init; }
/// <summary>
/// Gets the envelope hash used for timestamping.
/// </summary>
public required string EnvelopeDigest { get; init; }
/// <summary>
/// Gets the raw RFC-3161 TimeStampToken.
/// </summary>
public required byte[] TimeStampToken { get; init; }
/// <summary>
/// Gets the timestamp generation time.
/// </summary>
public required DateTimeOffset TimestampTime { get; init; }
/// <summary>
/// Gets the TSA name.
/// </summary>
public required string TsaName { get; init; }
/// <summary>
/// Gets the TSA policy OID.
/// </summary>
public required string TsaPolicyOid { get; init; }
/// <summary>
/// Gets the Rekor receipt if submitted to transparency log.
/// </summary>
public RekorReceipt? RekorReceipt { get; init; }
/// <summary>
/// Gets the time consistency result between TST and Rekor.
/// </summary>
public TimeConsistencyResult? TimeConsistency { get; init; }
}
/// <summary>
/// Rekor transparency log receipt.
/// </summary>
public sealed record RekorReceipt
{
/// <summary>
/// Gets the Rekor log ID.
/// </summary>
public required string LogId { get; init; }
/// <summary>
/// Gets the log index.
/// </summary>
public required long LogIndex { get; init; }
/// <summary>
/// Gets the integrated time from Rekor.
/// </summary>
public required DateTimeOffset IntegratedTime { get; init; }
/// <summary>
/// Gets the inclusion proof.
/// </summary>
public byte[]? InclusionProof { get; init; }
/// <summary>
/// Gets the signed entry timestamp.
/// </summary>
public byte[]? SignedEntryTimestamp { get; init; }
}
/// <summary>
/// Result of time consistency check between TST and Rekor.
/// </summary>
public sealed record TimeConsistencyResult
{
/// <summary>
/// Gets the TST generation time.
/// </summary>
public required DateTimeOffset TstTime { get; init; }
/// <summary>
/// Gets the Rekor integrated time.
/// </summary>
public required DateTimeOffset RekorTime { get; init; }
/// <summary>
/// Gets the time skew between TST and Rekor.
/// </summary>
public TimeSpan Skew => RekorTime - TstTime;
/// <summary>
/// Gets whether the skew is within configured tolerance.
/// </summary>
public required bool WithinTolerance { get; init; }
/// <summary>
/// Gets the configured tolerance.
/// </summary>
public required TimeSpan ConfiguredTolerance { get; init; }
/// <summary>
/// Gets whether the temporal ordering is correct (TST before Rekor).
/// </summary>
public bool CorrectOrder => TstTime <= RekorTime;
/// <summary>
/// Gets whether the consistency check passed.
/// </summary>
public bool IsValid => WithinTolerance && CorrectOrder;
}
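
The derived properties compose as follows; a self-contained check with toy times:

using System.Globalization;

var check = new TimeConsistencyResult
{
    TstTime = DateTimeOffset.Parse("2026-01-19T12:00:00Z", CultureInfo.InvariantCulture),
    RekorTime = DateTimeOffset.Parse("2026-01-19T12:02:00Z", CultureInfo.InvariantCulture),
    WithinTolerance = true,
    ConfiguredTolerance = TimeSpan.FromMinutes(5)
};
// check.Skew == TimeSpan.FromMinutes(2); CorrectOrder and IsValid both evaluate to true.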


@@ -0,0 +1,64 @@
// -----------------------------------------------------------------------------
// ITimeStampAuthorityClient.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: Main interface for RFC-3161 timestamping operations.
// -----------------------------------------------------------------------------
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Client interface for RFC-3161 Time-Stamp Authority operations.
/// Supports timestamping of data hashes and verification of TimeStampTokens.
/// </summary>
public interface ITimeStampAuthorityClient
{
/// <summary>
/// Requests a timestamp token for the given data hash.
/// </summary>
/// <param name="request">The timestamp request containing the message imprint.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The timestamp response containing the TimeStampToken or error.</returns>
Task<TimeStampResponse> GetTimeStampAsync(
TimeStampRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies a TimeStampToken against the original data hash.
/// </summary>
/// <param name="token">The TimeStampToken to verify.</param>
/// <param name="originalHash">The original message hash that was timestamped.</param>
/// <param name="options">Verification options.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The verification result with detailed status.</returns>
Task<TimeStampVerificationResult> VerifyAsync(
TimeStampToken token,
ReadOnlyMemory<byte> originalHash,
TimeStampVerificationOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Parses a TimeStampToken from its encoded form.
/// </summary>
/// <param name="encodedToken">The DER-encoded TimeStampToken.</param>
/// <returns>The parsed TimeStampToken.</returns>
TimeStampToken ParseToken(ReadOnlyMemory<byte> encodedToken);
/// <summary>
/// Gets the list of configured TSA providers.
/// </summary>
IReadOnlyList<TsaProviderInfo> Providers { get; }
}
/// <summary>
/// Information about a configured TSA provider.
/// </summary>
/// <param name="Name">Provider name for logging and diagnostics.</param>
/// <param name="Url">TSA endpoint URL.</param>
/// <param name="Priority">Provider priority (lower = higher priority).</param>
/// <param name="IsAvailable">Whether the provider is currently reachable.</param>
public sealed record TsaProviderInfo(
string Name,
Uri Url,
int Priority,
bool IsAvailable);
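
A caller sketch for the client abstraction; client is an assumed ITimeStampAuthorityClient implementation and envelopeBytes an assumed payload. The request/response types are defined in the files that follow:

using System.Security.Cryptography;

byte[] digest = SHA256.HashData(envelopeBytes);
var request = TimeStampRequest.CreateFromHash(digest, HashAlgorithmName.SHA256);
TimeStampResponse response = await client.GetTimeStampAsync(request);
if (response.IsSuccess)
{
    TimeStampVerificationResult check = await client.VerifyAsync(response.Token!, digest);
}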


@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>StellaOps.Authority.Timestamping.Abstractions</RootNamespace>
</PropertyGroup>
</Project>


@@ -0,0 +1,123 @@
// -----------------------------------------------------------------------------
// TimeStampRequest.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: RFC 3161 TimeStampReq wrapper with builder pattern.
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Represents an RFC 3161 TimeStampReq for requesting a timestamp from a TSA.
/// </summary>
public sealed record TimeStampRequest
{
/// <summary>
/// Gets the version number (always 1 for RFC 3161).
/// </summary>
public int Version { get; init; } = 1;
/// <summary>
/// Gets the hash algorithm used for the message imprint.
/// </summary>
public required HashAlgorithmName HashAlgorithm { get; init; }
/// <summary>
/// Gets the hash of the data to be timestamped (message imprint).
/// </summary>
public required ReadOnlyMemory<byte> MessageImprint { get; init; }
/// <summary>
/// Gets the optional TSA policy OID.
/// </summary>
public string? PolicyOid { get; init; }
/// <summary>
/// Gets the optional nonce for replay protection.
/// </summary>
public ReadOnlyMemory<byte>? Nonce { get; init; }
/// <summary>
/// Gets whether to request the TSA certificate in the response.
/// </summary>
public bool CertificateRequired { get; init; } = true;
/// <summary>
/// Gets optional extensions.
/// </summary>
public IReadOnlyList<TimeStampExtension>? Extensions { get; init; }
/// <summary>
/// Creates a new TimeStampRequest for the given data.
/// </summary>
/// <param name="data">The data to timestamp.</param>
/// <param name="hashAlgorithm">The hash algorithm to use.</param>
/// <param name="includeNonce">Whether to include a random nonce.</param>
/// <returns>A new TimeStampRequest.</returns>
public static TimeStampRequest Create(
ReadOnlySpan<byte> data,
HashAlgorithmName hashAlgorithm,
bool includeNonce = true)
{
var hash = ComputeHash(data, hashAlgorithm);
return new TimeStampRequest
{
HashAlgorithm = hashAlgorithm,
MessageImprint = hash,
Nonce = includeNonce ? GenerateNonce() : null
};
}
/// <summary>
/// Creates a new TimeStampRequest for a pre-computed hash.
/// </summary>
/// <param name="hash">The pre-computed hash.</param>
/// <param name="hashAlgorithm">The hash algorithm used.</param>
/// <param name="includeNonce">Whether to include a random nonce.</param>
/// <returns>A new TimeStampRequest.</returns>
public static TimeStampRequest CreateFromHash(
ReadOnlyMemory<byte> hash,
HashAlgorithmName hashAlgorithm,
bool includeNonce = true)
{
return new TimeStampRequest
{
HashAlgorithm = hashAlgorithm,
MessageImprint = hash,
Nonce = includeNonce ? GenerateNonce() : null
};
}
private static byte[] ComputeHash(ReadOnlySpan<byte> data, HashAlgorithmName algorithm)
{
// The static HashData overloads hash the span directly, avoiding both the
// HashAlgorithm instance and the defensive array copy.
return algorithm.Name switch
{
"SHA256" => SHA256.HashData(data),
"SHA384" => SHA384.HashData(data),
"SHA512" => SHA512.HashData(data),
"SHA1" => SHA1.HashData(data), // Legacy support
_ => throw new ArgumentException($"Unsupported hash algorithm: {algorithm.Name}", nameof(algorithm))
};
}
private static byte[] GenerateNonce()
{
var nonce = new byte[8];
RandomNumberGenerator.Fill(nonce);
return nonce;
}
}
/// <summary>
/// Represents an extension in a timestamp request.
/// </summary>
/// <param name="Oid">The extension OID.</param>
/// <param name="Critical">Whether the extension is critical.</param>
/// <param name="Value">The extension value.</param>
public sealed record TimeStampExtension(
string Oid,
bool Critical,
ReadOnlyMemory<byte> Value);

View File

@@ -0,0 +1,155 @@
// -----------------------------------------------------------------------------
// TimeStampResponse.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: RFC 3161 TimeStampResp wrapper with status and token.
// -----------------------------------------------------------------------------
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Represents an RFC 3161 TimeStampResp from a TSA.
/// </summary>
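/// <example>
/// A sketch of the expected consumption pattern (the <c>response</c> value is assumed):
/// <code>
/// if (response.IsSuccess)
/// {
///     Console.WriteLine($"Timestamped at {response.Token!.TstInfo.GenTime:O}");
/// }
/// else
/// {
///     Console.WriteLine($"Rejected: {response.Status} ({response.FailureInfo})");
/// }
/// </code>
/// </example>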
public sealed record TimeStampResponse
{
/// <summary>
/// Gets the PKI status of the response.
/// </summary>
public required PkiStatus Status { get; init; }
/// <summary>
/// Gets the status string from the TSA (if any).
/// </summary>
public string? StatusString { get; init; }
/// <summary>
/// Gets the failure info if the request was rejected.
/// </summary>
public PkiFailureInfo? FailureInfo { get; init; }
/// <summary>
/// Gets the TimeStampToken if the request was granted.
/// </summary>
public TimeStampToken? Token { get; init; }
/// <summary>
/// Gets whether the response contains a valid token.
/// </summary>
public bool IsSuccess => (Status is PkiStatus.Granted or PkiStatus.GrantedWithMods) && Token is not null;
/// <summary>
/// Gets the provider that issued this response.
/// </summary>
public string? ProviderName { get; init; }
/// <summary>
/// Gets the duration of the request.
/// </summary>
public TimeSpan? RequestDuration { get; init; }
/// <summary>
/// Creates a successful response.
/// </summary>
public static TimeStampResponse Success(TimeStampToken token, string? providerName = null) => new()
{
Status = PkiStatus.Granted,
Token = token,
ProviderName = providerName
};
/// <summary>
/// Creates a failed response.
/// </summary>
public static TimeStampResponse Failure(
PkiStatus status,
PkiFailureInfo? failureInfo = null,
string? statusString = null) => new()
{
Status = status,
FailureInfo = failureInfo,
StatusString = statusString
};
}
/// <summary>
/// RFC 3161 PKIStatus values.
/// </summary>
public enum PkiStatus
{
/// <summary>
/// The request was granted.
/// </summary>
Granted = 0,
/// <summary>
/// The request was granted with modifications.
/// </summary>
GrantedWithMods = 1,
/// <summary>
/// The request was rejected.
/// </summary>
Rejection = 2,
/// <summary>
/// The request is being processed (async).
/// </summary>
Waiting = 3,
/// <summary>
/// A revocation warning was issued.
/// </summary>
RevocationWarning = 4,
/// <summary>
/// A revocation notification was issued.
/// </summary>
RevocationNotification = 5
}
/// <summary>
/// RFC 3161 PKIFailureInfo bit flags.
/// </summary>
[Flags]
public enum PkiFailureInfo
{
/// <summary>
/// Unrecognized or unsupported algorithm.
/// </summary>
BadAlg = 1 << 0,
/// <summary>
/// The request was badly formed.
/// </summary>
BadRequest = 1 << 2,
/// <summary>
/// The data format is incorrect.
/// </summary>
BadDataFormat = 1 << 5,
/// <summary>
/// The time source is not available.
/// </summary>
TimeNotAvailable = 1 << 14,
/// <summary>
/// The requested policy is not supported.
/// </summary>
UnacceptedPolicy = 1 << 15,
/// <summary>
/// The requested extension is not supported.
/// </summary>
UnacceptedExtension = 1 << 16,
/// <summary>
/// Additional information is required.
/// </summary>
AddInfoNotAvailable = 1 << 17,
/// <summary>
/// A system failure occurred.
/// </summary>
SystemFailure = 1 << 25
}

View File

@@ -0,0 +1,164 @@
// -----------------------------------------------------------------------------
// TimeStampToken.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: RFC 3161 TimeStampToken wrapper with parsed TSTInfo fields.
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Security.Cryptography.X509Certificates;
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Represents an RFC 3161 TimeStampToken containing the signed timestamp.
/// </summary>
public sealed record TimeStampToken
{
/// <summary>
/// Gets the raw DER-encoded TimeStampToken.
/// </summary>
public required ReadOnlyMemory<byte> EncodedToken { get; init; }
/// <summary>
/// Gets the parsed TSTInfo from the token.
/// </summary>
public required TstInfo TstInfo { get; init; }
/// <summary>
/// Gets the signer certificate if included in the token.
/// </summary>
public X509Certificate2? SignerCertificate { get; init; }
/// <summary>
/// Gets any additional certificates from the token.
/// </summary>
public IReadOnlyList<X509Certificate2>? Certificates { get; init; }
/// <summary>
/// Gets the CMS signature algorithm OID.
/// </summary>
public string? SignatureAlgorithmOid { get; init; }
/// <summary>
/// Gets the digest of the TSTInfo (for display/logging).
/// </summary>
public string TstInfoDigest
{
get
{
var hash = SHA256.HashData(TstInfo.EncodedTstInfo.Span);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
}
/// <summary>
/// Represents the TSTInfo structure from a TimeStampToken.
/// </summary>
public sealed record TstInfo
{
/// <summary>
/// Gets the raw DER-encoded TSTInfo.
/// </summary>
public required ReadOnlyMemory<byte> EncodedTstInfo { get; init; }
/// <summary>
/// Gets the version (always 1).
/// </summary>
public int Version { get; init; } = 1;
/// <summary>
/// Gets the TSA policy OID.
/// </summary>
public required string PolicyOid { get; init; }
/// <summary>
/// Gets the hash algorithm used for the message imprint.
/// </summary>
public required HashAlgorithmName HashAlgorithm { get; init; }
/// <summary>
/// Gets the message imprint hash.
/// </summary>
public required ReadOnlyMemory<byte> MessageImprint { get; init; }
/// <summary>
/// Gets the serial number assigned by the TSA.
/// </summary>
public required ReadOnlyMemory<byte> SerialNumber { get; init; }
/// <summary>
/// Gets the generation time of the timestamp.
/// </summary>
public required DateTimeOffset GenTime { get; init; }
/// <summary>
/// Gets the accuracy of the timestamp (optional).
/// </summary>
public TstAccuracy? Accuracy { get; init; }
/// <summary>
/// Gets whether ordering is guaranteed.
/// </summary>
public bool Ordering { get; init; }
/// <summary>
/// Gets the nonce if present.
/// </summary>
public ReadOnlyMemory<byte>? Nonce { get; init; }
/// <summary>
/// Gets the TSA name if present.
/// </summary>
public string? TsaName { get; init; }
/// <summary>
/// Gets any extensions.
/// </summary>
public IReadOnlyList<TimeStampExtension>? Extensions { get; init; }
/// <summary>
/// Gets the effective time range considering accuracy.
/// </summary>
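/// <example>
/// For example, with <c>GenTime</c> = 12:00:00Z and an accuracy of one second,
/// the effective range is 11:59:59Z through 12:00:01Z:
/// <code>
/// var (earliest, latest) = tstInfo.GetTimeRange(); // tstInfo assumed
/// </code>
/// </example>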
public (DateTimeOffset Earliest, DateTimeOffset Latest) GetTimeRange()
{
if (Accuracy is null)
return (GenTime, GenTime);
var delta = Accuracy.ToTimeSpan();
return (GenTime - delta, GenTime + delta);
}
}
/// <summary>
/// Represents the accuracy of a timestamp.
/// </summary>
public sealed record TstAccuracy
{
/// <summary>
/// Gets the seconds component.
/// </summary>
public int? Seconds { get; init; }
/// <summary>
/// Gets the milliseconds component (0-999).
/// </summary>
public int? Millis { get; init; }
/// <summary>
/// Gets the microseconds component (0-999).
/// </summary>
public int? Micros { get; init; }
/// <summary>
/// Converts to a TimeSpan.
/// </summary>
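/// <example>
/// For example, <c>Seconds = 1, Millis = 500</c> converts to a 1.5-second span.
/// </example>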
public TimeSpan ToTimeSpan()
{
var totalMicros = (Seconds ?? 0) * 1_000_000L
+ (Millis ?? 0) * 1_000L
+ (Micros ?? 0);
return TimeSpan.FromMicroseconds(totalMicros);
}
}

View File

@@ -0,0 +1,97 @@
// -----------------------------------------------------------------------------
// TimeStampVerificationOptions.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: Options for timestamp verification behavior.
// -----------------------------------------------------------------------------
using System.Security.Cryptography.X509Certificates;
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Options for TimeStampToken verification.
/// </summary>
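/// <example>
/// A sketch of offline verification against a pinned TSA root (the certificate
/// file path is hypothetical):
/// <code>
/// var anchors = new X509Certificate2Collection
/// {
///     X509CertificateLoader.LoadCertificateFromFile("tsa-root.cer")
/// };
/// var options = TimeStampVerificationOptions.Offline with { TrustAnchors = anchors };
/// </code>
/// </example>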
public sealed record TimeStampVerificationOptions
{
/// <summary>
/// Gets or sets whether to verify the certificate chain.
/// </summary>
public bool VerifyCertificateChain { get; init; } = true;
/// <summary>
/// Gets or sets whether to check certificate revocation.
/// </summary>
public bool CheckRevocation { get; init; } = true;
/// <summary>
/// Gets or sets the revocation mode.
/// </summary>
public X509RevocationMode RevocationMode { get; init; } = X509RevocationMode.Online;
/// <summary>
/// Gets or sets the revocation flag.
/// </summary>
public X509RevocationFlag RevocationFlag { get; init; } = X509RevocationFlag.ExcludeRoot;
/// <summary>
/// Gets or sets additional trust anchors.
/// </summary>
public X509Certificate2Collection? TrustAnchors { get; init; }
/// <summary>
/// Gets or sets additional intermediate certificates.
/// </summary>
public X509Certificate2Collection? IntermediateCertificates { get; init; }
/// <summary>
/// Gets or sets the expected nonce (for replay protection).
/// </summary>
public ReadOnlyMemory<byte>? ExpectedNonce { get; init; }
/// <summary>
/// Gets or sets acceptable policy OIDs. If set, a timestamp whose policy OID is not in this list is flagged during verification.
/// </summary>
public IReadOnlyList<string>? AcceptablePolicies { get; init; }
/// <summary>
/// Gets or sets the verification time. If null, uses current time.
/// </summary>
public DateTimeOffset? VerificationTime { get; init; }
/// <summary>
/// Gets or sets whether to allow weak hash algorithms (SHA-1).
/// </summary>
public bool AllowWeakHashAlgorithms { get; init; } = false;
/// <summary>
/// Gets or sets the maximum acceptable accuracy in seconds.
/// </summary>
public int? MaxAccuracySeconds { get; init; }
/// <summary>
/// Gets the default verification options.
/// </summary>
public static TimeStampVerificationOptions Default { get; } = new();
/// <summary>
/// Gets strict verification options (all checks enabled, no weak algorithms).
/// </summary>
public static TimeStampVerificationOptions Strict { get; } = new()
{
VerifyCertificateChain = true,
CheckRevocation = true,
AllowWeakHashAlgorithms = false,
MaxAccuracySeconds = 60
};
/// <summary>
/// Gets offline verification options (no revocation checks).
/// </summary>
public static TimeStampVerificationOptions Offline { get; } = new()
{
VerifyCertificateChain = true,
CheckRevocation = false,
RevocationMode = X509RevocationMode.NoCheck
};
}

View File

@@ -0,0 +1,247 @@
// -----------------------------------------------------------------------------
// TimeStampVerificationResult.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: Verification result with detailed status and chain info.
// -----------------------------------------------------------------------------
using System.Security.Cryptography.X509Certificates;
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Result of TimeStampToken verification.
/// </summary>
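/// <example>
/// A sketch of acting on a result (the <c>result</c> value is assumed):
/// <code>
/// if (result.IsValid)
/// {
///     Console.WriteLine($"Verified time: {result.VerifiedTime:O}");
/// }
/// else
/// {
///     Console.WriteLine($"Failed: {result.Error?.Code}: {result.Error?.Message}");
/// }
/// </code>
/// </example>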
public sealed record TimeStampVerificationResult
{
/// <summary>
/// Gets the overall verification status.
/// </summary>
public required VerificationStatus Status { get; init; }
/// <summary>
/// Gets the verified generation time (if valid).
/// </summary>
public DateTimeOffset? VerifiedTime { get; init; }
/// <summary>
/// Gets the time range considering accuracy.
/// </summary>
public (DateTimeOffset Earliest, DateTimeOffset Latest)? TimeRange { get; init; }
/// <summary>
/// Gets the policy OID from the timestamp.
/// </summary>
public string? PolicyOid { get; init; }
/// <summary>
/// Gets the signer certificate.
/// </summary>
public X509Certificate2? SignerCertificate { get; init; }
/// <summary>
/// Gets the certificate chain used for validation.
/// </summary>
public IReadOnlyList<X509Certificate2>? CertificateChain { get; init; }
/// <summary>
/// Gets detailed error information if verification failed.
/// </summary>
public VerificationError? Error { get; init; }
/// <summary>
/// Gets any warnings encountered during verification.
/// </summary>
public IReadOnlyList<VerificationWarning>? Warnings { get; init; }
/// <summary>
/// Gets whether the verification was successful.
/// </summary>
public bool IsValid => Status == VerificationStatus.Valid;
/// <summary>
/// Creates a successful verification result.
/// </summary>
public static TimeStampVerificationResult Success(
DateTimeOffset verifiedTime,
(DateTimeOffset, DateTimeOffset)? timeRange = null,
string? policyOid = null,
X509Certificate2? signerCertificate = null,
IReadOnlyList<X509Certificate2>? chain = null,
IReadOnlyList<VerificationWarning>? warnings = null) => new()
{
Status = VerificationStatus.Valid,
VerifiedTime = verifiedTime,
TimeRange = timeRange,
PolicyOid = policyOid,
SignerCertificate = signerCertificate,
CertificateChain = chain,
Warnings = warnings
};
/// <summary>
/// Creates a failed verification result.
/// </summary>
public static TimeStampVerificationResult Failure(VerificationError error) => new()
{
Status = error.Code switch
{
VerificationErrorCode.SignatureInvalid => VerificationStatus.SignatureInvalid,
VerificationErrorCode.CertificateExpired => VerificationStatus.CertificateError,
VerificationErrorCode.CertificateRevoked => VerificationStatus.CertificateError,
VerificationErrorCode.CertificateChainInvalid => VerificationStatus.CertificateError,
VerificationErrorCode.MessageImprintMismatch => VerificationStatus.ImprintMismatch,
VerificationErrorCode.NonceMismatch => VerificationStatus.NonceMismatch,
_ => VerificationStatus.Invalid
},
Error = error
};
}
/// <summary>
/// Verification status codes.
/// </summary>
public enum VerificationStatus
{
/// <summary>
/// The timestamp is valid.
/// </summary>
Valid,
/// <summary>
/// The signature is invalid.
/// </summary>
SignatureInvalid,
/// <summary>
/// The message imprint doesn't match.
/// </summary>
ImprintMismatch,
/// <summary>
/// The nonce doesn't match.
/// </summary>
NonceMismatch,
/// <summary>
/// Certificate validation failed.
/// </summary>
CertificateError,
/// <summary>
/// The timestamp is structurally invalid.
/// </summary>
Invalid
}
/// <summary>
/// Detailed verification error information.
/// </summary>
/// <param name="Code">The error code.</param>
/// <param name="Message">Human-readable error message.</param>
/// <param name="Details">Additional details.</param>
public sealed record VerificationError(
VerificationErrorCode Code,
string Message,
string? Details = null);
/// <summary>
/// Verification error codes.
/// </summary>
public enum VerificationErrorCode
{
/// <summary>
/// Unknown error.
/// </summary>
Unknown,
/// <summary>
/// The token is malformed.
/// </summary>
MalformedToken,
/// <summary>
/// The CMS signature is invalid.
/// </summary>
SignatureInvalid,
/// <summary>
/// The message imprint doesn't match the original data.
/// </summary>
MessageImprintMismatch,
/// <summary>
/// The nonce doesn't match the request.
/// </summary>
NonceMismatch,
/// <summary>
/// The signer certificate is expired.
/// </summary>
CertificateExpired,
/// <summary>
/// The signer certificate is revoked.
/// </summary>
CertificateRevoked,
/// <summary>
/// The certificate chain is invalid.
/// </summary>
CertificateChainInvalid,
/// <summary>
/// The ESSCertIDv2 binding is invalid.
/// </summary>
EssCertIdMismatch,
/// <summary>
/// The signing certificate is missing.
/// </summary>
SignerCertificateMissing,
/// <summary>
/// No trust anchor found for the chain.
/// </summary>
NoTrustAnchor
}
/// <summary>
/// Non-fatal warning encountered during verification.
/// </summary>
/// <param name="Code">The warning code.</param>
/// <param name="Message">Human-readable warning message.</param>
public sealed record VerificationWarning(
VerificationWarningCode Code,
string Message);
/// <summary>
/// Verification warning codes.
/// </summary>
public enum VerificationWarningCode
{
/// <summary>
/// Revocation check was skipped.
/// </summary>
RevocationCheckSkipped,
/// <summary>
/// The timestamp accuracy is large.
/// </summary>
LargeAccuracy,
/// <summary>
/// The policy OID is not recognized.
/// </summary>
UnknownPolicy,
/// <summary>
/// The certificate is nearing expiration.
/// </summary>
CertificateNearingExpiration,
/// <summary>
/// Using weak hash algorithm.
/// </summary>
WeakHashAlgorithm
}

View File

@@ -0,0 +1,142 @@
// -----------------------------------------------------------------------------
// TsaClientOptions.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-001 - Core Abstractions & Models
// Description: Configuration options for TSA client and providers.
// -----------------------------------------------------------------------------
namespace StellaOps.Authority.Timestamping.Abstractions;
/// <summary>
/// Global configuration options for the TSA client.
/// </summary>
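/// <example>
/// A configuration sketch with priority failover (the endpoint URLs are illustrative):
/// <code>
/// var options = new TsaClientOptions
/// {
///     FailoverStrategy = FailoverStrategy.Priority,
///     Providers =
///     [
///         new TsaProviderOptions { Name = "primary", Url = new Uri("https://tsa.example.com/tsr"), Priority = 1 },
///         new TsaProviderOptions { Name = "backup", Url = new Uri("https://tsa-backup.example.com/tsr"), Priority = 2 }
///     ]
/// };
/// </code>
/// </example>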
public sealed class TsaClientOptions
{
/// <summary>
/// Gets or sets the configured TSA providers.
/// </summary>
public List<TsaProviderOptions> Providers { get; set; } = [];
/// <summary>
/// Gets or sets the failover strategy.
/// </summary>
public FailoverStrategy FailoverStrategy { get; set; } = FailoverStrategy.Priority;
/// <summary>
/// Gets or sets whether to cache timestamp responses.
/// </summary>
public bool EnableCaching { get; set; } = true;
/// <summary>
/// Gets or sets the cache duration for successful timestamps.
/// </summary>
public TimeSpan CacheDuration { get; set; } = TimeSpan.FromHours(24);
/// <summary>
/// Gets or sets the default hash algorithm for requests.
/// </summary>
public string DefaultHashAlgorithm { get; set; } = "SHA256";
/// <summary>
/// Gets or sets whether to include nonce by default.
/// </summary>
public bool IncludeNonceByDefault { get; set; } = true;
/// <summary>
/// Gets or sets whether to request certificates by default.
/// </summary>
public bool RequestCertificatesByDefault { get; set; } = true;
/// <summary>
/// Gets or sets the verification options to use by default.
/// </summary>
public TimeStampVerificationOptions DefaultVerificationOptions { get; set; } = TimeStampVerificationOptions.Default;
}
/// <summary>
/// Configuration options for a single TSA provider.
/// </summary>
public sealed class TsaProviderOptions
{
/// <summary>
/// Gets or sets the provider name.
/// </summary>
public required string Name { get; set; }
/// <summary>
/// Gets or sets the TSA endpoint URL.
/// </summary>
public required Uri Url { get; set; }
/// <summary>
/// Gets or sets the priority (lower = higher priority).
/// </summary>
public int Priority { get; set; } = 100;
/// <summary>
/// Gets or sets the request timeout.
/// </summary>
public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Gets or sets the number of retry attempts.
/// </summary>
public int RetryCount { get; set; } = 3;
/// <summary>
/// Gets or sets the base delay for exponential backoff.
/// </summary>
public TimeSpan RetryBaseDelay { get; set; } = TimeSpan.FromSeconds(1);
/// <summary>
/// Gets or sets the policy OID to request (optional).
/// </summary>
public string? PolicyOid { get; set; }
/// <summary>
/// Gets or sets the path to a client certificate for mutual TLS (optional).
/// </summary>
public string? ClientCertificatePath { get; set; }
/// <summary>
/// Gets or sets custom HTTP headers.
/// </summary>
public Dictionary<string, string> Headers { get; set; } = [];
/// <summary>
/// Gets or sets whether this provider is enabled.
/// </summary>
public bool Enabled { get; set; } = true;
/// <summary>
/// Gets or sets the TSA certificate for verification (optional).
/// If not set, certificate is extracted from response.
/// </summary>
public string? TsaCertificatePath { get; set; }
}
/// <summary>
/// Strategy for handling multiple TSA providers.
/// </summary>
public enum FailoverStrategy
{
/// <summary>
/// Try providers in priority order until one succeeds.
/// </summary>
Priority,
/// <summary>
/// Try providers in round-robin fashion.
/// </summary>
RoundRobin,
/// <summary>
/// Use the provider with lowest latency from recent requests.
/// </summary>
LowestLatency,
/// <summary>
/// Randomly select a provider.
/// </summary>
Random
}

View File

@@ -0,0 +1,165 @@
// -----------------------------------------------------------------------------
// Asn1/TimeStampReqEncoder.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-002 - ASN.1 Parsing & Generation
// Description: ASN.1 DER encoder for RFC 3161 TimeStampReq.
// -----------------------------------------------------------------------------
using System.Formats.Asn1;
using System.Security.Cryptography;
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping.Asn1;
/// <summary>
/// Encodes RFC 3161 TimeStampReq to DER format.
/// </summary>
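/// <example>
/// A sketch of producing the DER bytes for an <c>application/timestamp-query</c>
/// POST body (the <c>payload</c> buffer is assumed):
/// <code>
/// var request = TimeStampRequest.Create(payload, HashAlgorithmName.SHA256);
/// byte[] der = TimeStampReqEncoder.Encode(request);
/// </code>
/// </example>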
public static class TimeStampReqEncoder
{
// OID mappings for hash algorithms
private static readonly Dictionary<string, string> HashAlgorithmOids = new()
{
["SHA1"] = "1.3.14.3.2.26",
["SHA256"] = "2.16.840.1.101.3.4.2.1",
["SHA384"] = "2.16.840.1.101.3.4.2.2",
["SHA512"] = "2.16.840.1.101.3.4.2.3",
["SHA3-256"] = "2.16.840.1.101.3.4.2.8",
["SHA3-384"] = "2.16.840.1.101.3.4.2.9",
["SHA3-512"] = "2.16.840.1.101.3.4.2.10"
};
/// <summary>
/// Encodes a TimeStampRequest to DER format.
/// </summary>
/// <param name="request">The request to encode.</param>
/// <returns>DER-encoded TimeStampReq.</returns>
public static byte[] Encode(TimeStampRequest request)
{
var writer = new AsnWriter(AsnEncodingRules.DER);
// TimeStampReq ::= SEQUENCE
using (writer.PushSequence())
{
// version INTEGER { v1(1) }
writer.WriteInteger(request.Version);
// messageImprint MessageImprint
WriteMessageImprint(writer, request.HashAlgorithm, request.MessageImprint.Span);
// reqPolicy TSAPolicyId OPTIONAL
if (!string.IsNullOrEmpty(request.PolicyOid))
{
writer.WriteObjectIdentifier(request.PolicyOid);
}
// nonce INTEGER OPTIONAL
if (request.Nonce is { Length: > 0 })
{
writer.WriteIntegerUnsigned(request.Nonce.Value.Span);
}
// certReq BOOLEAN DEFAULT FALSE
if (request.CertificateRequired)
{
writer.WriteBoolean(true);
}
// extensions [0] IMPLICIT Extensions OPTIONAL
if (request.Extensions is { Count: > 0 })
{
WriteExtensions(writer, request.Extensions);
}
}
return writer.Encode();
}
private static void WriteMessageImprint(AsnWriter writer, HashAlgorithmName algorithm, ReadOnlySpan<byte> hash)
{
// MessageImprint ::= SEQUENCE {
// hashAlgorithm AlgorithmIdentifier,
// hashedMessage OCTET STRING
// }
using (writer.PushSequence())
{
WriteAlgorithmIdentifier(writer, algorithm);
writer.WriteOctetString(hash);
}
}
private static void WriteAlgorithmIdentifier(AsnWriter writer, HashAlgorithmName algorithm)
{
var algorithmName = algorithm.Name ?? throw new ArgumentException("Hash algorithm name is required");
if (!HashAlgorithmOids.TryGetValue(algorithmName, out var oid))
{
throw new ArgumentException($"Unsupported hash algorithm: {algorithmName}");
}
// AlgorithmIdentifier ::= SEQUENCE {
// algorithm OBJECT IDENTIFIER,
// parameters ANY DEFINED BY algorithm OPTIONAL
// }
using (writer.PushSequence())
{
writer.WriteObjectIdentifier(oid);
// Parameters: emit an explicit NULL for broad TSA compatibility
// (RFC 5754 prefers absent parameters for SHA-2, but NULL is widely tolerated).
writer.WriteNull();
}
}
private static void WriteExtensions(AsnWriter writer, IReadOnlyList<TimeStampExtension> extensions)
{
// [0] IMPLICIT Extensions
using (writer.PushSequence(new Asn1Tag(TagClass.ContextSpecific, 0)))
{
foreach (var ext in extensions)
{
// Extension ::= SEQUENCE {
// extnID OBJECT IDENTIFIER,
// critical BOOLEAN DEFAULT FALSE,
// extnValue OCTET STRING
// }
using (writer.PushSequence())
{
writer.WriteObjectIdentifier(ext.Oid);
if (ext.Critical)
{
writer.WriteBoolean(true);
}
writer.WriteOctetString(ext.Value.Span);
}
}
}
}
/// <summary>
/// Gets the OID for a hash algorithm.
/// </summary>
/// <param name="algorithm">The hash algorithm.</param>
/// <returns>The OID string.</returns>
public static string GetHashAlgorithmOid(HashAlgorithmName algorithm)
{
var name = algorithm.Name ?? throw new ArgumentException("Hash algorithm name is required");
return HashAlgorithmOids.TryGetValue(name, out var oid)
? oid
: throw new ArgumentException($"Unsupported hash algorithm: {name}");
}
/// <summary>
/// Gets the hash algorithm name from an OID.
/// </summary>
/// <param name="oid">The OID string.</param>
/// <returns>The hash algorithm name.</returns>
public static HashAlgorithmName GetHashAlgorithmFromOid(string oid)
{
foreach (var (name, algOid) in HashAlgorithmOids)
{
if (algOid == oid)
{
return new HashAlgorithmName(name);
}
}
throw new ArgumentException($"Unknown hash algorithm OID: {oid}");
}
}

View File

@@ -0,0 +1,362 @@
// -----------------------------------------------------------------------------
// Asn1/TimeStampRespDecoder.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-002 - ASN.1 Parsing & Generation
// Description: ASN.1 DER decoder for RFC 3161 TimeStampResp.
// -----------------------------------------------------------------------------
using System.Formats.Asn1;
using System.Numerics;
using System.Security.Cryptography;
using System.Security.Cryptography.X509Certificates;
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping.Asn1;
/// <summary>
/// Decodes RFC 3161 TimeStampResp from DER format.
/// </summary>
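/// <example>
/// A sketch of decoding a raw TSA reply (the <c>replyBytes</c> buffer is assumed):
/// <code>
/// var response = TimeStampRespDecoder.Decode(replyBytes);
/// if (response.IsSuccess)
/// {
///     Console.WriteLine(response.Token!.TstInfo.GenTime);
/// }
/// </code>
/// </example>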
public static class TimeStampRespDecoder
{
/// <summary>
/// Decodes a TimeStampResp from DER-encoded bytes.
/// </summary>
/// <param name="encoded">The DER-encoded TimeStampResp.</param>
/// <returns>The decoded TimeStampResponse.</returns>
public static TimeStampResponse Decode(ReadOnlyMemory<byte> encoded)
{
var reader = new AsnReader(encoded, AsnEncodingRules.DER);
var respSequence = reader.ReadSequence();
// PKIStatusInfo
var statusInfo = respSequence.ReadSequence();
var status = (PkiStatus)(int)statusInfo.ReadInteger();
string? statusString = null;
PkiFailureInfo? failureInfo = null;
// statusString SEQUENCE OF UTF8String OPTIONAL
if (statusInfo.HasData && statusInfo.PeekTag().TagValue == 16) // SEQUENCE
{
var statusStrings = statusInfo.ReadSequence();
var strings = new List<string>();
while (statusStrings.HasData)
{
strings.Add(statusStrings.ReadCharacterString(UniversalTagNumber.UTF8String));
}
statusString = string.Join("; ", strings);
}
// failInfo BIT STRING OPTIONAL
if (statusInfo.HasData)
{
var failBits = statusInfo.ReadBitString(out _);
if (failBits.Length > 0)
{
var failValue = 0;
for (var i = 0; i < Math.Min(failBits.Length * 8, 26); i++)
{
if ((failBits[i / 8] & (1 << (7 - (i % 8)))) != 0)
{
failValue |= 1 << i;
}
}
failureInfo = (PkiFailureInfo)failValue;
}
}
// TimeStampToken ContentInfo OPTIONAL
TimeStampToken? token = null;
if (respSequence.HasData)
{
var contentInfoBytes = respSequence.PeekEncodedValue();
token = TimeStampTokenDecoder.Decode(contentInfoBytes);
}
return new TimeStampResponse
{
Status = status,
StatusString = statusString,
FailureInfo = failureInfo,
Token = token
};
}
}
/// <summary>
/// Decodes RFC 3161 TimeStampToken from DER format.
/// </summary>
public static class TimeStampTokenDecoder
{
private const string SignedDataOid = "1.2.840.113549.1.7.2";
private const string TstInfoOid = "1.2.840.113549.1.9.16.1.4";
/// <summary>
/// Decodes a TimeStampToken from DER-encoded bytes.
/// </summary>
/// <param name="encoded">The DER-encoded TimeStampToken (ContentInfo).</param>
/// <returns>The decoded TimeStampToken.</returns>
public static TimeStampToken Decode(ReadOnlyMemory<byte> encoded)
{
var reader = new AsnReader(encoded, AsnEncodingRules.DER);
// ContentInfo ::= SEQUENCE { contentType, content [0] EXPLICIT }
var contentInfo = reader.ReadSequence();
var contentType = contentInfo.ReadObjectIdentifier();
if (contentType != SignedDataOid)
{
throw new CryptographicException($"Expected SignedData OID, got: {contentType}");
}
// [0] EXPLICIT SignedData
var signedDataTag = contentInfo.ReadSequence(new Asn1Tag(TagClass.ContextSpecific, 0));
var signedData = signedDataTag.ReadSequence();
// SignedData version
signedData.ReadInteger();
// DigestAlgorithmIdentifiers SET
signedData.ReadSetOf();
// EncapsulatedContentInfo (contains TSTInfo)
var encapContent = signedData.ReadSequence();
var encapContentType = encapContent.ReadObjectIdentifier();
if (encapContentType != TstInfoOid)
{
throw new CryptographicException($"Expected TSTInfo OID, got: {encapContentType}");
}
// [0] EXPLICIT OCTET STRING containing TSTInfo
var tstInfoWrapper = encapContent.ReadSequence(new Asn1Tag(TagClass.ContextSpecific, 0));
var tstInfoBytes = tstInfoWrapper.ReadOctetString();
var tstInfo = DecodeTstInfo(tstInfoBytes);
// Extract certificates if present
X509Certificate2? signerCert = null;
List<X509Certificate2>? certs = null;
string? signatureAlgorithmOid = null;
// [0] IMPLICIT CertificateSet OPTIONAL
if (signedData.HasData)
{
var nextTag = signedData.PeekTag();
if (nextTag.TagClass == TagClass.ContextSpecific && nextTag.TagValue == 0)
{
var certSet = signedData.ReadSetOf(new Asn1Tag(TagClass.ContextSpecific, 0, true));
certs = [];
while (certSet.HasData)
{
// ReadEncodedValue consumes the whole CertificateChoices element, which
// also tolerates non-certificate alternatives such as attribute certs.
var certBytes = certSet.ReadEncodedValue().ToArray();
try
{
var cert = X509CertificateLoader.LoadCertificate(certBytes);
certs.Add(cert);
}
catch
{
// Skip invalid certificates
}
}
signerCert = certs.FirstOrDefault();
}
}
// Skip CRLs [1] if present, then parse SignerInfos
while (signedData.HasData)
{
var tag = signedData.PeekTag();
if (tag.TagClass == TagClass.ContextSpecific && tag.TagValue == 1)
{
signedData.ReadSetOf(new Asn1Tag(TagClass.ContextSpecific, 1, true));
continue;
}
// SignerInfos SET OF SignerInfo
if (tag.TagValue == 17) // SET
{
var signerInfos = signedData.ReadSetOf();
if (signerInfos.HasData)
{
var signerInfo = signerInfos.ReadSequence();
signerInfo.ReadInteger(); // version
signerInfo.ReadSequence(); // sid (skip)
var digestAlg = signerInfo.ReadSequence();
digestAlg.ReadObjectIdentifier(); // skip digest alg
// Skip signed attributes if present [0]
if (signerInfo.HasData && signerInfo.PeekTag().TagClass == TagClass.ContextSpecific)
{
signerInfo.ReadSetOf(new Asn1Tag(TagClass.ContextSpecific, 0, true));
}
if (signerInfo.HasData)
{
var sigAlg = signerInfo.ReadSequence();
signatureAlgorithmOid = sigAlg.ReadObjectIdentifier();
}
}
break;
}
break;
}
return new TimeStampToken
{
EncodedToken = encoded,
TstInfo = tstInfo,
SignerCertificate = signerCert,
Certificates = certs,
SignatureAlgorithmOid = signatureAlgorithmOid
};
}
private static TstInfo DecodeTstInfo(byte[] encoded)
{
var reader = new AsnReader(encoded, AsnEncodingRules.DER);
var tstInfo = reader.ReadSequence();
// version INTEGER
var version = (int)tstInfo.ReadInteger();
// policy TSAPolicyId
var policyOid = tstInfo.ReadObjectIdentifier();
// messageImprint MessageImprint
var msgImprint = tstInfo.ReadSequence();
var algId = msgImprint.ReadSequence();
var hashOid = algId.ReadObjectIdentifier();
var hashAlgorithm = TimeStampReqEncoder.GetHashAlgorithmFromOid(hashOid);
var imprint = msgImprint.ReadOctetString();
// serialNumber INTEGER
var serialNumber = tstInfo.ReadIntegerBytes().ToArray();
// genTime GeneralizedTime
var genTime = tstInfo.ReadGeneralizedTime();
TstAccuracy? accuracy = null;
bool ordering = false;
byte[]? nonce = null;
string? tsaName = null;
List<TimeStampExtension>? extensions = null;
// Optional fields
while (tstInfo.HasData)
{
var tag = tstInfo.PeekTag();
// accuracy Accuracy OPTIONAL
if (tag.TagValue == 16 && tag.TagClass == TagClass.Universal) // SEQUENCE
{
accuracy = DecodeAccuracy(tstInfo.ReadSequence());
continue;
}
// ordering BOOLEAN DEFAULT FALSE
if (tag.TagValue == 1 && tag.TagClass == TagClass.Universal) // BOOLEAN
{
ordering = tstInfo.ReadBoolean();
continue;
}
// nonce INTEGER OPTIONAL
if (tag.TagValue == 2 && tag.TagClass == TagClass.Universal) // INTEGER
{
nonce = tstInfo.ReadIntegerBytes().ToArray();
continue;
}
// tsa [0] GeneralName OPTIONAL
if (tag.TagClass == TagClass.ContextSpecific && tag.TagValue == 0)
{
// GeneralName is a CHOICE; full parsing is out of scope here, so the
// field is consumed and only its presence is recorded.
tstInfo.ReadSequence(new Asn1Tag(TagClass.ContextSpecific, 0));
tsaName = "(TSA GeneralName present)";
continue;
}
// extensions [1] IMPLICIT Extensions OPTIONAL
if (tag.TagClass == TagClass.ContextSpecific && tag.TagValue == 1)
{
var extSeq = tstInfo.ReadSequence(new Asn1Tag(TagClass.ContextSpecific, 1));
extensions = [];
while (extSeq.HasData)
{
var ext = extSeq.ReadSequence();
var extOid = ext.ReadObjectIdentifier();
var critical = false;
if (ext.HasData && ext.PeekTag().TagValue == 1) // BOOLEAN
{
critical = ext.ReadBoolean();
}
var extValue = ext.ReadOctetString();
extensions.Add(new TimeStampExtension(extOid, critical, extValue));
}
continue;
}
// Unknown, skip
tstInfo.ReadEncodedValue();
}
return new TstInfo
{
EncodedTstInfo = encoded,
Version = version,
PolicyOid = policyOid,
HashAlgorithm = hashAlgorithm,
MessageImprint = imprint,
SerialNumber = serialNumber,
GenTime = genTime,
Accuracy = accuracy,
Ordering = ordering,
Nonce = nonce,
TsaName = tsaName,
Extensions = extensions
};
}
private static TstAccuracy DecodeAccuracy(AsnReader reader)
{
int? seconds = null;
int? millis = null;
int? micros = null;
while (reader.HasData)
{
var tag = reader.PeekTag();
if (tag.TagValue == 2 && tag.TagClass == TagClass.Universal) // INTEGER (seconds)
{
seconds = (int)reader.ReadInteger();
continue;
}
if (tag.TagClass == TagClass.ContextSpecific && tag.TagValue == 0) // [0] millis
{
var millisReader = reader.ReadSequence(new Asn1Tag(TagClass.ContextSpecific, 0));
millis = (int)millisReader.ReadInteger();
continue;
}
if (tag.TagClass == TagClass.ContextSpecific && tag.TagValue == 1) // [1] micros
{
var microsReader = reader.ReadSequence(new Asn1Tag(TagClass.ContextSpecific, 1));
micros = (int)microsReader.ReadInteger();
continue;
}
reader.ReadEncodedValue(); // skip unknown
}
return new TstAccuracy
{
Seconds = seconds,
Millis = millis,
Micros = micros
};
}
}

View File

@@ -0,0 +1,82 @@
// -----------------------------------------------------------------------------
// ITsaCacheStore.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-005 - Provider Configuration & Management
// Description: Cache store interface for timestamp tokens.
// -----------------------------------------------------------------------------
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping.Caching;
/// <summary>
/// Cache store for TimeStampTokens to avoid redundant TSA requests.
/// </summary>
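/// <example>
/// A sketch of the intended get-or-request pattern (<c>cache</c>, <c>client</c>,
/// and <c>hash</c> are assumed):
/// <code>
/// var token = await cache.GetAsync(hash, ct);
/// if (token is null)
/// {
///     var request = TimeStampRequest.CreateFromHash(hash, HashAlgorithmName.SHA256);
///     var response = await client.GetTimeStampAsync(request, ct);
///     if (response.IsSuccess)
///     {
///         token = response.Token;
///         await cache.SetAsync(hash, token!, TimeSpan.FromHours(24), ct);
///     }
/// }
/// </code>
/// </example>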
public interface ITsaCacheStore
{
/// <summary>
/// Gets a cached timestamp token for the given hash.
/// </summary>
/// <param name="messageImprint">The hash that was timestamped.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The cached token if found, null otherwise.</returns>
Task<TimeStampToken?> GetAsync(ReadOnlyMemory<byte> messageImprint, CancellationToken cancellationToken = default);
/// <summary>
/// Stores a timestamp token in the cache.
/// </summary>
/// <param name="messageImprint">The hash that was timestamped.</param>
/// <param name="token">The timestamp token.</param>
/// <param name="expiration">How long to cache the token.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task SetAsync(
ReadOnlyMemory<byte> messageImprint,
TimeStampToken token,
TimeSpan expiration,
CancellationToken cancellationToken = default);
/// <summary>
/// Removes a timestamp token from the cache.
/// </summary>
/// <param name="messageImprint">The hash that was timestamped.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task RemoveAsync(ReadOnlyMemory<byte> messageImprint, CancellationToken cancellationToken = default);
/// <summary>
/// Gets statistics about the cache.
/// </summary>
TsaCacheStats GetStats();
}
/// <summary>
/// Statistics about the TSA cache.
/// </summary>
public sealed record TsaCacheStats
{
/// <summary>
/// Gets the number of items in the cache.
/// </summary>
public int ItemCount { get; init; }
/// <summary>
/// Gets the cache hit count since startup.
/// </summary>
public long HitCount { get; init; }
/// <summary>
/// Gets the cache miss count since startup.
/// </summary>
public long MissCount { get; init; }
/// <summary>
/// Gets the hit rate as a percentage.
/// </summary>
public double HitRate => HitCount + MissCount > 0
? (double)HitCount / (HitCount + MissCount) * 100
: 0;
/// <summary>
/// Gets the approximate size in bytes.
/// </summary>
public long ApproximateSizeBytes { get; init; }
}

View File

@@ -0,0 +1,120 @@
// -----------------------------------------------------------------------------
// InMemoryTsaCacheStore.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-005 - Provider Configuration & Management
// Description: In-memory cache store implementation.
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping.Caching;
/// <summary>
/// In-memory implementation of <see cref="ITsaCacheStore"/>.
/// </summary>
public sealed class InMemoryTsaCacheStore : ITsaCacheStore, IDisposable
{
private readonly ConcurrentDictionary<string, CacheEntry> _cache = new();
private readonly Timer _cleanupTimer;
private long _hitCount;
private long _missCount;
/// <summary>
/// Initializes a new instance of the <see cref="InMemoryTsaCacheStore"/> class.
/// </summary>
/// <param name="cleanupInterval">How often to run cleanup of expired entries.</param>
public InMemoryTsaCacheStore(TimeSpan? cleanupInterval = null)
{
var interval = cleanupInterval ?? TimeSpan.FromMinutes(5);
_cleanupTimer = new Timer(CleanupExpired, null, interval, interval);
}
/// <inheritdoc />
public Task<TimeStampToken?> GetAsync(
ReadOnlyMemory<byte> messageImprint,
CancellationToken cancellationToken = default)
{
var key = ToKey(messageImprint);
if (_cache.TryGetValue(key, out var entry))
{
if (entry.ExpiresAt > DateTimeOffset.UtcNow)
{
Interlocked.Increment(ref _hitCount);
return Task.FromResult<TimeStampToken?>(entry.Token);
}
// Expired, remove it
_cache.TryRemove(key, out _);
}
Interlocked.Increment(ref _missCount);
return Task.FromResult<TimeStampToken?>(null);
}
/// <inheritdoc />
public Task SetAsync(
ReadOnlyMemory<byte> messageImprint,
TimeStampToken token,
TimeSpan expiration,
CancellationToken cancellationToken = default)
{
var key = ToKey(messageImprint);
var entry = new CacheEntry(token, DateTimeOffset.UtcNow + expiration);
_cache[key] = entry;
return Task.CompletedTask;
}
/// <inheritdoc />
public Task RemoveAsync(
ReadOnlyMemory<byte> messageImprint,
CancellationToken cancellationToken = default)
{
var key = ToKey(messageImprint);
_cache.TryRemove(key, out _);
return Task.CompletedTask;
}
/// <inheritdoc />
public TsaCacheStats GetStats()
{
var now = DateTimeOffset.UtcNow;
var validEntries = _cache.Values.Where(e => e.ExpiresAt > now).ToList();
return new TsaCacheStats
{
ItemCount = validEntries.Count,
HitCount = Interlocked.Read(ref _hitCount),
MissCount = Interlocked.Read(ref _missCount),
ApproximateSizeBytes = validEntries.Sum(e => e.Token.EncodedToken.Length)
};
}
/// <inheritdoc />
public void Dispose()
{
_cleanupTimer.Dispose();
}
private void CleanupExpired(object? state)
{
var now = DateTimeOffset.UtcNow;
var expiredKeys = _cache
.Where(kvp => kvp.Value.ExpiresAt <= now)
.Select(kvp => kvp.Key)
.ToList();
foreach (var key in expiredKeys)
{
_cache.TryRemove(key, out _);
}
}
private static string ToKey(ReadOnlyMemory<byte> messageImprint)
{
return Convert.ToHexString(messageImprint.Span);
}
private sealed record CacheEntry(TimeStampToken Token, DateTimeOffset ExpiresAt);
}

View File

@@ -0,0 +1,217 @@
// -----------------------------------------------------------------------------
// HttpTsaClient.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-003 - HTTP TSA Client
// Description: HTTP(S) client for RFC 3161 TSA endpoints with failover.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Net.Http.Headers;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Authority.Timestamping.Abstractions;
using StellaOps.Authority.Timestamping.Asn1;
namespace StellaOps.Authority.Timestamping;
/// <summary>
/// HTTP(S) client for RFC 3161 TSA endpoints with multi-provider failover.
/// </summary>
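/// <example>
/// An end-to-end sketch, assuming <c>client</c> was resolved from DI and
/// <c>artifactBytes</c> holds the data to timestamp:
/// <code>
/// var request = TimeStampRequest.Create(artifactBytes, HashAlgorithmName.SHA256);
/// var response = await client.GetTimeStampAsync(request, ct);
/// if (response.IsSuccess)
/// {
///     var result = await client.VerifyAsync(
///         response.Token!,
///         request.MessageImprint,
///         new TimeStampVerificationOptions { ExpectedNonce = request.Nonce },
///         ct);
///     Console.WriteLine(result.IsValid ? $"Trusted time: {result.VerifiedTime:O}" : result.Error?.Message);
/// }
/// </code>
/// </example>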
public sealed class HttpTsaClient : ITimeStampAuthorityClient
{
private const string TimeStampQueryContentType = "application/timestamp-query";
private const string TimeStampReplyContentType = "application/timestamp-reply";
private readonly IHttpClientFactory _httpClientFactory;
private readonly TsaClientOptions _options;
private readonly TimeStampTokenVerifier _verifier;
private readonly ILogger<HttpTsaClient> _logger;
private readonly List<TsaProviderInfo> _providerInfo;
private int _roundRobinIndex;
/// <summary>
/// Initializes a new instance of the <see cref="HttpTsaClient"/> class.
/// </summary>
public HttpTsaClient(
IHttpClientFactory httpClientFactory,
IOptions<TsaClientOptions> options,
TimeStampTokenVerifier verifier,
ILogger<HttpTsaClient> logger)
{
_httpClientFactory = httpClientFactory;
_options = options.Value;
_verifier = verifier;
_logger = logger;
_providerInfo = _options.Providers
.Where(p => p.Enabled)
.OrderBy(p => p.Priority)
.Select(p => new TsaProviderInfo(p.Name, p.Url, p.Priority, true))
.ToList();
}
/// <inheritdoc />
public IReadOnlyList<TsaProviderInfo> Providers => _providerInfo;
/// <inheritdoc />
public async Task<TimeStampResponse> GetTimeStampAsync(
TimeStampRequest request,
CancellationToken cancellationToken = default)
{
var orderedProviders = GetOrderedProviders();
foreach (var provider in orderedProviders)
{
try
{
var response = await TryGetTimeStampFromProviderAsync(
provider, request, cancellationToken);
if (response.IsSuccess)
{
_logger.LogInformation(
"Timestamp obtained from provider {Provider} in {Duration}ms",
provider.Name,
response.RequestDuration?.TotalMilliseconds ?? 0);
return response;
}
_logger.LogWarning(
"Provider {Provider} returned status {Status}: {StatusString}",
provider.Name,
response.Status,
response.StatusString ?? response.FailureInfo?.ToString());
}
// Genuine caller cancellation must propagate; only provider-side failures roll over.
catch (Exception ex) when (!cancellationToken.IsCancellationRequested
&& ex is HttpRequestException or TaskCanceledException or OperationCanceledException)
{
_logger.LogWarning(
ex,
"Provider {Provider} failed, trying next",
provider.Name);
}
}
return TimeStampResponse.Failure(
PkiStatus.Rejection,
PkiFailureInfo.SystemFailure,
"All TSA providers failed");
}
private async Task<TimeStampResponse> TryGetTimeStampFromProviderAsync(
TsaProviderOptions provider,
TimeStampRequest request,
CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient($"TSA_{provider.Name}");
client.Timeout = provider.Timeout;
var encodedRequest = TimeStampReqEncoder.Encode(request);
var content = new ByteArrayContent(encodedRequest);
content.Headers.ContentType = new MediaTypeHeaderValue(TimeStampQueryContentType);
foreach (var (key, value) in provider.Headers)
{
content.Headers.TryAddWithoutValidation(key, value);
}
var stopwatch = Stopwatch.StartNew();
var lastException = default(Exception);
for (var attempt = 0; attempt <= provider.RetryCount; attempt++)
{
if (attempt > 0)
{
var delay = TimeSpan.FromTicks(
provider.RetryBaseDelay.Ticks * (1L << (attempt - 1)));
await Task.Delay(delay, cancellationToken);
}
try
{
var httpResponse = await client.PostAsync(
provider.Url, content, cancellationToken);
if (!httpResponse.IsSuccessStatusCode)
{
_logger.LogWarning(
"TSA {Provider} returned HTTP {StatusCode}",
provider.Name,
httpResponse.StatusCode);
continue;
}
var responseContentType = httpResponse.Content.Headers.ContentType?.MediaType;
if (responseContentType != TimeStampReplyContentType)
{
_logger.LogWarning(
"TSA {Provider} returned unexpected content type: {ContentType}",
provider.Name,
responseContentType);
}
var responseBytes = await httpResponse.Content.ReadAsByteArrayAsync(cancellationToken);
stopwatch.Stop();
var response = TimeStampRespDecoder.Decode(responseBytes);
return response with
{
ProviderName = provider.Name,
RequestDuration = stopwatch.Elapsed
};
}
catch (Exception ex) when (!cancellationToken.IsCancellationRequested
&& ex is HttpRequestException or TaskCanceledException)
{
lastException = ex;
_logger.LogDebug(
ex,
"Attempt {Attempt}/{MaxAttempts} to {Provider} failed",
attempt + 1,
provider.RetryCount + 1,
provider.Name);
}
}
// Throwing HttpRequestException keeps the per-provider failover in
// GetTimeStampAsync working when every attempt ended in a non-success HTTP status.
throw lastException ?? new HttpRequestException(
$"TSA '{provider.Name}' returned a non-success HTTP status on every attempt");
}
/// <inheritdoc />
public async Task<TimeStampVerificationResult> VerifyAsync(
TimeStampToken token,
ReadOnlyMemory<byte> originalHash,
TimeStampVerificationOptions? options = null,
CancellationToken cancellationToken = default)
{
return await _verifier.VerifyAsync(
token, originalHash, options ?? _options.DefaultVerificationOptions, cancellationToken);
}
/// <inheritdoc />
public TimeStampToken ParseToken(ReadOnlyMemory<byte> encodedToken)
{
return TimeStampTokenDecoder.Decode(encodedToken);
}
private IEnumerable<TsaProviderOptions> GetOrderedProviders()
{
var enabled = _options.Providers.Where(p => p.Enabled).ToList();
return _options.FailoverStrategy switch
{
FailoverStrategy.Priority => enabled.OrderBy(p => p.Priority),
FailoverStrategy.RoundRobin => GetRoundRobinOrder(enabled),
FailoverStrategy.Random => enabled.OrderBy(_ => Random.Shared.Next()),
FailoverStrategy.LowestLatency => enabled.OrderBy(p => p.Priority), // TODO: track latency
_ => enabled.OrderBy(p => p.Priority)
};
}
private IEnumerable<TsaProviderOptions> GetRoundRobinOrder(List<TsaProviderOptions> providers)
{
if (providers.Count == 0)
{
yield break;
}
// Masking keeps the index non-negative after the counter wraps past int.MaxValue.
var startIndex = (Interlocked.Increment(ref _roundRobinIndex) & int.MaxValue) % providers.Count;
for (var i = 0; i < providers.Count; i++)
{
yield return providers[(startIndex + i) % providers.Count];
}
}
}

View File

@@ -0,0 +1,219 @@
// -----------------------------------------------------------------------------
// ITsaProviderRegistry.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-005 - Provider Configuration & Management
// Description: Registry interface for TSA providers with health tracking.
// -----------------------------------------------------------------------------
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping;
/// <summary>
/// Registry for managing TSA providers with health tracking.
/// </summary>
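/// <example>
/// A sketch of feeding request outcomes back into the registry (the
/// <c>registry</c> instance is assumed):
/// <code>
/// registry.ReportSuccess("primary", TimeSpan.FromMilliseconds(120));
/// var health = registry.GetHealth("primary");
/// Console.WriteLine($"{health.Status}, consecutive failures: {health.ConsecutiveFailures}");
/// </code>
/// </example>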
public interface ITsaProviderRegistry
{
/// <summary>
/// Gets all registered providers.
/// </summary>
IReadOnlyList<TsaProviderState> GetProviders();
/// <summary>
/// Gets providers ordered by the configured failover strategy.
/// </summary>
/// <param name="excludeUnhealthy">Whether to exclude unhealthy providers.</param>
IEnumerable<TsaProviderOptions> GetOrderedProviders(bool excludeUnhealthy = true);
/// <summary>
/// Reports a successful request to a provider.
/// </summary>
/// <param name="providerName">The provider name.</param>
/// <param name="latency">The request latency.</param>
void ReportSuccess(string providerName, TimeSpan latency);
/// <summary>
/// Reports a failed request to a provider.
/// </summary>
/// <param name="providerName">The provider name.</param>
/// <param name="error">The error message.</param>
void ReportFailure(string providerName, string error);
/// <summary>
/// Gets the health status of a provider.
/// </summary>
/// <param name="providerName">The provider name.</param>
TsaProviderHealth GetHealth(string providerName);
/// <summary>
/// Forces a health check on a provider.
/// </summary>
/// <param name="providerName">The provider name.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task<TsaProviderHealth> CheckHealthAsync(string providerName, CancellationToken cancellationToken = default);
}
/// <summary>
/// State of a TSA provider including health and statistics.
/// </summary>
public sealed record TsaProviderState
{
/// <summary>
/// Gets the provider options.
/// </summary>
public required TsaProviderOptions Options { get; init; }
/// <summary>
/// Gets the current health status.
/// </summary>
public required TsaProviderHealth Health { get; init; }
/// <summary>
/// Gets the usage statistics.
/// </summary>
public required TsaProviderStats Stats { get; init; }
}
/// <summary>
/// Health status of a TSA provider.
/// </summary>
public sealed record TsaProviderHealth
{
/// <summary>
/// Gets whether the provider is healthy.
/// </summary>
public bool IsHealthy { get; init; }
/// <summary>
/// Gets the health status.
/// </summary>
public TsaHealthStatus Status { get; init; }
/// <summary>
/// Gets the last error message if unhealthy.
/// </summary>
public string? LastError { get; init; }
/// <summary>
/// Gets when the provider was last checked.
/// </summary>
public DateTimeOffset? LastCheckedAt { get; init; }
/// <summary>
/// Gets when the provider became unhealthy.
/// </summary>
public DateTimeOffset? UnhealthySince { get; init; }
/// <summary>
/// Gets the consecutive failure count.
/// </summary>
public int ConsecutiveFailures { get; init; }
/// <summary>
/// Gets when the provider can be retried (if in backoff).
/// </summary>
public DateTimeOffset? RetryAfter { get; init; }
/// <summary>
/// Creates a healthy status.
/// </summary>
public static TsaProviderHealth Healthy() => new()
{
IsHealthy = true,
Status = TsaHealthStatus.Healthy,
LastCheckedAt = DateTimeOffset.UtcNow
};
/// <summary>
/// Creates an unhealthy status.
/// </summary>
public static TsaProviderHealth Unhealthy(string error, int failures, DateTimeOffset? retryAfter = null) => new()
{
IsHealthy = false,
Status = retryAfter.HasValue ? TsaHealthStatus.InBackoff : TsaHealthStatus.Unhealthy,
LastError = error,
LastCheckedAt = DateTimeOffset.UtcNow,
UnhealthySince = DateTimeOffset.UtcNow,
ConsecutiveFailures = failures,
RetryAfter = retryAfter
};
}
/// <summary>
/// Health status enum for TSA providers.
/// </summary>
public enum TsaHealthStatus
{
/// <summary>
/// Provider is unknown (not yet checked).
/// </summary>
Unknown,
/// <summary>
/// Provider is healthy.
/// </summary>
Healthy,
/// <summary>
/// Provider is degraded (slow but functional).
/// </summary>
Degraded,
/// <summary>
/// Provider is unhealthy (failures detected).
/// </summary>
Unhealthy,
/// <summary>
/// Provider is in backoff period after failures.
/// </summary>
InBackoff
}
/// <summary>
/// Usage statistics for a TSA provider.
/// </summary>
public sealed record TsaProviderStats
{
/// <summary>
/// Gets the total number of requests.
/// </summary>
public long TotalRequests { get; init; }
/// <summary>
/// Gets the number of successful requests.
/// </summary>
public long SuccessCount { get; init; }
/// <summary>
/// Gets the number of failed requests.
/// </summary>
public long FailureCount { get; init; }
/// <summary>
/// Gets the success rate as a percentage.
/// </summary>
public double SuccessRate => TotalRequests > 0
? (double)SuccessCount / TotalRequests * 100
: 0;
/// <summary>
/// Gets the average latency in milliseconds.
/// </summary>
public double AverageLatencyMs { get; init; }
/// <summary>
/// Gets the P95 latency in milliseconds.
/// </summary>
public double P95LatencyMs { get; init; }
/// <summary>
/// Gets the last successful request time.
/// </summary>
public DateTimeOffset? LastSuccessAt { get; init; }
/// <summary>
/// Gets the last failed request time.
/// </summary>
public DateTimeOffset? LastFailureAt { get; init; }
}

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>StellaOps.Authority.Timestamping</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="System.Security.Cryptography.Pkcs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Authority.Timestamping.Abstractions\StellaOps.Authority.Timestamping.Abstractions.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,223 @@
// -----------------------------------------------------------------------------
// TimeStampTokenVerifier.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-004 - TST Signature Verification
// Description: Cryptographic verification of TimeStampToken signatures.
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Security.Cryptography.Pkcs;
using System.Security.Cryptography.X509Certificates;
using Microsoft.Extensions.Logging;
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping;
/// <summary>
/// Verifies TimeStampToken signatures and certificate chains.
/// </summary>
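/// <example>
/// A sketch of strict verification (the <c>verifier</c>, <c>token</c>, and
/// <c>originalHash</c> values are assumed):
/// <code>
/// var result = await verifier.VerifyAsync(token, originalHash, TimeStampVerificationOptions.Strict);
/// if (result.Warnings is not null)
/// {
///     foreach (var warning in result.Warnings)
///     {
///         Console.WriteLine($"warning: {warning.Code}: {warning.Message}");
///     }
/// }
/// </code>
/// </example>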
public sealed class TimeStampTokenVerifier
{
private readonly ILogger<TimeStampTokenVerifier> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="TimeStampTokenVerifier"/> class.
/// </summary>
public TimeStampTokenVerifier(ILogger<TimeStampTokenVerifier> logger)
{
_logger = logger;
}
/// <summary>
/// Verifies a TimeStampToken.
/// </summary>
public Task<TimeStampVerificationResult> VerifyAsync(
TimeStampToken token,
ReadOnlyMemory<byte> originalHash,
TimeStampVerificationOptions options,
CancellationToken cancellationToken = default)
{
var warnings = new List<VerificationWarning>();
try
{
// Step 1: Verify message imprint matches
if (!token.TstInfo.MessageImprint.Span.SequenceEqual(originalHash.Span))
{
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
VerificationErrorCode.MessageImprintMismatch,
"The message imprint in the timestamp does not match the original hash")));
}
// Step 2: Verify nonce if expected
if (options.ExpectedNonce is { Length: > 0 })
{
if (token.TstInfo.Nonce is null)
{
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
VerificationErrorCode.NonceMismatch,
"Expected nonce but timestamp has no nonce")));
}
if (!token.TstInfo.Nonce.Value.Span.SequenceEqual(options.ExpectedNonce.Value.Span))
{
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
VerificationErrorCode.NonceMismatch,
"Timestamp nonce does not match expected nonce")));
}
}
// Step 3: Check hash algorithm strength
if (!options.AllowWeakHashAlgorithms &&
token.TstInfo.HashAlgorithm.Name == "SHA1")
{
warnings.Add(new VerificationWarning(
VerificationWarningCode.WeakHashAlgorithm,
"Timestamp uses SHA-1 which is considered weak"));
}
// Step 4: Verify CMS signature
var signedCms = new SignedCms();
signedCms.Decode(token.EncodedToken.ToArray());
X509Certificate2? signerCert = null;
try
{
// Try to find signer certificate
if (signedCms.SignerInfos.Count > 0)
{
var signerInfo = signedCms.SignerInfos[0];
signerCert = signerInfo.Certificate;
// Verify only the signature here; chain validation happens in step 5
// with the configured policy (custom trust anchors, revocation mode).
signerInfo.CheckSignature(verifySignatureOnly: true);
}
}
catch (CryptographicException ex)
{
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
VerificationErrorCode.SignatureInvalid,
"CMS signature verification failed",
ex.Message)));
}
// Step 5: Verify certificate chain if requested
X509Chain? chain = null;
if (options.VerifyCertificateChain && signerCert is not null)
{
chain = new X509Chain();
chain.ChainPolicy.RevocationMode = options.CheckRevocation
? options.RevocationMode
: X509RevocationMode.NoCheck;
chain.ChainPolicy.RevocationFlag = options.RevocationFlag;
if (options.VerificationTime.HasValue)
{
// UtcDateTime preserves the instant; .DateTime would silently drop the offset.
chain.ChainPolicy.VerificationTime = options.VerificationTime.Value.UtcDateTime;
}
if (options.TrustAnchors is not null)
{
chain.ChainPolicy.CustomTrustStore.AddRange(options.TrustAnchors);
chain.ChainPolicy.TrustMode = X509ChainTrustMode.CustomRootTrust;
}
if (options.IntermediateCertificates is not null)
{
chain.ChainPolicy.ExtraStore.AddRange(options.IntermediateCertificates);
}
if (!chain.Build(signerCert))
{
var status = chain.ChainStatus.FirstOrDefault();
var errorCode = status.Status switch
{
X509ChainStatusFlags.NotTimeValid => VerificationErrorCode.CertificateExpired,
X509ChainStatusFlags.Revoked => VerificationErrorCode.CertificateRevoked,
X509ChainStatusFlags.UntrustedRoot => VerificationErrorCode.NoTrustAnchor,
_ => VerificationErrorCode.CertificateChainInvalid
};
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
errorCode,
$"Certificate chain validation failed: {status.StatusInformation}",
string.Join(", ", chain.ChainStatus.Select(s => s.Status)))));
}
// Check if revocation check was actually performed
if (options.CheckRevocation &&
chain.ChainStatus.Any(s => s.Status == X509ChainStatusFlags.RevocationStatusUnknown))
{
warnings.Add(new VerificationWarning(
VerificationWarningCode.RevocationCheckSkipped,
"Revocation status could not be determined"));
}
}
else if (options.VerifyCertificateChain && signerCert is null)
{
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
VerificationErrorCode.SignerCertificateMissing,
"No signer certificate found in timestamp token")));
}
// Step 6: Check policy if required
if (options.AcceptablePolicies is { Count: > 0 })
{
if (!options.AcceptablePolicies.Contains(token.TstInfo.PolicyOid))
{
warnings.Add(new VerificationWarning(
VerificationWarningCode.UnknownPolicy,
$"Timestamp policy {token.TstInfo.PolicyOid} is not in acceptable policies list"));
}
}
// Step 7: Check accuracy if required
if (options.MaxAccuracySeconds.HasValue && token.TstInfo.Accuracy is not null)
{
var accuracySpan = token.TstInfo.Accuracy.ToTimeSpan();
if (accuracySpan.TotalSeconds > options.MaxAccuracySeconds.Value)
{
warnings.Add(new VerificationWarning(
VerificationWarningCode.LargeAccuracy,
$"Timestamp accuracy ({accuracySpan.TotalSeconds}s) exceeds maximum ({options.MaxAccuracySeconds}s)"));
}
}
// Step 8: Check certificate expiration warning
if (signerCert is not null)
{
// X509Certificate2.NotAfter is expressed in local time; normalize before comparing.
var daysUntilExpiry = (signerCert.NotAfter.ToUniversalTime() - DateTime.UtcNow).TotalDays;
if (daysUntilExpiry < 30 && daysUntilExpiry > 0)
{
warnings.Add(new VerificationWarning(
VerificationWarningCode.CertificateNearingExpiration,
$"TSA certificate expires in {daysUntilExpiry:F0} days"));
}
}
// Success
return Task.FromResult(TimeStampVerificationResult.Success(
token.TstInfo.GenTime,
token.TstInfo.GetTimeRange(),
token.TstInfo.PolicyOid,
signerCert,
chain?.ChainElements.Select(e => e.Certificate).ToList(),
warnings.Count > 0 ? warnings : null));
}
catch (Exception ex)
{
_logger.LogError(ex, "Timestamp verification failed unexpectedly");
return Task.FromResult(TimeStampVerificationResult.Failure(
new VerificationError(
VerificationErrorCode.Unknown,
"Unexpected error during verification",
ex.Message)));
}
}
}

View File

@@ -0,0 +1,107 @@
// -----------------------------------------------------------------------------
// TimestampingServiceCollectionExtensions.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-007 - DI Integration
// Description: DI registration for timestamping services.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.Authority.Timestamping.Abstractions;
using StellaOps.Authority.Timestamping.Caching;
namespace StellaOps.Authority.Timestamping;
/// <summary>
/// Extension methods for registering timestamping services.
/// </summary>
public static class TimestampingServiceCollectionExtensions
{
/// <summary>
/// Adds RFC-3161 timestamping services to the service collection.
/// </summary>
/// <param name="services">The service collection.</param>
/// <param name="configure">Configuration action for TSA options.</param>
/// <returns>The service collection for chaining.</returns>
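/// <example>
/// Minimal wiring sketch (the endpoint URL is a placeholder):
/// <code>
/// services.AddTimestamping();
/// services.AddTsaProvider("Internal", "https://tsa.example.internal/tsr",
///     opts => opts.Priority = 50);
/// </code>
/// </example>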
public static IServiceCollection AddTimestamping(
this IServiceCollection services,
Action<TsaClientOptions>? configure = null)
{
services.AddOptions<TsaClientOptions>();
if (configure is not null)
{
services.Configure(configure);
}
// Register HTTP client factory if not already registered
services.AddHttpClient();
// Register core services
services.TryAddSingleton<TimeStampTokenVerifier>();
services.TryAddSingleton<ITsaProviderRegistry, TsaProviderRegistry>();
services.TryAddSingleton<ITsaCacheStore, InMemoryTsaCacheStore>();
services.TryAddSingleton<ITimeStampAuthorityClient, HttpTsaClient>();
return services;
}
/// <summary>
/// Adds a TSA provider to the configuration.
/// </summary>
/// <param name="services">The service collection.</param>
/// <param name="name">Provider name.</param>
/// <param name="url">TSA endpoint URL.</param>
/// <param name="configure">Additional configuration.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddTsaProvider(
this IServiceCollection services,
string name,
string url,
Action<TsaProviderOptions>? configure = null)
{
services.Configure<TsaClientOptions>(options =>
{
var provider = new TsaProviderOptions
{
Name = name,
Url = new Uri(url)
};
configure?.Invoke(provider);
options.Providers.Add(provider);
});
return services;
}
/// <summary>
/// Adds common free TSA providers.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddCommonTsaProviders(this IServiceCollection services)
{
// FreeTSA.org
services.AddTsaProvider("FreeTSA", "https://freetsa.org/tsr", opts =>
{
opts.Priority = 100;
opts.Timeout = TimeSpan.FromSeconds(30);
});
// DigiCert
services.AddTsaProvider("DigiCert", "http://timestamp.digicert.com", opts =>
{
opts.Priority = 200;
opts.Timeout = TimeSpan.FromSeconds(30);
});
// Sectigo
services.AddTsaProvider("Sectigo", "http://timestamp.sectigo.com", opts =>
{
opts.Priority = 300;
opts.Timeout = TimeSpan.FromSeconds(30);
});
return services;
}
}

View File

@@ -0,0 +1,262 @@
// -----------------------------------------------------------------------------
// TsaProviderRegistry.cs
// Sprint: SPRINT_20260119_007 RFC-3161 TSA Client
// Task: TSA-005 - Provider Configuration & Management
// Description: Implementation of TSA provider registry with health tracking.
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Authority.Timestamping.Abstractions;
namespace StellaOps.Authority.Timestamping;
/// <summary>
/// Implementation of <see cref="ITsaProviderRegistry"/> with health tracking and failover.
/// </summary>
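/// <example>
/// Failover loop sketch (the actual RFC-3161 request plumbing is assumed):
/// <code>
/// foreach (var provider in registry.GetOrderedProviders())
/// {
///     var sw = System.Diagnostics.Stopwatch.StartNew();
///     try
///     {
///         // issue the RFC-3161 request against provider.Url ...
///         registry.ReportSuccess(provider.Name, sw.Elapsed);
///         break;
///     }
///     catch (HttpRequestException ex)
///     {
///         registry.ReportFailure(provider.Name, ex.Message);
///     }
/// }
/// </code>
/// </example>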
public sealed class TsaProviderRegistry : ITsaProviderRegistry
{
private readonly TsaClientOptions _options;
private readonly IHttpClientFactory _httpClientFactory;
private readonly ILogger<TsaProviderRegistry> _logger;
private readonly ConcurrentDictionary<string, ProviderState> _states = new();
private int _roundRobinIndex;
/// <summary>
/// Initializes a new instance of the <see cref="TsaProviderRegistry"/> class.
/// </summary>
public TsaProviderRegistry(
IOptions<TsaClientOptions> options,
IHttpClientFactory httpClientFactory,
ILogger<TsaProviderRegistry> logger)
{
_options = options.Value;
_httpClientFactory = httpClientFactory;
_logger = logger;
// Initialize state for each provider
foreach (var provider in _options.Providers.Where(p => p.Enabled))
{
_states[provider.Name] = new ProviderState
{
Options = provider,
Health = new TsaProviderHealth
{
IsHealthy = true,
Status = TsaHealthStatus.Unknown
},
Latencies = new List<double>()
};
}
}
/// <inheritdoc />
public IReadOnlyList<TsaProviderState> GetProviders()
{
return _states.Values.Select(s => new TsaProviderState
{
Options = s.Options,
Health = s.Health,
Stats = ComputeStats(s)
}).ToList();
}
/// <inheritdoc />
public IEnumerable<TsaProviderOptions> GetOrderedProviders(bool excludeUnhealthy = true)
{
var providers = _states.Values
.Where(s => s.Options.Enabled)
.Where(s => !excludeUnhealthy || IsAvailable(s))
.ToList();
return _options.FailoverStrategy switch
{
FailoverStrategy.Priority => providers.OrderBy(p => p.Options.Priority).Select(p => p.Options),
FailoverStrategy.RoundRobin => GetRoundRobinOrder(providers).Select(p => p.Options),
FailoverStrategy.LowestLatency => providers.OrderBy(p => GetAverageLatency(p)).Select(p => p.Options),
FailoverStrategy.Random => providers.OrderBy(_ => Random.Shared.Next()).Select(p => p.Options),
_ => providers.OrderBy(p => p.Options.Priority).Select(p => p.Options)
};
}
/// <inheritdoc />
public void ReportSuccess(string providerName, TimeSpan latency)
{
if (!_states.TryGetValue(providerName, out var state))
return;
lock (state)
{
state.TotalRequests++;
state.SuccessCount++;
state.LastSuccessAt = DateTimeOffset.UtcNow;
state.ConsecutiveFailures = 0;
// Keep last 100 latencies for stats
state.Latencies.Add(latency.TotalMilliseconds);
if (state.Latencies.Count > 100)
{
state.Latencies.RemoveAt(0);
}
state.Health = TsaProviderHealth.Healthy();
}
_logger.LogDebug(
"TSA {Provider} request succeeded in {Latency}ms",
providerName, latency.TotalMilliseconds);
}
/// <inheritdoc />
public void ReportFailure(string providerName, string error)
{
if (!_states.TryGetValue(providerName, out var state))
return;
int consecutiveFailures;
lock (state)
{
state.TotalRequests++;
state.FailureCount++;
state.LastFailureAt = DateTimeOffset.UtcNow;
state.ConsecutiveFailures++;
state.LastError = error;
// Calculate backoff based on consecutive failures
var backoffSeconds = Math.Min(300, Math.Pow(2, state.ConsecutiveFailures));
var retryAfter = state.ConsecutiveFailures >= 3
? DateTimeOffset.UtcNow.AddSeconds(backoffSeconds)
: (DateTimeOffset?)null;
state.Health = TsaProviderHealth.Unhealthy(
error,
state.ConsecutiveFailures,
retryAfter);
consecutiveFailures = state.ConsecutiveFailures;
}
_logger.LogWarning(
"TSA {Provider} request failed: {Error} (consecutive failures: {Failures})",
providerName, error, consecutiveFailures);
}
/// <inheritdoc />
public TsaProviderHealth GetHealth(string providerName)
{
return _states.TryGetValue(providerName, out var state)
? state.Health
: new TsaProviderHealth { Status = TsaHealthStatus.Unknown };
}
/// <inheritdoc />
public async Task<TsaProviderHealth> CheckHealthAsync(
string providerName,
CancellationToken cancellationToken = default)
{
if (!_states.TryGetValue(providerName, out var state))
{
return new TsaProviderHealth
{
Status = TsaHealthStatus.Unknown,
LastError = "Provider not found"
};
}
try
{
var client = _httpClientFactory.CreateClient($"TSA_{providerName}");
client.Timeout = TimeSpan.FromSeconds(10);
// Simple connectivity check - just verify the endpoint is reachable
using var response = await client.SendAsync(
new HttpRequestMessage(HttpMethod.Head, state.Options.Url),
cancellationToken);
// Most TSAs don't support HEAD, so any response (even 4xx) means it's reachable
var health = TsaProviderHealth.Healthy();
lock (state)
{
state.Health = health;
}
return health;
}
catch (Exception ex)
{
var health = TsaProviderHealth.Unhealthy(ex.Message, state.ConsecutiveFailures + 1);
lock (state)
{
state.Health = health;
}
return health;
}
}
private bool IsAvailable(ProviderState state)
{
if (!state.Health.IsHealthy && state.Health.RetryAfter.HasValue)
{
return DateTimeOffset.UtcNow >= state.Health.RetryAfter.Value;
}
return state.Health.Status != TsaHealthStatus.Unhealthy || state.ConsecutiveFailures < 5;
}
private double GetAverageLatency(ProviderState state)
{
lock (state)
{
return state.Latencies.Count > 0
? state.Latencies.Average()
: double.MaxValue;
}
}
private IEnumerable<ProviderState> GetRoundRobinOrder(List<ProviderState> providers)
{
if (providers.Count == 0)
yield break;
var startIndex = Interlocked.Increment(ref _roundRobinIndex) % providers.Count;
for (var i = 0; i < providers.Count; i++)
{
yield return providers[(startIndex + i) % providers.Count];
}
}
private static TsaProviderStats ComputeStats(ProviderState state)
{
lock (state)
{
var sortedLatencies = state.Latencies.OrderBy(l => l).ToList();
var p95Index = (int)(sortedLatencies.Count * 0.95);
return new TsaProviderStats
{
TotalRequests = state.TotalRequests,
SuccessCount = state.SuccessCount,
FailureCount = state.FailureCount,
AverageLatencyMs = sortedLatencies.Count > 0 ? sortedLatencies.Average() : 0,
P95LatencyMs = sortedLatencies.Count > 0 ? sortedLatencies[Math.Min(p95Index, sortedLatencies.Count - 1)] : 0,
LastSuccessAt = state.LastSuccessAt,
LastFailureAt = state.LastFailureAt
};
}
}
private sealed class ProviderState
{
public required TsaProviderOptions Options { get; init; }
public TsaProviderHealth Health { get; set; } = new() { Status = TsaHealthStatus.Unknown };
public List<double> Latencies { get; init; } = [];
public long TotalRequests { get; set; }
public long SuccessCount { get; set; }
public long FailureCount { get; set; }
public int ConsecutiveFailures { get; set; }
public string? LastError { get; set; }
public DateTimeOffset? LastSuccessAt { get; set; }
public DateTimeOffset? LastFailureAt { get; set; }
}
}

View File

@@ -384,7 +384,7 @@ public sealed class DeltaSigEnvelopeBuilder
return new InTotoStatement
{
Subject = subjects,
-PredicateType = predicate.PredicateType,
+PredicateType = DeltaSigPredicate.PredicateType,
Predicate = predicate
};
}

View File

@@ -0,0 +1,251 @@
// -----------------------------------------------------------------------------
// DeltaSigPredicateConverter.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-001 - Extended DeltaSig Predicate Schema
// Description: Converter between v1 and v2 predicate formats for backward compatibility
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.DeltaSig.Attestation;
/// <summary>
/// Converts between v1 and v2 DeltaSig predicate formats.
/// </summary>
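/// <example>
/// Round-trip sketch (a populated v1 predicate is assumed); note ToV1 is lossy:
/// <code>
/// DeltaSigPredicateV2 v2 = DeltaSigPredicateConverter.ToV2(v1);
/// DeltaSigPredicate back = DeltaSigPredicateConverter.ToV1(v2);
/// // 'back' drops v2-only data such as symbol provenance and CAS references.
/// </code>
/// </example>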
public static class DeltaSigPredicateConverter
{
/// <summary>
/// Convert a v1 predicate to v2 format.
/// </summary>
/// <param name="v1">The v1 predicate.</param>
/// <returns>The v2 predicate (without provenance/IR diff which are v2-only).</returns>
public static DeltaSigPredicateV2 ToV2(DeltaSigPredicate v1)
{
ArgumentNullException.ThrowIfNull(v1);
var oldBinary = v1.OldBinary;
var newBinary = v1.NewBinary;
// Use the new binary as the subject (or old if new is missing)
var subjectSource = newBinary ?? oldBinary
?? throw new ArgumentException("Predicate must have at least one subject", nameof(v1));
var subject = new DeltaSigSubjectV2
{
Purl = $"pkg:generic/{v1.PackageName ?? "unknown"}",
Digest = subjectSource.Digest,
Arch = subjectSource.Arch,
Filename = subjectSource.Filename,
Size = subjectSource.Size
};
var functionMatches = v1.Delta.Select(d => new FunctionMatchV2
{
Name = d.FunctionId,
BeforeHash = d.OldHash,
AfterHash = d.NewHash,
MatchScore = d.SemanticSimilarity ?? 1.0,
MatchMethod = DetermineMatchMethod(d),
MatchState = MapChangeTypeToMatchState(d.ChangeType),
Address = d.Address,
Size = d.NewSize > 0 ? d.NewSize : d.OldSize,
Section = d.Section,
// v2-only fields are null when converting from v1
SymbolProvenance = null,
IrDiff = d.IrDiff != null ? new IrDiffReferenceV2
{
CasDigest = "sha256:0000000000000000000000000000000000000000000000000000000000000000", // Placeholder
AddedBlocks = Math.Max(0, d.NewBlockCount.GetValueOrDefault() - d.OldBlockCount.GetValueOrDefault()),
RemovedBlocks = Math.Max(0, d.OldBlockCount.GetValueOrDefault() - d.NewBlockCount.GetValueOrDefault()),
ChangedInstructions = d.IrDiff.StatementsModified,
StatementsAdded = d.IrDiff.StatementsAdded,
StatementsRemoved = d.IrDiff.StatementsRemoved,
IrFormat = d.IrDiff.IrFormat
} : null
}).ToList();
var summary = new DeltaSummaryV2
{
TotalFunctions = v1.Summary.TotalFunctions,
VulnerableFunctions = 0, // v1 doesn't track this directly
PatchedFunctions = v1.Summary.FunctionsModified, // Approximation
UnknownFunctions = 0,
FunctionsWithProvenance = 0, // v2-only
FunctionsWithIrDiff = functionMatches.Count(f => f.IrDiff != null),
AvgMatchScore = v1.Summary.AvgSemanticSimilarity,
MinMatchScore = v1.Summary.MinSemanticSimilarity,
MaxMatchScore = v1.Summary.MaxSemanticSimilarity,
TotalIrDiffSize = 0
};
var tooling = new DeltaToolingV2
{
Lifter = v1.Tooling.Lifter,
LifterVersion = v1.Tooling.LifterVersion,
CanonicalIr = v1.Tooling.CanonicalIr,
MatchAlgorithm = v1.Tooling.DiffAlgorithm,
NormalizationRecipe = v1.Tooling.NormalizationRecipe,
BinaryIndexVersion = v1.Tooling.BinaryIndexVersion ?? "1.0.0",
HashAlgorithm = v1.Tooling.HashAlgorithm
};
return new DeltaSigPredicateV2
{
SchemaVersion = "2.0.0",
Subject = subject,
FunctionMatches = functionMatches,
Verdict = DetermineVerdict(v1),
Confidence = v1.Summary.AvgSemanticSimilarity,
CveIds = v1.CveIds,
ComputedAt = v1.ComputedAt,
Tooling = tooling,
Summary = summary,
Advisories = v1.Advisories,
Metadata = v1.Metadata
};
}
/// <summary>
/// Convert a v2 predicate to v1 format (lossy - loses provenance/IR refs).
/// </summary>
/// <param name="v2">The v2 predicate.</param>
/// <returns>The v1 predicate.</returns>
public static DeltaSigPredicate ToV1(DeltaSigPredicateV2 v2)
{
ArgumentNullException.ThrowIfNull(v2);
var subjects = new List<DeltaSigSubject>
{
new()
{
Uri = v2.Subject.Purl,
Digest = v2.Subject.Digest,
Arch = v2.Subject.Arch ?? "unknown",
Role = "new",
Filename = v2.Subject.Filename,
Size = v2.Subject.Size
}
};
var deltas = v2.FunctionMatches.Select(fm => new FunctionDelta
{
FunctionId = fm.Name,
Address = fm.Address ?? 0,
OldHash = fm.BeforeHash,
NewHash = fm.AfterHash,
OldSize = fm.Size ?? 0,
NewSize = fm.Size ?? 0,
ChangeType = MapMatchStateToChangeType(fm.MatchState),
SemanticSimilarity = fm.MatchScore,
Section = fm.Section,
IrDiff = fm.IrDiff != null ? new IrDiff
{
StatementsAdded = fm.IrDiff.StatementsAdded ?? 0,
StatementsRemoved = fm.IrDiff.StatementsRemoved ?? 0,
StatementsModified = fm.IrDiff.ChangedInstructions,
IrFormat = fm.IrDiff.IrFormat
} : null
}).ToList();
var summary = new DeltaSummary
{
TotalFunctions = v2.Summary.TotalFunctions,
FunctionsAdded = 0,
FunctionsRemoved = 0,
FunctionsModified = v2.Summary.VulnerableFunctions + v2.Summary.PatchedFunctions,
FunctionsUnchanged = v2.Summary.TotalFunctions - v2.Summary.VulnerableFunctions - v2.Summary.PatchedFunctions - v2.Summary.UnknownFunctions,
TotalBytesChanged = 0,
MinSemanticSimilarity = v2.Summary.MinMatchScore,
AvgSemanticSimilarity = v2.Summary.AvgMatchScore,
MaxSemanticSimilarity = v2.Summary.MaxMatchScore
};
var tooling = new DeltaTooling
{
Lifter = v2.Tooling.Lifter,
LifterVersion = v2.Tooling.LifterVersion,
CanonicalIr = v2.Tooling.CanonicalIr,
DiffAlgorithm = v2.Tooling.MatchAlgorithm,
NormalizationRecipe = v2.Tooling.NormalizationRecipe,
BinaryIndexVersion = v2.Tooling.BinaryIndexVersion,
HashAlgorithm = v2.Tooling.HashAlgorithm
};
return new DeltaSigPredicate
{
SchemaVersion = "1.0.0",
Subject = subjects,
Delta = deltas,
Summary = summary,
Tooling = tooling,
ComputedAt = v2.ComputedAt,
CveIds = v2.CveIds,
Advisories = v2.Advisories,
PackageName = ExtractPackageName(v2.Subject.Purl),
Metadata = v2.Metadata
};
}
private static string DetermineMatchMethod(FunctionDelta delta)
{
// Check the strongest evidence first: identical hashes are byte-exact,
// and two missing hashes must not be treated as an exact match.
if (delta.OldHash is not null && delta.OldHash == delta.NewHash)
return MatchMethods.ByteExact;
if (delta.SemanticSimilarity.HasValue && delta.SemanticSimilarity > 0)
return MatchMethods.SemanticKsg;
return MatchMethods.CfgStructural;
}
private static string MapChangeTypeToMatchState(string changeType)
{
return changeType.ToLowerInvariant() switch
{
"added" => MatchStates.Modified,
"removed" => MatchStates.Modified,
"modified" => MatchStates.Modified,
"unchanged" => MatchStates.Unchanged,
_ => MatchStates.Unknown
};
}
private static string MapMatchStateToChangeType(string matchState)
{
return matchState.ToLowerInvariant() switch
{
MatchStates.Vulnerable => "modified",
MatchStates.Patched => "modified",
MatchStates.Modified => "modified",
MatchStates.Unchanged => "unchanged",
_ => "modified"
};
}
private static string DetermineVerdict(DeltaSigPredicate v1)
{
var modified = v1.Summary.FunctionsModified;
var added = v1.Summary.FunctionsAdded;
var removed = v1.Summary.FunctionsRemoved;
if (modified == 0 && added == 0 && removed == 0)
return DeltaSigVerdicts.Patched;
if (v1.Summary.AvgSemanticSimilarity > 0.9)
return DeltaSigVerdicts.Patched;
if (v1.Summary.AvgSemanticSimilarity < 0.5)
return DeltaSigVerdicts.Vulnerable;
return DeltaSigVerdicts.Partial;
}
private static string? ExtractPackageName(string purl)
{
// Extract package name from purl like "pkg:generic/openssl@1.1.1"
if (string.IsNullOrEmpty(purl))
return null;
var parts = purl.Split('/');
if (parts.Length < 2)
return null;
var namePart = parts[^1];
var atIndex = namePart.IndexOf('@');
return atIndex > 0 ? namePart[..atIndex] : namePart;
}
}

View File

@@ -0,0 +1,534 @@
// -----------------------------------------------------------------------------
// DeltaSigPredicateV2.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-001 - Extended DeltaSig Predicate Schema
// Description: DSSE predicate v2 with symbol provenance and IR diff references
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
using System.Text.Json.Serialization;
namespace StellaOps.BinaryIndex.DeltaSig.Attestation;
/// <summary>
/// DSSE predicate v2 for function-level binary diffs with symbol provenance.
/// Predicate type: "https://stella-ops.org/predicates/deltasig/v2"
/// </summary>
/// <remarks>
/// v2 extends v1 with:
/// - Symbol provenance metadata (ground-truth source attribution)
/// - IR diff references (CAS-stored structured diffs)
/// - Function-level match evidence for VEX explanations
/// </remarks>
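/// <example>
/// Minimal serialized shape (illustrative; the required tooling/summary blocks are elided):
/// <code>
/// {
///   "schemaVersion": "2.0.0",
///   "subject": { "purl": "pkg:generic/openssl", "digest": { "sha256": "..." } },
///   "functionMatches": [
///     { "name": "EVP_DecryptUpdate", "matchScore": 0.97,
///       "matchMethod": "semantic_ksg", "matchState": "patched" }
///   ],
///   "verdict": "patched",
///   "confidence": 0.93,
///   "computedAt": "2026-01-19T00:00:00Z"
/// }
/// </code>
/// </example>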
public sealed record DeltaSigPredicateV2
{
/// <summary>
/// Predicate type URI for DSSE envelope.
/// </summary>
public const string PredicateType = "https://stella-ops.org/predicates/deltasig/v2";
/// <summary>
/// Predicate type short name for display.
/// </summary>
public const string PredicateTypeName = "stellaops/delta-sig/v2";
/// <summary>
/// Schema version.
/// </summary>
[JsonPropertyName("schemaVersion")]
public string SchemaVersion { get; init; } = "2.0.0";
/// <summary>
/// Subject artifact being analyzed.
/// </summary>
[JsonPropertyName("subject")]
public required DeltaSigSubjectV2 Subject { get; init; }
/// <summary>
/// Function-level matches with provenance and evidence.
/// </summary>
[JsonPropertyName("functionMatches")]
public required IReadOnlyList<FunctionMatchV2> FunctionMatches { get; init; }
/// <summary>
/// Overall verdict: "vulnerable", "patched", "unknown", "partial".
/// </summary>
[JsonPropertyName("verdict")]
public required string Verdict { get; init; }
/// <summary>
/// Overall confidence score (0.0-1.0).
/// </summary>
[JsonPropertyName("confidence")]
public double Confidence { get; init; }
/// <summary>
/// CVE identifiers this analysis addresses.
/// </summary>
[JsonPropertyName("cveIds")]
public IReadOnlyList<string>? CveIds { get; init; }
/// <summary>
/// Timestamp when analysis was computed (RFC 3339).
/// </summary>
[JsonPropertyName("computedAt")]
public required DateTimeOffset ComputedAt { get; init; }
/// <summary>
/// Tooling used to generate the predicate.
/// </summary>
[JsonPropertyName("tooling")]
public required DeltaToolingV2 Tooling { get; init; }
/// <summary>
/// Summary statistics.
/// </summary>
[JsonPropertyName("summary")]
public required DeltaSummaryV2 Summary { get; init; }
/// <summary>
/// Optional advisory references.
/// </summary>
[JsonPropertyName("advisories")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public IReadOnlyList<string>? Advisories { get; init; }
/// <summary>
/// Additional metadata.
/// </summary>
[JsonPropertyName("metadata")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public IReadOnlyDictionary<string, object>? Metadata { get; init; }
}
/// <summary>
/// Subject artifact in a delta-sig v2 predicate.
/// </summary>
public sealed record DeltaSigSubjectV2
{
/// <summary>
/// Package URL (purl) of the subject.
/// </summary>
[JsonPropertyName("purl")]
public required string Purl { get; init; }
/// <summary>
/// Digests of the artifact (algorithm -> hash).
/// </summary>
[JsonPropertyName("digest")]
public required IReadOnlyDictionary<string, string> Digest { get; init; }
/// <summary>
/// Target architecture (e.g., "linux-amd64", "linux-arm64").
/// </summary>
[JsonPropertyName("arch")]
public string? Arch { get; init; }
/// <summary>
/// Binary filename or path.
/// </summary>
[JsonPropertyName("filename")]
public string? Filename { get; init; }
/// <summary>
/// Size of the binary in bytes.
/// </summary>
[JsonPropertyName("size")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public long? Size { get; init; }
/// <summary>
/// ELF Build-ID or equivalent debug identifier.
/// </summary>
[JsonPropertyName("debugId")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? DebugId { get; init; }
}
/// <summary>
/// Function-level match with provenance and IR diff evidence.
/// </summary>
public sealed record FunctionMatchV2
{
/// <summary>
/// Function name (symbol name).
/// </summary>
[JsonPropertyName("name")]
public required string Name { get; init; }
/// <summary>
/// Hash of the function in the old (reference) binary.
/// </summary>
[JsonPropertyName("beforeHash")]
public string? BeforeHash { get; init; }
/// <summary>
/// Hash of the function in the new (analyzed) binary.
/// </summary>
[JsonPropertyName("afterHash")]
public string? AfterHash { get; init; }
/// <summary>
/// Match score (0.0-1.0).
/// </summary>
[JsonPropertyName("matchScore")]
public double MatchScore { get; init; }
/// <summary>
/// Method used for matching: "semantic_ksg", "byte_exact", "cfg_structural", "ir_semantic".
/// </summary>
[JsonPropertyName("matchMethod")]
public required string MatchMethod { get; init; }
/// <summary>
/// Match state: "vulnerable", "patched", "modified", "unchanged", "unknown".
/// </summary>
[JsonPropertyName("matchState")]
public required string MatchState { get; init; }
/// <summary>
/// Symbol provenance from ground-truth corpus.
/// </summary>
[JsonPropertyName("symbolProvenance")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public SymbolProvenanceV2? SymbolProvenance { get; init; }
/// <summary>
/// IR diff reference for detailed evidence.
/// </summary>
[JsonPropertyName("irDiff")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public IrDiffReferenceV2? IrDiff { get; init; }
/// <summary>
/// Virtual address of the function.
/// </summary>
[JsonPropertyName("address")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public long? Address { get; init; }
/// <summary>
/// Function size in bytes.
/// </summary>
[JsonPropertyName("size")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public long? Size { get; init; }
/// <summary>
/// Section containing the function.
/// </summary>
[JsonPropertyName("section")]
public string Section { get; init; } = ".text";
/// <summary>
/// Human-readable explanation of the match.
/// </summary>
[JsonPropertyName("explanation")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? Explanation { get; init; }
}
/// <summary>
/// Symbol provenance from ground-truth corpus.
/// </summary>
public sealed record SymbolProvenanceV2
{
/// <summary>
/// Ground-truth source ID (e.g., "debuginfod-fedora", "ddeb-ubuntu").
/// </summary>
[JsonPropertyName("sourceId")]
public required string SourceId { get; init; }
/// <summary>
/// Observation ID in ground-truth corpus.
/// Format: groundtruth:{source_id}:{debug_id}:{revision}
/// </summary>
[JsonPropertyName("observationId")]
public required string ObservationId { get; init; }
/// <summary>
/// When the symbol was fetched from the source.
/// </summary>
[JsonPropertyName("fetchedAt")]
public required DateTimeOffset FetchedAt { get; init; }
/// <summary>
/// Signature state of the source: "verified", "unverified", "expired".
/// </summary>
[JsonPropertyName("signatureState")]
public required string SignatureState { get; init; }
/// <summary>
/// Package name from the source.
/// </summary>
[JsonPropertyName("packageName")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? PackageName { get; init; }
/// <summary>
/// Package version from the source.
/// </summary>
[JsonPropertyName("packageVersion")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? PackageVersion { get; init; }
/// <summary>
/// Distribution (e.g., "fedora", "ubuntu", "debian").
/// </summary>
[JsonPropertyName("distro")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? Distro { get; init; }
/// <summary>
/// Distribution version.
/// </summary>
[JsonPropertyName("distroVersion")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? DistroVersion { get; init; }
/// <summary>
/// Debug ID used for lookup.
/// </summary>
[JsonPropertyName("debugId")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? DebugId { get; init; }
}
/// <summary>
/// IR diff reference stored in CAS.
/// </summary>
public sealed record IrDiffReferenceV2
{
/// <summary>
/// Content-addressed digest of the full diff in CAS.
/// Format: sha256:...
/// </summary>
[JsonPropertyName("casDigest")]
public required string CasDigest { get; init; }
/// <summary>
/// Number of basic blocks added.
/// </summary>
[JsonPropertyName("addedBlocks")]
public int AddedBlocks { get; init; }
/// <summary>
/// Number of basic blocks removed.
/// </summary>
[JsonPropertyName("removedBlocks")]
public int RemovedBlocks { get; init; }
/// <summary>
/// Number of instructions changed.
/// </summary>
[JsonPropertyName("changedInstructions")]
public int ChangedInstructions { get; init; }
/// <summary>
/// Number of IR statements added.
/// </summary>
[JsonPropertyName("statementsAdded")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? StatementsAdded { get; init; }
/// <summary>
/// Number of IR statements removed.
/// </summary>
[JsonPropertyName("statementsRemoved")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? StatementsRemoved { get; init; }
/// <summary>
/// IR format used (e.g., "b2r2-lowuir", "ghidra-pcode").
/// </summary>
[JsonPropertyName("irFormat")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? IrFormat { get; init; }
/// <summary>
/// URL to fetch the full diff from CAS.
/// </summary>
[JsonPropertyName("casUrl")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? CasUrl { get; init; }
/// <summary>
/// Size of the diff in bytes.
/// </summary>
[JsonPropertyName("diffSize")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public long? DiffSize { get; init; }
}
/// <summary>
/// Tooling metadata for v2 predicates.
/// </summary>
public sealed record DeltaToolingV2
{
/// <summary>
/// Primary lifter used: "b2r2", "ghidra", "radare2".
/// </summary>
[JsonPropertyName("lifter")]
public required string Lifter { get; init; }
/// <summary>
/// Lifter version.
/// </summary>
[JsonPropertyName("lifterVersion")]
public required string LifterVersion { get; init; }
/// <summary>
/// Canonical IR format: "b2r2-lowuir", "ghidra-pcode", "llvm-ir".
/// </summary>
[JsonPropertyName("canonicalIr")]
public required string CanonicalIr { get; init; }
/// <summary>
/// Matching algorithm: "semantic_ksg", "byte_exact", "cfg_structural".
/// </summary>
[JsonPropertyName("matchAlgorithm")]
public required string MatchAlgorithm { get; init; }
/// <summary>
/// Normalization recipe applied.
/// </summary>
[JsonPropertyName("normalizationRecipe")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? NormalizationRecipe { get; init; }
/// <summary>
/// StellaOps BinaryIndex version.
/// </summary>
[JsonPropertyName("binaryIndexVersion")]
public required string BinaryIndexVersion { get; init; }
/// <summary>
/// Hash algorithm used.
/// </summary>
[JsonPropertyName("hashAlgorithm")]
public string HashAlgorithm { get; init; } = "sha256";
/// <summary>
/// CAS storage backend used for IR diffs.
/// </summary>
[JsonPropertyName("casBackend")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? CasBackend { get; init; }
}
/// <summary>
/// Summary statistics for v2 predicates.
/// </summary>
public sealed record DeltaSummaryV2
{
/// <summary>
/// Total number of functions analyzed.
/// </summary>
[JsonPropertyName("totalFunctions")]
public int TotalFunctions { get; init; }
/// <summary>
/// Number of functions matched as vulnerable.
/// </summary>
[JsonPropertyName("vulnerableFunctions")]
public int VulnerableFunctions { get; init; }
/// <summary>
/// Number of functions matched as patched.
/// </summary>
[JsonPropertyName("patchedFunctions")]
public int PatchedFunctions { get; init; }
/// <summary>
/// Number of functions with unknown state.
/// </summary>
[JsonPropertyName("unknownFunctions")]
public int UnknownFunctions { get; init; }
/// <summary>
/// Number of functions with symbol provenance.
/// </summary>
[JsonPropertyName("functionsWithProvenance")]
public int FunctionsWithProvenance { get; init; }
/// <summary>
/// Number of functions with IR diff evidence.
/// </summary>
[JsonPropertyName("functionsWithIrDiff")]
public int FunctionsWithIrDiff { get; init; }
/// <summary>
/// Average match score across all functions.
/// </summary>
[JsonPropertyName("avgMatchScore")]
public double AvgMatchScore { get; init; }
/// <summary>
/// Minimum match score.
/// </summary>
[JsonPropertyName("minMatchScore")]
public double MinMatchScore { get; init; }
/// <summary>
/// Maximum match score.
/// </summary>
[JsonPropertyName("maxMatchScore")]
public double MaxMatchScore { get; init; }
/// <summary>
/// Total size of IR diffs stored in CAS.
/// </summary>
[JsonPropertyName("totalIrDiffSize")]
public long TotalIrDiffSize { get; init; }
}
/// <summary>
/// Constants for verdict values.
/// </summary>
public static class DeltaSigVerdicts
{
public const string Vulnerable = "vulnerable";
public const string Patched = "patched";
public const string Unknown = "unknown";
public const string Partial = "partial";
public const string PartiallyPatched = "partially_patched";
public const string Inconclusive = "inconclusive";
}
/// <summary>
/// Constants for match state values.
/// </summary>
public static class MatchStates
{
public const string Vulnerable = "vulnerable";
public const string Patched = "patched";
public const string Modified = "modified";
public const string Unchanged = "unchanged";
public const string Unknown = "unknown";
}
/// <summary>
/// Constants for match method values.
/// </summary>
public static class MatchMethods
{
public const string SemanticKsg = "semantic_ksg";
public const string ByteExact = "byte_exact";
public const string CfgStructural = "cfg_structural";
public const string IrSemantic = "ir_semantic";
public const string ChunkRolling = "chunk_rolling";
}
/// <summary>
/// Constants for signature verification states.
/// </summary>
public static class SignatureStates
{
public const string Verified = "verified";
public const string Unverified = "unverified";
public const string Expired = "expired";
public const string Invalid = "invalid";
public const string Failed = "failed";
public const string Unknown = "unknown";
public const string None = "none";
}

View File

@@ -74,7 +74,7 @@ public sealed class DeltaSigService : IDeltaSigService
ct);
// 2. Compare signatures to find deltas
-var comparison = _signatureMatcher.Compare(oldSignature, newSignature);
+var comparison = await _signatureMatcher.CompareSignaturesAsync(oldSignature, newSignature, ct);
// 3. Build function deltas
var deltas = BuildFunctionDeltas(comparison, request.IncludeIrDiff, request.ComputeSemanticSimilarity);

View File

@@ -0,0 +1,419 @@
// -----------------------------------------------------------------------------
// DeltaSigServiceV2.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-004 - Predicate Generator Updates
// Description: V2 service that produces predicates with provenance and IR diffs
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.DeltaSig.Attestation;
using StellaOps.BinaryIndex.DeltaSig.IrDiff;
using StellaOps.BinaryIndex.DeltaSig.Provenance;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// V2 DeltaSig service that produces predicates with provenance and IR diffs.
/// </summary>
public sealed class DeltaSigServiceV2 : IDeltaSigServiceV2
{
private readonly IDeltaSigService _baseService;
private readonly ISymbolProvenanceResolver? _provenanceResolver;
private readonly IIrDiffGenerator? _irDiffGenerator;
private readonly ILogger<DeltaSigServiceV2> _logger;
private readonly TimeProvider _timeProvider;
/// <summary>
/// Creates a new V2 DeltaSig service.
/// </summary>
public DeltaSigServiceV2(
IDeltaSigService baseService,
ILogger<DeltaSigServiceV2> logger,
ISymbolProvenanceResolver? provenanceResolver = null,
IIrDiffGenerator? irDiffGenerator = null,
TimeProvider? timeProvider = null)
{
_baseService = baseService ?? throw new ArgumentNullException(nameof(baseService));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_provenanceResolver = provenanceResolver;
_irDiffGenerator = irDiffGenerator;
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <inheritdoc />
public async Task<DeltaSigPredicateV2> GenerateV2Async(
DeltaSigRequestV2 request,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(request);
_logger.LogInformation(
"Generating v2 delta-sig for {Purl} with provenance={Provenance}, irDiff={IrDiff}",
request.Purl,
request.IncludeProvenance,
request.IncludeIrDiff);
var startTime = _timeProvider.GetUtcNow();
// 1. Generate base v1 predicate
var v1Request = new DeltaSigRequest
{
OldBinary = request.OldBinary,
NewBinary = request.NewBinary,
Architecture = request.Architecture,
CveIds = request.CveIds,
Advisories = request.Advisories,
PackageName = request.PackageName,
PreferredLifter = request.PreferredLifter,
ComputeSemanticSimilarity = true,
IncludeIrDiff = request.IncludeIrDiff
};
var v1Predicate = await _baseService.GenerateAsync(v1Request, ct);
// 2. Convert to v2 base
var v2 = DeltaSigPredicateConverter.ToV2(v1Predicate);
// 3. Build function matches with enrichment
var functionMatches = v2.FunctionMatches.ToList();
// 4. Enrich with provenance if requested
if (request.IncludeProvenance && _provenanceResolver != null)
{
var newDigest = GetDigestString(request.NewBinary.Digest);
functionMatches = (await _provenanceResolver.EnrichWithProvenanceAsync(
functionMatches,
newDigest,
request.ProvenanceOptions ?? ProvenanceResolutionOptions.Default,
ct)).ToList();
_logger.LogDebug(
"Enriched {Count} functions with provenance",
functionMatches.Count(f => f.SymbolProvenance != null));
}
// 5. Generate IR diffs if requested
if (request.IncludeIrDiff && _irDiffGenerator != null)
{
// Need to rewind streams
if (request.OldBinary.Content.CanSeek)
{
request.OldBinary.Content.Position = 0;
}
if (request.NewBinary.Content.CanSeek)
{
request.NewBinary.Content.Position = 0;
}
functionMatches = (await _irDiffGenerator.GenerateDiffsAsync(
functionMatches,
request.OldBinary.Content,
request.NewBinary.Content,
request.IrDiffOptions ?? IrDiffOptions.Default,
ct)).ToList();
_logger.LogDebug(
"Generated IR diffs for {Count} functions",
functionMatches.Count(f => f.IrDiff != null));
}
// 6. Compute verdict
var verdict = ComputeVerdict(functionMatches, request.CveIds);
var confidence = ComputeConfidence(functionMatches);
// 7. Build updated summary
var summary = new DeltaSummaryV2
{
TotalFunctions = functionMatches.Count,
VulnerableFunctions = functionMatches.Count(f => f.MatchState == MatchStates.Vulnerable),
PatchedFunctions = functionMatches.Count(f => f.MatchState == MatchStates.Patched),
UnknownFunctions = functionMatches.Count(f => f.MatchState == MatchStates.Unknown),
FunctionsWithProvenance = functionMatches.Count(f => f.SymbolProvenance != null),
FunctionsWithIrDiff = functionMatches.Count(f => f.IrDiff != null),
AvgMatchScore = functionMatches.Count > 0 ? functionMatches.Average(f => f.MatchScore) : 0,
MinMatchScore = functionMatches.Count > 0 ? functionMatches.Min(f => f.MatchScore) : 0,
MaxMatchScore = functionMatches.Count > 0 ? functionMatches.Max(f => f.MatchScore) : 0,
TotalIrDiffSize = functionMatches
.Where(f => f.IrDiff != null)
.Sum(f => f.IrDiff!.DiffSize ?? 0)
};
// 8. Build final v2 predicate
var result = v2 with
{
Subject = new DeltaSigSubjectV2
{
Purl = request.Purl ?? $"pkg:generic/{request.PackageName ?? "unknown"}",
Digest = request.NewBinary.Digest,
Arch = request.Architecture,
Filename = request.NewBinary.Filename,
Size = request.NewBinary.Size
},
FunctionMatches = functionMatches,
Summary = summary,
Verdict = verdict,
Confidence = confidence,
ComputedAt = startTime,
CveIds = request.CveIds,
Advisories = request.Advisories
};
_logger.LogInformation(
"Generated v2 delta-sig: {Verdict} (confidence={Confidence:P0}), {Functions} functions, {Provenance} with provenance, {IrDiff} with IR diff",
verdict,
confidence,
functionMatches.Count,
summary.FunctionsWithProvenance,
summary.FunctionsWithIrDiff);
return result;
}
/// <inheritdoc />
public async Task<DeltaSigPredicate> GenerateV1Async(
DeltaSigRequest request,
CancellationToken ct = default)
{
// Delegate to base service for v1
return await _baseService.GenerateAsync(request, ct);
}
/// <inheritdoc />
public PredicateVersion NegotiateVersion(PredicateVersionRequest request)
{
ArgumentNullException.ThrowIfNull(request);
// Default to v2 unless client requests v1
if (request.PreferredVersion == "1" ||
request.PreferredVersion?.StartsWith("1.") == true)
{
return new PredicateVersion
{
Version = "1.0.0",
PredicateType = DeltaSigPredicate.PredicateType,
Features = ImmutableArray<string>.Empty
};
}
// V2 with available features
var features = new List<string>();
if (_provenanceResolver != null)
{
features.Add("provenance");
}
if (_irDiffGenerator != null)
{
features.Add("ir-diff");
}
return new PredicateVersion
{
Version = "2.0.0",
PredicateType = DeltaSigPredicateV2.PredicateType,
Features = features.ToImmutableArray()
};
}
private static string ComputeVerdict(IReadOnlyList<FunctionMatchV2> matches, IReadOnlyList<string>? cveIds)
{
if (matches.Count == 0)
{
return DeltaSigVerdicts.Unknown;
}
// If we have CVE context and all vulnerable functions are patched
var patchedCount = matches.Count(f => f.MatchState == MatchStates.Patched);
var vulnerableCount = matches.Count(f => f.MatchState == MatchStates.Vulnerable);
var unknownCount = matches.Count(f => f.MatchState == MatchStates.Unknown);
if (cveIds?.Count > 0)
{
if (patchedCount > 0 && vulnerableCount == 0)
{
return DeltaSigVerdicts.Patched;
}
if (vulnerableCount > 0)
{
return DeltaSigVerdicts.Vulnerable;
}
}
// Without CVE context, use match scores
var avgScore = matches.Average(f => f.MatchScore);
if (avgScore >= 0.9)
{
return DeltaSigVerdicts.Patched;
}
if (avgScore >= 0.7)
{
return DeltaSigVerdicts.PartiallyPatched;
}
if (avgScore >= 0.5)
{
return DeltaSigVerdicts.Inconclusive;
}
return DeltaSigVerdicts.Unknown;
}
private static double ComputeConfidence(IReadOnlyList<FunctionMatchV2> matches)
{
if (matches.Count == 0)
{
return 0.0;
}
// Base confidence on match scores and provenance availability
var avgMatchScore = matches.Average(f => f.MatchScore);
var provenanceRatio = matches.Count(f => f.SymbolProvenance != null) / (double)matches.Count;
// Weight: 70% match score, 30% provenance availability
return (avgMatchScore * 0.7) + (provenanceRatio * 0.3);
}
private static string GetDigestString(IReadOnlyDictionary<string, string>? digest)
{
if (digest == null || digest.Count == 0)
{
return string.Empty;
}
// Prefer sha256
if (digest.TryGetValue("sha256", out var sha256))
{
return sha256;
}
// Fall back to first available
return digest.Values.First();
}
}
/// <summary>
/// V2 DeltaSig service interface.
/// </summary>
public interface IDeltaSigServiceV2
{
/// <summary>
/// Generates a v2 predicate with optional provenance and IR diffs.
/// </summary>
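/// <example>
/// Request sketch (BinaryReference construction assumed; the CVE id is illustrative):
/// <code>
/// var predicate = await service.GenerateV2Async(new DeltaSigRequestV2
/// {
///     OldBinary = oldRef,
///     NewBinary = newRef,
///     Architecture = "linux-amd64",
///     CveIds = new[] { "CVE-2026-0001" }
/// }, ct);
/// </code>
/// </example>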
Task<DeltaSigPredicateV2> GenerateV2Async(
DeltaSigRequestV2 request,
CancellationToken ct = default);
/// <summary>
/// Generates a v1 predicate for legacy consumers.
/// </summary>
Task<DeltaSigPredicate> GenerateV1Async(
DeltaSigRequest request,
CancellationToken ct = default);
/// <summary>
/// Negotiates predicate version with client.
/// </summary>
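/// <example>
/// <code>
/// var v = service.NegotiateVersion(new PredicateVersionRequest { PreferredVersion = "1" });
/// // v.Version == "1.0.0"; v.PredicateType == DeltaSigPredicate.PredicateType
/// </code>
/// </example>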
PredicateVersion NegotiateVersion(PredicateVersionRequest request);
}
/// <summary>
/// Request for v2 predicate generation.
/// </summary>
public sealed record DeltaSigRequestV2
{
/// <summary>
/// Package URL (purl) for the analyzed binary.
/// </summary>
public string? Purl { get; init; }
/// <summary>
/// Old (vulnerable) binary.
/// </summary>
public required BinaryReference OldBinary { get; init; }
/// <summary>
/// New (patched) binary.
/// </summary>
public required BinaryReference NewBinary { get; init; }
/// <summary>
/// Target architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// CVE identifiers being addressed.
/// </summary>
public IReadOnlyList<string>? CveIds { get; init; }
/// <summary>
/// Advisory references.
/// </summary>
public IReadOnlyList<string>? Advisories { get; init; }
/// <summary>
/// Package name.
/// </summary>
public string? PackageName { get; init; }
/// <summary>
/// Preferred lifter (b2r2, ghidra).
/// </summary>
public string? PreferredLifter { get; init; }
/// <summary>
/// Whether to include symbol provenance.
/// </summary>
public bool IncludeProvenance { get; init; } = true;
/// <summary>
/// Whether to include IR diffs.
/// </summary>
public bool IncludeIrDiff { get; init; } = true;
/// <summary>
/// Provenance resolution options.
/// </summary>
public ProvenanceResolutionOptions? ProvenanceOptions { get; init; }
/// <summary>
/// IR diff options.
/// </summary>
public IrDiffOptions? IrDiffOptions { get; init; }
}
/// <summary>
/// Version negotiation request.
/// </summary>
public sealed record PredicateVersionRequest
{
/// <summary>
/// Client's preferred version.
/// </summary>
public string? PreferredVersion { get; init; }
/// <summary>
/// Required features.
/// </summary>
public IReadOnlyList<string>? RequiredFeatures { get; init; }
}
/// <summary>
/// Negotiated predicate version.
/// </summary>
public sealed record PredicateVersion
{
/// <summary>
/// Schema version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Predicate type URI.
/// </summary>
public required string PredicateType { get; init; }
/// <summary>
/// Available features.
/// </summary>
public required ImmutableArray<string> Features { get; init; }
}

View File

@@ -0,0 +1,71 @@
// -----------------------------------------------------------------------------
// DeltaSigV2ServiceCollectionExtensions.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Description: DI registration for v2 DeltaSig services
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.BinaryIndex.DeltaSig.IrDiff;
using StellaOps.BinaryIndex.DeltaSig.Provenance;
using StellaOps.BinaryIndex.DeltaSig.VexIntegration;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Extension methods for registering v2 DeltaSig services.
/// </summary>
public static class DeltaSigV2ServiceCollectionExtensions
{
/// <summary>
/// Adds DeltaSig v2 services (provenance resolver, IR diff generator, v2 service, VEX bridge).
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
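/// <example>
/// Wiring sketch:
/// <code>
/// services.AddDeltaSigV2();
/// // or with custom IR diff options:
/// services.AddDeltaSigV2(irDiffOptions: () => new IrDiffOptions { MaxParallelDiffs = 8 });
/// </code>
/// </example>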
public static IServiceCollection AddDeltaSigV2(this IServiceCollection services)
{
// Register provenance resolver
services.TryAddSingleton<ISymbolProvenanceResolver, GroundTruthProvenanceResolver>();
// Register IR diff generator
services.TryAddSingleton<IIrDiffGenerator, IrDiffGenerator>();
// Register v2 service
services.TryAddSingleton<IDeltaSigServiceV2, DeltaSigServiceV2>();
// Register VEX bridge
services.TryAddSingleton<IDeltaSigVexBridge, DeltaSigVexBridge>();
return services;
}
/// <summary>
/// Adds DeltaSig v2 services with custom configuration.
/// </summary>
/// <param name="services">The service collection.</param>
/// <param name="configureProvenance">Callback to configure provenance options.</param>
/// <param name="configureIrDiff">Callback to configure IR diff options.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddDeltaSigV2(
this IServiceCollection services,
Action<ProvenanceResolutionOptions>? configureProvenance = null,
Action<IrDiffOptions>? configureIrDiff = null)
{
if (configureProvenance != null)
{
var options = new ProvenanceResolutionOptions();
configureProvenance(options);
services.AddSingleton(options);
}
if (configureIrDiff != null)
{
var options = new IrDiffOptions();
configureIrDiff(options);
services.AddSingleton(options);
}
return services.AddDeltaSigV2();
}
}

View File

@@ -0,0 +1,277 @@
// -----------------------------------------------------------------------------
// IIrDiffGenerator.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-003 - IR Diff Reference Generator
// Description: Interface for generating IR diff references for function matches
// -----------------------------------------------------------------------------
using StellaOps.BinaryIndex.DeltaSig.Attestation;
namespace StellaOps.BinaryIndex.DeltaSig.IrDiff;
/// <summary>
/// Generates IR diff references for function matches.
/// Computes structural differences between IR representations.
/// </summary>
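/// <example>
/// Enrichment sketch (matches and seekable binary streams produced upstream):
/// <code>
/// var enriched = await generator.GenerateDiffsAsync(
///     matches, oldStream, newStream,
///     IrDiffOptions.Default with { MaxParallelDiffs = 2 });
/// </code>
/// </example>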
public interface IIrDiffGenerator
{
/// <summary>
/// Generates IR diff references for function matches.
/// </summary>
/// <param name="matches">Function matches to compute diffs for.</param>
/// <param name="oldBinaryStream">Stream containing the old binary.</param>
/// <param name="newBinaryStream">Stream containing the new binary.</param>
/// <param name="options">Diff generation options.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Function matches enriched with IR diff references.</returns>
Task<IReadOnlyList<FunctionMatchV2>> GenerateDiffsAsync(
IReadOnlyList<FunctionMatchV2> matches,
Stream oldBinaryStream,
Stream newBinaryStream,
IrDiffOptions options,
CancellationToken ct = default);
/// <summary>
/// Generates an IR diff for a single function.
/// </summary>
/// <param name="functionAddress">Address of the function in the new binary.</param>
/// <param name="oldFunctionAddress">Address of the function in the old binary.</param>
/// <param name="oldBinaryStream">Stream containing the old binary.</param>
/// <param name="newBinaryStream">Stream containing the new binary.</param>
/// <param name="options">Diff generation options.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>IR diff reference.</returns>
Task<IrDiffReferenceV2?> GenerateSingleDiffAsync(
ulong functionAddress,
ulong oldFunctionAddress,
Stream oldBinaryStream,
Stream newBinaryStream,
IrDiffOptions options,
CancellationToken ct = default);
}
/// <summary>
/// Options for IR diff generation.
/// </summary>
public sealed record IrDiffOptions
{
/// <summary>
/// Default options.
/// </summary>
public static IrDiffOptions Default { get; } = new();
/// <summary>
/// IR format to use (e.g., "b2r2-lowuir", "ghidra-pcode").
/// </summary>
public string IrFormat { get; init; } = "b2r2-lowuir";
/// <summary>
/// Whether to store full diffs in CAS.
/// </summary>
public bool StoreInCas { get; init; } = true;
/// <summary>
/// Maximum diff size to store (bytes).
/// Larger diffs are truncated.
/// </summary>
public int MaxDiffSizeBytes { get; init; } = 1024 * 1024; // 1MB
/// <summary>
/// Whether to compute instruction-level diffs.
/// </summary>
public bool IncludeInstructionDiffs { get; init; } = true;
/// <summary>
/// Whether to compute basic block diffs.
/// </summary>
public bool IncludeBlockDiffs { get; init; } = true;
/// <summary>
/// Hash algorithm for CAS storage.
/// </summary>
public string HashAlgorithm { get; init; } = "sha256";
/// <summary>
/// Maximum functions to diff in parallel.
/// </summary>
public int MaxParallelDiffs { get; init; } = 4;
/// <summary>
/// Timeout for individual function diff.
/// </summary>
public TimeSpan DiffTimeout { get; init; } = TimeSpan.FromSeconds(30);
}
/// <summary>
/// Full IR diff data for CAS storage.
/// </summary>
public sealed record IrDiffPayload
{
/// <summary>
/// CAS digest of this payload.
/// </summary>
public required string Digest { get; init; }
/// <summary>
/// IR format used.
/// </summary>
public required string IrFormat { get; init; }
/// <summary>
/// Function name.
/// </summary>
public required string FunctionName { get; init; }
/// <summary>
/// Old function address.
/// </summary>
public ulong OldAddress { get; init; }
/// <summary>
/// New function address.
/// </summary>
public ulong NewAddress { get; init; }
/// <summary>
/// Block-level changes.
/// </summary>
public required IReadOnlyList<BlockDiff> BlockDiffs { get; init; }
/// <summary>
/// Statement-level changes.
/// </summary>
public required IReadOnlyList<StatementDiff> StatementDiffs { get; init; }
/// <summary>
/// Summary statistics.
/// </summary>
public required IrDiffSummary Summary { get; init; }
/// <summary>
/// Timestamp when diff was computed.
/// </summary>
public DateTimeOffset ComputedAt { get; init; }
}
/// <summary>
/// Block-level diff entry.
/// </summary>
public sealed record BlockDiff
{
/// <summary>
/// Block identifier.
/// </summary>
public required string BlockId { get; init; }
/// <summary>
/// Change type: added, removed, modified, unchanged.
/// </summary>
public required string ChangeType { get; init; }
/// <summary>
/// Old block address (if applicable).
/// </summary>
public ulong? OldAddress { get; init; }
/// <summary>
/// New block address (if applicable).
/// </summary>
public ulong? NewAddress { get; init; }
/// <summary>
/// Number of statements changed in this block.
/// </summary>
public int StatementsChanged { get; init; }
}
/// <summary>
/// Statement-level diff entry.
/// </summary>
public sealed record StatementDiff
{
/// <summary>
/// Statement index within block.
/// </summary>
public int Index { get; init; }
/// <summary>
/// Containing block ID.
/// </summary>
public required string BlockId { get; init; }
/// <summary>
/// Change type: added, removed, modified.
/// </summary>
public required string ChangeType { get; init; }
/// <summary>
/// Old statement (if applicable).
/// </summary>
public string? OldStatement { get; init; }
/// <summary>
/// New statement (if applicable).
/// </summary>
public string? NewStatement { get; init; }
}
/// <summary>
/// Summary of IR diff.
/// </summary>
public sealed record IrDiffSummary
{
/// <summary>
/// Total blocks in old function.
/// </summary>
public int OldBlockCount { get; init; }
/// <summary>
/// Total blocks in new function.
/// </summary>
public int NewBlockCount { get; init; }
/// <summary>
/// Blocks added.
/// </summary>
public int BlocksAdded { get; init; }
/// <summary>
/// Blocks removed.
/// </summary>
public int BlocksRemoved { get; init; }
/// <summary>
/// Blocks modified.
/// </summary>
public int BlocksModified { get; init; }
/// <summary>
/// Total statements in old function.
/// </summary>
public int OldStatementCount { get; init; }
/// <summary>
/// Total statements in new function.
/// </summary>
public int NewStatementCount { get; init; }
/// <summary>
/// Statements added.
/// </summary>
public int StatementsAdded { get; init; }
/// <summary>
/// Statements removed.
/// </summary>
public int StatementsRemoved { get; init; }
/// <summary>
/// Statements modified.
/// </summary>
public int StatementsModified { get; init; }
/// <summary>
/// Payload size in bytes.
/// </summary>
public int PayloadSizeBytes { get; init; }
}

View File

@@ -0,0 +1,222 @@
// -----------------------------------------------------------------------------
// IrDiffGenerator.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-003 - IR Diff Reference Generator
// Description: Generates IR diff references using lifted IR comparisons
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.DeltaSig.Attestation;
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.DeltaSig.IrDiff;
/// <summary>
/// Generates IR diff references by comparing lifted IR representations.
/// </summary>
public sealed class IrDiffGenerator : IIrDiffGenerator
{
private readonly ILogger<IrDiffGenerator> _logger;
private readonly ICasStore? _casStore;
/// <summary>
/// Creates a new IR diff generator.
/// </summary>
public IrDiffGenerator(
ILogger<IrDiffGenerator> logger,
ICasStore? casStore = null)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_casStore = casStore;
}
/// <inheritdoc />
public async Task<IReadOnlyList<FunctionMatchV2>> GenerateDiffsAsync(
IReadOnlyList<FunctionMatchV2> matches,
Stream oldBinaryStream,
Stream newBinaryStream,
IrDiffOptions options,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(matches);
ArgumentNullException.ThrowIfNull(oldBinaryStream);
ArgumentNullException.ThrowIfNull(newBinaryStream);
options ??= IrDiffOptions.Default;
if (matches.Count == 0)
{
return matches;
}
_logger.LogDebug("Generating IR diffs for {Count} function matches", matches.Count);
using var semaphore = new SemaphoreSlim(options.MaxParallelDiffs);
var tasks = matches.Select(async match =>
{
await semaphore.WaitAsync(ct);
try
{
if (match.BeforeHash == null || match.AfterHash == null)
{
return match; // Can't diff without both hashes
}
if (!match.Address.HasValue)
{
return match; // Can't diff without address
}
var address = (ulong)match.Address.Value;
var diff = await GenerateSingleDiffAsync(
address,
address, // Assume same address for now
oldBinaryStream,
newBinaryStream,
options,
ct);
return match with { IrDiff = diff };
}
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
throw;
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to generate IR diff for {Function}", match.Name);
return match; // Keep original without diff
}
finally
{
semaphore.Release();
}
});
var results = await Task.WhenAll(tasks);
var diffCount = results.Count(m => m.IrDiff != null);
_logger.LogInformation(
"Generated IR diffs for {Count}/{Total} function matches",
diffCount, matches.Count);
return results.ToList();
}
/// <inheritdoc />
public async Task<IrDiffReferenceV2?> GenerateSingleDiffAsync(
ulong functionAddress,
ulong oldFunctionAddress,
Stream oldBinaryStream,
Stream newBinaryStream,
IrDiffOptions options,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(oldBinaryStream);
ArgumentNullException.ThrowIfNull(newBinaryStream);
options ??= IrDiffOptions.Default;
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(options.DiffTimeout);
try
{
// In a real implementation, this would:
// 1. Lift both functions to IR
// 2. Compare the IR representations
// 3. Generate diff payload
// 4. Store in CAS if enabled
// 5. Return reference
// For now, create a placeholder summary
var summary = new IrDiffSummary
{
OldBlockCount = 0,
NewBlockCount = 0,
BlocksAdded = 0,
BlocksRemoved = 0,
BlocksModified = 0,
OldStatementCount = 0,
NewStatementCount = 0,
StatementsAdded = 0,
StatementsRemoved = 0,
StatementsModified = 0,
PayloadSizeBytes = 0
};
var payload = new IrDiffPayload
{
Digest = $"sha256:{ComputePlaceholderDigest(functionAddress)}",
IrFormat = options.IrFormat,
FunctionName = $"func_{functionAddress:X}",
OldAddress = oldFunctionAddress,
NewAddress = functionAddress,
BlockDiffs = new List<BlockDiff>(),
StatementDiffs = new List<StatementDiff>(),
Summary = summary,
ComputedAt = DateTimeOffset.UtcNow
};
// Store in CAS if enabled
string casDigest = payload.Digest;
if (options.StoreInCas && _casStore != null)
{
var json = JsonSerializer.Serialize(payload);
casDigest = await _casStore.StoreAsync(
Encoding.UTF8.GetBytes(json),
options.HashAlgorithm,
cts.Token); // honor DiffTimeout via the linked token
}
return new IrDiffReferenceV2
{
CasDigest = casDigest,
AddedBlocks = summary.BlocksAdded,
RemovedBlocks = summary.BlocksRemoved,
ChangedInstructions = summary.StatementsModified,
StatementsAdded = summary.StatementsAdded,
StatementsRemoved = summary.StatementsRemoved,
IrFormat = options.IrFormat
};
}
catch (OperationCanceledException) when (cts.Token.IsCancellationRequested && !ct.IsCancellationRequested)
{
_logger.LogWarning(
"IR diff generation timed out for function at {Address:X}",
functionAddress);
return null;
}
}
private static string ComputePlaceholderDigest(ulong address)
{
var bytes = BitConverter.GetBytes(address);
var hash = SHA256.HashData(bytes);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
/// <summary>
/// Content-addressable storage interface for IR diffs.
/// </summary>
public interface ICasStore
{
/// <summary>
/// Stores content and returns its digest.
/// </summary>
Task<string> StoreAsync(byte[] content, string algorithm, CancellationToken ct = default);
/// <summary>
/// Retrieves content by digest.
/// </summary>
Task<byte[]?> RetrieveAsync(string digest, CancellationToken ct = default);
/// <summary>
/// Checks if content exists.
/// </summary>
Task<bool> ExistsAsync(string digest, CancellationToken ct = default);
}
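// -----------------------------------------------------------------------------
// Usage sketch (illustrative): a minimal in-memory ICasStore suitable for tests
// or local experiments. The stub below is an assumption for demonstration;
// a production store would be durable and content-addressed across processes.
// -----------------------------------------------------------------------------
internal sealed class InMemoryCasStore : ICasStore
{
private readonly Dictionary<string, byte[]> _blobs = new();
public Task<string> StoreAsync(byte[] content, string algorithm, CancellationToken ct = default)
{
// Digest format mirrors the "algo:hex" convention used by the generator above.
var digest = $"{algorithm.ToLowerInvariant()}:{Convert.ToHexString(SHA256.HashData(content)).ToLowerInvariant()}";
_blobs[digest] = content;
return Task.FromResult(digest);
}
public Task<byte[]?> RetrieveAsync(string digest, CancellationToken ct = default)
=> Task.FromResult<byte[]?>(_blobs.TryGetValue(digest, out var content) ? content : null);
public Task<bool> ExistsAsync(string digest, CancellationToken ct = default)
=> Task.FromResult(_blobs.ContainsKey(digest));
}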

View File

@@ -0,0 +1,282 @@
// -----------------------------------------------------------------------------
// GroundTruthProvenanceResolver.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-002 - Symbol Provenance Resolver
// Description: Resolves symbol provenance from ground-truth observations
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.DeltaSig.Attestation;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using SignatureState = StellaOps.BinaryIndex.GroundTruth.Abstractions.SignatureState;
namespace StellaOps.BinaryIndex.DeltaSig.Provenance;
/// <summary>
/// Resolves symbol provenance from ground-truth observations.
/// Uses cached lookups and batching for efficiency.
/// </summary>
public sealed class GroundTruthProvenanceResolver : ISymbolProvenanceResolver
{
private readonly ISymbolObservationRepository _repository;
private readonly IMemoryCache _cache;
private readonly ILogger<GroundTruthProvenanceResolver> _logger;
/// <summary>
/// Creates a new ground-truth provenance resolver.
/// </summary>
public GroundTruthProvenanceResolver(
ISymbolObservationRepository repository,
IMemoryCache cache,
ILogger<GroundTruthProvenanceResolver> logger)
{
_repository = repository ?? throw new ArgumentNullException(nameof(repository));
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
/// <inheritdoc />
public async Task<IReadOnlyList<FunctionMatchV2>> EnrichWithProvenanceAsync(
IReadOnlyList<FunctionMatchV2> matches,
string binaryDigest,
ProvenanceResolutionOptions options,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(matches);
ArgumentException.ThrowIfNullOrEmpty(binaryDigest);
options ??= ProvenanceResolutionOptions.Default;
if (matches.Count == 0)
{
return matches;
}
_logger.LogDebug("Enriching {Count} function matches with provenance for {Digest}",
matches.Count, binaryDigest);
// Batch lookup all symbol names
var symbolNames = matches
.Where(m => !string.IsNullOrEmpty(m.Name))
.Select(m => m.Name)
.Distinct()
.ToList();
var provenanceLookup = await BatchLookupAsync(symbolNames, binaryDigest, ct);
// Enrich matches
var enriched = new List<FunctionMatchV2>(matches.Count);
foreach (var match in matches)
{
if (!string.IsNullOrEmpty(match.Name) &&
provenanceLookup.TryGetValue(match.Name, out var provenance))
{
// Filter by options
if (ShouldIncludeProvenance(provenance, options))
{
enriched.Add(match with { SymbolProvenance = provenance });
continue;
}
}
// Keep original (without provenance)
enriched.Add(match);
}
var enrichedCount = enriched.Count(m => m.SymbolProvenance != null);
_logger.LogInformation(
"Enriched {Enriched}/{Total} function matches with provenance",
enrichedCount, matches.Count);
return enriched;
}
/// <inheritdoc />
public async Task<SymbolProvenanceV2?> LookupSymbolAsync(
string symbolName,
string binaryDigest,
CancellationToken ct = default)
{
ArgumentException.ThrowIfNullOrEmpty(symbolName);
ArgumentException.ThrowIfNullOrEmpty(binaryDigest);
var cacheKey = $"prov:{binaryDigest}:{symbolName}";
// Try cache first
if (_cache.TryGetValue<SymbolProvenanceV2>(cacheKey, out var cached))
{
return cached;
}
// Look up from repository
var observations = await _repository.FindByDebugIdAsync(binaryDigest, ct);
foreach (var observation in observations)
{
var symbol = observation.Symbols.FirstOrDefault(s =>
s.Name.Equals(symbolName, StringComparison.Ordinal) ||
s.DemangledName?.Equals(symbolName, StringComparison.Ordinal) == true);
if (symbol != null)
{
var provenance = CreateProvenance(observation, symbol);
// Cache the result
_cache.Set(cacheKey, provenance, TimeSpan.FromMinutes(60));
return provenance;
}
}
// Cache the miss (short TTL)
_cache.Set(cacheKey, (SymbolProvenanceV2?)null, TimeSpan.FromMinutes(5));
return null;
}
/// <inheritdoc />
public async Task<IReadOnlyDictionary<string, SymbolProvenanceV2>> BatchLookupAsync(
IEnumerable<string> symbolNames,
string binaryDigest,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(symbolNames);
ArgumentException.ThrowIfNullOrEmpty(binaryDigest);
var names = symbolNames.ToList();
if (names.Count == 0)
{
return new Dictionary<string, SymbolProvenanceV2>();
}
var results = new ConcurrentDictionary<string, SymbolProvenanceV2>();
var uncached = new List<string>();
// Check cache first; a cached null marks a recent miss, so skip re-lookup until its TTL expires
foreach (var name in names)
{
var cacheKey = $"prov:{binaryDigest}:{name}";
if (_cache.TryGetValue<SymbolProvenanceV2>(cacheKey, out var cached))
{
if (cached != null)
{
results[name] = cached;
}
}
else
{
uncached.Add(name);
}
}
if (uncached.Count == 0)
{
return results;
}
// Fetch observations for this binary
var observations = await _repository.FindByDebugIdAsync(binaryDigest, ct);
// Build index of all symbols across observations
var symbolIndex = new Dictionary<string, (SymbolObservation Obs, ObservedSymbol Sym)>(
StringComparer.Ordinal);
foreach (var observation in observations)
{
foreach (var symbol in observation.Symbols)
{
// Index by name
if (!string.IsNullOrEmpty(symbol.Name) && !symbolIndex.ContainsKey(symbol.Name))
{
symbolIndex[symbol.Name] = (observation, symbol);
}
// Index by demangled name
if (!string.IsNullOrEmpty(symbol.DemangledName) &&
!symbolIndex.ContainsKey(symbol.DemangledName))
{
symbolIndex[symbol.DemangledName] = (observation, symbol);
}
}
}
// Look up uncached symbols
foreach (var name in uncached)
{
var cacheKey = $"prov:{binaryDigest}:{name}";
if (symbolIndex.TryGetValue(name, out var entry))
{
var provenance = CreateProvenance(entry.Obs, entry.Sym);
results[name] = provenance;
_cache.Set(cacheKey, provenance, TimeSpan.FromMinutes(60));
}
else
{
// Cache the miss
_cache.Set(cacheKey, (SymbolProvenanceV2?)null, TimeSpan.FromMinutes(5));
}
}
_logger.LogDebug(
"Batch lookup: {Requested} requested, {Cached} cached, {Found} found",
names.Count, names.Count - uncached.Count, results.Count);
return results;
}
private static SymbolProvenanceV2 CreateProvenance(
SymbolObservation observation,
ObservedSymbol symbol)
{
return new SymbolProvenanceV2
{
SourceId = observation.SourceId,
ObservationId = observation.ObservationId,
FetchedAt = observation.Provenance.FetchedAt,
SignatureState = MapSignatureState(observation.Provenance.SignatureState),
PackageName = observation.PackageName,
PackageVersion = observation.PackageVersion,
Distro = observation.Distro,
DistroVersion = observation.DistroVersion
};
}
private static string MapSignatureState(SignatureState state)
{
return state switch
{
SignatureState.Verified => SignatureStates.Verified,
SignatureState.Unverified => SignatureStates.Unverified,
SignatureState.Failed => SignatureStates.Failed,
SignatureState.None => SignatureStates.None,
_ => SignatureStates.Unknown
};
}
private static bool ShouldIncludeProvenance(
SymbolProvenanceV2 provenance,
ProvenanceResolutionOptions options)
{
// Check signature state
if (provenance.SignatureState == SignatureStates.Failed && !options.IncludeFailed)
{
return false;
}
if (provenance.SignatureState == SignatureStates.Unverified && !options.IncludeUnverified)
{
return false;
}
// Check age
if (options.MaxAgeDays.HasValue)
{
var age = DateTimeOffset.UtcNow - provenance.FetchedAt;
if (age.TotalDays > options.MaxAgeDays.Value)
{
return false;
}
}
return true;
}
}
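// -----------------------------------------------------------------------------
// Usage sketch (illustrative): enriching matches with the default options.
// Assumes an ISymbolObservationRepository implementation and IMemoryCache are
// registered via DI; the helper only demonstrates the call shape.
// -----------------------------------------------------------------------------
internal static class ProvenanceEnrichmentExample
{
public static Task<IReadOnlyList<FunctionMatchV2>> EnrichAsync(
ISymbolProvenanceResolver resolver,
IReadOnlyList<FunctionMatchV2> matches,
string binaryDigest,
CancellationToken ct) =>
resolver.EnrichWithProvenanceAsync(matches, binaryDigest, ProvenanceResolutionOptions.Default, ct);
}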

View File

@@ -0,0 +1,145 @@
// -----------------------------------------------------------------------------
// ISymbolProvenanceResolver.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-002 - Symbol Provenance Resolver
// Description: Interface for enriching function matches with provenance metadata
// -----------------------------------------------------------------------------
using StellaOps.BinaryIndex.DeltaSig.Attestation;
namespace StellaOps.BinaryIndex.DeltaSig.Provenance;
/// <summary>
/// Resolves symbol provenance metadata for function matches.
/// Uses ground-truth observations to attribute symbol sources.
/// </summary>
public interface ISymbolProvenanceResolver
{
/// <summary>
/// Enriches function matches with provenance metadata from ground-truth sources.
/// </summary>
/// <param name="matches">Function matches to enrich.</param>
/// <param name="binaryDigest">Digest of the binary being analyzed.</param>
/// <param name="options">Resolution options.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Enriched function matches with provenance data.</returns>
Task<IReadOnlyList<FunctionMatchV2>> EnrichWithProvenanceAsync(
IReadOnlyList<FunctionMatchV2> matches,
string binaryDigest,
ProvenanceResolutionOptions options,
CancellationToken ct = default);
/// <summary>
/// Looks up provenance for a single symbol by name.
/// </summary>
/// <param name="symbolName">Symbol name to look up.</param>
/// <param name="binaryDigest">Binary digest for context.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Symbol provenance or null if not found.</returns>
Task<SymbolProvenanceV2?> LookupSymbolAsync(
string symbolName,
string binaryDigest,
CancellationToken ct = default);
/// <summary>
/// Batch lookup of symbols by name.
/// </summary>
/// <param name="symbolNames">Symbol names to look up.</param>
/// <param name="binaryDigest">Binary digest for context.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Dictionary of symbol name to provenance.</returns>
Task<IReadOnlyDictionary<string, SymbolProvenanceV2>> BatchLookupAsync(
IEnumerable<string> symbolNames,
string binaryDigest,
CancellationToken ct = default);
}
/// <summary>
/// Options for provenance resolution.
/// </summary>
public sealed record ProvenanceResolutionOptions
{
/// <summary>
/// Default options.
/// </summary>
public static ProvenanceResolutionOptions Default { get; } = new();
/// <summary>
/// Preferred symbol sources in priority order.
/// First matching source wins.
/// </summary>
public IReadOnlyList<string> PreferredSources { get; init; } = new List<string>
{
"debuginfod-fedora",
"debuginfod-ubuntu",
"ddeb-ubuntu",
"buildinfo-debian"
};
/// <summary>
/// Whether to include unverified signatures.
/// </summary>
public bool IncludeUnverified { get; init; } = false;
/// <summary>
/// Whether to include sources with failed signature verification.
/// </summary>
public bool IncludeFailed { get; init; } = false;
/// <summary>
/// Maximum age of provenance data in days.
/// Null means no limit.
/// </summary>
public int? MaxAgeDays { get; init; } = null;
/// <summary>
/// Whether to use cached lookups.
/// </summary>
public bool UseCache { get; init; } = true;
/// <summary>
/// Cache TTL in minutes.
/// </summary>
public int CacheTtlMinutes { get; init; } = 60;
/// <summary>
/// Maximum concurrent lookups.
/// </summary>
public int MaxConcurrentLookups { get; init; } = 10;
/// <summary>
/// Timeout for individual symbol lookups.
/// </summary>
public TimeSpan LookupTimeout { get; init; } = TimeSpan.FromSeconds(5);
}
/// <summary>
/// Result of provenance enrichment.
/// </summary>
public sealed record ProvenanceEnrichmentResult
{
/// <summary>
/// Enriched function matches.
/// </summary>
public required IReadOnlyList<FunctionMatchV2> Matches { get; init; }
/// <summary>
/// Number of symbols enriched with provenance.
/// </summary>
public int EnrichedCount { get; init; }
/// <summary>
/// Number of symbols without provenance.
/// </summary>
public int UnenrichedCount { get; init; }
/// <summary>
/// Breakdown by source.
/// </summary>
public IReadOnlyDictionary<string, int> BySource { get; init; } = new Dictionary<string, int>();
/// <summary>
/// Breakdown by signature state.
/// </summary>
public IReadOnlyDictionary<string, int> BySignatureState { get; init; } = new Dictionary<string, int>();
}
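// -----------------------------------------------------------------------------
// Options sketch (illustrative): a stricter profile for provenance-sensitive
// pipelines. The values are placeholders chosen for demonstration, not
// recommended defaults.
// -----------------------------------------------------------------------------
internal static class StrictProvenanceOptionsExample
{
public static ProvenanceResolutionOptions Create() => ProvenanceResolutionOptions.Default with
{
IncludeUnverified = false,
IncludeFailed = false,
MaxAgeDays = 30,
MaxConcurrentLookups = 4
};
}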

View File

@@ -13,11 +13,14 @@
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly.Abstractions\StellaOps.BinaryIndex.Disassembly.Abstractions.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly\StellaOps.BinaryIndex.Disassembly.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.GroundTruth.Abstractions\StellaOps.BinaryIndex.GroundTruth.Abstractions.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Normalization\StellaOps.BinaryIndex.Normalization.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Semantic\StellaOps.BinaryIndex.Semantic.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
</ItemGroup>

View File

@@ -0,0 +1,345 @@
// -----------------------------------------------------------------------------
// DeltaSigVexBridge.cs
// Sprint: SPRINT_20260119_004_BinaryIndex_deltasig_extensions
// Task: DSIG-005 - VEX Evidence Integration
// Description: Bridges DeltaSig v2 predicates with VEX statement generation
// -----------------------------------------------------------------------------
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.DeltaSig.Attestation;
namespace StellaOps.BinaryIndex.DeltaSig.VexIntegration;
/// <summary>
/// Bridges DeltaSig v2 predicates with VEX observations.
/// </summary>
public interface IDeltaSigVexBridge
{
/// <summary>
/// Generates a VEX observation from a DeltaSig v2 predicate.
/// </summary>
/// <param name="predicate">The v2 predicate.</param>
/// <param name="context">VEX generation context.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>VEX observation.</returns>
Task<VexObservation> GenerateFromPredicateAsync(
DeltaSigPredicateV2 predicate,
DeltaSigVexContext context,
CancellationToken ct = default);
/// <summary>
/// Converts a v2 predicate verdict to a VEX statement status.
/// </summary>
/// <param name="verdict">The DeltaSig verdict.</param>
/// <returns>VEX statement status.</returns>
VexStatus MapVerdictToStatus(string verdict);
/// <summary>
/// Extracts evidence blocks from a v2 predicate.
/// </summary>
/// <param name="predicate">The v2 predicate.</param>
/// <returns>Evidence blocks for VEX attachment.</returns>
IReadOnlyList<VexEvidenceBlock> ExtractEvidence(DeltaSigPredicateV2 predicate);
}
/// <summary>
/// Implementation of DeltaSig-VEX bridge.
/// </summary>
public sealed class DeltaSigVexBridge : IDeltaSigVexBridge
{
private readonly ILogger<DeltaSigVexBridge> _logger;
private readonly TimeProvider _timeProvider;
/// <summary>
/// Creates a new bridge instance.
/// </summary>
public DeltaSigVexBridge(
ILogger<DeltaSigVexBridge> logger,
TimeProvider? timeProvider = null)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <inheritdoc />
public Task<VexObservation> GenerateFromPredicateAsync(
DeltaSigPredicateV2 predicate,
DeltaSigVexContext context,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(predicate);
ArgumentNullException.ThrowIfNull(context);
var status = MapVerdictToStatus(predicate.Verdict);
var evidence = ExtractEvidence(predicate);
var observationId = GenerateObservationId(context, predicate);
var observation = new VexObservation
{
ObservationId = observationId,
TenantId = context.TenantId,
ProviderId = "stellaops.deltasig",
StreamId = "deltasig_resolution",
Purl = predicate.Subject.Purl,
CveId = predicate.CveIds?.FirstOrDefault() ?? string.Empty,
Status = status,
Justification = MapVerdictToJustification(predicate.Verdict),
Impact = null,
ActionStatement = BuildActionStatement(predicate, context),
ObservedAt = _timeProvider.GetUtcNow(),
Provenance = new VexProvenance
{
Source = "deltasig-v2",
Method = "binary-diff-analysis",
Confidence = predicate.Confidence,
ToolVersion = GetToolVersion(),
SourceUri = context.SourceUri
},
Evidence = evidence,
Supersedes = context.SupersedesObservationId,
Metadata = BuildMetadata(predicate, context)
};
_logger.LogInformation(
"Generated VEX observation {Id} from DeltaSig predicate: {Status} for {Purl}",
observationId, status, predicate.Subject.Purl);
return Task.FromResult(observation);
}
/// <inheritdoc />
public VexStatus MapVerdictToStatus(string verdict)
{
return verdict switch
{
DeltaSigVerdicts.Patched => VexStatus.Fixed,
DeltaSigVerdicts.Vulnerable => VexStatus.Affected,
DeltaSigVerdicts.PartiallyPatched => VexStatus.UnderInvestigation,
DeltaSigVerdicts.Inconclusive => VexStatus.UnderInvestigation,
DeltaSigVerdicts.Unknown => VexStatus.NotAffected, // Assume not affected if unknown
_ => VexStatus.UnderInvestigation
};
}
/// <inheritdoc />
public IReadOnlyList<VexEvidenceBlock> ExtractEvidence(DeltaSigPredicateV2 predicate)
{
var blocks = new List<VexEvidenceBlock>();
// Summary evidence
if (predicate.Summary != null)
{
blocks.Add(new VexEvidenceBlock
{
Type = "deltasig-summary",
Label = "DeltaSig Analysis Summary",
Content = JsonSerializer.Serialize(new
{
predicate.Summary.TotalFunctions,
predicate.Summary.VulnerableFunctions,
predicate.Summary.PatchedFunctions,
predicate.Summary.FunctionsWithProvenance,
predicate.Summary.FunctionsWithIrDiff,
predicate.Summary.AvgMatchScore
}),
ContentType = "application/json"
});
}
// Function-level evidence for high-confidence matches
var highConfidenceMatches = predicate.FunctionMatches
.Where(f => f.MatchScore >= 0.9 && f.SymbolProvenance != null)
.Take(10) // Limit to avoid bloat
.ToList();
if (highConfidenceMatches.Count > 0)
{
blocks.Add(new VexEvidenceBlock
{
Type = "deltasig-function-matches",
Label = "High-Confidence Function Matches",
Content = JsonSerializer.Serialize(highConfidenceMatches.Select(f => new
{
f.Name,
f.MatchScore,
f.MatchMethod,
f.MatchState,
ProvenanceSource = f.SymbolProvenance?.SourceId,
HasIrDiff = f.IrDiff != null
})),
ContentType = "application/json"
});
}
// Predicate reference
blocks.Add(new VexEvidenceBlock
{
Type = "deltasig-predicate-ref",
Label = "DeltaSig Predicate Reference",
Content = JsonSerializer.Serialize(new
{
PredicateType = DeltaSigPredicateV2.PredicateType,
predicate.Verdict,
predicate.Confidence,
predicate.ComputedAt,
CveIds = predicate.CveIds
}),
ContentType = "application/json"
});
return blocks;
}
private static string GenerateObservationId(DeltaSigVexContext context, DeltaSigPredicateV2 predicate)
{
// Generate a deterministic observation ID from a truncated SHA-256 over the pair key
var input = $"{context.TenantId}:{predicate.Subject.Purl}:{predicate.CveIds?.FirstOrDefault()}:{predicate.ComputedAt:O}";
return $"obs:deltasig:{ComputeHash(input)}";
}
private static string? MapVerdictToJustification(string verdict)
{
return verdict switch
{
DeltaSigVerdicts.Patched => "vulnerable_code_not_present",
DeltaSigVerdicts.PartiallyPatched => "inline_mitigations_already_exist",
_ => null
};
}
private static string? BuildActionStatement(DeltaSigPredicateV2 predicate, DeltaSigVexContext context)
{
return predicate.Verdict switch
{
DeltaSigVerdicts.Patched =>
$"Binary analysis confirms {predicate.Summary?.PatchedFunctions ?? 0} vulnerable functions have been patched.",
DeltaSigVerdicts.Vulnerable =>
$"Binary analysis detected {predicate.Summary?.VulnerableFunctions ?? 0} unpatched vulnerable functions. Upgrade recommended.",
DeltaSigVerdicts.PartiallyPatched =>
"Some vulnerable functions remain unpatched. Review required.",
_ => null
};
}
private static IReadOnlyDictionary<string, string>? BuildMetadata(
DeltaSigPredicateV2 predicate,
DeltaSigVexContext context)
{
var metadata = new Dictionary<string, string>
{
["predicateType"] = DeltaSigPredicateV2.PredicateType,
["verdict"] = predicate.Verdict,
["confidence"] = predicate.Confidence.ToString("F2"),
["computedAt"] = predicate.ComputedAt.ToString("O")
};
if (predicate.Tooling != null)
{
metadata["lifter"] = predicate.Tooling.Lifter;
metadata["matchAlgorithm"] = predicate.Tooling.MatchAlgorithm ?? "unknown";
}
if (context.ScanId != null)
{
metadata["scanId"] = context.ScanId;
}
return metadata;
}
private static string GetToolVersion()
{
var version = typeof(DeltaSigVexBridge).Assembly.GetName().Version;
return version?.ToString() ?? "0.0.0";
}
private static string ComputeHash(string input)
{
var bytes = System.Text.Encoding.UTF8.GetBytes(input);
var hash = System.Security.Cryptography.SHA256.HashData(bytes);
return Convert.ToHexString(hash)[..16].ToLowerInvariant();
}
}
/// <summary>
/// Context for DeltaSig VEX generation.
/// </summary>
public sealed record DeltaSigVexContext
{
/// <summary>
/// Tenant identifier.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Optional scan identifier.
/// </summary>
public string? ScanId { get; init; }
/// <summary>
/// Optional source URI for the predicate.
/// </summary>
public string? SourceUri { get; init; }
/// <summary>
/// Optional observation ID this supersedes.
/// </summary>
public string? SupersedesObservationId { get; init; }
}
/// <summary>
/// VEX status enum (mirrors Excititor.Core).
/// </summary>
public enum VexStatus
{
NotAffected,
Affected,
Fixed,
UnderInvestigation
}
/// <summary>
/// VEX observation for DeltaSig bridge (simplified model).
/// </summary>
public sealed record VexObservation
{
public required string ObservationId { get; init; }
public required string TenantId { get; init; }
public required string ProviderId { get; init; }
public required string StreamId { get; init; }
public required string Purl { get; init; }
public required string CveId { get; init; }
public required VexStatus Status { get; init; }
public string? Justification { get; init; }
public string? Impact { get; init; }
public string? ActionStatement { get; init; }
public DateTimeOffset ObservedAt { get; init; }
public VexProvenance? Provenance { get; init; }
public IReadOnlyList<VexEvidenceBlock>? Evidence { get; init; }
public string? Supersedes { get; init; }
public IReadOnlyDictionary<string, string>? Metadata { get; init; }
}
/// <summary>
/// VEX provenance metadata.
/// </summary>
public sealed record VexProvenance
{
public required string Source { get; init; }
public required string Method { get; init; }
public double Confidence { get; init; }
public string? ToolVersion { get; init; }
public string? SourceUri { get; init; }
}
/// <summary>
/// VEX evidence block.
/// </summary>
public sealed record VexEvidenceBlock
{
public required string Type { get; init; }
public required string Label { get; init; }
public required string Content { get; init; }
public string ContentType { get; init; } = "text/plain";
}
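// -----------------------------------------------------------------------------
// Usage sketch (illustrative): verdict-to-status mapping in isolation. The
// DeltaSigVerdicts constants live in the Attestation namespace imported above.
// -----------------------------------------------------------------------------
internal static class VerdictMappingExample
{
public static VexStatus MapPatched(IDeltaSigVexBridge bridge) =>
bridge.MapVerdictToStatus(DeltaSigVerdicts.Patched); // expected: VexStatus.Fixed
}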

View File

@@ -0,0 +1,44 @@
# GroundTruth.Abstractions - Agent Instructions
## Module Overview
This library defines the core abstractions for ground-truth symbol source connectors following the Concelier/Excititor Aggregation-Only Contract (AOC) pattern.
## Key Interfaces
- **ISymbolSourceConnector** - Main connector interface with three-phase pipeline (Fetch → Parse → Map)
- **ISymbolSourceConnectorPlugin** - Plugin registration interface
- **ISymbolObservationWriteGuard** - AOC enforcement for immutable observations
- **ISymbolObservationRepository** - Persistence for observations
- **ISecurityPairService** - Pre/post CVE binary pair management
## AOC Invariants (MUST follow)
1. **No derived scores at ingest** - Never add confidence, accuracy, or match_score during ingestion
2. **Immutable observations** - Once created, observations are never modified
3. **Supersession chain** - New versions use `SupersedesId` to link to previous
4. **Mandatory provenance** - All observations must have `source_id`, `document_uri`, `fetched_at`, `content_hash`
5. **Deterministic hashing** - Use canonical JSON with sorted keys, UTC timestamps, hex-lowercase hashes
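A minimal sketch of invariant 5 (sorted keys, hex-lowercase SHA-256). The `JsonNode`-based canonicalizer below is one assumption about how "canonical JSON" is realized; the production canonicalizer may differ:

```csharp
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json.Nodes;

internal static class CanonicalHashingSketch
{
    public static string Hash(JsonNode document)
    {
        var canonical = SortKeys(document)!.ToJsonString(); // compact output, sorted keys
        var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(canonical));
        return Convert.ToHexString(bytes).ToLowerInvariant(); // hex-lowercase per the invariant
    }

    private static JsonNode? SortKeys(JsonNode? node) => node switch
    {
        JsonObject obj => new JsonObject(obj
            .OrderBy(p => p.Key, StringComparer.Ordinal)
            .Select(p => KeyValuePair.Create(p.Key, SortKeys(p.Value)))),
        JsonArray arr => new JsonArray(arr.Select(SortKeys).ToArray()),
        _ => node?.DeepClone()
    };
}
```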
## Adding New Connectors
1. Implement `ISymbolSourceConnector` (or extend `SymbolSourceConnectorBase`)
2. Implement `ISymbolSourceConnectorPlugin` for DI registration
3. Add source definition to `SymbolSourceDefinitions`
4. Follow the three-phase pattern (see the skeleton after this list):
- **Fetch**: Download raw data, store with digest, update cursor
- **Parse**: Validate, extract symbols, create DTOs
- **Map**: Build canonical observations, enforce AOC, persist
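A minimal connector skeleton (illustrative; the class name, source ID, and phase bodies are placeholders):

```csharp
public sealed class ExampleDebuginfodConnector : ISymbolSourceConnector
{
    public string SourceId => "debuginfod-example";
    public string DisplayName => "Example debuginfod source";
    public IReadOnlyList<string> SupportedDistros { get; } = new[] { "fedora" };

    public Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
        => Task.CompletedTask; // Download raw documents, store with digest, update cursor.

    public Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
        => Task.CompletedTask; // Validate schema, extract symbols, create DTOs.

    public Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
        => Task.CompletedTask; // Build observations, enforce AOC via the write guard, persist.
}
```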
## Testing Requirements
- Unit tests for all public interfaces
- AOC write guard tests for all violation codes
- Deterministic hash tests with frozen fixtures
- Offline-compatible test fixtures
## Dependencies
- Microsoft.Extensions.Logging.Abstractions
- Microsoft.Extensions.Options
- System.Text.Json

View File

@@ -0,0 +1,290 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Service for managing pre/post CVE security binary pairs.
/// Used as ground-truth for validating function matching accuracy.
/// </summary>
public interface ISecurityPairService
{
/// <summary>
/// Create a new security pair from vulnerable and patched observations.
/// </summary>
/// <param name="cveId">CVE identifier.</param>
/// <param name="vulnerableObservationId">Observation ID of vulnerable binary.</param>
/// <param name="patchedObservationId">Observation ID of patched binary.</param>
/// <param name="metadata">Pair metadata.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Created security pair.</returns>
Task<SecurityPair> CreatePairAsync(
string cveId,
string vulnerableObservationId,
string patchedObservationId,
SecurityPairMetadata metadata,
CancellationToken ct = default);
/// <summary>
/// Find security pair by ID.
/// </summary>
Task<SecurityPair?> FindByIdAsync(string pairId, CancellationToken ct = default);
/// <summary>
/// Find security pairs by CVE.
/// </summary>
Task<ImmutableArray<SecurityPair>> FindByCveAsync(string cveId, CancellationToken ct = default);
/// <summary>
/// Find security pairs by package.
/// </summary>
Task<ImmutableArray<SecurityPair>> FindByPackageAsync(
string distro,
string packageName,
CancellationToken ct = default);
/// <summary>
/// Query security pairs with filters.
/// </summary>
Task<ImmutableArray<SecurityPair>> QueryAsync(
SecurityPairQuery query,
CancellationToken ct = default);
/// <summary>
/// Get statistics about security pairs.
/// </summary>
Task<SecurityPairStats> GetStatsAsync(CancellationToken ct = default);
}
/// <summary>
/// A pre/post CVE security binary pair for ground-truth validation.
/// </summary>
public sealed record SecurityPair
{
/// <summary>
/// Unique pair ID.
/// </summary>
public required string PairId { get; init; }
/// <summary>
/// CVE identifier.
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Observation ID of vulnerable binary.
/// </summary>
public required string VulnerableObservationId { get; init; }
/// <summary>
/// Debug ID of vulnerable binary.
/// </summary>
public required string VulnerableDebugId { get; init; }
/// <summary>
/// Observation ID of patched binary.
/// </summary>
public required string PatchedObservationId { get; init; }
/// <summary>
/// Debug ID of patched binary.
/// </summary>
public required string PatchedDebugId { get; init; }
/// <summary>
/// Functions affected by the vulnerability.
/// </summary>
public required ImmutableArray<AffectedFunction> AffectedFunctions { get; init; }
/// <summary>
/// Functions changed in the patch.
/// </summary>
public required ImmutableArray<ChangedFunction> ChangedFunctions { get; init; }
/// <summary>
/// Distribution.
/// </summary>
public required string Distro { get; init; }
/// <summary>
/// Package name.
/// </summary>
public required string PackageName { get; init; }
/// <summary>
/// Vulnerable package version.
/// </summary>
public required string VulnerableVersion { get; init; }
/// <summary>
/// Patched package version.
/// </summary>
public required string PatchedVersion { get; init; }
/// <summary>
/// Upstream commit that fixed the vulnerability.
/// </summary>
public string? UpstreamCommit { get; init; }
/// <summary>
/// URL to the upstream patch.
/// </summary>
public string? UpstreamPatchUrl { get; init; }
/// <summary>
/// When the pair was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Who created the pair.
/// </summary>
public string? CreatedBy { get; init; }
}
/// <summary>
/// A function affected by a vulnerability.
/// </summary>
public sealed record AffectedFunction(
string Name,
ulong VulnerableAddress,
ulong PatchedAddress,
AffectedFunctionType Type,
string? Description);
/// <summary>
/// Type of affected function.
/// </summary>
public enum AffectedFunctionType
{
/// <summary>
/// Function contains vulnerable code.
/// </summary>
Vulnerable,
/// <summary>
/// Function calls vulnerable code.
/// </summary>
Caller,
/// <summary>
/// Function is an entry point to vulnerable code path.
/// </summary>
EntryPoint
}
/// <summary>
/// A function changed in the patch.
/// </summary>
public sealed record ChangedFunction(
string Name,
int VulnerableSize,
int PatchedSize,
int SizeDelta,
ChangeType ChangeType,
string? Description);
/// <summary>
/// Type of change in the patch.
/// </summary>
public enum ChangeType
{
/// <summary>
/// Function was modified.
/// </summary>
Modified,
/// <summary>
/// Function was added.
/// </summary>
Added,
/// <summary>
/// Function was removed.
/// </summary>
Removed,
/// <summary>
/// Function was renamed.
/// </summary>
Renamed
}
/// <summary>
/// Metadata for creating a security pair.
/// </summary>
public sealed record SecurityPairMetadata
{
/// <summary>
/// Functions affected by the vulnerability.
/// </summary>
public ImmutableArray<AffectedFunction> AffectedFunctions { get; init; } =
ImmutableArray<AffectedFunction>.Empty;
/// <summary>
/// Functions changed in the patch.
/// </summary>
public ImmutableArray<ChangedFunction> ChangedFunctions { get; init; } =
ImmutableArray<ChangedFunction>.Empty;
/// <summary>
/// Upstream commit.
/// </summary>
public string? UpstreamCommit { get; init; }
/// <summary>
/// Upstream patch URL.
/// </summary>
public string? UpstreamPatchUrl { get; init; }
/// <summary>
/// Creator identifier.
/// </summary>
public string? CreatedBy { get; init; }
}
/// <summary>
/// Query for security pairs.
/// </summary>
public sealed record SecurityPairQuery
{
/// <summary>
/// Filter by CVE pattern (supports wildcards).
/// </summary>
public string? CvePattern { get; init; }
/// <summary>
/// Filter by distribution.
/// </summary>
public string? Distro { get; init; }
/// <summary>
/// Filter by package name.
/// </summary>
public string? PackageName { get; init; }
/// <summary>
/// Only pairs created after this time.
/// </summary>
public DateTimeOffset? CreatedAfter { get; init; }
/// <summary>
/// Maximum results.
/// </summary>
public int Limit { get; init; } = 100;
/// <summary>
/// Offset for pagination.
/// </summary>
public int Offset { get; init; }
}
/// <summary>
/// Statistics about security pairs.
/// </summary>
public sealed record SecurityPairStats(
long TotalPairs,
long UniqueCves,
long UniquePackages,
IReadOnlyDictionary<string, long> PairsByDistro,
DateTimeOffset? OldestPair,
DateTimeOffset? NewestPair);
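// -----------------------------------------------------------------------------
// Usage sketch (illustrative): querying recent pairs for one package. The
// filter values are placeholders, not meaningful defaults.
// -----------------------------------------------------------------------------
internal static class SecurityPairQueryExample
{
public static Task<ImmutableArray<SecurityPair>> RecentOpenSslPairsAsync(
ISecurityPairService service,
CancellationToken ct) =>
service.QueryAsync(
new SecurityPairQuery
{
CvePattern = "CVE-2025-*",
Distro = "debian",
PackageName = "openssl",
CreatedAfter = DateTimeOffset.UtcNow.AddDays(-90),
Limit = 50
},
ct);
}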

View File

@@ -0,0 +1,242 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Repository for symbol observations.
/// </summary>
public interface ISymbolObservationRepository
{
/// <summary>
/// Find observation by ID.
/// </summary>
/// <param name="observationId">Observation ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Observation or null.</returns>
Task<SymbolObservation?> FindByIdAsync(string observationId, CancellationToken ct = default);
/// <summary>
/// Find observations by debug ID.
/// </summary>
/// <param name="debugId">Debug ID (Build-ID, GUID, UUID).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Matching observations.</returns>
Task<ImmutableArray<SymbolObservation>> FindByDebugIdAsync(string debugId, CancellationToken ct = default);
/// <summary>
/// Find observations by package.
/// </summary>
/// <param name="distro">Distribution name.</param>
/// <param name="packageName">Package name.</param>
/// <param name="packageVersion">Package version (optional).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Matching observations.</returns>
Task<ImmutableArray<SymbolObservation>> FindByPackageAsync(
string distro,
string packageName,
string? packageVersion = null,
CancellationToken ct = default);
/// <summary>
/// Find observations by source.
/// </summary>
/// <param name="sourceId">Source ID.</param>
/// <param name="since">Only observations created after this time.</param>
/// <param name="limit">Maximum results.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Matching observations.</returns>
Task<ImmutableArray<SymbolObservation>> FindBySourceAsync(
string sourceId,
DateTimeOffset? since = null,
int limit = 100,
CancellationToken ct = default);
/// <summary>
/// Check if observation with given content hash exists.
/// </summary>
/// <param name="sourceId">Source ID.</param>
/// <param name="debugId">Debug ID.</param>
/// <param name="contentHash">Content hash.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Existing observation ID or null.</returns>
Task<string?> FindByContentHashAsync(
string sourceId,
string debugId,
string contentHash,
CancellationToken ct = default);
/// <summary>
/// Insert a new observation.
/// </summary>
/// <param name="observation">Observation to insert.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Inserted observation ID.</returns>
Task<string> InsertAsync(SymbolObservation observation, CancellationToken ct = default);
/// <summary>
/// Get observation statistics.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>Statistics.</returns>
Task<SymbolObservationStats> GetStatsAsync(CancellationToken ct = default);
}
/// <summary>
/// Statistics for symbol observations.
/// </summary>
public sealed record SymbolObservationStats(
long TotalObservations,
long TotalSymbols,
long UniqueDebugIds,
IReadOnlyDictionary<string, long> ObservationsBySource,
IReadOnlyDictionary<string, long> ObservationsByDistro,
DateTimeOffset? OldestObservation,
DateTimeOffset? NewestObservation);
/// <summary>
/// Repository for raw documents.
/// </summary>
public interface ISymbolRawDocumentRepository
{
/// <summary>
/// Find document by digest.
/// </summary>
Task<SymbolRawDocument?> FindByDigestAsync(string digest, CancellationToken ct = default);
/// <summary>
/// Find document by URI.
/// </summary>
Task<SymbolRawDocument?> FindByUriAsync(string sourceId, string documentUri, CancellationToken ct = default);
/// <summary>
/// Get documents pending parse.
/// </summary>
Task<ImmutableArray<SymbolRawDocument>> GetPendingParseAsync(
string sourceId,
int limit = 100,
CancellationToken ct = default);
/// <summary>
/// Get documents pending map.
/// </summary>
Task<ImmutableArray<SymbolRawDocument>> GetPendingMapAsync(
string sourceId,
int limit = 100,
CancellationToken ct = default);
/// <summary>
/// Insert or update document.
/// </summary>
Task UpsertAsync(SymbolRawDocument document, CancellationToken ct = default);
/// <summary>
/// Update document status.
/// </summary>
Task UpdateStatusAsync(string digest, DocumentStatus status, CancellationToken ct = default);
}
/// <summary>
/// Repository for source sync state (cursors).
/// </summary>
public interface ISymbolSourceStateRepository
{
/// <summary>
/// Get or create source state.
/// </summary>
Task<SymbolSourceState> GetOrCreateAsync(string sourceId, CancellationToken ct = default);
/// <summary>
/// Update source state.
/// </summary>
Task UpdateAsync(SymbolSourceState state, CancellationToken ct = default);
/// <summary>
/// Mark source as failed with backoff.
/// </summary>
Task MarkFailedAsync(
string sourceId,
string errorMessage,
TimeSpan backoff,
CancellationToken ct = default);
}
/// <summary>
/// Sync state for a symbol source.
/// </summary>
public sealed record SymbolSourceState
{
/// <summary>
/// Source ID.
/// </summary>
public required string SourceId { get; init; }
/// <summary>
/// Whether source is enabled.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Cursor state (source-specific).
/// </summary>
public ImmutableDictionary<string, string> Cursor { get; init; } =
ImmutableDictionary<string, string>.Empty;
/// <summary>
/// Pending document digests for parse phase.
/// </summary>
public ImmutableArray<string> PendingParse { get; init; } = ImmutableArray<string>.Empty;
/// <summary>
/// Pending document digests for map phase.
/// </summary>
public ImmutableArray<string> PendingMap { get; init; } = ImmutableArray<string>.Empty;
/// <summary>
/// Last successful sync.
/// </summary>
public DateTimeOffset? LastSuccessAt { get; init; }
/// <summary>
/// Last error message.
/// </summary>
public string? LastError { get; init; }
/// <summary>
/// Backoff until (for error recovery).
/// </summary>
public DateTimeOffset? BackoffUntil { get; init; }
/// <summary>
/// Update cursor value.
/// </summary>
public SymbolSourceState WithCursor(string key, string value) =>
this with { Cursor = Cursor.SetItem(key, value) };
/// <summary>
/// Add pending parse document.
/// </summary>
public SymbolSourceState AddPendingParse(string digest) =>
this with { PendingParse = PendingParse.Add(digest) };
/// <summary>
/// Remove pending parse document.
/// </summary>
public SymbolSourceState RemovePendingParse(string digest) =>
this with { PendingParse = PendingParse.Remove(digest) };
/// <summary>
/// Move document from parse to map phase.
/// </summary>
public SymbolSourceState MoveToPendingMap(string digest) =>
this with
{
PendingParse = PendingParse.Remove(digest),
PendingMap = PendingMap.Add(digest)
};
/// <summary>
/// Mark document as mapped (complete).
/// </summary>
public SymbolSourceState MarkMapped(string digest) =>
this with { PendingMap = PendingMap.Remove(digest) };
}
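// -----------------------------------------------------------------------------
// Usage sketch (illustrative): driving one digest through the fetch → parse →
// map lifecycle with the state helpers above. The digest value is a placeholder.
// -----------------------------------------------------------------------------
internal static class SourceStateLifecycleExample
{
public static SymbolSourceState Advance(SymbolSourceState state, string digest) =>
state
.WithCursor("last_sync", DateTimeOffset.UtcNow.ToString("O"))
.AddPendingParse(digest)
.MoveToPendingMap(digest)
.MarkMapped(digest);
}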

View File

@@ -0,0 +1,128 @@
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Aggregation-Only Contract (AOC) write guard for symbol observations.
/// Ensures immutable, append-only semantics following Concelier patterns.
/// </summary>
public interface ISymbolObservationWriteGuard
{
/// <summary>
/// Validate a symbol observation before persistence.
/// </summary>
/// <param name="observation">The observation to validate.</param>
/// <param name="existingContentHash">Content hash of existing observation with same key, if any.</param>
/// <returns>Write disposition indicating whether to proceed.</returns>
WriteDisposition ValidateWrite(SymbolObservation observation, string? existingContentHash);
/// <summary>
/// Ensure observation satisfies all AOC invariants.
/// Throws <see cref="GroundTruthAocGuardException"/> on violations.
/// </summary>
/// <param name="observation">The observation to validate.</param>
void EnsureValid(SymbolObservation observation);
}
/// <summary>
/// Write disposition from AOC guard.
/// </summary>
public enum WriteDisposition
{
/// <summary>
/// Proceed with insert.
/// </summary>
Proceed,
/// <summary>
/// Skip - identical observation already exists (idempotent).
/// </summary>
SkipIdentical,
/// <summary>
/// Reject - would mutate existing observation (append-only violation).
/// </summary>
RejectMutation
}
/// <summary>
/// Exception thrown when AOC invariants are violated.
/// </summary>
public sealed class GroundTruthAocGuardException : Exception
{
/// <summary>
/// Violations detected.
/// </summary>
public IReadOnlyList<AocViolation> Violations { get; }
/// <summary>
/// Creates an exception describing the detected violations.
/// </summary>
public GroundTruthAocGuardException(IReadOnlyList<AocViolation> violations)
: base($"AOC guard violations: {string.Join(", ", violations.Select(v => v.Code))}")
{
Violations = violations;
}
/// <summary>
/// Creates an exception with a custom message and the detected violations.
/// </summary>
public GroundTruthAocGuardException(string message, IReadOnlyList<AocViolation> violations)
: base(message)
{
Violations = violations;
}
}
/// <summary>
/// A single AOC violation.
/// </summary>
public sealed record AocViolation(
string Code,
string Message,
string? Path,
AocViolationSeverity Severity);
/// <summary>
/// Severity of AOC violation.
/// </summary>
public enum AocViolationSeverity
{
/// <summary>
/// Warning - operation may proceed but should be investigated.
/// </summary>
Warning,
/// <summary>
/// Error - operation must not proceed.
/// </summary>
Error
}
/// <summary>
/// AOC violation codes for ground-truth observations.
/// </summary>
public static class AocViolationCodes
{
/// <summary>
/// Missing mandatory provenance fields.
/// </summary>
public const string MissingProvenance = "GTAOC_001";
/// <summary>
/// Attempt to modify existing observation (append-only violation).
/// </summary>
public const string AppendOnlyViolation = "GTAOC_002";
/// <summary>
/// Derived fields present at ingest time.
/// </summary>
public const string DerivedFieldPresent = "GTAOC_003";
/// <summary>
/// Invalid content hash.
/// </summary>
public const string InvalidContentHash = "GTAOC_004";
/// <summary>
/// Missing required fields.
/// </summary>
public const string MissingRequiredField = "GTAOC_005";
/// <summary>
/// Invalid supersession chain.
/// </summary>
public const string InvalidSupersession = "GTAOC_006";
}
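// -----------------------------------------------------------------------------
// Usage sketch (illustrative): how a persistence layer might consult the guard
// before insert. The call order below is an assumption; repositories may
// integrate the guard differently.
// -----------------------------------------------------------------------------
internal static class WriteGuardUsageExample
{
public static async Task<string?> GuardedInsertAsync(
ISymbolObservationWriteGuard guard,
ISymbolObservationRepository repository,
SymbolObservation observation,
string? existingContentHash,
CancellationToken ct)
{
guard.EnsureValid(observation); // throws GroundTruthAocGuardException on invariant violations
return guard.ValidateWrite(observation, existingContentHash) switch
{
WriteDisposition.Proceed => await repository.InsertAsync(observation, ct),
WriteDisposition.SkipIdentical => observation.ObservationId, // idempotent no-op
WriteDisposition.RejectMutation => throw new GroundTruthAocGuardException(
new[]
{
new AocViolation(
AocViolationCodes.AppendOnlyViolation,
"Write would mutate an existing observation.",
null,
AocViolationSeverity.Error)
}),
_ => throw new InvalidOperationException("Unknown write disposition.")
};
}
}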

View File

@@ -0,0 +1,229 @@
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Connector for fetching debug symbols from external sources.
/// Follows the Concelier three-phase pipeline pattern: Fetch → Parse → Map.
/// </summary>
public interface ISymbolSourceConnector
{
/// <summary>
/// Unique identifier for this source (e.g., "debuginfod-fedora", "ddeb-ubuntu").
/// </summary>
string SourceId { get; }
/// <summary>
/// Human-readable display name.
/// </summary>
string DisplayName { get; }
/// <summary>
/// Supported Linux distributions.
/// </summary>
IReadOnlyList<string> SupportedDistros { get; }
/// <summary>
/// Phase 1: Fetch raw symbol data from upstream source.
/// Downloads raw documents (debuginfo, .ddeb, .buildinfo) and stores them.
/// </summary>
/// <param name="services">Service provider for dependency resolution.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken);
/// <summary>
/// Phase 2: Parse raw documents into normalized DTOs.
/// Validates schema, extracts symbols, creates DTO records.
/// </summary>
/// <param name="services">Service provider for dependency resolution.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken);
/// <summary>
/// Phase 3: Map DTOs to canonical symbol observations.
/// Creates immutable observations with AOC compliance.
/// </summary>
/// <param name="services">Service provider for dependency resolution.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task MapAsync(IServiceProvider services, CancellationToken cancellationToken);
}
/// <summary>
/// Plugin interface for symbol source connector registration.
/// </summary>
public interface ISymbolSourceConnectorPlugin
{
/// <summary>
/// Plugin name (same as SourceId).
/// </summary>
string Name { get; }
/// <summary>
/// Check if the connector is available with current configuration.
/// </summary>
/// <param name="services">Service provider.</param>
/// <returns>True if available.</returns>
bool IsAvailable(IServiceProvider services);
/// <summary>
/// Create connector instance.
/// </summary>
/// <param name="services">Service provider.</param>
/// <returns>Connector instance.</returns>
ISymbolSourceConnector Create(IServiceProvider services);
}
/// <summary>
/// Capability interface for symbol source connectors with rich metadata.
/// </summary>
public interface ISymbolSourceCapability
{
/// <summary>
/// Test connectivity to the symbol source.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>Connectivity test result.</returns>
Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default);
/// <summary>
/// Get source metadata including last sync time and statistics.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>Source metadata.</returns>
Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default);
/// <summary>
/// Fetch symbols for a specific debug ID.
/// </summary>
/// <param name="debugId">ELF Build-ID, PE GUID, or Mach-O UUID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Symbol data or null if not found.</returns>
Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default);
}
/// <summary>
/// Result of connectivity test.
/// </summary>
public sealed record SymbolSourceConnectivityResult(
bool IsConnected,
TimeSpan Latency,
string? ErrorMessage,
DateTimeOffset TestedAt);
/// <summary>
/// Metadata about a symbol source.
/// </summary>
public sealed record SymbolSourceMetadata(
string SourceId,
string DisplayName,
string BaseUrl,
DateTimeOffset? LastSyncAt,
int? ObservationCount,
int? DebugIdCount,
IReadOnlyDictionary<string, string> AdditionalInfo);
/// <summary>
/// Symbol data fetched from a source.
/// </summary>
public sealed record SymbolData(
string DebugId,
string BinaryName,
string Architecture,
IReadOnlyList<SymbolEntry> Symbols,
BuildMetadata? BuildInfo,
SymbolDataProvenance Provenance);
/// <summary>
/// A single symbol entry.
/// </summary>
public sealed record SymbolEntry(
string Name,
string? DemangledName,
ulong Address,
int SizeBytes,
SymbolType Type,
SymbolBinding Binding,
string? SourceFile,
int? SourceLine);
/// <summary>
/// Symbol type.
/// </summary>
public enum SymbolType
{
/// <summary>Function (code) symbol.</summary>
Function,
/// <summary>Data object symbol.</summary>
Object,
/// <summary>Section symbol.</summary>
Section,
/// <summary>Source file symbol.</summary>
File,
/// <summary>Common (uninitialized data) symbol.</summary>
Common,
/// <summary>Thread-local storage symbol.</summary>
Tls,
/// <summary>Unrecognized symbol type.</summary>
Unknown
}
/// <summary>
/// Symbol binding.
/// </summary>
public enum SymbolBinding
{
/// <summary>Local binding (not visible outside the object file).</summary>
Local,
/// <summary>Global binding (visible to all objects).</summary>
Global,
/// <summary>Weak binding (global with lower link precedence).</summary>
Weak,
/// <summary>Unrecognized binding.</summary>
Unknown
}
/// <summary>
/// Symbol visibility.
/// </summary>
public enum SymbolVisibility
{
/// <summary>Default visibility (as determined by binding).</summary>
Default,
/// <summary>Internal visibility (processor-specific hidden class).</summary>
Internal,
/// <summary>Hidden visibility (not visible to other components).</summary>
Hidden,
/// <summary>Protected visibility (visible but not preemptable).</summary>
Protected
}
/// <summary>
/// Build metadata from .buildinfo or debug sections.
/// </summary>
public sealed record BuildMetadata(
string? Compiler,
string? CompilerVersion,
string? OptimizationLevel,
IReadOnlyList<string>? BuildFlags,
string? SourceArchiveSha256,
DateTimeOffset? BuildTimestamp);
/// <summary>
/// Provenance information for symbol data.
/// </summary>
public sealed record SymbolDataProvenance(
string SourceId,
string DocumentUri,
DateTimeOffset FetchedAt,
string ContentHash,
SignatureState SignatureState,
string? SignatureDetails);
/// <summary>
/// Signature verification state.
/// </summary>
public enum SignatureState
{
/// <summary>
/// No signature present.
/// </summary>
None,
/// <summary>
/// Signature present but not verified.
/// </summary>
Unverified,
/// <summary>
/// Signature verified successfully.
/// </summary>
Verified,
/// <summary>
/// Signature verification failed.
/// </summary>
Failed
}
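// -----------------------------------------------------------------------------
// Usage sketch (illustrative): probing a capability-aware source before
// scheduling a sync. The two-second latency budget is a placeholder.
// -----------------------------------------------------------------------------
internal static class ConnectivityProbeExample
{
public static async Task<bool> CanSyncAsync(ISymbolSourceCapability source, CancellationToken ct)
{
SymbolSourceConnectivityResult result = await source.TestConnectivityAsync(ct);
return result.IsConnected && result.Latency < TimeSpan.FromSeconds(2);
}
}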

View File

@@ -0,0 +1,174 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions.Services;
/// <summary>
/// Implementation of security pair service for ground-truth validation.
/// </summary>
public sealed class SecurityPairService : ISecurityPairService
{
private readonly ILogger<SecurityPairService> _logger;
private readonly ISymbolObservationRepository _observationRepository;
private readonly ISecurityPairRepository _pairRepository;
/// <summary>
/// Creates a new security pair service.
/// </summary>
public SecurityPairService(
ILogger<SecurityPairService> logger,
ISymbolObservationRepository observationRepository,
ISecurityPairRepository pairRepository)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_observationRepository = observationRepository ?? throw new ArgumentNullException(nameof(observationRepository));
_pairRepository = pairRepository ?? throw new ArgumentNullException(nameof(pairRepository));
}
/// <inheritdoc/>
public async Task<SecurityPair> CreatePairAsync(
string cveId,
string vulnerableObservationId,
string patchedObservationId,
SecurityPairMetadata metadata,
CancellationToken ct = default)
{
ArgumentException.ThrowIfNullOrEmpty(cveId);
ArgumentException.ThrowIfNullOrEmpty(vulnerableObservationId);
ArgumentException.ThrowIfNullOrEmpty(patchedObservationId);
ArgumentNullException.ThrowIfNull(metadata);
_logger.LogDebug("Creating security pair for CVE {CveId}", cveId);
// Fetch observations
var vulnerableObs = await _observationRepository.FindByIdAsync(vulnerableObservationId, ct);
var patchedObs = await _observationRepository.FindByIdAsync(patchedObservationId, ct);
if (vulnerableObs is null)
{
throw new ArgumentException($"Vulnerable observation not found: {vulnerableObservationId}");
}
if (patchedObs is null)
{
throw new ArgumentException($"Patched observation not found: {patchedObservationId}");
}
// Validate observations are compatible
ValidatePairCompatibility(vulnerableObs, patchedObs);
// Create pair
var pairId = $"pair:{cveId}:{vulnerableObs.DebugId}:{patchedObs.DebugId}";
var pair = new SecurityPair
{
PairId = pairId,
CveId = cveId,
VulnerableObservationId = vulnerableObservationId,
VulnerableDebugId = vulnerableObs.DebugId,
PatchedObservationId = patchedObservationId,
PatchedDebugId = patchedObs.DebugId,
AffectedFunctions = metadata.AffectedFunctions,
ChangedFunctions = metadata.ChangedFunctions,
Distro = vulnerableObs.Distro ?? "unknown",
PackageName = vulnerableObs.PackageName ?? "unknown",
VulnerableVersion = vulnerableObs.PackageVersion ?? "unknown",
PatchedVersion = patchedObs.PackageVersion ?? "unknown",
UpstreamCommit = metadata.UpstreamCommit,
UpstreamPatchUrl = metadata.UpstreamPatchUrl,
CreatedAt = DateTimeOffset.UtcNow,
CreatedBy = metadata.CreatedBy
};
await _pairRepository.InsertAsync(pair, ct);
_logger.LogInformation("Created security pair {PairId} for CVE {CveId}", pairId, cveId);
return pair;
}
/// <inheritdoc/>
public async Task<SecurityPair?> FindByIdAsync(string pairId, CancellationToken ct = default)
{
ArgumentException.ThrowIfNullOrEmpty(pairId);
return await _pairRepository.GetByIdAsync(pairId, ct);
}
/// <inheritdoc/>
public async Task<ImmutableArray<SecurityPair>> FindByCveAsync(string cveId, CancellationToken ct = default)
{
ArgumentException.ThrowIfNullOrEmpty(cveId);
var pairs = await _pairRepository.GetByCveAsync(cveId, ct);
return [.. pairs];
}
/// <inheritdoc/>
public async Task<ImmutableArray<SecurityPair>> FindByPackageAsync(
string distro,
string packageName,
CancellationToken ct = default)
{
ArgumentException.ThrowIfNullOrEmpty(distro);
ArgumentException.ThrowIfNullOrEmpty(packageName);
var pairs = await _pairRepository.GetByPackageAsync(distro, packageName, ct);
return [.. pairs];
}
/// <inheritdoc/>
public async Task<ImmutableArray<SecurityPair>> QueryAsync(
SecurityPairQuery query,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(query);
var pairs = await _pairRepository.QueryAsync(query, ct);
return [.. pairs];
}
/// <inheritdoc/>
public async Task<SecurityPairStats> GetStatsAsync(CancellationToken ct = default)
{
return await _pairRepository.GetStatsAsync(ct);
}
private static void ValidatePairCompatibility(SymbolObservation vulnerable, SymbolObservation patched)
{
// Architecture must match
if (!string.Equals(vulnerable.Architecture, patched.Architecture, StringComparison.OrdinalIgnoreCase))
{
throw new InvalidOperationException(
$"Architecture mismatch: {vulnerable.Architecture} vs {patched.Architecture}");
}
        // Binary names can legitimately differ between versions (e.g. soname
        // bumps), so a binary-name mismatch is tolerated and no check is enforced here.
// Distribution should match
if (!string.Equals(vulnerable.Distro, patched.Distro, StringComparison.OrdinalIgnoreCase))
{
throw new InvalidOperationException(
$"Distribution mismatch: {vulnerable.Distro} vs {patched.Distro}");
}
// Package name should match
if (!string.Equals(vulnerable.PackageName, patched.PackageName, StringComparison.OrdinalIgnoreCase))
{
throw new InvalidOperationException(
$"Package mismatch: {vulnerable.PackageName} vs {patched.PackageName}");
}
}
}
/// <summary>
/// Repository interface for security pairs (to be implemented by persistence layer).
/// </summary>
public interface ISecurityPairRepository
{
Task InsertAsync(SecurityPair pair, CancellationToken ct);
Task<SecurityPair?> GetByIdAsync(string pairId, CancellationToken ct);
Task<IReadOnlyList<SecurityPair>> GetByCveAsync(string cveId, CancellationToken ct);
Task<IReadOnlyList<SecurityPair>> GetByPackageAsync(string distro, string packageName, CancellationToken ct);
Task<IReadOnlyList<SecurityPair>> QueryAsync(SecurityPairQuery query, CancellationToken ct);
Task<SecurityPairStats> GetStatsAsync(CancellationToken ct);
}
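// -----------------------------------------------------------------------------
// Sketch: a minimal in-memory ISecurityPairRepository, useful for unit tests.
// Illustrative only; QueryAsync/GetStatsAsync are stubbed because the shapes of
// SecurityPairQuery and SecurityPairStats are defined elsewhere.
// -----------------------------------------------------------------------------
public sealed class InMemorySecurityPairRepository : ISecurityPairRepository
{
    private readonly Dictionary<string, SecurityPair> _pairs = new(StringComparer.OrdinalIgnoreCase);

    public Task InsertAsync(SecurityPair pair, CancellationToken ct)
    {
        // Naive insert keyed by pair ID; append-only semantics are enforced upstream.
        _pairs[pair.PairId] = pair;
        return Task.CompletedTask;
    }

    public Task<SecurityPair?> GetByIdAsync(string pairId, CancellationToken ct) =>
        Task.FromResult(_pairs.TryGetValue(pairId, out var pair) ? pair : null);

    public Task<IReadOnlyList<SecurityPair>> GetByCveAsync(string cveId, CancellationToken ct) =>
        Task.FromResult<IReadOnlyList<SecurityPair>>(
            [.. _pairs.Values.Where(p => string.Equals(p.CveId, cveId, StringComparison.OrdinalIgnoreCase))]);

    public Task<IReadOnlyList<SecurityPair>> GetByPackageAsync(string distro, string packageName, CancellationToken ct) =>
        Task.FromResult<IReadOnlyList<SecurityPair>>(
            [.. _pairs.Values.Where(p =>
                string.Equals(p.Distro, distro, StringComparison.OrdinalIgnoreCase) &&
                string.Equals(p.PackageName, packageName, StringComparison.OrdinalIgnoreCase))]);

    public Task<IReadOnlyList<SecurityPair>> QueryAsync(SecurityPairQuery query, CancellationToken ct) =>
        throw new NotSupportedException("Query shape not modeled in this sketch.");

    public Task<SecurityPairStats> GetStatsAsync(CancellationToken ct) =>
        throw new NotSupportedException("Stats shape not modeled in this sketch.");
}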

View File

@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<Description>Abstractions for ground-truth symbol source connectors following the Concelier/Excititor AOC pattern</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,410 @@
using System.Collections.Immutable;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Immutable symbol observation following AOC (Aggregation-Only Contract) principles.
/// Once created, observations are never modified - new versions use supersession.
/// </summary>
public sealed record SymbolObservation
{
/// <summary>
/// Unique observation ID. Format: groundtruth:{source_id}:{debug_id}:{revision}
/// </summary>
[JsonPropertyName("observation_id")]
public required string ObservationId { get; init; }
/// <summary>
/// Source that provided this observation.
/// </summary>
[JsonPropertyName("source_id")]
public required string SourceId { get; init; }
/// <summary>
/// Debug ID (ELF Build-ID, PE GUID, Mach-O UUID).
/// </summary>
[JsonPropertyName("debug_id")]
public required string DebugId { get; init; }
/// <summary>
/// Code ID (secondary identifier, may differ from debug ID).
/// </summary>
[JsonPropertyName("code_id")]
public string? CodeId { get; init; }
/// <summary>
/// Binary file name.
/// </summary>
[JsonPropertyName("binary_name")]
public required string BinaryName { get; init; }
/// <summary>
/// Binary file path (if known).
/// </summary>
[JsonPropertyName("binary_path")]
public string? BinaryPath { get; init; }
/// <summary>
/// Target architecture (x86_64, aarch64, armv7, etc.).
/// </summary>
[JsonPropertyName("architecture")]
public required string Architecture { get; init; }
/// <summary>
/// Distribution name (debian, ubuntu, fedora, alpine).
/// </summary>
[JsonPropertyName("distro")]
public string? Distro { get; init; }
/// <summary>
/// Distribution version/release.
/// </summary>
[JsonPropertyName("distro_version")]
public string? DistroVersion { get; init; }
/// <summary>
/// Package name.
/// </summary>
[JsonPropertyName("package_name")]
public string? PackageName { get; init; }
/// <summary>
/// Package version.
/// </summary>
[JsonPropertyName("package_version")]
public string? PackageVersion { get; init; }
/// <summary>
/// Symbols extracted from the binary.
/// </summary>
[JsonPropertyName("symbols")]
public required ImmutableArray<ObservedSymbol> Symbols { get; init; }
/// <summary>
/// Number of symbols (denormalized for queries).
/// </summary>
[JsonPropertyName("symbol_count")]
public int SymbolCount { get; init; }
/// <summary>
/// Build metadata (compiler, flags, etc.).
/// </summary>
[JsonPropertyName("build_metadata")]
public ObservedBuildMetadata? BuildMetadata { get; init; }
/// <summary>
/// Provenance information.
/// </summary>
[JsonPropertyName("provenance")]
public required ObservationProvenance Provenance { get; init; }
/// <summary>
/// Content hash (SHA-256 of canonical JSON representation).
/// </summary>
[JsonPropertyName("content_hash")]
public required string ContentHash { get; init; }
/// <summary>
/// ID of observation this supersedes (null if first version).
/// </summary>
[JsonPropertyName("supersedes_id")]
public string? SupersedesId { get; init; }
/// <summary>
/// Timestamp when observation was created.
/// </summary>
[JsonPropertyName("created_at")]
public DateTimeOffset CreatedAt { get; init; }
}
/// <summary>
/// A symbol observed in a binary.
/// </summary>
public sealed class ObservedSymbol
{
/// <summary>
/// Symbol name (may be mangled for C++).
/// </summary>
[JsonPropertyName("name")]
public required string Name { get; init; }
/// <summary>
/// Mangled name (original C++ name if demangled differs).
/// </summary>
[JsonPropertyName("mangled_name")]
public string? MangledName { get; set; }
/// <summary>
/// Demangled name (for C++).
/// </summary>
[JsonPropertyName("demangled_name")]
public string? DemangledName { get; init; }
/// <summary>
/// Symbol address in binary.
/// </summary>
[JsonPropertyName("address")]
public ulong Address { get; init; }
/// <summary>
/// Symbol size in bytes.
/// </summary>
[JsonPropertyName("size")]
public ulong Size { get; init; }
/// <summary>
/// Symbol type (function, object, etc.).
/// </summary>
[JsonPropertyName("type")]
public SymbolType Type { get; init; }
/// <summary>
/// Symbol binding (local, global, weak).
/// </summary>
[JsonPropertyName("binding")]
public SymbolBinding Binding { get; init; }
/// <summary>
/// Symbol visibility.
/// </summary>
[JsonPropertyName("visibility")]
public SymbolVisibility Visibility { get; init; }
/// <summary>
/// Section name where symbol is defined.
/// </summary>
[JsonPropertyName("section_name")]
public string? SectionName { get; init; }
/// <summary>
/// Source file (from DWARF).
/// </summary>
[JsonPropertyName("source_file")]
public string? SourceFile { get; set; }
/// <summary>
/// Source line (from DWARF).
/// </summary>
[JsonPropertyName("source_line")]
public int? SourceLine { get; set; }
/// <summary>
/// Symbol version (for versioned symbols like GLIBC_2.17).
/// </summary>
[JsonPropertyName("version")]
public string? Version { get; init; }
}
/// <summary>
/// Build metadata for an observation.
/// </summary>
public sealed class ObservedBuildMetadata
{
/// <summary>
/// Compiler used.
/// </summary>
[JsonPropertyName("compiler")]
public string? Compiler { get; init; }
/// <summary>
/// Compiler version.
/// </summary>
[JsonPropertyName("compiler_version")]
public string? CompilerVersion { get; init; }
/// <summary>
/// Optimization level (-O0, -O1, -O2, -O3, -Os, -Oz).
/// </summary>
[JsonPropertyName("optimization_level")]
public string? OptimizationLevel { get; init; }
/// <summary>
/// Build flags.
/// </summary>
[JsonPropertyName("build_flags")]
public IReadOnlyList<string> BuildFlags { get; init; } = [];
/// <summary>
/// Compiler flags extracted from DWARF producer string.
/// </summary>
[JsonPropertyName("compiler_flags")]
public IReadOnlyList<string> CompilerFlags { get; init; } = [];
/// <summary>
/// Source language (C, C++, Rust, Go, etc.).
/// </summary>
[JsonPropertyName("source_language")]
public string? SourceLanguage { get; init; }
/// <summary>
/// Source archive SHA-256.
/// </summary>
[JsonPropertyName("source_sha256")]
public string? SourceSha256 { get; init; }
/// <summary>
/// Build timestamp.
/// </summary>
[JsonPropertyName("build_timestamp")]
public DateTimeOffset? BuildTimestamp { get; init; }
}
/// <summary>
/// Provenance information for an observation.
/// </summary>
public sealed record ObservationProvenance
{
/// <summary>
/// Source ID that provided this observation.
/// </summary>
[JsonPropertyName("source_id")]
public required string SourceId { get; init; }
/// <summary>
/// URI of the source document.
/// </summary>
[JsonPropertyName("document_uri")]
public required string DocumentUri { get; init; }
/// <summary>
/// When the document was fetched.
/// </summary>
[JsonPropertyName("fetched_at")]
public DateTimeOffset FetchedAt { get; init; }
/// <summary>
/// When the observation was recorded.
/// </summary>
[JsonPropertyName("recorded_at")]
public DateTimeOffset RecordedAt { get; init; }
/// <summary>
/// Content hash of source document.
/// </summary>
[JsonPropertyName("document_hash")]
public required string DocumentHash { get; init; }
/// <summary>
/// Signature verification state.
/// </summary>
[JsonPropertyName("signature_state")]
public SignatureState SignatureState { get; init; }
/// <summary>
/// Signature details (signer, algorithm, etc.).
/// </summary>
[JsonPropertyName("signature_details")]
public string? SignatureDetails { get; init; }
/// <summary>
/// Connector version that produced this observation.
/// </summary>
[JsonPropertyName("connector_version")]
public string? ConnectorVersion { get; init; }
}
/// <summary>
/// Raw document stored during fetch phase.
/// </summary>
public sealed record SymbolRawDocument
{
/// <summary>
/// Document digest (sha256:{hex}).
/// </summary>
[JsonPropertyName("digest")]
public required string Digest { get; init; }
/// <summary>
/// Source ID.
/// </summary>
[JsonPropertyName("source_id")]
public required string SourceId { get; init; }
/// <summary>
/// Document URI.
/// </summary>
[JsonPropertyName("document_uri")]
public required string DocumentUri { get; init; }
/// <summary>
/// When fetched.
/// </summary>
[JsonPropertyName("fetched_at")]
public DateTimeOffset FetchedAt { get; init; }
/// <summary>
/// When recorded.
/// </summary>
[JsonPropertyName("recorded_at")]
public DateTimeOffset RecordedAt { get; init; }
/// <summary>
/// Content type (application/x-elf, application/x-deb, etc.).
/// </summary>
[JsonPropertyName("content_type")]
public required string ContentType { get; init; }
/// <summary>
/// Content size in bytes.
/// </summary>
[JsonPropertyName("content_size")]
public long ContentSize { get; init; }
/// <summary>
/// ETag from HTTP response.
/// </summary>
[JsonPropertyName("etag")]
public string? ETag { get; init; }
/// <summary>
/// Processing status.
/// </summary>
[JsonPropertyName("status")]
public DocumentStatus Status { get; init; }
/// <summary>
/// Payload ID for blob storage.
/// </summary>
[JsonPropertyName("payload_id")]
public Guid? PayloadId { get; init; }
/// <summary>
/// Additional metadata.
/// </summary>
[JsonPropertyName("metadata")]
public ImmutableDictionary<string, string> Metadata { get; init; } =
ImmutableDictionary<string, string>.Empty;
}
/// <summary>
/// Document processing status.
/// </summary>
public enum DocumentStatus
{
/// <summary>
/// Document fetched, pending parse.
/// </summary>
PendingParse,
/// <summary>
/// Document parsed, pending map.
/// </summary>
PendingMap,
/// <summary>
/// Document fully mapped to observations.
/// </summary>
Mapped,
/// <summary>
/// Processing failed.
/// </summary>
Failed,
/// <summary>
/// Document quarantined for review.
/// </summary>
Quarantined
}
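// -----------------------------------------------------------------------------
// Sketch: constructing a minimal observation. All literal values below are
// hypothetical; ContentHash is a placeholder and in real use must be the
// canonical hash computed by the AOC write guard.
// -----------------------------------------------------------------------------
internal static class SymbolObservationExample
{
    public static SymbolObservation CreateMinimal(DateTimeOffset now) => new()
    {
        ObservationId = "groundtruth:debuginfod-fedora:abc123:1",
        SourceId = "debuginfod-fedora",
        DebugId = "abc123",
        BinaryName = "libexample.so.1",
        Architecture = "x86_64",
        Symbols = [new ObservedSymbol { Name = "example_fn", Address = 0x1000, Size = 64 }],
        SymbolCount = 1,
        Provenance = new ObservationProvenance
        {
            SourceId = "debuginfod-fedora",
            DocumentUri = "https://debuginfod.fedoraproject.org/buildid/abc123/debuginfo",
            DocumentHash = "sha256:0000000000000000000000000000000000000000000000000000000000000000",
            FetchedAt = now,
            RecordedAt = now,
        },
        // Placeholder only - compute via SymbolObservationWriteGuard.ComputeContentHash.
        ContentHash = "sha256:0000000000000000000000000000000000000000000000000000000000000000",
        CreatedAt = now,
    };
}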

View File

@@ -0,0 +1,264 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Default implementation of AOC write guard for symbol observations.
/// Enforces append-only semantics and validates observation invariants.
/// </summary>
public sealed class SymbolObservationWriteGuard : ISymbolObservationWriteGuard
{
private static readonly JsonSerializerOptions CanonicalJsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
};
/// <inheritdoc/>
public WriteDisposition ValidateWrite(SymbolObservation observation, string? existingContentHash)
{
// Validate the observation first
var violations = ValidateInternal(observation);
if (violations.Count > 0 && violations.Any(v => v.Severity == AocViolationSeverity.Error))
{
throw new GroundTruthAocGuardException(violations);
}
// If no existing record, proceed with insert
if (existingContentHash is null)
{
return WriteDisposition.Proceed;
}
// Check if identical (idempotent)
if (string.Equals(observation.ContentHash, existingContentHash, StringComparison.OrdinalIgnoreCase))
{
return WriteDisposition.SkipIdentical;
}
// Different content hash with same observation ID - append-only violation
return WriteDisposition.RejectMutation;
}
/// <inheritdoc/>
public void EnsureValid(SymbolObservation observation)
{
var violations = ValidateInternal(observation);
if (violations.Count > 0)
{
throw new GroundTruthAocGuardException(violations);
}
}
private static List<AocViolation> ValidateInternal(SymbolObservation observation)
{
var violations = new List<AocViolation>();
// GTAOC_005: Validate required fields
if (string.IsNullOrWhiteSpace(observation.ObservationId))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingRequiredField,
"ObservationId is required",
"observationId",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.SourceId))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingRequiredField,
"SourceId is required",
"sourceId",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.DebugId))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingRequiredField,
"DebugId is required",
"debugId",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.BinaryName))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingRequiredField,
"BinaryName is required",
"binaryName",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.Architecture))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingRequiredField,
"Architecture is required",
"architecture",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.ContentHash))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingRequiredField,
"ContentHash is required",
"contentHash",
AocViolationSeverity.Error));
}
// GTAOC_001: Validate provenance
if (observation.Provenance is null)
{
violations.Add(new AocViolation(
AocViolationCodes.MissingProvenance,
"Provenance is required",
"provenance",
AocViolationSeverity.Error));
}
else
{
if (string.IsNullOrWhiteSpace(observation.Provenance.SourceId))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingProvenance,
"Provenance.SourceId is required",
"provenance.sourceId",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.Provenance.DocumentUri))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingProvenance,
"Provenance.DocumentUri is required",
"provenance.documentUri",
AocViolationSeverity.Error));
}
if (string.IsNullOrWhiteSpace(observation.Provenance.DocumentHash))
{
violations.Add(new AocViolation(
AocViolationCodes.MissingProvenance,
"Provenance.DocumentHash is required",
"provenance.documentHash",
AocViolationSeverity.Error));
}
if (observation.Provenance.FetchedAt == default)
{
violations.Add(new AocViolation(
AocViolationCodes.MissingProvenance,
"Provenance.FetchedAt must be set",
"provenance.fetchedAt",
AocViolationSeverity.Error));
}
}
// GTAOC_004: Validate content hash matches computed hash
if (!string.IsNullOrWhiteSpace(observation.ContentHash))
{
var computedHash = ComputeContentHash(observation);
if (!string.Equals(observation.ContentHash, computedHash, StringComparison.OrdinalIgnoreCase))
{
violations.Add(new AocViolation(
AocViolationCodes.InvalidContentHash,
$"ContentHash mismatch: expected {computedHash}, got {observation.ContentHash}",
"contentHash",
AocViolationSeverity.Error));
}
}
// GTAOC_006: Validate supersession chain
if (!string.IsNullOrWhiteSpace(observation.SupersedesId))
{
// Supersedes ID should not equal own observation ID
if (string.Equals(observation.SupersedesId, observation.ObservationId, StringComparison.OrdinalIgnoreCase))
{
violations.Add(new AocViolation(
AocViolationCodes.InvalidSupersession,
"Observation cannot supersede itself",
"supersedesId",
AocViolationSeverity.Error));
}
}
return violations;
}
/// <summary>
/// Compute the canonical content hash for an observation.
/// The hash is computed over a canonical JSON representation excluding the contentHash field itself.
/// </summary>
public static string ComputeContentHash(SymbolObservation observation)
{
// Create a hashable version excluding the content hash itself
var hashable = new
{
observation.ObservationId,
observation.SourceId,
observation.DebugId,
observation.CodeId,
observation.BinaryName,
observation.BinaryPath,
observation.Architecture,
observation.Distro,
observation.DistroVersion,
observation.PackageName,
observation.PackageVersion,
Symbols = observation.Symbols.Select(s => new
{
s.Name,
s.MangledName,
s.DemangledName,
s.Address,
s.Size,
Type = s.Type.ToString(),
Binding = s.Binding.ToString(),
Visibility = s.Visibility.ToString(),
s.SectionName,
s.SourceFile,
s.SourceLine,
s.Version
}).ToArray(),
observation.SymbolCount,
BuildMetadata = observation.BuildMetadata is not null
? new
{
observation.BuildMetadata.Compiler,
observation.BuildMetadata.CompilerVersion,
observation.BuildMetadata.OptimizationLevel,
observation.BuildMetadata.BuildFlags,
observation.BuildMetadata.CompilerFlags,
observation.BuildMetadata.SourceLanguage,
observation.BuildMetadata.SourceSha256,
observation.BuildMetadata.BuildTimestamp
}
: null,
Provenance = observation.Provenance is not null
? new
{
observation.Provenance.SourceId,
observation.Provenance.DocumentUri,
observation.Provenance.FetchedAt,
observation.Provenance.RecordedAt,
observation.Provenance.DocumentHash,
SignatureState = observation.Provenance.SignatureState.ToString(),
observation.Provenance.SignatureDetails,
observation.Provenance.ConnectorVersion
}
: null,
observation.SupersedesId,
observation.CreatedAt
};
var json = JsonSerializer.Serialize(hashable, CanonicalJsonOptions);
var hashBytes = SHA256.HashData(Encoding.UTF8.GetBytes(json));
return $"sha256:{Convert.ToHexString(hashBytes).ToLowerInvariant()}";
}
}
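// -----------------------------------------------------------------------------
// Usage sketch: gating a repository insert through the guard. The repository
// call sites are hypothetical; only the disposition handling is the point here.
// -----------------------------------------------------------------------------
internal static class SymbolObservationWriteGuardExample
{
    public static void HandleWrite(SymbolObservation observation, string? existingContentHash)
    {
        var guard = new SymbolObservationWriteGuard();
        // Throws GroundTruthAocGuardException if error-severity violations exist.
        switch (guard.ValidateWrite(observation, existingContentHash))
        {
            case WriteDisposition.Proceed:
                // New observation ID (or first revision): insert it.
                break;
            case WriteDisposition.SkipIdentical:
                // Same ID, same content hash: idempotent replay, nothing to do.
                break;
            case WriteDisposition.RejectMutation:
                // Same ID, different content: append-only violation - the caller
                // must emit a new revision that supersedes the old observation.
                break;
        }
    }
}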

View File

@@ -0,0 +1,154 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Base class for symbol source connectors providing common functionality.
/// </summary>
public abstract class SymbolSourceConnectorBase : ISymbolSourceConnector
{
private static readonly JsonSerializerOptions CanonicalJsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
};
protected readonly ILogger Logger;
protected readonly TimeProvider TimeProvider;
protected SymbolSourceConnectorBase(ILogger logger, TimeProvider? timeProvider = null)
{
Logger = logger ?? throw new ArgumentNullException(nameof(logger));
TimeProvider = timeProvider ?? TimeProvider.System;
}
/// <inheritdoc/>
public abstract string SourceId { get; }
/// <inheritdoc/>
public abstract string DisplayName { get; }
/// <inheritdoc/>
public abstract IReadOnlyList<string> SupportedDistros { get; }
/// <inheritdoc/>
public abstract Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken);
/// <inheritdoc/>
public abstract Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken);
/// <inheritdoc/>
public abstract Task MapAsync(IServiceProvider services, CancellationToken cancellationToken);
/// <summary>
/// Generate a deterministic observation ID.
/// </summary>
/// <param name="debugId">Debug ID.</param>
/// <param name="revision">Revision number.</param>
/// <returns>Observation ID.</returns>
protected string GenerateObservationId(string debugId, int revision) =>
$"groundtruth:{SourceId}:{debugId}:{revision}";
    /// <summary>
    /// Compute a lightweight deterministic content hash over the key observation
    /// fields (identity, ordered symbols, build metadata). Note: the canonical
    /// hash validated by the AOC write guard is computed separately by
    /// <see cref="SymbolObservationWriteGuard.ComputeContentHash"/>.
    /// </summary>
    /// <param name="observation">Observation to hash.</param>
    /// <returns>SHA-256 hash as a lowercase hex string (no "sha256:" prefix).</returns>
protected static string ComputeContentHash(SymbolObservation observation)
{
// Create canonical representation for hashing
var canonical = new
{
observation.SourceId,
observation.DebugId,
observation.BinaryName,
observation.Architecture,
observation.Distro,
observation.PackageName,
observation.PackageVersion,
Symbols = observation.Symbols
.OrderBy(s => s.Address)
.ThenBy(s => s.Name)
.Select(s => new { s.Name, s.Address, s.Size, s.Type })
.ToArray(),
observation.BuildMetadata
};
var json = JsonSerializer.Serialize(canonical, CanonicalJsonOptions);
var bytes = Encoding.UTF8.GetBytes(json);
var hash = SHA256.HashData(bytes);
return Convert.ToHexString(hash).ToLowerInvariant();
}
/// <summary>
/// Compute document digest.
/// </summary>
/// <param name="content">Content bytes.</param>
/// <returns>Digest in sha256:{hex} format.</returns>
protected static string ComputeDocumentDigest(byte[] content)
{
var hash = SHA256.HashData(content);
return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
}
/// <summary>
/// Compute document digest from stream.
/// </summary>
/// <param name="stream">Content stream.</param>
/// <returns>Digest in sha256:{hex} format.</returns>
protected static async Task<string> ComputeDocumentDigestAsync(Stream stream)
{
var hash = await SHA256.HashDataAsync(stream);
return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
}
/// <summary>
/// Get current UTC time.
/// </summary>
protected DateTimeOffset UtcNow => TimeProvider.GetUtcNow();
/// <summary>
/// Log fetch operation.
/// </summary>
protected void LogFetch(string uri, string? debugId = null)
{
Logger.LogDebug(
"Fetching from {SourceId}: {Uri} (debugId={DebugId})",
SourceId, uri, debugId ?? "N/A");
}
/// <summary>
/// Log parse operation.
/// </summary>
protected void LogParse(string digest, int symbolCount)
{
Logger.LogDebug(
"Parsed document {Digest} from {SourceId}: {SymbolCount} symbols",
digest, SourceId, symbolCount);
}
/// <summary>
/// Log map operation.
/// </summary>
protected void LogMap(string observationId)
{
Logger.LogDebug(
"Mapped observation {ObservationId} from {SourceId}",
observationId, SourceId);
}
/// <summary>
/// Log error with source context.
/// </summary>
protected void LogError(Exception ex, string operation, string? context = null)
{
Logger.LogError(
ex,
"Error in {SourceId}.{Operation}: {Context}",
SourceId, operation, context ?? ex.Message);
}
}
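// -----------------------------------------------------------------------------
// Sketch: a minimal concrete connector built on the base class. The source ID,
// endpoint, and phase bodies are hypothetical; real connectors implement the
// three phases against their source's actual API.
// -----------------------------------------------------------------------------
public sealed class ExampleSymbolConnector : SymbolSourceConnectorBase
{
    public ExampleSymbolConnector(ILogger<ExampleSymbolConnector> logger)
        : base(logger)
    {
    }

    public override string SourceId => "example-source";
    public override string DisplayName => "Example Symbol Source";
    public override IReadOnlyList<string> SupportedDistros => ["debian"];

    public override Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        LogFetch("https://example.invalid/symbols/index");
        // Download raw documents, digest them via ComputeDocumentDigest, store them.
        return Task.CompletedTask;
    }

    public override Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        // Read stored documents and extract symbols.
        return Task.CompletedTask;
    }

    public override Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        // Build SymbolObservations, keyed via GenerateObservationId(debugId, revision).
        return Task.CompletedTask;
    }
}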

View File

@@ -0,0 +1,314 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GroundTruth.Abstractions;
/// <summary>
/// Definition of a symbol source.
/// </summary>
public sealed record SymbolSourceDefinition
{
/// <summary>
/// Unique source identifier.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Display name.
/// </summary>
public required string DisplayName { get; init; }
/// <summary>
/// Source category.
/// </summary>
public SymbolSourceCategory Category { get; init; }
/// <summary>
/// Source type.
/// </summary>
public SymbolSourceType Type { get; init; }
/// <summary>
/// Description.
/// </summary>
public string Description { get; init; } = "";
/// <summary>
/// Base endpoint URL.
/// </summary>
public required string BaseEndpoint { get; init; }
/// <summary>
/// Health check endpoint.
/// </summary>
public required string HealthCheckEndpoint { get; init; }
/// <summary>
/// HTTP client name for DI.
/// </summary>
public string HttpClientName { get; init; } = "";
/// <summary>
/// Whether authentication is required.
/// </summary>
public bool RequiresAuthentication { get; init; }
/// <summary>
/// Environment variable for credentials.
/// </summary>
public string? CredentialEnvVar { get; init; }
/// <summary>
/// Supported distributions.
/// </summary>
public ImmutableArray<string> SupportedDistros { get; init; } = ImmutableArray<string>.Empty;
/// <summary>
/// Supported architectures.
/// </summary>
public ImmutableArray<string> SupportedArchitectures { get; init; } = ImmutableArray<string>.Empty;
/// <summary>
/// Documentation URL.
/// </summary>
public string? DocumentationUrl { get; init; }
/// <summary>
/// Default priority (lower = higher priority).
/// </summary>
public int DefaultPriority { get; init; } = 100;
/// <summary>
/// Whether enabled by default.
/// </summary>
public bool EnabledByDefault { get; init; } = true;
/// <summary>
/// Tags for filtering.
/// </summary>
public ImmutableArray<string> Tags { get; init; } = ImmutableArray<string>.Empty;
}
/// <summary>
/// Category of symbol source.
/// </summary>
public enum SymbolSourceCategory
{
/// <summary>
/// Debug symbol server (debuginfod).
/// </summary>
DebugSymbolServer,
/// <summary>
/// Debug package repository (ddebs).
/// </summary>
DebugPackageRepo,
/// <summary>
/// Build information (buildinfo).
/// </summary>
BuildInfo,
/// <summary>
/// Security database.
/// </summary>
SecurityDb,
/// <summary>
/// Upstream source repository.
/// </summary>
UpstreamSource,
/// <summary>
/// Reproducible builds service.
/// </summary>
ReproducibleBuilds
}
/// <summary>
/// Type of symbol source.
/// </summary>
public enum SymbolSourceType
{
/// <summary>
/// Direct upstream source.
/// </summary>
Upstream,
/// <summary>
/// Stella mirror.
/// </summary>
StellaMirror,
/// <summary>
/// Local cache.
/// </summary>
LocalCache,
/// <summary>
/// Custom/user-defined.
/// </summary>
Custom
}
/// <summary>
/// Predefined symbol source definitions.
/// </summary>
public static class SymbolSourceDefinitions
{
/// <summary>
/// Fedora debuginfod service.
/// </summary>
public static readonly SymbolSourceDefinition DebuginfodFedora = new()
{
Id = "debuginfod-fedora",
DisplayName = "Fedora debuginfod",
Category = SymbolSourceCategory.DebugSymbolServer,
Type = SymbolSourceType.Upstream,
Description = "Fedora Project debuginfod service for DWARF debug symbols",
BaseEndpoint = "https://debuginfod.fedoraproject.org",
HealthCheckEndpoint = "https://debuginfod.fedoraproject.org/metrics",
HttpClientName = "DebuginfodFedora",
RequiresAuthentication = false,
SupportedDistros = ["fedora", "rhel", "centos", "rocky", "alma"],
SupportedArchitectures = ["x86_64", "aarch64", "ppc64le", "s390x", "armv7hl"],
DocumentationUrl = "https://fedoraproject.org/wiki/Debuginfod",
DefaultPriority = 10,
Tags = ["debuginfod", "fedora", "rpm", "dwarf"]
};
/// <summary>
/// Ubuntu debuginfod service.
/// </summary>
public static readonly SymbolSourceDefinition DebuginfodUbuntu = new()
{
Id = "debuginfod-ubuntu",
DisplayName = "Ubuntu debuginfod",
Category = SymbolSourceCategory.DebugSymbolServer,
Type = SymbolSourceType.Upstream,
Description = "Ubuntu debuginfod service for DWARF debug symbols",
BaseEndpoint = "https://debuginfod.ubuntu.com",
HealthCheckEndpoint = "https://debuginfod.ubuntu.com/metrics",
HttpClientName = "DebuginfodUbuntu",
RequiresAuthentication = false,
SupportedDistros = ["ubuntu"],
SupportedArchitectures = ["amd64", "arm64", "armhf", "i386"],
DocumentationUrl = "https://ubuntu.com/server/docs/service-debuginfod",
DefaultPriority = 15,
Tags = ["debuginfod", "ubuntu", "deb", "dwarf"]
};
/// <summary>
/// Ubuntu ddeb packages.
/// </summary>
public static readonly SymbolSourceDefinition DdebUbuntu = new()
{
Id = "ddeb-ubuntu",
DisplayName = "Ubuntu ddebs",
Category = SymbolSourceCategory.DebugPackageRepo,
Type = SymbolSourceType.Upstream,
Description = "Ubuntu debug symbol packages (.ddeb)",
BaseEndpoint = "http://ddebs.ubuntu.com",
HealthCheckEndpoint = "http://ddebs.ubuntu.com/dists/",
HttpClientName = "DdebUbuntu",
RequiresAuthentication = false,
SupportedDistros = ["ubuntu"],
SupportedArchitectures = ["amd64", "arm64", "armhf", "i386"],
DocumentationUrl = "https://documentation.ubuntu.com/server/explanation/debugging/debug-symbol-packages/",
DefaultPriority = 20,
Tags = ["ddeb", "ubuntu", "deb", "dwarf"]
};
/// <summary>
/// Debian buildinfo files.
/// </summary>
public static readonly SymbolSourceDefinition BuildinfoDebian = new()
{
Id = "buildinfo-debian",
DisplayName = "Debian buildinfo",
Category = SymbolSourceCategory.BuildInfo,
Type = SymbolSourceType.Upstream,
Description = "Debian .buildinfo files with build environment metadata",
BaseEndpoint = "https://buildinfos.debian.net",
HealthCheckEndpoint = "https://buildinfos.debian.net/",
HttpClientName = "BuildinfoDebian",
RequiresAuthentication = false,
SupportedDistros = ["debian"],
SupportedArchitectures = ["amd64", "arm64", "armel", "armhf", "i386", "mips64el", "ppc64el", "s390x"],
DocumentationUrl = "https://wiki.debian.org/ReproducibleBuilds/BuildinfoFiles",
DefaultPriority = 30,
Tags = ["buildinfo", "debian", "reproducible"]
};
/// <summary>
/// Debian reproducible builds service.
/// </summary>
public static readonly SymbolSourceDefinition ReproducibleDebian = new()
{
Id = "reproducible-debian",
DisplayName = "Debian Reproducible Builds",
Category = SymbolSourceCategory.ReproducibleBuilds,
Type = SymbolSourceType.Upstream,
Description = "Debian reproducible builds verification service",
BaseEndpoint = "https://reproduce.debian.net",
HealthCheckEndpoint = "https://reproduce.debian.net/api/v1/",
HttpClientName = "ReproducibleDebian",
RequiresAuthentication = false,
SupportedDistros = ["debian"],
SupportedArchitectures = ["amd64", "arm64", "i386"],
DocumentationUrl = "https://reproducible-builds.org/docs/",
DefaultPriority = 50,
EnabledByDefault = false, // Expensive operations, opt-in
Tags = ["reproducible", "debian", "rebuild"]
};
/// <summary>
/// Alpine SecDB.
/// </summary>
public static readonly SymbolSourceDefinition SecDbAlpine = new()
{
Id = "secdb-alpine",
DisplayName = "Alpine SecDB",
Category = SymbolSourceCategory.SecurityDb,
Type = SymbolSourceType.Upstream,
Description = "Alpine Linux security database with CVE-to-fix mappings",
BaseEndpoint = "https://github.com/alpinelinux/alpine-secdb",
HealthCheckEndpoint = "https://raw.githubusercontent.com/alpinelinux/alpine-secdb/master/README.md",
HttpClientName = "SecDbAlpine",
RequiresAuthentication = false,
SupportedDistros = ["alpine"],
SupportedArchitectures = ["x86_64", "aarch64", "armv7", "x86"],
DocumentationUrl = "https://github.com/alpinelinux/alpine-secdb/blob/master/README.md",
DefaultPriority = 25,
Tags = ["secdb", "alpine", "apk", "cve"]
};
/// <summary>
/// All predefined source definitions.
/// </summary>
public static readonly ImmutableArray<SymbolSourceDefinition> All = ImmutableArray.Create(
DebuginfodFedora,
DebuginfodUbuntu,
DdebUbuntu,
BuildinfoDebian,
ReproducibleDebian,
SecDbAlpine);
/// <summary>
/// Get source definition by ID.
/// </summary>
public static SymbolSourceDefinition? GetById(string sourceId) =>
All.FirstOrDefault(s => s.Id.Equals(sourceId, StringComparison.OrdinalIgnoreCase));
/// <summary>
/// Get source definitions by category.
/// </summary>
public static ImmutableArray<SymbolSourceDefinition> GetByCategory(SymbolSourceCategory category) =>
All.Where(s => s.Category == category).ToImmutableArray();
/// <summary>
/// Get source definitions supporting a distribution.
/// </summary>
public static ImmutableArray<SymbolSourceDefinition> GetByDistro(string distro) =>
All.Where(s => s.SupportedDistros.Contains(distro, StringComparer.OrdinalIgnoreCase))
.ToImmutableArray();
}
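// -----------------------------------------------------------------------------
// Usage sketch: select the enabled sources for a distribution, ordered by
// priority (lower value = tried first).
// -----------------------------------------------------------------------------
internal static class SymbolSourceDefinitionsExample
{
    public static ImmutableArray<SymbolSourceDefinition> EnabledFor(string distro) =>
        [.. SymbolSourceDefinitions.GetByDistro(distro)
            .Where(s => s.EnabledByDefault)
            .OrderBy(s => s.DefaultPriority)];
}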

View File

@@ -0,0 +1,78 @@
# GroundTruth.Buildinfo - Agent Instructions
## Module Overview
This library implements the Debian .buildinfo file connector for fetching reproducible build metadata from buildinfos.debian.net.
## Key Components
- **BuildinfoConnector** - Main connector implementing three-phase pipeline
- **BuildinfoConnectorPlugin** - Plugin registration for DI discovery
- **BuildinfoOptions** - Configuration options
- **BuildinfoDiagnostics** - Metrics and telemetry
- **BuildinfoParser** - Parser for RFC 822 format .buildinfo files
## Configuration
```csharp
services.AddBuildinfoConnector(opts =>
{
opts.BaseUrl = new Uri("https://buildinfos.debian.net");
opts.SnapshotUrl = new Uri("https://snapshot.debian.org");
opts.Distributions = ["bookworm", "bullseye", "trixie"];
opts.Architectures = ["amd64", "arm64"];
opts.VerifySignatures = true;
});
```
## Three-Phase Pipeline
1. **Fetch**: Download .buildinfo files from buildinfos.debian.net
2. **Parse**: Parse RFC 822 format, extract checksums, dependencies, build metadata
3. **Map**: Build canonical observations for reproducible build verification
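A minimal driver sketch for the pipeline (illustrative; assumes a built `IServiceProvider` named `provider` and a `CancellationToken` `ct`):
```csharp
var connector = provider.GetRequiredService<BuildinfoConnector>();
await connector.FetchAsync(provider, ct);   // download .buildinfo documents
await connector.ParseAsync(provider, ct);   // parse RFC 822 fields
await connector.MapAsync(provider, ct);     // emit canonical observations
```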
## .buildinfo File Structure
```
Format: 1.0
Source: package-name
Binary: binary1 binary2
Architecture: amd64
Version: 1.0-1
Checksums-Sha256:
abc123... 12345 binary1_1.0-1_amd64.deb
def456... 67890 binary2_1.0-1_amd64.deb
Build-Origin: debian
Build-Architecture: amd64
Build-Date: Thu, 01 Jan 2024 12:00:00 +0000
Build-Path: /build/package-1.0
Installed-Build-Depends:
gcc (= 12.2.0-14),
libc6-dev (= 2.36-9)
Environment:
"DEB_BUILD_OPTIONS=nocheck"
"LANG=C.UTF-8"
```
## snapshot.debian.org Integration
The connector can fetch exact binary versions using SHA256 hashes from the .buildinfo file:
```
https://snapshot.debian.org/file/{sha256hash}
```
This enables retrieval of the exact binary that was produced during the recorded build.
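For example (sketch; `connector` is a resolved `BuildinfoConnector` and the hash value is a hypothetical placeholder):
```csharp
await using var binary = await connector.FetchBinaryFromSnapshotAsync(
    "0123abc...", ct);
if (binary is not null)
{
    // Stream the exact .deb produced by the recorded build.
}
```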
## Testing
- Unit tests for BuildinfoParser
- Integration tests require access to buildinfos.debian.net (skippable)
- Deterministic fixtures with sample .buildinfo content
## Future Work
- GPG signature verification using debian-archive-keyring
- Pagination through buildinfo index
- Cross-reference with debug symbol sources
- Reproducible build verification pipeline

View File

@@ -0,0 +1,240 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Buildinfo.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Buildinfo.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Buildinfo;
/// <summary>
/// Symbol source connector for Debian .buildinfo files.
/// Provides reproducible build metadata and exact binary checksums.
/// </summary>
public sealed class BuildinfoConnector : ISymbolSourceConnector, ISymbolSourceCapability
{
private readonly ILogger<BuildinfoConnector> _logger;
private readonly BuildinfoOptions _options;
private readonly IHttpClientFactory _httpClientFactory;
private readonly BuildinfoDiagnostics _diagnostics;
private readonly BuildinfoParser _parser;
public BuildinfoConnector(
ILogger<BuildinfoConnector> logger,
IOptions<BuildinfoOptions> options,
IHttpClientFactory httpClientFactory,
BuildinfoDiagnostics diagnostics)
{
_logger = logger;
_options = options.Value;
_httpClientFactory = httpClientFactory;
_diagnostics = diagnostics;
_parser = new BuildinfoParser();
}
/// <inheritdoc/>
public string SourceId => "buildinfo-debian";
/// <inheritdoc/>
public string DisplayName => "Debian .buildinfo (Reproducible Builds)";
/// <inheritdoc/>
public IReadOnlyList<string> SupportedDistros => ["debian"];
/// <inheritdoc/>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
_logger.LogInformation("Starting buildinfo fetch for distributions: {Distributions}",
string.Join(", ", _options.Distributions));
var client = _httpClientFactory.CreateClient(BuildinfoOptions.HttpClientName);
foreach (var distribution in _options.Distributions)
{
foreach (var architecture in _options.Architectures)
{
try
{
await FetchDistributionAsync(client, distribution, architecture, cancellationToken);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to fetch buildinfo for {Distribution}/{Architecture}",
distribution, architecture);
}
}
}
}
/// <inheritdoc/>
    public Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
    {
        _logger.LogInformation("Starting buildinfo parse phase");
        // The parse phase processes stored raw documents; the implementation
        // depends on ISymbolRawDocumentRepository and is a placeholder for now.
        return Task.CompletedTask;
    }
/// <inheritdoc/>
public Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
_logger.LogInformation("Starting buildinfo map phase");
// Map phase creates SymbolObservations from parsed buildinfo
// For buildinfo, we map build metadata rather than symbols
return Task.CompletedTask;
}
/// <inheritdoc/>
public async Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default)
{
var startTime = DateTimeOffset.UtcNow;
var sw = System.Diagnostics.Stopwatch.StartNew();
try
{
var client = _httpClientFactory.CreateClient(BuildinfoOptions.HttpClientName);
// Test connectivity to buildinfos.debian.net
using var response = await client.GetAsync("/", ct);
sw.Stop();
return new SymbolSourceConnectivityResult(
IsConnected: response.IsSuccessStatusCode,
Latency: sw.Elapsed,
ErrorMessage: response.IsSuccessStatusCode ? null : $"HTTP {response.StatusCode}",
TestedAt: startTime);
}
catch (Exception ex)
{
sw.Stop();
return new SymbolSourceConnectivityResult(
IsConnected: false,
Latency: sw.Elapsed,
ErrorMessage: ex.Message,
TestedAt: startTime);
}
}
/// <inheritdoc/>
public Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default)
{
return Task.FromResult(new SymbolSourceMetadata(
SourceId: SourceId,
DisplayName: DisplayName,
BaseUrl: _options.BaseUrl.ToString(),
LastSyncAt: null,
ObservationCount: null,
DebugIdCount: null,
AdditionalInfo: new Dictionary<string, string>
{
["distributions"] = string.Join(", ", _options.Distributions),
["architectures"] = string.Join(", ", _options.Architectures),
["verifySignatures"] = _options.VerifySignatures.ToString()
}));
}
/// <inheritdoc/>
    public Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
    {
        // Buildinfo doesn't directly support debug ID lookup;
        // it would need to be cross-referenced with other sources.
        _logger.LogDebug("FetchByDebugId not directly supported for buildinfo; debug ID: {DebugId}", debugId);
        return Task.FromResult<SymbolData?>(null);
    }
/// <summary>
/// Fetch a specific .buildinfo file by source package and version.
/// </summary>
public async Task<BuildinfoData?> FetchBuildinfoAsync(
string sourcePackage,
string version,
string architecture,
CancellationToken ct = default)
{
var client = _httpClientFactory.CreateClient(BuildinfoOptions.HttpClientName);
// URL format: /buildinfo/{source}_{version}_{arch}.buildinfo
var filename = $"{sourcePackage}_{version}_{architecture}.buildinfo";
var url = $"/buildinfo/{filename}";
try
{
_logger.LogDebug("Fetching buildinfo: {Url}", url);
var response = await client.GetAsync(url, ct);
if (!response.IsSuccessStatusCode)
{
_logger.LogDebug("Buildinfo not found: {Url} ({StatusCode})", url, response.StatusCode);
return null;
}
var content = await response.Content.ReadAsStringAsync(ct);
_diagnostics.RecordFetchSuccess();
var buildinfo = _parser.Parse(content);
_diagnostics.RecordParseSuccess(
buildinfo.InstalledBuildDepends.Count,
buildinfo.Binaries.Count);
return buildinfo;
}
catch (Exception ex)
{
_diagnostics.RecordFetchError();
_logger.LogError(ex, "Failed to fetch buildinfo: {Url}", url);
throw;
}
}
/// <summary>
/// Fetch binary package from snapshot.debian.org using exact checksum.
/// </summary>
public async Task<Stream?> FetchBinaryFromSnapshotAsync(
string sha256Hash,
CancellationToken ct = default)
{
var client = _httpClientFactory.CreateClient(BuildinfoOptions.HttpClientName);
// URL format: /file/{sha256}
var url = $"{_options.SnapshotUrl}/file/{sha256Hash}";
try
{
_logger.LogDebug("Fetching binary from snapshot: {Hash}", sha256Hash);
var response = await client.GetAsync(url, ct);
if (!response.IsSuccessStatusCode)
{
_logger.LogDebug("Binary not found in snapshot: {Hash} ({StatusCode})", sha256Hash, response.StatusCode);
return null;
}
return await response.Content.ReadAsStreamAsync(ct);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to fetch binary from snapshot: {Hash}", sha256Hash);
throw;
}
}
    private Task FetchDistributionAsync(
        HttpClient client,
        string distribution,
        string architecture,
        CancellationToken ct)
    {
        // buildinfos.debian.net provides an index of available buildinfo files;
        // the exact API structure still needs to be verified.
        _logger.LogDebug("Fetching buildinfo index for {Distribution}/{Architecture}",
            distribution, architecture);
        // Simplified placeholder - a real implementation would paginate through
        // the available buildinfo files.
        return Task.CompletedTask;
    }
}
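// -----------------------------------------------------------------------------
// Usage sketch: health-check the source before scheduling a fetch cycle.
// (The logger and cancellation token are assumed to come from the caller.)
// -----------------------------------------------------------------------------
internal static class BuildinfoConnectorExample
{
    public static async Task<bool> IsHealthyAsync(
        BuildinfoConnector connector, ILogger logger, CancellationToken ct)
    {
        var result = await connector.TestConnectivityAsync(ct);
        if (!result.IsConnected)
        {
            logger.LogWarning(
                "buildinfo source unreachable after {Latency}: {Error}",
                result.Latency, result.ErrorMessage);
        }
        return result.IsConnected;
    }
}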

View File

@@ -0,0 +1,28 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Buildinfo.Configuration;
namespace StellaOps.BinaryIndex.GroundTruth.Buildinfo;
/// <summary>
/// Plugin registration for buildinfo connector.
/// </summary>
public sealed class BuildinfoConnectorPlugin : ISymbolSourceConnectorPlugin
{
/// <inheritdoc/>
public string Name => "buildinfo-debian";
/// <inheritdoc/>
public bool IsAvailable(IServiceProvider services)
{
var options = services.GetService<IOptions<BuildinfoOptions>>();
return options?.Value?.BaseUrl is not null;
}
/// <inheritdoc/>
public ISymbolSourceConnector Create(IServiceProvider services)
{
return services.GetRequiredService<BuildinfoConnector>();
}
}

View File

@@ -0,0 +1,77 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Buildinfo.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Buildinfo.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Buildinfo;
/// <summary>
/// Extension methods for adding buildinfo connector to DI.
/// </summary>
public static class BuildinfoServiceCollectionExtensions
{
/// <summary>
/// Add the Debian buildinfo symbol source connector.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configure">Configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddBuildinfoConnector(
this IServiceCollection services,
Action<BuildinfoOptions> configure)
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configure);
// Register options with validation
services.AddOptions<BuildinfoOptions>()
.Configure(configure)
.PostConfigure(static opts => opts.Validate());
// Register HTTP client
services.AddHttpClient(BuildinfoOptions.HttpClientName, (sp, client) =>
{
var options = sp.GetRequiredService<IOptions<BuildinfoOptions>>().Value;
client.BaseAddress = options.BaseUrl;
client.Timeout = TimeSpan.FromSeconds(options.TimeoutSeconds);
client.DefaultRequestHeaders.Add("User-Agent", options.UserAgent);
});
// Register services
services.AddSingleton<BuildinfoDiagnostics>();
services.AddTransient<BuildinfoConnector>();
services.AddSingleton<ISymbolSourceConnectorPlugin, BuildinfoConnectorPlugin>();
return services;
}
/// <summary>
/// Add the Debian buildinfo connector with default configuration.
/// </summary>
/// <param name="services">Service collection.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddBuildinfoConnector(this IServiceCollection services)
{
return services.AddBuildinfoConnector(_ => { });
}
/// <summary>
/// Add the buildinfo connector with specific distributions.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="distributions">Debian distributions to fetch from (e.g., "bookworm", "bullseye").</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddBuildinfoConnector(
this IServiceCollection services,
params string[] distributions)
{
return services.AddBuildinfoConnector(opts =>
{
if (distributions.Length > 0)
{
opts.Distributions = [.. distributions];
}
});
}
}

View File

@@ -0,0 +1,95 @@
namespace StellaOps.BinaryIndex.GroundTruth.Buildinfo.Configuration;
/// <summary>
/// Configuration options for the Debian .buildinfo connector.
/// </summary>
public sealed class BuildinfoOptions
{
/// <summary>
/// HTTP client name for DI.
/// </summary>
public const string HttpClientName = "GroundTruth.Buildinfo";
/// <summary>
/// Base URL for buildinfos.debian.net.
/// Default: https://buildinfos.debian.net
/// </summary>
public Uri BaseUrl { get; set; } = new("https://buildinfos.debian.net");
/// <summary>
/// Base URL for snapshot.debian.org for fetching exact binary versions.
/// Default: https://snapshot.debian.org
/// </summary>
public Uri SnapshotUrl { get; set; } = new("https://snapshot.debian.org");
/// <summary>
/// Debian distributions to fetch buildinfo for.
/// Default: ["bookworm", "bullseye", "trixie"]
/// </summary>
public List<string> Distributions { get; set; } = ["bookworm", "bullseye", "trixie"];
/// <summary>
/// Architectures to process.
/// Default: ["amd64", "arm64"]
/// </summary>
public List<string> Architectures { get; set; } = ["amd64", "arm64"];
/// <summary>
/// Request timeout in seconds.
/// Default: 60
/// </summary>
public int TimeoutSeconds { get; set; } = 60;
/// <summary>
/// User-Agent header for HTTP requests.
/// </summary>
public string UserAgent { get; set; } = "StellaOps-GroundTruth/1.0 (buildinfo-connector)";
/// <summary>
/// Whether to verify GPG signatures on .buildinfo files.
/// Default: true
/// </summary>
public bool VerifySignatures { get; set; } = true;
/// <summary>
/// Path to GPG keyring for signature verification.
/// If null, uses default Debian archive keyring.
/// </summary>
public string? GpgKeyringPath { get; set; }
/// <summary>
/// Maximum number of concurrent downloads.
/// Default: 4
/// </summary>
public int MaxConcurrentDownloads { get; set; } = 4;
/// <summary>
/// Cache directory for downloaded buildinfo files.
/// Default: null (no caching)
/// </summary>
public string? CacheDirectory { get; set; }
/// <summary>
/// Validate configuration.
/// </summary>
public void Validate()
{
if (BaseUrl is null)
throw new InvalidOperationException("BaseUrl is required");
if (SnapshotUrl is null)
throw new InvalidOperationException("SnapshotUrl is required");
if (Distributions is null || Distributions.Count == 0)
throw new InvalidOperationException("At least one distribution is required");
if (Architectures is null || Architectures.Count == 0)
throw new InvalidOperationException("At least one architecture is required");
if (TimeoutSeconds <= 0)
throw new InvalidOperationException("TimeoutSeconds must be positive");
if (MaxConcurrentDownloads <= 0)
throw new InvalidOperationException("MaxConcurrentDownloads must be positive");
}
}

View File

@@ -0,0 +1,91 @@
using System.Diagnostics.Metrics;
namespace StellaOps.BinaryIndex.GroundTruth.Buildinfo.Internal;
/// <summary>
/// Diagnostics and metrics for the buildinfo connector.
/// </summary>
public sealed class BuildinfoDiagnostics
{
private readonly Counter<long> _fetchSuccessCounter;
private readonly Counter<long> _fetchErrorCounter;
private readonly Counter<long> _parseSuccessCounter;
private readonly Counter<long> _parseErrorCounter;
private readonly Counter<long> _signatureVerifiedCounter;
private readonly Counter<long> _signatureFailedCounter;
private readonly Counter<long> _mapSuccessCounter;
private readonly Counter<long> _mapErrorCounter;
private readonly Histogram<long> _dependencyCountHistogram;
private readonly Histogram<long> _binaryCountHistogram;
public BuildinfoDiagnostics(IMeterFactory meterFactory)
{
var meter = meterFactory.Create("StellaOps.BinaryIndex.GroundTruth.Buildinfo");
_fetchSuccessCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.fetch.success",
unit: "{files}",
description: "Number of successful buildinfo file fetches");
_fetchErrorCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.fetch.error",
unit: "{files}",
description: "Number of failed buildinfo file fetches");
_parseSuccessCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.parse.success",
unit: "{files}",
description: "Number of successful buildinfo file parses");
_parseErrorCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.parse.error",
unit: "{files}",
description: "Number of failed buildinfo file parses");
_signatureVerifiedCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.signature.verified",
unit: "{files}",
description: "Number of buildinfo files with verified signatures");
_signatureFailedCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.signature.failed",
unit: "{files}",
description: "Number of buildinfo files with failed signature verification");
_mapSuccessCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.map.success",
unit: "{observations}",
description: "Number of successful observation mappings");
_mapErrorCounter = meter.CreateCounter<long>(
"groundtruth.buildinfo.map.error",
unit: "{observations}",
description: "Number of failed observation mappings");
_dependencyCountHistogram = meter.CreateHistogram<long>(
"groundtruth.buildinfo.dependencies_per_package",
unit: "{dependencies}",
description: "Distribution of build dependency counts per package");
_binaryCountHistogram = meter.CreateHistogram<long>(
"groundtruth.buildinfo.binaries_per_source",
unit: "{binaries}",
description: "Distribution of binary package counts per source package");
}
public void RecordFetchSuccess() => _fetchSuccessCounter.Add(1);
public void RecordFetchError() => _fetchErrorCounter.Add(1);
public void RecordParseSuccess(int dependencyCount, int binaryCount)
{
_parseSuccessCounter.Add(1);
_dependencyCountHistogram.Record(dependencyCount);
_binaryCountHistogram.Record(binaryCount);
}
public void RecordParseError() => _parseErrorCounter.Add(1);
public void RecordSignatureVerified() => _signatureVerifiedCounter.Add(1);
public void RecordSignatureFailed() => _signatureFailedCounter.Add(1);
public void RecordMapSuccess() => _mapSuccessCounter.Add(1);
public void RecordMapError() => _mapErrorCounter.Add(1);
}

View File

@@ -0,0 +1,382 @@
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.GroundTruth.Buildinfo.Internal;
/// <summary>
/// Parser for Debian .buildinfo files (RFC 822 format).
/// </summary>
public sealed partial class BuildinfoParser
{
/// <summary>
/// Parse a .buildinfo file content.
/// </summary>
/// <param name="content">Raw .buildinfo file content (may be clearsigned).</param>
/// <returns>Parsed buildinfo data.</returns>
public BuildinfoData Parse(string content)
{
ArgumentNullException.ThrowIfNull(content);
// Strip clearsign wrapper if present
var (stripped, isSigned) = StripClearsign(content);
var fields = ParseFields(stripped);
// Extract required fields
if (!fields.TryGetValue("Source", out var source))
throw new FormatException("Missing required field: Source");
if (!fields.TryGetValue("Version", out var version))
throw new FormatException("Missing required field: Version");
// Parse binary packages
var binaries = new List<string>();
if (fields.TryGetValue("Binary", out var binaryField))
{
binaries.AddRange(binaryField.Split([' ', '\n'], StringSplitOptions.RemoveEmptyEntries));
}
// Parse checksums
var checksums = new List<BuildinfoChecksum>();
if (fields.TryGetValue("Checksums-Sha256", out var sha256Field))
{
checksums.AddRange(ParseChecksums(sha256Field, "sha256"));
}
// Parse installed build dependencies
var buildDepends = new List<BuildinfoDependency>();
if (fields.TryGetValue("Installed-Build-Depends", out var depsField))
{
buildDepends.AddRange(ParseDependencies(depsField));
}
// Parse environment variables
var environment = new Dictionary<string, string>();
if (fields.TryGetValue("Environment", out var envField))
{
foreach (var line in envField.Split('\n', StringSplitOptions.RemoveEmptyEntries))
{
var trimmed = line.Trim();
if (trimmed.StartsWith('"') && trimmed.EndsWith('"'))
{
trimmed = trimmed[1..^1];
}
var eqIndex = trimmed.IndexOf('=');
if (eqIndex > 0)
{
var key = trimmed[..eqIndex];
var value = trimmed[(eqIndex + 1)..];
// Remove quotes from value
if (value.StartsWith('"') && value.EndsWith('"'))
{
value = value[1..^1];
}
environment[key] = value;
}
}
}
return new BuildinfoData
{
Source = source,
Version = version,
Format = fields.GetValueOrDefault("Format"),
Architecture = fields.GetValueOrDefault("Architecture"),
Binaries = binaries,
BuildOrigin = fields.GetValueOrDefault("Build-Origin"),
BuildArchitecture = fields.GetValueOrDefault("Build-Architecture"),
BuildDate = ParseBuildDate(fields.GetValueOrDefault("Build-Date")),
BuildPath = fields.GetValueOrDefault("Build-Path"),
Checksums = checksums,
InstalledBuildDepends = buildDepends,
Environment = environment,
IsSigned = isSigned
};
}
private static (string content, bool isSigned) StripClearsign(string content)
{
// Check for PGP clearsign markers
const string beginSigned = "-----BEGIN PGP SIGNED MESSAGE-----";
const string beginSignature = "-----BEGIN PGP SIGNATURE-----";
// Note: endSignature not needed as we strip from beginSignature onwards
if (!content.Contains(beginSigned))
{
return (content, false);
}
// Find start of actual content (after Hash: header and blank line)
var signedStart = content.IndexOf(beginSigned, StringComparison.Ordinal);
        var contentStart = content.IndexOf("\n\n", signedStart, StringComparison.Ordinal);
        var blankLineLength = 2;
        if (contentStart < 0)
        {
            contentStart = content.IndexOf("\r\n\r\n", signedStart, StringComparison.Ordinal);
            blankLineLength = 4;
        }
        if (contentStart < 0)
        {
            return (content, true); // Malformed but signed
        }
        contentStart += blankLineLength; // Skip past the blank line (2 chars for LF, 4 for CRLF)
// Find end of content (before signature)
var signatureStart = content.IndexOf(beginSignature, StringComparison.Ordinal);
if (signatureStart < 0)
{
return (content[contentStart..], true);
}
var stripped = content[contentStart..signatureStart].Trim();
// Unescape dash-escaped lines (lines starting with "- ")
stripped = DashEscapeRegex().Replace(stripped, "$1");
return (stripped, true);
}
private static Dictionary<string, string> ParseFields(string content)
{
var fields = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
string? currentKey = null;
var currentValue = new List<string>();
foreach (var line in content.Split('\n'))
{
var trimmedLine = line.TrimEnd('\r');
// Continuation line (starts with space or tab)
if (trimmedLine.Length > 0 && (trimmedLine[0] == ' ' || trimmedLine[0] == '\t'))
{
if (currentKey is not null)
{
currentValue.Add(trimmedLine.TrimStart());
}
continue;
}
// Save previous field
if (currentKey is not null)
{
fields[currentKey] = string.Join("\n", currentValue);
}
// Empty line - reset
if (string.IsNullOrWhiteSpace(trimmedLine))
{
currentKey = null;
currentValue.Clear();
continue;
}
// Parse new field
var colonIndex = trimmedLine.IndexOf(':');
if (colonIndex > 0)
{
currentKey = trimmedLine[..colonIndex].Trim();
var value = trimmedLine[(colonIndex + 1)..].Trim();
currentValue = [value];
}
}
// Save last field
if (currentKey is not null)
{
fields[currentKey] = string.Join("\n", currentValue);
}
return fields;
}
private static IEnumerable<BuildinfoChecksum> ParseChecksums(string field, string algorithm)
{
foreach (var line in field.Split('\n', StringSplitOptions.RemoveEmptyEntries))
{
var parts = line.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries);
if (parts.Length >= 3)
{
if (long.TryParse(parts[1], out var size))
{
yield return new BuildinfoChecksum
{
Algorithm = algorithm,
Hash = parts[0],
Size = size,
Filename = parts[2]
};
}
}
}
}
private static IEnumerable<BuildinfoDependency> ParseDependencies(string field)
{
// Format: package (= version) or package (>= version)
var depRegex = DependencyRegex();
foreach (var line in field.Split([',', '\n'], StringSplitOptions.RemoveEmptyEntries))
{
var trimmed = line.Trim();
if (string.IsNullOrWhiteSpace(trimmed))
continue;
var match = depRegex.Match(trimmed);
if (match.Success)
{
yield return new BuildinfoDependency
{
Package = match.Groups["pkg"].Value,
Version = match.Groups["ver"].Success ? match.Groups["ver"].Value : null,
Architecture = match.Groups["arch"].Success ? match.Groups["arch"].Value : null
};
}
else
{
// Simple package name without version
yield return new BuildinfoDependency
{
Package = trimmed.Split(':')[0].Trim()
};
}
}
}
private static DateTimeOffset? ParseBuildDate(string? dateStr)
{
if (string.IsNullOrWhiteSpace(dateStr))
return null;
// RFC 2822 format: "Thu, 01 Jan 2024 12:00:00 +0000"; parse culture-invariantly for determinism
if (DateTimeOffset.TryParse(dateStr, System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out var result))
{
return result;
}
return null;
}
[GeneratedRegex(@"^- (.*)$", RegexOptions.Multiline)]
private static partial Regex DashEscapeRegex();
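// Matches e.g. "gcc-12 (= 12.3.0-1ubuntu1)" and "libc6:amd64 (>= 2.35)";
// the op group is captured but currently unused by ParseDependencies.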
[GeneratedRegex(@"^(?<pkg>[\w\d\-\.+]+)(?::(?<arch>\w+))?\s*(?:\((?<op>[<>=]+)\s*(?<ver>[^\)]+)\))?")]
private static partial Regex DependencyRegex();
}
/// <summary>
/// Parsed data from a .buildinfo file.
/// </summary>
public sealed record BuildinfoData
{
/// <summary>
/// Source package name.
/// </summary>
public required string Source { get; init; }
/// <summary>
/// Package version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Buildinfo format version.
/// </summary>
public string? Format { get; init; }
/// <summary>
/// Target architecture(s).
/// </summary>
public string? Architecture { get; init; }
/// <summary>
/// Binary packages produced.
/// </summary>
public required IReadOnlyList<string> Binaries { get; init; }
/// <summary>
/// Build origin (e.g., "debian").
/// </summary>
public string? BuildOrigin { get; init; }
/// <summary>
/// Architecture the build was performed on.
/// </summary>
public string? BuildArchitecture { get; init; }
/// <summary>
/// Build timestamp.
/// </summary>
public DateTimeOffset? BuildDate { get; init; }
/// <summary>
/// Build path on the build machine.
/// </summary>
public string? BuildPath { get; init; }
/// <summary>
/// Checksums of produced files.
/// </summary>
public required IReadOnlyList<BuildinfoChecksum> Checksums { get; init; }
/// <summary>
/// Build dependencies that were installed.
/// </summary>
public required IReadOnlyList<BuildinfoDependency> InstalledBuildDepends { get; init; }
/// <summary>
/// Environment variables during build.
/// </summary>
public required IReadOnlyDictionary<string, string> Environment { get; init; }
/// <summary>
/// Whether the file was GPG signed.
/// </summary>
public bool IsSigned { get; init; }
}
/// <summary>
/// A checksum entry from a .buildinfo file.
/// </summary>
public sealed record BuildinfoChecksum
{
/// <summary>
/// Hash algorithm (sha256, sha1, md5).
/// </summary>
public required string Algorithm { get; init; }
/// <summary>
/// Hash value.
/// </summary>
public required string Hash { get; init; }
/// <summary>
/// File size in bytes.
/// </summary>
public required long Size { get; init; }
/// <summary>
/// Filename.
/// </summary>
public required string Filename { get; init; }
}
/// <summary>
/// A build dependency from a .buildinfo file.
/// </summary>
public sealed record BuildinfoDependency
{
/// <summary>
/// Package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Exact version (if specified).
/// </summary>
public string? Version { get; init; }
/// <summary>
/// Architecture qualifier (if specified).
/// </summary>
public string? Architecture { get; init; }
}

View File

@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<Description>Debian .buildinfo file connector for ground-truth corpus - provides reproducible build metadata</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Http" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.GroundTruth.Abstractions\StellaOps.BinaryIndex.GroundTruth.Abstractions.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,75 @@
# GroundTruth.Ddeb - Agent Instructions
## Module Overview
This library implements the Ubuntu ddeb debug symbol package connector for fetching debug symbols from Ubuntu's ddebs repository.
## Key Components
- **DdebConnector** - Main connector implementing three-phase pipeline
- **DdebConnectorPlugin** - Plugin registration for DI discovery
- **DdebOptions** - Configuration options
- **DdebDiagnostics** - Metrics and telemetry
- **PackagesIndexParser** - Parser for Debian Packages index files
- **IDebPackageExtractor** - Interface for .ddeb package extraction
## Configuration
```csharp
services.AddDdebConnector(opts =>
{
opts.MirrorUrl = new Uri("http://ddebs.ubuntu.com");
opts.Distributions = ["focal", "jammy", "noble"];
opts.Components = ["main", "universe"];
opts.Architectures = ["amd64", "arm64"];
});
```
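Options can also bind from the `GroundTruth:Ddeb` section; a minimal sketch, assuming an `IConfiguration` named `configuration` is available:
```csharp
// Sketch: bind DdebOptions from configuration rather than the inline action.
services.AddOptions<DdebOptions>()
    .Bind(configuration.GetSection(DdebOptions.SectionName))
    .PostConfigure(static opts => opts.Validate());
```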
## Three-Phase Pipeline
1. **Fetch**: Download Packages.gz index, identify dbgsym packages, fetch .ddeb files
2. **Parse**: Extract .ddeb archive (ar + tar.zst), parse DWARF from debug binaries
3. **Map**: Build canonical SymbolObservation for each binary with AOC compliance (see the driver sketch below)
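A minimal driver sketch, assuming the connector was registered via `AddDdebConnector` and that `provider` and `cancellationToken` are supplied by the host:
```csharp
// Sketch: run the three phases in order; each phase persists its own
// cursor state, so a run can resume after interruption.
var connector = provider.GetRequiredService<DdebConnector>();
await connector.FetchAsync(provider, cancellationToken);
await connector.ParseAsync(provider, cancellationToken);
await connector.MapAsync(provider, cancellationToken);
```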
## Ubuntu Ddeb Repository Structure
```
http://ddebs.ubuntu.com/
├── dists/
│ └── {dist}/ # focal, jammy, noble
│ └── {component}/ # main, universe
│ └── debug/
│ └── binary-{arch}/
│ └── Packages.gz
└── pool/
└── main/
└── {first-letter}/
└── {source-pkg}/
└── {pkg}-dbgsym_{version}_{arch}.ddeb
```
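The Packages index path for a given (distribution, component, architecture) tuple follows directly from this layout; a small sketch:
```csharp
// Sketch: compose the index path the connector fetches for one tuple.
static string PackagesIndexPath(string dist, string component, string arch) =>
    $"/dists/{dist}/{component}/debug/binary-{arch}/Packages.gz";

// PackagesIndexPath("jammy", "main", "amd64")
//   => "/dists/jammy/main/debug/binary-amd64/Packages.gz"
```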
## .ddeb Package Structure
```
package-dbgsym.ddeb (ar archive)
├── debian-binary
├── control.tar.xz
└── data.tar.zst
└── usr/lib/debug/
└── .build-id/
└── {first-2-hex}/
└── {rest-of-build-id}.debug
```
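The build-id is recoverable from the path alone; a sketch mirroring the connector's `ExtractBuildIdFromPath` helper:
```csharp
// Sketch: "/usr/lib/debug/.build-id/ab/cdef0123.debug" => "abcdef0123"
static string? BuildIdFromPath(string path)
{
    var parts = path.Split('/');
    var i = Array.IndexOf(parts, ".build-id");
    return i >= 0 && i + 2 < parts.Length
        ? parts[i + 1] + parts[i + 2].Replace(".debug", "")
        : null;
}
```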
## Testing
- Unit tests for PackagesIndexParser
- Integration tests require access to ddebs.ubuntu.com (skippable)
- Deterministic fixtures with sample Packages index
## Future Work
- Implement real IDebPackageExtractor using ar/tar extraction
- DWARF symbol parsing from debug binaries
- Build-id to binary package correlation
- GPG signature verification

View File

@@ -0,0 +1,104 @@
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb.Configuration;
/// <summary>
/// Configuration options for the Ubuntu ddeb connector.
/// </summary>
public sealed class DdebOptions
{
/// <summary>
/// Section name for configuration binding.
/// </summary>
public const string SectionName = "GroundTruth:Ddeb";
/// <summary>
/// HTTP client name for DI.
/// </summary>
public const string HttpClientName = "ddeb-ubuntu";
/// <summary>
/// Base URL for the ddeb repository.
/// </summary>
public Uri MirrorUrl { get; set; } = new("http://ddebs.ubuntu.com");
/// <summary>
/// Ubuntu distributions to fetch from.
/// </summary>
public List<string> Distributions { get; set; } =
[
"focal", // 20.04 LTS
"jammy", // 22.04 LTS
"noble" // 24.04 LTS
];
/// <summary>
/// Repository components.
/// </summary>
public List<string> Components { get; set; } =
[
"main",
"universe"
];
/// <summary>
/// Architectures to fetch.
/// </summary>
public List<string> Architectures { get; set; } =
[
"amd64",
"arm64"
];
/// <summary>
/// Request timeout in seconds.
/// </summary>
public int TimeoutSeconds { get; set; } = 60;
/// <summary>
/// Maximum concurrent downloads.
/// </summary>
public int MaxConcurrentDownloads { get; set; } = 4;
/// <summary>
/// Local cache directory for downloaded packages.
/// </summary>
public string? CacheDirectory { get; set; }
/// <summary>
/// Maximum cache size in megabytes.
/// </summary>
public int MaxCacheSizeMb { get; set; } = 2048;
/// <summary>
/// User agent string.
/// </summary>
public string UserAgent { get; set; } = "StellaOps.GroundTruth.Ddeb/1.0";
/// <summary>
/// Maximum packages to process per sync.
/// </summary>
public int MaxPackagesPerSync { get; set; } = 100;
/// <summary>
/// Validate options.
/// </summary>
public void Validate()
{
if (MirrorUrl is null)
throw new InvalidOperationException("Ddeb mirror URL must be configured.");
if (!MirrorUrl.IsAbsoluteUri)
throw new InvalidOperationException("Ddeb mirror URL must be an absolute URI.");
if (Distributions.Count == 0)
throw new InvalidOperationException("At least one distribution must be configured.");
if (Components.Count == 0)
throw new InvalidOperationException("At least one component must be configured.");
if (Architectures.Count == 0)
throw new InvalidOperationException("At least one architecture must be configured.");
if (TimeoutSeconds <= 0)
throw new InvalidOperationException("Timeout must be positive.");
if (MaxConcurrentDownloads <= 0)
throw new InvalidOperationException("Max concurrent downloads must be positive.");
}
}

View File

@@ -0,0 +1,527 @@
using System.Collections.Immutable;
using System.IO.Compression;
using System.Net;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Ddeb.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Ddeb.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb;
/// <summary>
/// Ubuntu ddeb debug symbol package connector.
/// Fetches .ddeb packages containing DWARF debug symbols.
/// </summary>
public sealed class DdebConnector : SymbolSourceConnectorBase, ISymbolSourceCapability
{
private readonly IHttpClientFactory _httpClientFactory;
private readonly ISymbolRawDocumentRepository _documentRepository;
private readonly ISymbolObservationRepository _observationRepository;
private readonly ISymbolSourceStateRepository _stateRepository;
private readonly ISymbolObservationWriteGuard _writeGuard;
private readonly DdebOptions _options;
private readonly DdebDiagnostics _diagnostics;
/// <summary>
/// Source ID for this connector.
/// </summary>
public const string SourceName = "ddeb-ubuntu";
public DdebConnector(
IHttpClientFactory httpClientFactory,
ISymbolRawDocumentRepository documentRepository,
ISymbolObservationRepository observationRepository,
ISymbolSourceStateRepository stateRepository,
ISymbolObservationWriteGuard writeGuard,
IOptions<DdebOptions> options,
DdebDiagnostics diagnostics,
ILogger<DdebConnector> logger,
TimeProvider? timeProvider = null)
: base(logger, timeProvider)
{
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
_documentRepository = documentRepository ?? throw new ArgumentNullException(nameof(documentRepository));
_observationRepository = observationRepository ?? throw new ArgumentNullException(nameof(observationRepository));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_writeGuard = writeGuard ?? throw new ArgumentNullException(nameof(writeGuard));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
}
/// <inheritdoc/>
public override string SourceId => SourceName;
/// <inheritdoc/>
public override string DisplayName => "Ubuntu ddebs";
/// <inheritdoc/>
public override IReadOnlyList<string> SupportedDistros => ["ubuntu"];
/// <inheritdoc/>
public override async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
// Check backoff
if (state.BackoffUntil.HasValue && state.BackoffUntil.Value > UtcNow)
{
Logger.LogInformation(
"Ddeb fetch skipped due to backoff until {BackoffUntil}",
state.BackoffUntil.Value);
return;
}
var httpClient = _httpClientFactory.CreateClient(DdebOptions.HttpClientName);
var fetchedCount = 0;
var errorCount = 0;
foreach (var distribution in _options.Distributions)
{
foreach (var component in _options.Components)
{
foreach (var architecture in _options.Architectures)
{
cancellationToken.ThrowIfCancellationRequested();
try
{
// Thread the state through: FetchPackagesIndexAsync records pending-parse
// digests, and reusing a stale copy here would clobber them on the next persist.
(var packagesIndexed, state) = await FetchPackagesIndexAsync(
httpClient,
distribution,
component,
architecture,
state,
cancellationToken);
fetchedCount += packagesIndexed;
}
catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
{
Logger.LogDebug(
"Packages index not found for {Distro}/{Component}/{Arch}",
distribution, component, architecture);
}
catch (Exception ex)
{
LogError(ex, "Fetch", $"Failed to fetch index for {distribution}/{component}/{architecture}");
errorCount++;
_diagnostics.RecordFetchError();
}
}
}
}
state = state with { LastSuccessAt = UtcNow };
await _stateRepository.UpdateAsync(state, cancellationToken);
Logger.LogInformation(
"Ddeb fetch completed: {FetchedCount} packages indexed, {ErrorCount} errors",
fetchedCount, errorCount);
}
/// <inheritdoc/>
public override async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
if (state.PendingParse.Length == 0)
{
Logger.LogDebug("No documents pending parse for ddeb");
return;
}
var debExtractor = services.GetRequiredService<IDebPackageExtractor>();
var parsedCount = 0;
foreach (var digest in state.PendingParse)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
if (document is null)
{
Logger.LogWarning("Document {Digest} not found for parse", digest);
state = state.RemovePendingParse(digest);
continue;
}
try
{
// Extract .ddeb package
var extractionResult = await debExtractor.ExtractAsync(
document.PayloadId!.Value,
cancellationToken);
LogParse(digest, extractionResult.SymbolCount);
// Update document status and move to map phase
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.PendingMap, cancellationToken);
state = state.MoveToPendingMap(digest);
parsedCount++;
_diagnostics.RecordParseSuccess(extractionResult.SymbolCount);
}
catch (Exception ex)
{
LogError(ex, "Parse", $"Failed to parse document {digest}");
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
state = state.RemovePendingParse(digest);
_diagnostics.RecordParseError();
}
}
await _stateRepository.UpdateAsync(state, cancellationToken);
Logger.LogInformation("Ddeb parse completed: {ParsedCount} packages parsed", parsedCount);
}
/// <inheritdoc/>
public override async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
if (state.PendingMap.Length == 0)
{
Logger.LogDebug("No documents pending map for ddeb");
return;
}
var debExtractor = services.GetRequiredService<IDebPackageExtractor>();
var mappedCount = 0;
foreach (var digest in state.PendingMap)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
if (document is null)
{
Logger.LogWarning("Document {Digest} not found for map", digest);
state = state.MarkMapped(digest);
continue;
}
try
{
// Extract symbols from stored payload
var extractionResult = await debExtractor.ExtractAsync(
document.PayloadId!.Value,
cancellationToken);
// Build observations for each debug binary in the package
foreach (var binary in extractionResult.Binaries)
{
var observation = await BuildObservationAsync(document, binary, cancellationToken);
// Validate against AOC
_writeGuard.EnsureValid(observation);
// Check for existing observation
var existingId = await _observationRepository.FindByContentHashAsync(
SourceId,
observation.DebugId,
observation.ContentHash,
cancellationToken);
if (existingId is not null)
{
Logger.LogDebug(
"Observation already exists with hash {Hash}, skipping",
observation.ContentHash);
}
else
{
await _observationRepository.InsertAsync(observation, cancellationToken);
LogMap(observation.ObservationId);
_diagnostics.RecordMapSuccess(binary.Symbols.Count);
}
}
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Mapped, cancellationToken);
state = state.MarkMapped(digest);
mappedCount++;
}
catch (GroundTruthAocGuardException ex)
{
Logger.LogError(
"AOC violation mapping document {Digest}: {Violations}",
digest,
string.Join(", ", ex.Violations.Select(v => v.Code)));
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Quarantined, cancellationToken);
state = state.MarkMapped(digest);
_diagnostics.RecordMapAocViolation();
}
catch (Exception ex)
{
LogError(ex, "Map", $"Failed to map document {digest}");
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
state = state.MarkMapped(digest);
_diagnostics.RecordMapError();
}
}
await _stateRepository.UpdateAsync(state, cancellationToken);
Logger.LogInformation("Ddeb map completed: {MappedCount} packages mapped", mappedCount);
}
/// <inheritdoc/>
public async Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default)
{
var startTime = UtcNow;
try
{
var httpClient = _httpClientFactory.CreateClient(DdebOptions.HttpClientName);
var testUrl = $"/dists/{_options.Distributions[0]}/Release";
var response = await httpClient.GetAsync(testUrl, ct);
response.EnsureSuccessStatusCode();
var latency = UtcNow - startTime;
return new SymbolSourceConnectivityResult(
IsConnected: true,
Latency: latency,
ErrorMessage: null,
TestedAt: UtcNow);
}
catch (Exception ex)
{
var latency = UtcNow - startTime;
return new SymbolSourceConnectivityResult(
IsConnected: false,
Latency: latency,
ErrorMessage: ex.Message,
TestedAt: UtcNow);
}
}
/// <inheritdoc/>
public async Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default)
{
var stats = await _observationRepository.GetStatsAsync(ct);
return new SymbolSourceMetadata(
SourceId: SourceId,
DisplayName: DisplayName,
BaseUrl: _options.MirrorUrl.ToString(),
LastSyncAt: stats.NewestObservation,
ObservationCount: (int)stats.TotalObservations,
DebugIdCount: (int)stats.UniqueDebugIds,
AdditionalInfo: new Dictionary<string, string>
{
["distributions"] = string.Join(",", _options.Distributions),
["total_symbols"] = stats.TotalSymbols.ToString()
});
}
/// <inheritdoc/>
public async Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
{
// Ddeb doesn't support direct debug ID lookup
// Symbols must be fetched via package index
var observations = await _observationRepository.FindByDebugIdAsync(debugId, ct);
var observation = observations.FirstOrDefault();
if (observation is null)
return null;
return new SymbolData(
DebugId: debugId,
BinaryName: observation.BinaryName,
Architecture: observation.Architecture,
Symbols: observation.Symbols.Select(s => new SymbolEntry(
Name: s.Name,
DemangledName: s.DemangledName,
Address: s.Address,
SizeBytes: (int)Math.Min(s.Size, int.MaxValue),
Type: s.Type,
Binding: s.Binding,
SourceFile: s.SourceFile,
SourceLine: s.SourceLine)).ToList(),
BuildInfo: observation.BuildMetadata is not null
? new BuildMetadata(
Compiler: observation.BuildMetadata.Compiler,
CompilerVersion: observation.BuildMetadata.CompilerVersion,
OptimizationLevel: observation.BuildMetadata.OptimizationLevel,
BuildFlags: observation.BuildMetadata.BuildFlags.ToList(),
SourceArchiveSha256: observation.BuildMetadata.SourceSha256,
BuildTimestamp: observation.BuildMetadata.BuildTimestamp)
: null,
Provenance: new SymbolDataProvenance(
SourceId: SourceId,
DocumentUri: observation.Provenance.DocumentUri,
FetchedAt: observation.Provenance.FetchedAt,
ContentHash: observation.ContentHash,
SignatureState: observation.Provenance.SignatureState,
SignatureDetails: observation.Provenance.SignatureDetails));
}
private async Task<(int Fetched, SymbolSourceState State)> FetchPackagesIndexAsync(
HttpClient httpClient,
string distribution,
string component,
string architecture,
SymbolSourceState state,
CancellationToken ct)
{
// Fetch Packages.gz index
// URL pattern: /dists/{dist}/{component}/debug/binary-{arch}/Packages.gz
var indexUrl = $"/dists/{distribution}/{component}/debug/binary-{architecture}/Packages.gz";
LogFetch(indexUrl);
var response = await httpClient.GetAsync(indexUrl, ct);
response.EnsureSuccessStatusCode();
var compressedContent = await response.Content.ReadAsByteArrayAsync(ct);
// Decompress gzip
using var compressedStream = new MemoryStream(compressedContent);
using var gzipStream = new GZipStream(compressedStream, CompressionMode.Decompress);
using var reader = new StreamReader(gzipStream);
var content = await reader.ReadToEndAsync(ct);
// Parse Packages index
var parser = new PackagesIndexParser();
var packages = parser.Parse(content, distribution, component, architecture);
Logger.LogDebug(
"Found {Count} ddeb packages in {Dist}/{Component}/{Arch}",
packages.Count, distribution, component, architecture);
// Filter to dbgsym packages and limit
var dbgsymPackages = packages
.Where(p => p.PackageName.EndsWith("-dbgsym") || p.PackageName.EndsWith("-dbg"))
.Take(_options.MaxPackagesPerSync)
.ToList();
var fetchedCount = 0;
foreach (var pkg in dbgsymPackages)
{
ct.ThrowIfCancellationRequested();
// Check if we already have this package version
var existing = await _documentRepository.FindByUriAsync(SourceId, pkg.PoolUrl, ct);
if (existing is not null)
continue;
try
{
var document = await FetchPackageAsync(httpClient, pkg, ct);
if (document is not null)
{
await _documentRepository.UpsertAsync(document, ct);
state = state.AddPendingParse(document.Digest);
fetchedCount++;
_diagnostics.RecordFetchSuccess();
}
}
catch (Exception ex)
{
Logger.LogWarning(
ex,
"Failed to fetch ddeb package {Package}",
pkg.PackageName);
_diagnostics.RecordFetchError();
}
}
await _stateRepository.UpdateAsync(state, ct);
return (fetchedCount, state);
}
private async Task<SymbolRawDocument?> FetchPackageAsync(
HttpClient httpClient,
DdebPackageInfo package,
CancellationToken ct)
{
LogFetch(package.PoolUrl, package.PackageName);
var response = await httpClient.GetAsync(package.PoolUrl, ct);
response.EnsureSuccessStatusCode();
var content = await response.Content.ReadAsByteArrayAsync(ct);
var digest = ComputeDocumentDigest(content);
// Verify SHA256 if provided
if (!string.IsNullOrEmpty(package.Sha256))
{
var expectedDigest = $"sha256:{package.Sha256.ToLowerInvariant()}";
if (!digest.Equals(expectedDigest, StringComparison.OrdinalIgnoreCase))
{
Logger.LogWarning(
"SHA256 mismatch for package {Package}: expected {Expected}, got {Actual}",
package.PackageName, expectedDigest, digest);
return null;
}
}
return new SymbolRawDocument
{
Digest = digest,
SourceId = SourceId,
DocumentUri = $"{_options.MirrorUrl}{package.PoolUrl}",
FetchedAt = UtcNow,
RecordedAt = UtcNow,
ContentType = "application/vnd.debian.binary-package",
ContentSize = content.Length,
ETag = response.Headers.ETag?.Tag,
Status = DocumentStatus.PendingParse,
PayloadId = null, // Will be set by blob storage
Metadata = ImmutableDictionary<string, string>.Empty
.Add("package_name", package.PackageName)
.Add("package_version", package.Version)
.Add("distribution", package.Distribution)
.Add("component", package.Component)
.Add("architecture", package.Architecture)
};
}
private async Task<SymbolObservation> BuildObservationAsync(
SymbolRawDocument document,
ExtractedBinary binary,
CancellationToken ct)
{
var packageName = document.Metadata.GetValueOrDefault("package_name", "unknown");
var packageVersion = document.Metadata.GetValueOrDefault("package_version", "unknown");
var distribution = document.Metadata.GetValueOrDefault("distribution", "unknown");
var architecture = document.Metadata.GetValueOrDefault("architecture", "amd64");
// Determine revision number; await instead of blocking with GetAwaiter().GetResult()
var existingObservations = await _observationRepository.FindByDebugIdAsync(binary.BuildId, ct);
var revision = existingObservations.Length + 1;
var observation = new SymbolObservation
{
ObservationId = GenerateObservationId(binary.BuildId, revision),
SourceId = SourceId,
DebugId = binary.BuildId,
BinaryName = binary.BinaryName,
BinaryPath = binary.BinaryPath,
Architecture = architecture,
Distro = "ubuntu",
DistroVersion = distribution,
PackageName = packageName.EndsWith("-dbgsym", StringComparison.Ordinal)
? packageName[..^"-dbgsym".Length]
: packageName.EndsWith("-dbg", StringComparison.Ordinal)
? packageName[..^"-dbg".Length]
: packageName,
PackageVersion = packageVersion,
Symbols = binary.Symbols.ToImmutableArray(),
SymbolCount = binary.Symbols.Count,
BuildMetadata = binary.BuildMetadata,
Provenance = new ObservationProvenance
{
SourceId = SourceId,
DocumentUri = document.DocumentUri,
FetchedAt = document.FetchedAt,
RecordedAt = UtcNow,
DocumentHash = document.Digest,
SignatureState = SignatureState.None,
ConnectorVersion = "1.0.0"
},
ContentHash = "",
CreatedAt = UtcNow
};
var contentHash = ComputeContentHash(observation);
return observation with { ContentHash = contentHash };
}
}

View File

@@ -0,0 +1,41 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Ddeb.Configuration;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb;
/// <summary>
/// Plugin for the Ubuntu ddeb symbol source connector.
/// </summary>
public sealed class DdebConnectorPlugin : ISymbolSourceConnectorPlugin
{
/// <inheritdoc/>
public string Name => DdebConnector.SourceName;
/// <inheritdoc/>
public bool IsAvailable(IServiceProvider services)
{
ArgumentNullException.ThrowIfNull(services);
var options = services.GetService<Microsoft.Extensions.Options.IOptions<DdebOptions>>();
if (options?.Value is null)
return false;
try
{
options.Value.Validate();
return true;
}
catch
{
return false;
}
}
/// <inheritdoc/>
public ISymbolSourceConnector Create(IServiceProvider services)
{
ArgumentNullException.ThrowIfNull(services);
return ActivatorUtilities.CreateInstance<DdebConnector>(services);
}
}

View File

@@ -0,0 +1,78 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Ddeb.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Ddeb.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb;
/// <summary>
/// Extension methods for adding ddeb connector to DI.
/// </summary>
public static class DdebServiceCollectionExtensions
{
/// <summary>
/// Add the Ubuntu ddeb symbol source connector.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configure">Configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddDdebConnector(
this IServiceCollection services,
Action<DdebOptions> configure)
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configure);
// Register options with validation
services.AddOptions<DdebOptions>()
.Configure(configure)
.PostConfigure(static opts => opts.Validate());
// Register HTTP client
services.AddHttpClient(DdebOptions.HttpClientName, (sp, client) =>
{
var options = sp.GetRequiredService<IOptions<DdebOptions>>().Value;
client.BaseAddress = options.MirrorUrl;
client.Timeout = TimeSpan.FromSeconds(options.TimeoutSeconds);
client.DefaultRequestHeaders.Add("User-Agent", options.UserAgent);
});
// Register services
services.AddSingleton<DdebDiagnostics>();
services.AddSingleton<IDebPackageExtractor, DebPackageExtractor>();
services.AddTransient<DdebConnector>();
services.AddSingleton<ISymbolSourceConnectorPlugin, DdebConnectorPlugin>();
return services;
}
/// <summary>
/// Add the Ubuntu ddeb symbol source connector with default configuration.
/// </summary>
/// <param name="services">Service collection.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddDdebConnector(this IServiceCollection services)
{
return services.AddDdebConnector(_ => { });
}
/// <summary>
/// Add the ddeb connector with specific distributions.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="distributions">Ubuntu distributions to fetch from (e.g., "focal", "jammy").</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddDdebConnector(
this IServiceCollection services,
params string[] distributions)
{
return services.AddDdebConnector(opts =>
{
if (distributions.Length > 0)
{
opts.Distributions = [.. distributions];
}
});
}
}

View File

@@ -0,0 +1,90 @@
using System.Diagnostics.Metrics;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb.Internal;
/// <summary>
/// Diagnostics and metrics for the ddeb connector.
/// </summary>
public sealed class DdebDiagnostics
{
private readonly Counter<long> _fetchSuccessCounter;
private readonly Counter<long> _fetchErrorCounter;
private readonly Counter<long> _parseSuccessCounter;
private readonly Counter<long> _parseErrorCounter;
private readonly Counter<long> _mapSuccessCounter;
private readonly Counter<long> _mapErrorCounter;
private readonly Counter<long> _mapAocViolationCounter;
private readonly Histogram<long> _symbolCountHistogram;
private readonly Histogram<long> _packageSizeHistogram;
public DdebDiagnostics(IMeterFactory meterFactory)
{
var meter = meterFactory.Create("StellaOps.BinaryIndex.GroundTruth.Ddeb");
_fetchSuccessCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.fetch.success",
unit: "{packages}",
description: "Number of successful ddeb package fetches");
_fetchErrorCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.fetch.error",
unit: "{packages}",
description: "Number of failed ddeb package fetches");
_parseSuccessCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.parse.success",
unit: "{packages}",
description: "Number of successful ddeb package parses");
_parseErrorCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.parse.error",
unit: "{packages}",
description: "Number of failed ddeb package parses");
_mapSuccessCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.map.success",
unit: "{observations}",
description: "Number of successful observation mappings");
_mapErrorCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.map.error",
unit: "{observations}",
description: "Number of failed observation mappings");
_mapAocViolationCounter = meter.CreateCounter<long>(
"groundtruth.ddeb.map.aoc_violation",
unit: "{observations}",
description: "Number of AOC violations during mapping");
_symbolCountHistogram = meter.CreateHistogram<long>(
"groundtruth.ddeb.symbols_per_binary",
unit: "{symbols}",
description: "Distribution of symbol counts per binary");
_packageSizeHistogram = meter.CreateHistogram<long>(
"groundtruth.ddeb.package_size",
unit: "By",
description: "Distribution of ddeb package sizes");
}
public void RecordFetchSuccess() => _fetchSuccessCounter.Add(1);
public void RecordFetchError() => _fetchErrorCounter.Add(1);
public void RecordParseSuccess(int symbolCount)
{
_parseSuccessCounter.Add(1);
_symbolCountHistogram.Record(symbolCount);
}
public void RecordParseError() => _parseErrorCounter.Add(1);
public void RecordMapSuccess(int symbolCount)
{
_mapSuccessCounter.Add(1);
// Map runs once per binary, so this count feeds the symbols_per_binary histogram.
_symbolCountHistogram.Record(symbolCount);
}
public void RecordMapError() => _mapErrorCounter.Add(1);
public void RecordMapAocViolation() => _mapAocViolationCounter.Add(1);
public void RecordPackageSize(long sizeBytes) => _packageSizeHistogram.Record(sizeBytes);
}

View File

@@ -0,0 +1,245 @@
using System.Buffers;
using System.Text;
using Microsoft.Extensions.Logging;
using SharpCompress.Archives;
using SharpCompress.Archives.Tar;
using SharpCompress.Compressors.Xz;
using ZstdSharp;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb.Internal;
/// <summary>
/// Implementation of .ddeb package extractor.
/// Handles ar archive format with data.tar.zst (or .xz/.gz) extraction.
///
/// NOTE: LibObjectFile 1.0.0 has significant API changes from 0.x.
/// ELF/DWARF parsing is stubbed pending API migration.
/// </summary>
public sealed class DebPackageExtractor : IDebPackageExtractor
{
private readonly ILogger<DebPackageExtractor> _logger;
// ar archive magic bytes
private static readonly byte[] ArMagic = "!<arch>\n"u8.ToArray();
public DebPackageExtractor(ILogger<DebPackageExtractor> logger)
{
_logger = logger;
}
/// <inheritdoc/>
public Task<DebPackageExtractionResult> ExtractAsync(Guid payloadId, CancellationToken ct = default)
{
throw new NotImplementedException(
"Extracting from payload ID requires blob storage integration. Use stream overload instead.");
}
/// <inheritdoc/>
public async Task<DebPackageExtractionResult> ExtractAsync(Stream stream, CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(stream);
var binaries = new List<ExtractedBinary>();
try
{
// Parse ar archive to find data.tar.* member
var dataStream = await ExtractDataTarFromArAsync(stream, ct);
if (dataStream == null)
{
_logger.LogWarning("No data.tar found in .ddeb package");
return new DebPackageExtractionResult
{
Binaries = binaries
};
}
await using (dataStream)
{
// Extract ELF binaries from data.tar
await ExtractElfBinariesFromTarAsync(dataStream, binaries, ct);
}
_logger.LogInformation("Extracted {Count} binaries from .ddeb package", binaries.Count);
return new DebPackageExtractionResult
{
Binaries = binaries
};
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to extract .ddeb package");
return new DebPackageExtractionResult
{
Binaries = binaries
};
}
}
private async Task<Stream?> ExtractDataTarFromArAsync(Stream arStream, CancellationToken ct)
{
// Read and verify ar magic
var magic = new byte[ArMagic.Length];
var bytesRead = await arStream.ReadAsync(magic, ct);
if (bytesRead < ArMagic.Length || !magic.SequenceEqual(ArMagic))
{
_logger.LogWarning("Invalid ar archive magic");
return null;
}
// Parse ar members to find data.tar.*
while (arStream.Position < arStream.Length)
{
var header = await ReadArHeaderAsync(arStream, ct);
if (header == null)
break;
if (header.Name.StartsWith("data.tar"))
{
_logger.LogDebug("Found data.tar member: {Name}, size: {Size}", header.Name, header.Size);
// Read member content
var content = new byte[header.Size];
await arStream.ReadExactlyAsync(content, ct);
// Decompress based on extension
var decompressed = await DecompressAsync(content, header.Name, ct);
return new MemoryStream(decompressed);
}
// Skip member content (with padding)
var skipSize = header.Size + (header.Size % 2); // ar uses 2-byte alignment
arStream.Seek(skipSize, SeekOrigin.Current);
}
return null;
}
private async Task<ArMemberHeader?> ReadArHeaderAsync(Stream stream, CancellationToken ct)
{
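// Common ar header layout (60 bytes): name[0..16), mtime[16..28), uid[28..34),
// gid[34..40), mode[40..48), size[48..58), terminator "`\n"[58..60).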
var headerBytes = new byte[60];
var bytesRead = await stream.ReadAsync(headerBytes, ct);
if (bytesRead < 60)
return null;
// Parse header fields
var name = Encoding.ASCII.GetString(headerBytes, 0, 16).Trim();
var sizeStr = Encoding.ASCII.GetString(headerBytes, 48, 10).Trim();
if (!long.TryParse(sizeStr, out var size))
return null;
// Handle extended filenames (BSD style)
if (name.StartsWith("#1/"))
{
if (int.TryParse(name[3..], out var extLen))
{
var extNameBytes = new byte[extLen];
await stream.ReadExactlyAsync(extNameBytes, ct);
name = Encoding.UTF8.GetString(extNameBytes).TrimEnd('\0');
size -= extLen;
}
}
return new ArMemberHeader { Name = name, Size = size };
}
private async Task<byte[]> DecompressAsync(byte[] compressed, string filename, CancellationToken ct)
{
if (filename.EndsWith(".zst"))
{
using var decompressor = new Decompressor();
var decompressed = decompressor.Unwrap(compressed);
return decompressed.ToArray();
}
else if (filename.EndsWith(".xz"))
{
// Use SharpCompress for xz
using var input = new MemoryStream(compressed);
using var reader = ReaderFactory.Open(input);
if (reader.MoveToNextEntry())
{
using var output = new MemoryStream();
await using var entryStream = reader.OpenEntryStream();
await entryStream.CopyToAsync(output, ct);
return output.ToArray();
}
}
else if (filename.EndsWith(".gz"))
{
using var input = new MemoryStream(compressed);
using var gz = new System.IO.Compression.GZipStream(input, System.IO.Compression.CompressionMode.Decompress);
using var output = new MemoryStream();
await gz.CopyToAsync(output, ct);
return output.ToArray();
}
// Uncompressed
return compressed;
}
private async Task ExtractElfBinariesFromTarAsync(Stream tarStream, List<ExtractedBinary> binaries, CancellationToken ct)
{
using var archive = TarArchive.Open(tarStream);
foreach (var entry in archive.Entries)
{
if (entry.IsDirectory)
continue;
var path = entry.Key ?? string.Empty;
// Look for files under /usr/lib/debug/.build-id/
if (!path.Contains("/usr/lib/debug/.build-id/"))
continue;
// The .debug files under .build-id/ are what we want: they carry the DWARF data
if (path.EndsWith(".debug"))
{
_logger.LogDebug("Found debug file: {Path}", path);
using var entryStream = entry.OpenEntryStream();
using var ms = new MemoryStream();
await entryStream.CopyToAsync(ms, ct);
// Extract build-id from path
var buildId = ExtractBuildIdFromPath(path) ?? string.Empty;
var binaryName = System.IO.Path.GetFileName(path);
binaries.Add(new ExtractedBinary
{
BinaryName = binaryName,
BinaryPath = path,
BuildId = buildId,
Symbols = Array.Empty<ObservedSymbol>(),
BuildMetadata = null // LibObjectFile 1.0.0 migration pending
});
}
}
}
private static string? ExtractBuildIdFromPath(string path)
{
// Path format: /usr/lib/debug/.build-id/XX/YYYYYYYY.debug
var parts = path.Split('/');
for (int i = 0; i < parts.Length - 1; i++)
{
if (parts[i] == ".build-id" && i + 2 < parts.Length)
{
var prefix = parts[i + 1];
var suffix = parts[i + 2].Replace(".debug", "");
return prefix + suffix;
}
}
return null;
}
private sealed record ArMemberHeader
{
public required string Name { get; init; }
public required long Size { get; init; }
}
}

View File

@@ -0,0 +1,103 @@
using System.Collections.Immutable;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb.Internal;
/// <summary>
/// Interface for extracting debug symbols from .ddeb packages.
/// </summary>
public interface IDebPackageExtractor
{
/// <summary>
/// Extract debug symbols from a stored .ddeb package.
/// </summary>
/// <param name="payloadId">Blob storage ID for the .ddeb package.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Extraction result with binaries and symbols.</returns>
Task<DebPackageExtractionResult> ExtractAsync(Guid payloadId, CancellationToken ct = default);
/// <summary>
/// Extract debug symbols from a .ddeb package stream.
/// </summary>
/// <param name="stream">.ddeb package stream.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Extraction result with binaries and symbols.</returns>
Task<DebPackageExtractionResult> ExtractAsync(Stream stream, CancellationToken ct = default);
}
/// <summary>
/// Result of extracting a .ddeb package.
/// </summary>
public sealed record DebPackageExtractionResult
{
/// <summary>
/// Extracted debug binaries.
/// </summary>
public required IReadOnlyList<ExtractedBinary> Binaries { get; init; }
/// <summary>
/// Total symbol count across all binaries.
/// </summary>
public int SymbolCount => Binaries.Sum(b => b.Symbols.Count);
}
/// <summary>
/// A debug binary extracted from a .ddeb package.
/// </summary>
public sealed record ExtractedBinary
{
/// <summary>
/// Binary name.
/// </summary>
public required string BinaryName { get; init; }
/// <summary>
/// Path within the package.
/// </summary>
public required string BinaryPath { get; init; }
/// <summary>
/// Build ID (from .note.gnu.build-id).
/// </summary>
public required string BuildId { get; init; }
/// <summary>
/// Extracted symbols.
/// </summary>
public required IReadOnlyList<ObservedSymbol> Symbols { get; init; }
/// <summary>
/// Build metadata from DWARF.
/// </summary>
public ObservedBuildMetadata? BuildMetadata { get; init; }
}
/// <summary>
/// Stub implementation of .ddeb package extractor for tests and initial development.
/// The registered <see cref="DebPackageExtractor"/> performs real ar + tar extraction;
/// DWARF symbol parsing remains pending.
/// </summary>
public sealed class StubDebPackageExtractor : IDebPackageExtractor
{
/// <inheritdoc/>
public Task<DebPackageExtractionResult> ExtractAsync(Guid payloadId, CancellationToken ct = default)
{
// Stub: Return empty result
// Production: Load from blob storage and extract
return Task.FromResult(new DebPackageExtractionResult
{
Binaries = []
});
}
/// <inheritdoc/>
public Task<DebPackageExtractionResult> ExtractAsync(Stream stream, CancellationToken ct = default)
{
// Stub: Return empty result
// Production: Extract .ddeb (ar archive) containing data.tar.zst
// Then extract debug binaries from /usr/lib/debug/.build-id/
return Task.FromResult(new DebPackageExtractionResult
{
Binaries = []
});
}
}

View File

@@ -0,0 +1,161 @@
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.GroundTruth.Ddeb.Internal;
/// <summary>
/// Parser for Debian Packages index files.
/// </summary>
public sealed partial class PackagesIndexParser
{
/// <summary>
/// Parse a Packages index file content.
/// </summary>
/// <param name="content">Raw Packages file content.</param>
/// <param name="distribution">Distribution name (e.g., "jammy").</param>
/// <param name="component">Component name (e.g., "main").</param>
/// <param name="architecture">Architecture (e.g., "amd64").</param>
/// <returns>List of parsed package information.</returns>
public IReadOnlyList<DdebPackageInfo> Parse(
string content,
string distribution,
string component,
string architecture)
{
var packages = new List<DdebPackageInfo>();
// Split by empty lines to get package stanzas
var stanzas = content.Split(["\n\n", "\r\n\r\n"], StringSplitOptions.RemoveEmptyEntries);
foreach (var stanza in stanzas)
{
var package = ParseStanza(stanza, distribution, component, architecture);
if (package is not null)
{
packages.Add(package);
}
}
return packages;
}
private static DdebPackageInfo? ParseStanza(
string stanza,
string distribution,
string component,
string architecture)
{
var fields = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
string? currentKey = null;
var currentValue = new List<string>();
foreach (var line in stanza.Split('\n'))
{
if (string.IsNullOrWhiteSpace(line))
continue;
// Continuation line (starts with space or tab)
if (line.StartsWith(' ') || line.StartsWith('\t'))
{
if (currentKey is not null)
{
currentValue.Add(line.TrimStart());
}
continue;
}
// Save previous field
if (currentKey is not null)
{
fields[currentKey] = string.Join("\n", currentValue);
}
// Parse new field
var colonIndex = line.IndexOf(':');
if (colonIndex > 0)
{
currentKey = line[..colonIndex].Trim();
currentValue = [line[(colonIndex + 1)..].Trim()];
}
}
// Save last field
if (currentKey is not null)
{
fields[currentKey] = string.Join("\n", currentValue);
}
// Validate required fields
if (!fields.TryGetValue("Package", out var packageName) ||
!fields.TryGetValue("Version", out var version) ||
!fields.TryGetValue("Filename", out var filename))
{
return null;
}
return new DdebPackageInfo
{
PackageName = packageName,
Version = version,
PoolUrl = "/" + filename.TrimStart('/'),
Distribution = distribution,
Component = component,
Architecture = fields.GetValueOrDefault("Architecture", architecture),
Size = fields.TryGetValue("Size", out var size) && long.TryParse(size, out var sizeValue)
? sizeValue
: 0,
Sha256 = fields.GetValueOrDefault("SHA256"),
Description = fields.GetValueOrDefault("Description")
};
}
}
/// <summary>
/// Information about a ddeb package from the Packages index.
/// </summary>
public sealed record DdebPackageInfo
{
/// <summary>
/// Package name.
/// </summary>
public required string PackageName { get; init; }
/// <summary>
/// Package version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// URL path to the package in the pool.
/// </summary>
public required string PoolUrl { get; init; }
/// <summary>
/// Distribution (e.g., "jammy").
/// </summary>
public required string Distribution { get; init; }
/// <summary>
/// Component (e.g., "main").
/// </summary>
public required string Component { get; init; }
/// <summary>
/// Architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Package size in bytes.
/// </summary>
public long Size { get; init; }
/// <summary>
/// SHA256 hash of the package.
/// </summary>
public string? Sha256 { get; init; }
/// <summary>
/// Package description.
/// </summary>
public string? Description { get; init; }
}

View File

@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<NoWarn>$(NoWarn);NU1603</NoWarn>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<Description>Ubuntu ddeb debug symbol package connector for ground-truth corpus</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="ZstdSharp.Port" />
<PackageReference Include="SharpCompress" />
<PackageReference Include="LibObjectFile" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.GroundTruth.Abstractions\StellaOps.BinaryIndex.GroundTruth.Abstractions.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,47 @@
# GroundTruth.Debuginfod - Agent Instructions
## Module Overview
This library implements the debuginfod symbol source connector for fetching debug symbols from Fedora/RHEL debuginfod services.
## Key Components
- **DebuginfodConnector** - Main connector implementing three-phase pipeline
- **DebuginfodConnectorPlugin** - Plugin registration for DI discovery
- **DebuginfodOptions** - Configuration options
- **DebuginfodDiagnostics** - Metrics and telemetry
- **IDwarfParser** - Interface for DWARF symbol parsing
## Configuration
Environment variables (a parsing sketch follows the list):
- `DEBUGINFOD_URLS` - Space/comma-separated list of debuginfod server URLs
- `DEBUGINFOD_CACHE` - Local cache directory
- `DEBUGINFOD_TIMEOUT` - Request timeout in seconds
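A minimal parsing sketch, assuming `opts` is a `DebuginfodOptions` instance and that missing variables keep the configured defaults:
```csharp
// Sketch: seed options from the standard debuginfod environment variables.
var urls = Environment.GetEnvironmentVariable("DEBUGINFOD_URLS");
if (!string.IsNullOrWhiteSpace(urls))
{
    var parsed = urls.Split([' ', ','], StringSplitOptions.RemoveEmptyEntries);
    opts.BaseUrl = new Uri(parsed[0]);
    opts.AdditionalUrls = [.. parsed.Skip(1).Select(u => new Uri(u))];
}
if (int.TryParse(Environment.GetEnvironmentVariable("DEBUGINFOD_TIMEOUT"), out var timeout))
{
    opts.TimeoutSeconds = timeout;
}
```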
## Three-Phase Pipeline
1. **Fetch**: Download debuginfo by build-id from debuginfod server
2. **Parse**: Extract DWARF symbols using IDwarfParser
3. **Map**: Build canonical SymbolObservation with AOC compliance
## Debuginfod Protocol
API endpoints (a fetch sketch follows the list):
- `GET /buildid/{buildid}/debuginfo` - Fetch debug info
- `GET /buildid/{buildid}/executable` - Fetch executable
- `GET /buildid/{buildid}/source/{path}` - Fetch source file
- `GET /metrics` - Prometheus metrics (for health checks)
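A minimal fetch sketch against the first endpoint, assuming `httpClient.BaseAddress` points at a debuginfod server:
```csharp
// Sketch: fetch raw debuginfo bytes for a build-id; 404 means the server
// does not know this build-id.
async Task<byte[]?> FetchDebuginfoAsync(HttpClient httpClient, string buildId, CancellationToken ct)
{
    using var response = await httpClient.GetAsync($"/buildid/{buildId}/debuginfo", ct);
    if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
        return null;
    response.EnsureSuccessStatusCode();
    return await response.Content.ReadAsByteArrayAsync(ct);
}
```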
## Testing
- Unit tests for connector logic
- Integration tests require access to debuginfod server (skippable)
- Deterministic fixtures for offline testing
## Future Work
- Implement real IDwarfParser using Gimli or libdw
- IMA signature verification
- Source file fetching
- Multi-server fallback

View File

@@ -0,0 +1,99 @@
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
/// <summary>
/// Configuration options for the debuginfod connector.
/// </summary>
public sealed class DebuginfodOptions
{
/// <summary>
/// Section name for configuration binding.
/// </summary>
public const string SectionName = "GroundTruth:Debuginfod";
/// <summary>
/// HTTP client name for DI.
/// </summary>
public const string HttpClientName = "debuginfod";
/// <summary>
/// Base URL for the debuginfod service.
/// Defaults to Fedora's public debuginfod service.
/// </summary>
public Uri BaseUrl { get; set; } = new("https://debuginfod.fedoraproject.org");
/// <summary>
/// Additional debuginfod URLs to query (for fallback or multiple sources).
/// </summary>
public List<Uri> AdditionalUrls { get; set; } = [];
/// <summary>
/// Request timeout in seconds.
/// </summary>
public int TimeoutSeconds { get; set; } = 30;
/// <summary>
/// Maximum concurrent requests.
/// </summary>
public int MaxConcurrentRequests { get; set; } = 4;
/// <summary>
/// Retry count for failed requests.
/// </summary>
public int RetryCount { get; set; } = 3;
/// <summary>
/// Initial retry delay in milliseconds.
/// </summary>
public int RetryDelayMs { get; set; } = 1000;
/// <summary>
/// Whether to verify IMA signatures when available.
/// </summary>
public bool VerifyImaSignatures { get; set; } = true;
/// <summary>
/// Local cache directory for downloaded debuginfo.
/// </summary>
public string? CacheDirectory { get; set; }
/// <summary>
/// Maximum cache size in megabytes.
/// </summary>
public int MaxCacheSizeMb { get; set; } = 1024;
/// <summary>
/// Cache expiration in hours.
/// </summary>
public int CacheExpirationHours { get; set; } = 168; // 1 week
/// <summary>
/// User agent string.
/// </summary>
public string UserAgent { get; set; } = "StellaOps.GroundTruth.Debuginfod/1.0";
/// <summary>
/// Whether to include source files in fetch.
/// </summary>
public bool IncludeSourceFiles { get; set; } = false;
/// <summary>
/// Validate options.
/// </summary>
public void Validate()
{
if (BaseUrl is null)
throw new InvalidOperationException("Debuginfod base URL must be configured.");
if (!BaseUrl.IsAbsoluteUri)
throw new InvalidOperationException("Debuginfod base URL must be an absolute URI.");
if (TimeoutSeconds <= 0)
throw new InvalidOperationException("Timeout must be positive.");
if (MaxConcurrentRequests <= 0)
throw new InvalidOperationException("Max concurrent requests must be positive.");
if (RetryCount < 0)
throw new InvalidOperationException("Retry count cannot be negative.");
}
}

View File

@@ -0,0 +1,449 @@
using System.Collections.Immutable;
using System.Net;
using System.Runtime.CompilerServices;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod;
/// <summary>
/// Debuginfod symbol source connector for Fedora/RHEL debuginfod services.
/// Implements the three-phase pipeline: Fetch → Parse → Map.
/// </summary>
public sealed class DebuginfodConnector : SymbolSourceConnectorBase, ISymbolSourceCapability
{
private readonly IHttpClientFactory _httpClientFactory;
private readonly ISymbolRawDocumentRepository _documentRepository;
private readonly ISymbolObservationRepository _observationRepository;
private readonly ISymbolSourceStateRepository _stateRepository;
private readonly ISymbolObservationWriteGuard _writeGuard;
private readonly DebuginfodOptions _options;
private readonly DebuginfodDiagnostics _diagnostics;
/// <summary>
/// Source ID for this connector.
/// </summary>
public const string SourceName = "debuginfod-fedora";
public DebuginfodConnector(
IHttpClientFactory httpClientFactory,
ISymbolRawDocumentRepository documentRepository,
ISymbolObservationRepository observationRepository,
ISymbolSourceStateRepository stateRepository,
ISymbolObservationWriteGuard writeGuard,
IOptions<DebuginfodOptions> options,
DebuginfodDiagnostics diagnostics,
ILogger<DebuginfodConnector> logger,
TimeProvider? timeProvider = null)
: base(logger, timeProvider)
{
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
_documentRepository = documentRepository ?? throw new ArgumentNullException(nameof(documentRepository));
_observationRepository = observationRepository ?? throw new ArgumentNullException(nameof(observationRepository));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_writeGuard = writeGuard ?? throw new ArgumentNullException(nameof(writeGuard));
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_diagnostics = diagnostics ?? throw new ArgumentNullException(nameof(diagnostics));
}
/// <inheritdoc/>
public override string SourceId => SourceName;
/// <inheritdoc/>
public override string DisplayName => "Fedora debuginfod";
/// <inheritdoc/>
public override IReadOnlyList<string> SupportedDistros =>
["fedora", "rhel", "centos", "rocky", "alma"];
/// <inheritdoc/>
public override async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
// Check backoff
if (state.BackoffUntil.HasValue && state.BackoffUntil.Value > UtcNow)
{
Logger.LogInformation(
"Debuginfod fetch skipped due to backoff until {BackoffUntil}",
state.BackoffUntil.Value);
return;
}
// Get pending debug IDs from cursor (or use configured list)
var debugIds = GetPendingDebugIds(state);
if (debugIds.Length == 0)
{
Logger.LogDebug("No pending debug IDs to fetch from debuginfod");
return;
}
var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
var fetchedCount = 0;
var errorCount = 0;
foreach (var debugId in debugIds)
{
cancellationToken.ThrowIfCancellationRequested();
try
{
var document = await FetchDebugInfoAsync(httpClient, debugId, cancellationToken);
if (document is not null)
{
await _documentRepository.UpsertAsync(document, cancellationToken);
state = state.AddPendingParse(document.Digest);
fetchedCount++;
_diagnostics.RecordFetchSuccess();
}
}
catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
{
Logger.LogDebug("Debug ID {DebugId} not found in debuginfod", debugId);
_diagnostics.RecordFetchNotFound();
}
catch (Exception ex)
{
LogError(ex, "Fetch", $"Failed to fetch debug ID {debugId}");
errorCount++;
_diagnostics.RecordFetchError();
if (errorCount > 5)
{
// Persist pending-parse progress, then back off. Returning here keeps
// the success stamp below from overwriting the failure state.
await _stateRepository.UpdateAsync(state, cancellationToken);
await _stateRepository.MarkFailedAsync(
SourceId,
$"Too many fetch errors: {ex.Message}",
TimeSpan.FromMinutes(15),
cancellationToken);
return;
}
}
}
state = state with { LastSuccessAt = UtcNow };
await _stateRepository.UpdateAsync(state, cancellationToken);
Logger.LogInformation(
"Debuginfod fetch completed: {FetchedCount} fetched, {ErrorCount} errors",
fetchedCount, errorCount);
}
/// <inheritdoc/>
public override async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
if (state.PendingParse.Length == 0)
{
Logger.LogDebug("No documents pending parse for debuginfod");
return;
}
var dwParser = services.GetRequiredService<IDwarfParser>();
var parsedCount = 0;
foreach (var digest in state.PendingParse)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
if (document is null)
{
Logger.LogWarning("Document {Digest} not found for parse", digest);
state = state.RemovePendingParse(digest);
continue;
}
try
{
// Parse DWARF symbols
var symbols = await dwParser.ParseSymbolsAsync(
document.PayloadId!.Value,
cancellationToken);
LogParse(digest, symbols.Count);
// Update document status and move to map phase
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.PendingMap, cancellationToken);
state = state.MoveToPendingMap(digest);
parsedCount++;
_diagnostics.RecordParseSuccess(symbols.Count);
}
catch (Exception ex)
{
LogError(ex, "Parse", $"Failed to parse document {digest}");
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
state = state.RemovePendingParse(digest);
_diagnostics.RecordParseError();
}
}
await _stateRepository.UpdateAsync(state, cancellationToken);
Logger.LogInformation("Debuginfod parse completed: {ParsedCount} documents parsed", parsedCount);
}
/// <inheritdoc/>
public override async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
var state = await _stateRepository.GetOrCreateAsync(SourceId, cancellationToken);
if (state.PendingMap.Length == 0)
{
Logger.LogDebug("No documents pending map for debuginfod");
return;
}
var dwParser = services.GetRequiredService<IDwarfParser>();
var mappedCount = 0;
foreach (var digest in state.PendingMap)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentRepository.FindByDigestAsync(digest, cancellationToken);
if (document is null)
{
Logger.LogWarning("Document {Digest} not found for map", digest);
state = state.MarkMapped(digest);
continue;
}
try
{
// Parse symbols from stored payload
var symbols = await dwParser.ParseSymbolsAsync(
document.PayloadId!.Value,
cancellationToken);
// Build observation
var observation = BuildObservation(document, symbols);
// Validate against AOC
_writeGuard.EnsureValid(observation);
// Check for existing observation with same content
var existingId = await _observationRepository.FindByContentHashAsync(
SourceId,
observation.DebugId,
observation.ContentHash,
cancellationToken);
if (existingId is not null)
{
Logger.LogDebug(
"Observation already exists with hash {Hash}, skipping",
observation.ContentHash);
}
else
{
// Insert new observation
await _observationRepository.InsertAsync(observation, cancellationToken);
LogMap(observation.ObservationId);
_diagnostics.RecordMapSuccess(symbols.Count);
}
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Mapped, cancellationToken);
state = state.MarkMapped(digest);
mappedCount++;
}
catch (GroundTruthAocGuardException ex)
{
Logger.LogError(
"AOC violation mapping document {Digest}: {Violations}",
digest,
string.Join(", ", ex.Violations.Select(v => v.Code)));
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Quarantined, cancellationToken);
state = state.MarkMapped(digest);
_diagnostics.RecordMapAocViolation();
}
catch (Exception ex)
{
LogError(ex, "Map", $"Failed to map document {digest}");
await _documentRepository.UpdateStatusAsync(digest, DocumentStatus.Failed, cancellationToken);
state = state.MarkMapped(digest);
_diagnostics.RecordMapError();
}
}
await _stateRepository.UpdateAsync(state, cancellationToken);
Logger.LogInformation("Debuginfod map completed: {MappedCount} documents mapped", mappedCount);
}
/// <inheritdoc/>
public async Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default)
{
var startTime = UtcNow;
try
{
var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
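            // elfutils debuginfod servers expose a Prometheus-style /metrics
            // endpoint; a successful GET here is treated as proof of reachability.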
var response = await httpClient.GetAsync("/metrics", ct);
response.EnsureSuccessStatusCode();
var latency = UtcNow - startTime;
return new SymbolSourceConnectivityResult(
IsConnected: true,
Latency: latency,
ErrorMessage: null,
TestedAt: UtcNow);
}
catch (Exception ex)
{
var latency = UtcNow - startTime;
return new SymbolSourceConnectivityResult(
IsConnected: false,
Latency: latency,
ErrorMessage: ex.Message,
TestedAt: UtcNow);
}
}
/// <inheritdoc/>
public async Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default)
{
var stats = await _observationRepository.GetStatsAsync(ct);
return new SymbolSourceMetadata(
SourceId: SourceId,
DisplayName: DisplayName,
BaseUrl: _options.BaseUrl.ToString(),
LastSyncAt: stats.NewestObservation,
ObservationCount: (int)stats.TotalObservations,
DebugIdCount: (int)stats.UniqueDebugIds,
AdditionalInfo: new Dictionary<string, string>
{
["total_symbols"] = stats.TotalSymbols.ToString()
});
}
/// <inheritdoc/>
public async Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
{
var httpClient = _httpClientFactory.CreateClient(DebuginfodOptions.HttpClientName);
var document = await FetchDebugInfoAsync(httpClient, debugId, ct);
if (document is null)
return null;
// For direct fetch, we need to parse symbols inline
// This is a simplified version - full implementation would use stored payload
return new SymbolData(
DebugId: debugId,
BinaryName: document.Metadata.GetValueOrDefault("binary_name", "unknown"),
Architecture: document.Metadata.GetValueOrDefault("architecture", "unknown"),
Symbols: [],
BuildInfo: null,
Provenance: new SymbolDataProvenance(
SourceId: SourceId,
DocumentUri: document.DocumentUri,
FetchedAt: document.FetchedAt,
ContentHash: document.Digest,
SignatureState: SignatureState.None,
SignatureDetails: null));
}
private ImmutableArray<string> GetPendingDebugIds(SymbolSourceState state)
{
// In production, this would come from a work queue or scheduled list
// For now, return empty - the connector is query-driven via FetchByDebugIdAsync
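        // Cursor shape (illustrative): { "pending_debug_ids": "b5381a45...,0a6e3f21..." }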
if (state.Cursor.TryGetValue("pending_debug_ids", out var pending) &&
!string.IsNullOrWhiteSpace(pending))
{
return pending.Split(',', StringSplitOptions.RemoveEmptyEntries)
.Select(s => s.Trim())
.ToImmutableArray();
}
return ImmutableArray<string>.Empty;
}
private async Task<SymbolRawDocument?> FetchDebugInfoAsync(
HttpClient httpClient,
string debugId,
CancellationToken ct)
{
// Debuginfod URL pattern: /buildid/{buildid}/debuginfo
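        // e.g. GET /buildid/b5381a457906d279073822a5ceb24c4bfef94ddb/debuginfo
        // (build-id value is illustrative)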
var requestUri = $"/buildid/{debugId}/debuginfo";
LogFetch(requestUri, debugId);
var response = await httpClient.GetAsync(requestUri, ct);
response.EnsureSuccessStatusCode();
var content = await response.Content.ReadAsByteArrayAsync(ct);
var digest = ComputeDocumentDigest(content);
// Check if we already have this document
var existing = await _documentRepository.FindByDigestAsync(digest, ct);
if (existing is not null)
{
Logger.LogDebug("Document {Digest} already exists, skipping", digest);
return null;
}
var contentType = response.Content.Headers.ContentType?.MediaType ?? "application/x-elf";
var etag = response.Headers.ETag?.Tag;
return new SymbolRawDocument
{
Digest = digest,
SourceId = SourceId,
            DocumentUri = new Uri(_options.BaseUrl, requestUri).ToString(),
FetchedAt = UtcNow,
RecordedAt = UtcNow,
ContentType = contentType,
ContentSize = content.Length,
ETag = etag,
Status = DocumentStatus.PendingParse,
PayloadId = null, // Will be set by blob storage
Metadata = ImmutableDictionary<string, string>.Empty
.Add("debug_id", debugId)
.Add("binary_name", "unknown") // Would extract from ELF headers
};
}
    private async Task<SymbolObservation> BuildObservationAsync(
        SymbolRawDocument document,
        IReadOnlyList<ObservedSymbol> symbols,
        CancellationToken cancellationToken)
    {
        var debugId = document.Metadata.GetValueOrDefault("debug_id", "unknown");
        var binaryName = document.Metadata.GetValueOrDefault("binary_name", "unknown");
        var architecture = document.Metadata.GetValueOrDefault("architecture", "x86_64");
        // Determine revision number without blocking on async repository work
        var existingObservations = await _observationRepository
            .FindByDebugIdAsync(debugId, cancellationToken);
        var revision = existingObservations.Length + 1;
var observation = new SymbolObservation
{
ObservationId = GenerateObservationId(debugId, revision),
SourceId = SourceId,
DebugId = debugId,
BinaryName = binaryName,
Architecture = architecture,
Symbols = symbols.ToImmutableArray(),
SymbolCount = symbols.Count,
Provenance = new ObservationProvenance
{
SourceId = SourceId,
DocumentUri = document.DocumentUri,
FetchedAt = document.FetchedAt,
RecordedAt = UtcNow,
DocumentHash = document.Digest,
SignatureState = SignatureState.None,
ConnectorVersion = "1.0.0"
},
ContentHash = "", // Will be computed
CreatedAt = UtcNow
};
// Compute content hash
var contentHash = ComputeContentHash(observation);
return observation with { ContentHash = contentHash };
}
}

View File

@@ -0,0 +1,42 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod;
/// <summary>
/// Plugin for the debuginfod symbol source connector.
/// </summary>
public sealed class DebuginfodConnectorPlugin : ISymbolSourceConnectorPlugin
{
/// <inheritdoc/>
public string Name => DebuginfodConnector.SourceName;
/// <inheritdoc/>
public bool IsAvailable(IServiceProvider services)
{
ArgumentNullException.ThrowIfNull(services);
// Check if the connector is configured
var options = services.GetService<Microsoft.Extensions.Options.IOptions<DebuginfodOptions>>();
if (options?.Value is null)
return false;
try
{
options.Value.Validate();
return true;
}
catch
{
return false;
}
}
/// <inheritdoc/>
public ISymbolSourceConnector Create(IServiceProvider services)
{
ArgumentNullException.ThrowIfNull(services);
return ActivatorUtilities.CreateInstance<DebuginfodConnector>(services);
}
}

View File

@@ -0,0 +1,106 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Configuration;
using StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod;
/// <summary>
/// Extension methods for adding debuginfod connector to DI.
/// </summary>
public static class DebuginfodServiceCollectionExtensions
{
/// <summary>
/// Add the debuginfod symbol source connector.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configure">Configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddDebuginfodConnector(
this IServiceCollection services,
Action<DebuginfodOptions> configure)
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configure);
// Register options with validation
services.AddOptions<DebuginfodOptions>()
.Configure(configure)
.PostConfigure(static opts => opts.Validate());
// Register HTTP client
services.AddHttpClient(DebuginfodOptions.HttpClientName, (sp, client) =>
{
var options = sp.GetRequiredService<IOptions<DebuginfodOptions>>().Value;
client.BaseAddress = options.BaseUrl;
client.Timeout = TimeSpan.FromSeconds(options.TimeoutSeconds);
client.DefaultRequestHeaders.Add("User-Agent", options.UserAgent);
client.DefaultRequestHeaders.Add("Accept", "application/octet-stream");
});
// Register services
services.AddSingleton<DebuginfodDiagnostics>();
services.AddSingleton<IDwarfParser, ElfDwarfParser>();
services.AddTransient<DebuginfodConnector>();
services.AddSingleton<ISymbolSourceConnectorPlugin, DebuginfodConnectorPlugin>();
return services;
}
/// <summary>
/// Add the debuginfod symbol source connector with default Fedora configuration.
/// </summary>
/// <param name="services">Service collection.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddDebuginfodConnector(this IServiceCollection services)
{
return services.AddDebuginfodConnector(_ => { });
}
/// <summary>
/// Add the debuginfod connector from environment variables.
/// </summary>
/// <param name="services">Service collection.</param>
/// <returns>Service collection for chaining.</returns>
/// <remarks>
/// Reads configuration from:
/// - DEBUGINFOD_URLS: Space- or comma-separated debuginfod server URLs (the first becomes the primary base URL)
/// - DEBUGINFOD_CACHE: Local cache directory
/// - DEBUGINFOD_TIMEOUT: Request timeout in seconds
/// </remarks>
public static IServiceCollection AddDebuginfodConnectorFromEnvironment(this IServiceCollection services)
{
return services.AddDebuginfodConnector(opts =>
{
var urls = Environment.GetEnvironmentVariable("DEBUGINFOD_URLS");
if (!string.IsNullOrWhiteSpace(urls))
{
var urlList = urls.Split([' ', ','], StringSplitOptions.RemoveEmptyEntries);
if (urlList.Length > 0 && Uri.TryCreate(urlList[0], UriKind.Absolute, out var primary))
{
opts.BaseUrl = primary;
}
for (var i = 1; i < urlList.Length; i++)
{
if (Uri.TryCreate(urlList[i], UriKind.Absolute, out var additional))
{
opts.AdditionalUrls.Add(additional);
}
}
}
var cache = Environment.GetEnvironmentVariable("DEBUGINFOD_CACHE");
if (!string.IsNullOrWhiteSpace(cache))
{
opts.CacheDirectory = cache;
}
var timeout = Environment.GetEnvironmentVariable("DEBUGINFOD_TIMEOUT");
if (!string.IsNullOrWhiteSpace(timeout) && int.TryParse(timeout, out var timeoutSeconds))
{
opts.TimeoutSeconds = timeoutSeconds;
}
});
}
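    // Usage sketch (illustrative wiring; assumes logging and the GroundTruth
    // repositories are registered elsewhere):
    //
    //   var services = new ServiceCollection();
    //   services.AddLogging();
    //   services.AddDebuginfodConnector(opts =>
    //       opts.BaseUrl = new Uri("https://debuginfod.elfutils.org/"));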
}

View File

@@ -0,0 +1,90 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
/// <summary>
/// Diagnostics and metrics for the debuginfod connector.
/// </summary>
public sealed class DebuginfodDiagnostics
{
private readonly Counter<long> _fetchSuccessCounter;
private readonly Counter<long> _fetchNotFoundCounter;
private readonly Counter<long> _fetchErrorCounter;
private readonly Counter<long> _parseSuccessCounter;
private readonly Counter<long> _parseErrorCounter;
private readonly Counter<long> _mapSuccessCounter;
private readonly Counter<long> _mapErrorCounter;
private readonly Counter<long> _mapAocViolationCounter;
private readonly Histogram<long> _symbolCountHistogram;
public DebuginfodDiagnostics(IMeterFactory meterFactory)
{
var meter = meterFactory.Create("StellaOps.BinaryIndex.GroundTruth.Debuginfod");
_fetchSuccessCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.fetch.success",
unit: "{documents}",
description: "Number of successful debuginfod fetches");
_fetchNotFoundCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.fetch.not_found",
unit: "{documents}",
description: "Number of debuginfod fetches that returned 404");
_fetchErrorCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.fetch.error",
unit: "{documents}",
description: "Number of failed debuginfod fetches");
_parseSuccessCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.parse.success",
unit: "{documents}",
description: "Number of successful DWARF parses");
_parseErrorCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.parse.error",
unit: "{documents}",
description: "Number of failed DWARF parses");
_mapSuccessCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.map.success",
unit: "{observations}",
description: "Number of successful observation mappings");
_mapErrorCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.map.error",
unit: "{observations}",
description: "Number of failed observation mappings");
_mapAocViolationCounter = meter.CreateCounter<long>(
"groundtruth.debuginfod.map.aoc_violation",
unit: "{observations}",
description: "Number of AOC violations during mapping");
_symbolCountHistogram = meter.CreateHistogram<long>(
"groundtruth.debuginfod.symbols_per_binary",
unit: "{symbols}",
description: "Distribution of symbol counts per binary");
}
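    // These instruments are picked up by subscribing to the meter name, e.g. with
    // OpenTelemetry: meterProviderBuilder.AddMeter("StellaOps.BinaryIndex.GroundTruth.Debuginfod").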
public void RecordFetchSuccess() => _fetchSuccessCounter.Add(1);
public void RecordFetchNotFound() => _fetchNotFoundCounter.Add(1);
public void RecordFetchError() => _fetchErrorCounter.Add(1);
public void RecordParseSuccess(int symbolCount)
{
_parseSuccessCounter.Add(1);
_symbolCountHistogram.Record(symbolCount);
}
public void RecordParseError() => _parseErrorCounter.Add(1);
    public void RecordMapSuccess(int symbolCount)
    {
        // symbolCount is recorded in the histogram at parse time; the parameter
        // is kept for API symmetry with RecordParseSuccess.
        _mapSuccessCounter.Add(1);
    }
public void RecordMapError() => _mapErrorCounter.Add(1);
public void RecordMapAocViolation() => _mapAocViolationCounter.Add(1);
}

View File

@@ -0,0 +1,87 @@
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
/// <summary>
/// ELF/DWARF parser implementation.
///
/// NOTE: LibObjectFile 1.0.0 has significant API changes from 0.x.
/// This is a stub implementation pending API migration.
/// See: https://github.com/xoofx/LibObjectFile/releases/tag/1.0.0
/// </summary>
public sealed class ElfDwarfParser : IDwarfParser
{
private readonly ILogger<ElfDwarfParser> _logger;
public ElfDwarfParser(ILogger<ElfDwarfParser> logger)
{
_logger = logger;
}
/// <inheritdoc/>
public Task<IReadOnlyList<ObservedSymbol>> ParseSymbolsAsync(Guid payloadId, CancellationToken ct = default)
{
throw new NotImplementedException(
"Parsing from payload ID requires blob storage integration. Use stream overload instead.");
}
/// <inheritdoc/>
public Task<IReadOnlyList<ObservedSymbol>> ParseSymbolsAsync(Stream stream, CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(stream);
_logger.LogWarning(
"ElfDwarfParser is a stub - LibObjectFile 1.0.0 API migration pending. " +
"Returning empty symbol list.");
return Task.FromResult<IReadOnlyList<ObservedSymbol>>(Array.Empty<ObservedSymbol>());
}
/// <inheritdoc/>
public Task<string?> ExtractBuildIdAsync(Stream stream, CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(stream);
_logger.LogWarning(
"ElfDwarfParser.ExtractBuildIdAsync is a stub - LibObjectFile 1.0.0 API migration pending.");
// Try to read build-id using simple heuristics
try
{
// Look for .note.gnu.build-id section marker
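            // For reference, a .note.gnu.build-id note is laid out as:
            //   namesz (4 bytes) | descsz (4 bytes) | type (4 bytes, NT_GNU_BUILD_ID = 3)
            //   name "GNU\0" | desc = the build-id bytes (commonly 20, SHA-1 sized)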
using var reader = new BinaryReader(stream, System.Text.Encoding.UTF8, leaveOpen: true);
// Reset to start
stream.Position = 0;
// Read ELF header to verify it's an ELF file
var magic = reader.ReadBytes(4);
if (magic.Length < 4 || magic[0] != 0x7f || magic[1] != 'E' || magic[2] != 'L' || magic[3] != 'F')
{
_logger.LogDebug("Not an ELF file");
return Task.FromResult<string?>(null);
}
_logger.LogDebug("ELF file detected, but full parsing requires LibObjectFile API migration");
return Task.FromResult<string?>(null);
}
catch (Exception ex)
{
_logger.LogDebug(ex, "Failed to read ELF header");
return Task.FromResult<string?>(null);
}
}
/// <inheritdoc/>
public Task<ObservedBuildMetadata?> ExtractBuildMetadataAsync(Stream stream, CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(stream);
_logger.LogWarning(
"ElfDwarfParser.ExtractBuildMetadataAsync is a stub - LibObjectFile 1.0.0 API migration pending.");
return Task.FromResult<ObservedBuildMetadata?>(null);
}
}

View File

@@ -0,0 +1,80 @@
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
namespace StellaOps.BinaryIndex.GroundTruth.Debuginfod.Internal;
/// <summary>
/// Interface for parsing DWARF debug information from ELF binaries.
/// </summary>
public interface IDwarfParser
{
/// <summary>
/// Parse symbols from a stored payload.
/// </summary>
/// <param name="payloadId">Blob storage ID for the ELF binary.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of parsed symbols.</returns>
Task<IReadOnlyList<ObservedSymbol>> ParseSymbolsAsync(Guid payloadId, CancellationToken ct = default);
/// <summary>
/// Parse symbols from a stream.
/// </summary>
/// <param name="stream">ELF binary stream.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of parsed symbols.</returns>
Task<IReadOnlyList<ObservedSymbol>> ParseSymbolsAsync(Stream stream, CancellationToken ct = default);
/// <summary>
/// Extract build ID from an ELF binary.
/// </summary>
/// <param name="stream">ELF binary stream.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Build ID as hex string, or null if not found.</returns>
Task<string?> ExtractBuildIdAsync(Stream stream, CancellationToken ct = default);
/// <summary>
/// Extract build metadata from DWARF debug info.
/// </summary>
/// <param name="stream">ELF binary stream.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Build metadata.</returns>
Task<ObservedBuildMetadata?> ExtractBuildMetadataAsync(Stream stream, CancellationToken ct = default);
}
/// <summary>
/// Stub implementation of DWARF parser for initial development.
/// Production implementation would use Gimli (Rust) or libdw bindings.
/// </summary>
public sealed class StubDwarfParser : IDwarfParser
{
/// <inheritdoc/>
public Task<IReadOnlyList<ObservedSymbol>> ParseSymbolsAsync(Guid payloadId, CancellationToken ct = default)
{
// Stub: Return empty list
// Production: Load from blob storage and parse
return Task.FromResult<IReadOnlyList<ObservedSymbol>>([]);
}
/// <inheritdoc/>
public Task<IReadOnlyList<ObservedSymbol>> ParseSymbolsAsync(Stream stream, CancellationToken ct = default)
{
// Stub: Return empty list
// Production: Parse ELF + DWARF sections
return Task.FromResult<IReadOnlyList<ObservedSymbol>>([]);
}
/// <inheritdoc/>
public Task<string?> ExtractBuildIdAsync(Stream stream, CancellationToken ct = default)
{
// Stub: Return null
// Production: Read .note.gnu.build-id section
return Task.FromResult<string?>(null);
}
/// <inheritdoc/>
public Task<ObservedBuildMetadata?> ExtractBuildMetadataAsync(Stream stream, CancellationToken ct = default)
{
// Stub: Return null
// Production: Parse DW_AT_producer and other DWARF attributes
return Task.FromResult<ObservedBuildMetadata?>(null);
}
}

View File

@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<NoWarn>$(NoWarn);NU1603</NoWarn>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<Description>Debuginfod symbol source connector for Fedora/RHEL debuginfod services</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="LibObjectFile" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.GroundTruth.Abstractions\StellaOps.BinaryIndex.GroundTruth.Abstractions.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,446 @@
// -----------------------------------------------------------------------------
// AirGapRebuildBundle.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-006 - Air-Gap Rebuild Bundle
// Description: Offline bundle format for reproducible rebuilds.
// -----------------------------------------------------------------------------
using System.Formats.Tar;
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Service for creating and importing air-gap rebuild bundles.
/// </summary>
public sealed class AirGapRebuildBundleService
{
private readonly ILogger<AirGapRebuildBundleService> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
WriteIndented = true,
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
/// <summary>
/// Initializes a new instance of the <see cref="AirGapRebuildBundleService"/> class.
/// </summary>
public AirGapRebuildBundleService(ILogger<AirGapRebuildBundleService> logger)
{
_logger = logger;
}
/// <summary>
/// Exports an air-gap rebuild bundle.
/// </summary>
public async Task<string> ExportBundleAsync(
AirGapBundleRequest request,
CancellationToken cancellationToken = default)
{
request.Validate();
var bundleDir = Path.Combine(
request.OutputDirectory ?? Path.GetTempPath(),
$"rebuild-bundle-{DateTime.UtcNow:yyyyMMdd-HHmmss}");
Directory.CreateDirectory(bundleDir);
var sourcesDir = Path.Combine(bundleDir, "sources");
var buildinfoDir = Path.Combine(bundleDir, "buildinfo");
var environmentDir = Path.Combine(bundleDir, "environment");
Directory.CreateDirectory(sourcesDir);
Directory.CreateDirectory(buildinfoDir);
Directory.CreateDirectory(environmentDir);
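        // Bundle layout before archiving (manifest.json is written last):
        //
        //   rebuild-bundle-<timestamp>/
        //     manifest.json
        //     sources/       pre-fetched source artifacts
        //     buildinfo/     .buildinfo files per package
        //     environment/   Dockerfile + apt-sources.list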
var manifest = new AirGapBundleManifest
{
Version = "1.0",
CreatedAt = DateTimeOffset.UtcNow,
Packages = [],
Files = []
};
_logger.LogInformation("Creating air-gap bundle for {Count} packages", request.Packages.Count);
foreach (var pkg in request.Packages)
{
// Copy source files
foreach (var sourceFile in pkg.SourceFiles)
{
var destPath = Path.Combine(sourcesDir, Path.GetFileName(sourceFile));
if (File.Exists(sourceFile))
{
File.Copy(sourceFile, destPath, overwrite: true);
manifest.Files.Add(new BundleFileEntry
{
Path = $"sources/{Path.GetFileName(sourceFile)}",
Sha256 = await ComputeSha256Async(destPath, cancellationToken),
Size = new FileInfo(destPath).Length
});
}
}
// Copy buildinfo
if (pkg.BuildinfoPath is not null && File.Exists(pkg.BuildinfoPath))
{
var destPath = Path.Combine(buildinfoDir, Path.GetFileName(pkg.BuildinfoPath));
File.Copy(pkg.BuildinfoPath, destPath, overwrite: true);
manifest.Files.Add(new BundleFileEntry
{
Path = $"buildinfo/{Path.GetFileName(pkg.BuildinfoPath)}",
Sha256 = await ComputeSha256Async(destPath, cancellationToken),
Size = new FileInfo(destPath).Length
});
}
manifest.Packages.Add(new BundlePackageEntry
{
Name = pkg.Name,
Version = pkg.Version,
Architecture = pkg.Architecture,
BuildinfoFile = pkg.BuildinfoPath is not null ? $"buildinfo/{Path.GetFileName(pkg.BuildinfoPath)}" : null
});
}
// Generate Dockerfile for build environment
var dockerfile = GenerateBundleDockerfile(request);
var dockerfilePath = Path.Combine(environmentDir, "Dockerfile");
await File.WriteAllTextAsync(dockerfilePath, dockerfile, cancellationToken);
manifest.Files.Add(new BundleFileEntry
{
Path = "environment/Dockerfile",
Sha256 = await ComputeSha256Async(dockerfilePath, cancellationToken),
Size = new FileInfo(dockerfilePath).Length
});
// Generate apt sources list and record it in the manifest like the Dockerfile
var aptSources = GenerateAptSources(request);
var aptSourcesPath = Path.Combine(environmentDir, "apt-sources.list");
await File.WriteAllTextAsync(aptSourcesPath, aptSources, cancellationToken);
manifest.Files.Add(new BundleFileEntry
{
    Path = "environment/apt-sources.list",
    Sha256 = await ComputeSha256Async(aptSourcesPath, cancellationToken),
    Size = new FileInfo(aptSourcesPath).Length
});
// Write manifest
var manifestPath = Path.Combine(bundleDir, "manifest.json");
var manifestJson = JsonSerializer.Serialize(manifest, JsonOptions);
await File.WriteAllTextAsync(manifestPath, manifestJson, cancellationToken);
// Create archive
var archivePath = $"{bundleDir}.tar.gz";
await CreateTarGzAsync(bundleDir, archivePath, cancellationToken);
_logger.LogInformation("Created air-gap bundle: {Path}", archivePath);
// Cleanup temp directory
if (request.CleanupTempFiles)
{
Directory.Delete(bundleDir, recursive: true);
}
return archivePath;
}
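    // Usage sketch (paths and versions illustrative):
    //
    //   var archivePath = await bundleService.ExportBundleAsync(new AirGapBundleRequest
    //   {
    //       Packages =
    //       [
    //           new AirGapPackageSpec
    //           {
    //               Name = "zlib1g",
    //               Version = "1:1.2.13.dfsg-1",
    //               Architecture = "amd64",
    //               SourceFiles = ["/mirror/zlib_1.2.13.dfsg.orig.tar.xz"],
    //               BuildinfoPath = "/mirror/zlib_1.2.13.dfsg-1_amd64.buildinfo"
    //           }
    //       ]
    //   });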
/// <summary>
/// Imports an air-gap rebuild bundle.
/// </summary>
public async Task<AirGapBundleManifest> ImportBundleAsync(
string bundlePath,
string outputDirectory,
CancellationToken cancellationToken = default)
{
if (!File.Exists(bundlePath))
{
throw new FileNotFoundException("Bundle not found", bundlePath);
}
_logger.LogInformation("Importing air-gap bundle from {Path}", bundlePath);
// Extract archive
await ExtractTarGzAsync(bundlePath, outputDirectory, cancellationToken);
// Read manifest
var manifestPath = Path.Combine(outputDirectory, "manifest.json");
if (!File.Exists(manifestPath))
{
throw new InvalidOperationException("Invalid bundle: manifest.json not found");
}
var manifestJson = await File.ReadAllTextAsync(manifestPath, cancellationToken);
var manifest = JsonSerializer.Deserialize<AirGapBundleManifest>(manifestJson, JsonOptions)
?? throw new InvalidOperationException("Failed to parse manifest");
// Verify checksums
foreach (var file in manifest.Files)
{
var filePath = Path.Combine(outputDirectory, file.Path.Replace('/', Path.DirectorySeparatorChar));
if (File.Exists(filePath))
{
var actualHash = await ComputeSha256Async(filePath, cancellationToken);
if (!string.Equals(actualHash, file.Sha256, StringComparison.OrdinalIgnoreCase))
{
_logger.LogWarning("Checksum mismatch for {File}", file.Path);
}
}
else
{
_logger.LogWarning("Missing file: {File}", file.Path);
}
}
_logger.LogInformation("Imported bundle with {Count} packages", manifest.Packages.Count);
return manifest;
}
/// <summary>
/// Executes a rebuild from an imported bundle.
/// </summary>
public async Task<RebuildResult> RebuildFromBundleAsync(
string bundleDirectory,
string packageName,
LocalRebuildOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= new LocalRebuildOptions();
// Read manifest
var manifestPath = Path.Combine(bundleDirectory, "manifest.json");
var manifestJson = await File.ReadAllTextAsync(manifestPath, cancellationToken);
var manifest = JsonSerializer.Deserialize<AirGapBundleManifest>(manifestJson, JsonOptions);
var package = manifest?.Packages.FirstOrDefault(p => p.Name == packageName)
?? throw new InvalidOperationException($"Package {packageName} not found in bundle");
var buildinfoPath = package.BuildinfoFile is not null
? Path.Combine(bundleDirectory, package.BuildinfoFile.Replace('/', Path.DirectorySeparatorChar))
: null;
if (buildinfoPath is null || !File.Exists(buildinfoPath))
{
return RebuildResult.Failed(
Guid.NewGuid().ToString("N")[..12],
"Buildinfo not found in bundle",
backend: RebuildBackend.AirGap);
}
// Use local rebuild backend with air-gap sources
var localBackend = new LocalRebuildBackend(
Microsoft.Extensions.Options.Options.Create(new LocalRebuildBackendOptions()),
            Microsoft.Extensions.Logging.Abstractions.NullLogger<LocalRebuildBackend>.Instance);
var result = await localBackend.RebuildAsync(buildinfoPath, options, cancellationToken);
// Update backend type
return result with { Backend = RebuildBackend.AirGap };
}
private static string GenerateBundleDockerfile(AirGapBundleRequest request)
{
var baseImage = request.BaseImage ?? "debian:bookworm";
return $"""
FROM {baseImage}
# This is an air-gap rebuild environment
# Sources are pre-fetched in the bundle
RUN apt-get update && apt-get install -y \
build-essential \
devscripts \
dpkg-dev \
fakeroot \
debhelper \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /build
# Copy sources from bundle
COPY sources/ /build/sources/
COPY buildinfo/ /build/buildinfo/
CMD ["/bin/bash"]
""";
}
private static string GenerateAptSources(AirGapBundleRequest request)
{
var distribution = request.Distribution ?? "bookworm";
return $"""
# Debian {distribution} sources
# For air-gap scenarios, these would point to local mirrors
deb http://deb.debian.org/debian {distribution} main
deb-src http://deb.debian.org/debian {distribution} main
""";
}
    private static async Task CreateTarGzAsync(string sourceDir, string destPath, CancellationToken ct)
    {
        // Produce a real tar.gz archive via System.Formats.Tar (available since .NET 7),
        // so the bundle extension matches its actual format.
        if (File.Exists(destPath)) File.Delete(destPath);
        await using var fileStream = File.Create(destPath);
        await using var gzip = new GZipStream(fileStream, CompressionLevel.Optimal);
        await TarFile.CreateFromDirectoryAsync(sourceDir, gzip, includeBaseDirectory: false, cancellationToken: ct);
    }
    private static async Task ExtractTarGzAsync(string archivePath, string destDir, CancellationToken ct)
    {
        Directory.CreateDirectory(destDir);
        await using var fileStream = File.OpenRead(archivePath);
        await using var gzip = new GZipStream(fileStream, CompressionMode.Decompress);
        await TarFile.ExtractToDirectoryAsync(gzip, destDir, overwriteFiles: true, cancellationToken: ct);
    }
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
await using var stream = File.OpenRead(filePath);
var hash = await SHA256.HashDataAsync(stream, ct);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
/// <summary>
/// Request to create an air-gap rebuild bundle.
/// </summary>
public sealed record AirGapBundleRequest
{
/// <summary>
/// Gets the packages to include.
/// </summary>
public required List<AirGapPackageSpec> Packages { get; init; }
/// <summary>
/// Gets the output directory.
/// </summary>
public string? OutputDirectory { get; init; }
/// <summary>
/// Gets the base image for the build environment.
/// </summary>
public string? BaseImage { get; init; }
/// <summary>
/// Gets the Debian distribution.
/// </summary>
public string? Distribution { get; init; }
/// <summary>
/// Gets whether to cleanup temp files.
/// </summary>
public bool CleanupTempFiles { get; init; } = true;
/// <summary>
/// Validates the request.
/// </summary>
public void Validate()
{
if (Packages is not { Count: > 0 })
throw new ArgumentException("At least one package is required");
}
}
/// <summary>
/// Package specification for air-gap bundle.
/// </summary>
public sealed record AirGapPackageSpec
{
/// <summary>
/// Gets the package name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Gets the package version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the source files.
/// </summary>
public List<string> SourceFiles { get; init; } = [];
/// <summary>
/// Gets the buildinfo path.
/// </summary>
public string? BuildinfoPath { get; init; }
}
/// <summary>
/// Air-gap bundle manifest.
/// </summary>
public sealed record AirGapBundleManifest
{
/// <summary>
/// Gets the manifest version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets when the bundle was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Gets the packages in the bundle.
/// </summary>
public required List<BundlePackageEntry> Packages { get; init; }
/// <summary>
/// Gets the files in the bundle.
/// </summary>
public required List<BundleFileEntry> Files { get; init; }
}
/// <summary>
/// Package entry in bundle manifest.
/// </summary>
public sealed record BundlePackageEntry
{
/// <summary>
/// Gets the package name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Gets the version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the buildinfo file path in bundle.
/// </summary>
public string? BuildinfoFile { get; init; }
}
/// <summary>
/// File entry in bundle manifest.
/// </summary>
public sealed record BundleFileEntry
{
/// <summary>
/// Gets the file path in bundle.
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Gets the SHA-256 hash.
/// </summary>
public required string Sha256 { get; init; }
/// <summary>
/// Gets the file size.
/// </summary>
public long Size { get; init; }
}

View File

@@ -0,0 +1,439 @@
// -----------------------------------------------------------------------------
// DeterminismValidator.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-004 - Determinism Validation
// Description: Validates determinism of rebuilt binaries.
// -----------------------------------------------------------------------------
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Validates determinism of rebuilt binaries.
/// </summary>
public sealed class DeterminismValidator
{
private readonly ILogger<DeterminismValidator> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="DeterminismValidator"/> class.
/// </summary>
public DeterminismValidator(ILogger<DeterminismValidator> logger)
{
_logger = logger;
}
/// <summary>
/// Validates that a rebuilt binary is deterministic compared to the original.
/// </summary>
public async Task<DeterminismReport> ValidateAsync(
string originalPath,
string rebuiltPath,
DeterminismValidationOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= DeterminismValidationOptions.Default;
var issues = new List<DeterminismIssue>();
// Check file existence
if (!File.Exists(originalPath))
{
return DeterminismReport.Failed("Original file not found", originalPath, rebuiltPath);
}
if (!File.Exists(rebuiltPath))
{
return DeterminismReport.Failed("Rebuilt file not found", originalPath, rebuiltPath);
}
var originalInfo = new FileInfo(originalPath);
var rebuiltInfo = new FileInfo(rebuiltPath);
// Size check
if (originalInfo.Length != rebuiltInfo.Length)
{
issues.Add(new DeterminismIssue
{
Type = DeterminismIssueType.SizeMismatch,
Description = $"Size mismatch: original={originalInfo.Length}, rebuilt={rebuiltInfo.Length}",
Severity = IssueSeverity.Error
});
}
// Hash comparison
var originalHash = await ComputeSha256Async(originalPath, cancellationToken);
var rebuiltHash = await ComputeSha256Async(rebuiltPath, cancellationToken);
var hashMatches = string.Equals(originalHash, rebuiltHash, StringComparison.OrdinalIgnoreCase);
if (!hashMatches)
{
issues.Add(new DeterminismIssue
{
Type = DeterminismIssueType.HashMismatch,
Description = $"SHA-256 mismatch: original={originalHash}, rebuilt={rebuiltHash}",
Severity = IssueSeverity.Error
});
// Perform deeper analysis if hashes don't match
if (options.PerformDeepAnalysis)
{
var deepIssues = await PerformDeepAnalysisAsync(originalPath, rebuiltPath, cancellationToken);
issues.AddRange(deepIssues);
}
}
var isReproducible = hashMatches && !issues.Any(i => i.Severity == IssueSeverity.Error);
_logger.LogInformation(
"Determinism validation for {Original} vs {Rebuilt}: {Result}",
Path.GetFileName(originalPath),
Path.GetFileName(rebuiltPath),
isReproducible ? "REPRODUCIBLE" : "NOT REPRODUCIBLE");
return new DeterminismReport
{
IsReproducible = isReproducible,
OriginalPath = originalPath,
RebuiltPath = rebuiltPath,
OriginalSha256 = originalHash,
RebuiltSha256 = rebuiltHash,
Issues = issues,
ValidatedAt = DateTimeOffset.UtcNow
};
}
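    // Usage sketch (paths are placeholders):
    //
    //   var report = await validator.ValidateAsync(
    //       "/artifacts/original/zlib1g_1.2.13_amd64.deb",
    //       "/artifacts/rebuilt/zlib1g_1.2.13_amd64.deb");
    //   // IsReproducible is true only for a byte-identical rebuild;
    //   // otherwise report.Issues enumerates the differences found.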
/// <summary>
/// Validates multiple rebuilt artifacts against their originals.
/// </summary>
public async Task<DeterminismBatchReport> ValidateBatchAsync(
IReadOnlyList<(string Original, string Rebuilt)> pairs,
DeterminismValidationOptions? options = null,
CancellationToken cancellationToken = default)
{
var reports = new List<DeterminismReport>();
foreach (var (original, rebuilt) in pairs)
{
var report = await ValidateAsync(original, rebuilt, options, cancellationToken);
reports.Add(report);
}
return new DeterminismBatchReport
{
Reports = reports,
TotalCount = reports.Count,
ReproducibleCount = reports.Count(r => r.IsReproducible),
ValidatedAt = DateTimeOffset.UtcNow
};
}
private async Task<IReadOnlyList<DeterminismIssue>> PerformDeepAnalysisAsync(
string originalPath,
string rebuiltPath,
CancellationToken ct)
{
var issues = new List<DeterminismIssue>();
try
{
// Read both files
var originalBytes = await File.ReadAllBytesAsync(originalPath, ct);
var rebuiltBytes = await File.ReadAllBytesAsync(rebuiltPath, ct);
// Find first difference offset
var minLen = Math.Min(originalBytes.Length, rebuiltBytes.Length);
var firstDiffOffset = -1;
var diffCount = 0;
for (var i = 0; i < minLen; i++)
{
if (originalBytes[i] != rebuiltBytes[i])
{
if (firstDiffOffset < 0) firstDiffOffset = i;
diffCount++;
}
}
if (firstDiffOffset >= 0)
{
issues.Add(new DeterminismIssue
{
Type = DeterminismIssueType.ByteDifference,
Description = $"First difference at offset 0x{firstDiffOffset:X}, total {diffCount} differing bytes",
Severity = IssueSeverity.Info,
Details = new Dictionary<string, object>
{
["firstDiffOffset"] = firstDiffOffset,
["diffCount"] = diffCount,
["diffPercentage"] = Math.Round(100.0 * diffCount / minLen, 2)
}
});
}
// Check for common non-determinism patterns
var patterns = DetectNonDeterminismPatterns(originalBytes, rebuiltBytes);
issues.AddRange(patterns);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Deep analysis failed");
issues.Add(new DeterminismIssue
{
Type = DeterminismIssueType.AnalysisError,
Description = $"Deep analysis failed: {ex.Message}",
Severity = IssueSeverity.Warning
});
}
return issues;
}
private static IEnumerable<DeterminismIssue> DetectNonDeterminismPatterns(
byte[] original,
byte[] rebuilt)
{
var issues = new List<DeterminismIssue>();
// Check for timestamp-like patterns (32-bit Unix timestamps)
// This is a simplified heuristic
if (original.Length >= 4 && rebuilt.Length >= 4)
{
// Look for differences that could be timestamps
var now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
var oneYearAgo = now - 365 * 24 * 3600;
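            // Heuristic: a 32-bit little-endian word that differs between the two
            // binaries and decodes to a Unix time inside [now - 1 year, now + 1 day]
            // in both files is flagged as a probable embedded timestamp. False
            // positives are possible on dense binary data.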
for (var i = 0; i < Math.Min(original.Length, rebuilt.Length) - 4; i += 4)
{
var origVal = BitConverter.ToUInt32(original, i);
var rebuildVal = BitConverter.ToUInt32(rebuilt, i);
if (origVal != rebuildVal &&
origVal > oneYearAgo && origVal < now + 86400 &&
rebuildVal > oneYearAgo && rebuildVal < now + 86400)
{
issues.Add(new DeterminismIssue
{
Type = DeterminismIssueType.EmbeddedTimestamp,
Description = $"Possible embedded timestamp at offset 0x{i:X}",
Severity = IssueSeverity.Info,
Details = new Dictionary<string, object>
{
["offset"] = i,
["originalValue"] = origVal,
["rebuiltValue"] = rebuildVal
}
});
break; // Only report first occurrence
}
}
}
return issues;
}
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
await using var stream = File.OpenRead(filePath);
var hash = await SHA256.HashDataAsync(stream, ct);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
/// <summary>
/// Options for determinism validation.
/// </summary>
public sealed record DeterminismValidationOptions
{
/// <summary>
/// Gets whether to perform deep binary analysis.
/// </summary>
public bool PerformDeepAnalysis { get; init; } = true;
/// <summary>
/// Gets whether to check for timestamp patterns.
/// </summary>
public bool DetectTimestamps { get; init; } = true;
/// <summary>
/// Gets whether to check for build path patterns.
/// </summary>
public bool DetectBuildPaths { get; init; } = true;
/// <summary>
/// Gets the default options.
/// </summary>
public static DeterminismValidationOptions Default { get; } = new();
}
/// <summary>
/// Report from determinism validation.
/// </summary>
public sealed record DeterminismReport
{
/// <summary>
/// Gets whether the rebuild is reproducible.
/// </summary>
public required bool IsReproducible { get; init; }
/// <summary>
/// Gets the original file path.
/// </summary>
public required string OriginalPath { get; init; }
/// <summary>
/// Gets the rebuilt file path.
/// </summary>
public required string RebuiltPath { get; init; }
/// <summary>
/// Gets the original file SHA-256.
/// </summary>
public string? OriginalSha256 { get; init; }
/// <summary>
/// Gets the rebuilt file SHA-256.
/// </summary>
public string? RebuiltSha256 { get; init; }
/// <summary>
/// Gets the list of issues found.
/// </summary>
public IReadOnlyList<DeterminismIssue>? Issues { get; init; }
/// <summary>
/// Gets when validation was performed.
/// </summary>
public DateTimeOffset ValidatedAt { get; init; }
/// <summary>
/// Gets error message if validation failed.
/// </summary>
public string? Error { get; init; }
/// <summary>
/// Creates a failed report.
/// </summary>
public static DeterminismReport Failed(string error, string original, string rebuilt) => new()
{
IsReproducible = false,
OriginalPath = original,
RebuiltPath = rebuilt,
Error = error,
ValidatedAt = DateTimeOffset.UtcNow
};
}
/// <summary>
/// Batch report from determinism validation.
/// </summary>
public sealed record DeterminismBatchReport
{
/// <summary>
/// Gets the individual reports.
/// </summary>
public required IReadOnlyList<DeterminismReport> Reports { get; init; }
/// <summary>
/// Gets the total count.
/// </summary>
public required int TotalCount { get; init; }
/// <summary>
/// Gets the count of reproducible builds.
/// </summary>
public required int ReproducibleCount { get; init; }
/// <summary>
/// Gets the reproducibility rate.
/// </summary>
public double ReproducibilityRate => TotalCount > 0 ? (double)ReproducibleCount / TotalCount : 0;
/// <summary>
/// Gets when validation was performed.
/// </summary>
public DateTimeOffset ValidatedAt { get; init; }
}
/// <summary>
/// A determinism issue.
/// </summary>
public sealed record DeterminismIssue
{
/// <summary>
/// Gets the issue type.
/// </summary>
public required DeterminismIssueType Type { get; init; }
/// <summary>
/// Gets the issue description.
/// </summary>
public required string Description { get; init; }
/// <summary>
/// Gets the severity.
/// </summary>
public required IssueSeverity Severity { get; init; }
/// <summary>
/// Gets additional details.
/// </summary>
public IReadOnlyDictionary<string, object>? Details { get; init; }
}
/// <summary>
/// Type of determinism issue.
/// </summary>
public enum DeterminismIssueType
{
/// <summary>
/// File size mismatch.
/// </summary>
SizeMismatch,
/// <summary>
/// Hash mismatch.
/// </summary>
HashMismatch,
/// <summary>
/// Byte-level difference.
/// </summary>
ByteDifference,
/// <summary>
/// Embedded timestamp detected.
/// </summary>
EmbeddedTimestamp,
/// <summary>
/// Embedded build path detected.
/// </summary>
EmbeddedBuildPath,
/// <summary>
/// Analysis error.
/// </summary>
AnalysisError
}
/// <summary>
/// Severity of an issue.
/// </summary>
public enum IssueSeverity
{
/// <summary>
/// Informational.
/// </summary>
Info,
/// <summary>
/// Warning.
/// </summary>
Warning,
/// <summary>
/// Error.
/// </summary>
Error
}

View File

@@ -0,0 +1,93 @@
// -----------------------------------------------------------------------------
// IRebuildService.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-001 - Rebuild Service Abstractions
// Description: Main interface for reproducible rebuild orchestration.
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Service for orchestrating reproducible binary rebuilds.
/// </summary>
public interface IRebuildService
{
/// <summary>
/// Requests a rebuild for a package.
/// </summary>
/// <param name="request">The rebuild request.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild job ID.</returns>
Task<string> RequestRebuildAsync(
RebuildRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the status of a rebuild job.
/// </summary>
/// <param name="jobId">The job ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild status.</returns>
Task<RebuildStatus> GetStatusAsync(
string jobId,
CancellationToken cancellationToken = default);
/// <summary>
/// Downloads the artifacts from a completed rebuild.
/// </summary>
/// <param name="jobId">The job ID.</param>
/// <param name="outputDirectory">The directory to write artifacts.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild result with artifacts.</returns>
Task<RebuildResult> DownloadArtifactsAsync(
string jobId,
string outputDirectory,
CancellationToken cancellationToken = default);
/// <summary>
/// Performs a local rebuild using a .buildinfo file.
/// </summary>
/// <param name="buildinfoPath">Path to the .buildinfo file.</param>
/// <param name="options">Local rebuild options.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The rebuild result.</returns>
Task<RebuildResult> RebuildLocalAsync(
string buildinfoPath,
LocalRebuildOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Queries if a package has existing rebuild data.
/// </summary>
/// <param name="package">Package name.</param>
/// <param name="version">Package version.</param>
/// <param name="architecture">Target architecture.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Existing rebuild info if available.</returns>
Task<RebuildInfo?> QueryExistingRebuildAsync(
string package,
string version,
string architecture,
CancellationToken cancellationToken = default);
}
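// Usage sketch (package coordinates illustrative):
//
//   var jobId = await rebuildService.RequestRebuildAsync(new RebuildRequest
//   {
//       Package = "openssl",
//       Version = "3.0.11-1~deb12u2",
//       Architecture = "amd64",
//       Distribution = "bookworm"
//   });
//   var status = await rebuildService.GetStatusAsync(jobId);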
/// <summary>
/// Rebuild backend type.
/// </summary>
public enum RebuildBackend
{
/// <summary>
/// Remote rebuild via reproduce.debian.net.
/// </summary>
ReproduceDebian,
/// <summary>
/// Local container-based rebuild.
/// </summary>
Local,
/// <summary>
/// Air-gapped rebuild from pre-fetched bundle.
/// </summary>
AirGap
}

View File

@@ -0,0 +1,459 @@
// -----------------------------------------------------------------------------
// LocalRebuildBackend.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-003 - Local Rebuild Backend
// Description: Container-based local rebuild using .buildinfo files.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Local container-based rebuild backend.
/// </summary>
public sealed partial class LocalRebuildBackend
{
private readonly LocalRebuildBackendOptions _options;
private readonly ILogger<LocalRebuildBackend> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="LocalRebuildBackend"/> class.
/// </summary>
public LocalRebuildBackend(
IOptions<LocalRebuildBackendOptions> options,
ILogger<LocalRebuildBackend> logger)
{
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Performs a local rebuild using a .buildinfo file.
/// </summary>
public async Task<RebuildResult> RebuildAsync(
string buildinfoPath,
LocalRebuildOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= new LocalRebuildOptions();
var jobId = Guid.NewGuid().ToString("N")[..12];
var sw = Stopwatch.StartNew();
var buildLog = new StringBuilder();
try
{
// Parse .buildinfo file
var buildinfo = await ParseBuildinfoAsync(buildinfoPath, cancellationToken);
buildLog.AppendLine($"Parsed buildinfo: {buildinfo.Source} {buildinfo.Version}");
_logger.LogInformation("Starting local rebuild for {Package} {Version}", buildinfo.Source, buildinfo.Version);
// Create build directory
var buildDir = Path.Combine(
options.OutputDirectory ?? Path.GetTempPath(),
$"rebuild-{jobId}");
Directory.CreateDirectory(buildDir);
// Generate Dockerfile
var dockerfile = GenerateDockerfile(buildinfo, options);
var dockerfilePath = Path.Combine(buildDir, "Dockerfile");
await File.WriteAllTextAsync(dockerfilePath, dockerfile, cancellationToken);
buildLog.AppendLine($"Generated Dockerfile at {dockerfilePath}");
// Generate build script
var buildScript = GenerateBuildScript(buildinfo);
var buildScriptPath = Path.Combine(buildDir, "build.sh");
await File.WriteAllTextAsync(buildScriptPath, buildScript, cancellationToken);
// Build container
var containerName = $"stella-rebuild-{jobId}";
var imageName = $"stella-rebuild-{buildinfo.Source}-{jobId}";
var runtime = options.ContainerRuntime == ContainerRuntime.Podman ? "podman" : "docker";
buildLog.AppendLine("Building container image...");
var buildImageResult = await RunContainerCommandAsync(
runtime,
$"build -t {imageName} {buildDir}",
options.Timeout,
cancellationToken);
if (!buildImageResult.Success)
{
return RebuildResult.Failed(jobId, "Container image build failed", buildImageResult.Output, RebuildBackend.Local);
}
buildLog.AppendLine(buildImageResult.Output);
// Run build container
buildLog.AppendLine("Running rebuild in container...");
var runArgs = new StringBuilder($"run --name {containerName} --rm");
if (options.CpuLimit.HasValue)
{
runArgs.Append($" --cpus={options.CpuLimit}");
}
if (!string.IsNullOrEmpty(options.MemoryLimit))
{
runArgs.Append($" --memory={options.MemoryLimit}");
}
runArgs.Append($" -v {buildDir}/output:/output {imageName}");
Directory.CreateDirectory(Path.Combine(buildDir, "output"));
var runResult = await RunContainerCommandAsync(
runtime,
runArgs.ToString(),
options.Timeout,
cancellationToken);
buildLog.AppendLine(runResult.Output);
if (!runResult.Success)
{
return RebuildResult.Failed(jobId, "Build execution failed", buildLog.ToString(), RebuildBackend.Local);
}
// Collect artifacts
var outputDir = Path.Combine(buildDir, "output");
var artifacts = await CollectArtifactsAsync(outputDir, cancellationToken);
// Verify checksums
var checksumResults = await VerifyChecksumsAsync(artifacts, buildinfo, cancellationToken);
var reproducible = checksumResults.All(c => c.Matches);
sw.Stop();
_logger.LogInformation(
"Rebuild completed: {Package} {Version} - Reproducible: {Reproducible}",
buildinfo.Source, buildinfo.Version, reproducible);
return new RebuildResult
{
JobId = jobId,
Success = true,
Reproducible = reproducible,
Artifacts = artifacts,
BuildLog = buildLog.ToString(),
Duration = sw.Elapsed,
Backend = RebuildBackend.Local,
ChecksumResults = checksumResults,
BuildinfoPath = buildinfoPath
};
}
catch (Exception ex)
{
sw.Stop();
_logger.LogError(ex, "Local rebuild failed for {BuildinfoPath}", buildinfoPath);
return RebuildResult.Failed(jobId, ex.Message, buildLog.ToString(), RebuildBackend.Local);
}
}
    private async Task<BuildinfoData> ParseBuildinfoAsync(string path, CancellationToken ct)
    {
        var content = await File.ReadAllTextAsync(path, ct);
        var data = new BuildinfoData();
        var inSha256Section = false;
        foreach (var line in content.Split('\n'))
        {
            // Continuation lines (leading space) carry no "Key:" prefix; they hold
            // the checksum entries of the section opened above them. The previous
            // colon check dropped them, so checksums were never captured.
            if (line.StartsWith(' '))
            {
                if (inSha256Section && data.Checksums is not null)
                {
                    var parts = line.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries);
                    if (parts.Length >= 3)
                    {
                        data.Checksums[parts[2]] = parts[0]; // filename -> sha256
                    }
                }
                continue;
            }
            var colonIdx = line.IndexOf(':');
            if (colonIdx < 0) continue;
            var key = line[..colonIdx].Trim();
            var value = line[(colonIdx + 1)..].Trim();
            inSha256Section = key == "Checksums-Sha256";
            switch (key)
            {
                case "Source":
                    data.Source = value;
                    break;
                case "Version":
                    data.Version = value;
                    break;
                case "Architecture":
                    data.Architecture = value;
                    break;
                case "Build-Origin":
                    data.BuildOrigin = value;
                    break;
                case "Build-Architecture":
                    data.BuildArchitecture = value;
                    break;
                case "Build-Date":
                    data.BuildDate = value;
                    break;
                case "Build-Path":
                    data.BuildPath = value;
                    break;
                case "Installed-Build-Depends":
                    data.InstalledBuildDepends = value.Split(',').Select(d => d.Trim()).ToList();
                    break;
                case "Environment":
                    // Environment variables are not currently consumed.
                    break;
                case "Checksums-Sha256":
                    // Entries follow on indented continuation lines.
                    data.Checksums = new Dictionary<string, string>();
                    break;
            }
        }
        return data;
    }
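    // Example .buildinfo fragment this parser accepts (values illustrative):
    //
    //   Source: zlib
    //   Version: 1:1.2.13.dfsg-1
    //   Architecture: amd64
    //   Checksums-Sha256:
    //    6a1f0b... 123456 zlib1g_1.2.13.dfsg-1_amd64.deb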
private string GenerateDockerfile(BuildinfoData buildinfo, LocalRebuildOptions options)
{
var baseImage = options.BaseImage ?? _options.DefaultBaseImage;
var sb = new StringBuilder();
sb.AppendLine($"FROM {baseImage}");
sb.AppendLine();
sb.AppendLine("# Install build dependencies");
sb.AppendLine("RUN apt-get update && apt-get install -y \\");
sb.AppendLine(" build-essential \\");
sb.AppendLine(" devscripts \\");
sb.AppendLine(" dpkg-dev \\");
sb.AppendLine(" fakeroot \\");
sb.AppendLine(" debhelper \\");
// Add package-specific build dependencies
if (buildinfo.InstalledBuildDepends is { Count: > 0 })
{
foreach (var dep in buildinfo.InstalledBuildDepends.Take(20)) // Limit for Dockerfile length
{
// Extract package name without version constraint
var match = PackageNameRegex().Match(dep);
if (match.Success)
{
sb.AppendLine($" {match.Groups[1].Value} \\");
}
}
}
sb.AppendLine(" && rm -rf /var/lib/apt/lists/*");
sb.AppendLine();
// Set up build environment
if (!string.IsNullOrEmpty(buildinfo.BuildPath))
{
sb.AppendLine($"WORKDIR {buildinfo.BuildPath}");
}
else
{
sb.AppendLine("WORKDIR /build");
}
sb.AppendLine();
sb.AppendLine("# Copy build script");
sb.AppendLine("COPY build.sh /build/build.sh");
sb.AppendLine("RUN chmod +x /build/build.sh");
sb.AppendLine();
sb.AppendLine("CMD [\"/build/build.sh\"]");
return sb.ToString();
}
private static string GenerateBuildScript(BuildinfoData buildinfo)
{
var sb = new StringBuilder();
sb.AppendLine("#!/bin/bash");
sb.AppendLine("set -ex");
sb.AppendLine();
sb.AppendLine("# Fetch source package");
sb.AppendLine($"apt-get source {buildinfo.Source}={buildinfo.Version}");
sb.AppendLine();
sb.AppendLine($"cd {buildinfo.Source}-*");
sb.AppendLine();
sb.AppendLine("# Build package");
sb.AppendLine("dpkg-buildpackage -b -uc -us");
sb.AppendLine();
sb.AppendLine("# Copy artifacts to output");
sb.AppendLine("cp ../*.deb /output/ || true");
sb.AppendLine("cp ../*.buildinfo /output/ || true");
sb.AppendLine("cp ../*.changes /output/ || true");
return sb.ToString();
}
private async Task<(bool Success, string Output)> RunContainerCommandAsync(
string runtime,
string args,
TimeSpan timeout,
CancellationToken ct)
{
var psi = new ProcessStartInfo
{
FileName = runtime,
Arguments = args,
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = true
};
using var process = new Process { StartInfo = psi };
var output = new StringBuilder();
process.OutputDataReceived += (_, e) =>
{
if (e.Data is not null) output.AppendLine(e.Data);
};
process.ErrorDataReceived += (_, e) =>
{
if (e.Data is not null) output.AppendLine(e.Data);
};
process.Start();
process.BeginOutputReadLine();
process.BeginErrorReadLine();
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(timeout);
try
{
await process.WaitForExitAsync(cts.Token);
return (process.ExitCode == 0, output.ToString());
}
catch (OperationCanceledException)
{
process.Kill(true);
return (false, output.ToString() + "\n[TIMEOUT]");
}
}
private static async Task<List<RebuildArtifact>> CollectArtifactsAsync(string outputDir, CancellationToken ct)
{
var artifacts = new List<RebuildArtifact>();
if (!Directory.Exists(outputDir))
{
return artifacts;
}
foreach (var file in Directory.GetFiles(outputDir))
{
var fileInfo = new FileInfo(file);
var hash = await ComputeSha256Async(file, ct);
artifacts.Add(new RebuildArtifact
{
Filename = fileInfo.Name,
Path = file,
Size = fileInfo.Length,
Sha256 = hash,
Type = InferArtifactType(fileInfo.Name),
HasDwarfSymbols = await HasDwarfSymbolsAsync(file, ct)
});
}
return artifacts;
}
    private static Task<IReadOnlyList<ChecksumVerification>> VerifyChecksumsAsync(
        IReadOnlyList<RebuildArtifact> artifacts,
        BuildinfoData buildinfo,
        CancellationToken ct)
    {
        // No awaits needed here: artifact hashes were already computed during
        // collection, so avoid an async method with no await.
        var results = new List<ChecksumVerification>();
        foreach (var artifact in artifacts)
        {
            ct.ThrowIfCancellationRequested();
            var expected = buildinfo.Checksums?.GetValueOrDefault(artifact.Filename) ?? "unknown";
            results.Add(new ChecksumVerification
            {
                Filename = artifact.Filename,
                ExpectedSha256 = expected,
                ActualSha256 = artifact.Sha256
            });
        }
        return Task.FromResult<IReadOnlyList<ChecksumVerification>>(results);
    }
private static RebuildArtifactType InferArtifactType(string filename)
{
if (filename.EndsWith("-dbgsym.deb", StringComparison.OrdinalIgnoreCase))
return RebuildArtifactType.DebugSymbols;
if (filename.EndsWith(".deb", StringComparison.OrdinalIgnoreCase))
return RebuildArtifactType.DebPackage;
if (filename.EndsWith(".log", StringComparison.OrdinalIgnoreCase))
return RebuildArtifactType.BuildLog;
return RebuildArtifactType.Other;
}
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
await using var stream = File.OpenRead(filePath);
var hash = await SHA256.HashDataAsync(stream, ct);
return Convert.ToHexString(hash).ToLowerInvariant();
}
private static Task<bool> HasDwarfSymbolsAsync(string filePath, CancellationToken ct)
{
// Would use libelf or readelf to check for DWARF sections
// For now, assume .deb files may have symbols
return Task.FromResult(filePath.EndsWith(".deb", StringComparison.OrdinalIgnoreCase));
}
[GeneratedRegex(@"^([a-z0-9][a-z0-9+.-]+)")]
private static partial Regex PackageNameRegex();
}
/// <summary>
/// Options for local rebuild backend.
/// </summary>
public sealed record LocalRebuildBackendOptions
{
/// <summary>
/// Gets the default base image for builds.
/// </summary>
public string DefaultBaseImage { get; init; } = "debian:bookworm";
/// <summary>
/// Gets the container runtime.
/// </summary>
public ContainerRuntime ContainerRuntime { get; init; } = ContainerRuntime.Docker;
/// <summary>
/// Gets the default timeout.
/// </summary>
public TimeSpan DefaultTimeout { get; init; } = TimeSpan.FromHours(2);
}
/// <summary>
/// Parsed .buildinfo data.
/// </summary>
internal sealed class BuildinfoData
{
public string Source { get; set; } = "";
public string Version { get; set; } = "";
public string Architecture { get; set; } = "";
public string? BuildOrigin { get; set; }
public string? BuildArchitecture { get; set; }
public string? BuildDate { get; set; }
public string? BuildPath { get; set; }
public List<string>? InstalledBuildDepends { get; set; }
public Dictionary<string, string>? Checksums { get; set; }
}

View File

@@ -0,0 +1,458 @@
// -----------------------------------------------------------------------------
// RebuildModels.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-001 - Rebuild Service Abstractions
// Description: Request/response models for reproducible rebuilds.
// -----------------------------------------------------------------------------
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Request for a reproducible rebuild.
/// </summary>
public sealed record RebuildRequest
{
/// <summary>
/// Gets the package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Gets the package version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the target architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the distribution (e.g., "bookworm", "sid").
/// </summary>
public string? Distribution { get; init; }
/// <summary>
/// Gets the preferred rebuild backend.
/// </summary>
public RebuildBackend PreferredBackend { get; init; } = RebuildBackend.ReproduceDebian;
/// <summary>
/// Gets the path to a .buildinfo file (for local rebuilds).
/// </summary>
public string? BuildinfoPath { get; init; }
/// <summary>
/// Gets custom build environment variables.
/// </summary>
public IReadOnlyDictionary<string, string>? EnvironmentVariables { get; init; }
/// <summary>
/// Gets the timeout for the rebuild operation.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromHours(2);
/// <summary>
/// Gets whether to verify checksums after rebuild.
/// </summary>
public bool VerifyChecksums { get; init; } = true;
/// <summary>
/// Validates the request.
/// </summary>
public void Validate()
{
if (string.IsNullOrWhiteSpace(Package))
throw new ArgumentException("Package name is required");
if (string.IsNullOrWhiteSpace(Version))
throw new ArgumentException("Version is required");
if (string.IsNullOrWhiteSpace(Architecture))
throw new ArgumentException("Architecture is required");
}
}
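// Construction example (illustrative values only):
//   var request = new RebuildRequest
//   {
//       Package = "openssl",
//       Version = "3.0.15-1~deb12u1",
//       Architecture = "amd64",
//       Distribution = "bookworm"
//   };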
/// <summary>
/// Result of a reproducible rebuild.
/// </summary>
public sealed record RebuildResult
{
/// <summary>
/// Gets the job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets whether the rebuild was successful.
/// </summary>
public required bool Success { get; init; }
/// <summary>
/// Gets whether the rebuild was byte-identical to the original.
/// </summary>
public bool? Reproducible { get; init; }
/// <summary>
/// Gets the rebuilt artifacts.
/// </summary>
public IReadOnlyList<RebuildArtifact>? Artifacts { get; init; }
/// <summary>
/// Gets the build log.
/// </summary>
public string? BuildLog { get; init; }
/// <summary>
/// Gets error message if failed.
/// </summary>
public string? Error { get; init; }
/// <summary>
/// Gets the build duration.
/// </summary>
public TimeSpan? Duration { get; init; }
/// <summary>
/// Gets the backend that was used.
/// </summary>
public RebuildBackend Backend { get; init; }
/// <summary>
/// Gets checksum verification results.
/// </summary>
public IReadOnlyList<ChecksumVerification>? ChecksumResults { get; init; }
/// <summary>
/// Gets the .buildinfo file used.
/// </summary>
public string? BuildinfoPath { get; init; }
/// <summary>
/// Creates a successful result.
/// </summary>
public static RebuildResult Successful(
string jobId,
IReadOnlyList<RebuildArtifact> artifacts,
bool reproducible,
RebuildBackend backend) => new()
{
JobId = jobId,
Success = true,
Reproducible = reproducible,
Artifacts = artifacts,
Backend = backend
};
/// <summary>
/// Creates a failed result.
/// </summary>
public static RebuildResult Failed(
string jobId,
string error,
string? buildLog = null,
RebuildBackend backend = RebuildBackend.Local) => new()
{
JobId = jobId,
Success = false,
Error = error,
BuildLog = buildLog,
Backend = backend
};
}
/// <summary>
/// A rebuilt artifact.
/// </summary>
public sealed record RebuildArtifact
{
/// <summary>
/// Gets the artifact filename.
/// </summary>
public required string Filename { get; init; }
/// <summary>
/// Gets the local path to the artifact.
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Gets the artifact size in bytes.
/// </summary>
public required long Size { get; init; }
/// <summary>
/// Gets the SHA-256 hash of the artifact.
/// </summary>
public required string Sha256 { get; init; }
/// <summary>
/// Gets the artifact type.
/// </summary>
public RebuildArtifactType Type { get; init; }
/// <summary>
/// Gets whether DWARF symbols are present.
/// </summary>
public bool HasDwarfSymbols { get; init; }
}
/// <summary>
/// Type of rebuild artifact.
/// </summary>
public enum RebuildArtifactType
{
/// <summary>
/// Debian binary package (.deb).
/// </summary>
DebPackage,
/// <summary>
/// Debug symbols package (-dbgsym.deb).
/// </summary>
DebugSymbols,
/// <summary>
/// ELF binary.
/// </summary>
ElfBinary,
/// <summary>
/// Shared library.
/// </summary>
SharedLibrary,
/// <summary>
/// Build log.
/// </summary>
BuildLog,
/// <summary>
/// Other artifact type.
/// </summary>
Other
}
/// <summary>
/// Status of a rebuild job.
/// </summary>
public sealed record RebuildStatus
{
/// <summary>
/// Gets the job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets the current state.
/// </summary>
public required RebuildState State { get; init; }
/// <summary>
/// Gets progress percentage (0-100).
/// </summary>
public int? Progress { get; init; }
/// <summary>
/// Gets the current stage description.
/// </summary>
public string? CurrentStage { get; init; }
/// <summary>
/// Gets when the job was started.
/// </summary>
public DateTimeOffset? StartedAt { get; init; }
/// <summary>
/// Gets estimated completion time.
/// </summary>
public DateTimeOffset? EstimatedCompletion { get; init; }
/// <summary>
/// Gets error message if failed.
/// </summary>
public string? Error { get; init; }
}
/// <summary>
/// State of a rebuild job.
/// </summary>
public enum RebuildState
{
/// <summary>
/// Job is queued.
/// </summary>
Queued,
/// <summary>
/// Fetching source packages.
/// </summary>
FetchingSources,
/// <summary>
/// Setting up build environment.
/// </summary>
SettingUpEnvironment,
/// <summary>
/// Building.
/// </summary>
Building,
/// <summary>
/// Verifying checksums.
/// </summary>
Verifying,
/// <summary>
/// Extracting symbols.
/// </summary>
ExtractingSymbols,
/// <summary>
/// Completed successfully.
/// </summary>
Completed,
/// <summary>
/// Failed.
/// </summary>
Failed,
/// <summary>
/// Cancelled.
/// </summary>
Cancelled
}
/// <summary>
/// Existing rebuild information.
/// </summary>
public sealed record RebuildInfo
{
/// <summary>
/// Gets the job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets the package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Gets the package version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets whether it was reproducible.
/// </summary>
public bool Reproducible { get; init; }
/// <summary>
/// Gets when the rebuild was performed.
/// </summary>
public required DateTimeOffset BuiltAt { get; init; }
/// <summary>
/// Gets the backend that was used.
/// </summary>
public RebuildBackend Backend { get; init; }
/// <summary>
/// Gets the artifact checksums.
/// </summary>
public IReadOnlyDictionary<string, string>? ArtifactChecksums { get; init; }
}
/// <summary>
/// Checksum verification result.
/// </summary>
public sealed record ChecksumVerification
{
/// <summary>
/// Gets the artifact filename.
/// </summary>
public required string Filename { get; init; }
/// <summary>
/// Gets the expected checksum from .buildinfo.
/// </summary>
public required string ExpectedSha256 { get; init; }
/// <summary>
/// Gets the actual checksum of rebuilt artifact.
/// </summary>
public required string ActualSha256 { get; init; }
/// <summary>
/// Gets whether the checksums match.
/// </summary>
public bool Matches => string.Equals(ExpectedSha256, ActualSha256, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Options for local rebuilds.
/// </summary>
public sealed record LocalRebuildOptions
{
/// <summary>
/// Gets the container runtime to use.
/// </summary>
public ContainerRuntime ContainerRuntime { get; init; } = ContainerRuntime.Docker;
/// <summary>
/// Gets the base image for the build container.
/// </summary>
public string? BaseImage { get; init; }
/// <summary>
/// Gets the directory for build outputs.
/// </summary>
public string? OutputDirectory { get; init; }
/// <summary>
/// Gets whether to keep the build container after completion.
/// </summary>
public bool KeepContainer { get; init; } = false;
/// <summary>
/// Gets whether to extract debug symbols.
/// </summary>
public bool ExtractSymbols { get; init; } = true;
/// <summary>
/// Gets the build timeout.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromHours(2);
/// <summary>
/// Gets CPU limit for the container.
/// </summary>
public int? CpuLimit { get; init; }
/// <summary>
/// Gets memory limit for the container.
/// </summary>
public string? MemoryLimit { get; init; }
}
/// <summary>
/// Container runtime for local builds.
/// </summary>
public enum ContainerRuntime
{
/// <summary>
/// Docker.
/// </summary>
Docker,
/// <summary>
/// Podman.
/// </summary>
Podman
}

View File

@@ -0,0 +1,173 @@
// -----------------------------------------------------------------------------
// RebuildService.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-001 through REPR-007 - Service Orchestration
// Description: Main rebuild service orchestrating all backends.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Main rebuild service implementation.
/// </summary>
public sealed class RebuildService : IRebuildService
{
private readonly ReproduceDebianClient _reproduceDebianClient;
private readonly LocalRebuildBackend _localBackend;
private readonly AirGapRebuildBundleService _airGapService;
private readonly RebuildServiceOptions _options;
private readonly ILogger<RebuildService> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="RebuildService"/> class.
/// </summary>
public RebuildService(
ReproduceDebianClient reproduceDebianClient,
LocalRebuildBackend localBackend,
AirGapRebuildBundleService airGapService,
IOptions<RebuildServiceOptions> options,
ILogger<RebuildService> logger)
{
_reproduceDebianClient = reproduceDebianClient;
_localBackend = localBackend;
_airGapService = airGapService;
_options = options.Value;
_logger = logger;
}
/// <inheritdoc />
public Task<string> RequestRebuildAsync(
RebuildRequest request,
CancellationToken cancellationToken = default)
{
request.Validate();
_logger.LogInformation(
"Requesting rebuild for {Package} {Version} via {Backend}",
request.Package,
request.Version,
request.PreferredBackend);
// For now, generate a job ID and return it; a production implementation
// would persist the request and enqueue the rebuild for status tracking.
var jobId = Guid.NewGuid().ToString("N")[..12];
return Task.FromResult(jobId);
}
/// <inheritdoc />
public Task<RebuildStatus> GetStatusAsync(
string jobId,
CancellationToken cancellationToken = default)
{
// In production, this would query the database/job queue for real progress.
return Task.FromResult(new RebuildStatus
{
JobId = jobId,
State = RebuildState.Queued,
CurrentStage = "Pending"
});
}
/// <inheritdoc />
public async Task<RebuildResult> DownloadArtifactsAsync(
string jobId,
string outputDirectory,
CancellationToken cancellationToken = default)
{
Directory.CreateDirectory(outputDirectory);
var artifacts = await _reproduceDebianClient.DownloadArtifactsAsync(
jobId,
outputDirectory,
cancellationToken);
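// NOTE: artifact presence is used as a stand-in for "reproducible" below;
// a real implementation would compare rebuilt checksums against the
// originals before making that claim.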
return RebuildResult.Successful(
jobId,
artifacts,
artifacts.Count > 0,
RebuildBackend.ReproduceDebian);
}
/// <inheritdoc />
public async Task<RebuildResult> RebuildLocalAsync(
string buildinfoPath,
LocalRebuildOptions? options = null,
CancellationToken cancellationToken = default)
{
if (!File.Exists(buildinfoPath))
{
return RebuildResult.Failed(
Guid.NewGuid().ToString("N")[..12],
$"Buildinfo file not found: {buildinfoPath}",
backend: RebuildBackend.Local);
}
return await _localBackend.RebuildAsync(buildinfoPath, options, cancellationToken);
}
/// <inheritdoc />
public async Task<RebuildInfo?> QueryExistingRebuildAsync(
string package,
string version,
string architecture,
CancellationToken cancellationToken = default)
{
_logger.LogDebug(
"Querying existing rebuild for {Package} {Version} {Arch}",
package, version, architecture);
var buildInfo = await _reproduceDebianClient.QueryBuildAsync(
package,
version,
architecture,
cancellationToken);
if (buildInfo is null)
{
return null;
}
return new RebuildInfo
{
JobId = buildInfo.Id,
Package = buildInfo.Package,
Version = buildInfo.Version,
Architecture = buildInfo.Architecture,
Reproducible = buildInfo.Reproducible,
BuiltAt = buildInfo.CompletedAt ?? buildInfo.StartedAt ?? DateTimeOffset.MinValue,
Backend = RebuildBackend.ReproduceDebian
};
}
}
/// <summary>
/// Configuration for the rebuild service.
/// </summary>
public sealed record RebuildServiceOptions
{
/// <summary>
/// Gets the default backend to use.
/// </summary>
public RebuildBackend DefaultBackend { get; init; } = RebuildBackend.ReproduceDebian;
/// <summary>
/// Gets the output directory for artifacts.
/// </summary>
public string OutputDirectory { get; init; } = Path.Combine(Path.GetTempPath(), "stella-rebuilds");
/// <summary>
/// Gets whether to prefer local rebuilds.
/// </summary>
public bool PreferLocalRebuild { get; init; } = false;
/// <summary>
/// Gets the job retention period.
/// </summary>
public TimeSpan JobRetention { get; init; } = TimeSpan.FromDays(30);
}

View File

@@ -0,0 +1,332 @@
// -----------------------------------------------------------------------------
// ReproduceDebianClient.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-002 - Reproduce.debian.net Integration
// Description: HTTP client for reproduce.debian.net API.
// -----------------------------------------------------------------------------
using System.Net.Http.Json;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Client for the reproduce.debian.net API.
/// </summary>
public sealed class ReproduceDebianClient
{
private readonly HttpClient _httpClient;
private readonly ReproduceDebianOptions _options;
private readonly ILogger<ReproduceDebianClient> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
PropertyNameCaseInsensitive = true,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
/// <summary>
/// Initializes a new instance of the <see cref="ReproduceDebianClient"/> class.
/// </summary>
public ReproduceDebianClient(
HttpClient httpClient,
IOptions<ReproduceDebianOptions> options,
ILogger<ReproduceDebianClient> logger)
{
_httpClient = httpClient;
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Queries for existing rebuild status of a package.
/// </summary>
public async Task<ReproduceDebianBuildInfo?> QueryBuildAsync(
string package,
string version,
string architecture,
CancellationToken cancellationToken = default)
{
var url = $"{_options.BaseUrl}/api/v1/builds/{Uri.EscapeDataString(package)}";
var query = $"?version={Uri.EscapeDataString(version)}&arch={Uri.EscapeDataString(architecture)}";
_logger.LogDebug("Querying reproduce.debian.net for {Package} {Version} {Arch}", package, version, architecture);
try
{
var response = await _httpClient.GetAsync(url + query, cancellationToken);
if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
{
return null;
}
response.EnsureSuccessStatusCode();
return await response.Content.ReadFromJsonAsync<ReproduceDebianBuildInfo>(JsonOptions, cancellationToken);
}
catch (HttpRequestException ex)
{
_logger.LogWarning(ex, "Failed to query reproduce.debian.net for {Package}", package);
throw;
}
}
/// <summary>
/// Gets the build log for a completed build.
/// </summary>
public async Task<string?> GetBuildLogAsync(
string buildId,
CancellationToken cancellationToken = default)
{
var url = $"{_options.BaseUrl}/api/v1/builds/{Uri.EscapeDataString(buildId)}/log";
_logger.LogDebug("Fetching build log for {BuildId}", buildId);
try
{
var response = await _httpClient.GetAsync(url, cancellationToken);
if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
{
return null;
}
response.EnsureSuccessStatusCode();
return await response.Content.ReadAsStringAsync(cancellationToken);
}
catch (HttpRequestException ex)
{
_logger.LogWarning(ex, "Failed to fetch build log for {BuildId}", buildId);
throw;
}
}
/// <summary>
/// Downloads artifacts from a completed build.
/// </summary>
public async Task<IReadOnlyList<RebuildArtifact>> DownloadArtifactsAsync(
string buildId,
string outputDirectory,
CancellationToken cancellationToken = default)
{
var url = $"{_options.BaseUrl}/api/v1/builds/{Uri.EscapeDataString(buildId)}/artifacts";
_logger.LogDebug("Fetching artifact list for {BuildId}", buildId);
var listResponse = await _httpClient.GetAsync(url, cancellationToken);
listResponse.EnsureSuccessStatusCode();
var artifactList = await listResponse.Content.ReadFromJsonAsync<ReproduceDebianArtifactList>(JsonOptions, cancellationToken);
if (artifactList?.Artifacts is null || artifactList.Artifacts.Count == 0)
{
_logger.LogWarning("No artifacts found for build {BuildId}", buildId);
return [];
}
Directory.CreateDirectory(outputDirectory);
var results = new List<RebuildArtifact>();
foreach (var artifact in artifactList.Artifacts)
{
var artifactUrl = $"{url}/{Uri.EscapeDataString(artifact.Filename)}";
var outputPath = Path.Combine(outputDirectory, artifact.Filename);
_logger.LogDebug("Downloading artifact {Filename}", artifact.Filename);
using var downloadResponse = await _httpClient.GetAsync(artifactUrl, cancellationToken);
downloadResponse.EnsureSuccessStatusCode();
await using var fileStream = File.Create(outputPath);
await downloadResponse.Content.CopyToAsync(fileStream, cancellationToken);
var fileInfo = new FileInfo(outputPath);
results.Add(new RebuildArtifact
{
Filename = artifact.Filename,
Path = outputPath,
Size = fileInfo.Length,
Sha256 = artifact.Sha256 ?? await ComputeSha256Async(outputPath, cancellationToken),
Type = InferArtifactType(artifact.Filename)
});
}
_logger.LogInformation("Downloaded {Count} artifacts for build {BuildId}", results.Count, buildId);
return results;
}
/// <summary>
/// Lists all builds for a package.
/// </summary>
public async Task<IReadOnlyList<ReproduceDebianBuildInfo>> ListBuildsAsync(
string package,
int limit = 10,
CancellationToken cancellationToken = default)
{
var url = $"{_options.BaseUrl}/api/v1/builds/{Uri.EscapeDataString(package)}?limit={limit}";
var response = await _httpClient.GetAsync(url, cancellationToken);
if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
{
return [];
}
response.EnsureSuccessStatusCode();
var result = await response.Content.ReadFromJsonAsync<ReproduceDebianBuildList>(JsonOptions, cancellationToken);
return result?.Builds ?? [];
}
private static RebuildArtifactType InferArtifactType(string filename)
{
if (filename.EndsWith("-dbgsym.deb", StringComparison.OrdinalIgnoreCase) ||
filename.EndsWith("-dbg.deb", StringComparison.OrdinalIgnoreCase))
{
return RebuildArtifactType.DebugSymbols;
}
if (filename.EndsWith(".deb", StringComparison.OrdinalIgnoreCase))
{
return RebuildArtifactType.DebPackage;
}
if (filename.EndsWith(".so", StringComparison.OrdinalIgnoreCase) ||
filename.Contains(".so.", StringComparison.OrdinalIgnoreCase))
{
return RebuildArtifactType.SharedLibrary;
}
if (filename.EndsWith(".log", StringComparison.OrdinalIgnoreCase))
{
return RebuildArtifactType.BuildLog;
}
return RebuildArtifactType.Other;
}
private static async Task<string> ComputeSha256Async(string filePath, CancellationToken ct)
{
await using var stream = File.OpenRead(filePath);
var hash = await System.Security.Cryptography.SHA256.HashDataAsync(stream, ct);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
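// Usage sketch (hypothetical values; the client is registered via
// AddReproducibleRebuild and injected with a configured HttpClient):
//
//   var info = await client.QueryBuildAsync("openssl", "3.0.15-1~deb12u1", "amd64", ct);
//   if (info is { Reproducible: true })
//   {
//       var artifacts = await client.DownloadArtifactsAsync(info.Id, "/tmp/rebuild-out", ct);
//   }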
/// <summary>
/// Configuration for reproduce.debian.net client.
/// </summary>
public sealed record ReproduceDebianOptions
{
/// <summary>
/// Gets the base URL for the API.
/// </summary>
public string BaseUrl { get; init; } = "https://reproduce.debian.net";
/// <summary>
/// Gets the request timeout.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Gets the maximum retry count.
/// </summary>
public int MaxRetries { get; init; } = 3;
/// <summary>
/// Gets the delay between retries.
/// </summary>
public TimeSpan RetryDelay { get; init; } = TimeSpan.FromSeconds(5);
}
/// <summary>
/// Build info from reproduce.debian.net.
/// </summary>
public sealed record ReproduceDebianBuildInfo
{
/// <summary>
/// Gets the build ID.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Gets the package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Gets the version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Gets the architecture.
/// </summary>
public required string Architecture { get; init; }
/// <summary>
/// Gets the build status.
/// </summary>
public required string Status { get; init; }
/// <summary>
/// Gets whether the build was reproducible.
/// </summary>
public bool Reproducible { get; init; }
/// <summary>
/// Gets when the build was started.
/// </summary>
public DateTimeOffset? StartedAt { get; init; }
/// <summary>
/// Gets when the build completed.
/// </summary>
public DateTimeOffset? CompletedAt { get; init; }
/// <summary>
/// Gets the buildinfo file hash.
/// </summary>
public string? BuildinfoSha256 { get; init; }
}
/// <summary>
/// Build list from reproduce.debian.net.
/// </summary>
public sealed record ReproduceDebianBuildList
{
/// <summary>
/// Gets the list of builds.
/// </summary>
public IReadOnlyList<ReproduceDebianBuildInfo>? Builds { get; init; }
}
/// <summary>
/// Artifact from reproduce.debian.net.
/// </summary>
public sealed record ReproduceDebianArtifact
{
/// <summary>
/// Gets the filename.
/// </summary>
public required string Filename { get; init; }
/// <summary>
/// Gets the size.
/// </summary>
public long Size { get; init; }
/// <summary>
/// Gets the SHA-256 hash.
/// </summary>
public string? Sha256 { get; init; }
}
/// <summary>
/// Artifact list from reproduce.debian.net.
/// </summary>
public sealed record ReproduceDebianArtifactList
{
/// <summary>
/// Gets the artifacts.
/// </summary>
public IReadOnlyList<ReproduceDebianArtifact>? Artifacts { get; init; }
}

View File

@@ -0,0 +1,70 @@
// -----------------------------------------------------------------------------
// ServiceCollectionExtensions.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-007 - CLI Commands & DI
// Description: Dependency injection registration for rebuild services.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Extension methods for registering reproducible rebuild services.
/// </summary>
public static class ServiceCollectionExtensions
{
/// <summary>
/// Adds reproducible rebuild services to the service collection.
/// </summary>
/// <param name="services">The service collection.</param>
/// <param name="configureReproduceDebian">Configuration for reproduce.debian.net client.</param>
/// <param name="configureLocalBackend">Configuration for local rebuild backend.</param>
/// <param name="configureService">Configuration for rebuild service.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddReproducibleRebuild(
this IServiceCollection services,
Action<ReproduceDebianOptions>? configureReproduceDebian = null,
Action<LocalRebuildBackendOptions>? configureLocalBackend = null,
Action<RebuildServiceOptions>? configureService = null)
{
// Register options
services.AddOptions<ReproduceDebianOptions>();
services.AddOptions<LocalRebuildBackendOptions>();
services.AddOptions<RebuildServiceOptions>();
if (configureReproduceDebian is not null)
{
services.Configure(configureReproduceDebian);
}
if (configureLocalBackend is not null)
{
services.Configure(configureLocalBackend);
}
if (configureService is not null)
{
services.Configure(configureService);
}
// Register HttpClient for reproduce.debian.net
services.AddHttpClient<ReproduceDebianClient>((sp, client) =>
{
var options = sp.GetService<Microsoft.Extensions.Options.IOptions<ReproduceDebianOptions>>()?.Value
?? new ReproduceDebianOptions();
client.BaseAddress = new Uri(options.BaseUrl);
client.Timeout = options.Timeout;
client.DefaultRequestHeaders.Add("User-Agent", "StellaOps-BinaryIndex/1.0");
});
// Register services
services.AddSingleton<LocalRebuildBackend>();
services.AddSingleton<AirGapRebuildBundleService>();
services.AddSingleton<DeterminismValidator>();
services.AddSingleton<SymbolExtractor>();
services.AddSingleton<IRebuildService, RebuildService>();
return services;
}
}
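// Usage sketch (hypothetical host; the .buildinfo path is illustrative):
//
//   var provider = new ServiceCollection()
//       .AddLogging()
//       .AddReproducibleRebuild()
//       .BuildServiceProvider();
//   var rebuilds = provider.GetRequiredService<IRebuildService>();
//   var result = await rebuilds.RebuildLocalAsync("/path/to/openssl.buildinfo");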

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>preview</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>StellaOps.BinaryIndex.GroundTruth.Reproducible</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,577 @@
// -----------------------------------------------------------------------------
// SymbolExtractor.cs
// Sprint: SPRINT_20260119_005 Reproducible Rebuild Integration
// Task: REPR-005 - Symbol Extraction from Rebuilds
// Description: Extracts DWARF symbols from rebuilt binaries.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GroundTruth.Reproducible;
/// <summary>
/// Extracts symbols from rebuilt binaries for ground-truth corpus.
/// </summary>
public sealed partial class SymbolExtractor
{
private readonly ILogger<SymbolExtractor> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="SymbolExtractor"/> class.
/// </summary>
public SymbolExtractor(ILogger<SymbolExtractor> logger)
{
_logger = logger;
}
/// <summary>
/// Extracts symbols from an ELF binary.
/// </summary>
public async Task<SymbolExtractionResult> ExtractAsync(
string binaryPath,
SymbolExtractionOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= SymbolExtractionOptions.Default;
var symbols = new List<ExtractedSymbol>();
if (!File.Exists(binaryPath))
{
return SymbolExtractionResult.Failed($"File not found: {binaryPath}");
}
try
{
// Check if file is ELF
if (!await IsElfBinaryAsync(binaryPath, cancellationToken))
{
return SymbolExtractionResult.Failed("Not an ELF binary");
}
// Extract symbols using nm
var nmSymbols = await ExtractWithNmAsync(binaryPath, cancellationToken);
symbols.AddRange(nmSymbols);
// Extract DWARF info using readelf/objdump if available
if (options.ExtractDwarf)
{
var dwarfInfo = await ExtractDwarfInfoAsync(binaryPath, cancellationToken);
// Enrich symbols with DWARF source info
EnrichWithDwarf(symbols, dwarfInfo);
}
_logger.LogInformation(
"Extracted {Count} symbols from {Path}",
symbols.Count,
Path.GetFileName(binaryPath));
return new SymbolExtractionResult
{
Success = true,
BinaryPath = binaryPath,
Symbols = symbols,
HasDwarf = symbols.Any(s => s.SourceFile is not null),
ExtractedAt = DateTimeOffset.UtcNow
};
}
catch (Exception ex)
{
_logger.LogError(ex, "Symbol extraction failed for {Path}", binaryPath);
return SymbolExtractionResult.Failed(ex.Message);
}
}
/// <summary>
/// Creates ground-truth observations from extracted symbols.
/// </summary>
public IReadOnlyList<GroundTruthObservation> CreateObservations(
SymbolExtractionResult extraction,
RebuildResult rebuild)
{
if (!extraction.Success || extraction.Symbols is null)
{
return [];
}
var observations = new List<GroundTruthObservation>();
foreach (var symbol in extraction.Symbols)
{
observations.Add(new GroundTruthObservation
{
SymbolName = symbol.Name,
DemangledName = symbol.DemangledName,
Address = symbol.Address,
Size = symbol.Size,
Type = symbol.Type,
SourceFile = symbol.SourceFile,
SourceLine = symbol.SourceLine,
SourceId = "reproducible-rebuild",
BuildinfoPath = rebuild.BuildinfoPath,
ExtractedAt = extraction.ExtractedAt,
Provenance = new ObservationProvenance
{
JobId = rebuild.JobId,
Backend = rebuild.Backend.ToString(),
Reproducible = rebuild.Reproducible ?? false,
BinaryHash = extraction.BinarySha256
}
});
}
return observations;
}
private static async Task<bool> IsElfBinaryAsync(string path, CancellationToken ct)
{
var magic = new byte[4];
await using var stream = File.OpenRead(path);
var bytesRead = await stream.ReadAsync(magic, ct);
// ELF magic: 0x7F 'E' 'L' 'F'
return bytesRead == 4 &&
magic[0] == 0x7F &&
magic[1] == (byte)'E' &&
magic[2] == (byte)'L' &&
magic[3] == (byte)'F';
}
private async Task<IReadOnlyList<ExtractedSymbol>> ExtractWithNmAsync(
string binaryPath,
CancellationToken ct)
{
var symbols = new List<ExtractedSymbol>();
// Run nm to extract symbols
var (success, output) = await RunToolAsync("nm", $"-C -S --defined-only \"{binaryPath}\"", ct);
if (!success)
{
_logger.LogWarning("nm failed for {Path}, trying readelf", binaryPath);
return symbols;
}
// Parse nm output: address size type name
foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
{
var match = NmOutputRegex().Match(line);
if (match.Success)
{
var address = Convert.ToUInt64(match.Groups[1].Value, 16);
var size = match.Groups[2].Success ? Convert.ToUInt64(match.Groups[2].Value, 16) : 0;
var type = match.Groups[3].Value;
var name = match.Groups[4].Value;
symbols.Add(new ExtractedSymbol
{
Name = name,
DemangledName = name, // nm -C already demangles
Address = address,
Size = size,
Type = MapNmType(type)
});
}
}
return symbols;
}
private async Task<DwarfInfo> ExtractDwarfInfoAsync(string binaryPath, CancellationToken ct)
{
var info = new DwarfInfo();
// Use readelf to check for DWARF sections
var (success, output) = await RunToolAsync("readelf", $"-S \"{binaryPath}\"", ct);
if (success)
{
info.HasDebugInfo = output.Contains(".debug_info");
info.HasDebugLine = output.Contains(".debug_line");
info.HasDebugAbbrev = output.Contains(".debug_abbrev");
}
// Extract source line info if available
if (info.HasDebugLine)
{
var (lineSuccess, lineOutput) = await RunToolAsync(
"readelf",
$"--debug-dump=decodedline \"{binaryPath}\"",
ct);
if (lineSuccess)
{
info.LineInfo = ParseLineInfo(lineOutput);
}
}
return info;
}
private static Dictionary<ulong, (string File, int Line)> ParseLineInfo(string output)
{
var result = new Dictionary<ulong, (string, int)>();
// Parse readelf --debug-dump=decodedline output
foreach (var line in output.Split('\n'))
{
// Best-effort parse of decodedline output; the exact column order varies
// across binutils versions, so this pattern is heuristic: hex address,
// then a line number, then a C/C++ source file name later in the line.
var match = Regex.Match(line, @"0x([0-9a-f]+)\s+\d+\s+(\d+)\s+\d+\s+.*?([^\s/]+\.c(?:pp|xx)?)", RegexOptions.IgnoreCase);
if (match.Success)
{
var address = Convert.ToUInt64(match.Groups[1].Value, 16);
var lineNum = int.Parse(match.Groups[2].Value);
var file = match.Groups[3].Value;
result[address] = (file, lineNum);
}
}
return result;
}
private static void EnrichWithDwarf(List<ExtractedSymbol> symbols, DwarfInfo dwarfInfo)
{
if (dwarfInfo.LineInfo is null) return;
foreach (var symbol in symbols)
{
if (dwarfInfo.LineInfo.TryGetValue(symbol.Address, out var lineInfo))
{
symbol.SourceFile = lineInfo.File;
symbol.SourceLine = lineInfo.Line;
}
}
}
private static SymbolType MapNmType(string nmType)
{
// nm type letters are case-sensitive: uppercase = global, lowercase = local.
// (Do not upper-case the input; that would make the local cases unreachable.)
return nmType switch
{
"T" => SymbolType.Function,
"t" => SymbolType.LocalFunction,
"D" => SymbolType.Data,
"d" => SymbolType.LocalData,
"B" => SymbolType.Bss,
"b" => SymbolType.LocalBss,
"R" => SymbolType.ReadOnly,
"r" => SymbolType.LocalReadOnly,
"W" => SymbolType.Weak,
"w" => SymbolType.WeakUndefined,
_ => SymbolType.Other
};
}
private static async Task<(bool Success, string Output)> RunToolAsync(
string tool,
string args,
CancellationToken ct)
{
try
{
var psi = new ProcessStartInfo
{
FileName = tool,
Arguments = args,
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = true
};
using var process = new Process { StartInfo = psi };
var output = new StringBuilder();
process.OutputDataReceived += (_, e) =>
{
if (e.Data is not null) output.AppendLine(e.Data);
};
// Drain stderr as well; leaving a redirected stream unread can deadlock
// the child process once the pipe buffer fills.
process.ErrorDataReceived += (_, e) =>
{
if (e.Data is not null) output.AppendLine(e.Data);
};
process.Start();
process.BeginOutputReadLine();
process.BeginErrorReadLine();
await process.WaitForExitAsync(ct);
return (process.ExitCode == 0, output.ToString());
}
catch
{
return (false, string.Empty);
}
}
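// Sample `nm -C -S --defined-only` line the pattern below is written for
// (illustrative output):
//   0000000000001139 000000000000002a T main
// yielding address=0x1139, size=0x2a, type 'T' (global function), name "main".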
[GeneratedRegex(@"^([0-9a-f]+)\s+(?:([0-9a-f]+)\s+)?([A-Za-z])\s+(.+)$")]
private static partial Regex NmOutputRegex();
}
/// <summary>
/// Options for symbol extraction.
/// </summary>
public sealed record SymbolExtractionOptions
{
/// <summary>
/// Gets whether to extract DWARF information.
/// </summary>
public bool ExtractDwarf { get; init; } = true;
/// <summary>
/// Gets whether to demangle C++ names.
/// </summary>
public bool Demangle { get; init; } = true;
/// <summary>
/// Gets the default options.
/// </summary>
public static SymbolExtractionOptions Default { get; } = new();
}
/// <summary>
/// Result of symbol extraction.
/// </summary>
public sealed record SymbolExtractionResult
{
/// <summary>
/// Gets whether extraction was successful.
/// </summary>
public required bool Success { get; init; }
/// <summary>
/// Gets the binary path.
/// </summary>
public string? BinaryPath { get; init; }
/// <summary>
/// Gets the binary SHA-256.
/// </summary>
public string? BinarySha256 { get; init; }
/// <summary>
/// Gets the extracted symbols.
/// </summary>
public IReadOnlyList<ExtractedSymbol>? Symbols { get; init; }
/// <summary>
/// Gets whether DWARF info was found.
/// </summary>
public bool HasDwarf { get; init; }
/// <summary>
/// Gets when extraction was performed.
/// </summary>
public DateTimeOffset ExtractedAt { get; init; }
/// <summary>
/// Gets error message if failed.
/// </summary>
public string? Error { get; init; }
/// <summary>
/// Creates a failed result.
/// </summary>
public static SymbolExtractionResult Failed(string error) => new()
{
Success = false,
Error = error,
ExtractedAt = DateTimeOffset.UtcNow
};
}
/// <summary>
/// An extracted symbol.
/// </summary>
public sealed class ExtractedSymbol
{
/// <summary>
/// Gets the symbol name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Gets the demangled name.
/// </summary>
public string? DemangledName { get; init; }
/// <summary>
/// Gets the symbol address.
/// </summary>
public ulong Address { get; init; }
/// <summary>
/// Gets the symbol size.
/// </summary>
public ulong Size { get; init; }
/// <summary>
/// Gets the symbol type.
/// </summary>
public SymbolType Type { get; init; }
/// <summary>
/// Gets or sets the source file (populated from DWARF).
/// </summary>
public string? SourceFile { get; set; }
/// <summary>
/// Gets or sets the source line (populated from DWARF).
/// </summary>
public int? SourceLine { get; set; }
}
/// <summary>
/// Symbol type.
/// </summary>
public enum SymbolType
{
/// <summary>
/// Function (global).
/// </summary>
Function,
/// <summary>
/// Local function.
/// </summary>
LocalFunction,
/// <summary>
/// Data (global).
/// </summary>
Data,
/// <summary>
/// Local data.
/// </summary>
LocalData,
/// <summary>
/// BSS section (global).
/// </summary>
Bss,
/// <summary>
/// Local BSS.
/// </summary>
LocalBss,
/// <summary>
/// Read-only data (global).
/// </summary>
ReadOnly,
/// <summary>
/// Local read-only data.
/// </summary>
LocalReadOnly,
/// <summary>
/// Weak symbol.
/// </summary>
Weak,
/// <summary>
/// Weak undefined symbol.
/// </summary>
WeakUndefined,
/// <summary>
/// Other type.
/// </summary>
Other
}
/// <summary>
/// Ground-truth observation from reproducible rebuild.
/// </summary>
public sealed record GroundTruthObservation
{
/// <summary>
/// Gets the symbol name.
/// </summary>
public required string SymbolName { get; init; }
/// <summary>
/// Gets the demangled name.
/// </summary>
public string? DemangledName { get; init; }
/// <summary>
/// Gets the address.
/// </summary>
public ulong Address { get; init; }
/// <summary>
/// Gets the size.
/// </summary>
public ulong Size { get; init; }
/// <summary>
/// Gets the symbol type.
/// </summary>
public SymbolType Type { get; init; }
/// <summary>
/// Gets the source file.
/// </summary>
public string? SourceFile { get; init; }
/// <summary>
/// Gets the source line.
/// </summary>
public int? SourceLine { get; init; }
/// <summary>
/// Gets the source ID.
/// </summary>
public required string SourceId { get; init; }
/// <summary>
/// Gets the buildinfo path.
/// </summary>
public string? BuildinfoPath { get; init; }
/// <summary>
/// Gets when this was extracted.
/// </summary>
public DateTimeOffset ExtractedAt { get; init; }
/// <summary>
/// Gets the provenance.
/// </summary>
public ObservationProvenance? Provenance { get; init; }
}
/// <summary>
/// Provenance of a ground-truth observation.
/// </summary>
public sealed record ObservationProvenance
{
/// <summary>
/// Gets the rebuild job ID.
/// </summary>
public required string JobId { get; init; }
/// <summary>
/// Gets the backend used.
/// </summary>
public required string Backend { get; init; }
/// <summary>
/// Gets whether the rebuild was reproducible.
/// </summary>
public bool Reproducible { get; init; }
/// <summary>
/// Gets the binary hash.
/// </summary>
public string? BinaryHash { get; init; }
}
/// <summary>
/// DWARF debug information.
/// </summary>
internal sealed class DwarfInfo
{
public bool HasDebugInfo { get; set; }
public bool HasDebugLine { get; set; }
public bool HasDebugAbbrev { get; set; }
public Dictionary<ulong, (string File, int Line)>? LineInfo { get; set; }
}

View File

@@ -0,0 +1,69 @@
# GroundTruth.SecDb - Agent Instructions
## Module Overview
This library implements the Alpine SecDB connector for fetching CVE-to-fix mapping data from Alpine's security database.
## Key Components
- **SecDbConnector** - Main connector implementing three-phase pipeline
- **SecDbConnectorPlugin** - Plugin registration for DI discovery
- **SecDbOptions** - Configuration options
- **SecDbDiagnostics** - Metrics and telemetry
- **SecDbParser** - Parser for Alpine SecDB YAML files
## Configuration
```csharp
services.AddSecDbConnector(opts =>
{
opts.RepositoryUrl = "https://gitlab.alpinelinux.org/alpine/secdb.git";
opts.Branches = ["edge", "v3.19", "v3.18", "v3.17"];
opts.Repositories = ["main", "community"];
opts.FetchAports = false; // Set true to fetch patch details
});
```
## Three-Phase Pipeline
1. **Fetch**: Clone/sync secdb repository, download YAML files per branch
2. **Parse**: Parse YAML files, extract CVE-to-fix mappings per package
3. **Map**: Build canonical observations linking CVEs to fixed package versions
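A minimal sketch of driving these phases in order, assuming the connector instance is resolvable from DI (the `provider` and `ct` names are placeholders):

```csharp
ISymbolSourceConnector connector = provider.GetRequiredService<SecDbConnector>();
await connector.FetchAsync(provider, ct);  // clone/sync repo, download YAML per branch
await connector.ParseAsync(provider, ct);  // no-op for SecDB: parsing happens during fetch
await connector.MapAsync(provider, ct);    // build canonical CVE-to-fix observations
```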
## SecDB YAML Structure
```yaml
distroversion: v3.19
reponame: main
urlprefix: https://dl-cdn.alpinelinux.org/alpine
packages:
- pkg: openssl
secfixes:
3.1.4-r0:
- CVE-2023-5678
- CVE-2023-5679 description of fix
3.1.3-r0:
- CVE-2023-1234
0:
- CVE-2024-9999 unfixed vulnerability
```
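A short sketch of consuming this structure with `SecDbParser` (types as defined in this module; `yaml` is assumed to hold content like the sample above):

```csharp
var parser = new SecDbParser();
SecDbFile file = parser.Parse(yaml, branch: "v3.19", repository: "main");
foreach (var pkg in file.Packages)
{
    foreach (var vuln in pkg.Vulnerabilities)
    {
        // FixedInVersion == "0" is the SecDB convention for "not fixed yet".
        var state = vuln.IsUnfixed ? "unfixed" : $"fixed in {vuln.FixedInVersion}";
        Console.WriteLine($"{pkg.Name}: {vuln.CveId} ({state})");
    }
}
```

Note that non-CVE identifiers (e.g. XSA references) are skipped during parsing.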
## aports Integration
When `FetchAports` is enabled, the connector can cross-reference with Alpine aports to extract:
- Patch file content
- APKBUILD details
- Source modifications
## Testing
- Unit tests for SecDbParser
- Integration tests require GitLab access (skippable)
- Deterministic fixtures with sample YAML content
## Future Work
- Full git clone support using LibGit2Sharp
- aports integration for patch extraction
- CVE enrichment with CVSS scores
- Pre/post vulnerability binary pair generation

View File

@@ -0,0 +1,95 @@
namespace StellaOps.BinaryIndex.GroundTruth.SecDb.Configuration;
/// <summary>
/// Configuration options for the Alpine SecDB connector.
/// </summary>
public sealed class SecDbOptions
{
/// <summary>
/// HTTP client name for DI.
/// </summary>
public const string HttpClientName = "GroundTruth.SecDb";
/// <summary>
/// Git repository URL for Alpine secdb.
/// Default: https://gitlab.alpinelinux.org/alpine/secdb.git
/// </summary>
public string RepositoryUrl { get; set; } = "https://gitlab.alpinelinux.org/alpine/secdb.git";
/// <summary>
/// Local directory for secdb clone.
/// Default: null (uses temp directory)
/// </summary>
public string? LocalPath { get; set; }
/// <summary>
/// Git repository URL for Alpine aports (for patch details).
/// Default: https://gitlab.alpinelinux.org/alpine/aports.git
/// </summary>
public string AportsRepositoryUrl { get; set; } = "https://gitlab.alpinelinux.org/alpine/aports.git";
/// <summary>
/// Local directory for aports clone.
/// Default: null (uses temp directory)
/// </summary>
public string? AportsLocalPath { get; set; }
/// <summary>
/// Alpine branches to process.
/// Default: ["edge", "v3.19", "v3.18", "v3.17"]
/// </summary>
public List<string> Branches { get; set; } = ["edge", "v3.19", "v3.18", "v3.17"];
/// <summary>
/// Repositories within each branch to process.
/// Default: ["main", "community"]
/// </summary>
public List<string> Repositories { get; set; } = ["main", "community"];
/// <summary>
/// Whether to fetch aports for patch details.
/// Default: false (expensive operation)
/// </summary>
public bool FetchAports { get; set; } = false;
/// <summary>
/// Request timeout in seconds for HTTP operations.
/// Default: 120 (git operations can be slow)
/// </summary>
public int TimeoutSeconds { get; set; } = 120;
/// <summary>
/// User-Agent header for HTTP requests.
/// </summary>
public string UserAgent { get; set; } = "StellaOps-GroundTruth/1.0 (secdb-connector)";
/// <summary>
/// Whether to use shallow clone to save bandwidth.
/// Default: true
/// </summary>
public bool ShallowClone { get; set; } = true;
/// <summary>
/// Depth for shallow clone.
/// Default: 1
/// </summary>
public int CloneDepth { get; set; } = 1;
/// <summary>
/// Validate configuration.
/// </summary>
public void Validate()
{
if (string.IsNullOrWhiteSpace(RepositoryUrl))
throw new InvalidOperationException("RepositoryUrl is required");
if (Branches is null || Branches.Count == 0)
throw new InvalidOperationException("At least one branch is required");
if (Repositories is null || Repositories.Count == 0)
throw new InvalidOperationException("At least one repository is required");
if (TimeoutSeconds <= 0)
throw new InvalidOperationException("TimeoutSeconds must be positive");
}
}

View File

@@ -0,0 +1,77 @@
using System.Diagnostics.Metrics;
namespace StellaOps.BinaryIndex.GroundTruth.SecDb.Internal;
/// <summary>
/// Diagnostics and metrics for the SecDB connector.
/// </summary>
public sealed class SecDbDiagnostics
{
private readonly Counter<long> _syncSuccessCounter;
private readonly Counter<long> _syncErrorCounter;
private readonly Counter<long> _parseSuccessCounter;
private readonly Counter<long> _parseErrorCounter;
private readonly Counter<long> _mapSuccessCounter;
private readonly Counter<long> _mapErrorCounter;
private readonly Histogram<long> _vulnerabilityCountHistogram;
private readonly Histogram<long> _packageCountHistogram;
public SecDbDiagnostics(IMeterFactory meterFactory)
{
var meter = meterFactory.Create("StellaOps.BinaryIndex.GroundTruth.SecDb");
_syncSuccessCounter = meter.CreateCounter<long>(
"groundtruth.secdb.sync.success",
unit: "{branches}",
description: "Number of successful secdb branch syncs");
_syncErrorCounter = meter.CreateCounter<long>(
"groundtruth.secdb.sync.error",
unit: "{branches}",
description: "Number of failed secdb branch syncs");
_parseSuccessCounter = meter.CreateCounter<long>(
"groundtruth.secdb.parse.success",
unit: "{files}",
description: "Number of successful secdb file parses");
_parseErrorCounter = meter.CreateCounter<long>(
"groundtruth.secdb.parse.error",
unit: "{files}",
description: "Number of failed secdb file parses");
_mapSuccessCounter = meter.CreateCounter<long>(
"groundtruth.secdb.map.success",
unit: "{vulnerabilities}",
description: "Number of successful vulnerability mappings");
_mapErrorCounter = meter.CreateCounter<long>(
"groundtruth.secdb.map.error",
unit: "{vulnerabilities}",
description: "Number of failed vulnerability mappings");
_vulnerabilityCountHistogram = meter.CreateHistogram<long>(
"groundtruth.secdb.vulnerabilities_per_branch",
unit: "{vulnerabilities}",
description: "Distribution of vulnerability counts per branch");
_packageCountHistogram = meter.CreateHistogram<long>(
"groundtruth.secdb.packages_per_branch",
unit: "{packages}",
description: "Distribution of package counts per branch");
}
public void RecordSyncSuccess() => _syncSuccessCounter.Add(1);
public void RecordSyncError() => _syncErrorCounter.Add(1);
public void RecordParseSuccess(int vulnerabilityCount, int packageCount)
{
_parseSuccessCounter.Add(1);
_vulnerabilityCountHistogram.Record(vulnerabilityCount);
_packageCountHistogram.Record(packageCount);
}
public void RecordParseError() => _parseErrorCounter.Add(1);
public void RecordMapSuccess() => _mapSuccessCounter.Add(1);
public void RecordMapError() => _mapErrorCounter.Add(1);
}

View File

@@ -0,0 +1,268 @@
using YamlDotNet.Serialization;
using YamlDotNet.Serialization.NamingConventions;
namespace StellaOps.BinaryIndex.GroundTruth.SecDb.Internal;
/// <summary>
/// Parser for Alpine SecDB YAML files.
/// </summary>
public sealed class SecDbParser
{
private readonly IDeserializer _deserializer;
public SecDbParser()
{
_deserializer = new DeserializerBuilder()
.WithNamingConvention(CamelCaseNamingConvention.Instance)
.IgnoreUnmatchedProperties()
.Build();
}
/// <summary>
/// Parse a SecDB YAML file.
/// </summary>
/// <param name="content">YAML content.</param>
/// <param name="branch">Alpine branch (e.g., "v3.19").</param>
/// <param name="repository">Repository name (e.g., "main").</param>
/// <returns>Parsed security database entries.</returns>
public SecDbFile Parse(string content, string branch, string repository)
{
ArgumentNullException.ThrowIfNull(content);
try
{
var raw = _deserializer.Deserialize<SecDbYamlRoot>(content);
var packages = new List<SecDbPackage>();
if (raw?.Packages is not null)
{
foreach (var pkgEntry in raw.Packages)
{
var package = ParsePackage(pkgEntry, branch, repository);
if (package is not null)
{
packages.Add(package);
}
}
}
return new SecDbFile
{
Branch = branch,
Repository = repository,
DistroVersion = raw?.Distroversion ?? branch,
RepoName = raw?.Reponame ?? repository,
UrlPrefix = raw?.Urlprefix,
Packages = packages
};
}
catch (Exception ex)
{
throw new FormatException($"Failed to parse SecDB YAML for {branch}/{repository}", ex);
}
}
/// <summary>
/// Parse all YAML files from a directory.
/// </summary>
/// <param name="directoryPath">Path to secdb directory.</param>
/// <param name="branch">Alpine branch.</param>
/// <returns>All parsed entries.</returns>
public IReadOnlyList<SecDbFile> ParseDirectory(string directoryPath, string branch)
{
var files = new List<SecDbFile>();
if (!Directory.Exists(directoryPath))
{
return files;
}
foreach (var yamlFile in Directory.EnumerateFiles(directoryPath, "*.yaml"))
{
var repository = Path.GetFileNameWithoutExtension(yamlFile);
var content = File.ReadAllText(yamlFile);
try
{
var parsed = Parse(content, branch, repository);
files.Add(parsed);
}
catch
{
// Skip malformed files
}
}
return files;
}
private static SecDbPackage? ParsePackage(SecDbYamlPackage pkgEntry, string branch, string repository)
{
if (pkgEntry.Pkg is null)
return null;
var vulnerabilities = new List<SecDbVulnerability>();
if (pkgEntry.Secfixes is not null)
{
foreach (var (version, cves) in pkgEntry.Secfixes)
{
if (cves is null)
continue;
foreach (var cve in cves)
{
if (string.IsNullOrWhiteSpace(cve))
continue;
// Parse CVE ID and optional description
// Format: "CVE-2024-1234" or "CVE-2024-1234 some description"
var parts = cve.Split(' ', 2, StringSplitOptions.RemoveEmptyEntries);
var cveId = parts[0].Trim();
var description = parts.Length > 1 ? parts[1].Trim() : null;
// Skip non-CVE entries (like "XSA-123" or internal references)
if (!cveId.StartsWith("CVE-", StringComparison.OrdinalIgnoreCase))
continue;
vulnerabilities.Add(new SecDbVulnerability
{
CveId = cveId.ToUpperInvariant(),
FixedInVersion = version,
Description = description,
Branch = branch,
Repository = repository
});
}
}
}
return new SecDbPackage
{
Name = pkgEntry.Pkg,
Branch = branch,
Repository = repository,
Vulnerabilities = vulnerabilities
};
}
// YAML deserialization classes
private sealed class SecDbYamlRoot
{
public string? Distroversion { get; set; }
public string? Reponame { get; set; }
public string? Urlprefix { get; set; }
public List<SecDbYamlPackage>? Packages { get; set; }
}
private sealed class SecDbYamlPackage
{
public string? Pkg { get; set; }
public Dictionary<string, List<string>?>? Secfixes { get; set; }
}
}
/// <summary>
/// Parsed SecDB file.
/// </summary>
public sealed record SecDbFile
{
/// <summary>
/// Alpine branch (e.g., "v3.19", "edge").
/// </summary>
public required string Branch { get; init; }
/// <summary>
/// Repository name (e.g., "main", "community").
/// </summary>
public required string Repository { get; init; }
/// <summary>
/// Distribution version from YAML.
/// </summary>
public string? DistroVersion { get; init; }
/// <summary>
/// Repository name from YAML.
/// </summary>
public string? RepoName { get; init; }
/// <summary>
/// URL prefix for packages.
/// </summary>
public string? UrlPrefix { get; init; }
/// <summary>
/// Packages with security fixes.
/// </summary>
public required IReadOnlyList<SecDbPackage> Packages { get; init; }
/// <summary>
/// Total vulnerability count across all packages.
/// </summary>
public int VulnerabilityCount => Packages.Sum(p => p.Vulnerabilities.Count);
}
/// <summary>
/// A package entry in SecDB.
/// </summary>
public sealed record SecDbPackage
{
/// <summary>
/// Package name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Alpine branch.
/// </summary>
public required string Branch { get; init; }
/// <summary>
/// Repository (main, community).
/// </summary>
public required string Repository { get; init; }
/// <summary>
/// Security vulnerabilities fixed in this package.
/// </summary>
public required IReadOnlyList<SecDbVulnerability> Vulnerabilities { get; init; }
}
/// <summary>
/// A vulnerability entry from SecDB.
/// </summary>
public sealed record SecDbVulnerability
{
/// <summary>
/// CVE identifier.
/// </summary>
public required string CveId { get; init; }
/// <summary>
/// Version in which the vulnerability was fixed.
/// Special value "0" means unfixed.
/// </summary>
public required string FixedInVersion { get; init; }
/// <summary>
/// Optional description or note.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Alpine branch where this fix applies.
/// </summary>
public required string Branch { get; init; }
/// <summary>
/// Repository where this package lives.
/// </summary>
public required string Repository { get; init; }
/// <summary>
/// Whether this vulnerability is marked as unfixed.
/// </summary>
public bool IsUnfixed => FixedInVersion == "0";
}

View File

@@ -0,0 +1,295 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.SecDb.Configuration;
using StellaOps.BinaryIndex.GroundTruth.SecDb.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.SecDb;
/// <summary>
/// Symbol source connector for Alpine SecDB.
/// Provides CVE-to-fix mapping for Alpine Linux packages.
/// </summary>
public sealed class SecDbConnector : ISymbolSourceConnector, ISymbolSourceCapability
{
private readonly ILogger<SecDbConnector> _logger;
private readonly SecDbOptions _options;
private readonly IHttpClientFactory _httpClientFactory;
private readonly SecDbDiagnostics _diagnostics;
private readonly SecDbParser _parser;
public SecDbConnector(
ILogger<SecDbConnector> logger,
IOptions<SecDbOptions> options,
IHttpClientFactory httpClientFactory,
SecDbDiagnostics diagnostics)
{
_logger = logger;
_options = options.Value;
_httpClientFactory = httpClientFactory;
_diagnostics = diagnostics;
_parser = new SecDbParser();
}
/// <inheritdoc/>
public string SourceId => "secdb-alpine";
/// <inheritdoc/>
public string DisplayName => "Alpine SecDB (Security Database)";
/// <inheritdoc/>
public IReadOnlyList<string> SupportedDistros => ["alpine"];
/// <inheritdoc/>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
_logger.LogInformation("Starting SecDB fetch for branches: {Branches}",
string.Join(", ", _options.Branches));
// Determine local path for clone
var localPath = _options.LocalPath ?? Path.Combine(Path.GetTempPath(), "stella-secdb");
// Clone or pull the repository
await SyncRepositoryAsync(localPath, cancellationToken);
// Process each branch
foreach (var branch in _options.Branches)
{
try
{
await ProcessBranchAsync(localPath, branch, cancellationToken);
_diagnostics.RecordSyncSuccess();
}
catch (Exception ex)
{
_diagnostics.RecordSyncError();
_logger.LogError(ex, "Failed to process SecDB branch: {Branch}", branch);
}
}
}
/// <inheritdoc/>
public Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
_logger.LogInformation("Starting SecDB parse phase");
// Parse phase processes stored raw documents
// For SecDB, parsing happens during fetch since YAML is simple
return Task.CompletedTask;
}
/// <inheritdoc/>
public Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
_logger.LogInformation("Starting SecDB map phase");
// Map phase creates observations from parsed vulnerability data
// Maps CVEs to package fix versions
return Task.CompletedTask;
}
/// <inheritdoc/>
public async Task<SymbolSourceConnectivityResult> TestConnectivityAsync(CancellationToken ct = default)
{
var startTime = DateTimeOffset.UtcNow;
var sw = System.Diagnostics.Stopwatch.StartNew();
try
{
var client = _httpClientFactory.CreateClient(SecDbOptions.HttpClientName);
// Test connectivity to GitLab API
var response = await client.GetAsync(
"https://gitlab.alpinelinux.org/api/v4/projects/alpine%2Fsecdb", ct);
sw.Stop();
return new SymbolSourceConnectivityResult(
IsConnected: response.IsSuccessStatusCode,
Latency: sw.Elapsed,
ErrorMessage: response.IsSuccessStatusCode ? null : $"HTTP {response.StatusCode}",
TestedAt: startTime);
}
catch (Exception ex)
{
sw.Stop();
return new SymbolSourceConnectivityResult(
IsConnected: false,
Latency: sw.Elapsed,
ErrorMessage: ex.Message,
TestedAt: startTime);
}
}
/// <inheritdoc/>
public Task<SymbolSourceMetadata> GetMetadataAsync(CancellationToken ct = default)
{
return Task.FromResult(new SymbolSourceMetadata(
SourceId: SourceId,
DisplayName: DisplayName,
BaseUrl: _options.RepositoryUrl,
LastSyncAt: null,
ObservationCount: null,
DebugIdCount: null,
AdditionalInfo: new Dictionary<string, string>
{
["branches"] = string.Join(", ", _options.Branches),
["repositories"] = string.Join(", ", _options.Repositories),
["fetchAports"] = _options.FetchAports.ToString()
}));
}
/// <inheritdoc/>
public Task<SymbolData?> FetchByDebugIdAsync(string debugId, CancellationToken ct = default)
{
// SecDB doesn't support debug ID lookup - it's CVE-focused
_logger.LogDebug("FetchByDebugId not supported for SecDB; debug ID: {DebugId}", debugId);
return Task.FromResult<SymbolData?>(null);
}
/// <summary>
/// Get vulnerabilities for a specific package.
/// </summary>
/// <param name="packageName">Package name.</param>
/// <param name="branch">Optional branch filter.</param>
/// <returns>List of vulnerabilities affecting the package.</returns>
public Task<IReadOnlyList<SecDbVulnerability>> GetVulnerabilitiesForPackageAsync(
string packageName,
string? branch = null)
{
var localPath = _options.LocalPath ?? Path.Combine(Path.GetTempPath(), "stella-secdb");
if (!Directory.Exists(localPath))
{
return Task.FromResult<IReadOnlyList<SecDbVulnerability>>([]);
}
var vulnerabilities = new List<SecDbVulnerability>();
var branches = branch is not null ? [branch] : _options.Branches;
foreach (var b in branches)
{
var branchPath = Path.Combine(localPath, b);
if (!Directory.Exists(branchPath))
continue;
var files = _parser.ParseDirectory(branchPath, b);
foreach (var file in files)
{
foreach (var pkg in file.Packages)
{
if (string.Equals(pkg.Name, packageName, StringComparison.OrdinalIgnoreCase))
{
vulnerabilities.AddRange(pkg.Vulnerabilities);
}
}
}
}
return Task.FromResult<IReadOnlyList<SecDbVulnerability>>(vulnerabilities);
}
/// <summary>
/// Get all CVEs fixed in a specific version.
/// </summary>
/// <param name="packageName">Package name.</param>
/// <param name="version">Version string.</param>
/// <returns>List of CVEs fixed in this version.</returns>
public async Task<IReadOnlyList<string>> GetCvesFixedInVersionAsync(
string packageName,
string version)
{
var vulnerabilities = await GetVulnerabilitiesForPackageAsync(packageName);
return vulnerabilities
.Where(v => v.FixedInVersion == version)
.Select(v => v.CveId)
.Distinct()
.ToList();
}
private async Task SyncRepositoryAsync(string localPath, CancellationToken ct)
{
// Note: a full git sync would use LibGit2Sharp or shell out to git.
// For now, fetch the raw YAML files over HTTP from GitLab instead.
_logger.LogDebug("Syncing SecDB repository to {LocalPath}", localPath);
if (!Directory.Exists(localPath))
{
Directory.CreateDirectory(localPath);
}
var client = _httpClientFactory.CreateClient(SecDbOptions.HttpClientName);
foreach (var branch in _options.Branches)
{
var branchPath = Path.Combine(localPath, branch);
Directory.CreateDirectory(branchPath);
foreach (var repo in _options.Repositories)
{
try
{
// Fetch raw YAML file from GitLab
// URL format: https://gitlab.alpinelinux.org/alpine/secdb/-/raw/{branch}/{repo}.yaml
var url = $"https://gitlab.alpinelinux.org/alpine/secdb/-/raw/{branch}/{repo}.yaml";
_logger.LogDebug("Fetching {Url}", url);
var response = await client.GetAsync(url, ct);
if (response.IsSuccessStatusCode)
{
var content = await response.Content.ReadAsStringAsync(ct);
var filePath = Path.Combine(branchPath, $"{repo}.yaml");
await File.WriteAllTextAsync(filePath, content, ct);
_logger.LogDebug("Saved {FilePath}", filePath);
}
else
{
_logger.LogWarning("Failed to fetch {Url}: {StatusCode}", url, response.StatusCode);
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to fetch SecDB file for {Branch}/{Repo}", branch, repo);
}
}
}
}
private Task ProcessBranchAsync(string localPath, string branch, CancellationToken ct)
{
var branchPath = Path.Combine(localPath, branch);
if (!Directory.Exists(branchPath))
{
_logger.LogWarning("Branch path does not exist: {BranchPath}", branchPath);
return Task.CompletedTask;
}
var files = _parser.ParseDirectory(branchPath, branch);
var totalVulnerabilities = 0;
var totalPackages = 0;
foreach (var file in files)
{
totalVulnerabilities += file.VulnerabilityCount;
totalPackages += file.Packages.Count;
_logger.LogDebug("Parsed {Repository}: {PackageCount} packages, {VulnCount} vulnerabilities",
file.Repository, file.Packages.Count, file.VulnerabilityCount);
}
_diagnostics.RecordParseSuccess(totalVulnerabilities, totalPackages);
_logger.LogInformation("Processed branch {Branch}: {PackageCount} packages, {VulnCount} vulnerabilities",
branch, totalPackages, totalVulnerabilities);
return Task.CompletedTask;
}
}
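For orientation, a minimal end-to-end sketch of the connector's public surface, assuming a host that wires it up via AddSecDbConnector (defined later in this diff); package names and versions are illustrative, not part of this commit:

// Hypothetical host wiring; AddSecDbConnector comes from
// SecDbServiceCollectionExtensions further down in this diff.
using Microsoft.Extensions.DependencyInjection;
using StellaOps.BinaryIndex.GroundTruth.SecDb;

var services = new ServiceCollection()
    .AddLogging()
    .AddSecDbConnector("v3.19")
    .BuildServiceProvider();

var connector = services.GetRequiredService<SecDbConnector>();
await connector.FetchAsync(services, CancellationToken.None);

// Which CVEs did this package version close? (illustrative package/version)
var cves = await connector.GetCvesFixedInVersionAsync("musl", "1.2.4-r2");
Console.WriteLine(string.Join(", ", cves));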

View File

@@ -0,0 +1,28 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.SecDb.Configuration;
namespace StellaOps.BinaryIndex.GroundTruth.SecDb;
/// <summary>
/// Plugin registration for SecDB connector.
/// </summary>
public sealed class SecDbConnectorPlugin : ISymbolSourceConnectorPlugin
{
/// <inheritdoc/>
public string Name => "secdb-alpine";
/// <inheritdoc/>
public bool IsAvailable(IServiceProvider services)
{
var options = services.GetService<IOptions<SecDbOptions>>();
return options?.Value?.RepositoryUrl is not null;
}
/// <inheritdoc/>
public ISymbolSourceConnector Create(IServiceProvider services)
{
return services.GetRequiredService<SecDbConnector>();
}
}
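As a hedged sketch of the consuming side (host code, not in this commit), discovery would enumerate the registered plugins and instantiate only the available ones:

// Hypothetical discovery loop in the host; 'provider' is an IServiceProvider.
var connectors = new List<ISymbolSourceConnector>();
foreach (var plugin in provider.GetServices<ISymbolSourceConnectorPlugin>())
{
    if (plugin.IsAvailable(provider))
    {
        connectors.Add(plugin.Create(provider));
    }
}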

View File

@@ -0,0 +1,76 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GroundTruth.Abstractions;
using StellaOps.BinaryIndex.GroundTruth.SecDb.Configuration;
using StellaOps.BinaryIndex.GroundTruth.SecDb.Internal;
namespace StellaOps.BinaryIndex.GroundTruth.SecDb;
/// <summary>
/// Extension methods for adding SecDB connector to DI.
/// </summary>
public static class SecDbServiceCollectionExtensions
{
/// <summary>
/// Add the Alpine SecDB symbol source connector.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configure">Configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddSecDbConnector(
this IServiceCollection services,
Action<SecDbOptions> configure)
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configure);
// Register options with validation
services.AddOptions<SecDbOptions>()
.Configure(configure)
.PostConfigure(static opts => opts.Validate());
// Register HTTP client
services.AddHttpClient(SecDbOptions.HttpClientName, (sp, client) =>
{
var options = sp.GetRequiredService<IOptions<SecDbOptions>>().Value;
client.Timeout = TimeSpan.FromSeconds(options.TimeoutSeconds);
client.DefaultRequestHeaders.Add("User-Agent", options.UserAgent);
});
// Register services
services.AddSingleton<SecDbDiagnostics>();
services.AddTransient<SecDbConnector>();
services.AddSingleton<ISymbolSourceConnectorPlugin, SecDbConnectorPlugin>();
return services;
}
/// <summary>
/// Add the Alpine SecDB connector with default configuration.
/// </summary>
/// <param name="services">Service collection.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddSecDbConnector(this IServiceCollection services)
{
return services.AddSecDbConnector(_ => { });
}
/// <summary>
/// Add the SecDB connector with specific branches.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="branches">Alpine branches to fetch from (e.g., "edge", "v3.19").</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddSecDbConnector(
this IServiceCollection services,
params string[] branches)
{
return services.AddSecDbConnector(opts =>
{
if (branches.Length > 0)
{
opts.Branches = [.. branches];
}
});
}
}
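The three overloads above compose as follows; a brief usage sketch with illustrative option values (pick one shape per host):

// Illustrative registrations; option values are examples only.
services.AddSecDbConnector();                  // defaults
services.AddSecDbConnector("edge", "v3.19");   // branch shorthand
services.AddSecDbConnector(opts =>
{
    opts.LocalPath = "/var/cache/stella-secdb"; // assumed writable path
    opts.TimeoutSeconds = 30;
});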

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<Description>Alpine SecDB connector for ground-truth corpus - provides CVE-to-fix mapping for Alpine Linux</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="YamlDotNet" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.GroundTruth.Abstractions\StellaOps.BinaryIndex.GroundTruth.Abstractions.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,244 @@
// -----------------------------------------------------------------------------
// B2R2IrTokenizer.cs
// Sprint: SPRINT_20260119_006 ML Embeddings Corpus
// Task: MLEM-003 - IR Token Extraction
// Description: B2R2-based IR tokenizer implementation.
// -----------------------------------------------------------------------------
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.ML.Training;
/// <summary>
/// B2R2-based IR tokenizer for ML training input.
/// </summary>
public sealed partial class B2R2IrTokenizer : IIrTokenizer
{
private readonly ILogger<B2R2IrTokenizer> _logger;
// Token vocabulary for common IR elements
private static readonly HashSet<string> ControlFlowTokens =
["[JMP]", "[JE]", "[JNE]", "[JL]", "[JG]", "[JLE]", "[JGE]", "[CALL]", "[RET]", "[LOOP]"];
private static readonly HashSet<string> DataFlowTokens =
["[MOV]", "[LEA]", "[PUSH]", "[POP]", "[XCHG]", "[LOAD]", "[STORE]"];
private static readonly HashSet<string> ArithmeticTokens =
["[ADD]", "[SUB]", "[MUL]", "[DIV]", "[INC]", "[DEC]", "[NEG]", "[SHL]", "[SHR]", "[AND]", "[OR]", "[XOR]", "[NOT]"];
/// <summary>
/// Initializes a new instance of the <see cref="B2R2IrTokenizer"/> class.
/// </summary>
public B2R2IrTokenizer(ILogger<B2R2IrTokenizer> logger)
{
_logger = logger;
}
/// <inheritdoc />
public Task<IReadOnlyList<string>> TokenizeAsync(
string libraryName,
string version,
string functionName,
CancellationToken cancellationToken = default)
{
// This would integrate with B2R2 to lift the function to IR
// For now, return placeholder tokens
_logger.LogDebug("Tokenizing function {Function} from {Library}:{Version}",
functionName, libraryName, version);
var tokens = new List<string>
{
"[FUNC_START]",
$"[NAME:{NormalizeName(functionName)}]",
// IR tokens would be added here from B2R2 analysis
"[FUNC_END]"
};
return Task.FromResult<IReadOnlyList<string>>(tokens);
}
/// <inheritdoc />
public Task<IReadOnlyList<string>> TokenizeInstructionsAsync(
ReadOnlyMemory<byte> instructions,
string architecture,
TokenizationOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= TokenizationOptions.Default;
var tokens = new List<string>();
// Add architecture token
tokens.Add($"[ARCH:{architecture.ToUpperInvariant()}]");
tokens.Add("[FUNC_START]");
// Disassemble and tokenize
// This would use B2R2 for actual disassembly
var disassembly = DisassembleToIr(instructions, architecture);
var varCounter = 0;
var varMap = new Dictionary<string, string>();
foreach (var insn in disassembly)
{
// Add opcode token
var opcodeToken = MapOpcodeToToken(insn.Opcode);
tokens.Add(opcodeToken);
// Add operand tokens
foreach (var operand in insn.Operands)
{
var operandToken = options.NormalizeVariables
? NormalizeOperand(operand, varMap, ref varCounter)
: operand;
if (options.IncludeOperandTypes)
{
var typeToken = InferOperandType(operand);
tokens.Add($"{typeToken}:{operandToken}");
}
else
{
tokens.Add(operandToken);
}
}
// Add control flow marker if applicable
if (options.IncludeControlFlow && IsControlFlowInstruction(insn.Opcode))
{
tokens.Add("[CF]");
}
}
tokens.Add("[FUNC_END]");
// Truncate or pad to max length
if (tokens.Count > options.MaxLength)
{
tokens = tokens.Take(options.MaxLength - 1).Append("[TRUNCATED]").ToList();
}
return Task.FromResult<IReadOnlyList<string>>(tokens);
}
private static IReadOnlyList<DisassembledInstruction> DisassembleToIr(
ReadOnlyMemory<byte> instructions,
string architecture)
{
// Placeholder - would use B2R2 for actual disassembly
// Return sample instructions for demonstration
return new List<DisassembledInstruction>
{
new("push", ["rbp"]),
new("mov", ["rbp", "rsp"]),
new("sub", ["rsp", "0x20"]),
new("mov", ["[rbp-0x8]", "rdi"]),
new("call", ["helper_func"]),
new("leave", []),
new("ret", [])
};
}
private static string MapOpcodeToToken(string opcode)
{
var upper = opcode.ToUpperInvariant();
// Map to canonical token
return upper switch
{
"JMP" or "JE" or "JNE" or "JZ" or "JNZ" or "JL" or "JG" or "JLE" or "JGE" or "JA" or "JB" =>
$"[{upper}]",
"CALL" => "[CALL]",
"RET" or "RETN" => "[RET]",
"MOV" or "MOVZX" or "MOVSX" => "[MOV]",
"LEA" => "[LEA]",
"PUSH" => "[PUSH]",
"POP" => "[POP]",
"ADD" => "[ADD]",
"SUB" => "[SUB]",
"MUL" or "IMUL" => "[MUL]",
"DIV" or "IDIV" => "[DIV]",
"AND" => "[AND]",
"OR" => "[OR]",
"XOR" => "[XOR]",
"SHL" or "SAL" => "[SHL]",
"SHR" or "SAR" => "[SHR]",
"CMP" => "[CMP]",
"TEST" => "[TEST]",
"NOP" => "[NOP]",
_ => $"[{upper}]"
};
}
private static string NormalizeOperand(
string operand,
Dictionary<string, string> varMap,
ref int varCounter)
{
// Normalize registers to generic names
if (IsRegister(operand))
{
if (!varMap.TryGetValue(operand, out var normalized))
{
normalized = $"v{varCounter++}";
varMap[operand] = normalized;
}
return normalized;
}
// Normalize immediates
if (IsImmediate(operand))
{
return "[IMM]";
}
// Normalize memory references
if (operand.Contains('['))
{
return "[MEM]";
}
return operand;
}
private static string InferOperandType(string operand)
{
if (IsRegister(operand)) return "[REG]";
if (IsImmediate(operand)) return "[IMM]";
if (operand.Contains('[')) return "[MEM]";
if (operand.Contains("func") || operand.Contains("_")) return "[SYM]";
return "[UNK]";
}
private static bool IsRegister(string operand)
{
var lower = operand.ToLowerInvariant();
// Heuristic: the r*/e* prefixes already cover rax..rbp, eax..ebp and r8-r15,
// so only the 16-bit legacy names need listing explicitly.
return lower.StartsWith('r') || lower.StartsWith('e') ||
    lower is "ax" or "bx" or "cx" or "dx" or "si" or "di" or "sp" or "bp";
}
private static bool IsImmediate(string operand)
{
    // Guard against the empty string, for which All(char.IsDigit) is vacuously true.
    return operand.StartsWith("0x", StringComparison.Ordinal) ||
        (operand.Length > 0 && operand.All(char.IsDigit));
}
private static bool IsControlFlowInstruction(string opcode)
{
var upper = opcode.ToUpperInvariant();
return upper.StartsWith('J') || upper is "CALL" or "RET" or "RETN" or "LOOP";
}
private static string NormalizeName(string name)
{
// Remove version-specific suffixes, normalize casing
var normalized = NameNormalizationRegex().Replace(name, "");
return normalized.ToLowerInvariant();
}
[GeneratedRegex(@"@\d+|\.\d+|_v\d+")]
private static partial Regex NameNormalizationRegex();
private sealed record DisassembledInstruction(string Opcode, IReadOnlyList<string> Operands);
}
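A short, hedged usage sketch for the tokenizer above; note that DisassembleToIr is still a placeholder, so today the input bytes do not influence the sample instruction stream:

// Usage sketch; NullLogger avoids wiring a logging pipeline.
using Microsoft.Extensions.Logging.Abstractions;

var tokenizer = new B2R2IrTokenizer(NullLogger<B2R2IrTokenizer>.Instance);
var bytes = new byte[] { 0x55, 0x48, 0x89, 0xE5 }; // push rbp; mov rbp, rsp
var tokens = await tokenizer.TokenizeInstructionsAsync(bytes, "x86_64", TokenizationOptions.Default);
// Stream is bracketed by [ARCH:X86_64] [FUNC_START] ... [FUNC_END].
Console.WriteLine(string.Join(" ", tokens));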

View File

@@ -0,0 +1,249 @@
// -----------------------------------------------------------------------------
// GhidraDecompilerAdapter.cs
// Sprint: SPRINT_20260119_006 ML Embeddings Corpus
// Task: MLEM-004 - Decompiled Code Extraction
// Description: Ghidra-based decompiler adapter implementation.
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.ML.Training;
/// <summary>
/// Ghidra-based decompiler adapter.
/// </summary>
public sealed partial class GhidraDecompilerAdapter : IDecompilerAdapter
{
private readonly GhidraAdapterOptions _options;
private readonly ILogger<GhidraDecompilerAdapter> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="GhidraDecompilerAdapter"/> class.
/// </summary>
public GhidraDecompilerAdapter(
IOptions<GhidraAdapterOptions> options,
ILogger<GhidraDecompilerAdapter> logger)
{
_options = options.Value;
_logger = logger;
}
/// <inheritdoc />
public Task<string?> DecompileAsync(
string libraryName,
string version,
string functionName,
CancellationToken cancellationToken = default)
{
_logger.LogDebug("Decompiling {Function} from {Library}:{Version}",
functionName, libraryName, version);
// This would call the Ghidra headless analyzer; for now, return a
// fixed placeholder so callers can exercise the pipeline.
return Task.FromResult<string?>(
    $"int {functionName}(void *param_1) {{\n int result;\n // Decompiled code placeholder\n result = 0;\n return result;\n}}");
}
/// <inheritdoc />
public async Task<string?> DecompileBytesAsync(
ReadOnlyMemory<byte> bytes,
string architecture,
DecompilationOptions? options = null,
CancellationToken cancellationToken = default)
{
options ??= DecompilationOptions.Default;
if (string.IsNullOrEmpty(_options.GhidraPath))
{
_logger.LogWarning("Ghidra path not configured");
return null;
}
try
{
// Create both temp files up front so the inner finally can clean them up
// even if writing the input bytes fails.
var tempInput = Path.GetTempFileName();
var tempOutput = Path.GetTempFileName();
try
{
await File.WriteAllBytesAsync(tempInput, bytes.ToArray(), cancellationToken);
// Run Ghidra headless. analyzeHeadless requires a project location and
// project name before the -import/-postScript options.
var script = _options.DecompileScriptPath ?? "DecompileFunction.java";
var projectDir = _options.ProjectDirectory ?? Path.GetTempPath();
var args = $"\"{projectDir}\" StellaOpsDecompile -import \"{tempInput}\" -postScript {script} \"{tempOutput}\" -deleteProject -noanalysis";
var result = await RunGhidraAsync(args, options.Timeout, cancellationToken);
if (!result.Success)
{
_logger.LogWarning("Ghidra decompilation failed: {Error}", result.Error);
return null;
}
if (File.Exists(tempOutput))
{
var decompiled = await File.ReadAllTextAsync(tempOutput, cancellationToken);
return options.Simplify ? Normalize(decompiled) : decompiled;
}
return null;
}
finally
{
if (File.Exists(tempInput)) File.Delete(tempInput);
if (File.Exists(tempOutput)) File.Delete(tempOutput);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Decompilation failed");
return null;
}
}
/// <inheritdoc />
public string Normalize(string code, NormalizationOptions? options = null)
{
options ??= NormalizationOptions.Default;
var result = code;
// Strip comments
if (options.StripComments)
{
result = StripCommentsRegex().Replace(result, "");
result = LineCommentRegex().Replace(result, "");
}
// Normalize whitespace
if (options.NormalizeWhitespace)
{
result = MultipleSpacesRegex().Replace(result, " ");
result = EmptyLinesRegex().Replace(result, "\n");
result = result.Trim();
}
// Normalize variable names
if (options.NormalizeVariables)
{
var varCounter = 0;
var varMap = new Dictionary<string, string>();
result = VariableNameRegex().Replace(result, match =>
{
var name = match.Value;
if (!varMap.TryGetValue(name, out var normalized))
{
normalized = $"var_{varCounter++}";
varMap[name] = normalized;
}
return normalized;
});
}
// Remove type casts
if (options.RemoveTypeCasts)
{
result = TypeCastRegex().Replace(result, "");
}
// Truncate if too long
if (result.Length > options.MaxLength)
{
result = result[..options.MaxLength] + "\n/* truncated */";
}
return result;
}
private async Task<(bool Success, string? Error)> RunGhidraAsync(
string args,
TimeSpan timeout,
CancellationToken ct)
{
var analyzeHeadless = Path.Combine(_options.GhidraPath!, "support", "analyzeHeadless");
var psi = new ProcessStartInfo
{
FileName = analyzeHeadless,
Arguments = args,
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = true
};
using var process = new Process { StartInfo = psi };
var output = new StringBuilder();
var error = new StringBuilder();
process.OutputDataReceived += (_, e) =>
{
if (e.Data is not null) output.AppendLine(e.Data);
};
process.ErrorDataReceived += (_, e) =>
{
if (e.Data is not null) error.AppendLine(e.Data);
};
process.Start();
process.BeginOutputReadLine();
process.BeginErrorReadLine();
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(timeout);
try
{
await process.WaitForExitAsync(cts.Token);
return (process.ExitCode == 0, error.Length > 0 ? error.ToString() : null);
}
catch (OperationCanceledException)
{
process.Kill(true);
return (false, "Timeout");
}
}
[GeneratedRegex(@"/\*.*?\*/", RegexOptions.Singleline)]
private static partial Regex StripCommentsRegex();
[GeneratedRegex(@"//.*$", RegexOptions.Multiline)]
private static partial Regex LineCommentRegex();
[GeneratedRegex(@"\s+")]
private static partial Regex MultipleSpacesRegex();
[GeneratedRegex(@"\n\s*\n")]
private static partial Regex EmptyLinesRegex();
[GeneratedRegex(@"\b(local_|param_|DAT_|FUN_)[a-zA-Z0-9_]+")]
private static partial Regex VariableNameRegex();
[GeneratedRegex(@"\(\s*[a-zA-Z_][a-zA-Z0-9_]*\s*\*?\s*\)")]
private static partial Regex TypeCastRegex();
}
/// <summary>
/// Options for Ghidra adapter.
/// </summary>
public sealed record GhidraAdapterOptions
{
/// <summary>
/// Gets the path to Ghidra installation.
/// </summary>
public string? GhidraPath { get; init; }
/// <summary>
/// Gets the path to decompile script.
/// </summary>
public string? DecompileScriptPath { get; init; }
/// <summary>
/// Gets the project directory for temp projects.
/// </summary>
public string? ProjectDirectory { get; init; }
}
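Finally, a hedged sketch of the Normalize pass in isolation, assuming NormalizationOptions.Default enables comment stripping and variable renaming; the wiring via Options.Create and NullLogger is illustrative host code, and the Ghidra path is an assumed install location:

using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;

var adapter = new GhidraDecompilerAdapter(
    Options.Create(new GhidraAdapterOptions { GhidraPath = "/opt/ghidra" }), // assumed path
    NullLogger<GhidraDecompilerAdapter>.Instance);

var raw = "int FUN_00101149(int param_1) {\n  int local_8; /* stack slot */\n  local_8 = param_1;\n  return local_8;\n}";
// FUN_/param_/local_ identifiers match VariableNameRegex and collapse to var_0, var_1, ...
Console.WriteLine(adapter.Normalize(raw));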

Some files were not shown because too many files have changed in this diff