Add comprehensive security tests for OWASP A03 (Injection) and A10 (SSRF)
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled

- Implemented InjectionTests.cs to cover various injection vulnerabilities including SQL, NoSQL, Command, LDAP, and XPath injections.
- Created SsrfTests.cs to test for Server-Side Request Forgery (SSRF) vulnerabilities, including internal URL access, cloud metadata access, and URL allowlist bypass attempts.
- Introduced MaliciousPayloads.cs to store a collection of malicious payloads for testing various security vulnerabilities.
- Added SecurityAssertions.cs for common security-specific assertion helpers.
- Established SecurityTestBase.cs as a base class for security tests, providing common infrastructure and mocking utilities.
- Configured the test project StellaOps.Security.Tests.csproj with necessary dependencies for testing.
This commit is contained in:
master
2025-12-16 13:11:57 +02:00
parent 5a480a3c2a
commit b55d9fa68d
72 changed files with 8051 additions and 71 deletions

View File

@@ -0,0 +1,164 @@
namespace StellaOps.Scheduler.Storage.Postgres.Models;
/// <summary>
/// Identifies what kind of entity a failure signature is scoped to.
/// </summary>
public enum FailureSignatureScopeType
{
    /// <summary>
    /// The signature is scoped to a repository.
    /// </summary>
    Repo = 0,

    /// <summary>
    /// The signature is scoped to a container image.
    /// </summary>
    Image = 1,

    /// <summary>
    /// The signature is scoped to an artifact.
    /// </summary>
    Artifact = 2,

    /// <summary>
    /// The signature is global (all tenants).
    /// </summary>
    Global = 3,
}
/// <summary>
/// Coarse classification of why a job failed.
/// </summary>
public enum ErrorCategory
{
    /// <summary>
    /// The failure was network-related.
    /// </summary>
    Network = 0,

    /// <summary>
    /// The failure was caused by authentication or authorization.
    /// </summary>
    Auth = 1,

    /// <summary>
    /// The failure was a validation error.
    /// </summary>
    Validation = 2,

    /// <summary>
    /// A resource (memory, disk, CPU) was exhausted.
    /// </summary>
    Resource = 3,

    /// <summary>
    /// The operation timed out.
    /// </summary>
    Timeout = 4,

    /// <summary>
    /// The failure was caused by a configuration error.
    /// </summary>
    Config = 5,

    /// <summary>
    /// The failure could not be categorized.
    /// </summary>
    Unknown = 6,
}
/// <summary>
/// Tracks how far along the handling of a failure signature is.
/// </summary>
public enum ResolutionStatus
{
    /// <summary>
    /// The issue has not been resolved yet.
    /// </summary>
    Unresolved = 0,

    /// <summary>
    /// The issue is currently under investigation.
    /// </summary>
    Investigating = 1,

    /// <summary>
    /// The issue has been resolved.
    /// </summary>
    Resolved = 2,

    /// <summary>
    /// The issue will not be fixed.
    /// </summary>
    WontFix = 3,
}
/// <summary>
/// Outcome predicted for a run, used for TTFS hints.
/// </summary>
public enum PredictedOutcome
{
    /// <summary>
    /// No prediction is available.
    /// </summary>
    Unknown = 0,

    /// <summary>
    /// The run is expected to pass.
    /// </summary>
    Pass = 1,

    /// <summary>
    /// The run is expected to fail.
    /// </summary>
    Fail = 2,

    /// <summary>
    /// The run is expected to be flaky.
    /// </summary>
    Flaky = 3,
}
/// <summary>
/// A recurring failure pattern, keyed by scope, toolchain and error code,
/// that feeds predictive TTFS hints.
/// </summary>
public sealed class FailureSignatureEntity
{
    /// <summary>Unique identifier of this signature.</summary>
    public Guid SignatureId { get; init; }

    /// <summary>Tenant that owns this signature.</summary>
    public required string TenantId { get; init; }

    /// <summary>Creation timestamp; defaults to the current UTC time at construction.</summary>
    public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Last-update timestamp; defaults to the current UTC time at construction.</summary>
    public DateTimeOffset UpdatedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Kind of scope this signature applies to.</summary>
    public FailureSignatureScopeType ScopeType { get; init; }

    /// <summary>Identifier inside the scope (repo name, image digest, etc).</summary>
    public required string ScopeId { get; init; }

    /// <summary>Hash of the toolchain/build environment.</summary>
    public required string ToolchainHash { get; init; }

    /// <summary>Error code, when one is available.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Category of the error, when one has been assigned.</summary>
    public ErrorCategory? ErrorCategory { get; init; }

    /// <summary>How many times this signature has been seen; defaults to 1.</summary>
    public int OccurrenceCount { get; init; } = 1;

    /// <summary>First time this signature was seen; defaults to the current UTC time at construction.</summary>
    public DateTimeOffset FirstSeenAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Most recent time this signature was seen; defaults to the current UTC time at construction.</summary>
    public DateTimeOffset LastSeenAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Current resolution status; defaults to <see cref="Models.ResolutionStatus.Unresolved"/>.</summary>
    public ResolutionStatus ResolutionStatus { get; init; } = ResolutionStatus.Unresolved;

    /// <summary>Free-form notes about the resolution.</summary>
    public string? ResolutionNotes { get; init; }

    /// <summary>When the issue was resolved, if it has been.</summary>
    public DateTimeOffset? ResolvedAt { get; init; }

    /// <summary>Who resolved the issue, if known.</summary>
    public string? ResolvedBy { get; init; }

    /// <summary>Outcome predicted from this signature; defaults to <see cref="Models.PredictedOutcome.Unknown"/>.</summary>
    public PredictedOutcome PredictedOutcome { get; init; } = PredictedOutcome.Unknown;

    /// <summary>Confidence in the prediction, documented as 0.0 to 1.0 (not enforced by this type).</summary>
    public decimal? ConfidenceScore { get; init; }
}

View File

@@ -0,0 +1,440 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Storage.Postgres.Models;
namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for failure signature operations.
/// </summary>
public sealed class FailureSignatureRepository : RepositoryBase<SchedulerDataSource>, IFailureSignatureRepository
{
/// <summary>
/// Initializes a new failure signature repository.
/// </summary>
/// <param name="dataSource">Scheduler data source, forwarded to the base repository.</param>
/// <param name="logger">Logger, forwarded to the base repository.</param>
public FailureSignatureRepository(
    SchedulerDataSource dataSource,
    ILogger<FailureSignatureRepository> logger)
    : base(dataSource, logger)
{
}
/// <inheritdoc />
/// <remarks>
/// Only the columns named in the INSERT list are written; <c>created_at</c>, <c>updated_at</c>,
/// <c>resolved_at</c> and <c>resolved_by</c> are not part of it, so the corresponding properties
/// on <paramref name="signature"/> are ignored. The returned entity is mapped from the row the
/// database hands back via <c>RETURNING *</c>.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="signature"/> is <see langword="null"/>.</exception>
/// <exception cref="InvalidOperationException">The INSERT did not return a row.</exception>
public async Task<FailureSignatureEntity> CreateAsync(
    FailureSignatureEntity signature,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(signature);

    const string sql = """
        INSERT INTO scheduler.failure_signatures (
        signature_id, tenant_id, scope_type, scope_id, toolchain_hash,
        error_code, error_category, occurrence_count, first_seen_at, last_seen_at,
        resolution_status, resolution_notes, predicted_outcome, confidence_score
        )
        VALUES (
        @signature_id, @tenant_id, @scope_type, @scope_id, @toolchain_hash,
        @error_code, @error_category, @occurrence_count, @first_seen_at, @last_seen_at,
        @resolution_status, @resolution_notes, @predicted_outcome, @confidence_score
        )
        RETURNING *
        """;

    await using var connection = await DataSource.OpenConnectionAsync(signature.TenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);
    AddSignatureParameters(command, signature);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    // Do not map from a reader that is not positioned on a row: fail with a message
    // that names the statement instead of an opaque reader error.
    if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        throw new InvalidOperationException(
            "INSERT into scheduler.failure_signatures did not return the created row.");
    }

    return MapSignature(reader);
}
/// <inheritdoc />
public async Task<FailureSignatureEntity?> GetByIdAsync(
    string tenantId,
    Guid signatureId,
    CancellationToken cancellationToken = default)
{
    // The lookup is constrained by both tenant and signature id.
    const string lookupSql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    var match = await QuerySingleOrDefaultAsync(
            tenantId,
            lookupSql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "signature_id", signatureId);
            },
            MapSignature,
            cancellationToken)
        .ConfigureAwait(false);

    return match;
}
/// <inheritdoc />
public async Task<FailureSignatureEntity?> GetByKeyAsync(
    string tenantId,
    FailureSignatureScopeType scopeType,
    string scopeId,
    string toolchainHash,
    string? errorCode,
    CancellationToken cancellationToken = default)
{
    // The last predicate matches a null error code against rows whose error_code is NULL.
    const string lookupSql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND scope_type = @scope_type
        AND scope_id = @scope_id
        AND toolchain_hash = @toolchain_hash
        AND (error_code = @error_code OR (@error_code IS NULL AND error_code IS NULL))
        """;

    // Scope type is sent as the lower-cased enum name.
    var scopeTypeValue = scopeType.ToString().ToLowerInvariant();
    object errorCodeValue = errorCode ?? (object)DBNull.Value;

    var match = await QuerySingleOrDefaultAsync(
            tenantId,
            lookupSql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "scope_type", scopeTypeValue);
                AddParameter(cmd, "scope_id", scopeId);
                AddParameter(cmd, "toolchain_hash", toolchainHash);
                AddParameter(cmd, "error_code", errorCodeValue);
            },
            MapSignature,
            cancellationToken)
        .ConfigureAwait(false);

    return match;
}
/// <inheritdoc />
public async Task<IReadOnlyList<FailureSignatureEntity>> GetByScopeAsync(
    string tenantId,
    FailureSignatureScopeType scopeType,
    string scopeId,
    CancellationToken cancellationToken = default)
{
    // Most recently seen signatures come first.
    const string listSql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND scope_type = @scope_type
        AND scope_id = @scope_id
        ORDER BY last_seen_at DESC
        """;

    // Scope type is sent as the lower-cased enum name.
    var scopeTypeValue = scopeType.ToString().ToLowerInvariant();

    var signatures = await QueryListAsync(
            tenantId,
            listSql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "scope_type", scopeTypeValue);
                AddParameter(cmd, "scope_id", scopeId);
            },
            MapSignature,
            cancellationToken)
        .ConfigureAwait(false);

    return signatures;
}
/// <inheritdoc />
public async Task<IReadOnlyList<FailureSignatureEntity>> GetUnresolvedAsync(
    string tenantId,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    // Most frequent signatures first, ties broken by the most recently seen.
    const string listSql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND resolution_status = 'unresolved'
        ORDER BY occurrence_count DESC, last_seen_at DESC
        LIMIT @limit
        """;

    var signatures = await QueryListAsync(
            tenantId,
            listSql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "limit", limit);
            },
            MapSignature,
            cancellationToken)
        .ConfigureAwait(false);

    return signatures;
}
/// <inheritdoc />
public async Task<IReadOnlyList<FailureSignatureEntity>> GetByPredictedOutcomeAsync(
    string tenantId,
    PredictedOutcome outcome,
    decimal minConfidence = 0.5m,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    // Highest-confidence predictions first, ties broken by the most recently seen.
    const string listSql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND predicted_outcome = @predicted_outcome
        AND confidence_score >= @min_confidence
        ORDER BY confidence_score DESC, last_seen_at DESC
        LIMIT @limit
        """;

    // Outcome is sent as the lower-cased enum name.
    var outcomeValue = outcome.ToString().ToLowerInvariant();

    var signatures = await QueryListAsync(
            tenantId,
            listSql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "predicted_outcome", outcomeValue);
                AddParameter(cmd, "min_confidence", minConfidence);
                AddParameter(cmd, "limit", limit);
            },
            MapSignature,
            cancellationToken)
        .ConfigureAwait(false);

    return signatures;
}
/// <inheritdoc />
/// <remarks>
/// Inserts a new row with an occurrence count of 1, or, when the conflict target matches an
/// existing row, increments its count, refreshes <c>last_seen_at</c>/<c>updated_at</c> and
/// replaces <c>error_category</c> unless the incoming value is NULL.
/// </remarks>
/// <exception cref="InvalidOperationException">The statement did not return a row.</exception>
public async Task<FailureSignatureEntity> UpsertOccurrenceAsync(
    string tenantId,
    FailureSignatureScopeType scopeType,
    string scopeId,
    string toolchainHash,
    string? errorCode,
    ErrorCategory? errorCategory,
    CancellationToken cancellationToken = default)
{
    // NOTE(review): the conflict target includes the nullable error_code column. A plain
    // PostgreSQL unique index treats NULLs as distinct, so calls with a null errorCode would
    // insert a new row every time instead of incrementing - confirm the index is declared
    // NULLS NOT DISTINCT (or otherwise covers NULL) in the schema migration.
    const string sql = """
        INSERT INTO scheduler.failure_signatures (
        signature_id, tenant_id, scope_type, scope_id, toolchain_hash,
        error_code, error_category, occurrence_count, first_seen_at, last_seen_at
        )
        VALUES (
        gen_random_uuid(), @tenant_id, @scope_type, @scope_id, @toolchain_hash,
        @error_code, @error_category, 1, NOW(), NOW()
        )
        ON CONFLICT (tenant_id, scope_type, scope_id, toolchain_hash, error_code)
        DO UPDATE SET
        occurrence_count = scheduler.failure_signatures.occurrence_count + 1,
        last_seen_at = NOW(),
        updated_at = NOW(),
        error_category = COALESCE(EXCLUDED.error_category, scheduler.failure_signatures.error_category)
        RETURNING *
        """;

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    // Enum values are sent as their lower-cased names; absent values as NULL.
    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "scope_type", scopeType.ToString().ToLowerInvariant());
    AddParameter(command, "scope_id", scopeId);
    AddParameter(command, "toolchain_hash", toolchainHash);
    AddParameter(command, "error_code", errorCode ?? (object)DBNull.Value);
    AddParameter(command, "error_category", errorCategory?.ToString().ToLowerInvariant() ?? (object)DBNull.Value);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    // Do not map from a reader that is not positioned on a row: fail with a message
    // that names the statement instead of an opaque reader error.
    if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        throw new InvalidOperationException(
            "Upsert into scheduler.failure_signatures did not return a row.");
    }

    return MapSignature(reader);
}
/// <inheritdoc />
public async Task<bool> UpdateResolutionAsync(
    string tenantId,
    Guid signatureId,
    ResolutionStatus status,
    string? notes,
    string? resolvedBy,
    CancellationToken cancellationToken = default)
{
    // resolved_at is stamped only when the new status is 'resolved'; otherwise it is left as is.
    const string updateSql = """
        UPDATE scheduler.failure_signatures
        SET resolution_status = @resolution_status,
        resolution_notes = @resolution_notes,
        resolved_by = @resolved_by,
        resolved_at = CASE WHEN @resolution_status = 'resolved' THEN NOW() ELSE resolved_at END,
        updated_at = NOW()
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    // Status is sent as the lower-cased enum name; absent text values as NULL.
    var statusValue = status.ToString().ToLowerInvariant();
    object notesValue = notes ?? (object)DBNull.Value;
    object resolvedByValue = resolvedBy ?? (object)DBNull.Value;

    await using var connection = await DataSource
        .OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(updateSql, connection);

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "signature_id", signatureId);
    AddParameter(command, "resolution_status", statusValue);
    AddParameter(command, "resolution_notes", notesValue);
    AddParameter(command, "resolved_by", resolvedByValue);

    var affected = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    return affected is > 0;
}
/// <inheritdoc />
public async Task<bool> UpdatePredictionAsync(
    string tenantId,
    Guid signatureId,
    PredictedOutcome outcome,
    decimal confidence,
    CancellationToken cancellationToken = default)
{
    const string updateSql = """
        UPDATE scheduler.failure_signatures
        SET predicted_outcome = @predicted_outcome,
        confidence_score = @confidence_score,
        updated_at = NOW()
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    // Outcome is sent as the lower-cased enum name.
    var outcomeValue = outcome.ToString().ToLowerInvariant();

    await using var connection = await DataSource
        .OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(updateSql, connection);

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "signature_id", signatureId);
    AddParameter(command, "predicted_outcome", outcomeValue);
    AddParameter(command, "confidence_score", confidence);

    var affected = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    return affected is > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(
    string tenantId,
    Guid signatureId,
    CancellationToken cancellationToken = default)
{
    const string deleteSql = """
        DELETE FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    await using var connection = await DataSource
        .OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(deleteSql, connection);

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "signature_id", signatureId);

    // True when at least one row was removed.
    var affected = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    return affected is > 0;
}
/// <inheritdoc />
public async Task<int> PruneResolvedAsync(
    string tenantId,
    TimeSpan olderThan,
    CancellationToken cancellationToken = default)
{
    const string pruneSql = """
        DELETE FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND resolution_status = 'resolved'
        AND resolved_at < @cutoff
        """;

    // Rows resolved before this instant are removed.
    var cutoff = DateTimeOffset.UtcNow - olderThan;

    await using var connection = await DataSource
        .OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(pruneSql, connection);

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "cutoff", cutoff);

    // The result is the count reported by the DELETE command.
    var removed = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    return removed;
}
/// <summary>
/// Binds the values of <paramref name="signature"/> to the named parameters of an insert command.
/// </summary>
/// <param name="command">Command that receives the parameters.</param>
/// <param name="signature">Entity whose values are bound.</param>
private void AddSignatureParameters(NpgsqlCommand command, FailureSignatureEntity signature)
{
    // An unset id is replaced by a freshly generated one.
    var signatureId = signature.SignatureId;
    if (signatureId == Guid.Empty)
    {
        signatureId = Guid.NewGuid();
    }

    // Enum values travel as lower-cased names.
    var scopeType = signature.ScopeType.ToString().ToLowerInvariant();
    var resolutionStatus = signature.ResolutionStatus.ToString().ToLowerInvariant();
    var predictedOutcome = signature.PredictedOutcome.ToString().ToLowerInvariant();

    // Optional values travel as NULL when absent.
    object errorCode = signature.ErrorCode ?? (object)DBNull.Value;
    object errorCategory = signature.ErrorCategory?.ToString().ToLowerInvariant() ?? (object)DBNull.Value;
    object resolutionNotes = signature.ResolutionNotes ?? (object)DBNull.Value;
    object confidenceScore = signature.ConfidenceScore ?? (object)DBNull.Value;

    AddParameter(command, "signature_id", signatureId);
    AddParameter(command, "tenant_id", signature.TenantId);
    AddParameter(command, "scope_type", scopeType);
    AddParameter(command, "scope_id", signature.ScopeId);
    AddParameter(command, "toolchain_hash", signature.ToolchainHash);
    AddParameter(command, "error_code", errorCode);
    AddParameter(command, "error_category", errorCategory);
    AddParameter(command, "occurrence_count", signature.OccurrenceCount);
    AddParameter(command, "first_seen_at", signature.FirstSeenAt);
    AddParameter(command, "last_seen_at", signature.LastSeenAt);
    AddParameter(command, "resolution_status", resolutionStatus);
    AddParameter(command, "resolution_notes", resolutionNotes);
    AddParameter(command, "predicted_outcome", predictedOutcome);
    AddParameter(command, "confidence_score", confidenceScore);
}
/// <summary>
/// Builds a <see cref="FailureSignatureEntity"/> from the row the reader is currently positioned on.
/// Columns are looked up by name.
/// </summary>
/// <param name="reader">Reader positioned on a <c>failure_signatures</c> row.</param>
/// <returns>The mapped entity.</returns>
private static FailureSignatureEntity MapSignature(NpgsqlDataReader reader)
{
    // Small by-name accessors; the Optional* variants translate database NULL to null.
    string Text(string column) => reader.GetString(reader.GetOrdinal(column));

    string? OptionalText(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetString(ordinal);
    }

    DateTimeOffset Timestamp(string column) =>
        reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal(column));

    DateTimeOffset? OptionalTimestamp(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetFieldValue<DateTimeOffset>(ordinal);
    }

    decimal? OptionalDecimal(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetDecimal(ordinal);
    }

    return new FailureSignatureEntity
    {
        SignatureId = reader.GetGuid(reader.GetOrdinal("signature_id")),
        TenantId = Text("tenant_id"),
        CreatedAt = Timestamp("created_at"),
        UpdatedAt = Timestamp("updated_at"),
        ScopeType = ParseScopeType(Text("scope_type")),
        ScopeId = Text("scope_id"),
        ToolchainHash = Text("toolchain_hash"),
        ErrorCode = OptionalText("error_code"),
        ErrorCategory = OptionalText("error_category") is { } categoryText
            ? ParseErrorCategory(categoryText)
            : null,
        OccurrenceCount = reader.GetInt32(reader.GetOrdinal("occurrence_count")),
        FirstSeenAt = Timestamp("first_seen_at"),
        LastSeenAt = Timestamp("last_seen_at"),
        ResolutionStatus = ParseResolutionStatus(Text("resolution_status")),
        ResolutionNotes = OptionalText("resolution_notes"),
        ResolvedAt = OptionalTimestamp("resolved_at"),
        ResolvedBy = OptionalText("resolved_by"),
        // A NULL prediction is reported as Unknown rather than null.
        PredictedOutcome = OptionalText("predicted_outcome") is { } outcomeText
            ? ParsePredictedOutcome(outcomeText)
            : PredictedOutcome.Unknown,
        ConfidenceScore = OptionalDecimal("confidence_score")
    };
}
/// <summary>
/// Converts a stored scope type string (matched case-insensitively via invariant lower-casing)
/// to its enum value.
/// </summary>
/// <exception cref="ArgumentException">The value is not a known scope type.</exception>
private static FailureSignatureScopeType ParseScopeType(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "repo":
            return FailureSignatureScopeType.Repo;
        case "image":
            return FailureSignatureScopeType.Image;
        case "artifact":
            return FailureSignatureScopeType.Artifact;
        case "global":
            return FailureSignatureScopeType.Global;
        default:
            throw new ArgumentException($"Unknown scope type: {value}");
    }
}
/// <summary>
/// Converts a stored error category string (matched case-insensitively via invariant lower-casing)
/// to its enum value. Unrecognized values map to <c>Unknown</c>.
/// </summary>
private static ErrorCategory ParseErrorCategory(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "network":
            return ErrorCategory.Network;
        case "auth":
            return ErrorCategory.Auth;
        case "validation":
            return ErrorCategory.Validation;
        case "resource":
            return ErrorCategory.Resource;
        case "timeout":
            return ErrorCategory.Timeout;
        case "config":
            return ErrorCategory.Config;
        default:
            return ErrorCategory.Unknown;
    }
}
/// <summary>
/// Converts a stored resolution status string (matched case-insensitively via invariant
/// lower-casing) to its enum value. Both <c>wont_fix</c> and <c>wontfix</c> are accepted;
/// unrecognized values map to <c>Unresolved</c>.
/// </summary>
private static ResolutionStatus ParseResolutionStatus(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "unresolved":
            return ResolutionStatus.Unresolved;
        case "investigating":
            return ResolutionStatus.Investigating;
        case "resolved":
            return ResolutionStatus.Resolved;
        case "wont_fix":
        case "wontfix":
            return ResolutionStatus.WontFix;
        default:
            return ResolutionStatus.Unresolved;
    }
}
/// <summary>
/// Converts a stored predicted outcome string (matched case-insensitively via invariant
/// lower-casing) to its enum value. Unrecognized values map to <c>Unknown</c>.
/// </summary>
private static PredictedOutcome ParsePredictedOutcome(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "pass":
            return PredictedOutcome.Pass;
        case "fail":
            return PredictedOutcome.Fail;
        case "flaky":
            return PredictedOutcome.Flaky;
        default:
            return PredictedOutcome.Unknown;
    }
}
}

View File

@@ -0,0 +1,112 @@
using StellaOps.Scheduler.Storage.Postgres.Models;
namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
/// <summary>
/// Persistence operations for failure signatures.
/// </summary>
public interface IFailureSignatureRepository
{
    /// <summary>
    /// Creates a new failure signature.
    /// </summary>
    /// <param name="signature">Signature to persist.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature as stored.</returns>
    Task<FailureSignatureEntity> CreateAsync(
        FailureSignatureEntity signature,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a failure signature by its identifier.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature, or <see langword="null"/> when none matches.</returns>
    Task<FailureSignatureEntity?> GetByIdAsync(
        string tenantId,
        Guid signatureId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a failure signature by its unique key (scope + toolchain + error code).
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="scopeType">Kind of scope.</param>
    /// <param name="scopeId">Identifier within the scope.</param>
    /// <param name="toolchainHash">Hash of the toolchain/build environment.</param>
    /// <param name="errorCode">Error code, or <see langword="null"/> for signatures without one.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature, or <see langword="null"/> when none matches.</returns>
    Task<FailureSignatureEntity?> GetByKeyAsync(
        string tenantId,
        FailureSignatureScopeType scopeType,
        string scopeId,
        string toolchainHash,
        string? errorCode,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all failure signatures recorded for a scope.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signatures.</param>
    /// <param name="scopeType">Kind of scope.</param>
    /// <param name="scopeId">Identifier within the scope.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching signatures.</returns>
    Task<IReadOnlyList<FailureSignatureEntity>> GetByScopeAsync(
        string tenantId,
        FailureSignatureScopeType scopeType,
        string scopeId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets unresolved failure signatures for a tenant.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signatures.</param>
    /// <param name="limit">Maximum number of signatures to return.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching signatures.</returns>
    Task<IReadOnlyList<FailureSignatureEntity>> GetUnresolvedAsync(
        string tenantId,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets failure signatures matching a predicted outcome.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signatures.</param>
    /// <param name="outcome">Predicted outcome to match.</param>
    /// <param name="minConfidence">Minimum confidence score a signature must have.</param>
    /// <param name="limit">Maximum number of signatures to return.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching signatures.</returns>
    Task<IReadOnlyList<FailureSignatureEntity>> GetByPredictedOutcomeAsync(
        string tenantId,
        PredictedOutcome outcome,
        decimal minConfidence = 0.5m,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Increments the occurrence count and updates the last seen timestamp.
    /// Creates the signature if it doesn't exist (upsert).
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="scopeType">Kind of scope.</param>
    /// <param name="scopeId">Identifier within the scope.</param>
    /// <param name="toolchainHash">Hash of the toolchain/build environment.</param>
    /// <param name="errorCode">Error code, if available.</param>
    /// <param name="errorCategory">Error category, if available.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature after the upsert.</returns>
    Task<FailureSignatureEntity> UpsertOccurrenceAsync(
        string tenantId,
        FailureSignatureScopeType scopeType,
        string scopeId,
        string toolchainHash,
        string? errorCode,
        ErrorCategory? errorCategory,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates the resolution status of a signature.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="status">New resolution status.</param>
    /// <param name="notes">Resolution notes, if any.</param>
    /// <param name="resolvedBy">Who resolved the issue, if known.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><see langword="true"/> when a signature was updated.</returns>
    Task<bool> UpdateResolutionAsync(
        string tenantId,
        Guid signatureId,
        ResolutionStatus status,
        string? notes,
        string? resolvedBy,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates the predicted outcome for a signature.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="outcome">New predicted outcome.</param>
    /// <param name="confidence">Confidence score for the prediction.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><see langword="true"/> when a signature was updated.</returns>
    Task<bool> UpdatePredictionAsync(
        string tenantId,
        Guid signatureId,
        PredictedOutcome outcome,
        decimal confidence,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a failure signature.
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><see langword="true"/> when a signature was deleted.</returns>
    Task<bool> DeleteAsync(
        string tenantId,
        Guid signatureId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Prunes old resolved signatures.
    /// </summary>
    /// <param name="tenantId">Tenant whose signatures are pruned.</param>
    /// <param name="olderThan">Age threshold for pruning.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The number of signatures removed.</returns>
    Task<int> PruneResolvedAsync(
        string tenantId,
        TimeSpan olderThan,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,311 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scheduler.Storage.Postgres.Models;
using StellaOps.Scheduler.Storage.Postgres.Repositories;
namespace StellaOps.Scheduler.Worker.Indexing;
/// <summary>
/// Settings that control the failure signature indexer.
/// </summary>
public sealed class FailureSignatureIndexerOptions
{
    /// <summary>
    /// Time between indexing runs. Defaults to five minutes.
    /// </summary>
    public TimeSpan IndexInterval { get; set; } = new TimeSpan(hours: 0, minutes: 5, seconds: 0);

    /// <summary>
    /// Whether the indexer runs at all. Defaults to <see langword="true"/>.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Number of job failures processed per batch. Defaults to 100.
    /// </summary>
    public int BatchSize { get; set; } = 100;

    /// <summary>
    /// Age threshold for pruning resolved signatures. Defaults to 90 days.
    /// </summary>
    public TimeSpan PruneResolvedOlderThan { get; set; } = new TimeSpan(days: 90, hours: 0, minutes: 0, seconds: 0);
}
/// <summary>
/// Background service that indexes job failures into failure signatures.
/// Analyzes completed jobs to identify patterns for predictive TTFS hints.
/// </summary>
public sealed class FailureSignatureIndexer : BackgroundService
{
private readonly IFailureSignatureRepository _signatureRepository;
private readonly IJobRepository _jobRepository;
private readonly IJobHistoryRepository _historyRepository;
private readonly IOptions<FailureSignatureIndexerOptions> _options;
private readonly ILogger<FailureSignatureIndexer> _logger;
/// <summary>
/// Initializes the indexer with its repositories, options and logger.
/// </summary>
/// <param name="signatureRepository">Repository that stores failure signatures.</param>
/// <param name="jobRepository">Job repository.</param>
/// <param name="historyRepository">Job history repository, queried for failed jobs.</param>
/// <param name="options">Indexer options.</param>
/// <param name="logger">Logger for indexer diagnostics.</param>
public FailureSignatureIndexer(
    IFailureSignatureRepository signatureRepository,
    IJobRepository jobRepository,
    IJobHistoryRepository historyRepository,
    IOptions<FailureSignatureIndexerOptions> options,
    ILogger<FailureSignatureIndexer> logger)
{
    (_signatureRepository, _jobRepository, _historyRepository, _options, _logger) =
        (signatureRepository, jobRepository, historyRepository, options, logger);
}
/// <summary>
/// Runs the indexing loop: index failures, prune old signatures, wait for the configured
/// interval, and repeat until <paramref name="stoppingToken"/> is cancelled.
/// Returns immediately when the indexer is disabled.
/// </summary>
/// <param name="stoppingToken">Signals that the service is stopping.</param>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    if (!_options.Value.Enabled)
    {
        _logger.LogInformation("Failure signature indexer is disabled");
        return;
    }

    _logger.LogInformation("Starting failure signature indexer with interval {Interval}",
        _options.Value.IndexInterval);

    while (!stoppingToken.IsCancellationRequested)
    {
        try
        {
            await IndexFailuresAsync(stoppingToken);
            await PruneOldSignaturesAsync(stoppingToken);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            break;
        }
        catch (Exception ex)
        {
            // A failed run must not stop the service; the next interval retries.
            _logger.LogError(ex, "Error during failure signature indexing");
        }

        try
        {
            await Task.Delay(_options.Value.IndexInterval, stoppingToken);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Shutdown was requested while waiting: leave the loop cleanly instead of
            // letting the cancellation escape ExecuteAsync as an exception.
            break;
        }
    }
}
/// <summary>
/// Fetches a batch of recent failed jobs, derives a signature for each, and records one
/// occurrence per signature. A failure on one job is logged and does not stop the batch;
/// cancellation does.
/// </summary>
/// <param name="ct">Token used to cancel the batch.</param>
private async Task IndexFailuresAsync(CancellationToken ct)
{
    _logger.LogDebug("Starting failure indexing batch");

    // NOTE(review): nothing in this method marks a job as indexed. Confirm that
    // GetRecentFailedJobsAsync excludes jobs already processed; otherwise the same
    // failures are counted again on every run.
    var failedJobs = await _historyRepository.GetRecentFailedJobsAsync(
        _options.Value.BatchSize,
        ct);

    var indexed = 0;
    foreach (var job in failedJobs)
    {
        try
        {
            var signature = await ExtractSignatureAsync(job, ct);
            if (signature != null)
            {
                await _signatureRepository.UpsertOccurrenceAsync(
                    job.TenantId,
                    signature.ScopeType,
                    signature.ScopeId,
                    signature.ToolchainHash,
                    signature.ErrorCode,
                    signature.ErrorCategory,
                    ct);
                indexed++;
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown: stop the batch instead of logging a warning for every remaining job.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to index signature for job {JobId}", job.JobId);
        }
    }

    if (indexed > 0)
    {
        _logger.LogInformation("Indexed {Count} failure signatures", indexed);
    }
}
/// <summary>
/// Occasionally asks the signature repository to delete resolved signatures older than the
/// configured threshold. Failures are logged and not propagated; cancellation is propagated.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
private async Task PruneOldSignaturesAsync(CancellationToken ct)
{
    // Prune is expensive, so only roughly one call in twelve does the work.
    if (Random.Shared.Next(0, 12) != 0)
    {
        return;
    }

    _logger.LogDebug("Starting resolved signature pruning");

    // NOTE(review): "*" is meant as "all tenants", but FailureSignatureRepository.PruneResolvedAsync
    // compares tenant_id to this value with '=', so unless "*" is special-cased elsewhere this
    // call removes nothing. Confirm, and replace with per-tenant pruning if so.
    const string allTenants = "*";

    try
    {
        var pruned = await _signatureRepository.PruneResolvedAsync(
            allTenants,
            _options.Value.PruneResolvedOlderThan,
            ct);

        if (pruned > 0)
        {
            _logger.LogInformation("Pruned {Count} old resolved signatures", pruned);
        }
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // Shutdown is not a pruning failure; let the caller end the loop.
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to prune resolved signatures");
    }
}
/// <summary>
/// Derives the signature key (scope, toolchain hash, error code and category) from a failed job.
/// </summary>
/// <param name="job">Failed job to analyze.</param>
/// <param name="ct">Cancellation token; not used by the current implementation.</param>
/// <returns>
/// A completed task holding the extraction, or <see langword="null"/> when the scope id or
/// toolchain hash is empty.
/// </returns>
private Task<FailureSignatureExtraction?> ExtractSignatureAsync(
    FailedJobRecord job,
    CancellationToken ct)
{
    var scopeType = DetermineScopeType(job);
    var scopeId = ExtractScopeId(job, scopeType);
    var toolchainHash = ComputeToolchainHash(job);
    var (errorCode, category) = ClassifyError(job);

    FailureSignatureExtraction? extraction = null;

    // Both key parts must be present for the signature to be usable.
    if (!string.IsNullOrEmpty(scopeId) && !string.IsNullOrEmpty(toolchainHash))
    {
        extraction = new FailureSignatureExtraction
        {
            ScopeType = scopeType,
            ScopeId = scopeId,
            ToolchainHash = toolchainHash,
            ErrorCode = errorCode,
            ErrorCategory = category
        };
    }

    return Task.FromResult<FailureSignatureExtraction?>(extraction);
}
/// <summary>
/// Picks the scope for a failed job from the first identifier that is present, checked in the
/// order image digest, artifact digest, repository; falls back to the global scope.
/// </summary>
private static FailureSignatureScopeType DetermineScopeType(FailedJobRecord job) =>
    !string.IsNullOrEmpty(job.ImageDigest) ? FailureSignatureScopeType.Image
    : !string.IsNullOrEmpty(job.ArtifactDigest) ? FailureSignatureScopeType.Artifact
    : !string.IsNullOrEmpty(job.Repository) ? FailureSignatureScopeType.Repo
    : FailureSignatureScopeType.Global;
/// <summary>
/// Returns the job identifier that corresponds to <paramref name="scopeType"/>:
/// the image digest, artifact digest or repository (empty when missing), the literal
/// <c>global</c> for the global scope, and an empty string for any other value.
/// </summary>
private static string ExtractScopeId(FailedJobRecord job, FailureSignatureScopeType scopeType)
{
    switch (scopeType)
    {
        case FailureSignatureScopeType.Image:
            return job.ImageDigest ?? string.Empty;
        case FailureSignatureScopeType.Artifact:
            return job.ArtifactDigest ?? string.Empty;
        case FailureSignatureScopeType.Repo:
            return job.Repository ?? string.Empty;
        case FailureSignatureScopeType.Global:
            return "global";
        default:
            return string.Empty;
    }
}
/// <summary>
/// Computes a short fingerprint of the job's environment: the lower-case hex of the first
/// 8 bytes of the SHA-256 of <c>JobType|ScannerVersion|RuntimeVersion</c>, where a missing
/// version is replaced by <c>unknown</c>.
/// </summary>
private static string ComputeToolchainHash(FailedJobRecord job)
{
    var scannerVersion = job.ScannerVersion ?? "unknown";
    var runtimeVersion = job.RuntimeVersion ?? "unknown";

    // Pipe-separated so the three parts stay distinguishable in the hashed input.
    var fingerprintInput = string.Concat(job.JobType, "|", scannerVersion, "|", runtimeVersion);

    var utf8 = System.Text.Encoding.UTF8.GetBytes(fingerprintInput);
    var digest = System.Security.Cryptography.SHA256.HashData(utf8);

    // Only the first 8 bytes are kept, giving a 16-character hex string.
    var prefix = digest[..8];
    return Convert.ToHexStringLower(prefix);
}
/// <summary>
/// Assigns an error category to a failed job by looking for keywords in its lower-cased
/// error message. Categories are tried in a fixed order (timeout, auth, network, validation,
/// resource, config) and the first match wins; with no match the category is <c>Unknown</c>.
/// </summary>
/// <returns>The job's own error code, unchanged, together with the chosen category.</returns>
private static (string? ErrorCode, ErrorCategory Category) ClassifyError(FailedJobRecord job)
{
    var message = job.Error?.ToLowerInvariant() ?? "";

    // True when the message contains at least one of the given fragments.
    bool Mentions(params string[] fragments)
    {
        foreach (var fragment in fragments)
        {
            if (message.Contains(fragment))
            {
                return true;
            }
        }

        return false;
    }

    ErrorCategory category;
    if (Mentions("timeout", "timed out"))
    {
        category = ErrorCategory.Timeout;
    }
    else if (Mentions("unauthorized", "authentication", "401"))
    {
        category = ErrorCategory.Auth;
    }
    else if (Mentions("network", "connection refused", "dns"))
    {
        category = ErrorCategory.Network;
    }
    else if (Mentions("validation", "invalid", "malformed"))
    {
        category = ErrorCategory.Validation;
    }
    else if (Mentions("out of memory", "disk full", "resource"))
    {
        category = ErrorCategory.Resource;
    }
    else if (Mentions("config", "configuration"))
    {
        category = ErrorCategory.Config;
    }
    else
    {
        category = ErrorCategory.Unknown;
    }

    return (job.ErrorCode, category);
}
}
/// <summary>
/// Signature key data extracted from a failed job.
/// </summary>
internal sealed class FailureSignatureExtraction
{
    /// <summary>Kind of scope the failure belongs to.</summary>
    public FailureSignatureScopeType ScopeType { get; init; }

    /// <summary>Identifier within the scope.</summary>
    public required string ScopeId { get; init; }

    /// <summary>Hash of the toolchain/build environment.</summary>
    public required string ToolchainHash { get; init; }

    /// <summary>Error code, if the job reported one.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Category assigned to the error.</summary>
    public ErrorCategory ErrorCategory { get; init; }
}
/// <summary>
/// A failed job, as consumed by signature extraction.
/// </summary>
public sealed record FailedJobRecord
{
    /// <summary>Identifier of the failed job.</summary>
    public required Guid JobId { get; init; }

    /// <summary>Tenant the job belongs to.</summary>
    public required string TenantId { get; init; }

    /// <summary>Type of the job.</summary>
    public required string JobType { get; init; }

    /// <summary>Image digest associated with the job, if any.</summary>
    public string? ImageDigest { get; init; }

    /// <summary>Artifact digest associated with the job, if any.</summary>
    public string? ArtifactDigest { get; init; }

    /// <summary>Repository associated with the job, if any.</summary>
    public string? Repository { get; init; }

    /// <summary>Error message of the failure, if any.</summary>
    public string? Error { get; init; }

    /// <summary>Error code of the failure, if any.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Scanner version the job ran with, if known.</summary>
    public string? ScannerVersion { get; init; }

    /// <summary>Runtime version the job ran with, if known.</summary>
    public string? RuntimeVersion { get; init; }

    /// <summary>When the job failed.</summary>
    public DateTimeOffset FailedAt { get; init; }
}