Add comprehensive security tests for OWASP A03 (Injection) and A10 (SSRF)
- Implemented InjectionTests.cs to cover various injection vulnerabilities, including SQL, NoSQL, Command, LDAP, and XPath injections.
- Created SsrfTests.cs to test for Server-Side Request Forgery (SSRF) vulnerabilities, including internal URL access, cloud metadata access, and URL allowlist bypass attempts.
- Introduced MaliciousPayloads.cs to store a collection of malicious payloads for testing various security vulnerabilities.
- Added SecurityAssertions.cs for common security-specific assertion helpers.
- Established SecurityTestBase.cs as a base class for security tests, providing common infrastructure and mocking utilities.
- Configured the test project StellaOps.Security.Tests.csproj with the necessary dependencies for testing.
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
namespace StellaOps.Scheduler.Storage.Postgres.Models;
|
||||
|
||||
/// <summary>
/// Identifies the kind of entity a failure signature is scoped to.
/// </summary>
public enum FailureSignatureScopeType
{
    /// <summary>The signature is scoped to a repository.</summary>
    Repo = 0,

    /// <summary>The signature is scoped to a container image.</summary>
    Image = 1,

    /// <summary>The signature is scoped to an artifact.</summary>
    Artifact = 2,

    /// <summary>The signature is global (all tenants).</summary>
    Global = 3,
}
|
||||
|
||||
/// <summary>
/// Coarse classification of the error behind a failure.
/// </summary>
public enum ErrorCategory
{
    /// <summary>The failure was network-related.</summary>
    Network = 0,

    /// <summary>The failure was an authentication or authorization problem.</summary>
    Auth = 1,

    /// <summary>The failure was a validation error.</summary>
    Validation = 2,

    /// <summary>A resource (memory, disk, CPU) was exhausted.</summary>
    Resource = 3,

    /// <summary>An operation timed out.</summary>
    Timeout = 4,

    /// <summary>The failure was caused by a configuration error.</summary>
    Config = 5,

    /// <summary>The error is unknown or has not been categorized.</summary>
    Unknown = 6,
}
|
||||
|
||||
/// <summary>
/// Tracks how far a failure signature has progressed towards resolution.
/// </summary>
public enum ResolutionStatus
{
    /// <summary>The issue has not been resolved yet.</summary>
    Unresolved = 0,

    /// <summary>The issue is currently under investigation.</summary>
    Investigating = 1,

    /// <summary>The issue has been resolved.</summary>
    Resolved = 2,

    /// <summary>The issue will not be fixed.</summary>
    WontFix = 3,
}
|
||||
|
||||
/// <summary>
/// Outcome predicted for a run, used for TTFS hints.
/// </summary>
public enum PredictedOutcome
{
    /// <summary>No prediction is available.</summary>
    Unknown = 0,

    /// <summary>The run is expected to pass.</summary>
    Pass = 1,

    /// <summary>The run is expected to fail.</summary>
    Fail = 2,

    /// <summary>The run is expected to be flaky.</summary>
    Flaky = 3,
}
|
||||
|
||||
/// <summary>
/// Persistence model for a failure signature used by predictive TTFS hints.
/// A signature groups recurring failures by scope, toolchain hash and error code.
/// All properties are init-only, so an instance is immutable once constructed.
/// </summary>
public sealed class FailureSignatureEntity
{
    /// <summary>Unique identifier of this signature.</summary>
    public Guid SignatureId { get; init; }

    /// <summary>Identifier of the tenant that owns this signature.</summary>
    public required string TenantId { get; init; }

    /// <summary>Creation timestamp. Defaults to the current UTC time when the object is constructed.</summary>
    public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Last-update timestamp. Defaults to the current UTC time when the object is constructed.</summary>
    public DateTimeOffset UpdatedAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Kind of scope this signature applies to.</summary>
    public FailureSignatureScopeType ScopeType { get; init; }

    /// <summary>Identifier within the scope (repo name, image digest, etc).</summary>
    public required string ScopeId { get; init; }

    /// <summary>Hash of the toolchain/build environment.</summary>
    public required string ToolchainHash { get; init; }

    /// <summary>Error code, or <c>null</c> when none is available.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Error category, or <c>null</c> when the error has not been classified.</summary>
    public ErrorCategory? ErrorCategory { get; init; }

    /// <summary>How many times this signature has been observed. Defaults to 1.</summary>
    public int OccurrenceCount { get; init; } = 1;

    /// <summary>First time this signature was observed. Defaults to the current UTC time at construction.</summary>
    public DateTimeOffset FirstSeenAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Most recent time this signature was observed. Defaults to the current UTC time at construction.</summary>
    public DateTimeOffset LastSeenAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Current resolution status. Defaults to <see cref="Models.ResolutionStatus.Unresolved"/>.</summary>
    public ResolutionStatus ResolutionStatus { get; init; } = ResolutionStatus.Unresolved;

    /// <summary>Free-form notes about the resolution, if any.</summary>
    public string? ResolutionNotes { get; init; }

    /// <summary>When the issue was resolved, or <c>null</c> if it has not been.</summary>
    public DateTimeOffset? ResolvedAt { get; init; }

    /// <summary>Who resolved the issue, or <c>null</c> if not recorded.</summary>
    public string? ResolvedBy { get; init; }

    /// <summary>Outcome predicted from this signature. Defaults to <see cref="Models.PredictedOutcome.Unknown"/>.</summary>
    public PredictedOutcome PredictedOutcome { get; init; } = PredictedOutcome.Unknown;

    /// <summary>
    /// Confidence of the prediction; the intended range is 0.0 to 1.0 (not enforced by this type).
    /// <c>null</c> when no confidence has been recorded.
    /// </summary>
    public decimal? ConfidenceScore { get; init; }
}
|
||||
@@ -0,0 +1,440 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StellaOps.Infrastructure.Postgres.Repositories;
|
||||
using StellaOps.Scheduler.Storage.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
|
||||
|
||||
/// <summary>
|
||||
/// PostgreSQL repository for failure signature operations.
|
||||
/// </summary>
|
||||
public sealed class FailureSignatureRepository : RepositoryBase<SchedulerDataSource>, IFailureSignatureRepository
|
||||
{
|
||||
/// <summary>
/// Initializes the repository; both arguments are forwarded unchanged to the base repository.
/// </summary>
/// <param name="dataSource">Scheduler data source used to open connections.</param>
/// <param name="logger">Logger passed to the base repository.</param>
public FailureSignatureRepository(
    SchedulerDataSource dataSource,
    ILogger<FailureSignatureRepository> logger)
    : base(dataSource, logger)
{
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Inserts the signature on a "writer" connection opened for the signature's tenant and returns
/// the row as stored (via <c>RETURNING *</c>). If <see cref="FailureSignatureEntity.SignatureId"/>
/// is <see cref="Guid.Empty"/>, a new identifier is generated by the parameter binding helper.
/// <c>resolved_at</c> and <c>resolved_by</c> are persisted as well so that an entity created in an
/// already-resolved state does not lose that information.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="signature"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">The insert statement returned no row.</exception>
public async Task<FailureSignatureEntity> CreateAsync(
    FailureSignatureEntity signature,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(signature);

    const string sql = """
        INSERT INTO scheduler.failure_signatures (
        signature_id, tenant_id, scope_type, scope_id, toolchain_hash,
        error_code, error_category, occurrence_count, first_seen_at, last_seen_at,
        resolution_status, resolution_notes, resolved_at, resolved_by,
        predicted_outcome, confidence_score
        )
        VALUES (
        @signature_id, @tenant_id, @scope_type, @scope_id, @toolchain_hash,
        @error_code, @error_category, @occurrence_count, @first_seen_at, @last_seen_at,
        @resolution_status, @resolution_notes, @resolved_at, @resolved_by,
        @predicted_outcome, @confidence_score
        )
        RETURNING *
        """;

    await using var connection = await DataSource.OpenConnectionAsync(signature.TenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    AddSignatureParameters(command, signature);

    // Bound here (not in the shared helper) so this method stays self-contained.
    AddParameter(command, "resolved_at", signature.ResolvedAt ?? (object)DBNull.Value);
    AddParameter(command, "resolved_by", signature.ResolvedBy ?? (object)DBNull.Value);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    // Fail with a clear message instead of letting the mapper throw on an unpositioned reader.
    if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        throw new InvalidOperationException("Insert into scheduler.failure_signatures returned no row.");
    }

    return MapSignature(reader);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Looks the row up by tenant and signature identifier; yields <c>null</c> when nothing matches.
/// </remarks>
public async Task<FailureSignatureEntity?> GetByIdAsync(
    string tenantId,
    Guid signatureId,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    var match = await QuerySingleOrDefaultAsync(
        tenantId,
        sql,
        command =>
        {
            AddParameter(command, "tenant_id", tenantId);
            AddParameter(command, "signature_id", signatureId);
        },
        MapSignature,
        cancellationToken).ConfigureAwait(false);

    return match;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Matches on tenant, scope type, scope id and toolchain hash. The error code matches either by
/// equality or, when <paramref name="errorCode"/> is <c>null</c>, against rows whose
/// <c>error_code</c> is NULL.
/// </remarks>
public async Task<FailureSignatureEntity?> GetByKeyAsync(
    string tenantId,
    FailureSignatureScopeType scopeType,
    string scopeId,
    string toolchainHash,
    string? errorCode,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND scope_type = @scope_type
        AND scope_id = @scope_id
        AND toolchain_hash = @toolchain_hash
        AND (error_code = @error_code OR (@error_code IS NULL AND error_code IS NULL))
        """;

    // Scope types are stored as the lower-cased enum member name.
    var scopeTypeText = scopeType.ToString().ToLowerInvariant();
    var errorCodeValue = (object?)errorCode ?? DBNull.Value;

    var match = await QuerySingleOrDefaultAsync(
        tenantId,
        sql,
        command =>
        {
            AddParameter(command, "tenant_id", tenantId);
            AddParameter(command, "scope_type", scopeTypeText);
            AddParameter(command, "scope_id", scopeId);
            AddParameter(command, "toolchain_hash", toolchainHash);
            AddParameter(command, "error_code", errorCodeValue);
        },
        MapSignature,
        cancellationToken).ConfigureAwait(false);

    return match;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Returns every signature of the tenant for the given scope, most recently seen first.
/// The query has no row limit.
/// </remarks>
public async Task<IReadOnlyList<FailureSignatureEntity>> GetByScopeAsync(
    string tenantId,
    FailureSignatureScopeType scopeType,
    string scopeId,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND scope_type = @scope_type
        AND scope_id = @scope_id
        ORDER BY last_seen_at DESC
        """;

    // Scope types are stored as the lower-cased enum member name.
    var scopeTypeText = scopeType.ToString().ToLowerInvariant();

    var signatures = await QueryListAsync(
        tenantId,
        sql,
        command =>
        {
            AddParameter(command, "tenant_id", tenantId);
            AddParameter(command, "scope_type", scopeTypeText);
            AddParameter(command, "scope_id", scopeId);
        },
        MapSignature,
        cancellationToken).ConfigureAwait(false);

    return signatures;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Returns at most <paramref name="limit"/> rows whose status is <c>'unresolved'</c>, ordered by
/// occurrence count (highest first) and then by last-seen time (newest first).
/// </remarks>
public async Task<IReadOnlyList<FailureSignatureEntity>> GetUnresolvedAsync(
    string tenantId,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND resolution_status = 'unresolved'
        ORDER BY occurrence_count DESC, last_seen_at DESC
        LIMIT @limit
        """;

    var signatures = await QueryListAsync(
        tenantId,
        sql,
        command =>
        {
            AddParameter(command, "tenant_id", tenantId);
            AddParameter(command, "limit", limit);
        },
        MapSignature,
        cancellationToken).ConfigureAwait(false);

    return signatures;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Returns at most <paramref name="limit"/> rows with the given predicted outcome and a confidence
/// score of at least <paramref name="minConfidence"/>, ordered by confidence (highest first) and
/// then by last-seen time (newest first).
/// </remarks>
public async Task<IReadOnlyList<FailureSignatureEntity>> GetByPredictedOutcomeAsync(
    string tenantId,
    PredictedOutcome outcome,
    decimal minConfidence = 0.5m,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        SELECT * FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND predicted_outcome = @predicted_outcome
        AND confidence_score >= @min_confidence
        ORDER BY confidence_score DESC, last_seen_at DESC
        LIMIT @limit
        """;

    // Outcomes are stored as the lower-cased enum member name.
    var outcomeText = outcome.ToString().ToLowerInvariant();

    var signatures = await QueryListAsync(
        tenantId,
        sql,
        command =>
        {
            AddParameter(command, "tenant_id", tenantId);
            AddParameter(command, "predicted_outcome", outcomeText);
            AddParameter(command, "min_confidence", minConfidence);
            AddParameter(command, "limit", limit);
        },
        MapSignature,
        cancellationToken).ConfigureAwait(false);

    return signatures;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Inserts a new signature with an occurrence count of 1, or — when a row with the same
/// (tenant, scope type, scope id, toolchain hash, error code) key already exists — increments
/// its occurrence count, refreshes <c>last_seen_at</c>/<c>updated_at</c>, and keeps the existing
/// error category unless a non-null one is supplied. Returns the row as stored.
/// </remarks>
/// <exception cref="InvalidOperationException">The upsert statement returned no row.</exception>
public async Task<FailureSignatureEntity> UpsertOccurrenceAsync(
    string tenantId,
    FailureSignatureScopeType scopeType,
    string scopeId,
    string toolchainHash,
    string? errorCode,
    ErrorCategory? errorCategory,
    CancellationToken cancellationToken = default)
{
    // NOTE(review): error_code is nullable and part of the ON CONFLICT target. With PostgreSQL's
    // default unique-index semantics NULLs are distinct, so a NULL error code would never conflict
    // and every occurrence would insert a new row. Confirm the backing index handles NULLs
    // (e.g. NULLS NOT DISTINCT) or normalize the key in the schema.
    const string sql = """
        INSERT INTO scheduler.failure_signatures (
        signature_id, tenant_id, scope_type, scope_id, toolchain_hash,
        error_code, error_category, occurrence_count, first_seen_at, last_seen_at
        )
        VALUES (
        gen_random_uuid(), @tenant_id, @scope_type, @scope_id, @toolchain_hash,
        @error_code, @error_category, 1, NOW(), NOW()
        )
        ON CONFLICT (tenant_id, scope_type, scope_id, toolchain_hash, error_code)
        DO UPDATE SET
        occurrence_count = scheduler.failure_signatures.occurrence_count + 1,
        last_seen_at = NOW(),
        updated_at = NOW(),
        error_category = COALESCE(EXCLUDED.error_category, scheduler.failure_signatures.error_category)
        RETURNING *
        """;

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    // Enum values are stored as their lower-cased member names.
    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "scope_type", scopeType.ToString().ToLowerInvariant());
    AddParameter(command, "scope_id", scopeId);
    AddParameter(command, "toolchain_hash", toolchainHash);
    AddParameter(command, "error_code", errorCode ?? (object)DBNull.Value);
    AddParameter(command, "error_category", errorCategory?.ToString().ToLowerInvariant() ?? (object)DBNull.Value);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    // Fail with a clear message instead of letting the mapper throw on an unpositioned reader.
    if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        throw new InvalidOperationException("Upsert into scheduler.failure_signatures returned no row.");
    }

    return MapSignature(reader);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Overwrites status, notes and resolver of the addressed row. <c>resolved_at</c> is stamped with
/// the database time only when the new status is <c>'resolved'</c>; for any other status the
/// stored value is left as it was. Returns <c>true</c> when at least one row was updated.
/// </remarks>
public async Task<bool> UpdateResolutionAsync(
    string tenantId,
    Guid signatureId,
    ResolutionStatus status,
    string? notes,
    string? resolvedBy,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        UPDATE scheduler.failure_signatures
        SET resolution_status = @resolution_status,
        resolution_notes = @resolution_notes,
        resolved_by = @resolved_by,
        resolved_at = CASE WHEN @resolution_status = 'resolved' THEN NOW() ELSE resolved_at END,
        updated_at = NOW()
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    // Status is stored as the lower-cased enum member name.
    var statusText = status.ToString().ToLowerInvariant();

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "signature_id", signatureId);
    AddParameter(command, "resolution_status", statusText);
    AddParameter(command, "resolution_notes", (object?)notes ?? DBNull.Value);
    AddParameter(command, "resolved_by", (object?)resolvedBy ?? DBNull.Value);

    return (await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false)) > 0;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Stores the predicted outcome and confidence on the addressed row and refreshes
/// <c>updated_at</c>. The confidence value is written as given; no range check is applied here.
/// Returns <c>true</c> when at least one row was updated.
/// </remarks>
public async Task<bool> UpdatePredictionAsync(
    string tenantId,
    Guid signatureId,
    PredictedOutcome outcome,
    decimal confidence,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        UPDATE scheduler.failure_signatures
        SET predicted_outcome = @predicted_outcome,
        confidence_score = @confidence_score,
        updated_at = NOW()
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    // Outcome is stored as the lower-cased enum member name.
    var outcomeText = outcome.ToString().ToLowerInvariant();

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "signature_id", signatureId);
    AddParameter(command, "predicted_outcome", outcomeText);
    AddParameter(command, "confidence_score", confidence);

    return (await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false)) > 0;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Deletes the row addressed by tenant and signature identifier.
/// Returns <c>true</c> when at least one row was removed.
/// </remarks>
public async Task<bool> DeleteAsync(
    string tenantId,
    Guid signatureId,
    CancellationToken cancellationToken = default)
{
    const string sql = """
        DELETE FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id AND signature_id = @signature_id
        """;

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "signature_id", signatureId);

    return (await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false)) > 0;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Deletes the tenant's signatures whose status is <c>'resolved'</c> and whose <c>resolved_at</c>
/// is earlier than "now (UTC) minus <paramref name="olderThan"/>". Returns the number of rows
/// deleted. The tenant filter is an exact match on <c>tenant_id</c>.
/// </remarks>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="olderThan"/> is negative. A negative window would place the cutoff in the
/// future and delete every resolved signature, so it is rejected.
/// </exception>
public async Task<int> PruneResolvedAsync(
    string tenantId,
    TimeSpan olderThan,
    CancellationToken cancellationToken = default)
{
    if (olderThan < TimeSpan.Zero)
    {
        throw new ArgumentOutOfRangeException(
            nameof(olderThan),
            olderThan,
            "Retention window must not be negative.");
    }

    const string sql = """
        DELETE FROM scheduler.failure_signatures
        WHERE tenant_id = @tenant_id
        AND resolution_status = 'resolved'
        AND resolved_at < @cutoff
        """;

    var cutoff = DateTimeOffset.UtcNow.Subtract(olderThan);

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    AddParameter(command, "tenant_id", tenantId);
    AddParameter(command, "cutoff", cutoff);

    return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Binds the insert parameters for <paramref name="signature"/> onto <paramref name="command"/>.
/// Enum values are bound as their lower-cased member names and absent optional values as
/// <see cref="DBNull.Value"/>. An empty <see cref="FailureSignatureEntity.SignatureId"/> is
/// replaced by a freshly generated identifier.
/// </summary>
private void AddSignatureParameters(NpgsqlCommand command, FailureSignatureEntity signature)
{
    var signatureId = signature.SignatureId != Guid.Empty ? signature.SignatureId : Guid.NewGuid();
    var scopeTypeText = signature.ScopeType.ToString().ToLowerInvariant();
    var errorCode = (object?)signature.ErrorCode ?? DBNull.Value;
    var errorCategory = signature.ErrorCategory is { } category
        ? category.ToString().ToLowerInvariant()
        : (object)DBNull.Value;
    var resolutionStatusText = signature.ResolutionStatus.ToString().ToLowerInvariant();
    var resolutionNotes = (object?)signature.ResolutionNotes ?? DBNull.Value;
    var predictedOutcomeText = signature.PredictedOutcome.ToString().ToLowerInvariant();
    var confidenceScore = signature.ConfidenceScore ?? (object)DBNull.Value;

    AddParameter(command, "signature_id", signatureId);
    AddParameter(command, "tenant_id", signature.TenantId);
    AddParameter(command, "scope_type", scopeTypeText);
    AddParameter(command, "scope_id", signature.ScopeId);
    AddParameter(command, "toolchain_hash", signature.ToolchainHash);
    AddParameter(command, "error_code", errorCode);
    AddParameter(command, "error_category", errorCategory);
    AddParameter(command, "occurrence_count", signature.OccurrenceCount);
    AddParameter(command, "first_seen_at", signature.FirstSeenAt);
    AddParameter(command, "last_seen_at", signature.LastSeenAt);
    AddParameter(command, "resolution_status", resolutionStatusText);
    AddParameter(command, "resolution_notes", resolutionNotes);
    AddParameter(command, "predicted_outcome", predictedOutcomeText);
    AddParameter(command, "confidence_score", confidenceScore);
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="FailureSignatureEntity"/> from the row the reader is currently positioned on.
/// Columns are looked up by name. Nullable columns map to <c>null</c>, except
/// <c>predicted_outcome</c>, which maps to <see cref="PredictedOutcome.Unknown"/> when NULL.
/// </summary>
private static FailureSignatureEntity MapSignature(NpgsqlDataReader reader)
{
    string Text(string column) => reader.GetString(reader.GetOrdinal(column));

    DateTimeOffset Timestamp(string column) =>
        reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal(column));

    string? OptionalText(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetString(ordinal);
    }

    DateTimeOffset? OptionalTimestamp(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetFieldValue<DateTimeOffset>(ordinal);
    }

    decimal? OptionalDecimal(string column)
    {
        var ordinal = reader.GetOrdinal(column);
        return reader.IsDBNull(ordinal) ? null : reader.GetDecimal(ordinal);
    }

    return new FailureSignatureEntity
    {
        SignatureId = reader.GetGuid(reader.GetOrdinal("signature_id")),
        TenantId = Text("tenant_id"),
        CreatedAt = Timestamp("created_at"),
        UpdatedAt = Timestamp("updated_at"),
        ScopeType = ParseScopeType(Text("scope_type")),
        ScopeId = Text("scope_id"),
        ToolchainHash = Text("toolchain_hash"),
        ErrorCode = OptionalText("error_code"),
        ErrorCategory = OptionalText("error_category") is { } rawCategory
            ? ParseErrorCategory(rawCategory)
            : (ErrorCategory?)null,
        OccurrenceCount = reader.GetInt32(reader.GetOrdinal("occurrence_count")),
        FirstSeenAt = Timestamp("first_seen_at"),
        LastSeenAt = Timestamp("last_seen_at"),
        ResolutionStatus = ParseResolutionStatus(Text("resolution_status")),
        ResolutionNotes = OptionalText("resolution_notes"),
        ResolvedAt = OptionalTimestamp("resolved_at"),
        ResolvedBy = OptionalText("resolved_by"),
        PredictedOutcome = OptionalText("predicted_outcome") is { } rawOutcome
            ? ParsePredictedOutcome(rawOutcome)
            : PredictedOutcome.Unknown,
        ConfidenceScore = OptionalDecimal("confidence_score")
    };
}
|
||||
|
||||
/// <summary>
/// Converts a stored scope type (compared after lower-casing with the invariant culture)
/// to <see cref="FailureSignatureScopeType"/>.
/// </summary>
/// <exception cref="ArgumentException">The value is not a known scope type.</exception>
private static FailureSignatureScopeType ParseScopeType(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "repo":
            return FailureSignatureScopeType.Repo;
        case "image":
            return FailureSignatureScopeType.Image;
        case "artifact":
            return FailureSignatureScopeType.Artifact;
        case "global":
            return FailureSignatureScopeType.Global;
        default:
            throw new ArgumentException($"Unknown scope type: {value}");
    }
}
|
||||
|
||||
/// <summary>
/// Converts a stored error category (compared after lower-casing with the invariant culture)
/// to <see cref="ErrorCategory"/>. Unrecognized values map to <see cref="ErrorCategory.Unknown"/>.
/// </summary>
private static ErrorCategory ParseErrorCategory(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "network":
            return ErrorCategory.Network;
        case "auth":
            return ErrorCategory.Auth;
        case "validation":
            return ErrorCategory.Validation;
        case "resource":
            return ErrorCategory.Resource;
        case "timeout":
            return ErrorCategory.Timeout;
        case "config":
            return ErrorCategory.Config;
        default:
            return ErrorCategory.Unknown;
    }
}
|
||||
|
||||
/// <summary>
/// Converts a stored resolution status (compared after lower-casing with the invariant culture)
/// to <see cref="ResolutionStatus"/>. Both "wont_fix" and "wontfix" are accepted for
/// <see cref="ResolutionStatus.WontFix"/>; unrecognized values map to
/// <see cref="ResolutionStatus.Unresolved"/>.
/// </summary>
private static ResolutionStatus ParseResolutionStatus(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "unresolved":
            return ResolutionStatus.Unresolved;
        case "investigating":
            return ResolutionStatus.Investigating;
        case "resolved":
            return ResolutionStatus.Resolved;
        case "wont_fix":
        case "wontfix":
            return ResolutionStatus.WontFix;
        default:
            return ResolutionStatus.Unresolved;
    }
}
|
||||
|
||||
/// <summary>
/// Converts a stored predicted outcome (compared after lower-casing with the invariant culture)
/// to <see cref="PredictedOutcome"/>. Unrecognized values map to <see cref="PredictedOutcome.Unknown"/>.
/// </summary>
private static PredictedOutcome ParsePredictedOutcome(string value)
{
    switch (value.ToLowerInvariant())
    {
        case "pass":
            return PredictedOutcome.Pass;
        case "fail":
            return PredictedOutcome.Fail;
        case "flaky":
            return PredictedOutcome.Flaky;
        default:
            return PredictedOutcome.Unknown;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
using StellaOps.Scheduler.Storage.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// Storage contract for failure signatures. Every operation is scoped to a tenant.
/// </summary>
public interface IFailureSignatureRepository
{
    /// <summary>Creates a new failure signature.</summary>
    /// <param name="signature">The signature to store; its tenant is taken from the entity.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The created signature.</returns>
    Task<FailureSignatureEntity> CreateAsync(FailureSignatureEntity signature, CancellationToken cancellationToken = default);

    /// <summary>Gets a failure signature by its identifier.</summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature, or <c>null</c> when none matches.</returns>
    Task<FailureSignatureEntity?> GetByIdAsync(string tenantId, Guid signatureId, CancellationToken cancellationToken = default);

    /// <summary>Gets a failure signature by its unique key (scope + toolchain + error code).</summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="scopeType">Kind of scope.</param>
    /// <param name="scopeId">Identifier within the scope.</param>
    /// <param name="toolchainHash">Hash of the toolchain/build environment.</param>
    /// <param name="errorCode">Error code, or <c>null</c> for signatures without one.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature, or <c>null</c> when none matches.</returns>
    Task<FailureSignatureEntity?> GetByKeyAsync(
        string tenantId,
        FailureSignatureScopeType scopeType,
        string scopeId,
        string toolchainHash,
        string? errorCode,
        CancellationToken cancellationToken = default);

    /// <summary>Gets all failure signatures for a scope.</summary>
    /// <param name="tenantId">Tenant that owns the signatures.</param>
    /// <param name="scopeType">Kind of scope.</param>
    /// <param name="scopeId">Identifier within the scope.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching signatures.</returns>
    Task<IReadOnlyList<FailureSignatureEntity>> GetByScopeAsync(
        string tenantId,
        FailureSignatureScopeType scopeType,
        string scopeId,
        CancellationToken cancellationToken = default);

    /// <summary>Gets unresolved failure signatures for a tenant.</summary>
    /// <param name="tenantId">Tenant that owns the signatures.</param>
    /// <param name="limit">Maximum number of signatures to return.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The unresolved signatures, at most <paramref name="limit"/> of them.</returns>
    Task<IReadOnlyList<FailureSignatureEntity>> GetUnresolvedAsync(
        string tenantId,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>Gets failure signatures matching a predicted outcome.</summary>
    /// <param name="tenantId">Tenant that owns the signatures.</param>
    /// <param name="outcome">Predicted outcome to match.</param>
    /// <param name="minConfidence">Minimum confidence score a signature must have.</param>
    /// <param name="limit">Maximum number of signatures to return.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching signatures, at most <paramref name="limit"/> of them.</returns>
    Task<IReadOnlyList<FailureSignatureEntity>> GetByPredictedOutcomeAsync(
        string tenantId,
        PredictedOutcome outcome,
        decimal minConfidence = 0.5m,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Increments the occurrence count and updates the last-seen timestamp of a signature,
    /// creating the signature if it does not exist yet (upsert).
    /// </summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="scopeType">Kind of scope.</param>
    /// <param name="scopeId">Identifier within the scope.</param>
    /// <param name="toolchainHash">Hash of the toolchain/build environment.</param>
    /// <param name="errorCode">Error code, if available.</param>
    /// <param name="errorCategory">Error category, if known.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The signature after the upsert.</returns>
    Task<FailureSignatureEntity> UpsertOccurrenceAsync(
        string tenantId,
        FailureSignatureScopeType scopeType,
        string scopeId,
        string toolchainHash,
        string? errorCode,
        ErrorCategory? errorCategory,
        CancellationToken cancellationToken = default);

    /// <summary>Updates the resolution status of a signature.</summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="status">New resolution status.</param>
    /// <param name="notes">Optional notes about the resolution.</param>
    /// <param name="resolvedBy">Optional identity of who resolved the issue.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><c>true</c> when a signature was updated; otherwise <c>false</c>.</returns>
    Task<bool> UpdateResolutionAsync(
        string tenantId,
        Guid signatureId,
        ResolutionStatus status,
        string? notes,
        string? resolvedBy,
        CancellationToken cancellationToken = default);

    /// <summary>Updates the predicted outcome for a signature.</summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="outcome">New predicted outcome.</param>
    /// <param name="confidence">Confidence score for the prediction.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><c>true</c> when a signature was updated; otherwise <c>false</c>.</returns>
    Task<bool> UpdatePredictionAsync(
        string tenantId,
        Guid signatureId,
        PredictedOutcome outcome,
        decimal confidence,
        CancellationToken cancellationToken = default);

    /// <summary>Deletes a failure signature.</summary>
    /// <param name="tenantId">Tenant that owns the signature.</param>
    /// <param name="signatureId">Identifier of the signature.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><c>true</c> when a signature was deleted; otherwise <c>false</c>.</returns>
    Task<bool> DeleteAsync(string tenantId, Guid signatureId, CancellationToken cancellationToken = default);

    /// <summary>Prunes old resolved signatures.</summary>
    /// <param name="tenantId">Tenant whose signatures are pruned.</param>
    /// <param name="olderThan">Age a resolved signature must exceed to be pruned.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The number of signatures removed.</returns>
    Task<int> PruneResolvedAsync(string tenantId, TimeSpan olderThan, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,311 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Scheduler.Storage.Postgres.Models;
|
||||
using StellaOps.Scheduler.Storage.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Worker.Indexing;
|
||||
|
||||
/// <summary>
/// Settings that control the failure signature indexer.
/// </summary>
public sealed class FailureSignatureIndexerOptions
{
    /// <summary>Time to wait between indexing runs. Defaults to 5 minutes.</summary>
    public TimeSpan IndexInterval { get; set; } = TimeSpan.FromMinutes(5);

    /// <summary>Whether the indexer runs at all. Defaults to <c>true</c>.</summary>
    public bool Enabled { get; set; } = true;

    /// <summary>Number of job failures processed per batch. Defaults to 100.</summary>
    public int BatchSize { get; set; } = 100;

    /// <summary>Age beyond which resolved signatures are pruned. Defaults to 90 days.</summary>
    public TimeSpan PruneResolvedOlderThan { get; set; } = TimeSpan.FromDays(90);
}
|
||||
|
||||
/// <summary>
|
||||
/// Background service that indexes job failures into failure signatures.
|
||||
/// Analyzes completed jobs to identify patterns for predictive TTFS hints.
|
||||
/// </summary>
|
||||
public sealed class FailureSignatureIndexer : BackgroundService
|
||||
{
|
||||
// Dependencies captured at construction time.
private readonly IFailureSignatureRepository _signatureRepository;
private readonly IJobRepository _jobRepository;
private readonly IJobHistoryRepository _historyRepository;
private readonly IOptions<FailureSignatureIndexerOptions> _options;
private readonly ILogger<FailureSignatureIndexer> _logger;

/// <summary>
/// Creates the indexer and stores its dependencies.
/// </summary>
/// <param name="signatureRepository">Repository that signatures are upserted into and pruned from.</param>
/// <param name="jobRepository">Job repository; stored for later use.</param>
/// <param name="historyRepository">Source of recently failed jobs.</param>
/// <param name="options">Indexer settings.</param>
/// <param name="logger">Logger for indexer diagnostics.</param>
public FailureSignatureIndexer(
    IFailureSignatureRepository signatureRepository,
    IJobRepository jobRepository,
    IJobHistoryRepository historyRepository,
    IOptions<FailureSignatureIndexerOptions> options,
    ILogger<FailureSignatureIndexer> logger)
{
    (_signatureRepository, _jobRepository, _historyRepository, _options, _logger) =
        (signatureRepository, jobRepository, historyRepository, options, logger);
}
|
||||
|
||||
/// <summary>
/// Main loop of the background service. When the indexer is enabled, it repeatedly indexes
/// failures, optionally prunes old signatures, and then waits for the configured interval,
/// until <paramref name="stoppingToken"/> is cancelled. Errors in a cycle are logged and the
/// loop continues with the next cycle.
/// </summary>
/// <param name="stoppingToken">Signalled when the host is shutting the service down.</param>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    if (!_options.Value.Enabled)
    {
        _logger.LogInformation("Failure signature indexer is disabled");
        return;
    }

    _logger.LogInformation("Starting failure signature indexer with interval {Interval}",
        _options.Value.IndexInterval);

    while (!stoppingToken.IsCancellationRequested)
    {
        try
        {
            await IndexFailuresAsync(stoppingToken);
            await PruneOldSignaturesAsync(stoppingToken);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            break;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error during failure signature indexing");
        }

        try
        {
            await Task.Delay(_options.Value.IndexInterval, stoppingToken);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Shutdown requested while idle: leave the loop normally instead of
            // letting the cancellation exception escape ExecuteAsync.
            break;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Fetches one batch of recently failed jobs and upserts a failure signature occurrence for
/// each job a signature can be extracted from. A failure on one job is logged as a warning
/// and does not stop the batch; cancellation, however, aborts the batch immediately.
/// </summary>
/// <param name="ct">Token used to cancel the batch.</param>
private async Task IndexFailuresAsync(CancellationToken ct)
{
    _logger.LogDebug("Starting failure indexing batch");

    // NOTE(review): nothing in this method marks a job as indexed. Unless
    // GetRecentFailedJobsAsync filters out already-processed jobs, the same failures are
    // counted again on every cycle — confirm against the history repository.
    var failedJobs = await _historyRepository.GetRecentFailedJobsAsync(
        _options.Value.BatchSize,
        ct);

    var indexed = 0;
    foreach (var job in failedJobs)
    {
        // Stop promptly on shutdown rather than walking the rest of the batch.
        ct.ThrowIfCancellationRequested();

        try
        {
            var signature = await ExtractSignatureAsync(job, ct);
            if (signature != null)
            {
                await _signatureRepository.UpsertOccurrenceAsync(
                    job.TenantId,
                    signature.ScopeType,
                    signature.ScopeId,
                    signature.ToolchainHash,
                    signature.ErrorCode,
                    signature.ErrorCategory,
                    ct);
                indexed++;
            }
        }
        // Let shutdown cancellation propagate; only genuine per-job failures are swallowed.
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            _logger.LogWarning(ex, "Failed to index signature for job {JobId}", job.JobId);
        }
    }

    if (indexed > 0)
    {
        _logger.LogInformation("Indexed {Count} failure signatures", indexed);
    }
}
|
||||
|
||||
/// <summary>
/// Occasionally prunes resolved signatures older than the configured age.
/// </summary>
/// <remarks>
/// Pruning is expensive, so it only runs on roughly one call in twelve, chosen
/// at random. Repository failures are logged as warnings and swallowed;
/// cancellation is allowed to propagate to the caller.
/// </remarks>
/// <param name="ct">Cancellation token forwarded to the repository.</param>
private async Task PruneOldSignaturesAsync(CancellationToken ct)
{
    // One in PruneOdds calls actually performs the prune.
    const int PruneOdds = 12;

    if (Random.Shared.Next(0, PruneOdds) != 0)
    {
        return;
    }

    _logger.LogDebug("Starting resolved signature pruning");

    // Get all tenants with resolved signatures
    // In production, this would be paginated
    try
    {
        // NOTE(review): "*" is intended to address all tenants; confirm the
        // repository treats it as a wildcard rather than a literal tenant id.
        var pruned = await _signatureRepository.PruneResolvedAsync(
            "*", // All tenants
            _options.Value.PruneResolvedOlderThan,
            ct);

        if (pruned > 0)
        {
            _logger.LogInformation("Pruned {Count} old resolved signatures", pruned);
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
    {
        // Shutdown is excluded by the filter so it is neither reported as a
        // prune failure nor hidden from the caller.
        _logger.LogWarning(ex, "Failed to prune resolved signatures");
    }
}
|
||||
|
||||
/// <summary>
/// Derives the failure signature of a failed job.
/// </summary>
/// <param name="job">The failed job to analyze.</param>
/// <param name="ct">Cancellation token; not consulted, the work is synchronous.</param>
/// <returns>
/// A completed task holding the extraction, or <c>null</c> when either the
/// scope identifier or the toolchain hash is null or empty.
/// </returns>
private Task<FailureSignatureExtraction?> ExtractSignatureAsync(
    FailedJobRecord job,
    CancellationToken ct)
{
    var kind = DetermineScopeType(job);
    var id = ExtractScopeId(job, kind);
    var fingerprint = ComputeToolchainHash(job);
    var classification = ClassifyError(job);

    var incomplete = string.IsNullOrEmpty(id) || string.IsNullOrEmpty(fingerprint);

    FailureSignatureExtraction? result = incomplete
        ? null
        : new FailureSignatureExtraction
        {
            ScopeType = kind,
            ScopeId = id,
            ToolchainHash = fingerprint,
            ErrorCode = classification.ErrorCode,
            ErrorCategory = classification.Category
        };

    return Task.FromResult(result);
}
|
||||
|
||||
/// <summary>
/// Picks the scope of a failed job from the first populated identifier, in
/// order of precedence: image digest, artifact digest, repository.
/// </summary>
/// <param name="job">The failed job to inspect.</param>
/// <returns>
/// The matching scope type, or <see cref="FailureSignatureScopeType.Global"/>
/// when none of the three identifiers is set.
/// </returns>
private static FailureSignatureScopeType DetermineScopeType(FailedJobRecord job)
{
    static bool Present(string? value) => !string.IsNullOrEmpty(value);

    return Present(job.ImageDigest) ? FailureSignatureScopeType.Image
        : Present(job.ArtifactDigest) ? FailureSignatureScopeType.Artifact
        : Present(job.Repository) ? FailureSignatureScopeType.Repo
        : FailureSignatureScopeType.Global;
}
|
||||
|
||||
/// <summary>
/// Returns the identifier that corresponds to the given scope type.
/// </summary>
/// <param name="job">The failed job supplying the identifiers.</param>
/// <param name="scopeType">The scope whose identifier is wanted.</param>
/// <returns>
/// The job's image digest, artifact digest or repository for the matching
/// scope (empty when that value is null), the literal <c>"global"</c> for the
/// global scope, and an empty string for any other value.
/// </returns>
private static string ExtractScopeId(FailedJobRecord job, FailureSignatureScopeType scopeType)
{
    switch (scopeType)
    {
        case FailureSignatureScopeType.Image:
            return job.ImageDigest ?? "";

        case FailureSignatureScopeType.Artifact:
            return job.ArtifactDigest ?? "";

        case FailureSignatureScopeType.Repo:
            return job.Repository ?? "";

        case FailureSignatureScopeType.Global:
            return "global";

        default:
            return "";
    }
}
|
||||
|
||||
/// <summary>
/// Fingerprints the environment a job ran in from its job type, scanner
/// version and runtime version.
/// </summary>
/// <param name="job">The failed job supplying the three components.</param>
/// <returns>
/// Sixteen lowercase hex characters: the first 8 bytes of the SHA-256 of the
/// components joined with <c>|</c>. A missing scanner or runtime version is
/// represented as <c>"unknown"</c>.
/// </returns>
private static string ComputeToolchainHash(FailedJobRecord job)
{
    var scanner = job.ScannerVersion ?? "unknown";
    var runtime = job.RuntimeVersion ?? "unknown";
    var material = $"{job.JobType}|{scanner}|{runtime}";

    var digest = System.Security.Cryptography.SHA256.HashData(
        System.Text.Encoding.UTF8.GetBytes(material));

    // Only the leading 8 bytes of the digest are kept.
    return Convert.ToHexStringLower(digest, 0, 8);
}
|
||||
|
||||
/// <summary>
/// Assigns an error category to a failed job by looking for keywords in its
/// lower-cased error message.
/// </summary>
/// <param name="job">The failed job whose error text is examined.</param>
/// <returns>
/// The job's error code, passed through unchanged, together with the first
/// category whose keywords occur in the message. Categories are tried in the
/// order timeout, auth, network, validation, resource, config;
/// <see cref="ErrorCategory.Unknown"/> is used when nothing matches or the
/// message is null.
/// </returns>
private static (string? ErrorCode, ErrorCategory Category) ClassifyError(FailedJobRecord job)
{
    var text = job.Error?.ToLowerInvariant() ?? "";

    bool Mentions(params string[] needles) =>
        Array.Exists(needles, needle => text.Contains(needle));

    // Arms are evaluated top to bottom, so earlier categories take precedence.
    var category = text switch
    {
        _ when Mentions("timeout", "timed out") => ErrorCategory.Timeout,
        _ when Mentions("unauthorized", "authentication", "401") => ErrorCategory.Auth,
        _ when Mentions("network", "connection refused", "dns") => ErrorCategory.Network,
        _ when Mentions("validation", "invalid", "malformed") => ErrorCategory.Validation,
        _ when Mentions("out of memory", "disk full", "resource") => ErrorCategory.Resource,
        _ when Mentions("config", "configuration") => ErrorCategory.Config,
        _ => ErrorCategory.Unknown
    };

    return (job.ErrorCode, category);
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Signature fields extracted from a single failed job.
/// </summary>
internal sealed class FailureSignatureExtraction
{
    /// <summary>Scope the failure is attributed to.</summary>
    public FailureSignatureScopeType ScopeType { get; init; }

    /// <summary>Identifier of the affected item within <see cref="ScopeType"/>.</summary>
    public required string ScopeId { get; init; }

    /// <summary>Fingerprint of the toolchain the job ran with.</summary>
    public required string ToolchainHash { get; init; }

    /// <summary>Error code reported for the failure, if any.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Category assigned to the failure.</summary>
    public ErrorCategory ErrorCategory { get; init; }
}
|
||||
|
||||
/// <summary>
/// A failed job as consumed by failure signature extraction.
/// </summary>
public sealed record FailedJobRecord
{
    /// <summary>Identifier of the failed job.</summary>
    public required Guid JobId { get; init; }

    /// <summary>Tenant the job belongs to.</summary>
    public required string TenantId { get; init; }

    /// <summary>Type of the job.</summary>
    public required string JobType { get; init; }

    /// <summary>Image digest associated with the job, if any.</summary>
    public string? ImageDigest { get; init; }

    /// <summary>Artifact digest associated with the job, if any.</summary>
    public string? ArtifactDigest { get; init; }

    /// <summary>Repository associated with the job, if any.</summary>
    public string? Repository { get; init; }

    /// <summary>Error message of the failure, if any.</summary>
    public string? Error { get; init; }

    /// <summary>Error code of the failure, if any.</summary>
    public string? ErrorCode { get; init; }

    /// <summary>Scanner version the job ran with, if known.</summary>
    public string? ScannerVersion { get; init; }

    /// <summary>Runtime version the job ran with, if known.</summary>
    public string? RuntimeVersion { get; init; }

    /// <summary>Time at which the job failed.</summary>
    public DateTimeOffset FailedAt { get; init; }
}
|
||||
Reference in New Issue
Block a user