@@ -0,0 +1,39 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Authority.Storage.Postgres;

/// <summary>
/// PostgreSQL data source for the Authority module.
/// Manages connections with tenant context for authentication and authorization data.
/// </summary>
public sealed class AuthorityDataSource : DataSourceBase
{
    /// <summary>
    /// Default schema name for Authority tables.
    /// </summary>
    public const string DefaultSchemaName = "auth";

    /// <summary>
    /// Creates a new Authority data source.
    /// </summary>
    public AuthorityDataSource(IOptions<PostgresOptions> options, ILogger<AuthorityDataSource> logger)
        : base(CreateOptions(options.Value), logger)
    {
    }

    /// <inheritdoc />
    protected override string ModuleName => "Authority";

    private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
    {
        // Use default schema if not specified
        if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
        {
            baseOptions.SchemaName = DefaultSchemaName;
        }
        return baseOptions;
    }
}
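For orientation, a minimal, hypothetical sketch of how a caller might obtain a tenant-scoped connection from this data source. The OpenConnectionAsync(tenantId, role, ct) and OpenSystemConnectionAsync(ct) signatures are inferred from the repository call sites later in this commit; DataSourceBase itself lives in StellaOps.Infrastructure.Postgres and is not part of the diff.

    // Hypothetical usage sketch; method names and return types are inferred
    // from the repository call sites below, not confirmed by this diff.
    await using var connection = await dataSource
        .OpenConnectionAsync("acme", "writer", cancellationToken)
        .ConfigureAwait(false);

    await using var command = new NpgsqlCommand(
        "SELECT COUNT(*) FROM auth.users WHERE tenant_id = @tenant_id",
        connection);
    command.Parameters.AddWithValue("tenant_id", "acme");
    var userCount = (long)(await command.ExecuteScalarAsync(cancellationToken))!;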
@@ -0,0 +1,232 @@
-- Authority Schema Migration 001: Initial Schema
-- Creates the auth schema for IAM, tenants, users, and tokens

-- Create schema
CREATE SCHEMA IF NOT EXISTS auth;

-- Tenants table
CREATE TABLE IF NOT EXISTS auth.tenants (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    slug TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    description TEXT,
    contact_email TEXT,
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    settings JSONB NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by TEXT,
    updated_by TEXT
);

CREATE INDEX IF NOT EXISTS idx_tenants_enabled ON auth.tenants(enabled);
CREATE INDEX IF NOT EXISTS idx_tenants_created_at ON auth.tenants(created_at);

-- Users table
CREATE TABLE IF NOT EXISTS auth.users (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    username TEXT NOT NULL,
    email TEXT NOT NULL,
    display_name TEXT,
    password_hash TEXT,
    password_salt TEXT,
    password_algorithm TEXT DEFAULT 'argon2id',
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    email_verified BOOLEAN NOT NULL DEFAULT FALSE,
    mfa_enabled BOOLEAN NOT NULL DEFAULT FALSE,
    mfa_secret TEXT,
    mfa_backup_codes TEXT,
    failed_login_attempts INT NOT NULL DEFAULT 0,
    locked_until TIMESTAMPTZ,
    last_login_at TIMESTAMPTZ,
    password_changed_at TIMESTAMPTZ,
    password_expires_at TIMESTAMPTZ,
    settings JSONB NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by TEXT,
    updated_by TEXT,
    UNIQUE(tenant_id, username),
    UNIQUE(tenant_id, email)
);

CREATE INDEX IF NOT EXISTS idx_users_tenant_id ON auth.users(tenant_id);
CREATE INDEX IF NOT EXISTS idx_users_enabled ON auth.users(tenant_id, enabled);
CREATE INDEX IF NOT EXISTS idx_users_email ON auth.users(tenant_id, email);

-- Roles table
CREATE TABLE IF NOT EXISTS auth.roles (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    name TEXT NOT NULL,
    display_name TEXT,
    description TEXT,
    is_system BOOLEAN NOT NULL DEFAULT FALSE,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name)
);

CREATE INDEX IF NOT EXISTS idx_roles_tenant_id ON auth.roles(tenant_id);

-- Permissions table
CREATE TABLE IF NOT EXISTS auth.permissions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    name TEXT NOT NULL,
    resource TEXT NOT NULL,
    action TEXT NOT NULL,
    description TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name)
);

CREATE INDEX IF NOT EXISTS idx_permissions_tenant_id ON auth.permissions(tenant_id);
CREATE INDEX IF NOT EXISTS idx_permissions_resource ON auth.permissions(tenant_id, resource);

-- Role-Permission assignments
CREATE TABLE IF NOT EXISTS auth.role_permissions (
    role_id UUID NOT NULL REFERENCES auth.roles(id) ON DELETE CASCADE,
    permission_id UUID NOT NULL REFERENCES auth.permissions(id) ON DELETE CASCADE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (role_id, permission_id)
);

-- User-Role assignments
CREATE TABLE IF NOT EXISTS auth.user_roles (
    user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
    role_id UUID NOT NULL REFERENCES auth.roles(id) ON DELETE CASCADE,
    granted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    granted_by TEXT,
    expires_at TIMESTAMPTZ,
    PRIMARY KEY (user_id, role_id)
);

-- API Keys table
CREATE TABLE IF NOT EXISTS auth.api_keys (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    key_hash TEXT NOT NULL,
    key_prefix TEXT NOT NULL,
    scopes TEXT[] NOT NULL DEFAULT '{}',
    status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'revoked', 'expired')),
    last_used_at TIMESTAMPTZ,
    expires_at TIMESTAMPTZ,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    revoked_at TIMESTAMPTZ,
    revoked_by TEXT
);

CREATE INDEX IF NOT EXISTS idx_api_keys_tenant_id ON auth.api_keys(tenant_id);
CREATE INDEX IF NOT EXISTS idx_api_keys_key_prefix ON auth.api_keys(key_prefix);
CREATE INDEX IF NOT EXISTS idx_api_keys_user_id ON auth.api_keys(user_id);
CREATE INDEX IF NOT EXISTS idx_api_keys_status ON auth.api_keys(tenant_id, status);

-- Tokens table (access tokens)
CREATE TABLE IF NOT EXISTS auth.tokens (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
    token_hash TEXT NOT NULL UNIQUE,
    token_type TEXT NOT NULL DEFAULT 'access' CHECK (token_type IN ('access', 'refresh', 'api')),
    scopes TEXT[] NOT NULL DEFAULT '{}',
    client_id TEXT,
    issued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    expires_at TIMESTAMPTZ NOT NULL,
    revoked_at TIMESTAMPTZ,
    revoked_by TEXT,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX IF NOT EXISTS idx_tokens_tenant_id ON auth.tokens(tenant_id);
CREATE INDEX IF NOT EXISTS idx_tokens_user_id ON auth.tokens(user_id);
CREATE INDEX IF NOT EXISTS idx_tokens_expires_at ON auth.tokens(expires_at);
CREATE INDEX IF NOT EXISTS idx_tokens_token_hash ON auth.tokens(token_hash);

-- Refresh Tokens table
CREATE TABLE IF NOT EXISTS auth.refresh_tokens (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
    token_hash TEXT NOT NULL UNIQUE,
    access_token_id UUID REFERENCES auth.tokens(id) ON DELETE SET NULL,
    client_id TEXT,
    issued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    expires_at TIMESTAMPTZ NOT NULL,
    revoked_at TIMESTAMPTZ,
    revoked_by TEXT,
    replaced_by UUID,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX IF NOT EXISTS idx_refresh_tokens_tenant_id ON auth.refresh_tokens(tenant_id);
CREATE INDEX IF NOT EXISTS idx_refresh_tokens_user_id ON auth.refresh_tokens(user_id);
CREATE INDEX IF NOT EXISTS idx_refresh_tokens_expires_at ON auth.refresh_tokens(expires_at);

-- Sessions table
CREATE TABLE IF NOT EXISTS auth.sessions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
    user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
    session_token_hash TEXT NOT NULL UNIQUE,
    ip_address TEXT,
    user_agent TEXT,
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_activity_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    expires_at TIMESTAMPTZ NOT NULL,
    ended_at TIMESTAMPTZ,
    end_reason TEXT,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX IF NOT EXISTS idx_sessions_tenant_id ON auth.sessions(tenant_id);
CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON auth.sessions(user_id);
CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON auth.sessions(expires_at);

-- Audit log table
CREATE TABLE IF NOT EXISTS auth.audit (
    id BIGSERIAL PRIMARY KEY,
    tenant_id TEXT NOT NULL,
    user_id UUID,
    action TEXT NOT NULL,
    resource_type TEXT NOT NULL,
    resource_id TEXT,
    old_value JSONB,
    new_value JSONB,
    ip_address TEXT,
    user_agent TEXT,
    correlation_id TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_audit_tenant_id ON auth.audit(tenant_id);
CREATE INDEX IF NOT EXISTS idx_audit_user_id ON auth.audit(user_id);
CREATE INDEX IF NOT EXISTS idx_audit_action ON auth.audit(action);
CREATE INDEX IF NOT EXISTS idx_audit_resource ON auth.audit(resource_type, resource_id);
CREATE INDEX IF NOT EXISTS idx_audit_created_at ON auth.audit(created_at);
CREATE INDEX IF NOT EXISTS idx_audit_correlation_id ON auth.audit(correlation_id);

-- Function to update updated_at timestamp
CREATE OR REPLACE FUNCTION auth.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Triggers for updated_at
CREATE TRIGGER trg_tenants_updated_at
    BEFORE UPDATE ON auth.tenants
    FOR EACH ROW EXECUTE FUNCTION auth.update_updated_at();

CREATE TRIGGER trg_users_updated_at
    BEFORE UPDATE ON auth.users
    FOR EACH ROW EXECUTE FUNCTION auth.update_updated_at();

CREATE TRIGGER trg_roles_updated_at
    BEFORE UPDATE ON auth.roles
    FOR EACH ROW EXECUTE FUNCTION auth.update_updated_at();
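A hypothetical smoke test for the trigger wiring above (not part of the commit); run the statements as separate transactions so that NOW() advances between the INSERT and the UPDATE:

    -- Insert, update, then confirm trg_tenants_updated_at bumped updated_at.
    INSERT INTO auth.tenants (slug, name) VALUES ('acme', 'Acme Corp');
    UPDATE auth.tenants SET name = 'Acme Corporation' WHERE slug = 'acme';
    SELECT updated_at > created_at AS trigger_fired
    FROM auth.tenants
    WHERE slug = 'acme';  -- expect: t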
@@ -0,0 +1,62 @@
namespace StellaOps.Authority.Storage.Postgres.Models;

/// <summary>
/// Represents a tenant entity in the auth schema.
/// </summary>
public sealed class TenantEntity
{
    /// <summary>
    /// Unique tenant identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant slug/key (unique).
    /// </summary>
    public required string Slug { get; init; }

    /// <summary>
    /// Display name.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Optional description.
    /// </summary>
    public string? Description { get; init; }

    /// <summary>
    /// Contact email for the tenant.
    /// </summary>
    public string? ContactEmail { get; init; }

    /// <summary>
    /// Tenant is enabled.
    /// </summary>
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// Tenant settings as JSON.
    /// </summary>
    public string Settings { get; init; } = "{}";

    /// <summary>
    /// Tenant metadata as JSON.
    /// </summary>
    public string Metadata { get; init; } = "{}";

    /// <summary>
    /// When the tenant was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the tenant was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }

    /// <summary>
    /// User who created the tenant.
    /// </summary>
    public string? CreatedBy { get; init; }
}
@@ -0,0 +1,112 @@
namespace StellaOps.Authority.Storage.Postgres.Models;

/// <summary>
/// Represents a user entity in the auth schema.
/// </summary>
public sealed class UserEntity
{
    /// <summary>
    /// Unique user identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant this user belongs to.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// Username (unique per tenant).
    /// </summary>
    public required string Username { get; init; }

    /// <summary>
    /// Email address (unique per tenant).
    /// </summary>
    public required string Email { get; init; }

    /// <summary>
    /// User's display name.
    /// </summary>
    public string? DisplayName { get; init; }

    /// <summary>
    /// Argon2id password hash.
    /// </summary>
    public string? PasswordHash { get; init; }

    /// <summary>
    /// Password salt.
    /// </summary>
    public string? PasswordSalt { get; init; }

    /// <summary>
    /// User is enabled.
    /// </summary>
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// Email has been verified.
    /// </summary>
    public bool EmailVerified { get; init; }

    /// <summary>
    /// MFA is enabled for this user.
    /// </summary>
    public bool MfaEnabled { get; init; }

    /// <summary>
    /// MFA secret (encrypted).
    /// </summary>
    public string? MfaSecret { get; init; }

    /// <summary>
    /// MFA backup codes (encrypted JSON array).
    /// </summary>
    public string? MfaBackupCodes { get; init; }

    /// <summary>
    /// Number of failed login attempts.
    /// </summary>
    public int FailedLoginAttempts { get; init; }

    /// <summary>
    /// Account locked until this time.
    /// </summary>
    public DateTimeOffset? LockedUntil { get; init; }

    /// <summary>
    /// Last successful login time.
    /// </summary>
    public DateTimeOffset? LastLoginAt { get; init; }

    /// <summary>
    /// When the password was last changed.
    /// </summary>
    public DateTimeOffset? PasswordChangedAt { get; init; }

    /// <summary>
    /// User settings as JSON.
    /// </summary>
    public string Settings { get; init; } = "{}";

    /// <summary>
    /// User metadata as JSON.
    /// </summary>
    public string Metadata { get; init; } = "{}";

    /// <summary>
    /// When the user was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the user was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }

    /// <summary>
    /// User who created this user.
    /// </summary>
    public string? CreatedBy { get; init; }
}
@@ -0,0 +1,48 @@
using StellaOps.Authority.Storage.Postgres.Models;

namespace StellaOps.Authority.Storage.Postgres.Repositories;

/// <summary>
/// Repository interface for tenant operations.
/// </summary>
public interface ITenantRepository
{
    /// <summary>
    /// Creates a new tenant.
    /// </summary>
    Task<TenantEntity> CreateAsync(TenantEntity tenant, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a tenant by ID.
    /// </summary>
    Task<TenantEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a tenant by slug.
    /// </summary>
    Task<TenantEntity?> GetBySlugAsync(string slug, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all tenants with optional filtering.
    /// </summary>
    Task<IReadOnlyList<TenantEntity>> GetAllAsync(
        bool? enabled = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates a tenant.
    /// </summary>
    Task<bool> UpdateAsync(TenantEntity tenant, CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a tenant.
    /// </summary>
    Task<bool> DeleteAsync(Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks if a tenant slug exists.
    /// </summary>
    Task<bool> SlugExistsAsync(string slug, CancellationToken cancellationToken = default);
}
@@ -0,0 +1,76 @@
using StellaOps.Authority.Storage.Postgres.Models;

namespace StellaOps.Authority.Storage.Postgres.Repositories;

/// <summary>
/// Repository interface for user operations.
/// </summary>
public interface IUserRepository
{
    /// <summary>
    /// Creates a new user.
    /// </summary>
    Task<UserEntity> CreateAsync(UserEntity user, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a user by ID.
    /// </summary>
    Task<UserEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a user by username.
    /// </summary>
    Task<UserEntity?> GetByUsernameAsync(string tenantId, string username, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a user by email.
    /// </summary>
    Task<UserEntity?> GetByEmailAsync(string tenantId, string email, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all users for a tenant with optional filtering.
    /// </summary>
    Task<IReadOnlyList<UserEntity>> GetAllAsync(
        string tenantId,
        bool? enabled = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates a user.
    /// </summary>
    Task<bool> UpdateAsync(UserEntity user, CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a user.
    /// </summary>
    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates the user's password hash.
    /// </summary>
    Task<bool> UpdatePasswordAsync(
        string tenantId,
        Guid userId,
        string passwordHash,
        string passwordSalt,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Records a failed login attempt.
    /// </summary>
    Task<int> RecordFailedLoginAsync(
        string tenantId,
        Guid userId,
        DateTimeOffset? lockUntil = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Records a successful login.
    /// </summary>
    Task RecordSuccessfulLoginAsync(
        string tenantId,
        Guid userId,
        CancellationToken cancellationToken = default);
}
@@ -0,0 +1,194 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Authority.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;

namespace StellaOps.Authority.Storage.Postgres.Repositories;

/// <summary>
/// PostgreSQL repository for tenant operations.
/// </summary>
public sealed class TenantRepository : RepositoryBase<AuthorityDataSource>, ITenantRepository
{
    private const string SystemTenantId = "_system";

    /// <summary>
    /// Creates a new tenant repository.
    /// </summary>
    public TenantRepository(AuthorityDataSource dataSource, ILogger<TenantRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<TenantEntity> CreateAsync(TenantEntity tenant, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO auth.tenants (id, slug, name, description, contact_email, enabled, settings, metadata, created_by)
            VALUES (@id, @slug, @name, @description, @contact_email, @enabled, @settings::jsonb, @metadata::jsonb, @created_by)
            RETURNING id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
            """;

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "id", tenant.Id);
        AddParameter(command, "slug", tenant.Slug);
        AddParameter(command, "name", tenant.Name);
        AddParameter(command, "description", tenant.Description);
        AddParameter(command, "contact_email", tenant.ContactEmail);
        AddParameter(command, "enabled", tenant.Enabled);
        AddJsonbParameter(command, "settings", tenant.Settings);
        AddJsonbParameter(command, "metadata", tenant.Metadata);
        AddParameter(command, "created_by", tenant.CreatedBy);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return MapTenant(reader);
    }

    /// <inheritdoc />
    public async Task<TenantEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.tenants
            WHERE id = @id
            """;

        return await QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "id", id),
            MapTenant,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<TenantEntity?> GetBySlugAsync(string slug, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.tenants
            WHERE slug = @slug
            """;

        return await QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "slug", slug),
            MapTenant,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<TenantEntity>> GetAllAsync(
        bool? enabled = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var sql = """
            SELECT id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.tenants
            """;

        if (enabled.HasValue)
        {
            sql += " WHERE enabled = @enabled";
        }

        sql += " ORDER BY name, id LIMIT @limit OFFSET @offset";

        return await QueryAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                if (enabled.HasValue)
                {
                    AddParameter(cmd, "enabled", enabled.Value);
                }
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapTenant,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<bool> UpdateAsync(TenantEntity tenant, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE auth.tenants
            SET name = @name,
                description = @description,
                contact_email = @contact_email,
                enabled = @enabled,
                settings = @settings::jsonb,
                metadata = @metadata::jsonb
            WHERE id = @id
            """;

        var rows = await ExecuteAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "id", tenant.Id);
                AddParameter(cmd, "name", tenant.Name);
                AddParameter(cmd, "description", tenant.Description);
                AddParameter(cmd, "contact_email", tenant.ContactEmail);
                AddParameter(cmd, "enabled", tenant.Enabled);
                AddJsonbParameter(cmd, "settings", tenant.Settings);
                AddJsonbParameter(cmd, "metadata", tenant.Metadata);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> DeleteAsync(Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = "DELETE FROM auth.tenants WHERE id = @id";

        var rows = await ExecuteAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "id", id),
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> SlugExistsAsync(string slug, CancellationToken cancellationToken = default)
    {
        const string sql = "SELECT EXISTS(SELECT 1 FROM auth.tenants WHERE slug = @slug)";

        var result = await ExecuteScalarAsync<bool>(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "slug", slug),
            cancellationToken).ConfigureAwait(false);

        return result;
    }

    private static TenantEntity MapTenant(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(0),
        Slug = reader.GetString(1),
        Name = reader.GetString(2),
        Description = GetNullableString(reader, 3),
        ContactEmail = GetNullableString(reader, 4),
        Enabled = reader.GetBoolean(5),
        Settings = reader.GetString(6),
        Metadata = reader.GetString(7),
        CreatedAt = reader.GetFieldValue<DateTimeOffset>(8),
        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(9),
        CreatedBy = GetNullableString(reader, 10)
    };
}
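A hypothetical round trip through this repository, assuming ITenantRepository has been resolved from DI and ct is a CancellationToken:

    // Create a tenant, then read it back by slug (both methods defined above).
    var tenant = await tenantRepository.CreateAsync(new TenantEntity
    {
        Id = Guid.NewGuid(),
        Slug = "acme",
        Name = "Acme Corp",
    }, ct);

    var fetched = await tenantRepository.GetBySlugAsync("acme", ct);
    // fetched?.Enabled defaults to true; CreatedAt/UpdatedAt come back from the database.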
@@ -0,0 +1,353 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Authority.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;

namespace StellaOps.Authority.Storage.Postgres.Repositories;

/// <summary>
/// PostgreSQL repository for user operations.
/// </summary>
public sealed class UserRepository : RepositoryBase<AuthorityDataSource>, IUserRepository
{
    /// <summary>
    /// Creates a new user repository.
    /// </summary>
    public UserRepository(AuthorityDataSource dataSource, ILogger<UserRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<UserEntity> CreateAsync(UserEntity user, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO auth.users (
                id, tenant_id, username, email, display_name, password_hash, password_salt,
                enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
                settings, metadata, created_by
            )
            VALUES (
                @id, @tenant_id, @username, @email, @display_name, @password_hash, @password_salt,
                @enabled, @email_verified, @mfa_enabled, @mfa_secret, @mfa_backup_codes,
                @settings::jsonb, @metadata::jsonb, @created_by
            )
            RETURNING id, tenant_id, username, email, display_name, password_hash, password_salt,
                enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
                failed_login_attempts, locked_until, last_login_at, password_changed_at,
                settings::text, metadata::text, created_at, updated_at, created_by
            """;

        await using var connection = await DataSource.OpenConnectionAsync(user.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddUserParameters(command, user);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return MapUser(reader);
    }

    /// <inheritdoc />
    public async Task<UserEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
                enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
                failed_login_attempts, locked_until, last_login_at, password_changed_at,
                settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.users
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            MapUser,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<UserEntity?> GetByUsernameAsync(string tenantId, string username, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
                enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
                failed_login_attempts, locked_until, last_login_at, password_changed_at,
                settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.users
            WHERE tenant_id = @tenant_id AND username = @username
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "username", username);
            },
            MapUser,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<UserEntity?> GetByEmailAsync(string tenantId, string email, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
                enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
                failed_login_attempts, locked_until, last_login_at, password_changed_at,
                settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.users
            WHERE tenant_id = @tenant_id AND email = @email
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "email", email);
            },
            MapUser,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<UserEntity>> GetAllAsync(
        string tenantId,
        bool? enabled = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var sql = """
            SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
                enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
                failed_login_attempts, locked_until, last_login_at, password_changed_at,
                settings::text, metadata::text, created_at, updated_at, created_by
            FROM auth.users
            WHERE tenant_id = @tenant_id
            """;

        if (enabled.HasValue)
        {
            sql += " AND enabled = @enabled";
        }

        sql += " ORDER BY username, id LIMIT @limit OFFSET @offset";

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                if (enabled.HasValue)
                {
                    AddParameter(cmd, "enabled", enabled.Value);
                }
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapUser,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<bool> UpdateAsync(UserEntity user, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE auth.users
            SET username = @username,
                email = @email,
                display_name = @display_name,
                enabled = @enabled,
                email_verified = @email_verified,
                mfa_enabled = @mfa_enabled,
                mfa_secret = @mfa_secret,
                mfa_backup_codes = @mfa_backup_codes,
                settings = @settings::jsonb,
                metadata = @metadata::jsonb
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        var rows = await ExecuteAsync(
            user.TenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", user.TenantId);
                AddParameter(cmd, "id", user.Id);
                AddParameter(cmd, "username", user.Username);
                AddParameter(cmd, "email", user.Email);
                AddParameter(cmd, "display_name", user.DisplayName);
                AddParameter(cmd, "enabled", user.Enabled);
                AddParameter(cmd, "email_verified", user.EmailVerified);
                AddParameter(cmd, "mfa_enabled", user.MfaEnabled);
                AddParameter(cmd, "mfa_secret", user.MfaSecret);
                AddParameter(cmd, "mfa_backup_codes", user.MfaBackupCodes);
                AddJsonbParameter(cmd, "settings", user.Settings);
                AddJsonbParameter(cmd, "metadata", user.Metadata);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = "DELETE FROM auth.users WHERE tenant_id = @tenant_id AND id = @id";

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> UpdatePasswordAsync(
        string tenantId,
        Guid userId,
        string passwordHash,
        string passwordSalt,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE auth.users
            SET password_hash = @password_hash,
                password_salt = @password_salt,
                password_changed_at = NOW()
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", userId);
                AddParameter(cmd, "password_hash", passwordHash);
                AddParameter(cmd, "password_salt", passwordSalt);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<int> RecordFailedLoginAsync(
        string tenantId,
        Guid userId,
        DateTimeOffset? lockUntil = null,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE auth.users
            SET failed_login_attempts = failed_login_attempts + 1,
                locked_until = @locked_until
            WHERE tenant_id = @tenant_id AND id = @id
            RETURNING failed_login_attempts
            """;

        var result = await ExecuteScalarAsync<int>(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", userId);
                AddParameter(cmd, "locked_until", lockUntil);
            },
            cancellationToken).ConfigureAwait(false);

        return result;
    }

    /// <inheritdoc />
    public async Task RecordSuccessfulLoginAsync(
        string tenantId,
        Guid userId,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE auth.users
            SET failed_login_attempts = 0,
                locked_until = NULL,
                last_login_at = NOW()
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", userId);
            },
            cancellationToken).ConfigureAwait(false);
    }

    private static void AddUserParameters(NpgsqlCommand command, UserEntity user)
    {
        AddParameter(command, "id", user.Id);
        AddParameter(command, "tenant_id", user.TenantId);
        AddParameter(command, "username", user.Username);
        AddParameter(command, "email", user.Email);
        AddParameter(command, "display_name", user.DisplayName);
        AddParameter(command, "password_hash", user.PasswordHash);
        AddParameter(command, "password_salt", user.PasswordSalt);
        AddParameter(command, "enabled", user.Enabled);
        AddParameter(command, "email_verified", user.EmailVerified);
        AddParameter(command, "mfa_enabled", user.MfaEnabled);
        AddParameter(command, "mfa_secret", user.MfaSecret);
        AddParameter(command, "mfa_backup_codes", user.MfaBackupCodes);
        AddJsonbParameter(command, "settings", user.Settings);
        AddJsonbParameter(command, "metadata", user.Metadata);
        AddParameter(command, "created_by", user.CreatedBy);
    }

    private static UserEntity MapUser(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(0),
        TenantId = reader.GetString(1),
        Username = reader.GetString(2),
        Email = reader.GetString(3),
        DisplayName = GetNullableString(reader, 4),
        PasswordHash = GetNullableString(reader, 5),
        PasswordSalt = GetNullableString(reader, 6),
        Enabled = reader.GetBoolean(7),
        EmailVerified = reader.GetBoolean(8),
        MfaEnabled = reader.GetBoolean(9),
        MfaSecret = GetNullableString(reader, 10),
        MfaBackupCodes = GetNullableString(reader, 11),
        FailedLoginAttempts = reader.GetInt32(12),
        LockedUntil = GetNullableDateTimeOffset(reader, 13),
        LastLoginAt = GetNullableDateTimeOffset(reader, 14),
        PasswordChangedAt = GetNullableDateTimeOffset(reader, 15),
        Settings = reader.GetString(16),
        Metadata = reader.GetString(17),
        CreatedAt = reader.GetFieldValue<DateTimeOffset>(18),
        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(19),
        CreatedBy = GetNullableString(reader, 20)
    };
}
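One way a caller might build a lockout policy on top of RecordFailedLoginAsync; the threshold and window below are illustrative, not part of this commit:

    // Hypothetical policy: lock for 15 minutes starting at the 5th consecutive failure.
    const int MaxAttempts = 5;
    var user = await userRepository.GetByUsernameAsync(tenantId, username, ct);
    if (user is not null)
    {
        DateTimeOffset? lockUntil = user.FailedLoginAttempts + 1 >= MaxAttempts
            ? DateTimeOffset.UtcNow.AddMinutes(15)
            : null;
        await userRepository.RecordFailedLoginAsync(tenantId, user.Id, lockUntil, ct);
    }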
@@ -0,0 +1,55 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Authority.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Authority.Storage.Postgres;

/// <summary>
/// Extension methods for configuring Authority PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds Authority PostgreSQL storage services.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configuration">Configuration root.</param>
    /// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddAuthorityPostgresStorage(
        this IServiceCollection services,
        IConfiguration configuration,
        string sectionName = "Postgres:Authority")
    {
        // Bind the default (unnamed) options instance: AuthorityDataSource resolves
        // IOptions<PostgresOptions>, which only sees the unnamed instance.
        services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
        services.AddSingleton<AuthorityDataSource>();

        // Register repositories
        services.AddScoped<ITenantRepository, TenantRepository>();
        services.AddScoped<IUserRepository, UserRepository>();

        return services;
    }

    /// <summary>
    /// Adds Authority PostgreSQL storage services with explicit options.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configureOptions">Options configuration action.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddAuthorityPostgresStorage(
        this IServiceCollection services,
        Action<PostgresOptions> configureOptions)
    {
        services.Configure(configureOptions);
        services.AddSingleton<AuthorityDataSource>();

        // Register repositories
        services.AddScoped<ITenantRepository, TenantRepository>();
        services.AddScoped<IUserRepository, UserRepository>();

        return services;
    }
}
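Hypothetical host wiring for these extensions. The "Postgres:Authority" section name matches the default above; SchemaName is referenced by the data sources in this commit, while ConnectionString is an assumed PostgresOptions member, since that type is defined outside this diff:

    var builder = WebApplication.CreateBuilder(args); // hypothetical ASP.NET Core host

    // Configuration-driven registration (binds the Postgres:Authority section):
    builder.Services.AddAuthorityPostgresStorage(builder.Configuration);

    // Or with explicit options:
    builder.Services.AddAuthorityPostgresStorage(options =>
    {
        options.ConnectionString = "Host=localhost;Database=stellaops"; // assumed member
        options.SchemaName = AuthorityDataSource.DefaultSchemaName;     // "auth"
    });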
@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <RootNamespace>StellaOps.Authority.Storage.Postgres</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
  </ItemGroup>

</Project>
@@ -0,0 +1,50 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Concelier.Storage.Postgres;

/// <summary>
/// PostgreSQL data source for the Concelier (vulnerability) module.
/// Manages connections for advisory ingestion, merging, and vulnerability data.
/// </summary>
/// <remarks>
/// The Concelier module stores global vulnerability data that is not tenant-scoped.
/// Advisories and their metadata are shared across all tenants.
/// </remarks>
public sealed class ConcelierDataSource : DataSourceBase
{
    /// <summary>
    /// Default schema name for Concelier/vulnerability tables.
    /// </summary>
    public const string DefaultSchemaName = "vuln";

    /// <summary>
    /// Creates a new Concelier data source.
    /// </summary>
    public ConcelierDataSource(IOptions<PostgresOptions> options, ILogger<ConcelierDataSource> logger)
        : base(CreateOptions(options.Value), logger)
    {
    }

    /// <inheritdoc />
    protected override string ModuleName => "Concelier";

    /// <inheritdoc />
    protected override void ConfigureDataSourceBuilder(NpgsqlDataSourceBuilder builder)
    {
        base.ConfigureDataSourceBuilder(builder);
        // Enable full-text search vector support for advisory searching
    }

    private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
    {
        if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
        {
            baseOptions.SchemaName = DefaultSchemaName;
        }
        return baseOptions;
    }
}
@@ -0,0 +1,261 @@
-- Vulnerability Schema Migration 001: Initial Schema
-- Creates the vuln schema for advisories and vulnerability data

-- Create schema
CREATE SCHEMA IF NOT EXISTS vuln;

-- Enable pg_trgm for fuzzy text search
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Sources table (feed sources)
CREATE TABLE IF NOT EXISTS vuln.sources (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    key TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    source_type TEXT NOT NULL,
    url TEXT,
    priority INT NOT NULL DEFAULT 0,
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    config JSONB NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_sources_enabled ON vuln.sources(enabled, priority DESC);

-- Feed snapshots table
CREATE TABLE IF NOT EXISTS vuln.feed_snapshots (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    source_id UUID NOT NULL REFERENCES vuln.sources(id),
    snapshot_id TEXT NOT NULL,
    advisory_count INT NOT NULL DEFAULT 0,
    checksum TEXT,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(source_id, snapshot_id)
);

CREATE INDEX IF NOT EXISTS idx_feed_snapshots_source ON vuln.feed_snapshots(source_id);
CREATE INDEX IF NOT EXISTS idx_feed_snapshots_created ON vuln.feed_snapshots(created_at);

-- Advisory snapshots table (point-in-time snapshots)
CREATE TABLE IF NOT EXISTS vuln.advisory_snapshots (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    feed_snapshot_id UUID NOT NULL REFERENCES vuln.feed_snapshots(id),
    advisory_key TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(feed_snapshot_id, advisory_key)
);

CREATE INDEX IF NOT EXISTS idx_advisory_snapshots_feed ON vuln.advisory_snapshots(feed_snapshot_id);
CREATE INDEX IF NOT EXISTS idx_advisory_snapshots_key ON vuln.advisory_snapshots(advisory_key);

-- Advisories table (main vulnerability data)
CREATE TABLE IF NOT EXISTS vuln.advisories (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_key TEXT NOT NULL UNIQUE,
    primary_vuln_id TEXT NOT NULL,
    source_id UUID REFERENCES vuln.sources(id),
    title TEXT,
    summary TEXT,
    description TEXT,
    severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'unknown')),
    published_at TIMESTAMPTZ,
    modified_at TIMESTAMPTZ,
    withdrawn_at TIMESTAMPTZ,
    provenance JSONB NOT NULL DEFAULT '{}',
    raw_payload JSONB,
    search_vector TSVECTOR,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_advisories_vuln_id ON vuln.advisories(primary_vuln_id);
CREATE INDEX IF NOT EXISTS idx_advisories_source ON vuln.advisories(source_id);
CREATE INDEX IF NOT EXISTS idx_advisories_severity ON vuln.advisories(severity);
CREATE INDEX IF NOT EXISTS idx_advisories_published ON vuln.advisories(published_at);
CREATE INDEX IF NOT EXISTS idx_advisories_modified ON vuln.advisories(modified_at);
CREATE INDEX IF NOT EXISTS idx_advisories_search ON vuln.advisories USING GIN(search_vector);

-- Advisory aliases table (CVE, GHSA, etc.)
CREATE TABLE IF NOT EXISTS vuln.advisory_aliases (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    alias_type TEXT NOT NULL,
    alias_value TEXT NOT NULL,
    is_primary BOOLEAN NOT NULL DEFAULT FALSE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(advisory_id, alias_type, alias_value)
);

CREATE INDEX IF NOT EXISTS idx_advisory_aliases_advisory ON vuln.advisory_aliases(advisory_id);
CREATE INDEX IF NOT EXISTS idx_advisory_aliases_value ON vuln.advisory_aliases(alias_type, alias_value);
CREATE INDEX IF NOT EXISTS idx_advisory_aliases_cve ON vuln.advisory_aliases(alias_value)
    WHERE alias_type = 'CVE';

-- Advisory CVSS scores table
CREATE TABLE IF NOT EXISTS vuln.advisory_cvss (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    cvss_version TEXT NOT NULL,
    vector_string TEXT NOT NULL,
    base_score NUMERIC(3,1) NOT NULL,
    base_severity TEXT,
    exploitability_score NUMERIC(3,1),
    impact_score NUMERIC(3,1),
    source TEXT,
    is_primary BOOLEAN NOT NULL DEFAULT FALSE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(advisory_id, cvss_version, source)
);

CREATE INDEX IF NOT EXISTS idx_advisory_cvss_advisory ON vuln.advisory_cvss(advisory_id);
CREATE INDEX IF NOT EXISTS idx_advisory_cvss_score ON vuln.advisory_cvss(base_score DESC);

-- Advisory affected packages table
CREATE TABLE IF NOT EXISTS vuln.advisory_affected (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    ecosystem TEXT NOT NULL,
    package_name TEXT NOT NULL,
    purl TEXT,
    version_range JSONB NOT NULL DEFAULT '{}',
    versions_affected TEXT[],
    versions_fixed TEXT[],
    database_specific JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_advisory_affected_advisory ON vuln.advisory_affected(advisory_id);
CREATE INDEX IF NOT EXISTS idx_advisory_affected_ecosystem ON vuln.advisory_affected(ecosystem, package_name);
CREATE INDEX IF NOT EXISTS idx_advisory_affected_purl ON vuln.advisory_affected(purl);
CREATE INDEX IF NOT EXISTS idx_advisory_affected_purl_trgm ON vuln.advisory_affected USING GIN(purl gin_trgm_ops);

-- Advisory references table
CREATE TABLE IF NOT EXISTS vuln.advisory_references (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    ref_type TEXT NOT NULL,
    url TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_advisory_references_advisory ON vuln.advisory_references(advisory_id);

-- Advisory credits table
CREATE TABLE IF NOT EXISTS vuln.advisory_credits (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    contact TEXT,
    credit_type TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_advisory_credits_advisory ON vuln.advisory_credits(advisory_id);

-- Advisory weaknesses table (CWE)
CREATE TABLE IF NOT EXISTS vuln.advisory_weaknesses (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    cwe_id TEXT NOT NULL,
    description TEXT,
    source TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(advisory_id, cwe_id)
);

CREATE INDEX IF NOT EXISTS idx_advisory_weaknesses_advisory ON vuln.advisory_weaknesses(advisory_id);
CREATE INDEX IF NOT EXISTS idx_advisory_weaknesses_cwe ON vuln.advisory_weaknesses(cwe_id);

-- KEV flags table (Known Exploited Vulnerabilities)
CREATE TABLE IF NOT EXISTS vuln.kev_flags (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
    cve_id TEXT NOT NULL,
    vendor_project TEXT,
    product TEXT,
    vulnerability_name TEXT,
    date_added DATE NOT NULL,
    due_date DATE,
    known_ransomware_use BOOLEAN NOT NULL DEFAULT FALSE,
    notes TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(advisory_id, cve_id)
);

CREATE INDEX IF NOT EXISTS idx_kev_flags_advisory ON vuln.kev_flags(advisory_id);
CREATE INDEX IF NOT EXISTS idx_kev_flags_cve ON vuln.kev_flags(cve_id);
CREATE INDEX IF NOT EXISTS idx_kev_flags_date ON vuln.kev_flags(date_added);

-- Source states table (cursor tracking)
CREATE TABLE IF NOT EXISTS vuln.source_states (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    source_id UUID NOT NULL REFERENCES vuln.sources(id) UNIQUE,
    cursor TEXT,
    last_sync_at TIMESTAMPTZ,
    last_success_at TIMESTAMPTZ,
    last_error TEXT,
    sync_count BIGINT NOT NULL DEFAULT 0,
    error_count INT NOT NULL DEFAULT 0,
    metadata JSONB NOT NULL DEFAULT '{}',
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_source_states_source ON vuln.source_states(source_id);

-- Merge events table (advisory merge audit)
CREATE TABLE IF NOT EXISTS vuln.merge_events (
    id BIGSERIAL PRIMARY KEY,
    advisory_id UUID NOT NULL REFERENCES vuln.advisories(id),
    source_id UUID REFERENCES vuln.sources(id),
    event_type TEXT NOT NULL,
    old_value JSONB,
    new_value JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_merge_events_advisory ON vuln.merge_events(advisory_id);
CREATE INDEX IF NOT EXISTS idx_merge_events_created ON vuln.merge_events(created_at);

-- Function to update search vector
CREATE OR REPLACE FUNCTION vuln.update_advisory_search_vector()
RETURNS TRIGGER AS $$
BEGIN
    NEW.search_vector =
        setweight(to_tsvector('english', COALESCE(NEW.primary_vuln_id, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.summary, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(NEW.description, '')), 'D');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Trigger for search vector
CREATE TRIGGER trg_advisories_search_vector
    BEFORE INSERT OR UPDATE ON vuln.advisories
    FOR EACH ROW EXECUTE FUNCTION vuln.update_advisory_search_vector();

-- Update timestamp function
CREATE OR REPLACE FUNCTION vuln.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Triggers
CREATE TRIGGER trg_sources_updated_at
    BEFORE UPDATE ON vuln.sources
    FOR EACH ROW EXECUTE FUNCTION vuln.update_updated_at();

CREATE TRIGGER trg_advisories_updated_at
    BEFORE UPDATE ON vuln.advisories
    FOR EACH ROW EXECUTE FUNCTION vuln.update_updated_at();

CREATE TRIGGER trg_source_states_updated_at
    BEFORE UPDATE ON vuln.source_states
    FOR EACH ROW EXECUTE FUNCTION vuln.update_updated_at();
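Two example queries that exercise the structures created above: a ranked full-text search over the weighted tsvector maintained by trg_advisories_search_vector, and a fuzzy package lookup through the pg_trgm index. Both use only objects defined in this migration:

    -- Ranked full-text search (served by idx_advisories_search).
    SELECT primary_vuln_id, title,
           ts_rank(search_vector, websearch_to_tsquery('english', 'remote code execution')) AS rank
    FROM vuln.advisories
    WHERE search_vector @@ websearch_to_tsquery('english', 'remote code execution')
    ORDER BY rank DESC
    LIMIT 10;

    -- Fuzzy purl lookup (served by idx_advisory_affected_purl_trgm).
    SELECT DISTINCT purl
    FROM vuln.advisory_affected
    WHERE purl LIKE '%lodash%';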
@@ -0,0 +1,82 @@
namespace StellaOps.Concelier.Storage.Postgres.Models;

/// <summary>
/// Represents an advisory entity in the vuln schema.
/// </summary>
public sealed class AdvisoryEntity
{
    /// <summary>
    /// Unique advisory identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Advisory key (unique identifier, e.g., "ghsa:GHSA-xxxx").
    /// </summary>
    public required string AdvisoryKey { get; init; }

    /// <summary>
    /// Primary vulnerability ID (CVE, GHSA, etc.).
    /// </summary>
    public required string PrimaryVulnId { get; init; }

    /// <summary>
    /// Source that provided this advisory.
    /// </summary>
    public Guid? SourceId { get; init; }

    /// <summary>
    /// Advisory title.
    /// </summary>
    public string? Title { get; init; }

    /// <summary>
    /// Brief summary.
    /// </summary>
    public string? Summary { get; init; }

    /// <summary>
    /// Full description.
    /// </summary>
    public string? Description { get; init; }

    /// <summary>
    /// Severity level.
    /// </summary>
    public string? Severity { get; init; }

    /// <summary>
    /// When the advisory was published.
    /// </summary>
    public DateTimeOffset? PublishedAt { get; init; }

    /// <summary>
    /// When the advisory was last modified.
    /// </summary>
    public DateTimeOffset? ModifiedAt { get; init; }

    /// <summary>
    /// When the advisory was withdrawn (if applicable).
    /// </summary>
    public DateTimeOffset? WithdrawnAt { get; init; }

    /// <summary>
    /// Provenance information as JSON.
    /// </summary>
    public string Provenance { get; init; } = "{}";

    /// <summary>
    /// Raw payload from the source as JSON.
    /// </summary>
    public string? RawPayload { get; init; }

    /// <summary>
    /// When the record was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the record was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }
}
@@ -0,0 +1,62 @@
namespace StellaOps.Concelier.Storage.Postgres.Models;

/// <summary>
/// Represents a vulnerability feed source entity.
/// </summary>
public sealed class SourceEntity
{
    /// <summary>
    /// Unique source identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Unique source key (e.g., "nvd", "ghsa", "osv").
    /// </summary>
    public required string Key { get; init; }

    /// <summary>
    /// Display name.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Source type (e.g., "nvd", "osv", "github").
    /// </summary>
    public required string SourceType { get; init; }

    /// <summary>
    /// Source URL.
    /// </summary>
    public string? Url { get; init; }

    /// <summary>
    /// Priority for merge precedence (higher = more authoritative).
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Whether the source is enabled.
    /// </summary>
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// Source-specific configuration as JSON.
    /// </summary>
    public string Config { get; init; } = "{}";

    /// <summary>
    /// Source metadata as JSON.
    /// </summary>
    public string Metadata { get; init; } = "{}";

    /// <summary>
    /// When the record was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the record was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }
}
@@ -0,0 +1,320 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;

namespace StellaOps.Concelier.Storage.Postgres.Repositories;

/// <summary>
/// PostgreSQL repository for advisory operations.
/// </summary>
/// <remarks>
/// Advisory data is global (not tenant-scoped) as vulnerability information
/// is shared across all tenants.
/// </remarks>
public sealed class AdvisoryRepository : RepositoryBase<ConcelierDataSource>, IAdvisoryRepository
{
    private const string SystemTenantId = "_system";

    /// <summary>
    /// Creates a new advisory repository.
    /// </summary>
    public AdvisoryRepository(ConcelierDataSource dataSource, ILogger<AdvisoryRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<AdvisoryEntity> UpsertAsync(AdvisoryEntity advisory, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO vuln.advisories (
                id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance, raw_payload
            )
            VALUES (
                @id, @advisory_key, @primary_vuln_id, @source_id, @title, @summary, @description,
                @severity, @published_at, @modified_at, @withdrawn_at, @provenance::jsonb, @raw_payload::jsonb
            )
            ON CONFLICT (advisory_key) DO UPDATE SET
                primary_vuln_id = EXCLUDED.primary_vuln_id,
                source_id = COALESCE(EXCLUDED.source_id, vuln.advisories.source_id),
                title = COALESCE(EXCLUDED.title, vuln.advisories.title),
                summary = COALESCE(EXCLUDED.summary, vuln.advisories.summary),
                description = COALESCE(EXCLUDED.description, vuln.advisories.description),
                severity = COALESCE(EXCLUDED.severity, vuln.advisories.severity),
                published_at = COALESCE(EXCLUDED.published_at, vuln.advisories.published_at),
                modified_at = COALESCE(EXCLUDED.modified_at, vuln.advisories.modified_at),
                withdrawn_at = EXCLUDED.withdrawn_at,
                provenance = vuln.advisories.provenance || EXCLUDED.provenance,
                raw_payload = EXCLUDED.raw_payload
            RETURNING id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            """;

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "id", advisory.Id);
        AddParameter(command, "advisory_key", advisory.AdvisoryKey);
        AddParameter(command, "primary_vuln_id", advisory.PrimaryVulnId);
        AddParameter(command, "source_id", advisory.SourceId);
        AddParameter(command, "title", advisory.Title);
        AddParameter(command, "summary", advisory.Summary);
        AddParameter(command, "description", advisory.Description);
        AddParameter(command, "severity", advisory.Severity);
        AddParameter(command, "published_at", advisory.PublishedAt);
        AddParameter(command, "modified_at", advisory.ModifiedAt);
        AddParameter(command, "withdrawn_at", advisory.WithdrawnAt);
        AddJsonbParameter(command, "provenance", advisory.Provenance);
        AddJsonbParameter(command, "raw_payload", advisory.RawPayload);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return MapAdvisory(reader);
    }
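
    // The upsert above leans on two PostgreSQL behaviors: COALESCE keeps the stored
    // value whenever the incoming advisory omits a field, and `provenance ||
    // EXCLUDED.provenance` is jsonb concatenation -- a shallow merge where top-level
    // keys from the new document win and all others survive, e.g. (illustrative):
    //   '{"nvd":{"seen":1}}'::jsonb || '{"osv":{"seen":2}}'::jsonb
    //     => {"nvd": {"seen": 1}, "osv": {"seen": 2}}
    // Nested objects are replaced wholesale, not merged recursively.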

    /// <inheritdoc />
    public async Task<AdvisoryEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            FROM vuln.advisories
            WHERE id = @id
            """;

        return await QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "id", id),
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<AdvisoryEntity?> GetByKeyAsync(string advisoryKey, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            FROM vuln.advisories
            WHERE advisory_key = @advisory_key
            """;

        return await QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "advisory_key", advisoryKey),
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<AdvisoryEntity?> GetByVulnIdAsync(string vulnId, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            FROM vuln.advisories
            WHERE primary_vuln_id = @vuln_id
            """;

        return await QuerySingleOrDefaultAsync(
            SystemTenantId,
            sql,
            cmd => AddParameter(cmd, "vuln_id", vulnId),
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<AdvisoryEntity>> SearchAsync(
        string query,
        string? severity = null,
        int limit = 50,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at,
                ts_rank(search_vector, websearch_to_tsquery('english', @query)) as rank
            FROM vuln.advisories
            WHERE search_vector @@ websearch_to_tsquery('english', @query)
            """;

        if (!string.IsNullOrEmpty(severity))
        {
            sql += " AND severity = @severity";
        }

        sql += " ORDER BY rank DESC, modified_at DESC, id LIMIT @limit OFFSET @offset";

        return await QueryAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "query", query);
                if (!string.IsNullOrEmpty(severity))
                {
                    AddParameter(cmd, "severity", severity);
                }
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }
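
    // Illustrative call (argument values are placeholders, not from this commit):
    //   var hits = await advisories.SearchAsync("openssl heap overflow", severity: "critical", limit: 20);
    // websearch_to_tsquery tolerates free-form user input ("quoted phrases", -exclusions),
    // so the raw query string binds directly without pre-sanitizing tsquery syntax.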

    /// <inheritdoc />
    public async Task<IReadOnlyList<AdvisoryEntity>> GetBySeverityAsync(
        string severity,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            FROM vuln.advisories
            WHERE severity = @severity
            ORDER BY modified_at DESC, id
            LIMIT @limit OFFSET @offset
            """;

        return await QueryAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "severity", severity);
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<AdvisoryEntity>> GetModifiedSinceAsync(
        DateTimeOffset since,
        int limit = 1000,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            FROM vuln.advisories
            WHERE modified_at > @since
            ORDER BY modified_at, id
            LIMIT @limit
            """;

        return await QueryAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "since", since);
                AddParameter(cmd, "limit", limit);
            },
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }
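
    // Sketch of an incremental consumer (hypothetical caller, not part of this commit):
    //   var since = lastCheckpoint;
    //   while (true)
    //   {
    //       var page = await advisories.GetModifiedSinceAsync(since, limit: 1000, ct);
    //       if (page.Count == 0) break;
    //       Process(page);
    //       since = page[^1].ModifiedAt.Value; // non-null: the filter excludes NULL modified_at
    //   }
    // The strict '>' filter means rows sharing the checkpoint timestamp can be skipped;
    // a (modified_at, id) keyset cursor would close that gap if it matters.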

    /// <inheritdoc />
    public async Task<IReadOnlyList<AdvisoryEntity>> GetBySourceAsync(
        Guid sourceId,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
                severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
                created_at, updated_at
            FROM vuln.advisories
            WHERE source_id = @source_id
            ORDER BY modified_at DESC, id
            LIMIT @limit OFFSET @offset
            """;

        return await QueryAsync(
            SystemTenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "source_id", sourceId);
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapAdvisory,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<long> CountAsync(CancellationToken cancellationToken = default)
    {
        const string sql = "SELECT COUNT(*) FROM vuln.advisories";

        var result = await ExecuteScalarAsync<long>(
            SystemTenantId,
            sql,
            null,
            cancellationToken).ConfigureAwait(false);

        return result;
    }

    /// <inheritdoc />
    public async Task<IDictionary<string, long>> CountBySeverityAsync(CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT COALESCE(severity, 'unknown') as severity, COUNT(*) as count
            FROM vuln.advisories
            GROUP BY severity
            ORDER BY severity
            """;

        var results = await QueryAsync(
            SystemTenantId,
            sql,
            null,
            reader => (
                Severity: reader.GetString(0),
                Count: reader.GetInt64(1)
            ),
            cancellationToken).ConfigureAwait(false);

        return results.ToDictionary(r => r.Severity, r => r.Count);
    }

    private static AdvisoryEntity MapAdvisory(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(0),
        AdvisoryKey = reader.GetString(1),
        PrimaryVulnId = reader.GetString(2),
        SourceId = GetNullableGuid(reader, 3),
        Title = GetNullableString(reader, 4),
        Summary = GetNullableString(reader, 5),
        Description = GetNullableString(reader, 6),
        Severity = GetNullableString(reader, 7),
        PublishedAt = GetNullableDateTimeOffset(reader, 8),
        ModifiedAt = GetNullableDateTimeOffset(reader, 9),
        WithdrawnAt = GetNullableDateTimeOffset(reader, 10),
        Provenance = reader.GetString(11),
        RawPayload = GetNullableString(reader, 12),
        CreatedAt = reader.GetFieldValue<DateTimeOffset>(13),
        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(14)
    };
}
@@ -0,0 +1,75 @@
using StellaOps.Concelier.Storage.Postgres.Models;

namespace StellaOps.Concelier.Storage.Postgres.Repositories;

/// <summary>
/// Repository interface for advisory operations.
/// </summary>
public interface IAdvisoryRepository
{
    /// <summary>
    /// Creates or updates an advisory (upsert by advisory_key).
    /// </summary>
    Task<AdvisoryEntity> UpsertAsync(AdvisoryEntity advisory, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets an advisory by ID.
    /// </summary>
    Task<AdvisoryEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets an advisory by key.
    /// </summary>
    Task<AdvisoryEntity?> GetByKeyAsync(string advisoryKey, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets an advisory by primary vulnerability ID (CVE, GHSA, etc.).
    /// </summary>
    Task<AdvisoryEntity?> GetByVulnIdAsync(string vulnId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Searches advisories by full-text search.
    /// </summary>
    Task<IReadOnlyList<AdvisoryEntity>> SearchAsync(
        string query,
        string? severity = null,
        int limit = 50,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets advisories by severity.
    /// </summary>
    Task<IReadOnlyList<AdvisoryEntity>> GetBySeverityAsync(
        string severity,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets advisories modified since a given time.
    /// </summary>
    Task<IReadOnlyList<AdvisoryEntity>> GetModifiedSinceAsync(
        DateTimeOffset since,
        int limit = 1000,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets advisories by source.
    /// </summary>
    Task<IReadOnlyList<AdvisoryEntity>> GetBySourceAsync(
        Guid sourceId,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Counts total advisories.
    /// </summary>
    Task<long> CountAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Counts advisories by severity.
    /// </summary>
    Task<IDictionary<string, long>> CountBySeverityAsync(CancellationToken cancellationToken = default);
}
@@ -0,0 +1,53 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Concelier.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Concelier.Storage.Postgres;

/// <summary>
/// Extension methods for configuring Concelier PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds Concelier PostgreSQL storage services.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configuration">Configuration root.</param>
    /// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddConcelierPostgresStorage(
        this IServiceCollection services,
        IConfiguration configuration,
        string sectionName = "Postgres:Concelier")
    {
        // Bind the default (unnamed) options instance; ConcelierDataSource resolves
        // IOptions<PostgresOptions>, which a named options registration would not reach.
        services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
        services.AddSingleton<ConcelierDataSource>();

        // Register repositories
        services.AddScoped<IAdvisoryRepository, AdvisoryRepository>();

        return services;
    }

    /// <summary>
    /// Adds Concelier PostgreSQL storage services with explicit options.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configureOptions">Options configuration action.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddConcelierPostgresStorage(
        this IServiceCollection services,
        Action<PostgresOptions> configureOptions)
    {
        services.Configure(configureOptions);
        services.AddSingleton<ConcelierDataSource>();

        // Register repositories
        services.AddScoped<IAdvisoryRepository, AdvisoryRepository>();

        return services;
    }
}
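
// Typical host wiring (illustrative; the host builder is assumed, not part of this commit):
//   builder.Services.AddConcelierPostgresStorage(builder.Configuration);
// or, bypassing configuration binding entirely:
//   builder.Services.AddConcelierPostgresStorage(o => o.SchemaName = "vuln");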
@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <RootNamespace>StellaOps.Concelier.Storage.Postgres</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
  </ItemGroup>

</Project>
@@ -0,0 +1,50 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Excititor.Storage.Postgres;

/// <summary>
/// PostgreSQL data source for the Excititor (VEX) module.
/// Manages connections with tenant context for VEX statements and dependency graphs.
/// </summary>
/// <remarks>
/// The Excititor module handles high-volume graph data (nodes/edges) and requires
/// optimized queries for graph traversal and VEX consensus computation.
/// </remarks>
public sealed class ExcititorDataSource : DataSourceBase
{
    /// <summary>
    /// Default schema name for Excititor/VEX tables.
    /// </summary>
    public const string DefaultSchemaName = "vex";

    /// <summary>
    /// Creates a new Excititor data source.
    /// </summary>
    public ExcititorDataSource(IOptions<PostgresOptions> options, ILogger<ExcititorDataSource> logger)
        : base(CreateOptions(options.Value), logger)
    {
    }

    /// <inheritdoc />
    protected override string ModuleName => "Excititor";

    /// <inheritdoc />
    protected override void ConfigureDataSourceBuilder(NpgsqlDataSourceBuilder builder)
    {
        base.ConfigureDataSourceBuilder(builder);
        // Configure for high-throughput graph operations
    }

    private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
    {
        if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
        {
            baseOptions.SchemaName = DefaultSchemaName;
        }
        return baseOptions;
    }
}
@@ -0,0 +1,324 @@
-- VEX Schema Migration 001: Initial Schema
-- Creates the vex schema for VEX statements and dependency graphs

-- Create schema
CREATE SCHEMA IF NOT EXISTS vex;

-- Projects table
CREATE TABLE IF NOT EXISTS vex.projects (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    display_name TEXT,
    description TEXT,
    repository_url TEXT,
    default_branch TEXT,
    settings JSONB NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by TEXT,
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_projects_tenant ON vex.projects(tenant_id);

-- Graph revisions table
CREATE TABLE IF NOT EXISTS vex.graph_revisions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    project_id UUID NOT NULL REFERENCES vex.projects(id) ON DELETE CASCADE,
    revision_id TEXT NOT NULL UNIQUE,
    parent_revision_id TEXT,
    sbom_digest TEXT NOT NULL,
    feed_snapshot_id TEXT,
    policy_version TEXT,
    node_count INT NOT NULL DEFAULT 0,
    edge_count INT NOT NULL DEFAULT 0,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by TEXT
);

CREATE INDEX idx_graph_revisions_project ON vex.graph_revisions(project_id);
CREATE INDEX idx_graph_revisions_revision ON vex.graph_revisions(revision_id);
CREATE INDEX idx_graph_revisions_created ON vex.graph_revisions(project_id, created_at DESC);

-- Graph nodes table (BIGSERIAL for high volume)
CREATE TABLE IF NOT EXISTS vex.graph_nodes (
    id BIGSERIAL PRIMARY KEY,
    graph_revision_id UUID NOT NULL REFERENCES vex.graph_revisions(id) ON DELETE CASCADE,
    node_key TEXT NOT NULL,
    node_type TEXT NOT NULL,
    purl TEXT,
    name TEXT,
    version TEXT,
    attributes JSONB NOT NULL DEFAULT '{}',
    UNIQUE(graph_revision_id, node_key)
);

CREATE INDEX idx_graph_nodes_revision ON vex.graph_nodes(graph_revision_id);
CREATE INDEX idx_graph_nodes_key ON vex.graph_nodes(graph_revision_id, node_key);
CREATE INDEX idx_graph_nodes_purl ON vex.graph_nodes(purl);
CREATE INDEX idx_graph_nodes_type ON vex.graph_nodes(graph_revision_id, node_type);

-- Graph edges table (BIGSERIAL for high volume)
CREATE TABLE IF NOT EXISTS vex.graph_edges (
    id BIGSERIAL PRIMARY KEY,
    graph_revision_id UUID NOT NULL REFERENCES vex.graph_revisions(id) ON DELETE CASCADE,
    from_node_id BIGINT NOT NULL REFERENCES vex.graph_nodes(id) ON DELETE CASCADE,
    to_node_id BIGINT NOT NULL REFERENCES vex.graph_nodes(id) ON DELETE CASCADE,
    edge_type TEXT NOT NULL,
    attributes JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_graph_edges_revision ON vex.graph_edges(graph_revision_id);
CREATE INDEX idx_graph_edges_from ON vex.graph_edges(from_node_id);
CREATE INDEX idx_graph_edges_to ON vex.graph_edges(to_node_id);
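
-- Illustrative traversal over the edge list (not part of the migration; :rev and
-- :root are placeholders): all nodes transitively reachable from a root node
-- within one revision. UNION (not UNION ALL) deduplicates node ids, so the
-- recursion also terminates on cyclic graphs.
--   WITH RECURSIVE reachable AS (
--       SELECT e.to_node_id AS node_id
--       FROM vex.graph_edges e
--       WHERE e.graph_revision_id = :rev AND e.from_node_id = :root
--       UNION
--       SELECT e.to_node_id
--       FROM vex.graph_edges e
--       JOIN reachable r ON e.from_node_id = r.node_id
--       WHERE e.graph_revision_id = :rev
--   )
--   SELECT n.node_key, n.purl
--   FROM reachable r
--   JOIN vex.graph_nodes n ON n.id = r.node_id;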

-- VEX statements table
CREATE TABLE IF NOT EXISTS vex.statements (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    project_id UUID REFERENCES vex.projects(id),
    graph_revision_id UUID REFERENCES vex.graph_revisions(id),
    vulnerability_id TEXT NOT NULL,
    product_id TEXT,
    status TEXT NOT NULL CHECK (status IN (
        'not_affected', 'affected', 'fixed', 'under_investigation'
    )),
    justification TEXT CHECK (justification IN (
        'component_not_present', 'vulnerable_code_not_present',
        'vulnerable_code_not_in_execute_path', 'vulnerable_code_cannot_be_controlled_by_adversary',
        'inline_mitigations_already_exist'
    )),
    impact_statement TEXT,
    action_statement TEXT,
    action_statement_timestamp TIMESTAMPTZ,
    first_issued TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    source TEXT,
    source_url TEXT,
    evidence JSONB NOT NULL DEFAULT '{}',
    provenance JSONB NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_by TEXT
);

CREATE INDEX idx_statements_tenant ON vex.statements(tenant_id);
CREATE INDEX idx_statements_project ON vex.statements(project_id);
CREATE INDEX idx_statements_revision ON vex.statements(graph_revision_id);
CREATE INDEX idx_statements_vuln ON vex.statements(vulnerability_id);
CREATE INDEX idx_statements_status ON vex.statements(tenant_id, status);

-- VEX observations table
CREATE TABLE IF NOT EXISTS vex.observations (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    statement_id UUID REFERENCES vex.statements(id) ON DELETE CASCADE,
    vulnerability_id TEXT NOT NULL,
    product_id TEXT NOT NULL,
    observed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    observer TEXT NOT NULL,
    observation_type TEXT NOT NULL,
    confidence NUMERIC(3,2),
    details JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, vulnerability_id, product_id, observer, observation_type)
);

CREATE INDEX idx_observations_tenant ON vex.observations(tenant_id);
CREATE INDEX idx_observations_statement ON vex.observations(statement_id);
CREATE INDEX idx_observations_vuln ON vex.observations(vulnerability_id, product_id);

-- Linksets table
CREATE TABLE IF NOT EXISTS vex.linksets (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    source_type TEXT NOT NULL,
    source_url TEXT,
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    priority INT NOT NULL DEFAULT 0,
    filter JSONB NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_linksets_tenant ON vex.linksets(tenant_id);
CREATE INDEX idx_linksets_enabled ON vex.linksets(tenant_id, enabled, priority DESC);

-- Linkset events table
CREATE TABLE IF NOT EXISTS vex.linkset_events (
    id BIGSERIAL PRIMARY KEY,
    linkset_id UUID NOT NULL REFERENCES vex.linksets(id) ON DELETE CASCADE,
    event_type TEXT NOT NULL,
    statement_count INT NOT NULL DEFAULT 0,
    error_message TEXT,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_linkset_events_linkset ON vex.linkset_events(linkset_id);
CREATE INDEX idx_linkset_events_created ON vex.linkset_events(created_at);

-- Consensus table (VEX consensus state)
CREATE TABLE IF NOT EXISTS vex.consensus (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    vulnerability_id TEXT NOT NULL,
    product_id TEXT NOT NULL,
    consensus_status TEXT NOT NULL,
    contributing_statements UUID[] NOT NULL DEFAULT '{}',
    confidence NUMERIC(3,2),
    computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    metadata JSONB NOT NULL DEFAULT '{}',
    UNIQUE(tenant_id, vulnerability_id, product_id)
);

CREATE INDEX idx_consensus_tenant ON vex.consensus(tenant_id);
CREATE INDEX idx_consensus_vuln ON vex.consensus(vulnerability_id, product_id);

-- Consensus holds table
CREATE TABLE IF NOT EXISTS vex.consensus_holds (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    consensus_id UUID NOT NULL REFERENCES vex.consensus(id) ON DELETE CASCADE,
    hold_type TEXT NOT NULL,
    reason TEXT NOT NULL,
    held_by TEXT NOT NULL,
    held_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    released_at TIMESTAMPTZ,
    released_by TEXT,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_consensus_holds_consensus ON vex.consensus_holds(consensus_id);
CREATE INDEX idx_consensus_holds_active ON vex.consensus_holds(consensus_id, released_at)
    WHERE released_at IS NULL;
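
-- The partial index above serves the common "active holds" probe (illustrative;
-- :id is a placeholder):
--   SELECT 1 FROM vex.consensus_holds
--   WHERE consensus_id = :id AND released_at IS NULL;
-- Released holds never enter the index, so it stays small on append-heavy history.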

-- Unknown snapshots table
CREATE TABLE IF NOT EXISTS vex.unknowns_snapshots (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    project_id UUID REFERENCES vex.projects(id),
    graph_revision_id UUID REFERENCES vex.graph_revisions(id),
    snapshot_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    unknown_count INT NOT NULL DEFAULT 0,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_unknowns_snapshots_tenant ON vex.unknowns_snapshots(tenant_id);
CREATE INDEX idx_unknowns_snapshots_project ON vex.unknowns_snapshots(project_id);

-- Unknown items table
CREATE TABLE IF NOT EXISTS vex.unknown_items (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    snapshot_id UUID NOT NULL REFERENCES vex.unknowns_snapshots(id) ON DELETE CASCADE,
    vulnerability_id TEXT NOT NULL,
    product_id TEXT,
    reason TEXT NOT NULL,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_unknown_items_snapshot ON vex.unknown_items(snapshot_id);
CREATE INDEX idx_unknown_items_vuln ON vex.unknown_items(vulnerability_id);

-- Evidence manifests table
CREATE TABLE IF NOT EXISTS vex.evidence_manifests (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    statement_id UUID REFERENCES vex.statements(id) ON DELETE CASCADE,
    manifest_type TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    content JSONB NOT NULL,
    source TEXT,
    collected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_evidence_manifests_tenant ON vex.evidence_manifests(tenant_id);
CREATE INDEX idx_evidence_manifests_statement ON vex.evidence_manifests(statement_id);

-- CVSS receipts table
CREATE TABLE IF NOT EXISTS vex.cvss_receipts (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    statement_id UUID REFERENCES vex.statements(id) ON DELETE CASCADE,
    vulnerability_id TEXT NOT NULL,
    cvss_version TEXT NOT NULL,
    vector_string TEXT NOT NULL,
    base_score NUMERIC(3,1) NOT NULL,
    environmental_score NUMERIC(3,1),
    temporal_score NUMERIC(3,1),
    computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_cvss_receipts_tenant ON vex.cvss_receipts(tenant_id);
CREATE INDEX idx_cvss_receipts_statement ON vex.cvss_receipts(statement_id);
CREATE INDEX idx_cvss_receipts_vuln ON vex.cvss_receipts(vulnerability_id);

-- Attestations table
CREATE TABLE IF NOT EXISTS vex.attestations (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    statement_id UUID REFERENCES vex.statements(id),
    subject_digest TEXT NOT NULL,
    predicate_type TEXT NOT NULL,
    predicate JSONB NOT NULL,
    signature TEXT,
    signature_algorithm TEXT,
    signed_by TEXT,
    signed_at TIMESTAMPTZ,
    verified BOOLEAN NOT NULL DEFAULT FALSE,
    verified_at TIMESTAMPTZ,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_attestations_tenant ON vex.attestations(tenant_id);
CREATE INDEX idx_attestations_statement ON vex.attestations(statement_id);
CREATE INDEX idx_attestations_subject ON vex.attestations(subject_digest);

-- Timeline events table
CREATE TABLE IF NOT EXISTS vex.timeline_events (
    id BIGSERIAL PRIMARY KEY,
    tenant_id TEXT NOT NULL,
    project_id UUID REFERENCES vex.projects(id),
    statement_id UUID REFERENCES vex.statements(id),
    event_type TEXT NOT NULL,
    event_data JSONB NOT NULL DEFAULT '{}',
    actor TEXT,
    correlation_id TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_timeline_events_tenant ON vex.timeline_events(tenant_id);
CREATE INDEX idx_timeline_events_project ON vex.timeline_events(project_id);
CREATE INDEX idx_timeline_events_statement ON vex.timeline_events(statement_id);
CREATE INDEX idx_timeline_events_created ON vex.timeline_events(tenant_id, created_at);
CREATE INDEX idx_timeline_events_correlation ON vex.timeline_events(correlation_id);

-- Update timestamp function
CREATE OR REPLACE FUNCTION vex.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- vex.statements tracks its modification time in last_updated rather than updated_at,
-- so it needs a dedicated trigger function; reusing vex.update_updated_at() would fail
-- at runtime because the row has no updated_at field.
CREATE OR REPLACE FUNCTION vex.update_last_updated()
RETURNS TRIGGER AS $$
BEGIN
    NEW.last_updated = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Triggers
CREATE TRIGGER trg_projects_updated_at
    BEFORE UPDATE ON vex.projects
    FOR EACH ROW EXECUTE FUNCTION vex.update_updated_at();

CREATE TRIGGER trg_linksets_updated_at
    BEFORE UPDATE ON vex.linksets
    FOR EACH ROW EXECUTE FUNCTION vex.update_updated_at();

CREATE TRIGGER trg_statements_updated_at
    BEFORE UPDATE ON vex.statements
    FOR EACH ROW EXECUTE FUNCTION vex.update_last_updated();
@@ -0,0 +1,67 @@
namespace StellaOps.Excititor.Storage.Postgres.Models;

/// <summary>
/// Represents a project entity in the vex schema.
/// </summary>
public sealed class ProjectEntity
{
    /// <summary>
    /// Unique project identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant this project belongs to.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// Project name (unique per tenant).
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Display name.
    /// </summary>
    public string? DisplayName { get; init; }

    /// <summary>
    /// Project description.
    /// </summary>
    public string? Description { get; init; }

    /// <summary>
    /// Repository URL.
    /// </summary>
    public string? RepositoryUrl { get; init; }

    /// <summary>
    /// Default branch name.
    /// </summary>
    public string? DefaultBranch { get; init; }

    /// <summary>
    /// Project settings as JSON.
    /// </summary>
    public string Settings { get; init; } = "{}";

    /// <summary>
    /// Project metadata as JSON.
    /// </summary>
    public string Metadata { get; init; } = "{}";

    /// <summary>
    /// When the project was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the project was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }

    /// <summary>
    /// User who created the project.
    /// </summary>
    public string? CreatedBy { get; init; }
}
@@ -0,0 +1,134 @@
namespace StellaOps.Excititor.Storage.Postgres.Models;

/// <summary>
/// VEX status values per the OpenVEX specification.
/// </summary>
public enum VexStatus
{
    /// <summary>Product is not affected by the vulnerability.</summary>
    NotAffected,
    /// <summary>Product is affected by the vulnerability.</summary>
    Affected,
    /// <summary>Vulnerability is fixed in this product version.</summary>
    Fixed,
    /// <summary>Vulnerability is under investigation.</summary>
    UnderInvestigation
}

/// <summary>
/// VEX justification codes per the OpenVEX specification.
/// </summary>
public enum VexJustification
{
    /// <summary>The vulnerable component is not present.</summary>
    ComponentNotPresent,
    /// <summary>The vulnerable code is not present.</summary>
    VulnerableCodeNotPresent,
    /// <summary>The vulnerable code is not in the execute path.</summary>
    VulnerableCodeNotInExecutePath,
    /// <summary>The vulnerable code cannot be controlled by an adversary.</summary>
    VulnerableCodeCannotBeControlledByAdversary,
    /// <summary>Inline mitigations already exist.</summary>
    InlineMitigationsAlreadyExist
}

/// <summary>
/// Represents a VEX statement entity in the vex schema.
/// </summary>
public sealed class VexStatementEntity
{
    /// <summary>
    /// Unique statement identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant this statement belongs to.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// Project this statement applies to.
    /// </summary>
    public Guid? ProjectId { get; init; }

    /// <summary>
    /// Graph revision this statement is associated with.
    /// </summary>
    public Guid? GraphRevisionId { get; init; }

    /// <summary>
    /// Vulnerability ID (CVE, GHSA, etc.).
    /// </summary>
    public required string VulnerabilityId { get; init; }

    /// <summary>
    /// Product identifier (PURL or product key).
    /// </summary>
    public string? ProductId { get; init; }

    /// <summary>
    /// VEX status.
    /// </summary>
    public required VexStatus Status { get; init; }

    /// <summary>
    /// Justification for the not_affected status.
    /// </summary>
    public VexJustification? Justification { get; init; }

    /// <summary>
    /// Impact statement describing effects.
    /// </summary>
    public string? ImpactStatement { get; init; }

    /// <summary>
    /// Action statement describing remediation.
    /// </summary>
    public string? ActionStatement { get; init; }

    /// <summary>
    /// When the action statement was issued.
    /// </summary>
    public DateTimeOffset? ActionStatementTimestamp { get; init; }

    /// <summary>
    /// When the statement was first issued.
    /// </summary>
    public DateTimeOffset FirstIssued { get; init; }

    /// <summary>
    /// When the statement was last updated.
    /// </summary>
    public DateTimeOffset LastUpdated { get; init; }

    /// <summary>
    /// Source of the statement.
    /// </summary>
    public string? Source { get; init; }

    /// <summary>
    /// URL to the source document.
    /// </summary>
    public string? SourceUrl { get; init; }

    /// <summary>
    /// Evidence supporting the statement as JSON.
    /// </summary>
    public string Evidence { get; init; } = "{}";

    /// <summary>
    /// Provenance information as JSON.
    /// </summary>
    public string Provenance { get; init; } = "{}";

    /// <summary>
    /// Statement metadata as JSON.
    /// </summary>
    public string Metadata { get; init; } = "{}";

    /// <summary>
    /// User who created the statement.
    /// </summary>
    public string? CreatedBy { get; init; }
}
@@ -0,0 +1,75 @@
using StellaOps.Excititor.Storage.Postgres.Models;

namespace StellaOps.Excititor.Storage.Postgres.Repositories;

/// <summary>
/// Repository interface for VEX statement operations.
/// </summary>
public interface IVexStatementRepository
{
    /// <summary>
    /// Creates a new VEX statement.
    /// </summary>
    Task<VexStatementEntity> CreateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a VEX statement by ID.
    /// </summary>
    Task<VexStatementEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets VEX statements for a vulnerability.
    /// </summary>
    Task<IReadOnlyList<VexStatementEntity>> GetByVulnerabilityAsync(
        string tenantId,
        string vulnerabilityId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets VEX statements for a product.
    /// </summary>
    Task<IReadOnlyList<VexStatementEntity>> GetByProductAsync(
        string tenantId,
        string productId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets VEX statements for a project.
    /// </summary>
    Task<IReadOnlyList<VexStatementEntity>> GetByProjectAsync(
        string tenantId,
        Guid projectId,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets VEX statements by status.
    /// </summary>
    Task<IReadOnlyList<VexStatementEntity>> GetByStatusAsync(
        string tenantId,
        VexStatus status,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates a VEX statement.
    /// </summary>
    Task<bool> UpdateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a VEX statement.
    /// </summary>
    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the effective VEX status for a vulnerability/product combination.
    /// Applies lattice logic for status precedence.
    /// </summary>
    Task<VexStatementEntity?> GetEffectiveStatementAsync(
        string tenantId,
        string vulnerabilityId,
        string productId,
        CancellationToken cancellationToken = default);
}
@@ -0,0 +1,385 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Excititor.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;

namespace StellaOps.Excititor.Storage.Postgres.Repositories;

/// <summary>
/// PostgreSQL repository for VEX statement operations.
/// </summary>
public sealed class VexStatementRepository : RepositoryBase<ExcititorDataSource>, IVexStatementRepository
{
    /// <summary>
    /// Creates a new VEX statement repository.
    /// </summary>
    public VexStatementRepository(ExcititorDataSource dataSource, ILogger<VexStatementRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<VexStatementEntity> CreateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO vex.statements (
                id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                source, source_url, evidence, provenance, metadata, created_by
            )
            VALUES (
                @id, @tenant_id, @project_id, @graph_revision_id, @vulnerability_id, @product_id,
                @status, @justification, @impact_statement, @action_statement, @action_statement_timestamp,
                @source, @source_url, @evidence::jsonb, @provenance::jsonb, @metadata::jsonb, @created_by
            )
            RETURNING id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            """;

        await using var connection = await DataSource.OpenConnectionAsync(statement.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddStatementParameters(command, statement);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return MapStatement(reader);
    }
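
    // first_issued and last_updated are deliberately absent from the INSERT column
    // list: the vex.statements defaults (NOW()) assign them, and the RETURNING clause
    // reads the authoritative values back into the mapped entity.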

    /// <inheritdoc />
    public async Task<VexStatementEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            FROM vex.statements
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            MapStatement,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<VexStatementEntity>> GetByVulnerabilityAsync(
        string tenantId,
        string vulnerabilityId,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            FROM vex.statements
            WHERE tenant_id = @tenant_id AND vulnerability_id = @vulnerability_id
            ORDER BY last_updated DESC, id
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "vulnerability_id", vulnerabilityId);
            },
            MapStatement,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<VexStatementEntity>> GetByProductAsync(
        string tenantId,
        string productId,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            FROM vex.statements
            WHERE tenant_id = @tenant_id AND product_id = @product_id
            ORDER BY last_updated DESC, id
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "product_id", productId);
            },
            MapStatement,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<VexStatementEntity>> GetByProjectAsync(
        string tenantId,
        Guid projectId,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            FROM vex.statements
            WHERE tenant_id = @tenant_id AND project_id = @project_id
            ORDER BY last_updated DESC, id
            LIMIT @limit OFFSET @offset
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "project_id", projectId);
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapStatement,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<VexStatementEntity>> GetByStatusAsync(
        string tenantId,
        VexStatus status,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            FROM vex.statements
            WHERE tenant_id = @tenant_id AND status = @status
            ORDER BY last_updated DESC, id
            LIMIT @limit OFFSET @offset
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "status", StatusToString(status));
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapStatement,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<bool> UpdateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE vex.statements
            SET status = @status,
                justification = @justification,
                impact_statement = @impact_statement,
                action_statement = @action_statement,
                action_statement_timestamp = @action_statement_timestamp,
                source = @source,
                source_url = @source_url,
                evidence = @evidence::jsonb,
                provenance = @provenance::jsonb,
                metadata = @metadata::jsonb
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        var rows = await ExecuteAsync(
            statement.TenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", statement.TenantId);
                AddParameter(cmd, "id", statement.Id);
                AddParameter(cmd, "status", StatusToString(statement.Status));
                AddParameter(cmd, "justification", statement.Justification.HasValue
                    ? JustificationToString(statement.Justification.Value)
                    : null);
                AddParameter(cmd, "impact_statement", statement.ImpactStatement);
                AddParameter(cmd, "action_statement", statement.ActionStatement);
                AddParameter(cmd, "action_statement_timestamp", statement.ActionStatementTimestamp);
                AddParameter(cmd, "source", statement.Source);
                AddParameter(cmd, "source_url", statement.SourceUrl);
                AddJsonbParameter(cmd, "evidence", statement.Evidence);
                AddJsonbParameter(cmd, "provenance", statement.Provenance);
                AddJsonbParameter(cmd, "metadata", statement.Metadata);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = "DELETE FROM vex.statements WHERE tenant_id = @tenant_id AND id = @id";

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<VexStatementEntity?> GetEffectiveStatementAsync(
        string tenantId,
        string vulnerabilityId,
        string productId,
        CancellationToken cancellationToken = default)
    {
        // VEX lattice precedence: fixed > not_affected > affected > under_investigation
        const string sql = """
            SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
                status, justification, impact_statement, action_statement, action_statement_timestamp,
                first_issued, last_updated, source, source_url,
                evidence::text, provenance::text, metadata::text, created_by
            FROM vex.statements
            WHERE tenant_id = @tenant_id
              AND vulnerability_id = @vulnerability_id
              AND product_id = @product_id
            ORDER BY
                CASE status
                    WHEN 'fixed' THEN 1
                    WHEN 'not_affected' THEN 2
                    WHEN 'affected' THEN 3
                    WHEN 'under_investigation' THEN 4
                END,
                last_updated DESC
            LIMIT 1
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "vulnerability_id", vulnerabilityId);
                AddParameter(cmd, "product_id", productId);
            },
            MapStatement,
            cancellationToken).ConfigureAwait(false);
    }
|
||||
private static void AddStatementParameters(NpgsqlCommand command, VexStatementEntity statement)
|
||||
{
|
||||
AddParameter(command, "id", statement.Id);
|
||||
AddParameter(command, "tenant_id", statement.TenantId);
|
||||
AddParameter(command, "project_id", statement.ProjectId);
|
||||
AddParameter(command, "graph_revision_id", statement.GraphRevisionId);
|
||||
AddParameter(command, "vulnerability_id", statement.VulnerabilityId);
|
||||
AddParameter(command, "product_id", statement.ProductId);
|
||||
AddParameter(command, "status", StatusToString(statement.Status));
|
||||
AddParameter(command, "justification", statement.Justification.HasValue
|
||||
? JustificationToString(statement.Justification.Value)
|
||||
: null);
|
||||
AddParameter(command, "impact_statement", statement.ImpactStatement);
|
||||
AddParameter(command, "action_statement", statement.ActionStatement);
|
||||
AddParameter(command, "action_statement_timestamp", statement.ActionStatementTimestamp);
|
||||
AddParameter(command, "source", statement.Source);
|
||||
AddParameter(command, "source_url", statement.SourceUrl);
|
||||
AddJsonbParameter(command, "evidence", statement.Evidence);
|
||||
AddJsonbParameter(command, "provenance", statement.Provenance);
|
||||
AddJsonbParameter(command, "metadata", statement.Metadata);
|
||||
AddParameter(command, "created_by", statement.CreatedBy);
|
||||
}
|
||||
|
||||
private static VexStatementEntity MapStatement(NpgsqlDataReader reader) => new()
|
||||
{
|
||||
Id = reader.GetGuid(0),
|
||||
TenantId = reader.GetString(1),
|
||||
ProjectId = GetNullableGuid(reader, 2),
|
||||
GraphRevisionId = GetNullableGuid(reader, 3),
|
||||
VulnerabilityId = reader.GetString(4),
|
||||
ProductId = GetNullableString(reader, 5),
|
||||
Status = ParseStatus(reader.GetString(6)),
|
||||
Justification = ParseJustification(GetNullableString(reader, 7)),
|
||||
ImpactStatement = GetNullableString(reader, 8),
|
||||
ActionStatement = GetNullableString(reader, 9),
|
||||
ActionStatementTimestamp = GetNullableDateTimeOffset(reader, 10),
|
||||
FirstIssued = reader.GetFieldValue<DateTimeOffset>(11),
|
||||
LastUpdated = reader.GetFieldValue<DateTimeOffset>(12),
|
||||
Source = GetNullableString(reader, 13),
|
||||
SourceUrl = GetNullableString(reader, 14),
|
||||
Evidence = reader.GetString(15),
|
||||
Provenance = reader.GetString(16),
|
||||
Metadata = reader.GetString(17),
|
||||
CreatedBy = GetNullableString(reader, 18)
|
||||
};
|
||||
|
||||
private static string StatusToString(VexStatus status) => status switch
|
||||
{
|
||||
VexStatus.NotAffected => "not_affected",
|
||||
VexStatus.Affected => "affected",
|
||||
VexStatus.Fixed => "fixed",
|
||||
VexStatus.UnderInvestigation => "under_investigation",
|
||||
_ => throw new ArgumentException($"Unknown VEX status: {status}", nameof(status))
|
||||
};
|
||||
|
||||
private static VexStatus ParseStatus(string status) => status switch
|
||||
{
|
||||
"not_affected" => VexStatus.NotAffected,
|
||||
"affected" => VexStatus.Affected,
|
||||
"fixed" => VexStatus.Fixed,
|
||||
"under_investigation" => VexStatus.UnderInvestigation,
|
||||
_ => throw new ArgumentException($"Unknown VEX status: {status}", nameof(status))
|
||||
};
|
||||
|
||||
private static string JustificationToString(VexJustification justification) => justification switch
|
||||
{
|
||||
VexJustification.ComponentNotPresent => "component_not_present",
|
||||
VexJustification.VulnerableCodeNotPresent => "vulnerable_code_not_present",
|
||||
VexJustification.VulnerableCodeNotInExecutePath => "vulnerable_code_not_in_execute_path",
|
||||
VexJustification.VulnerableCodeCannotBeControlledByAdversary => "vulnerable_code_cannot_be_controlled_by_adversary",
|
||||
VexJustification.InlineMitigationsAlreadyExist => "inline_mitigations_already_exist",
|
||||
_ => throw new ArgumentException($"Unknown VEX justification: {justification}", nameof(justification))
|
||||
};
|
||||
|
||||
private static VexJustification? ParseJustification(string? justification) => justification switch
|
||||
{
|
||||
null => null,
|
||||
"component_not_present" => VexJustification.ComponentNotPresent,
|
||||
"vulnerable_code_not_present" => VexJustification.VulnerableCodeNotPresent,
|
||||
"vulnerable_code_not_in_execute_path" => VexJustification.VulnerableCodeNotInExecutePath,
|
||||
"vulnerable_code_cannot_be_controlled_by_adversary" => VexJustification.VulnerableCodeCannotBeControlledByAdversary,
|
||||
"inline_mitigations_already_exist" => VexJustification.InlineMitigationsAlreadyExist,
|
||||
_ => throw new ArgumentException($"Unknown VEX justification: {justification}", nameof(justification))
|
||||
};
|
||||
}
|
||||
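Reviewer note: a minimal sketch of how the lattice ordering above behaves at a call site. The `statements` variable and the sample identifiers are hypothetical, not part of this change.

// statements: an IVexStatementRepository resolved from DI (hypothetical wiring).
var effective = await statements.GetEffectiveStatementAsync(
    tenantId: "tenant-a",
    vulnerabilityId: "CVE-2024-0001",
    productId: "pkg:nuget/Example.Lib@1.2.3",
    cancellationToken);

// If both a 'fixed' and an 'affected' statement exist for the pair, the
// CASE ordering guarantees the 'fixed' one is returned.
if (effective is { Status: VexStatus.Fixed })
{
    // suppress the finding for this product
}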
@@ -0,0 +1,53 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Excititor.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Excititor.Storage.Postgres;

/// <summary>
/// Extension methods for configuring Excititor PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds Excititor PostgreSQL storage services.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configuration">Configuration root.</param>
    /// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddExcititorPostgresStorage(
        this IServiceCollection services,
        IConfiguration configuration,
        string sectionName = "Postgres:Excititor")
    {
        // Bind the default (unnamed) options instance: ExcititorDataSource consumes
        // IOptions<PostgresOptions>.Value, which a named registration would not populate.
        services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
        services.AddSingleton<ExcititorDataSource>();

        // Register repositories
        services.AddScoped<IVexStatementRepository, VexStatementRepository>();

        return services;
    }

    /// <summary>
    /// Adds Excititor PostgreSQL storage services with explicit options.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configureOptions">Options configuration action.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddExcititorPostgresStorage(
        this IServiceCollection services,
        Action<PostgresOptions> configureOptions)
    {
        services.Configure(configureOptions);
        services.AddSingleton<ExcititorDataSource>();

        // Register repositories
        services.AddScoped<IVexStatementRepository, VexStatementRepository>();

        return services;
    }
}
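Reviewer note: a usage sketch for the configuration-based overload. Only `SchemaName` is visible in this diff; the `ConnectionString` key is an assumption about the `PostgresOptions` shape and may differ.

using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Excititor.Storage.Postgres;
using StellaOps.Excititor.Storage.Postgres.Repositories;

var configuration = new ConfigurationBuilder()
    .AddInMemoryCollection(new Dictionary<string, string?>
    {
        ["Postgres:Excititor:ConnectionString"] = "Host=localhost;Database=stellaops", // assumed key
        ["Postgres:Excititor:SchemaName"] = "vex",
    })
    .Build();

var services = new ServiceCollection();
services.AddLogging();
services.AddExcititorPostgresStorage(configuration);

await using var provider = services.BuildServiceProvider();
using var scope = provider.CreateScope();
var statements = scope.ServiceProvider.GetRequiredService<IVexStatementRepository>();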
@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <RootNamespace>StellaOps.Excititor.Storage.Postgres</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
  </ItemGroup>

</Project>
@@ -0,0 +1,326 @@
-- Notify Schema Migration 001: Initial Schema
-- Creates the notify schema for notifications, channels, and delivery tracking

-- Create schema
CREATE SCHEMA IF NOT EXISTS notify;

-- Channel types
DO $$ BEGIN
    CREATE TYPE notify.channel_type AS ENUM (
        'email', 'slack', 'teams', 'webhook', 'pagerduty', 'opsgenie'
    );
EXCEPTION
    WHEN duplicate_object THEN null;
END $$;

-- Delivery status
DO $$ BEGIN
    CREATE TYPE notify.delivery_status AS ENUM (
        'pending', 'queued', 'sending', 'sent', 'delivered', 'failed', 'bounced'
    );
EXCEPTION
    WHEN duplicate_object THEN null;
END $$;

-- Channels table
CREATE TABLE IF NOT EXISTS notify.channels (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    channel_type notify.channel_type NOT NULL,
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    config JSONB NOT NULL DEFAULT '{}',
    credentials JSONB,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by TEXT,
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_channels_tenant ON notify.channels(tenant_id);
CREATE INDEX idx_channels_type ON notify.channels(tenant_id, channel_type);

-- Rules table (notification routing rules)
CREATE TABLE IF NOT EXISTS notify.rules (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    priority INT NOT NULL DEFAULT 0,
    event_types TEXT[] NOT NULL DEFAULT '{}',
    filter JSONB NOT NULL DEFAULT '{}',
    channel_ids UUID[] NOT NULL DEFAULT '{}',
    template_id UUID,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_rules_tenant ON notify.rules(tenant_id);
CREATE INDEX idx_rules_enabled ON notify.rules(tenant_id, enabled, priority DESC);

-- Templates table
CREATE TABLE IF NOT EXISTS notify.templates (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    channel_type notify.channel_type NOT NULL,
    subject_template TEXT,
    body_template TEXT NOT NULL,
    locale TEXT NOT NULL DEFAULT 'en',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name, channel_type, locale)
);

CREATE INDEX idx_templates_tenant ON notify.templates(tenant_id);

-- Deliveries table
CREATE TABLE IF NOT EXISTS notify.deliveries (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    channel_id UUID NOT NULL REFERENCES notify.channels(id),
    rule_id UUID REFERENCES notify.rules(id),
    template_id UUID REFERENCES notify.templates(id),
    status notify.delivery_status NOT NULL DEFAULT 'pending',
    recipient TEXT NOT NULL,
    subject TEXT,
    body TEXT,
    event_type TEXT NOT NULL,
    event_payload JSONB NOT NULL DEFAULT '{}',
    attempt INT NOT NULL DEFAULT 0,
    max_attempts INT NOT NULL DEFAULT 3,
    next_retry_at TIMESTAMPTZ,
    error_message TEXT,
    external_id TEXT,
    correlation_id TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    queued_at TIMESTAMPTZ,
    sent_at TIMESTAMPTZ,
    delivered_at TIMESTAMPTZ,
    failed_at TIMESTAMPTZ
);

CREATE INDEX idx_deliveries_tenant ON notify.deliveries(tenant_id);
CREATE INDEX idx_deliveries_status ON notify.deliveries(tenant_id, status);
CREATE INDEX idx_deliveries_pending ON notify.deliveries(status, next_retry_at)
    WHERE status IN ('pending', 'queued');
CREATE INDEX idx_deliveries_channel ON notify.deliveries(channel_id);
CREATE INDEX idx_deliveries_correlation ON notify.deliveries(correlation_id);
CREATE INDEX idx_deliveries_created ON notify.deliveries(tenant_id, created_at);

-- Digests table (aggregated notifications)
CREATE TABLE IF NOT EXISTS notify.digests (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    channel_id UUID NOT NULL REFERENCES notify.channels(id),
    recipient TEXT NOT NULL,
    digest_key TEXT NOT NULL,
    event_count INT NOT NULL DEFAULT 0,
    events JSONB NOT NULL DEFAULT '[]',
    status TEXT NOT NULL DEFAULT 'collecting' CHECK (status IN ('collecting', 'sending', 'sent')),
    collect_until TIMESTAMPTZ NOT NULL,
    sent_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, channel_id, recipient, digest_key)
);

CREATE INDEX idx_digests_tenant ON notify.digests(tenant_id);
CREATE INDEX idx_digests_collect ON notify.digests(status, collect_until)
    WHERE status = 'collecting';

-- Quiet hours table
CREATE TABLE IF NOT EXISTS notify.quiet_hours (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id UUID,
    channel_id UUID REFERENCES notify.channels(id),
    start_time TIME NOT NULL,
    end_time TIME NOT NULL,
    timezone TEXT NOT NULL DEFAULT 'UTC',
    days_of_week INT[] NOT NULL DEFAULT '{0,1,2,3,4,5,6}',
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_quiet_hours_tenant ON notify.quiet_hours(tenant_id);

-- Maintenance windows table
CREATE TABLE IF NOT EXISTS notify.maintenance_windows (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    start_at TIMESTAMPTZ NOT NULL,
    end_at TIMESTAMPTZ NOT NULL,
    suppress_channels UUID[],
    suppress_event_types TEXT[],
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by TEXT,
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_maintenance_windows_tenant ON notify.maintenance_windows(tenant_id);
CREATE INDEX idx_maintenance_windows_active ON notify.maintenance_windows(start_at, end_at);

-- Escalation policies table
CREATE TABLE IF NOT EXISTS notify.escalation_policies (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    enabled BOOLEAN NOT NULL DEFAULT TRUE,
    steps JSONB NOT NULL DEFAULT '[]',
    repeat_count INT NOT NULL DEFAULT 0,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_escalation_policies_tenant ON notify.escalation_policies(tenant_id);

-- Escalation states table
CREATE TABLE IF NOT EXISTS notify.escalation_states (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    policy_id UUID NOT NULL REFERENCES notify.escalation_policies(id),
    incident_id UUID,
    correlation_id TEXT NOT NULL,
    current_step INT NOT NULL DEFAULT 0,
    repeat_iteration INT NOT NULL DEFAULT 0,
    status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'acknowledged', 'resolved', 'expired')),
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    next_escalation_at TIMESTAMPTZ,
    acknowledged_at TIMESTAMPTZ,
    acknowledged_by TEXT,
    resolved_at TIMESTAMPTZ,
    resolved_by TEXT,
    metadata JSONB NOT NULL DEFAULT '{}'
);

CREATE INDEX idx_escalation_states_tenant ON notify.escalation_states(tenant_id);
CREATE INDEX idx_escalation_states_active ON notify.escalation_states(status, next_escalation_at)
    WHERE status = 'active';
CREATE INDEX idx_escalation_states_correlation ON notify.escalation_states(correlation_id);

-- On-call schedules table
CREATE TABLE IF NOT EXISTS notify.on_call_schedules (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    timezone TEXT NOT NULL DEFAULT 'UTC',
    rotation_type TEXT NOT NULL DEFAULT 'weekly' CHECK (rotation_type IN ('daily', 'weekly', 'custom')),
    participants JSONB NOT NULL DEFAULT '[]',
    overrides JSONB NOT NULL DEFAULT '[]',
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(tenant_id, name)
);

CREATE INDEX idx_on_call_schedules_tenant ON notify.on_call_schedules(tenant_id);

-- Inbox table (in-app notifications)
CREATE TABLE IF NOT EXISTS notify.inbox (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id UUID NOT NULL,
    title TEXT NOT NULL,
    body TEXT,
    event_type TEXT NOT NULL,
    event_payload JSONB NOT NULL DEFAULT '{}',
    read BOOLEAN NOT NULL DEFAULT FALSE,
    archived BOOLEAN NOT NULL DEFAULT FALSE,
    action_url TEXT,
    correlation_id TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    read_at TIMESTAMPTZ,
    archived_at TIMESTAMPTZ
);

CREATE INDEX idx_inbox_tenant_user ON notify.inbox(tenant_id, user_id);
CREATE INDEX idx_inbox_unread ON notify.inbox(tenant_id, user_id, read, created_at DESC)
    WHERE read = FALSE AND archived = FALSE;

-- Incidents table
CREATE TABLE IF NOT EXISTS notify.incidents (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    title TEXT NOT NULL,
    description TEXT,
    severity TEXT NOT NULL DEFAULT 'medium' CHECK (severity IN ('critical', 'high', 'medium', 'low')),
    status TEXT NOT NULL DEFAULT 'open' CHECK (status IN ('open', 'acknowledged', 'resolved', 'closed')),
    source TEXT,
    correlation_id TEXT,
    assigned_to UUID,
    escalation_policy_id UUID REFERENCES notify.escalation_policies(id),
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    acknowledged_at TIMESTAMPTZ,
    resolved_at TIMESTAMPTZ,
    closed_at TIMESTAMPTZ,
    created_by TEXT
);

CREATE INDEX idx_incidents_tenant ON notify.incidents(tenant_id);
CREATE INDEX idx_incidents_status ON notify.incidents(tenant_id, status);
CREATE INDEX idx_incidents_severity ON notify.incidents(tenant_id, severity);
CREATE INDEX idx_incidents_correlation ON notify.incidents(correlation_id);

-- Audit log table
CREATE TABLE IF NOT EXISTS notify.audit (
    id BIGSERIAL PRIMARY KEY,
    tenant_id TEXT NOT NULL,
    user_id UUID,
    action TEXT NOT NULL,
    resource_type TEXT NOT NULL,
    resource_id TEXT,
    details JSONB,
    correlation_id TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_audit_tenant ON notify.audit(tenant_id);
CREATE INDEX idx_audit_created ON notify.audit(tenant_id, created_at);

-- Update timestamp function
CREATE OR REPLACE FUNCTION notify.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Triggers
CREATE TRIGGER trg_channels_updated_at
    BEFORE UPDATE ON notify.channels
    FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();

CREATE TRIGGER trg_rules_updated_at
    BEFORE UPDATE ON notify.rules
    FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();

CREATE TRIGGER trg_templates_updated_at
    BEFORE UPDATE ON notify.templates
    FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();

CREATE TRIGGER trg_digests_updated_at
    BEFORE UPDATE ON notify.digests
    FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();

CREATE TRIGGER trg_escalation_policies_updated_at
    BEFORE UPDATE ON notify.escalation_policies
    FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();

CREATE TRIGGER trg_on_call_schedules_updated_at
    BEFORE UPDATE ON notify.on_call_schedules
    FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
@@ -0,0 +1,81 @@
namespace StellaOps.Notify.Storage.Postgres.Models;

/// <summary>
/// Channel types for notifications.
/// </summary>
public enum ChannelType
{
    /// <summary>Email channel.</summary>
    Email,
    /// <summary>Slack channel.</summary>
    Slack,
    /// <summary>Microsoft Teams channel.</summary>
    Teams,
    /// <summary>Generic webhook channel.</summary>
    Webhook,
    /// <summary>PagerDuty integration.</summary>
    PagerDuty,
    /// <summary>OpsGenie integration.</summary>
    OpsGenie
}

/// <summary>
/// Represents a notification channel entity.
/// </summary>
public sealed class ChannelEntity
{
    /// <summary>
    /// Unique channel identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant this channel belongs to.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// Channel name (unique per tenant).
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Type of channel.
    /// </summary>
    public required ChannelType ChannelType { get; init; }

    /// <summary>
    /// Whether the channel is enabled.
    /// </summary>
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// Channel configuration as JSON.
    /// </summary>
    public string Config { get; init; } = "{}";

    /// <summary>
    /// Channel credentials as JSON (encrypted).
    /// </summary>
    public string? Credentials { get; init; }

    /// <summary>
    /// Channel metadata as JSON.
    /// </summary>
    public string Metadata { get; init; } = "{}";

    /// <summary>
    /// When the channel was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the channel was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }

    /// <summary>
    /// User who created the channel.
    /// </summary>
    public string? CreatedBy { get; init; }
}
@@ -0,0 +1,138 @@
namespace StellaOps.Notify.Storage.Postgres.Models;

/// <summary>
/// Delivery status values.
/// </summary>
public enum DeliveryStatus
{
    /// <summary>Delivery is pending.</summary>
    Pending,
    /// <summary>Delivery is queued for sending.</summary>
    Queued,
    /// <summary>Delivery is being sent.</summary>
    Sending,
    /// <summary>Delivery was sent.</summary>
    Sent,
    /// <summary>Delivery was confirmed delivered.</summary>
    Delivered,
    /// <summary>Delivery failed.</summary>
    Failed,
    /// <summary>Delivery bounced.</summary>
    Bounced
}

/// <summary>
/// Represents a notification delivery entity.
/// </summary>
public sealed class DeliveryEntity
{
    /// <summary>
    /// Unique delivery identifier.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant this delivery belongs to.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// Channel used for this delivery.
    /// </summary>
    public required Guid ChannelId { get; init; }

    /// <summary>
    /// Rule that triggered this delivery.
    /// </summary>
    public Guid? RuleId { get; init; }

    /// <summary>
    /// Template used for this delivery.
    /// </summary>
    public Guid? TemplateId { get; init; }

    /// <summary>
    /// Current delivery status.
    /// </summary>
    public DeliveryStatus Status { get; init; } = DeliveryStatus.Pending;

    /// <summary>
    /// Recipient address/identifier.
    /// </summary>
    public required string Recipient { get; init; }

    /// <summary>
    /// Notification subject.
    /// </summary>
    public string? Subject { get; init; }

    /// <summary>
    /// Notification body.
    /// </summary>
    public string? Body { get; init; }

    /// <summary>
    /// Event type that triggered this notification.
    /// </summary>
    public required string EventType { get; init; }

    /// <summary>
    /// Event payload as JSON.
    /// </summary>
    public string EventPayload { get; init; } = "{}";

    /// <summary>
    /// Current attempt number.
    /// </summary>
    public int Attempt { get; init; }

    /// <summary>
    /// Maximum number of attempts.
    /// </summary>
    public int MaxAttempts { get; init; } = 3;

    /// <summary>
    /// Next retry time.
    /// </summary>
    public DateTimeOffset? NextRetryAt { get; init; }

    /// <summary>
    /// Error message if failed.
    /// </summary>
    public string? ErrorMessage { get; init; }

    /// <summary>
    /// External ID from the channel provider.
    /// </summary>
    public string? ExternalId { get; init; }

    /// <summary>
    /// Correlation ID for tracing.
    /// </summary>
    public string? CorrelationId { get; init; }

    /// <summary>
    /// When the delivery was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// When the delivery was queued.
    /// </summary>
    public DateTimeOffset? QueuedAt { get; init; }

    /// <summary>
    /// When the delivery was sent.
    /// </summary>
    public DateTimeOffset? SentAt { get; init; }

    /// <summary>
    /// When the delivery was confirmed delivered.
    /// </summary>
    public DateTimeOffset? DeliveredAt { get; init; }

    /// <summary>
    /// When the delivery failed.
    /// </summary>
    public DateTimeOffset? FailedAt { get; init; }
}
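Reviewer note: a construction sketch showing the `required`/`init` surface of the entity; all values are illustrative and `channelId` is a hypothetical local referencing an existing notify.channels row.

var delivery = new DeliveryEntity
{
    Id = Guid.NewGuid(),
    TenantId = "tenant-a",
    ChannelId = channelId,
    Recipient = "ops@example.com",
    EventType = "scan.completed",
    EventPayload = """{"scanId":"123"}""",
    CorrelationId = "scan-123",
    // Status defaults to DeliveryStatus.Pending; Attempt to 0; MaxAttempts to 3.
};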
@@ -0,0 +1,38 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;

namespace StellaOps.Notify.Storage.Postgres;

/// <summary>
/// PostgreSQL data source for the Notify module.
/// Manages connections with tenant context for notifications and delivery tracking.
/// </summary>
public sealed class NotifyDataSource : DataSourceBase
{
    /// <summary>
    /// Default schema name for Notify tables.
    /// </summary>
    public const string DefaultSchemaName = "notify";

    /// <summary>
    /// Creates a new Notify data source.
    /// </summary>
    public NotifyDataSource(IOptions<PostgresOptions> options, ILogger<NotifyDataSource> logger)
        : base(CreateOptions(options.Value), logger)
    {
    }

    /// <inheritdoc />
    protected override string ModuleName => "Notify";

    private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
    {
        // Fall back to the default schema when none is configured
        if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
        {
            baseOptions.SchemaName = DefaultSchemaName;
        }
        return baseOptions;
    }
}
@@ -0,0 +1,264 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Notify.Storage.Postgres.Models;

namespace StellaOps.Notify.Storage.Postgres.Repositories;

/// <summary>
/// PostgreSQL repository for notification channel operations.
/// </summary>
public sealed class ChannelRepository : RepositoryBase<NotifyDataSource>, IChannelRepository
{
    /// <summary>
    /// Creates a new channel repository.
    /// </summary>
    public ChannelRepository(NotifyDataSource dataSource, ILogger<ChannelRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<ChannelEntity> CreateAsync(ChannelEntity channel, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO notify.channels (
                id, tenant_id, name, channel_type, enabled, config, credentials, metadata, created_by
            )
            VALUES (
                @id, @tenant_id, @name, @channel_type::notify.channel_type, @enabled,
                @config::jsonb, @credentials::jsonb, @metadata::jsonb, @created_by
            )
            RETURNING id, tenant_id, name, channel_type::text, enabled,
                      config::text, credentials::text, metadata::text, created_at, updated_at, created_by
            """;

        await using var connection = await DataSource.OpenConnectionAsync(channel.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "id", channel.Id);
        AddParameter(command, "tenant_id", channel.TenantId);
        AddParameter(command, "name", channel.Name);
        AddParameter(command, "channel_type", ChannelTypeToString(channel.ChannelType));
        AddParameter(command, "enabled", channel.Enabled);
        AddJsonbParameter(command, "config", channel.Config);
        AddJsonbParameter(command, "credentials", channel.Credentials);
        AddJsonbParameter(command, "metadata", channel.Metadata);
        AddParameter(command, "created_by", channel.CreatedBy);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return MapChannel(reader);
    }

    /// <inheritdoc />
    public async Task<ChannelEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, name, channel_type::text, enabled,
                   config::text, credentials::text, metadata::text, created_at, updated_at, created_by
            FROM notify.channels
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            MapChannel,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<ChannelEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, name, channel_type::text, enabled,
                   config::text, credentials::text, metadata::text, created_at, updated_at, created_by
            FROM notify.channels
            WHERE tenant_id = @tenant_id AND name = @name
            """;

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "name", name);
            },
            MapChannel,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<ChannelEntity>> GetAllAsync(
        string tenantId,
        bool? enabled = null,
        ChannelType? channelType = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var sql = """
            SELECT id, tenant_id, name, channel_type::text, enabled,
                   config::text, credentials::text, metadata::text, created_at, updated_at, created_by
            FROM notify.channels
            WHERE tenant_id = @tenant_id
            """;

        if (enabled.HasValue)
        {
            sql += " AND enabled = @enabled";
        }

        if (channelType.HasValue)
        {
            sql += " AND channel_type = @channel_type::notify.channel_type";
        }

        sql += " ORDER BY name, id LIMIT @limit OFFSET @offset";

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                if (enabled.HasValue)
                {
                    AddParameter(cmd, "enabled", enabled.Value);
                }
                if (channelType.HasValue)
                {
                    AddParameter(cmd, "channel_type", ChannelTypeToString(channelType.Value));
                }
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapChannel,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<bool> UpdateAsync(ChannelEntity channel, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE notify.channels
            SET name = @name,
                channel_type = @channel_type::notify.channel_type,
                enabled = @enabled,
                config = @config::jsonb,
                credentials = @credentials::jsonb,
                metadata = @metadata::jsonb
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        var rows = await ExecuteAsync(
            channel.TenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", channel.TenantId);
                AddParameter(cmd, "id", channel.Id);
                AddParameter(cmd, "name", channel.Name);
                AddParameter(cmd, "channel_type", ChannelTypeToString(channel.ChannelType));
                AddParameter(cmd, "enabled", channel.Enabled);
                AddJsonbParameter(cmd, "config", channel.Config);
                AddJsonbParameter(cmd, "credentials", channel.Credentials);
                AddJsonbParameter(cmd, "metadata", channel.Metadata);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = "DELETE FROM notify.channels WHERE tenant_id = @tenant_id AND id = @id";

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<ChannelEntity>> GetEnabledByTypeAsync(
        string tenantId,
        ChannelType channelType,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT id, tenant_id, name, channel_type::text, enabled,
                   config::text, credentials::text, metadata::text, created_at, updated_at, created_by
            FROM notify.channels
            WHERE tenant_id = @tenant_id
              AND channel_type = @channel_type::notify.channel_type
              AND enabled = TRUE
            ORDER BY name, id
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "channel_type", ChannelTypeToString(channelType));
            },
            MapChannel,
            cancellationToken).ConfigureAwait(false);
    }

    private static ChannelEntity MapChannel(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(0),
        TenantId = reader.GetString(1),
        Name = reader.GetString(2),
        ChannelType = ParseChannelType(reader.GetString(3)),
        Enabled = reader.GetBoolean(4),
        Config = reader.GetString(5),
        Credentials = GetNullableString(reader, 6),
        Metadata = reader.GetString(7),
        CreatedAt = reader.GetFieldValue<DateTimeOffset>(8),
        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(9),
        CreatedBy = GetNullableString(reader, 10)
    };

    private static string ChannelTypeToString(ChannelType channelType) => channelType switch
    {
        ChannelType.Email => "email",
        ChannelType.Slack => "slack",
        ChannelType.Teams => "teams",
        ChannelType.Webhook => "webhook",
        ChannelType.PagerDuty => "pagerduty",
        ChannelType.OpsGenie => "opsgenie",
        _ => throw new ArgumentException($"Unknown channel type: {channelType}", nameof(channelType))
    };

    private static ChannelType ParseChannelType(string channelType) => channelType switch
    {
        "email" => ChannelType.Email,
        "slack" => ChannelType.Slack,
        "teams" => ChannelType.Teams,
        "webhook" => ChannelType.Webhook,
        "pagerduty" => ChannelType.PagerDuty,
        "opsgenie" => ChannelType.OpsGenie,
        _ => throw new ArgumentException($"Unknown channel type: {channelType}", nameof(channelType))
    };
}
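Reviewer note: a fan-out sketch using the typed lookup; `channels` (an IChannelRepository) and `ct` are hypothetical locals.

var slackChannels = await channels.GetEnabledByTypeAsync("tenant-a", ChannelType.Slack, ct);
foreach (var channel in slackChannels)
{
    // enqueue one DeliveryEntity per enabled Slack channel
}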
@@ -0,0 +1,363 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Notify.Storage.Postgres.Models;

namespace StellaOps.Notify.Storage.Postgres.Repositories;

/// <summary>
/// PostgreSQL repository for notification delivery operations.
/// </summary>
public sealed class DeliveryRepository : RepositoryBase<NotifyDataSource>, IDeliveryRepository
{
    /// <summary>
    /// Creates a new delivery repository.
    /// </summary>
    public DeliveryRepository(NotifyDataSource dataSource, ILogger<DeliveryRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<DeliveryEntity> CreateAsync(DeliveryEntity delivery, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO notify.deliveries (
                id, tenant_id, channel_id, rule_id, template_id, status, recipient,
                subject, body, event_type, event_payload, max_attempts, correlation_id
            )
            VALUES (
                @id, @tenant_id, @channel_id, @rule_id, @template_id, @status::notify.delivery_status, @recipient,
                @subject, @body, @event_type, @event_payload::jsonb, @max_attempts, @correlation_id
            )
            RETURNING *
            """;

        await using var connection = await DataSource.OpenConnectionAsync(delivery.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddDeliveryParameters(command, delivery);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return MapDelivery(reader);
    }

    /// <inheritdoc />
    public async Task<DeliveryEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = "SELECT * FROM notify.deliveries WHERE tenant_id = @tenant_id AND id = @id";

        return await QuerySingleOrDefaultAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            MapDelivery,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<DeliveryEntity>> GetPendingAsync(
        string tenantId,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT * FROM notify.deliveries
            WHERE tenant_id = @tenant_id
              AND status IN ('pending', 'queued')
              AND (next_retry_at IS NULL OR next_retry_at <= NOW())
              AND attempt < max_attempts
            ORDER BY created_at, id
            LIMIT @limit
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "limit", limit);
            },
            MapDelivery,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<DeliveryEntity>> GetByStatusAsync(
        string tenantId,
        DeliveryStatus status,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT * FROM notify.deliveries
            WHERE tenant_id = @tenant_id AND status = @status::notify.delivery_status
            ORDER BY created_at DESC, id
            LIMIT @limit OFFSET @offset
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "status", StatusToString(status));
                AddParameter(cmd, "limit", limit);
                AddParameter(cmd, "offset", offset);
            },
            MapDelivery,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<DeliveryEntity>> GetByCorrelationIdAsync(
        string tenantId,
        string correlationId,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT * FROM notify.deliveries
            WHERE tenant_id = @tenant_id AND correlation_id = @correlation_id
            ORDER BY created_at, id
            """;

        return await QueryAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "correlation_id", correlationId);
            },
            MapDelivery,
            cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<bool> MarkQueuedAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE notify.deliveries
            SET status = 'queued'::notify.delivery_status,
                queued_at = NOW()
            WHERE tenant_id = @tenant_id AND id = @id AND status = 'pending'
            """;

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> MarkSentAsync(string tenantId, Guid id, string? externalId = null, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE notify.deliveries
            SET status = 'sent'::notify.delivery_status,
                sent_at = NOW(),
                external_id = COALESCE(@external_id, external_id)
            WHERE tenant_id = @tenant_id AND id = @id AND status IN ('queued', 'sending')
            """;

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
                AddParameter(cmd, "external_id", externalId);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> MarkDeliveredAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
    {
        const string sql = """
            UPDATE notify.deliveries
            SET status = 'delivered'::notify.delivery_status,
                delivered_at = NOW()
            WHERE tenant_id = @tenant_id AND id = @id AND status = 'sent'
            """;

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<bool> MarkFailedAsync(
        string tenantId,
        Guid id,
        string errorMessage,
        TimeSpan? retryDelay = null,
        CancellationToken cancellationToken = default)
    {
        // A failed attempt is re-queued (status back to 'pending' with a scheduled
        // next_retry_at) only while attempts remain and a retry delay was supplied;
        // otherwise the delivery transitions terminally to 'failed'.
        const string sql = """
            UPDATE notify.deliveries
            SET status = CASE
                    WHEN attempt + 1 < max_attempts AND @retry_delay IS NOT NULL THEN 'pending'::notify.delivery_status
                    ELSE 'failed'::notify.delivery_status
                END,
                attempt = attempt + 1,
                error_message = @error_message,
                failed_at = CASE WHEN attempt + 1 >= max_attempts OR @retry_delay IS NULL THEN NOW() ELSE failed_at END,
                next_retry_at = CASE
                    WHEN attempt + 1 < max_attempts AND @retry_delay IS NOT NULL THEN NOW() + @retry_delay
                    ELSE NULL
                END
            WHERE tenant_id = @tenant_id AND id = @id
            """;

        var rows = await ExecuteAsync(
            tenantId,
            sql,
            cmd =>
            {
                AddParameter(cmd, "tenant_id", tenantId);
                AddParameter(cmd, "id", id);
                AddParameter(cmd, "error_message", errorMessage);
                AddParameter(cmd, "retry_delay", retryDelay);
            },
            cancellationToken).ConfigureAwait(false);

        return rows > 0;
    }

    /// <inheritdoc />
    public async Task<DeliveryStats> GetStatsAsync(
        string tenantId,
        DateTimeOffset from,
        DateTimeOffset to,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT
                COUNT(*) as total,
                COUNT(*) FILTER (WHERE status = 'pending') as pending,
                COUNT(*) FILTER (WHERE status = 'sent') as sent,
                COUNT(*) FILTER (WHERE status = 'delivered') as delivered,
                COUNT(*) FILTER (WHERE status = 'failed') as failed,
                COUNT(*) FILTER (WHERE status = 'bounced') as bounced
            FROM notify.deliveries
            WHERE tenant_id = @tenant_id
              AND created_at >= @from
              AND created_at < @to
            """;

        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "from", from);
        AddParameter(command, "to", to);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);

        return new DeliveryStats(
            Total: reader.GetInt64(0),
            Pending: reader.GetInt64(1),
            Sent: reader.GetInt64(2),
            Delivered: reader.GetInt64(3),
            Failed: reader.GetInt64(4),
            Bounced: reader.GetInt64(5));
    }

    private static void AddDeliveryParameters(NpgsqlCommand command, DeliveryEntity delivery)
    {
        AddParameter(command, "id", delivery.Id);
        AddParameter(command, "tenant_id", delivery.TenantId);
        AddParameter(command, "channel_id", delivery.ChannelId);
        AddParameter(command, "rule_id", delivery.RuleId);
        AddParameter(command, "template_id", delivery.TemplateId);
        AddParameter(command, "status", StatusToString(delivery.Status));
        AddParameter(command, "recipient", delivery.Recipient);
        AddParameter(command, "subject", delivery.Subject);
        AddParameter(command, "body", delivery.Body);
        AddParameter(command, "event_type", delivery.EventType);
        AddJsonbParameter(command, "event_payload", delivery.EventPayload);
        AddParameter(command, "max_attempts", delivery.MaxAttempts);
        AddParameter(command, "correlation_id", delivery.CorrelationId);
    }

    private static DeliveryEntity MapDelivery(NpgsqlDataReader reader) => new()
    {
        Id = reader.GetGuid(reader.GetOrdinal("id")),
        TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
        ChannelId = reader.GetGuid(reader.GetOrdinal("channel_id")),
        RuleId = GetNullableGuid(reader, reader.GetOrdinal("rule_id")),
        TemplateId = GetNullableGuid(reader, reader.GetOrdinal("template_id")),
        Status = ParseStatus(reader.GetString(reader.GetOrdinal("status"))),
        Recipient = reader.GetString(reader.GetOrdinal("recipient")),
        Subject = GetNullableString(reader, reader.GetOrdinal("subject")),
        Body = GetNullableString(reader, reader.GetOrdinal("body")),
        EventType = reader.GetString(reader.GetOrdinal("event_type")),
        EventPayload = reader.GetString(reader.GetOrdinal("event_payload")),
        Attempt = reader.GetInt32(reader.GetOrdinal("attempt")),
        MaxAttempts = reader.GetInt32(reader.GetOrdinal("max_attempts")),
        NextRetryAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("next_retry_at")),
        ErrorMessage = GetNullableString(reader, reader.GetOrdinal("error_message")),
        ExternalId = GetNullableString(reader, reader.GetOrdinal("external_id")),
        CorrelationId = GetNullableString(reader, reader.GetOrdinal("correlation_id")),
        CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
        QueuedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("queued_at")),
        SentAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("sent_at")),
        DeliveredAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("delivered_at")),
        FailedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("failed_at"))
    };

    private static string StatusToString(DeliveryStatus status) => status switch
    {
        DeliveryStatus.Pending => "pending",
        DeliveryStatus.Queued => "queued",
        DeliveryStatus.Sending => "sending",
        DeliveryStatus.Sent => "sent",
        DeliveryStatus.Delivered => "delivered",
        DeliveryStatus.Failed => "failed",
        DeliveryStatus.Bounced => "bounced",
        _ => throw new ArgumentException($"Unknown delivery status: {status}", nameof(status))
    };

    private static DeliveryStatus ParseStatus(string status) => status switch
    {
        "pending" => DeliveryStatus.Pending,
        "queued" => DeliveryStatus.Queued,
        "sending" => DeliveryStatus.Sending,
        "sent" => DeliveryStatus.Sent,
        "delivered" => DeliveryStatus.Delivered,
        "failed" => DeliveryStatus.Failed,
        "bounced" => DeliveryStatus.Bounced,
        _ => throw new ArgumentException($"Unknown delivery status: {status}", nameof(status))
    };
}
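Reviewer note: a sketch of the delivery state machine driven by the Mark* methods above; `deliveries`, `delivery`, `ct`, and the `sender` transport are hypothetical locals.

var created = await deliveries.CreateAsync(delivery, ct);            // 'pending'
await deliveries.MarkQueuedAsync(created.TenantId, created.Id, ct);  // 'pending' -> 'queued'

try
{
    var externalId = await sender.SendAsync(created, ct);            // hypothetical transport call
    await deliveries.MarkSentAsync(created.TenantId, created.Id, externalId, ct); // -> 'sent'
}
catch (Exception ex)
{
    // Re-queues with next_retry_at = NOW() + 1 minute while attempts remain;
    // otherwise the row transitions terminally to 'failed'.
    await deliveries.MarkFailedAsync(created.TenantId, created.Id, ex.Message, TimeSpan.FromMinutes(1), ct);
}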
@@ -0,0 +1,53 @@
using StellaOps.Notify.Storage.Postgres.Models;

namespace StellaOps.Notify.Storage.Postgres.Repositories;

/// <summary>
/// Repository interface for notification channel operations.
/// </summary>
public interface IChannelRepository
{
    /// <summary>
    /// Creates a new channel.
    /// </summary>
    Task<ChannelEntity> CreateAsync(ChannelEntity channel, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a channel by ID.
    /// </summary>
    Task<ChannelEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a channel by name.
    /// </summary>
    Task<ChannelEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all channels for a tenant.
    /// </summary>
    Task<IReadOnlyList<ChannelEntity>> GetAllAsync(
        string tenantId,
        bool? enabled = null,
        ChannelType? channelType = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates a channel.
    /// </summary>
    Task<bool> UpdateAsync(ChannelEntity channel, CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes a channel.
    /// </summary>
    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets enabled channels by type.
    /// </summary>
    Task<IReadOnlyList<ChannelEntity>> GetEnabledByTypeAsync(
        string tenantId,
        ChannelType channelType,
        CancellationToken cancellationToken = default);
}
@@ -0,0 +1,90 @@
using StellaOps.Notify.Storage.Postgres.Models;

namespace StellaOps.Notify.Storage.Postgres.Repositories;

/// <summary>
/// Repository interface for notification delivery operations.
/// </summary>
public interface IDeliveryRepository
{
    /// <summary>
    /// Creates a new delivery.
    /// </summary>
    Task<DeliveryEntity> CreateAsync(DeliveryEntity delivery, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a delivery by ID.
    /// </summary>
    Task<DeliveryEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets pending deliveries ready to send.
    /// </summary>
    Task<IReadOnlyList<DeliveryEntity>> GetPendingAsync(
        string tenantId,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets deliveries by status.
    /// </summary>
    Task<IReadOnlyList<DeliveryEntity>> GetByStatusAsync(
        string tenantId,
        DeliveryStatus status,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets deliveries by correlation ID.
    /// </summary>
    Task<IReadOnlyList<DeliveryEntity>> GetByCorrelationIdAsync(
        string tenantId,
        string correlationId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Marks a delivery as queued.
    /// </summary>
    Task<bool> MarkQueuedAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Marks a delivery as sent.
    /// </summary>
    Task<bool> MarkSentAsync(string tenantId, Guid id, string? externalId = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Marks a delivery as delivered.
    /// </summary>
    Task<bool> MarkDeliveredAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);

    /// <summary>
    /// Marks a delivery as failed, optionally scheduling a retry.
    /// </summary>
    Task<bool> MarkFailedAsync(
        string tenantId,
        Guid id,
        string errorMessage,
        TimeSpan? retryDelay = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets delivery statistics for a time range.
    /// </summary>
    Task<DeliveryStats> GetStatsAsync(
        string tenantId,
        DateTimeOffset from,
        DateTimeOffset to,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Delivery statistics.
/// </summary>
public sealed record DeliveryStats(
    long Total,
    long Pending,
    long Sent,
    long Delivered,
    long Failed,
    long Bounced);
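Reviewer note: a sketch of consuming the stats record, e.g. for a failure-rate gauge; `deliveries`, `from`, `to`, and `ct` are hypothetical locals.

var stats = await deliveries.GetStatsAsync("tenant-a", from, to, ct);
var failureRate = stats.Total == 0
    ? 0.0
    : (double)(stats.Failed + stats.Bounced) / stats.Total;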
@@ -0,0 +1,55 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;
using StellaOps.Notify.Storage.Postgres.Repositories;

namespace StellaOps.Notify.Storage.Postgres;

/// <summary>
/// Extension methods for configuring Notify PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds Notify PostgreSQL storage services.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configuration">Configuration root.</param>
    /// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddNotifyPostgresStorage(
        this IServiceCollection services,
        IConfiguration configuration,
        string sectionName = "Postgres:Notify")
    {
        // Bind the default options instance; the data source consumes
        // IOptions<PostgresOptions>, so named options would stay unbound.
        services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
        services.AddSingleton<NotifyDataSource>();

        // Register repositories
        services.AddScoped<IChannelRepository, ChannelRepository>();
        services.AddScoped<IDeliveryRepository, DeliveryRepository>();

        return services;
    }

    /// <summary>
    /// Adds Notify PostgreSQL storage services with explicit options.
    /// </summary>
    /// <param name="services">Service collection.</param>
    /// <param name="configureOptions">Options configuration action.</param>
    /// <returns>Service collection for chaining.</returns>
    public static IServiceCollection AddNotifyPostgresStorage(
        this IServiceCollection services,
        Action<PostgresOptions> configureOptions)
    {
        services.Configure(configureOptions);
        services.AddSingleton<NotifyDataSource>();

        // Register repositories
        services.AddScoped<IChannelRepository, ChannelRepository>();
        services.AddScoped<IDeliveryRepository, DeliveryRepository>();

        return services;
    }
}
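Hedged wiring sketch for a generic host; the option property names in the lambda are assumptions:

// Requires Microsoft.Extensions.Hosting.
// Configuration-bound registration (reads the "Postgres:Notify" section by default).
var builder = Host.CreateApplicationBuilder(args);
builder.Services.AddNotifyPostgresStorage(builder.Configuration);

// Or explicit options instead of configuration binding:
builder.Services.AddNotifyPostgresStorage(options =>
{
    options.ConnectionString = "Host=localhost;Database=stellaops"; // assumed option name
    options.SchemaName = "notify";                                  // assumed option name
});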
@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <RootNamespace>StellaOps.Notify.Storage.Postgres</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
  </ItemGroup>

</Project>
@@ -0,0 +1,583 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.Backfill;

/// <summary>
/// Configuration options for the backfill manager.
/// </summary>
public sealed record BackfillManagerOptions
{
    /// <summary>
    /// Maximum number of events allowed in a single backfill request.
    /// </summary>
    public long MaxEventsPerBackfill { get; init; } = 1_000_000;

    /// <summary>
    /// Maximum duration allowed for a backfill operation.
    /// </summary>
    public TimeSpan MaxBackfillDuration { get; init; } = TimeSpan.FromHours(24);

    /// <summary>
    /// Data retention period - backfills cannot extend beyond this.
    /// </summary>
    public TimeSpan RetentionPeriod { get; init; } = TimeSpan.FromDays(90);

    /// <summary>
    /// Default TTL for processed event records.
    /// </summary>
    public TimeSpan DefaultProcessedEventTtl { get; init; } = TimeSpan.FromDays(30);

    /// <summary>
    /// Number of sample event keys to include in previews.
    /// </summary>
    public int PreviewSampleSize { get; init; } = 10;

    /// <summary>
    /// Estimated events per second for duration estimation.
    /// </summary>
    public double EstimatedEventsPerSecond { get; init; } = 100;
}

/// <summary>
/// Coordinates backfill operations with safety validations.
/// </summary>
public interface IBackfillManager
{
    /// <summary>
    /// Creates a new backfill request with validation.
    /// </summary>
    Task<BackfillRequest> CreateRequestAsync(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        string reason,
        string createdBy,
        int batchSize = 100,
        bool dryRun = false,
        bool forceReprocess = false,
        string? ticket = null,
        TimeSpan? maxDuration = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Validates a backfill request and runs safety checks.
    /// </summary>
    Task<BackfillRequest> ValidateRequestAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates a preview of what a backfill would process (dry-run).
    /// </summary>
    Task<BackfillPreview> PreviewAsync(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        int batchSize = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Starts execution of a validated backfill request.
    /// </summary>
    Task<BackfillRequest> StartAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Pauses a running backfill.
    /// </summary>
    Task<BackfillRequest> PauseAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Resumes a paused backfill.
    /// </summary>
    Task<BackfillRequest> ResumeAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Cancels a backfill request.
    /// </summary>
    Task<BackfillRequest> CancelAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the current status of a backfill request.
    /// </summary>
    Task<BackfillRequest?> GetStatusAsync(
        string tenantId,
        Guid backfillId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists backfill requests with filters.
    /// </summary>
    Task<IReadOnlyList<BackfillRequest>> ListAsync(
        string tenantId,
        BackfillStatus? status = null,
        Guid? sourceId = null,
        string? jobType = null,
        int limit = 50,
        int offset = 0,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Provides event counting for backfill estimation.
/// </summary>
public interface IBackfillEventCounter
{
    /// <summary>
    /// Estimates the number of events in a time window.
    /// </summary>
    Task<long> EstimateEventCountAsync(
        string tenantId,
        string scopeKey,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        CancellationToken cancellationToken);

    /// <summary>
    /// Gets sample event keys from a time window.
    /// </summary>
    Task<IReadOnlyList<string>> GetSampleEventKeysAsync(
        string tenantId,
        string scopeKey,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        int sampleSize,
        CancellationToken cancellationToken);
}

/// <summary>
/// Validates backfill safety conditions.
/// </summary>
public interface IBackfillSafetyValidator
{
    /// <summary>
    /// Runs all safety validations for a backfill request.
    /// </summary>
    Task<BackfillSafetyChecks> ValidateAsync(
        BackfillRequest request,
        long estimatedEvents,
        TimeSpan estimatedDuration,
        CancellationToken cancellationToken);
}

/// <summary>
/// Default implementation of backfill safety validator.
/// </summary>
public sealed class DefaultBackfillSafetyValidator : IBackfillSafetyValidator
{
    private readonly ISourceValidator _sourceValidator;
    private readonly IOverlapChecker _overlapChecker;
    private readonly BackfillManagerOptions _options;

    public DefaultBackfillSafetyValidator(
        ISourceValidator sourceValidator,
        IOverlapChecker overlapChecker,
        BackfillManagerOptions options)
    {
        _sourceValidator = sourceValidator;
        _overlapChecker = overlapChecker;
        _options = options;
    }

    public async Task<BackfillSafetyChecks> ValidateAsync(
        BackfillRequest request,
        long estimatedEvents,
        TimeSpan estimatedDuration,
        CancellationToken cancellationToken)
    {
        var warnings = new List<string>();
        var errors = new List<string>();

        // Check source exists
        var sourceExists = true;
        if (request.SourceId.HasValue)
        {
            sourceExists = await _sourceValidator.ExistsAsync(
                request.TenantId, request.SourceId.Value, cancellationToken);
            if (!sourceExists)
            {
                errors.Add($"Source {request.SourceId} not found.");
            }
        }

        // Check for overlapping backfills
        var hasOverlap = await _overlapChecker.HasOverlapAsync(
            request.TenantId,
            request.ScopeKey,
            request.WindowStart,
            request.WindowEnd,
            request.BackfillId,
            cancellationToken);
        if (hasOverlap)
        {
            errors.Add("An active backfill already exists for this scope and time window.");
        }

        // Check retention period
        var retentionLimit = DateTimeOffset.UtcNow - _options.RetentionPeriod;
        var withinRetention = request.WindowStart >= retentionLimit;
        if (!withinRetention)
        {
            errors.Add($"Window start {request.WindowStart:O} is beyond the retention period ({_options.RetentionPeriod.TotalDays} days).");
        }

        // Check event limit
        var withinEventLimit = estimatedEvents <= _options.MaxEventsPerBackfill;
        if (!withinEventLimit)
        {
            errors.Add($"Estimated {estimatedEvents:N0} events exceeds maximum allowed ({_options.MaxEventsPerBackfill:N0}).");
        }
        else if (estimatedEvents > _options.MaxEventsPerBackfill * 0.8)
        {
            warnings.Add($"Estimated {estimatedEvents:N0} events is approaching the maximum limit.");
        }

        // Check duration limit
        var maxDuration = request.MaxDuration ?? _options.MaxBackfillDuration;
        var withinDurationLimit = estimatedDuration <= maxDuration;
        if (!withinDurationLimit)
        {
            errors.Add($"Estimated duration {estimatedDuration} exceeds maximum allowed ({maxDuration}).");
        }

        // Check quota availability (placeholder - always true for now)
        var quotaAvailable = true;

        // Add warnings for large backfills
        if (request.WindowDuration > TimeSpan.FromDays(7))
        {
            warnings.Add("Large time window may take significant time to process.");
        }

        if (request.ForceReprocess)
        {
            warnings.Add("Force reprocess is enabled - events will be processed even if already seen.");
        }

        return new BackfillSafetyChecks(
            SourceExists: sourceExists,
            HasOverlappingBackfill: hasOverlap,
            WithinRetention: withinRetention,
            WithinEventLimit: withinEventLimit,
            WithinDurationLimit: withinDurationLimit,
            QuotaAvailable: quotaAvailable,
            Warnings: warnings,
            Errors: errors);
    }
}
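As a rough illustration, the validator can be exercised with trivial stubs; the two classes below are hypothetical test doubles, not part of this change:

// Test doubles: a source that always exists and a scope with no overlapping backfills.
internal sealed class AlwaysExistsSourceValidator : ISourceValidator
{
    public Task<bool> ExistsAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken) =>
        Task.FromResult(true);
}

internal sealed class NoOverlapChecker : IOverlapChecker
{
    public Task<bool> HasOverlapAsync(
        string tenantId, string scopeKey,
        DateTimeOffset windowStart, DateTimeOffset windowEnd,
        Guid? excludeBackfillId, CancellationToken cancellationToken) =>
        Task.FromResult(false);
}

// var validator = new DefaultBackfillSafetyValidator(
//     new AlwaysExistsSourceValidator(), new NoOverlapChecker(), new BackfillManagerOptions());
// var checks = await validator.ValidateAsync(request, estimatedEvents: 10_000,
//     estimatedDuration: TimeSpan.FromMinutes(2), CancellationToken.None);
// if (!checks.IsSafe) { /* surface checks.Errors to the operator */ }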
/// <summary>
/// Validates that a source exists.
/// </summary>
public interface ISourceValidator
{
    /// <summary>
    /// Checks if a source exists.
    /// </summary>
    Task<bool> ExistsAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken);
}

/// <summary>
/// Checks for overlapping backfill operations.
/// </summary>
public interface IOverlapChecker
{
    /// <summary>
    /// Checks if there's an overlapping active backfill.
    /// </summary>
    Task<bool> HasOverlapAsync(
        string tenantId,
        string scopeKey,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        Guid? excludeBackfillId,
        CancellationToken cancellationToken);
}

/// <summary>
/// Default implementation of the backfill manager.
/// </summary>
public sealed class BackfillManager : IBackfillManager
{
    private readonly IBackfillRepository _backfillRepository;
    private readonly IBackfillSafetyValidator _safetyValidator;
    private readonly IBackfillEventCounter _eventCounter;
    private readonly IDuplicateSuppressor _duplicateSuppressor;
    private readonly BackfillManagerOptions _options;
    private readonly ILogger<BackfillManager> _logger;

    public BackfillManager(
        IBackfillRepository backfillRepository,
        IBackfillSafetyValidator safetyValidator,
        IBackfillEventCounter eventCounter,
        IDuplicateSuppressor duplicateSuppressor,
        BackfillManagerOptions options,
        ILogger<BackfillManager> logger)
    {
        _backfillRepository = backfillRepository;
        _safetyValidator = safetyValidator;
        _eventCounter = eventCounter;
        _duplicateSuppressor = duplicateSuppressor;
        _options = options;
        _logger = logger;
    }

    public async Task<BackfillRequest> CreateRequestAsync(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        string reason,
        string createdBy,
        int batchSize = 100,
        bool dryRun = false,
        bool forceReprocess = false,
        string? ticket = null,
        TimeSpan? maxDuration = null,
        CancellationToken cancellationToken = default)
    {
        var request = BackfillRequest.Create(
            tenantId: tenantId,
            sourceId: sourceId,
            jobType: jobType,
            windowStart: windowStart,
            windowEnd: windowEnd,
            reason: reason,
            createdBy: createdBy,
            batchSize: batchSize,
            dryRun: dryRun,
            forceReprocess: forceReprocess,
            ticket: ticket,
            maxDuration: maxDuration);

        await _backfillRepository.CreateAsync(request, cancellationToken);

        _logger.LogInformation(
            "Created backfill request {BackfillId} for scope {ScopeKey} from {WindowStart} to {WindowEnd}",
            request.BackfillId, request.ScopeKey, windowStart, windowEnd);

        return request;
    }

    public async Task<BackfillRequest> ValidateRequestAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default)
    {
        var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
            ?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");

        request = request.StartValidation(updatedBy);
        await _backfillRepository.UpdateAsync(request, cancellationToken);

        // Estimate event count
        var estimatedEvents = await _eventCounter.EstimateEventCountAsync(
            tenantId, request.ScopeKey, request.WindowStart, request.WindowEnd, cancellationToken);

        // Calculate estimated duration
        var estimatedDuration = TimeSpan.FromSeconds(estimatedEvents / _options.EstimatedEventsPerSecond);

        // Run safety validations
        var safetyChecks = await _safetyValidator.ValidateAsync(
            request, estimatedEvents, estimatedDuration, cancellationToken);

        request = request.WithSafetyChecks(safetyChecks, estimatedEvents, estimatedDuration, updatedBy);
        await _backfillRepository.UpdateAsync(request, cancellationToken);

        _logger.LogInformation(
            "Validated backfill request {BackfillId}: {EstimatedEvents} events, safe={IsSafe}",
            backfillId, estimatedEvents, safetyChecks.IsSafe);

        return request;
    }

    public async Task<BackfillPreview> PreviewAsync(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        int batchSize = 100,
        CancellationToken cancellationToken = default)
    {
        var scopeKey = GetScopeKey(sourceId, jobType);

        // Estimate total events
        var estimatedEvents = await _eventCounter.EstimateEventCountAsync(
            tenantId, scopeKey, windowStart, windowEnd, cancellationToken);

        // Get already processed count
        var processedCount = await _duplicateSuppressor.CountProcessedAsync(
            scopeKey, windowStart, windowEnd, cancellationToken);

        // Get sample event keys
        var sampleKeys = await _eventCounter.GetSampleEventKeysAsync(
            tenantId, scopeKey, windowStart, windowEnd, _options.PreviewSampleSize, cancellationToken);

        // Calculate estimates
        var processableEvents = Math.Max(0, estimatedEvents - processedCount);
        var estimatedDuration = TimeSpan.FromSeconds(processableEvents / _options.EstimatedEventsPerSecond);
        var estimatedBatches = (int)Math.Ceiling((double)processableEvents / batchSize);

        // Run safety checks
        var tempRequest = BackfillRequest.Create(
            tenantId, sourceId, jobType, windowStart, windowEnd,
            "preview", "system", batchSize);

        var safetyChecks = await _safetyValidator.ValidateAsync(
            tempRequest, estimatedEvents, estimatedDuration, cancellationToken);

        return new BackfillPreview(
            ScopeKey: scopeKey,
            WindowStart: windowStart,
            WindowEnd: windowEnd,
            EstimatedEvents: estimatedEvents,
            SkippedEvents: processedCount,
            ProcessableEvents: processableEvents,
            EstimatedDuration: estimatedDuration,
            EstimatedBatches: estimatedBatches,
            SafetyChecks: safetyChecks,
            SampleEventKeys: sampleKeys);
    }

    public async Task<BackfillRequest> StartAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default)
    {
        var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
            ?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");

        request = request.Start(updatedBy);
        await _backfillRepository.UpdateAsync(request, cancellationToken);

        _logger.LogInformation("Started backfill request {BackfillId}", backfillId);

        return request;
    }

    public async Task<BackfillRequest> PauseAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default)
    {
        var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
            ?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");

        request = request.Pause(updatedBy);
        await _backfillRepository.UpdateAsync(request, cancellationToken);

        _logger.LogInformation("Paused backfill request {BackfillId}", backfillId);

        return request;
    }

    public async Task<BackfillRequest> ResumeAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default)
    {
        var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
            ?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");

        request = request.Resume(updatedBy);
        await _backfillRepository.UpdateAsync(request, cancellationToken);

        _logger.LogInformation("Resumed backfill request {BackfillId}", backfillId);

        return request;
    }

    public async Task<BackfillRequest> CancelAsync(
        string tenantId,
        Guid backfillId,
        string updatedBy,
        CancellationToken cancellationToken = default)
    {
        var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
            ?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");

        request = request.Cancel(updatedBy);
        await _backfillRepository.UpdateAsync(request, cancellationToken);

        _logger.LogInformation("Canceled backfill request {BackfillId}", backfillId);

        return request;
    }

    public Task<BackfillRequest?> GetStatusAsync(
        string tenantId,
        Guid backfillId,
        CancellationToken cancellationToken = default)
    {
        return _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken);
    }

    public Task<IReadOnlyList<BackfillRequest>> ListAsync(
        string tenantId,
        BackfillStatus? status = null,
        Guid? sourceId = null,
        string? jobType = null,
        int limit = 50,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        return _backfillRepository.ListAsync(tenantId, status, sourceId, jobType, limit, offset, cancellationToken);
    }

    private static string GetScopeKey(Guid? sourceId, string? jobType)
    {
        return (sourceId, jobType) switch
        {
            (Guid s, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(s, j),
            (Guid s, _) => Watermark.CreateScopeKey(s),
            (_, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(j),
            _ => throw new ArgumentException("Either sourceId or jobType must be specified.")
        };
    }
}

/// <summary>
/// Repository interface for backfill persistence (declared here for convenience).
/// </summary>
public interface IBackfillRepository
{
    Task<BackfillRequest?> GetByIdAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);
    Task CreateAsync(BackfillRequest request, CancellationToken cancellationToken);
    Task UpdateAsync(BackfillRequest request, CancellationToken cancellationToken);
    Task<IReadOnlyList<BackfillRequest>> ListAsync(
        string tenantId,
        BackfillStatus? status,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset,
        CancellationToken cancellationToken);
}
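End-to-end operator flow sketch; the job type, reason, and user below are illustrative, and manager is an injected IBackfillManager:

// Preview first, then create, validate, and start the backfill.
var preview = await manager.PreviewAsync(
    tenantId, sourceId, jobType: "advisory-sync",
    windowStart: DateTimeOffset.UtcNow.AddDays(-3),
    windowEnd: DateTimeOffset.UtcNow,
    cancellationToken: ct);

if (preview.SafetyChecks.IsSafe)
{
    var request = await manager.CreateRequestAsync(
        tenantId, sourceId, "advisory-sync",
        preview.WindowStart, preview.WindowEnd,
        reason: "re-ingest after parser fix", createdBy: "ops@example.com",
        cancellationToken: ct);

    request = await manager.ValidateRequestAsync(tenantId, request.BackfillId, "ops@example.com", ct);
    request = await manager.StartAsync(tenantId, request.BackfillId, "ops@example.com", ct);
}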
@@ -0,0 +1,318 @@
namespace StellaOps.Orchestrator.Core.Backfill;

/// <summary>
/// Tracks processed events for duplicate suppression.
/// </summary>
public interface IDuplicateSuppressor
{
    /// <summary>
    /// Checks if an event has already been processed.
    /// </summary>
    /// <param name="scopeKey">Scope identifier.</param>
    /// <param name="eventKey">Unique event identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if the event was already processed.</returns>
    Task<bool> HasProcessedAsync(string scopeKey, string eventKey, CancellationToken cancellationToken);

    /// <summary>
    /// Checks multiple events for duplicate status.
    /// </summary>
    /// <param name="scopeKey">Scope identifier.</param>
    /// <param name="eventKeys">Event identifiers to check.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Set of event keys that have already been processed.</returns>
    Task<IReadOnlySet<string>> GetProcessedAsync(string scopeKey, IEnumerable<string> eventKeys, CancellationToken cancellationToken);

    /// <summary>
    /// Marks an event as processed.
    /// </summary>
    /// <param name="scopeKey">Scope identifier.</param>
    /// <param name="eventKey">Unique event identifier.</param>
    /// <param name="eventTime">Event timestamp.</param>
    /// <param name="batchId">Optional batch/backfill identifier.</param>
    /// <param name="ttl">Time-to-live for the record.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task MarkProcessedAsync(
        string scopeKey,
        string eventKey,
        DateTimeOffset eventTime,
        Guid? batchId,
        TimeSpan ttl,
        CancellationToken cancellationToken);

    /// <summary>
    /// Marks multiple events as processed.
    /// </summary>
    /// <param name="scopeKey">Scope identifier.</param>
    /// <param name="events">Events to mark as processed.</param>
    /// <param name="batchId">Optional batch/backfill identifier.</param>
    /// <param name="ttl">Time-to-live for the records.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task MarkProcessedBatchAsync(
        string scopeKey,
        IEnumerable<ProcessedEvent> events,
        Guid? batchId,
        TimeSpan ttl,
        CancellationToken cancellationToken);

    /// <summary>
    /// Counts processed events within a time range.
    /// </summary>
    /// <param name="scopeKey">Scope identifier.</param>
    /// <param name="from">Start of time range.</param>
    /// <param name="to">End of time range.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Count of processed events.</returns>
    Task<long> CountProcessedAsync(string scopeKey, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken);

    /// <summary>
    /// Removes expired records (cleanup).
    /// </summary>
    /// <param name="batchLimit">Maximum records to remove per call.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Number of records removed.</returns>
    Task<int> CleanupExpiredAsync(int batchLimit, CancellationToken cancellationToken);
}

/// <summary>
/// Event data for duplicate tracking.
/// </summary>
public sealed record ProcessedEvent(
    /// <summary>Unique event identifier.</summary>
    string EventKey,

    /// <summary>Event timestamp.</summary>
    DateTimeOffset EventTime);

/// <summary>
/// In-memory duplicate suppressor for testing.
/// </summary>
public sealed class InMemoryDuplicateSuppressor : IDuplicateSuppressor
{
    private readonly Dictionary<string, Dictionary<string, ProcessedEventEntry>> _store = new();
    private readonly object _lock = new();

    private sealed record ProcessedEventEntry(
        DateTimeOffset EventTime,
        DateTimeOffset ProcessedAt,
        Guid? BatchId,
        DateTimeOffset ExpiresAt);

    public Task<bool> HasProcessedAsync(string scopeKey, string eventKey, CancellationToken cancellationToken)
    {
        lock (_lock)
        {
            if (!_store.TryGetValue(scopeKey, out var scopeStore))
                return Task.FromResult(false);

            if (!scopeStore.TryGetValue(eventKey, out var entry))
                return Task.FromResult(false);

            // Check if expired
            if (entry.ExpiresAt < DateTimeOffset.UtcNow)
            {
                scopeStore.Remove(eventKey);
                return Task.FromResult(false);
            }

            return Task.FromResult(true);
        }
    }

    public Task<IReadOnlySet<string>> GetProcessedAsync(string scopeKey, IEnumerable<string> eventKeys, CancellationToken cancellationToken)
    {
        var now = DateTimeOffset.UtcNow;
        var result = new HashSet<string>();

        lock (_lock)
        {
            if (!_store.TryGetValue(scopeKey, out var scopeStore))
                return Task.FromResult<IReadOnlySet<string>>(result);

            foreach (var eventKey in eventKeys)
            {
                if (scopeStore.TryGetValue(eventKey, out var entry) && entry.ExpiresAt >= now)
                {
                    result.Add(eventKey);
                }
            }
        }

        return Task.FromResult<IReadOnlySet<string>>(result);
    }

    public Task MarkProcessedAsync(
        string scopeKey,
        string eventKey,
        DateTimeOffset eventTime,
        Guid? batchId,
        TimeSpan ttl,
        CancellationToken cancellationToken)
    {
        var now = DateTimeOffset.UtcNow;
        var entry = new ProcessedEventEntry(eventTime, now, batchId, now + ttl);

        lock (_lock)
        {
            if (!_store.TryGetValue(scopeKey, out var scopeStore))
            {
                scopeStore = new Dictionary<string, ProcessedEventEntry>();
                _store[scopeKey] = scopeStore;
            }

            scopeStore[eventKey] = entry;
        }

        return Task.CompletedTask;
    }

    public Task MarkProcessedBatchAsync(
        string scopeKey,
        IEnumerable<ProcessedEvent> events,
        Guid? batchId,
        TimeSpan ttl,
        CancellationToken cancellationToken)
    {
        var now = DateTimeOffset.UtcNow;
        var expiresAt = now + ttl;

        lock (_lock)
        {
            if (!_store.TryGetValue(scopeKey, out var scopeStore))
            {
                scopeStore = new Dictionary<string, ProcessedEventEntry>();
                _store[scopeKey] = scopeStore;
            }

            foreach (var evt in events)
            {
                scopeStore[evt.EventKey] = new ProcessedEventEntry(evt.EventTime, now, batchId, expiresAt);
            }
        }

        return Task.CompletedTask;
    }

    public Task<long> CountProcessedAsync(string scopeKey, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken)
    {
        var now = DateTimeOffset.UtcNow;
        long count = 0;

        lock (_lock)
        {
            if (_store.TryGetValue(scopeKey, out var scopeStore))
            {
                count = scopeStore.Values
                    .Count(e => e.ExpiresAt >= now && e.EventTime >= from && e.EventTime < to);
            }
        }

        return Task.FromResult(count);
    }

    public Task<int> CleanupExpiredAsync(int batchLimit, CancellationToken cancellationToken)
    {
        var now = DateTimeOffset.UtcNow;
        var removed = 0;

        lock (_lock)
        {
            foreach (var scopeStore in _store.Values)
            {
                var expiredKeys = scopeStore
                    .Where(kvp => kvp.Value.ExpiresAt < now)
                    .Take(batchLimit - removed)
                    .Select(kvp => kvp.Key)
                    .ToList();

                foreach (var key in expiredKeys)
                {
                    scopeStore.Remove(key);
                    removed++;
                }

                if (removed >= batchLimit)
                    break;
            }
        }

        return Task.FromResult(removed);
    }
}

/// <summary>
/// Result of filtering events through duplicate suppression.
/// </summary>
public sealed record DuplicateFilterResult<T>(
    /// <summary>Events that should be processed (not duplicates).</summary>
    IReadOnlyList<T> ToProcess,

    /// <summary>Events that were filtered as duplicates.</summary>
    IReadOnlyList<T> Duplicates,

    /// <summary>Total events evaluated.</summary>
    int Total)
{
    /// <summary>
    /// Number of events that passed filtering.
    /// </summary>
    public int ProcessCount => ToProcess.Count;

    /// <summary>
    /// Number of duplicates filtered.
    /// </summary>
    public int DuplicateCount => Duplicates.Count;

    /// <summary>
    /// Duplicate percentage.
    /// </summary>
    public double DuplicatePercent => Total > 0 ? Math.Round((double)DuplicateCount / Total * 100, 2) : 0;
}

/// <summary>
/// Helper methods for duplicate suppression.
/// </summary>
public static class DuplicateSuppressorExtensions
{
    /// <summary>
    /// Filters a batch of events, removing duplicates.
    /// </summary>
    /// <typeparam name="T">Event type.</typeparam>
    /// <param name="suppressor">Duplicate suppressor.</param>
    /// <param name="scopeKey">Scope identifier.</param>
    /// <param name="events">Events to filter.</param>
    /// <param name="keySelector">Function to extract event key.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Filter result with events to process and duplicates.</returns>
    public static async Task<DuplicateFilterResult<T>> FilterAsync<T>(
        this IDuplicateSuppressor suppressor,
        string scopeKey,
        IReadOnlyList<T> events,
        Func<T, string> keySelector,
        CancellationToken cancellationToken)
    {
        if (events.Count == 0)
            return new DuplicateFilterResult<T>([], [], 0);

        var eventKeys = events.Select(keySelector).ToList();
        var processed = await suppressor.GetProcessedAsync(scopeKey, eventKeys, cancellationToken).ConfigureAwait(false);

        var toProcess = new List<T>();
        var duplicates = new List<T>();

        foreach (var evt in events)
        {
            var key = keySelector(evt);
            if (processed.Contains(key))
            {
                duplicates.Add(evt);
            }
            else
            {
                toProcess.Add(evt);
            }
        }

        return new DuplicateFilterResult<T>(toProcess, duplicates, events.Count);
    }
}
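A sketch of the intended filter-then-mark loop; the batch variable and its EventKey/EventTime members stand in for a real event type:

// Filter a batch against the suppressor, process survivors, then record them.
var result = await suppressor.FilterAsync(scopeKey, batch, e => e.EventKey, ct);

foreach (var evt in result.ToProcess)
{
    // ... handle the event ...
}

await suppressor.MarkProcessedBatchAsync(
    scopeKey,
    result.ToProcess.Select(e => new ProcessedEvent(e.EventKey, e.EventTime)).ToList(),
    batchId: backfillId,
    ttl: TimeSpan.FromDays(30),
    cancellationToken: ct);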
@@ -0,0 +1,220 @@
namespace StellaOps.Orchestrator.Core.Backfill;

/// <summary>
/// Represents an event-time window for batch processing.
/// </summary>
public sealed record EventTimeWindow(
    /// <summary>Start of the window (inclusive).</summary>
    DateTimeOffset Start,

    /// <summary>End of the window (exclusive).</summary>
    DateTimeOffset End)
{
    /// <summary>
    /// Duration of the window.
    /// </summary>
    public TimeSpan Duration => End - Start;

    /// <summary>
    /// Whether the window is empty (zero duration).
    /// </summary>
    public bool IsEmpty => End <= Start;

    /// <summary>
    /// Whether a timestamp falls within this window.
    /// </summary>
    public bool Contains(DateTimeOffset timestamp) => timestamp >= Start && timestamp < End;

    /// <summary>
    /// Whether this window overlaps with another.
    /// </summary>
    public bool Overlaps(EventTimeWindow other) =>
        Start < other.End && End > other.Start;

    /// <summary>
    /// Creates the intersection of two windows.
    /// </summary>
    public EventTimeWindow? Intersect(EventTimeWindow other)
    {
        var newStart = Start > other.Start ? Start : other.Start;
        var newEnd = End < other.End ? End : other.End;

        return newEnd > newStart ? new EventTimeWindow(newStart, newEnd) : null;
    }

    /// <summary>
    /// Splits the window into batches of the specified duration.
    /// </summary>
    public IEnumerable<EventTimeWindow> Split(TimeSpan batchDuration)
    {
        if (batchDuration <= TimeSpan.Zero)
            throw new ArgumentOutOfRangeException(nameof(batchDuration), "Batch duration must be positive.");

        var current = Start;
        while (current < End)
        {
            var batchEnd = current + batchDuration;
            if (batchEnd > End)
                batchEnd = End;

            yield return new EventTimeWindow(current, batchEnd);
            current = batchEnd;
        }
    }

    /// <summary>
    /// Creates a window from a duration ending at the specified time.
    /// </summary>
    public static EventTimeWindow FromDuration(DateTimeOffset end, TimeSpan duration) =>
        new(end - duration, end);

    /// <summary>
    /// Creates a window covering the last N hours from now.
    /// </summary>
    public static EventTimeWindow LastHours(int hours, DateTimeOffset? now = null)
    {
        var endTime = now ?? DateTimeOffset.UtcNow;
        return FromDuration(endTime, TimeSpan.FromHours(hours));
    }

    /// <summary>
    /// Creates a window covering the last N days from now.
    /// </summary>
    public static EventTimeWindow LastDays(int days, DateTimeOffset? now = null)
    {
        var endTime = now ?? DateTimeOffset.UtcNow;
        return FromDuration(endTime, TimeSpan.FromDays(days));
    }
}
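For instance, a one-day window splits into hourly batches like this (sketch):

// 24 one-hour batches; Split clamps the final batch to the window end
// when the duration does not divide evenly.
var window = EventTimeWindow.LastDays(1);
foreach (var batch in window.Split(TimeSpan.FromHours(1)))
{
    Console.WriteLine($"{batch.Start:O} -> {batch.End:O} ({batch.Duration})");
}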
/// <summary>
/// Configuration for event-time window computation.
/// </summary>
public sealed record EventTimeWindowOptions(
    /// <summary>Minimum window size (prevents too-small batches).</summary>
    TimeSpan MinWindowSize,

    /// <summary>Maximum window size (prevents too-large batches).</summary>
    TimeSpan MaxWindowSize,

    /// <summary>Overlap with previous window for late-arriving events.</summary>
    TimeSpan OverlapDuration,

    /// <summary>Maximum lag allowed before triggering alerts.</summary>
    TimeSpan MaxLag,

    /// <summary>Default lookback for initial fetch when no watermark exists.</summary>
    TimeSpan InitialLookback)
{
    /// <summary>
    /// Default options for hourly batching.
    /// </summary>
    public static EventTimeWindowOptions HourlyBatches => new(
        MinWindowSize: TimeSpan.FromMinutes(5),
        MaxWindowSize: TimeSpan.FromHours(1),
        OverlapDuration: TimeSpan.FromMinutes(5),
        MaxLag: TimeSpan.FromHours(2),
        InitialLookback: TimeSpan.FromDays(7));

    /// <summary>
    /// Default options for daily batching.
    /// </summary>
    public static EventTimeWindowOptions DailyBatches => new(
        MinWindowSize: TimeSpan.FromHours(1),
        MaxWindowSize: TimeSpan.FromDays(1),
        OverlapDuration: TimeSpan.FromHours(1),
        MaxLag: TimeSpan.FromDays(1),
        InitialLookback: TimeSpan.FromDays(30));
}

/// <summary>
/// Computes event-time windows for incremental processing.
/// </summary>
public static class EventTimeWindowPlanner
{
    /// <summary>
    /// Computes the next window to process based on current watermark.
    /// </summary>
    /// <param name="now">Current time.</param>
    /// <param name="highWatermark">Current high watermark (null for initial fetch).</param>
    /// <param name="options">Window configuration options.</param>
    /// <returns>The next window to process, or null if caught up.</returns>
    public static EventTimeWindow? GetNextWindow(
        DateTimeOffset now,
        DateTimeOffset? highWatermark,
        EventTimeWindowOptions options)
    {
        DateTimeOffset windowStart;

        if (highWatermark is null)
        {
            // Initial fetch: start from initial lookback
            windowStart = now - options.InitialLookback;
        }
        else
        {
            // Incremental fetch: start from watermark minus overlap
            windowStart = highWatermark.Value - options.OverlapDuration;

            // If we're caught up (watermark + min window > now), no work needed
            if (highWatermark.Value + options.MinWindowSize > now)
            {
                return null;
            }
        }

        // Calculate window end (at most now, at most max window from start)
        var windowEnd = windowStart + options.MaxWindowSize;
        if (windowEnd > now)
        {
            windowEnd = now;
        }

        // Ensure minimum window size
        if (windowEnd - windowStart < options.MinWindowSize)
        {
            // If window would be too small, extend end (but not past now)
            windowEnd = windowStart + options.MinWindowSize;
            if (windowEnd > now)
            {
                return null; // Not enough data accumulated yet
            }
        }

        return new EventTimeWindow(windowStart, windowEnd);
    }

    /// <summary>
    /// Calculates the current lag from the high watermark.
    /// </summary>
    public static TimeSpan CalculateLag(DateTimeOffset now, DateTimeOffset highWatermark) =>
        now - highWatermark;

    /// <summary>
    /// Determines if the lag exceeds the maximum allowed.
    /// </summary>
    public static bool IsLagging(DateTimeOffset now, DateTimeOffset highWatermark, EventTimeWindowOptions options) =>
        CalculateLag(now, highWatermark) > options.MaxLag;

    /// <summary>
    /// Estimates the number of windows needed to catch up.
    /// </summary>
    public static int EstimateWindowsToProcess(
        DateTimeOffset now,
        DateTimeOffset? highWatermark,
        EventTimeWindowOptions options)
    {
        if (highWatermark is null)
        {
            // Initial fetch
            var totalDuration = options.InitialLookback;
            return (int)Math.Ceiling(totalDuration / options.MaxWindowSize);
        }

        var lag = CalculateLag(now, highWatermark.Value);
        if (lag <= options.MinWindowSize)
            return 0;

        return (int)Math.Ceiling(lag / options.MaxWindowSize);
    }
}
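Catch-up loop sketch using the hourly defaults; watermark persistence is elided:

// Ask the planner for windows until it reports caught-up (null).
DateTimeOffset? watermark = null; // null => initial fetch over InitialLookback
var options = EventTimeWindowOptions.HourlyBatches;

while (EventTimeWindowPlanner.GetNextWindow(DateTimeOffset.UtcNow, watermark, options) is { } next)
{
    // ... fetch and process events in [next.Start, next.End) ...
    watermark = next.End; // persist in real code so restarts resume correctly
}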
@@ -0,0 +1,502 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.DeadLetter;

/// <summary>
/// Notification channel types.
/// </summary>
public enum NotificationChannel
{
    Email,
    Slack,
    Teams,
    Webhook,
    PagerDuty
}

/// <summary>
/// Notification rule for dead-letter events.
/// </summary>
public sealed record NotificationRule(
    Guid RuleId,
    string TenantId,
    string? JobTypePattern,
    string? ErrorCodePattern,
    ErrorCategory? Category,
    Guid? SourceId,
    bool Enabled,
    NotificationChannel Channel,
    string Endpoint,
    int CooldownMinutes,
    int MaxPerHour,
    bool Aggregate,
    DateTimeOffset? LastNotifiedAt,
    int NotificationsSent,
    DateTimeOffset CreatedAt,
    DateTimeOffset UpdatedAt,
    string CreatedBy,
    string UpdatedBy)
{
    /// <summary>Creates a new notification rule.</summary>
    public static NotificationRule Create(
        string tenantId,
        NotificationChannel channel,
        string endpoint,
        string createdBy,
        string? jobTypePattern = null,
        string? errorCodePattern = null,
        ErrorCategory? category = null,
        Guid? sourceId = null,
        int cooldownMinutes = 15,
        int maxPerHour = 10,
        bool aggregate = true)
    {
        var now = DateTimeOffset.UtcNow;
        return new NotificationRule(
            RuleId: Guid.NewGuid(),
            TenantId: tenantId,
            JobTypePattern: jobTypePattern,
            ErrorCodePattern: errorCodePattern,
            Category: category,
            SourceId: sourceId,
            Enabled: true,
            Channel: channel,
            Endpoint: endpoint,
            CooldownMinutes: cooldownMinutes,
            MaxPerHour: maxPerHour,
            Aggregate: aggregate,
            LastNotifiedAt: null,
            NotificationsSent: 0,
            CreatedAt: now,
            UpdatedAt: now,
            CreatedBy: createdBy,
            UpdatedBy: createdBy);
    }

    /// <summary>Checks if this rule matches the given entry.</summary>
    public bool Matches(DeadLetterEntry entry)
    {
        if (!Enabled) return false;

        if (SourceId.HasValue && entry.SourceId != SourceId.Value) return false;
        if (Category.HasValue && entry.Category != Category.Value) return false;

        if (!string.IsNullOrEmpty(JobTypePattern))
        {
            if (!System.Text.RegularExpressions.Regex.IsMatch(entry.JobType, JobTypePattern))
                return false;
        }

        if (!string.IsNullOrEmpty(ErrorCodePattern))
        {
            if (!System.Text.RegularExpressions.Regex.IsMatch(entry.ErrorCode, ErrorCodePattern))
                return false;
        }

        return true;
    }

    /// <summary>Checks if this rule is within rate limits.</summary>
    public bool CanNotify(DateTimeOffset now, int notificationsSentThisHour)
    {
        if (!Enabled) return false;

        if (notificationsSentThisHour >= MaxPerHour) return false;

        if (LastNotifiedAt.HasValue)
        {
            var elapsed = now - LastNotifiedAt.Value;
            if (elapsed < TimeSpan.FromMinutes(CooldownMinutes))
                return false;
        }

        return true;
    }

    /// <summary>Records a notification sent.</summary>
    public NotificationRule RecordNotification(DateTimeOffset now) =>
        this with
        {
            LastNotifiedAt = now,
            NotificationsSent = NotificationsSent + 1,
            UpdatedAt = now
        };
}
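A brief sketch of how Matches, CanNotify, and RecordNotification compose; the endpoint URL and the ErrorCategory.Network member below are assumptions:

// A rule that pages on network failures for advisory jobs,
// at most 4 notifications per hour with a 15-minute cooldown.
var rule = NotificationRule.Create(
    tenantId: "tenant-1",
    channel: NotificationChannel.PagerDuty,
    endpoint: "https://events.pagerduty.example/enqueue", // hypothetical endpoint
    createdBy: "ops@example.com",
    jobTypePattern: "^advisory-",
    category: ErrorCategory.Network, // assumes a Network member exists
    cooldownMinutes: 15,
    maxPerHour: 4,
    aggregate: false);

// if (rule.Matches(entry) && rule.CanNotify(now, sentThisHour)) { /* send */ }
// rule = rule.RecordNotification(now); // advances LastNotifiedAt and the counter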
||||
|
||||
/// <summary>
|
||||
/// Notification log entry.
|
||||
/// </summary>
|
||||
public sealed record NotificationLogEntry(
|
||||
Guid LogId,
|
||||
string TenantId,
|
||||
Guid RuleId,
|
||||
IReadOnlyList<Guid> EntryIds,
|
||||
NotificationChannel Channel,
|
||||
string Endpoint,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
string? Subject,
|
||||
int EntryCount,
|
||||
DateTimeOffset SentAt);
|
||||
|
||||
/// <summary>
|
||||
/// Notification payload for dead-letter events.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterNotificationPayload(
|
||||
string TenantId,
|
||||
string EventType,
|
||||
IReadOnlyList<DeadLetterEntrySummary> Entries,
|
||||
DeadLetterStatsSnapshot? Stats,
|
||||
DateTimeOffset Timestamp,
|
||||
string? ActionUrl);
|
||||
|
||||
/// <summary>
|
||||
/// Summary of a dead-letter entry for notifications.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterEntrySummary(
|
||||
Guid EntryId,
|
||||
Guid OriginalJobId,
|
||||
string JobType,
|
||||
string ErrorCode,
|
||||
ErrorCategory Category,
|
||||
string FailureReason,
|
||||
string? RemediationHint,
|
||||
bool IsRetryable,
|
||||
int ReplayAttempts,
|
||||
DateTimeOffset FailedAt);
|
||||
|
||||
/// <summary>
|
||||
/// Stats snapshot for notifications.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterStatsSnapshot(
|
||||
long PendingCount,
|
||||
long RetryableCount,
|
||||
long ExhaustedCount);
|
||||
|
||||
/// <summary>
|
||||
/// Interface for dead-letter event notifications.
|
||||
/// </summary>
|
||||
public interface IDeadLetterNotifier
|
||||
{
|
||||
/// <summary>Notifies when a new entry is added to dead-letter store.</summary>
|
||||
Task NotifyNewEntryAsync(
|
||||
DeadLetterEntry entry,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Notifies when an entry is successfully replayed.</summary>
|
||||
Task NotifyReplaySuccessAsync(
|
||||
DeadLetterEntry entry,
|
||||
Guid newJobId,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Notifies when an entry exhausts all replay attempts.</summary>
|
||||
Task NotifyExhaustedAsync(
|
||||
DeadLetterEntry entry,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Sends aggregated notifications for pending entries.</summary>
|
||||
Task SendAggregatedNotificationsAsync(
|
||||
string tenantId,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Interface for notification delivery.
|
||||
/// </summary>
|
||||
public interface INotificationDelivery
|
||||
{
|
||||
/// <summary>Sends a notification to the specified endpoint.</summary>
|
||||
Task<bool> SendAsync(
|
||||
NotificationChannel channel,
|
||||
string endpoint,
|
||||
DeadLetterNotificationPayload payload,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Repository for notification rules.
|
||||
/// </summary>
|
||||
public interface INotificationRuleRepository
|
||||
{
|
||||
Task<NotificationRule?> GetByIdAsync(string tenantId, Guid ruleId, CancellationToken cancellationToken);
|
||||
Task<IReadOnlyList<NotificationRule>> ListAsync(string tenantId, bool enabledOnly, CancellationToken cancellationToken);
|
||||
Task<IReadOnlyList<NotificationRule>> GetMatchingRulesAsync(string tenantId, DeadLetterEntry entry, CancellationToken cancellationToken);
|
||||
Task CreateAsync(NotificationRule rule, CancellationToken cancellationToken);
|
||||
Task<bool> UpdateAsync(NotificationRule rule, CancellationToken cancellationToken);
|
||||
Task<bool> DeleteAsync(string tenantId, Guid ruleId, CancellationToken cancellationToken);
|
||||
Task<int> GetNotificationCountThisHourAsync(string tenantId, Guid ruleId, CancellationToken cancellationToken);
|
||||
Task LogNotificationAsync(NotificationLogEntry log, CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Default dead-letter notifier implementation.
|
||||
/// </summary>
|
||||
public sealed class DeadLetterNotifier : IDeadLetterNotifier
|
||||
{
|
||||
private readonly INotificationRuleRepository _ruleRepository;
|
||||
private readonly IDeadLetterRepository _deadLetterRepository;
|
||||
private readonly INotificationDelivery _delivery;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ILogger<DeadLetterNotifier> _logger;
|
||||
|
||||
public DeadLetterNotifier(
|
||||
INotificationRuleRepository ruleRepository,
|
||||
IDeadLetterRepository deadLetterRepository,
|
||||
INotificationDelivery delivery,
|
||||
TimeProvider timeProvider,
|
||||
ILogger<DeadLetterNotifier> logger)
|
||||
{
|
||||
_ruleRepository = ruleRepository ?? throw new ArgumentNullException(nameof(ruleRepository));
|
||||
_deadLetterRepository = deadLetterRepository ?? throw new ArgumentNullException(nameof(deadLetterRepository));
|
||||
_delivery = delivery ?? throw new ArgumentNullException(nameof(delivery));
|
||||
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task NotifyNewEntryAsync(
|
||||
DeadLetterEntry entry,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var rules = await _ruleRepository.GetMatchingRulesAsync(entry.TenantId, entry, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
|
||||
foreach (var rule in rules)
|
||||
{
|
||||
if (rule.Aggregate)
|
||||
{
|
||||
// Skip immediate notification for aggregated rules
|
||||
continue;
|
||||
}
|
||||
|
||||
var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
|
||||
entry.TenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!rule.CanNotify(now, notificationsThisHour))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
await SendNotificationAsync(rule, "new_entry", [entry], null, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task NotifyReplaySuccessAsync(
|
||||
DeadLetterEntry entry,
|
||||
Guid newJobId,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var rules = await _ruleRepository.GetMatchingRulesAsync(entry.TenantId, entry, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
|
||||
foreach (var rule in rules)
|
||||
{
|
||||
var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
|
||||
entry.TenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!rule.CanNotify(now, notificationsThisHour))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var payload = new DeadLetterNotificationPayload(
|
||||
TenantId: entry.TenantId,
|
||||
EventType: "replay_success",
|
||||
Entries: [ToSummary(entry)],
|
||||
Stats: null,
|
||||
Timestamp: now,
|
||||
ActionUrl: null);
|
||||
|
||||
var success = await _delivery.SendAsync(rule.Channel, rule.Endpoint, payload, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
await LogNotificationAsync(rule, [entry.EntryId], success, null, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task NotifyExhaustedAsync(
|
||||
DeadLetterEntry entry,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
        var rules = await _ruleRepository.GetMatchingRulesAsync(entry.TenantId, entry, cancellationToken)
            .ConfigureAwait(false);

        var now = _timeProvider.GetUtcNow();

        foreach (var rule in rules)
        {
            var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
                entry.TenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);

            if (!rule.CanNotify(now, notificationsThisHour))
            {
                continue;
            }

            await SendNotificationAsync(rule, "exhausted", [entry], null, cancellationToken)
                .ConfigureAwait(false);
        }
    }

    public async Task SendAggregatedNotificationsAsync(
        string tenantId,
        CancellationToken cancellationToken)
    {
        var rules = await _ruleRepository.ListAsync(tenantId, enabledOnly: true, cancellationToken)
            .ConfigureAwait(false);

        var now = _timeProvider.GetUtcNow();
        var stats = await _deadLetterRepository.GetStatsAsync(tenantId, cancellationToken).ConfigureAwait(false);

        foreach (var rule in rules.Where(r => r.Aggregate))
        {
            var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
                tenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);

            if (!rule.CanNotify(now, notificationsThisHour))
            {
                continue;
            }

            // Get pending entries matching this rule
            var options = new DeadLetterListOptions(
                Status: DeadLetterStatus.Pending,
                Category: rule.Category,
                Limit: 10);

            var entries = await _deadLetterRepository.ListAsync(tenantId, options, cancellationToken)
                .ConfigureAwait(false);

            // Filter to only matching entries
            var matchingEntries = entries.Where(e => rule.Matches(e)).ToList();

            if (matchingEntries.Count == 0)
            {
                continue;
            }

            var statsSnapshot = new DeadLetterStatsSnapshot(
                PendingCount: stats.PendingEntries,
                RetryableCount: stats.RetryableEntries,
                ExhaustedCount: stats.ExhaustedEntries);

            await SendNotificationAsync(rule, "aggregated", matchingEntries, statsSnapshot, cancellationToken)
                .ConfigureAwait(false);
        }
    }

    private async Task SendNotificationAsync(
        NotificationRule rule,
        string eventType,
        IReadOnlyList<DeadLetterEntry> entries,
        DeadLetterStatsSnapshot? stats,
        CancellationToken cancellationToken)
    {
        var now = _timeProvider.GetUtcNow();

        var payload = new DeadLetterNotificationPayload(
            TenantId: rule.TenantId,
            EventType: eventType,
            Entries: entries.Select(ToSummary).ToList(),
            Stats: stats,
            Timestamp: now,
            ActionUrl: null);

        string? errorMessage = null;
        bool success;

        try
        {
            success = await _delivery.SendAsync(rule.Channel, rule.Endpoint, payload, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            success = false;
            errorMessage = ex.Message;
            _logger.LogError(ex, "Failed to send {EventType} notification for rule {RuleId}", eventType, rule.RuleId);
        }

        await LogNotificationAsync(rule, entries.Select(e => e.EntryId).ToList(), success, errorMessage, cancellationToken)
            .ConfigureAwait(false);

        if (success)
        {
            var updatedRule = rule.RecordNotification(now);
            await _ruleRepository.UpdateAsync(updatedRule, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Dead-letter notification sent: tenant={TenantId}, channel={Channel}, eventType={EventType}",
                rule.TenantId, rule.Channel, eventType);
        }
        else
        {
            _logger.LogWarning(
                "Dead-letter notification failed: tenant={TenantId}, channel={Channel}, eventType={EventType}",
                rule.TenantId, rule.Channel, eventType);
        }
    }

    private async Task LogNotificationAsync(
        NotificationRule rule,
        IReadOnlyList<Guid> entryIds,
        bool success,
        string? errorMessage,
        CancellationToken cancellationToken)
    {
        var log = new NotificationLogEntry(
            LogId: Guid.NewGuid(),
            TenantId: rule.TenantId,
            RuleId: rule.RuleId,
            EntryIds: entryIds,
            Channel: rule.Channel,
            Endpoint: rule.Endpoint,
            Success: success,
            ErrorMessage: errorMessage,
            Subject: null,
            EntryCount: entryIds.Count,
            SentAt: _timeProvider.GetUtcNow());

        await _ruleRepository.LogNotificationAsync(log, cancellationToken).ConfigureAwait(false);
    }

    private static DeadLetterEntrySummary ToSummary(DeadLetterEntry entry) =>
        new(
            EntryId: entry.EntryId,
            OriginalJobId: entry.OriginalJobId,
            JobType: entry.JobType,
            ErrorCode: entry.ErrorCode,
            Category: entry.Category,
            FailureReason: entry.FailureReason,
            RemediationHint: entry.RemediationHint,
            IsRetryable: entry.IsRetryable,
            ReplayAttempts: entry.ReplayAttempts,
            FailedAt: entry.FailedAt);
}

/// <summary>
/// No-op notifier for when notifications are disabled.
/// </summary>
public sealed class NullDeadLetterNotifier : IDeadLetterNotifier
{
    public static readonly NullDeadLetterNotifier Instance = new();

    private NullDeadLetterNotifier() { }

    public Task NotifyNewEntryAsync(DeadLetterEntry entry, CancellationToken cancellationToken) =>
        Task.CompletedTask;

    public Task NotifyReplaySuccessAsync(DeadLetterEntry entry, Guid newJobId, CancellationToken cancellationToken) =>
        Task.CompletedTask;

    public Task NotifyExhaustedAsync(DeadLetterEntry entry, CancellationToken cancellationToken) =>
        Task.CompletedTask;

    public Task SendAggregatedNotificationsAsync(string tenantId, CancellationToken cancellationToken) =>
        Task.CompletedTask;
}
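A minimal wiring sketch, not part of this commit: a host can register the no-op notifier when dead-letter notifications are disabled, so callers never need a null check. The DeadLetterNotifier concrete type and the NotificationsEnabled flag are assumed names for illustration.

// Hypothetical DI registration; DeadLetterNotifier and DeadLetterOptions.NotificationsEnabled
// are illustrative names, not part of this commit.
services.AddSingleton<IDeadLetterNotifier>(sp =>
{
    var options = sp.GetRequiredService<IOptions<DeadLetterOptions>>().Value;
    return options.NotificationsEnabled
        ? ActivatorUtilities.CreateInstance<DeadLetterNotifier>(sp)
        : NullDeadLetterNotifier.Instance;
});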
@@ -0,0 +1,578 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.DeadLetter;

/// <summary>
/// Represents a classified error with remediation guidance.
/// </summary>
public sealed record ClassifiedError(
    /// <summary>Error code (e.g., "ORCH-TRN-001").</summary>
    string ErrorCode,

    /// <summary>Error category.</summary>
    ErrorCategory Category,

    /// <summary>Human-readable description.</summary>
    string Description,

    /// <summary>Remediation hint for operators.</summary>
    string RemediationHint,

    /// <summary>Whether this error is potentially retryable.</summary>
    bool IsRetryable,

    /// <summary>Suggested retry delay if retryable.</summary>
    TimeSpan? SuggestedRetryDelay);

/// <summary>
/// Classifies errors and provides remediation hints.
/// </summary>
public interface IErrorClassifier
{
    /// <summary>Classifies an exception into a categorized error.</summary>
    ClassifiedError Classify(Exception exception);

    /// <summary>Classifies an error code and message.</summary>
    ClassifiedError Classify(string errorCode, string message);

    /// <summary>Classifies based on HTTP status code and message.</summary>
    ClassifiedError ClassifyHttpError(int statusCode, string? message);
}

/// <summary>
/// Default error classifier with standard error codes and remediation hints.
/// </summary>
public sealed class DefaultErrorClassifier : IErrorClassifier
{
    /// <summary>Known error codes with classifications.</summary>
    public static class ErrorCodes
    {
        // Transient errors (ORCH-TRN-xxx)
        public const string NetworkTimeout = "ORCH-TRN-001";
        public const string ConnectionRefused = "ORCH-TRN-002";
        public const string DnsResolutionFailed = "ORCH-TRN-003";
        public const string ServiceUnavailable = "ORCH-TRN-004";
        public const string GatewayTimeout = "ORCH-TRN-005";
        public const string TemporaryFailure = "ORCH-TRN-099";

        // Not found errors (ORCH-NF-xxx)
        public const string ImageNotFound = "ORCH-NF-001";
        public const string SourceNotFound = "ORCH-NF-002";
        public const string RegistryNotFound = "ORCH-NF-003";
        public const string ManifestNotFound = "ORCH-NF-004";
        public const string ResourceNotFound = "ORCH-NF-099";

        // Auth errors (ORCH-AUTH-xxx)
        public const string InvalidCredentials = "ORCH-AUTH-001";
        public const string TokenExpired = "ORCH-AUTH-002";
        public const string InsufficientPermissions = "ORCH-AUTH-003";
        public const string CertificateError = "ORCH-AUTH-004";
        public const string AuthenticationFailed = "ORCH-AUTH-099";

        // Rate limit errors (ORCH-RL-xxx)
        public const string RateLimited = "ORCH-RL-001";
        public const string QuotaExceeded = "ORCH-RL-002";
        public const string ConcurrencyLimitReached = "ORCH-RL-003";
        public const string ThrottlingError = "ORCH-RL-099";

        // Validation errors (ORCH-VAL-xxx)
        public const string InvalidPayload = "ORCH-VAL-001";
        public const string InvalidConfiguration = "ORCH-VAL-002";
        public const string SchemaValidationFailed = "ORCH-VAL-003";
        public const string MissingRequiredField = "ORCH-VAL-004";
        public const string ValidationFailed = "ORCH-VAL-099";

        // Upstream errors (ORCH-UP-xxx)
        public const string RegistryError = "ORCH-UP-001";
        public const string AdvisoryFeedError = "ORCH-UP-002";
        public const string DatabaseError = "ORCH-UP-003";
        public const string ExternalServiceError = "ORCH-UP-099";

        // Internal errors (ORCH-INT-xxx)
        public const string InternalError = "ORCH-INT-001";
        public const string StateCorruption = "ORCH-INT-002";
        public const string ProcessingError = "ORCH-INT-003";
        public const string UnexpectedError = "ORCH-INT-099";

        // Conflict errors (ORCH-CON-xxx)
        public const string DuplicateJob = "ORCH-CON-001";
        public const string VersionMismatch = "ORCH-CON-002";
        public const string ConcurrentModification = "ORCH-CON-003";
        public const string ConflictError = "ORCH-CON-099";

        // Canceled errors (ORCH-CAN-xxx)
        public const string UserCanceled = "ORCH-CAN-001";
        public const string SystemCanceled = "ORCH-CAN-002";
        public const string TimeoutCanceled = "ORCH-CAN-003";
        public const string OperationCanceled = "ORCH-CAN-099";
    }

    private static readonly Dictionary<string, ClassifiedError> KnownErrors = new()
    {
        // Transient errors
        [ErrorCodes.NetworkTimeout] = new(
            ErrorCodes.NetworkTimeout,
            ErrorCategory.Transient,
            "Network operation timed out",
            "Check network connectivity and firewall rules. If the target service is healthy, increase timeout settings.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(1)),

        [ErrorCodes.ConnectionRefused] = new(
            ErrorCodes.ConnectionRefused,
            ErrorCategory.Transient,
            "Connection refused by target host",
            "Verify the target service is running and accessible. Check firewall rules and network policies.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(2)),

        [ErrorCodes.DnsResolutionFailed] = new(
            ErrorCodes.DnsResolutionFailed,
            ErrorCategory.Transient,
            "DNS resolution failed",
            "Verify the hostname is correct. Check DNS server configuration and network connectivity.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(1)),

        [ErrorCodes.ServiceUnavailable] = new(
            ErrorCodes.ServiceUnavailable,
            ErrorCategory.Transient,
            "Service temporarily unavailable (503)",
            "The target service is temporarily overloaded or under maintenance. Retry with exponential backoff.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(5)),

        [ErrorCodes.GatewayTimeout] = new(
            ErrorCodes.GatewayTimeout,
            ErrorCategory.Transient,
            "Gateway timeout (504)",
            "An upstream service took too long to respond. This is typically transient; retry with backoff.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(2)),

        [ErrorCodes.TemporaryFailure] = new(
            ErrorCodes.TemporaryFailure,
            ErrorCategory.Transient,
            "Temporary failure",
            "A transient error occurred. Retry the operation after a brief delay.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(1)),

        // Not found errors
        [ErrorCodes.ImageNotFound] = new(
            ErrorCodes.ImageNotFound,
            ErrorCategory.NotFound,
            "Container image not found",
            "Verify the image reference is correct (repository, tag, digest). Check registry access and that the image exists.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.SourceNotFound] = new(
            ErrorCodes.SourceNotFound,
            ErrorCategory.NotFound,
            "Source configuration not found",
            "The referenced source may have been deleted. Verify the source ID and recreate if necessary.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.RegistryNotFound] = new(
            ErrorCodes.RegistryNotFound,
            ErrorCategory.NotFound,
            "Container registry not found",
            "Verify the registry URL is correct. Check DNS resolution and that the registry is operational.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.ManifestNotFound] = new(
            ErrorCodes.ManifestNotFound,
            ErrorCategory.NotFound,
            "Image manifest not found",
            "The image exists but the manifest is missing. The image may have been deleted or the tag moved.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.ResourceNotFound] = new(
            ErrorCodes.ResourceNotFound,
            ErrorCategory.NotFound,
            "Resource not found",
            "The requested resource does not exist. Verify the resource identifier is correct.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        // Auth errors
        [ErrorCodes.InvalidCredentials] = new(
            ErrorCodes.InvalidCredentials,
            ErrorCategory.AuthFailure,
            "Invalid credentials",
            "The provided credentials are invalid. Update the registry credentials in the source configuration.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.TokenExpired] = new(
            ErrorCodes.TokenExpired,
            ErrorCategory.AuthFailure,
            "Authentication token expired",
            "The authentication token has expired. Refresh credentials or re-authenticate to obtain a new token.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(1)),

        [ErrorCodes.InsufficientPermissions] = new(
            ErrorCodes.InsufficientPermissions,
            ErrorCategory.AuthFailure,
            "Insufficient permissions",
            "The authenticated user lacks required permissions. Request access from the registry administrator.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.CertificateError] = new(
            ErrorCodes.CertificateError,
            ErrorCategory.AuthFailure,
            "TLS certificate error",
            "Certificate validation failed. Verify the CA bundle or add the registry's certificate to trusted roots.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.AuthenticationFailed] = new(
            ErrorCodes.AuthenticationFailed,
            ErrorCategory.AuthFailure,
            "Authentication failed",
            "Unable to authenticate with the target service. Verify credentials and authentication configuration.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        // Rate limit errors
        [ErrorCodes.RateLimited] = new(
            ErrorCodes.RateLimited,
            ErrorCategory.RateLimited,
            "Rate limit exceeded (429)",
            "Request rate limit exceeded. Reduce request frequency or upgrade service tier. Will auto-retry with backoff.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(5)),

        [ErrorCodes.QuotaExceeded] = new(
            ErrorCodes.QuotaExceeded,
            ErrorCategory.RateLimited,
            "Quota exceeded",
            "Usage quota has been exceeded. Wait for quota reset or request quota increase.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromHours(1)),

        [ErrorCodes.ConcurrencyLimitReached] = new(
            ErrorCodes.ConcurrencyLimitReached,
            ErrorCategory.RateLimited,
            "Concurrency limit reached",
            "Maximum concurrent operations limit reached. Reduce parallel operations or increase limit.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(1)),

        [ErrorCodes.ThrottlingError] = new(
            ErrorCodes.ThrottlingError,
            ErrorCategory.RateLimited,
            "Request throttled",
            "Request was throttled due to rate limits. Retry with exponential backoff.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(2)),

        // Validation errors
        [ErrorCodes.InvalidPayload] = new(
            ErrorCodes.InvalidPayload,
            ErrorCategory.ValidationError,
            "Invalid job payload",
            "The job payload is malformed or invalid. Review the payload structure and fix validation errors.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.InvalidConfiguration] = new(
            ErrorCodes.InvalidConfiguration,
            ErrorCategory.ValidationError,
            "Invalid configuration",
            "Source or job configuration is invalid. Review and correct the configuration settings.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.SchemaValidationFailed] = new(
            ErrorCodes.SchemaValidationFailed,
            ErrorCategory.ValidationError,
            "Schema validation failed",
            "Input data failed schema validation. Ensure data conforms to the expected schema.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.MissingRequiredField] = new(
            ErrorCodes.MissingRequiredField,
            ErrorCategory.ValidationError,
            "Missing required field",
            "A required field is missing from the input. Provide all required fields.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.ValidationFailed] = new(
            ErrorCodes.ValidationFailed,
            ErrorCategory.ValidationError,
            "Validation failed",
            "Input validation failed. Review the error details and correct the input.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        // Upstream errors
        [ErrorCodes.RegistryError] = new(
            ErrorCodes.RegistryError,
            ErrorCategory.UpstreamError,
            "Container registry error",
            "The container registry returned an error. Check registry status and logs for details.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(5)),

        [ErrorCodes.AdvisoryFeedError] = new(
            ErrorCodes.AdvisoryFeedError,
            ErrorCategory.UpstreamError,
            "Advisory feed error",
            "Error fetching from advisory feed. Check feed URL and authentication. May be temporary.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(15)),

        [ErrorCodes.DatabaseError] = new(
            ErrorCodes.DatabaseError,
            ErrorCategory.UpstreamError,
            "Database error",
            "Database operation failed. Check database connectivity and status.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(1)),

        [ErrorCodes.ExternalServiceError] = new(
            ErrorCodes.ExternalServiceError,
            ErrorCategory.UpstreamError,
            "External service error",
            "An external service dependency failed. Check service status and connectivity.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(5)),

        // Internal errors
        [ErrorCodes.InternalError] = new(
            ErrorCodes.InternalError,
            ErrorCategory.InternalError,
            "Internal processing error",
            "An internal error occurred. This may indicate a bug. Please report if persistent.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.StateCorruption] = new(
            ErrorCodes.StateCorruption,
            ErrorCategory.InternalError,
            "State corruption detected",
            "Internal state corruption detected. Manual intervention may be required.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.ProcessingError] = new(
            ErrorCodes.ProcessingError,
            ErrorCategory.InternalError,
            "Processing error",
            "Error during job processing. Review job payload and configuration.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.UnexpectedError] = new(
            ErrorCodes.UnexpectedError,
            ErrorCategory.InternalError,
            "Unexpected error",
            "An unexpected error occurred. This may indicate a bug. Please report with error details.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        // Conflict errors
        [ErrorCodes.DuplicateJob] = new(
            ErrorCodes.DuplicateJob,
            ErrorCategory.Conflict,
            "Duplicate job detected",
            "A job with the same idempotency key already exists. This is expected for retry scenarios.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.VersionMismatch] = new(
            ErrorCodes.VersionMismatch,
            ErrorCategory.Conflict,
            "Version mismatch",
            "Resource version conflict detected. Refresh and retry the operation.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromSeconds(5)),

        [ErrorCodes.ConcurrentModification] = new(
            ErrorCodes.ConcurrentModification,
            ErrorCategory.Conflict,
            "Concurrent modification",
            "Resource was modified concurrently. Refresh state and retry.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromSeconds(5)),

        [ErrorCodes.ConflictError] = new(
            ErrorCodes.ConflictError,
            ErrorCategory.Conflict,
            "Resource conflict",
            "A resource conflict occurred. Check for concurrent operations.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromSeconds(10)),

        // Canceled errors
        [ErrorCodes.UserCanceled] = new(
            ErrorCodes.UserCanceled,
            ErrorCategory.Canceled,
            "Canceled by user",
            "Operation was canceled by user request. No action required unless retry is desired.",
            IsRetryable: false,
            SuggestedRetryDelay: null),

        [ErrorCodes.SystemCanceled] = new(
            ErrorCodes.SystemCanceled,
            ErrorCategory.Canceled,
            "Canceled by system",
            "Operation was canceled by the system (e.g., shutdown, quota). May be automatically rescheduled.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(5)),

        [ErrorCodes.TimeoutCanceled] = new(
            ErrorCodes.TimeoutCanceled,
            ErrorCategory.Canceled,
            "Canceled due to timeout",
            "Operation exceeded its time limit. Consider increasing timeout or optimizing the operation.",
            IsRetryable: true,
            SuggestedRetryDelay: TimeSpan.FromMinutes(2)),

        [ErrorCodes.OperationCanceled] = new(
            ErrorCodes.OperationCanceled,
            ErrorCategory.Canceled,
            "Operation canceled",
            "The operation was canceled. Check cancellation source for details.",
            IsRetryable: false,
            SuggestedRetryDelay: null)
    };

    /// <inheritdoc />
    public ClassifiedError Classify(Exception exception)
    {
        ArgumentNullException.ThrowIfNull(exception);

        return exception switch
        {
            OperationCanceledException => KnownErrors[ErrorCodes.OperationCanceled],
            TimeoutException => KnownErrors[ErrorCodes.NetworkTimeout],
            HttpRequestException httpEx => ClassifyHttpException(httpEx),
            _ when exception.Message.Contains("connection refused", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.ConnectionRefused],
            _ when exception.Message.Contains("DNS", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.DnsResolutionFailed],
            _ when exception.Message.Contains("timeout", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.NetworkTimeout],
            _ when exception.Message.Contains("certificate", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.CertificateError],
            _ when exception.Message.Contains("unauthorized", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.AuthenticationFailed],
            _ when exception.Message.Contains("forbidden", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.InsufficientPermissions],
            _ => new ClassifiedError(
                ErrorCodes.UnexpectedError,
                ErrorCategory.InternalError,
                exception.GetType().Name,
                $"Unexpected error: {exception.Message}. Review stack trace for details.",
                IsRetryable: false,
                SuggestedRetryDelay: null)
        };
    }

    /// <inheritdoc />
    public ClassifiedError Classify(string errorCode, string message)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(errorCode);

        if (KnownErrors.TryGetValue(errorCode, out var known))
        {
            return known;
        }

        // Try to infer from error code prefix
        var category = errorCode switch
        {
            _ when errorCode.StartsWith("ORCH-TRN-", StringComparison.Ordinal) => ErrorCategory.Transient,
            _ when errorCode.StartsWith("ORCH-NF-", StringComparison.Ordinal) => ErrorCategory.NotFound,
            _ when errorCode.StartsWith("ORCH-AUTH-", StringComparison.Ordinal) => ErrorCategory.AuthFailure,
            _ when errorCode.StartsWith("ORCH-RL-", StringComparison.Ordinal) => ErrorCategory.RateLimited,
            _ when errorCode.StartsWith("ORCH-VAL-", StringComparison.Ordinal) => ErrorCategory.ValidationError,
            _ when errorCode.StartsWith("ORCH-UP-", StringComparison.Ordinal) => ErrorCategory.UpstreamError,
            _ when errorCode.StartsWith("ORCH-INT-", StringComparison.Ordinal) => ErrorCategory.InternalError,
            _ when errorCode.StartsWith("ORCH-CON-", StringComparison.Ordinal) => ErrorCategory.Conflict,
            _ when errorCode.StartsWith("ORCH-CAN-", StringComparison.Ordinal) => ErrorCategory.Canceled,
            _ => ErrorCategory.Unknown
        };

        var isRetryable = category is ErrorCategory.Transient or ErrorCategory.RateLimited or ErrorCategory.UpstreamError;

        return new ClassifiedError(
            errorCode,
            category,
            message,
            "Unknown error code. Review the error message for details.",
            isRetryable,
            isRetryable ? TimeSpan.FromMinutes(5) : null);
    }

    /// <inheritdoc />
    public ClassifiedError ClassifyHttpError(int statusCode, string? message)
    {
        return statusCode switch
        {
            400 => KnownErrors[ErrorCodes.ValidationFailed],
            401 => KnownErrors[ErrorCodes.AuthenticationFailed],
            403 => KnownErrors[ErrorCodes.InsufficientPermissions],
            404 => KnownErrors[ErrorCodes.ResourceNotFound],
            408 => KnownErrors[ErrorCodes.NetworkTimeout],
            409 => KnownErrors[ErrorCodes.ConflictError],
            429 => KnownErrors[ErrorCodes.RateLimited],
            500 => KnownErrors[ErrorCodes.InternalError],
            502 => KnownErrors[ErrorCodes.ExternalServiceError],
            503 => KnownErrors[ErrorCodes.ServiceUnavailable],
            504 => KnownErrors[ErrorCodes.GatewayTimeout],
            _ when statusCode >= 400 && statusCode < 500 => new ClassifiedError(
                $"HTTP-{statusCode}",
                ErrorCategory.ValidationError,
                message ?? $"HTTP {statusCode} error",
                "Client error. Review request parameters.",
                IsRetryable: false,
                SuggestedRetryDelay: null),
            _ when statusCode >= 500 => new ClassifiedError(
                $"HTTP-{statusCode}",
                ErrorCategory.UpstreamError,
                message ?? $"HTTP {statusCode} error",
                "Server error. May be transient; retry with backoff.",
                IsRetryable: true,
                SuggestedRetryDelay: TimeSpan.FromMinutes(2)),
            _ => new ClassifiedError(
                $"HTTP-{statusCode}",
                ErrorCategory.Unknown,
                message ?? $"HTTP {statusCode}",
                "Unexpected HTTP status. Review response for details.",
                IsRetryable: false,
                SuggestedRetryDelay: null)
        };
    }

    private ClassifiedError ClassifyHttpException(HttpRequestException ex)
    {
        if (ex.StatusCode.HasValue)
        {
            return ClassifyHttpError((int)ex.StatusCode.Value, ex.Message);
        }

        // No status code - likely a connection error
        return ex.Message switch
        {
            _ when ex.Message.Contains("connection refused", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.ConnectionRefused],
            _ when ex.Message.Contains("name resolution", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.DnsResolutionFailed],
            _ when ex.Message.Contains("SSL", StringComparison.OrdinalIgnoreCase) ||
                   ex.Message.Contains("TLS", StringComparison.OrdinalIgnoreCase)
                => KnownErrors[ErrorCodes.CertificateError],
            _ => KnownErrors[ErrorCodes.ExternalServiceError]
        };
    }
}
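A short usage sketch of the classifier defined above; the return values noted in comments follow directly from the KnownErrors table and the Classify switch.

IErrorClassifier classifier = new DefaultErrorClassifier();

// TimeoutException maps to ORCH-TRN-001 (transient, retryable, 1-minute delay).
var fromException = classifier.Classify(new TimeoutException("operation timed out"));

// 429 maps to ORCH-RL-001 (rate limited, retryable, 5-minute delay).
var fromHttp = classifier.ClassifyHttpError(429, "too many requests");

// Unknown codes fall back to prefix inference; ORCH-TRN-042 is a made-up code.
var inferred = classifier.Classify("ORCH-TRN-042", "some transient failure");
// inferred.Category == ErrorCategory.Transient, inferred.IsRetryable == true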
@@ -0,0 +1,221 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.DeadLetter;

/// <summary>
/// Repository for dead-letter entry persistence.
/// </summary>
public interface IDeadLetterRepository
{
    /// <summary>Gets a dead-letter entry by ID.</summary>
    Task<DeadLetterEntry?> GetByIdAsync(
        string tenantId,
        Guid entryId,
        CancellationToken cancellationToken);

    /// <summary>Gets a dead-letter entry by original job ID.</summary>
    Task<DeadLetterEntry?> GetByOriginalJobIdAsync(
        string tenantId,
        Guid originalJobId,
        CancellationToken cancellationToken);

    /// <summary>Lists dead-letter entries with filtering and pagination.</summary>
    Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
        string tenantId,
        DeadLetterListOptions options,
        CancellationToken cancellationToken);

    /// <summary>Counts dead-letter entries with filtering.</summary>
    Task<long> CountAsync(
        string tenantId,
        DeadLetterListOptions options,
        CancellationToken cancellationToken);

    /// <summary>Creates a new dead-letter entry.</summary>
    Task CreateAsync(
        DeadLetterEntry entry,
        CancellationToken cancellationToken);

    /// <summary>Updates an existing dead-letter entry.</summary>
    Task<bool> UpdateAsync(
        DeadLetterEntry entry,
        CancellationToken cancellationToken);

    /// <summary>Gets entries pending replay that are retryable.</summary>
    Task<IReadOnlyList<DeadLetterEntry>> GetPendingRetryableAsync(
        string tenantId,
        int limit,
        CancellationToken cancellationToken);

    /// <summary>Gets entries by error code.</summary>
    Task<IReadOnlyList<DeadLetterEntry>> GetByErrorCodeAsync(
        string tenantId,
        string errorCode,
        DeadLetterStatus? status,
        int limit,
        CancellationToken cancellationToken);

    /// <summary>Gets entries by category.</summary>
    Task<IReadOnlyList<DeadLetterEntry>> GetByCategoryAsync(
        string tenantId,
        ErrorCategory category,
        DeadLetterStatus? status,
        int limit,
        CancellationToken cancellationToken);

    /// <summary>Gets aggregated statistics.</summary>
    Task<DeadLetterStats> GetStatsAsync(
        string tenantId,
        CancellationToken cancellationToken);

    /// <summary>Gets a summary of actionable entries grouped by error code.</summary>
    Task<IReadOnlyList<DeadLetterSummary>> GetActionableSummaryAsync(
        string tenantId,
        int limit,
        CancellationToken cancellationToken);

    /// <summary>Marks expired entries.</summary>
    Task<int> MarkExpiredAsync(
        int batchLimit,
        CancellationToken cancellationToken);

    /// <summary>Purges old resolved/expired entries.</summary>
    Task<int> PurgeOldEntriesAsync(
        int retentionDays,
        int batchLimit,
        CancellationToken cancellationToken);
}

/// <summary>
/// Options for listing dead-letter entries.
/// </summary>
public sealed record DeadLetterListOptions(
    DeadLetterStatus? Status = null,
    ErrorCategory? Category = null,
    string? JobType = null,
    string? ErrorCode = null,
    Guid? SourceId = null,
    Guid? RunId = null,
    bool? IsRetryable = null,
    DateTimeOffset? CreatedAfter = null,
    DateTimeOffset? CreatedBefore = null,
    string? Cursor = null,
    int Limit = 50,
    bool Ascending = false);

/// <summary>
/// Aggregated dead-letter statistics.
/// </summary>
public sealed record DeadLetterStats(
    long TotalEntries,
    long PendingEntries,
    long ReplayingEntries,
    long ReplayedEntries,
    long ResolvedEntries,
    long ExhaustedEntries,
    long ExpiredEntries,
    long RetryableEntries,
    IReadOnlyDictionary<ErrorCategory, long> ByCategory,
    IReadOnlyDictionary<string, long> TopErrorCodes,
    IReadOnlyDictionary<string, long> TopJobTypes);

/// <summary>
/// Summary of dead-letter entries grouped by error code.
/// </summary>
public sealed record DeadLetterSummary(
    string ErrorCode,
    ErrorCategory Category,
    long EntryCount,
    long RetryableCount,
    DateTimeOffset OldestEntry,
    string? SampleReason);

/// <summary>
/// Repository for replay audit records.
/// </summary>
public interface IReplayAuditRepository
{
    /// <summary>Gets audit records for an entry.</summary>
    Task<IReadOnlyList<ReplayAuditRecord>> GetByEntryAsync(
        string tenantId,
        Guid entryId,
        CancellationToken cancellationToken);

    /// <summary>Gets a specific audit record.</summary>
    Task<ReplayAuditRecord?> GetByIdAsync(
        string tenantId,
        Guid auditId,
        CancellationToken cancellationToken);

    /// <summary>Creates a new audit record.</summary>
    Task CreateAsync(
        ReplayAuditRecord record,
        CancellationToken cancellationToken);

    /// <summary>Updates an audit record (completion).</summary>
    Task<bool> UpdateAsync(
        ReplayAuditRecord record,
        CancellationToken cancellationToken);

    /// <summary>Gets audit records for a new job ID (to find replay source).</summary>
    Task<ReplayAuditRecord?> GetByNewJobIdAsync(
        string tenantId,
        Guid newJobId,
        CancellationToken cancellationToken);
}

/// <summary>
/// Replay attempt audit record.
/// </summary>
public sealed record ReplayAuditRecord(
    Guid AuditId,
    string TenantId,
    Guid EntryId,
    int AttemptNumber,
    bool Success,
    Guid? NewJobId,
    string? ErrorMessage,
    string TriggeredBy,
    DateTimeOffset TriggeredAt,
    DateTimeOffset? CompletedAt,
    string InitiatedBy)
{
    /// <summary>Creates a new audit record for a replay attempt.</summary>
    public static ReplayAuditRecord Create(
        string tenantId,
        Guid entryId,
        int attemptNumber,
        string triggeredBy,
        string initiatedBy,
        DateTimeOffset now) =>
        new(
            AuditId: Guid.NewGuid(),
            TenantId: tenantId,
            EntryId: entryId,
            AttemptNumber: attemptNumber,
            Success: false,
            NewJobId: null,
            ErrorMessage: null,
            TriggeredBy: triggeredBy,
            TriggeredAt: now,
            CompletedAt: null,
            InitiatedBy: initiatedBy);

    /// <summary>Marks the replay as successful.</summary>
    public ReplayAuditRecord Complete(Guid newJobId, DateTimeOffset now) =>
        this with
        {
            Success = true,
            NewJobId = newJobId,
            CompletedAt = now
        };

    /// <summary>Marks the replay as failed.</summary>
    public ReplayAuditRecord Fail(string errorMessage, DateTimeOffset now) =>
        this with
        {
            Success = false,
            ErrorMessage = errorMessage,
            CompletedAt = now
        };
}
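A worked sketch of the audit record lifecycle defined above: Create produces a pending record, then exactly one of Complete or Fail finalizes it. TimeProvider.System stands in for the injected clock, and the tenant and user values are illustrative.

var audit = ReplayAuditRecord.Create(
    tenantId: "tenant-a",                     // illustrative tenant
    entryId: Guid.NewGuid(),
    attemptNumber: 1,
    triggeredBy: "manual",
    initiatedBy: "operator@example.com",
    now: TimeProvider.System.GetUtcNow());

// Success path: record the new job ID and completion time.
audit = audit.Complete(newJobId: Guid.NewGuid(), now: TimeProvider.System.GetUtcNow());

// Failure path (alternative): audit.Fail("registry unreachable", now) instead.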
@@ -0,0 +1,472 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.DeadLetter;

/// <summary>
/// Options for replay manager configuration.
/// </summary>
public sealed record ReplayManagerOptions(
    /// <summary>Default maximum replay attempts.</summary>
    int DefaultMaxReplayAttempts = 3,

    /// <summary>Default retention period for dead-letter entries.</summary>
    TimeSpan DefaultRetention = default,

    /// <summary>Minimum delay between replay attempts.</summary>
    TimeSpan MinReplayDelay = default,

    /// <summary>Maximum batch size for bulk operations.</summary>
    int MaxBatchSize = 100,

    /// <summary>Enable automatic replay of retryable entries.</summary>
    bool AutoReplayEnabled = false,

    /// <summary>Delay before automatic replay.</summary>
    TimeSpan AutoReplayDelay = default)
{
    /// <summary>Default options.</summary>
    public static ReplayManagerOptions Default => new(
        DefaultMaxReplayAttempts: 3,
        DefaultRetention: TimeSpan.FromDays(30),
        MinReplayDelay: TimeSpan.FromMinutes(5),
        MaxBatchSize: 100,
        AutoReplayEnabled: false,
        AutoReplayDelay: TimeSpan.FromMinutes(15));
}

/// <summary>
/// Result of a replay operation.
/// </summary>
public sealed record ReplayResult(
    bool Success,
    Guid? NewJobId,
    string? ErrorMessage,
    DeadLetterEntry UpdatedEntry);

/// <summary>
/// Result of a batch replay operation.
/// </summary>
public sealed record BatchReplayResult(
    int Attempted,
    int Succeeded,
    int Failed,
    IReadOnlyList<ReplayResult> Results);

/// <summary>
/// Manages dead-letter entry replay operations.
/// </summary>
public interface IReplayManager
{
    /// <summary>Replays a single dead-letter entry.</summary>
    Task<ReplayResult> ReplayAsync(
        string tenantId,
        Guid entryId,
        string initiatedBy,
        CancellationToken cancellationToken);

    /// <summary>Replays multiple entries by ID.</summary>
    Task<BatchReplayResult> ReplayBatchAsync(
        string tenantId,
        IReadOnlyList<Guid> entryIds,
        string initiatedBy,
        CancellationToken cancellationToken);

    /// <summary>Replays all pending retryable entries matching criteria.</summary>
    Task<BatchReplayResult> ReplayPendingAsync(
        string tenantId,
        string? errorCode,
        ErrorCategory? category,
        int maxCount,
        string initiatedBy,
        CancellationToken cancellationToken);

    /// <summary>Resolves an entry without replay.</summary>
    Task<DeadLetterEntry> ResolveAsync(
        string tenantId,
        Guid entryId,
        string notes,
        string resolvedBy,
        CancellationToken cancellationToken);

    /// <summary>Resolves multiple entries without replay.</summary>
    Task<int> ResolveBatchAsync(
        string tenantId,
        IReadOnlyList<Guid> entryIds,
        string notes,
        string resolvedBy,
        CancellationToken cancellationToken);
}

/// <summary>
/// Job creator interface for replay operations.
/// </summary>
public interface IJobCreator
{
    /// <summary>Creates a new job from a dead-letter entry payload.</summary>
    Task<Job> CreateFromReplayAsync(
        string tenantId,
        string jobType,
        string payload,
        string payloadDigest,
        string idempotencyKey,
        string? correlationId,
        Guid replayOf,
        string createdBy,
        CancellationToken cancellationToken);
}

/// <summary>
/// Default replay manager implementation.
/// </summary>
public sealed class ReplayManager : IReplayManager
{
    private readonly IDeadLetterRepository _deadLetterRepository;
    private readonly IReplayAuditRepository _auditRepository;
    private readonly IJobCreator _jobCreator;
    private readonly IDeadLetterNotifier _notifier;
    private readonly TimeProvider _timeProvider;
    private readonly ReplayManagerOptions _options;
    private readonly ILogger<ReplayManager> _logger;

    public ReplayManager(
        IDeadLetterRepository deadLetterRepository,
        IReplayAuditRepository auditRepository,
        IJobCreator jobCreator,
        IDeadLetterNotifier notifier,
        TimeProvider timeProvider,
        ReplayManagerOptions options,
        ILogger<ReplayManager> logger)
    {
        _deadLetterRepository = deadLetterRepository ?? throw new ArgumentNullException(nameof(deadLetterRepository));
        _auditRepository = auditRepository ?? throw new ArgumentNullException(nameof(auditRepository));
        _jobCreator = jobCreator ?? throw new ArgumentNullException(nameof(jobCreator));
        _notifier = notifier ?? throw new ArgumentNullException(nameof(notifier));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _options = options ?? ReplayManagerOptions.Default;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<ReplayResult> ReplayAsync(
        string tenantId,
        Guid entryId,
        string initiatedBy,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentException.ThrowIfNullOrWhiteSpace(initiatedBy);

        var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
            .ConfigureAwait(false);

        if (entry is null)
        {
            throw new InvalidOperationException($"Dead-letter entry {entryId} not found.");
        }

        return await ReplayEntryAsync(entry, "manual", initiatedBy, cancellationToken).ConfigureAwait(false);
    }

    public async Task<BatchReplayResult> ReplayBatchAsync(
        string tenantId,
        IReadOnlyList<Guid> entryIds,
        string initiatedBy,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentNullException.ThrowIfNull(entryIds);
        ArgumentException.ThrowIfNullOrWhiteSpace(initiatedBy);

        if (entryIds.Count > _options.MaxBatchSize)
        {
            throw new ArgumentException($"Batch size {entryIds.Count} exceeds maximum {_options.MaxBatchSize}.");
        }

        var results = new List<ReplayResult>();
        var succeeded = 0;
        var failed = 0;

        foreach (var entryId in entryIds)
        {
            try
            {
                var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
                    .ConfigureAwait(false);

                if (entry is null)
                {
                    results.Add(new ReplayResult(
                        Success: false,
                        NewJobId: null,
                        ErrorMessage: $"Entry {entryId} not found.",
                        UpdatedEntry: null!));
                    failed++;
                    continue;
                }

                var result = await ReplayEntryAsync(entry, "batch", initiatedBy, cancellationToken)
                    .ConfigureAwait(false);
                results.Add(result);

                if (result.Success)
                    succeeded++;
                else
                    failed++;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to replay entry {EntryId}", entryId);
                results.Add(new ReplayResult(
                    Success: false,
                    NewJobId: null,
                    ErrorMessage: ex.Message,
                    UpdatedEntry: null!));
                failed++;
            }
        }

        return new BatchReplayResult(
            Attempted: entryIds.Count,
            Succeeded: succeeded,
            Failed: failed,
            Results: results);
    }

    public async Task<BatchReplayResult> ReplayPendingAsync(
        string tenantId,
        string? errorCode,
        ErrorCategory? category,
        int maxCount,
        string initiatedBy,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentException.ThrowIfNullOrWhiteSpace(initiatedBy);

        var effectiveLimit = Math.Min(maxCount, _options.MaxBatchSize);

        IReadOnlyList<DeadLetterEntry> entries;
        if (!string.IsNullOrEmpty(errorCode))
        {
            entries = await _deadLetterRepository.GetByErrorCodeAsync(
                tenantId, errorCode, DeadLetterStatus.Pending, effectiveLimit, cancellationToken)
                .ConfigureAwait(false);
        }
        else if (category.HasValue)
        {
            entries = await _deadLetterRepository.GetByCategoryAsync(
                tenantId, category.Value, DeadLetterStatus.Pending, effectiveLimit, cancellationToken)
                .ConfigureAwait(false);
        }
        else
        {
            entries = await _deadLetterRepository.GetPendingRetryableAsync(tenantId, effectiveLimit, cancellationToken)
                .ConfigureAwait(false);
        }

        var results = new List<ReplayResult>();
        var succeeded = 0;
        var failed = 0;

        foreach (var entry in entries)
        {
            if (!entry.CanReplay)
            {
                continue;
            }

            try
            {
                var result = await ReplayEntryAsync(entry, "auto", initiatedBy, cancellationToken)
                    .ConfigureAwait(false);
                results.Add(result);

                if (result.Success)
                    succeeded++;
                else
                    failed++;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to replay entry {EntryId}", entry.EntryId);
                results.Add(new ReplayResult(
                    Success: false,
                    NewJobId: null,
                    ErrorMessage: ex.Message,
                    UpdatedEntry: entry));
                failed++;
            }
        }

        return new BatchReplayResult(
            Attempted: results.Count,
            Succeeded: succeeded,
            Failed: failed,
            Results: results);
    }

    public async Task<DeadLetterEntry> ResolveAsync(
        string tenantId,
        Guid entryId,
        string notes,
        string resolvedBy,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentException.ThrowIfNullOrWhiteSpace(resolvedBy);

        var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
            .ConfigureAwait(false);

        if (entry is null)
        {
            throw new InvalidOperationException($"Dead-letter entry {entryId} not found.");
        }

        var now = _timeProvider.GetUtcNow();
        var resolved = entry.Resolve(notes, resolvedBy, now);

        await _deadLetterRepository.UpdateAsync(resolved, cancellationToken).ConfigureAwait(false);

        _logger.LogInformation(
            "Resolved dead-letter entry {EntryId} for job {JobId}. Notes: {Notes}",
            entryId, entry.OriginalJobId, notes);

        return resolved;
    }

    public async Task<int> ResolveBatchAsync(
        string tenantId,
        IReadOnlyList<Guid> entryIds,
        string notes,
        string resolvedBy,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentNullException.ThrowIfNull(entryIds);
        ArgumentException.ThrowIfNullOrWhiteSpace(resolvedBy);

        var resolved = 0;
        var now = _timeProvider.GetUtcNow();

        foreach (var entryId in entryIds)
        {
            try
            {
                var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
                    .ConfigureAwait(false);

                if (entry is null || entry.IsTerminal)
                {
                    continue;
                }

                var resolvedEntry = entry.Resolve(notes, resolvedBy, now);
                await _deadLetterRepository.UpdateAsync(resolvedEntry, cancellationToken).ConfigureAwait(false);
                resolved++;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to resolve entry {EntryId}", entryId);
            }
        }

        return resolved;
    }

    private async Task<ReplayResult> ReplayEntryAsync(
        DeadLetterEntry entry,
        string triggeredBy,
        string initiatedBy,
        CancellationToken cancellationToken)
    {
        if (!entry.CanReplay)
        {
            return new ReplayResult(
                Success: false,
                NewJobId: null,
                ErrorMessage: $"Entry cannot be replayed: status={entry.Status}, attempts={entry.ReplayAttempts}/{entry.MaxReplayAttempts}, retryable={entry.IsRetryable}",
                UpdatedEntry: entry);
        }

        var now = _timeProvider.GetUtcNow();

        // Mark entry as replaying
        var replaying = entry.StartReplay(initiatedBy, now);
        await _deadLetterRepository.UpdateAsync(replaying, cancellationToken).ConfigureAwait(false);

        // Create audit record
        var auditRecord = ReplayAuditRecord.Create(
            entry.TenantId,
            entry.EntryId,
            replaying.ReplayAttempts,
            triggeredBy,
            initiatedBy,
            now);
        await _auditRepository.CreateAsync(auditRecord, cancellationToken).ConfigureAwait(false);

        try
        {
            // Create new job with updated idempotency key
            var newIdempotencyKey = $"{entry.IdempotencyKey}:replay:{replaying.ReplayAttempts}";
            var newJob = await _jobCreator.CreateFromReplayAsync(
                entry.TenantId,
                entry.JobType,
                entry.Payload,
                entry.PayloadDigest,
                newIdempotencyKey,
                entry.CorrelationId,
                entry.OriginalJobId,
                initiatedBy,
                cancellationToken).ConfigureAwait(false);

            // Mark replay successful
            now = _timeProvider.GetUtcNow();
            var completed = replaying.CompleteReplay(newJob.JobId, initiatedBy, now);
            await _deadLetterRepository.UpdateAsync(completed, cancellationToken).ConfigureAwait(false);

            // Update audit record
            var completedAudit = auditRecord.Complete(newJob.JobId, now);
            await _auditRepository.UpdateAsync(completedAudit, cancellationToken).ConfigureAwait(false);

            _logger.LogInformation(
                "Replayed dead-letter entry {EntryId} as new job {NewJobId}",
                entry.EntryId, newJob.JobId);

            // Notify on success
            await _notifier.NotifyReplaySuccessAsync(completed, newJob.JobId, cancellationToken)
                .ConfigureAwait(false);

            return new ReplayResult(
                Success: true,
                NewJobId: newJob.JobId,
                ErrorMessage: null,
                UpdatedEntry: completed);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to replay entry {EntryId}", entry.EntryId);

            // Mark replay failed
            now = _timeProvider.GetUtcNow();
            var failed = replaying.FailReplay(ex.Message, initiatedBy, now);
            await _deadLetterRepository.UpdateAsync(failed, cancellationToken).ConfigureAwait(false);

            // Update audit record
            var failedAudit = auditRecord.Fail(ex.Message, now);
            await _auditRepository.UpdateAsync(failedAudit, cancellationToken).ConfigureAwait(false);

            // Notify on exhausted
            if (failed.Status == DeadLetterStatus.Exhausted)
            {
                await _notifier.NotifyExhaustedAsync(failed, cancellationToken).ConfigureAwait(false);
            }

            return new ReplayResult(
                Success: false,
                NewJobId: null,
                ErrorMessage: ex.Message,
                UpdatedEntry: failed);
        }
    }
}
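A usage sketch of the replay flow, assuming the repositories, job creator, and logger instances are already wired (all instance names here are illustrative). Note that ReplayEntryAsync derives a fresh idempotency key per attempt ("{key}:replay:{n}"), so a replayed job is never deduplicated against the original.

var manager = new ReplayManager(
    deadLetterRepository, auditRepository, jobCreator,
    NullDeadLetterNotifier.Instance, TimeProvider.System,
    ReplayManagerOptions.Default, logger);

// Replay one entry; a failed attempt that exhausts MaxReplayAttempts moves the
// entry to DeadLetterStatus.Exhausted and fires NotifyExhaustedAsync.
var result = await manager.ReplayAsync("tenant-a", entryId, "operator@example.com", cancellationToken);
if (!result.Success)
{
    logger.LogWarning("Replay failed: {Error}", result.ErrorMessage);
}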
@@ -0,0 +1,39 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents an artifact produced by a job execution.
/// Artifacts are immutable outputs with content digests for provenance.
/// </summary>
public sealed record Artifact(
    /// <summary>Unique artifact identifier.</summary>
    Guid ArtifactId,

    /// <summary>Tenant owning this artifact.</summary>
    string TenantId,

    /// <summary>Job that produced this artifact.</summary>
    Guid JobId,

    /// <summary>Run containing the producing job (if any).</summary>
    Guid? RunId,

    /// <summary>Artifact type (e.g., "sbom", "scan-result", "attestation", "log").</summary>
    string ArtifactType,

    /// <summary>Storage URI (e.g., "s3://bucket/path", "file:///local/path").</summary>
    string Uri,

    /// <summary>Content digest (SHA-256) for integrity verification.</summary>
    string Digest,

    /// <summary>MIME type (e.g., "application/json", "application/vnd.cyclonedx+json").</summary>
    string? MimeType,

    /// <summary>Artifact size in bytes.</summary>
    long? SizeBytes,

    /// <summary>When the artifact was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>Optional metadata JSON blob.</summary>
    string? Metadata);
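Since Digest carries a SHA-256 of the artifact content, consumers can verify integrity after download. A minimal sketch, assuming the digest is stored as plain lowercase hex (adjust the comparison if a "sha256:" prefix convention is used):

using System.Security.Cryptography;

static bool VerifyArtifactDigest(byte[] content, Artifact artifact)
{
    // Recompute the content hash and compare case-insensitively.
    var computed = Convert.ToHexString(SHA256.HashData(content)).ToLowerInvariant();
    return string.Equals(computed, artifact.Digest, StringComparison.OrdinalIgnoreCase);
}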
@@ -0,0 +1,250 @@
|
||||
namespace StellaOps.Orchestrator.Core.Domain;
|
||||
|
||||
/// <summary>
|
||||
/// Represents an immutable audit log entry for orchestrator operations.
|
||||
/// Captures who did what, when, and with what effect.
|
||||
/// </summary>
|
||||
public sealed record AuditEntry(
|
||||
/// <summary>Unique audit entry identifier.</summary>
|
||||
Guid EntryId,
|
||||
|
||||
/// <summary>Tenant owning this entry.</summary>
|
||||
string TenantId,
|
||||
|
||||
/// <summary>Type of audited event.</summary>
|
||||
AuditEventType EventType,
|
||||
|
||||
/// <summary>Resource type being audited (job, run, source, quota, etc.).</summary>
|
||||
string ResourceType,
|
||||
|
||||
/// <summary>Resource identifier being audited.</summary>
|
||||
Guid ResourceId,
|
||||
|
||||
/// <summary>Actor who performed the action.</summary>
|
||||
string ActorId,
|
||||
|
||||
/// <summary>Actor type (user, system, worker, api-key).</summary>
|
||||
ActorType ActorType,
|
||||
|
||||
/// <summary>IP address of the actor (if applicable).</summary>
|
||||
string? ActorIp,
|
||||
|
||||
/// <summary>User agent string (if applicable).</summary>
|
||||
string? UserAgent,
|
||||
|
||||
/// <summary>HTTP method used (if applicable).</summary>
|
||||
string? HttpMethod,
|
||||
|
||||
/// <summary>Request path (if applicable).</summary>
|
||||
string? RequestPath,
|
||||
|
||||
/// <summary>State before the change (JSON).</summary>
|
||||
string? OldState,
|
||||
|
||||
/// <summary>State after the change (JSON).</summary>
|
||||
string? NewState,
|
||||
|
||||
/// <summary>Human-readable description of the change.</summary>
|
||||
string Description,
|
||||
|
||||
/// <summary>Correlation ID for distributed tracing.</summary>
|
||||
string? CorrelationId,
|
||||
|
||||
/// <summary>SHA-256 hash of the previous entry for chain integrity.</summary>
|
||||
string? PreviousEntryHash,
|
||||
|
||||
/// <summary>SHA-256 hash of this entry's content for integrity.</summary>
|
||||
string ContentHash,
|
||||
|
||||
/// <summary>Sequence number within the tenant's audit stream.</summary>
|
||||
long SequenceNumber,
|
||||
|
||||
    /// <summary>When the event occurred.</summary>
    DateTimeOffset OccurredAt,

    /// <summary>Optional metadata JSON blob.</summary>
    string? Metadata)
{
    /// <summary>
    /// Creates a new audit entry with computed hash.
    /// </summary>
    public static AuditEntry Create(
        string tenantId,
        AuditEventType eventType,
        string resourceType,
        Guid resourceId,
        string actorId,
        ActorType actorType,
        string description,
        string? oldState = null,
        string? newState = null,
        string? actorIp = null,
        string? userAgent = null,
        string? httpMethod = null,
        string? requestPath = null,
        string? correlationId = null,
        string? previousEntryHash = null,
        long sequenceNumber = 0,
        string? metadata = null)
    {
        var entryId = Guid.NewGuid();
        var occurredAt = DateTimeOffset.UtcNow;

        // Compute content hash from entry data
        var contentToHash = $"{entryId}|{tenantId}|{eventType}|{resourceType}|{resourceId}|{actorId}|{actorType}|{description}|{oldState}|{newState}|{occurredAt:O}|{sequenceNumber}";
        var contentHash = ComputeSha256(contentToHash);

        return new AuditEntry(
            EntryId: entryId,
            TenantId: tenantId,
            EventType: eventType,
            ResourceType: resourceType,
            ResourceId: resourceId,
            ActorId: actorId,
            ActorType: actorType,
            ActorIp: actorIp,
            UserAgent: userAgent,
            HttpMethod: httpMethod,
            RequestPath: requestPath,
            OldState: oldState,
            NewState: newState,
            Description: description,
            CorrelationId: correlationId,
            PreviousEntryHash: previousEntryHash,
            ContentHash: contentHash,
            SequenceNumber: sequenceNumber,
            OccurredAt: occurredAt,
            Metadata: metadata);
    }

    /// <summary>
    /// Verifies the integrity of this entry's content hash.
    /// </summary>
    public bool VerifyIntegrity()
    {
        var contentToHash = $"{EntryId}|{TenantId}|{EventType}|{ResourceType}|{ResourceId}|{ActorId}|{ActorType}|{Description}|{OldState}|{NewState}|{OccurredAt:O}|{SequenceNumber}";
        var computed = ComputeSha256(contentToHash);
        return string.Equals(ContentHash, computed, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Verifies the chain link to the previous entry.
    /// </summary>
    public bool VerifyChainLink(AuditEntry? previousEntry)
    {
        if (previousEntry is null)
        {
            return PreviousEntryHash is null || SequenceNumber == 1;
        }

        return string.Equals(PreviousEntryHash, previousEntry.ContentHash, StringComparison.OrdinalIgnoreCase);
    }

    private static string ComputeSha256(string content)
    {
        var bytes = System.Text.Encoding.UTF8.GetBytes(content);
        var hash = System.Security.Cryptography.SHA256.HashData(bytes);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

/// <summary>
/// Types of auditable events in the orchestrator.
/// </summary>
public enum AuditEventType
{
    // Job lifecycle events
    JobCreated = 100,
    JobScheduled = 101,
    JobLeased = 102,
    JobCompleted = 103,
    JobFailed = 104,
    JobCanceled = 105,
    JobRetried = 106,

    // Run lifecycle events
    RunCreated = 200,
    RunStarted = 201,
    RunCompleted = 202,
    RunFailed = 203,
    RunCanceled = 204,

    // Source management events
    SourceCreated = 300,
    SourceUpdated = 301,
    SourcePaused = 302,
    SourceResumed = 303,
    SourceDeleted = 304,

    // Quota management events
    QuotaCreated = 400,
    QuotaUpdated = 401,
    QuotaPaused = 402,
    QuotaResumed = 403,
    QuotaDeleted = 404,

    // SLO management events
    SloCreated = 500,
    SloUpdated = 501,
    SloEnabled = 502,
    SloDisabled = 503,
    SloDeleted = 504,
    SloAlertTriggered = 505,
    SloAlertAcknowledged = 506,
    SloAlertResolved = 507,

    // Dead-letter events
    DeadLetterCreated = 600,
    DeadLetterReplayed = 601,
    DeadLetterResolved = 602,
    DeadLetterExpired = 603,

    // Backfill events
    BackfillCreated = 700,
    BackfillStarted = 701,
    BackfillCompleted = 702,
    BackfillFailed = 703,
    BackfillCanceled = 704,

    // Ledger events
    LedgerExportRequested = 800,
    LedgerExportCompleted = 801,
    LedgerExportFailed = 802,

    // Worker events
    WorkerClaimed = 900,
    WorkerHeartbeat = 901,
    WorkerProgressReported = 902,
    WorkerCompleted = 903,

    // Security events
    AuthenticationSuccess = 1000,
    AuthenticationFailure = 1001,
    AuthorizationDenied = 1002,
    ApiKeyCreated = 1003,
    ApiKeyRevoked = 1004
}

/// <summary>
/// Types of actors that can perform auditable actions.
/// </summary>
public enum ActorType
{
    /// <summary>Human user via UI or API.</summary>
    User = 0,

    /// <summary>System-initiated action (scheduler, background job).</summary>
    System = 1,

    /// <summary>Worker process.</summary>
    Worker = 2,

    /// <summary>API key authentication.</summary>
    ApiKey = 3,

    /// <summary>Service-to-service call.</summary>
    Service = 4,

    /// <summary>Unknown or unidentified actor.</summary>
    Unknown = 99
}
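Reviewer note: a minimal sketch (not part of this commit) of how VerifyIntegrity and VerifyChainLink compose into a full chain audit over entries ordered by SequenceNumber; the AuditChainVerifier class name is hypothetical.

// Illustrative only -- not part of this commit.
public static class AuditChainVerifier
{
    /// <summary>Verifies an ordered audit chain end to end.</summary>
    public static bool VerifyChain(IReadOnlyList<AuditEntry> entries)
    {
        AuditEntry? previous = null;
        foreach (var entry in entries)
        {
            // Detect in-place tampering with the entry's recorded fields.
            if (!entry.VerifyIntegrity())
                return false;

            // Detect reordering, insertion, or deletion between entries.
            if (!entry.VerifyChainLink(previous))
                return false;

            previous = entry;
        }

        return true;
    }
}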
@@ -0,0 +1,429 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a request to backfill/reprocess events within a time window.
/// </summary>
public sealed record BackfillRequest(
    /// <summary>Unique backfill request identifier.</summary>
    Guid BackfillId,

    /// <summary>Tenant this backfill applies to.</summary>
    string TenantId,

    /// <summary>Source to backfill (null if job-type scoped).</summary>
    Guid? SourceId,

    /// <summary>Job type to backfill (null if source-scoped).</summary>
    string? JobType,

    /// <summary>Normalized scope key.</summary>
    string ScopeKey,

    /// <summary>Current status of the backfill.</summary>
    BackfillStatus Status,

    /// <summary>Start of the time window to backfill (inclusive).</summary>
    DateTimeOffset WindowStart,

    /// <summary>End of the time window to backfill (exclusive).</summary>
    DateTimeOffset WindowEnd,

    /// <summary>Current processing position within the window.</summary>
    DateTimeOffset? CurrentPosition,

    /// <summary>Total events estimated in the window.</summary>
    long? TotalEvents,

    /// <summary>Events successfully processed.</summary>
    long ProcessedEvents,

    /// <summary>Events skipped due to duplicate suppression.</summary>
    long SkippedEvents,

    /// <summary>Events that failed processing.</summary>
    long FailedEvents,

    /// <summary>Number of events to process per batch.</summary>
    int BatchSize,

    /// <summary>Whether this is a dry run (preview only, no changes).</summary>
    bool DryRun,

    /// <summary>Whether to force reprocessing (ignore duplicate suppression).</summary>
    bool ForceReprocess,

    /// <summary>Estimated duration for the backfill.</summary>
    TimeSpan? EstimatedDuration,

    /// <summary>Maximum allowed duration (safety limit).</summary>
    TimeSpan? MaxDuration,

    /// <summary>Results of safety validation checks.</summary>
    BackfillSafetyChecks? SafetyChecks,

    /// <summary>Reason for the backfill request.</summary>
    string Reason,

    /// <summary>Optional ticket reference for audit.</summary>
    string? Ticket,

    /// <summary>When the request was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When processing started.</summary>
    DateTimeOffset? StartedAt,

    /// <summary>When processing completed.</summary>
    DateTimeOffset? CompletedAt,

    /// <summary>Actor who created the request.</summary>
    string CreatedBy,

    /// <summary>Actor who last modified the request.</summary>
    string UpdatedBy,

    /// <summary>Error message if failed.</summary>
    string? ErrorMessage)
{
    /// <summary>
    /// Window duration.
    /// </summary>
    public TimeSpan WindowDuration => WindowEnd - WindowStart;

    /// <summary>
    /// Progress percentage (0-100).
    /// </summary>
    public double ProgressPercent => TotalEvents > 0
        ? Math.Round((double)(ProcessedEvents + SkippedEvents + FailedEvents) / TotalEvents.Value * 100, 2)
        : 0;

    /// <summary>
    /// Whether the backfill is in a terminal state.
    /// </summary>
    public bool IsTerminal => Status is BackfillStatus.Completed or BackfillStatus.Failed or BackfillStatus.Canceled;

    /// <summary>
    /// Creates a new backfill request.
    /// </summary>
    public static BackfillRequest Create(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        string reason,
        string createdBy,
        int batchSize = 100,
        bool dryRun = false,
        bool forceReprocess = false,
        string? ticket = null,
        TimeSpan? maxDuration = null)
    {
        if (windowEnd <= windowStart)
            throw new ArgumentException("Window end must be after window start.", nameof(windowEnd));

        if (batchSize <= 0 || batchSize > 10000)
            throw new ArgumentOutOfRangeException(nameof(batchSize), "Batch size must be between 1 and 10000.");

        var scopeKey = (sourceId, jobType) switch
        {
            (Guid s, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(s, j),
            (Guid s, _) => Watermark.CreateScopeKey(s),
            (_, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(j),
            _ => throw new ArgumentException("Either sourceId or jobType must be specified.")
        };

        var now = DateTimeOffset.UtcNow;
        return new BackfillRequest(
            BackfillId: Guid.NewGuid(),
            TenantId: tenantId,
            SourceId: sourceId,
            JobType: jobType,
            ScopeKey: scopeKey,
            Status: BackfillStatus.Pending,
            WindowStart: windowStart,
            WindowEnd: windowEnd,
            CurrentPosition: null,
            TotalEvents: null,
            ProcessedEvents: 0,
            SkippedEvents: 0,
            FailedEvents: 0,
            BatchSize: batchSize,
            DryRun: dryRun,
            ForceReprocess: forceReprocess,
            EstimatedDuration: null,
            MaxDuration: maxDuration,
            SafetyChecks: null,
            Reason: reason,
            Ticket: ticket,
            CreatedAt: now,
            StartedAt: null,
            CompletedAt: null,
            CreatedBy: createdBy,
            UpdatedBy: createdBy,
            ErrorMessage: null);
    }

    /// <summary>
    /// Transitions to validating status.
    /// </summary>
    public BackfillRequest StartValidation(string updatedBy)
    {
        if (Status != BackfillStatus.Pending)
            throw new InvalidOperationException($"Cannot start validation from status {Status}.");

        return this with
        {
            Status = BackfillStatus.Validating,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Records safety check results.
    /// </summary>
    public BackfillRequest WithSafetyChecks(BackfillSafetyChecks checks, long? totalEvents, TimeSpan? estimatedDuration, string updatedBy)
    {
        return this with
        {
            SafetyChecks = checks,
            TotalEvents = totalEvents,
            EstimatedDuration = estimatedDuration,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Transitions to running status.
    /// </summary>
    public BackfillRequest Start(string updatedBy)
    {
        if (Status != BackfillStatus.Validating)
            throw new InvalidOperationException($"Cannot start from status {Status}.");

        if (SafetyChecks?.HasBlockingIssues == true)
            throw new InvalidOperationException("Cannot start backfill with blocking safety issues.");

        return this with
        {
            Status = BackfillStatus.Running,
            StartedAt = DateTimeOffset.UtcNow,
            CurrentPosition = WindowStart,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Updates progress after processing a batch.
    /// </summary>
    public BackfillRequest UpdateProgress(
        DateTimeOffset newPosition,
        long processed,
        long skipped,
        long failed,
        string updatedBy)
    {
        if (Status != BackfillStatus.Running)
            throw new InvalidOperationException($"Cannot update progress in status {Status}.");

        return this with
        {
            CurrentPosition = newPosition,
            ProcessedEvents = ProcessedEvents + processed,
            SkippedEvents = SkippedEvents + skipped,
            FailedEvents = FailedEvents + failed,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Pauses the backfill.
    /// </summary>
    public BackfillRequest Pause(string updatedBy)
    {
        if (Status != BackfillStatus.Running)
            throw new InvalidOperationException($"Cannot pause from status {Status}.");

        return this with
        {
            Status = BackfillStatus.Paused,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Resumes a paused backfill.
    /// </summary>
    public BackfillRequest Resume(string updatedBy)
    {
        if (Status != BackfillStatus.Paused)
            throw new InvalidOperationException($"Cannot resume from status {Status}.");

        return this with
        {
            Status = BackfillStatus.Running,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Completes the backfill successfully.
    /// </summary>
    public BackfillRequest Complete(string updatedBy)
    {
        if (Status != BackfillStatus.Running)
            throw new InvalidOperationException($"Cannot complete from status {Status}.");

        return this with
        {
            Status = BackfillStatus.Completed,
            CompletedAt = DateTimeOffset.UtcNow,
            CurrentPosition = WindowEnd,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Fails the backfill with an error.
    /// </summary>
    public BackfillRequest Fail(string error, string updatedBy)
    {
        return this with
        {
            Status = BackfillStatus.Failed,
            CompletedAt = DateTimeOffset.UtcNow,
            ErrorMessage = error,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Cancels the backfill.
    /// </summary>
    public BackfillRequest Cancel(string updatedBy)
    {
        if (IsTerminal)
            throw new InvalidOperationException($"Cannot cancel from terminal status {Status}.");

        return this with
        {
            Status = BackfillStatus.Canceled,
            CompletedAt = DateTimeOffset.UtcNow,
            UpdatedBy = updatedBy
        };
    }
}

/// <summary>
/// Status of a backfill request.
/// </summary>
public enum BackfillStatus
{
    /// <summary>Request created, awaiting validation.</summary>
    Pending,

    /// <summary>Running safety validations.</summary>
    Validating,

    /// <summary>Actively processing events.</summary>
    Running,

    /// <summary>Temporarily paused.</summary>
    Paused,

    /// <summary>Successfully completed.</summary>
    Completed,

    /// <summary>Failed with error.</summary>
    Failed,

    /// <summary>Canceled by operator.</summary>
    Canceled
}

/// <summary>
/// Results of backfill safety validation checks.
/// </summary>
public sealed record BackfillSafetyChecks(
    /// <summary>Whether the source exists and is accessible.</summary>
    bool SourceExists,

    /// <summary>Whether there are overlapping active backfills.</summary>
    bool HasOverlappingBackfill,

    /// <summary>Whether the window is within the retention period.</summary>
    bool WithinRetention,

    /// <summary>Whether the estimated event count is within limits.</summary>
    bool WithinEventLimit,

    /// <summary>Whether the estimated duration is within the max duration.</summary>
    bool WithinDurationLimit,

    /// <summary>Whether required quotas are available.</summary>
    bool QuotaAvailable,

    /// <summary>Warning messages (non-blocking).</summary>
    IReadOnlyList<string> Warnings,

    /// <summary>Error messages (blocking).</summary>
    IReadOnlyList<string> Errors)
{
    /// <summary>
    /// Whether there are any blocking issues.
    /// </summary>
    public bool HasBlockingIssues => !SourceExists || HasOverlappingBackfill || !WithinRetention
        || !WithinEventLimit || !WithinDurationLimit || Errors.Count > 0;

    /// <summary>
    /// Whether the backfill is safe to proceed.
    /// </summary>
    public bool IsSafe => !HasBlockingIssues;

    /// <summary>
    /// Creates successful safety checks with no issues.
    /// </summary>
    public static BackfillSafetyChecks AllPassed() => new(
        SourceExists: true,
        HasOverlappingBackfill: false,
        WithinRetention: true,
        WithinEventLimit: true,
        WithinDurationLimit: true,
        QuotaAvailable: true,
        Warnings: [],
        Errors: []);
}

/// <summary>
/// Preview result for a dry-run backfill.
/// </summary>
public sealed record BackfillPreview(
    /// <summary>Scope being backfilled.</summary>
    string ScopeKey,

    /// <summary>Start of the backfill window.</summary>
    DateTimeOffset WindowStart,

    /// <summary>End of the backfill window.</summary>
    DateTimeOffset WindowEnd,

    /// <summary>Estimated total events in the window.</summary>
    long EstimatedEvents,

    /// <summary>Events that would be skipped (already processed).</summary>
    long SkippedEvents,

    /// <summary>Events that would be processed.</summary>
    long ProcessableEvents,

    /// <summary>Estimated duration.</summary>
    TimeSpan EstimatedDuration,

    /// <summary>Number of batches required.</summary>
    int EstimatedBatches,

    /// <summary>Safety validation results.</summary>
    BackfillSafetyChecks SafetyChecks,

    /// <summary>Sample of event keys that would be processed.</summary>
    IReadOnlyList<string> SampleEventKeys);
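Reviewer note: a usage sketch of the happy-path state machine above. The tenant, operator, source GUID, and event counts are placeholders, not values from this commit.

// Illustrative only: the happy-path lifecycle of a BackfillRequest.
var request = BackfillRequest.Create(
    tenantId: "tenant-a",                          // placeholder
    sourceId: sourceId,                            // placeholder Guid
    jobType: null,
    windowStart: DateTimeOffset.UtcNow.AddDays(-7),
    windowEnd: DateTimeOffset.UtcNow,
    reason: "Reprocess advisories after parser fix",
    createdBy: "operator@example.com");

request = request.StartValidation("operator@example.com");
request = request.WithSafetyChecks(
    BackfillSafetyChecks.AllPassed(),
    totalEvents: 5000,
    estimatedDuration: TimeSpan.FromMinutes(30),
    updatedBy: "operator@example.com");
request = request.Start("operator@example.com");

// One UpdateProgress call per processed batch, then:
request = request.Complete("operator@example.com");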
@@ -0,0 +1,42 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a dependency edge in a job DAG (directed acyclic graph).
/// The child job cannot start until the parent job reaches the outcome required by the edge type.
/// </summary>
public sealed record DagEdge(
    /// <summary>Unique edge identifier.</summary>
    Guid EdgeId,

    /// <summary>Tenant owning this edge.</summary>
    string TenantId,

    /// <summary>Run containing these jobs.</summary>
    Guid RunId,

    /// <summary>Parent job ID (must complete first).</summary>
    Guid ParentJobId,

    /// <summary>Child job ID (depends on parent).</summary>
    Guid ChildJobId,

    /// <summary>Edge type (e.g., "success", "always", "failure").</summary>
    string EdgeType,

    /// <summary>When this edge was created.</summary>
    DateTimeOffset CreatedAt);

/// <summary>
/// Edge types defining dependency semantics.
/// </summary>
public static class DagEdgeTypes
{
    /// <summary>Child runs only if parent succeeds.</summary>
    public const string Success = "success";

    /// <summary>Child runs regardless of parent outcome.</summary>
    public const string Always = "always";

    /// <summary>Child runs only if parent fails.</summary>
    public const string Failure = "failure";
}
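Reviewer note: a sketch of how a scheduler might evaluate these edge types. It references the JobStatus enum introduced later in this commit; the DagEdgeEvaluator class itself is hypothetical and not part of the diff.

// Illustrative only: decides whether an edge is satisfied by the parent's outcome.
public static class DagEdgeEvaluator
{
    public static bool IsSatisfied(DagEdge edge, JobStatus parentStatus) => edge.EdgeType switch
    {
        // "always" still requires the parent to have reached a terminal state.
        DagEdgeTypes.Always => parentStatus is JobStatus.Succeeded or JobStatus.Failed
            or JobStatus.Canceled or JobStatus.TimedOut,
        DagEdgeTypes.Success => parentStatus == JobStatus.Succeeded,
        DagEdgeTypes.Failure => parentStatus == JobStatus.Failed,
        _ => false
    };
}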
@@ -0,0 +1,292 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a job that has been moved to the dead-letter store after exhausting retries
/// or encountering a non-retryable error.
/// </summary>
public sealed record DeadLetterEntry(
    /// <summary>Unique dead-letter entry identifier.</summary>
    Guid EntryId,

    /// <summary>Tenant owning this entry.</summary>
    string TenantId,

    /// <summary>Original job that failed.</summary>
    Guid OriginalJobId,

    /// <summary>Run the job belonged to (if any).</summary>
    Guid? RunId,

    /// <summary>Source the job was processing (if any).</summary>
    Guid? SourceId,

    /// <summary>Job type (e.g., "scan.image", "advisory.nvd").</summary>
    string JobType,

    /// <summary>Job payload JSON (inputs, parameters).</summary>
    string Payload,

    /// <summary>SHA-256 digest of the payload.</summary>
    string PayloadDigest,

    /// <summary>Idempotency key from original job.</summary>
    string IdempotencyKey,

    /// <summary>Correlation ID for distributed tracing.</summary>
    string? CorrelationId,

    /// <summary>Current entry status.</summary>
    DeadLetterStatus Status,

    /// <summary>Classified error code.</summary>
    string ErrorCode,

    /// <summary>Human-readable failure reason.</summary>
    string FailureReason,

    /// <summary>Suggested remediation hint for operators.</summary>
    string? RemediationHint,

    /// <summary>Error classification category.</summary>
    ErrorCategory Category,

    /// <summary>Whether this error is potentially retryable.</summary>
    bool IsRetryable,

    /// <summary>Number of attempts made by original job.</summary>
    int OriginalAttempts,

    /// <summary>Number of replay attempts from dead-letter.</summary>
    int ReplayAttempts,

    /// <summary>Maximum replay attempts allowed.</summary>
    int MaxReplayAttempts,

    /// <summary>When the job originally failed.</summary>
    DateTimeOffset FailedAt,

    /// <summary>When the entry was created in dead-letter store.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the entry was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>When the entry expires and can be purged.</summary>
    DateTimeOffset ExpiresAt,

    /// <summary>When the entry was resolved (if applicable).</summary>
    DateTimeOffset? ResolvedAt,

    /// <summary>Resolution notes (if resolved).</summary>
    string? ResolutionNotes,

    /// <summary>Actor who created/submitted the original job.</summary>
    string CreatedBy,

    /// <summary>Actor who last updated the entry.</summary>
    string UpdatedBy)
{
    /// <summary>Default retention period for dead-letter entries.</summary>
    public static readonly TimeSpan DefaultRetention = TimeSpan.FromDays(30);

    /// <summary>Default maximum replay attempts.</summary>
    public const int DefaultMaxReplayAttempts = 3;

    /// <summary>Whether this entry is in a terminal state.</summary>
    public bool IsTerminal => Status is DeadLetterStatus.Replayed
        or DeadLetterStatus.Resolved
        or DeadLetterStatus.Exhausted
        or DeadLetterStatus.Expired;

    /// <summary>Whether more replay attempts are allowed.</summary>
    public bool CanReplay => !IsTerminal && IsRetryable && ReplayAttempts < MaxReplayAttempts;

    /// <summary>Creates a new dead-letter entry from a failed job.</summary>
    public static DeadLetterEntry FromFailedJob(
        Job job,
        string errorCode,
        string failureReason,
        string? remediationHint,
        ErrorCategory category,
        bool isRetryable,
        DateTimeOffset now,
        TimeSpan? retention = null,
        int? maxReplayAttempts = null)
    {
        ArgumentNullException.ThrowIfNull(job);
        ArgumentException.ThrowIfNullOrWhiteSpace(errorCode);
        ArgumentException.ThrowIfNullOrWhiteSpace(failureReason);

        var effectiveRetention = retention ?? DefaultRetention;
        var effectiveMaxReplays = maxReplayAttempts ?? DefaultMaxReplayAttempts;

        return new DeadLetterEntry(
            EntryId: Guid.NewGuid(),
            TenantId: job.TenantId,
            OriginalJobId: job.JobId,
            RunId: job.RunId,
            SourceId: null, // Would be extracted from payload if available
            JobType: job.JobType,
            Payload: job.Payload,
            PayloadDigest: job.PayloadDigest,
            IdempotencyKey: job.IdempotencyKey,
            CorrelationId: job.CorrelationId,
            Status: DeadLetterStatus.Pending,
            ErrorCode: errorCode,
            FailureReason: failureReason,
            RemediationHint: remediationHint,
            Category: category,
            IsRetryable: isRetryable,
            OriginalAttempts: job.Attempt,
            ReplayAttempts: 0,
            MaxReplayAttempts: effectiveMaxReplays,
            FailedAt: job.CompletedAt ?? now,
            CreatedAt: now,
            UpdatedAt: now,
            ExpiresAt: now.Add(effectiveRetention),
            ResolvedAt: null,
            ResolutionNotes: null,
            CreatedBy: job.CreatedBy,
            UpdatedBy: "system");
    }

    /// <summary>Marks entry as being replayed.</summary>
    public DeadLetterEntry StartReplay(string updatedBy, DateTimeOffset now)
    {
        if (!CanReplay)
            throw new InvalidOperationException($"Cannot replay entry in status {Status} with {ReplayAttempts}/{MaxReplayAttempts} attempts.");

        return this with
        {
            Status = DeadLetterStatus.Replaying,
            ReplayAttempts = ReplayAttempts + 1,
            UpdatedAt = now,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>Marks entry as successfully replayed.</summary>
    public DeadLetterEntry CompleteReplay(Guid newJobId, string updatedBy, DateTimeOffset now)
    {
        if (Status != DeadLetterStatus.Replaying)
            throw new InvalidOperationException($"Cannot complete replay from status {Status}.");

        return this with
        {
            Status = DeadLetterStatus.Replayed,
            ResolvedAt = now,
            ResolutionNotes = $"Replayed as job {newJobId}",
            UpdatedAt = now,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>Marks replay as failed.</summary>
    public DeadLetterEntry FailReplay(string reason, string updatedBy, DateTimeOffset now)
    {
        if (Status != DeadLetterStatus.Replaying)
            throw new InvalidOperationException($"Cannot fail replay from status {Status}.");

        var newStatus = ReplayAttempts >= MaxReplayAttempts
            ? DeadLetterStatus.Exhausted
            : DeadLetterStatus.Pending;

        return this with
        {
            Status = newStatus,
            FailureReason = reason,
            UpdatedAt = now,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>Manually resolves the entry without replay.</summary>
    public DeadLetterEntry Resolve(string notes, string updatedBy, DateTimeOffset now)
    {
        if (IsTerminal)
            throw new InvalidOperationException($"Cannot resolve entry in terminal status {Status}.");

        return this with
        {
            Status = DeadLetterStatus.Resolved,
            ResolvedAt = now,
            ResolutionNotes = notes,
            UpdatedAt = now,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>Marks entry as expired for cleanup.</summary>
    public DeadLetterEntry MarkExpired(DateTimeOffset now)
    {
        if (IsTerminal)
            throw new InvalidOperationException($"Cannot expire entry in terminal status {Status}.");

        return this with
        {
            Status = DeadLetterStatus.Expired,
            UpdatedAt = now,
            UpdatedBy = "system"
        };
    }
}

/// <summary>
/// Dead-letter entry lifecycle states.
/// </summary>
public enum DeadLetterStatus
{
    /// <summary>Entry awaiting operator action or replay.</summary>
    Pending = 0,

    /// <summary>Entry currently being replayed.</summary>
    Replaying = 1,

    /// <summary>Entry successfully replayed as a new job.</summary>
    Replayed = 2,

    /// <summary>Entry manually resolved without replay.</summary>
    Resolved = 3,

    /// <summary>Entry exhausted all replay attempts.</summary>
    Exhausted = 4,

    /// <summary>Entry expired and eligible for purge.</summary>
    Expired = 5
}

/// <summary>
/// Error classification categories for dead-letter entries.
/// </summary>
public enum ErrorCategory
{
    /// <summary>Unknown or unclassified error.</summary>
    Unknown = 0,

    /// <summary>Transient infrastructure error (network, timeout).</summary>
    Transient = 1,

    /// <summary>Resource not found (image, source, etc.).</summary>
    NotFound = 2,

    /// <summary>Authentication or authorization failure.</summary>
    AuthFailure = 3,

    /// <summary>Rate limiting or quota exceeded.</summary>
    RateLimited = 4,

    /// <summary>Invalid input or configuration.</summary>
    ValidationError = 5,

    /// <summary>Upstream service error (registry, advisory feed).</summary>
    UpstreamError = 6,

    /// <summary>Internal processing error (bug, corruption).</summary>
    InternalError = 7,

    /// <summary>Resource conflict (duplicate, version mismatch).</summary>
    Conflict = 8,

    /// <summary>Operation canceled by user or system.</summary>
    Canceled = 9
}
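Reviewer note: a sketch of one replay round-trip against the API above. The deadLetterEntry variable and SubmitReplacementJob helper are placeholders for whatever persistence and job-submission code this service actually uses.

// Illustrative only: one replay attempt. On success the entry becomes terminal;
// on failure it returns to Pending until MaxReplayAttempts is exhausted.
var entry = deadLetterEntry.StartReplay("operator@example.com", DateTimeOffset.UtcNow);
try
{
    Guid newJobId = SubmitReplacementJob(entry);   // hypothetical helper
    entry = entry.CompleteReplay(newJobId, "operator@example.com", DateTimeOffset.UtcNow);
}
catch (Exception ex)
{
    entry = entry.FailReplay(ex.Message, "operator@example.com", DateTimeOffset.UtcNow);
}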
@@ -0,0 +1,69 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents an operational incident triggered by threshold breaches.
/// Incidents are generated when failure rates exceed configured limits.
/// </summary>
public sealed record Incident(
    /// <summary>Unique incident identifier.</summary>
    Guid IncidentId,

    /// <summary>Tenant affected by this incident.</summary>
    string TenantId,

    /// <summary>Incident type (e.g., "failure_rate", "quota_exhausted", "circuit_open").</summary>
    string IncidentType,

    /// <summary>Incident severity (e.g., "warning", "critical").</summary>
    string Severity,

    /// <summary>Affected job type (if applicable).</summary>
    string? JobType,

    /// <summary>Affected source (if applicable).</summary>
    Guid? SourceId,

    /// <summary>Human-readable incident title.</summary>
    string Title,

    /// <summary>Detailed incident description.</summary>
    string Description,

    /// <summary>Current incident status.</summary>
    IncidentStatus Status,

    /// <summary>When the incident was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the incident was acknowledged.</summary>
    DateTimeOffset? AcknowledgedAt,

    /// <summary>Actor who acknowledged the incident.</summary>
    string? AcknowledgedBy,

    /// <summary>When the incident was resolved.</summary>
    DateTimeOffset? ResolvedAt,

    /// <summary>Actor who resolved the incident.</summary>
    string? ResolvedBy,

    /// <summary>Resolution notes.</summary>
    string? ResolutionNotes,

    /// <summary>Optional metadata JSON blob.</summary>
    string? Metadata);

/// <summary>
/// Incident lifecycle states.
/// </summary>
public enum IncidentStatus
{
    /// <summary>Incident is open and unacknowledged.</summary>
    Open = 0,

    /// <summary>Incident acknowledged by operator.</summary>
    Acknowledged = 1,

    /// <summary>Incident resolved.</summary>
    Resolved = 2
}
@@ -0,0 +1,81 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a unit of work to be executed by a worker.
/// Jobs are scheduled, leased to workers, and tracked through completion.
/// </summary>
public sealed record Job(
    /// <summary>Unique job identifier.</summary>
    Guid JobId,

    /// <summary>Tenant owning this job.</summary>
    string TenantId,

    /// <summary>Optional project scope within tenant.</summary>
    string? ProjectId,

    /// <summary>Run this job belongs to (if any).</summary>
    Guid? RunId,

    /// <summary>Job type (e.g., "scan.image", "advisory.nvd", "export.sbom").</summary>
    string JobType,

    /// <summary>Current job status.</summary>
    JobStatus Status,

    /// <summary>Priority (higher = more urgent). Default 0.</summary>
    int Priority,

    /// <summary>Current attempt number (1-based).</summary>
    int Attempt,

    /// <summary>Maximum retry attempts.</summary>
    int MaxAttempts,

    /// <summary>SHA-256 digest of the payload for determinism verification.</summary>
    string PayloadDigest,

    /// <summary>Job payload JSON (inputs, parameters).</summary>
    string Payload,

    /// <summary>Idempotency key for deduplication.</summary>
    string IdempotencyKey,

    /// <summary>Correlation ID for distributed tracing.</summary>
    string? CorrelationId,

    /// <summary>Current lease ID (if leased).</summary>
    Guid? LeaseId,

    /// <summary>Worker holding the lease (if leased).</summary>
    string? WorkerId,

    /// <summary>Task runner ID executing the job (if applicable).</summary>
    string? TaskRunnerId,

    /// <summary>Lease expiration time.</summary>
    DateTimeOffset? LeaseUntil,

    /// <summary>When the job was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the job was scheduled (quota cleared).</summary>
    DateTimeOffset? ScheduledAt,

    /// <summary>When the job was leased to a worker.</summary>
    DateTimeOffset? LeasedAt,

    /// <summary>When the job completed (terminal state).</summary>
    DateTimeOffset? CompletedAt,

    /// <summary>Earliest time the job can be scheduled (for backoff).</summary>
    DateTimeOffset? NotBefore,

    /// <summary>Terminal status reason (failure message, cancel reason, etc.).</summary>
    string? Reason,

    /// <summary>ID of the original job if this is a replay.</summary>
    Guid? ReplayOf,

    /// <summary>Actor who created/submitted the job.</summary>
    string CreatedBy);
@@ -0,0 +1,48 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents an immutable history entry for job state changes.
/// Provides an audit trail for all job lifecycle transitions.
/// </summary>
public sealed record JobHistory(
    /// <summary>Unique history entry identifier.</summary>
    Guid HistoryId,

    /// <summary>Tenant owning this entry.</summary>
    string TenantId,

    /// <summary>Job this history entry belongs to.</summary>
    Guid JobId,

    /// <summary>Sequence number within the job's history (1-based).</summary>
    int SequenceNo,

    /// <summary>Previous job status.</summary>
    JobStatus? FromStatus,

    /// <summary>New job status.</summary>
    JobStatus ToStatus,

    /// <summary>Attempt number at time of transition.</summary>
    int Attempt,

    /// <summary>Lease ID (if applicable).</summary>
    Guid? LeaseId,

    /// <summary>Worker ID (if applicable).</summary>
    string? WorkerId,

    /// <summary>Reason for the transition.</summary>
    string? Reason,

    /// <summary>When this transition occurred.</summary>
    DateTimeOffset OccurredAt,

    /// <summary>When this entry was recorded.</summary>
    DateTimeOffset RecordedAt,

    /// <summary>Actor who caused this transition.</summary>
    string ActorId,

    /// <summary>Actor type (system, operator, worker).</summary>
    string ActorType);
@@ -0,0 +1,30 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Job lifecycle states. Transitions follow the state machine:
/// Pending → Scheduled → Leased → (Succeeded | Failed | Canceled | TimedOut)
/// Failed jobs may transition to Pending via replay.
/// </summary>
public enum JobStatus
{
    /// <summary>Job enqueued but not yet scheduled (e.g., quota exceeded).</summary>
    Pending = 0,

    /// <summary>Job scheduled and awaiting worker lease.</summary>
    Scheduled = 1,

    /// <summary>Job leased to a worker for execution.</summary>
    Leased = 2,

    /// <summary>Job completed successfully.</summary>
    Succeeded = 3,

    /// <summary>Job failed after exhausting retries.</summary>
    Failed = 4,

    /// <summary>Job canceled by operator or system.</summary>
    Canceled = 5,

    /// <summary>Job timed out (lease expired without completion).</summary>
    TimedOut = 6
}
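Reviewer note: the doc comment describes the state machine in prose; a table-driven guard like the following sketch (not in this commit) is one way to enforce it. It encodes only the transitions the comment names; a real guard may permit more, such as canceling a Pending job.

// Illustrative only: legal transitions implied by the JobStatus doc comment.
public static class JobStatusMachine
{
    public static bool CanTransition(JobStatus from, JobStatus to) => (from, to) switch
    {
        (JobStatus.Pending, JobStatus.Scheduled) => true,
        (JobStatus.Scheduled, JobStatus.Leased) => true,
        (JobStatus.Leased, JobStatus.Succeeded) => true,
        (JobStatus.Leased, JobStatus.Failed) => true,
        (JobStatus.Leased, JobStatus.Canceled) => true,
        (JobStatus.Leased, JobStatus.TimedOut) => true,
        (JobStatus.Failed, JobStatus.Pending) => true, // replay
        _ => false
    };
}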
@@ -0,0 +1,60 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents rate-limit and concurrency quotas for job scheduling.
/// Quotas are scoped to tenant and optionally job type.
/// </summary>
public sealed record Quota(
    /// <summary>Unique quota identifier.</summary>
    Guid QuotaId,

    /// <summary>Tenant this quota applies to.</summary>
    string TenantId,

    /// <summary>Job type this quota applies to (null = all job types).</summary>
    string? JobType,

    /// <summary>Maximum concurrent active (leased) jobs.</summary>
    int MaxActive,

    /// <summary>Maximum jobs per hour (sliding window).</summary>
    int MaxPerHour,

    /// <summary>Burst capacity for token bucket.</summary>
    int BurstCapacity,

    /// <summary>Token refill rate (tokens per second).</summary>
    double RefillRate,

    /// <summary>Current available tokens.</summary>
    double CurrentTokens,

    /// <summary>Last time tokens were refilled.</summary>
    DateTimeOffset LastRefillAt,

    /// <summary>Current count of active (leased) jobs.</summary>
    int CurrentActive,

    /// <summary>Jobs scheduled in current hour window.</summary>
    int CurrentHourCount,

    /// <summary>Start of current hour window.</summary>
    DateTimeOffset CurrentHourStart,

    /// <summary>Whether this quota is currently paused (operator override).</summary>
    bool Paused,

    /// <summary>Operator-provided reason for pause.</summary>
    string? PauseReason,

    /// <summary>Ticket reference for quota change audit.</summary>
    string? QuotaTicket,

    /// <summary>When the quota was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the quota was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>Actor who last modified the quota.</summary>
    string UpdatedBy);
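Reviewer note: the Quota record stores token-bucket state (CurrentTokens, RefillRate, LastRefillAt) but the refill math is not in this commit. A standard lazy-refill sketch under those fields' documented semantics, with a hypothetical QuotaTokenBucket helper:

// Illustrative only: lazy token-bucket refill and consumption for Quota.
public static class QuotaTokenBucket
{
    public static (Quota Updated, bool Allowed) TryConsumeToken(Quota quota, DateTimeOffset now)
    {
        // Accrue tokens for the elapsed interval, capped at the burst capacity.
        var elapsed = (now - quota.LastRefillAt).TotalSeconds;
        var tokens = Math.Min(quota.BurstCapacity, quota.CurrentTokens + elapsed * quota.RefillRate);

        if (quota.Paused || tokens < 1.0)
            return (quota with { CurrentTokens = tokens, LastRefillAt = now }, false);

        // Spend one token for the job being scheduled.
        return (quota with { CurrentTokens = tokens - 1.0, LastRefillAt = now }, true);
    }
}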
@@ -0,0 +1,78 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a run (batch/workflow execution) containing multiple jobs.
/// Runs group related jobs (e.g., scanning an image produces multiple analyzer jobs).
/// </summary>
public sealed record Run(
    /// <summary>Unique run identifier.</summary>
    Guid RunId,

    /// <summary>Tenant owning this run.</summary>
    string TenantId,

    /// <summary>Optional project scope within tenant.</summary>
    string? ProjectId,

    /// <summary>Source that initiated this run.</summary>
    Guid SourceId,

    /// <summary>Run type (e.g., "scan", "advisory-sync", "export").</summary>
    string RunType,

    /// <summary>Current aggregate status of the run.</summary>
    RunStatus Status,

    /// <summary>Correlation ID for distributed tracing.</summary>
    string? CorrelationId,

    /// <summary>Total number of jobs in this run.</summary>
    int TotalJobs,

    /// <summary>Number of completed jobs (succeeded + failed + canceled).</summary>
    int CompletedJobs,

    /// <summary>Number of succeeded jobs.</summary>
    int SucceededJobs,

    /// <summary>Number of failed jobs.</summary>
    int FailedJobs,

    /// <summary>When the run was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the run started executing (first job leased).</summary>
    DateTimeOffset? StartedAt,

    /// <summary>When the run completed (all jobs terminal).</summary>
    DateTimeOffset? CompletedAt,

    /// <summary>Actor who initiated the run.</summary>
    string CreatedBy,

    /// <summary>Optional metadata JSON blob.</summary>
    string? Metadata);

/// <summary>
/// Run lifecycle states.
/// </summary>
public enum RunStatus
{
    /// <summary>Run created, jobs being enqueued.</summary>
    Pending = 0,

    /// <summary>Run is executing (at least one job leased).</summary>
    Running = 1,

    /// <summary>All jobs completed successfully.</summary>
    Succeeded = 2,

    /// <summary>Run completed with some failures.</summary>
    PartiallySucceeded = 3,

    /// <summary>All jobs failed.</summary>
    Failed = 4,

    /// <summary>Run canceled by operator.</summary>
    Canceled = 5
}
@@ -0,0 +1,341 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Immutable ledger entry for run execution records.
/// Provides a tamper-evident history of run outcomes with provenance to artifacts.
/// </summary>
public sealed record RunLedgerEntry(
    /// <summary>Unique ledger entry identifier.</summary>
    Guid LedgerId,

    /// <summary>Tenant owning this entry.</summary>
    string TenantId,

    /// <summary>Run this entry records.</summary>
    Guid RunId,

    /// <summary>Source that initiated the run.</summary>
    Guid SourceId,

    /// <summary>Run type (scan, advisory-sync, export).</summary>
    string RunType,

    /// <summary>Final run status.</summary>
    RunStatus FinalStatus,

    /// <summary>Total jobs in the run.</summary>
    int TotalJobs,

    /// <summary>Successfully completed jobs.</summary>
    int SucceededJobs,

    /// <summary>Failed jobs.</summary>
    int FailedJobs,

    /// <summary>When the run was created.</summary>
    DateTimeOffset RunCreatedAt,

    /// <summary>When the run started executing.</summary>
    DateTimeOffset? RunStartedAt,

    /// <summary>When the run completed.</summary>
    DateTimeOffset RunCompletedAt,

    /// <summary>Total execution duration.</summary>
    TimeSpan ExecutionDuration,

    /// <summary>Actor who initiated the run.</summary>
    string InitiatedBy,

    /// <summary>SHA-256 digest of the run's input payload.</summary>
    string InputDigest,

    /// <summary>Aggregated SHA-256 digest of all outputs.</summary>
    string OutputDigest,

    /// <summary>JSON array of artifact references with their digests.</summary>
    string ArtifactManifest,

    /// <summary>Sequence number in the tenant's ledger.</summary>
    long SequenceNumber,

    /// <summary>SHA-256 hash of the previous ledger entry.</summary>
    string? PreviousEntryHash,

    /// <summary>SHA-256 hash of this entry's content.</summary>
    string ContentHash,

    /// <summary>When this ledger entry was created.</summary>
    DateTimeOffset LedgerCreatedAt,

    /// <summary>Correlation ID for tracing.</summary>
    string? CorrelationId,

    /// <summary>Optional metadata JSON.</summary>
    string? Metadata)
{
    /// <summary>
    /// Creates a ledger entry from a completed run.
    /// </summary>
    public static RunLedgerEntry FromCompletedRun(
        Run run,
        IReadOnlyList<Artifact> artifacts,
        string inputDigest,
        long sequenceNumber,
        string? previousEntryHash,
        string? metadata = null)
    {
        if (run.CompletedAt is null)
        {
            throw new InvalidOperationException("Cannot create ledger entry from an incomplete run.");
        }

        var ledgerId = Guid.NewGuid();
        var ledgerCreatedAt = DateTimeOffset.UtcNow;

        // Build artifact manifest
        var artifactManifest = BuildArtifactManifest(artifacts);

        // Compute output digest from all artifact digests
        var outputDigest = ComputeOutputDigest(artifacts);

        // Compute execution duration
        var startTime = run.StartedAt ?? run.CreatedAt;
        var executionDuration = run.CompletedAt.Value - startTime;

        // Compute content hash for tamper evidence
        var contentToHash = $"{ledgerId}|{run.TenantId}|{run.RunId}|{run.SourceId}|{run.RunType}|{run.Status}|{run.TotalJobs}|{run.SucceededJobs}|{run.FailedJobs}|{run.CreatedAt:O}|{run.StartedAt:O}|{run.CompletedAt:O}|{inputDigest}|{outputDigest}|{sequenceNumber}|{previousEntryHash}|{ledgerCreatedAt:O}";
        var contentHash = ComputeSha256(contentToHash);

        return new RunLedgerEntry(
            LedgerId: ledgerId,
            TenantId: run.TenantId,
            RunId: run.RunId,
            SourceId: run.SourceId,
            RunType: run.RunType,
            FinalStatus: run.Status,
            TotalJobs: run.TotalJobs,
            SucceededJobs: run.SucceededJobs,
            FailedJobs: run.FailedJobs,
            RunCreatedAt: run.CreatedAt,
            RunStartedAt: run.StartedAt,
            RunCompletedAt: run.CompletedAt.Value,
            ExecutionDuration: executionDuration,
            InitiatedBy: run.CreatedBy,
            InputDigest: inputDigest,
            OutputDigest: outputDigest,
            ArtifactManifest: artifactManifest,
            SequenceNumber: sequenceNumber,
            PreviousEntryHash: previousEntryHash,
            ContentHash: contentHash,
            LedgerCreatedAt: ledgerCreatedAt,
            CorrelationId: run.CorrelationId,
            Metadata: metadata);
    }

    /// <summary>
    /// Verifies the integrity of this ledger entry.
    /// </summary>
    public bool VerifyIntegrity()
    {
        var contentToHash = $"{LedgerId}|{TenantId}|{RunId}|{SourceId}|{RunType}|{FinalStatus}|{TotalJobs}|{SucceededJobs}|{FailedJobs}|{RunCreatedAt:O}|{RunStartedAt:O}|{RunCompletedAt:O}|{InputDigest}|{OutputDigest}|{SequenceNumber}|{PreviousEntryHash}|{LedgerCreatedAt:O}";
        var computed = ComputeSha256(contentToHash);
        return string.Equals(ContentHash, computed, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Verifies the chain link to the previous entry.
    /// </summary>
    public bool VerifyChainLink(RunLedgerEntry? previousEntry)
    {
        if (previousEntry is null)
        {
            return PreviousEntryHash is null || SequenceNumber == 1;
        }

        return string.Equals(PreviousEntryHash, previousEntry.ContentHash, StringComparison.OrdinalIgnoreCase);
    }

    private static string BuildArtifactManifest(IReadOnlyList<Artifact> artifacts)
    {
        var entries = artifacts.Select(a => new
        {
            a.ArtifactId,
            a.ArtifactType,
            a.Uri,
            a.Digest,
            a.MimeType,
            a.SizeBytes,
            a.CreatedAt
        });

        return System.Text.Json.JsonSerializer.Serialize(entries);
    }

    private static string ComputeOutputDigest(IReadOnlyList<Artifact> artifacts)
    {
        if (artifacts.Count == 0)
        {
            return ComputeSha256("(no artifacts)");
        }

        // Sort by artifact ID for deterministic ordering
        var sortedDigests = artifacts
            .OrderBy(a => a.ArtifactId)
            .Select(a => a.Digest)
            .ToList();

        var combined = string.Join("|", sortedDigests);
        return ComputeSha256(combined);
    }

    private static string ComputeSha256(string content)
    {
        var bytes = System.Text.Encoding.UTF8.GetBytes(content);
        var hash = System.Security.Cryptography.SHA256.HashData(bytes);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

/// <summary>
/// Represents a ledger export operation.
/// </summary>
public sealed record LedgerExport(
    /// <summary>Unique export identifier.</summary>
    Guid ExportId,

    /// <summary>Tenant requesting the export.</summary>
    string TenantId,

    /// <summary>Export status.</summary>
    LedgerExportStatus Status,

    /// <summary>Export format (json, ndjson, csv).</summary>
    string Format,

    /// <summary>Start of the time range to export.</summary>
    DateTimeOffset? StartTime,

    /// <summary>End of the time range to export.</summary>
    DateTimeOffset? EndTime,

    /// <summary>Run types to include (null = all).</summary>
    string? RunTypeFilter,

    /// <summary>Source ID filter (null = all).</summary>
    Guid? SourceIdFilter,

    /// <summary>Number of entries exported.</summary>
    int EntryCount,

    /// <summary>URI where the export is stored.</summary>
    string? OutputUri,

    /// <summary>SHA-256 digest of the export file.</summary>
    string? OutputDigest,

    /// <summary>Size of the export in bytes.</summary>
    long? OutputSizeBytes,

    /// <summary>Actor who requested the export.</summary>
    string RequestedBy,

    /// <summary>When the export was requested.</summary>
    DateTimeOffset RequestedAt,

    /// <summary>When the export started processing.</summary>
    DateTimeOffset? StartedAt,

    /// <summary>When the export completed.</summary>
    DateTimeOffset? CompletedAt,

    /// <summary>Error message if export failed.</summary>
    string? ErrorMessage)
{
    /// <summary>
    /// Creates a new pending export request.
    /// </summary>
    public static LedgerExport CreateRequest(
        string tenantId,
        string format,
        string requestedBy,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        string? runTypeFilter = null,
        Guid? sourceIdFilter = null)
    {
        if (string.IsNullOrWhiteSpace(format))
        {
            throw new ArgumentException("Format is required.", nameof(format));
        }

        var validFormats = new[] { "json", "ndjson", "csv" };
        if (!validFormats.Contains(format.ToLowerInvariant()))
        {
            throw new ArgumentException($"Invalid format. Must be one of: {string.Join(", ", validFormats)}", nameof(format));
        }

        return new LedgerExport(
            ExportId: Guid.NewGuid(),
            TenantId: tenantId,
            Status: LedgerExportStatus.Pending,
            Format: format.ToLowerInvariant(),
            StartTime: startTime,
            EndTime: endTime,
            RunTypeFilter: runTypeFilter,
            SourceIdFilter: sourceIdFilter,
            EntryCount: 0,
            OutputUri: null,
            OutputDigest: null,
            OutputSizeBytes: null,
            RequestedBy: requestedBy,
            RequestedAt: DateTimeOffset.UtcNow,
            StartedAt: null,
            CompletedAt: null,
            ErrorMessage: null);
    }

    /// <summary>
    /// Marks the export as started.
    /// </summary>
    public LedgerExport Start() => this with
    {
        Status = LedgerExportStatus.Processing,
        StartedAt = DateTimeOffset.UtcNow
    };

    /// <summary>
    /// Marks the export as completed.
    /// </summary>
    public LedgerExport Complete(string outputUri, string outputDigest, long outputSizeBytes, int entryCount) => this with
    {
        Status = LedgerExportStatus.Completed,
        OutputUri = outputUri,
        OutputDigest = outputDigest,
        OutputSizeBytes = outputSizeBytes,
        EntryCount = entryCount,
        CompletedAt = DateTimeOffset.UtcNow
    };

    /// <summary>
    /// Marks the export as failed.
    /// </summary>
    public LedgerExport Fail(string errorMessage) => this with
    {
        Status = LedgerExportStatus.Failed,
        ErrorMessage = errorMessage,
        CompletedAt = DateTimeOffset.UtcNow
    };
}

/// <summary>
/// Status of a ledger export operation.
/// </summary>
public enum LedgerExportStatus
{
    Pending = 0,
    Processing = 1,
    Completed = 2,
    Failed = 3,
    Canceled = 4
}
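Reviewer note: a usage sketch driving a LedgerExport through its states. WriteEntries is a placeholder for whatever export writer this service actually uses, and the tenant/operator values are illustrative.

// Illustrative only: the export lifecycle.
var export = LedgerExport.CreateRequest(
    tenantId: "tenant-a",                          // placeholder
    format: "ndjson",
    requestedBy: "operator@example.com",
    startTime: DateTimeOffset.UtcNow.AddDays(-30));

export = export.Start();
try
{
    // WriteEntries is a hypothetical helper returning the stored file's metadata.
    var (uri, digest, sizeBytes, count) = WriteEntries(export);
    export = export.Complete(uri, digest, sizeBytes, count);
}
catch (Exception ex)
{
    export = export.Fail(ex.Message);
}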
@@ -0,0 +1,60 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a scheduled job trigger (cron-based or interval-based).
/// Schedules automatically create jobs at specified times.
/// </summary>
public sealed record Schedule(
    /// <summary>Unique schedule identifier.</summary>
    Guid ScheduleId,

    /// <summary>Tenant owning this schedule.</summary>
    string TenantId,

    /// <summary>Optional project scope within tenant.</summary>
    string? ProjectId,

    /// <summary>Source that will be used for jobs.</summary>
    Guid SourceId,

    /// <summary>Human-readable schedule name.</summary>
    string Name,

    /// <summary>Job type to create.</summary>
    string JobType,

    /// <summary>Cron expression (6-field with seconds, UTC).</summary>
    string CronExpression,

    /// <summary>Timezone for cron evaluation (IANA, e.g., "UTC", "America/New_York").</summary>
    string Timezone,

    /// <summary>Whether the schedule is enabled.</summary>
    bool Enabled,

    /// <summary>Job payload template JSON.</summary>
    string PayloadTemplate,

    /// <summary>Job priority for scheduled jobs.</summary>
    int Priority,

    /// <summary>Maximum retry attempts for scheduled jobs.</summary>
    int MaxAttempts,

    /// <summary>Last time a job was triggered from this schedule.</summary>
    DateTimeOffset? LastTriggeredAt,

    /// <summary>Next scheduled trigger time.</summary>
    DateTimeOffset? NextTriggerAt,

    /// <summary>When the schedule was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the schedule was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>Actor who created the schedule.</summary>
    string CreatedBy,

    /// <summary>Actor who last modified the schedule.</summary>
    string UpdatedBy);
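Reviewer note: the Schedule record stores a 6-field cron expression and an IANA timezone, but no evaluation logic appears in this commit. If the project uses the Cronos NuGet package (an assumption; any cron evaluator with seconds support would do), computing NextTriggerAt could look like this sketch:

// Illustrative only, assuming the Cronos package is available.
using Cronos;

public static class ScheduleEvaluator
{
    public static DateTimeOffset? ComputeNextTrigger(Schedule schedule, DateTimeOffset from)
    {
        if (!schedule.Enabled)
            return null;

        // 6-field expressions include a seconds component, per the CronExpression doc comment.
        var cron = CronExpression.Parse(schedule.CronExpression, CronFormat.IncludeSeconds);
        var zone = TimeZoneInfo.FindSystemTimeZoneById(schedule.Timezone);
        return cron.GetNextOccurrence(from, zone);
    }
}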
@@ -0,0 +1,423 @@
using System.Text.Json;

namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Signed manifest providing a provenance chain from ledger entries to artifacts.
/// Enables verification of artifact authenticity and integrity.
/// </summary>
public sealed record SignedManifest(
    /// <summary>Unique manifest identifier.</summary>
    Guid ManifestId,

    /// <summary>Manifest schema version.</summary>
    string SchemaVersion,

    /// <summary>Tenant owning this manifest.</summary>
    string TenantId,

    /// <summary>Type of provenance (run, export, attestation).</summary>
    ProvenanceType ProvenanceType,

    /// <summary>Subject of the provenance (run ID, export ID, etc.).</summary>
    Guid SubjectId,

    /// <summary>Provenance statements (JSON array).</summary>
    string Statements,

    /// <summary>Artifact references with digests (JSON array).</summary>
    string Artifacts,

    /// <summary>Materials (inputs) used to produce the artifacts (JSON array).</summary>
    string Materials,

    /// <summary>Build environment information (JSON object).</summary>
    string? BuildInfo,

    /// <summary>SHA-256 digest of the manifest payload (excluding signature).</summary>
    string PayloadDigest,

    /// <summary>Signature algorithm used.</summary>
    string SignatureAlgorithm,

    /// <summary>Base64-encoded signature.</summary>
    string Signature,

    /// <summary>Key ID used for signing.</summary>
    string KeyId,

    /// <summary>When the manifest was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>Expiration time of the manifest (if any).</summary>
    DateTimeOffset? ExpiresAt,

    /// <summary>Additional metadata (JSON object).</summary>
    string? Metadata)
{
    /// <summary>
    /// Current schema version for manifests.
    /// </summary>
    public const string CurrentSchemaVersion = "1.0.0";

    /// <summary>
    /// Creates an unsigned manifest from a ledger entry.
    /// The manifest must be signed separately using SigningService.
    /// </summary>
    public static SignedManifest CreateFromLedgerEntry(
        RunLedgerEntry ledger,
        string? buildInfo = null,
        string? metadata = null)
    {
        var statements = CreateStatementsFromLedger(ledger);
        var artifacts = ledger.ArtifactManifest;
        var materials = CreateMaterialsFromLedger(ledger);

        var payloadDigest = ComputePayloadDigest(
            ledger.TenantId,
            ProvenanceType.Run,
            ledger.RunId,
            statements,
            artifacts,
            materials);

        return new SignedManifest(
            ManifestId: Guid.NewGuid(),
            SchemaVersion: CurrentSchemaVersion,
            TenantId: ledger.TenantId,
            ProvenanceType: ProvenanceType.Run,
            SubjectId: ledger.RunId,
            Statements: statements,
            Artifacts: artifacts,
            Materials: materials,
            BuildInfo: buildInfo,
            PayloadDigest: payloadDigest,
            SignatureAlgorithm: "none",
            Signature: string.Empty,
            KeyId: string.Empty,
            CreatedAt: DateTimeOffset.UtcNow,
            ExpiresAt: null,
            Metadata: metadata);
    }

    /// <summary>
    /// Creates an unsigned manifest from a ledger export.
    /// </summary>
    public static SignedManifest CreateFromExport(
        LedgerExport export,
        IReadOnlyList<RunLedgerEntry> entries,
        string? buildInfo = null,
        string? metadata = null)
    {
        if (export.Status != LedgerExportStatus.Completed)
        {
            throw new InvalidOperationException("Cannot create manifest from incomplete export.");
        }

        var statements = CreateStatementsFromExport(export, entries);
        var artifacts = CreateExportArtifacts(export);
        var materials = CreateExportMaterials(entries);

        var payloadDigest = ComputePayloadDigest(
            export.TenantId,
            ProvenanceType.Export,
            export.ExportId,
            statements,
            artifacts,
            materials);

        return new SignedManifest(
            ManifestId: Guid.NewGuid(),
            SchemaVersion: CurrentSchemaVersion,
            TenantId: export.TenantId,
            ProvenanceType: ProvenanceType.Export,
            SubjectId: export.ExportId,
            Statements: statements,
            Artifacts: artifacts,
            Materials: materials,
            BuildInfo: buildInfo,
            PayloadDigest: payloadDigest,
            SignatureAlgorithm: "none",
            Signature: string.Empty,
            KeyId: string.Empty,
            CreatedAt: DateTimeOffset.UtcNow,
            ExpiresAt: null,
            Metadata: metadata);
    }

    /// <summary>
    /// Signs the manifest with the provided signature.
    /// </summary>
    public SignedManifest Sign(string signatureAlgorithm, string signature, string keyId, DateTimeOffset? expiresAt = null)
    {
        if (string.IsNullOrWhiteSpace(signatureAlgorithm))
        {
            throw new ArgumentException("Signature algorithm is required.", nameof(signatureAlgorithm));
        }

        if (string.IsNullOrWhiteSpace(signature))
        {
            throw new ArgumentException("Signature is required.", nameof(signature));
        }

        if (string.IsNullOrWhiteSpace(keyId))
        {
            throw new ArgumentException("Key ID is required.", nameof(keyId));
        }

        return this with
        {
            SignatureAlgorithm = signatureAlgorithm,
            Signature = signature,
            KeyId = keyId,
            ExpiresAt = expiresAt
        };
    }

    /// <summary>
    /// Checks if the manifest is signed.
    /// </summary>
    public bool IsSigned => !string.IsNullOrEmpty(Signature) && SignatureAlgorithm != "none";

    /// <summary>
    /// Checks if the manifest has expired.
    /// </summary>
    public bool IsExpired => ExpiresAt.HasValue && ExpiresAt.Value < DateTimeOffset.UtcNow;

    /// <summary>
    /// Verifies the payload digest integrity.
    /// </summary>
    public bool VerifyPayloadIntegrity()
    {
        var computed = ComputePayloadDigest(TenantId, ProvenanceType, SubjectId, Statements, Artifacts, Materials);
        return string.Equals(PayloadDigest, computed, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Parses the artifact manifest into typed objects.
    /// </summary>
    public IReadOnlyList<ArtifactReference> GetArtifactReferences()
    {
        if (string.IsNullOrEmpty(Artifacts) || Artifacts == "[]")
        {
            return Array.Empty<ArtifactReference>();
        }

        return JsonSerializer.Deserialize<List<ArtifactReference>>(Artifacts) ?? [];
    }

    /// <summary>
    /// Parses the material manifest into typed objects.
    /// </summary>
    public IReadOnlyList<MaterialReference> GetMaterialReferences()
    {
        if (string.IsNullOrEmpty(Materials) || Materials == "[]")
        {
            return Array.Empty<MaterialReference>();
        }

        return JsonSerializer.Deserialize<List<MaterialReference>>(Materials) ?? [];
    }

    /// <summary>
    /// Parses the statements into typed objects.
    /// </summary>
    public IReadOnlyList<ProvenanceStatement> GetStatements()
    {
        if (string.IsNullOrEmpty(Statements) || Statements == "[]")
        {
            return Array.Empty<ProvenanceStatement>();
        }

        return JsonSerializer.Deserialize<List<ProvenanceStatement>>(Statements) ?? [];
    }

    private static string CreateStatementsFromLedger(RunLedgerEntry ledger)
    {
        var statements = new List<ProvenanceStatement>
        {
            new(
                StatementType: "run_completed",
                Subject: $"run:{ledger.RunId}",
                Predicate: "produced",
                Object: $"outputs:{ledger.OutputDigest}",
                Timestamp: ledger.RunCompletedAt,
                Metadata: JsonSerializer.Serialize(new
                {
                    ledger.RunType,
                    ledger.FinalStatus,
                    ledger.TotalJobs,
                    ledger.SucceededJobs,
                    ledger.FailedJobs,
                    ledger.ExecutionDuration
                })),
            new(
                StatementType: "chain_link",
                Subject: $"ledger:{ledger.LedgerId}",
                Predicate: "follows",
                Object: ledger.PreviousEntryHash ?? "(genesis)",
                Timestamp: ledger.LedgerCreatedAt,
                Metadata: JsonSerializer.Serialize(new
                {
                    ledger.SequenceNumber,
                    ledger.ContentHash
                }))
        };

        return JsonSerializer.Serialize(statements);
    }

    private static string CreateMaterialsFromLedger(RunLedgerEntry ledger)
    {
        var materials = new List<MaterialReference>
        {
            new(
                Uri: $"input:{ledger.RunId}",
                Digest: ledger.InputDigest,
                MediaType: "application/json",
                Name: "run_input")
        };

        return JsonSerializer.Serialize(materials);
    }

    private static string CreateStatementsFromExport(LedgerExport export, IReadOnlyList<RunLedgerEntry> entries)
    {
        var statements = new List<ProvenanceStatement>
        {
            new(
                StatementType: "export_completed",
                Subject: $"export:{export.ExportId}",
                Predicate: "contains",
                Object: $"entries:{entries.Count}",
                Timestamp: export.CompletedAt ?? DateTimeOffset.UtcNow,
                Metadata: JsonSerializer.Serialize(new
                {
                    export.Format,
                    export.EntryCount,
                    export.StartTime,
                    export.EndTime,
                    export.RunTypeFilter,
                    export.SourceIdFilter
                }))
        };

        // Add chain integrity statement
        if (entries.Count > 0)
        {
            var first = entries.MinBy(e => e.SequenceNumber);
            var last = entries.MaxBy(e => e.SequenceNumber);
            if (first is not null && last is not null)
            {
                statements.Add(new ProvenanceStatement(
                    StatementType: "chain_range",
                    Subject: $"export:{export.ExportId}",
                    Predicate: "covers",
                    Object: $"sequence:{first.SequenceNumber}-{last.SequenceNumber}",
                    Timestamp: export.CompletedAt ?? DateTimeOffset.UtcNow,
                    Metadata: JsonSerializer.Serialize(new
                    {
                        FirstEntryHash = first.ContentHash,
                        LastEntryHash = last.ContentHash
                    })));
            }
        }

        return JsonSerializer.Serialize(statements);
    }

    private static string CreateExportArtifacts(LedgerExport export)
    {
        var artifacts = new List<ArtifactReference>
        {
            new(
                ArtifactId: export.ExportId,
                ArtifactType: "ledger_export",
                Uri: export.OutputUri ?? string.Empty,
                Digest: export.OutputDigest ?? string.Empty,
                MediaType: GetMediaType(export.Format),
                SizeBytes: export.OutputSizeBytes ?? 0)
        };

        return JsonSerializer.Serialize(artifacts);
    }

    private static string CreateExportMaterials(IReadOnlyList<RunLedgerEntry> entries)
    {
        var materials = entries.Select(e => new MaterialReference(
            Uri: $"ledger:{e.LedgerId}",
            Digest: e.ContentHash,
            MediaType: "application/json",
            Name: $"run_{e.RunId}")).ToList();

        return JsonSerializer.Serialize(materials);
    }

    private static string GetMediaType(string format) => format.ToLowerInvariant() switch
    {
        "json" => "application/json",
        "ndjson" => "application/x-ndjson",
        "csv" => "text/csv",
        _ => "application/octet-stream"
    };

    private static string ComputePayloadDigest(
        string tenantId,
        ProvenanceType provenanceType,
        Guid subjectId,
        string statements,
        string artifacts,
        string materials)
    {
        var payload = $"{tenantId}|{provenanceType}|{subjectId}|{statements}|{artifacts}|{materials}";
        var bytes = System.Text.Encoding.UTF8.GetBytes(payload);
        var hash = System.Security.Cryptography.SHA256.HashData(bytes);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

/// <summary>
/// Types of provenance tracked by manifests.
/// </summary>
public enum ProvenanceType
{
    /// <summary>Provenance for a completed run.</summary>
    Run = 0,

    /// <summary>Provenance for a ledger export.</summary>
    Export = 1,

    /// <summary>Provenance for an attestation.</summary>
    Attestation = 2
}

/// <summary>
/// Reference to an artifact in a manifest.
/// </summary>
public sealed record ArtifactReference(
    Guid ArtifactId,
    string ArtifactType,
    string Uri,
    string Digest,
    string MediaType,
    long SizeBytes);

/// <summary>
/// Reference to a material (input) in a manifest.
/// </summary>
public sealed record MaterialReference(
    string Uri,
    string Digest,
    string MediaType,
    string Name);

/// <summary>
/// A provenance statement in a manifest.
/// </summary>
public sealed record ProvenanceStatement(
    string StatementType,
    string Subject,
    string Predicate,
    string Object,
    DateTimeOffset Timestamp,
    string? Metadata);
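
For orientation, a minimal usage sketch of the manifest flow above — assuming a populated RunLedgerEntry named `ledger`; `signatureBase64` and the key ID are hypothetical placeholders, since actual signing is delegated to SigningService per the doc comment:

// Build an unsigned manifest from a completed run's ledger entry.
var manifest = SignedManifest.CreateFromLedgerEntry(ledger);

// Attach a signature produced elsewhere (algorithm, signature, key ID are placeholders).
var signed = manifest.Sign("ed25519", signatureBase64, "key-2024-01");

// A verifier later recomputes the payload digest and checks expiry.
if (!signed.IsSigned || signed.IsExpired || !signed.VerifyPayloadIntegrity())
{
    throw new InvalidOperationException("Manifest failed verification.");
}
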
@@ -0,0 +1,567 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Service Level Objective type.
/// </summary>
public enum SloType
{
    /// <summary>Availability SLO (percentage of successful requests).</summary>
    Availability,

    /// <summary>Latency SLO (percentile-based response time).</summary>
    Latency,

    /// <summary>Throughput SLO (minimum jobs processed per period).</summary>
    Throughput
}

/// <summary>
/// Time window for SLO computation.
/// </summary>
public enum SloWindow
{
    /// <summary>Rolling 1 hour window.</summary>
    OneHour,

    /// <summary>Rolling 1 day window.</summary>
    OneDay,

    /// <summary>Rolling 7 day window.</summary>
    SevenDays,

    /// <summary>Rolling 30 day window.</summary>
    ThirtyDays
}

/// <summary>
/// Alert severity for SLO violations.
/// </summary>
public enum AlertSeverity
{
    /// <summary>Informational - SLO approaching threshold.</summary>
    Info,

    /// <summary>Warning - SLO at risk.</summary>
    Warning,

    /// <summary>Critical - SLO likely to be breached.</summary>
    Critical,

    /// <summary>Emergency - SLO breached.</summary>
    Emergency
}

/// <summary>
/// Service Level Objective definition.
/// </summary>
public sealed record Slo(
    /// <summary>Unique SLO identifier.</summary>
    Guid SloId,

    /// <summary>Tenant this SLO belongs to.</summary>
    string TenantId,

    /// <summary>Human-readable name.</summary>
    string Name,

    /// <summary>Optional description.</summary>
    string? Description,

    /// <summary>Type of SLO.</summary>
    SloType Type,

    /// <summary>Job type this SLO applies to (null = all job types).</summary>
    string? JobType,

    /// <summary>Source ID this SLO applies to (null = all sources).</summary>
    Guid? SourceId,

    /// <summary>Target objective (e.g., 0.999 for 99.9% availability).</summary>
    double Target,

    /// <summary>Time window for SLO evaluation.</summary>
    SloWindow Window,

    /// <summary>For latency SLOs: the percentile (e.g., 0.95 for P95).</summary>
    double? LatencyPercentile,

    /// <summary>For latency SLOs: the target latency in seconds.</summary>
    double? LatencyTargetSeconds,

    /// <summary>For throughput SLOs: minimum jobs per period.</summary>
    int? ThroughputMinimum,

    /// <summary>Whether this SLO is actively monitored.</summary>
    bool Enabled,

    /// <summary>When the SLO was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the SLO was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>Actor who created the SLO.</summary>
    string CreatedBy,

    /// <summary>Actor who last modified the SLO.</summary>
    string UpdatedBy)
{
    /// <summary>Calculates the error budget as a decimal (1 - target).</summary>
    public double ErrorBudget => 1.0 - Target;

    /// <summary>Creates a new availability SLO.</summary>
    public static Slo CreateAvailability(
        string tenantId,
        string name,
        double target,
        SloWindow window,
        string createdBy,
        string? description = null,
        string? jobType = null,
        Guid? sourceId = null)
    {
        ValidateTarget(target);

        var now = DateTimeOffset.UtcNow;
        return new Slo(
            SloId: Guid.NewGuid(),
            TenantId: tenantId,
            Name: name,
            Description: description,
            Type: SloType.Availability,
            JobType: jobType,
            SourceId: sourceId,
            Target: target,
            Window: window,
            LatencyPercentile: null,
            LatencyTargetSeconds: null,
            ThroughputMinimum: null,
            Enabled: true,
            CreatedAt: now,
            UpdatedAt: now,
            CreatedBy: createdBy,
            UpdatedBy: createdBy);
    }

    /// <summary>Creates a new latency SLO.</summary>
    public static Slo CreateLatency(
        string tenantId,
        string name,
        double percentile,
        double targetSeconds,
        double target,
        SloWindow window,
        string createdBy,
        string? description = null,
        string? jobType = null,
        Guid? sourceId = null)
    {
        ValidateTarget(target);
        if (percentile < 0 || percentile > 1)
            throw new ArgumentOutOfRangeException(nameof(percentile), "Percentile must be between 0 and 1");
        if (targetSeconds <= 0)
            throw new ArgumentOutOfRangeException(nameof(targetSeconds), "Target latency must be positive");

        var now = DateTimeOffset.UtcNow;
        return new Slo(
            SloId: Guid.NewGuid(),
            TenantId: tenantId,
            Name: name,
            Description: description,
            Type: SloType.Latency,
            JobType: jobType,
            SourceId: sourceId,
            Target: target,
            Window: window,
            LatencyPercentile: percentile,
            LatencyTargetSeconds: targetSeconds,
            ThroughputMinimum: null,
            Enabled: true,
            CreatedAt: now,
            UpdatedAt: now,
            CreatedBy: createdBy,
            UpdatedBy: createdBy);
    }

    /// <summary>Creates a new throughput SLO.</summary>
    public static Slo CreateThroughput(
        string tenantId,
        string name,
        int minimum,
        double target,
        SloWindow window,
        string createdBy,
        string? description = null,
        string? jobType = null,
        Guid? sourceId = null)
    {
        ValidateTarget(target);
        if (minimum <= 0)
            throw new ArgumentOutOfRangeException(nameof(minimum), "Throughput minimum must be positive");

        var now = DateTimeOffset.UtcNow;
        return new Slo(
            SloId: Guid.NewGuid(),
            TenantId: tenantId,
            Name: name,
            Description: description,
            Type: SloType.Throughput,
            JobType: jobType,
            SourceId: sourceId,
            Target: target,
            Window: window,
            LatencyPercentile: null,
            LatencyTargetSeconds: null,
            ThroughputMinimum: minimum,
            Enabled: true,
            CreatedAt: now,
            UpdatedAt: now,
            CreatedBy: createdBy,
            UpdatedBy: createdBy);
    }

    /// <summary>Updates the SLO with new values.</summary>
    public Slo Update(
        string? name = null,
        string? description = null,
        double? target = null,
        bool? enabled = null,
        string? updatedBy = null)
    {
        if (target.HasValue)
            ValidateTarget(target.Value);

        return this with
        {
            Name = name ?? Name,
            Description = description ?? Description,
            Target = target ?? Target,
            Enabled = enabled ?? Enabled,
            UpdatedAt = DateTimeOffset.UtcNow,
            UpdatedBy = updatedBy ?? UpdatedBy
        };
    }

    /// <summary>Disables the SLO.</summary>
    public Slo Disable(string updatedBy) =>
        this with
        {
            Enabled = false,
            UpdatedAt = DateTimeOffset.UtcNow,
            UpdatedBy = updatedBy
        };

    /// <summary>Enables the SLO.</summary>
    public Slo Enable(string updatedBy) =>
        this with
        {
            Enabled = true,
            UpdatedAt = DateTimeOffset.UtcNow,
            UpdatedBy = updatedBy
        };

    /// <summary>Gets the window duration as a TimeSpan.</summary>
    public TimeSpan GetWindowDuration() => Window switch
    {
        SloWindow.OneHour => TimeSpan.FromHours(1),
        SloWindow.OneDay => TimeSpan.FromDays(1),
        SloWindow.SevenDays => TimeSpan.FromDays(7),
        SloWindow.ThirtyDays => TimeSpan.FromDays(30),
        _ => throw new InvalidOperationException($"Unknown window: {Window}")
    };

    private static void ValidateTarget(double target)
    {
        if (target <= 0 || target > 1)
            throw new ArgumentOutOfRangeException(nameof(target), "Target must be between 0 (exclusive) and 1 (inclusive)");
    }
}

/// <summary>
/// Current state of an SLO including burn rate and budget consumption.
/// </summary>
public sealed record SloState(
    /// <summary>The SLO this state belongs to.</summary>
    Guid SloId,

    /// <summary>Tenant this state belongs to.</summary>
    string TenantId,

    /// <summary>Current SLI value (actual performance).</summary>
    double CurrentSli,

    /// <summary>Total events/requests in the window.</summary>
    long TotalEvents,

    /// <summary>Good events (successful) in the window.</summary>
    long GoodEvents,

    /// <summary>Bad events (failed) in the window.</summary>
    long BadEvents,

    /// <summary>Error budget consumed (0-1 where 1 = fully consumed).</summary>
    double BudgetConsumed,

    /// <summary>Error budget remaining (0-1 where 1 = fully available).</summary>
    double BudgetRemaining,

    /// <summary>Current burn rate (1.0 = consuming budget at sustainable rate).</summary>
    double BurnRate,

    /// <summary>Projected time until budget exhaustion (null if not burning).</summary>
    TimeSpan? TimeToExhaustion,

    /// <summary>Whether the SLO is currently met.</summary>
    bool IsMet,

    /// <summary>Current alert severity based on budget consumption.</summary>
    AlertSeverity AlertSeverity,

    /// <summary>When this state was computed.</summary>
    DateTimeOffset ComputedAt,

    /// <summary>Start of the evaluation window.</summary>
    DateTimeOffset WindowStart,

    /// <summary>End of the evaluation window.</summary>
    DateTimeOffset WindowEnd)
{
    /// <summary>Creates a state indicating no data is available.</summary>
    public static SloState NoData(Guid sloId, string tenantId, DateTimeOffset now, SloWindow window)
    {
        var windowDuration = GetWindowDuration(window);
        return new SloState(
            SloId: sloId,
            TenantId: tenantId,
            CurrentSli: 1.0, // Assume good when no data
            TotalEvents: 0,
            GoodEvents: 0,
            BadEvents: 0,
            BudgetConsumed: 0,
            BudgetRemaining: 1.0,
            BurnRate: 0,
            TimeToExhaustion: null,
            IsMet: true,
            AlertSeverity: AlertSeverity.Info,
            ComputedAt: now,
            WindowStart: now - windowDuration,
            WindowEnd: now);
    }

    private static TimeSpan GetWindowDuration(SloWindow window) => window switch
    {
        SloWindow.OneHour => TimeSpan.FromHours(1),
        SloWindow.OneDay => TimeSpan.FromDays(1),
        SloWindow.SevenDays => TimeSpan.FromDays(7),
        SloWindow.ThirtyDays => TimeSpan.FromDays(30),
        _ => TimeSpan.FromDays(1)
    };
}

/// <summary>
/// Alert budget threshold configuration.
/// </summary>
public sealed record AlertBudgetThreshold(
    /// <summary>Unique threshold identifier.</summary>
    Guid ThresholdId,

    /// <summary>SLO this threshold applies to.</summary>
    Guid SloId,

    /// <summary>Tenant this threshold belongs to.</summary>
    string TenantId,

    /// <summary>Budget consumed percentage that triggers this alert (0-1).</summary>
    double BudgetConsumedThreshold,

    /// <summary>Burn rate threshold that triggers this alert.</summary>
    double? BurnRateThreshold,

    /// <summary>Severity of the alert.</summary>
    AlertSeverity Severity,

    /// <summary>Whether this threshold is enabled.</summary>
    bool Enabled,

    /// <summary>Notification channel for this alert.</summary>
    string? NotificationChannel,

    /// <summary>Notification endpoint for this alert.</summary>
    string? NotificationEndpoint,

    /// <summary>Cooldown period between alerts.</summary>
    TimeSpan Cooldown,

    /// <summary>When an alert was last triggered.</summary>
    DateTimeOffset? LastTriggeredAt,

    /// <summary>When the threshold was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the threshold was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>Actor who created the threshold.</summary>
    string CreatedBy,

    /// <summary>Actor who last modified the threshold.</summary>
    string UpdatedBy)
{
    /// <summary>Creates a new alert threshold.</summary>
    public static AlertBudgetThreshold Create(
        Guid sloId,
        string tenantId,
        double budgetConsumedThreshold,
        AlertSeverity severity,
        string createdBy,
        double? burnRateThreshold = null,
        string? notificationChannel = null,
        string? notificationEndpoint = null,
        TimeSpan? cooldown = null)
    {
        if (budgetConsumedThreshold < 0 || budgetConsumedThreshold > 1)
            throw new ArgumentOutOfRangeException(nameof(budgetConsumedThreshold), "Threshold must be between 0 and 1");

        var now = DateTimeOffset.UtcNow;
        return new AlertBudgetThreshold(
            ThresholdId: Guid.NewGuid(),
            SloId: sloId,
            TenantId: tenantId,
            BudgetConsumedThreshold: budgetConsumedThreshold,
            BurnRateThreshold: burnRateThreshold,
            Severity: severity,
            Enabled: true,
            NotificationChannel: notificationChannel,
            NotificationEndpoint: notificationEndpoint,
            Cooldown: cooldown ?? TimeSpan.FromHours(1),
            LastTriggeredAt: null,
            CreatedAt: now,
            UpdatedAt: now,
            CreatedBy: createdBy,
            UpdatedBy: createdBy);
    }

    /// <summary>Checks if this threshold should trigger based on current state.</summary>
    public bool ShouldTrigger(SloState state, DateTimeOffset now)
    {
        if (!Enabled) return false;

        // Check cooldown
        if (LastTriggeredAt.HasValue && (now - LastTriggeredAt.Value) < Cooldown)
            return false;

        // Check budget consumed threshold
        if (state.BudgetConsumed >= BudgetConsumedThreshold)
            return true;

        // Check burn rate threshold if set
        if (BurnRateThreshold.HasValue && state.BurnRate >= BurnRateThreshold.Value)
            return true;

        return false;
    }

    /// <summary>Records that this threshold was triggered.</summary>
    public AlertBudgetThreshold RecordTrigger(DateTimeOffset now) =>
        this with
        {
            LastTriggeredAt = now,
            UpdatedAt = now
        };
}

/// <summary>
/// SLO alert event.
/// </summary>
public sealed record SloAlert(
    /// <summary>Unique alert identifier.</summary>
    Guid AlertId,

    /// <summary>SLO this alert relates to.</summary>
    Guid SloId,

    /// <summary>Threshold that triggered this alert.</summary>
    Guid ThresholdId,

    /// <summary>Tenant this alert belongs to.</summary>
    string TenantId,

    /// <summary>Severity of the alert.</summary>
    AlertSeverity Severity,

    /// <summary>Alert message.</summary>
    string Message,

    /// <summary>Budget consumed at time of alert.</summary>
    double BudgetConsumed,

    /// <summary>Burn rate at time of alert.</summary>
    double BurnRate,

    /// <summary>Current SLI value at time of alert.</summary>
    double CurrentSli,

    /// <summary>When the alert was triggered.</summary>
    DateTimeOffset TriggeredAt,

    /// <summary>When the alert was acknowledged (null if not acknowledged).</summary>
    DateTimeOffset? AcknowledgedAt,

    /// <summary>Who acknowledged the alert.</summary>
    string? AcknowledgedBy,

    /// <summary>When the alert was resolved (null if not resolved).</summary>
    DateTimeOffset? ResolvedAt,

    /// <summary>How the alert was resolved.</summary>
    string? ResolutionNotes)
{
    /// <summary>Creates a new alert from an SLO state and threshold.</summary>
    public static SloAlert Create(
        Slo slo,
        SloState state,
        AlertBudgetThreshold threshold)
    {
        var message = threshold.BurnRateThreshold.HasValue && state.BurnRate >= threshold.BurnRateThreshold.Value
            ? $"SLO '{slo.Name}' burn rate {state.BurnRate:F2}x exceeds threshold {threshold.BurnRateThreshold.Value:F2}x"
            : $"SLO '{slo.Name}' error budget {state.BudgetConsumed:P1} consumed exceeds threshold {threshold.BudgetConsumedThreshold:P1}";

        return new SloAlert(
            AlertId: Guid.NewGuid(),
            SloId: slo.SloId,
            ThresholdId: threshold.ThresholdId,
            TenantId: slo.TenantId,
            Severity: threshold.Severity,
            Message: message,
            BudgetConsumed: state.BudgetConsumed,
            BurnRate: state.BurnRate,
            CurrentSli: state.CurrentSli,
            TriggeredAt: state.ComputedAt,
            AcknowledgedAt: null,
            AcknowledgedBy: null,
            ResolvedAt: null,
            ResolutionNotes: null);
    }

    /// <summary>Acknowledges the alert.</summary>
    public SloAlert Acknowledge(string acknowledgedBy, DateTimeOffset now) =>
        this with
        {
            AcknowledgedAt = now,
            AcknowledgedBy = acknowledgedBy
        };

    /// <summary>Resolves the alert.</summary>
    public SloAlert Resolve(string notes, DateTimeOffset now) =>
        this with
        {
            ResolvedAt = now,
            ResolutionNotes = notes
        };

    /// <summary>Whether this alert has been acknowledged.</summary>
    public bool IsAcknowledged => AcknowledgedAt.HasValue;

    /// <summary>Whether this alert has been resolved.</summary>
    public bool IsResolved => ResolvedAt.HasValue;
}
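
To make the budget arithmetic concrete: a 99.9% availability SLO has an error budget of 1 - 0.999 = 0.001, so an SLI of 0.995 (10 failures in 2,000 requests, a 0.005 failure rate) burns the budget at 5x the sustainable rate. A hedged sketch of wiring these records together (tenant and actor names are illustrative):

// Define the objective: 99.9% of jobs succeed over a rolling 30 days.
var slo = Slo.CreateAvailability(
    tenantId: "tenant-a",
    name: "scan-jobs-availability",
    target: 0.999,
    window: SloWindow.ThirtyDays,
    createdBy: "ops@example.test");

// ErrorBudget is 1 - Target = 0.001 (0.1% of requests may fail).
Console.WriteLine($"Error budget: {slo.ErrorBudget:P2}");

// Page at 80% budget consumption; ShouldTrigger also honors the cooldown
// so a flapping SLO does not re-page on every evaluation cycle.
var threshold = AlertBudgetThreshold.Create(
    slo.SloId, slo.TenantId,
    budgetConsumedThreshold: 0.8,
    severity: AlertSeverity.Critical,
    createdBy: "ops@example.test");
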
@@ -0,0 +1,42 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a job source (producer) that submits jobs to the orchestrator.
/// Examples: Concelier, Excititor, Scheduler, Export Center, Policy Engine.
/// </summary>
public sealed record Source(
    /// <summary>Unique source identifier.</summary>
    Guid SourceId,

    /// <summary>Tenant owning this source.</summary>
    string TenantId,

    /// <summary>Human-readable source name (e.g., "concelier-nvd").</summary>
    string Name,

    /// <summary>Source type/category (e.g., "advisory-ingest", "scanner", "export").</summary>
    string SourceType,

    /// <summary>Whether the source is currently enabled.</summary>
    bool Enabled,

    /// <summary>Whether the source is paused (throttled by operator).</summary>
    bool Paused,

    /// <summary>Operator-provided reason for pause (if paused).</summary>
    string? PauseReason,

    /// <summary>Ticket reference for pause audit trail.</summary>
    string? PauseTicket,

    /// <summary>Optional configuration JSON blob.</summary>
    string? Configuration,

    /// <summary>When the source was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the source was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>Actor who last modified the source.</summary>
    string UpdatedBy);
@@ -0,0 +1,60 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents a dynamic rate-limit override (throttle) for a source or job type.
/// Throttles are temporary pause/slow-down mechanisms, often in response to upstream pressure.
/// </summary>
public sealed record Throttle(
    /// <summary>Unique throttle identifier.</summary>
    Guid ThrottleId,

    /// <summary>Tenant this throttle applies to.</summary>
    string TenantId,

    /// <summary>Source to throttle (null if job-type scoped).</summary>
    Guid? SourceId,

    /// <summary>Job type to throttle (null if source-scoped).</summary>
    string? JobType,

    /// <summary>Whether this throttle is currently active.</summary>
    bool Active,

    /// <summary>Reason for the throttle (e.g., "429 from upstream", "Manual pause").</summary>
    string Reason,

    /// <summary>Optional ticket reference for audit.</summary>
    string? Ticket,

    /// <summary>When the throttle was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the throttle expires (null = indefinite).</summary>
    DateTimeOffset? ExpiresAt,

    /// <summary>Actor who created the throttle.</summary>
    string CreatedBy);

/// <summary>
/// Reason categories for throttle creation.
/// </summary>
public static class ThrottleReasons
{
    /// <summary>Upstream returned 429 Too Many Requests.</summary>
    public const string UpstreamRateLimited = "upstream_429";

    /// <summary>Upstream returned 503 Service Unavailable.</summary>
    public const string UpstreamUnavailable = "upstream_503";

    /// <summary>Upstream returned 5xx errors repeatedly.</summary>
    public const string UpstreamErrors = "upstream_5xx";

    /// <summary>Manual operator intervention.</summary>
    public const string ManualPause = "manual_pause";

    /// <summary>Circuit breaker triggered.</summary>
    public const string CircuitBreaker = "circuit_breaker";

    /// <summary>Quota exhausted.</summary>
    public const string QuotaExhausted = "quota_exhausted";
}
@@ -0,0 +1,162 @@
namespace StellaOps.Orchestrator.Core.Domain;

/// <summary>
/// Represents an event-time watermark for tracking processing progress.
/// Watermarks are scoped by source, job type, or custom key.
/// </summary>
public sealed record Watermark(
    /// <summary>Unique watermark identifier.</summary>
    Guid WatermarkId,

    /// <summary>Tenant this watermark belongs to.</summary>
    string TenantId,

    /// <summary>Source this watermark tracks (null if job-type scoped).</summary>
    Guid? SourceId,

    /// <summary>Job type this watermark tracks (null if source-scoped).</summary>
    string? JobType,

    /// <summary>Normalized scope key for uniqueness.</summary>
    string ScopeKey,

    /// <summary>Latest processed event time (high watermark).</summary>
    DateTimeOffset HighWatermark,

    /// <summary>Earliest event time in current window (low watermark for windowing).</summary>
    DateTimeOffset? LowWatermark,

    /// <summary>Monotonic sequence number for ordering.</summary>
    long SequenceNumber,

    /// <summary>Total events processed through this watermark.</summary>
    long ProcessedCount,

    /// <summary>SHA-256 hash of last processed batch for integrity verification.</summary>
    string? LastBatchHash,

    /// <summary>When the watermark was created.</summary>
    DateTimeOffset CreatedAt,

    /// <summary>When the watermark was last updated.</summary>
    DateTimeOffset UpdatedAt,

    /// <summary>Actor who last modified the watermark.</summary>
    string UpdatedBy)
{
    /// <summary>
    /// Creates a scope key for source-scoped watermarks.
    /// </summary>
    public static string CreateScopeKey(Guid sourceId) =>
        $"source:{sourceId:N}";

    /// <summary>
    /// Creates a scope key for job-type-scoped watermarks.
    /// </summary>
    public static string CreateScopeKey(string jobType) =>
        $"job_type:{jobType.ToLowerInvariant()}";

    /// <summary>
    /// Creates a scope key for source+job-type scoped watermarks.
    /// </summary>
    public static string CreateScopeKey(Guid sourceId, string jobType) =>
        $"source:{sourceId:N}:job_type:{jobType.ToLowerInvariant()}";

    /// <summary>
    /// Creates a new watermark with initial values.
    /// </summary>
    public static Watermark Create(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        DateTimeOffset highWatermark,
        string createdBy)
    {
        var scopeKey = (sourceId, jobType) switch
        {
            (Guid s, string j) when !string.IsNullOrEmpty(j) => CreateScopeKey(s, j),
            (Guid s, _) => CreateScopeKey(s),
            (_, string j) when !string.IsNullOrEmpty(j) => CreateScopeKey(j),
            _ => throw new ArgumentException("Either sourceId or jobType must be specified.")
        };

        var now = DateTimeOffset.UtcNow;
        return new Watermark(
            WatermarkId: Guid.NewGuid(),
            TenantId: tenantId,
            SourceId: sourceId,
            JobType: jobType,
            ScopeKey: scopeKey,
            HighWatermark: highWatermark,
            LowWatermark: null,
            SequenceNumber: 0,
            ProcessedCount: 0,
            LastBatchHash: null,
            CreatedAt: now,
            UpdatedAt: now,
            UpdatedBy: createdBy);
    }

    /// <summary>
    /// Advances the watermark after successful batch processing.
    /// </summary>
    public Watermark Advance(
        DateTimeOffset newHighWatermark,
        long eventsProcessed,
        string? batchHash,
        string updatedBy)
    {
        if (newHighWatermark < HighWatermark)
            throw new ArgumentException("New high watermark cannot be before current high watermark.", nameof(newHighWatermark));

        return this with
        {
            HighWatermark = newHighWatermark,
            SequenceNumber = SequenceNumber + 1,
            ProcessedCount = ProcessedCount + eventsProcessed,
            LastBatchHash = batchHash,
            UpdatedAt = DateTimeOffset.UtcNow,
            UpdatedBy = updatedBy
        };
    }

    /// <summary>
    /// Sets the event-time window bounds.
    /// </summary>
    public Watermark WithWindow(DateTimeOffset lowWatermark, DateTimeOffset highWatermark)
    {
        if (highWatermark < lowWatermark)
            throw new ArgumentException("High watermark cannot be before low watermark.");

        return this with
        {
            LowWatermark = lowWatermark,
            HighWatermark = highWatermark,
            UpdatedAt = DateTimeOffset.UtcNow
        };
    }
}

/// <summary>
/// Snapshot of watermark state for observability.
/// </summary>
public sealed record WatermarkSnapshot(
    string ScopeKey,
    DateTimeOffset HighWatermark,
    DateTimeOffset? LowWatermark,
    long SequenceNumber,
    long ProcessedCount,
    TimeSpan? Lag)
{
    /// <summary>
    /// Creates a snapshot from a watermark with calculated lag.
    /// </summary>
    public static WatermarkSnapshot FromWatermark(Watermark watermark, DateTimeOffset now) =>
        new(
            ScopeKey: watermark.ScopeKey,
            HighWatermark: watermark.HighWatermark,
            LowWatermark: watermark.LowWatermark,
            SequenceNumber: watermark.SequenceNumber,
            ProcessedCount: watermark.ProcessedCount,
            Lag: now - watermark.HighWatermark);
}
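
A short sketch of the intended watermark lifecycle; `sourceId` and `batchSha256` are illustrative placeholders:

// Track progress for one source; the scope key becomes "source:<guid>".
var wm = Watermark.Create(
    tenantId: "tenant-a",
    sourceId: sourceId,
    jobType: null,
    highWatermark: DateTimeOffset.UtcNow.AddHours(-1),
    createdBy: "ingest-worker");

// After each successful batch, advance monotonically; Advance throws if
// the new high watermark would move backwards.
wm = wm.Advance(
    newHighWatermark: DateTimeOffset.UtcNow,
    eventsProcessed: 250,
    batchHash: batchSha256,
    updatedBy: "ingest-worker");

// Lag (now - HighWatermark) is surfaced for dashboards via the snapshot.
var snapshot = WatermarkSnapshot.FromWatermark(wm, DateTimeOffset.UtcNow);
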
@@ -0,0 +1,450 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.RateLimiting;

/// <summary>
/// Adaptive rate limiter that combines token bucket, concurrency limiting, and backpressure handling.
/// Provides per-tenant/job-type rate limiting with automatic adaptation to upstream pressure.
/// </summary>
public sealed class AdaptiveRateLimiter
{
    private readonly TokenBucket _tokenBucket;
    private readonly ConcurrencyLimiter _concurrencyLimiter;
    private readonly BackpressureHandler _backpressureHandler;
    private readonly HourlyCounter _hourlyCounter;
    private readonly object _lock = new();

    /// <summary>
    /// Tenant ID this limiter applies to.
    /// </summary>
    public string TenantId { get; }

    /// <summary>
    /// Job type this limiter applies to (null = all types).
    /// </summary>
    public string? JobType { get; }

    /// <summary>
    /// Maximum jobs per hour.
    /// </summary>
    public int MaxPerHour { get; }

    /// <summary>
    /// Whether the limiter is paused by an operator.
    /// </summary>
    public bool IsPaused { get; private set; }

    /// <summary>
    /// Reason for pause (if paused).
    /// </summary>
    public string? PauseReason { get; private set; }

    /// <summary>
    /// Creates a new adaptive rate limiter from quota configuration.
    /// </summary>
    public AdaptiveRateLimiter(Quota quota, TimeProvider? timeProvider = null)
    {
        ArgumentNullException.ThrowIfNull(quota);

        TenantId = quota.TenantId;
        JobType = quota.JobType;
        MaxPerHour = quota.MaxPerHour;
        IsPaused = quota.Paused;
        PauseReason = quota.PauseReason;

        _tokenBucket = new TokenBucket(
            quota.BurstCapacity,
            quota.RefillRate,
            quota.CurrentTokens,
            quota.LastRefillAt);

        _concurrencyLimiter = new ConcurrencyLimiter(
            quota.MaxActive,
            quota.CurrentActive);

        _backpressureHandler = new BackpressureHandler(
            baseDelay: TimeSpan.FromSeconds(1),
            maxDelay: TimeSpan.FromMinutes(5),
            failureThreshold: 3,
            jitterFactor: 0.2);

        _hourlyCounter = new HourlyCounter(
            quota.MaxPerHour,
            quota.CurrentHourCount,
            quota.CurrentHourStart);
    }

    /// <summary>
    /// Creates a new adaptive rate limiter with explicit configuration.
    /// </summary>
    public AdaptiveRateLimiter(
        string tenantId,
        string? jobType,
        int maxActive,
        int maxPerHour,
        int burstCapacity,
        double refillRate)
    {
        TenantId = tenantId ?? throw new ArgumentNullException(nameof(tenantId));
        JobType = jobType;
        MaxPerHour = maxPerHour;

        _tokenBucket = new TokenBucket(burstCapacity, refillRate);
        _concurrencyLimiter = new ConcurrencyLimiter(maxActive);
        _backpressureHandler = new BackpressureHandler();
        _hourlyCounter = new HourlyCounter(maxPerHour);
    }

    /// <summary>
    /// Attempts to acquire permission to execute a job.
    /// </summary>
    /// <param name="now">Current time.</param>
    /// <returns>Result indicating whether acquisition was successful and why.</returns>
    public RateLimitResult TryAcquire(DateTimeOffset now)
    {
        lock (_lock)
        {
            // Check if paused
            if (IsPaused)
            {
                return RateLimitResult.Denied(RateLimitDenialReason.Paused, PauseReason);
            }

            // Check backpressure
            if (!_backpressureHandler.ShouldAllow(now))
            {
                var snapshot = _backpressureHandler.GetSnapshot(now);
                return RateLimitResult.Denied(
                    RateLimitDenialReason.Backpressure,
                    snapshot.LastFailureReason,
                    retryAfter: snapshot.TimeRemaining);
            }

            // Check hourly limit
            if (!_hourlyCounter.TryIncrement(now))
            {
                var hourlySnapshot = _hourlyCounter.GetSnapshot(now);
                return RateLimitResult.Denied(
                    RateLimitDenialReason.HourlyLimitExceeded,
                    $"Hourly limit of {MaxPerHour} exceeded",
                    retryAfter: hourlySnapshot.TimeUntilReset);
            }

            // Check concurrency
            if (!_concurrencyLimiter.TryAcquire())
            {
                // Rollback hourly counter
                _hourlyCounter.Decrement();
                var concurrencySnapshot = _concurrencyLimiter.GetSnapshot();
                return RateLimitResult.Denied(
                    RateLimitDenialReason.ConcurrencyLimitExceeded,
                    $"Concurrency limit of {concurrencySnapshot.MaxActive} exceeded");
            }

            // Check token bucket
            if (!_tokenBucket.TryConsume(now))
            {
                // Rollback concurrency and hourly counter
                _concurrencyLimiter.Release();
                _hourlyCounter.Decrement();
                var waitTime = _tokenBucket.EstimatedWaitTime(now);
                return RateLimitResult.Denied(
                    RateLimitDenialReason.TokensExhausted,
                    "Token bucket exhausted",
                    retryAfter: waitTime);
            }

            return RateLimitResult.Allowed();
        }
    }

    /// <summary>
    /// Releases a concurrency slot when a job completes.
    /// </summary>
    public void Release()
    {
        lock (_lock)
        {
            _concurrencyLimiter.Release();
        }
    }

    /// <summary>
    /// Records an upstream failure for backpressure calculation.
    /// </summary>
    /// <param name="statusCode">HTTP status code from upstream.</param>
    /// <param name="retryAfter">Optional Retry-After header value.</param>
    /// <param name="now">Current time.</param>
    /// <returns>Backpressure result.</returns>
    public BackpressureResult RecordUpstreamFailure(int statusCode, TimeSpan? retryAfter = null, DateTimeOffset? now = null)
    {
        lock (_lock)
        {
            return _backpressureHandler.RecordFailure(statusCode, retryAfter, now);
        }
    }

    /// <summary>
    /// Records a successful upstream request.
    /// </summary>
    public void RecordUpstreamSuccess()
    {
        lock (_lock)
        {
            _backpressureHandler.RecordSuccess();
        }
    }

    /// <summary>
    /// Pauses the limiter.
    /// </summary>
    /// <param name="reason">Reason for pause.</param>
    public void Pause(string reason)
    {
        lock (_lock)
        {
            IsPaused = true;
            PauseReason = reason;
        }
    }

    /// <summary>
    /// Resumes the limiter.
    /// </summary>
    public void Resume()
    {
        lock (_lock)
        {
            IsPaused = false;
            PauseReason = null;
        }
    }

    /// <summary>
    /// Gets a snapshot of the current limiter state.
    /// </summary>
    /// <param name="now">Current time.</param>
    /// <returns>Snapshot of limiter state.</returns>
    public AdaptiveRateLimiterSnapshot GetSnapshot(DateTimeOffset now)
    {
        lock (_lock)
        {
            return new AdaptiveRateLimiterSnapshot(
                TenantId: TenantId,
                JobType: JobType,
                IsPaused: IsPaused,
                PauseReason: PauseReason,
                TokenBucket: _tokenBucket.GetSnapshot(now),
                Concurrency: _concurrencyLimiter.GetSnapshot(),
                Backpressure: _backpressureHandler.GetSnapshot(now),
                HourlyCounter: _hourlyCounter.GetSnapshot(now));
        }
    }

    /// <summary>
    /// Exports the current state to a quota record for persistence.
    /// </summary>
    /// <param name="quotaId">Original quota ID.</param>
    /// <param name="now">Current time.</param>
    /// <param name="updatedBy">Actor performing the update.</param>
    /// <returns>Quota record with current state.</returns>
    public Quota ExportToQuota(Guid quotaId, DateTimeOffset now, string updatedBy)
    {
        lock (_lock)
        {
            var tokenSnapshot = _tokenBucket.GetSnapshot(now);
            var concurrencySnapshot = _concurrencyLimiter.GetSnapshot();
            var hourlySnapshot = _hourlyCounter.GetSnapshot(now);

            return new Quota(
                QuotaId: quotaId,
                TenantId: TenantId,
                JobType: JobType,
                MaxActive: concurrencySnapshot.MaxActive,
                MaxPerHour: MaxPerHour,
                BurstCapacity: tokenSnapshot.BurstCapacity,
                RefillRate: tokenSnapshot.RefillRate,
                CurrentTokens: tokenSnapshot.CurrentTokens,
                LastRefillAt: tokenSnapshot.LastRefillAt,
                CurrentActive: concurrencySnapshot.CurrentActive,
                CurrentHourCount: hourlySnapshot.CurrentCount,
                CurrentHourStart: hourlySnapshot.HourStart,
                Paused: IsPaused,
                PauseReason: PauseReason,
                QuotaTicket: null,
                CreatedAt: now, // NOTE: callers must restore the original quota's CreatedAt before persisting
                UpdatedAt: now,
                UpdatedBy: updatedBy);
        }
    }
}

/// <summary>
/// Result of a rate limit acquisition attempt.
/// </summary>
public sealed record RateLimitResult(
    bool IsAllowed,
    RateLimitDenialReason? DenialReason,
    string? DenialMessage,
    TimeSpan? RetryAfter)
{
    /// <summary>
    /// Creates an allowed result.
    /// </summary>
    public static RateLimitResult Allowed() => new(true, null, null, null);

    /// <summary>
    /// Creates a denied result.
    /// </summary>
    public static RateLimitResult Denied(
        RateLimitDenialReason reason,
        string? message = null,
        TimeSpan? retryAfter = null) =>
        new(false, reason, message, retryAfter);
}

/// <summary>
/// Reasons for rate limit denial.
/// </summary>
public enum RateLimitDenialReason
{
    /// <summary>Limiter is paused by operator.</summary>
    Paused,

    /// <summary>In backpressure backoff period.</summary>
    Backpressure,

    /// <summary>Hourly request limit exceeded.</summary>
    HourlyLimitExceeded,

    /// <summary>Concurrency limit exceeded.</summary>
    ConcurrencyLimitExceeded,

    /// <summary>Token bucket exhausted.</summary>
    TokensExhausted
}

/// <summary>
/// Snapshot of adaptive rate limiter state.
/// </summary>
public sealed record AdaptiveRateLimiterSnapshot(
    string TenantId,
    string? JobType,
    bool IsPaused,
    string? PauseReason,
    TokenBucketSnapshot TokenBucket,
    ConcurrencySnapshot Concurrency,
    BackpressureSnapshot Backpressure,
    HourlyCounterSnapshot HourlyCounter);

/// <summary>
/// Tracks requests per hour with automatic reset.
/// </summary>
public sealed class HourlyCounter
{
    private readonly object _lock = new();
    private int _currentCount;
    private DateTimeOffset _hourStart;

    /// <summary>
    /// Maximum allowed requests per hour.
    /// </summary>
    public int MaxPerHour { get; }

    /// <summary>
    /// Creates a new hourly counter.
    /// </summary>
    public HourlyCounter(int maxPerHour, int currentCount = 0, DateTimeOffset? hourStart = null)
    {
        if (maxPerHour <= 0)
            throw new ArgumentOutOfRangeException(nameof(maxPerHour), "Max per hour must be positive.");

        MaxPerHour = maxPerHour;
        _currentCount = currentCount;
        _hourStart = hourStart ?? TruncateToHour(DateTimeOffset.UtcNow);
    }

    /// <summary>
    /// Attempts to increment the counter.
    /// </summary>
    /// <param name="now">Current time.</param>
    /// <returns>True if the increment was allowed, false if the limit was reached.</returns>
    public bool TryIncrement(DateTimeOffset now)
    {
        lock (_lock)
        {
            MaybeResetHour(now);

            if (_currentCount < MaxPerHour)
            {
                _currentCount++;
                return true;
            }
            return false;
        }
    }

    /// <summary>
    /// Decrements the counter (for rollback).
    /// </summary>
    public void Decrement()
    {
        lock (_lock)
        {
            if (_currentCount > 0)
                _currentCount--;
        }
    }

    /// <summary>
    /// Gets a snapshot of the counter state.
    /// </summary>
    public HourlyCounterSnapshot GetSnapshot(DateTimeOffset now)
    {
        lock (_lock)
        {
            MaybeResetHour(now);
            var nextHour = _hourStart.AddHours(1);
            var timeUntilReset = nextHour - now;

            return new HourlyCounterSnapshot(
                MaxPerHour: MaxPerHour,
                CurrentCount: _currentCount,
                HourStart: _hourStart,
                TimeUntilReset: timeUntilReset > TimeSpan.Zero ? timeUntilReset : TimeSpan.Zero);
        }
    }

    private void MaybeResetHour(DateTimeOffset now)
    {
        var currentHour = TruncateToHour(now);
        if (currentHour > _hourStart)
        {
            _hourStart = currentHour;
            _currentCount = 0;
        }
    }

    private static DateTimeOffset TruncateToHour(DateTimeOffset dt) =>
        new(dt.Year, dt.Month, dt.Day, dt.Hour, 0, 0, dt.Offset);
}

/// <summary>
/// Snapshot of hourly counter state.
/// </summary>
public sealed record HourlyCounterSnapshot(
    int MaxPerHour,
    int CurrentCount,
    DateTimeOffset HourStart,
    TimeSpan TimeUntilReset)
{
    /// <summary>
    /// Remaining requests in the current hour.
    /// </summary>
    public int Remaining => Math.Max(0, MaxPerHour - CurrentCount);

    /// <summary>
    /// Whether the hourly limit has been reached.
    /// </summary>
    public bool IsExhausted => CurrentCount >= MaxPerHour;
}
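
The acquire/release discipline TryAcquire expects, sketched under the assumption that a job runner holds one limiter per tenant/job type; `Reschedule`, `RunJobAsync`, `UpstreamException`, and `job` are hypothetical stand-ins, not part of this change:

var now = DateTimeOffset.UtcNow;
var result = limiter.TryAcquire(now);
if (!result.IsAllowed)
{
    // DenialReason and RetryAfter tell the scheduler when to try again.
    Reschedule(job, result.RetryAfter ?? TimeSpan.FromSeconds(30));
    return;
}

try
{
    await RunJobAsync(job);
    limiter.RecordUpstreamSuccess();
}
catch (UpstreamException ex) // hypothetical exception type carrying the status code
{
    limiter.RecordUpstreamFailure(ex.StatusCode, ex.RetryAfter);
    throw;
}
finally
{
    // Always release the concurrency slot; TryAcquire already rolled back
    // the hourly count and tokens on its denial paths.
    limiter.Release();
}
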
@@ -0,0 +1,273 @@
namespace StellaOps.Orchestrator.Core.RateLimiting;

/// <summary>
/// Handles backpressure from upstream services (429, 503, etc.).
/// Implements exponential backoff with jitter for retry timing.
/// </summary>
public sealed class BackpressureHandler
{
    private readonly object _lock = new();
    private int _consecutiveFailures;
    private DateTimeOffset? _backoffUntil;
    private DateTimeOffset _lastFailureAt;
    private string? _lastFailureReason;

    /// <summary>
    /// Base delay for backoff calculation.
    /// </summary>
    public TimeSpan BaseDelay { get; }

    /// <summary>
    /// Maximum delay cap.
    /// </summary>
    public TimeSpan MaxDelay { get; }

    /// <summary>
    /// Number of failures before triggering full backoff.
    /// </summary>
    public int FailureThreshold { get; }

    /// <summary>
    /// Maximum random jitter to add (0.0 to 1.0 fraction of delay).
    /// </summary>
    public double JitterFactor { get; }

    /// <summary>
    /// Whether currently in backoff state.
    /// </summary>
    public bool IsInBackoff
    {
        get
        {
            lock (_lock)
            {
                return _backoffUntil.HasValue && DateTimeOffset.UtcNow < _backoffUntil.Value;
            }
        }
    }

    /// <summary>
    /// Number of consecutive failures.
    /// </summary>
    public int ConsecutiveFailures
    {
        get
        {
            lock (_lock)
            {
                return _consecutiveFailures;
            }
        }
    }

    /// <summary>
    /// Time until backoff expires (or TimeSpan.Zero if not in backoff).
    /// </summary>
    public TimeSpan TimeUntilReady
    {
        get
        {
            lock (_lock)
            {
                if (!_backoffUntil.HasValue)
                    return TimeSpan.Zero;

                var remaining = _backoffUntil.Value - DateTimeOffset.UtcNow;
                return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
            }
        }
    }

    /// <summary>
    /// Creates a new backpressure handler.
    /// </summary>
    /// <param name="baseDelay">Base delay for exponential backoff.</param>
    /// <param name="maxDelay">Maximum delay cap.</param>
    /// <param name="failureThreshold">Failures before entering backoff.</param>
    /// <param name="jitterFactor">Random jitter factor (0.0 to 1.0).</param>
    public BackpressureHandler(
        TimeSpan? baseDelay = null,
        TimeSpan? maxDelay = null,
        int failureThreshold = 1,
        double jitterFactor = 0.2)
    {
        BaseDelay = baseDelay ?? TimeSpan.FromSeconds(1);
        MaxDelay = maxDelay ?? TimeSpan.FromMinutes(5);
        FailureThreshold = failureThreshold > 0 ? failureThreshold : 1;
        JitterFactor = Math.Clamp(jitterFactor, 0.0, 1.0);

        if (BaseDelay <= TimeSpan.Zero)
            throw new ArgumentOutOfRangeException(nameof(baseDelay), "Base delay must be positive.");
        if (MaxDelay < BaseDelay)
            throw new ArgumentOutOfRangeException(nameof(maxDelay), "Max delay must be >= base delay.");
    }

    /// <summary>
    /// Records an upstream failure and potentially triggers backoff.
    /// </summary>
    /// <param name="statusCode">HTTP status code from upstream.</param>
    /// <param name="retryAfter">Optional Retry-After header value.</param>
    /// <param name="now">Current time.</param>
    /// <returns>Backoff result with recommended delay.</returns>
    public BackpressureResult RecordFailure(int statusCode, TimeSpan? retryAfter = null, DateTimeOffset? now = null)
    {
        var timestamp = now ?? DateTimeOffset.UtcNow;

        lock (_lock)
        {
            _consecutiveFailures++;
            _lastFailureAt = timestamp;
            _lastFailureReason = GetFailureReason(statusCode);

            // Use Retry-After if provided and reasonable
            if (retryAfter.HasValue && retryAfter.Value > TimeSpan.Zero && retryAfter.Value <= MaxDelay)
            {
                _backoffUntil = timestamp + retryAfter.Value;
                return new BackpressureResult(
                    ShouldBackoff: true,
                    BackoffDuration: retryAfter.Value,
                    BackoffUntil: _backoffUntil.Value,
                    ConsecutiveFailures: _consecutiveFailures,
                    Reason: _lastFailureReason,
                    StatusCode: statusCode);
            }

            // Calculate exponential backoff with jitter
            var delay = CalculateBackoffDelay(_consecutiveFailures, timestamp);
            _backoffUntil = timestamp + delay;

            return new BackpressureResult(
                ShouldBackoff: _consecutiveFailures >= FailureThreshold,
                BackoffDuration: delay,
                BackoffUntil: _backoffUntil.Value,
                ConsecutiveFailures: _consecutiveFailures,
                Reason: _lastFailureReason,
                StatusCode: statusCode);
        }
    }

    /// <summary>
    /// Records a successful request, resetting failure count.
    /// </summary>
    public void RecordSuccess()
    {
        lock (_lock)
        {
            _consecutiveFailures = 0;
            _backoffUntil = null;
            _lastFailureReason = null;
        }
    }

    /// <summary>
    /// Checks if a request should be allowed based on backoff state.
    /// </summary>
    /// <param name="now">Current time.</param>
    /// <returns>True if request should proceed, false if in backoff.</returns>
    public bool ShouldAllow(DateTimeOffset? now = null)
    {
        var timestamp = now ?? DateTimeOffset.UtcNow;

        lock (_lock)
        {
            if (!_backoffUntil.HasValue)
                return true;

            if (timestamp >= _backoffUntil.Value)
            {
                // Backoff expired
                return true;
            }

            return false;
        }
    }

    /// <summary>
    /// Resets the handler to initial state.
    /// </summary>
    public void Reset()
    {
        lock (_lock)
        {
            _consecutiveFailures = 0;
            _backoffUntil = null;
            _lastFailureReason = null;
        }
    }

    /// <summary>
    /// Gets a snapshot of the current backpressure state.
    /// </summary>
    /// <param name="now">Current time.</param>
    /// <returns>Snapshot of backpressure state.</returns>
    public BackpressureSnapshot GetSnapshot(DateTimeOffset? now = null)
    {
        var timestamp = now ?? DateTimeOffset.UtcNow;

        lock (_lock)
        {
            var isInBackoff = _backoffUntil.HasValue && timestamp < _backoffUntil.Value;
            var timeRemaining = isInBackoff ? _backoffUntil!.Value - timestamp : TimeSpan.Zero;

            return new BackpressureSnapshot(
                IsInBackoff: isInBackoff,
                ConsecutiveFailures: _consecutiveFailures,
                BackoffUntil: _backoffUntil,
                TimeRemaining: timeRemaining > TimeSpan.Zero ? timeRemaining : TimeSpan.Zero,
                LastFailureAt: _lastFailureAt,
                LastFailureReason: _lastFailureReason);
        }
    }

    private TimeSpan CalculateBackoffDelay(int failures, DateTimeOffset now)
    {
        // Exponential backoff: baseDelay * 2^(failures-1)
        var exponent = Math.Min(failures - 1, 10); // Cap exponent to prevent overflow
        var delayMs = BaseDelay.TotalMilliseconds * Math.Pow(2, exponent);

        // Add jitter
        if (JitterFactor > 0)
        {
            var jitter = delayMs * JitterFactor * Random.Shared.NextDouble();
            delayMs += jitter;
        }

        // Cap at max delay
        var delay = TimeSpan.FromMilliseconds(Math.Min(delayMs, MaxDelay.TotalMilliseconds));
        return delay;
    }

    private static string GetFailureReason(int statusCode) => statusCode switch
    {
        429 => "upstream_rate_limited",
        503 => "upstream_unavailable",
        502 => "upstream_bad_gateway",
        504 => "upstream_timeout",
        >= 500 and < 600 => "upstream_server_error",
        >= 400 and < 500 => "upstream_client_error",
        _ => "upstream_error"
    };
}
/// <summary>
|
||||
/// Result of recording a failure.
|
||||
/// </summary>
|
||||
public sealed record BackpressureResult(
|
||||
bool ShouldBackoff,
|
||||
TimeSpan BackoffDuration,
|
||||
DateTimeOffset BackoffUntil,
|
||||
int ConsecutiveFailures,
|
||||
string Reason,
|
||||
int StatusCode);
|
||||
|
||||
/// <summary>
|
||||
/// Snapshot of backpressure handler state.
|
||||
/// </summary>
|
||||
public sealed record BackpressureSnapshot(
|
||||
bool IsInBackoff,
|
||||
int ConsecutiveFailures,
|
||||
DateTimeOffset? BackoffUntil,
|
||||
TimeSpan TimeRemaining,
|
||||
DateTimeOffset LastFailureAt,
|
||||
string? LastFailureReason);
|
||||
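
A minimal usage sketch for the handler above (illustrative, not part of the commit): gating a call to an upstream HTTP API from inside an async method. `FetchFromUpstreamAsync` is a hypothetical placeholder for the actual upstream call.

var handler = new BackpressureHandler(
    baseDelay: TimeSpan.FromSeconds(2),
    maxDelay: TimeSpan.FromMinutes(2),
    failureThreshold: 3);

if (handler.ShouldAllow())
{
    var response = await FetchFromUpstreamAsync(); // hypothetical HttpResponseMessage-returning call
    if ((int)response.StatusCode >= 400)
    {
        var result = handler.RecordFailure(
            (int)response.StatusCode,
            retryAfter: response.Headers.RetryAfter?.Delta);
        if (result.ShouldBackoff)
            await Task.Delay(result.BackoffDuration); // honor the recommended delay
    }
    else
    {
        handler.RecordSuccess();
    }
}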
@@ -0,0 +1,226 @@
namespace StellaOps.Orchestrator.Core.RateLimiting;

/// <summary>
/// Concurrency limiter that tracks active jobs and enforces maximum concurrent execution.
/// </summary>
public sealed class ConcurrencyLimiter
{
    private readonly object _lock = new();
    private int _currentActive;

    /// <summary>
    /// Maximum allowed concurrent active jobs.
    /// </summary>
    public int MaxActive { get; }

    /// <summary>
    /// Current count of active jobs.
    /// </summary>
    public int CurrentActive
    {
        get
        {
            lock (_lock)
            {
                return _currentActive;
            }
        }
    }

    /// <summary>
    /// Number of available slots.
    /// </summary>
    public int AvailableSlots
    {
        get
        {
            lock (_lock)
            {
                return Math.Max(0, MaxActive - _currentActive);
            }
        }
    }

    /// <summary>
    /// Creates a new concurrency limiter.
    /// </summary>
    /// <param name="maxActive">Maximum concurrent jobs allowed.</param>
    /// <param name="currentActive">Starting count of active jobs.</param>
    public ConcurrencyLimiter(int maxActive, int currentActive = 0)
    {
        if (maxActive <= 0)
            throw new ArgumentOutOfRangeException(nameof(maxActive), "Max active must be positive.");
        if (currentActive < 0)
            throw new ArgumentOutOfRangeException(nameof(currentActive), "Current active cannot be negative.");

        MaxActive = maxActive;
        _currentActive = currentActive;
    }

    /// <summary>
    /// Attempts to acquire a slot for a new active job.
    /// </summary>
    /// <returns>True if slot was acquired, false if at capacity.</returns>
    public bool TryAcquire()
    {
        lock (_lock)
        {
            if (_currentActive < MaxActive)
            {
                _currentActive++;
                return true;
            }
            return false;
        }
    }

    /// <summary>
    /// Attempts to acquire multiple slots.
    /// </summary>
    /// <param name="count">Number of slots to acquire.</param>
    /// <returns>True if all slots were acquired, false otherwise (no partial acquisition).</returns>
    public bool TryAcquire(int count)
    {
        if (count <= 0)
            throw new ArgumentOutOfRangeException(nameof(count), "Count must be positive.");

        lock (_lock)
        {
            if (_currentActive + count <= MaxActive)
            {
                _currentActive += count;
                return true;
            }
            return false;
        }
    }

    /// <summary>
    /// Releases a slot when a job completes.
    /// </summary>
    /// <returns>True if slot was released, false if already at zero.</returns>
    public bool Release()
    {
        lock (_lock)
        {
            if (_currentActive > 0)
            {
                _currentActive--;
                return true;
            }
            return false;
        }
    }

    /// <summary>
    /// Releases multiple slots.
    /// </summary>
    /// <param name="count">Number of slots to release.</param>
    /// <returns>Number of slots actually released.</returns>
    public int Release(int count)
    {
        if (count <= 0)
            throw new ArgumentOutOfRangeException(nameof(count), "Count must be positive.");

        lock (_lock)
        {
            var released = Math.Min(count, _currentActive);
            _currentActive -= released;
            return released;
        }
    }

    /// <summary>
    /// Checks if a slot is available without acquiring it.
    /// </summary>
    /// <returns>True if at least one slot is available.</returns>
    public bool HasCapacity()
    {
        lock (_lock)
        {
            return _currentActive < MaxActive;
        }
    }

    /// <summary>
    /// Checks if multiple slots are available without acquiring them.
    /// </summary>
    /// <param name="count">Number of slots to check for.</param>
    /// <returns>True if requested slots are available.</returns>
    public bool HasCapacity(int count)
    {
        lock (_lock)
        {
            return _currentActive + count <= MaxActive;
        }
    }

    /// <summary>
    /// Resets the limiter to zero active jobs.
    /// </summary>
    /// <returns>Number of slots that were released.</returns>
    public int Reset()
    {
        lock (_lock)
        {
            var released = _currentActive;
            _currentActive = 0;
            return released;
        }
    }

    /// <summary>
    /// Sets the current active count directly (for recovery/sync scenarios).
    /// </summary>
    /// <param name="count">New active count.</param>
    public void SetActive(int count)
    {
        if (count < 0)
            throw new ArgumentOutOfRangeException(nameof(count), "Count cannot be negative.");

        lock (_lock)
        {
            _currentActive = count;
        }
    }

    /// <summary>
    /// Gets a snapshot of the current limiter state.
    /// </summary>
    /// <returns>Snapshot of limiter state.</returns>
    public ConcurrencySnapshot GetSnapshot()
    {
        lock (_lock)
        {
            return new ConcurrencySnapshot(MaxActive, _currentActive);
        }
    }
}

/// <summary>
/// Immutable snapshot of concurrency limiter state.
/// </summary>
public sealed record ConcurrencySnapshot(
    int MaxActive,
    int CurrentActive)
{
    /// <summary>
    /// Number of available slots.
    /// </summary>
    public int AvailableSlots => Math.Max(0, MaxActive - CurrentActive);

    /// <summary>
    /// Utilization percentage (0.0 to 1.0).
    /// </summary>
    public double Utilization => (double)CurrentActive / MaxActive;

    /// <summary>
    /// Whether the limiter is at capacity.
    /// </summary>
    public bool IsAtCapacity => CurrentActive >= MaxActive;

    /// <summary>
    /// Whether there are no active jobs.
    /// </summary>
    public bool IsIdle => CurrentActive == 0;
}
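
A short usage sketch (illustrative, not part of the commit): the acquire/release pairing the limiter expects, with the release in a finally block so the slot is returned even when the job throws. `RunJobAsync` is a hypothetical placeholder.

var limiter = new ConcurrencyLimiter(maxActive: 4);

if (limiter.TryAcquire())
{
    try
    {
        await RunJobAsync(); // hypothetical job execution
    }
    finally
    {
        limiter.Release(); // always return the slot
    }
}
else
{
    // At capacity: defer or requeue the job instead of running it.
}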
@@ -0,0 +1,210 @@
namespace StellaOps.Orchestrator.Core.RateLimiting;

/// <summary>
/// Token bucket rate limiter implementation.
/// Tokens refill at a constant rate up to a burst capacity.
/// </summary>
public sealed class TokenBucket
{
    private readonly object _lock = new();
    private double _currentTokens;
    private DateTimeOffset _lastRefillAt;

    /// <summary>
    /// Maximum tokens the bucket can hold (burst capacity).
    /// </summary>
    public int BurstCapacity { get; }

    /// <summary>
    /// Rate at which tokens are added (tokens per second).
    /// </summary>
    public double RefillRate { get; }

    /// <summary>
    /// Current number of available tokens.
    /// </summary>
    public double CurrentTokens
    {
        get
        {
            lock (_lock)
            {
                return _currentTokens;
            }
        }
    }

    /// <summary>
    /// Last time the bucket was refilled.
    /// </summary>
    public DateTimeOffset LastRefillAt
    {
        get
        {
            lock (_lock)
            {
                return _lastRefillAt;
            }
        }
    }

    /// <summary>
    /// Creates a new token bucket.
    /// </summary>
    /// <param name="burstCapacity">Maximum tokens the bucket can hold.</param>
    /// <param name="refillRate">Tokens per second to add.</param>
    /// <param name="initialTokens">Starting number of tokens (defaults to burst capacity).</param>
    /// <param name="lastRefillAt">Starting time for refill calculation.</param>
    public TokenBucket(
        int burstCapacity,
        double refillRate,
        double? initialTokens = null,
        DateTimeOffset? lastRefillAt = null)
    {
        if (burstCapacity <= 0)
            throw new ArgumentOutOfRangeException(nameof(burstCapacity), "Burst capacity must be positive.");
        if (refillRate <= 0)
            throw new ArgumentOutOfRangeException(nameof(refillRate), "Refill rate must be positive.");

        BurstCapacity = burstCapacity;
        RefillRate = refillRate;
        _currentTokens = Math.Min(initialTokens ?? burstCapacity, burstCapacity);
        _lastRefillAt = lastRefillAt ?? DateTimeOffset.UtcNow;
    }

    /// <summary>
    /// Attempts to consume a token from the bucket.
    /// </summary>
    /// <param name="now">Current time for refill calculation.</param>
    /// <param name="tokensRequired">Number of tokens to consume (default 1).</param>
    /// <returns>True if tokens were consumed, false if insufficient tokens.</returns>
    public bool TryConsume(DateTimeOffset now, int tokensRequired = 1)
    {
        if (tokensRequired <= 0)
            throw new ArgumentOutOfRangeException(nameof(tokensRequired), "Tokens required must be positive.");

        lock (_lock)
        {
            Refill(now);

            if (_currentTokens >= tokensRequired)
            {
                _currentTokens -= tokensRequired;
                return true;
            }

            return false;
        }
    }

    /// <summary>
    /// Checks if the bucket has enough tokens without consuming them.
    /// </summary>
    /// <param name="now">Current time for refill calculation.</param>
    /// <param name="tokensRequired">Number of tokens to check for.</param>
    /// <returns>True if sufficient tokens are available.</returns>
    public bool HasTokens(DateTimeOffset now, int tokensRequired = 1)
    {
        lock (_lock)
        {
            Refill(now);
            return _currentTokens >= tokensRequired;
        }
    }

    /// <summary>
    /// Gets estimated time until the specified number of tokens will be available.
    /// </summary>
    /// <param name="now">Current time for calculation.</param>
    /// <param name="tokensRequired">Number of tokens needed.</param>
    /// <returns>Time until tokens available, or TimeSpan.Zero if already available.</returns>
    public TimeSpan EstimatedWaitTime(DateTimeOffset now, int tokensRequired = 1)
    {
        lock (_lock)
        {
            Refill(now);

            if (_currentTokens >= tokensRequired)
                return TimeSpan.Zero;

            var tokensNeeded = tokensRequired - _currentTokens;
            var secondsToWait = tokensNeeded / RefillRate;
            return TimeSpan.FromSeconds(secondsToWait);
        }
    }

    /// <summary>
    /// Refills tokens based on elapsed time.
    /// </summary>
    /// <param name="now">Current time.</param>
    public void Refill(DateTimeOffset now)
    {
        lock (_lock)
        {
            if (now <= _lastRefillAt)
                return;

            var elapsed = (now - _lastRefillAt).TotalSeconds;
            var tokensToAdd = elapsed * RefillRate;

            _currentTokens = Math.Min(_currentTokens + tokensToAdd, BurstCapacity);
            _lastRefillAt = now;
        }
    }

    /// <summary>
    /// Resets the bucket to full capacity.
    /// </summary>
    /// <param name="now">Current time.</param>
    public void Reset(DateTimeOffset now)
    {
        lock (_lock)
        {
            _currentTokens = BurstCapacity;
            _lastRefillAt = now;
        }
    }

    /// <summary>
    /// Creates a snapshot of the current bucket state.
    /// </summary>
    /// <param name="now">Current time for refill calculation.</param>
    /// <returns>Snapshot of bucket state.</returns>
    public TokenBucketSnapshot GetSnapshot(DateTimeOffset now)
    {
        lock (_lock)
        {
            Refill(now);
            return new TokenBucketSnapshot(
                BurstCapacity,
                RefillRate,
                _currentTokens,
                _lastRefillAt);
        }
    }
}

/// <summary>
/// Immutable snapshot of token bucket state.
/// </summary>
public sealed record TokenBucketSnapshot(
    int BurstCapacity,
    double RefillRate,
    double CurrentTokens,
    DateTimeOffset LastRefillAt)
{
    /// <summary>
    /// Percentage of bucket that is full (0.0 to 1.0).
    /// </summary>
    public double FillPercent => CurrentTokens / BurstCapacity;

    /// <summary>
    /// Whether the bucket is empty.
    /// </summary>
    public bool IsEmpty => CurrentTokens < 1;

    /// <summary>
    /// Whether the bucket is full.
    /// </summary>
    public bool IsFull => CurrentTokens >= BurstCapacity;
}
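
A short usage sketch (illustrative, not part of the commit): a bucket allowing a sustained 10 tokens per second with bursts up to 20, consuming one token per request.

var bucket = new TokenBucket(burstCapacity: 20, refillRate: 10.0);

var now = DateTimeOffset.UtcNow;
if (bucket.TryConsume(now))
{
    // Proceed with the request.
}
else
{
    // Out of tokens; EstimatedWaitTime says when one should be available.
    await Task.Delay(bucket.EstimatedWaitTime(now));
}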
@@ -0,0 +1,399 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.Scheduling;

/// <summary>
/// Plans and manages job DAG (Directed Acyclic Graph) execution.
/// Handles dependency resolution, topological sorting, and critical path analysis.
/// </summary>
public sealed class DagPlanner
{
    /// <summary>
    /// Validates that the given edges form a valid DAG (no cycles).
    /// </summary>
    /// <param name="edges">DAG edges to validate.</param>
    /// <returns>Validation result with any detected cycles.</returns>
    public static DagValidationResult ValidateDag(IEnumerable<DagEdge> edges)
    {
        ArgumentNullException.ThrowIfNull(edges);

        var edgeList = edges.ToList();
        if (edgeList.Count == 0)
        {
            return DagValidationResult.Valid();
        }

        // Build adjacency list
        var adjacency = new Dictionary<Guid, List<Guid>>();
        var allNodes = new HashSet<Guid>();

        foreach (var edge in edgeList)
        {
            allNodes.Add(edge.ParentJobId);
            allNodes.Add(edge.ChildJobId);

            if (!adjacency.TryGetValue(edge.ParentJobId, out var children))
            {
                children = [];
                adjacency[edge.ParentJobId] = children;
            }
            children.Add(edge.ChildJobId);
        }

        // Detect cycles using DFS with coloring
        var white = new HashSet<Guid>(allNodes); // Unvisited
        var gray = new HashSet<Guid>(); // In progress
        var cycleNodes = new List<Guid>();

        foreach (var node in allNodes)
        {
            if (white.Contains(node))
            {
                if (HasCycleDfs(node, adjacency, white, gray, cycleNodes))
                {
                    return DagValidationResult.CycleDetected(cycleNodes);
                }
            }
        }

        return DagValidationResult.Valid();
    }

    private static bool HasCycleDfs(
        Guid node,
        Dictionary<Guid, List<Guid>> adjacency,
        HashSet<Guid> white,
        HashSet<Guid> gray,
        List<Guid> cycleNodes)
    {
        white.Remove(node);
        gray.Add(node);

        if (adjacency.TryGetValue(node, out var children))
        {
            foreach (var child in children)
            {
                if (gray.Contains(child))
                {
                    // Back edge found - cycle detected
                    cycleNodes.Add(child);
                    cycleNodes.Add(node);
                    return true;
                }

                if (white.Contains(child) && HasCycleDfs(child, adjacency, white, gray, cycleNodes))
                {
                    cycleNodes.Add(node);
                    return true;
                }
            }
        }

        gray.Remove(node);
        return false;
    }

    /// <summary>
    /// Performs topological sort on jobs based on their dependencies.
    /// </summary>
    /// <param name="jobIds">Job IDs to sort.</param>
    /// <param name="edges">Dependency edges.</param>
    /// <returns>Jobs in topologically sorted order (parents before children).</returns>
    public static IReadOnlyList<Guid> TopologicalSort(IEnumerable<Guid> jobIds, IEnumerable<DagEdge> edges)
    {
        ArgumentNullException.ThrowIfNull(jobIds);
        ArgumentNullException.ThrowIfNull(edges);

        var jobs = jobIds.ToHashSet();
        var edgeList = edges.ToList();

        // Build in-degree map and adjacency list
        var inDegree = jobs.ToDictionary(j => j, _ => 0);
        var adjacency = new Dictionary<Guid, List<Guid>>();

        foreach (var edge in edgeList)
        {
            if (!jobs.Contains(edge.ParentJobId) || !jobs.Contains(edge.ChildJobId))
            {
                continue; // Skip edges for jobs not in our set
            }

            inDegree[edge.ChildJobId]++;

            if (!adjacency.TryGetValue(edge.ParentJobId, out var children))
            {
                children = [];
                adjacency[edge.ParentJobId] = children;
            }
            children.Add(edge.ChildJobId);
        }

        // Kahn's algorithm
        var queue = new Queue<Guid>(inDegree.Where(kv => kv.Value == 0).Select(kv => kv.Key));
        var result = new List<Guid>(jobs.Count);

        while (queue.Count > 0)
        {
            var current = queue.Dequeue();
            result.Add(current);

            if (adjacency.TryGetValue(current, out var children))
            {
                foreach (var child in children)
                {
                    inDegree[child]--;
                    if (inDegree[child] == 0)
                    {
                        queue.Enqueue(child);
                    }
                }
            }
        }

        if (result.Count != jobs.Count)
        {
            throw new InvalidOperationException("Cycle detected in job DAG - topological sort failed.");
        }

        return result;
    }

    /// <summary>
    /// Gets all jobs that have no unmet dependencies (ready to schedule).
    /// </summary>
    /// <param name="jobs">All jobs in the DAG.</param>
    /// <param name="edges">Dependency edges.</param>
    /// <returns>Jobs with all dependencies satisfied or no dependencies.</returns>
    public static IReadOnlyList<Job> GetReadyJobs(IEnumerable<Job> jobs, IEnumerable<DagEdge> edges)
    {
        ArgumentNullException.ThrowIfNull(jobs);
        ArgumentNullException.ThrowIfNull(edges);

        var jobList = jobs.ToList();
        var edgeList = edges.ToList();

        // Build map of job ID to job and set of succeeded job IDs
        var jobMap = jobList.ToDictionary(j => j.JobId);
        var succeededJobs = jobList
            .Where(j => JobStateMachine.IsSuccess(j.Status))
            .Select(j => j.JobId)
            .ToHashSet();

        // Build map of job ID to parent dependencies
        var dependencies = new Dictionary<Guid, List<DagEdge>>();
        foreach (var edge in edgeList)
        {
            if (!dependencies.TryGetValue(edge.ChildJobId, out var deps))
            {
                deps = [];
                dependencies[edge.ChildJobId] = deps;
            }
            deps.Add(edge);
        }

        var ready = new List<Job>();

        foreach (var job in jobList)
        {
            // Skip jobs that aren't pending
            if (!JobStateMachine.IsPending(job.Status))
            {
                continue;
            }

            // Check if all dependencies are satisfied
            if (!dependencies.TryGetValue(job.JobId, out var deps))
            {
                // No dependencies - ready to go
                ready.Add(job);
                continue;
            }

            var allSatisfied = deps.All(edge => IsDependencySatisfied(edge, jobMap, succeededJobs));
            if (allSatisfied)
            {
                ready.Add(job);
            }
        }

        return ready;
    }

    private static bool IsDependencySatisfied(DagEdge edge, Dictionary<Guid, Job> jobMap, HashSet<Guid> succeededJobs)
    {
        if (!jobMap.TryGetValue(edge.ParentJobId, out var parentJob))
        {
            // Parent job doesn't exist - treat as satisfied (orphan edge)
            return true;
        }

        return edge.EdgeType switch
        {
            DagEdgeTypes.Success => succeededJobs.Contains(edge.ParentJobId),
            DagEdgeTypes.Always => JobStateMachine.IsTerminal(parentJob.Status),
            DagEdgeTypes.Failure => parentJob.Status == JobStatus.Failed,
            _ => false
        };
    }

    /// <summary>
    /// Calculates the critical path through the DAG based on estimated durations.
    /// </summary>
    /// <param name="jobs">Jobs with estimated durations.</param>
    /// <param name="edges">Dependency edges.</param>
    /// <param name="getDuration">Function to get estimated duration for a job.</param>
    /// <returns>Critical path information.</returns>
    public static CriticalPathResult CalculateCriticalPath(
        IEnumerable<Job> jobs,
        IEnumerable<DagEdge> edges,
        Func<Job, TimeSpan> getDuration)
    {
        ArgumentNullException.ThrowIfNull(jobs);
        ArgumentNullException.ThrowIfNull(edges);
        ArgumentNullException.ThrowIfNull(getDuration);

        var jobList = jobs.ToList();
        var edgeList = edges.ToList();

        if (jobList.Count == 0)
        {
            return new CriticalPathResult([], TimeSpan.Zero);
        }

        var jobMap = jobList.ToDictionary(j => j.JobId);
        var sortedIds = TopologicalSort(jobList.Select(j => j.JobId), edgeList);

        // Build reverse adjacency (child -> parents)
        var parents = new Dictionary<Guid, List<Guid>>();
        foreach (var edge in edgeList)
        {
            if (!parents.TryGetValue(edge.ChildJobId, out var parentList))
            {
                parentList = [];
                parents[edge.ChildJobId] = parentList;
            }
            parentList.Add(edge.ParentJobId);
        }

        // Forward pass: calculate earliest start times
        var earliestStart = new Dictionary<Guid, TimeSpan>();
        var earliestFinish = new Dictionary<Guid, TimeSpan>();

        foreach (var jobId in sortedIds)
        {
            var job = jobMap[jobId];
            var duration = getDuration(job);

            var maxParentFinish = TimeSpan.Zero;
            if (parents.TryGetValue(jobId, out var parentIds))
            {
                foreach (var parentId in parentIds)
                {
                    if (earliestFinish.TryGetValue(parentId, out var pf) && pf > maxParentFinish)
                    {
                        maxParentFinish = pf;
                    }
                }
            }

            earliestStart[jobId] = maxParentFinish;
            earliestFinish[jobId] = maxParentFinish + duration;
        }

        // Find total duration and identify critical path
        var totalDuration = earliestFinish.Values.DefaultIfEmpty(TimeSpan.Zero).Max();

        // Backward pass: identify critical path (jobs where slack = 0)
        var criticalPath = new List<Guid>();
        var latestFinish = new Dictionary<Guid, TimeSpan>();

        foreach (var jobId in sortedIds.Reverse())
        {
            var job = jobMap[jobId];
            var duration = getDuration(job);

            // Find minimum latest start of children
            var minChildStart = totalDuration;
            var adjacency = edgeList.Where(e => e.ParentJobId == jobId).Select(e => e.ChildJobId);
            foreach (var childId in adjacency)
            {
                if (latestFinish.TryGetValue(childId, out var lf))
                {
                    var childLatestStart = lf - getDuration(jobMap[childId]);
                    if (childLatestStart < minChildStart)
                    {
                        minChildStart = childLatestStart;
                    }
                }
            }

            latestFinish[jobId] = minChildStart;

            // Check if on critical path (slack = 0)
            var slack = minChildStart - earliestFinish[jobId];
            if (slack <= TimeSpan.Zero)
            {
                criticalPath.Add(jobId);
            }
        }

        criticalPath.Reverse();
        return new CriticalPathResult(criticalPath, totalDuration);
    }

    /// <summary>
    /// Gets jobs that are blocked by a specific failed job.
    /// </summary>
    /// <param name="failedJobId">The failed job ID.</param>
    /// <param name="edges">Dependency edges.</param>
    /// <returns>All job IDs that are transitively blocked.</returns>
    public static IReadOnlySet<Guid> GetBlockedJobs(Guid failedJobId, IEnumerable<DagEdge> edges)
    {
        ArgumentNullException.ThrowIfNull(edges);

        var edgeList = edges.ToList();
        var blocked = new HashSet<Guid>();
        var queue = new Queue<Guid>();

        // Find direct children with "success" dependency
        foreach (var edge in edgeList.Where(e => e.ParentJobId == failedJobId && e.EdgeType == DagEdgeTypes.Success))
        {
            queue.Enqueue(edge.ChildJobId);
        }

        // BFS to find all transitively blocked jobs
        while (queue.Count > 0)
        {
            var current = queue.Dequeue();
            if (!blocked.Add(current))
            {
                continue;
            }

            foreach (var edge in edgeList.Where(e => e.ParentJobId == current))
            {
                queue.Enqueue(edge.ChildJobId);
            }
        }

        return blocked;
    }
}

/// <summary>
/// Result of DAG validation.
/// </summary>
public sealed record DagValidationResult(
    bool IsValid,
    IReadOnlyList<Guid> CycleNodes)
{
    public static DagValidationResult Valid() => new(true, []);
    public static DagValidationResult CycleDetected(IReadOnlyList<Guid> cycleNodes) => new(false, cycleNodes);
}

/// <summary>
/// Result of critical path calculation.
/// </summary>
public sealed record CriticalPathResult(
    IReadOnlyList<Guid> CriticalPathJobIds,
    TimeSpan TotalDuration);
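
A small sketch of the planner's entry points (illustrative, not part of the commit). The `DagEdge` constructor shape shown here is an assumption; the type is defined elsewhere in the Domain namespace.

var a = Guid.NewGuid();
var b = Guid.NewGuid();
var c = Guid.NewGuid();
var edges = new[]
{
    new DagEdge(a, b, DagEdgeTypes.Success), // assumed constructor shape
    new DagEdge(b, c, DagEdgeTypes.Success),
};

var validation = DagPlanner.ValidateDag(edges);
if (validation.IsValid)
{
    var order = DagPlanner.TopologicalSort(new[] { a, b, c }, edges);
    // order is [a, b, c]: parents always precede children
}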
@@ -0,0 +1,223 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.Scheduling;

/// <summary>
/// Coordinates job scheduling decisions including quota checks,
/// dependency resolution, and status transitions.
/// </summary>
public interface IJobScheduler
{
    /// <summary>
    /// Evaluates whether a job can be scheduled.
    /// </summary>
    ScheduleDecision EvaluateScheduling(Job job, SchedulingContext context);

    /// <summary>
    /// Evaluates the outcome of a job completion and determines next steps.
    /// </summary>
    CompletionDecision EvaluateCompletion(Job job, JobStatus outcome, string? reason, CompletionContext context);

    /// <summary>
    /// Evaluates which pending jobs are ready to be scheduled.
    /// </summary>
    IReadOnlyList<Job> GetSchedulableJobs(IEnumerable<Job> pendingJobs, SchedulingContext context);
}

/// <summary>
/// Default implementation of job scheduler.
/// </summary>
public sealed class JobScheduler : IJobScheduler
{
    /// <summary>
    /// Evaluates whether a job can transition from Pending to Scheduled.
    /// </summary>
    public ScheduleDecision EvaluateScheduling(Job job, SchedulingContext context)
    {
        ArgumentNullException.ThrowIfNull(job);
        ArgumentNullException.ThrowIfNull(context);

        // Check current status
        if (job.Status != JobStatus.Pending)
        {
            return ScheduleDecision.Reject($"Job is not pending (current: {job.Status})");
        }

        // Check if job has a not-before time that hasn't passed
        if (job.NotBefore.HasValue && job.NotBefore.Value > context.Now)
        {
            return ScheduleDecision.Defer(job.NotBefore.Value, "Backoff period not elapsed");
        }

        // Check dependencies
        if (!context.AreDependenciesSatisfied)
        {
            return ScheduleDecision.Defer(null, "Dependencies not satisfied");
        }

        // Check quota
        if (!context.HasQuotaAvailable)
        {
            return ScheduleDecision.Defer(context.QuotaAvailableAt, "Quota exhausted");
        }

        // Check if source/job type is throttled
        if (context.IsThrottled)
        {
            return ScheduleDecision.Defer(context.ThrottleExpiresAt, context.ThrottleReason ?? "Throttled");
        }

        return ScheduleDecision.Schedule();
    }

    /// <summary>
    /// Evaluates the outcome of a job completion.
    /// </summary>
    public CompletionDecision EvaluateCompletion(Job job, JobStatus outcome, string? reason, CompletionContext context)
    {
        ArgumentNullException.ThrowIfNull(job);
        ArgumentNullException.ThrowIfNull(context);

        // Validate transition
        if (!JobStateMachine.IsValidTransition(job.Status, outcome))
        {
            throw new InvalidJobTransitionException(job.Status, outcome);
        }

        // Success - job is done
        if (outcome == JobStatus.Succeeded)
        {
            return CompletionDecision.Complete(outcome, reason);
        }

        // Canceled - no retry
        if (outcome == JobStatus.Canceled)
        {
            return CompletionDecision.Complete(outcome, reason ?? "Canceled");
        }

        // Failed or TimedOut - check retry policy
        if (outcome == JobStatus.Failed || outcome == JobStatus.TimedOut)
        {
            var retryDecision = RetryEvaluator.Evaluate(job.Attempt, context.RetryPolicy, context.Now);

            if (retryDecision.ShouldRetry)
            {
                return CompletionDecision.Retry(
                    retryDecision.NextAttempt,
                    retryDecision.NotBefore!.Value,
                    $"{outcome}: {reason ?? "Unknown error"}. Retry scheduled.");
            }

            return CompletionDecision.Complete(
                JobStatus.Failed,
                $"{outcome}: {reason ?? "Unknown error"}. {retryDecision.Reason}");
        }

        return CompletionDecision.Complete(outcome, reason);
    }

    /// <summary>
    /// Gets all pending jobs that are ready to be scheduled.
    /// </summary>
    public IReadOnlyList<Job> GetSchedulableJobs(IEnumerable<Job> pendingJobs, SchedulingContext context)
    {
        ArgumentNullException.ThrowIfNull(pendingJobs);
        ArgumentNullException.ThrowIfNull(context);

        var schedulable = new List<Job>();

        foreach (var job in pendingJobs)
        {
            if (job.Status != JobStatus.Pending)
            {
                continue;
            }

            // Skip if in backoff period
            if (job.NotBefore.HasValue && job.NotBefore.Value > context.Now)
            {
                continue;
            }

            // Dependencies are checked via context.ReadyJobIds
            if (context.ReadyJobIds != null && !context.ReadyJobIds.Contains(job.JobId))
            {
                continue;
            }

            schedulable.Add(job);
        }

        // Sort by priority (descending) then created time (ascending)
        return schedulable
            .OrderByDescending(j => j.Priority)
            .ThenBy(j => j.CreatedAt)
            .ToList();
    }
}

/// <summary>
/// Context for scheduling decisions.
/// </summary>
public sealed record SchedulingContext(
    DateTimeOffset Now,
    bool AreDependenciesSatisfied,
    bool HasQuotaAvailable,
    DateTimeOffset? QuotaAvailableAt,
    bool IsThrottled,
    string? ThrottleReason,
    DateTimeOffset? ThrottleExpiresAt,
    IReadOnlySet<Guid>? ReadyJobIds = null)
{
    /// <summary>
    /// Creates a context where scheduling is allowed.
    /// </summary>
    public static SchedulingContext AllowScheduling(DateTimeOffset now) => new(
        now,
        AreDependenciesSatisfied: true,
        HasQuotaAvailable: true,
        QuotaAvailableAt: null,
        IsThrottled: false,
        ThrottleReason: null,
        ThrottleExpiresAt: null);
}

/// <summary>
/// Context for completion decisions.
/// </summary>
public sealed record CompletionContext(
    DateTimeOffset Now,
    RetryPolicy RetryPolicy);

/// <summary>
/// Decision about whether to schedule a job.
/// </summary>
public sealed record ScheduleDecision(
    bool CanSchedule,
    bool ShouldDefer,
    DateTimeOffset? DeferUntil,
    string? Reason)
{
    public static ScheduleDecision Schedule() => new(true, false, null, null);
    public static ScheduleDecision Defer(DateTimeOffset? until, string reason) => new(false, true, until, reason);
    public static ScheduleDecision Reject(string reason) => new(false, false, null, reason);
}

/// <summary>
/// Decision about job completion outcome.
/// </summary>
public sealed record CompletionDecision(
    bool IsComplete,
    bool ShouldRetry,
    JobStatus FinalStatus,
    int? NextAttempt,
    DateTimeOffset? RetryNotBefore,
    string? Reason)
{
    public static CompletionDecision Complete(JobStatus status, string? reason)
        => new(true, false, status, null, null, reason);

    public static CompletionDecision Retry(int nextAttempt, DateTimeOffset notBefore, string reason)
        => new(false, true, JobStatus.Pending, nextAttempt, notBefore, reason);
}
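
A small sketch of a scheduling pass (illustrative, not part of the commit): filter pending jobs, then evaluate each against the same context. `pendingJobs` would come from a repository and is hypothetical here.

var scheduler = new JobScheduler();
var context = SchedulingContext.AllowScheduling(DateTimeOffset.UtcNow);

foreach (var job in scheduler.GetSchedulableJobs(pendingJobs, context))
{
    var decision = scheduler.EvaluateScheduling(job, context);
    if (decision.CanSchedule)
    {
        // Apply the Pending -> Scheduled transition and persist it.
    }
    else if (decision.ShouldDefer)
    {
        // Leave the job Pending; decision.DeferUntil (possibly null) hints when to re-evaluate.
    }
}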
@@ -0,0 +1,141 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Core.Scheduling;

/// <summary>
/// Manages job status transitions and validates state machine rules.
///
/// State machine:
/// Pending → Scheduled (quota cleared, dependencies satisfied)
/// Scheduled → Leased (worker acquired lease)
/// Leased → Succeeded | Failed | Canceled | TimedOut
/// Failed → Pending (retry) | Failed (exhausted)
/// TimedOut → Pending (retry) | Failed (exhausted)
/// </summary>
public static class JobStateMachine
{
    /// <summary>
    /// Validates whether a status transition is allowed.
    /// </summary>
    /// <param name="from">Current status.</param>
    /// <param name="to">Target status.</param>
    /// <returns>True if transition is valid.</returns>
    public static bool IsValidTransition(JobStatus from, JobStatus to)
    {
        return (from, to) switch
        {
            // From Pending
            (JobStatus.Pending, JobStatus.Scheduled) => true,
            (JobStatus.Pending, JobStatus.Canceled) => true,

            // From Scheduled
            (JobStatus.Scheduled, JobStatus.Leased) => true,
            (JobStatus.Scheduled, JobStatus.Canceled) => true,
            (JobStatus.Scheduled, JobStatus.Pending) => true, // Back to pending (quota exceeded, dependency failed)

            // From Leased
            (JobStatus.Leased, JobStatus.Succeeded) => true,
            (JobStatus.Leased, JobStatus.Failed) => true,
            (JobStatus.Leased, JobStatus.Canceled) => true,
            (JobStatus.Leased, JobStatus.TimedOut) => true,

            // Retry transitions (Failed/TimedOut back to Pending)
            (JobStatus.Failed, JobStatus.Pending) => true,
            (JobStatus.TimedOut, JobStatus.Pending) => true,

            // Same status (idempotent)
            _ when from == to => true,

            // All other transitions are invalid
            _ => false
        };
    }

    /// <summary>
    /// Determines if a job status is terminal (no further transitions except replay).
    /// </summary>
    public static bool IsTerminal(JobStatus status) => status switch
    {
        JobStatus.Succeeded => true,
        JobStatus.Failed => true,
        JobStatus.Canceled => true,
        JobStatus.TimedOut => true,
        _ => false
    };

    /// <summary>
    /// Determines if a job status represents a successful completion.
    /// </summary>
    public static bool IsSuccess(JobStatus status) => status == JobStatus.Succeeded;

    /// <summary>
    /// Determines if a job status represents a failure that may be retried.
    /// </summary>
    public static bool IsRetryable(JobStatus status) => status switch
    {
        JobStatus.Failed => true,
        JobStatus.TimedOut => true,
        _ => false
    };

    /// <summary>
    /// Determines if a job is in a state where it can be leased by a worker.
    /// </summary>
    public static bool IsLeasable(JobStatus status) => status == JobStatus.Scheduled;

    /// <summary>
    /// Determines if a job is waiting to be scheduled.
    /// </summary>
    public static bool IsPending(JobStatus status) => status == JobStatus.Pending;

    /// <summary>
    /// Determines if a job is currently being executed.
    /// </summary>
    public static bool IsActive(JobStatus status) => status == JobStatus.Leased;

    /// <summary>
    /// Gets all valid transitions from a given status.
    /// </summary>
    public static IReadOnlyList<JobStatus> GetValidTransitions(JobStatus from)
    {
        return from switch
        {
            JobStatus.Pending => [JobStatus.Scheduled, JobStatus.Canceled],
            JobStatus.Scheduled => [JobStatus.Leased, JobStatus.Canceled, JobStatus.Pending],
            JobStatus.Leased => [JobStatus.Succeeded, JobStatus.Failed, JobStatus.Canceled, JobStatus.TimedOut],
            JobStatus.Failed => [JobStatus.Pending], // Retry only
            JobStatus.TimedOut => [JobStatus.Pending], // Retry only
            JobStatus.Succeeded => [],
            JobStatus.Canceled => [],
            _ => []
        };
    }

    /// <summary>
    /// Validates a transition and throws if invalid.
    /// </summary>
    /// <exception cref="InvalidJobTransitionException">Thrown when transition is not allowed.</exception>
    public static void ValidateTransition(JobStatus from, JobStatus to)
    {
        if (!IsValidTransition(from, to))
        {
            throw new InvalidJobTransitionException(from, to);
        }
    }
}

/// <summary>
/// Exception thrown when an invalid job status transition is attempted.
/// </summary>
public sealed class InvalidJobTransitionException : Exception
{
    public JobStatus FromStatus { get; }
    public JobStatus ToStatus { get; }

    public InvalidJobTransitionException(JobStatus from, JobStatus to)
        : base($"Invalid job status transition from '{from}' to '{to}'.")
    {
        FromStatus = from;
        ToStatus = to;
    }
}
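
A small sketch of the state machine's guard methods (illustrative, not part of the commit):

JobStateMachine.ValidateTransition(JobStatus.Scheduled, JobStatus.Leased); // allowed, no throw

// Succeeded is terminal, so the following would throw InvalidJobTransitionException:
// JobStateMachine.ValidateTransition(JobStatus.Succeeded, JobStatus.Leased);

foreach (var next in JobStateMachine.GetValidTransitions(JobStatus.Leased))
{
    // next iterates Succeeded, Failed, Canceled, TimedOut
}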
@@ -0,0 +1,173 @@
namespace StellaOps.Orchestrator.Core.Scheduling;

/// <summary>
/// Defines retry behavior for failed jobs.
/// </summary>
public sealed record RetryPolicy(
    /// <summary>Maximum number of retry attempts (including initial attempt).</summary>
    int MaxAttempts,

    /// <summary>Initial backoff delay in seconds.</summary>
    double InitialBackoffSeconds,

    /// <summary>Maximum backoff delay in seconds.</summary>
    double MaxBackoffSeconds,

    /// <summary>Backoff multiplier for exponential growth.</summary>
    double BackoffMultiplier,

    /// <summary>Jitter factor (0.0-1.0) to add randomness to backoff.</summary>
    double JitterFactor)
{
    /// <summary>
    /// Default retry policy: 3 attempts, exponential backoff from 5s to 300s.
    /// </summary>
    public static RetryPolicy Default { get; } = new(
        MaxAttempts: 3,
        InitialBackoffSeconds: 5.0,
        MaxBackoffSeconds: 300.0,
        BackoffMultiplier: 2.0,
        JitterFactor: 0.1);

    /// <summary>
    /// Aggressive retry policy for critical jobs: 5 attempts, quick retries.
    /// </summary>
    public static RetryPolicy Aggressive { get; } = new(
        MaxAttempts: 5,
        InitialBackoffSeconds: 1.0,
        MaxBackoffSeconds: 60.0,
        BackoffMultiplier: 1.5,
        JitterFactor: 0.2);

    /// <summary>
    /// Conservative retry policy: 2 attempts, longer delays.
    /// </summary>
    public static RetryPolicy Conservative { get; } = new(
        MaxAttempts: 2,
        InitialBackoffSeconds: 30.0,
        MaxBackoffSeconds: 600.0,
        BackoffMultiplier: 3.0,
        JitterFactor: 0.1);

    /// <summary>
    /// No retry policy: single attempt only.
    /// </summary>
    public static RetryPolicy NoRetry { get; } = new(
        MaxAttempts: 1,
        InitialBackoffSeconds: 0,
        MaxBackoffSeconds: 0,
        BackoffMultiplier: 1.0,
        JitterFactor: 0);

    /// <summary>
    /// Determines if a job should be retried based on current attempt.
    /// </summary>
    /// <param name="currentAttempt">Current attempt number (1-based).</param>
    /// <returns>True if retry is allowed.</returns>
    public bool ShouldRetry(int currentAttempt) => currentAttempt < MaxAttempts;

    /// <summary>
    /// Calculates the next retry time based on current attempt.
    /// </summary>
    /// <param name="currentAttempt">Current attempt number (1-based).</param>
    /// <param name="now">Current time.</param>
    /// <returns>Earliest time for next retry attempt.</returns>
    public DateTimeOffset CalculateNextRetryTime(int currentAttempt, DateTimeOffset now)
    {
        if (!ShouldRetry(currentAttempt))
        {
            throw new InvalidOperationException($"No retry allowed after attempt {currentAttempt} (max: {MaxAttempts}).");
        }

        var backoffSeconds = CalculateBackoffSeconds(currentAttempt);
        return now.AddSeconds(backoffSeconds);
    }

    /// <summary>
    /// Calculates backoff duration in seconds for a given attempt.
    /// </summary>
    /// <param name="attempt">Attempt number (1-based).</param>
    /// <returns>Backoff duration in seconds.</returns>
    public double CalculateBackoffSeconds(int attempt)
    {
        if (attempt < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(attempt), "Attempt must be >= 1.");
        }

        // Exponential backoff: initial * multiplier^(attempt-1)
        var exponentialBackoff = InitialBackoffSeconds * Math.Pow(BackoffMultiplier, attempt - 1);

        // Cap at maximum
        var cappedBackoff = Math.Min(exponentialBackoff, MaxBackoffSeconds);

        // Add jitter to prevent thundering herd
        var jitter = cappedBackoff * JitterFactor * (Random.Shared.NextDouble() * 2 - 1);
        var finalBackoff = Math.Max(0, cappedBackoff + jitter);

        return finalBackoff;
    }
}

/// <summary>
/// Result of evaluating retry policy for a failed job.
/// </summary>
public sealed record RetryDecision(
    /// <summary>Whether the job should be retried.</summary>
    bool ShouldRetry,

    /// <summary>Next attempt number (if retrying).</summary>
    int NextAttempt,

    /// <summary>Earliest time for next attempt (if retrying).</summary>
    DateTimeOffset? NotBefore,

    /// <summary>Reason for the decision.</summary>
    string Reason)
{
    /// <summary>
    /// Creates a retry decision.
    /// </summary>
    public static RetryDecision Retry(int nextAttempt, DateTimeOffset notBefore)
        => new(true, nextAttempt, notBefore, $"Scheduling retry attempt {nextAttempt}");

    /// <summary>
    /// Creates a no-retry decision (exhausted).
    /// </summary>
    public static RetryDecision Exhausted(int maxAttempts)
        => new(false, 0, null, $"Max attempts ({maxAttempts}) exhausted");

    /// <summary>
    /// Creates a no-retry decision (not retryable status).
    /// </summary>
    public static RetryDecision NotRetryable(string reason)
        => new(false, 0, null, reason);
}

/// <summary>
/// Service for evaluating retry decisions.
/// </summary>
public static class RetryEvaluator
{
    /// <summary>
    /// Evaluates whether a job should be retried and calculates timing.
    /// </summary>
    /// <param name="currentAttempt">Current attempt number.</param>
    /// <param name="policy">Retry policy to apply.</param>
    /// <param name="now">Current time.</param>
    /// <returns>Retry decision.</returns>
    public static RetryDecision Evaluate(int currentAttempt, RetryPolicy policy, DateTimeOffset now)
    {
        ArgumentNullException.ThrowIfNull(policy);

        if (!policy.ShouldRetry(currentAttempt))
        {
            return RetryDecision.Exhausted(policy.MaxAttempts);
        }

        var nextAttempt = currentAttempt + 1;
        var notBefore = policy.CalculateNextRetryTime(currentAttempt, now);

        return RetryDecision.Retry(nextAttempt, notBefore);
    }
}
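
A small sketch of the default policy's progression (illustrative, not part of the commit): with MaxAttempts = 3, attempts 1 and 2 schedule retries at roughly 5s and 10s (±10% jitter), and attempt 3 is exhausted.

var policy = RetryPolicy.Default;
var now = DateTimeOffset.UtcNow;

var first = RetryEvaluator.Evaluate(1, policy, now);
// first.ShouldRetry == true, first.NextAttempt == 2,
// first.NotBefore is roughly now + 5s (5.0 * 2^0, ±10% jitter)

var exhausted = RetryEvaluator.Evaluate(3, policy, now);
// exhausted.ShouldRetry == false; Reason is "Max attempts (3) exhausted"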
@@ -0,0 +1,341 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Orchestrator.Core.Domain;
|
||||
|
||||
namespace StellaOps.Orchestrator.Core.SloManagement;
|
||||
|
||||
/// <summary>
|
||||
/// Options for burn rate computation.
|
||||
/// </summary>
|
||||
public sealed record BurnRateOptions
|
||||
{
|
||||
/// <summary>Short window multiplier for multi-window burn rate.</summary>
|
||||
public double ShortWindowMultiplier { get; init; } = 14.4; // 5% budget in 1 hour
|
||||
|
||||
/// <summary>Long window multiplier for multi-window burn rate.</summary>
|
||||
public double LongWindowMultiplier { get; init; } = 6.0; // 20% budget in 6 hours
|
||||
|
||||
/// <summary>Minimum events required for meaningful computation.</summary>
|
||||
public int MinimumEvents { get; init; } = 10;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Event counts for SLO computation.
|
||||
/// </summary>
|
||||
public sealed record SloEventCounts(
|
||||
/// <summary>Total events in the window.</summary>
|
||||
long TotalEvents,
|
||||
|
||||
/// <summary>Good events (successful) in the window.</summary>
|
||||
long GoodEvents,
|
||||
|
||||
/// <summary>Bad events (failed) in the window.</summary>
|
||||
long BadEvents,
|
||||
|
||||
/// <summary>Start of the evaluation window.</summary>
|
||||
DateTimeOffset WindowStart,
|
||||
|
||||
/// <summary>End of the evaluation window.</summary>
|
||||
DateTimeOffset WindowEnd);
|
||||
|
||||
/// <summary>
|
||||
/// Interface for retrieving SLO event counts.
|
||||
/// </summary>
|
||||
public interface ISloEventSource
|
||||
{
|
||||
/// <summary>Gets event counts for an availability SLO.</summary>
|
||||
Task<SloEventCounts> GetAvailabilityCountsAsync(
|
||||
string tenantId,
|
||||
string? jobType,
|
||||
Guid? sourceId,
|
||||
DateTimeOffset windowStart,
|
||||
DateTimeOffset windowEnd,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Gets event counts for a latency SLO.</summary>
|
||||
Task<SloEventCounts> GetLatencyCountsAsync(
|
||||
string tenantId,
|
||||
string? jobType,
|
||||
Guid? sourceId,
|
||||
double percentile,
|
||||
double targetSeconds,
|
||||
DateTimeOffset windowStart,
|
||||
DateTimeOffset windowEnd,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Gets event counts for a throughput SLO.</summary>
|
||||
Task<SloEventCounts> GetThroughputCountsAsync(
|
||||
string tenantId,
|
||||
string? jobType,
|
||||
Guid? sourceId,
|
||||
int minimumRequired,
|
||||
DateTimeOffset windowStart,
|
||||
DateTimeOffset windowEnd,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Engine for computing SLO burn rates and error budget consumption.
|
||||
/// </summary>
|
||||
public interface IBurnRateEngine
|
||||
{
|
||||
/// <summary>Computes the current state of an SLO.</summary>
|
||||
Task<SloState> ComputeStateAsync(
|
||||
Slo slo,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Computes states for all enabled SLOs for a tenant.</summary>
|
||||
Task<IReadOnlyList<SloState>> ComputeAllStatesAsync(
|
||||
string tenantId,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Evaluates alert thresholds and creates alerts if needed.</summary>
|
||||
Task<IReadOnlyList<SloAlert>> EvaluateAlertsAsync(
|
||||
Slo slo,
|
||||
SloState state,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Default implementation of burn rate computation engine.
|
||||
/// </summary>
|
||||
public sealed class BurnRateEngine : IBurnRateEngine
|
||||
{
|
||||
private readonly ISloRepository _sloRepository;
|
||||
private readonly ISloEventSource _eventSource;
|
||||
private readonly IAlertThresholdRepository _thresholdRepository;
|
||||
private readonly ISloAlertRepository _alertRepository;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly BurnRateOptions _options;
|
||||
private readonly ILogger<BurnRateEngine> _logger;
|
||||
|
||||
public BurnRateEngine(
|
||||
ISloRepository sloRepository,
|
||||
ISloEventSource eventSource,
|
||||
IAlertThresholdRepository thresholdRepository,
|
||||
ISloAlertRepository alertRepository,
|
||||
TimeProvider timeProvider,
|
||||
BurnRateOptions options,
|
||||
ILogger<BurnRateEngine> logger)
|
||||
{
|
||||
_sloRepository = sloRepository ?? throw new ArgumentNullException(nameof(sloRepository));
|
||||
_eventSource = eventSource ?? throw new ArgumentNullException(nameof(eventSource));
|
||||
_thresholdRepository = thresholdRepository ?? throw new ArgumentNullException(nameof(thresholdRepository));
|
||||
_alertRepository = alertRepository ?? throw new ArgumentNullException(nameof(alertRepository));
|
||||
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
_options = options ?? throw new ArgumentNullException(nameof(options));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<SloState> ComputeStateAsync(
|
||||
Slo slo,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var windowDuration = slo.GetWindowDuration();
|
||||
var windowStart = now - windowDuration;
|
||||
|
||||
// Get event counts based on SLO type
|
||||
var counts = slo.Type switch
|
||||
{
|
||||
SloType.Availability => await _eventSource.GetAvailabilityCountsAsync(
|
||||
slo.TenantId, slo.JobType, slo.SourceId, windowStart, now, cancellationToken).ConfigureAwait(false),
|
||||
|
||||
SloType.Latency => await _eventSource.GetLatencyCountsAsync(
|
||||
slo.TenantId, slo.JobType, slo.SourceId,
|
||||
slo.LatencyPercentile ?? 0.95,
|
||||
slo.LatencyTargetSeconds ?? 1.0,
|
||||
windowStart, now, cancellationToken).ConfigureAwait(false),
|
||||
|
||||
SloType.Throughput => await _eventSource.GetThroughputCountsAsync(
|
||||
slo.TenantId, slo.JobType, slo.SourceId,
|
||||
slo.ThroughputMinimum ?? 1,
|
||||
windowStart, now, cancellationToken).ConfigureAwait(false),
|
||||
|
||||
_ => throw new InvalidOperationException($"Unknown SLO type: {slo.Type}")
|
||||
};
|
||||
|
||||
// Handle no data case
|
||||
if (counts.TotalEvents < _options.MinimumEvents)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"SLO {SloId} has insufficient data ({Events} events, minimum {Min})",
|
||||
slo.SloId, counts.TotalEvents, _options.MinimumEvents);
|
||||
return SloState.NoData(slo.SloId, slo.TenantId, now, slo.Window);
|
||||
}
|
||||
|
||||
// Compute SLI (Service Level Indicator)
|
||||
var sli = (double)counts.GoodEvents / counts.TotalEvents;
|
||||
|
||||
// Compute error budget consumption
|
||||
var errorBudget = slo.ErrorBudget;
|
||||
var errorRate = 1.0 - sli;
|
||||
var budgetConsumed = errorBudget > 0 ? errorRate / errorBudget : (errorRate > 0 ? 1.0 : 0.0);
|
||||
budgetConsumed = Math.Clamp(budgetConsumed, 0, 2.0); // Allow showing overconsumption up to 200%
|
||||
|
||||
var budgetRemaining = Math.Max(0, 1.0 - budgetConsumed);
|
||||
|
||||
// Compute burn rate
|
||||
// Burn rate = (actual error rate) / (allowed error rate for sustainable consumption)
|
||||
// Sustainable consumption = error budget / window duration * elapsed time
|
||||
var elapsedRatio = (now - counts.WindowStart).TotalSeconds / windowDuration.TotalSeconds;
|
||||
var sustainableErrorRate = errorBudget * elapsedRatio;
|
||||
var burnRate = sustainableErrorRate > 0 ? errorRate / sustainableErrorRate : 0;
|
||||

        // Compute time to exhaustion
        TimeSpan? timeToExhaustion = null;
        if (burnRate > 0 && budgetRemaining > 0)
        {
            var remainingBudget = errorBudget * budgetRemaining;
            var currentErrorRatePerSecond = errorRate / (now - counts.WindowStart).TotalSeconds;
            if (currentErrorRatePerSecond > 0)
            {
                var secondsToExhaustion = remainingBudget / currentErrorRatePerSecond;
                timeToExhaustion = TimeSpan.FromSeconds(Math.Min(secondsToExhaustion, windowDuration.TotalSeconds));
            }
        }

        // Determine if SLO is met
        var isMet = sli >= slo.Target;

        // Determine alert severity
        var alertSeverity = DetermineAlertSeverity(budgetConsumed, burnRate);

        var state = new SloState(
            SloId: slo.SloId,
            TenantId: slo.TenantId,
            CurrentSli: sli,
            TotalEvents: counts.TotalEvents,
            GoodEvents: counts.GoodEvents,
            BadEvents: counts.BadEvents,
            BudgetConsumed: budgetConsumed,
            BudgetRemaining: budgetRemaining,
            BurnRate: burnRate,
            TimeToExhaustion: timeToExhaustion,
            IsMet: isMet,
            AlertSeverity: alertSeverity,
            ComputedAt: now,
            WindowStart: counts.WindowStart,
            WindowEnd: counts.WindowEnd);

        _logger.LogDebug(
            "SLO {SloId} state computed: SLI={Sli:P2}, BudgetConsumed={BudgetConsumed:P1}, BurnRate={BurnRate:F2}x",
            slo.SloId, state.CurrentSli, state.BudgetConsumed, state.BurnRate);

        return state;
    }

    public async Task<IReadOnlyList<SloState>> ComputeAllStatesAsync(
        string tenantId,
        CancellationToken cancellationToken)
    {
        var slos = await _sloRepository.ListAsync(tenantId, enabledOnly: true, cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        var states = new List<SloState>(slos.Count);

        foreach (var slo in slos)
        {
            try
            {
                var state = await ComputeStateAsync(slo, cancellationToken).ConfigureAwait(false);
                states.Add(state);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to compute state for SLO {SloId}", slo.SloId);
                // Add no-data state for failed computation
                states.Add(SloState.NoData(slo.SloId, slo.TenantId, _timeProvider.GetUtcNow(), slo.Window));
            }
        }

        return states;
    }

    public async Task<IReadOnlyList<SloAlert>> EvaluateAlertsAsync(
        Slo slo,
        SloState state,
        CancellationToken cancellationToken)
    {
        var now = _timeProvider.GetUtcNow();
        var thresholds = await _thresholdRepository.ListBySloAsync(slo.SloId, cancellationToken)
            .ConfigureAwait(false);

        var alerts = new List<SloAlert>();

        foreach (var threshold in thresholds)
        {
            if (!threshold.ShouldTrigger(state, now))
            {
                continue;
            }

            var alert = SloAlert.Create(slo, state, threshold);
            await _alertRepository.CreateAsync(alert, cancellationToken).ConfigureAwait(false);

            var updatedThreshold = threshold.RecordTrigger(now);
            await _thresholdRepository.UpdateAsync(updatedThreshold, cancellationToken).ConfigureAwait(false);

            alerts.Add(alert);

            _logger.LogWarning(
                "SLO alert triggered: SloId={SloId}, Severity={Severity}, Message={Message}",
                slo.SloId, alert.Severity, alert.Message);
        }

        return alerts;
    }

    private static AlertSeverity DetermineAlertSeverity(double budgetConsumed, double burnRate)
    {
        // Emergency: Budget exhausted or burn rate extremely high
        if (budgetConsumed >= 1.0 || burnRate >= 10.0)
            return AlertSeverity.Emergency;

        // Critical: Budget nearly exhausted or burn rate very high
        if (budgetConsumed >= 0.8 || burnRate >= 5.0)
            return AlertSeverity.Critical;

        // Warning: Budget significantly consumed or elevated burn rate
        if (budgetConsumed >= 0.5 || burnRate >= 2.0)
            return AlertSeverity.Warning;

        // Info: Everything is normal
        return AlertSeverity.Info;
    }
}

/// <summary>
/// Repository interface for SLO persistence.
/// </summary>
public interface ISloRepository
{
    Task<Slo?> GetByIdAsync(string tenantId, Guid sloId, CancellationToken cancellationToken);
    Task<IReadOnlyList<Slo>> ListAsync(string tenantId, bool enabledOnly, string? jobType = null, CancellationToken cancellationToken = default);
    Task CreateAsync(Slo slo, CancellationToken cancellationToken);
    Task UpdateAsync(Slo slo, CancellationToken cancellationToken);
    Task<bool> DeleteAsync(string tenantId, Guid sloId, CancellationToken cancellationToken);
}

/// <summary>
/// Repository interface for alert threshold persistence.
/// </summary>
public interface IAlertThresholdRepository
{
    Task<AlertBudgetThreshold?> GetByIdAsync(string tenantId, Guid thresholdId, CancellationToken cancellationToken);
    Task<IReadOnlyList<AlertBudgetThreshold>> ListBySloAsync(Guid sloId, CancellationToken cancellationToken);
    Task CreateAsync(AlertBudgetThreshold threshold, CancellationToken cancellationToken);
    Task UpdateAsync(AlertBudgetThreshold threshold, CancellationToken cancellationToken);
    Task<bool> DeleteAsync(string tenantId, Guid thresholdId, CancellationToken cancellationToken);
}

/// <summary>
/// Repository interface for SLO alert persistence.
/// </summary>
public interface ISloAlertRepository
{
    Task<SloAlert?> GetByIdAsync(string tenantId, Guid alertId, CancellationToken cancellationToken);
    Task<IReadOnlyList<SloAlert>> ListAsync(string tenantId, Guid? sloId, bool? acknowledged, bool? resolved, int limit, int offset, CancellationToken cancellationToken);
    Task CreateAsync(SloAlert alert, CancellationToken cancellationToken);
    Task UpdateAsync(SloAlert alert, CancellationToken cancellationToken);
    Task<int> GetActiveAlertCountAsync(string tenantId, CancellationToken cancellationToken);
}
@@ -1,18 +1,20 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <LangVersion>preview</LangVersion>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0-rc.2.25502.107" />
  </ItemGroup>

</Project>
@@ -1,6 +0,0 @@
namespace StellaOps.Orchestrator.Infrastructure;

public class Class1
{

}
@@ -0,0 +1,45 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Ledger;

/// <summary>
/// Service for exporting ledger data in various formats.
/// </summary>
public interface ILedgerExporter
{
    /// <summary>
    /// Exports ledger entries to a file.
    /// </summary>
    /// <param name="export">The export request.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The completed export with output details.</returns>
    Task<LedgerExport> ExportAsync(
        LedgerExport export,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates a signed manifest for a ledger entry.
    /// </summary>
    /// <param name="entry">The ledger entry.</param>
    /// <param name="artifacts">The artifacts from the run.</param>
    /// <param name="buildInfo">Optional build information.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The generated manifest.</returns>
    Task<SignedManifest> GenerateManifestAsync(
        RunLedgerEntry entry,
        IReadOnlyList<Artifact> artifacts,
        string? buildInfo = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates a signed manifest for an export.
    /// </summary>
    /// <param name="export">The completed export.</param>
    /// <param name="entries">The entries included in the export.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The generated manifest.</returns>
    Task<SignedManifest> GenerateExportManifestAsync(
        LedgerExport export,
        IReadOnlyList<RunLedgerEntry> entries,
        CancellationToken cancellationToken = default);
}
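
// Typical call sequence (illustrative note, not part of the original file): build a
// LedgerExport request, run ExportAsync to materialize the output, then call
// GenerateExportManifestAsync over the exported entries to produce the signed manifest.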
@@ -0,0 +1,309 @@
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Ledger;

/// <summary>
/// Service for exporting ledger data in various formats.
/// </summary>
public sealed class LedgerExporter : ILedgerExporter
{
    private readonly ILedgerRepository _ledgerRepository;
    private readonly ILedgerExportRepository _exportRepository;
    private readonly ILogger<LedgerExporter> _logger;

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    private static readonly JsonSerializerOptions NdjsonOptions = new()
    {
        WriteIndented = false,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    public LedgerExporter(
        ILedgerRepository ledgerRepository,
        ILedgerExportRepository exportRepository,
        ILogger<LedgerExporter> logger)
    {
        _ledgerRepository = ledgerRepository;
        _exportRepository = exportRepository;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<LedgerExport> ExportAsync(
        LedgerExport export,
        CancellationToken cancellationToken = default)
    {
        var startTime = DateTimeOffset.UtcNow;

        try
        {
            _logger.LogInformation(
                "Starting ledger export {ExportId} for tenant {TenantId} in format {Format}",
                export.ExportId, export.TenantId, export.Format);

            // Mark export as started
            export = export.Start();
            export = await _exportRepository.UpdateAsync(export, cancellationToken);

            // Fetch entries based on filters
            var entries = await _ledgerRepository.ListAsync(
                export.TenantId,
                export.RunTypeFilter,
                export.SourceIdFilter,
                finalStatus: null,
                export.StartTime,
                export.EndTime,
                limit: int.MaxValue,
                offset: 0,
                cancellationToken);

            _logger.LogInformation(
                "Found {EntryCount} ledger entries for export {ExportId}",
                entries.Count, export.ExportId);

            // Generate output based on format
            var (content, digest) = await GenerateOutputAsync(entries, export.Format, cancellationToken);

            // Generate output path (in production, this would write to storage)
            var outputUri = GenerateOutputUri(export);
            var sizeBytes = Encoding.UTF8.GetByteCount(content);

            // Complete the export
            export = export.Complete(outputUri, digest, sizeBytes, entries.Count);
            export = await _exportRepository.UpdateAsync(export, cancellationToken);

            var duration = DateTimeOffset.UtcNow - startTime;
            OrchestratorMetrics.LedgerExportCompleted(export.TenantId, export.Format);
            OrchestratorMetrics.RecordLedgerExportDuration(export.TenantId, export.Format, duration.TotalSeconds);
            OrchestratorMetrics.RecordLedgerExportSize(export.TenantId, export.Format, sizeBytes);

            _logger.LogInformation(
                "Completed ledger export {ExportId} with {EntryCount} entries, {SizeBytes} bytes",
                export.ExportId, entries.Count, sizeBytes);

            return export;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex,
                "Failed to export ledger {ExportId} for tenant {TenantId}",
                export.ExportId, export.TenantId);

            OrchestratorMetrics.LedgerExportFailed(export.TenantId, export.Format);

            export = export.Fail(ex.Message);
            export = await _exportRepository.UpdateAsync(export, cancellationToken);

            throw;
        }
    }

    /// <inheritdoc />
    public Task<SignedManifest> GenerateManifestAsync(
        RunLedgerEntry entry,
        IReadOnlyList<Artifact> artifacts,
        string? buildInfo = null,
        CancellationToken cancellationToken = default)
    {
        _logger.LogInformation(
            "Generating manifest for ledger entry {LedgerId}, run {RunId}",
            entry.LedgerId, entry.RunId);

        var manifest = SignedManifest.CreateFromLedgerEntry(entry, buildInfo);

        OrchestratorMetrics.ManifestCreated(entry.TenantId, "run");

        return Task.FromResult(manifest);
    }

    /// <inheritdoc />
    public Task<SignedManifest> GenerateExportManifestAsync(
        LedgerExport export,
        IReadOnlyList<RunLedgerEntry> entries,
        CancellationToken cancellationToken = default)
    {
        _logger.LogInformation(
            "Generating manifest for export {ExportId} with {EntryCount} entries",
            export.ExportId, entries.Count);

        var manifest = SignedManifest.CreateFromExport(export, entries);

        OrchestratorMetrics.ManifestCreated(export.TenantId, "export");

        return Task.FromResult(manifest);
    }

    private async Task<(string Content, string Digest)> GenerateOutputAsync(
        IReadOnlyList<RunLedgerEntry> entries,
        string format,
        CancellationToken cancellationToken)
    {
        var content = format.ToLowerInvariant() switch
        {
            "json" => GenerateJson(entries),
            "ndjson" => GenerateNdjson(entries),
            "csv" => GenerateCsv(entries),
            _ => throw new ArgumentException($"Unsupported export format: {format}", nameof(format))
        };

        // Compute digest
        var bytes = Encoding.UTF8.GetBytes(content);
        var hash = await Task.Run(() => SHA256.HashData(bytes), cancellationToken);
        var digest = $"sha256:{Convert.ToHexStringLower(hash)}";
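        // Illustrative: for the UTF-8 bytes of "test" this yields
        // "sha256:9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08".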

        return (content, digest);
    }

    private static string GenerateJson(IReadOnlyList<RunLedgerEntry> entries)
    {
        var exportData = new LedgerExportData
        {
            SchemaVersion = "1.0.0",
            ExportedAt = DateTimeOffset.UtcNow,
            EntryCount = entries.Count,
            Entries = entries.Select(MapEntry).ToList()
        };

        return JsonSerializer.Serialize(exportData, JsonOptions);
    }

    private static string GenerateNdjson(IReadOnlyList<RunLedgerEntry> entries)
    {
        var sb = new StringBuilder();

        foreach (var entry in entries)
        {
            var mapped = MapEntry(entry);
            sb.AppendLine(JsonSerializer.Serialize(mapped, NdjsonOptions));
        }

        return sb.ToString();
    }

    private static string GenerateCsv(IReadOnlyList<RunLedgerEntry> entries)
    {
        var sb = new StringBuilder();

        // Header
        sb.AppendLine("LedgerId,TenantId,RunId,SourceId,RunType,FinalStatus,TotalJobs,SucceededJobs,FailedJobs,ExecutionDurationMs,InputDigest,OutputDigest,SequenceNumber,ContentHash,PreviousEntryHash,RunCreatedAt,RunCompletedAt,LedgerCreatedAt");

        // Data rows
        foreach (var entry in entries)
        {
            sb.AppendLine(string.Join(",",
                EscapeCsv(entry.LedgerId.ToString()),
                EscapeCsv(entry.TenantId),
                EscapeCsv(entry.RunId.ToString()),
                EscapeCsv(entry.SourceId.ToString()),
                EscapeCsv(entry.RunType),
                EscapeCsv(entry.FinalStatus.ToString()),
                entry.TotalJobs,
                entry.SucceededJobs,
                entry.FailedJobs,
                entry.ExecutionDuration.TotalMilliseconds.ToString(CultureInfo.InvariantCulture),
                EscapeCsv(entry.InputDigest),
                EscapeCsv(entry.OutputDigest),
                entry.SequenceNumber,
                EscapeCsv(entry.ContentHash),
                EscapeCsv(entry.PreviousEntryHash ?? ""),
                EscapeCsv(entry.RunCreatedAt.ToString("O")),
                EscapeCsv(entry.RunCompletedAt.ToString("O")),
                EscapeCsv(entry.LedgerCreatedAt.ToString("O"))));
        }

        return sb.ToString();
    }

    private static string EscapeCsv(string value)
    {
        if (string.IsNullOrEmpty(value))
            return "";

        if (value.Contains(',') || value.Contains('"') || value.Contains('\n'))
        {
            return $"\"{value.Replace("\"", "\"\"")}\"";
        }

        return value;
    }
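    // Illustrative: EscapeCsv("a,b") returns the value wrapped in quotes, and any
    // embedded quote is doubled per RFC 4180, so he said "hi" becomes "he said ""hi""".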

    private static LedgerEntryDto MapEntry(RunLedgerEntry entry) => new()
    {
        LedgerId = entry.LedgerId,
        TenantId = entry.TenantId,
        RunId = entry.RunId,
        SourceId = entry.SourceId,
        RunType = entry.RunType,
        FinalStatus = entry.FinalStatus.ToString(),
        TotalJobs = entry.TotalJobs,
        SucceededJobs = entry.SucceededJobs,
        FailedJobs = entry.FailedJobs,
        ExecutionDurationMs = entry.ExecutionDuration.TotalMilliseconds,
        InputDigest = entry.InputDigest,
        OutputDigest = entry.OutputDigest,
        ArtifactManifest = entry.ArtifactManifest,
        SequenceNumber = entry.SequenceNumber,
        ContentHash = entry.ContentHash,
        PreviousEntryHash = entry.PreviousEntryHash,
        RunCreatedAt = entry.RunCreatedAt,
        RunCompletedAt = entry.RunCompletedAt,
        LedgerCreatedAt = entry.LedgerCreatedAt,
        Metadata = entry.Metadata
    };

    private static string GenerateOutputUri(LedgerExport export)
    {
        var extension = export.Format.ToLowerInvariant() switch
        {
            "json" => "json",
            "ndjson" => "ndjson",
            "csv" => "csv",
            _ => "dat"
        };

        return $"ledger://exports/{export.TenantId}/{export.ExportId}.{extension}";
    }

    private sealed class LedgerExportData
    {
        public required string SchemaVersion { get; init; }
        public required DateTimeOffset ExportedAt { get; init; }
        public required int EntryCount { get; init; }
        public required List<LedgerEntryDto> Entries { get; init; }
    }

    private sealed class LedgerEntryDto
    {
        public required Guid LedgerId { get; init; }
        public required string TenantId { get; init; }
        public required Guid RunId { get; init; }
        public required Guid SourceId { get; init; }
        public required string RunType { get; init; }
        public required string FinalStatus { get; init; }
        public required int TotalJobs { get; init; }
        public required int SucceededJobs { get; init; }
        public required int FailedJobs { get; init; }
        public required double ExecutionDurationMs { get; init; }
        public required string InputDigest { get; init; }
        public required string OutputDigest { get; init; }
        public required string ArtifactManifest { get; init; }
        public required long SequenceNumber { get; init; }
        public required string ContentHash { get; init; }
        public string? PreviousEntryHash { get; init; }
        public required DateTimeOffset RunCreatedAt { get; init; }
        public required DateTimeOffset RunCompletedAt { get; init; }
        public required DateTimeOffset LedgerCreatedAt { get; init; }
        public string? Metadata { get; init; }
    }
}
@@ -0,0 +1,660 @@
using System.Diagnostics.Metrics;

namespace StellaOps.Orchestrator.Infrastructure;

/// <summary>
/// Metrics instrumentation for the Orchestrator service.
/// </summary>
public static class OrchestratorMetrics
{
    private static readonly Meter Meter = new("StellaOps.Orchestrator", "1.0.0");
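    // Consumption note (illustrative): these instruments are visible to any
    // System.Diagnostics.Metrics listener (e.g. an OpenTelemetry MeterProvider)
    // that subscribes to the meter name "StellaOps.Orchestrator".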

    private static readonly Counter<long> JobsEnqueued = Meter.CreateCounter<long>(
        "orchestrator.jobs.enqueued",
        description: "Total jobs enqueued");

    private static readonly Counter<long> JobsScheduled = Meter.CreateCounter<long>(
        "orchestrator.jobs.scheduled",
        description: "Total jobs scheduled");

    private static readonly Counter<long> JobsLeased = Meter.CreateCounter<long>(
        "orchestrator.jobs.leased",
        description: "Total jobs leased to workers");

    private static readonly Counter<long> JobsCompleted = Meter.CreateCounter<long>(
        "orchestrator.jobs.completed",
        description: "Total jobs completed");

    private static readonly Counter<long> JobsFailed = Meter.CreateCounter<long>(
        "orchestrator.jobs.failed",
        description: "Total jobs failed");

    private static readonly Counter<long> JobsRetried = Meter.CreateCounter<long>(
        "orchestrator.jobs.retried",
        description: "Total job retry attempts");

    private static readonly Counter<long> LeaseExtensions = Meter.CreateCounter<long>(
        "orchestrator.lease.extensions",
        description: "Total lease extensions");

    private static readonly Counter<long> LeaseExpirations = Meter.CreateCounter<long>(
        "orchestrator.lease.expirations",
        description: "Total lease expirations");

    private static readonly Histogram<double> JobDuration = Meter.CreateHistogram<double>(
        "orchestrator.job.duration.seconds",
        unit: "s",
        description: "Job execution duration");

    private static readonly Histogram<double> SchedulingLatency = Meter.CreateHistogram<double>(
        "orchestrator.scheduling.latency.seconds",
        unit: "s",
        description: "Time from job creation to scheduling");

    private static readonly UpDownCounter<long> ActiveConnections = Meter.CreateUpDownCounter<long>(
        "orchestrator.db.connections.active",
        description: "Active database connections");

    private static readonly UpDownCounter<long> QueueDepth = Meter.CreateUpDownCounter<long>(
        "orchestrator.queue.depth",
        description: "Number of pending jobs in queue");

    private static readonly Counter<long> ArtifactsCreated = Meter.CreateCounter<long>(
        "orchestrator.artifacts.created",
        description: "Total artifacts created");

    private static readonly Counter<long> HeartbeatsReceived = Meter.CreateCounter<long>(
        "orchestrator.heartbeats.received",
        description: "Total worker heartbeats received");

    private static readonly Counter<long> ProgressReports = Meter.CreateCounter<long>(
        "orchestrator.progress.reports",
        description: "Total job progress reports");

    private static readonly Counter<long> SourcesCreated = Meter.CreateCounter<long>(
        "orchestrator.sources.created",
        description: "Total sources created");

    private static readonly Counter<long> SourcesPaused = Meter.CreateCounter<long>(
        "orchestrator.sources.paused",
        description: "Total source pause operations");

    private static readonly Counter<long> SourcesResumed = Meter.CreateCounter<long>(
        "orchestrator.sources.resumed",
        description: "Total source resume operations");

    private static readonly Counter<long> RunsCreated = Meter.CreateCounter<long>(
        "orchestrator.runs.created",
        description: "Total runs created");

    private static readonly Counter<long> RunsCompleted = Meter.CreateCounter<long>(
        "orchestrator.runs.completed",
        description: "Total runs completed");

    private static readonly Counter<long> QuotasCreated = Meter.CreateCounter<long>(
        "orchestrator.quotas.created",
        description: "Total quotas created");

    private static readonly Counter<long> QuotasPaused = Meter.CreateCounter<long>(
        "orchestrator.quotas.paused",
        description: "Total quota pause operations");

    private static readonly Counter<long> QuotasResumed = Meter.CreateCounter<long>(
        "orchestrator.quotas.resumed",
        description: "Total quota resume operations");

    private static readonly Counter<long> ThrottlesCreated = Meter.CreateCounter<long>(
        "orchestrator.throttles.created",
        description: "Total throttles created");

    private static readonly Counter<long> ThrottlesDeactivated = Meter.CreateCounter<long>(
        "orchestrator.throttles.deactivated",
        description: "Total throttles deactivated");

    private static readonly Counter<long> RateLimitDenials = Meter.CreateCounter<long>(
        "orchestrator.ratelimit.denials",
        description: "Total rate limit denials");

    private static readonly Counter<long> BackpressureEvents = Meter.CreateCounter<long>(
        "orchestrator.backpressure.events",
        description: "Total backpressure events from upstream");

    private static readonly Histogram<double> TokenBucketUtilization = Meter.CreateHistogram<double>(
        "orchestrator.ratelimit.token_utilization",
        unit: "ratio",
        description: "Token bucket utilization ratio (0-1)");

    private static readonly Histogram<double> ConcurrencyUtilization = Meter.CreateHistogram<double>(
        "orchestrator.ratelimit.concurrency_utilization",
        unit: "ratio",
        description: "Concurrency limiter utilization ratio (0-1)");

    public static void JobEnqueued(string tenantId, string jobType)
        => JobsEnqueued.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void JobScheduled(string tenantId, string jobType)
        => JobsScheduled.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void JobLeased(string tenantId, string jobType)
        => JobsLeased.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void JobCompleted(string tenantId, string jobType, string status)
        => JobsCompleted.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType),
            new KeyValuePair<string, object?>("status", status));

    public static void JobFailed(string tenantId, string jobType)
        => JobsFailed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void JobRetried(string tenantId, string jobType, int attempt)
        => JobsRetried.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType),
            new KeyValuePair<string, object?>("attempt", attempt));

    public static void LeaseExtended(string tenantId, string jobType)
        => LeaseExtensions.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void LeaseExpired(string tenantId, string jobType)
        => LeaseExpirations.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void RecordJobDuration(string tenantId, string jobType, double durationSeconds)
        => JobDuration.Record(durationSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void RecordSchedulingLatency(string tenantId, string jobType, double latencySeconds)
        => SchedulingLatency.Record(latencySeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void ConnectionOpened(string role)
        => ActiveConnections.Add(1, new KeyValuePair<string, object?>("role", role));

    public static void ConnectionClosed(string role)
        => ActiveConnections.Add(-1, new KeyValuePair<string, object?>("role", role));

    public static void QueueDepthChanged(string tenantId, string jobType, long delta)
        => QueueDepth.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void ArtifactCreated(string tenantId, string artifactType)
        => ArtifactsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("artifact_type", artifactType));

    public static void HeartbeatReceived(string tenantId, string jobType)
        => HeartbeatsReceived.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void ProgressReported(string tenantId, string jobType)
        => ProgressReports.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType));

    public static void SourceCreated(string tenantId, string sourceType)
        => SourcesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("source_type", sourceType));

    public static void SourcePaused(string tenantId)
        => SourcesPaused.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void SourceResumed(string tenantId)
        => SourcesResumed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void RunCreated(string tenantId, string runType)
        => RunsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("run_type", runType));

    public static void RunCompleted(string tenantId, string runType, string status)
        => RunsCompleted.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("run_type", runType),
            new KeyValuePair<string, object?>("status", status));

    public static void QuotaCreated(string tenantId, string? jobType)
        => QuotasCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));

    public static void QuotaPaused(string tenantId)
        => QuotasPaused.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void QuotaResumed(string tenantId)
        => QuotasResumed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void ThrottleCreated(string tenantId, string reason)
        => ThrottlesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("reason", reason));

    public static void ThrottleDeactivated(string tenantId)
        => ThrottlesDeactivated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void RateLimitDenied(string tenantId, string? jobType, string reason)
        => RateLimitDenials.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"),
            new KeyValuePair<string, object?>("reason", reason));

    public static void BackpressureEvent(string tenantId, int statusCode, string reason)
        => BackpressureEvents.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("status_code", statusCode),
            new KeyValuePair<string, object?>("reason", reason));

    public static void RecordTokenBucketUtilization(string tenantId, string? jobType, double utilization)
        => TokenBucketUtilization.Record(utilization, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));

    public static void RecordConcurrencyUtilization(string tenantId, string? jobType, double utilization)
        => ConcurrencyUtilization.Record(utilization, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));

    // Watermark metrics
    private static readonly Counter<long> WatermarksCreatedCounter = Meter.CreateCounter<long>(
        "orchestrator.watermarks.created",
        description: "Total watermarks created");

    private static readonly Counter<long> WatermarksAdvanced = Meter.CreateCounter<long>(
        "orchestrator.watermarks.advanced",
        description: "Total watermark advancement operations");

    private static readonly Histogram<double> WatermarkLag = Meter.CreateHistogram<double>(
        "orchestrator.watermark.lag.seconds",
        unit: "s",
        description: "Watermark lag from current time");

    // Backfill metrics
    private static readonly Counter<long> BackfillsCreated = Meter.CreateCounter<long>(
        "orchestrator.backfills.created",
        description: "Total backfill requests created");

    private static readonly Counter<long> BackfillStatusChanges = Meter.CreateCounter<long>(
        "orchestrator.backfills.status_changes",
        description: "Total backfill status changes");

    private static readonly Counter<long> BackfillEventsProcessed = Meter.CreateCounter<long>(
        "orchestrator.backfills.events_processed",
        description: "Total events processed by backfills");

    private static readonly Counter<long> BackfillEventsSkipped = Meter.CreateCounter<long>(
        "orchestrator.backfills.events_skipped",
        description: "Total events skipped by backfills (duplicates)");

    private static readonly Histogram<double> BackfillDuration = Meter.CreateHistogram<double>(
        "orchestrator.backfill.duration.seconds",
        unit: "s",
        description: "Backfill execution duration");

    private static readonly Histogram<double> BackfillProgress = Meter.CreateHistogram<double>(
        "orchestrator.backfill.progress",
        unit: "percent",
        description: "Backfill progress percentage");

    // Duplicate suppression metrics
    private static readonly Counter<long> ProcessedEventsMarkedCounter = Meter.CreateCounter<long>(
        "orchestrator.processed_events.marked",
        description: "Total processed events marked for duplicate suppression");

    private static readonly Counter<long> ProcessedEventsCleanedUpCounter = Meter.CreateCounter<long>(
        "orchestrator.processed_events.cleaned_up",
        description: "Total expired processed events cleaned up");

    private static readonly Counter<long> DuplicatesDetected = Meter.CreateCounter<long>(
        "orchestrator.duplicates.detected",
        description: "Total duplicate events detected");

    public static void WatermarkCreated(string tenantId, string scopeKey)
        => WatermarksCreatedCounter.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void WatermarkAdvanced(string tenantId, string scopeKey)
        => WatermarksAdvanced.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void RecordWatermarkLag(string tenantId, string scopeKey, double lagSeconds)
        => WatermarkLag.Record(lagSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));
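    // Illustrative: a watermark still at 12:00:00 observed at 12:05:30 would record
    // a lag of 330 seconds.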

    public static void BackfillCreated(string tenantId, string scopeKey)
        => BackfillsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void BackfillStatusChanged(string tenantId, string scopeKey, string status)
        => BackfillStatusChanges.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey),
            new KeyValuePair<string, object?>("status", status));

    public static void BackfillEventProcessed(string tenantId, string scopeKey, long count)
        => BackfillEventsProcessed.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void BackfillEventSkipped(string tenantId, string scopeKey, long count)
        => BackfillEventsSkipped.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void RecordBackfillDuration(string tenantId, string scopeKey, double durationSeconds)
        => BackfillDuration.Record(durationSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void RecordBackfillProgress(string tenantId, string scopeKey, double progressPercent)
        => BackfillProgress.Record(progressPercent, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void ProcessedEventsMarked(string tenantId, string scopeKey, long count)
        => ProcessedEventsMarkedCounter.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    public static void ProcessedEventsCleanedUp(string tenantId, long count)
        => ProcessedEventsCleanedUpCounter.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void DuplicateDetected(string tenantId, string scopeKey)
        => DuplicatesDetected.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("scope_key", scopeKey));

    // Dead-letter metrics
    private static readonly Counter<long> DeadLetterEntriesCreated = Meter.CreateCounter<long>(
        "orchestrator.deadletter.created",
        description: "Total dead-letter entries created");

    private static readonly Counter<long> DeadLetterStatusChanges = Meter.CreateCounter<long>(
        "orchestrator.deadletter.status_changes",
        description: "Total dead-letter status changes");

    private static readonly Counter<long> DeadLetterReplayAttempts = Meter.CreateCounter<long>(
        "orchestrator.deadletter.replay_attempts",
        description: "Total dead-letter replay attempts");

    private static readonly Counter<long> DeadLetterReplaySuccesses = Meter.CreateCounter<long>(
        "orchestrator.deadletter.replay_successes",
        description: "Total successful dead-letter replays");

    private static readonly Counter<long> DeadLetterReplayFailures = Meter.CreateCounter<long>(
        "orchestrator.deadletter.replay_failures",
        description: "Total failed dead-letter replays");

    private static readonly Counter<long> DeadLetterEntriesExpired = Meter.CreateCounter<long>(
        "orchestrator.deadletter.expired",
        description: "Total dead-letter entries marked as expired");

    private static readonly Counter<long> DeadLetterEntriesPurged = Meter.CreateCounter<long>(
        "orchestrator.deadletter.purged",
        description: "Total dead-letter entries purged");

    private static readonly Counter<long> DeadLetterNotificationsSent = Meter.CreateCounter<long>(
        "orchestrator.deadletter.notifications_sent",
        description: "Total dead-letter notifications sent");

    private static readonly Counter<long> DeadLetterNotificationsFailed = Meter.CreateCounter<long>(
        "orchestrator.deadletter.notifications_failed",
        description: "Total failed dead-letter notifications");

    private static readonly UpDownCounter<long> DeadLetterPendingCount = Meter.CreateUpDownCounter<long>(
        "orchestrator.deadletter.pending",
        description: "Current number of pending dead-letter entries");

    public static void DeadLetterCreated(string tenantId, string jobType, string errorCode, string category)
        => DeadLetterEntriesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType),
            new KeyValuePair<string, object?>("error_code", errorCode),
            new KeyValuePair<string, object?>("category", category));

    public static void DeadLetterStatusChanged(string tenantId, string jobType, string status)
        => DeadLetterStatusChanges.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("job_type", jobType),
            new KeyValuePair<string, object?>("status", status));

    public static void DeadLetterReplayAttempted(string tenantId, string triggeredBy)
        => DeadLetterReplayAttempts.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("triggered_by", triggeredBy));

    public static void DeadLetterReplaySucceeded(string tenantId)
        => DeadLetterReplaySuccesses.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void DeadLetterReplayFailed(string tenantId)
        => DeadLetterReplayFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void DeadLetterExpired(int count)
        => DeadLetterEntriesExpired.Add(count);

    public static void DeadLetterPurged(int count)
        => DeadLetterEntriesPurged.Add(count);

    public static void DeadLetterNotificationSent(string tenantId, string channel, string eventType)
        => DeadLetterNotificationsSent.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("channel", channel),
            new KeyValuePair<string, object?>("event_type", eventType));

    public static void DeadLetterNotificationFailed(string tenantId, string channel, string eventType)
        => DeadLetterNotificationsFailed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("channel", channel),
            new KeyValuePair<string, object?>("event_type", eventType));

    public static void DeadLetterPendingChanged(string tenantId, long delta)
        => DeadLetterPendingCount.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId));

    // SLO metrics
    private static readonly Counter<long> SlosCreated = Meter.CreateCounter<long>(
        "orchestrator.slos.created",
        description: "Total SLOs created");

    private static readonly Counter<long> SlosUpdated = Meter.CreateCounter<long>(
        "orchestrator.slos.updated",
        description: "Total SLO updates");

    private static readonly Counter<long> SloAlertsTriggered = Meter.CreateCounter<long>(
        "orchestrator.slo.alerts_triggered",
        description: "Total SLO alerts triggered");

    private static readonly Counter<long> SloAlertsAcknowledged = Meter.CreateCounter<long>(
        "orchestrator.slo.alerts_acknowledged",
        description: "Total SLO alerts acknowledged");

    private static readonly Counter<long> SloAlertsResolved = Meter.CreateCounter<long>(
        "orchestrator.slo.alerts_resolved",
        description: "Total SLO alerts resolved");

    private static readonly Histogram<double> SloBudgetConsumed = Meter.CreateHistogram<double>(
        "orchestrator.slo.budget_consumed",
        unit: "ratio",
        description: "SLO error budget consumed (0-1)");

    private static readonly Histogram<double> SloBurnRate = Meter.CreateHistogram<double>(
        "orchestrator.slo.burn_rate",
        unit: "ratio",
        description: "SLO burn rate (1.0 = sustainable)");

    private static readonly Histogram<double> SloCurrentSli = Meter.CreateHistogram<double>(
        "orchestrator.slo.current_sli",
        unit: "ratio",
        description: "Current SLI value (0-1)");

    private static readonly UpDownCounter<long> SloActiveAlerts = Meter.CreateUpDownCounter<long>(
        "orchestrator.slo.active_alerts",
        description: "Current number of active SLO alerts");

    private static readonly Histogram<double> SloBudgetRemaining = Meter.CreateHistogram<double>(
        "orchestrator.slo.budget_remaining",
        unit: "ratio",
        description: "SLO error budget remaining (0-1)");

    private static readonly Histogram<double> SloTimeToExhaustion = Meter.CreateHistogram<double>(
        "orchestrator.slo.time_to_exhaustion.seconds",
        unit: "s",
        description: "Estimated time until error budget exhaustion");

    public static void SloCreated(string tenantId, string sloType, string? jobType)
        => SlosCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_type", sloType),
            new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));

    public static void SloUpdated(string tenantId, string sloName)
        => SlosUpdated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName));

    public static void SloAlertTriggered(string tenantId, string sloName, string severity)
        => SloAlertsTriggered.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName),
            new KeyValuePair<string, object?>("severity", severity));

    public static void SloAlertAcknowledged(string tenantId, string sloName)
        => SloAlertsAcknowledged.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName));

    public static void SloAlertResolved(string tenantId, string sloName)
        => SloAlertsResolved.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName));

    public static void RecordSloBudgetConsumed(string tenantId, string sloName, string sloType, double consumed)
        => SloBudgetConsumed.Record(consumed, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName),
            new KeyValuePair<string, object?>("slo_type", sloType));

    public static void RecordSloBurnRate(string tenantId, string sloName, string sloType, double burnRate)
        => SloBurnRate.Record(burnRate, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName),
            new KeyValuePair<string, object?>("slo_type", sloType));

    public static void RecordSloCurrentSli(string tenantId, string sloName, string sloType, double sli)
        => SloCurrentSli.Record(sli, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName),
            new KeyValuePair<string, object?>("slo_type", sloType));

    public static void SloActiveAlertsChanged(string tenantId, long delta)
        => SloActiveAlerts.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void RecordSloBudgetRemaining(string tenantId, string sloName, string sloType, double remaining)
        => SloBudgetRemaining.Record(remaining, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName),
            new KeyValuePair<string, object?>("slo_type", sloType));

    public static void RecordSloTimeToExhaustion(string tenantId, string sloName, double seconds)
        => SloTimeToExhaustion.Record(seconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("slo_name", sloName));

    // Audit log metrics
    private static readonly Counter<long> AuditEntriesCreated = Meter.CreateCounter<long>(
        "orchestrator.audit.entries_created",
        description: "Total audit log entries created");

    private static readonly Counter<long> AuditChainVerifications = Meter.CreateCounter<long>(
        "orchestrator.audit.chain_verifications",
        description: "Total audit chain verification operations");

    private static readonly Counter<long> AuditChainFailures = Meter.CreateCounter<long>(
        "orchestrator.audit.chain_failures",
        description: "Total audit chain verification failures");

    private static readonly UpDownCounter<long> AuditEntryCount = Meter.CreateUpDownCounter<long>(
        "orchestrator.audit.entry_count",
        description: "Current number of audit entries");

    // Ledger metrics
    private static readonly Counter<long> LedgerEntriesCreated = Meter.CreateCounter<long>(
        "orchestrator.ledger.entries_created",
        description: "Total ledger entries created");

    private static readonly Counter<long> LedgerChainVerifications = Meter.CreateCounter<long>(
        "orchestrator.ledger.chain_verifications",
        description: "Total ledger chain verification operations");

    private static readonly Counter<long> LedgerChainFailures = Meter.CreateCounter<long>(
        "orchestrator.ledger.chain_failures",
        description: "Total ledger chain verification failures");

    private static readonly Counter<long> LedgerExportsRequested = Meter.CreateCounter<long>(
        "orchestrator.ledger.exports_requested",
        description: "Total ledger export requests");

    private static readonly Counter<long> LedgerExportsCompleted = Meter.CreateCounter<long>(
        "orchestrator.ledger.exports_completed",
        description: "Total ledger exports completed successfully");

    private static readonly Counter<long> LedgerExportsFailed = Meter.CreateCounter<long>(
        "orchestrator.ledger.exports_failed",
        description: "Total ledger exports that failed");

    private static readonly Histogram<double> LedgerExportDuration = Meter.CreateHistogram<double>(
        "orchestrator.ledger.export_duration.seconds",
        unit: "s",
        description: "Ledger export duration");

    private static readonly Histogram<long> LedgerExportSize = Meter.CreateHistogram<long>(
        "orchestrator.ledger.export_size.bytes",
        unit: "bytes",
        description: "Ledger export file size");

    // Manifest metrics
    private static readonly Counter<long> ManifestsCreated = Meter.CreateCounter<long>(
        "orchestrator.manifests.created",
        description: "Total signed manifests created");

    private static readonly Counter<long> ManifestVerifications = Meter.CreateCounter<long>(
        "orchestrator.manifests.verifications",
        description: "Total manifest verification operations");

    private static readonly Counter<long> ManifestVerificationFailures = Meter.CreateCounter<long>(
        "orchestrator.manifests.verification_failures",
        description: "Total manifest verification failures");

    public static void AuditEntryCreated(string tenantId, string eventType, string resourceType)
        => AuditEntriesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("event_type", eventType),
            new KeyValuePair<string, object?>("resource_type", resourceType));

    public static void AuditChainVerified(string tenantId, bool success)
    {
        AuditChainVerifications.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
        if (!success)
        {
            AuditChainFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
        }
    }

    public static void AuditEntryCountChanged(string tenantId, long delta)
        => AuditEntryCount.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId));

    public static void LedgerEntryCreated(string tenantId, string runType, string finalStatus)
        => LedgerEntriesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("run_type", runType),
            new KeyValuePair<string, object?>("final_status", finalStatus));

    public static void LedgerChainVerified(string tenantId, bool success)
    {
        LedgerChainVerifications.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
        if (!success)
        {
            LedgerChainFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
        }
    }

    public static void LedgerExportRequested(string tenantId, string format)
        => LedgerExportsRequested.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("format", format));

    public static void LedgerExportCompleted(string tenantId, string format)
        => LedgerExportsCompleted.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("format", format));

    public static void LedgerExportFailed(string tenantId, string format)
        => LedgerExportsFailed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("format", format));

    public static void RecordLedgerExportDuration(string tenantId, string format, double durationSeconds)
        => LedgerExportDuration.Record(durationSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("format", format));

    public static void RecordLedgerExportSize(string tenantId, string format, long sizeBytes)
        => LedgerExportSize.Record(sizeBytes, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("format", format));

    public static void ManifestCreated(string tenantId, string provenanceType)
        => ManifestsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("provenance_type", provenanceType));

    public static void ManifestVerified(string tenantId, bool success)
    {
        ManifestVerifications.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
        if (!success)
        {
            ManifestVerificationFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
        }
    }
}
@@ -0,0 +1,130 @@
namespace StellaOps.Orchestrator.Infrastructure.Options;

/// <summary>
/// Configuration options for the Orchestrator service.
/// </summary>
public sealed class OrchestratorServiceOptions
{
    /// <summary>
    /// Configuration section name.
    /// </summary>
    public const string SectionName = "Orchestrator";
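
    // Registration sketch (illustrative; standard options-pattern usage):
    //   services.Configure<OrchestratorServiceOptions>(
    //       configuration.GetSection(OrchestratorServiceOptions.SectionName));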

    /// <summary>
    /// HTTP header name for tenant identification.
    /// </summary>
    public string TenantHeader { get; set; } = "X-Tenant-Id";

    /// <summary>
    /// Database connection options.
    /// </summary>
    public DatabaseOptions Database { get; set; } = new();

    /// <summary>
    /// Lease management options.
    /// </summary>
    public LeaseOptions Lease { get; set; } = new();

    /// <summary>
    /// Rate-limiting options.
    /// </summary>
    public RateLimitOptions RateLimit { get; set; } = new();

    /// <summary>
    /// Database connection options.
    /// </summary>
    public sealed class DatabaseOptions
    {
        /// <summary>
        /// PostgreSQL connection string.
        /// </summary>
        public string ConnectionString { get; set; } = string.Empty;

        /// <summary>
        /// Command timeout in seconds.
        /// </summary>
        public int CommandTimeoutSeconds { get; set; } = 30;

        /// <summary>
        /// Enable connection pooling.
        /// </summary>
        public bool EnablePooling { get; set; } = true;

        /// <summary>
        /// Minimum pool size.
        /// </summary>
        public int MinPoolSize { get; set; } = 1;

        /// <summary>
        /// Maximum pool size.
        /// </summary>
        public int MaxPoolSize { get; set; } = 100;
    }

    /// <summary>
    /// Lease management options.
    /// </summary>
    public sealed class LeaseOptions
    {
        /// <summary>
        /// Default lease duration in seconds.
        /// </summary>
        public int DefaultLeaseDurationSeconds { get; set; } = 300;

        /// <summary>
        /// Maximum lease duration in seconds.
        /// </summary>
        public int MaxLeaseDurationSeconds { get; set; } = 3600;

        /// <summary>
        /// Lease renewal threshold (renew when this fraction of lease remains).
        /// </summary>
        public double RenewalThreshold { get; set; } = 0.5;

        /// <summary>
        /// Interval for checking expired leases in seconds.
        /// </summary>
        public int ExpiryCheckIntervalSeconds { get; set; } = 30;
    }
|
||||
/// <summary>
|
||||
/// Rate-limiting options.
|
||||
/// </summary>
|
||||
public sealed class RateLimitOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Default maximum concurrent active jobs per tenant.
|
||||
/// </summary>
|
||||
public int DefaultMaxActive { get; set; } = 10;
|
||||
|
||||
/// <summary>
|
||||
/// Default maximum jobs per hour per tenant.
|
||||
/// </summary>
|
||||
public int DefaultMaxPerHour { get; set; } = 1000;
|
||||
|
||||
/// <summary>
|
||||
/// Default burst capacity for token bucket.
|
||||
/// </summary>
|
||||
public int DefaultBurstCapacity { get; set; } = 50;
|
||||
|
||||
/// <summary>
|
||||
/// Default token refill rate (tokens per second).
|
||||
/// </summary>
|
||||
public double DefaultRefillRate { get; set; } = 1.0;
|
||||
|
||||
/// <summary>
|
||||
/// Failure rate threshold for circuit breaker (0.0-1.0).
|
||||
/// </summary>
|
||||
public double CircuitBreakerThreshold { get; set; } = 0.5;
|
||||
|
||||
/// <summary>
|
||||
/// Window size in minutes for failure rate calculation.
|
||||
/// </summary>
|
||||
public int CircuitBreakerWindowMinutes { get; set; } = 5;
|
||||
|
||||
/// <summary>
|
||||
/// Minimum sample size before circuit breaker can trip.
|
||||
/// </summary>
|
||||
public int CircuitBreakerMinSamples { get; set; } = 10;
|
||||
}
|
||||
}
|
||||
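A minimal startup sketch for binding this class with the standard .NET options pattern. The `builder` variable (a `WebApplicationBuilder`) is an assumption for illustration; only `OrchestratorServiceOptions` and its `SectionName` constant come from this commit.

// Hypothetical Program.cs registration; binds the "Orchestrator" configuration
// section (including the nested Database, Lease, and RateLimit objects).
builder.Services.Configure<OrchestratorServiceOptions>(
    builder.Configuration.GetSection(OrchestratorServiceOptions.SectionName));

Consumers such as the data source below then receive the bound values through IOptions&lt;OrchestratorServiceOptions&gt;.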
@@ -0,0 +1,118 @@
using System.Data;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Orchestrator.Infrastructure.Options;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// Manages PostgreSQL connections for the Orchestrator service.
/// Configures session-level tenant context for row-level security.
/// </summary>
public sealed class OrchestratorDataSource : IAsyncDisposable
{
    private readonly NpgsqlDataSource _dataSource;
    private readonly OrchestratorServiceOptions.DatabaseOptions _options;
    private readonly ILogger<OrchestratorDataSource> _logger;

    public OrchestratorDataSource(
        IOptions<OrchestratorServiceOptions> options,
        ILogger<OrchestratorDataSource> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value.Database;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        var builder = new NpgsqlDataSourceBuilder(_options.ConnectionString);
        _dataSource = builder.Build();
    }

    /// <summary>
    /// Command timeout in seconds.
    /// </summary>
    public int CommandTimeoutSeconds => _options.CommandTimeoutSeconds;

    /// <summary>
    /// Disposes the data source and releases all connections.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        await _dataSource.DisposeAsync().ConfigureAwait(false);
    }

    /// <summary>
    /// Opens a connection with tenant context configured.
    /// </summary>
    /// <param name="tenantId">Tenant identifier for session configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Open PostgreSQL connection.</returns>
    public Task<NpgsqlConnection> OpenConnectionAsync(string tenantId, CancellationToken cancellationToken)
        => OpenConnectionInternalAsync(tenantId, "unspecified", cancellationToken);

    /// <summary>
    /// Opens a connection with tenant context and role label configured.
    /// </summary>
    /// <param name="tenantId">Tenant identifier for session configuration.</param>
    /// <param name="role">Role label for metrics/logging (e.g., "reader", "writer").</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Open PostgreSQL connection.</returns>
    public Task<NpgsqlConnection> OpenConnectionAsync(string tenantId, string role, CancellationToken cancellationToken)
        => OpenConnectionInternalAsync(tenantId, role, cancellationToken);

    private async Task<NpgsqlConnection> OpenConnectionInternalAsync(string tenantId, string role, CancellationToken cancellationToken)
    {
        var connection = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            await ConfigureSessionAsync(connection, tenantId, cancellationToken).ConfigureAwait(false);
            OrchestratorMetrics.ConnectionOpened(role);
            connection.StateChange += (_, args) =>
            {
                if (args.CurrentState == ConnectionState.Closed)
                {
                    OrchestratorMetrics.ConnectionClosed(role);
                }
            };
        }
        catch
        {
            await connection.DisposeAsync().ConfigureAwait(false);
            throw;
        }

        return connection;
    }

    private async Task ConfigureSessionAsync(NpgsqlConnection connection, string tenantId, CancellationToken cancellationToken)
    {
        try
        {
            // Set UTC timezone for deterministic timestamps
            await using (var command = new NpgsqlCommand("SET TIME ZONE 'UTC';", connection))
            {
                command.CommandTimeout = _options.CommandTimeoutSeconds;
                await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            // Set tenant context for row-level security
            if (!string.IsNullOrWhiteSpace(tenantId))
            {
                await using var tenantCommand = new NpgsqlCommand("SELECT set_config('app.current_tenant', @tenant, false);", connection);
                tenantCommand.CommandTimeout = _options.CommandTimeoutSeconds;
                tenantCommand.Parameters.AddWithValue("tenant", tenantId);
                await tenantCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            if (_logger.IsEnabled(LogLevel.Error))
            {
                _logger.LogError(ex, "Failed to configure PostgreSQL session for tenant {TenantId}.", tenantId);
            }

            throw;
        }
    }
}
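A minimal usage sketch, assuming an injected OrchestratorDataSource named `dataSource`; the query text is illustrative only and relies on the `app.current_tenant` setting configured above.

await using var connection = await dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
await using var command = new NpgsqlCommand("SELECT current_setting('app.current_tenant', true);", connection);
var currentTenant = (string?)await command.ExecuteScalarAsync(cancellationToken);
// With row-level security policies keyed on app.current_tenant, every
// subsequent query on this connection is scoped to this tenant's rows.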
@@ -0,0 +1,362 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of the artifact repository.
/// </summary>
public sealed class PostgresArtifactRepository : IArtifactRepository
{
    private const string SelectArtifactColumns = """
        artifact_id, tenant_id, job_id, run_id, artifact_type, uri, digest,
        mime_type, size_bytes, created_at, metadata
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectArtifactColumns}
        FROM artifacts
        WHERE tenant_id = @tenant_id AND artifact_id = @artifact_id
        """;

    private const string SelectByJobIdSql = $"""
        SELECT {SelectArtifactColumns}
        FROM artifacts
        WHERE tenant_id = @tenant_id AND job_id = @job_id
        ORDER BY created_at
        """;

    private const string SelectByRunIdSql = $"""
        SELECT {SelectArtifactColumns}
        FROM artifacts
        WHERE tenant_id = @tenant_id AND run_id = @run_id
        ORDER BY created_at
        """;

    private const string SelectByDigestSql = $"""
        SELECT {SelectArtifactColumns}
        FROM artifacts
        WHERE tenant_id = @tenant_id AND digest = @digest
        """;

    private const string InsertArtifactSql = """
        INSERT INTO artifacts (
            artifact_id, tenant_id, job_id, run_id, artifact_type, uri, digest,
            mime_type, size_bytes, created_at, metadata)
        VALUES (
            @artifact_id, @tenant_id, @job_id, @run_id, @artifact_type, @uri, @digest,
            @mime_type, @size_bytes, @created_at, @metadata)
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresArtifactRepository> _logger;

    public PostgresArtifactRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresArtifactRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<Artifact?> GetByIdAsync(string tenantId, Guid artifactId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("artifact_id", artifactId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapArtifact(reader);
    }

    public async Task<IReadOnlyList<Artifact>> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByJobIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_id", jobId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var artifacts = new List<Artifact>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            artifacts.Add(MapArtifact(reader));
        }
        return artifacts;
    }

    public async Task<IReadOnlyList<Artifact>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByRunIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("run_id", runId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var artifacts = new List<Artifact>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            artifacts.Add(MapArtifact(reader));
        }
        return artifacts;
    }

    public async Task<Artifact?> GetByDigestAsync(string tenantId, string digest, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByDigestSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("digest", digest);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapArtifact(reader);
    }

    public async Task CreateAsync(Artifact artifact, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(artifact.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertArtifactSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddArtifactParameters(command, artifact);

        try
        {
            await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            OrchestratorMetrics.ArtifactCreated(artifact.TenantId, artifact.ArtifactType);
        }
        catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
        {
            _logger.LogWarning(ex, "Duplicate artifact ID or digest: {ArtifactId}, {Digest}", artifact.ArtifactId, artifact.Digest);
            throw new DuplicateArtifactException(artifact.ArtifactId, artifact.Digest, ex);
        }
    }

    public async Task CreateBatchAsync(IEnumerable<Artifact> artifacts, CancellationToken cancellationToken)
    {
        var artifactList = artifacts.ToList();
        if (artifactList.Count == 0)
        {
            return;
        }

        var tenantId = artifactList[0].TenantId;
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            foreach (var artifact in artifactList)
            {
                await using var command = new NpgsqlCommand(InsertArtifactSql, connection, transaction);
                command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                AddArtifactParameters(command, artifact);
                await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
                OrchestratorMetrics.ArtifactCreated(artifact.TenantId, artifact.ArtifactType);
            }

            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogWarning(ex, "Duplicate artifact in batch insert");
            throw;
        }
        catch
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            throw;
        }
    }

    public async Task<IReadOnlyList<Artifact>> ListAsync(
        string tenantId,
        string? artifactType,
        string? jobType,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, artifactType, jobType, createdAfter, createdBefore, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var artifacts = new List<Artifact>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            artifacts.Add(MapArtifact(reader));
        }
        return artifacts;
    }

    public async Task<int> CountAsync(
        string tenantId,
        string? artifactType,
        string? jobType,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildCountQuery(tenantId, artifactType, jobType);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt32(result);
    }

    private static void AddArtifactParameters(NpgsqlCommand command, Artifact artifact)
    {
        command.Parameters.AddWithValue("artifact_id", artifact.ArtifactId);
        command.Parameters.AddWithValue("tenant_id", artifact.TenantId);
        command.Parameters.AddWithValue("job_id", artifact.JobId);
        command.Parameters.AddWithValue("run_id", (object?)artifact.RunId ?? DBNull.Value);
        command.Parameters.AddWithValue("artifact_type", artifact.ArtifactType);
        command.Parameters.AddWithValue("uri", artifact.Uri);
        command.Parameters.AddWithValue("digest", artifact.Digest);
        command.Parameters.AddWithValue("mime_type", (object?)artifact.MimeType ?? DBNull.Value);
        command.Parameters.AddWithValue("size_bytes", (object?)artifact.SizeBytes ?? DBNull.Value);
        command.Parameters.AddWithValue("created_at", artifact.CreatedAt);
        command.Parameters.Add(new NpgsqlParameter("metadata", NpgsqlDbType.Jsonb)
        {
            Value = (object?)artifact.Metadata ?? DBNull.Value
        });
    }

    private static Artifact MapArtifact(NpgsqlDataReader reader)
    {
        return new Artifact(
            ArtifactId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            JobId: reader.GetGuid(2),
            RunId: reader.IsDBNull(3) ? null : reader.GetGuid(3),
            ArtifactType: reader.GetString(4),
            Uri: reader.GetString(5),
            Digest: reader.GetString(6),
            MimeType: reader.IsDBNull(7) ? null : reader.GetString(7),
            SizeBytes: reader.IsDBNull(8) ? null : reader.GetInt64(8),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(9),
            Metadata: reader.IsDBNull(10) ? null : reader.GetString(10));
    }

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        string? artifactType,
        string? jobType,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectArtifactColumns} FROM artifacts a WHERE a.tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (!string.IsNullOrEmpty(artifactType))
        {
            sb.Append(" AND a.artifact_type = @artifact_type");
            parameters.Add(("artifact_type", artifactType));
        }

        if (!string.IsNullOrEmpty(jobType))
        {
            sb.Append(" AND EXISTS (SELECT 1 FROM jobs j WHERE j.job_id = a.job_id AND j.tenant_id = a.tenant_id AND j.job_type = @job_type)");
            parameters.Add(("job_type", jobType));
        }

        if (createdAfter.HasValue)
        {
            sb.Append(" AND a.created_at >= @created_after");
            parameters.Add(("created_after", createdAfter.Value));
        }

        if (createdBefore.HasValue)
        {
            sb.Append(" AND a.created_at < @created_before");
            parameters.Add(("created_before", createdBefore.Value));
        }

        sb.Append(" ORDER BY a.created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }

    private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
        string tenantId,
        string? artifactType,
        string? jobType)
    {
        var sb = new StringBuilder();
        sb.Append("SELECT COUNT(*) FROM artifacts a WHERE a.tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (!string.IsNullOrEmpty(artifactType))
        {
            sb.Append(" AND a.artifact_type = @artifact_type");
            parameters.Add(("artifact_type", artifactType));
        }

        if (!string.IsNullOrEmpty(jobType))
        {
            sb.Append(" AND EXISTS (SELECT 1 FROM jobs j WHERE j.job_id = a.job_id AND j.tenant_id = a.tenant_id AND j.job_type = @job_type)");
            parameters.Add(("job_type", jobType));
        }

        return (sb.ToString(), parameters);
    }
}

/// <summary>
/// Exception thrown when attempting to create a duplicate artifact.
/// </summary>
public sealed class DuplicateArtifactException : Exception
{
    public Guid ArtifactId { get; }
    public string Digest { get; }

    public DuplicateArtifactException(Guid artifactId, string digest, Exception innerException)
        : base($"Artifact with ID '{artifactId}' or digest '{digest}' already exists.", innerException)
    {
        ArtifactId = artifactId;
        Digest = digest;
    }
}
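A minimal caller-side sketch of idempotent artifact registration, assuming an injected IArtifactRepository named `repository`; it relies only on the members shown in this file and treats the unique-violation path as "already registered" rather than a failure.

try
{
    await repository.CreateAsync(artifact, cancellationToken);
}
catch (DuplicateArtifactException)
{
    // Another writer won the race; fall back to the stored record
    // instead of surfacing the conflict to the caller.
    var existing = await repository.GetByDigestAsync(artifact.TenantId, artifact.Digest, cancellationToken);
    // ... reconcile against `existing` or return it as the canonical artifact.
}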
@@ -0,0 +1,504 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of the audit repository.
/// </summary>
public sealed class PostgresAuditRepository : IAuditRepository
{
    private const string SelectAuditColumns = """
        entry_id, tenant_id, event_type, resource_type, resource_id, actor_id, actor_type,
        actor_ip, user_agent, http_method, request_path, old_state, new_state, description,
        correlation_id, previous_entry_hash, content_hash, sequence_number, occurred_at, metadata
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectAuditColumns}
        FROM audit_entries
        WHERE tenant_id = @tenant_id AND entry_id = @entry_id
        """;

    private const string InsertEntrySql = """
        INSERT INTO audit_entries (
            entry_id, tenant_id, event_type, resource_type, resource_id, actor_id, actor_type,
            actor_ip, user_agent, http_method, request_path, old_state, new_state, description,
            correlation_id, previous_entry_hash, content_hash, sequence_number, occurred_at, metadata)
        VALUES (
            @entry_id, @tenant_id, @event_type, @resource_type, @resource_id, @actor_id, @actor_type,
            @actor_ip, @user_agent, @http_method, @request_path, @old_state::jsonb, @new_state::jsonb, @description,
            @correlation_id, @previous_entry_hash, @content_hash, @sequence_number, @occurred_at, @metadata::jsonb)
        """;

    private const string SelectLatestSql = $"""
        SELECT {SelectAuditColumns}
        FROM audit_entries
        WHERE tenant_id = @tenant_id
        ORDER BY sequence_number DESC
        LIMIT 1
        """;

    private const string GetSequenceSql = """
        SELECT next_seq, prev_hash FROM next_audit_sequence(@tenant_id)
        """;

    private const string UpdateSequenceHashSql = """
        SELECT update_audit_sequence_hash(@tenant_id, @content_hash)
        """;

    private const string VerifyChainSql = """
        SELECT is_valid, invalid_entry_id, invalid_sequence, error_message
        FROM verify_audit_chain(@tenant_id, @start_seq, @end_seq)
        """;

    private const string GetSummarySql = """
        SELECT total_entries, entries_since, event_types, unique_actors, unique_resources, earliest_entry, latest_entry
        FROM get_audit_summary(@tenant_id, @since)
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresAuditRepository> _logger;

    public PostgresAuditRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresAuditRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<AuditEntry> AppendAsync(
        string tenantId,
        AuditEventType eventType,
        string resourceType,
        Guid resourceId,
        string actorId,
        ActorType actorType,
        string description,
        string? oldState = null,
        string? newState = null,
        string? actorIp = null,
        string? userAgent = null,
        string? httpMethod = null,
        string? requestPath = null,
        string? correlationId = null,
        string? metadata = null,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            // Get next sequence number and previous hash
            long sequenceNumber;
            string? previousEntryHash;

            await using (var seqCommand = new NpgsqlCommand(GetSequenceSql, connection, transaction))
            {
                seqCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                seqCommand.Parameters.AddWithValue("tenant_id", tenantId);

                await using var reader = await seqCommand.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
                if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                {
                    throw new InvalidOperationException("Failed to get next audit sequence.");
                }

                sequenceNumber = reader.GetInt64(0);
                previousEntryHash = reader.IsDBNull(1) ? null : reader.GetString(1);
            }

            // Create the entry
            var entry = AuditEntry.Create(
                tenantId: tenantId,
                eventType: eventType,
                resourceType: resourceType,
                resourceId: resourceId,
                actorId: actorId,
                actorType: actorType,
                description: description,
                oldState: oldState,
                newState: newState,
                actorIp: actorIp,
                userAgent: userAgent,
                httpMethod: httpMethod,
                requestPath: requestPath,
                correlationId: correlationId,
                previousEntryHash: previousEntryHash,
                sequenceNumber: sequenceNumber,
                metadata: metadata);

            // Insert the entry
            await using (var insertCommand = new NpgsqlCommand(InsertEntrySql, connection, transaction))
            {
                insertCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                AddEntryParameters(insertCommand, entry);
                await insertCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            // Update sequence hash
            await using (var updateCommand = new NpgsqlCommand(UpdateSequenceHashSql, connection, transaction))
            {
                updateCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                updateCommand.Parameters.AddWithValue("tenant_id", tenantId);
                updateCommand.Parameters.AddWithValue("content_hash", entry.ContentHash);
                await updateCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);

            OrchestratorMetrics.AuditEntryCreated(tenantId, eventType.ToString(), resourceType);
            _logger.LogDebug("Audit entry {EntryId} appended for tenant {TenantId}, sequence {Sequence}",
                entry.EntryId, tenantId, sequenceNumber);

            return entry;
        }
        catch
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            throw;
        }
    }

    public async Task<AuditEntry?> GetByIdAsync(
        string tenantId,
        Guid entryId,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("entry_id", entryId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<IReadOnlyList<AuditEntry>> ListAsync(
        string tenantId,
        AuditEventType? eventType = null,
        string? resourceType = null,
        Guid? resourceId = null,
        string? actorId = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var (sql, parameters) = BuildListQuery(tenantId, eventType, resourceType, resourceId, actorId, startTime, endTime, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<AuditEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<IReadOnlyList<AuditEntry>> GetBySequenceRangeAsync(
        string tenantId,
        long startSequence,
        long endSequence,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectAuditColumns}
            FROM audit_entries
            WHERE tenant_id = @tenant_id
              AND sequence_number >= @start_seq
              AND sequence_number <= @end_seq
            ORDER BY sequence_number ASC
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("start_seq", startSequence);
        command.Parameters.AddWithValue("end_seq", endSequence);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<AuditEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<AuditEntry?> GetLatestAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectLatestSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<IReadOnlyList<AuditEntry>> GetByResourceAsync(
        string tenantId,
        string resourceType,
        Guid resourceId,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectAuditColumns}
            FROM audit_entries
            WHERE tenant_id = @tenant_id
              AND resource_type = @resource_type
              AND resource_id = @resource_id
            ORDER BY occurred_at DESC
            LIMIT @limit
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("resource_type", resourceType);
        command.Parameters.AddWithValue("resource_id", resourceId);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<AuditEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<long> GetCountAsync(
        string tenantId,
        AuditEventType? eventType = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder("SELECT COUNT(*) FROM audit_entries WHERE tenant_id = @tenant_id");
        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (eventType.HasValue)
        {
            sb.Append(" AND event_type = @event_type");
            parameters.Add(("event_type", (int)eventType.Value));
        }

        if (startTime.HasValue)
        {
            sb.Append(" AND occurred_at >= @start_time");
            parameters.Add(("start_time", startTime.Value));
        }

        if (endTime.HasValue)
        {
            sb.Append(" AND occurred_at <= @end_time");
            parameters.Add(("end_time", endTime.Value));
        }

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sb.ToString(), connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(result);
    }

    public async Task<ChainVerificationResult> VerifyChainAsync(
        string tenantId,
        long? startSequence = null,
        long? endSequence = null,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(VerifyChainSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("start_seq", (object?)startSequence ?? 1L);
        command.Parameters.AddWithValue("end_seq", (object?)endSequence ?? DBNull.Value);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return new ChainVerificationResult(true, null, null, null);
        }

        return new ChainVerificationResult(
            IsValid: reader.GetBoolean(0),
            InvalidEntryId: reader.IsDBNull(1) ? null : reader.GetGuid(1),
            InvalidSequence: reader.IsDBNull(2) ? null : reader.GetInt64(2),
            ErrorMessage: reader.IsDBNull(3) ? null : reader.GetString(3));
    }

    public async Task<AuditSummary> GetSummaryAsync(
        string tenantId,
        DateTimeOffset? since = null,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(GetSummarySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("since", (object?)since ?? DBNull.Value);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return new AuditSummary(0, 0, 0, 0, 0, null, null);
        }

        return new AuditSummary(
            TotalEntries: reader.GetInt64(0),
            EntriesSince: reader.GetInt64(1),
            EventTypes: reader.GetInt64(2),
            UniqueActors: reader.GetInt64(3),
            UniqueResources: reader.GetInt64(4),
            EarliestEntry: reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5),
            LatestEntry: reader.IsDBNull(6) ? null : reader.GetFieldValue<DateTimeOffset>(6));
    }

    private static void AddEntryParameters(NpgsqlCommand command, AuditEntry entry)
    {
        command.Parameters.AddWithValue("entry_id", entry.EntryId);
        command.Parameters.AddWithValue("tenant_id", entry.TenantId);
        command.Parameters.AddWithValue("event_type", (int)entry.EventType);
        command.Parameters.AddWithValue("resource_type", entry.ResourceType);
        command.Parameters.AddWithValue("resource_id", entry.ResourceId);
        command.Parameters.AddWithValue("actor_id", entry.ActorId);
        command.Parameters.AddWithValue("actor_type", (int)entry.ActorType);
        command.Parameters.AddWithValue("actor_ip", (object?)entry.ActorIp ?? DBNull.Value);
        command.Parameters.AddWithValue("user_agent", (object?)entry.UserAgent ?? DBNull.Value);
        command.Parameters.AddWithValue("http_method", (object?)entry.HttpMethod ?? DBNull.Value);
        command.Parameters.AddWithValue("request_path", (object?)entry.RequestPath ?? DBNull.Value);
        command.Parameters.AddWithValue("old_state", (object?)entry.OldState ?? DBNull.Value);
        command.Parameters.AddWithValue("new_state", (object?)entry.NewState ?? DBNull.Value);
        command.Parameters.AddWithValue("description", entry.Description);
        command.Parameters.AddWithValue("correlation_id", (object?)entry.CorrelationId ?? DBNull.Value);
        command.Parameters.AddWithValue("previous_entry_hash", (object?)entry.PreviousEntryHash ?? DBNull.Value);
        command.Parameters.AddWithValue("content_hash", entry.ContentHash);
        command.Parameters.AddWithValue("sequence_number", entry.SequenceNumber);
        command.Parameters.AddWithValue("occurred_at", entry.OccurredAt);
        command.Parameters.AddWithValue("metadata", (object?)entry.Metadata ?? DBNull.Value);
    }

    private static AuditEntry MapEntry(NpgsqlDataReader reader)
    {
        return new AuditEntry(
            EntryId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            EventType: (AuditEventType)reader.GetInt32(2),
            ResourceType: reader.GetString(3),
            ResourceId: reader.GetGuid(4),
            ActorId: reader.GetString(5),
            ActorType: (ActorType)reader.GetInt32(6),
            ActorIp: reader.IsDBNull(7) ? null : reader.GetString(7),
            UserAgent: reader.IsDBNull(8) ? null : reader.GetString(8),
            HttpMethod: reader.IsDBNull(9) ? null : reader.GetString(9),
            RequestPath: reader.IsDBNull(10) ? null : reader.GetString(10),
            OldState: reader.IsDBNull(11) ? null : reader.GetString(11),
            NewState: reader.IsDBNull(12) ? null : reader.GetString(12),
            Description: reader.GetString(13),
            CorrelationId: reader.IsDBNull(14) ? null : reader.GetString(14),
            PreviousEntryHash: reader.IsDBNull(15) ? null : reader.GetString(15),
            ContentHash: reader.GetString(16),
            SequenceNumber: reader.GetInt64(17),
            OccurredAt: reader.GetFieldValue<DateTimeOffset>(18),
            Metadata: reader.IsDBNull(19) ? null : reader.GetString(19));
    }

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        AuditEventType? eventType,
        string? resourceType,
        Guid? resourceId,
        string? actorId,
        DateTimeOffset? startTime,
        DateTimeOffset? endTime,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectAuditColumns} FROM audit_entries WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (eventType.HasValue)
        {
            sb.Append(" AND event_type = @event_type");
            parameters.Add(("event_type", (int)eventType.Value));
        }

        if (resourceType is not null)
        {
            sb.Append(" AND resource_type = @resource_type");
            parameters.Add(("resource_type", resourceType));
        }

        if (resourceId.HasValue)
        {
            sb.Append(" AND resource_id = @resource_id");
            parameters.Add(("resource_id", resourceId.Value));
        }

        if (actorId is not null)
        {
            sb.Append(" AND actor_id = @actor_id");
            parameters.Add(("actor_id", actorId));
        }

        if (startTime.HasValue)
        {
            sb.Append(" AND occurred_at >= @start_time");
            parameters.Add(("start_time", startTime.Value));
        }

        if (endTime.HasValue)
        {
            sb.Append(" AND occurred_at <= @end_time");
            parameters.Add(("end_time", endTime.Value));
        }

        sb.Append(" ORDER BY occurred_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }
}
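A minimal sketch of a periodic integrity check built on VerifyChainAsync, e.g. from a hosted service. The `auditRepository` and `logger` variables are assumed injections; the result members come from the ChainVerificationResult record above.

var result = await auditRepository.VerifyChainAsync(tenantId, cancellationToken: cancellationToken);
if (!result.IsValid)
{
    // Surface the first broken link for investigation: a tampered or missing
    // entry changes its content hash, which breaks every later previous_entry_hash.
    logger.LogCritical(
        "Audit chain invalid for tenant {TenantId} at sequence {Sequence} (entry {EntryId}): {Error}",
        tenantId, result.InvalidSequence, result.InvalidEntryId, result.ErrorMessage);
}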
@@ -0,0 +1,395 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of the backfill request repository.
/// </summary>
public sealed class PostgresBackfillRepository : IBackfillRepository
{
    private const string SelectBackfillColumns = """
        backfill_id, tenant_id, source_id, job_type, scope_key, status,
        window_start, window_end, current_position, total_events,
        processed_events, skipped_events, failed_events, batch_size,
        dry_run, force_reprocess, estimated_duration, max_duration,
        safety_checks, reason, ticket, created_at, started_at, completed_at,
        created_by, updated_by, error_message
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectBackfillColumns}
        FROM backfill_requests
        WHERE tenant_id = @tenant_id AND backfill_id = @backfill_id
        """;

    private const string InsertBackfillSql = """
        INSERT INTO backfill_requests (
            backfill_id, tenant_id, source_id, job_type, scope_key, status,
            window_start, window_end, current_position, total_events,
            processed_events, skipped_events, failed_events, batch_size,
            dry_run, force_reprocess, estimated_duration, max_duration,
            safety_checks, reason, ticket, created_at, started_at, completed_at,
            created_by, updated_by, error_message)
        VALUES (
            @backfill_id, @tenant_id, @source_id, @job_type, @scope_key, @status,
            @window_start, @window_end, @current_position, @total_events,
            @processed_events, @skipped_events, @failed_events, @batch_size,
            @dry_run, @force_reprocess, @estimated_duration, @max_duration,
            @safety_checks, @reason, @ticket, @created_at, @started_at, @completed_at,
            @created_by, @updated_by, @error_message)
        """;

    private const string UpdateBackfillSql = """
        UPDATE backfill_requests
        SET status = @status,
            current_position = @current_position,
            total_events = @total_events,
            processed_events = @processed_events,
            skipped_events = @skipped_events,
            failed_events = @failed_events,
            estimated_duration = @estimated_duration,
            safety_checks = @safety_checks,
            started_at = @started_at,
            completed_at = @completed_at,
            updated_by = @updated_by,
            error_message = @error_message
        WHERE tenant_id = @tenant_id AND backfill_id = @backfill_id
        """;

    private const string SelectOverlappingSql = """
        SELECT COUNT(*) FROM backfill_requests
        WHERE tenant_id = @tenant_id
          AND scope_key = @scope_key
          AND status IN ('pending', 'validating', 'running', 'paused')
          AND window_start < @window_end
          AND window_end > @window_start
          AND (@exclude_backfill_id IS NULL OR backfill_id != @exclude_backfill_id)
        """;

    private const string SelectActiveByScopeSql = $"""
        SELECT {SelectBackfillColumns}
        FROM backfill_requests
        WHERE tenant_id = @tenant_id
          AND scope_key = @scope_key
          AND status IN ('pending', 'validating', 'running', 'paused')
        ORDER BY created_at DESC
        """;

    private const string CountByStatusSql = """
        SELECT status, COUNT(*) as count
        FROM backfill_requests
        WHERE tenant_id = @tenant_id
        GROUP BY status
        """;

    private const string SelectNextPendingSql = $"""
        SELECT {SelectBackfillColumns}
        FROM backfill_requests
        WHERE tenant_id = @tenant_id
          AND status = 'pending'
        ORDER BY created_at ASC
        LIMIT 1
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresBackfillRepository> _logger;
    private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };

    public PostgresBackfillRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresBackfillRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<BackfillRequest?> GetByIdAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("backfill_id", backfillId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapBackfillRequest(reader);
    }

    public async Task CreateAsync(BackfillRequest request, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(request.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertBackfillSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddBackfillParameters(command, request);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        OrchestratorMetrics.BackfillCreated(request.TenantId, request.ScopeKey);
    }

    public async Task UpdateAsync(BackfillRequest request, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(request.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateBackfillSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", request.TenantId);
        command.Parameters.AddWithValue("backfill_id", request.BackfillId);
        command.Parameters.AddWithValue("status", request.Status.ToString().ToLowerInvariant());
        command.Parameters.AddWithValue("current_position", (object?)request.CurrentPosition ?? DBNull.Value);
        command.Parameters.AddWithValue("total_events", (object?)request.TotalEvents ?? DBNull.Value);
        command.Parameters.AddWithValue("processed_events", request.ProcessedEvents);
        command.Parameters.AddWithValue("skipped_events", request.SkippedEvents);
        command.Parameters.AddWithValue("failed_events", request.FailedEvents);
        command.Parameters.AddWithValue("estimated_duration", (object?)request.EstimatedDuration ?? DBNull.Value);
        command.Parameters.AddWithValue("safety_checks", request.SafetyChecks is not null
            ? JsonSerializer.Serialize(request.SafetyChecks, JsonOptions)
            : DBNull.Value);
        command.Parameters.AddWithValue("started_at", (object?)request.StartedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)request.CompletedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("updated_by", request.UpdatedBy);
        command.Parameters.AddWithValue("error_message", (object?)request.ErrorMessage ?? DBNull.Value);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows == 0)
        {
            _logger.LogWarning("Backfill request not found for update: {BackfillId}", request.BackfillId);
        }
        else
        {
            OrchestratorMetrics.BackfillStatusChanged(request.TenantId, request.ScopeKey, request.Status.ToString());
        }
    }

    public async Task<IReadOnlyList<BackfillRequest>> ListAsync(
        string tenantId,
        BackfillStatus? status,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, status, sourceId, jobType, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var requests = new List<BackfillRequest>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            requests.Add(MapBackfillRequest(reader));
        }
        return requests;
    }

    public async Task<bool> HasOverlappingActiveAsync(
        string tenantId,
        string scopeKey,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        Guid? excludeBackfillId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectOverlappingSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);
        command.Parameters.AddWithValue("window_start", windowStart);
        command.Parameters.AddWithValue("window_end", windowEnd);
        command.Parameters.AddWithValue("exclude_backfill_id", (object?)excludeBackfillId ?? DBNull.Value);

        var count = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(count) > 0;
    }

    public async Task<IReadOnlyList<BackfillRequest>> GetActiveByScope(
        string tenantId,
        string scopeKey,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectActiveByScopeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var requests = new List<BackfillRequest>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            requests.Add(MapBackfillRequest(reader));
        }
        return requests;
    }

    public async Task<IDictionary<BackfillStatus, int>> CountByStatusAsync(
        string tenantId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(CountByStatusSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var counts = new Dictionary<BackfillStatus, int>();

        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            var statusStr = reader.GetString(0);
            // COUNT(*) comes back from PostgreSQL as bigint; read it as Int64 and narrow explicitly.
            var count = (int)reader.GetInt64(1);
if (Enum.TryParse<BackfillStatus>(statusStr, true, out var status))
|
||||
{
|
||||
counts[status] = count;
|
||||
}
|
||||
}
|
||||
|
||||
return counts;
|
||||
}
|
||||
|
||||
public async Task<BackfillRequest?> GetNextPendingAsync(string tenantId, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(SelectNextPendingSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
|
||||
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return MapBackfillRequest(reader);
|
||||
}
|
||||
|
||||
private static void AddBackfillParameters(NpgsqlCommand command, BackfillRequest request)
|
||||
{
|
||||
command.Parameters.AddWithValue("backfill_id", request.BackfillId);
|
||||
command.Parameters.AddWithValue("tenant_id", request.TenantId);
|
||||
command.Parameters.AddWithValue("source_id", (object?)request.SourceId ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("job_type", (object?)request.JobType ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("scope_key", request.ScopeKey);
|
||||
command.Parameters.AddWithValue("status", request.Status.ToString().ToLowerInvariant());
|
||||
command.Parameters.AddWithValue("window_start", request.WindowStart);
|
||||
command.Parameters.AddWithValue("window_end", request.WindowEnd);
|
||||
command.Parameters.AddWithValue("current_position", (object?)request.CurrentPosition ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("total_events", (object?)request.TotalEvents ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("processed_events", request.ProcessedEvents);
|
||||
command.Parameters.AddWithValue("skipped_events", request.SkippedEvents);
|
||||
command.Parameters.AddWithValue("failed_events", request.FailedEvents);
|
||||
command.Parameters.AddWithValue("batch_size", request.BatchSize);
|
||||
command.Parameters.AddWithValue("dry_run", request.DryRun);
|
||||
command.Parameters.AddWithValue("force_reprocess", request.ForceReprocess);
|
||||
command.Parameters.AddWithValue("estimated_duration", (object?)request.EstimatedDuration ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("max_duration", (object?)request.MaxDuration ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("safety_checks", request.SafetyChecks is not null
|
||||
? JsonSerializer.Serialize(request.SafetyChecks, JsonOptions)
|
||||
: DBNull.Value);
|
||||
command.Parameters.AddWithValue("reason", request.Reason);
|
||||
command.Parameters.AddWithValue("ticket", (object?)request.Ticket ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("created_at", request.CreatedAt);
|
||||
command.Parameters.AddWithValue("started_at", (object?)request.StartedAt ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("completed_at", (object?)request.CompletedAt ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("created_by", request.CreatedBy);
|
||||
command.Parameters.AddWithValue("updated_by", request.UpdatedBy);
|
||||
command.Parameters.AddWithValue("error_message", (object?)request.ErrorMessage ?? DBNull.Value);
|
||||
}
|
||||
|
||||
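    // Note (added comment): the ordinal reads below must stay in sync with the
    // column order declared in SelectBackfillColumns; reordering that list
    // silently shifts every index here.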
    private static BackfillRequest MapBackfillRequest(NpgsqlDataReader reader)
    {
        var safetyChecksJson = reader.IsDBNull(18) ? null : reader.GetString(18);
        var safetyChecks = safetyChecksJson is not null
            ? JsonSerializer.Deserialize<BackfillSafetyChecks>(safetyChecksJson, JsonOptions)
            : null;

        return new BackfillRequest(
            BackfillId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            SourceId: reader.IsDBNull(2) ? null : reader.GetGuid(2),
            JobType: reader.IsDBNull(3) ? null : reader.GetString(3),
            ScopeKey: reader.GetString(4),
            Status: Enum.Parse<BackfillStatus>(reader.GetString(5), ignoreCase: true),
            WindowStart: reader.GetFieldValue<DateTimeOffset>(6),
            WindowEnd: reader.GetFieldValue<DateTimeOffset>(7),
            CurrentPosition: reader.IsDBNull(8) ? null : reader.GetFieldValue<DateTimeOffset>(8),
            TotalEvents: reader.IsDBNull(9) ? null : reader.GetInt64(9),
            ProcessedEvents: reader.GetInt64(10),
            SkippedEvents: reader.GetInt64(11),
            FailedEvents: reader.GetInt64(12),
            BatchSize: reader.GetInt32(13),
            DryRun: reader.GetBoolean(14),
            ForceReprocess: reader.GetBoolean(15),
            EstimatedDuration: reader.IsDBNull(16) ? null : reader.GetFieldValue<TimeSpan>(16),
            MaxDuration: reader.IsDBNull(17) ? null : reader.GetFieldValue<TimeSpan>(17),
            SafetyChecks: safetyChecks,
            Reason: reader.GetString(19),
            Ticket: reader.IsDBNull(20) ? null : reader.GetString(20),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(21),
            StartedAt: reader.IsDBNull(22) ? null : reader.GetFieldValue<DateTimeOffset>(22),
            CompletedAt: reader.IsDBNull(23) ? null : reader.GetFieldValue<DateTimeOffset>(23),
            CreatedBy: reader.GetString(24),
            UpdatedBy: reader.GetString(25),
            ErrorMessage: reader.IsDBNull(26) ? null : reader.GetString(26));
    }

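    // Note (added comment): only fixed SQL fragments are concatenated below;
    // every caller-supplied filter value is bound as a parameter, so the
    // dynamically built query stays injection-safe.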
    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        BackfillStatus? status,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectBackfillColumns} FROM backfill_requests WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (status.HasValue)
        {
            sb.Append(" AND status = @status");
            parameters.Add(("status", status.Value.ToString().ToLowerInvariant()));
        }

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (jobType is not null)
        {
            sb.Append(" AND job_type = @job_type");
            parameters.Add(("job_type", jobType));
        }

        sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }
}
@@ -0,0 +1,678 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.DeadLetter;
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of dead-letter entry repository.
/// </summary>
public sealed class PostgresDeadLetterRepository : IDeadLetterRepository
{
    private const string SelectEntryColumns = """
        entry_id, tenant_id, original_job_id, run_id, source_id, job_type,
        payload, payload_digest, idempotency_key, correlation_id,
        status, error_code, failure_reason, remediation_hint, category, is_retryable,
        original_attempts, replay_attempts, max_replay_attempts,
        failed_at, created_at, updated_at, expires_at, resolved_at,
        resolution_notes, created_by, updated_by
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectEntryColumns}
        FROM dead_letter_entries
        WHERE tenant_id = @tenant_id AND entry_id = @entry_id
        """;

    private const string SelectByJobIdSql = $"""
        SELECT {SelectEntryColumns}
        FROM dead_letter_entries
        WHERE tenant_id = @tenant_id AND original_job_id = @original_job_id
        ORDER BY created_at DESC
        LIMIT 1
        """;

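    // Note (added comment): the payload column is jsonb; casting the text
    // parameter with @payload::jsonb lets PostgreSQL validate and store it as
    // JSON without Npgsql-specific type configuration on the parameter.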
    private const string InsertEntrySql = """
        INSERT INTO dead_letter_entries (
            entry_id, tenant_id, original_job_id, run_id, source_id, job_type,
            payload, payload_digest, idempotency_key, correlation_id,
            status, error_code, failure_reason, remediation_hint, category, is_retryable,
            original_attempts, replay_attempts, max_replay_attempts,
            failed_at, created_at, updated_at, expires_at, resolved_at,
            resolution_notes, created_by, updated_by)
        VALUES (
            @entry_id, @tenant_id, @original_job_id, @run_id, @source_id, @job_type,
            @payload::jsonb, @payload_digest, @idempotency_key, @correlation_id,
            @status, @error_code, @failure_reason, @remediation_hint, @category, @is_retryable,
            @original_attempts, @replay_attempts, @max_replay_attempts,
            @failed_at, @created_at, @updated_at, @expires_at, @resolved_at,
            @resolution_notes, @created_by, @updated_by)
        """;

    private const string UpdateEntrySql = """
        UPDATE dead_letter_entries
        SET status = @status,
            replay_attempts = @replay_attempts,
            failure_reason = @failure_reason,
            updated_at = @updated_at,
            resolved_at = @resolved_at,
            resolution_notes = @resolution_notes,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND entry_id = @entry_id
        """;

    private const string SelectPendingRetryableSql = $"""
        SELECT {SelectEntryColumns}
        FROM dead_letter_entries
        WHERE tenant_id = @tenant_id
          AND status = 'pending'
          AND is_retryable = TRUE
          AND replay_attempts < max_replay_attempts
        ORDER BY created_at ASC
        LIMIT @limit
        """;

    private const string SelectByErrorCodeSql = $"""
        SELECT {SelectEntryColumns}
        FROM dead_letter_entries
        WHERE tenant_id = @tenant_id
          AND error_code = @error_code
          AND (@status IS NULL OR status = @status)
        ORDER BY created_at DESC
        LIMIT @limit
        """;

    private const string SelectByCategorySql = $"""
        SELECT {SelectEntryColumns}
        FROM dead_letter_entries
        WHERE tenant_id = @tenant_id
          AND category = @category
          AND (@status IS NULL OR status = @status)
        ORDER BY created_at DESC
        LIMIT @limit
        """;

    private const string MarkExpiredSql = """
        SELECT mark_expired_dead_letter_entries(@batch_limit)
        """;

    private const string PurgeSql = """
        SELECT purge_dead_letter_entries(@retention_days, @batch_limit)
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresDeadLetterRepository> _logger;
    private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };

    public PostgresDeadLetterRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresDeadLetterRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<DeadLetterEntry?> GetByIdAsync(
        string tenantId,
        Guid entryId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("entry_id", entryId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<DeadLetterEntry?> GetByOriginalJobIdAsync(
        string tenantId,
        Guid originalJobId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByJobIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("original_job_id", originalJobId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
        string tenantId,
        DeadLetterListOptions options,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, options);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<DeadLetterEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<long> CountAsync(
        string tenantId,
        DeadLetterListOptions options,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildCountQuery(tenantId, options);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(result);
    }

    public async Task CreateAsync(
        DeadLetterEntry entry,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(entry.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertEntrySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddEntryParameters(command, entry);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        OrchestratorMetrics.DeadLetterCreated(entry.TenantId, entry.JobType, entry.ErrorCode, entry.Category.ToString());
    }

    public async Task<bool> UpdateAsync(
        DeadLetterEntry entry,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(entry.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateEntrySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", entry.TenantId);
        command.Parameters.AddWithValue("entry_id", entry.EntryId);
        command.Parameters.AddWithValue("status", entry.Status.ToString().ToLowerInvariant());
        command.Parameters.AddWithValue("replay_attempts", entry.ReplayAttempts);
        command.Parameters.AddWithValue("failure_reason", entry.FailureReason);
        command.Parameters.AddWithValue("updated_at", entry.UpdatedAt);
        command.Parameters.AddWithValue("resolved_at", (object?)entry.ResolvedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("resolution_notes", (object?)entry.ResolutionNotes ?? DBNull.Value);
        command.Parameters.AddWithValue("updated_by", entry.UpdatedBy);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            OrchestratorMetrics.DeadLetterStatusChanged(entry.TenantId, entry.JobType, entry.Status.ToString());
        }
        return rows > 0;
    }

    public async Task<IReadOnlyList<DeadLetterEntry>> GetPendingRetryableAsync(
        string tenantId,
        int limit,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectPendingRetryableSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<DeadLetterEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<IReadOnlyList<DeadLetterEntry>> GetByErrorCodeAsync(
        string tenantId,
        string errorCode,
        DeadLetterStatus? status,
        int limit,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByErrorCodeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("error_code", errorCode);
        command.Parameters.AddWithValue("status", status.HasValue ? status.Value.ToString().ToLowerInvariant() : DBNull.Value);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<DeadLetterEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<IReadOnlyList<DeadLetterEntry>> GetByCategoryAsync(
        string tenantId,
        ErrorCategory category,
        DeadLetterStatus? status,
        int limit,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByCategorySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("category", category.ToString().ToLowerInvariant());
        command.Parameters.AddWithValue("status", status.HasValue ? status.Value.ToString().ToLowerInvariant() : DBNull.Value);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<DeadLetterEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<DeadLetterStats> GetStatsAsync(
        string tenantId,
        CancellationToken cancellationToken)
    {
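        // Note (added comment): per-status counts come from a single table scan
        // using PostgreSQL FILTER aggregates rather than one query per status.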
        const string statsSql = """
            SELECT
                COUNT(*) AS total,
                COUNT(*) FILTER (WHERE status = 'pending') AS pending,
                COUNT(*) FILTER (WHERE status = 'replaying') AS replaying,
                COUNT(*) FILTER (WHERE status = 'replayed') AS replayed,
                COUNT(*) FILTER (WHERE status = 'resolved') AS resolved,
                COUNT(*) FILTER (WHERE status = 'exhausted') AS exhausted,
                COUNT(*) FILTER (WHERE status = 'expired') AS expired,
                COUNT(*) FILTER (WHERE is_retryable = TRUE AND status = 'pending') AS retryable
            FROM dead_letter_entries
            WHERE tenant_id = @tenant_id
            """;

        const string byCategorySql = """
            SELECT category, COUNT(*) AS cnt
            FROM dead_letter_entries
            WHERE tenant_id = @tenant_id
            GROUP BY category
            """;

        const string topErrorCodesSql = """
            SELECT error_code, COUNT(*) AS cnt
            FROM dead_letter_entries
            WHERE tenant_id = @tenant_id AND status = 'pending'
            GROUP BY error_code
            ORDER BY cnt DESC
            LIMIT 10
            """;

        const string topJobTypesSql = """
            SELECT job_type, COUNT(*) AS cnt
            FROM dead_letter_entries
            WHERE tenant_id = @tenant_id AND status = 'pending'
            GROUP BY job_type
            ORDER BY cnt DESC
            LIMIT 10
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);

        // Get counts
        long total = 0, pending = 0, replaying = 0, replayed = 0, resolved = 0, exhausted = 0, expired = 0, retryable = 0;
        await using (var command = new NpgsqlCommand(statsSql, connection))
        {
            command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("tenant_id", tenantId);
            await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                total = reader.GetInt64(0);
                pending = reader.GetInt64(1);
                replaying = reader.GetInt64(2);
                replayed = reader.GetInt64(3);
                resolved = reader.GetInt64(4);
                exhausted = reader.GetInt64(5);
                expired = reader.GetInt64(6);
                retryable = reader.GetInt64(7);
            }
        }

        // Get by category
        var byCategory = new Dictionary<ErrorCategory, long>();
        await using (var command = new NpgsqlCommand(byCategorySql, connection))
        {
            command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("tenant_id", tenantId);
            await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                if (Enum.TryParse<ErrorCategory>(reader.GetString(0), true, out var cat))
                {
                    byCategory[cat] = reader.GetInt64(1);
                }
            }
        }

        // Get top error codes
        var topErrorCodes = new Dictionary<string, long>();
        await using (var command = new NpgsqlCommand(topErrorCodesSql, connection))
        {
            command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("tenant_id", tenantId);
            await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                topErrorCodes[reader.GetString(0)] = reader.GetInt64(1);
            }
        }

        // Get top job types
        var topJobTypes = new Dictionary<string, long>();
        await using (var command = new NpgsqlCommand(topJobTypesSql, connection))
        {
            command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("tenant_id", tenantId);
            await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                topJobTypes[reader.GetString(0)] = reader.GetInt64(1);
            }
        }

        return new DeadLetterStats(
            TotalEntries: total,
            PendingEntries: pending,
            ReplayingEntries: replaying,
            ReplayedEntries: replayed,
            ResolvedEntries: resolved,
            ExhaustedEntries: exhausted,
            ExpiredEntries: expired,
            RetryableEntries: retryable,
            ByCategory: byCategory,
            TopErrorCodes: topErrorCodes,
            TopJobTypes: topJobTypes);
    }

    public async Task<IReadOnlyList<DeadLetterSummary>> GetActionableSummaryAsync(
        string tenantId,
        int limit,
        CancellationToken cancellationToken)
    {
        const string sql = """
            SELECT error_code, category, entry_count, retryable_count, oldest_entry, sample_reason
            FROM get_actionable_dead_letter_summary(@tenant_id, @limit)
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var summaries = new List<DeadLetterSummary>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            var categoryStr = reader.GetString(1);
            var category = Enum.TryParse<ErrorCategory>(categoryStr, true, out var cat) ? cat : ErrorCategory.Unknown;

            summaries.Add(new DeadLetterSummary(
                ErrorCode: reader.GetString(0),
                Category: category,
                EntryCount: reader.GetInt64(2),
                RetryableCount: reader.GetInt64(3),
                OldestEntry: reader.GetFieldValue<DateTimeOffset>(4),
                SampleReason: reader.IsDBNull(5) ? null : reader.GetString(5)));
        }
        return summaries;
    }

    public async Task<int> MarkExpiredAsync(
        int batchLimit,
        CancellationToken cancellationToken)
    {
        // Use a system-level connection (no tenant context needed for maintenance)
        await using var connection = await _dataSource.OpenConnectionAsync("system", "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(MarkExpiredSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("batch_limit", batchLimit);

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        var marked = Convert.ToInt32(result);

        if (marked > 0)
        {
            OrchestratorMetrics.DeadLetterExpired(marked);
            _logger.LogInformation("Marked {Count} dead-letter entries as expired", marked);
        }

        return marked;
    }

    public async Task<int> PurgeOldEntriesAsync(
        int retentionDays,
        int batchLimit,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync("system", "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(PurgeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("retention_days", retentionDays);
        command.Parameters.AddWithValue("batch_limit", batchLimit);

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        var purged = Convert.ToInt32(result);

        if (purged > 0)
        {
            OrchestratorMetrics.DeadLetterPurged(purged);
            _logger.LogInformation("Purged {Count} old dead-letter entries (retention: {RetentionDays} days)", purged, retentionDays);
        }

        return purged;
    }

    private static void AddEntryParameters(NpgsqlCommand command, DeadLetterEntry entry)
    {
        command.Parameters.AddWithValue("entry_id", entry.EntryId);
        command.Parameters.AddWithValue("tenant_id", entry.TenantId);
        command.Parameters.AddWithValue("original_job_id", entry.OriginalJobId);
        command.Parameters.AddWithValue("run_id", (object?)entry.RunId ?? DBNull.Value);
        command.Parameters.AddWithValue("source_id", (object?)entry.SourceId ?? DBNull.Value);
        command.Parameters.AddWithValue("job_type", entry.JobType);
        command.Parameters.AddWithValue("payload", entry.Payload);
        command.Parameters.AddWithValue("payload_digest", entry.PayloadDigest);
        command.Parameters.AddWithValue("idempotency_key", entry.IdempotencyKey);
        command.Parameters.AddWithValue("correlation_id", (object?)entry.CorrelationId ?? DBNull.Value);
        command.Parameters.AddWithValue("status", entry.Status.ToString().ToLowerInvariant());
        command.Parameters.AddWithValue("error_code", entry.ErrorCode);
        command.Parameters.AddWithValue("failure_reason", entry.FailureReason);
        command.Parameters.AddWithValue("remediation_hint", (object?)entry.RemediationHint ?? DBNull.Value);
        command.Parameters.AddWithValue("category", entry.Category.ToString().ToLowerInvariant());
        command.Parameters.AddWithValue("is_retryable", entry.IsRetryable);
        command.Parameters.AddWithValue("original_attempts", entry.OriginalAttempts);
        command.Parameters.AddWithValue("replay_attempts", entry.ReplayAttempts);
        command.Parameters.AddWithValue("max_replay_attempts", entry.MaxReplayAttempts);
        command.Parameters.AddWithValue("failed_at", entry.FailedAt);
        command.Parameters.AddWithValue("created_at", entry.CreatedAt);
        command.Parameters.AddWithValue("updated_at", entry.UpdatedAt);
        command.Parameters.AddWithValue("expires_at", entry.ExpiresAt);
        command.Parameters.AddWithValue("resolved_at", (object?)entry.ResolvedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("resolution_notes", (object?)entry.ResolutionNotes ?? DBNull.Value);
        command.Parameters.AddWithValue("created_by", entry.CreatedBy);
        command.Parameters.AddWithValue("updated_by", entry.UpdatedBy);
    }

    private static DeadLetterEntry MapEntry(NpgsqlDataReader reader)
    {
        var statusStr = reader.GetString(10);
        var categoryStr = reader.GetString(14);

        return new DeadLetterEntry(
            EntryId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            OriginalJobId: reader.GetGuid(2),
            RunId: reader.IsDBNull(3) ? null : reader.GetGuid(3),
            SourceId: reader.IsDBNull(4) ? null : reader.GetGuid(4),
            JobType: reader.GetString(5),
            Payload: reader.GetString(6),
            PayloadDigest: reader.GetString(7),
            IdempotencyKey: reader.GetString(8),
            CorrelationId: reader.IsDBNull(9) ? null : reader.GetString(9),
            Status: Enum.TryParse<DeadLetterStatus>(statusStr, true, out var status) ? status : DeadLetterStatus.Pending,
            ErrorCode: reader.GetString(11),
            FailureReason: reader.GetString(12),
            RemediationHint: reader.IsDBNull(13) ? null : reader.GetString(13),
            Category: Enum.TryParse<ErrorCategory>(categoryStr, true, out var cat) ? cat : ErrorCategory.Unknown,
            IsRetryable: reader.GetBoolean(15),
            OriginalAttempts: reader.GetInt32(16),
            ReplayAttempts: reader.GetInt32(17),
            MaxReplayAttempts: reader.GetInt32(18),
            FailedAt: reader.GetFieldValue<DateTimeOffset>(19),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(20),
            UpdatedAt: reader.GetFieldValue<DateTimeOffset>(21),
            ExpiresAt: reader.GetFieldValue<DateTimeOffset>(22),
            ResolvedAt: reader.IsDBNull(23) ? null : reader.GetFieldValue<DateTimeOffset>(23),
            ResolutionNotes: reader.IsDBNull(24) ? null : reader.GetString(24),
            CreatedBy: reader.GetString(25),
            UpdatedBy: reader.GetString(26));
    }

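    // Note (added comment): paging here is keyset-style rather than OFFSET-based;
    // the cursor is the created_at value of the last row the caller saw, so deep
    // pages stay cheap and stable under concurrent inserts.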
    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        DeadLetterListOptions options)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectEntryColumns} FROM dead_letter_entries WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        AppendFilters(sb, parameters, options);

        // Cursor is the created_at timestamp. It belongs in the WHERE clause,
        // so it must be appended before ORDER BY, and only when it parses so
        // the @cursor placeholder is never emitted without a bound parameter.
        if (!string.IsNullOrEmpty(options.Cursor) && DateTimeOffset.TryParse(options.Cursor, out var cursor))
        {
            var op = options.Ascending ? ">" : "<";
            sb.Append($" AND created_at {op} @cursor");
            parameters.Add(("cursor", cursor));
        }

        var order = options.Ascending ? "ASC" : "DESC";
        sb.Append($" ORDER BY created_at {order}");

        sb.Append(" LIMIT @limit");
        parameters.Add(("limit", options.Limit));

        return (sb.ToString(), parameters);
    }

    private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
        string tenantId,
        DeadLetterListOptions options)
    {
        var sb = new StringBuilder();
        sb.Append("SELECT COUNT(*) FROM dead_letter_entries WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        AppendFilters(sb, parameters, options);

        return (sb.ToString(), parameters);
    }

    private static void AppendFilters(StringBuilder sb, List<(string, object)> parameters, DeadLetterListOptions options)
    {
        if (options.Status.HasValue)
        {
            sb.Append(" AND status = @status");
            parameters.Add(("status", options.Status.Value.ToString().ToLowerInvariant()));
        }

        if (options.Category.HasValue)
        {
            sb.Append(" AND category = @category");
            parameters.Add(("category", options.Category.Value.ToString().ToLowerInvariant()));
        }

        if (!string.IsNullOrEmpty(options.JobType))
        {
            sb.Append(" AND job_type = @job_type");
            parameters.Add(("job_type", options.JobType));
        }

        if (!string.IsNullOrEmpty(options.ErrorCode))
        {
            sb.Append(" AND error_code = @error_code");
            parameters.Add(("error_code", options.ErrorCode));
        }

        if (options.SourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", options.SourceId.Value));
        }

        if (options.RunId.HasValue)
        {
            sb.Append(" AND run_id = @run_id");
            parameters.Add(("run_id", options.RunId.Value));
        }

        if (options.IsRetryable.HasValue)
        {
            sb.Append(" AND is_retryable = @is_retryable");
            parameters.Add(("is_retryable", options.IsRetryable.Value));
        }

        if (options.CreatedAfter.HasValue)
        {
            sb.Append(" AND created_at >= @created_after");
            parameters.Add(("created_after", options.CreatedAfter.Value));
        }

        if (options.CreatedBefore.HasValue)
        {
            sb.Append(" AND created_at <= @created_before");
            parameters.Add(("created_before", options.CreatedBefore.Value));
        }
    }
}
@@ -0,0 +1,247 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Backfill;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of duplicate suppressor.
/// </summary>
public sealed class PostgresDuplicateSuppressor : IDuplicateSuppressor
{
    private const string SelectProcessedSql = """
        SELECT 1 FROM processed_events
        WHERE tenant_id = @tenant_id
          AND scope_key = @scope_key
          AND event_key = @event_key
          AND expires_at > NOW()
        """;

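    // Note (added comment): = ANY(@event_keys) checks a whole batch of keys in
    // one round trip; Npgsql binds the string[] parameter as a PostgreSQL text[].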
    private const string SelectMultipleProcessedSql = """
        SELECT event_key FROM processed_events
        WHERE tenant_id = @tenant_id
          AND scope_key = @scope_key
          AND event_key = ANY(@event_keys)
          AND expires_at > NOW()
        """;

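    // Note (added comment): ON CONFLICT turns the insert into an idempotent
    // upsert, so re-marking an already-seen event just refreshes its TTL and
    // batch attribution instead of failing on the unique key.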
    private const string UpsertProcessedSql = """
        INSERT INTO processed_events (tenant_id, scope_key, event_key, event_time, processed_at, batch_id, expires_at)
        VALUES (@tenant_id, @scope_key, @event_key, @event_time, NOW(), @batch_id, @expires_at)
        ON CONFLICT (tenant_id, scope_key, event_key) DO UPDATE
        SET event_time = EXCLUDED.event_time,
            processed_at = NOW(),
            batch_id = EXCLUDED.batch_id,
            expires_at = EXCLUDED.expires_at
        """;

    private const string CountProcessedSql = """
        SELECT COUNT(*) FROM processed_events
        WHERE tenant_id = @tenant_id
          AND scope_key = @scope_key
          AND event_time >= @from
          AND event_time < @to
          AND expires_at > NOW()
        """;

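    // Note (added comment): PostgreSQL DELETE has no LIMIT clause, so the batch
    // is bounded by selecting ctids in a subquery and deleting only those rows.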
    private const string CleanupExpiredSql = """
        DELETE FROM processed_events
        WHERE ctid IN (
            SELECT ctid FROM processed_events
            WHERE expires_at < NOW()
            LIMIT @batch_limit
        )
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly string _tenantId;
    private readonly ILogger<PostgresDuplicateSuppressor> _logger;

    public PostgresDuplicateSuppressor(
        OrchestratorDataSource dataSource,
        string tenantId,
        ILogger<PostgresDuplicateSuppressor> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _tenantId = tenantId ?? throw new ArgumentNullException(nameof(tenantId));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<bool> HasProcessedAsync(string scopeKey, string eventKey, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectProcessedSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", _tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);
        command.Parameters.AddWithValue("event_key", eventKey);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
    }

    public async Task<IReadOnlySet<string>> GetProcessedAsync(string scopeKey, IEnumerable<string> eventKeys, CancellationToken cancellationToken)
    {
        var keyList = eventKeys.ToArray();
        if (keyList.Length == 0)
        {
            return new HashSet<string>();
        }

        await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectMultipleProcessedSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", _tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);
        command.Parameters.AddWithValue("event_keys", keyList);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var result = new HashSet<string>();

        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            result.Add(reader.GetString(0));
        }

        return result;
    }

    public async Task MarkProcessedAsync(
        string scopeKey,
        string eventKey,
        DateTimeOffset eventTime,
        Guid? batchId,
        TimeSpan ttl,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpsertProcessedSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", _tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);
        command.Parameters.AddWithValue("event_key", eventKey);
        command.Parameters.AddWithValue("event_time", eventTime);
        command.Parameters.AddWithValue("batch_id", (object?)batchId ?? DBNull.Value);
        command.Parameters.AddWithValue("expires_at", DateTimeOffset.UtcNow + ttl);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    public async Task MarkProcessedBatchAsync(
        string scopeKey,
        IEnumerable<ProcessedEvent> events,
        Guid? batchId,
        TimeSpan ttl,
        CancellationToken cancellationToken)
    {
        var eventList = events.ToList();
        if (eventList.Count == 0)
        {
            return;
        }

        var expiresAt = DateTimeOffset.UtcNow + ttl;

        await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "writer", cancellationToken).ConfigureAwait(false);
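        // Note (added comment): one transaction covers the whole batch, so either
        // every event is marked processed or none are. Rows are upserted one
        // command at a time; a single multi-row INSERT or an NpgsqlBatch could
        // reduce round trips if this ever shows up in profiles.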
        await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            foreach (var evt in eventList)
            {
                await using var command = new NpgsqlCommand(UpsertProcessedSql, connection, transaction);
                command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

                command.Parameters.AddWithValue("tenant_id", _tenantId);
                command.Parameters.AddWithValue("scope_key", scopeKey);
                command.Parameters.AddWithValue("event_key", evt.EventKey);
                command.Parameters.AddWithValue("event_time", evt.EventTime);
                command.Parameters.AddWithValue("batch_id", (object?)batchId ?? DBNull.Value);
                command.Parameters.AddWithValue("expires_at", expiresAt);

                await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
            OrchestratorMetrics.ProcessedEventsMarked(_tenantId, scopeKey, eventList.Count);
        }
        catch
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            throw;
        }
    }

    public async Task<long> CountProcessedAsync(string scopeKey, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(CountProcessedSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", _tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);
        command.Parameters.AddWithValue("from", from);
        command.Parameters.AddWithValue("to", to);

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(result);
    }

    public async Task<int> CleanupExpiredAsync(int batchLimit, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(CleanupExpiredSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("batch_limit", batchLimit);

        var deleted = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);

        if (deleted > 0)
        {
            _logger.LogInformation("Cleaned up {DeletedCount} expired processed events", deleted);
            OrchestratorMetrics.ProcessedEventsCleanedUp(_tenantId, deleted);
        }

        return deleted;
    }
}

/// <summary>
/// Factory for creating tenant-scoped duplicate suppressors.
/// </summary>
public interface IDuplicateSuppressorFactory
{
    /// <summary>
    /// Creates a duplicate suppressor for the specified tenant.
    /// </summary>
    IDuplicateSuppressor Create(string tenantId);
}

/// <summary>
/// Factory implementation for PostgreSQL duplicate suppressors.
/// </summary>
public sealed class PostgresDuplicateSuppressorFactory : IDuplicateSuppressorFactory
{
    private readonly OrchestratorDataSource _dataSource;
    private readonly ILoggerFactory _loggerFactory;

    public PostgresDuplicateSuppressorFactory(
        OrchestratorDataSource dataSource,
        ILoggerFactory loggerFactory)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
    }

    public IDuplicateSuppressor Create(string tenantId)
    {
        return new PostgresDuplicateSuppressor(
            _dataSource,
            tenantId,
            _loggerFactory.CreateLogger<PostgresDuplicateSuppressor>());
    }
}
@@ -0,0 +1,540 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of job repository.
/// </summary>
public sealed class PostgresJobRepository : IJobRepository
{
    private const string SelectJobColumns = """
        job_id, tenant_id, project_id, run_id, job_type, status, priority, attempt, max_attempts,
        payload_digest, payload, idempotency_key, correlation_id, lease_id, worker_id, task_runner_id,
        lease_until, created_at, scheduled_at, leased_at, completed_at, not_before, reason, replay_of, created_by
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectJobColumns}
        FROM jobs
        WHERE tenant_id = @tenant_id AND job_id = @job_id
        """;

    private const string SelectByIdempotencyKeySql = $"""
        SELECT {SelectJobColumns}
        FROM jobs
        WHERE tenant_id = @tenant_id AND idempotency_key = @idempotency_key
        """;

    private const string InsertJobSql = """
        INSERT INTO jobs (
            job_id, tenant_id, project_id, run_id, job_type, status, priority, attempt, max_attempts,
            payload_digest, payload, idempotency_key, correlation_id, lease_id, worker_id, task_runner_id,
            lease_until, created_at, scheduled_at, leased_at, completed_at, not_before, reason, replay_of, created_by)
        VALUES (
            @job_id, @tenant_id, @project_id, @run_id, @job_type, @status::job_status, @priority, @attempt, @max_attempts,
            @payload_digest, @payload, @idempotency_key, @correlation_id, @lease_id, @worker_id, @task_runner_id,
            @lease_until, @created_at, @scheduled_at, @leased_at, @completed_at, @not_before, @reason, @replay_of, @created_by)
        """;

    private const string UpdateStatusSql = """
        UPDATE jobs
        SET status = @status::job_status,
            attempt = @attempt,
            lease_id = @lease_id,
            worker_id = @worker_id,
            task_runner_id = @task_runner_id,
            lease_until = @lease_until,
            scheduled_at = @scheduled_at,
            leased_at = @leased_at,
            completed_at = @completed_at,
            not_before = @not_before,
            reason = @reason
        WHERE tenant_id = @tenant_id AND job_id = @job_id
        """;

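    // Note (added comment): lease acquisition uses SELECT ... FOR UPDATE
    // SKIP LOCKED in the UPDATE's subquery, the standard PostgreSQL work-queue
    // pattern: concurrent workers skip rows another worker has locked instead
    // of blocking, so each scheduled job is handed to exactly one worker.
    // {0} is replaced with an optional job_type filter and the RETURNING
    // column list is appended by LeaseNextAsync.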
    private const string LeaseNextSqlTemplate = """
        UPDATE jobs
        SET status = 'leased'::job_status,
            lease_id = @lease_id,
            worker_id = @worker_id,
            lease_until = @lease_until,
            leased_at = @leased_at
        WHERE tenant_id = @tenant_id
          AND job_id = (
              SELECT job_id
              FROM jobs
              WHERE tenant_id = @tenant_id
                AND status = 'scheduled'::job_status
                AND (not_before IS NULL OR not_before <= @now)
                {0}
              ORDER BY priority DESC, created_at
              LIMIT 1
              FOR UPDATE SKIP LOCKED
          )
        RETURNING
        """;

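    // Note (added comment): compare-and-set style renewal; the lease is only
    // extended while the caller still holds it (matching lease_id) and it has
    // not already lapsed, so a stolen or expired lease cannot be revived.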
    private const string ExtendLeaseSql = """
        UPDATE jobs
        SET lease_until = @new_lease_until
        WHERE tenant_id = @tenant_id
          AND job_id = @job_id
          AND lease_id = @lease_id
          AND status = 'leased'::job_status
          AND lease_until > @now
        """;

    private const string SelectByRunIdSql = $"""
        SELECT {SelectJobColumns}
        FROM jobs
        WHERE tenant_id = @tenant_id AND run_id = @run_id
        ORDER BY created_at
        """;

    private const string SelectExpiredLeasesSql = $"""
        SELECT {SelectJobColumns}
        FROM jobs
        WHERE tenant_id = @tenant_id
          AND status = 'leased'::job_status
          AND lease_until < @cutoff
        ORDER BY lease_until
        LIMIT @limit
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresJobRepository> _logger;

    public PostgresJobRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresJobRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<Job?> GetByIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_id", jobId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapJob(reader);
    }

    public async Task<Job?> GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdempotencyKeySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("idempotency_key", idempotencyKey);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapJob(reader);
    }

    public async Task CreateAsync(Job job, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(job.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertJobSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddJobParameters(command, job);

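        // Note (added comment): the database's uniqueness guarantee on
        // idempotency_key enforces at-most-once enqueue; a unique violation is
        // translated into DuplicateJobException rather than leaking a raw
        // PostgresException to callers.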
        try
        {
            await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            OrchestratorMetrics.JobEnqueued(job.TenantId, job.JobType);
            OrchestratorMetrics.QueueDepthChanged(job.TenantId, job.JobType, 1);
        }
        catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
        {
            _logger.LogWarning("Duplicate job idempotency key: {IdempotencyKey}", job.IdempotencyKey);
            throw new DuplicateJobException(job.IdempotencyKey, ex);
        }
    }

    public async Task UpdateStatusAsync(
        string tenantId,
        Guid jobId,
        JobStatus status,
        int attempt,
        Guid? leaseId,
        string? workerId,
        string? taskRunnerId,
        DateTimeOffset? leaseUntil,
        DateTimeOffset? scheduledAt,
        DateTimeOffset? leasedAt,
        DateTimeOffset? completedAt,
        DateTimeOffset? notBefore,
        string? reason,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateStatusSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_id", jobId);
        command.Parameters.AddWithValue("status", StatusToString(status));
        command.Parameters.AddWithValue("attempt", attempt);
        command.Parameters.AddWithValue("lease_id", (object?)leaseId ?? DBNull.Value);
        command.Parameters.AddWithValue("worker_id", (object?)workerId ?? DBNull.Value);
        command.Parameters.AddWithValue("task_runner_id", (object?)taskRunnerId ?? DBNull.Value);
        command.Parameters.AddWithValue("lease_until", (object?)leaseUntil ?? DBNull.Value);
        command.Parameters.AddWithValue("scheduled_at", (object?)scheduledAt ?? DBNull.Value);
        command.Parameters.AddWithValue("leased_at", (object?)leasedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)completedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("not_before", (object?)notBefore ?? DBNull.Value);
        command.Parameters.AddWithValue("reason", (object?)reason ?? DBNull.Value);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    public async Task<Job?> LeaseNextAsync(
        string tenantId,
        string? jobType,
        Guid leaseId,
        string workerId,
        DateTimeOffset leaseUntil,
        CancellationToken cancellationToken)
    {
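        // Note (added comment): the optional job_type filter is spliced into the
        // template's {0} slot, and the shared column list is appended after
        // RETURNING so leased rows map with the same ordinals as every other
        // query in this repository.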
var jobTypeFilter = jobType != null ? "AND job_type = @job_type" : "";
|
||||
var sql = string.Format(LeaseNextSqlTemplate, jobTypeFilter) + " " + SelectJobColumns;
|
||||
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(sql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("lease_id", leaseId);
|
||||
command.Parameters.AddWithValue("worker_id", workerId);
|
||||
command.Parameters.AddWithValue("lease_until", leaseUntil);
|
||||
command.Parameters.AddWithValue("leased_at", DateTimeOffset.UtcNow);
|
||||
command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
|
||||
|
||||
if (jobType != null)
|
||||
{
|
||||
command.Parameters.AddWithValue("job_type", jobType);
|
||||
}
|
||||
|
||||
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var job = MapJob(reader);
|
||||
OrchestratorMetrics.JobLeased(job.TenantId, job.JobType);
|
||||
OrchestratorMetrics.QueueDepthChanged(job.TenantId, job.JobType, -1);
|
||||
return job;
|
||||
}

    public async Task<bool> ExtendLeaseAsync(
        string tenantId,
        Guid jobId,
        Guid leaseId,
        DateTimeOffset newLeaseUntil,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(ExtendLeaseSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_id", jobId);
        command.Parameters.AddWithValue("lease_id", leaseId);
        command.Parameters.AddWithValue("new_lease_until", newLeaseUntil);
        command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        return rows > 0;
    }

    public async Task<IReadOnlyList<Job>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByRunIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("run_id", runId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var jobs = new List<Job>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            jobs.Add(MapJob(reader));
        }
        return jobs;
    }

    public async Task<IReadOnlyList<Job>> GetExpiredLeasesAsync(string tenantId, DateTimeOffset cutoff, int limit, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectExpiredLeasesSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("cutoff", cutoff);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var jobs = new List<Job>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            jobs.Add(MapJob(reader));
        }
        return jobs;
    }

    public async Task<IReadOnlyList<Job>> ListAsync(
        string tenantId,
        JobStatus? status,
        string? jobType,
        string? projectId,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, status, jobType, projectId, createdAfter, createdBefore, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var jobs = new List<Job>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            jobs.Add(MapJob(reader));
        }
        return jobs;
    }

    public async Task<int> CountAsync(
        string tenantId,
        JobStatus? status,
        string? jobType,
        string? projectId,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildCountQuery(tenantId, status, jobType, projectId);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt32(result);
    }

    private static void AddJobParameters(NpgsqlCommand command, Job job)
    {
        command.Parameters.AddWithValue("job_id", job.JobId);
        command.Parameters.AddWithValue("tenant_id", job.TenantId);
        command.Parameters.AddWithValue("project_id", (object?)job.ProjectId ?? DBNull.Value);
        command.Parameters.AddWithValue("run_id", (object?)job.RunId ?? DBNull.Value);
        command.Parameters.AddWithValue("job_type", job.JobType);
        command.Parameters.AddWithValue("status", StatusToString(job.Status));
        command.Parameters.AddWithValue("priority", job.Priority);
        command.Parameters.AddWithValue("attempt", job.Attempt);
        command.Parameters.AddWithValue("max_attempts", job.MaxAttempts);
        command.Parameters.AddWithValue("payload_digest", job.PayloadDigest);
        command.Parameters.Add(new NpgsqlParameter<string>("payload", NpgsqlDbType.Jsonb) { TypedValue = job.Payload });
        command.Parameters.AddWithValue("idempotency_key", job.IdempotencyKey);
        command.Parameters.AddWithValue("correlation_id", (object?)job.CorrelationId ?? DBNull.Value);
        command.Parameters.AddWithValue("lease_id", (object?)job.LeaseId ?? DBNull.Value);
        command.Parameters.AddWithValue("worker_id", (object?)job.WorkerId ?? DBNull.Value);
        command.Parameters.AddWithValue("task_runner_id", (object?)job.TaskRunnerId ?? DBNull.Value);
        command.Parameters.AddWithValue("lease_until", (object?)job.LeaseUntil ?? DBNull.Value);
        command.Parameters.AddWithValue("created_at", job.CreatedAt);
        command.Parameters.AddWithValue("scheduled_at", (object?)job.ScheduledAt ?? DBNull.Value);
        command.Parameters.AddWithValue("leased_at", (object?)job.LeasedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)job.CompletedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("not_before", (object?)job.NotBefore ?? DBNull.Value);
        command.Parameters.AddWithValue("reason", (object?)job.Reason ?? DBNull.Value);
        command.Parameters.AddWithValue("replay_of", (object?)job.ReplayOf ?? DBNull.Value);
        command.Parameters.AddWithValue("created_by", job.CreatedBy);
    }

    private static Job MapJob(NpgsqlDataReader reader)
    {
        return new Job(
            JobId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            ProjectId: reader.IsDBNull(2) ? null : reader.GetString(2),
            RunId: reader.IsDBNull(3) ? null : reader.GetGuid(3),
            JobType: reader.GetString(4),
            Status: ParseStatus(reader.GetString(5)),
            Priority: reader.GetInt32(6),
            Attempt: reader.GetInt32(7),
            MaxAttempts: reader.GetInt32(8),
            PayloadDigest: reader.GetString(9),
            Payload: reader.GetString(10),
            IdempotencyKey: reader.GetString(11),
            CorrelationId: reader.IsDBNull(12) ? null : reader.GetString(12),
            LeaseId: reader.IsDBNull(13) ? null : reader.GetGuid(13),
            WorkerId: reader.IsDBNull(14) ? null : reader.GetString(14),
            TaskRunnerId: reader.IsDBNull(15) ? null : reader.GetString(15),
            LeaseUntil: reader.IsDBNull(16) ? null : reader.GetFieldValue<DateTimeOffset>(16),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(17),
            ScheduledAt: reader.IsDBNull(18) ? null : reader.GetFieldValue<DateTimeOffset>(18),
            LeasedAt: reader.IsDBNull(19) ? null : reader.GetFieldValue<DateTimeOffset>(19),
            CompletedAt: reader.IsDBNull(20) ? null : reader.GetFieldValue<DateTimeOffset>(20),
            NotBefore: reader.IsDBNull(21) ? null : reader.GetFieldValue<DateTimeOffset>(21),
            Reason: reader.IsDBNull(22) ? null : reader.GetString(22),
            ReplayOf: reader.IsDBNull(23) ? null : reader.GetGuid(23),
            CreatedBy: reader.GetString(24));
    }

    private static string StatusToString(JobStatus status) => status switch
    {
        JobStatus.Pending => "pending",
        JobStatus.Scheduled => "scheduled",
        JobStatus.Leased => "leased",
        JobStatus.Succeeded => "succeeded",
        JobStatus.Failed => "failed",
        JobStatus.Canceled => "canceled",
        JobStatus.TimedOut => "timed_out",
        _ => throw new ArgumentOutOfRangeException(nameof(status))
    };

    private static JobStatus ParseStatus(string status) => status switch
    {
        "pending" => JobStatus.Pending,
        "scheduled" => JobStatus.Scheduled,
        "leased" => JobStatus.Leased,
        "succeeded" => JobStatus.Succeeded,
        "failed" => JobStatus.Failed,
        "canceled" => JobStatus.Canceled,
        "timed_out" => JobStatus.TimedOut,
        _ => throw new ArgumentOutOfRangeException(nameof(status))
    };

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        JobStatus? status,
        string? jobType,
        string? projectId,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectJobColumns} FROM jobs WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (status.HasValue)
        {
            sb.Append(" AND status = @status::job_status");
            parameters.Add(("status", StatusToString(status.Value)));
        }

        if (!string.IsNullOrEmpty(jobType))
        {
            sb.Append(" AND job_type = @job_type");
            parameters.Add(("job_type", jobType));
        }

        if (!string.IsNullOrEmpty(projectId))
        {
            sb.Append(" AND project_id = @project_id");
            parameters.Add(("project_id", projectId));
        }

        if (createdAfter.HasValue)
        {
            sb.Append(" AND created_at >= @created_after");
            parameters.Add(("created_after", createdAfter.Value));
        }

        if (createdBefore.HasValue)
        {
            sb.Append(" AND created_at < @created_before");
            parameters.Add(("created_before", createdBefore.Value));
        }

        sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }

    private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
        string tenantId,
        JobStatus? status,
        string? jobType,
        string? projectId)
    {
        var sb = new StringBuilder();
        sb.Append("SELECT COUNT(*) FROM jobs WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (status.HasValue)
        {
            sb.Append(" AND status = @status::job_status");
            parameters.Add(("status", StatusToString(status.Value)));
        }

        if (!string.IsNullOrEmpty(jobType))
        {
            sb.Append(" AND job_type = @job_type");
            parameters.Add(("job_type", jobType));
        }

        if (!string.IsNullOrEmpty(projectId))
        {
            sb.Append(" AND project_id = @project_id");
            parameters.Add(("project_id", projectId));
        }

        return (sb.ToString(), parameters);
    }
}
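
// Illustrative consumer sketch (not part of this commit): shows how the lease
// methods above are intended to be driven by a worker. The IJobRepository name
// is an assumption standing in for whatever interface the repository above
// implements; only the LeaseNextAsync/ExtendLeaseAsync shapes are taken from it.
internal static class JobLeaseLoopSketch
{
    public static async Task<bool> TryProcessOneAsync(
        IJobRepository jobs,    // hypothetical interface over the repository above
        string tenantId,
        string workerId,
        CancellationToken cancellationToken)
    {
        // Claim the next eligible job under a fresh lease id and a short window.
        var leaseId = Guid.NewGuid();
        var job = await jobs.LeaseNextAsync(
            tenantId, jobType: null, leaseId, workerId,
            leaseUntil: DateTimeOffset.UtcNow.AddMinutes(5), cancellationToken);
        if (job is null)
        {
            return false; // queue empty
        }

        // Long-running work should heartbeat before the lease expires; a false
        // return means the lease was lost (reaped or re-leased) and work must stop.
        return await jobs.ExtendLeaseAsync(
            tenantId, job.JobId, leaseId, DateTimeOffset.UtcNow.AddMinutes(5), cancellationToken);
    }
}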

/// <summary>
/// Exception thrown when attempting to create a job with a duplicate idempotency key.
/// </summary>
public sealed class DuplicateJobException : Exception
{
    public string IdempotencyKey { get; }

    public DuplicateJobException(string idempotencyKey, Exception innerException)
        : base($"Job with idempotency key '{idempotencyKey}' already exists.", innerException)
    {
        IdempotencyKey = idempotencyKey;
    }
}
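
// Usage sketch (an assumption, not shown in this hunk): the insert path is
// expected to translate a Postgres unique violation on the idempotency key
// into DuplicateJobException, e.g.:
//
//     try
//     {
//         await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
//     }
//     catch (PostgresException ex) when (ex.SqlState == PostgresErrorCodes.UniqueViolation)
//     {
//         throw new DuplicateJobException(job.IdempotencyKey, ex);
//     }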
@@ -0,0 +1,949 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of the ledger repository.
/// </summary>
public sealed class PostgresLedgerRepository : ILedgerRepository
{
    private const string SelectLedgerColumns = """
        ledger_id, tenant_id, run_id, source_id, run_type, final_status, total_jobs,
        succeeded_jobs, failed_jobs, run_created_at, run_started_at, run_completed_at,
        execution_duration_ms, initiated_by, input_digest, output_digest, artifact_manifest,
        sequence_number, previous_entry_hash, content_hash, ledger_created_at, correlation_id, metadata
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectLedgerColumns}
        FROM run_ledger_entries
        WHERE tenant_id = @tenant_id AND ledger_id = @ledger_id
        """;

    private const string SelectByRunIdSql = $"""
        SELECT {SelectLedgerColumns}
        FROM run_ledger_entries
        WHERE tenant_id = @tenant_id AND run_id = @run_id
        """;

    private const string InsertEntrySql = """
        INSERT INTO run_ledger_entries (
            ledger_id, tenant_id, run_id, source_id, run_type, final_status, total_jobs,
            succeeded_jobs, failed_jobs, run_created_at, run_started_at, run_completed_at,
            execution_duration_ms, initiated_by, input_digest, output_digest, artifact_manifest,
            sequence_number, previous_entry_hash, content_hash, ledger_created_at, correlation_id, metadata)
        VALUES (
            @ledger_id, @tenant_id, @run_id, @source_id, @run_type, @final_status, @total_jobs,
            @succeeded_jobs, @failed_jobs, @run_created_at, @run_started_at, @run_completed_at,
            @execution_duration_ms, @initiated_by, @input_digest, @output_digest, @artifact_manifest::jsonb,
            @sequence_number, @previous_entry_hash, @content_hash, @ledger_created_at, @correlation_id, @metadata::jsonb)
        """;

    private const string SelectLatestSql = $"""
        SELECT {SelectLedgerColumns}
        FROM run_ledger_entries
        WHERE tenant_id = @tenant_id
        ORDER BY sequence_number DESC
        LIMIT 1
        """;

    private const string GetSequenceSql = """
        SELECT next_seq, prev_hash FROM next_ledger_sequence(@tenant_id)
        """;

    private const string UpdateSequenceHashSql = """
        SELECT update_ledger_sequence_hash(@tenant_id, @content_hash)
        """;

    private const string VerifyChainSql = """
        SELECT is_valid, invalid_ledger_id, invalid_sequence, error_message
        FROM verify_ledger_chain(@tenant_id, @start_seq, @end_seq)
        """;

    private const string GetSummarySql = """
        SELECT total_entries, entries_since, total_runs, successful_runs, failed_runs,
               total_jobs, unique_sources, unique_run_types, earliest_entry, latest_entry
        FROM get_ledger_summary(@tenant_id, @since)
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresLedgerRepository> _logger;

    public PostgresLedgerRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresLedgerRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<RunLedgerEntry> AppendAsync(
        Run run,
        IReadOnlyList<Artifact> artifacts,
        string inputDigest,
        string? metadata = null,
        CancellationToken cancellationToken = default)
    {
        if (run.CompletedAt is null)
        {
            throw new InvalidOperationException("Cannot create ledger entry from an incomplete run.");
        }

        await using var connection = await _dataSource.OpenConnectionAsync(run.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            // Get next sequence number and previous hash
            long sequenceNumber;
            string? previousEntryHash;

            await using (var seqCommand = new NpgsqlCommand(GetSequenceSql, connection, transaction))
            {
                seqCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                seqCommand.Parameters.AddWithValue("tenant_id", run.TenantId);

                await using var reader = await seqCommand.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
                if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                {
                    throw new InvalidOperationException("Failed to get next ledger sequence.");
                }

                sequenceNumber = reader.GetInt64(0);
                previousEntryHash = reader.IsDBNull(1) ? null : reader.GetString(1);
            }

            // Create the ledger entry
            var entry = RunLedgerEntry.FromCompletedRun(
                run: run,
                artifacts: artifacts,
                inputDigest: inputDigest,
                sequenceNumber: sequenceNumber,
                previousEntryHash: previousEntryHash,
                metadata: metadata);

            // Insert the entry
            await using (var insertCommand = new NpgsqlCommand(InsertEntrySql, connection, transaction))
            {
                insertCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                AddEntryParameters(insertCommand, entry);
                await insertCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            // Update sequence hash
            await using (var updateCommand = new NpgsqlCommand(UpdateSequenceHashSql, connection, transaction))
            {
                updateCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
                updateCommand.Parameters.AddWithValue("tenant_id", run.TenantId);
                updateCommand.Parameters.AddWithValue("content_hash", entry.ContentHash);
                await updateCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }

            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);

            OrchestratorMetrics.LedgerEntryCreated(run.TenantId, run.RunType, entry.FinalStatus.ToString());
            _logger.LogDebug("Ledger entry {LedgerId} appended for run {RunId}, sequence {Sequence}",
                entry.LedgerId, run.RunId, sequenceNumber);

            return entry;
        }
        catch
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            throw;
        }
    }

    public async Task<RunLedgerEntry?> GetByIdAsync(
        string tenantId,
        Guid ledgerId,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("ledger_id", ledgerId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<RunLedgerEntry?> GetByRunIdAsync(
        string tenantId,
        Guid runId,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByRunIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("run_id", runId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<IReadOnlyList<RunLedgerEntry>> ListAsync(
        string tenantId,
        string? runType = null,
        Guid? sourceId = null,
        RunStatus? finalStatus = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var (sql, parameters) = BuildListQuery(tenantId, runType, sourceId, finalStatus, startTime, endTime, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<RunLedgerEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<IReadOnlyList<RunLedgerEntry>> GetBySequenceRangeAsync(
        string tenantId,
        long startSequence,
        long endSequence,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectLedgerColumns}
            FROM run_ledger_entries
            WHERE tenant_id = @tenant_id
              AND sequence_number >= @start_seq
              AND sequence_number <= @end_seq
            ORDER BY sequence_number ASC
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("start_seq", startSequence);
        command.Parameters.AddWithValue("end_seq", endSequence);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<RunLedgerEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<RunLedgerEntry?> GetLatestAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectLatestSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapEntry(reader);
    }

    public async Task<IReadOnlyList<RunLedgerEntry>> GetBySourceAsync(
        string tenantId,
        Guid sourceId,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectLedgerColumns}
            FROM run_ledger_entries
            WHERE tenant_id = @tenant_id
              AND source_id = @source_id
            ORDER BY ledger_created_at DESC
            LIMIT @limit
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var entries = new List<RunLedgerEntry>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    public async Task<long> GetCountAsync(
        string tenantId,
        string? runType = null,
        Guid? sourceId = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder("SELECT COUNT(*) FROM run_ledger_entries WHERE tenant_id = @tenant_id");
        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (runType is not null)
        {
            sb.Append(" AND run_type = @run_type");
            parameters.Add(("run_type", runType));
        }

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (startTime.HasValue)
        {
            sb.Append(" AND ledger_created_at >= @start_time");
            parameters.Add(("start_time", startTime.Value));
        }

        if (endTime.HasValue)
        {
            sb.Append(" AND ledger_created_at <= @end_time");
            parameters.Add(("end_time", endTime.Value));
        }

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sb.ToString(), connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(result);
    }

    public async Task<ChainVerificationResult> VerifyChainAsync(
        string tenantId,
        long? startSequence = null,
        long? endSequence = null,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(VerifyChainSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("start_seq", (object?)startSequence ?? 1L);
        command.Parameters.AddWithValue("end_seq", (object?)endSequence ?? DBNull.Value);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return new ChainVerificationResult(true, null, null, null);
        }

        return new ChainVerificationResult(
            IsValid: reader.GetBoolean(0),
            InvalidEntryId: reader.IsDBNull(1) ? null : reader.GetGuid(1),
            InvalidSequence: reader.IsDBNull(2) ? null : reader.GetInt64(2),
            ErrorMessage: reader.IsDBNull(3) ? null : reader.GetString(3));
    }

    public async Task<LedgerSummary> GetSummaryAsync(
        string tenantId,
        DateTimeOffset? since = null,
        CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(GetSummarySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("since", (object?)since ?? DBNull.Value);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return new LedgerSummary(0, 0, 0, 0, 0, 0, 0, 0, null, null);
        }

        return new LedgerSummary(
            TotalEntries: reader.GetInt64(0),
            EntriesSince: reader.GetInt64(1),
            TotalRuns: reader.GetInt64(2),
            SuccessfulRuns: reader.GetInt64(3),
            FailedRuns: reader.GetInt64(4),
            TotalJobs: reader.GetInt64(5),
            UniqueSources: reader.GetInt64(6),
            UniqueRunTypes: reader.GetInt64(7),
            EarliestEntry: reader.IsDBNull(8) ? null : reader.GetFieldValue<DateTimeOffset>(8),
            LatestEntry: reader.IsDBNull(9) ? null : reader.GetFieldValue<DateTimeOffset>(9));
    }

    private static void AddEntryParameters(NpgsqlCommand command, RunLedgerEntry entry)
    {
        command.Parameters.AddWithValue("ledger_id", entry.LedgerId);
        command.Parameters.AddWithValue("tenant_id", entry.TenantId);
        command.Parameters.AddWithValue("run_id", entry.RunId);
        command.Parameters.AddWithValue("source_id", entry.SourceId);
        command.Parameters.AddWithValue("run_type", entry.RunType);
        command.Parameters.AddWithValue("final_status", (int)entry.FinalStatus);
        command.Parameters.AddWithValue("total_jobs", entry.TotalJobs);
        command.Parameters.AddWithValue("succeeded_jobs", entry.SucceededJobs);
        command.Parameters.AddWithValue("failed_jobs", entry.FailedJobs);
        command.Parameters.AddWithValue("run_created_at", entry.RunCreatedAt);
        command.Parameters.AddWithValue("run_started_at", (object?)entry.RunStartedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("run_completed_at", entry.RunCompletedAt);
        command.Parameters.AddWithValue("execution_duration_ms", (long)entry.ExecutionDuration.TotalMilliseconds);
        command.Parameters.AddWithValue("initiated_by", entry.InitiatedBy);
        command.Parameters.AddWithValue("input_digest", entry.InputDigest);
        command.Parameters.AddWithValue("output_digest", entry.OutputDigest);
        command.Parameters.AddWithValue("artifact_manifest", entry.ArtifactManifest);
        command.Parameters.AddWithValue("sequence_number", entry.SequenceNumber);
        command.Parameters.AddWithValue("previous_entry_hash", (object?)entry.PreviousEntryHash ?? DBNull.Value);
        command.Parameters.AddWithValue("content_hash", entry.ContentHash);
        command.Parameters.AddWithValue("ledger_created_at", entry.LedgerCreatedAt);
        command.Parameters.AddWithValue("correlation_id", (object?)entry.CorrelationId ?? DBNull.Value);
        command.Parameters.AddWithValue("metadata", (object?)entry.Metadata ?? DBNull.Value);
    }

    private static RunLedgerEntry MapEntry(NpgsqlDataReader reader)
    {
        return new RunLedgerEntry(
            LedgerId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            RunId: reader.GetGuid(2),
            SourceId: reader.GetGuid(3),
            RunType: reader.GetString(4),
            FinalStatus: (RunStatus)reader.GetInt32(5),
            TotalJobs: reader.GetInt32(6),
            SucceededJobs: reader.GetInt32(7),
            FailedJobs: reader.GetInt32(8),
            RunCreatedAt: reader.GetFieldValue<DateTimeOffset>(9),
            RunStartedAt: reader.IsDBNull(10) ? null : reader.GetFieldValue<DateTimeOffset>(10),
            RunCompletedAt: reader.GetFieldValue<DateTimeOffset>(11),
            ExecutionDuration: TimeSpan.FromMilliseconds(reader.GetInt64(12)),
            InitiatedBy: reader.GetString(13),
            InputDigest: reader.GetString(14),
            OutputDigest: reader.GetString(15),
            ArtifactManifest: reader.GetString(16),
            SequenceNumber: reader.GetInt64(17),
            PreviousEntryHash: reader.IsDBNull(18) ? null : reader.GetString(18),
            ContentHash: reader.GetString(19),
            LedgerCreatedAt: reader.GetFieldValue<DateTimeOffset>(20),
            CorrelationId: reader.IsDBNull(21) ? null : reader.GetString(21),
            Metadata: reader.IsDBNull(22) ? null : reader.GetString(22));
    }

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        string? runType,
        Guid? sourceId,
        RunStatus? finalStatus,
        DateTimeOffset? startTime,
        DateTimeOffset? endTime,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectLedgerColumns} FROM run_ledger_entries WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (runType is not null)
        {
            sb.Append(" AND run_type = @run_type");
            parameters.Add(("run_type", runType));
        }

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (finalStatus.HasValue)
        {
            sb.Append(" AND final_status = @final_status");
            parameters.Add(("final_status", (int)finalStatus.Value));
        }

        if (startTime.HasValue)
        {
            sb.Append(" AND ledger_created_at >= @start_time");
            parameters.Add(("start_time", startTime.Value));
        }

        if (endTime.HasValue)
        {
            sb.Append(" AND ledger_created_at <= @end_time");
            parameters.Add(("end_time", endTime.Value));
        }

        sb.Append(" ORDER BY ledger_created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }
}
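
// Illustrative sketch (not part of this commit): end-to-end use of the hash
// chain above. A completed Run is appended, then the per-tenant chain is
// re-verified. Only ILedgerRepository members defined in this file are used;
// the helper class and variable names are placeholders.
internal static class LedgerChainSketch
{
    public static async Task AppendThenVerifyAsync(
        ILedgerRepository ledger,
        Run completedRun,                   // must have CompletedAt set
        IReadOnlyList<Artifact> artifacts,
        string inputDigest,
        CancellationToken cancellationToken)
    {
        // AppendAsync assigns the next sequence number and links the entry to
        // its predecessor via previous_entry_hash, all in one transaction.
        var entry = await ledger.AppendAsync(completedRun, artifacts, inputDigest, metadata: null, cancellationToken);

        // Full-chain verification; a failure pinpoints the first broken entry.
        var result = await ledger.VerifyChainAsync(completedRun.TenantId, startSequence: null, endSequence: null, cancellationToken);
        if (!result.IsValid)
        {
            throw new InvalidOperationException(
                $"Ledger chain broken at sequence {result.InvalidSequence}: {result.ErrorMessage}");
        }
    }
}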

/// <summary>
/// PostgreSQL implementation of the ledger export repository.
/// </summary>
public sealed class PostgresLedgerExportRepository : ILedgerExportRepository
{
    private const string SelectExportColumns = """
        export_id, tenant_id, status, format, start_time, end_time, run_type_filter,
        source_id_filter, entry_count, output_uri, output_digest, output_size_bytes,
        requested_by, requested_at, started_at, completed_at, error_message
        """;

    private const string InsertExportSql = """
        INSERT INTO ledger_exports (
            export_id, tenant_id, status, format, start_time, end_time, run_type_filter,
            source_id_filter, entry_count, output_uri, output_digest, output_size_bytes,
            requested_by, requested_at, started_at, completed_at, error_message)
        VALUES (
            @export_id, @tenant_id, @status, @format, @start_time, @end_time, @run_type_filter,
            @source_id_filter, @entry_count, @output_uri, @output_digest, @output_size_bytes,
            @requested_by, @requested_at, @started_at, @completed_at, @error_message)
        """;

    private const string UpdateExportSql = """
        UPDATE ledger_exports
        SET status = @status,
            entry_count = @entry_count,
            output_uri = @output_uri,
            output_digest = @output_digest,
            output_size_bytes = @output_size_bytes,
            started_at = @started_at,
            completed_at = @completed_at,
            error_message = @error_message
        WHERE tenant_id = @tenant_id AND export_id = @export_id
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresLedgerExportRepository> _logger;

    public PostgresLedgerExportRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresLedgerExportRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<LedgerExport> CreateAsync(LedgerExport export, CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(export.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertExportSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        AddExportParameters(command, export);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);

        OrchestratorMetrics.LedgerExportRequested(export.TenantId, export.Format);
        _logger.LogDebug("Ledger export {ExportId} created for tenant {TenantId}", export.ExportId, export.TenantId);

        return export;
    }

    public async Task<LedgerExport?> GetByIdAsync(string tenantId, Guid exportId, CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectExportColumns}
            FROM ledger_exports
            WHERE tenant_id = @tenant_id AND export_id = @export_id
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("export_id", exportId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapExport(reader);
    }

    public async Task<IReadOnlyList<LedgerExport>> ListAsync(
        string tenantId,
        LedgerExportStatus? status = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder($"SELECT {SelectExportColumns} FROM ledger_exports WHERE tenant_id = @tenant_id");
        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (status.HasValue)
        {
            sb.Append(" AND status = @status");
            parameters.Add(("status", (int)status.Value));
        }

        sb.Append(" ORDER BY requested_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sb.ToString(), connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var exports = new List<LedgerExport>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            exports.Add(MapExport(reader));
        }
        return exports;
    }

    public async Task<LedgerExport> UpdateAsync(LedgerExport export, CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(export.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateExportSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("export_id", export.ExportId);
        command.Parameters.AddWithValue("tenant_id", export.TenantId);
        command.Parameters.AddWithValue("status", (int)export.Status);
        command.Parameters.AddWithValue("entry_count", export.EntryCount);
        command.Parameters.AddWithValue("output_uri", (object?)export.OutputUri ?? DBNull.Value);
        command.Parameters.AddWithValue("output_digest", (object?)export.OutputDigest ?? DBNull.Value);
        command.Parameters.AddWithValue("output_size_bytes", (object?)export.OutputSizeBytes ?? DBNull.Value);
        command.Parameters.AddWithValue("started_at", (object?)export.StartedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)export.CompletedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("error_message", (object?)export.ErrorMessage ?? DBNull.Value);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);

        if (export.Status == LedgerExportStatus.Completed)
        {
            OrchestratorMetrics.LedgerExportCompleted(export.TenantId, export.Format);
        }
        else if (export.Status == LedgerExportStatus.Failed)
        {
            OrchestratorMetrics.LedgerExportFailed(export.TenantId, export.Format);
        }

        return export;
    }

    public async Task<IReadOnlyList<LedgerExport>> GetPendingAsync(int limit = 10, CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectExportColumns}
            FROM ledger_exports
            WHERE status = @status
            ORDER BY requested_at ASC
            LIMIT @limit
            """;

        await using var connection = await _dataSource.OpenConnectionAsync("_system", "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("status", (int)LedgerExportStatus.Pending);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var exports = new List<LedgerExport>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            exports.Add(MapExport(reader));
        }
        return exports;
    }

    private static void AddExportParameters(NpgsqlCommand command, LedgerExport export)
    {
        command.Parameters.AddWithValue("export_id", export.ExportId);
        command.Parameters.AddWithValue("tenant_id", export.TenantId);
        command.Parameters.AddWithValue("status", (int)export.Status);
        command.Parameters.AddWithValue("format", export.Format);
        command.Parameters.AddWithValue("start_time", (object?)export.StartTime ?? DBNull.Value);
        command.Parameters.AddWithValue("end_time", (object?)export.EndTime ?? DBNull.Value);
        command.Parameters.AddWithValue("run_type_filter", (object?)export.RunTypeFilter ?? DBNull.Value);
        command.Parameters.AddWithValue("source_id_filter", (object?)export.SourceIdFilter ?? DBNull.Value);
        command.Parameters.AddWithValue("entry_count", export.EntryCount);
        command.Parameters.AddWithValue("output_uri", (object?)export.OutputUri ?? DBNull.Value);
        command.Parameters.AddWithValue("output_digest", (object?)export.OutputDigest ?? DBNull.Value);
        command.Parameters.AddWithValue("output_size_bytes", (object?)export.OutputSizeBytes ?? DBNull.Value);
        command.Parameters.AddWithValue("requested_by", export.RequestedBy);
        command.Parameters.AddWithValue("requested_at", export.RequestedAt);
        command.Parameters.AddWithValue("started_at", (object?)export.StartedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)export.CompletedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("error_message", (object?)export.ErrorMessage ?? DBNull.Value);
    }

    private static LedgerExport MapExport(NpgsqlDataReader reader)
    {
        return new LedgerExport(
            ExportId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            Status: (LedgerExportStatus)reader.GetInt32(2),
            Format: reader.GetString(3),
            StartTime: reader.IsDBNull(4) ? null : reader.GetFieldValue<DateTimeOffset>(4),
            EndTime: reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5),
            RunTypeFilter: reader.IsDBNull(6) ? null : reader.GetString(6),
            SourceIdFilter: reader.IsDBNull(7) ? null : reader.GetGuid(7),
            EntryCount: reader.GetInt32(8),
            OutputUri: reader.IsDBNull(9) ? null : reader.GetString(9),
            OutputDigest: reader.IsDBNull(10) ? null : reader.GetString(10),
            OutputSizeBytes: reader.IsDBNull(11) ? null : reader.GetInt64(11),
            RequestedBy: reader.GetString(12),
            RequestedAt: reader.GetFieldValue<DateTimeOffset>(13),
            StartedAt: reader.IsDBNull(14) ? null : reader.GetFieldValue<DateTimeOffset>(14),
            CompletedAt: reader.IsDBNull(15) ? null : reader.GetFieldValue<DateTimeOffset>(15),
            ErrorMessage: reader.IsDBNull(16) ? null : reader.GetString(16));
    }
}
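
// Illustrative sketch (not part of this commit): the assumed export lifecycle.
// A pending row is created elsewhere, a background worker claims it via
// GetPendingAsync, and UpdateAsync records the outcome. The `with` mutations
// assume LedgerExport is a positional record, which the named-argument
// construction in MapExport above suggests but does not prove.
internal static class LedgerExportLifecycleSketch
{
    public static async Task ProcessOnePendingAsync(ILedgerExportRepository exports, CancellationToken cancellationToken)
    {
        foreach (var export in await exports.GetPendingAsync(limit: 1, cancellationToken))
        {
            // ... write the export artifact and compute its digest (elided) ...
            var done = export with
            {
                Status = LedgerExportStatus.Completed,
                StartedAt = DateTimeOffset.UtcNow,
                CompletedAt = DateTimeOffset.UtcNow,
            };
            await exports.UpdateAsync(done, cancellationToken);
        }
    }
}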

/// <summary>
/// PostgreSQL implementation of the manifest repository.
/// </summary>
public sealed class PostgresManifestRepository : IManifestRepository
{
    private const string SelectManifestColumns = """
        manifest_id, schema_version, tenant_id, provenance_type, subject_id, statements,
        artifacts, materials, build_info, payload_digest, signature_algorithm, signature,
        key_id, created_at, expires_at, metadata
        """;

    private const string InsertManifestSql = """
        INSERT INTO signed_manifests (
            manifest_id, schema_version, tenant_id, provenance_type, subject_id, statements,
            artifacts, materials, build_info, payload_digest, signature_algorithm, signature,
            key_id, created_at, expires_at, metadata)
        VALUES (
            @manifest_id, @schema_version, @tenant_id, @provenance_type, @subject_id, @statements::jsonb,
            @artifacts::jsonb, @materials::jsonb, @build_info::jsonb, @payload_digest, @signature_algorithm, @signature,
            @key_id, @created_at, @expires_at, @metadata::jsonb)
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresManifestRepository> _logger;

    public PostgresManifestRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresManifestRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<SignedManifest> CreateAsync(SignedManifest manifest, CancellationToken cancellationToken = default)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(manifest.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertManifestSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("manifest_id", manifest.ManifestId);
        command.Parameters.AddWithValue("schema_version", manifest.SchemaVersion);
        command.Parameters.AddWithValue("tenant_id", manifest.TenantId);
        command.Parameters.AddWithValue("provenance_type", (int)manifest.ProvenanceType);
        command.Parameters.AddWithValue("subject_id", manifest.SubjectId);
        command.Parameters.AddWithValue("statements", manifest.Statements);
        command.Parameters.AddWithValue("artifacts", manifest.Artifacts);
        command.Parameters.AddWithValue("materials", manifest.Materials);
        command.Parameters.AddWithValue("build_info", (object?)manifest.BuildInfo ?? DBNull.Value);
        command.Parameters.AddWithValue("payload_digest", manifest.PayloadDigest);
        command.Parameters.AddWithValue("signature_algorithm", manifest.SignatureAlgorithm);
        command.Parameters.AddWithValue("signature", manifest.Signature);
        command.Parameters.AddWithValue("key_id", manifest.KeyId);
        command.Parameters.AddWithValue("created_at", manifest.CreatedAt);
        command.Parameters.AddWithValue("expires_at", (object?)manifest.ExpiresAt ?? DBNull.Value);
        command.Parameters.AddWithValue("metadata", (object?)manifest.Metadata ?? DBNull.Value);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);

        OrchestratorMetrics.ManifestCreated(manifest.TenantId, manifest.ProvenanceType.ToString());
        _logger.LogDebug("Manifest {ManifestId} created for subject {SubjectId}", manifest.ManifestId, manifest.SubjectId);

        return manifest;
    }

    public async Task<SignedManifest?> GetByIdAsync(string tenantId, Guid manifestId, CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectManifestColumns}
            FROM signed_manifests
            WHERE tenant_id = @tenant_id AND manifest_id = @manifest_id
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("manifest_id", manifestId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapManifest(reader);
    }

    public async Task<SignedManifest?> GetBySubjectAsync(
        string tenantId,
        ProvenanceType provenanceType,
        Guid subjectId,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectManifestColumns}
            FROM signed_manifests
            WHERE tenant_id = @tenant_id
              AND provenance_type = @provenance_type
              AND subject_id = @subject_id
            ORDER BY created_at DESC
            LIMIT 1
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("provenance_type", (int)provenanceType);
        command.Parameters.AddWithValue("subject_id", subjectId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapManifest(reader);
    }

    public async Task<IReadOnlyList<SignedManifest>> ListAsync(
        string tenantId,
        ProvenanceType? provenanceType = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder($"SELECT {SelectManifestColumns} FROM signed_manifests WHERE tenant_id = @tenant_id");
        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (provenanceType.HasValue)
        {
            sb.Append(" AND provenance_type = @provenance_type");
            parameters.Add(("provenance_type", (int)provenanceType.Value));
        }

        sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sb.ToString(), connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var manifests = new List<SignedManifest>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            manifests.Add(MapManifest(reader));
        }
        return manifests;
    }

    public async Task<SignedManifest?> GetByPayloadDigestAsync(
        string tenantId,
        string payloadDigest,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT {SelectManifestColumns}
            FROM signed_manifests
            WHERE tenant_id = @tenant_id AND payload_digest = @payload_digest
            """;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("payload_digest", payloadDigest);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapManifest(reader);
    }

    private static SignedManifest MapManifest(NpgsqlDataReader reader)
    {
        return new SignedManifest(
            ManifestId: reader.GetGuid(0),
            SchemaVersion: reader.GetString(1),
            TenantId: reader.GetString(2),
            ProvenanceType: (ProvenanceType)reader.GetInt32(3),
            SubjectId: reader.GetGuid(4),
            Statements: reader.GetString(5),
            Artifacts: reader.GetString(6),
            Materials: reader.GetString(7),
            BuildInfo: reader.IsDBNull(8) ? null : reader.GetString(8),
            PayloadDigest: reader.GetString(9),
            SignatureAlgorithm: reader.GetString(10),
            Signature: reader.GetString(11),
            KeyId: reader.GetString(12),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(13),
            ExpiresAt: reader.IsDBNull(14) ? null : reader.GetFieldValue<DateTimeOffset>(14),
            Metadata: reader.IsDBNull(15) ? null : reader.GetString(15));
    }
}
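
// Illustrative sketch (not part of this commit): payload-digest lookups make
// manifest writes idempotent. Reusing an existing manifest when the signed
// payload is byte-identical is an assumed policy, not taken from this file.
internal static class ManifestDedupSketch
{
    public static async Task<SignedManifest> GetOrCreateAsync(
        IManifestRepository manifests, SignedManifest candidate, CancellationToken cancellationToken)
    {
        // A hit means the same payload was already signed and stored for this tenant.
        var existing = await manifests.GetByPayloadDigestAsync(candidate.TenantId, candidate.PayloadDigest, cancellationToken);
        return existing ?? await manifests.CreateAsync(candidate, cancellationToken);
    }
}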
@@ -0,0 +1,434 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of the quota repository.
/// </summary>
public sealed class PostgresQuotaRepository : IQuotaRepository
{
    private const string SelectQuotaColumns = """
        quota_id, tenant_id, job_type, max_active, max_per_hour, burst_capacity,
        refill_rate, current_tokens, last_refill_at, current_active, current_hour_count,
        current_hour_start, paused, pause_reason, quota_ticket, created_at, updated_at, updated_by
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectQuotaColumns}
        FROM quotas
        WHERE tenant_id = @tenant_id AND quota_id = @quota_id
        """;

    private const string SelectByTenantAndJobTypeSql = $"""
        SELECT {SelectQuotaColumns}
        FROM quotas
        WHERE tenant_id = @tenant_id AND (job_type = @job_type OR (job_type IS NULL AND @job_type IS NULL))
        """;

    private const string InsertQuotaSql = """
        INSERT INTO quotas (
            quota_id, tenant_id, job_type, max_active, max_per_hour, burst_capacity,
            refill_rate, current_tokens, last_refill_at, current_active, current_hour_count,
            current_hour_start, paused, pause_reason, quota_ticket, created_at, updated_at, updated_by)
        VALUES (
            @quota_id, @tenant_id, @job_type, @max_active, @max_per_hour, @burst_capacity,
            @refill_rate, @current_tokens, @last_refill_at, @current_active, @current_hour_count,
            @current_hour_start, @paused, @pause_reason, @quota_ticket, @created_at, @updated_at, @updated_by)
        """;

    private const string UpdateQuotaSql = """
        UPDATE quotas
        SET job_type = @job_type,
            max_active = @max_active,
            max_per_hour = @max_per_hour,
            burst_capacity = @burst_capacity,
            refill_rate = @refill_rate,
            current_tokens = @current_tokens,
            last_refill_at = @last_refill_at,
            current_active = @current_active,
            current_hour_count = @current_hour_count,
            current_hour_start = @current_hour_start,
            paused = @paused,
            pause_reason = @pause_reason,
            quota_ticket = @quota_ticket,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND quota_id = @quota_id
        """;

    private const string UpdateStateSql = """
        UPDATE quotas
        SET current_tokens = @current_tokens,
            last_refill_at = @last_refill_at,
            current_active = @current_active,
            current_hour_count = @current_hour_count,
            current_hour_start = @current_hour_start,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND quota_id = @quota_id
        """;

    private const string PauseQuotaSql = """
        UPDATE quotas
        SET paused = TRUE,
            pause_reason = @pause_reason,
            quota_ticket = @quota_ticket,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND quota_id = @quota_id
        """;

    private const string ResumeQuotaSql = """
        UPDATE quotas
        SET paused = FALSE,
            pause_reason = NULL,
            quota_ticket = NULL,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND quota_id = @quota_id
        """;
|
||||
|
||||
private const string IncrementActiveSql = """
|
||||
UPDATE quotas
|
||||
SET current_active = current_active + 1,
|
||||
updated_at = @updated_at
|
||||
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
|
||||
""";
|
||||
|
||||
private const string DecrementActiveSql = """
|
||||
UPDATE quotas
|
||||
SET current_active = GREATEST(current_active - 1, 0),
|
||||
updated_at = @updated_at
|
||||
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
|
||||
""";
|
||||
|
||||
private const string DeleteQuotaSql = """
|
||||
DELETE FROM quotas
|
||||
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
|
||||
""";
|
||||
|
||||
private readonly OrchestratorDataSource _dataSource;
|
||||
private readonly ILogger<PostgresQuotaRepository> _logger;
|
||||
|
||||
public PostgresQuotaRepository(
|
||||
OrchestratorDataSource dataSource,
|
||||
ILogger<PostgresQuotaRepository> logger)
|
||||
{
|
||||
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<Quota?> GetByIdAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
|
||||
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return MapQuota(reader);
|
||||
}
|
||||
|
||||
public async Task<Quota?> GetByTenantAndJobTypeAsync(string tenantId, string? jobType, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(SelectByTenantAndJobTypeSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("job_type", (object?)jobType ?? DBNull.Value);
|
||||
|
||||
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return MapQuota(reader);
|
||||
}
|
||||
|
||||
public async Task CreateAsync(Quota quota, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(quota.TenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(InsertQuotaSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
AddQuotaParameters(command, quota);
|
||||
|
||||
try
|
||||
{
|
||||
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
OrchestratorMetrics.QuotaCreated(quota.TenantId, quota.JobType);
|
||||
}
|
||||
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
|
||||
{
|
||||
_logger.LogWarning("Duplicate quota for tenant {TenantId} job type {JobType}", quota.TenantId, quota.JobType);
|
||||
throw new DuplicateQuotaException(quota.TenantId, quota.JobType, ex);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task UpdateAsync(Quota quota, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(quota.TenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(UpdateQuotaSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", quota.TenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quota.QuotaId);
|
||||
command.Parameters.AddWithValue("job_type", (object?)quota.JobType ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("max_active", quota.MaxActive);
|
||||
command.Parameters.AddWithValue("max_per_hour", quota.MaxPerHour);
|
||||
command.Parameters.AddWithValue("burst_capacity", quota.BurstCapacity);
|
||||
command.Parameters.AddWithValue("refill_rate", quota.RefillRate);
|
||||
command.Parameters.AddWithValue("current_tokens", quota.CurrentTokens);
|
||||
command.Parameters.AddWithValue("last_refill_at", quota.LastRefillAt);
|
||||
command.Parameters.AddWithValue("current_active", quota.CurrentActive);
|
||||
command.Parameters.AddWithValue("current_hour_count", quota.CurrentHourCount);
|
||||
command.Parameters.AddWithValue("current_hour_start", quota.CurrentHourStart);
|
||||
command.Parameters.AddWithValue("paused", quota.Paused);
|
||||
command.Parameters.AddWithValue("pause_reason", (object?)quota.PauseReason ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("quota_ticket", (object?)quota.QuotaTicket ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("updated_at", quota.UpdatedAt);
|
||||
command.Parameters.AddWithValue("updated_by", quota.UpdatedBy);
|
||||
|
||||
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (rows == 0)
|
||||
{
|
||||
_logger.LogWarning("Quota not found for update: {QuotaId}", quota.QuotaId);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task UpdateStateAsync(
|
||||
string tenantId,
|
||||
Guid quotaId,
|
||||
double currentTokens,
|
||||
DateTimeOffset lastRefillAt,
|
||||
int currentActive,
|
||||
int currentHourCount,
|
||||
DateTimeOffset currentHourStart,
|
||||
string updatedBy,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(UpdateStateSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
command.Parameters.AddWithValue("current_tokens", currentTokens);
|
||||
command.Parameters.AddWithValue("last_refill_at", lastRefillAt);
|
||||
command.Parameters.AddWithValue("current_active", currentActive);
|
||||
command.Parameters.AddWithValue("current_hour_count", currentHourCount);
|
||||
command.Parameters.AddWithValue("current_hour_start", currentHourStart);
|
||||
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
|
||||
command.Parameters.AddWithValue("updated_by", updatedBy);
|
||||
|
||||
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task PauseAsync(string tenantId, Guid quotaId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(PauseQuotaSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
command.Parameters.AddWithValue("pause_reason", reason);
|
||||
command.Parameters.AddWithValue("quota_ticket", (object?)ticket ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
|
||||
command.Parameters.AddWithValue("updated_by", updatedBy);
|
||||
|
||||
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (rows > 0)
|
||||
{
|
||||
OrchestratorMetrics.QuotaPaused(tenantId);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task ResumeAsync(string tenantId, Guid quotaId, string updatedBy, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(ResumeQuotaSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
|
||||
command.Parameters.AddWithValue("updated_by", updatedBy);
|
||||
|
||||
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
if (rows > 0)
|
||||
{
|
||||
OrchestratorMetrics.QuotaResumed(tenantId);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task IncrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(IncrementActiveSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
|
||||
|
||||
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task DecrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(DecrementActiveSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
|
||||
|
||||
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<Quota>> ListAsync(
|
||||
string tenantId,
|
||||
string? jobType,
|
||||
bool? paused,
|
||||
int limit,
|
||||
int offset,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var (sql, parameters) = BuildListQuery(tenantId, jobType, paused, limit, offset);
|
||||
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(sql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
foreach (var (name, value) in parameters)
|
||||
{
|
||||
command.Parameters.AddWithValue(name, value);
|
||||
}
|
||||
|
||||
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
|
||||
var quotas = new List<Quota>();
|
||||
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
quotas.Add(MapQuota(reader));
|
||||
}
|
||||
return quotas;
|
||||
}
|
||||
|
||||
public async Task<bool> DeleteAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
|
||||
await using var command = new NpgsqlCommand(DeleteQuotaSql, connection);
|
||||
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
|
||||
|
||||
command.Parameters.AddWithValue("tenant_id", tenantId);
|
||||
command.Parameters.AddWithValue("quota_id", quotaId);
|
||||
|
||||
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
return rows > 0;
|
||||
}
|
||||
|
||||
private static void AddQuotaParameters(NpgsqlCommand command, Quota quota)
|
||||
{
|
||||
command.Parameters.AddWithValue("quota_id", quota.QuotaId);
|
||||
command.Parameters.AddWithValue("tenant_id", quota.TenantId);
|
||||
command.Parameters.AddWithValue("job_type", (object?)quota.JobType ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("max_active", quota.MaxActive);
|
||||
command.Parameters.AddWithValue("max_per_hour", quota.MaxPerHour);
|
||||
command.Parameters.AddWithValue("burst_capacity", quota.BurstCapacity);
|
||||
command.Parameters.AddWithValue("refill_rate", quota.RefillRate);
|
||||
command.Parameters.AddWithValue("current_tokens", quota.CurrentTokens);
|
||||
command.Parameters.AddWithValue("last_refill_at", quota.LastRefillAt);
|
||||
command.Parameters.AddWithValue("current_active", quota.CurrentActive);
|
||||
command.Parameters.AddWithValue("current_hour_count", quota.CurrentHourCount);
|
||||
command.Parameters.AddWithValue("current_hour_start", quota.CurrentHourStart);
|
||||
command.Parameters.AddWithValue("paused", quota.Paused);
|
||||
command.Parameters.AddWithValue("pause_reason", (object?)quota.PauseReason ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("quota_ticket", (object?)quota.QuotaTicket ?? DBNull.Value);
|
||||
command.Parameters.AddWithValue("created_at", quota.CreatedAt);
|
||||
command.Parameters.AddWithValue("updated_at", quota.UpdatedAt);
|
||||
command.Parameters.AddWithValue("updated_by", quota.UpdatedBy);
|
||||
}
|
||||
|
||||
private static Quota MapQuota(NpgsqlDataReader reader)
|
||||
{
|
||||
return new Quota(
|
||||
QuotaId: reader.GetGuid(0),
|
||||
TenantId: reader.GetString(1),
|
||||
JobType: reader.IsDBNull(2) ? null : reader.GetString(2),
|
||||
MaxActive: reader.GetInt32(3),
|
||||
MaxPerHour: reader.GetInt32(4),
|
||||
BurstCapacity: reader.GetInt32(5),
|
||||
RefillRate: reader.GetDouble(6),
|
||||
CurrentTokens: reader.GetDouble(7),
|
||||
LastRefillAt: reader.GetFieldValue<DateTimeOffset>(8),
|
||||
CurrentActive: reader.GetInt32(9),
|
||||
CurrentHourCount: reader.GetInt32(10),
|
||||
CurrentHourStart: reader.GetFieldValue<DateTimeOffset>(11),
|
||||
Paused: reader.GetBoolean(12),
|
||||
PauseReason: reader.IsDBNull(13) ? null : reader.GetString(13),
|
||||
QuotaTicket: reader.IsDBNull(14) ? null : reader.GetString(14),
|
||||
CreatedAt: reader.GetFieldValue<DateTimeOffset>(15),
|
||||
UpdatedAt: reader.GetFieldValue<DateTimeOffset>(16),
|
||||
UpdatedBy: reader.GetString(17));
|
||||
}
|
||||
|
||||
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
|
||||
string tenantId,
|
||||
string? jobType,
|
||||
bool? paused,
|
||||
int limit,
|
||||
int offset)
|
||||
{
|
||||
var sb = new StringBuilder();
|
||||
sb.Append($"SELECT {SelectQuotaColumns} FROM quotas WHERE tenant_id = @tenant_id");
|
||||
|
||||
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
|
||||
|
||||
if (jobType is not null)
|
||||
{
|
||||
sb.Append(" AND job_type = @job_type");
|
||||
parameters.Add(("job_type", jobType));
|
||||
}
|
||||
|
||||
if (paused.HasValue)
|
||||
{
|
||||
sb.Append(" AND paused = @paused");
|
||||
parameters.Add(("paused", paused.Value));
|
||||
}
|
||||
|
||||
sb.Append(" ORDER BY job_type NULLS FIRST LIMIT @limit OFFSET @offset");
|
||||
parameters.Add(("limit", limit));
|
||||
parameters.Add(("offset", offset));
|
||||
|
||||
return (sb.ToString(), parameters);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Exception thrown when attempting to create a duplicate quota.
|
||||
/// </summary>
|
||||
public sealed class DuplicateQuotaException : Exception
|
||||
{
|
||||
public string TenantId { get; }
|
||||
public string? JobType { get; }
|
||||
|
||||
public DuplicateQuotaException(string tenantId, string? jobType, Exception innerException)
|
||||
: base($"Quota for tenant '{tenantId}' and job type '{jobType ?? "(all)"}' already exists.", innerException)
|
||||
{
|
||||
TenantId = tenantId;
|
||||
JobType = jobType;
|
||||
}
|
||||
}
|
||||
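// Illustrative sketch, not part of this commit: the token-bucket refill math
// implied by the quota columns above (burst_capacity, refill_rate,
// current_tokens, last_refill_at). Treating refill_rate as tokens per second
// is an assumption; the repository itself only persists the state.
internal static class QuotaRefillSketch
{
    public static (double Tokens, DateTimeOffset RefilledAt) Refill(
        double currentTokens,
        DateTimeOffset lastRefillAt,
        double refillRatePerSecond,
        int burstCapacity,
        DateTimeOffset now)
    {
        // Accrue tokens for the elapsed interval, clamped to the burst capacity.
        var elapsedSeconds = Math.Max(0, (now - lastRefillAt).TotalSeconds);
        var tokens = Math.Min(burstCapacity, currentTokens + elapsedSeconds * refillRatePerSecond);
        return (tokens, now);
    }
}
// The refilled state would then be written back through UpdateStateAsync,
// which updates exactly the mutable columns (current_tokens, last_refill_at,
// and the active/hourly counters).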
@@ -0,0 +1,199 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.DeadLetter;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of replay audit repository.
/// </summary>
public sealed class PostgresReplayAuditRepository : IReplayAuditRepository
{
    private const string SelectAuditColumns = """
        audit_id, tenant_id, entry_id, attempt_number,
        success, new_job_id, error_message,
        triggered_by, triggered_at, completed_at, initiated_by
        """;

    private const string SelectByEntrySql = $"""
        SELECT {SelectAuditColumns}
        FROM dead_letter_replay_audit
        WHERE tenant_id = @tenant_id AND entry_id = @entry_id
        ORDER BY attempt_number ASC
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectAuditColumns}
        FROM dead_letter_replay_audit
        WHERE tenant_id = @tenant_id AND audit_id = @audit_id
        """;

    private const string SelectByNewJobIdSql = $"""
        SELECT {SelectAuditColumns}
        FROM dead_letter_replay_audit
        WHERE tenant_id = @tenant_id AND new_job_id = @new_job_id
        """;

    private const string InsertAuditSql = """
        INSERT INTO dead_letter_replay_audit (
            audit_id, tenant_id, entry_id, attempt_number,
            success, new_job_id, error_message,
            triggered_by, triggered_at, completed_at, initiated_by)
        VALUES (
            @audit_id, @tenant_id, @entry_id, @attempt_number,
            @success, @new_job_id, @error_message,
            @triggered_by, @triggered_at, @completed_at, @initiated_by)
        """;

    private const string UpdateAuditSql = """
        UPDATE dead_letter_replay_audit
        SET success = @success,
            new_job_id = @new_job_id,
            error_message = @error_message,
            completed_at = @completed_at
        WHERE tenant_id = @tenant_id AND audit_id = @audit_id
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresReplayAuditRepository> _logger;

    public PostgresReplayAuditRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresReplayAuditRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<IReadOnlyList<ReplayAuditRecord>> GetByEntryAsync(
        string tenantId,
        Guid entryId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByEntrySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("entry_id", entryId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var records = new List<ReplayAuditRecord>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            records.Add(MapRecord(reader));
        }
        return records;
    }

    public async Task<ReplayAuditRecord?> GetByIdAsync(
        string tenantId,
        Guid auditId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("audit_id", auditId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapRecord(reader);
    }

    public async Task<ReplayAuditRecord?> GetByNewJobIdAsync(
        string tenantId,
        Guid newJobId,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByNewJobIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("new_job_id", newJobId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapRecord(reader);
    }

    public async Task CreateAsync(
        ReplayAuditRecord record,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(record.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertAuditSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddParameters(command, record);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        OrchestratorMetrics.DeadLetterReplayAttempted(record.TenantId, record.TriggeredBy);
    }

    public async Task<bool> UpdateAsync(
        ReplayAuditRecord record,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(record.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateAuditSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", record.TenantId);
        command.Parameters.AddWithValue("audit_id", record.AuditId);
        command.Parameters.AddWithValue("success", record.Success);
        command.Parameters.AddWithValue("new_job_id", (object?)record.NewJobId ?? DBNull.Value);
        command.Parameters.AddWithValue("error_message", (object?)record.ErrorMessage ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)record.CompletedAt ?? DBNull.Value);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);

        if (rows > 0 && record.Success)
        {
            OrchestratorMetrics.DeadLetterReplaySucceeded(record.TenantId);
        }
        else if (rows > 0 && !record.Success)
        {
            OrchestratorMetrics.DeadLetterReplayFailed(record.TenantId);
        }

        return rows > 0;
    }

    private static void AddParameters(NpgsqlCommand command, ReplayAuditRecord record)
    {
        command.Parameters.AddWithValue("audit_id", record.AuditId);
        command.Parameters.AddWithValue("tenant_id", record.TenantId);
        command.Parameters.AddWithValue("entry_id", record.EntryId);
        command.Parameters.AddWithValue("attempt_number", record.AttemptNumber);
        command.Parameters.AddWithValue("success", record.Success);
        command.Parameters.AddWithValue("new_job_id", (object?)record.NewJobId ?? DBNull.Value);
        command.Parameters.AddWithValue("error_message", (object?)record.ErrorMessage ?? DBNull.Value);
        command.Parameters.AddWithValue("triggered_by", record.TriggeredBy);
        command.Parameters.AddWithValue("triggered_at", record.TriggeredAt);
        command.Parameters.AddWithValue("completed_at", (object?)record.CompletedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("initiated_by", record.InitiatedBy);
    }

    private static ReplayAuditRecord MapRecord(NpgsqlDataReader reader) =>
        new(
            AuditId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            EntryId: reader.GetGuid(2),
            AttemptNumber: reader.GetInt32(3),
            Success: reader.GetBoolean(4),
            NewJobId: reader.IsDBNull(5) ? null : reader.GetGuid(5),
            ErrorMessage: reader.IsDBNull(6) ? null : reader.GetString(6),
            TriggeredBy: reader.GetString(7),
            TriggeredAt: reader.GetFieldValue<DateTimeOffset>(8),
            CompletedAt: reader.IsDBNull(9) ? null : reader.GetFieldValue<DateTimeOffset>(9),
            InitiatedBy: reader.GetString(10));
}
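// Illustrative sketch, not part of this commit: the two-phase audit flow the
// repository above supports. ReplayAuditRecord is assumed to be a positional
// record (member names taken from MapRecord); the "manual" trigger and
// "user:alice" initiator values are placeholders.
internal static class ReplayAuditFlowSketch
{
    public static async Task RecordReplayAsync(
        IReplayAuditRepository repository,
        string tenantId,
        Guid entryId,
        Guid newJobId,
        CancellationToken cancellationToken)
    {
        // Phase 1: open the audit record before attempting the replay.
        var audit = new ReplayAuditRecord(
            AuditId: Guid.NewGuid(),
            TenantId: tenantId,
            EntryId: entryId,
            AttemptNumber: 1,
            Success: false,            // pending until the replayed job is enqueued
            NewJobId: null,
            ErrorMessage: null,
            TriggeredBy: "manual",
            TriggeredAt: DateTimeOffset.UtcNow,
            CompletedAt: null,
            InitiatedBy: "user:alice");
        await repository.CreateAsync(audit, cancellationToken);

        // Phase 2: close the record with the outcome; UpdateAsync also emits
        // the succeeded/failed metric based on Success.
        await repository.UpdateAsync(
            audit with { Success = true, NewJobId = newJobId, CompletedAt = DateTimeOffset.UtcNow },
            cancellationToken);
    }
}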
@@ -0,0 +1,388 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of run repository.
/// </summary>
public sealed class PostgresRunRepository : IRunRepository
{
    private const string SelectRunColumns = """
        run_id, tenant_id, project_id, source_id, run_type, status, correlation_id,
        total_jobs, completed_jobs, succeeded_jobs, failed_jobs, created_at,
        started_at, completed_at, created_by, metadata
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectRunColumns}
        FROM runs
        WHERE tenant_id = @tenant_id AND run_id = @run_id
        """;

    private const string InsertRunSql = """
        INSERT INTO runs (
            run_id, tenant_id, project_id, source_id, run_type, status, correlation_id,
            total_jobs, completed_jobs, succeeded_jobs, failed_jobs, created_at,
            started_at, completed_at, created_by, metadata)
        VALUES (
            @run_id, @tenant_id, @project_id, @source_id, @run_type, @status::run_status, @correlation_id,
            @total_jobs, @completed_jobs, @succeeded_jobs, @failed_jobs, @created_at,
            @started_at, @completed_at, @created_by, @metadata)
        """;

    private const string UpdateStatusSql = """
        UPDATE runs
        SET status = @status::run_status,
            total_jobs = @total_jobs,
            completed_jobs = @completed_jobs,
            succeeded_jobs = @succeeded_jobs,
            failed_jobs = @failed_jobs,
            started_at = @started_at,
            completed_at = @completed_at
        WHERE tenant_id = @tenant_id AND run_id = @run_id
        """;

    -- Counter columns on the right-hand side read the pre-update values,
    -- so "completed_jobs + 1" is the new completion count.
    private const string IncrementJobCountsSql = """
        UPDATE runs
        SET completed_jobs = completed_jobs + 1,
            succeeded_jobs = CASE WHEN @succeeded THEN succeeded_jobs + 1 ELSE succeeded_jobs END,
            failed_jobs = CASE WHEN NOT @succeeded THEN failed_jobs + 1 ELSE failed_jobs END,
            started_at = COALESCE(started_at, @now),
            status = CASE
                WHEN completed_jobs + 1 >= total_jobs THEN
                    CASE
                        WHEN @succeeded AND failed_jobs = 0 THEN 'succeeded'::run_status
                        WHEN NOT @succeeded AND succeeded_jobs = 0 THEN 'failed'::run_status
                        ELSE 'partially_succeeded'::run_status
                    END
                ELSE 'running'::run_status
            END,
            completed_at = CASE WHEN completed_jobs + 1 >= total_jobs THEN @now ELSE completed_at END
        WHERE tenant_id = @tenant_id AND run_id = @run_id
        RETURNING status
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresRunRepository> _logger;

    public PostgresRunRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresRunRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<Run?> GetByIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("run_id", runId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapRun(reader);
    }

    public async Task CreateAsync(Run run, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(run.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertRunSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddRunParameters(command, run);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        OrchestratorMetrics.RunCreated(run.TenantId, run.RunType);
    }

    public async Task UpdateStatusAsync(
        string tenantId,
        Guid runId,
        RunStatus status,
        int totalJobs,
        int completedJobs,
        int succeededJobs,
        int failedJobs,
        DateTimeOffset? startedAt,
        DateTimeOffset? completedAt,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateStatusSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("run_id", runId);
        command.Parameters.AddWithValue("status", StatusToString(status));
        command.Parameters.AddWithValue("total_jobs", totalJobs);
        command.Parameters.AddWithValue("completed_jobs", completedJobs);
        command.Parameters.AddWithValue("succeeded_jobs", succeededJobs);
        command.Parameters.AddWithValue("failed_jobs", failedJobs);
        command.Parameters.AddWithValue("started_at", (object?)startedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)completedAt ?? DBNull.Value);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    public async Task IncrementJobCountsAsync(
        string tenantId,
        Guid runId,
        bool succeeded,
        CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(IncrementJobCountsSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("run_id", runId);
        command.Parameters.AddWithValue("succeeded", succeeded);
        command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            var newStatus = reader.GetString(0);
            if (newStatus is "succeeded" or "failed" or "partially_succeeded")
            {
                // Run completed - get the full run for metrics
                var run = await GetByIdAsync(tenantId, runId, cancellationToken).ConfigureAwait(false);
                if (run is not null)
                {
                    OrchestratorMetrics.RunCompleted(tenantId, run.RunType, newStatus);
                }
            }
        }
    }

    public async Task<IReadOnlyList<Run>> ListAsync(
        string tenantId,
        Guid? sourceId,
        string? runType,
        RunStatus? status,
        string? projectId,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, sourceId, runType, status, projectId, createdAfter, createdBefore, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var runs = new List<Run>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            runs.Add(MapRun(reader));
        }
        return runs;
    }

    public async Task<int> CountAsync(
        string tenantId,
        Guid? sourceId,
        string? runType,
        RunStatus? status,
        string? projectId,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildCountQuery(tenantId, sourceId, runType, status, projectId);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt32(result);
    }

    private static void AddRunParameters(NpgsqlCommand command, Run run)
    {
        command.Parameters.AddWithValue("run_id", run.RunId);
        command.Parameters.AddWithValue("tenant_id", run.TenantId);
        command.Parameters.AddWithValue("project_id", (object?)run.ProjectId ?? DBNull.Value);
        command.Parameters.AddWithValue("source_id", run.SourceId);
        command.Parameters.AddWithValue("run_type", run.RunType);
        command.Parameters.AddWithValue("status", StatusToString(run.Status));
        command.Parameters.AddWithValue("correlation_id", (object?)run.CorrelationId ?? DBNull.Value);
        command.Parameters.AddWithValue("total_jobs", run.TotalJobs);
        command.Parameters.AddWithValue("completed_jobs", run.CompletedJobs);
        command.Parameters.AddWithValue("succeeded_jobs", run.SucceededJobs);
        command.Parameters.AddWithValue("failed_jobs", run.FailedJobs);
        command.Parameters.AddWithValue("created_at", run.CreatedAt);
        command.Parameters.AddWithValue("started_at", (object?)run.StartedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("completed_at", (object?)run.CompletedAt ?? DBNull.Value);
        command.Parameters.AddWithValue("created_by", run.CreatedBy);
        command.Parameters.Add(new NpgsqlParameter("metadata", NpgsqlDbType.Jsonb)
        {
            Value = (object?)run.Metadata ?? DBNull.Value
        });
    }

    private static Run MapRun(NpgsqlDataReader reader)
    {
        return new Run(
            RunId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            ProjectId: reader.IsDBNull(2) ? null : reader.GetString(2),
            SourceId: reader.GetGuid(3),
            RunType: reader.GetString(4),
            Status: ParseStatus(reader.GetString(5)),
            CorrelationId: reader.IsDBNull(6) ? null : reader.GetString(6),
            TotalJobs: reader.GetInt32(7),
            CompletedJobs: reader.GetInt32(8),
            SucceededJobs: reader.GetInt32(9),
            FailedJobs: reader.GetInt32(10),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(11),
            StartedAt: reader.IsDBNull(12) ? null : reader.GetFieldValue<DateTimeOffset>(12),
            CompletedAt: reader.IsDBNull(13) ? null : reader.GetFieldValue<DateTimeOffset>(13),
            CreatedBy: reader.GetString(14),
            Metadata: reader.IsDBNull(15) ? null : reader.GetString(15));
    }

    private static string StatusToString(RunStatus status) => status switch
    {
        RunStatus.Pending => "pending",
        RunStatus.Running => "running",
        RunStatus.Succeeded => "succeeded",
        RunStatus.PartiallySucceeded => "partially_succeeded",
        RunStatus.Failed => "failed",
        RunStatus.Canceled => "canceled",
        _ => throw new ArgumentOutOfRangeException(nameof(status))
    };

    private static RunStatus ParseStatus(string status) => status switch
    {
        "pending" => RunStatus.Pending,
        "running" => RunStatus.Running,
        "succeeded" => RunStatus.Succeeded,
        "partially_succeeded" => RunStatus.PartiallySucceeded,
        "failed" => RunStatus.Failed,
        "canceled" => RunStatus.Canceled,
        _ => throw new ArgumentOutOfRangeException(nameof(status))
    };

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        Guid? sourceId,
        string? runType,
        RunStatus? status,
        string? projectId,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectRunColumns} FROM runs WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (!string.IsNullOrEmpty(runType))
        {
            sb.Append(" AND run_type = @run_type");
            parameters.Add(("run_type", runType));
        }

        if (status.HasValue)
        {
            sb.Append(" AND status = @status::run_status");
            parameters.Add(("status", StatusToString(status.Value)));
        }

        if (!string.IsNullOrEmpty(projectId))
        {
            sb.Append(" AND project_id = @project_id");
            parameters.Add(("project_id", projectId));
        }

        if (createdAfter.HasValue)
        {
            sb.Append(" AND created_at >= @created_after");
            parameters.Add(("created_after", createdAfter.Value));
        }

        if (createdBefore.HasValue)
        {
            sb.Append(" AND created_at < @created_before");
            parameters.Add(("created_before", createdBefore.Value));
        }

        sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }

    private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
        string tenantId,
        Guid? sourceId,
        string? runType,
        RunStatus? status,
        string? projectId)
    {
        var sb = new StringBuilder();
        sb.Append("SELECT COUNT(*) FROM runs WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (!string.IsNullOrEmpty(runType))
        {
            sb.Append(" AND run_type = @run_type");
            parameters.Add(("run_type", runType));
        }

        if (status.HasValue)
        {
            sb.Append(" AND status = @status::run_status");
            parameters.Add(("status", StatusToString(status.Value)));
        }

        if (!string.IsNullOrEmpty(projectId))
        {
            sb.Append(" AND project_id = @project_id");
            parameters.Add(("project_id", projectId));
        }

        return (sb.ToString(), parameters);
    }
}
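// Illustrative sketch, not part of this commit: the status transition encoded
// in IncrementJobCountsSql, restated in C# for readability. Inputs are the
// counters as they stood *before* this increment, matching the SQL's
// read-before-write view of the row.
internal static class RunStatusTransitionSketch
{
    public static string Next(bool succeeded, int totalJobs, int completedJobs, int succeededJobs, int failedJobs)
    {
        if (completedJobs + 1 < totalJobs)
        {
            return "running";                  // jobs still outstanding
        }
        if (succeeded && failedJobs == 0)
        {
            return "succeeded";                // every job, including this one, succeeded
        }
        if (!succeeded && succeededJobs == 0)
        {
            return "failed";                   // every job, including this one, failed
        }
        return "partially_succeeded";
    }
}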
@@ -0,0 +1,314 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of source repository.
/// </summary>
public sealed class PostgresSourceRepository : ISourceRepository
{
    private const string SelectSourceColumns = """
        source_id, tenant_id, name, source_type, enabled, paused, pause_reason,
        pause_ticket, configuration, created_at, updated_at, updated_by
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectSourceColumns}
        FROM sources
        WHERE tenant_id = @tenant_id AND source_id = @source_id
        """;

    private const string SelectByNameSql = $"""
        SELECT {SelectSourceColumns}
        FROM sources
        WHERE tenant_id = @tenant_id AND name = @name
        """;

    private const string InsertSourceSql = """
        INSERT INTO sources (
            source_id, tenant_id, name, source_type, enabled, paused, pause_reason,
            pause_ticket, configuration, created_at, updated_at, updated_by)
        VALUES (
            @source_id, @tenant_id, @name, @source_type, @enabled, @paused, @pause_reason,
            @pause_ticket, @configuration, @created_at, @updated_at, @updated_by)
        """;

    private const string UpdateSourceSql = """
        UPDATE sources
        SET name = @name,
            source_type = @source_type,
            enabled = @enabled,
            paused = @paused,
            pause_reason = @pause_reason,
            pause_ticket = @pause_ticket,
            configuration = @configuration,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND source_id = @source_id
        """;

    private const string PauseSourceSql = """
        UPDATE sources
        SET paused = TRUE,
            pause_reason = @pause_reason,
            pause_ticket = @pause_ticket,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND source_id = @source_id
        """;

    private const string ResumeSourceSql = """
        UPDATE sources
        SET paused = FALSE,
            pause_reason = NULL,
            pause_ticket = NULL,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND source_id = @source_id
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresSourceRepository> _logger;

    public PostgresSourceRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresSourceRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<Source?> GetByIdAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapSource(reader);
    }

    public async Task<Source?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByNameSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("name", name);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapSource(reader);
    }

    public async Task CreateAsync(Source source, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(source.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertSourceSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddSourceParameters(command, source);

        try
        {
            await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            OrchestratorMetrics.SourceCreated(source.TenantId, source.SourceType);
        }
        catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
        {
            _logger.LogWarning("Duplicate source name: {Name}", source.Name);
            throw new DuplicateSourceException(source.Name, ex);
        }
    }

    public async Task UpdateAsync(Source source, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(source.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateSourceSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", source.TenantId);
        command.Parameters.AddWithValue("source_id", source.SourceId);
        command.Parameters.AddWithValue("name", source.Name);
        command.Parameters.AddWithValue("source_type", source.SourceType);
        command.Parameters.AddWithValue("enabled", source.Enabled);
        command.Parameters.AddWithValue("paused", source.Paused);
        command.Parameters.AddWithValue("pause_reason", (object?)source.PauseReason ?? DBNull.Value);
        command.Parameters.AddWithValue("pause_ticket", (object?)source.PauseTicket ?? DBNull.Value);
        command.Parameters.Add(new NpgsqlParameter("configuration", NpgsqlDbType.Jsonb)
        {
            Value = (object?)source.Configuration ?? DBNull.Value
        });
        command.Parameters.AddWithValue("updated_at", source.UpdatedAt);
        command.Parameters.AddWithValue("updated_by", source.UpdatedBy);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows == 0)
        {
            _logger.LogWarning("Source not found for update: {SourceId}", source.SourceId);
        }
    }

    public async Task PauseAsync(string tenantId, Guid sourceId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(PauseSourceSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);
        command.Parameters.AddWithValue("pause_reason", reason);
        command.Parameters.AddWithValue("pause_ticket", (object?)ticket ?? DBNull.Value);
        command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
        command.Parameters.AddWithValue("updated_by", updatedBy);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            OrchestratorMetrics.SourcePaused(tenantId);
        }
    }

    public async Task ResumeAsync(string tenantId, Guid sourceId, string updatedBy, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(ResumeSourceSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);
        command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
        command.Parameters.AddWithValue("updated_by", updatedBy);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            OrchestratorMetrics.SourceResumed(tenantId);
        }
    }

    public async Task<IReadOnlyList<Source>> ListAsync(
        string tenantId,
        string? sourceType,
        bool? enabled,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, sourceType, enabled, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var sources = new List<Source>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            sources.Add(MapSource(reader));
        }
        return sources;
    }

    private static void AddSourceParameters(NpgsqlCommand command, Source source)
    {
        command.Parameters.AddWithValue("source_id", source.SourceId);
        command.Parameters.AddWithValue("tenant_id", source.TenantId);
        command.Parameters.AddWithValue("name", source.Name);
        command.Parameters.AddWithValue("source_type", source.SourceType);
        command.Parameters.AddWithValue("enabled", source.Enabled);
        command.Parameters.AddWithValue("paused", source.Paused);
        command.Parameters.AddWithValue("pause_reason", (object?)source.PauseReason ?? DBNull.Value);
        command.Parameters.AddWithValue("pause_ticket", (object?)source.PauseTicket ?? DBNull.Value);
        command.Parameters.Add(new NpgsqlParameter("configuration", NpgsqlDbType.Jsonb)
        {
            Value = (object?)source.Configuration ?? DBNull.Value
        });
        command.Parameters.AddWithValue("created_at", source.CreatedAt);
        command.Parameters.AddWithValue("updated_at", source.UpdatedAt);
        command.Parameters.AddWithValue("updated_by", source.UpdatedBy);
    }

    private static Source MapSource(NpgsqlDataReader reader)
    {
        return new Source(
            SourceId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            Name: reader.GetString(2),
            SourceType: reader.GetString(3),
            Enabled: reader.GetBoolean(4),
            Paused: reader.GetBoolean(5),
            PauseReason: reader.IsDBNull(6) ? null : reader.GetString(6),
            PauseTicket: reader.IsDBNull(7) ? null : reader.GetString(7),
            Configuration: reader.IsDBNull(8) ? null : reader.GetString(8),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(9),
            UpdatedAt: reader.GetFieldValue<DateTimeOffset>(10),
            UpdatedBy: reader.GetString(11));
    }

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        string? sourceType,
        bool? enabled,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectSourceColumns} FROM sources WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (!string.IsNullOrEmpty(sourceType))
        {
            sb.Append(" AND source_type = @source_type");
            parameters.Add(("source_type", sourceType));
        }

        if (enabled.HasValue)
        {
            sb.Append(" AND enabled = @enabled");
            parameters.Add(("enabled", enabled.Value));
        }

        sb.Append(" ORDER BY name LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }
}

/// <summary>
/// Exception thrown when attempting to create a source with a duplicate name.
/// </summary>
public sealed class DuplicateSourceException : Exception
{
    public string Name { get; }

    public DuplicateSourceException(string name, Exception innerException)
        : base($"Source with name '{name}' already exists.", innerException)
    {
        Name = name;
    }
}
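// Illustrative sketch, not part of this commit: surfacing the duplicate-name
// path from CreateAsync to an API caller. The minimal-API handler shape,
// IResult/Results usage, and route are assumptions.
internal static class CreateSourceEndpointSketch
{
    public static async Task<IResult> Handle(
        ISourceRepository sources,
        Source source,
        CancellationToken cancellationToken)
    {
        try
        {
            await sources.CreateAsync(source, cancellationToken);
            return Results.Created($"/sources/{source.SourceId}", source);
        }
        catch (DuplicateSourceException ex)
        {
            // The unique violation on the source name maps cleanly to HTTP 409.
            return Results.Conflict(new { error = ex.Message, name = ex.Name });
        }
    }
}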
@@ -0,0 +1,310 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of throttle repository.
/// </summary>
public sealed class PostgresThrottleRepository : IThrottleRepository
{
    private const string SelectThrottleColumns = """
        throttle_id, tenant_id, source_id, job_type, active, reason, ticket,
        created_at, expires_at, created_by
        """;

    private const string SelectByIdSql = $"""
        SELECT {SelectThrottleColumns}
        FROM throttles
        WHERE tenant_id = @tenant_id AND throttle_id = @throttle_id
        """;

    private const string SelectActiveBySourceSql = $"""
        SELECT {SelectThrottleColumns}
        FROM throttles
        WHERE tenant_id = @tenant_id
          AND source_id = @source_id
          AND active = TRUE
          AND (expires_at IS NULL OR expires_at > @now)
        ORDER BY created_at DESC
        """;

    private const string SelectActiveByJobTypeSql = $"""
        SELECT {SelectThrottleColumns}
        FROM throttles
        WHERE tenant_id = @tenant_id
          AND job_type = @job_type
          AND active = TRUE
          AND (expires_at IS NULL OR expires_at > @now)
        ORDER BY created_at DESC
        """;

    private const string InsertThrottleSql = """
        INSERT INTO throttles (
            throttle_id, tenant_id, source_id, job_type, active, reason, ticket,
            created_at, expires_at, created_by)
        VALUES (
            @throttle_id, @tenant_id, @source_id, @job_type, @active, @reason, @ticket,
            @created_at, @expires_at, @created_by)
        """;

    private const string DeactivateSql = """
        UPDATE throttles
        SET active = FALSE
        WHERE tenant_id = @tenant_id AND throttle_id = @throttle_id
        """;

    private const string DeactivateBySourceSql = """
        UPDATE throttles
        SET active = FALSE
        WHERE tenant_id = @tenant_id AND source_id = @source_id AND active = TRUE
        """;

    private const string DeactivateByJobTypeSql = """
        UPDATE throttles
        SET active = FALSE
        WHERE tenant_id = @tenant_id AND job_type = @job_type AND active = TRUE
        """;

    private const string CleanupExpiredSql = """
        UPDATE throttles
        SET active = FALSE
        WHERE active = TRUE AND expires_at IS NOT NULL AND expires_at <= @now
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresThrottleRepository> _logger;

    public PostgresThrottleRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresThrottleRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<Throttle?> GetByIdAsync(string tenantId, Guid throttleId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("throttle_id", throttleId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapThrottle(reader);
    }

    public async Task<IReadOnlyList<Throttle>> GetActiveBySourceAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectActiveBySourceSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);
        command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var throttles = new List<Throttle>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            throttles.Add(MapThrottle(reader));
        }
        return throttles;
    }

    public async Task<IReadOnlyList<Throttle>> GetActiveByJobTypeAsync(string tenantId, string jobType, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectActiveByJobTypeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_type", jobType);
        command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var throttles = new List<Throttle>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            throttles.Add(MapThrottle(reader));
        }
        return throttles;
    }

    public async Task CreateAsync(Throttle throttle, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(throttle.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertThrottleSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("throttle_id", throttle.ThrottleId);
        command.Parameters.AddWithValue("tenant_id", throttle.TenantId);
        command.Parameters.AddWithValue("source_id", (object?)throttle.SourceId ?? DBNull.Value);
        command.Parameters.AddWithValue("job_type", (object?)throttle.JobType ?? DBNull.Value);
        command.Parameters.AddWithValue("active", throttle.Active);
        command.Parameters.AddWithValue("reason", throttle.Reason);
        command.Parameters.AddWithValue("ticket", (object?)throttle.Ticket ?? DBNull.Value);
        command.Parameters.AddWithValue("created_at", throttle.CreatedAt);
        command.Parameters.AddWithValue("expires_at", (object?)throttle.ExpiresAt ?? DBNull.Value);
        command.Parameters.AddWithValue("created_by", throttle.CreatedBy);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        OrchestratorMetrics.ThrottleCreated(throttle.TenantId, throttle.Reason);
    }

    public async Task DeactivateAsync(string tenantId, Guid throttleId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(DeactivateSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("throttle_id", throttleId);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            OrchestratorMetrics.ThrottleDeactivated(tenantId);
        }
    }

    public async Task DeactivateBySourceAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(DeactivateBySourceSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            _logger.LogInformation("Deactivated {Count} throttles for source {SourceId}", rows, sourceId);
        }
    }

    public async Task DeactivateByJobTypeAsync(string tenantId, string jobType, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(DeactivateByJobTypeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_type", jobType);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            _logger.LogInformation("Deactivated {Count} throttles for job type {JobType}", rows, jobType);
        }
    }

    public async Task<int> CleanupExpiredAsync(DateTimeOffset now, CancellationToken cancellationToken)
    {
        // Use system tenant for cross-tenant cleanup operations.
        // In production, this should use a dedicated admin connection or be run by a background service.
        await using var connection = await _dataSource.OpenConnectionAsync("system", "admin", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(CleanupExpiredSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("now", now);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        if (rows > 0)
        {
            _logger.LogInformation("Cleaned up {Count} expired throttles", rows);
        }
        return rows;
    }

    public async Task<IReadOnlyList<Throttle>> ListAsync(
        string tenantId,
        bool? active,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, active, sourceId, jobType, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var throttles = new List<Throttle>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            throttles.Add(MapThrottle(reader));
        }
        return throttles;
    }

    private static Throttle MapThrottle(NpgsqlDataReader reader)
    {
        return new Throttle(
            ThrottleId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            SourceId: reader.IsDBNull(2) ? null : reader.GetGuid(2),
            JobType: reader.IsDBNull(3) ? null : reader.GetString(3),
            Active: reader.GetBoolean(4),
            Reason: reader.GetString(5),
            Ticket: reader.IsDBNull(6) ? null : reader.GetString(6),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(7),
            ExpiresAt: reader.IsDBNull(8) ? null : reader.GetFieldValue<DateTimeOffset>(8),
            CreatedBy: reader.GetString(9));
    }

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        bool? active,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectThrottleColumns} FROM throttles WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (active.HasValue)
        {
            sb.Append(" AND active = @active");
            parameters.Add(("active", active.Value));
        }

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (!string.IsNullOrEmpty(jobType))
        {
            sb.Append(" AND job_type = @job_type");
            parameters.Add(("job_type", jobType));
        }

        sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }
}
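// Editor's sketch (not part of this commit): the comment in CleanupExpiredAsync says expiry
// cleanup should be driven by a background service. A minimal hosted-service loop, assuming
// the repository is registered in DI and the one-minute cadence is illustrative:
//
//     public sealed class ThrottleCleanupService : BackgroundService
//     {
//         private readonly IThrottleRepository _throttles;
//
//         public ThrottleCleanupService(IThrottleRepository throttles) => _throttles = throttles;
//
//         protected override async Task ExecuteAsync(CancellationToken stoppingToken)
//         {
//             while (!stoppingToken.IsCancellationRequested)
//             {
//                 // Deactivate throttles whose expires_at has passed.
//                 await _throttles.CleanupExpiredAsync(DateTimeOffset.UtcNow, stoppingToken);
//                 await Task.Delay(TimeSpan.FromMinutes(1), stoppingToken);
//             }
//         }
//     }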
@@ -0,0 +1,386 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.Infrastructure.Postgres;

/// <summary>
/// PostgreSQL implementation of watermark repository.
/// </summary>
public sealed class PostgresWatermarkRepository : IWatermarkRepository
{
    private const string SelectWatermarkColumns = """
        watermark_id, tenant_id, source_id, job_type, scope_key,
        high_watermark, low_watermark, sequence_number, processed_count,
        last_batch_hash, created_at, updated_at, updated_by
        """;

    private const string SelectByScopeKeySql = $"""
        SELECT {SelectWatermarkColumns}
        FROM watermarks
        WHERE tenant_id = @tenant_id AND scope_key = @scope_key
        """;

    private const string SelectBySourceIdSql = $"""
        SELECT {SelectWatermarkColumns}
        FROM watermarks
        WHERE tenant_id = @tenant_id AND source_id = @source_id AND job_type IS NULL
        """;

    private const string SelectByJobTypeSql = $"""
        SELECT {SelectWatermarkColumns}
        FROM watermarks
        WHERE tenant_id = @tenant_id AND job_type = @job_type AND source_id IS NULL
        """;

    private const string SelectBySourceAndJobTypeSql = $"""
        SELECT {SelectWatermarkColumns}
        FROM watermarks
        WHERE tenant_id = @tenant_id AND source_id = @source_id AND job_type = @job_type
        """;

    private const string InsertWatermarkSql = """
        INSERT INTO watermarks (
            watermark_id, tenant_id, source_id, job_type, scope_key,
            high_watermark, low_watermark, sequence_number, processed_count,
            last_batch_hash, created_at, updated_at, updated_by)
        VALUES (
            @watermark_id, @tenant_id, @source_id, @job_type, @scope_key,
            @high_watermark, @low_watermark, @sequence_number, @processed_count,
            @last_batch_hash, @created_at, @updated_at, @updated_by)
        """;

    private const string UpdateWatermarkSql = """
        UPDATE watermarks
        SET high_watermark = @high_watermark,
            low_watermark = @low_watermark,
            sequence_number = @sequence_number,
            processed_count = @processed_count,
            last_batch_hash = @last_batch_hash,
            updated_at = @updated_at,
            updated_by = @updated_by
        WHERE tenant_id = @tenant_id AND watermark_id = @watermark_id
          AND sequence_number = @expected_sequence_number
        """;

    private const string UpsertWatermarkSql = """
        INSERT INTO watermarks (
            watermark_id, tenant_id, source_id, job_type, scope_key,
            high_watermark, low_watermark, sequence_number, processed_count,
            last_batch_hash, created_at, updated_at, updated_by)
        VALUES (
            @watermark_id, @tenant_id, @source_id, @job_type, @scope_key,
            @high_watermark, @low_watermark, @sequence_number, @processed_count,
            @last_batch_hash, @created_at, @updated_at, @updated_by)
        ON CONFLICT (tenant_id, scope_key) DO UPDATE
        SET high_watermark = EXCLUDED.high_watermark,
            low_watermark = EXCLUDED.low_watermark,
            sequence_number = EXCLUDED.sequence_number,
            processed_count = EXCLUDED.processed_count,
            last_batch_hash = EXCLUDED.last_batch_hash,
            updated_at = EXCLUDED.updated_at,
            updated_by = EXCLUDED.updated_by
        """;

    private const string DeleteWatermarkSql = """
        DELETE FROM watermarks
        WHERE tenant_id = @tenant_id AND scope_key = @scope_key
        """;

    private const string SelectLaggingSql = $"""
        SELECT {SelectWatermarkColumns}
        FROM watermarks
        WHERE tenant_id = @tenant_id
          AND high_watermark < @lag_threshold
        ORDER BY high_watermark ASC
        LIMIT @limit
        """;

    private readonly OrchestratorDataSource _dataSource;
    private readonly ILogger<PostgresWatermarkRepository> _logger;

    public PostgresWatermarkRepository(
        OrchestratorDataSource dataSource,
        ILogger<PostgresWatermarkRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task<Watermark?> GetByScopeKeyAsync(string tenantId, string scopeKey, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByScopeKeySql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapWatermark(reader);
    }

    public async Task<Watermark?> GetBySourceIdAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectBySourceIdSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapWatermark(reader);
    }

    public async Task<Watermark?> GetByJobTypeAsync(string tenantId, string jobType, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectByJobTypeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("job_type", jobType);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapWatermark(reader);
    }

    public async Task<Watermark?> GetBySourceAndJobTypeAsync(string tenantId, Guid sourceId, string jobType, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectBySourceAndJobTypeSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("source_id", sourceId);
        command.Parameters.AddWithValue("job_type", jobType);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapWatermark(reader);
    }

    public async Task CreateAsync(Watermark watermark, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(watermark.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(InsertWatermarkSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddWatermarkParameters(command, watermark);

        try
        {
            await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            OrchestratorMetrics.WatermarkCreated(watermark.TenantId, watermark.ScopeKey);
        }
        catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
        {
            _logger.LogWarning("Duplicate watermark for tenant {TenantId} scope {ScopeKey}", watermark.TenantId, watermark.ScopeKey);
            throw new DuplicateWatermarkException(watermark.TenantId, watermark.ScopeKey, ex);
        }
    }

    public async Task<bool> UpdateAsync(Watermark watermark, long expectedSequenceNumber, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(watermark.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpdateWatermarkSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", watermark.TenantId);
        command.Parameters.AddWithValue("watermark_id", watermark.WatermarkId);
        command.Parameters.AddWithValue("high_watermark", watermark.HighWatermark);
        command.Parameters.AddWithValue("low_watermark", (object?)watermark.LowWatermark ?? DBNull.Value);
        command.Parameters.AddWithValue("sequence_number", watermark.SequenceNumber);
        command.Parameters.AddWithValue("processed_count", watermark.ProcessedCount);
        command.Parameters.AddWithValue("last_batch_hash", (object?)watermark.LastBatchHash ?? DBNull.Value);
        command.Parameters.AddWithValue("updated_at", watermark.UpdatedAt);
        command.Parameters.AddWithValue("updated_by", watermark.UpdatedBy);
        command.Parameters.AddWithValue("expected_sequence_number", expectedSequenceNumber);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);

        if (rows > 0)
        {
            OrchestratorMetrics.WatermarkAdvanced(watermark.TenantId, watermark.ScopeKey);
        }

        return rows > 0;
    }

    public async Task UpsertAsync(Watermark watermark, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(watermark.TenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(UpsertWatermarkSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        AddWatermarkParameters(command, watermark);

        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        OrchestratorMetrics.WatermarkAdvanced(watermark.TenantId, watermark.ScopeKey);
    }

    public async Task<IReadOnlyList<Watermark>> ListAsync(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var (sql, parameters) = BuildListQuery(tenantId, sourceId, jobType, limit, offset);

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(sql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        foreach (var (name, value) in parameters)
        {
            command.Parameters.AddWithValue(name, value);
        }

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var watermarks = new List<Watermark>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            watermarks.Add(MapWatermark(reader));
        }
        return watermarks;
    }

    public async Task<IReadOnlyList<Watermark>> GetLaggingAsync(
        string tenantId,
        TimeSpan lagThreshold,
        int limit,
        CancellationToken cancellationToken)
    {
        var thresholdTime = DateTimeOffset.UtcNow - lagThreshold;

        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(SelectLaggingSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("lag_threshold", thresholdTime);
        command.Parameters.AddWithValue("limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var watermarks = new List<Watermark>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            watermarks.Add(MapWatermark(reader));
        }
        return watermarks;
    }

    public async Task<bool> DeleteAsync(string tenantId, string scopeKey, CancellationToken cancellationToken)
    {
        await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
        await using var command = new NpgsqlCommand(DeleteWatermarkSql, connection);
        command.CommandTimeout = _dataSource.CommandTimeoutSeconds;

        command.Parameters.AddWithValue("tenant_id", tenantId);
        command.Parameters.AddWithValue("scope_key", scopeKey);

        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        return rows > 0;
    }

    private static void AddWatermarkParameters(NpgsqlCommand command, Watermark watermark)
    {
        command.Parameters.AddWithValue("watermark_id", watermark.WatermarkId);
        command.Parameters.AddWithValue("tenant_id", watermark.TenantId);
        command.Parameters.AddWithValue("source_id", (object?)watermark.SourceId ?? DBNull.Value);
        command.Parameters.AddWithValue("job_type", (object?)watermark.JobType ?? DBNull.Value);
        command.Parameters.AddWithValue("scope_key", watermark.ScopeKey);
        command.Parameters.AddWithValue("high_watermark", watermark.HighWatermark);
        command.Parameters.AddWithValue("low_watermark", (object?)watermark.LowWatermark ?? DBNull.Value);
        command.Parameters.AddWithValue("sequence_number", watermark.SequenceNumber);
        command.Parameters.AddWithValue("processed_count", watermark.ProcessedCount);
        command.Parameters.AddWithValue("last_batch_hash", (object?)watermark.LastBatchHash ?? DBNull.Value);
        command.Parameters.AddWithValue("created_at", watermark.CreatedAt);
        command.Parameters.AddWithValue("updated_at", watermark.UpdatedAt);
        command.Parameters.AddWithValue("updated_by", watermark.UpdatedBy);
    }

    private static Watermark MapWatermark(NpgsqlDataReader reader)
    {
        return new Watermark(
            WatermarkId: reader.GetGuid(0),
            TenantId: reader.GetString(1),
            SourceId: reader.IsDBNull(2) ? null : reader.GetGuid(2),
            JobType: reader.IsDBNull(3) ? null : reader.GetString(3),
            ScopeKey: reader.GetString(4),
            HighWatermark: reader.GetFieldValue<DateTimeOffset>(5),
            LowWatermark: reader.IsDBNull(6) ? null : reader.GetFieldValue<DateTimeOffset>(6),
            SequenceNumber: reader.GetInt64(7),
            ProcessedCount: reader.GetInt64(8),
            LastBatchHash: reader.IsDBNull(9) ? null : reader.GetString(9),
            CreatedAt: reader.GetFieldValue<DateTimeOffset>(10),
            UpdatedAt: reader.GetFieldValue<DateTimeOffset>(11),
            UpdatedBy: reader.GetString(12));
    }

    private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
        string tenantId,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset)
    {
        var sb = new StringBuilder();
        sb.Append($"SELECT {SelectWatermarkColumns} FROM watermarks WHERE tenant_id = @tenant_id");

        var parameters = new List<(string, object)> { ("tenant_id", tenantId) };

        if (sourceId.HasValue)
        {
            sb.Append(" AND source_id = @source_id");
            parameters.Add(("source_id", sourceId.Value));
        }

        if (!string.IsNullOrEmpty(jobType))
        {
            sb.Append(" AND job_type = @job_type");
            parameters.Add(("job_type", jobType));
        }

        sb.Append(" ORDER BY updated_at DESC LIMIT @limit OFFSET @offset");
        parameters.Add(("limit", limit));
        parameters.Add(("offset", offset));

        return (sb.ToString(), parameters);
    }
}

/// <summary>
/// Exception thrown when attempting to create a duplicate watermark.
/// </summary>
public sealed class DuplicateWatermarkException : Exception
{
    public string TenantId { get; }
    public string ScopeKey { get; }

    public DuplicateWatermarkException(string tenantId, string scopeKey, Exception innerException)
        : base($"Watermark for tenant '{tenantId}' and scope '{scopeKey}' already exists.", innerException)
    {
        TenantId = tenantId;
        ScopeKey = scopeKey;
    }
}
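// Editor's sketch (not part of this commit): UpdateAsync implements optimistic concurrency
// via the sequence_number column and returns false when another writer advanced the
// watermark first, so the caller is expected to re-read and retry. Names such as
// `batchEnd` and `processed` below are illustrative.
//
//     for (var attempt = 0; attempt < 3; attempt++)
//     {
//         var current = await watermarks.GetByScopeKeyAsync(tenantId, scopeKey, ct);
//         if (current is null) break;
//
//         var advanced = current with
//         {
//             HighWatermark = batchEnd,
//             SequenceNumber = current.SequenceNumber + 1,
//             ProcessedCount = current.ProcessedCount + processed,
//             UpdatedAt = DateTimeOffset.UtcNow,
//         };
//
//         // Succeeds only if sequence_number still equals the value we read.
//         if (await watermarks.UpdateAsync(advanced, current.SequenceNumber, ct)) break;
//     }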
@@ -0,0 +1,61 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for artifact persistence operations.
/// </summary>
public interface IArtifactRepository
{
    /// <summary>
    /// Gets an artifact by ID.
    /// </summary>
    Task<Artifact?> GetByIdAsync(string tenantId, Guid artifactId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets artifacts by job ID.
    /// </summary>
    Task<IReadOnlyList<Artifact>> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets artifacts by run ID.
    /// </summary>
    Task<IReadOnlyList<Artifact>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets an artifact by its content digest.
    /// </summary>
    Task<Artifact?> GetByDigestAsync(string tenantId, string digest, CancellationToken cancellationToken);

    /// <summary>
    /// Creates a new artifact.
    /// </summary>
    Task CreateAsync(Artifact artifact, CancellationToken cancellationToken);

    /// <summary>
    /// Creates multiple artifacts in a batch.
    /// </summary>
    Task CreateBatchAsync(IEnumerable<Artifact> artifacts, CancellationToken cancellationToken);

    /// <summary>
    /// Lists artifacts with pagination and filters.
    /// </summary>
    Task<IReadOnlyList<Artifact>> ListAsync(
        string tenantId,
        string? artifactType,
        string? jobType,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset,
        CancellationToken cancellationToken);

    /// <summary>
    /// Counts artifacts matching the filters.
    /// </summary>
    Task<int> CountAsync(
        string tenantId,
        string? artifactType,
        string? jobType,
        CancellationToken cancellationToken);
}
@@ -0,0 +1,127 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository for audit log entries.
/// </summary>
public interface IAuditRepository
{
    /// <summary>
    /// Appends a new audit entry to the log.
    /// </summary>
    Task<AuditEntry> AppendAsync(
        string tenantId,
        AuditEventType eventType,
        string resourceType,
        Guid resourceId,
        string actorId,
        ActorType actorType,
        string description,
        string? oldState = null,
        string? newState = null,
        string? actorIp = null,
        string? userAgent = null,
        string? httpMethod = null,
        string? requestPath = null,
        string? correlationId = null,
        string? metadata = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets an audit entry by ID.
    /// </summary>
    Task<AuditEntry?> GetByIdAsync(
        string tenantId,
        Guid entryId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists audit entries with optional filters.
    /// </summary>
    Task<IReadOnlyList<AuditEntry>> ListAsync(
        string tenantId,
        AuditEventType? eventType = null,
        string? resourceType = null,
        Guid? resourceId = null,
        string? actorId = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets audit entries by sequence range.
    /// </summary>
    Task<IReadOnlyList<AuditEntry>> GetBySequenceRangeAsync(
        string tenantId,
        long startSequence,
        long endSequence,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the latest audit entry for a tenant.
    /// </summary>
    Task<AuditEntry?> GetLatestAsync(
        string tenantId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets audit entries for a specific resource.
    /// </summary>
    Task<IReadOnlyList<AuditEntry>> GetByResourceAsync(
        string tenantId,
        string resourceType,
        Guid resourceId,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the count of audit entries.
    /// </summary>
    Task<long> GetCountAsync(
        string tenantId,
        AuditEventType? eventType = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies the chain integrity for a range of entries.
    /// </summary>
    Task<ChainVerificationResult> VerifyChainAsync(
        string tenantId,
        long? startSequence = null,
        long? endSequence = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets audit summary statistics.
    /// </summary>
    Task<AuditSummary> GetSummaryAsync(
        string tenantId,
        DateTimeOffset? since = null,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Result of chain verification.
/// </summary>
public sealed record ChainVerificationResult(
    bool IsValid,
    Guid? InvalidEntryId,
    long? InvalidSequence,
    string? ErrorMessage);

/// <summary>
/// Audit summary statistics.
/// </summary>
public sealed record AuditSummary(
    long TotalEntries,
    long EntriesSince,
    long EventTypes,
    long UniqueActors,
    long UniqueResources,
    DateTimeOffset? EarliestEntry,
    DateTimeOffset? LatestEntry);
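// Editor's sketch (not part of this commit): how a caller might verify the append-only
// integrity chain and react to a broken link. The logger destination is an assumption.
//
//     var result = await audit.VerifyChainAsync(tenantId, cancellationToken: ct);
//     if (!result.IsValid)
//     {
//         logger.LogError(
//             "Audit chain broken at sequence {Sequence} (entry {EntryId}): {Error}",
//             result.InvalidSequence, result.InvalidEntryId, result.ErrorMessage);
//     }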
@@ -0,0 +1,200 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for backfill request persistence operations.
/// </summary>
public interface IBackfillRepository
{
    /// <summary>
    /// Gets a backfill request by ID.
    /// </summary>
    Task<BackfillRequest?> GetByIdAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);

    /// <summary>
    /// Creates a new backfill request.
    /// </summary>
    Task CreateAsync(BackfillRequest request, CancellationToken cancellationToken);

    /// <summary>
    /// Updates a backfill request.
    /// </summary>
    Task UpdateAsync(BackfillRequest request, CancellationToken cancellationToken);

    /// <summary>
    /// Lists backfill requests with filters.
    /// </summary>
    Task<IReadOnlyList<BackfillRequest>> ListAsync(
        string tenantId,
        BackfillStatus? status,
        Guid? sourceId,
        string? jobType,
        int limit,
        int offset,
        CancellationToken cancellationToken);

    /// <summary>
    /// Checks for overlapping active backfills.
    /// </summary>
    Task<bool> HasOverlappingActiveAsync(
        string tenantId,
        string scopeKey,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        Guid? excludeBackfillId,
        CancellationToken cancellationToken);

    /// <summary>
    /// Gets running backfills for a scope.
    /// </summary>
    Task<IReadOnlyList<BackfillRequest>> GetActiveByScopeAsync(
        string tenantId,
        string scopeKey,
        CancellationToken cancellationToken);

    /// <summary>
    /// Counts backfill requests by status.
    /// </summary>
    Task<IDictionary<BackfillStatus, int>> CountByStatusAsync(
        string tenantId,
        CancellationToken cancellationToken);

    /// <summary>
    /// Gets the next backfill ready for processing.
    /// </summary>
    Task<BackfillRequest?> GetNextPendingAsync(string tenantId, CancellationToken cancellationToken);
}

/// <summary>
/// Repository interface for backfill checkpoint persistence.
/// </summary>
public interface IBackfillCheckpointRepository
{
    /// <summary>
    /// Gets the latest checkpoint for a backfill.
    /// </summary>
    Task<BackfillCheckpoint?> GetLatestAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets all checkpoints for a backfill.
    /// </summary>
    Task<IReadOnlyList<BackfillCheckpoint>> GetAllAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);

    /// <summary>
    /// Creates a new checkpoint.
    /// </summary>
    Task CreateAsync(BackfillCheckpoint checkpoint, CancellationToken cancellationToken);

    /// <summary>
    /// Updates a checkpoint (e.g., mark complete).
    /// </summary>
    Task UpdateAsync(BackfillCheckpoint checkpoint, CancellationToken cancellationToken);
}

/// <summary>
/// Represents a backfill processing checkpoint.
/// </summary>
public sealed record BackfillCheckpoint(
    /// <summary>Unique checkpoint identifier.</summary>
    Guid CheckpointId,

    /// <summary>Tenant this checkpoint belongs to.</summary>
    string TenantId,

    /// <summary>Parent backfill request ID.</summary>
    Guid BackfillId,

    /// <summary>Batch sequence number.</summary>
    int BatchNumber,

    /// <summary>Start of batch time window.</summary>
    DateTimeOffset BatchStart,

    /// <summary>End of batch time window.</summary>
    DateTimeOffset BatchEnd,

    /// <summary>Total events in batch.</summary>
    int EventsInBatch,

    /// <summary>Events processed in batch.</summary>
    int EventsProcessed,

    /// <summary>Events skipped as duplicates.</summary>
    int EventsSkipped,

    /// <summary>Events that failed processing.</summary>
    int EventsFailed,

    /// <summary>Hash of the batch for integrity verification.</summary>
    string? BatchHash,

    /// <summary>When batch processing started.</summary>
    DateTimeOffset StartedAt,

    /// <summary>When batch processing completed.</summary>
    DateTimeOffset? CompletedAt,

    /// <summary>Error message if batch failed.</summary>
    string? ErrorMessage)
{
    /// <summary>
    /// Whether this checkpoint is complete.
    /// </summary>
    public bool IsComplete => CompletedAt.HasValue;

    /// <summary>
    /// Creates a new checkpoint for a batch.
    /// </summary>
    public static BackfillCheckpoint Create(
        string tenantId,
        Guid backfillId,
        int batchNumber,
        DateTimeOffset batchStart,
        DateTimeOffset batchEnd,
        int eventsInBatch)
    {
        return new BackfillCheckpoint(
            CheckpointId: Guid.NewGuid(),
            TenantId: tenantId,
            BackfillId: backfillId,
            BatchNumber: batchNumber,
            BatchStart: batchStart,
            BatchEnd: batchEnd,
            EventsInBatch: eventsInBatch,
            EventsProcessed: 0,
            EventsSkipped: 0,
            EventsFailed: 0,
            BatchHash: null,
            StartedAt: DateTimeOffset.UtcNow,
            CompletedAt: null,
            ErrorMessage: null);
    }

    /// <summary>
    /// Marks the checkpoint as complete.
    /// </summary>
    public BackfillCheckpoint Complete(int processed, int skipped, int failed, string? batchHash)
    {
        return this with
        {
            EventsProcessed = processed,
            EventsSkipped = skipped,
            EventsFailed = failed,
            BatchHash = batchHash,
            CompletedAt = DateTimeOffset.UtcNow
        };
    }

    /// <summary>
    /// Marks the checkpoint as failed.
    /// </summary>
    public BackfillCheckpoint Fail(string error)
    {
        return this with
        {
            CompletedAt = DateTimeOffset.UtcNow,
            ErrorMessage = error
        };
    }
}
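// Editor's sketch (not part of this commit): the intended per-batch checkpoint lifecycle,
// using the Create/Complete/Fail helpers above. ProcessBatchAsync and its counters are
// hypothetical names for the caller's batch processor.
//
//     var checkpoint = BackfillCheckpoint.Create(tenantId, backfillId, batchNumber, batchStart, batchEnd, eventsInBatch);
//     await checkpoints.CreateAsync(checkpoint, ct);
//     try
//     {
//         var (processed, skipped, failed, hash) = await ProcessBatchAsync(batchStart, batchEnd, ct);
//         await checkpoints.UpdateAsync(checkpoint.Complete(processed, skipped, failed, hash), ct);
//     }
//     catch (Exception ex)
//     {
//         await checkpoints.UpdateAsync(checkpoint.Fail(ex.Message), ct);
//         throw;
//     }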
@@ -0,0 +1,43 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for DAG edge persistence operations.
/// </summary>
public interface IDagEdgeRepository
{
    /// <summary>
    /// Creates a new DAG edge.
    /// </summary>
    Task CreateAsync(DagEdge edge, CancellationToken cancellationToken);

    /// <summary>
    /// Creates multiple DAG edges in a batch.
    /// </summary>
    Task CreateBatchAsync(IEnumerable<DagEdge> edges, CancellationToken cancellationToken);

    /// <summary>
    /// Gets all edges for a run.
    /// </summary>
    Task<IReadOnlyList<DagEdge>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets parent edges (incoming) for a job.
    /// </summary>
    Task<IReadOnlyList<DagEdge>> GetParentEdgesAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets child edges (outgoing) for a job.
    /// </summary>
    Task<IReadOnlyList<DagEdge>> GetChildEdgesAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);

    /// <summary>
    /// Checks if all parent dependencies are satisfied for a job.
    /// </summary>
    /// <param name="tenantId">Tenant ID.</param>
    /// <param name="jobId">Job to check dependencies for.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if all dependencies are satisfied.</returns>
    Task<bool> AreDependenciesSatisfiedAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
}
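// Editor's sketch (not part of this commit): a scheduler could gate downstream dispatch on
// AreDependenciesSatisfiedAsync when a job completes. The DagEdge property name ChildJobId
// and the scheduler type are assumptions about the domain model, not confirmed by this diff.
//
//     foreach (var edge in await dagEdges.GetChildEdgesAsync(tenantId, completedJobId, ct))
//     {
//         if (await dagEdges.AreDependenciesSatisfiedAsync(tenantId, edge.ChildJobId, ct))
//         {
//             await scheduler.EnqueueAsync(tenantId, edge.ChildJobId, ct);
//         }
//     }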
@@ -0,0 +1,29 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for job history persistence operations.
/// </summary>
public interface IJobHistoryRepository
{
    /// <summary>
    /// Appends a history entry for a job state change.
    /// </summary>
    Task AppendAsync(JobHistory history, CancellationToken cancellationToken);

    /// <summary>
    /// Gets the history for a job.
    /// </summary>
    Task<IReadOnlyList<JobHistory>> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets the latest history entry for a job.
    /// </summary>
    Task<JobHistory?> GetLatestByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets the next sequence number for a job's history.
    /// </summary>
    Task<int> GetNextSequenceNoAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
}
@@ -0,0 +1,100 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for job persistence operations.
/// </summary>
public interface IJobRepository
{
    /// <summary>
    /// Gets a job by ID.
    /// </summary>
    Task<Job?> GetByIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets a job by idempotency key.
    /// </summary>
    Task<Job?> GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken);

    /// <summary>
    /// Creates a new job.
    /// </summary>
    Task CreateAsync(Job job, CancellationToken cancellationToken);

    /// <summary>
    /// Updates a job's status and related fields.
    /// </summary>
    Task UpdateStatusAsync(
        string tenantId,
        Guid jobId,
        JobStatus status,
        int attempt,
        Guid? leaseId,
        string? workerId,
        string? taskRunnerId,
        DateTimeOffset? leaseUntil,
        DateTimeOffset? scheduledAt,
        DateTimeOffset? leasedAt,
        DateTimeOffset? completedAt,
        DateTimeOffset? notBefore,
        string? reason,
        CancellationToken cancellationToken);

    /// <summary>
    /// Acquires a lease on a pending/scheduled job for worker execution.
    /// </summary>
    /// <returns>The leased job, or null if no jobs available.</returns>
    Task<Job?> LeaseNextAsync(
        string tenantId,
        string? jobType,
        Guid leaseId,
        string workerId,
        DateTimeOffset leaseUntil,
        CancellationToken cancellationToken);

    /// <summary>
    /// Extends an existing lease.
    /// </summary>
    /// <returns>True if lease was extended, false if lease not found or expired.</returns>
    Task<bool> ExtendLeaseAsync(
        string tenantId,
        Guid jobId,
        Guid leaseId,
        DateTimeOffset newLeaseUntil,
        CancellationToken cancellationToken);

    /// <summary>
    /// Gets jobs by run ID.
    /// </summary>
    Task<IReadOnlyList<Job>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets jobs with expired leases.
    /// </summary>
    Task<IReadOnlyList<Job>> GetExpiredLeasesAsync(string tenantId, DateTimeOffset cutoff, int limit, CancellationToken cancellationToken);

    /// <summary>
    /// Lists jobs with pagination and filters.
    /// </summary>
    Task<IReadOnlyList<Job>> ListAsync(
        string tenantId,
        JobStatus? status,
        string? jobType,
        string? projectId,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset,
        CancellationToken cancellationToken);

    /// <summary>
    /// Counts jobs matching the filters.
    /// </summary>
    Task<int> CountAsync(
        string tenantId,
        JobStatus? status,
        string? jobType,
        string? projectId,
        CancellationToken cancellationToken);
}
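// Editor's sketch (not part of this commit): a worker poll loop over the lease API. The
// five-minute lease duration is illustrative, and the Job property name JobId is an
// assumption about the domain model.
//
//     var leaseId = Guid.NewGuid();
//     var job = await jobs.LeaseNextAsync(tenantId, jobType: null, leaseId, workerId,
//         DateTimeOffset.UtcNow.AddMinutes(5), ct);
//     if (job is not null)
//     {
//         // Heartbeat: extend the lease while the job runs. A false return means the
//         // lease expired and another worker may have taken the job over.
//         var stillOwned = await jobs.ExtendLeaseAsync(tenantId, job.JobId, leaseId,
//             DateTimeOffset.UtcNow.AddMinutes(5), ct);
//     }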
@@ -0,0 +1,210 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository for run ledger entries.
/// </summary>
public interface ILedgerRepository
{
    /// <summary>
    /// Appends a new ledger entry from a completed run.
    /// </summary>
    Task<RunLedgerEntry> AppendAsync(
        Run run,
        IReadOnlyList<Artifact> artifacts,
        string inputDigest,
        string? metadata = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a ledger entry by ID.
    /// </summary>
    Task<RunLedgerEntry?> GetByIdAsync(
        string tenantId,
        Guid ledgerId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a ledger entry by run ID.
    /// </summary>
    Task<RunLedgerEntry?> GetByRunIdAsync(
        string tenantId,
        Guid runId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists ledger entries with optional filters.
    /// </summary>
    Task<IReadOnlyList<RunLedgerEntry>> ListAsync(
        string tenantId,
        string? runType = null,
        Guid? sourceId = null,
        RunStatus? finalStatus = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets ledger entries by sequence range.
    /// </summary>
    Task<IReadOnlyList<RunLedgerEntry>> GetBySequenceRangeAsync(
        string tenantId,
        long startSequence,
        long endSequence,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the latest ledger entry for a tenant.
    /// </summary>
    Task<RunLedgerEntry?> GetLatestAsync(
        string tenantId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets ledger entries for a specific source.
    /// </summary>
    Task<IReadOnlyList<RunLedgerEntry>> GetBySourceAsync(
        string tenantId,
        Guid sourceId,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the count of ledger entries.
    /// </summary>
    Task<long> GetCountAsync(
        string tenantId,
        string? runType = null,
        Guid? sourceId = null,
        DateTimeOffset? startTime = null,
        DateTimeOffset? endTime = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies the chain integrity for a range of entries.
    /// </summary>
    Task<ChainVerificationResult> VerifyChainAsync(
        string tenantId,
        long? startSequence = null,
        long? endSequence = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets ledger summary statistics.
    /// </summary>
    Task<LedgerSummary> GetSummaryAsync(
        string tenantId,
        DateTimeOffset? since = null,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Ledger summary statistics.
/// </summary>
public sealed record LedgerSummary(
    long TotalEntries,
    long EntriesSince,
    long TotalRuns,
    long SuccessfulRuns,
    long FailedRuns,
    long TotalJobs,
    long UniqueSources,
    long UniqueRunTypes,
    DateTimeOffset? EarliestEntry,
    DateTimeOffset? LatestEntry);

/// <summary>
/// Repository for ledger exports.
/// </summary>
public interface ILedgerExportRepository
{
    /// <summary>
    /// Creates a new export request.
    /// </summary>
    Task<LedgerExport> CreateAsync(
        LedgerExport export,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets an export by ID.
    /// </summary>
    Task<LedgerExport?> GetByIdAsync(
        string tenantId,
        Guid exportId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists exports for a tenant.
    /// </summary>
    Task<IReadOnlyList<LedgerExport>> ListAsync(
        string tenantId,
        LedgerExportStatus? status = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates an export.
    /// </summary>
    Task<LedgerExport> UpdateAsync(
        LedgerExport export,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets pending exports.
    /// </summary>
    Task<IReadOnlyList<LedgerExport>> GetPendingAsync(
        int limit = 10,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Repository for signed manifests.
/// </summary>
public interface IManifestRepository
{
    /// <summary>
    /// Creates a new manifest.
    /// </summary>
    Task<SignedManifest> CreateAsync(
        SignedManifest manifest,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a manifest by ID.
    /// </summary>
    Task<SignedManifest?> GetByIdAsync(
        string tenantId,
        Guid manifestId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a manifest by subject.
    /// </summary>
    Task<SignedManifest?> GetBySubjectAsync(
        string tenantId,
        ProvenanceType provenanceType,
        Guid subjectId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists manifests for a tenant.
    /// </summary>
    Task<IReadOnlyList<SignedManifest>> ListAsync(
        string tenantId,
        ProvenanceType? provenanceType = null,
        int limit = 100,
        int offset = 0,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a manifest by payload digest.
    /// </summary>
    Task<SignedManifest?> GetByPayloadDigestAsync(
        string tenantId,
        string payloadDigest,
        CancellationToken cancellationToken = default);
}
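// Editor's sketch (not part of this commit): appending a completed run to the ledger and
// reading back the tail. The Run property names TenantId/RunId are assumptions, and the
// inputDigest computation is out of scope here.
//
//     var artifacts = await artifactRepository.GetByRunIdAsync(run.TenantId, run.RunId, ct);
//     var entry = await ledger.AppendAsync(run, artifacts, inputDigest, cancellationToken: ct);
//     var latest = await ledger.GetLatestAsync(run.TenantId, ct); // the entry just appended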
@@ -0,0 +1,79 @@
|
||||
using StellaOps.Orchestrator.Core.Domain;
|
||||
|
||||
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
|
||||
|
||||
/// <summary>
|
||||
/// Repository interface for quota persistence operations.
|
||||
/// </summary>
|
||||
public interface IQuotaRepository
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets a quota by ID.
|
||||
/// </summary>
|
||||
Task<Quota?> GetByIdAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Gets the quota for a tenant and optional job type.
|
||||
/// </summary>
|
||||
Task<Quota?> GetByTenantAndJobTypeAsync(string tenantId, string? jobType, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new quota.
|
||||
/// </summary>
|
||||
Task CreateAsync(Quota quota, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Updates a quota (including token/counter state).
|
||||
/// </summary>
|
||||
Task UpdateAsync(Quota quota, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Pauses a quota with reason.
|
||||
/// </summary>
|
||||
Task PauseAsync(string tenantId, Guid quotaId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Resumes a paused quota.
|
||||
/// </summary>
|
||||
Task ResumeAsync(string tenantId, Guid quotaId, string updatedBy, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Updates the rate limiter state (tokens, counters) without changing configuration.
|
||||
/// </summary>
|
||||
Task UpdateStateAsync(
|
||||
string tenantId,
|
||||
Guid quotaId,
|
||||
double currentTokens,
|
||||
DateTimeOffset lastRefillAt,
|
||||
int currentActive,
|
||||
int currentHourCount,
|
||||
DateTimeOffset currentHourStart,
|
||||
string updatedBy,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Increments the current active count.
|
||||
/// </summary>
|
||||
Task IncrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Decrements the current active count.
|
||||
/// </summary>
|
||||
Task DecrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Lists quotas for a tenant with pagination.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<Quota>> ListAsync(
|
||||
string tenantId,
|
||||
string? jobType,
|
||||
bool? paused,
|
||||
int limit,
|
||||
int offset,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Deletes a quota.
|
||||
/// </summary>
|
||||
Task<bool> DeleteAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
|
||||
}
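UpdateStateAsync separates rate-limiter state from quota configuration, which suggests a token-bucket acquire path: refill by elapsed time, spend one token, persist the new state. A minimal sketch, assuming hypothetical Quota properties (Id, TenantId, CurrentTokens, MaxTokens, RefillPerSecond, LastRefillAt, CurrentActive, CurrentHourCount, CurrentHourStart), none of which are confirmed by this commit:

// Hypothetical token-bucket acquire built on UpdateStateAsync.
// All Quota property names below are illustrative assumptions.
public static class QuotaGate
{
    public static async Task<bool> TryAcquireAsync(
        IQuotaRepository quotas, Quota quota, string updatedBy, CancellationToken ct)
    {
        var now = DateTimeOffset.UtcNow;

        // Refill proportionally to the time elapsed since the last refill,
        // capped at the bucket's maximum capacity.
        var elapsedSeconds = (now - quota.LastRefillAt).TotalSeconds;
        var tokens = Math.Min(quota.MaxTokens, quota.CurrentTokens + elapsedSeconds * quota.RefillPerSecond);

        if (tokens < 1.0)
            return false; // bucket empty; caller should back off

        await quotas.UpdateStateAsync(
            quota.TenantId, quota.Id,
            currentTokens: tokens - 1.0,
            lastRefillAt: now,
            currentActive: quota.CurrentActive,
            currentHourCount: quota.CurrentHourCount + 1,
            currentHourStart: quota.CurrentHourStart,
            updatedBy, ct);
        return true;
    }
}

A production acquire path would also need optimistic concurrency (for example, a version column checked in the UPDATE), since two workers can refill the same bucket at the same time.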
@@ -0,0 +1,69 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for run persistence operations.
/// </summary>
public interface IRunRepository
{
    /// <summary>
    /// Gets a run by ID.
    /// </summary>
    Task<Run?> GetByIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);

    /// <summary>
    /// Creates a new run.
    /// </summary>
    Task CreateAsync(Run run, CancellationToken cancellationToken);

    /// <summary>
    /// Updates run status and job counts.
    /// </summary>
    Task UpdateStatusAsync(
        string tenantId,
        Guid runId,
        RunStatus status,
        int totalJobs,
        int completedJobs,
        int succeededJobs,
        int failedJobs,
        DateTimeOffset? startedAt,
        DateTimeOffset? completedAt,
        CancellationToken cancellationToken);

    /// <summary>
    /// Increments job counters when a job completes.
    /// </summary>
    Task IncrementJobCountsAsync(
        string tenantId,
        Guid runId,
        bool succeeded,
        CancellationToken cancellationToken);

    /// <summary>
    /// Lists runs with pagination and filters.
    /// </summary>
    Task<IReadOnlyList<Run>> ListAsync(
        string tenantId,
        Guid? sourceId,
        string? runType,
        RunStatus? status,
        string? projectId,
        DateTimeOffset? createdAfter,
        DateTimeOffset? createdBefore,
        int limit,
        int offset,
        CancellationToken cancellationToken);

    /// <summary>
    /// Counts runs matching the filters.
    /// </summary>
    Task<int> CountAsync(
        string tenantId,
        Guid? sourceId,
        string? runType,
        RunStatus? status,
        string? projectId,
        CancellationToken cancellationToken);
}
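ListAsync and CountAsync take the same filter set, so a paged API response can combine one page of items with the total match count. A minimal sketch; PagedResult<T> is an illustrative container, not part of this commit:

// Hypothetical paged query combining ListAsync and CountAsync.
public sealed record PagedResult<T>(IReadOnlyList<T> Items, int Total);

public static class RunQueries
{
    public static async Task<PagedResult<Run>> GetRunsPageAsync(
        IRunRepository runs, string tenantId, RunStatus? status,
        int page, int pageSize, CancellationToken ct)
    {
        // Fetch one page of runs filtered by status only.
        var items = await runs.ListAsync(
            tenantId, sourceId: null, runType: null, status,
            projectId: null, createdAfter: null, createdBefore: null,
            limit: pageSize, offset: page * pageSize, ct);

        // Count all matches with the same filters for the pager UI.
        var total = await runs.CountAsync(
            tenantId, sourceId: null, runType: null, status, projectId: null, ct);

        return new PagedResult<Run>(items, total);
    }
}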
@@ -0,0 +1,50 @@
using StellaOps.Orchestrator.Core.Domain;

namespace StellaOps.Orchestrator.Infrastructure.Repositories;

/// <summary>
/// Repository interface for source persistence operations.
/// </summary>
public interface ISourceRepository
{
    /// <summary>
    /// Gets a source by ID.
    /// </summary>
    Task<Source?> GetByIdAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken);

    /// <summary>
    /// Gets a source by name.
    /// </summary>
    Task<Source?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken);

    /// <summary>
    /// Creates a new source.
    /// </summary>
    Task CreateAsync(Source source, CancellationToken cancellationToken);

    /// <summary>
    /// Updates a source.
    /// </summary>
    Task UpdateAsync(Source source, CancellationToken cancellationToken);

    /// <summary>
    /// Pauses a source with a reason.
    /// </summary>
    Task PauseAsync(string tenantId, Guid sourceId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken);

    /// <summary>
    /// Resumes a paused source.
    /// </summary>
    Task ResumeAsync(string tenantId, Guid sourceId, string updatedBy, CancellationToken cancellationToken);

    /// <summary>
    /// Lists sources with pagination.
    /// </summary>
    Task<IReadOnlyList<Source>> ListAsync(
        string tenantId,
        string? sourceType,
        bool? enabled,
        int limit,
        int offset,
        CancellationToken cancellationToken);
}
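PauseAsync records a reason, an optional ticket, and the acting identity, which fits automated health guardrails as well as manual operations. A minimal sketch, assuming Source exposes TenantId and Id properties and using an arbitrary 50% error-rate threshold (both assumptions, not confirmed by this diff):

// Hypothetical guardrail: pause a source when its error rate spikes.
public static class SourceGuardrails
{
    public static async Task PauseIfUnhealthyAsync(
        ISourceRepository sources,
        Source source,
        double errorRate,
        CancellationToken ct)
    {
        // Threshold is an illustrative assumption, not a documented value.
        if (errorRate < 0.5)
            return;

        await sources.PauseAsync(
            source.TenantId,
            source.Id,
            reason: $"error rate {errorRate:P0} exceeded threshold",
            ticket: null,
            updatedBy: "system/auto-pause",
            ct);
    }
}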
Some files were not shown because too many files have changed in this diff.