Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled

StellaOps Bot
2025-11-28 20:55:22 +02:00
parent d040c001ac
commit 2548abc56f
231 changed files with 47468 additions and 68 deletions

View File

@@ -0,0 +1,39 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Authority.Storage.Postgres;
/// <summary>
/// PostgreSQL data source for the Authority module.
/// Manages connections with tenant context for authentication and authorization data.
/// </summary>
public sealed class AuthorityDataSource : DataSourceBase
{
/// <summary>
/// Default schema name for Authority tables.
/// </summary>
public const string DefaultSchemaName = "auth";
/// <summary>
/// Creates a new Authority data source.
/// </summary>
public AuthorityDataSource(IOptions<PostgresOptions> options, ILogger<AuthorityDataSource> logger)
: base(CreateOptions(options.Value), logger)
{
}
/// <inheritdoc />
protected override string ModuleName => "Authority";
private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
{
// Use default schema if not specified
if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
{
baseOptions.SchemaName = DefaultSchemaName;
}
return baseOptions;
}
}

View File

@@ -0,0 +1,232 @@
-- Authority Schema Migration 001: Initial Schema
-- Creates the auth schema for IAM, tenants, users, and tokens
-- Schema name matches AuthorityDataSource.DefaultSchemaName ("auth"), and the
-- tenants/users columns match TenantEntity/UserEntity and the repositories.
-- Create schema
CREATE SCHEMA IF NOT EXISTS auth;
-- Tenants table
CREATE TABLE IF NOT EXISTS auth.tenants (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
slug TEXT NOT NULL UNIQUE,
name TEXT NOT NULL,
description TEXT,
contact_email TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
settings JSONB NOT NULL DEFAULT '{}',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT
);
CREATE INDEX idx_tenants_enabled ON auth.tenants(enabled);
CREATE INDEX idx_tenants_created_at ON auth.tenants(created_at);
-- Users table
CREATE TABLE IF NOT EXISTS auth.users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
username TEXT NOT NULL,
email TEXT NOT NULL,
display_name TEXT,
password_hash TEXT,
password_salt TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
email_verified BOOLEAN NOT NULL DEFAULT FALSE,
mfa_enabled BOOLEAN NOT NULL DEFAULT FALSE,
mfa_secret TEXT,
mfa_backup_codes TEXT,
failed_login_attempts INT NOT NULL DEFAULT 0,
locked_until TIMESTAMPTZ,
last_login_at TIMESTAMPTZ,
password_changed_at TIMESTAMPTZ,
settings JSONB NOT NULL DEFAULT '{}',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT,
UNIQUE(tenant_id, username),
UNIQUE(tenant_id, email)
);
CREATE INDEX idx_users_tenant_id ON auth.users(tenant_id);
CREATE INDEX idx_users_enabled ON auth.users(tenant_id, enabled);
-- Roles table
CREATE TABLE IF NOT EXISTS auth.roles (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
name TEXT NOT NULL,
display_name TEXT,
description TEXT,
is_system BOOLEAN NOT NULL DEFAULT FALSE,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_roles_tenant_id ON auth.roles(tenant_id);
-- Permissions table
CREATE TABLE IF NOT EXISTS auth.permissions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
name TEXT NOT NULL,
resource TEXT NOT NULL,
action TEXT NOT NULL,
description TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_permissions_tenant_id ON auth.permissions(tenant_id);
CREATE INDEX idx_permissions_resource ON auth.permissions(tenant_id, resource);
-- Role-Permission assignments
CREATE TABLE IF NOT EXISTS auth.role_permissions (
role_id UUID NOT NULL REFERENCES auth.roles(id) ON DELETE CASCADE,
permission_id UUID NOT NULL REFERENCES auth.permissions(id) ON DELETE CASCADE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (role_id, permission_id)
);
-- User-Role assignments
CREATE TABLE IF NOT EXISTS auth.user_roles (
user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
role_id UUID NOT NULL REFERENCES auth.roles(id) ON DELETE CASCADE,
granted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
granted_by TEXT,
expires_at TIMESTAMPTZ,
PRIMARY KEY (user_id, role_id)
);
-- API Keys table
CREATE TABLE IF NOT EXISTS auth.api_keys (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
name TEXT NOT NULL,
key_hash TEXT NOT NULL,
key_prefix TEXT NOT NULL,
scopes TEXT[] NOT NULL DEFAULT '{}',
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'revoked', 'expired')),
last_used_at TIMESTAMPTZ,
expires_at TIMESTAMPTZ,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
revoked_at TIMESTAMPTZ,
revoked_by TEXT
);
CREATE INDEX idx_api_keys_tenant_id ON auth.api_keys(tenant_id);
CREATE INDEX idx_api_keys_key_prefix ON auth.api_keys(key_prefix);
CREATE INDEX idx_api_keys_user_id ON auth.api_keys(user_id);
CREATE INDEX idx_api_keys_status ON auth.api_keys(tenant_id, status);
-- Tokens table (access tokens)
CREATE TABLE IF NOT EXISTS auth.tokens (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
token_hash TEXT NOT NULL UNIQUE,
token_type TEXT NOT NULL DEFAULT 'access' CHECK (token_type IN ('access', 'refresh', 'api')),
scopes TEXT[] NOT NULL DEFAULT '{}',
client_id TEXT,
issued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
revoked_at TIMESTAMPTZ,
revoked_by TEXT,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_tokens_tenant_id ON auth.tokens(tenant_id);
CREATE INDEX idx_tokens_user_id ON auth.tokens(user_id);
CREATE INDEX idx_tokens_expires_at ON auth.tokens(expires_at);
-- Refresh Tokens table
CREATE TABLE IF NOT EXISTS auth.refresh_tokens (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
token_hash TEXT NOT NULL UNIQUE,
access_token_id UUID REFERENCES auth.tokens(id) ON DELETE SET NULL,
client_id TEXT,
issued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
revoked_at TIMESTAMPTZ,
revoked_by TEXT,
replaced_by UUID,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_refresh_tokens_tenant_id ON auth.refresh_tokens(tenant_id);
CREATE INDEX idx_refresh_tokens_user_id ON auth.refresh_tokens(user_id);
CREATE INDEX idx_refresh_tokens_expires_at ON auth.refresh_tokens(expires_at);
-- Sessions table
CREATE TABLE IF NOT EXISTS auth.sessions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL REFERENCES auth.tenants(slug),
user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
session_token_hash TEXT NOT NULL UNIQUE,
ip_address TEXT,
user_agent TEXT,
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_activity_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
ended_at TIMESTAMPTZ,
end_reason TEXT,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_sessions_tenant_id ON auth.sessions(tenant_id);
CREATE INDEX idx_sessions_user_id ON auth.sessions(user_id);
CREATE INDEX idx_sessions_expires_at ON auth.sessions(expires_at);
-- Audit log table
CREATE TABLE IF NOT EXISTS auth.audit (
id BIGSERIAL PRIMARY KEY,
tenant_id TEXT NOT NULL,
user_id UUID,
action TEXT NOT NULL,
resource_type TEXT NOT NULL,
resource_id TEXT,
old_value JSONB,
new_value JSONB,
ip_address TEXT,
user_agent TEXT,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_audit_tenant_id ON auth.audit(tenant_id);
CREATE INDEX idx_audit_user_id ON auth.audit(user_id);
CREATE INDEX idx_audit_action ON auth.audit(action);
CREATE INDEX idx_audit_resource ON auth.audit(resource_type, resource_id);
CREATE INDEX idx_audit_created_at ON auth.audit(created_at);
CREATE INDEX idx_audit_correlation_id ON auth.audit(correlation_id);
-- Function to update updated_at timestamp
CREATE OR REPLACE FUNCTION auth.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Triggers for updated_at
CREATE TRIGGER trg_tenants_updated_at
BEFORE UPDATE ON auth.tenants
FOR EACH ROW EXECUTE FUNCTION auth.update_updated_at();
CREATE TRIGGER trg_users_updated_at
BEFORE UPDATE ON auth.users
FOR EACH ROW EXECUTE FUNCTION auth.update_updated_at();
CREATE TRIGGER trg_roles_updated_at
BEFORE UPDATE ON auth.roles
FOR EACH ROW EXECUTE FUNCTION auth.update_updated_at();
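As a usage sketch against this schema, a bootstrap seed might look like the following (slug, names, and addresses are illustrative placeholders; real password_hash/password_salt values would come from the application's Argon2id hasher):

-- Illustrative bootstrap data, not part of the migration itself
INSERT INTO auth.tenants (slug, name, description, created_by)
VALUES ('acme', 'Acme Corp', 'Demo tenant', 'bootstrap');
INSERT INTO auth.users (tenant_id, username, email, email_verified, created_by)
VALUES ('acme', 'admin', 'admin@acme.example', TRUE, 'bootstrap');
-- trg_users_updated_at refreshes updated_at automatically on UPDATE:
UPDATE auth.users SET display_name = 'Administrator'
WHERE tenant_id = 'acme' AND username = 'admin';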

View File

@@ -0,0 +1,62 @@
namespace StellaOps.Authority.Storage.Postgres.Models;
/// <summary>
/// Represents a tenant entity in the auth schema.
/// </summary>
public sealed class TenantEntity
{
/// <summary>
/// Unique tenant identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant slug/key (unique).
/// </summary>
public required string Slug { get; init; }
/// <summary>
/// Display name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Optional description.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Contact email for the tenant.
/// </summary>
public string? ContactEmail { get; init; }
/// <summary>
/// Tenant is enabled.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Tenant settings as JSON.
/// </summary>
public string Settings { get; init; } = "{}";
/// <summary>
/// Tenant metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// When the tenant was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the tenant was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
/// <summary>
/// User who created the tenant.
/// </summary>
public string? CreatedBy { get; init; }
}

View File

@@ -0,0 +1,112 @@
namespace StellaOps.Authority.Storage.Postgres.Models;
/// <summary>
/// Represents a user entity in the auth schema.
/// </summary>
public sealed class UserEntity
{
/// <summary>
/// Unique user identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this user belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Username (unique per tenant).
/// </summary>
public required string Username { get; init; }
/// <summary>
/// Email address (unique per tenant).
/// </summary>
public required string Email { get; init; }
/// <summary>
/// User's display name.
/// </summary>
public string? DisplayName { get; init; }
/// <summary>
/// Argon2id password hash.
/// </summary>
public string? PasswordHash { get; init; }
/// <summary>
/// Password salt.
/// </summary>
public string? PasswordSalt { get; init; }
/// <summary>
/// User is enabled.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Email has been verified.
/// </summary>
public bool EmailVerified { get; init; }
/// <summary>
/// MFA is enabled for this user.
/// </summary>
public bool MfaEnabled { get; init; }
/// <summary>
/// MFA secret (encrypted).
/// </summary>
public string? MfaSecret { get; init; }
/// <summary>
/// MFA backup codes (encrypted JSON array).
/// </summary>
public string? MfaBackupCodes { get; init; }
/// <summary>
/// Number of failed login attempts.
/// </summary>
public int FailedLoginAttempts { get; init; }
/// <summary>
/// Account locked until this time.
/// </summary>
public DateTimeOffset? LockedUntil { get; init; }
/// <summary>
/// Last successful login time.
/// </summary>
public DateTimeOffset? LastLoginAt { get; init; }
/// <summary>
/// When the password was last changed.
/// </summary>
public DateTimeOffset? PasswordChangedAt { get; init; }
/// <summary>
/// User settings as JSON.
/// </summary>
public string Settings { get; init; } = "{}";
/// <summary>
/// User metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// When the user was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the user was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
/// <summary>
/// User who created this user.
/// </summary>
public string? CreatedBy { get; init; }
}

View File

@@ -0,0 +1,48 @@
using StellaOps.Authority.Storage.Postgres.Models;
namespace StellaOps.Authority.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for tenant operations.
/// </summary>
public interface ITenantRepository
{
/// <summary>
/// Creates a new tenant.
/// </summary>
Task<TenantEntity> CreateAsync(TenantEntity tenant, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a tenant by ID.
/// </summary>
Task<TenantEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a tenant by slug.
/// </summary>
Task<TenantEntity?> GetBySlugAsync(string slug, CancellationToken cancellationToken = default);
/// <summary>
/// Gets all tenants with optional filtering.
/// </summary>
Task<IReadOnlyList<TenantEntity>> GetAllAsync(
bool? enabled = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates a tenant.
/// </summary>
Task<bool> UpdateAsync(TenantEntity tenant, CancellationToken cancellationToken = default);
/// <summary>
/// Deletes a tenant.
/// </summary>
Task<bool> DeleteAsync(Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Checks if a tenant slug exists.
/// </summary>
Task<bool> SlugExistsAsync(string slug, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,76 @@
using StellaOps.Authority.Storage.Postgres.Models;
namespace StellaOps.Authority.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for user operations.
/// </summary>
public interface IUserRepository
{
/// <summary>
/// Creates a new user.
/// </summary>
Task<UserEntity> CreateAsync(UserEntity user, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a user by ID.
/// </summary>
Task<UserEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a user by username.
/// </summary>
Task<UserEntity?> GetByUsernameAsync(string tenantId, string username, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a user by email.
/// </summary>
Task<UserEntity?> GetByEmailAsync(string tenantId, string email, CancellationToken cancellationToken = default);
/// <summary>
/// Gets all users for a tenant with optional filtering.
/// </summary>
Task<IReadOnlyList<UserEntity>> GetAllAsync(
string tenantId,
bool? enabled = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates a user.
/// </summary>
Task<bool> UpdateAsync(UserEntity user, CancellationToken cancellationToken = default);
/// <summary>
/// Deletes a user.
/// </summary>
Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Updates the user's password hash.
/// </summary>
Task<bool> UpdatePasswordAsync(
string tenantId,
Guid userId,
string passwordHash,
string passwordSalt,
CancellationToken cancellationToken = default);
/// <summary>
/// Records a failed login attempt.
/// </summary>
Task<int> RecordFailedLoginAsync(
string tenantId,
Guid userId,
DateTimeOffset? lockUntil = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Records a successful login.
/// </summary>
Task RecordSuccessfulLoginAsync(
string tenantId,
Guid userId,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,194 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Authority.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;
namespace StellaOps.Authority.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for tenant operations.
/// </summary>
public sealed class TenantRepository : RepositoryBase<AuthorityDataSource>, ITenantRepository
{
private const string SystemTenantId = "_system";
/// <summary>
/// Creates a new tenant repository.
/// </summary>
public TenantRepository(AuthorityDataSource dataSource, ILogger<TenantRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<TenantEntity> CreateAsync(TenantEntity tenant, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO auth.tenants (id, slug, name, description, contact_email, enabled, settings, metadata, created_by)
VALUES (@id, @slug, @name, @description, @contact_email, @enabled, @settings::jsonb, @metadata::jsonb, @created_by)
RETURNING id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", tenant.Id);
AddParameter(command, "slug", tenant.Slug);
AddParameter(command, "name", tenant.Name);
AddParameter(command, "description", tenant.Description);
AddParameter(command, "contact_email", tenant.ContactEmail);
AddParameter(command, "enabled", tenant.Enabled);
AddJsonbParameter(command, "settings", tenant.Settings);
AddJsonbParameter(command, "metadata", tenant.Metadata);
AddParameter(command, "created_by", tenant.CreatedBy);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapTenant(reader);
}
/// <inheritdoc />
public async Task<TenantEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.tenants
WHERE id = @id
""";
return await QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "id", id),
MapTenant,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<TenantEntity?> GetBySlugAsync(string slug, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.tenants
WHERE slug = @slug
""";
return await QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "slug", slug),
MapTenant,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<TenantEntity>> GetAllAsync(
bool? enabled = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var sql = """
SELECT id, slug, name, description, contact_email, enabled, settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.tenants
""";
if (enabled.HasValue)
{
sql += " WHERE enabled = @enabled";
}
sql += " ORDER BY name, id LIMIT @limit OFFSET @offset";
return await QueryAsync(
SystemTenantId,
sql,
cmd =>
{
if (enabled.HasValue)
{
AddParameter(cmd, "enabled", enabled.Value);
}
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapTenant,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<bool> UpdateAsync(TenantEntity tenant, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE auth.tenants
SET name = @name,
description = @description,
contact_email = @contact_email,
enabled = @enabled,
settings = @settings::jsonb,
metadata = @metadata::jsonb
WHERE id = @id
""";
var rows = await ExecuteAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "id", tenant.Id);
AddParameter(cmd, "name", tenant.Name);
AddParameter(cmd, "description", tenant.Description);
AddParameter(cmd, "contact_email", tenant.ContactEmail);
AddParameter(cmd, "enabled", tenant.Enabled);
AddJsonbParameter(cmd, "settings", tenant.Settings);
AddJsonbParameter(cmd, "metadata", tenant.Metadata);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(Guid id, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM auth.tenants WHERE id = @id";
var rows = await ExecuteAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "id", id),
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> SlugExistsAsync(string slug, CancellationToken cancellationToken = default)
{
const string sql = "SELECT EXISTS(SELECT 1 FROM auth.tenants WHERE slug = @slug)";
var result = await ExecuteScalarAsync<bool>(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "slug", slug),
cancellationToken).ConfigureAwait(false);
return result;
}
private static TenantEntity MapTenant(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(0),
Slug = reader.GetString(1),
Name = reader.GetString(2),
Description = GetNullableString(reader, 3),
ContactEmail = GetNullableString(reader, 4),
Enabled = reader.GetBoolean(5),
Settings = reader.GetString(6),
Metadata = reader.GetString(7),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(8),
UpdatedAt = reader.GetFieldValue<DateTimeOffset>(9),
CreatedBy = GetNullableString(reader, 10)
};
}

View File

@@ -0,0 +1,353 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Authority.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;
namespace StellaOps.Authority.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for user operations.
/// </summary>
public sealed class UserRepository : RepositoryBase<AuthorityDataSource>, IUserRepository
{
/// <summary>
/// Creates a new user repository.
/// </summary>
public UserRepository(AuthorityDataSource dataSource, ILogger<UserRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<UserEntity> CreateAsync(UserEntity user, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO auth.users (
id, tenant_id, username, email, display_name, password_hash, password_salt,
enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
settings, metadata, created_by
)
VALUES (
@id, @tenant_id, @username, @email, @display_name, @password_hash, @password_salt,
@enabled, @email_verified, @mfa_enabled, @mfa_secret, @mfa_backup_codes,
@settings::jsonb, @metadata::jsonb, @created_by
)
RETURNING id, tenant_id, username, email, display_name, password_hash, password_salt,
enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
failed_login_attempts, locked_until, last_login_at, password_changed_at,
settings::text, metadata::text, created_at, updated_at, created_by
""";
await using var connection = await DataSource.OpenConnectionAsync(user.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddUserParameters(command, user);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapUser(reader);
}
/// <inheritdoc />
public async Task<UserEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
failed_login_attempts, locked_until, last_login_at, password_changed_at,
settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.users
WHERE tenant_id = @tenant_id AND id = @id
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
MapUser,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<UserEntity?> GetByUsernameAsync(string tenantId, string username, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
failed_login_attempts, locked_until, last_login_at, password_changed_at,
settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.users
WHERE tenant_id = @tenant_id AND username = @username
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "username", username);
},
MapUser,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<UserEntity?> GetByEmailAsync(string tenantId, string email, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
failed_login_attempts, locked_until, last_login_at, password_changed_at,
settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.users
WHERE tenant_id = @tenant_id AND email = @email
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "email", email);
},
MapUser,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<UserEntity>> GetAllAsync(
string tenantId,
bool? enabled = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var sql = """
SELECT id, tenant_id, username, email, display_name, password_hash, password_salt,
enabled, email_verified, mfa_enabled, mfa_secret, mfa_backup_codes,
failed_login_attempts, locked_until, last_login_at, password_changed_at,
settings::text, metadata::text, created_at, updated_at, created_by
FROM auth.users
WHERE tenant_id = @tenant_id
""";
if (enabled.HasValue)
{
sql += " AND enabled = @enabled";
}
sql += " ORDER BY username, id LIMIT @limit OFFSET @offset";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
if (enabled.HasValue)
{
AddParameter(cmd, "enabled", enabled.Value);
}
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapUser,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<bool> UpdateAsync(UserEntity user, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE auth.users
SET username = @username,
email = @email,
display_name = @display_name,
enabled = @enabled,
email_verified = @email_verified,
mfa_enabled = @mfa_enabled,
mfa_secret = @mfa_secret,
mfa_backup_codes = @mfa_backup_codes,
settings = @settings::jsonb,
metadata = @metadata::jsonb
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
user.TenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", user.TenantId);
AddParameter(cmd, "id", user.Id);
AddParameter(cmd, "username", user.Username);
AddParameter(cmd, "email", user.Email);
AddParameter(cmd, "display_name", user.DisplayName);
AddParameter(cmd, "enabled", user.Enabled);
AddParameter(cmd, "email_verified", user.EmailVerified);
AddParameter(cmd, "mfa_enabled", user.MfaEnabled);
AddParameter(cmd, "mfa_secret", user.MfaSecret);
AddParameter(cmd, "mfa_backup_codes", user.MfaBackupCodes);
AddJsonbParameter(cmd, "settings", user.Settings);
AddJsonbParameter(cmd, "metadata", user.Metadata);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM auth.users WHERE tenant_id = @tenant_id AND id = @id";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> UpdatePasswordAsync(
string tenantId,
Guid userId,
string passwordHash,
string passwordSalt,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE auth.users
SET password_hash = @password_hash,
password_salt = @password_salt,
password_changed_at = NOW()
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", userId);
AddParameter(cmd, "password_hash", passwordHash);
AddParameter(cmd, "password_salt", passwordSalt);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<int> RecordFailedLoginAsync(
string tenantId,
Guid userId,
DateTimeOffset? lockUntil = null,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE auth.users
SET failed_login_attempts = failed_login_attempts + 1,
locked_until = @locked_until
WHERE tenant_id = @tenant_id AND id = @id
RETURNING failed_login_attempts
""";
var result = await ExecuteScalarAsync<int>(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", userId);
AddParameter(cmd, "locked_until", lockUntil);
},
cancellationToken).ConfigureAwait(false);
return result;
}
/// <inheritdoc />
public async Task RecordSuccessfulLoginAsync(
string tenantId,
Guid userId,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE auth.users
SET failed_login_attempts = 0,
locked_until = NULL,
last_login_at = NOW()
WHERE tenant_id = @tenant_id AND id = @id
""";
await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", userId);
},
cancellationToken).ConfigureAwait(false);
}
private static void AddUserParameters(NpgsqlCommand command, UserEntity user)
{
AddParameter(command, "id", user.Id);
AddParameter(command, "tenant_id", user.TenantId);
AddParameter(command, "username", user.Username);
AddParameter(command, "email", user.Email);
AddParameter(command, "display_name", user.DisplayName);
AddParameter(command, "password_hash", user.PasswordHash);
AddParameter(command, "password_salt", user.PasswordSalt);
AddParameter(command, "enabled", user.Enabled);
AddParameter(command, "email_verified", user.EmailVerified);
AddParameter(command, "mfa_enabled", user.MfaEnabled);
AddParameter(command, "mfa_secret", user.MfaSecret);
AddParameter(command, "mfa_backup_codes", user.MfaBackupCodes);
AddJsonbParameter(command, "settings", user.Settings);
AddJsonbParameter(command, "metadata", user.Metadata);
AddParameter(command, "created_by", user.CreatedBy);
}
private static UserEntity MapUser(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(0),
TenantId = reader.GetString(1),
Username = reader.GetString(2),
Email = reader.GetString(3),
DisplayName = GetNullableString(reader, 4),
PasswordHash = GetNullableString(reader, 5),
PasswordSalt = GetNullableString(reader, 6),
Enabled = reader.GetBoolean(7),
EmailVerified = reader.GetBoolean(8),
MfaEnabled = reader.GetBoolean(9),
MfaSecret = GetNullableString(reader, 10),
MfaBackupCodes = GetNullableString(reader, 11),
FailedLoginAttempts = reader.GetInt32(12),
LockedUntil = GetNullableDateTimeOffset(reader, 13),
LastLoginAt = GetNullableDateTimeOffset(reader, 14),
PasswordChangedAt = GetNullableDateTimeOffset(reader, 15),
Settings = reader.GetString(16),
Metadata = reader.GetString(17),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(18),
UpdatedAt = reader.GetFieldValue<DateTimeOffset>(19),
CreatedBy = GetNullableString(reader, 20)
};
}
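As a usage sketch, a login flow over this repository might look like the following (VerifyPassword and the 5-attempt/15-minute lockout policy are hypothetical stand-ins, not part of this commit):

using StellaOps.Authority.Storage.Postgres.Repositories;

// Hypothetical login flow; the lockout threshold and window are illustrative.
static async Task<bool> TryLoginAsync(
    IUserRepository users, string tenantId, string username, string password, CancellationToken ct)
{
    var user = await users.GetByUsernameAsync(tenantId, username, ct);
    if (user is null || !user.Enabled)
    {
        return false;
    }
    if (user.LockedUntil is { } lockedUntil && lockedUntil > DateTimeOffset.UtcNow)
    {
        return false; // still locked out
    }
    if (!VerifyPassword(password, user.PasswordHash, user.PasswordSalt))
    {
        // Lock after the 5th consecutive failure (illustrative policy).
        var lockUntil = user.FailedLoginAttempts + 1 >= 5
            ? DateTimeOffset.UtcNow.AddMinutes(15)
            : (DateTimeOffset?)null;
        await users.RecordFailedLoginAsync(tenantId, user.Id, lockUntil, ct);
        return false;
    }
    await users.RecordSuccessfulLoginAsync(tenantId, user.Id, ct);
    return true;
}

// Stand-in for the real Argon2id verification helper.
static bool VerifyPassword(string password, string? hash, string? salt) =>
    throw new NotImplementedException();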

View File

@@ -0,0 +1,55 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Authority.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Authority.Storage.Postgres;
/// <summary>
/// Extension methods for configuring Authority PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
/// <summary>
/// Adds Authority PostgreSQL storage services.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configuration">Configuration root.</param>
/// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddAuthorityPostgresStorage(
this IServiceCollection services,
IConfiguration configuration,
string sectionName = "Postgres:Authority")
{
// Bind to the default (unnamed) options instance that AuthorityDataSource consumes via IOptions<PostgresOptions>.
services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
services.AddSingleton<AuthorityDataSource>();
// Register repositories
services.AddScoped<ITenantRepository, TenantRepository>();
services.AddScoped<IUserRepository, UserRepository>();
return services;
}
/// <summary>
/// Adds Authority PostgreSQL storage services with explicit options.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configureOptions">Options configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddAuthorityPostgresStorage(
this IServiceCollection services,
Action<PostgresOptions> configureOptions)
{
services.Configure(configureOptions);
services.AddSingleton<AuthorityDataSource>();
// Register repositories
services.AddScoped<ITenantRepository, TenantRepository>();
services.AddScoped<IUserRepository, UserRepository>();
return services;
}
}
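Wiring this into a host is then a one-liner; a minimal sketch (host setup only, assuming a standard generic-host application):

using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using StellaOps.Authority.Storage.Postgres;
using StellaOps.Authority.Storage.Postgres.Repositories;

var builder = Host.CreateApplicationBuilder(args);
// Binds the default "Postgres:Authority" configuration section; alternatively use the
// Action<PostgresOptions> overload to set SchemaName and connection settings in code.
builder.Services.AddAuthorityPostgresStorage(builder.Configuration);

using var host = builder.Build();
// Repositories are scoped, so resolve them from a scope rather than the root provider.
using var scope = host.Services.CreateScope();
var tenants = scope.ServiceProvider.GetRequiredService<ITenantRepository>();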

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Authority.Storage.Postgres</RootNamespace>
</PropertyGroup>
<ItemGroup>
<None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,50 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Concelier.Storage.Postgres;
/// <summary>
/// PostgreSQL data source for the Concelier (vulnerability) module.
/// Manages connections for advisory ingestion, merging, and vulnerability data.
/// </summary>
/// <remarks>
/// The Concelier module stores global vulnerability data that is not tenant-scoped.
/// Advisories and their metadata are shared across all tenants.
/// </remarks>
public sealed class ConcelierDataSource : DataSourceBase
{
/// <summary>
/// Default schema name for Concelier/vulnerability tables.
/// </summary>
public const string DefaultSchemaName = "vuln";
/// <summary>
/// Creates a new Concelier data source.
/// </summary>
public ConcelierDataSource(IOptions<PostgresOptions> options, ILogger<ConcelierDataSource> logger)
: base(CreateOptions(options.Value), logger)
{
}
/// <inheritdoc />
protected override string ModuleName => "Concelier";
/// <inheritdoc />
protected override void ConfigureDataSourceBuilder(NpgsqlDataSourceBuilder builder)
{
base.ConfigureDataSourceBuilder(builder);
// Full-text search is handled by the tsvector column and triggers defined in the
// SQL migrations; no additional Npgsql data source configuration is needed here yet.
}
private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
{
if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
{
baseOptions.SchemaName = DefaultSchemaName;
}
return baseOptions;
}
}

View File

@@ -0,0 +1,261 @@
-- Vulnerability Schema Migration 001: Initial Schema
-- Creates the vuln schema for advisories and vulnerability data
-- Create schema
CREATE SCHEMA IF NOT EXISTS vuln;
-- Enable pg_trgm for fuzzy text search
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- Sources table (feed sources)
CREATE TABLE IF NOT EXISTS vuln.sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
key TEXT NOT NULL UNIQUE,
name TEXT NOT NULL,
source_type TEXT NOT NULL,
url TEXT,
priority INT NOT NULL DEFAULT 0,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
config JSONB NOT NULL DEFAULT '{}',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_sources_enabled ON vuln.sources(enabled, priority DESC);
-- Feed snapshots table
CREATE TABLE IF NOT EXISTS vuln.feed_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
source_id UUID NOT NULL REFERENCES vuln.sources(id),
snapshot_id TEXT NOT NULL,
advisory_count INT NOT NULL DEFAULT 0,
checksum TEXT,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(source_id, snapshot_id)
);
CREATE INDEX idx_feed_snapshots_source ON vuln.feed_snapshots(source_id);
CREATE INDEX idx_feed_snapshots_created ON vuln.feed_snapshots(created_at);
-- Advisory snapshots table (point-in-time snapshots)
CREATE TABLE IF NOT EXISTS vuln.advisory_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
feed_snapshot_id UUID NOT NULL REFERENCES vuln.feed_snapshots(id),
advisory_key TEXT NOT NULL,
content_hash TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(feed_snapshot_id, advisory_key)
);
CREATE INDEX idx_advisory_snapshots_feed ON vuln.advisory_snapshots(feed_snapshot_id);
CREATE INDEX idx_advisory_snapshots_key ON vuln.advisory_snapshots(advisory_key);
-- Advisories table (main vulnerability data)
CREATE TABLE IF NOT EXISTS vuln.advisories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_key TEXT NOT NULL UNIQUE,
primary_vuln_id TEXT NOT NULL,
source_id UUID REFERENCES vuln.sources(id),
title TEXT,
summary TEXT,
description TEXT,
severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'unknown')),
published_at TIMESTAMPTZ,
modified_at TIMESTAMPTZ,
withdrawn_at TIMESTAMPTZ,
provenance JSONB NOT NULL DEFAULT '{}',
raw_payload JSONB,
search_vector TSVECTOR,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_advisories_vuln_id ON vuln.advisories(primary_vuln_id);
CREATE INDEX idx_advisories_source ON vuln.advisories(source_id);
CREATE INDEX idx_advisories_severity ON vuln.advisories(severity);
CREATE INDEX idx_advisories_published ON vuln.advisories(published_at);
CREATE INDEX idx_advisories_modified ON vuln.advisories(modified_at);
CREATE INDEX idx_advisories_search ON vuln.advisories USING GIN(search_vector);
-- Advisory aliases table (CVE, GHSA, etc.)
CREATE TABLE IF NOT EXISTS vuln.advisory_aliases (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
alias_type TEXT NOT NULL,
alias_value TEXT NOT NULL,
is_primary BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(advisory_id, alias_type, alias_value)
);
CREATE INDEX idx_advisory_aliases_advisory ON vuln.advisory_aliases(advisory_id);
CREATE INDEX idx_advisory_aliases_value ON vuln.advisory_aliases(alias_type, alias_value);
CREATE INDEX idx_advisory_aliases_cve ON vuln.advisory_aliases(alias_value)
WHERE alias_type = 'CVE';
-- Advisory CVSS scores table
CREATE TABLE IF NOT EXISTS vuln.advisory_cvss (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
cvss_version TEXT NOT NULL,
vector_string TEXT NOT NULL,
base_score NUMERIC(3,1) NOT NULL,
base_severity TEXT,
exploitability_score NUMERIC(3,1),
impact_score NUMERIC(3,1),
source TEXT,
is_primary BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(advisory_id, cvss_version, source)
);
CREATE INDEX idx_advisory_cvss_advisory ON vuln.advisory_cvss(advisory_id);
CREATE INDEX idx_advisory_cvss_score ON vuln.advisory_cvss(base_score DESC);
-- Advisory affected packages table
CREATE TABLE IF NOT EXISTS vuln.advisory_affected (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
ecosystem TEXT NOT NULL,
package_name TEXT NOT NULL,
purl TEXT,
version_range JSONB NOT NULL DEFAULT '{}',
versions_affected TEXT[],
versions_fixed TEXT[],
database_specific JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_advisory_affected_advisory ON vuln.advisory_affected(advisory_id);
CREATE INDEX idx_advisory_affected_ecosystem ON vuln.advisory_affected(ecosystem, package_name);
CREATE INDEX idx_advisory_affected_purl ON vuln.advisory_affected(purl);
CREATE INDEX idx_advisory_affected_purl_trgm ON vuln.advisory_affected USING GIN(purl gin_trgm_ops);
-- Advisory references table
CREATE TABLE IF NOT EXISTS vuln.advisory_references (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
ref_type TEXT NOT NULL,
url TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_advisory_references_advisory ON vuln.advisory_references(advisory_id);
-- Advisory credits table
CREATE TABLE IF NOT EXISTS vuln.advisory_credits (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
name TEXT NOT NULL,
contact TEXT,
credit_type TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_advisory_credits_advisory ON vuln.advisory_credits(advisory_id);
-- Advisory weaknesses table (CWE)
CREATE TABLE IF NOT EXISTS vuln.advisory_weaknesses (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
cwe_id TEXT NOT NULL,
description TEXT,
source TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(advisory_id, cwe_id)
);
CREATE INDEX idx_advisory_weaknesses_advisory ON vuln.advisory_weaknesses(advisory_id);
CREATE INDEX idx_advisory_weaknesses_cwe ON vuln.advisory_weaknesses(cwe_id);
-- KEV flags table (Known Exploited Vulnerabilities)
CREATE TABLE IF NOT EXISTS vuln.kev_flags (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
cve_id TEXT NOT NULL,
vendor_project TEXT,
product TEXT,
vulnerability_name TEXT,
date_added DATE NOT NULL,
due_date DATE,
known_ransomware_use BOOLEAN NOT NULL DEFAULT FALSE,
notes TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(advisory_id, cve_id)
);
CREATE INDEX idx_kev_flags_advisory ON vuln.kev_flags(advisory_id);
CREATE INDEX idx_kev_flags_cve ON vuln.kev_flags(cve_id);
CREATE INDEX idx_kev_flags_date ON vuln.kev_flags(date_added);
-- Source states table (cursor tracking)
CREATE TABLE IF NOT EXISTS vuln.source_states (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
source_id UUID NOT NULL REFERENCES vuln.sources(id) UNIQUE,
cursor TEXT,
last_sync_at TIMESTAMPTZ,
last_success_at TIMESTAMPTZ,
last_error TEXT,
sync_count BIGINT NOT NULL DEFAULT 0,
error_count INT NOT NULL DEFAULT 0,
metadata JSONB NOT NULL DEFAULT '{}',
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_source_states_source ON vuln.source_states(source_id);
-- Merge events table (advisory merge audit)
CREATE TABLE IF NOT EXISTS vuln.merge_events (
id BIGSERIAL PRIMARY KEY,
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id),
source_id UUID REFERENCES vuln.sources(id),
event_type TEXT NOT NULL,
old_value JSONB,
new_value JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_merge_events_advisory ON vuln.merge_events(advisory_id);
CREATE INDEX idx_merge_events_created ON vuln.merge_events(created_at);
-- Function to update search vector
CREATE OR REPLACE FUNCTION vuln.update_advisory_search_vector()
RETURNS TRIGGER AS $$
BEGIN
NEW.search_vector =
setweight(to_tsvector('english', COALESCE(NEW.primary_vuln_id, '')), 'A') ||
setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'B') ||
setweight(to_tsvector('english', COALESCE(NEW.summary, '')), 'C') ||
setweight(to_tsvector('english', COALESCE(NEW.description, '')), 'D');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Trigger for search vector
CREATE TRIGGER trg_advisories_search_vector
BEFORE INSERT OR UPDATE ON vuln.advisories
FOR EACH ROW EXECUTE FUNCTION vuln.update_advisory_search_vector();
-- Update timestamp function
CREATE OR REPLACE FUNCTION vuln.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Triggers
CREATE TRIGGER trg_sources_updated_at
BEFORE UPDATE ON vuln.sources
FOR EACH ROW EXECUTE FUNCTION vuln.update_updated_at();
CREATE TRIGGER trg_advisories_updated_at
BEFORE UPDATE ON vuln.advisories
FOR EACH ROW EXECUTE FUNCTION vuln.update_updated_at();
CREATE TRIGGER trg_source_states_updated_at
BEFORE UPDATE ON vuln.source_states
FOR EACH ROW EXECUTE FUNCTION vuln.update_updated_at();
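The search_vector column and trigger above are what AdvisoryRepository.SearchAsync queries; an equivalent ad-hoc query (search terms and severity filter are illustrative) is:

-- Illustrative full-text search mirroring SearchAsync
SELECT advisory_key, primary_vuln_id, severity,
ts_rank(search_vector, websearch_to_tsquery('english', 'remote code execution')) AS rank
FROM vuln.advisories
WHERE search_vector @@ websearch_to_tsquery('english', 'remote code execution')
AND severity = 'critical'
ORDER BY rank DESC, modified_at DESC
LIMIT 20;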

View File

@@ -0,0 +1,82 @@
namespace StellaOps.Concelier.Storage.Postgres.Models;
/// <summary>
/// Represents an advisory entity in the vuln schema.
/// </summary>
public sealed class AdvisoryEntity
{
/// <summary>
/// Unique advisory identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Advisory key (unique identifier, e.g., "ghsa:GHSA-xxxx").
/// </summary>
public required string AdvisoryKey { get; init; }
/// <summary>
/// Primary vulnerability ID (CVE, GHSA, etc.).
/// </summary>
public required string PrimaryVulnId { get; init; }
/// <summary>
/// Source that provided this advisory.
/// </summary>
public Guid? SourceId { get; init; }
/// <summary>
/// Advisory title.
/// </summary>
public string? Title { get; init; }
/// <summary>
/// Brief summary.
/// </summary>
public string? Summary { get; init; }
/// <summary>
/// Full description.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Severity level.
/// </summary>
public string? Severity { get; init; }
/// <summary>
/// When the advisory was published.
/// </summary>
public DateTimeOffset? PublishedAt { get; init; }
/// <summary>
/// When the advisory was last modified.
/// </summary>
public DateTimeOffset? ModifiedAt { get; init; }
/// <summary>
/// When the advisory was withdrawn (if applicable).
/// </summary>
public DateTimeOffset? WithdrawnAt { get; init; }
/// <summary>
/// Provenance information as JSON.
/// </summary>
public string Provenance { get; init; } = "{}";
/// <summary>
/// Raw payload from the source as JSON.
/// </summary>
public string? RawPayload { get; init; }
/// <summary>
/// When the record was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the record was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
}

View File

@@ -0,0 +1,62 @@
namespace StellaOps.Concelier.Storage.Postgres.Models;
/// <summary>
/// Represents a vulnerability feed source entity.
/// </summary>
public sealed class SourceEntity
{
/// <summary>
/// Unique source identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Unique source key (e.g., "nvd", "ghsa", "osv").
/// </summary>
public required string Key { get; init; }
/// <summary>
/// Display name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Source type (e.g., "nvd", "osv", "github").
/// </summary>
public required string SourceType { get; init; }
/// <summary>
/// Source URL.
/// </summary>
public string? Url { get; init; }
/// <summary>
/// Priority for merge precedence (higher = more authoritative).
/// </summary>
public int Priority { get; init; }
/// <summary>
/// Source is enabled.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Source-specific configuration as JSON.
/// </summary>
public string Config { get; init; } = "{}";
/// <summary>
/// Source metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// When the record was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the record was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
}

View File

@@ -0,0 +1,320 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Concelier.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for advisory operations.
/// </summary>
/// <remarks>
/// Advisory data is global (not tenant-scoped) as vulnerability information
/// is shared across all tenants.
/// </remarks>
public sealed class AdvisoryRepository : RepositoryBase<ConcelierDataSource>, IAdvisoryRepository
{
private const string SystemTenantId = "_system";
/// <summary>
/// Creates a new advisory repository.
/// </summary>
public AdvisoryRepository(ConcelierDataSource dataSource, ILogger<AdvisoryRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<AdvisoryEntity> UpsertAsync(AdvisoryEntity advisory, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO vuln.advisories (
id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance, raw_payload
)
VALUES (
@id, @advisory_key, @primary_vuln_id, @source_id, @title, @summary, @description,
@severity, @published_at, @modified_at, @withdrawn_at, @provenance::jsonb, @raw_payload::jsonb
)
ON CONFLICT (advisory_key) DO UPDATE SET
primary_vuln_id = EXCLUDED.primary_vuln_id,
source_id = COALESCE(EXCLUDED.source_id, vuln.advisories.source_id),
title = COALESCE(EXCLUDED.title, vuln.advisories.title),
summary = COALESCE(EXCLUDED.summary, vuln.advisories.summary),
description = COALESCE(EXCLUDED.description, vuln.advisories.description),
severity = COALESCE(EXCLUDED.severity, vuln.advisories.severity),
published_at = COALESCE(EXCLUDED.published_at, vuln.advisories.published_at),
modified_at = COALESCE(EXCLUDED.modified_at, vuln.advisories.modified_at),
withdrawn_at = EXCLUDED.withdrawn_at,
provenance = vuln.advisories.provenance || EXCLUDED.provenance,
raw_payload = EXCLUDED.raw_payload
RETURNING id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", advisory.Id);
AddParameter(command, "advisory_key", advisory.AdvisoryKey);
AddParameter(command, "primary_vuln_id", advisory.PrimaryVulnId);
AddParameter(command, "source_id", advisory.SourceId);
AddParameter(command, "title", advisory.Title);
AddParameter(command, "summary", advisory.Summary);
AddParameter(command, "description", advisory.Description);
AddParameter(command, "severity", advisory.Severity);
AddParameter(command, "published_at", advisory.PublishedAt);
AddParameter(command, "modified_at", advisory.ModifiedAt);
AddParameter(command, "withdrawn_at", advisory.WithdrawnAt);
AddJsonbParameter(command, "provenance", advisory.Provenance);
AddJsonbParameter(command, "raw_payload", advisory.RawPayload);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapAdvisory(reader);
}
/// <inheritdoc />
public async Task<AdvisoryEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
FROM vuln.advisories
WHERE id = @id
""";
return await QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "id", id),
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<AdvisoryEntity?> GetByKeyAsync(string advisoryKey, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
FROM vuln.advisories
WHERE advisory_key = @advisory_key
""";
return await QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "advisory_key", advisoryKey),
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<AdvisoryEntity?> GetByVulnIdAsync(string vulnId, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
FROM vuln.advisories
WHERE primary_vuln_id = @vuln_id
ORDER BY modified_at DESC NULLS LAST, id
LIMIT 1
"""
return await QuerySingleOrDefaultAsync(
SystemTenantId,
sql,
cmd => AddParameter(cmd, "vuln_id", vulnId),
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<AdvisoryEntity>> SearchAsync(
string query,
string? severity = null,
int limit = 50,
int offset = 0,
CancellationToken cancellationToken = default)
{
var sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at,
ts_rank(search_vector, websearch_to_tsquery('english', @query)) as rank
FROM vuln.advisories
WHERE search_vector @@ websearch_to_tsquery('english', @query)
""";
if (!string.IsNullOrEmpty(severity))
{
sql += " AND severity = @severity";
}
sql += " ORDER BY rank DESC, modified_at DESC, id LIMIT @limit OFFSET @offset";
return await QueryAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "query", query);
if (!string.IsNullOrEmpty(severity))
{
AddParameter(cmd, "severity", severity);
}
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<AdvisoryEntity>> GetBySeverityAsync(
string severity,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
FROM vuln.advisories
WHERE severity = @severity
ORDER BY modified_at DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "severity", severity);
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<AdvisoryEntity>> GetModifiedSinceAsync(
DateTimeOffset since,
int limit = 1000,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
FROM vuln.advisories
WHERE modified_at > @since
ORDER BY modified_at, id
LIMIT @limit
""";
return await QueryAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "since", since);
AddParameter(cmd, "limit", limit);
},
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<AdvisoryEntity>> GetBySourceAsync(
Guid sourceId,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, advisory_key, primary_vuln_id, source_id, title, summary, description,
severity, published_at, modified_at, withdrawn_at, provenance::text, raw_payload::text,
created_at, updated_at
FROM vuln.advisories
WHERE source_id = @source_id
ORDER BY modified_at DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
SystemTenantId,
sql,
cmd =>
{
AddParameter(cmd, "source_id", sourceId);
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapAdvisory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<long> CountAsync(CancellationToken cancellationToken = default)
{
const string sql = "SELECT COUNT(*) FROM vuln.advisories";
var result = await ExecuteScalarAsync<long>(
SystemTenantId,
sql,
null,
cancellationToken).ConfigureAwait(false);
return result;
}
/// <inheritdoc />
public async Task<IDictionary<string, long>> CountBySeverityAsync(CancellationToken cancellationToken = default)
{
const string sql = """
SELECT COALESCE(severity, 'unknown') as severity, COUNT(*) as count
FROM vuln.advisories
GROUP BY severity
ORDER BY severity
""";
var results = await QueryAsync(
SystemTenantId,
sql,
null,
reader => (
Severity: reader.GetString(0),
Count: reader.GetInt64(1)
),
cancellationToken).ConfigureAwait(false);
return results.ToDictionary(r => r.Severity, r => r.Count);
}
private static AdvisoryEntity MapAdvisory(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(0),
AdvisoryKey = reader.GetString(1),
PrimaryVulnId = reader.GetString(2),
SourceId = GetNullableGuid(reader, 3),
Title = GetNullableString(reader, 4),
Summary = GetNullableString(reader, 5),
Description = GetNullableString(reader, 6),
Severity = GetNullableString(reader, 7),
PublishedAt = GetNullableDateTimeOffset(reader, 8),
ModifiedAt = GetNullableDateTimeOffset(reader, 9),
WithdrawnAt = GetNullableDateTimeOffset(reader, 10),
Provenance = reader.GetString(11),
RawPayload = GetNullableString(reader, 12),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(13),
UpdatedAt = reader.GetFieldValue<DateTimeOffset>(14)
};
}
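The SearchAsync query above depends on a search_vector tsvector column on vuln.advisories that the migration excerpts in this commit do not show. A minimal sketch of that column and its GIN index, assuming the vector is derived from title, summary, and description (the weights and generation strategy are illustrative, not the actual Concelier migration):

-- Hypothetical DDL sketch; the real Concelier migration may define this differently.
ALTER TABLE vuln.advisories
    ADD COLUMN IF NOT EXISTS search_vector tsvector
    GENERATED ALWAYS AS (
        setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(summary, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(description, '')), 'C')
    ) STORED;

-- GIN index so websearch_to_tsquery lookups avoid sequential scans.
CREATE INDEX IF NOT EXISTS idx_advisories_search
    ON vuln.advisories USING GIN (search_vector);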


@@ -0,0 +1,75 @@
using StellaOps.Concelier.Storage.Postgres.Models;
namespace StellaOps.Concelier.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for advisory operations.
/// </summary>
public interface IAdvisoryRepository
{
/// <summary>
/// Creates or updates an advisory (upsert by advisory_key).
/// </summary>
Task<AdvisoryEntity> UpsertAsync(AdvisoryEntity advisory, CancellationToken cancellationToken = default);
/// <summary>
/// Gets an advisory by ID.
/// </summary>
Task<AdvisoryEntity?> GetByIdAsync(Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets an advisory by key.
/// </summary>
Task<AdvisoryEntity?> GetByKeyAsync(string advisoryKey, CancellationToken cancellationToken = default);
/// <summary>
/// Gets an advisory by primary vulnerability ID (CVE, GHSA, etc.).
/// </summary>
Task<AdvisoryEntity?> GetByVulnIdAsync(string vulnId, CancellationToken cancellationToken = default);
/// <summary>
/// Searches advisories by full-text search.
/// </summary>
Task<IReadOnlyList<AdvisoryEntity>> SearchAsync(
string query,
string? severity = null,
int limit = 50,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets advisories by severity.
/// </summary>
Task<IReadOnlyList<AdvisoryEntity>> GetBySeverityAsync(
string severity,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets advisories modified since a given time.
/// </summary>
Task<IReadOnlyList<AdvisoryEntity>> GetModifiedSinceAsync(
DateTimeOffset since,
int limit = 1000,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets advisories by source.
/// </summary>
Task<IReadOnlyList<AdvisoryEntity>> GetBySourceAsync(
Guid sourceId,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Counts total advisories.
/// </summary>
Task<long> CountAsync(CancellationToken cancellationToken = default);
/// <summary>
/// Counts advisories by severity.
/// </summary>
Task<IDictionary<string, long>> CountBySeverityAsync(CancellationToken cancellationToken = default);
}


@@ -0,0 +1,53 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Concelier.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Concelier.Storage.Postgres;
/// <summary>
/// Extension methods for configuring Concelier PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
/// <summary>
/// Adds Concelier PostgreSQL storage services.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configuration">Configuration root.</param>
/// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddConcelierPostgresStorage(
this IServiceCollection services,
IConfiguration configuration,
string sectionName = "Postgres:Concelier")
{
// Bind the unnamed options instance; ConcelierDataSource consumes IOptions<PostgresOptions>.Value.
services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
services.AddSingleton<ConcelierDataSource>();
// Register repositories
services.AddScoped<IAdvisoryRepository, AdvisoryRepository>();
return services;
}
/// <summary>
/// Adds Concelier PostgreSQL storage services with explicit options.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configureOptions">Options configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddConcelierPostgresStorage(
this IServiceCollection services,
Action<PostgresOptions> configureOptions)
{
services.Configure(configureOptions);
services.AddSingleton<ConcelierDataSource>();
// Register repositories
services.AddScoped<IAdvisoryRepository, AdvisoryRepository>();
return services;
}
}
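A usage sketch for the registration above, assuming a standard host builder and an appsettings section named Postgres:Concelier (both illustrative, not part of this commit):

// Hypothetical wiring; builder and scope names are assumptions.
var builder = WebApplication.CreateBuilder(args);
builder.Services.AddConcelierPostgresStorage(builder.Configuration);

var app = builder.Build();
using var scope = app.Services.CreateScope();
var advisories = scope.ServiceProvider.GetRequiredService<IAdvisoryRepository>();
var recent = await advisories.GetModifiedSinceAsync(DateTimeOffset.UtcNow.AddDays(-1));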


@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Concelier.Storage.Postgres</RootNamespace>
</PropertyGroup>
<ItemGroup>
<None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>


@@ -0,0 +1,50 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Excititor.Storage.Postgres;
/// <summary>
/// PostgreSQL data source for the Excititor (VEX) module.
/// Manages connections with tenant context for VEX statements and dependency graphs.
/// </summary>
/// <remarks>
/// The Excititor module handles high-volume graph data (nodes/edges) and requires
/// optimized queries for graph traversal and VEX consensus computation.
/// </remarks>
public sealed class ExcititorDataSource : DataSourceBase
{
/// <summary>
/// Default schema name for Excititor/VEX tables.
/// </summary>
public const string DefaultSchemaName = "vex";
/// <summary>
/// Creates a new Excititor data source.
/// </summary>
public ExcititorDataSource(IOptions<PostgresOptions> options, ILogger<ExcititorDataSource> logger)
: base(CreateOptions(options.Value), logger)
{
}
/// <inheritdoc />
protected override string ModuleName => "Excititor";
/// <inheritdoc />
protected override void ConfigureDataSourceBuilder(NpgsqlDataSourceBuilder builder)
{
base.ConfigureDataSourceBuilder(builder);
// Placeholder for future high-throughput graph tuning; no additional configuration yet.
}
private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
{
if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
{
baseOptions.SchemaName = DefaultSchemaName;
}
return baseOptions;
}
}


@@ -0,0 +1,324 @@
-- VEX Schema Migration 001: Initial Schema
-- Creates the vex schema for VEX statements and dependency graphs
-- Create schema
CREATE SCHEMA IF NOT EXISTS vex;
-- Projects table
CREATE TABLE IF NOT EXISTS vex.projects (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
display_name TEXT,
description TEXT,
repository_url TEXT,
default_branch TEXT,
settings JSONB NOT NULL DEFAULT '{}',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT,
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_projects_tenant ON vex.projects(tenant_id);
-- Graph revisions table
CREATE TABLE IF NOT EXISTS vex.graph_revisions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES vex.projects(id) ON DELETE CASCADE,
revision_id TEXT NOT NULL UNIQUE,
parent_revision_id TEXT,
sbom_digest TEXT NOT NULL,
feed_snapshot_id TEXT,
policy_version TEXT,
node_count INT NOT NULL DEFAULT 0,
edge_count INT NOT NULL DEFAULT 0,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT
);
CREATE INDEX idx_graph_revisions_project ON vex.graph_revisions(project_id);
CREATE INDEX idx_graph_revisions_revision ON vex.graph_revisions(revision_id);
CREATE INDEX idx_graph_revisions_created ON vex.graph_revisions(project_id, created_at DESC);
-- Graph nodes table (BIGSERIAL for high volume)
CREATE TABLE IF NOT EXISTS vex.graph_nodes (
id BIGSERIAL PRIMARY KEY,
graph_revision_id UUID NOT NULL REFERENCES vex.graph_revisions(id) ON DELETE CASCADE,
node_key TEXT NOT NULL,
node_type TEXT NOT NULL,
purl TEXT,
name TEXT,
version TEXT,
attributes JSONB NOT NULL DEFAULT '{}',
UNIQUE(graph_revision_id, node_key)
);
CREATE INDEX idx_graph_nodes_revision ON vex.graph_nodes(graph_revision_id);
CREATE INDEX idx_graph_nodes_key ON vex.graph_nodes(graph_revision_id, node_key);
CREATE INDEX idx_graph_nodes_purl ON vex.graph_nodes(purl);
CREATE INDEX idx_graph_nodes_type ON vex.graph_nodes(graph_revision_id, node_type);
-- Graph edges table (BIGSERIAL for high volume)
CREATE TABLE IF NOT EXISTS vex.graph_edges (
id BIGSERIAL PRIMARY KEY,
graph_revision_id UUID NOT NULL REFERENCES vex.graph_revisions(id) ON DELETE CASCADE,
from_node_id BIGINT NOT NULL REFERENCES vex.graph_nodes(id) ON DELETE CASCADE,
to_node_id BIGINT NOT NULL REFERENCES vex.graph_nodes(id) ON DELETE CASCADE,
edge_type TEXT NOT NULL,
attributes JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_graph_edges_revision ON vex.graph_edges(graph_revision_id);
CREATE INDEX idx_graph_edges_from ON vex.graph_edges(from_node_id);
CREATE INDEX idx_graph_edges_to ON vex.graph_edges(to_node_id);
-- VEX statements table
CREATE TABLE IF NOT EXISTS vex.statements (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
project_id UUID REFERENCES vex.projects(id),
graph_revision_id UUID REFERENCES vex.graph_revisions(id),
vulnerability_id TEXT NOT NULL,
product_id TEXT,
status TEXT NOT NULL CHECK (status IN (
'not_affected', 'affected', 'fixed', 'under_investigation'
)),
justification TEXT CHECK (justification IN (
'component_not_present', 'vulnerable_code_not_present',
'vulnerable_code_not_in_execute_path', 'vulnerable_code_cannot_be_controlled_by_adversary',
'inline_mitigations_already_exist'
)),
impact_statement TEXT,
action_statement TEXT,
action_statement_timestamp TIMESTAMPTZ,
first_issued TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW(),
source TEXT,
source_url TEXT,
evidence JSONB NOT NULL DEFAULT '{}',
provenance JSONB NOT NULL DEFAULT '{}',
metadata JSONB NOT NULL DEFAULT '{}',
created_by TEXT
);
CREATE INDEX idx_statements_tenant ON vex.statements(tenant_id);
CREATE INDEX idx_statements_project ON vex.statements(project_id);
CREATE INDEX idx_statements_revision ON vex.statements(graph_revision_id);
CREATE INDEX idx_statements_vuln ON vex.statements(vulnerability_id);
CREATE INDEX idx_statements_status ON vex.statements(tenant_id, status);
-- VEX observations table
CREATE TABLE IF NOT EXISTS vex.observations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
statement_id UUID REFERENCES vex.statements(id) ON DELETE CASCADE,
vulnerability_id TEXT NOT NULL,
product_id TEXT NOT NULL,
observed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
observer TEXT NOT NULL,
observation_type TEXT NOT NULL,
confidence NUMERIC(3,2),
details JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, vulnerability_id, product_id, observer, observation_type)
);
CREATE INDEX idx_observations_tenant ON vex.observations(tenant_id);
CREATE INDEX idx_observations_statement ON vex.observations(statement_id);
CREATE INDEX idx_observations_vuln ON vex.observations(vulnerability_id, product_id);
-- Linksets table
CREATE TABLE IF NOT EXISTS vex.linksets (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
source_type TEXT NOT NULL,
source_url TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
priority INT NOT NULL DEFAULT 0,
filter JSONB NOT NULL DEFAULT '{}',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_linksets_tenant ON vex.linksets(tenant_id);
CREATE INDEX idx_linksets_enabled ON vex.linksets(tenant_id, enabled, priority DESC);
-- Linkset events table
CREATE TABLE IF NOT EXISTS vex.linkset_events (
id BIGSERIAL PRIMARY KEY,
linkset_id UUID NOT NULL REFERENCES vex.linksets(id) ON DELETE CASCADE,
event_type TEXT NOT NULL,
statement_count INT NOT NULL DEFAULT 0,
error_message TEXT,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_linkset_events_linkset ON vex.linkset_events(linkset_id);
CREATE INDEX idx_linkset_events_created ON vex.linkset_events(created_at);
-- Consensus table (VEX consensus state)
CREATE TABLE IF NOT EXISTS vex.consensus (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
vulnerability_id TEXT NOT NULL,
product_id TEXT NOT NULL,
consensus_status TEXT NOT NULL,
contributing_statements UUID[] NOT NULL DEFAULT '{}',
confidence NUMERIC(3,2),
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
metadata JSONB NOT NULL DEFAULT '{}',
UNIQUE(tenant_id, vulnerability_id, product_id)
);
CREATE INDEX idx_consensus_tenant ON vex.consensus(tenant_id);
CREATE INDEX idx_consensus_vuln ON vex.consensus(vulnerability_id, product_id);
-- Consensus holds table
CREATE TABLE IF NOT EXISTS vex.consensus_holds (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
consensus_id UUID NOT NULL REFERENCES vex.consensus(id) ON DELETE CASCADE,
hold_type TEXT NOT NULL,
reason TEXT NOT NULL,
held_by TEXT NOT NULL,
held_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
released_at TIMESTAMPTZ,
released_by TEXT,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_consensus_holds_consensus ON vex.consensus_holds(consensus_id);
CREATE INDEX idx_consensus_holds_active ON vex.consensus_holds(consensus_id, released_at)
WHERE released_at IS NULL;
-- Unknown snapshots table
CREATE TABLE IF NOT EXISTS vex.unknowns_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
project_id UUID REFERENCES vex.projects(id),
graph_revision_id UUID REFERENCES vex.graph_revisions(id),
snapshot_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
unknown_count INT NOT NULL DEFAULT 0,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_unknowns_snapshots_tenant ON vex.unknowns_snapshots(tenant_id);
CREATE INDEX idx_unknowns_snapshots_project ON vex.unknowns_snapshots(project_id);
-- Unknown items table
CREATE TABLE IF NOT EXISTS vex.unknown_items (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
snapshot_id UUID NOT NULL REFERENCES vex.unknowns_snapshots(id) ON DELETE CASCADE,
vulnerability_id TEXT NOT NULL,
product_id TEXT,
reason TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_unknown_items_snapshot ON vex.unknown_items(snapshot_id);
CREATE INDEX idx_unknown_items_vuln ON vex.unknown_items(vulnerability_id);
-- Evidence manifests table
CREATE TABLE IF NOT EXISTS vex.evidence_manifests (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
statement_id UUID REFERENCES vex.statements(id) ON DELETE CASCADE,
manifest_type TEXT NOT NULL,
content_hash TEXT NOT NULL,
content JSONB NOT NULL,
source TEXT,
collected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_evidence_manifests_tenant ON vex.evidence_manifests(tenant_id);
CREATE INDEX idx_evidence_manifests_statement ON vex.evidence_manifests(statement_id);
-- CVSS receipts table
CREATE TABLE IF NOT EXISTS vex.cvss_receipts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
statement_id UUID REFERENCES vex.statements(id) ON DELETE CASCADE,
vulnerability_id TEXT NOT NULL,
cvss_version TEXT NOT NULL,
vector_string TEXT NOT NULL,
base_score NUMERIC(3,1) NOT NULL,
environmental_score NUMERIC(3,1),
temporal_score NUMERIC(3,1),
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_cvss_receipts_tenant ON vex.cvss_receipts(tenant_id);
CREATE INDEX idx_cvss_receipts_statement ON vex.cvss_receipts(statement_id);
CREATE INDEX idx_cvss_receipts_vuln ON vex.cvss_receipts(vulnerability_id);
-- Attestations table
CREATE TABLE IF NOT EXISTS vex.attestations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
statement_id UUID REFERENCES vex.statements(id),
subject_digest TEXT NOT NULL,
predicate_type TEXT NOT NULL,
predicate JSONB NOT NULL,
signature TEXT,
signature_algorithm TEXT,
signed_by TEXT,
signed_at TIMESTAMPTZ,
verified BOOLEAN NOT NULL DEFAULT FALSE,
verified_at TIMESTAMPTZ,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_attestations_tenant ON vex.attestations(tenant_id);
CREATE INDEX idx_attestations_statement ON vex.attestations(statement_id);
CREATE INDEX idx_attestations_subject ON vex.attestations(subject_digest);
-- Timeline events table
CREATE TABLE IF NOT EXISTS vex.timeline_events (
id BIGSERIAL PRIMARY KEY,
tenant_id TEXT NOT NULL,
project_id UUID REFERENCES vex.projects(id),
statement_id UUID REFERENCES vex.statements(id),
event_type TEXT NOT NULL,
event_data JSONB NOT NULL DEFAULT '{}',
actor TEXT,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_timeline_events_tenant ON vex.timeline_events(tenant_id);
CREATE INDEX idx_timeline_events_project ON vex.timeline_events(project_id);
CREATE INDEX idx_timeline_events_statement ON vex.timeline_events(statement_id);
CREATE INDEX idx_timeline_events_created ON vex.timeline_events(tenant_id, created_at);
CREATE INDEX idx_timeline_events_correlation ON vex.timeline_events(correlation_id);
-- Update timestamp function
CREATE OR REPLACE FUNCTION vex.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Triggers
CREATE TRIGGER trg_projects_updated_at
BEFORE UPDATE ON vex.projects
FOR EACH ROW EXECUTE FUNCTION vex.update_updated_at();
CREATE TRIGGER trg_linksets_updated_at
BEFORE UPDATE ON vex.linksets
FOR EACH ROW EXECUTE FUNCTION vex.update_updated_at();
-- vex.statements tracks modification time in last_updated (it has no updated_at
-- column), so it needs its own trigger function; reusing vex.update_updated_at()
-- would fail at runtime on the first UPDATE.
CREATE OR REPLACE FUNCTION vex.update_last_updated()
RETURNS TRIGGER AS $$
BEGIN
    NEW.last_updated = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trg_statements_last_updated
BEFORE UPDATE ON vex.statements
FOR EACH ROW EXECUTE FUNCTION vex.update_last_updated();
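The graph tables above back the traversal workloads called out in ExcititorDataSource. A sketch of a downstream walk within a single revision, assuming @revision and @root_key are bound by the caller (Npgsql-style parameters; names and the depth bound are illustrative):

-- Hypothetical traversal sketch over vex.graph_nodes / vex.graph_edges.
WITH RECURSIVE reachable AS (
    SELECT n.id, n.node_key, 0 AS depth
    FROM vex.graph_nodes n
    WHERE n.graph_revision_id = @revision AND n.node_key = @root_key
    UNION ALL
    SELECT t.id, t.node_key, r.depth + 1
    FROM reachable r
    JOIN vex.graph_edges e ON e.from_node_id = r.id AND e.graph_revision_id = @revision
    JOIN vex.graph_nodes t ON t.id = e.to_node_id
    WHERE r.depth < 32  -- bound the walk in case the graph contains cycles
)
SELECT DISTINCT node_key FROM reachable;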


@@ -0,0 +1,67 @@
namespace StellaOps.Excititor.Storage.Postgres.Models;
/// <summary>
/// Represents a project entity in the vex schema.
/// </summary>
public sealed class ProjectEntity
{
/// <summary>
/// Unique project identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this project belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Project name (unique per tenant).
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Display name.
/// </summary>
public string? DisplayName { get; init; }
/// <summary>
/// Project description.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Repository URL.
/// </summary>
public string? RepositoryUrl { get; init; }
/// <summary>
/// Default branch name.
/// </summary>
public string? DefaultBranch { get; init; }
/// <summary>
/// Project settings as JSON.
/// </summary>
public string Settings { get; init; } = "{}";
/// <summary>
/// Project metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// When the project was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the project was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
/// <summary>
/// User who created the project.
/// </summary>
public string? CreatedBy { get; init; }
}


@@ -0,0 +1,134 @@
namespace StellaOps.Excititor.Storage.Postgres.Models;
/// <summary>
/// VEX status values per OpenVEX specification.
/// </summary>
public enum VexStatus
{
/// <summary>Product is not affected by the vulnerability.</summary>
NotAffected,
/// <summary>Product is affected by the vulnerability.</summary>
Affected,
/// <summary>Vulnerability is fixed in this product version.</summary>
Fixed,
/// <summary>Vulnerability is under investigation.</summary>
UnderInvestigation
}
/// <summary>
/// VEX justification codes per OpenVEX specification.
/// </summary>
public enum VexJustification
{
/// <summary>The vulnerable component is not present.</summary>
ComponentNotPresent,
/// <summary>The vulnerable code is not present.</summary>
VulnerableCodeNotPresent,
/// <summary>The vulnerable code is not in the execute path.</summary>
VulnerableCodeNotInExecutePath,
/// <summary>The vulnerable code cannot be controlled by an adversary.</summary>
VulnerableCodeCannotBeControlledByAdversary,
/// <summary>Inline mitigations already exist.</summary>
InlineMitigationsAlreadyExist
}
/// <summary>
/// Represents a VEX statement entity in the vex schema.
/// </summary>
public sealed class VexStatementEntity
{
/// <summary>
/// Unique statement identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this statement belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Project this statement applies to.
/// </summary>
public Guid? ProjectId { get; init; }
/// <summary>
/// Graph revision this statement is associated with.
/// </summary>
public Guid? GraphRevisionId { get; init; }
/// <summary>
/// Vulnerability ID (CVE, GHSA, etc.).
/// </summary>
public required string VulnerabilityId { get; init; }
/// <summary>
/// Product identifier (PURL or product key).
/// </summary>
public string? ProductId { get; init; }
/// <summary>
/// VEX status.
/// </summary>
public required VexStatus Status { get; init; }
/// <summary>
/// Justification for not_affected status.
/// </summary>
public VexJustification? Justification { get; init; }
/// <summary>
/// Impact statement describing effects.
/// </summary>
public string? ImpactStatement { get; init; }
/// <summary>
/// Action statement describing remediation.
/// </summary>
public string? ActionStatement { get; init; }
/// <summary>
/// When action statement was issued.
/// </summary>
public DateTimeOffset? ActionStatementTimestamp { get; init; }
/// <summary>
/// When statement was first issued.
/// </summary>
public DateTimeOffset FirstIssued { get; init; }
/// <summary>
/// When statement was last updated.
/// </summary>
public DateTimeOffset LastUpdated { get; init; }
/// <summary>
/// Source of the statement.
/// </summary>
public string? Source { get; init; }
/// <summary>
/// URL to source document.
/// </summary>
public string? SourceUrl { get; init; }
/// <summary>
/// Evidence supporting the statement as JSON.
/// </summary>
public string Evidence { get; init; } = "{}";
/// <summary>
/// Provenance information as JSON.
/// </summary>
public string Provenance { get; init; } = "{}";
/// <summary>
/// Statement metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// User who created the statement.
/// </summary>
public string? CreatedBy { get; init; }
}


@@ -0,0 +1,75 @@
using StellaOps.Excititor.Storage.Postgres.Models;
namespace StellaOps.Excititor.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for VEX statement operations.
/// </summary>
public interface IVexStatementRepository
{
/// <summary>
/// Creates a new VEX statement.
/// </summary>
Task<VexStatementEntity> CreateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a VEX statement by ID.
/// </summary>
Task<VexStatementEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets VEX statements for a vulnerability.
/// </summary>
Task<IReadOnlyList<VexStatementEntity>> GetByVulnerabilityAsync(
string tenantId,
string vulnerabilityId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets VEX statements for a product.
/// </summary>
Task<IReadOnlyList<VexStatementEntity>> GetByProductAsync(
string tenantId,
string productId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets VEX statements for a project.
/// </summary>
Task<IReadOnlyList<VexStatementEntity>> GetByProjectAsync(
string tenantId,
Guid projectId,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets VEX statements by status.
/// </summary>
Task<IReadOnlyList<VexStatementEntity>> GetByStatusAsync(
string tenantId,
VexStatus status,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates a VEX statement.
/// </summary>
Task<bool> UpdateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default);
/// <summary>
/// Deletes a VEX statement.
/// </summary>
Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets the effective VEX status for a vulnerability/product combination.
/// Applies lattice logic for status precedence.
/// </summary>
Task<VexStatementEntity?> GetEffectiveStatementAsync(
string tenantId,
string vulnerabilityId,
string productId,
CancellationToken cancellationToken = default);
}


@@ -0,0 +1,385 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Excititor.Storage.Postgres.Models;
using StellaOps.Infrastructure.Postgres.Repositories;
namespace StellaOps.Excititor.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for VEX statement operations.
/// </summary>
public sealed class VexStatementRepository : RepositoryBase<ExcititorDataSource>, IVexStatementRepository
{
/// <summary>
/// Creates a new VEX statement repository.
/// </summary>
public VexStatementRepository(ExcititorDataSource dataSource, ILogger<VexStatementRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<VexStatementEntity> CreateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO vex.statements (
id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
source, source_url, evidence, provenance, metadata, created_by
)
VALUES (
@id, @tenant_id, @project_id, @graph_revision_id, @vulnerability_id, @product_id,
@status, @justification, @impact_statement, @action_statement, @action_statement_timestamp,
@source, @source_url, @evidence::jsonb, @provenance::jsonb, @metadata::jsonb, @created_by
)
RETURNING id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
""";
await using var connection = await DataSource.OpenConnectionAsync(statement.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddStatementParameters(command, statement);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapStatement(reader);
}
/// <inheritdoc />
public async Task<VexStatementEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
FROM vex.statements
WHERE tenant_id = @tenant_id AND id = @id
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
MapStatement,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<VexStatementEntity>> GetByVulnerabilityAsync(
string tenantId,
string vulnerabilityId,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
FROM vex.statements
WHERE tenant_id = @tenant_id AND vulnerability_id = @vulnerability_id
ORDER BY last_updated DESC, id
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "vulnerability_id", vulnerabilityId);
},
MapStatement,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<VexStatementEntity>> GetByProductAsync(
string tenantId,
string productId,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
FROM vex.statements
WHERE tenant_id = @tenant_id AND product_id = @product_id
ORDER BY last_updated DESC, id
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "product_id", productId);
},
MapStatement,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<VexStatementEntity>> GetByProjectAsync(
string tenantId,
Guid projectId,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
FROM vex.statements
WHERE tenant_id = @tenant_id AND project_id = @project_id
ORDER BY last_updated DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "project_id", projectId);
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapStatement,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<VexStatementEntity>> GetByStatusAsync(
string tenantId,
VexStatus status,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
FROM vex.statements
WHERE tenant_id = @tenant_id AND status = @status
ORDER BY last_updated DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "status", StatusToString(status));
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapStatement,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<bool> UpdateAsync(VexStatementEntity statement, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE vex.statements
SET status = @status,
justification = @justification,
impact_statement = @impact_statement,
action_statement = @action_statement,
action_statement_timestamp = @action_statement_timestamp,
source = @source,
source_url = @source_url,
evidence = @evidence::jsonb,
provenance = @provenance::jsonb,
metadata = @metadata::jsonb
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
statement.TenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", statement.TenantId);
AddParameter(cmd, "id", statement.Id);
AddParameter(cmd, "status", StatusToString(statement.Status));
AddParameter(cmd, "justification", statement.Justification.HasValue
? JustificationToString(statement.Justification.Value)
: null);
AddParameter(cmd, "impact_statement", statement.ImpactStatement);
AddParameter(cmd, "action_statement", statement.ActionStatement);
AddParameter(cmd, "action_statement_timestamp", statement.ActionStatementTimestamp);
AddParameter(cmd, "source", statement.Source);
AddParameter(cmd, "source_url", statement.SourceUrl);
AddJsonbParameter(cmd, "evidence", statement.Evidence);
AddJsonbParameter(cmd, "provenance", statement.Provenance);
AddJsonbParameter(cmd, "metadata", statement.Metadata);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM vex.statements WHERE tenant_id = @tenant_id AND id = @id";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<VexStatementEntity?> GetEffectiveStatementAsync(
string tenantId,
string vulnerabilityId,
string productId,
CancellationToken cancellationToken = default)
{
// VEX lattice precedence: fixed > not_affected > affected > under_investigation
const string sql = """
SELECT id, tenant_id, project_id, graph_revision_id, vulnerability_id, product_id,
status, justification, impact_statement, action_statement, action_statement_timestamp,
first_issued, last_updated, source, source_url,
evidence::text, provenance::text, metadata::text, created_by
FROM vex.statements
WHERE tenant_id = @tenant_id
AND vulnerability_id = @vulnerability_id
AND product_id = @product_id
ORDER BY
CASE status
WHEN 'fixed' THEN 1
WHEN 'not_affected' THEN 2
WHEN 'affected' THEN 3
WHEN 'under_investigation' THEN 4
END,
last_updated DESC
LIMIT 1
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "vulnerability_id", vulnerabilityId);
AddParameter(cmd, "product_id", productId);
},
MapStatement,
cancellationToken).ConfigureAwait(false);
}
private static void AddStatementParameters(NpgsqlCommand command, VexStatementEntity statement)
{
AddParameter(command, "id", statement.Id);
AddParameter(command, "tenant_id", statement.TenantId);
AddParameter(command, "project_id", statement.ProjectId);
AddParameter(command, "graph_revision_id", statement.GraphRevisionId);
AddParameter(command, "vulnerability_id", statement.VulnerabilityId);
AddParameter(command, "product_id", statement.ProductId);
AddParameter(command, "status", StatusToString(statement.Status));
AddParameter(command, "justification", statement.Justification.HasValue
? JustificationToString(statement.Justification.Value)
: null);
AddParameter(command, "impact_statement", statement.ImpactStatement);
AddParameter(command, "action_statement", statement.ActionStatement);
AddParameter(command, "action_statement_timestamp", statement.ActionStatementTimestamp);
AddParameter(command, "source", statement.Source);
AddParameter(command, "source_url", statement.SourceUrl);
AddJsonbParameter(command, "evidence", statement.Evidence);
AddJsonbParameter(command, "provenance", statement.Provenance);
AddJsonbParameter(command, "metadata", statement.Metadata);
AddParameter(command, "created_by", statement.CreatedBy);
}
private static VexStatementEntity MapStatement(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(0),
TenantId = reader.GetString(1),
ProjectId = GetNullableGuid(reader, 2),
GraphRevisionId = GetNullableGuid(reader, 3),
VulnerabilityId = reader.GetString(4),
ProductId = GetNullableString(reader, 5),
Status = ParseStatus(reader.GetString(6)),
Justification = ParseJustification(GetNullableString(reader, 7)),
ImpactStatement = GetNullableString(reader, 8),
ActionStatement = GetNullableString(reader, 9),
ActionStatementTimestamp = GetNullableDateTimeOffset(reader, 10),
FirstIssued = reader.GetFieldValue<DateTimeOffset>(11),
LastUpdated = reader.GetFieldValue<DateTimeOffset>(12),
Source = GetNullableString(reader, 13),
SourceUrl = GetNullableString(reader, 14),
Evidence = reader.GetString(15),
Provenance = reader.GetString(16),
Metadata = reader.GetString(17),
CreatedBy = GetNullableString(reader, 18)
};
private static string StatusToString(VexStatus status) => status switch
{
VexStatus.NotAffected => "not_affected",
VexStatus.Affected => "affected",
VexStatus.Fixed => "fixed",
VexStatus.UnderInvestigation => "under_investigation",
_ => throw new ArgumentException($"Unknown VEX status: {status}", nameof(status))
};
private static VexStatus ParseStatus(string status) => status switch
{
"not_affected" => VexStatus.NotAffected,
"affected" => VexStatus.Affected,
"fixed" => VexStatus.Fixed,
"under_investigation" => VexStatus.UnderInvestigation,
_ => throw new ArgumentException($"Unknown VEX status: {status}", nameof(status))
};
private static string JustificationToString(VexJustification justification) => justification switch
{
VexJustification.ComponentNotPresent => "component_not_present",
VexJustification.VulnerableCodeNotPresent => "vulnerable_code_not_present",
VexJustification.VulnerableCodeNotInExecutePath => "vulnerable_code_not_in_execute_path",
VexJustification.VulnerableCodeCannotBeControlledByAdversary => "vulnerable_code_cannot_be_controlled_by_adversary",
VexJustification.InlineMitigationsAlreadyExist => "inline_mitigations_already_exist",
_ => throw new ArgumentException($"Unknown VEX justification: {justification}", nameof(justification))
};
private static VexJustification? ParseJustification(string? justification) => justification switch
{
null => null,
"component_not_present" => VexJustification.ComponentNotPresent,
"vulnerable_code_not_present" => VexJustification.VulnerableCodeNotPresent,
"vulnerable_code_not_in_execute_path" => VexJustification.VulnerableCodeNotInExecutePath,
"vulnerable_code_cannot_be_controlled_by_adversary" => VexJustification.VulnerableCodeCannotBeControlledByAdversary,
"inline_mitigations_already_exist" => VexJustification.InlineMitigationsAlreadyExist,
_ => throw new ArgumentException($"Unknown VEX justification: {justification}", nameof(justification))
};
}
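Given the precedence encoded in GetEffectiveStatementAsync, a short illustration of the expected behavior; the tenant, identifiers, and the repository/ct variables are hypothetical:

// If both an 'affected' and a 'fixed' statement exist for the same
// vulnerability/product pair, the lattice resolves to 'fixed'; ties within
// a status tier fall back to the most recently updated statement.
var effective = await repository.GetEffectiveStatementAsync(
    "tenant-a", "CVE-2025-0001", "pkg:npm/example@1.2.3", ct);
// effective?.Status == VexStatus.Fixed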


@@ -0,0 +1,53 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Excititor.Storage.Postgres.Repositories;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Excititor.Storage.Postgres;
/// <summary>
/// Extension methods for configuring Excititor PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
/// <summary>
/// Adds Excititor PostgreSQL storage services.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configuration">Configuration root.</param>
/// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddExcititorPostgresStorage(
this IServiceCollection services,
IConfiguration configuration,
string sectionName = "Postgres:Excititor")
{
// Bind the unnamed options instance; ExcititorDataSource consumes IOptions<PostgresOptions>.Value.
services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
services.AddSingleton<ExcititorDataSource>();
// Register repositories
services.AddScoped<IVexStatementRepository, VexStatementRepository>();
return services;
}
/// <summary>
/// Adds Excititor PostgreSQL storage services with explicit options.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configureOptions">Options configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddExcititorPostgresStorage(
this IServiceCollection services,
Action<PostgresOptions> configureOptions)
{
services.Configure(configureOptions);
services.AddSingleton<ExcititorDataSource>();
// Register repositories
services.AddScoped<IVexStatementRepository, VexStatementRepository>();
return services;
}
}


@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Excititor.Storage.Postgres</RootNamespace>
</PropertyGroup>
<ItemGroup>
<None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>


@@ -0,0 +1,326 @@
-- Notify Schema Migration 001: Initial Schema
-- Creates the notify schema for notifications, channels, and delivery tracking
-- Create schema
CREATE SCHEMA IF NOT EXISTS notify;
-- Channel types
DO $$ BEGIN
CREATE TYPE notify.channel_type AS ENUM (
'email', 'slack', 'teams', 'webhook', 'pagerduty', 'opsgenie'
);
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
-- Delivery status
DO $$ BEGIN
CREATE TYPE notify.delivery_status AS ENUM (
'pending', 'queued', 'sending', 'sent', 'delivered', 'failed', 'bounced'
);
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
-- Channels table
CREATE TABLE IF NOT EXISTS notify.channels (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
channel_type notify.channel_type NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
config JSONB NOT NULL DEFAULT '{}',
credentials JSONB,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT,
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_channels_tenant ON notify.channels(tenant_id);
CREATE INDEX idx_channels_type ON notify.channels(tenant_id, channel_type);
-- Rules table (notification routing rules)
CREATE TABLE IF NOT EXISTS notify.rules (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
priority INT NOT NULL DEFAULT 0,
event_types TEXT[] NOT NULL DEFAULT '{}',
filter JSONB NOT NULL DEFAULT '{}',
channel_ids UUID[] NOT NULL DEFAULT '{}',
template_id UUID,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_rules_tenant ON notify.rules(tenant_id);
CREATE INDEX idx_rules_enabled ON notify.rules(tenant_id, enabled, priority DESC);
-- Templates table
CREATE TABLE IF NOT EXISTS notify.templates (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
channel_type notify.channel_type NOT NULL,
subject_template TEXT,
body_template TEXT NOT NULL,
locale TEXT NOT NULL DEFAULT 'en',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name, channel_type, locale)
);
CREATE INDEX idx_templates_tenant ON notify.templates(tenant_id);
-- Deliveries table
CREATE TABLE IF NOT EXISTS notify.deliveries (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
channel_id UUID NOT NULL REFERENCES notify.channels(id),
rule_id UUID REFERENCES notify.rules(id),
template_id UUID REFERENCES notify.templates(id),
status notify.delivery_status NOT NULL DEFAULT 'pending',
recipient TEXT NOT NULL,
subject TEXT,
body TEXT,
event_type TEXT NOT NULL,
event_payload JSONB NOT NULL DEFAULT '{}',
attempt INT NOT NULL DEFAULT 0,
max_attempts INT NOT NULL DEFAULT 3,
next_retry_at TIMESTAMPTZ,
error_message TEXT,
external_id TEXT,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
queued_at TIMESTAMPTZ,
sent_at TIMESTAMPTZ,
delivered_at TIMESTAMPTZ,
failed_at TIMESTAMPTZ
);
CREATE INDEX idx_deliveries_tenant ON notify.deliveries(tenant_id);
CREATE INDEX idx_deliveries_status ON notify.deliveries(tenant_id, status);
CREATE INDEX idx_deliveries_pending ON notify.deliveries(status, next_retry_at)
WHERE status IN ('pending', 'queued');
CREATE INDEX idx_deliveries_channel ON notify.deliveries(channel_id);
CREATE INDEX idx_deliveries_correlation ON notify.deliveries(correlation_id);
CREATE INDEX idx_deliveries_created ON notify.deliveries(tenant_id, created_at);
-- Digests table (aggregated notifications)
CREATE TABLE IF NOT EXISTS notify.digests (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
channel_id UUID NOT NULL REFERENCES notify.channels(id),
recipient TEXT NOT NULL,
digest_key TEXT NOT NULL,
event_count INT NOT NULL DEFAULT 0,
events JSONB NOT NULL DEFAULT '[]',
status TEXT NOT NULL DEFAULT 'collecting' CHECK (status IN ('collecting', 'sending', 'sent')),
collect_until TIMESTAMPTZ NOT NULL,
sent_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, channel_id, recipient, digest_key)
);
CREATE INDEX idx_digests_tenant ON notify.digests(tenant_id);
CREATE INDEX idx_digests_collect ON notify.digests(status, collect_until)
WHERE status = 'collecting';
-- Quiet hours table
CREATE TABLE IF NOT EXISTS notify.quiet_hours (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
user_id UUID,
channel_id UUID REFERENCES notify.channels(id),
start_time TIME NOT NULL,
end_time TIME NOT NULL,
timezone TEXT NOT NULL DEFAULT 'UTC',
days_of_week INT[] NOT NULL DEFAULT '{0,1,2,3,4,5,6}',
enabled BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_quiet_hours_tenant ON notify.quiet_hours(tenant_id);
-- Maintenance windows table
CREATE TABLE IF NOT EXISTS notify.maintenance_windows (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
start_at TIMESTAMPTZ NOT NULL,
end_at TIMESTAMPTZ NOT NULL,
suppress_channels UUID[],
suppress_event_types TEXT[],
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT,
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_maintenance_windows_tenant ON notify.maintenance_windows(tenant_id);
CREATE INDEX idx_maintenance_windows_active ON notify.maintenance_windows(start_at, end_at);
-- Escalation policies table
CREATE TABLE IF NOT EXISTS notify.escalation_policies (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
steps JSONB NOT NULL DEFAULT '[]',
repeat_count INT NOT NULL DEFAULT 0,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_escalation_policies_tenant ON notify.escalation_policies(tenant_id);
-- Escalation states table
CREATE TABLE IF NOT EXISTS notify.escalation_states (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
policy_id UUID NOT NULL REFERENCES notify.escalation_policies(id),
incident_id UUID,
correlation_id TEXT NOT NULL,
current_step INT NOT NULL DEFAULT 0,
repeat_iteration INT NOT NULL DEFAULT 0,
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'acknowledged', 'resolved', 'expired')),
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
next_escalation_at TIMESTAMPTZ,
acknowledged_at TIMESTAMPTZ,
acknowledged_by TEXT,
resolved_at TIMESTAMPTZ,
resolved_by TEXT,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_escalation_states_tenant ON notify.escalation_states(tenant_id);
CREATE INDEX idx_escalation_states_active ON notify.escalation_states(status, next_escalation_at)
WHERE status = 'active';
CREATE INDEX idx_escalation_states_correlation ON notify.escalation_states(correlation_id);
-- On-call schedules table
CREATE TABLE IF NOT EXISTS notify.on_call_schedules (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
timezone TEXT NOT NULL DEFAULT 'UTC',
rotation_type TEXT NOT NULL DEFAULT 'weekly' CHECK (rotation_type IN ('daily', 'weekly', 'custom')),
participants JSONB NOT NULL DEFAULT '[]',
overrides JSONB NOT NULL DEFAULT '[]',
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_on_call_schedules_tenant ON notify.on_call_schedules(tenant_id);
-- Inbox table (in-app notifications)
CREATE TABLE IF NOT EXISTS notify.inbox (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
user_id UUID NOT NULL,
title TEXT NOT NULL,
body TEXT,
event_type TEXT NOT NULL,
event_payload JSONB NOT NULL DEFAULT '{}',
read BOOLEAN NOT NULL DEFAULT FALSE,
archived BOOLEAN NOT NULL DEFAULT FALSE,
action_url TEXT,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
read_at TIMESTAMPTZ,
archived_at TIMESTAMPTZ
);
CREATE INDEX idx_inbox_tenant_user ON notify.inbox(tenant_id, user_id);
CREATE INDEX idx_inbox_unread ON notify.inbox(tenant_id, user_id, read, created_at DESC)
WHERE read = FALSE AND archived = FALSE;
-- Incidents table
CREATE TABLE IF NOT EXISTS notify.incidents (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
title TEXT NOT NULL,
description TEXT,
severity TEXT NOT NULL DEFAULT 'medium' CHECK (severity IN ('critical', 'high', 'medium', 'low')),
status TEXT NOT NULL DEFAULT 'open' CHECK (status IN ('open', 'acknowledged', 'resolved', 'closed')),
source TEXT,
correlation_id TEXT,
assigned_to UUID,
escalation_policy_id UUID REFERENCES notify.escalation_policies(id),
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
acknowledged_at TIMESTAMPTZ,
resolved_at TIMESTAMPTZ,
closed_at TIMESTAMPTZ,
created_by TEXT
);
CREATE INDEX idx_incidents_tenant ON notify.incidents(tenant_id);
CREATE INDEX idx_incidents_status ON notify.incidents(tenant_id, status);
CREATE INDEX idx_incidents_severity ON notify.incidents(tenant_id, severity);
CREATE INDEX idx_incidents_correlation ON notify.incidents(correlation_id);
-- Audit log table
CREATE TABLE IF NOT EXISTS notify.audit (
id BIGSERIAL PRIMARY KEY,
tenant_id TEXT NOT NULL,
user_id UUID,
action TEXT NOT NULL,
resource_type TEXT NOT NULL,
resource_id TEXT,
details JSONB,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_audit_tenant ON notify.audit(tenant_id);
CREATE INDEX idx_audit_created ON notify.audit(tenant_id, created_at);
-- Update timestamp function
CREATE OR REPLACE FUNCTION notify.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Triggers
CREATE TRIGGER trg_channels_updated_at
BEFORE UPDATE ON notify.channels
FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
CREATE TRIGGER trg_rules_updated_at
BEFORE UPDATE ON notify.rules
FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
CREATE TRIGGER trg_templates_updated_at
BEFORE UPDATE ON notify.templates
FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
CREATE TRIGGER trg_digests_updated_at
BEFORE UPDATE ON notify.digests
FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
CREATE TRIGGER trg_escalation_policies_updated_at
BEFORE UPDATE ON notify.escalation_policies
FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
CREATE TRIGGER trg_on_call_schedules_updated_at
BEFORE UPDATE ON notify.on_call_schedules
FOR EACH ROW EXECUTE FUNCTION notify.update_updated_at();
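The partial index idx_deliveries_pending is shaped for a dispatcher that claims due work. A sketch of such a claim query, assuming a batch size of 100 and a pending-to-sending transition (both assumptions, not part of this migration):

-- Hypothetical dispatcher query; SKIP LOCKED lets concurrent workers claim disjoint rows.
UPDATE notify.deliveries d
SET status = 'sending', attempt = attempt + 1
WHERE d.id IN (
    SELECT id FROM notify.deliveries
    WHERE status IN ('pending', 'queued')
      AND (next_retry_at IS NULL OR next_retry_at <= NOW())
    ORDER BY created_at
    LIMIT 100
    FOR UPDATE SKIP LOCKED
)
RETURNING d.id, d.recipient, d.channel_id;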


@@ -0,0 +1,81 @@
namespace StellaOps.Notify.Storage.Postgres.Models;
/// <summary>
/// Channel types for notifications.
/// </summary>
public enum ChannelType
{
/// <summary>Email channel.</summary>
Email,
/// <summary>Slack channel.</summary>
Slack,
/// <summary>Microsoft Teams channel.</summary>
Teams,
/// <summary>Generic webhook channel.</summary>
Webhook,
/// <summary>PagerDuty integration.</summary>
PagerDuty,
/// <summary>OpsGenie integration.</summary>
OpsGenie
}
/// <summary>
/// Represents a notification channel entity.
/// </summary>
public sealed class ChannelEntity
{
/// <summary>
/// Unique channel identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this channel belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Channel name (unique per tenant).
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Type of channel.
/// </summary>
public required ChannelType ChannelType { get; init; }
/// <summary>
/// Channel is enabled.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Channel configuration as JSON.
/// </summary>
public string Config { get; init; } = "{}";
/// <summary>
/// Channel credentials as JSON (encrypted).
/// </summary>
public string? Credentials { get; init; }
/// <summary>
/// Channel metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// When the channel was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the channel was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
/// <summary>
/// User who created the channel.
/// </summary>
public string? CreatedBy { get; init; }
}


@@ -0,0 +1,138 @@
namespace StellaOps.Notify.Storage.Postgres.Models;
/// <summary>
/// Delivery status values.
/// </summary>
public enum DeliveryStatus
{
/// <summary>Delivery is pending.</summary>
Pending,
/// <summary>Delivery is queued for sending.</summary>
Queued,
/// <summary>Delivery is being sent.</summary>
Sending,
/// <summary>Delivery was sent.</summary>
Sent,
/// <summary>Delivery was confirmed delivered.</summary>
Delivered,
/// <summary>Delivery failed.</summary>
Failed,
/// <summary>Delivery bounced.</summary>
Bounced
}
/// <summary>
/// Represents a notification delivery entity.
/// </summary>
public sealed class DeliveryEntity
{
/// <summary>
/// Unique delivery identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this delivery belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Channel used for this delivery.
/// </summary>
public required Guid ChannelId { get; init; }
/// <summary>
/// Rule that triggered this delivery.
/// </summary>
public Guid? RuleId { get; init; }
/// <summary>
/// Template used for this delivery.
/// </summary>
public Guid? TemplateId { get; init; }
/// <summary>
/// Current delivery status.
/// </summary>
public DeliveryStatus Status { get; init; } = DeliveryStatus.Pending;
/// <summary>
/// Recipient address/identifier.
/// </summary>
public required string Recipient { get; init; }
/// <summary>
/// Notification subject.
/// </summary>
public string? Subject { get; init; }
/// <summary>
/// Notification body.
/// </summary>
public string? Body { get; init; }
/// <summary>
/// Event type that triggered this notification.
/// </summary>
public required string EventType { get; init; }
/// <summary>
/// Event payload as JSON.
/// </summary>
public string EventPayload { get; init; } = "{}";
/// <summary>
/// Current attempt number.
/// </summary>
public int Attempt { get; init; }
/// <summary>
/// Maximum number of attempts.
/// </summary>
public int MaxAttempts { get; init; } = 3;
/// <summary>
/// Next retry time.
/// </summary>
public DateTimeOffset? NextRetryAt { get; init; }
/// <summary>
/// Error message if failed.
/// </summary>
public string? ErrorMessage { get; init; }
/// <summary>
/// External ID from the channel provider.
/// </summary>
public string? ExternalId { get; init; }
/// <summary>
/// Correlation ID for tracing.
/// </summary>
public string? CorrelationId { get; init; }
/// <summary>
/// When the delivery was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the delivery was queued.
/// </summary>
public DateTimeOffset? QueuedAt { get; init; }
/// <summary>
/// When the delivery was sent.
/// </summary>
public DateTimeOffset? SentAt { get; init; }
/// <summary>
/// When the delivery was confirmed delivered.
/// </summary>
public DateTimeOffset? DeliveredAt { get; init; }
/// <summary>
/// When the delivery failed.
/// </summary>
public DateTimeOffset? FailedAt { get; init; }
}

View File

@@ -0,0 +1,38 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Notify.Storage.Postgres;
/// <summary>
/// PostgreSQL data source for the Notify module.
/// Manages connections with tenant context for notifications and delivery tracking.
/// </summary>
public sealed class NotifyDataSource : DataSourceBase
{
/// <summary>
/// Default schema name for Notify tables.
/// </summary>
public const string DefaultSchemaName = "notify";
/// <summary>
/// Creates a new Notify data source.
/// </summary>
public NotifyDataSource(IOptions<PostgresOptions> options, ILogger<NotifyDataSource> logger)
: base(CreateOptions(options.Value), logger)
{
}
/// <inheritdoc />
protected override string ModuleName => "Notify";
private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
{
if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
{
baseOptions.SchemaName = DefaultSchemaName;
}
return baseOptions;
}
}

View File

@@ -0,0 +1,264 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Notify.Storage.Postgres.Models;
namespace StellaOps.Notify.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for notification channel operations.
/// </summary>
public sealed class ChannelRepository : RepositoryBase<NotifyDataSource>, IChannelRepository
{
/// <summary>
/// Creates a new channel repository.
/// </summary>
public ChannelRepository(NotifyDataSource dataSource, ILogger<ChannelRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<ChannelEntity> CreateAsync(ChannelEntity channel, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO notify.channels (
id, tenant_id, name, channel_type, enabled, config, credentials, metadata, created_by
)
VALUES (
@id, @tenant_id, @name, @channel_type::notify.channel_type, @enabled,
@config::jsonb, @credentials::jsonb, @metadata::jsonb, @created_by
)
RETURNING id, tenant_id, name, channel_type::text, enabled,
config::text, credentials::text, metadata::text, created_at, updated_at, created_by
""";
await using var connection = await DataSource.OpenConnectionAsync(channel.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", channel.Id);
AddParameter(command, "tenant_id", channel.TenantId);
AddParameter(command, "name", channel.Name);
AddParameter(command, "channel_type", ChannelTypeToString(channel.ChannelType));
AddParameter(command, "enabled", channel.Enabled);
AddJsonbParameter(command, "config", channel.Config);
AddJsonbParameter(command, "credentials", channel.Credentials);
AddJsonbParameter(command, "metadata", channel.Metadata);
AddParameter(command, "created_by", channel.CreatedBy);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapChannel(reader);
}
/// <inheritdoc />
public async Task<ChannelEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, channel_type::text, enabled,
config::text, credentials::text, metadata::text, created_at, updated_at, created_by
FROM notify.channels
WHERE tenant_id = @tenant_id AND id = @id
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
MapChannel,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<ChannelEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, channel_type::text, enabled,
config::text, credentials::text, metadata::text, created_at, updated_at, created_by
FROM notify.channels
WHERE tenant_id = @tenant_id AND name = @name
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "name", name);
},
MapChannel,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<ChannelEntity>> GetAllAsync(
string tenantId,
bool? enabled = null,
ChannelType? channelType = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var sql = """
SELECT id, tenant_id, name, channel_type::text, enabled,
config::text, credentials::text, metadata::text, created_at, updated_at, created_by
FROM notify.channels
WHERE tenant_id = @tenant_id
""";
if (enabled.HasValue)
{
sql += " AND enabled = @enabled";
}
if (channelType.HasValue)
{
sql += " AND channel_type = @channel_type::notify.channel_type";
}
sql += " ORDER BY name, id LIMIT @limit OFFSET @offset";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
if (enabled.HasValue)
{
AddParameter(cmd, "enabled", enabled.Value);
}
if (channelType.HasValue)
{
AddParameter(cmd, "channel_type", ChannelTypeToString(channelType.Value));
}
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapChannel,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<bool> UpdateAsync(ChannelEntity channel, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE notify.channels
SET name = @name,
channel_type = @channel_type::notify.channel_type,
enabled = @enabled,
config = @config::jsonb,
credentials = @credentials::jsonb,
metadata = @metadata::jsonb
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
channel.TenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", channel.TenantId);
AddParameter(cmd, "id", channel.Id);
AddParameter(cmd, "name", channel.Name);
AddParameter(cmd, "channel_type", ChannelTypeToString(channel.ChannelType));
AddParameter(cmd, "enabled", channel.Enabled);
AddJsonbParameter(cmd, "config", channel.Config);
AddJsonbParameter(cmd, "credentials", channel.Credentials);
AddJsonbParameter(cmd, "metadata", channel.Metadata);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM notify.channels WHERE tenant_id = @tenant_id AND id = @id";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<IReadOnlyList<ChannelEntity>> GetEnabledByTypeAsync(
string tenantId,
ChannelType channelType,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, channel_type::text, enabled,
config::text, credentials::text, metadata::text, created_at, updated_at, created_by
FROM notify.channels
WHERE tenant_id = @tenant_id
AND channel_type = @channel_type::notify.channel_type
AND enabled = TRUE
ORDER BY name, id
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "channel_type", ChannelTypeToString(channelType));
},
MapChannel,
cancellationToken).ConfigureAwait(false);
}
private static ChannelEntity MapChannel(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(0),
TenantId = reader.GetString(1),
Name = reader.GetString(2),
ChannelType = ParseChannelType(reader.GetString(3)),
Enabled = reader.GetBoolean(4),
Config = reader.GetString(5),
Credentials = GetNullableString(reader, 6),
Metadata = reader.GetString(7),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(8),
UpdatedAt = reader.GetFieldValue<DateTimeOffset>(9),
CreatedBy = GetNullableString(reader, 10)
};
private static string ChannelTypeToString(ChannelType channelType) => channelType switch
{
ChannelType.Email => "email",
ChannelType.Slack => "slack",
ChannelType.Teams => "teams",
ChannelType.Webhook => "webhook",
ChannelType.PagerDuty => "pagerduty",
ChannelType.OpsGenie => "opsgenie",
_ => throw new ArgumentException($"Unknown channel type: {channelType}", nameof(channelType))
};
private static ChannelType ParseChannelType(string channelType) => channelType switch
{
"email" => ChannelType.Email,
"slack" => ChannelType.Slack,
"teams" => ChannelType.Teams,
"webhook" => ChannelType.Webhook,
"pagerduty" => ChannelType.PagerDuty,
"opsgenie" => ChannelType.OpsGenie,
_ => throw new ArgumentException($"Unknown channel type: {channelType}", nameof(channelType))
};
}
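
A minimal round-trip through the repository, assuming services were registered via AddNotifyPostgresStorage (shown later in this commit) and that the tenant and channel values are illustrative:

using Microsoft.Extensions.DependencyInjection;

static async Task ChannelRoundTripAsync(IServiceProvider provider)
{
    var channels = provider.GetRequiredService<IChannelRepository>();

    var created = await channels.CreateAsync(new ChannelEntity
    {
        Id = Guid.NewGuid(),
        TenantId = "tenant-a",
        Name = "ops-alerts",
        ChannelType = ChannelType.Slack,
        Config = """{"webhook":"https://example.invalid/hook"}""",
    });

    // Names are unique per tenant, so this is a stable lookup key.
    var fetched = await channels.GetByNameAsync("tenant-a", "ops-alerts");

    // Enabled Slack channels only, ordered by name.
    var enabledSlack = await channels.GetEnabledByTypeAsync("tenant-a", ChannelType.Slack);
}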

View File

@@ -0,0 +1,363 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Notify.Storage.Postgres.Models;
namespace StellaOps.Notify.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for notification delivery operations.
/// </summary>
public sealed class DeliveryRepository : RepositoryBase<NotifyDataSource>, IDeliveryRepository
{
/// <summary>
/// Creates a new delivery repository.
/// </summary>
public DeliveryRepository(NotifyDataSource dataSource, ILogger<DeliveryRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<DeliveryEntity> CreateAsync(DeliveryEntity delivery, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO notify.deliveries (
id, tenant_id, channel_id, rule_id, template_id, status, recipient,
subject, body, event_type, event_payload, max_attempts, correlation_id
)
VALUES (
@id, @tenant_id, @channel_id, @rule_id, @template_id, @status::notify.delivery_status, @recipient,
@subject, @body, @event_type, @event_payload::jsonb, @max_attempts, @correlation_id
)
RETURNING *
""";
await using var connection = await DataSource.OpenConnectionAsync(delivery.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddDeliveryParameters(command, delivery);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapDelivery(reader);
}
/// <inheritdoc />
public async Task<DeliveryEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = "SELECT * FROM notify.deliveries WHERE tenant_id = @tenant_id AND id = @id";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
MapDelivery,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<DeliveryEntity>> GetPendingAsync(
string tenantId,
int limit = 100,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM notify.deliveries
WHERE tenant_id = @tenant_id
AND status IN ('pending', 'queued')
AND (next_retry_at IS NULL OR next_retry_at <= NOW())
AND attempt < max_attempts
ORDER BY created_at, id
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "limit", limit);
},
MapDelivery,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<DeliveryEntity>> GetByStatusAsync(
string tenantId,
DeliveryStatus status,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM notify.deliveries
WHERE tenant_id = @tenant_id AND status = @status::notify.delivery_status
ORDER BY created_at DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "status", StatusToString(status));
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapDelivery,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<DeliveryEntity>> GetByCorrelationIdAsync(
string tenantId,
string correlationId,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM notify.deliveries
WHERE tenant_id = @tenant_id AND correlation_id = @correlation_id
ORDER BY created_at, id
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "correlation_id", correlationId);
},
MapDelivery,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<bool> MarkQueuedAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE notify.deliveries
SET status = 'queued'::notify.delivery_status,
queued_at = NOW()
WHERE tenant_id = @tenant_id AND id = @id AND status = 'pending'
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> MarkSentAsync(string tenantId, Guid id, string? externalId = null, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE notify.deliveries
SET status = 'sent'::notify.delivery_status,
sent_at = NOW(),
external_id = COALESCE(@external_id, external_id)
WHERE tenant_id = @tenant_id AND id = @id AND status IN ('queued', 'sending')
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
AddParameter(cmd, "external_id", externalId);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> MarkDeliveredAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE notify.deliveries
SET status = 'delivered'::notify.delivery_status,
delivered_at = NOW()
WHERE tenant_id = @tenant_id AND id = @id AND status = 'sent'
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> MarkFailedAsync(
string tenantId,
Guid id,
string errorMessage,
TimeSpan? retryDelay = null,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE notify.deliveries
SET status = CASE
WHEN attempt + 1 < max_attempts AND @retry_delay IS NOT NULL THEN 'pending'::notify.delivery_status
ELSE 'failed'::notify.delivery_status
END,
attempt = attempt + 1,
error_message = @error_message,
failed_at = CASE WHEN attempt + 1 >= max_attempts OR @retry_delay IS NULL THEN NOW() ELSE failed_at END,
next_retry_at = CASE
WHEN attempt + 1 < max_attempts AND @retry_delay IS NOT NULL THEN NOW() + @retry_delay
ELSE NULL
END
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
AddParameter(cmd, "error_message", errorMessage);
AddParameter(cmd, "retry_delay", retryDelay);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<DeliveryStats> GetStatsAsync(
string tenantId,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE status = 'pending') as pending,
COUNT(*) FILTER (WHERE status = 'sent') as sent,
COUNT(*) FILTER (WHERE status = 'delivered') as delivered,
COUNT(*) FILTER (WHERE status = 'failed') as failed,
COUNT(*) FILTER (WHERE status = 'bounced') as bounced
FROM notify.deliveries
WHERE tenant_id = @tenant_id
AND created_at >= @from
AND created_at < @to
""";
await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "from", from);
AddParameter(command, "to", to);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return new DeliveryStats(
Total: reader.GetInt64(0),
Pending: reader.GetInt64(1),
Sent: reader.GetInt64(2),
Delivered: reader.GetInt64(3),
Failed: reader.GetInt64(4),
Bounced: reader.GetInt64(5));
}
private static void AddDeliveryParameters(NpgsqlCommand command, DeliveryEntity delivery)
{
AddParameter(command, "id", delivery.Id);
AddParameter(command, "tenant_id", delivery.TenantId);
AddParameter(command, "channel_id", delivery.ChannelId);
AddParameter(command, "rule_id", delivery.RuleId);
AddParameter(command, "template_id", delivery.TemplateId);
AddParameter(command, "status", StatusToString(delivery.Status));
AddParameter(command, "recipient", delivery.Recipient);
AddParameter(command, "subject", delivery.Subject);
AddParameter(command, "body", delivery.Body);
AddParameter(command, "event_type", delivery.EventType);
AddJsonbParameter(command, "event_payload", delivery.EventPayload);
AddParameter(command, "max_attempts", delivery.MaxAttempts);
AddParameter(command, "correlation_id", delivery.CorrelationId);
}
private static DeliveryEntity MapDelivery(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(reader.GetOrdinal("id")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
ChannelId = reader.GetGuid(reader.GetOrdinal("channel_id")),
RuleId = GetNullableGuid(reader, reader.GetOrdinal("rule_id")),
TemplateId = GetNullableGuid(reader, reader.GetOrdinal("template_id")),
Status = ParseStatus(reader.GetString(reader.GetOrdinal("status"))),
Recipient = reader.GetString(reader.GetOrdinal("recipient")),
Subject = GetNullableString(reader, reader.GetOrdinal("subject")),
Body = GetNullableString(reader, reader.GetOrdinal("body")),
EventType = reader.GetString(reader.GetOrdinal("event_type")),
EventPayload = reader.GetString(reader.GetOrdinal("event_payload")),
Attempt = reader.GetInt32(reader.GetOrdinal("attempt")),
MaxAttempts = reader.GetInt32(reader.GetOrdinal("max_attempts")),
NextRetryAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("next_retry_at")),
ErrorMessage = GetNullableString(reader, reader.GetOrdinal("error_message")),
ExternalId = GetNullableString(reader, reader.GetOrdinal("external_id")),
CorrelationId = GetNullableString(reader, reader.GetOrdinal("correlation_id")),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
QueuedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("queued_at")),
SentAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("sent_at")),
DeliveredAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("delivered_at")),
FailedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("failed_at"))
};
private static string StatusToString(DeliveryStatus status) => status switch
{
DeliveryStatus.Pending => "pending",
DeliveryStatus.Queued => "queued",
DeliveryStatus.Sending => "sending",
DeliveryStatus.Sent => "sent",
DeliveryStatus.Delivered => "delivered",
DeliveryStatus.Failed => "failed",
DeliveryStatus.Bounced => "bounced",
_ => throw new ArgumentException($"Unknown delivery status: {status}", nameof(status))
};
private static DeliveryStatus ParseStatus(string status) => status switch
{
"pending" => DeliveryStatus.Pending,
"queued" => DeliveryStatus.Queued,
"sending" => DeliveryStatus.Sending,
"sent" => DeliveryStatus.Sent,
"delivered" => DeliveryStatus.Delivered,
"failed" => DeliveryStatus.Failed,
"bounced" => DeliveryStatus.Bounced,
_ => throw new ArgumentException($"Unknown delivery status: {status}", nameof(status))
};
}
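
The state machine enforced by the WHERE guards above (pending -> queued -> sent -> delivered, with a failed/retry branch) is easiest to see end to end; a sketch with illustrative tenant, recipient, and payload values:

static async Task DeliveryLifecycleAsync(IDeliveryRepository deliveries, Guid channelId)
{
    var delivery = await deliveries.CreateAsync(new DeliveryEntity
    {
        Id = Guid.NewGuid(),
        TenantId = "tenant-a",
        ChannelId = channelId,
        Recipient = "ops@example.invalid",
        EventType = "scan.completed",
        EventPayload = """{"scanId":"123"}""",
    });

    // Each transition only fires from the expected prior status, so a stale
    // worker calling out of order simply gets false back.
    await deliveries.MarkQueuedAsync("tenant-a", delivery.Id);
    var sent = await deliveries.MarkSentAsync("tenant-a", delivery.Id, externalId: "provider-msg-42");

    if (!sent)
    {
        // Retry in five minutes; once attempt reaches max_attempts the row
        // moves to 'failed' instead of back to 'pending'.
        await deliveries.MarkFailedAsync("tenant-a", delivery.Id, "provider timeout", TimeSpan.FromMinutes(5));
    }

    var stats = await deliveries.GetStatsAsync(
        "tenant-a", DateTimeOffset.UtcNow.AddDays(-1), DateTimeOffset.UtcNow);
}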

View File

@@ -0,0 +1,53 @@
using StellaOps.Notify.Storage.Postgres.Models;
namespace StellaOps.Notify.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for notification channel operations.
/// </summary>
public interface IChannelRepository
{
/// <summary>
/// Creates a new channel.
/// </summary>
Task<ChannelEntity> CreateAsync(ChannelEntity channel, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a channel by ID.
/// </summary>
Task<ChannelEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a channel by name.
/// </summary>
Task<ChannelEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);
/// <summary>
/// Gets all channels for a tenant.
/// </summary>
Task<IReadOnlyList<ChannelEntity>> GetAllAsync(
string tenantId,
bool? enabled = null,
ChannelType? channelType = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates a channel.
/// </summary>
Task<bool> UpdateAsync(ChannelEntity channel, CancellationToken cancellationToken = default);
/// <summary>
/// Deletes a channel.
/// </summary>
Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets enabled channels by type.
/// </summary>
Task<IReadOnlyList<ChannelEntity>> GetEnabledByTypeAsync(
string tenantId,
ChannelType channelType,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,90 @@
using StellaOps.Notify.Storage.Postgres.Models;
namespace StellaOps.Notify.Storage.Postgres.Repositories;
/// <summary>
/// Repository interface for notification delivery operations.
/// </summary>
public interface IDeliveryRepository
{
/// <summary>
/// Creates a new delivery.
/// </summary>
Task<DeliveryEntity> CreateAsync(DeliveryEntity delivery, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a delivery by ID.
/// </summary>
Task<DeliveryEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets pending deliveries ready to send.
/// </summary>
Task<IReadOnlyList<DeliveryEntity>> GetPendingAsync(
string tenantId,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets deliveries by status.
/// </summary>
Task<IReadOnlyList<DeliveryEntity>> GetByStatusAsync(
string tenantId,
DeliveryStatus status,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets deliveries by correlation ID.
/// </summary>
Task<IReadOnlyList<DeliveryEntity>> GetByCorrelationIdAsync(
string tenantId,
string correlationId,
CancellationToken cancellationToken = default);
/// <summary>
/// Marks a delivery as queued.
/// </summary>
Task<bool> MarkQueuedAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Marks a delivery as sent.
/// </summary>
Task<bool> MarkSentAsync(string tenantId, Guid id, string? externalId = null, CancellationToken cancellationToken = default);
/// <summary>
/// Marks a delivery as delivered.
/// </summary>
Task<bool> MarkDeliveredAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Marks a delivery as failed with retry scheduling.
/// </summary>
Task<bool> MarkFailedAsync(
string tenantId,
Guid id,
string errorMessage,
TimeSpan? retryDelay = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets delivery statistics for a time range.
/// </summary>
Task<DeliveryStats> GetStatsAsync(
string tenantId,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Delivery statistics.
/// </summary>
public sealed record DeliveryStats(
long Total,
long Pending,
long Sent,
long Delivered,
long Failed,
long Bounced);

View File

@@ -0,0 +1,55 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Infrastructure.Postgres.Options;
using StellaOps.Notify.Storage.Postgres.Repositories;
namespace StellaOps.Notify.Storage.Postgres;
/// <summary>
/// Extension methods for configuring Notify PostgreSQL storage services.
/// </summary>
public static class ServiceCollectionExtensions
{
/// <summary>
/// Adds Notify PostgreSQL storage services.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configuration">Configuration root.</param>
/// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddNotifyPostgresStorage(
this IServiceCollection services,
IConfiguration configuration,
string sectionName = "Postgres:Notify")
{
// Bind the section as the default (unnamed) options instance so the
// IOptions<PostgresOptions> consumed by NotifyDataSource actually receives it.
services.Configure<PostgresOptions>(configuration.GetSection(sectionName));
services.AddSingleton<NotifyDataSource>();
// Register repositories
services.AddScoped<IChannelRepository, ChannelRepository>();
services.AddScoped<IDeliveryRepository, DeliveryRepository>();
return services;
}
/// <summary>
/// Adds Notify PostgreSQL storage services with explicit options.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configureOptions">Options configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddNotifyPostgresStorage(
this IServiceCollection services,
Action<PostgresOptions> configureOptions)
{
services.Configure(configureOptions);
services.AddSingleton<NotifyDataSource>();
// Register repositories
services.AddScoped<IChannelRepository, ChannelRepository>();
services.AddScoped<IDeliveryRepository, DeliveryRepository>();
return services;
}
}
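
In a host's composition root the wiring would look roughly like this; the connection-string key is an assumption about PostgresOptions (only SchemaName is visible in this commit), and all values are illustrative:

using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;

var configuration = new ConfigurationBuilder()
    .AddInMemoryCollection(new Dictionary<string, string?>
    {
        // "ConnectionString" is assumed; real deployments bind from appsettings/env.
        ["Postgres:Notify:ConnectionString"] = "Host=localhost;Database=stellaops",
        ["Postgres:Notify:SchemaName"] = "notify",
    })
    .Build();

var services = new ServiceCollection();
services.AddLogging();
services.AddNotifyPostgresStorage(configuration);   // defaults to the "Postgres:Notify" section

await using var provider = services.BuildServiceProvider();
var channels = provider.GetRequiredService<IChannelRepository>();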

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Notify.Storage.Postgres</RootNamespace>
</PropertyGroup>
<ItemGroup>
<None Include="Migrations\**\*.sql" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,583 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.Backfill;
/// <summary>
/// Configuration options for the backfill manager.
/// </summary>
public sealed record BackfillManagerOptions
{
/// <summary>
/// Maximum number of events allowed in a single backfill request.
/// </summary>
public long MaxEventsPerBackfill { get; init; } = 1_000_000;
/// <summary>
/// Maximum duration allowed for a backfill operation.
/// </summary>
public TimeSpan MaxBackfillDuration { get; init; } = TimeSpan.FromHours(24);
/// <summary>
/// Data retention period; backfill windows cannot start earlier than now minus this value.
/// </summary>
public TimeSpan RetentionPeriod { get; init; } = TimeSpan.FromDays(90);
/// <summary>
/// Default TTL for processed event records.
/// </summary>
public TimeSpan DefaultProcessedEventTtl { get; init; } = TimeSpan.FromDays(30);
/// <summary>
/// Number of sample event keys to include in previews.
/// </summary>
public int PreviewSampleSize { get; init; } = 10;
/// <summary>
/// Estimated events per second for duration estimation.
/// </summary>
public double EstimatedEventsPerSecond { get; init; } = 100;
}
/// <summary>
/// Coordinates backfill operations with safety validations.
/// </summary>
public interface IBackfillManager
{
/// <summary>
/// Creates a new backfill request with validation.
/// </summary>
Task<BackfillRequest> CreateRequestAsync(
string tenantId,
Guid? sourceId,
string? jobType,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
string reason,
string createdBy,
int batchSize = 100,
bool dryRun = false,
bool forceReprocess = false,
string? ticket = null,
TimeSpan? maxDuration = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Validates a backfill request and runs safety checks.
/// </summary>
Task<BackfillRequest> ValidateRequestAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default);
/// <summary>
/// Generates a preview of what a backfill would process (dry-run).
/// </summary>
Task<BackfillPreview> PreviewAsync(
string tenantId,
Guid? sourceId,
string? jobType,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
int batchSize = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Starts execution of a validated backfill request.
/// </summary>
Task<BackfillRequest> StartAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default);
/// <summary>
/// Pauses a running backfill.
/// </summary>
Task<BackfillRequest> PauseAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default);
/// <summary>
/// Resumes a paused backfill.
/// </summary>
Task<BackfillRequest> ResumeAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default);
/// <summary>
/// Cancels a backfill request.
/// </summary>
Task<BackfillRequest> CancelAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the current status of a backfill request.
/// </summary>
Task<BackfillRequest?> GetStatusAsync(
string tenantId,
Guid backfillId,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists backfill requests with filters.
/// </summary>
Task<IReadOnlyList<BackfillRequest>> ListAsync(
string tenantId,
BackfillStatus? status = null,
Guid? sourceId = null,
string? jobType = null,
int limit = 50,
int offset = 0,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Provides event counting for backfill estimation.
/// </summary>
public interface IBackfillEventCounter
{
/// <summary>
/// Estimates the number of events in a time window.
/// </summary>
Task<long> EstimateEventCountAsync(
string tenantId,
string scopeKey,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
CancellationToken cancellationToken);
/// <summary>
/// Gets sample event keys from a time window.
/// </summary>
Task<IReadOnlyList<string>> GetSampleEventKeysAsync(
string tenantId,
string scopeKey,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
int sampleSize,
CancellationToken cancellationToken);
}
/// <summary>
/// Validates backfill safety conditions.
/// </summary>
public interface IBackfillSafetyValidator
{
/// <summary>
/// Runs all safety validations for a backfill request.
/// </summary>
Task<BackfillSafetyChecks> ValidateAsync(
BackfillRequest request,
long estimatedEvents,
TimeSpan estimatedDuration,
CancellationToken cancellationToken);
}
/// <summary>
/// Default implementation of backfill safety validator.
/// </summary>
public sealed class DefaultBackfillSafetyValidator : IBackfillSafetyValidator
{
private readonly ISourceValidator _sourceValidator;
private readonly IOverlapChecker _overlapChecker;
private readonly BackfillManagerOptions _options;
public DefaultBackfillSafetyValidator(
ISourceValidator sourceValidator,
IOverlapChecker overlapChecker,
BackfillManagerOptions options)
{
_sourceValidator = sourceValidator;
_overlapChecker = overlapChecker;
_options = options;
}
public async Task<BackfillSafetyChecks> ValidateAsync(
BackfillRequest request,
long estimatedEvents,
TimeSpan estimatedDuration,
CancellationToken cancellationToken)
{
var warnings = new List<string>();
var errors = new List<string>();
// Check source exists
var sourceExists = true;
if (request.SourceId.HasValue)
{
sourceExists = await _sourceValidator.ExistsAsync(
request.TenantId, request.SourceId.Value, cancellationToken);
if (!sourceExists)
{
errors.Add($"Source {request.SourceId} not found.");
}
}
// Check for overlapping backfills
var hasOverlap = await _overlapChecker.HasOverlapAsync(
request.TenantId,
request.ScopeKey,
request.WindowStart,
request.WindowEnd,
request.BackfillId,
cancellationToken);
if (hasOverlap)
{
errors.Add("An active backfill already exists for this scope and time window.");
}
// Check retention period
var retentionLimit = DateTimeOffset.UtcNow - _options.RetentionPeriod;
var withinRetention = request.WindowStart >= retentionLimit;
if (!withinRetention)
{
errors.Add($"Window start {request.WindowStart:O} is beyond the retention period ({_options.RetentionPeriod.TotalDays} days).");
}
// Check event limit
var withinEventLimit = estimatedEvents <= _options.MaxEventsPerBackfill;
if (!withinEventLimit)
{
errors.Add($"Estimated {estimatedEvents:N0} events exceeds maximum allowed ({_options.MaxEventsPerBackfill:N0}).");
}
else if (estimatedEvents > _options.MaxEventsPerBackfill * 0.8)
{
warnings.Add($"Estimated {estimatedEvents:N0} events is approaching the maximum limit.");
}
// Check duration limit
var maxDuration = request.MaxDuration ?? _options.MaxBackfillDuration;
var withinDurationLimit = estimatedDuration <= maxDuration;
if (!withinDurationLimit)
{
errors.Add($"Estimated duration {estimatedDuration} exceeds maximum allowed ({maxDuration}).");
}
// Check quota availability (placeholder - always true for now)
var quotaAvailable = true;
// Add warnings for large backfills
if (request.WindowDuration > TimeSpan.FromDays(7))
{
warnings.Add("Large time window may take significant time to process.");
}
if (request.ForceReprocess)
{
warnings.Add("Force reprocess is enabled - events will be processed even if already seen.");
}
return new BackfillSafetyChecks(
SourceExists: sourceExists,
HasOverlappingBackfill: hasOverlap,
WithinRetention: withinRetention,
WithinEventLimit: withinEventLimit,
WithinDurationLimit: withinDurationLimit,
QuotaAvailable: quotaAvailable,
Warnings: warnings,
Errors: errors);
}
}
/// <summary>
/// Validates that a source exists.
/// </summary>
public interface ISourceValidator
{
/// <summary>
/// Checks if a source exists.
/// </summary>
Task<bool> ExistsAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken);
}
/// <summary>
/// Checks for overlapping backfill operations.
/// </summary>
public interface IOverlapChecker
{
/// <summary>
/// Checks if there's an overlapping active backfill.
/// </summary>
Task<bool> HasOverlapAsync(
string tenantId,
string scopeKey,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
Guid? excludeBackfillId,
CancellationToken cancellationToken);
}
/// <summary>
/// Default implementation of the backfill manager.
/// </summary>
public sealed class BackfillManager : IBackfillManager
{
private readonly IBackfillRepository _backfillRepository;
private readonly IBackfillSafetyValidator _safetyValidator;
private readonly IBackfillEventCounter _eventCounter;
private readonly IDuplicateSuppressor _duplicateSuppressor;
private readonly BackfillManagerOptions _options;
private readonly ILogger<BackfillManager> _logger;
public BackfillManager(
IBackfillRepository backfillRepository,
IBackfillSafetyValidator safetyValidator,
IBackfillEventCounter eventCounter,
IDuplicateSuppressor duplicateSuppressor,
BackfillManagerOptions options,
ILogger<BackfillManager> logger)
{
_backfillRepository = backfillRepository;
_safetyValidator = safetyValidator;
_eventCounter = eventCounter;
_duplicateSuppressor = duplicateSuppressor;
_options = options;
_logger = logger;
}
public async Task<BackfillRequest> CreateRequestAsync(
string tenantId,
Guid? sourceId,
string? jobType,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
string reason,
string createdBy,
int batchSize = 100,
bool dryRun = false,
bool forceReprocess = false,
string? ticket = null,
TimeSpan? maxDuration = null,
CancellationToken cancellationToken = default)
{
var request = BackfillRequest.Create(
tenantId: tenantId,
sourceId: sourceId,
jobType: jobType,
windowStart: windowStart,
windowEnd: windowEnd,
reason: reason,
createdBy: createdBy,
batchSize: batchSize,
dryRun: dryRun,
forceReprocess: forceReprocess,
ticket: ticket,
maxDuration: maxDuration);
await _backfillRepository.CreateAsync(request, cancellationToken);
_logger.LogInformation(
"Created backfill request {BackfillId} for scope {ScopeKey} from {WindowStart} to {WindowEnd}",
request.BackfillId, request.ScopeKey, windowStart, windowEnd);
return request;
}
public async Task<BackfillRequest> ValidateRequestAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default)
{
var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");
request = request.StartValidation(updatedBy);
await _backfillRepository.UpdateAsync(request, cancellationToken);
// Estimate event count
var estimatedEvents = await _eventCounter.EstimateEventCountAsync(
tenantId, request.ScopeKey, request.WindowStart, request.WindowEnd, cancellationToken);
// Calculate estimated duration
var estimatedDuration = TimeSpan.FromSeconds(estimatedEvents / _options.EstimatedEventsPerSecond);
// Run safety validations
var safetyChecks = await _safetyValidator.ValidateAsync(
request, estimatedEvents, estimatedDuration, cancellationToken);
request = request.WithSafetyChecks(safetyChecks, estimatedEvents, estimatedDuration, updatedBy);
await _backfillRepository.UpdateAsync(request, cancellationToken);
_logger.LogInformation(
"Validated backfill request {BackfillId}: {EstimatedEvents} events, safe={IsSafe}",
backfillId, estimatedEvents, safetyChecks.IsSafe);
return request;
}
public async Task<BackfillPreview> PreviewAsync(
string tenantId,
Guid? sourceId,
string? jobType,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
int batchSize = 100,
CancellationToken cancellationToken = default)
{
var scopeKey = GetScopeKey(sourceId, jobType);
// Estimate total events
var estimatedEvents = await _eventCounter.EstimateEventCountAsync(
tenantId, scopeKey, windowStart, windowEnd, cancellationToken);
// Get already processed count
var processedCount = await _duplicateSuppressor.CountProcessedAsync(
scopeKey, windowStart, windowEnd, cancellationToken);
// Get sample event keys
var sampleKeys = await _eventCounter.GetSampleEventKeysAsync(
tenantId, scopeKey, windowStart, windowEnd, _options.PreviewSampleSize, cancellationToken);
// Calculate estimates
var processableEvents = Math.Max(0, estimatedEvents - processedCount);
var estimatedDuration = TimeSpan.FromSeconds(processableEvents / _options.EstimatedEventsPerSecond);
var estimatedBatches = (int)Math.Ceiling((double)processableEvents / batchSize);
// Run safety checks
var tempRequest = BackfillRequest.Create(
tenantId, sourceId, jobType, windowStart, windowEnd,
"preview", "system", batchSize);
var safetyChecks = await _safetyValidator.ValidateAsync(
tempRequest, estimatedEvents, estimatedDuration, cancellationToken);
return new BackfillPreview(
ScopeKey: scopeKey,
WindowStart: windowStart,
WindowEnd: windowEnd,
EstimatedEvents: estimatedEvents,
SkippedEvents: processedCount,
ProcessableEvents: processableEvents,
EstimatedDuration: estimatedDuration,
EstimatedBatches: estimatedBatches,
SafetyChecks: safetyChecks,
SampleEventKeys: sampleKeys);
}
public async Task<BackfillRequest> StartAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default)
{
var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");
request = request.Start(updatedBy);
await _backfillRepository.UpdateAsync(request, cancellationToken);
_logger.LogInformation("Started backfill request {BackfillId}", backfillId);
return request;
}
public async Task<BackfillRequest> PauseAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default)
{
var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");
request = request.Pause(updatedBy);
await _backfillRepository.UpdateAsync(request, cancellationToken);
_logger.LogInformation("Paused backfill request {BackfillId}", backfillId);
return request;
}
public async Task<BackfillRequest> ResumeAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default)
{
var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");
request = request.Resume(updatedBy);
await _backfillRepository.UpdateAsync(request, cancellationToken);
_logger.LogInformation("Resumed backfill request {BackfillId}", backfillId);
return request;
}
public async Task<BackfillRequest> CancelAsync(
string tenantId,
Guid backfillId,
string updatedBy,
CancellationToken cancellationToken = default)
{
var request = await _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken)
?? throw new InvalidOperationException($"Backfill request {backfillId} not found.");
request = request.Cancel(updatedBy);
await _backfillRepository.UpdateAsync(request, cancellationToken);
_logger.LogInformation("Canceled backfill request {BackfillId}", backfillId);
return request;
}
public Task<BackfillRequest?> GetStatusAsync(
string tenantId,
Guid backfillId,
CancellationToken cancellationToken = default)
{
return _backfillRepository.GetByIdAsync(tenantId, backfillId, cancellationToken);
}
public Task<IReadOnlyList<BackfillRequest>> ListAsync(
string tenantId,
BackfillStatus? status = null,
Guid? sourceId = null,
string? jobType = null,
int limit = 50,
int offset = 0,
CancellationToken cancellationToken = default)
{
return _backfillRepository.ListAsync(tenantId, status, sourceId, jobType, limit, offset, cancellationToken);
}
private static string GetScopeKey(Guid? sourceId, string? jobType)
{
return (sourceId, jobType) switch
{
(Guid s, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(s, j),
(Guid s, _) => Watermark.CreateScopeKey(s),
(_, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(j),
_ => throw new ArgumentException("Either sourceId or jobType must be specified.")
};
}
}
/// <summary>
/// Repository interface for backfill persistence (imported for convenience).
/// </summary>
public interface IBackfillRepository
{
Task<BackfillRequest?> GetByIdAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);
Task CreateAsync(BackfillRequest request, CancellationToken cancellationToken);
Task UpdateAsync(BackfillRequest request, CancellationToken cancellationToken);
Task<IReadOnlyList<BackfillRequest>> ListAsync(
string tenantId,
BackfillStatus? status,
Guid? sourceId,
string? jobType,
int limit,
int offset,
CancellationToken cancellationToken);
}
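
Taken together, a caller drives a backfill through preview, create, validate, and start; a sketch assuming the manager comes from DI and that the tenant, reason, and ticket values are illustrative:

static async Task RunBackfillAsync(IBackfillManager manager, Guid sourceId)
{
    var windowStart = DateTimeOffset.UtcNow.AddDays(-2);
    var windowEnd = DateTimeOffset.UtcNow.AddDays(-1);

    // Dry-run estimate plus the same safety checks a real request would get.
    var preview = await manager.PreviewAsync("tenant-a", sourceId, jobType: null, windowStart, windowEnd);
    if (!preview.SafetyChecks.IsSafe)
    {
        return;
    }

    var request = await manager.CreateRequestAsync(
        "tenant-a", sourceId, jobType: null, windowStart, windowEnd,
        reason: "reingest after connector outage",
        createdBy: "ops@example.invalid",
        ticket: "OPS-1234");

    // Validation persists the event estimate and safety-check results before execution.
    request = await manager.ValidateRequestAsync("tenant-a", request.BackfillId, "ops@example.invalid");
    request = await manager.StartAsync("tenant-a", request.BackfillId, "ops@example.invalid");
}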

View File

@@ -0,0 +1,318 @@
namespace StellaOps.Orchestrator.Core.Backfill;
/// <summary>
/// Tracks processed events for duplicate suppression.
/// </summary>
public interface IDuplicateSuppressor
{
/// <summary>
/// Checks if an event has already been processed.
/// </summary>
/// <param name="scopeKey">Scope identifier.</param>
/// <param name="eventKey">Unique event identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if the event was already processed.</returns>
Task<bool> HasProcessedAsync(string scopeKey, string eventKey, CancellationToken cancellationToken);
/// <summary>
/// Checks multiple events for duplicate status.
/// </summary>
/// <param name="scopeKey">Scope identifier.</param>
/// <param name="eventKeys">Event identifiers to check.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Set of event keys that have already been processed.</returns>
Task<IReadOnlySet<string>> GetProcessedAsync(string scopeKey, IEnumerable<string> eventKeys, CancellationToken cancellationToken);
/// <summary>
/// Marks an event as processed.
/// </summary>
/// <param name="scopeKey">Scope identifier.</param>
/// <param name="eventKey">Unique event identifier.</param>
/// <param name="eventTime">Event timestamp.</param>
/// <param name="batchId">Optional batch/backfill identifier.</param>
/// <param name="ttl">Time-to-live for the record.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task MarkProcessedAsync(
string scopeKey,
string eventKey,
DateTimeOffset eventTime,
Guid? batchId,
TimeSpan ttl,
CancellationToken cancellationToken);
/// <summary>
/// Marks multiple events as processed.
/// </summary>
/// <param name="scopeKey">Scope identifier.</param>
/// <param name="events">Events to mark as processed.</param>
/// <param name="batchId">Optional batch/backfill identifier.</param>
/// <param name="ttl">Time-to-live for the records.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task MarkProcessedBatchAsync(
string scopeKey,
IEnumerable<ProcessedEvent> events,
Guid? batchId,
TimeSpan ttl,
CancellationToken cancellationToken);
/// <summary>
/// Counts processed events within a time range.
/// </summary>
/// <param name="scopeKey">Scope identifier.</param>
/// <param name="from">Start of time range.</param>
/// <param name="to">End of time range.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Count of processed events.</returns>
Task<long> CountProcessedAsync(string scopeKey, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken);
/// <summary>
/// Removes expired records (cleanup).
/// </summary>
/// <param name="batchLimit">Maximum records to remove per call.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Number of records removed.</returns>
Task<int> CleanupExpiredAsync(int batchLimit, CancellationToken cancellationToken);
}
/// <summary>
/// Event data for duplicate tracking.
/// </summary>
public sealed record ProcessedEvent(
/// <summary>Unique event identifier.</summary>
string EventKey,
/// <summary>Event timestamp.</summary>
DateTimeOffset EventTime);
/// <summary>
/// In-memory duplicate suppressor for testing.
/// </summary>
public sealed class InMemoryDuplicateSuppressor : IDuplicateSuppressor
{
private readonly Dictionary<string, Dictionary<string, ProcessedEventEntry>> _store = new();
private readonly object _lock = new();
private sealed record ProcessedEventEntry(
DateTimeOffset EventTime,
DateTimeOffset ProcessedAt,
Guid? BatchId,
DateTimeOffset ExpiresAt);
public Task<bool> HasProcessedAsync(string scopeKey, string eventKey, CancellationToken cancellationToken)
{
lock (_lock)
{
if (!_store.TryGetValue(scopeKey, out var scopeStore))
return Task.FromResult(false);
if (!scopeStore.TryGetValue(eventKey, out var entry))
return Task.FromResult(false);
// Check if expired
if (entry.ExpiresAt < DateTimeOffset.UtcNow)
{
scopeStore.Remove(eventKey);
return Task.FromResult(false);
}
return Task.FromResult(true);
}
}
public Task<IReadOnlySet<string>> GetProcessedAsync(string scopeKey, IEnumerable<string> eventKeys, CancellationToken cancellationToken)
{
var now = DateTimeOffset.UtcNow;
var result = new HashSet<string>();
lock (_lock)
{
if (!_store.TryGetValue(scopeKey, out var scopeStore))
return Task.FromResult<IReadOnlySet<string>>(result);
foreach (var eventKey in eventKeys)
{
if (scopeStore.TryGetValue(eventKey, out var entry) && entry.ExpiresAt >= now)
{
result.Add(eventKey);
}
}
}
return Task.FromResult<IReadOnlySet<string>>(result);
}
public Task MarkProcessedAsync(
string scopeKey,
string eventKey,
DateTimeOffset eventTime,
Guid? batchId,
TimeSpan ttl,
CancellationToken cancellationToken)
{
var now = DateTimeOffset.UtcNow;
var entry = new ProcessedEventEntry(eventTime, now, batchId, now + ttl);
lock (_lock)
{
if (!_store.TryGetValue(scopeKey, out var scopeStore))
{
scopeStore = new Dictionary<string, ProcessedEventEntry>();
_store[scopeKey] = scopeStore;
}
scopeStore[eventKey] = entry;
}
return Task.CompletedTask;
}
public Task MarkProcessedBatchAsync(
string scopeKey,
IEnumerable<ProcessedEvent> events,
Guid? batchId,
TimeSpan ttl,
CancellationToken cancellationToken)
{
var now = DateTimeOffset.UtcNow;
var expiresAt = now + ttl;
lock (_lock)
{
if (!_store.TryGetValue(scopeKey, out var scopeStore))
{
scopeStore = new Dictionary<string, ProcessedEventEntry>();
_store[scopeKey] = scopeStore;
}
foreach (var evt in events)
{
scopeStore[evt.EventKey] = new ProcessedEventEntry(evt.EventTime, now, batchId, expiresAt);
}
}
return Task.CompletedTask;
}
public Task<long> CountProcessedAsync(string scopeKey, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken)
{
var now = DateTimeOffset.UtcNow;
long count = 0;
lock (_lock)
{
if (_store.TryGetValue(scopeKey, out var scopeStore))
{
count = scopeStore.Values
.Count(e => e.ExpiresAt >= now && e.EventTime >= from && e.EventTime < to);
}
}
return Task.FromResult(count);
}
public Task<int> CleanupExpiredAsync(int batchLimit, CancellationToken cancellationToken)
{
var now = DateTimeOffset.UtcNow;
var removed = 0;
lock (_lock)
{
foreach (var scopeStore in _store.Values)
{
var expiredKeys = scopeStore
.Where(kvp => kvp.Value.ExpiresAt < now)
.Take(batchLimit - removed)
.Select(kvp => kvp.Key)
.ToList();
foreach (var key in expiredKeys)
{
scopeStore.Remove(key);
removed++;
}
if (removed >= batchLimit)
break;
}
}
return Task.FromResult(removed);
}
}
/// <summary>
/// Result of filtering events through duplicate suppression.
/// </summary>
public sealed record DuplicateFilterResult<T>(
/// <summary>Events that should be processed (not duplicates).</summary>
IReadOnlyList<T> ToProcess,
/// <summary>Events that were filtered as duplicates.</summary>
IReadOnlyList<T> Duplicates,
/// <summary>Total events evaluated.</summary>
int Total)
{
/// <summary>
/// Number of events that passed filtering.
/// </summary>
public int ProcessCount => ToProcess.Count;
/// <summary>
/// Number of duplicates filtered.
/// </summary>
public int DuplicateCount => Duplicates.Count;
/// <summary>
/// Duplicate percentage.
/// </summary>
public double DuplicatePercent => Total > 0 ? Math.Round((double)DuplicateCount / Total * 100, 2) : 0;
}
/// <summary>
/// Helper methods for duplicate suppression.
/// </summary>
public static class DuplicateSuppressorExtensions
{
/// <summary>
/// Filters a batch of events, removing duplicates.
/// </summary>
/// <typeparam name="T">Event type.</typeparam>
/// <param name="suppressor">Duplicate suppressor.</param>
/// <param name="scopeKey">Scope identifier.</param>
/// <param name="events">Events to filter.</param>
/// <param name="keySelector">Function to extract event key.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Filter result with events to process and duplicates.</returns>
public static async Task<DuplicateFilterResult<T>> FilterAsync<T>(
this IDuplicateSuppressor suppressor,
string scopeKey,
IReadOnlyList<T> events,
Func<T, string> keySelector,
CancellationToken cancellationToken)
{
if (events.Count == 0)
return new DuplicateFilterResult<T>([], [], 0);
var eventKeys = events.Select(keySelector).ToList();
var processed = await suppressor.GetProcessedAsync(scopeKey, eventKeys, cancellationToken).ConfigureAwait(false);
var toProcess = new List<T>();
var duplicates = new List<T>();
foreach (var evt in events)
{
var key = keySelector(evt);
if (processed.Contains(key))
{
duplicates.Add(evt);
}
else
{
toProcess.Add(evt);
}
}
return new DuplicateFilterResult<T>(toProcess, duplicates, events.Count);
}
}
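
A self-contained sketch of the suppressor plus the FilterAsync helper, using the in-memory implementation and an illustrative scope key:

static async Task FilterExampleAsync()
{
    IDuplicateSuppressor suppressor = new InMemoryDuplicateSuppressor();
    const string scope = "source:42";

    // Pretend evt-1 was handled by an earlier batch.
    await suppressor.MarkProcessedAsync(
        scope, "evt-1", DateTimeOffset.UtcNow, batchId: null,
        ttl: TimeSpan.FromDays(30), CancellationToken.None);

    var batch = new[] { "evt-1", "evt-2", "evt-3" };
    var result = await suppressor.FilterAsync(scope, batch, key => key, CancellationToken.None);

    // result.ToProcess is ["evt-2", "evt-3"]; result.Duplicates is ["evt-1"];
    // result.DuplicatePercent is 33.33.
}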

View File

@@ -0,0 +1,220 @@
namespace StellaOps.Orchestrator.Core.Backfill;
/// <summary>
/// Represents an event-time window for batch processing.
/// </summary>
public sealed record EventTimeWindow(
/// <summary>Start of the window (inclusive).</summary>
DateTimeOffset Start,
/// <summary>End of the window (exclusive).</summary>
DateTimeOffset End)
{
/// <summary>
/// Duration of the window.
/// </summary>
public TimeSpan Duration => End - Start;
/// <summary>
/// Whether the window is empty (zero duration).
/// </summary>
public bool IsEmpty => End <= Start;
/// <summary>
/// Whether a timestamp falls within this window.
/// </summary>
public bool Contains(DateTimeOffset timestamp) => timestamp >= Start && timestamp < End;
/// <summary>
/// Whether this window overlaps with another.
/// </summary>
public bool Overlaps(EventTimeWindow other) =>
Start < other.End && End > other.Start;
/// <summary>
/// Creates the intersection of two windows.
/// </summary>
public EventTimeWindow? Intersect(EventTimeWindow other)
{
var newStart = Start > other.Start ? Start : other.Start;
var newEnd = End < other.End ? End : other.End;
return newEnd > newStart ? new EventTimeWindow(newStart, newEnd) : null;
}
/// <summary>
/// Splits the window into batches of the specified duration.
/// </summary>
public IEnumerable<EventTimeWindow> Split(TimeSpan batchDuration)
{
if (batchDuration <= TimeSpan.Zero)
throw new ArgumentOutOfRangeException(nameof(batchDuration), "Batch duration must be positive.");
var current = Start;
while (current < End)
{
var batchEnd = current + batchDuration;
if (batchEnd > End)
batchEnd = End;
yield return new EventTimeWindow(current, batchEnd);
current = batchEnd;
}
}
/// <summary>
/// Creates a window from a duration ending at the specified time.
/// </summary>
public static EventTimeWindow FromDuration(DateTimeOffset end, TimeSpan duration) =>
new(end - duration, end);
/// <summary>
/// Creates a window covering the last N hours from now.
/// </summary>
public static EventTimeWindow LastHours(int hours, DateTimeOffset? now = null)
{
var endTime = now ?? DateTimeOffset.UtcNow;
return FromDuration(endTime, TimeSpan.FromHours(hours));
}
/// <summary>
/// Creates a window covering the last N days from now.
/// </summary>
public static EventTimeWindow LastDays(int days, DateTimeOffset? now = null)
{
var endTime = now ?? DateTimeOffset.UtcNow;
return FromDuration(endTime, TimeSpan.FromDays(days));
}
}
/// <summary>
/// Configuration for event-time window computation.
/// </summary>
public sealed record EventTimeWindowOptions(
/// <summary>Minimum window size (prevents too-small batches).</summary>
TimeSpan MinWindowSize,
/// <summary>Maximum window size (prevents too-large batches).</summary>
TimeSpan MaxWindowSize,
/// <summary>Overlap with previous window for late-arriving events.</summary>
TimeSpan OverlapDuration,
/// <summary>Maximum lag allowed before triggering alerts.</summary>
TimeSpan MaxLag,
/// <summary>Default lookback for initial fetch when no watermark exists.</summary>
TimeSpan InitialLookback)
{
/// <summary>
/// Default options for hourly batching.
/// </summary>
public static EventTimeWindowOptions HourlyBatches => new(
MinWindowSize: TimeSpan.FromMinutes(5),
MaxWindowSize: TimeSpan.FromHours(1),
OverlapDuration: TimeSpan.FromMinutes(5),
MaxLag: TimeSpan.FromHours(2),
InitialLookback: TimeSpan.FromDays(7));
/// <summary>
/// Default options for daily batching.
/// </summary>
public static EventTimeWindowOptions DailyBatches => new(
MinWindowSize: TimeSpan.FromHours(1),
MaxWindowSize: TimeSpan.FromDays(1),
OverlapDuration: TimeSpan.FromHours(1),
MaxLag: TimeSpan.FromDays(1),
InitialLookback: TimeSpan.FromDays(30));
}
/// <summary>
/// Computes event-time windows for incremental processing.
/// </summary>
public static class EventTimeWindowPlanner
{
/// <summary>
/// Computes the next window to process based on current watermark.
/// </summary>
/// <param name="now">Current time.</param>
/// <param name="highWatermark">Current high watermark (null for initial fetch).</param>
/// <param name="options">Window configuration options.</param>
/// <returns>The next window to process, or null if caught up.</returns>
public static EventTimeWindow? GetNextWindow(
DateTimeOffset now,
DateTimeOffset? highWatermark,
EventTimeWindowOptions options)
{
DateTimeOffset windowStart;
if (highWatermark is null)
{
// Initial fetch: start from initial lookback
windowStart = now - options.InitialLookback;
}
else
{
// Incremental fetch: start from watermark minus overlap
windowStart = highWatermark.Value - options.OverlapDuration;
// If we're caught up (watermark + min window > now), no work needed
if (highWatermark.Value + options.MinWindowSize > now)
{
return null;
}
}
// Calculate window end (at most now, at most max window from start)
var windowEnd = windowStart + options.MaxWindowSize;
if (windowEnd > now)
{
windowEnd = now;
}
// Ensure minimum window size
if (windowEnd - windowStart < options.MinWindowSize)
{
// If the window would be too small, extend it to the minimum size; if that overshoots now, wait for more data.
windowEnd = windowStart + options.MinWindowSize;
if (windowEnd > now)
{
return null; // Not enough data accumulated yet
}
}
return new EventTimeWindow(windowStart, windowEnd);
}
/// <summary>
/// Calculates the current lag from the high watermark.
/// </summary>
public static TimeSpan CalculateLag(DateTimeOffset now, DateTimeOffset highWatermark) =>
now - highWatermark;
/// <summary>
/// Determines if the lag exceeds the maximum allowed.
/// </summary>
public static bool IsLagging(DateTimeOffset now, DateTimeOffset highWatermark, EventTimeWindowOptions options) =>
CalculateLag(now, highWatermark) > options.MaxLag;
/// <summary>
/// Estimates the number of windows needed to catch up.
/// </summary>
public static int EstimateWindowsToProcess(
DateTimeOffset now,
DateTimeOffset? highWatermark,
EventTimeWindowOptions options)
{
if (highWatermark is null)
{
// Initial fetch
var totalDuration = options.InitialLookback;
return (int)Math.Ceiling(totalDuration / options.MaxWindowSize);
}
var lag = CalculateLag(now, highWatermark.Value);
if (lag <= options.MinWindowSize)
return 0;
return (int)Math.Ceiling(lag / options.MaxWindowSize);
}
}
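/// <summary>
/// Usage sketch (illustrative): a catch-up loop a connector worker might run.
/// Persisting the watermark between iterations is assumed to happen elsewhere.
/// </summary>
internal static class EventTimeWindowPlannerUsageSketch
{
    internal static void CatchUp(DateTimeOffset? watermark)
    {
        var options = EventTimeWindowOptions.HourlyBatches;
        var now = DateTimeOffset.UtcNow;
        while (EventTimeWindowPlanner.GetNextWindow(now, watermark, options) is { } window)
        {
            // Fetch and process events in [window.Start, window.End), then advance the
            // watermark; the configured overlap re-covers late-arriving events next pass.
            watermark = window.End;
        }
    }
}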

View File

@@ -0,0 +1,502 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.DeadLetter;
/// <summary>
/// Notification channel types.
/// </summary>
public enum NotificationChannel
{
Email,
Slack,
Teams,
Webhook,
PagerDuty
}
/// <summary>
/// Notification rule for dead-letter events.
/// </summary>
public sealed record NotificationRule(
Guid RuleId,
string TenantId,
string? JobTypePattern,
string? ErrorCodePattern,
ErrorCategory? Category,
Guid? SourceId,
bool Enabled,
NotificationChannel Channel,
string Endpoint,
int CooldownMinutes,
int MaxPerHour,
bool Aggregate,
DateTimeOffset? LastNotifiedAt,
int NotificationsSent,
DateTimeOffset CreatedAt,
DateTimeOffset UpdatedAt,
string CreatedBy,
string UpdatedBy)
{
/// <summary>Creates a new notification rule.</summary>
public static NotificationRule Create(
string tenantId,
NotificationChannel channel,
string endpoint,
string createdBy,
string? jobTypePattern = null,
string? errorCodePattern = null,
ErrorCategory? category = null,
Guid? sourceId = null,
int cooldownMinutes = 15,
int maxPerHour = 10,
bool aggregate = true)
{
var now = DateTimeOffset.UtcNow;
return new NotificationRule(
RuleId: Guid.NewGuid(),
TenantId: tenantId,
JobTypePattern: jobTypePattern,
ErrorCodePattern: errorCodePattern,
Category: category,
SourceId: sourceId,
Enabled: true,
Channel: channel,
Endpoint: endpoint,
CooldownMinutes: cooldownMinutes,
MaxPerHour: maxPerHour,
Aggregate: aggregate,
LastNotifiedAt: null,
NotificationsSent: 0,
CreatedAt: now,
UpdatedAt: now,
CreatedBy: createdBy,
UpdatedBy: createdBy);
}
/// <summary>Checks if this rule matches the given entry.</summary>
public bool Matches(DeadLetterEntry entry)
{
if (!Enabled) return false;
if (SourceId.HasValue && entry.SourceId != SourceId.Value) return false;
if (Category.HasValue && entry.Category != Category.Value) return false;
if (!string.IsNullOrEmpty(JobTypePattern))
{
if (!System.Text.RegularExpressions.Regex.IsMatch(entry.JobType, JobTypePattern))
return false;
}
if (!string.IsNullOrEmpty(ErrorCodePattern))
{
if (!System.Text.RegularExpressions.Regex.IsMatch(entry.ErrorCode, ErrorCodePattern))
return false;
}
return true;
}
/// <summary>Checks if this rule is within rate limits.</summary>
public bool CanNotify(DateTimeOffset now, int notificationsSentThisHour)
{
if (!Enabled) return false;
if (notificationsSentThisHour >= MaxPerHour) return false;
if (LastNotifiedAt.HasValue)
{
var elapsed = now - LastNotifiedAt.Value;
if (elapsed < TimeSpan.FromMinutes(CooldownMinutes))
return false;
}
return true;
}
/// <summary>Records a notification sent.</summary>
public NotificationRule RecordNotification(DateTimeOffset now) =>
this with
{
LastNotifiedAt = now,
NotificationsSent = NotificationsSent + 1,
UpdatedAt = now
};
}
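/// <summary>
/// Usage sketch (illustrative): an immediate (non-aggregated) Slack rule scoped to
/// auth failures. The endpoint and identity values are placeholders.
/// </summary>
internal static class NotificationRuleUsageSketch
{
    internal static NotificationRule CreateAuthFailureRule() =>
        NotificationRule.Create(
            tenantId: "tenant-a",
            channel: NotificationChannel.Slack,
            endpoint: "https://hooks.slack.example/T000/B000",
            createdBy: "ops@example.com",
            category: ErrorCategory.AuthFailure,
            aggregate: false);
}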
/// <summary>
/// Notification log entry.
/// </summary>
public sealed record NotificationLogEntry(
Guid LogId,
string TenantId,
Guid RuleId,
IReadOnlyList<Guid> EntryIds,
NotificationChannel Channel,
string Endpoint,
bool Success,
string? ErrorMessage,
string? Subject,
int EntryCount,
DateTimeOffset SentAt);
/// <summary>
/// Notification payload for dead-letter events.
/// </summary>
public sealed record DeadLetterNotificationPayload(
string TenantId,
string EventType,
IReadOnlyList<DeadLetterEntrySummary> Entries,
DeadLetterStatsSnapshot? Stats,
DateTimeOffset Timestamp,
string? ActionUrl);
/// <summary>
/// Summary of a dead-letter entry for notifications.
/// </summary>
public sealed record DeadLetterEntrySummary(
Guid EntryId,
Guid OriginalJobId,
string JobType,
string ErrorCode,
ErrorCategory Category,
string FailureReason,
string? RemediationHint,
bool IsRetryable,
int ReplayAttempts,
DateTimeOffset FailedAt);
/// <summary>
/// Stats snapshot for notifications.
/// </summary>
public sealed record DeadLetterStatsSnapshot(
long PendingCount,
long RetryableCount,
long ExhaustedCount);
/// <summary>
/// Interface for dead-letter event notifications.
/// </summary>
public interface IDeadLetterNotifier
{
/// <summary>Notifies when a new entry is added to dead-letter store.</summary>
Task NotifyNewEntryAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken);
/// <summary>Notifies when an entry is successfully replayed.</summary>
Task NotifyReplaySuccessAsync(
DeadLetterEntry entry,
Guid newJobId,
CancellationToken cancellationToken);
/// <summary>Notifies when an entry exhausts all replay attempts.</summary>
Task NotifyExhaustedAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken);
/// <summary>Sends aggregated notifications for pending entries.</summary>
Task SendAggregatedNotificationsAsync(
string tenantId,
CancellationToken cancellationToken);
}
/// <summary>
/// Interface for notification delivery.
/// </summary>
public interface INotificationDelivery
{
/// <summary>Sends a notification to the specified endpoint.</summary>
Task<bool> SendAsync(
NotificationChannel channel,
string endpoint,
DeadLetterNotificationPayload payload,
CancellationToken cancellationToken);
}
/// <summary>
/// Repository for notification rules.
/// </summary>
public interface INotificationRuleRepository
{
Task<NotificationRule?> GetByIdAsync(string tenantId, Guid ruleId, CancellationToken cancellationToken);
Task<IReadOnlyList<NotificationRule>> ListAsync(string tenantId, bool enabledOnly, CancellationToken cancellationToken);
Task<IReadOnlyList<NotificationRule>> GetMatchingRulesAsync(string tenantId, DeadLetterEntry entry, CancellationToken cancellationToken);
Task CreateAsync(NotificationRule rule, CancellationToken cancellationToken);
Task<bool> UpdateAsync(NotificationRule rule, CancellationToken cancellationToken);
Task<bool> DeleteAsync(string tenantId, Guid ruleId, CancellationToken cancellationToken);
Task<int> GetNotificationCountThisHourAsync(string tenantId, Guid ruleId, CancellationToken cancellationToken);
Task LogNotificationAsync(NotificationLogEntry log, CancellationToken cancellationToken);
}
/// <summary>
/// Default dead-letter notifier implementation.
/// </summary>
public sealed class DeadLetterNotifier : IDeadLetterNotifier
{
private readonly INotificationRuleRepository _ruleRepository;
private readonly IDeadLetterRepository _deadLetterRepository;
private readonly INotificationDelivery _delivery;
private readonly TimeProvider _timeProvider;
private readonly ILogger<DeadLetterNotifier> _logger;
public DeadLetterNotifier(
INotificationRuleRepository ruleRepository,
IDeadLetterRepository deadLetterRepository,
INotificationDelivery delivery,
TimeProvider timeProvider,
ILogger<DeadLetterNotifier> logger)
{
_ruleRepository = ruleRepository ?? throw new ArgumentNullException(nameof(ruleRepository));
_deadLetterRepository = deadLetterRepository ?? throw new ArgumentNullException(nameof(deadLetterRepository));
_delivery = delivery ?? throw new ArgumentNullException(nameof(delivery));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task NotifyNewEntryAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken)
{
var rules = await _ruleRepository.GetMatchingRulesAsync(entry.TenantId, entry, cancellationToken)
.ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
foreach (var rule in rules)
{
if (rule.Aggregate)
{
// Skip immediate notification for aggregated rules
continue;
}
var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
entry.TenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);
if (!rule.CanNotify(now, notificationsThisHour))
{
continue;
}
await SendNotificationAsync(rule, "new_entry", [entry], null, cancellationToken)
.ConfigureAwait(false);
}
}
public async Task NotifyReplaySuccessAsync(
DeadLetterEntry entry,
Guid newJobId,
CancellationToken cancellationToken)
{
var rules = await _ruleRepository.GetMatchingRulesAsync(entry.TenantId, entry, cancellationToken)
.ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
foreach (var rule in rules)
{
var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
entry.TenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);
if (!rule.CanNotify(now, notificationsThisHour))
{
continue;
}
var payload = new DeadLetterNotificationPayload(
TenantId: entry.TenantId,
EventType: "replay_success",
Entries: [ToSummary(entry)],
Stats: null,
Timestamp: now,
ActionUrl: null);
var success = await _delivery.SendAsync(rule.Channel, rule.Endpoint, payload, cancellationToken)
.ConfigureAwait(false);
await LogNotificationAsync(rule, [entry.EntryId], success, null, cancellationToken)
.ConfigureAwait(false);
}
}
public async Task NotifyExhaustedAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken)
{
var rules = await _ruleRepository.GetMatchingRulesAsync(entry.TenantId, entry, cancellationToken)
.ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
foreach (var rule in rules)
{
var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
entry.TenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);
if (!rule.CanNotify(now, notificationsThisHour))
{
continue;
}
await SendNotificationAsync(rule, "exhausted", [entry], null, cancellationToken)
.ConfigureAwait(false);
}
}
public async Task SendAggregatedNotificationsAsync(
string tenantId,
CancellationToken cancellationToken)
{
var rules = await _ruleRepository.ListAsync(tenantId, enabledOnly: true, cancellationToken)
.ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var stats = await _deadLetterRepository.GetStatsAsync(tenantId, cancellationToken).ConfigureAwait(false);
foreach (var rule in rules.Where(r => r.Aggregate))
{
var notificationsThisHour = await _ruleRepository.GetNotificationCountThisHourAsync(
tenantId, rule.RuleId, cancellationToken).ConfigureAwait(false);
if (!rule.CanNotify(now, notificationsThisHour))
{
continue;
}
// Get pending entries matching this rule
var options = new DeadLetterListOptions(
Status: DeadLetterStatus.Pending,
Category: rule.Category,
Limit: 10);
var entries = await _deadLetterRepository.ListAsync(tenantId, options, cancellationToken)
.ConfigureAwait(false);
// Filter to only matching entries
var matchingEntries = entries.Where(e => rule.Matches(e)).ToList();
if (matchingEntries.Count == 0)
{
continue;
}
var statsSnapshot = new DeadLetterStatsSnapshot(
PendingCount: stats.PendingEntries,
RetryableCount: stats.RetryableEntries,
ExhaustedCount: stats.ExhaustedEntries);
await SendNotificationAsync(rule, "aggregated", matchingEntries, statsSnapshot, cancellationToken)
.ConfigureAwait(false);
}
}
private async Task SendNotificationAsync(
NotificationRule rule,
string eventType,
IReadOnlyList<DeadLetterEntry> entries,
DeadLetterStatsSnapshot? stats,
CancellationToken cancellationToken)
{
var now = _timeProvider.GetUtcNow();
var payload = new DeadLetterNotificationPayload(
TenantId: rule.TenantId,
EventType: eventType,
Entries: entries.Select(ToSummary).ToList(),
Stats: stats,
Timestamp: now,
ActionUrl: null);
string? errorMessage = null;
bool success;
try
{
success = await _delivery.SendAsync(rule.Channel, rule.Endpoint, payload, cancellationToken)
.ConfigureAwait(false);
}
catch (Exception ex)
{
success = false;
errorMessage = ex.Message;
_logger.LogError(ex, "Failed to send {EventType} notification for rule {RuleId}", eventType, rule.RuleId);
}
await LogNotificationAsync(rule, entries.Select(e => e.EntryId).ToList(), success, errorMessage, cancellationToken)
.ConfigureAwait(false);
if (success)
{
var updatedRule = rule.RecordNotification(now);
await _ruleRepository.UpdateAsync(updatedRule, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Dead-letter notification sent: tenant={TenantId}, channel={Channel}, eventType={EventType}",
rule.TenantId, rule.Channel, eventType);
}
else
{
_logger.LogWarning(
"Dead-letter notification failed: tenant={TenantId}, channel={Channel}, eventType={EventType}",
rule.TenantId, rule.Channel, eventType);
}
}
private async Task LogNotificationAsync(
NotificationRule rule,
IReadOnlyList<Guid> entryIds,
bool success,
string? errorMessage,
CancellationToken cancellationToken)
{
var log = new NotificationLogEntry(
LogId: Guid.NewGuid(),
TenantId: rule.TenantId,
RuleId: rule.RuleId,
EntryIds: entryIds,
Channel: rule.Channel,
Endpoint: rule.Endpoint,
Success: success,
ErrorMessage: errorMessage,
Subject: null,
EntryCount: entryIds.Count,
SentAt: _timeProvider.GetUtcNow());
await _ruleRepository.LogNotificationAsync(log, cancellationToken).ConfigureAwait(false);
}
private static DeadLetterEntrySummary ToSummary(DeadLetterEntry entry) =>
new(
EntryId: entry.EntryId,
OriginalJobId: entry.OriginalJobId,
JobType: entry.JobType,
ErrorCode: entry.ErrorCode,
Category: entry.Category,
FailureReason: entry.FailureReason,
RemediationHint: entry.RemediationHint,
IsRetryable: entry.IsRetryable,
ReplayAttempts: entry.ReplayAttempts,
FailedAt: entry.FailedAt);
}
/// <summary>
/// No-op notifier for when notifications are disabled.
/// </summary>
public sealed class NullDeadLetterNotifier : IDeadLetterNotifier
{
public static readonly NullDeadLetterNotifier Instance = new();
private NullDeadLetterNotifier() { }
public Task NotifyNewEntryAsync(DeadLetterEntry entry, CancellationToken cancellationToken) =>
Task.CompletedTask;
public Task NotifyReplaySuccessAsync(DeadLetterEntry entry, Guid newJobId, CancellationToken cancellationToken) =>
Task.CompletedTask;
public Task NotifyExhaustedAsync(DeadLetterEntry entry, CancellationToken cancellationToken) =>
Task.CompletedTask;
public Task SendAggregatedNotificationsAsync(string tenantId, CancellationToken cancellationToken) =>
Task.CompletedTask;
}
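/// <summary>
/// Minimal delivery sketch (illustrative, not the shipped implementation): posts the
/// payload as JSON to the endpoint. Assumes SDK implicit usings for System.Net.Http;
/// a real implementation would add retries, signing, and per-channel formatting.
/// </summary>
public sealed class WebhookNotificationDeliverySketch : INotificationDelivery
{
    private readonly HttpClient _httpClient;

    public WebhookNotificationDeliverySketch(HttpClient httpClient) =>
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));

    public async Task<bool> SendAsync(
        NotificationChannel channel,
        string endpoint,
        DeadLetterNotificationPayload payload,
        CancellationToken cancellationToken)
    {
        var json = System.Text.Json.JsonSerializer.Serialize(payload);
        using var content = new StringContent(json, System.Text.Encoding.UTF8, "application/json");
        using var response = await _httpClient.PostAsync(endpoint, content, cancellationToken)
            .ConfigureAwait(false);
        return response.IsSuccessStatusCode;
    }
}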

View File

@@ -0,0 +1,578 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.DeadLetter;
/// <summary>
/// Represents a classified error with remediation guidance.
/// </summary>
public sealed record ClassifiedError(
/// <summary>Error code (e.g., "ORCH-ERR-001").</summary>
string ErrorCode,
/// <summary>Error category.</summary>
ErrorCategory Category,
/// <summary>Human-readable description.</summary>
string Description,
/// <summary>Remediation hint for operators.</summary>
string RemediationHint,
/// <summary>Whether this error is potentially retryable.</summary>
bool IsRetryable,
/// <summary>Suggested retry delay if retryable.</summary>
TimeSpan? SuggestedRetryDelay);
/// <summary>
/// Classifies errors and provides remediation hints.
/// </summary>
public interface IErrorClassifier
{
/// <summary>Classifies an exception into a categorized error.</summary>
ClassifiedError Classify(Exception exception);
/// <summary>Classifies an error code and message.</summary>
ClassifiedError Classify(string errorCode, string message);
/// <summary>Classifies based on HTTP status code and message.</summary>
ClassifiedError ClassifyHttpError(int statusCode, string? message);
}
/// <summary>
/// Default error classifier with standard error codes and remediation hints.
/// </summary>
public sealed class DefaultErrorClassifier : IErrorClassifier
{
/// <summary>Known error codes with classifications.</summary>
public static class ErrorCodes
{
// Transient errors (ORCH-TRN-xxx)
public const string NetworkTimeout = "ORCH-TRN-001";
public const string ConnectionRefused = "ORCH-TRN-002";
public const string DnsResolutionFailed = "ORCH-TRN-003";
public const string ServiceUnavailable = "ORCH-TRN-004";
public const string GatewayTimeout = "ORCH-TRN-005";
public const string TemporaryFailure = "ORCH-TRN-099";
// Not found errors (ORCH-NF-xxx)
public const string ImageNotFound = "ORCH-NF-001";
public const string SourceNotFound = "ORCH-NF-002";
public const string RegistryNotFound = "ORCH-NF-003";
public const string ManifestNotFound = "ORCH-NF-004";
public const string ResourceNotFound = "ORCH-NF-099";
// Auth errors (ORCH-AUTH-xxx)
public const string InvalidCredentials = "ORCH-AUTH-001";
public const string TokenExpired = "ORCH-AUTH-002";
public const string InsufficientPermissions = "ORCH-AUTH-003";
public const string CertificateError = "ORCH-AUTH-004";
public const string AuthenticationFailed = "ORCH-AUTH-099";
// Rate limit errors (ORCH-RL-xxx)
public const string RateLimited = "ORCH-RL-001";
public const string QuotaExceeded = "ORCH-RL-002";
public const string ConcurrencyLimitReached = "ORCH-RL-003";
public const string ThrottlingError = "ORCH-RL-099";
// Validation errors (ORCH-VAL-xxx)
public const string InvalidPayload = "ORCH-VAL-001";
public const string InvalidConfiguration = "ORCH-VAL-002";
public const string SchemaValidationFailed = "ORCH-VAL-003";
public const string MissingRequiredField = "ORCH-VAL-004";
public const string ValidationFailed = "ORCH-VAL-099";
// Upstream errors (ORCH-UP-xxx)
public const string RegistryError = "ORCH-UP-001";
public const string AdvisoryFeedError = "ORCH-UP-002";
public const string DatabaseError = "ORCH-UP-003";
public const string ExternalServiceError = "ORCH-UP-099";
// Internal errors (ORCH-INT-xxx)
public const string InternalError = "ORCH-INT-001";
public const string StateCorruption = "ORCH-INT-002";
public const string ProcessingError = "ORCH-INT-003";
public const string UnexpectedError = "ORCH-INT-099";
// Conflict errors (ORCH-CON-xxx)
public const string DuplicateJob = "ORCH-CON-001";
public const string VersionMismatch = "ORCH-CON-002";
public const string ConcurrentModification = "ORCH-CON-003";
public const string ConflictError = "ORCH-CON-099";
// Canceled errors (ORCH-CAN-xxx)
public const string UserCanceled = "ORCH-CAN-001";
public const string SystemCanceled = "ORCH-CAN-002";
public const string TimeoutCanceled = "ORCH-CAN-003";
public const string OperationCanceled = "ORCH-CAN-099";
}
private static readonly Dictionary<string, ClassifiedError> KnownErrors = new()
{
// Transient errors
[ErrorCodes.NetworkTimeout] = new(
ErrorCodes.NetworkTimeout,
ErrorCategory.Transient,
"Network operation timed out",
"Check network connectivity and firewall rules. If the target service is healthy, increase timeout settings.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(1)),
[ErrorCodes.ConnectionRefused] = new(
ErrorCodes.ConnectionRefused,
ErrorCategory.Transient,
"Connection refused by target host",
"Verify the target service is running and accessible. Check firewall rules and network policies.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(2)),
[ErrorCodes.DnsResolutionFailed] = new(
ErrorCodes.DnsResolutionFailed,
ErrorCategory.Transient,
"DNS resolution failed",
"Verify the hostname is correct. Check DNS server configuration and network connectivity.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(1)),
[ErrorCodes.ServiceUnavailable] = new(
ErrorCodes.ServiceUnavailable,
ErrorCategory.Transient,
"Service temporarily unavailable (503)",
"The target service is temporarily overloaded or under maintenance. Retry with exponential backoff.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(5)),
[ErrorCodes.GatewayTimeout] = new(
ErrorCodes.GatewayTimeout,
ErrorCategory.Transient,
"Gateway timeout (504)",
"An upstream service took too long to respond. This is typically transient; retry with backoff.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(2)),
[ErrorCodes.TemporaryFailure] = new(
ErrorCodes.TemporaryFailure,
ErrorCategory.Transient,
"Temporary failure",
"A transient error occurred. Retry the operation after a brief delay.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(1)),
// Not found errors
[ErrorCodes.ImageNotFound] = new(
ErrorCodes.ImageNotFound,
ErrorCategory.NotFound,
"Container image not found",
"Verify the image reference is correct (repository, tag, digest). Check registry access and that the image exists.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.SourceNotFound] = new(
ErrorCodes.SourceNotFound,
ErrorCategory.NotFound,
"Source configuration not found",
"The referenced source may have been deleted. Verify the source ID and recreate if necessary.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.RegistryNotFound] = new(
ErrorCodes.RegistryNotFound,
ErrorCategory.NotFound,
"Container registry not found",
"Verify the registry URL is correct. Check DNS resolution and that the registry is operational.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.ManifestNotFound] = new(
ErrorCodes.ManifestNotFound,
ErrorCategory.NotFound,
"Image manifest not found",
"The image exists but the manifest is missing. The image may have been deleted or the tag moved.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.ResourceNotFound] = new(
ErrorCodes.ResourceNotFound,
ErrorCategory.NotFound,
"Resource not found",
"The requested resource does not exist. Verify the resource identifier is correct.",
IsRetryable: false,
SuggestedRetryDelay: null),
// Auth errors
[ErrorCodes.InvalidCredentials] = new(
ErrorCodes.InvalidCredentials,
ErrorCategory.AuthFailure,
"Invalid credentials",
"The provided credentials are invalid. Update the registry credentials in the source configuration.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.TokenExpired] = new(
ErrorCodes.TokenExpired,
ErrorCategory.AuthFailure,
"Authentication token expired",
"The authentication token has expired. Refresh credentials or re-authenticate to obtain a new token.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(1)),
[ErrorCodes.InsufficientPermissions] = new(
ErrorCodes.InsufficientPermissions,
ErrorCategory.AuthFailure,
"Insufficient permissions",
"The authenticated user lacks required permissions. Request access from the registry administrator.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.CertificateError] = new(
ErrorCodes.CertificateError,
ErrorCategory.AuthFailure,
"TLS certificate error",
"Certificate validation failed. Verify the CA bundle or add the registry's certificate to trusted roots.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.AuthenticationFailed] = new(
ErrorCodes.AuthenticationFailed,
ErrorCategory.AuthFailure,
"Authentication failed",
"Unable to authenticate with the target service. Verify credentials and authentication configuration.",
IsRetryable: false,
SuggestedRetryDelay: null),
// Rate limit errors
[ErrorCodes.RateLimited] = new(
ErrorCodes.RateLimited,
ErrorCategory.RateLimited,
"Rate limit exceeded (429)",
"Request rate limit exceeded. Reduce request frequency or upgrade service tier. Will auto-retry with backoff.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(5)),
[ErrorCodes.QuotaExceeded] = new(
ErrorCodes.QuotaExceeded,
ErrorCategory.RateLimited,
"Quota exceeded",
"Usage quota has been exceeded. Wait for quota reset or request quota increase.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromHours(1)),
[ErrorCodes.ConcurrencyLimitReached] = new(
ErrorCodes.ConcurrencyLimitReached,
ErrorCategory.RateLimited,
"Concurrency limit reached",
"Maximum concurrent operations limit reached. Reduce parallel operations or increase limit.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(1)),
[ErrorCodes.ThrottlingError] = new(
ErrorCodes.ThrottlingError,
ErrorCategory.RateLimited,
"Request throttled",
"Request was throttled due to rate limits. Retry with exponential backoff.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(2)),
// Validation errors
[ErrorCodes.InvalidPayload] = new(
ErrorCodes.InvalidPayload,
ErrorCategory.ValidationError,
"Invalid job payload",
"The job payload is malformed or invalid. Review the payload structure and fix validation errors.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.InvalidConfiguration] = new(
ErrorCodes.InvalidConfiguration,
ErrorCategory.ValidationError,
"Invalid configuration",
"Source or job configuration is invalid. Review and correct the configuration settings.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.SchemaValidationFailed] = new(
ErrorCodes.SchemaValidationFailed,
ErrorCategory.ValidationError,
"Schema validation failed",
"Input data failed schema validation. Ensure data conforms to the expected schema.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.MissingRequiredField] = new(
ErrorCodes.MissingRequiredField,
ErrorCategory.ValidationError,
"Missing required field",
"A required field is missing from the input. Provide all required fields.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.ValidationFailed] = new(
ErrorCodes.ValidationFailed,
ErrorCategory.ValidationError,
"Validation failed",
"Input validation failed. Review the error details and correct the input.",
IsRetryable: false,
SuggestedRetryDelay: null),
// Upstream errors
[ErrorCodes.RegistryError] = new(
ErrorCodes.RegistryError,
ErrorCategory.UpstreamError,
"Container registry error",
"The container registry returned an error. Check registry status and logs for details.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(5)),
[ErrorCodes.AdvisoryFeedError] = new(
ErrorCodes.AdvisoryFeedError,
ErrorCategory.UpstreamError,
"Advisory feed error",
"Error fetching from advisory feed. Check feed URL and authentication. May be temporary.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(15)),
[ErrorCodes.DatabaseError] = new(
ErrorCodes.DatabaseError,
ErrorCategory.UpstreamError,
"Database error",
"Database operation failed. Check database connectivity and status.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(1)),
[ErrorCodes.ExternalServiceError] = new(
ErrorCodes.ExternalServiceError,
ErrorCategory.UpstreamError,
"External service error",
"An external service dependency failed. Check service status and connectivity.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(5)),
// Internal errors
[ErrorCodes.InternalError] = new(
ErrorCodes.InternalError,
ErrorCategory.InternalError,
"Internal processing error",
"An internal error occurred. This may indicate a bug. Please report if persistent.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.StateCorruption] = new(
ErrorCodes.StateCorruption,
ErrorCategory.InternalError,
"State corruption detected",
"Internal state corruption detected. Manual intervention may be required.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.ProcessingError] = new(
ErrorCodes.ProcessingError,
ErrorCategory.InternalError,
"Processing error",
"Error during job processing. Review job payload and configuration.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.UnexpectedError] = new(
ErrorCodes.UnexpectedError,
ErrorCategory.InternalError,
"Unexpected error",
"An unexpected error occurred. This may indicate a bug. Please report with error details.",
IsRetryable: false,
SuggestedRetryDelay: null),
// Conflict errors
[ErrorCodes.DuplicateJob] = new(
ErrorCodes.DuplicateJob,
ErrorCategory.Conflict,
"Duplicate job detected",
"A job with the same idempotency key already exists. This is expected for retry scenarios.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.VersionMismatch] = new(
ErrorCodes.VersionMismatch,
ErrorCategory.Conflict,
"Version mismatch",
"Resource version conflict detected. Refresh and retry the operation.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromSeconds(5)),
[ErrorCodes.ConcurrentModification] = new(
ErrorCodes.ConcurrentModification,
ErrorCategory.Conflict,
"Concurrent modification",
"Resource was modified concurrently. Refresh state and retry.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromSeconds(5)),
[ErrorCodes.ConflictError] = new(
ErrorCodes.ConflictError,
ErrorCategory.Conflict,
"Resource conflict",
"A resource conflict occurred. Check for concurrent operations.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromSeconds(10)),
// Canceled errors
[ErrorCodes.UserCanceled] = new(
ErrorCodes.UserCanceled,
ErrorCategory.Canceled,
"Canceled by user",
"Operation was canceled by user request. No action required unless retry is desired.",
IsRetryable: false,
SuggestedRetryDelay: null),
[ErrorCodes.SystemCanceled] = new(
ErrorCodes.SystemCanceled,
ErrorCategory.Canceled,
"Canceled by system",
"Operation was canceled by the system (e.g., shutdown, quota). May be automatically rescheduled.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(5)),
[ErrorCodes.TimeoutCanceled] = new(
ErrorCodes.TimeoutCanceled,
ErrorCategory.Canceled,
"Canceled due to timeout",
"Operation exceeded its time limit. Consider increasing timeout or optimizing the operation.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(2)),
[ErrorCodes.OperationCanceled] = new(
ErrorCodes.OperationCanceled,
ErrorCategory.Canceled,
"Operation canceled",
"The operation was canceled. Check cancellation source for details.",
IsRetryable: false,
SuggestedRetryDelay: null)
};
/// <inheritdoc />
public ClassifiedError Classify(Exception exception)
{
ArgumentNullException.ThrowIfNull(exception);
return exception switch
{
OperationCanceledException => KnownErrors[ErrorCodes.OperationCanceled],
TimeoutException => KnownErrors[ErrorCodes.NetworkTimeout],
HttpRequestException httpEx => ClassifyHttpException(httpEx),
_ when exception.Message.Contains("connection refused", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.ConnectionRefused],
_ when exception.Message.Contains("DNS", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.DnsResolutionFailed],
_ when exception.Message.Contains("timeout", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.NetworkTimeout],
_ when exception.Message.Contains("certificate", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.CertificateError],
_ when exception.Message.Contains("unauthorized", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.AuthenticationFailed],
_ when exception.Message.Contains("forbidden", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.InsufficientPermissions],
_ => new ClassifiedError(
ErrorCodes.UnexpectedError,
ErrorCategory.InternalError,
exception.GetType().Name,
$"Unexpected error: {exception.Message}. Review stack trace for details.",
IsRetryable: false,
SuggestedRetryDelay: null)
};
}
/// <inheritdoc />
public ClassifiedError Classify(string errorCode, string message)
{
ArgumentException.ThrowIfNullOrWhiteSpace(errorCode);
if (KnownErrors.TryGetValue(errorCode, out var known))
{
return known;
}
// Try to infer from error code prefix
var category = errorCode switch
{
_ when errorCode.StartsWith("ORCH-TRN-", StringComparison.Ordinal) => ErrorCategory.Transient,
_ when errorCode.StartsWith("ORCH-NF-", StringComparison.Ordinal) => ErrorCategory.NotFound,
_ when errorCode.StartsWith("ORCH-AUTH-", StringComparison.Ordinal) => ErrorCategory.AuthFailure,
_ when errorCode.StartsWith("ORCH-RL-", StringComparison.Ordinal) => ErrorCategory.RateLimited,
_ when errorCode.StartsWith("ORCH-VAL-", StringComparison.Ordinal) => ErrorCategory.ValidationError,
_ when errorCode.StartsWith("ORCH-UP-", StringComparison.Ordinal) => ErrorCategory.UpstreamError,
_ when errorCode.StartsWith("ORCH-INT-", StringComparison.Ordinal) => ErrorCategory.InternalError,
_ when errorCode.StartsWith("ORCH-CON-", StringComparison.Ordinal) => ErrorCategory.Conflict,
_ when errorCode.StartsWith("ORCH-CAN-", StringComparison.Ordinal) => ErrorCategory.Canceled,
_ => ErrorCategory.Unknown
};
var isRetryable = category is ErrorCategory.Transient or ErrorCategory.RateLimited or ErrorCategory.UpstreamError;
return new ClassifiedError(
errorCode,
category,
message,
"Unknown error code. Review the error message for details.",
isRetryable,
isRetryable ? TimeSpan.FromMinutes(5) : null);
}
/// <inheritdoc />
public ClassifiedError ClassifyHttpError(int statusCode, string? message)
{
return statusCode switch
{
400 => KnownErrors[ErrorCodes.ValidationFailed],
401 => KnownErrors[ErrorCodes.AuthenticationFailed],
403 => KnownErrors[ErrorCodes.InsufficientPermissions],
404 => KnownErrors[ErrorCodes.ResourceNotFound],
408 => KnownErrors[ErrorCodes.NetworkTimeout],
409 => KnownErrors[ErrorCodes.ConflictError],
429 => KnownErrors[ErrorCodes.RateLimited],
500 => KnownErrors[ErrorCodes.InternalError],
502 => KnownErrors[ErrorCodes.ExternalServiceError],
503 => KnownErrors[ErrorCodes.ServiceUnavailable],
504 => KnownErrors[ErrorCodes.GatewayTimeout],
_ when statusCode >= 400 && statusCode < 500 => new ClassifiedError(
$"HTTP-{statusCode}",
ErrorCategory.ValidationError,
message ?? $"HTTP {statusCode} error",
"Client error. Review request parameters.",
IsRetryable: false,
SuggestedRetryDelay: null),
_ when statusCode >= 500 => new ClassifiedError(
$"HTTP-{statusCode}",
ErrorCategory.UpstreamError,
message ?? $"HTTP {statusCode} error",
"Server error. May be transient; retry with backoff.",
IsRetryable: true,
SuggestedRetryDelay: TimeSpan.FromMinutes(2)),
_ => new ClassifiedError(
$"HTTP-{statusCode}",
ErrorCategory.Unknown,
message ?? $"HTTP {statusCode}",
"Unexpected HTTP status. Review response for details.",
IsRetryable: false,
SuggestedRetryDelay: null)
};
}
private ClassifiedError ClassifyHttpException(HttpRequestException ex)
{
if (ex.StatusCode.HasValue)
{
return ClassifyHttpError((int)ex.StatusCode.Value, ex.Message);
}
// No status code - likely a connection error
return ex.Message switch
{
_ when ex.Message.Contains("connection refused", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.ConnectionRefused],
_ when ex.Message.Contains("name resolution", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.DnsResolutionFailed],
_ when ex.Message.Contains("SSL", StringComparison.OrdinalIgnoreCase) ||
ex.Message.Contains("TLS", StringComparison.OrdinalIgnoreCase)
=> KnownErrors[ErrorCodes.CertificateError],
_ => KnownErrors[ErrorCodes.ExternalServiceError]
};
}
}
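/// <summary>
/// Usage sketch (illustrative): classify a failure and decide whether to retry.
/// </summary>
internal static class ErrorClassifierUsageSketch
{
    internal static TimeSpan? GetRetryDelay(Exception exception)
    {
        IErrorClassifier classifier = new DefaultErrorClassifier();
        var classified = classifier.Classify(exception);
        // Retryable classifications carry a suggested delay; the rest return null.
        return classified.IsRetryable ? classified.SuggestedRetryDelay : null;
    }
}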

View File

@@ -0,0 +1,221 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.DeadLetter;
/// <summary>
/// Repository for dead-letter entry persistence.
/// </summary>
public interface IDeadLetterRepository
{
/// <summary>Gets a dead-letter entry by ID.</summary>
Task<DeadLetterEntry?> GetByIdAsync(
string tenantId,
Guid entryId,
CancellationToken cancellationToken);
/// <summary>Gets a dead-letter entry by original job ID.</summary>
Task<DeadLetterEntry?> GetByOriginalJobIdAsync(
string tenantId,
Guid originalJobId,
CancellationToken cancellationToken);
/// <summary>Lists dead-letter entries with filtering and pagination.</summary>
Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
string tenantId,
DeadLetterListOptions options,
CancellationToken cancellationToken);
/// <summary>Counts dead-letter entries with filtering.</summary>
Task<long> CountAsync(
string tenantId,
DeadLetterListOptions options,
CancellationToken cancellationToken);
/// <summary>Creates a new dead-letter entry.</summary>
Task CreateAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken);
/// <summary>Updates an existing dead-letter entry.</summary>
Task<bool> UpdateAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken);
/// <summary>Gets entries pending replay that are retryable.</summary>
Task<IReadOnlyList<DeadLetterEntry>> GetPendingRetryableAsync(
string tenantId,
int limit,
CancellationToken cancellationToken);
/// <summary>Gets entries by error code.</summary>
Task<IReadOnlyList<DeadLetterEntry>> GetByErrorCodeAsync(
string tenantId,
string errorCode,
DeadLetterStatus? status,
int limit,
CancellationToken cancellationToken);
/// <summary>Gets entries by category.</summary>
Task<IReadOnlyList<DeadLetterEntry>> GetByCategoryAsync(
string tenantId,
ErrorCategory category,
DeadLetterStatus? status,
int limit,
CancellationToken cancellationToken);
/// <summary>Gets aggregated statistics.</summary>
Task<DeadLetterStats> GetStatsAsync(
string tenantId,
CancellationToken cancellationToken);
/// <summary>Gets a summary of actionable entries grouped by error code.</summary>
Task<IReadOnlyList<DeadLetterSummary>> GetActionableSummaryAsync(
string tenantId,
int limit,
CancellationToken cancellationToken);
/// <summary>Marks expired entries.</summary>
Task<int> MarkExpiredAsync(
int batchLimit,
CancellationToken cancellationToken);
/// <summary>Purges old resolved/expired entries.</summary>
Task<int> PurgeOldEntriesAsync(
int retentionDays,
int batchLimit,
CancellationToken cancellationToken);
}
/// <summary>
/// Options for listing dead-letter entries.
/// </summary>
public sealed record DeadLetterListOptions(
DeadLetterStatus? Status = null,
ErrorCategory? Category = null,
string? JobType = null,
string? ErrorCode = null,
Guid? SourceId = null,
Guid? RunId = null,
bool? IsRetryable = null,
DateTimeOffset? CreatedAfter = null,
DateTimeOffset? CreatedBefore = null,
string? Cursor = null,
int Limit = 50,
bool Ascending = false);
/// <summary>
/// Aggregated dead-letter statistics.
/// </summary>
public sealed record DeadLetterStats(
long TotalEntries,
long PendingEntries,
long ReplayingEntries,
long ReplayedEntries,
long ResolvedEntries,
long ExhaustedEntries,
long ExpiredEntries,
long RetryableEntries,
IReadOnlyDictionary<ErrorCategory, long> ByCategory,
IReadOnlyDictionary<string, long> TopErrorCodes,
IReadOnlyDictionary<string, long> TopJobTypes);
/// <summary>
/// Summary of dead-letter entries grouped by error code.
/// </summary>
public sealed record DeadLetterSummary(
string ErrorCode,
ErrorCategory Category,
long EntryCount,
long RetryableCount,
DateTimeOffset OldestEntry,
string? SampleReason);
/// <summary>
/// Repository for replay audit records.
/// </summary>
public interface IReplayAuditRepository
{
/// <summary>Gets audit records for an entry.</summary>
Task<IReadOnlyList<ReplayAuditRecord>> GetByEntryAsync(
string tenantId,
Guid entryId,
CancellationToken cancellationToken);
/// <summary>Gets a specific audit record.</summary>
Task<ReplayAuditRecord?> GetByIdAsync(
string tenantId,
Guid auditId,
CancellationToken cancellationToken);
/// <summary>Creates a new audit record.</summary>
Task CreateAsync(
ReplayAuditRecord record,
CancellationToken cancellationToken);
/// <summary>Updates an audit record (completion).</summary>
Task<bool> UpdateAsync(
ReplayAuditRecord record,
CancellationToken cancellationToken);
/// <summary>Gets audit records for a new job ID (to find replay source).</summary>
Task<ReplayAuditRecord?> GetByNewJobIdAsync(
string tenantId,
Guid newJobId,
CancellationToken cancellationToken);
}
/// <summary>
/// Replay attempt audit record.
/// </summary>
public sealed record ReplayAuditRecord(
Guid AuditId,
string TenantId,
Guid EntryId,
int AttemptNumber,
bool Success,
Guid? NewJobId,
string? ErrorMessage,
string TriggeredBy,
DateTimeOffset TriggeredAt,
DateTimeOffset? CompletedAt,
string InitiatedBy)
{
/// <summary>Creates a new audit record for a replay attempt.</summary>
public static ReplayAuditRecord Create(
string tenantId,
Guid entryId,
int attemptNumber,
string triggeredBy,
string initiatedBy,
DateTimeOffset now) =>
new(
AuditId: Guid.NewGuid(),
TenantId: tenantId,
EntryId: entryId,
AttemptNumber: attemptNumber,
Success: false,
NewJobId: null,
ErrorMessage: null,
TriggeredBy: triggeredBy,
TriggeredAt: now,
CompletedAt: null,
InitiatedBy: initiatedBy);
/// <summary>Marks the replay as successful.</summary>
public ReplayAuditRecord Complete(Guid newJobId, DateTimeOffset now) =>
this with
{
Success = true,
NewJobId = newJobId,
CompletedAt = now
};
/// <summary>Marks the replay as failed.</summary>
public ReplayAuditRecord Fail(string errorMessage, DateTimeOffset now) =>
this with
{
Success = false,
ErrorMessage = errorMessage,
CompletedAt = now
};
}
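/// <summary>
/// Lifecycle sketch (illustrative): an audit record is created pending, then marked
/// complete or failed exactly once. Identity values below are placeholders.
/// </summary>
internal static class ReplayAuditRecordUsageSketch
{
    internal static ReplayAuditRecord CompletedAttempt(
        string tenantId, Guid entryId, Guid newJobId, DateTimeOffset now)
    {
        var record = ReplayAuditRecord.Create(
            tenantId, entryId, attemptNumber: 1,
            triggeredBy: "manual", initiatedBy: "ops@example.com", now: now);
        return record.Complete(newJobId, now);
    }
}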

View File

@@ -0,0 +1,472 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.DeadLetter;
/// <summary>
/// Options for replay manager configuration.
/// </summary>
public sealed record ReplayManagerOptions(
/// <summary>Default maximum replay attempts.</summary>
int DefaultMaxReplayAttempts = 3,
/// <summary>Default retention period for dead-letter entries.</summary>
TimeSpan DefaultRetention = default,
/// <summary>Minimum delay between replay attempts.</summary>
TimeSpan MinReplayDelay = default,
/// <summary>Maximum batch size for bulk operations.</summary>
int MaxBatchSize = 100,
/// <summary>Enable automatic replay of retryable entries.</summary>
bool AutoReplayEnabled = false,
/// <summary>Delay before automatic replay.</summary>
TimeSpan AutoReplayDelay = default)
{
/// <summary>Default options.</summary>
public static ReplayManagerOptions Default => new(
DefaultMaxReplayAttempts: 3,
DefaultRetention: TimeSpan.FromDays(30),
MinReplayDelay: TimeSpan.FromMinutes(5),
MaxBatchSize: 100,
AutoReplayEnabled: false,
AutoReplayDelay: TimeSpan.FromMinutes(15));
}
/// <summary>
/// Result of a replay operation. <see cref="UpdatedEntry"/> is null when the entry
/// could not be loaded.
/// </summary>
public sealed record ReplayResult(
    bool Success,
    Guid? NewJobId,
    string? ErrorMessage,
    DeadLetterEntry? UpdatedEntry);
/// <summary>
/// Result of a batch replay operation.
/// </summary>
public sealed record BatchReplayResult(
int Attempted,
int Succeeded,
int Failed,
IReadOnlyList<ReplayResult> Results);
/// <summary>
/// Manages dead-letter entry replay operations.
/// </summary>
public interface IReplayManager
{
/// <summary>Replays a single dead-letter entry.</summary>
Task<ReplayResult> ReplayAsync(
string tenantId,
Guid entryId,
string initiatedBy,
CancellationToken cancellationToken);
/// <summary>Replays multiple entries by ID.</summary>
Task<BatchReplayResult> ReplayBatchAsync(
string tenantId,
IReadOnlyList<Guid> entryIds,
string initiatedBy,
CancellationToken cancellationToken);
/// <summary>Replays all pending retryable entries matching criteria.</summary>
Task<BatchReplayResult> ReplayPendingAsync(
string tenantId,
string? errorCode,
ErrorCategory? category,
int maxCount,
string initiatedBy,
CancellationToken cancellationToken);
/// <summary>Resolves an entry without replay.</summary>
Task<DeadLetterEntry> ResolveAsync(
string tenantId,
Guid entryId,
string notes,
string resolvedBy,
CancellationToken cancellationToken);
/// <summary>Resolves multiple entries without replay.</summary>
Task<int> ResolveBatchAsync(
string tenantId,
IReadOnlyList<Guid> entryIds,
string notes,
string resolvedBy,
CancellationToken cancellationToken);
}
/// <summary>
/// Job creator interface for replay operations.
/// </summary>
public interface IJobCreator
{
/// <summary>Creates a new job from a dead-letter entry payload.</summary>
Task<Job> CreateFromReplayAsync(
string tenantId,
string jobType,
string payload,
string payloadDigest,
string idempotencyKey,
string? correlationId,
Guid replayOf,
string createdBy,
CancellationToken cancellationToken);
}
/// <summary>
/// Default replay manager implementation.
/// </summary>
public sealed class ReplayManager : IReplayManager
{
private readonly IDeadLetterRepository _deadLetterRepository;
private readonly IReplayAuditRepository _auditRepository;
private readonly IJobCreator _jobCreator;
private readonly IDeadLetterNotifier _notifier;
private readonly TimeProvider _timeProvider;
private readonly ReplayManagerOptions _options;
private readonly ILogger<ReplayManager> _logger;
public ReplayManager(
IDeadLetterRepository deadLetterRepository,
IReplayAuditRepository auditRepository,
IJobCreator jobCreator,
IDeadLetterNotifier notifier,
TimeProvider timeProvider,
ReplayManagerOptions options,
ILogger<ReplayManager> logger)
{
_deadLetterRepository = deadLetterRepository ?? throw new ArgumentNullException(nameof(deadLetterRepository));
_auditRepository = auditRepository ?? throw new ArgumentNullException(nameof(auditRepository));
_jobCreator = jobCreator ?? throw new ArgumentNullException(nameof(jobCreator));
_notifier = notifier ?? throw new ArgumentNullException(nameof(notifier));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_options = options ?? ReplayManagerOptions.Default;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<ReplayResult> ReplayAsync(
string tenantId,
Guid entryId,
string initiatedBy,
CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(initiatedBy);
var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
.ConfigureAwait(false);
if (entry is null)
{
throw new InvalidOperationException($"Dead-letter entry {entryId} not found.");
}
return await ReplayEntryAsync(entry, "manual", initiatedBy, cancellationToken).ConfigureAwait(false);
}
public async Task<BatchReplayResult> ReplayBatchAsync(
string tenantId,
IReadOnlyList<Guid> entryIds,
string initiatedBy,
CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentNullException.ThrowIfNull(entryIds);
ArgumentException.ThrowIfNullOrWhiteSpace(initiatedBy);
if (entryIds.Count > _options.MaxBatchSize)
{
throw new ArgumentException($"Batch size {entryIds.Count} exceeds maximum {_options.MaxBatchSize}.");
}
var results = new List<ReplayResult>();
var succeeded = 0;
var failed = 0;
foreach (var entryId in entryIds)
{
try
{
var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
.ConfigureAwait(false);
if (entry is null)
{
results.Add(new ReplayResult(
Success: false,
NewJobId: null,
ErrorMessage: $"Entry {entryId} not found.",
                    UpdatedEntry: null));
failed++;
continue;
}
var result = await ReplayEntryAsync(entry, "batch", initiatedBy, cancellationToken)
.ConfigureAwait(false);
results.Add(result);
if (result.Success)
succeeded++;
else
failed++;
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to replay entry {EntryId}", entryId);
results.Add(new ReplayResult(
Success: false,
NewJobId: null,
ErrorMessage: ex.Message,
                    UpdatedEntry: null));
failed++;
}
}
return new BatchReplayResult(
Attempted: entryIds.Count,
Succeeded: succeeded,
Failed: failed,
Results: results);
}
public async Task<BatchReplayResult> ReplayPendingAsync(
string tenantId,
string? errorCode,
ErrorCategory? category,
int maxCount,
string initiatedBy,
CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(initiatedBy);
var effectiveLimit = Math.Min(maxCount, _options.MaxBatchSize);
IReadOnlyList<DeadLetterEntry> entries;
if (!string.IsNullOrEmpty(errorCode))
{
entries = await _deadLetterRepository.GetByErrorCodeAsync(
tenantId, errorCode, DeadLetterStatus.Pending, effectiveLimit, cancellationToken)
.ConfigureAwait(false);
}
else if (category.HasValue)
{
entries = await _deadLetterRepository.GetByCategoryAsync(
tenantId, category.Value, DeadLetterStatus.Pending, effectiveLimit, cancellationToken)
.ConfigureAwait(false);
}
else
{
entries = await _deadLetterRepository.GetPendingRetryableAsync(tenantId, effectiveLimit, cancellationToken)
.ConfigureAwait(false);
}
var results = new List<ReplayResult>();
var succeeded = 0;
var failed = 0;
foreach (var entry in entries)
{
if (!entry.CanReplay)
{
continue;
}
try
{
var result = await ReplayEntryAsync(entry, "auto", initiatedBy, cancellationToken)
.ConfigureAwait(false);
results.Add(result);
if (result.Success)
succeeded++;
else
failed++;
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to replay entry {EntryId}", entry.EntryId);
results.Add(new ReplayResult(
Success: false,
NewJobId: null,
ErrorMessage: ex.Message,
UpdatedEntry: entry));
failed++;
}
}
return new BatchReplayResult(
Attempted: results.Count,
Succeeded: succeeded,
Failed: failed,
Results: results);
}
public async Task<DeadLetterEntry> ResolveAsync(
string tenantId,
Guid entryId,
string notes,
string resolvedBy,
CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(resolvedBy);
var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
.ConfigureAwait(false);
if (entry is null)
{
throw new InvalidOperationException($"Dead-letter entry {entryId} not found.");
}
var now = _timeProvider.GetUtcNow();
var resolved = entry.Resolve(notes, resolvedBy, now);
await _deadLetterRepository.UpdateAsync(resolved, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Resolved dead-letter entry {EntryId} for job {JobId}. Notes: {Notes}",
entryId, entry.OriginalJobId, notes);
return resolved;
}
public async Task<int> ResolveBatchAsync(
string tenantId,
IReadOnlyList<Guid> entryIds,
string notes,
string resolvedBy,
CancellationToken cancellationToken)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentNullException.ThrowIfNull(entryIds);
ArgumentException.ThrowIfNullOrWhiteSpace(resolvedBy);
var resolved = 0;
var now = _timeProvider.GetUtcNow();
foreach (var entryId in entryIds)
{
try
{
var entry = await _deadLetterRepository.GetByIdAsync(tenantId, entryId, cancellationToken)
.ConfigureAwait(false);
if (entry is null || entry.IsTerminal)
{
continue;
}
var resolvedEntry = entry.Resolve(notes, resolvedBy, now);
await _deadLetterRepository.UpdateAsync(resolvedEntry, cancellationToken).ConfigureAwait(false);
resolved++;
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to resolve entry {EntryId}", entryId);
}
}
return resolved;
}
private async Task<ReplayResult> ReplayEntryAsync(
DeadLetterEntry entry,
string triggeredBy,
string initiatedBy,
CancellationToken cancellationToken)
{
if (!entry.CanReplay)
{
return new ReplayResult(
Success: false,
NewJobId: null,
ErrorMessage: $"Entry cannot be replayed: status={entry.Status}, attempts={entry.ReplayAttempts}/{entry.MaxReplayAttempts}, retryable={entry.IsRetryable}",
UpdatedEntry: entry);
}
var now = _timeProvider.GetUtcNow();
// Mark entry as replaying
var replaying = entry.StartReplay(initiatedBy, now);
await _deadLetterRepository.UpdateAsync(replaying, cancellationToken).ConfigureAwait(false);
// Create audit record
var auditRecord = ReplayAuditRecord.Create(
entry.TenantId,
entry.EntryId,
replaying.ReplayAttempts,
triggeredBy,
initiatedBy,
now);
await _auditRepository.CreateAsync(auditRecord, cancellationToken).ConfigureAwait(false);
try
{
// Create new job with updated idempotency key
var newIdempotencyKey = $"{entry.IdempotencyKey}:replay:{replaying.ReplayAttempts}";
var newJob = await _jobCreator.CreateFromReplayAsync(
entry.TenantId,
entry.JobType,
entry.Payload,
entry.PayloadDigest,
newIdempotencyKey,
entry.CorrelationId,
entry.OriginalJobId,
initiatedBy,
cancellationToken).ConfigureAwait(false);
// Mark replay successful
now = _timeProvider.GetUtcNow();
var completed = replaying.CompleteReplay(newJob.JobId, initiatedBy, now);
await _deadLetterRepository.UpdateAsync(completed, cancellationToken).ConfigureAwait(false);
// Update audit record
var completedAudit = auditRecord.Complete(newJob.JobId, now);
await _auditRepository.UpdateAsync(completedAudit, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Replayed dead-letter entry {EntryId} as new job {NewJobId}",
entry.EntryId, newJob.JobId);
// Notify on success
await _notifier.NotifyReplaySuccessAsync(completed, newJob.JobId, cancellationToken)
.ConfigureAwait(false);
return new ReplayResult(
Success: true,
NewJobId: newJob.JobId,
ErrorMessage: null,
UpdatedEntry: completed);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to replay entry {EntryId}", entry.EntryId);
// Mark replay failed
now = _timeProvider.GetUtcNow();
var failed = replaying.FailReplay(ex.Message, initiatedBy, now);
await _deadLetterRepository.UpdateAsync(failed, cancellationToken).ConfigureAwait(false);
// Update audit record
var failedAudit = auditRecord.Fail(ex.Message, now);
await _auditRepository.UpdateAsync(failedAudit, cancellationToken).ConfigureAwait(false);
// Notify on exhausted
if (failed.Status == DeadLetterStatus.Exhausted)
{
await _notifier.NotifyExhaustedAsync(failed, cancellationToken).ConfigureAwait(false);
}
return new ReplayResult(
Success: false,
NewJobId: null,
ErrorMessage: ex.Message,
UpdatedEntry: failed);
}
}
}
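/// <summary>
/// Usage sketch (illustrative): replay up to 50 pending transient failures for one
/// tenant. Tenant and identity values are placeholders.
/// </summary>
internal static class ReplayManagerUsageSketch
{
    internal static async Task<BatchReplayResult> ReplayTransientAsync(
        IReplayManager manager, CancellationToken cancellationToken)
    {
        // Succeeded/Failed on the result summarize the batch outcome.
        return await manager.ReplayPendingAsync(
            tenantId: "tenant-a",
            errorCode: null,
            category: ErrorCategory.Transient,
            maxCount: 50,
            initiatedBy: "ops@example.com",
            cancellationToken: cancellationToken).ConfigureAwait(false);
    }
}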

View File

@@ -0,0 +1,39 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents an artifact produced by a job execution.
/// Artifacts are immutable outputs with content digests for provenance.
/// </summary>
public sealed record Artifact(
/// <summary>Unique artifact identifier.</summary>
Guid ArtifactId,
/// <summary>Tenant owning this artifact.</summary>
string TenantId,
/// <summary>Job that produced this artifact.</summary>
Guid JobId,
/// <summary>Run containing the producing job (if any).</summary>
Guid? RunId,
/// <summary>Artifact type (e.g., "sbom", "scan-result", "attestation", "log").</summary>
string ArtifactType,
/// <summary>Storage URI (e.g., "s3://bucket/path", "file:///local/path").</summary>
string Uri,
/// <summary>Content digest (SHA-256) for integrity verification.</summary>
string Digest,
/// <summary>MIME type (e.g., "application/json", "application/vnd.cyclonedx+json").</summary>
string? MimeType,
/// <summary>Artifact size in bytes.</summary>
long? SizeBytes,
/// <summary>When the artifact was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>Optional metadata JSON blob.</summary>
string? Metadata);

View File

@@ -0,0 +1,250 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents an immutable audit log entry for orchestrator operations.
/// Captures who did what, when, and with what effect.
/// </summary>
public sealed record AuditEntry(
/// <summary>Unique audit entry identifier.</summary>
Guid EntryId,
/// <summary>Tenant owning this entry.</summary>
string TenantId,
/// <summary>Type of audited event.</summary>
AuditEventType EventType,
/// <summary>Resource type being audited (job, run, source, quota, etc.).</summary>
string ResourceType,
/// <summary>Resource identifier being audited.</summary>
Guid ResourceId,
/// <summary>Actor who performed the action.</summary>
string ActorId,
/// <summary>Actor type (user, system, worker, api-key).</summary>
ActorType ActorType,
/// <summary>IP address of the actor (if applicable).</summary>
string? ActorIp,
/// <summary>User agent string (if applicable).</summary>
string? UserAgent,
/// <summary>HTTP method used (if applicable).</summary>
string? HttpMethod,
/// <summary>Request path (if applicable).</summary>
string? RequestPath,
/// <summary>State before the change (JSON).</summary>
string? OldState,
/// <summary>State after the change (JSON).</summary>
string? NewState,
/// <summary>Human-readable description of the change.</summary>
string Description,
/// <summary>Correlation ID for distributed tracing.</summary>
string? CorrelationId,
/// <summary>SHA-256 hash of the previous entry for chain integrity.</summary>
string? PreviousEntryHash,
/// <summary>SHA-256 hash of this entry's content for integrity.</summary>
string ContentHash,
/// <summary>Sequence number within the tenant's audit stream.</summary>
long SequenceNumber,
/// <summary>When the event occurred.</summary>
DateTimeOffset OccurredAt,
/// <summary>Optional metadata JSON blob.</summary>
string? Metadata)
{
/// <summary>
/// Creates a new audit entry with computed hash.
/// </summary>
public static AuditEntry Create(
string tenantId,
AuditEventType eventType,
string resourceType,
Guid resourceId,
string actorId,
ActorType actorType,
string description,
string? oldState = null,
string? newState = null,
string? actorIp = null,
string? userAgent = null,
string? httpMethod = null,
string? requestPath = null,
string? correlationId = null,
string? previousEntryHash = null,
long sequenceNumber = 0,
string? metadata = null)
{
var entryId = Guid.NewGuid();
var occurredAt = DateTimeOffset.UtcNow;
// Compute content hash from entry data
var contentToHash = $"{entryId}|{tenantId}|{eventType}|{resourceType}|{resourceId}|{actorId}|{actorType}|{description}|{oldState}|{newState}|{occurredAt:O}|{sequenceNumber}";
var contentHash = ComputeSha256(contentToHash);
return new AuditEntry(
EntryId: entryId,
TenantId: tenantId,
EventType: eventType,
ResourceType: resourceType,
ResourceId: resourceId,
ActorId: actorId,
ActorType: actorType,
ActorIp: actorIp,
UserAgent: userAgent,
HttpMethod: httpMethod,
RequestPath: requestPath,
OldState: oldState,
NewState: newState,
Description: description,
CorrelationId: correlationId,
PreviousEntryHash: previousEntryHash,
ContentHash: contentHash,
SequenceNumber: sequenceNumber,
OccurredAt: occurredAt,
Metadata: metadata);
}
/// <summary>
/// Verifies the integrity of this entry's content hash.
/// </summary>
public bool VerifyIntegrity()
{
var contentToHash = $"{EntryId}|{TenantId}|{EventType}|{ResourceType}|{ResourceId}|{ActorId}|{ActorType}|{Description}|{OldState}|{NewState}|{OccurredAt:O}|{SequenceNumber}";
var computed = ComputeSha256(contentToHash);
return string.Equals(ContentHash, computed, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Verifies the chain link to the previous entry.
/// </summary>
public bool VerifyChainLink(AuditEntry? previousEntry)
{
if (previousEntry is null)
{
return PreviousEntryHash is null || SequenceNumber == 1;
}
return string.Equals(PreviousEntryHash, previousEntry.ContentHash, StringComparison.OrdinalIgnoreCase);
}
private static string ComputeSha256(string content)
{
var bytes = System.Text.Encoding.UTF8.GetBytes(content);
var hash = System.Security.Cryptography.SHA256.HashData(bytes);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
/// <summary>
/// Types of auditable events in the orchestrator.
/// </summary>
public enum AuditEventType
{
// Job lifecycle events
JobCreated = 100,
JobScheduled = 101,
JobLeased = 102,
JobCompleted = 103,
JobFailed = 104,
JobCanceled = 105,
JobRetried = 106,
// Run lifecycle events
RunCreated = 200,
RunStarted = 201,
RunCompleted = 202,
RunFailed = 203,
RunCanceled = 204,
// Source management events
SourceCreated = 300,
SourceUpdated = 301,
SourcePaused = 302,
SourceResumed = 303,
SourceDeleted = 304,
// Quota management events
QuotaCreated = 400,
QuotaUpdated = 401,
QuotaPaused = 402,
QuotaResumed = 403,
QuotaDeleted = 404,
// SLO management events
SloCreated = 500,
SloUpdated = 501,
SloEnabled = 502,
SloDisabled = 503,
SloDeleted = 504,
SloAlertTriggered = 505,
SloAlertAcknowledged = 506,
SloAlertResolved = 507,
// Dead-letter events
DeadLetterCreated = 600,
DeadLetterReplayed = 601,
DeadLetterResolved = 602,
DeadLetterExpired = 603,
// Backfill events
BackfillCreated = 700,
BackfillStarted = 701,
BackfillCompleted = 702,
BackfillFailed = 703,
BackfillCanceled = 704,
// Ledger events
LedgerExportRequested = 800,
LedgerExportCompleted = 801,
LedgerExportFailed = 802,
// Worker events
WorkerClaimed = 900,
WorkerHeartbeat = 901,
WorkerProgressReported = 902,
WorkerCompleted = 903,
// Security events
AuthenticationSuccess = 1000,
AuthenticationFailure = 1001,
AuthorizationDenied = 1002,
ApiKeyCreated = 1003,
ApiKeyRevoked = 1004
}
/// <summary>
/// Types of actors that can perform auditable actions.
/// </summary>
public enum ActorType
{
/// <summary>Human user via UI or API.</summary>
User = 0,
/// <summary>System-initiated action (scheduler, background job).</summary>
System = 1,
/// <summary>Worker process.</summary>
Worker = 2,
/// <summary>API key authentication.</summary>
ApiKey = 3,
/// <summary>Service-to-service call.</summary>
Service = 4,
/// <summary>Unknown or unidentified actor.</summary>
Unknown = 99
}
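As a usage note, the hash chain above can be verified end-to-end with only the members defined in this file. A minimal sketch, assuming the entries are fetched for a single tenant and ordered by SequenceNumber:

using System.Linq; // for OrderBy

static bool VerifyAuditChain(IReadOnlyList<AuditEntry> entries)
{
    AuditEntry? previous = null;
    foreach (var entry in entries.OrderBy(e => e.SequenceNumber))
    {
        // Each entry must hash-match its own content and link to its predecessor.
        if (!entry.VerifyIntegrity() || !entry.VerifyChainLink(previous))
        {
            return false;
        }
        previous = entry;
    }
    return true;
}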

View File

@@ -0,0 +1,429 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a request to backfill/reprocess events within a time window.
/// </summary>
public sealed record BackfillRequest(
/// <summary>Unique backfill request identifier.</summary>
Guid BackfillId,
/// <summary>Tenant this backfill applies to.</summary>
string TenantId,
/// <summary>Source to backfill (null if job-type scoped).</summary>
Guid? SourceId,
/// <summary>Job type to backfill (null if source-scoped).</summary>
string? JobType,
/// <summary>Normalized scope key.</summary>
string ScopeKey,
/// <summary>Current status of the backfill.</summary>
BackfillStatus Status,
/// <summary>Start of the time window to backfill (inclusive).</summary>
DateTimeOffset WindowStart,
/// <summary>End of the time window to backfill (exclusive).</summary>
DateTimeOffset WindowEnd,
/// <summary>Current processing position within the window.</summary>
DateTimeOffset? CurrentPosition,
/// <summary>Total events estimated in the window.</summary>
long? TotalEvents,
/// <summary>Events successfully processed.</summary>
long ProcessedEvents,
/// <summary>Events skipped due to duplicate suppression.</summary>
long SkippedEvents,
/// <summary>Events that failed processing.</summary>
long FailedEvents,
/// <summary>Number of events to process per batch.</summary>
int BatchSize,
/// <summary>Whether this is a dry-run (preview only, no changes).</summary>
bool DryRun,
/// <summary>Whether to force reprocessing (ignore duplicate suppression).</summary>
bool ForceReprocess,
/// <summary>Estimated duration for the backfill.</summary>
TimeSpan? EstimatedDuration,
/// <summary>Maximum allowed duration (safety limit).</summary>
TimeSpan? MaxDuration,
/// <summary>Results of safety validation checks.</summary>
BackfillSafetyChecks? SafetyChecks,
/// <summary>Reason for the backfill request.</summary>
string Reason,
/// <summary>Optional ticket reference for audit.</summary>
string? Ticket,
/// <summary>When the request was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When processing started.</summary>
DateTimeOffset? StartedAt,
/// <summary>When processing completed.</summary>
DateTimeOffset? CompletedAt,
/// <summary>Actor who created the request.</summary>
string CreatedBy,
/// <summary>Actor who last modified the request.</summary>
string UpdatedBy,
/// <summary>Error message if failed.</summary>
string? ErrorMessage)
{
/// <summary>
/// Window duration.
/// </summary>
public TimeSpan WindowDuration => WindowEnd - WindowStart;
/// <summary>
/// Progress percentage (0-100).
/// </summary>
public double ProgressPercent => TotalEvents > 0
? Math.Round((double)(ProcessedEvents + SkippedEvents + FailedEvents) / TotalEvents.Value * 100, 2)
: 0;
/// <summary>
/// Whether the backfill is in a terminal state.
/// </summary>
public bool IsTerminal => Status is BackfillStatus.Completed or BackfillStatus.Failed or BackfillStatus.Canceled;
/// <summary>
/// Creates a new backfill request.
/// </summary>
public static BackfillRequest Create(
string tenantId,
Guid? sourceId,
string? jobType,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
string reason,
string createdBy,
int batchSize = 100,
bool dryRun = false,
bool forceReprocess = false,
string? ticket = null,
TimeSpan? maxDuration = null)
{
if (windowEnd <= windowStart)
throw new ArgumentException("Window end must be after window start.", nameof(windowEnd));
if (batchSize <= 0 || batchSize > 10000)
throw new ArgumentOutOfRangeException(nameof(batchSize), "Batch size must be between 1 and 10000.");
var scopeKey = (sourceId, jobType) switch
{
(Guid s, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(s, j),
(Guid s, _) => Watermark.CreateScopeKey(s),
(_, string j) when !string.IsNullOrEmpty(j) => Watermark.CreateScopeKey(j),
_ => throw new ArgumentException("Either sourceId or jobType must be specified.")
};
var now = DateTimeOffset.UtcNow;
return new BackfillRequest(
BackfillId: Guid.NewGuid(),
TenantId: tenantId,
SourceId: sourceId,
JobType: jobType,
ScopeKey: scopeKey,
Status: BackfillStatus.Pending,
WindowStart: windowStart,
WindowEnd: windowEnd,
CurrentPosition: null,
TotalEvents: null,
ProcessedEvents: 0,
SkippedEvents: 0,
FailedEvents: 0,
BatchSize: batchSize,
DryRun: dryRun,
ForceReprocess: forceReprocess,
EstimatedDuration: null,
MaxDuration: maxDuration,
SafetyChecks: null,
Reason: reason,
Ticket: ticket,
CreatedAt: now,
StartedAt: null,
CompletedAt: null,
CreatedBy: createdBy,
UpdatedBy: createdBy,
ErrorMessage: null);
}
/// <summary>
/// Transitions to validating status.
/// </summary>
public BackfillRequest StartValidation(string updatedBy)
{
if (Status != BackfillStatus.Pending)
throw new InvalidOperationException($"Cannot start validation from status {Status}.");
return this with
{
Status = BackfillStatus.Validating,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Records safety check results.
/// </summary>
public BackfillRequest WithSafetyChecks(BackfillSafetyChecks checks, long? totalEvents, TimeSpan? estimatedDuration, string updatedBy)
{
return this with
{
SafetyChecks = checks,
TotalEvents = totalEvents,
EstimatedDuration = estimatedDuration,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Transitions to running status.
/// </summary>
public BackfillRequest Start(string updatedBy)
{
if (Status != BackfillStatus.Validating)
throw new InvalidOperationException($"Cannot start from status {Status}.");
if (SafetyChecks?.HasBlockingIssues == true)
throw new InvalidOperationException("Cannot start backfill with blocking safety issues.");
return this with
{
Status = BackfillStatus.Running,
StartedAt = DateTimeOffset.UtcNow,
CurrentPosition = WindowStart,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Updates progress after processing a batch.
/// </summary>
public BackfillRequest UpdateProgress(
DateTimeOffset newPosition,
long processed,
long skipped,
long failed,
string updatedBy)
{
if (Status != BackfillStatus.Running)
throw new InvalidOperationException($"Cannot update progress in status {Status}.");
return this with
{
CurrentPosition = newPosition,
ProcessedEvents = ProcessedEvents + processed,
SkippedEvents = SkippedEvents + skipped,
FailedEvents = FailedEvents + failed,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Pauses the backfill.
/// </summary>
public BackfillRequest Pause(string updatedBy)
{
if (Status != BackfillStatus.Running)
throw new InvalidOperationException($"Cannot pause from status {Status}.");
return this with
{
Status = BackfillStatus.Paused,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Resumes a paused backfill.
/// </summary>
public BackfillRequest Resume(string updatedBy)
{
if (Status != BackfillStatus.Paused)
throw new InvalidOperationException($"Cannot resume from status {Status}.");
return this with
{
Status = BackfillStatus.Running,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Completes the backfill successfully.
/// </summary>
public BackfillRequest Complete(string updatedBy)
{
if (Status != BackfillStatus.Running)
throw new InvalidOperationException($"Cannot complete from status {Status}.");
return this with
{
Status = BackfillStatus.Completed,
CompletedAt = DateTimeOffset.UtcNow,
CurrentPosition = WindowEnd,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Fails the backfill with an error.
/// </summary>
public BackfillRequest Fail(string error, string updatedBy)
{
return this with
{
Status = BackfillStatus.Failed,
CompletedAt = DateTimeOffset.UtcNow,
ErrorMessage = error,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Cancels the backfill.
/// </summary>
public BackfillRequest Cancel(string updatedBy)
{
if (IsTerminal)
throw new InvalidOperationException($"Cannot cancel from terminal status {Status}.");
return this with
{
Status = BackfillStatus.Canceled,
CompletedAt = DateTimeOffset.UtcNow,
UpdatedBy = updatedBy
};
}
}
/// <summary>
/// Status of a backfill request.
/// </summary>
public enum BackfillStatus
{
/// <summary>Request created, awaiting validation.</summary>
Pending,
/// <summary>Running safety validations.</summary>
Validating,
/// <summary>Actively processing events.</summary>
Running,
/// <summary>Temporarily paused.</summary>
Paused,
/// <summary>Successfully completed.</summary>
Completed,
/// <summary>Failed with error.</summary>
Failed,
/// <summary>Canceled by operator.</summary>
Canceled
}
/// <summary>
/// Results of backfill safety validation checks.
/// </summary>
public sealed record BackfillSafetyChecks(
/// <summary>Whether the source exists and is accessible.</summary>
bool SourceExists,
/// <summary>Whether there are overlapping active backfills.</summary>
bool HasOverlappingBackfill,
/// <summary>Whether the window is within retention period.</summary>
bool WithinRetention,
/// <summary>Whether the estimated event count is within limits.</summary>
bool WithinEventLimit,
/// <summary>Whether estimated duration is within max duration.</summary>
bool WithinDurationLimit,
/// <summary>Whether required quotas are available.</summary>
bool QuotaAvailable,
/// <summary>Warning messages (non-blocking).</summary>
IReadOnlyList<string> Warnings,
/// <summary>Error messages (blocking).</summary>
IReadOnlyList<string> Errors)
{
/// <summary>
/// Whether there are any blocking issues.
/// </summary>
public bool HasBlockingIssues => !SourceExists || HasOverlappingBackfill || !WithinRetention
|| !WithinEventLimit || !WithinDurationLimit || Errors.Count > 0;
/// <summary>
/// Whether the backfill is safe to proceed.
/// </summary>
public bool IsSafe => !HasBlockingIssues;
/// <summary>
/// Creates successful safety checks with no issues.
/// </summary>
public static BackfillSafetyChecks AllPassed() => new(
SourceExists: true,
HasOverlappingBackfill: false,
WithinRetention: true,
WithinEventLimit: true,
WithinDurationLimit: true,
QuotaAvailable: true,
Warnings: [],
Errors: []);
}
/// <summary>
/// Preview result for dry-run backfill.
/// </summary>
public sealed record BackfillPreview(
/// <summary>Scope being backfilled.</summary>
string ScopeKey,
/// <summary>Start of the backfill window (inclusive).</summary>
DateTimeOffset WindowStart,
/// <summary>End of the backfill window (exclusive).</summary>
DateTimeOffset WindowEnd,
/// <summary>Estimated total events in window.</summary>
long EstimatedEvents,
/// <summary>Events that would be skipped (already processed).</summary>
long SkippedEvents,
/// <summary>Events that would be processed.</summary>
long ProcessableEvents,
/// <summary>Estimated duration.</summary>
TimeSpan EstimatedDuration,
/// <summary>Number of batches required.</summary>
int EstimatedBatches,
/// <summary>Safety validation results.</summary>
BackfillSafetyChecks SafetyChecks,
/// <summary>Sample of event keys that would be processed.</summary>
IReadOnlyList<string> SampleEventKeys);
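A minimal lifecycle sketch using only the factory and transition methods defined above; identifiers such as sourceId and newPosition are placeholders:

var request = BackfillRequest.Create(
    tenantId: "tenant-a",
    sourceId: sourceId,          // or pass jobType instead
    jobType: null,
    windowStart: DateTimeOffset.UtcNow.AddDays(-1),
    windowEnd: DateTimeOffset.UtcNow,
    reason: "Reprocess window after connector fix",
    createdBy: "ops@example.com");

request = request.StartValidation("ops@example.com");
request = request.WithSafetyChecks(
    BackfillSafetyChecks.AllPassed(),
    totalEvents: 5_000,
    estimatedDuration: TimeSpan.FromMinutes(10),
    "ops@example.com");
request = request.Start("ops@example.com");

// Per processed batch:
request = request.UpdateProgress(newPosition, processed: 100, skipped: 3, failed: 0, "ops@example.com");

request = request.Complete("ops@example.com");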

View File

@@ -0,0 +1,42 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a dependency edge in a job DAG (Directed Acyclic Graph).
/// By default ("success" edges) the child job cannot start until the parent job succeeds;
/// other edge types relax this (see DagEdgeTypes below).
/// </summary>
public sealed record DagEdge(
/// <summary>Unique edge identifier.</summary>
Guid EdgeId,
/// <summary>Tenant owning this edge.</summary>
string TenantId,
/// <summary>Run containing these jobs.</summary>
Guid RunId,
/// <summary>Parent job ID (must complete first).</summary>
Guid ParentJobId,
/// <summary>Child job ID (depends on parent).</summary>
Guid ChildJobId,
/// <summary>Edge type (e.g., "success", "always", "failure").</summary>
string EdgeType,
/// <summary>When this edge was created.</summary>
DateTimeOffset CreatedAt);
/// <summary>
/// Edge types defining dependency semantics.
/// </summary>
public static class DagEdgeTypes
{
/// <summary>Child runs only if parent succeeds.</summary>
public const string Success = "success";
/// <summary>Child runs regardless of parent outcome.</summary>
public const string Always = "always";
/// <summary>Child runs only if parent fails.</summary>
public const string Failure = "failure";
}
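For illustration, the gating semantics can be expressed as a single predicate over the parent's terminal JobStatus. This is a sketch, not part of this commit; whether "failure" edges should also cover TimedOut is an assumption:

static bool IsEdgeSatisfied(DagEdge edge, JobStatus parentStatus) => edge.EdgeType switch
{
    DagEdgeTypes.Success => parentStatus == JobStatus.Succeeded,
    // Assumption: lease timeouts count as failures for "failure" edges.
    DagEdgeTypes.Failure => parentStatus is JobStatus.Failed or JobStatus.TimedOut,
    DagEdgeTypes.Always  => parentStatus is JobStatus.Succeeded or JobStatus.Failed
                                          or JobStatus.Canceled or JobStatus.TimedOut,
    _ => false
};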

View File

@@ -0,0 +1,292 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a job that has been moved to the dead-letter store after exhausting retries
/// or encountering a non-retryable error.
/// </summary>
public sealed record DeadLetterEntry(
/// <summary>Unique dead-letter entry identifier.</summary>
Guid EntryId,
/// <summary>Tenant owning this entry.</summary>
string TenantId,
/// <summary>Original job that failed.</summary>
Guid OriginalJobId,
/// <summary>Run the job belonged to (if any).</summary>
Guid? RunId,
/// <summary>Source the job was processing (if any).</summary>
Guid? SourceId,
/// <summary>Job type (e.g., "scan.image", "advisory.nvd").</summary>
string JobType,
/// <summary>Job payload JSON (inputs, parameters).</summary>
string Payload,
/// <summary>SHA-256 digest of the payload.</summary>
string PayloadDigest,
/// <summary>Idempotency key from original job.</summary>
string IdempotencyKey,
/// <summary>Correlation ID for distributed tracing.</summary>
string? CorrelationId,
/// <summary>Current entry status.</summary>
DeadLetterStatus Status,
/// <summary>Classified error code.</summary>
string ErrorCode,
/// <summary>Human-readable failure reason.</summary>
string FailureReason,
/// <summary>Suggested remediation hint for operators.</summary>
string? RemediationHint,
/// <summary>Error classification category.</summary>
ErrorCategory Category,
/// <summary>Whether this error is potentially retryable.</summary>
bool IsRetryable,
/// <summary>Number of attempts made by original job.</summary>
int OriginalAttempts,
/// <summary>Number of replay attempts from dead-letter.</summary>
int ReplayAttempts,
/// <summary>Maximum replay attempts allowed.</summary>
int MaxReplayAttempts,
/// <summary>When the job originally failed.</summary>
DateTimeOffset FailedAt,
/// <summary>When the entry was created in dead-letter store.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the entry was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>When the entry expires and can be purged.</summary>
DateTimeOffset ExpiresAt,
/// <summary>When the entry was resolved (if applicable).</summary>
DateTimeOffset? ResolvedAt,
/// <summary>Resolution notes (if resolved).</summary>
string? ResolutionNotes,
/// <summary>Actor who created/submitted the original job.</summary>
string CreatedBy,
/// <summary>Actor who last updated the entry.</summary>
string UpdatedBy)
{
/// <summary>Default retention period for dead-letter entries.</summary>
public static readonly TimeSpan DefaultRetention = TimeSpan.FromDays(30);
/// <summary>Default maximum replay attempts.</summary>
public const int DefaultMaxReplayAttempts = 3;
/// <summary>Whether this entry is in a terminal state.</summary>
public bool IsTerminal => Status is DeadLetterStatus.Replayed
or DeadLetterStatus.Resolved
or DeadLetterStatus.Exhausted
or DeadLetterStatus.Expired;
/// <summary>Whether more replay attempts are allowed.</summary>
public bool CanReplay => !IsTerminal && IsRetryable && ReplayAttempts < MaxReplayAttempts;
/// <summary>Creates a new dead-letter entry from a failed job.</summary>
public static DeadLetterEntry FromFailedJob(
Job job,
string errorCode,
string failureReason,
string? remediationHint,
ErrorCategory category,
bool isRetryable,
DateTimeOffset now,
TimeSpan? retention = null,
int? maxReplayAttempts = null)
{
ArgumentNullException.ThrowIfNull(job);
ArgumentException.ThrowIfNullOrWhiteSpace(errorCode);
ArgumentException.ThrowIfNullOrWhiteSpace(failureReason);
var effectiveRetention = retention ?? DefaultRetention;
var effectiveMaxReplays = maxReplayAttempts ?? DefaultMaxReplayAttempts;
return new DeadLetterEntry(
EntryId: Guid.NewGuid(),
TenantId: job.TenantId,
OriginalJobId: job.JobId,
RunId: job.RunId,
SourceId: null, // Would be extracted from payload if available
JobType: job.JobType,
Payload: job.Payload,
PayloadDigest: job.PayloadDigest,
IdempotencyKey: job.IdempotencyKey,
CorrelationId: job.CorrelationId,
Status: DeadLetterStatus.Pending,
ErrorCode: errorCode,
FailureReason: failureReason,
RemediationHint: remediationHint,
Category: category,
IsRetryable: isRetryable,
OriginalAttempts: job.Attempt,
ReplayAttempts: 0,
MaxReplayAttempts: effectiveMaxReplays,
FailedAt: job.CompletedAt ?? now,
CreatedAt: now,
UpdatedAt: now,
ExpiresAt: now.Add(effectiveRetention),
ResolvedAt: null,
ResolutionNotes: null,
CreatedBy: job.CreatedBy,
UpdatedBy: "system");
}
/// <summary>Marks entry as being replayed.</summary>
public DeadLetterEntry StartReplay(string updatedBy, DateTimeOffset now)
{
if (!CanReplay)
throw new InvalidOperationException($"Cannot replay entry in status {Status} with {ReplayAttempts}/{MaxReplayAttempts} attempts.");
return this with
{
Status = DeadLetterStatus.Replaying,
ReplayAttempts = ReplayAttempts + 1,
UpdatedAt = now,
UpdatedBy = updatedBy
};
}
/// <summary>Marks entry as successfully replayed.</summary>
public DeadLetterEntry CompleteReplay(Guid newJobId, string updatedBy, DateTimeOffset now)
{
if (Status != DeadLetterStatus.Replaying)
throw new InvalidOperationException($"Cannot complete replay from status {Status}.");
return this with
{
Status = DeadLetterStatus.Replayed,
ResolvedAt = now,
ResolutionNotes = $"Replayed as job {newJobId}",
UpdatedAt = now,
UpdatedBy = updatedBy
};
}
/// <summary>Marks replay as failed.</summary>
public DeadLetterEntry FailReplay(string reason, string updatedBy, DateTimeOffset now)
{
if (Status != DeadLetterStatus.Replaying)
throw new InvalidOperationException($"Cannot fail replay from status {Status}.");
var newStatus = ReplayAttempts >= MaxReplayAttempts
? DeadLetterStatus.Exhausted
: DeadLetterStatus.Pending;
return this with
{
Status = newStatus,
FailureReason = reason,
UpdatedAt = now,
UpdatedBy = updatedBy
};
}
/// <summary>Manually resolves the entry without replay.</summary>
public DeadLetterEntry Resolve(string notes, string updatedBy, DateTimeOffset now)
{
if (IsTerminal)
throw new InvalidOperationException($"Cannot resolve entry in terminal status {Status}.");
return this with
{
Status = DeadLetterStatus.Resolved,
ResolvedAt = now,
ResolutionNotes = notes,
UpdatedAt = now,
UpdatedBy = updatedBy
};
}
/// <summary>Marks entry as expired for cleanup.</summary>
public DeadLetterEntry MarkExpired(DateTimeOffset now)
{
if (IsTerminal)
throw new InvalidOperationException($"Cannot expire entry in terminal status {Status}.");
return this with
{
Status = DeadLetterStatus.Expired,
UpdatedAt = now,
UpdatedBy = "system"
};
}
}
/// <summary>
/// Dead-letter entry lifecycle states.
/// </summary>
public enum DeadLetterStatus
{
/// <summary>Entry awaiting operator action or replay.</summary>
Pending = 0,
/// <summary>Entry currently being replayed.</summary>
Replaying = 1,
/// <summary>Entry successfully replayed as a new job.</summary>
Replayed = 2,
/// <summary>Entry manually resolved without replay.</summary>
Resolved = 3,
/// <summary>Entry exhausted all replay attempts.</summary>
Exhausted = 4,
/// <summary>Entry expired and eligible for purge.</summary>
Expired = 5
}
/// <summary>
/// Error classification categories for dead-letter entries.
/// </summary>
public enum ErrorCategory
{
/// <summary>Unknown or unclassified error.</summary>
Unknown = 0,
/// <summary>Transient infrastructure error (network, timeout).</summary>
Transient = 1,
/// <summary>Resource not found (image, source, etc.).</summary>
NotFound = 2,
/// <summary>Authentication or authorization failure.</summary>
AuthFailure = 3,
/// <summary>Rate limiting or quota exceeded.</summary>
RateLimited = 4,
/// <summary>Invalid input or configuration.</summary>
ValidationError = 5,
/// <summary>Upstream service error (registry, advisory feed).</summary>
UpstreamError = 6,
/// <summary>Internal processing error (bug, corruption).</summary>
InternalError = 7,
/// <summary>Resource conflict (duplicate, version mismatch).</summary>
Conflict = 8,
/// <summary>Operation canceled by user or system.</summary>
Canceled = 9
}
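A minimal end-to-end sketch of the entry lifecycle using the methods above; the error details, actor, and newJobId are illustrative:

var now = DateTimeOffset.UtcNow;
var entry = DeadLetterEntry.FromFailedJob(
    job,
    errorCode: "UPSTREAM_5XX",
    failureReason: "Registry returned 503",
    remediationHint: "Retry after the upstream registry recovers",
    category: ErrorCategory.UpstreamError,
    isRetryable: true,
    now: now);

if (entry.CanReplay)
{
    entry = entry.StartReplay("ops@example.com", now);
    // On success, the replacement job's ID is recorded in the resolution notes.
    entry = entry.CompleteReplay(newJobId, "ops@example.com", now);
}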

View File

@@ -0,0 +1,69 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents an operational incident triggered by threshold breaches.
/// Incidents are generated when failure rates exceed configured limits.
/// </summary>
public sealed record Incident(
/// <summary>Unique incident identifier.</summary>
Guid IncidentId,
/// <summary>Tenant affected by this incident.</summary>
string TenantId,
/// <summary>Incident type (e.g., "failure_rate", "quota_exhausted", "circuit_open").</summary>
string IncidentType,
/// <summary>Incident severity (e.g., "warning", "critical").</summary>
string Severity,
/// <summary>Affected job type (if applicable).</summary>
string? JobType,
/// <summary>Affected source (if applicable).</summary>
Guid? SourceId,
/// <summary>Human-readable incident title.</summary>
string Title,
/// <summary>Detailed incident description.</summary>
string Description,
/// <summary>Current incident status.</summary>
IncidentStatus Status,
/// <summary>When the incident was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the incident was acknowledged.</summary>
DateTimeOffset? AcknowledgedAt,
/// <summary>Actor who acknowledged the incident.</summary>
string? AcknowledgedBy,
/// <summary>When the incident was resolved.</summary>
DateTimeOffset? ResolvedAt,
/// <summary>Actor who resolved the incident.</summary>
string? ResolvedBy,
/// <summary>Resolution notes.</summary>
string? ResolutionNotes,
/// <summary>Optional metadata JSON blob.</summary>
string? Metadata);
/// <summary>
/// Incident lifecycle states.
/// </summary>
public enum IncidentStatus
{
/// <summary>Incident is open and unacknowledged.</summary>
Open = 0,
/// <summary>Incident acknowledged by operator.</summary>
Acknowledged = 1,
/// <summary>Incident resolved.</summary>
Resolved = 2
}

View File

@@ -0,0 +1,81 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a unit of work to be executed by a worker.
/// Jobs are scheduled, leased to workers, and tracked through completion.
/// </summary>
public sealed record Job(
/// <summary>Unique job identifier.</summary>
Guid JobId,
/// <summary>Tenant owning this job.</summary>
string TenantId,
/// <summary>Optional project scope within tenant.</summary>
string? ProjectId,
/// <summary>Run this job belongs to (if any).</summary>
Guid? RunId,
/// <summary>Job type (e.g., "scan.image", "advisory.nvd", "export.sbom").</summary>
string JobType,
/// <summary>Current job status.</summary>
JobStatus Status,
/// <summary>Priority (higher = more urgent). Default 0.</summary>
int Priority,
/// <summary>Current attempt number (1-based).</summary>
int Attempt,
/// <summary>Maximum retry attempts.</summary>
int MaxAttempts,
/// <summary>SHA-256 digest of the payload for determinism verification.</summary>
string PayloadDigest,
/// <summary>Job payload JSON (inputs, parameters).</summary>
string Payload,
/// <summary>Idempotency key for deduplication.</summary>
string IdempotencyKey,
/// <summary>Correlation ID for distributed tracing.</summary>
string? CorrelationId,
/// <summary>Current lease ID (if leased).</summary>
Guid? LeaseId,
/// <summary>Worker holding the lease (if leased).</summary>
string? WorkerId,
/// <summary>Task runner ID executing the job (if applicable).</summary>
string? TaskRunnerId,
/// <summary>Lease expiration time.</summary>
DateTimeOffset? LeaseUntil,
/// <summary>When the job was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the job was scheduled (quota cleared).</summary>
DateTimeOffset? ScheduledAt,
/// <summary>When the job was leased to a worker.</summary>
DateTimeOffset? LeasedAt,
/// <summary>When the job completed (terminal state).</summary>
DateTimeOffset? CompletedAt,
/// <summary>Earliest time the job can be scheduled (for backoff).</summary>
DateTimeOffset? NotBefore,
/// <summary>Terminal status reason (failure message, cancel reason, etc.).</summary>
string? Reason,
/// <summary>ID of the original job if this is a replay.</summary>
Guid? ReplayOf,
/// <summary>Actor who created/submitted the job.</summary>
string CreatedBy);

View File

@@ -0,0 +1,48 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents an immutable history entry for job state changes.
/// Provides audit trail for all job lifecycle transitions.
/// </summary>
public sealed record JobHistory(
/// <summary>Unique history entry identifier.</summary>
Guid HistoryId,
/// <summary>Tenant owning this entry.</summary>
string TenantId,
/// <summary>Job this history entry belongs to.</summary>
Guid JobId,
/// <summary>Sequence number within the job's history (1-based).</summary>
int SequenceNo,
/// <summary>Previous job status.</summary>
JobStatus? FromStatus,
/// <summary>New job status.</summary>
JobStatus ToStatus,
/// <summary>Attempt number at time of transition.</summary>
int Attempt,
/// <summary>Lease ID (if applicable).</summary>
Guid? LeaseId,
/// <summary>Worker ID (if applicable).</summary>
string? WorkerId,
/// <summary>Reason for the transition.</summary>
string? Reason,
/// <summary>When this transition occurred.</summary>
DateTimeOffset OccurredAt,
/// <summary>When this entry was recorded.</summary>
DateTimeOffset RecordedAt,
/// <summary>Actor who caused this transition.</summary>
string ActorId,
/// <summary>Actor type (system, operator, worker).</summary>
string ActorType);

View File

@@ -0,0 +1,30 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Job lifecycle states. Transitions follow the state machine:
/// Pending → Scheduled → Leased → (Succeeded | Failed | Canceled | TimedOut)
/// Failed jobs may transition to Pending via replay.
/// </summary>
public enum JobStatus
{
/// <summary>Job enqueued but not yet scheduled (e.g., quota exceeded).</summary>
Pending = 0,
/// <summary>Job scheduled and awaiting worker lease.</summary>
Scheduled = 1,
/// <summary>Job leased to a worker for execution.</summary>
Leased = 2,
/// <summary>Job completed successfully.</summary>
Succeeded = 3,
/// <summary>Job failed after exhausting retries.</summary>
Failed = 4,
/// <summary>Job canceled by operator or system.</summary>
Canceled = 5,
/// <summary>Job timed out (lease expired without completion).</summary>
TimedOut = 6
}
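A sketch of the state machine described in the comment above, covering only the documented transitions; a real scheduler may permit more (e.g., cancellation from Pending or Scheduled):

static bool IsValidTransition(JobStatus from, JobStatus to) => (from, to) switch
{
    (JobStatus.Pending, JobStatus.Scheduled) => true,
    (JobStatus.Scheduled, JobStatus.Leased) => true,
    (JobStatus.Leased, JobStatus.Succeeded or JobStatus.Failed
                     or JobStatus.Canceled or JobStatus.TimedOut) => true,
    (JobStatus.Failed, JobStatus.Pending) => true, // replay path
    _ => false
};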

View File

@@ -0,0 +1,60 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents rate-limit and concurrency quotas for job scheduling.
/// Quotas are scoped to tenant and optionally job type.
/// </summary>
public sealed record Quota(
/// <summary>Unique quota identifier.</summary>
Guid QuotaId,
/// <summary>Tenant this quota applies to.</summary>
string TenantId,
/// <summary>Job type this quota applies to (null = all job types).</summary>
string? JobType,
/// <summary>Maximum concurrent active (leased) jobs.</summary>
int MaxActive,
/// <summary>Maximum jobs per hour (sliding window).</summary>
int MaxPerHour,
/// <summary>Burst capacity for token bucket.</summary>
int BurstCapacity,
/// <summary>Token refill rate (tokens per second).</summary>
double RefillRate,
/// <summary>Current available tokens.</summary>
double CurrentTokens,
/// <summary>Last time tokens were refilled.</summary>
DateTimeOffset LastRefillAt,
/// <summary>Current count of active (leased) jobs.</summary>
int CurrentActive,
/// <summary>Jobs scheduled in current hour window.</summary>
int CurrentHourCount,
/// <summary>Start of current hour window.</summary>
DateTimeOffset CurrentHourStart,
/// <summary>Whether this quota is currently paused (operator override).</summary>
bool Paused,
/// <summary>Operator-provided reason for pause.</summary>
string? PauseReason,
/// <summary>Ticket reference for quota change audit.</summary>
string? QuotaTicket,
/// <summary>When the quota was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the quota was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>Actor who last modified the quota.</summary>
string UpdatedBy);
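The BurstCapacity/RefillRate/CurrentTokens/LastRefillAt fields describe a standard token bucket. A sketch of the refill math they imply (the actual scheduler code is not shown in this commit):

static Quota Refill(Quota quota, DateTimeOffset now)
{
    var elapsedSeconds = (now - quota.LastRefillAt).TotalSeconds;
    // Tokens accrue at RefillRate per second, capped at BurstCapacity.
    var tokens = Math.Min(quota.BurstCapacity, quota.CurrentTokens + elapsedSeconds * quota.RefillRate);
    return quota with { CurrentTokens = tokens, LastRefillAt = now };
}

// A job would then be admitted only when the quota is not paused and
// CurrentActive < MaxActive, CurrentHourCount < MaxPerHour, and tokens >= 1.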

View File

@@ -0,0 +1,78 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a run (batch/workflow execution) containing multiple jobs.
/// Runs group related jobs (e.g., scanning an image produces multiple analyzer jobs).
/// </summary>
public sealed record Run(
/// <summary>Unique run identifier.</summary>
Guid RunId,
/// <summary>Tenant owning this run.</summary>
string TenantId,
/// <summary>Optional project scope within tenant.</summary>
string? ProjectId,
/// <summary>Source that initiated this run.</summary>
Guid SourceId,
/// <summary>Run type (e.g., "scan", "advisory-sync", "export").</summary>
string RunType,
/// <summary>Current aggregate status of the run.</summary>
RunStatus Status,
/// <summary>Correlation ID for distributed tracing.</summary>
string? CorrelationId,
/// <summary>Total number of jobs in this run.</summary>
int TotalJobs,
/// <summary>Number of completed jobs (succeeded + failed + canceled).</summary>
int CompletedJobs,
/// <summary>Number of succeeded jobs.</summary>
int SucceededJobs,
/// <summary>Number of failed jobs.</summary>
int FailedJobs,
/// <summary>When the run was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the run started executing (first job leased).</summary>
DateTimeOffset? StartedAt,
/// <summary>When the run completed (all jobs terminal).</summary>
DateTimeOffset? CompletedAt,
/// <summary>Actor who initiated the run.</summary>
string CreatedBy,
/// <summary>Optional metadata JSON blob.</summary>
string? Metadata);
/// <summary>
/// Run lifecycle states.
/// </summary>
public enum RunStatus
{
/// <summary>Run created, jobs being enqueued.</summary>
Pending = 0,
/// <summary>Run is executing (at least one job leased).</summary>
Running = 1,
/// <summary>All jobs completed successfully.</summary>
Succeeded = 2,
/// <summary>Run completed with some failures.</summary>
PartiallySucceeded = 3,
/// <summary>All jobs failed.</summary>
Failed = 4,
/// <summary>Run canceled by operator.</summary>
Canceled = 5
}
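One plausible derivation of the terminal aggregate status from the job counters above. This is an assumption for illustration; cancellation is handled separately via the Canceled state:

static RunStatus DeriveTerminalStatus(Run run) =>
    run.FailedJobs == 0 ? RunStatus.Succeeded
    : run.SucceededJobs == 0 ? RunStatus.Failed
    : RunStatus.PartiallySucceeded;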

View File

@@ -0,0 +1,341 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Immutable ledger entry for run execution records.
/// Provides a tamper-evident history of run outcomes with provenance to artifacts.
/// </summary>
public sealed record RunLedgerEntry(
/// <summary>Unique ledger entry identifier.</summary>
Guid LedgerId,
/// <summary>Tenant owning this entry.</summary>
string TenantId,
/// <summary>Run this entry records.</summary>
Guid RunId,
/// <summary>Source that initiated the run.</summary>
Guid SourceId,
/// <summary>Run type (scan, advisory-sync, export).</summary>
string RunType,
/// <summary>Final run status.</summary>
RunStatus FinalStatus,
/// <summary>Total jobs in the run.</summary>
int TotalJobs,
/// <summary>Successfully completed jobs.</summary>
int SucceededJobs,
/// <summary>Failed jobs.</summary>
int FailedJobs,
/// <summary>When the run was created.</summary>
DateTimeOffset RunCreatedAt,
/// <summary>When the run started executing.</summary>
DateTimeOffset? RunStartedAt,
/// <summary>When the run completed.</summary>
DateTimeOffset RunCompletedAt,
/// <summary>Total execution duration.</summary>
TimeSpan ExecutionDuration,
/// <summary>Actor who initiated the run.</summary>
string InitiatedBy,
/// <summary>SHA-256 digest of the run's input payload.</summary>
string InputDigest,
/// <summary>Aggregated SHA-256 digest of all outputs.</summary>
string OutputDigest,
/// <summary>JSON array of artifact references with their digests.</summary>
string ArtifactManifest,
/// <summary>Sequence number in the tenant's ledger.</summary>
long SequenceNumber,
/// <summary>SHA-256 hash of the previous ledger entry.</summary>
string? PreviousEntryHash,
/// <summary>SHA-256 hash of this entry's content.</summary>
string ContentHash,
/// <summary>When this ledger entry was created.</summary>
DateTimeOffset LedgerCreatedAt,
/// <summary>Correlation ID for tracing.</summary>
string? CorrelationId,
/// <summary>Optional metadata JSON.</summary>
string? Metadata)
{
/// <summary>
/// Creates a ledger entry from a completed run.
/// </summary>
public static RunLedgerEntry FromCompletedRun(
Run run,
IReadOnlyList<Artifact> artifacts,
string inputDigest,
long sequenceNumber,
string? previousEntryHash,
string? metadata = null)
{
if (run.CompletedAt is null)
{
throw new InvalidOperationException("Cannot create ledger entry from an incomplete run.");
}
var ledgerId = Guid.NewGuid();
var ledgerCreatedAt = DateTimeOffset.UtcNow;
// Build artifact manifest
var artifactManifest = BuildArtifactManifest(artifacts);
// Compute output digest from all artifact digests
var outputDigest = ComputeOutputDigest(artifacts);
// Compute execution duration
var startTime = run.StartedAt ?? run.CreatedAt;
var executionDuration = run.CompletedAt.Value - startTime;
// Compute content hash for tamper evidence
var contentToHash = $"{ledgerId}|{run.TenantId}|{run.RunId}|{run.SourceId}|{run.RunType}|{run.Status}|{run.TotalJobs}|{run.SucceededJobs}|{run.FailedJobs}|{run.CreatedAt:O}|{run.StartedAt:O}|{run.CompletedAt:O}|{inputDigest}|{outputDigest}|{sequenceNumber}|{previousEntryHash}|{ledgerCreatedAt:O}";
var contentHash = ComputeSha256(contentToHash);
return new RunLedgerEntry(
LedgerId: ledgerId,
TenantId: run.TenantId,
RunId: run.RunId,
SourceId: run.SourceId,
RunType: run.RunType,
FinalStatus: run.Status,
TotalJobs: run.TotalJobs,
SucceededJobs: run.SucceededJobs,
FailedJobs: run.FailedJobs,
RunCreatedAt: run.CreatedAt,
RunStartedAt: run.StartedAt,
RunCompletedAt: run.CompletedAt.Value,
ExecutionDuration: executionDuration,
InitiatedBy: run.CreatedBy,
InputDigest: inputDigest,
OutputDigest: outputDigest,
ArtifactManifest: artifactManifest,
SequenceNumber: sequenceNumber,
PreviousEntryHash: previousEntryHash,
ContentHash: contentHash,
LedgerCreatedAt: ledgerCreatedAt,
CorrelationId: run.CorrelationId,
Metadata: metadata);
}
/// <summary>
/// Verifies the integrity of this ledger entry.
/// </summary>
public bool VerifyIntegrity()
{
var contentToHash = $"{LedgerId}|{TenantId}|{RunId}|{SourceId}|{RunType}|{FinalStatus}|{TotalJobs}|{SucceededJobs}|{FailedJobs}|{RunCreatedAt:O}|{RunStartedAt:O}|{RunCompletedAt:O}|{InputDigest}|{OutputDigest}|{SequenceNumber}|{PreviousEntryHash}|{LedgerCreatedAt:O}";
var computed = ComputeSha256(contentToHash);
return string.Equals(ContentHash, computed, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Verifies the chain link to the previous entry.
/// </summary>
public bool VerifyChainLink(RunLedgerEntry? previousEntry)
{
if (previousEntry is null)
{
return PreviousEntryHash is null || SequenceNumber == 1;
}
return string.Equals(PreviousEntryHash, previousEntry.ContentHash, StringComparison.OrdinalIgnoreCase);
}
private static string BuildArtifactManifest(IReadOnlyList<Artifact> artifacts)
{
var entries = artifacts.Select(a => new
{
a.ArtifactId,
a.ArtifactType,
a.Uri,
a.Digest,
a.MimeType,
a.SizeBytes,
a.CreatedAt
});
return System.Text.Json.JsonSerializer.Serialize(entries);
}
private static string ComputeOutputDigest(IReadOnlyList<Artifact> artifacts)
{
if (artifacts.Count == 0)
{
return ComputeSha256("(no artifacts)");
}
// Sort by artifact ID for deterministic ordering
var sortedDigests = artifacts
.OrderBy(a => a.ArtifactId)
.Select(a => a.Digest)
.ToList();
var combined = string.Join("|", sortedDigests);
return ComputeSha256(combined);
}
private static string ComputeSha256(string content)
{
var bytes = System.Text.Encoding.UTF8.GetBytes(content);
var hash = System.Security.Cryptography.SHA256.HashData(bytes);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
/// <summary>
/// Represents a ledger export operation.
/// </summary>
public sealed record LedgerExport(
/// <summary>Unique export identifier.</summary>
Guid ExportId,
/// <summary>Tenant requesting the export.</summary>
string TenantId,
/// <summary>Export status.</summary>
LedgerExportStatus Status,
/// <summary>Export format (json, ndjson, csv).</summary>
string Format,
/// <summary>Start of the time range to export.</summary>
DateTimeOffset? StartTime,
/// <summary>End of the time range to export.</summary>
DateTimeOffset? EndTime,
/// <summary>Run types to include (null = all).</summary>
string? RunTypeFilter,
/// <summary>Source ID filter (null = all).</summary>
Guid? SourceIdFilter,
/// <summary>Number of entries exported.</summary>
int EntryCount,
/// <summary>URI where the export is stored.</summary>
string? OutputUri,
/// <summary>SHA-256 digest of the export file.</summary>
string? OutputDigest,
/// <summary>Size of the export in bytes.</summary>
long? OutputSizeBytes,
/// <summary>Actor who requested the export.</summary>
string RequestedBy,
/// <summary>When the export was requested.</summary>
DateTimeOffset RequestedAt,
/// <summary>When the export started processing.</summary>
DateTimeOffset? StartedAt,
/// <summary>When the export completed.</summary>
DateTimeOffset? CompletedAt,
/// <summary>Error message if export failed.</summary>
string? ErrorMessage)
{
/// <summary>
/// Creates a new pending export request.
/// </summary>
public static LedgerExport CreateRequest(
string tenantId,
string format,
string requestedBy,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
string? runTypeFilter = null,
Guid? sourceIdFilter = null)
{
if (string.IsNullOrWhiteSpace(format))
{
throw new ArgumentException("Format is required.", nameof(format));
}
var validFormats = new[] { "json", "ndjson", "csv" };
if (!validFormats.Contains(format.ToLowerInvariant()))
{
throw new ArgumentException($"Invalid format. Must be one of: {string.Join(", ", validFormats)}", nameof(format));
}
return new LedgerExport(
ExportId: Guid.NewGuid(),
TenantId: tenantId,
Status: LedgerExportStatus.Pending,
Format: format.ToLowerInvariant(),
StartTime: startTime,
EndTime: endTime,
RunTypeFilter: runTypeFilter,
SourceIdFilter: sourceIdFilter,
EntryCount: 0,
OutputUri: null,
OutputDigest: null,
OutputSizeBytes: null,
RequestedBy: requestedBy,
RequestedAt: DateTimeOffset.UtcNow,
StartedAt: null,
CompletedAt: null,
ErrorMessage: null);
}
/// <summary>
/// Marks the export as started.
/// </summary>
public LedgerExport Start() => this with
{
Status = LedgerExportStatus.Processing,
StartedAt = DateTimeOffset.UtcNow
};
/// <summary>
/// Marks the export as completed.
/// </summary>
public LedgerExport Complete(string outputUri, string outputDigest, long outputSizeBytes, int entryCount) => this with
{
Status = LedgerExportStatus.Completed,
OutputUri = outputUri,
OutputDigest = outputDigest,
OutputSizeBytes = outputSizeBytes,
EntryCount = entryCount,
CompletedAt = DateTimeOffset.UtcNow
};
/// <summary>
/// Marks the export as failed.
/// </summary>
public LedgerExport Fail(string errorMessage) => this with
{
Status = LedgerExportStatus.Failed,
ErrorMessage = errorMessage,
CompletedAt = DateTimeOffset.UtcNow
};
}
/// <summary>
/// Status of a ledger export operation.
/// </summary>
public enum LedgerExportStatus
{
Pending = 0,
Processing = 1,
Completed = 2,
Failed = 3,
Canceled = 4
}
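A minimal export lifecycle sketch using the transitions above; the URI, digest, and sizes are placeholders:

var export = LedgerExport.CreateRequest(
    tenantId: "tenant-a",
    format: "ndjson",
    requestedBy: "auditor@example.com",
    startTime: DateTimeOffset.UtcNow.AddDays(-30),
    endTime: DateTimeOffset.UtcNow);

export = export.Start();
// ... stream matching RunLedgerEntry rows to storage, hashing as you go ...
export = export.Complete(
    outputUri: "s3://exports/ledger-2025-11.ndjson",
    outputDigest: computedSha256,
    outputSizeBytes: bytesWritten,
    entryCount: 1234);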

View File

@@ -0,0 +1,60 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a scheduled job trigger (cron-based or interval-based).
/// Schedules automatically create jobs at specified times.
/// </summary>
public sealed record Schedule(
/// <summary>Unique schedule identifier.</summary>
Guid ScheduleId,
/// <summary>Tenant owning this schedule.</summary>
string TenantId,
/// <summary>Optional project scope within tenant.</summary>
string? ProjectId,
/// <summary>Source that will be used for jobs.</summary>
Guid SourceId,
/// <summary>Human-readable schedule name.</summary>
string Name,
/// <summary>Job type to create.</summary>
string JobType,
/// <summary>Cron expression (6-field with seconds, UTC).</summary>
string CronExpression,
/// <summary>Timezone for cron evaluation (IANA, e.g., "UTC", "America/New_York").</summary>
string Timezone,
/// <summary>Whether the schedule is enabled.</summary>
bool Enabled,
/// <summary>Job payload template JSON.</summary>
string PayloadTemplate,
/// <summary>Job priority for scheduled jobs.</summary>
int Priority,
/// <summary>Maximum retry attempts for scheduled jobs.</summary>
int MaxAttempts,
/// <summary>Last time a job was triggered from this schedule.</summary>
DateTimeOffset? LastTriggeredAt,
/// <summary>Next scheduled trigger time.</summary>
DateTimeOffset? NextTriggerAt,
/// <summary>When the schedule was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the schedule was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>Actor who created the schedule.</summary>
string CreatedBy,
/// <summary>Actor who last modified the schedule.</summary>
string UpdatedBy);
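The CronExpression/Timezone pair maps naturally onto a cron library. A sketch assuming the Cronos NuGet package, which is not referenced in this commit:

using Cronos; // assumption: the Cronos package is available

static DateTimeOffset? ComputeNextTrigger(Schedule schedule, DateTimeOffset from)
{
    // The schedule uses 6-field expressions (with seconds), hence IncludeSeconds.
    var cron = CronExpression.Parse(schedule.CronExpression, CronFormat.IncludeSeconds);
    var zone = TimeZoneInfo.FindSystemTimeZoneById(schedule.Timezone);
    return cron.GetNextOccurrence(from, zone);
}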

View File

@@ -0,0 +1,423 @@
using System.Text.Json;
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Signed manifest providing provenance chain from ledger entries to artifacts.
/// Enables verification of artifact authenticity and integrity.
/// </summary>
public sealed record SignedManifest(
/// <summary>Unique manifest identifier.</summary>
Guid ManifestId,
/// <summary>Manifest schema version.</summary>
string SchemaVersion,
/// <summary>Tenant owning this manifest.</summary>
string TenantId,
/// <summary>Type of provenance (run, export, attestation).</summary>
ProvenanceType ProvenanceType,
/// <summary>Subject of the provenance (run ID, export ID, etc.).</summary>
Guid SubjectId,
/// <summary>Provenance statements (JSON array).</summary>
string Statements,
/// <summary>Artifact references with digests (JSON array).</summary>
string Artifacts,
/// <summary>Materials (inputs) used to produce the artifacts (JSON array).</summary>
string Materials,
/// <summary>Build environment information (JSON object).</summary>
string? BuildInfo,
/// <summary>SHA-256 digest of the manifest payload (excluding signature).</summary>
string PayloadDigest,
/// <summary>Signature algorithm used.</summary>
string SignatureAlgorithm,
/// <summary>Base64-encoded signature.</summary>
string Signature,
/// <summary>Key ID used for signing.</summary>
string KeyId,
/// <summary>When the manifest was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>Expiration time of the manifest (if any).</summary>
DateTimeOffset? ExpiresAt,
/// <summary>Additional metadata (JSON object).</summary>
string? Metadata)
{
/// <summary>
/// Current schema version for manifests.
/// </summary>
public const string CurrentSchemaVersion = "1.0.0";
/// <summary>
/// Creates an unsigned manifest from a ledger entry.
/// The manifest must be signed separately using SigningService.
/// </summary>
public static SignedManifest CreateFromLedgerEntry(
RunLedgerEntry ledger,
string? buildInfo = null,
string? metadata = null)
{
var statements = CreateStatementsFromLedger(ledger);
var artifacts = ledger.ArtifactManifest;
var materials = CreateMaterialsFromLedger(ledger);
var payloadDigest = ComputePayloadDigest(
ledger.TenantId,
ProvenanceType.Run,
ledger.RunId,
statements,
artifacts,
materials);
return new SignedManifest(
ManifestId: Guid.NewGuid(),
SchemaVersion: CurrentSchemaVersion,
TenantId: ledger.TenantId,
ProvenanceType: ProvenanceType.Run,
SubjectId: ledger.RunId,
Statements: statements,
Artifacts: artifacts,
Materials: materials,
BuildInfo: buildInfo,
PayloadDigest: payloadDigest,
SignatureAlgorithm: "none",
Signature: string.Empty,
KeyId: string.Empty,
CreatedAt: DateTimeOffset.UtcNow,
ExpiresAt: null,
Metadata: metadata);
}
/// <summary>
/// Creates an unsigned manifest from a ledger export.
/// </summary>
public static SignedManifest CreateFromExport(
LedgerExport export,
IReadOnlyList<RunLedgerEntry> entries,
string? buildInfo = null,
string? metadata = null)
{
if (export.Status != LedgerExportStatus.Completed)
{
throw new InvalidOperationException("Cannot create manifest from incomplete export.");
}
var statements = CreateStatementsFromExport(export, entries);
var artifacts = CreateExportArtifacts(export);
var materials = CreateExportMaterials(entries);
var payloadDigest = ComputePayloadDigest(
export.TenantId,
ProvenanceType.Export,
export.ExportId,
statements,
artifacts,
materials);
return new SignedManifest(
ManifestId: Guid.NewGuid(),
SchemaVersion: CurrentSchemaVersion,
TenantId: export.TenantId,
ProvenanceType: ProvenanceType.Export,
SubjectId: export.ExportId,
Statements: statements,
Artifacts: artifacts,
Materials: materials,
BuildInfo: buildInfo,
PayloadDigest: payloadDigest,
SignatureAlgorithm: "none",
Signature: string.Empty,
KeyId: string.Empty,
CreatedAt: DateTimeOffset.UtcNow,
ExpiresAt: null,
Metadata: metadata);
}
/// <summary>
/// Signs the manifest with the provided signature.
/// </summary>
public SignedManifest Sign(string signatureAlgorithm, string signature, string keyId, DateTimeOffset? expiresAt = null)
{
if (string.IsNullOrWhiteSpace(signatureAlgorithm))
{
throw new ArgumentException("Signature algorithm is required.", nameof(signatureAlgorithm));
}
if (string.IsNullOrWhiteSpace(signature))
{
throw new ArgumentException("Signature is required.", nameof(signature));
}
if (string.IsNullOrWhiteSpace(keyId))
{
throw new ArgumentException("Key ID is required.", nameof(keyId));
}
return this with
{
SignatureAlgorithm = signatureAlgorithm,
Signature = signature,
KeyId = keyId,
ExpiresAt = expiresAt
};
}
/// <summary>
/// Checks if the manifest is signed.
/// </summary>
public bool IsSigned => !string.IsNullOrEmpty(Signature) && SignatureAlgorithm != "none";
/// <summary>
/// Checks if the manifest has expired.
/// </summary>
public bool IsExpired => ExpiresAt.HasValue && ExpiresAt.Value < DateTimeOffset.UtcNow;
/// <summary>
/// Verifies the payload digest integrity.
/// </summary>
public bool VerifyPayloadIntegrity()
{
var computed = ComputePayloadDigest(TenantId, ProvenanceType, SubjectId, Statements, Artifacts, Materials);
return string.Equals(PayloadDigest, computed, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Parses the artifact manifest into typed objects.
/// </summary>
public IReadOnlyList<ArtifactReference> GetArtifactReferences()
{
if (string.IsNullOrEmpty(Artifacts) || Artifacts == "[]")
{
return Array.Empty<ArtifactReference>();
}
return JsonSerializer.Deserialize<List<ArtifactReference>>(Artifacts) ?? [];
}
/// <summary>
/// Parses the material manifest into typed objects.
/// </summary>
public IReadOnlyList<MaterialReference> GetMaterialReferences()
{
if (string.IsNullOrEmpty(Materials) || Materials == "[]")
{
return Array.Empty<MaterialReference>();
}
return JsonSerializer.Deserialize<List<MaterialReference>>(Materials) ?? [];
}
/// <summary>
/// Parses the statements into typed objects.
/// </summary>
public IReadOnlyList<ProvenanceStatement> GetStatements()
{
if (string.IsNullOrEmpty(Statements) || Statements == "[]")
{
return Array.Empty<ProvenanceStatement>();
}
return JsonSerializer.Deserialize<List<ProvenanceStatement>>(Statements) ?? [];
}
private static string CreateStatementsFromLedger(RunLedgerEntry ledger)
{
var statements = new List<ProvenanceStatement>
{
new(
StatementType: "run_completed",
Subject: $"run:{ledger.RunId}",
Predicate: "produced",
Object: $"outputs:{ledger.OutputDigest}",
Timestamp: ledger.RunCompletedAt,
Metadata: JsonSerializer.Serialize(new
{
ledger.RunType,
ledger.FinalStatus,
ledger.TotalJobs,
ledger.SucceededJobs,
ledger.FailedJobs,
ledger.ExecutionDuration
})),
new(
StatementType: "chain_link",
Subject: $"ledger:{ledger.LedgerId}",
Predicate: "follows",
Object: ledger.PreviousEntryHash ?? "(genesis)",
Timestamp: ledger.LedgerCreatedAt,
Metadata: JsonSerializer.Serialize(new
{
ledger.SequenceNumber,
ledger.ContentHash
}))
};
return JsonSerializer.Serialize(statements);
}
private static string CreateMaterialsFromLedger(RunLedgerEntry ledger)
{
var materials = new List<MaterialReference>
{
new(
Uri: $"input:{ledger.RunId}",
Digest: ledger.InputDigest,
MediaType: "application/json",
Name: "run_input")
};
return JsonSerializer.Serialize(materials);
}
private static string CreateStatementsFromExport(LedgerExport export, IReadOnlyList<RunLedgerEntry> entries)
{
var statements = new List<ProvenanceStatement>
{
new(
StatementType: "export_completed",
Subject: $"export:{export.ExportId}",
Predicate: "contains",
Object: $"entries:{entries.Count}",
Timestamp: export.CompletedAt ?? DateTimeOffset.UtcNow,
Metadata: JsonSerializer.Serialize(new
{
export.Format,
export.EntryCount,
export.StartTime,
export.EndTime,
export.RunTypeFilter,
export.SourceIdFilter
}))
};
// Add chain integrity statement
if (entries.Count > 0)
{
var first = entries.MinBy(e => e.SequenceNumber);
var last = entries.MaxBy(e => e.SequenceNumber);
if (first is not null && last is not null)
{
statements.Add(new ProvenanceStatement(
StatementType: "chain_range",
Subject: $"export:{export.ExportId}",
Predicate: "covers",
Object: $"sequence:{first.SequenceNumber}-{last.SequenceNumber}",
Timestamp: export.CompletedAt ?? DateTimeOffset.UtcNow,
Metadata: JsonSerializer.Serialize(new
{
FirstEntryHash = first.ContentHash,
LastEntryHash = last.ContentHash
})));
}
}
return JsonSerializer.Serialize(statements);
}
private static string CreateExportArtifacts(LedgerExport export)
{
var artifacts = new List<ArtifactReference>
{
new(
ArtifactId: export.ExportId,
ArtifactType: "ledger_export",
Uri: export.OutputUri ?? string.Empty,
Digest: export.OutputDigest ?? string.Empty,
MediaType: GetMediaType(export.Format),
SizeBytes: export.OutputSizeBytes ?? 0)
};
return JsonSerializer.Serialize(artifacts);
}
private static string CreateExportMaterials(IReadOnlyList<RunLedgerEntry> entries)
{
var materials = entries.Select(e => new MaterialReference(
Uri: $"ledger:{e.LedgerId}",
Digest: e.ContentHash,
MediaType: "application/json",
Name: $"run_{e.RunId}")).ToList();
return JsonSerializer.Serialize(materials);
}
private static string GetMediaType(string format) => format.ToLowerInvariant() switch
{
"json" => "application/json",
"ndjson" => "application/x-ndjson",
"csv" => "text/csv",
_ => "application/octet-stream"
};
private static string ComputePayloadDigest(
string tenantId,
ProvenanceType provenanceType,
Guid subjectId,
string statements,
string artifacts,
string materials)
{
var payload = $"{tenantId}|{provenanceType}|{subjectId}|{statements}|{artifacts}|{materials}";
var bytes = System.Text.Encoding.UTF8.GetBytes(payload);
var hash = System.Security.Cryptography.SHA256.HashData(bytes);
return Convert.ToHexString(hash).ToLowerInvariant();
}
}
/// <summary>
/// Types of provenance tracked by manifests.
/// </summary>
public enum ProvenanceType
{
/// <summary>Provenance for a completed run.</summary>
Run = 0,
/// <summary>Provenance for a ledger export.</summary>
Export = 1,
/// <summary>Provenance for an attestation.</summary>
Attestation = 2
}
/// <summary>
/// Reference to an artifact in a manifest.
/// </summary>
public sealed record ArtifactReference(
Guid ArtifactId,
string ArtifactType,
string Uri,
string Digest,
string MediaType,
long SizeBytes);
/// <summary>
/// Reference to a material (input) in a manifest.
/// </summary>
public sealed record MaterialReference(
string Uri,
string Digest,
string MediaType,
string Name);
/// <summary>
/// A provenance statement in a manifest.
/// </summary>
public sealed record ProvenanceStatement(
string StatementType,
string Subject,
string Predicate,
string Object,
DateTimeOffset Timestamp,
string? Metadata);
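A usage sketch tying the pieces together. The signing values are placeholders, since signatures are produced by a separate signing service:

var manifest = SignedManifest.CreateFromLedgerEntry(ledgerEntry);

// Signature produced elsewhere (e.g., by a SigningService); values illustrative.
manifest = manifest.Sign(
    signatureAlgorithm: "ed25519",
    signature: base64Signature,
    keyId: "orchestrator-signing-key-1");

var ok = manifest.IsSigned
      && !manifest.IsExpired
      && manifest.VerifyPayloadIntegrity();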

View File

@@ -0,0 +1,567 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Service Level Objective type.
/// </summary>
public enum SloType
{
/// <summary>Availability SLO (percentage of successful requests).</summary>
Availability,
/// <summary>Latency SLO (percentile-based response time).</summary>
Latency,
/// <summary>Throughput SLO (minimum jobs processed per period).</summary>
Throughput
}
/// <summary>
/// Time window for SLO computation.
/// </summary>
public enum SloWindow
{
/// <summary>Rolling 1 hour window.</summary>
OneHour,
/// <summary>Rolling 1 day window.</summary>
OneDay,
/// <summary>Rolling 7 day window.</summary>
SevenDays,
/// <summary>Rolling 30 day window.</summary>
ThirtyDays
}
/// <summary>
/// Alert severity for SLO violations.
/// </summary>
public enum AlertSeverity
{
/// <summary>Informational - SLO approaching threshold.</summary>
Info,
/// <summary>Warning - SLO at risk.</summary>
Warning,
/// <summary>Critical - SLO likely to be breached.</summary>
Critical,
/// <summary>Emergency - SLO breached.</summary>
Emergency
}
/// <summary>
/// Service Level Objective definition.
/// </summary>
public sealed record Slo(
/// <summary>Unique SLO identifier.</summary>
Guid SloId,
/// <summary>Tenant this SLO belongs to.</summary>
string TenantId,
/// <summary>Human-readable name.</summary>
string Name,
/// <summary>Optional description.</summary>
string? Description,
/// <summary>Type of SLO.</summary>
SloType Type,
/// <summary>Job type this SLO applies to (null = all job types).</summary>
string? JobType,
/// <summary>Source ID this SLO applies to (null = all sources).</summary>
Guid? SourceId,
/// <summary>Target objective (e.g., 0.999 for 99.9% availability).</summary>
double Target,
/// <summary>Time window for SLO evaluation.</summary>
SloWindow Window,
/// <summary>For latency SLOs: the percentile (e.g., 0.95 for P95).</summary>
double? LatencyPercentile,
/// <summary>For latency SLOs: the target latency in seconds.</summary>
double? LatencyTargetSeconds,
/// <summary>For throughput SLOs: minimum jobs per period.</summary>
int? ThroughputMinimum,
/// <summary>Whether this SLO is actively monitored.</summary>
bool Enabled,
/// <summary>When the SLO was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the SLO was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>Actor who created the SLO.</summary>
string CreatedBy,
/// <summary>Actor who last modified the SLO.</summary>
string UpdatedBy)
{
/// <summary>Calculates the error budget as a decimal (1 - target).</summary>
public double ErrorBudget => 1.0 - Target;
/// <summary>Creates a new availability SLO.</summary>
public static Slo CreateAvailability(
string tenantId,
string name,
double target,
SloWindow window,
string createdBy,
string? description = null,
string? jobType = null,
Guid? sourceId = null)
{
ValidateTarget(target);
var now = DateTimeOffset.UtcNow;
return new Slo(
SloId: Guid.NewGuid(),
TenantId: tenantId,
Name: name,
Description: description,
Type: SloType.Availability,
JobType: jobType,
SourceId: sourceId,
Target: target,
Window: window,
LatencyPercentile: null,
LatencyTargetSeconds: null,
ThroughputMinimum: null,
Enabled: true,
CreatedAt: now,
UpdatedAt: now,
CreatedBy: createdBy,
UpdatedBy: createdBy);
}
/// <summary>Creates a new latency SLO.</summary>
public static Slo CreateLatency(
string tenantId,
string name,
double percentile,
double targetSeconds,
double target,
SloWindow window,
string createdBy,
string? description = null,
string? jobType = null,
Guid? sourceId = null)
{
ValidateTarget(target);
if (percentile < 0 || percentile > 1)
throw new ArgumentOutOfRangeException(nameof(percentile), "Percentile must be between 0 and 1");
if (targetSeconds <= 0)
throw new ArgumentOutOfRangeException(nameof(targetSeconds), "Target latency must be positive");
var now = DateTimeOffset.UtcNow;
return new Slo(
SloId: Guid.NewGuid(),
TenantId: tenantId,
Name: name,
Description: description,
Type: SloType.Latency,
JobType: jobType,
SourceId: sourceId,
Target: target,
Window: window,
LatencyPercentile: percentile,
LatencyTargetSeconds: targetSeconds,
ThroughputMinimum: null,
Enabled: true,
CreatedAt: now,
UpdatedAt: now,
CreatedBy: createdBy,
UpdatedBy: createdBy);
}
/// <summary>Creates a new throughput SLO.</summary>
public static Slo CreateThroughput(
string tenantId,
string name,
int minimum,
double target,
SloWindow window,
string createdBy,
string? description = null,
string? jobType = null,
Guid? sourceId = null)
{
ValidateTarget(target);
if (minimum <= 0)
throw new ArgumentOutOfRangeException(nameof(minimum), "Throughput minimum must be positive");
var now = DateTimeOffset.UtcNow;
return new Slo(
SloId: Guid.NewGuid(),
TenantId: tenantId,
Name: name,
Description: description,
Type: SloType.Throughput,
JobType: jobType,
SourceId: sourceId,
Target: target,
Window: window,
LatencyPercentile: null,
LatencyTargetSeconds: null,
ThroughputMinimum: minimum,
Enabled: true,
CreatedAt: now,
UpdatedAt: now,
CreatedBy: createdBy,
UpdatedBy: createdBy);
}
/// <summary>Updates the SLO with new values.</summary>
public Slo Update(
string? name = null,
string? description = null,
double? target = null,
bool? enabled = null,
string? updatedBy = null)
{
if (target.HasValue)
ValidateTarget(target.Value);
return this with
{
Name = name ?? Name,
Description = description ?? Description,
Target = target ?? Target,
Enabled = enabled ?? Enabled,
UpdatedAt = DateTimeOffset.UtcNow,
UpdatedBy = updatedBy ?? UpdatedBy
};
}
/// <summary>Disables the SLO.</summary>
public Slo Disable(string updatedBy) =>
this with
{
Enabled = false,
UpdatedAt = DateTimeOffset.UtcNow,
UpdatedBy = updatedBy
};
/// <summary>Enables the SLO.</summary>
public Slo Enable(string updatedBy) =>
this with
{
Enabled = true,
UpdatedAt = DateTimeOffset.UtcNow,
UpdatedBy = updatedBy
};
/// <summary>Gets the window duration as a TimeSpan.</summary>
public TimeSpan GetWindowDuration() => Window switch
{
SloWindow.OneHour => TimeSpan.FromHours(1),
SloWindow.OneDay => TimeSpan.FromDays(1),
SloWindow.SevenDays => TimeSpan.FromDays(7),
SloWindow.ThirtyDays => TimeSpan.FromDays(30),
_ => throw new InvalidOperationException($"Unknown window: {Window}")
};
private static void ValidateTarget(double target)
{
if (target <= 0 || target > 1)
throw new ArgumentOutOfRangeException(nameof(target), "Target must be between 0 (exclusive) and 1 (inclusive)");
}
}
/// <summary>
/// Current state of an SLO including burn rate and budget consumption.
/// </summary>
public sealed record SloState(
/// <summary>The SLO this state belongs to.</summary>
Guid SloId,
/// <summary>Tenant this state belongs to.</summary>
string TenantId,
/// <summary>Current SLI value (actual performance).</summary>
double CurrentSli,
/// <summary>Total events/requests in the window.</summary>
long TotalEvents,
/// <summary>Good events (successful) in the window.</summary>
long GoodEvents,
/// <summary>Bad events (failed) in the window.</summary>
long BadEvents,
/// <summary>Error budget consumed (0-1 where 1 = fully consumed).</summary>
double BudgetConsumed,
/// <summary>Error budget remaining (0-1 where 1 = fully available).</summary>
double BudgetRemaining,
/// <summary>Current burn rate (1.0 = consuming budget at sustainable rate).</summary>
double BurnRate,
/// <summary>Projected time until budget exhaustion (null if not burning).</summary>
TimeSpan? TimeToExhaustion,
/// <summary>Whether the SLO is currently met.</summary>
bool IsMet,
/// <summary>Current alert severity based on budget consumption.</summary>
AlertSeverity AlertSeverity,
/// <summary>When this state was computed.</summary>
DateTimeOffset ComputedAt,
/// <summary>Start of the evaluation window.</summary>
DateTimeOffset WindowStart,
/// <summary>End of the evaluation window.</summary>
DateTimeOffset WindowEnd)
{
/// <summary>Creates a state indicating no data is available.</summary>
public static SloState NoData(Guid sloId, string tenantId, DateTimeOffset now, SloWindow window)
{
var windowDuration = GetWindowDuration(window);
return new SloState(
SloId: sloId,
TenantId: tenantId,
CurrentSli: 1.0, // Assume good when no data
TotalEvents: 0,
GoodEvents: 0,
BadEvents: 0,
BudgetConsumed: 0,
BudgetRemaining: 1.0,
BurnRate: 0,
TimeToExhaustion: null,
IsMet: true,
AlertSeverity: AlertSeverity.Info,
ComputedAt: now,
WindowStart: now - windowDuration,
WindowEnd: now);
}
private static TimeSpan GetWindowDuration(SloWindow window) => window switch
{
SloWindow.OneHour => TimeSpan.FromHours(1),
SloWindow.OneDay => TimeSpan.FromDays(1),
SloWindow.SevenDays => TimeSpan.FromDays(7),
SloWindow.ThirtyDays => TimeSpan.FromDays(30),
_ => TimeSpan.FromDays(1)
};
}
/// <summary>
/// Alert budget threshold configuration.
/// </summary>
public sealed record AlertBudgetThreshold(
/// <summary>Unique threshold identifier.</summary>
Guid ThresholdId,
/// <summary>SLO this threshold applies to.</summary>
Guid SloId,
/// <summary>Tenant this threshold belongs to.</summary>
string TenantId,
/// <summary>Budget consumed percentage that triggers this alert (0-1).</summary>
double BudgetConsumedThreshold,
/// <summary>Burn rate threshold that triggers this alert.</summary>
double? BurnRateThreshold,
/// <summary>Severity of the alert.</summary>
AlertSeverity Severity,
/// <summary>Whether this threshold is enabled.</summary>
bool Enabled,
/// <summary>Notification channel for this alert.</summary>
string? NotificationChannel,
/// <summary>Notification endpoint for this alert.</summary>
string? NotificationEndpoint,
/// <summary>Cooldown period between alerts.</summary>
TimeSpan Cooldown,
/// <summary>When an alert was last triggered.</summary>
DateTimeOffset? LastTriggeredAt,
/// <summary>When the threshold was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the threshold was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>Actor who created the threshold.</summary>
string CreatedBy,
/// <summary>Actor who last modified the threshold.</summary>
string UpdatedBy)
{
/// <summary>Creates a new alert threshold.</summary>
public static AlertBudgetThreshold Create(
Guid sloId,
string tenantId,
double budgetConsumedThreshold,
AlertSeverity severity,
string createdBy,
double? burnRateThreshold = null,
string? notificationChannel = null,
string? notificationEndpoint = null,
TimeSpan? cooldown = null)
{
if (budgetConsumedThreshold < 0 || budgetConsumedThreshold > 1)
throw new ArgumentOutOfRangeException(nameof(budgetConsumedThreshold), "Threshold must be between 0 and 1");
var now = DateTimeOffset.UtcNow;
return new AlertBudgetThreshold(
ThresholdId: Guid.NewGuid(),
SloId: sloId,
TenantId: tenantId,
BudgetConsumedThreshold: budgetConsumedThreshold,
BurnRateThreshold: burnRateThreshold,
Severity: severity,
Enabled: true,
NotificationChannel: notificationChannel,
NotificationEndpoint: notificationEndpoint,
Cooldown: cooldown ?? TimeSpan.FromHours(1),
LastTriggeredAt: null,
CreatedAt: now,
UpdatedAt: now,
CreatedBy: createdBy,
UpdatedBy: createdBy);
}
/// <summary>Checks if this threshold should trigger based on current state.</summary>
public bool ShouldTrigger(SloState state, DateTimeOffset now)
{
if (!Enabled) return false;
// Check cooldown
if (LastTriggeredAt.HasValue && (now - LastTriggeredAt.Value) < Cooldown)
return false;
// Check budget consumed threshold
if (state.BudgetConsumed >= BudgetConsumedThreshold)
return true;
// Check burn rate threshold if set
if (BurnRateThreshold.HasValue && state.BurnRate >= BurnRateThreshold.Value)
return true;
return false;
}
/// <summary>Records that this threshold was triggered.</summary>
public AlertBudgetThreshold RecordTrigger(DateTimeOffset now) =>
this with
{
LastTriggeredAt = now,
UpdatedAt = now
};
}
/// <summary>
/// SLO alert event.
/// </summary>
public sealed record SloAlert(
/// <summary>Unique alert identifier.</summary>
Guid AlertId,
/// <summary>SLO this alert relates to.</summary>
Guid SloId,
/// <summary>Threshold that triggered this alert.</summary>
Guid ThresholdId,
/// <summary>Tenant this alert belongs to.</summary>
string TenantId,
/// <summary>Severity of the alert.</summary>
AlertSeverity Severity,
/// <summary>Alert message.</summary>
string Message,
/// <summary>Budget consumed at time of alert.</summary>
double BudgetConsumed,
/// <summary>Burn rate at time of alert.</summary>
double BurnRate,
/// <summary>Current SLI value at time of alert.</summary>
double CurrentSli,
/// <summary>When the alert was triggered.</summary>
DateTimeOffset TriggeredAt,
/// <summary>When the alert was acknowledged (null if not acknowledged).</summary>
DateTimeOffset? AcknowledgedAt,
/// <summary>Who acknowledged the alert.</summary>
string? AcknowledgedBy,
/// <summary>When the alert was resolved (null if not resolved).</summary>
DateTimeOffset? ResolvedAt,
/// <summary>How the alert was resolved.</summary>
string? ResolutionNotes)
{
/// <summary>Creates a new alert from an SLO state and threshold.</summary>
public static SloAlert Create(
Slo slo,
SloState state,
AlertBudgetThreshold threshold)
{
var message = threshold.BurnRateThreshold.HasValue && state.BurnRate >= threshold.BurnRateThreshold.Value
? $"SLO '{slo.Name}' burn rate {state.BurnRate:F2}x exceeds threshold {threshold.BurnRateThreshold.Value:F2}x"
: $"SLO '{slo.Name}' error budget {state.BudgetConsumed:P1} consumed exceeds threshold {threshold.BudgetConsumedThreshold:P1}";
return new SloAlert(
AlertId: Guid.NewGuid(),
SloId: slo.SloId,
ThresholdId: threshold.ThresholdId,
TenantId: slo.TenantId,
Severity: threshold.Severity,
Message: message,
BudgetConsumed: state.BudgetConsumed,
BurnRate: state.BurnRate,
CurrentSli: state.CurrentSli,
TriggeredAt: state.ComputedAt,
AcknowledgedAt: null,
AcknowledgedBy: null,
ResolvedAt: null,
ResolutionNotes: null);
}
/// <summary>Acknowledges the alert.</summary>
public SloAlert Acknowledge(string acknowledgedBy, DateTimeOffset now) =>
this with
{
AcknowledgedAt = now,
AcknowledgedBy = acknowledgedBy
};
/// <summary>Resolves the alert.</summary>
public SloAlert Resolve(string notes, DateTimeOffset now) =>
this with
{
ResolvedAt = now,
ResolutionNotes = notes
};
/// <summary>Whether this alert has been acknowledged.</summary>
public bool IsAcknowledged => AcknowledgedAt.HasValue;
/// <summary>Whether this alert has been resolved.</summary>
public bool IsResolved => ResolvedAt.HasValue;
}
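
Illustrative usage of the factory methods above; a sketch only, with placeholder tenant and actor values that are not part of the commit.

var slo = Slo.CreateLatency(
    tenantId: "tenant-a",
    name: "scan-p95",
    percentile: 0.95,            // P95
    targetSeconds: 30,           // P95 must stay under 30 s...
    target: 0.99,                // ...in 99% of evaluations
    window: SloWindow.SevenDays,
    createdBy: "ops@example");

// Error budget is 1 - target: here 1% of the rolling 7-day window.
Console.WriteLine($"budget={slo.ErrorBudget:P1}, window={slo.GetWindowDuration()}");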


@@ -0,0 +1,42 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a job source (producer) that submits jobs to the orchestrator.
/// Examples: Concelier, Excititor, Scheduler, Export Center, Policy Engine.
/// </summary>
public sealed record Source(
/// <summary>Unique source identifier.</summary>
Guid SourceId,
/// <summary>Tenant owning this source.</summary>
string TenantId,
/// <summary>Human-readable source name (e.g., "concelier-nvd").</summary>
string Name,
/// <summary>Source type/category (e.g., "advisory-ingest", "scanner", "export").</summary>
string SourceType,
/// <summary>Whether the source is currently enabled.</summary>
bool Enabled,
/// <summary>Whether the source is paused (throttled by operator).</summary>
bool Paused,
/// <summary>Operator-provided reason for pause (if paused).</summary>
string? PauseReason,
/// <summary>Ticket reference for pause audit trail.</summary>
string? PauseTicket,
/// <summary>Optional configuration JSON blob.</summary>
string? Configuration,
/// <summary>When the source was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the source was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>Actor who last modified the source.</summary>
string UpdatedBy);


@@ -0,0 +1,60 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents a dynamic rate-limit override (throttle) for a source or job type.
/// Throttles are temporary pause/slow-down mechanisms, often in response to upstream pressure.
/// </summary>
public sealed record Throttle(
/// <summary>Unique throttle identifier.</summary>
Guid ThrottleId,
/// <summary>Tenant this throttle applies to.</summary>
string TenantId,
/// <summary>Source to throttle (null if job-type scoped).</summary>
Guid? SourceId,
/// <summary>Job type to throttle (null if source-scoped).</summary>
string? JobType,
/// <summary>Whether this throttle is currently active.</summary>
bool Active,
/// <summary>Reason for the throttle (e.g., "429 from upstream", "Manual pause").</summary>
string Reason,
/// <summary>Optional ticket reference for audit.</summary>
string? Ticket,
/// <summary>When the throttle was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the throttle expires (null = indefinite).</summary>
DateTimeOffset? ExpiresAt,
/// <summary>Actor who created the throttle.</summary>
string CreatedBy);
/// <summary>
/// Reason categories for throttle creation.
/// </summary>
public static class ThrottleReasons
{
/// <summary>Upstream returned 429 Too Many Requests.</summary>
public const string UpstreamRateLimited = "upstream_429";
/// <summary>Upstream returned 503 Service Unavailable.</summary>
public const string UpstreamUnavailable = "upstream_503";
/// <summary>Upstream returned 5xx error repeatedly.</summary>
public const string UpstreamErrors = "upstream_5xx";
/// <summary>Manual operator intervention.</summary>
public const string ManualPause = "manual_pause";
/// <summary>Circuit breaker triggered.</summary>
public const string CircuitBreaker = "circuit_breaker";
/// <summary>Quota exhausted.</summary>
public const string QuotaExhausted = "quota_exhausted";
}
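
A sketch of a source-scoped throttle raised after an upstream 429; all values below are placeholders, not part of the commit.

var sourceId = Guid.NewGuid();   // placeholder for a real source
var throttle = new Throttle(
    ThrottleId: Guid.NewGuid(),
    TenantId: "tenant-a",
    SourceId: sourceId,
    JobType: null,               // source-scoped, so no job type
    Active: true,
    Reason: ThrottleReasons.UpstreamRateLimited,
    Ticket: null,
    CreatedAt: DateTimeOffset.UtcNow,
    ExpiresAt: DateTimeOffset.UtcNow.AddHours(1), // auto-expires after an hour
    CreatedBy: "orchestrator");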


@@ -0,0 +1,162 @@
namespace StellaOps.Orchestrator.Core.Domain;
/// <summary>
/// Represents an event-time watermark for tracking processing progress.
/// Watermarks are scoped by source, job type, or custom key.
/// </summary>
public sealed record Watermark(
/// <summary>Unique watermark identifier.</summary>
Guid WatermarkId,
/// <summary>Tenant this watermark belongs to.</summary>
string TenantId,
/// <summary>Source this watermark tracks (null if job-type scoped).</summary>
Guid? SourceId,
/// <summary>Job type this watermark tracks (null if source-scoped).</summary>
string? JobType,
/// <summary>Normalized scope key for uniqueness.</summary>
string ScopeKey,
/// <summary>Latest processed event time (high watermark).</summary>
DateTimeOffset HighWatermark,
/// <summary>Earliest event time in current window (low watermark for windowing).</summary>
DateTimeOffset? LowWatermark,
/// <summary>Monotonic sequence number for ordering.</summary>
long SequenceNumber,
/// <summary>Total events processed through this watermark.</summary>
long ProcessedCount,
/// <summary>SHA-256 hash of last processed batch for integrity verification.</summary>
string? LastBatchHash,
/// <summary>When the watermark was created.</summary>
DateTimeOffset CreatedAt,
/// <summary>When the watermark was last updated.</summary>
DateTimeOffset UpdatedAt,
/// <summary>Actor who last modified the watermark.</summary>
string UpdatedBy)
{
/// <summary>
/// Creates a scope key for source-scoped watermarks.
/// </summary>
public static string CreateScopeKey(Guid sourceId) =>
$"source:{sourceId:N}";
/// <summary>
/// Creates a scope key for job-type-scoped watermarks.
/// </summary>
public static string CreateScopeKey(string jobType) =>
$"job_type:{jobType.ToLowerInvariant()}";
/// <summary>
/// Creates a scope key for source+job-type scoped watermarks.
/// </summary>
public static string CreateScopeKey(Guid sourceId, string jobType) =>
$"source:{sourceId:N}:job_type:{jobType.ToLowerInvariant()}";
/// <summary>
/// Creates a new watermark with initial values.
/// </summary>
public static Watermark Create(
string tenantId,
Guid? sourceId,
string? jobType,
DateTimeOffset highWatermark,
string createdBy)
{
var scopeKey = (sourceId, jobType) switch
{
(Guid s, string j) when !string.IsNullOrEmpty(j) => CreateScopeKey(s, j),
(Guid s, _) => CreateScopeKey(s),
(_, string j) when !string.IsNullOrEmpty(j) => CreateScopeKey(j),
_ => throw new ArgumentException("Either sourceId or jobType must be specified.")
};
var now = DateTimeOffset.UtcNow;
return new Watermark(
WatermarkId: Guid.NewGuid(),
TenantId: tenantId,
SourceId: sourceId,
JobType: jobType,
ScopeKey: scopeKey,
HighWatermark: highWatermark,
LowWatermark: null,
SequenceNumber: 0,
ProcessedCount: 0,
LastBatchHash: null,
CreatedAt: now,
UpdatedAt: now,
UpdatedBy: createdBy);
}
/// <summary>
/// Advances the watermark after successful batch processing.
/// </summary>
public Watermark Advance(
DateTimeOffset newHighWatermark,
long eventsProcessed,
string? batchHash,
string updatedBy)
{
if (newHighWatermark < HighWatermark)
throw new ArgumentException("New high watermark cannot be before current high watermark.", nameof(newHighWatermark));
return this with
{
HighWatermark = newHighWatermark,
SequenceNumber = SequenceNumber + 1,
ProcessedCount = ProcessedCount + eventsProcessed,
LastBatchHash = batchHash,
UpdatedAt = DateTimeOffset.UtcNow,
UpdatedBy = updatedBy
};
}
/// <summary>
/// Sets the event-time window bounds.
/// </summary>
public Watermark WithWindow(DateTimeOffset lowWatermark, DateTimeOffset highWatermark)
{
if (highWatermark < lowWatermark)
throw new ArgumentException("High watermark cannot be before low watermark.");
return this with
{
LowWatermark = lowWatermark,
HighWatermark = highWatermark,
UpdatedAt = DateTimeOffset.UtcNow
};
}
}
/// <summary>
/// Snapshot of watermark state for observability.
/// </summary>
public sealed record WatermarkSnapshot(
string ScopeKey,
DateTimeOffset HighWatermark,
DateTimeOffset? LowWatermark,
long SequenceNumber,
long ProcessedCount,
TimeSpan? Lag)
{
/// <summary>
/// Creates a snapshot from a watermark with calculated lag.
/// </summary>
public static WatermarkSnapshot FromWatermark(Watermark watermark, DateTimeOffset now) =>
new(
ScopeKey: watermark.ScopeKey,
HighWatermark: watermark.HighWatermark,
LowWatermark: watermark.LowWatermark,
SequenceNumber: watermark.SequenceNumber,
ProcessedCount: watermark.ProcessedCount,
Lag: now - watermark.HighWatermark);
}
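
A sketch of the create/advance/snapshot cycle; placeholder values, not part of the commit.

var sourceId = Guid.NewGuid();   // placeholder
var wm = Watermark.Create(
    tenantId: "tenant-a",
    sourceId: sourceId,
    jobType: null,
    highWatermark: DateTimeOffset.UtcNow.AddMinutes(-10),
    createdBy: "ingest-worker");

// After a successful batch: bump the high watermark and counters.
wm = wm.Advance(
    newHighWatermark: DateTimeOffset.UtcNow.AddMinutes(-5),
    eventsProcessed: 120,
    batchHash: null,             // placeholder; normally the batch SHA-256
    updatedBy: "ingest-worker");

// Lag is simply now - HighWatermark.
var snap = WatermarkSnapshot.FromWatermark(wm, DateTimeOffset.UtcNow);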


@@ -0,0 +1,450 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.RateLimiting;
/// <summary>
/// Adaptive rate limiter that combines token bucket, concurrency limiting, and backpressure handling.
/// Provides per-tenant/job-type rate limiting with automatic adaptation to upstream pressure.
/// </summary>
public sealed class AdaptiveRateLimiter
{
private readonly TokenBucket _tokenBucket;
private readonly ConcurrencyLimiter _concurrencyLimiter;
private readonly BackpressureHandler _backpressureHandler;
private readonly HourlyCounter _hourlyCounter;
private readonly object _lock = new();
/// <summary>
/// Tenant ID this limiter applies to.
/// </summary>
public string TenantId { get; }
/// <summary>
/// Job type this limiter applies to (null = all types).
/// </summary>
public string? JobType { get; }
/// <summary>
/// Maximum jobs per hour.
/// </summary>
public int MaxPerHour { get; }
/// <summary>
/// Whether the limiter is paused by operator.
/// </summary>
public bool IsPaused { get; private set; }
/// <summary>
/// Reason for pause (if paused).
/// </summary>
public string? PauseReason { get; private set; }
/// <summary>
/// Creates a new adaptive rate limiter from quota configuration.
/// </summary>
public AdaptiveRateLimiter(Quota quota, TimeProvider? timeProvider = null)
{
ArgumentNullException.ThrowIfNull(quota);
TenantId = quota.TenantId;
JobType = quota.JobType;
MaxPerHour = quota.MaxPerHour;
IsPaused = quota.Paused;
PauseReason = quota.PauseReason;
_tokenBucket = new TokenBucket(
quota.BurstCapacity,
quota.RefillRate,
quota.CurrentTokens,
quota.LastRefillAt);
_concurrencyLimiter = new ConcurrencyLimiter(
quota.MaxActive,
quota.CurrentActive);
_backpressureHandler = new BackpressureHandler(
baseDelay: TimeSpan.FromSeconds(1),
maxDelay: TimeSpan.FromMinutes(5),
failureThreshold: 3,
jitterFactor: 0.2);
_hourlyCounter = new HourlyCounter(
quota.MaxPerHour,
quota.CurrentHourCount,
quota.CurrentHourStart);
}
/// <summary>
/// Creates a new adaptive rate limiter with explicit configuration.
/// </summary>
public AdaptiveRateLimiter(
string tenantId,
string? jobType,
int maxActive,
int maxPerHour,
int burstCapacity,
double refillRate)
{
TenantId = tenantId ?? throw new ArgumentNullException(nameof(tenantId));
JobType = jobType;
MaxPerHour = maxPerHour;
_tokenBucket = new TokenBucket(burstCapacity, refillRate);
_concurrencyLimiter = new ConcurrencyLimiter(maxActive);
_backpressureHandler = new BackpressureHandler();
_hourlyCounter = new HourlyCounter(maxPerHour);
}
/// <summary>
/// Attempts to acquire permission to execute a job.
/// </summary>
/// <param name="now">Current time.</param>
/// <returns>Result indicating whether acquisition was successful and why.</returns>
public RateLimitResult TryAcquire(DateTimeOffset now)
{
lock (_lock)
{
// Check if paused
if (IsPaused)
{
return RateLimitResult.Denied(RateLimitDenialReason.Paused, PauseReason);
}
// Check backpressure
if (!_backpressureHandler.ShouldAllow(now))
{
var snapshot = _backpressureHandler.GetSnapshot(now);
return RateLimitResult.Denied(
RateLimitDenialReason.Backpressure,
snapshot.LastFailureReason,
retryAfter: snapshot.TimeRemaining);
}
// Check hourly limit
if (!_hourlyCounter.TryIncrement(now))
{
var hourlySnapshot = _hourlyCounter.GetSnapshot(now);
return RateLimitResult.Denied(
RateLimitDenialReason.HourlyLimitExceeded,
$"Hourly limit of {MaxPerHour} exceeded",
retryAfter: hourlySnapshot.TimeUntilReset);
}
// Check concurrency
if (!_concurrencyLimiter.TryAcquire())
{
// Rollback hourly counter
_hourlyCounter.Decrement();
var concurrencySnapshot = _concurrencyLimiter.GetSnapshot();
return RateLimitResult.Denied(
RateLimitDenialReason.ConcurrencyLimitExceeded,
$"Concurrency limit of {concurrencySnapshot.MaxActive} exceeded");
}
// Check token bucket
if (!_tokenBucket.TryConsume(now))
{
// Rollback concurrency and hourly counter
_concurrencyLimiter.Release();
_hourlyCounter.Decrement();
var waitTime = _tokenBucket.EstimatedWaitTime(now);
return RateLimitResult.Denied(
RateLimitDenialReason.TokensExhausted,
"Token bucket exhausted",
retryAfter: waitTime);
}
return RateLimitResult.Allowed();
}
}
/// <summary>
/// Releases a concurrency slot when a job completes.
/// </summary>
public void Release()
{
lock (_lock)
{
_concurrencyLimiter.Release();
}
}
/// <summary>
/// Records an upstream failure for backpressure calculation.
/// </summary>
/// <param name="statusCode">HTTP status code from upstream.</param>
/// <param name="retryAfter">Optional Retry-After header value.</param>
/// <param name="now">Current time.</param>
/// <returns>Backpressure result.</returns>
public BackpressureResult RecordUpstreamFailure(int statusCode, TimeSpan? retryAfter = null, DateTimeOffset? now = null)
{
lock (_lock)
{
return _backpressureHandler.RecordFailure(statusCode, retryAfter, now);
}
}
/// <summary>
/// Records a successful upstream request.
/// </summary>
public void RecordUpstreamSuccess()
{
lock (_lock)
{
_backpressureHandler.RecordSuccess();
}
}
/// <summary>
/// Pauses the limiter.
/// </summary>
/// <param name="reason">Reason for pause.</param>
public void Pause(string reason)
{
lock (_lock)
{
IsPaused = true;
PauseReason = reason;
}
}
/// <summary>
/// Resumes the limiter.
/// </summary>
public void Resume()
{
lock (_lock)
{
IsPaused = false;
PauseReason = null;
}
}
/// <summary>
/// Gets a snapshot of the current limiter state.
/// </summary>
/// <param name="now">Current time.</param>
/// <returns>Snapshot of limiter state.</returns>
public AdaptiveRateLimiterSnapshot GetSnapshot(DateTimeOffset now)
{
lock (_lock)
{
return new AdaptiveRateLimiterSnapshot(
TenantId: TenantId,
JobType: JobType,
IsPaused: IsPaused,
PauseReason: PauseReason,
TokenBucket: _tokenBucket.GetSnapshot(now),
Concurrency: _concurrencyLimiter.GetSnapshot(),
Backpressure: _backpressureHandler.GetSnapshot(now),
HourlyCounter: _hourlyCounter.GetSnapshot(now));
}
}
/// <summary>
/// Exports the current state to a quota record for persistence.
/// </summary>
/// <param name="quotaId">Original quota ID.</param>
/// <param name="now">Current time.</param>
/// <param name="updatedBy">Actor performing the update.</param>
/// <returns>Quota record with current state.</returns>
public Quota ExportToQuota(Guid quotaId, DateTimeOffset now, string updatedBy)
{
lock (_lock)
{
var tokenSnapshot = _tokenBucket.GetSnapshot(now);
var concurrencySnapshot = _concurrencyLimiter.GetSnapshot();
var hourlySnapshot = _hourlyCounter.GetSnapshot(now);
return new Quota(
QuotaId: quotaId,
TenantId: TenantId,
JobType: JobType,
MaxActive: concurrencySnapshot.MaxActive,
MaxPerHour: MaxPerHour,
BurstCapacity: tokenSnapshot.BurstCapacity,
RefillRate: tokenSnapshot.RefillRate,
CurrentTokens: tokenSnapshot.CurrentTokens,
LastRefillAt: tokenSnapshot.LastRefillAt,
CurrentActive: concurrencySnapshot.CurrentActive,
CurrentHourCount: hourlySnapshot.CurrentCount,
CurrentHourStart: hourlySnapshot.HourStart,
Paused: IsPaused,
PauseReason: PauseReason,
QuotaTicket: null,
CreatedAt: now, // NOTE: callers should restore the original CreatedAt when persisting
UpdatedAt: now,
UpdatedBy: updatedBy);
}
}
}
/// <summary>
/// Result of a rate limit acquisition attempt.
/// </summary>
public sealed record RateLimitResult(
bool IsAllowed,
RateLimitDenialReason? DenialReason,
string? DenialMessage,
TimeSpan? RetryAfter)
{
/// <summary>
/// Creates an allowed result.
/// </summary>
public static RateLimitResult Allowed() => new(true, null, null, null);
/// <summary>
/// Creates a denied result.
/// </summary>
public static RateLimitResult Denied(
RateLimitDenialReason reason,
string? message = null,
TimeSpan? retryAfter = null) =>
new(false, reason, message, retryAfter);
}
/// <summary>
/// Reasons for rate limit denial.
/// </summary>
public enum RateLimitDenialReason
{
/// <summary>Limiter is paused by operator.</summary>
Paused,
/// <summary>In backpressure backoff period.</summary>
Backpressure,
/// <summary>Hourly request limit exceeded.</summary>
HourlyLimitExceeded,
/// <summary>Concurrency limit exceeded.</summary>
ConcurrencyLimitExceeded,
/// <summary>Token bucket exhausted.</summary>
TokensExhausted
}
/// <summary>
/// Snapshot of adaptive rate limiter state.
/// </summary>
public sealed record AdaptiveRateLimiterSnapshot(
string TenantId,
string? JobType,
bool IsPaused,
string? PauseReason,
TokenBucketSnapshot TokenBucket,
ConcurrencySnapshot Concurrency,
BackpressureSnapshot Backpressure,
HourlyCounterSnapshot HourlyCounter);
/// <summary>
/// Tracks requests per hour with automatic reset.
/// </summary>
public sealed class HourlyCounter
{
private readonly object _lock = new();
private int _currentCount;
private DateTimeOffset _hourStart;
/// <summary>
/// Maximum allowed requests per hour.
/// </summary>
public int MaxPerHour { get; }
/// <summary>
/// Creates a new hourly counter.
/// </summary>
public HourlyCounter(int maxPerHour, int currentCount = 0, DateTimeOffset? hourStart = null)
{
if (maxPerHour <= 0)
throw new ArgumentOutOfRangeException(nameof(maxPerHour), "Max per hour must be positive.");
MaxPerHour = maxPerHour;
_currentCount = currentCount;
_hourStart = hourStart ?? TruncateToHour(DateTimeOffset.UtcNow);
}
/// <summary>
/// Attempts to increment the counter.
/// </summary>
/// <param name="now">Current time.</param>
/// <returns>True if increment was allowed, false if limit reached.</returns>
public bool TryIncrement(DateTimeOffset now)
{
lock (_lock)
{
MaybeResetHour(now);
if (_currentCount < MaxPerHour)
{
_currentCount++;
return true;
}
return false;
}
}
/// <summary>
/// Decrements the counter (for rollback).
/// </summary>
public void Decrement()
{
lock (_lock)
{
if (_currentCount > 0)
_currentCount--;
}
}
/// <summary>
/// Gets a snapshot of the counter state.
/// </summary>
public HourlyCounterSnapshot GetSnapshot(DateTimeOffset now)
{
lock (_lock)
{
MaybeResetHour(now);
var nextHour = _hourStart.AddHours(1);
var timeUntilReset = nextHour - now;
return new HourlyCounterSnapshot(
MaxPerHour: MaxPerHour,
CurrentCount: _currentCount,
HourStart: _hourStart,
TimeUntilReset: timeUntilReset > TimeSpan.Zero ? timeUntilReset : TimeSpan.Zero);
}
}
private void MaybeResetHour(DateTimeOffset now)
{
var currentHour = TruncateToHour(now);
if (currentHour > _hourStart)
{
_hourStart = currentHour;
_currentCount = 0;
}
}
private static DateTimeOffset TruncateToHour(DateTimeOffset dt) =>
new(dt.Year, dt.Month, dt.Day, dt.Hour, 0, 0, dt.Offset);
}
/// <summary>
/// Snapshot of hourly counter state.
/// </summary>
public sealed record HourlyCounterSnapshot(
int MaxPerHour,
int CurrentCount,
DateTimeOffset HourStart,
TimeSpan TimeUntilReset)
{
/// <summary>
/// Remaining requests in current hour.
/// </summary>
public int Remaining => Math.Max(0, MaxPerHour - CurrentCount);
/// <summary>
/// Whether the hourly limit has been reached.
/// </summary>
public bool IsExhausted => CurrentCount >= MaxPerHour;
}
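
A sketch of gating one job through the limiter, inside an assumed dispatch method; `limiter` and `RunJobAsync` are assumptions, not part of the commit.

var result = limiter.TryAcquire(DateTimeOffset.UtcNow);
if (!result.IsAllowed)
{
    // Denied: the reason and optional retry hint drive requeue behaviour.
    Console.WriteLine($"denied: {result.DenialReason}, retry in {result.RetryAfter}");
    return;
}
try
{
    await RunJobAsync();                     // assumed job body
    limiter.RecordUpstreamSuccess();         // clears any backoff streak
}
catch (HttpRequestException ex) when (ex.StatusCode is not null)
{
    // Feed upstream failures back so the backoff window opens.
    limiter.RecordUpstreamFailure((int)ex.StatusCode.Value);
    throw;
}
finally
{
    limiter.Release();                       // always free the concurrency slot
}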


@@ -0,0 +1,273 @@
namespace StellaOps.Orchestrator.Core.RateLimiting;
/// <summary>
/// Handles backpressure from upstream services (429, 503, etc.).
/// Implements exponential backoff with jitter for retry timing.
/// </summary>
public sealed class BackpressureHandler
{
private readonly object _lock = new();
private int _consecutiveFailures;
private DateTimeOffset? _backoffUntil;
private DateTimeOffset _lastFailureAt;
private string? _lastFailureReason;
/// <summary>
/// Base delay for backoff calculation.
/// </summary>
public TimeSpan BaseDelay { get; }
/// <summary>
/// Maximum delay cap.
/// </summary>
public TimeSpan MaxDelay { get; }
/// <summary>
/// Number of failures before triggering full backoff.
/// </summary>
public int FailureThreshold { get; }
/// <summary>
/// Maximum random jitter to add (0.0 to 1.0 fraction of delay).
/// </summary>
public double JitterFactor { get; }
/// <summary>
/// Whether currently in backoff state.
/// </summary>
public bool IsInBackoff
{
get
{
lock (_lock)
{
return _backoffUntil.HasValue && DateTimeOffset.UtcNow < _backoffUntil.Value;
}
}
}
/// <summary>
/// Number of consecutive failures.
/// </summary>
public int ConsecutiveFailures
{
get
{
lock (_lock)
{
return _consecutiveFailures;
}
}
}
/// <summary>
/// Time until backoff expires (or TimeSpan.Zero if not in backoff).
/// </summary>
public TimeSpan TimeUntilReady
{
get
{
lock (_lock)
{
if (!_backoffUntil.HasValue)
return TimeSpan.Zero;
var remaining = _backoffUntil.Value - DateTimeOffset.UtcNow;
return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
}
}
}
/// <summary>
/// Creates a new backpressure handler.
/// </summary>
/// <param name="baseDelay">Base delay for exponential backoff.</param>
/// <param name="maxDelay">Maximum delay cap.</param>
/// <param name="failureThreshold">Failures before entering backoff.</param>
/// <param name="jitterFactor">Random jitter factor (0.0 to 1.0).</param>
public BackpressureHandler(
TimeSpan? baseDelay = null,
TimeSpan? maxDelay = null,
int failureThreshold = 1,
double jitterFactor = 0.2)
{
BaseDelay = baseDelay ?? TimeSpan.FromSeconds(1);
MaxDelay = maxDelay ?? TimeSpan.FromMinutes(5);
FailureThreshold = failureThreshold > 0 ? failureThreshold : 1;
JitterFactor = Math.Clamp(jitterFactor, 0.0, 1.0);
if (BaseDelay <= TimeSpan.Zero)
throw new ArgumentOutOfRangeException(nameof(baseDelay), "Base delay must be positive.");
if (MaxDelay < BaseDelay)
throw new ArgumentOutOfRangeException(nameof(maxDelay), "Max delay must be >= base delay.");
}
/// <summary>
/// Records an upstream failure and potentially triggers backoff.
/// </summary>
/// <param name="statusCode">HTTP status code from upstream.</param>
/// <param name="retryAfter">Optional Retry-After header value.</param>
/// <param name="now">Current time.</param>
/// <returns>Backoff result with recommended delay.</returns>
public BackpressureResult RecordFailure(int statusCode, TimeSpan? retryAfter = null, DateTimeOffset? now = null)
{
var timestamp = now ?? DateTimeOffset.UtcNow;
lock (_lock)
{
_consecutiveFailures++;
_lastFailureAt = timestamp;
_lastFailureReason = GetFailureReason(statusCode);
// Use Retry-After if provided and reasonable
if (retryAfter.HasValue && retryAfter.Value > TimeSpan.Zero && retryAfter.Value <= MaxDelay)
{
_backoffUntil = timestamp + retryAfter.Value;
return new BackpressureResult(
ShouldBackoff: true,
BackoffDuration: retryAfter.Value,
BackoffUntil: _backoffUntil.Value,
ConsecutiveFailures: _consecutiveFailures,
Reason: _lastFailureReason,
StatusCode: statusCode);
}
// Calculate exponential backoff with jitter
var delay = CalculateBackoffDelay(_consecutiveFailures);
_backoffUntil = timestamp + delay;
return new BackpressureResult(
ShouldBackoff: _consecutiveFailures >= FailureThreshold,
BackoffDuration: delay,
BackoffUntil: _backoffUntil.Value,
ConsecutiveFailures: _consecutiveFailures,
Reason: _lastFailureReason,
StatusCode: statusCode);
}
}
/// <summary>
/// Records a successful request, resetting failure count.
/// </summary>
public void RecordSuccess()
{
lock (_lock)
{
_consecutiveFailures = 0;
_backoffUntil = null;
_lastFailureReason = null;
}
}
/// <summary>
/// Checks if a request should be allowed based on backoff state.
/// </summary>
/// <param name="now">Current time.</param>
/// <returns>True if request should proceed, false if in backoff.</returns>
public bool ShouldAllow(DateTimeOffset? now = null)
{
var timestamp = now ?? DateTimeOffset.UtcNow;
lock (_lock)
{
if (!_backoffUntil.HasValue)
return true;
if (timestamp >= _backoffUntil.Value)
{
// Backoff expired
return true;
}
return false;
}
}
/// <summary>
/// Resets the handler to initial state.
/// </summary>
public void Reset()
{
lock (_lock)
{
_consecutiveFailures = 0;
_backoffUntil = null;
_lastFailureReason = null;
}
}
/// <summary>
/// Gets a snapshot of the current backpressure state.
/// </summary>
/// <param name="now">Current time.</param>
/// <returns>Snapshot of backpressure state.</returns>
public BackpressureSnapshot GetSnapshot(DateTimeOffset? now = null)
{
var timestamp = now ?? DateTimeOffset.UtcNow;
lock (_lock)
{
var isInBackoff = _backoffUntil.HasValue && timestamp < _backoffUntil.Value;
var timeRemaining = isInBackoff ? _backoffUntil!.Value - timestamp : TimeSpan.Zero;
return new BackpressureSnapshot(
IsInBackoff: isInBackoff,
ConsecutiveFailures: _consecutiveFailures,
BackoffUntil: _backoffUntil,
TimeRemaining: timeRemaining > TimeSpan.Zero ? timeRemaining : TimeSpan.Zero,
LastFailureAt: _lastFailureAt,
LastFailureReason: _lastFailureReason);
}
}
private TimeSpan CalculateBackoffDelay(int failures)
{
// Exponential backoff: baseDelay * 2^(failures-1)
var exponent = Math.Min(failures - 1, 10); // Cap exponent to prevent overflow
var delayMs = BaseDelay.TotalMilliseconds * Math.Pow(2, exponent);
// Add jitter
if (JitterFactor > 0)
{
var jitter = delayMs * JitterFactor * Random.Shared.NextDouble();
delayMs += jitter;
}
// Cap at max delay
var delay = TimeSpan.FromMilliseconds(Math.Min(delayMs, MaxDelay.TotalMilliseconds));
return delay;
}
private static string GetFailureReason(int statusCode) => statusCode switch
{
429 => "upstream_rate_limited",
503 => "upstream_unavailable",
502 => "upstream_bad_gateway",
504 => "upstream_timeout",
>= 500 and < 600 => "upstream_server_error",
>= 400 and < 500 => "upstream_client_error",
_ => "upstream_error"
};
}
/// <summary>
/// Result of recording a failure.
/// </summary>
public sealed record BackpressureResult(
bool ShouldBackoff,
TimeSpan BackoffDuration,
DateTimeOffset BackoffUntil,
int ConsecutiveFailures,
string Reason,
int StatusCode);
/// <summary>
/// Snapshot of backpressure handler state.
/// </summary>
public sealed record BackpressureSnapshot(
bool IsInBackoff,
int ConsecutiveFailures,
DateTimeOffset? BackoffUntil,
TimeSpan TimeRemaining,
DateTimeOffset LastFailureAt,
string? LastFailureReason);
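
A sketch of the Retry-After path; not part of the commit.

var handler = new BackpressureHandler(failureThreshold: 3);
var now = DateTimeOffset.UtcNow;

// An explicit Retry-After within MaxDelay overrides the exponential schedule.
var result = handler.RecordFailure(429, TimeSpan.FromSeconds(30), now);
Console.WriteLine($"backoff until {result.BackoffUntil:O}");

Console.WriteLine(handler.ShouldAllow(now.AddSeconds(10)));  // False - still inside window
Console.WriteLine(handler.ShouldAllow(now.AddSeconds(31)));  // True - window elapsed

handler.RecordSuccess();  // a success clears the failure streak entirely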


@@ -0,0 +1,226 @@
namespace StellaOps.Orchestrator.Core.RateLimiting;
/// <summary>
/// Concurrency limiter that tracks active jobs and enforces maximum concurrent execution.
/// </summary>
public sealed class ConcurrencyLimiter
{
private readonly object _lock = new();
private int _currentActive;
/// <summary>
/// Maximum allowed concurrent active jobs.
/// </summary>
public int MaxActive { get; }
/// <summary>
/// Current count of active jobs.
/// </summary>
public int CurrentActive
{
get
{
lock (_lock)
{
return _currentActive;
}
}
}
/// <summary>
/// Number of available slots.
/// </summary>
public int AvailableSlots
{
get
{
lock (_lock)
{
return Math.Max(0, MaxActive - _currentActive);
}
}
}
/// <summary>
/// Creates a new concurrency limiter.
/// </summary>
/// <param name="maxActive">Maximum concurrent jobs allowed.</param>
/// <param name="currentActive">Starting count of active jobs.</param>
public ConcurrencyLimiter(int maxActive, int currentActive = 0)
{
if (maxActive <= 0)
throw new ArgumentOutOfRangeException(nameof(maxActive), "Max active must be positive.");
if (currentActive < 0)
throw new ArgumentOutOfRangeException(nameof(currentActive), "Current active cannot be negative.");
MaxActive = maxActive;
_currentActive = currentActive;
}
/// <summary>
/// Attempts to acquire a slot for a new active job.
/// </summary>
/// <returns>True if slot was acquired, false if at capacity.</returns>
public bool TryAcquire()
{
lock (_lock)
{
if (_currentActive < MaxActive)
{
_currentActive++;
return true;
}
return false;
}
}
/// <summary>
/// Attempts to acquire multiple slots.
/// </summary>
/// <param name="count">Number of slots to acquire.</param>
/// <returns>True if all slots were acquired, false otherwise (no partial acquisition).</returns>
public bool TryAcquire(int count)
{
if (count <= 0)
throw new ArgumentOutOfRangeException(nameof(count), "Count must be positive.");
lock (_lock)
{
if (_currentActive + count <= MaxActive)
{
_currentActive += count;
return true;
}
return false;
}
}
/// <summary>
/// Releases a slot when a job completes.
/// </summary>
/// <returns>True if slot was released, false if already at zero.</returns>
public bool Release()
{
lock (_lock)
{
if (_currentActive > 0)
{
_currentActive--;
return true;
}
return false;
}
}
/// <summary>
/// Releases multiple slots.
/// </summary>
/// <param name="count">Number of slots to release.</param>
/// <returns>Number of slots actually released.</returns>
public int Release(int count)
{
if (count <= 0)
throw new ArgumentOutOfRangeException(nameof(count), "Count must be positive.");
lock (_lock)
{
var released = Math.Min(count, _currentActive);
_currentActive -= released;
return released;
}
}
/// <summary>
/// Checks if a slot is available without acquiring it.
/// </summary>
/// <returns>True if at least one slot is available.</returns>
public bool HasCapacity()
{
lock (_lock)
{
return _currentActive < MaxActive;
}
}
/// <summary>
/// Checks if multiple slots are available without acquiring them.
/// </summary>
/// <param name="count">Number of slots to check for.</param>
/// <returns>True if requested slots are available.</returns>
public bool HasCapacity(int count)
{
lock (_lock)
{
return _currentActive + count <= MaxActive;
}
}
/// <summary>
/// Resets the limiter to zero active jobs.
/// </summary>
/// <returns>Number of slots that were released.</returns>
public int Reset()
{
lock (_lock)
{
var released = _currentActive;
_currentActive = 0;
return released;
}
}
/// <summary>
/// Sets the current active count directly (for recovery/sync scenarios).
/// </summary>
/// <param name="count">New active count.</param>
public void SetActive(int count)
{
if (count < 0)
throw new ArgumentOutOfRangeException(nameof(count), "Count cannot be negative.");
lock (_lock)
{
_currentActive = count;
}
}
/// <summary>
/// Gets a snapshot of the current limiter state.
/// </summary>
/// <returns>Snapshot of limiter state.</returns>
public ConcurrencySnapshot GetSnapshot()
{
lock (_lock)
{
return new ConcurrencySnapshot(MaxActive, _currentActive);
}
}
}
/// <summary>
/// Immutable snapshot of concurrency limiter state.
/// </summary>
public sealed record ConcurrencySnapshot(
int MaxActive,
int CurrentActive)
{
/// <summary>
/// Number of available slots.
/// </summary>
public int AvailableSlots => Math.Max(0, MaxActive - CurrentActive);
/// <summary>
/// Utilization percentage (0.0 to 1.0).
/// </summary>
public double Utilization => (double)CurrentActive / MaxActive;
/// <summary>
/// Whether the limiter is at capacity.
/// </summary>
public bool IsAtCapacity => CurrentActive >= MaxActive;
/// <summary>
/// Whether there are no active jobs.
/// </summary>
public bool IsIdle => CurrentActive == 0;
}
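
The intended acquire/release pattern, sketched; not part of the commit, and `DoWorkAsync` is a placeholder.

var limiter = new ConcurrencyLimiter(maxActive: 8);
if (limiter.TryAcquire())
{
    try { await DoWorkAsync(); }             // placeholder job body
    finally { limiter.Release(); }           // never leak a slot on failure
}
else
{
    // At capacity: defer or requeue rather than blocking the caller.
}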


@@ -0,0 +1,210 @@
namespace StellaOps.Orchestrator.Core.RateLimiting;
/// <summary>
/// Token bucket rate limiter implementation.
/// Tokens refill at a constant rate up to a burst capacity.
/// </summary>
public sealed class TokenBucket
{
private readonly object _lock = new();
private double _currentTokens;
private DateTimeOffset _lastRefillAt;
/// <summary>
/// Maximum tokens the bucket can hold (burst capacity).
/// </summary>
public int BurstCapacity { get; }
/// <summary>
/// Rate at which tokens are added (tokens per second).
/// </summary>
public double RefillRate { get; }
/// <summary>
/// Current number of available tokens.
/// </summary>
public double CurrentTokens
{
get
{
lock (_lock)
{
return _currentTokens;
}
}
}
/// <summary>
/// Last time the bucket was refilled.
/// </summary>
public DateTimeOffset LastRefillAt
{
get
{
lock (_lock)
{
return _lastRefillAt;
}
}
}
/// <summary>
/// Creates a new token bucket.
/// </summary>
/// <param name="burstCapacity">Maximum tokens the bucket can hold.</param>
/// <param name="refillRate">Tokens per second to add.</param>
/// <param name="initialTokens">Starting number of tokens (defaults to burst capacity).</param>
/// <param name="lastRefillAt">Starting time for refill calculation.</param>
public TokenBucket(
int burstCapacity,
double refillRate,
double? initialTokens = null,
DateTimeOffset? lastRefillAt = null)
{
if (burstCapacity <= 0)
throw new ArgumentOutOfRangeException(nameof(burstCapacity), "Burst capacity must be positive.");
if (refillRate <= 0)
throw new ArgumentOutOfRangeException(nameof(refillRate), "Refill rate must be positive.");
BurstCapacity = burstCapacity;
RefillRate = refillRate;
_currentTokens = Math.Min(initialTokens ?? burstCapacity, burstCapacity);
_lastRefillAt = lastRefillAt ?? DateTimeOffset.UtcNow;
}
/// <summary>
/// Attempts to consume a token from the bucket.
/// </summary>
/// <param name="now">Current time for refill calculation.</param>
/// <param name="tokensRequired">Number of tokens to consume (default 1).</param>
/// <returns>True if tokens were consumed, false if insufficient tokens.</returns>
public bool TryConsume(DateTimeOffset now, int tokensRequired = 1)
{
if (tokensRequired <= 0)
throw new ArgumentOutOfRangeException(nameof(tokensRequired), "Tokens required must be positive.");
lock (_lock)
{
Refill(now);
if (_currentTokens >= tokensRequired)
{
_currentTokens -= tokensRequired;
return true;
}
return false;
}
}
/// <summary>
/// Checks if the bucket has enough tokens without consuming them.
/// </summary>
/// <param name="now">Current time for refill calculation.</param>
/// <param name="tokensRequired">Number of tokens to check for.</param>
/// <returns>True if sufficient tokens are available.</returns>
public bool HasTokens(DateTimeOffset now, int tokensRequired = 1)
{
lock (_lock)
{
Refill(now);
return _currentTokens >= tokensRequired;
}
}
/// <summary>
/// Gets estimated time until the specified number of tokens will be available.
/// </summary>
/// <param name="now">Current time for calculation.</param>
/// <param name="tokensRequired">Number of tokens needed.</param>
/// <returns>Time until tokens available, or TimeSpan.Zero if already available.</returns>
public TimeSpan EstimatedWaitTime(DateTimeOffset now, int tokensRequired = 1)
{
lock (_lock)
{
Refill(now);
if (_currentTokens >= tokensRequired)
return TimeSpan.Zero;
var tokensNeeded = tokensRequired - _currentTokens;
var secondsToWait = tokensNeeded / RefillRate;
return TimeSpan.FromSeconds(secondsToWait);
}
}
/// <summary>
/// Refills tokens based on elapsed time.
/// </summary>
/// <param name="now">Current time.</param>
public void Refill(DateTimeOffset now)
{
lock (_lock)
{
if (now <= _lastRefillAt)
return;
var elapsed = (now - _lastRefillAt).TotalSeconds;
var tokensToAdd = elapsed * RefillRate;
_currentTokens = Math.Min(_currentTokens + tokensToAdd, BurstCapacity);
_lastRefillAt = now;
}
}
/// <summary>
/// Resets the bucket to full capacity.
/// </summary>
/// <param name="now">Current time.</param>
public void Reset(DateTimeOffset now)
{
lock (_lock)
{
_currentTokens = BurstCapacity;
_lastRefillAt = now;
}
}
/// <summary>
/// Creates a snapshot of the current bucket state.
/// </summary>
/// <param name="now">Current time for refill calculation.</param>
/// <returns>Snapshot of bucket state.</returns>
public TokenBucketSnapshot GetSnapshot(DateTimeOffset now)
{
lock (_lock)
{
Refill(now);
return new TokenBucketSnapshot(
BurstCapacity,
RefillRate,
_currentTokens,
_lastRefillAt);
}
}
}
/// <summary>
/// Immutable snapshot of token bucket state.
/// </summary>
public sealed record TokenBucketSnapshot(
int BurstCapacity,
double RefillRate,
double CurrentTokens,
DateTimeOffset LastRefillAt)
{
/// <summary>
/// Percentage of bucket that is full (0.0 to 1.0).
/// </summary>
public double FillPercent => CurrentTokens / BurstCapacity;
/// <summary>
/// Whether the bucket is empty.
/// </summary>
public bool IsEmpty => CurrentTokens < 1;
/// <summary>
/// Whether the bucket is full.
/// </summary>
public bool IsFull => CurrentTokens >= BurstCapacity;
}
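
A sketch of the refill arithmetic (not part of the commit): 2 tokens/s with a burst of 10.

var t0 = DateTimeOffset.UtcNow;
var bucket = new TokenBucket(burstCapacity: 10, refillRate: 2.0, lastRefillAt: t0);

for (var i = 0; i < 10; i++) bucket.TryConsume(t0);    // drain the burst
Console.WriteLine(bucket.TryConsume(t0));              // False - empty
Console.WriteLine(bucket.EstimatedWaitTime(t0));       // ~0.5 s for one token

var t1 = t0.AddSeconds(5);                             // 5 s x 2 tokens/s = back at the 10-token cap
Console.WriteLine(bucket.TryConsume(t1));              // True again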


@@ -0,0 +1,399 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.Scheduling;
/// <summary>
/// Plans and manages job DAG (Directed Acyclic Graph) execution.
/// Handles dependency resolution, topological sorting, and critical path analysis.
/// </summary>
public sealed class DagPlanner
{
/// <summary>
/// Validates that the given edges form a valid DAG (no cycles).
/// </summary>
/// <param name="edges">DAG edges to validate.</param>
/// <returns>Validation result with any detected cycles.</returns>
public static DagValidationResult ValidateDag(IEnumerable<DagEdge> edges)
{
ArgumentNullException.ThrowIfNull(edges);
var edgeList = edges.ToList();
if (edgeList.Count == 0)
{
return DagValidationResult.Valid();
}
// Build adjacency list
var adjacency = new Dictionary<Guid, List<Guid>>();
var allNodes = new HashSet<Guid>();
foreach (var edge in edgeList)
{
allNodes.Add(edge.ParentJobId);
allNodes.Add(edge.ChildJobId);
if (!adjacency.TryGetValue(edge.ParentJobId, out var children))
{
children = [];
adjacency[edge.ParentJobId] = children;
}
children.Add(edge.ChildJobId);
}
// Detect cycles using DFS with coloring
var white = new HashSet<Guid>(allNodes); // Unvisited
var gray = new HashSet<Guid>(); // In progress
var cycleNodes = new List<Guid>();
foreach (var node in allNodes)
{
if (white.Contains(node))
{
if (HasCycleDfs(node, adjacency, white, gray, cycleNodes))
{
return DagValidationResult.CycleDetected(cycleNodes);
}
}
}
return DagValidationResult.Valid();
}
private static bool HasCycleDfs(
Guid node,
Dictionary<Guid, List<Guid>> adjacency,
HashSet<Guid> white,
HashSet<Guid> gray,
List<Guid> cycleNodes)
{
white.Remove(node);
gray.Add(node);
if (adjacency.TryGetValue(node, out var children))
{
foreach (var child in children)
{
if (gray.Contains(child))
{
// Back edge found - cycle detected
cycleNodes.Add(child);
cycleNodes.Add(node);
return true;
}
if (white.Contains(child) && HasCycleDfs(child, adjacency, white, gray, cycleNodes))
{
cycleNodes.Add(node);
return true;
}
}
}
gray.Remove(node);
return false;
}
/// <summary>
/// Performs topological sort on jobs based on their dependencies.
/// </summary>
/// <param name="jobIds">Job IDs to sort.</param>
/// <param name="edges">Dependency edges.</param>
/// <returns>Jobs in topologically sorted order (parents before children).</returns>
public static IReadOnlyList<Guid> TopologicalSort(IEnumerable<Guid> jobIds, IEnumerable<DagEdge> edges)
{
ArgumentNullException.ThrowIfNull(jobIds);
ArgumentNullException.ThrowIfNull(edges);
var jobs = jobIds.ToHashSet();
var edgeList = edges.ToList();
// Build in-degree map and adjacency list
var inDegree = jobs.ToDictionary(j => j, _ => 0);
var adjacency = new Dictionary<Guid, List<Guid>>();
foreach (var edge in edgeList)
{
if (!jobs.Contains(edge.ParentJobId) || !jobs.Contains(edge.ChildJobId))
{
continue; // Skip edges for jobs not in our set
}
inDegree[edge.ChildJobId]++;
if (!adjacency.TryGetValue(edge.ParentJobId, out var children))
{
children = [];
adjacency[edge.ParentJobId] = children;
}
children.Add(edge.ChildJobId);
}
// Kahn's algorithm
var queue = new Queue<Guid>(inDegree.Where(kv => kv.Value == 0).Select(kv => kv.Key));
var result = new List<Guid>(jobs.Count);
while (queue.Count > 0)
{
var current = queue.Dequeue();
result.Add(current);
if (adjacency.TryGetValue(current, out var children))
{
foreach (var child in children)
{
inDegree[child]--;
if (inDegree[child] == 0)
{
queue.Enqueue(child);
}
}
}
}
if (result.Count != jobs.Count)
{
throw new InvalidOperationException("Cycle detected in job DAG - topological sort failed.");
}
return result;
}
/// <summary>
/// Gets all jobs that have no unmet dependencies (ready to schedule).
/// </summary>
/// <param name="jobs">All jobs in the DAG.</param>
/// <param name="edges">Dependency edges.</param>
/// <returns>Jobs with all dependencies satisfied or no dependencies.</returns>
public static IReadOnlyList<Job> GetReadyJobs(IEnumerable<Job> jobs, IEnumerable<DagEdge> edges)
{
ArgumentNullException.ThrowIfNull(jobs);
ArgumentNullException.ThrowIfNull(edges);
var jobList = jobs.ToList();
var edgeList = edges.ToList();
// Build map of job ID to job and set of succeeded job IDs
var jobMap = jobList.ToDictionary(j => j.JobId);
var succeededJobs = jobList
.Where(j => JobStateMachine.IsSuccess(j.Status))
.Select(j => j.JobId)
.ToHashSet();
// Build map of job ID to parent dependencies
var dependencies = new Dictionary<Guid, List<DagEdge>>();
foreach (var edge in edgeList)
{
if (!dependencies.TryGetValue(edge.ChildJobId, out var deps))
{
deps = [];
dependencies[edge.ChildJobId] = deps;
}
deps.Add(edge);
}
var ready = new List<Job>();
foreach (var job in jobList)
{
// Skip jobs that aren't pending
if (!JobStateMachine.IsPending(job.Status))
{
continue;
}
// Check if all dependencies are satisfied
if (!dependencies.TryGetValue(job.JobId, out var deps))
{
// No dependencies - ready to go
ready.Add(job);
continue;
}
var allSatisfied = deps.All(edge => IsDependencySatisfied(edge, jobMap, succeededJobs));
if (allSatisfied)
{
ready.Add(job);
}
}
return ready;
}
private static bool IsDependencySatisfied(DagEdge edge, Dictionary<Guid, Job> jobMap, HashSet<Guid> succeededJobs)
{
if (!jobMap.TryGetValue(edge.ParentJobId, out var parentJob))
{
// Parent job doesn't exist - treat as satisfied (orphan edge)
return true;
}
return edge.EdgeType switch
{
DagEdgeTypes.Success => succeededJobs.Contains(edge.ParentJobId),
DagEdgeTypes.Always => JobStateMachine.IsTerminal(parentJob.Status),
DagEdgeTypes.Failure => parentJob.Status == JobStatus.Failed,
_ => false
};
}
/// <summary>
/// Calculates the critical path through the DAG based on estimated durations.
/// </summary>
/// <param name="jobs">Jobs with estimated durations.</param>
/// <param name="edges">Dependency edges.</param>
/// <param name="getDuration">Function to get estimated duration for a job.</param>
/// <returns>Critical path information.</returns>
public static CriticalPathResult CalculateCriticalPath(
IEnumerable<Job> jobs,
IEnumerable<DagEdge> edges,
Func<Job, TimeSpan> getDuration)
{
ArgumentNullException.ThrowIfNull(jobs);
ArgumentNullException.ThrowIfNull(edges);
ArgumentNullException.ThrowIfNull(getDuration);
var jobList = jobs.ToList();
var edgeList = edges.ToList();
if (jobList.Count == 0)
{
return new CriticalPathResult([], TimeSpan.Zero);
}
var jobMap = jobList.ToDictionary(j => j.JobId);
var sortedIds = TopologicalSort(jobList.Select(j => j.JobId), edgeList);
// Build reverse adjacency (child -> parents)
var parents = new Dictionary<Guid, List<Guid>>();
foreach (var edge in edgeList)
{
if (!parents.TryGetValue(edge.ChildJobId, out var parentList))
{
parentList = [];
parents[edge.ChildJobId] = parentList;
}
parentList.Add(edge.ParentJobId);
}
// Forward pass: calculate earliest start times
var earliestStart = new Dictionary<Guid, TimeSpan>();
var earliestFinish = new Dictionary<Guid, TimeSpan>();
foreach (var jobId in sortedIds)
{
var job = jobMap[jobId];
var duration = getDuration(job);
var maxParentFinish = TimeSpan.Zero;
if (parents.TryGetValue(jobId, out var parentIds))
{
foreach (var parentId in parentIds)
{
if (earliestFinish.TryGetValue(parentId, out var pf) && pf > maxParentFinish)
{
maxParentFinish = pf;
}
}
}
earliestStart[jobId] = maxParentFinish;
earliestFinish[jobId] = maxParentFinish + duration;
}
// Find total duration and identify critical path
var totalDuration = earliestFinish.Values.DefaultIfEmpty(TimeSpan.Zero).Max();
// Backward pass: identify critical path (jobs where slack = 0)
var criticalPath = new List<Guid>();
var latestFinish = new Dictionary<Guid, TimeSpan>();
foreach (var jobId in sortedIds.Reverse())
{
var job = jobMap[jobId];
var duration = getDuration(job);
// Find minimum latest start of children
var minChildStart = totalDuration;
var childIds = edgeList.Where(e => e.ParentJobId == jobId).Select(e => e.ChildJobId);
foreach (var childId in childIds)
{
if (latestFinish.TryGetValue(childId, out var lf))
{
var childLatestStart = lf - getDuration(jobMap[childId]);
if (childLatestStart < minChildStart)
{
minChildStart = childLatestStart;
}
}
}
latestFinish[jobId] = minChildStart;
// Check if on critical path (slack = 0)
var slack = minChildStart - earliestFinish[jobId];
if (slack <= TimeSpan.Zero)
{
criticalPath.Add(jobId);
}
}
criticalPath.Reverse();
return new CriticalPathResult(criticalPath, totalDuration);
}
/// <summary>
/// Gets jobs that are blocked by a specific failed job.
/// </summary>
/// <param name="failedJobId">The failed job ID.</param>
/// <param name="edges">Dependency edges.</param>
/// <returns>All job IDs that are transitively blocked.</returns>
public static IReadOnlySet<Guid> GetBlockedJobs(Guid failedJobId, IEnumerable<DagEdge> edges)
{
ArgumentNullException.ThrowIfNull(edges);
var edgeList = edges.ToList();
var blocked = new HashSet<Guid>();
var queue = new Queue<Guid>();
// Find direct children with "success" dependency
foreach (var edge in edgeList.Where(e => e.ParentJobId == failedJobId && e.EdgeType == DagEdgeTypes.Success))
{
queue.Enqueue(edge.ChildJobId);
}
// BFS to find all transitively blocked jobs
while (queue.Count > 0)
{
var current = queue.Dequeue();
if (!blocked.Add(current))
{
continue;
}
// Deeper hops follow every edge type: a job that never runs blocks all of its children.
foreach (var edge in edgeList.Where(e => e.ParentJobId == current))
{
queue.Enqueue(edge.ChildJobId);
}
}
return blocked;
}
}
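To make the failure-propagation rule concrete, here is a minimal standalone sketch of the same BFS (plain strings and a hypothetical adjacency dictionary stand in for Job and DagEdge, not the repository API). As in GetBlockedJobs, only success-edges out of the failed job seed the traversal; deeper hops follow every edge, because a job that never runs blocks all of its children.

using System;
using System.Collections.Generic;

// Hypothetical success-edge adjacency: parent -> children that require it to succeed.
var successChildren = new Dictionary<string, string[]>
{
    ["fetch"] = new[] { "parse" },
    ["parse"] = new[] { "index", "report" },
    ["index"] = new[] { "publish" },
};

var blocked = new HashSet<string>();
var queue = new Queue<string>();

// First hop: only success-edges out of the failed job propagate blockage.
foreach (var child in successChildren.GetValueOrDefault("fetch", Array.Empty<string>()))
    queue.Enqueue(child);

// Deeper hops: a blocked job never runs, so all of its children are blocked too.
while (queue.Count > 0)
{
    var current = queue.Dequeue();
    if (!blocked.Add(current))
        continue; // already visited
    foreach (var child in successChildren.GetValueOrDefault(current, Array.Empty<string>()))
        queue.Enqueue(child);
}

Console.WriteLine(string.Join(", ", blocked)); // e.g. parse, index, report, publish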
/// <summary>
/// Result of DAG validation.
/// </summary>
public sealed record DagValidationResult(
bool IsValid,
IReadOnlyList<Guid> CycleNodes)
{
public static DagValidationResult Valid() => new(true, []);
public static DagValidationResult CycleDetected(IReadOnlyList<Guid> cycleNodes) => new(false, cycleNodes);
}
/// <summary>
/// Result of critical path calculation.
/// </summary>
public sealed record CriticalPathResult(
IReadOnlyList<Guid> CriticalPathJobIds,
TimeSpan TotalDuration);
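The forward/backward pass above is the classic critical-path method. A compact standalone sketch with assumed task names and durations (plain strings, not the Job/DagEdge domain types) shows both passes end to end:

using System;
using System.Collections.Generic;
using System.Linq;

var duration = new Dictionary<string, TimeSpan>
{
    ["a"] = TimeSpan.FromMinutes(2),
    ["b"] = TimeSpan.FromMinutes(5),
    ["c"] = TimeSpan.FromMinutes(1),
    ["d"] = TimeSpan.FromMinutes(3),
};
var parents = new Dictionary<string, string[]>
{
    ["b"] = new[] { "a" },
    ["c"] = new[] { "a" },
    ["d"] = new[] { "b", "c" },
};
var order = new[] { "a", "b", "c", "d" }; // already topologically sorted

// Forward pass: earliest finish = max(parent earliest finishes) + own duration.
var earliestFinish = new Dictionary<string, TimeSpan>();
foreach (var id in order)
{
    var start = parents.GetValueOrDefault(id, Array.Empty<string>())
        .Select(p => earliestFinish[p]).DefaultIfEmpty(TimeSpan.Zero).Max();
    earliestFinish[id] = start + duration[id];
}
var total = earliestFinish.Values.Max(); // 10 minutes via a -> b -> d

// Backward pass: a task is critical when its latest finish equals its earliest finish.
var children = order.ToDictionary(id => id, _ => new List<string>());
foreach (var (child, ps) in parents)
    foreach (var p in ps)
        children[p].Add(child);

var latestFinish = new Dictionary<string, TimeSpan>();
foreach (var id in order.Reverse())
{
    latestFinish[id] = children[id].Count == 0
        ? total
        : children[id].Min(c => latestFinish[c] - duration[c]);
}
var critical = order.Where(id => latestFinish[id] == earliestFinish[id]);
Console.WriteLine($"{total} via {string.Join(" -> ", critical)}"); // 00:10:00 via a -> b -> d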

View File

@@ -0,0 +1,223 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.Scheduling;
/// <summary>
/// Coordinates job scheduling decisions including quota checks,
/// dependency resolution, and status transitions.
/// </summary>
public interface IJobScheduler
{
/// <summary>
/// Evaluates whether a job can be scheduled.
/// </summary>
ScheduleDecision EvaluateScheduling(Job job, SchedulingContext context);
/// <summary>
/// Evaluates the outcome of a job completion and determines next steps.
/// </summary>
CompletionDecision EvaluateCompletion(Job job, JobStatus outcome, string? reason, CompletionContext context);
/// <summary>
/// Evaluates which pending jobs are ready to be scheduled.
/// </summary>
IReadOnlyList<Job> GetSchedulableJobs(IEnumerable<Job> pendingJobs, SchedulingContext context);
}
/// <summary>
/// Default implementation of job scheduler.
/// </summary>
public sealed class JobScheduler : IJobScheduler
{
/// <summary>
/// Evaluates whether a job can transition from Pending to Scheduled.
/// </summary>
public ScheduleDecision EvaluateScheduling(Job job, SchedulingContext context)
{
ArgumentNullException.ThrowIfNull(job);
ArgumentNullException.ThrowIfNull(context);
// Check current status
if (job.Status != JobStatus.Pending)
{
return ScheduleDecision.Reject($"Job is not pending (current: {job.Status})");
}
// Check if job has a not-before time that hasn't passed
if (job.NotBefore.HasValue && job.NotBefore.Value > context.Now)
{
return ScheduleDecision.Defer(job.NotBefore.Value, "Backoff period not elapsed");
}
// Check dependencies
if (!context.AreDependenciesSatisfied)
{
return ScheduleDecision.Defer(null, "Dependencies not satisfied");
}
// Check quota
if (!context.HasQuotaAvailable)
{
return ScheduleDecision.Defer(context.QuotaAvailableAt, "Quota exhausted");
}
// Check if source/job type is throttled
if (context.IsThrottled)
{
return ScheduleDecision.Defer(context.ThrottleExpiresAt, context.ThrottleReason ?? "Throttled");
}
return ScheduleDecision.Schedule();
}
/// <summary>
/// Evaluates the outcome of a job completion.
/// </summary>
public CompletionDecision EvaluateCompletion(Job job, JobStatus outcome, string? reason, CompletionContext context)
{
ArgumentNullException.ThrowIfNull(job);
ArgumentNullException.ThrowIfNull(context);
// Validate transition
if (!JobStateMachine.IsValidTransition(job.Status, outcome))
{
throw new InvalidJobTransitionException(job.Status, outcome);
}
// Success - job is done
if (outcome == JobStatus.Succeeded)
{
return CompletionDecision.Complete(outcome, reason);
}
// Canceled - no retry
if (outcome == JobStatus.Canceled)
{
return CompletionDecision.Complete(outcome, reason ?? "Canceled");
}
// Failed or TimedOut - check retry policy
if (outcome == JobStatus.Failed || outcome == JobStatus.TimedOut)
{
var retryDecision = RetryEvaluator.Evaluate(job.Attempt, context.RetryPolicy, context.Now);
if (retryDecision.ShouldRetry)
{
return CompletionDecision.Retry(
retryDecision.NextAttempt,
retryDecision.NotBefore!.Value,
$"{outcome}: {reason ?? "Unknown error"}. Retry scheduled.");
}
return CompletionDecision.Complete(
JobStatus.Failed,
$"{outcome}: {reason ?? "Unknown error"}. {retryDecision.Reason}");
}
return CompletionDecision.Complete(outcome, reason);
}
/// <summary>
/// Gets all pending jobs that are ready to be scheduled.
/// </summary>
public IReadOnlyList<Job> GetSchedulableJobs(IEnumerable<Job> pendingJobs, SchedulingContext context)
{
ArgumentNullException.ThrowIfNull(pendingJobs);
ArgumentNullException.ThrowIfNull(context);
var schedulable = new List<Job>();
foreach (var job in pendingJobs)
{
if (job.Status != JobStatus.Pending)
{
continue;
}
// Skip if in backoff period
if (job.NotBefore.HasValue && job.NotBefore.Value > context.Now)
{
continue;
}
// Dependencies are checked via context.ReadyJobIds
if (context.ReadyJobIds != null && !context.ReadyJobIds.Contains(job.JobId))
{
continue;
}
schedulable.Add(job);
}
// Sort by priority (descending) then created time (ascending)
return schedulable
.OrderByDescending(j => j.Priority)
.ThenBy(j => j.CreatedAt)
.ToList();
}
}
/// <summary>
/// Context for scheduling decisions.
/// </summary>
public sealed record SchedulingContext(
DateTimeOffset Now,
bool AreDependenciesSatisfied,
bool HasQuotaAvailable,
DateTimeOffset? QuotaAvailableAt,
bool IsThrottled,
string? ThrottleReason,
DateTimeOffset? ThrottleExpiresAt,
IReadOnlySet<Guid>? ReadyJobIds = null)
{
/// <summary>
/// Creates a context where scheduling is allowed.
/// </summary>
public static SchedulingContext AllowScheduling(DateTimeOffset now) => new(
now,
AreDependenciesSatisfied: true,
HasQuotaAvailable: true,
QuotaAvailableAt: null,
IsThrottled: false,
ThrottleReason: null,
ThrottleExpiresAt: null);
}
/// <summary>
/// Context for completion decisions.
/// </summary>
public sealed record CompletionContext(
DateTimeOffset Now,
RetryPolicy RetryPolicy);
/// <summary>
/// Decision about whether to schedule a job.
/// </summary>
public sealed record ScheduleDecision(
bool CanSchedule,
bool ShouldDefer,
DateTimeOffset? DeferUntil,
string? Reason)
{
public static ScheduleDecision Schedule() => new(true, false, null, null);
public static ScheduleDecision Defer(DateTimeOffset? until, string reason) => new(false, true, until, reason);
public static ScheduleDecision Reject(string reason) => new(false, false, null, reason);
}
/// <summary>
/// Decision about job completion outcome.
/// </summary>
public sealed record CompletionDecision(
bool IsComplete,
bool ShouldRetry,
JobStatus FinalStatus,
int? NextAttempt,
DateTimeOffset? RetryNotBefore,
string? Reason)
{
public static CompletionDecision Complete(JobStatus status, string? reason)
=> new(true, false, status, null, null, reason);
public static CompletionDecision Retry(int nextAttempt, DateTimeOffset notBefore, string reason)
=> new(false, true, JobStatus.Pending, nextAttempt, notBefore, reason);
}
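The gate order in EvaluateScheduling matters: the first failing check decides the outcome, so a not-before backoff masks everything else and quota exhaustion masks an active throttle. A minimal sketch of that short-circuit pattern, using a hypothetical Gate record rather than the real context type:

using System;

static string Evaluate(params Gate[] gates)
{
    // The first failing gate wins, mirroring the check order in EvaluateScheduling:
    // status, not-before, dependencies, quota, throttle.
    foreach (var gate in gates)
    {
        if (!gate.Passes)
            return $"Defer: {gate.DeferReason}";
    }
    return "Schedule";
}

Console.WriteLine(Evaluate(
    new Gate(Passes: true, DeferReason: "Backoff period not elapsed"),
    new Gate(Passes: true, DeferReason: "Dependencies not satisfied"),
    new Gate(Passes: false, DeferReason: "Quota exhausted"),
    new Gate(Passes: false, DeferReason: "Throttled")));
// Prints "Defer: Quota exhausted" - the throttle gate is never consulted.

record Gate(bool Passes, string DeferReason);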

View File

@@ -0,0 +1,141 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.Scheduling;
/// <summary>
/// Manages job status transitions and validates state machine rules.
///
/// State machine:
/// Pending → Scheduled (quota cleared, dependencies satisfied)
/// Scheduled → Leased (worker acquired lease)
/// Leased → Succeeded | Failed | Canceled | TimedOut
/// Failed → Pending (retry) | Failed (exhausted)
/// TimedOut → Pending (retry) | Failed (exhausted)
/// </summary>
public static class JobStateMachine
{
/// <summary>
/// Validates whether a status transition is allowed.
/// </summary>
/// <param name="from">Current status.</param>
/// <param name="to">Target status.</param>
/// <returns>True if transition is valid.</returns>
public static bool IsValidTransition(JobStatus from, JobStatus to)
{
return (from, to) switch
{
// From Pending
(JobStatus.Pending, JobStatus.Scheduled) => true,
(JobStatus.Pending, JobStatus.Canceled) => true,
// From Scheduled
(JobStatus.Scheduled, JobStatus.Leased) => true,
(JobStatus.Scheduled, JobStatus.Canceled) => true,
(JobStatus.Scheduled, JobStatus.Pending) => true, // Back to pending (quota exceeded, dependency failed)
// From Leased
(JobStatus.Leased, JobStatus.Succeeded) => true,
(JobStatus.Leased, JobStatus.Failed) => true,
(JobStatus.Leased, JobStatus.Canceled) => true,
(JobStatus.Leased, JobStatus.TimedOut) => true,
// Retry transitions (Failed/TimedOut back to Pending)
(JobStatus.Failed, JobStatus.Pending) => true,
(JobStatus.TimedOut, JobStatus.Pending) => true,
// Same status (idempotent)
_ when from == to => true,
// All other transitions are invalid
_ => false
};
}
/// <summary>
/// Determines if a job status is terminal (no further transitions except replay).
/// </summary>
public static bool IsTerminal(JobStatus status) => status switch
{
JobStatus.Succeeded => true,
JobStatus.Failed => true,
JobStatus.Canceled => true,
JobStatus.TimedOut => true,
_ => false
};
/// <summary>
/// Determines if a job status represents a successful completion.
/// </summary>
public static bool IsSuccess(JobStatus status) => status == JobStatus.Succeeded;
/// <summary>
/// Determines if a job status represents a failure that may be retried.
/// </summary>
public static bool IsRetryable(JobStatus status) => status switch
{
JobStatus.Failed => true,
JobStatus.TimedOut => true,
_ => false
};
/// <summary>
/// Determines if a job is in a state where it can be leased by a worker.
/// </summary>
public static bool IsLeasable(JobStatus status) => status == JobStatus.Scheduled;
/// <summary>
/// Determines if a job is waiting to be scheduled.
/// </summary>
public static bool IsPending(JobStatus status) => status == JobStatus.Pending;
/// <summary>
/// Determines if a job is currently being executed.
/// </summary>
public static bool IsActive(JobStatus status) => status == JobStatus.Leased;
/// <summary>
/// Gets all valid transitions from a given status.
/// </summary>
public static IReadOnlyList<JobStatus> GetValidTransitions(JobStatus from)
{
return from switch
{
JobStatus.Pending => [JobStatus.Scheduled, JobStatus.Canceled],
JobStatus.Scheduled => [JobStatus.Leased, JobStatus.Canceled, JobStatus.Pending],
JobStatus.Leased => [JobStatus.Succeeded, JobStatus.Failed, JobStatus.Canceled, JobStatus.TimedOut],
JobStatus.Failed => [JobStatus.Pending], // Retry only
JobStatus.TimedOut => [JobStatus.Pending], // Retry only
JobStatus.Succeeded => [],
JobStatus.Canceled => [],
_ => []
};
}
/// <summary>
/// Validates a transition and throws if invalid.
/// </summary>
/// <exception cref="InvalidJobTransitionException">Thrown when transition is not allowed.</exception>
public static void ValidateTransition(JobStatus from, JobStatus to)
{
if (!IsValidTransition(from, to))
{
throw new InvalidJobTransitionException(from, to);
}
}
}
/// <summary>
/// Exception thrown when an invalid job status transition is attempted.
/// </summary>
public sealed class InvalidJobTransitionException : Exception
{
public JobStatus FromStatus { get; }
public JobStatus ToStatus { get; }
public InvalidJobTransitionException(JobStatus from, JobStatus to)
: base($"Invalid job status transition from '{from}' to '{to}'.")
{
FromStatus = from;
ToStatus = to;
}
}
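The tuple-pattern switch in IsValidTransition is easy to adapt; a standalone sketch with a local Status enum (not the real JobStatus) shows the shape, including the catch-all idempotent arm that permits repeating any status, terminal ones included:

using System;

bool IsValid(Status from, Status to) => (from, to) switch
{
    (Status.Pending, Status.Scheduled) => true,
    (Status.Scheduled, Status.Leased) => true,
    (Status.Leased, Status.Succeeded) => true,
    (Status.Failed, Status.Pending) => true, // retry
    _ when from == to => true,               // idempotent repeat of any status
    _ => false
};

Console.WriteLine(IsValid(Status.Pending, Status.Scheduled));   // True
Console.WriteLine(IsValid(Status.Succeeded, Status.Pending));   // False: terminal
Console.WriteLine(IsValid(Status.Succeeded, Status.Succeeded)); // True: idempotent

enum Status { Pending, Scheduled, Leased, Succeeded, Failed }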

View File

@@ -0,0 +1,173 @@
namespace StellaOps.Orchestrator.Core.Scheduling;
/// <summary>
/// Defines retry behavior for failed jobs.
/// </summary>
public sealed record RetryPolicy(
/// <summary>Maximum number of retry attempts (including initial attempt).</summary>
int MaxAttempts,
/// <summary>Initial backoff delay in seconds.</summary>
double InitialBackoffSeconds,
/// <summary>Maximum backoff delay in seconds.</summary>
double MaxBackoffSeconds,
/// <summary>Backoff multiplier for exponential growth.</summary>
double BackoffMultiplier,
/// <summary>Jitter factor (0.0-1.0) to add randomness to backoff.</summary>
double JitterFactor)
{
/// <summary>
/// Default retry policy: 3 attempts, exponential backoff from 5s to 300s.
/// </summary>
public static RetryPolicy Default { get; } = new(
MaxAttempts: 3,
InitialBackoffSeconds: 5.0,
MaxBackoffSeconds: 300.0,
BackoffMultiplier: 2.0,
JitterFactor: 0.1);
/// <summary>
/// Aggressive retry policy for critical jobs: 5 attempts, quick retries.
/// </summary>
public static RetryPolicy Aggressive { get; } = new(
MaxAttempts: 5,
InitialBackoffSeconds: 1.0,
MaxBackoffSeconds: 60.0,
BackoffMultiplier: 1.5,
JitterFactor: 0.2);
/// <summary>
/// Conservative retry policy: 2 attempts, longer delays.
/// </summary>
public static RetryPolicy Conservative { get; } = new(
MaxAttempts: 2,
InitialBackoffSeconds: 30.0,
MaxBackoffSeconds: 600.0,
BackoffMultiplier: 3.0,
JitterFactor: 0.1);
/// <summary>
/// No retry policy: single attempt only.
/// </summary>
public static RetryPolicy NoRetry { get; } = new(
MaxAttempts: 1,
InitialBackoffSeconds: 0,
MaxBackoffSeconds: 0,
BackoffMultiplier: 1.0,
JitterFactor: 0);
/// <summary>
/// Determines if a job should be retried based on current attempt.
/// </summary>
/// <param name="currentAttempt">Current attempt number (1-based).</param>
/// <returns>True if retry is allowed.</returns>
public bool ShouldRetry(int currentAttempt) => currentAttempt < MaxAttempts;
/// <summary>
/// Calculates the next retry time based on current attempt.
/// </summary>
/// <param name="currentAttempt">Current attempt number (1-based).</param>
/// <param name="now">Current time.</param>
/// <returns>Earliest time for next retry attempt.</returns>
public DateTimeOffset CalculateNextRetryTime(int currentAttempt, DateTimeOffset now)
{
if (!ShouldRetry(currentAttempt))
{
throw new InvalidOperationException($"No retry allowed after attempt {currentAttempt} (max: {MaxAttempts}).");
}
var backoffSeconds = CalculateBackoffSeconds(currentAttempt);
return now.AddSeconds(backoffSeconds);
}
/// <summary>
/// Calculates backoff duration in seconds for a given attempt.
/// </summary>
/// <param name="attempt">Attempt number (1-based).</param>
/// <returns>Backoff duration in seconds.</returns>
public double CalculateBackoffSeconds(int attempt)
{
if (attempt < 1)
{
throw new ArgumentOutOfRangeException(nameof(attempt), "Attempt must be >= 1.");
}
// Exponential backoff: initial * multiplier^(attempt-1)
var exponentialBackoff = InitialBackoffSeconds * Math.Pow(BackoffMultiplier, attempt - 1);
// Cap at maximum
var cappedBackoff = Math.Min(exponentialBackoff, MaxBackoffSeconds);
// Add jitter to prevent thundering herd
var jitter = cappedBackoff * JitterFactor * (Random.Shared.NextDouble() * 2 - 1);
var finalBackoff = Math.Max(0, cappedBackoff + jitter);
return finalBackoff;
}
}
/// <summary>
/// Result of evaluating retry policy for a failed job.
/// </summary>
public sealed record RetryDecision(
/// <summary>Whether the job should be retried.</summary>
bool ShouldRetry,
/// <summary>Next attempt number (if retrying).</summary>
int NextAttempt,
/// <summary>Earliest time for next attempt (if retrying).</summary>
DateTimeOffset? NotBefore,
/// <summary>Reason for the decision.</summary>
string Reason)
{
/// <summary>
/// Creates a retry decision.
/// </summary>
public static RetryDecision Retry(int nextAttempt, DateTimeOffset notBefore)
=> new(true, nextAttempt, notBefore, $"Scheduling retry attempt {nextAttempt}");
/// <summary>
/// Creates a no-retry decision (exhausted).
/// </summary>
public static RetryDecision Exhausted(int maxAttempts)
=> new(false, 0, null, $"Max attempts ({maxAttempts}) exhausted");
/// <summary>
/// Creates a no-retry decision (not retryable status).
/// </summary>
public static RetryDecision NotRetryable(string reason)
=> new(false, 0, null, reason);
}
/// <summary>
/// Service for evaluating retry decisions.
/// </summary>
public static class RetryEvaluator
{
/// <summary>
/// Evaluates whether a job should be retried and calculates timing.
/// </summary>
/// <param name="currentAttempt">Current attempt number.</param>
/// <param name="policy">Retry policy to apply.</param>
/// <param name="now">Current time.</param>
/// <returns>Retry decision.</returns>
public static RetryDecision Evaluate(int currentAttempt, RetryPolicy policy, DateTimeOffset now)
{
ArgumentNullException.ThrowIfNull(policy);
if (!policy.ShouldRetry(currentAttempt))
{
return RetryDecision.Exhausted(policy.MaxAttempts);
}
var nextAttempt = currentAttempt + 1;
var notBefore = policy.CalculateNextRetryTime(currentAttempt, now);
return RetryDecision.Retry(nextAttempt, notBefore);
}
}
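To see what the Default policy actually produces, a short sketch that mirrors CalculateBackoffSeconds (same formula: exponential growth, cap, then uniform jitter of ±JitterFactor):

using System;

double Backoff(int attempt, double initial = 5.0, double multiplier = 2.0,
    double max = 300.0, double jitter = 0.1)
{
    var exponential = initial * Math.Pow(multiplier, attempt - 1);
    var capped = Math.Min(exponential, max);
    var noise = capped * jitter * (Random.Shared.NextDouble() * 2 - 1); // uniform in +/-10%
    return Math.Max(0, capped + noise);
}

for (var attempt = 1; attempt <= 8; attempt++)
    Console.WriteLine($"attempt {attempt}: ~{Backoff(attempt):F1}s");
// Roughly 5, 10, 20, 40, 80, 160, 300, 300 seconds (each +/-10%): the cap bounds
// late retries while the jitter spreads out thundering herds across workers.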

View File

@@ -0,0 +1,341 @@
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Core.SloManagement;
/// <summary>
/// Options for burn rate computation.
/// </summary>
public sealed record BurnRateOptions
{
/// <summary>Short window multiplier for multi-window burn rate.</summary>
public double ShortWindowMultiplier { get; init; } = 14.4; // 2% of a 30-day budget in 1 hour
/// <summary>Long window multiplier for multi-window burn rate.</summary>
public double LongWindowMultiplier { get; init; } = 6.0; // 5% of a 30-day budget in 6 hours
/// <summary>Minimum events required for meaningful computation.</summary>
public int MinimumEvents { get; init; } = 10;
}
/// <summary>
/// Event counts for SLO computation.
/// </summary>
public sealed record SloEventCounts(
/// <summary>Total events in the window.</summary>
long TotalEvents,
/// <summary>Good events (successful) in the window.</summary>
long GoodEvents,
/// <summary>Bad events (failed) in the window.</summary>
long BadEvents,
/// <summary>Start of the evaluation window.</summary>
DateTimeOffset WindowStart,
/// <summary>End of the evaluation window.</summary>
DateTimeOffset WindowEnd);
/// <summary>
/// Interface for retrieving SLO event counts.
/// </summary>
public interface ISloEventSource
{
/// <summary>Gets event counts for an availability SLO.</summary>
Task<SloEventCounts> GetAvailabilityCountsAsync(
string tenantId,
string? jobType,
Guid? sourceId,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
CancellationToken cancellationToken);
/// <summary>Gets event counts for a latency SLO.</summary>
Task<SloEventCounts> GetLatencyCountsAsync(
string tenantId,
string? jobType,
Guid? sourceId,
double percentile,
double targetSeconds,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
CancellationToken cancellationToken);
/// <summary>Gets event counts for a throughput SLO.</summary>
Task<SloEventCounts> GetThroughputCountsAsync(
string tenantId,
string? jobType,
Guid? sourceId,
int minimumRequired,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
CancellationToken cancellationToken);
}
/// <summary>
/// Engine for computing SLO burn rates and error budget consumption.
/// </summary>
public interface IBurnRateEngine
{
/// <summary>Computes the current state of an SLO.</summary>
Task<SloState> ComputeStateAsync(
Slo slo,
CancellationToken cancellationToken);
/// <summary>Computes states for all enabled SLOs for a tenant.</summary>
Task<IReadOnlyList<SloState>> ComputeAllStatesAsync(
string tenantId,
CancellationToken cancellationToken);
/// <summary>Evaluates alert thresholds and creates alerts if needed.</summary>
Task<IReadOnlyList<SloAlert>> EvaluateAlertsAsync(
Slo slo,
SloState state,
CancellationToken cancellationToken);
}
/// <summary>
/// Default implementation of burn rate computation engine.
/// </summary>
public sealed class BurnRateEngine : IBurnRateEngine
{
private readonly ISloRepository _sloRepository;
private readonly ISloEventSource _eventSource;
private readonly IAlertThresholdRepository _thresholdRepository;
private readonly ISloAlertRepository _alertRepository;
private readonly TimeProvider _timeProvider;
private readonly BurnRateOptions _options;
private readonly ILogger<BurnRateEngine> _logger;
public BurnRateEngine(
ISloRepository sloRepository,
ISloEventSource eventSource,
IAlertThresholdRepository thresholdRepository,
ISloAlertRepository alertRepository,
TimeProvider timeProvider,
BurnRateOptions options,
ILogger<BurnRateEngine> logger)
{
_sloRepository = sloRepository ?? throw new ArgumentNullException(nameof(sloRepository));
_eventSource = eventSource ?? throw new ArgumentNullException(nameof(eventSource));
_thresholdRepository = thresholdRepository ?? throw new ArgumentNullException(nameof(thresholdRepository));
_alertRepository = alertRepository ?? throw new ArgumentNullException(nameof(alertRepository));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<SloState> ComputeStateAsync(
Slo slo,
CancellationToken cancellationToken)
{
var now = _timeProvider.GetUtcNow();
var windowDuration = slo.GetWindowDuration();
var windowStart = now - windowDuration;
// Get event counts based on SLO type
var counts = slo.Type switch
{
SloType.Availability => await _eventSource.GetAvailabilityCountsAsync(
slo.TenantId, slo.JobType, slo.SourceId, windowStart, now, cancellationToken).ConfigureAwait(false),
SloType.Latency => await _eventSource.GetLatencyCountsAsync(
slo.TenantId, slo.JobType, slo.SourceId,
slo.LatencyPercentile ?? 0.95,
slo.LatencyTargetSeconds ?? 1.0,
windowStart, now, cancellationToken).ConfigureAwait(false),
SloType.Throughput => await _eventSource.GetThroughputCountsAsync(
slo.TenantId, slo.JobType, slo.SourceId,
slo.ThroughputMinimum ?? 1,
windowStart, now, cancellationToken).ConfigureAwait(false),
_ => throw new InvalidOperationException($"Unknown SLO type: {slo.Type}")
};
// Handle no data case
if (counts.TotalEvents < _options.MinimumEvents)
{
_logger.LogDebug(
"SLO {SloId} has insufficient data ({Events} events, minimum {Min})",
slo.SloId, counts.TotalEvents, _options.MinimumEvents);
return SloState.NoData(slo.SloId, slo.TenantId, now, slo.Window);
}
// Compute SLI (Service Level Indicator)
var sli = (double)counts.GoodEvents / counts.TotalEvents;
// Compute error budget consumption
var errorBudget = slo.ErrorBudget;
var errorRate = 1.0 - sli;
var budgetConsumed = errorBudget > 0 ? errorRate / errorBudget : (errorRate > 0 ? 1.0 : 0.0);
budgetConsumed = Math.Clamp(budgetConsumed, 0, 2.0); // Allow showing overconsumption up to 200%
var budgetRemaining = Math.Max(0, 1.0 - budgetConsumed);
// Compute burn rate
// Burn rate = (actual error rate) / (allowed error rate for sustainable consumption)
// Sustainable consumption = error budget / window duration * elapsed time
var elapsedRatio = (now - counts.WindowStart).TotalSeconds / windowDuration.TotalSeconds;
var sustainableErrorRate = errorBudget * elapsedRatio;
var burnRate = sustainableErrorRate > 0 ? errorRate / sustainableErrorRate : 0;
// Compute time to exhaustion
TimeSpan? timeToExhaustion = null;
if (burnRate > 0 && budgetRemaining > 0)
{
var remainingBudget = errorBudget * budgetRemaining;
var currentErrorRatePerSecond = errorRate / (now - counts.WindowStart).TotalSeconds;
if (currentErrorRatePerSecond > 0)
{
var secondsToExhaustion = remainingBudget / currentErrorRatePerSecond;
timeToExhaustion = TimeSpan.FromSeconds(Math.Min(secondsToExhaustion, windowDuration.TotalSeconds));
}
}
// Determine if SLO is met
var isMet = sli >= slo.Target;
// Determine alert severity
var alertSeverity = DetermineAlertSeverity(budgetConsumed, burnRate);
var state = new SloState(
SloId: slo.SloId,
TenantId: slo.TenantId,
CurrentSli: sli,
TotalEvents: counts.TotalEvents,
GoodEvents: counts.GoodEvents,
BadEvents: counts.BadEvents,
BudgetConsumed: budgetConsumed,
BudgetRemaining: budgetRemaining,
BurnRate: burnRate,
TimeToExhaustion: timeToExhaustion,
IsMet: isMet,
AlertSeverity: alertSeverity,
ComputedAt: now,
WindowStart: counts.WindowStart,
WindowEnd: counts.WindowEnd);
_logger.LogDebug(
"SLO {SloId} state computed: SLI={Sli:P2}, BudgetConsumed={BudgetConsumed:P1}, BurnRate={BurnRate:F2}x",
slo.SloId, state.CurrentSli, state.BudgetConsumed, state.BurnRate);
return state;
}
public async Task<IReadOnlyList<SloState>> ComputeAllStatesAsync(
string tenantId,
CancellationToken cancellationToken)
{
var slos = await _sloRepository.ListAsync(tenantId, enabledOnly: true, cancellationToken: cancellationToken)
.ConfigureAwait(false);
var states = new List<SloState>(slos.Count);
foreach (var slo in slos)
{
try
{
var state = await ComputeStateAsync(slo, cancellationToken).ConfigureAwait(false);
states.Add(state);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to compute state for SLO {SloId}", slo.SloId);
// Add no-data state for failed computation
states.Add(SloState.NoData(slo.SloId, slo.TenantId, _timeProvider.GetUtcNow(), slo.Window));
}
}
return states;
}
public async Task<IReadOnlyList<SloAlert>> EvaluateAlertsAsync(
Slo slo,
SloState state,
CancellationToken cancellationToken)
{
var now = _timeProvider.GetUtcNow();
var thresholds = await _thresholdRepository.ListBySloAsync(slo.SloId, cancellationToken)
.ConfigureAwait(false);
var alerts = new List<SloAlert>();
foreach (var threshold in thresholds)
{
if (!threshold.ShouldTrigger(state, now))
{
continue;
}
var alert = SloAlert.Create(slo, state, threshold);
await _alertRepository.CreateAsync(alert, cancellationToken).ConfigureAwait(false);
var updatedThreshold = threshold.RecordTrigger(now);
await _thresholdRepository.UpdateAsync(updatedThreshold, cancellationToken).ConfigureAwait(false);
alerts.Add(alert);
_logger.LogWarning(
"SLO alert triggered: SloId={SloId}, Severity={Severity}, Message={Message}",
slo.SloId, alert.Severity, alert.Message);
}
return alerts;
}
private static AlertSeverity DetermineAlertSeverity(double budgetConsumed, double burnRate)
{
// Emergency: Budget exhausted or burn rate extremely high
if (budgetConsumed >= 1.0 || burnRate >= 10.0)
return AlertSeverity.Emergency;
// Critical: Budget nearly exhausted or burn rate very high
if (budgetConsumed >= 0.8 || burnRate >= 5.0)
return AlertSeverity.Critical;
// Warning: Budget significantly consumed or elevated burn rate
if (budgetConsumed >= 0.5 || burnRate >= 2.0)
return AlertSeverity.Warning;
// Info: Everything is normal
return AlertSeverity.Info;
}
}
/// <summary>
/// Repository interface for SLO persistence.
/// </summary>
public interface ISloRepository
{
Task<Slo?> GetByIdAsync(string tenantId, Guid sloId, CancellationToken cancellationToken);
Task<IReadOnlyList<Slo>> ListAsync(string tenantId, bool enabledOnly, string? jobType = null, CancellationToken cancellationToken = default);
Task CreateAsync(Slo slo, CancellationToken cancellationToken);
Task UpdateAsync(Slo slo, CancellationToken cancellationToken);
Task<bool> DeleteAsync(string tenantId, Guid sloId, CancellationToken cancellationToken);
}
/// <summary>
/// Repository interface for alert threshold persistence.
/// </summary>
public interface IAlertThresholdRepository
{
Task<AlertBudgetThreshold?> GetByIdAsync(string tenantId, Guid thresholdId, CancellationToken cancellationToken);
Task<IReadOnlyList<AlertBudgetThreshold>> ListBySloAsync(Guid sloId, CancellationToken cancellationToken);
Task CreateAsync(AlertBudgetThreshold threshold, CancellationToken cancellationToken);
Task UpdateAsync(AlertBudgetThreshold threshold, CancellationToken cancellationToken);
Task<bool> DeleteAsync(string tenantId, Guid thresholdId, CancellationToken cancellationToken);
}
/// <summary>
/// Repository interface for SLO alert persistence.
/// </summary>
public interface ISloAlertRepository
{
Task<SloAlert?> GetByIdAsync(string tenantId, Guid alertId, CancellationToken cancellationToken);
Task<IReadOnlyList<SloAlert>> ListAsync(string tenantId, Guid? sloId, bool? acknowledged, bool? resolved, int limit, int offset, CancellationToken cancellationToken);
Task CreateAsync(SloAlert alert, CancellationToken cancellationToken);
Task UpdateAsync(SloAlert alert, CancellationToken cancellationToken);
Task<int> GetActiveAlertCountAsync(string tenantId, CancellationToken cancellationToken);
}
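A worked example of the budget arithmetic in ComputeStateAsync, with assumed numbers (99.9% availability target, full window elapsed so elapsedRatio = 1):

using System;

const double target = 0.999;           // 99.9% availability SLO
const double errorBudget = 1 - target; // 0.1% allowed error rate

long total = 100_000;
long bad = 60;

var sli = (double)(total - bad) / total;                          // 0.9994
var errorRate = 1 - sli;                                          // 0.0006
var budgetConsumed = Math.Clamp(errorRate / errorBudget, 0, 2.0); // 0.6
var burnRate = errorRate / errorBudget;                           // 0.6x at elapsedRatio = 1

Console.WriteLine($"SLI={sli:P2} consumed={budgetConsumed:P0} burn={burnRate:F1}x");
// SLI=99.94% consumed=60% burn=0.6x, which DetermineAlertSeverity maps to Warning
// (budgetConsumed >= 0.5 but below the 0.8 Critical threshold).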

View File

@@ -1,18 +1,20 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0-rc.2.25502.107" />
</ItemGroup>
</Project>

View File

@@ -1,6 +0,0 @@
namespace StellaOps.Orchestrator.Infrastructure;
public class Class1
{
}

View File

@@ -0,0 +1,45 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Ledger;
/// <summary>
/// Service for exporting ledger data in various formats.
/// </summary>
public interface ILedgerExporter
{
/// <summary>
/// Exports ledger entries to a file.
/// </summary>
/// <param name="export">The export request.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The completed export with output details.</returns>
Task<LedgerExport> ExportAsync(
LedgerExport export,
CancellationToken cancellationToken = default);
/// <summary>
/// Generates a signed manifest for a ledger entry.
/// </summary>
/// <param name="entry">The ledger entry.</param>
/// <param name="artifacts">The artifacts from the run.</param>
/// <param name="buildInfo">Optional build information.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The generated manifest.</returns>
Task<SignedManifest> GenerateManifestAsync(
RunLedgerEntry entry,
IReadOnlyList<Artifact> artifacts,
string? buildInfo = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Generates a signed manifest for an export.
/// </summary>
/// <param name="export">The completed export.</param>
/// <param name="entries">The entries included in the export.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The generated manifest.</returns>
Task<SignedManifest> GenerateExportManifestAsync(
LedgerExport export,
IReadOnlyList<RunLedgerEntry> entries,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,309 @@
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Ledger;
/// <summary>
/// Service for exporting ledger data in various formats.
/// </summary>
public sealed class LedgerExporter : ILedgerExporter
{
private readonly ILedgerRepository _ledgerRepository;
private readonly ILedgerExportRepository _exportRepository;
private readonly ILogger<LedgerExporter> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
WriteIndented = true,
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
private static readonly JsonSerializerOptions NdjsonOptions = new()
{
WriteIndented = false,
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
public LedgerExporter(
ILedgerRepository ledgerRepository,
ILedgerExportRepository exportRepository,
ILogger<LedgerExporter> logger)
{
_ledgerRepository = ledgerRepository;
_exportRepository = exportRepository;
_logger = logger;
}
/// <inheritdoc />
public async Task<LedgerExport> ExportAsync(
LedgerExport export,
CancellationToken cancellationToken = default)
{
var startTime = DateTimeOffset.UtcNow;
try
{
_logger.LogInformation(
"Starting ledger export {ExportId} for tenant {TenantId} in format {Format}",
export.ExportId, export.TenantId, export.Format);
// Mark export as started
export = export.Start();
export = await _exportRepository.UpdateAsync(export, cancellationToken);
// Fetch entries based on filters
var entries = await _ledgerRepository.ListAsync(
export.TenantId,
export.RunTypeFilter,
export.SourceIdFilter,
finalStatus: null,
export.StartTime,
export.EndTime,
limit: int.MaxValue,
offset: 0,
cancellationToken);
_logger.LogInformation(
"Found {EntryCount} ledger entries for export {ExportId}",
entries.Count, export.ExportId);
// Generate output based on format
var (content, digest) = await GenerateOutputAsync(entries, export.Format, cancellationToken);
// Generate output path (in production, this would write to storage)
var outputUri = GenerateOutputUri(export);
var sizeBytes = Encoding.UTF8.GetByteCount(content);
// Complete the export
export = export.Complete(outputUri, digest, sizeBytes, entries.Count);
export = await _exportRepository.UpdateAsync(export, cancellationToken);
var duration = DateTimeOffset.UtcNow - startTime;
OrchestratorMetrics.LedgerExportCompleted(export.TenantId, export.Format);
OrchestratorMetrics.RecordLedgerExportDuration(export.TenantId, export.Format, duration.TotalSeconds);
OrchestratorMetrics.RecordLedgerExportSize(export.TenantId, export.Format, sizeBytes);
_logger.LogInformation(
"Completed ledger export {ExportId} with {EntryCount} entries, {SizeBytes} bytes",
export.ExportId, entries.Count, sizeBytes);
return export;
}
catch (Exception ex)
{
_logger.LogError(ex,
"Failed to export ledger {ExportId} for tenant {TenantId}",
export.ExportId, export.TenantId);
OrchestratorMetrics.LedgerExportFailed(export.TenantId, export.Format);
export = export.Fail(ex.Message);
export = await _exportRepository.UpdateAsync(export, cancellationToken);
throw;
}
}
/// <inheritdoc />
public Task<SignedManifest> GenerateManifestAsync(
RunLedgerEntry entry,
IReadOnlyList<Artifact> artifacts,
string? buildInfo = null,
CancellationToken cancellationToken = default)
{
_logger.LogInformation(
"Generating manifest for ledger entry {LedgerId}, run {RunId}",
entry.LedgerId, entry.RunId);
var manifest = SignedManifest.CreateFromLedgerEntry(entry, buildInfo);
OrchestratorMetrics.ManifestCreated(entry.TenantId, "run");
return Task.FromResult(manifest);
}
/// <inheritdoc />
public Task<SignedManifest> GenerateExportManifestAsync(
LedgerExport export,
IReadOnlyList<RunLedgerEntry> entries,
CancellationToken cancellationToken = default)
{
_logger.LogInformation(
"Generating manifest for export {ExportId} with {EntryCount} entries",
export.ExportId, entries.Count);
var manifest = SignedManifest.CreateFromExport(export, entries);
OrchestratorMetrics.ManifestCreated(export.TenantId, "export");
return Task.FromResult(manifest);
}
private async Task<(string Content, string Digest)> GenerateOutputAsync(
IReadOnlyList<RunLedgerEntry> entries,
string format,
CancellationToken cancellationToken)
{
var content = format.ToLowerInvariant() switch
{
"json" => GenerateJson(entries),
"ndjson" => GenerateNdjson(entries),
"csv" => GenerateCsv(entries),
_ => throw new ArgumentException($"Unsupported export format: {format}", nameof(format))
};
// Compute digest
var bytes = Encoding.UTF8.GetBytes(content);
var hash = await Task.Run(() => SHA256.HashData(bytes), cancellationToken);
var digest = $"sha256:{Convert.ToHexStringLower(hash)}";
return (content, digest);
}
private static string GenerateJson(IReadOnlyList<RunLedgerEntry> entries)
{
var exportData = new LedgerExportData
{
SchemaVersion = "1.0.0",
ExportedAt = DateTimeOffset.UtcNow,
EntryCount = entries.Count,
Entries = entries.Select(MapEntry).ToList()
};
return JsonSerializer.Serialize(exportData, JsonOptions);
}
private static string GenerateNdjson(IReadOnlyList<RunLedgerEntry> entries)
{
var sb = new StringBuilder();
foreach (var entry in entries)
{
var mapped = MapEntry(entry);
sb.AppendLine(JsonSerializer.Serialize(mapped, NdjsonOptions));
}
return sb.ToString();
}
private static string GenerateCsv(IReadOnlyList<RunLedgerEntry> entries)
{
var sb = new StringBuilder();
// Header
sb.AppendLine("LedgerId,TenantId,RunId,SourceId,RunType,FinalStatus,TotalJobs,SucceededJobs,FailedJobs,ExecutionDurationMs,InputDigest,OutputDigest,SequenceNumber,ContentHash,PreviousEntryHash,RunCreatedAt,RunCompletedAt,LedgerCreatedAt");
// Data rows
foreach (var entry in entries)
{
sb.AppendLine(string.Join(",",
EscapeCsv(entry.LedgerId.ToString()),
EscapeCsv(entry.TenantId),
EscapeCsv(entry.RunId.ToString()),
EscapeCsv(entry.SourceId.ToString()),
EscapeCsv(entry.RunType),
EscapeCsv(entry.FinalStatus.ToString()),
entry.TotalJobs,
entry.SucceededJobs,
entry.FailedJobs,
entry.ExecutionDuration.TotalMilliseconds.ToString(CultureInfo.InvariantCulture),
EscapeCsv(entry.InputDigest),
EscapeCsv(entry.OutputDigest),
entry.SequenceNumber,
EscapeCsv(entry.ContentHash),
EscapeCsv(entry.PreviousEntryHash ?? ""),
EscapeCsv(entry.RunCreatedAt.ToString("O")),
EscapeCsv(entry.RunCompletedAt.ToString("O")),
EscapeCsv(entry.LedgerCreatedAt.ToString("O"))));
}
return sb.ToString();
}
private static string EscapeCsv(string value)
{
if (string.IsNullOrEmpty(value))
return "";
if (value.Contains(',') || value.Contains('"') || value.Contains('\n') || value.Contains('\r'))
{
return $"\"{value.Replace("\"", "\"\"")}\"";
}
return value;
}
private static LedgerEntryDto MapEntry(RunLedgerEntry entry) => new()
{
LedgerId = entry.LedgerId,
TenantId = entry.TenantId,
RunId = entry.RunId,
SourceId = entry.SourceId,
RunType = entry.RunType,
FinalStatus = entry.FinalStatus.ToString(),
TotalJobs = entry.TotalJobs,
SucceededJobs = entry.SucceededJobs,
FailedJobs = entry.FailedJobs,
ExecutionDurationMs = entry.ExecutionDuration.TotalMilliseconds,
InputDigest = entry.InputDigest,
OutputDigest = entry.OutputDigest,
ArtifactManifest = entry.ArtifactManifest,
SequenceNumber = entry.SequenceNumber,
ContentHash = entry.ContentHash,
PreviousEntryHash = entry.PreviousEntryHash,
RunCreatedAt = entry.RunCreatedAt,
RunCompletedAt = entry.RunCompletedAt,
LedgerCreatedAt = entry.LedgerCreatedAt,
Metadata = entry.Metadata
};
private static string GenerateOutputUri(LedgerExport export)
{
var extension = export.Format.ToLowerInvariant() switch
{
"json" => "json",
"ndjson" => "ndjson",
"csv" => "csv",
_ => "dat"
};
return $"ledger://exports/{export.TenantId}/{export.ExportId}.{extension}";
}
private sealed class LedgerExportData
{
public required string SchemaVersion { get; init; }
public required DateTimeOffset ExportedAt { get; init; }
public required int EntryCount { get; init; }
public required List<LedgerEntryDto> Entries { get; init; }
}
private sealed class LedgerEntryDto
{
public required Guid LedgerId { get; init; }
public required string TenantId { get; init; }
public required Guid RunId { get; init; }
public required Guid SourceId { get; init; }
public required string RunType { get; init; }
public required string FinalStatus { get; init; }
public required int TotalJobs { get; init; }
public required int SucceededJobs { get; init; }
public required int FailedJobs { get; init; }
public required double ExecutionDurationMs { get; init; }
public required string InputDigest { get; init; }
public required string OutputDigest { get; init; }
public required string ArtifactManifest { get; init; }
public required long SequenceNumber { get; init; }
public required string ContentHash { get; init; }
public string? PreviousEntryHash { get; init; }
public required DateTimeOffset RunCreatedAt { get; init; }
public required DateTimeOffset RunCompletedAt { get; init; }
public required DateTimeOffset LedgerCreatedAt { get; init; }
public string? Metadata { get; init; }
}
}
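The quoting rule in EscapeCsv follows RFC 4180-style escaping: a field is quoted only when it contains a delimiter, a quote, or a line break, and embedded quotes are doubled. A quick standalone check:

using System;

static string EscapeCsv(string value)
{
    if (string.IsNullOrEmpty(value))
        return "";
    // Quote when the field contains a comma, a quote, or a line break.
    if (value.IndexOfAny(new[] { ',', '"', '\n', '\r' }) >= 0)
        return $"\"{value.Replace("\"", "\"\"")}\"";
    return value;
}

Console.WriteLine(EscapeCsv("plain"));      // plain
Console.WriteLine(EscapeCsv("a,b"));        // "a,b"
Console.WriteLine(EscapeCsv("say \"hi\"")); // "say ""hi"""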

View File

@@ -0,0 +1,660 @@
using System.Diagnostics.Metrics;
namespace StellaOps.Orchestrator.Infrastructure;
/// <summary>
/// Metrics instrumentation for the Orchestrator service.
/// </summary>
public static class OrchestratorMetrics
{
private static readonly Meter Meter = new("StellaOps.Orchestrator", "1.0.0");
private static readonly Counter<long> JobsEnqueued = Meter.CreateCounter<long>(
"orchestrator.jobs.enqueued",
description: "Total jobs enqueued");
private static readonly Counter<long> JobsScheduled = Meter.CreateCounter<long>(
"orchestrator.jobs.scheduled",
description: "Total jobs scheduled");
private static readonly Counter<long> JobsLeased = Meter.CreateCounter<long>(
"orchestrator.jobs.leased",
description: "Total jobs leased to workers");
private static readonly Counter<long> JobsCompleted = Meter.CreateCounter<long>(
"orchestrator.jobs.completed",
description: "Total jobs completed");
private static readonly Counter<long> JobsFailed = Meter.CreateCounter<long>(
"orchestrator.jobs.failed",
description: "Total jobs failed");
private static readonly Counter<long> JobsRetried = Meter.CreateCounter<long>(
"orchestrator.jobs.retried",
description: "Total job retry attempts");
private static readonly Counter<long> LeaseExtensions = Meter.CreateCounter<long>(
"orchestrator.lease.extensions",
description: "Total lease extensions");
private static readonly Counter<long> LeaseExpirations = Meter.CreateCounter<long>(
"orchestrator.lease.expirations",
description: "Total lease expirations");
private static readonly Histogram<double> JobDuration = Meter.CreateHistogram<double>(
"orchestrator.job.duration.seconds",
unit: "s",
description: "Job execution duration");
private static readonly Histogram<double> SchedulingLatency = Meter.CreateHistogram<double>(
"orchestrator.scheduling.latency.seconds",
unit: "s",
description: "Time from job creation to scheduling");
private static readonly UpDownCounter<long> ActiveConnections = Meter.CreateUpDownCounter<long>(
"orchestrator.db.connections.active",
description: "Active database connections");
private static readonly UpDownCounter<long> QueueDepth = Meter.CreateUpDownCounter<long>(
"orchestrator.queue.depth",
description: "Number of pending jobs in queue");
private static readonly Counter<long> ArtifactsCreated = Meter.CreateCounter<long>(
"orchestrator.artifacts.created",
description: "Total artifacts created");
private static readonly Counter<long> HeartbeatsReceived = Meter.CreateCounter<long>(
"orchestrator.heartbeats.received",
description: "Total worker heartbeats received");
private static readonly Counter<long> ProgressReports = Meter.CreateCounter<long>(
"orchestrator.progress.reports",
description: "Total job progress reports");
private static readonly Counter<long> SourcesCreated = Meter.CreateCounter<long>(
"orchestrator.sources.created",
description: "Total sources created");
private static readonly Counter<long> SourcesPaused = Meter.CreateCounter<long>(
"orchestrator.sources.paused",
description: "Total source pause operations");
private static readonly Counter<long> SourcesResumed = Meter.CreateCounter<long>(
"orchestrator.sources.resumed",
description: "Total source resume operations");
private static readonly Counter<long> RunsCreated = Meter.CreateCounter<long>(
"orchestrator.runs.created",
description: "Total runs created");
private static readonly Counter<long> RunsCompleted = Meter.CreateCounter<long>(
"orchestrator.runs.completed",
description: "Total runs completed");
private static readonly Counter<long> QuotasCreated = Meter.CreateCounter<long>(
"orchestrator.quotas.created",
description: "Total quotas created");
private static readonly Counter<long> QuotasPaused = Meter.CreateCounter<long>(
"orchestrator.quotas.paused",
description: "Total quota pause operations");
private static readonly Counter<long> QuotasResumed = Meter.CreateCounter<long>(
"orchestrator.quotas.resumed",
description: "Total quota resume operations");
private static readonly Counter<long> ThrottlesCreated = Meter.CreateCounter<long>(
"orchestrator.throttles.created",
description: "Total throttles created");
private static readonly Counter<long> ThrottlesDeactivated = Meter.CreateCounter<long>(
"orchestrator.throttles.deactivated",
description: "Total throttles deactivated");
private static readonly Counter<long> RateLimitDenials = Meter.CreateCounter<long>(
"orchestrator.ratelimit.denials",
description: "Total rate limit denials");
private static readonly Counter<long> BackpressureEvents = Meter.CreateCounter<long>(
"orchestrator.backpressure.events",
description: "Total backpressure events from upstream");
private static readonly Histogram<double> TokenBucketUtilization = Meter.CreateHistogram<double>(
"orchestrator.ratelimit.token_utilization",
unit: "ratio",
description: "Token bucket utilization ratio (0-1)");
private static readonly Histogram<double> ConcurrencyUtilization = Meter.CreateHistogram<double>(
"orchestrator.ratelimit.concurrency_utilization",
unit: "ratio",
description: "Concurrency limiter utilization ratio (0-1)");
public static void JobEnqueued(string tenantId, string jobType)
=> JobsEnqueued.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void JobScheduled(string tenantId, string jobType)
=> JobsScheduled.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void JobLeased(string tenantId, string jobType)
=> JobsLeased.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void JobCompleted(string tenantId, string jobType, string status)
=> JobsCompleted.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType),
new KeyValuePair<string, object?>("status", status));
public static void JobFailed(string tenantId, string jobType)
=> JobsFailed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void JobRetried(string tenantId, string jobType, int attempt)
=> JobsRetried.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType),
new KeyValuePair<string, object?>("attempt", attempt));
public static void LeaseExtended(string tenantId, string jobType)
=> LeaseExtensions.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void LeaseExpired(string tenantId, string jobType)
=> LeaseExpirations.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void RecordJobDuration(string tenantId, string jobType, double durationSeconds)
=> JobDuration.Record(durationSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void RecordSchedulingLatency(string tenantId, string jobType, double latencySeconds)
=> SchedulingLatency.Record(latencySeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void ConnectionOpened(string role)
=> ActiveConnections.Add(1, new KeyValuePair<string, object?>("role", role));
public static void ConnectionClosed(string role)
=> ActiveConnections.Add(-1, new KeyValuePair<string, object?>("role", role));
public static void QueueDepthChanged(string tenantId, string jobType, long delta)
=> QueueDepth.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void ArtifactCreated(string tenantId, string artifactType)
=> ArtifactsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("artifact_type", artifactType));
public static void HeartbeatReceived(string tenantId, string jobType)
=> HeartbeatsReceived.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void ProgressReported(string tenantId, string jobType)
=> ProgressReports.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType));
public static void SourceCreated(string tenantId, string sourceType)
=> SourcesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("source_type", sourceType));
public static void SourcePaused(string tenantId)
=> SourcesPaused.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void SourceResumed(string tenantId)
=> SourcesResumed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void RunCreated(string tenantId, string runType)
=> RunsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("run_type", runType));
public static void RunCompleted(string tenantId, string runType, string status)
=> RunsCompleted.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("run_type", runType),
new KeyValuePair<string, object?>("status", status));
public static void QuotaCreated(string tenantId, string? jobType)
=> QuotasCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));
public static void QuotaPaused(string tenantId)
=> QuotasPaused.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void QuotaResumed(string tenantId)
=> QuotasResumed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void ThrottleCreated(string tenantId, string reason)
=> ThrottlesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("reason", reason));
public static void ThrottleDeactivated(string tenantId)
=> ThrottlesDeactivated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void RateLimitDenied(string tenantId, string? jobType, string reason)
=> RateLimitDenials.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"),
new KeyValuePair<string, object?>("reason", reason));
public static void BackpressureEvent(string tenantId, int statusCode, string reason)
=> BackpressureEvents.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("status_code", statusCode),
new KeyValuePair<string, object?>("reason", reason));
public static void RecordTokenBucketUtilization(string tenantId, string? jobType, double utilization)
=> TokenBucketUtilization.Record(utilization, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));
public static void RecordConcurrencyUtilization(string tenantId, string? jobType, double utilization)
=> ConcurrencyUtilization.Record(utilization, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));
// Watermark metrics
private static readonly Counter<long> WatermarksCreatedCounter = Meter.CreateCounter<long>(
"orchestrator.watermarks.created",
description: "Total watermarks created");
private static readonly Counter<long> WatermarksAdvanced = Meter.CreateCounter<long>(
"orchestrator.watermarks.advanced",
description: "Total watermark advancement operations");
private static readonly Histogram<double> WatermarkLag = Meter.CreateHistogram<double>(
"orchestrator.watermark.lag.seconds",
unit: "s",
description: "Watermark lag from current time");
// Backfill metrics
private static readonly Counter<long> BackfillsCreated = Meter.CreateCounter<long>(
"orchestrator.backfills.created",
description: "Total backfill requests created");
private static readonly Counter<long> BackfillStatusChanges = Meter.CreateCounter<long>(
"orchestrator.backfills.status_changes",
description: "Total backfill status changes");
private static readonly Counter<long> BackfillEventsProcessed = Meter.CreateCounter<long>(
"orchestrator.backfills.events_processed",
description: "Total events processed by backfills");
private static readonly Counter<long> BackfillEventsSkipped = Meter.CreateCounter<long>(
"orchestrator.backfills.events_skipped",
description: "Total events skipped by backfills (duplicates)");
private static readonly Histogram<double> BackfillDuration = Meter.CreateHistogram<double>(
"orchestrator.backfill.duration.seconds",
unit: "s",
description: "Backfill execution duration");
private static readonly Histogram<double> BackfillProgress = Meter.CreateHistogram<double>(
"orchestrator.backfill.progress",
unit: "percent",
description: "Backfill progress percentage");
// Duplicate suppression metrics
private static readonly Counter<long> ProcessedEventsMarkedCounter = Meter.CreateCounter<long>(
"orchestrator.processed_events.marked",
description: "Total processed events marked for duplicate suppression");
private static readonly Counter<long> ProcessedEventsCleanedUpCounter = Meter.CreateCounter<long>(
"orchestrator.processed_events.cleaned_up",
description: "Total expired processed events cleaned up");
private static readonly Counter<long> DuplicatesDetected = Meter.CreateCounter<long>(
"orchestrator.duplicates.detected",
description: "Total duplicate events detected");
public static void WatermarkCreated(string tenantId, string scopeKey)
=> WatermarksCreatedCounter.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void WatermarkAdvanced(string tenantId, string scopeKey)
=> WatermarksAdvanced.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void RecordWatermarkLag(string tenantId, string scopeKey, double lagSeconds)
=> WatermarkLag.Record(lagSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void BackfillCreated(string tenantId, string scopeKey)
=> BackfillsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void BackfillStatusChanged(string tenantId, string scopeKey, string status)
=> BackfillStatusChanges.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey),
new KeyValuePair<string, object?>("status", status));
public static void BackfillEventProcessed(string tenantId, string scopeKey, long count)
=> BackfillEventsProcessed.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void BackfillEventSkipped(string tenantId, string scopeKey, long count)
=> BackfillEventsSkipped.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void RecordBackfillDuration(string tenantId, string scopeKey, double durationSeconds)
=> BackfillDuration.Record(durationSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void RecordBackfillProgress(string tenantId, string scopeKey, double progressPercent)
=> BackfillProgress.Record(progressPercent, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void ProcessedEventsMarked(string tenantId, string scopeKey, long count)
=> ProcessedEventsMarkedCounter.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
public static void ProcessedEventsCleanedUp(string tenantId, long count)
=> ProcessedEventsCleanedUpCounter.Add(count, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void DuplicateDetected(string tenantId, string scopeKey)
=> DuplicatesDetected.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("scope_key", scopeKey));
// Dead-letter metrics
private static readonly Counter<long> DeadLetterEntriesCreated = Meter.CreateCounter<long>(
"orchestrator.deadletter.created",
description: "Total dead-letter entries created");
private static readonly Counter<long> DeadLetterStatusChanges = Meter.CreateCounter<long>(
"orchestrator.deadletter.status_changes",
description: "Total dead-letter status changes");
private static readonly Counter<long> DeadLetterReplayAttempts = Meter.CreateCounter<long>(
"orchestrator.deadletter.replay_attempts",
description: "Total dead-letter replay attempts");
private static readonly Counter<long> DeadLetterReplaySuccesses = Meter.CreateCounter<long>(
"orchestrator.deadletter.replay_successes",
description: "Total successful dead-letter replays");
private static readonly Counter<long> DeadLetterReplayFailures = Meter.CreateCounter<long>(
"orchestrator.deadletter.replay_failures",
description: "Total failed dead-letter replays");
private static readonly Counter<long> DeadLetterEntriesExpired = Meter.CreateCounter<long>(
"orchestrator.deadletter.expired",
description: "Total dead-letter entries marked as expired");
private static readonly Counter<long> DeadLetterEntriesPurged = Meter.CreateCounter<long>(
"orchestrator.deadletter.purged",
description: "Total dead-letter entries purged");
private static readonly Counter<long> DeadLetterNotificationsSent = Meter.CreateCounter<long>(
"orchestrator.deadletter.notifications_sent",
description: "Total dead-letter notifications sent");
private static readonly Counter<long> DeadLetterNotificationsFailed = Meter.CreateCounter<long>(
"orchestrator.deadletter.notifications_failed",
description: "Total failed dead-letter notifications");
private static readonly UpDownCounter<long> DeadLetterPendingCount = Meter.CreateUpDownCounter<long>(
"orchestrator.deadletter.pending",
description: "Current number of pending dead-letter entries");
public static void DeadLetterCreated(string tenantId, string jobType, string errorCode, string category)
=> DeadLetterEntriesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType),
new KeyValuePair<string, object?>("error_code", errorCode),
new KeyValuePair<string, object?>("category", category));
public static void DeadLetterStatusChanged(string tenantId, string jobType, string status)
=> DeadLetterStatusChanges.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("job_type", jobType),
new KeyValuePair<string, object?>("status", status));
public static void DeadLetterReplayAttempted(string tenantId, string triggeredBy)
=> DeadLetterReplayAttempts.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("triggered_by", triggeredBy));
public static void DeadLetterReplaySucceeded(string tenantId)
=> DeadLetterReplaySuccesses.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void DeadLetterReplayFailed(string tenantId)
=> DeadLetterReplayFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void DeadLetterExpired(int count)
=> DeadLetterEntriesExpired.Add(count);
public static void DeadLetterPurged(int count)
=> DeadLetterEntriesPurged.Add(count);
public static void DeadLetterNotificationSent(string tenantId, string channel, string eventType)
=> DeadLetterNotificationsSent.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("channel", channel),
new KeyValuePair<string, object?>("event_type", eventType));
public static void DeadLetterNotificationFailed(string tenantId, string channel, string eventType)
=> DeadLetterNotificationsFailed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("channel", channel),
new KeyValuePair<string, object?>("event_type", eventType));
public static void DeadLetterPendingChanged(string tenantId, long delta)
=> DeadLetterPendingCount.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId));
// SLO metrics
private static readonly Counter<long> SlosCreated = Meter.CreateCounter<long>(
"orchestrator.slos.created",
description: "Total SLOs created");
private static readonly Counter<long> SlosUpdated = Meter.CreateCounter<long>(
"orchestrator.slos.updated",
description: "Total SLO updates");
private static readonly Counter<long> SloAlertsTriggered = Meter.CreateCounter<long>(
"orchestrator.slo.alerts_triggered",
description: "Total SLO alerts triggered");
private static readonly Counter<long> SloAlertsAcknowledged = Meter.CreateCounter<long>(
"orchestrator.slo.alerts_acknowledged",
description: "Total SLO alerts acknowledged");
private static readonly Counter<long> SloAlertsResolved = Meter.CreateCounter<long>(
"orchestrator.slo.alerts_resolved",
description: "Total SLO alerts resolved");
private static readonly Histogram<double> SloBudgetConsumed = Meter.CreateHistogram<double>(
"orchestrator.slo.budget_consumed",
unit: "ratio",
description: "SLO error budget consumed (0-1)");
private static readonly Histogram<double> SloBurnRate = Meter.CreateHistogram<double>(
"orchestrator.slo.burn_rate",
unit: "ratio",
description: "SLO burn rate (1.0 = sustainable)");
private static readonly Histogram<double> SloCurrentSli = Meter.CreateHistogram<double>(
"orchestrator.slo.current_sli",
unit: "ratio",
description: "Current SLI value (0-1)");
private static readonly UpDownCounter<long> SloActiveAlerts = Meter.CreateUpDownCounter<long>(
"orchestrator.slo.active_alerts",
description: "Current number of active SLO alerts");
private static readonly Histogram<double> SloBudgetRemaining = Meter.CreateHistogram<double>(
"orchestrator.slo.budget_remaining",
unit: "ratio",
description: "SLO error budget remaining (0-1)");
private static readonly Histogram<double> SloTimeToExhaustion = Meter.CreateHistogram<double>(
"orchestrator.slo.time_to_exhaustion.seconds",
unit: "s",
description: "Estimated time until error budget exhaustion");
public static void SloCreated(string tenantId, string sloType, string? jobType)
=> SlosCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_type", sloType),
new KeyValuePair<string, object?>("job_type", jobType ?? "(all)"));
public static void SloUpdated(string tenantId, string sloName)
=> SlosUpdated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName));
public static void SloAlertTriggered(string tenantId, string sloName, string severity)
=> SloAlertsTriggered.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName),
new KeyValuePair<string, object?>("severity", severity));
public static void SloAlertAcknowledged(string tenantId, string sloName)
=> SloAlertsAcknowledged.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName));
public static void SloAlertResolved(string tenantId, string sloName)
=> SloAlertsResolved.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName));
public static void RecordSloBudgetConsumed(string tenantId, string sloName, string sloType, double consumed)
=> SloBudgetConsumed.Record(consumed, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName),
new KeyValuePair<string, object?>("slo_type", sloType));
public static void RecordSloBurnRate(string tenantId, string sloName, string sloType, double burnRate)
=> SloBurnRate.Record(burnRate, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName),
new KeyValuePair<string, object?>("slo_type", sloType));
public static void RecordSloCurrentSli(string tenantId, string sloName, string sloType, double sli)
=> SloCurrentSli.Record(sli, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName),
new KeyValuePair<string, object?>("slo_type", sloType));
public static void SloActiveAlertsChanged(string tenantId, long delta)
=> SloActiveAlerts.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void RecordSloBudgetRemaining(string tenantId, string sloName, string sloType, double remaining)
=> SloBudgetRemaining.Record(remaining, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName),
new KeyValuePair<string, object?>("slo_type", sloType));
public static void RecordSloTimeToExhaustion(string tenantId, string sloName, double seconds)
=> SloTimeToExhaustion.Record(seconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("slo_name", sloName));
// Audit log metrics
private static readonly Counter<long> AuditEntriesCreated = Meter.CreateCounter<long>(
"orchestrator.audit.entries_created",
description: "Total audit log entries created");
private static readonly Counter<long> AuditChainVerifications = Meter.CreateCounter<long>(
"orchestrator.audit.chain_verifications",
description: "Total audit chain verification operations");
private static readonly Counter<long> AuditChainFailures = Meter.CreateCounter<long>(
"orchestrator.audit.chain_failures",
description: "Total audit chain verification failures");
private static readonly UpDownCounter<long> AuditEntryCount = Meter.CreateUpDownCounter<long>(
"orchestrator.audit.entry_count",
description: "Current number of audit entries");
// Ledger metrics
private static readonly Counter<long> LedgerEntriesCreated = Meter.CreateCounter<long>(
"orchestrator.ledger.entries_created",
description: "Total ledger entries created");
private static readonly Counter<long> LedgerChainVerifications = Meter.CreateCounter<long>(
"orchestrator.ledger.chain_verifications",
description: "Total ledger chain verification operations");
private static readonly Counter<long> LedgerChainFailures = Meter.CreateCounter<long>(
"orchestrator.ledger.chain_failures",
description: "Total ledger chain verification failures");
private static readonly Counter<long> LedgerExportsRequested = Meter.CreateCounter<long>(
"orchestrator.ledger.exports_requested",
description: "Total ledger export requests");
private static readonly Counter<long> LedgerExportsCompleted = Meter.CreateCounter<long>(
"orchestrator.ledger.exports_completed",
description: "Total ledger exports completed successfully");
private static readonly Counter<long> LedgerExportsFailed = Meter.CreateCounter<long>(
"orchestrator.ledger.exports_failed",
description: "Total ledger exports that failed");
private static readonly Histogram<double> LedgerExportDuration = Meter.CreateHistogram<double>(
"orchestrator.ledger.export_duration.seconds",
unit: "s",
description: "Ledger export duration");
private static readonly Histogram<long> LedgerExportSize = Meter.CreateHistogram<long>(
"orchestrator.ledger.export_size.bytes",
unit: "bytes",
description: "Ledger export file size");
// Manifest metrics
private static readonly Counter<long> ManifestsCreated = Meter.CreateCounter<long>(
"orchestrator.manifests.created",
description: "Total signed manifests created");
private static readonly Counter<long> ManifestVerifications = Meter.CreateCounter<long>(
"orchestrator.manifests.verifications",
description: "Total manifest verification operations");
private static readonly Counter<long> ManifestVerificationFailures = Meter.CreateCounter<long>(
"orchestrator.manifests.verification_failures",
description: "Total manifest verification failures");
public static void AuditEntryCreated(string tenantId, string eventType, string resourceType)
=> AuditEntriesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("event_type", eventType),
new KeyValuePair<string, object?>("resource_type", resourceType));
public static void AuditChainVerified(string tenantId, bool success)
{
AuditChainVerifications.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
if (!success)
{
AuditChainFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
}
}
public static void AuditEntryCountChanged(string tenantId, long delta)
=> AuditEntryCount.Add(delta, new KeyValuePair<string, object?>("tenant_id", tenantId));
public static void LedgerEntryCreated(string tenantId, string runType, string finalStatus)
=> LedgerEntriesCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("run_type", runType),
new KeyValuePair<string, object?>("final_status", finalStatus));
public static void LedgerChainVerified(string tenantId, bool success)
{
LedgerChainVerifications.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
if (!success)
{
LedgerChainFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
}
}
public static void LedgerExportRequested(string tenantId, string format)
=> LedgerExportsRequested.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("format", format));
public static void LedgerExportCompleted(string tenantId, string format)
=> LedgerExportsCompleted.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("format", format));
public static void LedgerExportFailed(string tenantId, string format)
=> LedgerExportsFailed.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("format", format));
public static void RecordLedgerExportDuration(string tenantId, string format, double durationSeconds)
=> LedgerExportDuration.Record(durationSeconds, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("format", format));
public static void RecordLedgerExportSize(string tenantId, string format, long sizeBytes)
=> LedgerExportSize.Record(sizeBytes, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("format", format));
public static void ManifestCreated(string tenantId, string provenanceType)
=> ManifestsCreated.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId),
new KeyValuePair<string, object?>("provenance_type", provenanceType));
public static void ManifestVerified(string tenantId, bool success)
{
ManifestVerifications.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
if (!success)
{
ManifestVerificationFailures.Add(1, new KeyValuePair<string, object?>("tenant_id", tenantId));
}
}
}
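
The instruments above are inert until something subscribes to their Meter. A minimal export sketch, assuming the Meter in this class is registered under the name "StellaOps.Orchestrator" (a hypothetical name; substitute whatever is passed to the Meter constructor earlier in the class) and that the OpenTelemetry and OpenTelemetry.Exporter.Console packages are referenced; the tenant and scope values are illustrative:

using OpenTelemetry;
using OpenTelemetry.Metrics;

// Hypothetical meter name; must match the name given to the Meter instance.
using var meterProvider = Sdk.CreateMeterProviderBuilder()
.AddMeter("StellaOps.Orchestrator")
.AddConsoleExporter()
.Build();

// Measurements recorded from here on flow to the configured exporter.
OrchestratorMetrics.WatermarkCreated("tenant-a", "concelier:nvd");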

View File

@@ -0,0 +1,130 @@
namespace StellaOps.Orchestrator.Infrastructure.Options;
/// <summary>
/// Configuration options for the Orchestrator service.
/// </summary>
public sealed class OrchestratorServiceOptions
{
/// <summary>
/// Configuration section name.
/// </summary>
public const string SectionName = "Orchestrator";
/// <summary>
/// HTTP header name for tenant identification.
/// </summary>
public string TenantHeader { get; set; } = "X-Tenant-Id";
/// <summary>
/// Database connection options.
/// </summary>
public DatabaseOptions Database { get; set; } = new();
/// <summary>
/// Lease management options.
/// </summary>
public LeaseOptions Lease { get; set; } = new();
/// <summary>
/// Rate-limiting options.
/// </summary>
public RateLimitOptions RateLimit { get; set; } = new();
/// <summary>
/// Database connection options.
/// </summary>
public sealed class DatabaseOptions
{
/// <summary>
/// PostgreSQL connection string.
/// </summary>
public string ConnectionString { get; set; } = string.Empty;
/// <summary>
/// Command timeout in seconds.
/// </summary>
public int CommandTimeoutSeconds { get; set; } = 30;
/// <summary>
/// Enable connection pooling.
/// </summary>
public bool EnablePooling { get; set; } = true;
/// <summary>
/// Minimum pool size.
/// </summary>
public int MinPoolSize { get; set; } = 1;
/// <summary>
/// Maximum pool size.
/// </summary>
public int MaxPoolSize { get; set; } = 100;
}
/// <summary>
/// Lease management options.
/// </summary>
public sealed class LeaseOptions
{
/// <summary>
/// Default lease duration in seconds.
/// </summary>
public int DefaultLeaseDurationSeconds { get; set; } = 300;
/// <summary>
/// Maximum lease duration in seconds.
/// </summary>
public int MaxLeaseDurationSeconds { get; set; } = 3600;
/// <summary>
/// Lease renewal threshold (renew when this fraction of lease remains).
/// </summary>
public double RenewalThreshold { get; set; } = 0.5;
/// <summary>
/// Interval for checking expired leases in seconds.
/// </summary>
public int ExpiryCheckIntervalSeconds { get; set; } = 30;
}
/// <summary>
/// Rate-limiting options.
/// </summary>
public sealed class RateLimitOptions
{
/// <summary>
/// Default maximum concurrent active jobs per tenant.
/// </summary>
public int DefaultMaxActive { get; set; } = 10;
/// <summary>
/// Default maximum jobs per hour per tenant.
/// </summary>
public int DefaultMaxPerHour { get; set; } = 1000;
/// <summary>
/// Default burst capacity for token bucket.
/// </summary>
public int DefaultBurstCapacity { get; set; } = 50;
/// <summary>
/// Default token refill rate (tokens per second).
/// </summary>
public double DefaultRefillRate { get; set; } = 1.0;
/// <summary>
/// Failure rate threshold for circuit breaker (0.0-1.0).
/// </summary>
public double CircuitBreakerThreshold { get; set; } = 0.5;
/// <summary>
/// Window size in minutes for failure rate calculation.
/// </summary>
public int CircuitBreakerWindowMinutes { get; set; } = 5;
/// <summary>
/// Minimum sample size before circuit breaker can trip.
/// </summary>
public int CircuitBreakerMinSamples { get; set; } = 10;
}
}
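
A registration sketch, assuming the standard Microsoft.Extensions configuration and DI stack; AddOrchestratorOptions is a hypothetical helper, not part of this commit:

using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Orchestrator.Infrastructure.Options;

public static class OrchestratorOptionsExtensions
{
// Binds the "Orchestrator" section to OrchestratorServiceOptions so that
// IOptions<OrchestratorServiceOptions> resolves with the configured values.
public static IServiceCollection AddOrchestratorOptions(
this IServiceCollection services, IConfiguration configuration)
=> services.Configure<OrchestratorServiceOptions>(
configuration.GetSection(OrchestratorServiceOptions.SectionName));
}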

View File

@@ -0,0 +1,118 @@
using System.Data;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Orchestrator.Infrastructure.Options;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// Manages PostgreSQL connections for the Orchestrator service.
/// Configures session-level tenant context for row-level security.
/// </summary>
public sealed class OrchestratorDataSource : IAsyncDisposable
{
private readonly NpgsqlDataSource _dataSource;
private readonly OrchestratorServiceOptions.DatabaseOptions _options;
private readonly ILogger<OrchestratorDataSource> _logger;
public OrchestratorDataSource(
IOptions<OrchestratorServiceOptions> options,
ILogger<OrchestratorDataSource> logger)
{
ArgumentNullException.ThrowIfNull(options);
_options = options.Value.Database;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
// Apply the pooling and timeout settings so the Database options take effect.
var connectionStringBuilder = new NpgsqlConnectionStringBuilder(_options.ConnectionString)
{
Pooling = _options.EnablePooling,
MinPoolSize = _options.MinPoolSize,
MaxPoolSize = _options.MaxPoolSize,
CommandTimeout = _options.CommandTimeoutSeconds
};
var builder = new NpgsqlDataSourceBuilder(connectionStringBuilder.ConnectionString);
_dataSource = builder.Build();
}
/// <summary>
/// Command timeout in seconds.
/// </summary>
public int CommandTimeoutSeconds => _options.CommandTimeoutSeconds;
/// <summary>
/// Disposes the data source and releases all connections.
/// </summary>
public async ValueTask DisposeAsync()
{
await _dataSource.DisposeAsync().ConfigureAwait(false);
}
/// <summary>
/// Opens a connection with tenant context configured.
/// </summary>
/// <param name="tenantId">Tenant identifier for session configuration.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Open PostgreSQL connection.</returns>
public Task<NpgsqlConnection> OpenConnectionAsync(string tenantId, CancellationToken cancellationToken)
=> OpenConnectionInternalAsync(tenantId, "unspecified", cancellationToken);
/// <summary>
/// Opens a connection with tenant context and role label configured.
/// </summary>
/// <param name="tenantId">Tenant identifier for session configuration.</param>
/// <param name="role">Role label for metrics/logging (e.g., "reader", "writer").</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Open PostgreSQL connection.</returns>
public Task<NpgsqlConnection> OpenConnectionAsync(string tenantId, string role, CancellationToken cancellationToken)
=> OpenConnectionInternalAsync(tenantId, role, cancellationToken);
private async Task<NpgsqlConnection> OpenConnectionInternalAsync(string tenantId, string role, CancellationToken cancellationToken)
{
var connection = await _dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
try
{
await ConfigureSessionAsync(connection, tenantId, cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.ConnectionOpened(role);
connection.StateChange += (_, args) =>
{
if (args.CurrentState == ConnectionState.Closed)
{
OrchestratorMetrics.ConnectionClosed(role);
}
};
}
catch
{
await connection.DisposeAsync().ConfigureAwait(false);
throw;
}
return connection;
}
private async Task ConfigureSessionAsync(NpgsqlConnection connection, string tenantId, CancellationToken cancellationToken)
{
try
{
// Set UTC timezone for deterministic timestamps
await using (var command = new NpgsqlCommand("SET TIME ZONE 'UTC';", connection))
{
command.CommandTimeout = _options.CommandTimeoutSeconds;
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
// Set tenant context for row-level security
if (!string.IsNullOrWhiteSpace(tenantId))
{
await using var tenantCommand = new NpgsqlCommand("SELECT set_config('app.current_tenant', @tenant, false);", connection);
tenantCommand.CommandTimeout = _options.CommandTimeoutSeconds;
tenantCommand.Parameters.AddWithValue("tenant", tenantId);
await tenantCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
}
catch (Exception ex)
{
if (_logger.IsEnabled(LogLevel.Error))
{
_logger.LogError(ex, "Failed to configure PostgreSQL session for tenant {TenantId}.", tenantId);
}
throw;
}
}
}
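
A usage sketch for the session contract above: connections come back pinned to UTC with the tenant id in app.current_tenant, so row-level-security policies keyed on that setting scope queries automatically. dataSource, cancellationToken, and the tenant/role values are assumed to be in scope and are illustrative:

// Opens a reader connection for one tenant; RLS policies on
// app.current_tenant (if defined) filter rows without explicit predicates.
await using var connection = await dataSource.OpenConnectionAsync(
"tenant-a", "reader", cancellationToken);

await using var command = new NpgsqlCommand("SELECT COUNT(*) FROM artifacts", connection);
var result = await command.ExecuteScalarAsync(cancellationToken);
var artifactCount = (long)result!;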

View File

@@ -0,0 +1,362 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of artifact repository.
/// </summary>
public sealed class PostgresArtifactRepository : IArtifactRepository
{
private const string SelectArtifactColumns = """
artifact_id, tenant_id, job_id, run_id, artifact_type, uri, digest,
mime_type, size_bytes, created_at, metadata
""";
private const string SelectByIdSql = $"""
SELECT {SelectArtifactColumns}
FROM artifacts
WHERE tenant_id = @tenant_id AND artifact_id = @artifact_id
""";
private const string SelectByJobIdSql = $"""
SELECT {SelectArtifactColumns}
FROM artifacts
WHERE tenant_id = @tenant_id AND job_id = @job_id
ORDER BY created_at
""";
private const string SelectByRunIdSql = $"""
SELECT {SelectArtifactColumns}
FROM artifacts
WHERE tenant_id = @tenant_id AND run_id = @run_id
ORDER BY created_at
""";
private const string SelectByDigestSql = $"""
SELECT {SelectArtifactColumns}
FROM artifacts
WHERE tenant_id = @tenant_id AND digest = @digest
""";
private const string InsertArtifactSql = """
INSERT INTO artifacts (
artifact_id, tenant_id, job_id, run_id, artifact_type, uri, digest,
mime_type, size_bytes, created_at, metadata)
VALUES (
@artifact_id, @tenant_id, @job_id, @run_id, @artifact_type, @uri, @digest,
@mime_type, @size_bytes, @created_at, @metadata)
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresArtifactRepository> _logger;
public PostgresArtifactRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresArtifactRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Artifact?> GetByIdAsync(string tenantId, Guid artifactId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("artifact_id", artifactId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapArtifact(reader);
}
public async Task<IReadOnlyList<Artifact>> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByJobIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_id", jobId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var artifacts = new List<Artifact>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
artifacts.Add(MapArtifact(reader));
}
return artifacts;
}
public async Task<IReadOnlyList<Artifact>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByRunIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("run_id", runId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var artifacts = new List<Artifact>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
artifacts.Add(MapArtifact(reader));
}
return artifacts;
}
public async Task<Artifact?> GetByDigestAsync(string tenantId, string digest, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByDigestSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("digest", digest);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapArtifact(reader);
}
public async Task CreateAsync(Artifact artifact, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(artifact.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertArtifactSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddArtifactParameters(command, artifact);
try
{
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.ArtifactCreated(artifact.TenantId, artifact.ArtifactType);
}
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
{
_logger.LogWarning("Duplicate artifact ID or digest: {ArtifactId}, {Digest}", artifact.ArtifactId, artifact.Digest);
throw new DuplicateArtifactException(artifact.ArtifactId, artifact.Digest, ex);
}
}
public async Task CreateBatchAsync(IEnumerable<Artifact> artifacts, CancellationToken cancellationToken)
{
var artifactList = artifacts.ToList();
if (artifactList.Count == 0)
{
return;
}
// Batch inserts assume all artifacts belong to the same tenant.
var tenantId = artifactList[0].TenantId;
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
try
{
foreach (var artifact in artifactList)
{
await using var command = new NpgsqlCommand(InsertArtifactSql, connection, transaction);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddArtifactParameters(command, artifact);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.ArtifactCreated(artifact.TenantId, artifact.ArtifactType);
}
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
}
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
_logger.LogWarning(ex, "Duplicate artifact in batch insert");
throw;
}
catch
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
throw;
}
}
public async Task<IReadOnlyList<Artifact>> ListAsync(
string tenantId,
string? artifactType,
string? jobType,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, artifactType, jobType, createdAfter, createdBefore, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var artifacts = new List<Artifact>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
artifacts.Add(MapArtifact(reader));
}
return artifacts;
}
public async Task<int> CountAsync(
string tenantId,
string? artifactType,
string? jobType,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildCountQuery(tenantId, artifactType, jobType);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt32(result);
}
private static void AddArtifactParameters(NpgsqlCommand command, Artifact artifact)
{
command.Parameters.AddWithValue("artifact_id", artifact.ArtifactId);
command.Parameters.AddWithValue("tenant_id", artifact.TenantId);
command.Parameters.AddWithValue("job_id", artifact.JobId);
command.Parameters.AddWithValue("run_id", (object?)artifact.RunId ?? DBNull.Value);
command.Parameters.AddWithValue("artifact_type", artifact.ArtifactType);
command.Parameters.AddWithValue("uri", artifact.Uri);
command.Parameters.AddWithValue("digest", artifact.Digest);
command.Parameters.AddWithValue("mime_type", (object?)artifact.MimeType ?? DBNull.Value);
command.Parameters.AddWithValue("size_bytes", (object?)artifact.SizeBytes ?? DBNull.Value);
command.Parameters.AddWithValue("created_at", artifact.CreatedAt);
command.Parameters.Add(new NpgsqlParameter("metadata", NpgsqlDbType.Jsonb)
{
Value = (object?)artifact.Metadata ?? DBNull.Value
});
}
private static Artifact MapArtifact(NpgsqlDataReader reader)
{
return new Artifact(
ArtifactId: reader.GetGuid(0),
TenantId: reader.GetString(1),
JobId: reader.GetGuid(2),
RunId: reader.IsDBNull(3) ? null : reader.GetGuid(3),
ArtifactType: reader.GetString(4),
Uri: reader.GetString(5),
Digest: reader.GetString(6),
MimeType: reader.IsDBNull(7) ? null : reader.GetString(7),
SizeBytes: reader.IsDBNull(8) ? null : reader.GetInt64(8),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(9),
Metadata: reader.IsDBNull(10) ? null : reader.GetString(10));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
string? artifactType,
string? jobType,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectArtifactColumns} FROM artifacts a WHERE a.tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (!string.IsNullOrEmpty(artifactType))
{
sb.Append(" AND a.artifact_type = @artifact_type");
parameters.Add(("artifact_type", artifactType));
}
if (!string.IsNullOrEmpty(jobType))
{
sb.Append(" AND EXISTS (SELECT 1 FROM jobs j WHERE j.job_id = a.job_id AND j.tenant_id = a.tenant_id AND j.job_type = @job_type)");
parameters.Add(("job_type", jobType));
}
if (createdAfter.HasValue)
{
sb.Append(" AND a.created_at >= @created_after");
parameters.Add(("created_after", createdAfter.Value));
}
if (createdBefore.HasValue)
{
sb.Append(" AND a.created_at < @created_before");
parameters.Add(("created_before", createdBefore.Value));
}
sb.Append(" ORDER BY a.created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
string tenantId,
string? artifactType,
string? jobType)
{
var sb = new StringBuilder();
sb.Append("SELECT COUNT(*) FROM artifacts a WHERE a.tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (!string.IsNullOrEmpty(artifactType))
{
sb.Append(" AND a.artifact_type = @artifact_type");
parameters.Add(("artifact_type", artifactType));
}
if (!string.IsNullOrEmpty(jobType))
{
sb.Append(" AND EXISTS (SELECT 1 FROM jobs j WHERE j.job_id = a.job_id AND j.tenant_id = a.tenant_id AND j.job_type = @job_type)");
parameters.Add(("job_type", jobType));
}
return (sb.ToString(), parameters);
}
}
/// <summary>
/// Exception thrown when attempting to create a duplicate artifact.
/// </summary>
public sealed class DuplicateArtifactException : Exception
{
public Guid ArtifactId { get; }
public string Digest { get; }
public DuplicateArtifactException(Guid artifactId, string digest, Exception innerException)
: base($"Artifact with ID '{artifactId}' or digest '{digest}' already exists.", innerException)
{
ArtifactId = artifactId;
Digest = digest;
}
}
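
A consumption sketch for the duplicate contract above. The Artifact shape is inferred from MapArtifact, and repository, jobId, logger, and cancellationToken are assumed to be in scope; all values are illustrative:

// Digest collisions surface as DuplicateArtifactException, which callers
// can treat as an idempotent success for content-addressed artifacts.
var artifact = new Artifact(
ArtifactId: Guid.NewGuid(),
TenantId: "tenant-a",
JobId: jobId,
RunId: null,
ArtifactType: "sbom",
Uri: "cas://artifacts/sha256/abc123",
Digest: "sha256:abc123",
MimeType: "application/json",
SizeBytes: 2048,
CreatedAt: DateTimeOffset.UtcNow,
Metadata: null);

try
{
await repository.CreateAsync(artifact, cancellationToken);
}
catch (DuplicateArtifactException ex)
{
logger.LogInformation("Artifact {Digest} already stored; skipping", ex.Digest);
}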

View File

@@ -0,0 +1,504 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of the audit repository.
/// </summary>
public sealed class PostgresAuditRepository : IAuditRepository
{
private const string SelectAuditColumns = """
entry_id, tenant_id, event_type, resource_type, resource_id, actor_id, actor_type,
actor_ip, user_agent, http_method, request_path, old_state, new_state, description,
correlation_id, previous_entry_hash, content_hash, sequence_number, occurred_at, metadata
""";
private const string SelectByIdSql = $"""
SELECT {SelectAuditColumns}
FROM audit_entries
WHERE tenant_id = @tenant_id AND entry_id = @entry_id
""";
private const string InsertEntrySql = """
INSERT INTO audit_entries (
entry_id, tenant_id, event_type, resource_type, resource_id, actor_id, actor_type,
actor_ip, user_agent, http_method, request_path, old_state, new_state, description,
correlation_id, previous_entry_hash, content_hash, sequence_number, occurred_at, metadata)
VALUES (
@entry_id, @tenant_id, @event_type, @resource_type, @resource_id, @actor_id, @actor_type,
@actor_ip, @user_agent, @http_method, @request_path, @old_state::jsonb, @new_state::jsonb, @description,
@correlation_id, @previous_entry_hash, @content_hash, @sequence_number, @occurred_at, @metadata::jsonb)
""";
private const string SelectLatestSql = $"""
SELECT {SelectAuditColumns}
FROM audit_entries
WHERE tenant_id = @tenant_id
ORDER BY sequence_number DESC
LIMIT 1
""";
private const string GetSequenceSql = """
SELECT next_seq, prev_hash FROM next_audit_sequence(@tenant_id)
""";
private const string UpdateSequenceHashSql = """
SELECT update_audit_sequence_hash(@tenant_id, @content_hash)
""";
private const string VerifyChainSql = """
SELECT is_valid, invalid_entry_id, invalid_sequence, error_message
FROM verify_audit_chain(@tenant_id, @start_seq, @end_seq)
""";
private const string GetSummarySql = """
SELECT total_entries, entries_since, event_types, unique_actors, unique_resources, earliest_entry, latest_entry
FROM get_audit_summary(@tenant_id, @since)
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresAuditRepository> _logger;
public PostgresAuditRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresAuditRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<AuditEntry> AppendAsync(
string tenantId,
AuditEventType eventType,
string resourceType,
Guid resourceId,
string actorId,
ActorType actorType,
string description,
string? oldState = null,
string? newState = null,
string? actorIp = null,
string? userAgent = null,
string? httpMethod = null,
string? requestPath = null,
string? correlationId = null,
string? metadata = null,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
try
{
// Get next sequence number and previous hash
long sequenceNumber;
string? previousEntryHash;
await using (var seqCommand = new NpgsqlCommand(GetSequenceSql, connection, transaction))
{
seqCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
seqCommand.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await seqCommand.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
throw new InvalidOperationException("Failed to get next audit sequence.");
}
sequenceNumber = reader.GetInt64(0);
previousEntryHash = reader.IsDBNull(1) ? null : reader.GetString(1);
}
// Create the entry
var entry = AuditEntry.Create(
tenantId: tenantId,
eventType: eventType,
resourceType: resourceType,
resourceId: resourceId,
actorId: actorId,
actorType: actorType,
description: description,
oldState: oldState,
newState: newState,
actorIp: actorIp,
userAgent: userAgent,
httpMethod: httpMethod,
requestPath: requestPath,
correlationId: correlationId,
previousEntryHash: previousEntryHash,
sequenceNumber: sequenceNumber,
metadata: metadata);
// Insert the entry
await using (var insertCommand = new NpgsqlCommand(InsertEntrySql, connection, transaction))
{
insertCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddEntryParameters(insertCommand, entry);
await insertCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
// Update sequence hash
await using (var updateCommand = new NpgsqlCommand(UpdateSequenceHashSql, connection, transaction))
{
updateCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
updateCommand.Parameters.AddWithValue("tenant_id", tenantId);
updateCommand.Parameters.AddWithValue("content_hash", entry.ContentHash);
await updateCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.AuditEntryCreated(tenantId, eventType.ToString(), resourceType);
_logger.LogDebug("Audit entry {EntryId} appended for tenant {TenantId}, sequence {Sequence}",
entry.EntryId, tenantId, sequenceNumber);
return entry;
}
catch
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
throw;
}
}
public async Task<AuditEntry?> GetByIdAsync(
string tenantId,
Guid entryId,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("entry_id", entryId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<IReadOnlyList<AuditEntry>> ListAsync(
string tenantId,
AuditEventType? eventType = null,
string? resourceType = null,
Guid? resourceId = null,
string? actorId = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var (sql, parameters) = BuildListQuery(tenantId, eventType, resourceType, resourceId, actorId, startTime, endTime, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<AuditEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<IReadOnlyList<AuditEntry>> GetBySequenceRangeAsync(
string tenantId,
long startSequence,
long endSequence,
CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectAuditColumns}
FROM audit_entries
WHERE tenant_id = @tenant_id
AND sequence_number >= @start_seq
AND sequence_number <= @end_seq
ORDER BY sequence_number ASC
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("start_seq", startSequence);
command.Parameters.AddWithValue("end_seq", endSequence);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<AuditEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<AuditEntry?> GetLatestAsync(
string tenantId,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectLatestSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<IReadOnlyList<AuditEntry>> GetByResourceAsync(
string tenantId,
string resourceType,
Guid resourceId,
int limit = 100,
CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectAuditColumns}
FROM audit_entries
WHERE tenant_id = @tenant_id
AND resource_type = @resource_type
AND resource_id = @resource_id
ORDER BY occurred_at DESC
LIMIT @limit
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("resource_type", resourceType);
command.Parameters.AddWithValue("resource_id", resourceId);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<AuditEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<long> GetCountAsync(
string tenantId,
AuditEventType? eventType = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
CancellationToken cancellationToken = default)
{
var sb = new StringBuilder("SELECT COUNT(*) FROM audit_entries WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (eventType.HasValue)
{
sb.Append(" AND event_type = @event_type");
parameters.Add(("event_type", (int)eventType.Value));
}
if (startTime.HasValue)
{
sb.Append(" AND occurred_at >= @start_time");
parameters.Add(("start_time", startTime.Value));
}
if (endTime.HasValue)
{
sb.Append(" AND occurred_at <= @end_time");
parameters.Add(("end_time", endTime.Value));
}
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sb.ToString(), connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt64(result);
}
public async Task<ChainVerificationResult> VerifyChainAsync(
string tenantId,
long? startSequence = null,
long? endSequence = null,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(VerifyChainSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("start_seq", (object?)startSequence ?? 1L);
command.Parameters.AddWithValue("end_seq", (object?)endSequence ?? DBNull.Value);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return new ChainVerificationResult(true, null, null, null);
}
return new ChainVerificationResult(
IsValid: reader.GetBoolean(0),
InvalidEntryId: reader.IsDBNull(1) ? null : reader.GetGuid(1),
InvalidSequence: reader.IsDBNull(2) ? null : reader.GetInt64(2),
ErrorMessage: reader.IsDBNull(3) ? null : reader.GetString(3));
}
public async Task<AuditSummary> GetSummaryAsync(
string tenantId,
DateTimeOffset? since = null,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(GetSummarySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("since", (object?)since ?? DBNull.Value);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return new AuditSummary(0, 0, 0, 0, 0, null, null);
}
return new AuditSummary(
TotalEntries: reader.GetInt64(0),
EntriesSince: reader.GetInt64(1),
EventTypes: reader.GetInt64(2),
UniqueActors: reader.GetInt64(3),
UniqueResources: reader.GetInt64(4),
EarliestEntry: reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5),
LatestEntry: reader.IsDBNull(6) ? null : reader.GetFieldValue<DateTimeOffset>(6));
}
private static void AddEntryParameters(NpgsqlCommand command, AuditEntry entry)
{
command.Parameters.AddWithValue("entry_id", entry.EntryId);
command.Parameters.AddWithValue("tenant_id", entry.TenantId);
command.Parameters.AddWithValue("event_type", (int)entry.EventType);
command.Parameters.AddWithValue("resource_type", entry.ResourceType);
command.Parameters.AddWithValue("resource_id", entry.ResourceId);
command.Parameters.AddWithValue("actor_id", entry.ActorId);
command.Parameters.AddWithValue("actor_type", (int)entry.ActorType);
command.Parameters.AddWithValue("actor_ip", (object?)entry.ActorIp ?? DBNull.Value);
command.Parameters.AddWithValue("user_agent", (object?)entry.UserAgent ?? DBNull.Value);
command.Parameters.AddWithValue("http_method", (object?)entry.HttpMethod ?? DBNull.Value);
command.Parameters.AddWithValue("request_path", (object?)entry.RequestPath ?? DBNull.Value);
command.Parameters.AddWithValue("old_state", (object?)entry.OldState ?? DBNull.Value);
command.Parameters.AddWithValue("new_state", (object?)entry.NewState ?? DBNull.Value);
command.Parameters.AddWithValue("description", entry.Description);
command.Parameters.AddWithValue("correlation_id", (object?)entry.CorrelationId ?? DBNull.Value);
command.Parameters.AddWithValue("previous_entry_hash", (object?)entry.PreviousEntryHash ?? DBNull.Value);
command.Parameters.AddWithValue("content_hash", entry.ContentHash);
command.Parameters.AddWithValue("sequence_number", entry.SequenceNumber);
command.Parameters.AddWithValue("occurred_at", entry.OccurredAt);
command.Parameters.AddWithValue("metadata", (object?)entry.Metadata ?? DBNull.Value);
}
private static AuditEntry MapEntry(NpgsqlDataReader reader)
{
return new AuditEntry(
EntryId: reader.GetGuid(0),
TenantId: reader.GetString(1),
EventType: (AuditEventType)reader.GetInt32(2),
ResourceType: reader.GetString(3),
ResourceId: reader.GetGuid(4),
ActorId: reader.GetString(5),
ActorType: (ActorType)reader.GetInt32(6),
ActorIp: reader.IsDBNull(7) ? null : reader.GetString(7),
UserAgent: reader.IsDBNull(8) ? null : reader.GetString(8),
HttpMethod: reader.IsDBNull(9) ? null : reader.GetString(9),
RequestPath: reader.IsDBNull(10) ? null : reader.GetString(10),
OldState: reader.IsDBNull(11) ? null : reader.GetString(11),
NewState: reader.IsDBNull(12) ? null : reader.GetString(12),
Description: reader.GetString(13),
CorrelationId: reader.IsDBNull(14) ? null : reader.GetString(14),
PreviousEntryHash: reader.IsDBNull(15) ? null : reader.GetString(15),
ContentHash: reader.GetString(16),
SequenceNumber: reader.GetInt64(17),
OccurredAt: reader.GetFieldValue<DateTimeOffset>(18),
Metadata: reader.IsDBNull(19) ? null : reader.GetString(19));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
AuditEventType? eventType,
string? resourceType,
Guid? resourceId,
string? actorId,
DateTimeOffset? startTime,
DateTimeOffset? endTime,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectAuditColumns} FROM audit_entries WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (eventType.HasValue)
{
sb.Append(" AND event_type = @event_type");
parameters.Add(("event_type", (int)eventType.Value));
}
if (resourceType is not null)
{
sb.Append(" AND resource_type = @resource_type");
parameters.Add(("resource_type", resourceType));
}
if (resourceId.HasValue)
{
sb.Append(" AND resource_id = @resource_id");
parameters.Add(("resource_id", resourceId.Value));
}
if (actorId is not null)
{
sb.Append(" AND actor_id = @actor_id");
parameters.Add(("actor_id", actorId));
}
if (startTime.HasValue)
{
sb.Append(" AND occurred_at >= @start_time");
parameters.Add(("start_time", startTime.Value));
}
if (endTime.HasValue)
{
sb.Append(" AND occurred_at <= @end_time");
parameters.Add(("end_time", endTime.Value));
}
sb.Append(" ORDER BY occurred_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
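
A sketch of the append-then-verify flow. AuditEventType.JobCreated and ActorType.User are hypothetical enum members (the real values live in the Core domain), and auditRepository, jobId, and cancellationToken are assumed to be in scope:

// Append participates in the hash chain: the repository fetches the next
// sequence number and previous hash inside one transaction.
var entry = await auditRepository.AppendAsync(
tenantId: "tenant-a",
eventType: AuditEventType.JobCreated,
resourceType: "job",
resourceId: jobId,
actorId: "user:alice",
actorType: ActorType.User,
description: "Job created via API",
cancellationToken: cancellationToken);

// Tamper detection: re-walk the chain from the first entry.
var verification = await auditRepository.VerifyChainAsync(
"tenant-a", cancellationToken: cancellationToken);
if (!verification.IsValid)
{
throw new InvalidOperationException(
$"Audit chain broken at sequence {verification.InvalidSequence}: {verification.ErrorMessage}");
}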

View File

@@ -0,0 +1,395 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of backfill request repository.
/// </summary>
public sealed class PostgresBackfillRepository : IBackfillRepository
{
private const string SelectBackfillColumns = """
backfill_id, tenant_id, source_id, job_type, scope_key, status,
window_start, window_end, current_position, total_events,
processed_events, skipped_events, failed_events, batch_size,
dry_run, force_reprocess, estimated_duration, max_duration,
safety_checks, reason, ticket, created_at, started_at, completed_at,
created_by, updated_by, error_message
""";
private const string SelectByIdSql = $"""
SELECT {SelectBackfillColumns}
FROM backfill_requests
WHERE tenant_id = @tenant_id AND backfill_id = @backfill_id
""";
private const string InsertBackfillSql = """
INSERT INTO backfill_requests (
backfill_id, tenant_id, source_id, job_type, scope_key, status,
window_start, window_end, current_position, total_events,
processed_events, skipped_events, failed_events, batch_size,
dry_run, force_reprocess, estimated_duration, max_duration,
safety_checks, reason, ticket, created_at, started_at, completed_at,
created_by, updated_by, error_message)
VALUES (
@backfill_id, @tenant_id, @source_id, @job_type, @scope_key, @status,
@window_start, @window_end, @current_position, @total_events,
@processed_events, @skipped_events, @failed_events, @batch_size,
@dry_run, @force_reprocess, @estimated_duration, @max_duration,
@safety_checks, @reason, @ticket, @created_at, @started_at, @completed_at,
@created_by, @updated_by, @error_message)
""";
private const string UpdateBackfillSql = """
UPDATE backfill_requests
SET status = @status,
current_position = @current_position,
total_events = @total_events,
processed_events = @processed_events,
skipped_events = @skipped_events,
failed_events = @failed_events,
estimated_duration = @estimated_duration,
safety_checks = @safety_checks,
started_at = @started_at,
completed_at = @completed_at,
updated_by = @updated_by,
error_message = @error_message
WHERE tenant_id = @tenant_id AND backfill_id = @backfill_id
""";
private const string SelectOverlappingSql = """
SELECT COUNT(*) FROM backfill_requests
WHERE tenant_id = @tenant_id
AND scope_key = @scope_key
AND status IN ('pending', 'validating', 'running', 'paused')
AND window_start < @window_end
AND window_end > @window_start
AND (@exclude_backfill_id IS NULL OR backfill_id != @exclude_backfill_id)
""";
private const string SelectActiveByScopeSql = $"""
SELECT {SelectBackfillColumns}
FROM backfill_requests
WHERE tenant_id = @tenant_id
AND scope_key = @scope_key
AND status IN ('pending', 'validating', 'running', 'paused')
ORDER BY created_at DESC
""";
private const string CountByStatusSql = """
SELECT status, COUNT(*) as count
FROM backfill_requests
WHERE tenant_id = @tenant_id
GROUP BY status
""";
private const string SelectNextPendingSql = $"""
SELECT {SelectBackfillColumns}
FROM backfill_requests
WHERE tenant_id = @tenant_id
AND status = 'pending'
ORDER BY created_at ASC
LIMIT 1
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresBackfillRepository> _logger;
private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };
public PostgresBackfillRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresBackfillRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<BackfillRequest?> GetByIdAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("backfill_id", backfillId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapBackfillRequest(reader);
}
public async Task CreateAsync(BackfillRequest request, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(request.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertBackfillSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddBackfillParameters(command, request);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.BackfillCreated(request.TenantId, request.ScopeKey);
}
public async Task UpdateAsync(BackfillRequest request, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(request.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateBackfillSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", request.TenantId);
command.Parameters.AddWithValue("backfill_id", request.BackfillId);
command.Parameters.AddWithValue("status", request.Status.ToString().ToLowerInvariant());
command.Parameters.AddWithValue("current_position", (object?)request.CurrentPosition ?? DBNull.Value);
command.Parameters.AddWithValue("total_events", (object?)request.TotalEvents ?? DBNull.Value);
command.Parameters.AddWithValue("processed_events", request.ProcessedEvents);
command.Parameters.AddWithValue("skipped_events", request.SkippedEvents);
command.Parameters.AddWithValue("failed_events", request.FailedEvents);
command.Parameters.AddWithValue("estimated_duration", (object?)request.EstimatedDuration ?? DBNull.Value);
command.Parameters.AddWithValue("safety_checks", request.SafetyChecks is not null
? JsonSerializer.Serialize(request.SafetyChecks, JsonOptions)
: DBNull.Value);
command.Parameters.AddWithValue("started_at", (object?)request.StartedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)request.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("updated_by", request.UpdatedBy);
command.Parameters.AddWithValue("error_message", (object?)request.ErrorMessage ?? DBNull.Value);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows == 0)
{
_logger.LogWarning("Backfill request not found for update: {BackfillId}", request.BackfillId);
}
else
{
OrchestratorMetrics.BackfillStatusChanged(request.TenantId, request.ScopeKey, request.Status.ToString());
}
}
public async Task<IReadOnlyList<BackfillRequest>> ListAsync(
string tenantId,
BackfillStatus? status,
Guid? sourceId,
string? jobType,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, status, sourceId, jobType, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var requests = new List<BackfillRequest>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
requests.Add(MapBackfillRequest(reader));
}
return requests;
}
public async Task<bool> HasOverlappingActiveAsync(
string tenantId,
string scopeKey,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
Guid? excludeBackfillId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectOverlappingSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
command.Parameters.AddWithValue("window_start", windowStart);
command.Parameters.AddWithValue("window_end", windowEnd);
command.Parameters.AddWithValue("exclude_backfill_id", (object?)excludeBackfillId ?? DBNull.Value);
var count = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt64(count) > 0;
}
public async Task<IReadOnlyList<BackfillRequest>> GetActiveByScope(
string tenantId,
string scopeKey,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectActiveByScopeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var requests = new List<BackfillRequest>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
requests.Add(MapBackfillRequest(reader));
}
return requests;
}
public async Task<IDictionary<BackfillStatus, int>> CountByStatusAsync(
string tenantId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(CountByStatusSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var counts = new Dictionary<BackfillStatus, int>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
var statusStr = reader.GetString(0);
var count = reader.GetInt32(1);
if (Enum.TryParse<BackfillStatus>(statusStr, true, out var status))
{
counts[status] = count;
}
}
return counts;
}
public async Task<BackfillRequest?> GetNextPendingAsync(string tenantId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectNextPendingSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapBackfillRequest(reader);
}
private static void AddBackfillParameters(NpgsqlCommand command, BackfillRequest request)
{
command.Parameters.AddWithValue("backfill_id", request.BackfillId);
command.Parameters.AddWithValue("tenant_id", request.TenantId);
command.Parameters.AddWithValue("source_id", (object?)request.SourceId ?? DBNull.Value);
command.Parameters.AddWithValue("job_type", (object?)request.JobType ?? DBNull.Value);
command.Parameters.AddWithValue("scope_key", request.ScopeKey);
command.Parameters.AddWithValue("status", request.Status.ToString().ToLowerInvariant());
command.Parameters.AddWithValue("window_start", request.WindowStart);
command.Parameters.AddWithValue("window_end", request.WindowEnd);
command.Parameters.AddWithValue("current_position", (object?)request.CurrentPosition ?? DBNull.Value);
command.Parameters.AddWithValue("total_events", (object?)request.TotalEvents ?? DBNull.Value);
command.Parameters.AddWithValue("processed_events", request.ProcessedEvents);
command.Parameters.AddWithValue("skipped_events", request.SkippedEvents);
command.Parameters.AddWithValue("failed_events", request.FailedEvents);
command.Parameters.AddWithValue("batch_size", request.BatchSize);
command.Parameters.AddWithValue("dry_run", request.DryRun);
command.Parameters.AddWithValue("force_reprocess", request.ForceReprocess);
command.Parameters.AddWithValue("estimated_duration", (object?)request.EstimatedDuration ?? DBNull.Value);
command.Parameters.AddWithValue("max_duration", (object?)request.MaxDuration ?? DBNull.Value);
command.Parameters.AddWithValue("safety_checks", request.SafetyChecks is not null
? JsonSerializer.Serialize(request.SafetyChecks, JsonOptions)
: DBNull.Value);
command.Parameters.AddWithValue("reason", request.Reason);
command.Parameters.AddWithValue("ticket", (object?)request.Ticket ?? DBNull.Value);
command.Parameters.AddWithValue("created_at", request.CreatedAt);
command.Parameters.AddWithValue("started_at", (object?)request.StartedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)request.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("created_by", request.CreatedBy);
command.Parameters.AddWithValue("updated_by", request.UpdatedBy);
command.Parameters.AddWithValue("error_message", (object?)request.ErrorMessage ?? DBNull.Value);
}
private static BackfillRequest MapBackfillRequest(NpgsqlDataReader reader)
{
var safetyChecksJson = reader.IsDBNull(18) ? null : reader.GetString(18);
var safetyChecks = safetyChecksJson is not null
? JsonSerializer.Deserialize<BackfillSafetyChecks>(safetyChecksJson, JsonOptions)
: null;
return new BackfillRequest(
BackfillId: reader.GetGuid(0),
TenantId: reader.GetString(1),
SourceId: reader.IsDBNull(2) ? null : reader.GetGuid(2),
JobType: reader.IsDBNull(3) ? null : reader.GetString(3),
ScopeKey: reader.GetString(4),
Status: Enum.Parse<BackfillStatus>(reader.GetString(5), ignoreCase: true),
WindowStart: reader.GetFieldValue<DateTimeOffset>(6),
WindowEnd: reader.GetFieldValue<DateTimeOffset>(7),
CurrentPosition: reader.IsDBNull(8) ? null : reader.GetFieldValue<DateTimeOffset>(8),
TotalEvents: reader.IsDBNull(9) ? null : reader.GetInt64(9),
ProcessedEvents: reader.GetInt64(10),
SkippedEvents: reader.GetInt64(11),
FailedEvents: reader.GetInt64(12),
BatchSize: reader.GetInt32(13),
DryRun: reader.GetBoolean(14),
ForceReprocess: reader.GetBoolean(15),
EstimatedDuration: reader.IsDBNull(16) ? null : reader.GetFieldValue<TimeSpan>(16),
MaxDuration: reader.IsDBNull(17) ? null : reader.GetFieldValue<TimeSpan>(17),
SafetyChecks: safetyChecks,
Reason: reader.GetString(19),
Ticket: reader.IsDBNull(20) ? null : reader.GetString(20),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(21),
StartedAt: reader.IsDBNull(22) ? null : reader.GetFieldValue<DateTimeOffset>(22),
CompletedAt: reader.IsDBNull(23) ? null : reader.GetFieldValue<DateTimeOffset>(23),
CreatedBy: reader.GetString(24),
UpdatedBy: reader.GetString(25),
ErrorMessage: reader.IsDBNull(26) ? null : reader.GetString(26));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
BackfillStatus? status,
Guid? sourceId,
string? jobType,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectBackfillColumns} FROM backfill_requests WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (status.HasValue)
{
sb.Append(" AND status = @status");
parameters.Add(("status", status.Value.ToString().ToLowerInvariant()));
}
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (jobType is not null)
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", jobType));
}
sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
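Taken together, GetNextPendingAsync and UpdateAsync support a simple claim-and-run polling loop. The sketch below is illustrative only: it assumes a single claimant per tenant (GetNextPendingAsync takes no lock), that BackfillStatus.Running corresponds to the 'running' status string used above, and a hypothetical "backfill-worker" identity.

// Hypothetical polling loop; performWindowAsync is a placeholder for the
// actual windowed replay logic and is not part of this commit.
public static async Task RunPendingBackfillsAsync(
    PostgresBackfillRepository repository,
    string tenantId,
    Func<BackfillRequest, CancellationToken, Task> performWindowAsync,
    CancellationToken cancellationToken)
{
    while (!cancellationToken.IsCancellationRequested)
    {
        var request = await repository.GetNextPendingAsync(tenantId, cancellationToken);
        if (request is null)
        {
            // Nothing pending; back off before polling again.
            await Task.Delay(TimeSpan.FromSeconds(30), cancellationToken);
            continue;
        }

        // BackfillRequest is an immutable record, so transitions go through `with`.
        var running = request with
        {
            Status = BackfillStatus.Running,
            StartedAt = DateTimeOffset.UtcNow,
            UpdatedBy = "backfill-worker",
        };
        await repository.UpdateAsync(running, cancellationToken);

        await performWindowAsync(running, cancellationToken);
        // ... mark completed or failed via another UpdateAsync in the same style.
    }
}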


@@ -0,0 +1,678 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.DeadLetter;
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of dead-letter entry repository.
/// </summary>
public sealed class PostgresDeadLetterRepository : IDeadLetterRepository
{
private const string SelectEntryColumns = """
entry_id, tenant_id, original_job_id, run_id, source_id, job_type,
payload, payload_digest, idempotency_key, correlation_id,
status, error_code, failure_reason, remediation_hint, category, is_retryable,
original_attempts, replay_attempts, max_replay_attempts,
failed_at, created_at, updated_at, expires_at, resolved_at,
resolution_notes, created_by, updated_by
""";
private const string SelectByIdSql = $"""
SELECT {SelectEntryColumns}
FROM dead_letter_entries
WHERE tenant_id = @tenant_id AND entry_id = @entry_id
""";
private const string SelectByJobIdSql = $"""
SELECT {SelectEntryColumns}
FROM dead_letter_entries
WHERE tenant_id = @tenant_id AND original_job_id = @original_job_id
ORDER BY created_at DESC
LIMIT 1
""";
private const string InsertEntrySql = """
INSERT INTO dead_letter_entries (
entry_id, tenant_id, original_job_id, run_id, source_id, job_type,
payload, payload_digest, idempotency_key, correlation_id,
status, error_code, failure_reason, remediation_hint, category, is_retryable,
original_attempts, replay_attempts, max_replay_attempts,
failed_at, created_at, updated_at, expires_at, resolved_at,
resolution_notes, created_by, updated_by)
VALUES (
@entry_id, @tenant_id, @original_job_id, @run_id, @source_id, @job_type,
@payload::jsonb, @payload_digest, @idempotency_key, @correlation_id,
@status, @error_code, @failure_reason, @remediation_hint, @category, @is_retryable,
@original_attempts, @replay_attempts, @max_replay_attempts,
@failed_at, @created_at, @updated_at, @expires_at, @resolved_at,
@resolution_notes, @created_by, @updated_by)
""";
private const string UpdateEntrySql = """
UPDATE dead_letter_entries
SET status = @status,
replay_attempts = @replay_attempts,
failure_reason = @failure_reason,
updated_at = @updated_at,
resolved_at = @resolved_at,
resolution_notes = @resolution_notes,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND entry_id = @entry_id
""";
private const string SelectPendingRetryableSql = $"""
SELECT {SelectEntryColumns}
FROM dead_letter_entries
WHERE tenant_id = @tenant_id
AND status = 'pending'
AND is_retryable = TRUE
AND replay_attempts < max_replay_attempts
ORDER BY created_at ASC
LIMIT @limit
""";
private const string SelectByErrorCodeSql = $"""
SELECT {SelectEntryColumns}
FROM dead_letter_entries
WHERE tenant_id = @tenant_id
AND error_code = @error_code
AND (@status IS NULL OR status = @status)
ORDER BY created_at DESC
LIMIT @limit
""";
private const string SelectByCategorySql = $"""
SELECT {SelectEntryColumns}
FROM dead_letter_entries
WHERE tenant_id = @tenant_id
AND category = @category
AND (@status IS NULL OR status = @status)
ORDER BY created_at DESC
LIMIT @limit
""";
private const string MarkExpiredSql = """
SELECT mark_expired_dead_letter_entries(@batch_limit)
""";
private const string PurgeSql = """
SELECT purge_dead_letter_entries(@retention_days, @batch_limit)
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresDeadLetterRepository> _logger;
private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };
public PostgresDeadLetterRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresDeadLetterRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<DeadLetterEntry?> GetByIdAsync(
string tenantId,
Guid entryId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("entry_id", entryId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<DeadLetterEntry?> GetByOriginalJobIdAsync(
string tenantId,
Guid originalJobId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByJobIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("original_job_id", originalJobId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
string tenantId,
DeadLetterListOptions options,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, options);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<DeadLetterEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<long> CountAsync(
string tenantId,
DeadLetterListOptions options,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildCountQuery(tenantId, options);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt64(result);
}
public async Task CreateAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(entry.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertEntrySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddEntryParameters(command, entry);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.DeadLetterCreated(entry.TenantId, entry.JobType, entry.ErrorCode, entry.Category.ToString());
}
public async Task<bool> UpdateAsync(
DeadLetterEntry entry,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(entry.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateEntrySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", entry.TenantId);
command.Parameters.AddWithValue("entry_id", entry.EntryId);
command.Parameters.AddWithValue("status", entry.Status.ToString().ToLowerInvariant());
command.Parameters.AddWithValue("replay_attempts", entry.ReplayAttempts);
command.Parameters.AddWithValue("failure_reason", entry.FailureReason);
command.Parameters.AddWithValue("updated_at", entry.UpdatedAt);
command.Parameters.AddWithValue("resolved_at", (object?)entry.ResolvedAt ?? DBNull.Value);
command.Parameters.AddWithValue("resolution_notes", (object?)entry.ResolutionNotes ?? DBNull.Value);
command.Parameters.AddWithValue("updated_by", entry.UpdatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.DeadLetterStatusChanged(entry.TenantId, entry.JobType, entry.Status.ToString());
}
return rows > 0;
}
public async Task<IReadOnlyList<DeadLetterEntry>> GetPendingRetryableAsync(
string tenantId,
int limit,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectPendingRetryableSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<DeadLetterEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<IReadOnlyList<DeadLetterEntry>> GetByErrorCodeAsync(
string tenantId,
string errorCode,
DeadLetterStatus? status,
int limit,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByErrorCodeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("error_code", errorCode);
command.Parameters.AddWithValue("status", status.HasValue ? status.Value.ToString().ToLowerInvariant() : DBNull.Value);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<DeadLetterEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<IReadOnlyList<DeadLetterEntry>> GetByCategoryAsync(
string tenantId,
ErrorCategory category,
DeadLetterStatus? status,
int limit,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByCategorySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("category", category.ToString().ToLowerInvariant());
command.Parameters.AddWithValue("status", status.HasValue ? status.Value.ToString().ToLowerInvariant() : DBNull.Value);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<DeadLetterEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<DeadLetterStats> GetStatsAsync(
string tenantId,
CancellationToken cancellationToken)
{
const string statsSql = """
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE status = 'pending') AS pending,
COUNT(*) FILTER (WHERE status = 'replaying') AS replaying,
COUNT(*) FILTER (WHERE status = 'replayed') AS replayed,
COUNT(*) FILTER (WHERE status = 'resolved') AS resolved,
COUNT(*) FILTER (WHERE status = 'exhausted') AS exhausted,
COUNT(*) FILTER (WHERE status = 'expired') AS expired,
COUNT(*) FILTER (WHERE is_retryable = TRUE AND status = 'pending') AS retryable
FROM dead_letter_entries
WHERE tenant_id = @tenant_id
""";
const string byCategorySql = """
SELECT category, COUNT(*) as cnt
FROM dead_letter_entries
WHERE tenant_id = @tenant_id
GROUP BY category
""";
const string topErrorCodesSql = """
SELECT error_code, COUNT(*) as cnt
FROM dead_letter_entries
WHERE tenant_id = @tenant_id AND status = 'pending'
GROUP BY error_code
ORDER BY cnt DESC
LIMIT 10
""";
const string topJobTypesSql = """
SELECT job_type, COUNT(*) as cnt
FROM dead_letter_entries
WHERE tenant_id = @tenant_id AND status = 'pending'
GROUP BY job_type
ORDER BY cnt DESC
LIMIT 10
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
// Get counts
long total = 0, pending = 0, replaying = 0, replayed = 0, resolved = 0, exhausted = 0, expired = 0, retryable = 0;
await using (var command = new NpgsqlCommand(statsSql, connection))
{
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
total = reader.GetInt64(0);
pending = reader.GetInt64(1);
replaying = reader.GetInt64(2);
replayed = reader.GetInt64(3);
resolved = reader.GetInt64(4);
exhausted = reader.GetInt64(5);
expired = reader.GetInt64(6);
retryable = reader.GetInt64(7);
}
}
// Get by category
var byCategory = new Dictionary<ErrorCategory, long>();
await using (var command = new NpgsqlCommand(byCategorySql, connection))
{
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
if (Enum.TryParse<ErrorCategory>(reader.GetString(0), true, out var cat))
{
byCategory[cat] = reader.GetInt64(1);
}
}
}
// Get top error codes
var topErrorCodes = new Dictionary<string, long>();
await using (var command = new NpgsqlCommand(topErrorCodesSql, connection))
{
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
topErrorCodes[reader.GetString(0)] = reader.GetInt64(1);
}
}
// Get top job types
var topJobTypes = new Dictionary<string, long>();
await using (var command = new NpgsqlCommand(topJobTypesSql, connection))
{
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
topJobTypes[reader.GetString(0)] = reader.GetInt64(1);
}
}
return new DeadLetterStats(
TotalEntries: total,
PendingEntries: pending,
ReplayingEntries: replaying,
ReplayedEntries: replayed,
ResolvedEntries: resolved,
ExhaustedEntries: exhausted,
ExpiredEntries: expired,
RetryableEntries: retryable,
ByCategory: byCategory,
TopErrorCodes: topErrorCodes,
TopJobTypes: topJobTypes);
}
public async Task<IReadOnlyList<DeadLetterSummary>> GetActionableSummaryAsync(
string tenantId,
int limit,
CancellationToken cancellationToken)
{
const string sql = """
SELECT error_code, category, entry_count, retryable_count, oldest_entry, sample_reason
FROM get_actionable_dead_letter_summary(@tenant_id, @limit)
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var summaries = new List<DeadLetterSummary>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
var categoryStr = reader.GetString(1);
var category = Enum.TryParse<ErrorCategory>(categoryStr, true, out var cat) ? cat : ErrorCategory.Unknown;
summaries.Add(new DeadLetterSummary(
ErrorCode: reader.GetString(0),
Category: category,
EntryCount: reader.GetInt64(2),
RetryableCount: reader.GetInt64(3),
OldestEntry: reader.GetFieldValue<DateTimeOffset>(4),
SampleReason: reader.IsDBNull(5) ? null : reader.GetString(5)));
}
return summaries;
}
public async Task<int> MarkExpiredAsync(
int batchLimit,
CancellationToken cancellationToken)
{
// Use a system-level connection (no tenant context needed for maintenance)
await using var connection = await _dataSource.OpenConnectionAsync("system", "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(MarkExpiredSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("batch_limit", batchLimit);
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
var marked = Convert.ToInt32(result);
if (marked > 0)
{
OrchestratorMetrics.DeadLetterExpired(marked);
_logger.LogInformation("Marked {Count} dead-letter entries as expired", marked);
}
return marked;
}
public async Task<int> PurgeOldEntriesAsync(
int retentionDays,
int batchLimit,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync("system", "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(PurgeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("retention_days", retentionDays);
command.Parameters.AddWithValue("batch_limit", batchLimit);
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
var purged = Convert.ToInt32(result);
if (purged > 0)
{
OrchestratorMetrics.DeadLetterPurged(purged);
_logger.LogInformation("Purged {Count} old dead-letter entries (retention: {RetentionDays} days)", purged, retentionDays);
}
return purged;
}
private static void AddEntryParameters(NpgsqlCommand command, DeadLetterEntry entry)
{
command.Parameters.AddWithValue("entry_id", entry.EntryId);
command.Parameters.AddWithValue("tenant_id", entry.TenantId);
command.Parameters.AddWithValue("original_job_id", entry.OriginalJobId);
command.Parameters.AddWithValue("run_id", (object?)entry.RunId ?? DBNull.Value);
command.Parameters.AddWithValue("source_id", (object?)entry.SourceId ?? DBNull.Value);
command.Parameters.AddWithValue("job_type", entry.JobType);
command.Parameters.AddWithValue("payload", entry.Payload);
command.Parameters.AddWithValue("payload_digest", entry.PayloadDigest);
command.Parameters.AddWithValue("idempotency_key", entry.IdempotencyKey);
command.Parameters.AddWithValue("correlation_id", (object?)entry.CorrelationId ?? DBNull.Value);
command.Parameters.AddWithValue("status", entry.Status.ToString().ToLowerInvariant());
command.Parameters.AddWithValue("error_code", entry.ErrorCode);
command.Parameters.AddWithValue("failure_reason", entry.FailureReason);
command.Parameters.AddWithValue("remediation_hint", (object?)entry.RemediationHint ?? DBNull.Value);
command.Parameters.AddWithValue("category", entry.Category.ToString().ToLowerInvariant());
command.Parameters.AddWithValue("is_retryable", entry.IsRetryable);
command.Parameters.AddWithValue("original_attempts", entry.OriginalAttempts);
command.Parameters.AddWithValue("replay_attempts", entry.ReplayAttempts);
command.Parameters.AddWithValue("max_replay_attempts", entry.MaxReplayAttempts);
command.Parameters.AddWithValue("failed_at", entry.FailedAt);
command.Parameters.AddWithValue("created_at", entry.CreatedAt);
command.Parameters.AddWithValue("updated_at", entry.UpdatedAt);
command.Parameters.AddWithValue("expires_at", entry.ExpiresAt);
command.Parameters.AddWithValue("resolved_at", (object?)entry.ResolvedAt ?? DBNull.Value);
command.Parameters.AddWithValue("resolution_notes", (object?)entry.ResolutionNotes ?? DBNull.Value);
command.Parameters.AddWithValue("created_by", entry.CreatedBy);
command.Parameters.AddWithValue("updated_by", entry.UpdatedBy);
}
private static DeadLetterEntry MapEntry(NpgsqlDataReader reader)
{
var statusStr = reader.GetString(10);
var categoryStr = reader.GetString(14);
return new DeadLetterEntry(
EntryId: reader.GetGuid(0),
TenantId: reader.GetString(1),
OriginalJobId: reader.GetGuid(2),
RunId: reader.IsDBNull(3) ? null : reader.GetGuid(3),
SourceId: reader.IsDBNull(4) ? null : reader.GetGuid(4),
JobType: reader.GetString(5),
Payload: reader.GetString(6),
PayloadDigest: reader.GetString(7),
IdempotencyKey: reader.GetString(8),
CorrelationId: reader.IsDBNull(9) ? null : reader.GetString(9),
Status: Enum.TryParse<DeadLetterStatus>(statusStr, true, out var status) ? status : DeadLetterStatus.Pending,
ErrorCode: reader.GetString(11),
FailureReason: reader.GetString(12),
RemediationHint: reader.IsDBNull(13) ? null : reader.GetString(13),
Category: Enum.TryParse<ErrorCategory>(categoryStr, true, out var cat) ? cat : ErrorCategory.Unknown,
IsRetryable: reader.GetBoolean(15),
OriginalAttempts: reader.GetInt32(16),
ReplayAttempts: reader.GetInt32(17),
MaxReplayAttempts: reader.GetInt32(18),
FailedAt: reader.GetFieldValue<DateTimeOffset>(19),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(20),
UpdatedAt: reader.GetFieldValue<DateTimeOffset>(21),
ExpiresAt: reader.GetFieldValue<DateTimeOffset>(22),
ResolvedAt: reader.IsDBNull(23) ? null : reader.GetFieldValue<DateTimeOffset>(23),
ResolutionNotes: reader.IsDBNull(24) ? null : reader.GetString(24),
CreatedBy: reader.GetString(25),
UpdatedBy: reader.GetString(26));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
DeadLetterListOptions options)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectEntryColumns} FROM dead_letter_entries WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
AppendFilters(sb, parameters, options);
        // Keyset pagination: the cursor is the created_at timestamp of the last row
        // seen. The predicate must be appended before ORDER BY, and only when the
        // cursor parses, so @cursor is always bound whenever the SQL references it.
        if (!string.IsNullOrEmpty(options.Cursor) &&
            DateTimeOffset.TryParse(options.Cursor, out var cursor))
        {
            var op = options.Ascending ? ">" : "<";
            sb.Append($" AND created_at {op} @cursor");
            parameters.Add(("cursor", cursor));
        }
        var order = options.Ascending ? "ASC" : "DESC";
        sb.Append($" ORDER BY created_at {order}");
        sb.Append(" LIMIT @limit");
        parameters.Add(("limit", options.Limit));
return (sb.ToString(), parameters);
}
private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
string tenantId,
DeadLetterListOptions options)
{
var sb = new StringBuilder();
sb.Append("SELECT COUNT(*) FROM dead_letter_entries WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
AppendFilters(sb, parameters, options);
return (sb.ToString(), parameters);
}
private static void AppendFilters(StringBuilder sb, List<(string, object)> parameters, DeadLetterListOptions options)
{
if (options.Status.HasValue)
{
sb.Append(" AND status = @status");
parameters.Add(("status", options.Status.Value.ToString().ToLowerInvariant()));
}
if (options.Category.HasValue)
{
sb.Append(" AND category = @category");
parameters.Add(("category", options.Category.Value.ToString().ToLowerInvariant()));
}
if (!string.IsNullOrEmpty(options.JobType))
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", options.JobType));
}
if (!string.IsNullOrEmpty(options.ErrorCode))
{
sb.Append(" AND error_code = @error_code");
parameters.Add(("error_code", options.ErrorCode));
}
if (options.SourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", options.SourceId.Value));
}
if (options.RunId.HasValue)
{
sb.Append(" AND run_id = @run_id");
parameters.Add(("run_id", options.RunId.Value));
}
if (options.IsRetryable.HasValue)
{
sb.Append(" AND is_retryable = @is_retryable");
parameters.Add(("is_retryable", options.IsRetryable.Value));
}
if (options.CreatedAfter.HasValue)
{
sb.Append(" AND created_at >= @created_after");
parameters.Add(("created_after", options.CreatedAfter.Value));
}
if (options.CreatedBefore.HasValue)
{
sb.Append(" AND created_at <= @created_before");
parameters.Add(("created_before", options.CreatedBefore.Value));
}
}
}
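GetPendingRetryableAsync pairs with UpdateAsync for a periodic replay sweep. The sketch below is a rough outline: it assumes DeadLetterStatus.Replaying matches the 'replaying' status string used in GetStatsAsync, and the requeue delegate stands in for whatever re-enqueues the stored payload as a fresh job.

// Hypothetical replay sweep; requeueAsync is a placeholder and not part of
// this commit.
public static async Task ReplayPendingAsync(
    PostgresDeadLetterRepository repository,
    string tenantId,
    Func<DeadLetterEntry, CancellationToken, Task> requeueAsync,
    CancellationToken cancellationToken)
{
    var entries = await repository.GetPendingRetryableAsync(tenantId, 50, cancellationToken);
    foreach (var entry in entries)
    {
        // DeadLetterEntry is an immutable record; bump the attempt counter and
        // flip status before handing the payload back to the queue.
        var replaying = entry with
        {
            Status = DeadLetterStatus.Replaying,
            ReplayAttempts = entry.ReplayAttempts + 1,
            UpdatedAt = DateTimeOffset.UtcNow,
            UpdatedBy = "dlq-replayer",
        };
        if (await repository.UpdateAsync(replaying, cancellationToken))
        {
            await requeueAsync(replaying, cancellationToken);
        }
    }
}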


@@ -0,0 +1,247 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Backfill;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of duplicate suppressor.
/// </summary>
public sealed class PostgresDuplicateSuppressor : IDuplicateSuppressor
{
private const string SelectProcessedSql = """
SELECT 1 FROM processed_events
WHERE tenant_id = @tenant_id
AND scope_key = @scope_key
AND event_key = @event_key
AND expires_at > NOW()
""";
private const string SelectMultipleProcessedSql = """
SELECT event_key FROM processed_events
WHERE tenant_id = @tenant_id
AND scope_key = @scope_key
AND event_key = ANY(@event_keys)
AND expires_at > NOW()
""";
private const string UpsertProcessedSql = """
INSERT INTO processed_events (tenant_id, scope_key, event_key, event_time, processed_at, batch_id, expires_at)
VALUES (@tenant_id, @scope_key, @event_key, @event_time, NOW(), @batch_id, @expires_at)
ON CONFLICT (tenant_id, scope_key, event_key) DO UPDATE
SET event_time = EXCLUDED.event_time,
processed_at = NOW(),
batch_id = EXCLUDED.batch_id,
expires_at = EXCLUDED.expires_at
""";
private const string CountProcessedSql = """
SELECT COUNT(*) FROM processed_events
WHERE tenant_id = @tenant_id
AND scope_key = @scope_key
AND event_time >= @from
AND event_time < @to
AND expires_at > NOW()
""";
private const string CleanupExpiredSql = """
DELETE FROM processed_events
WHERE ctid IN (
SELECT ctid FROM processed_events
WHERE expires_at < NOW()
LIMIT @batch_limit
)
""";
private readonly OrchestratorDataSource _dataSource;
private readonly string _tenantId;
private readonly ILogger<PostgresDuplicateSuppressor> _logger;
public PostgresDuplicateSuppressor(
OrchestratorDataSource dataSource,
string tenantId,
ILogger<PostgresDuplicateSuppressor> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_tenantId = tenantId ?? throw new ArgumentNullException(nameof(tenantId));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<bool> HasProcessedAsync(string scopeKey, string eventKey, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectProcessedSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", _tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
command.Parameters.AddWithValue("event_key", eventKey);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
return await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
}
public async Task<IReadOnlySet<string>> GetProcessedAsync(string scopeKey, IEnumerable<string> eventKeys, CancellationToken cancellationToken)
{
var keyList = eventKeys.ToArray();
if (keyList.Length == 0)
{
return new HashSet<string>();
}
await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectMultipleProcessedSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", _tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
command.Parameters.AddWithValue("event_keys", keyList);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var result = new HashSet<string>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
result.Add(reader.GetString(0));
}
return result;
}
public async Task MarkProcessedAsync(
string scopeKey,
string eventKey,
DateTimeOffset eventTime,
Guid? batchId,
TimeSpan ttl,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpsertProcessedSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", _tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
command.Parameters.AddWithValue("event_key", eventKey);
command.Parameters.AddWithValue("event_time", eventTime);
command.Parameters.AddWithValue("batch_id", (object?)batchId ?? DBNull.Value);
command.Parameters.AddWithValue("expires_at", DateTimeOffset.UtcNow + ttl);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
public async Task MarkProcessedBatchAsync(
string scopeKey,
IEnumerable<ProcessedEvent> events,
Guid? batchId,
TimeSpan ttl,
CancellationToken cancellationToken)
{
var eventList = events.ToList();
if (eventList.Count == 0)
{
return;
}
var expiresAt = DateTimeOffset.UtcNow + ttl;
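        // One transaction per batch keeps marking atomic; for very large batches,
        // Npgsql's NpgsqlBatch could cut round-trips versus per-row commands.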
await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
try
{
foreach (var evt in eventList)
{
await using var command = new NpgsqlCommand(UpsertProcessedSql, connection, transaction);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", _tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
command.Parameters.AddWithValue("event_key", evt.EventKey);
command.Parameters.AddWithValue("event_time", evt.EventTime);
command.Parameters.AddWithValue("batch_id", (object?)batchId ?? DBNull.Value);
command.Parameters.AddWithValue("expires_at", expiresAt);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.ProcessedEventsMarked(_tenantId, scopeKey, eventList.Count);
}
catch
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
throw;
}
}
public async Task<long> CountProcessedAsync(string scopeKey, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(CountProcessedSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", _tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
command.Parameters.AddWithValue("from", from);
command.Parameters.AddWithValue("to", to);
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt64(result);
}
public async Task<int> CleanupExpiredAsync(int batchLimit, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(_tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(CleanupExpiredSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("batch_limit", batchLimit);
var deleted = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (deleted > 0)
{
_logger.LogInformation("Cleaned up {DeletedCount} expired processed events", deleted);
OrchestratorMetrics.ProcessedEventsCleanedUp(_tenantId, deleted);
}
return deleted;
}
}
/// <summary>
/// Factory for creating tenant-scoped duplicate suppressors.
/// </summary>
public interface IDuplicateSuppressorFactory
{
/// <summary>
/// Creates a duplicate suppressor for the specified tenant.
/// </summary>
IDuplicateSuppressor Create(string tenantId);
}
/// <summary>
/// Factory implementation for PostgreSQL duplicate suppressors.
/// </summary>
public sealed class PostgresDuplicateSuppressorFactory : IDuplicateSuppressorFactory
{
private readonly OrchestratorDataSource _dataSource;
private readonly ILoggerFactory _loggerFactory;
public PostgresDuplicateSuppressorFactory(
OrchestratorDataSource dataSource,
ILoggerFactory loggerFactory)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
}
public IDuplicateSuppressor Create(string tenantId)
{
return new PostgresDuplicateSuppressor(
_dataSource,
tenantId,
_loggerFactory.CreateLogger<PostgresDuplicateSuppressor>());
}
}
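The factory exists because PostgresDuplicateSuppressor binds its tenant at construction. A rough consumer-side sketch follows; the scope/event key shapes, the handler delegate, and the 7-day TTL are illustrative choices, not values prescribed by this commit.

// Hypothetical dedup flow: skip events already recorded for this scope, process
// the remainder, then mark them with an (arbitrary) 7-day TTL.
public static async Task<int> ProcessBatchAsync(
    IDuplicateSuppressorFactory factory,
    string tenantId,
    string scopeKey,
    IReadOnlyList<ProcessedEvent> batch,
    Func<ProcessedEvent, CancellationToken, Task> handleAsync,
    CancellationToken cancellationToken)
{
    var suppressor = factory.Create(tenantId);
    var seen = await suppressor.GetProcessedAsync(
        scopeKey, batch.Select(e => e.EventKey), cancellationToken);

    var fresh = batch.Where(e => !seen.Contains(e.EventKey)).ToList();
    foreach (var evt in fresh)
    {
        await handleAsync(evt, cancellationToken);
    }

    // Record only after successful handling so failures are retried, not suppressed.
    await suppressor.MarkProcessedBatchAsync(
        scopeKey, fresh, batchId: null, ttl: TimeSpan.FromDays(7), cancellationToken);
    return fresh.Count;
}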


@@ -0,0 +1,540 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of job repository.
/// </summary>
public sealed class PostgresJobRepository : IJobRepository
{
private const string SelectJobColumns = """
job_id, tenant_id, project_id, run_id, job_type, status, priority, attempt, max_attempts,
payload_digest, payload, idempotency_key, correlation_id, lease_id, worker_id, task_runner_id,
lease_until, created_at, scheduled_at, leased_at, completed_at, not_before, reason, replay_of, created_by
""";
private const string SelectByIdSql = $"""
SELECT {SelectJobColumns}
FROM jobs
WHERE tenant_id = @tenant_id AND job_id = @job_id
""";
private const string SelectByIdempotencyKeySql = $"""
SELECT {SelectJobColumns}
FROM jobs
WHERE tenant_id = @tenant_id AND idempotency_key = @idempotency_key
""";
private const string InsertJobSql = """
INSERT INTO jobs (
job_id, tenant_id, project_id, run_id, job_type, status, priority, attempt, max_attempts,
payload_digest, payload, idempotency_key, correlation_id, lease_id, worker_id, task_runner_id,
lease_until, created_at, scheduled_at, leased_at, completed_at, not_before, reason, replay_of, created_by)
VALUES (
@job_id, @tenant_id, @project_id, @run_id, @job_type, @status::job_status, @priority, @attempt, @max_attempts,
@payload_digest, @payload, @idempotency_key, @correlation_id, @lease_id, @worker_id, @task_runner_id,
@lease_until, @created_at, @scheduled_at, @leased_at, @completed_at, @not_before, @reason, @replay_of, @created_by)
""";
private const string UpdateStatusSql = """
UPDATE jobs
SET status = @status::job_status,
attempt = @attempt,
lease_id = @lease_id,
worker_id = @worker_id,
task_runner_id = @task_runner_id,
lease_until = @lease_until,
scheduled_at = @scheduled_at,
leased_at = @leased_at,
completed_at = @completed_at,
not_before = @not_before,
reason = @reason
WHERE tenant_id = @tenant_id AND job_id = @job_id
""";
private const string LeaseNextSqlTemplate = """
UPDATE jobs
SET status = 'leased'::job_status,
lease_id = @lease_id,
worker_id = @worker_id,
lease_until = @lease_until,
leased_at = @leased_at
WHERE tenant_id = @tenant_id
AND job_id = (
SELECT job_id
FROM jobs
WHERE tenant_id = @tenant_id
AND status = 'scheduled'::job_status
AND (not_before IS NULL OR not_before <= @now)
{0}
ORDER BY priority DESC, created_at
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING
""";
private const string ExtendLeaseSql = """
UPDATE jobs
SET lease_until = @new_lease_until
WHERE tenant_id = @tenant_id
AND job_id = @job_id
AND lease_id = @lease_id
AND status = 'leased'::job_status
AND lease_until > @now
""";
private const string SelectByRunIdSql = $"""
SELECT {SelectJobColumns}
FROM jobs
WHERE tenant_id = @tenant_id AND run_id = @run_id
ORDER BY created_at
""";
private const string SelectExpiredLeasesSql = $"""
SELECT {SelectJobColumns}
FROM jobs
WHERE tenant_id = @tenant_id
AND status = 'leased'::job_status
AND lease_until < @cutoff
ORDER BY lease_until
LIMIT @limit
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresJobRepository> _logger;
public PostgresJobRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresJobRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Job?> GetByIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_id", jobId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapJob(reader);
}
public async Task<Job?> GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdempotencyKeySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("idempotency_key", idempotencyKey);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapJob(reader);
}
public async Task CreateAsync(Job job, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(job.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertJobSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddJobParameters(command, job);
try
{
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.JobEnqueued(job.TenantId, job.JobType);
OrchestratorMetrics.QueueDepthChanged(job.TenantId, job.JobType, 1);
}
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
{
_logger.LogWarning("Duplicate job idempotency key: {IdempotencyKey}", job.IdempotencyKey);
throw new DuplicateJobException(job.IdempotencyKey, ex);
}
}
public async Task UpdateStatusAsync(
string tenantId,
Guid jobId,
JobStatus status,
int attempt,
Guid? leaseId,
string? workerId,
string? taskRunnerId,
DateTimeOffset? leaseUntil,
DateTimeOffset? scheduledAt,
DateTimeOffset? leasedAt,
DateTimeOffset? completedAt,
DateTimeOffset? notBefore,
string? reason,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateStatusSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_id", jobId);
command.Parameters.AddWithValue("status", StatusToString(status));
command.Parameters.AddWithValue("attempt", attempt);
command.Parameters.AddWithValue("lease_id", (object?)leaseId ?? DBNull.Value);
command.Parameters.AddWithValue("worker_id", (object?)workerId ?? DBNull.Value);
command.Parameters.AddWithValue("task_runner_id", (object?)taskRunnerId ?? DBNull.Value);
command.Parameters.AddWithValue("lease_until", (object?)leaseUntil ?? DBNull.Value);
command.Parameters.AddWithValue("scheduled_at", (object?)scheduledAt ?? DBNull.Value);
command.Parameters.AddWithValue("leased_at", (object?)leasedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)completedAt ?? DBNull.Value);
command.Parameters.AddWithValue("not_before", (object?)notBefore ?? DBNull.Value);
command.Parameters.AddWithValue("reason", (object?)reason ?? DBNull.Value);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
public async Task<Job?> LeaseNextAsync(
string tenantId,
string? jobType,
Guid leaseId,
string workerId,
DateTimeOffset leaseUntil,
CancellationToken cancellationToken)
{
var jobTypeFilter = jobType != null ? "AND job_type = @job_type" : "";
var sql = string.Format(LeaseNextSqlTemplate, jobTypeFilter) + " " + SelectJobColumns;
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("lease_id", leaseId);
command.Parameters.AddWithValue("worker_id", workerId);
command.Parameters.AddWithValue("lease_until", leaseUntil);
command.Parameters.AddWithValue("leased_at", DateTimeOffset.UtcNow);
command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
if (jobType != null)
{
command.Parameters.AddWithValue("job_type", jobType);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
var job = MapJob(reader);
OrchestratorMetrics.JobLeased(job.TenantId, job.JobType);
OrchestratorMetrics.QueueDepthChanged(job.TenantId, job.JobType, -1);
return job;
}
public async Task<bool> ExtendLeaseAsync(
string tenantId,
Guid jobId,
Guid leaseId,
DateTimeOffset newLeaseUntil,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(ExtendLeaseSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_id", jobId);
command.Parameters.AddWithValue("lease_id", leaseId);
command.Parameters.AddWithValue("new_lease_until", newLeaseUntil);
command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
public async Task<IReadOnlyList<Job>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByRunIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("run_id", runId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var jobs = new List<Job>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
jobs.Add(MapJob(reader));
}
return jobs;
}
public async Task<IReadOnlyList<Job>> GetExpiredLeasesAsync(string tenantId, DateTimeOffset cutoff, int limit, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectExpiredLeasesSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("cutoff", cutoff);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var jobs = new List<Job>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
jobs.Add(MapJob(reader));
}
return jobs;
}
public async Task<IReadOnlyList<Job>> ListAsync(
string tenantId,
JobStatus? status,
string? jobType,
string? projectId,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, status, jobType, projectId, createdAfter, createdBefore, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var jobs = new List<Job>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
jobs.Add(MapJob(reader));
}
return jobs;
}
public async Task<int> CountAsync(
string tenantId,
JobStatus? status,
string? jobType,
string? projectId,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildCountQuery(tenantId, status, jobType, projectId);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt32(result);
}
private static void AddJobParameters(NpgsqlCommand command, Job job)
{
command.Parameters.AddWithValue("job_id", job.JobId);
command.Parameters.AddWithValue("tenant_id", job.TenantId);
command.Parameters.AddWithValue("project_id", (object?)job.ProjectId ?? DBNull.Value);
command.Parameters.AddWithValue("run_id", (object?)job.RunId ?? DBNull.Value);
command.Parameters.AddWithValue("job_type", job.JobType);
command.Parameters.AddWithValue("status", StatusToString(job.Status));
command.Parameters.AddWithValue("priority", job.Priority);
command.Parameters.AddWithValue("attempt", job.Attempt);
command.Parameters.AddWithValue("max_attempts", job.MaxAttempts);
command.Parameters.AddWithValue("payload_digest", job.PayloadDigest);
command.Parameters.Add(new NpgsqlParameter<string>("payload", NpgsqlDbType.Jsonb) { TypedValue = job.Payload });
command.Parameters.AddWithValue("idempotency_key", job.IdempotencyKey);
command.Parameters.AddWithValue("correlation_id", (object?)job.CorrelationId ?? DBNull.Value);
command.Parameters.AddWithValue("lease_id", (object?)job.LeaseId ?? DBNull.Value);
command.Parameters.AddWithValue("worker_id", (object?)job.WorkerId ?? DBNull.Value);
command.Parameters.AddWithValue("task_runner_id", (object?)job.TaskRunnerId ?? DBNull.Value);
command.Parameters.AddWithValue("lease_until", (object?)job.LeaseUntil ?? DBNull.Value);
command.Parameters.AddWithValue("created_at", job.CreatedAt);
command.Parameters.AddWithValue("scheduled_at", (object?)job.ScheduledAt ?? DBNull.Value);
command.Parameters.AddWithValue("leased_at", (object?)job.LeasedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)job.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("not_before", (object?)job.NotBefore ?? DBNull.Value);
command.Parameters.AddWithValue("reason", (object?)job.Reason ?? DBNull.Value);
command.Parameters.AddWithValue("replay_of", (object?)job.ReplayOf ?? DBNull.Value);
command.Parameters.AddWithValue("created_by", job.CreatedBy);
}
private static Job MapJob(NpgsqlDataReader reader)
{
return new Job(
JobId: reader.GetGuid(0),
TenantId: reader.GetString(1),
ProjectId: reader.IsDBNull(2) ? null : reader.GetString(2),
RunId: reader.IsDBNull(3) ? null : reader.GetGuid(3),
JobType: reader.GetString(4),
Status: ParseStatus(reader.GetString(5)),
Priority: reader.GetInt32(6),
Attempt: reader.GetInt32(7),
MaxAttempts: reader.GetInt32(8),
PayloadDigest: reader.GetString(9),
Payload: reader.GetString(10),
IdempotencyKey: reader.GetString(11),
CorrelationId: reader.IsDBNull(12) ? null : reader.GetString(12),
LeaseId: reader.IsDBNull(13) ? null : reader.GetGuid(13),
WorkerId: reader.IsDBNull(14) ? null : reader.GetString(14),
TaskRunnerId: reader.IsDBNull(15) ? null : reader.GetString(15),
LeaseUntil: reader.IsDBNull(16) ? null : reader.GetFieldValue<DateTimeOffset>(16),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(17),
ScheduledAt: reader.IsDBNull(18) ? null : reader.GetFieldValue<DateTimeOffset>(18),
LeasedAt: reader.IsDBNull(19) ? null : reader.GetFieldValue<DateTimeOffset>(19),
CompletedAt: reader.IsDBNull(20) ? null : reader.GetFieldValue<DateTimeOffset>(20),
NotBefore: reader.IsDBNull(21) ? null : reader.GetFieldValue<DateTimeOffset>(21),
Reason: reader.IsDBNull(22) ? null : reader.GetString(22),
ReplayOf: reader.IsDBNull(23) ? null : reader.GetGuid(23),
CreatedBy: reader.GetString(24));
}
private static string StatusToString(JobStatus status) => status switch
{
JobStatus.Pending => "pending",
JobStatus.Scheduled => "scheduled",
JobStatus.Leased => "leased",
JobStatus.Succeeded => "succeeded",
JobStatus.Failed => "failed",
JobStatus.Canceled => "canceled",
JobStatus.TimedOut => "timed_out",
_ => throw new ArgumentOutOfRangeException(nameof(status), status, "Unhandled job status.")
};
private static JobStatus ParseStatus(string status) => status switch
{
"pending" => JobStatus.Pending,
"scheduled" => JobStatus.Scheduled,
"leased" => JobStatus.Leased,
"succeeded" => JobStatus.Succeeded,
"failed" => JobStatus.Failed,
"canceled" => JobStatus.Canceled,
"timed_out" => JobStatus.TimedOut,
_ => throw new ArgumentOutOfRangeException(nameof(status), status, "Unknown job status value.")
};
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
JobStatus? status,
string? jobType,
string? projectId,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset)
{
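// Every filter is appended as a parameter placeholder; caller-supplied values
// never enter the SQL text, so the dynamic WHERE clause stays injection-safe.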
var sb = new StringBuilder();
sb.Append($"SELECT {SelectJobColumns} FROM jobs WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (status.HasValue)
{
sb.Append(" AND status = @status::job_status");
parameters.Add(("status", StatusToString(status.Value)));
}
if (!string.IsNullOrEmpty(jobType))
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", jobType));
}
if (!string.IsNullOrEmpty(projectId))
{
sb.Append(" AND project_id = @project_id");
parameters.Add(("project_id", projectId));
}
if (createdAfter.HasValue)
{
sb.Append(" AND created_at >= @created_after");
parameters.Add(("created_after", createdAfter.Value));
}
if (createdBefore.HasValue)
{
sb.Append(" AND created_at < @created_before");
parameters.Add(("created_before", createdBefore.Value));
}
sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
string tenantId,
JobStatus? status,
string? jobType,
string? projectId)
{
var sb = new StringBuilder();
sb.Append("SELECT COUNT(*) FROM jobs WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (status.HasValue)
{
sb.Append(" AND status = @status::job_status");
parameters.Add(("status", StatusToString(status.Value)));
}
if (!string.IsNullOrEmpty(jobType))
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", jobType));
}
if (!string.IsNullOrEmpty(projectId))
{
sb.Append(" AND project_id = @project_id");
parameters.Add(("project_id", projectId));
}
return (sb.ToString(), parameters);
}
}
/// <summary>
/// Exception thrown when attempting to create a job with a duplicate idempotency key.
/// </summary>
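/// <example>
/// A hypothetical caller treating re-submission as success (sketch only; the
/// create method name is assumed):
/// <code>
/// try
/// {
///     await jobs.CreateAsync(job, cancellationToken);
/// }
/// catch (DuplicateJobException ex)
/// {
///     // Re-use the job already stored under ex.IdempotencyKey.
/// }
/// </code>
/// </example>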
public sealed class DuplicateJobException : Exception
{
public string IdempotencyKey { get; }
public DuplicateJobException(string idempotencyKey, Exception innerException)
: base($"Job with idempotency key '{idempotencyKey}' already exists.", innerException)
{
IdempotencyKey = idempotencyKey;
}
}

View File

@@ -0,0 +1,949 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of the run ledger repository. Entries form an
/// append-only, per-tenant hash chain: each row stores its predecessor's
/// content hash next to a monotonically increasing sequence number.
/// </summary>
public sealed class PostgresLedgerRepository : ILedgerRepository
{
private const string SelectLedgerColumns = """
ledger_id, tenant_id, run_id, source_id, run_type, final_status, total_jobs,
succeeded_jobs, failed_jobs, run_created_at, run_started_at, run_completed_at,
execution_duration_ms, initiated_by, input_digest, output_digest, artifact_manifest,
sequence_number, previous_entry_hash, content_hash, ledger_created_at, correlation_id, metadata
""";
private const string SelectByIdSql = $"""
SELECT {SelectLedgerColumns}
FROM run_ledger_entries
WHERE tenant_id = @tenant_id AND ledger_id = @ledger_id
""";
private const string SelectByRunIdSql = $"""
SELECT {SelectLedgerColumns}
FROM run_ledger_entries
WHERE tenant_id = @tenant_id AND run_id = @run_id
""";
private const string InsertEntrySql = """
INSERT INTO run_ledger_entries (
ledger_id, tenant_id, run_id, source_id, run_type, final_status, total_jobs,
succeeded_jobs, failed_jobs, run_created_at, run_started_at, run_completed_at,
execution_duration_ms, initiated_by, input_digest, output_digest, artifact_manifest,
sequence_number, previous_entry_hash, content_hash, ledger_created_at, correlation_id, metadata)
VALUES (
@ledger_id, @tenant_id, @run_id, @source_id, @run_type, @final_status, @total_jobs,
@succeeded_jobs, @failed_jobs, @run_created_at, @run_started_at, @run_completed_at,
@execution_duration_ms, @initiated_by, @input_digest, @output_digest, @artifact_manifest::jsonb,
@sequence_number, @previous_entry_hash, @content_hash, @ledger_created_at, @correlation_id, @metadata::jsonb)
""";
private const string SelectLatestSql = $"""
SELECT {SelectLedgerColumns}
FROM run_ledger_entries
WHERE tenant_id = @tenant_id
ORDER BY sequence_number DESC
LIMIT 1
""";
private const string GetSequenceSql = """
SELECT next_seq, prev_hash FROM next_ledger_sequence(@tenant_id)
""";
private const string UpdateSequenceHashSql = """
SELECT update_ledger_sequence_hash(@tenant_id, @content_hash)
""";
private const string VerifyChainSql = """
SELECT is_valid, invalid_ledger_id, invalid_sequence, error_message
FROM verify_ledger_chain(@tenant_id, @start_seq, @end_seq)
""";
private const string GetSummarySql = """
SELECT total_entries, entries_since, total_runs, successful_runs, failed_runs,
total_jobs, unique_sources, unique_run_types, earliest_entry, latest_entry
FROM get_ledger_summary(@tenant_id, @since)
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresLedgerRepository> _logger;
public PostgresLedgerRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresLedgerRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<RunLedgerEntry> AppendAsync(
Run run,
IReadOnlyList<Artifact> artifacts,
string inputDigest,
string? metadata = null,
CancellationToken cancellationToken = default)
{
if (run.CompletedAt is null)
{
throw new InvalidOperationException("Cannot create ledger entry from an incomplete run.");
}
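// Appends are chained per tenant: next_ledger_sequence hands out the next
// sequence number together with the current tip's content hash, and
// update_ledger_sequence_hash advances the tip after the insert, all inside
// a single transaction.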
await using var connection = await _dataSource.OpenConnectionAsync(run.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
try
{
// Get next sequence number and previous hash
long sequenceNumber;
string? previousEntryHash;
await using (var seqCommand = new NpgsqlCommand(GetSequenceSql, connection, transaction))
{
seqCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
seqCommand.Parameters.AddWithValue("tenant_id", run.TenantId);
await using var reader = await seqCommand.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
throw new InvalidOperationException("Failed to get next ledger sequence.");
}
sequenceNumber = reader.GetInt64(0);
previousEntryHash = reader.IsDBNull(1) ? null : reader.GetString(1);
}
// Create the ledger entry
var entry = RunLedgerEntry.FromCompletedRun(
run: run,
artifacts: artifacts,
inputDigest: inputDigest,
sequenceNumber: sequenceNumber,
previousEntryHash: previousEntryHash,
metadata: metadata);
// Insert the entry
await using (var insertCommand = new NpgsqlCommand(InsertEntrySql, connection, transaction))
{
insertCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddEntryParameters(insertCommand, entry);
await insertCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
// Update sequence hash
await using (var updateCommand = new NpgsqlCommand(UpdateSequenceHashSql, connection, transaction))
{
updateCommand.CommandTimeout = _dataSource.CommandTimeoutSeconds;
updateCommand.Parameters.AddWithValue("tenant_id", run.TenantId);
updateCommand.Parameters.AddWithValue("content_hash", entry.ContentHash);
await updateCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.LedgerEntryCreated(run.TenantId, run.RunType, entry.FinalStatus.ToString());
_logger.LogDebug("Ledger entry {LedgerId} appended for run {RunId}, sequence {Sequence}",
entry.LedgerId, run.RunId, sequenceNumber);
return entry;
}
catch
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
throw;
}
}
public async Task<RunLedgerEntry?> GetByIdAsync(
string tenantId,
Guid ledgerId,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("ledger_id", ledgerId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<RunLedgerEntry?> GetByRunIdAsync(
string tenantId,
Guid runId,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByRunIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("run_id", runId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<IReadOnlyList<RunLedgerEntry>> ListAsync(
string tenantId,
string? runType = null,
Guid? sourceId = null,
RunStatus? finalStatus = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var (sql, parameters) = BuildListQuery(tenantId, runType, sourceId, finalStatus, startTime, endTime, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<RunLedgerEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<IReadOnlyList<RunLedgerEntry>> GetBySequenceRangeAsync(
string tenantId,
long startSequence,
long endSequence,
CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectLedgerColumns}
FROM run_ledger_entries
WHERE tenant_id = @tenant_id
AND sequence_number >= @start_seq
AND sequence_number <= @end_seq
ORDER BY sequence_number ASC
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("start_seq", startSequence);
command.Parameters.AddWithValue("end_seq", endSequence);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<RunLedgerEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<RunLedgerEntry?> GetLatestAsync(
string tenantId,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectLatestSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapEntry(reader);
}
public async Task<IReadOnlyList<RunLedgerEntry>> GetBySourceAsync(
string tenantId,
Guid sourceId,
int limit = 100,
CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectLedgerColumns}
FROM run_ledger_entries
WHERE tenant_id = @tenant_id
AND source_id = @source_id
ORDER BY ledger_created_at DESC
LIMIT @limit
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var entries = new List<RunLedgerEntry>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
entries.Add(MapEntry(reader));
}
return entries;
}
public async Task<long> GetCountAsync(
string tenantId,
string? runType = null,
Guid? sourceId = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
CancellationToken cancellationToken = default)
{
var sb = new StringBuilder("SELECT COUNT(*) FROM run_ledger_entries WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (runType is not null)
{
sb.Append(" AND run_type = @run_type");
parameters.Add(("run_type", runType));
}
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (startTime.HasValue)
{
sb.Append(" AND ledger_created_at >= @start_time");
parameters.Add(("start_time", startTime.Value));
}
if (endTime.HasValue)
{
sb.Append(" AND ledger_created_at <= @end_time");
parameters.Add(("end_time", endTime.Value));
}
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sb.ToString(), connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt64(result);
}
public async Task<ChainVerificationResult> VerifyChainAsync(
string tenantId,
long? startSequence = null,
long? endSequence = null,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(VerifyChainSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("start_seq", (object?)startSequence ?? 1L);
command.Parameters.AddWithValue("end_seq", (object?)endSequence ?? DBNull.Value);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return new ChainVerificationResult(true, null, null, null);
}
return new ChainVerificationResult(
IsValid: reader.GetBoolean(0),
InvalidEntryId: reader.IsDBNull(1) ? null : reader.GetGuid(1),
InvalidSequence: reader.IsDBNull(2) ? null : reader.GetInt64(2),
ErrorMessage: reader.IsDBNull(3) ? null : reader.GetString(3));
}
public async Task<LedgerSummary> GetSummaryAsync(
string tenantId,
DateTimeOffset? since = null,
CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(GetSummarySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("since", (object?)since ?? DBNull.Value);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return new LedgerSummary(0, 0, 0, 0, 0, 0, 0, 0, null, null);
}
return new LedgerSummary(
TotalEntries: reader.GetInt64(0),
EntriesSince: reader.GetInt64(1),
TotalRuns: reader.GetInt64(2),
SuccessfulRuns: reader.GetInt64(3),
FailedRuns: reader.GetInt64(4),
TotalJobs: reader.GetInt64(5),
UniqueSources: reader.GetInt64(6),
UniqueRunTypes: reader.GetInt64(7),
EarliestEntry: reader.IsDBNull(8) ? null : reader.GetFieldValue<DateTimeOffset>(8),
LatestEntry: reader.IsDBNull(9) ? null : reader.GetFieldValue<DateTimeOffset>(9));
}
private static void AddEntryParameters(NpgsqlCommand command, RunLedgerEntry entry)
{
command.Parameters.AddWithValue("ledger_id", entry.LedgerId);
command.Parameters.AddWithValue("tenant_id", entry.TenantId);
command.Parameters.AddWithValue("run_id", entry.RunId);
command.Parameters.AddWithValue("source_id", entry.SourceId);
command.Parameters.AddWithValue("run_type", entry.RunType);
command.Parameters.AddWithValue("final_status", (int)entry.FinalStatus);
command.Parameters.AddWithValue("total_jobs", entry.TotalJobs);
command.Parameters.AddWithValue("succeeded_jobs", entry.SucceededJobs);
command.Parameters.AddWithValue("failed_jobs", entry.FailedJobs);
command.Parameters.AddWithValue("run_created_at", entry.RunCreatedAt);
command.Parameters.AddWithValue("run_started_at", (object?)entry.RunStartedAt ?? DBNull.Value);
command.Parameters.AddWithValue("run_completed_at", entry.RunCompletedAt);
command.Parameters.AddWithValue("execution_duration_ms", (long)entry.ExecutionDuration.TotalMilliseconds);
command.Parameters.AddWithValue("initiated_by", entry.InitiatedBy);
command.Parameters.AddWithValue("input_digest", entry.InputDigest);
command.Parameters.AddWithValue("output_digest", entry.OutputDigest);
command.Parameters.AddWithValue("artifact_manifest", entry.ArtifactManifest);
command.Parameters.AddWithValue("sequence_number", entry.SequenceNumber);
command.Parameters.AddWithValue("previous_entry_hash", (object?)entry.PreviousEntryHash ?? DBNull.Value);
command.Parameters.AddWithValue("content_hash", entry.ContentHash);
command.Parameters.AddWithValue("ledger_created_at", entry.LedgerCreatedAt);
command.Parameters.AddWithValue("correlation_id", (object?)entry.CorrelationId ?? DBNull.Value);
command.Parameters.AddWithValue("metadata", (object?)entry.Metadata ?? DBNull.Value);
}
private static RunLedgerEntry MapEntry(NpgsqlDataReader reader)
{
return new RunLedgerEntry(
LedgerId: reader.GetGuid(0),
TenantId: reader.GetString(1),
RunId: reader.GetGuid(2),
SourceId: reader.GetGuid(3),
RunType: reader.GetString(4),
FinalStatus: (RunStatus)reader.GetInt32(5),
TotalJobs: reader.GetInt32(6),
SucceededJobs: reader.GetInt32(7),
FailedJobs: reader.GetInt32(8),
RunCreatedAt: reader.GetFieldValue<DateTimeOffset>(9),
RunStartedAt: reader.IsDBNull(10) ? null : reader.GetFieldValue<DateTimeOffset>(10),
RunCompletedAt: reader.GetFieldValue<DateTimeOffset>(11),
ExecutionDuration: TimeSpan.FromMilliseconds(reader.GetInt64(12)),
InitiatedBy: reader.GetString(13),
InputDigest: reader.GetString(14),
OutputDigest: reader.GetString(15),
ArtifactManifest: reader.GetString(16),
SequenceNumber: reader.GetInt64(17),
PreviousEntryHash: reader.IsDBNull(18) ? null : reader.GetString(18),
ContentHash: reader.GetString(19),
LedgerCreatedAt: reader.GetFieldValue<DateTimeOffset>(20),
CorrelationId: reader.IsDBNull(21) ? null : reader.GetString(21),
Metadata: reader.IsDBNull(22) ? null : reader.GetString(22));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
string? runType,
Guid? sourceId,
RunStatus? finalStatus,
DateTimeOffset? startTime,
DateTimeOffset? endTime,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectLedgerColumns} FROM run_ledger_entries WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (runType is not null)
{
sb.Append(" AND run_type = @run_type");
parameters.Add(("run_type", runType));
}
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (finalStatus.HasValue)
{
sb.Append(" AND final_status = @final_status");
parameters.Add(("final_status", (int)finalStatus.Value));
}
if (startTime.HasValue)
{
sb.Append(" AND ledger_created_at >= @start_time");
parameters.Add(("start_time", startTime.Value));
}
if (endTime.HasValue)
{
sb.Append(" AND ledger_created_at <= @end_time");
parameters.Add(("end_time", endTime.Value));
}
sb.Append(" ORDER BY ledger_created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
/// <summary>
/// PostgreSQL implementation of the ledger export repository.
/// </summary>
public sealed class PostgresLedgerExportRepository : ILedgerExportRepository
{
private const string SelectExportColumns = """
export_id, tenant_id, status, format, start_time, end_time, run_type_filter,
source_id_filter, entry_count, output_uri, output_digest, output_size_bytes,
requested_by, requested_at, started_at, completed_at, error_message
""";
private const string InsertExportSql = """
INSERT INTO ledger_exports (
export_id, tenant_id, status, format, start_time, end_time, run_type_filter,
source_id_filter, entry_count, output_uri, output_digest, output_size_bytes,
requested_by, requested_at, started_at, completed_at, error_message)
VALUES (
@export_id, @tenant_id, @status, @format, @start_time, @end_time, @run_type_filter,
@source_id_filter, @entry_count, @output_uri, @output_digest, @output_size_bytes,
@requested_by, @requested_at, @started_at, @completed_at, @error_message)
""";
private const string UpdateExportSql = """
UPDATE ledger_exports
SET status = @status,
entry_count = @entry_count,
output_uri = @output_uri,
output_digest = @output_digest,
output_size_bytes = @output_size_bytes,
started_at = @started_at,
completed_at = @completed_at,
error_message = @error_message
WHERE tenant_id = @tenant_id AND export_id = @export_id
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresLedgerExportRepository> _logger;
public PostgresLedgerExportRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresLedgerExportRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<LedgerExport> CreateAsync(LedgerExport export, CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(export.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertExportSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddExportParameters(command, export);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.LedgerExportRequested(export.TenantId, export.Format);
_logger.LogDebug("Ledger export {ExportId} created for tenant {TenantId}", export.ExportId, export.TenantId);
return export;
}
public async Task<LedgerExport?> GetByIdAsync(string tenantId, Guid exportId, CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectExportColumns}
FROM ledger_exports
WHERE tenant_id = @tenant_id AND export_id = @export_id
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("export_id", exportId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapExport(reader);
}
public async Task<IReadOnlyList<LedgerExport>> ListAsync(
string tenantId,
LedgerExportStatus? status = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var sb = new StringBuilder($"SELECT {SelectExportColumns} FROM ledger_exports WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (status.HasValue)
{
sb.Append(" AND status = @status");
parameters.Add(("status", (int)status.Value));
}
sb.Append(" ORDER BY requested_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sb.ToString(), connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var exports = new List<LedgerExport>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
exports.Add(MapExport(reader));
}
return exports;
}
public async Task<LedgerExport> UpdateAsync(LedgerExport export, CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(export.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateExportSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("export_id", export.ExportId);
command.Parameters.AddWithValue("tenant_id", export.TenantId);
command.Parameters.AddWithValue("status", (int)export.Status);
command.Parameters.AddWithValue("entry_count", export.EntryCount);
command.Parameters.AddWithValue("output_uri", (object?)export.OutputUri ?? DBNull.Value);
command.Parameters.AddWithValue("output_digest", (object?)export.OutputDigest ?? DBNull.Value);
command.Parameters.AddWithValue("output_size_bytes", (object?)export.OutputSizeBytes ?? DBNull.Value);
command.Parameters.AddWithValue("started_at", (object?)export.StartedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)export.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("error_message", (object?)export.ErrorMessage ?? DBNull.Value);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (export.Status == LedgerExportStatus.Completed)
{
OrchestratorMetrics.LedgerExportCompleted(export.TenantId, export.Format);
}
else if (export.Status == LedgerExportStatus.Failed)
{
OrchestratorMetrics.LedgerExportFailed(export.TenantId, export.Format);
}
return export;
}
public async Task<IReadOnlyList<LedgerExport>> GetPendingAsync(int limit = 10, CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectExportColumns}
FROM ledger_exports
WHERE status = @status
ORDER BY requested_at ASC
LIMIT @limit
""";
await using var connection = await _dataSource.OpenConnectionAsync("_system", "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("status", (int)LedgerExportStatus.Pending);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var exports = new List<LedgerExport>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
exports.Add(MapExport(reader));
}
return exports;
}
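// Hypothetical drain pass (sketch, not called by this repository): claims
// pending exports and persists whatever outcome the supplied delegate
// produced. The produceAsync delegate, which writes the artifact and returns
// the updated record, is an assumption; the real worker lives elsewhere.
private static async Task DrainPendingOnceAsync(
    ILedgerExportRepository repository,
    Func<LedgerExport, CancellationToken, Task<LedgerExport>> produceAsync,
    CancellationToken cancellationToken)
{
    var pending = await repository.GetPendingAsync(10, cancellationToken).ConfigureAwait(false);
    foreach (var export in pending)
    {
        var outcome = await produceAsync(export, cancellationToken).ConfigureAwait(false);
        await repository.UpdateAsync(outcome, cancellationToken).ConfigureAwait(false);
    }
}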
private static void AddExportParameters(NpgsqlCommand command, LedgerExport export)
{
command.Parameters.AddWithValue("export_id", export.ExportId);
command.Parameters.AddWithValue("tenant_id", export.TenantId);
command.Parameters.AddWithValue("status", (int)export.Status);
command.Parameters.AddWithValue("format", export.Format);
command.Parameters.AddWithValue("start_time", (object?)export.StartTime ?? DBNull.Value);
command.Parameters.AddWithValue("end_time", (object?)export.EndTime ?? DBNull.Value);
command.Parameters.AddWithValue("run_type_filter", (object?)export.RunTypeFilter ?? DBNull.Value);
command.Parameters.AddWithValue("source_id_filter", (object?)export.SourceIdFilter ?? DBNull.Value);
command.Parameters.AddWithValue("entry_count", export.EntryCount);
command.Parameters.AddWithValue("output_uri", (object?)export.OutputUri ?? DBNull.Value);
command.Parameters.AddWithValue("output_digest", (object?)export.OutputDigest ?? DBNull.Value);
command.Parameters.AddWithValue("output_size_bytes", (object?)export.OutputSizeBytes ?? DBNull.Value);
command.Parameters.AddWithValue("requested_by", export.RequestedBy);
command.Parameters.AddWithValue("requested_at", export.RequestedAt);
command.Parameters.AddWithValue("started_at", (object?)export.StartedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)export.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("error_message", (object?)export.ErrorMessage ?? DBNull.Value);
}
private static LedgerExport MapExport(NpgsqlDataReader reader)
{
return new LedgerExport(
ExportId: reader.GetGuid(0),
TenantId: reader.GetString(1),
Status: (LedgerExportStatus)reader.GetInt32(2),
Format: reader.GetString(3),
StartTime: reader.IsDBNull(4) ? null : reader.GetFieldValue<DateTimeOffset>(4),
EndTime: reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5),
RunTypeFilter: reader.IsDBNull(6) ? null : reader.GetString(6),
SourceIdFilter: reader.IsDBNull(7) ? null : reader.GetGuid(7),
EntryCount: reader.GetInt32(8),
OutputUri: reader.IsDBNull(9) ? null : reader.GetString(9),
OutputDigest: reader.IsDBNull(10) ? null : reader.GetString(10),
OutputSizeBytes: reader.IsDBNull(11) ? null : reader.GetInt64(11),
RequestedBy: reader.GetString(12),
RequestedAt: reader.GetFieldValue<DateTimeOffset>(13),
StartedAt: reader.IsDBNull(14) ? null : reader.GetFieldValue<DateTimeOffset>(14),
CompletedAt: reader.IsDBNull(15) ? null : reader.GetFieldValue<DateTimeOffset>(15),
ErrorMessage: reader.IsDBNull(16) ? null : reader.GetString(16));
}
}
/// <summary>
/// PostgreSQL implementation of the manifest repository.
/// </summary>
public sealed class PostgresManifestRepository : IManifestRepository
{
private const string SelectManifestColumns = """
manifest_id, schema_version, tenant_id, provenance_type, subject_id, statements,
artifacts, materials, build_info, payload_digest, signature_algorithm, signature,
key_id, created_at, expires_at, metadata
""";
private const string InsertManifestSql = """
INSERT INTO signed_manifests (
manifest_id, schema_version, tenant_id, provenance_type, subject_id, statements,
artifacts, materials, build_info, payload_digest, signature_algorithm, signature,
key_id, created_at, expires_at, metadata)
VALUES (
@manifest_id, @schema_version, @tenant_id, @provenance_type, @subject_id, @statements::jsonb,
@artifacts::jsonb, @materials::jsonb, @build_info::jsonb, @payload_digest, @signature_algorithm, @signature,
@key_id, @created_at, @expires_at, @metadata::jsonb)
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresManifestRepository> _logger;
public PostgresManifestRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresManifestRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<SignedManifest> CreateAsync(SignedManifest manifest, CancellationToken cancellationToken = default)
{
await using var connection = await _dataSource.OpenConnectionAsync(manifest.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertManifestSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("manifest_id", manifest.ManifestId);
command.Parameters.AddWithValue("schema_version", manifest.SchemaVersion);
command.Parameters.AddWithValue("tenant_id", manifest.TenantId);
command.Parameters.AddWithValue("provenance_type", (int)manifest.ProvenanceType);
command.Parameters.AddWithValue("subject_id", manifest.SubjectId);
command.Parameters.AddWithValue("statements", manifest.Statements);
command.Parameters.AddWithValue("artifacts", manifest.Artifacts);
command.Parameters.AddWithValue("materials", manifest.Materials);
command.Parameters.AddWithValue("build_info", (object?)manifest.BuildInfo ?? DBNull.Value);
command.Parameters.AddWithValue("payload_digest", manifest.PayloadDigest);
command.Parameters.AddWithValue("signature_algorithm", manifest.SignatureAlgorithm);
command.Parameters.AddWithValue("signature", manifest.Signature);
command.Parameters.AddWithValue("key_id", manifest.KeyId);
command.Parameters.AddWithValue("created_at", manifest.CreatedAt);
command.Parameters.AddWithValue("expires_at", (object?)manifest.ExpiresAt ?? DBNull.Value);
command.Parameters.AddWithValue("metadata", (object?)manifest.Metadata ?? DBNull.Value);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.ManifestCreated(manifest.TenantId, manifest.ProvenanceType.ToString());
_logger.LogDebug("Manifest {ManifestId} created for subject {SubjectId}", manifest.ManifestId, manifest.SubjectId);
return manifest;
}
public async Task<SignedManifest?> GetByIdAsync(string tenantId, Guid manifestId, CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectManifestColumns}
FROM signed_manifests
WHERE tenant_id = @tenant_id AND manifest_id = @manifest_id
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("manifest_id", manifestId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapManifest(reader);
}
public async Task<SignedManifest?> GetBySubjectAsync(
string tenantId,
ProvenanceType provenanceType,
Guid subjectId,
CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectManifestColumns}
FROM signed_manifests
WHERE tenant_id = @tenant_id
AND provenance_type = @provenance_type
AND subject_id = @subject_id
ORDER BY created_at DESC
LIMIT 1
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("provenance_type", (int)provenanceType);
command.Parameters.AddWithValue("subject_id", subjectId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapManifest(reader);
}
public async Task<IReadOnlyList<SignedManifest>> ListAsync(
string tenantId,
ProvenanceType? provenanceType = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
var sb = new StringBuilder($"SELECT {SelectManifestColumns} FROM signed_manifests WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (provenanceType.HasValue)
{
sb.Append(" AND provenance_type = @provenance_type");
parameters.Add(("provenance_type", (int)provenanceType.Value));
}
sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sb.ToString(), connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var manifests = new List<SignedManifest>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
manifests.Add(MapManifest(reader));
}
return manifests;
}
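// Content-addressed lookup supports idempotent signing: a caller can probe
// GetByPayloadDigestAsync before re-signing the same payload. The digest is
// treated as an opaque key here; its format is owned by the producer.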
public async Task<SignedManifest?> GetByPayloadDigestAsync(
string tenantId,
string payloadDigest,
CancellationToken cancellationToken = default)
{
var sql = $"""
SELECT {SelectManifestColumns}
FROM signed_manifests
WHERE tenant_id = @tenant_id AND payload_digest = @payload_digest
""";
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("payload_digest", payloadDigest);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapManifest(reader);
}
private static SignedManifest MapManifest(NpgsqlDataReader reader)
{
return new SignedManifest(
ManifestId: reader.GetGuid(0),
SchemaVersion: reader.GetString(1),
TenantId: reader.GetString(2),
ProvenanceType: (ProvenanceType)reader.GetInt32(3),
SubjectId: reader.GetGuid(4),
Statements: reader.GetString(5),
Artifacts: reader.GetString(6),
Materials: reader.GetString(7),
BuildInfo: reader.IsDBNull(8) ? null : reader.GetString(8),
PayloadDigest: reader.GetString(9),
SignatureAlgorithm: reader.GetString(10),
Signature: reader.GetString(11),
KeyId: reader.GetString(12),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(13),
ExpiresAt: reader.IsDBNull(14) ? null : reader.GetFieldValue<DateTimeOffset>(14),
Metadata: reader.IsDBNull(15) ? null : reader.GetString(15));
}
}

View File

@@ -0,0 +1,434 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of the quota repository, persisting per-tenant
/// (optionally per-job-type) token-bucket state and concurrency counters.
/// </summary>
public sealed class PostgresQuotaRepository : IQuotaRepository
{
private const string SelectQuotaColumns = """
quota_id, tenant_id, job_type, max_active, max_per_hour, burst_capacity,
refill_rate, current_tokens, last_refill_at, current_active, current_hour_count,
current_hour_start, paused, pause_reason, quota_ticket, created_at, updated_at, updated_by
""";
private const string SelectByIdSql = $"""
SELECT {SelectQuotaColumns}
FROM quotas
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string SelectByTenantAndJobTypeSql = $"""
SELECT {SelectQuotaColumns}
FROM quotas
WHERE tenant_id = @tenant_id AND (job_type = @job_type OR (job_type IS NULL AND @job_type IS NULL))
""";
private const string InsertQuotaSql = """
INSERT INTO quotas (
quota_id, tenant_id, job_type, max_active, max_per_hour, burst_capacity,
refill_rate, current_tokens, last_refill_at, current_active, current_hour_count,
current_hour_start, paused, pause_reason, quota_ticket, created_at, updated_at, updated_by)
VALUES (
@quota_id, @tenant_id, @job_type, @max_active, @max_per_hour, @burst_capacity,
@refill_rate, @current_tokens, @last_refill_at, @current_active, @current_hour_count,
@current_hour_start, @paused, @pause_reason, @quota_ticket, @created_at, @updated_at, @updated_by)
""";
private const string UpdateQuotaSql = """
UPDATE quotas
SET job_type = @job_type,
max_active = @max_active,
max_per_hour = @max_per_hour,
burst_capacity = @burst_capacity,
refill_rate = @refill_rate,
current_tokens = @current_tokens,
last_refill_at = @last_refill_at,
current_active = @current_active,
current_hour_count = @current_hour_count,
current_hour_start = @current_hour_start,
paused = @paused,
pause_reason = @pause_reason,
quota_ticket = @quota_ticket,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string UpdateStateSql = """
UPDATE quotas
SET current_tokens = @current_tokens,
last_refill_at = @last_refill_at,
current_active = @current_active,
current_hour_count = @current_hour_count,
current_hour_start = @current_hour_start,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string PauseQuotaSql = """
UPDATE quotas
SET paused = TRUE,
pause_reason = @pause_reason,
quota_ticket = @quota_ticket,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string ResumeQuotaSql = """
UPDATE quotas
SET paused = FALSE,
pause_reason = NULL,
quota_ticket = NULL,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string IncrementActiveSql = """
UPDATE quotas
SET current_active = current_active + 1,
updated_at = @updated_at
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string DecrementActiveSql = """
UPDATE quotas
SET current_active = GREATEST(current_active - 1, 0),
updated_at = @updated_at
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private const string DeleteQuotaSql = """
DELETE FROM quotas
WHERE tenant_id = @tenant_id AND quota_id = @quota_id
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresQuotaRepository> _logger;
public PostgresQuotaRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresQuotaRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Quota?> GetByIdAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapQuota(reader);
}
public async Task<Quota?> GetByTenantAndJobTypeAsync(string tenantId, string? jobType, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByTenantAndJobTypeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_type", (object?)jobType ?? DBNull.Value);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapQuota(reader);
}
public async Task CreateAsync(Quota quota, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(quota.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertQuotaSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddQuotaParameters(command, quota);
try
{
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.QuotaCreated(quota.TenantId, quota.JobType);
}
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
{
_logger.LogWarning("Duplicate quota for tenant {TenantId} job type {JobType}", quota.TenantId, quota.JobType);
throw new DuplicateQuotaException(quota.TenantId, quota.JobType, ex);
}
}
public async Task UpdateAsync(Quota quota, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(quota.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateQuotaSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", quota.TenantId);
command.Parameters.AddWithValue("quota_id", quota.QuotaId);
command.Parameters.AddWithValue("job_type", (object?)quota.JobType ?? DBNull.Value);
command.Parameters.AddWithValue("max_active", quota.MaxActive);
command.Parameters.AddWithValue("max_per_hour", quota.MaxPerHour);
command.Parameters.AddWithValue("burst_capacity", quota.BurstCapacity);
command.Parameters.AddWithValue("refill_rate", quota.RefillRate);
command.Parameters.AddWithValue("current_tokens", quota.CurrentTokens);
command.Parameters.AddWithValue("last_refill_at", quota.LastRefillAt);
command.Parameters.AddWithValue("current_active", quota.CurrentActive);
command.Parameters.AddWithValue("current_hour_count", quota.CurrentHourCount);
command.Parameters.AddWithValue("current_hour_start", quota.CurrentHourStart);
command.Parameters.AddWithValue("paused", quota.Paused);
command.Parameters.AddWithValue("pause_reason", (object?)quota.PauseReason ?? DBNull.Value);
command.Parameters.AddWithValue("quota_ticket", (object?)quota.QuotaTicket ?? DBNull.Value);
command.Parameters.AddWithValue("updated_at", quota.UpdatedAt);
command.Parameters.AddWithValue("updated_by", quota.UpdatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows == 0)
{
_logger.LogWarning("Quota not found for update: {QuotaId}", quota.QuotaId);
}
}
public async Task UpdateStateAsync(
string tenantId,
Guid quotaId,
double currentTokens,
DateTimeOffset lastRefillAt,
int currentActive,
int currentHourCount,
DateTimeOffset currentHourStart,
string updatedBy,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateStateSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
command.Parameters.AddWithValue("current_tokens", currentTokens);
command.Parameters.AddWithValue("last_refill_at", lastRefillAt);
command.Parameters.AddWithValue("current_active", currentActive);
command.Parameters.AddWithValue("current_hour_count", currentHourCount);
command.Parameters.AddWithValue("current_hour_start", currentHourStart);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
command.Parameters.AddWithValue("updated_by", updatedBy);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
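// Illustrative refill math (sketch; assumed, not called by this repository):
// callers are expected to refill before admission-checking and then persist
// the new state via UpdateStateAsync. RefillRate is assumed to be tokens per
// second, capped at BurstCapacity.
private static double RefillTokens(Quota quota, DateTimeOffset now)
{
    var elapsedSeconds = Math.Max(0d, (now - quota.LastRefillAt).TotalSeconds);
    return Math.Min(quota.BurstCapacity, quota.CurrentTokens + (elapsedSeconds * quota.RefillRate));
}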
public async Task PauseAsync(string tenantId, Guid quotaId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(PauseQuotaSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
command.Parameters.AddWithValue("pause_reason", reason);
command.Parameters.AddWithValue("quota_ticket", (object?)ticket ?? DBNull.Value);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
command.Parameters.AddWithValue("updated_by", updatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.QuotaPaused(tenantId);
}
}
public async Task ResumeAsync(string tenantId, Guid quotaId, string updatedBy, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(ResumeQuotaSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
command.Parameters.AddWithValue("updated_by", updatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.QuotaResumed(tenantId);
}
}
public async Task IncrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(IncrementActiveSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
public async Task DecrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(DecrementActiveSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
public async Task<IReadOnlyList<Quota>> ListAsync(
string tenantId,
string? jobType,
bool? paused,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, jobType, paused, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var quotas = new List<Quota>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
quotas.Add(MapQuota(reader));
}
return quotas;
}
public async Task<bool> DeleteAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(DeleteQuotaSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("quota_id", quotaId);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
private static void AddQuotaParameters(NpgsqlCommand command, Quota quota)
{
command.Parameters.AddWithValue("quota_id", quota.QuotaId);
command.Parameters.AddWithValue("tenant_id", quota.TenantId);
command.Parameters.AddWithValue("job_type", (object?)quota.JobType ?? DBNull.Value);
command.Parameters.AddWithValue("max_active", quota.MaxActive);
command.Parameters.AddWithValue("max_per_hour", quota.MaxPerHour);
command.Parameters.AddWithValue("burst_capacity", quota.BurstCapacity);
command.Parameters.AddWithValue("refill_rate", quota.RefillRate);
command.Parameters.AddWithValue("current_tokens", quota.CurrentTokens);
command.Parameters.AddWithValue("last_refill_at", quota.LastRefillAt);
command.Parameters.AddWithValue("current_active", quota.CurrentActive);
command.Parameters.AddWithValue("current_hour_count", quota.CurrentHourCount);
command.Parameters.AddWithValue("current_hour_start", quota.CurrentHourStart);
command.Parameters.AddWithValue("paused", quota.Paused);
command.Parameters.AddWithValue("pause_reason", (object?)quota.PauseReason ?? DBNull.Value);
command.Parameters.AddWithValue("quota_ticket", (object?)quota.QuotaTicket ?? DBNull.Value);
command.Parameters.AddWithValue("created_at", quota.CreatedAt);
command.Parameters.AddWithValue("updated_at", quota.UpdatedAt);
command.Parameters.AddWithValue("updated_by", quota.UpdatedBy);
}
private static Quota MapQuota(NpgsqlDataReader reader)
{
return new Quota(
QuotaId: reader.GetGuid(0),
TenantId: reader.GetString(1),
JobType: reader.IsDBNull(2) ? null : reader.GetString(2),
MaxActive: reader.GetInt32(3),
MaxPerHour: reader.GetInt32(4),
BurstCapacity: reader.GetInt32(5),
RefillRate: reader.GetDouble(6),
CurrentTokens: reader.GetDouble(7),
LastRefillAt: reader.GetFieldValue<DateTimeOffset>(8),
CurrentActive: reader.GetInt32(9),
CurrentHourCount: reader.GetInt32(10),
CurrentHourStart: reader.GetFieldValue<DateTimeOffset>(11),
Paused: reader.GetBoolean(12),
PauseReason: reader.IsDBNull(13) ? null : reader.GetString(13),
QuotaTicket: reader.IsDBNull(14) ? null : reader.GetString(14),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(15),
UpdatedAt: reader.GetFieldValue<DateTimeOffset>(16),
UpdatedBy: reader.GetString(17));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
string? jobType,
bool? paused,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectQuotaColumns} FROM quotas WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (jobType is not null)
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", jobType));
}
if (paused.HasValue)
{
sb.Append(" AND paused = @paused");
parameters.Add(("paused", paused.Value));
}
sb.Append(" ORDER BY job_type NULLS FIRST LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
/// <summary>
/// Exception thrown when attempting to create a duplicate quota.
/// </summary>
public sealed class DuplicateQuotaException : Exception
{
public string TenantId { get; }
public string? JobType { get; }
public DuplicateQuotaException(string tenantId, string? jobType, Exception innerException)
: base($"Quota for tenant '{tenantId}' and job type '{jobType ?? "(all)"}' already exists.", innerException)
{
TenantId = tenantId;
JobType = jobType;
}
}
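
// --- Illustrative usage sketch (not part of the original commit) ---
// Shows how a dispatcher might bracket job execution with the active-count
// helpers above so the quota's CurrentActive stays balanced even when the job
// throws. The PostgresQuotaRepository type name and the executeJobAsync
// delegate are assumptions; only IncrementActiveAsync/DecrementActiveAsync
// come from this file.
internal static class QuotaUsageSketch
{
    public static async Task RunWithActiveSlotAsync(
        PostgresQuotaRepository quotas,
        string tenantId,
        Guid quotaId,
        Func<CancellationToken, Task> executeJobAsync,
        CancellationToken cancellationToken)
    {
        // Reserve the slot up front; release it whether the job succeeds or faults.
        await quotas.IncrementActiveAsync(tenantId, quotaId, cancellationToken).ConfigureAwait(false);
        try
        {
            await executeJobAsync(cancellationToken).ConfigureAwait(false);
        }
        finally
        {
            await quotas.DecrementActiveAsync(tenantId, quotaId, cancellationToken).ConfigureAwait(false);
        }
    }
}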

View File

@@ -0,0 +1,199 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.DeadLetter;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of replay audit repository.
/// </summary>
public sealed class PostgresReplayAuditRepository : IReplayAuditRepository
{
private const string SelectAuditColumns = """
audit_id, tenant_id, entry_id, attempt_number,
success, new_job_id, error_message,
triggered_by, triggered_at, completed_at, initiated_by
""";
private const string SelectByEntrySql = $"""
SELECT {SelectAuditColumns}
FROM dead_letter_replay_audit
WHERE tenant_id = @tenant_id AND entry_id = @entry_id
ORDER BY attempt_number ASC
""";
private const string SelectByIdSql = $"""
SELECT {SelectAuditColumns}
FROM dead_letter_replay_audit
WHERE tenant_id = @tenant_id AND audit_id = @audit_id
""";
private const string SelectByNewJobIdSql = $"""
SELECT {SelectAuditColumns}
FROM dead_letter_replay_audit
WHERE tenant_id = @tenant_id AND new_job_id = @new_job_id
""";
private const string InsertAuditSql = """
INSERT INTO dead_letter_replay_audit (
audit_id, tenant_id, entry_id, attempt_number,
success, new_job_id, error_message,
triggered_by, triggered_at, completed_at, initiated_by)
VALUES (
@audit_id, @tenant_id, @entry_id, @attempt_number,
@success, @new_job_id, @error_message,
@triggered_by, @triggered_at, @completed_at, @initiated_by)
""";
private const string UpdateAuditSql = """
UPDATE dead_letter_replay_audit
SET success = @success,
new_job_id = @new_job_id,
error_message = @error_message,
completed_at = @completed_at
WHERE tenant_id = @tenant_id AND audit_id = @audit_id
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresReplayAuditRepository> _logger;
public PostgresReplayAuditRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresReplayAuditRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<IReadOnlyList<ReplayAuditRecord>> GetByEntryAsync(
string tenantId,
Guid entryId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByEntrySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("entry_id", entryId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var records = new List<ReplayAuditRecord>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
records.Add(MapRecord(reader));
}
return records;
}
public async Task<ReplayAuditRecord?> GetByIdAsync(
string tenantId,
Guid auditId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("audit_id", auditId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapRecord(reader);
}
public async Task<ReplayAuditRecord?> GetByNewJobIdAsync(
string tenantId,
Guid newJobId,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByNewJobIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("new_job_id", newJobId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapRecord(reader);
}
public async Task CreateAsync(
ReplayAuditRecord record,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(record.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertAuditSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddParameters(command, record);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.DeadLetterReplayAttempted(record.TenantId, record.TriggeredBy);
}
public async Task<bool> UpdateAsync(
ReplayAuditRecord record,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(record.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateAuditSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", record.TenantId);
command.Parameters.AddWithValue("audit_id", record.AuditId);
command.Parameters.AddWithValue("success", record.Success);
command.Parameters.AddWithValue("new_job_id", (object?)record.NewJobId ?? DBNull.Value);
command.Parameters.AddWithValue("error_message", (object?)record.ErrorMessage ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)record.CompletedAt ?? DBNull.Value);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0 && record.Success)
{
OrchestratorMetrics.DeadLetterReplaySucceeded(record.TenantId);
}
else if (rows > 0 && !record.Success)
{
OrchestratorMetrics.DeadLetterReplayFailed(record.TenantId);
}
return rows > 0;
}
private static void AddParameters(NpgsqlCommand command, ReplayAuditRecord record)
{
command.Parameters.AddWithValue("audit_id", record.AuditId);
command.Parameters.AddWithValue("tenant_id", record.TenantId);
command.Parameters.AddWithValue("entry_id", record.EntryId);
command.Parameters.AddWithValue("attempt_number", record.AttemptNumber);
command.Parameters.AddWithValue("success", record.Success);
command.Parameters.AddWithValue("new_job_id", (object?)record.NewJobId ?? DBNull.Value);
command.Parameters.AddWithValue("error_message", (object?)record.ErrorMessage ?? DBNull.Value);
command.Parameters.AddWithValue("triggered_by", record.TriggeredBy);
command.Parameters.AddWithValue("triggered_at", record.TriggeredAt);
command.Parameters.AddWithValue("completed_at", (object?)record.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("initiated_by", record.InitiatedBy);
}
private static ReplayAuditRecord MapRecord(NpgsqlDataReader reader) =>
new(
AuditId: reader.GetGuid(0),
TenantId: reader.GetString(1),
EntryId: reader.GetGuid(2),
AttemptNumber: reader.GetInt32(3),
Success: reader.GetBoolean(4),
NewJobId: reader.IsDBNull(5) ? null : reader.GetGuid(5),
ErrorMessage: reader.IsDBNull(6) ? null : reader.GetString(6),
TriggeredBy: reader.GetString(7),
TriggeredAt: reader.GetFieldValue<DateTimeOffset>(8),
CompletedAt: reader.IsDBNull(9) ? null : reader.GetFieldValue<DateTimeOffset>(9),
InitiatedBy: reader.GetString(10));
}
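
// --- Illustrative usage sketch (not part of the original commit) ---
// A minimal sketch of the create-then-update audit flow this repository
// supports: the record is written before the replay is attempted and updated
// with the outcome afterwards. It assumes ReplayAuditRecord is a positional
// record (as MapRecord suggests); the replayJobAsync delegate and the
// "manual"/"sketch-user" values are hypothetical.
internal static class ReplayAuditSketch
{
    public static async Task<bool> AuditReplayAsync(
        PostgresReplayAuditRepository audits,
        string tenantId,
        Guid entryId,
        int attemptNumber,
        Func<CancellationToken, Task<Guid>> replayJobAsync, // hypothetical: returns the new job id
        CancellationToken cancellationToken)
    {
        var record = new ReplayAuditRecord(
            AuditId: Guid.NewGuid(),
            TenantId: tenantId,
            EntryId: entryId,
            AttemptNumber: attemptNumber,
            Success: false,
            NewJobId: null,
            ErrorMessage: null,
            TriggeredBy: "manual",
            TriggeredAt: DateTimeOffset.UtcNow,
            CompletedAt: null,
            InitiatedBy: "sketch-user");
        await audits.CreateAsync(record, cancellationToken).ConfigureAwait(false);

        try
        {
            var newJobId = await replayJobAsync(cancellationToken).ConfigureAwait(false);
            record = record with { Success = true, NewJobId = newJobId, CompletedAt = DateTimeOffset.UtcNow };
        }
        catch (Exception ex)
        {
            record = record with { ErrorMessage = ex.Message, CompletedAt = DateTimeOffset.UtcNow };
        }

        return await audits.UpdateAsync(record, cancellationToken).ConfigureAwait(false);
    }
}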

View File

@@ -0,0 +1,388 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of run repository.
/// </summary>
public sealed class PostgresRunRepository : IRunRepository
{
private const string SelectRunColumns = """
run_id, tenant_id, project_id, source_id, run_type, status, correlation_id,
total_jobs, completed_jobs, succeeded_jobs, failed_jobs, created_at,
started_at, completed_at, created_by, metadata
""";
private const string SelectByIdSql = $"""
SELECT {SelectRunColumns}
FROM runs
WHERE tenant_id = @tenant_id AND run_id = @run_id
""";
private const string InsertRunSql = """
INSERT INTO runs (
run_id, tenant_id, project_id, source_id, run_type, status, correlation_id,
total_jobs, completed_jobs, succeeded_jobs, failed_jobs, created_at,
started_at, completed_at, created_by, metadata)
VALUES (
@run_id, @tenant_id, @project_id, @source_id, @run_type, @status::run_status, @correlation_id,
@total_jobs, @completed_jobs, @succeeded_jobs, @failed_jobs, @created_at,
@started_at, @completed_at, @created_by, @metadata)
""";
private const string UpdateStatusSql = """
UPDATE runs
SET status = @status::run_status,
total_jobs = @total_jobs,
completed_jobs = @completed_jobs,
succeeded_jobs = @succeeded_jobs,
failed_jobs = @failed_jobs,
started_at = @started_at,
completed_at = @completed_at
WHERE tenant_id = @tenant_id AND run_id = @run_id
""";
private const string IncrementJobCountsSql = """
UPDATE runs
SET completed_jobs = completed_jobs + 1,
succeeded_jobs = CASE WHEN @succeeded THEN succeeded_jobs + 1 ELSE succeeded_jobs END,
failed_jobs = CASE WHEN NOT @succeeded THEN failed_jobs + 1 ELSE failed_jobs END,
started_at = COALESCE(started_at, @now),
status = CASE
WHEN completed_jobs + 1 >= total_jobs THEN
CASE
                        WHEN @succeeded AND failed_jobs = 0 THEN 'succeeded'::run_status
WHEN NOT @succeeded AND succeeded_jobs = 0 THEN 'failed'::run_status
ELSE 'partially_succeeded'::run_status
END
ELSE 'running'::run_status
END,
completed_at = CASE WHEN completed_jobs + 1 >= total_jobs THEN @now ELSE completed_at END
WHERE tenant_id = @tenant_id AND run_id = @run_id
RETURNING status
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresRunRepository> _logger;
public PostgresRunRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresRunRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Run?> GetByIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("run_id", runId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapRun(reader);
}
public async Task CreateAsync(Run run, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(run.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertRunSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddRunParameters(command, run);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.RunCreated(run.TenantId, run.RunType);
}
public async Task UpdateStatusAsync(
string tenantId,
Guid runId,
RunStatus status,
int totalJobs,
int completedJobs,
int succeededJobs,
int failedJobs,
DateTimeOffset? startedAt,
DateTimeOffset? completedAt,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateStatusSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("run_id", runId);
command.Parameters.AddWithValue("status", StatusToString(status));
command.Parameters.AddWithValue("total_jobs", totalJobs);
command.Parameters.AddWithValue("completed_jobs", completedJobs);
command.Parameters.AddWithValue("succeeded_jobs", succeededJobs);
command.Parameters.AddWithValue("failed_jobs", failedJobs);
command.Parameters.AddWithValue("started_at", (object?)startedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)completedAt ?? DBNull.Value);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
public async Task IncrementJobCountsAsync(
string tenantId,
Guid runId,
bool succeeded,
CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(IncrementJobCountsSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("run_id", runId);
command.Parameters.AddWithValue("succeeded", succeeded);
command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
var newStatus = reader.GetString(0);
if (newStatus is "succeeded" or "failed" or "partially_succeeded")
{
// Run completed - get the full run for metrics
var run = await GetByIdAsync(tenantId, runId, cancellationToken).ConfigureAwait(false);
if (run is not null)
{
OrchestratorMetrics.RunCompleted(tenantId, run.RunType, newStatus);
}
}
}
}
public async Task<IReadOnlyList<Run>> ListAsync(
string tenantId,
Guid? sourceId,
string? runType,
RunStatus? status,
string? projectId,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, sourceId, runType, status, projectId, createdAfter, createdBefore, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var runs = new List<Run>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
runs.Add(MapRun(reader));
}
return runs;
}
public async Task<int> CountAsync(
string tenantId,
Guid? sourceId,
string? runType,
RunStatus? status,
string? projectId,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildCountQuery(tenantId, sourceId, runType, status, projectId);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
return Convert.ToInt32(result);
}
private static void AddRunParameters(NpgsqlCommand command, Run run)
{
command.Parameters.AddWithValue("run_id", run.RunId);
command.Parameters.AddWithValue("tenant_id", run.TenantId);
command.Parameters.AddWithValue("project_id", (object?)run.ProjectId ?? DBNull.Value);
command.Parameters.AddWithValue("source_id", run.SourceId);
command.Parameters.AddWithValue("run_type", run.RunType);
command.Parameters.AddWithValue("status", StatusToString(run.Status));
command.Parameters.AddWithValue("correlation_id", (object?)run.CorrelationId ?? DBNull.Value);
command.Parameters.AddWithValue("total_jobs", run.TotalJobs);
command.Parameters.AddWithValue("completed_jobs", run.CompletedJobs);
command.Parameters.AddWithValue("succeeded_jobs", run.SucceededJobs);
command.Parameters.AddWithValue("failed_jobs", run.FailedJobs);
command.Parameters.AddWithValue("created_at", run.CreatedAt);
command.Parameters.AddWithValue("started_at", (object?)run.StartedAt ?? DBNull.Value);
command.Parameters.AddWithValue("completed_at", (object?)run.CompletedAt ?? DBNull.Value);
command.Parameters.AddWithValue("created_by", run.CreatedBy);
command.Parameters.Add(new NpgsqlParameter("metadata", NpgsqlDbType.Jsonb)
{
Value = (object?)run.Metadata ?? DBNull.Value
});
}
private static Run MapRun(NpgsqlDataReader reader)
{
return new Run(
RunId: reader.GetGuid(0),
TenantId: reader.GetString(1),
ProjectId: reader.IsDBNull(2) ? null : reader.GetString(2),
SourceId: reader.GetGuid(3),
RunType: reader.GetString(4),
Status: ParseStatus(reader.GetString(5)),
CorrelationId: reader.IsDBNull(6) ? null : reader.GetString(6),
TotalJobs: reader.GetInt32(7),
CompletedJobs: reader.GetInt32(8),
SucceededJobs: reader.GetInt32(9),
FailedJobs: reader.GetInt32(10),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(11),
StartedAt: reader.IsDBNull(12) ? null : reader.GetFieldValue<DateTimeOffset>(12),
CompletedAt: reader.IsDBNull(13) ? null : reader.GetFieldValue<DateTimeOffset>(13),
CreatedBy: reader.GetString(14),
Metadata: reader.IsDBNull(15) ? null : reader.GetString(15));
}
private static string StatusToString(RunStatus status) => status switch
{
RunStatus.Pending => "pending",
RunStatus.Running => "running",
RunStatus.Succeeded => "succeeded",
RunStatus.PartiallySucceeded => "partially_succeeded",
RunStatus.Failed => "failed",
RunStatus.Canceled => "canceled",
_ => throw new ArgumentOutOfRangeException(nameof(status))
};
private static RunStatus ParseStatus(string status) => status switch
{
"pending" => RunStatus.Pending,
"running" => RunStatus.Running,
"succeeded" => RunStatus.Succeeded,
"partially_succeeded" => RunStatus.PartiallySucceeded,
"failed" => RunStatus.Failed,
"canceled" => RunStatus.Canceled,
        _ => throw new ArgumentOutOfRangeException(nameof(status), status, "Unknown run_status value.")
};
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
Guid? sourceId,
string? runType,
RunStatus? status,
string? projectId,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectRunColumns} FROM runs WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (!string.IsNullOrEmpty(runType))
{
sb.Append(" AND run_type = @run_type");
parameters.Add(("run_type", runType));
}
if (status.HasValue)
{
sb.Append(" AND status = @status::run_status");
parameters.Add(("status", StatusToString(status.Value)));
}
if (!string.IsNullOrEmpty(projectId))
{
sb.Append(" AND project_id = @project_id");
parameters.Add(("project_id", projectId));
}
if (createdAfter.HasValue)
{
sb.Append(" AND created_at >= @created_after");
parameters.Add(("created_after", createdAfter.Value));
}
if (createdBefore.HasValue)
{
sb.Append(" AND created_at < @created_before");
parameters.Add(("created_before", createdBefore.Value));
}
sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
private static (string sql, List<(string name, object value)> parameters) BuildCountQuery(
string tenantId,
Guid? sourceId,
string? runType,
RunStatus? status,
string? projectId)
{
var sb = new StringBuilder();
sb.Append("SELECT COUNT(*) FROM runs WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (!string.IsNullOrEmpty(runType))
{
sb.Append(" AND run_type = @run_type");
parameters.Add(("run_type", runType));
}
if (status.HasValue)
{
sb.Append(" AND status = @status::run_status");
parameters.Add(("status", StatusToString(status.Value)));
}
if (!string.IsNullOrEmpty(projectId))
{
sb.Append(" AND project_id = @project_id");
parameters.Add(("project_id", projectId));
}
return (sb.ToString(), parameters);
}
}
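
// --- Illustrative usage sketch (not part of the original commit) ---
// A minimal sketch of the run lifecycle against the repository above, assuming
// Run is a positional record matching MapRun. The "scan" run type and
// "sketch-user" are hypothetical values chosen for illustration.
internal static class RunLifecycleSketch
{
    public static async Task RunTwoJobBatchAsync(
        PostgresRunRepository runs,
        string tenantId,
        Guid sourceId,
        CancellationToken ct)
    {
        var run = new Run(
            RunId: Guid.NewGuid(),
            TenantId: tenantId,
            ProjectId: null,
            SourceId: sourceId,
            RunType: "scan",
            Status: RunStatus.Pending,
            CorrelationId: null,
            TotalJobs: 2,
            CompletedJobs: 0,
            SucceededJobs: 0,
            FailedJobs: 0,
            CreatedAt: DateTimeOffset.UtcNow,
            StartedAt: null,
            CompletedAt: null,
            CreatedBy: "sketch-user",
            Metadata: null);
        await runs.CreateAsync(run, ct).ConfigureAwait(false);

        // One increment per finished job; the SQL derives running/terminal status itself.
        await runs.IncrementJobCountsAsync(tenantId, run.RunId, succeeded: true, ct).ConfigureAwait(false);
        await runs.IncrementJobCountsAsync(tenantId, run.RunId, succeeded: false, ct).ConfigureAwait(false);
        // After the second call the run ends as 'partially_succeeded'.
    }
}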

View File

@@ -0,0 +1,314 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of source repository.
/// </summary>
public sealed class PostgresSourceRepository : ISourceRepository
{
private const string SelectSourceColumns = """
source_id, tenant_id, name, source_type, enabled, paused, pause_reason,
pause_ticket, configuration, created_at, updated_at, updated_by
""";
private const string SelectByIdSql = $"""
SELECT {SelectSourceColumns}
FROM sources
WHERE tenant_id = @tenant_id AND source_id = @source_id
""";
private const string SelectByNameSql = $"""
SELECT {SelectSourceColumns}
FROM sources
WHERE tenant_id = @tenant_id AND name = @name
""";
private const string InsertSourceSql = """
INSERT INTO sources (
source_id, tenant_id, name, source_type, enabled, paused, pause_reason,
pause_ticket, configuration, created_at, updated_at, updated_by)
VALUES (
@source_id, @tenant_id, @name, @source_type, @enabled, @paused, @pause_reason,
@pause_ticket, @configuration, @created_at, @updated_at, @updated_by)
""";
private const string UpdateSourceSql = """
UPDATE sources
SET name = @name,
source_type = @source_type,
enabled = @enabled,
paused = @paused,
pause_reason = @pause_reason,
pause_ticket = @pause_ticket,
configuration = @configuration,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND source_id = @source_id
""";
private const string PauseSourceSql = """
UPDATE sources
SET paused = TRUE,
pause_reason = @pause_reason,
pause_ticket = @pause_ticket,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND source_id = @source_id
""";
private const string ResumeSourceSql = """
UPDATE sources
SET paused = FALSE,
pause_reason = NULL,
pause_ticket = NULL,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND source_id = @source_id
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresSourceRepository> _logger;
public PostgresSourceRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresSourceRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Source?> GetByIdAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapSource(reader);
}
public async Task<Source?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByNameSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("name", name);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapSource(reader);
}
public async Task CreateAsync(Source source, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(source.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertSourceSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddSourceParameters(command, source);
try
{
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.SourceCreated(source.TenantId, source.SourceType);
}
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
{
_logger.LogWarning("Duplicate source name: {Name}", source.Name);
throw new DuplicateSourceException(source.Name, ex);
}
}
public async Task UpdateAsync(Source source, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(source.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateSourceSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", source.TenantId);
command.Parameters.AddWithValue("source_id", source.SourceId);
command.Parameters.AddWithValue("name", source.Name);
command.Parameters.AddWithValue("source_type", source.SourceType);
command.Parameters.AddWithValue("enabled", source.Enabled);
command.Parameters.AddWithValue("paused", source.Paused);
command.Parameters.AddWithValue("pause_reason", (object?)source.PauseReason ?? DBNull.Value);
command.Parameters.AddWithValue("pause_ticket", (object?)source.PauseTicket ?? DBNull.Value);
command.Parameters.Add(new NpgsqlParameter("configuration", NpgsqlDbType.Jsonb)
{
Value = (object?)source.Configuration ?? DBNull.Value
});
command.Parameters.AddWithValue("updated_at", source.UpdatedAt);
command.Parameters.AddWithValue("updated_by", source.UpdatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows == 0)
{
_logger.LogWarning("Source not found for update: {SourceId}", source.SourceId);
}
}
public async Task PauseAsync(string tenantId, Guid sourceId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(PauseSourceSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
command.Parameters.AddWithValue("pause_reason", reason);
command.Parameters.AddWithValue("pause_ticket", (object?)ticket ?? DBNull.Value);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
command.Parameters.AddWithValue("updated_by", updatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.SourcePaused(tenantId);
}
}
public async Task ResumeAsync(string tenantId, Guid sourceId, string updatedBy, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(ResumeSourceSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
command.Parameters.AddWithValue("updated_at", DateTimeOffset.UtcNow);
command.Parameters.AddWithValue("updated_by", updatedBy);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.SourceResumed(tenantId);
}
}
public async Task<IReadOnlyList<Source>> ListAsync(
string tenantId,
string? sourceType,
bool? enabled,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, sourceType, enabled, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var sources = new List<Source>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
sources.Add(MapSource(reader));
}
return sources;
}
private static void AddSourceParameters(NpgsqlCommand command, Source source)
{
command.Parameters.AddWithValue("source_id", source.SourceId);
command.Parameters.AddWithValue("tenant_id", source.TenantId);
command.Parameters.AddWithValue("name", source.Name);
command.Parameters.AddWithValue("source_type", source.SourceType);
command.Parameters.AddWithValue("enabled", source.Enabled);
command.Parameters.AddWithValue("paused", source.Paused);
command.Parameters.AddWithValue("pause_reason", (object?)source.PauseReason ?? DBNull.Value);
command.Parameters.AddWithValue("pause_ticket", (object?)source.PauseTicket ?? DBNull.Value);
command.Parameters.Add(new NpgsqlParameter("configuration", NpgsqlDbType.Jsonb)
{
Value = (object?)source.Configuration ?? DBNull.Value
});
command.Parameters.AddWithValue("created_at", source.CreatedAt);
command.Parameters.AddWithValue("updated_at", source.UpdatedAt);
command.Parameters.AddWithValue("updated_by", source.UpdatedBy);
}
private static Source MapSource(NpgsqlDataReader reader)
{
return new Source(
SourceId: reader.GetGuid(0),
TenantId: reader.GetString(1),
Name: reader.GetString(2),
SourceType: reader.GetString(3),
Enabled: reader.GetBoolean(4),
Paused: reader.GetBoolean(5),
PauseReason: reader.IsDBNull(6) ? null : reader.GetString(6),
PauseTicket: reader.IsDBNull(7) ? null : reader.GetString(7),
Configuration: reader.IsDBNull(8) ? null : reader.GetString(8),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(9),
UpdatedAt: reader.GetFieldValue<DateTimeOffset>(10),
UpdatedBy: reader.GetString(11));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
string? sourceType,
bool? enabled,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectSourceColumns} FROM sources WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (!string.IsNullOrEmpty(sourceType))
{
sb.Append(" AND source_type = @source_type");
parameters.Add(("source_type", sourceType));
}
if (enabled.HasValue)
{
sb.Append(" AND enabled = @enabled");
parameters.Add(("enabled", enabled.Value));
}
sb.Append(" ORDER BY name LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
/// <summary>
/// Exception thrown when attempting to create a source with a duplicate name.
/// </summary>
public sealed class DuplicateSourceException : Exception
{
public string Name { get; }
public DuplicateSourceException(string name, Exception innerException)
: base($"Source with name '{name}' already exists.", innerException)
{
Name = name;
}
}
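
// --- Illustrative usage sketch (not part of the original commit) ---
// A minimal sketch of the create/pause/resume flow, assuming Source is a
// positional record matching MapSource above. The "nvd-feed" name, "advisory"
// type, cron configuration, ticket reference, and operator name are all
// hypothetical values.
internal static class SourceLifecycleSketch
{
    public static async Task RegisterAndPauseAsync(
        PostgresSourceRepository sources,
        string tenantId,
        CancellationToken ct)
    {
        var source = new Source(
            SourceId: Guid.NewGuid(),
            TenantId: tenantId,
            Name: "nvd-feed",
            SourceType: "advisory",
            Enabled: true,
            Paused: false,
            PauseReason: null,
            PauseTicket: null,
            Configuration: """{"cron":"0 * * * *"}""",
            CreatedAt: DateTimeOffset.UtcNow,
            UpdatedAt: DateTimeOffset.UtcNow,
            UpdatedBy: "sketch-user");
        try
        {
            await sources.CreateAsync(source, ct).ConfigureAwait(false);
        }
        catch (DuplicateSourceException)
        {
            // Name already registered for this tenant; treat as idempotent here.
        }

        await sources.PauseAsync(tenantId, source.SourceId, "upstream outage", "OPS-123", "sketch-user", ct).ConfigureAwait(false);
        await sources.ResumeAsync(tenantId, source.SourceId, "sketch-user", ct).ConfigureAwait(false);
    }
}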

View File

@@ -0,0 +1,310 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of throttle repository.
/// </summary>
public sealed class PostgresThrottleRepository : IThrottleRepository
{
private const string SelectThrottleColumns = """
throttle_id, tenant_id, source_id, job_type, active, reason, ticket,
created_at, expires_at, created_by
""";
private const string SelectByIdSql = $"""
SELECT {SelectThrottleColumns}
FROM throttles
WHERE tenant_id = @tenant_id AND throttle_id = @throttle_id
""";
private const string SelectActiveBySourceSql = $"""
SELECT {SelectThrottleColumns}
FROM throttles
WHERE tenant_id = @tenant_id
AND source_id = @source_id
AND active = TRUE
AND (expires_at IS NULL OR expires_at > @now)
ORDER BY created_at DESC
""";
private const string SelectActiveByJobTypeSql = $"""
SELECT {SelectThrottleColumns}
FROM throttles
WHERE tenant_id = @tenant_id
AND job_type = @job_type
AND active = TRUE
AND (expires_at IS NULL OR expires_at > @now)
ORDER BY created_at DESC
""";
private const string InsertThrottleSql = """
INSERT INTO throttles (
throttle_id, tenant_id, source_id, job_type, active, reason, ticket,
created_at, expires_at, created_by)
VALUES (
@throttle_id, @tenant_id, @source_id, @job_type, @active, @reason, @ticket,
@created_at, @expires_at, @created_by)
""";
private const string DeactivateSql = """
UPDATE throttles
SET active = FALSE
WHERE tenant_id = @tenant_id AND throttle_id = @throttle_id
""";
private const string DeactivateBySourceSql = """
UPDATE throttles
SET active = FALSE
WHERE tenant_id = @tenant_id AND source_id = @source_id AND active = TRUE
""";
private const string DeactivateByJobTypeSql = """
UPDATE throttles
SET active = FALSE
WHERE tenant_id = @tenant_id AND job_type = @job_type AND active = TRUE
""";
private const string CleanupExpiredSql = """
UPDATE throttles
SET active = FALSE
WHERE active = TRUE AND expires_at IS NOT NULL AND expires_at <= @now
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresThrottleRepository> _logger;
public PostgresThrottleRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresThrottleRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Throttle?> GetByIdAsync(string tenantId, Guid throttleId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("throttle_id", throttleId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapThrottle(reader);
}
public async Task<IReadOnlyList<Throttle>> GetActiveBySourceAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectActiveBySourceSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var throttles = new List<Throttle>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
throttles.Add(MapThrottle(reader));
}
return throttles;
}
public async Task<IReadOnlyList<Throttle>> GetActiveByJobTypeAsync(string tenantId, string jobType, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectActiveByJobTypeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_type", jobType);
command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var throttles = new List<Throttle>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
throttles.Add(MapThrottle(reader));
}
return throttles;
}
public async Task CreateAsync(Throttle throttle, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(throttle.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertThrottleSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("throttle_id", throttle.ThrottleId);
command.Parameters.AddWithValue("tenant_id", throttle.TenantId);
command.Parameters.AddWithValue("source_id", (object?)throttle.SourceId ?? DBNull.Value);
command.Parameters.AddWithValue("job_type", (object?)throttle.JobType ?? DBNull.Value);
command.Parameters.AddWithValue("active", throttle.Active);
command.Parameters.AddWithValue("reason", throttle.Reason);
command.Parameters.AddWithValue("ticket", (object?)throttle.Ticket ?? DBNull.Value);
command.Parameters.AddWithValue("created_at", throttle.CreatedAt);
command.Parameters.AddWithValue("expires_at", (object?)throttle.ExpiresAt ?? DBNull.Value);
command.Parameters.AddWithValue("created_by", throttle.CreatedBy);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.ThrottleCreated(throttle.TenantId, throttle.Reason);
}
public async Task DeactivateAsync(string tenantId, Guid throttleId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(DeactivateSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("throttle_id", throttleId);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.ThrottleDeactivated(tenantId);
}
}
public async Task DeactivateBySourceAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(DeactivateBySourceSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
_logger.LogInformation("Deactivated {Count} throttles for source {SourceId}", rows, sourceId);
}
}
public async Task DeactivateByJobTypeAsync(string tenantId, string jobType, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(DeactivateByJobTypeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_type", jobType);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
_logger.LogInformation("Deactivated {Count} throttles for job type {JobType}", rows, jobType);
}
}
public async Task<int> CleanupExpiredAsync(DateTimeOffset now, CancellationToken cancellationToken)
{
// Use system tenant for cross-tenant cleanup operations
// In production, this should use a dedicated admin connection or be run by a background service
await using var connection = await _dataSource.OpenConnectionAsync("system", "admin", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(CleanupExpiredSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("now", now);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
_logger.LogInformation("Cleaned up {Count} expired throttles", rows);
}
return rows;
}
public async Task<IReadOnlyList<Throttle>> ListAsync(
string tenantId,
bool? active,
Guid? sourceId,
string? jobType,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, active, sourceId, jobType, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var throttles = new List<Throttle>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
throttles.Add(MapThrottle(reader));
}
return throttles;
}
private static Throttle MapThrottle(NpgsqlDataReader reader)
{
return new Throttle(
ThrottleId: reader.GetGuid(0),
TenantId: reader.GetString(1),
SourceId: reader.IsDBNull(2) ? null : reader.GetGuid(2),
JobType: reader.IsDBNull(3) ? null : reader.GetString(3),
Active: reader.GetBoolean(4),
Reason: reader.GetString(5),
Ticket: reader.IsDBNull(6) ? null : reader.GetString(6),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(7),
ExpiresAt: reader.IsDBNull(8) ? null : reader.GetFieldValue<DateTimeOffset>(8),
CreatedBy: reader.GetString(9));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
bool? active,
Guid? sourceId,
string? jobType,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectThrottleColumns} FROM throttles WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (active.HasValue)
{
sb.Append(" AND active = @active");
parameters.Add(("active", active.Value));
}
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (!string.IsNullOrEmpty(jobType))
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", jobType));
}
sb.Append(" ORDER BY created_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
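
// --- Illustrative usage sketch (not part of the original commit) ---
// A minimal sketch of a hosted service that periodically sweeps expired
// throttles via CleanupExpiredAsync. BackgroundService comes from
// Microsoft.Extensions.Hosting (an assumed dependency of the host, not of this
// file); the five-minute period is an arbitrary choice.
internal sealed class ThrottleCleanupSketch : Microsoft.Extensions.Hosting.BackgroundService
{
    private readonly PostgresThrottleRepository _throttles;

    public ThrottleCleanupSketch(PostgresThrottleRepository throttles)
        => _throttles = throttles;

    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        using var timer = new PeriodicTimer(TimeSpan.FromMinutes(5));
        while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
        {
            // Deactivates every active throttle whose expires_at has passed.
            await _throttles.CleanupExpiredAsync(DateTimeOffset.UtcNow, stoppingToken).ConfigureAwait(false);
        }
    }
}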

View File

@@ -0,0 +1,386 @@
using System.Text;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;
namespace StellaOps.Orchestrator.Infrastructure.Postgres;
/// <summary>
/// PostgreSQL implementation of watermark repository.
/// </summary>
public sealed class PostgresWatermarkRepository : IWatermarkRepository
{
private const string SelectWatermarkColumns = """
watermark_id, tenant_id, source_id, job_type, scope_key,
high_watermark, low_watermark, sequence_number, processed_count,
last_batch_hash, created_at, updated_at, updated_by
""";
private const string SelectByScopeKeySql = $"""
SELECT {SelectWatermarkColumns}
FROM watermarks
WHERE tenant_id = @tenant_id AND scope_key = @scope_key
""";
private const string SelectBySourceIdSql = $"""
SELECT {SelectWatermarkColumns}
FROM watermarks
WHERE tenant_id = @tenant_id AND source_id = @source_id AND job_type IS NULL
""";
private const string SelectByJobTypeSql = $"""
SELECT {SelectWatermarkColumns}
FROM watermarks
WHERE tenant_id = @tenant_id AND job_type = @job_type AND source_id IS NULL
""";
private const string SelectBySourceAndJobTypeSql = $"""
SELECT {SelectWatermarkColumns}
FROM watermarks
WHERE tenant_id = @tenant_id AND source_id = @source_id AND job_type = @job_type
""";
private const string InsertWatermarkSql = """
INSERT INTO watermarks (
watermark_id, tenant_id, source_id, job_type, scope_key,
high_watermark, low_watermark, sequence_number, processed_count,
last_batch_hash, created_at, updated_at, updated_by)
VALUES (
@watermark_id, @tenant_id, @source_id, @job_type, @scope_key,
@high_watermark, @low_watermark, @sequence_number, @processed_count,
@last_batch_hash, @created_at, @updated_at, @updated_by)
""";
private const string UpdateWatermarkSql = """
UPDATE watermarks
SET high_watermark = @high_watermark,
low_watermark = @low_watermark,
sequence_number = @sequence_number,
processed_count = @processed_count,
last_batch_hash = @last_batch_hash,
updated_at = @updated_at,
updated_by = @updated_by
WHERE tenant_id = @tenant_id AND watermark_id = @watermark_id
AND sequence_number = @expected_sequence_number
""";
private const string UpsertWatermarkSql = """
INSERT INTO watermarks (
watermark_id, tenant_id, source_id, job_type, scope_key,
high_watermark, low_watermark, sequence_number, processed_count,
last_batch_hash, created_at, updated_at, updated_by)
VALUES (
@watermark_id, @tenant_id, @source_id, @job_type, @scope_key,
@high_watermark, @low_watermark, @sequence_number, @processed_count,
@last_batch_hash, @created_at, @updated_at, @updated_by)
ON CONFLICT (tenant_id, scope_key) DO UPDATE
SET high_watermark = EXCLUDED.high_watermark,
low_watermark = EXCLUDED.low_watermark,
sequence_number = EXCLUDED.sequence_number,
processed_count = EXCLUDED.processed_count,
last_batch_hash = EXCLUDED.last_batch_hash,
updated_at = EXCLUDED.updated_at,
updated_by = EXCLUDED.updated_by
""";
private const string DeleteWatermarkSql = """
DELETE FROM watermarks
WHERE tenant_id = @tenant_id AND scope_key = @scope_key
""";
private const string SelectLaggingSql = $"""
SELECT {SelectWatermarkColumns}
FROM watermarks
WHERE tenant_id = @tenant_id
AND high_watermark < @lag_threshold
ORDER BY high_watermark ASC
LIMIT @limit
""";
private readonly OrchestratorDataSource _dataSource;
private readonly ILogger<PostgresWatermarkRepository> _logger;
public PostgresWatermarkRepository(
OrchestratorDataSource dataSource,
ILogger<PostgresWatermarkRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<Watermark?> GetByScopeKeyAsync(string tenantId, string scopeKey, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByScopeKeySql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapWatermark(reader);
}
public async Task<Watermark?> GetBySourceIdAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectBySourceIdSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapWatermark(reader);
}
public async Task<Watermark?> GetByJobTypeAsync(string tenantId, string jobType, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectByJobTypeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("job_type", jobType);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapWatermark(reader);
}
public async Task<Watermark?> GetBySourceAndJobTypeAsync(string tenantId, Guid sourceId, string jobType, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectBySourceAndJobTypeSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("source_id", sourceId);
command.Parameters.AddWithValue("job_type", jobType);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return MapWatermark(reader);
}
public async Task CreateAsync(Watermark watermark, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(watermark.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(InsertWatermarkSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddWatermarkParameters(command, watermark);
try
{
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.WatermarkCreated(watermark.TenantId, watermark.ScopeKey);
}
catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal))
{
_logger.LogWarning("Duplicate watermark for tenant {TenantId} scope {ScopeKey}", watermark.TenantId, watermark.ScopeKey);
throw new DuplicateWatermarkException(watermark.TenantId, watermark.ScopeKey, ex);
}
}
public async Task<bool> UpdateAsync(Watermark watermark, long expectedSequenceNumber, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(watermark.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpdateWatermarkSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", watermark.TenantId);
command.Parameters.AddWithValue("watermark_id", watermark.WatermarkId);
command.Parameters.AddWithValue("high_watermark", watermark.HighWatermark);
command.Parameters.AddWithValue("low_watermark", (object?)watermark.LowWatermark ?? DBNull.Value);
command.Parameters.AddWithValue("sequence_number", watermark.SequenceNumber);
command.Parameters.AddWithValue("processed_count", watermark.ProcessedCount);
command.Parameters.AddWithValue("last_batch_hash", (object?)watermark.LastBatchHash ?? DBNull.Value);
command.Parameters.AddWithValue("updated_at", watermark.UpdatedAt);
command.Parameters.AddWithValue("updated_by", watermark.UpdatedBy);
command.Parameters.AddWithValue("expected_sequence_number", expectedSequenceNumber);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
if (rows > 0)
{
OrchestratorMetrics.WatermarkAdvanced(watermark.TenantId, watermark.ScopeKey);
}
return rows > 0;
}
public async Task UpsertAsync(Watermark watermark, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(watermark.TenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(UpsertWatermarkSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
AddWatermarkParameters(command, watermark);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
OrchestratorMetrics.WatermarkAdvanced(watermark.TenantId, watermark.ScopeKey);
}
public async Task<IReadOnlyList<Watermark>> ListAsync(
string tenantId,
Guid? sourceId,
string? jobType,
int limit,
int offset,
CancellationToken cancellationToken)
{
var (sql, parameters) = BuildListQuery(tenantId, sourceId, jobType, limit, offset);
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(sql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
foreach (var (name, value) in parameters)
{
command.Parameters.AddWithValue(name, value);
}
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var watermarks = new List<Watermark>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
watermarks.Add(MapWatermark(reader));
}
return watermarks;
}
public async Task<IReadOnlyList<Watermark>> GetLaggingAsync(
string tenantId,
TimeSpan lagThreshold,
int limit,
CancellationToken cancellationToken)
{
var thresholdTime = DateTimeOffset.UtcNow - lagThreshold;
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(SelectLaggingSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("lag_threshold", thresholdTime);
command.Parameters.AddWithValue("limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var watermarks = new List<Watermark>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
watermarks.Add(MapWatermark(reader));
}
return watermarks;
}
public async Task<bool> DeleteAsync(string tenantId, string scopeKey, CancellationToken cancellationToken)
{
await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false);
await using var command = new NpgsqlCommand(DeleteWatermarkSql, connection);
command.CommandTimeout = _dataSource.CommandTimeoutSeconds;
command.Parameters.AddWithValue("tenant_id", tenantId);
command.Parameters.AddWithValue("scope_key", scopeKey);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
private static void AddWatermarkParameters(NpgsqlCommand command, Watermark watermark)
{
command.Parameters.AddWithValue("watermark_id", watermark.WatermarkId);
command.Parameters.AddWithValue("tenant_id", watermark.TenantId);
command.Parameters.AddWithValue("source_id", (object?)watermark.SourceId ?? DBNull.Value);
command.Parameters.AddWithValue("job_type", (object?)watermark.JobType ?? DBNull.Value);
command.Parameters.AddWithValue("scope_key", watermark.ScopeKey);
command.Parameters.AddWithValue("high_watermark", watermark.HighWatermark);
command.Parameters.AddWithValue("low_watermark", (object?)watermark.LowWatermark ?? DBNull.Value);
command.Parameters.AddWithValue("sequence_number", watermark.SequenceNumber);
command.Parameters.AddWithValue("processed_count", watermark.ProcessedCount);
command.Parameters.AddWithValue("last_batch_hash", (object?)watermark.LastBatchHash ?? DBNull.Value);
command.Parameters.AddWithValue("created_at", watermark.CreatedAt);
command.Parameters.AddWithValue("updated_at", watermark.UpdatedAt);
command.Parameters.AddWithValue("updated_by", watermark.UpdatedBy);
}
private static Watermark MapWatermark(NpgsqlDataReader reader)
{
return new Watermark(
WatermarkId: reader.GetGuid(0),
TenantId: reader.GetString(1),
SourceId: reader.IsDBNull(2) ? null : reader.GetGuid(2),
JobType: reader.IsDBNull(3) ? null : reader.GetString(3),
ScopeKey: reader.GetString(4),
HighWatermark: reader.GetFieldValue<DateTimeOffset>(5),
LowWatermark: reader.IsDBNull(6) ? null : reader.GetFieldValue<DateTimeOffset>(6),
SequenceNumber: reader.GetInt64(7),
ProcessedCount: reader.GetInt64(8),
LastBatchHash: reader.IsDBNull(9) ? null : reader.GetString(9),
CreatedAt: reader.GetFieldValue<DateTimeOffset>(10),
UpdatedAt: reader.GetFieldValue<DateTimeOffset>(11),
UpdatedBy: reader.GetString(12));
}
private static (string sql, List<(string name, object value)> parameters) BuildListQuery(
string tenantId,
Guid? sourceId,
string? jobType,
int limit,
int offset)
{
var sb = new StringBuilder();
sb.Append($"SELECT {SelectWatermarkColumns} FROM watermarks WHERE tenant_id = @tenant_id");
var parameters = new List<(string, object)> { ("tenant_id", tenantId) };
if (sourceId.HasValue)
{
sb.Append(" AND source_id = @source_id");
parameters.Add(("source_id", sourceId.Value));
}
if (jobType is not null)
{
sb.Append(" AND job_type = @job_type");
parameters.Add(("job_type", jobType));
}
sb.Append(" ORDER BY updated_at DESC LIMIT @limit OFFSET @offset");
parameters.Add(("limit", limit));
parameters.Add(("offset", offset));
return (sb.ToString(), parameters);
}
}
/// <summary>
/// Exception thrown when attempting to create a duplicate watermark.
/// </summary>
public sealed class DuplicateWatermarkException : Exception
{
public string TenantId { get; }
public string ScopeKey { get; }
public DuplicateWatermarkException(string tenantId, string scopeKey, Exception innerException)
: base($"Watermark for tenant '{tenantId}' and scope '{scopeKey}' already exists.", innerException)
{
TenantId = tenantId;
ScopeKey = scopeKey;
}
}
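
The boolean result from UpdateAsync together with the expected sequence number gives callers optimistic concurrency. A minimal caller sketch, assuming an IWatermarkRepository abstraction with the UpdateAsync shape above; the GetAsync method name and the three-attempt retry policy are illustrative, not part of this diff:

public static async Task<bool> AdvanceWatermarkAsync(
    IWatermarkRepository repository,
    string tenantId,
    string scopeKey,
    DateTimeOffset newHighWatermark,
    long processedInBatch,
    CancellationToken ct)
{
    // Compare-and-swap loop: re-read, bump the sequence number, and let the
    // UPDATE's expected_sequence_number predicate reject stale writes.
    for (var attempt = 0; attempt < 3; attempt++)
    {
        var current = await repository.GetAsync(tenantId, scopeKey, ct); // GetAsync is assumed
        if (current is null)
        {
            return false;
        }

        var updated = current with
        {
            HighWatermark = newHighWatermark,
            SequenceNumber = current.SequenceNumber + 1,
            ProcessedCount = current.ProcessedCount + processedInBatch,
            UpdatedAt = DateTimeOffset.UtcNow,
        };

        if (await repository.UpdateAsync(updated, current.SequenceNumber, ct))
        {
            return true;
        }
    }

    return false;
}

A false return after the retries simply means another writer advanced the same scope first, which the scheduler can treat as already done or surface for inspection.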

View File

@@ -0,0 +1,61 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for artifact persistence operations.
/// </summary>
public interface IArtifactRepository
{
/// <summary>
/// Gets an artifact by ID.
/// </summary>
Task<Artifact?> GetByIdAsync(string tenantId, Guid artifactId, CancellationToken cancellationToken);
/// <summary>
/// Gets artifacts by job ID.
/// </summary>
Task<IReadOnlyList<Artifact>> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
/// <summary>
/// Gets artifacts by run ID.
/// </summary>
Task<IReadOnlyList<Artifact>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);
/// <summary>
/// Gets an artifact by its content digest.
/// </summary>
Task<Artifact?> GetByDigestAsync(string tenantId, string digest, CancellationToken cancellationToken);
/// <summary>
/// Creates a new artifact.
/// </summary>
Task CreateAsync(Artifact artifact, CancellationToken cancellationToken);
/// <summary>
/// Creates multiple artifacts in a batch.
/// </summary>
Task CreateBatchAsync(IEnumerable<Artifact> artifacts, CancellationToken cancellationToken);
/// <summary>
/// Lists artifacts with pagination and filters.
/// </summary>
Task<IReadOnlyList<Artifact>> ListAsync(
string tenantId,
string? artifactType,
string? jobType,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset,
CancellationToken cancellationToken);
/// <summary>
/// Counts artifacts matching the filters.
/// </summary>
Task<int> CountAsync(
string tenantId,
string? artifactType,
string? jobType,
CancellationToken cancellationToken);
}
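
GetByDigestAsync makes the artifact store content-addressed: one digest should map to one row per tenant. A hedged sketch of the lookup-before-insert pattern; the Artifact property names used here (TenantId, Digest) are assumptions about the domain type, which this diff does not show:

// Sketch: content-addressed artifact registration. If an artifact with the
// same digest already exists for the tenant, reuse it rather than inserting
// a duplicate row.
public static async Task<Artifact> RegisterArtifactAsync(
    IArtifactRepository artifacts,
    Artifact candidate,
    CancellationToken ct)
{
    var existing = await artifacts.GetByDigestAsync(
        candidate.TenantId, candidate.Digest, ct);
    if (existing is not null)
    {
        return existing; // same content, same digest: no new row
    }

    await artifacts.CreateAsync(candidate, ct);
    return candidate;
}

Under concurrent writers this read-then-insert can still race, so a unique index on (tenant_id, digest) remains the authoritative guard; the duplicate-key path would then mirror the DuplicateWatermarkException handling shown earlier.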

View File

@@ -0,0 +1,127 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository for audit log entries.
/// </summary>
public interface IAuditRepository
{
/// <summary>
/// Appends a new audit entry to the log.
/// </summary>
Task<AuditEntry> AppendAsync(
string tenantId,
AuditEventType eventType,
string resourceType,
Guid resourceId,
string actorId,
ActorType actorType,
string description,
string? oldState = null,
string? newState = null,
string? actorIp = null,
string? userAgent = null,
string? httpMethod = null,
string? requestPath = null,
string? correlationId = null,
string? metadata = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets an audit entry by ID.
/// </summary>
Task<AuditEntry?> GetByIdAsync(
string tenantId,
Guid entryId,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists audit entries with optional filters.
/// </summary>
Task<IReadOnlyList<AuditEntry>> ListAsync(
string tenantId,
AuditEventType? eventType = null,
string? resourceType = null,
Guid? resourceId = null,
string? actorId = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets audit entries by sequence range.
/// </summary>
Task<IReadOnlyList<AuditEntry>> GetBySequenceRangeAsync(
string tenantId,
long startSequence,
long endSequence,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the latest audit entry for a tenant.
/// </summary>
Task<AuditEntry?> GetLatestAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets audit entries for a specific resource.
/// </summary>
Task<IReadOnlyList<AuditEntry>> GetByResourceAsync(
string tenantId,
string resourceType,
Guid resourceId,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the count of audit entries.
/// </summary>
Task<long> GetCountAsync(
string tenantId,
AuditEventType? eventType = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies the chain integrity for a range of entries.
/// </summary>
Task<ChainVerificationResult> VerifyChainAsync(
string tenantId,
long? startSequence = null,
long? endSequence = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets audit summary statistics.
/// </summary>
Task<AuditSummary> GetSummaryAsync(
string tenantId,
DateTimeOffset? since = null,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of chain verification.
/// </summary>
public sealed record ChainVerificationResult(
bool IsValid,
Guid? InvalidEntryId,
long? InvalidSequence,
string? ErrorMessage);
/// <summary>
/// Audit summary statistics.
/// </summary>
public sealed record AuditSummary(
long TotalEntries,
long EntriesSince,
long EventTypes,
long UniqueActors,
long UniqueResources,
DateTimeOffset? EarliestEntry,
DateTimeOffset? LatestEntry);
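
VerifyChainAsync and ChainVerificationResult imply each audit entry is hash-linked to its predecessor. One plausible shape of the verification, assuming AuditEntry carries EntryId, SequenceNumber, PreviousHash, EntryHash, and a canonical payload; all of those member names are assumptions, since the AuditEntry domain type is not part of this diff:

using System.Linq;
using System.Security.Cryptography;
using System.Text;

public static ChainVerificationResult VerifyChain(IReadOnlyList<AuditEntry> entries)
{
    string? previousHash = null;
    foreach (var entry in entries.OrderBy(e => e.SequenceNumber))
    {
        // Each entry must point at the hash of the entry before it.
        if (entry.PreviousHash != previousHash)
        {
            return new ChainVerificationResult(
                false, entry.EntryId, entry.SequenceNumber,
                "previous_hash does not match the prior entry");
        }

        // Recompute the entry hash over the prior hash plus the payload.
        var computed = Convert.ToHexString(SHA256.HashData(
            Encoding.UTF8.GetBytes(previousHash + entry.CanonicalPayload)));
        if (!string.Equals(computed, entry.EntryHash, StringComparison.OrdinalIgnoreCase))
        {
            return new ChainVerificationResult(
                false, entry.EntryId, entry.SequenceNumber, "entry hash mismatch");
        }

        previousHash = entry.EntryHash;
    }

    return new ChainVerificationResult(true, null, null, null);
}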

View File

@@ -0,0 +1,200 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for backfill request persistence operations.
/// </summary>
public interface IBackfillRepository
{
/// <summary>
/// Gets a backfill request by ID.
/// </summary>
Task<BackfillRequest?> GetByIdAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);
/// <summary>
/// Creates a new backfill request.
/// </summary>
Task CreateAsync(BackfillRequest request, CancellationToken cancellationToken);
/// <summary>
/// Updates a backfill request.
/// </summary>
Task UpdateAsync(BackfillRequest request, CancellationToken cancellationToken);
/// <summary>
/// Lists backfill requests with filters.
/// </summary>
Task<IReadOnlyList<BackfillRequest>> ListAsync(
string tenantId,
BackfillStatus? status,
Guid? sourceId,
string? jobType,
int limit,
int offset,
CancellationToken cancellationToken);
/// <summary>
/// Checks for overlapping active backfills.
/// </summary>
Task<bool> HasOverlappingActiveAsync(
string tenantId,
string scopeKey,
DateTimeOffset windowStart,
DateTimeOffset windowEnd,
Guid? excludeBackfillId,
CancellationToken cancellationToken);
/// <summary>
/// Gets running backfills for a scope.
/// </summary>
Task<IReadOnlyList<BackfillRequest>> GetActiveByScopeAsync(
string tenantId,
string scopeKey,
CancellationToken cancellationToken);
/// <summary>
/// Counts backfill requests by status.
/// </summary>
Task<IDictionary<BackfillStatus, int>> CountByStatusAsync(
string tenantId,
CancellationToken cancellationToken);
/// <summary>
/// Gets the next backfill ready for processing.
/// </summary>
Task<BackfillRequest?> GetNextPendingAsync(string tenantId, CancellationToken cancellationToken);
}
/// <summary>
/// Repository interface for backfill checkpoint persistence.
/// </summary>
public interface IBackfillCheckpointRepository
{
/// <summary>
/// Gets the latest checkpoint for a backfill.
/// </summary>
Task<BackfillCheckpoint?> GetLatestAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);
/// <summary>
/// Gets all checkpoints for a backfill.
/// </summary>
Task<IReadOnlyList<BackfillCheckpoint>> GetAllAsync(string tenantId, Guid backfillId, CancellationToken cancellationToken);
/// <summary>
/// Creates a new checkpoint.
/// </summary>
Task CreateAsync(BackfillCheckpoint checkpoint, CancellationToken cancellationToken);
/// <summary>
/// Updates a checkpoint (e.g., to mark it complete or failed).
/// </summary>
Task UpdateAsync(BackfillCheckpoint checkpoint, CancellationToken cancellationToken);
}
/// <summary>
/// Represents a backfill processing checkpoint.
/// </summary>
/// <param name="CheckpointId">Unique checkpoint identifier.</param>
/// <param name="TenantId">Tenant this checkpoint belongs to.</param>
/// <param name="BackfillId">Parent backfill request ID.</param>
/// <param name="BatchNumber">Batch sequence number.</param>
/// <param name="BatchStart">Start of batch time window.</param>
/// <param name="BatchEnd">End of batch time window.</param>
/// <param name="EventsInBatch">Total events in batch.</param>
/// <param name="EventsProcessed">Events processed in batch.</param>
/// <param name="EventsSkipped">Events skipped as duplicates.</param>
/// <param name="EventsFailed">Events that failed processing.</param>
/// <param name="BatchHash">Hash of the batch for integrity verification.</param>
/// <param name="StartedAt">When batch processing started.</param>
/// <param name="CompletedAt">When batch processing completed.</param>
/// <param name="ErrorMessage">Error message if batch failed.</param>
public sealed record BackfillCheckpoint(
    Guid CheckpointId,
    string TenantId,
    Guid BackfillId,
    int BatchNumber,
    DateTimeOffset BatchStart,
    DateTimeOffset BatchEnd,
    int EventsInBatch,
    int EventsProcessed,
    int EventsSkipped,
    int EventsFailed,
    string? BatchHash,
    DateTimeOffset StartedAt,
    DateTimeOffset? CompletedAt,
    string? ErrorMessage)
{
/// <summary>
/// Whether this checkpoint is complete.
/// </summary>
public bool IsComplete => CompletedAt.HasValue;
/// <summary>
/// Creates a new checkpoint for a batch.
/// </summary>
public static BackfillCheckpoint Create(
string tenantId,
Guid backfillId,
int batchNumber,
DateTimeOffset batchStart,
DateTimeOffset batchEnd,
int eventsInBatch)
{
return new BackfillCheckpoint(
CheckpointId: Guid.NewGuid(),
TenantId: tenantId,
BackfillId: backfillId,
BatchNumber: batchNumber,
BatchStart: batchStart,
BatchEnd: batchEnd,
EventsInBatch: eventsInBatch,
EventsProcessed: 0,
EventsSkipped: 0,
EventsFailed: 0,
BatchHash: null,
StartedAt: DateTimeOffset.UtcNow,
CompletedAt: null,
ErrorMessage: null);
}
/// <summary>
/// Marks the checkpoint as complete.
/// </summary>
public BackfillCheckpoint Complete(int processed, int skipped, int failed, string? batchHash)
{
return this with
{
EventsProcessed = processed,
EventsSkipped = skipped,
EventsFailed = failed,
BatchHash = batchHash,
CompletedAt = DateTimeOffset.UtcNow
};
}
/// <summary>
/// Marks the checkpoint as failed.
/// </summary>
public BackfillCheckpoint Fail(string error)
{
return this with
{
CompletedAt = DateTimeOffset.UtcNow,
ErrorMessage = error
};
}
}
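
Together, Create/Complete/Fail give each batch a durable lifecycle, which is what makes an interrupted backfill resumable from GetLatestAsync. A sketch of one batch pass; the event type and the per-event processing delegate are placeholders, not part of this diff:

public static async Task ProcessBatchAsync(
    IBackfillCheckpointRepository checkpoints,
    string tenantId,
    Guid backfillId,
    int batchNumber,
    DateTimeOffset batchStart,
    DateTimeOffset batchEnd,
    IReadOnlyList<object> events,
    Func<object, CancellationToken, Task<bool>> processOne,
    CancellationToken ct)
{
    // Record the batch before touching any events so a crash mid-batch
    // leaves an open checkpoint behind for resumption.
    var checkpoint = BackfillCheckpoint.Create(
        tenantId, backfillId, batchNumber, batchStart, batchEnd, events.Count);
    await checkpoints.CreateAsync(checkpoint, ct);

    int processed = 0, failed = 0;
    try
    {
        foreach (var evt in events)
        {
            if (await processOne(evt, ct)) processed++;
            else failed++;
        }

        await checkpoints.UpdateAsync(
            checkpoint.Complete(processed, skipped: 0, failed, batchHash: null), ct);
    }
    catch (Exception ex)
    {
        await checkpoints.UpdateAsync(checkpoint.Fail(ex.Message), ct);
        throw;
    }
}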

View File

@@ -0,0 +1,43 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for DAG edge persistence operations.
/// </summary>
public interface IDagEdgeRepository
{
/// <summary>
/// Creates a new DAG edge.
/// </summary>
Task CreateAsync(DagEdge edge, CancellationToken cancellationToken);
/// <summary>
/// Creates multiple DAG edges in a batch.
/// </summary>
Task CreateBatchAsync(IEnumerable<DagEdge> edges, CancellationToken cancellationToken);
/// <summary>
/// Gets all edges for a run.
/// </summary>
Task<IReadOnlyList<DagEdge>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);
/// <summary>
/// Gets parent edges (incoming) for a job.
/// </summary>
Task<IReadOnlyList<DagEdge>> GetParentEdgesAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
/// <summary>
/// Gets child edges (outgoing) for a job.
/// </summary>
Task<IReadOnlyList<DagEdge>> GetChildEdgesAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
/// <summary>
/// Checks if all parent dependencies are satisfied for a job.
/// </summary>
/// <param name="tenantId">Tenant ID.</param>
/// <param name="jobId">Job to check dependencies for.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if all dependencies are satisfied.</returns>
Task<bool> AreDependenciesSatisfiedAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
}
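
AreDependenciesSatisfiedAsync is the scheduling gate: when a job completes, its children become runnable only once every incoming edge points at a finished parent. A sketch of that fan-out step, combining this interface with the IJobRepository shown later in this diff; the DagEdge.ChildJobId property, the JobStatus.Pending member, and the specific UpdateStatusAsync argument values are assumptions:

public static async Task ReleaseReadyChildrenAsync(
    IDagEdgeRepository edges,
    IJobRepository jobs,
    string tenantId,
    Guid completedJobId,
    CancellationToken ct)
{
    foreach (var edge in await edges.GetChildEdgesAsync(tenantId, completedJobId, ct))
    {
        if (await edges.AreDependenciesSatisfiedAsync(tenantId, edge.ChildJobId, ct))
        {
            // Move the child into the leaseable state. Parameter values
            // below are placeholders for whatever the transition requires.
            await jobs.UpdateStatusAsync(
                tenantId, edge.ChildJobId, JobStatus.Pending,
                attempt: 0, leaseId: null, workerId: null, taskRunnerId: null,
                leaseUntil: null, scheduledAt: DateTimeOffset.UtcNow,
                leasedAt: null, completedAt: null, notBefore: null,
                reason: "dependencies satisfied", ct);
        }
    }
}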

View File

@@ -0,0 +1,29 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for job history persistence operations.
/// </summary>
public interface IJobHistoryRepository
{
/// <summary>
/// Appends a history entry for a job state change.
/// </summary>
Task AppendAsync(JobHistory history, CancellationToken cancellationToken);
/// <summary>
/// Gets the history for a job.
/// </summary>
Task<IReadOnlyList<JobHistory>> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
/// <summary>
/// Gets the latest history entry for a job.
/// </summary>
Task<JobHistory?> GetLatestByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
/// <summary>
/// Gets the next sequence number for a job's history.
/// </summary>
Task<int> GetNextSequenceNoAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
}
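
GetNextSequenceNoAsync plus AppendAsync gives each job an ordered, append-only trail. A short sketch; the JobHistory constructor shape is an assumption, since the domain type is not in this diff:

public static async Task RecordTransitionAsync(
    IJobHistoryRepository history,
    string tenantId,
    Guid jobId,
    string fromStatus,
    string toStatus,
    CancellationToken ct)
{
    // Note: read-then-write on the sequence number is only safe if the
    // table enforces a unique (tenant_id, job_id, sequence_no) constraint
    // to reject concurrent duplicates.
    var sequenceNo = await history.GetNextSequenceNoAsync(tenantId, jobId, ct);
    var entry = new JobHistory( // constructor shape assumed
        Guid.NewGuid(), tenantId, jobId, sequenceNo,
        fromStatus, toStatus, DateTimeOffset.UtcNow);
    await history.AppendAsync(entry, ct);
}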

View File

@@ -0,0 +1,100 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for job persistence operations.
/// </summary>
public interface IJobRepository
{
/// <summary>
/// Gets a job by ID.
/// </summary>
Task<Job?> GetByIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken);
/// <summary>
/// Gets a job by idempotency key.
/// </summary>
Task<Job?> GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken);
/// <summary>
/// Creates a new job.
/// </summary>
Task CreateAsync(Job job, CancellationToken cancellationToken);
/// <summary>
/// Updates a job's status and related fields.
/// </summary>
Task UpdateStatusAsync(
string tenantId,
Guid jobId,
JobStatus status,
int attempt,
Guid? leaseId,
string? workerId,
string? taskRunnerId,
DateTimeOffset? leaseUntil,
DateTimeOffset? scheduledAt,
DateTimeOffset? leasedAt,
DateTimeOffset? completedAt,
DateTimeOffset? notBefore,
string? reason,
CancellationToken cancellationToken);
/// <summary>
/// Acquires a lease on a pending/scheduled job for worker execution.
/// </summary>
/// <returns>The leased job, or null if no job is available.</returns>
Task<Job?> LeaseNextAsync(
string tenantId,
string? jobType,
Guid leaseId,
string workerId,
DateTimeOffset leaseUntil,
CancellationToken cancellationToken);
/// <summary>
/// Extends an existing lease.
/// </summary>
/// <returns>True if the lease was extended; false if it was not found or had expired.</returns>
Task<bool> ExtendLeaseAsync(
string tenantId,
Guid jobId,
Guid leaseId,
DateTimeOffset newLeaseUntil,
CancellationToken cancellationToken);
/// <summary>
/// Gets jobs by run ID.
/// </summary>
Task<IReadOnlyList<Job>> GetByRunIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);
/// <summary>
/// Gets jobs with expired leases.
/// </summary>
Task<IReadOnlyList<Job>> GetExpiredLeasesAsync(string tenantId, DateTimeOffset cutoff, int limit, CancellationToken cancellationToken);
/// <summary>
/// Lists jobs with pagination and filters.
/// </summary>
Task<IReadOnlyList<Job>> ListAsync(
string tenantId,
JobStatus? status,
string? jobType,
string? projectId,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset,
CancellationToken cancellationToken);
/// <summary>
/// Counts jobs matching the filters.
/// </summary>
Task<int> CountAsync(
string tenantId,
JobStatus? status,
string? jobType,
string? projectId,
CancellationToken cancellationToken);
}
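
LeaseNextAsync plus ExtendLeaseAsync supports the usual lease-and-heartbeat worker pattern. A sketch under stated assumptions: Job.JobId is an assumed property name, and the five-minute lease with a half-interval heartbeat is an illustrative policy, not something this diff prescribes:

public static async Task RunWorkerLoopAsync(
    IJobRepository jobs,
    string tenantId,
    string workerId,
    Func<Job, CancellationToken, Task> execute,
    CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        var leaseId = Guid.NewGuid();
        var job = await jobs.LeaseNextAsync(
            tenantId, jobType: null, leaseId, workerId,
            DateTimeOffset.UtcNow.AddMinutes(5), ct);
        if (job is null)
        {
            await Task.Delay(TimeSpan.FromSeconds(2), ct); // queue empty; back off
            continue;
        }

        // Heartbeat at half the lease duration so the lease never lapses
        // while the job is still executing.
        using var heartbeat = new CancellationTokenSource();
        _ = Task.Run(async () =>
        {
            try
            {
                while (true)
                {
                    await Task.Delay(TimeSpan.FromMinutes(2.5), heartbeat.Token);
                    await jobs.ExtendLeaseAsync(
                        tenantId, job.JobId, leaseId,
                        DateTimeOffset.UtcNow.AddMinutes(5), heartbeat.Token);
                }
            }
            catch (OperationCanceledException)
            {
                // Job finished; stop extending.
            }
        });

        try
        {
            await execute(job, ct);
        }
        finally
        {
            heartbeat.Cancel();
        }
    }
}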

View File

@@ -0,0 +1,210 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository for run ledger entries.
/// </summary>
public interface ILedgerRepository
{
/// <summary>
/// Appends a new ledger entry from a completed run.
/// </summary>
Task<RunLedgerEntry> AppendAsync(
Run run,
IReadOnlyList<Artifact> artifacts,
string inputDigest,
string? metadata = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a ledger entry by ID.
/// </summary>
Task<RunLedgerEntry?> GetByIdAsync(
string tenantId,
Guid ledgerId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a ledger entry by run ID.
/// </summary>
Task<RunLedgerEntry?> GetByRunIdAsync(
string tenantId,
Guid runId,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists ledger entries with optional filters.
/// </summary>
Task<IReadOnlyList<RunLedgerEntry>> ListAsync(
string tenantId,
string? runType = null,
Guid? sourceId = null,
RunStatus? finalStatus = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets ledger entries by sequence range.
/// </summary>
Task<IReadOnlyList<RunLedgerEntry>> GetBySequenceRangeAsync(
string tenantId,
long startSequence,
long endSequence,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the latest ledger entry for a tenant.
/// </summary>
Task<RunLedgerEntry?> GetLatestAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets ledger entries for a specific source.
/// </summary>
Task<IReadOnlyList<RunLedgerEntry>> GetBySourceAsync(
string tenantId,
Guid sourceId,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the count of ledger entries.
/// </summary>
Task<long> GetCountAsync(
string tenantId,
string? runType = null,
Guid? sourceId = null,
DateTimeOffset? startTime = null,
DateTimeOffset? endTime = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies the chain integrity for a range of entries.
/// </summary>
Task<ChainVerificationResult> VerifyChainAsync(
string tenantId,
long? startSequence = null,
long? endSequence = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets ledger summary statistics.
/// </summary>
Task<LedgerSummary> GetSummaryAsync(
string tenantId,
DateTimeOffset? since = null,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Ledger summary statistics.
/// </summary>
public sealed record LedgerSummary(
long TotalEntries,
long EntriesSince,
long TotalRuns,
long SuccessfulRuns,
long FailedRuns,
long TotalJobs,
long UniqueSources,
long UniqueRunTypes,
DateTimeOffset? EarliestEntry,
DateTimeOffset? LatestEntry);
/// <summary>
/// Repository for ledger exports.
/// </summary>
public interface ILedgerExportRepository
{
/// <summary>
/// Creates a new export request.
/// </summary>
Task<LedgerExport> CreateAsync(
LedgerExport export,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets an export by ID.
/// </summary>
Task<LedgerExport?> GetByIdAsync(
string tenantId,
Guid exportId,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists exports for a tenant.
/// </summary>
Task<IReadOnlyList<LedgerExport>> ListAsync(
string tenantId,
LedgerExportStatus? status = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates an export.
/// </summary>
Task<LedgerExport> UpdateAsync(
LedgerExport export,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets pending exports.
/// </summary>
Task<IReadOnlyList<LedgerExport>> GetPendingAsync(
int limit = 10,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Repository for signed manifests.
/// </summary>
public interface IManifestRepository
{
/// <summary>
/// Creates a new manifest.
/// </summary>
Task<SignedManifest> CreateAsync(
SignedManifest manifest,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a manifest by ID.
/// </summary>
Task<SignedManifest?> GetByIdAsync(
string tenantId,
Guid manifestId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a manifest by subject.
/// </summary>
Task<SignedManifest?> GetBySubjectAsync(
string tenantId,
ProvenanceType provenanceType,
Guid subjectId,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists manifests for a tenant.
/// </summary>
Task<IReadOnlyList<SignedManifest>> ListAsync(
string tenantId,
ProvenanceType? provenanceType = null,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a manifest by payload digest.
/// </summary>
Task<SignedManifest?> GetByPayloadDigestAsync(
string tenantId,
string payloadDigest,
CancellationToken cancellationToken = default);
}
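
Because AppendAsync takes the run, its artifacts, and an input digest, the sequence number and chain hash are presumably assigned inside the repository rather than by callers. A sketch of the completion hook; Run.TenantId and Run.RunId are assumed property names, and the SHA-256 canonicalization below is an illustrative stand-in for however the orchestrator canonicalizes run inputs:

using System.Security.Cryptography;
using System.Text;

public static async Task<RunLedgerEntry> RecordCompletedRunAsync(
    ILedgerRepository ledger,
    IArtifactRepository artifacts,
    Run run,
    string canonicalInputs,
    CancellationToken ct)
{
    // Gather everything the run produced, then ledger it with a digest of
    // the inputs so replays can be compared byte-for-byte.
    var producedArtifacts = await artifacts.GetByRunIdAsync(run.TenantId, run.RunId, ct);
    var inputDigest = "sha256:" + Convert.ToHexString(
        SHA256.HashData(Encoding.UTF8.GetBytes(canonicalInputs))).ToLowerInvariant();

    return await ledger.AppendAsync(
        run, producedArtifacts, inputDigest, metadata: null, cancellationToken: ct);
}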

View File

@@ -0,0 +1,79 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for quota persistence operations.
/// </summary>
public interface IQuotaRepository
{
/// <summary>
/// Gets a quota by ID.
/// </summary>
Task<Quota?> GetByIdAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
/// <summary>
/// Gets the quota for a tenant and optional job type.
/// </summary>
Task<Quota?> GetByTenantAndJobTypeAsync(string tenantId, string? jobType, CancellationToken cancellationToken);
/// <summary>
/// Creates a new quota.
/// </summary>
Task CreateAsync(Quota quota, CancellationToken cancellationToken);
/// <summary>
/// Updates a quota (including token/counter state).
/// </summary>
Task UpdateAsync(Quota quota, CancellationToken cancellationToken);
/// <summary>
/// Pauses a quota with reason.
/// </summary>
Task PauseAsync(string tenantId, Guid quotaId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken);
/// <summary>
/// Resumes a paused quota.
/// </summary>
Task ResumeAsync(string tenantId, Guid quotaId, string updatedBy, CancellationToken cancellationToken);
/// <summary>
/// Updates the rate limiter state (tokens, counters) without changing configuration.
/// </summary>
Task UpdateStateAsync(
string tenantId,
Guid quotaId,
double currentTokens,
DateTimeOffset lastRefillAt,
int currentActive,
int currentHourCount,
DateTimeOffset currentHourStart,
string updatedBy,
CancellationToken cancellationToken);
/// <summary>
/// Increments the current active count.
/// </summary>
Task IncrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
/// <summary>
/// Decrements the current active count.
/// </summary>
Task DecrementActiveAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
/// <summary>
/// Lists quotas for a tenant with pagination.
/// </summary>
Task<IReadOnlyList<Quota>> ListAsync(
string tenantId,
string? jobType,
bool? paused,
int limit,
int offset,
CancellationToken cancellationToken);
/// <summary>
/// Deletes a quota.
/// </summary>
Task<bool> DeleteAsync(string tenantId, Guid quotaId, CancellationToken cancellationToken);
}
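
The state UpdateStateAsync persists (current tokens, last refill time, hourly counters) matches a token-bucket limiter. The refill arithmetic might look like the following pure function; the rate and burst inputs would come from the Quota domain type, whose field names this diff does not show:

public static (bool Admitted, double Tokens, DateTimeOffset RefillAt) TryAdmit(
    double currentTokens,
    DateTimeOffset lastRefillAt,
    double ratePerSecond,
    double burstCapacity,
    DateTimeOffset now)
{
    // Refill proportionally to elapsed time, capped at the burst capacity.
    var elapsed = (now - lastRefillAt).TotalSeconds;
    var tokens = Math.Min(burstCapacity, currentTokens + elapsed * ratePerSecond);

    if (tokens < 1.0)
    {
        return (false, tokens, now); // rejected; caller persists the refill anyway
    }

    return (true, tokens - 1.0, now); // admitted; one token consumed
}

A scheduler would run TryAdmit against the loaded quota, then persist the outcome with UpdateStateAsync and bump IncrementActiveAsync when admission succeeds.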

View File

@@ -0,0 +1,69 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for run persistence operations.
/// </summary>
public interface IRunRepository
{
/// <summary>
/// Gets a run by ID.
/// </summary>
Task<Run?> GetByIdAsync(string tenantId, Guid runId, CancellationToken cancellationToken);
/// <summary>
/// Creates a new run.
/// </summary>
Task CreateAsync(Run run, CancellationToken cancellationToken);
/// <summary>
/// Updates run status and job counts.
/// </summary>
Task UpdateStatusAsync(
string tenantId,
Guid runId,
RunStatus status,
int totalJobs,
int completedJobs,
int succeededJobs,
int failedJobs,
DateTimeOffset? startedAt,
DateTimeOffset? completedAt,
CancellationToken cancellationToken);
/// <summary>
/// Increments job counters when a job completes.
/// </summary>
Task IncrementJobCountsAsync(
string tenantId,
Guid runId,
bool succeeded,
CancellationToken cancellationToken);
/// <summary>
/// Lists runs with pagination and filters.
/// </summary>
Task<IReadOnlyList<Run>> ListAsync(
string tenantId,
Guid? sourceId,
string? runType,
RunStatus? status,
string? projectId,
DateTimeOffset? createdAfter,
DateTimeOffset? createdBefore,
int limit,
int offset,
CancellationToken cancellationToken);
/// <summary>
/// Counts runs matching the filters.
/// </summary>
Task<int> CountAsync(
string tenantId,
Guid? sourceId,
string? runType,
RunStatus? status,
string? projectId,
CancellationToken cancellationToken);
}
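
ListAsync and CountAsync use limit/offset paging. A sketch of draining a filtered result set; RunStatus.Failed is an assumed enum member, and the page size is arbitrary:

public static async Task<IReadOnlyList<Run>> GetAllFailedRunsAsync(
    IRunRepository runs, string tenantId, CancellationToken ct)
{
    const int pageSize = 100;
    var total = await runs.CountAsync(
        tenantId, sourceId: null, runType: null,
        status: RunStatus.Failed, projectId: null, ct);

    var results = new List<Run>(total);
    for (var offset = 0; offset < total; offset += pageSize)
    {
        results.AddRange(await runs.ListAsync(
            tenantId, sourceId: null, runType: null, status: RunStatus.Failed,
            projectId: null, createdAfter: null, createdBefore: null,
            pageSize, offset, ct));
    }

    return results;
}

Offset paging can drift if rows are inserted mid-scan; pinning createdBefore to a fixed timestamp keeps the window stable.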

View File

@@ -0,0 +1,50 @@
using StellaOps.Orchestrator.Core.Domain;
namespace StellaOps.Orchestrator.Infrastructure.Repositories;
/// <summary>
/// Repository interface for source persistence operations.
/// </summary>
public interface ISourceRepository
{
/// <summary>
/// Gets a source by ID.
/// </summary>
Task<Source?> GetByIdAsync(string tenantId, Guid sourceId, CancellationToken cancellationToken);
/// <summary>
/// Gets a source by name.
/// </summary>
Task<Source?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken);
/// <summary>
/// Creates a new source.
/// </summary>
Task CreateAsync(Source source, CancellationToken cancellationToken);
/// <summary>
/// Updates a source.
/// </summary>
Task UpdateAsync(Source source, CancellationToken cancellationToken);
/// <summary>
/// Pauses a source with reason.
/// </summary>
Task PauseAsync(string tenantId, Guid sourceId, string reason, string? ticket, string updatedBy, CancellationToken cancellationToken);
/// <summary>
/// Resumes a paused source.
/// </summary>
Task ResumeAsync(string tenantId, Guid sourceId, string updatedBy, CancellationToken cancellationToken);
/// <summary>
/// Lists sources with pagination.
/// </summary>
Task<IReadOnlyList<Source>> ListAsync(
string tenantId,
string? sourceType,
bool? enabled,
int limit,
int offset,
CancellationToken cancellationToken);
}
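
PauseAsync carries a reason and optional ticket, which pairs naturally with the IAuditRepository shown earlier. A sketch of an operator pause that leaves an audit trail; AuditEventType.SourcePaused and ActorType.User are assumed enum members:

public static async Task PauseSourceAsync(
    ISourceRepository sources,
    IAuditRepository audit,
    string tenantId,
    Guid sourceId,
    string reason,
    string operatorId,
    CancellationToken ct)
{
    await sources.PauseAsync(tenantId, sourceId, reason, ticket: null, operatorId, ct);

    // Record who paused the source and why, alongside the state change.
    await audit.AppendAsync(
        tenantId,
        AuditEventType.SourcePaused,   // assumed enum member
        resourceType: "source",
        sourceId,
        operatorId,
        ActorType.User,                // assumed enum member
        description: reason,
        cancellationToken: ct);
}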

Some files were not shown because too many files have changed in this diff.