Files
git.stella-ops.org/docs/router/15-Step.md
master 75f6942769
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Add integration tests for migration categories and execution
- Implemented MigrationCategoryTests to validate migration categorization for startup, release, seed, and data migrations.
- Added tests for edge cases, including null, empty, and whitespace migration names.
- Created StartupMigrationHostTests to verify the behavior of the migration host with real PostgreSQL instances using Testcontainers.
- Included tests for migration execution, schema creation, and handling of pending release migrations.
- Added SQL migration files for testing: creating a test table, adding a column, a release migration, and seeding data.
2025-12-04 19:10:54 +02:00

39 KiB

Step 15: TLS Transport Implementation

Phase 3: Transport Layer | Estimated Complexity: Medium | Dependencies: Step 14 (TCP Transport)


Overview

The TLS transport wraps TCP with mutual TLS (mTLS) authentication for secure microservice-to-gateway communication. It provides encryption, server/client authentication, and certificate-based identity for production deployments.


Goals

  1. Add TLS encryption layer on top of TCP transport
  2. Support mutual TLS (mTLS) for bidirectional authentication
  3. Support certificate rotation without service restart
  4. Integrate with platform certificate stores and custom CAs
  5. Provide clear certificate validation error messages

Core Architecture

┌──────────────────────────────────────────────────────────────┐
│                     TLS Transport Stack                       │
├──────────────────────────────────────────────────────────────┤
│                                                               │
│   ┌─────────────┐                    ┌─────────────┐         │
│   │ Microservice│                    │   Gateway   │         │
│   │   Client    │◄──── mTLS ────────►│   Server    │         │
│   └──────┬──────┘                    └──────┬──────┘         │
│          │                                  │                 │
│   ┌──────▼──────┐                    ┌──────▼──────┐         │
│   │  SslStream  │                    │  SslStream  │         │
│   └──────┬──────┘                    └──────┬──────┘         │
│          │                                  │                 │
│   ┌──────▼──────┐                    ┌──────▼──────┐         │
│   │   Socket    │                    │   Socket    │         │
│   └─────────────┘                    └─────────────┘         │
│                                                               │
└──────────────────────────────────────────────────────────────┘

Configuration

namespace StellaOps.Router.Transport.Tls;

/// <summary>
/// Server-side (gateway) settings for the TLS transport. Extends the TCP
/// transport settings with certificate selection and client-authentication policy.
/// </summary>
public class TlsTransportConfig : TcpTransportConfig
{
    /// <summary>
    /// File-system path of the server certificate (PFX/P12 container).
    /// </summary>
    public string? CertificatePath { get; set; }

    /// <summary>
    /// Password protecting the file referenced by <see cref="CertificatePath"/>.
    /// </summary>
    public string? CertificatePassword { get; set; }

    /// <summary>
    /// Thumbprint used to look the server certificate up in a certificate store.
    /// </summary>
    public string? CertificateThumbprint { get; set; }

    /// <summary>
    /// Name of the certificate store searched by thumbprint. Defaults to "My".
    /// </summary>
    public string CertificateStoreName { get; set; } = "My";

    /// <summary>
    /// Location of the certificate store searched by thumbprint. Defaults to "CurrentUser".
    /// </summary>
    public string CertificateStoreLocation { get; set; } = "CurrentUser";

    /// <summary>
    /// When <c>true</c> (the default) clients must present a certificate (mTLS).
    /// </summary>
    public bool RequireClientCertificate { get; set; } = true;

    /// <summary>
    /// Path of the CA certificate used when validating client certificates.
    /// </summary>
    public string? ClientCaCertificatePath { get; set; }

    /// <summary>
    /// TLS protocol versions the server accepts. Defaults to TLS 1.2 and TLS 1.3.
    /// </summary>
    public SslProtocols AllowedProtocols { get; set; } = SslProtocols.Tls12 | SslProtocols.Tls13;

    /// <summary>
    /// How certificate revocation is checked. Defaults to online checking.
    /// </summary>
    public X509RevocationMode RevocationMode { get; set; } = X509RevocationMode.Online;

    /// <summary>
    /// Development-only switch that tolerates certificates whose root is not trusted.
    /// Off unless explicitly enabled.
    /// </summary>
    public bool AllowUntrustedRootCertificates { get; set; }
}

/// <summary>
/// Client-side (microservice) settings for the TLS transport. Extends the TCP
/// client settings with the client identity and server-validation options.
/// </summary>
public class TlsClientConfig : TcpClientConfig
{
    /// <summary>
    /// File-system path of the client certificate (PFX/P12 container).
    /// </summary>
    public string? ClientCertificatePath { get; set; }

    /// <summary>
    /// Password protecting the file referenced by <see cref="ClientCertificatePath"/>.
    /// </summary>
    public string? ClientCertificatePassword { get; set; }

    /// <summary>
    /// Thumbprint used to look the client certificate up in a certificate store.
    /// </summary>
    public string? ClientCertificateThumbprint { get; set; }

    /// <summary>
    /// Name (CN/SAN) the server certificate is expected to carry.
    /// </summary>
    public string? ExpectedServerName { get; set; }

    /// <summary>
    /// Path of the CA certificate used when validating the server certificate.
    /// </summary>
    public string? ServerCaCertificatePath { get; set; }

    /// <summary>
    /// Development-only switch that disables server certificate validation.
    /// Off unless explicitly enabled.
    /// </summary>
    public bool SkipServerCertificateValidation { get; set; }
}

Certificate Provider

namespace StellaOps.Router.Transport.Tls;

/// <summary>
/// Source of the certificates used by the TLS transport. Implementations may
/// swap the returned instances at runtime (hot reload), so callers should ask
/// for a certificate each time one is needed instead of caching it.
/// </summary>
public interface ICertificateProvider
{
    /// <summary>
    /// Raised after the provider has reloaded its certificates.
    /// </summary>
    event Action? CertificatesReloaded;

    /// <summary>
    /// Returns the server certificate currently in use.
    /// </summary>
    /// <returns>The certificate, or <c>null</c> when none is available.</returns>
    X509Certificate2? GetServerCertificate();

    /// <summary>
    /// Returns the client certificate currently in use.
    /// </summary>
    /// <returns>The certificate, or <c>null</c> when none is available.</returns>
    X509Certificate2? GetClientCertificate();

    /// <summary>
    /// Returns the CA certificates to be used for peer validation.
    /// </summary>
    /// <returns>A collection of CA certificates.</returns>
    X509Certificate2Collection GetCaCertificates();
}

/// <summary>
/// Default <see cref="ICertificateProvider"/>. Loads the server, client and CA
/// certificates described by the TLS configuration and reloads them when the
/// server certificate file is modified, created or replaced on disk.
/// </summary>
public sealed class CertificateProvider : ICertificateProvider, IDisposable
{
    private readonly TlsTransportConfig _serverConfig;
    private readonly TlsClientConfig? _clientConfig;
    private readonly ILogger<CertificateProvider> _logger;
    private readonly FileSystemWatcher? _fileWatcher;

    // Serializes reloads: a single file replacement usually raises several watcher events.
    private readonly object _reloadGate = new();

    // Written from the watcher callback and read from connection-handling code,
    // hence volatile so readers observe the most recently published instances.
    private volatile X509Certificate2? _serverCertificate;
    private volatile X509Certificate2? _clientCertificate;
    private volatile X509Certificate2Collection _caCertificates = new();

    /// <inheritdoc />
    public event Action? CertificatesReloaded;

    /// <summary>
    /// Loads the configured certificates and, when the server certificate comes
    /// from a file, starts watching that file for changes.
    /// </summary>
    /// <param name="serverConfig">Server-side TLS settings.</param>
    /// <param name="clientConfig">Optional client-side TLS settings.</param>
    /// <param name="logger">Logger for load and reload diagnostics.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serverConfig"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public CertificateProvider(
        IOptions<TlsTransportConfig> serverConfig,
        IOptions<TlsClientConfig>? clientConfig,
        ILogger<CertificateProvider> logger)
    {
        ArgumentNullException.ThrowIfNull(serverConfig);
        ArgumentNullException.ThrowIfNull(logger);

        _serverConfig = serverConfig.Value;
        _clientConfig = clientConfig?.Value;
        _logger = logger;

        LoadCertificates();

        // Watch for certificate file changes
        if (!string.IsNullOrEmpty(_serverConfig.CertificatePath))
        {
            // Resolve to an absolute path so a bare relative file name still yields a directory.
            var fullPath = Path.GetFullPath(_serverConfig.CertificatePath);
            var dir = Path.GetDirectoryName(fullPath);
            var fileName = Path.GetFileName(fullPath);

            if (!string.IsNullOrEmpty(dir) && !string.IsNullOrEmpty(fileName) && Directory.Exists(dir))
            {
                _fileWatcher = new FileSystemWatcher(dir)
                {
                    // Watch exactly the configured file: this covers .p12 as well as .pfx
                    // and ignores unrelated certificate files in the same directory.
                    Filter = fileName,
                    // FileName is required for Created/Renamed notifications, which is what
                    // a "write temp file, then rename over the target" rotation produces.
                    NotifyFilter = NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.FileName
                };
                _fileWatcher.Changed += OnCertificateFileChanged;
                _fileWatcher.Created += OnCertificateFileChanged;
                _fileWatcher.Renamed += OnCertificateFileChanged;
                _fileWatcher.EnableRaisingEvents = true;
            }
        }
    }

    /// <summary>
    /// Loads every configured certificate and publishes the new set only after all
    /// of them loaded successfully, so a failed reload keeps the previous set intact.
    /// </summary>
    private void LoadCertificates()
    {
        try
        {
            // Load server certificate
            var serverCertificate = LoadCertificate(
                _serverConfig.CertificatePath,
                _serverConfig.CertificatePassword,
                _serverConfig.CertificateThumbprint,
                _serverConfig.CertificateStoreName,
                _serverConfig.CertificateStoreLocation);

            if (serverCertificate != null)
            {
                _logger.LogInformation(
                    "Loaded server certificate: Subject={Subject}, Expires={Expires}",
                    serverCertificate.Subject,
                    serverCertificate.NotAfter);
            }

            // Load client certificate
            X509Certificate2? clientCertificate = null;
            if (_clientConfig != null)
            {
                clientCertificate = LoadCertificate(
                    _clientConfig.ClientCertificatePath,
                    _clientConfig.ClientCertificatePassword,
                    _clientConfig.ClientCertificateThumbprint,
                    "My",
                    "CurrentUser");
            }

            // Load CA certificates
            var caCertificates = new X509Certificate2Collection();

            if (!string.IsNullOrEmpty(_serverConfig.ClientCaCertificatePath) &&
                File.Exists(_serverConfig.ClientCaCertificatePath))
            {
                caCertificates.Add(new X509Certificate2(_serverConfig.ClientCaCertificatePath));
            }

            if (_clientConfig != null &&
                !string.IsNullOrEmpty(_clientConfig.ServerCaCertificatePath) &&
                File.Exists(_clientConfig.ServerCaCertificatePath))
            {
                caCertificates.Add(new X509Certificate2(_clientConfig.ServerCaCertificatePath));
            }

            // Publish the complete set. The previous instances are deliberately not
            // disposed here: connections that are mid-handshake may still hold them.
            _serverCertificate = serverCertificate;
            _clientCertificate = clientCertificate;
            _caCertificates = caCertificates;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to load certificates");
            throw;
        }
    }

    /// <summary>
    /// Loads one certificate, preferring the file at <paramref name="path"/> and
    /// falling back to a store lookup by <paramref name="thumbprint"/>.
    /// </summary>
    /// <param name="path">Optional PFX/P12 file path.</param>
    /// <param name="password">Optional password for the file.</param>
    /// <param name="thumbprint">Optional thumbprint for the store lookup.</param>
    /// <param name="storeName">Store name; parsed as <see cref="StoreName"/>, ignoring case.</param>
    /// <param name="storeLocation">Store location; parsed as <see cref="StoreLocation"/>, ignoring case.</param>
    /// <returns>The certificate, or <c>null</c> when neither source yields one.</returns>
    private X509Certificate2? LoadCertificate(
        string? path,
        string? password,
        string? thumbprint,
        string storeName,
        string storeLocation)
    {
        // Try file-based certificate first
        if (!string.IsNullOrEmpty(path))
        {
            if (File.Exists(path))
            {
                // NOTE(review): PersistKeySet keeps the imported private key on disk (Windows)
                // after every load, including hot reloads - confirm this is intended.
                return new X509Certificate2(
                    path,
                    password,
                    X509KeyStorageFlags.MachineKeySet | X509KeyStorageFlags.PersistKeySet);
            }

            // A configured but missing file is almost always a deployment mistake; say so.
            _logger.LogWarning("Certificate file not found: {Path}", path);
        }

        // Try certificate store
        if (!string.IsNullOrEmpty(thumbprint))
        {
            using var store = new X509Store(
                Enum.Parse<StoreName>(storeName, ignoreCase: true),
                Enum.Parse<StoreLocation>(storeLocation, ignoreCase: true));

            store.Open(OpenFlags.ReadOnly);

            var certs = store.Certificates.Find(
                X509FindType.FindByThumbprint,
                thumbprint,
                validOnly: false);

            return certs.Count > 0 ? certs[0] : null;
        }

        return null;
    }

    /// <summary>
    /// Watcher callback: reloads all certificates and raises
    /// <see cref="CertificatesReloaded"/>. Failures are logged and never propagate
    /// to the watcher thread; the previously loaded certificates stay in use.
    /// </summary>
    private void OnCertificateFileChanged(object sender, FileSystemEventArgs e)
    {
        _logger.LogInformation("Certificate file changed, reloading: {Path}", e.FullPath);

        try
        {
            // Small delay to ensure file is fully written
            Thread.Sleep(500);

            // A rename *away* from the watched name also raises an event; do not
            // replace a working certificate with "nothing" in that case.
            if (!File.Exists(_serverConfig.CertificatePath))
            {
                _logger.LogWarning(
                    "Certificate file is not present, keeping current certificates: {Path}",
                    _serverConfig.CertificatePath);
                return;
            }

            lock (_reloadGate)
            {
                LoadCertificates();
            }

            CertificatesReloaded?.Invoke();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to reload certificates");
        }
    }

    /// <inheritdoc />
    public X509Certificate2? GetServerCertificate() => _serverCertificate;

    /// <inheritdoc />
    public X509Certificate2? GetClientCertificate() => _clientCertificate;

    /// <inheritdoc />
    public X509Certificate2Collection GetCaCertificates() => _caCertificates;

    /// <summary>
    /// Stops watching the certificate file and releases the currently loaded certificates.
    /// </summary>
    public void Dispose()
    {
        _fileWatcher?.Dispose();
        _serverCertificate?.Dispose();
        _clientCertificate?.Dispose();

        foreach (var caCertificate in _caCertificates)
        {
            caCertificate.Dispose();
        }
    }
}

TLS Connection Wrapper

namespace StellaOps.Router.Transport.Tls;

/// <summary>
/// TLS-wrapped frame connection. Sends and receives <c>Frame</c> values over an
/// already-authenticated <see cref="SslStream"/> using the TCP frame codec.
/// Writes are serialized; reads are expected to come from a single consumer
/// (the read buffer state is not synchronized).
/// </summary>
public sealed class TlsFrameConnection : IAsyncDisposable
{
    /// <summary>Size in bytes of the fixed header that precedes every payload.</summary>
    private const int FrameHeaderSize = 24;

    /// <summary>Offset within the header of the big-endian 32-bit payload length.</summary>
    private const int PayloadLengthOffset = 4;

    /// <summary>Size of the read and write buffers; also the largest frame that can be handled.</summary>
    private const int BufferSize = 64 * 1024;

    private readonly Socket _socket;
    private readonly SslStream _sslStream;
    private readonly TcpFrameCodec _codec;
    private readonly ILogger _logger;
    private readonly SemaphoreSlim _writeLock = new(1, 1);
    private readonly byte[] _readBuffer;
    private readonly byte[] _writeBuffer;
    private int _readBufferOffset;
    private int _readBufferCount;
    private int _disposed;

    /// <summary>Unique identifier generated for this connection.</summary>
    public string ConnectionId { get; }

    /// <summary>Remote endpoint of the underlying socket.</summary>
    public EndPoint? RemoteEndPoint => _socket.RemoteEndPoint;

    /// <summary>Whether the underlying socket reports itself as connected.</summary>
    public bool IsConnected => _socket.Connected;

    /// <summary>Subject of the peer certificate, or <c>null</c> when the peer presented none.</summary>
    public string? RemoteCertificateSubject { get; private set; }

    /// <summary>Thumbprint of the peer certificate, or <c>null</c> when the peer presented none.</summary>
    public string? RemoteCertificateThumbprint { get; private set; }

    /// <summary>
    /// Wraps an authenticated TLS stream.
    /// </summary>
    /// <param name="socket">Socket underlying <paramref name="sslStream"/>.</param>
    /// <param name="sslStream">TLS stream on which the handshake has already completed.</param>
    /// <param name="codec">Codec used to encode and decode frames.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public TlsFrameConnection(
        Socket socket,
        SslStream sslStream,
        TcpFrameCodec codec,
        ILogger logger)
    {
        _socket = socket;
        _sslStream = sslStream;
        _codec = codec;
        _logger = logger;
        _readBuffer = new byte[BufferSize];
        _writeBuffer = new byte[BufferSize];
        ConnectionId = Guid.NewGuid().ToString("N");

        // Extract remote certificate info
        if (_sslStream.RemoteCertificate != null)
        {
            // The X509Certificate2 is a temporary copy used only to read two
            // properties; dispose it so its native handle is not left to the finalizer.
            using var cert = new X509Certificate2(_sslStream.RemoteCertificate);
            RemoteCertificateSubject = cert.Subject;
            RemoteCertificateThumbprint = cert.Thumbprint;
        }
    }

    /// <summary>
    /// Encodes <paramref name="frame"/> and writes it to the TLS stream. Concurrent
    /// callers are serialized so frames are never interleaved on the wire.
    /// </summary>
    /// <param name="frame">Frame to send.</param>
    /// <param name="cancellationToken">Cancels waiting for the write lock and the write itself.</param>
    public async ValueTask SendAsync(Frame frame, CancellationToken cancellationToken)
    {
        await _writeLock.WaitAsync(cancellationToken);
        try
        {
            // NOTE(review): behavior for frames larger than the write buffer depends on
            // TcpFrameCodec.Encode, which is not visible here - confirm it rejects them.
            var size = _codec.Encode(frame, _writeBuffer);
            await _sslStream.WriteAsync(_writeBuffer.AsMemory(0, size), cancellationToken);
            await _sslStream.FlushAsync(cancellationToken);
        }
        finally
        {
            _writeLock.Release();
        }
    }

    /// <summary>
    /// Reads from the TLS stream until one complete frame is buffered and returns it.
    /// </summary>
    /// <param name="cancellationToken">Cancels the pending read.</param>
    /// <returns>The next frame.</returns>
    /// <exception cref="EndOfStreamException">The remote side closed the connection.</exception>
    /// <exception cref="InvalidDataException">
    /// The header announces a frame larger than the receive buffer.
    /// </exception>
    public async ValueTask<Frame> ReceiveAsync(CancellationToken cancellationToken)
    {
        while (true)
        {
            // Try to decode from existing buffer
            if (TryDecodeFrame(out var frame))
            {
                return frame;
            }

            // No room left behind the buffered bytes: slide them to the front.
            // TryDecodeFrame guarantees an incomplete frame fits the buffer, so
            // this always frees space and the read below is never zero-length.
            if (_readBufferOffset + _readBufferCount >= _readBuffer.Length)
            {
                Buffer.BlockCopy(_readBuffer, _readBufferOffset, _readBuffer, 0, _readBufferCount);
                _readBufferOffset = 0;
            }

            var bytesRead = await _sslStream.ReadAsync(
                _readBuffer.AsMemory(_readBufferOffset + _readBufferCount),
                cancellationToken);

            if (bytesRead == 0)
            {
                throw new EndOfStreamException("TLS connection closed by remote");
            }

            _readBufferCount += bytesRead;
        }
    }

    /// <summary>
    /// Attempts to decode one frame from the bytes already buffered. Kept
    /// synchronous so the span over the buffer never lives inside an async method.
    /// </summary>
    /// <param name="frame">The decoded frame when the method returns <c>true</c>.</param>
    /// <returns><c>true</c> when a complete frame was buffered and consumed.</returns>
    /// <exception cref="InvalidDataException">
    /// The announced payload cannot fit the receive buffer.
    /// </exception>
    private bool TryDecodeFrame(out Frame frame)
    {
        frame = default!;

        if (_readBufferCount < FrameHeaderSize)
        {
            return false;
        }

        var buffered = new ReadOnlySpan<byte>(_readBuffer, _readBufferOffset, _readBufferCount);
        var payloadLength = BinaryPrimitives.ReadUInt32BigEndian(buffered[PayloadLengthOffset..]);

        // Validate before converting to int: an oversized value would either overflow
        // the addition below or describe a frame that can never be fully buffered.
        if (payloadLength > (uint)(_readBuffer.Length - FrameHeaderSize))
        {
            throw new InvalidDataException(
                $"Frame payload of {payloadLength} bytes exceeds the maximum of {_readBuffer.Length - FrameHeaderSize} bytes");
        }

        var totalLength = FrameHeaderSize + (int)payloadLength;
        if (buffered.Length < totalLength)
        {
            return false;
        }

        frame = _codec.Decode(buffered[..totalLength]);
        _readBufferOffset += totalLength;
        _readBufferCount -= totalLength;

        // Compact once the consumed prefix exceeds half the buffer.
        if (_readBufferOffset > _readBuffer.Length / 2)
        {
            Buffer.BlockCopy(_readBuffer, _readBufferOffset, _readBuffer, 0, _readBufferCount);
            _readBufferOffset = 0;
        }

        return true;
    }

    /// <summary>
    /// Yields frames until cancellation is requested or the connection ends.
    /// A remote close, a socket-level I/O failure, cancellation and local disposal
    /// all end the sequence without throwing; other errors propagate.
    /// </summary>
    /// <param name="cancellationToken">Stops the enumeration.</param>
    public async IAsyncEnumerable<Frame> ReceiveAllAsync(
        [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            Frame frame;
            try
            {
                frame = await ReceiveAsync(cancellationToken);
            }
            catch (EndOfStreamException)
            {
                yield break;
            }
            catch (OperationCanceledException)
            {
                yield break;
            }
            catch (ObjectDisposedException)
            {
                // The connection was disposed locally while a read was pending.
                yield break;
            }
            catch (IOException ex) when (ex.InnerException is SocketException)
            {
                yield break;
            }

            yield return frame;
        }
    }

    /// <summary>
    /// Closes the TLS stream and the socket. Safe to call more than once.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        // Tear down the transport first so an in-flight send fails on the stream,
        // and release the write lock last.
        await _sslStream.DisposeAsync();
        _socket.Dispose();
        _writeLock.Dispose();
    }
}

Gateway TLS Server

namespace StellaOps.Router.Transport.Tls;

/// <summary>
/// TLS-enabled transport server for the gateway. Accepts TCP connections,
/// performs the TLS handshake (optionally requiring a client certificate),
/// completes the HELLO exchange and registers each microservice connection
/// with the global routing state for as long as it stays open.
/// </summary>
public sealed class TlsTransportServer : IHostedService
{
    private readonly TlsTransportConfig _config;
    private readonly ICertificateProvider _certificateProvider;
    private readonly TcpFrameCodec _codec;
    private readonly IGlobalRoutingState _routingState;
    private readonly IPayloadSerializer _serializer;
    private readonly ILogger<TlsTransportServer> _logger;
    private Socket? _listener;
    private CancellationTokenSource? _cts;
    private readonly ConcurrentDictionary<string, TlsMicroserviceConnection> _connections = new();

    /// <summary>Creates the server; no sockets are opened until <see cref="StartAsync"/>.</summary>
    public TlsTransportServer(
        IOptions<TlsTransportConfig> config,
        ICertificateProvider certificateProvider,
        TcpFrameCodec codec,
        IGlobalRoutingState routingState,
        IPayloadSerializer serializer,
        ILogger<TlsTransportServer> logger)
    {
        _config = config.Value;
        _certificateProvider = certificateProvider;
        _codec = codec;
        _routingState = routingState;
        _serializer = serializer;
        _logger = logger;
    }

    /// <summary>
    /// Binds the listening socket and starts accepting connections in the background.
    /// </summary>
    /// <exception cref="InvalidOperationException">No server certificate is available.</exception>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        var serverCert = _certificateProvider.GetServerCertificate();
        if (serverCert == null)
        {
            throw new InvalidOperationException("Server certificate not configured");
        }

        // Use the address family of the configured address so IPv6 listen addresses work too.
        var address = IPAddress.Parse(_config.ListenAddress);
        var listener = new Socket(address.AddressFamily, SocketType.Stream, ProtocolType.Tcp);
        try
        {
            listener.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true);
            listener.Bind(new IPEndPoint(address, _config.Port));
            listener.Listen(_config.Backlog);
        }
        catch
        {
            // Do not leak the socket when the port is taken or the address is invalid.
            listener.Dispose();
            throw;
        }

        _listener = listener;
        _cts = new CancellationTokenSource();

        _logger.LogInformation(
            "TLS transport server listening on {Address}:{Port}",
            _config.ListenAddress, _config.Port);

        // Fire-and-forget: the loop handles its own exceptions and ends on cancellation.
        _ = AcceptConnectionsAsync(_cts.Token);
    }

    /// <summary>
    /// Accept loop: hands every accepted socket to <see cref="HandleConnectionAsync"/>
    /// without awaiting it, so connections are served concurrently.
    /// </summary>
    private async Task AcceptConnectionsAsync(CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                var socket = await _listener!.AcceptAsync(cancellationToken);
                _logger.LogDebug("Accepted TLS connection from {RemoteEndPoint}", socket.RemoteEndPoint);

                _ = HandleConnectionAsync(socket, cancellationToken);
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error accepting TLS connection");
            }
        }
    }

    /// <summary>
    /// Serves one connection: TLS handshake, HELLO exchange, registration, then
    /// frame processing until the connection ends. Always unregisters the
    /// connection and releases the stream and socket on exit.
    /// </summary>
    private async Task HandleConnectionAsync(Socket socket, CancellationToken cancellationToken)
    {
        SslStream? sslStream = null;

        // Set once the connection has been published, so cleanup removes exactly that entry.
        string? registeredConnectionId = null;

        // Captured up front: reading it from a catch block could itself throw
        // if the socket has been disposed in the meantime.
        var remoteEndPoint = socket.RemoteEndPoint;

        try
        {
            var networkStream = new NetworkStream(socket, ownsSocket: false);
            sslStream = new SslStream(
                networkStream,
                leaveInnerStreamOpen: false,
                ValidateClientCertificate);

            // Fetched per connection so a hot-reloaded certificate is picked up.
            var serverCert = _certificateProvider.GetServerCertificate()
                ?? throw new InvalidOperationException("Server certificate not configured");

            var authOptions = new SslServerAuthenticationOptions
            {
                ServerCertificate = serverCert,
                ClientCertificateRequired = _config.RequireClientCertificate,
                EnabledSslProtocols = _config.AllowedProtocols,
                CertificateRevocationCheckMode = _config.RevocationMode
            };

            await sslStream.AuthenticateAsServerAsync(authOptions, cancellationToken);

            _logger.LogDebug(
                "TLS handshake complete: Protocol={Protocol}, Cipher={Cipher}",
                sslStream.SslProtocol,
                sslStream.CipherAlgorithm);

            var connection = new TlsFrameConnection(socket, sslStream, _codec, _logger);

            // Wait for HELLO frame
            var helloFrame = await connection.ReceiveAsync(cancellationToken)
                .AsTask()
                .WaitAsync(TimeSpan.FromSeconds(_config.HandshakeTimeoutSeconds), cancellationToken);

            if (helloFrame.Type != FrameType.Hello)
            {
                _logger.LogWarning("Expected HELLO frame, got {Type}", helloFrame.Type);
                return;
            }

            var hello = _serializer.DeserializeHello(helloFrame.Payload);

            // Log client certificate identity
            if (connection.RemoteCertificateSubject != null)
            {
                _logger.LogInformation(
                    "Microservice connected via TLS: {ServiceName}/{InstanceId}, Cert={Subject}",
                    hello.ServiceName, hello.InstanceId, connection.RemoteCertificateSubject);
            }

            // Send HELLO response
            var helloResponse = new HelloResponse
            {
                Accepted = true,
                HeartbeatIntervalMs = _config.HeartbeatIntervalMs,
                MaxPayloadSize = _config.MaxPayloadSize
            };

            var responseFrame = new Frame
            {
                Type = FrameType.Hello,
                CorrelationId = helloFrame.CorrelationId,
                Payload = _serializer.SerializeHelloResponse(helloResponse)
            };
            await connection.SendAsync(responseFrame, cancellationToken);

            var msConnection = new TlsMicroserviceConnection(
                connection,
                hello.ServiceName,
                hello.InstanceId,
                hello.Endpoints,
                _serializer,
                _logger);

            registeredConnectionId = connection.ConnectionId;
            _connections[registeredConnectionId] = msConnection;

            _routingState.RegisterConnection(new EndpointConnection
            {
                ConnectionId = connection.ConnectionId,
                ServiceName = hello.ServiceName,
                InstanceId = hello.InstanceId,
                Transport = "TLS",
                State = ConnectionState.Connected,
                Endpoints = hello.Endpoints,
                Region = hello.Metadata?.GetValueOrDefault("region"),
                LastHeartbeat = DateTimeOffset.UtcNow,
                CertificateThumbprint = connection.RemoteCertificateThumbprint
            });

            await msConnection.ProcessAsync(cancellationToken);
        }
        catch (AuthenticationException ex)
        {
            _logger.LogWarning(ex, "TLS authentication failed from {RemoteEndPoint}", remoteEndPoint);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Normal shutdown path; not an error.
            _logger.LogDebug("TLS connection from {RemoteEndPoint} closed by server shutdown", remoteEndPoint);
        }
        catch (TimeoutException)
        {
            _logger.LogWarning("Timed out waiting for HELLO frame from {RemoteEndPoint}", remoteEndPoint);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error handling TLS connection");
        }
        finally
        {
            // Remove the entry that was actually registered. Creating a new
            // TlsFrameConnection here would yield a fresh ConnectionId and
            // leave the real registration behind.
            if (registeredConnectionId != null)
            {
                _connections.TryRemove(registeredConnectionId, out _);
                _routingState.RemoveConnection(registeredConnectionId);
            }

            if (sslStream != null)
            {
                await sslStream.DisposeAsync();
            }

            socket.Dispose();
        }
    }

    /// <summary>
    /// Certificate validation callback for incoming connections.
    /// </summary>
    /// <returns>
    /// <c>true</c> when client certificates are not required, when the platform
    /// validation succeeded, when the certificate chains to a configured CA, or
    /// when untrusted roots are explicitly allowed and the only problem is the chain.
    /// </returns>
    private bool ValidateClientCertificate(
        object sender,
        X509Certificate? certificate,
        X509Chain? chain,
        SslPolicyErrors sslPolicyErrors)
    {
        if (!_config.RequireClientCertificate)
            return true;

        if (certificate == null)
        {
            _logger.LogWarning("Client did not provide certificate");
            return false;
        }

        if (sslPolicyErrors == SslPolicyErrors.None)
            return true;

        if (sslPolicyErrors == SslPolicyErrors.RemoteCertificateChainErrors)
        {
            // The platform trust store rejected the chain; honor the CA configured
            // for client validation, which is otherwise loaded but never consulted.
            var trustedCas = _certificateProvider.GetCaCertificates();
            if (trustedCas.Count > 0 && ChainsToConfiguredCa(certificate, chain, trustedCas))
            {
                return true;
            }

            if (_config.AllowUntrustedRootCertificates)
            {
                _logger.LogWarning("Accepting client certificate with chain errors (dev mode)");
                return true;
            }
        }

        _logger.LogWarning(
            "Client certificate validation failed: Errors={Errors}, Subject={Subject}",
            sslPolicyErrors,
            certificate.Subject);

        return false;
    }

    /// <summary>
    /// Rebuilds the certificate chain trusting only <paramref name="trustedCas"/> as roots.
    /// </summary>
    /// <param name="certificate">Certificate presented by the client.</param>
    /// <param name="platformChain">Chain built by the platform; its extra store supplies intermediates.</param>
    /// <param name="trustedCas">Configured CA certificates.</param>
    /// <returns><c>true</c> when a valid chain to one of the configured CAs can be built.</returns>
    private bool ChainsToConfiguredCa(
        X509Certificate certificate,
        X509Chain? platformChain,
        X509Certificate2Collection trustedCas)
    {
        using var customChain = new X509Chain();
        customChain.ChainPolicy.TrustMode = X509ChainTrustMode.CustomRootTrust;
        customChain.ChainPolicy.CustomTrustStore.AddRange(trustedCas);
        customChain.ChainPolicy.RevocationMode = _config.RevocationMode;

        if (platformChain != null)
        {
            customChain.ChainPolicy.ExtraStore.AddRange(platformChain.ChainPolicy.ExtraStore);
        }

        // Only dispose the X509Certificate2 when this method created it.
        X509Certificate2? ownedCopy = null;
        if (certificate is not X509Certificate2 leaf)
        {
            ownedCopy = new X509Certificate2(certificate);
            leaf = ownedCopy;
        }

        try
        {
            return customChain.Build(leaf);
        }
        finally
        {
            ownedCopy?.Dispose();
        }
    }

    /// <summary>Returns the registered connection with the given id, or <c>null</c>.</summary>
    public TlsMicroserviceConnection? GetConnection(string connectionId)
    {
        return _connections.TryGetValue(connectionId, out var conn) ? conn : null;
    }

    /// <summary>
    /// Stops accepting connections and disconnects every registered connection.
    /// </summary>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        _cts?.Cancel();
        _listener?.Close();

        foreach (var connection in _connections.Values)
        {
            try
            {
                await connection.DisconnectAsync();
            }
            catch (Exception ex)
            {
                // One misbehaving connection must not prevent the others from being closed.
                _logger.LogWarning(ex, "Error disconnecting TLS connection during shutdown");
            }
        }

        _cts?.Dispose();
    }
}

/// <summary>
/// Gateway-side view of one connected microservice instance. Correlates
/// outgoing requests with incoming response frames on a single TLS connection.
/// </summary>
public sealed class TlsMicroserviceConnection
{
    private readonly TlsFrameConnection _connection;
    private readonly IPayloadSerializer _serializer;
    private readonly ILogger _logger;

    // Requests awaiting a response, keyed by correlation id.
    private readonly ConcurrentDictionary<string, TaskCompletionSource<Frame>> _pendingRequests = new();

    /// <summary>Service name announced in the HELLO exchange.</summary>
    public string ServiceName { get; }

    /// <summary>Instance id announced in the HELLO exchange.</summary>
    public string InstanceId { get; }

    /// <summary>Endpoints announced by the instance.</summary>
    public EndpointDescriptor[] Endpoints { get; }

    /// <summary>Time the last frame was received (initially the construction time).</summary>
    public DateTimeOffset LastActivity { get; private set; }

    /// <summary>Thumbprint of the certificate the peer presented, if any.</summary>
    public string? CertificateThumbprint => _connection.RemoteCertificateThumbprint;

    /// <summary>Creates the wrapper around an established frame connection.</summary>
    public TlsMicroserviceConnection(
        TlsFrameConnection connection,
        string serviceName,
        string instanceId,
        EndpointDescriptor[] endpoints,
        IPayloadSerializer serializer,
        ILogger logger)
    {
        _connection = connection;
        ServiceName = serviceName;
        InstanceId = instanceId;
        Endpoints = endpoints;
        _serializer = serializer;
        _logger = logger;
        LastActivity = DateTimeOffset.UtcNow;
    }

    /// <summary>
    /// Receive loop: completes the matching pending request for every response
    /// frame. When the loop ends for any reason, all still-pending requests are
    /// canceled so callers do not wait for their full timeout on a dead connection.
    /// </summary>
    /// <param name="cancellationToken">Stops the loop.</param>
    public async Task ProcessAsync(CancellationToken cancellationToken)
    {
        try
        {
            await foreach (var frame in _connection.ReceiveAllAsync(cancellationToken))
            {
                LastActivity = DateTimeOffset.UtcNow;

                if (frame.Type == FrameType.Response &&
                    _pendingRequests.TryRemove(frame.CorrelationId, out var tcs))
                {
                    tcs.TrySetResult(frame);
                }
            }
        }
        finally
        {
            // No more frames will arrive, so no pending request can ever be answered.
            CancelPendingRequests();
        }
    }

    /// <summary>
    /// Sends a request frame and waits for the response carrying the same correlation id.
    /// </summary>
    /// <param name="request">Request to send.</param>
    /// <param name="timeout">Maximum time to wait for the response.</param>
    /// <param name="cancellationToken">Cancels sending and waiting.</param>
    /// <returns>The deserialized response payload.</returns>
    /// <exception cref="TimeoutException">No response arrived within <paramref name="timeout"/>.</exception>
    /// <exception cref="OperationCanceledException">
    /// The caller canceled, or the connection ended or was disconnected while waiting.
    /// </exception>
    public async Task<ResponsePayload> SendRequestAsync(
        RequestPayload request,
        TimeSpan timeout,
        CancellationToken cancellationToken)
    {
        var correlationId = Guid.NewGuid().ToString("N");
        var tcs = new TaskCompletionSource<Frame>(TaskCreationOptions.RunContinuationsAsynchronously);

        // Register before sending so a fast response cannot arrive unmatched.
        _pendingRequests[correlationId] = tcs;

        try
        {
            var frame = new Frame
            {
                Type = FrameType.Request,
                CorrelationId = correlationId,
                Payload = _serializer.SerializeRequest(request)
            };

            await _connection.SendAsync(frame, cancellationToken);

            var responseFrame = await tcs.Task.WaitAsync(timeout, cancellationToken);
            return _serializer.DeserializeResponse(responseFrame.Payload);
        }
        finally
        {
            _pendingRequests.TryRemove(correlationId, out _);
        }
    }

    /// <summary>
    /// Cancels all pending requests and closes the underlying connection.
    /// </summary>
    public async Task DisconnectAsync()
    {
        CancelPendingRequests();
        await _connection.DisposeAsync();
    }

    /// <summary>
    /// Removes and cancels every pending request. Entries are removed one by one
    /// so a request registered concurrently is either canceled here or left in
    /// the table, never dropped without being completed.
    /// </summary>
    private void CancelPendingRequests()
    {
        foreach (var entry in _pendingRequests)
        {
            if (_pendingRequests.TryRemove(entry.Key, out var pending))
            {
                pending.TrySetCanceled();
            }
        }
    }
}

Microservice TLS Client

namespace StellaOps.Router.Transport.Tls;

/// <summary>
/// TLS client for microservices to connect securely to the gateway.
/// </summary>
/// <remarks>
/// The client opens a TCP socket to the configured gateway, performs the TLS handshake
/// (optionally presenting a client certificate), exchanges HELLO frames, and then runs a
/// background loop that dispatches incoming frames and reconnects with exponential
/// backoff when the connection is lost.
/// </remarks>
public sealed class TlsTransportClient : ITransportServer, IAsyncDisposable
{
    private readonly TlsClientConfig _config;
    private readonly ICertificateProvider _certificateProvider;
    private readonly TcpFrameCodec _codec;
    private readonly IPayloadSerializer _serializer;
    private readonly ILogger<TlsTransportClient> _logger;
    private TlsFrameConnection? _connection;
    private CancellationTokenSource? _cts;
    private Task? _processingTask;
    private int _reconnectAttempts;

    /// <summary>Transport identifier reported by this client.</summary>
    public string TransportType => "TLS";

    /// <summary>True while a connection exists and reports itself as connected.</summary>
    public bool IsConnected => _connection?.IsConnected ?? false;

    /// <summary>Invoked for every incoming request frame; the returned payload is sent back as the response.</summary>
    public event Func<RequestPayload, CancellationToken, Task<ResponsePayload>>? OnRequest;

    /// <summary>Invoked with the correlation id of every incoming cancel frame.</summary>
    public event Func<string, CancellationToken, Task>? OnCancel;

    /// <summary>
    /// Creates the client from its configuration and collaborators.
    /// </summary>
    /// <param name="config">Client options (gateway host/port, reconnect delays, validation flags).</param>
    /// <param name="certificateProvider">Source of the optional client certificate used for mTLS.</param>
    /// <param name="codec">Frame codec handed to each new connection.</param>
    /// <param name="serializer">Serializer for HELLO, request and response payloads.</param>
    /// <param name="logger">Logger for connection and frame-processing diagnostics.</param>
    public TlsTransportClient(
        IOptions<TlsClientConfig> config,
        ICertificateProvider certificateProvider,
        TcpFrameCodec codec,
        IPayloadSerializer serializer,
        ILogger<TlsTransportClient> logger)
    {
        _config = config.Value;
        _certificateProvider = certificateProvider;
        _codec = codec;
        _serializer = serializer;
        _logger = logger;
    }

    /// <summary>
    /// Connects to the gateway, announces the service via HELLO, and starts the
    /// background frame-processing loop.
    /// </summary>
    /// <param name="serviceName">Service name sent in the HELLO payload.</param>
    /// <param name="instanceId">Instance id sent in the HELLO payload.</param>
    /// <param name="endpoints">Endpoints advertised in the HELLO payload.</param>
    /// <param name="cancellationToken">Cancels the initial connection attempts only.</param>
    /// <exception cref="AuthenticationException">The TLS handshake was rejected; this is not retried.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled before a connection was established.</exception>
    public async Task ConnectAsync(
        string serviceName,
        string instanceId,
        EndpointDescriptor[] endpoints,
        CancellationToken cancellationToken)
    {
        _cts = new CancellationTokenSource();
        await ConnectWithRetryAsync(serviceName, instanceId, endpoints, cancellationToken);

        // The processing loop is governed by the client's own token, not the caller's,
        // so it keeps running after ConnectAsync returns until DisconnectAsync is called.
        _processingTask = ProcessFramesAsync(serviceName, instanceId, endpoints, _cts.Token);
    }

    /// <summary>
    /// Repeatedly tries to establish a connection until one succeeds, the handshake is
    /// rejected, or the token is cancelled. Failed attempts are spaced with exponential backoff.
    /// </summary>
    private async Task ConnectWithRetryAsync(
        string serviceName,
        string instanceId,
        EndpointDescriptor[] endpoints,
        CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                var connection = await OpenConnectionAsync(serviceName, instanceId, endpoints, cancellationToken);

                // Publish the connection only after HELLO succeeded, and release whatever
                // connection was previously held so it is not leaked.
                var previous = _connection;
                _connection = connection;
                await DisposeQuietlyAsync(previous);

                _reconnectAttempts = 0;
                _logger.LogInformation(
                    "Connected to gateway via TLS at {Host}:{Port}",
                    _config.GatewayHost, _config.GatewayPort);

                return;
            }
            catch (AuthenticationException ex)
            {
                _logger.LogError(ex, "TLS authentication failed");
                throw; // Don't retry auth failures
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                var delay = NextReconnectDelayMs();

                _logger.LogWarning(ex, "TLS connection attempt {Attempt} failed, retrying in {Delay}ms",
                    _reconnectAttempts, delay);

                await Task.Delay((int)delay, cancellationToken);
            }
        }

        // Leaving the loop means cancellation was requested before any connection was
        // made; surface that instead of returning as if the connect had succeeded.
        cancellationToken.ThrowIfCancellationRequested();
    }

    /// <summary>
    /// Performs a single connection attempt: TCP connect, TLS handshake and HELLO exchange.
    /// Every resource created by the attempt is released if any step fails.
    /// </summary>
    /// <returns>A connection that has completed the HELLO exchange.</returns>
    private async Task<TlsFrameConnection> OpenConnectionAsync(
        string serviceName,
        string instanceId,
        EndpointDescriptor[] endpoints,
        CancellationToken cancellationToken)
    {
        var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
        SslStream? sslStream = null;
        TlsFrameConnection? connection = null;

        try
        {
            await socket.ConnectAsync(_config.GatewayHost, _config.GatewayPort, cancellationToken);

            // The network stream does not own the socket; the socket is handed to the
            // frame connection separately below.
            var networkStream = new NetworkStream(socket, ownsSocket: false);
            sslStream = new SslStream(
                networkStream,
                leaveInnerStreamOpen: false,
                ValidateServerCertificate);

            var clientCert = _certificateProvider.GetClientCertificate();
            var clientCerts = clientCert != null
                ? new X509CertificateCollection { clientCert }
                : new X509CertificateCollection();

            var authOptions = new SslClientAuthenticationOptions
            {
                // ExpectedServerName lets the certificate name differ from the address dialed.
                TargetHost = _config.ExpectedServerName ?? _config.GatewayHost,
                ClientCertificates = clientCerts,
                EnabledSslProtocols = SslProtocols.Tls12 | SslProtocols.Tls13
            };

            await sslStream.AuthenticateAsClientAsync(authOptions, cancellationToken);

            _logger.LogDebug(
                "TLS handshake complete: Protocol={Protocol}, Server={Server}",
                sslStream.SslProtocol,
                _config.GatewayHost);

            connection = new TlsFrameConnection(socket, sslStream, _codec, _logger);

            // Send HELLO
            var hello = new HelloPayload
            {
                ServiceName = serviceName,
                InstanceId = instanceId,
                Endpoints = endpoints,
                Metadata = new Dictionary<string, string>
                {
                    ["region"] = _config.Region ?? "default",
                    ["version"] = _config.ServiceVersion ?? "1.0.0"
                }
            };

            var helloFrame = new Frame
            {
                Type = FrameType.Hello,
                CorrelationId = Guid.NewGuid().ToString("N"),
                Payload = _serializer.SerializeHello(hello)
            };

            await connection.SendAsync(helloFrame, cancellationToken);

            var response = await connection.ReceiveAsync(cancellationToken);
            if (response.Type != FrameType.Hello)
            {
                throw new ProtocolException($"Expected HELLO response, got {response.Type}");
            }

            return connection;
        }
        catch
        {
            // Release everything this attempt created so retries do not leak sockets.
            // NOTE(review): TlsFrameConnection presumably disposes the socket and stream
            // it was given; disposing them again here is harmless because both
            // SslStream.Dispose and Socket.Dispose are idempotent.
            await DisposeQuietlyAsync(connection);
            sslStream?.Dispose();
            socket.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Advances the reconnect attempt counter and returns the next backoff delay in
    /// milliseconds: the initial delay doubled per attempt, capped at the configured maximum.
    /// </summary>
    private double NextReconnectDelayMs()
    {
        _reconnectAttempts++;
        return Math.Min(
            _config.InitialReconnectDelayMs * Math.Pow(2, _reconnectAttempts - 1),
            _config.MaxReconnectDelayMs);
    }

    /// <summary>
    /// Server certificate validation callback used for the TLS handshake.
    /// Accepts the certificate when validation is disabled by configuration or when the
    /// platform reported no policy errors; otherwise logs the errors and rejects it.
    /// </summary>
    private bool ValidateServerCertificate(
        object sender,
        X509Certificate? certificate,
        X509Chain? chain,
        SslPolicyErrors sslPolicyErrors)
    {
        if (_config.SkipServerCertificateValidation)
        {
            _logger.LogWarning("Skipping server certificate validation (dev mode)");
            return true;
        }

        if (sslPolicyErrors == SslPolicyErrors.None)
        {
            return true;
        }

        _logger.LogWarning(
            "Server certificate validation failed: Errors={Errors}",
            sslPolicyErrors);

        return false;
    }

    /// <summary>
    /// Background loop: dispatches incoming frames and re-establishes the connection
    /// whenever it is lost, until the token is cancelled.
    /// </summary>
    private async Task ProcessFramesAsync(
        string serviceName,
        string instanceId,
        EndpointDescriptor[] endpoints,
        CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            var connectionFailed = false;
            double retryDelayMs = 0;

            try
            {
                var connection = _connection;
                if (connection == null || !connection.IsConnected)
                {
                    await ConnectWithRetryAsync(serviceName, instanceId, endpoints, cancellationToken);
                    connection = _connection;
                }

                if (connection == null)
                {
                    // The connection was cleared concurrently (e.g. by DisconnectAsync);
                    // re-evaluate the loop condition.
                    continue;
                }

                await foreach (var frame in connection.ReceiveAllAsync(cancellationToken))
                {
                    switch (frame.Type)
                    {
                        case FrameType.Request:
                            // Requests are handled concurrently so a slow handler does not
                            // block the receive loop; the handler observes its own errors.
                            _ = HandleRequestAsync(frame, cancellationToken);
                            break;

                        case FrameType.Cancel:
                            if (OnCancel != null)
                            {
                                await OnCancel(frame.CorrelationId, cancellationToken);
                            }
                            break;

                        case FrameType.Heartbeat:
                            await HandleHeartbeatAsync(frame);
                            break;
                    }
                }
            }
            catch (EndOfStreamException)
            {
                _logger.LogWarning("TLS connection closed, attempting reconnect");
                connectionFailed = true;
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (AuthenticationException)
            {
                // ConnectWithRetryAsync already logged the handshake failure and does not
                // retry it itself. Nobody observes this background task, so back off and
                // try again (a rotated certificate may fix it) instead of spinning.
                connectionFailed = true;
                retryDelayMs = NextReconnectDelayMs();
                _logger.LogWarning(
                    "TLS authentication failed during reconnect, retrying in {Delay}ms",
                    retryDelayMs);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error processing TLS frames");
                connectionFailed = true;
            }

            if (!connectionFailed)
            {
                continue;
            }

            // Dispose the broken connection rather than just forgetting it.
            var stale = _connection;
            _connection = null;
            await DisposeQuietlyAsync(stale);

            if (retryDelayMs > 0)
            {
                try
                {
                    await Task.Delay((int)retryDelayMs, cancellationToken);
                }
                catch (OperationCanceledException)
                {
                    break;
                }
            }
        }
    }

    /// <summary>
    /// Runs the <see cref="OnRequest"/> handler for one request frame and sends its result,
    /// or a 500 error response if the handler fails. Never throws: it runs fire-and-forget,
    /// so every failure is logged here.
    /// </summary>
    private async Task HandleRequestAsync(Frame frame, CancellationToken cancellationToken)
    {
        // Capture both once: the processing loop may clear _connection while this runs.
        var connection = _connection;
        var handler = OnRequest;
        if (connection == null || handler == null)
        {
            return;
        }

        try
        {
            var request = _serializer.DeserializeRequest(frame.Payload);
            var response = await handler(request, cancellationToken);

            var responseFrame = new Frame
            {
                Type = FrameType.Response,
                CorrelationId = frame.CorrelationId,
                Payload = _serializer.SerializeResponse(response),
                Flags = FrameFlags.Final
            };

            await connection.SendAsync(responseFrame, cancellationToken);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The client is shutting down; there is nobody left to answer.
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error handling TLS request");

            try
            {
                var errorResponse = new ResponsePayload
                {
                    StatusCode = 500,
                    Headers = new Dictionary<string, string>(),
                    ErrorMessage = ex.Message,
                    IsFinalChunk = true
                };

                var errorFrame = new Frame
                {
                    Type = FrameType.Response,
                    CorrelationId = frame.CorrelationId,
                    Payload = _serializer.SerializeResponse(errorResponse),
                    Flags = FrameFlags.Final | FrameFlags.Error
                };

                await connection.SendAsync(errorFrame, cancellationToken);
            }
            catch (Exception sendEx)
            {
                // Typically the connection is already gone; the processing loop handles reconnect.
                _logger.LogWarning(sendEx, "Failed to send TLS error response");
            }
        }
    }

    /// <summary>
    /// Answers a heartbeat by echoing its correlation id and payload back to the gateway.
    /// </summary>
    private async Task HandleHeartbeatAsync(Frame frame)
    {
        var connection = _connection;
        if (connection == null)
        {
            return;
        }

        var pongFrame = new Frame
        {
            Type = FrameType.Heartbeat,
            CorrelationId = frame.CorrelationId,
            Payload = frame.Payload
        };

        await connection.SendAsync(pongFrame, CancellationToken.None);
    }

    /// <summary>
    /// Stops the processing loop (waiting up to five seconds for it to finish) and
    /// disposes the connection. Safe to call more than once, including via
    /// <see cref="DisposeAsync"/> after an explicit disconnect.
    /// </summary>
    public async Task DisconnectAsync()
    {
        // Detach the fields first so a repeated call never touches a disposed token source.
        var cts = _cts;
        _cts = null;
        var processing = _processingTask;
        _processingTask = null;

        try
        {
            cts?.Cancel();

            if (processing != null)
            {
                try
                {
                    await processing.WaitAsync(TimeSpan.FromSeconds(5));
                }
                catch (Exception ex)
                {
                    // Best effort: shutdown proceeds even if the loop faulted or timed out.
                    _logger.LogDebug(ex, "TLS frame processing did not stop cleanly");
                }
            }

            var connection = _connection;
            _connection = null;
            if (connection != null)
            {
                await connection.DisposeAsync();
            }
        }
        finally
        {
            cts?.Dispose();
        }
    }

    /// <summary>Disconnects and releases all resources held by the client.</summary>
    public async ValueTask DisposeAsync() => await DisconnectAsync();

    /// <summary>
    /// Disposes a connection, logging instead of throwing, so cleanup never masks the
    /// error that triggered it. Does nothing for <c>null</c>.
    /// </summary>
    private async Task DisposeQuietlyAsync(TlsFrameConnection? connection)
    {
        if (connection == null)
        {
            return;
        }

        try
        {
            await connection.DisposeAsync();
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Error disposing TLS connection");
        }
    }
}

Service Registration

namespace StellaOps.Router.Transport.Tls;

/// <summary>
/// Dependency-injection registration helpers for the TLS transport.
/// </summary>
public static class TlsTransportExtensions
{
    /// <summary>
    /// Registers the gateway-side TLS transport: binds <c>TlsTransportConfig</c> to the
    /// "TlsTransport" configuration section and registers the certificate provider, the
    /// frame codec, and <c>TlsTransportServer</c> both as a singleton and as a hosted service.
    /// </summary>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <param name="configuration">Configuration root containing the "TlsTransport" section.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddTlsTransport(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.Configure<TlsTransportConfig>(configuration.GetSection("TlsTransport"));
        services.AddSingleton<ICertificateProvider, CertificateProvider>();
        services.AddSingleton<TcpFrameCodec>();
        services.AddSingleton<TlsTransportServer>();

        // Resolve the hosted service from the singleton registration so the host and
        // other consumers share one server instance.
        services.AddHostedService(sp => sp.GetRequiredService<TlsTransportServer>());

        return services;
    }

    /// <summary>
    /// Registers the microservice-side TLS transport: binds <c>TlsClientConfig</c> to the
    /// "TlsClient" configuration section and registers the certificate provider, the
    /// frame codec, and <c>TlsTransportClient</c> as the <c>ITransportServer</c> implementation.
    /// </summary>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <param name="configuration">Configuration root containing the "TlsClient" section.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddTlsMicroserviceTransport(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.Configure<TlsClientConfig>(configuration.GetSection("TlsClient"));
        services.AddSingleton<ICertificateProvider, CertificateProvider>();
        services.AddSingleton<TcpFrameCodec>();
        services.AddSingleton<ITransportServer, TlsTransportClient>();

        return services;
    }
}

YAML Configuration

# Gateway TLS configuration
# Bound to TlsTransportConfig by AddTlsTransport via the "TlsTransport" section.
TlsTransport:
  ListenAddress: "0.0.0.0"
  Port: 9501
  CertificatePath: "/etc/stellaops/certs/gateway.pfx"
  # NOTE(review): "${...}" looks like a placeholder substituted at deploy time — confirm
  # the configuration provider in use actually expands it.
  CertificatePassword: "${GATEWAY_CERT_PASSWORD}"
  RequireClientCertificate: true
  ClientCaCertificatePath: "/etc/stellaops/certs/client-ca.crt"
  AllowedProtocols: "Tls12, Tls13"
  RevocationMode: "Online"

# Microservice TLS configuration
# Bound to TlsClientConfig by AddTlsMicroserviceTransport via the "TlsClient" section.
TlsClient:
  # Host and port the client socket connects to.
  GatewayHost: "gateway.internal"
  GatewayPort: 9501
  ClientCertificatePath: "/etc/stellaops/certs/service.pfx"
  ClientCertificatePassword: "${SERVICE_CERT_PASSWORD}"
  # Used as the TLS target host for the handshake; when unset, the client falls back
  # to GatewayHost.
  ExpectedServerName: "gateway.stellaops.internal"

Deliverables

  1. StellaOps.Router.Transport.Tls/TlsTransportConfig.cs
  2. StellaOps.Router.Transport.Tls/ICertificateProvider.cs
  3. StellaOps.Router.Transport.Tls/CertificateProvider.cs
  4. StellaOps.Router.Transport.Tls/TlsFrameConnection.cs
  5. StellaOps.Router.Transport.Tls/TlsTransportServer.cs
  6. StellaOps.Router.Transport.Tls/TlsTransportClient.cs
  7. StellaOps.Router.Transport.Tls/TlsTransportExtensions.cs
  8. Certificate validation tests
  9. mTLS handshake tests
  10. Certificate rotation tests

Next Step

Proceed to Step 16: GraphQL Handler Implementation to implement the GraphQL route handler plugin.