namespace StellaOps.AdvisoryAI.Inference;

/// <summary>
/// Result of local LLM inference.
/// </summary>
public sealed record LocalInferenceResult
{
    /// <summary>
    /// Generated text content.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Number of tokens generated.
    /// </summary>
    public required int TokensGenerated { get; init; }

    /// <summary>
    /// Total inference time in milliseconds.
    /// </summary>
    public required long InferenceTimeMs { get; init; }

    /// <summary>
    /// Time to first token in milliseconds.
    /// </summary>
    public required long TimeToFirstTokenMs { get; init; }

    /// <summary>
    /// Tokens-per-second throughput.
    /// </summary>
    public double TokensPerSecond => InferenceTimeMs > 0
        ? TokensGenerated * 1000.0 / InferenceTimeMs
        : 0;
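    // Worked example: 256 tokens generated in 3,200 ms => 256 * 1000.0 / 3200 = 80 tokens/s.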

    /// <summary>
    /// Model ID used for inference.
    /// </summary>
    public required string ModelId { get; init; }

    /// <summary>
    /// Whether inference was deterministic.
    /// </summary>
    public required bool Deterministic { get; init; }

    /// <summary>
    /// Seed used for generation.
    /// </summary>
    public required int Seed { get; init; }
}

/// <summary>
/// Model status information.
/// </summary>
public sealed record LocalModelStatus
{
    /// <summary>
    /// Whether the model is loaded.
    /// </summary>
    public required bool Loaded { get; init; }

    /// <summary>
    /// Path to the model file.
    /// </summary>
    public required string ModelPath { get; init; }

    /// <summary>
    /// Whether the model digest matches the expected value.
    /// </summary>
    public required bool DigestVerified { get; init; }

    /// <summary>
    /// Memory usage in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }

    /// <summary>
    /// Device being used (e.g. CPU or GPU).
    /// </summary>
    public required string Device { get; init; }

    /// <summary>
    /// Context size in tokens.
    /// </summary>
    public required int ContextSize { get; init; }
}
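
// Illustrative convenience helper; an assumption of this sketch, not part of
// the original contract. Callers typically gate inference on a model that is
// both loaded and digest-verified.
public static class LocalModelStatusExtensions
{
    /// <summary>
    /// True when the model is loaded and its digest has been verified.
    /// </summary>
    public static bool IsReady(this LocalModelStatus status) =>
        status.Loaded && status.DigestVerified;
}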

/// <summary>
/// Interface for a local LLM runtime.
/// </summary>
/// <remarks>
/// Sprint: SPRINT_20251226_019_AI_offline_inference.
/// Task: OFFLINE-04.
/// </remarks>
public interface ILocalLlmRuntime : IDisposable
{
    /// <summary>
    /// Runtime type identifier.
    /// </summary>
    string RuntimeType { get; }

    /// <summary>
    /// Loads a model with the given configuration.
    /// </summary>
    /// <param name="config">Model configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default);

    /// <summary>
    /// Unloads the current model.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task UnloadModelAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the current model status.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates text from a prompt.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates text with streaming output.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    IAsyncEnumerable<string> GenerateStreamAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies that the model digest matches the expected value.
    /// </summary>
    /// <param name="expectedDigest">Expected SHA-256 digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default);
}
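
// Minimal usage sketch showing how a caller might drive ILocalLlmRuntime end
// to end. This is illustrative only, not part of the runtime contract: the
// class and method names here are hypothetical, and the digest-mismatch
// handling (throwing) is an assumption of this sketch.
public static class LocalLlmRuntimeExample
{
    /// <summary>
    /// Loads a model, refuses to run if the digest does not verify, and
    /// returns the result of a single generation.
    /// </summary>
    public static async Task<LocalInferenceResult> GenerateOnceAsync(
        ILocalLlmRuntime runtime,
        LocalLlmConfig config,
        string expectedDigest,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        await runtime.LoadModelAsync(config, cancellationToken);
        try
        {
            if (!await runtime.VerifyDigestAsync(expectedDigest, cancellationToken))
            {
                throw new InvalidOperationException(
                    $"Model digest mismatch for runtime '{runtime.RuntimeType}'.");
            }

            return await runtime.GenerateAsync(prompt, cancellationToken);
        }
        finally
        {
            // Unload even when generation was cancelled or failed.
            await runtime.UnloadModelAsync(CancellationToken.None);
        }
    }

    /// <summary>
    /// Collects streamed chunks into a single string.
    /// </summary>
    public static async Task<string> GenerateStreamedAsync(
        ILocalLlmRuntime runtime,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        var builder = new System.Text.StringBuilder();
        await foreach (string chunk in runtime.GenerateStreamAsync(prompt, cancellationToken))
        {
            builder.Append(chunk);
        }

        return builder.ToString();
    }
}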