using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace StellaOps.AdvisoryAI.Inference;

/// <summary>
/// Result of local LLM inference.
/// </summary>
public sealed record LocalInferenceResult
{
    /// <summary>
    /// Generated text content.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Number of tokens generated.
    /// </summary>
    public required int TokensGenerated { get; init; }

    /// <summary>
    /// Total inference time in milliseconds.
    /// </summary>
    public required long InferenceTimeMs { get; init; }

    /// <summary>
    /// Time to first token in milliseconds.
    /// </summary>
    public required long TimeToFirstTokenMs { get; init; }

    /// <summary>
    /// Tokens-per-second throughput, derived from the generated token count
    /// and the total inference time; zero when no time was recorded.
    /// </summary>
    public double TokensPerSecond => InferenceTimeMs > 0
        ? TokensGenerated * 1000.0 / InferenceTimeMs
        : 0;

    /// <summary>
    /// Model ID used for inference.
    /// </summary>
    public required string ModelId { get; init; }

    /// <summary>
    /// Whether inference was deterministic.
    /// </summary>
    public required bool Deterministic { get; init; }

    /// <summary>
    /// Seed used for generation.
    /// </summary>
    public required int Seed { get; init; }
}

/// <summary>
/// Model status information.
/// </summary>
public sealed record LocalModelStatus
{
    /// <summary>
    /// Whether the model is loaded.
    /// </summary>
    public required bool Loaded { get; init; }

    /// <summary>
    /// Model path.
    /// </summary>
    public required string ModelPath { get; init; }

    /// <summary>
    /// Whether the verified digest matches the expected digest.
    /// </summary>
    public required bool DigestVerified { get; init; }

    /// <summary>
    /// Memory usage in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }

    /// <summary>
    /// Device being used.
    /// </summary>
    public required string Device { get; init; }

    /// <summary>
    /// Context size in tokens.
    /// </summary>
    public required int ContextSize { get; init; }
}

/// <summary>
/// Interface for a local LLM runtime.
/// Sprint: SPRINT_20251226_019_AI_offline_inference
/// Task: OFFLINE-04
/// </summary>
public interface ILocalLlmRuntime : IDisposable
{
    /// <summary>
    /// Runtime type identifier.
    /// </summary>
    string RuntimeType { get; }

    /// <summary>
    /// Load a model with the given configuration.
    /// </summary>
    /// <param name="config">Model configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default);

    /// <summary>
    /// Unload the current model.
    /// </summary>
    Task UnloadModelAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Get the current model status.
    /// </summary>
    Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Generate text from a prompt.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Generate text with streaming output.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    IAsyncEnumerable<string> GenerateStreamAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verify the model digest matches the expected value.
    /// </summary>
    /// <param name="expectedDigest">Expected SHA-256 digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default);
}
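
// Illustrative usage sketch, not part of the contract above: it shows how a
// caller might drive an ILocalLlmRuntime through a load -> verify -> generate
// lifecycle. The helper names here are hypothetical; the runtime and config
// instances are supplied by the caller, and no concrete implementation or
// LocalLlmConfig members are assumed beyond the surface declared in this file.
public static class LocalLlmRuntimeUsageExample
{
    public static async Task<LocalInferenceResult> LoadVerifyAndGenerateAsync(
        ILocalLlmRuntime runtime,
        LocalLlmConfig config,
        string expectedDigest,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        // Load the model, then refuse to serve if the loaded artifact does not
        // match the pinned SHA-256 digest (an offline supply-chain check).
        await runtime.LoadModelAsync(config, cancellationToken);

        if (!await runtime.VerifyDigestAsync(expectedDigest, cancellationToken))
        {
            await runtime.UnloadModelAsync(cancellationToken);
            throw new InvalidOperationException(
                "Model digest mismatch; refusing to run inference.");
        }

        // Non-streaming generation returns the full result, including timing
        // metadata such as TokensPerSecond and TimeToFirstTokenMs.
        return await runtime.GenerateAsync(prompt, cancellationToken);
    }

    // Streaming variant: tokens are written out as the runtime produces them.
    public static async Task StreamToConsoleAsync(
        ILocalLlmRuntime runtime,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        await foreach (var token in runtime.GenerateStreamAsync(prompt, cancellationToken))
        {
            Console.Write(token);
        }
    }
}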