namespace StellaOps.AdvisoryAI.Inference;

/// <summary>
/// Result of local LLM inference.
/// </summary>
public sealed record LocalInferenceResult
{
    /// <summary>
    /// Generated text content.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Number of tokens generated.
    /// </summary>
    public required int TokensGenerated { get; init; }

    /// <summary>
    /// Total inference time in milliseconds.
    /// </summary>
    public required long InferenceTimeMs { get; init; }

    /// <summary>
    /// Time to first token in milliseconds.
    /// </summary>
    public required long TimeToFirstTokenMs { get; init; }

    /// <summary>
    /// Tokens-per-second throughput.
    /// </summary>
    public double TokensPerSecond => InferenceTimeMs > 0
        ? TokensGenerated * 1000.0 / InferenceTimeMs
        : 0;
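    // Worked example: 256 tokens generated in 3,200 ms => 256 * 1000.0 / 3200 = 80 tokens/s.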

    /// <summary>
    /// Model ID used for inference.
    /// </summary>
    public required string ModelId { get; init; }

    /// <summary>
    /// Whether inference was deterministic.
    /// </summary>
    public required bool Deterministic { get; init; }

    /// <summary>
    /// Seed used for generation.
    /// </summary>
    public required int Seed { get; init; }
}

/// <summary>
/// Model status information.
/// </summary>
public sealed record LocalModelStatus
{
    /// <summary>
    /// Whether the model is loaded.
    /// </summary>
    public required bool Loaded { get; init; }

    /// <summary>
    /// Path to the model file.
    /// </summary>
    public required string ModelPath { get; init; }

    /// <summary>
    /// Whether the model digest matches the expected value.
    /// </summary>
    public required bool DigestVerified { get; init; }

    /// <summary>
    /// Memory usage in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }

    /// <summary>
    /// Device being used (e.g. CPU or GPU).
    /// </summary>
    public required string Device { get; init; }

    /// <summary>
    /// Context size in tokens.
    /// </summary>
    public required int ContextSize { get; init; }
}
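
// Illustrative convenience helper; an assumption of this sketch, not part of
// the original contract. Callers typically gate inference on a model that is
// both loaded and digest-verified.
public static class LocalModelStatusExtensions
{
    /// <summary>
    /// True when the model is loaded and its digest has been verified.
    /// </summary>
    public static bool IsReady(this LocalModelStatus status) =>
        status.Loaded && status.DigestVerified;
}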

/// <summary>
/// Interface for a local LLM runtime.
/// </summary>
/// <remarks>
/// Sprint: SPRINT_20251226_019_AI_offline_inference.
/// Task: OFFLINE-04.
/// </remarks>
public interface ILocalLlmRuntime : IDisposable
{
    /// <summary>
    /// Runtime type identifier.
    /// </summary>
    string RuntimeType { get; }

    /// <summary>
    /// Loads a model with the given configuration.
    /// </summary>
    /// <param name="config">Model configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default);

    /// <summary>
    /// Unloads the current model.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task UnloadModelAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the current model status.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates text from a prompt.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates text with streaming output.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    IAsyncEnumerable<string> GenerateStreamAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies that the model digest matches the expected value.
    /// </summary>
    /// <param name="expectedDigest">Expected SHA-256 digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default);
}
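
// Minimal usage sketch showing how a caller might drive ILocalLlmRuntime end
// to end. This is illustrative only, not part of the runtime contract: the
// class and method names here are hypothetical, and the digest-mismatch
// handling (throwing) is an assumption of this sketch.
public static class LocalLlmRuntimeExample
{
    /// <summary>
    /// Loads a model, refuses to run if the digest does not verify, and
    /// returns the result of a single generation.
    /// </summary>
    public static async Task<LocalInferenceResult> GenerateOnceAsync(
        ILocalLlmRuntime runtime,
        LocalLlmConfig config,
        string expectedDigest,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        await runtime.LoadModelAsync(config, cancellationToken);
        try
        {
            if (!await runtime.VerifyDigestAsync(expectedDigest, cancellationToken))
            {
                throw new InvalidOperationException(
                    $"Model digest mismatch for runtime '{runtime.RuntimeType}'.");
            }

            return await runtime.GenerateAsync(prompt, cancellationToken);
        }
        finally
        {
            // Unload even when generation was cancelled or failed.
            await runtime.UnloadModelAsync(CancellationToken.None);
        }
    }

    /// <summary>
    /// Collects streamed chunks into a single string.
    /// </summary>
    public static async Task<string> GenerateStreamedAsync(
        ILocalLlmRuntime runtime,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        var builder = new System.Text.StringBuilder();
        await foreach (string chunk in runtime.GenerateStreamAsync(prompt, cancellationToken))
        {
            builder.Append(chunk);
        }

        return builder.ToString();
    }
}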