git.stella-ops.org/src/AdvisoryAI/StellaOps.AdvisoryAI/Inference/ILocalLlmRuntime.cs

namespace StellaOps.AdvisoryAI.Inference;

/// <summary>
/// Result of local LLM inference.
/// </summary>
public sealed record LocalInferenceResult
{
    /// <summary>
    /// Generated text content.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Number of tokens generated.
    /// </summary>
    public required int TokensGenerated { get; init; }

    /// <summary>
    /// Total inference time in milliseconds.
    /// </summary>
    public required long InferenceTimeMs { get; init; }

    /// <summary>
    /// Time to first token in milliseconds.
    /// </summary>
    public required long TimeToFirstTokenMs { get; init; }

    /// <summary>
    /// Tokens-per-second throughput.
    /// </summary>
    public double TokensPerSecond => InferenceTimeMs > 0
        ? TokensGenerated * 1000.0 / InferenceTimeMs
        : 0;
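
    // Illustrative arithmetic for the property above (hypothetical figures):
    // 256 tokens generated over 4000 ms of inference gives
    // 256 * 1000.0 / 4000 = 64 tokens per second.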

    /// <summary>
    /// Model ID used for inference.
    /// </summary>
    public required string ModelId { get; init; }

    /// <summary>
    /// Whether inference was deterministic.
    /// </summary>
    public required bool Deterministic { get; init; }

    /// <summary>
    /// Seed used for generation.
    /// </summary>
    public required int Seed { get; init; }
}

/// <summary>
/// Model status information.
/// </summary>
public sealed record LocalModelStatus
{
    /// <summary>
    /// Whether the model is loaded.
    /// </summary>
    public required bool Loaded { get; init; }

    /// <summary>
    /// Model path.
    /// </summary>
    public required string ModelPath { get; init; }

    /// <summary>
    /// Whether the model digest matches the expected digest.
    /// </summary>
    public required bool DigestVerified { get; init; }

    /// <summary>
    /// Memory usage in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }

    /// <summary>
    /// Device being used.
    /// </summary>
    public required string Device { get; init; }

    /// <summary>
    /// Context size in tokens.
    /// </summary>
    public required int ContextSize { get; init; }
}

/// <summary>
/// Interface for a local LLM runtime.
/// Sprint: SPRINT_20251226_019_AI_offline_inference
/// Task: OFFLINE-04
/// </summary>
public interface ILocalLlmRuntime : IDisposable
{
    /// <summary>
    /// Runtime type identifier.
    /// </summary>
    string RuntimeType { get; }

    /// <summary>
    /// Loads a model with the given configuration.
    /// </summary>
    /// <param name="config">Model configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default);

    /// <summary>
    /// Unloads the current model.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task UnloadModelAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the current model status.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates text from a prompt.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Generates text as a stream of output chunks.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    IAsyncEnumerable<string> GenerateStreamAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies that the model digest matches the expected value.
    /// </summary>
    /// <param name="expectedDigest">Expected SHA-256 digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default);
}
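
// Usage sketch (illustrative only, not part of the original interface file).
// It shows one plausible call sequence against ILocalLlmRuntime: load the model,
// verify its digest, check status, then generate. The concrete runtime instance
// and the shape of LocalLlmConfig are supplied elsewhere in the project; nothing
// here assumes a particular implementation.
internal static class LocalLlmRuntimeUsageSketch
{
    internal static async Task<string> RunOnceAsync(
        ILocalLlmRuntime runtime,
        LocalLlmConfig config,
        string expectedDigest,
        string prompt,
        CancellationToken cancellationToken = default)
    {
        // Load the model described by the configuration.
        await runtime.LoadModelAsync(config, cancellationToken);

        // Refuse to run inference against a model whose SHA-256 digest
        // does not match the expected value.
        if (!await runtime.VerifyDigestAsync(expectedDigest, cancellationToken))
        {
            throw new InvalidOperationException("Model digest verification failed.");
        }

        // Optional sanity check on the loaded model before generating.
        LocalModelStatus status = await runtime.GetStatusAsync(cancellationToken);
        if (!status.Loaded)
        {
            throw new InvalidOperationException("Model is not loaded.");
        }

        LocalInferenceResult result = await runtime.GenerateAsync(prompt, cancellationToken);
        return result.Content;
    }

    // Streaming variant: forward chunks to a caller-supplied callback as they arrive.
    internal static async Task StreamAsync(
        ILocalLlmRuntime runtime,
        string prompt,
        Action<string> onChunk,
        CancellationToken cancellationToken = default)
    {
        await foreach (string chunk in runtime.GenerateStreamAsync(prompt, cancellationToken))
        {
            onChunk(chunk);
        }
    }
}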