namespace StellaOps.AdvisoryAI.Inference;

/// <summary>
/// Result of local LLM inference.
/// </summary>
public sealed record LocalInferenceResult
{
    /// <summary>
    /// Generated text content.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Number of tokens generated.
    /// </summary>
    public required int TokensGenerated { get; init; }

    /// <summary>
    /// Total inference time in milliseconds.
    /// </summary>
    public required long InferenceTimeMs { get; init; }

    /// <summary>
    /// Time to first token in milliseconds.
    /// </summary>
    public required long TimeToFirstTokenMs { get; init; }

    /// <summary>
    /// Tokens per second throughput.
    /// </summary>
    public double TokensPerSecond => InferenceTimeMs > 0
        ? TokensGenerated * 1000.0 / InferenceTimeMs
        : 0;
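    // Illustrative arithmetic, not part of the contract: 256 tokens generated
    // over 4,000 ms gives 256 * 1000.0 / 4000 = 64.0 tokens/sec; a zero
    // InferenceTimeMs reports 0 rather than dividing by zero.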

    /// <summary>
    /// Model ID used for inference.
    /// </summary>
    public required string ModelId { get; init; }

    /// <summary>
    /// Whether inference was deterministic.
    /// </summary>
    public required bool Deterministic { get; init; }

    /// <summary>
    /// Seed used for generation.
    /// </summary>
    public required int Seed { get; init; }
}
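
// Illustrative construction, with made-up values, showing how a runtime
// implementation might populate the result record:
//
//     var result = new LocalInferenceResult
//     {
//         Content = generatedText,
//         TokensGenerated = 256,
//         InferenceTimeMs = 4_000,
//         TimeToFirstTokenMs = 120,
//         ModelId = "example-model",   // hypothetical identifier
//         Deterministic = true,
//         Seed = 42,
//     };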

/// <summary>
/// Model status information.
/// </summary>
public sealed record LocalModelStatus
{
    /// <summary>
    /// Whether the model is loaded.
    /// </summary>
    public required bool Loaded { get; init; }

    /// <summary>
    /// Model path.
    /// </summary>
    public required string ModelPath { get; init; }

    /// <summary>
    /// Whether the verified digest matches the expected value.
    /// </summary>
    public required bool DigestVerified { get; init; }

    /// <summary>
    /// Memory usage in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }

    /// <summary>
    /// Device being used.
    /// </summary>
    public required string Device { get; init; }

    /// <summary>
    /// Context size in tokens.
    /// </summary>
    public required int ContextSize { get; init; }
}
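
// Illustrative status formatting (field values are made up):
//
//     var gib = status.MemoryBytes / (1024.0 * 1024.0 * 1024.0);
//     Console.WriteLine($"{status.Device}: {gib:F1} GiB, ctx {status.ContextSize} tokens");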

/// <summary>
/// Interface for local LLM runtime.
/// Sprint: SPRINT_20251226_019_AI_offline_inference
/// Task: OFFLINE-04
/// </summary>
public interface ILocalLlmRuntime : IDisposable
{
    /// <summary>
    /// Runtime type identifier.
    /// </summary>
    string RuntimeType { get; }

    /// <summary>
    /// Load a model with the given configuration.
    /// </summary>
    /// <param name="config">Model configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default);

    /// <summary>
    /// Unload the current model.
    /// </summary>
    Task UnloadModelAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Get current model status.
    /// </summary>
    Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Generate text from a prompt.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Generate text with streaming output.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    IAsyncEnumerable<string> GenerateStreamAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verify that the model digest matches the expected value.
    /// </summary>
    /// <param name="expectedDigest">Expected SHA-256 digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default);
}
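
// A minimal usage sketch, not shipped behavior: it assumes a concrete
// ILocalLlmRuntime implementation and a populated LocalLlmConfig are supplied
// by the caller (the config shape is defined elsewhere), and that the host
// project enables implicit usings, as the contracts above do.
public static class LocalLlmRuntimeUsageExample
{
    public static async Task RunAsync(
        ILocalLlmRuntime runtime,
        LocalLlmConfig config,
        string expectedDigest,
        CancellationToken cancellationToken = default)
    {
        await runtime.LoadModelAsync(config, cancellationToken);

        // Refuse to serve inference from a model whose digest does not match.
        if (!await runtime.VerifyDigestAsync(expectedDigest, cancellationToken))
        {
            await runtime.UnloadModelAsync(cancellationToken);
            throw new InvalidOperationException("Model digest verification failed.");
        }

        var status = await runtime.GetStatusAsync(cancellationToken);
        Console.WriteLine(
            $"Runtime={runtime.RuntimeType} Device={status.Device} Context={status.ContextSize}");

        // One-shot generation returns the full result with timing metadata.
        var result = await runtime.GenerateAsync("<prompt>", cancellationToken);
        Console.WriteLine(
            $"{result.TokensGenerated} tokens in {result.InferenceTimeMs} ms " +
            $"({result.TokensPerSecond:F1} tok/s, seed {result.Seed})");

        // Streaming yields text chunks as they are produced.
        await foreach (var chunk in runtime.GenerateStreamAsync("<prompt>", cancellationToken))
        {
            Console.Write(chunk);
        }
    }
}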