Merge branch 'main' of https://git.stella-ops.org/stella-ops.org/git.stella-ops.org
namespace StellaOps.AdvisoryAI.Inference;

/// <summary>
/// Result of local LLM inference.
/// </summary>
public sealed record LocalInferenceResult
{
    /// <summary>
    /// Generated text content.
    /// </summary>
    public required string Content { get; init; }

    /// <summary>
    /// Number of tokens generated.
    /// </summary>
    public required int TokensGenerated { get; init; }

    /// <summary>
    /// Total inference time in milliseconds.
    /// </summary>
    public required long InferenceTimeMs { get; init; }

    /// <summary>
    /// Time to first token in milliseconds.
    /// </summary>
    public required long TimeToFirstTokenMs { get; init; }

    /// <summary>
    /// Tokens per second throughput.
    /// </summary>
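    // Example: 400 tokens over 12,500 ms => 400 * 1000.0 / 12_500 = 32.0 tokens/sec;
    // the guard below returns 0 when no elapsed time was recorded.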
    public double TokensPerSecond => InferenceTimeMs > 0
        ? TokensGenerated * 1000.0 / InferenceTimeMs
        : 0;

    /// <summary>
    /// Model ID used for inference.
    /// </summary>
    public required string ModelId { get; init; }

    /// <summary>
    /// Whether inference was deterministic.
    /// </summary>
    public required bool Deterministic { get; init; }

    /// <summary>
    /// Seed used for generation.
    /// </summary>
    public required int Seed { get; init; }
}
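
// --- Illustrative construction (sketch; all literal values below are hypothetical) ---
// Shows how a runtime implementation might populate the record after a run.
// With these numbers, TokensPerSecond evaluates to 400 * 1000.0 / 12_500 = 32.0.
public static class LocalInferenceResultExample
{
    public static LocalInferenceResult Sample() => new()
    {
        Content = "<generated text>",
        TokensGenerated = 400,
        InferenceTimeMs = 12_500,
        TimeToFirstTokenMs = 180,
        ModelId = "example-model", // hypothetical identifier
        Deterministic = true,
        Seed = 42,
    };
}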

/// <summary>
/// Model status information.
/// </summary>
public sealed record LocalModelStatus
{
    /// <summary>
    /// Whether the model is loaded.
    /// </summary>
    public required bool Loaded { get; init; }

    /// <summary>
    /// Model path.
    /// </summary>
    public required string ModelPath { get; init; }

    /// <summary>
    /// Whether the computed digest matches the expected value.
    /// </summary>
    public required bool DigestVerified { get; init; }

    /// <summary>
    /// Memory usage in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }

    /// <summary>
    /// Device being used.
    /// </summary>
    public required string Device { get; init; }

    /// <summary>
    /// Context size in tokens.
    /// </summary>
    public required int ContextSize { get; init; }
}
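
// --- Illustrative helper (sketch; not part of the committed file) ---
// Formats a status snapshot for logging; the MiB conversion is plain
// arithmetic over MemoryBytes, everything else reads the record as-is.
public static class LocalModelStatusExtensions
{
    public static string Describe(this LocalModelStatus status) =>
        status.Loaded
            ? $"'{status.ModelPath}' on {status.Device}: " +
              $"{status.MemoryBytes / (1024.0 * 1024.0):F0} MiB, " +
              $"ctx={status.ContextSize} tokens, digestVerified={status.DigestVerified}"
            : "no model loaded";
}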

/// <summary>
/// Interface for a local LLM runtime.
/// Sprint: SPRINT_20251226_019_AI_offline_inference
/// Task: OFFLINE-04
/// </summary>
public interface ILocalLlmRuntime : IDisposable
{
    /// <summary>
    /// Runtime type identifier.
    /// </summary>
    string RuntimeType { get; }

    /// <summary>
    /// Load a model with the given configuration.
    /// </summary>
    /// <param name="config">Model configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default);

    /// <summary>
    /// Unload the current model.
    /// </summary>
    Task UnloadModelAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Get current model status.
    /// </summary>
    Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Generate text from a prompt.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Generate text with streaming output.
    /// </summary>
    /// <param name="prompt">Input prompt.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    IAsyncEnumerable<string> GenerateStreamAsync(string prompt, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verify that the model digest matches the expected value.
    /// </summary>
    /// <param name="expectedDigest">Expected SHA-256 digest.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default);
}
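
// --- Usage sketch (illustrative; not part of the committed file) ---
// A minimal consumer, assuming a concrete ILocalLlmRuntime implementation and
// a LocalLlmConfig instance (defined elsewhere in this namespace) are supplied
// by the caller. The prompt text and digest value are hypothetical.
public static class LocalLlmRuntimeUsage
{
    public static async Task<LocalInferenceResult> RunOnceAsync(
        ILocalLlmRuntime runtime,
        LocalLlmConfig config,
        string expectedDigest,
        CancellationToken cancellationToken = default)
    {
        // Load the model, then refuse to run inference unless the on-disk
        // artifact matches the pinned SHA-256 digest.
        await runtime.LoadModelAsync(config, cancellationToken);
        if (!await runtime.VerifyDigestAsync(expectedDigest, cancellationToken))
        {
            await runtime.UnloadModelAsync(cancellationToken);
            throw new InvalidOperationException("Model digest mismatch; refusing to run inference.");
        }

        LocalModelStatus status = await runtime.GetStatusAsync(cancellationToken);
        Console.WriteLine($"Model loaded on {status.Device} (ctx={status.ContextSize} tokens).");

        // Streaming variant: chunks arrive as they are generated.
        await foreach (string chunk in runtime.GenerateStreamAsync("Summarize this advisory.", cancellationToken))
        {
            Console.Write(chunk);
        }

        // Non-streaming variant returns the full result plus timing metadata.
        return await runtime.GenerateAsync("Summarize this advisory.", cancellationToken);
    }
}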