StellaOps Bot
2025-12-26 15:19:07 +02:00
25 changed files with 3377 additions and 132 deletions


@@ -0,0 +1,182 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
namespace StellaOps.AdvisoryAI.Inference;
/// <summary>
/// Local LLM runtime backed by llama.cpp bindings. Inference is currently a
/// deterministic placeholder pending integration of the native bindings.
/// Sprint: SPRINT_20251226_019_AI_offline_inference
/// Task: OFFLINE-05
/// </summary>
public sealed class LlamaCppRuntime : ILocalLlmRuntime
{
private LocalLlmConfig? _config;
private bool _modelLoaded;
private string? _computedDigest;
public string RuntimeType => "llama.cpp";
public Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default)
{
_config = config;
// Verify model file exists
if (!File.Exists(config.ModelPath))
{
throw new FileNotFoundException($"Model file not found: {config.ModelPath}");
}
// In a real implementation, this would:
// 1. Load the GGUF/GGML model file
// 2. Initialize llama.cpp context with config settings
// 3. Verify digest if required
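// A hypothetical sketch of steps 1-2 with a llama.cpp binding such as LLamaSharp
// (type and property names are assumptions, not a pinned API):
//
//   var parameters = new ModelParams(config.ModelPath)
//   {
//       ContextSize = (uint)config.ContextLength,
//       Seed = (uint)config.Seed
//   };
//   _weights = LLamaWeights.LoadFromFile(parameters);
//   _context = _weights.CreateContext(parameters);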
_modelLoaded = true;
return Task.CompletedTask;
}
public Task UnloadModelAsync(CancellationToken cancellationToken = default)
{
_modelLoaded = false;
_config = null;
_computedDigest = null;
return Task.CompletedTask;
}
public Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default)
{
return Task.FromResult(new LocalModelStatus
{
Loaded = _modelLoaded,
ModelPath = _config?.ModelPath ?? string.Empty,
DigestVerified = _computedDigest is not null && string.Equals(_computedDigest, _config?.WeightsDigest, StringComparison.OrdinalIgnoreCase),
MemoryBytes = _modelLoaded ? EstimateMemoryUsage() : 0,
Device = _config?.Device.ToString() ?? "Unknown",
ContextSize = _config?.ContextLength ?? 0
});
}
public async Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default)
{
if (!_modelLoaded || _config is null)
{
throw new InvalidOperationException("Model not loaded");
}
var stopwatch = Stopwatch.StartNew();
var firstTokenTime = 0L;
// In a real implementation, this would call llama.cpp inference
// For now, return a placeholder response
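// A hypothetical sketch with an LLamaSharp-style executor (API names are
// assumptions; inferenceParams would carry Temperature/Seed from _config):
//
//   var executor = new StatelessExecutor(_weights, parameters);
//   var output = new StringBuilder();
//   await foreach (var token in executor.InferAsync(prompt, inferenceParams, cancellationToken))
//   {
//       output.Append(token);
//   }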
await Task.Delay(100, cancellationToken); // Simulate first token
firstTokenTime = stopwatch.ElapsedMilliseconds;
await Task.Delay(400, cancellationToken); // Simulate generation
stopwatch.Stop();
var generatedContent = GeneratePlaceholderResponse(prompt);
// Whitespace word count stands in for a real tokenizer count in this stub.
var tokensGenerated = generatedContent.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
return new LocalInferenceResult
{
Content = generatedContent,
TokensGenerated = tokensGenerated,
InferenceTimeMs = stopwatch.ElapsedMilliseconds,
TimeToFirstTokenMs = firstTokenTime,
ModelId = $"local:{Path.GetFileName(_config.ModelPath)}",
Deterministic = _config.Temperature == 0,
Seed = _config.Seed
};
}
public async IAsyncEnumerable<string> GenerateStreamAsync(
string prompt,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
{
if (!_modelLoaded || _config is null)
{
throw new InvalidOperationException("Model not loaded");
}
// Simulate streaming output
var words = GeneratePlaceholderResponse(prompt).Split(' ');
foreach (var word in words)
{
if (cancellationToken.IsCancellationRequested)
{
yield break;
}
await Task.Delay(50, cancellationToken);
yield return word + " ";
}
}
public async Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default)
{
if (_config is null || !File.Exists(_config.ModelPath))
{
return false;
}
using var sha256 = SHA256.Create();
await using var stream = File.OpenRead(_config.ModelPath);
var hash = await sha256.ComputeHashAsync(stream, cancellationToken);
_computedDigest = Convert.ToHexStringLower(hash);
return string.Equals(_computedDigest, expectedDigest, StringComparison.OrdinalIgnoreCase);
}
private long EstimateMemoryUsage()
{
if (_config is null)
{
return 0;
}
// Rough estimate: quantized weights file size plus a KV-cache allowance.
// Real KV usage scales with layer count, KV width and element precision;
// ~4 KiB per context token is only a coarse placeholder.
var baseSize = new FileInfo(_config.ModelPath).Length;
var contextOverhead = _config.ContextLength * 4096L;
return baseSize + contextOverhead;
}
private static string GeneratePlaceholderResponse(string prompt)
{
// In a real implementation, this would be actual LLM output
if (prompt.Contains("explain", StringComparison.OrdinalIgnoreCase))
{
return "This vulnerability affects the component by allowing unauthorized access. " +
"The vulnerable code path is reachable from the application entry point. " +
"Evidence: [EVIDENCE:sbom-001] Component is present in SBOM. " +
"[EVIDENCE:reach-001] Call graph shows reachability.";
}
if (prompt.Contains("remediat", StringComparison.OrdinalIgnoreCase))
{
return "Recommended remediation: Upgrade the affected component to the patched version. " +
"- Update package.json: dependency@1.0.0 -> dependency@1.0.1 " +
"- Run npm install to update lockfile " +
"- Verify with npm audit";
}
if (prompt.Contains("policy", StringComparison.OrdinalIgnoreCase))
{
return "Parsed policy intent: Override rule for critical severity. " +
"Conditions: severity = critical, scope = production. " +
"Actions: set_verdict = block.";
}
return "Analysis complete. The finding has been evaluated based on available evidence.";
}
public void Dispose()
{
_modelLoaded = false;
_config = null;
_computedDigest = null;
}
}
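
For orientation, a minimal usage sketch of the runtime as committed here. LocalLlmConfig is assumed to expose settable properties for the fields this file references (ModelPath, ContextLength, Temperature, Seed); the model path and digest below are illustrative placeholders, not real values.

var config = new LocalLlmConfig
{
    ModelPath = "/models/advisory-llm-q4.gguf", // illustrative path
    ContextLength = 4096,
    Temperature = 0,
    Seed = 42
};

var runtime = new LlamaCppRuntime();
await runtime.LoadModelAsync(config);
var digestOk = await runtime.VerifyDigestAsync("<expected sha256 hex>"); // placeholder digest
var result = await runtime.GenerateAsync("Explain why this finding is reachable.");
Console.WriteLine($"{result.Content} ({result.TokensGenerated} tokens in {result.InferenceTimeMs} ms)");
runtime.Dispose();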