StellaOps Bot
2025-12-26 15:19:07 +02:00
25 changed files with 3377 additions and 132 deletions


@@ -0,0 +1,182 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
namespace StellaOps.AdvisoryAI.Inference;
/// <summary>
/// Local LLM runtime backed by llama.cpp bindings. Inference is currently a
/// deterministic placeholder pending integration of the native bindings.
/// Sprint: SPRINT_20251226_019_AI_offline_inference
/// Task: OFFLINE-05
/// </summary>
public sealed class LlamaCppRuntime : ILocalLlmRuntime
{
private LocalLlmConfig? _config;
private bool _modelLoaded;
private string? _computedDigest;
public string RuntimeType => "llama.cpp";
public Task LoadModelAsync(LocalLlmConfig config, CancellationToken cancellationToken = default)
{
_config = config;
// Verify model file exists
if (!File.Exists(config.ModelPath))
{
throw new FileNotFoundException($"Model file not found: {config.ModelPath}");
}
// In a real implementation, this would:
// 1. Load the GGUF/GGML model file
// 2. Initialize llama.cpp context with config settings
// 3. Verify digest if required
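// A hypothetical sketch of steps 1-2 with a llama.cpp binding such as LLamaSharp
// (type and property names are assumptions, not a pinned API):
//
//   var parameters = new ModelParams(config.ModelPath)
//   {
//       ContextSize = (uint)config.ContextLength,
//       Seed = (uint)config.Seed
//   };
//   _weights = LLamaWeights.LoadFromFile(parameters);
//   _context = _weights.CreateContext(parameters);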
_modelLoaded = true;
return Task.CompletedTask;
}
public Task UnloadModelAsync(CancellationToken cancellationToken = default)
{
_modelLoaded = false;
_config = null;
_computedDigest = null;
return Task.CompletedTask;
}
public Task<LocalModelStatus> GetStatusAsync(CancellationToken cancellationToken = default)
{
return Task.FromResult(new LocalModelStatus
{
Loaded = _modelLoaded,
ModelPath = _config?.ModelPath ?? string.Empty,
DigestVerified = _computedDigest is not null && string.Equals(_computedDigest, _config?.WeightsDigest, StringComparison.OrdinalIgnoreCase),
MemoryBytes = _modelLoaded ? EstimateMemoryUsage() : 0,
Device = _config?.Device.ToString() ?? "Unknown",
ContextSize = _config?.ContextLength ?? 0
});
}
public async Task<LocalInferenceResult> GenerateAsync(string prompt, CancellationToken cancellationToken = default)
{
if (!_modelLoaded || _config is null)
{
throw new InvalidOperationException("Model not loaded");
}
var stopwatch = Stopwatch.StartNew();
var firstTokenTime = 0L;
// In a real implementation, this would call llama.cpp inference
// For now, return a placeholder response
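// A hypothetical sketch with an LLamaSharp-style executor (API names are
// assumptions; inferenceParams would carry Temperature/Seed from _config):
//
//   var executor = new StatelessExecutor(_weights, parameters);
//   var output = new StringBuilder();
//   await foreach (var token in executor.InferAsync(prompt, inferenceParams, cancellationToken))
//   {
//       output.Append(token);
//   }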
await Task.Delay(100, cancellationToken); // Simulate first token
firstTokenTime = stopwatch.ElapsedMilliseconds;
await Task.Delay(400, cancellationToken); // Simulate generation
stopwatch.Stop();
var generatedContent = GeneratePlaceholderResponse(prompt);
// Whitespace word count stands in for a real tokenizer count in this stub.
var tokensGenerated = generatedContent.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
return new LocalInferenceResult
{
Content = generatedContent,
TokensGenerated = tokensGenerated,
InferenceTimeMs = stopwatch.ElapsedMilliseconds,
TimeToFirstTokenMs = firstTokenTime,
ModelId = $"local:{Path.GetFileName(_config.ModelPath)}",
Deterministic = _config.Temperature == 0,
Seed = _config.Seed
};
}
public async IAsyncEnumerable<string> GenerateStreamAsync(
string prompt,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
{
if (!_modelLoaded || _config is null)
{
throw new InvalidOperationException("Model not loaded");
}
// Simulate streaming output
var words = GeneratePlaceholderResponse(prompt).Split(' ');
foreach (var word in words)
{
if (cancellationToken.IsCancellationRequested)
{
yield break;
}
await Task.Delay(50, cancellationToken);
yield return word + " ";
}
}
public async Task<bool> VerifyDigestAsync(string expectedDigest, CancellationToken cancellationToken = default)
{
if (_config is null || !File.Exists(_config.ModelPath))
{
return false;
}
using var sha256 = SHA256.Create();
await using var stream = File.OpenRead(_config.ModelPath);
var hash = await sha256.ComputeHashAsync(stream, cancellationToken);
_computedDigest = Convert.ToHexStringLower(hash);
return string.Equals(_computedDigest, expectedDigest, StringComparison.OrdinalIgnoreCase);
}
private long EstimateMemoryUsage()
{
if (_config is null)
{
return 0;
}
// Rough estimate: quantized weights file size plus a KV-cache allowance.
// Real KV usage scales with layer count, KV width and element precision;
// ~4 KiB per context token is only a coarse placeholder.
var baseSize = new FileInfo(_config.ModelPath).Length;
var contextOverhead = _config.ContextLength * 4096L;
return baseSize + contextOverhead;
}
private static string GeneratePlaceholderResponse(string prompt)
{
// In a real implementation, this would be actual LLM output
if (prompt.Contains("explain", StringComparison.OrdinalIgnoreCase))
{
return "This vulnerability affects the component by allowing unauthorized access. " +
"The vulnerable code path is reachable from the application entry point. " +
"Evidence: [EVIDENCE:sbom-001] Component is present in SBOM. " +
"[EVIDENCE:reach-001] Call graph shows reachability.";
}
if (prompt.Contains("remediat", StringComparison.OrdinalIgnoreCase))
{
return "Recommended remediation: Upgrade the affected component to the patched version. " +
"- Update package.json: dependency@1.0.0 -> dependency@1.0.1 " +
"- Run npm install to update lockfile " +
"- Verify with npm audit";
}
if (prompt.Contains("policy", StringComparison.OrdinalIgnoreCase))
{
return "Parsed policy intent: Override rule for critical severity. " +
"Conditions: severity = critical, scope = production. " +
"Actions: set_verdict = block.";
}
return "Analysis complete. The finding has been evaluated based on available evidence.";
}
public void Dispose()
{
_modelLoaded = false;
_config = null;
_computedDigest = null;
}
}
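
For orientation, a minimal usage sketch of the runtime as committed here. LocalLlmConfig is assumed to expose settable properties for the fields this file references (ModelPath, ContextLength, Temperature, Seed); the model path and digest below are illustrative placeholders, not real values.

var config = new LocalLlmConfig
{
    ModelPath = "/models/advisory-llm-q4.gguf", // illustrative path
    ContextLength = 4096,
    Temperature = 0,
    Seed = 42
};

var runtime = new LlamaCppRuntime();
await runtime.LoadModelAsync(config);
var digestOk = await runtime.VerifyDigestAsync("<expected sha256 hex>"); // placeholder digest
var result = await runtime.GenerateAsync("Explain why this finding is reachable.");
Console.WriteLine($"{result.Content} ({result.TokensGenerated} tokens in {result.InferenceTimeMs} ms)");
runtime.Dispose();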