Refactor code structure and optimize performance across multiple modules
etc/llm-providers/llama-server.yaml.sample (new file, 96 lines)
@@ -0,0 +1,96 @@
# llama.cpp Server LLM Provider configuration template
# This is the PRIMARY provider for OFFLINE/AIRGAP deployments.
# Copy to llama-server.yaml (remove .sample extension) and configure.

# Provider enabled state and priority
# Lower priority number = higher preference (10 = prefer over cloud providers)
enabled: true
priority: 10

# Server Configuration
server:
  # Base URL for llama.cpp server
  # Start llama.cpp with: llama-server -m model.gguf --host 0.0.0.0 --port 8080
  baseUrl: "http://localhost:8080"

  # API key if server requires authentication (--api-key flag)
  apiKey: ""

  # Health check endpoint
  healthEndpoint: "/health"

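  # Illustrative readiness check against the baseUrl and healthEndpoint above
  # (assumes the stock llama-server /health route):
  #   curl -fsS http://localhost:8080/health
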
# Model Configuration
model:
  # Model name (for logging and identification)
  name: "llama3-8b-q4km"

  # Path to model file (informational, model is loaded on server)
  modelPath: "/models/llama-3-8b-instruct.Q4_K_M.gguf"

  # Expected model digest (SHA-256) for verification
  # Ensures the correct model is loaded in airgap environments
  expectedDigest: ""

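  # Illustrative way to compute the digest for expectedDigest using standard
  # tooling (whether a bare hex string or a prefixed form is expected is not
  # specified in this sample):
  #   sha256sum /models/llama-3-8b-instruct.Q4_K_M.gguf
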
# Inference Parameters
inference:
  # Temperature: 0 = deterministic (REQUIRED for reproducibility)
  temperature: 0.0

  # Maximum tokens to generate
  maxTokens: 4096

  # Random seed for reproducibility (REQUIRED for determinism)
  seed: 42

  # Nucleus sampling (top-p)
  topP: 1.0

  # Top-k sampling
  topK: 40

  # Repeat penalty (1.0 = no penalty)
  repeatPenalty: 1.1

  # Context length (must match server's -c flag)
  contextLength: 4096

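  # Illustrative mapping of the settings above onto a llama.cpp /completion
  # request body (field names assumed from the upstream llama-server API):
  #   { "temperature": 0.0, "seed": 42, "top_p": 1.0, "top_k": 40,
  #     "repeat_penalty": 1.1, "n_predict": 4096 }
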
# Request Configuration
request:
  # Request timeout, hh:mm:ss (longer for local inference)
  timeout: "00:05:00"

  # Maximum retries on failure
  maxRetries: 2

# Model Bundle Configuration (for airgap deployments)
bundle:
  # Path to signed model bundle (.stellaops-model directory)
  # Created using: stella model bundle --sign
  bundlePath: ""

  # Verify bundle signature before loading
  verifySignature: true

  # Cryptographic scheme for verification
  # Options: ed25519, ecdsa-p256, gost3410, sm2
  cryptoScheme: "ed25519"

# Logging Configuration
logging:
  # Log health check results
  logHealthChecks: false

  # Log token usage statistics
  logUsage: true

# Performance Tuning
performance:
  # Number of threads for inference (-t flag on server)
  # 0 = auto-detect
  threads: 0

  # Batch size for prompt processing
  batchSize: 512

  # Number of parallel request contexts (server's -np flag)
  parallelContexts: 1

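# Illustrative llama-server invocation consistent with the settings in this file
# (flag names taken from upstream llama.cpp; -t is omitted so threads auto-detect,
# matching threads: 0 above):
#   llama-server -m /models/llama-3-8b-instruct.Q4_K_M.gguf \
#     --host 0.0.0.0 --port 8080 -c 4096 -b 512 -np 1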