Refactor code structure and optimize performance across multiple modules

This commit is contained in:
StellaOps Bot
2025-12-26 20:03:22 +02:00
parent c786faae84
commit f10d83c444
1385 changed files with 69732 additions and 10280 deletions

View File

@@ -0,0 +1,96 @@
# llama.cpp Server LLM Provider configuration template
# This is the PRIMARY provider for OFFLINE/AIRGAP deployments.
# Copy to llama-server.yaml (remove .sample extension) and configure.
# Provider enabled state and priority
# enabled: whether this provider is active at all.
# priority: ordering hint among configured providers.
# Lower priority number = higher preference (10 = prefer over cloud providers)
# NOTE(review): the priorities of the cloud providers are not visible here;
# confirm they use numbers greater than 10.
enabled: true
priority: 10
# Server Configuration
# How the provider reaches the llama.cpp server process.
server:
  # Base URL for llama.cpp server
  # Start llama.cpp with: llama-server -m model.gguf --host 0.0.0.0 --port 8080
  # NOTE(review): --host 0.0.0.0 listens on every network interface. When the
  # client runs on the same machine (as the localhost URL below suggests),
  # --host 127.0.0.1 is the tighter choice.
  baseUrl: "http://localhost:8080"
  # API key if server requires authentication (--api-key flag)
  # Presumably an empty string means "no authentication" - confirm in the
  # provider loader. Do not commit a real key; inject it at deploy time.
  apiKey: ""
  # Health check endpoint (path appended to baseUrl)
  healthEndpoint: "/health"
# Model Configuration
# Identifies the model the server is expected to be serving.
model:
  # Model name (for logging and identification)
  name: "llama3-8b-q4km"
  # Path to model file (informational, model is loaded on server)
  modelPath: "/models/llama-3-8b-instruct.Q4_K_M.gguf"
  # Expected model digest (SHA-256) for verification
  # Ensures the correct model is loaded in airgap environments
  # NOTE(review): presumably an empty string skips the digest check - confirm,
  # and fill this in for real airgap deployments.
  expectedDigest: ""
# Inference Parameters
# Sampling settings sent with each request; the defaults aim at reproducible
# output (temperature 0 plus a fixed seed).
inference:
  # Temperature: 0 = deterministic (REQUIRED for reproducibility)
  temperature: 0.0
  # Maximum tokens to generate
  # NOTE(review): this equals contextLength below, so a full-length generation
  # would leave no room for the prompt in the context window - confirm whether
  # the provider or server clamps it.
  maxTokens: 4096
  # Random seed for reproducibility (REQUIRED for determinism)
  seed: 42
  # Nucleus sampling (top-p); 1.0 keeps the full probability mass
  topP: 1.0
  # Top-k sampling
  topK: 40
  # Repeat penalty (1.0 = no penalty)
  repeatPenalty: 1.1
  # Context length (must match server's -c flag)
  contextLength: 4096
# Request Configuration
# Per-request HTTP behaviour towards the llama.cpp server.
request:
  # Request timeout (longer for local inference)
  # Kept quoted: an unquoted digits-and-colons value can be read as a number
  # by YAML 1.1 parsers. Looks like hh:mm:ss (5 minutes) - confirm the format
  # the loader expects.
  timeout: "00:05:00"
  # Maximum retries on failure
  maxRetries: 2
# Model Bundle Configuration (for airgap deployments)
# Settings for a signed model bundle and how its signature is checked.
bundle:
  # Path to signed model bundle (.stellaops-model directory)
  # Created using: stella model bundle --sign
  # NOTE(review): presumably an empty path means no bundle is used - confirm.
  bundlePath: ""
  # Verify bundle signature before loading
  verifySignature: true
  # Cryptographic scheme for verification
  # Options: ed25519, ecdsa-p256, gost3410, sm2
  cryptoScheme: "ed25519"
# Logging Configuration
# Toggles for what this provider writes to the log.
logging:
  # Log health check results
  logHealthChecks: false
  # Log token usage statistics
  logUsage: true
# Performance Tuning
# Values that describe server-side tuning flags.
# NOTE(review): it is not visible here whether the provider applies these or
# only records what the server was started with - confirm.
performance:
  # Number of threads for inference (-t flag on server)
  # 0 = auto-detect
  threads: 0
  # Batch size for prompt processing
  batchSize: 512
  # Context size for parallel requests
  # NOTE(review): the name and the value 1 suggest a count of parallel
  # contexts/slots rather than a size in tokens - confirm against the loader.
  parallelContexts: 1