Refactor code structure and optimize performance across multiple modules

StellaOps Bot
2025-12-26 20:03:22 +02:00
parent c786faae84
commit b4fc66feb6
3353 changed files with 88254 additions and 1590657 deletions


@@ -0,0 +1,81 @@
# Claude (Anthropic) LLM Provider configuration template
# Copy to claude.yaml (remove .sample extension) and configure.
# Environment variable ANTHROPIC_API_KEY can be used instead of api.apiKey.

# Provider enabled state and priority (lower = higher priority)
enabled: true
priority: 100

# API Configuration
api:
  # API key - use environment variable reference or set directly
  # Environment variable: ANTHROPIC_API_KEY
  apiKey: "${ANTHROPIC_API_KEY}"
  # Base URL for API requests
  baseUrl: "https://api.anthropic.com"
  # API version header
  apiVersion: "2023-06-01"

# Model Configuration
model:
  # Primary model name
  # Options: claude-sonnet-4-20250514, claude-opus-4-20250514, claude-3-5-sonnet-20241022
  name: "claude-sonnet-4-20250514"
  # Fallback models (tried in order if primary fails)
  fallbacks:
    - "claude-3-5-sonnet-20241022"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  temperature: 0.0
  # Maximum tokens to generate
  maxTokens: 4096
  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  topP: 1.0
  # Top-k sampling (0 = disabled)
  # Lower values = more focused
  topK: 0

# Extended Thinking (Claude's reasoning feature)
thinking:
  # Enable extended thinking for complex reasoning tasks
  enabled: false
  # Budget tokens for thinking process
  budgetTokens: 10000

# Request Configuration
request:
  # Request timeout
  timeout: "00:02:00"
  # Maximum retries on failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  logBodies: false
  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Requests per minute limit (0 = no limit)
  requestsPerMinute: 0
  # Tokens per minute limit (0 = no limit)
  tokensPerMinute: 0
  # Backoff duration when rate limited
  backoff: "00:01:00"
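
For reference only (not part of the .sample file): a minimal Python sketch of the Anthropic Messages API request these settings map to. The prompt text and the exact field mapping on the StellaOps side are assumptions; retries, rate limiting, and fallback handling are omitted.

# Illustrative only; values mirror the defaults in this template.
import os
import requests

resp = requests.post(
    "https://api.anthropic.com/v1/messages",            # api.baseUrl + Messages endpoint
    headers={
        "x-api-key": os.environ["ANTHROPIC_API_KEY"],    # api.apiKey
        "anthropic-version": "2023-06-01",               # api.apiVersion
        "content-type": "application/json",
    },
    json={
        "model": "claude-sonnet-4-20250514",             # model.name
        "max_tokens": 4096,                              # inference.maxTokens
        "temperature": 0.0,                              # inference.temperature (deterministic)
        "messages": [{"role": "user",
                      "content": "Summarize CVE-2024-0001 in one sentence."}],
    },
    timeout=120,                                         # request.timeout (00:02:00)
)
resp.raise_for_status()
print(resp.json()["content"][0]["text"])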


@@ -0,0 +1,96 @@
# llama.cpp Server LLM Provider configuration template
# This is the PRIMARY provider for OFFLINE/AIRGAP deployments.
# Copy to llama-server.yaml (remove .sample extension) and configure.

# Provider enabled state and priority
# Lower priority number = higher preference (10 = prefer over cloud providers)
enabled: true
priority: 10

# Server Configuration
server:
  # Base URL for llama.cpp server
  # Start llama.cpp with: llama-server -m model.gguf --host 0.0.0.0 --port 8080
  baseUrl: "http://localhost:8080"
  # API key if server requires authentication (--api-key flag)
  apiKey: ""
  # Health check endpoint
  healthEndpoint: "/health"

# Model Configuration
model:
  # Model name (for logging and identification)
  name: "llama3-8b-q4km"
  # Path to model file (informational, model is loaded on server)
  modelPath: "/models/llama-3-8b-instruct.Q4_K_M.gguf"
  # Expected model digest (SHA-256) for verification
  # Ensures the correct model is loaded in airgap environments
  expectedDigest: ""

# Inference Parameters
inference:
  # Temperature: 0 = deterministic (REQUIRED for reproducibility)
  temperature: 0.0
  # Maximum tokens to generate
  maxTokens: 4096
  # Random seed for reproducibility (REQUIRED for determinism)
  seed: 42
  # Nucleus sampling (top-p)
  topP: 1.0
  # Top-k sampling
  topK: 40
  # Repeat penalty (1.0 = no penalty)
  repeatPenalty: 1.1
  # Context length (must match server's -c flag)
  contextLength: 4096

# Request Configuration
request:
  # Request timeout (longer for local inference)
  timeout: "00:05:00"
  # Maximum retries on failure
  maxRetries: 2

# Model Bundle Configuration (for airgap deployments)
bundle:
  # Path to signed model bundle (.stellaops-model directory)
  # Created using: stella model bundle --sign
  bundlePath: ""
  # Verify bundle signature before loading
  verifySignature: true
  # Cryptographic scheme for verification
  # Options: ed25519, ecdsa-p256, gost3410, sm2
  cryptoScheme: "ed25519"

# Logging Configuration
logging:
  # Log health check results
  logHealthChecks: false
  # Log token usage statistics
  logUsage: true

# Performance Tuning
performance:
  # Number of threads for inference (-t flag on server)
  # 0 = auto-detect
  threads: 0
  # Batch size for prompt processing
  batchSize: 512
  # Context size for parallel requests
  parallelContexts: 1
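
For reference only (not part of the .sample file): a minimal Python sketch of how a client could exercise this configuration against a running llama-server, using llama.cpp's /health and /completion endpoints. Accepted request fields can vary by llama.cpp build, so treat the mapping as an assumption.

# Illustrative only; values mirror the defaults in this template.
import requests

base = "http://localhost:8080"                 # server.baseUrl

# Health check (server.healthEndpoint) before routing work to this provider.
requests.get(f"{base}/health", timeout=5).raise_for_status()

resp = requests.post(
    f"{base}/completion",
    json={
        "prompt": "Summarize CVE-2024-0001 in one sentence.",
        "temperature": 0.0,                    # inference.temperature (deterministic)
        "seed": 42,                            # inference.seed
        "n_predict": 4096,                     # inference.maxTokens
        "top_p": 1.0,                          # inference.topP
        "top_k": 40,                           # inference.topK
        "repeat_penalty": 1.1,                 # inference.repeatPenalty
    },
    timeout=300,                               # request.timeout (00:05:00)
)
resp.raise_for_status()
print(resp.json()["content"])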


@@ -0,0 +1,87 @@
# Ollama LLM Provider configuration template
# For local inference using Ollama.
# Copy to ollama.yaml (remove .sample extension) and configure.

# Provider enabled state and priority
# Priority 20 = prefer over cloud, but after llama-server (10)
enabled: true
priority: 20

# Server Configuration
server:
  # Base URL for Ollama server
  # Default Ollama port is 11434
  baseUrl: "http://localhost:11434"
  # Health check endpoint
  healthEndpoint: "/api/tags"

# Model Configuration
model:
  # Primary model name
  # Use 'ollama list' to see available models
  # Common options: llama3:8b, llama3:70b, codellama:13b, mistral:7b
  name: "llama3:8b"
  # Fallback models (tried in order if primary fails)
  fallbacks:
    - "llama3:latest"
    - "mistral:7b"
  # Keep model loaded in memory (prevents unloading between requests)
  # Options: "5m", "10m", "1h", "-1" (forever)
  keepAlive: "5m"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic (REQUIRED for reproducibility)
  temperature: 0.0
  # Maximum tokens to generate (-1 = use model default)
  maxTokens: 4096
  # Random seed for reproducibility (REQUIRED for determinism)
  seed: 42
  # Nucleus sampling (top-p)
  topP: 1.0
  # Top-k sampling
  topK: 40
  # Repeat penalty (1.0 = no penalty)
  repeatPenalty: 1.1
  # Context window size
  numCtx: 4096
  # Number of tokens to predict (-1 = unlimited, use maxTokens)
  numPredict: -1

# GPU Configuration
gpu:
  # Number of GPU layers to offload (0 = CPU only)
  # -1 = offload all layers to GPU
  numGpu: 0

# Request Configuration
request:
  # Request timeout (longer for local inference)
  timeout: "00:05:00"
  # Maximum retries on failure
  maxRetries: 2

# Model Management
management:
  # Automatically pull model if not found locally
  # WARNING: Requires internet access, disable for airgap
  autoPull: false
  # Verify model integrity after pull
  verifyPull: true

# Logging Configuration
logging:
  # Log token usage statistics
  logUsage: true
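
For reference only (not part of the .sample file): a minimal Python sketch of the Ollama /api/generate request these options translate into. The field mapping is an assumption based on Ollama's documented runtime options.

# Illustrative only; values mirror the defaults in this template.
import requests

base = "http://localhost:11434"                # server.baseUrl

# Health / model listing (server.healthEndpoint).
requests.get(f"{base}/api/tags", timeout=5).raise_for_status()

resp = requests.post(
    f"{base}/api/generate",
    json={
        "model": "llama3:8b",                  # model.name
        "prompt": "Summarize CVE-2024-0001 in one sentence.",
        "stream": False,
        "keep_alive": "5m",                    # model.keepAlive
        "options": {
            "temperature": 0.0,                # inference.temperature (deterministic)
            "seed": 42,                        # inference.seed
            "top_p": 1.0,                      # inference.topP
            "top_k": 40,                       # inference.topK
            "repeat_penalty": 1.1,             # inference.repeatPenalty
            "num_ctx": 4096,                   # inference.numCtx
            "num_predict": -1,                 # inference.numPredict
            "num_gpu": 0,                      # gpu.numGpu (0 = CPU only)
        },
    },
    timeout=300,                               # request.timeout (00:05:00)
)
resp.raise_for_status()
print(resp.json()["response"])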


@@ -0,0 +1,87 @@
# OpenAI LLM Provider configuration template
# Copy to openai.yaml (remove .sample extension) and configure.
# Environment variable OPENAI_API_KEY can be used instead of api.apiKey.

# Provider enabled state and priority (lower = higher priority)
enabled: true
priority: 100

# API Configuration
api:
  # API key - use environment variable reference or set directly
  # Environment variable: OPENAI_API_KEY
  apiKey: "${OPENAI_API_KEY}"
  # Base URL for API requests
  # Default: https://api.openai.com/v1
  # For Azure OpenAI: https://{resource}.openai.azure.com/openai/deployments/{deployment}
  baseUrl: "https://api.openai.com/v1"
  # Organization ID (optional, for multi-org accounts)
  organizationId: ""
  # API version (required for Azure OpenAI, e.g., "2024-02-15-preview")
  apiVersion: ""

# Model Configuration
model:
  # Primary model name
  # Options: gpt-4o, gpt-4o-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo
  # For Azure: use your deployment name
  name: "gpt-4o"
  # Fallback models (tried in order if primary fails)
  fallbacks:
    - "gpt-4o-mini"
    - "gpt-3.5-turbo"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  temperature: 0.0
  # Maximum tokens to generate
  maxTokens: 4096
  # Random seed for reproducibility (when temperature=0)
  seed: 42
  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  topP: 1.0
  # Frequency penalty (-2.0 to 2.0)
  # Positive = reduce repetition of tokens already used
  frequencyPenalty: 0.0
  # Presence penalty (-2.0 to 2.0)
  # Positive = encourage new topics
  presencePenalty: 0.0

# Request Configuration
request:
  # Request timeout
  timeout: "00:02:00"
  # Maximum retries on failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  logBodies: false
  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Requests per minute limit (0 = no limit)
  requestsPerMinute: 0
  # Tokens per minute limit (0 = no limit)
  tokensPerMinute: 0
  # Backoff duration when rate limited
  backoff: "00:01:00"
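
For reference only (not part of the .sample file): a minimal Python sketch of the Chat Completions request these parameters configure, for the default api.openai.com case. Azure OpenAI uses a different URL shape plus an api-key header and api-version query parameter, which this sketch does not cover.

# Illustrative only; values mirror the defaults in this template.
import os
import requests

base = "https://api.openai.com/v1"             # api.baseUrl

resp = requests.post(
    f"{base}/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},  # api.apiKey
    json={
        "model": "gpt-4o",                     # model.name
        "messages": [{"role": "user",
                      "content": "Summarize CVE-2024-0001 in one sentence."}],
        "temperature": 0.0,                    # inference.temperature (deterministic)
        "max_tokens": 4096,                    # inference.maxTokens
        "seed": 42,                            # inference.seed (best-effort determinism)
        "top_p": 1.0,                          # inference.topP
        "frequency_penalty": 0.0,              # inference.frequencyPenalty
        "presence_penalty": 0.0,               # inference.presencePenalty
    },
    timeout=120,                               # request.timeout (00:02:00)
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])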


@@ -0,0 +1,92 @@
# LLM Provider Registry
# AI/LLM provider configurations for Advisory AI
version: "1.0"
category: llm-providers

# Global LLM settings
defaults:
  enabled: false  # Explicitly enable providers
  timeout: "00:02:00"
  maxRetries: 3

# ============================================================================
# LLM PROVIDERS
# ============================================================================
providers:
  # Cloud providers
  claude:
    enabled: false
    priority: 100
    config: claude.yaml
    description: "Anthropic Claude (Claude 3.5/4)"
    mode: remote
  openai:
    enabled: false
    priority: 90
    config: openai.yaml
    description: "OpenAI GPT-4/4o"
    mode: remote
  azure-openai:
    enabled: false
    priority: 90
    config: azure-openai.yaml
    description: "Azure OpenAI Service"
    mode: remote

  # Local providers (for air-gap)
  ollama:
    enabled: false
    priority: 80
    config: ollama.yaml
    description: "Ollama local inference"
    mode: local
  llama-server:
    enabled: false
    priority: 80
    config: llama-server.yaml
    description: "llama.cpp HTTP server"
    mode: local
  vllm:
    enabled: false
    priority: 80
    config: vllm.yaml
    description: "vLLM inference server"
    mode: local

# ============================================================================
# INFERENCE SETTINGS
# ============================================================================
inference:
  # Mode: remote, local, hybrid
  mode: "${ADVISORY_AI_INFERENCE_MODE:-local}"
  # Fallback chain
  fallbackChain:
    - claude
    - openai
    - ollama
  # Model selection
  modelSelection:
    # Task-specific model overrides
    explanation: ""     # Use default
    remediation: ""     # Use default
    classification: ""  # Use default

# ============================================================================
# OFFLINE/AIR-GAP SETTINGS
# ============================================================================
offline:
  # Signed model bundle path
  modelBundlePath: "${ADVISORY_AI_MODEL_BUNDLE_PATH:-/opt/stellaops/offline/models}"
  # Verify bundle signatures
  verifySignatures: true
  # Public key for signature verification
  publicKeyPath: "${ADVISORY_AI_MODEL_PUBKEY:-/etc/stellaops/model-signing-pubkey.pem}"
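
For reference only: a hypothetical sketch (not the StellaOps implementation) of how a loader might turn this registry into an ordered provider list, taking enabled providers in fallbackChain order first and then by ascending priority. The file name and helper below are illustrative.

# Hypothetical helper; assumes PyYAML is available and a registry shaped like the file above.
import yaml

def provider_order(registry: dict) -> list[str]:
    providers = registry.get("providers", {})
    chain = registry.get("inference", {}).get("fallbackChain", [])
    enabled = {name: p for name, p in providers.items() if p.get("enabled")}

    def key(name: str):
        # Providers named in fallbackChain come first, in chain order;
        # the rest are ordered by priority (lower number = preferred), then name.
        pos = chain.index(name) if name in chain else len(chain)
        return (pos, enabled[name].get("priority", 1000), name)

    return sorted(enabled, key=key)

with open("llm-providers.yaml") as f:   # illustrative file name for this registry
    registry = yaml.safe_load(f)
print(provider_order(registry))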