Refactor code structure and optimize performance across multiple modules

2025-12-26 20:03:22 +02:00
parent c786faae84
commit f10d83c444
1385 changed files with 69732 additions and 10280 deletions
--- a/etc/llm-providers/claude.yaml.sample
+++ b/etc/llm-providers/claude.yaml.sample
@@ -0,0 +1,81 @@
+# Claude (Anthropic) LLM Provider configuration template
+# Copy to claude.yaml (remove .sample extension) and configure.
+# Environment variable ANTHROPIC_API_KEY can be used instead of api.apiKey.
+
+# Provider enabled state and priority (lower = higher priority)
+enabled: true
+priority: 100
+
+# API Configuration
+api:
+  # API key: reference an environment variable (preferred) or set the value directly.
+  # Avoid committing a literal key to version control.
+  # Environment variable: ANTHROPIC_API_KEY
+  apiKey: "${ANTHROPIC_API_KEY}"
+
+  # Base URL for API requests
+  baseUrl: "https://api.anthropic.com"
+
+  # API version header
+  apiVersion: "2023-06-01"
+
+# Model Configuration
+model:
+  # Primary model name
+  # Options: claude-sonnet-4-20250514, claude-opus-4-20250514, claude-3-5-sonnet-20241022
+  name: "claude-sonnet-4-20250514"
+
+  # Fallback models (tried in order if primary fails)
+  fallbacks:
+    - "claude-3-5-sonnet-20241022"
+
+# Inference Parameters
+inference:
+  # Temperature: 0 = most deterministic, higher = more creative
+  # (the Anthropic API notes that output can still vary slightly at 0)
+  # For reproducibility in StellaOps, use 0
+  temperature: 0.0
+
+  # Maximum tokens to generate
+  maxTokens: 4096
+
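+  # Nucleus sampling (top-p)
+  # 1.0 = disabled, lower values = more focused
+  # NOTE(review): Anthropic advises adjusting either temperature or top_p, not both;
+  # confirm the provider omits top_p from the request when it is left at 1.0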
+  topP: 1.0
+
+  # Top-k sampling (0 = disabled)
+  # Lower values = more focused
+  topK: 0
+
+# Extended Thinking (Claude's reasoning feature)
+thinking:
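+  # Enable extended thinking for complex reasoning tasks
+  # NOTE(review): the Anthropic API does not accept temperature or top_k
+  # modifications while thinking is on; confirm how the provider handles the
+  # inference settings before enabling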
+  enabled: false
+
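+  # Budget tokens for thinking process
+  # NOTE(review): the Anthropic API requires budget_tokens >= 1024 and less than
+  # max_tokens; the value below exceeds inference.maxTokens (4096), so confirm the
+  # provider adjusts one of them (or raise maxTokens) before enabling thinking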
+  budgetTokens: 10000
+
+# Request Configuration
+request:
+  # Request timeout as a duration string
+  # (presumably hh:mm:ss, i.e. 2 minutes here; confirm the expected format)
+  timeout: "00:02:00"
+
+  # Maximum retries on failure
+  maxRetries: 3
+
+# Logging Configuration
+logging:
+  # Log request/response bodies (WARNING: may contain sensitive data)
+  logBodies: false
+
+  # Log token usage statistics
+  logUsage: true
+
+# Rate Limiting
+rateLimit:
+  # Requests per minute limit (0 = no limit)
+  requestsPerMinute: 0
+
+  # Tokens per minute limit (0 = no limit)
+  tokensPerMinute: 0
+
+  # Backoff duration when rate limited
+  # (presumably hh:mm:ss, i.e. 1 minute here; confirm the expected format)
+  backoff: "00:01:00"