Refactor code structure and optimize performance across multiple modules

2025-12-26 20:03:22 +02:00
parent c786faae84
commit f10d83c444
1385 changed files with 69732 additions and 10280 deletions
--- a/etc/llm-providers/openai.yaml.sample
+++ b/etc/llm-providers/openai.yaml.sample
@@ -0,0 +1,87 @@
+# OpenAI LLM Provider configuration template
+# Copy to openai.yaml (remove .sample extension) and configure.
+# The OPENAI_API_KEY environment variable can be used instead of setting api.apiKey directly.
+
+# Enables this provider and sets its priority (a lower number means higher priority)
+enabled: true
+priority: 100
+
+# API Configuration: credentials and endpoint settings for the provider
+api:
+  # API key - keep the environment variable reference below or set the key directly
+  # Environment variable: OPENAI_API_KEY
+  apiKey: "${OPENAI_API_KEY}"
+
+  # Base URL for API requests
+  # Default: https://api.openai.com/v1
+  # For Azure OpenAI (fill in both placeholders): https://{resource}.openai.azure.com/openai/deployments/{deployment}
+  baseUrl: "https://api.openai.com/v1"
+
+  # Organization ID (optional, for multi-org accounts); empty in this template
+  organizationId: ""
+
+  # API version (required for Azure OpenAI, e.g., "2024-02-15-preview"); empty in this template
+  apiVersion: ""
+
+# Model Configuration: the primary model and the fallbacks to try if it fails
+model:
+  # Name of the primary model
+  # Options: gpt-4o, gpt-4o-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo
+  # For Azure OpenAI: use your deployment name here
+  name: "gpt-4o"
+
+  # Fallback models, listed in the order they are tried if the primary model fails
+  fallbacks:
+    - "gpt-4o-mini"
+    - "gpt-3.5-turbo"
+
+# Inference Parameters (sampling and output-length settings)
+inference:
+  # Sampling temperature: 0 = deterministic, higher values = more creative output
+  # Keep this at 0 for reproducible results in StellaOps
+  temperature: 0.0
+
+  # Maximum number of tokens to generate
+  maxTokens: 4096
+
+  # Random seed for reproducibility (when temperature=0); NOTE(review): OpenAI documents seeding as best-effort
+  seed: 42
+
+  # Nucleus sampling (top-p)
+  # 1.0 = disabled; lower values = more focused output
+  topP: 1.0
+
+  # Frequency penalty (valid range: -2.0 to 2.0)
+  # Positive values reduce repetition of tokens already used
+  frequencyPenalty: 0.0
+
+  # Presence penalty (valid range: -2.0 to 2.0)
+  # Positive values encourage new topics
+  presencePenalty: 0.0
+
+# Request Configuration: timeout and retry settings
+request:
+  # Request timeout (value looks like hh:mm:ss, i.e. 2 minutes - TODO confirm format)
+  timeout: "00:02:00"
+
+  # Maximum number of retries when a request fails
+  maxRetries: 3
+
+# Logging Configuration: what gets logged for this provider
+logging:
+  # Log full request/response bodies (WARNING: bodies may contain sensitive data)
+  logBodies: false
+
+  # Log token usage statistics
+  logUsage: true
+
+# Rate Limiting (a value of 0 disables the corresponding limit)
+rateLimit:
+  # Maximum requests per minute (0 = no limit)
+  requestsPerMinute: 0
+
+  # Maximum tokens per minute (0 = no limit)
+  tokensPerMinute: 0
+
+  # Backoff duration when rate limited (value looks like hh:mm:ss, i.e. 1 minute - TODO confirm format)
+  backoff: "00:01:00"