# OpenAI LLM Provider configuration template
# Copy to openai.yaml (remove the .sample extension) and configure.
# The OPENAI_API_KEY environment variable can be used instead of setting api.apiKey.

# Provider enabled state and priority (lower = higher priority)
enabled: true
priority: 100
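
# Example (not part of the template): a second provider file configured with
# "priority: 50" would take precedence over this one, since lower numbers mean
# higher priority; both providers must be enabled.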

# API Configuration
api:
  # API key - use environment variable reference or set directly
  # Environment variable: OPENAI_API_KEY
  apiKey: "${OPENAI_API_KEY}"

  # Base URL for API requests
  # Default: https://api.openai.com/v1
  # For Azure OpenAI: https://{resource}.openai.azure.com/openai/deployments/{deployment}
  baseUrl: "https://api.openai.com/v1"

  # Organization ID (optional, for multi-org accounts)
  organizationId: ""

  # API version (required for Azure OpenAI, e.g., "2024-02-15-preview")
  apiVersion: ""
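
  # Illustrative (commented-out) Azure OpenAI settings, using the same keys as
  # above; the resource and deployment names below are placeholders, not real
  # values:
  # baseUrl: "https://my-resource.openai.azure.com/openai/deployments/my-deployment"
  # apiVersion: "2024-02-15-preview"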

# Model Configuration
model:
  # Primary model name
  # Options: gpt-4o, gpt-4o-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo
  # For Azure: use your deployment name
  name: "gpt-4o"

  # Fallback models (tried in order if primary fails)
  fallbacks:
    - "gpt-4o-mini"
    - "gpt-3.5-turbo"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  temperature: 0.0

  # Maximum tokens to generate
  maxTokens: 4096

  # Random seed for reproducibility (when temperature=0)
  seed: 42

  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  topP: 1.0

  # Frequency penalty (-2.0 to 2.0)
  # Positive = reduce repetition of tokens already used
  frequencyPenalty: 0.0

  # Presence penalty (-2.0 to 2.0)
  # Positive = encourage new topics
  presencePenalty: 0.0
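
  # Illustrative (commented-out) values for exploratory, non-reproducible
  # output; the defaults above are kept deterministic for StellaOps
  # reproducibility:
  # temperature: 0.7
  # topP: 0.9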

# Request Configuration
request:
  # Request timeout (hh:mm:ss)
  timeout: "00:02:00"

  # Maximum retries on failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  logBodies: false

  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Requests per minute limit (0 = no limit)
  requestsPerMinute: 0

  # Tokens per minute limit (0 = no limit)
  tokensPerMinute: 0

  # Backoff duration when rate limited (hh:mm:ss)
  backoff: "00:01:00"
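
  # Illustrative (commented-out) limits for an account allowing, say, 500
  # requests and 200,000 tokens per minute (hypothetical quotas; check your
  # OpenAI account limits):
  # requestsPerMinute: 500
  # tokensPerMinute: 200000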