# OpenAI LLM Provider configuration template
# Copy to openai.yaml (remove the .sample extension) and configure.
# The OPENAI_API_KEY environment variable can be used instead of setting api.apiKey.

# Provider enabled state and priority (lower = higher priority)
enabled: true
priority: 100
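
# Example (not part of the template): a second provider file configured with
# "priority: 50" would take precedence over this one, since lower numbers mean
# higher priority; both providers must be enabled.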

# API Configuration
api:
  # API key - use environment variable reference or set directly
  # Environment variable: OPENAI_API_KEY
  apiKey: "${OPENAI_API_KEY}"

  # Base URL for API requests
  # Default: https://api.openai.com/v1
  # For Azure OpenAI: https://{resource}.openai.azure.com/openai/deployments/{deployment}
  baseUrl: "https://api.openai.com/v1"

  # Organization ID (optional, for multi-org accounts)
  organizationId: ""

  # API version (required for Azure OpenAI, e.g., "2024-02-15-preview")
  apiVersion: ""
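
  # Illustrative (commented-out) Azure OpenAI settings, using the same keys as
  # above; the resource and deployment names below are placeholders, not real
  # values:
  # baseUrl: "https://my-resource.openai.azure.com/openai/deployments/my-deployment"
  # apiVersion: "2024-02-15-preview"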

# Model Configuration
model:
  # Primary model name
  # Options: gpt-4o, gpt-4o-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo
  # For Azure: use your deployment name
  name: "gpt-4o"

  # Fallback models (tried in order if primary fails)
  fallbacks:
    - "gpt-4o-mini"
    - "gpt-3.5-turbo"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  temperature: 0.0

  # Maximum tokens to generate
  maxTokens: 4096

  # Random seed for reproducibility (when temperature=0)
  seed: 42

  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  topP: 1.0

  # Frequency penalty (-2.0 to 2.0)
  # Positive = reduce repetition of tokens already used
  frequencyPenalty: 0.0

  # Presence penalty (-2.0 to 2.0)
  # Positive = encourage new topics
  presencePenalty: 0.0
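
  # Illustrative (commented-out) values for exploratory, non-reproducible
  # output; the defaults above are kept deterministic for StellaOps
  # reproducibility:
  # temperature: 0.7
  # topP: 0.9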

# Request Configuration
request:
  # Request timeout (hh:mm:ss)
  timeout: "00:02:00"

  # Maximum retries on failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  logBodies: false

  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Requests per minute limit (0 = no limit)
  requestsPerMinute: 0

  # Tokens per minute limit (0 = no limit)
  tokensPerMinute: 0

  # Backoff duration when rate limited (hh:mm:ss)
  backoff: "00:01:00"
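
  # Illustrative (commented-out) limits for an account allowing, say, 500
  # requests and 200,000 tokens per minute (hypothetical quotas; check your
  # OpenAI account limits):
  # requestsPerMinute: 500
  # tokensPerMinute: 200000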