# git.stella-ops.org/etc/llm-providers/claude.yaml.sample

# Claude (Anthropic) LLM Provider configuration template
# Copy to claude.yaml (remove .sample extension) and configure.
# Environment variable ANTHROPIC_API_KEY can be used instead of api.apiKey.

# Provider enabled state and priority (lower = higher priority)
# enabled: whether this provider is active.
enabled: true
# priority: ordering weight; a smaller number means a higher priority.
# NOTE(review): presumably compared against the other provider files in this
# directory - confirm with the loader.
priority: 100

# API Configuration
api:
  # Credential for the API. Keep the environment variable reference below
  # (ANTHROPIC_API_KEY) or replace it with the key itself; prefer the
  # reference so that no secret ends up committed to version control.
  apiKey: '${ANTHROPIC_API_KEY}'

  # Root URL that API requests are sent to
  baseUrl: 'https://api.anthropic.com'

  # Value for the API version header.
  # Keep it quoted: an unquoted value of this shape is read as a date by
  # many YAML parsers instead of a string.
  apiVersion: '2023-06-01'

# Model Configuration
model:
  # Model that is requested first
  # Options: claude-sonnet-4-20250514, claude-opus-4-20250514, claude-3-5-sonnet-20241022
  name: 'claude-sonnet-4-20250514'

  # Fallback models, attempted in list order when the primary fails
  fallbacks:
    - 'claude-3-5-sonnet-20241022'

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  # Written as 0.0 so the value is parsed as a float rather than an integer.
  temperature: 0.0

  # Maximum tokens to generate
  maxTokens: 4096

  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  # NOTE(review): Anthropic's API documentation recommends adjusting either
  # temperature or top_p, not both - confirm that the consumer omits top_p
  # from the request when this is left at 1.0.
  topP: 1.0

  # Top-k sampling (0 = disabled)
  # Lower values = more focused
  # NOTE(review): "0 = disabled" is this file's convention - confirm that the
  # consumer drops the parameter instead of sending 0.
  topK: 0

# Extended Thinking (Claude's reasoning feature)
thinking:
  # Enable extended thinking for complex reasoning tasks
  # NOTE(review): the Anthropic API does not accept temperature or top_k
  # modifications while thinking is enabled - confirm how the consumer treats
  # inference.temperature and inference.topK before turning this on.
  enabled: false

  # Token budget for the thinking process.
  # The Anthropic Messages API requires budget_tokens to be at least 1024 and
  # strictly less than max_tokens, so keep this value below
  # inference.maxTokens (or raise maxTokens together with it).
  budgetTokens: 2048

# Request Configuration
request:
  # Time allowed for a single request before it is abandoned.
  # Keep the value quoted: unquoted digit:digit scalars are parsed as
  # base-60 integers by YAML 1.1 parsers.
  # NOTE(review): presumably hh:mm:ss (i.e. 2 minutes) - confirm.
  timeout: '00:02:00'

  # Upper bound on retries after a failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  # Left off in this template because of that warning.
  logBodies: false

  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Cap on requests per minute; 0 means no limit
  requestsPerMinute: 0

  # Cap on tokens per minute; 0 means no limit
  tokensPerMinute: 0

  # How long to back off after being rate limited.
  # Keep the value quoted so it stays a string.
  # NOTE(review): presumably hh:mm:ss (i.e. 1 minute) - confirm.
  backoff: '00:01:00'