# Google Gemini LLM Provider configuration template
# Copy to gemini.yaml (remove .sample extension) and configure.
# Environment variable GEMINI_API_KEY or GOOGLE_API_KEY can be used instead of api.apiKey.

# Provider enabled state and priority (lower = higher priority)
enabled: true
priority: 100

# API Configuration
api:
  # API key - use an environment variable reference or set directly
  # Environment variables: GEMINI_API_KEY or GOOGLE_API_KEY
  apiKey: "${GEMINI_API_KEY}"

  # Base URL for API requests
  # Default: https://generativelanguage.googleapis.com/v1beta
  # For Vertex AI: https://{region}-aiplatform.googleapis.com/v1
  baseUrl: "https://generativelanguage.googleapis.com/v1beta"

# Model Configuration
model:
  # Primary model name
  # Options: gemini-1.5-flash, gemini-1.5-pro, gemini-1.0-pro, gemini-pro-vision
  name: "gemini-1.5-flash"

  # Fallback models (tried in order if the primary fails)
  fallbacks:
    - "gemini-1.5-pro"
    - "gemini-1.0-pro"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  temperature: 0.0

  # Maximum tokens to generate
  maxTokens: 8192

  # Random seed for reproducibility (when temperature = 0)
  # Note: Gemini may not fully support seeds for determinism
  seed: 42

  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  topP: 1.0

  # Top-k sampling
  # Higher = more diverse, 1 = greedy
  topK: 40

# Request Configuration
request:
  # Request timeout (hh:mm:ss)
  timeout: "00:02:00"

  # Maximum retries on failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  logBodies: false

  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Requests per minute limit (0 = no limit)
  requestsPerMinute: 0

  # Tokens per minute limit (0 = no limit)
  tokensPerMinute: 0

  # Backoff duration when rate limited (hh:mm:ss)
  backoff: "00:01:00"
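
# ------------------------------------------------------------------
# Illustrative examples (commented out). These are sketches under
# stated assumptions, not tested defaults; adjust to your deployment.
# ------------------------------------------------------------------

# Example: supplying the key via environment variable (shell),
# assuming the configuration loader expands the "${GEMINI_API_KEY}"
# reference above at startup:
#
#   export GEMINI_API_KEY="<your-api-key>"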
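
# Example: routing requests through Vertex AI instead of the public
# Generative Language API, following the URL pattern noted above.
# The region "us-central1" is a hypothetical choice; substitute your
# own. Note that Vertex AI normally authenticates with Google Cloud
# credentials rather than an API key, so apiKey handling may differ.
#
# api:
#   baseUrl: "https://us-central1-aiplatform.googleapis.com/v1"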
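
# Example: a less deterministic inference profile for exploratory
# use. Values are illustrative only; avoid this profile when
# StellaOps reproducibility matters, and keep temperature at 0 there.
#
# inference:
#   temperature: 0.7
#   topP: 0.95
#   topK: 64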
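
# Example: enabling rate limits, e.g. 60 requests and 100,000 tokens
# per minute with a 30-second backoff. A limit of 0 disables
# throttling, as in the defaults above.
#
# rateLimit:
#   requestsPerMinute: 60
#   tokensPerMinute: 100000
#   backoff: "00:00:30"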