# Google Gemini LLM Provider configuration template
# Copy to gemini.yaml (remove the .sample extension) and configure.
# The GEMINI_API_KEY or GOOGLE_API_KEY environment variable can be used instead of api.apiKey.

# Provider enabled state and priority (lower = higher priority)
enabled: true
priority: 100
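
# Sketch of how priority interacts with other provider configs (the second
# file and its value are hypothetical, not shipped with this template):
#   anthropic.yaml -> priority: 50   # selected before this provider
#   gemini.yaml    -> priority: 100  # this file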

# API Configuration
api:
  # API key - use environment variable reference or set directly
  # Environment variables: GEMINI_API_KEY or GOOGLE_API_KEY
  apiKey: "${GEMINI_API_KEY}"
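
  # Illustrative only: a key can also be set directly instead of via the
  # environment reference above (the value below is a made-up placeholder,
  # not a real key; hardcoding secrets in config files is discouraged):
  # apiKey: "AIzaSy-example-placeholder"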

  # Base URL for API requests
  # Default: https://generativelanguage.googleapis.com/v1beta
  # For Vertex AI: https://{region}-aiplatform.googleapis.com/v1
  baseUrl: "https://generativelanguage.googleapis.com/v1beta"
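
  # Hypothetical Vertex AI example following the pattern above (the region is
  # an assumption - substitute your own, and note Vertex auth differs from an
  # API key):
  # baseUrl: "https://us-central1-aiplatform.googleapis.com/v1"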

# Model Configuration
model:
  # Primary model name
  # Options: gemini-1.5-flash, gemini-1.5-pro, gemini-1.0-pro, gemini-pro-vision
  name: "gemini-1.5-flash"

  # Fallback models (tried in order if the primary fails)
  fallbacks:
    - "gemini-1.5-pro"
    - "gemini-1.0-pro"

# Inference Parameters
inference:
  # Temperature: 0 = deterministic, higher = more creative
  # For reproducibility in StellaOps, use 0
  temperature: 0.0

  # Maximum tokens to generate
  maxTokens: 8192

  # Random seed for reproducibility (when temperature=0)
  # Note: Gemini may not fully support seeds for determinism
  seed: 42

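  # Illustrative alternative (hypothetical value): any fixed integer works,
  # as long as the same value is used across runs you want to compare:
  # seed: 20240101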

  # Nucleus sampling (top-p)
  # 1.0 = disabled, lower values = more focused
  topP: 1.0

  # Top-k sampling
  # Higher = more diverse, 1 = greedy
  topK: 40
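
  # Sketch of a near-greedy profile, tighter than the defaults above
  # (values are illustrative, not tuned):
  # topP: 0.1
  # topK: 1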

# Request Configuration
request:
  # Request timeout (hh:mm:ss)
  timeout: "00:02:00"
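
  # Illustrative alternatives in the same hh:mm:ss format:
  # timeout: "00:00:30"   # 30 seconds
  # timeout: "00:05:00"   # 5 minutes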

  # Maximum retries on failure
  maxRetries: 3

# Logging Configuration
logging:
  # Log request/response bodies (WARNING: may contain sensitive data)
  logBodies: false

  # Log token usage statistics
  logUsage: true

# Rate Limiting
rateLimit:
  # Requests-per-minute limit (0 = no limit)
  requestsPerMinute: 0

  # Tokens-per-minute limit (0 = no limit)
  tokensPerMinute: 0

  # Backoff duration when rate limited (hh:mm:ss)
  backoff: "00:01:00"
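
  # Sketch: throttling to stay inside an assumed quota of 60 requests/min and
  # 1,000,000 tokens/min (the quota numbers are hypothetical - check the
  # limits on your own API key):
  # requestsPerMinute: 60
  # tokensPerMinute: 1000000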