# Ollama LLM Provider Configuration
# Documentation: https://ollama.ai/
#
# Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
# Pull a model: ollama pull llama3:8b
# Start server: ollama serve
#
# Ollama provides an easy way to run local LLMs with automatic GPU detection.

# Provider metadata
provider:
  id: ollama
  name: Ollama
  description: Local LLM inference via Ollama

# Enable/disable this provider
enabled: true

# Priority for provider selection (lower = higher priority)
# Set lower than cloud providers for local-first operation
priority: 20

# Server Configuration
server:
  # Ollama server URL
  baseUrl: "http://localhost:11434"

  # Health check endpoint
  healthEndpoint: "/api/tags"
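
  # To confirm the server is reachable and see which models are pulled, query
  # the health endpoint above (illustrative command; URL and port assume the
  # default baseUrl):
  #
  #   curl http://localhost:11434/api/tags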

# Model Configuration
model:
  # Model to use (must be pulled first: ollama pull <model>)
  # Popular options:
  #   - llama3:8b    (8B params, good quality/speed balance)
  #   - llama3:70b   (70B params, higher quality, needs more RAM)
  #   - mistral:7b   (7B params, fast)
  #   - mixtral:8x7b (MoE, good quality)
  #   - codellama:7b (code-focused)
  #   - phi3:mini    (small, fast)
  name: "llama3:8b"

  # Fallback models
  fallbacks:
    - "mistral:7b"
    - "phi3:mini"

  # Keep model loaded in memory (reduces latency for repeated requests)
  keepAlive: "5m"
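
  # The primary model and any fallbacks must be pulled before they can be
  # used; for the defaults above that would be (shown for illustration):
  #
  #   ollama pull llama3:8b
  #   ollama pull mistral:7b
  #   ollama pull phi3:mini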

# Inference Parameters
inference:
  # Temperature (0 = deterministic)
  temperature: 0

  # Maximum tokens to generate
  maxTokens: 4096

  # Random seed for reproducibility
  seed: 42

  # Top-p (nucleus sampling)
  topP: 1.0

  # Top-k sampling
  topK: 40

  # Repeat penalty
  repeatPenalty: 1.1

  # Context length
  numCtx: 4096

  # Number of tokens to predict (-1 = unlimited)
  numPredict: -1

  # Stop sequences
  stopSequences: []
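
  # For reference, these parameters correspond to fields of the "options"
  # object in Ollama's generate API. The exact request is assembled by the
  # provider, so the following is only an illustrative sketch of how the
  # values above would appear on the wire:
  #
  #   curl http://localhost:11434/api/generate -d '{
  #     "model": "llama3:8b",
  #     "prompt": "...",
  #     "stream": false,
  #     "keep_alive": "5m",
  #     "options": {
  #       "temperature": 0,
  #       "seed": 42,
  #       "top_p": 1.0,
  #       "top_k": 40,
  #       "repeat_penalty": 1.1,
  #       "num_ctx": 4096,
  #       "num_predict": -1
  #     }
  #   }'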

# Request Configuration
request:
  # Request timeout (hh:mm:ss)
  timeout: "00:05:00"

  # Maximum retries on failure
  maxRetries: 2

  # Delay between retries (hh:mm:ss)
  retryDelay: "00:00:02"

# GPU Configuration
gpu:
  # Number of GPU layers to use (0 = CPU only)
  numGpu: 0

  # Use GPU for embedding (if available)
  useGpuForEmbedding: false
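
  # numGpu presumably maps to Ollama's num_gpu option (the number of model
  # layers offloaded to the GPU). Illustrative override for a GPU-equipped
  # host; the right value depends on the model size and available VRAM:
  #
  #   numGpu: 33   # roughly all layers of an 8B model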

# Model Management
management:
  # Auto-pull model if not found
  autoPull: false

  # Verify model integrity after pull
  verifyPull: true

# Logging
logging:
  # Log request/response bodies
  logBodies: false

  # Log token usage statistics
  logUsage: true