# Docker Compose stack: FastAPI backend, frontend, ChromaDB vector store,
# Redis session store, and Ollama model server (with NVIDIA GPU reservation).
services:
  backend:
    build: ./backend
    ports: ["8000:8000"]
    depends_on: [chromadb, redis]
    environment:
      # Service names resolve via Compose's default network DNS.
      - OLLAMA_HOST=http://ollama:11434
      - CHROMA_HOST=chromadb
      - REDIS_URL=redis://redis:6379

  frontend:
    build: ./frontend
    ports: ["3000:3000"]
    depends_on: [backend]

  chromadb:
    image: chromadb/chroma:latest
    volumes:
      - chroma_data:/chroma/chroma

  redis:
    image: redis:7-alpine
    volumes:
      - redis_data:/data
    # AOF persistence so session state survives container restarts.
    command: redis-server --appendonly yes

  ollama:
    image: ollama/ollama:latest
    volumes:
      - ollama_models:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all available NVIDIA GPUs for inference.
            - driver: nvidia
              count: all
              capabilities: [gpu]

# Named volumes keep data independent of container lifecycles.
volumes:
  chroma_data:
  redis_data:
  ollama_models:
# Explicit bridge network so services are isolated from other Compose stacks
# on the same host; attach every service to it.
networks:
  autobot_net:
    driver: bridge

services:
  backend:
    networks: [autobot_net]
  # ... same for all services
# ChromaDB — your knowledge bases
docker run --rm \
  -v autobot_chroma_data:/source \
  -v /backup:/backup \
  alpine tar czf /backup/chroma-$(date +%Y%m%d).tar.gz -C /source .

# Redis — session state
docker exec autobot-redis-1 redis-cli BGSAVE
# BGSAVE is asynchronous: wait until the background save finishes before
# copying, or dump.rdb may be stale or mid-write.
until docker exec autobot-redis-1 redis-cli INFO persistence \
    | grep -q 'rdb_bgsave_in_progress:0'; do
  sleep 1
done
docker cp autobot-redis-1:/data/dump.rdb /backup/redis-$(date +%Y%m%d).rdb

# Ollama models — large, but painful to re-download
docker run --rm \
  -v autobot_ollama_models:/source \
  -v /backup:/backup \
  alpine tar czf /backup/ollama-$(date +%Y%m%d).tar.gz -C /source .
# Pull latest images
docker compose pull

# Recreate containers (zero-downtime if you add a load balancer)
docker compose up -d --no-deps --build backend frontend

# Full restart (brief downtime)
docker compose down && docker compose up -d
# Simple cron healthcheck: probe the backend every 5 minutes; page on failure.
# -s silences progress output, -f makes curl exit non-zero on HTTP errors.
*/5 * * * * curl -sf http://localhost:8000/health || notify-oncall
# Cap container log growth: rotate JSON-file logs at 50 MB, keep 5 files
# (250 MB max per container).
backend:
  logging:
    driver: "json-file"
    options:
      max-size: "50m"
      max-file: "5"

- Conversations never leave your network
- You choose which models run (open-weight, cloud API, or a mix)
- Upgrade timing is yours to control
- No per-seat pricing surprises

- Zero per-token cost
- Private by definition
- Latency depends on your hardware
- Best for: high-volume internal tools, sensitive data, experimentation

- Pay per token
- Faster for large models you can't run locally
- Data leaves your network (check your provider's retention policy)
- Best for: production apps that need frontier model quality without buying GPUs

- ChromaDB query latency > 2s — index fragmentation or under-resourced container
- Redis memory approaching limit — set maxmemory and a sensible eviction policy (allkeys-lru)
- Ollama inference time spiking — model being swapped to RAM; consider reducing context length or switching to a smaller quantization