---
# Docker Compose stack with two services:
#   * llama   - llama.cpp server image serving a local GGUF model on port 8001
#   * fastapi - application built from this directory, listening on port 8000
#               and configured to reach the llama service via its /v1 endpoint
version: '3.8'

services:
  llama:
    image: ghcr.io/ggerganov/llama.cpp:server
    ports:
      # host:container - must match the --port value passed in `command`.
      - "8001:8001"
    volumes:
      # Model file is bind-mounted read-only (:ro); the container-side path
      # must match the --model argument below.
      - ./kai-model-7.2B-Q4_0.gguf:/models/kai-model-7.2B-Q4_0.gguf:ro
    # Folded scalar (>-): the lines below are joined with single spaces into
    # one argument string, with no trailing newline. Do not place comments
    # inside the scalar body - they would become part of the command.
    # --host 0.0.0.0 binds on all interfaces so the published port and the
    # fastapi service can reach the server.
    command: >-
      --model /models/kai-model-7.2B-Q4_0.gguf
      --alias kai-model:latest
      --host 0.0.0.0
      --port 8001
      --ctx-size 4096
      --threads 4

  fastapi:
    build: .
    container_name: kai_fastapi
    ports:
      - "8000:8000"
    environment:
      # "llama" is the service name above, used as the hostname on the
      # Compose network; the port matches the llama service's --port.
      OPENAI_API_BASE: "http://llama:8001/v1"
      # NOTE(review): placeholder value - presumably the llama server does not
      # validate API keys in this setup; confirm before exposing publicly.
      OPENAI_API_KEY: "sk-no-key-needed"
    # Short-form depends_on only orders container start-up; it does not wait
    # for the llama server to be ready to accept requests.
    depends_on:
      - llama