SHA-523 Create Docker container for the model with guardrails
Files changed:
- .gitignore +2 -0
- Dockerfile +20 -0
- config/prompts.yml +1 -0
- main.py +43 -0
- requirements.txt +10 -0
.gitignore
CHANGED
@@ -1,3 +1,5 @@
 myvenv/
 *.ipynb
 **/__pycache__/
+**/myvenv/
+*.md
Dockerfile
ADDED
@@ -0,0 +1,20 @@
+# Use lightweight Python base
+FROM python:3.10-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y eatmydata && eatmydata apt-get install -y --no-install-recommends build-essential
+
+
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application files
+COPY . .
+
+# Set environment variables
+ENV MODEL_PATH="./kai-model-7.2B-Q4_0.gguf"
+ENV GUARDRAILS_PATH="./config"
+
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
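Once the image is built and running, e.g. with docker build -t kai-guardrails . and docker run -p 8000:8000 kai-guardrails (the tag kai-guardrails is illustrative), the /health endpoint gives a quick smoke test that the service and model path are wired up. A minimal sketch, assuming the requests package is available on the host:

import requests

# Query the health endpoint defined in main.py (port 8000 per the Dockerfile)
resp = requests.get("http://localhost:8000/health", timeout=10)
resp.raise_for_status()
payload = resp.json()
assert payload["status"] == "ok"
print("Serving model:", payload["model"])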
config/prompts.yml
CHANGED
@@ -31,6 +31,7 @@ prompts:
 - messages should not contain any harmful content
 - messages should not contain racially insensitive content
 - messages should not contain any word that can be considered offensive
+- messages should not contain any code, programming languages, or development-related content
 - if a message is a refusal, should be polite
 
 
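With the new rule in place, development-related requests should be deflected at the output-check stage. A minimal sketch of how to exercise it outside the container, assuming the same model file and ./config directory that main.py uses; the exact refusal wording depends on the model and rails setup:

from langchain_community.llms import LlamaCpp
from nemoguardrails import LLMRails, RailsConfig

# Same model and rails configuration that main.py loads
llm = LlamaCpp(model_path="./kai-model-7.2B-Q4_0.gguf", temperature=0.7, top_k=40, top_p=0.95)
rails = LLMRails(RailsConfig.from_path("./config"), llm=llm)

# A code request should now come back as a polite refusal rather than code
result = rails.generate(messages=[{"role": "user", "content": "Write a Python function that reverses a list."}])
print(result["content"])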
main.py
ADDED
@@ -0,0 +1,43 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from llama_cpp import Llama
+from nemoguardrails import LLMRails, RailsConfig
+import os
+from langchain_community.llms import LlamaCpp
+
+
+app = FastAPI()
+MODEL_PATH = "./kai-model-7.2B-Q4_0.gguf"
+llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    temperature=0.7,
+    top_k=40,
+    top_p=0.95
+)
+
+# Load guardrails configuration
+config = RailsConfig.from_path("./config")
+rails = LLMRails(config, llm=llm)
+
+class ChatRequest(BaseModel):
+    message: str
+
+@app.post("/chat")
+async def chat_endpoint(request: ChatRequest):
+    try:
+        # Generate response with guardrails
+        response = await rails.generate_async(
+            messages=[{"role": "user", "content": request.message}]
+        )
+        return {"response": response["content"]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+def health_check():
+    return {"status": "ok", "model": MODEL_PATH}
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=5000)
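A small client sketch for the /chat endpoint, assuming the container is published on port 8000 as in the Dockerfile and that requests is installed on the caller's side:

import requests

# POST a message to the guarded chat endpoint defined in main.py
resp = requests.post(
    "http://localhost:8000/chat",
    json={"message": "Hello, how are you today?"},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["response"])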
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+ollama
+nemoguardrails
+pydantic
+langchain-community  # Needed for the langchain_community.llms import in main.py
+llama_index
+llama-cpp-python==0.2.55  # For GGUF model support
+fastapi==0.110.0
+uvicorn==0.27.0
+sentencepiece
+python-multipart  # For form data handling