SHA-523 Create Docker container for the model with guardrails
Files changed:
- .gitignore +2 -0
- Dockerfile +20 -0
- config/prompts.yml +1 -0
- main.py +43 -0
- requirements.txt +10 -0
.gitignore
CHANGED
@@ -1,3 +1,5 @@
 myvenv/
 *.ipynb
 **/__pycache__/
+**/myvenv/
+*.md
Dockerfile
ADDED
@@ -0,0 +1,20 @@
+# Use lightweight Python base
+FROM python:3.10-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y eatmydata && eatmydata apt-get install -y --no-install-recommends build-essential
+
+
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application files
+COPY . .
+
+# Set environment variables
+ENV MODEL_PATH="./kai-model-7.2B-Q4_0.gguf"
+ENV GUARDRAILS_PATH="./config"
+
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
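Once the image is built and running, e.g. with docker build -t kai-guardrails . and docker run -p 8000:8000 kai-guardrails (the tag kai-guardrails is illustrative), the /health endpoint gives a quick smoke test that the service and model path are wired up. A minimal sketch, assuming the requests package is available on the host:

import requests

# Query the health endpoint defined in main.py (port 8000 per the Dockerfile)
resp = requests.get("http://localhost:8000/health", timeout=10)
resp.raise_for_status()
payload = resp.json()
assert payload["status"] == "ok"
print("Serving model:", payload["model"])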
config/prompts.yml
CHANGED
@@ -31,6 +31,7 @@ prompts:
 - messages should not contain any harmful content
 - messages should not contain racially insensitive content
 - messages should not contain any word that can be considered offensive
+- messages should not contain any code, programming languages, or development-related content
 - if a message is a refusal, should be polite
 
 
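With the new rule in place, development-related requests should be deflected at the output-check stage. A minimal sketch of how to exercise it outside the container, assuming the same model file and ./config directory that main.py uses; the exact refusal wording depends on the model and rails setup:

from langchain_community.llms import LlamaCpp
from nemoguardrails import LLMRails, RailsConfig

# Same model and rails configuration that main.py loads
llm = LlamaCpp(model_path="./kai-model-7.2B-Q4_0.gguf", temperature=0.7, top_k=40, top_p=0.95)
rails = LLMRails(RailsConfig.from_path("./config"), llm=llm)

# A code request should now come back as a polite refusal rather than code
result = rails.generate(messages=[{"role": "user", "content": "Write a Python function that reverses a list."}])
print(result["content"])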
main.py
ADDED
@@ -0,0 +1,43 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from llama_cpp import Llama
+from nemoguardrails import LLMRails, RailsConfig
+import os
+from langchain_community.llms import LlamaCpp
+
+
+app = FastAPI()
+MODEL_PATH = "./kai-model-7.2B-Q4_0.gguf"
+llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    temperature=0.7,
+    top_k=40,
+    top_p=0.95
+)
+
+# Load guardrails configuration
+config = RailsConfig.from_path("./config")
+rails = LLMRails(config, llm=llm)
+
+class ChatRequest(BaseModel):
+    message: str
+
+@app.post("/chat")
+async def chat_endpoint(request: ChatRequest):
+    try:
+        # Generate response with guardrails
+        response = await rails.generate_async(
+            messages=[{"role": "user", "content": request.message}]
+        )
+        return {"response": response["content"]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+def health_check():
+    return {"status": "ok", "model": MODEL_PATH}
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=5000)
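A small client sketch for the /chat endpoint, assuming the container is published on port 8000 as in the Dockerfile and that requests is installed on the caller's side:

import requests

# POST a message to the guarded chat endpoint defined in main.py
resp = requests.post(
    "http://localhost:8000/chat",
    json={"message": "Hello, how are you today?"},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["response"])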
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+ollama
+nemoguardrails
+pydantic
+langchain-community  # Needed for the langchain_community.llms import in main.py
+llama_index
+llama-cpp-python==0.2.55  # For GGUF model support
+fastapi==0.110.0
+uvicorn==0.27.0
+sentencepiece
+python-multipart  # For form data handling