rmtlabs committed
Commit 86eb307 · verified · 1 Parent(s): ed6dc22

SHA-523 Create Docker container for the model with guardrails

Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +20 -0
  3. config/prompts.yml +1 -0
  4. main.py +43 -0
  5. requirements.txt +10 -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
 myvenv/
 *.ipynb
 **/__pycache__/
+**/myvenv/
+*.md
Dockerfile ADDED
@@ -0,0 +1,20 @@
+# Use lightweight Python base
+FROM python:3.10-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y eatmydata && eatmydata apt-get install -y --no-install-recommends build-essential
+
+
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application files
+COPY . .
+
+# Set environment variables
+ENV MODEL_PATH="./kai-model-7.2B-Q4_0.gguf"
+ENV GUARDRAILS_PATH="./config"
+
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
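The image can be built and started locally with a sketch along these lines, using the Docker SDK for Python (pip install docker); the kai-guardrails tag is illustrative, and the GGUF model file is assumed to be present in the build context:

import docker

client = docker.from_env()

# Build the image from this directory's Dockerfile (tag is illustrative)
image, _build_logs = client.images.build(path=".", tag="kai-guardrails")

# Start a container, publishing the port the Dockerfile exposes
container = client.containers.run(
    "kai-guardrails",
    detach=True,
    ports={"8000/tcp": 8000},
)
print(container.short_id)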
config/prompts.yml CHANGED
@@ -31,6 +31,7 @@ prompts:
 - messages should not contain any harmful content
 - messages should not contain racially insensitive content
 - messages should not contain any word that can be considered offensive
+- messages should not contain any code, programming languages, or development-related content
 - if a message is a refusal, should be polite
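The new output rule can be sanity-checked outside the container with a minimal script like this (a sketch: it assumes the same ./config directory, and that a model is either declared in that config or passed in as main.py does; the prompt is illustrative):

import asyncio
from nemoguardrails import LLMRails, RailsConfig

async def main():
    config = RailsConfig.from_path("./config")
    rails = LLMRails(config)  # pass llm=... here if the config does not declare a model
    # A coding request should now be refused under the new rule
    result = await rails.generate_async(
        messages=[{"role": "user", "content": "Write me a quicksort in Python."}]
    )
    print(result["content"])

asyncio.run(main())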
main.py ADDED
@@ -0,0 +1,43 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from nemoguardrails import LLMRails, RailsConfig
+import os
+from langchain_community.llms import LlamaCpp
+
+
+app = FastAPI()
+# Model and config paths come from the env vars set in the Dockerfile
+MODEL_PATH = os.getenv("MODEL_PATH", "./kai-model-7.2B-Q4_0.gguf")
+llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    temperature=0.7,
+    top_k=40,
+    top_p=0.95,
+)
+
+# Load guardrails configuration
+config = RailsConfig.from_path(os.getenv("GUARDRAILS_PATH", "./config"))
+rails = LLMRails(config, llm=llm)
+
+class ChatRequest(BaseModel):
+    message: str
+
+@app.post("/chat")
+async def chat_endpoint(request: ChatRequest):
+    try:
+        # Generate response with guardrails applied
+        response = await rails.generate_async(
+            messages=[{"role": "user", "content": request.message}]
+        )
+        return {"response": response["content"]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+def health_check():
+    return {"status": "ok", "model": MODEL_PATH}
+
+
+if __name__ == "__main__":
+    import uvicorn
+    # Serve on the same port the Dockerfile exposes
+    uvicorn.run(app, host="0.0.0.0", port=8000)
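With the container running, both endpoints can be exercised with a short client (a sketch; assumes the service is reachable on localhost:8000 and that requests is installed):

import requests

BASE_URL = "http://localhost:8000"  # port published by the container

# Confirm the service and model path via the health endpoint
print(requests.get(f"{BASE_URL}/health", timeout=10).json())

# Send a message through the guardrailed chat endpoint
resp = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "Hello! What can you help me with?"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])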
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ollama
+nemoguardrails
+pydantic
+fastapi==0.110.0
+llama_index
+llama-cpp-python==0.2.55  # For GGUF model support
+langchain-community  # Provides the LlamaCpp wrapper imported in main.py
+uvicorn==0.27.0
+sentencepiece
+python-multipart  # For form data handling