from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.llms import LlamaCpp
from nemoguardrails import LLMRails, RailsConfig
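# Assumed dependencies (package names only; pin versions as needed):
#   pip install fastapi uvicorn pydantic langchain-community llama-cpp-python nemoguardrails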
app = FastAPI()

MODEL_PATH = "./kai-model-7.2B-Q4_0.gguf"

# Load the quantized GGUF model through the LangChain llama.cpp wrapper.
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.7,
    top_k=40,
    top_p=0.95,
)
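# Note: the wrapper's context window (n_ctx) defaults to 512 tokens in
# llama-cpp-python; for multi-turn chats you will likely want to pass a
# larger n_ctx above, e.g. n_ctx=4096 (assumption: the model supports a
# context of at least that size).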
# Load guardrails configuration
config = RailsConfig.from_path("./config")
rails = LLMRails(config, llm=llm)
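# The ./config directory is expected to follow the standard NeMo Guardrails
# layout (illustrative sketch, not shipped with this file):
#   config/
#     config.yml   # model, instructions, and rails settings
#     rails.co     # Colang flows defining the input/output rails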
class ChatRequest(BaseModel):
    message: str
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    try:
        # Generate response with guardrails
        response = await rails.generate_async(
            messages=[{"role": "user", "content": request.message}]
        )
        return {"response": response["content"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
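# Example request (assuming the server is running on the host/port
# configured below):
#   curl -X POST http://127.0.0.1:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello!"}'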
@app.get("/health")
def health_check():
    return {"status": "ok", "model": MODEL_PATH}
if __name__ == "__main__":
    import uvicorn

    # Pass the FastAPI instance itself; the original referenced an
    # undefined name `main`.
    uvicorn.run(app, host="127.0.0.1", port=8000)
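# Alternatively, run via the uvicorn CLI (assuming this file is named main.py):
#   uvicorn main:app --host 127.0.0.1 --port 8000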