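"""Custom inference handler for a medically fine-tuned Llama 3 checkpoint.

Follows the Hugging Face Inference Endpoints handler convention: the runtime
instantiates EndpointHandler once and calls it with a JSON payload per request.
"""
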
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline


class EndpointHandler:
    def __init__(self, model_path="djangodevloper/llama3-70b-4bit-medqa"):
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
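            # device_map="auto" spreads the weights across available devices;
            # bfloat16 applies to any modules not already quantized in the checkpoint.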
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                device_map="auto",
                torch_dtype=torch.bfloat16,
                trust_remote_code=True,
            )
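            # Build one text-generation pipeline up front and reuse it for all requests.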
            self.pipeline = TextGenerationPipeline(
                model=self.model,
                tokenizer=self.tokenizer,
            )
        except Exception as e:
            raise RuntimeError(f"Failed to initialize model or tokenizer: {e}") from e

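        # System prompts: exactly one is prepended per request, chosen in __call__
        # by mode and audience.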
        self.general_prompt = (
            "You are DoctusMind, a trustworthy and friendly medical AI assistant. "
            "Provide clear, easy-to-understand, and medically accurate answers to everyday health questions. "
            "Use simple language and suggest safe, evidence-informed home remedies when suitable. "
            "Be supportive and avoid technical jargon. Prioritize safety and clarity. "
            "If asked a non-medical question, politely respond with:\n"
            "`{\"not_medical_question\": true}`\n"
            "Format responses with bullet points, headers, or short paragraphs when helpful."
        )

        self.professional_prompt = (
            "You are DoctusMind, a highly competent and articulate medical AI assistant for healthcare professionals. "
            "Provide concise, medically rigorous responses using appropriate clinical terminology, diagnostic language, "
            "and pathophysiological reasoning. Reference guidelines (e.g., WHO, CDC, NICE) where relevant. "
            "Always maintain a professional tone and format responses for quick clinical comprehension. "
            "If asked a non-medical question, reply with:\n"
            "`{\"not_medical_question\": true}`"
        )

        self.summary_prompt = (
            "Update the user's running chat summary by incorporating the most recent messages. "
            "Preserve important context like health conditions, preferences, personal facts, "
            "or constraints. Keep the summary compact and in User: ...\\nBot: ... format. "
            "Omit small talk unless relevant."
        )

        self.header_prompt = (
            "Generate a short and meaningful header (max 50 characters) based on the conversation."
        )

    def __call__(self, data):
        try:
            user_input = data.get("inputs", "")
            user_type = data.get("user_type", "general").strip().lower()
            mode = data.get("mode", "chat").strip().lower()

            if not user_input:
                return {"error": "Missing 'inputs' in request."}

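            # "summary" and "header" are utility modes; chat requests are routed
            # by audience (professional vs. general).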
if mode == "summary": |
|
system_prompt = self.summary_prompt |
|
elif mode == "header": |
|
system_prompt = self.header_prompt |
|
else: |
|
system_prompt = self.professional_prompt if user_type == "professional" else self.general_prompt |
|
|
|
|
|
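            # Assumes the checkpoint was tuned on these <|system|>/<|user|>/<|assistant|>
            # markers; for a stock Llama 3 chat model, tokenizer.apply_chat_template
            # would be the safer way to build this prompt.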
full_prompt = f"<|system|>{system_prompt}<|user|>{user_input}<|assistant|>" |
|
|
|
|
|
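            # Greedy decoding keeps answers deterministic; sampling knobs
            # (temperature, top_k, top_p) are ignored when do_sample=False, so they
            # are omitted. repetition_penalty still applies under greedy search.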
            outputs = self.pipeline(
                full_prompt,
                max_new_tokens=600,
                do_sample=False,
                repetition_penalty=1.05,
                eos_token_id=[
                    self.tokenizer.eos_token_id,
                    self.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
                ],
            )

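            # The pipeline echoes the prompt by default, so keep only the text
            # after the final assistant marker.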
            generated_text = outputs[0]["generated_text"]
            response = generated_text.split("<|assistant|>")[-1].strip()

            if not response:
                response = "Sorry, I couldn't generate a complete response. Try rephrasing."

            return {"generated_text": response}

        except Exception as e:
            return {"error": f"Inference error: {e}"}
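

# Minimal local smoke test (hypothetical payload; in production the Inference
# Endpoints runtime instantiates the handler and calls it per request).
# Note: this loads the full model, so it needs suitable GPU resources.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler(
        {"inputs": "What can I do at home for a mild sore throat?", "user_type": "general"}
    )
    print(result)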