import torch
from transformers import AutoTokenizer, AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel
from PIL import Image
import base64
import io


def load_model():
    """Load the ViTCM_LLM model for Traditional Chinese Medicine tongue diagnosis."""
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-VL-32B-Instruct")
    processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-32B-Instruct")
    # Qwen2.5-VL is a vision-language model, so it must be loaded with the VL
    # model class rather than AutoModelForCausalLM.
    base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2.5-VL-32B-Instruct",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    # Attach the ViTCM_LLM LoRA adapter on top of the base model.
    model = PeftModel.from_pretrained(base_model, "Mark-CHAE/shezhen")
    model.eval()
    return model, tokenizer, processor


# Initialize the model once at import time.
model, tokenizer, processor = load_model()


def query(question: str, image: str) -> str:
    """
    Analyze a tongue image for Traditional Chinese Medicine diagnosis.

    Args:
        question: The question about the tongue image
            (e.g., "根据图片判断舌诊内容" / "Give a tongue diagnosis based on the image").
        image: Base64-encoded image string.

    Returns:
        The TCM diagnosis analysis of the tongue.
    """
    try:
        # Decode the base64 string into a PIL image.
        image_data = base64.b64decode(image)
        image_pil = Image.open(io.BytesIO(image_data)).convert("RGB")

        # Build the prompt with the processor's chat template so the image
        # placeholder tokens are inserted where Qwen2.5-VL expects them; a
        # hand-written prompt without a vision placeholder fails at inference.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question},
                ],
            }
        ]
        prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

        # Tokenize the text, preprocess the image, and move everything to the
        # model's device.
        inputs = processor(
            text=[prompt],
            images=[image_pil],
            return_tensors="pt",
        ).to(model.device)

        # Generate the diagnosis. max_new_tokens bounds only the answer,
        # unlike max_length, which also counts the prompt.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Slice off the prompt tokens and decode only the generated answer.
        # (Splitting the decoded string on "<|im_start|>assistant" would not
        # work: skip_special_tokens=True removes that marker.)
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        answer = tokenizer.decode(generated, skip_special_tokens=True).strip()
        return answer
    except Exception as e:
        return f"Error processing request: {e}"
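

# Example usage: a minimal sketch, assuming a local tongue photo at the
# hypothetical path "tongue.jpg" (not part of the original snippet).
if __name__ == "__main__":
    with open("tongue.jpg", "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")
    print(query("根据图片判断舌诊内容", image_b64))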