|
import torch
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
|
|
from peft import PeftModel
|
|
from PIL import Image
|
|
import base64
|
|
import io
|
|
|
|
|
|
def load_model():
    """Load the ViTCM_LLM model for Traditional Chinese Medicine tongue diagnosis.

    Loads the tokenizer, the processor and the fp16 base weights of
    ``Qwen/Qwen2.5-VL-32B-Instruct`` (sharded with ``device_map="auto"``),
    then attaches the ``Mark-CHAE/shezhen`` PEFT adapter to the base model.

    Returns:
        A ``(model, tokenizer, processor)`` tuple, where ``model`` is the
        adapter-wrapped base model switched to evaluation mode.
    """
    # Imported here so this function does not rely on the file-level import
    # list naming the architecture-specific class.
    from transformers import Qwen2_5_VLForConditionalGeneration

    base_model_id = "Qwen/Qwen2.5-VL-32B-Instruct"
    adapter_id = "Mark-CHAE/shezhen"

    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    processor = AutoProcessor.from_pretrained(base_model_id)

    # Qwen2.5-VL is a vision-language checkpoint; its model card loads it with
    # the dedicated conditional-generation class rather than the causal-LM
    # auto class, which does not cover this architecture.
    base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        base_model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    model = PeftModel.from_pretrained(base_model, adapter_id)
    # Inference only: make sure dropout and similar layers are disabled.
    model.eval()
    return model, tokenizer, processor
|
|
|
|
|
|
# Load the model, tokenizer and processor once at import time; query() reads
# these module-level names on every call instead of reloading them.
model, tokenizer, processor = load_model()
|
|
|
|
def query(question: str, image: str) -> str:
    """Analyze a tongue image for Traditional Chinese Medicine diagnosis.

    Args:
        question: The question about the tongue image, e.g.
            "根据图片判断舌诊内容" (roughly: "determine the tongue diagnosis
            from the picture").
        image: Base64 encoded image string. A leading ``data:...;base64,``
            data-URI prefix is tolerated and stripped.

    Returns:
        The TCM diagnosis analysis of the tongue. If anything fails (invalid
        base64, unreadable image, generation error), a string starting with
        ``"Error processing request: "`` is returned instead of raising.
    """
    try:
        # Plain base64 never contains ':' or ',', so stripping a data-URI
        # header cannot corrupt an input that was already bare base64.
        if image.startswith("data:") and "," in image:
            image = image.split(",", 1)[1]

        image_data = base64.b64decode(image)
        # Normalise palette / alpha / grayscale inputs to 3-channel RGB.
        image_pil = Image.open(io.BytesIO(image_data)).convert("RGB")

        # Let the processor's chat template emit the model's own image
        # placeholder tokens; a hand-written "<image>" tag is not expanded
        # into vision tokens by this processor.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question},
                ],
            }
        ]
        prompt = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = processor(
            text=[prompt],
            images=[image_pil],
            return_tensors="pt",
        )
        # The model is placed by device_map="auto"; inputs must follow it.
        inputs = inputs.to(model.device)

        outputs = model.generate(
            **inputs,
            # Budget only the newly generated tokens: the prompt already
            # contains the image tokens, which can exceed a 512 total cap.
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

        # generate() returns prompt + continuation; keep only the
        # continuation so the prompt text never leaks into the answer.
        prompt_length = inputs["input_ids"].shape[1]
        answer = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        )
        return answer.strip()

    except Exception as e:
        # Service boundary: report the failure as text rather than raising.
        return f"Error processing request: {str(e)}"