alvarobartt (HF Staff) committed
Commit e2a8712 · verified · 1 Parent(s): f418cd5

Create handler.py

Files changed (1): handler.py (+79 -0)
handler.py ADDED
@@ -0,0 +1,79 @@
+ from typing import Any, Dict
+
+ import torch
+ from transformers import AutoModelForCausalLM, AutoProcessor
+ from transformers.image_utils import load_image
+
+ # Magma references each input image in the prompt through these placeholder tokens.
+ IMAGE_TOKENS = "<image_start><image><image_end>"
+ SEPARATOR = "\n"
+
+
+ class EndpointHandler:
+     def __init__(
+         self,
+         model_dir: str = "alvarobartt/Magma-8B",
+         **kwargs: Any,  # type: ignore
+     ) -> None:
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16
+         ).eval()
+         # `__call__` moves the inputs to CUDA, so the model must live there too.
+         self.model.to("cuda")
+         self.processor = AutoProcessor.from_pretrained(
+             model_dir, trust_remote_code=True
+         )
+
+     def __call__(self, data: Dict[str, Any]) -> Any:
+         if "messages" not in data:
+             raise ValueError(
+                 "The request body must contain a key 'messages' with a list of messages."
+             )
+
+         # Flatten OpenAI-style multimodal messages: collect the images and make
+         # sure each one is referenced by an image placeholder in the text.
+         messages, images = [], []
+         for message in data["messages"]:
+             if isinstance(message["content"], list):
+                 new_message = {"role": message["role"], "content": ""}
+                 for content in message["content"]:
+                     if content["type"] == "text":
+                         new_message["content"] += content["text"]
+                     elif content["type"] == "image_url":
+                         images.append(load_image(content["image_url"]["url"]))
+                         # Prepend a placeholder if the text doesn't already
+                         # contain one for this image.
+                         if new_message["content"].count(
+                             f"{IMAGE_TOKENS}{SEPARATOR}"
+                         ) < len(images):
+                             new_message["content"] = (
+                                 f"{IMAGE_TOKENS}{SEPARATOR}" + new_message["content"]
+                             )
+                 messages.append(new_message)
+             else:
+                 messages.append(
+                     {"role": message["role"], "content": message["content"]}
+                 )
+
+         data.pop("messages")
+
+         prompt = self.processor.tokenizer.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+
+         inputs = self.processor(images=images, texts=prompt, return_tensors="pt")
+         # Add the batch dimension the model expects; `pixel_values` may be
+         # absent for text-only requests.
+         if inputs.get("pixel_values") is not None:
+             inputs["pixel_values"] = inputs["pixel_values"].unsqueeze(0)
+             inputs["image_sizes"] = inputs["image_sizes"].unsqueeze(0)
+         inputs = inputs.to("cuda").to(torch.bfloat16)
+
+         # Greedy decoding by default; any remaining keys in the request body
+         # override these defaults.
+         generation_args = {
+             "max_new_tokens": 128,
+             "temperature": 0.0,
+             "do_sample": False,
+             "use_cache": True,
+             "num_beams": 1,
+         }
+         generation_args.update(data)
+
+         with torch.inference_mode():
+             generate_ids = self.model.generate(**inputs, **generation_args)
+
+         # Drop the prompt tokens and decode only the newly generated ones.
+         generate_ids = generate_ids[:, inputs["input_ids"].shape[-1] :]
+         response = self.processor.decode(
+             generate_ids[0], skip_special_tokens=True
+         ).strip()
+
+         return {"generated_text": response}
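For reference, a minimal sketch of how this handler could be smoke-tested locally (assuming a CUDA-capable machine, since the handler keeps the model and inputs on GPU); the prompt, image URL, and `max_new_tokens` override below are illustrative placeholders, not part of the commit:

from handler import EndpointHandler

handler = EndpointHandler(model_dir="alvarobartt/Magma-8B")

# OpenAI-style chat payload; any extra top-level keys (here `max_new_tokens`)
# are forwarded to `model.generate` via `generation_args.update(data)`.
payload = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown in this image?"},
                {
                    "type": "image_url",
                    # Placeholder URL; replace with a reachable image.
                    "image_url": {"url": "https://example.com/image.png"},
                },
            ],
        }
    ],
    "max_new_tokens": 64,
}

print(handler(payload)["generated_text"])

When deployed as a Hugging Face Inference Endpoint, the same JSON body is what the endpoint expects as the POST request payload.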