sarang-shrivastava committed on
Commit a79245f · 1 Parent(s): 2e8bef0

Update handler.py

Files changed (1)
  1. handler.py +16 -4
handler.py CHANGED
@@ -4,7 +4,10 @@ from typing import Dict, List, Any
 # import torch
 from datetime import datetime
 
+import torch
 
+import logging
+logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
 
 
 import requests
@@ -19,6 +22,12 @@ class EndpointHandler():
         self.processor = Blip2Processor.from_pretrained(path)
         self.model = Blip2ForConditionalGeneration.from_pretrained(path, device_map="auto")
 
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        self.model.to(self.device)
+
+        logging.info('Model moved to device-' + self.device)
+
         # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # self.model.eval()
         # self.model.to(device=device, dtype=self.torch_dtype)
@@ -71,11 +80,14 @@ class EndpointHandler():
 
         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
 
-        question = "how many dogs are in the picture?"
-        inputs = self.processor(raw_image, question, return_tensors="pt").to("cuda")
+        # question = "how many dogs are in the picture?"
+        # inputs = self.processor(raw_image, question, return_tensors="pt").to("cuda")
+
+        inputs = self.processor(raw_image, return_tensors="pt").to("cuda")
 
         out = self.model.generate(**inputs)
-        output_text = self.processor.decode(out[0], skip_special_tokens=True)
+
+        generated_text = self.processor.batch_decode(out, skip_special_tokens=True)[0].strip()
 
         current = datetime.now()
 
@@ -100,4 +112,4 @@ class EndpointHandler():
         # new_tokens = output_ids[0, len(input_ids[0]) :]
         # output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
 
-        return [{"gen_text":output_text, "time_elapsed": str(current-now)}]
+        return [{"gen_text":generated_text, "time_elapsed": str(current-now)}]
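
For context, a minimal sketch of how the updated handler could be exercised locally after this commit. It assumes the standard Hugging Face Inference Endpoints custom-handler interface (a dict payload in, a list of dicts out); the payload key carrying the image URL and the model path passed to the constructor are hypothetical, since neither is visible in this diff:

# Minimal local smoke test for the updated handler -- a sketch, not part of the commit.
# Assumptions (not shown in this diff): EndpointHandler.__init__ takes the model path,
# and __call__ reads the image URL from the payload; the "inputs" key is hypothetical.
from handler import EndpointHandler

handler = EndpointHandler(path="Salesforce/blip2-opt-2.7b")   # hypothetical path
payload = {"inputs": "https://example.com/image.jpg"}         # hypothetical payload shape
result = handler(payload)
# Expected shape per the diff: [{"gen_text": "<caption>", "time_elapsed": "<h:mm:ss.ffffff>"}]
print(result)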