inaf-oact-ai
/

radiollava-7b-qa

Image-Text-to-Text

Model card Files Files and versions

sriggi committed on Mar 27

Commit

a064035

·

verified ·

1 Parent(s): e6d2378

Update README.md

Files changed (1) hide show

README.md +69 -1

README.md CHANGED Viewed

@@ -43,4 +43,72 @@ tokenizer, model, image_processor, max_length = load_pretrained_model(
   model_name="llava_qwen",
   device_map="auto"
 )
-```

   model_name="llava_qwen",
   device_map="auto"
 )
+```
+To run model inference on an input image:
+```python
+# Minimal end-to-end example: load the model, preprocess one image,
+# build a chat prompt and generate a text answer.
+import copy
+import torch
+from PIL import Image
+from llava.model.builder import load_pretrained_model
+from llava.mm_utils import process_images, tokenizer_image_token
+from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
+from llava.conversation import conv_templates
+# - Load model
+tokenizer, model, image_processor, max_length = load_pretrained_model(
+    model_name_or_path="inaf-oact-ai/radiollava-7b-qa",
+    model_base=None,
+    model_name="llava_qwen",
+    device_map="auto",
+)
+# - Load image
+#   Image.open handles standard raster formats (PNG, JPEG, ...).
+#   If your data is already a 2D/3D uint8 array (e.g. read from a FITS file
+#   and rescaled), use Image.fromarray(data).convert("RGB") instead.
+image_path = "path/to/image.png"  # Replace it with your image path
+image = Image.open(image_path).convert("RGB")
+# - Process image
+image_tensor = process_images([image], image_processor, model.config)
+image_tensor = [
+    _image.to(dtype=torch.float16, device=model.device) for _image in image_tensor
+]
+# - Create prompt
+#   The chat template must match the language backbone; "qwen_1_5" is the
+#   one used for llava_qwen models in the LLaVA-NeXT tutorials
+#   (NOTE: verify against the conv_templates keys of your LLaVA-NeXT version).
+conv_template = "qwen_1_5"
+query = "Describe the input image"  # Replace it with your query
+question = DEFAULT_IMAGE_TOKEN + "\n" + query
+# Deep-copy so the shared template object is not mutated across queries
+conv = copy.deepcopy(conv_templates[conv_template])
+conv.system = '<|im_start|>system\nYou are an AI assistant specialized in radio astronomical topics.'
+conv.append_message(conv.roles[0], question)
+conv.append_message(conv.roles[1], None)  # Empty assistant turn to be completed by the model
+prompt_question = conv.get_prompt()
+# - Create model inputs
+input_ids = tokenizer_image_token(
+    prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+).unsqueeze(0).to(model.device)
+image_sizes = [image.size]
+# - Generate model response
+#   Change generation parameters as you wish
+do_sample = True
+temperature = 0.3
+max_new_tokens = 4096
+output = model.generate(
+    input_ids,
+    images=image_tensor,
+    image_sizes=image_sizes,
+    do_sample=do_sample,
+    temperature=temperature if do_sample else None,  # Temperature is only meaningful when sampling
+    max_new_tokens=max_new_tokens,
+)
+output_parsed = tokenizer.decode(
+    output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
+)
+# - Process response as you wish, for example:
+#   response = output_parsed.strip("\n").strip()
+```
+See the tutorials available in the LLaVA-NeXT repository:
+`https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/docs/LLaVA_OneVision_Tutorials.ipynb`
+Further usage examples are provided in this repository:
+`https://github.com/SKA-INAF/radio-llava.git`