sriggi commited on
Commit
a064035
·
verified ·
1 Parent(s): e6d2378

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +69 -1
README.md CHANGED
@@ -43,4 +43,72 @@ tokenizer, model, image_processor, max_length = load_pretrained_model(
43
  model_name="llava_qwen",
44
  device_map="auto"
45
  )
46
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  model_name="llava_qwen",
44
  device_map="auto"
45
  )
46
+ ```
47
+
48
+ To run model inference on an input image:
49
+
50
+ ```python
51
+ import copy
+ import torch
52
+ from PIL import Image
53
+ from llava.model.builder import load_pretrained_model
54
+ from llava.mm_utils import process_images, tokenizer_image_token
55
+ from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
56
+ from llava.conversation import conv_templates
57
+
58
+
59
+ # - Load model
60
+ tokenizer, model, image_processor, max_length = load_pretrained_model(
61
+ model_name_or_path="inaf-oact-ai/radiollava-7b-qa",
62
+ model_base=None,
63
+ model_name="llava_qwen",
64
+ device_map="auto"
65
+ )
66
+
67
+ # - Load image
68
+ image_path = "path/to/your_image.png"  # replace with your input image path
69
+ image = Image.open(image_path).convert("RGB")
70
+
71
+ # - Process image
72
+ image_tensor = process_images([image], image_processor, model.config)
73
+ image_tensor = [_image.to(dtype=torch.float16, device=model.device) for _image in image_tensor]
74
+
75
+ # - Create prompt
76
+ query= "Describe the input image" # Replace it with your query
77
+ question = DEFAULT_IMAGE_TOKEN + "\n" + query
78
+ conv_template = "qwen_1_5"  # conversation template for llava_qwen models
+ conv = copy.deepcopy(conv_templates[conv_template])
79
+ conv.system= '<|im_start|>system\nYou are an AI assistant specialized in radio astronomical topics.'
80
+ conv.append_message(conv.roles[0], question)
81
+ conv.append_message(conv.roles[1], None)
82
+ prompt_question = conv.get_prompt()
83
+
84
+ # - Create model inputs
85
+ input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(model.device)
86
+ image_sizes = [image.size]
87
+
88
+ # - Generate model response
89
+ # Change generation parameters as you wish
90
+ do_sample=True
91
+ temperature= 0.3
92
+ max_new_tokens=4096
93
+
94
+ output = model.generate(
95
+ input_ids,
96
+ images=image_tensor,
97
+ image_sizes=image_sizes,
98
+ do_sample=do_sample,
99
+ temperature=temperature if do_sample else None,
100
+ max_new_tokens=max_new_tokens,
101
+ )
102
+ output_parsed= tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
103
+
104
+ # - Process response as you wish ...
105
+ #response= output_parsed.strip("\n").strip()
106
+ ```
107
+
108
+ See the tutorials available in the LLaVA-NeXT repository:
109
+
110
+ `https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/docs/LLaVA_OneVision_Tutorials.ipynb`
111
+
112
+ Further usage examples are provided in this repository:
113
+
114
+ `https://github.com/SKA-INAF/radio-llava.git`