Update README.md
Browse files
README.md
CHANGED
@@ -43,4 +43,72 @@ tokenizer, model, image_processor, max_length = load_pretrained_model(
|
|
43 |
model_name="llava_qwen",
|
44 |
device_map="auto"
|
45 |
)
|
46 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
model_name="llava_qwen",
|
44 |
device_map="auto"
|
45 |
)
|
46 |
+
```
|
47 |
+
|
48 |
+
To run model inference on an input image:
|
49 |
+
|
50 |
+
```python
|
51 |
+
import copy
import torch
|
52 |
+
from PIL import Image
|
53 |
+
from llava.model.builder import load_pretrained_model
|
54 |
+
from llava.mm_utils import process_images, tokenizer_image_token
|
55 |
+
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
|
56 |
+
from llava.conversation import conv_templates
|
57 |
+
|
58 |
+
|
59 |
+
# - Load model
|
60 |
+
tokenizer, model, image_processor, max_length = load_pretrained_model(
|
61 |
+
model_path="inaf-oact-ai/radiollava-7b-qa",  # first parameter of load_pretrained_model is `model_path`
|
62 |
+
model_base=None,
|
63 |
+
model_name="llava_qwen",
|
64 |
+
device_map="auto"
|
65 |
+
)
|
66 |
+
|
67 |
+
# - Load image
|
68 |
+
image_path= ...
|
69 |
+
image = Image.open(image_path).convert("RGB")  # for formats PIL cannot read (e.g. FITS), load the pixel array yourself and use Image.fromarray(...)
|
70 |
+
|
71 |
+
# - Process image
|
72 |
+
image_tensor = process_images([image], image_processor, model.config)
|
73 |
+
image_tensor = [_image.to(dtype=torch.float16, device=model.device) for _image in image_tensor]
|
74 |
+
|
75 |
+
# - Create prompt
|
76 |
+
query= "Describe the input image" # Replace it with your query
|
77 |
+
question = DEFAULT_IMAGE_TOKEN + "\n" + query
|
78 |
+
conv_template = "qwen_1_5"  # chat template the LLaVA-NeXT tutorial uses for llava_qwen models; confirm for your checkpoint
conv = copy.deepcopy(conv_templates[conv_template])  # deep copy so the shared template is not mutated below
|
79 |
+
conv.system= '<|im_start|>system\nYou are an AI assistant specialized in radio astronomical topics.'
|
80 |
+
conv.append_message(conv.roles[0], question)
|
81 |
+
conv.append_message(conv.roles[1], None)
|
82 |
+
prompt_question = conv.get_prompt()
|
83 |
+
|
84 |
+
# - Create model inputs
|
85 |
+
input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(model.device)
|
86 |
+
image_sizes = [image.size]
|
87 |
+
|
88 |
+
# - Generate model response
|
89 |
+
# Change generation parameters as you wish
|
90 |
+
do_sample=True
|
91 |
+
temperature= 0.3
|
92 |
+
max_new_tokens=4096
|
93 |
+
|
94 |
+
output = model.generate(
|
95 |
+
input_ids,
|
96 |
+
images=image_tensor,
|
97 |
+
image_sizes=image_sizes,
|
98 |
+
do_sample=do_sample,
|
99 |
+
temperature=temperature if do_sample else None,
|
100 |
+
max_new_tokens=max_new_tokens,
|
101 |
+
)
|
102 |
+
output_parsed= tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
103 |
+
|
104 |
+
# - Process response as you wish ...
|
105 |
+
#response= output_parsed.strip("\n").strip()
|
106 |
+
```
|
107 |
+
|
108 |
+
See the tutorials available in the LLaVA-NeXT repository:
|
109 |
+
|
110 |
+
`https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/docs/LLaVA_OneVision_Tutorials.ipynb`
|
111 |
+
|
112 |
+
Further usage examples are provided in this repository:
|
113 |
+
|
114 |
+
`https://github.com/SKA-INAF/radio-llava.git`
|