remove unnecessary space before instruction
README.md CHANGED
@@ -69,14 +69,14 @@ model.eval()

# Image + Text -> Text
image = Image.open(requests.get('https://github.com/haon-chen/mmE5/blob/main/figures/example.jpg?raw=true', stream=True).raw)
+inputs = processor(text='<|image|><|begin_of_text|>Represent the given image with the following question: What is in the image', images=[image], return_tensors="pt").to("cuda")
qry_output = last_pooling(model(**inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], inputs['attention_mask'])

string = 'A cat and a dog'
text_inputs = processor(text=string, return_tensors="pt").to("cuda")
tgt_output = last_pooling(model(**text_inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], text_inputs['attention_mask'])
print(string, '=', compute_similarity(qry_output, tgt_output))
+## A cat and a dog = tensor([[0.3945]], device='cuda:0', dtype=torch.bfloat16)

string = 'A cat and a tiger'
text_inputs = processor(text=string, return_tensors="pt").to("cuda")

@@ -88,19 +88,19 @@ print(string, '=', compute_similarity(qry_output, tgt_output))
inputs = processor(text='Find me an everyday image that matches the given caption: A cat and a dog.', return_tensors="pt").to("cuda")
qry_output = last_pooling(model(**inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], inputs['attention_mask'])

+string = '<|image|><|begin_of_text|>Represent the given image.'
tgt_inputs = processor(text=string, images=[image], return_tensors="pt").to("cuda")
tgt_output = last_pooling(model(**tgt_inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], tgt_inputs['attention_mask'])
print(string, '=', compute_similarity(qry_output, tgt_output))
+## <|image|><|begin_of_text|>Represent the given image. = tensor([[0.4141]], device='cuda:0', dtype=torch.bfloat16)

inputs = processor(text='Find me an everyday image that matches the given caption: A cat and a tiger.', return_tensors="pt").to("cuda")
qry_output = last_pooling(model(**inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], inputs['attention_mask'])
+string = '<|image|><|begin_of_text|>Represent the given image.'
tgt_inputs = processor(text=string, images=[image], return_tensors="pt").to("cuda")
tgt_output = last_pooling(model(**tgt_inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], tgt_inputs['attention_mask'])
print(string, '=', compute_similarity(qry_output, tgt_output))
+## <|image|><|begin_of_text|>Represent the given image. = tensor([[0.3770]], device='cuda:0', dtype=torch.bfloat16)
```
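For context, the code in these hunks leans on setup and helper definitions from earlier in README.md, outside this diff: a loaded `model` and `processor`, plus `last_pooling` and `compute_similarity`. Below is a minimal sketch of that scaffolding; the checkpoint name, the `MllamaForConditionalGeneration` class, and the helper bodies are assumptions inferred from the calls above (bfloat16 on CUDA matches the printed outputs), not copied from the README.

```python
# Minimal sketch of the scaffolding the snippet above assumes; the actual
# definitions live earlier in README.md and may differ in detail.
import torch
import requests
from PIL import Image  # requests/PIL are what the snippet uses to fetch the example image
from transformers import MllamaForConditionalGeneration, AutoProcessor

model_name = 'intfloat/mmE5-mllama-11b-instruct'  # assumed checkpoint name
processor = AutoProcessor.from_pretrained(model_name)
model = MllamaForConditionalGeneration.from_pretrained(
    model_name, torch_dtype=torch.bfloat16  # bfloat16/CUDA match the printed outputs
).to('cuda')
model.eval()

def last_pooling(last_hidden_state, attention_mask, normalize=True):
    # Pick the hidden state of each sequence's last non-padded token.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_state.shape[0]
    reps = last_hidden_state[torch.arange(batch_size, device=last_hidden_state.device), sequence_lengths]
    if normalize:
        reps = torch.nn.functional.normalize(reps, p=2, dim=-1)
    return reps

def compute_similarity(q_reps, p_reps):
    # With normalized embeddings this dot product is a cosine similarity.
    return torch.matmul(q_reps, p_reps.transpose(0, 1))
```

Because `last_pooling` L2-normalizes the last-token representation, the matrix product in `compute_similarity` behaves as a cosine similarity, which is why in the second hunk the matching caption scores higher against the image (0.4141) than the mismatched one (0.3770).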
### Sentence Transformers