Reduce GPU memory usage in the runtime.
After adding `with torch.no_grad():`, memory can be reduced from 11.77G to 3.19G when batch=4, token=1024.
README.md
CHANGED
@@ -157,7 +157,8 @@ batch_dict = tokenizer(
     return_tensors="pt",
 )
 batch_dict.to(model.device)
-outputs = model(**batch_dict)
+with torch.no_grad():
+    outputs = model(**batch_dict)
 embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

 # normalize embeddings