Reduce GPU memory usage in the runtime.

#14
Files changed (1) hide show
  1. README.md +2 -1
README.md CHANGED
@@ -157,7 +157,8 @@ batch_dict = tokenizer(
157
  return_tensors="pt",
158
  )
159
  batch_dict.to(model.device)
160
- outputs = model(**batch_dict)
 
161
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
162
 
163
  # normalize embeddings
 
157
  return_tensors="pt",
158
  )
159
  batch_dict.to(model.device)
160
+ with torch.no_grad():
161
+     outputs = model(**batch_dict)
162
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
163
 
164
  # normalize embeddings