Commit · 42b244c
1 Parent(s): 125c431
Update the deprecated Flash Attention call parameter in the from_pretrained() method
README.md
CHANGED
@@ -118,7 +118,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True)
+model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="flash_attention_2")
 
 text = "Hello my name is"
 inputs = tokenizer(text, return_tensors="pt").to(0)
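For reference, a minimal end-to-end sketch of the README snippet after this change. The attn_implementation="flash_attention_2" value comes from the diff; the torch_dtype, the generate call, and loading onto GPU 0 are illustrative assumptions, not part of the commit, and a GPU with flash-attn installed is assumed.

# Minimal sketch of the updated usage, not the verbatim README code.
# Assumes a CUDA GPU and the flash-attn package; torch_dtype and
# max_new_tokens are illustrative choices, not taken from the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,                 # illustrative: load weights in FP16
    attn_implementation="flash_attention_2",   # replaces the deprecated use_flash_attention_2=True
).to(0)

text = "Hello my name is"
inputs = tokenizer(text, return_tensors="pt").to(0)

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))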