Update README.md
README.md CHANGED
@@ -71,10 +71,10 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import flash_attn
 import time
 
-tokenizer = AutoTokenizer.from_pretrained("NousResearch/DeepHermes-3-
+tokenizer = AutoTokenizer.from_pretrained("NousResearch/DeepHermes-3-Llama-3-8B-Preview")
 
 model = AutoModelForCausalLM.from_pretrained(
-    "NousResearch/DeepHermes-3-
+    "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
     torch_dtype=torch.float16,
     device_map="auto",
     attn_implementation="flash_attention_2",
@@ -110,10 +110,10 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import flash_attn
 import time
 
-tokenizer = AutoTokenizer.from_pretrained("NousResearch/DeepHermes-3-
+tokenizer = AutoTokenizer.from_pretrained("NousResearch/DeepHermes-3-Llama-3-8B-Preview")
 
 model = AutoModelForCausalLM.from_pretrained(
-    "NousResearch/DeepHermes-3-
+    "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
     torch_dtype=torch.float16,
     device_map="auto",
     attn_implementation="flash_attention_2",
@@ -141,7 +141,7 @@ print(f"Response: {response}")
 
 You can also run this model with vLLM, by running the following in your terminal after `pip install vllm`
 
-`vllm serve NousResearch/
+`vllm serve NousResearch/DeepHermes-3-Llama-3-8B-Preview`
 
 You may then use the model over API using the OpenAI library just like you would call OpenAI's API.
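For context, a minimal runnable version of the snippet the first two hunks patch, with the corrected repo id filled in. The prompt, chat-template call, and generation parameters below are illustrative assumptions, not text from the README itself:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Corrected repo id from the diff above.
model_id = "NousResearch/DeepHermes-3-Llama-3-8B-Preview"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    attn_implementation="flash_attention_2",  # requires `pip install flash-attn`
)

# Illustrative prompt; the README's actual prompt is outside this diff.
messages = [{"role": "user", "content": "Hello, who are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Decode only the newly generated tokens, matching the diff's
# `print(f"Response: {response}")` context line.
output = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.7)
response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(f"Response: {response}")
```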
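Since the README says to query the vLLM server through the OpenAI library, here is a minimal sketch of that call, assuming vLLM's default OpenAI-compatible endpoint on localhost port 8000 (the base URL and placeholder API key are assumptions, not taken from the diff):

```python
from openai import OpenAI

# vLLM exposes an OpenAI-compatible API; port 8000 is its default (assumed here).
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
    messages=[{"role": "user", "content": "Hello, who are you?"}],
)
print(response.choices[0].message.content)
```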