JRosenkranz committed on
Commit
e1d9017
·
verified ·
1 Parent(s): 3fba51d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -6
README.md CHANGED
@@ -87,11 +87,12 @@ pip install transformers==4.35.0 sentencepiece numpy
87
  ##### batch_size=1 (compile + cudagraphs)
88
 
89
  ```bash
 
90
  python fms-extras/scripts/paged_speculative_inference.py \
91
  --variant=13b_code \
92
- --model_path=/path/to/llama/CodeLlama-13b-Instruct-hf \
93
  --model_source=hf \
94
- --tokenizer=/path/to/llama/CodeLlama-13b-Instruct-hf \
95
  --speculator_path=ibm-fms/codellama-13b-accelerator \
96
  --speculator_source=hf \
97
  --top_k_tokens_per_head=4,3,2,2,2,2,2 \
@@ -103,11 +104,12 @@ python fms-extras/scripts/paged_speculative_inference.py \
103
  ##### batch_size=1 (compile)
104
 
105
  ```bash
 
106
  python fms-extras/scripts/paged_speculative_inference.py \
107
  --variant=13b_code \
108
- --model_path=/path/to/llama/CodeLlama-13b-Instruct-hf \
109
  --model_source=hf \
110
- --tokenizer=/path/to/llama/CodeLlama-13b-Instruct-hf \
111
  --speculator_path=ibm-fms/codellama-13b-accelerator \
112
  --speculator_source=hf \
113
  --top_k_tokens_per_head=4,3,2,2,2,2,2 \
@@ -118,11 +120,12 @@ python fms-extras/scripts/paged_speculative_inference.py \
118
  ##### batch_size=4 (compile)
119
 
120
  ```bash
 
121
  python fms-extras/scripts/paged_speculative_inference.py \
122
  --variant=13b_code \
123
- --model_path=/path/to/llama/CodeLlama-13b-Instruct-hf \
124
  --model_source=hf \
125
- --tokenizer=/path/to/llama/CodeLlama-13b-Instruct-hf \
126
  --speculator_path=ibm-fms/codellama-13b-accelerator \
127
  --speculator_source=hf \
128
  --batch_input \
 
87
  ##### batch_size=1 (compile + cudagraphs)
88
 
89
  ```bash
90
+ MODEL_PATH=/path/to/llama/hf/CodeLlama-13b-Instruct-hf
91
  python fms-extras/scripts/paged_speculative_inference.py \
92
  --variant=13b_code \
93
+ --model_path=$MODEL_PATH \
94
  --model_source=hf \
95
+ --tokenizer=$MODEL_PATH \
96
  --speculator_path=ibm-fms/codellama-13b-accelerator \
97
  --speculator_source=hf \
98
  --top_k_tokens_per_head=4,3,2,2,2,2,2 \
 
104
  ##### batch_size=1 (compile)
105
 
106
  ```bash
107
+ MODEL_PATH=/path/to/llama/hf/CodeLlama-13b-Instruct-hf
108
  python fms-extras/scripts/paged_speculative_inference.py \
109
  --variant=13b_code \
110
+ --model_path=$MODEL_PATH \
111
  --model_source=hf \
112
+ --tokenizer=$MODEL_PATH \
113
  --speculator_path=ibm-fms/codellama-13b-accelerator \
114
  --speculator_source=hf \
115
  --top_k_tokens_per_head=4,3,2,2,2,2,2 \
 
120
  ##### batch_size=4 (compile)
121
 
122
  ```bash
123
+ MODEL_PATH=/path/to/llama/hf/CodeLlama-13b-Instruct-hf
124
  python fms-extras/scripts/paged_speculative_inference.py \
125
  --variant=13b_code \
126
+ --model_path=$MODEL_PATH \
127
  --model_source=hf \
128
+ --tokenizer=$MODEL_PATH \
129
  --speculator_path=ibm-fms/codellama-13b-accelerator \
130
  --speculator_source=hf \
131
  --batch_input \