JRosenkranz committed on
Commit
e605551
1 Parent(s): b7ceb19

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -0
README.md CHANGED
@@ -126,6 +126,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
126
  --tokenizer=$MODEL_PATH \
127
  --speculator_path=ibm-fms/llama-13b-accelerator \
128
  --speculator_source=hf \
 
129
  --compile \
130
  --compile_mode=reduce-overhead
131
  ```
@@ -141,6 +142,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
141
  --tokenizer=$MODEL_PATH \
142
  --speculator_path=ibm-fms/llama-13b-accelerator \
143
  --speculator_source=hf \
 
144
  --compile \
145
  ```
146
 
@@ -155,6 +157,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
155
  --tokenizer=$MODEL_PATH \
156
  --speculator_path=ibm-fms/llama-13b-accelerator \
157
  --speculator_source=hf \
 
158
  --batch_input \
159
  --compile \
160
  ```
 
126
  --tokenizer=$MODEL_PATH \
127
  --speculator_path=ibm-fms/llama-13b-accelerator \
128
  --speculator_source=hf \
129
+ --speculator_variant=840m \
130
  --compile \
131
  --compile_mode=reduce-overhead
132
  ```
 
142
  --tokenizer=$MODEL_PATH \
143
  --speculator_path=ibm-fms/llama-13b-accelerator \
144
  --speculator_source=hf \
145
+ --speculator_variant=840m \
146
  --compile \
147
  ```
148
 
 
157
  --tokenizer=$MODEL_PATH \
158
  --speculator_path=ibm-fms/llama-13b-accelerator \
159
  --speculator_source=hf \
160
+ --speculator_variant=840m \
161
  --batch_input \
162
  --compile \
163
  ```