JRosenkranz
committed on
Commit
•
0f37d3d
1
Parent(s):
a85babb
Update README.md
Browse files
README.md
CHANGED
@@ -89,11 +89,12 @@ pip install transformers==4.35.0 sentencepiece numpy
|
|
89 |
##### batch_size=1 (compile + cudagraphs)
|
90 |
|
91 |
```bash
|
|
|
92 |
python fms-extras/scripts/paged_speculative_inference.py \
|
93 |
--variant=13b \
|
94 |
-
--model_path
|
95 |
--model_source=hf \
|
96 |
-
--tokenizer
|
97 |
--speculator_path=ibm-fms/llama-13b-accelerator \
|
98 |
--speculator_source=hf \
|
99 |
--compile \
|
@@ -103,11 +104,12 @@ python fms-extras/scripts/paged_speculative_inference.py \
|
|
103 |
##### batch_size=1 (compile)
|
104 |
|
105 |
```bash
|
|
|
106 |
python fms-extras/scripts/paged_speculative_inference.py \
|
107 |
--variant=13b \
|
108 |
-
--model_path
|
109 |
--model_source=hf \
|
110 |
-
--tokenizer
|
111 |
--speculator_path=ibm-fms/llama-13b-accelerator \
|
112 |
--speculator_source=hf \
|
113 |
--compile \
|
@@ -116,11 +118,12 @@ python fms-extras/scripts/paged_speculative_inference.py \
|
|
116 |
##### batch_size=4 (compile)
|
117 |
|
118 |
```bash
|
|
|
119 |
python fms-extras/scripts/paged_speculative_inference.py \
|
120 |
--variant=13b \
|
121 |
-
--model_path
|
122 |
--model_source=hf \
|
123 |
-
--tokenizer
|
124 |
--speculator_path=ibm-fms/llama-13b-accelerator \
|
125 |
--speculator_source=hf \
|
126 |
--batch_input \
|
|
|
89 |
##### batch_size=1 (compile + cudagraphs)
|
90 |
|
91 |
```bash
|
92 |
+
MODEL_PATH=/path/to/llama/13B-F
|
93 |
python fms-extras/scripts/paged_speculative_inference.py \
|
94 |
--variant=13b \
|
95 |
+
--model_path=$MODEL_PATH \
|
96 |
--model_source=hf \
|
97 |
+
--tokenizer=$MODEL_PATH \
|
98 |
--speculator_path=ibm-fms/llama-13b-accelerator \
|
99 |
--speculator_source=hf \
|
100 |
--compile \
|
|
|
104 |
##### batch_size=1 (compile)
|
105 |
|
106 |
```bash
|
107 |
+
MODEL_PATH=/path/to/llama/13B-F
|
108 |
python fms-extras/scripts/paged_speculative_inference.py \
|
109 |
--variant=13b \
|
110 |
+
--model_path=$MODEL_PATH \
|
111 |
--model_source=hf \
|
112 |
+
--tokenizer=$MODEL_PATH \
|
113 |
--speculator_path=ibm-fms/llama-13b-accelerator \
|
114 |
--speculator_source=hf \
|
115 |
--compile \
|
|
|
118 |
##### batch_size=4 (compile)
|
119 |
|
120 |
```bash
|
121 |
+
MODEL_PATH=/path/to/llama/13B-F
|
122 |
python fms-extras/scripts/paged_speculative_inference.py \
|
123 |
--variant=13b \
|
124 |
+
--model_path=$MODEL_PATH \
|
125 |
--model_source=hf \
|
126 |
+
--tokenizer=$MODEL_PATH \
|
127 |
--speculator_path=ibm-fms/llama-13b-accelerator \
|
128 |
--speculator_source=hf \
|
129 |
--batch_input \
|