JRosenkranz
committed on
Commit
•
08facc0
1
Parent(s):
9fca77e
Update README.md
Browse files
README.md
CHANGED
@@ -37,7 +37,7 @@ docker run -d --rm --gpus all \
|
|
37 |
-p 8033:8033 \
|
38 |
-v /path/to/all/models:/models \
|
39 |
-e MODEL_NAME=/models/model_weights/llama/13B-F \
|
40 |
-
-e SPECULATOR_NAME
|
41 |
-e FLASH_ATTENTION=true \
|
42 |
-e PAGED_ATTENTION=true \
|
43 |
-e DTYPE_STR=float16 \
|
@@ -85,7 +85,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
|
|
85 |
--model_path=/path/to/model_weights/llama/13B-F \
|
86 |
--model_source=hf \
|
87 |
--tokenizer=/path/to/llama/13B-F \
|
88 |
-
--speculator_path
|
89 |
--speculator_source=hf \
|
90 |
--compile \
|
91 |
--compile_mode=reduce-overhead
|
@@ -99,7 +99,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
|
|
99 |
--model_path=/path/to/model_weights/llama/13B-F \
|
100 |
--model_source=hf \
|
101 |
--tokenizer=/path/to/llama/13B-F \
|
102 |
-
--speculator_path
|
103 |
--speculator_source=hf \
|
104 |
--compile \
|
105 |
```
|
@@ -112,7 +112,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
|
|
112 |
--model_path=/path/to/model_weights/llama/13B-F \
|
113 |
--model_source=hf \
|
114 |
--tokenizer=/path/to/llama/13B-F \
|
115 |
-
--speculator_path
|
116 |
--speculator_source=hf \
|
117 |
--batch_input \
|
118 |
--compile \
|
|
|
37 |
-p 8033:8033 \
|
38 |
-v /path/to/all/models:/models \
|
39 |
-e MODEL_NAME=/models/model_weights/llama/13B-F \
|
40 |
+
-e SPECULATOR_NAME=ibm-fms/llama-13b-accelerator \
|
41 |
-e FLASH_ATTENTION=true \
|
42 |
-e PAGED_ATTENTION=true \
|
43 |
-e DTYPE_STR=float16 \
|
|
|
85 |
--model_path=/path/to/model_weights/llama/13B-F \
|
86 |
--model_source=hf \
|
87 |
--tokenizer=/path/to/llama/13B-F \
|
88 |
+
--speculator_path=ibm-fms/llama-13b-accelerator \
|
89 |
--speculator_source=hf \
|
90 |
--compile \
|
91 |
--compile_mode=reduce-overhead
|
|
|
99 |
--model_path=/path/to/model_weights/llama/13B-F \
|
100 |
--model_source=hf \
|
101 |
--tokenizer=/path/to/llama/13B-F \
|
102 |
+
--speculator_path=ibm-fms/llama-13b-accelerator \
|
103 |
--speculator_source=hf \
|
104 |
--compile \
|
105 |
```
|
|
|
112 |
--model_path=/path/to/model_weights/llama/13B-F \
|
113 |
--model_source=hf \
|
114 |
--tokenizer=/path/to/llama/13B-F \
|
115 |
+
--speculator_path=ibm-fms/llama-13b-accelerator \
|
116 |
--speculator_source=hf \
|
117 |
--batch_input \
|
118 |
--compile \
|