Update README.md (#19)
- Update README.md (15f395c688792d3140dbf5ced0857afaf65aff03)
- Update README.md (00e1acdb665ceabffba3830e4f54a4bd99fd4014)
Co-authored-by: Vaibhav Srivastav <reach-vb@users.noreply.huggingface.co>
README.md CHANGED
@@ -73,51 +73,10 @@ print(tokenizer.decode(outputs[0]))
 <a name="precisions"></a>
 #### Running the model on a GPU using different precisions
 
-The native weights of this model were exported in `bfloat16` precision.
+The native weights of this model were exported in `bfloat16` precision.
 
 You can also use `float32` if you skip the dtype, but no precision increase will occur (model weights will just be upcasted to `float32`). See examples below.
 
-* _Using `torch.float16`_
-
-```python
-# pip install accelerate
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
-model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-9b-it",
-    device_map="auto",
-    torch_dtype=torch.float16,
-    revision="float16",
-)
-
-input_text = "Write me a poem about Machine Learning."
-input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
-
-outputs = model.generate(**input_ids)
-print(tokenizer.decode(outputs[0]))
-```
-
-* _Using `torch.bfloat16`_
-
-```python
-# pip install accelerate
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
-model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-9b-it",
-    device_map="auto",
-    torch_dtype=torch.bfloat16)
-
-input_text = "Write me a poem about Machine Learning."
-input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
-
-outputs = model.generate(**input_ids)
-print(tokenizer.decode(outputs[0]))
-```
-
 * _Upcasting to `torch.float32`_
 
 ```python
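The hunk cuts off at the opening fence of the retained `float32` example, so that block's body is not visible in this diff. A minimal sketch of what it plausibly contains, mirroring the removed snippets but omitting `torch_dtype` so the native `bfloat16` weights are upcast to `float32` at load time; the exact lines are an assumption, not part of this diff:

```python
# pip install accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
# No torch_dtype argument: the checkpoint is loaded in the float32 default,
# upcasting the native bfloat16 weights without any precision gain.
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-9b-it",
    device_map="auto",
)

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))
```

Note that the removed `float16` variant also passed `revision="float16"`, which loads the dedicated float16 branch of the repo rather than casting the `bfloat16` weights at load time.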