Update README.md
README.md (CHANGED)
@@ -287,17 +287,26 @@ FLAIR เนื้องอกไขมันที่เส้นกลาง
@@ -305,8 +314,17 @@ messages = [

Before:

```python
import transformers
import torch

model_id = "EIRTHAIMED/Llama-3.1-EIRAI-8B"
# ...
    device_map="auto",
)

messages = [
    # ...
    {"role": "user", "content": "การใช้ clinical tracer มีบทบาทอย่างไรในการพัฒนาคุณภาพการดูแลผู้ป่วย?"}
]
# ...
```
After:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "EIRTHAIMED/Llama-3.1-EIRAI-8B"

# Optional NF4 4-bit quantization config (takes effect only if passed to from_pretrained below)
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    # quantization_config=nf4_config,  # uncomment this line for 4-bit loading
    device_map="auto",
    attn_implementation="flash_attention_2"
)

messages = [
    # ...
    # Thai: "What role does the use of clinical tracers play in improving the quality of patient care?"
    {"role": "user", "content": "การใช้ clinical tracer มีบทบาทอย่างไรในการพัฒนาคุณภาพการดูแลผู้ป่วย?"}
]

input = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # must be set for generation
    return_tensors="pt",
).to("cuda")

from transformers import TextStreamer

# Stream tokens to stdout as they are generated, skipping the echoed prompt
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    input,
    streamer=text_streamer,
    max_new_tokens=1500,
    do_sample=True,
    temperature=0.01,
    top_k=100,
    top_p=0.95,
)
```
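If streaming output is not needed, a non-streaming variant can reuse the `model`, `tokenizer`, and `input` objects defined above. This is a minimal sketch, not part of the commit; the prompt-stripping slice is a common decoding pattern rather than something this README specifies:

```python
# Generate without a streamer, then decode only the newly generated tokens.
output_ids = model.generate(
    input,
    max_new_tokens=1500,
    do_sample=True,
    temperature=0.01,
    top_k=100,
    top_p=0.95,
)
# input has shape (1, prompt_len); slice off the prompt so only the answer is decoded.
answer = tokenizer.decode(output_ids[0][input.shape[-1]:], skip_special_tokens=True)
print(answer)
```

With temperature=0.01 the sampling is nearly greedy, so the streamed and non-streamed outputs should be essentially identical.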