AIEIR committed on
Commit 20a87e2
1 Parent(s): 7ab009b

Update README.md

Files changed (1)
  1. README.md +27 -9
README.md CHANGED
@@ -287,17 +287,26 @@ FLAIR midline lipoma

```python

- # Using transformers.pipeline() API
-
- import transformers
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "EIRTHAIMED/Llama-3.1-EIRAI-8B"
- pipeline = transformers.pipeline(
-     "text-generation",
-     model=model_id,
-     model_kwargs={"torch_dtype": torch.bfloat16},
+
+ nf4_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+
+ # Load the base model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     torch_dtype=torch.bfloat16,
+     # quantization_config=nf4_config,  # uncomment this line for 4-bit loading
    device_map="auto",
+     attn_implementation="flash_attention_2"
)

messages = [
@@ -305,8 +314,17 @@ messages = [
    {"role": "user", "content": "การใช้ clinical tracer มีบทบาทอย่างไรในการพัฒนาคุณภาพการดูแลผู้ป่วย?"}
]

- outputs = pipeline(messages, max_new_tokens=128, do_sample=True, temperature=0.01, top_k=100, top_p=0.95)
- print(outputs[0]["generated_text"][-1])
+ inputs = tokenizer.apply_chat_template(
+     messages,
+     tokenize=True,
+     add_generation_prompt=True,  # must be set for generation
+     return_tensors="pt",
+ ).to("cuda")
+
+ from transformers import TextStreamer
+ text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+ _ = model.generate(inputs, streamer=text_streamer, max_new_tokens=1500, do_sample=True, temperature=0.01, top_k=100, top_p=0.95)

```
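
The updated snippet streams tokens to stdout with `TextStreamer`. To capture the completion as a string instead (e.g., for logging or post-processing), a minimal sketch along these lines should work; it reuses the `model`, `tokenizer`, and `inputs` objects from the snippet above and is an illustrative addition, not code from this commit:

```python
# Minimal sketch: generate without a streamer and decode the result.
# Assumes `model`, `tokenizer`, and `inputs` from the updated README snippet.
output_ids = model.generate(
    inputs,
    max_new_tokens=1500,
    do_sample=True,
    temperature=0.01,
    top_k=100,
    top_p=0.95,
)

# Drop the prompt tokens so only the newly generated text is decoded.
generated_ids = output_ids[0][inputs.shape[-1]:]
print(tokenizer.decode(generated_ids, skip_special_tokens=True))
```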