Leonard Püttmann committed on
Commit
0a67e9e
·
verified ·
1 Parent(s): 89bd038

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -17
README.md CHANGED
@@ -31,38 +31,28 @@ Due to its size, the model runs very well on CPUs.
31
 
32
  ```python
33
  import torch
34
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
35
- from peft import PeftModel
36
 
37
- base_model_id = "unsloth/Llama-3.2-1B-Instruct"
38
- bnb_config = BitsAndBytesConfig(
39
- load_in_4bit=True,
40
- bnb_4bit_use_double_quant=True,
41
- bnb_4bit_quant_type="nf4",
42
- bnb_4bit_compute_dtype=torch.bfloat16
43
- )
44
 
45
- base_model = AutoModelForCausalLM.from_pretrained(
46
- base_model_id, # Mistral, same as before
47
- quantization_config=bnb_config, # Same quantization config as before
48
  device_map="auto",
49
  trust_remote_code=True,
50
  )
51
 
52
- tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)
53
-
54
- ft_model = PeftModel.from_pretrained(base_model, "LeonardPuettmann/LlaMaestra-3.2-1B-Instruct-v0.1-4bit")
55
 
56
  row_json = [
57
  {"role": "system", "content": "Your job is to return translations for sentences or words from either Italian to English or English to Italian."},
58
- {"role": "user", "content": "Scontri a Bologna, la destra lancia l'offensiva contro i centri sociali."}
59
  ]
60
 
61
  prompt = tokenizer.apply_chat_template(row_json, tokenize=False)
62
  model_input = tokenizer(prompt, return_tensors="pt").to("cuda")
63
 
64
  with torch.no_grad():
65
- print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=1024)[0]))
66
  ```
67
 
68
  ## Data used
 
31
 
32
  ```python
33
  import torch
34
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
35
 
36
+ model_id = "LeonardPuettmann/LlaMaestra-3.2-1B-Instruct-v0.1-4bit"
 
 
 
 
 
 
37
 
38
+ model = AutoModelForCausalLM.from_pretrained(
39
+ model_id,
 
40
  device_map="auto",
41
  trust_remote_code=True,
42
  )
43
 
44
+ tokenizer = AutoTokenizer.from_pretrained(model_id, add_bos_token=True, trust_remote_code=True)
 
 
45
 
46
  row_json = [
47
  {"role": "system", "content": "Your job is to return translations for sentences or words from either Italian to English or English to Italian."},
48
+ {"role": "user", "content": "Do you sell tickets for the bus?"},
49
  ]
50
 
51
  prompt = tokenizer.apply_chat_template(row_json, tokenize=False)
52
  model_input = tokenizer(prompt, return_tensors="pt").to("cuda")
53
 
54
  with torch.no_grad():
55
+ print(tokenizer.decode(model.generate(**model_input, max_new_tokens=1024)[0]))
56
  ```
57
 
58
  ## Data used