m-polignano-uniba committed · Commit a4bb95e · 1 Parent(s): f96e773
Update README.md

README.md CHANGED
@@ -287,6 +287,8 @@ For direct use with `transformers`, you can easily get started with the followin
     {"role": "system", "content": "Answer clearly and in detail."},
     {"role": "user", "content": "Why is the sky blue?"}
 ]
+
+# Method 1
 prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
 for k,v in inputs.items():
@@ -294,6 +296,24 @@ For direct use with `transformers`, you can easily get started with the followin
 outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, top_p=0.85, temperature=0.7)
 results = tokenizer.batch_decode(outputs)[0]
 print(results)
+
+# Method 2
+import transformers
+pipe = transformers.pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,  # return only the newly generated text, not the prompt
+    task='text-generation',
+    max_new_tokens=512,  # max number of tokens to generate in the output
+    temperature=0.7,  # temperature for more or less creative answers
+    do_sample=True,
+    top_p=0.85,
+)
+
+sequences = pipe(messages)
+for seq in sequences:
+    print(f"{seq['generated_text']}")
+
 ```
 
 - Additionally, you can use the model with **4-bit quantization** to reduce the required resources. You can start with the code below.
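For context, the two hunks above show only fragments of the README snippet they modify: the model and tokenizer are loaded earlier in the file, outside the diff context, and the body of the truncated `for k,v in inputs.items():` line is cut off. Below is a minimal end-to-end sketch of what the resulting "Method 1" code plausibly looks like; the model ID is a hypothetical placeholder, and the loop body is assumed to move the input tensors onto the model's device.

```python
# Minimal sketch only: `model_id` is a hypothetical placeholder (the diff never
# names the repository), and the dtype/device choices are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "some-org/some-model"  # placeholder, not taken from the diff
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [
    {"role": "system", "content": "Answer clearly and in detail."},
    {"role": "user", "content": "Why is the sky blue?"},
]

# Method 1: render the chat template to a string, tokenize, and call generate().
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
inputs = {k: v.to(model.device) for k, v in inputs.items()}  # assumed body of the truncated loop
outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, top_p=0.85, temperature=0.7)
print(tokenizer.batch_decode(outputs)[0])
```

Note that the added "Method 2" feeds the `messages` list straight into the pipeline; chat-format input for `text-generation` pipelines is only accepted by reasonably recent `transformers` releases, so pin the library version if you depend on it.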
@@ -324,6 +344,8 @@ For direct use with `transformers`, you can easily get started with the followin
     {"role": "system", "content": "Answer clearly and in detail."},
     {"role": "user", "content": "Why is the sky blue?"}
 ]
+
+# Method 1
 prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
 for k,v in inputs.items():
@@ -332,6 +354,23 @@ For direct use with `transformers`, you can easily get started with the followin
 results = tokenizer.batch_decode(outputs)[0]
 print(results)
 
+# Method 2
+import transformers
+pipe = transformers.pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,  # return only the newly generated text, not the prompt
+    task='text-generation',
+    max_new_tokens=512,  # max number of tokens to generate in the output
+    temperature=0.7,  # temperature for more or less creative answers
+    do_sample=True,
+    top_p=0.85,
+)
+
+sequences = pipe(messages)
+for seq in sequences:
+    print(f"{seq['generated_text']}")
+
 ```
 
 ### Unsloth
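The two hunks above apply the same edits to the README's 4-bit quantized variant of the snippet; the quantized loading code itself sits outside the diff context. A hedged sketch of typical 4-bit loading with `BitsAndBytesConfig` follows (it requires the `bitsandbytes` package); the README's actual configuration is not visible here, so the quantization type, compute dtype, and model ID are all assumptions.

```python
# Hypothetical 4-bit loading sketch; none of these values are confirmed by the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "some-org/some-model"  # placeholder, as above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits at load time
    bnb_4bit_quant_type="nf4",              # NormalFloat4, a common inference default
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for the actual matmuls
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)
# Generation then proceeds exactly as in the full-precision example above.
```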
@@ -385,6 +424,16 @@ For direct use with `unsloth`, you can easily get started with the following ste
 [Unsloth](https://unsloth.ai) is a great tool that helps us develop products easily, at a lower cost than expected.
 
 ## Citation instructions
+```bibtex
+@misc{basile2023llamantino,
+      title={LLaMAntino: LLaMA 2 Models for Effective Text Generation in Italian Language},
+      author={Pierpaolo Basile and Elio Musacchio and Marco Polignano and Lucia Siciliani and Giuseppe Fiameni and Giovanni Semeraro},
+      year={2023},
+      eprint={2312.09993},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
 
 ```bibtex
 @article{llama3modelcard,
@@ -395,13 +444,3 @@ For direct use with `unsloth`, you can easily get started with the following ste
 }
 ```
 
-```bibtex
-@misc{basile2023llamantino,
-      title={LLaMAntino: LLaMA 2 Models for Effective Text Generation in Italian Language},
-      author={Pierpaolo Basile and Elio Musacchio and Marco Polignano and Lucia Siciliani and Giuseppe Fiameni and Giovanni Semeraro},
-      year={2023},
-      eprint={2312.09993},
-      archivePrefix={arXiv},
-      primaryClass={cs.CL}
-}
-```