m-polignano-uniba committed · Commit a4bb95e · 1 Parent(s): f96e773
Update README.md

README.md CHANGED
@@ -287,6 +287,8 @@ For direct use with `transformers`, you can easily get started with the followin
     {"role": "system", "content": "Answer clearly and in detail."},
     {"role": "user", "content": "Why is the sky blue?"}
 ]
+
+# Method 1
 prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
 for k,v in inputs.items():
@@ -294,6 +296,24 @@ For direct use with `transformers`, you can easily get started with the followin
 outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, top_p=0.85, temperature=0.7)
 results = tokenizer.batch_decode(outputs)[0]
 print(results)
+
+# Method 2
+import transformers
+pipe = transformers.pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,  # return only the newly generated text, not the prompt
+    task='text-generation',
+    max_new_tokens=512,  # max number of tokens to generate in the output
+    temperature=0.7,  # temperature for more or less creative answers
+    do_sample=True,
+    top_p=0.85,
+)
+
+sequences = pipe(messages)
+for seq in sequences:
+    print(f"{seq['generated_text']}")
+
 ```
 
 - Additionally, you can use the model with **4-bit quantization** to reduce the required resources. You can start with the code below.
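For context, the two hunks above show only fragments of the README snippet they modify: the model and tokenizer are loaded earlier in the file, outside the diff context, and the body of the truncated `for k,v in inputs.items():` line is cut off. Below is a minimal end-to-end sketch of what the resulting "Method 1" code plausibly looks like; the model ID is a hypothetical placeholder, and the loop body is assumed to move the input tensors onto the model's device.

```python
# Minimal sketch only: `model_id` is a hypothetical placeholder (the diff never
# names the repository), and the dtype/device choices are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "some-org/some-model"  # placeholder, not taken from the diff
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [
    {"role": "system", "content": "Answer clearly and in detail."},
    {"role": "user", "content": "Why is the sky blue?"},
]

# Method 1: render the chat template to a string, tokenize, and call generate().
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
inputs = {k: v.to(model.device) for k, v in inputs.items()}  # assumed body of the truncated loop
outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, top_p=0.85, temperature=0.7)
print(tokenizer.batch_decode(outputs)[0])
```

Note that the added "Method 2" feeds the `messages` list straight into the pipeline; chat-format input for `text-generation` pipelines is only accepted by reasonably recent `transformers` releases, so pin the library version if you depend on it.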
@@ -324,6 +344,8 @@ For direct use with `transformers`, you can easily get started with the followin
     {"role": "system", "content": "Answer clearly and in detail."},
     {"role": "user", "content": "Why is the sky blue?"}
 ]
+
+# Method 1
 prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
 for k,v in inputs.items():
@@ -332,6 +354,23 @@ For direct use with `transformers`, you can easily get started with the followin
 results = tokenizer.batch_decode(outputs)[0]
 print(results)
 
+# Method 2
+import transformers
+pipe = transformers.pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,  # return only the newly generated text, not the prompt
+    task='text-generation',
+    max_new_tokens=512,  # max number of tokens to generate in the output
+    temperature=0.7,  # temperature for more or less creative answers
+    do_sample=True,
+    top_p=0.85,
+)
+
+sequences = pipe(messages)
+for seq in sequences:
+    print(f"{seq['generated_text']}")
+
 ```
 
 ### Unsloth
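The two hunks above apply the same edits to the README's 4-bit quantized variant of the snippet; the quantized loading code itself sits outside the diff context. A hedged sketch of typical 4-bit loading with `BitsAndBytesConfig` follows (it requires the `bitsandbytes` package); the README's actual configuration is not visible here, so the quantization type, compute dtype, and model ID are all assumptions.

```python
# Hypothetical 4-bit loading sketch; none of these values are confirmed by the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "some-org/some-model"  # placeholder, as above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits at load time
    bnb_4bit_quant_type="nf4",              # NormalFloat4, a common inference default
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for the actual matmuls
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)
# Generation then proceeds exactly as in the full-precision example above.
```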
@@ -385,6 +424,16 @@ For direct use with `unsloth`, you can easily get started with the following ste
 [Unsloth](https://unsloth.ai) is a great tool that helps us develop products easily, at a lower cost than expected.
 
 ## Citation instructions
+```bibtex
+@misc{basile2023llamantino,
+      title={LLaMAntino: LLaMA 2 Models for Effective Text Generation in Italian Language},
+      author={Pierpaolo Basile and Elio Musacchio and Marco Polignano and Lucia Siciliani and Giuseppe Fiameni and Giovanni Semeraro},
+      year={2023},
+      eprint={2312.09993},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
 
 ```bibtex
 @article{llama3modelcard,
@@ -395,13 +444,3 @@ For direct use with `unsloth`, you can easily get started with the following ste
 }
 ```
 
-```bibtex
-@misc{basile2023llamantino,
-      title={LLaMAntino: LLaMA 2 Models for Effective Text Generation in Italian Language},
-      author={Pierpaolo Basile and Elio Musacchio and Marco Polignano and Lucia Siciliani and Giuseppe Fiameni and Giovanni Semeraro},
-      year={2023},
-      eprint={2312.09993},
-      archivePrefix={arXiv},
-      primaryClass={cs.CL}
-}
-```