|
--- |
|
language: |
|
- en |
|
--- |
|
## Ars model |
|
This model was trained on stanford alpaca dataset |
|
|
|
|
|
## To Run: |
|
from peft import PeftModel |
|
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig |
|
|
|
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf") |
|
|
|
model = LLaMAForCausalLM.from_pretrained( |
|
"decapoda-research/llama-7b-hf", |
|
load_in_8bit=True, |
|
device_map="auto", |
|
)\ |
|
model = PeftModel.from_pretrained(model, "patulya/ars") |
|
|
|
PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request. |
|
\### Instruction: |
|
{your_instruction} |
|
\### Response:""" |
|
|
|
inputs = tokenizer( |
|
PROMPT, |
|
return_tensors="pt", |
|
) |
|
|
|
input_ids = inputs["input_ids"].cuda() |
|
|
|
generation_config = GenerationConfig(\ |
|
temperature=0.6,\ |
|
top_p=0.95,\ |
|
repetition_penalty=1.15,\ |
|
) |
|
|
|
print("Generating...") |
|
|
|
generation_output = model.generate(\ |
|
input_ids=input_ids,\ |
|
generation_config=generation_config,\ |
|
return_dict_in_generate=True,\ |
|
output_scores=True,\ |
|
max_new_tokens=128,\ |
|
) |
|
|
|
for s in generation_output.sequences: print(tokenizer.decode(s)) |