patulya commited on
Commit
a8b9616
1 Parent(s): f8b73e2

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +41 -0
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Ars model

To run:

```python
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")

model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, "patulya/alpaca7B-lora")

PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{your_instruction}
### Response:"""

inputs = tokenizer(
    PROMPT,
    return_tensors="pt",
)
input_ids = inputs["input_ids"].cuda()

generation_config = GenerationConfig(
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.15,
)
print("Generating...")
generation_output = model.generate(
    input_ids=input_ids,
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
    max_new_tokens=128,
)
for s in generation_output.sequences:
    print(tokenizer.decode(s))
```