Ashishkr committed on
Commit
7aca41d
1 Parent(s): a1b25e9

Update README.md

Files changed (1)
  1. README.md +55 -19
README.md CHANGED
@@ -2,9 +2,7 @@
  tags:
  - autotrain
  - text-generation
- widget:
- - text: "I love AutoTrain because "
- license: other
+
  ---

  # Model Trained Using AutoTrain
@@ -15,26 +13,64 @@ This model was trained using AutoTrain. For more information, please visit [Auto

  ```python

- from transformers import AutoModelForCausalLM, AutoTokenizer

- model_path = "PATH_TO_THIS_REPO"
+ from peft import PeftModel, PeftConfig
+ from transformers import AutoModelForCausalLM
+ from transformers import AutoTokenizer
+ import torch
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ config = PeftConfig.from_pretrained("Ashishkr/llama2-qrecc-context-resolution")
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
+ model = PeftModel.from_pretrained(model, "Ashishkr/llama2-qrecc-context-resolution").to(device)
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+
+ def response_generate(
+     model: AutoModelForCausalLM,
+     tokenizer: AutoTokenizer,
+     prompt: str,
+     max_new_tokens: int = 128,
+     temperature: float = 0.7,
+ ):
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     inputs = tokenizer(
+         [prompt],
+         return_tensors="pt",
+         return_token_type_ids=False,
+     ).to(
+         device
+     )

- tokenizer = AutoTokenizer.from_pretrained(model_path)
- model = AutoModelForCausalLM.from_pretrained(
-     model_path,
-     device_map="auto",
-     torch_dtype='auto'
- ).eval()
+     with torch.autocast("cuda", dtype=torch.bfloat16):
+         response = model.generate(
+             **inputs,
+             max_new_tokens=max_new_tokens,
+             temperature=temperature,
+             return_dict_in_generate=True,
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.pad_token_id,
+         )

- # Prompt content: "hi"
- messages = [
-     {"role": "user", "content": "hi"}
- ]
+     decoded_output = tokenizer.decode(
+         response["sequences"][0],
+         skip_special_tokens=True,
+     )

- input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
- output_ids = model.generate(input_ids.to('cuda'))
- response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+     return decoded_output
+
+ prompt = """>>CONTEXT<<I heard John Marks was the first christian missionary in Ireland. What was the capital then??>>REWRITE<<"""
+
+ response = response_generate(
+     model,
+     tokenizer,
+     prompt,
+     max_new_tokens=20,
+     temperature=0.1,
+ )

- # Model response: "Hello! How can I assist you today?"
  print(response)
+
+
  ```
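
Note on the updated example: `tokenizer.decode` is applied to the full generated sequence, so the printed `response` echoes the `>>CONTEXT<< ... >>REWRITE<<` prompt before the model's continuation. A minimal post-processing sketch, assuming that prompt format and the `response` string produced by the code above (the helper name `extract_rewrite` is illustrative, not part of the committed README):

```python
# Sketch: recover only the rewritten, context-resolved question.
# Assumes `response` is the decoded string returned by response_generate above,
# which repeats the prompt before the model's continuation.
def extract_rewrite(decoded_output: str, marker: str = ">>REWRITE<<") -> str:
    # Everything after the last marker is the model's rewrite; trim whitespace.
    return decoded_output.split(marker)[-1].strip()

print(extract_rewrite(response))
```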