kjn1009 committed
Commit 46772b0
1 Parent(s): b6a555d

Create app.py

Files changed (1)
  1. app.py +30 -0
app.py ADDED
@@ -0,0 +1,30 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ # Prepare the input as before
+ chat = [
+     {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
+     {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
+ ]
+
+ # 1: Load the model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+
+ # 2: Apply the chat template
+ formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+ print("Formatted chat:\n", formatted_chat)
+
+ # 3: Tokenize the chat (this can be combined with the previous step using tokenize=True)
+ inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
+ # Move the tokenized inputs to the same device the model is on (GPU/CPU)
+ inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
+ print("Tokenized inputs:\n", inputs)
+
+ # 4: Generate text from the model (greedy decoding; a temperature of 0 is not a valid sampling value, so use do_sample=False instead)
+ outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False)
+ print("Generated tokens:\n", outputs)
+
+ # 5: Decode only the newly generated tokens back to a string, skipping the prompt portion of the output
+ decoded_output = tokenizer.decode(outputs[0][inputs["input_ids"].size(1):], skip_special_tokens=True)
+ print("Decoded output:\n", decoded_output)