nisten committed on
Commit
9f7cb9a
1 Parent(s): 2f4b832

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -1
app.py CHANGED
@@ -1,3 +1,31 @@
1
  import gradio as gr
 
 
 
2
 
3
- gr.load("models/allenai/OLMoE-1B-7B-0924").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import OlmoeForCausalLM, AutoTokenizer
3
+ import torch
4
+ import os
5
 
6
# NOTE(review): this flag is presumably meant to enable HF Spaces ZeroGPU;
# setting an env var after process start may have no effect — confirm
# against the Spaces ZeroGPU documentation.
os.environ["ZEROGPU"] = "1"

# Run on CUDA when a GPU is visible, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint identifier used for both the model and its tokenizer
# (downloaded from the Hub on first run).
_MODEL_ID = "allenai/OLMoE-1B-7B-0924"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = OlmoeForCausalLM.from_pretrained(_MODEL_ID).to(DEVICE)

# Persona instructions for the chat app.
system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                 "who is stuck inside a step function machine and remembers and counts everything he says "
                 "while always answering questions in full first principles analysis type of thinking "
                 "without using any analogies and always showing full working code or output in his answers.")
22
# Generate a reply with the OLMoE model.
def generate_text(prompt, history=None):
    """Generate a short continuation of *prompt* with the OLMoE model.

    Args:
        prompt: The user's message text.
        history: Ignored. Accepted so this function is compatible with
            ``gr.ChatInterface``, which invokes ``fn(message, history)`` —
            the original single-parameter signature raised a TypeError
            on every chat turn.

    Returns:
        The decoded model output as a string (includes the prompt text
        and any special tokens, as in the original implementation).
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Move every input tensor onto the same device as the model.
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
    # Use max_new_tokens rather than max_length: max_length counts the
    # prompt tokens too, so a prompt of 64+ tokens left no room to
    # generate anything at all.
    out = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(out[0])
28
+
29
# Set up the Gradio chat interface.
# gr.ChatInterface has no ``system_prompt`` parameter — passing one raised
# a TypeError at startup. Surface the persona text via ``description``
# instead so the app still launches and the persona is visible to users.
iface = gr.ChatInterface(fn=generate_text, description=system_prompt)
iface.launch()