gabrielclark3330 committed on
Commit ede06bd
1 Parent(s): 7eeefc1

Expose both base and instruct models

Files changed (1)
  1. app.py +35 -21
app.py CHANGED
@@ -2,23 +2,36 @@ import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-from huggingface_hub import login
 
-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-7B")
-model = AutoModelForCausalLM.from_pretrained(
-    "Zyphra/Zamba2-7B",
-    device_map="cuda",  # Automatically handles device placement
-    torch_dtype=torch.bfloat16
-)
+# Define models as None to delay loading
+model, model_instruct = None, None
+tokenizer, tokenizer_instruct = None, None
+
+# Define the response function with lazy loading
+def generate_response(input_text, max_new_tokens, temperature, top_k, top_p, repetition_penalty, num_beams, length_penalty, model_choice):
+    global model, model_instruct, tokenizer, tokenizer_instruct
+
+    # Lazy loading of the selected model
+    if model_choice == "Zamba2-7B":
+        if model is None:  # Load only if not already loaded
+            tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-7B")
+            model = AutoModelForCausalLM.from_pretrained(
+                "Zyphra/Zamba2-7B", device_map="cuda", torch_dtype=torch.bfloat16
+            )
+        selected_model = model
+        selected_tokenizer = tokenizer
+    else:
+        if model_instruct is None:  # Load only if not already loaded
+            tokenizer_instruct = AutoTokenizer.from_pretrained("Zyphra/Zamba2-7B-instruct")
+            model_instruct = AutoModelForCausalLM.from_pretrained(
+                "Zyphra/Zamba2-7B-instruct", device_map="cuda", torch_dtype=torch.bfloat16
+            )
+        selected_model = model_instruct
+        selected_tokenizer = tokenizer_instruct
 
-# Define the function to generate responses
-def generate_response(input_text, max_new_tokens, temperature, top_k, top_p, repetition_penalty, num_beams, length_penalty):
-    # Tokenize and move input to model's device
-    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
-
-    # Generate response using specified parameters
-    outputs = model.generate(
+    # Tokenize and generate response
+    input_ids = selected_tokenizer(input_text, return_tensors="pt").input_ids.to(selected_model.device)
+    outputs = selected_model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
         do_sample=True,
@@ -30,25 +43,26 @@ def generate_response(input_text, max_new_tokens, temperature, top_k, top_p, rep
         length_penalty=length_penalty,
         num_return_sequences=1
     )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = selected_tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
-# Create Gradio interface with adjustable parameters
+# Gradio interface with model selection
 demo = gr.Interface(
     fn=generate_response,
     inputs=[
-        gr.Textbox(lines=1, placeholder="Enter a text to prepend...", label="Input Text"),
+        gr.Textbox(lines=1, placeholder="Enter your input text...", label="Input Text"),
         gr.Slider(50, 1000, step=50, value=500, label="Max New Tokens"),
         gr.Slider(0.1, 1.5, step=0.1, value=0.7, label="Temperature"),
         gr.Slider(1, 100, step=1, value=50, label="Top K"),
         gr.Slider(0.1, 1.0, step=0.1, value=0.9, label="Top P"),
         gr.Slider(1.0, 2.0, step=0.1, value=1.2, label="Repetition Penalty"),
         gr.Slider(1, 10, step=1, value=5, label="Number of Beams"),
-        gr.Slider(0.0, 2.0, step=0.1, value=1.0, label="Length Penalty")
+        gr.Slider(0.0, 2.0, step=0.1, value=1.0, label="Length Penalty"),
+        gr.Dropdown(["Zamba2-7B", "Zamba2-7B-instruct"], label="Model Choice")
     ],
     outputs=gr.Textbox(label="Generated Response"),
-    title="Zamba2-7B Model",
-    description="Ask Zamba2 7B a question with customizable parameters."
+    title="Zamba2-7B Model Selector",
+    description="Choose a model and ask a question with customizable parameters."
)
 
 if __name__ == "__main__":
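The core change in this commit is a lazy-loading cache: each checkpoint is loaded on first request and kept in module-level globals afterwards, so switching models in the dropdown never reloads one that is already resident. A minimal standalone sketch of that pattern (the get_model helper is hypothetical, not part of the commit; it assumes the same Zyphra checkpoints, a CUDA device, and a transformers version with Zamba2 support):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Cache of already-loaded (tokenizer, model) pairs, keyed by repo id.
_loaded = {}

def get_model(repo_id):
    """Load a checkpoint on first use; return the cached pair afterwards."""
    if repo_id not in _loaded:
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        model = AutoModelForCausalLM.from_pretrained(
            repo_id, device_map="cuda", torch_dtype=torch.bfloat16
        )
        _loaded[repo_id] = (tokenizer, model)
    return _loaded[repo_id]

# Usage mirroring the app's two dropdown choices:
# tokenizer, model = get_model("Zyphra/Zamba2-7B")
# tokenizer, model = get_model("Zyphra/Zamba2-7B-instruct")

As in the committed app.py, nothing is ever evicted, so once both choices have been used, both models stay resident on the GPU at the same time.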