ehristoforu committed on
Commit
6682edc
β€’
1 Parent(s): 19cb03a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -60
app.py CHANGED
@@ -42,7 +42,6 @@ model.eval()
42
  @spaces.GPU(duration=50)
43
  def generate(
44
  message: str,
45
- chat_history: list[tuple[str, str]],
46
  system_prompt: str,
47
  max_new_tokens: int = 1024,
48
  temperature: float = 0.6,
@@ -52,13 +51,6 @@ def generate(
52
  ) -> Iterator[str]:
53
  conversation = []
54
  conversation.append({"role": "system", "content": system_prompt})
55
- for user, assistant in chat_history:
56
- conversation.extend(
57
- [
58
- {"role": "user", "content": user},
59
- {"role": "assistant", "content": assistant},
60
- ]
61
- )
62
  conversation.append({"role": "user", "content": message})
63
 
64
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
@@ -87,60 +79,68 @@ def generate(
87
  outputs.append(text)
88
  yield "".join(outputs)
89
 
 
 
 
 
 
 
90
 
91
- chat_interface = gr.ChatInterface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  fn=generate,
93
- additional_inputs=[
94
- gr.Textbox(
95
- label="System prompt",
96
- max_lines=5,
97
- lines=2,
98
- interactive=True,
99
- ),
100
- gr.Slider(
101
- label="Max new tokens",
102
- minimum=1,
103
- maximum=MAX_MAX_NEW_TOKENS,
104
- step=1,
105
- value=DEFAULT_MAX_NEW_TOKENS,
106
- ),
107
- gr.Slider(
108
- label="Temperature",
109
- minimum=0.1,
110
- maximum=4.0,
111
- step=0.1,
112
- value=0.6,
113
- ),
114
- gr.Slider(
115
- label="Top-p (nucleus sampling)",
116
- minimum=0.05,
117
- maximum=1.0,
118
- step=0.05,
119
- value=0.9,
120
- ),
121
- gr.Slider(
122
- label="Top-k",
123
- minimum=1,
124
- maximum=1000,
125
- step=1,
126
- value=50,
127
- ),
128
- gr.Slider(
129
- label="Repetition penalty",
130
- minimum=1.0,
131
- maximum=2.0,
132
- step=0.05,
133
- value=1.2,
134
- ),
135
- ],
136
- stop_btn=None,
137
- examples=[
138
- ["Hello there! How are you doing?"],
139
- ["Can you explain briefly to me what is the Python programming language?"],
140
- ["Explain the plot of Cinderella in a sentence."],
141
- ["How many hours does it take a man to eat a Helicopter?"],
142
- ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
143
- ],
144
  )
145
 
146
  with gr.Blocks(css="style.css", fill_height=True) as demo:
 
42
  @spaces.GPU(duration=50)
43
  def generate(
44
  message: str,
 
45
  system_prompt: str,
46
  max_new_tokens: int = 1024,
47
  temperature: float = 0.6,
 
51
  ) -> Iterator[str]:
52
  conversation = []
53
  conversation.append({"role": "system", "content": system_prompt})
 
 
 
 
 
 
 
54
  conversation.append({"role": "user", "content": message})
55
 
56
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
 
79
  outputs.append(text)
80
  yield "".join(outputs)
81
 
82
+ message = gr.Textbox(
83
+ label="Message",
84
+ max_lines=5,
85
+ lines=2,
86
+ interactive=True,
87
+ )
88
 
89
+ system_prompt = gr.Textbox(
90
+ label="System prompt",
91
+ max_lines=5,
92
+ lines=2,
93
+ interactive=True,
94
+ )
95
+ max_tokens = gr.Slider(
96
+ label="Max new tokens",
97
+ minimum=1,
98
+ maximum=MAX_MAX_NEW_TOKENS,
99
+ step=1,
100
+ value=DEFAULT_MAX_NEW_TOKENS,
101
+ )
102
+ temperature = gr.Slider(
103
+ label="Temperature",
104
+ minimum=0.1,
105
+ maximum=4.0,
106
+ step=0.1,
107
+ value=0.6,
108
+ )
109
+ top_p = gr.Slider(
110
+ label="Top-p (nucleus sampling)",
111
+ minimum=0.05,
112
+ maximum=1.0,
113
+ step=0.05,
114
+ value=0.9,
115
+ )
116
+ top_k = gr.Slider(
117
+ label="Top-k",
118
+ minimum=1,
119
+ maximum=1000,
120
+ step=1,
121
+ value=50,
122
+ )
123
+ repeat_penalty = gr.Slider(
124
+ label="Repetition penalty",
125
+ minimum=1.0,
126
+ maximum=2.0,
127
+ step=0.05,
128
+ value=1.2,
129
+ )
130
+
131
+ output = gr.Textbox(
132
+ label="Output",
133
+ max_lines=16,
134
+ lines=10,
135
+ interactive=True,
136
+ )
137
+
138
+ chat_interface = gr.Interface(
139
  fn=generate,
140
+ inputs=[message, system_prompt, max_tokens, temperature, top_p, top_k, repeat_penalty],
141
+ outputs=output,
142
+ api_name="/run",
143
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  )
145
 
146
  with gr.Blocks(css="style.css", fill_height=True) as demo: