MuntasirHossain
committed on
Commit
•
99ae753
1
Parent(s):
f3902de
Update app.py
Browse files
app.py
CHANGED
@@ -17,15 +17,15 @@ def format_prompt(input_text, history):
|
|
17 |
<start_of_turn>model"""
|
18 |
return prompt
|
19 |
|
20 |
-
def generate(prompt, history):
|
21 |
if not history:
|
22 |
history = []
|
23 |
|
24 |
kwargs = dict(
|
25 |
-
temperature=
|
26 |
-
max_new_tokens=
|
27 |
-
top_p=
|
28 |
-
repetition_penalty=
|
29 |
do_sample=True,
|
30 |
)
|
31 |
|
@@ -40,6 +40,45 @@ def generate(prompt, history):
|
|
40 |
return output
|
41 |
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
chatbot = gr.Chatbot(height=500)
|
44 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
45 |
gr.HTML("<center><h1>Google Gemma 7B IT</h1><center>")
|
@@ -50,6 +89,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
50 |
undo_btn=None,
|
51 |
clear_btn="Clear",
|
52 |
description="This chatbot is using a Hugging Face Inference Client for the google/gemma-7b-it model.",
|
|
|
53 |
examples=[["Explain artificial intelligence in a few lines."]]
|
54 |
)
|
55 |
demo.queue().launch()
|
|
|
17 |
<start_of_turn>model"""
|
18 |
return prompt
|
19 |
|
20 |
+
def generate(prompt, history, temperature=0.95, max_new_tokens=512, top_p=0.9, repetition_penalty=1.0):
|
21 |
if not history:
|
22 |
history = []
|
23 |
|
24 |
kwargs = dict(
|
25 |
+
temperature=temperature,
|
26 |
+
max_new_tokens=max_new_tokens,
|
27 |
+
top_p=top_p,
|
28 |
+
repetition_penalty=repetition_penalty,
|
29 |
do_sample=True,
|
30 |
)
|
31 |
|
|
|
40 |
return output
|
41 |
|
42 |
|
43 |
+
# Extra generation controls shown alongside the chat box.
# NOTE(review): the order here mirrors the extra parameters of `generate`
# (temperature, max_new_tokens, top_p, repetition_penalty); the values are
# presumably forwarded positionally after (prompt, history) — keep both in
# sync when adding or reordering sliders.
additional_inputs = [
    gr.Slider(
        label="Temperature",
        value=0.85,
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        interactive=True,
        # The slider tops out at 1.0, so the hint must not talk about values > 1.
        info="A higher value (closer to 1) generates more randomness and variability in the model response",
    ),
    gr.Slider(
        label="Max new tokens",
        value=512,
        minimum=128,
        # 1024 (not 1048) so the upper bound lands on the 64-token step grid
        # starting at 128: 128, 192, ..., 1024.
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens generated in the model response",
    ),
    gr.Slider(
        label="Top-p (random sampling)",
        value=0.80,
        minimum=0.1,
        maximum=1,
        step=0.05,
        interactive=True,
        info="A smaller value generates the highest probability tokens, a higher value (~ 1) allows low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.0,
        minimum=0.5,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalizes repeated tokens in model response",
    ),
]
|
81 |
+
|
82 |
chatbot = gr.Chatbot(height=500)
|
83 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
84 |
gr.HTML("<center><h1>Google Gemma 7B IT</h1><center>")
|
|
|
89 |
undo_btn=None,
|
90 |
clear_btn="Clear",
|
91 |
description="This chatbot is using a Hugging Face Inference Client for the google/gemma-7b-it model.",
|
92 |
+
additional_inputs=additional_inputs,
|
93 |
examples=[["Explain artificial intelligence in a few lines."]]
|
94 |
)
|
95 |
demo.queue().launch()
|