gemma-2-9b-it

Runtime error

App Files Files Community

ehristoforu committed on Jul 16

Commit

6682edc

•

1 Parent(s): 19cb03a

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -60

app.py CHANGED Viewed

@@ -42,7 +42,6 @@ model.eval()
 @spaces.GPU(duration=50)
 def generate(
     message: str,
-    chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
@@ -52,13 +51,6 @@ def generate(
 ) -> Iterator[str]:
     conversation = []
     conversation.append({"role": "system", "content": system_prompt})
-    for user, assistant in chat_history:
-        conversation.extend(
-            [
-                {"role": "user", "content": user},
-                {"role": "assistant", "content": assistant},
-            ]
-        )
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
@@ -87,60 +79,68 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
-chat_interface = gr.ChatInterface(
     fn=generate,
-    additional_inputs=[
-        gr.Textbox(
-            label="System prompt",
-            max_lines=5,
-            lines=2,
-            interactive=True,
-        ),
-        gr.Slider(
-            label="Max new tokens",
-            minimum=1,
-            maximum=MAX_MAX_NEW_TOKENS,
-            step=1,
-            value=DEFAULT_MAX_NEW_TOKENS,
-        ),
-        gr.Slider(
-            label="Temperature",
-            minimum=0.1,
-            maximum=4.0,
-            step=0.1,
-            value=0.6,
-        ),
-        gr.Slider(
-            label="Top-p (nucleus sampling)",
-            minimum=0.05,
-            maximum=1.0,
-            step=0.05,
-            value=0.9,
-        ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=50,
-        ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            value=1.2,
-        ),
-    ],
-    stop_btn=None,
-    examples=[
-        ["Hello there! How are you doing?"],
-        ["Can you explain briefly to me what is the Python programming language?"],
-        ["Explain the plot of Cinderella in a sentence."],
-        ["How many hours does it take a man to eat a Helicopter?"],
-        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
-    ],
 )
 with gr.Blocks(css="style.css", fill_height=True) as demo:

 @spaces.GPU(duration=50)
 def generate(
     message: str,
     system_prompt: str,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
 ) -> Iterator[str]:
     conversation = []
     conversation.append({"role": "system", "content": system_prompt})
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         outputs.append(text)
         yield "".join(outputs)
+message = gr.Textbox(
+    label="Message",
+    max_lines=5,
+    lines=2,
+    interactive=True,
+)
+system_prompt = gr.Textbox(
+    label="System prompt",
+    max_lines=5,
+    lines=2,
+    interactive=True,
+)
+max_tokens = gr.Slider(
+    label="Max new tokens",
+    minimum=1,
+    maximum=MAX_MAX_NEW_TOKENS,
+    step=1,
+        value=DEFAULT_MAX_NEW_TOKENS,
+)
+temperature = gr.Slider(
+    label="Temperature",
+    minimum=0.1,
+    maximum=4.0,
+    step=0.1,
+    value=0.6,
+)
+top_p = gr.Slider(
+    label="Top-p (nucleus sampling)",
+    minimum=0.05,
+    maximum=1.0,
+    step=0.05,
+    value=0.9,
+)
+top_k = gr.Slider(
+    label="Top-k",
+    minimum=1,
+    maximum=1000,
+    step=1,
+    value=50,
+)
+repeat_penalty = gr.Slider(
+    label="Repetition penalty",
+    minimum=1.0,
+    maximum=2.0,
+    step=0.05,
+    value=1.2,
+)
+output = gr.Textbox(
+    label="Output",
+    max_lines=16,
+    lines=10,
+    interactive=True,
+)
+chat_interface = gr.Interface(
     fn=generate,
+    inputs=[message, system_prompt, max_tokens, temperature, top_p, top_k, repeat_penalty],
+    outputs=output,
+    api_name="/run",
 )
 with gr.Blocks(css="style.css", fill_height=True) as demo: