Spaces:

tulipbrain
/

chat

Runtime error

App Files Files Community

rishiraj committed on Oct 31, 2023

Commit

a4c5bac

•

1 Parent(s): 82822b0

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -22

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ logging.basicConfig(
 base_model = "HuggingFaceH4/zephyr-7b-beta"
 adapter_model = None
-tokenizer,model,device = load_tokenizer_and_model(base_model,adapter_model)
 total_count = 0
 def predict(text,
@@ -25,7 +25,7 @@ def predict(text,
             top_p,
             temperature,
             max_length_tokens,
-            max_context_length_tokens,):
     if text=="":
         yield chatbot,history,"Empty context."
         return
@@ -84,7 +84,7 @@ def retry(
         top_p,
         temperature,
         max_length_tokens,
-        max_context_length_tokens,
         ):
     logging.info("Retry...")
     if len(history) == 0:
@@ -132,36 +132,40 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                 with gr.Tab(label="Parameter Setting"):
                     gr.Markdown("# Parameters")
                     top_p = gr.Slider(
-                        minimum=-0,
-                        maximum=1.0,
                         value=0.95,
                         step=0.05,
                         interactive=True,
-                        label="Top-p",
                     )
                     temperature = gr.Slider(
-                        minimum=0.1,
-                        maximum=2.0,
                         value=1,
-                        step=0.1,
                         interactive=True,
-                        label="Temperature",
                     )
                     max_length_tokens = gr.Slider(
                         minimum=0,
-                        maximum=512,
-                        value=512,
-                        step=8,
                         interactive=True,
-                        label="Max Generation Tokens",
                     )
-                    max_context_length_tokens = gr.Slider(
-                        minimum=0,
-                        maximum=4096,
-                        value=2048,
-                        step=128,
                         interactive=True,
-                        label="Max History Tokens",
                     )
     gr.Markdown(description)
@@ -174,7 +178,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
             top_p,
             temperature,
             max_length_tokens,
-            max_context_length_tokens,
         ],
         outputs=[chatbot, history, status_display],
         show_progress=True,
@@ -188,7 +192,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
             top_p,
             temperature,
             max_length_tokens,
-            max_context_length_tokens,
         ],
         outputs=[chatbot, history, status_display],
         show_progress=True,

 base_model = "HuggingFaceH4/zephyr-7b-beta"
 adapter_model = None
+# tokenizer,model,device = load_tokenizer_and_model(base_model,adapter_model)
 total_count = 0
 def predict(text,
             top_p,
             temperature,
             max_length_tokens,
+            repetition_penalty):
     if text=="":
         yield chatbot,history,"Empty context."
         return
         top_p,
         temperature,
         max_length_tokens,
+        repetition_penalty
         ):
     logging.info("Retry...")
     if len(history) == 0:
                 with gr.Tab(label="Parameter Setting"):
                     gr.Markdown("# Parameters")
                     top_p = gr.Slider(
+                        label="Top-p (nucleus sampling)",
                         value=0.95,
+                        minimum=0.0,
+                        maximum=1,
                         step=0.05,
                         interactive=True,
+                        info="Higher values sample more low-probability tokens",
                     )
                     temperature = gr.Slider(
+                        label="Temperature",
                         value=1,
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.05,
                         interactive=True,
+                        info="Higher values produce more diverse outputs",
                     )
                     max_length_tokens = gr.Slider(
+                        label="Max new tokens",
+                        value=256,
                         minimum=0,
+                        maximum=1048,
+                        step=64,
                         interactive=True,
+                        info="The maximum numbers of new tokens",
                     )
+                    repetition_penalty = gr.Slider(
+                        label="Repetition penalty",
+                        value=1.2,
+                        minimum=1.0,
+                        maximum=2.0,
+                        step=0.05,
                         interactive=True,
+                        info="Penalize repeated tokens",
                     )
     gr.Markdown(description)
             top_p,
             temperature,
             max_length_tokens,
+            repetition_penalty,
         ],
         outputs=[chatbot, history, status_display],
         show_progress=True,
             top_p,
             temperature,
             max_length_tokens,
+            repetition_penalty,
         ],
         outputs=[chatbot, history, status_display],
         show_progress=True,