google-gemma-dev

Runtime error

App Files Files Community

Omnibus committed on Mar 13

Commit

c371eda

•

1 Parent(s): 7fea8b8

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -38

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from huggingface_hub import InferenceClient
 import random
 ss_client = Client("https://omnibus-html-image-current-tab.hf.space/")
-'''models=[
     "google/gemma-7b",
     "google/gemma-7b-it",
     "google/gemma-2b",
@@ -15,52 +15,36 @@ InferenceClient(models[0]),
 InferenceClient(models[1]),
 InferenceClient(models[2]),
 InferenceClient(models[3]),
-]'''
-models=[
-    "google/gemma-7b",
-    "google/gemma-7b-it",
-    "google/gemma-2b",
-    "google/gemma-2b-it",
 ]
-client_z=[]
 def load_models(inp):
-    print(type(inp))
-    print(inp)
-    print(models[inp])
-    client_z.clear()
-    client_z.append(InferenceClient(models[inp]))
     return gr.update(label=models[inp])
-VERBOSE=False
 def format_prompt(message, history, cust_p):
-    prompt = ""
     if history:
-        #<start_of_turn>userHow does the brain work?<end_of_turn><start_of_turn>model
         for user_prompt, bot_response in history:
             prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
-            #print(prompt)
-            prompt += f"<start_of_turn>model\n{bot_response}<end_of_turn>"
-            #print(prompt)
     #prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
     prompt+=cust_p.replace("USER_INPUT",message)
     return prompt
-def custom_prompt(prompt):
-    return prompt
 def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem,cust_p):
     #token max=8192
     hist_len=0
-    #client=clients[int(client_choice)-1]
-    client=client_z[0]
     if not history:
         history = []
         hist_len=0
@@ -79,7 +63,7 @@ def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,
         generate_kwargs = dict(
             temperature=temp,
             max_new_tokens=tokens,
-            #top_p=top_p,
             repetition_penalty=rep_p,
             do_sample=True,
             seed=seed,
@@ -88,7 +72,7 @@ def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,
             formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", memory[0-chat_mem:],cust_p)
         else:
             formatted_prompt = format_prompt(prompt, memory[0-chat_mem:],cust_p)
-        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             output += response.token.text
@@ -96,10 +80,10 @@ def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,
         history.append((prompt,output))
         memory.append((prompt,output))
         yield history,memory
     if VERBOSE==True:
         print("\n######### HIST "+str(in_len))
         print("\n######### TOKENS "+str(tokens))
-        #print("\n######### PROMPT "+str(len(formatted_prompt)))
 def get_screenshot(chat: list,height=5000,width=600,chatblock=[],theme="light",wait=3000,header=True):
     print(chatblock)
@@ -130,6 +114,8 @@ with gr.Blocks() as app:
             with gr.Column(scale=3):
                 inp = gr.Textbox(label="Prompt")
                 sys_inp = gr.Textbox(label="System Prompt (optional)")
                 with gr.Row():
                     with gr.Column(scale=2):
                         btn = gr.Button("Chat")
@@ -138,16 +124,14 @@ with gr.Blocks() as app:
                             stop_btn=gr.Button("Stop")
                             clear_btn=gr.Button("Clear")
                 client_choice=gr.Dropdown(label="Models",type='index',choices=[c for c in models],value=models[0],interactive=True)
-                with gr.Accordion("Prompt Format",open=False):
-                    custom_prompt=gr.Textbox(label="Prompt Format", info="For testing purposes. 'USER_INPUT' is where 'SYSTEM_PROMPT, PROMPT' will be placed", lines=5,value="<start_of_turn>userUSER_INPUT<end_of_turn><start_of_turn>model")
             with gr.Column(scale=1):
                 with gr.Group():
                     rand = gr.Checkbox(label="Random Seed", value=True)
                     seed=gr.Slider(label="Seed", minimum=1, maximum=1111111111111111,step=1, value=rand_val)
                     tokens = gr.Slider(label="Max new tokens",value=1600,minimum=0,maximum=8000,step=64,interactive=True, visible=True,info="The maximum number of tokens")
-                    temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
-                    top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
-                    rep_p=gr.Slider(label="Repetition Penalty",step=0.1, minimum=0.1, maximum=2.0, value=1.0)
                     chat_mem=gr.Number(label="Chat Memory", info="Number of previous chats to retain",value=4)
         with gr.Accordion(label="Screenshot",open=False):
             with gr.Row():

 import random
 ss_client = Client("https://omnibus-html-image-current-tab.hf.space/")
+models=[
     "google/gemma-7b",
     "google/gemma-7b-it",
     "google/gemma-2b",
 InferenceClient(models[1]),
 InferenceClient(models[2]),
 InferenceClient(models[3]),
 ]
+VERBOSE=False
 def load_models(inp):
+    if VERBOSE==True:
+        print(type(inp))
+        print(inp)
+        print(models[inp])
+    #client_z.clear()
+    #client_z.append(InferenceClient(models[inp]))
     return gr.update(label=models[inp])
 def format_prompt(message, history, cust_p):
+    prompt = "<bos>"
     if history:
         for user_prompt, bot_response in history:
             prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
+            prompt += f"<start_of_turn>model{bot_response}<end_of_turn>"
+            if VERBOSE==True:
+                print(prompt)
     #prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
     prompt+=cust_p.replace("USER_INPUT",message)
     return prompt
 def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem,cust_p):
     #token max=8192
+    print(client_choice)
     hist_len=0
+    client=clients[int(client_choice)-1]
     if not history:
         history = []
         hist_len=0
         generate_kwargs = dict(
             temperature=temp,
             max_new_tokens=tokens,
+            top_p=top_p,
             repetition_penalty=rep_p,
             do_sample=True,
             seed=seed,
             formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", memory[0-chat_mem:],cust_p)
         else:
             formatted_prompt = format_prompt(prompt, memory[0-chat_mem:],cust_p)
+        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
         output = ""
         for response in stream:
             output += response.token.text
         history.append((prompt,output))
         memory.append((prompt,output))
         yield history,memory
     if VERBOSE==True:
         print("\n######### HIST "+str(in_len))
         print("\n######### TOKENS "+str(tokens))
 def get_screenshot(chat: list,height=5000,width=600,chatblock=[],theme="light",wait=3000,header=True):
     print(chatblock)
             with gr.Column(scale=3):
                 inp = gr.Textbox(label="Prompt")
                 sys_inp = gr.Textbox(label="System Prompt (optional)")
+                with gr.Accordion("Prompt Format",open=False):
+                    custom_prompt=gr.Textbox(label="Modify Prompt Format", info="For testing purposes. 'USER_INPUT' is where 'SYSTEM_PROMPT, PROMPT' will be placed", lines=3,value="<bos><start_of_turn>userUSER_INPUT<end_of_turn><start_of_turn>model")
                 with gr.Row():
                     with gr.Column(scale=2):
                         btn = gr.Button("Chat")
                             stop_btn=gr.Button("Stop")
                             clear_btn=gr.Button("Clear")
                 client_choice=gr.Dropdown(label="Models",type='index',choices=[c for c in models],value=models[0],interactive=True)
             with gr.Column(scale=1):
                 with gr.Group():
                     rand = gr.Checkbox(label="Random Seed", value=True)
                     seed=gr.Slider(label="Seed", minimum=1, maximum=1111111111111111,step=1, value=rand_val)
                     tokens = gr.Slider(label="Max new tokens",value=1600,minimum=0,maximum=8000,step=64,interactive=True, visible=True,info="The maximum number of tokens")
+                    temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.49)
+                    top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.49)
+                    rep_p=gr.Slider(label="Repetition Penalty",step=0.01, minimum=0.1, maximum=2.0, value=0.99)
                     chat_mem=gr.Number(label="Chat Memory", info="Number of previous chats to retain",value=4)
         with gr.Accordion(label="Screenshot",open=False):
             with gr.Row():