codelion committed
Commit a066122
Parent: af8ec69

Update app.py

Files changed (1):
  1. app.py +97 -60
app.py CHANGED
@@ -14,45 +14,45 @@ from optillm.leap import leap
 
 API_KEY = os.environ.get("OPENROUTER_API_KEY")
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    model,
-    approach,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def compare_responses(message, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p):
+    response1 = respond(message, [], model1, approach1, system_message, max_tokens, temperature, top_p)
+    response2 = respond(message, [], model2, approach2, system_message, max_tokens, temperature, top_p)
+    return response1, response2
+
+def respond(message, history, model, approach, system_message, max_tokens, temperature, top_p):
     client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")
-    system_prompt = system_message
-    initial_query = message
     messages = [{"role": "system", "content": system_message}]
-
     for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
+        if val[0]: messages.append({"role": "user", "content": val[0]})
+        if val[1]: messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
 
-    if approach == 'rto':
-        final_response = round_trip_optimization(system_prompt, initial_query, client, model)
-    elif approach == 'z3':
-        z3_solver = Z3SolverSystem(system_prompt, client, model)
-        final_response = z3_solver.process_query(initial_query)
-    elif approach == "self_consistency":
-        final_response = advanced_self_consistency_approach(system_prompt, initial_query, client, model)
-    elif approach == "rstar":
-        rstar = RStar(system_prompt, client, model)
-        final_response = rstar.solve(initial_query)
-    elif approach == "cot_reflection":
-        final_response = cot_reflection(system_prompt, initial_query, client, model)
-    elif approach == 'plansearch':
-        final_response = plansearch(system_prompt, initial_query, client, model)
-    elif approach == 'leap':
-        final_response = leap(system_prompt, initial_query, client, model)
+    if approach == "none":
+        response = client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+        )
+        return response.choices[0].message.content
+    else:
+        if approach == 'rto':
+            final_response = round_trip_optimization(system_message, message, client, model)
+        elif approach == 'z3':
+            z3_solver = Z3SolverSystem(system_message, client, model)
+            final_response = z3_solver.process_query(message)
+        elif approach == "self_consistency":
+            final_response = advanced_self_consistency_approach(system_message, message, client, model)
+        elif approach == "rstar":
+            rstar = RStar(system_message, client, model)
+            final_response = rstar.solve(message)
+        elif approach == "cot_reflection":
+            final_response = cot_reflection(system_message, message, client, model)
+        elif approach == 'plansearch':
+            final_response = plansearch(system_message, message, client, model)
+        elif approach == 'leap':
+            final_response = leap(system_message, message, client, model)
 
     return final_response
 
@@ -68,32 +68,69 @@ def respond(
     # response += token
     # yield response
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Dropdown(
-            ["nousresearch/hermes-3-llama-3.1-405b:free", "meta-llama/llama-3.1-8b-instruct:free", "qwen/qwen-2-7b-instruct:free",
-             "google/gemma-2-9b-it:free", "mistralai/mistral-7b-instruct:free"],
-            value="nousresearch/hermes-3-llama-3.1-405b:free", label="Model", info="Choose the base model"
-        ),
-        gr.Dropdown(
-            ["leap", "plansearch", "rstar", "cot_reflection", "rto", "self_consistency", "z3"], value="cot_reflection", label="Approach", info="Choose the approach"
-        ),
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
+def create_model_dropdown():
+    return gr.Dropdown(
+        ["nousresearch/hermes-3-llama-3.1-405b:free", "meta-llama/llama-3.1-8b-instruct:free",
+         "qwen/qwen-2-7b-instruct:free", "google/gemma-2-9b-it:free", "mistralai/mistral-7b-instruct:free"],
+        value="nousresearch/hermes-3-llama-3.1-405b:free", label="Model"
+    )
+
+def create_approach_dropdown():
+    return gr.Dropdown(
+        ["none", "leap", "plansearch", "rstar", "cot_reflection", "rto", "self_consistency", "z3"],
+        value="none", label="Approach"
+    )
+
+with gr.Blocks() as demo:
+    gr.Markdown("# LLM Optimization Comparison")
+
+    with gr.Row():
+        system_message = gr.Textbox(value="", label="System message")
+        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+    with gr.Tabs():
+        with gr.TabItem("Single Chat"):
+            model = create_model_dropdown()
+            approach = create_approach_dropdown()
+            chatbot = gr.Chatbot()
+            msg = gr.Textbox()
+            clear = gr.Button("Clear")
+
+            def user(user_message, history):
+                return "", history + [[user_message, None]]
+
+            def bot(history, model, approach, system_message, max_tokens, temperature, top_p):
+                user_message = history[-1][0]
+                bot_message = respond(user_message, history[:-1], model, approach, system_message, max_tokens, temperature, top_p)
+                history[-1][1] = bot_message
+                return history
+
+            msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
+                bot, [chatbot, model, approach, system_message, max_tokens, temperature, top_p], chatbot
+            )
+            clear.click(lambda: None, None, chatbot, queue=False)
+
+        with gr.TabItem("Compare"):
+            with gr.Row():
+                model1 = create_model_dropdown()
+                approach1 = create_approach_dropdown()
+                model2 = create_model_dropdown()
+                approach2 = create_approach_dropdown()
+
+            compare_input = gr.Textbox(label="Enter your message for comparison")
+            compare_button = gr.Button("Compare")
+
+            with gr.Row():
+                output1 = gr.Textbox(label="Response 1")
+                output2 = gr.Textbox(label="Response 2")
+
+            compare_button.click(
+                compare_responses,
+                inputs=[compare_input, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p],
+                outputs=[output1, output2]
+            )
 
 if __name__ == "__main__":
     demo.launch()
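
For a quick sanity check of the new Compare wiring without launching the UI, a minimal sketch that calls compare_responses directly. It assumes OPENROUTER_API_KEY is exported and that app.py is importable as a module named app; the model names, prompt, and sampling values are illustrative only, not part of the commit.

# Minimal sketch (not part of the commit): exercise compare_responses
# directly, bypassing Gradio. Assumes OPENROUTER_API_KEY is set and that
# app.py is importable as `app`.
from app import compare_responses

baseline, reflected = compare_responses(
    "What is 17 * 24?",                                         # message
    "meta-llama/llama-3.1-8b-instruct:free", "none",            # model1, approach1
    "meta-llama/llama-3.1-8b-instruct:free", "cot_reflection",  # model2, approach2
    "You are a helpful assistant.",                             # system_message
    512,   # max_tokens
    0.7,   # temperature
    0.95,  # top_p
)
print("baseline:", baseline)
print("cot_reflection:", reflected)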