Spaces:

valory
/

olas-prediction-leaderboard

Running

App Files Files Community

cyberosa committed on Jul 5, 2024

Commit

9b36cb7

1 Parent(s): d269dc6

disabling the benchmark feature until we fix it

Browse files

Files changed (5) hide show

.gitmodules +0 -3
app.py +125 -125
automate/automate.py +5 -9
olas-predict-benchmark +0 -1
start.py +16 -17

.gitmodules DELETED Viewed

@@ -1,3 +0,0 @@
-[submodule "olas-predict-benchmark"]
-	path = olas-predict-benchmark
-	url = https://github.com/valory-xyz/olas-predict-benchmark.git

app.py CHANGED Viewed

@@ -13,69 +13,69 @@ from tabs.faq import (
 from tabs.howto_benchmark import how_to_run
 # Feature temporarily disabled til HF support helps us with the Space Error
-from tabs.run_benchmark import run_benchmark_main
 demo = gr.Blocks()
-def run_benchmark_gradio(
-    tool_name,
-    model_name,
-    num_questions,
-    openai_api_key,
-    anthropic_api_key,
-    openrouter_api_key,
-):
-    """Run the benchmark using inputs."""
-    if tool_name is None:
-        return "Please enter the name of your tool."
-    if (
-        openai_api_key is None
-        and anthropic_api_key is None
-        and openrouter_api_key is None
-    ):
-        return "Please enter either OpenAI or Anthropic or OpenRouter API key."
-    result = run_benchmark_main(
-        tool_name,
-        model_name,
-        num_questions,
-        openai_api_key,
-        anthropic_api_key,
-        openrouter_api_key,
-    )
-    if result == "completed":
-        # get the results file in the results directory
-        fns = glob("results/*.csv")
-        print(f"Number of files in results directory: {len(fns)}")
-        # convert to Path
-        files = [Path(file) for file in fns]
-        # get results and summary files
-        results_files = [file for file in files if "results" in file.name]
-        # the other file is the summary file
-        summary_files = [file for file in files if "summary" in file.name]
-        print(results_files, summary_files)
-        # get the path with results
-        results_df = pd.read_csv(results_files[0])
-        summary_df = pd.read_csv(summary_files[0])
-        # make sure all df float values are rounded to 4 decimal places
-        results_df = results_df.round(4)
-        summary_df = summary_df.round(4)
-        return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
-    return gr.Textbox(
-        label="Benchmark Result", value=result, interactive=False
-    ), gr.Textbox(label="Summary", value="")
 with demo:
@@ -112,83 +112,83 @@ with demo:
             gr.Markdown(how_to_run)
         # fourth tab - run the benchmark
-        with gr.TabItem("🔥 Run the Benchmark"):
-            with gr.Row():
-                tool_name = gr.Dropdown(
-                    [
-                        "prediction-offline",
-                        "prediction-online",
-                        # "prediction-online-summarized-info",
-                        # "prediction-offline-sme",
-                        # "prediction-online-sme",
-                        "prediction-request-rag",
-                        "prediction-request-reasoning",
-                        # "prediction-url-cot-claude",
-                        # "prediction-request-rag-cohere",
-                        # "prediction-with-research-conservative",
-                        # "prediction-with-research-bold",
-                    ],
-                    label="Tool Name",
-                    info="Choose the tool to run",
-                )
-                model_name = gr.Dropdown(
-                    [
-                        "gpt-3.5-turbo-0125",
-                        "gpt-4-0125-preview",
-                        "claude-3-haiku-20240307",
-                        "claude-3-sonnet-20240229",
-                        "claude-3-opus-20240229",
-                        "databricks/dbrx-instruct:nitro",
-                        "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
-                        # "cohere/command-r-plus",
-                    ],
-                    label="Model Name",
-                    info="Choose the model to use",
-                )
-            with gr.Row():
-                openai_api_key = gr.Textbox(
-                    label="OpenAI API Key",
-                    placeholder="Enter your OpenAI API key here",
-                    type="password",
-                )
-                anthropic_api_key = gr.Textbox(
-                    label="Anthropic API Key",
-                    placeholder="Enter your Anthropic API key here",
-                    type="password",
-                )
-                openrouter_api_key = gr.Textbox(
-                    label="OpenRouter API Key",
-                    placeholder="Enter your OpenRouter API key here",
-                    type="password",
-                )
-            with gr.Row():
-                num_questions = gr.Slider(
-                    minimum=1,
-                    maximum=340,
-                    value=10,
-                    label="Number of questions to run the benchmark on",
-                )
-            with gr.Row():
-                run_button = gr.Button("Run Benchmark")
-            with gr.Row():
-                with gr.Accordion("Results", open=True):
-                    result = gr.Dataframe()
-            with gr.Row():
-                with gr.Accordion("Summary", open=False):
-                    summary = gr.Dataframe()
-            run_button.click(
-                run_benchmark_gradio,
-                inputs=[
-                    tool_name,
-                    model_name,
-                    num_questions,
-                    openai_api_key,
-                    anthropic_api_key,
-                    openrouter_api_key,
-                ],
-                outputs=[result, summary],
-            )
 demo.queue(default_concurrency_limit=40).launch()

 from tabs.howto_benchmark import how_to_run
 # Feature temporarily disabled til HF support helps us with the Space Error
+# from tabs.run_benchmark import run_benchmark_main
 demo = gr.Blocks()
+# def run_benchmark_gradio(
+#     tool_name,
+#     model_name,
+#     num_questions,
+#     openai_api_key,
+#     anthropic_api_key,
+#     openrouter_api_key,
+# ):
+#     """Run the benchmark using inputs."""
+#     if tool_name is None:
+#         return "Please enter the name of your tool."
+#     if (
+#         openai_api_key is None
+#         and anthropic_api_key is None
+#         and openrouter_api_key is None
+#     ):
+#         return "Please enter either OpenAI or Anthropic or OpenRouter API key."
+#     result = run_benchmark_main(
+#         tool_name,
+#         model_name,
+#         num_questions,
+#         openai_api_key,
+#         anthropic_api_key,
+#         openrouter_api_key,
+#     )
+#     if result == "completed":
+#         # get the results file in the results directory
+#         fns = glob("results/*.csv")
+#         print(f"Number of files in results directory: {len(fns)}")
+#         # convert to Path
+#         files = [Path(file) for file in fns]
+#         # get results and summary files
+#         results_files = [file for file in files if "results" in file.name]
+#         # the other file is the summary file
+#         summary_files = [file for file in files if "summary" in file.name]
+#         print(results_files, summary_files)
+#         # get the path with results
+#         results_df = pd.read_csv(results_files[0])
+#         summary_df = pd.read_csv(summary_files[0])
+#         # make sure all df float values are rounded to 4 decimal places
+#         results_df = results_df.round(4)
+#         summary_df = summary_df.round(4)
+#         return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
+#     return gr.Textbox(
+#         label="Benchmark Result", value=result, interactive=False
+#     ), gr.Textbox(label="Summary", value="")
 with demo:
             gr.Markdown(how_to_run)
         # fourth tab - run the benchmark
+        # with gr.TabItem("🔥 Run the Benchmark"):
+        #     with gr.Row():
+        #         tool_name = gr.Dropdown(
+        #             [
+        #                 "prediction-offline",
+        #                 "prediction-online",
+        #                 # "prediction-online-summarized-info",
+        #                 # "prediction-offline-sme",
+        #                 # "prediction-online-sme",
+        #                 "prediction-request-rag",
+        #                 "prediction-request-reasoning",
+        #                 # "prediction-url-cot-claude",
+        #                 # "prediction-request-rag-cohere",
+        #                 # "prediction-with-research-conservative",
+        #                 # "prediction-with-research-bold",
+        #             ],
+        #             label="Tool Name",
+        #             info="Choose the tool to run",
+        #         )
+        #         model_name = gr.Dropdown(
+        #             [
+        #                 "gpt-3.5-turbo-0125",
+        #                 "gpt-4-0125-preview",
+        #                 "claude-3-haiku-20240307",
+        #                 "claude-3-sonnet-20240229",
+        #                 "claude-3-opus-20240229",
+        #                 "databricks/dbrx-instruct:nitro",
+        #                 "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
+        #                 # "cohere/command-r-plus",
+        #             ],
+        #             label="Model Name",
+        #             info="Choose the model to use",
+        #         )
+        #     with gr.Row():
+        #         openai_api_key = gr.Textbox(
+        #             label="OpenAI API Key",
+        #             placeholder="Enter your OpenAI API key here",
+        #             type="password",
+        #         )
+        #         anthropic_api_key = gr.Textbox(
+        #             label="Anthropic API Key",
+        #             placeholder="Enter your Anthropic API key here",
+        #             type="password",
+        #         )
+        #         openrouter_api_key = gr.Textbox(
+        #             label="OpenRouter API Key",
+        #             placeholder="Enter your OpenRouter API key here",
+        #             type="password",
+        #         )
+        #     with gr.Row():
+        #         num_questions = gr.Slider(
+        #             minimum=1,
+        #             maximum=340,
+        #             value=10,
+        #             label="Number of questions to run the benchmark on",
+        #         )
+        #     with gr.Row():
+        #         run_button = gr.Button("Run Benchmark")
+        #     with gr.Row():
+        #         with gr.Accordion("Results", open=True):
+        #             result = gr.Dataframe()
+        #     with gr.Row():
+        #         with gr.Accordion("Summary", open=False):
+        #             summary = gr.Dataframe()
+        #     run_button.click(
+        #         run_benchmark_gradio,
+        #         inputs=[
+        #             tool_name,
+        #             model_name,
+        #             num_questions,
+        #             openai_api_key,
+        #             anthropic_api_key,
+        #             openrouter_api_key,
+        #         ],
+        #         outputs=[result, summary],
+        #     )
 demo.queue(default_concurrency_limit=40).launch()

automate/automate.py CHANGED Viewed

@@ -1,10 +1,11 @@
-import os
 import subprocess
 from apscheduler.schedulers.blocking import BackgroundScheduler
 def run_command(command, shell=True):
-    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell)
     stdout, stderr = process.communicate()
     if process.returncode == 0:
@@ -20,10 +21,5 @@ def run_benchmark():
 scheduler = BackgroundScheduler()
-scheduler.add_job(
-    run_benchmark,
-    'cron',
-    day_of_week='sun',
-    hour=0,
-    timezone='UTC')
-scheduler.start()

 import subprocess
 from apscheduler.schedulers.blocking import BackgroundScheduler
 def run_command(command, shell=True):
+    process = subprocess.Popen(
+        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell
+    )
     stdout, stderr = process.communicate()
     if process.returncode == 0:
 scheduler = BackgroundScheduler()
+scheduler.add_job(run_benchmark, "cron", day_of_week="sun", hour=0, timezone="UTC")
+scheduler.start()

olas-predict-benchmark DELETED Viewed

	@@ -1 +0,0 @@
1	- Subproject commit bac77acc64ed129608e6f428d40e86c0eb2cb4d1

start.py CHANGED Viewed

@@ -45,27 +45,26 @@ def start():
     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
-    olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
-    mech_dir = os.path.join(olas_dir, "benchmark", "mech")
     commands = [
-        ("git submodule init", base_dir),
         # no updates
         # ("git submodule update --init --recursive", base_dir),
         # ("git submodule update --remote --recursive", base_dir),
-        (
-            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
-            olas_dir,
-        ),
-        # no updates
-        ("git remote update", olas_dir),
-        ("git fetch --all", olas_dir),
-        ("git checkout main", olas_dir),
-        ("git pull origin main", olas_dir),
-        ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
-        # ("git pull origin main", mech_dir),
-        ("pip install -e .", os.path.join(olas_dir, "benchmark")),
-        ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
@@ -74,7 +73,7 @@ def start():
         run_command(command, cwd=cwd)
     # add benchmark to the path
-    sys.path.append(os.path.join(olas_dir, "benchmark"))
     # Download the dataset
     download_dataset()

     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
+    # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
+    # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
     commands = [
+        # ("git submodule init", base_dir),
         # no updates
         # ("git submodule update --init --recursive", base_dir),
         # ("git submodule update --remote --recursive", base_dir),
+        # (
+        #     'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
+        #     olas_dir,
+        # ),
+        # ("git remote update", olas_dir),
+        # ("git fetch --all", olas_dir),
+        # ("git checkout main", olas_dir),
+        # ("git pull origin main", olas_dir),
+        # ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
+        # # ("git pull origin main", mech_dir),
+        # ("pip install -e .", os.path.join(olas_dir, "benchmark")),
+        # ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
         run_command(command, cwd=cwd)
     # add benchmark to the path
+    # sys.path.append(os.path.join(olas_dir, "benchmark"))
     # Download the dataset
     download_dataset()