bigcodebench-evaluator-1

Running

App Files Files Community

Terry Zhuo committed on Aug 8

Commit

a2d883f

•

1 Parent(s): af3bf93

update

Browse files

Files changed (1) hide show

app.py +33 -29

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ default_command = "bigcodebench.evaluate"
 is_running = False
 def generate_command(
-    jsonl_file, split, subset, save_pass_rate, parallel,
     min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
     check_gt_only, no_gt
 ):
@@ -28,9 +28,6 @@ def generate_command(
     command.extend(["--split", split, "--subset", subset])
-    if save_pass_rate:
-        command.append("--save_pass_rate")
     if parallel is not None and parallel != 0:
         command.extend(["--parallel", str(int(parallel))])
@@ -67,27 +64,35 @@ def find_result_file():
 def run_bigcodebench(command):
     global is_running
-    is_running = True
-    yield f"Executing command: {command}\n"
-    process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
-    for line in process.stdout:
-        yield line
-    process.wait()
-    if process.returncode != 0:
-        yield f"Error: Command exited with status {process.returncode}\n"
-    is_running = False
-    yield "Evaluation completed.\n"
-    result_file = find_result_file()
-    if result_file:
-        yield f"Result file found: {result_file}\n"
-    else:
-        yield "No result file found.\n"
 def stream_logs(command, jsonl_file=None):
     global is_running
@@ -103,17 +108,16 @@ def stream_logs(command, jsonl_file=None):
     for log_line in run_bigcodebench(command):
         log_content.append(log_line)
         yield "".join(log_content)
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
     with gr.Row():
         jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
-        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="hard")
     with gr.Row():
-        save_pass_rate = gr.Checkbox(label="Save Pass Rate")
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
         max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
@@ -131,7 +135,7 @@ with gr.Blocks() as demo:
     log_output = gr.Textbox(label="Execution Logs", lines=20)
     input_components = [
-        jsonl_file, split, subset, save_pass_rate, parallel,
         min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
         check_gt_only, no_gt
     ]
@@ -148,7 +152,7 @@ with gr.Blocks() as demo:
         for log in stream_logs(command, jsonl_file):
             yield log, gr.update(value=result_path, label=result_path), gr.update()
         result_file = find_result_file()
         if result_file:
             return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)

 is_running = False
 def generate_command(
+    jsonl_file, split, subset, parallel,
     min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
     check_gt_only, no_gt
 ):
     command.extend(["--split", split, "--subset", subset])
     if parallel is not None and parallel != 0:
         command.extend(["--parallel", str(int(parallel))])
 def run_bigcodebench(command):
     global is_running
+    with lock:
+        if is_running:
+            yield "A command is already running. Please wait for it to finish.\n"
+            return
+        is_running = True
+    try:
+        yield f"Executing command: {command}\n"
+        process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
+        for line in process.stdout:
+            yield line
+        process.wait()
+        if process.returncode != 0:
+            yield f"Error: Command exited with status {process.returncode}\n"
+        yield "Evaluation completed.\n"
+        result_file = find_result_file()
+        if result_file:
+            yield f"Result file found: {result_file}\n"
+        else:
+            yield "No result file found.\n"
+    finally:
+        with lock:
+            is_running = False
 def stream_logs(command, jsonl_file=None):
     global is_running
     for log_line in run_bigcodebench(command):
         log_content.append(log_line)
         yield "".join(log_content)
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
     with gr.Row():
         jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
+        subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
     with gr.Row():
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
         max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
     log_output = gr.Textbox(label="Execution Logs", lines=20)
     input_components = [
+        jsonl_file, split, subset, parallel,
         min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
         check_gt_only, no_gt
     ]
         for log in stream_logs(command, jsonl_file):
             yield log, gr.update(value=result_path, label=result_path), gr.update()
+        is_running = False
         result_file = find_result_file()
         if result_file:
             return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)