Terry Zhuo committed on
Commit
a2d883f
1 Parent(s): af3bf93
Files changed (1) hide show
  1. app.py +33 -29
app.py CHANGED
@@ -14,7 +14,7 @@ default_command = "bigcodebench.evaluate"
14
  is_running = False
15
 
16
  def generate_command(
17
- jsonl_file, split, subset, save_pass_rate, parallel,
18
  min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
19
  check_gt_only, no_gt
20
  ):
@@ -28,9 +28,6 @@ def generate_command(
28
 
29
  command.extend(["--split", split, "--subset", subset])
30
 
31
- if save_pass_rate:
32
- command.append("--save_pass_rate")
33
-
34
  if parallel is not None and parallel != 0:
35
  command.extend(["--parallel", str(int(parallel))])
36
 
@@ -67,27 +64,35 @@ def find_result_file():
67
 
68
  def run_bigcodebench(command):
69
  global is_running
70
- is_running = True
71
- yield f"Executing command: {command}\n"
72
-
73
- process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
74
-
75
- for line in process.stdout:
76
- yield line
77
-
78
- process.wait()
79
-
80
- if process.returncode != 0:
81
- yield f"Error: Command exited with status {process.returncode}\n"
82
 
83
- is_running = False
84
- yield "Evaluation completed.\n"
85
-
86
- result_file = find_result_file()
87
- if result_file:
88
- yield f"Result file found: {result_file}\n"
89
- else:
90
- yield "No result file found.\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def stream_logs(command, jsonl_file=None):
93
  global is_running
@@ -103,17 +108,16 @@ def stream_logs(command, jsonl_file=None):
103
  for log_line in run_bigcodebench(command):
104
  log_content.append(log_line)
105
  yield "".join(log_content)
106
-
107
  with gr.Blocks() as demo:
108
  gr.Markdown("# BigCodeBench Evaluator")
109
 
110
  with gr.Row():
111
  jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
112
  split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
113
- subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="hard")
114
 
115
  with gr.Row():
116
- save_pass_rate = gr.Checkbox(label="Save Pass Rate")
117
  parallel = gr.Number(label="Parallel (optional)", precision=0)
118
  min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
119
  max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
@@ -131,7 +135,7 @@ with gr.Blocks() as demo:
131
  log_output = gr.Textbox(label="Execution Logs", lines=20)
132
 
133
  input_components = [
134
- jsonl_file, split, subset, save_pass_rate, parallel,
135
  min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
136
  check_gt_only, no_gt
137
  ]
@@ -148,7 +152,7 @@ with gr.Blocks() as demo:
148
 
149
  for log in stream_logs(command, jsonl_file):
150
  yield log, gr.update(value=result_path, label=result_path), gr.update()
151
-
152
  result_file = find_result_file()
153
  if result_file:
154
  return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
 
14
  is_running = False
15
 
16
  def generate_command(
17
+ jsonl_file, split, subset, parallel,
18
  min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
19
  check_gt_only, no_gt
20
  ):
 
28
 
29
  command.extend(["--split", split, "--subset", subset])
30
 
 
 
 
31
  if parallel is not None and parallel != 0:
32
  command.extend(["--parallel", str(int(parallel))])
33
 
 
64
 
65
  def run_bigcodebench(command):
66
  global is_running
67
+ with lock:
68
+ if is_running:
69
+ yield "A command is already running. Please wait for it to finish.\n"
70
+ return
71
+ is_running = True
 
 
 
 
 
 
 
72
 
73
+ try:
74
+ yield f"Executing command: {command}\n"
75
+
76
+ process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
77
+
78
+ for line in process.stdout:
79
+ yield line
80
+
81
+ process.wait()
82
+
83
+ if process.returncode != 0:
84
+ yield f"Error: Command exited with status {process.returncode}\n"
85
+
86
+ yield "Evaluation completed.\n"
87
+
88
+ result_file = find_result_file()
89
+ if result_file:
90
+ yield f"Result file found: {result_file}\n"
91
+ else:
92
+ yield "No result file found.\n"
93
+ finally:
94
+ with lock:
95
+ is_running = False
96
 
97
  def stream_logs(command, jsonl_file=None):
98
  global is_running
 
108
  for log_line in run_bigcodebench(command):
109
  log_content.append(log_line)
110
  yield "".join(log_content)
111
+
112
  with gr.Blocks() as demo:
113
  gr.Markdown("# BigCodeBench Evaluator")
114
 
115
  with gr.Row():
116
  jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
117
  split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
118
+ subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
119
 
120
  with gr.Row():
 
121
  parallel = gr.Number(label="Parallel (optional)", precision=0)
122
  min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
123
  max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
 
135
  log_output = gr.Textbox(label="Execution Logs", lines=20)
136
 
137
  input_components = [
138
+ jsonl_file, split, subset, parallel,
139
  min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
140
  check_gt_only, no_gt
141
  ]
 
152
 
153
  for log in stream_logs(command, jsonl_file):
154
  yield log, gr.update(value=result_path, label=result_path), gr.update()
155
+ is_running = False
156
  result_file = find_result_file()
157
  if result_file:
158
  return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)