Terry Zhuo committed
Commit
b64230f
1 Parent(s): 3980eb8
Files changed (2)
  1. README.md +1 -0
  2. src/execute.py +1 -65
README.md CHANGED
@@ -4,6 +4,7 @@ emoji: 🥇
 colorFrom: green
 colorTo: indigo
 sdk: docker
+disable_embedding: true
 pinned: false
 license: apache-2.0
 tags:
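(`disable_embedding: true` is a Spaces front-matter option; per the Hugging Face Spaces configuration reference, it prevents the Space from being embedded in an iframe on other websites.)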
src/execute.py CHANGED
@@ -111,68 +111,4 @@ def stream_logs(command, jsonl_file=None):
     log_content = []
     for log_line in run_bigcodebench(command):
         log_content.append(log_line)
-        yield "".join(log_content)
-
-with gr.Blocks() as demo:
-    gr.Markdown("# BigCodeBench Evaluator")
-
-    with gr.Row():
-        jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
-        split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
-        subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
-
-    with gr.Row():
-        parallel = gr.Number(label="Parallel (optional)", precision=0)
-        min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
-        max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
-
-    with gr.Row():
-        max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
-        max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
-        check_gt_only = gr.Checkbox(label="Check GT Only")
-        no_gt = gr.Checkbox(label="No GT")
-
-    command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
-    with gr.Row():
-        submit_btn = gr.Button("Run Evaluation")
-        download_btn = gr.DownloadButton(label="Download Result")
-    log_output = gr.Textbox(label="Execution Logs", lines=20)
-
-    input_components = [
-        jsonl_file, split, subset, parallel,
-        min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
-        check_gt_only, no_gt
-    ]
-
-    for component in input_components:
-        component.change(generate_command, inputs=input_components, outputs=command_output)
-
-
-    def start_evaluation(command, jsonl_file, subset, split):
-        extra = subset + "_" if subset != "full" else ""
-        if jsonl_file is not None:
-            result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
-        else:
-            result_path = None
-
-        for log in stream_logs(command, jsonl_file):
-            if jsonl_file is not None:
-                yield log, gr.update(value=result_path, label=result_path), gr.update()
-            else:
-                yield log, gr.update(), gr.update()
-        is_running = False
-        result_file = find_result_file()
-        if result_file:
-            return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
-            # gr.Button(visible=False)#,
-            # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
-        else:
-            return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
-            # gr.Button("Run Evaluation", visible=True),
-            # gr.DownloadButton(visible=False))
-    submit_btn.click(start_evaluation,
-                     inputs=[command_output, jsonl_file, subset, split],
-                     outputs=[log_output, download_btn])
-
-demo.queue(max_size=300).launch(share=True, server_name="0.0.0.0", server_port=7860)
-scheduler = BackgroundScheduler()
+        yield "".join(log_content)
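
With the inline Gradio demo removed, `stream_logs` is left as a plain generator: each yield is the full log text accumulated so far from `run_bigcodebench`. A minimal sketch of driving it from a script, assuming `src/execute.py` is importable as `execute` and using a purely illustrative command string (neither the import path nor the command is taken from this commit):

```python
# Hypothetical driver for the stream_logs generator kept by this commit.
# Assumptions: execute.stream_logs / run_bigcodebench behave as in the hunk
# above; the command string below is illustrative only.
from execute import stream_logs

command = "bigcodebench.evaluate --subset hard --split complete"  # illustrative
previous = ""
for accumulated in stream_logs(command):
    # Each yield contains the entire log so far, so print only the new tail.
    print(accumulated[len(previous):], end="", flush=True)
    previous = accumulated
```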