terryyz committed on
Commit
865ee9c
·
verified ·
1 Parent(s): 242862b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -47
app.py CHANGED
@@ -86,54 +86,57 @@ def read_logs():
86
  with open(log_file, "r") as f:
87
  return f.read()
88
 
89
- with gr.Blocks() as demo:
90
- gr.Markdown("# BigCodeBench Evaluation App")
91
-
92
- with gr.Row():
93
- jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
94
- split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
95
- subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
96
-
97
- with gr.Row():
98
- save_pass_rate = gr.Checkbox(label="Save Pass Rate")
99
- parallel = gr.Number(label="Parallel (optional)", precision=0)
100
- min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
101
- max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
102
-
103
- with gr.Row():
104
- max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
105
- max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
106
- check_gt_only = gr.Checkbox(label="Check GT Only")
107
- no_gt = gr.Checkbox(label="No GT")
108
-
109
- command_output = gr.Textbox(label="Command", lines=2, value=default_command, interactive=False)
110
- submit_btn = gr.Button("Run Evaluation")
111
- log_output = gr.Textbox(label="Execution Logs", lines=10)
112
-
113
- def update_command(*args):
114
- return generate_command(*args)
115
-
116
- input_components = [
117
- jsonl_file, split, subset, save_pass_rate, parallel,
118
- min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
119
- check_gt_only, no_gt
120
- ]
121
-
122
- for component in input_components:
123
- component.change(update_command, inputs=input_components, outputs=command_output)
124
-
125
- def on_submit(command):
126
- global is_running
127
- if is_running:
128
- return "A command is already running. Please wait for it to finish."
129
 
130
- def run_and_update():
131
- run_bigcodebench(command)
132
- return read_logs()
 
133
 
134
- return gr.update(value="Evaluation started. Please wait for the logs to update..."), gr.update(value=run_and_update)
135
-
136
- submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output, log_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  if __name__ == "__main__":
139
- demo.queue().launch()
 
86
  with open(log_file, "r") as f:
87
  return f.read()
88
 
89
+ def run():
90
+ with gr.Blocks() as demo:
91
+ gr.Markdown("# BigCodeBench Evaluation App")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ with gr.Row():
94
+ jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
95
+ split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
96
+ subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
97
 
98
+ with gr.Row():
99
+ save_pass_rate = gr.Checkbox(label="Save Pass Rate")
100
+ parallel = gr.Number(label="Parallel (optional)", precision=0)
101
+ min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
102
+ max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
103
+
104
+ with gr.Row():
105
+ max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
106
+ max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
107
+ check_gt_only = gr.Checkbox(label="Check GT Only")
108
+ no_gt = gr.Checkbox(label="No GT")
109
+
110
+ command_output = gr.Textbox(label="Command", lines=2, value=default_command, interactive=False)
111
+ submit_btn = gr.Button("Run Evaluation")
112
+ log_output = gr.Textbox(label="Execution Logs", lines=10)
113
+
114
+ def update_command(*args):
115
+ return generate_command(*args)
116
+
117
+ input_components = [
118
+ jsonl_file, split, subset, save_pass_rate, parallel,
119
+ min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
120
+ check_gt_only, no_gt
121
+ ]
122
+
123
+ for component in input_components:
124
+ component.change(update_command, inputs=input_components, outputs=command_output)
125
+
126
+ def on_submit(command):
127
+ global is_running
128
+ if is_running:
129
+ return "A command is already running. Please wait for it to finish."
130
+
131
+ def run_and_update():
132
+ run_bigcodebench(command)
133
+ return read_logs()
134
+
135
+ return gr.update(value="Evaluation started. Please wait for the logs to update..."), gr.update(value=run_and_update)
136
+
137
+ submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output, log_output])
138
+
139
+ demo.launch(server_name="0.0.0.0", server_port=7860)
140
 
141
  if __name__ == "__main__":
142
+ run()