Spaces: Running on Zero
Commit
•
7092199
1
Parent(s):
fc6b70e
add examples
Browse files
app.py
CHANGED
@@ -250,8 +250,8 @@ with gr.Blocks() as demo:
|
|
250 |
placeholder="Search for dataset id on Huggingface",
|
251 |
search_type="dataset",
|
252 |
)
|
253 |
-
subset_dropdown = gr.Dropdown(
|
254 |
-
split_dropdown = gr.Dropdown(
|
255 |
|
256 |
# config_name = "default" # TODO: user input
|
257 |
with gr.Accordion("Dataset preview", open=False):
|
@@ -308,8 +308,8 @@ with gr.Blocks() as demo:
|
|
308 |
return _resolve_dataset_selection(dataset, default_subset=subset, default_split=split)
|
309 |
|
310 |
gr.Markdown("## Run nvidia quality classifier")
|
311 |
-
batch_size = gr.Slider(0, 64, 32, step=4, label="Inference batch size (set this to smaller value if this space crashes.)")
|
312 |
-
num_examples = gr.Slider(0, 1000, 500, step=10, label="Number of random examples to run quality classifier on")
|
313 |
gr_check_btn = gr.Button("Check Dataset")
|
314 |
progress_bar = gr.Label(show_label=False)
|
315 |
plot = gr.BarPlot()
|
@@ -324,6 +324,21 @@ with gr.Blocks() as demo:
|
|
324 |
df_high = gr.DataFrame()
|
325 |
|
326 |
texts_df = gr.DataFrame(visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
gr_check_btn.click(
|
328 |
run_quality_check,
|
329 |
inputs=[dataset_name, subset_dropdown, split_dropdown, text_column_dropdown, batch_size, num_examples],
|
@@ -351,4 +366,5 @@ with gr.Blocks() as demo:
|
|
351 |
outputs=[toxicity_progress_bar, toxicity_hist, toxicity_df]
|
352 |
)
|
353 |
|
|
|
354 |
demo.launch()
|
|
|
250 |
placeholder="Search for dataset id on Huggingface",
|
251 |
search_type="dataset",
|
252 |
)
|
253 |
+
subset_dropdown = gr.Dropdown(label="Subset", visible=False)
|
254 |
+
split_dropdown = gr.Dropdown(label="Split", visible=False)
|
255 |
|
256 |
# config_name = "default" # TODO: user input
|
257 |
with gr.Accordion("Dataset preview", open=False):
|
|
|
308 |
return _resolve_dataset_selection(dataset, default_subset=subset, default_split=split)
|
309 |
|
310 |
gr.Markdown("## Run nvidia quality classifier")
|
311 |
+
batch_size = gr.Slider(0, 64, 32, step=4, label="Inference batch size", info="(set this to smaller value if this space crashes.)")
|
312 |
+
num_examples = gr.Slider(0, 1000, 500, step=10, label="Number of examples", info="Number of random examples to run quality classifier on")
|
313 |
gr_check_btn = gr.Button("Check Dataset")
|
314 |
progress_bar = gr.Label(show_label=False)
|
315 |
plot = gr.BarPlot()
|
|
|
324 |
df_high = gr.DataFrame()
|
325 |
|
326 |
texts_df = gr.DataFrame(visible=False)
|
327 |
+
|
328 |
+
gr.Examples(
|
329 |
+
[
|
330 |
+
["HuggingFaceFW/fineweb-edu", "default", "train", "text", 16, 100],
|
331 |
+
["fka/awesome-chatgpt-prompts", "default", "train", "prompt", 64, 200],
|
332 |
+
# ["Anthropic/hh-rlhf"],
|
333 |
+
# ["OpenAssistant/oasst1"],
|
334 |
+
],
|
335 |
+
[dataset_name, subset_dropdown, split_dropdown, text_column_dropdown, batch_size, num_examples],
|
336 |
+
[progress_bar, plot, df_low, df_medium, df_high, texts_df],
|
337 |
+
fn=run_quality_check,
|
338 |
+
run_on_click=False,
|
339 |
+
cache_examples="lazy",
|
340 |
+
)
|
341 |
+
|
342 |
gr_check_btn.click(
|
343 |
run_quality_check,
|
344 |
inputs=[dataset_name, subset_dropdown, split_dropdown, text_column_dropdown, batch_size, num_examples],
|
|
|
366 |
outputs=[toxicity_progress_bar, toxicity_hist, toxicity_df]
|
367 |
)
|
368 |
|
369 |
+
|
370 |
demo.launch()
|