polinaeterna HF staff committed on
Commit
7092199
1 Parent(s): fc6b70e

add examples

Browse files
Files changed (1) hide show
  1. app.py +20 -4
app.py CHANGED
@@ -250,8 +250,8 @@ with gr.Blocks() as demo:
250
  placeholder="Search for dataset id on Huggingface",
251
  search_type="dataset",
252
  )
253
- subset_dropdown = gr.Dropdown(info="Subset", show_label=False, visible=False)
254
- split_dropdown = gr.Dropdown(info="Split", show_label=False, visible=False)
255
 
256
  # config_name = "default" # TODO: user input
257
  with gr.Accordion("Dataset preview", open=False):
@@ -308,8 +308,8 @@ with gr.Blocks() as demo:
308
  return _resolve_dataset_selection(dataset, default_subset=subset, default_split=split)
309
 
310
  gr.Markdown("## Run nvidia quality classifier")
311
- batch_size = gr.Slider(0, 64, 32, step=4, label="Inference batch size (set this to smaller value if this space crashes.)")
312
- num_examples = gr.Slider(0, 1000, 500, step=10, label="Number of random examples to check")
313
  gr_check_btn = gr.Button("Check Dataset")
314
  progress_bar = gr.Label(show_label=False)
315
  plot = gr.BarPlot()
@@ -324,6 +324,21 @@ with gr.Blocks() as demo:
324
  df_high = gr.DataFrame()
325
 
326
  texts_df = gr.DataFrame(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  gr_check_btn.click(
328
  run_quality_check,
329
  inputs=[dataset_name, subset_dropdown, split_dropdown, text_column_dropdown, batch_size, num_examples],
@@ -351,4 +366,5 @@ with gr.Blocks() as demo:
351
  outputs=[toxicity_progress_bar, toxicity_hist, toxicity_df]
352
  )
353
 
 
354
  demo.launch()
 
250
  placeholder="Search for dataset id on Huggingface",
251
  search_type="dataset",
252
  )
253
+ subset_dropdown = gr.Dropdown(label="Subset", visible=False)
254
+ split_dropdown = gr.Dropdown(label="Split", visible=False)
255
 
256
  # config_name = "default" # TODO: user input
257
  with gr.Accordion("Dataset preview", open=False):
 
308
  return _resolve_dataset_selection(dataset, default_subset=subset, default_split=split)
309
 
310
  gr.Markdown("## Run nvidia quality classifier")
311
+ batch_size = gr.Slider(0, 64, 32, step=4, label="Inference batch size", info="(set this to smaller value if this space crashes.)")
312
+ num_examples = gr.Slider(0, 1000, 500, step=10, label="Number of examples", info="Number of random examples to run quality classifier on")
313
  gr_check_btn = gr.Button("Check Dataset")
314
  progress_bar = gr.Label(show_label=False)
315
  plot = gr.BarPlot()
 
324
  df_high = gr.DataFrame()
325
 
326
  texts_df = gr.DataFrame(visible=False)
327
+
328
+ gr.Examples(
329
+ [
330
+ ["HuggingFaceFW/fineweb-edu", "default", "train", "text", 16, 100],
331
+ ["fka/awesome-chatgpt-prompts", "default", "train", "prompt", 64, 200],
332
+ # ["Anthropic/hh-rlhf"],
333
+ # ["OpenAssistant/oasst1"],
334
+ ],
335
+ [dataset_name, subset_dropdown, split_dropdown, text_column_dropdown, batch_size, num_examples],
336
+ [progress_bar, plot, df_low, df_medium, df_high, texts_df],
337
+ fn=run_quality_check,
338
+ run_on_click=False,
339
+ cache_examples="lazy",
340
+ )
341
+
342
  gr_check_btn.click(
343
  run_quality_check,
344
  inputs=[dataset_name, subset_dropdown, split_dropdown, text_column_dropdown, batch_size, num_examples],
 
366
  outputs=[toxicity_progress_bar, toxicity_hist, toxicity_df]
367
  )
368
 
369
+
370
  demo.launch()