polinaeterna HF staff committed on
Commit
4fe2860
1 Parent(s): fd7a758
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -90,7 +90,7 @@ def get_first_parquet_filename(dataset, config, split):
90
 
91
 
92
  @spaces.GPU
93
- def run_quality_check(dataset, config, split, column, batch_size, num_examples):
94
  logging.info(f"Fetching data for {dataset=} {config=} {split=} {column=}")
95
  try:
96
  filename = get_first_parquet_filename(dataset, config, split)
@@ -250,15 +250,15 @@ with gr.Blocks() as demo:
250
  return gr.HTML(value=html_code)
251
 
252
  with gr.Row():
253
- text_column_dropdown = gr.Dropdown(label="Text column name", info="Text colum name to check. ")
254
- nested_text_column_dropdown = gr.Dropdown(label="Nested text key")#, visible=False)
255
 
256
  def _resolve_dataset_selection(dataset: str, default_subset: str, default_split: str, text_feature):
257
  if "/" not in dataset.strip().strip("/"):
258
  return {
259
  subset_dropdown: gr.Dropdown(visible=False),
260
  split_dropdown: gr.Dropdown(visible=False),
261
- text_column_dropdown: gr.Dropdown(info="Text colum name to check"),
262
  nested_text_column_dropdown: gr.Dropdown(visible=False)
263
  }
264
  info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
@@ -266,7 +266,7 @@ with gr.Blocks() as demo:
266
  return {
267
  subset_dropdown: gr.Dropdown(visible=False),
268
  split_dropdown: gr.Dropdown(visible=False),
269
- text_column_dropdown: gr.Dropdown(label="Text column name", info="Text colum name to check"),
270
  nested_text_column_dropdown: gr.Dropdown(visible=False)
271
  }
272
  subsets: list[str] = list(info_resp["dataset_info"])
@@ -285,8 +285,7 @@ with gr.Blocks() as demo:
285
  return {
286
  subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
287
  split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
288
- text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name",
289
- info="Text colum name to check"),
290
  nested_text_column_dropdown: gr.Dropdown(visible=False),
291
  }
292
  logging.info(nested_text_features)
@@ -296,8 +295,7 @@ with gr.Blocks() as demo:
296
  subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
297
  split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
298
  text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features,
299
- label="Text column name",
300
- info="Text colum name to check (only non-nested texts are supported)"),
301
  nested_text_column_dropdown: gr.Dropdown(value=nested_keys[0], choices=nested_keys,
302
  label="Nested text column name", visible=True)
303
  }
 
90
 
91
 
92
  @spaces.GPU
93
+ def run_quality_check(dataset, config, split, column, nested_column, batch_size, num_examples):
94
  logging.info(f"Fetching data for {dataset=} {config=} {split=} {column=}")
95
  try:
96
  filename = get_first_parquet_filename(dataset, config, split)
 
250
  return gr.HTML(value=html_code)
251
 
252
  with gr.Row():
253
+ text_column_dropdown = gr.Dropdown(label="Text column name")
254
+ nested_text_column_dropdown = gr.Dropdown(visible=False)
255
 
256
  def _resolve_dataset_selection(dataset: str, default_subset: str, default_split: str, text_feature):
257
  if "/" not in dataset.strip().strip("/"):
258
  return {
259
  subset_dropdown: gr.Dropdown(visible=False),
260
  split_dropdown: gr.Dropdown(visible=False),
261
+ text_column_dropdown: gr.Dropdown(label="Text column name"),
262
  nested_text_column_dropdown: gr.Dropdown(visible=False)
263
  }
264
  info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
 
266
  return {
267
  subset_dropdown: gr.Dropdown(visible=False),
268
  split_dropdown: gr.Dropdown(visible=False),
269
+ text_column_dropdown: gr.Dropdown(label="Text column name"),
270
  nested_text_column_dropdown: gr.Dropdown(visible=False)
271
  }
272
  subsets: list[str] = list(info_resp["dataset_info"])
 
285
  return {
286
  subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
287
  split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
288
+ text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name",),
 
289
  nested_text_column_dropdown: gr.Dropdown(visible=False),
290
  }
291
  logging.info(nested_text_features)
 
295
  subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
296
  split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
297
  text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features,
298
+ label="Text column name"),
 
299
  nested_text_column_dropdown: gr.Dropdown(value=nested_keys[0], choices=nested_keys,
300
  label="Nested text column name", visible=True)
301
  }