Spaces:
Running
on
Zero
Running
on
Zero
Commit
•
4fe2860
1
Parent(s):
fd7a758
fix
Browse files
app.py
CHANGED
@@ -90,7 +90,7 @@ def get_first_parquet_filename(dataset, config, split):
|
|
90 |
|
91 |
|
92 |
@spaces.GPU
|
93 |
-
def run_quality_check(dataset, config, split, column, batch_size, num_examples):
|
94 |
logging.info(f"Fetching data for {dataset=} {config=} {split=} {column=}")
|
95 |
try:
|
96 |
filename = get_first_parquet_filename(dataset, config, split)
|
@@ -250,15 +250,15 @@ with gr.Blocks() as demo:
|
|
250 |
return gr.HTML(value=html_code)
|
251 |
|
252 |
with gr.Row():
|
253 |
-
text_column_dropdown = gr.Dropdown(label="Text column name"
|
254 |
-
nested_text_column_dropdown = gr.Dropdown(
|
255 |
|
256 |
def _resolve_dataset_selection(dataset: str, default_subset: str, default_split: str, text_feature):
|
257 |
if "/" not in dataset.strip().strip("/"):
|
258 |
return {
|
259 |
subset_dropdown: gr.Dropdown(visible=False),
|
260 |
split_dropdown: gr.Dropdown(visible=False),
|
261 |
-
text_column_dropdown: gr.Dropdown(
|
262 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
263 |
}
|
264 |
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
|
@@ -266,7 +266,7 @@ with gr.Blocks() as demo:
|
|
266 |
return {
|
267 |
subset_dropdown: gr.Dropdown(visible=False),
|
268 |
split_dropdown: gr.Dropdown(visible=False),
|
269 |
-
text_column_dropdown: gr.Dropdown(label="Text column name"
|
270 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
271 |
}
|
272 |
subsets: list[str] = list(info_resp["dataset_info"])
|
@@ -285,8 +285,7 @@ with gr.Blocks() as demo:
|
|
285 |
return {
|
286 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
287 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
288 |
-
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name",
|
289 |
-
info="Text colum name to check"),
|
290 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
291 |
}
|
292 |
logging.info(nested_text_features)
|
@@ -296,8 +295,7 @@ with gr.Blocks() as demo:
|
|
296 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
297 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
298 |
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features,
|
299 |
-
label="Text column name",
|
300 |
-
info="Text colum name to check (only non-nested texts are supported)"),
|
301 |
nested_text_column_dropdown: gr.Dropdown(value=nested_keys[0], choices=nested_keys,
|
302 |
label="Nested text column name", visible=True)
|
303 |
}
|
|
|
90 |
|
91 |
|
92 |
@spaces.GPU
|
93 |
+
def run_quality_check(dataset, config, split, column, nested_column, batch_size, num_examples):
|
94 |
logging.info(f"Fetching data for {dataset=} {config=} {split=} {column=}")
|
95 |
try:
|
96 |
filename = get_first_parquet_filename(dataset, config, split)
|
|
|
250 |
return gr.HTML(value=html_code)
|
251 |
|
252 |
with gr.Row():
|
253 |
+
text_column_dropdown = gr.Dropdown(label="Text column name")
|
254 |
+
nested_text_column_dropdown = gr.Dropdown(visible=False)
|
255 |
|
256 |
def _resolve_dataset_selection(dataset: str, default_subset: str, default_split: str, text_feature):
|
257 |
if "/" not in dataset.strip().strip("/"):
|
258 |
return {
|
259 |
subset_dropdown: gr.Dropdown(visible=False),
|
260 |
split_dropdown: gr.Dropdown(visible=False),
|
261 |
+
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
262 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
263 |
}
|
264 |
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
|
|
|
266 |
return {
|
267 |
subset_dropdown: gr.Dropdown(visible=False),
|
268 |
split_dropdown: gr.Dropdown(visible=False),
|
269 |
+
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
270 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
271 |
}
|
272 |
subsets: list[str] = list(info_resp["dataset_info"])
|
|
|
285 |
return {
|
286 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
287 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
288 |
+
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name",),
|
|
|
289 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
290 |
}
|
291 |
logging.info(nested_text_features)
|
|
|
295 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
296 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
297 |
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features,
|
298 |
+
label="Text column name"),
|
|
|
299 |
nested_text_column_dropdown: gr.Dropdown(value=nested_keys[0], choices=nested_keys,
|
300 |
label="Nested text column name", visible=True)
|
301 |
}
|