Spaces:
Running
on
Zero
Running
on
Zero
Commit
•
57aaee5
1
Parent(s):
4e6b23a
fix parquet filename
Browse files
app.py
CHANGED
@@ -90,9 +90,13 @@ def run_quality_check(dataset, column, batch_size, num_examples):
|
|
90 |
except pl.exceptions.ComputeError:
|
91 |
try:
|
92 |
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/partial-{split}/0000.parquet", columns=[column])
|
93 |
-
except
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
texts = [text[:10000] for text in data[column].to_list()]
|
97 |
# texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
|
98 |
# batch_size = 100
|
|
|
90 |
except pl.exceptions.ComputeError:
|
91 |
try:
|
92 |
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/partial-{split}/0000.parquet", columns=[column])
|
93 |
+
except pl.exceptions.ComputeError:
|
94 |
+
try:
|
95 |
+
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}-part0/0000.parquet", columns=[column])
|
96 |
+
except Exception as error:
|
97 |
+
yield f"β {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
|
98 |
+
return
|
99 |
+
|
100 |
texts = [text[:10000] for text in data[column].to_list()]
|
101 |
# texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
|
102 |
# batch_size = 100
|