polinaeterna HF staff committed on
Commit
57aaee5
•
1 Parent(s): 4e6b23a

fix parquet filename

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -90,9 +90,13 @@ def run_quality_check(dataset, column, batch_size, num_examples):
90
  except pl.exceptions.ComputeError:
91
  try:
92
  data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/partial-{split}/0000.parquet", columns=[column])
93
- except Exception as error:
94
- yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
95
- return
 
 
 
 
96
  texts = [text[:10000] for text in data[column].to_list()]
97
  # texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
98
  # batch_size = 100
 
90
  except pl.exceptions.ComputeError:
91
  try:
92
  data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/partial-{split}/0000.parquet", columns=[column])
93
+ except pl.exceptions.ComputeError:
94
+ try:
95
+ data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}-part0/0000.parquet", columns=[column])
96
+ except Exception as error:
97
+ yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
98
+ return
99
+
100
  texts = [text[:10000] for text in data[column].to_list()]
101
  # texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
102
  # batch_size = 100