joaogante HF staff committed on
Commit
0d5284e
·
1 Parent(s): 9ad031d

c4 - specify data path

Browse files
Files changed (1) hide show
  1. src/calibration_datasets.py +9 -0
src/calibration_datasets.py CHANGED
@@ -233,6 +233,15 @@ class C4Dataset(CalibrationDataset):
233
  dataset_field = "text"
234
  dataset_config = {
235
  "path": "allenai/c4",
 
 
 
 
 
 
 
 
 
236
  "split": "train"
237
  }
238
  dataset_name = "C4"
 
233
  dataset_field = "text"
234
  dataset_config = {
235
  "path": "allenai/c4",
236
+ "data_files": {
237
+ "train": [
238
+ "en/c4-train.00000-of-01024.json.gz",
239
+ "en/c4-train.00001-of-01024.json.gz",
240
+ "en/c4-train.00002-of-01024.json.gz",
241
+ "en/c4-train.00003-of-01024.json.gz",
242
+ "en/c4-train.00004-of-01024.json.gz",
243
+ ],
244
+ },
245
  "split": "train"
246
  }
247
  dataset_name = "C4"