Spaces:
Paused
Paused
c4 - specify data path
Browse files
src/calibration_datasets.py
CHANGED
@@ -233,6 +233,15 @@ class C4Dataset(CalibrationDataset):
     dataset_field = "text"
     dataset_config = {
         "path": "allenai/c4",
+        "data_files": {
+            "train": [
+                "en/c4-train.00000-of-01024.json.gz",
+                "en/c4-train.00001-of-01024.json.gz",
+                "en/c4-train.00002-of-01024.json.gz",
+                "en/c4-train.00003-of-01024.json.gz",
+                "en/c4-train.00004-of-01024.json.gz",
+            ],
+        },
         "split": "train"
     }
     dataset_name = "C4"