File size: 765 Bytes
f44b91f
 
38481ea
 
 
 
 
 
6419e69
 
 
 
cfb4b98
05df1af
 
 
 
 
 
 
 
 
 
348fb48
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from datasets import load_dataset

# Download the "allocine" French movie-review dataset from the HuggingFace Hub
# and persist every split to local disk in Arrow format, under the
# "my-arrow-datasets" directory.  `raw_datasets` is a DatasetDict and is
# reused by the export loops further down this file.
raw_datasets = load_dataset("allocine")
raw_datasets.save_to_disk("my-arrow-datasets")



from datasets import load_from_disk

# Restore the DatasetDict previously written with `save_to_disk` to verify
# the round-trip.  The trailing bare expression is notebook-style output:
# it displays the reloaded splits when run in an interactive session.
arrow_datasets_reloaded = load_from_disk("my-arrow-datasets")
arrow_datasets_reloaded



# Write each split of the DatasetDict out as its own CSV file
# (index=None suppresses the pandas row-index column).
for split_name, split_data in raw_datasets.items():
    split_data.to_csv(f"my-dataset-{split_name}.csv", index=None)

# Map each split name to the CSV file produced above so the generic
# "csv" loading script can rebuild the same DatasetDict.
data_files = {
    "train": "my-dataset-train.csv",
    "validation": "my-dataset-validation.csv",
    "test": "my-dataset-test.csv",
}

csv_datasets_reloaded = load_dataset("csv", data_files=data_files)
# Notebook-style display of the reloaded splits.
csv_datasets_reloaded


# Export each split to JSON Lines format (one JSON object per row).
# Fix: the original f-string opened with a single quote but closed with a
# double quote (f'my-dataset-{split}.jsonl"), which is a syntax error that
# prevented the whole file from parsing.
for split, dataset in raw_datasets.items():
    dataset.to_json(f"my-dataset-{split}.jsonl")
  
# Save every split in Parquet, a compressed columnar format better suited
# to archiving than CSV/JSON.
for part_name, part_data in raw_datasets.items():
    part_data.to_parquet(f"my-dataset-{part_name}.parquet")