"""Save a `datasets` dataset to disk in several formats and reload it.

The script loads the "allocine" dataset, then:

1. saves it in Arrow format with ``save_to_disk`` and reloads it with
   ``load_from_disk``;
2. exports every split to CSV and reloads the CSV files with
   ``load_dataset("csv", ...)``;
3. exports every split to a ``.jsonl`` file;
4. exports every split to a ``.parquet`` file.

All output files are written to the current working directory.
"""

from datasets import load_dataset, load_from_disk

raw_datasets = load_dataset("allocine")

# --- Arrow: save the whole dataset object and load it back ------------------
raw_datasets.save_to_disk("my-arrow-datasets")

arrow_datasets_reloaded = load_from_disk("my-arrow-datasets")
# Bare expression: displays the object in a notebook/REPL, no effect in a script.
arrow_datasets_reloaded

# --- CSV: one file per split ------------------------------------------------
for split, dataset in raw_datasets.items():
    dataset.to_csv(f"my-dataset-{split}.csv", index=None)

# Maps split names to the CSV files written by the loop above.
# NOTE(review): assumes raw_datasets has exactly the "train", "validation"
# and "test" splits; otherwise some of these files will not exist — confirm.
data_files = {
    "train": "my-dataset-train.csv",
    "validation": "my-dataset-validation.csv",
    "test": "my-dataset-test.csv",
}

csv_datasets_reloaded = load_dataset("csv", data_files=data_files)
# Bare expression: displays the object in a notebook/REPL, no effect in a script.
csv_datasets_reloaded

# --- JSON Lines: one file per split -----------------------------------------
for split, dataset in raw_datasets.items():
    # The original f-string opened with ' and closed with ", which is a
    # SyntaxError; the quotes are now matched.
    dataset.to_json(f"my-dataset-{split}.jsonl")

# --- Parquet: one file per split --------------------------------------------
for split, dataset in raw_datasets.items():
    dataset.to_parquet(f"my-dataset-{split}.parquet")