|
{ |
|
"builder_name": "common_voice_11_0", |
|
"citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", |
|
"config_name": "ta", |
|
"dataset_size": 6138537473, |
|
"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 16413 validated hours of speech in 100 languages, but more voices and languages are always added.", |
|
"download_checksums": { |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/n_shards.json": { |
|
"num_bytes": 12179, |
|
"checksum": "584aa91a99f678abe7ad1e181e5cdc6af970d20c82bcb63f54e86c4fd2e5a2a8" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/train/ta_train_0.tar": { |
|
"num_bytes": 1604730880, |
|
"checksum": "ad4bbc42512f1bc9efd98c188e44750ac83b71e4829f4a52d6d9cf7259c0e6cb" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/train/ta_train_1.tar": { |
|
"num_bytes": 67246080, |
|
"checksum": "230bd2febfe9bc7012d73e52e517240c6a33ae11e3c64fb99086328e2b650fa5" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/dev/ta_dev_0.tar": { |
|
"num_bytes": 423659520, |
|
"checksum": "80b9977ebcd1b484880dafac7e28c184440a7c2d4558d501240e069d9b4dd628" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/test/ta_test_0.tar": { |
|
"num_bytes": 443709440, |
|
"checksum": "188a061b89e27517f06f713e5be835f7634242afe5f9cdc828d91fbf0bb6ffd7" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/other/ta_other_0.tar": { |
|
"num_bytes": 1563248640, |
|
"checksum": "91ca2c227f2d59a9a1817b38adfca9fe14e62484d7b5ffe44ae23811442a2b97" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/other/ta_other_1.tar": { |
|
"num_bytes": 1506631680, |
|
"checksum": "83809ecfcf1826812817e8d826d240823569aecb447a39a2a38dcb47090fa3fd" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/other/ta_other_2.tar": { |
|
"num_bytes": 319129600, |
|
"checksum": "6df52e8d650db95f7537b7acfecafc89433a586036c3dda21939a4be87421027" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/invalidated/ta_invalidated_0.tar": { |
|
"num_bytes": 227020800, |
|
"checksum": "33fe28d8a4e50f48bf0f7a0007179b479d11525467aa0743583e64b246799d1d" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/train.tsv": { |
|
"num_bytes": 15058319, |
|
"checksum": "f8214e19cb767e9911de46ab64672a39a77814b8c3d9330a71d8f9ff07c12d33" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/dev.tsv": { |
|
"num_bytes": 4205514, |
|
"checksum": "a3136b8f038de5e90ef79fd814ebb1f29282137d1e850b2db2a354d6cc76c345" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/test.tsv": { |
|
"num_bytes": 3936521, |
|
"checksum": "f3346901e118f481b32880dcc0dcf52f066fcb70c1355ad49d6bc532c73a6480" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/other.tsv": { |
|
"num_bytes": 30745385, |
|
"checksum": "18f1ac9e239e40d839a70c962d42e8d75a610f535c476453e6330a4cc5ec1a47" |
|
}, |
|
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/invalidated.tsv": { |
|
"num_bytes": 2044387, |
|
"checksum": "9b8b0c7a216423f9070824f17fa5722779357d56e0c908db0b3dfc59968514b1" |
|
} |
|
}, |
|
"download_size": 6211378945, |
|
"features": { |
|
"input_features": { |
|
"feature": { |
|
"feature": { |
|
"dtype": "float32", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"input_length": { |
|
"dtype": "float64", |
|
"_type": "Value" |
|
}, |
|
"labels": { |
|
"feature": { |
|
"dtype": "int64", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
} |
|
}, |
|
"homepage": "https://commonvoice.mozilla.org/en/datasets", |
|
"license": "https://creativecommons.org/publicdomain/zero/1.0/", |
|
"size_in_bytes": 12349916418, |
|
"splits": { |
|
"train": { |
|
"name": "train", |
|
"num_bytes": 1667970011, |
|
"num_examples": 41710, |
|
"shard_lengths": [ |
|
14000, |
|
13000, |
|
12000, |
|
2710 |
|
], |
|
"dataset_name": "common_voice_11_0" |
|
}, |
|
"validation": { |
|
"name": "validation", |
|
"num_bytes": 422495922, |
|
"num_examples": 11758, |
|
"dataset_name": "common_voice_11_0" |
|
}, |
|
"test": { |
|
"name": "test", |
|
"num_bytes": 442244039, |
|
"num_examples": 11815, |
|
"dataset_name": "common_voice_11_0" |
|
}, |
|
"other": { |
|
"name": "other", |
|
"num_bytes": 3379297630, |
|
"num_examples": 87993, |
|
"shard_lengths": [ |
|
13000, |
|
13000, |
|
14000, |
|
14000, |
|
15000, |
|
13000, |
|
5993 |
|
], |
|
"dataset_name": "common_voice_11_0" |
|
}, |
|
"invalidated": { |
|
"name": "invalidated", |
|
"num_bytes": 226529871, |
|
"num_examples": 5575, |
|
"dataset_name": "common_voice_11_0" |
|
} |
|
}, |
|
"version": { |
|
"version_str": "11.0.0", |
|
"major": 11, |
|
"minor": 0, |
|
"patch": 0 |
|
} |
|
} |