whisper-medium-tamil / test.hf /test /dataset_info.json
kurianbenoy's picture
Training in progress, step 1000
9e49e62
raw
history blame
5.93 kB
{
"builder_name": "common_voice_11_0",
"citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
"config_name": "ta",
"dataset_size": 6138537473,
"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 16413 validated hours of speech in 100 languages, but more voices and languages are always added.",
"download_checksums": {
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/n_shards.json": {
"num_bytes": 12179,
"checksum": "584aa91a99f678abe7ad1e181e5cdc6af970d20c82bcb63f54e86c4fd2e5a2a8"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/train/ta_train_0.tar": {
"num_bytes": 1604730880,
"checksum": "ad4bbc42512f1bc9efd98c188e44750ac83b71e4829f4a52d6d9cf7259c0e6cb"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/train/ta_train_1.tar": {
"num_bytes": 67246080,
"checksum": "230bd2febfe9bc7012d73e52e517240c6a33ae11e3c64fb99086328e2b650fa5"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/dev/ta_dev_0.tar": {
"num_bytes": 423659520,
"checksum": "80b9977ebcd1b484880dafac7e28c184440a7c2d4558d501240e069d9b4dd628"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/test/ta_test_0.tar": {
"num_bytes": 443709440,
"checksum": "188a061b89e27517f06f713e5be835f7634242afe5f9cdc828d91fbf0bb6ffd7"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/other/ta_other_0.tar": {
"num_bytes": 1563248640,
"checksum": "91ca2c227f2d59a9a1817b38adfca9fe14e62484d7b5ffe44ae23811442a2b97"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/other/ta_other_1.tar": {
"num_bytes": 1506631680,
"checksum": "83809ecfcf1826812817e8d826d240823569aecb447a39a2a38dcb47090fa3fd"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/other/ta_other_2.tar": {
"num_bytes": 319129600,
"checksum": "6df52e8d650db95f7537b7acfecafc89433a586036c3dda21939a4be87421027"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/ta/invalidated/ta_invalidated_0.tar": {
"num_bytes": 227020800,
"checksum": "33fe28d8a4e50f48bf0f7a0007179b479d11525467aa0743583e64b246799d1d"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/train.tsv": {
"num_bytes": 15058319,
"checksum": "f8214e19cb767e9911de46ab64672a39a77814b8c3d9330a71d8f9ff07c12d33"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/dev.tsv": {
"num_bytes": 4205514,
"checksum": "a3136b8f038de5e90ef79fd814ebb1f29282137d1e850b2db2a354d6cc76c345"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/test.tsv": {
"num_bytes": 3936521,
"checksum": "f3346901e118f481b32880dcc0dcf52f066fcb70c1355ad49d6bc532c73a6480"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/other.tsv": {
"num_bytes": 30745385,
"checksum": "18f1ac9e239e40d839a70c962d42e8d75a610f535c476453e6330a4cc5ec1a47"
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/ta/invalidated.tsv": {
"num_bytes": 2044387,
"checksum": "9b8b0c7a216423f9070824f17fa5722779357d56e0c908db0b3dfc59968514b1"
}
},
"download_size": 6211378945,
"features": {
"input_features": {
"feature": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
},
"_type": "Sequence"
},
"input_length": {
"dtype": "float64",
"_type": "Value"
},
"labels": {
"feature": {
"dtype": "int64",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "https://commonvoice.mozilla.org/en/datasets",
"license": "https://creativecommons.org/publicdomain/zero/1.0/",
"size_in_bytes": 12349916418,
"splits": {
"train": {
"name": "train",
"num_bytes": 1667970011,
"num_examples": 41710,
"shard_lengths": [
14000,
13000,
12000,
2710
],
"dataset_name": "common_voice_11_0"
},
"validation": {
"name": "validation",
"num_bytes": 422495922,
"num_examples": 11758,
"dataset_name": "common_voice_11_0"
},
"test": {
"name": "test",
"num_bytes": 442244039,
"num_examples": 11815,
"dataset_name": "common_voice_11_0"
},
"other": {
"name": "other",
"num_bytes": 3379297630,
"num_examples": 87993,
"shard_lengths": [
13000,
13000,
14000,
14000,
15000,
13000,
5993
],
"dataset_name": "common_voice_11_0"
},
"invalidated": {
"name": "invalidated",
"num_bytes": 226529871,
"num_examples": 5575,
"dataset_name": "common_voice_11_0"
}
},
"version": {
"version_str": "11.0.0",
"major": 11,
"minor": 0,
"patch": 0
}
}