Upload lm-boosted decoder

Files changed (9) hide show

added_tokens.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"<s>": 30, "</s>": 31}

alphabet.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"labels": ["s", "n", "y", "o", "t", "l", "r", "k", "w", "c", "v", "g", "h", "z", "'", "m", "a", "e", "u", "i", "q", " ", "b", "j", "p", "f", "x", "d", "\u2047", "", "<s>", "</s>"], "is_bpe": false}

language_model/3gram.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:761824bf3b24ecc641c50f1b43073e7cea0854cbe5c3bccf7c1ff4d0e7e51f0e
+size 10419611

language_model/attrs.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}

language_model/unigrams.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json CHANGED Viewed

@@ -4,6 +4,7 @@
   "feature_size": 1,
   "padding_side": "right",
   "padding_value": 0.0,
   "return_attention_mask": true,
   "sampling_rate": 16000
 }

   "feature_size": 1,
   "padding_side": "right",
   "padding_value": 0.0,
+  "processor_class": "Wav2Vec2ProcessorWithLM",
   "return_attention_mask": true,
   "sampling_rate": 16000
 }

special_tokens_map.json CHANGED Viewed

@@ -1,6 +1 @@
-{
-  "bos_token": "<s>",
-  "eos_token": "</s>",
-  "pad_token": "[PAD]",
-  "unk_token": "[UNK]"
-}


1	+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}

tokenizer_config.json CHANGED Viewed

@@ -1,10 +1 @@
-{
-  "bos_token": "<s>",
-  "do_lower_case": false,
-  "eos_token": "</s>",
-  "pad_token": "[PAD]",
-  "replace_word_delimiter_char": " ",
-  "tokenizer_class": "Wav2Vec2CTCTokenizer",
-  "unk_token": "[UNK]",
-  "word_delimiter_token": "|"
-}


1	+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": "/root/.cache/huggingface/transformers/f260852cdc917ea96f4c3e190c02d8e56bfc4b959b5160d503d135ab05754df1.fea372b8528a479b7415f13ca4e27a2f5f3782cbb3f15b4d19bb3cbe734e8137", "name_or_path": "crossdelenna/wav2vec2-base-eng-ind", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2ProcessorWithLM"}

vocab.json CHANGED Viewed

@@ -1,32 +1 @@
-{
-  "'": 14,
-  "[PAD]": 29,
-  "[UNK]": 28,
-  "a": 16,
-  "b": 22,
-  "c": 9,
-  "d": 27,
-  "e": 17,
-  "f": 25,
-  "g": 11,
-  "h": 12,
-  "i": 19,
-  "j": 23,
-  "k": 7,
-  "l": 5,
-  "m": 15,
-  "n": 1,
-  "o": 3,
-  "p": 24,
-  "q": 20,
-  "r": 6,
-  "s": 0,
-  "t": 4,
-  "u": 18,
-  "v": 10,
-  "w": 8,
-  "x": 26,
-  "y": 2,
-  "z": 13,
-  "|": 21
-}


1	+ {"'": 14, "[PAD]": 29, "[UNK]": 28, "a": 16, "b": 22, "c": 9, "d": 27, "e": 17, "f": 25, "g": 11, "h": 12, "i": 19, "j": 23, "k": 7, "l": 5, "m": 15, "n": 1, "o": 3, "p": 24, "q": 20, "r": 6, "s": 0, "t": 4, "u": 18, "v": 10, "w": 8, "x": 26, "y": 2, "z": 13, "|": 21}