leo19941227 committed on
Commit
c98c327
1 Parent(s): 52e98aa

commit files to HF hub

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: superb
3
+ benchmark: superb
4
+ task: asr
5
+ datasets:
6
+ - superb
7
+ tags:
8
+ - automatic-speech-recognition
9
+ - osanseviero/hubert_base
10
+ widget:
11
+ - label: Librispeech sample 1
12
+ src: https://cdn-media.huggingface.co/speech_samples/sample1.flac
13
+ ---
14
+
15
+ # Fine-tuned s3prl model for ASR
args_2021-09-01-23-24-17.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_resume: false
2
+ backend: nccl
3
+ cache_dir: null
4
+ config: ./downstream/asr/config.yaml
5
+ device: cuda
6
+ downstream: asr
7
+ downstream_variant: null
8
+ evaluate_split: test
9
+ expdir: result/downstream/asr-push-to-hub
10
+ expname: asr-push-to-hub
11
+ from_hf_hub: true
12
+ hub: huggingface
13
+ init_ckpt: null
14
+ local_rank: null
15
+ mode: train
16
+ override: config.downstream_expert.datarc.libri_root='/home/leo/d/datasets/LibriSpeech',,config.downstream_expert.datarc.bucket_file='/home/leo/d/datasets/LibriSpeech/len_for_bucket',,config.runner.total_steps=10,,config.runner.save_step=5
17
+ past_exp: null
18
+ push_to_hf_hub: 'True'
19
+ seed: 1337
20
+ upstream: osanseviero/hubert_base
21
+ upstream_ckpt: null
22
+ upstream_feature_selection: hidden_states
23
+ upstream_model_config: null
24
+ upstream_model_name: model.pt
25
+ upstream_refresh: false
26
+ upstream_trainable: false
27
+ verbose: false
char.dict ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ | 1980202
2
+ E 1091870
3
+ T 789572
4
+ A 689048
5
+ O 647720
6
+ N 591778
7
+ I 585614
8
+ H 557204
9
+ S 545238
10
+ R 499568
11
+ D 380912
12
+ L 344952
13
+ U 242014
14
+ M 217730
15
+ C 210734
16
+ W 204598
17
+ F 195086
18
+ G 174098
19
+ Y 168548
20
+ P 146722
21
+ B 129608
22
+ V 81496
23
+ K 65070
24
+ ' 19660
25
+ X 12530
26
+ J 12062
27
+ Q 8164
28
+ Z 4916
config_2021-09-01-23-24-17.yaml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ downstream_expert:
2
+ datarc:
3
+ batch_size: 32
4
+ bucket_file: /home/leo/d/datasets/LibriSpeech/len_for_bucket
5
+ decoder_args:
6
+ beam: 5
7
+ beam_threshold: 25
8
+ criterion: ctc
9
+ decoder_type: None
10
+ kenlm_model: /path/to/KenLM
11
+ lexicon: /path/to/4-gram.arpa
12
+ lm_weight: 2
13
+ nbest: 1
14
+ sil_weight: 0
15
+ unk_weight: -math.inf
16
+ word_score: -1
17
+ dev-clean:
18
+ - dev-clean
19
+ dev-other:
20
+ - dev-other
21
+ dict_path: ./downstream/asr/char.dict
22
+ eval_batch_size: 1
23
+ libri_root: /home/leo/d/datasets/LibriSpeech
24
+ num_workers: 12
25
+ test-clean:
26
+ - test-clean
27
+ test-other:
28
+ - test-other
29
+ train:
30
+ - train-clean-100
31
+ train_batch_size: 32
32
+ zero_infinity: true
33
+ modelrc:
34
+ RNNs:
35
+ bidirection: true
36
+ dim:
37
+ - 1024
38
+ - 1024
39
+ dropout:
40
+ - 0.2
41
+ - 0.2
42
+ layer_norm:
43
+ - false
44
+ - false
45
+ module: LSTM
46
+ proj:
47
+ - false
48
+ - false
49
+ sample_rate:
50
+ - 1
51
+ - 1
52
+ sample_style: concat
53
+ total_rate: -1
54
+ Wav2Letter:
55
+ total_rate: 320
56
+ project_dim: 1024
57
+ select: RNNs
58
+ optimizer:
59
+ lr: 0.0001
60
+ name: TorchOptim
61
+ torch_optim_name: Adam
62
+ runner:
63
+ eval_dataloaders:
64
+ - dev-clean
65
+ eval_step: 2000
66
+ gradient_accumulate_steps: 1
67
+ gradient_clipping: 1
68
+ log_step: 100
69
+ max_keep: 1
70
+ save_step: 5
71
+ total_steps: 10
72
+ specaug:
73
+ apply_freq_mask: true
74
+ apply_time_mask: true
75
+ apply_time_warp: true
76
+ freq_mask_width_range:
77
+ - 0
78
+ - 50
79
+ num_freq_mask: 4
80
+ num_time_mask: 2
81
+ time_mask_width_range:
82
+ - 0
83
+ - 40
84
+ time_warp_window: 5
events.out.tfevents.1630509866.speechlab ADDED
File without changes
hub_repo ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 52e98aadf14d5152a1381f0cf824cc9ecbb6c8eb
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f498f67241452a9e69d7ef2c8bdd4fdbb983a33423590aba9967704a9bf592
3
+ size 513965775
model.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from s3prl.downstream.runner import Runner
2
+ from typing import Dict
3
+ import torch
4
+ import os
5
+
6
+
7
class PreTrainedModel(Runner):
    """Runner subclass restored from a checkpoint directory for CPU inference."""

    def __init__(self, path=""):
        """
        Restore the runner from the checkpoint stored under ``path``.

        Args:
            path: Directory expected to contain ``model.ckpt`` and
                ``char.dict``. Defaults to the current directory.
        """
        checkpoint_path = os.path.join(path, "model.ckpt")
        # NOTE(review): torch.load unpickles the file; presumably only
        # trusted checkpoints are passed here -- confirm.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")

        # Override the stored run arguments so the parent initializer
        # reads this same checkpoint, in inference mode, on the CPU.
        args = checkpoint["Args"]
        args.init_ckpt = checkpoint_path
        args.mode = "inference"
        args.device = "cpu"

        # Point the dictionary path at the copy shipped next to the checkpoint.
        config = checkpoint["Config"]
        datarc = config["downstream_expert"]["datarc"]
        datarc["dict_path"] = os.path.join(path, "char.dict")

        super().__init__(args, config)

    def __call__(self, inputs) -> Dict[str, str]:
        """
        Transcribe a single raw waveform.

        Args:
            inputs (:obj:`np.array`):
                The raw waveform of the received audio. By default at 16 kHz.
        Return:
            A :obj:`dict` shaped like ``{"text": "XXX"}`` holding the text
            detected in the input audio.
        """
        # Switch every registered model entry to evaluation mode.
        for entry in self.all_entries:
            entry.model.eval()

        # The models are called with a list of waveforms; wrap the single input.
        wavs = [torch.FloatTensor(inputs)]

        with torch.no_grad():
            upstream_features = self.upstream.model(wavs)
            selected_features = self.featurizer.model(wavs, upstream_features)
            predictions = self.downstream.model.inference(selected_features, [])

        return {"text": predictions[0]}