Fhrozen committed on
Commit
695f67d
1 Parent(s): fdd521a
README.md CHANGED
@@ -1,3 +1,25 @@
1
  ---
 
 
 
 
 
 
 
 
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - text-to-speech
6
+ language:
7
+ - en
8
+ datasets:
9
+ - libritts
10
  license: cc-by-4.0
11
  ---
12
+
13
+ ## TTS model - ProDiff with GST + X-Vector
14
+
15
+ **No support given.**
16
+
17
+ ### Details
18
+
19
+ ```
20
+ num_iters_per_epoch: 250
21
+ max_epoch: 800
22
+ batch_bins: 8000000
23
+ tts_conf:
24
+   spk_embed_dim: 192
25
+ ```
dump/xvector/dev-clean_phn/spk_xvector.ark ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d722f8ab5a511b81e45c36a6d87bfdcd7224ceb6539799c8b74f9ab5c8f6cadc
3
+ size 76265
dump/xvector/dev-clean_phn/spk_xvector.scp ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1272_128104 dump/xvector/dev-clean_phn/spk_xvector.ark:12
2
+ 1272_135031 dump/xvector/dev-clean_phn/spk_xvector.ark:807
3
+ 1272_141231 dump/xvector/dev-clean_phn/spk_xvector.ark:1602
4
+ 1462_170138 dump/xvector/dev-clean_phn/spk_xvector.ark:2397
5
+ 1462_170142 dump/xvector/dev-clean_phn/spk_xvector.ark:3192
6
+ 1462_170145 dump/xvector/dev-clean_phn/spk_xvector.ark:3987
7
+ 1673_143396 dump/xvector/dev-clean_phn/spk_xvector.ark:4782
8
+ 1673_143397 dump/xvector/dev-clean_phn/spk_xvector.ark:5577
9
+ 174_168635 dump/xvector/dev-clean_phn/spk_xvector.ark:6371
10
+ 174_50561 dump/xvector/dev-clean_phn/spk_xvector.ark:7164
11
+ 174_84280 dump/xvector/dev-clean_phn/spk_xvector.ark:7957
12
+ 1919_142785 dump/xvector/dev-clean_phn/spk_xvector.ark:8752
13
+ 1988_147956 dump/xvector/dev-clean_phn/spk_xvector.ark:9547
14
+ 1988_148538 dump/xvector/dev-clean_phn/spk_xvector.ark:10342
15
+ 1988_24833 dump/xvector/dev-clean_phn/spk_xvector.ark:11136
16
+ 1993_147149 dump/xvector/dev-clean_phn/spk_xvector.ark:11931
17
+ 1993_147964 dump/xvector/dev-clean_phn/spk_xvector.ark:12726
18
+ 1993_147965 dump/xvector/dev-clean_phn/spk_xvector.ark:13521
19
+ 1993_147966 dump/xvector/dev-clean_phn/spk_xvector.ark:14316
20
+ 2035_147960 dump/xvector/dev-clean_phn/spk_xvector.ark:15111
21
+ 2035_147961 dump/xvector/dev-clean_phn/spk_xvector.ark:15906
22
+ 2035_152373 dump/xvector/dev-clean_phn/spk_xvector.ark:16701
23
+ 2078_142845 dump/xvector/dev-clean_phn/spk_xvector.ark:17496
24
+ 2086_149214 dump/xvector/dev-clean_phn/spk_xvector.ark:18291
25
+ 2086_149220 dump/xvector/dev-clean_phn/spk_xvector.ark:19086
26
+ 2277_149874 dump/xvector/dev-clean_phn/spk_xvector.ark:19881
27
+ 2277_149896 dump/xvector/dev-clean_phn/spk_xvector.ark:20676
28
+ 2277_149897 dump/xvector/dev-clean_phn/spk_xvector.ark:21471
29
+ 2412_153947 dump/xvector/dev-clean_phn/spk_xvector.ark:22266
30
+ 2412_153948 dump/xvector/dev-clean_phn/spk_xvector.ark:23061
31
+ 2412_153954 dump/xvector/dev-clean_phn/spk_xvector.ark:23856
32
+ 2428_83699 dump/xvector/dev-clean_phn/spk_xvector.ark:24650
33
+ 2428_83705 dump/xvector/dev-clean_phn/spk_xvector.ark:25444
34
+ 251_118436 dump/xvector/dev-clean_phn/spk_xvector.ark:26238
35
+ 251_136532 dump/xvector/dev-clean_phn/spk_xvector.ark:27032
36
+ 251_137823 dump/xvector/dev-clean_phn/spk_xvector.ark:27826
37
+ 2803_154320 dump/xvector/dev-clean_phn/spk_xvector.ark:28621
38
+ 2803_154328 dump/xvector/dev-clean_phn/spk_xvector.ark:29416
39
+ 2803_161169 dump/xvector/dev-clean_phn/spk_xvector.ark:30211
40
+ 2902_9006 dump/xvector/dev-clean_phn/spk_xvector.ark:31004
41
+ 2902_9008 dump/xvector/dev-clean_phn/spk_xvector.ark:31797
42
+ 3000_15664 dump/xvector/dev-clean_phn/spk_xvector.ark:32591
43
+ 3081_166546 dump/xvector/dev-clean_phn/spk_xvector.ark:33386
44
+ 3170_137482 dump/xvector/dev-clean_phn/spk_xvector.ark:34181
45
+ 3536_23268 dump/xvector/dev-clean_phn/spk_xvector.ark:34975
46
+ 3536_8226 dump/xvector/dev-clean_phn/spk_xvector.ark:35768
47
+ 3576_138058 dump/xvector/dev-clean_phn/spk_xvector.ark:36563
48
+ 3752_4943 dump/xvector/dev-clean_phn/spk_xvector.ark:37356
49
+ 3752_4944 dump/xvector/dev-clean_phn/spk_xvector.ark:38149
50
+ 3853_163249 dump/xvector/dev-clean_phn/spk_xvector.ark:38944
51
+ 422_122949 dump/xvector/dev-clean_phn/spk_xvector.ark:39738
52
+ 5338_24615 dump/xvector/dev-clean_phn/spk_xvector.ark:40532
53
+ 5338_24640 dump/xvector/dev-clean_phn/spk_xvector.ark:41326
54
+ 5338_284437 dump/xvector/dev-clean_phn/spk_xvector.ark:42121
55
+ 5536_43358 dump/xvector/dev-clean_phn/spk_xvector.ark:42915
56
+ 5536_43359 dump/xvector/dev-clean_phn/spk_xvector.ark:43709
57
+ 5536_43363 dump/xvector/dev-clean_phn/spk_xvector.ark:44503
58
+ 5694_64025 dump/xvector/dev-clean_phn/spk_xvector.ark:45297
59
+ 5694_64029 dump/xvector/dev-clean_phn/spk_xvector.ark:46091
60
+ 5694_64038 dump/xvector/dev-clean_phn/spk_xvector.ark:46885
61
+ 5895_34615 dump/xvector/dev-clean_phn/spk_xvector.ark:47679
62
+ 5895_34622 dump/xvector/dev-clean_phn/spk_xvector.ark:48473
63
+ 5895_34629 dump/xvector/dev-clean_phn/spk_xvector.ark:49267
64
+ 6241_61943 dump/xvector/dev-clean_phn/spk_xvector.ark:50061
65
+ 6241_61946 dump/xvector/dev-clean_phn/spk_xvector.ark:50855
66
+ 6241_66616 dump/xvector/dev-clean_phn/spk_xvector.ark:51649
67
+ 6295_244435 dump/xvector/dev-clean_phn/spk_xvector.ark:52444
68
+ 6295_64301 dump/xvector/dev-clean_phn/spk_xvector.ark:53238
69
+ 6313_66125 dump/xvector/dev-clean_phn/spk_xvector.ark:54032
70
+ 6313_66129 dump/xvector/dev-clean_phn/spk_xvector.ark:54826
71
+ 6313_76958 dump/xvector/dev-clean_phn/spk_xvector.ark:55620
72
+ 6319_275224 dump/xvector/dev-clean_phn/spk_xvector.ark:56415
73
+ 6319_57405 dump/xvector/dev-clean_phn/spk_xvector.ark:57209
74
+ 6319_64726 dump/xvector/dev-clean_phn/spk_xvector.ark:58003
75
+ 6345_64257 dump/xvector/dev-clean_phn/spk_xvector.ark:58797
76
+ 6345_93302 dump/xvector/dev-clean_phn/spk_xvector.ark:59591
77
+ 6345_93306 dump/xvector/dev-clean_phn/spk_xvector.ark:60385
78
+ 652_129742 dump/xvector/dev-clean_phn/spk_xvector.ark:61179
79
+ 652_130737 dump/xvector/dev-clean_phn/spk_xvector.ark:61973
80
+ 777_126732 dump/xvector/dev-clean_phn/spk_xvector.ark:62767
81
+ 7850_111771 dump/xvector/dev-clean_phn/spk_xvector.ark:63562
82
+ 7850_281318 dump/xvector/dev-clean_phn/spk_xvector.ark:64357
83
+ 7850_286674 dump/xvector/dev-clean_phn/spk_xvector.ark:65152
84
+ 7850_73752 dump/xvector/dev-clean_phn/spk_xvector.ark:65946
85
+ 7976_105575 dump/xvector/dev-clean_phn/spk_xvector.ark:66741
86
+ 7976_110124 dump/xvector/dev-clean_phn/spk_xvector.ark:67536
87
+ 7976_110523 dump/xvector/dev-clean_phn/spk_xvector.ark:68331
88
+ 8297_275154 dump/xvector/dev-clean_phn/spk_xvector.ark:69126
89
+ 8297_275155 dump/xvector/dev-clean_phn/spk_xvector.ark:69921
90
+ 8297_275156 dump/xvector/dev-clean_phn/spk_xvector.ark:70716
91
+ 84_121123 dump/xvector/dev-clean_phn/spk_xvector.ark:71509
92
+ 84_121550 dump/xvector/dev-clean_phn/spk_xvector.ark:72302
93
+ 8842_302196 dump/xvector/dev-clean_phn/spk_xvector.ark:73097
94
+ 8842_302201 dump/xvector/dev-clean_phn/spk_xvector.ark:73892
95
+ 8842_302203 dump/xvector/dev-clean_phn/spk_xvector.ark:74687
96
+ 8842_304647 dump/xvector/dev-clean_phn/spk_xvector.ark:75482
dump/xvector/test-clean_phn/spk_xvector.ark ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16195491f8b865a1c6efa87133219fe8cf8a07b4a877b5dbb9043e5ebcdfe104
3
+ size 64316
dump/xvector/test-clean_phn/spk_xvector.scp ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1089_134686 dump/xvector/test-clean_phn/spk_xvector.ark:12
2
+ 1089_134691 dump/xvector/test-clean_phn/spk_xvector.ark:807
3
+ 1188_133604 dump/xvector/test-clean_phn/spk_xvector.ark:1602
4
+ 121_121726 dump/xvector/test-clean_phn/spk_xvector.ark:2396
5
+ 121_123859 dump/xvector/test-clean_phn/spk_xvector.ark:3190
6
+ 121_127105 dump/xvector/test-clean_phn/spk_xvector.ark:3984
7
+ 1221_135766 dump/xvector/test-clean_phn/spk_xvector.ark:4779
8
+ 1221_135767 dump/xvector/test-clean_phn/spk_xvector.ark:5574
9
+ 1284_1180 dump/xvector/test-clean_phn/spk_xvector.ark:6367
10
+ 1284_1181 dump/xvector/test-clean_phn/spk_xvector.ark:7160
11
+ 1320_122612 dump/xvector/test-clean_phn/spk_xvector.ark:7955
12
+ 1320_122617 dump/xvector/test-clean_phn/spk_xvector.ark:8750
13
+ 1580_141083 dump/xvector/test-clean_phn/spk_xvector.ark:9545
14
+ 1580_141084 dump/xvector/test-clean_phn/spk_xvector.ark:10340
15
+ 1995_1826 dump/xvector/test-clean_phn/spk_xvector.ark:11133
16
+ 1995_1836 dump/xvector/test-clean_phn/spk_xvector.ark:11926
17
+ 1995_1837 dump/xvector/test-clean_phn/spk_xvector.ark:12719
18
+ 2300_131720 dump/xvector/test-clean_phn/spk_xvector.ark:13514
19
+ 237_126133 dump/xvector/test-clean_phn/spk_xvector.ark:14308
20
+ 237_134493 dump/xvector/test-clean_phn/spk_xvector.ark:15102
21
+ 237_134500 dump/xvector/test-clean_phn/spk_xvector.ark:15896
22
+ 260_123286 dump/xvector/test-clean_phn/spk_xvector.ark:16690
23
+ 260_123288 dump/xvector/test-clean_phn/spk_xvector.ark:17484
24
+ 260_123440 dump/xvector/test-clean_phn/spk_xvector.ark:18278
25
+ 2830_3979 dump/xvector/test-clean_phn/spk_xvector.ark:19071
26
+ 2830_3980 dump/xvector/test-clean_phn/spk_xvector.ark:19864
27
+ 2961_961 dump/xvector/test-clean_phn/spk_xvector.ark:20656
28
+ 3570_5694 dump/xvector/test-clean_phn/spk_xvector.ark:21449
29
+ 3570_5695 dump/xvector/test-clean_phn/spk_xvector.ark:22242
30
+ 3570_5696 dump/xvector/test-clean_phn/spk_xvector.ark:23035
31
+ 3575_170457 dump/xvector/test-clean_phn/spk_xvector.ark:23830
32
+ 3729_6852 dump/xvector/test-clean_phn/spk_xvector.ark:24623
33
+ 4077_13751 dump/xvector/test-clean_phn/spk_xvector.ark:25417
34
+ 4077_13754 dump/xvector/test-clean_phn/spk_xvector.ark:26211
35
+ 4446_2271 dump/xvector/test-clean_phn/spk_xvector.ark:27004
36
+ 4446_2273 dump/xvector/test-clean_phn/spk_xvector.ark:27797
37
+ 4446_2275 dump/xvector/test-clean_phn/spk_xvector.ark:28590
38
+ 4507_16021 dump/xvector/test-clean_phn/spk_xvector.ark:29384
39
+ 4970_29093 dump/xvector/test-clean_phn/spk_xvector.ark:30178
40
+ 4970_29095 dump/xvector/test-clean_phn/spk_xvector.ark:30972
41
+ 4992_23283 dump/xvector/test-clean_phn/spk_xvector.ark:31766
42
+ 4992_41797 dump/xvector/test-clean_phn/spk_xvector.ark:32560
43
+ 4992_41806 dump/xvector/test-clean_phn/spk_xvector.ark:33354
44
+ 5105_28233 dump/xvector/test-clean_phn/spk_xvector.ark:34148
45
+ 5105_28240 dump/xvector/test-clean_phn/spk_xvector.ark:34942
46
+ 5105_28241 dump/xvector/test-clean_phn/spk_xvector.ark:35736
47
+ 5142_33396 dump/xvector/test-clean_phn/spk_xvector.ark:36530
48
+ 5142_36377 dump/xvector/test-clean_phn/spk_xvector.ark:37324
49
+ 5142_36586 dump/xvector/test-clean_phn/spk_xvector.ark:38118
50
+ 5142_36600 dump/xvector/test-clean_phn/spk_xvector.ark:38912
51
+ 5639_40744 dump/xvector/test-clean_phn/spk_xvector.ark:39706
52
+ 5683_32865 dump/xvector/test-clean_phn/spk_xvector.ark:40500
53
+ 5683_32866 dump/xvector/test-clean_phn/spk_xvector.ark:41294
54
+ 5683_32879 dump/xvector/test-clean_phn/spk_xvector.ark:42088
55
+ 61_70970 dump/xvector/test-clean_phn/spk_xvector.ark:42880
56
+ 672_122797 dump/xvector/test-clean_phn/spk_xvector.ark:43674
57
+ 6829_68769 dump/xvector/test-clean_phn/spk_xvector.ark:44468
58
+ 6829_68771 dump/xvector/test-clean_phn/spk_xvector.ark:45262
59
+ 6930_75918 dump/xvector/test-clean_phn/spk_xvector.ark:46056
60
+ 6930_76324 dump/xvector/test-clean_phn/spk_xvector.ark:46850
61
+ 6930_81414 dump/xvector/test-clean_phn/spk_xvector.ark:47644
62
+ 7021_79730 dump/xvector/test-clean_phn/spk_xvector.ark:48438
63
+ 7021_79740 dump/xvector/test-clean_phn/spk_xvector.ark:49232
64
+ 7021_79759 dump/xvector/test-clean_phn/spk_xvector.ark:50026
65
+ 7021_85628 dump/xvector/test-clean_phn/spk_xvector.ark:50820
66
+ 7127_75946 dump/xvector/test-clean_phn/spk_xvector.ark:51614
67
+ 7127_75947 dump/xvector/test-clean_phn/spk_xvector.ark:52408
68
+ 7176_88083 dump/xvector/test-clean_phn/spk_xvector.ark:53202
69
+ 7176_92135 dump/xvector/test-clean_phn/spk_xvector.ark:53996
70
+ 7729_102255 dump/xvector/test-clean_phn/spk_xvector.ark:54791
71
+ 8224_274384 dump/xvector/test-clean_phn/spk_xvector.ark:55586
72
+ 8230_279154 dump/xvector/test-clean_phn/spk_xvector.ark:56381
73
+ 8455_210777 dump/xvector/test-clean_phn/spk_xvector.ark:57176
74
+ 8463_287645 dump/xvector/test-clean_phn/spk_xvector.ark:57971
75
+ 8463_294825 dump/xvector/test-clean_phn/spk_xvector.ark:58766
76
+ 8463_294828 dump/xvector/test-clean_phn/spk_xvector.ark:59561
77
+ 8555_284447 dump/xvector/test-clean_phn/spk_xvector.ark:60356
78
+ 8555_284449 dump/xvector/test-clean_phn/spk_xvector.ark:61151
79
+ 8555_292519 dump/xvector/test-clean_phn/spk_xvector.ark:61946
80
+ 908_157963 dump/xvector/test-clean_phn/spk_xvector.ark:62740
81
+ 908_31957 dump/xvector/test-clean_phn/spk_xvector.ark:63533
dump/xvector/train-clean-460_phn/spk_xvector.ark ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b140d860c5014234c0233e6eacd6b2df1175f1ae6318a2f8bc94f258aff5fcc1
3
+ size 2032667
dump/xvector/train-clean-460_phn/spk_xvector.scp ADDED
The diff for this file is too large to render. See raw diff
 
exp/tts_prodiff_gst_xvector_raw_phn_none/config.yaml ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/prodiff_gst_xvector.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_prodiff_gst_xvector_raw_phn_none
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 6
10
+ num_att_plot: 3
11
+ num_valid_artifacts: 5
12
+ dist_backend: nccl
13
+ dist_init_method: env://
14
+ dist_world_size: null
15
+ dist_rank: null
16
+ local_rank: 0
17
+ dist_master_addr: null
18
+ dist_master_port: null
19
+ dist_launcher: null
20
+ multiprocessing_distributed: false
21
+ unused_parameters: false
22
+ sharded_ddp: false
23
+ growth_interval: 0
24
+ min_grad_scale: -1
25
+ cudnn_enabled: true
26
+ cudnn_benchmark: false
27
+ cudnn_deterministic: true
28
+ collect_stats: false
29
+ write_collected_feats: false
30
+ max_epoch: 800
31
+ patience: null
32
+ val_scheduler_criterion:
33
+ - valid
34
+ - loss
35
+ early_stopping_criterion:
36
+ - valid
37
+ - loss
38
+ - min
39
+ best_model_criterion:
40
+ - - valid
41
+ - loss
42
+ - min
43
+ - - train
44
+ - loss
45
+ - min
46
+ keep_nbest_models: 5
47
+ nbest_averaging_interval: 0
48
+ grad_clip: 1.0
49
+ grad_clip_type: 2.0
50
+ grad_noise: false
51
+ accum_grad: 1
52
+ no_forward_run: false
53
+ resume: true
54
+ train_dtype: float32
55
+ use_amp: false
56
+ log_interval: null
57
+ use_matplotlib: true
58
+ use_tensorboard: true
59
+ detect_anomaly: false
60
+ pretrain_path: null
61
+ init_param: []
62
+ ignore_init_mismatch: false
63
+ freeze_param: []
64
+ num_iters_per_epoch: 250
65
+ batch_size: 20
66
+ valid_batch_size: null
67
+ batch_bins: 8000000
68
+ valid_batch_bins: null
69
+ train_shape_file:
70
+ - exp/tts_stats_raw_phn_none/train/text_shape.phn
71
+ - exp/tts_stats_raw_phn_none/train/speech_shape
72
+ valid_shape_file:
73
+ - exp/tts_stats_raw_phn_none/valid/text_shape.phn
74
+ - exp/tts_stats_raw_phn_none/valid/speech_shape
75
+ batch_type: numel
76
+ valid_batch_type: null
77
+ fold_length:
78
+ - 150
79
+ - 240000
80
+ sort_in_batch: descending
81
+ sort_batch: descending
82
+ multiple_iterator: false
83
+ chunk_length: 500
84
+ chunk_shift_ratio: 0.5
85
+ num_cache_chunks: 1024
86
+ train_data_path_and_name_and_type:
87
+ - - dump/raw/train-clean-460_phn/text
88
+ - text
89
+ - text
90
+ - - data/train-clean-460_phn/durations
91
+ - durations
92
+ - text_int
93
+ - - dump/raw/train-clean-460_phn/wav.scp
94
+ - speech
95
+ - sound
96
+ - - exp/tts_stats_raw_phn_none/train/collect_feats/pitch.scp
97
+ - pitch
98
+ - npy
99
+ - - exp/tts_stats_raw_phn_none/train/collect_feats/energy.scp
100
+ - energy
101
+ - npy
102
+ - - dump/xvector/train-clean-460_phn/xvector.scp
103
+ - spembs
104
+ - kaldi_ark
105
+ valid_data_path_and_name_and_type:
106
+ - - dump/raw/dev-clean_phn/text
107
+ - text
108
+ - text
109
+ - - data/dev-clean_phn/durations
110
+ - durations
111
+ - text_int
112
+ - - dump/raw/dev-clean_phn/wav.scp
113
+ - speech
114
+ - sound
115
+ - - exp/tts_stats_raw_phn_none/valid/collect_feats/pitch.scp
116
+ - pitch
117
+ - npy
118
+ - - exp/tts_stats_raw_phn_none/valid/collect_feats/energy.scp
119
+ - energy
120
+ - npy
121
+ - - dump/xvector/dev-clean_phn/xvector.scp
122
+ - spembs
123
+ - kaldi_ark
124
+ allow_variable_data_keys: false
125
+ max_cache_size: 0.0
126
+ max_cache_fd: 32
127
+ valid_max_cache_size: null
128
+ optim: adamw
129
+ optim_conf:
130
+ lr: 1.0
131
+ betas:
132
+ - 0.9
133
+ - 0.98
134
+ scheduler: noamlr
135
+ scheduler_conf:
136
+ model_size: 384
137
+ warmup_steps: 2000
138
+ token_list:
139
+ - <blank>
140
+ - <unk>
141
+ - ˈ
142
+ - ɪ
143
+ - ː
144
+ - t
145
+ - n
146
+ - d
147
+ - s
148
+ - ɹ
149
+ - ə
150
+ - l
151
+ - æ
152
+ - i
153
+ - ð
154
+ - ɛ
155
+ - ʌ
156
+ - m
157
+ - k
158
+ - z
159
+ - ʊ
160
+ - a
161
+ - w
162
+ - h
163
+ - sil
164
+ - p
165
+ - v
166
+ - f
167
+ - u
168
+ - ','
169
+ - b
170
+ - ˌ
171
+ - ɚ
172
+ - o
173
+ - e
174
+ - ɑ
175
+ - .
176
+ - ʃ
177
+ - ɔ
178
+ - ŋ
179
+ - ɐ
180
+ - ᵻ
181
+ - ɡ
182
+ - ɜ
183
+ - θ
184
+ - j
185
+ - ɾ
186
+ - ʒ
187
+ - '?'
188
+ - '!'
189
+ - ̩
190
+ - '"'
191
+ - ʔ
192
+ - ''''
193
+ - r
194
+ - x
195
+ - ̃
196
+ - ç
197
+ - ʲ
198
+ - <sos/eos>
199
+ odim: null
200
+ model_conf:
201
+ requires_word_duration: false
202
+ use_preprocessor: true
203
+ token_type: phn
204
+ bpemodel: null
205
+ non_linguistic_symbols: null
206
+ cleaner: null
207
+ g2p: null
208
+ feats_extract: fbank
209
+ feats_extract_conf:
210
+ n_fft: 2048
211
+ hop_length: 300
212
+ win_length: 1200
213
+ fs: 24000
214
+ fmin: 80
215
+ fmax: 7600
216
+ n_mels: 80
217
+ normalize: global_mvn
218
+ normalize_conf:
219
+ stats_file: exp/tts_stats_raw_phn_none/train/feats_stats.npz
220
+ tts: prodiff
221
+ tts_conf:
222
+ adim: 256
223
+ aheads: 2
224
+ elayers: 4
225
+ eunits: 1024
226
+ positionwise_layer_type: conv1d-linear
227
+ positionwise_conv_kernel_size: 9
228
+ duration_predictor_layers: 2
229
+ duration_predictor_chans: 256
230
+ duration_predictor_kernel_size: 3
231
+ use_masking: true
232
+ use_scaled_pos_enc: true
233
+ encoder_normalize_before: true
234
+ reduction_factor: 1
235
+ init_type: xavier_uniform
236
+ init_enc_alpha: 1.0
237
+ transformer_enc_dropout_rate: 0.05
238
+ transformer_enc_positional_dropout_rate: 0.05
239
+ transformer_enc_attn_dropout_rate: 0.05
240
+ pitch_predictor_layers: 2
241
+ pitch_predictor_chans: 256
242
+ pitch_predictor_kernel_size: 3
243
+ pitch_predictor_dropout: 0.5
244
+ pitch_embed_kernel_size: 1
245
+ pitch_embed_dropout: 0.0
246
+ stop_gradient_from_pitch_predictor: true
247
+ energy_predictor_layers: 2
248
+ energy_predictor_chans: 256
249
+ energy_predictor_kernel_size: 3
250
+ energy_predictor_dropout: 0.5
251
+ energy_embed_kernel_size: 1
252
+ energy_embed_dropout: 0.0
253
+ stop_gradient_from_energy_predictor: false
254
+ spks: -1
255
+ spk_embed_dim: 192
256
+ denoiser_dim: 256
257
+ denoiser_layers: 20
258
+ denoiser_channels: 256
259
+ diffusion_steps: 4
260
+ diffusion_timescale: 1
261
+ diffusion_beta: 40.0
262
+ diffusion_scheduler: vpsde
263
+ diffusion_cycle_ln: 1
264
+ use_gst: true
265
+ gst_heads: 8
266
+ gst_tokens: 128
267
+ pitch_extract: dio
268
+ pitch_extract_conf:
269
+ fs: 24000
270
+ n_fft: 2048
271
+ hop_length: 300
272
+ f0max: 400
273
+ f0min: 80
274
+ reduction_factor: 1
275
+ pitch_normalize: global_mvn
276
+ pitch_normalize_conf:
277
+ stats_file: exp/tts_stats_raw_phn_none/train/pitch_stats.npz
278
+ energy_extract: energy
279
+ energy_extract_conf:
280
+ fs: 24000
281
+ n_fft: 2048
282
+ hop_length: 300
283
+ win_length: 1200
284
+ reduction_factor: 1
285
+ energy_normalize: global_mvn
286
+ energy_normalize_conf:
287
+ stats_file: exp/tts_stats_raw_phn_none/train/energy_stats.npz
288
+ required:
289
+ - output_dir
290
+ - token_list
291
+ version: '202207'
292
+ distributed: false
exp/tts_prodiff_gst_xvector_raw_phn_none/train.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515c13e93c97102ee056ecb876c584b3a25bb19bc819c050f636eb0e82ab5e8e
3
+ size 114211753
exp/tts_stats_raw_phn_none/train/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77441f19c1089a1f1bdd278c65306f58cb2c3175add2589b960da0770378255
3
+ size 770
exp/tts_stats_raw_phn_none/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1d27e5047a24f16c71120cff991928e2f34de2d8dae65e42ba8ee2e3cf1741
3
+ size 1402
exp/tts_stats_raw_phn_none/train/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c675d24eb48cba744aaac4f1820104691aab94938b1f41e582221293566496b
3
+ size 770
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202207'
2
+ files:
3
+ model_file: exp/tts_prodiff_gst_xvector_raw_phn_none/train.loss.ave_5best.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) \n[GCC 11.2.0]"
5
+ timestamp: 1673252764.559993
6
+ torch: 1.12.1
7
+ yaml_files:
8
+ train_config: exp/tts_prodiff_gst_xvector_raw_phn_none/config.yaml