cantillation
committed on
Commit
•
9457afd
1
Parent(s):
0d17908
Model save
Browse files
- README.md +36 -40
- config.json +10 -10
- generation_config.json +13 -13
- logs/events.out.tfevents.1720616464.8ba778dc7a53.54433.0 +3 -0
- model.safetensors +2 -2
- training_args.bin +2 -2
README.md
CHANGED
@@ -1,45 +1,42 @@
|
|
1 |
---
|
2 |
-
language:
|
3 |
-
- he
|
4 |
license: apache-2.0
|
5 |
-
base_model: openai/whisper-
|
6 |
tags:
|
7 |
-
- hf-asr-leaderboard
|
8 |
- generated_from_trainer
|
9 |
metrics:
|
10 |
- wer
|
11 |
model-index:
|
12 |
-
- name:
|
13 |
results: []
|
14 |
---
|
15 |
|
16 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
17 |
should probably proofread and complete it, then remove this comment. -->
|
18 |
|
19 |
-
#
|
20 |
|
21 |
-
This model is a fine-tuned version of [openai/whisper-
|
22 |
It achieves the following results on the evaluation set:
|
23 |
-
- Loss:
|
24 |
-
- Wer:
|
25 |
-
- Avg Precision Exact: 0.
|
26 |
-
- Avg Recall Exact: 0.
|
27 |
-
- Avg F1 Exact: 0.
|
28 |
-
- Avg Precision Letter Shift: 0.
|
29 |
-
- Avg Recall Letter Shift: 0.
|
30 |
-
- Avg F1 Letter Shift: 0.
|
31 |
-
- Avg Precision Word Level: 0.
|
32 |
-
- Avg Recall Word Level: 0.
|
33 |
-
- Avg F1 Word Level: 0.
|
34 |
-
- Avg Precision Word Shift: 0.
|
35 |
-
- Avg Recall Word Shift: 0.
|
36 |
-
- Avg F1 Word Shift: 0.
|
37 |
-
- Precision Median Exact: 0.
|
38 |
-
- Recall Median Exact: 0.
|
39 |
-
- F1 Median Exact: 0.
|
40 |
-
- Precision Max Exact: 0.
|
41 |
-
- Recall Max Exact: 0.
|
42 |
-
- F1 Max Exact: 0.
|
43 |
- Precision Min Exact: 0.0
|
44 |
- Recall Min Exact: 0.0
|
45 |
- F1 Min Exact: 0.0
|
@@ -70,28 +67,27 @@ More information needed
|
|
70 |
### Training hyperparameters
|
71 |
|
72 |
The following hyperparameters were used during training:
|
73 |
-
- learning_rate: 1e-
|
74 |
-
- train_batch_size:
|
75 |
- eval_batch_size: 32
|
76 |
- seed: 42
|
77 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
78 |
- lr_scheduler_type: linear
|
79 |
-
-
|
|
|
80 |
- mixed_precision_training: Native AMP
|
81 |
|
82 |
### Training results
|
83 |
|
84 |
-
| Training Loss | Epoch
|
85 |
-
|
86 |
-
| No log | 0.
|
87 |
-
|
|
88 |
-
| 1.8957 | 0.0 | 60 | 1.9215 | 102.0325 | 0.0804 | 0.0967 | 0.0861 | 0.1169 | 0.1421 | 0.1264 | 0.1370 | 0.1739 | 0.1512 | 0.2660 | 0.3394 | 0.2929 | 0.0833 | 0.0909 | 0.0833 | 0.25 | 0.4286 | 0.3158 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
|
89 |
-
| 1.2925 | 0.0 | 80 | 1.8710 | 98.7805 | 0.0834 | 0.0774 | 0.0795 | 0.1227 | 0.1114 | 0.1157 | 0.1433 | 0.1299 | 0.1351 | 0.2771 | 0.2479 | 0.2589 | 0.0714 | 0.0556 | 0.0645 | 0.3333 | 0.3636 | 0.3478 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
|
90 |
|
91 |
|
92 |
### Framework versions
|
93 |
|
94 |
-
- Transformers 4.
|
95 |
-
- Pytorch 2.2.1
|
96 |
-
- Datasets 2.
|
97 |
-
- Tokenizers 0.
|
|
|
1 |
---
|
|
|
|
|
2 |
license: apache-2.0
|
3 |
+
base_model: openai/whisper-tiny
|
4 |
tags:
|
|
|
5 |
- generated_from_trainer
|
6 |
metrics:
|
7 |
- wer
|
8 |
model-index:
|
9 |
+
- name: test
|
10 |
results: []
|
11 |
---
|
12 |
|
13 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
14 |
should probably proofread and complete it, then remove this comment. -->
|
15 |
|
16 |
+
# test
|
17 |
|
18 |
+
This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on an unknown dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 8.6412
|
21 |
+
- Wer: 161.3707
|
22 |
+
- Avg Precision Exact: 0.0022
|
23 |
+
- Avg Recall Exact: 0.0010
|
24 |
+
- Avg F1 Exact: 0.0013
|
25 |
+
- Avg Precision Letter Shift: 0.0160
|
26 |
+
- Avg Recall Letter Shift: 0.0017
|
27 |
+
- Avg F1 Letter Shift: 0.0030
|
28 |
+
- Avg Precision Word Level: 0.0171
|
29 |
+
- Avg Recall Word Level: 0.0191
|
30 |
+
- Avg F1 Word Level: 0.0124
|
31 |
+
- Avg Precision Word Shift: 0.0892
|
32 |
+
- Avg Recall Word Shift: 0.0453
|
33 |
+
- Avg F1 Word Shift: 0.0484
|
34 |
+
- Precision Median Exact: 0.0
|
35 |
+
- Recall Median Exact: 0.0
|
36 |
+
- F1 Median Exact: 0.0
|
37 |
+
- Precision Max Exact: 0.0667
|
38 |
+
- Recall Max Exact: 0.0303
|
39 |
+
- F1 Max Exact: 0.0417
|
40 |
- Precision Min Exact: 0.0
|
41 |
- Recall Min Exact: 0.0
|
42 |
- F1 Min Exact: 0.0
|
|
|
67 |
### Training hyperparameters
|
68 |
|
69 |
The following hyperparameters were used during training:
|
70 |
+
- learning_rate: 1e-06
|
71 |
+
- train_batch_size: 8
|
72 |
- eval_batch_size: 32
|
73 |
- seed: 42
|
74 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
75 |
- lr_scheduler_type: linear
|
76 |
+
- lr_scheduler_warmup_steps: 20
|
77 |
+
- training_steps: 5
|
78 |
- mixed_precision_training: Native AMP
|
79 |
|
80 |
### Training results
|
81 |
|
82 |
+
| Training Loss | Epoch | Step | Validation Loss | Wer | Avg Precision Exact | Avg Recall Exact | Avg F1 Exact | Avg Precision Letter Shift | Avg Recall Letter Shift | Avg F1 Letter Shift | Avg Precision Word Level | Avg Recall Word Level | Avg F1 Word Level | Avg Precision Word Shift | Avg Recall Word Shift | Avg F1 Word Shift | Precision Median Exact | Recall Median Exact | F1 Median Exact | Precision Max Exact | Recall Max Exact | F1 Max Exact | Precision Min Exact | Recall Min Exact | F1 Min Exact | Precision Min Letter Shift | Recall Min Letter Shift | F1 Min Letter Shift | Precision Min Word Level | Recall Min Word Level | F1 Min Word Level | Precision Min Word Shift | Recall Min Word Shift | F1 Min Word Shift |
|
83 |
+
|:-------------:|:------:|:----:|:---------------:|:--------:|:-------------------:|:----------------:|:------------:|:--------------------------:|:-----------------------:|:-------------------:|:------------------------:|:---------------------:|:-----------------:|:------------------------:|:---------------------:|:-----------------:|:----------------------:|:-------------------:|:---------------:|:-------------------:|:----------------:|:------------:|:-------------------:|:----------------:|:------------:|:--------------------------:|:-----------------------:|:-------------------:|:------------------------:|:---------------------:|:-----------------:|:------------------------:|:---------------------:|:-----------------:|
|
84 |
+
| No log | 0.0040 | 1 | 8.6412 | 161.3707 | 0.0022 | 0.0010 | 0.0013 | 0.0160 | 0.0017 | 0.0030 | 0.0171 | 0.0191 | 0.0124 | 0.0892 | 0.0453 | 0.0484 | 0.0 | 0.0 | 0.0 | 0.0667 | 0.0303 | 0.0417 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
|
85 |
+
| No log | 0.0202 | 5 | 8.6412 | 161.3707 | 0.0022 | 0.0010 | 0.0013 | 0.0160 | 0.0017 | 0.0030 | 0.0171 | 0.0191 | 0.0124 | 0.0892 | 0.0453 | 0.0484 | 0.0 | 0.0 | 0.0 | 0.0667 | 0.0303 | 0.0417 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
|
|
|
|
|
86 |
|
87 |
|
88 |
### Framework versions
|
89 |
|
90 |
+
- Transformers 4.41.2
|
91 |
+
- Pytorch 2.2.1
|
92 |
+
- Datasets 2.20.0
|
93 |
+
- Tokenizers 0.19.1
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "openai/whisper-
|
3 |
"activation_dropout": 0.0,
|
4 |
"activation_function": "gelu",
|
5 |
"apply_spec_augment": false,
|
@@ -13,18 +13,18 @@
|
|
13 |
],
|
14 |
"bos_token_id": 50257,
|
15 |
"classifier_proj_size": 256,
|
16 |
-
"d_model":
|
17 |
-
"decoder_attention_heads":
|
18 |
-
"decoder_ffn_dim":
|
19 |
"decoder_input_ids": null,
|
20 |
"decoder_layerdrop": 0.0,
|
21 |
-
"decoder_layers":
|
22 |
"decoder_start_token_id": 50258,
|
23 |
"dropout": 0.0,
|
24 |
-
"encoder_attention_heads":
|
25 |
-
"encoder_ffn_dim":
|
26 |
"encoder_layerdrop": 0.0,
|
27 |
-
"encoder_layers":
|
28 |
"eos_token_id": 50257,
|
29 |
"forced_decoder_ids": null,
|
30 |
"init_std": 0.02,
|
@@ -40,13 +40,13 @@
|
|
40 |
"max_target_positions": 448,
|
41 |
"median_filter_width": 7,
|
42 |
"model_type": "whisper",
|
43 |
-
"num_hidden_layers":
|
44 |
"num_mel_bins": 80,
|
45 |
"pad_token_id": 50257,
|
46 |
"scale_embedding": false,
|
47 |
"suppress_tokens": [],
|
48 |
"torch_dtype": "float32",
|
49 |
-
"transformers_version": "4.
|
50 |
"use_cache": false,
|
51 |
"use_weighted_layer_sum": false,
|
52 |
"vocab_size": 51896
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "openai/whisper-tiny",
|
3 |
"activation_dropout": 0.0,
|
4 |
"activation_function": "gelu",
|
5 |
"apply_spec_augment": false,
|
|
|
13 |
],
|
14 |
"bos_token_id": 50257,
|
15 |
"classifier_proj_size": 256,
|
16 |
+
"d_model": 384,
|
17 |
+
"decoder_attention_heads": 6,
|
18 |
+
"decoder_ffn_dim": 1536,
|
19 |
"decoder_input_ids": null,
|
20 |
"decoder_layerdrop": 0.0,
|
21 |
+
"decoder_layers": 4,
|
22 |
"decoder_start_token_id": 50258,
|
23 |
"dropout": 0.0,
|
24 |
+
"encoder_attention_heads": 6,
|
25 |
+
"encoder_ffn_dim": 1536,
|
26 |
"encoder_layerdrop": 0.0,
|
27 |
+
"encoder_layers": 4,
|
28 |
"eos_token_id": 50257,
|
29 |
"forced_decoder_ids": null,
|
30 |
"init_std": 0.02,
|
|
|
40 |
"max_target_positions": 448,
|
41 |
"median_filter_width": 7,
|
42 |
"model_type": "whisper",
|
43 |
+
"num_hidden_layers": 4,
|
44 |
"num_mel_bins": 80,
|
45 |
"pad_token_id": 50257,
|
46 |
"scale_embedding": false,
|
47 |
"suppress_tokens": [],
|
48 |
"torch_dtype": "float32",
|
49 |
+
"transformers_version": "4.41.2",
|
50 |
"use_cache": false,
|
51 |
"use_weighted_layer_sum": false,
|
52 |
"vocab_size": 51896
|
generation_config.json
CHANGED
@@ -1,28 +1,28 @@
|
|
1 |
{
|
2 |
"alignment_heads": [
|
3 |
[
|
4 |
-
|
5 |
-
|
6 |
],
|
7 |
[
|
8 |
-
|
9 |
-
|
10 |
],
|
11 |
[
|
12 |
-
|
13 |
-
|
14 |
],
|
15 |
[
|
16 |
-
|
17 |
-
|
18 |
],
|
19 |
[
|
20 |
-
|
21 |
-
|
22 |
],
|
23 |
[
|
24 |
-
|
25 |
-
|
26 |
]
|
27 |
],
|
28 |
"begin_suppress_tokens": [
|
@@ -245,6 +245,6 @@
|
|
245 |
"transcribe": 50359,
|
246 |
"translate": 50358
|
247 |
},
|
248 |
-
"transformers_version": "4.
|
249 |
"use_cache": false
|
250 |
}
|
|
|
1 |
{
|
2 |
"alignment_heads": [
|
3 |
[
|
4 |
+
2,
|
5 |
+
2
|
6 |
],
|
7 |
[
|
8 |
+
3,
|
9 |
+
0
|
10 |
],
|
11 |
[
|
12 |
+
3,
|
13 |
+
2
|
14 |
],
|
15 |
[
|
16 |
+
3,
|
17 |
+
3
|
18 |
],
|
19 |
[
|
20 |
+
3,
|
21 |
+
4
|
22 |
],
|
23 |
[
|
24 |
+
3,
|
25 |
+
5
|
26 |
]
|
27 |
],
|
28 |
"begin_suppress_tokens": [
|
|
|
245 |
"transcribe": 50359,
|
246 |
"translate": 50358
|
247 |
},
|
248 |
+
"transformers_version": "4.41.2",
|
249 |
"use_cache": false
|
250 |
}
|
logs/events.out.tfevents.1720616464.8ba778dc7a53.54433.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5f34cb197ab8e52228a36a71ebd2abfcce77200b18e6f734f912035ed4fbc61
|
3 |
+
size 10373
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dcc940ee6ceacb215635c54c91d2163fc7bff40dd2b61939b601e07ca78edee
|
3 |
+
size 151109288
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b4a71d5782b69d52484d7b28c3d46d9a266c9265c4bc84a8a01b1c68fc01b19
|
3 |
+
size 5240
|