File size: 3,159 Bytes
e6931b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{
  "best_metric": 16.48035114189699,
  "best_model_checkpoint": "CHECKPOINTS/checkpoint-8",
  "epoch": 13.025,
  "eval_steps": 8,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.025,
      "eval_loss": 1.0526559352874756,
      "eval_runtime": 256.5611,
      "eval_samples_per_second": 0.873,
      "eval_steps_per_second": 0.008,
      "eval_wer": 16.48035114189699,
      "step": 8
    },
    {
      "epoch": 2.05,
      "eval_loss": 0.55241459608078,
      "eval_runtime": 257.8672,
      "eval_samples_per_second": 0.869,
      "eval_steps_per_second": 0.008,
      "eval_wer": 16.72038954804197,
      "step": 16
    },
    {
      "epoch": 3.075,
      "eval_loss": 0.4660560190677643,
      "eval_runtime": 258.0886,
      "eval_samples_per_second": 0.868,
      "eval_steps_per_second": 0.008,
      "eval_wer": 18.23606062684315,
      "step": 24
    },
    {
      "epoch": 5.025,
      "eval_loss": 0.41761377453804016,
      "eval_runtime": 270.1611,
      "eval_samples_per_second": 0.829,
      "eval_steps_per_second": 0.007,
      "eval_wer": 21.301693985323368,
      "step": 32
    },
    {
      "epoch": 6.05,
      "eval_loss": 0.4736975133419037,
      "eval_runtime": 287.4266,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.007,
      "eval_wer": 31.82909265482477,
      "step": 40
    },
    {
      "epoch": 7.075,
      "eval_loss": 0.4601687788963318,
      "eval_runtime": 256.8801,
      "eval_samples_per_second": 0.872,
      "eval_steps_per_second": 0.008,
      "eval_wer": 17.399355325423496,
      "step": 48
    },
    {
      "epoch": 9.025,
      "eval_loss": 0.45703616738319397,
      "eval_runtime": 256.3162,
      "eval_samples_per_second": 0.874,
      "eval_steps_per_second": 0.008,
      "eval_wer": 18.105754063507305,
      "step": 56
    },
    {
      "epoch": 10.05,
      "eval_loss": 0.4644158184528351,
      "eval_runtime": 256.0296,
      "eval_samples_per_second": 0.875,
      "eval_steps_per_second": 0.008,
      "eval_wer": 18.318359508950003,
      "step": 64
    },
    {
      "epoch": 11.075,
      "eval_loss": 0.46031108498573303,
      "eval_runtime": 256.8198,
      "eval_samples_per_second": 0.872,
      "eval_steps_per_second": 0.008,
      "eval_wer": 18.215485906316438,
      "step": 72
    },
    {
      "epoch": 13.025,
      "eval_loss": 0.4596288502216339,
      "eval_runtime": 254.7423,
      "eval_samples_per_second": 0.879,
      "eval_steps_per_second": 0.008,
      "eval_wer": 18.14690350456073,
      "step": 80
    },
    {
      "epoch": 13.025,
      "step": 80,
      "total_flos": 3.4790381338471956e+19,
      "train_loss": 0.15404987335205078,
      "train_runtime": 3979.2182,
      "train_samples_per_second": 2.573,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 500,
  "max_steps": 80,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 8,
  "total_flos": 3.4790381338471956e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}