File size: 2,307 Bytes
3a3b1b6
d07a7e7
 
 
3a3b1b6
d07a7e7
3a3b1b6
 
 
 
 
 
 
d07a7e7
3a3b1b6
 
 
 
d07a7e7
 
 
 
3a3b1b6
e5c5089
 
 
 
d07a7e7
e5c5089
 
 
 
d07a7e7
 
 
 
e5c5089
d68298f
 
 
 
d07a7e7
d68298f
 
 
 
d07a7e7
 
 
 
d68298f
d27a2cf
 
 
 
d07a7e7
d27a2cf
 
 
 
d07a7e7
 
 
 
d27a2cf
4d3928e
 
 
 
d07a7e7
4d3928e
 
 
 
d07a7e7
 
 
 
4d3928e
f51b3be
 
 
 
d07a7e7
f51b3be
 
 
 
d07a7e7
 
 
 
f51b3be
3a3b1b6
 
 
 
 
 
d07a7e7
3a3b1b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
{
  "best_metric": 2.475057601928711,
  "best_model_checkpoint": "./outputs/checkpoint-600",
  "epoch": 0.4371584699453552,
  "eval_steps": 100,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.7401,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.6424810886383057,
      "eval_runtime": 206.4108,
      "eval_samples_per_second": 30.396,
      "eval_steps_per_second": 3.803,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 2.6061,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.5929574966430664,
      "eval_runtime": 206.2177,
      "eval_samples_per_second": 30.424,
      "eval_steps_per_second": 3.807,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 2.5643,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.5578970909118652,
      "eval_runtime": 206.509,
      "eval_samples_per_second": 30.381,
      "eval_steps_per_second": 3.801,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 2.5383,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.5257716178894043,
      "eval_runtime": 206.1349,
      "eval_samples_per_second": 30.436,
      "eval_steps_per_second": 3.808,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 2.4959,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 2.5014102458953857,
      "eval_runtime": 282.6461,
      "eval_samples_per_second": 22.197,
      "eval_steps_per_second": 2.777,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 2.4693,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 2.475057601928711,
      "eval_runtime": 205.7759,
      "eval_samples_per_second": 30.489,
      "eval_steps_per_second": 3.815,
      "step": 600
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 1.752346796949504e+16,
  "trial_name": null,
  "trial_params": null
}