File size: 2,306 Bytes
3a3b1b6
be6bc28
d07a7e7
 
3a3b1b6
d07a7e7
3a3b1b6
 
 
 
 
 
 
d07a7e7
3a3b1b6
 
 
 
be6bc28
 
 
 
3a3b1b6
e5c5089
 
 
 
be6bc28
e5c5089
 
 
 
be6bc28
 
 
 
e5c5089
d68298f
 
 
 
be6bc28
d68298f
 
 
 
be6bc28
 
 
 
d68298f
d27a2cf
 
 
 
be6bc28
d27a2cf
 
 
 
be6bc28
 
 
 
d27a2cf
4d3928e
 
 
 
be6bc28
4d3928e
 
 
 
be6bc28
 
 
 
4d3928e
f51b3be
 
 
 
be6bc28
f51b3be
 
 
 
be6bc28
 
 
 
f51b3be
3a3b1b6
 
 
 
 
 
d07a7e7
3a3b1b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
{
  "best_metric": 2.474956512451172,
  "best_model_checkpoint": "./outputs/checkpoint-600",
  "epoch": 0.4371584699453552,
  "eval_steps": 100,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.7401,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.6424834728240967,
      "eval_runtime": 206.3704,
      "eval_samples_per_second": 30.402,
      "eval_steps_per_second": 3.804,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 2.6053,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.592982053756714,
      "eval_runtime": 206.2608,
      "eval_samples_per_second": 30.418,
      "eval_steps_per_second": 3.806,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 2.5637,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.558687448501587,
      "eval_runtime": 206.6302,
      "eval_samples_per_second": 30.363,
      "eval_steps_per_second": 3.799,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 2.5384,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.5261261463165283,
      "eval_runtime": 206.3988,
      "eval_samples_per_second": 30.397,
      "eval_steps_per_second": 3.803,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 2.4965,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 2.5020904541015625,
      "eval_runtime": 273.7006,
      "eval_samples_per_second": 22.923,
      "eval_steps_per_second": 2.868,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 2.4706,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 2.474956512451172,
      "eval_runtime": 206.4725,
      "eval_samples_per_second": 30.387,
      "eval_steps_per_second": 3.802,
      "step": 600
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 1.752346796949504e+16,
  "trial_name": null,
  "trial_params": null
}