File size: 3,183 Bytes
0c3eda2
 
 
97a3a05
0c3eda2
bd1ad57
0c3eda2
 
 
 
 
97a3a05
 
0c3eda2
97a3a05
0c3eda2
 
 
97a3a05
 
 
 
 
0c3eda2
 
 
97a3a05
 
0c3eda2
97a3a05
0c3eda2
 
 
97a3a05
 
0c3eda2
97a3a05
0c3eda2
 
 
97a3a05
 
 
 
 
0c3eda2
dd06f81
 
97a3a05
 
dd06f81
97a3a05
dd06f81
 
 
97a3a05
 
dd06f81
97a3a05
dd06f81
 
 
97a3a05
 
dd06f81
97a3a05
dd06f81
 
 
97a3a05
 
 
 
 
dd06f81
5fdc735
 
97a3a05
 
5fdc735
97a3a05
5fdc735
 
 
97a3a05
 
5fdc735
97a3a05
5fdc735
 
 
97a3a05
 
5fdc735
97a3a05
5fdc735
 
 
97a3a05
 
 
 
 
5fdc735
bd1ad57
 
97a3a05
 
bd1ad57
97a3a05
bd1ad57
0c3eda2
 
 
 
 
 
 
 
 
 
 
 
 
 
bd1ad57
0c3eda2
 
 
 
97a3a05
0c3eda2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.02702702702702703,
  "eval_steps": 3,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002702702702702703,
      "grad_norm": 0.22516648471355438,
      "learning_rate": 2e-05,
      "loss": 2.4235,
      "step": 1
    },
    {
      "epoch": 0.002702702702702703,
      "eval_loss": 2.200819492340088,
      "eval_runtime": 8.0954,
      "eval_samples_per_second": 9.635,
      "eval_steps_per_second": 9.635,
      "step": 1
    },
    {
      "epoch": 0.005405405405405406,
      "grad_norm": 0.2626628577709198,
      "learning_rate": 4e-05,
      "loss": 2.2166,
      "step": 2
    },
    {
      "epoch": 0.008108108108108109,
      "grad_norm": 0.21446576714515686,
      "learning_rate": 6e-05,
      "loss": 2.6546,
      "step": 3
    },
    {
      "epoch": 0.008108108108108109,
      "eval_loss": 2.199615955352783,
      "eval_runtime": 8.2871,
      "eval_samples_per_second": 9.412,
      "eval_steps_per_second": 9.412,
      "step": 3
    },
    {
      "epoch": 0.010810810810810811,
      "grad_norm": 0.2510293424129486,
      "learning_rate": 8e-05,
      "loss": 2.6251,
      "step": 4
    },
    {
      "epoch": 0.013513513513513514,
      "grad_norm": 0.2600695788860321,
      "learning_rate": 0.0001,
      "loss": 2.0934,
      "step": 5
    },
    {
      "epoch": 0.016216216216216217,
      "grad_norm": 0.23503589630126953,
      "learning_rate": 0.00012,
      "loss": 2.3495,
      "step": 6
    },
    {
      "epoch": 0.016216216216216217,
      "eval_loss": 2.193514585494995,
      "eval_runtime": 8.4696,
      "eval_samples_per_second": 9.209,
      "eval_steps_per_second": 9.209,
      "step": 6
    },
    {
      "epoch": 0.01891891891891892,
      "grad_norm": 0.23834799230098724,
      "learning_rate": 0.00014,
      "loss": 2.0365,
      "step": 7
    },
    {
      "epoch": 0.021621621621621623,
      "grad_norm": 0.34777796268463135,
      "learning_rate": 0.00016,
      "loss": 2.0777,
      "step": 8
    },
    {
      "epoch": 0.024324324324324326,
      "grad_norm": 0.2863074839115143,
      "learning_rate": 0.00018,
      "loss": 2.3295,
      "step": 9
    },
    {
      "epoch": 0.024324324324324326,
      "eval_loss": 2.1840600967407227,
      "eval_runtime": 8.339,
      "eval_samples_per_second": 9.354,
      "eval_steps_per_second": 9.354,
      "step": 9
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 0.25496789813041687,
      "learning_rate": 0.0002,
      "loss": 2.1022,
      "step": 10
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3467932965273600.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}