File size: 3,187 Bytes
0c3eda2
 
 
14109f8
0c3eda2
14109f8
0c3eda2
 
 
 
 
97a3a05
314ba90
0c3eda2
97a3a05
0c3eda2
 
 
97a3a05
 
314ba90
 
 
0c3eda2
 
 
97a3a05
314ba90
0c3eda2
97a3a05
0c3eda2
 
 
97a3a05
314ba90
0c3eda2
314ba90
0c3eda2
 
 
97a3a05
314ba90
 
 
 
0c3eda2
685fde8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2aa8f3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14109f8
 
 
 
 
 
 
0c3eda2
 
 
 
 
 
 
 
 
 
 
 
 
 
14109f8
0c3eda2
 
 
 
14109f8
0c3eda2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.02702702702702703,
  "eval_steps": 3,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002702702702702703,
      "grad_norm": 0.2378644198179245,
      "learning_rate": 2e-05,
      "loss": 2.4235,
      "step": 1
    },
    {
      "epoch": 0.002702702702702703,
      "eval_loss": 2.200819492340088,
      "eval_runtime": 23.968,
      "eval_samples_per_second": 3.254,
      "eval_steps_per_second": 3.254,
      "step": 1
    },
    {
      "epoch": 0.005405405405405406,
      "grad_norm": 0.2817547917366028,
      "learning_rate": 4e-05,
      "loss": 2.2166,
      "step": 2
    },
    {
      "epoch": 0.008108108108108109,
      "grad_norm": 0.22646959125995636,
      "learning_rate": 6e-05,
      "loss": 2.6545,
      "step": 3
    },
    {
      "epoch": 0.008108108108108109,
      "eval_loss": 2.1994752883911133,
      "eval_runtime": 23.8826,
      "eval_samples_per_second": 3.266,
      "eval_steps_per_second": 3.266,
      "step": 3
    },
    {
      "epoch": 0.010810810810810811,
      "grad_norm": 0.25837552547454834,
      "learning_rate": 8e-05,
      "loss": 2.625,
      "step": 4
    },
    {
      "epoch": 0.013513513513513514,
      "grad_norm": 0.3031831681728363,
      "learning_rate": 0.0001,
      "loss": 2.0932,
      "step": 5
    },
    {
      "epoch": 0.016216216216216217,
      "grad_norm": 0.24016548693180084,
      "learning_rate": 0.00012,
      "loss": 2.3491,
      "step": 6
    },
    {
      "epoch": 0.016216216216216217,
      "eval_loss": 2.193232774734497,
      "eval_runtime": 23.9312,
      "eval_samples_per_second": 3.259,
      "eval_steps_per_second": 3.259,
      "step": 6
    },
    {
      "epoch": 0.01891891891891892,
      "grad_norm": 0.24453988671302795,
      "learning_rate": 0.00014,
      "loss": 2.0364,
      "step": 7
    },
    {
      "epoch": 0.021621621621621623,
      "grad_norm": 0.3633563816547394,
      "learning_rate": 0.00016,
      "loss": 2.0773,
      "step": 8
    },
    {
      "epoch": 0.024324324324324326,
      "grad_norm": 0.29696428775787354,
      "learning_rate": 0.00018,
      "loss": 2.3294,
      "step": 9
    },
    {
      "epoch": 0.024324324324324326,
      "eval_loss": 2.1837782859802246,
      "eval_runtime": 24.0965,
      "eval_samples_per_second": 3.237,
      "eval_steps_per_second": 3.237,
      "step": 9
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 0.26503047347068787,
      "learning_rate": 0.0002,
      "loss": 2.1022,
      "step": 10
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3467932965273600.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}