File size: 2,359 Bytes
cd1ce5e
 
 
ceafa83
cd1ce5e
ceafa83
cd1ce5e
 
 
 
 
 
7cc434b
cd1ce5e
 
 
 
 
 
 
7cc434b
 
 
cd1ce5e
 
 
 
7cc434b
cd1ce5e
 
 
 
 
 
7cc434b
cd1ce5e
7cc434b
cd1ce5e
 
 
 
7cc434b
 
 
 
cd1ce5e
ceafa83
 
 
7cc434b
ceafa83
7cc434b
ceafa83
 
 
 
7cc434b
ceafa83
7cc434b
ceafa83
 
 
 
7cc434b
ceafa83
7cc434b
ceafa83
 
 
 
7cc434b
 
 
 
ceafa83
cd1ce5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceafa83
cd1ce5e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.899535370090886e-05,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.4832558950151478e-05,
      "grad_norm": 1032.5780029296875,
      "learning_rate": 2e-05,
      "loss": 95.6979,
      "step": 1
    },
    {
      "epoch": 1.4832558950151478e-05,
      "eval_loss": 23.534069061279297,
      "eval_runtime": 1979.9599,
      "eval_samples_per_second": 14.337,
      "eval_steps_per_second": 7.169,
      "step": 1
    },
    {
      "epoch": 2.9665117900302956e-05,
      "grad_norm": 1325.1285400390625,
      "learning_rate": 4e-05,
      "loss": 96.301,
      "step": 2
    },
    {
      "epoch": 4.449767685045443e-05,
      "grad_norm": 1420.2266845703125,
      "learning_rate": 6e-05,
      "loss": 95.2096,
      "step": 3
    },
    {
      "epoch": 4.449767685045443e-05,
      "eval_loss": 23.52667999267578,
      "eval_runtime": 1953.433,
      "eval_samples_per_second": 14.532,
      "eval_steps_per_second": 7.266,
      "step": 3
    },
    {
      "epoch": 5.933023580060591e-05,
      "grad_norm": 1357.5682373046875,
      "learning_rate": 8e-05,
      "loss": 93.3733,
      "step": 4
    },
    {
      "epoch": 7.416279475075739e-05,
      "grad_norm": 1045.089599609375,
      "learning_rate": 0.0001,
      "loss": 94.556,
      "step": 5
    },
    {
      "epoch": 8.899535370090886e-05,
      "grad_norm": 1248.4478759765625,
      "learning_rate": 0.00012,
      "loss": 94.1798,
      "step": 6
    },
    {
      "epoch": 8.899535370090886e-05,
      "eval_loss": 23.505847930908203,
      "eval_runtime": 2012.6089,
      "eval_samples_per_second": 14.105,
      "eval_steps_per_second": 7.053,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 53623740432384.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}