File size: 2,333 Bytes
c3e763a
 
 
1b7aafe
c3e763a
1b7aafe
c3e763a
 
 
 
 
 
0cfa448
c3e763a
 
 
 
 
 
 
0cfa448
 
 
c3e763a
 
 
 
0cfa448
c3e763a
 
 
 
 
 
0cfa448
c3e763a
0cfa448
c3e763a
 
 
 
0cfa448
 
 
 
c3e763a
1b7aafe
 
 
0cfa448
1b7aafe
0cfa448
1b7aafe
 
 
 
0cfa448
1b7aafe
0cfa448
1b7aafe
 
 
 
0cfa448
1b7aafe
0cfa448
1b7aafe
 
 
 
0cfa448
 
 
 
1b7aafe
c3e763a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b7aafe
c3e763a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.013036393264530146,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021727322107550242,
      "grad_norm": 38.3082275390625,
      "learning_rate": 2e-05,
      "loss": 26.2925,
      "step": 1
    },
    {
      "epoch": 0.0021727322107550242,
      "eval_loss": 6.362123489379883,
      "eval_runtime": 69.9182,
      "eval_samples_per_second": 2.775,
      "eval_steps_per_second": 1.387,
      "step": 1
    },
    {
      "epoch": 0.0043454644215100485,
      "grad_norm": 34.35226821899414,
      "learning_rate": 4e-05,
      "loss": 23.3629,
      "step": 2
    },
    {
      "epoch": 0.006518196632265073,
      "grad_norm": 46.58251953125,
      "learning_rate": 6e-05,
      "loss": 31.458,
      "step": 3
    },
    {
      "epoch": 0.006518196632265073,
      "eval_loss": 6.052692413330078,
      "eval_runtime": 69.8071,
      "eval_samples_per_second": 2.779,
      "eval_steps_per_second": 1.39,
      "step": 3
    },
    {
      "epoch": 0.008690928843020097,
      "grad_norm": 34.892906188964844,
      "learning_rate": 8e-05,
      "loss": 22.7088,
      "step": 4
    },
    {
      "epoch": 0.010863661053775122,
      "grad_norm": 27.14177703857422,
      "learning_rate": 0.0001,
      "loss": 19.6406,
      "step": 5
    },
    {
      "epoch": 0.013036393264530146,
      "grad_norm": 33.119747161865234,
      "learning_rate": 0.00012,
      "loss": 24.4518,
      "step": 6
    },
    {
      "epoch": 0.013036393264530146,
      "eval_loss": 4.772887706756592,
      "eval_runtime": 70.422,
      "eval_samples_per_second": 2.755,
      "eval_steps_per_second": 1.377,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1081911388667904.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}