File size: 1,695 Bytes
ab808b0
 
 
 
 
2cbfd51
ab808b0
 
 
 
 
2cbfd51
 
ab808b0
2cbfd51
ab808b0
 
 
2cbfd51
 
ab808b0
2cbfd51
ab808b0
 
 
2cbfd51
 
ab808b0
2cbfd51
ab808b0
 
 
2cbfd51
 
ab808b0
2cbfd51
ab808b0
 
 
2cbfd51
 
ab808b0
2cbfd51
ab808b0
 
 
2cbfd51
 
ab808b0
2cbfd51
ab808b0
 
 
 
2cbfd51
ab808b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2cbfd51
ab808b0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1656,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9057971014492754,
      "grad_norm": 1.3455069065093994,
      "learning_rate": 0.0002,
      "loss": 0.9137,
      "step": 250
    },
    {
      "epoch": 1.8115942028985508,
      "grad_norm": 0.6821095943450928,
      "learning_rate": 0.0002,
      "loss": 0.4615,
      "step": 500
    },
    {
      "epoch": 2.717391304347826,
      "grad_norm": 1.3968886137008667,
      "learning_rate": 0.0002,
      "loss": 0.3043,
      "step": 750
    },
    {
      "epoch": 3.6231884057971016,
      "grad_norm": 0.9846513271331787,
      "learning_rate": 0.0002,
      "loss": 0.2248,
      "step": 1000
    },
    {
      "epoch": 4.528985507246377,
      "grad_norm": 0.889771580696106,
      "learning_rate": 0.0002,
      "loss": 0.1835,
      "step": 1250
    },
    {
      "epoch": 5.434782608695652,
      "grad_norm": 0.5584134459495544,
      "learning_rate": 0.0002,
      "loss": 0.1685,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1656,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1047252202307584e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}