File size: 2,335 Bytes
0c3eda2
 
 
bb017bb
0c3eda2
dd06f81
0c3eda2
 
 
 
 
bb017bb
 
0c3eda2
bb017bb
0c3eda2
 
 
bb017bb
 
 
 
 
0c3eda2
 
 
bb017bb
 
0c3eda2
bb017bb
0c3eda2
 
 
bb017bb
 
0c3eda2
bb017bb
0c3eda2
 
 
bb017bb
 
 
 
 
0c3eda2
dd06f81
 
bb017bb
 
dd06f81
bb017bb
dd06f81
 
 
bb017bb
 
dd06f81
bb017bb
dd06f81
 
 
bb017bb
 
dd06f81
bb017bb
dd06f81
 
 
bb017bb
 
 
 
 
dd06f81
0c3eda2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb017bb
0c3eda2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.016216216216216217,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002702702702702703,
      "grad_norm": 0.22516648471355438,
      "learning_rate": 2e-05,
      "loss": 2.4235,
      "step": 1
    },
    {
      "epoch": 0.002702702702702703,
      "eval_loss": 2.200819492340088,
      "eval_runtime": 8.0954,
      "eval_samples_per_second": 9.635,
      "eval_steps_per_second": 9.635,
      "step": 1
    },
    {
      "epoch": 0.005405405405405406,
      "grad_norm": 0.2626628577709198,
      "learning_rate": 4e-05,
      "loss": 2.2166,
      "step": 2
    },
    {
      "epoch": 0.008108108108108109,
      "grad_norm": 0.21446576714515686,
      "learning_rate": 6e-05,
      "loss": 2.6546,
      "step": 3
    },
    {
      "epoch": 0.008108108108108109,
      "eval_loss": 2.199615955352783,
      "eval_runtime": 8.2871,
      "eval_samples_per_second": 9.412,
      "eval_steps_per_second": 9.412,
      "step": 3
    },
    {
      "epoch": 0.010810810810810811,
      "grad_norm": 0.2510293424129486,
      "learning_rate": 8e-05,
      "loss": 2.6251,
      "step": 4
    },
    {
      "epoch": 0.013513513513513514,
      "grad_norm": 0.2600695788860321,
      "learning_rate": 0.0001,
      "loss": 2.0934,
      "step": 5
    },
    {
      "epoch": 0.016216216216216217,
      "grad_norm": 0.23503589630126953,
      "learning_rate": 0.00012,
      "loss": 2.3495,
      "step": 6
    },
    {
      "epoch": 0.016216216216216217,
      "eval_loss": 2.193514585494995,
      "eval_runtime": 8.4696,
      "eval_samples_per_second": 9.209,
      "eval_steps_per_second": 9.209,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2034520672960512.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}