leixa commited on
Commit
f515448
·
verified ·
1 Parent(s): fbcf8e8

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6dec7a09f866490c070952c80d1eae0da3944936d8142a305986d38ea4f369
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eabac952144362e06e5038d22466558ecfc80b1c3460d48ad53cdedadbc92ef
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebefd5a4b2f2e7334933eade895e777f7386020e8121e871cb3022882a1538b5
3
  size 509176980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f15680ec5d975078f2eab9b86ba0790c3f2779737ea0dbc67e51f7cde46cab
3
  size 509176980
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:760b3cc5c54425216789bc22796c36c6292e4beb44015aabaf5769440b06d5c4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b466971bc56e50d45ed53ec8544dcc547c522ab03e3c19ab6c40616db98b9af2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b12f644dca975aafa7cd4cb8ce5e2b576382360a37463a3cc5d512a9fe53213d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1996614c6c53967c1177fd38254c7aeb2afcca06720e7eecf62b90cd631403c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2597402597402597,
5
  "eval_steps": 15,
6
- "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -58,6 +58,49 @@
58
  "eval_samples_per_second": 4.439,
59
  "eval_steps_per_second": 0.595,
60
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  ],
63
  "logging_steps": 3,
@@ -77,7 +120,7 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 7.72809336815616e+16,
81
  "train_batch_size": 8,
82
  "trial_name": null,
83
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5194805194805194,
5
  "eval_steps": 15,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
58
  "eval_samples_per_second": 4.439,
59
  "eval_steps_per_second": 0.595,
60
  "step": 15
61
+ },
62
+ {
63
+ "epoch": 0.3116883116883117,
64
+ "grad_norm": 0.7993184924125671,
65
+ "learning_rate": 9.940682350363912e-05,
66
+ "loss": 2.5783,
67
+ "step": 18
68
+ },
69
+ {
70
+ "epoch": 0.36363636363636365,
71
+ "grad_norm": 0.88717120885849,
72
+ "learning_rate": 9.888050389939172e-05,
73
+ "loss": 3.0288,
74
+ "step": 21
75
+ },
76
+ {
77
+ "epoch": 0.4155844155844156,
78
+ "grad_norm": 0.9446913003921509,
79
+ "learning_rate": 9.819081075450014e-05,
80
+ "loss": 2.6752,
81
+ "step": 24
82
+ },
83
+ {
84
+ "epoch": 0.4675324675324675,
85
+ "grad_norm": 0.7598215937614441,
86
+ "learning_rate": 9.734004923364257e-05,
87
+ "loss": 2.6636,
88
+ "step": 27
89
+ },
90
+ {
91
+ "epoch": 0.5194805194805194,
92
+ "grad_norm": 0.9138664603233337,
93
+ "learning_rate": 9.63310628410961e-05,
94
+ "loss": 2.6451,
95
+ "step": 30
96
+ },
97
+ {
98
+ "epoch": 0.5194805194805194,
99
+ "eval_loss": 0.6947051286697388,
100
+ "eval_runtime": 21.8569,
101
+ "eval_samples_per_second": 4.438,
102
+ "eval_steps_per_second": 0.595,
103
+ "step": 30
104
  }
105
  ],
106
  "logging_steps": 3,
 
120
  "attributes": {}
121
  }
122
  },
123
+ "total_flos": 1.545618673631232e+17,
124
  "train_batch_size": 8,
125
  "trial_name": null,
126
  "trial_params": null