VERSIL91 committed on
Commit
8a4be28
1 Parent(s): 98c0dcf

Training in progress, step 12, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:335cb31cbb4f422864d2b05106af654fc50279e92bc67bd1006de6bb8d914df7
3
  size 27024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efe715dfc3c6655975b4c0cb344e7c945494e1463e96c18bf9c77000e7066604
3
  size 27024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd7fe6e9dd385d88d37e6100bc6ead9650910c40bd8e9f5e0c841df05366de41
3
  size 63974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa28aca63d30e6a77fca80f9f9286ec5bb1020314276276e1f34a85650641b30
3
  size 63974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9264abfa6d2cc5145363bf71d2fb16332b1ac4e60642a70827881d9a447c36a4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af82c1f1d4ec999545a290f8875f59e0434314b9e98a3c2553c0c541daf4cdfa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92b4cf85d7ba7a497f88ff799bf4dec5af7dd95be6e00f78bf46ba5deb56bbf8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541ccba4d24c3fd7a3a1bd1b7c1a8ef18368821282014bf91f536020daf3aafe
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.28486646884273,
5
  "eval_steps": 6,
6
- "global_step": 6,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -65,6 +65,56 @@
65
  "eval_samples_per_second": 480.233,
66
  "eval_steps_per_second": 240.117,
67
  "step": 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
  ],
70
  "logging_steps": 1,
@@ -84,7 +134,7 @@
84
  "attributes": {}
85
  }
86
  },
87
- "total_flos": 481676328960.0,
88
  "train_batch_size": 2,
89
  "trial_name": null,
90
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.56973293768546,
5
  "eval_steps": 6,
6
+ "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
65
  "eval_samples_per_second": 480.233,
66
  "eval_steps_per_second": 240.117,
67
  "step": 6
68
+ },
69
+ {
70
+ "epoch": 0.3323442136498516,
71
+ "grad_norm": 0.10505552589893341,
72
+ "learning_rate": 7e-05,
73
+ "loss": 10.3793,
74
+ "step": 7
75
+ },
76
+ {
77
+ "epoch": 0.3798219584569733,
78
+ "grad_norm": 0.10710933804512024,
79
+ "learning_rate": 8e-05,
80
+ "loss": 10.3793,
81
+ "step": 8
82
+ },
83
+ {
84
+ "epoch": 0.42729970326409494,
85
+ "grad_norm": 0.104378342628479,
86
+ "learning_rate": 9e-05,
87
+ "loss": 10.3886,
88
+ "step": 9
89
+ },
90
+ {
91
+ "epoch": 0.47477744807121663,
92
+ "grad_norm": 0.1055062860250473,
93
+ "learning_rate": 0.0001,
94
+ "loss": 10.3806,
95
+ "step": 10
96
+ },
97
+ {
98
+ "epoch": 0.5222551928783383,
99
+ "grad_norm": 0.11344417184591293,
100
+ "learning_rate": 9.829629131445342e-05,
101
+ "loss": 10.3823,
102
+ "step": 11
103
+ },
104
+ {
105
+ "epoch": 0.56973293768546,
106
+ "grad_norm": 0.10140898078680038,
107
+ "learning_rate": 9.330127018922194e-05,
108
+ "loss": 10.3856,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 0.56973293768546,
113
+ "eval_loss": 10.380664825439453,
114
+ "eval_runtime": 0.0723,
115
+ "eval_samples_per_second": 497.765,
116
+ "eval_steps_per_second": 248.882,
117
+ "step": 12
118
  }
119
  ],
120
  "logging_steps": 1,
 
134
  "attributes": {}
135
  }
136
  },
137
+ "total_flos": 966585384960.0,
138
  "train_batch_size": 2,
139
  "trial_name": null,
140
  "trial_params": null