RodrigoSalazar-U committed on
Commit
4903096
1 Parent(s): 9092f77

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dbcd6e3a8591bfaed32d8abcf25a9918694b23aa2ff2e871d72f30f3cfd28de
3
  size 4785762744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c1e8afc5d6e724647aa6282eea420a5674252560cfe3ca227d9079c5ab3503
3
  size 4785762744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:262612171054ff07dc873ba588a92aaaca5297f604de6fbcc2b57a0efda9c97b
3
  size 3497859804
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:925357869e6ba85e6c835b49837b1223de96991a316b48a2f12c8541300e39be
3
  size 3497859804
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c065ee50c4698f410f7f4a607a3fd9ca80c14452c25d8cb68381d6bcdc56f10
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8062f178cdbb1c5d808c1d4d26e9f0f29c4b409a65e50547129df87250075971
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2752546105147261,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -42,6 +42,41 @@
42
  "learning_rate": 9.09422799766929e-05,
43
  "loss": 1.5134,
44
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  }
46
  ],
47
  "logging_steps": 100,
@@ -61,7 +96,7 @@
61
  "attributes": {}
62
  }
63
  },
64
- "total_flos": 2.650241834511237e+17,
65
  "train_batch_size": 16,
66
  "trial_name": null,
67
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5505092210294522,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
42
  "learning_rate": 9.09422799766929e-05,
43
  "loss": 1.5134,
44
  "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.33030553261767137,
48
+ "grad_norm": 2.3742384910583496,
49
+ "learning_rate": 8.47037097610317e-05,
50
+ "loss": 1.4548,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.3853564547206166,
55
+ "grad_norm": 2.96374773979187,
56
+ "learning_rate": 7.718624920225358e-05,
57
+ "loss": 1.4309,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.4404073768235618,
62
+ "grad_norm": 2.140960693359375,
63
+ "learning_rate": 6.866692942410824e-05,
64
+ "loss": 1.4181,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.495458298926507,
69
+ "grad_norm": 1.9198336601257324,
70
+ "learning_rate": 5.9459701755408125e-05,
71
+ "loss": 1.3413,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.5505092210294522,
76
+ "grad_norm": 1.8933072090148926,
77
+ "learning_rate": 4.9903868093081854e-05,
78
+ "loss": 1.3367,
79
+ "step": 1000
80
  }
81
  ],
82
  "logging_steps": 100,
 
96
  "attributes": {}
97
  }
98
  },
99
+ "total_flos": 5.293567140023501e+17,
100
  "train_batch_size": 16,
101
  "trial_name": null,
102
  "trial_params": null