leixa committed on
Commit
a56e673
·
verified ·
1 Parent(s): 82c9f08

Training in progress, step 378, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abb56d38a89e7b62a7798d623f95ec7bceb0800b1e0e250ab37ccd5fc69a012a
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ababc76d414a3afda1e50007a605bd4376359db0b794d4bc845165b4cecd9b
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e53496acb3ee81f29db8e8dd644eb76ba4b47ccf9f2ab343ac4a3ae57576051b
3
  size 102864868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3023e0d5ddef1baf9cb4c5e49655a72087a416ca4f489e97d5bc70ad963d8b1e
3
  size 102864868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:207840526dea4fa9627489a495e4077d47d6217bc1c29fded5fdb4cce6503140
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3a22211f7dda4b0e0f527fa4802e452b6112cd704d0429874313a62a33ed2b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10affc9ced28dcfaf0d40e3497a97c8e7416bd057324538f99a7e1756fd84408
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac207b57c6cefba3838e335ba7ebf320ffdaee8162f1c0afc72ea9ad9f0725f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07043286867204696,
5
  "eval_steps": 42,
6
- "global_step": 336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -863,6 +863,112 @@
863
  "eval_samples_per_second": 48.692,
864
  "eval_steps_per_second": 6.09,
865
  "step": 336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  }
867
  ],
868
  "logging_steps": 3,
@@ -882,7 +988,7 @@
882
  "attributes": {}
883
  }
884
  },
885
- "total_flos": 7.167376784306995e+16,
886
  "train_batch_size": 8,
887
  "trial_name": null,
888
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.07923697725605282,
5
  "eval_steps": 42,
6
+ "global_step": 378,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
863
  "eval_samples_per_second": 48.692,
864
  "eval_steps_per_second": 6.09,
865
  "step": 336
866
+ },
867
+ {
868
+ "epoch": 0.07106173357090452,
869
+ "grad_norm": 0.4022517502307892,
870
+ "learning_rate": 2.43550361297047e-05,
871
+ "loss": 1.0749,
872
+ "step": 339
873
+ },
874
+ {
875
+ "epoch": 0.07169059846976208,
876
+ "grad_norm": 0.35164332389831543,
877
+ "learning_rate": 2.353425010381063e-05,
878
+ "loss": 1.0352,
879
+ "step": 342
880
+ },
881
+ {
882
+ "epoch": 0.07231946336861964,
883
+ "grad_norm": 0.40566059947013855,
884
+ "learning_rate": 2.272325493947257e-05,
885
+ "loss": 1.0625,
886
+ "step": 345
887
+ },
888
+ {
889
+ "epoch": 0.0729483282674772,
890
+ "grad_norm": 0.37188711762428284,
891
+ "learning_rate": 2.192235065998126e-05,
892
+ "loss": 1.072,
893
+ "step": 348
894
+ },
895
+ {
896
+ "epoch": 0.07357719316633476,
897
+ "grad_norm": 0.3737729489803314,
898
+ "learning_rate": 2.1131833555559037e-05,
899
+ "loss": 1.0546,
900
+ "step": 351
901
+ },
902
+ {
903
+ "epoch": 0.07420605806519233,
904
+ "grad_norm": 0.35681530833244324,
905
+ "learning_rate": 2.0351996073748713e-05,
906
+ "loss": 1.0669,
907
+ "step": 354
908
+ },
909
+ {
910
+ "epoch": 0.0748349229640499,
911
+ "grad_norm": 0.3745366334915161,
912
+ "learning_rate": 1.9583126711224343e-05,
913
+ "loss": 1.0731,
914
+ "step": 357
915
+ },
916
+ {
917
+ "epoch": 0.07546378786290746,
918
+ "grad_norm": 0.3998556137084961,
919
+ "learning_rate": 1.8825509907063327e-05,
920
+ "loss": 1.1069,
921
+ "step": 360
922
+ },
923
+ {
924
+ "epoch": 0.07609265276176501,
925
+ "grad_norm": 0.4358106255531311,
926
+ "learning_rate": 1.807942593751973e-05,
927
+ "loss": 1.0876,
928
+ "step": 363
929
+ },
930
+ {
931
+ "epoch": 0.07672151766062257,
932
+ "grad_norm": 0.3841058313846588,
933
+ "learning_rate": 1.7345150812337564e-05,
934
+ "loss": 1.0822,
935
+ "step": 366
936
+ },
937
+ {
938
+ "epoch": 0.07735038255948014,
939
+ "grad_norm": 0.4276648759841919,
940
+ "learning_rate": 1.66229561726426e-05,
941
+ "loss": 1.0894,
942
+ "step": 369
943
+ },
944
+ {
945
+ "epoch": 0.0779792474583377,
946
+ "grad_norm": 0.40756258368492126,
947
+ "learning_rate": 1.5913109190450032e-05,
948
+ "loss": 1.0673,
949
+ "step": 372
950
+ },
951
+ {
952
+ "epoch": 0.07860811235719527,
953
+ "grad_norm": 0.34232285618782043,
954
+ "learning_rate": 1.5215872469825682e-05,
955
+ "loss": 1.0659,
956
+ "step": 375
957
+ },
958
+ {
959
+ "epoch": 0.07923697725605282,
960
+ "grad_norm": 0.35964226722717285,
961
+ "learning_rate": 1.4531503949737108e-05,
962
+ "loss": 1.0067,
963
+ "step": 378
964
+ },
965
+ {
966
+ "epoch": 0.07923697725605282,
967
+ "eval_loss": 1.0661753416061401,
968
+ "eval_runtime": 165.0357,
969
+ "eval_samples_per_second": 48.686,
970
+ "eval_steps_per_second": 6.09,
971
+ "step": 378
972
  }
973
  ],
974
  "logging_steps": 3,
 
988
  "attributes": {}
989
  }
990
  },
991
+ "total_flos": 8.06329888234537e+16,
992
  "train_batch_size": 8,
993
  "trial_name": null,
994
  "trial_params": null