sedrickkeh committed on
Commit
9f4b16e
1 Parent(s): 52a29c3

Training in progress, epoch 0

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:469a98fbc6896c783d21345cf47acbe97a753d3fb3ddbcd50e7ebfbce6f0f52d
3
  size 4903351912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38e348bfbb2ec3a0da3ff57e390c5da768660ad08e48b5701283e0ab59522c9
3
  size 4903351912
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50fd02c3d56456a8fcb35def311d083e600956628065f7dbc7e6f06e8703b9ee
3
  size 4947570872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6666343f113fc44ea86be205ed643aa1f4f257df0c37ecf57835224cd4e3f475
3
  size 4947570872
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38284cd6ec70205e8fbbdf52927470e82a119fab94c91e85fbdb0fa0fbb0c45c
3
  size 4962221464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ce4d338162ea7672300255cb65aa8910baeb0fe0b23c54c992d6e5686bbacb
3
  size 4962221464
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc002edd557284ed00d2e010b52ca0029d2bc118d0c5aa5a8009b558a19b40c4
3
  size 3670322200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf890cc71ce38e73b9b7a769dcc61ce33fa9e38521855babaeba5ffcbb72345a
3
  size 3670322200
tokenizer_config.json CHANGED
@@ -2000,6 +2000,7 @@
2000
  "<end_of_turn>"
2001
  ],
2002
  "bos_token": "<bos>",
 
2003
  "clean_up_tokenization_spaces": false,
2004
  "eos_token": "<eos>",
2005
  "model_max_length": 1000000000000000019884624838656,
 
2000
  "<end_of_turn>"
2001
  ],
2002
  "bos_token": "<bos>",
2003
+ "chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<start_of_turn>user\n' + content + '<end_of_turn>\n<start_of_turn>model\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<end_of_turn>\n' }}{% endif %}{% endfor %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
  "model_max_length": 1000000000000000019884624838656,
trainer_log.jsonl CHANGED
@@ -1,70 +1,23 @@
1
- {"current_steps": 10, "total_steps": 663, "loss": 0.7438, "learning_rate": 5e-06, "epoch": 0.045133991537376586, "percentage": 1.51, "elapsed_time": "0:15:07", "remaining_time": "16:27:57"}
2
- {"current_steps": 20, "total_steps": 663, "loss": 0.6709, "learning_rate": 5e-06, "epoch": 0.09026798307475317, "percentage": 3.02, "elapsed_time": "0:30:12", "remaining_time": "16:11:26"}
3
- {"current_steps": 30, "total_steps": 663, "loss": 0.6463, "learning_rate": 5e-06, "epoch": 0.13540197461212977, "percentage": 4.52, "elapsed_time": "0:45:18", "remaining_time": "15:55:58"}
4
- {"current_steps": 40, "total_steps": 663, "loss": 0.6389, "learning_rate": 5e-06, "epoch": 0.18053596614950634, "percentage": 6.03, "elapsed_time": "1:00:24", "remaining_time": "15:40:44"}
5
- {"current_steps": 50, "total_steps": 663, "loss": 0.6349, "learning_rate": 5e-06, "epoch": 0.22566995768688294, "percentage": 7.54, "elapsed_time": "1:15:28", "remaining_time": "15:25:22"}
6
- {"current_steps": 60, "total_steps": 663, "loss": 0.6247, "learning_rate": 5e-06, "epoch": 0.27080394922425954, "percentage": 9.05, "elapsed_time": "1:30:31", "remaining_time": "15:09:49"}
7
- {"current_steps": 70, "total_steps": 663, "loss": 0.6227, "learning_rate": 5e-06, "epoch": 0.3159379407616361, "percentage": 10.56, "elapsed_time": "1:45:38", "remaining_time": "14:54:59"}
8
- {"current_steps": 80, "total_steps": 663, "loss": 0.6225, "learning_rate": 5e-06, "epoch": 0.3610719322990127, "percentage": 12.07, "elapsed_time": "2:00:45", "remaining_time": "14:40:03"}
9
- {"current_steps": 90, "total_steps": 663, "loss": 0.6161, "learning_rate": 5e-06, "epoch": 0.40620592383638926, "percentage": 13.57, "elapsed_time": "2:15:50", "remaining_time": "14:24:53"}
10
- {"current_steps": 100, "total_steps": 663, "loss": 0.6118, "learning_rate": 5e-06, "epoch": 0.4513399153737659, "percentage": 15.08, "elapsed_time": "2:30:55", "remaining_time": "14:09:44"}
11
- {"current_steps": 110, "total_steps": 663, "loss": 0.6071, "learning_rate": 5e-06, "epoch": 0.49647390691114246, "percentage": 16.59, "elapsed_time": "2:46:00", "remaining_time": "13:54:35"}
12
- {"current_steps": 120, "total_steps": 663, "loss": 0.6108, "learning_rate": 5e-06, "epoch": 0.5416078984485191, "percentage": 18.1, "elapsed_time": "3:01:05", "remaining_time": "13:39:25"}
13
- {"current_steps": 130, "total_steps": 663, "loss": 0.6056, "learning_rate": 5e-06, "epoch": 0.5867418899858956, "percentage": 19.61, "elapsed_time": "3:16:11", "remaining_time": "13:24:21"}
14
- {"current_steps": 140, "total_steps": 663, "loss": 0.6056, "learning_rate": 5e-06, "epoch": 0.6318758815232722, "percentage": 21.12, "elapsed_time": "3:31:16", "remaining_time": "13:09:15"}
15
- {"current_steps": 150, "total_steps": 663, "loss": 0.6054, "learning_rate": 5e-06, "epoch": 0.6770098730606487, "percentage": 22.62, "elapsed_time": "3:46:21", "remaining_time": "12:54:08"}
16
- {"current_steps": 160, "total_steps": 663, "loss": 0.6031, "learning_rate": 5e-06, "epoch": 0.7221438645980254, "percentage": 24.13, "elapsed_time": "4:01:27", "remaining_time": "12:39:03"}
17
- {"current_steps": 170, "total_steps": 663, "loss": 0.5996, "learning_rate": 5e-06, "epoch": 0.767277856135402, "percentage": 25.64, "elapsed_time": "4:16:30", "remaining_time": "12:23:51"}
18
- {"current_steps": 180, "total_steps": 663, "loss": 0.6003, "learning_rate": 5e-06, "epoch": 0.8124118476727785, "percentage": 27.15, "elapsed_time": "4:31:34", "remaining_time": "12:08:43"}
19
- {"current_steps": 190, "total_steps": 663, "loss": 0.6049, "learning_rate": 5e-06, "epoch": 0.8575458392101551, "percentage": 28.66, "elapsed_time": "4:46:37", "remaining_time": "11:53:33"}
20
- {"current_steps": 200, "total_steps": 663, "loss": 0.5985, "learning_rate": 5e-06, "epoch": 0.9026798307475318, "percentage": 30.17, "elapsed_time": "5:01:43", "remaining_time": "11:38:29"}
21
- {"current_steps": 210, "total_steps": 663, "loss": 0.5975, "learning_rate": 5e-06, "epoch": 0.9478138222849083, "percentage": 31.67, "elapsed_time": "5:16:48", "remaining_time": "11:23:23"}
22
- {"current_steps": 220, "total_steps": 663, "loss": 0.5944, "learning_rate": 5e-06, "epoch": 0.9929478138222849, "percentage": 33.18, "elapsed_time": "5:31:53", "remaining_time": "11:08:18"}
23
- {"current_steps": 221, "total_steps": 663, "eval_loss": 0.5952667593955994, "epoch": 0.9974612129760225, "percentage": 33.33, "elapsed_time": "5:39:46", "remaining_time": "11:19:32"}
24
- {"current_steps": 230, "total_steps": 663, "loss": 0.5957, "learning_rate": 5e-06, "epoch": 1.039210155148096, "percentage": 34.69, "elapsed_time": "5:53:55", "remaining_time": "11:06:18"}
25
- {"current_steps": 240, "total_steps": 663, "loss": 0.5546, "learning_rate": 5e-06, "epoch": 1.0843441466854724, "percentage": 36.2, "elapsed_time": "6:09:01", "remaining_time": "10:50:24"}
26
- {"current_steps": 250, "total_steps": 663, "loss": 0.5576, "learning_rate": 5e-06, "epoch": 1.1294781382228491, "percentage": 37.71, "elapsed_time": "6:24:09", "remaining_time": "10:34:37"}
27
- {"current_steps": 260, "total_steps": 663, "loss": 0.5531, "learning_rate": 5e-06, "epoch": 1.1746121297602257, "percentage": 39.22, "elapsed_time": "6:39:15", "remaining_time": "10:18:51"}
28
- {"current_steps": 270, "total_steps": 663, "loss": 0.5549, "learning_rate": 5e-06, "epoch": 1.2197461212976022, "percentage": 40.72, "elapsed_time": "6:54:21", "remaining_time": "10:03:06"}
29
- {"current_steps": 280, "total_steps": 663, "loss": 0.5572, "learning_rate": 5e-06, "epoch": 1.264880112834979, "percentage": 42.23, "elapsed_time": "7:09:26", "remaining_time": "9:47:25"}
30
- {"current_steps": 290, "total_steps": 663, "loss": 0.5565, "learning_rate": 5e-06, "epoch": 1.3100141043723554, "percentage": 43.74, "elapsed_time": "7:24:32", "remaining_time": "9:31:46"}
31
- {"current_steps": 300, "total_steps": 663, "loss": 0.5587, "learning_rate": 5e-06, "epoch": 1.355148095909732, "percentage": 45.25, "elapsed_time": "7:39:38", "remaining_time": "9:16:10"}
32
- {"current_steps": 310, "total_steps": 663, "loss": 0.5557, "learning_rate": 5e-06, "epoch": 1.4002820874471085, "percentage": 46.76, "elapsed_time": "7:54:45", "remaining_time": "9:00:36"}
33
- {"current_steps": 320, "total_steps": 663, "loss": 0.555, "learning_rate": 5e-06, "epoch": 1.4454160789844852, "percentage": 48.27, "elapsed_time": "8:09:51", "remaining_time": "8:45:03"}
34
- {"current_steps": 330, "total_steps": 663, "loss": 0.5578, "learning_rate": 5e-06, "epoch": 1.4905500705218617, "percentage": 49.77, "elapsed_time": "8:24:58", "remaining_time": "8:29:33"}
35
- {"current_steps": 340, "total_steps": 663, "loss": 0.5519, "learning_rate": 5e-06, "epoch": 1.5356840620592385, "percentage": 51.28, "elapsed_time": "8:40:04", "remaining_time": "8:14:04"}
36
- {"current_steps": 350, "total_steps": 663, "loss": 0.5523, "learning_rate": 5e-06, "epoch": 1.580818053596615, "percentage": 52.79, "elapsed_time": "8:55:09", "remaining_time": "7:58:35"}
37
- {"current_steps": 360, "total_steps": 663, "loss": 0.561, "learning_rate": 5e-06, "epoch": 1.6259520451339915, "percentage": 54.3, "elapsed_time": "9:10:16", "remaining_time": "7:43:08"}
38
- {"current_steps": 370, "total_steps": 663, "loss": 0.5562, "learning_rate": 5e-06, "epoch": 1.671086036671368, "percentage": 55.81, "elapsed_time": "9:25:22", "remaining_time": "7:27:42"}
39
- {"current_steps": 380, "total_steps": 663, "loss": 0.5552, "learning_rate": 5e-06, "epoch": 1.7162200282087448, "percentage": 57.32, "elapsed_time": "9:40:28", "remaining_time": "7:12:18"}
40
- {"current_steps": 390, "total_steps": 663, "loss": 0.5528, "learning_rate": 5e-06, "epoch": 1.7613540197461213, "percentage": 58.82, "elapsed_time": "9:55:34", "remaining_time": "6:56:54"}
41
- {"current_steps": 400, "total_steps": 663, "loss": 0.5517, "learning_rate": 5e-06, "epoch": 1.806488011283498, "percentage": 60.33, "elapsed_time": "10:10:39", "remaining_time": "6:41:30"}
42
- {"current_steps": 410, "total_steps": 663, "loss": 0.5543, "learning_rate": 5e-06, "epoch": 1.8516220028208745, "percentage": 61.84, "elapsed_time": "10:25:45", "remaining_time": "6:26:08"}
43
- {"current_steps": 420, "total_steps": 663, "loss": 0.5578, "learning_rate": 5e-06, "epoch": 1.896755994358251, "percentage": 63.35, "elapsed_time": "10:40:52", "remaining_time": "6:10:47"}
44
- {"current_steps": 430, "total_steps": 663, "loss": 0.5486, "learning_rate": 5e-06, "epoch": 1.9418899858956276, "percentage": 64.86, "elapsed_time": "10:55:58", "remaining_time": "5:55:26"}
45
- {"current_steps": 440, "total_steps": 663, "loss": 0.5591, "learning_rate": 5e-06, "epoch": 1.987023977433004, "percentage": 66.37, "elapsed_time": "11:11:04", "remaining_time": "5:40:06"}
46
- {"current_steps": 442, "total_steps": 663, "eval_loss": 0.5866958498954773, "epoch": 1.9960507757404795, "percentage": 66.67, "elapsed_time": "11:20:56", "remaining_time": "5:40:28"}
47
- {"current_steps": 450, "total_steps": 663, "loss": 0.5574, "learning_rate": 5e-06, "epoch": 2.0332863187588153, "percentage": 67.87, "elapsed_time": "11:33:17", "remaining_time": "5:28:09"}
48
- {"current_steps": 460, "total_steps": 663, "loss": 0.5064, "learning_rate": 5e-06, "epoch": 2.078420310296192, "percentage": 69.38, "elapsed_time": "11:48:25", "remaining_time": "5:12:37"}
49
- {"current_steps": 470, "total_steps": 663, "loss": 0.5109, "learning_rate": 5e-06, "epoch": 2.1235543018335683, "percentage": 70.89, "elapsed_time": "12:03:32", "remaining_time": "4:57:06"}
50
- {"current_steps": 480, "total_steps": 663, "loss": 0.5119, "learning_rate": 5e-06, "epoch": 2.168688293370945, "percentage": 72.4, "elapsed_time": "12:18:40", "remaining_time": "4:41:37"}
51
- {"current_steps": 490, "total_steps": 663, "loss": 0.5045, "learning_rate": 5e-06, "epoch": 2.213822284908322, "percentage": 73.91, "elapsed_time": "12:33:48", "remaining_time": "4:26:08"}
52
- {"current_steps": 500, "total_steps": 663, "loss": 0.5058, "learning_rate": 5e-06, "epoch": 2.2589562764456983, "percentage": 75.41, "elapsed_time": "12:48:56", "remaining_time": "4:10:40"}
53
- {"current_steps": 510, "total_steps": 663, "loss": 0.5134, "learning_rate": 5e-06, "epoch": 2.304090267983075, "percentage": 76.92, "elapsed_time": "13:04:03", "remaining_time": "3:55:13"}
54
- {"current_steps": 520, "total_steps": 663, "loss": 0.5135, "learning_rate": 5e-06, "epoch": 2.3492242595204513, "percentage": 78.43, "elapsed_time": "13:19:11", "remaining_time": "3:39:46"}
55
- {"current_steps": 530, "total_steps": 663, "loss": 0.5115, "learning_rate": 5e-06, "epoch": 2.394358251057828, "percentage": 79.94, "elapsed_time": "13:34:19", "remaining_time": "3:24:21"}
56
- {"current_steps": 540, "total_steps": 663, "loss": 0.5102, "learning_rate": 5e-06, "epoch": 2.4394922425952044, "percentage": 81.45, "elapsed_time": "13:49:27", "remaining_time": "3:08:55"}
57
- {"current_steps": 550, "total_steps": 663, "loss": 0.5163, "learning_rate": 5e-06, "epoch": 2.4846262341325813, "percentage": 82.96, "elapsed_time": "14:04:35", "remaining_time": "2:53:31"}
58
- {"current_steps": 560, "total_steps": 663, "loss": 0.5115, "learning_rate": 5e-06, "epoch": 2.529760225669958, "percentage": 84.46, "elapsed_time": "14:19:42", "remaining_time": "2:38:07"}
59
- {"current_steps": 570, "total_steps": 663, "loss": 0.5129, "learning_rate": 5e-06, "epoch": 2.5748942172073344, "percentage": 85.97, "elapsed_time": "14:34:49", "remaining_time": "2:22:44"}
60
- {"current_steps": 580, "total_steps": 663, "loss": 0.5121, "learning_rate": 5e-06, "epoch": 2.620028208744711, "percentage": 87.48, "elapsed_time": "14:49:56", "remaining_time": "2:07:21"}
61
- {"current_steps": 590, "total_steps": 663, "loss": 0.5117, "learning_rate": 5e-06, "epoch": 2.6651622002820874, "percentage": 88.99, "elapsed_time": "15:05:02", "remaining_time": "1:51:58"}
62
- {"current_steps": 600, "total_steps": 663, "loss": 0.5162, "learning_rate": 5e-06, "epoch": 2.710296191819464, "percentage": 90.5, "elapsed_time": "15:20:08", "remaining_time": "1:36:36"}
63
- {"current_steps": 610, "total_steps": 663, "loss": 0.5132, "learning_rate": 5e-06, "epoch": 2.7554301833568404, "percentage": 92.01, "elapsed_time": "15:35:16", "remaining_time": "1:21:15"}
64
- {"current_steps": 620, "total_steps": 663, "loss": 0.518, "learning_rate": 5e-06, "epoch": 2.800564174894217, "percentage": 93.51, "elapsed_time": "15:50:22", "remaining_time": "1:05:54"}
65
- {"current_steps": 630, "total_steps": 663, "loss": 0.5195, "learning_rate": 5e-06, "epoch": 2.845698166431594, "percentage": 95.02, "elapsed_time": "16:05:28", "remaining_time": "0:50:34"}
66
- {"current_steps": 640, "total_steps": 663, "loss": 0.5135, "learning_rate": 5e-06, "epoch": 2.8908321579689704, "percentage": 96.53, "elapsed_time": "16:20:36", "remaining_time": "0:35:14"}
67
- {"current_steps": 650, "total_steps": 663, "loss": 0.5154, "learning_rate": 5e-06, "epoch": 2.935966149506347, "percentage": 98.04, "elapsed_time": "16:35:40", "remaining_time": "0:19:54"}
68
- {"current_steps": 660, "total_steps": 663, "loss": 0.5153, "learning_rate": 5e-06, "epoch": 2.9811001410437235, "percentage": 99.55, "elapsed_time": "16:50:45", "remaining_time": "0:04:35"}
69
- {"current_steps": 663, "total_steps": 663, "eval_loss": 0.5917297005653381, "epoch": 2.9946403385049365, "percentage": 100.0, "elapsed_time": "17:02:19", "remaining_time": "0:00:00"}
70
- {"current_steps": 663, "total_steps": 663, "epoch": 2.9946403385049365, "percentage": 100.0, "elapsed_time": "17:03:37", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 663, "loss": 0.7438, "lr": 5e-06, "epoch": 0.045133991537376586, "percentage": 1.51, "elapsed_time": "0:15:06", "remaining_time": "16:27:01"}
2
+ {"current_steps": 20, "total_steps": 663, "loss": 0.6709, "lr": 5e-06, "epoch": 0.09026798307475317, "percentage": 3.02, "elapsed_time": "0:30:11", "remaining_time": "16:10:28"}
3
+ {"current_steps": 30, "total_steps": 663, "loss": 0.6463, "lr": 5e-06, "epoch": 0.13540197461212977, "percentage": 4.52, "elapsed_time": "0:45:16", "remaining_time": "15:55:11"}
4
+ {"current_steps": 40, "total_steps": 663, "loss": 0.6389, "lr": 5e-06, "epoch": 0.18053596614950634, "percentage": 6.03, "elapsed_time": "1:00:21", "remaining_time": "15:40:01"}
5
+ {"current_steps": 50, "total_steps": 663, "loss": 0.6349, "lr": 5e-06, "epoch": 0.22566995768688294, "percentage": 7.54, "elapsed_time": "1:15:25", "remaining_time": "15:24:43"}
6
+ {"current_steps": 60, "total_steps": 663, "loss": 0.6247, "lr": 5e-06, "epoch": 0.27080394922425954, "percentage": 9.05, "elapsed_time": "1:30:28", "remaining_time": "15:09:19"}
7
+ {"current_steps": 70, "total_steps": 663, "loss": 0.6227, "lr": 5e-06, "epoch": 0.3159379407616361, "percentage": 10.56, "elapsed_time": "1:45:33", "remaining_time": "14:54:11"}
8
+ {"current_steps": 80, "total_steps": 663, "loss": 0.6225, "lr": 5e-06, "epoch": 0.3610719322990127, "percentage": 12.07, "elapsed_time": "2:00:37", "remaining_time": "14:39:01"}
9
+ {"current_steps": 90, "total_steps": 663, "loss": 0.6161, "lr": 5e-06, "epoch": 0.40620592383638926, "percentage": 13.57, "elapsed_time": "2:15:41", "remaining_time": "14:23:54"}
10
+ {"current_steps": 100, "total_steps": 663, "loss": 0.6118, "lr": 5e-06, "epoch": 0.4513399153737659, "percentage": 15.08, "elapsed_time": "2:30:46", "remaining_time": "14:08:49"}
11
+ {"current_steps": 110, "total_steps": 663, "loss": 0.6071, "lr": 5e-06, "epoch": 0.49647390691114246, "percentage": 16.59, "elapsed_time": "2:45:49", "remaining_time": "13:53:39"}
12
+ {"current_steps": 120, "total_steps": 663, "loss": 0.6108, "lr": 5e-06, "epoch": 0.5416078984485191, "percentage": 18.1, "elapsed_time": "3:00:54", "remaining_time": "13:38:36"}
13
+ {"current_steps": 130, "total_steps": 663, "loss": 0.6056, "lr": 5e-06, "epoch": 0.5867418899858956, "percentage": 19.61, "elapsed_time": "3:15:59", "remaining_time": "13:23:34"}
14
+ {"current_steps": 140, "total_steps": 663, "loss": 0.6056, "lr": 5e-06, "epoch": 0.6318758815232722, "percentage": 21.12, "elapsed_time": "3:31:03", "remaining_time": "13:08:27"}
15
+ {"current_steps": 150, "total_steps": 663, "loss": 0.6054, "lr": 5e-06, "epoch": 0.6770098730606487, "percentage": 22.62, "elapsed_time": "3:46:06", "remaining_time": "12:53:17"}
16
+ {"current_steps": 160, "total_steps": 663, "loss": 0.6031, "lr": 5e-06, "epoch": 0.7221438645980254, "percentage": 24.13, "elapsed_time": "4:01:09", "remaining_time": "12:38:09"}
17
+ {"current_steps": 170, "total_steps": 663, "loss": 0.5996, "lr": 5e-06, "epoch": 0.767277856135402, "percentage": 25.64, "elapsed_time": "4:16:13", "remaining_time": "12:23:04"}
18
+ {"current_steps": 180, "total_steps": 663, "loss": 0.6003, "lr": 5e-06, "epoch": 0.8124118476727785, "percentage": 27.15, "elapsed_time": "4:31:18", "remaining_time": "12:08:01"}
19
+ {"current_steps": 190, "total_steps": 663, "loss": 0.6049, "lr": 5e-06, "epoch": 0.8575458392101551, "percentage": 28.66, "elapsed_time": "4:46:23", "remaining_time": "11:52:58"}
20
+ {"current_steps": 200, "total_steps": 663, "loss": 0.5985, "lr": 5e-06, "epoch": 0.9026798307475318, "percentage": 30.17, "elapsed_time": "5:01:27", "remaining_time": "11:37:52"}
21
+ {"current_steps": 210, "total_steps": 663, "loss": 0.5975, "lr": 5e-06, "epoch": 0.9478138222849083, "percentage": 31.67, "elapsed_time": "5:16:32", "remaining_time": "11:22:48"}
22
+ {"current_steps": 220, "total_steps": 663, "loss": 0.5944, "lr": 5e-06, "epoch": 0.9929478138222849, "percentage": 33.18, "elapsed_time": "5:31:35", "remaining_time": "11:07:41"}
23
+ {"current_steps": 221, "total_steps": 663, "eval_loss": 0.5952667593955994, "epoch": 0.9974612129760225, "percentage": 33.33, "elapsed_time": "5:39:28", "remaining_time": "11:18:57"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff797719b4b42d7b8ff9bb921070a22acbdfd425569add03d51a49898e2a07f
3
  size 7160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1389170815dbb2b16a471a61e839744ddd05a3c8bcea601ac3a891091dd38c98
3
  size 7160