DongfuJiang committed on
Commit
2511e8e
1 Parent(s): e531747

Training in progress, step 400

Browse files
README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: microsoft/Phi-3-mini-128k-instruct
3
+ library_name: peft
4
+ license: mit
5
+ tags:
6
+ - llama-factory
7
+ - lora
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: PairRM-V2-phi3-3-mini-unified-feedback
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/dongfu/huggingface/runs/336nlkkc)
18
+ # PairRM-V2-phi3-3-mini-unified-feedback
19
+
20
+ This model is a fine-tuned version of [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) on the `all` dataset.
21
+ It achieves the following results on the evaluation set:
22
+ - Loss: 0.2755
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 1e-05
42
+ - train_batch_size: 1
43
+ - eval_batch_size: 1
44
+ - seed: 42
45
+ - distributed_type: multi-GPU
46
+ - num_devices: 8
47
+ - gradient_accumulation_steps: 16
48
+ - total_train_batch_size: 128
49
+ - total_eval_batch_size: 8
50
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
+ - lr_scheduler_type: cosine
52
+ - lr_scheduler_warmup_ratio: 0.05
53
+ - num_epochs: 3
54
+
55
+ ### Training results
56
+
57
+ | Training Loss | Epoch | Step | Validation Loss |
58
+ |:-------------:|:------:|:----:|:---------------:|
59
+ | 0.3099 | 0.3245 | 500 | 0.3066 |
60
+ | 0.3073 | 0.6490 | 1000 | 0.2901 |
61
+ | 0.263 | 0.9736 | 1500 | 0.2846 |
62
+ | 0.2822 | 1.2981 | 2000 | 0.2831 |
63
+ | 0.2693 | 1.6226 | 2500 | 0.2787 |
64
+ | 0.2741 | 1.9471 | 3000 | 0.2778 |
65
+ | 0.2869 | 2.2716 | 3500 | 0.2762 |
66
+ | 0.2339 | 2.5961 | 4000 | 0.2756 |
67
+ | 0.2879 | 2.9207 | 4500 | 0.2755 |
68
+
69
+
70
+ ### Framework versions
71
+
72
+ - PEFT 0.11.1
73
+ - Transformers 4.43.1
74
+ - Pytorch 2.3.0+cu121
75
+ - Datasets 2.20.0
76
+ - Tokenizers 0.19.1
adapter_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "gate_up_proj",
25
+ "qkv_proj",
26
+ "o_proj"
27
+ ],
28
+ "task_type": "CAUSAL_LM",
29
+ "use_dora": true,
30
+ "use_rslora": false
31
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4829feccfe2b3d5f59b69cf8d60d26df07a9bdf9b6da112485125238cf36312
3
+ size 54446840
added_tokens.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|assistant|>": 32001,
3
+ "<|endoftext|>": 32000,
4
+ "<|end|>": 32007,
5
+ "<|placeholder1|>": 32002,
6
+ "<|placeholder2|>": 32003,
7
+ "<|placeholder3|>": 32004,
8
+ "<|placeholder4|>": 32005,
9
+ "<|placeholder5|>": 32008,
10
+ "<|placeholder6|>": 32009,
11
+ "<|system|>": 32006,
12
+ "<|user|>": 32010
13
+ }
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.9985396722375466,
3
+ "eval_loss": 0.275532990694046,
4
+ "eval_runtime": 396.9488,
5
+ "eval_samples_per_second": 26.149,
6
+ "eval_steps_per_second": 3.27,
7
+ "total_flos": 8.62655347699168e+18,
8
+ "train_loss": 0.29317476286903604,
9
+ "train_runtime": 46635.6944,
10
+ "train_samples_per_second": 12.686,
11
+ "train_steps_per_second": 0.099
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.9985396722375466,
3
+ "eval_loss": 0.275532990694046,
4
+ "eval_runtime": 396.9488,
5
+ "eval_samples_per_second": 26.149,
6
+ "eval_steps_per_second": 3.27
7
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": true,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "32000": {
31
+ "content": "<|endoftext|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<|assistant|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": true,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32002": {
47
+ "content": "<|placeholder1|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": true,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "32003": {
55
+ "content": "<|placeholder2|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": true,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "32004": {
63
+ "content": "<|placeholder3|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": true,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "32005": {
71
+ "content": "<|placeholder4|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": true,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "32006": {
79
+ "content": "<|system|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": true,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "32007": {
87
+ "content": "<|end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "32008": {
95
+ "content": "<|placeholder5|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": true,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "32009": {
103
+ "content": "<|placeholder6|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": true,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "32010": {
111
+ "content": "<|user|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": true,
115
+ "single_word": false,
116
+ "special": true
117
+ }
118
+ },
119
+ "bos_token": "<s>",
120
+ "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|system|>\n' + system_message + '<|end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + content + '<|end|>\n<|assistant|>\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end|>' + '\n' }}{% endif %}{% endfor %}",
121
+ "clean_up_tokenization_spaces": false,
122
+ "eos_token": "<|end|>",
123
+ "legacy": false,
124
+ "model_max_length": 4096,
125
+ "pad_token": "<|endoftext|>",
126
+ "padding_side": "right",
127
+ "sp_model_kwargs": {},
128
+ "split_special_tokens": false,
129
+ "tokenizer_class": "LlamaTokenizer",
130
+ "unk_token": "<unk>",
131
+ "use_default_system_prompt": false
132
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.9985396722375466,
3
+ "total_flos": 8.62655347699168e+18,
4
+ "train_loss": 0.29317476286903604,
5
+ "train_runtime": 46635.6944,
6
+ "train_samples_per_second": 12.686,
7
+ "train_steps_per_second": 0.099
8
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 2, "total_steps": 3080, "loss": 1.1716, "learning_rate": 1.298701298701299e-07, "epoch": 0.0012980691221807562, "percentage": 0.06, "elapsed_time": "0:00:35", "remaining_time": "15:01:33", "throughput": "0.00", "total_tokens": 0}
2
+ {"current_steps": 4, "total_steps": 3080, "loss": 1.1279, "learning_rate": 2.597402597402598e-07, "epoch": 0.0025961382443615124, "percentage": 0.13, "elapsed_time": "0:01:16", "remaining_time": "16:18:24", "throughput": "0.00", "total_tokens": 0}
3
+ {"current_steps": 6, "total_steps": 3080, "loss": 1.2463, "learning_rate": 3.896103896103896e-07, "epoch": 0.0038942073665422685, "percentage": 0.19, "elapsed_time": "0:01:58", "remaining_time": "16:48:50", "throughput": "0.00", "total_tokens": 0}
4
+ {"current_steps": 8, "total_steps": 3080, "loss": 1.1527, "learning_rate": 5.194805194805196e-07, "epoch": 0.005192276488723025, "percentage": 0.26, "elapsed_time": "0:02:38", "remaining_time": "16:51:15", "throughput": "0.00", "total_tokens": 0}
5
+ {"current_steps": 10, "total_steps": 3080, "loss": 1.1291, "learning_rate": 6.493506493506493e-07, "epoch": 0.0064903456109037805, "percentage": 0.32, "elapsed_time": "0:03:17", "remaining_time": "16:52:13", "throughput": "0.00", "total_tokens": 0}
6
+ {"current_steps": 12, "total_steps": 3080, "loss": 1.1338, "learning_rate": 7.792207792207792e-07, "epoch": 0.007788414733084537, "percentage": 0.39, "elapsed_time": "0:03:59", "remaining_time": "17:00:16", "throughput": "0.00", "total_tokens": 0}
7
+ {"current_steps": 14, "total_steps": 3080, "loss": 1.136, "learning_rate": 9.090909090909091e-07, "epoch": 0.009086483855265294, "percentage": 0.45, "elapsed_time": "0:04:36", "remaining_time": "16:50:47", "throughput": "0.00", "total_tokens": 0}
8
+ {"current_steps": 16, "total_steps": 3080, "loss": 1.1277, "learning_rate": 1.0389610389610392e-06, "epoch": 0.01038455297744605, "percentage": 0.52, "elapsed_time": "0:05:15", "remaining_time": "16:45:32", "throughput": "0.00", "total_tokens": 0}
9
+ {"current_steps": 18, "total_steps": 3080, "loss": 1.117, "learning_rate": 1.168831168831169e-06, "epoch": 0.011682622099626805, "percentage": 0.58, "elapsed_time": "0:05:55", "remaining_time": "16:47:09", "throughput": "0.00", "total_tokens": 0}
10
+ {"current_steps": 20, "total_steps": 3080, "loss": 1.1154, "learning_rate": 1.2987012987012986e-06, "epoch": 0.012980691221807561, "percentage": 0.65, "elapsed_time": "0:06:34", "remaining_time": "16:46:09", "throughput": "0.00", "total_tokens": 0}
11
+ {"current_steps": 22, "total_steps": 3080, "loss": 1.0971, "learning_rate": 1.4285714285714286e-06, "epoch": 0.014278760343988317, "percentage": 0.71, "elapsed_time": "0:07:16", "remaining_time": "16:50:21", "throughput": "0.00", "total_tokens": 0}
12
+ {"current_steps": 24, "total_steps": 3080, "loss": 1.1125, "learning_rate": 1.5584415584415584e-06, "epoch": 0.015576829466169074, "percentage": 0.78, "elapsed_time": "0:07:52", "remaining_time": "16:43:27", "throughput": "0.00", "total_tokens": 0}
13
+ {"current_steps": 26, "total_steps": 3080, "loss": 1.0816, "learning_rate": 1.6883116883116885e-06, "epoch": 0.01687489858834983, "percentage": 0.84, "elapsed_time": "0:08:34", "remaining_time": "16:47:51", "throughput": "0.00", "total_tokens": 0}
14
+ {"current_steps": 28, "total_steps": 3080, "loss": 1.024, "learning_rate": 1.8181818181818183e-06, "epoch": 0.018172967710530587, "percentage": 0.91, "elapsed_time": "0:09:20", "remaining_time": "16:58:46", "throughput": "0.00", "total_tokens": 0}
15
+ {"current_steps": 30, "total_steps": 3080, "loss": 1.009, "learning_rate": 1.9480519480519483e-06, "epoch": 0.01947103683271134, "percentage": 0.97, "elapsed_time": "0:09:58", "remaining_time": "16:54:01", "throughput": "0.00", "total_tokens": 0}
16
+ {"current_steps": 32, "total_steps": 3080, "loss": 0.9808, "learning_rate": 2.0779220779220784e-06, "epoch": 0.0207691059548921, "percentage": 1.04, "elapsed_time": "0:10:39", "remaining_time": "16:55:13", "throughput": "0.00", "total_tokens": 0}
17
+ {"current_steps": 34, "total_steps": 3080, "loss": 0.9697, "learning_rate": 2.207792207792208e-06, "epoch": 0.022067175077072853, "percentage": 1.1, "elapsed_time": "0:11:18", "remaining_time": "16:53:46", "throughput": "0.00", "total_tokens": 0}
18
+ {"current_steps": 36, "total_steps": 3080, "loss": 0.9016, "learning_rate": 2.337662337662338e-06, "epoch": 0.02336524419925361, "percentage": 1.17, "elapsed_time": "0:11:58", "remaining_time": "16:53:02", "throughput": "0.00", "total_tokens": 0}
19
+ {"current_steps": 38, "total_steps": 3080, "loss": 0.8959, "learning_rate": 2.4675324675324676e-06, "epoch": 0.024663313321434368, "percentage": 1.23, "elapsed_time": "0:12:40", "remaining_time": "16:54:15", "throughput": "0.00", "total_tokens": 0}
20
+ {"current_steps": 40, "total_steps": 3080, "loss": 0.8125, "learning_rate": 2.597402597402597e-06, "epoch": 0.025961382443615122, "percentage": 1.3, "elapsed_time": "0:13:19", "remaining_time": "16:52:45", "throughput": "0.00", "total_tokens": 0}
21
+ {"current_steps": 42, "total_steps": 3080, "loss": 0.8685, "learning_rate": 2.7272727272727272e-06, "epoch": 0.02725945156579588, "percentage": 1.36, "elapsed_time": "0:13:59", "remaining_time": "16:52:21", "throughput": "0.00", "total_tokens": 0}
22
+ {"current_steps": 44, "total_steps": 3080, "loss": 0.7961, "learning_rate": 2.8571428571428573e-06, "epoch": 0.028557520687976633, "percentage": 1.43, "elapsed_time": "0:14:41", "remaining_time": "16:53:19", "throughput": "0.00", "total_tokens": 0}
23
+ {"current_steps": 46, "total_steps": 3080, "loss": 0.7846, "learning_rate": 2.9870129870129873e-06, "epoch": 0.02985558981015739, "percentage": 1.49, "elapsed_time": "0:15:21", "remaining_time": "16:53:31", "throughput": "0.00", "total_tokens": 0}
24
+ {"current_steps": 48, "total_steps": 3080, "loss": 0.7042, "learning_rate": 3.116883116883117e-06, "epoch": 0.03115365893233815, "percentage": 1.56, "elapsed_time": "0:16:02", "remaining_time": "16:52:50", "throughput": "0.00", "total_tokens": 0}
25
+ {"current_steps": 50, "total_steps": 3080, "loss": 0.7043, "learning_rate": 3.246753246753247e-06, "epoch": 0.0324517280545189, "percentage": 1.62, "elapsed_time": "0:16:43", "remaining_time": "16:53:48", "throughput": "0.00", "total_tokens": 0}
26
+ {"current_steps": 52, "total_steps": 3080, "loss": 0.6678, "learning_rate": 3.376623376623377e-06, "epoch": 0.03374979717669966, "percentage": 1.69, "elapsed_time": "0:17:22", "remaining_time": "16:51:34", "throughput": "0.00", "total_tokens": 0}
27
+ {"current_steps": 54, "total_steps": 3080, "loss": 0.7099, "learning_rate": 3.506493506493507e-06, "epoch": 0.03504786629888042, "percentage": 1.75, "elapsed_time": "0:17:57", "remaining_time": "16:46:18", "throughput": "0.00", "total_tokens": 0}
28
+ {"current_steps": 56, "total_steps": 3080, "loss": 0.6423, "learning_rate": 3.6363636363636366e-06, "epoch": 0.036345935421061175, "percentage": 1.82, "elapsed_time": "0:18:44", "remaining_time": "16:51:37", "throughput": "0.00", "total_tokens": 0}
29
+ {"current_steps": 58, "total_steps": 3080, "loss": 0.5418, "learning_rate": 3.7662337662337666e-06, "epoch": 0.037644004543241925, "percentage": 1.88, "elapsed_time": "0:19:24", "remaining_time": "16:51:37", "throughput": "0.00", "total_tokens": 0}
30
+ {"current_steps": 60, "total_steps": 3080, "loss": 0.5407, "learning_rate": 3.896103896103897e-06, "epoch": 0.03894207366542268, "percentage": 1.95, "elapsed_time": "0:20:03", "remaining_time": "16:49:47", "throughput": "0.00", "total_tokens": 0}
31
+ {"current_steps": 62, "total_steps": 3080, "loss": 0.5437, "learning_rate": 4.025974025974026e-06, "epoch": 0.04024014278760344, "percentage": 2.01, "elapsed_time": "0:20:46", "remaining_time": "16:51:09", "throughput": "0.00", "total_tokens": 0}
32
+ {"current_steps": 64, "total_steps": 3080, "loss": 0.4655, "learning_rate": 4.155844155844157e-06, "epoch": 0.0415382119097842, "percentage": 2.08, "elapsed_time": "0:21:30", "remaining_time": "16:53:53", "throughput": "0.00", "total_tokens": 0}
33
+ {"current_steps": 66, "total_steps": 3080, "loss": 0.4997, "learning_rate": 4.2857142857142855e-06, "epoch": 0.042836281031964955, "percentage": 2.14, "elapsed_time": "0:22:09", "remaining_time": "16:51:45", "throughput": "0.00", "total_tokens": 0}
34
+ {"current_steps": 68, "total_steps": 3080, "loss": 0.4846, "learning_rate": 4.415584415584416e-06, "epoch": 0.044134350154145706, "percentage": 2.21, "elapsed_time": "0:22:46", "remaining_time": "16:48:38", "throughput": "0.00", "total_tokens": 0}
35
+ {"current_steps": 70, "total_steps": 3080, "loss": 0.4522, "learning_rate": 4.5454545454545455e-06, "epoch": 0.04543241927632646, "percentage": 2.27, "elapsed_time": "0:23:28", "remaining_time": "16:49:19", "throughput": "0.00", "total_tokens": 0}
36
+ {"current_steps": 72, "total_steps": 3080, "loss": 0.3711, "learning_rate": 4.675324675324676e-06, "epoch": 0.04673048839850722, "percentage": 2.34, "elapsed_time": "0:24:10", "remaining_time": "16:49:59", "throughput": "0.00", "total_tokens": 0}
37
+ {"current_steps": 74, "total_steps": 3080, "loss": 0.3815, "learning_rate": 4.805194805194806e-06, "epoch": 0.04802855752068798, "percentage": 2.4, "elapsed_time": "0:24:50", "remaining_time": "16:48:54", "throughput": "0.00", "total_tokens": 0}
38
+ {"current_steps": 76, "total_steps": 3080, "loss": 0.4273, "learning_rate": 4.935064935064935e-06, "epoch": 0.049326626642868736, "percentage": 2.47, "elapsed_time": "0:25:29", "remaining_time": "16:47:47", "throughput": "0.00", "total_tokens": 0}
39
+ {"current_steps": 78, "total_steps": 3080, "loss": 0.408, "learning_rate": 5.064935064935065e-06, "epoch": 0.050624695765049486, "percentage": 2.53, "elapsed_time": "0:26:09", "remaining_time": "16:46:46", "throughput": "0.00", "total_tokens": 0}
40
+ {"current_steps": 80, "total_steps": 3080, "loss": 0.388, "learning_rate": 5.194805194805194e-06, "epoch": 0.051922764887230244, "percentage": 2.6, "elapsed_time": "0:26:48", "remaining_time": "16:45:22", "throughput": "0.00", "total_tokens": 0}
41
+ {"current_steps": 82, "total_steps": 3080, "loss": 0.347, "learning_rate": 5.324675324675325e-06, "epoch": 0.053220834009411, "percentage": 2.66, "elapsed_time": "0:27:27", "remaining_time": "16:43:40", "throughput": "0.00", "total_tokens": 0}
42
+ {"current_steps": 84, "total_steps": 3080, "loss": 0.3856, "learning_rate": 5.4545454545454545e-06, "epoch": 0.05451890313159176, "percentage": 2.73, "elapsed_time": "0:28:01", "remaining_time": "16:39:27", "throughput": "0.00", "total_tokens": 0}
43
+ {"current_steps": 86, "total_steps": 3080, "loss": 0.372, "learning_rate": 5.584415584415585e-06, "epoch": 0.055816972253772516, "percentage": 2.79, "elapsed_time": "0:28:39", "remaining_time": "16:37:58", "throughput": "0.00", "total_tokens": 0}
44
+ {"current_steps": 88, "total_steps": 3080, "loss": 0.3667, "learning_rate": 5.7142857142857145e-06, "epoch": 0.05711504137595327, "percentage": 2.86, "elapsed_time": "0:29:19", "remaining_time": "16:36:55", "throughput": "0.00", "total_tokens": 0}
45
+ {"current_steps": 90, "total_steps": 3080, "loss": 0.3881, "learning_rate": 5.844155844155844e-06, "epoch": 0.058413110498134024, "percentage": 2.92, "elapsed_time": "0:29:57", "remaining_time": "16:35:13", "throughput": "0.00", "total_tokens": 0}
46
+ {"current_steps": 92, "total_steps": 3080, "loss": 0.366, "learning_rate": 5.9740259740259746e-06, "epoch": 0.05971117962031478, "percentage": 2.99, "elapsed_time": "0:30:37", "remaining_time": "16:34:27", "throughput": "0.00", "total_tokens": 0}
47
+ {"current_steps": 94, "total_steps": 3080, "loss": 0.3555, "learning_rate": 6.103896103896104e-06, "epoch": 0.06100924874249554, "percentage": 3.05, "elapsed_time": "0:31:16", "remaining_time": "16:33:15", "throughput": "0.00", "total_tokens": 0}
48
+ {"current_steps": 96, "total_steps": 3080, "loss": 0.3948, "learning_rate": 6.233766233766234e-06, "epoch": 0.0623073178646763, "percentage": 3.12, "elapsed_time": "0:31:56", "remaining_time": "16:33:01", "throughput": "0.00", "total_tokens": 0}
49
+ {"current_steps": 98, "total_steps": 3080, "loss": 0.3536, "learning_rate": 6.363636363636364e-06, "epoch": 0.06360538698685705, "percentage": 3.18, "elapsed_time": "0:32:34", "remaining_time": "16:31:17", "throughput": "0.00", "total_tokens": 0}
50
+ {"current_steps": 100, "total_steps": 3080, "loss": 0.3367, "learning_rate": 6.493506493506494e-06, "epoch": 0.0649034561090378, "percentage": 3.25, "elapsed_time": "0:33:14", "remaining_time": "16:30:23", "throughput": "0.00", "total_tokens": 0}
51
+ {"current_steps": 102, "total_steps": 3080, "loss": 0.3424, "learning_rate": 6.623376623376624e-06, "epoch": 0.06620152523121857, "percentage": 3.31, "elapsed_time": "0:33:58", "remaining_time": "16:31:54", "throughput": "0.00", "total_tokens": 0}
52
+ {"current_steps": 104, "total_steps": 3080, "loss": 0.3337, "learning_rate": 6.753246753246754e-06, "epoch": 0.06749959435339932, "percentage": 3.38, "elapsed_time": "0:34:42", "remaining_time": "16:33:10", "throughput": "0.00", "total_tokens": 0}
53
+ {"current_steps": 106, "total_steps": 3080, "loss": 0.3646, "learning_rate": 6.8831168831168835e-06, "epoch": 0.06879766347558007, "percentage": 3.44, "elapsed_time": "0:35:21", "remaining_time": "16:31:50", "throughput": "0.00", "total_tokens": 0}
54
+ {"current_steps": 108, "total_steps": 3080, "loss": 0.3351, "learning_rate": 7.012987012987014e-06, "epoch": 0.07009573259776083, "percentage": 3.51, "elapsed_time": "0:36:01", "remaining_time": "16:31:25", "throughput": "0.00", "total_tokens": 0}
55
+ {"current_steps": 110, "total_steps": 3080, "loss": 0.3414, "learning_rate": 7.1428571428571436e-06, "epoch": 0.07139380171994159, "percentage": 3.57, "elapsed_time": "0:36:39", "remaining_time": "16:29:48", "throughput": "0.00", "total_tokens": 0}
56
+ {"current_steps": 112, "total_steps": 3080, "loss": 0.3445, "learning_rate": 7.272727272727273e-06, "epoch": 0.07269187084212235, "percentage": 3.64, "elapsed_time": "0:37:18", "remaining_time": "16:28:46", "throughput": "0.00", "total_tokens": 0}
57
+ {"current_steps": 114, "total_steps": 3080, "loss": 0.3645, "learning_rate": 7.402597402597404e-06, "epoch": 0.0739899399643031, "percentage": 3.7, "elapsed_time": "0:38:01", "remaining_time": "16:29:16", "throughput": "0.00", "total_tokens": 0}
58
+ {"current_steps": 116, "total_steps": 3080, "loss": 0.3386, "learning_rate": 7.532467532467533e-06, "epoch": 0.07528800908648385, "percentage": 3.77, "elapsed_time": "0:38:37", "remaining_time": "16:27:05", "throughput": "0.00", "total_tokens": 0}
59
+ {"current_steps": 118, "total_steps": 3080, "loss": 0.3298, "learning_rate": 7.662337662337663e-06, "epoch": 0.07658607820866462, "percentage": 3.83, "elapsed_time": "0:39:17", "remaining_time": "16:26:25", "throughput": "0.00", "total_tokens": 0}
60
+ {"current_steps": 120, "total_steps": 3080, "loss": 0.3667, "learning_rate": 7.792207792207793e-06, "epoch": 0.07788414733084537, "percentage": 3.9, "elapsed_time": "0:39:56", "remaining_time": "16:25:16", "throughput": "0.00", "total_tokens": 0}
61
+ {"current_steps": 122, "total_steps": 3080, "loss": 0.3581, "learning_rate": 7.922077922077924e-06, "epoch": 0.07918221645302613, "percentage": 3.96, "elapsed_time": "0:40:33", "remaining_time": "16:23:33", "throughput": "0.00", "total_tokens": 0}
62
+ {"current_steps": 124, "total_steps": 3080, "loss": 0.3472, "learning_rate": 8.051948051948052e-06, "epoch": 0.08048028557520688, "percentage": 4.03, "elapsed_time": "0:41:19", "remaining_time": "16:24:57", "throughput": "0.00", "total_tokens": 0}
63
+ {"current_steps": 126, "total_steps": 3080, "loss": 0.3731, "learning_rate": 8.181818181818183e-06, "epoch": 0.08177835469738763, "percentage": 4.09, "elapsed_time": "0:42:03", "remaining_time": "16:25:59", "throughput": "0.00", "total_tokens": 0}
64
+ {"current_steps": 128, "total_steps": 3080, "loss": 0.3662, "learning_rate": 8.311688311688313e-06, "epoch": 0.0830764238195684, "percentage": 4.16, "elapsed_time": "0:42:46", "remaining_time": "16:26:21", "throughput": "0.00", "total_tokens": 0}
65
+ {"current_steps": 130, "total_steps": 3080, "loss": 0.4006, "learning_rate": 8.441558441558442e-06, "epoch": 0.08437449294174915, "percentage": 4.22, "elapsed_time": "0:43:29", "remaining_time": "16:26:56", "throughput": "0.00", "total_tokens": 0}
66
+ {"current_steps": 132, "total_steps": 3080, "loss": 0.3308, "learning_rate": 8.571428571428571e-06, "epoch": 0.08567256206392991, "percentage": 4.29, "elapsed_time": "0:44:06", "remaining_time": "16:25:01", "throughput": "0.00", "total_tokens": 0}
67
+ {"current_steps": 134, "total_steps": 3080, "loss": 0.3306, "learning_rate": 8.701298701298701e-06, "epoch": 0.08697063118611066, "percentage": 4.35, "elapsed_time": "0:44:45", "remaining_time": "16:23:50", "throughput": "0.00", "total_tokens": 0}
68
+ {"current_steps": 136, "total_steps": 3080, "loss": 0.3972, "learning_rate": 8.831168831168832e-06, "epoch": 0.08826870030829141, "percentage": 4.42, "elapsed_time": "0:45:30", "remaining_time": "16:24:58", "throughput": "0.00", "total_tokens": 0}
69
+ {"current_steps": 138, "total_steps": 3080, "loss": 0.3602, "learning_rate": 8.96103896103896e-06, "epoch": 0.08956676943047218, "percentage": 4.48, "elapsed_time": "0:46:08", "remaining_time": "16:23:46", "throughput": "0.00", "total_tokens": 0}
70
+ {"current_steps": 140, "total_steps": 3080, "loss": 0.3551, "learning_rate": 9.090909090909091e-06, "epoch": 0.09086483855265293, "percentage": 4.55, "elapsed_time": "0:46:46", "remaining_time": "16:22:23", "throughput": "0.00", "total_tokens": 0}
71
+ {"current_steps": 142, "total_steps": 3080, "loss": 0.3412, "learning_rate": 9.220779220779221e-06, "epoch": 0.09216290767483369, "percentage": 4.61, "elapsed_time": "0:47:27", "remaining_time": "16:21:47", "throughput": "0.00", "total_tokens": 0}
72
+ {"current_steps": 144, "total_steps": 3080, "loss": 0.3265, "learning_rate": 9.350649350649352e-06, "epoch": 0.09346097679701444, "percentage": 4.68, "elapsed_time": "0:48:03", "remaining_time": "16:19:42", "throughput": "0.00", "total_tokens": 0}
73
+ {"current_steps": 146, "total_steps": 3080, "loss": 0.3353, "learning_rate": 9.48051948051948e-06, "epoch": 0.09475904591919519, "percentage": 4.74, "elapsed_time": "0:48:44", "remaining_time": "16:19:30", "throughput": "0.00", "total_tokens": 0}
74
+ {"current_steps": 148, "total_steps": 3080, "loss": 0.3603, "learning_rate": 9.610389610389611e-06, "epoch": 0.09605711504137596, "percentage": 4.81, "elapsed_time": "0:49:23", "remaining_time": "16:18:28", "throughput": "0.00", "total_tokens": 0}
75
+ {"current_steps": 150, "total_steps": 3080, "loss": 0.3707, "learning_rate": 9.740259740259742e-06, "epoch": 0.09735518416355671, "percentage": 4.87, "elapsed_time": "0:50:07", "remaining_time": "16:19:12", "throughput": "0.00", "total_tokens": 0}
76
+ {"current_steps": 152, "total_steps": 3080, "loss": 0.3549, "learning_rate": 9.87012987012987e-06, "epoch": 0.09865325328573747, "percentage": 4.94, "elapsed_time": "0:50:51", "remaining_time": "16:19:37", "throughput": "0.00", "total_tokens": 0}
77
+ {"current_steps": 154, "total_steps": 3080, "loss": 0.32, "learning_rate": 1e-05, "epoch": 0.09995132240791822, "percentage": 5.0, "elapsed_time": "0:51:30", "remaining_time": "16:18:37", "throughput": "0.00", "total_tokens": 0}
78
+ {"current_steps": 156, "total_steps": 3080, "loss": 0.3527, "learning_rate": 9.999988472080506e-06, "epoch": 0.10124939153009897, "percentage": 5.06, "elapsed_time": "0:52:08", "remaining_time": "16:17:12", "throughput": "0.00", "total_tokens": 0}
79
+ {"current_steps": 158, "total_steps": 3080, "loss": 0.3368, "learning_rate": 9.999953888375178e-06, "epoch": 0.10254746065227974, "percentage": 5.13, "elapsed_time": "0:52:49", "remaining_time": "16:16:47", "throughput": "0.00", "total_tokens": 0}
80
+ {"current_steps": 160, "total_steps": 3080, "loss": 0.345, "learning_rate": 9.999896249043488e-06, "epoch": 0.10384552977446049, "percentage": 5.19, "elapsed_time": "0:53:31", "remaining_time": "16:16:48", "throughput": "0.00", "total_tokens": 0}
81
+ {"current_steps": 162, "total_steps": 3080, "loss": 0.3544, "learning_rate": 9.999815554351224e-06, "epoch": 0.10514359889664125, "percentage": 5.26, "elapsed_time": "0:54:10", "remaining_time": "16:15:55", "throughput": "0.00", "total_tokens": 0}
82
+ {"current_steps": 164, "total_steps": 3080, "loss": 0.3177, "learning_rate": 9.999711804670478e-06, "epoch": 0.106441668018822, "percentage": 5.32, "elapsed_time": "0:54:53", "remaining_time": "16:15:55", "throughput": "0.00", "total_tokens": 0}
83
+ {"current_steps": 166, "total_steps": 3080, "loss": 0.3642, "learning_rate": 9.999585000479658e-06, "epoch": 0.10773973714100275, "percentage": 5.39, "elapsed_time": "0:55:33", "remaining_time": "16:15:21", "throughput": "0.00", "total_tokens": 0}
84
+ {"current_steps": 168, "total_steps": 3080, "loss": 0.3599, "learning_rate": 9.999435142363484e-06, "epoch": 0.10903780626318352, "percentage": 5.45, "elapsed_time": "0:56:14", "remaining_time": "16:14:53", "throughput": "0.00", "total_tokens": 0}
85
+ {"current_steps": 170, "total_steps": 3080, "loss": 0.3509, "learning_rate": 9.999262231012971e-06, "epoch": 0.11033587538536427, "percentage": 5.52, "elapsed_time": "0:56:55", "remaining_time": "16:14:26", "throughput": "0.00", "total_tokens": 0}
86
+ {"current_steps": 172, "total_steps": 3080, "loss": 0.339, "learning_rate": 9.999066267225447e-06, "epoch": 0.11163394450754503, "percentage": 5.58, "elapsed_time": "0:57:34", "remaining_time": "16:13:24", "throughput": "0.00", "total_tokens": 0}
87
+ {"current_steps": 174, "total_steps": 3080, "loss": 0.3526, "learning_rate": 9.998847251904529e-06, "epoch": 0.11293201362972578, "percentage": 5.65, "elapsed_time": "0:58:11", "remaining_time": "16:11:48", "throughput": "0.00", "total_tokens": 0}
88
+ {"current_steps": 176, "total_steps": 3080, "loss": 0.3286, "learning_rate": 9.998605186060138e-06, "epoch": 0.11423008275190653, "percentage": 5.71, "elapsed_time": "0:58:47", "remaining_time": "16:10:08", "throughput": "0.00", "total_tokens": 0}
89
+ {"current_steps": 178, "total_steps": 3080, "loss": 0.3522, "learning_rate": 9.998340070808478e-06, "epoch": 0.1155281518740873, "percentage": 5.78, "elapsed_time": "0:59:29", "remaining_time": "16:09:57", "throughput": "0.00", "total_tokens": 0}
90
+ {"current_steps": 180, "total_steps": 3080, "loss": 0.3546, "learning_rate": 9.99805190737204e-06, "epoch": 0.11682622099626805, "percentage": 5.84, "elapsed_time": "1:00:08", "remaining_time": "16:08:59", "throughput": "0.00", "total_tokens": 0}
91
+ {"current_steps": 182, "total_steps": 3080, "loss": 0.3295, "learning_rate": 9.997740697079595e-06, "epoch": 0.11812429011844881, "percentage": 5.91, "elapsed_time": "1:00:43", "remaining_time": "16:07:01", "throughput": "0.00", "total_tokens": 0}
92
+ {"current_steps": 184, "total_steps": 3080, "loss": 0.3439, "learning_rate": 9.997406441366182e-06, "epoch": 0.11942235924062956, "percentage": 5.97, "elapsed_time": "1:01:25", "remaining_time": "16:06:38", "throughput": "0.00", "total_tokens": 0}
93
+ {"current_steps": 186, "total_steps": 3080, "loss": 0.3635, "learning_rate": 9.997049141773117e-06, "epoch": 0.12072042836281031, "percentage": 6.04, "elapsed_time": "1:02:07", "remaining_time": "16:06:33", "throughput": "0.00", "total_tokens": 0}
94
+ {"current_steps": 188, "total_steps": 3080, "loss": 0.342, "learning_rate": 9.996668799947962e-06, "epoch": 0.12201849748499108, "percentage": 6.1, "elapsed_time": "1:02:45", "remaining_time": "16:05:28", "throughput": "0.00", "total_tokens": 0}
95
+ {"current_steps": 190, "total_steps": 3080, "loss": 0.322, "learning_rate": 9.99626541764454e-06, "epoch": 0.12331656660717183, "percentage": 6.17, "elapsed_time": "1:03:26", "remaining_time": "16:04:51", "throughput": "0.00", "total_tokens": 0}
96
+ {"current_steps": 192, "total_steps": 3080, "loss": 0.3706, "learning_rate": 9.995838996722916e-06, "epoch": 0.1246146357293526, "percentage": 6.23, "elapsed_time": "1:04:03", "remaining_time": "16:03:37", "throughput": "0.00", "total_tokens": 0}
97
+ {"current_steps": 194, "total_steps": 3080, "loss": 0.3224, "learning_rate": 9.995389539149386e-06, "epoch": 0.12591270485153336, "percentage": 6.3, "elapsed_time": "1:04:47", "remaining_time": "16:03:48", "throughput": "0.00", "total_tokens": 0}
98
+ {"current_steps": 196, "total_steps": 3080, "loss": 0.3507, "learning_rate": 9.994917046996472e-06, "epoch": 0.1272107739737141, "percentage": 6.36, "elapsed_time": "1:05:27", "remaining_time": "16:03:13", "throughput": "0.00", "total_tokens": 0}
99
+ {"current_steps": 198, "total_steps": 3080, "loss": 0.3375, "learning_rate": 9.99442152244292e-06, "epoch": 0.12850884309589486, "percentage": 6.43, "elapsed_time": "1:06:09", "remaining_time": "16:02:59", "throughput": "0.00", "total_tokens": 0}
100
+ {"current_steps": 200, "total_steps": 3080, "loss": 0.3355, "learning_rate": 9.993902967773674e-06, "epoch": 0.1298069122180756, "percentage": 6.49, "elapsed_time": "1:06:50", "remaining_time": "16:02:26", "throughput": "0.00", "total_tokens": 0}
101
+ {"current_steps": 202, "total_steps": 3080, "loss": 0.3303, "learning_rate": 9.993361385379876e-06, "epoch": 0.13110498134025636, "percentage": 6.56, "elapsed_time": "1:07:28", "remaining_time": "16:01:28", "throughput": "0.00", "total_tokens": 0}
102
+ {"current_steps": 204, "total_steps": 3080, "loss": 0.3539, "learning_rate": 9.992796777758855e-06, "epoch": 0.13240305046243714, "percentage": 6.62, "elapsed_time": "1:08:07", "remaining_time": "16:00:18", "throughput": "0.00", "total_tokens": 0}
103
+ {"current_steps": 206, "total_steps": 3080, "loss": 0.3123, "learning_rate": 9.992209147514108e-06, "epoch": 0.1337011195846179, "percentage": 6.69, "elapsed_time": "1:08:47", "remaining_time": "15:59:44", "throughput": "0.00", "total_tokens": 0}
104
+ {"current_steps": 208, "total_steps": 3080, "loss": 0.3491, "learning_rate": 9.991598497355304e-06, "epoch": 0.13499918870679864, "percentage": 6.75, "elapsed_time": "1:09:26", "remaining_time": "15:58:53", "throughput": "0.00", "total_tokens": 0}
105
+ {"current_steps": 210, "total_steps": 3080, "loss": 0.358, "learning_rate": 9.990964830098246e-06, "epoch": 0.1362972578289794, "percentage": 6.82, "elapsed_time": "1:10:11", "remaining_time": "15:59:12", "throughput": "0.00", "total_tokens": 0}
106
+ {"current_steps": 212, "total_steps": 3080, "loss": 0.321, "learning_rate": 9.990308148664882e-06, "epoch": 0.13759532695116014, "percentage": 6.88, "elapsed_time": "1:10:54", "remaining_time": "15:59:19", "throughput": "0.00", "total_tokens": 0}
107
+ {"current_steps": 214, "total_steps": 3080, "loss": 0.3341, "learning_rate": 9.989628456083283e-06, "epoch": 0.13889339607334092, "percentage": 6.95, "elapsed_time": "1:11:31", "remaining_time": "15:57:49", "throughput": "0.00", "total_tokens": 0}
108
+ {"current_steps": 216, "total_steps": 3080, "loss": 0.3606, "learning_rate": 9.988925755487622e-06, "epoch": 0.14019146519552167, "percentage": 7.01, "elapsed_time": "1:12:10", "remaining_time": "15:56:59", "throughput": "0.00", "total_tokens": 0}
109
+ {"current_steps": 218, "total_steps": 3080, "loss": 0.3554, "learning_rate": 9.98820005011817e-06, "epoch": 0.14148953431770242, "percentage": 7.08, "elapsed_time": "1:12:52", "remaining_time": "15:56:50", "throughput": "0.00", "total_tokens": 0}
110
+ {"current_steps": 220, "total_steps": 3080, "loss": 0.337, "learning_rate": 9.98745134332128e-06, "epoch": 0.14278760343988317, "percentage": 7.14, "elapsed_time": "1:13:32", "remaining_time": "15:56:03", "throughput": "0.00", "total_tokens": 0}
111
+ {"current_steps": 222, "total_steps": 3080, "loss": 0.3436, "learning_rate": 9.98667963854936e-06, "epoch": 0.14408567256206392, "percentage": 7.21, "elapsed_time": "1:14:11", "remaining_time": "15:55:12", "throughput": "0.00", "total_tokens": 0}
112
+ {"current_steps": 224, "total_steps": 3080, "loss": 0.356, "learning_rate": 9.985884939360873e-06, "epoch": 0.1453837416842447, "percentage": 7.27, "elapsed_time": "1:14:53", "remaining_time": "15:54:52", "throughput": "0.00", "total_tokens": 0}
113
+ {"current_steps": 226, "total_steps": 3080, "loss": 0.3241, "learning_rate": 9.985067249420308e-06, "epoch": 0.14668181080642545, "percentage": 7.34, "elapsed_time": "1:15:31", "remaining_time": "15:53:42", "throughput": "0.00", "total_tokens": 0}
114
+ {"current_steps": 228, "total_steps": 3080, "loss": 0.3095, "learning_rate": 9.984226572498173e-06, "epoch": 0.1479798799286062, "percentage": 7.4, "elapsed_time": "1:16:13", "remaining_time": "15:53:32", "throughput": "0.00", "total_tokens": 0}
115
+ {"current_steps": 230, "total_steps": 3080, "loss": 0.3292, "learning_rate": 9.983362912470967e-06, "epoch": 0.14927794905078695, "percentage": 7.47, "elapsed_time": "1:16:54", "remaining_time": "15:52:57", "throughput": "0.00", "total_tokens": 0}
116
+ {"current_steps": 232, "total_steps": 3080, "loss": 0.3428, "learning_rate": 9.982476273321175e-06, "epoch": 0.1505760181729677, "percentage": 7.53, "elapsed_time": "1:17:32", "remaining_time": "15:51:55", "throughput": "0.00", "total_tokens": 0}
117
+ {"current_steps": 234, "total_steps": 3080, "loss": 0.3245, "learning_rate": 9.981566659137238e-06, "epoch": 0.15187408729514848, "percentage": 7.6, "elapsed_time": "1:18:14", "remaining_time": "15:51:35", "throughput": "0.00", "total_tokens": 0}
118
+ {"current_steps": 236, "total_steps": 3080, "loss": 0.3447, "learning_rate": 9.980634074113538e-06, "epoch": 0.15317215641732923, "percentage": 7.66, "elapsed_time": "1:18:51", "remaining_time": "15:50:21", "throughput": "0.00", "total_tokens": 0}
119
+ {"current_steps": 238, "total_steps": 3080, "loss": 0.3328, "learning_rate": 9.979678522550382e-06, "epoch": 0.15447022553950998, "percentage": 7.73, "elapsed_time": "1:19:32", "remaining_time": "15:49:50", "throughput": "0.00", "total_tokens": 0}
120
+ {"current_steps": 240, "total_steps": 3080, "loss": 0.3513, "learning_rate": 9.97870000885398e-06, "epoch": 0.15576829466169073, "percentage": 7.79, "elapsed_time": "1:20:14", "remaining_time": "15:49:33", "throughput": "0.00", "total_tokens": 0}
121
+ {"current_steps": 242, "total_steps": 3080, "loss": 0.3047, "learning_rate": 9.97769853753642e-06, "epoch": 0.15706636378387148, "percentage": 7.86, "elapsed_time": "1:20:57", "remaining_time": "15:49:27", "throughput": "0.00", "total_tokens": 0}
122
+ {"current_steps": 244, "total_steps": 3080, "loss": 0.3237, "learning_rate": 9.976674113215655e-06, "epoch": 0.15836443290605226, "percentage": 7.92, "elapsed_time": "1:21:36", "remaining_time": "15:48:32", "throughput": "0.00", "total_tokens": 0}
123
+ {"current_steps": 246, "total_steps": 3080, "loss": 0.3276, "learning_rate": 9.975626740615478e-06, "epoch": 0.159662502028233, "percentage": 7.99, "elapsed_time": "1:22:15", "remaining_time": "15:47:33", "throughput": "0.00", "total_tokens": 0}
124
+ {"current_steps": 248, "total_steps": 3080, "loss": 0.319, "learning_rate": 9.974556424565503e-06, "epoch": 0.16096057115041376, "percentage": 8.05, "elapsed_time": "1:22:59", "remaining_time": "15:47:37", "throughput": "0.00", "total_tokens": 0}
125
+ {"current_steps": 250, "total_steps": 3080, "loss": 0.3259, "learning_rate": 9.973463170001131e-06, "epoch": 0.1622586402725945, "percentage": 8.12, "elapsed_time": "1:23:41", "remaining_time": "15:47:26", "throughput": "0.00", "total_tokens": 0}
126
+ {"current_steps": 252, "total_steps": 3080, "loss": 0.3016, "learning_rate": 9.972346981963546e-06, "epoch": 0.16355670939477526, "percentage": 8.18, "elapsed_time": "1:24:21", "remaining_time": "15:46:38", "throughput": "0.00", "total_tokens": 0}
127
+ {"current_steps": 254, "total_steps": 3080, "loss": 0.3151, "learning_rate": 9.971207865599679e-06, "epoch": 0.16485477851695604, "percentage": 8.25, "elapsed_time": "1:25:02", "remaining_time": "15:46:07", "throughput": "0.00", "total_tokens": 0}
128
+ {"current_steps": 256, "total_steps": 3080, "loss": 0.3193, "learning_rate": 9.970045826162182e-06, "epoch": 0.1661528476391368, "percentage": 8.31, "elapsed_time": "1:25:43", "remaining_time": "15:45:36", "throughput": "0.00", "total_tokens": 0}
129
+ {"current_steps": 258, "total_steps": 3080, "loss": 0.3412, "learning_rate": 9.96886086900942e-06, "epoch": 0.16745091676131754, "percentage": 8.38, "elapsed_time": "1:26:20", "remaining_time": "15:44:22", "throughput": "0.00", "total_tokens": 0}
130
+ {"current_steps": 260, "total_steps": 3080, "loss": 0.324, "learning_rate": 9.967652999605424e-06, "epoch": 0.1687489858834983, "percentage": 8.44, "elapsed_time": "1:27:00", "remaining_time": "15:43:46", "throughput": "0.00", "total_tokens": 0}
131
+ {"current_steps": 262, "total_steps": 3080, "loss": 0.3476, "learning_rate": 9.966422223519887e-06, "epoch": 0.17004705500567904, "percentage": 8.51, "elapsed_time": "1:27:39", "remaining_time": "15:42:49", "throughput": "0.00", "total_tokens": 0}
132
+ {"current_steps": 264, "total_steps": 3080, "loss": 0.3333, "learning_rate": 9.965168546428122e-06, "epoch": 0.17134512412785982, "percentage": 8.57, "elapsed_time": "1:28:17", "remaining_time": "15:41:48", "throughput": "0.00", "total_tokens": 0}
133
+ {"current_steps": 266, "total_steps": 3080, "loss": 0.3283, "learning_rate": 9.963891974111042e-06, "epoch": 0.17264319325004057, "percentage": 8.64, "elapsed_time": "1:28:57", "remaining_time": "15:41:04", "throughput": "0.00", "total_tokens": 0}
134
+ {"current_steps": 268, "total_steps": 3080, "loss": 0.3187, "learning_rate": 9.96259251245514e-06, "epoch": 0.17394126237222132, "percentage": 8.7, "elapsed_time": "1:29:39", "remaining_time": "15:40:49", "throughput": "0.00", "total_tokens": 0}
135
+ {"current_steps": 270, "total_steps": 3080, "loss": 0.3162, "learning_rate": 9.961270167452449e-06, "epoch": 0.17523933149440207, "percentage": 8.77, "elapsed_time": "1:30:21", "remaining_time": "15:40:23", "throughput": "0.00", "total_tokens": 0}
136
+ {"current_steps": 272, "total_steps": 3080, "loss": 0.3442, "learning_rate": 9.959924945200525e-06, "epoch": 0.17653740061658282, "percentage": 8.83, "elapsed_time": "1:31:02", "remaining_time": "15:39:54", "throughput": "0.00", "total_tokens": 0}
137
+ {"current_steps": 274, "total_steps": 3080, "loss": 0.3248, "learning_rate": 9.958556851902414e-06, "epoch": 0.1778354697387636, "percentage": 8.9, "elapsed_time": "1:31:46", "remaining_time": "15:39:51", "throughput": "0.00", "total_tokens": 0}
138
+ {"current_steps": 276, "total_steps": 3080, "loss": 0.314, "learning_rate": 9.957165893866623e-06, "epoch": 0.17913353886094435, "percentage": 8.96, "elapsed_time": "1:32:24", "remaining_time": "15:38:52", "throughput": "0.00", "total_tokens": 0}
139
+ {"current_steps": 278, "total_steps": 3080, "loss": 0.3207, "learning_rate": 9.955752077507093e-06, "epoch": 0.1804316079831251, "percentage": 9.03, "elapsed_time": "1:33:05", "remaining_time": "15:38:13", "throughput": "0.00", "total_tokens": 0}
140
+ {"current_steps": 280, "total_steps": 3080, "loss": 0.3166, "learning_rate": 9.95431540934317e-06, "epoch": 0.18172967710530585, "percentage": 9.09, "elapsed_time": "1:33:42", "remaining_time": "15:37:04", "throughput": "0.00", "total_tokens": 0}
141
+ {"current_steps": 282, "total_steps": 3080, "loss": 0.3198, "learning_rate": 9.952855895999567e-06, "epoch": 0.1830277462274866, "percentage": 9.16, "elapsed_time": "1:34:19", "remaining_time": "15:35:56", "throughput": "0.00", "total_tokens": 0}
142
+ {"current_steps": 284, "total_steps": 3080, "loss": 0.3227, "learning_rate": 9.951373544206352e-06, "epoch": 0.18432581534966738, "percentage": 9.22, "elapsed_time": "1:35:01", "remaining_time": "15:35:28", "throughput": "0.00", "total_tokens": 0}
143
+ {"current_steps": 286, "total_steps": 3080, "loss": 0.3325, "learning_rate": 9.949868360798893e-06, "epoch": 0.18562388447184813, "percentage": 9.29, "elapsed_time": "1:35:42", "remaining_time": "15:34:56", "throughput": "0.00", "total_tokens": 0}
144
+ {"current_steps": 288, "total_steps": 3080, "loss": 0.3007, "learning_rate": 9.948340352717845e-06, "epoch": 0.18692195359402888, "percentage": 9.35, "elapsed_time": "1:36:21", "remaining_time": "15:34:08", "throughput": "0.00", "total_tokens": 0}
145
+ {"current_steps": 290, "total_steps": 3080, "loss": 0.3168, "learning_rate": 9.946789527009108e-06, "epoch": 0.18822002271620963, "percentage": 9.42, "elapsed_time": "1:37:02", "remaining_time": "15:33:35", "throughput": "0.00", "total_tokens": 0}
146
+ {"current_steps": 292, "total_steps": 3080, "loss": 0.3234, "learning_rate": 9.9452158908238e-06, "epoch": 0.18951809183839038, "percentage": 9.48, "elapsed_time": "1:37:43", "remaining_time": "15:32:59", "throughput": "0.00", "total_tokens": 0}
147
+ {"current_steps": 294, "total_steps": 3080, "loss": 0.3549, "learning_rate": 9.943619451418225e-06, "epoch": 0.19081616096057116, "percentage": 9.55, "elapsed_time": "1:38:20", "remaining_time": "15:31:57", "throughput": "0.00", "total_tokens": 0}
148
+ {"current_steps": 296, "total_steps": 3080, "loss": 0.3219, "learning_rate": 9.942000216153829e-06, "epoch": 0.1921142300827519, "percentage": 9.61, "elapsed_time": "1:39:01", "remaining_time": "15:31:23", "throughput": "0.00", "total_tokens": 0}
149
+ {"current_steps": 298, "total_steps": 3080, "loss": 0.3177, "learning_rate": 9.940358192497178e-06, "epoch": 0.19341229920493266, "percentage": 9.68, "elapsed_time": "1:39:41", "remaining_time": "15:30:37", "throughput": "0.00", "total_tokens": 0}
150
+ {"current_steps": 300, "total_steps": 3080, "loss": 0.3151, "learning_rate": 9.93869338801992e-06, "epoch": 0.19471036832711341, "percentage": 9.74, "elapsed_time": "1:40:24", "remaining_time": "15:30:31", "throughput": "0.00", "total_tokens": 0}
151
+ {"current_steps": 302, "total_steps": 3080, "loss": 0.3183, "learning_rate": 9.937005810398747e-06, "epoch": 0.19600843744929417, "percentage": 9.81, "elapsed_time": "1:41:01", "remaining_time": "15:29:21", "throughput": "0.00", "total_tokens": 0}
152
+ {"current_steps": 304, "total_steps": 3080, "loss": 0.3382, "learning_rate": 9.935295467415363e-06, "epoch": 0.19730650657147494, "percentage": 9.87, "elapsed_time": "1:41:38", "remaining_time": "15:28:07", "throughput": "0.00", "total_tokens": 0}
153
+ {"current_steps": 306, "total_steps": 3080, "loss": 0.327, "learning_rate": 9.933562366956445e-06, "epoch": 0.1986045756936557, "percentage": 9.94, "elapsed_time": "1:42:20", "remaining_time": "15:27:48", "throughput": "0.00", "total_tokens": 0}
154
+ {"current_steps": 308, "total_steps": 3080, "loss": 0.3025, "learning_rate": 9.931806517013612e-06, "epoch": 0.19990264481583644, "percentage": 10.0, "elapsed_time": "1:42:59", "remaining_time": "15:26:58", "throughput": "0.00", "total_tokens": 0}
155
+ {"current_steps": 310, "total_steps": 3080, "loss": 0.3499, "learning_rate": 9.930027925683384e-06, "epoch": 0.2012007139380172, "percentage": 10.06, "elapsed_time": "1:43:40", "remaining_time": "15:26:25", "throughput": "0.00", "total_tokens": 0}
156
+ {"current_steps": 312, "total_steps": 3080, "loss": 0.3319, "learning_rate": 9.928226601167139e-06, "epoch": 0.20249878306019795, "percentage": 10.13, "elapsed_time": "1:44:19", "remaining_time": "15:25:36", "throughput": "0.00", "total_tokens": 0}
157
+ {"current_steps": 314, "total_steps": 3080, "loss": 0.3031, "learning_rate": 9.92640255177109e-06, "epoch": 0.20379685218237872, "percentage": 10.19, "elapsed_time": "1:44:59", "remaining_time": "15:24:50", "throughput": "0.00", "total_tokens": 0}
158
+ {"current_steps": 316, "total_steps": 3080, "loss": 0.3245, "learning_rate": 9.924555785906235e-06, "epoch": 0.20509492130455947, "percentage": 10.26, "elapsed_time": "1:45:36", "remaining_time": "15:23:42", "throughput": "0.00", "total_tokens": 0}
159
+ {"current_steps": 318, "total_steps": 3080, "loss": 0.3003, "learning_rate": 9.922686312088323e-06, "epoch": 0.20639299042674022, "percentage": 10.32, "elapsed_time": "1:46:16", "remaining_time": "15:23:05", "throughput": "0.00", "total_tokens": 0}
160
+ {"current_steps": 320, "total_steps": 3080, "loss": 0.3258, "learning_rate": 9.920794138937807e-06, "epoch": 0.20769105954892098, "percentage": 10.39, "elapsed_time": "1:46:52", "remaining_time": "15:21:49", "throughput": "0.00", "total_tokens": 0}
161
+ {"current_steps": 322, "total_steps": 3080, "loss": 0.3447, "learning_rate": 9.918879275179819e-06, "epoch": 0.20898912867110173, "percentage": 10.45, "elapsed_time": "1:47:33", "remaining_time": "15:21:19", "throughput": "0.00", "total_tokens": 0}
162
+ {"current_steps": 324, "total_steps": 3080, "loss": 0.3075, "learning_rate": 9.916941729644112e-06, "epoch": 0.2102871977932825, "percentage": 10.52, "elapsed_time": "1:48:12", "remaining_time": "15:20:24", "throughput": "0.00", "total_tokens": 0}
163
+ {"current_steps": 326, "total_steps": 3080, "loss": 0.3392, "learning_rate": 9.914981511265039e-06, "epoch": 0.21158526691546325, "percentage": 10.58, "elapsed_time": "1:48:50", "remaining_time": "15:19:30", "throughput": "0.00", "total_tokens": 0}
164
+ {"current_steps": 328, "total_steps": 3080, "loss": 0.3018, "learning_rate": 9.912998629081495e-06, "epoch": 0.212883336037644, "percentage": 10.65, "elapsed_time": "1:49:27", "remaining_time": "15:18:25", "throughput": "0.00", "total_tokens": 0}
165
+ {"current_steps": 330, "total_steps": 3080, "loss": 0.3241, "learning_rate": 9.910993092236878e-06, "epoch": 0.21418140515982476, "percentage": 10.71, "elapsed_time": "1:50:05", "remaining_time": "15:17:23", "throughput": "0.00", "total_tokens": 0}
166
+ {"current_steps": 332, "total_steps": 3080, "loss": 0.2979, "learning_rate": 9.90896490997906e-06, "epoch": 0.2154794742820055, "percentage": 10.78, "elapsed_time": "1:50:46", "remaining_time": "15:16:54", "throughput": "0.00", "total_tokens": 0}
167
+ {"current_steps": 334, "total_steps": 3080, "loss": 0.3036, "learning_rate": 9.906914091660327e-06, "epoch": 0.21677754340418628, "percentage": 10.84, "elapsed_time": "1:51:27", "remaining_time": "15:16:24", "throughput": "0.00", "total_tokens": 0}
168
+ {"current_steps": 336, "total_steps": 3080, "loss": 0.3457, "learning_rate": 9.904840646737346e-06, "epoch": 0.21807561252636704, "percentage": 10.91, "elapsed_time": "1:52:09", "remaining_time": "15:16:00", "throughput": "0.00", "total_tokens": 0}
169
+ {"current_steps": 338, "total_steps": 3080, "loss": 0.3235, "learning_rate": 9.902744584771123e-06, "epoch": 0.21937368164854779, "percentage": 10.97, "elapsed_time": "1:52:49", "remaining_time": "15:15:14", "throughput": "0.00", "total_tokens": 0}
170
+ {"current_steps": 340, "total_steps": 3080, "loss": 0.2932, "learning_rate": 9.900625915426948e-06, "epoch": 0.22067175077072854, "percentage": 11.04, "elapsed_time": "1:53:28", "remaining_time": "15:14:31", "throughput": "0.00", "total_tokens": 0}
171
+ {"current_steps": 342, "total_steps": 3080, "loss": 0.3243, "learning_rate": 9.898484648474362e-06, "epoch": 0.2219698198929093, "percentage": 11.1, "elapsed_time": "1:54:10", "remaining_time": "15:14:04", "throughput": "0.00", "total_tokens": 0}
172
+ {"current_steps": 344, "total_steps": 3080, "loss": 0.3184, "learning_rate": 9.896320793787106e-06, "epoch": 0.22326788901509007, "percentage": 11.17, "elapsed_time": "1:54:52", "remaining_time": "15:13:39", "throughput": "0.00", "total_tokens": 0}
173
+ {"current_steps": 346, "total_steps": 3080, "loss": 0.3394, "learning_rate": 9.894134361343077e-06, "epoch": 0.22456595813727082, "percentage": 11.23, "elapsed_time": "1:55:37", "remaining_time": "15:13:39", "throughput": "0.00", "total_tokens": 0}
174
+ {"current_steps": 348, "total_steps": 3080, "loss": 0.327, "learning_rate": 9.891925361224284e-06, "epoch": 0.22586402725945157, "percentage": 11.3, "elapsed_time": "1:56:18", "remaining_time": "15:13:02", "throughput": "0.00", "total_tokens": 0}
175
+ {"current_steps": 350, "total_steps": 3080, "loss": 0.3652, "learning_rate": 9.889693803616793e-06, "epoch": 0.22716209638163232, "percentage": 11.36, "elapsed_time": "1:56:56", "remaining_time": "15:12:08", "throughput": "0.00", "total_tokens": 0}
176
+ {"current_steps": 352, "total_steps": 3080, "loss": 0.314, "learning_rate": 9.887439698810694e-06, "epoch": 0.22846016550381307, "percentage": 11.43, "elapsed_time": "1:57:32", "remaining_time": "15:10:58", "throughput": "0.00", "total_tokens": 0}
177
+ {"current_steps": 354, "total_steps": 3080, "loss": 0.3081, "learning_rate": 9.88516305720004e-06, "epoch": 0.22975823462599385, "percentage": 11.49, "elapsed_time": "1:58:14", "remaining_time": "15:10:31", "throughput": "0.00", "total_tokens": 0}
178
+ {"current_steps": 356, "total_steps": 3080, "loss": 0.3154, "learning_rate": 9.88286388928281e-06, "epoch": 0.2310563037481746, "percentage": 11.56, "elapsed_time": "1:58:51", "remaining_time": "15:09:29", "throughput": "0.00", "total_tokens": 0}
179
+ {"current_steps": 358, "total_steps": 3080, "loss": 0.3115, "learning_rate": 9.880542205660853e-06, "epoch": 0.23235437287035535, "percentage": 11.62, "elapsed_time": "1:59:27", "remaining_time": "15:08:17", "throughput": "0.00", "total_tokens": 0}
180
+ {"current_steps": 360, "total_steps": 3080, "loss": 0.2975, "learning_rate": 9.878198017039839e-06, "epoch": 0.2336524419925361, "percentage": 11.69, "elapsed_time": "2:00:06", "remaining_time": "15:07:30", "throughput": "0.00", "total_tokens": 0}
181
+ {"current_steps": 362, "total_steps": 3080, "loss": 0.2996, "learning_rate": 9.875831334229217e-06, "epoch": 0.23495051111471685, "percentage": 11.75, "elapsed_time": "2:00:45", "remaining_time": "15:06:41", "throughput": "0.00", "total_tokens": 0}
182
+ {"current_steps": 364, "total_steps": 3080, "loss": 0.2892, "learning_rate": 9.873442168142158e-06, "epoch": 0.23624858023689763, "percentage": 11.82, "elapsed_time": "2:01:25", "remaining_time": "15:06:04", "throughput": "0.00", "total_tokens": 0}
183
+ {"current_steps": 366, "total_steps": 3080, "loss": 0.3073, "learning_rate": 9.87103052979551e-06, "epoch": 0.23754664935907838, "percentage": 11.88, "elapsed_time": "2:02:06", "remaining_time": "15:05:28", "throughput": "0.00", "total_tokens": 0}
184
+ {"current_steps": 368, "total_steps": 3080, "loss": 0.3216, "learning_rate": 9.868596430309739e-06, "epoch": 0.23884471848125913, "percentage": 11.95, "elapsed_time": "2:02:45", "remaining_time": "15:04:37", "throughput": "0.00", "total_tokens": 0}
185
+ {"current_steps": 370, "total_steps": 3080, "loss": 0.3071, "learning_rate": 9.866139880908887e-06, "epoch": 0.24014278760343988, "percentage": 12.01, "elapsed_time": "2:03:25", "remaining_time": "15:03:58", "throughput": "0.00", "total_tokens": 0}
186
+ {"current_steps": 372, "total_steps": 3080, "loss": 0.3239, "learning_rate": 9.863660892920514e-06, "epoch": 0.24144085672562063, "percentage": 12.08, "elapsed_time": "2:04:06", "remaining_time": "15:03:26", "throughput": "0.00", "total_tokens": 0}
187
+ {"current_steps": 374, "total_steps": 3080, "loss": 0.3079, "learning_rate": 9.861159477775653e-06, "epoch": 0.2427389258478014, "percentage": 12.14, "elapsed_time": "2:04:49", "remaining_time": "15:03:09", "throughput": "0.00", "total_tokens": 0}
188
+ {"current_steps": 376, "total_steps": 3080, "loss": 0.3124, "learning_rate": 9.858635647008747e-06, "epoch": 0.24403699496998216, "percentage": 12.21, "elapsed_time": "2:05:26", "remaining_time": "15:02:06", "throughput": "0.00", "total_tokens": 0}
189
+ {"current_steps": 378, "total_steps": 3080, "loss": 0.3067, "learning_rate": 9.856089412257605e-06, "epoch": 0.2453350640921629, "percentage": 12.27, "elapsed_time": "2:06:06", "remaining_time": "15:01:25", "throughput": "0.00", "total_tokens": 0}
190
+ {"current_steps": 380, "total_steps": 3080, "loss": 0.308, "learning_rate": 9.85352078526334e-06, "epoch": 0.24663313321434366, "percentage": 12.34, "elapsed_time": "2:06:48", "remaining_time": "15:01:00", "throughput": "0.00", "total_tokens": 0}
191
+ {"current_steps": 382, "total_steps": 3080, "loss": 0.3277, "learning_rate": 9.850929777870324e-06, "epoch": 0.2479312023365244, "percentage": 12.4, "elapsed_time": "2:07:27", "remaining_time": "15:00:14", "throughput": "0.00", "total_tokens": 0}
192
+ {"current_steps": 384, "total_steps": 3080, "loss": 0.2848, "learning_rate": 9.848316402026125e-06, "epoch": 0.2492292714587052, "percentage": 12.47, "elapsed_time": "2:08:09", "remaining_time": "14:59:50", "throughput": "0.00", "total_tokens": 0}
193
+ {"current_steps": 386, "total_steps": 3080, "loss": 0.2932, "learning_rate": 9.845680669781459e-06, "epoch": 0.25052734058088594, "percentage": 12.53, "elapsed_time": "2:08:49", "remaining_time": "14:59:08", "throughput": "0.00", "total_tokens": 0}
194
+ {"current_steps": 388, "total_steps": 3080, "loss": 0.3067, "learning_rate": 9.843022593290129e-06, "epoch": 0.2518254097030667, "percentage": 12.6, "elapsed_time": "2:09:26", "remaining_time": "14:58:04", "throughput": "0.00", "total_tokens": 0}
195
+ {"current_steps": 390, "total_steps": 3080, "loss": 0.3132, "learning_rate": 9.840342184808973e-06, "epoch": 0.25312347882524744, "percentage": 12.66, "elapsed_time": "2:10:06", "remaining_time": "14:57:22", "throughput": "0.00", "total_tokens": 0}
196
+ {"current_steps": 392, "total_steps": 3080, "loss": 0.2996, "learning_rate": 9.837639456697802e-06, "epoch": 0.2544215479474282, "percentage": 12.73, "elapsed_time": "2:10:46", "remaining_time": "14:56:47", "throughput": "0.00", "total_tokens": 0}
197
+ {"current_steps": 394, "total_steps": 3080, "loss": 0.3138, "learning_rate": 9.83491442141935e-06, "epoch": 0.25571961706960894, "percentage": 12.79, "elapsed_time": "2:11:28", "remaining_time": "14:56:20", "throughput": "0.00", "total_tokens": 0}
198
+ {"current_steps": 396, "total_steps": 3080, "loss": 0.3257, "learning_rate": 9.832167091539215e-06, "epoch": 0.2570176861917897, "percentage": 12.86, "elapsed_time": "2:12:14", "remaining_time": "14:56:17", "throughput": "0.00", "total_tokens": 0}
199
+ {"current_steps": 398, "total_steps": 3080, "loss": 0.3049, "learning_rate": 9.829397479725791e-06, "epoch": 0.2583157553139705, "percentage": 12.92, "elapsed_time": "2:12:54", "remaining_time": "14:55:39", "throughput": "0.00", "total_tokens": 0}
200
+ {"current_steps": 400, "total_steps": 3080, "loss": 0.3137, "learning_rate": 9.826605598750223e-06, "epoch": 0.2596138244361512, "percentage": 12.99, "elapsed_time": "2:13:34", "remaining_time": "14:54:55", "throughput": "0.00", "total_tokens": 0}
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2cd93abf4907e8397ac7a7fecc78c6c64d21b1ec6c2cb3cc420f868f0a1ade7
3
+ size 5432
training_eval_loss.png ADDED
training_loss.png ADDED