DongfuJiang
committed on
Commit
•
2511e8e
1
Parent(s):
e531747
Training in progress, step 400
Browse files- README.md +76 -0
- adapter_config.json +31 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +13 -0
- all_results.json +12 -0
- eval_results.json +7 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +132 -0
- train_results.json +8 -0
- trainer_log.jsonl +200 -0
- trainer_state.json +0 -0
- training_args.bin +3 -0
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
README.md
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: microsoft/Phi-3-mini-128k-instruct
|
3 |
+
library_name: peft
|
4 |
+
license: mit
|
5 |
+
tags:
|
6 |
+
- llama-factory
|
7 |
+
- lora
|
8 |
+
- generated_from_trainer
|
9 |
+
model-index:
|
10 |
+
- name: PairRM-V2-phi3-3-mini-unified-feedback
|
11 |
+
results: []
|
12 |
+
---
|
13 |
+
|
14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
+
should probably proofread and complete it, then remove this comment. -->
|
16 |
+
|
17 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/dongfu/huggingface/runs/336nlkkc)
|
18 |
+
# PairRM-V2-phi3-3-mini-unified-feedback
|
19 |
+
|
20 |
+
This model is a fine-tuned version of [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) on the all dataset.
|
21 |
+
It achieves the following results on the evaluation set:
|
22 |
+
- Loss: 0.2755
|
23 |
+
|
24 |
+
## Model description
|
25 |
+
|
26 |
+
More information needed
|
27 |
+
|
28 |
+
## Intended uses & limitations
|
29 |
+
|
30 |
+
More information needed
|
31 |
+
|
32 |
+
## Training and evaluation data
|
33 |
+
|
34 |
+
More information needed
|
35 |
+
|
36 |
+
## Training procedure
|
37 |
+
|
38 |
+
### Training hyperparameters
|
39 |
+
|
40 |
+
The following hyperparameters were used during training:
|
41 |
+
- learning_rate: 1e-05
|
42 |
+
- train_batch_size: 1
|
43 |
+
- eval_batch_size: 1
|
44 |
+
- seed: 42
|
45 |
+
- distributed_type: multi-GPU
|
46 |
+
- num_devices: 8
|
47 |
+
- gradient_accumulation_steps: 16
|
48 |
+
- total_train_batch_size: 128
|
49 |
+
- total_eval_batch_size: 8
|
50 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
51 |
+
- lr_scheduler_type: cosine
|
52 |
+
- lr_scheduler_warmup_ratio: 0.05
|
53 |
+
- num_epochs: 3
|
54 |
+
|
55 |
+
### Training results
|
56 |
+
|
57 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
58 |
+
|:-------------:|:------:|:----:|:---------------:|
|
59 |
+
| 0.3099 | 0.3245 | 500 | 0.3066 |
|
60 |
+
| 0.3073 | 0.6490 | 1000 | 0.2901 |
|
61 |
+
| 0.263 | 0.9736 | 1500 | 0.2846 |
|
62 |
+
| 0.2822 | 1.2981 | 2000 | 0.2831 |
|
63 |
+
| 0.2693 | 1.6226 | 2500 | 0.2787 |
|
64 |
+
| 0.2741 | 1.9471 | 3000 | 0.2778 |
|
65 |
+
| 0.2869 | 2.2716 | 3500 | 0.2762 |
|
66 |
+
| 0.2339 | 2.5961 | 4000 | 0.2756 |
|
67 |
+
| 0.2879 | 2.9207 | 4500 | 0.2755 |
|
68 |
+
|
69 |
+
|
70 |
+
### Framework versions
|
71 |
+
|
72 |
+
- PEFT 0.11.1
|
73 |
+
- Transformers 4.43.1
|
74 |
+
- Pytorch 2.3.0+cu121
|
75 |
+
- Datasets 2.20.0
|
76 |
+
- Tokenizers 0.19.1
|
adapter_config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.0,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 8,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"down_proj",
|
24 |
+
"gate_up_proj",
|
25 |
+
"qkv_proj",
|
26 |
+
"o_proj"
|
27 |
+
],
|
28 |
+
"task_type": "CAUSAL_LM",
|
29 |
+
"use_dora": true,
|
30 |
+
"use_rslora": false
|
31 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4829feccfe2b3d5f59b69cf8d60d26df07a9bdf9b6da112485125238cf36312
|
3 |
+
size 54446840
|
added_tokens.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|assistant|>": 32001,
|
3 |
+
"<|endoftext|>": 32000,
|
4 |
+
"<|end|>": 32007,
|
5 |
+
"<|placeholder1|>": 32002,
|
6 |
+
"<|placeholder2|>": 32003,
|
7 |
+
"<|placeholder3|>": 32004,
|
8 |
+
"<|placeholder4|>": 32005,
|
9 |
+
"<|placeholder5|>": 32008,
|
10 |
+
"<|placeholder6|>": 32009,
|
11 |
+
"<|system|>": 32006,
|
12 |
+
"<|user|>": 32010
|
13 |
+
}
|
all_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.9985396722375466,
|
3 |
+
"eval_loss": 0.275532990694046,
|
4 |
+
"eval_runtime": 396.9488,
|
5 |
+
"eval_samples_per_second": 26.149,
|
6 |
+
"eval_steps_per_second": 3.27,
|
7 |
+
"total_flos": 8.62655347699168e+18,
|
8 |
+
"train_loss": 0.29317476286903604,
|
9 |
+
"train_runtime": 46635.6944,
|
10 |
+
"train_samples_per_second": 12.686,
|
11 |
+
"train_steps_per_second": 0.099
|
12 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.9985396722375466,
|
3 |
+
"eval_loss": 0.275532990694046,
|
4 |
+
"eval_runtime": 396.9488,
|
5 |
+
"eval_samples_per_second": 26.149,
|
6 |
+
"eval_steps_per_second": 3.27
|
7 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|end|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<|endoftext|>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "<unk>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
tokenizer_config.json
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
+
"added_tokens_decoder": {
|
6 |
+
"0": {
|
7 |
+
"content": "<unk>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false,
|
12 |
+
"special": true
|
13 |
+
},
|
14 |
+
"1": {
|
15 |
+
"content": "<s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false,
|
20 |
+
"special": true
|
21 |
+
},
|
22 |
+
"2": {
|
23 |
+
"content": "</s>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": true,
|
27 |
+
"single_word": false,
|
28 |
+
"special": false
|
29 |
+
},
|
30 |
+
"32000": {
|
31 |
+
"content": "<|endoftext|>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false,
|
36 |
+
"special": true
|
37 |
+
},
|
38 |
+
"32001": {
|
39 |
+
"content": "<|assistant|>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": true,
|
43 |
+
"single_word": false,
|
44 |
+
"special": true
|
45 |
+
},
|
46 |
+
"32002": {
|
47 |
+
"content": "<|placeholder1|>",
|
48 |
+
"lstrip": false,
|
49 |
+
"normalized": false,
|
50 |
+
"rstrip": true,
|
51 |
+
"single_word": false,
|
52 |
+
"special": true
|
53 |
+
},
|
54 |
+
"32003": {
|
55 |
+
"content": "<|placeholder2|>",
|
56 |
+
"lstrip": false,
|
57 |
+
"normalized": false,
|
58 |
+
"rstrip": true,
|
59 |
+
"single_word": false,
|
60 |
+
"special": true
|
61 |
+
},
|
62 |
+
"32004": {
|
63 |
+
"content": "<|placeholder3|>",
|
64 |
+
"lstrip": false,
|
65 |
+
"normalized": false,
|
66 |
+
"rstrip": true,
|
67 |
+
"single_word": false,
|
68 |
+
"special": true
|
69 |
+
},
|
70 |
+
"32005": {
|
71 |
+
"content": "<|placeholder4|>",
|
72 |
+
"lstrip": false,
|
73 |
+
"normalized": false,
|
74 |
+
"rstrip": true,
|
75 |
+
"single_word": false,
|
76 |
+
"special": true
|
77 |
+
},
|
78 |
+
"32006": {
|
79 |
+
"content": "<|system|>",
|
80 |
+
"lstrip": false,
|
81 |
+
"normalized": false,
|
82 |
+
"rstrip": true,
|
83 |
+
"single_word": false,
|
84 |
+
"special": true
|
85 |
+
},
|
86 |
+
"32007": {
|
87 |
+
"content": "<|end|>",
|
88 |
+
"lstrip": false,
|
89 |
+
"normalized": false,
|
90 |
+
"rstrip": false,
|
91 |
+
"single_word": false,
|
92 |
+
"special": true
|
93 |
+
},
|
94 |
+
"32008": {
|
95 |
+
"content": "<|placeholder5|>",
|
96 |
+
"lstrip": false,
|
97 |
+
"normalized": false,
|
98 |
+
"rstrip": true,
|
99 |
+
"single_word": false,
|
100 |
+
"special": true
|
101 |
+
},
|
102 |
+
"32009": {
|
103 |
+
"content": "<|placeholder6|>",
|
104 |
+
"lstrip": false,
|
105 |
+
"normalized": false,
|
106 |
+
"rstrip": true,
|
107 |
+
"single_word": false,
|
108 |
+
"special": true
|
109 |
+
},
|
110 |
+
"32010": {
|
111 |
+
"content": "<|user|>",
|
112 |
+
"lstrip": false,
|
113 |
+
"normalized": false,
|
114 |
+
"rstrip": true,
|
115 |
+
"single_word": false,
|
116 |
+
"special": true
|
117 |
+
}
|
118 |
+
},
|
119 |
+
"bos_token": "<s>",
|
120 |
+
"chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|system|>\n' + system_message + '<|end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + content + '<|end|>\n<|assistant|>\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end|>' + '\n' }}{% endif %}{% endfor %}",
|
121 |
+
"clean_up_tokenization_spaces": false,
|
122 |
+
"eos_token": "<|end|>",
|
123 |
+
"legacy": false,
|
124 |
+
"model_max_length": 4096,
|
125 |
+
"pad_token": "<|endoftext|>",
|
126 |
+
"padding_side": "right",
|
127 |
+
"sp_model_kwargs": {},
|
128 |
+
"split_special_tokens": false,
|
129 |
+
"tokenizer_class": "LlamaTokenizer",
|
130 |
+
"unk_token": "<unk>",
|
131 |
+
"use_default_system_prompt": false
|
132 |
+
}
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.9985396722375466,
|
3 |
+
"total_flos": 8.62655347699168e+18,
|
4 |
+
"train_loss": 0.29317476286903604,
|
5 |
+
"train_runtime": 46635.6944,
|
6 |
+
"train_samples_per_second": 12.686,
|
7 |
+
"train_steps_per_second": 0.099
|
8 |
+
}
|
trainer_log.jsonl
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"current_steps": 2, "total_steps": 3080, "loss": 1.1716, "learning_rate": 1.298701298701299e-07, "epoch": 0.0012980691221807562, "percentage": 0.06, "elapsed_time": "0:00:35", "remaining_time": "15:01:33", "throughput": "0.00", "total_tokens": 0}
|
2 |
+
{"current_steps": 4, "total_steps": 3080, "loss": 1.1279, "learning_rate": 2.597402597402598e-07, "epoch": 0.0025961382443615124, "percentage": 0.13, "elapsed_time": "0:01:16", "remaining_time": "16:18:24", "throughput": "0.00", "total_tokens": 0}
|
3 |
+
{"current_steps": 6, "total_steps": 3080, "loss": 1.2463, "learning_rate": 3.896103896103896e-07, "epoch": 0.0038942073665422685, "percentage": 0.19, "elapsed_time": "0:01:58", "remaining_time": "16:48:50", "throughput": "0.00", "total_tokens": 0}
|
4 |
+
{"current_steps": 8, "total_steps": 3080, "loss": 1.1527, "learning_rate": 5.194805194805196e-07, "epoch": 0.005192276488723025, "percentage": 0.26, "elapsed_time": "0:02:38", "remaining_time": "16:51:15", "throughput": "0.00", "total_tokens": 0}
|
5 |
+
{"current_steps": 10, "total_steps": 3080, "loss": 1.1291, "learning_rate": 6.493506493506493e-07, "epoch": 0.0064903456109037805, "percentage": 0.32, "elapsed_time": "0:03:17", "remaining_time": "16:52:13", "throughput": "0.00", "total_tokens": 0}
|
6 |
+
{"current_steps": 12, "total_steps": 3080, "loss": 1.1338, "learning_rate": 7.792207792207792e-07, "epoch": 0.007788414733084537, "percentage": 0.39, "elapsed_time": "0:03:59", "remaining_time": "17:00:16", "throughput": "0.00", "total_tokens": 0}
|
7 |
+
{"current_steps": 14, "total_steps": 3080, "loss": 1.136, "learning_rate": 9.090909090909091e-07, "epoch": 0.009086483855265294, "percentage": 0.45, "elapsed_time": "0:04:36", "remaining_time": "16:50:47", "throughput": "0.00", "total_tokens": 0}
|
8 |
+
{"current_steps": 16, "total_steps": 3080, "loss": 1.1277, "learning_rate": 1.0389610389610392e-06, "epoch": 0.01038455297744605, "percentage": 0.52, "elapsed_time": "0:05:15", "remaining_time": "16:45:32", "throughput": "0.00", "total_tokens": 0}
|
9 |
+
{"current_steps": 18, "total_steps": 3080, "loss": 1.117, "learning_rate": 1.168831168831169e-06, "epoch": 0.011682622099626805, "percentage": 0.58, "elapsed_time": "0:05:55", "remaining_time": "16:47:09", "throughput": "0.00", "total_tokens": 0}
|
10 |
+
{"current_steps": 20, "total_steps": 3080, "loss": 1.1154, "learning_rate": 1.2987012987012986e-06, "epoch": 0.012980691221807561, "percentage": 0.65, "elapsed_time": "0:06:34", "remaining_time": "16:46:09", "throughput": "0.00", "total_tokens": 0}
|
11 |
+
{"current_steps": 22, "total_steps": 3080, "loss": 1.0971, "learning_rate": 1.4285714285714286e-06, "epoch": 0.014278760343988317, "percentage": 0.71, "elapsed_time": "0:07:16", "remaining_time": "16:50:21", "throughput": "0.00", "total_tokens": 0}
|
12 |
+
{"current_steps": 24, "total_steps": 3080, "loss": 1.1125, "learning_rate": 1.5584415584415584e-06, "epoch": 0.015576829466169074, "percentage": 0.78, "elapsed_time": "0:07:52", "remaining_time": "16:43:27", "throughput": "0.00", "total_tokens": 0}
|
13 |
+
{"current_steps": 26, "total_steps": 3080, "loss": 1.0816, "learning_rate": 1.6883116883116885e-06, "epoch": 0.01687489858834983, "percentage": 0.84, "elapsed_time": "0:08:34", "remaining_time": "16:47:51", "throughput": "0.00", "total_tokens": 0}
|
14 |
+
{"current_steps": 28, "total_steps": 3080, "loss": 1.024, "learning_rate": 1.8181818181818183e-06, "epoch": 0.018172967710530587, "percentage": 0.91, "elapsed_time": "0:09:20", "remaining_time": "16:58:46", "throughput": "0.00", "total_tokens": 0}
|
15 |
+
{"current_steps": 30, "total_steps": 3080, "loss": 1.009, "learning_rate": 1.9480519480519483e-06, "epoch": 0.01947103683271134, "percentage": 0.97, "elapsed_time": "0:09:58", "remaining_time": "16:54:01", "throughput": "0.00", "total_tokens": 0}
|
16 |
+
{"current_steps": 32, "total_steps": 3080, "loss": 0.9808, "learning_rate": 2.0779220779220784e-06, "epoch": 0.0207691059548921, "percentage": 1.04, "elapsed_time": "0:10:39", "remaining_time": "16:55:13", "throughput": "0.00", "total_tokens": 0}
|
17 |
+
{"current_steps": 34, "total_steps": 3080, "loss": 0.9697, "learning_rate": 2.207792207792208e-06, "epoch": 0.022067175077072853, "percentage": 1.1, "elapsed_time": "0:11:18", "remaining_time": "16:53:46", "throughput": "0.00", "total_tokens": 0}
|
18 |
+
{"current_steps": 36, "total_steps": 3080, "loss": 0.9016, "learning_rate": 2.337662337662338e-06, "epoch": 0.02336524419925361, "percentage": 1.17, "elapsed_time": "0:11:58", "remaining_time": "16:53:02", "throughput": "0.00", "total_tokens": 0}
|
19 |
+
{"current_steps": 38, "total_steps": 3080, "loss": 0.8959, "learning_rate": 2.4675324675324676e-06, "epoch": 0.024663313321434368, "percentage": 1.23, "elapsed_time": "0:12:40", "remaining_time": "16:54:15", "throughput": "0.00", "total_tokens": 0}
|
20 |
+
{"current_steps": 40, "total_steps": 3080, "loss": 0.8125, "learning_rate": 2.597402597402597e-06, "epoch": 0.025961382443615122, "percentage": 1.3, "elapsed_time": "0:13:19", "remaining_time": "16:52:45", "throughput": "0.00", "total_tokens": 0}
|
21 |
+
{"current_steps": 42, "total_steps": 3080, "loss": 0.8685, "learning_rate": 2.7272727272727272e-06, "epoch": 0.02725945156579588, "percentage": 1.36, "elapsed_time": "0:13:59", "remaining_time": "16:52:21", "throughput": "0.00", "total_tokens": 0}
|
22 |
+
{"current_steps": 44, "total_steps": 3080, "loss": 0.7961, "learning_rate": 2.8571428571428573e-06, "epoch": 0.028557520687976633, "percentage": 1.43, "elapsed_time": "0:14:41", "remaining_time": "16:53:19", "throughput": "0.00", "total_tokens": 0}
|
23 |
+
{"current_steps": 46, "total_steps": 3080, "loss": 0.7846, "learning_rate": 2.9870129870129873e-06, "epoch": 0.02985558981015739, "percentage": 1.49, "elapsed_time": "0:15:21", "remaining_time": "16:53:31", "throughput": "0.00", "total_tokens": 0}
|
24 |
+
{"current_steps": 48, "total_steps": 3080, "loss": 0.7042, "learning_rate": 3.116883116883117e-06, "epoch": 0.03115365893233815, "percentage": 1.56, "elapsed_time": "0:16:02", "remaining_time": "16:52:50", "throughput": "0.00", "total_tokens": 0}
|
25 |
+
{"current_steps": 50, "total_steps": 3080, "loss": 0.7043, "learning_rate": 3.246753246753247e-06, "epoch": 0.0324517280545189, "percentage": 1.62, "elapsed_time": "0:16:43", "remaining_time": "16:53:48", "throughput": "0.00", "total_tokens": 0}
|
26 |
+
{"current_steps": 52, "total_steps": 3080, "loss": 0.6678, "learning_rate": 3.376623376623377e-06, "epoch": 0.03374979717669966, "percentage": 1.69, "elapsed_time": "0:17:22", "remaining_time": "16:51:34", "throughput": "0.00", "total_tokens": 0}
|
27 |
+
{"current_steps": 54, "total_steps": 3080, "loss": 0.7099, "learning_rate": 3.506493506493507e-06, "epoch": 0.03504786629888042, "percentage": 1.75, "elapsed_time": "0:17:57", "remaining_time": "16:46:18", "throughput": "0.00", "total_tokens": 0}
|
28 |
+
{"current_steps": 56, "total_steps": 3080, "loss": 0.6423, "learning_rate": 3.6363636363636366e-06, "epoch": 0.036345935421061175, "percentage": 1.82, "elapsed_time": "0:18:44", "remaining_time": "16:51:37", "throughput": "0.00", "total_tokens": 0}
|
29 |
+
{"current_steps": 58, "total_steps": 3080, "loss": 0.5418, "learning_rate": 3.7662337662337666e-06, "epoch": 0.037644004543241925, "percentage": 1.88, "elapsed_time": "0:19:24", "remaining_time": "16:51:37", "throughput": "0.00", "total_tokens": 0}
|
30 |
+
{"current_steps": 60, "total_steps": 3080, "loss": 0.5407, "learning_rate": 3.896103896103897e-06, "epoch": 0.03894207366542268, "percentage": 1.95, "elapsed_time": "0:20:03", "remaining_time": "16:49:47", "throughput": "0.00", "total_tokens": 0}
|
31 |
+
{"current_steps": 62, "total_steps": 3080, "loss": 0.5437, "learning_rate": 4.025974025974026e-06, "epoch": 0.04024014278760344, "percentage": 2.01, "elapsed_time": "0:20:46", "remaining_time": "16:51:09", "throughput": "0.00", "total_tokens": 0}
|
32 |
+
{"current_steps": 64, "total_steps": 3080, "loss": 0.4655, "learning_rate": 4.155844155844157e-06, "epoch": 0.0415382119097842, "percentage": 2.08, "elapsed_time": "0:21:30", "remaining_time": "16:53:53", "throughput": "0.00", "total_tokens": 0}
|
33 |
+
{"current_steps": 66, "total_steps": 3080, "loss": 0.4997, "learning_rate": 4.2857142857142855e-06, "epoch": 0.042836281031964955, "percentage": 2.14, "elapsed_time": "0:22:09", "remaining_time": "16:51:45", "throughput": "0.00", "total_tokens": 0}
|
34 |
+
{"current_steps": 68, "total_steps": 3080, "loss": 0.4846, "learning_rate": 4.415584415584416e-06, "epoch": 0.044134350154145706, "percentage": 2.21, "elapsed_time": "0:22:46", "remaining_time": "16:48:38", "throughput": "0.00", "total_tokens": 0}
|
35 |
+
{"current_steps": 70, "total_steps": 3080, "loss": 0.4522, "learning_rate": 4.5454545454545455e-06, "epoch": 0.04543241927632646, "percentage": 2.27, "elapsed_time": "0:23:28", "remaining_time": "16:49:19", "throughput": "0.00", "total_tokens": 0}
|
36 |
+
{"current_steps": 72, "total_steps": 3080, "loss": 0.3711, "learning_rate": 4.675324675324676e-06, "epoch": 0.04673048839850722, "percentage": 2.34, "elapsed_time": "0:24:10", "remaining_time": "16:49:59", "throughput": "0.00", "total_tokens": 0}
|
37 |
+
{"current_steps": 74, "total_steps": 3080, "loss": 0.3815, "learning_rate": 4.805194805194806e-06, "epoch": 0.04802855752068798, "percentage": 2.4, "elapsed_time": "0:24:50", "remaining_time": "16:48:54", "throughput": "0.00", "total_tokens": 0}
|
38 |
+
{"current_steps": 76, "total_steps": 3080, "loss": 0.4273, "learning_rate": 4.935064935064935e-06, "epoch": 0.049326626642868736, "percentage": 2.47, "elapsed_time": "0:25:29", "remaining_time": "16:47:47", "throughput": "0.00", "total_tokens": 0}
|
39 |
+
{"current_steps": 78, "total_steps": 3080, "loss": 0.408, "learning_rate": 5.064935064935065e-06, "epoch": 0.050624695765049486, "percentage": 2.53, "elapsed_time": "0:26:09", "remaining_time": "16:46:46", "throughput": "0.00", "total_tokens": 0}
|
40 |
+
{"current_steps": 80, "total_steps": 3080, "loss": 0.388, "learning_rate": 5.194805194805194e-06, "epoch": 0.051922764887230244, "percentage": 2.6, "elapsed_time": "0:26:48", "remaining_time": "16:45:22", "throughput": "0.00", "total_tokens": 0}
|
41 |
+
{"current_steps": 82, "total_steps": 3080, "loss": 0.347, "learning_rate": 5.324675324675325e-06, "epoch": 0.053220834009411, "percentage": 2.66, "elapsed_time": "0:27:27", "remaining_time": "16:43:40", "throughput": "0.00", "total_tokens": 0}
|
42 |
+
{"current_steps": 84, "total_steps": 3080, "loss": 0.3856, "learning_rate": 5.4545454545454545e-06, "epoch": 0.05451890313159176, "percentage": 2.73, "elapsed_time": "0:28:01", "remaining_time": "16:39:27", "throughput": "0.00", "total_tokens": 0}
|
43 |
+
{"current_steps": 86, "total_steps": 3080, "loss": 0.372, "learning_rate": 5.584415584415585e-06, "epoch": 0.055816972253772516, "percentage": 2.79, "elapsed_time": "0:28:39", "remaining_time": "16:37:58", "throughput": "0.00", "total_tokens": 0}
|
44 |
+
{"current_steps": 88, "total_steps": 3080, "loss": 0.3667, "learning_rate": 5.7142857142857145e-06, "epoch": 0.05711504137595327, "percentage": 2.86, "elapsed_time": "0:29:19", "remaining_time": "16:36:55", "throughput": "0.00", "total_tokens": 0}
|
45 |
+
{"current_steps": 90, "total_steps": 3080, "loss": 0.3881, "learning_rate": 5.844155844155844e-06, "epoch": 0.058413110498134024, "percentage": 2.92, "elapsed_time": "0:29:57", "remaining_time": "16:35:13", "throughput": "0.00", "total_tokens": 0}
|
46 |
+
{"current_steps": 92, "total_steps": 3080, "loss": 0.366, "learning_rate": 5.9740259740259746e-06, "epoch": 0.05971117962031478, "percentage": 2.99, "elapsed_time": "0:30:37", "remaining_time": "16:34:27", "throughput": "0.00", "total_tokens": 0}
|
47 |
+
{"current_steps": 94, "total_steps": 3080, "loss": 0.3555, "learning_rate": 6.103896103896104e-06, "epoch": 0.06100924874249554, "percentage": 3.05, "elapsed_time": "0:31:16", "remaining_time": "16:33:15", "throughput": "0.00", "total_tokens": 0}
|
48 |
+
{"current_steps": 96, "total_steps": 3080, "loss": 0.3948, "learning_rate": 6.233766233766234e-06, "epoch": 0.0623073178646763, "percentage": 3.12, "elapsed_time": "0:31:56", "remaining_time": "16:33:01", "throughput": "0.00", "total_tokens": 0}
|
49 |
+
{"current_steps": 98, "total_steps": 3080, "loss": 0.3536, "learning_rate": 6.363636363636364e-06, "epoch": 0.06360538698685705, "percentage": 3.18, "elapsed_time": "0:32:34", "remaining_time": "16:31:17", "throughput": "0.00", "total_tokens": 0}
|
50 |
+
{"current_steps": 100, "total_steps": 3080, "loss": 0.3367, "learning_rate": 6.493506493506494e-06, "epoch": 0.0649034561090378, "percentage": 3.25, "elapsed_time": "0:33:14", "remaining_time": "16:30:23", "throughput": "0.00", "total_tokens": 0}
|
51 |
+
{"current_steps": 102, "total_steps": 3080, "loss": 0.3424, "learning_rate": 6.623376623376624e-06, "epoch": 0.06620152523121857, "percentage": 3.31, "elapsed_time": "0:33:58", "remaining_time": "16:31:54", "throughput": "0.00", "total_tokens": 0}
|
52 |
+
{"current_steps": 104, "total_steps": 3080, "loss": 0.3337, "learning_rate": 6.753246753246754e-06, "epoch": 0.06749959435339932, "percentage": 3.38, "elapsed_time": "0:34:42", "remaining_time": "16:33:10", "throughput": "0.00", "total_tokens": 0}
|
53 |
+
{"current_steps": 106, "total_steps": 3080, "loss": 0.3646, "learning_rate": 6.8831168831168835e-06, "epoch": 0.06879766347558007, "percentage": 3.44, "elapsed_time": "0:35:21", "remaining_time": "16:31:50", "throughput": "0.00", "total_tokens": 0}
|
54 |
+
{"current_steps": 108, "total_steps": 3080, "loss": 0.3351, "learning_rate": 7.012987012987014e-06, "epoch": 0.07009573259776083, "percentage": 3.51, "elapsed_time": "0:36:01", "remaining_time": "16:31:25", "throughput": "0.00", "total_tokens": 0}
|
55 |
+
{"current_steps": 110, "total_steps": 3080, "loss": 0.3414, "learning_rate": 7.1428571428571436e-06, "epoch": 0.07139380171994159, "percentage": 3.57, "elapsed_time": "0:36:39", "remaining_time": "16:29:48", "throughput": "0.00", "total_tokens": 0}
|
56 |
+
{"current_steps": 112, "total_steps": 3080, "loss": 0.3445, "learning_rate": 7.272727272727273e-06, "epoch": 0.07269187084212235, "percentage": 3.64, "elapsed_time": "0:37:18", "remaining_time": "16:28:46", "throughput": "0.00", "total_tokens": 0}
|
57 |
+
{"current_steps": 114, "total_steps": 3080, "loss": 0.3645, "learning_rate": 7.402597402597404e-06, "epoch": 0.0739899399643031, "percentage": 3.7, "elapsed_time": "0:38:01", "remaining_time": "16:29:16", "throughput": "0.00", "total_tokens": 0}
|
58 |
+
{"current_steps": 116, "total_steps": 3080, "loss": 0.3386, "learning_rate": 7.532467532467533e-06, "epoch": 0.07528800908648385, "percentage": 3.77, "elapsed_time": "0:38:37", "remaining_time": "16:27:05", "throughput": "0.00", "total_tokens": 0}
|
59 |
+
{"current_steps": 118, "total_steps": 3080, "loss": 0.3298, "learning_rate": 7.662337662337663e-06, "epoch": 0.07658607820866462, "percentage": 3.83, "elapsed_time": "0:39:17", "remaining_time": "16:26:25", "throughput": "0.00", "total_tokens": 0}
|
60 |
+
{"current_steps": 120, "total_steps": 3080, "loss": 0.3667, "learning_rate": 7.792207792207793e-06, "epoch": 0.07788414733084537, "percentage": 3.9, "elapsed_time": "0:39:56", "remaining_time": "16:25:16", "throughput": "0.00", "total_tokens": 0}
|
61 |
+
{"current_steps": 122, "total_steps": 3080, "loss": 0.3581, "learning_rate": 7.922077922077924e-06, "epoch": 0.07918221645302613, "percentage": 3.96, "elapsed_time": "0:40:33", "remaining_time": "16:23:33", "throughput": "0.00", "total_tokens": 0}
|
62 |
+
{"current_steps": 124, "total_steps": 3080, "loss": 0.3472, "learning_rate": 8.051948051948052e-06, "epoch": 0.08048028557520688, "percentage": 4.03, "elapsed_time": "0:41:19", "remaining_time": "16:24:57", "throughput": "0.00", "total_tokens": 0}
|
63 |
+
{"current_steps": 126, "total_steps": 3080, "loss": 0.3731, "learning_rate": 8.181818181818183e-06, "epoch": 0.08177835469738763, "percentage": 4.09, "elapsed_time": "0:42:03", "remaining_time": "16:25:59", "throughput": "0.00", "total_tokens": 0}
|
64 |
+
{"current_steps": 128, "total_steps": 3080, "loss": 0.3662, "learning_rate": 8.311688311688313e-06, "epoch": 0.0830764238195684, "percentage": 4.16, "elapsed_time": "0:42:46", "remaining_time": "16:26:21", "throughput": "0.00", "total_tokens": 0}
|
65 |
+
{"current_steps": 130, "total_steps": 3080, "loss": 0.4006, "learning_rate": 8.441558441558442e-06, "epoch": 0.08437449294174915, "percentage": 4.22, "elapsed_time": "0:43:29", "remaining_time": "16:26:56", "throughput": "0.00", "total_tokens": 0}
|
66 |
+
{"current_steps": 132, "total_steps": 3080, "loss": 0.3308, "learning_rate": 8.571428571428571e-06, "epoch": 0.08567256206392991, "percentage": 4.29, "elapsed_time": "0:44:06", "remaining_time": "16:25:01", "throughput": "0.00", "total_tokens": 0}
|
67 |
+
{"current_steps": 134, "total_steps": 3080, "loss": 0.3306, "learning_rate": 8.701298701298701e-06, "epoch": 0.08697063118611066, "percentage": 4.35, "elapsed_time": "0:44:45", "remaining_time": "16:23:50", "throughput": "0.00", "total_tokens": 0}
|
68 |
+
{"current_steps": 136, "total_steps": 3080, "loss": 0.3972, "learning_rate": 8.831168831168832e-06, "epoch": 0.08826870030829141, "percentage": 4.42, "elapsed_time": "0:45:30", "remaining_time": "16:24:58", "throughput": "0.00", "total_tokens": 0}
|
69 |
+
{"current_steps": 138, "total_steps": 3080, "loss": 0.3602, "learning_rate": 8.96103896103896e-06, "epoch": 0.08956676943047218, "percentage": 4.48, "elapsed_time": "0:46:08", "remaining_time": "16:23:46", "throughput": "0.00", "total_tokens": 0}
|
70 |
+
{"current_steps": 140, "total_steps": 3080, "loss": 0.3551, "learning_rate": 9.090909090909091e-06, "epoch": 0.09086483855265293, "percentage": 4.55, "elapsed_time": "0:46:46", "remaining_time": "16:22:23", "throughput": "0.00", "total_tokens": 0}
|
71 |
+
{"current_steps": 142, "total_steps": 3080, "loss": 0.3412, "learning_rate": 9.220779220779221e-06, "epoch": 0.09216290767483369, "percentage": 4.61, "elapsed_time": "0:47:27", "remaining_time": "16:21:47", "throughput": "0.00", "total_tokens": 0}
|
72 |
+
{"current_steps": 144, "total_steps": 3080, "loss": 0.3265, "learning_rate": 9.350649350649352e-06, "epoch": 0.09346097679701444, "percentage": 4.68, "elapsed_time": "0:48:03", "remaining_time": "16:19:42", "throughput": "0.00", "total_tokens": 0}
|
73 |
+
{"current_steps": 146, "total_steps": 3080, "loss": 0.3353, "learning_rate": 9.48051948051948e-06, "epoch": 0.09475904591919519, "percentage": 4.74, "elapsed_time": "0:48:44", "remaining_time": "16:19:30", "throughput": "0.00", "total_tokens": 0}
|
74 |
+
{"current_steps": 148, "total_steps": 3080, "loss": 0.3603, "learning_rate": 9.610389610389611e-06, "epoch": 0.09605711504137596, "percentage": 4.81, "elapsed_time": "0:49:23", "remaining_time": "16:18:28", "throughput": "0.00", "total_tokens": 0}
|
75 |
+
{"current_steps": 150, "total_steps": 3080, "loss": 0.3707, "learning_rate": 9.740259740259742e-06, "epoch": 0.09735518416355671, "percentage": 4.87, "elapsed_time": "0:50:07", "remaining_time": "16:19:12", "throughput": "0.00", "total_tokens": 0}
|
76 |
+
{"current_steps": 152, "total_steps": 3080, "loss": 0.3549, "learning_rate": 9.87012987012987e-06, "epoch": 0.09865325328573747, "percentage": 4.94, "elapsed_time": "0:50:51", "remaining_time": "16:19:37", "throughput": "0.00", "total_tokens": 0}
|
77 |
+
{"current_steps": 154, "total_steps": 3080, "loss": 0.32, "learning_rate": 1e-05, "epoch": 0.09995132240791822, "percentage": 5.0, "elapsed_time": "0:51:30", "remaining_time": "16:18:37", "throughput": "0.00", "total_tokens": 0}
|
78 |
+
{"current_steps": 156, "total_steps": 3080, "loss": 0.3527, "learning_rate": 9.999988472080506e-06, "epoch": 0.10124939153009897, "percentage": 5.06, "elapsed_time": "0:52:08", "remaining_time": "16:17:12", "throughput": "0.00", "total_tokens": 0}
|
79 |
+
{"current_steps": 158, "total_steps": 3080, "loss": 0.3368, "learning_rate": 9.999953888375178e-06, "epoch": 0.10254746065227974, "percentage": 5.13, "elapsed_time": "0:52:49", "remaining_time": "16:16:47", "throughput": "0.00", "total_tokens": 0}
|
80 |
+
{"current_steps": 160, "total_steps": 3080, "loss": 0.345, "learning_rate": 9.999896249043488e-06, "epoch": 0.10384552977446049, "percentage": 5.19, "elapsed_time": "0:53:31", "remaining_time": "16:16:48", "throughput": "0.00", "total_tokens": 0}
|
81 |
+
{"current_steps": 162, "total_steps": 3080, "loss": 0.3544, "learning_rate": 9.999815554351224e-06, "epoch": 0.10514359889664125, "percentage": 5.26, "elapsed_time": "0:54:10", "remaining_time": "16:15:55", "throughput": "0.00", "total_tokens": 0}
|
82 |
+
{"current_steps": 164, "total_steps": 3080, "loss": 0.3177, "learning_rate": 9.999711804670478e-06, "epoch": 0.106441668018822, "percentage": 5.32, "elapsed_time": "0:54:53", "remaining_time": "16:15:55", "throughput": "0.00", "total_tokens": 0}
|
83 |
+
{"current_steps": 166, "total_steps": 3080, "loss": 0.3642, "learning_rate": 9.999585000479658e-06, "epoch": 0.10773973714100275, "percentage": 5.39, "elapsed_time": "0:55:33", "remaining_time": "16:15:21", "throughput": "0.00", "total_tokens": 0}
|
84 |
+
{"current_steps": 168, "total_steps": 3080, "loss": 0.3599, "learning_rate": 9.999435142363484e-06, "epoch": 0.10903780626318352, "percentage": 5.45, "elapsed_time": "0:56:14", "remaining_time": "16:14:53", "throughput": "0.00", "total_tokens": 0}
|
85 |
+
{"current_steps": 170, "total_steps": 3080, "loss": 0.3509, "learning_rate": 9.999262231012971e-06, "epoch": 0.11033587538536427, "percentage": 5.52, "elapsed_time": "0:56:55", "remaining_time": "16:14:26", "throughput": "0.00", "total_tokens": 0}
|
86 |
+
{"current_steps": 172, "total_steps": 3080, "loss": 0.339, "learning_rate": 9.999066267225447e-06, "epoch": 0.11163394450754503, "percentage": 5.58, "elapsed_time": "0:57:34", "remaining_time": "16:13:24", "throughput": "0.00", "total_tokens": 0}
|
87 |
+
{"current_steps": 174, "total_steps": 3080, "loss": 0.3526, "learning_rate": 9.998847251904529e-06, "epoch": 0.11293201362972578, "percentage": 5.65, "elapsed_time": "0:58:11", "remaining_time": "16:11:48", "throughput": "0.00", "total_tokens": 0}
|
88 |
+
{"current_steps": 176, "total_steps": 3080, "loss": 0.3286, "learning_rate": 9.998605186060138e-06, "epoch": 0.11423008275190653, "percentage": 5.71, "elapsed_time": "0:58:47", "remaining_time": "16:10:08", "throughput": "0.00", "total_tokens": 0}
|
89 |
+
{"current_steps": 178, "total_steps": 3080, "loss": 0.3522, "learning_rate": 9.998340070808478e-06, "epoch": 0.1155281518740873, "percentage": 5.78, "elapsed_time": "0:59:29", "remaining_time": "16:09:57", "throughput": "0.00", "total_tokens": 0}
|
90 |
+
{"current_steps": 180, "total_steps": 3080, "loss": 0.3546, "learning_rate": 9.99805190737204e-06, "epoch": 0.11682622099626805, "percentage": 5.84, "elapsed_time": "1:00:08", "remaining_time": "16:08:59", "throughput": "0.00", "total_tokens": 0}
|
91 |
+
{"current_steps": 182, "total_steps": 3080, "loss": 0.3295, "learning_rate": 9.997740697079595e-06, "epoch": 0.11812429011844881, "percentage": 5.91, "elapsed_time": "1:00:43", "remaining_time": "16:07:01", "throughput": "0.00", "total_tokens": 0}
|
92 |
+
{"current_steps": 184, "total_steps": 3080, "loss": 0.3439, "learning_rate": 9.997406441366182e-06, "epoch": 0.11942235924062956, "percentage": 5.97, "elapsed_time": "1:01:25", "remaining_time": "16:06:38", "throughput": "0.00", "total_tokens": 0}
|
93 |
+
{"current_steps": 186, "total_steps": 3080, "loss": 0.3635, "learning_rate": 9.997049141773117e-06, "epoch": 0.12072042836281031, "percentage": 6.04, "elapsed_time": "1:02:07", "remaining_time": "16:06:33", "throughput": "0.00", "total_tokens": 0}
|
94 |
+
{"current_steps": 188, "total_steps": 3080, "loss": 0.342, "learning_rate": 9.996668799947962e-06, "epoch": 0.12201849748499108, "percentage": 6.1, "elapsed_time": "1:02:45", "remaining_time": "16:05:28", "throughput": "0.00", "total_tokens": 0}
|
95 |
+
{"current_steps": 190, "total_steps": 3080, "loss": 0.322, "learning_rate": 9.99626541764454e-06, "epoch": 0.12331656660717183, "percentage": 6.17, "elapsed_time": "1:03:26", "remaining_time": "16:04:51", "throughput": "0.00", "total_tokens": 0}
|
96 |
+
{"current_steps": 192, "total_steps": 3080, "loss": 0.3706, "learning_rate": 9.995838996722916e-06, "epoch": 0.1246146357293526, "percentage": 6.23, "elapsed_time": "1:04:03", "remaining_time": "16:03:37", "throughput": "0.00", "total_tokens": 0}
|
97 |
+
{"current_steps": 194, "total_steps": 3080, "loss": 0.3224, "learning_rate": 9.995389539149386e-06, "epoch": 0.12591270485153336, "percentage": 6.3, "elapsed_time": "1:04:47", "remaining_time": "16:03:48", "throughput": "0.00", "total_tokens": 0}
|
98 |
+
{"current_steps": 196, "total_steps": 3080, "loss": 0.3507, "learning_rate": 9.994917046996472e-06, "epoch": 0.1272107739737141, "percentage": 6.36, "elapsed_time": "1:05:27", "remaining_time": "16:03:13", "throughput": "0.00", "total_tokens": 0}
|
99 |
+
{"current_steps": 198, "total_steps": 3080, "loss": 0.3375, "learning_rate": 9.99442152244292e-06, "epoch": 0.12850884309589486, "percentage": 6.43, "elapsed_time": "1:06:09", "remaining_time": "16:02:59", "throughput": "0.00", "total_tokens": 0}
|
100 |
+
{"current_steps": 200, "total_steps": 3080, "loss": 0.3355, "learning_rate": 9.993902967773674e-06, "epoch": 0.1298069122180756, "percentage": 6.49, "elapsed_time": "1:06:50", "remaining_time": "16:02:26", "throughput": "0.00", "total_tokens": 0}
|
101 |
+
{"current_steps": 202, "total_steps": 3080, "loss": 0.3303, "learning_rate": 9.993361385379876e-06, "epoch": 0.13110498134025636, "percentage": 6.56, "elapsed_time": "1:07:28", "remaining_time": "16:01:28", "throughput": "0.00", "total_tokens": 0}
|
102 |
+
{"current_steps": 204, "total_steps": 3080, "loss": 0.3539, "learning_rate": 9.992796777758855e-06, "epoch": 0.13240305046243714, "percentage": 6.62, "elapsed_time": "1:08:07", "remaining_time": "16:00:18", "throughput": "0.00", "total_tokens": 0}
|
103 |
+
{"current_steps": 206, "total_steps": 3080, "loss": 0.3123, "learning_rate": 9.992209147514108e-06, "epoch": 0.1337011195846179, "percentage": 6.69, "elapsed_time": "1:08:47", "remaining_time": "15:59:44", "throughput": "0.00", "total_tokens": 0}
|
104 |
+
{"current_steps": 208, "total_steps": 3080, "loss": 0.3491, "learning_rate": 9.991598497355304e-06, "epoch": 0.13499918870679864, "percentage": 6.75, "elapsed_time": "1:09:26", "remaining_time": "15:58:53", "throughput": "0.00", "total_tokens": 0}
|
105 |
+
{"current_steps": 210, "total_steps": 3080, "loss": 0.358, "learning_rate": 9.990964830098246e-06, "epoch": 0.1362972578289794, "percentage": 6.82, "elapsed_time": "1:10:11", "remaining_time": "15:59:12", "throughput": "0.00", "total_tokens": 0}
|
106 |
+
{"current_steps": 212, "total_steps": 3080, "loss": 0.321, "learning_rate": 9.990308148664882e-06, "epoch": 0.13759532695116014, "percentage": 6.88, "elapsed_time": "1:10:54", "remaining_time": "15:59:19", "throughput": "0.00", "total_tokens": 0}
|
107 |
+
{"current_steps": 214, "total_steps": 3080, "loss": 0.3341, "learning_rate": 9.989628456083283e-06, "epoch": 0.13889339607334092, "percentage": 6.95, "elapsed_time": "1:11:31", "remaining_time": "15:57:49", "throughput": "0.00", "total_tokens": 0}
|
108 |
+
{"current_steps": 216, "total_steps": 3080, "loss": 0.3606, "learning_rate": 9.988925755487622e-06, "epoch": 0.14019146519552167, "percentage": 7.01, "elapsed_time": "1:12:10", "remaining_time": "15:56:59", "throughput": "0.00", "total_tokens": 0}
|
109 |
+
{"current_steps": 218, "total_steps": 3080, "loss": 0.3554, "learning_rate": 9.98820005011817e-06, "epoch": 0.14148953431770242, "percentage": 7.08, "elapsed_time": "1:12:52", "remaining_time": "15:56:50", "throughput": "0.00", "total_tokens": 0}
|
110 |
+
{"current_steps": 220, "total_steps": 3080, "loss": 0.337, "learning_rate": 9.98745134332128e-06, "epoch": 0.14278760343988317, "percentage": 7.14, "elapsed_time": "1:13:32", "remaining_time": "15:56:03", "throughput": "0.00", "total_tokens": 0}
|
111 |
+
{"current_steps": 222, "total_steps": 3080, "loss": 0.3436, "learning_rate": 9.98667963854936e-06, "epoch": 0.14408567256206392, "percentage": 7.21, "elapsed_time": "1:14:11", "remaining_time": "15:55:12", "throughput": "0.00", "total_tokens": 0}
|
112 |
+
{"current_steps": 224, "total_steps": 3080, "loss": 0.356, "learning_rate": 9.985884939360873e-06, "epoch": 0.1453837416842447, "percentage": 7.27, "elapsed_time": "1:14:53", "remaining_time": "15:54:52", "throughput": "0.00", "total_tokens": 0}
|
113 |
+
{"current_steps": 226, "total_steps": 3080, "loss": 0.3241, "learning_rate": 9.985067249420308e-06, "epoch": 0.14668181080642545, "percentage": 7.34, "elapsed_time": "1:15:31", "remaining_time": "15:53:42", "throughput": "0.00", "total_tokens": 0}
|
114 |
+
{"current_steps": 228, "total_steps": 3080, "loss": 0.3095, "learning_rate": 9.984226572498173e-06, "epoch": 0.1479798799286062, "percentage": 7.4, "elapsed_time": "1:16:13", "remaining_time": "15:53:32", "throughput": "0.00", "total_tokens": 0}
|
115 |
+
{"current_steps": 230, "total_steps": 3080, "loss": 0.3292, "learning_rate": 9.983362912470967e-06, "epoch": 0.14927794905078695, "percentage": 7.47, "elapsed_time": "1:16:54", "remaining_time": "15:52:57", "throughput": "0.00", "total_tokens": 0}
|
116 |
+
{"current_steps": 232, "total_steps": 3080, "loss": 0.3428, "learning_rate": 9.982476273321175e-06, "epoch": 0.1505760181729677, "percentage": 7.53, "elapsed_time": "1:17:32", "remaining_time": "15:51:55", "throughput": "0.00", "total_tokens": 0}
|
117 |
+
{"current_steps": 234, "total_steps": 3080, "loss": 0.3245, "learning_rate": 9.981566659137238e-06, "epoch": 0.15187408729514848, "percentage": 7.6, "elapsed_time": "1:18:14", "remaining_time": "15:51:35", "throughput": "0.00", "total_tokens": 0}
|
118 |
+
{"current_steps": 236, "total_steps": 3080, "loss": 0.3447, "learning_rate": 9.980634074113538e-06, "epoch": 0.15317215641732923, "percentage": 7.66, "elapsed_time": "1:18:51", "remaining_time": "15:50:21", "throughput": "0.00", "total_tokens": 0}
|
119 |
+
{"current_steps": 238, "total_steps": 3080, "loss": 0.3328, "learning_rate": 9.979678522550382e-06, "epoch": 0.15447022553950998, "percentage": 7.73, "elapsed_time": "1:19:32", "remaining_time": "15:49:50", "throughput": "0.00", "total_tokens": 0}
|
120 |
+
{"current_steps": 240, "total_steps": 3080, "loss": 0.3513, "learning_rate": 9.97870000885398e-06, "epoch": 0.15576829466169073, "percentage": 7.79, "elapsed_time": "1:20:14", "remaining_time": "15:49:33", "throughput": "0.00", "total_tokens": 0}
|
121 |
+
{"current_steps": 242, "total_steps": 3080, "loss": 0.3047, "learning_rate": 9.97769853753642e-06, "epoch": 0.15706636378387148, "percentage": 7.86, "elapsed_time": "1:20:57", "remaining_time": "15:49:27", "throughput": "0.00", "total_tokens": 0}
|
122 |
+
{"current_steps": 244, "total_steps": 3080, "loss": 0.3237, "learning_rate": 9.976674113215655e-06, "epoch": 0.15836443290605226, "percentage": 7.92, "elapsed_time": "1:21:36", "remaining_time": "15:48:32", "throughput": "0.00", "total_tokens": 0}
|
123 |
+
{"current_steps": 246, "total_steps": 3080, "loss": 0.3276, "learning_rate": 9.975626740615478e-06, "epoch": 0.159662502028233, "percentage": 7.99, "elapsed_time": "1:22:15", "remaining_time": "15:47:33", "throughput": "0.00", "total_tokens": 0}
|
124 |
+
{"current_steps": 248, "total_steps": 3080, "loss": 0.319, "learning_rate": 9.974556424565503e-06, "epoch": 0.16096057115041376, "percentage": 8.05, "elapsed_time": "1:22:59", "remaining_time": "15:47:37", "throughput": "0.00", "total_tokens": 0}
|
125 |
+
{"current_steps": 250, "total_steps": 3080, "loss": 0.3259, "learning_rate": 9.973463170001131e-06, "epoch": 0.1622586402725945, "percentage": 8.12, "elapsed_time": "1:23:41", "remaining_time": "15:47:26", "throughput": "0.00", "total_tokens": 0}
|
126 |
+
{"current_steps": 252, "total_steps": 3080, "loss": 0.3016, "learning_rate": 9.972346981963546e-06, "epoch": 0.16355670939477526, "percentage": 8.18, "elapsed_time": "1:24:21", "remaining_time": "15:46:38", "throughput": "0.00", "total_tokens": 0}
|
127 |
+
{"current_steps": 254, "total_steps": 3080, "loss": 0.3151, "learning_rate": 9.971207865599679e-06, "epoch": 0.16485477851695604, "percentage": 8.25, "elapsed_time": "1:25:02", "remaining_time": "15:46:07", "throughput": "0.00", "total_tokens": 0}
|
128 |
+
{"current_steps": 256, "total_steps": 3080, "loss": 0.3193, "learning_rate": 9.970045826162182e-06, "epoch": 0.1661528476391368, "percentage": 8.31, "elapsed_time": "1:25:43", "remaining_time": "15:45:36", "throughput": "0.00", "total_tokens": 0}
|
129 |
+
{"current_steps": 258, "total_steps": 3080, "loss": 0.3412, "learning_rate": 9.96886086900942e-06, "epoch": 0.16745091676131754, "percentage": 8.38, "elapsed_time": "1:26:20", "remaining_time": "15:44:22", "throughput": "0.00", "total_tokens": 0}
|
130 |
+
{"current_steps": 260, "total_steps": 3080, "loss": 0.324, "learning_rate": 9.967652999605424e-06, "epoch": 0.1687489858834983, "percentage": 8.44, "elapsed_time": "1:27:00", "remaining_time": "15:43:46", "throughput": "0.00", "total_tokens": 0}
|
131 |
+
{"current_steps": 262, "total_steps": 3080, "loss": 0.3476, "learning_rate": 9.966422223519887e-06, "epoch": 0.17004705500567904, "percentage": 8.51, "elapsed_time": "1:27:39", "remaining_time": "15:42:49", "throughput": "0.00", "total_tokens": 0}
|
132 |
+
{"current_steps": 264, "total_steps": 3080, "loss": 0.3333, "learning_rate": 9.965168546428122e-06, "epoch": 0.17134512412785982, "percentage": 8.57, "elapsed_time": "1:28:17", "remaining_time": "15:41:48", "throughput": "0.00", "total_tokens": 0}
|
133 |
+
{"current_steps": 266, "total_steps": 3080, "loss": 0.3283, "learning_rate": 9.963891974111042e-06, "epoch": 0.17264319325004057, "percentage": 8.64, "elapsed_time": "1:28:57", "remaining_time": "15:41:04", "throughput": "0.00", "total_tokens": 0}
|
134 |
+
{"current_steps": 268, "total_steps": 3080, "loss": 0.3187, "learning_rate": 9.96259251245514e-06, "epoch": 0.17394126237222132, "percentage": 8.7, "elapsed_time": "1:29:39", "remaining_time": "15:40:49", "throughput": "0.00", "total_tokens": 0}
|
135 |
+
{"current_steps": 270, "total_steps": 3080, "loss": 0.3162, "learning_rate": 9.961270167452449e-06, "epoch": 0.17523933149440207, "percentage": 8.77, "elapsed_time": "1:30:21", "remaining_time": "15:40:23", "throughput": "0.00", "total_tokens": 0}
|
136 |
+
{"current_steps": 272, "total_steps": 3080, "loss": 0.3442, "learning_rate": 9.959924945200525e-06, "epoch": 0.17653740061658282, "percentage": 8.83, "elapsed_time": "1:31:02", "remaining_time": "15:39:54", "throughput": "0.00", "total_tokens": 0}
|
137 |
+
{"current_steps": 274, "total_steps": 3080, "loss": 0.3248, "learning_rate": 9.958556851902414e-06, "epoch": 0.1778354697387636, "percentage": 8.9, "elapsed_time": "1:31:46", "remaining_time": "15:39:51", "throughput": "0.00", "total_tokens": 0}
|
138 |
+
{"current_steps": 276, "total_steps": 3080, "loss": 0.314, "learning_rate": 9.957165893866623e-06, "epoch": 0.17913353886094435, "percentage": 8.96, "elapsed_time": "1:32:24", "remaining_time": "15:38:52", "throughput": "0.00", "total_tokens": 0}
|
139 |
+
{"current_steps": 278, "total_steps": 3080, "loss": 0.3207, "learning_rate": 9.955752077507093e-06, "epoch": 0.1804316079831251, "percentage": 9.03, "elapsed_time": "1:33:05", "remaining_time": "15:38:13", "throughput": "0.00", "total_tokens": 0}
|
140 |
+
{"current_steps": 280, "total_steps": 3080, "loss": 0.3166, "learning_rate": 9.95431540934317e-06, "epoch": 0.18172967710530585, "percentage": 9.09, "elapsed_time": "1:33:42", "remaining_time": "15:37:04", "throughput": "0.00", "total_tokens": 0}
|
141 |
+
{"current_steps": 282, "total_steps": 3080, "loss": 0.3198, "learning_rate": 9.952855895999567e-06, "epoch": 0.1830277462274866, "percentage": 9.16, "elapsed_time": "1:34:19", "remaining_time": "15:35:56", "throughput": "0.00", "total_tokens": 0}
|
142 |
+
{"current_steps": 284, "total_steps": 3080, "loss": 0.3227, "learning_rate": 9.951373544206352e-06, "epoch": 0.18432581534966738, "percentage": 9.22, "elapsed_time": "1:35:01", "remaining_time": "15:35:28", "throughput": "0.00", "total_tokens": 0}
|
143 |
+
{"current_steps": 286, "total_steps": 3080, "loss": 0.3325, "learning_rate": 9.949868360798893e-06, "epoch": 0.18562388447184813, "percentage": 9.29, "elapsed_time": "1:35:42", "remaining_time": "15:34:56", "throughput": "0.00", "total_tokens": 0}
|
144 |
+
{"current_steps": 288, "total_steps": 3080, "loss": 0.3007, "learning_rate": 9.948340352717845e-06, "epoch": 0.18692195359402888, "percentage": 9.35, "elapsed_time": "1:36:21", "remaining_time": "15:34:08", "throughput": "0.00", "total_tokens": 0}
|
145 |
+
{"current_steps": 290, "total_steps": 3080, "loss": 0.3168, "learning_rate": 9.946789527009108e-06, "epoch": 0.18822002271620963, "percentage": 9.42, "elapsed_time": "1:37:02", "remaining_time": "15:33:35", "throughput": "0.00", "total_tokens": 0}
|
146 |
+
{"current_steps": 292, "total_steps": 3080, "loss": 0.3234, "learning_rate": 9.9452158908238e-06, "epoch": 0.18951809183839038, "percentage": 9.48, "elapsed_time": "1:37:43", "remaining_time": "15:32:59", "throughput": "0.00", "total_tokens": 0}
|
147 |
+
{"current_steps": 294, "total_steps": 3080, "loss": 0.3549, "learning_rate": 9.943619451418225e-06, "epoch": 0.19081616096057116, "percentage": 9.55, "elapsed_time": "1:38:20", "remaining_time": "15:31:57", "throughput": "0.00", "total_tokens": 0}
|
148 |
+
{"current_steps": 296, "total_steps": 3080, "loss": 0.3219, "learning_rate": 9.942000216153829e-06, "epoch": 0.1921142300827519, "percentage": 9.61, "elapsed_time": "1:39:01", "remaining_time": "15:31:23", "throughput": "0.00", "total_tokens": 0}
|
149 |
+
{"current_steps": 298, "total_steps": 3080, "loss": 0.3177, "learning_rate": 9.940358192497178e-06, "epoch": 0.19341229920493266, "percentage": 9.68, "elapsed_time": "1:39:41", "remaining_time": "15:30:37", "throughput": "0.00", "total_tokens": 0}
|
150 |
+
{"current_steps": 300, "total_steps": 3080, "loss": 0.3151, "learning_rate": 9.93869338801992e-06, "epoch": 0.19471036832711341, "percentage": 9.74, "elapsed_time": "1:40:24", "remaining_time": "15:30:31", "throughput": "0.00", "total_tokens": 0}
|
151 |
+
{"current_steps": 302, "total_steps": 3080, "loss": 0.3183, "learning_rate": 9.937005810398747e-06, "epoch": 0.19600843744929417, "percentage": 9.81, "elapsed_time": "1:41:01", "remaining_time": "15:29:21", "throughput": "0.00", "total_tokens": 0}
|
152 |
+
{"current_steps": 304, "total_steps": 3080, "loss": 0.3382, "learning_rate": 9.935295467415363e-06, "epoch": 0.19730650657147494, "percentage": 9.87, "elapsed_time": "1:41:38", "remaining_time": "15:28:07", "throughput": "0.00", "total_tokens": 0}
|
153 |
+
{"current_steps": 306, "total_steps": 3080, "loss": 0.327, "learning_rate": 9.933562366956445e-06, "epoch": 0.1986045756936557, "percentage": 9.94, "elapsed_time": "1:42:20", "remaining_time": "15:27:48", "throughput": "0.00", "total_tokens": 0}
|
154 |
+
{"current_steps": 308, "total_steps": 3080, "loss": 0.3025, "learning_rate": 9.931806517013612e-06, "epoch": 0.19990264481583644, "percentage": 10.0, "elapsed_time": "1:42:59", "remaining_time": "15:26:58", "throughput": "0.00", "total_tokens": 0}
|
155 |
+
{"current_steps": 310, "total_steps": 3080, "loss": 0.3499, "learning_rate": 9.930027925683384e-06, "epoch": 0.2012007139380172, "percentage": 10.06, "elapsed_time": "1:43:40", "remaining_time": "15:26:25", "throughput": "0.00", "total_tokens": 0}
|
156 |
+
{"current_steps": 312, "total_steps": 3080, "loss": 0.3319, "learning_rate": 9.928226601167139e-06, "epoch": 0.20249878306019795, "percentage": 10.13, "elapsed_time": "1:44:19", "remaining_time": "15:25:36", "throughput": "0.00", "total_tokens": 0}
|
157 |
+
{"current_steps": 314, "total_steps": 3080, "loss": 0.3031, "learning_rate": 9.92640255177109e-06, "epoch": 0.20379685218237872, "percentage": 10.19, "elapsed_time": "1:44:59", "remaining_time": "15:24:50", "throughput": "0.00", "total_tokens": 0}
|
158 |
+
{"current_steps": 316, "total_steps": 3080, "loss": 0.3245, "learning_rate": 9.924555785906235e-06, "epoch": 0.20509492130455947, "percentage": 10.26, "elapsed_time": "1:45:36", "remaining_time": "15:23:42", "throughput": "0.00", "total_tokens": 0}
|
159 |
+
{"current_steps": 318, "total_steps": 3080, "loss": 0.3003, "learning_rate": 9.922686312088323e-06, "epoch": 0.20639299042674022, "percentage": 10.32, "elapsed_time": "1:46:16", "remaining_time": "15:23:05", "throughput": "0.00", "total_tokens": 0}
|
160 |
+
{"current_steps": 320, "total_steps": 3080, "loss": 0.3258, "learning_rate": 9.920794138937807e-06, "epoch": 0.20769105954892098, "percentage": 10.39, "elapsed_time": "1:46:52", "remaining_time": "15:21:49", "throughput": "0.00", "total_tokens": 0}
|
161 |
+
{"current_steps": 322, "total_steps": 3080, "loss": 0.3447, "learning_rate": 9.918879275179819e-06, "epoch": 0.20898912867110173, "percentage": 10.45, "elapsed_time": "1:47:33", "remaining_time": "15:21:19", "throughput": "0.00", "total_tokens": 0}
|
162 |
+
{"current_steps": 324, "total_steps": 3080, "loss": 0.3075, "learning_rate": 9.916941729644112e-06, "epoch": 0.2102871977932825, "percentage": 10.52, "elapsed_time": "1:48:12", "remaining_time": "15:20:24", "throughput": "0.00", "total_tokens": 0}
|
163 |
+
{"current_steps": 326, "total_steps": 3080, "loss": 0.3392, "learning_rate": 9.914981511265039e-06, "epoch": 0.21158526691546325, "percentage": 10.58, "elapsed_time": "1:48:50", "remaining_time": "15:19:30", "throughput": "0.00", "total_tokens": 0}
|
164 |
+
{"current_steps": 328, "total_steps": 3080, "loss": 0.3018, "learning_rate": 9.912998629081495e-06, "epoch": 0.212883336037644, "percentage": 10.65, "elapsed_time": "1:49:27", "remaining_time": "15:18:25", "throughput": "0.00", "total_tokens": 0}
|
165 |
+
{"current_steps": 330, "total_steps": 3080, "loss": 0.3241, "learning_rate": 9.910993092236878e-06, "epoch": 0.21418140515982476, "percentage": 10.71, "elapsed_time": "1:50:05", "remaining_time": "15:17:23", "throughput": "0.00", "total_tokens": 0}
|
166 |
+
{"current_steps": 332, "total_steps": 3080, "loss": 0.2979, "learning_rate": 9.90896490997906e-06, "epoch": 0.2154794742820055, "percentage": 10.78, "elapsed_time": "1:50:46", "remaining_time": "15:16:54", "throughput": "0.00", "total_tokens": 0}
|
167 |
+
{"current_steps": 334, "total_steps": 3080, "loss": 0.3036, "learning_rate": 9.906914091660327e-06, "epoch": 0.21677754340418628, "percentage": 10.84, "elapsed_time": "1:51:27", "remaining_time": "15:16:24", "throughput": "0.00", "total_tokens": 0}
|
168 |
+
{"current_steps": 336, "total_steps": 3080, "loss": 0.3457, "learning_rate": 9.904840646737346e-06, "epoch": 0.21807561252636704, "percentage": 10.91, "elapsed_time": "1:52:09", "remaining_time": "15:16:00", "throughput": "0.00", "total_tokens": 0}
|
169 |
+
{"current_steps": 338, "total_steps": 3080, "loss": 0.3235, "learning_rate": 9.902744584771123e-06, "epoch": 0.21937368164854779, "percentage": 10.97, "elapsed_time": "1:52:49", "remaining_time": "15:15:14", "throughput": "0.00", "total_tokens": 0}
|
170 |
+
{"current_steps": 340, "total_steps": 3080, "loss": 0.2932, "learning_rate": 9.900625915426948e-06, "epoch": 0.22067175077072854, "percentage": 11.04, "elapsed_time": "1:53:28", "remaining_time": "15:14:31", "throughput": "0.00", "total_tokens": 0}
|
171 |
+
{"current_steps": 342, "total_steps": 3080, "loss": 0.3243, "learning_rate": 9.898484648474362e-06, "epoch": 0.2219698198929093, "percentage": 11.1, "elapsed_time": "1:54:10", "remaining_time": "15:14:04", "throughput": "0.00", "total_tokens": 0}
|
172 |
+
{"current_steps": 344, "total_steps": 3080, "loss": 0.3184, "learning_rate": 9.896320793787106e-06, "epoch": 0.22326788901509007, "percentage": 11.17, "elapsed_time": "1:54:52", "remaining_time": "15:13:39", "throughput": "0.00", "total_tokens": 0}
|
173 |
+
{"current_steps": 346, "total_steps": 3080, "loss": 0.3394, "learning_rate": 9.894134361343077e-06, "epoch": 0.22456595813727082, "percentage": 11.23, "elapsed_time": "1:55:37", "remaining_time": "15:13:39", "throughput": "0.00", "total_tokens": 0}
|
174 |
+
{"current_steps": 348, "total_steps": 3080, "loss": 0.327, "learning_rate": 9.891925361224284e-06, "epoch": 0.22586402725945157, "percentage": 11.3, "elapsed_time": "1:56:18", "remaining_time": "15:13:02", "throughput": "0.00", "total_tokens": 0}
|
175 |
+
{"current_steps": 350, "total_steps": 3080, "loss": 0.3652, "learning_rate": 9.889693803616793e-06, "epoch": 0.22716209638163232, "percentage": 11.36, "elapsed_time": "1:56:56", "remaining_time": "15:12:08", "throughput": "0.00", "total_tokens": 0}
|
176 |
+
{"current_steps": 352, "total_steps": 3080, "loss": 0.314, "learning_rate": 9.887439698810694e-06, "epoch": 0.22846016550381307, "percentage": 11.43, "elapsed_time": "1:57:32", "remaining_time": "15:10:58", "throughput": "0.00", "total_tokens": 0}
|
177 |
+
{"current_steps": 354, "total_steps": 3080, "loss": 0.3081, "learning_rate": 9.88516305720004e-06, "epoch": 0.22975823462599385, "percentage": 11.49, "elapsed_time": "1:58:14", "remaining_time": "15:10:31", "throughput": "0.00", "total_tokens": 0}
|
178 |
+
{"current_steps": 356, "total_steps": 3080, "loss": 0.3154, "learning_rate": 9.88286388928281e-06, "epoch": 0.2310563037481746, "percentage": 11.56, "elapsed_time": "1:58:51", "remaining_time": "15:09:29", "throughput": "0.00", "total_tokens": 0}
|
179 |
+
{"current_steps": 358, "total_steps": 3080, "loss": 0.3115, "learning_rate": 9.880542205660853e-06, "epoch": 0.23235437287035535, "percentage": 11.62, "elapsed_time": "1:59:27", "remaining_time": "15:08:17", "throughput": "0.00", "total_tokens": 0}
|
180 |
+
{"current_steps": 360, "total_steps": 3080, "loss": 0.2975, "learning_rate": 9.878198017039839e-06, "epoch": 0.2336524419925361, "percentage": 11.69, "elapsed_time": "2:00:06", "remaining_time": "15:07:30", "throughput": "0.00", "total_tokens": 0}
|
181 |
+
{"current_steps": 362, "total_steps": 3080, "loss": 0.2996, "learning_rate": 9.875831334229217e-06, "epoch": 0.23495051111471685, "percentage": 11.75, "elapsed_time": "2:00:45", "remaining_time": "15:06:41", "throughput": "0.00", "total_tokens": 0}
|
182 |
+
{"current_steps": 364, "total_steps": 3080, "loss": 0.2892, "learning_rate": 9.873442168142158e-06, "epoch": 0.23624858023689763, "percentage": 11.82, "elapsed_time": "2:01:25", "remaining_time": "15:06:04", "throughput": "0.00", "total_tokens": 0}
|
183 |
+
{"current_steps": 366, "total_steps": 3080, "loss": 0.3073, "learning_rate": 9.87103052979551e-06, "epoch": 0.23754664935907838, "percentage": 11.88, "elapsed_time": "2:02:06", "remaining_time": "15:05:28", "throughput": "0.00", "total_tokens": 0}
|
184 |
+
{"current_steps": 368, "total_steps": 3080, "loss": 0.3216, "learning_rate": 9.868596430309739e-06, "epoch": 0.23884471848125913, "percentage": 11.95, "elapsed_time": "2:02:45", "remaining_time": "15:04:37", "throughput": "0.00", "total_tokens": 0}
|
185 |
+
{"current_steps": 370, "total_steps": 3080, "loss": 0.3071, "learning_rate": 9.866139880908887e-06, "epoch": 0.24014278760343988, "percentage": 12.01, "elapsed_time": "2:03:25", "remaining_time": "15:03:58", "throughput": "0.00", "total_tokens": 0}
|
186 |
+
{"current_steps": 372, "total_steps": 3080, "loss": 0.3239, "learning_rate": 9.863660892920514e-06, "epoch": 0.24144085672562063, "percentage": 12.08, "elapsed_time": "2:04:06", "remaining_time": "15:03:26", "throughput": "0.00", "total_tokens": 0}
|
187 |
+
{"current_steps": 374, "total_steps": 3080, "loss": 0.3079, "learning_rate": 9.861159477775653e-06, "epoch": 0.2427389258478014, "percentage": 12.14, "elapsed_time": "2:04:49", "remaining_time": "15:03:09", "throughput": "0.00", "total_tokens": 0}
|
188 |
+
{"current_steps": 376, "total_steps": 3080, "loss": 0.3124, "learning_rate": 9.858635647008747e-06, "epoch": 0.24403699496998216, "percentage": 12.21, "elapsed_time": "2:05:26", "remaining_time": "15:02:06", "throughput": "0.00", "total_tokens": 0}
|
189 |
+
{"current_steps": 378, "total_steps": 3080, "loss": 0.3067, "learning_rate": 9.856089412257605e-06, "epoch": 0.2453350640921629, "percentage": 12.27, "elapsed_time": "2:06:06", "remaining_time": "15:01:25", "throughput": "0.00", "total_tokens": 0}
|
190 |
+
{"current_steps": 380, "total_steps": 3080, "loss": 0.308, "learning_rate": 9.85352078526334e-06, "epoch": 0.24663313321434366, "percentage": 12.34, "elapsed_time": "2:06:48", "remaining_time": "15:01:00", "throughput": "0.00", "total_tokens": 0}
|
191 |
+
{"current_steps": 382, "total_steps": 3080, "loss": 0.3277, "learning_rate": 9.850929777870324e-06, "epoch": 0.2479312023365244, "percentage": 12.4, "elapsed_time": "2:07:27", "remaining_time": "15:00:14", "throughput": "0.00", "total_tokens": 0}
|
192 |
+
{"current_steps": 384, "total_steps": 3080, "loss": 0.2848, "learning_rate": 9.848316402026125e-06, "epoch": 0.2492292714587052, "percentage": 12.47, "elapsed_time": "2:08:09", "remaining_time": "14:59:50", "throughput": "0.00", "total_tokens": 0}
|
193 |
+
{"current_steps": 386, "total_steps": 3080, "loss": 0.2932, "learning_rate": 9.845680669781459e-06, "epoch": 0.25052734058088594, "percentage": 12.53, "elapsed_time": "2:08:49", "remaining_time": "14:59:08", "throughput": "0.00", "total_tokens": 0}
|
194 |
+
{"current_steps": 388, "total_steps": 3080, "loss": 0.3067, "learning_rate": 9.843022593290129e-06, "epoch": 0.2518254097030667, "percentage": 12.6, "elapsed_time": "2:09:26", "remaining_time": "14:58:04", "throughput": "0.00", "total_tokens": 0}
|
195 |
+
{"current_steps": 390, "total_steps": 3080, "loss": 0.3132, "learning_rate": 9.840342184808973e-06, "epoch": 0.25312347882524744, "percentage": 12.66, "elapsed_time": "2:10:06", "remaining_time": "14:57:22", "throughput": "0.00", "total_tokens": 0}
|
196 |
+
{"current_steps": 392, "total_steps": 3080, "loss": 0.2996, "learning_rate": 9.837639456697802e-06, "epoch": 0.2544215479474282, "percentage": 12.73, "elapsed_time": "2:10:46", "remaining_time": "14:56:47", "throughput": "0.00", "total_tokens": 0}
|
197 |
+
{"current_steps": 394, "total_steps": 3080, "loss": 0.3138, "learning_rate": 9.83491442141935e-06, "epoch": 0.25571961706960894, "percentage": 12.79, "elapsed_time": "2:11:28", "remaining_time": "14:56:20", "throughput": "0.00", "total_tokens": 0}
|
198 |
+
{"current_steps": 396, "total_steps": 3080, "loss": 0.3257, "learning_rate": 9.832167091539215e-06, "epoch": 0.2570176861917897, "percentage": 12.86, "elapsed_time": "2:12:14", "remaining_time": "14:56:17", "throughput": "0.00", "total_tokens": 0}
|
199 |
+
{"current_steps": 398, "total_steps": 3080, "loss": 0.3049, "learning_rate": 9.829397479725791e-06, "epoch": 0.2583157553139705, "percentage": 12.92, "elapsed_time": "2:12:54", "remaining_time": "14:55:39", "throughput": "0.00", "total_tokens": 0}
|
200 |
+
{"current_steps": 400, "total_steps": 3080, "loss": 0.3137, "learning_rate": 9.826605598750223e-06, "epoch": 0.2596138244361512, "percentage": 12.99, "elapsed_time": "2:13:34", "remaining_time": "14:54:55", "throughput": "0.00", "total_tokens": 0}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2cd93abf4907e8397ac7a7fecc78c6c64d21b1ec6c2cb3cc420f868f0a1ade7
|
3 |
+
size 5432
|
training_eval_loss.png
ADDED
training_loss.png
ADDED