weijie210 committed on
Commit
9298b39
1 Parent(s): c93d359

Model save

Browse files
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: zephyr-7b-dpo-reference
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # zephyr-7b-dpo-reference
17
+
18
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unspecified dataset (the training dataset name was not recorded when this card was generated).
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.0429
21
+ - Rewards/chosen: -0.5987
22
+ - Rewards/rejected: -10.2552
23
+ - Rewards/accuracies: 0.9741
24
+ - Rewards/margins: 9.6565
25
+ - Logps/rejected: -175.1052
26
+ - Logps/chosen: -304.8906
27
+ - Logits/rejected: -1.9643
28
+ - Logits/chosen: -2.1592
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
+ More information needed
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 5e-07
48
+ - train_batch_size: 8
49
+ - eval_batch_size: 8
50
+ - seed: 42
51
+ - distributed_type: multi-GPU
52
+ - num_devices: 4
53
+ - total_train_batch_size: 32
54
+ - total_eval_batch_size: 32
55
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
56
+ - lr_scheduler_type: linear
57
+ - lr_scheduler_warmup_ratio: 0.1
58
+ - num_epochs: 1
59
+
60
+ ### Training results
61
+
62
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
+ | 0.0811 | 0.29 | 500 | 0.0543 | -0.5167 | -8.9128 | 0.9720 | 8.3961 | -161.6813 | -304.0705 | -2.0037 | -2.1857 |
65
+ | 0.0362 | 0.57 | 1000 | 0.0483 | -0.4980 | -9.5824 | 0.9720 | 9.0844 | -168.3771 | -303.8834 | -2.0113 | -2.2030 |
66
+ | 0.0318 | 0.86 | 1500 | 0.0442 | -0.8458 | -10.5987 | 0.9720 | 9.7529 | -178.5403 | -307.3617 | -1.9506 | -2.1461 |
67
+
68
+
69
+ ### Framework versions
70
+
71
+ - Transformers 4.36.1
72
+ - Pytorch 2.0.1+cu117
73
+ - Datasets 2.16.1
74
+ - Tokenizers 0.15.0
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.159156560897827,
4
+ "eval_logits/rejected": -1.9642761945724487,
5
+ "eval_logps/chosen": -304.89056396484375,
6
+ "eval_logps/rejected": -175.10516357421875,
7
+ "eval_loss": 0.04293975234031677,
8
+ "eval_rewards/accuracies": 0.9741379022598267,
9
+ "eval_rewards/chosen": -0.5987315773963928,
10
+ "eval_rewards/margins": 9.656463623046875,
11
+ "eval_rewards/rejected": -10.255194664001465,
12
+ "eval_runtime": 497.484,
13
+ "eval_samples": 1843,
14
+ "eval_samples_per_second": 3.705,
15
+ "eval_steps_per_second": 0.117,
16
+ "train_loss": 0.06308119606848633,
17
+ "train_runtime": 23531.0319,
18
+ "train_samples": 55762,
19
+ "train_samples_per_second": 2.37,
20
+ "train_steps_per_second": 0.074
21
+ }
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "alignment-handbook/zephyr-7b-sft-full",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 4096,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 14336,
13
+ "max_position_embeddings": 32768,
14
+ "model_type": "mistral",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 32,
17
+ "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_theta": 10000.0,
20
+ "sliding_window": 4096,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.36.1",
24
+ "use_cache": false,
25
+ "vocab_size": 32000
26
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.159156560897827,
4
+ "eval_logits/rejected": -1.9642761945724487,
5
+ "eval_logps/chosen": -304.89056396484375,
6
+ "eval_logps/rejected": -175.10516357421875,
7
+ "eval_loss": 0.04293975234031677,
8
+ "eval_rewards/accuracies": 0.9741379022598267,
9
+ "eval_rewards/chosen": -0.5987315773963928,
10
+ "eval_rewards/margins": 9.656463623046875,
11
+ "eval_rewards/rejected": -10.255194664001465,
12
+ "eval_runtime": 497.484,
13
+ "eval_samples": 1843,
14
+ "eval_samples_per_second": 3.705,
15
+ "eval_steps_per_second": 0.117
16
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.36.1"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:905838749b852fa9332ddfae123aa13b6b7c0349bd967c2a35ee7dc50e9ea1dc
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9fe8a51c40d49c2089461ed6bcf0ffcc80af50863a6acb9b33406f500ad379c
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ecfac24d65a7bbf384ff46d62368f2d99a8fef93ff5aa780daae30a41d9a75a
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
runs/Feb01_13-51-56_node01/events.out.tfevents.1706766942.node01.3302100.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58ce9b9f34b5cbd0918834ba4b73c33f7b78b8f12733296a6fc6144cdcb6e622
3
+ size 117846
runs/Feb01_13-51-56_node01/events.out.tfevents.1706790971.node01.3302100.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:464dbb90fc1872bd1fdb9d0efd96060d8e99d51e4aab550ad26cd2dbef2d0392
3
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "model_max_length": 4096,
37
+ "pad_token": "</s>",
38
+ "sp_model_kwargs": {},
39
+ "spaces_between_special_tokens": false,
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.06308119606848633,
4
+ "train_runtime": 23531.0319,
5
+ "train_samples": 55762,
6
+ "train_samples_per_second": 2.37,
7
+ "train_steps_per_second": 0.074
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2528 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1743,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 2.857142857142857e-09,
14
+ "logits/chosen": -2.8170976638793945,
15
+ "logits/rejected": -2.583425998687744,
16
+ "logps/chosen": -267.58819580078125,
17
+ "logps/rejected": -55.24143981933594,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 2.857142857142857e-08,
28
+ "logits/chosen": -2.728940486907959,
29
+ "logits/rejected": -2.597886800765991,
30
+ "logps/chosen": -287.7469482421875,
31
+ "logps/rejected": -66.66583251953125,
32
+ "loss": 0.6912,
33
+ "rewards/accuracies": 0.4583333432674408,
34
+ "rewards/chosen": -0.003922327421605587,
35
+ "rewards/margins": -0.0009250044822692871,
36
+ "rewards/rejected": -0.0029973229393363,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.01,
41
+ "learning_rate": 5.714285714285714e-08,
42
+ "logits/chosen": -2.6918883323669434,
43
+ "logits/rejected": -2.6110966205596924,
44
+ "logps/chosen": -280.5570068359375,
45
+ "logps/rejected": -69.74317169189453,
46
+ "loss": 0.6445,
47
+ "rewards/accuracies": 0.862500011920929,
48
+ "rewards/chosen": 0.10821928083896637,
49
+ "rewards/margins": 0.11207082122564316,
50
+ "rewards/rejected": -0.003851559478789568,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.02,
55
+ "learning_rate": 8.571428571428572e-08,
56
+ "logits/chosen": -2.6756694316864014,
57
+ "logits/rejected": -2.5497803688049316,
58
+ "logps/chosen": -334.42138671875,
59
+ "logps/rejected": -74.35704040527344,
60
+ "loss": 0.5256,
61
+ "rewards/accuracies": 0.9750000238418579,
62
+ "rewards/chosen": 0.3521518111228943,
63
+ "rewards/margins": 0.4118792414665222,
64
+ "rewards/rejected": -0.05972742289304733,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.02,
69
+ "learning_rate": 1.1428571428571427e-07,
70
+ "logits/chosen": -2.585674524307251,
71
+ "logits/rejected": -2.4876582622528076,
72
+ "logps/chosen": -259.1927185058594,
73
+ "logps/rejected": -70.0163345336914,
74
+ "loss": 0.3967,
75
+ "rewards/accuracies": 0.925000011920929,
76
+ "rewards/chosen": 0.5247544050216675,
77
+ "rewards/margins": 0.7782431244850159,
78
+ "rewards/rejected": -0.2534886598587036,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.03,
83
+ "learning_rate": 1.4285714285714285e-07,
84
+ "logits/chosen": -2.556175947189331,
85
+ "logits/rejected": -2.4422554969787598,
86
+ "logps/chosen": -293.9497375488281,
87
+ "logps/rejected": -78.49222564697266,
88
+ "loss": 0.3084,
89
+ "rewards/accuracies": 0.987500011920929,
90
+ "rewards/chosen": 0.4779079556465149,
91
+ "rewards/margins": 1.3577072620391846,
92
+ "rewards/rejected": -0.8797992467880249,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.03,
97
+ "learning_rate": 1.7142857142857143e-07,
98
+ "logits/chosen": -2.5218682289123535,
99
+ "logits/rejected": -2.413239002227783,
100
+ "logps/chosen": -348.8528747558594,
101
+ "logps/rejected": -90.35295867919922,
102
+ "loss": 0.2281,
103
+ "rewards/accuracies": 0.9375,
104
+ "rewards/chosen": 0.8038774728775024,
105
+ "rewards/margins": 2.1713979244232178,
106
+ "rewards/rejected": -1.3675200939178467,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.04,
111
+ "learning_rate": 2e-07,
112
+ "logits/chosen": -2.5286450386047363,
113
+ "logits/rejected": -2.4316279888153076,
114
+ "logps/chosen": -277.53424072265625,
115
+ "logps/rejected": -77.01128387451172,
116
+ "loss": 0.2105,
117
+ "rewards/accuracies": 0.8999999761581421,
118
+ "rewards/chosen": 0.6769354343414307,
119
+ "rewards/margins": 2.166170358657837,
120
+ "rewards/rejected": -1.4892350435256958,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.05,
125
+ "learning_rate": 2.2857142857142855e-07,
126
+ "logits/chosen": -2.498818874359131,
127
+ "logits/rejected": -2.377124071121216,
128
+ "logps/chosen": -307.00531005859375,
129
+ "logps/rejected": -99.22511291503906,
130
+ "loss": 0.1428,
131
+ "rewards/accuracies": 0.949999988079071,
132
+ "rewards/chosen": 0.6349704265594482,
133
+ "rewards/margins": 2.8562676906585693,
134
+ "rewards/rejected": -2.2212975025177,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.05,
139
+ "learning_rate": 2.571428571428571e-07,
140
+ "logits/chosen": -2.5420005321502686,
141
+ "logits/rejected": -2.4419803619384766,
142
+ "logps/chosen": -297.6865539550781,
143
+ "logps/rejected": -105.5224609375,
144
+ "loss": 0.1443,
145
+ "rewards/accuracies": 0.9624999761581421,
146
+ "rewards/chosen": 0.5000184774398804,
147
+ "rewards/margins": 3.1510608196258545,
148
+ "rewards/rejected": -2.6510424613952637,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.06,
153
+ "learning_rate": 2.857142857142857e-07,
154
+ "logits/chosen": -2.4766311645507812,
155
+ "logits/rejected": -2.364316701889038,
156
+ "logps/chosen": -269.93719482421875,
157
+ "logps/rejected": -100.0908432006836,
158
+ "loss": 0.1393,
159
+ "rewards/accuracies": 0.9624999761581421,
160
+ "rewards/chosen": 0.4442061483860016,
161
+ "rewards/margins": 3.083071231842041,
162
+ "rewards/rejected": -2.638864517211914,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.06,
167
+ "learning_rate": 3.142857142857143e-07,
168
+ "logits/chosen": -2.388690233230591,
169
+ "logits/rejected": -2.3019070625305176,
170
+ "logps/chosen": -259.3259582519531,
171
+ "logps/rejected": -92.60478210449219,
172
+ "loss": 0.1255,
173
+ "rewards/accuracies": 0.9750000238418579,
174
+ "rewards/chosen": 0.3840440511703491,
175
+ "rewards/margins": 3.357806444168091,
176
+ "rewards/rejected": -2.973762035369873,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.07,
181
+ "learning_rate": 3.4285714285714286e-07,
182
+ "logits/chosen": -2.413675546646118,
183
+ "logits/rejected": -2.366042375564575,
184
+ "logps/chosen": -291.9371643066406,
185
+ "logps/rejected": -122.68412017822266,
186
+ "loss": 0.1103,
187
+ "rewards/accuracies": 0.987500011920929,
188
+ "rewards/chosen": 0.07346852123737335,
189
+ "rewards/margins": 3.9470927715301514,
190
+ "rewards/rejected": -3.873624324798584,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.07,
195
+ "learning_rate": 3.7142857142857145e-07,
196
+ "logits/chosen": -2.2968249320983887,
197
+ "logits/rejected": -2.225172519683838,
198
+ "logps/chosen": -244.48300170898438,
199
+ "logps/rejected": -106.67143249511719,
200
+ "loss": 0.1152,
201
+ "rewards/accuracies": 0.949999988079071,
202
+ "rewards/chosen": -0.45761212706565857,
203
+ "rewards/margins": 3.7605834007263184,
204
+ "rewards/rejected": -4.21819543838501,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.08,
209
+ "learning_rate": 4e-07,
210
+ "logits/chosen": -2.4523983001708984,
211
+ "logits/rejected": -2.301741123199463,
212
+ "logps/chosen": -299.51251220703125,
213
+ "logps/rejected": -111.51058197021484,
214
+ "loss": 0.0923,
215
+ "rewards/accuracies": 0.987500011920929,
216
+ "rewards/chosen": 0.3345158100128174,
217
+ "rewards/margins": 4.5269455909729,
218
+ "rewards/rejected": -4.192429542541504,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.09,
223
+ "learning_rate": 4.285714285714285e-07,
224
+ "logits/chosen": -2.2728230953216553,
225
+ "logits/rejected": -2.1703383922576904,
226
+ "logps/chosen": -217.3053741455078,
227
+ "logps/rejected": -100.21586608886719,
228
+ "loss": 0.0937,
229
+ "rewards/accuracies": 0.9624999761581421,
230
+ "rewards/chosen": 0.14594963192939758,
231
+ "rewards/margins": 3.915192127227783,
232
+ "rewards/rejected": -3.76924204826355,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.09,
237
+ "learning_rate": 4.571428571428571e-07,
238
+ "logits/chosen": -2.311068058013916,
239
+ "logits/rejected": -2.1951611042022705,
240
+ "logps/chosen": -224.5479736328125,
241
+ "logps/rejected": -97.22464752197266,
242
+ "loss": 0.081,
243
+ "rewards/accuracies": 0.9750000238418579,
244
+ "rewards/chosen": 0.23994378745555878,
245
+ "rewards/margins": 4.240386009216309,
246
+ "rewards/rejected": -4.000441551208496,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.1,
251
+ "learning_rate": 4.857142857142857e-07,
252
+ "logits/chosen": -2.4215264320373535,
253
+ "logits/rejected": -2.3117451667785645,
254
+ "logps/chosen": -338.24591064453125,
255
+ "logps/rejected": -135.39236450195312,
256
+ "loss": 0.0497,
257
+ "rewards/accuracies": 0.987500011920929,
258
+ "rewards/chosen": 0.741433322429657,
259
+ "rewards/margins": 6.07324743270874,
260
+ "rewards/rejected": -5.331814765930176,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.1,
265
+ "learning_rate": 4.984056122448979e-07,
266
+ "logits/chosen": -2.376217842102051,
267
+ "logits/rejected": -2.2611336708068848,
268
+ "logps/chosen": -308.8148498535156,
269
+ "logps/rejected": -132.30233764648438,
270
+ "loss": 0.1005,
271
+ "rewards/accuracies": 0.987500011920929,
272
+ "rewards/chosen": 0.16114525496959686,
273
+ "rewards/margins": 5.889215469360352,
274
+ "rewards/rejected": -5.72806978225708,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.11,
279
+ "learning_rate": 4.952168367346938e-07,
280
+ "logits/chosen": -2.3446619510650635,
281
+ "logits/rejected": -2.239445209503174,
282
+ "logps/chosen": -316.2629699707031,
283
+ "logps/rejected": -133.4289093017578,
284
+ "loss": 0.0733,
285
+ "rewards/accuracies": 0.987500011920929,
286
+ "rewards/chosen": -0.2028081864118576,
287
+ "rewards/margins": 5.608849048614502,
288
+ "rewards/rejected": -5.811657905578613,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.11,
293
+ "learning_rate": 4.920280612244897e-07,
294
+ "logits/chosen": -2.263739585876465,
295
+ "logits/rejected": -2.1694753170013428,
296
+ "logps/chosen": -233.5961456298828,
297
+ "logps/rejected": -109.9480209350586,
298
+ "loss": 0.0772,
299
+ "rewards/accuracies": 0.9750000238418579,
300
+ "rewards/chosen": 0.2176302969455719,
301
+ "rewards/margins": 5.1773786544799805,
302
+ "rewards/rejected": -4.959748268127441,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.12,
307
+ "learning_rate": 4.888392857142857e-07,
308
+ "logits/chosen": -2.3166961669921875,
309
+ "logits/rejected": -2.2644801139831543,
310
+ "logps/chosen": -266.83099365234375,
311
+ "logps/rejected": -127.63531494140625,
312
+ "loss": 0.0682,
313
+ "rewards/accuracies": 0.9624999761581421,
314
+ "rewards/chosen": 0.30769938230514526,
315
+ "rewards/margins": 5.930529594421387,
316
+ "rewards/rejected": -5.622830390930176,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.13,
321
+ "learning_rate": 4.856505102040816e-07,
322
+ "logits/chosen": -2.289324998855591,
323
+ "logits/rejected": -2.145235538482666,
324
+ "logps/chosen": -287.94903564453125,
325
+ "logps/rejected": -135.3939666748047,
326
+ "loss": 0.0824,
327
+ "rewards/accuracies": 0.9624999761581421,
328
+ "rewards/chosen": 0.18774142861366272,
329
+ "rewards/margins": 6.2231669425964355,
330
+ "rewards/rejected": -6.035426139831543,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.13,
335
+ "learning_rate": 4.824617346938776e-07,
336
+ "logits/chosen": -2.216264009475708,
337
+ "logits/rejected": -2.047607183456421,
338
+ "logps/chosen": -271.09869384765625,
339
+ "logps/rejected": -129.23095703125,
340
+ "loss": 0.1054,
341
+ "rewards/accuracies": 0.987500011920929,
342
+ "rewards/chosen": 0.05019025877118111,
343
+ "rewards/margins": 6.176311016082764,
344
+ "rewards/rejected": -6.126120567321777,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.14,
349
+ "learning_rate": 4.792729591836734e-07,
350
+ "logits/chosen": -2.2174153327941895,
351
+ "logits/rejected": -2.094043254852295,
352
+ "logps/chosen": -295.2961120605469,
353
+ "logps/rejected": -132.24790954589844,
354
+ "loss": 0.0579,
355
+ "rewards/accuracies": 0.9624999761581421,
356
+ "rewards/chosen": 0.24180030822753906,
357
+ "rewards/margins": 6.183707237243652,
358
+ "rewards/rejected": -5.941906929016113,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 0.14,
363
+ "learning_rate": 4.760841836734694e-07,
364
+ "logits/chosen": -2.3349456787109375,
365
+ "logits/rejected": -2.2391867637634277,
366
+ "logps/chosen": -301.3727722167969,
367
+ "logps/rejected": -140.08639526367188,
368
+ "loss": 0.0578,
369
+ "rewards/accuracies": 1.0,
370
+ "rewards/chosen": -0.2325439751148224,
371
+ "rewards/margins": 6.470588684082031,
372
+ "rewards/rejected": -6.703132629394531,
373
+ "step": 250
374
+ },
375
+ {
376
+ "epoch": 0.15,
377
+ "learning_rate": 4.728954081632653e-07,
378
+ "logits/chosen": -2.2977206707000732,
379
+ "logits/rejected": -2.1363391876220703,
380
+ "logps/chosen": -305.45477294921875,
381
+ "logps/rejected": -140.94717407226562,
382
+ "loss": 0.0599,
383
+ "rewards/accuracies": 0.9624999761581421,
384
+ "rewards/chosen": -0.0075416178442537785,
385
+ "rewards/margins": 6.882362365722656,
386
+ "rewards/rejected": -6.8899030685424805,
387
+ "step": 260
388
+ },
389
+ {
390
+ "epoch": 0.15,
391
+ "learning_rate": 4.697066326530612e-07,
392
+ "logits/chosen": -2.2209079265594482,
393
+ "logits/rejected": -2.1426846981048584,
394
+ "logps/chosen": -269.04010009765625,
395
+ "logps/rejected": -142.6284942626953,
396
+ "loss": 0.056,
397
+ "rewards/accuracies": 0.9750000238418579,
398
+ "rewards/chosen": -0.23809853196144104,
399
+ "rewards/margins": 6.732804298400879,
400
+ "rewards/rejected": -6.970902919769287,
401
+ "step": 270
402
+ },
403
+ {
404
+ "epoch": 0.16,
405
+ "learning_rate": 4.665178571428571e-07,
406
+ "logits/chosen": -2.3430752754211426,
407
+ "logits/rejected": -2.2075252532958984,
408
+ "logps/chosen": -314.6163024902344,
409
+ "logps/rejected": -146.22244262695312,
410
+ "loss": 0.0529,
411
+ "rewards/accuracies": 1.0,
412
+ "rewards/chosen": -0.08103661239147186,
413
+ "rewards/margins": 7.245053768157959,
414
+ "rewards/rejected": -7.3260908126831055,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 0.17,
419
+ "learning_rate": 4.6332908163265305e-07,
420
+ "logits/chosen": -2.2507033348083496,
421
+ "logits/rejected": -2.094573736190796,
422
+ "logps/chosen": -262.8319396972656,
423
+ "logps/rejected": -141.3706817626953,
424
+ "loss": 0.0589,
425
+ "rewards/accuracies": 0.987500011920929,
426
+ "rewards/chosen": -0.06328751146793365,
427
+ "rewards/margins": 6.6208062171936035,
428
+ "rewards/rejected": -6.684094429016113,
429
+ "step": 290
430
+ },
431
+ {
432
+ "epoch": 0.17,
433
+ "learning_rate": 4.60140306122449e-07,
434
+ "logits/chosen": -2.2667717933654785,
435
+ "logits/rejected": -2.16914701461792,
436
+ "logps/chosen": -282.3851623535156,
437
+ "logps/rejected": -144.63890075683594,
438
+ "loss": 0.0585,
439
+ "rewards/accuracies": 0.949999988079071,
440
+ "rewards/chosen": -0.35556235909461975,
441
+ "rewards/margins": 6.836700439453125,
442
+ "rewards/rejected": -7.192262172698975,
443
+ "step": 300
444
+ },
445
+ {
446
+ "epoch": 0.18,
447
+ "learning_rate": 4.569515306122449e-07,
448
+ "logits/chosen": -2.2799079418182373,
449
+ "logits/rejected": -2.1546857357025146,
450
+ "logps/chosen": -307.00457763671875,
451
+ "logps/rejected": -144.7294921875,
452
+ "loss": 0.0401,
453
+ "rewards/accuracies": 0.987500011920929,
454
+ "rewards/chosen": 0.4573780596256256,
455
+ "rewards/margins": 7.150294303894043,
456
+ "rewards/rejected": -6.6929168701171875,
457
+ "step": 310
458
+ },
459
+ {
460
+ "epoch": 0.18,
461
+ "learning_rate": 4.5376275510204084e-07,
462
+ "logits/chosen": -2.2312560081481934,
463
+ "logits/rejected": -2.0914812088012695,
464
+ "logps/chosen": -261.8872985839844,
465
+ "logps/rejected": -139.28701782226562,
466
+ "loss": 0.0619,
467
+ "rewards/accuracies": 0.9750000238418579,
468
+ "rewards/chosen": -0.15933240950107574,
469
+ "rewards/margins": 6.717618465423584,
470
+ "rewards/rejected": -6.876950740814209,
471
+ "step": 320
472
+ },
473
+ {
474
+ "epoch": 0.19,
475
+ "learning_rate": 4.505739795918367e-07,
476
+ "logits/chosen": -2.2225937843322754,
477
+ "logits/rejected": -2.0411500930786133,
478
+ "logps/chosen": -304.5137634277344,
479
+ "logps/rejected": -132.83473205566406,
480
+ "loss": 0.065,
481
+ "rewards/accuracies": 0.9624999761581421,
482
+ "rewards/chosen": 0.07894410938024521,
483
+ "rewards/margins": 6.419223785400391,
484
+ "rewards/rejected": -6.340278625488281,
485
+ "step": 330
486
+ },
487
+ {
488
+ "epoch": 0.2,
489
+ "learning_rate": 4.473852040816326e-07,
490
+ "logits/chosen": -2.304166793823242,
491
+ "logits/rejected": -2.144609212875366,
492
+ "logps/chosen": -274.3529968261719,
493
+ "logps/rejected": -131.06246948242188,
494
+ "loss": 0.0907,
495
+ "rewards/accuracies": 0.987500011920929,
496
+ "rewards/chosen": -0.192825049161911,
497
+ "rewards/margins": 6.38867712020874,
498
+ "rewards/rejected": -6.5815019607543945,
499
+ "step": 340
500
+ },
501
+ {
502
+ "epoch": 0.2,
503
+ "learning_rate": 4.441964285714285e-07,
504
+ "logits/chosen": -2.305800676345825,
505
+ "logits/rejected": -2.111426830291748,
506
+ "logps/chosen": -340.38702392578125,
507
+ "logps/rejected": -148.1328887939453,
508
+ "loss": 0.0496,
509
+ "rewards/accuracies": 0.9750000238418579,
510
+ "rewards/chosen": -0.030660927295684814,
511
+ "rewards/margins": 7.517220973968506,
512
+ "rewards/rejected": -7.547882080078125,
513
+ "step": 350
514
+ },
515
+ {
516
+ "epoch": 0.21,
517
+ "learning_rate": 4.4100765306122445e-07,
518
+ "logits/chosen": -2.249474287033081,
519
+ "logits/rejected": -2.114851474761963,
520
+ "logps/chosen": -276.80303955078125,
521
+ "logps/rejected": -139.46585083007812,
522
+ "loss": 0.06,
523
+ "rewards/accuracies": 0.9624999761581421,
524
+ "rewards/chosen": -0.2887513041496277,
525
+ "rewards/margins": 7.209240913391113,
526
+ "rewards/rejected": -7.497992038726807,
527
+ "step": 360
528
+ },
529
+ {
530
+ "epoch": 0.21,
531
+ "learning_rate": 4.378188775510204e-07,
532
+ "logits/chosen": -2.227515935897827,
533
+ "logits/rejected": -2.0700523853302,
534
+ "logps/chosen": -279.75494384765625,
535
+ "logps/rejected": -127.5013198852539,
536
+ "loss": 0.0517,
537
+ "rewards/accuracies": 0.9624999761581421,
538
+ "rewards/chosen": 0.12594351172447205,
539
+ "rewards/margins": 6.841870307922363,
540
+ "rewards/rejected": -6.715925693511963,
541
+ "step": 370
542
+ },
543
+ {
544
+ "epoch": 0.22,
545
+ "learning_rate": 4.346301020408163e-07,
546
+ "logits/chosen": -2.196239948272705,
547
+ "logits/rejected": -2.0718905925750732,
548
+ "logps/chosen": -291.7792663574219,
549
+ "logps/rejected": -154.97567749023438,
550
+ "loss": 0.0363,
551
+ "rewards/accuracies": 0.9750000238418579,
552
+ "rewards/chosen": -0.4449889063835144,
553
+ "rewards/margins": 6.8718414306640625,
554
+ "rewards/rejected": -7.316830635070801,
555
+ "step": 380
556
+ },
557
+ {
558
+ "epoch": 0.22,
559
+ "learning_rate": 4.314413265306122e-07,
560
+ "logits/chosen": -2.2047321796417236,
561
+ "logits/rejected": -2.04673171043396,
562
+ "logps/chosen": -278.8887023925781,
563
+ "logps/rejected": -148.29612731933594,
564
+ "loss": 0.0549,
565
+ "rewards/accuracies": 0.987500011920929,
566
+ "rewards/chosen": 0.8236015439033508,
567
+ "rewards/margins": 8.402433395385742,
568
+ "rewards/rejected": -7.578830718994141,
569
+ "step": 390
570
+ },
571
+ {
572
+ "epoch": 0.23,
573
+ "learning_rate": 4.282525510204081e-07,
574
+ "logits/chosen": -2.2842113971710205,
575
+ "logits/rejected": -2.1249568462371826,
576
+ "logps/chosen": -252.1570587158203,
577
+ "logps/rejected": -131.91351318359375,
578
+ "loss": 0.0658,
579
+ "rewards/accuracies": 0.949999988079071,
580
+ "rewards/chosen": -0.4603995382785797,
581
+ "rewards/margins": 6.588088035583496,
582
+ "rewards/rejected": -7.048487186431885,
583
+ "step": 400
584
+ },
585
+ {
586
+ "epoch": 0.24,
587
+ "learning_rate": 4.2506377551020405e-07,
588
+ "logits/chosen": -2.2204508781433105,
589
+ "logits/rejected": -2.0328991413116455,
590
+ "logps/chosen": -277.8280334472656,
591
+ "logps/rejected": -131.99388122558594,
592
+ "loss": 0.0548,
593
+ "rewards/accuracies": 0.9750000238418579,
594
+ "rewards/chosen": -0.6896305084228516,
595
+ "rewards/margins": 6.525734901428223,
596
+ "rewards/rejected": -7.215365409851074,
597
+ "step": 410
598
+ },
599
+ {
600
+ "epoch": 0.24,
601
+ "learning_rate": 4.21875e-07,
602
+ "logits/chosen": -2.281752586364746,
603
+ "logits/rejected": -2.1253433227539062,
604
+ "logps/chosen": -295.5191345214844,
605
+ "logps/rejected": -153.25,
606
+ "loss": 0.0481,
607
+ "rewards/accuracies": 0.9624999761581421,
608
+ "rewards/chosen": -0.4880848824977875,
609
+ "rewards/margins": 7.4977922439575195,
610
+ "rewards/rejected": -7.98587703704834,
611
+ "step": 420
612
+ },
613
+ {
614
+ "epoch": 0.25,
615
+ "learning_rate": 4.186862244897959e-07,
616
+ "logits/chosen": -2.2700979709625244,
617
+ "logits/rejected": -2.0792222023010254,
618
+ "logps/chosen": -246.54293823242188,
619
+ "logps/rejected": -140.80882263183594,
620
+ "loss": 0.047,
621
+ "rewards/accuracies": 0.9624999761581421,
622
+ "rewards/chosen": -0.25689664483070374,
623
+ "rewards/margins": 7.780982971191406,
624
+ "rewards/rejected": -8.037880897521973,
625
+ "step": 430
626
+ },
627
+ {
628
+ "epoch": 0.25,
629
+ "learning_rate": 4.1549744897959185e-07,
630
+ "logits/chosen": -2.2732179164886475,
631
+ "logits/rejected": -2.10884690284729,
632
+ "logps/chosen": -301.9032897949219,
633
+ "logps/rejected": -160.38320922851562,
634
+ "loss": 0.0668,
635
+ "rewards/accuracies": 0.987500011920929,
636
+ "rewards/chosen": -0.4491173326969147,
637
+ "rewards/margins": 8.146552085876465,
638
+ "rewards/rejected": -8.595669746398926,
639
+ "step": 440
640
+ },
641
+ {
642
+ "epoch": 0.26,
643
+ "learning_rate": 4.123086734693877e-07,
644
+ "logits/chosen": -2.259704113006592,
645
+ "logits/rejected": -2.1263084411621094,
646
+ "logps/chosen": -248.52328491210938,
647
+ "logps/rejected": -142.90264892578125,
648
+ "loss": 0.037,
649
+ "rewards/accuracies": 1.0,
650
+ "rewards/chosen": -0.04825396090745926,
651
+ "rewards/margins": 7.279501914978027,
652
+ "rewards/rejected": -7.327755928039551,
653
+ "step": 450
654
+ },
655
+ {
656
+ "epoch": 0.26,
657
+ "learning_rate": 4.0911989795918365e-07,
658
+ "logits/chosen": -2.1421685218811035,
659
+ "logits/rejected": -2.0395822525024414,
660
+ "logps/chosen": -253.42160034179688,
661
+ "logps/rejected": -134.23333740234375,
662
+ "loss": 0.0699,
663
+ "rewards/accuracies": 0.987500011920929,
664
+ "rewards/chosen": -0.702133059501648,
665
+ "rewards/margins": 6.782039642333984,
666
+ "rewards/rejected": -7.484173774719238,
667
+ "step": 460
668
+ },
669
+ {
670
+ "epoch": 0.27,
671
+ "learning_rate": 4.059311224489796e-07,
672
+ "logits/chosen": -2.2776074409484863,
673
+ "logits/rejected": -2.075753688812256,
674
+ "logps/chosen": -261.2068176269531,
675
+ "logps/rejected": -140.44773864746094,
676
+ "loss": 0.0891,
677
+ "rewards/accuracies": 0.987500011920929,
678
+ "rewards/chosen": -0.07892769575119019,
679
+ "rewards/margins": 7.501675605773926,
680
+ "rewards/rejected": -7.580602169036865,
681
+ "step": 470
682
+ },
683
+ {
684
+ "epoch": 0.28,
685
+ "learning_rate": 4.027423469387755e-07,
686
+ "logits/chosen": -2.144608974456787,
687
+ "logits/rejected": -2.0202698707580566,
688
+ "logps/chosen": -230.21023559570312,
689
+ "logps/rejected": -146.96124267578125,
690
+ "loss": 0.1102,
691
+ "rewards/accuracies": 0.9375,
692
+ "rewards/chosen": -1.1200422048568726,
693
+ "rewards/margins": 7.595203399658203,
694
+ "rewards/rejected": -8.715245246887207,
695
+ "step": 480
696
+ },
697
+ {
698
+ "epoch": 0.28,
699
+ "learning_rate": 3.9955357142857144e-07,
700
+ "logits/chosen": -2.1214406490325928,
701
+ "logits/rejected": -1.9482629299163818,
702
+ "logps/chosen": -271.87554931640625,
703
+ "logps/rejected": -156.08773803710938,
704
+ "loss": 0.0559,
705
+ "rewards/accuracies": 0.9750000238418579,
706
+ "rewards/chosen": -1.0995118618011475,
707
+ "rewards/margins": 7.6725311279296875,
708
+ "rewards/rejected": -8.772043228149414,
709
+ "step": 490
710
+ },
711
+ {
712
+ "epoch": 0.29,
713
+ "learning_rate": 3.963647959183674e-07,
714
+ "logits/chosen": -2.222761392593384,
715
+ "logits/rejected": -2.065107822418213,
716
+ "logps/chosen": -331.9025573730469,
717
+ "logps/rejected": -174.4883575439453,
718
+ "loss": 0.0811,
719
+ "rewards/accuracies": 0.9750000238418579,
720
+ "rewards/chosen": -0.9387734532356262,
721
+ "rewards/margins": 8.301498413085938,
722
+ "rewards/rejected": -9.240272521972656,
723
+ "step": 500
724
+ },
725
+ {
726
+ "epoch": 0.29,
727
+ "eval_logits/chosen": -2.1856682300567627,
728
+ "eval_logits/rejected": -2.003657579421997,
729
+ "eval_logps/chosen": -304.0705261230469,
730
+ "eval_logps/rejected": -161.6812744140625,
731
+ "eval_loss": 0.05427403748035431,
732
+ "eval_rewards/accuracies": 0.9719827771186829,
733
+ "eval_rewards/chosen": -0.516727089881897,
734
+ "eval_rewards/margins": 8.396077156066895,
735
+ "eval_rewards/rejected": -8.91280460357666,
736
+ "eval_runtime": 515.5452,
737
+ "eval_samples_per_second": 3.575,
738
+ "eval_steps_per_second": 0.113,
739
+ "step": 500
740
+ },
741
+ {
742
+ "epoch": 0.29,
743
+ "learning_rate": 3.931760204081632e-07,
744
+ "logits/chosen": -2.1678292751312256,
745
+ "logits/rejected": -2.0178639888763428,
746
+ "logps/chosen": -281.6822509765625,
747
+ "logps/rejected": -152.28237915039062,
748
+ "loss": 0.0387,
749
+ "rewards/accuracies": 1.0,
750
+ "rewards/chosen": 0.07496733963489532,
751
+ "rewards/margins": 8.705911636352539,
752
+ "rewards/rejected": -8.63094425201416,
753
+ "step": 510
754
+ },
755
+ {
756
+ "epoch": 0.3,
757
+ "learning_rate": 3.8998724489795913e-07,
758
+ "logits/chosen": -2.124497652053833,
759
+ "logits/rejected": -1.9633057117462158,
760
+ "logps/chosen": -272.04278564453125,
761
+ "logps/rejected": -154.85665893554688,
762
+ "loss": 0.0747,
763
+ "rewards/accuracies": 0.9624999761581421,
764
+ "rewards/chosen": -0.6167508363723755,
765
+ "rewards/margins": 7.811673164367676,
766
+ "rewards/rejected": -8.428422927856445,
767
+ "step": 520
768
+ },
769
+ {
770
+ "epoch": 0.3,
771
+ "learning_rate": 3.8679846938775506e-07,
772
+ "logits/chosen": -2.0578837394714355,
773
+ "logits/rejected": -1.9298282861709595,
774
+ "logps/chosen": -290.99639892578125,
775
+ "logps/rejected": -168.87124633789062,
776
+ "loss": 0.0378,
777
+ "rewards/accuracies": 0.987500011920929,
778
+ "rewards/chosen": -0.20361948013305664,
779
+ "rewards/margins": 9.225129127502441,
780
+ "rewards/rejected": -9.428749084472656,
781
+ "step": 530
782
+ },
783
+ {
784
+ "epoch": 0.31,
785
+ "learning_rate": 3.83609693877551e-07,
786
+ "logits/chosen": -2.1337828636169434,
787
+ "logits/rejected": -1.9600626230239868,
788
+ "logps/chosen": -297.354248046875,
789
+ "logps/rejected": -156.31643676757812,
790
+ "loss": 0.0215,
791
+ "rewards/accuracies": 1.0,
792
+ "rewards/chosen": 0.2819109261035919,
793
+ "rewards/margins": 8.338506698608398,
794
+ "rewards/rejected": -8.056594848632812,
795
+ "step": 540
796
+ },
797
+ {
798
+ "epoch": 0.32,
799
+ "learning_rate": 3.804209183673469e-07,
800
+ "logits/chosen": -2.1427624225616455,
801
+ "logits/rejected": -1.9869788885116577,
802
+ "logps/chosen": -314.75238037109375,
803
+ "logps/rejected": -156.91665649414062,
804
+ "loss": 0.0719,
805
+ "rewards/accuracies": 0.9750000238418579,
806
+ "rewards/chosen": -0.07266904413700104,
807
+ "rewards/margins": 8.285478591918945,
808
+ "rewards/rejected": -8.358146667480469,
809
+ "step": 550
810
+ },
811
+ {
812
+ "epoch": 0.32,
813
+ "learning_rate": 3.7723214285714285e-07,
814
+ "logits/chosen": -2.193801164627075,
815
+ "logits/rejected": -2.023911476135254,
816
+ "logps/chosen": -280.3459777832031,
817
+ "logps/rejected": -159.136474609375,
818
+ "loss": 0.043,
819
+ "rewards/accuracies": 0.987500011920929,
820
+ "rewards/chosen": -0.6412056684494019,
821
+ "rewards/margins": 8.589024543762207,
822
+ "rewards/rejected": -9.230230331420898,
823
+ "step": 560
824
+ },
825
+ {
826
+ "epoch": 0.33,
827
+ "learning_rate": 3.7404336734693873e-07,
828
+ "logits/chosen": -2.1136326789855957,
829
+ "logits/rejected": -1.970041275024414,
830
+ "logps/chosen": -288.44378662109375,
831
+ "logps/rejected": -163.71575927734375,
832
+ "loss": 0.0628,
833
+ "rewards/accuracies": 0.9624999761581421,
834
+ "rewards/chosen": -0.7930206656455994,
835
+ "rewards/margins": 8.54659652709961,
836
+ "rewards/rejected": -9.339616775512695,
837
+ "step": 570
838
+ },
839
+ {
840
+ "epoch": 0.33,
841
+ "learning_rate": 3.7085459183673466e-07,
842
+ "logits/chosen": -2.181980848312378,
843
+ "logits/rejected": -2.0017356872558594,
844
+ "logps/chosen": -269.84637451171875,
845
+ "logps/rejected": -161.8075714111328,
846
+ "loss": 0.0811,
847
+ "rewards/accuracies": 0.9624999761581421,
848
+ "rewards/chosen": -1.1092716455459595,
849
+ "rewards/margins": 8.634279251098633,
850
+ "rewards/rejected": -9.743551254272461,
851
+ "step": 580
852
+ },
853
+ {
854
+ "epoch": 0.34,
855
+ "learning_rate": 3.676658163265306e-07,
856
+ "logits/chosen": -2.1273303031921387,
857
+ "logits/rejected": -1.9688310623168945,
858
+ "logps/chosen": -321.7066650390625,
859
+ "logps/rejected": -174.16650390625,
860
+ "loss": 0.0425,
861
+ "rewards/accuracies": 1.0,
862
+ "rewards/chosen": -0.19933632016181946,
863
+ "rewards/margins": 9.354225158691406,
864
+ "rewards/rejected": -9.553561210632324,
865
+ "step": 590
866
+ },
867
+ {
868
+ "epoch": 0.34,
869
+ "learning_rate": 3.644770408163265e-07,
870
+ "logits/chosen": -2.167726993560791,
871
+ "logits/rejected": -1.9377820491790771,
872
+ "logps/chosen": -290.7398376464844,
873
+ "logps/rejected": -156.63296508789062,
874
+ "loss": 0.0404,
875
+ "rewards/accuracies": 0.987500011920929,
876
+ "rewards/chosen": -0.34938400983810425,
877
+ "rewards/margins": 8.953282356262207,
878
+ "rewards/rejected": -9.302667617797852,
879
+ "step": 600
880
+ },
881
+ {
882
+ "epoch": 0.35,
883
+ "learning_rate": 3.6128826530612245e-07,
884
+ "logits/chosen": -2.037559747695923,
885
+ "logits/rejected": -1.890639305114746,
886
+ "logps/chosen": -251.7969970703125,
887
+ "logps/rejected": -145.12217712402344,
888
+ "loss": 0.0411,
889
+ "rewards/accuracies": 0.9624999761581421,
890
+ "rewards/chosen": -0.910443902015686,
891
+ "rewards/margins": 7.705912113189697,
892
+ "rewards/rejected": -8.61635684967041,
893
+ "step": 610
894
+ },
895
+ {
896
+ "epoch": 0.36,
897
+ "learning_rate": 3.580994897959184e-07,
898
+ "logits/chosen": -2.1508290767669678,
899
+ "logits/rejected": -1.962224006652832,
900
+ "logps/chosen": -297.48358154296875,
901
+ "logps/rejected": -163.69259643554688,
902
+ "loss": 0.0546,
903
+ "rewards/accuracies": 1.0,
904
+ "rewards/chosen": -0.33932703733444214,
905
+ "rewards/margins": 9.108827590942383,
906
+ "rewards/rejected": -9.448153495788574,
907
+ "step": 620
908
+ },
909
+ {
910
+ "epoch": 0.36,
911
+ "learning_rate": 3.5491071428571426e-07,
912
+ "logits/chosen": -2.179138422012329,
913
+ "logits/rejected": -2.000100612640381,
914
+ "logps/chosen": -311.79119873046875,
915
+ "logps/rejected": -169.6217041015625,
916
+ "loss": 0.044,
917
+ "rewards/accuracies": 0.9750000238418579,
918
+ "rewards/chosen": -0.6057596206665039,
919
+ "rewards/margins": 8.855595588684082,
920
+ "rewards/rejected": -9.461355209350586,
921
+ "step": 630
922
+ },
923
+ {
924
+ "epoch": 0.37,
925
+ "learning_rate": 3.517219387755102e-07,
926
+ "logits/chosen": -2.2315735816955566,
927
+ "logits/rejected": -2.1057209968566895,
928
+ "logps/chosen": -287.6869812011719,
929
+ "logps/rejected": -158.8007354736328,
930
+ "loss": 0.0373,
931
+ "rewards/accuracies": 0.987500011920929,
932
+ "rewards/chosen": -0.49593955278396606,
933
+ "rewards/margins": 7.963850975036621,
934
+ "rewards/rejected": -8.459790229797363,
935
+ "step": 640
936
+ },
937
+ {
938
+ "epoch": 0.37,
939
+ "learning_rate": 3.485331632653061e-07,
940
+ "logits/chosen": -2.150575876235962,
941
+ "logits/rejected": -2.0577378273010254,
942
+ "logps/chosen": -261.49163818359375,
943
+ "logps/rejected": -165.69863891601562,
944
+ "loss": 0.0429,
945
+ "rewards/accuracies": 0.949999988079071,
946
+ "rewards/chosen": -0.6350365281105042,
947
+ "rewards/margins": 8.936600685119629,
948
+ "rewards/rejected": -9.571638107299805,
949
+ "step": 650
950
+ },
951
+ {
952
+ "epoch": 0.38,
953
+ "learning_rate": 3.4534438775510205e-07,
954
+ "logits/chosen": -2.080155849456787,
955
+ "logits/rejected": -1.9665069580078125,
956
+ "logps/chosen": -288.9784240722656,
957
+ "logps/rejected": -173.96405029296875,
958
+ "loss": 0.0592,
959
+ "rewards/accuracies": 0.9624999761581421,
960
+ "rewards/chosen": -0.8485971689224243,
961
+ "rewards/margins": 9.569430351257324,
962
+ "rewards/rejected": -10.418027877807617,
963
+ "step": 660
964
+ },
965
+ {
966
+ "epoch": 0.38,
967
+ "learning_rate": 3.4215561224489793e-07,
968
+ "logits/chosen": -2.1020286083221436,
969
+ "logits/rejected": -2.015242338180542,
970
+ "logps/chosen": -286.51593017578125,
971
+ "logps/rejected": -197.00831604003906,
972
+ "loss": 0.0479,
973
+ "rewards/accuracies": 0.987500011920929,
974
+ "rewards/chosen": -0.8589476346969604,
975
+ "rewards/margins": 11.049829483032227,
976
+ "rewards/rejected": -11.90877628326416,
977
+ "step": 670
978
+ },
979
+ {
980
+ "epoch": 0.39,
981
+ "learning_rate": 3.3896683673469386e-07,
982
+ "logits/chosen": -2.207352876663208,
983
+ "logits/rejected": -2.026625394821167,
984
+ "logps/chosen": -283.9382019042969,
985
+ "logps/rejected": -170.56997680664062,
986
+ "loss": 0.0686,
987
+ "rewards/accuracies": 0.9750000238418579,
988
+ "rewards/chosen": -1.428898811340332,
989
+ "rewards/margins": 8.341761589050293,
990
+ "rewards/rejected": -9.770662307739258,
991
+ "step": 680
992
+ },
993
+ {
994
+ "epoch": 0.4,
995
+ "learning_rate": 3.3577806122448974e-07,
996
+ "logits/chosen": -2.226600408554077,
997
+ "logits/rejected": -1.9801476001739502,
998
+ "logps/chosen": -318.254150390625,
999
+ "logps/rejected": -169.10043334960938,
1000
+ "loss": 0.0859,
1001
+ "rewards/accuracies": 0.9750000238418579,
1002
+ "rewards/chosen": -0.8905173540115356,
1003
+ "rewards/margins": 8.872238159179688,
1004
+ "rewards/rejected": -9.762755393981934,
1005
+ "step": 690
1006
+ },
1007
+ {
1008
+ "epoch": 0.4,
1009
+ "learning_rate": 3.3258928571428567e-07,
1010
+ "logits/chosen": -2.1024184226989746,
1011
+ "logits/rejected": -1.9208921194076538,
1012
+ "logps/chosen": -336.98724365234375,
1013
+ "logps/rejected": -168.90625,
1014
+ "loss": 0.0523,
1015
+ "rewards/accuracies": 0.9750000238418579,
1016
+ "rewards/chosen": -0.09593725204467773,
1017
+ "rewards/margins": 10.029386520385742,
1018
+ "rewards/rejected": -10.125323295593262,
1019
+ "step": 700
1020
+ },
1021
+ {
1022
+ "epoch": 0.41,
1023
+ "learning_rate": 3.294005102040816e-07,
1024
+ "logits/chosen": -2.137101650238037,
1025
+ "logits/rejected": -1.9699983596801758,
1026
+ "logps/chosen": -261.7487487792969,
1027
+ "logps/rejected": -156.60157775878906,
1028
+ "loss": 0.0614,
1029
+ "rewards/accuracies": 0.987500011920929,
1030
+ "rewards/chosen": -0.6025804281234741,
1031
+ "rewards/margins": 8.209660530090332,
1032
+ "rewards/rejected": -8.812241554260254,
1033
+ "step": 710
1034
+ },
1035
+ {
1036
+ "epoch": 0.41,
1037
+ "learning_rate": 3.2621173469387753e-07,
1038
+ "logits/chosen": -2.2646114826202393,
1039
+ "logits/rejected": -2.1247591972351074,
1040
+ "logps/chosen": -329.1217041015625,
1041
+ "logps/rejected": -185.3964385986328,
1042
+ "loss": 0.0578,
1043
+ "rewards/accuracies": 0.987500011920929,
1044
+ "rewards/chosen": -1.2518744468688965,
1045
+ "rewards/margins": 9.136481285095215,
1046
+ "rewards/rejected": -10.388354301452637,
1047
+ "step": 720
1048
+ },
1049
+ {
1050
+ "epoch": 0.42,
1051
+ "learning_rate": 3.2302295918367346e-07,
1052
+ "logits/chosen": -2.3025715351104736,
1053
+ "logits/rejected": -2.1256372928619385,
1054
+ "logps/chosen": -310.5058288574219,
1055
+ "logps/rejected": -170.10592651367188,
1056
+ "loss": 0.029,
1057
+ "rewards/accuracies": 1.0,
1058
+ "rewards/chosen": -1.1804181337356567,
1059
+ "rewards/margins": 8.121767044067383,
1060
+ "rewards/rejected": -9.302184104919434,
1061
+ "step": 730
1062
+ },
1063
+ {
1064
+ "epoch": 0.42,
1065
+ "learning_rate": 3.198341836734694e-07,
1066
+ "logits/chosen": -2.2341511249542236,
1067
+ "logits/rejected": -2.04396915435791,
1068
+ "logps/chosen": -328.27667236328125,
1069
+ "logps/rejected": -192.09335327148438,
1070
+ "loss": 0.0565,
1071
+ "rewards/accuracies": 0.987500011920929,
1072
+ "rewards/chosen": -0.02377413585782051,
1073
+ "rewards/margins": 11.305741310119629,
1074
+ "rewards/rejected": -11.32951545715332,
1075
+ "step": 740
1076
+ },
1077
+ {
1078
+ "epoch": 0.43,
1079
+ "learning_rate": 3.1664540816326527e-07,
1080
+ "logits/chosen": -2.1826584339141846,
1081
+ "logits/rejected": -2.023871660232544,
1082
+ "logps/chosen": -276.5951843261719,
1083
+ "logps/rejected": -160.23538208007812,
1084
+ "loss": 0.0376,
1085
+ "rewards/accuracies": 0.9624999761581421,
1086
+ "rewards/chosen": -0.3585100769996643,
1087
+ "rewards/margins": 9.415512084960938,
1088
+ "rewards/rejected": -9.77402114868164,
1089
+ "step": 750
1090
+ },
1091
+ {
1092
+ "epoch": 0.44,
1093
+ "learning_rate": 3.134566326530612e-07,
1094
+ "logits/chosen": -2.227428436279297,
1095
+ "logits/rejected": -2.070985794067383,
1096
+ "logps/chosen": -294.4335021972656,
1097
+ "logps/rejected": -169.70211791992188,
1098
+ "loss": 0.0343,
1099
+ "rewards/accuracies": 0.9750000238418579,
1100
+ "rewards/chosen": -0.31169813871383667,
1101
+ "rewards/margins": 9.067781448364258,
1102
+ "rewards/rejected": -9.379480361938477,
1103
+ "step": 760
1104
+ },
1105
+ {
1106
+ "epoch": 0.44,
1107
+ "learning_rate": 3.1026785714285713e-07,
1108
+ "logits/chosen": -2.1913247108459473,
1109
+ "logits/rejected": -1.9936134815216064,
1110
+ "logps/chosen": -328.67083740234375,
1111
+ "logps/rejected": -175.294921875,
1112
+ "loss": 0.0432,
1113
+ "rewards/accuracies": 0.987500011920929,
1114
+ "rewards/chosen": -0.3559989333152771,
1115
+ "rewards/margins": 9.043487548828125,
1116
+ "rewards/rejected": -9.399484634399414,
1117
+ "step": 770
1118
+ },
1119
+ {
1120
+ "epoch": 0.45,
1121
+ "learning_rate": 3.0707908163265306e-07,
1122
+ "logits/chosen": -2.1028852462768555,
1123
+ "logits/rejected": -1.8983999490737915,
1124
+ "logps/chosen": -273.2193908691406,
1125
+ "logps/rejected": -158.0215301513672,
1126
+ "loss": 0.0428,
1127
+ "rewards/accuracies": 0.987500011920929,
1128
+ "rewards/chosen": -0.963762104511261,
1129
+ "rewards/margins": 8.092933654785156,
1130
+ "rewards/rejected": -9.056695938110352,
1131
+ "step": 780
1132
+ },
1133
+ {
1134
+ "epoch": 0.45,
1135
+ "learning_rate": 3.03890306122449e-07,
1136
+ "logits/chosen": -2.1897473335266113,
1137
+ "logits/rejected": -1.9860061407089233,
1138
+ "logps/chosen": -317.2612609863281,
1139
+ "logps/rejected": -167.63690185546875,
1140
+ "loss": 0.0308,
1141
+ "rewards/accuracies": 0.987500011920929,
1142
+ "rewards/chosen": -0.3040831685066223,
1143
+ "rewards/margins": 8.782797813415527,
1144
+ "rewards/rejected": -9.086880683898926,
1145
+ "step": 790
1146
+ },
1147
+ {
1148
+ "epoch": 0.46,
1149
+ "learning_rate": 3.007015306122449e-07,
1150
+ "logits/chosen": -2.2554848194122314,
1151
+ "logits/rejected": -2.0466835498809814,
1152
+ "logps/chosen": -359.5973815917969,
1153
+ "logps/rejected": -168.97488403320312,
1154
+ "loss": 0.0485,
1155
+ "rewards/accuracies": 0.987500011920929,
1156
+ "rewards/chosen": 0.31903451681137085,
1157
+ "rewards/margins": 9.262194633483887,
1158
+ "rewards/rejected": -8.943160057067871,
1159
+ "step": 800
1160
+ },
1161
+ {
1162
+ "epoch": 0.46,
1163
+ "learning_rate": 2.975127551020408e-07,
1164
+ "logits/chosen": -2.0375688076019287,
1165
+ "logits/rejected": -1.8554328680038452,
1166
+ "logps/chosen": -321.64581298828125,
1167
+ "logps/rejected": -150.6351776123047,
1168
+ "loss": 0.0425,
1169
+ "rewards/accuracies": 0.9750000238418579,
1170
+ "rewards/chosen": -0.05941831320524216,
1171
+ "rewards/margins": 8.272252082824707,
1172
+ "rewards/rejected": -8.331669807434082,
1173
+ "step": 810
1174
+ },
1175
+ {
1176
+ "epoch": 0.47,
1177
+ "learning_rate": 2.9432397959183673e-07,
1178
+ "logits/chosen": -2.2465949058532715,
1179
+ "logits/rejected": -2.003244638442993,
1180
+ "logps/chosen": -339.7053527832031,
1181
+ "logps/rejected": -173.89718627929688,
1182
+ "loss": 0.0453,
1183
+ "rewards/accuracies": 0.987500011920929,
1184
+ "rewards/chosen": -1.0983786582946777,
1185
+ "rewards/margins": 8.536051750183105,
1186
+ "rewards/rejected": -9.634429931640625,
1187
+ "step": 820
1188
+ },
1189
+ {
1190
+ "epoch": 0.48,
1191
+ "learning_rate": 2.911352040816326e-07,
1192
+ "logits/chosen": -2.157315254211426,
1193
+ "logits/rejected": -2.0112011432647705,
1194
+ "logps/chosen": -285.68023681640625,
1195
+ "logps/rejected": -163.58163452148438,
1196
+ "loss": 0.0364,
1197
+ "rewards/accuracies": 0.9624999761581421,
1198
+ "rewards/chosen": -0.5791069269180298,
1199
+ "rewards/margins": 8.840615272521973,
1200
+ "rewards/rejected": -9.419723510742188,
1201
+ "step": 830
1202
+ },
1203
+ {
1204
+ "epoch": 0.48,
1205
+ "learning_rate": 2.8794642857142854e-07,
1206
+ "logits/chosen": -2.1603357791900635,
1207
+ "logits/rejected": -1.983139991760254,
1208
+ "logps/chosen": -275.4844665527344,
1209
+ "logps/rejected": -165.48094177246094,
1210
+ "loss": 0.036,
1211
+ "rewards/accuracies": 0.987500011920929,
1212
+ "rewards/chosen": -0.849933922290802,
1213
+ "rewards/margins": 8.592988967895508,
1214
+ "rewards/rejected": -9.442923545837402,
1215
+ "step": 840
1216
+ },
1217
+ {
1218
+ "epoch": 0.49,
1219
+ "learning_rate": 2.8475765306122447e-07,
1220
+ "logits/chosen": -2.1025400161743164,
1221
+ "logits/rejected": -1.902698278427124,
1222
+ "logps/chosen": -294.97662353515625,
1223
+ "logps/rejected": -164.88980102539062,
1224
+ "loss": 0.0255,
1225
+ "rewards/accuracies": 0.987500011920929,
1226
+ "rewards/chosen": 0.18203288316726685,
1227
+ "rewards/margins": 9.839866638183594,
1228
+ "rewards/rejected": -9.657832145690918,
1229
+ "step": 850
1230
+ },
1231
+ {
1232
+ "epoch": 0.49,
1233
+ "learning_rate": 2.815688775510204e-07,
1234
+ "logits/chosen": -2.1174845695495605,
1235
+ "logits/rejected": -1.9745981693267822,
1236
+ "logps/chosen": -289.689208984375,
1237
+ "logps/rejected": -225.04946899414062,
1238
+ "loss": 0.0306,
1239
+ "rewards/accuracies": 0.9750000238418579,
1240
+ "rewards/chosen": -0.11574029922485352,
1241
+ "rewards/margins": 14.800374031066895,
1242
+ "rewards/rejected": -14.916116714477539,
1243
+ "step": 860
1244
+ },
1245
+ {
1246
+ "epoch": 0.5,
1247
+ "learning_rate": 2.783801020408163e-07,
1248
+ "logits/chosen": -2.099398136138916,
1249
+ "logits/rejected": -1.9381110668182373,
1250
+ "logps/chosen": -280.7049255371094,
1251
+ "logps/rejected": -162.89515686035156,
1252
+ "loss": 0.0412,
1253
+ "rewards/accuracies": 0.987500011920929,
1254
+ "rewards/chosen": -0.6393367052078247,
1255
+ "rewards/margins": 9.100909233093262,
1256
+ "rewards/rejected": -9.740245819091797,
1257
+ "step": 870
1258
+ },
1259
+ {
1260
+ "epoch": 0.5,
1261
+ "learning_rate": 2.751913265306122e-07,
1262
+ "logits/chosen": -2.130143404006958,
1263
+ "logits/rejected": -1.8786506652832031,
1264
+ "logps/chosen": -254.42440795898438,
1265
+ "logps/rejected": -148.86289978027344,
1266
+ "loss": 0.0416,
1267
+ "rewards/accuracies": 0.987500011920929,
1268
+ "rewards/chosen": -0.5891353487968445,
1269
+ "rewards/margins": 8.205097198486328,
1270
+ "rewards/rejected": -8.794232368469238,
1271
+ "step": 880
1272
+ },
1273
+ {
1274
+ "epoch": 0.51,
1275
+ "learning_rate": 2.7200255102040814e-07,
1276
+ "logits/chosen": -2.0689034461975098,
1277
+ "logits/rejected": -2.004838466644287,
1278
+ "logps/chosen": -253.21847534179688,
1279
+ "logps/rejected": -173.26658630371094,
1280
+ "loss": 0.0437,
1281
+ "rewards/accuracies": 0.987500011920929,
1282
+ "rewards/chosen": -0.9242547154426575,
1283
+ "rewards/margins": 8.997157096862793,
1284
+ "rewards/rejected": -9.921411514282227,
1285
+ "step": 890
1286
+ },
1287
+ {
1288
+ "epoch": 0.52,
1289
+ "learning_rate": 2.6881377551020407e-07,
1290
+ "logits/chosen": -2.24395751953125,
1291
+ "logits/rejected": -2.0530552864074707,
1292
+ "logps/chosen": -276.29327392578125,
1293
+ "logps/rejected": -168.1978759765625,
1294
+ "loss": 0.0338,
1295
+ "rewards/accuracies": 0.987500011920929,
1296
+ "rewards/chosen": -0.6147713661193848,
1297
+ "rewards/margins": 9.07136058807373,
1298
+ "rewards/rejected": -9.686132431030273,
1299
+ "step": 900
1300
+ },
1301
+ {
1302
+ "epoch": 0.52,
1303
+ "learning_rate": 2.65625e-07,
1304
+ "logits/chosen": -2.1872873306274414,
1305
+ "logits/rejected": -1.9537794589996338,
1306
+ "logps/chosen": -313.37164306640625,
1307
+ "logps/rejected": -169.32046508789062,
1308
+ "loss": 0.0408,
1309
+ "rewards/accuracies": 0.9750000238418579,
1310
+ "rewards/chosen": -0.2682819664478302,
1311
+ "rewards/margins": 9.430694580078125,
1312
+ "rewards/rejected": -9.69897747039795,
1313
+ "step": 910
1314
+ },
1315
+ {
1316
+ "epoch": 0.53,
1317
+ "learning_rate": 2.6243622448979593e-07,
1318
+ "logits/chosen": -2.152787208557129,
1319
+ "logits/rejected": -1.9356346130371094,
1320
+ "logps/chosen": -280.98541259765625,
1321
+ "logps/rejected": -152.12704467773438,
1322
+ "loss": 0.0839,
1323
+ "rewards/accuracies": 0.987500011920929,
1324
+ "rewards/chosen": -0.25039607286453247,
1325
+ "rewards/margins": 8.020809173583984,
1326
+ "rewards/rejected": -8.271204948425293,
1327
+ "step": 920
1328
+ },
1329
+ {
1330
+ "epoch": 0.53,
1331
+ "learning_rate": 2.592474489795918e-07,
1332
+ "logits/chosen": -2.0773465633392334,
1333
+ "logits/rejected": -1.9478585720062256,
1334
+ "logps/chosen": -294.1889953613281,
1335
+ "logps/rejected": -162.88656616210938,
1336
+ "loss": 0.0256,
1337
+ "rewards/accuracies": 0.987500011920929,
1338
+ "rewards/chosen": -0.10252566635608673,
1339
+ "rewards/margins": 9.275411605834961,
1340
+ "rewards/rejected": -9.377937316894531,
1341
+ "step": 930
1342
+ },
1343
+ {
1344
+ "epoch": 0.54,
1345
+ "learning_rate": 2.5605867346938774e-07,
1346
+ "logits/chosen": -2.1434168815612793,
1347
+ "logits/rejected": -2.0151402950286865,
1348
+ "logps/chosen": -255.9226531982422,
1349
+ "logps/rejected": -164.32913208007812,
1350
+ "loss": 0.033,
1351
+ "rewards/accuracies": 0.987500011920929,
1352
+ "rewards/chosen": -0.7857500314712524,
1353
+ "rewards/margins": 9.118104934692383,
1354
+ "rewards/rejected": -9.903855323791504,
1355
+ "step": 940
1356
+ },
1357
+ {
1358
+ "epoch": 0.55,
1359
+ "learning_rate": 2.5286989795918367e-07,
1360
+ "logits/chosen": -2.177273750305176,
1361
+ "logits/rejected": -2.035215139389038,
1362
+ "logps/chosen": -275.280517578125,
1363
+ "logps/rejected": -174.3971405029297,
1364
+ "loss": 0.0274,
1365
+ "rewards/accuracies": 0.987500011920929,
1366
+ "rewards/chosen": -0.5861722826957703,
1367
+ "rewards/margins": 9.521924018859863,
1368
+ "rewards/rejected": -10.1080961227417,
1369
+ "step": 950
1370
+ },
1371
+ {
1372
+ "epoch": 0.55,
1373
+ "learning_rate": 2.4968112244897954e-07,
1374
+ "logits/chosen": -2.123955011367798,
1375
+ "logits/rejected": -1.9494431018829346,
1376
+ "logps/chosen": -271.4372863769531,
1377
+ "logps/rejected": -159.25759887695312,
1378
+ "loss": 0.0183,
1379
+ "rewards/accuracies": 0.9750000238418579,
1380
+ "rewards/chosen": -0.16222012042999268,
1381
+ "rewards/margins": 8.81199836730957,
1382
+ "rewards/rejected": -8.974218368530273,
1383
+ "step": 960
1384
+ },
1385
+ {
1386
+ "epoch": 0.56,
1387
+ "learning_rate": 2.464923469387755e-07,
1388
+ "logits/chosen": -2.1805062294006348,
1389
+ "logits/rejected": -1.9929234981536865,
1390
+ "logps/chosen": -312.1669006347656,
1391
+ "logps/rejected": -173.0575714111328,
1392
+ "loss": 0.0542,
1393
+ "rewards/accuracies": 1.0,
1394
+ "rewards/chosen": 0.16012373566627502,
1395
+ "rewards/margins": 10.075935363769531,
1396
+ "rewards/rejected": -9.915812492370605,
1397
+ "step": 970
1398
+ },
1399
+ {
1400
+ "epoch": 0.56,
1401
+ "learning_rate": 2.433035714285714e-07,
1402
+ "logits/chosen": -2.1818442344665527,
1403
+ "logits/rejected": -1.9997507333755493,
1404
+ "logps/chosen": -302.0064392089844,
1405
+ "logps/rejected": -165.9043731689453,
1406
+ "loss": 0.0448,
1407
+ "rewards/accuracies": 0.9750000238418579,
1408
+ "rewards/chosen": -0.39435887336730957,
1409
+ "rewards/margins": 8.824365615844727,
1410
+ "rewards/rejected": -9.21872329711914,
1411
+ "step": 980
1412
+ },
1413
+ {
1414
+ "epoch": 0.57,
1415
+ "learning_rate": 2.4011479591836734e-07,
1416
+ "logits/chosen": -2.17393159866333,
1417
+ "logits/rejected": -2.0282928943634033,
1418
+ "logps/chosen": -297.4441833496094,
1419
+ "logps/rejected": -168.88804626464844,
1420
+ "loss": 0.0296,
1421
+ "rewards/accuracies": 0.987500011920929,
1422
+ "rewards/chosen": -0.8467293977737427,
1423
+ "rewards/margins": 8.895944595336914,
1424
+ "rewards/rejected": -9.742671966552734,
1425
+ "step": 990
1426
+ },
1427
+ {
1428
+ "epoch": 0.57,
1429
+ "learning_rate": 2.3692602040816327e-07,
1430
+ "logits/chosen": -2.210867166519165,
1431
+ "logits/rejected": -1.9793678522109985,
1432
+ "logps/chosen": -287.63763427734375,
1433
+ "logps/rejected": -174.19747924804688,
1434
+ "loss": 0.0362,
1435
+ "rewards/accuracies": 0.9750000238418579,
1436
+ "rewards/chosen": -0.5712664723396301,
1437
+ "rewards/margins": 10.053550720214844,
1438
+ "rewards/rejected": -10.62481689453125,
1439
+ "step": 1000
1440
+ },
1441
+ {
1442
+ "epoch": 0.57,
1443
+ "eval_logits/chosen": -2.203035593032837,
1444
+ "eval_logits/rejected": -2.0113065242767334,
1445
+ "eval_logps/chosen": -303.8834228515625,
1446
+ "eval_logps/rejected": -168.3771209716797,
1447
+ "eval_loss": 0.04832219332456589,
1448
+ "eval_rewards/accuracies": 0.9719827771186829,
1449
+ "eval_rewards/chosen": -0.4980185925960541,
1450
+ "eval_rewards/margins": 9.084371566772461,
1451
+ "eval_rewards/rejected": -9.582389831542969,
1452
+ "eval_runtime": 501.7343,
1453
+ "eval_samples_per_second": 3.673,
1454
+ "eval_steps_per_second": 0.116,
1455
+ "step": 1000
1456
+ },
1457
+ {
1458
+ "epoch": 0.58,
1459
+ "learning_rate": 2.3373724489795917e-07,
1460
+ "logits/chosen": -2.2325656414031982,
1461
+ "logits/rejected": -2.099182605743408,
1462
+ "logps/chosen": -298.896240234375,
1463
+ "logps/rejected": -178.80360412597656,
1464
+ "loss": 0.0381,
1465
+ "rewards/accuracies": 0.9750000238418579,
1466
+ "rewards/chosen": -1.1191880702972412,
1467
+ "rewards/margins": 8.851495742797852,
1468
+ "rewards/rejected": -9.970685005187988,
1469
+ "step": 1010
1470
+ },
1471
+ {
1472
+ "epoch": 0.59,
1473
+ "learning_rate": 2.305484693877551e-07,
1474
+ "logits/chosen": -2.1347708702087402,
1475
+ "logits/rejected": -1.9578421115875244,
1476
+ "logps/chosen": -310.70367431640625,
1477
+ "logps/rejected": -173.8622283935547,
1478
+ "loss": 0.022,
1479
+ "rewards/accuracies": 1.0,
1480
+ "rewards/chosen": -0.24546487629413605,
1481
+ "rewards/margins": 9.734888076782227,
1482
+ "rewards/rejected": -9.980352401733398,
1483
+ "step": 1020
1484
+ },
1485
+ {
1486
+ "epoch": 0.59,
1487
+ "learning_rate": 2.2735969387755103e-07,
1488
+ "logits/chosen": -2.114215135574341,
1489
+ "logits/rejected": -1.9470380544662476,
1490
+ "logps/chosen": -259.62371826171875,
1491
+ "logps/rejected": -150.71444702148438,
1492
+ "loss": 0.0263,
1493
+ "rewards/accuracies": 1.0,
1494
+ "rewards/chosen": -0.3662714660167694,
1495
+ "rewards/margins": 8.222164154052734,
1496
+ "rewards/rejected": -8.588435173034668,
1497
+ "step": 1030
1498
+ },
1499
+ {
1500
+ "epoch": 0.6,
1501
+ "learning_rate": 2.241709183673469e-07,
1502
+ "logits/chosen": -2.0746610164642334,
1503
+ "logits/rejected": -1.9087474346160889,
1504
+ "logps/chosen": -308.8531799316406,
1505
+ "logps/rejected": -173.00241088867188,
1506
+ "loss": 0.0478,
1507
+ "rewards/accuracies": 0.9750000238418579,
1508
+ "rewards/chosen": -0.7547154426574707,
1509
+ "rewards/margins": 9.060162544250488,
1510
+ "rewards/rejected": -9.814878463745117,
1511
+ "step": 1040
1512
+ },
1513
+ {
1514
+ "epoch": 0.6,
1515
+ "learning_rate": 2.2098214285714284e-07,
1516
+ "logits/chosen": -2.1720070838928223,
1517
+ "logits/rejected": -1.9714542627334595,
1518
+ "logps/chosen": -282.46026611328125,
1519
+ "logps/rejected": -165.5819549560547,
1520
+ "loss": 0.0484,
1521
+ "rewards/accuracies": 0.9750000238418579,
1522
+ "rewards/chosen": -0.8778654336929321,
1523
+ "rewards/margins": 8.815220832824707,
1524
+ "rewards/rejected": -9.693086624145508,
1525
+ "step": 1050
1526
+ },
1527
+ {
1528
+ "epoch": 0.61,
1529
+ "learning_rate": 2.1779336734693877e-07,
1530
+ "logits/chosen": -2.160717010498047,
1531
+ "logits/rejected": -2.0338234901428223,
1532
+ "logps/chosen": -293.3428649902344,
1533
+ "logps/rejected": -180.75949096679688,
1534
+ "loss": 0.0232,
1535
+ "rewards/accuracies": 1.0,
1536
+ "rewards/chosen": -0.8398637771606445,
1537
+ "rewards/margins": 9.461033821105957,
1538
+ "rewards/rejected": -10.300897598266602,
1539
+ "step": 1060
1540
+ },
1541
+ {
1542
+ "epoch": 0.61,
1543
+ "learning_rate": 2.1460459183673467e-07,
1544
+ "logits/chosen": -2.0859107971191406,
1545
+ "logits/rejected": -1.9392955303192139,
1546
+ "logps/chosen": -269.2712707519531,
1547
+ "logps/rejected": -160.63858032226562,
1548
+ "loss": 0.048,
1549
+ "rewards/accuracies": 0.9624999761581421,
1550
+ "rewards/chosen": -0.8251531720161438,
1551
+ "rewards/margins": 8.415184020996094,
1552
+ "rewards/rejected": -9.240337371826172,
1553
+ "step": 1070
1554
+ },
1555
+ {
1556
+ "epoch": 0.62,
1557
+ "learning_rate": 2.114158163265306e-07,
1558
+ "logits/chosen": -2.114478349685669,
1559
+ "logits/rejected": -2.0172407627105713,
1560
+ "logps/chosen": -313.1733093261719,
1561
+ "logps/rejected": -183.88912963867188,
1562
+ "loss": 0.02,
1563
+ "rewards/accuracies": 1.0,
1564
+ "rewards/chosen": -0.37718772888183594,
1565
+ "rewards/margins": 10.156351089477539,
1566
+ "rewards/rejected": -10.533537864685059,
1567
+ "step": 1080
1568
+ },
1569
+ {
1570
+ "epoch": 0.63,
1571
+ "learning_rate": 2.0822704081632654e-07,
1572
+ "logits/chosen": -2.151115894317627,
1573
+ "logits/rejected": -1.9639852046966553,
1574
+ "logps/chosen": -304.9864196777344,
1575
+ "logps/rejected": -179.6061248779297,
1576
+ "loss": 0.0374,
1577
+ "rewards/accuracies": 0.987500011920929,
1578
+ "rewards/chosen": -0.8430510759353638,
1579
+ "rewards/margins": 9.594018936157227,
1580
+ "rewards/rejected": -10.4370698928833,
1581
+ "step": 1090
1582
+ },
1583
+ {
1584
+ "epoch": 0.63,
1585
+ "learning_rate": 2.0503826530612244e-07,
1586
+ "logits/chosen": -2.07926082611084,
1587
+ "logits/rejected": -1.8545608520507812,
1588
+ "logps/chosen": -287.17510986328125,
1589
+ "logps/rejected": -175.82423400878906,
1590
+ "loss": 0.025,
1591
+ "rewards/accuracies": 0.987500011920929,
1592
+ "rewards/chosen": -0.6869755387306213,
1593
+ "rewards/margins": 9.874262809753418,
1594
+ "rewards/rejected": -10.561239242553711,
1595
+ "step": 1100
1596
+ },
1597
+ {
1598
+ "epoch": 0.64,
1599
+ "learning_rate": 2.0184948979591837e-07,
1600
+ "logits/chosen": -2.0271568298339844,
1601
+ "logits/rejected": -1.9322795867919922,
1602
+ "logps/chosen": -254.9891815185547,
1603
+ "logps/rejected": -171.43470764160156,
1604
+ "loss": 0.0285,
1605
+ "rewards/accuracies": 0.987500011920929,
1606
+ "rewards/chosen": -0.40473413467407227,
1607
+ "rewards/margins": 10.018217086791992,
1608
+ "rewards/rejected": -10.422952651977539,
1609
+ "step": 1110
1610
+ },
1611
+ {
1612
+ "epoch": 0.64,
1613
+ "learning_rate": 1.9866071428571427e-07,
1614
+ "logits/chosen": -1.9903011322021484,
1615
+ "logits/rejected": -1.9317058324813843,
1616
+ "logps/chosen": -239.4633026123047,
1617
+ "logps/rejected": -167.33950805664062,
1618
+ "loss": 0.052,
1619
+ "rewards/accuracies": 0.925000011920929,
1620
+ "rewards/chosen": -1.950304627418518,
1621
+ "rewards/margins": 8.423638343811035,
1622
+ "rewards/rejected": -10.373943328857422,
1623
+ "step": 1120
1624
+ },
1625
+ {
1626
+ "epoch": 0.65,
1627
+ "learning_rate": 1.9547193877551018e-07,
1628
+ "logits/chosen": -2.1156675815582275,
1629
+ "logits/rejected": -1.9115331172943115,
1630
+ "logps/chosen": -283.9968566894531,
1631
+ "logps/rejected": -171.2687225341797,
1632
+ "loss": 0.0356,
1633
+ "rewards/accuracies": 0.987500011920929,
1634
+ "rewards/chosen": -0.6293619275093079,
1635
+ "rewards/margins": 9.327872276306152,
1636
+ "rewards/rejected": -9.957234382629395,
1637
+ "step": 1130
1638
+ },
1639
+ {
1640
+ "epoch": 0.65,
1641
+ "learning_rate": 1.922831632653061e-07,
1642
+ "logits/chosen": -2.164762020111084,
1643
+ "logits/rejected": -1.9720051288604736,
1644
+ "logps/chosen": -332.24481201171875,
1645
+ "logps/rejected": -172.13970947265625,
1646
+ "loss": 0.0394,
1647
+ "rewards/accuracies": 0.987500011920929,
1648
+ "rewards/chosen": -0.5399401783943176,
1649
+ "rewards/margins": 8.87366008758545,
1650
+ "rewards/rejected": -9.413599967956543,
1651
+ "step": 1140
1652
+ },
1653
+ {
1654
+ "epoch": 0.66,
1655
+ "learning_rate": 1.8909438775510204e-07,
1656
+ "logits/chosen": -2.0545859336853027,
1657
+ "logits/rejected": -1.916473627090454,
1658
+ "logps/chosen": -279.626953125,
1659
+ "logps/rejected": -176.50621032714844,
1660
+ "loss": 0.0147,
1661
+ "rewards/accuracies": 1.0,
1662
+ "rewards/chosen": -1.018849492073059,
1663
+ "rewards/margins": 9.826019287109375,
1664
+ "rewards/rejected": -10.844868659973145,
1665
+ "step": 1150
1666
+ },
1667
+ {
1668
+ "epoch": 0.67,
1669
+ "learning_rate": 1.8590561224489794e-07,
1670
+ "logits/chosen": -2.088841438293457,
1671
+ "logits/rejected": -1.9257131814956665,
1672
+ "logps/chosen": -292.3592834472656,
1673
+ "logps/rejected": -173.84017944335938,
1674
+ "loss": 0.0291,
1675
+ "rewards/accuracies": 0.9750000238418579,
1676
+ "rewards/chosen": -1.085963487625122,
1677
+ "rewards/margins": 9.38125991821289,
1678
+ "rewards/rejected": -10.467223167419434,
1679
+ "step": 1160
1680
+ },
1681
+ {
1682
+ "epoch": 0.67,
1683
+ "learning_rate": 1.8271683673469387e-07,
1684
+ "logits/chosen": -2.1992461681365967,
1685
+ "logits/rejected": -2.0129992961883545,
1686
+ "logps/chosen": -298.34307861328125,
1687
+ "logps/rejected": -182.71925354003906,
1688
+ "loss": 0.039,
1689
+ "rewards/accuracies": 0.9750000238418579,
1690
+ "rewards/chosen": -1.1279481649398804,
1691
+ "rewards/margins": 9.488454818725586,
1692
+ "rewards/rejected": -10.61640453338623,
1693
+ "step": 1170
1694
+ },
1695
+ {
1696
+ "epoch": 0.68,
1697
+ "learning_rate": 1.795280612244898e-07,
1698
+ "logits/chosen": -2.144819974899292,
1699
+ "logits/rejected": -1.9659067392349243,
1700
+ "logps/chosen": -256.8332214355469,
1701
+ "logps/rejected": -152.0546112060547,
1702
+ "loss": 0.0389,
1703
+ "rewards/accuracies": 0.9750000238418579,
1704
+ "rewards/chosen": -0.4352855682373047,
1705
+ "rewards/margins": 7.898017883300781,
1706
+ "rewards/rejected": -8.33330249786377,
1707
+ "step": 1180
1708
+ },
1709
+ {
1710
+ "epoch": 0.68,
1711
+ "learning_rate": 1.763392857142857e-07,
1712
+ "logits/chosen": -2.188812494277954,
1713
+ "logits/rejected": -2.0105535984039307,
1714
+ "logps/chosen": -311.55633544921875,
1715
+ "logps/rejected": -167.11927795410156,
1716
+ "loss": 0.0237,
1717
+ "rewards/accuracies": 0.987500011920929,
1718
+ "rewards/chosen": -0.43777579069137573,
1719
+ "rewards/margins": 8.915369987487793,
1720
+ "rewards/rejected": -9.35314655303955,
1721
+ "step": 1190
1722
+ },
1723
+ {
1724
+ "epoch": 0.69,
1725
+ "learning_rate": 1.731505102040816e-07,
1726
+ "logits/chosen": -2.087813377380371,
1727
+ "logits/rejected": -1.9207570552825928,
1728
+ "logps/chosen": -271.0974426269531,
1729
+ "logps/rejected": -211.95523071289062,
1730
+ "loss": 0.0268,
1731
+ "rewards/accuracies": 0.987500011920929,
1732
+ "rewards/chosen": -0.8373743295669556,
1733
+ "rewards/margins": 13.245565414428711,
1734
+ "rewards/rejected": -14.082939147949219,
1735
+ "step": 1200
1736
+ },
1737
+ {
1738
+ "epoch": 0.69,
1739
+ "learning_rate": 1.6996173469387754e-07,
1740
+ "logits/chosen": -2.1115288734436035,
1741
+ "logits/rejected": -1.9904811382293701,
1742
+ "logps/chosen": -302.61138916015625,
1743
+ "logps/rejected": -177.2365264892578,
1744
+ "loss": 0.0235,
1745
+ "rewards/accuracies": 1.0,
1746
+ "rewards/chosen": -0.5768619775772095,
1747
+ "rewards/margins": 8.965620040893555,
1748
+ "rewards/rejected": -9.54248332977295,
1749
+ "step": 1210
1750
+ },
1751
+ {
1752
+ "epoch": 0.7,
1753
+ "learning_rate": 1.6677295918367345e-07,
1754
+ "logits/chosen": -2.1301310062408447,
1755
+ "logits/rejected": -1.879533052444458,
1756
+ "logps/chosen": -332.2015686035156,
1757
+ "logps/rejected": -165.51576232910156,
1758
+ "loss": 0.0481,
1759
+ "rewards/accuracies": 0.9750000238418579,
1760
+ "rewards/chosen": -0.5949792861938477,
1761
+ "rewards/margins": 9.159207344055176,
1762
+ "rewards/rejected": -9.754185676574707,
1763
+ "step": 1220
1764
+ },
1765
+ {
1766
+ "epoch": 0.71,
1767
+ "learning_rate": 1.6358418367346938e-07,
1768
+ "logits/chosen": -2.1042232513427734,
1769
+ "logits/rejected": -1.8513002395629883,
1770
+ "logps/chosen": -344.7572937011719,
1771
+ "logps/rejected": -172.65744018554688,
1772
+ "loss": 0.0231,
1773
+ "rewards/accuracies": 0.9750000238418579,
1774
+ "rewards/chosen": -0.2580934464931488,
1775
+ "rewards/margins": 9.699871063232422,
1776
+ "rewards/rejected": -9.957964897155762,
1777
+ "step": 1230
1778
+ },
1779
+ {
1780
+ "epoch": 0.71,
1781
+ "learning_rate": 1.603954081632653e-07,
1782
+ "logits/chosen": -2.068237066268921,
1783
+ "logits/rejected": -1.8824779987335205,
1784
+ "logps/chosen": -284.03509521484375,
1785
+ "logps/rejected": -162.17001342773438,
1786
+ "loss": 0.0275,
1787
+ "rewards/accuracies": 0.9750000238418579,
1788
+ "rewards/chosen": -0.403529554605484,
1789
+ "rewards/margins": 9.167604446411133,
1790
+ "rewards/rejected": -9.571134567260742,
1791
+ "step": 1240
1792
+ },
1793
+ {
1794
+ "epoch": 0.72,
1795
+ "learning_rate": 1.572066326530612e-07,
1796
+ "logits/chosen": -2.1395645141601562,
1797
+ "logits/rejected": -1.9513561725616455,
1798
+ "logps/chosen": -342.67254638671875,
1799
+ "logps/rejected": -174.70993041992188,
1800
+ "loss": 0.0345,
1801
+ "rewards/accuracies": 0.9750000238418579,
1802
+ "rewards/chosen": -0.07841818779706955,
1803
+ "rewards/margins": 9.598522186279297,
1804
+ "rewards/rejected": -9.676939964294434,
1805
+ "step": 1250
1806
+ },
1807
+ {
1808
+ "epoch": 0.72,
1809
+ "learning_rate": 1.5401785714285714e-07,
1810
+ "logits/chosen": -2.126796245574951,
1811
+ "logits/rejected": -1.9961979389190674,
1812
+ "logps/chosen": -316.7900695800781,
1813
+ "logps/rejected": -176.53733825683594,
1814
+ "loss": 0.0412,
1815
+ "rewards/accuracies": 0.987500011920929,
1816
+ "rewards/chosen": -0.37025541067123413,
1817
+ "rewards/margins": 9.903864860534668,
1818
+ "rewards/rejected": -10.27411937713623,
1819
+ "step": 1260
1820
+ },
1821
+ {
1822
+ "epoch": 0.73,
1823
+ "learning_rate": 1.5082908163265307e-07,
1824
+ "logits/chosen": -2.082777976989746,
1825
+ "logits/rejected": -1.9228655099868774,
1826
+ "logps/chosen": -297.30133056640625,
1827
+ "logps/rejected": -166.17623901367188,
1828
+ "loss": 0.0346,
1829
+ "rewards/accuracies": 0.9375,
1830
+ "rewards/chosen": -1.2105169296264648,
1831
+ "rewards/margins": 8.340936660766602,
1832
+ "rewards/rejected": -9.551454544067383,
1833
+ "step": 1270
1834
+ },
1835
+ {
1836
+ "epoch": 0.73,
1837
+ "learning_rate": 1.4764030612244895e-07,
1838
+ "logits/chosen": -2.1340670585632324,
1839
+ "logits/rejected": -2.0120952129364014,
1840
+ "logps/chosen": -306.83441162109375,
1841
+ "logps/rejected": -174.4154510498047,
1842
+ "loss": 0.0375,
1843
+ "rewards/accuracies": 0.9750000238418579,
1844
+ "rewards/chosen": -0.8796642422676086,
1845
+ "rewards/margins": 9.489983558654785,
1846
+ "rewards/rejected": -10.369647979736328,
1847
+ "step": 1280
1848
+ },
1849
+ {
1850
+ "epoch": 0.74,
1851
+ "learning_rate": 1.4445153061224488e-07,
1852
+ "logits/chosen": -2.180344581604004,
1853
+ "logits/rejected": -2.054753541946411,
1854
+ "logps/chosen": -319.8197326660156,
1855
+ "logps/rejected": -179.91517639160156,
1856
+ "loss": 0.0368,
1857
+ "rewards/accuracies": 0.9750000238418579,
1858
+ "rewards/chosen": -0.26657119393348694,
1859
+ "rewards/margins": 10.043919563293457,
1860
+ "rewards/rejected": -10.310490608215332,
1861
+ "step": 1290
1862
+ },
1863
+ {
1864
+ "epoch": 0.75,
1865
+ "learning_rate": 1.412627551020408e-07,
1866
+ "logits/chosen": -2.139643907546997,
1867
+ "logits/rejected": -1.9441308975219727,
1868
+ "logps/chosen": -280.61492919921875,
1869
+ "logps/rejected": -168.45120239257812,
1870
+ "loss": 0.0245,
1871
+ "rewards/accuracies": 1.0,
1872
+ "rewards/chosen": -0.6699168086051941,
1873
+ "rewards/margins": 9.325021743774414,
1874
+ "rewards/rejected": -9.994939804077148,
1875
+ "step": 1300
1876
+ },
1877
+ {
1878
+ "epoch": 0.75,
1879
+ "learning_rate": 1.3807397959183672e-07,
1880
+ "logits/chosen": -2.2181859016418457,
1881
+ "logits/rejected": -2.0422186851501465,
1882
+ "logps/chosen": -348.22802734375,
1883
+ "logps/rejected": -184.8292999267578,
1884
+ "loss": 0.0389,
1885
+ "rewards/accuracies": 1.0,
1886
+ "rewards/chosen": -0.8054157495498657,
1887
+ "rewards/margins": 9.742046356201172,
1888
+ "rewards/rejected": -10.547462463378906,
1889
+ "step": 1310
1890
+ },
1891
+ {
1892
+ "epoch": 0.76,
1893
+ "learning_rate": 1.3488520408163265e-07,
1894
+ "logits/chosen": -2.1537463665008545,
1895
+ "logits/rejected": -1.9965778589248657,
1896
+ "logps/chosen": -322.0302734375,
1897
+ "logps/rejected": -177.57180786132812,
1898
+ "loss": 0.039,
1899
+ "rewards/accuracies": 0.9750000238418579,
1900
+ "rewards/chosen": -0.8039339780807495,
1901
+ "rewards/margins": 9.590051651000977,
1902
+ "rewards/rejected": -10.3939847946167,
1903
+ "step": 1320
1904
+ },
1905
+ {
1906
+ "epoch": 0.76,
1907
+ "learning_rate": 1.3169642857142858e-07,
1908
+ "logits/chosen": -2.120084524154663,
1909
+ "logits/rejected": -1.944972276687622,
1910
+ "logps/chosen": -299.9647521972656,
1911
+ "logps/rejected": -174.93545532226562,
1912
+ "loss": 0.0365,
1913
+ "rewards/accuracies": 0.9624999761581421,
1914
+ "rewards/chosen": -1.0538573265075684,
1915
+ "rewards/margins": 9.612497329711914,
1916
+ "rewards/rejected": -10.666356086730957,
1917
+ "step": 1330
1918
+ },
1919
+ {
1920
+ "epoch": 0.77,
1921
+ "learning_rate": 1.2850765306122448e-07,
1922
+ "logits/chosen": -2.0994505882263184,
1923
+ "logits/rejected": -1.9836326837539673,
1924
+ "logps/chosen": -327.00103759765625,
1925
+ "logps/rejected": -176.6904754638672,
1926
+ "loss": 0.0426,
1927
+ "rewards/accuracies": 1.0,
1928
+ "rewards/chosen": -1.0558558702468872,
1929
+ "rewards/margins": 9.380393981933594,
1930
+ "rewards/rejected": -10.436250686645508,
1931
+ "step": 1340
1932
+ },
1933
+ {
1934
+ "epoch": 0.77,
1935
+ "learning_rate": 1.253188775510204e-07,
1936
+ "logits/chosen": -2.063891887664795,
1937
+ "logits/rejected": -1.8763225078582764,
1938
+ "logps/chosen": -245.1425018310547,
1939
+ "logps/rejected": -155.8802032470703,
1940
+ "loss": 0.0527,
1941
+ "rewards/accuracies": 0.9750000238418579,
1942
+ "rewards/chosen": -1.3180797100067139,
1943
+ "rewards/margins": 8.703597068786621,
1944
+ "rewards/rejected": -10.021677017211914,
1945
+ "step": 1350
1946
+ },
1947
+ {
1948
+ "epoch": 0.78,
1949
+ "learning_rate": 1.2213010204081632e-07,
1950
+ "logits/chosen": -2.1077721118927,
1951
+ "logits/rejected": -1.9732635021209717,
1952
+ "logps/chosen": -272.14886474609375,
1953
+ "logps/rejected": -177.23715209960938,
1954
+ "loss": 0.038,
1955
+ "rewards/accuracies": 1.0,
1956
+ "rewards/chosen": -0.5222896337509155,
1957
+ "rewards/margins": 9.948299407958984,
1958
+ "rewards/rejected": -10.470590591430664,
1959
+ "step": 1360
1960
+ },
1961
+ {
1962
+ "epoch": 0.79,
1963
+ "learning_rate": 1.1894132653061225e-07,
1964
+ "logits/chosen": -2.1442596912384033,
1965
+ "logits/rejected": -1.995617151260376,
1966
+ "logps/chosen": -297.60357666015625,
1967
+ "logps/rejected": -176.26931762695312,
1968
+ "loss": 0.0451,
1969
+ "rewards/accuracies": 0.9624999761581421,
1970
+ "rewards/chosen": -1.6867294311523438,
1971
+ "rewards/margins": 8.880840301513672,
1972
+ "rewards/rejected": -10.567569732666016,
1973
+ "step": 1370
1974
+ },
1975
+ {
1976
+ "epoch": 0.79,
1977
+ "learning_rate": 1.1575255102040815e-07,
1978
+ "logits/chosen": -2.1161892414093018,
1979
+ "logits/rejected": -1.9788262844085693,
1980
+ "logps/chosen": -291.28485107421875,
1981
+ "logps/rejected": -171.2949676513672,
1982
+ "loss": 0.0414,
1983
+ "rewards/accuracies": 0.987500011920929,
1984
+ "rewards/chosen": -1.612140417098999,
1985
+ "rewards/margins": 8.652326583862305,
1986
+ "rewards/rejected": -10.264467239379883,
1987
+ "step": 1380
1988
+ },
1989
+ {
1990
+ "epoch": 0.8,
1991
+ "learning_rate": 1.1256377551020407e-07,
1992
+ "logits/chosen": -2.1823105812072754,
1993
+ "logits/rejected": -1.997688889503479,
1994
+ "logps/chosen": -314.90081787109375,
1995
+ "logps/rejected": -177.48538208007812,
1996
+ "loss": 0.0348,
1997
+ "rewards/accuracies": 0.987500011920929,
1998
+ "rewards/chosen": -0.8158550262451172,
1999
+ "rewards/margins": 9.316900253295898,
2000
+ "rewards/rejected": -10.132753372192383,
2001
+ "step": 1390
2002
+ },
2003
+ {
2004
+ "epoch": 0.8,
2005
+ "learning_rate": 1.09375e-07,
2006
+ "logits/chosen": -2.1604926586151123,
2007
+ "logits/rejected": -1.92621648311615,
2008
+ "logps/chosen": -322.7879638671875,
2009
+ "logps/rejected": -174.97999572753906,
2010
+ "loss": 0.0337,
2011
+ "rewards/accuracies": 1.0,
2012
+ "rewards/chosen": -0.6046790480613708,
2013
+ "rewards/margins": 9.716318130493164,
2014
+ "rewards/rejected": -10.32099723815918,
2015
+ "step": 1400
2016
+ },
2017
+ {
2018
+ "epoch": 0.81,
2019
+ "learning_rate": 1.0618622448979592e-07,
2020
+ "logits/chosen": -2.1750383377075195,
2021
+ "logits/rejected": -1.9606304168701172,
2022
+ "logps/chosen": -304.9856872558594,
2023
+ "logps/rejected": -166.8131103515625,
2024
+ "loss": 0.0206,
2025
+ "rewards/accuracies": 1.0,
2026
+ "rewards/chosen": -0.726199746131897,
2027
+ "rewards/margins": 9.535404205322266,
2028
+ "rewards/rejected": -10.261604309082031,
2029
+ "step": 1410
2030
+ },
2031
+ {
2032
+ "epoch": 0.81,
2033
+ "learning_rate": 1.0299744897959183e-07,
2034
+ "logits/chosen": -2.1826305389404297,
2035
+ "logits/rejected": -1.9840447902679443,
2036
+ "logps/chosen": -287.8218078613281,
2037
+ "logps/rejected": -172.24215698242188,
2038
+ "loss": 0.0326,
2039
+ "rewards/accuracies": 0.9750000238418579,
2040
+ "rewards/chosen": -1.0618003606796265,
2041
+ "rewards/margins": 9.063058853149414,
2042
+ "rewards/rejected": -10.124858856201172,
2043
+ "step": 1420
2044
+ },
2045
+ {
2046
+ "epoch": 0.82,
2047
+ "learning_rate": 9.980867346938775e-08,
2048
+ "logits/chosen": -2.1716151237487793,
2049
+ "logits/rejected": -2.0073437690734863,
2050
+ "logps/chosen": -284.8751220703125,
2051
+ "logps/rejected": -177.5775146484375,
2052
+ "loss": 0.0339,
2053
+ "rewards/accuracies": 0.9750000238418579,
2054
+ "rewards/chosen": -0.5093313455581665,
2055
+ "rewards/margins": 10.088600158691406,
2056
+ "rewards/rejected": -10.597929954528809,
2057
+ "step": 1430
2058
+ },
2059
+ {
2060
+ "epoch": 0.83,
2061
+ "learning_rate": 9.661989795918367e-08,
2062
+ "logits/chosen": -2.136314868927002,
2063
+ "logits/rejected": -1.9978504180908203,
2064
+ "logps/chosen": -299.4578857421875,
2065
+ "logps/rejected": -177.2023468017578,
2066
+ "loss": 0.0253,
2067
+ "rewards/accuracies": 1.0,
2068
+ "rewards/chosen": -0.22061769664287567,
2069
+ "rewards/margins": 10.269851684570312,
2070
+ "rewards/rejected": -10.490468978881836,
2071
+ "step": 1440
2072
+ },
2073
+ {
2074
+ "epoch": 0.83,
2075
+ "learning_rate": 9.343112244897958e-08,
2076
+ "logits/chosen": -2.1121840476989746,
2077
+ "logits/rejected": -1.8750556707382202,
2078
+ "logps/chosen": -294.89984130859375,
2079
+ "logps/rejected": -169.1930389404297,
2080
+ "loss": 0.0209,
2081
+ "rewards/accuracies": 1.0,
2082
+ "rewards/chosen": -0.6526461839675903,
2083
+ "rewards/margins": 9.879607200622559,
2084
+ "rewards/rejected": -10.532255172729492,
2085
+ "step": 1450
2086
+ },
2087
+ {
2088
+ "epoch": 0.84,
2089
+ "learning_rate": 9.024234693877552e-08,
2090
+ "logits/chosen": -2.146242141723633,
2091
+ "logits/rejected": -1.8776633739471436,
2092
+ "logps/chosen": -313.9344177246094,
2093
+ "logps/rejected": -166.5650177001953,
2094
+ "loss": 0.0308,
2095
+ "rewards/accuracies": 0.9750000238418579,
2096
+ "rewards/chosen": 0.13873246312141418,
2097
+ "rewards/margins": 9.890599250793457,
2098
+ "rewards/rejected": -9.751866340637207,
2099
+ "step": 1460
2100
+ },
2101
+ {
2102
+ "epoch": 0.84,
2103
+ "learning_rate": 8.705357142857142e-08,
2104
+ "logits/chosen": -2.079200506210327,
2105
+ "logits/rejected": -1.8900413513183594,
2106
+ "logps/chosen": -302.3304443359375,
2107
+ "logps/rejected": -163.3623809814453,
2108
+ "loss": 0.0126,
2109
+ "rewards/accuracies": 0.987500011920929,
2110
+ "rewards/chosen": -0.21449720859527588,
2111
+ "rewards/margins": 9.112278938293457,
2112
+ "rewards/rejected": -9.326776504516602,
2113
+ "step": 1470
2114
+ },
2115
+ {
2116
+ "epoch": 0.85,
2117
+ "learning_rate": 8.386479591836734e-08,
2118
+ "logits/chosen": -2.063764810562134,
2119
+ "logits/rejected": -1.9468538761138916,
2120
+ "logps/chosen": -271.62640380859375,
2121
+ "logps/rejected": -183.78414916992188,
2122
+ "loss": 0.0364,
2123
+ "rewards/accuracies": 0.987500011920929,
2124
+ "rewards/chosen": -0.5422567129135132,
2125
+ "rewards/margins": 10.014627456665039,
2126
+ "rewards/rejected": -10.556883811950684,
2127
+ "step": 1480
2128
+ },
2129
+ {
2130
+ "epoch": 0.85,
2131
+ "learning_rate": 8.067602040816327e-08,
2132
+ "logits/chosen": -2.0192930698394775,
2133
+ "logits/rejected": -1.8542463779449463,
2134
+ "logps/chosen": -282.27069091796875,
2135
+ "logps/rejected": -164.059326171875,
2136
+ "loss": 0.0239,
2137
+ "rewards/accuracies": 1.0,
2138
+ "rewards/chosen": -0.4797702729701996,
2139
+ "rewards/margins": 9.287493705749512,
2140
+ "rewards/rejected": -9.767263412475586,
2141
+ "step": 1490
2142
+ },
2143
+ {
2144
+ "epoch": 0.86,
2145
+ "learning_rate": 7.748724489795918e-08,
2146
+ "logits/chosen": -2.097482204437256,
2147
+ "logits/rejected": -1.9764598608016968,
2148
+ "logps/chosen": -273.4100341796875,
2149
+ "logps/rejected": -175.4436492919922,
2150
+ "loss": 0.0318,
2151
+ "rewards/accuracies": 0.9624999761581421,
2152
+ "rewards/chosen": -0.9027210474014282,
2153
+ "rewards/margins": 9.17738151550293,
2154
+ "rewards/rejected": -10.080102920532227,
2155
+ "step": 1500
2156
+ },
2157
+ {
2158
+ "epoch": 0.86,
2159
+ "eval_logits/chosen": -2.1461496353149414,
2160
+ "eval_logits/rejected": -1.9506442546844482,
2161
+ "eval_logps/chosen": -307.3617248535156,
2162
+ "eval_logps/rejected": -178.54031372070312,
2163
+ "eval_loss": 0.04423515126109123,
2164
+ "eval_rewards/accuracies": 0.9719827771186829,
2165
+ "eval_rewards/chosen": -0.8458480834960938,
2166
+ "eval_rewards/margins": 9.752861022949219,
2167
+ "eval_rewards/rejected": -10.598710060119629,
2168
+ "eval_runtime": 498.2368,
2169
+ "eval_samples_per_second": 3.699,
2170
+ "eval_steps_per_second": 0.116,
2171
+ "step": 1500
2172
+ },
2173
+ {
2174
+ "epoch": 0.87,
2175
+ "learning_rate": 7.429846938775509e-08,
2176
+ "logits/chosen": -2.1621012687683105,
2177
+ "logits/rejected": -1.9208993911743164,
2178
+ "logps/chosen": -343.2023010253906,
2179
+ "logps/rejected": -189.607666015625,
2180
+ "loss": 0.0304,
2181
+ "rewards/accuracies": 0.987500011920929,
2182
+ "rewards/chosen": -0.39647844433784485,
2183
+ "rewards/margins": 10.542058944702148,
2184
+ "rewards/rejected": -10.938536643981934,
2185
+ "step": 1510
2186
+ },
2187
+ {
2188
+ "epoch": 0.87,
2189
+ "learning_rate": 7.110969387755102e-08,
2190
+ "logits/chosen": -2.077564239501953,
2191
+ "logits/rejected": -1.8879787921905518,
2192
+ "logps/chosen": -285.5229797363281,
2193
+ "logps/rejected": -171.03269958496094,
2194
+ "loss": 0.0418,
2195
+ "rewards/accuracies": 0.987500011920929,
2196
+ "rewards/chosen": -1.0820773839950562,
2197
+ "rewards/margins": 9.324197769165039,
2198
+ "rewards/rejected": -10.406275749206543,
2199
+ "step": 1520
2200
+ },
2201
+ {
2202
+ "epoch": 0.88,
2203
+ "learning_rate": 6.792091836734694e-08,
2204
+ "logits/chosen": -2.1050283908843994,
2205
+ "logits/rejected": -1.9014123678207397,
2206
+ "logps/chosen": -291.99993896484375,
2207
+ "logps/rejected": -167.6908721923828,
2208
+ "loss": 0.0245,
2209
+ "rewards/accuracies": 0.9624999761581421,
2210
+ "rewards/chosen": 0.04504892975091934,
2211
+ "rewards/margins": 10.022317886352539,
2212
+ "rewards/rejected": -9.97726821899414,
2213
+ "step": 1530
2214
+ },
2215
+ {
2216
+ "epoch": 0.88,
2217
+ "learning_rate": 6.473214285714285e-08,
2218
+ "logits/chosen": -2.155641794204712,
2219
+ "logits/rejected": -1.9214122295379639,
2220
+ "logps/chosen": -276.1907958984375,
2221
+ "logps/rejected": -168.76535034179688,
2222
+ "loss": 0.0403,
2223
+ "rewards/accuracies": 1.0,
2224
+ "rewards/chosen": -0.5811057090759277,
2225
+ "rewards/margins": 9.711630821228027,
2226
+ "rewards/rejected": -10.292736053466797,
2227
+ "step": 1540
2228
+ },
2229
+ {
2230
+ "epoch": 0.89,
2231
+ "learning_rate": 6.154336734693877e-08,
2232
+ "logits/chosen": -2.0060572624206543,
2233
+ "logits/rejected": -1.8284757137298584,
2234
+ "logps/chosen": -251.60006713867188,
2235
+ "logps/rejected": -161.85813903808594,
2236
+ "loss": 0.0558,
2237
+ "rewards/accuracies": 0.9375,
2238
+ "rewards/chosen": -1.079087495803833,
2239
+ "rewards/margins": 8.86231803894043,
2240
+ "rewards/rejected": -9.94140625,
2241
+ "step": 1550
2242
+ },
2243
+ {
2244
+ "epoch": 0.9,
2245
+ "learning_rate": 5.835459183673469e-08,
2246
+ "logits/chosen": -2.1795308589935303,
2247
+ "logits/rejected": -1.9668638706207275,
2248
+ "logps/chosen": -307.10614013671875,
2249
+ "logps/rejected": -198.31088256835938,
2250
+ "loss": 0.0456,
2251
+ "rewards/accuracies": 0.987500011920929,
2252
+ "rewards/chosen": -0.6056522130966187,
2253
+ "rewards/margins": 12.019081115722656,
2254
+ "rewards/rejected": -12.62473201751709,
2255
+ "step": 1560
2256
+ },
2257
+ {
2258
+ "epoch": 0.9,
2259
+ "learning_rate": 5.516581632653061e-08,
2260
+ "logits/chosen": -1.972507119178772,
2261
+ "logits/rejected": -1.8055419921875,
2262
+ "logps/chosen": -260.49920654296875,
2263
+ "logps/rejected": -156.6437225341797,
2264
+ "loss": 0.0285,
2265
+ "rewards/accuracies": 0.9750000238418579,
2266
+ "rewards/chosen": -0.8670014142990112,
2267
+ "rewards/margins": 8.884390830993652,
2268
+ "rewards/rejected": -9.751392364501953,
2269
+ "step": 1570
2270
+ },
2271
+ {
2272
+ "epoch": 0.91,
2273
+ "learning_rate": 5.197704081632653e-08,
2274
+ "logits/chosen": -2.0382626056671143,
2275
+ "logits/rejected": -1.8945573568344116,
2276
+ "logps/chosen": -251.30612182617188,
2277
+ "logps/rejected": -166.97303771972656,
2278
+ "loss": 0.0475,
2279
+ "rewards/accuracies": 0.987500011920929,
2280
+ "rewards/chosen": -0.7538485527038574,
2281
+ "rewards/margins": 9.353151321411133,
2282
+ "rewards/rejected": -10.107000350952148,
2283
+ "step": 1580
2284
+ },
2285
+ {
2286
+ "epoch": 0.91,
2287
+ "learning_rate": 4.878826530612245e-08,
2288
+ "logits/chosen": -2.1863982677459717,
2289
+ "logits/rejected": -1.9323434829711914,
2290
+ "logps/chosen": -323.969970703125,
2291
+ "logps/rejected": -168.3311767578125,
2292
+ "loss": 0.0474,
2293
+ "rewards/accuracies": 0.9624999761581421,
2294
+ "rewards/chosen": -0.45526188611984253,
2295
+ "rewards/margins": 9.516814231872559,
2296
+ "rewards/rejected": -9.972076416015625,
2297
+ "step": 1590
2298
+ },
2299
+ {
2300
+ "epoch": 0.92,
2301
+ "learning_rate": 4.5599489795918364e-08,
2302
+ "logits/chosen": -2.0845093727111816,
2303
+ "logits/rejected": -1.8628953695297241,
2304
+ "logps/chosen": -283.7400817871094,
2305
+ "logps/rejected": -173.89602661132812,
2306
+ "loss": 0.0331,
2307
+ "rewards/accuracies": 1.0,
2308
+ "rewards/chosen": 0.2650320529937744,
2309
+ "rewards/margins": 10.87845516204834,
2310
+ "rewards/rejected": -10.613424301147461,
2311
+ "step": 1600
2312
+ },
2313
+ {
2314
+ "epoch": 0.92,
2315
+ "learning_rate": 4.241071428571429e-08,
2316
+ "logits/chosen": -2.034493923187256,
2317
+ "logits/rejected": -1.8942550420761108,
2318
+ "logps/chosen": -270.16497802734375,
2319
+ "logps/rejected": -166.28965759277344,
2320
+ "loss": 0.0293,
2321
+ "rewards/accuracies": 0.9750000238418579,
2322
+ "rewards/chosen": -0.9170511960983276,
2323
+ "rewards/margins": 9.314929962158203,
2324
+ "rewards/rejected": -10.23198127746582,
2325
+ "step": 1610
2326
+ },
2327
+ {
2328
+ "epoch": 0.93,
2329
+ "learning_rate": 3.92219387755102e-08,
2330
+ "logits/chosen": -2.102139949798584,
2331
+ "logits/rejected": -1.9411756992340088,
2332
+ "logps/chosen": -301.23846435546875,
2333
+ "logps/rejected": -180.20574951171875,
2334
+ "loss": 0.0467,
2335
+ "rewards/accuracies": 0.9750000238418579,
2336
+ "rewards/chosen": -1.1675564050674438,
2337
+ "rewards/margins": 9.335586547851562,
2338
+ "rewards/rejected": -10.503143310546875,
2339
+ "step": 1620
2340
+ },
2341
+ {
2342
+ "epoch": 0.94,
2343
+ "learning_rate": 3.603316326530612e-08,
2344
+ "logits/chosen": -2.0670650005340576,
2345
+ "logits/rejected": -1.8098779916763306,
2346
+ "logps/chosen": -294.5066833496094,
2347
+ "logps/rejected": -173.11416625976562,
2348
+ "loss": 0.0246,
2349
+ "rewards/accuracies": 0.987500011920929,
2350
+ "rewards/chosen": -0.8192850351333618,
2351
+ "rewards/margins": 9.408272743225098,
2352
+ "rewards/rejected": -10.227557182312012,
2353
+ "step": 1630
2354
+ },
2355
+ {
2356
+ "epoch": 0.94,
2357
+ "learning_rate": 3.284438775510204e-08,
2358
+ "logits/chosen": -2.138176918029785,
2359
+ "logits/rejected": -1.978822112083435,
2360
+ "logps/chosen": -329.6404724121094,
2361
+ "logps/rejected": -190.4500274658203,
2362
+ "loss": 0.0184,
2363
+ "rewards/accuracies": 1.0,
2364
+ "rewards/chosen": -0.5538009405136108,
2365
+ "rewards/margins": 10.36497688293457,
2366
+ "rewards/rejected": -10.918777465820312,
2367
+ "step": 1640
2368
+ },
2369
+ {
2370
+ "epoch": 0.95,
2371
+ "learning_rate": 2.965561224489796e-08,
2372
+ "logits/chosen": -2.1873650550842285,
2373
+ "logits/rejected": -2.0006983280181885,
2374
+ "logps/chosen": -301.3078308105469,
2375
+ "logps/rejected": -175.1551055908203,
2376
+ "loss": 0.0297,
2377
+ "rewards/accuracies": 0.9750000238418579,
2378
+ "rewards/chosen": -1.220243215560913,
2379
+ "rewards/margins": 9.117000579833984,
2380
+ "rewards/rejected": -10.337244033813477,
2381
+ "step": 1650
2382
+ },
2383
+ {
2384
+ "epoch": 0.95,
2385
+ "learning_rate": 2.6466836734693878e-08,
2386
+ "logits/chosen": -2.089918851852417,
2387
+ "logits/rejected": -1.882301926612854,
2388
+ "logps/chosen": -287.04632568359375,
2389
+ "logps/rejected": -171.83804321289062,
2390
+ "loss": 0.0308,
2391
+ "rewards/accuracies": 1.0,
2392
+ "rewards/chosen": -0.6570901274681091,
2393
+ "rewards/margins": 9.935832023620605,
2394
+ "rewards/rejected": -10.592921257019043,
2395
+ "step": 1660
2396
+ },
2397
+ {
2398
+ "epoch": 0.96,
2399
+ "learning_rate": 2.3278061224489795e-08,
2400
+ "logits/chosen": -2.073734998703003,
2401
+ "logits/rejected": -1.8911638259887695,
2402
+ "logps/chosen": -297.2498474121094,
2403
+ "logps/rejected": -174.6929168701172,
2404
+ "loss": 0.0237,
2405
+ "rewards/accuracies": 0.987500011920929,
2406
+ "rewards/chosen": 0.06594938039779663,
2407
+ "rewards/margins": 10.632802963256836,
2408
+ "rewards/rejected": -10.566854476928711,
2409
+ "step": 1670
2410
+ },
2411
+ {
2412
+ "epoch": 0.96,
2413
+ "learning_rate": 2.0089285714285716e-08,
2414
+ "logits/chosen": -2.1300342082977295,
2415
+ "logits/rejected": -2.0111982822418213,
2416
+ "logps/chosen": -284.3525695800781,
2417
+ "logps/rejected": -170.9279022216797,
2418
+ "loss": 0.0392,
2419
+ "rewards/accuracies": 0.987500011920929,
2420
+ "rewards/chosen": -0.1906980723142624,
2421
+ "rewards/margins": 9.709856033325195,
2422
+ "rewards/rejected": -9.900552749633789,
2423
+ "step": 1680
2424
+ },
2425
+ {
2426
+ "epoch": 0.97,
2427
+ "learning_rate": 1.6900510204081633e-08,
2428
+ "logits/chosen": -2.114060878753662,
2429
+ "logits/rejected": -1.9764654636383057,
2430
+ "logps/chosen": -276.199462890625,
2431
+ "logps/rejected": -178.11782836914062,
2432
+ "loss": 0.0337,
2433
+ "rewards/accuracies": 1.0,
2434
+ "rewards/chosen": -0.643802285194397,
2435
+ "rewards/margins": 9.700589179992676,
2436
+ "rewards/rejected": -10.344388961791992,
2437
+ "step": 1690
2438
+ },
2439
+ {
2440
+ "epoch": 0.98,
2441
+ "learning_rate": 1.371173469387755e-08,
2442
+ "logits/chosen": -1.897796392440796,
2443
+ "logits/rejected": -1.800377607345581,
2444
+ "logps/chosen": -251.58102416992188,
2445
+ "logps/rejected": -162.783203125,
2446
+ "loss": 0.0293,
2447
+ "rewards/accuracies": 0.987500011920929,
2448
+ "rewards/chosen": -0.036834489554166794,
2449
+ "rewards/margins": 9.945042610168457,
2450
+ "rewards/rejected": -9.981878280639648,
2451
+ "step": 1700
2452
+ },
2453
+ {
2454
+ "epoch": 0.98,
2455
+ "learning_rate": 1.0522959183673468e-08,
2456
+ "logits/chosen": -2.0931904315948486,
2457
+ "logits/rejected": -1.8743259906768799,
2458
+ "logps/chosen": -295.89080810546875,
2459
+ "logps/rejected": -169.799560546875,
2460
+ "loss": 0.0309,
2461
+ "rewards/accuracies": 0.9750000238418579,
2462
+ "rewards/chosen": -0.7167753577232361,
2463
+ "rewards/margins": 9.488900184631348,
2464
+ "rewards/rejected": -10.205676078796387,
2465
+ "step": 1710
2466
+ },
2467
+ {
2468
+ "epoch": 0.99,
2469
+ "learning_rate": 7.334183673469387e-09,
2470
+ "logits/chosen": -2.086829662322998,
2471
+ "logits/rejected": -1.973637342453003,
2472
+ "logps/chosen": -272.9532165527344,
2473
+ "logps/rejected": -178.72866821289062,
2474
+ "loss": 0.0283,
2475
+ "rewards/accuracies": 0.987500011920929,
2476
+ "rewards/chosen": -0.3609329164028168,
2477
+ "rewards/margins": 9.991233825683594,
2478
+ "rewards/rejected": -10.352167129516602,
2479
+ "step": 1720
2480
+ },
2481
+ {
2482
+ "epoch": 0.99,
2483
+ "learning_rate": 4.145408163265306e-09,
2484
+ "logits/chosen": -2.1046552658081055,
2485
+ "logits/rejected": -1.9208564758300781,
2486
+ "logps/chosen": -248.65567016601562,
2487
+ "logps/rejected": -149.6916046142578,
2488
+ "loss": 0.0201,
2489
+ "rewards/accuracies": 1.0,
2490
+ "rewards/chosen": -0.8961359262466431,
2491
+ "rewards/margins": 8.393698692321777,
2492
+ "rewards/rejected": -9.289834976196289,
2493
+ "step": 1730
2494
+ },
2495
+ {
2496
+ "epoch": 1.0,
2497
+ "learning_rate": 9.566326530612245e-10,
2498
+ "logits/chosen": -2.1648898124694824,
2499
+ "logits/rejected": -1.9197168350219727,
2500
+ "logps/chosen": -293.1455078125,
2501
+ "logps/rejected": -166.76507568359375,
2502
+ "loss": 0.0269,
2503
+ "rewards/accuracies": 0.9750000238418579,
2504
+ "rewards/chosen": -0.503768265247345,
2505
+ "rewards/margins": 9.358155250549316,
2506
+ "rewards/rejected": -9.861923217773438,
2507
+ "step": 1740
2508
+ },
2509
+ {
2510
+ "epoch": 1.0,
2511
+ "step": 1743,
2512
+ "total_flos": 0.0,
2513
+ "train_loss": 0.06308119606848633,
2514
+ "train_runtime": 23531.0319,
2515
+ "train_samples_per_second": 2.37,
2516
+ "train_steps_per_second": 0.074
2517
+ }
2518
+ ],
2519
+ "logging_steps": 10,
2520
+ "max_steps": 1743,
2521
+ "num_input_tokens_seen": 0,
2522
+ "num_train_epochs": 1,
2523
+ "save_steps": 500,
2524
+ "total_flos": 0.0,
2525
+ "train_batch_size": 8,
2526
+ "trial_name": null,
2527
+ "trial_params": null
2528
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5244e74363277f1c7a62dc098914bce5b3a39695ac66f66a71f6f7df61ed2f9
3
+ size 5371