Update README.md
Browse files
README.md
CHANGED
@@ -16,4 +16,24 @@ Scheduler: Cosine with Warmup (0.03) and MinLR (0.1 * init_lr)
|
|
16 |
Rollout Batch Size: 20000
|
17 |
Training Batch Size: 256
|
18 |
Number of Iterations: 9
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
```
|
|
|
16 |
Rollout Batch Size: 20000
|
17 |
Training Batch Size: 256
|
18 |
Number of Iterations: 9
|
19 |
+
```
|
20 |
+
|
21 |
+
Evaluation
|
22 |
+
```
|
23 |
+
########## First turn ##########
|
24 |
+
score
|
25 |
+
model turn
|
26 |
+
Llama3-iter-dpo 1 8.55
|
27 |
+
|
28 |
+
########## Second turn ##########
|
29 |
+
score
|
30 |
+
model turn
|
31 |
+
Llama3-iter-dpo 2 7.95625
|
32 |
+
|
33 |
+
########## Average ##########
|
34 |
+
score
|
35 |
+
model
|
36 |
+
Llama3-iter-dpo 8.253125
|
37 |
+
|
38 |
+
Llama3-sft-baseline 7.69
|
39 |
```
|