RedaAlami
/

zephyr-7b-dpo-qlora

@@ -1,10 +1,7 @@
 ---
 base_model: TII-Frontier-Team/falcon3-3b-instruct
-datasets:
-- TII-Frontier-Team/Reasoning_DPO
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
@@ -18,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 # zephyr-7b-dpo-qlora
-This model is a fine-tuned version of [TII-Frontier-Team/PEFT-falcon3b-it-gsm8k](https://huggingface.co/TII-Frontier-Team/PEFT-falcon3b-it-gsm8k) on the TII-Frontier-Team/Reasoning_DPO dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0286
-- Rewards/chosen: -4.7078
-- Rewards/rejected: -10.6652
-- Rewards/accuracies: 0.9254
-- Rewards/margins: 5.9575
-- Logps/rejected: -1102.4209
-- Logps/chosen: -503.5470
-- Logits/rejected: 1.9412
-- Logits/chosen: 2.1408
 ## Model description
@@ -65,37 +62,37 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.6914        | 0.0315 | 100  | 0.6912          | 0.0006         | -0.0036          | 0.6340             | 0.0042          | -36.2582       | -32.7125     | -1.6841         | -1.6367       |
-| 0.6743        | 0.0629 | 200  | 0.6753          | -0.0009        | -0.0462          | 0.6321             | 0.0454          | -40.5232       | -32.8573     | -1.5154         | -1.4649       |
-| 0.6112        | 0.0944 | 300  | 0.5905          | -0.5010        | -0.8365          | 0.6631             | 0.3356          | -119.5518      | -82.8670     | -0.5166         | -0.4325       |
-| 0.4477        | 0.1258 | 400  | 0.4026          | -1.9267        | -3.0850          | 0.7201             | 1.1583          | -344.3972      | -225.4428    | -0.5023         | -0.3494       |
-| 0.3583        | 0.1573 | 500  | 0.3063          | -2.4869        | -4.1367          | 0.7646             | 1.6498          | -449.5698      | -281.4605    | 0.3124          | 0.4717        |
-| 0.3041        | 0.1887 | 600  | 0.2405          | -2.9070        | -4.9732          | 0.7918             | 2.0662          | -533.2189      | -323.4665    | 0.9644          | 1.1113        |
-| 0.2487        | 0.2202 | 700  | 0.1964          | -3.4123        | -5.8172          | 0.8209             | 2.4050          | -617.6231      | -373.9985    | 1.1343          | 1.2933        |
-| 0.218         | 0.2517 | 800  | 0.1547          | -3.6771        | -6.6251          | 0.8336             | 2.9480          | -698.4094      | -400.4795    | 1.5710          | 1.7290        |
-| 0.1858        | 0.2831 | 900  | 0.1394          | -3.5484        | -6.6808          | 0.8485             | 3.1324          | -703.9799      | -387.6123    | 1.6988          | 1.8631        |
-| 0.173         | 0.3146 | 1000 | 0.1176          | -3.4824        | -6.7705          | 0.8649             | 3.2881          | -712.9531      | -381.0118    | 1.8190          | 1.9776        |
-| 0.1494        | 0.3460 | 1100 | 0.0979          | -3.7942        | -7.4529          | 0.8713             | 3.6587          | -781.1857      | -412.1861    | 1.8179          | 1.9865        |
-| 0.149         | 0.3775 | 1200 | 0.0817          | -4.1856        | -8.2504          | 0.8843             | 4.0648          | -860.9355      | -451.3316    | 1.8715          | 2.0581        |
-| 0.1143        | 0.4089 | 1300 | 0.0702          | -4.2444        | -8.6154          | 0.8884             | 4.3710          | -897.4431      | -457.2141    | 1.7765          | 1.9770        |
-| 0.1204        | 0.4404 | 1400 | 0.0642          | -4.1442        | -8.6112          | 0.8966             | 4.4670          | -897.0154      | -447.1863    | 2.1996          | 2.3734        |
-| 0.1013        | 0.4718 | 1500 | 0.0580          | -4.5031        | -9.1159          | 0.8951             | 4.6128          | -947.4904      | -483.0838    | 1.9514          | 2.1364        |
-| 0.1011        | 0.5033 | 1600 | 0.0567          | -4.0373        | -8.5779          | 0.9067             | 4.5406          | -893.6846      | -436.5011    | 1.9239          | 2.1103        |
-| 0.0853        | 0.5348 | 1700 | 0.0482          | -4.3119        | -9.2927          | 0.9067             | 4.9808          | -965.1708      | -463.9637    | 2.0648          | 2.2336        |
-| 0.0897        | 0.5662 | 1800 | 0.0449          | -4.3018        | -9.4275          | 0.9101             | 5.1257          | -978.6490      | -462.9552    | 1.9037          | 2.0822        |
-| 0.0717        | 0.5977 | 1900 | 0.0402          | -4.4391        | -9.8395          | 0.9112             | 5.4004          | -1019.8445     | -476.6779    | 2.0003          | 2.1749        |
-| 0.0487        | 0.6291 | 2000 | 0.0368          | -5.4728        | -11.3180         | 0.9078             | 5.8452          | -1167.6968     | -580.0486    | 1.9355          | 2.1422        |
-| 0.0683        | 0.6606 | 2100 | 0.0356          | -4.6736        | -10.2835         | 0.9190             | 5.6099          | -1064.2465     | -500.1268    | 2.0206          | 2.2058        |
-| 0.0514        | 0.6920 | 2200 | 0.0341          | -4.6025        | -10.2228         | 0.9209             | 5.6203          | -1058.1812     | -493.0187    | 1.9362          | 2.1272        |
-| 0.0623        | 0.7235 | 2300 | 0.0326          | -4.9398        | -10.7061         | 0.9213             | 5.7663          | -1106.5096     | -526.7491    | 1.8240          | 2.0327        |
-| 0.0693        | 0.7550 | 2400 | 0.0313          | -4.8024        | -10.6310         | 0.9231             | 5.8286          | -1098.9999     | -513.0095    | 1.8580          | 2.0583        |
-| 0.0543        | 0.7864 | 2500 | 0.0303          | -4.8132        | -10.7352         | 0.9228             | 5.9221          | -1109.4199     | -514.0873    | 1.9534          | 2.1471        |
-| 0.0555        | 0.8179 | 2600 | 0.0301          | -4.7251        | -10.5626         | 0.9261             | 5.8375          | -1092.1620     | -505.2810    | 1.9398          | 2.1357        |
-| 0.0646        | 0.8493 | 2700 | 0.0294          | -4.6930        | -10.6307         | 0.9261             | 5.9377          | -1098.9694     | -502.0694    | 2.0003          | 2.1947        |
-| 0.0546        | 0.8808 | 2800 | 0.0287          | -4.8085        | -10.8169         | 0.9250             | 6.0084          | -1117.5887     | -513.6258    | 1.9596          | 2.1607        |
-| 0.0702        | 0.9122 | 2900 | 0.0288          | -4.6970        | -10.6904         | 0.9243             | 5.9934          | -1104.9371     | -502.4718    | 1.9696          | 2.1647        |
-| 0.0623        | 0.9437 | 3000 | 0.0286          | -4.7098        | -10.6743         | 0.9269             | 5.9645          | -1103.3302     | -503.7507    | 1.9440          | 2.1437        |
-| 0.0593        | 0.9751 | 3100 | 0.0287          | -4.6985        | -10.6531         | 0.9276             | 5.9547          | -1101.2122     | -502.6163    | 1.9469          | 2.1464        |
 ### Framework versions

 ---
 base_model: TII-Frontier-Team/falcon3-3b-instruct
 library_name: peft
 tags:
 - trl
 - dpo
 - generated_from_trainer
 # zephyr-7b-dpo-qlora
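+This model is a fine-tuned version of [TII-Frontier-Team/falcon3-3b-instruct](https://huggingface.co/TII-Frontier-Team/falcon3-3b-instruct) on a dataset that was not recorded in the training configuration (the previous revision of this card listed TII-Frontier-Team/Reasoning_DPO; confirm before relying on it).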
 It achieves the following results on the evaluation set:
+- Loss: 0.0299
+- Rewards/chosen: -4.6289
+- Rewards/rejected: -10.4404
+- Rewards/accuracies: 0.9302
+- Rewards/margins: 5.8116
+- Logps/rejected: -1079.9603
+- Logps/chosen: -495.6860
+- Logits/rejected: 2.0537
+- Logits/chosen: 2.2623
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6913        | 0.0315 | 100  | 0.6911          | 0.0007         | -0.0036          | 0.6220             | 0.0042          | -36.2718       | -32.7285     | -1.6824         | -1.6348       |
+| 0.6742        | 0.0629 | 200  | 0.6751          | 0.0003         | -0.0454          | 0.6276             | 0.0458          | -40.4596       | -32.7631     | -1.5097         | -1.4586       |
+| 0.6081        | 0.0944 | 300  | 0.5872          | -0.5193        | -0.8644          | 0.6619             | 0.3451          | -122.3552      | -84.7303     | -0.4701         | -0.3830       |
+| 0.4463        | 0.1258 | 400  | 0.3978          | -2.0312        | -3.2212          | 0.7190             | 1.1900          | -358.0407      | -235.9217    | -0.3673         | -0.2101       |
+| 0.3548        | 0.1573 | 500  | 0.3048          | -2.5142        | -4.1605          | 0.7698             | 1.6464          | -451.9689      | -284.2137    | 0.4417          | 0.6033        |
+| 0.3014        | 0.1887 | 600  | 0.2395          | -2.7662        | -4.8033          | 0.7963             | 2.0371          | -516.2451      | -309.4138    | 1.0026          | 1.1670        |
+| 0.25          | 0.2202 | 700  | 0.1989          | -3.1039        | -5.4194          | 0.8235             | 2.3155          | -577.8538      | -343.1828    | 1.3421          | 1.5051        |
+| 0.2163        | 0.2517 | 800  | 0.1564          | -3.4535        | -6.3881          | 0.8369             | 2.9346          | -674.7255      | -378.1511    | 1.8084          | 1.9697        |
+| 0.178         | 0.2831 | 900  | 0.1349          | -3.4355        | -6.5411          | 0.8586             | 3.1056          | -690.0276      | -376.3503    | 1.7688          | 1.9492        |
+| 0.1736        | 0.3146 | 1000 | 0.1127          | -3.5471        | -6.9599          | 0.8668             | 3.4128          | -731.9055      | -387.5069    | 2.0848          | 2.2440        |
+| 0.1474        | 0.3460 | 1100 | 0.0982          | -3.6177        | -7.2322          | 0.8799             | 3.6145          | -759.1403      | -394.5700    | 1.8280          | 2.0076        |
+| 0.1382        | 0.3775 | 1200 | 0.0819          | -4.3123        | -8.3603          | 0.8862             | 4.0480          | -871.9455      | -464.0287    | 2.0966          | 2.2833        |
+| 0.1133        | 0.4089 | 1300 | 0.0714          | -4.0671        | -8.3309          | 0.8955             | 4.2638          | -869.0029      | -439.5055    | 1.9082          | 2.1044        |
+| 0.1209        | 0.4404 | 1400 | 0.0634          | -4.8366        | -9.4739          | 0.8933             | 4.6374          | -983.3081      | -516.4533    | 2.0574          | 2.2678        |
+| 0.1057        | 0.4718 | 1500 | 0.0575          | -4.1835        | -8.8581          | 0.9019             | 4.6746          | -921.7241      | -451.1488    | 2.0907          | 2.2780        |
+| 0.1057        | 0.5033 | 1600 | 0.0536          | -4.2093        | -8.9250          | 0.9131             | 4.7157          | -928.4156      | -453.7231    | 2.0198          | 2.2136        |
+| 0.0881        | 0.5348 | 1700 | 0.0490          | -4.4577        | -9.3694          | 0.9101             | 4.9118          | -972.8605      | -478.5644    | 1.8760          | 2.0804        |
+| 0.0847        | 0.5662 | 1800 | 0.0441          | -4.2531        | -9.4108          | 0.9131             | 5.1578          | -977.0005      | -458.1054    | 2.0999          | 2.2904        |
+| 0.0713        | 0.5977 | 1900 | 0.0411          | -4.4101        | -9.6543          | 0.9168             | 5.2442          | -1001.3448     | -473.8065    | 2.0887          | 2.2861        |
+| 0.0553        | 0.6291 | 2000 | 0.0378          | -4.9687        | -10.5782         | 0.9123             | 5.6095          | -1093.7402     | -529.6686    | 2.0469          | 2.2608        |
+| 0.0668        | 0.6606 | 2100 | 0.0362          | -4.7485        | -10.3227         | 0.9190             | 5.5741          | -1068.1823     | -507.6488    | 2.1354          | 2.3368        |
+| 0.0528        | 0.6920 | 2200 | 0.0356          | -4.6766        | -10.2170         | 0.9175             | 5.5404          | -1057.6173     | -500.4605    | 1.9572          | 2.1594        |
+| 0.0596        | 0.7235 | 2300 | 0.0340          | -4.6180        | -10.2121         | 0.9235             | 5.5942          | -1057.1299     | -494.5929    | 2.0041          | 2.2117        |
+| 0.063         | 0.7550 | 2400 | 0.0328          | -4.5357        | -10.1876         | 0.9257             | 5.6519          | -1054.6713     | -486.3653    | 2.1493          | 2.3488        |
+| 0.0558        | 0.7864 | 2500 | 0.0311          | -4.7155        | -10.5680         | 0.9261             | 5.8526          | -1092.7185     | -504.3435    | 2.1208          | 2.3275        |
+| 0.0552        | 0.8179 | 2600 | 0.0312          | -4.6574        | -10.3658         | 0.9254             | 5.7084          | -1072.4943     | -498.5399    | 2.0544          | 2.2592        |
+| 0.066         | 0.8493 | 2700 | 0.0305          | -4.6506        | -10.4766         | 0.9287             | 5.8259          | -1083.5740     | -497.8611    | 2.0914          | 2.2968        |
+| 0.0568        | 0.8808 | 2800 | 0.0302          | -4.6423        | -10.4629         | 0.9302             | 5.8206          | -1082.2051     | -497.0266    | 2.0957          | 2.3026        |
+| 0.0602        | 0.9122 | 2900 | 0.0299          | -4.6260        | -10.4608         | 0.9299             | 5.8348          | -1081.9958     | -495.3989    | 2.0861          | 2.2911        |
+| 0.0634        | 0.9437 | 3000 | 0.0298          | -4.6454        | -10.4843         | 0.9313             | 5.8389          | -1084.3455     | -497.3409    | 2.0655          | 2.2739        |
+| 0.0602        | 0.9751 | 3100 | 0.0299          | -4.6289        | -10.4404         | 0.9302             | 5.8116          | -1079.9603     | -495.6860    | 2.0537          | 2.2623        |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,22 +1,9 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": 2.140789270401001,
-    "eval_logits/rejected": 1.9412086009979248,
-    "eval_logps/chosen": -503.5469665527344,
-    "eval_logps/rejected": -1102.4208984375,
-    "eval_loss": 0.028598472476005554,
-    "eval_rewards/accuracies": 0.9253731369972229,
-    "eval_rewards/chosen": -4.707759380340576,
-    "eval_rewards/margins": 5.957462787628174,
-    "eval_rewards/rejected": -10.66522216796875,
-    "eval_runtime": 214.7372,
-    "eval_samples": 21417,
-    "eval_samples_per_second": 99.736,
-    "eval_steps_per_second": 1.56,
     "total_flos": 0.0,
-    "train_loss": 0.19036180805619987,
-    "train_runtime": 15997.0818,
     "train_samples": 406907,
-    "train_samples_per_second": 25.436,
-    "train_steps_per_second": 0.199
 }

 {
     "epoch": 1.0,
     "total_flos": 0.0,
+    "train_loss": 0.18858218038370866,
+    "train_runtime": 16104.4918,
     "train_samples": 406907,
+    "train_samples_per_second": 25.267,
+    "train_steps_per_second": 0.197
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.19036180805619987,
-    "train_runtime": 15997.0818,
     "train_samples": 406907,
-    "train_samples_per_second": 25.436,
-    "train_steps_per_second": 0.199
 }

 {
     "epoch": 1.0,
     "total_flos": 0.0,
+    "train_loss": 0.18858218038370866,
+    "train_runtime": 16104.4918,
     "train_samples": 406907,
+    "train_samples_per_second": 25.267,
+    "train_steps_per_second": 0.197
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff