gpandrad
/

qwen-2.5-3b-r1-countdown

Text Generation

Generated from Trainer

text-generation-inference

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

gpandrad committed 5 days ago

Commit

65dca1c

·

verified ·

1 Parent(s): 021e030

Model save

Files changed (4) hide show

all_results.json +4 -4
config.json +1 -1
train_results.json +4 -4
trainer_state.json +14 -8

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 2.9823833263829473e-05,
-    "train_runtime": 18161.8968,
     "train_samples": 45000,
-    "train_samples_per_second": 1.388,
-    "train_steps_per_second": 0.025
 }

 {
     "total_flos": 0.0,
+    "train_loss": 6.298603717921231e-08,
+    "train_runtime": 66.8559,
     "train_samples": 45000,
+    "train_samples_per_second": 376.93,
+    "train_steps_per_second": 6.731
 }

config.json CHANGED Viewed

@@ -23,7 +23,7 @@
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.2",
-  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 151936
 }

   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.2",
+  "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 2.9823833263829473e-05,
-    "train_runtime": 18161.8968,
     "train_samples": 45000,
-    "train_samples_per_second": 1.388,
-    "train_steps_per_second": 0.025
 }

 {
     "total_flos": 0.0,
+    "train_loss": 6.298603717921231e-08,
+    "train_runtime": 66.8559,
     "train_samples": 45000,
+    "train_samples_per_second": 376.93,
+    "train_steps_per_second": 6.731
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5599626691553896,
   "eval_steps": 500,
-  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2934,13 +2934,19 @@
       "step": 450
     },
     {
-      "epoch": 0.5599626691553896,
-      "step": 450,
       "total_flos": 0.0,
-      "train_loss": 2.9823833263829473e-05,
-      "train_runtime": 18161.8968,
-      "train_samples_per_second": 1.388,
-      "train_steps_per_second": 0.025
     }
   ],
   "logging_steps": 2,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5612070306424016,
   "eval_steps": 500,
+  "global_step": 451,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 450
     },
     {
+      "completion_length": 238.5446491241455,
+      "epoch": 0.5612070306424016,
+      "kl": 0.0272674560546875,
+      "reward": 0.9977678656578064,
+      "reward_std": 0.0063134534284472466,
+      "rewards/equation_reward_func": 0.0,
+      "rewards/format_reward_func": 0.9977678656578064,
+      "step": 451,
       "total_flos": 0.0,
+      "train_loss": 6.298603717921231e-08,
+      "train_runtime": 66.8559,
+      "train_samples_per_second": 376.93,
+      "train_steps_per_second": 6.731
     }
   ],
   "logging_steps": 2,