edbeeching
/

EleutherAI_pythia-1b

@@ -1,5 +1,8 @@
 ---
 tags:
 - generated_from_trainer
 model-index:
 - name: EleutherAI_pythia-1b
@@ -9,10 +12,10 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/edbeeching/huggingface/runs/egr46yzm)
 # EleutherAI_pythia-1b
-This model was trained from scratch on an unknown dataset.
 ## Model description
@@ -32,17 +35,21 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 3e-06
-- train_batch_size: 16
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 8
-- gradient_accumulation_steps: 4
 - total_train_batch_size: 512
 - total_eval_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 3.0
 ### Framework versions

 ---
+base_model: cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
 tags:
+- trl
+- dpo
 - generated_from_trainer
 model-index:
 - name: EleutherAI_pythia-1b
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/edbeeching/huggingface/runs/y5wg35ok)
 # EleutherAI_pythia-1b
+This model is a fine-tuned version of [cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr](https://huggingface.co/cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr) on an unknown dataset.
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 3e-06
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 8
+- gradient_accumulation_steps: 16
 - total_train_batch_size: 512
 - total_eval_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 1.0
+### Training results
 ### Framework versions

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b3403942c80da2e7b1137bd18faf5b87ad3b51dc533627e322a022e7eecb4f3
 size 2023586384

 version https://git-lfs.github.com/spec/v1
+oid sha256:7034c7913a51c91b12085dc1e274f24714cc3b08f47f44a5bb63615aa80e519e
 size 2023586384

tokenizer_config.json CHANGED Viewed

@@ -213,12 +213,12 @@
     }
   },
   "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages %}{{' ' + message['content']}}{% endfor %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
-  "padding_side": "left",
   "tokenizer_class": "GPTNeoXTokenizer",
   "unk_token": "<|endoftext|>"
 }

     }
   },
   "bos_token": "<|endoftext|>",
+  "chat_template": "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
+  "padding_side": "right",
   "tokenizer_class": "GPTNeoXTokenizer",
   "unk_token": "<|endoftext|>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e768deda1d544b8924d5cefc17ebda87ed520f6d87aebb90c093a53b9b136df3
-size 7032

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2cc9c5ea667fe384f1b50e0b7d9b1b2b11958bc59c4ae95f1930d0abf5a92c6
+size 7096