Model save

Files changed (10) hide show

README.md CHANGED Viewed

@@ -1,14 +1,12 @@
 ---
 license: apache-2.0
-library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
-base_model: mistralai/Mistral-7B-Instruct-v0.2
-datasets:
-- preference-data
 model-index:
 - name: preference_p0.1_seed42_level2_raremixbatch16
   results: []
@@ -19,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
 # preference_p0.1_seed42_level2_raremixbatch16
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the preference-data dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8599
 ## Model description
@@ -57,13 +55,12 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.8409        | 1.0   | 6181 | 0.8599          |
 ### Framework versions
-- PEFT 0.11.1
 - Transformers 4.44.2
 - Pytorch 2.3.1+cu121
 - Datasets 2.19.1
-- Tokenizers 0.19.1

 ---
+library_name: transformers
 license: apache-2.0
+base_model: mistralai/Mistral-7B-Instruct-v0.2
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 model-index:
 - name: preference_p0.1_seed42_level2_raremixbatch16
   results: []
 # preference_p0.1_seed42_level2_raremixbatch16
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the preference-data dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.2881
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.3161        | 1.0   | 2414 | 0.2881          |
 ### Framework versions
 - Transformers 4.44.2
 - Pytorch 2.3.1+cu121
 - Datasets 2.19.1
+- Tokenizers 0.19.1

all_results.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.8599082231521606,
-    "eval_runtime": 2.8502,
     "eval_samples": 10,
-    "eval_samples_per_second": 3.508,
-    "eval_steps_per_second": 1.053,
-    "total_flos": 2571089024581632.0,
-    "train_loss": 0.8638364601301117,
-    "train_runtime": 28571.7374,
     "train_samples": 98881,
-    "train_samples_per_second": 3.461,
-    "train_steps_per_second": 0.216
 }

 {
     "epoch": 1.0,
+    "eval_loss": 0.28806865215301514,
+    "eval_runtime": 1.3149,
     "eval_samples": 10,
+    "eval_samples_per_second": 2.282,
+    "eval_steps_per_second": 0.761,
+    "total_flos": 252721244405760.0,
+    "train_loss": 0.0,
+    "train_runtime": 0.0097,
     "train_samples": 98881,
+    "train_samples_per_second": 3991047.139,
+    "train_steps_per_second": 249446.905
 }

config.json CHANGED Viewed

@@ -22,6 +22,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
+  "use_cache": false,
   "vocab_size": 32000
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.8599082231521606,
-    "eval_runtime": 2.8502,
     "eval_samples": 10,
-    "eval_samples_per_second": 3.508,
-    "eval_steps_per_second": 1.053
 }

 {
     "epoch": 1.0,
+    "eval_loss": 0.28806865215301514,
+    "eval_runtime": 1.3149,
     "eval_samples": 10,
+    "eval_samples_per_second": 2.282,
+    "eval_steps_per_second": 0.761
 }

generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.43.4"
 }

   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "transformers_version": "4.44.2"
 }

runs/Sep24_11-17-32_COE-CS-sv003/events.out.tfevents.1727176715.COE-CS-sv003.779460.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:071577acc062c85ef97090426c62030ff7e01f6d95f6c06337d56e8cd48a19e1
+size 5620

tokenizer_config.json CHANGED Viewed

@@ -30,11 +30,11 @@
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": false,
-  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,

   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": false,
+  "model_max_length": 2048,
   "pad_token": "</s>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
-    "total_flos": 2571089024581632.0,
-    "train_loss": 0.8638364601301117,
-    "train_runtime": 28571.7374,
     "train_samples": 98881,
-    "train_samples_per_second": 3.461,
-    "train_steps_per_second": 0.216
 }

 {
     "epoch": 1.0,
+    "total_flos": 252721244405760.0,
+    "train_loss": 0.0,
+    "train_runtime": 0.0097,
     "train_samples": 98881,
+    "train_samples_per_second": 3991047.139,
+    "train_steps_per_second": 249446.905
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d2cfab61ea7d4ddc928c8af1ae1777847f06be2085febf03b4016a7ff48a13a
 size 7032

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf2d01fd7c3b20690fa23c87f64bf43148f8884951c9cf6af8410d6fcbb51ec7
 size 7032