Belred/DPO_SQL_Mistral

Browse files

Files changed (8) hide show

README.md +95 -0
adapter_config.json +26 -0
adapter_model.safetensors +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +42 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,95 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- trl
+- dpo
+- generated_from_trainer
+base_model: TheBloke/Mistral-7B-v0.1-GPTQ
+model-index:
+- name: mistral-dpo
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mistral-dpo
+This model is a fine-tuned version of [TheBloke/Mistral-7B-v0.1-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-v0.1-GPTQ) on an unspecified dataset (the Trainer did not record a dataset name).
+It achieves the following results on the evaluation set:
+- Loss: 0.5603
+- Rewards/chosen: -12.5467
+- Rewards/rejected: -28.4037
+- Rewards/accuracies: 0.8571
+- Rewards/margins: 15.8571
+- Logps/rejected: -411.7001
+- Logps/chosen: -215.4742
+- Logits/rejected: -0.7509
+- Logits/chosen: -0.7707
+## Model description
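+This repository contains a PEFT LoRA adapter (r=4, lora_alpha=6, lora_dropout=0.1, targeting `q_proj` and `v_proj`) trained with DPO via TRL on top of the base model. More information needed.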
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 1
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 2
+- training_steps: 250
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6785        | 0.02  | 10   | 0.6291          | -0.0030        | -0.1321          | 0.875              | 0.1291          | -128.9836      | -90.0372     | -2.3988         | -2.3489       |
+| 0.5661        | 0.04  | 20   | 0.4421          | 0.0008         | -0.6608          | 0.875              | 0.6616          | -134.2708      | -89.9997     | -2.3613         | -2.3042       |
+| 0.3257        | 0.06  | 30   | 0.3584          | -0.7139        | -2.3035          | 0.8393             | 1.5897          | -150.6985      | -97.1463     | -2.2995         | -2.2546       |
+| 0.3563        | 0.08  | 40   | 0.5522          | -3.0636        | -6.7067          | 0.8214             | 3.6431          | -194.7305      | -120.6441    | -2.1396         | -2.0849       |
+| 0.0013        | 0.1   | 50   | 1.3365          | -8.4317        | -16.1649         | 0.8036             | 7.7332          | -289.3120      | -174.3246    | -1.8243         | -1.7710       |
+| 0.0277        | 0.12  | 60   | 2.4224          | -14.8512       | -25.9570         | 0.8214             | 11.1059         | -387.2331      | -238.5192    | -1.5464         | -1.4950       |
+| 1.5742        | 0.14  | 70   | 3.1075          | -17.8751       | -29.6755         | 0.8214             | 11.8004         | -424.4176      | -268.7585    | -1.4071         | -1.3681       |
+| 14.1036       | 0.16  | 80   | 3.6238          | -20.4205       | -32.7881         | 0.8214             | 12.3675         | -455.5435      | -294.2129    | -1.3237         | -1.2729       |
+| 8.531         | 0.18  | 90   | 3.7123          | -21.7863       | -36.0729         | 0.8214             | 14.2866         | -488.3922      | -307.8707    | -1.2975         | -1.2388       |
+| 4.6429        | 0.2   | 100  | 2.0394          | -16.6472       | -29.8508         | 0.8393             | 13.2036         | -426.1712      | -256.4797    | -1.3189         | -1.2784       |
+| 0.0565        | 0.22  | 110  | 1.6331          | -14.8501       | -27.2015         | 0.8393             | 12.3514         | -399.6779      | -238.5090    | -1.2425         | -1.2118       |
+| 0.0056        | 0.24  | 120  | 1.4774          | -15.0784       | -28.0012         | 0.8214             | 12.9228         | -407.6750      | -240.7916    | -1.0819         | -1.0579       |
+| 0.0804        | 0.26  | 130  | 1.5398          | -16.7630       | -30.6346         | 0.8393             | 13.8716         | -434.0091      | -257.6378    | -1.0054         | -0.9846       |
+| 0.0001        | 0.28  | 140  | 1.5159          | -17.9940       | -33.3459         | 0.8393             | 15.3520         | -461.1225      | -269.9474    | -0.8887         | -0.8844       |
+| 0.0           | 0.3   | 150  | 1.5062          | -18.4614       | -34.3481         | 0.8393             | 15.8868         | -471.1445      | -274.6213    | -0.8496         | -0.8503       |
+| 0.0           | 0.32  | 160  | 1.5035          | -18.6474       | -34.7165         | 0.8393             | 16.0692         | -474.8286      | -276.4815    | -0.8343         | -0.8367       |
+| 4.2123        | 0.34  | 170  | 1.2949          | -17.3471       | -32.6721         | 0.8571             | 15.3250         | -454.3839      | -263.4789    | -0.8672         | -0.8661       |
+| 2.13          | 0.36  | 180  | 0.9892          | -15.2178       | -30.1177         | 0.8571             | 14.8999         | -428.8398      | -242.1859    | -0.8992         | -0.9047       |
+| 2.0146        | 0.38  | 190  | 0.8365          | -13.9461       | -28.5983         | 0.8571             | 14.6522         | -413.6459      | -229.4683    | -0.9104         | -0.9224       |
+| 0.0706        | 0.4   | 200  | 0.7897          | -14.5198       | -29.8989         | 0.8571             | 15.3791         | -426.6525      | -235.2058    | -0.8064         | -0.8224       |
+| 5.2517        | 0.42  | 210  | 0.6621          | -13.7049       | -29.2354         | 0.8571             | 15.5305         | -420.0170      | -227.0569    | -0.7981         | -0.8124       |
+| 0.0499        | 0.44  | 220  | 0.5752          | -12.8733       | -28.5299         | 0.8571             | 15.6566         | -412.9616      | -218.7403    | -0.7801         | -0.7990       |
+| 0.5779        | 0.46  | 230  | 0.5611          | -12.6633       | -28.3836         | 0.8571             | 15.7203         | -411.4988      | -216.6405    | -0.7789         | -0.7975       |
+| 0.0322        | 0.48  | 240  | 0.5624          | -12.6348       | -28.4766         | 0.8571             | 15.8418         | -412.4289      | -216.3556    | -0.7696         | -0.7878       |
+| 0.1347        | 0.5   | 250  | 0.5603          | -12.5467       | -28.4037         | 0.8571             | 15.8571         | -411.7001      | -215.4742    | -0.7509         | -0.7707       |
+### Framework versions
+- PEFT 0.7.1
+- Transformers 4.36.2
+- Pytorch 2.0.1+cu118
+- Datasets 2.15.0
+- Tokenizers 0.15.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Mistral-7B-v0.1-GPTQ",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 6,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 4,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06d59f4242d5c7fac3dd69b7a0e289c48b193c18fedb61d27ef46709c70cc30b
+size 6832600

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb108a6e83d2344c999c48cd6984ad4df8a20a04679af180096b98249993c69
+size 4219