End of training

Browse files

Files changed (13) hide show

README.md +57 -3
adapter_config.json +28 -0
adapter_model.safetensors +3 -0
runs/Jul18_17-22-15_MasanoMacBook-Air-2283.local/events.out.tfevents.1721290935.MasanoMacBook-Air-2283.local +3 -0
runs/Jul18_17-34-52_MasanoMacBook-Air-2283.local/events.out.tfevents.1721291693.MasanoMacBook-Air-2283.local +3 -0
runs/Jul18_17-55-35_MasanoMacBook-Air-2283.local/events.out.tfevents.1721292935.MasanoMacBook-Air-2283.local +3 -0
runs/Jul19_10-20-30_MasanoMacBook-Air-2283.local/events.out.tfevents.1721352030.MasanoMacBook-Air-2283.local +3 -0
runs/Jul19_11-11-12_MasanoMacBook-Air-2283.local/events.out.tfevents.1721355072.MasanoMacBook-Air-2283.local +3 -0
runs/Jul19_11-22-13_MasanoMacBook-Air-2283.local/events.out.tfevents.1721355733.MasanoMacBook-Air-2283.local +3 -0
runs/Jul19_13-32-23_MasanoMacBook-Air-2283.local/events.out.tfevents.1721363543.MasanoMacBook-Air-2283.local +3 -0
runs/Jul19_13-36-29_MasanoMacBook-Air-2283.local/events.out.tfevents.1721363789.MasanoMacBook-Air-2283.local +3 -0
trainer_state.json +105 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,57 @@
----
-license: apache-2.0
----

+---
+license: cc-by-sa-4.0
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: cyberagent/open-calm-large
+model-index:
+- name: opencalm-ft-llmhuman-adapter
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# opencalm-ft-llmhuman-adapter
+This model is a fine-tuned version of [cyberagent/open-calm-large](https://huggingface.co/cyberagent/open-calm-large) on an unspecified dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 1
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 4
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 2
+### Training results
+### Framework versions
+- PEFT 0.11.1
+- Transformers 4.41.1
+- Pytorch 2.3.0
+- Datasets 2.20.0
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "cyberagent/open-calm-large",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 4,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b036eccc9129e1e05b30df44fd9c3582f560822baf3d6b0df2830e6ed910def4
+size 2366184

runs/Jul18_17-22-15_MasanoMacBook-Air-2283.local/events.out.tfevents.1721290935.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:240f00ab21bdf6313af93cf1303dda97c469d68cd37db0a9a4c0bb4f7e3172ce
+size 4913

runs/Jul18_17-34-52_MasanoMacBook-Air-2283.local/events.out.tfevents.1721291693.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b121881883e2a638f16d110f9a28e78e3359d49bf8b9c5846df4bc3c5d910054
+size 4913

runs/Jul18_17-55-35_MasanoMacBook-Air-2283.local/events.out.tfevents.1721292935.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e7bab7318ea03931c2dd537a4fc344658b4ae3f74663f95af63957f5fea5579
+size 6155

runs/Jul19_10-20-30_MasanoMacBook-Air-2283.local/events.out.tfevents.1721352030.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94b2fe1f6e262f059d6220de1777b59bebb3ef3fe552b530f892f3ada6e14813
+size 5741

runs/Jul19_11-11-12_MasanoMacBook-Air-2283.local/events.out.tfevents.1721355072.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0ce8d85679df84b1a6735a69e8e75c95b1f34c6b56e2d95646b21828fecab1
+size 5534

runs/Jul19_11-22-13_MasanoMacBook-Air-2283.local/events.out.tfevents.1721355733.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a478b053fa743c9ec0d4b76e0b92c34be1c8c55009de94924b89ae35dcfe199
+size 5538

runs/Jul19_13-32-23_MasanoMacBook-Air-2283.local/events.out.tfevents.1721363543.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5db18dc854ffbdccb28fac7d1ac85db61aaf9f05a6ad2049fc04733fc6bb37e0
+size 4914

runs/Jul19_13-36-29_MasanoMacBook-Air-2283.local/events.out.tfevents.1721363789.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d8043a6bd04b78383d71ac6849a6773f500750738476a4ecae166fc1a552c41
+size 7125

trainer_state.json ADDED Viewed

	@@ -0,0 +1,105 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9797979797979797,
+  "eval_steps": 500,
+  "global_step": 98,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.20202020202020202,
+      "grad_norm": 0.4713294208049774,
+      "learning_rate": 0.0002,
+      "loss": 3.2814,
+      "step": 10
+    },
+    {
+      "epoch": 0.40404040404040403,
+      "grad_norm": 0.8114767670631409,
+      "learning_rate": 0.00017727272727272728,
+      "loss": 3.2061,
+      "step": 20
+    },
+    {
+      "epoch": 0.6060606060606061,
+      "grad_norm": 0.9482328295707703,
+      "learning_rate": 0.00015454545454545454,
+      "loss": 2.6241,
+      "step": 30
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 1.0774873495101929,
+      "learning_rate": 0.0001318181818181818,
+      "loss": 2.3206,
+      "step": 40
+    },
+    {
+      "epoch": 1.0101010101010102,
+      "grad_norm": 0.9080483317375183,
+      "learning_rate": 0.00010909090909090909,
+      "loss": 2.0241,
+      "step": 50
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "grad_norm": 0.7166955471038818,
+      "learning_rate": 8.636363636363637e-05,
+      "loss": 1.7966,
+      "step": 60
+    },
+    {
+      "epoch": 1.4141414141414141,
+      "grad_norm": 0.8821200728416443,
+      "learning_rate": 6.363636363636364e-05,
+      "loss": 1.8161,
+      "step": 70
+    },
+    {
+      "epoch": 1.6161616161616161,
+      "grad_norm": 0.9654076099395752,
+      "learning_rate": 4.0909090909090915e-05,
+      "loss": 1.6785,
+      "step": 80
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 0.7666375041007996,
+      "learning_rate": 1.8181818181818182e-05,
+      "loss": 1.7247,
+      "step": 90
+    },
+    {
+      "epoch": 1.9797979797979797,
+      "step": 98,
+      "total_flos": 226829006180352.0,
+      "train_loss": 2.221530009289177,
+      "train_runtime": 2745.7828,
+      "train_samples_per_second": 0.144,
+      "train_steps_per_second": 0.036
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 98,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 226829006180352.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:159b874dc9ed3e701639d51a292e5ba5c80a66008d64d40f4e1bf31c22655aeb
+size 5112