Training in progress, step 20

Files changed (6) hide show

adapter_config.json CHANGED Viewed

@@ -17,13 +17,13 @@
   "revision": null,
   "target_modules": [
     "lm_head",
-    "k_proj",
-    "up_proj",
-    "down_proj",
     "q_proj",
     "o_proj",
-    "v_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "revision": null,
   "target_modules": [
     "lm_head",
     "q_proj",
     "o_proj",
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "down_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dae620ec39b0b9db0b2bc553ab23dc2fad78768b6ddf6e60cdfe7dedeea09ad
 size 26361536

 version https://git-lfs.github.com/spec/v1
+oid sha256:af20ad86889fc54078b57ce04d8e3d3f69d39268483dc7c87987727885e0c84b
 size 26361536

config.json ADDED Viewed

+{
+  "_name_or_path": "data/Tukan-1.1B-Chat-reasoning-sft_merged",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5632,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 22,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "vocab_size": 32000
+}

generation_config.json ADDED Viewed

+{
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "max_length": 2048,
+  "pad_token_id": 0,
+  "transformers_version": "4.36.2"
+}

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc58f4c05aba2e42cd6d5fc4da48be22501f4aaaf7a97fa3f4a913b43b39876f
+size 4400216536

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edc3f43974610cb0990b262c0061ad284b8fda735a6ad58671c1f6f61517e4c3
-size 4792

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ebde5ce4c8d4ffdf3004525c2f02ecd14bfbbe1288a0ee3bede2a30972f58aa
+size 4856