Training in progress, epoch 2

Files changed (5) hide show

config.json CHANGED Viewed

@@ -6,7 +6,7 @@
   "attention_dropout": 0.0,
   "bos_token_id": 0,
   "eos_token_id": 1,
-  "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
@@ -14,9 +14,9 @@
   "max_position_embeddings": 2048,
   "mlp_bias": false,
   "model_type": "llama",
-  "num_attention_heads": 8,
-  "num_hidden_layers": 8,
-  "num_key_value_heads": 8,
   "pad_token_id": 1,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-12,
@@ -25,6 +25,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.46.2",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "attention_dropout": 0.0,
   "bos_token_id": 0,
   "eos_token_id": 1,
+  "head_dim": 64,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
   "max_position_embeddings": 2048,
   "mlp_bias": false,
   "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 16,
   "pad_token_id": 1,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-12,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.46.2",
+  "use_cache": false,
   "vocab_size": 32000
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10b31be16809e80677d4c258ebb7a401a31046b47139ad4384932b68e3f19717
-size 648098016

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8cdd0c1858ad2c95d92fcc205ec97d7ebf47f66a7c00ef67404f9becf26e431
+size 841072832

runs/Nov26_16-58-34_3f18e734a088/events.out.tfevents.1732640325.3f18e734a088.1077.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:35623e090852dfc2c45d6b0fbbc7857af72c26b34b9943904f3e9733f8aba7c2
+size 22071

tokenizer.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 128,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 128
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
+      "Fixed": 256
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b69e97e5d96d7d43fef8ba168db0752d244721906cedc570c6efc516e41a492d
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab5c5feb03617069a327bd87a742fd734017e1bde99b0f51c22fb4519209951a
 size 5368