jeremierostan committed on
Commit
15ee500
·
verified ·
1 Parent(s): e0485f6

Training in progress, epoch 2

Browse files
config.json CHANGED
@@ -6,7 +6,7 @@
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 0,
8
  "eos_token_id": 1,
9
- "head_dim": 128,
10
  "hidden_act": "silu",
11
  "hidden_size": 1024,
12
  "initializer_range": 0.02,
@@ -14,9 +14,9 @@
14
  "max_position_embeddings": 2048,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
- "num_attention_heads": 8,
18
- "num_hidden_layers": 8,
19
- "num_key_value_heads": 8,
20
  "pad_token_id": 1,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-12,
@@ -25,6 +25,6 @@
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "float32",
27
  "transformers_version": "4.46.2",
28
- "use_cache": true,
29
  "vocab_size": 32000
30
  }
 
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 0,
8
  "eos_token_id": 1,
9
+ "head_dim": 64,
10
  "hidden_act": "silu",
11
  "hidden_size": 1024,
12
  "initializer_range": 0.02,
 
14
  "max_position_embeddings": 2048,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 12,
19
+ "num_key_value_heads": 16,
20
  "pad_token_id": 1,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-12,
 
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "float32",
27
  "transformers_version": "4.46.2",
28
+ "use_cache": false,
29
  "vocab_size": 32000
30
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b31be16809e80677d4c258ebb7a401a31046b47139ad4384932b68e3f19717
3
- size 648098016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8cdd0c1858ad2c95d92fcc205ec97d7ebf47f66a7c00ef67404f9becf26e431
3
+ size 841072832
runs/Nov26_16-58-34_3f18e734a088/events.out.tfevents.1732640325.3f18e734a088.1077.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35623e090852dfc2c45d6b0fbbc7857af72c26b34b9943904f3e9733f8aba7c2
3
+ size 22071
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 128,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 128
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 256,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 256
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69e97e5d96d7d43fef8ba168db0752d244721906cedc570c6efc516e41a492d
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab5c5feb03617069a327bd87a742fd734017e1bde99b0f51c22fb4519209951a
3
  size 5368