Norod78 committed
Commit 05da82f
Parent: 77c0401

Upload 8 files

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "Hebrew_GPT2_345M",
+  "_name_or_path": "Norod78/Hebrew-GPT2-345M-Stage",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -28,7 +28,7 @@
   "summary_type": "cls_index",
   "summary_use_proj": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.27.0.dev0",
+  "transformers_version": "4.41.2",
   "use_cache": true,
   "vocab_size": 50257
 }
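The two changes here are the canonical repo id in `_name_or_path` and the serialized `transformers` version bump. A minimal sketch of loading the updated config, assuming the repo id shown in the diff above:

```python
from transformers import AutoConfig

# Repo id taken from the updated "_name_or_path" above.
config = AutoConfig.from_pretrained("Norod78/Hebrew-GPT2-345M-Stage")
print(config.architectures)  # ['GPT2LMHeadModel']
print(config.vocab_size)     # 50257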
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 50256,
   "eos_token_id": 50256,
-  "transformers_version": "4.27.0.dev0"
+  "transformers_version": "4.41.2"
 }
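Only the serialized `transformers` version changes; the generation defaults themselves are untouched. A quick check, under the same repo-id assumption as above:

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("Norod78/Hebrew-GPT2-345M-Stage")
# The bos/eos token ids are unchanged by this commit.
assert gen_config.bos_token_id == gen_config.eos_token_id == 50256
```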
merges.txt CHANGED
@@ -1,4 +1,4 @@
-#version: 0.2 - Trained by `huggingface/tokenizers`
+#version: 0.2
 Ġ ×
 × Ļ
 × ķ
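Only the version header comment changes; the merge rules are identical. The odd-looking pairs are byte-level BPE merges: GPT-2 tokenizers remap raw bytes to printable characters, so each two-byte UTF-8 Hebrew letter appears as a pair such as `× Ļ`. A minimal sketch of that remapping (a reimplementation of the standard `bytes_to_unicode` table, not code from this repo):

```python
# Rebuild GPT-2's byte-to-unicode table to see why the Hebrew letter yod
# ("י", UTF-8 bytes 0xD7 0x99) appears in merges.txt as the pair "× Ļ".
def bytes_to_unicode():
    bs = list(range(33, 127)) + list(range(161, 173)) + list(range(174, 256))
    cs = bs[:]
    n = 0
    for b in range(256):
        if b not in bs:
            bs.append(b)
            cs.append(256 + n)
            n += 1
    return {b: chr(c) for b, c in zip(bs, cs)}

table = bytes_to_unicode()
print([table[b] for b in "י".encode("utf-8")])  # ['×', 'Ļ']
```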
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e1d8ecedc8fa6cad81f823d92ed697279ad3b9586fbe2ce82b0cae47351a486
-size 1444493752
+oid sha256:5aaf6ec1e396167aadd8d33d9dae19a3900c6f7a6bb249182b19da625c911b06
+size 1419322880
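As usual for large weights, this file is a Git LFS pointer rather than the tensors themselves; the new blob is slightly smaller (≈1.42 GB vs ≈1.44 GB). A sketch of verifying a local download against the pointer's oid and size, with a hypothetical file path:

```python
import hashlib
import os

path = "model.safetensors"  # hypothetical local path to the downloaded blob
expected_oid = "5aaf6ec1e396167aadd8d33d9dae19a3900c6f7a6bb249182b19da625c911b06"
expected_size = 1419322880

assert os.path.getsize(path) == expected_size
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == expected_oid
```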
tokenizer.json CHANGED
@@ -39,6 +39,8 @@
   "continuing_subword_prefix": "",
   "end_of_word_suffix": "",
   "fuse_unk": false,
+  "byte_fallback": false,
+  "ignore_merges": false,
   "vocab": {
     "startoftext": 0,
     "<pad>": 1,
tokenizer_config.json CHANGED
@@ -1,37 +1,25 @@
 {
   "add_bos_token": false,
   "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
+  "added_tokens_decoder": {
+    "305": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
   "do_lower_case": false,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "full_tokenizer_file": null,
   "max_len": 1024,
   "model_max_length": 1024,
-  "name_or_path": "./Hebrew_GPT2_345M-Models_tokenizer",
   "pad_token": null,
-  "special_tokens_map_file": "special_tokens_map.json",
   "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "unk_token": "<|endoftext|>"
 }
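This is the largest change: the verbose inline `AddedToken` dicts for `bos_token`, `eos_token`, and `unk_token` are replaced by the newer `added_tokens_decoder` map (keyed by token id, here 305) plus plain-string special tokens, and the legacy `name_or_path` and `special_tokens_map_file` fields are dropped. A quick sanity check, under the same repo-id assumption as above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Norod78/Hebrew-GPT2-345M-Stage")
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
# Per the added_tokens_decoder above, the token id should be 305.
print(tok.convert_tokens_to_ids("<|endoftext|>"))
```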