newline special token 처리해서 다시 학습
Browse files
- pytorch_model.bin +1 -1
- special_tokens_map.json +1 -0
- tokenizer.json +9 -0
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 513290985
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0b6970188d6812e550335c01eddeb3188c35ff2f34e1ff51f42a73dc4c5f864
|
3 |
size 513290985
|
special_tokens_map.json
CHANGED
@@ -2,5 +2,6 @@
|
|
2 |
"bos_token": "<s>",
|
3 |
"eos_token": "</s>",
|
4 |
"pad_token": "<pad>",
|
|
|
5 |
"unk_token": "<|endoftext|>"
|
6 |
}
|
|
|
2 |
"bos_token": "<s>",
|
3 |
"eos_token": "</s>",
|
4 |
"pad_token": "<pad>",
|
5 |
+
"sep_token": "\n",
|
6 |
"unk_token": "<|endoftext|>"
|
7 |
}
|
tokenizer.json
CHANGED
@@ -3378,6 +3378,15 @@
|
|
3378 |
"normalized": false,
|
3379 |
"special": true
|
3380 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3381 |
{
|
3382 |
"id": 51200,
|
3383 |
"content": "<|endoftext|>",
|
|
|
3378 |
"normalized": false,
|
3379 |
"special": true
|
3380 |
},
|
3381 |
+
{
|
3382 |
+
"id": 375,
|
3383 |
+
"content": "\n",
|
3384 |
+
"single_word": false,
|
3385 |
+
"lstrip": false,
|
3386 |
+
"rstrip": false,
|
3387 |
+
"normalized": false,
|
3388 |
+
"special": true
|
3389 |
+
},
|
3390 |
{
|
3391 |
"id": 51200,
|
3392 |
"content": "<|endoftext|>",
|