Upload processor
Browse files
- special_tokens_map.json +3 -3
- tokenizer.json +2 -2
- tokenizer_config.json +6 -6
special_tokens_map.json
CHANGED
@@ -424,7 +424,7 @@
|
|
424 |
"<im_col>",
|
425 |
"<|image|>"
|
426 |
],
|
427 |
-
"boi_token": "<
|
428 |
"bos_token": {
|
429 |
"content": "<|endoftext|>",
|
430 |
"lstrip": false,
|
@@ -432,7 +432,7 @@
|
|
432 |
"rstrip": false,
|
433 |
"single_word": false
|
434 |
},
|
435 |
-
"eoi_token": "<
|
436 |
"eos_token": {
|
437 |
"content": "<|endoftext|>",
|
438 |
"lstrip": false,
|
@@ -441,7 +441,7 @@
|
|
441 |
"single_word": false
|
442 |
},
|
443 |
"im_col_token": "<im_col>",
|
444 |
-
"im_patch_token": "<
|
445 |
"image_token": "<image>",
|
446 |
"pad_token": {
|
447 |
"content": "<|endoftext|>",
|
|
|
424 |
"<im_col>",
|
425 |
"<|image|>"
|
426 |
],
|
427 |
+
"boi_token": "<im_start>",
|
428 |
"bos_token": {
|
429 |
"content": "<|endoftext|>",
|
430 |
"lstrip": false,
|
|
|
432 |
"rstrip": false,
|
433 |
"single_word": false
|
434 |
},
|
435 |
+
"eoi_token": "<im_end>",
|
436 |
"eos_token": {
|
437 |
"content": "<|endoftext|>",
|
438 |
"lstrip": false,
|
|
|
441 |
"single_word": false
|
442 |
},
|
443 |
"im_col_token": "<im_col>",
|
444 |
+
"im_patch_token": "<im_patch>",
|
445 |
"image_token": "<image>",
|
446 |
"pad_token": {
|
447 |
"content": "<|endoftext|>",
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec9836c15b34f4d6e1c2324574589d9886de43d5ba3339b27c5be18ed8a9c0df
|
3 |
+
size 11501616
|
tokenizer_config.json
CHANGED
@@ -3846,22 +3846,22 @@
|
|
3846 |
"auto_map": {
|
3847 |
"AutoProcessor": "preprocessing_molmo.MolmoProcessor"
|
3848 |
},
|
3849 |
-
"boi_token": "<
|
3850 |
"bos_token": "<|endoftext|>",
|
3851 |
"chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
|
3852 |
"clean_up_tokenization_spaces": false,
|
3853 |
-
"eoi_token": "<
|
3854 |
"eos_token": "<|endoftext|>",
|
3855 |
"errors": "replace",
|
3856 |
"extra_special_tokens": {
|
3857 |
-
"boi_token": "<
|
3858 |
-
"eoi_token": "<
|
3859 |
"im_col_token": "<im_col>",
|
3860 |
-
"im_patch_token": "<
|
3861 |
"image_token": "<image>"
|
3862 |
},
|
3863 |
"im_col_token": "<im_col>",
|
3864 |
-
"im_patch_token": "<
|
3865 |
"image_token": "<image>",
|
3866 |
"model_max_length": 32768,
|
3867 |
"pad_token": "<|endoftext|>",
|
|
|
3846 |
"auto_map": {
|
3847 |
"AutoProcessor": "preprocessing_molmo.MolmoProcessor"
|
3848 |
},
|
3849 |
+
"boi_token": "<im_start>",
|
3850 |
"bos_token": "<|endoftext|>",
|
3851 |
"chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
|
3852 |
"clean_up_tokenization_spaces": false,
|
3853 |
+
"eoi_token": "<im_end>",
|
3854 |
"eos_token": "<|endoftext|>",
|
3855 |
"errors": "replace",
|
3856 |
"extra_special_tokens": {
|
3857 |
+
"boi_token": "<im_start>",
|
3858 |
+
"eoi_token": "<im_end>",
|
3859 |
"im_col_token": "<im_col>",
|
3860 |
+
"im_patch_token": "<im_patch>",
|
3861 |
"image_token": "<image>"
|
3862 |
},
|
3863 |
"im_col_token": "<im_col>",
|
3864 |
+
"im_patch_token": "<im_patch>",
|
3865 |
"image_token": "<image>",
|
3866 |
"model_max_length": 32768,
|
3867 |
"pad_token": "<|endoftext|>",
|