Aixile
committed on
Commit
•
50e1e74
1
Parent(s):
323bf0d
support older transformers versions
Browse files
- tokenization_qwen.py +7 -7
tokenization_qwen.py
CHANGED
@@ -18,7 +18,7 @@ from PIL import Image
|
|
18 |
from PIL import ImageFont
|
19 |
from PIL import ImageDraw
|
20 |
from transformers import PreTrainedTokenizer, AddedToken
|
21 |
-
from transformers.utils import try_to_load_from_cache
|
22 |
|
23 |
import matplotlib.colors as mcolors
|
24 |
from matplotlib.font_manager import FontProperties
|
@@ -27,12 +27,12 @@ logger = logging.getLogger(__name__)
|
|
27 |
|
28 |
|
29 |
VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
|
30 |
-
FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
|
31 |
-
if FONT_PATH is None:
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
|
38 |
ENDOFTEXT = "<|endoftext|>"
|
|
|
18 |
from PIL import ImageFont
|
19 |
from PIL import ImageDraw
|
20 |
from transformers import PreTrainedTokenizer, AddedToken
|
21 |
+
# from transformers.utils import try_to_load_from_cache
|
22 |
|
23 |
import matplotlib.colors as mcolors
|
24 |
from matplotlib.font_manager import FontProperties
|
|
|
27 |
|
28 |
|
29 |
VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
|
30 |
+
# FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
|
31 |
+
# if FONT_PATH is None:
|
32 |
+
# if not os.path.exists("SimSun.ttf"):
|
33 |
+
# ttf = requests.get("https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/SimSun.ttf")
|
34 |
+
# open("SimSun.ttf", "wb").write(ttf.content)
|
35 |
+
# FONT_PATH = "SimSun.ttf"
|
36 |
|
37 |
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
|
38 |
ENDOFTEXT = "<|endoftext|>"
|