Paul Dufour committed on
Commit
5c5a02d
0 Parent(s):

Initial commit with existing Git LFS settings

Browse files
.gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
37
+ onnx/**/* filter=lfs diff=lfs merge=lfs -text
38
+ **/.git* filter= diff= merge= text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
Makefile ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .SHELLFLAGS := -e -c
2
+ SHELL := /bin/bash
3
+
4
+ # Configuration variables
5
+ NATIVE_ANDROID = ../Native-LLM-for-Android
6
+ QWEN_VL_DIR = $(NATIVE_ANDROID)/Export_ONNX/QwenVL
7
+ ONNX_SRC_DIR = $(QWEN_VL_DIR)/onnx
8
+ ONNX_DEST_DIR = $(QWEN_VL_DIR)/onnx-dist
9
+ STAGING_DIR = /tmp/transformers.js/staging
10
+ TRANSFORMERS_JS_PATH = ../transformers.js
11
+ ONNX_TOOLS_PATH = $(NATIVE_ANDROID)/ONNX_Tools
12
+
13
+ # Python paths from venvs
14
+ NATIVE_PYTHON = $(NATIVE_ANDROID)/.venv/bin/python3
15
+ TRANSFORMERS_PYTHON = $(TRANSFORMERS_JS_PATH)/.venv/bin/python3
16
+
17
+ # Model parts
18
+ PARTS = A B C D E
19
+
20
+ define progress_bar
21
+ total=$$(echo $(1) | wc -w | tr -d ' '); \
22
+ current=0; \
23
+ for item in $(1); do \
24
+ current=$$((current + 1)); \
25
+ printf "\r Progress: \033[1;32m["; \
26
+ for ((i=0; i<current*20/total; i++)); do printf "="; done; \
27
+ printf "\033[0m"; \
28
+ for ((i=current*20/total; i<20; i++)); do printf " "; done; \
29
+ printf "\033[1;32m]\033[0m %s/%s " "$$current" "$$total"; \
30
+ printf "\033[1;34m%s\033[K\033[0m\n" "$$item"; \
31
+ cmd="$(2)"; \
32
+ cmd=$$(echo "$$cmd" | sed "s|{}|$$item|g"); \
33
+ $$cmd; \
34
+ done; \
35
+ printf "\n"
36
+ endef
37
+
38
+ .PHONY: all all-in-one clean clean-large-files export export-abcd export-e fix-gpu-buffers quantize quantize-% slim # export-abcd/export-e are commands, not files
39
+
40
+ all-in-one: export quantize clean-large-files slim fix-gpu-buffers ## Run the whole pipeline. NOTE(review): message below says "slimmed, quantized" but slim is listed after quantize - confirm intended order
41
+ @echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"
42
+
43
+ export: export-abcd export-e ## Export all model parts (runs export-abcd and export-e)
44
+ @echo "✅ Export complete"
45
+
46
+ export-abcd: ## Run QwenVL_Export_ABCD.py inside QWEN_VL_DIR for Qwen/Qwen2-VL-2B-Instruct
47
+ @echo "🚀 Exporting parts A, B, C, D..."
48
+ cd $(QWEN_VL_DIR) && \
49
+ ../../.venv/bin/python3 QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"
50
+
51
+ export-e: ## Run QwenVL_Export_E.py inside QWEN_VL_DIR for Qwen/Qwen2-VL-2B-Instruct
52
+ @echo "🚀 Exporting part E..."
53
+ cd $(QWEN_VL_DIR) && \
54
+ ../../.venv/bin/python3 QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"
55
+
56
+ slim: ## Run onnxslim on every .onnx under ONNX_SRC_DIR except QwenVL_E.onnx, passing the same path as both arguments
57
+ @echo "🗜️ Slimming ONNX models..."
58
+ @files=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
59
+ $(call progress_bar,$$files,onnxslim --verbose {} {})
60
+ @echo "✅ Slimming complete"
61
+
62
+ quantize: ## Quantize each part in PARTS via a sub-make of quantize-<part>; stops at the first failing part
63
+ @echo "⚡ Starting quantization..."
64
+ for part in $(PARTS); do \
65
+ $(MAKE) quantize-$$part || exit 1; \
66
+ done
67
+ @echo "✅ Quantization complete"
68
+
69
+ quantize-%: ## Stage one exported part (the stem) in STAGING_DIR and run scripts.quantize on it in q4f16 mode; the whole recipe is one && chain so any failed step aborts
70
+ @echo "⚡ Quantizing part $*..."
71
+ mkdir -p $(ONNX_DEST_DIR)
72
+ cd $(TRANSFORMERS_JS_PATH) && \
73
+ mkdir -p $(STAGING_DIR) && \
74
+ rm -f $(STAGING_DIR)/* && \
75
+ ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
76
+ find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
77
+ find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
78
+ find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
79
+ EXTRA_FLAGS="" && \
80
+ if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi && \
81
+ echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
82
+ PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
83
+ --input_folder '$(STAGING_DIR)' \
84
+ --output_folder '$(ONNX_DEST_DIR)' \
85
+ --mode q4f16 $$EXTRA_FLAGS
86
+
87
+ clean-large-files: ## Delete each .onnx in ONNX_DEST_DIR (with its .data/_data sidecar) whose combined size is >= 2147483648 bytes; sizes come from wc -c so it works with both BSD and GNU userlands
88
+ @echo "🧹 Removing ONNX files over 2GB..."
89
+ cd $(ONNX_DEST_DIR) && \
90
+ for f in $$(find . -name "*.onnx" -type f); do \
91
+ total_size=0; \
92
+ if [ -f "$$f"".data" ]; then \
93
+ total_size=$$(( $$(wc -c < "$$f") + $$(wc -c < "$$f"".data") )); \
94
+ elif [ -f "$$f""_data" ]; then \
95
+ total_size=$$(( $$(wc -c < "$$f") + $$(wc -c < "$$f""_data") )); \
96
+ else \
97
+ total_size=$$(( $$(wc -c < "$$f") )); \
98
+ fi; \
99
+ size_mb=$$(( total_size / 1048576 )); \
100
+ if [ $$total_size -ge 2147483648 ]; then \
101
+ echo " Removing $$f (size: $$size_mb MB)..."; \
102
+ rm -f "$$f" "$$f"".data" "$$f""_data"; \
103
+ fi \
104
+ done
105
+ @echo "✅ Large file cleanup complete"
106
+
107
+ fix-gpu-buffers: ## Run ONNX_Tools/clamp_for_gpu_buffers.py --overwrite on every QwenVL_E_*.onnx in ONNX_DEST_DIR; the braces keep the loop from running if cd fails
108
+ @echo "🔧 Fixing GPU buffers for E models..."
109
+ cd $(NATIVE_ANDROID) && { \
110
+ files=$$(find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f); \
111
+ $(call progress_bar,$$files, .venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite {}); }
112
+ @echo "✅ GPU buffer fixes complete"
README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model:
4
+ - Qwen/Qwen2-VL-2B-Instruct
5
+ ---
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "vision_start_token_id": 151652,
9
+ "vision_end_token_id": 151653,
10
+ "vision_token_id": 151654,
11
+ "image_token_id": 151655,
12
+ "video_token_id": 151656,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 1536,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 8960,
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 28,
19
+ "model_type": "qwen2_vl",
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 28,
22
+ "num_key_value_heads": 2,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_theta": 1000000.0,
25
+ "sliding_window": 32768,
26
+ "tie_word_embeddings": true,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.41.2",
29
+ "use_cache": true,
30
+ "use_sliding_window": false,
31
+ "vision_config": {
32
+ "depth": 32,
33
+ "embed_dim": 1280,
34
+ "mlp_ratio": 4,
35
+ "num_heads": 16,
36
+ "in_chans": 3,
37
+ "hidden_size": 1536,
38
+ "patch_size": 14,
39
+ "spatial_merge_size": 2,
40
+ "spatial_patch_size": 14,
41
+ "temporal_patch_size": 2
42
+ },
43
+ "rope_scaling": {
44
+ "type": "mrope",
45
+ "mrope_section": [
46
+ 16,
47
+ 24,
48
+ 24
49
+ ]
50
+ },
51
+ "vocab_size": 151936
52
+ }
embeddings_bf16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5387220a9d57908c2c6fa69bcfa64fda4234e59103fa74f56d07eaa6f9af2493
3
+ size 466747392
generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.0,
10
+ "temperature": 0.01,
11
+ "top_p": 0.001,
12
+ "top_k": 1,
13
+ "transformers_version": "4.37.0"
14
+ }
15
+
llm_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_size": 1536,
3
+ "layer_nums": 28,
4
+ "attention_mask": "float",
5
+ "key_value_shape": [
6
+ 2,
7
+ 1,
8
+ 0,
9
+ 2,
10
+ 128
11
+ ],
12
+ "prompt_template": "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n",
13
+ "is_visual": true,
14
+ "image_mean": [
15
+ 122.7709383,
16
+ 116.7460125,
17
+ 104.09373615000001
18
+ ],
19
+ "image_norm": [
20
+ 0.01459842661924292,
21
+ 0.015007768493717056,
22
+ 0.014220065717024088
23
+ ],
24
+ "image_size": 420,
25
+ "vision_start": 151652,
26
+ "vision_end": 151653,
27
+ "image_pad": 151655
28
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
onnx/QwenVL_A_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdd6e4c85b5a835227106c01b31be8220eb4684026e726372a97c74cfdbcd983
3
+ size 1330987067
onnx/QwenVL_B_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c5981ece4d144bc7f5352e56bb19d0d4b3bf22d1f8c472a106fcdcf83a9ebdf
3
+ size 233983290
onnx/QwenVL_C_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc4f5f601f5ac0b16632e4dc953ce7009f8c2bf0c5e5c1553b5250cda832a68a
3
+ size 6364
onnx/QwenVL_D_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5b46671d4d41a864d1390ef87eb5819e8c6fd044cded45e688301ae8eb7ab57
3
+ size 25118
onnx/QwenVL_E_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599c81da0035adf98d77db9b5776e7070017887394d06dd901c4d72125f6fd2b
3
+ size 996827324
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|object_ref_start|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|object_ref_end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|box_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_start|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_end|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|image_pad|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|video_pad|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ }
116
+ },
117
+ "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>","<|object_ref_end|>","<|box_start|>","<|box_end|>","<|quad_start|>","<|quad_end|>","<|vision_start|>","<|vision_end|>","<|vision_pad|>","<|image_pad|>","<|video_pad|>"],
118
+ "bos_token": null,
119
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
120
+ "clean_up_tokenization_spaces": false,
121
+ "eos_token": "<|im_end|>",
122
+ "padding_side": "left",
123
+ "errors": "replace",
124
+ "model_max_length": 32768,
125
+ "pad_token": "<|endoftext|>",
126
+ "split_special_tokens": false,
127
+ "tokenizer_class": "Qwen2Tokenizer",
128
+ "unk_token": null
129
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff