Upload folder using huggingface_hub
- LICENSE +46 -0
- README.md +66 -3
- config.json +3 -0
- onnx/text_model.onnx +3 -0
- onnx/text_model_bnb4.onnx +3 -0
- onnx/text_model_fp16.onnx +3 -0
- onnx/text_model_int8.onnx +3 -0
- onnx/text_model_q4.onnx +3 -0
- onnx/text_model_quantized.onnx +3 -0
- onnx/text_model_uint8.onnx +3 -0
- onnx/vision_model.onnx +3 -0
- onnx/vision_model_bnb4.onnx +3 -0
- onnx/vision_model_fp16.onnx +3 -0
- onnx/vision_model_int8.onnx +3 -0
- onnx/vision_model_q4.onnx +3 -0
- onnx/vision_model_quantized.onnx +3 -0
- onnx/vision_model_uint8.onnx +3 -0
- preprocessor_config.json +18 -0
- tokenizer.json +0 -0
- tokenizer_config.json +34 -0
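The `onnx/` entries above ship several precision variants of each encoder (fp16, int8, uint8, q4, bnb4, and the default 8-bit `*_quantized` files). As a hedged sketch of how a variant is chosen with the `@xenova/transformers` v2 option used in the README below (the exact option-to-file mapping is an assumption about that library version):

```js
import { CLIPVisionModelWithProjection } from '@xenova/transformers';

// quantized: true (the default) is expected to load onnx/vision_model_quantized.onnx;
// quantized: false should load the full-precision onnx/vision_model.onnx instead.
const vision_model = await CLIPVisionModelWithProjection.from_pretrained(
    'Xenova/mobileclip_s0',
    { quantized: false },
);
```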
LICENSE
ADDED
@@ -0,0 +1,46 @@
+Copyright (C) 2024 Apple Inc. All Rights Reserved.
+
+IMPORTANT: This Apple software is supplied to you by Apple
+Inc. ("Apple") in consideration of your agreement to the following
+terms, and your use, installation, modification or redistribution of
+this Apple software constitutes acceptance of these terms. If you do
+not agree with these terms, please do not use, install, modify or
+redistribute this Apple software.
+
+In consideration of your agreement to abide by the following terms, and
+subject to these terms, Apple grants you a personal, non-exclusive
+license, under Apple's copyrights in this original Apple software (the
+"Apple Software"), to use, reproduce, modify and redistribute the Apple
+Software, with or without modifications, in source and/or binary forms;
+provided that if you redistribute the Apple Software in its entirety and
+without modifications, you must retain this notice and the following
+text and disclaimers in all such redistributions of the Apple Software.
+Neither the name, trademarks, service marks or logos of Apple Inc. may
+be used to endorse or promote products derived from the Apple Software
+without specific prior written permission from Apple. Except as
+expressly stated in this notice, no other rights or licenses, express or
+implied, are granted by Apple herein, including but not limited to any
+patent rights that may be infringed by your derivative works or by other
+works in which the Apple Software may be incorporated.
+
+The Apple Software is provided by Apple on an "AS IS" basis. APPLE
+MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
+THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND
+OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
+
+IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION,
+MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED
+AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE),
+STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------------------
+SOFTWARE DISTRIBUTED WITH ML-MobileCLIP:
+
+The ML-MobileCLIP software includes a number of subcomponents with separate
+copyright notices and license terms - please see the file ACKNOWLEDGEMENTS.
+-------------------------------------------------------------------------------
README.md
CHANGED
@@ -1,3 +1,66 @@
----
-
-
+---
+library_name: transformers.js
+pipeline_tag: zero-shot-image-classification
+license: other
+tags:
+- mobileclip
+- image-feature-extraction
+- feature-extraction
+---
+
+https://github.com/apple/ml-mobileclip with ONNX weights to be compatible with Transformers.js.
+
+## Usage (Transformers.js)
+
+If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
+```bash
+npm i @xenova/transformers
+```
+
+**Example:** Perform zero-shot image classification.
+```js
+import {
+    AutoTokenizer,
+    CLIPTextModelWithProjection,
+    AutoProcessor,
+    CLIPVisionModelWithProjection,
+    RawImage,
+    dot,
+    softmax,
+} from '@xenova/transformers';
+
+const model_id = 'Xenova/mobileclip_s0';
+
+// Load tokenizer and text model
+const tokenizer = await AutoTokenizer.from_pretrained(model_id);
+const text_model = await CLIPTextModelWithProjection.from_pretrained(model_id);
+
+// Load processor and vision model
+const processor = await AutoProcessor.from_pretrained(model_id);
+const vision_model = await CLIPVisionModelWithProjection.from_pretrained(model_id, {
+    quantized: false, // NOTE: vision model is sensitive to quantization.
+});
+
+// Run tokenization
+const texts = ['cats', 'dogs', 'birds'];
+const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
+
+// Compute text embeddings
+const { text_embeds } = await text_model(text_inputs);
+const normalized_text_embeds = text_embeds.normalize().tolist();
+
+// Read image and run processor
+const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
+const image = await RawImage.read(url);
+const image_inputs = await processor(image);
+
+// Compute vision embeddings
+const { image_embeds } = await vision_model(image_inputs);
+const normalized_image_embeds = image_embeds.normalize().tolist();
+
+// Compute probabilities
+const probabilities = normalized_image_embeds.map(
+    x => softmax(normalized_text_embeds.map(y => 100 * dot(x, y)))
+);
+console.log(probabilities); // [[ 0.9989384093386391, 0.001060433633052551, 0.000001157028308360134 ]]
+```
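For reference, the final step of the README example computes CLIP-style zero-shot probabilities: the cosine similarity of each L2-normalized image/text embedding pair, scaled by a fixed factor of 100 (which we read as standing in for the model's learned logit scale; that interpretation is ours, not stated in this diff), followed by a softmax over the candidate labels:

```js
import { dot, softmax } from '@xenova/transformers';

// Hedged restatement of the scoring step above, for one image embedding
// against an array of text embeddings (all already L2-normalized).
function zeroShotProbs(imageEmbed, textEmbeds) {
    const logits = textEmbeds.map(t => 100 * dot(imageEmbed, t)); // scaled cosine similarity
    return softmax(logits); // probabilities over the candidate labels
}
```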
config.json
ADDED
@@ -0,0 +1,3 @@
+{
+  "model_type": "clip"
+}
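`"model_type": "clip"` is what lets Transformers.js route this repository to its CLIP model, processor, and tokenizer classes. A minimal sketch, assuming the library's high-level `zero-shot-image-classification` pipeline resolves this model the same way the explicit classes in the README do:

```js
import { pipeline } from '@xenova/transformers';

// Hypothetical pipeline equivalent of the README example.
const classifier = await pipeline('zero-shot-image-classification', 'Xenova/mobileclip_s0');
const output = await classifier(
    'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg',
    ['cats', 'dogs', 'birds'],
);
console.log(output); // e.g. [{ label: 'cats', score: ... }, ...]
```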
onnx/text_model.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6e9bd5742bfc515889e901634d8a2ff2a57fab8564e4ad3760e800b1a51b77c
+size 169807789
onnx/text_model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9464281b0da079bb7e0bd65b769a96dadca7a076b93ede7faf8e14c621b2d39a
+size 125655548
onnx/text_model_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f74b7b3abd9f3a70dcc60115f627bbadb9534606ac841400f9841f49bf980cc
+size 84971030
onnx/text_model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc8d87978623385c17a46331ffb9cb5ab7fe8b61c513c094602b85f08edd0a0b
+size 42799230
onnx/text_model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6758a2bc069c64dce6b8d35a53c828189cfb34e991f1c9426ce2ecb4fb1d5a4
+size 126458232
onnx/text_model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8557b10e5c23a0126c6d2e6eba48d240484979007917d128953b31618a04211
+size 42799238
onnx/text_model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8557b10e5c23a0126c6d2e6eba48d240484979007917d128953b31618a04211
+size 42799238
onnx/vision_model.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17d3c037b1d488c10c50e09f6009ea5a198caef4e0e8f4ea5617b7cb2d067ac0
+size 45543630
onnx/vision_model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f30b8b45dd0b9fb0df7239b8836bcb2d49ab4a3f2d47b912b9311de0c63bda54
+size 36533217
onnx/vision_model_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22b1d36ecc6837e8205aee05003440a25e1c1ee0c7e2945dbb9dd597211c59dc
+size 22876479
onnx/vision_model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a1b45f57fb9f3cde9d325759883e9451d7281336caeb9c576ae918e72080f0b
+size 11846808
onnx/vision_model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122be38f8bef9cb165c63f67af2da5ddf037dc3239b9785b83f4ad4a683d6ea
+size 36697020
onnx/vision_model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcbd153d1aa1314fb72ea39b20c37e0572e7e7b05359b51f3efee5d682658472
+size 11846843
onnx/vision_model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcbd153d1aa1314fb72ea39b20c37e0572e7e7b05359b51f3efee5d682658472
+size 11846843
preprocessor_config.json
ADDED
@@ -0,0 +1,18 @@
+{
+  "crop_size": {
+    "height": 256,
+    "width": 256
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_processor_type": "CLIPFeatureExtractor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 256
+  }
+}
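Read together, these settings mean the processor resizes the shortest edge to 256 with bilinear resampling (`resample: 2`), center-crops to 256×256, converts to RGB, and rescales pixel values by 1/255 with no mean/std normalization (`do_normalize: false`). A minimal sketch of the per-pixel arithmetic implied by `do_rescale` (illustration only, not the library's implementation):

```js
// rescale_factor is 0.00392156862745098, i.e. 1/255.
const RESCALE_FACTOR = 1 / 255;

// Maps an 8-bit channel value in [0, 255] to a float in [0, 1];
// with do_normalize: false, no further mean/std shift is applied.
function rescaleChannel(value) {
    return value * RESCALE_FACTOR;
}

console.log(rescaleChannel(255)); // 1
```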
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 77,
+  "pad_token": "!",
+  "processor_class": "CLIPProcessor",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
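These settings describe a standard lower-casing CLIP BPE tokenizer with `<|startoftext|>`/`<|endoftext|>` markers, a context length of 77, and `!` as the padding token. A minimal sketch of how this interacts with the README's `padding: 'max_length'` call (the output shape is an expectation from `model_max_length`, not taken from this diff):

```js
import { AutoTokenizer } from '@xenova/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('Xenova/mobileclip_s0');

// Each sequence is padded/truncated to model_max_length (77) tokens.
const { input_ids } = tokenizer(['a photo of a cat'], {
    padding: 'max_length',
    truncation: true,
});
console.log(input_ids.dims); // expected: [1, 77]
```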