commit files to HF hub
Browse files- README.md +62 -0
- feature_extractor/preprocessor_config.json +28 -0
- inference.py +18 -0
- model_index.json +33 -0
- scheduler/scheduler_config.json +14 -0
- text_encoder/openvino_model.bin +3 -0
- text_encoder/openvino_model.xml +0 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +24 -0
- tokenizer/tokenizer_config.json +34 -0
- tokenizer/vocab.json +0 -0
- unet/openvino_model.bin +3 -0
- unet/openvino_model.xml +0 -0
- vae_decoder/openvino_model.bin +3 -0
- vae_decoder/openvino_model.xml +0 -0
README.md
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: openrail++
|
3 |
+
tags:
|
4 |
+
- stable-diffusion
|
5 |
+
- text-to-image
|
6 |
+
- openvino
|
7 |
+
|
8 |
+
---
|
9 |
+
|
10 |
+
# OpenVINO Stable Diffusion
|
11 |
+
|
12 |
+
## stabilityai/stable-diffusion-2-1
|
13 |
+
|
14 |
+
This repository contains the models from [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1) converted to
|
15 |
+
OpenVINO, for accelerated inference on CPU or Intel GPU with OpenVINO's integration into Optimum:
|
16 |
+
[optimum-intel](https://github.com/huggingface/optimum-intel#openvino). The model weights are stored with FP16
|
17 |
+
precision, which reduces the size of the model by half.
|
18 |
+
|
19 |
+
Please check out the [source model repository](https://huggingface.co/stabilityai/stable-diffusion-2-1) for more information about the model and its license.
|
20 |
+
|
21 |
+
To install the requirements for this demo, run `pip install "optimum[openvino]"`. This installs all the necessary dependencies,
|
22 |
+
including Transformers and OpenVINO. For more detailed steps, please see this [installation guide](https://github.com/helena-intel/optimum-intel/wiki/OpenVINO-Integration-Installation-Guide).
|
23 |
+
|
24 |
+
The simplest way to generate an image with stable diffusion takes only two lines of code, as shown below. The first line downloads the
|
25 |
+
model from the Hugging Face hub (if it has not been downloaded before) and loads it; the second line generates an image.
|
26 |
+
|
27 |
+
```python
|
28 |
+
from optimum.intel.openvino import OVStableDiffusionPipeline
|
29 |
+
|
30 |
+
stable_diffusion = OVStableDiffusionPipeline.from_pretrained("helenai/stabilityai-stable-diffusion-2-1-ov")
|
31 |
+
images = stable_diffusion("a random image").images
|
32 |
+
```
|
33 |
+
|
34 |
+
The following example code uses static shapes for even faster inference. Using larger image sizes will
|
35 |
+
require more memory and take longer to generate.
|
36 |
+
|
37 |
+
If you have an 11th generation or later Intel Core processor, you can use the integrated GPU for inference, and if you have an Intel
|
38 |
+
discrete GPU, you can use that. Add the line `stable_diffusion.to("GPU")` before `stable_diffusion.compile()` in the example below.
|
39 |
+
Model loading will take some time the first time, but will be faster after that, because the model will be cached. On GPU, for stable
|
40 |
+
diffusion only static shapes are supported at the moment.
|
41 |
+
|
42 |
+
|
43 |
+
```python
|
44 |
+
from optimum.intel.openvino.modeling_diffusion import OVStableDiffusionPipeline
|
45 |
+
|
46 |
+
batch_size = 1
|
47 |
+
num_images_per_prompt = 1
|
48 |
+
height = 256
|
49 |
+
width = 256
|
50 |
+
|
51 |
+
# load the model and reshape to static shapes for faster inference
|
52 |
+
model_id = "helenai/stabilityai-stable-diffusion-2-1-ov"
|
53 |
+
stable_diffusion = OVStableDiffusionPipeline.from_pretrained(model_id, compile=False)
|
54 |
+
stable_diffusion.reshape( batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
|
55 |
+
stable_diffusion.compile()
|
56 |
+
|
57 |
+
# generate image!
|
58 |
+
prompt = "a random image"
|
59 |
+
images = stable_diffusion(prompt, height=height, width=width, num_images_per_prompt=num_images_per_prompt).images
|
60 |
+
images[0].save("result.png")
|
61 |
+
```
|
62 |
+
|
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 224,
|
4 |
+
"width": 224
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"feature_extractor_type": "CLIPFeatureExtractor",
|
12 |
+
"image_mean": [
|
13 |
+
0.48145466,
|
14 |
+
0.4578275,
|
15 |
+
0.40821073
|
16 |
+
],
|
17 |
+
"image_processor_type": "CLIPFeatureExtractor",
|
18 |
+
"image_std": [
|
19 |
+
0.26862954,
|
20 |
+
0.26130258,
|
21 |
+
0.27577711
|
22 |
+
],
|
23 |
+
"resample": 3,
|
24 |
+
"rescale_factor": 0.00392156862745098,
|
25 |
+
"size": {
|
26 |
+
"shortest_edge": 224
|
27 |
+
}
|
28 |
+
}
|
inference.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from optimum.intel.openvino.modeling_diffusion import OVStableDiffusionPipeline
|
2 |
+
|
3 |
+
batch_size = 1
|
4 |
+
num_images_per_prompt = 1
|
5 |
+
height = 256
|
6 |
+
width = 256
|
7 |
+
|
8 |
+
# load the model and reshape to static shapes for faster inference
|
9 |
+
model_id = "helenai/stabilityai-stable-diffusion-2-1-ov"
|
10 |
+
stable_diffusion = OVStableDiffusionPipeline.from_pretrained(model_id, compile=False)
|
11 |
+
stable_diffusion.reshape( batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
|
12 |
+
stable_diffusion.compile()
|
13 |
+
|
14 |
+
# generate image!
|
15 |
+
prompt = "a random image"
|
16 |
+
images = stable_diffusion(prompt, height=height, width=width, num_images_per_prompt=num_images_per_prompt).images
|
17 |
+
images[0].save("result.png")
|
18 |
+
|
model_index.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "OVStableDiffusionPipeline",
|
3 |
+
"_diffusers_version": "0.13.1",
|
4 |
+
"feature_extractor": [
|
5 |
+
"transformers",
|
6 |
+
"CLIPFeatureExtractor"
|
7 |
+
],
|
8 |
+
"requires_safety_checker": false,
|
9 |
+
"safety_checker": [
|
10 |
+
null,
|
11 |
+
null
|
12 |
+
],
|
13 |
+
"scheduler": [
|
14 |
+
"diffusers",
|
15 |
+
"DDIMScheduler"
|
16 |
+
],
|
17 |
+
"text_encoder": [
|
18 |
+
"optimum",
|
19 |
+
"OVModelTextEncoder"
|
20 |
+
],
|
21 |
+
"tokenizer": [
|
22 |
+
"transformers",
|
23 |
+
"CLIPTokenizer"
|
24 |
+
],
|
25 |
+
"unet": [
|
26 |
+
"optimum",
|
27 |
+
"OVModelUnet"
|
28 |
+
],
|
29 |
+
"vae_decoder": [
|
30 |
+
"optimum",
|
31 |
+
"OVModelVaeDecoder"
|
32 |
+
]
|
33 |
+
}
|
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "DDIMScheduler",
|
3 |
+
"_diffusers_version": "0.13.1",
|
4 |
+
"beta_end": 0.012,
|
5 |
+
"beta_schedule": "scaled_linear",
|
6 |
+
"beta_start": 0.00085,
|
7 |
+
"clip_sample": false,
|
8 |
+
"num_train_timesteps": 1000,
|
9 |
+
"prediction_type": "v_prediction",
|
10 |
+
"set_alpha_to_one": false,
|
11 |
+
"skip_prk_steps": true,
|
12 |
+
"steps_offset": 1,
|
13 |
+
"trained_betas": null
|
14 |
+
}
|
text_encoder/openvino_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3d5702e9aefe3f9426713201632b99005f17d77cf6f1bbfb54525a473206ffa
|
3 |
+
size 680776428
|
text_encoder/openvino_model.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|startoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "!",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": true,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": {
|
4 |
+
"__type": "AddedToken",
|
5 |
+
"content": "<|startoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false
|
10 |
+
},
|
11 |
+
"do_lower_case": true,
|
12 |
+
"eos_token": {
|
13 |
+
"__type": "AddedToken",
|
14 |
+
"content": "<|endoftext|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false
|
19 |
+
},
|
20 |
+
"errors": "replace",
|
21 |
+
"model_max_length": 77,
|
22 |
+
"name_or_path": "models/stabilityai-stable-diffusion-2-1-ov/tokenizer",
|
23 |
+
"pad_token": "<|endoftext|>",
|
24 |
+
"special_tokens_map_file": "./special_tokens_map.json",
|
25 |
+
"tokenizer_class": "CLIPTokenizer",
|
26 |
+
"unk_token": {
|
27 |
+
"__type": "AddedToken",
|
28 |
+
"content": "<|endoftext|>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false
|
33 |
+
}
|
34 |
+
}
|
tokenizer/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
unet/openvino_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d753b9848294357fe4d0b0d818ac220a3a763b291434cd6c0fc9100c77774f7b
|
3 |
+
size 1731822168
|
unet/openvino_model.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
vae_decoder/openvino_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bea41feef073b8e247dceeb0ec95d6e523a3c5acc1310882e22b6245509390d9
|
3 |
+
size 98980700
|
vae_decoder/openvino_model.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|