xiaoshi committed
Commit 2c0a335
1 Parent(s): 24ce4dd

Upload 3 files
Files changed (4)
  1. .gitattributes +2 -0
  2. FiddlerSetup2 - 副本.exe +3 -0
  3. FiddlerSetup3.exe +3 -0
  4. model.py +306 -0
.gitattributes CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  torch-2.0.0+cu118-cp38-cp38-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
  torchvision-0.15.1+cu118-cp38-cp38-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+ FiddlerSetup2[[:space:]]-[[:space:]]副本.exe filter=lfs diff=lfs merge=lfs -text
+ FiddlerSetup3.exe filter=lfs diff=lfs merge=lfs -text

(The [[:space:]] sequences are how Git LFS escapes the spaces in the filename "FiddlerSetup2 - 副本.exe"; 副本 is Chinese for "copy".)
FiddlerSetup2 - 副本.exe ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c92f0738c290eac319d4ac3006b5725f1d2163fbfe68dbb2047e07920f4d5e8
+ size 6840480
FiddlerSetup3.exe ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c92f0738c290eac319d4ac3006b5725f1d2163fbfe68dbb2047e07920f4d5e8
+ size 6840480
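
Both executables are checked in as Git LFS pointer files, so the three lines above are the entire committed content of each; note that the two pointers carry the same oid and size, meaning they resolve to byte-identical binaries. A pointer is plain key-value text, so it can be read without git; a minimal sketch (the parsing helper is illustrative, not part of the repository):

    def read_lfs_pointer(path):
        # Each line of a pointer file is "<key> <value>", e.g.
        # "oid sha256:5c92f073..." or "size 6840480", as in the diffs above.
        fields = {}
        with open(path, encoding='utf-8') as f:
            for line in f:
                key, _, value = line.strip().partition(' ')
                fields[key] = value
        return fields

    pointer = read_lfs_pointer('FiddlerSetup3.exe')
    print(pointer['oid'])   # sha256:5c92f073...
    print(pointer['size'])  # 6840480
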
model.py ADDED
@@ -0,0 +1,306 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json

import numpy as np
import torch
# triton_python_backend_utils is available in every Triton Python model. You
# need to use this module to create inference requests and responses. It also
# contains some utility functions for extracting information from model_config
# and converting Triton input/output types to numpy types.
import triton_python_backend_utils as pb_utils
from diffusers import (StableDiffusionXLPipeline,
                       AutoencoderKL,
                       ControlNetModel,
                       StableDiffusionXLImg2ImgPipeline,
                       StableDiffusionXLControlNetPipeline,
                       StableDiffusionXLControlNetImg2ImgPipeline,
                       StableDiffusionPipeline)

from diffusers.utils import load_image

from PIL import Image


def prepare_tpose_image(img):
    # Pre-render the T-pose control image onto a black canvas for every
    # supported aspect ratio (keys match self.ratio_dict in initialize).
    tpose_img_ratio = {}
    padding_color = (0, 0, 0)

    # img0: 1024x768, pose centered horizontally
    padded_image = Image.new(img.mode, (1024, 768), padding_color)
    img768 = img.resize((768, 768))
    padded_image.paste(img768, ((1024 - 768) // 2, 0))
    tpose_img_ratio[0] = padded_image

    # img1: 800x800, no padding needed
    img800 = img.resize((800, 800))
    tpose_img_ratio[1] = img800

    # img2: 600x800, pose centered vertically
    padded_image = Image.new(img.mode, (600, 800), padding_color)
    img600 = img.resize((600, 600))
    padded_image.paste(img600, (0, (800 - 600) // 2))
    tpose_img_ratio[2] = padded_image

    # img3: 1024x576, pose centered horizontally
    padded_image = Image.new(img.mode, (1024, 576), padding_color)
    img576 = img.resize((576, 576))
    padded_image.paste(img576, ((1024 - 576) // 2, 0))
    tpose_img_ratio[3] = padded_image

    # img4: 448x800, pose centered vertically
    padded_image = Image.new(img.mode, (448, 800), padding_color)
    img448 = img.resize((448, 448))
    padded_image.paste(img448, (0, (800 - 448) // 2))
    tpose_img_ratio[4] = padded_image

    # img5: 1024x680, pose centered horizontally
    padded_image = Image.new(img.mode, (1024, 680), padding_color)
    img680 = img.resize((680, 680))
    padded_image.paste(img680, ((1024 - 680) // 2, 0))
    tpose_img_ratio[5] = padded_image

    # img6: 528x800, pose centered vertically
    padded_image = Image.new(img.mode, (528, 800), padding_color)
    img528 = img.resize((528, 528))
    padded_image.paste(img528, (0, (800 - 528) // 2))
    tpose_img_ratio[6] = padded_image

    return tpose_img_ratio

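# Illustrative check (editor's sketch, not part of the committed file): each
# canvas built by prepare_tpose_image matches one entry of the ratio_dict
# defined in initialize() below, e.g.
#
#     pose = Image.open('t-pose.jpg')      # any square RGB pose image
#     canvases = prepare_tpose_image(pose)
#     canvases[0].size   # (1024, 768) -- 768x768 pose, centered horizontally
#     canvases[4].size   # (448, 800)  -- 448x448 pose, centered vertically
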
class TritonPythonModel:
    """Your Python model must use the same class name.

    Every Python model that is created must have "TritonPythonModel" as the
    class name.
    """

    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.
        Implementing `initialize` function is optional. This function allows
        the model to initialize any state associated with this model.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance
            device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """

        print(args)

        # model_config arrives as a JSON string and must be parsed here.
        self.model_config = json.loads(args['model_config'])
        weight_dtype = torch.float16

        # pose control
        self.controlnet = ControlNetModel.from_pretrained(
            "/nvme/shared/huggingface_hub/models/controlnet-openpose-sdxl-1.0",
            torch_dtype=weight_dtype)
        self.controlnet = self.controlnet.to(f"cuda:{args['model_instance_device_id']}")

        self.tpose_image = load_image('/nvme/liuwenran/repos/magicmaker2-image-generation/data/t-pose.jpg')

        # anime style
        anime_ckpt_dir = '/nvme/shared/civitai_models/ckpts/models--gsdf--CounterfeitXL/snapshots/4708675873bd09833aabc3fd4cb2de5fcd1726ac'
        self.pipeline_anime = StableDiffusionXLPipeline.from_pretrained(
            anime_ckpt_dir, torch_dtype=weight_dtype
        )
        self.pipeline_anime = self.pipeline_anime.to(f"cuda:{args['model_instance_device_id']}")

        # realistic style
        realistic_ckpt_dir = '/nvme/shared/civitai_models/ckpt_save_pretrained/copaxTimelessxlSDXL1_v8'
        self.pipeline_realistic = StableDiffusionXLPipeline.from_pretrained(
            realistic_ckpt_dir, torch_dtype=weight_dtype
        )
        self.pipeline_realistic = self.pipeline_realistic.to(f"cuda:{args['model_instance_device_id']}")

        # dim3 checkpoint for oil painting style and sketch
        dim3_ckpt_dir = '/nvme/shared/civitai_models/ckpt_save_pretrained/protovisionXLHighFidelity3D_release0630Bakedvae'
        self.pipeline_oil_painting = StableDiffusionXLPipeline.from_pretrained(
            dim3_ckpt_dir, torch_dtype=weight_dtype
        )
        oil_painting_lora_dir = '/nvme/shared/civitai_models/loras/ClassipeintXL1.9.safetensors'
        self.pipeline_oil_painting.load_lora_weights(oil_painting_lora_dir)
        self.pipeline_oil_painting = self.pipeline_oil_painting.to(f"cuda:{args['model_instance_device_id']}")

        # sd xl base
        # pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"
        pretrained_model_name_or_path = '/nvme/shared/huggingface_hub/huggingface/hub/models--stabilityai--stable-diffusion-xl-base-1.0/snapshots/76d28af79639c28a79fa5c6c6468febd3490a37e'
        # vae_path = "madebyollin/sdxl-vae-fp16-fix"
        vae_path = '/nvme/shared/huggingface_hub/huggingface/hub/models--madebyollin--sdxl-vae-fp16-fix/snapshots/4df413ca49271c25289a6482ab97a433f8117d15'
        vae = AutoencoderKL.from_pretrained(
            vae_path,
            torch_dtype=weight_dtype,
        )

        # guofeng style
        guofeng_lora_dir = '/nvme/shared/civitai_models/loras/minimalism.safetensors'
        self.pipeline_guofeng = StableDiffusionXLPipeline.from_pretrained(
            pretrained_model_name_or_path, vae=vae, torch_dtype=weight_dtype
        )
        self.pipeline_guofeng.load_lora_weights(guofeng_lora_dir)
        self.pipeline_guofeng = self.pipeline_guofeng.to(f"cuda:{args['model_instance_device_id']}")

        # manghe style
        manghe_lora_dir = '/nvme/shared/civitai_models/loras/mengwa.safetensors'
        self.pipeline_manghe = StableDiffusionXLPipeline.from_pretrained(
            pretrained_model_name_or_path, vae=vae, torch_dtype=weight_dtype
        )
        self.pipeline_manghe.load_lora_weights(manghe_lora_dir)
        self.pipeline_manghe = self.pipeline_manghe.to(f"cuda:{args['model_instance_device_id']}")

        # output (width, height) for each supported aspect-ratio index
        self.ratio_dict = {
            0: (1024, 768),
            1: (800, 800),
            2: (600, 800),
            3: (1024, 576),
            4: (448, 800),
            5: (1024, 680),
            6: (528, 800)
        }

        self.tpose_image_ratio = prepare_tpose_image(self.tpose_image)

        # SD 1.5 is loaded only for its safety checker (see execute below).
        sd15_dir = '/nvme/shared/stable-diffusion-v1-5'
        self.sd15 = StableDiffusionPipeline.from_pretrained(sd15_dir)
        self.sd15 = self.sd15.to(f"cuda:{args['model_instance_device_id']}")

    def execute(self, requests):
        """`execute` must be implemented in every Python model. The `execute`
        function receives a list of pb_utils.InferenceRequest as the only
        argument. This function is called when an inference is requested
        for this model. Depending on the batching configuration (e.g. Dynamic
        Batching) used, `requests` may contain multiple requests. Every
        Python model must create one pb_utils.InferenceResponse for every
        pb_utils.InferenceRequest in `requests`. If there is an error, you can
        set the error argument when creating a pb_utils.InferenceResponse.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          A list of pb_utils.InferenceResponse. The length of this list must
          be the same as `requests`
        """

        responses = []

        # Every Python backend must iterate over every one of the requests
        # and create a pb_utils.InferenceResponse for each of them.
        for request in requests:
            # Get input tensors
            prompt = pb_utils.get_input_tensor_by_name(request, 'PROMPT').as_numpy()
            prompt = prompt.item().decode('utf-8')

            style = pb_utils.get_input_tensor_by_name(request, 'STYLE').as_numpy()
            style = style.item().decode('utf-8')

            ref_img = pb_utils.get_input_tensor_by_name(request, 'REFIMAGE').as_numpy()
            tpose = pb_utils.get_input_tensor_by_name(request, 'TPOSE').as_numpy()
            ratio = pb_utils.get_input_tensor_by_name(request, 'RATIO').as_numpy()

            print(f"prompt:{prompt} style:{style} ref_img:{ref_img.shape} tpose:{tpose} ratio:{ratio}")

            tpose = tpose[0]
            # select the pipeline (and LoRA prompt prefix) for the requested
            # style; anime is the default
            pipeline_infer = self.pipeline_anime
            if style == 'manghe':
                pipeline_infer = self.pipeline_manghe
                prompt = 'chibi,' + prompt
            elif style == 'guofeng':
                pipeline_infer = self.pipeline_guofeng
                prompt = 'minimalist style, Flat illustration, Chinese style,' + prompt
            elif style == 'xieshi':
                pipeline_infer = self.pipeline_realistic
            elif style == 'youhua':
                pipeline_infer = self.pipeline_oil_painting
                prompt = 'oil painting,' + prompt
            elif style == 'chahua':
                pipeline_infer = self.pipeline_realistic
                prompt = 'sketch, sketch painting,' + prompt

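            # A table-driven equivalent of the chain above (editor's sketch,
            # behavior unchanged; not part of the committed file):
            #
            #     STYLE_TABLE = {
            #         'manghe':  (self.pipeline_manghe, 'chibi,'),
            #         'guofeng': (self.pipeline_guofeng, 'minimalist style, Flat illustration, Chinese style,'),
            #         'xieshi':  (self.pipeline_realistic, ''),
            #         'youhua':  (self.pipeline_oil_painting, 'oil painting,'),
            #         'chahua':  (self.pipeline_realistic, 'sketch, sketch painting,'),
            #     }
            #     pipeline_infer, prefix = STYLE_TABLE.get(style, (self.pipeline_anime, ''))
            #     prompt = prefix + prompt
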
            prompt_to_append = ', best quality, extremely detailed, perfect, 8k, masterpiece'
            prompt = prompt + prompt_to_append

            negative_prompt = 'nude'
            # A (1, 1, 3) REFIMAGE is the sentinel for "no reference image";
            # anything else switches to an img2img pipeline built from the
            # already-loaded components, so no extra GPU memory is allocated.
            if ref_img.shape != (1, 1, 3):
                if tpose:
                    pipeline_infer = StableDiffusionXLControlNetImg2ImgPipeline(
                        pipeline_infer.vae, pipeline_infer.text_encoder, pipeline_infer.text_encoder_2,
                        pipeline_infer.tokenizer, pipeline_infer.tokenizer_2, pipeline_infer.unet,
                        self.controlnet, pipeline_infer.scheduler)
                else:
                    pipeline_infer = StableDiffusionXLImg2ImgPipeline(
                        pipeline_infer.vae, pipeline_infer.text_encoder, pipeline_infer.text_encoder_2,
                        pipeline_infer.tokenizer, pipeline_infer.tokenizer_2, pipeline_infer.unet,
                        pipeline_infer.scheduler)
            else:
                if tpose:
                    pipeline_infer = StableDiffusionXLControlNetPipeline(
                        pipeline_infer.vae, pipeline_infer.text_encoder, pipeline_infer.text_encoder_2,
                        pipeline_infer.tokenizer, pipeline_infer.tokenizer_2, pipeline_infer.unet,
                        self.controlnet, pipeline_infer.scheduler)
                else:
                    pipeline_infer = StableDiffusionXLPipeline(
                        pipeline_infer.vae, pipeline_infer.text_encoder, pipeline_infer.text_encoder_2,
                        pipeline_infer.tokenizer, pipeline_infer.tokenizer_2, pipeline_infer.unet,
                        pipeline_infer.scheduler)

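            # The non-ControlNet rebuilds above can also be written with the
            # components dict that diffusers pipelines expose (editor's
            # sketch, assuming a diffusers version where .components carries
            # all modules the target pipeline accepts):
            #
            #     pipeline_infer = StableDiffusionXLImg2ImgPipeline(**pipeline_infer.components)
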
            ratio_type = ratio[0]
            width, height = self.ratio_dict[ratio_type]

            controlnet_conditioning_scale = 1.0

            if ref_img.shape != (1, 1, 3):
                init_image = Image.fromarray(ref_img)
                if tpose:
                    image = pipeline_infer(prompt, negative_prompt=negative_prompt,
                                           controlnet_conditioning_scale=controlnet_conditioning_scale,
                                           image=init_image.resize((width, height)),
                                           control_image=self.tpose_image_ratio[ratio_type],
                                           strength=0.5).images[0]
                else:
                    image = pipeline_infer(prompt, negative_prompt=negative_prompt, image=init_image,
                                           width=width, height=height, strength=0.5).images[0]
            else:
                if tpose:
                    image = pipeline_infer(prompt, negative_prompt=negative_prompt,
                                           controlnet_conditioning_scale=controlnet_conditioning_scale,
                                           image=self.tpose_image_ratio[ratio_type]).images[0]
                else:
                    image = pipeline_infer(prompt, negative_prompt=negative_prompt,
                                           num_inference_steps=25, width=width, height=height).images[0]

            # Reuse the SD 1.5 safety checker on the generated image; nsfw is
            # a list with one bool per image.
            image_np = np.array(image).astype(np.float32) / 255.0
            image_pt = torch.from_numpy(image_np.transpose(2, 0, 1)).unsqueeze(0)
            image_pt = image_pt.to('cuda')
            check_res, nsfw = self.sd15.run_safety_checker(image_pt, 'cuda', torch.float32)
            if nsfw[0]:
                # blank out flagged results
                image = Image.new("RGB", image.size, (0, 0, 0))

            image = np.array(image).astype(np.uint8)
            print(f"final result: {image.shape}, [{np.min(image)}-{np.max(image)}]")

            # Create output tensors. You need pb_utils.Tensor
            # objects to create pb_utils.InferenceResponse.
            out_tensor = pb_utils.Tensor('OUTPUT', image)

            # Create InferenceResponse. You can set an error here in case
            # there was a problem with handling this inference request.
            # Below is an example of how you can set errors in inference
            # response:
            #
            # pb_utils.InferenceResponse(
            #     output_tensors=..., TritonError("An error occurred"))
            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor])
            responses.append(inference_response)

        # You should return a list of pb_utils.InferenceResponse. Length
        # of this list must match the length of `requests` list.
        return responses

    def finalize(self):
        """`finalize` is called only once when the model is being unloaded.

        Implementing `finalize` function is optional. This function allows the
        model to perform any necessary clean ups before exit.
        """
        print('Cleaning up...')
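
For reference, the model as written expects five named inputs (PROMPT, STYLE, REFIMAGE, TPOSE, RATIO) and returns a single OUTPUT tensor holding the final uint8 image. A minimal client sketch against a running Triton server follows; the model name, server address, dtypes, and exact shapes are assumptions for illustration, since the commit does not include a config.pbtxt:

    import numpy as np
    import tritonclient.http as httpclient
    from tritonclient.utils import np_to_triton_dtype

    client = httpclient.InferenceServerClient(url='localhost:8000')

    def make_input(name, array):
        # Wrap a numpy array as a named Triton input tensor.
        tensor = httpclient.InferInput(name, array.shape, np_to_triton_dtype(array.dtype))
        tensor.set_data_from_numpy(array)
        return tensor

    inputs = [
        make_input('PROMPT', np.array([b'a knight in silver armor'], dtype=np.object_)),
        make_input('STYLE', np.array([b'youhua'], dtype=np.object_)),
        # (1, 1, 3) is the server-side sentinel for "no reference image".
        make_input('REFIMAGE', np.zeros((1, 1, 3), dtype=np.uint8)),
        make_input('TPOSE', np.array([0], dtype=np.int32)),
        make_input('RATIO', np.array([0], dtype=np.int32)),  # index into ratio_dict: 1024x768
    ]

    # 'magicmaker_sdxl' is a hypothetical model name.
    result = client.infer('magicmaker_sdxl', inputs)
    image = result.as_numpy('OUTPUT')  # HxWx3 uint8 array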