JasonGilholme committed on
Commit
0feb70d
·
1 Parent(s): a372e74

temporalvideo usage changes

Browse files

Hi,

This is an updated version of the temporalvideo.py file that does the following things:

* Add an argparse-based command line for easier basic usage
* Add dynamic lookup of the ControlNet models in case the names or commit ids of the available models are different
* Ensure all image inputs are scaled to the same resolution. The optical flow maps were hard-coded to 512x512 and would produce images that didn't line up with non-square input images.
* Write out the ControlNet images to help debug resolution-related errors. It's also interesting to see the output of the preprocessors for HED and OpenPose.

Hopefully you find this helpful. Thanks so much for your work on this topic, it's pretty awesome!

Jase

Files changed (1) hide show
  1. temporalvideo.py +98 -35
temporalvideo.py CHANGED
@@ -4,6 +4,7 @@ import requests
4
  import json
5
  import cv2
6
  import numpy as np
 
7
  import sys
8
  import torch
9
  from PIL import Image
@@ -20,6 +21,27 @@ import cv2
20
  from torchvision.io import write_jpeg
21
  import pickle
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
25
 
@@ -27,11 +49,9 @@ model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device
27
  model = model.eval()
28
 
29
  # Replace with the actual path to your image file and folder
30
- x_path = "./init.png"
31
- y_folder = "./Input_Images"
32
 
33
- output_folder = "output"
34
- os.makedirs(output_folder, exist_ok=True)
35
 
36
  def get_image_paths(folder):
37
  image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
@@ -40,7 +60,46 @@ def get_image_paths(folder):
40
  files.extend(glob.glob(os.path.join(folder, ext)))
41
  return sorted(files)
42
 
43
- y_paths = get_image_paths(y_folder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def send_request(last_image_path, optical_flow_path,current_image_path):
46
  url = "http://localhost:7860/sdapi/v1/img2img"
@@ -51,7 +110,6 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
51
  # Load and process the last image
52
  last_image = cv2.imread(last_image_path)
53
  last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
54
- last_image = cv2.resize(last_image, (512, 512))
55
 
56
  # Load and process the optical flow image
57
  flow_image = cv2.imread(optical_flow_path)
@@ -79,31 +137,39 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
79
  "inpainting_mask_invert": 1,
80
  "resize_mode": 0,
81
  "denoising_strength": 0.4,
82
- "prompt": "1girl, woman",
83
- "negative_prompt": "",
84
  "alwayson_scripts": {
85
  "ControlNet":{
86
  "args": [
87
  {
88
  "input_image": current_image,
89
  "module": "hed",
90
- "model": "control_hed-fp16 [13fee50b]",
91
  "weight": 0.7,
92
  "guidance": 1,
 
 
93
  },
94
  {
95
  "input_image": encoded_image,
96
- "model": "temporalnetversion2 [b146ac48]",
97
  "module": "none",
98
  "weight": 0.6,
99
  "guidance": 1,
 
 
 
 
100
  },
101
  {
102
  "input_image": current_image,
103
- "model": "control_v11p_sd15_openpose [cab727d4]",
104
  "module": "openpose_full",
105
  "weight": 0.7,
106
- "guidance":1,
 
 
107
  }
108
 
109
 
@@ -118,8 +184,8 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
118
  "n_iter": 1,
119
  "steps": 20,
120
  "cfg_scale": 6,
121
- "width": 512,
122
- "height": 512,
123
  "restore_faces": True,
124
  "include_init_images": True,
125
  "override_settings": {},
@@ -164,25 +230,18 @@ def infer(frameA, frameB):
164
  img2_batch = F.resize(img2_batch, size=[512, 512])
165
  return transforms(img1_batch, img2_batch)
166
 
167
-
168
  img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
169
 
170
-
171
  list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
172
 
173
- predicted_flows = list_of_flows[-1]
174
-
175
 
176
- #flow_imgs = flow_to_image(predicted_flows)
 
177
 
178
- #print(flow_imgs)
179
 
180
- predicted_flow = list_of_flows[-1][0]
181
- opitcal_flow_path = os.path.join(output_folder, f"flow_{i}.png")
182
- flow_img = flow_to_image(predicted_flow).to("cpu")
183
- write_jpeg(flow_img,opitcal_flow_path)
184
-
185
-
186
  return opitcal_flow_path
187
 
188
  output_images = []
@@ -190,13 +249,13 @@ output_paths = []
190
 
191
  # Initialize with the first image path
192
 
193
- result = x_path
194
- output_image_path = os.path.join(output_folder, f"output_image_0.png")
195
 
196
  #with open(output_image_path, "wb") as f:
197
  # f.write(result)
198
 
199
- last_image_path = x_path
200
  for i in range(1, len(y_paths)):
201
  # Use the last image path and optical flow map to generate the next input
202
  optical_flow = infer(y_paths[i - 1], y_paths[i])
@@ -204,10 +263,14 @@ for i in range(1, len(y_paths)):
204
  # Modify your send_request to use the last_image_path
205
  result = send_request(last_image_path, optical_flow, y_paths[i])
206
  data = json.loads(result)
207
- encoded_image = data["images"][0]
208
- output_image_path = os.path.join(output_folder, f"output_image_{i}.png")
209
- last_image_path = output_image_path
210
- with open(output_image_path, "wb") as f:
211
- f.write(base64.b64decode(encoded_image))
212
- print(f"Written data for frame {i}:")
213
 
 
 
 
 
 
 
 
 
 
 
 
4
  import json
5
  import cv2
6
  import numpy as np
7
+ import re
8
  import sys
9
  import torch
10
  from PIL import Image
 
21
  from torchvision.io import write_jpeg
22
  import pickle
23
 
24
+ import argparse
25
+
26
+
27
def get_args(argv=None):
    """Parse the command-line options for the temporal video script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default ``None`` makes argparse read
            ``sys.argv[1:]``, which is what the script does when run
            directly.

    Returns:
        An ``argparse.Namespace`` with the attributes ``prompt``,
        ``negative_prompt``, ``init_image``, ``input_dir``, ``output_dir``,
        ``width`` and ``height``.
    """
    parser = argparse.ArgumentParser(
        description="Generate a temporally consistent image sequence via the img2img API.",
    )

    parser.add_argument('prompt', help="Prompt sent with every img2img request.")
    parser.add_argument('--negative-prompt', dest='negative_prompt', default="",
                        help="Negative prompt sent with every img2img request.")

    parser.add_argument('--init-image', dest='init_image', default="./init.png",
                        help="Image used as the 'last image' for the first generated frame.")
    parser.add_argument('--input-dir', dest='input_dir', default="./Input_Images",
                        help="Folder containing the input frames.")
    parser.add_argument('--output-dir', dest='output_dir', default="./output",
                        help="Folder the generated images are written to.")

    parser.add_argument('--width', default=512, type=int,
                        help="Output width in pixels.")
    parser.add_argument('--height', default=512, type=int,
                        help="Output height in pixels.")

    # Passing argv through lets callers (and tests) supply arguments without
    # touching sys.argv; None preserves the command-line behaviour.
    return parser.parse_args(argv)
41
+
42
+
43
+ args = get_args()
44
+
45
 
46
  device = "cuda" if torch.cuda.is_available() else "cpu"
47
 
 
49
  model = model.eval()
50
 
51
  # Replace with the actual path to your image file and folder
 
 
52
 
53
+ os.makedirs(args.output_dir, exist_ok=True)
54
+
55
 
56
  def get_image_paths(folder):
57
  image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
 
60
  files.extend(glob.glob(os.path.join(folder, ext)))
61
  return sorted(files)
62
 
63
+
64
+ y_paths = get_image_paths(args.input_dir)
65
+
66
+
67
def get_controlnet_models():
    """Look up the ControlNet model names this script needs.

    Requests the model list from the ControlNet extension of the locally
    running web UI and picks, for each of the TemporalNet v2, HED and
    OpenPose name patterns, the first listed model that matches. This avoids
    hard-coding exact model names and their bracketed 8-character suffix.

    Returns:
        A ``(temporalnet_model, hed_model, openpose_model)`` tuple of model
        name strings exactly as reported by the API.

    Raises:
        RuntimeError: If the API does not answer with HTTP 200, or if any of
            the three models is absent from the returned list.
    """
    url = "http://localhost:7860/controlnet/model_list"

    # Raw strings: "\[" inside a normal string literal is an invalid escape
    # sequence. Insertion order is the matching priority for each model name.
    patterns = {
        "temporalnet": re.compile(r"^temporalnetversion2 \[.{8}\]"),
        "hed": re.compile(r"^control_.*hed.* \[.{8}\]"),
        "openpose": re.compile(r"^control_.*openpose.* \[.{8}\]"),
    }
    found = dict.fromkeys(patterns)

    response = requests.get(url)
    if response.status_code != 200:
        raise RuntimeError("Unable to list models from the SD Web API! "
                           "Is it running and is the controlnet extension installed?")
    models = json.loads(response.content)

    # Named model_name so it does not shadow the module-level RAFT `model`.
    for model_name in models['model_list']:
        for key, pattern in patterns.items():
            if found[key] is None and pattern.match(model_name):
                found[key] = model_name
                # A listed model fills at most one slot.
                break

    # Explicit raises instead of assert, which is stripped under `python -O`.
    if found["temporalnet"] is None:
        raise RuntimeError("Unable to find the temporalnet2 model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")
    if found["hed"] is None:
        raise RuntimeError("Unable to find the hed_model model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")
    if found["openpose"] is None:
        raise RuntimeError("Unable to find the openpose model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")

    return found["temporalnet"], found["hed"], found["openpose"]
99
+
100
+
101
+ TEMPORALNET_MODEL, HED_MODEL, OPENPOSE_MODEL = get_controlnet_models()
102
+
103
 
104
  def send_request(last_image_path, optical_flow_path,current_image_path):
105
  url = "http://localhost:7860/sdapi/v1/img2img"
 
110
  # Load and process the last image
111
  last_image = cv2.imread(last_image_path)
112
  last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
 
113
 
114
  # Load and process the optical flow image
115
  flow_image = cv2.imread(optical_flow_path)
 
137
  "inpainting_mask_invert": 1,
138
  "resize_mode": 0,
139
  "denoising_strength": 0.4,
140
+ "prompt": args.prompt,
141
+ "negative_prompt": args.negative_prompt,
142
  "alwayson_scripts": {
143
  "ControlNet":{
144
  "args": [
145
  {
146
  "input_image": current_image,
147
  "module": "hed",
148
+ "model": HED_MODEL,
149
  "weight": 0.7,
150
  "guidance": 1,
151
+ "pixel_perfect": True,
152
+ "resize_mode": 0,
153
  },
154
  {
155
  "input_image": encoded_image,
156
+ "model": TEMPORALNET_MODEL,
157
  "module": "none",
158
  "weight": 0.6,
159
  "guidance": 1,
160
+ # "processor_res": 512,
161
+ "threshold_a": 64,
162
+ "threshold_b": 64,
163
+ "resize_mode": 0,
164
  },
165
  {
166
  "input_image": current_image,
167
+ "model": OPENPOSE_MODEL,
168
  "module": "openpose_full",
169
  "weight": 0.7,
170
+ "guidance": 1,
171
+ "pixel_perfect": True,
172
+ "resize_mode": 0,
173
  }
174
 
175
 
 
184
  "n_iter": 1,
185
  "steps": 20,
186
  "cfg_scale": 6,
187
+ "width": args.width,
188
+ "height": args.height,
189
  "restore_faces": True,
190
  "include_init_images": True,
191
  "override_settings": {},
 
230
  img2_batch = F.resize(img2_batch, size=[512, 512])
231
  return transforms(img1_batch, img2_batch)
232
 
 
233
  img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
234
 
 
235
  list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
236
 
237
+ predicted_flow = list_of_flows[-1][0]
238
+ opitcal_flow_path = os.path.join(args.output_dir, f"flow_{i}.png")
239
 
240
+ flow_img = flow_to_image(predicted_flow).to("cpu")
241
+ flow_img = F.resize(flow_img, size=[args.height, args.width])
242
 
243
+ write_jpeg(flow_img, opitcal_flow_path)
244
 
 
 
 
 
 
 
245
  return opitcal_flow_path
246
 
247
  output_images = []
 
249
 
250
  # Initialize with the first image path
251
 
252
+ result = args.init_image
253
+ output_image_path = os.path.join(args.output_dir, f"output_image_0.png")
254
 
255
  #with open(output_image_path, "wb") as f:
256
  # f.write(result)
257
 
258
+ last_image_path = args.init_image
259
  for i in range(1, len(y_paths)):
260
  # Use the last image path and optical flow map to generate the next input
261
  optical_flow = infer(y_paths[i - 1], y_paths[i])
 
263
  # Modify your send_request to use the last_image_path
264
  result = send_request(last_image_path, optical_flow, y_paths[i])
265
  data = json.loads(result)
 
 
 
 
 
 
266
 
267
+ for j, encoded_image in enumerate(data["images"]):
268
+ if j == 0:
269
+ output_image_path = os.path.join(args.output_dir, f"output_image_{i}.png")
270
+ last_image_path = output_image_path
271
+ else:
272
+ output_image_path = os.path.join(args.output_dir, f"controlnet_image_{j}_{i}.png")
273
+
274
+ with open(output_image_path, "wb") as f:
275
+ f.write(base64.b64decode(encoded_image))
276
+ print(f"Written data for frame {i}:")