Spaces:
Running
Running
File size: 10,958 Bytes
21d588d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
# to be used with https://github.com/a1lazydog/ComfyUI-AudioScheduler
import torch
from torchvision.transforms import functional as TF
from PIL import Image, ImageDraw
import numpy as np
from ..utility.utility import pil2tensor
from nodes import MAX_RESOLUTION
class NormalizedAmplitudeToMask:
    """ComfyUI node that renders one mask frame per normalized amplitude sample.

    Each frame is either a flat fill (shape ``'none'``) or a black canvas with a
    circle, square or triangle whose extent scales with the amplitude.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs in the dictionary form ComfyUI expects."""

        def int_input(default, low, high):
            # Every integer widget of this node uses a step of 1.
            return ("INT", {"default": default, "min": low, "max": high, "step": 1})

        shape_choices = ['none', 'circle', 'square', 'triangle']
        color_choices = ['white', 'amplitude']

        required = {
            "normalized_amp": ("NORMALIZED_AMPLITUDE",),
            "width": int_input(512, 16, 4096),
            "height": int_input(512, 16, 4096),
            "frame_offset": int_input(0, -255, 255),
            "location_x": int_input(256, 0, 4096),
            "location_y": int_input(256, 0, 4096),
            "size": int_input(128, 8, 4096),
            "shape": (shape_choices, {"default": 'none'}),
            "color": (color_choices, {"default": 'amplitude'}),
        }
        return {"required": required}

    CATEGORY = "KJNodes/audio"
    RETURN_TYPES = ("MASK",)
    FUNCTION = "convert"
    DESCRIPTION = """
Works as a bridge to the AudioScheduler -nodes:
https://github.com/a1lazydog/ComfyUI-AudioScheduler
Creates masks based on the normalized amplitude.
"""

    def convert(self, normalized_amp, width, height, frame_offset, shape, location_x, location_y, size, color):
        """Build a batch of masks, one per amplitude value.

        Args:
            normalized_amp: Sequence of amplitude values; clipped to [0, 1].
            width, height: Pixel size of every generated frame.
            frame_offset: Number of positions the amplitude sequence is rolled by.
            shape: 'none' for a flat fill, or 'circle' / 'square' / 'triangle'.
            location_x, location_y: Centre of the circle/square, or apex of the triangle.
            size: Shape extent at amplitude 1.0; scaled linearly by the amplitude.
            color: 'amplitude' maps the amplitude to a gray level, 'white' always uses 255.

        Returns:
            A one-element tuple holding the frames concatenated along dim 0.
        """
        # Clamp first, then shift the whole sequence in time (np.roll wraps around).
        amplitudes = np.roll(np.clip(normalized_amp, 0.0, 1.0), frame_offset)

        frames = []
        for level in amplitudes:
            # Pick the gray level used for the fill of this frame.
            if color == 'amplitude':
                intensity = int(level * 255)
            elif color == 'white':
                intensity = 255
            fill = (intensity, intensity, intensity)

            # Half-extent of the shape for this frame.
            reach = size * level

            # Without a shape the whole frame is filled; otherwise start from black.
            background = fill if shape == 'none' else "black"
            canvas = Image.new("RGB", (width, height), background)
            pen = ImageDraw.Draw(canvas)

            if shape in ('circle', 'square'):
                # Bounding box centred on the requested location.
                bbox = [
                    (location_x - reach, location_y - reach),
                    (location_x + reach, location_y + reach),
                ]
                if shape == 'circle':
                    pen.ellipse(bbox, fill=fill)
                else:
                    pen.rectangle(bbox, fill=fill)
            elif shape == 'triangle':
                # Apex sits on the location, the base lies `reach` pixels below it.
                apex = (location_x, location_y)
                base_left = (location_x - reach, location_y + reach)
                base_right = (location_x + reach, location_y + reach)
                pen.polygon([apex, base_left, base_right], fill=fill)

            # pil2tensor's result is indexed with four axes here; channel 0 of the
            # last axis becomes the mask (all three channels hold the same gray).
            frames.append(pil2tensor(canvas)[:, :, :, 0])

        return (torch.cat(frames, dim=0),)
class NormalizedAmplitudeToFloatList:
    """ComfyUI node that exposes a normalized amplitude sequence as a list of floats."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the single required input in the form ComfyUI expects."""
        required = {"normalized_amp": ("NORMALIZED_AMPLITUDE",)}
        return {"required": required}

    CATEGORY = "KJNodes/audio"
    RETURN_TYPES = ("FLOAT",)
    FUNCTION = "convert"
    DESCRIPTION = """
Works as a bridge to the AudioScheduler -nodes:
https://github.com/a1lazydog/ComfyUI-AudioScheduler
Creates a list of floats from the normalized amplitude.
"""

    def convert(self, normalized_amp):
        """Clip the amplitudes to [0, 1] and return them as a plain Python list.

        Returns:
            A one-element tuple containing the list of floats.
        """
        # np.clip also turns list-like input into an ndarray, so .tolist() is available.
        clipped = np.clip(normalized_amp, 0.0, 1.0)
        as_list = clipped.tolist()
        return (as_list,)
class OffsetMaskByNormalizedAmplitude:
    """ComfyUI node that rotates and/or rolls every mask of a batch by an
    amount driven by the matching normalized amplitude value."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs in the dictionary form ComfyUI expects."""
        return {
            "required": {
                "normalized_amp": ("NORMALIZED_AMPLITUDE",),
                "mask": ("MASK",),
                "x": ("INT", { "default": 0, "min": -4096, "max": MAX_RESOLUTION, "step": 1, "display": "number" }),
                "y": ("INT", { "default": 0, "min": -4096, "max": MAX_RESOLUTION, "step": 1, "display": "number" }),
                "rotate": ("BOOLEAN", { "default": False }),
                "angle_multiplier": ("FLOAT", { "default": 0.0, "min": -1.0, "max": 1.0, "step": 0.001, "display": "number" }),
            }
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("mask",)
    FUNCTION = "offset"
    CATEGORY = "KJNodes/audio"
    DESCRIPTION = """
Works as a bridge to the AudioScheduler -nodes:
https://github.com/a1lazydog/ComfyUI-AudioScheduler
Offsets masks based on the normalized amplitude.
"""

    def offset(self, mask, x, y, angle_multiplier, rotate, normalized_amp):
        """Return a copy of ``mask`` with each frame rotated and/or rolled.

        Args:
            mask: Tensor unpacked as (batch, height, width); it is not modified.
            x, y: Base horizontal / vertical offset. The applied shift for
                frame ``i`` is ``offset * normalized_amp[i] * 10``.
            angle_multiplier: Scales the rotation; frame ``i`` is rotated by
                ``int(normalized_amp[i] * 360 * angle_multiplier)`` degrees.
            rotate: When true, the rotation is applied before the shift.
            normalized_amp: Per-frame amplitudes, clipped to [0, 1]. Must
                provide at least one value per mask frame.

        Returns:
            A one-element tuple holding the transformed mask batch.
        """
        # Work on a copy so the caller's tensor is left untouched.
        offsetmask = mask.clone()
        # Ensure normalized_amp is an array and within the range [0, 1]
        normalized_amp = np.clip(normalized_amp, 0.0, 1.0)

        batch_size, height, width = mask.shape

        if rotate:
            for i in range(batch_size):
                rotation_angle = int(normalized_amp[i] * (360 * angle_multiplier))
                # TF.rotate is given a leading singleton axis, removed again afterwards.
                offsetmask[i] = TF.rotate(offsetmask[i].unsqueeze(0), rotation_angle).squeeze(0)

        if x != 0 or y != 0:
            max_shift_x = width - 1
            max_shift_y = height - 1
            for i in range(batch_size):
                offset_amp = normalized_amp[i] * 10
                # Clamp symmetrically: previously only positive shifts were
                # bounded, so large negative shifts wrapped around arbitrarily
                # while positive ones saturated.
                shift_x = int(max(-max_shift_x, min(x * offset_amp, max_shift_x)))
                shift_y = int(max(-max_shift_y, min(y * offset_amp, max_shift_y)))
                if shift_x != 0:
                    offsetmask[i] = torch.roll(offsetmask[i], shifts=shift_x, dims=1)
                if shift_y != 0:
                    offsetmask[i] = torch.roll(offsetmask[i], shifts=shift_y, dims=0)

        return offsetmask,
class ImageTransformByNormalizedAmplitude:
    """ComfyUI node that zooms (centre crop + resize) and rolls every image of
    a batch by an amount driven by the matching normalized amplitude value."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs in the dictionary form ComfyUI expects."""
        return {"required": {
            "normalized_amp": ("NORMALIZED_AMPLITUDE",),
            "zoom_scale": ("FLOAT", { "default": 0.0, "min": -1.0, "max": 1.0, "step": 0.001, "display": "number" }),
            "x_offset": ("INT", { "default": 0, "min": (1 -MAX_RESOLUTION), "max": MAX_RESOLUTION, "step": 1, "display": "number" }),
            "y_offset": ("INT", { "default": 0, "min": (1 -MAX_RESOLUTION), "max": MAX_RESOLUTION, "step": 1, "display": "number" }),
            "cumulative": ("BOOLEAN", { "default": False }),
            "image": ("IMAGE",),
        }}

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "amptransform"
    CATEGORY = "KJNodes/audio"
    DESCRIPTION = """
Works as a bridge to the AudioScheduler -nodes:
https://github.com/a1lazydog/ComfyUI-AudioScheduler
Transforms image based on the normalized amplitude.
"""

    def amptransform(self, image, normalized_amp, zoom_scale, cumulative, x_offset, y_offset):
        """Zoom and shift each image according to its amplitude.

        Args:
            image: Batch tensor indexed along dim 0; each item is permuted
                from channels-last to channels-first before processing.
            normalized_amp: Per-frame amplitudes, clipped to [0, 1]. Must
                provide at least one value per image.
            zoom_scale: The centred square crop has a side of
                ``min(width, height) * (1 - amp * zoom_scale)`` (at least 1
                pixel) and is resized back to the original size.
            cumulative: When true, amplitudes are accumulated across frames.
            x_offset, y_offset: Base shift; the applied roll for a frame is
                ``offset * amp * 10``, clamped to the image extent.

        Returns:
            A one-element tuple holding the stacked, transformed images.
        """
        # Ensure normalized_amp is an array and within the range [0, 1]
        normalized_amp = np.clip(normalized_amp, 0.0, 1.0)
        transformed_images = []

        # Running sum of amplitudes, only used in cumulative mode.
        prev_amp = 0.0

        for i in range(image.shape[0]):
            amp = normalized_amp[i]

            if cumulative:
                # NOTE(review): this yields amp = 2 * current + previous sum,
                # i.e. the current sample is counted twice. Kept as-is to
                # preserve existing output; confirm whether that is intended.
                prev_amp += amp
                amp += prev_amp

            # torchvision works on CxHxW, so move the channel axis to the front
            # and go through PIL for the crop/resize.
            pil_img = TF.to_pil_image(image[i].permute(2, 0, 1))

            # PIL reports size as (width, height).
            width, height = pil_img.size
            crop_size = max(int(min(width, height) * (1 - amp * zoom_scale)), 1)

            # Top-left corner of the centred square crop.
            left = (width - crop_size) // 2
            top = (height - crop_size) // 2

            # Crop and resize back to the original size.
            cropped_img = TF.crop(pil_img, top, left, crop_size, crop_size)
            resized_img = TF.resize(cropped_img, (height, width))

            # Back to a tensor, then to channels-last (HxWxC).
            tensor_img = TF.to_tensor(resized_img).permute(1, 2, 0)

            # Shift magnitude for this frame.
            offset_amp = amp * 10
            # Clamp against the real image extent. The previous code read the
            # limits from the channels-first tensor, so x was limited by the
            # height and y by the channel count (about 2 pixels). The clamp is
            # also symmetric now, so negative offsets are bounded as well.
            max_shift_x = width - 1
            max_shift_y = height - 1
            shift_x = int(max(-max_shift_x, min(x_offset * offset_amp, max_shift_x)))
            shift_y = int(max(-max_shift_y, min(y_offset * offset_amp, max_shift_y)))

            # tensor_img is HxWxC here: dim 1 is horizontal, dim 0 is vertical.
            if shift_x != 0:
                tensor_img = torch.roll(tensor_img, shifts=shift_x, dims=1)
            if shift_y != 0:
                tensor_img = torch.roll(tensor_img, shifts=shift_y, dims=0)

            transformed_images.append(tensor_img)

        # Stack all transformed images into a batch
        transformed_batch = torch.stack(transformed_images)
        return (transformed_batch,)