|
|
|
|
|
import gradio as gr |
|
|
|
import numpy as np |
|
import torch |
|
import pickle |
|
import PIL.Image |
|
import types |
|
|
|
from projector import project, imageio, _MODELS |
|
|
|
from huggingface_hub import hf_hub_url, cached_download |
|
|
|
|
|
|
|
|
|
# Download the pretrained ape StyleGAN snapshot from the Hugging Face Hub
# (cached locally between runs) and unpickle the EMA generator weights.
# NOTE(review): pickle.load executes arbitrary code from the checkpoint —
# acceptable here only because the 'ykilcher/apes' repo is trusted.
with open(cached_download(hf_hub_url('ykilcher/apes', 'gamma500/network-snapshot-010000.pkl')), 'rb') as f:
    G = pickle.load(f)["G_ema"]

# Use the GPU when available; otherwise keep the generator on CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
    G = G.to(device)
else:
    # CPU path: monkey-patch both the full forward and the synthesis forward
    # so every call forces force_fp32=True — presumably the network's
    # mixed-precision (fp16) path is CUDA-only (TODO confirm against the
    # StyleGAN codebase that defines `force_fp32`).
    _old_forward = G.forward

    def _new_forward(self, *args, **kwargs):
        kwargs["force_fp32"] = True
        # _old_forward is already bound to G, so `self` is intentionally unused.
        return _old_forward(*args, **kwargs)

    G.forward = types.MethodType(_new_forward, G)

    _old_synthesis_forward = G.synthesis.forward

    def _new_synthesis_forward(self, *args, **kwargs):
        kwargs["force_fp32"] = True
        # Same pattern: delegate to the original bound method.
        return _old_synthesis_forward(*args, **kwargs)

    G.synthesis.forward = types.MethodType(_new_synthesis_forward, G.synthesis)
|
|
|
|
|
def generate(
    target_image_upload,
    num_steps,
    seed,
    learning_rate,
    model_name,
    normalize_for_clip,
    loss_type,
    regularize_noise_weight,
    initial_noise_factor,
):
    """Project an uploaded image into the generator's latent space.

    Center-crops and resizes the upload to the generator resolution, runs
    `project()` for `num_steps` optimization steps, and renders a video of
    the optimization trajectory (target shown next to each synthesized frame).

    Returns:
        (final_frame, video_path): the last synthesized frame as an HWC uint8
        array, and the path of the rendered mp4.

    Raises:
        ValueError: if no image was uploaded (the Image input is optional).
    """
    # The Image input is declared optional=True, so it may arrive as None.
    if target_image_upload is None:
        raise ValueError("Please upload a target image.")

    # gr.Number delivers floats; seeds and step counts must be ints.
    seed = round(seed)
    num_steps = round(num_steps)
    np.random.seed(seed)
    torch.manual_seed(seed)

    target_image = target_image_upload

    # Center-crop to a square, then resize to the generator's resolution.
    target_pil = PIL.Image.fromarray(target_image).convert("RGB")
    w, h = target_pil.size
    s = min(w, h)
    target_pil = target_pil.crop(
        ((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2)
    )
    target_pil = target_pil.resize(
        (G.img_resolution, G.img_resolution), PIL.Image.LANCZOS
    )
    target_uint8 = np.array(target_pil, dtype=np.uint8)
    # project() expects a CHW uint8 tensor on the generator's device.
    target_image = torch.from_numpy(target_uint8.transpose([2, 0, 1])).to(device)

    projected_w_steps = project(
        G,
        target=target_image,
        num_steps=num_steps,
        device=device,
        verbose=True,
        initial_learning_rate=learning_rate,
        model_name=model_name,
        normalize_for_clip=normalize_for_clip,
        loss_type=loss_type,
        regularize_noise_weight=regularize_noise_weight,
        initial_noise_factor=initial_noise_factor,
    )

    with torch.no_grad():
        video = imageio.get_writer(
            'proj.mp4', mode='I', fps=10, codec='libx264', bitrate='16M'
        )
        try:
            for w_step in projected_w_steps:
                synth_image = G.synthesis(
                    w_step.to(device).unsqueeze(0), noise_mode="const"
                )
                # Map the output from [-1, 1] to [0, 255], then to HWC uint8.
                synth_image = (synth_image + 1) * (255 / 2)
                synth_image = (
                    synth_image.permute(0, 2, 3, 1)
                    .clamp(0, 255)
                    .to(torch.uint8)[0]
                    .cpu()
                    .numpy()
                )
                # Each frame shows the target next to the current reconstruction.
                video.append_data(
                    np.concatenate([target_uint8, synth_image], axis=1)
                )
        finally:
            # Always finalize the writer, even if synthesis fails mid-loop.
            video.close()
    return synth_image, "proj.mp4"
|
|
|
|
|
# One input widget per `generate` parameter, in the same order as the
# function signature.
_input_widgets = [
    gr.inputs.Image(source="upload", optional=True),
    gr.inputs.Number(default=250, label="steps"),
    gr.inputs.Number(default=69420, label="seed"),
    gr.inputs.Number(default=0.05, label="learning_rate"),
    gr.inputs.Dropdown(default='RN50', label="model_name", choices=['vgg16', *_MODELS.keys()]),
    gr.inputs.Checkbox(default=True, label="normalize_for_clip"),
    gr.inputs.Dropdown(default="l2", label="loss_type", choices=["l2", "l1", "cosine"]),
    gr.inputs.Number(default=1e5, label="regularize_noise_weight"),
    gr.inputs.Number(default=0.05, label="initial_noise_factor"),
]

# Wire the projection function into a Gradio app: it returns the final
# synthesized image and the optimization video.
iface = gr.Interface(
    fn=generate,
    inputs=_input_widgets,
    outputs=["image", "video"],
)
iface.launch(inbrowser=True)
|
|