Spaces:

Kurokabe
/

GANime

Runtime error

App Files Files Community

GANime / ganime /model /vqgan_clean /experimental /net2net.py

Kurokabe

Upload 84 files

3be620b about 2 years ago

raw

history blame

10.6 kB

	import numpy as np
	import tensorflow as tf
	import tensorflow_addons as tfa
	from ganime.configs.model_configs import GPTConfig, ModelConfig
	from ganime.model.vqgan_clean.vqgan import VQGAN
	from ganime.trainer.warmup.cosine import WarmUpCosine
	from tensorflow import keras
	from tensorflow.keras import Model, layers
	from transformers import TFGPT2Model, GPT2Config
	from tensorflow.keras import mixed_precision


	class Net2Net(Model):
	def __init__(
	self,
	transformer_config: GPTConfig,
	first_stage_config: ModelConfig,
	trainer_config,
	**kwargs,
	):
	super().__init__(**kwargs)
	self.first_stage_model = VQGAN(**first_stage_config)

	# configuration = GPT2Config(**transformer_config)
	# self.transformer = TFGPT2Model(configuration)#.from_pretrained("gpt2", **self.transformer_config)
	# configuration = GPT2Config(**transformer_config)
	self.transformer = TFGPT2Model.from_pretrained(
	"gpt2-medium"
	) # , **transformer_config)
	if "checkpoint_path" in transformer_config:
	print(f"Restoring weights from {transformer_config['checkpoint_path']}")
	self.load_weights(transformer_config["checkpoint_path"])

	self.loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
	from_logits=True, reduction=tf.keras.losses.Reduction.NONE
	)

	self.loss_tracker = keras.metrics.Mean(name="loss")

	self.scheduled_lrs = self.create_warmup_scheduler(trainer_config)

	optimizer = tfa.optimizers.AdamW(
	learning_rate=self.scheduled_lrs, weight_decay=1e-4
	)
	self.compile(
	optimizer=optimizer,
	loss=self.loss_fn,
	# run_eagerly=True,
	)

	# Gradient accumulation
	self.gradient_accumulation = [
	tf.Variable(tf.zeros_like(v, dtype=tf.float32), trainable=False)
	for v in self.transformer.trainable_variables
	]

	def create_warmup_scheduler(self, trainer_config):
	len_x_train = trainer_config["len_x_train"]
	batch_size = trainer_config["batch_size"]
	n_epochs = trainer_config["n_epochs"]

	total_steps = int(len_x_train / batch_size * n_epochs)
	warmup_epoch_percentage = trainer_config["warmup_epoch_percentage"]
	warmup_steps = int(total_steps * warmup_epoch_percentage)

	scheduled_lrs = WarmUpCosine(
	lr_start=trainer_config["lr_start"],
	lr_max=trainer_config["lr_max"],
	warmup_steps=warmup_steps,
	total_steps=total_steps,
	)

	return scheduled_lrs

	def apply_accu_gradients(self):
	# apply accumulated gradients
	self.optimizer.apply_gradients(
	zip(self.gradient_accumulation, self.transformer.trainable_variables)
	)

	# reset
	for i in range(len(self.gradient_accumulation)):
	self.gradient_accumulation[i].assign(
	tf.zeros_like(self.transformer.trainable_variables[i], dtype=tf.float32)
	)

	@property
	def metrics(self):
	# We list our `Metric` objects here so that `reset_states()` can be
	# called automatically at the start of each epoch
	# or at the start of `evaluate()`.
	# If you don't implement this property, you have to call
	# `reset_states()` yourself at the time of your choosing.
	return [
	self.loss_tracker,
	]

	@tf.function(
	# input_signature=[
	# tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.float32),
	# ]
	)
	def encode_to_z(self, x):
	quant_z, indices, quantized_loss = self.first_stage_model.encode(x)

	batch_size = tf.shape(quant_z)[0]

	indices = tf.reshape(indices, shape=(batch_size, -1))
	return quant_z, indices

	def call(self, inputs, training=None, mask=None):

	first_frame = inputs["first_frame"]
	last_frame = inputs["last_frame"]
	n_frames = inputs["n_frames"]

	return self.generate_video(first_frame, last_frame, n_frames)



	@tf.function()
	def predict_next_indices(self, inputs, example_indices):
	logits = self.transformer(inputs)
	logits = logits.last_hidden_state
	logits = tf.cast(logits, dtype=tf.float32)
	# Remove the conditioned part
	logits = logits[
	:, tf.shape(example_indices)[1] - 1 :
	] # -1 here 'cause -1 above
	# logits = tf.reshape(logits, shape=(-1, tf.shape(logits)[-1]))
	return logits

	@tf.function()
	def body(self, total_loss, frames, index, last_frame_indices):

	previous_frame_indices = self.encode_to_z(frames[:, index - 1, ...])[1]
	cz_indices = tf.concat((last_frame_indices, previous_frame_indices), axis=1)
	target_indices = self.encode_to_z(frames[:, index, ...])[1]
	# target_indices = tf.reshape(target_indices, shape=(-1,))

	with tf.GradientTape() as tape:
	logits = self.predict_next_indices(
	cz_indices[:, :-1], last_frame_indices
	) # don't know why -1

	frame_loss = tf.cast(
	tf.reduce_mean(self.loss_fn(target_indices, logits)),
	dtype=tf.float32,
	)

	# Calculate batch gradients
	gradients = tape.gradient(frame_loss, self.transformer.trainable_variables)

	# Accumulate batch gradients
	for i in range(len(self.gradient_accumulation)):
	self.gradient_accumulation[i].assign_add(tf.cast(gradients[i], tf.float32))

	index = tf.add(index, 1)
	total_loss = tf.add(total_loss, frame_loss)
	return total_loss, frames, index, last_frame_indices

	def cond(self, total_loss, frames, index, last_frame_indices):
	return tf.less(index, tf.shape(frames)[1])

	def train_step(self, data):
	first_frame = data["first_frame"]
	last_frame = data["last_frame"]
	frames = data["y"]
	n_frames = data["n_frames"]

	last_frame_indices = self.encode_to_z(last_frame)[1]
	total_loss = 0.0

	total_loss, _, _, _ = tf.while_loop(
	cond=self.cond,
	body=self.body,
	loop_vars=(tf.constant(0.0), frames, tf.constant(1), last_frame_indices),
	)

	self.apply_accu_gradients()
	self.loss_tracker.update_state(total_loss)
	return {m.name: m.result() for m in self.metrics}

	def cond_test_step(self, total_loss, frames, index, last_frame_indices):
	return tf.less(index, tf.shape(frames)[1])

	@tf.function()
	def body_test_step(self, total_loss, frames, index, predicted_logits):
	target_indices = self.encode_to_z(frames[:, index, ...])[1]
	# target_indices = tf.reshape(target_indices, shape=(-1,))
	logits = predicted_logits[index]

	frame_loss = tf.cast(
	tf.reduce_mean(self.loss_fn(target_indices, logits)),
	dtype=tf.float32,
	)

	index = tf.add(index, 1)
	total_loss = tf.add(total_loss, frame_loss)
	return total_loss, frames, index, predicted_logits

	def test_step(self, data):
	first_frame = data["first_frame"]
	last_frame = data["last_frame"]
	frames = data["y"]
	n_frames = data["n_frames"]

	predicted_logits, _, _ = self.predict_logits(first_frame, last_frame, n_frames)

	total_loss, _, _, _ = tf.while_loop(
	cond=self.cond_test_step,
	body=self.body_test_step,
	loop_vars=(tf.constant(0.0), frames, tf.constant(1), predicted_logits),
	)


	self.loss_tracker.update_state(total_loss)
	return {m.name: m.result() for m in self.metrics}

	@tf.function()
	def convert_logits_to_indices(self, logits, shape):
	probs = tf.keras.activations.softmax(logits)
	_, generated_indices = tf.math.top_k(probs)
	generated_indices = tf.reshape(
	generated_indices,
	shape, # , self.first_stage_model.quantize.num_embeddings)
	)
	return generated_indices
	# quant = self.first_stage_model.quantize.get_codebook_entry(
	# generated_indices, shape=shape
	# )

	# return self.first_stage_model.decode(quant)

	@tf.function()
	def predict_logits(self, first_frame, last_frame, n_frames):
	quant_first, indices_first = self.encode_to_z(first_frame)
	quant_last, indices_last = self.encode_to_z(last_frame)

	indices_previous = indices_first

	predicted_logits = tf.TensorArray(
	tf.float32, size=0, dynamic_size=True, clear_after_read=False
	)

	index = tf.constant(1)
	while tf.less(index, tf.reduce_max(n_frames)):
	tf.autograph.experimental.set_loop_options(
	shape_invariants=[(indices_previous, tf.TensorShape([None, None]))]
	)
	cz_indices = tf.concat((indices_last, indices_previous), axis=1)
	logits = self.predict_next_indices(cz_indices[:, :-1], indices_last)

	# generated_indices = self.convert_logits_to_indices(
	# logits, tf.shape(indices_last)
	# )
	predicted_logits = predicted_logits.write(index, logits)
	indices_previous = self.convert_logits_to_indices(
	logits, tf.shape(indices_first)
	)
	index = tf.add(index, 1)

	return predicted_logits.stack(), tf.shape(quant_first), tf.shape(indices_first)

	@tf.function()
	def generate_video(self, first_frame, last_frame, n_frames):
	predicted_logits, quant_shape, indices_shape = self.predict_logits(
	first_frame, last_frame, n_frames
	)

	generated_images = tf.TensorArray(
	tf.float32, size=0, dynamic_size=True, clear_after_read=False
	)
	generated_images = generated_images.write(0, first_frame)

	index = tf.constant(1)
	while tf.less(index, tf.reduce_max(n_frames)):
	indices = self.convert_logits_to_indices(predicted_logits[index], indices_shape)
	quant = self.first_stage_model.quantize.get_codebook_entry(
	indices,
	shape=quant_shape,
	)
	decoded = self.first_stage_model.decode(quant)
	generated_images = generated_images.write(index, decoded)
	index = tf.add(index, 1)

	stacked_images = generated_images.stack()
	videos = tf.transpose(stacked_images, (1, 0, 2, 3, 4))
	return videos