Spaces:
Running
on
L40S
Running
on
L40S
import torch | |
import torch.nn as nn | |
# https://github.com/facebookresearch/DiT | |
class TimestepEmbedder(nn.Module):
    """Embed scalar timesteps into vector representations.

    Each timestep is expanded into sinusoidal features of width
    ``frequency_embedding_size`` and then projected to ``dim`` channels by a
    small MLP (Linear -> SiLU -> Linear).

    Args:
        dim: Width of the output embedding. Must be even.
        frequency_embedding_size: Width of the sinusoidal feature vector fed
            to the MLP. Must be even, because the features are the
            concatenation of equally many cosine and sine terms.
        max_period: Rescales the base frequencies by ``10000 / max_period``.

    Raises:
        AssertionError: If ``dim`` is odd.
        ValueError: If ``frequency_embedding_size`` is odd.
    """

    def __init__(self, dim: int, frequency_embedding_size: int, max_period: float):
        super().__init__()
        # Kept as an assert so existing callers see the same failure mode.
        assert dim % 2 == 0, 'dim must be even.'
        # An odd size would produce ceil(size / 2) frequencies, i.e. size + 1
        # cos/sin features, which no longer matches the first Linear layer.
        # Fail here instead of with a shape error deep inside forward().
        if frequency_embedding_size % 2 != 0:
            raise ValueError(
                f'frequency_embedding_size must be even, got {frequency_embedding_size}.')

        self.mlp = nn.Sequential(
            nn.Linear(frequency_embedding_size, dim),
            nn.SiLU(),
            nn.Linear(dim, dim),
        )
        self.dim = dim
        self.max_period = max_period

        # Build the frequency table in float32 with CUDA autocast disabled.
        with torch.autocast('cuda', enabled=False):
            exponents = torch.arange(
                0, frequency_embedding_size, 2,
                dtype=torch.float32) / frequency_embedding_size
            freq_scale = 10000 / max_period
            freqs = freq_scale * (1.0 / (10000**exponents))

        # Non-persistent: the table is recomputed from the constructor
        # arguments, so it is left out of the state dict.
        self.register_buffer('freqs', freqs, persistent=False)

    def timestep_embedding(self, t: torch.Tensor) -> torch.Tensor:
        """Create sinusoidal timestep embeddings.

        Args:
            t: A 1-D tensor of N timesteps, one per batch element. Values may
                be fractional.

        Returns:
            A float tensor of shape ``(N, frequency_embedding_size)`` whose
            first half holds the cosine terms and second half the sine terms.
        """
        # https://github.com/openai/glide-text2im/blob/main/glide_text2im/nn.py
        args = t[:, None].float() * self.freqs[None]
        return torch.cat([torch.cos(args), torch.sin(args)], dim=-1)

    def forward(self, t: torch.Tensor) -> torch.Tensor:
        """Map timesteps ``t`` of shape ``(N,)`` to embeddings of shape ``(N, dim)``."""
        t_freq = self.timestep_embedding(t)
        if t.is_floating_point():
            # Follow the caller's floating-point dtype, as before.
            t_freq = t_freq.to(t.dtype)
        else:
            # Casting sin/cos features to an integer dtype would truncate them
            # and the MLP cannot consume integer input; use the MLP's own
            # parameter dtype for integer timesteps instead.
            t_freq = t_freq.to(self.mlp[0].weight.dtype)
        return self.mlp(t_freq)