|
import torch.nn as nn |
|
from torchvision.models import efficientnet_v2_l, efficientnet_v2_s |
|
|
|
from diffusers.configuration_utils import ConfigMixin, register_to_config |
|
from diffusers.models.modeling_utils import ModelMixin |
|
|
|
|
|
class EfficientNetEncoder(ModelMixin, ConfigMixin):
    """EfficientNetV2 feature extractor followed by a 1x1 projection head.

    The ``features`` stack of the selected torchvision EfficientNetV2 variant
    serves as the backbone. Its output is fed through a bias-free 1x1
    convolution and a batch-norm layer that together produce ``c_latent``
    channels.
    """

    @register_to_config
    def __init__(self, c_latent=16, c_cond=1280, effnet="efficientnet_v2_s"):
        """Build the backbone and the projection head.

        Args:
            c_latent: Number of output channels of the projection head.
            c_cond: Number of input channels the projection head expects.
                Presumably this has to equal the channel count emitted by
                the backbone -- TODO confirm against torchvision.
            effnet: ``"efficientnet_v2_s"`` selects the small variant; any
                other value selects ``efficientnet_v2_l``.
        """
        super().__init__()

        # Anything other than the exact "small" identifier falls through to
        # the large variant (no validation of the name is performed).
        build_backbone = (
            efficientnet_v2_s if effnet == "efficientnet_v2_s" else efficientnet_v2_l
        )
        # Only the ``features`` sub-module of the torchvision model is kept.
        self.backbone = build_backbone(weights="DEFAULT").features

        # 1x1 convolution without bias, immediately followed by batch norm.
        projection = nn.Conv2d(c_cond, c_latent, kernel_size=1, bias=False)
        normalization = nn.BatchNorm2d(c_latent)
        self.mapper = nn.Sequential(projection, normalization)

    def forward(self, x):
        """Run ``x`` through the backbone, then through the projection head."""
        features = self.backbone(x)
        return self.mapper(features)
|
|