Spaces:

1-13-am
/

neural-style-transfer

Sleeping

App Files Files Community

1-13-am committed on Oct 17, 2023

Commit

1f7d4dd

1 Parent(s): 6e33f45

Upload 6 files

Browse files

Files changed (6) hide show

UI.py +57 -0
check_point1_0.pth +3 -0
deploy.ipynb +195 -0
network.py +127 -0
train.ipynb +503 -0
utils.py +138 -0

UI.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import gradio as gr
+import torch
+from utils import transformer, tensor_to_img
+from network import Style_Transfer_Network
+check_point = torch.load("/content/check_point.pth", map_location = torch.device('cpu'))
+model = Style_Transfer_Network()
+model.load_state_dict(check_point['state_dict'])
+def style_transfer(content_img, style_strength, style_img_1 = None, iw_1 = 0, style_img_2 = None, iw_2 = 0, style_img_3 = None, iw_3 = 0, preserve_color = None):
+    transform = transformer(imsize = 512)
+    content = transform(content_img).unsqueeze(0)
+    iw = [iw_1, iw_2, iw_3]
+    interpolation_weights = [i/ sum(iw) for i in iw]
+    style_imgs = [style_img_1, style_img_2, style_img_3]
+    styles = []
+    for style_img in style_imgs:
+      if style_img is not None:
+        styles.append(transform(style_img).unsqueeze(0))
+    if preserve_color == "None": preserve_color = None
+    elif preserve_color == "Whitening": preserve_color = "batch_wct"
+    elif preserve_color == "Histogram matching": preserve_color = "histogram_matching"
+    with torch.no_grad():
+        stylized_img = model(content, styles, style_strength, interpolation_weights, preserve_color = preserve_color)
+    return tensor_to_img(stylized_img)
+title = "Artistic Style Transfer"
+content_img = gr.components.Image(label="Content image", type = "pil")
+style_img_1 = gr.components.Image(label="Style images", type = "pil")
+iw_1 = gr.components.Slider(0., 1., label = "Style 1 interpolation")
+style_img_2 = gr.components.Image(label="Style images", type = "pil")
+iw_2 = gr.components.Slider(0., 1., label = "Style 2 interpolation")
+style_img_3 = gr.components.Image(label="Style images", type = "pil")
+iw_3 = gr.components.Slider(0., 1., label = "Style 3 interpolation")
+style_strength =  gr.components.Slider(0., 1., label = "Adjust style strength")
+preserve_color = gr.components.Dropdown(["None", "Whitening", "Histogram matching"], label = "Choose color preserving mode")
+interface = gr.Interface(fn = style_transfer,
+                         inputs = [content_img,
+                                   style_strength,
+                                   style_img_1,
+                                   iw_1,
+                                   style_img_2,
+                                   iw_2,
+                                   style_img_3,
+                                   iw_3,
+                                   preserve_color],
+                         outputs = gr.components.Image(),
+                         title = title
+                         )
+interface.queue()
+interface.launch(share = True, debug = True)

check_point1_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b500176427a41788b7314c77b6fdbbc6d474fd255f94b7787f7ee123cc092056
+size 28057273

deploy.ipynb ADDED Viewed

	@@ -0,0 +1,195 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Uncomment if you don't have the following modules\n",
+    "#pip install -qq gradio\n",
+    "#pip install -qq torch\n",
+    "#pip install -qq Pillow\n",
+    "#pip install -qq torchvision"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from PIL import Image\n",
+    "import torch\n",
+    "import torchvision\n",
+    "import torchvision.transforms as transforms\n",
+    "from utils import transformer, tensor_to_img\n",
+    "from network import Style_Transfer_Network\n",
+    "import gradio as gr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "device = \"cpu\"\n",
+    "if torch.cuda.is_available(): device = \"cuda\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\VICTUS\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\torchvision\\models\\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\VICTUS\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG19_Weights.IMAGENET1K_V1`. You can also use `weights=VGG19_Weights.DEFAULT` to get the most up-to-date weights.\n",
+      "  warnings.warn(msg)\n",
+      "C:\\Users\\VICTUS\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.\n",
+      "  warnings.warn(msg)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#import gradio as gr\n",
+    "check_point = torch.load('check_point1_0.pth', map_location = device)\n",
+    "transfer_network = Style_Transfer_Network().to(device)\n",
+    "transfer_network.load_state_dict(check_point['state_dict'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7860\n",
+      "Running on public URL: https://b4e9024bf7c14725c6.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://b4e9024bf7c14725c6.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def style_transfer(content_img, style_strength, style_img_1 = None, iw_1 = 0, style_img_2 = None, iw_2 = 0, style_img_3 = None, iw_3 = 0, preserve_color = None):\n",
+    "    # Stylize content_img with up to three style images and return\n",
+    "    # tensor_to_img(model output). Raises gr.Error when the content image\n",
+    "    # or every style image is missing.\n",
+    "    if content_img is None:\n",
+    "        raise gr.Error(\"Please provide a content image.\")\n",
+    "\n",
+    "    # Keep every weight attached to its own image so that an empty slot drops\n",
+    "    # its weight too, instead of shifting the remaining weights onto the wrong styles.\n",
+    "    slots = ((style_img_1, iw_1), (style_img_2, iw_2), (style_img_3, iw_3))\n",
+    "    pairs = [(img, weight) for img, weight in slots if img is not None]\n",
+    "    if not pairs:\n",
+    "        raise gr.Error(\"Please provide at least one style image.\")\n",
+    "\n",
+    "    transform = transformer(imsize = 512)\n",
+    "    content = transform(content_img).unsqueeze(0).to(device)\n",
+    "    styles = [transform(img).unsqueeze(0).to(device) for img, _ in pairs]\n",
+    "\n",
+    "    total_weight = sum(weight for _, weight in pairs)\n",
+    "    if total_weight > 0:\n",
+    "        interpolation_weights = [weight / total_weight for _, weight in pairs]\n",
+    "    else:\n",
+    "        # The sliders start at 0, so an untouched form must not divide by zero.\n",
+    "        interpolation_weights = [1 / len(pairs)] * len(pairs)\n",
+    "\n",
+    "    # Translate the dropdown label into the mode name the network checks for.\n",
+    "    if preserve_color == \"None\": preserve_color = None\n",
+    "    elif preserve_color == \"Whitening & Coloring\": preserve_color = \"whiten_and_color\"\n",
+    "    elif preserve_color == \"Histogram matching\": preserve_color = \"histogram_matching\"\n",
+    "    with torch.no_grad():\n",
+    "        stylized_img = transfer_network(content, styles, style_strength, interpolation_weights, preserve_color = preserve_color)\n",
+    "    return tensor_to_img(stylized_img)\n",
+    "\n",
+    "title = \"Artistic Style Transfer\"\n",
+    "\n",
+    "content_img = gr.components.Image(label=\"Content image\", type = \"pil\")\n",
+    "\n",
+    "style_img_1 = gr.components.Image(label=\"Style images\", type = \"pil\")\n",
+    "iw_1 = gr.components.Slider(0., 1., label = \"Style 1 interpolation\")\n",
+    "style_img_2 = gr.components.Image(label=\"Style images\", type = \"pil\")\n",
+    "iw_2 = gr.components.Slider(0., 1., label = \"Style 2 interpolation\")\n",
+    "style_img_3 = gr.components.Image(label=\"Style images\", type = \"pil\")\n",
+    "iw_3 = gr.components.Slider(0., 1., label = \"Style 3 interpolation\")\n",
+    "style_strength =  gr.components.Slider(0., 1., label = \"Adjust style strength\")\n",
+    "preserve_color = gr.components.Dropdown([\"None\", \"Whitening & Coloring\", \"Histogram matching\"], label = \"Choose color preserving mode\")\n",
+    "\n",
+    "interface = gr.Interface(fn = style_transfer,\n",
+    "                         inputs = [content_img,\n",
+    "                                   style_strength,\n",
+    "                                   style_img_1,\n",
+    "                                   iw_1,\n",
+    "                                   style_img_2,\n",
+    "                                   iw_2,\n",
+    "                                   style_img_3,\n",
+    "                                   iw_3,\n",
+    "                                   preserve_color],\n",
+    "                         outputs = gr.components.Image(),\n",
+    "                         title = title,\n",
+    "                         \n",
+    "                         )\n",
+    "interface.queue()\n",
+    "interface.launch(share = True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

network.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import torch
+import torch.nn as nn
+import torchvision
+from torchvision.models import vgg19
+import utils
+from utils import batch_wct, batch_histogram_matching
+class Encoder(nn.Module):
+  def __init__(self, layers = [1, 6, 11, 20]):
+    super(Encoder, self).__init__()
+    vgg = torchvision.models.vgg19(pretrained=True).features
+    self.encoder = nn.ModuleList()
+    temp_seq = nn.Sequential()
+    for i in range(max(layers)+1):
+        temp_seq.add_module(str(i), vgg[i])
+        if i in layers:
+            self.encoder.append(temp_seq)
+            temp_seq = nn.Sequential()
+  def forward(self, x):
+    features = []
+    for layer in self.encoder:
+        x = layer(x)
+        features.append(x)
+    return features
+# We need to copy the whole architecture because the outputs of the "layers" layers are needed to compute the loss.
+class Decoder(nn.Module):
+    """Decoder built by walking the VGG-19 feature layers backwards.
+
+    An untrained ``vgg19(pretrained=False).features`` is used only as a
+    template: each Conv2d met on the way down from ``max(layers)-1`` to 0
+    becomes [ReflectionPad2d + Conv2d with swapped channel counts + ReLU],
+    and each MaxPool2d becomes an ``nn.Upsample(scale_factor=2)``.
+
+    Sub-modules are named with a single running counter, so these names
+    (and therefore the state_dict keys) depend on the exact construction
+    order below.
+    """
+    def __init__(self, layers=[1, 6, 11, 20]):
+        super(Decoder, self).__init__()
+        vgg = torchvision.models.vgg19(pretrained=False).features
+        self.decoder = nn.ModuleList()
+        temp_seq  = nn.Sequential()
+        # running module name shared by all stages
+        count = 0
+        for i in range(max(layers)-1, -1, -1):
+            if isinstance(vgg[i], nn.Conv2d):
+                # get number of in/out channels (swapped w.r.t. the VGG layer)
+                out_channels = vgg[i].in_channels
+                in_channels = vgg[i].out_channels
+                kernel_size = vgg[i].kernel_size
+                # make a [reflection pad + convolution + relu] layer
+                temp_seq.add_module(str(count), nn.ReflectionPad2d(padding=(1,1,1,1)))
+                count += 1
+                temp_seq.add_module(str(count), nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size))
+                count += 1
+                temp_seq.add_module(str(count), nn.ReLU())
+                count += 1
+            # change down-sampling(MaxPooling) --> upsampling
+            elif isinstance(vgg[i], nn.MaxPool2d):
+                temp_seq.add_module(str(count), nn.Upsample(scale_factor=2))
+                count += 1
+            # a requested layer index closes the current stage
+            if i in layers:
+                self.decoder.append(temp_seq)
+                temp_seq  = nn.Sequential()
+        # append last conv layers without ReLU activation
+        # (the slice drops the final module of the remaining stage)
+        self.decoder.append(temp_seq[:-1])
+    def forward(self, x):
+        """Run ``x`` through every decoder stage in order and return the result."""
+        y = x
+        for layer in self.decoder:
+            y = layer(y)
+        return y
+class AdaIN(nn.Module):
+    def __init__(self):
+        super(AdaIN, self).__init__()
+    def forward(self, content, style, style_strength=1.0, eps=1e-5):
+        """
+        content: tensor of shape B * C * H * W
+        style: tensor of shape B * C * H * W
+        note that AdaIN does computation on a pair of content - style img"""
+        b, c, h, w = content.size()
+        content_std, content_mean = torch.std_mean(content.view(b, c, -1), dim=2, keepdim=True)
+        style_std, style_mean = torch.std_mean(style.view(b, c, -1), dim=2, keepdim=True)
+        normalized_content = (content.view(b, c, -1) - content_mean) / (content_std+eps)
+        stylized_content = (normalized_content * style_std) + style_mean
+        output = (1-style_strength) * content + style_strength * stylized_content.view(b, c, h, w)
+        return output
+class Style_Transfer_Network(nn.Module):
+  def __init__(self, layers = [1, 6, 11, 20]):
+    super(Style_Transfer_Network, self).__init__()
+    self.encoder = Encoder(layers)
+    self.decoder = Decoder(layers)
+    self.adain = AdaIN()
+  def forward(self, content, styles, style_strength = 1., interpolation_weights = None, preserve_color = None, train = False):
+    if interpolation_weights is None:
+       interpolation_weights = [1/len(styles)] * len(styles)
+    # encode the content image
+    content_feature = self.encoder(content)
+    # encode style images
+    style_features = []
+    for style in styles:
+        if preserve_color == 'whiten_and_color' or preserve_color == 'histogram_matching':
+                style = batch_wct(style, content)
+        style_features.append(self.encoder(style))
+    transformed_features = []
+    for style_feature, interpolation_weight in zip(style_features, interpolation_weights):
+        AdaIN_feature = self.adain(content_feature[-1], style_feature[-1], style_strength) * interpolation_weight
+        if preserve_color == 'histogram_matching':
+            AdaIN_feature *= 0.9
+        transformed_features.append(AdaIN_feature)
+    transformed_feature = sum(transformed_features)
+    stylized_image = self.decoder(transformed_feature)
+    if preserve_color == "whiten_and_color":
+        stylized_image = batch_wct(stylized_image, content)
+    if preserve_color == "histogram_matching":
+        stylized_image = batch_histogram_matching(stylized_image, content)
+    if train:
+      return stylized_image, transformed_feature
+    else:
+      return stylized_image

train.ipynb ADDED Viewed

	@@ -0,0 +1,503 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "_qsogBHiKtzF",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "datasets 2.4.0 requires dill<0.3.6, but you have dill 0.3.7 which is incompatible.\n",
+      "awscli 1.25.91 requires botocore==1.27.90, but you have botocore 1.31.17 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install -qq hub\n",
+    "!pip install -qq flask"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "id": "E8nHybN3KDIq",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import deeplake\n",
+    "from torch.utils.data import DataLoader\n",
+    "from torchvision import transforms\n",
+    "import torch.nn as nn\n",
+    "from network import Style_Transfer_Network, Encoder\n",
+    "from utils import save_img\n",
+    "import torchvision"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "rnAFLCiIKqkM",
+    "outputId": "81b8f1c3-3974-4ee3-a284-99186c1502c7",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "|"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Opening dataset in read-only mode as you don't have write permissions.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "-"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/wiki-art\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "-"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "hub://activeloop/wiki-art loaded successfully.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Opening dataset in read-only mode as you don't have write permissions.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\\"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/coco-test\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\\"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "hub://activeloop/coco-test loaded successfully.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " "
+     ]
+    }
+   ],
+   "source": [
+    "reshape_size = 512\n",
+    "crop_size = 256\n",
+    "def any_to_rgb(img):\n",
+    "    return img.convert('RGB')\n",
+    "preprocess = transforms.Compose([\n",
+    "      transforms.Lambda(any_to_rgb),\n",
+    "      transforms.ToTensor(),\n",
+    "      transforms.Resize(reshape_size),\n",
+    "      transforms.RandomCrop(crop_size),\n",
+    "      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n",
+    "      ])\n",
+    "wiki_art_dataset = deeplake.load('hub://activeloop/wiki-art')\n",
+    "coco_dataset = deeplake.load('hub://activeloop/coco-test')\n",
+    "\n",
+    "style_data_loader = wiki_art_dataset.pytorch(batch_size = 8, num_workers = 0,\n",
+    "    transform = {'images': preprocess, 'labels': None}, shuffle = True, decode_method = {'images':'pil'})\n",
+    "\n",
+    "cnt_data_loader = coco_dataset.pytorch(batch_size = 8, num_workers = 0,\n",
+    "    transform = {'images': preprocess}, shuffle = True, decode_method = {'images': 'pil'})\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "id": "XKqi9mMyoNUy",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "mse_loss = nn.MSELoss(reduction = 'mean')\n",
+    "def content_loss(source, target):\n",
+    "  cnt_loss = mse_loss(source, target)\n",
+    "  return cnt_loss\n",
+    "\n",
+    "def style_loss(features, targets):\n",
+    "  loss = 0\n",
+    "  for feature, target in zip(features, targets):\n",
+    "    B, C, H, W = feature.shape\n",
+    "    feature_std, feature_mean = torch.std_mean(feature.view(B, C, -1), dim = 2)\n",
+    "    target_std, target_mean = torch.std_mean(target.view(B, C, -1), dim = 2)\n",
+    "    loss += mse_loss(feature_std, target_std) + mse_loss(feature_mean, target_mean)\n",
+    "  return loss * 1. / len(features)\n",
+    "\"\"\"\n",
+    "def style_loss(features, targets, weights=None):\n",
+    "    if weights is None:\n",
+    "        weights = [1/len(features)] * len(features)\n",
+    "    \n",
+    "    loss = 0\n",
+    "    for feature, target, weight in zip(features, targets, weights):\n",
+    "        b, c, h, w = feature.size()\n",
+    "        feature_std, feature_mean = torch.std_mean(feature.view(b, c, -1), dim=2)\n",
+    "        target_std, target_mean = torch.std_mean(target.view(b, c, -1), dim=2)\n",
+    "        loss += (mse_loss(feature_std, target_std) + mse_loss(feature_mean, target_mean))*weight\n",
+    "    return loss\n",
+    "\"\"\"\n",
+    "def total_variational_loss(images):\n",
+    "    loss = 0.0\n",
+    "    B = images.shape[0]\n",
+    "    vertical_up = images[:,:,:-1]\n",
+    "    vertical_down = images[:,:,1:]\n",
+    "\n",
+    "    horizontal_up = images[:,:,:,:-1]\n",
+    "    horizontal_down = images[:,:,:,1:]\n",
+    "\n",
+    "    loss = ((vertical_up - vertical_down) ** 2).sum() + \\\n",
+    "                        ((horizontal_up - horizontal_down) ** 2).sum()\n",
+    "\n",
+    "    return loss * 1.0 / B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "id": "JAeuZ2Sq6E-0",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "if torch.cuda.is_available():\n",
+    "  device = \"cuda\"\n",
+    "else: device = \"cpu\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "style_transfer_network = Style_Transfer_Network().to(device)\n",
+    "check_point = torch.load(\"/notebooks/Style_transfer_with_ADAin/check_point.pth\", map_location = 'cuda')\n",
+    "style_transfer_network.load_state_dict(check_point['state_dict'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def denormalize():\n",
+    "  # out = (x - mean) / std\n",
+    "  MEAN = [0.485, 0.456, 0.406]\n",
+    "  STD = [0.229, 0.224, 0.225]\n",
+    "  MEAN = [-mean/std for mean, std in zip(MEAN, STD)]\n",
+    "  STD = [1/std for std in STD]\n",
+    "  return transforms.Normalize(mean=MEAN, std=STD)\n",
+    "\n",
+    "def save_img(tensor, path):\n",
+    "    denormalizer = denormalize()   \n",
+    "    if tensor.is_cuda:\n",
+    "        tensor = tensor.cpu()\n",
+    "    tensor = torchvision.utils.make_grid(tensor)\n",
+    "    torchvision.utils.save_image(denormalizer(tensor).clamp_(0.0, 1.0), path)    \n",
+    "    return None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "1Y-JrlNquBwn",
+    "outputId": "31d5fe14-5315-40cd-8946-99c34ff41726",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def train_network(iteration, loss_weight = [0.0, 0.0, 0.0001], check_iter = 1, test_iter = 10):\n",
+    "  # Fine-tune the decoder of style_transfer_network for `iteration` steps.\n",
+    "  # loss_weight = [content weight, style weight, total-variation weight];\n",
+    "  # the loss is printed every `check_iter` steps and a checkpoint is written\n",
+    "  # to 'check_point1.pth' every `test_iter` steps.\n",
+    "  for param in style_transfer_network.encoder.parameters():\n",
+    "    # freeze parameter in the encoder network\n",
+    "    param.requires_grad = False\n",
+    "  optimizer = torch.optim.Adam(style_transfer_network.decoder.parameters(), lr = 1e-6)\n",
+    "\n",
+    "  # separate frozen encoder used only to compute the losses\n",
+    "  encoder_net = Encoder().to(device)\n",
+    "  for param in encoder_net.parameters():\n",
+    "    param.requires_grad = False\n",
+    "  for i in range(iteration):\n",
+    "    content_imgs = next(iter(cnt_data_loader))['images'].to(device)\n",
+    "    style_imgs = next(iter(style_data_loader))['images'].to(device)\n",
+    "\n",
+    "    output_imgs, transformed_features = style_transfer_network(content_imgs, style_imgs, train = True)\n",
+    "\n",
+    "    output_features = encoder_net(output_imgs)\n",
+    "    style_features = encoder_net(style_imgs)\n",
+    "\n",
+    "    cnt_loss = content_loss(transformed_features, output_features[-1])\n",
+    "    st_loss = style_loss(output_features, style_features)\n",
+    "    tv_loss = total_variational_loss(output_imgs)\n",
+    "    cnt_w, style_w, tv_w = loss_weight\n",
+    "    # the content weight must scale the content loss (it used to scale tv_loss,\n",
+    "    # which left cnt_loss unused)\n",
+    "    total_loss = cnt_w * cnt_loss + style_w * st_loss + tv_w * tv_loss\n",
+    "\n",
+    "    optimizer.zero_grad()\n",
+    "    total_loss.backward()\n",
+    "    optimizer.step()\n",
+    "\n",
+    "    if i % check_iter == 0:\n",
+    "      print('-' * 80)\n",
+    "      print(\"Iteration {} loss: {}\".format(i, total_loss))\n",
+    "\n",
+    "    if i % test_iter == 0:\n",
+    "      #save_img(torch.cat([content_imgs[0], style_imgs[0], output_imgs[0]], dim = 0), \"training_image.png\")\n",
+    "      # NOTE(review): 'iteration' stores the requested total + 1, not the current step - confirm intent\n",
+    "      torch.save({'iteration':iteration+1,\n",
+    "                'state_dict':style_transfer_network.state_dict()},\n",
+    "                'check_point1.pth')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 0 loss: 0.8845198750495911\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 1 loss: 1.8098524808883667\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 2 loss: 1.868203043937683\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 3 loss: 1.1070071458816528\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 4 loss: 2.0751609802246094\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 5 loss: 2.7107627391815186\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 6 loss: 1.4618340730667114\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 7 loss: 1.2351319789886475\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 8 loss: 1.3090686798095703\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 9 loss: 1.7165802717208862\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 10 loss: 1.9655226469039917\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 11 loss: 1.8032971620559692\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 12 loss: 1.757157802581787\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 13 loss: 1.2641586065292358\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 14 loss: 1.230526328086853\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 15 loss: 1.8332327604293823\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 16 loss: 2.347355365753174\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 17 loss: 0.8620480298995972\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 18 loss: 1.572771668434143\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 19 loss: 2.281660795211792\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 20 loss: 1.417534589767456\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 21 loss: 1.848774790763855\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 22 loss: 1.1456807851791382\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 23 loss: 1.2357560396194458\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 24 loss: 0.6565238833427429\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 25 loss: 1.2375402450561523\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 26 loss: 2.1140313148498535\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 27 loss: 1.0238616466522217\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 28 loss: 2.618056058883667\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 29 loss: 1.1616159677505493\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 30 loss: 1.919601559638977\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 31 loss: 1.0250651836395264\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 32 loss: 1.1823596954345703\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 33 loss: 0.8185012936592102\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 34 loss: 1.1374247074127197\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 35 loss: 1.9250235557556152\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 36 loss: 1.466286540031433\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.9/dist-packages/PIL/Image.py:3035: DecompressionBombWarning: Image size (99962094 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 37 loss: 0.7055997848510742\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 38 loss: 1.3557121753692627\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 39 loss: 1.0668007135391235\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 40 loss: 1.1934823989868164\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 41 loss: 0.7692145109176636\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 42 loss: 1.141457438468933\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 43 loss: 1.5705242156982422\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 44 loss: 1.7851486206054688\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 45 loss: 0.7252503633499146\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 46 loss: 1.1291860342025757\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 47 loss: 1.3588659763336182\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 48 loss: 0.9960977435112\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 49 loss: 0.9272828102111816\n",
+      "--------------------------------------------------------------------------------\n",
+      "Iteration 50 loss: 2.4692296981811523\n"
+     ]
+    }
+   ],
+   "source": [
+    "train_network(iteration = 300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

utils.py ADDED Viewed

	@@ -0,0 +1,138 @@

+from skimage.exposure import match_histograms
+from skimage import io
+import os
+from PIL import Image
+import torch
+import torchvision
+import torchvision.transforms as transforms
def normalize():
  """Build the per-channel normalisation transform applied to input images.

  Returns:
    A ``transforms.Normalize`` using channel means (0.485, 0.456, 0.406)
    and channel standard deviations (0.229, 0.224, 0.225).
  """
  channel_mean = [0.485, 0.456, 0.406]
  channel_std = [0.229, 0.224, 0.225]
  return transforms.Normalize(mean=channel_mean, std=channel_std)
def denormalize():
  """Build the transform that undoes the normalisation from ``normalize()``.

  ``Normalize`` computes ``out = (x - mean) / std``. Feeding it
  ``mean' = -mean / std`` and ``std' = 1 / std`` therefore yields
  ``x * std + mean``, i.e. the inverse mapping.

  Returns:
    A ``transforms.Normalize`` configured with the inverted statistics.
  """
  channel_mean = [0.485, 0.456, 0.406]
  channel_std = [0.229, 0.224, 0.225]
  inverse_mean = [-m / s for m, s in zip(channel_mean, channel_std)]
  inverse_std = [1 / s for s in channel_std]
  return transforms.Normalize(mean=inverse_mean, std=inverse_std)
def transformer(imsize = None, cropsize = None):
  """Compose the preprocessing pipeline for PIL images.

  Args:
    imsize: if truthy, passed to ``transforms.Resize`` as the first step.
    cropsize: if truthy, passed to ``transforms.RandomCrop`` after the
      optional resize.

  Returns:
    A ``transforms.Compose`` that optionally resizes and random-crops, then
    converts to a tensor and applies ``normalize()``.
  """
  resize_steps = [transforms.Resize(imsize)] if imsize else []
  crop_steps = [transforms.RandomCrop(cropsize)] if cropsize else []
  final_steps = [transforms.ToTensor(), normalize()]
  return transforms.Compose(resize_steps + crop_steps + final_steps)
def load_img(path, imsize = None, cropsize = None):
  """Load an image file as a normalised tensor with a leading batch axis.

  Args:
    path: location of the image file, opened with ``PIL.Image.open``.
    imsize: forwarded to ``transformer`` (optional resize).
    cropsize: forwarded to ``transformer`` (optional random crop).

  Returns:
    The transformed image with an extra dimension inserted at position 0.
  """
  pipeline = transformer(imsize = imsize, cropsize = cropsize)
  # The torchvision transforms operate on PIL images; force three channels.
  rgb_image = Image.open(path).convert("RGB")
  return pipeline(rgb_image).unsqueeze(0)
def tensor_to_img(tensor):
  """Convert a normalised image tensor into a PIL image.

  The tensor is moved to the CPU, squeezed, passed through ``denormalize()``,
  laid out with ``torchvision.utils.make_grid`` and clamped to ``[0, 1]``
  before conversion.

  Args:
    tensor: image tensor produced with the statistics of ``normalize()``.
      Presumably shaped (1, C, H, W) or (C, H, W) -- TODO confirm with callers.

  Returns:
    A PIL image.
  """
  # `tensor.device` is a `torch.device` (e.g. `cuda:0`), so comparing it with
  # the string "cuda" does not detect GPU tensors. `.cpu()` is a no-op for
  # tensors already on the CPU, so it is applied unconditionally; `.detach()`
  # drops any autograd graph so the later conversion cannot fail on it.
  tensor = tensor.detach().cpu()
  denormalizer = denormalize()
  grid = torchvision.utils.make_grid(denormalizer(tensor.squeeze()))
  # `grid` is a fresh tensor, so the in-place clamp does not touch the input.
  return transforms.functional.to_pil_image(grid.clamp_(0., 1.))
def save_img(tensor, path):
  """Write a normalised image tensor to ``path`` as an image file.

  This was previously an empty stub that silently did nothing. The tensor is
  converted with ``tensor_to_img`` and saved through PIL, which picks the
  file format from the extension of ``path``.

  Args:
    tensor: image tensor accepted by ``tensor_to_img``.
    path: destination file path.
  """
  tensor_to_img(tensor).save(path)
def histogram_matching(image, reference):
  """Match the per-channel histogram of ``image`` to that of ``reference``.

  Args:
    image: tensor of shape CxHxW whose colours are adjusted (the style image).
    reference: tensor of shape CxHxW supplying the target histogram (the
      original image).

  Returns:
    A float32 CxHxW tensor on the same device as ``image`` whose colour
    histogram resembles that of ``reference``.
  """
  device = image.device
  # skimage works on channel-last NumPy arrays; detach so `.numpy()` also
  # works for tensors that are part of an autograd graph.
  reference_np = reference.detach().cpu().permute(1, 2, 0).numpy()
  image_np = image.detach().cpu().permute(1, 2, 0).numpy()
  # `multichannel=True` was deprecated in scikit-image 0.19 and later removed;
  # `channel_axis=-1` is its replacement for channel-last data.
  matched = match_histograms(image_np, reference_np, channel_axis = -1)
  return torch.as_tensor(matched, dtype = torch.float32).permute(2, 0, 1).to(device)
def batch_histogram_matching(images, reference):
  """Apply ``histogram_matching`` to every image of a batch.

  Args:
    images: tensor of shape BxCxHxW.
    reference: tensor of shape 1xCxHxW; its singleton dimensions are
      squeezed away before matching.

  Returns:
    A tensor with the shape and dtype of ``images`` holding the matched
    images.
  """
  target = reference.squeeze()
  matched = torch.zeros_like(images, dtype = images.dtype)
  for idx, single_image in enumerate(images):
    matched[idx] = histogram_matching(single_image, target)
  return matched
def statistics(f, inverse = False, eps = 1e-10):
  """Return the channel mean and a matrix square root of the channel covariance.

  Args:
    f: feature map of shape (C, H, W); must be viewable as (C, H*W).
    inverse: when True the covariance is raised to the power -1/2 (used for
      whitening), otherwise to the power 1/2 (used for colouring).
    eps: singular values below this threshold, and everything after the first
      such value, are dropped so the power is only taken on the numerically
      non-singular part.

  Returns:
    A tuple ``(f_mean, f_covsqrt)`` where ``f_mean`` has shape (C, 1) and
    ``f_covsqrt`` has shape (C, C).
  """
  c, h, w = f.shape
  flat = f.view(c, h * w)
  f_mean = torch.mean(flat, dim=1, keepdim=True)
  f_zeromean = flat - f_mean
  # Scatter matrix: not divided by the number of pixels.
  f_cov = torch.mm(f_zeromean, f_zeromean.t())
  # torch.svd is deprecated; torch.linalg.svd returns V already transposed.
  u, s, vh = torch.linalg.svd(f_cov)
  # Index of the first singular value below eps (or c when there is none),
  # found with a single tensor op instead of a Python loop over elements.
  below = torch.nonzero(s < eps)
  k = int(below[0]) if below.numel() > 0 else c
  p = -0.5 if inverse else 0.5
  f_covsqrt = torch.mm(torch.mm(u[:, 0:k], torch.diag(s[0:k].pow(p))), vh[0:k, :])
  return f_mean, f_covsqrt
def whitening(f):
  """Whiten a single feature map.

  The map is centred per channel and multiplied by the inverse square root
  of its covariance as returned by ``statistics(f, inverse = True)``.

  Args:
    f: feature map of shape (C, H, W).

  Returns:
    The whitened feature map, shape (C, H, W).
  """
  c, h, w = f.shape
  mean, inv_covsqrt = statistics(f, inverse = True)
  centered = f.view(c, h*w) - mean
  return torch.mm(inv_covsqrt, centered).view(c, h, w)
def batch_whitening(f):
  """Whiten every feature map of a batch independently.

  Args:
    f: tensor of shape (B, C, H, W).

  Returns:
    A new tensor of the same shape, dtype and device where each sample has
    been passed through ``whitening``.
  """
  b, c, h, w = f.shape
  result = torch.zeros(size = (b, c, h, w), dtype = f.dtype, device = f.device)
  for idx, sample in enumerate(f):
    result[idx] = whitening(sample)
  return result
def coloring(style, content):
  """Give ``style`` the channel mean and covariance of ``content``.

  ``style`` is whitened first, then multiplied by the covariance square root
  of ``content`` and shifted by the channel mean of ``content``.

  Args:
    style: feature map of shape (C, H, W) to be recoloured.
    content: feature map of shape (C, H', W') supplying the statistics.

  Returns:
    The recoloured map with the shape of ``style``.
  """
  s_c, s_h, s_w = style.shape
  content_mean, content_covsqrt = statistics(content, inverse = False)
  whitened_flat = whitening(style).view(s_c, s_h * s_w)
  recolored = torch.mm(content_covsqrt, whitened_flat) + content_mean
  return recolored.view(s_c, s_h, s_w)
def batch_coloring(styles, content):
  """Apply ``coloring`` to each (style, content) pair of two batches.

  Args:
    styles: batch of feature maps; sample ``i`` is recoloured.
    content: batch indexed in parallel; ``content[i]`` provides the
      statistics for ``styles[i]``.

  Returns:
    A tensor with the shape, dtype and device of ``styles``.
  """
  result = torch.zeros_like(styles, dtype = styles.dtype, device = styles.device)
  for idx in range(len(styles)):
    result[idx] = coloring(styles[idx], content[idx])
  return result
def batch_wct(styles, content):
  """Whitening-and-colouring transform of ``styles`` towards ``content``.

  Each style sample is whitened and then coloured with the channel
  statistics of the matching content sample.

  Args:
    styles: batch of feature maps to transform.
    content: batch indexed in parallel with ``styles``.

  Returns:
    A tensor with the shape of ``styles``.
  """
  # `coloring` already whitens every style map before applying the content
  # statistics. Whitening beforehand as well only re-whitens data whose
  # covariance is already the identity: one more SVD per sample with no
  # effect on the result beyond floating-point noise.
  return batch_coloring(styles, content)
class Image_Set(torch.utils.data.Dataset):
  """Dataset yielding the transformed images found in one directory.

  Attributes are documented inline in ``__init__``.
  """

  def __init__(self, root_path, imsize, cropsize):
    """Index the directory and build the preprocessing pipeline.

    Args:
      root_path: directory whose entries are all treated as image files.
      imsize: forwarded to ``transformer`` (optional resize).
      cropsize: forwarded to ``transformer`` (optional random crop).
    """
    super(Image_Set, self).__init__()
    self.root_path = root_path
    # Sorted so that indices map to the same files on every run.
    self.file_names = sorted(os.listdir(self.root_path))
    # `__len__`/`__getitem__` read `file_names`; the list used to be stored
    # only as `files`, which raised AttributeError. Keep `files` as an alias
    # for any external code that relied on the old attribute.
    self.files = self.file_names
    self.transformer = transformer(imsize, cropsize)

  def __len__(self):
    """Return the number of directory entries."""
    return len(self.file_names)

  def __getitem__(self, index):
    """Load entry ``index`` as RGB and return it transformed."""
    # Join with a path separator instead of concatenating the strings, so
    # `root_path` works with or without a trailing slash.
    image_path = os.path.join(self.root_path, self.file_names[index])
    image = Image.open(image_path).convert("RGB")
    return self.transformer(image)