sasha (HF staff) committed on
Commit 5d26c71
1 Parent(s): 8e00a6e

trying to do Autoencoder but failing

Files changed (1)
  1. CNN-Autoencoder.ipynb +476 -0
CNN-Autoencoder.ipynb ADDED
@@ -0,0 +1,476 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "4f403af3",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "#Source: https://medium.com/dataseries/convolutional-autoencoder-in-pytorch-on-mnist-dataset-d65145c132ac"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 46,
16
+ "id": "add961d3",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import matplotlib.pyplot as plt # plotting library\n",
21
+ "from sklearn.model_selection import train_test_split\n",
22
+ "import numpy as np # this module is useful to work with numerical arrays\n",
23
+ "import pandas as pd \n",
24
+ "import random \n",
25
+ "import os\n",
26
+ "import torch\n",
27
+ "import torchvision\n",
28
+ "from torchvision import transforms, datasets\n",
29
+ "from torch.utils.data import DataLoader,random_split\n",
30
+ "from torch import nn\n",
31
+ "import torch.nn.functional as F\n",
32
+ "import torch.optim as optim"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 3,
38
+ "id": "7f5313b5",
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "def find_candidate_images(images_path):\n",
43
+ " \"\"\"\n",
44
+ " Finds all candidate images in the given folder and its sub-folders.\n",
45
+ "\n",
46
+ " Returns:\n",
47
+ " images: a list of absolute paths to the discovered images.\n",
48
+ " \"\"\"\n",
49
+ " images = []\n",
50
+ " for root, dirs, files in os.walk(images_path):\n",
51
+ " for name in files:\n",
52
+ " file_path = os.path.abspath(os.path.join(root, name))\n",
53
+ " if ((os.path.splitext(name)[1]).lower() in ['.jpg','.png','.jpeg']):\n",
54
+ " images.append(file_path)\n",
55
+ " return images"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 49,
61
+ "id": "1e7f0096",
62
+ "metadata": {},
63
+ "outputs": [],
64
+ "source": [
65
+ "class MyDataset(torch.utils.data.Dataset):\n",
66
+ " def __init__(self, img_list, augmentations):\n",
67
+ " super(MyDataset, self).__init__()\n",
68
+ " self.img_list = img_list\n",
69
+ " self.augmentations = augmentations\n",
70
+ "\n",
71
+ " def __len__(self):\n",
72
+ " return len(self.img_list)\n",
73
+ "\n",
74
+ " def __getitem__(self, idx):\n",
75
+ " img = self.img_list[idx]\n",
76
+ " return self.augmentations(img)"
77
+ ]
78
+ },
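Note: __getitem__ as written hands the transform pipeline a path string, which is what later raises "pic should be PIL Image or ndarray. Got <class 'str'>". A minimal sketch of a fix, assuming the paths point to RGB image files and Pillow is available:

    from PIL import Image

    def __getitem__(self, idx):
        # Open the file behind the stored path before applying the transforms;
        # ToTensor expects a PIL Image or an ndarray, not a str.
        img = Image.open(self.img_list[idx]).convert("RGB")
        return self.augmentations(img)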
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 51,
82
+ "id": "f846b86c",
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "images = find_candidate_images('../SD_sample_f_m_pt2')"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 43,
92
+ "id": "da000292",
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "transform = transforms.Compose([\n",
97
+ "transforms.ToTensor(),\n",
98
+ "])"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 55,
104
+ "id": "d8f46911",
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "data = MyDataset(images, transform)\n",
109
+ "dataset_iterator = DataLoader(data, batch_size=1)"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 56,
115
+ "id": "05504c87",
116
+ "metadata": {},
117
+ "outputs": [
118
+ {
119
+ "ename": "TypeError",
120
+ "evalue": "pic should be PIL Image or ndarray. Got <class 'str'>",
121
+ "output_type": "error",
122
+ "traceback": [
123
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
124
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
125
+ "Input \u001b[0;32mIn [56]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m train_images, test_images \u001b[38;5;241m=\u001b[39m \u001b[43mtrain_test_split\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.33\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m42\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(train_images))\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(test_images))\n",
126
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/sklearn/model_selection/_split.py:2471\u001b[0m, in \u001b[0;36mtrain_test_split\u001b[0;34m(test_size, train_size, random_state, shuffle, stratify, *arrays)\u001b[0m\n\u001b[1;32m 2467\u001b[0m cv \u001b[38;5;241m=\u001b[39m CVClass(test_size\u001b[38;5;241m=\u001b[39mn_test, train_size\u001b[38;5;241m=\u001b[39mn_train, random_state\u001b[38;5;241m=\u001b[39mrandom_state)\n\u001b[1;32m 2469\u001b[0m train, test \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(cv\u001b[38;5;241m.\u001b[39msplit(X\u001b[38;5;241m=\u001b[39marrays[\u001b[38;5;241m0\u001b[39m], y\u001b[38;5;241m=\u001b[39mstratify))\n\u001b[0;32m-> 2471\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2472\u001b[0m \u001b[43m \u001b[49m\u001b[43mchain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_iterable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2473\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43m_safe_indexing\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_safe_indexing\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43marrays\u001b[49m\n\u001b[1;32m 2474\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2475\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
127
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/sklearn/model_selection/_split.py:2473\u001b[0m, in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 2467\u001b[0m cv \u001b[38;5;241m=\u001b[39m CVClass(test_size\u001b[38;5;241m=\u001b[39mn_test, train_size\u001b[38;5;241m=\u001b[39mn_train, random_state\u001b[38;5;241m=\u001b[39mrandom_state)\n\u001b[1;32m 2469\u001b[0m train, test \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(cv\u001b[38;5;241m.\u001b[39msplit(X\u001b[38;5;241m=\u001b[39marrays[\u001b[38;5;241m0\u001b[39m], y\u001b[38;5;241m=\u001b[39mstratify))\n\u001b[1;32m 2471\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(\n\u001b[1;32m 2472\u001b[0m chain\u001b[38;5;241m.\u001b[39mfrom_iterable(\n\u001b[0;32m-> 2473\u001b[0m (\u001b[43m_safe_indexing\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m)\u001b[49m, _safe_indexing(a, test)) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m arrays\n\u001b[1;32m 2474\u001b[0m )\n\u001b[1;32m 2475\u001b[0m )\n",
128
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/sklearn/utils/__init__.py:363\u001b[0m, in \u001b[0;36m_safe_indexing\u001b[0;34m(X, indices, axis)\u001b[0m\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _array_indexing(X, indices, indices_dtype, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 362\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 363\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_list_indexing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindices\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindices_dtype\u001b[49m\u001b[43m)\u001b[49m\n",
129
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/sklearn/utils/__init__.py:217\u001b[0m, in \u001b[0;36m_list_indexing\u001b[0;34m(X, key, key_dtype)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(compress(X, key))\n\u001b[1;32m 216\u001b[0m \u001b[38;5;66;03m# key is a integer array-like of key\u001b[39;00m\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [X[idx] \u001b[38;5;28;01mfor\u001b[39;00m idx \u001b[38;5;129;01min\u001b[39;00m key]\n",
130
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/sklearn/utils/__init__.py:217\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(compress(X, key))\n\u001b[1;32m 216\u001b[0m \u001b[38;5;66;03m# key is a integer array-like of key\u001b[39;00m\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[43mX\u001b[49m\u001b[43m[\u001b[49m\u001b[43midx\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m idx \u001b[38;5;129;01min\u001b[39;00m key]\n",
131
+ "Input \u001b[0;32mIn [49]\u001b[0m, in \u001b[0;36mMyDataset.__getitem__\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, idx):\n\u001b[1;32m 11\u001b[0m img \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mimg_list[idx]\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maugmentations\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n",
132
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/torchvision/transforms/transforms.py:95\u001b[0m, in \u001b[0;36mCompose.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, img):\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransforms:\n\u001b[0;32m---> 95\u001b[0m img \u001b[38;5;241m=\u001b[39m \u001b[43mt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
133
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/torchvision/transforms/transforms.py:135\u001b[0m, in \u001b[0;36mToTensor.__call__\u001b[0;34m(self, pic)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, pic):\n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m pic (PIL Image or numpy.ndarray): Image to be converted to tensor.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124;03m Tensor: Converted image.\u001b[39;00m\n\u001b[1;32m 134\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_tensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpic\u001b[49m\u001b[43m)\u001b[49m\n",
134
+ "File \u001b[0;32m~/miniconda3/envs/stablediffusion/lib/python3.9/site-packages/torchvision/transforms/functional.py:137\u001b[0m, in \u001b[0;36mto_tensor\u001b[0;34m(pic)\u001b[0m\n\u001b[1;32m 135\u001b[0m _log_api_usage_once(to_tensor)\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (F_pil\u001b[38;5;241m.\u001b[39m_is_pil_image(pic) \u001b[38;5;129;01mor\u001b[39;00m _is_numpy(pic)):\n\u001b[0;32m--> 137\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpic should be PIL Image or ndarray. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(pic)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _is_numpy(pic) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m _is_numpy_image(pic):\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpic should be 2/3 dimensional. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpic\u001b[38;5;241m.\u001b[39mndim\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m dimensions.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
135
+ "\u001b[0;31mTypeError\u001b[0m: pic should be PIL Image or ndarray. Got <class 'str'>"
136
+ ]
137
+ }
138
+ ],
139
+ "source": [
140
+ "train_images, test_images = train_test_split(data, test_size=0.33, random_state=42)\n",
141
+ "print(len(train_images))\n",
142
+ "print(len(test_images))"
143
+ ]
144
+ },
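The TypeError above comes from how train_test_split indexes the Dataset: sklearn builds the train and test lists element by element, so MyDataset.__getitem__ runs on every index and fails on the path strings. One way around it, sketched under the assumption that the images list and transform from the earlier cells are in scope, is to split the raw path list and wrap each half afterwards:

    # Split the file paths, then wrap each split in the Dataset class;
    # nothing is read from disk until a DataLoader requests an item.
    train_paths, test_paths = train_test_split(images, test_size=0.33, random_state=42)
    train_images = MyDataset(train_paths, transform)
    test_images = MyDataset(test_paths, transform)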
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 16,
148
+ "id": "669f82ab",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "m=len(train_images)"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": 23,
158
+ "id": "e962953c",
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "train_data, val_data = random_split(train_images, [int(m-m*0.2), int(m*0.2)])\n",
163
+ "test_dataset = test_images"
164
+ ]
165
+ },
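One caveat on the split lengths: random_split expects the given lengths to sum to len(train_images), and int(m - m*0.2) + int(m*0.2) can fall one element short whenever m*0.2 is not a whole number (for m = 13 it gives 10 + 2 = 12). A sketch that keeps the sum exact:

    val_len = int(m * 0.2)
    train_data, val_data = random_split(train_images, [m - val_len, val_len])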
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": 24,
169
+ "id": "16a8e2a1",
170
+ "metadata": {},
171
+ "outputs": [],
172
+ "source": [
173
+ "train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)\n",
174
+ "valid_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)\n",
175
+ "test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,shuffle=True)"
176
+ ]
177
+ },
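batch_size is not defined in any cell of the committed notebook (it presumably still lived in the running kernel), so a fresh run would stop here with a NameError. A sketch with a hypothetical value:

    batch_size = 32  # hypothetical; choose to fit the GPU
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
    valid_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)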
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 25,
181
+ "id": "07403239",
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "class Encoder(nn.Module):\n",
186
+ " \n",
187
+ " def __init__(self, encoded_space_dim,fc2_input_dim):\n",
188
+ " super().__init__()\n",
189
+ " \n",
190
+ " ### Convolutional section\n",
191
+ " self.encoder_cnn = nn.Sequential(\n",
192
+ " nn.Conv2d(1, 8, 3, stride=2, padding=1),\n",
193
+ " nn.ReLU(True),\n",
194
+ " nn.Conv2d(8, 16, 3, stride=2, padding=1),\n",
195
+ " nn.BatchNorm2d(16),\n",
196
+ " nn.ReLU(True),\n",
197
+ " nn.Conv2d(16, 32, 3, stride=2, padding=0),\n",
198
+ " nn.ReLU(True)\n",
199
+ " )\n",
200
+ " \n",
201
+ " ### Flatten layer\n",
202
+ " self.flatten = nn.Flatten(start_dim=1)\n",
203
+ "### Linear section\n",
204
+ " self.encoder_lin = nn.Sequential(\n",
205
+ " nn.Linear(3 * 3 * 32, 128),\n",
206
+ " nn.ReLU(True),\n",
207
+ " nn.Linear(128, encoded_space_dim)\n",
208
+ " )\n",
209
+ " \n",
210
+ " def forward(self, x):\n",
211
+ " x = self.encoder_cnn(x)\n",
212
+ " x = self.flatten(x)\n",
213
+ " x = self.encoder_lin(x)\n",
214
+ " return x\n",
215
+ "class Decoder(nn.Module):\n",
216
+ " \n",
217
+ " def __init__(self, encoded_space_dim,fc2_input_dim):\n",
218
+ " super().__init__()\n",
219
+ " self.decoder_lin = nn.Sequential(\n",
220
+ " nn.Linear(encoded_space_dim, 128),\n",
221
+ " nn.ReLU(True),\n",
222
+ " nn.Linear(128, 3 * 3 * 32),\n",
223
+ " nn.ReLU(True)\n",
224
+ " )\n",
225
+ "\n",
226
+ " self.unflatten = nn.Unflatten(dim=1, \n",
227
+ " unflattened_size=(32, 3, 3))\n",
228
+ "\n",
229
+ " self.decoder_conv = nn.Sequential(\n",
230
+ " nn.ConvTranspose2d(32, 16, 3, \n",
231
+ " stride=2, output_padding=0),\n",
232
+ " nn.BatchNorm2d(16),\n",
233
+ " nn.ReLU(True),\n",
234
+ " nn.ConvTranspose2d(16, 8, 3, stride=2, \n",
235
+ " padding=1, output_padding=1),\n",
236
+ " nn.BatchNorm2d(8),\n",
237
+ " nn.ReLU(True),\n",
238
+ " nn.ConvTranspose2d(8, 1, 3, stride=2, \n",
239
+ " padding=1, output_padding=1)\n",
240
+ " )\n",
241
+ " \n",
242
+ " def forward(self, x):\n",
243
+ " x = self.decoder_lin(x)\n",
244
+ " x = self.unflatten(x)\n",
245
+ " x = self.decoder_conv(x)\n",
246
+ " x = torch.sigmoid(x)\n",
247
+ " return x"
248
+ ]
249
+ },
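Both networks are taken from the linked MNIST tutorial: the first Conv2d expects a single input channel, and the 3 * 3 * 32 bottleneck assumes 28x28 inputs, so RGB images of another size would need the channel count changed and the flattened size recomputed. A sketch of finding that size empirically rather than hard-coding it, assuming hypothetical 3-channel 64x64 inputs:

    # Push a dummy batch through the conv stack to discover the flattened width.
    conv = nn.Sequential(
        nn.Conv2d(3, 8, 3, stride=2, padding=1), nn.ReLU(True),
        nn.Conv2d(8, 16, 3, stride=2, padding=1), nn.BatchNorm2d(16), nn.ReLU(True),
        nn.Conv2d(16, 32, 3, stride=2, padding=0), nn.ReLU(True),
    )
    with torch.no_grad():
        flat_dim = nn.Flatten(start_dim=1)(conv(torch.zeros(1, 3, 64, 64))).shape[1]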
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 26,
253
+ "id": "fedfd708",
254
+ "metadata": {},
255
+ "outputs": [
256
+ {
257
+ "name": "stdout",
258
+ "output_type": "stream",
259
+ "text": [
260
+ "Selected device: cuda\n"
261
+ ]
262
+ },
263
+ {
264
+ "data": {
265
+ "text/plain": [
266
+ "Decoder(\n",
267
+ " (decoder_lin): Sequential(\n",
268
+ " (0): Linear(in_features=4, out_features=128, bias=True)\n",
269
+ " (1): ReLU(inplace=True)\n",
270
+ " (2): Linear(in_features=128, out_features=288, bias=True)\n",
271
+ " (3): ReLU(inplace=True)\n",
272
+ " )\n",
273
+ " (unflatten): Unflatten(dim=1, unflattened_size=(32, 3, 3))\n",
274
+ " (decoder_conv): Sequential(\n",
275
+ " (0): ConvTranspose2d(32, 16, kernel_size=(3, 3), stride=(2, 2))\n",
276
+ " (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
277
+ " (2): ReLU(inplace=True)\n",
278
+ " (3): ConvTranspose2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n",
279
+ " (4): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
280
+ " (5): ReLU(inplace=True)\n",
281
+ " (6): ConvTranspose2d(8, 1, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))\n",
282
+ " )\n",
283
+ ")"
284
+ ]
285
+ },
286
+ "execution_count": 26,
287
+ "metadata": {},
288
+ "output_type": "execute_result"
289
+ }
290
+ ],
291
+ "source": [
292
+ "### Define the loss function\n",
293
+ "loss_fn = torch.nn.MSELoss()\n",
294
+ "\n",
295
+ "### Define an optimizer (both for the encoder and the decoder!)\n",
296
+ "lr= 0.001\n",
297
+ "\n",
298
+ "### Set the random seed for reproducible results\n",
299
+ "torch.manual_seed(0)\n",
300
+ "\n",
301
+ "### Initialize the two networks\n",
302
+ "d = 4\n",
303
+ "\n",
304
+ "#model = Autoencoder(encoded_space_dim=encoded_space_dim)\n",
305
+ "encoder = Encoder(encoded_space_dim=d,fc2_input_dim=128)\n",
306
+ "decoder = Decoder(encoded_space_dim=d,fc2_input_dim=128)\n",
307
+ "params_to_optimize = [\n",
308
+ " {'params': encoder.parameters()},\n",
309
+ " {'params': decoder.parameters()}\n",
310
+ "]\n",
311
+ "\n",
312
+ "optim = torch.optim.Adam(params_to_optimize, lr=lr, weight_decay=1e-05)\n",
313
+ "\n",
314
+ "# Check if the GPU is available\n",
315
+ "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
316
+ "print(f'Selected device: {device}')\n",
317
+ "\n",
318
+ "# Move both the encoder and the decoder to the selected device\n",
319
+ "encoder.to(device)\n",
320
+ "decoder.to(device)"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": 33,
326
+ "id": "bae32de2",
327
+ "metadata": {},
328
+ "outputs": [],
329
+ "source": [
330
+ "### Training function\n",
331
+ "def train_epoch(encoder, decoder, device, dataloader, loss_fn, optimizer):\n",
332
+ " # Set train mode for both the encoder and the decoder\n",
333
+ " encoder.train()\n",
334
+ " decoder.train()\n",
335
+ " train_loss = []\n",
336
+ " # Iterate the dataloader (we do not need the label values, this is unsupervised learning)\n",
337
+ " for image_batch, _ in dataloader: # with \"_\" we just ignore the labels (the second element of the dataloader tuple)\n",
338
+ " # Move tensor to the proper device\n",
339
+ " image_batch = image_batch.to(device)\n",
340
+ " # Encode data\n",
341
+ " encoded_data = encoder(image_batch)\n",
342
+ " # Decode data\n",
343
+ " decoded_data = decoder(encoded_data)\n",
344
+ " # Evaluate loss\n",
345
+ " loss = loss_fn(decoded_data, image_batch)\n",
346
+ " # Backward pass\n",
347
+ " optimizer.zero_grad()\n",
348
+ " loss.backward()\n",
349
+ " optimizer.step()\n",
350
+ " # Print batch loss\n",
351
+ " print('\\t partial train loss (single batch): %f' % (loss.data))\n",
352
+ " train_loss.append(loss.detach().cpu().numpy())\n",
353
+ "\n",
354
+ " return np.mean(train_loss)"
355
+ ]
356
+ },
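The loop header "for image_batch, _ in dataloader:" mirrors the MNIST tutorial, where every item is an (image, label) pair; MyDataset returns a bare image, so each batch is a single tensor and the unpacking is what produces the "too many values to unpack" error in the last cell. A sketch of the inner loop for this unlabeled data:

    for image_batch in dataloader:          # each batch is just a tensor of images
        image_batch = image_batch.to(device)
        decoded_data = decoder(encoder(image_batch))
        loss = loss_fn(decoded_data, image_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.append(loss.detach().cpu().numpy())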
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 28,
360
+ "id": "ff2ec5fd",
361
+ "metadata": {},
362
+ "outputs": [],
363
+ "source": [
364
+ "### Testing function\n",
365
+ "def test_epoch(encoder, decoder, device, dataloader, loss_fn):\n",
366
+ " # Set evaluation mode for encoder and decoder\n",
367
+ " encoder.eval()\n",
368
+ " decoder.eval()\n",
369
+ " with torch.no_grad(): # No need to track the gradients\n",
370
+ " # Define the lists to store the outputs for each batch\n",
371
+ " conc_out = []\n",
372
+ " conc_label = []\n",
373
+ " for image_batch, _ in dataloader:\n",
374
+ " # Move tensor to the proper device\n",
375
+ " image_batch = image_batch.to(device)\n",
376
+ " # Encode data\n",
377
+ " encoded_data = encoder(image_batch)\n",
378
+ " # Decode data\n",
379
+ " decoded_data = decoder(encoded_data)\n",
380
+ " # Append the network output and the original image to the lists\n",
381
+ " conc_out.append(decoded_data.cpu())\n",
382
+ " conc_label.append(image_batch.cpu())\n",
383
+ " # Create a single tensor with all the values in the lists\n",
384
+ " conc_out = torch.cat(conc_out)\n",
385
+ " conc_label = torch.cat(conc_label) \n",
386
+ " # Evaluate global loss\n",
387
+ " val_loss = loss_fn(conc_out, conc_label)\n",
388
+ " return val_loss.data"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": 29,
394
+ "id": "592ab5f1",
395
+ "metadata": {},
396
+ "outputs": [],
397
+ "source": [
398
+ "def plot_ae_outputs(encoder,decoder,n=10):\n",
399
+ " plt.figure(figsize=(16,4.5))\n",
400
+ " targets = test_dataset.targets.numpy()\n",
401
+ " t_idx = {i:np.where(targets==i)[0][0] for i in range(n)}\n",
402
+ " for i in range(n):\n",
403
+ " ax = plt.subplot(2,n,i+1)\n",
404
+ " img = test_dataset[t_idx[i]][0].unsqueeze(0).to(device)\n",
405
+ " encoder.eval()\n",
406
+ " decoder.eval()\n",
407
+ " with torch.no_grad():\n",
408
+ " rec_img = decoder(encoder(img))\n",
409
+ " plt.imshow(img.cpu().squeeze().numpy(), cmap='gist_gray')\n",
410
+ " ax.get_xaxis().set_visible(False)\n",
411
+ " ax.get_yaxis().set_visible(False) \n",
412
+ " if i == n//2:\n",
413
+ " ax.set_title('Original images')\n",
414
+ " ax = plt.subplot(2, n, i + 1 + n)\n",
415
+ " plt.imshow(rec_img.cpu().squeeze().numpy(), cmap='gist_gray') \n",
416
+ " ax.get_xaxis().set_visible(False)\n",
417
+ " ax.get_yaxis().set_visible(False) \n",
418
+ " if i == n//2:\n",
419
+ " ax.set_title('Reconstructed images')\n",
420
+ " plt.show() "
421
+ ]
422
+ },
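A further catch for when the loop runs: test_dataset.targets exists on torchvision's MNIST dataset used in the tutorial, not on the MyDataset wrapper here, and test_dataset[i] returns an image rather than an (image, label) tuple, so the digit-lookup logic has nothing to index. A sketch that simply plots the first n test images instead:

    # Index the unlabeled dataset directly; there are no class targets to look up.
    for i in range(n):
        img = test_dataset[i].unsqueeze(0).to(device)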
423
+ {
424
+ "cell_type": "code",
425
+ "execution_count": 34,
426
+ "id": "5f8b646b",
427
+ "metadata": {},
428
+ "outputs": [
429
+ {
430
+ "ename": "ValueError",
431
+ "evalue": "too many values to unpack (expected 2)",
432
+ "output_type": "error",
433
+ "traceback": [
434
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
435
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
436
+ "Input \u001b[0;32mIn [34]\u001b[0m, in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m diz_loss \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtrain_loss\u001b[39m\u001b[38;5;124m'\u001b[39m:[],\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mval_loss\u001b[39m\u001b[38;5;124m'\u001b[39m:[]}\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m epoch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(num_epochs):\n\u001b[0;32m----> 4\u001b[0m train_loss \u001b[38;5;241m=\u001b[39m\u001b[43mtrain_epoch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mencoder\u001b[49m\u001b[43m,\u001b[49m\u001b[43mdecoder\u001b[49m\u001b[43m,\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43mtrain_loader\u001b[49m\u001b[43m,\u001b[49m\u001b[43mloss_fn\u001b[49m\u001b[43m,\u001b[49m\u001b[43moptim\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m val_loss \u001b[38;5;241m=\u001b[39m test_epoch(encoder,decoder,device,test_loader,loss_fn)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m EPOCH \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m train loss \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m val loss \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mformat(epoch \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, num_epochs,train_loss,val_loss))\n",
437
+ "Input \u001b[0;32mIn [33]\u001b[0m, in \u001b[0;36mtrain_epoch\u001b[0;34m(encoder, decoder, device, dataloader, loss_fn, optimizer)\u001b[0m\n\u001b[1;32m 6\u001b[0m train_loss \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Iterate the dataloader (we do not need the label values, this is unsupervised learning)\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m image_batch, _ \u001b[38;5;129;01min\u001b[39;00m dataloader: \u001b[38;5;66;03m# with \"_\" we just ignore the labels (the second element of the dataloader tuple)\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Move tensor to the proper device\u001b[39;00m\n\u001b[1;32m 10\u001b[0m image_batch \u001b[38;5;241m=\u001b[39m image_batch\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Encode data\u001b[39;00m\n",
438
+ "\u001b[0;31mValueError\u001b[0m: too many values to unpack (expected 2)"
439
+ ]
440
+ }
441
+ ],
442
+ "source": [
443
+ "num_epochs = 30\n",
444
+ "diz_loss = {'train_loss':[],'val_loss':[]}\n",
445
+ "for epoch in range(num_epochs):\n",
446
+ " train_loss =train_epoch(encoder,decoder,device,train_loader,loss_fn,optim)\n",
447
+ " val_loss = test_epoch(encoder,decoder,device,test_loader,loss_fn)\n",
448
+ " print('\\n EPOCH {}/{} \\t train loss {} \\t val loss {}'.format(epoch + 1, num_epochs,train_loss,val_loss))\n",
449
+ " diz_loss['train_loss'].append(train_loss)\n",
450
+ " diz_loss['val_loss'].append(val_loss)\n",
451
+ " plot_ae_outputs(encoder,decoder,n=10)"
452
+ ]
453
+ }
454
+ ],
455
+ "metadata": {
456
+ "kernelspec": {
457
+ "display_name": "Python 3 (ipykernel)",
458
+ "language": "python",
459
+ "name": "python3"
460
+ },
461
+ "language_info": {
462
+ "codemirror_mode": {
463
+ "name": "ipython",
464
+ "version": 3
465
+ },
466
+ "file_extension": ".py",
467
+ "mimetype": "text/x-python",
468
+ "name": "python",
469
+ "nbconvert_exporter": "python",
470
+ "pygments_lexer": "ipython3",
471
+ "version": "3.9.12"
472
+ }
473
+ },
474
+ "nbformat": 4,
475
+ "nbformat_minor": 5
476
+ }