{"cells":[{"cell_type":"markdown","metadata":{"id":"Q-bj6K7Qv4ft"},"source":["# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n","\n","1. Install required libraries."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":16079,"status":"ok","timestamp":1687456413307,"user":{"displayName":"Nicholas Corrêa","userId":"09736120585766268588"},"user_tz":-120},"id":"SBWCrz5GfBXo","outputId":"0d3078fb-2d35-4fb2-b56e-6bed6ea2d895"},"outputs":[{"name":"stdout","output_type":"stream","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting transformers\n"," Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m57.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.0)\n","Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n"," Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n","Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n"," Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m103.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n"," Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m75.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.4.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.5.0)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.15)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2022.12.7)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n","Installing collected packages: tokenizers, safetensors, huggingface-hub, transformers\n","Successfully installed huggingface-hub-0.15.1 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.30.2\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting codecarbon\n"," Downloading codecarbon-2.2.4-py3-none-any.whl (176 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.0/176.0 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting arrow (from codecarbon)\n"," Downloading arrow-1.2.3-py3-none-any.whl (66 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from codecarbon) (1.5.3)\n","Collecting pynvml (from codecarbon)\n"," Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from codecarbon) (2.27.1)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from codecarbon) (5.9.5)\n","Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from codecarbon) (9.0.0)\n","Collecting fuzzywuzzy (from codecarbon)\n"," Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from codecarbon) (8.1.3)\n","Requirement already satisfied: python-dateutil>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from arrow->codecarbon) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->codecarbon) (2022.7.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->codecarbon) (1.22.4)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (1.26.15)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (2022.12.7)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (3.4)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7.0->arrow->codecarbon) (1.16.0)\n","Installing collected packages: fuzzywuzzy, pynvml, arrow, codecarbon\n","Successfully installed arrow-1.2.3 codecarbon-2.2.4 fuzzywuzzy-0.18.0 pynvml-11.5.0\n"]}],"source":["!pip install transformers\n","!pip install codecarbon"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":17938,"status":"ok","timestamp":1687474615915,"user":{"displayName":"Nicholas Corrêa","userId":"09736120585766268588"},"user_tz":-120},"id":"kU_MRV-QB7CN","outputId":"517e66a1-5a4f-4a19-b2a8-b62642dcaf1e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"markdown","metadata":{"id":"y5XnfvSH7w4z"},"source":["2. Load the data from the hub."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"executionInfo":{"elapsed":1483,"status":"ok","timestamp":1687456464404,"user":{"displayName":"Nicholas Corrêa","userId":"09736120585766268588"},"user_tz":-120},"id":"7MbpXGu-v4f1","outputId":"5971618b-9626-4253-c325-609ea639cb3e"},"outputs":[{"data":{"text/html":["\n","
\n"," | prompt | \n","completion | \n","
---|---|---|
0 | \n","Which is a species of fish? Tope or Rope | \n","Tope | \n","
1 | \n","Why can camels survive for long without water? | \n","Camels use the fat in their humps to keep them... | \n","
2 | \n","Alice's parents have three daughters: Amy, Jes... | \n","The name of the third daughter is Alice | \n","
3 | \n","Who gave the UN the land in NY to build their HQ | \n","John D Rockerfeller | \n","
4 | \n","Why mobile is bad for human | \n","We are always engaged one phone which is not g... | \n","
... | \n","... | \n","... | \n","
53129 | \n","How do computers communicate and network with ... | \n","Computers communicate and network with each ot... | \n","
53130 | \n","How are websites different from web applications? | \n","Websites and web applications are similar in t... | \n","
53131 | \n","What is open-source software and its benefits? | \n","Open-source software is software that is made ... | \n","
53132 | \n","What is a cookie and how is it used in web bro... | \n","A cookie is a small piece of data that a websi... | \n","
53133 | \n","What is cloud storage and its advantages for d... | \n","Cloud storage is a service that allows you to ... | \n","
53134 rows × 2 columns
\n","