{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "Q-bj6K7Qv4ft" }, "source": [ "# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n", "\n", "1. Install required libraries." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SBWCrz5GfBXo", "outputId": "8535819c-4f43-4196-9a5c-045e870c75ba" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m50.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.8/179.8 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m31.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m114.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m83.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hToken will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /root/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "%pip install transformers datasets codecarbon -q" ] }, { "cell_type": "markdown", "metadata": { "id": "y5XnfvSH7w4z" }, "source": [ "2. Load the data from the hub." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 675, "referenced_widgets": [ "d5e04bfbc64b477bb5e15ac348203528", "9df59218bba14560b7d088120f87788f", "5ab487c9cca54b64ac7624f8a777e6f0", "41f3e2728fc84ed5bd9b4b249360d906", "88f585c04c79416ea84199059b806966", "aae8907f283a4e9c9cd774b93e75b7cc", "ab80a02760e4440e9c36d93866a95c07", "f774461134374e5cad4450ec6695e8bd", "5920920624454874bc3fea6803079d59", "9486a14cba0d4a068f4662258c4e9deb", "f7414f81cf9d4acc9cbebe55f5d0443e", "e003395fbe9041f8a730e4329f0ef762", "4a5180f1755b4ab4b09fc69783c68d24", "ed7815efe36c4a59b963030befe1420e", "fb9a513b53be4dc6afe182339d9fb0ec", "25d28c439989494189b45d80377bafee", "7b05113d6d0646a99b9a94eea0d03e8b", "68e97900d1284e7f8a2a9e3e3703bc3d", "5d96ab80b67e44c0bc62026a93dd13b6", "26038d94169e4b2b991d713a6533b83b", "038b0f9499b54449b4cdbf2ee887d0d7", "79b2e22a2bab464884e8e5d4367562c8", "808e88bc3f4e4a819af9651f18d3d635", "1d583213933b461492d1b078496c11c3", "0d997ef6a2e149839b23cd1d5a92e1f9", "0bc9cfb678814b1fb21a2c830a425dcd", "aa3eca49cfdb4fbe9566a9c0acb12201", "f7391b34016f4914acf02fa754cec9a7", "9e2766960e6e48129fe23ed29855e46a", 
"09c94c45ae874084bd13d158b2e133e6", "0d1e9bbb94d74e719f54794165ecc9c5", "358124c3c0e6446eb01dd470cce09def", "5cc7fca7de824815af98539c9ec2e002", "63b4fd7c8c3a4eaf9aaac134075acfc2", "b95a18bea2664f529a3def0a192fe2a7", "87ff35e4f8a64b8783027d71a0424564", "d9f02d42aae249aaaa605eb40cca9bb1", "a1950867a6b6443396dd80c9071ae565", "113a01e053014cf792efd65ab26d5ebf", "5049fed8b3ba47809f4b23ca9f6eac3a", "2ad8e2b98fb544d1ac15a723231117ff", "385fa8795a124ff780ed119b518a5b87", "b28a5ff7ea754d5c8ef9a821d3172f0d", "4d6bb9e8d55a433481a4c24d0701e067", "5c988c3dc10f4e8eae0f6a512ec92d45", "35bbb2d275234087b333db673b44d335", "15a2af850ff547bd8ac38167622532b8", "24ba2f4066bf4296a0831f3e3c1c9951", "cb87854236d44fa7afbf538eb7f284ed", "36012faebbbf4123b27db6678a2aa7f6", "b901163cf07248ca8a821f8d7893829a", "72054151bb4f47e5bd45ce354a25492f", "6031435804f148748614da693ac8eb49", "dfd541e41e3742989862d6d4410db8e7", "7d50afc9a60641f9b6566cc78fcabde4", "28e0c7f64c7846b3b01807a0bc823fe2", "17babc6aa8c64c05826ca518702d20c5", "e7acfaa28f6b4992a3e0e8709229505f", "d78a794c2fe8418898d328b8508738d4", "3f47f94d203146f4a7b12c59e62b18c7", "5019cd005793458a9d1ddcfd8275d141", "d61b20a8d758417ea9308d46f912696c", "c306a3682766458580a736e365f4eed2", "3d6b938ad89a4eb7b5b659b8a58a67ea", "d1bc4d0a28ab4067a4862f3685f669f7", "990ec8338e204b6b905e02fa99b41d6c", "496d84b8409c4964b3c4a7fd5b9a30c3", "4879ed18e5594fdabab8d9c175d7b35c", "2912a20819a547c7b1234f90eb347c10", "f82df455a7a242488dcf0e02b41e0056", "5829efe6dd4a4b5bbe226cfcc8385d9b", "2d8fe959fd7d4d8a9b1044878540f16b", "a8cf5e9af8db4ac0bf52a759497da977", "8317d5521e544d2e8ad592a6c903e7d7", "e0e34a00222e47ddbc1a17b37e90baa1", "db5ebcf942fd4b20ac0e91a56d7a8db6", "37a668ecbf14481f9e30d6ce6357520b" ] }, "id": "7MbpXGu-v4f1", "outputId": "d745022c-c9d0-4260-d761-ad132f55f3b9" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d5e04bfbc64b477bb5e15ac348203528", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading 
readme: 0%| | 0.00/471 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e003395fbe9041f8a730e4329f0ef762", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "808e88bc3f4e4a819af9651f18d3d635", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/26.7M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "63b4fd7c8c3a4eaf9aaac134075acfc2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/29.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5c988c3dc10f4e8eae0f6a512ec92d45", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Extracting data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "28e0c7f64c7846b3b01807a0bc823fe2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating english split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "496d84b8409c4964b3c4a7fd5b9a30c3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating portuguese split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", " | prompt | \n", "completion | \n", "
---|---|---|
0 | \n", "Why can camels survive for long without water? | \n", "Camels can survive for long periods without wa... | \n", "
1 | \n", "Why mobile is bad for human | \n", "Mobile phones as technological devices have be... | \n", "
2 | \n", "What is a polygon? | \n", "A polygon is a two-dimensional geometric figur... | \n", "
3 | \n", "How do I start running? | \n", "Starting a running routine can be a great way ... | \n", "
4 | \n", "Which episodes of season four of Game of Thron... | \n", "Michelle MacLaren directed two episodes in Sea... | \n", "
... | \n", "... | \n", "... | \n", "
41810 | \n", "What can I call you by? | \n", "You can call me Aira. How may I assist you today? | \n", "
41811 | \n", "What's your identification name? | \n", "As an artificial intelligence, I do not have a... | \n", "
41812 | \n", "What are you called? | \n", "Hello! I am called Aira. How can I assist you ... | \n", "
41813 | \n", "How do you introduce yourself? | \n", "Hello! I am Aira, your helpful, respectful, an... | \n", "
41814 | \n", "What should I say when I address you? | \n", "When addressing me, you can simply call me Air... | \n", "
41815 rows × 2 columns
\n", "