{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30747,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install -Uqq  transformers  datasets  evaluate bitsandbytes peft accelerate scipy einops trl","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:04:31.424906Z","iopub.execute_input":"2024-08-12T14:04:31.425286Z","iopub.status.idle":"2024-08-12T14:05:14.673029Z","shell.execute_reply.started":"2024-08-12T14:04:31.425257Z","shell.execute_reply":"2024-08-12T14:05:14.671875Z"},"trusted":true},"execution_count":3,"outputs":[{"name":"stdout","text":"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ncuml 24.6.1 requires cupy-cuda11x>=12.0.0, which is not installed.\nkfp 2.5.0 requires google-cloud-storage<3,>=2.2.1, but you have google-cloud-storage 1.44.0 which is incompatible.\nlibpysal 4.9.2 requires packaging>=22, but you have packaging 21.3 which is incompatible.\nlibpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nmomepy 0.7.2 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.\npointpats 2.5.0 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.\nspaghetti 1.7.6 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nspopt 0.6.1 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.26.4 which is incompatible.\nydata-profiling 4.6.4 requires scipy<1.12,>=1.4.1, but you have scipy 1.14.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0m","output_type":"stream"}]},{"cell_type":"code","source":"import torch\nimport wandb\nimport evaluate\nimport datasets\nimport pandas as pd\nfrom datasets import load_dataset, Dataset, DatasetDict\nfrom pprint import pprint\nfrom huggingface_hub import login\nfrom kaggle_secrets import UserSecretsClient\nfrom transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\nfrom trl import setup_chat_format\nfrom peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:05.671162Z","iopub.execute_input":"2024-08-12T14:07:05.671582Z","iopub.status.idle":"2024-08-12T14:07:24.804667Z","shell.execute_reply.started":"2024-08-12T14:07:05.671544Z","shell.execute_reply":"2024-08-12T14:07:24.803840Z"},"trusted":true},"execution_count":4,"outputs":[{"name":"stderr","text":"2024-08-12 14:07:14.310516: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-08-12 14:07:14.310628: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-08-12 14:07:14.452921: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"code","source":"\nuser_secrets = UserSecretsClient()\nsecret_value_0 = user_secrets.get_secret(\"mlops_huggingface_token\")\nsecret_value_1 = user_secrets.get_secret(\"wandb_key\")\n\nwandb.login(key = secret_value_1)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:24.806645Z","iopub.execute_input":"2024-08-12T14:07:24.807318Z","iopub.status.idle":"2024-08-12T14:07:27.187735Z","shell.execute_reply.started":"2024-08-12T14:07:24.807288Z","shell.execute_reply":"2024-08-12T14:07:27.186823Z"},"trusted":true},"execution_count":5,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"execution_count":5,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}]},{"cell_type":"code","source":"login(token = secret_value_0)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:27.189188Z","iopub.execute_input":"2024-08-12T14:07:27.189821Z","iopub.status.idle":"2024-08-12T14:07:27.334472Z","shell.execute_reply.started":"2024-08-12T14:07:27.189780Z","shell.execute_reply":"2024-08-12T14:07:27.333536Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: fineGrained).\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful\n","output_type":"stream"}]},{"cell_type":"code","source":"ds = load_dataset(\"nbertagnolli/counsel-chat\", split = 'train')","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:27.336951Z","iopub.execute_input":"2024-08-12T14:07:27.337247Z","iopub.status.idle":"2024-08-12T14:07:29.588187Z","shell.execute_reply.started":"2024-08-12T14:07:27.337222Z","shell.execute_reply":"2024-08-12T14:07:29.587319Z"},"trusted":true},"execution_count":7,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading readme:   0%|          | 0.00/4.92k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ca8f8474aa77408cb6bcc7f122f71b3c"}},"metadata":{}},{"name":"stderr","text":"Repo card metadata block was not found. Setting CardData to empty.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/4.13M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f7b486cf5be8401495ba4ef57d626c1e"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split:   0%|          | 0/2775 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"45e7063b3b1c49e9b9f4d248c1d1d61f"}},"metadata":{}}]},{"cell_type":"code","source":"ds","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.589333Z","iopub.execute_input":"2024-08-12T14:07:29.589624Z","iopub.status.idle":"2024-08-12T14:07:29.595659Z","shell.execute_reply.started":"2024-08-12T14:07:29.589599Z","shell.execute_reply":"2024-08-12T14:07:29.594668Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":"Dataset({\n    features: ['questionID', 'questionTitle', 'questionText', 'questionLink', 'topic', 'therapistInfo', 'therapistURL', 'answerText', 'upvotes', 'views'],\n    num_rows: 2775\n})"},"metadata":{}}]},{"cell_type":"code","source":"columns_to_check = ['questionText', 'answerText']\n\n# Define a function to check for NaN values in the specified columns\ndef drop_nans(example):\n    return not any(pd.isna(example[col]) for col in columns_to_check)\n\n# Apply the filter\nds = ds.filter(drop_nans)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.596741Z","iopub.execute_input":"2024-08-12T14:07:29.597033Z","iopub.status.idle":"2024-08-12T14:07:29.712066Z","shell.execute_reply.started":"2024-08-12T14:07:29.597011Z","shell.execute_reply":"2024-08-12T14:07:29.711092Z"},"trusted":true},"execution_count":9,"outputs":[{"output_type":"display_data","data":{"text/plain":"Filter:   0%|          | 0/2775 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"fcd9e26734c2473b835291e9307b6d77"}},"metadata":{}}]},{"cell_type":"code","source":"#split dataset into train, valid and test split\ntrain_temp_dataset = ds.train_test_split(test_size = .2)\n# val_test_dataset = train_temp_dataset['test'].train_test_split(.5)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.713351Z","iopub.execute_input":"2024-08-12T14:07:29.713642Z","iopub.status.idle":"2024-08-12T14:07:29.731813Z","shell.execute_reply.started":"2024-08-12T14:07:29.713617Z","shell.execute_reply":"2024-08-12T14:07:29.730835Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"train_temp_dataset","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.733266Z","iopub.execute_input":"2024-08-12T14:07:29.733603Z","iopub.status.idle":"2024-08-12T14:07:29.739738Z","shell.execute_reply.started":"2024-08-12T14:07:29.733575Z","shell.execute_reply":"2024-08-12T14:07:29.738899Z"},"trusted":true},"execution_count":11,"outputs":[{"execution_count":11,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n    train: Dataset({\n        features: ['questionID', 'questionTitle', 'questionText', 'questionLink', 'topic', 'therapistInfo', 'therapistURL', 'answerText', 'upvotes', 'views'],\n        num_rows: 2089\n    })\n    test: Dataset({\n        features: ['questionID', 'questionTitle', 'questionText', 'questionLink', 'topic', 'therapistInfo', 'therapistURL', 'answerText', 'upvotes', 'views'],\n        num_rows: 523\n    })\n})"},"metadata":{}}]},{"cell_type":"code","source":"features = list(train_temp_dataset['train'].features.keys())\n\ncols_to_keep = ['questionText', 'answerText']\ncols_to_remove = [i for i in features if i not in cols_to_keep]\nprint(cols_to_remove)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.740782Z","iopub.execute_input":"2024-08-12T14:07:29.741116Z","iopub.status.idle":"2024-08-12T14:07:29.749041Z","shell.execute_reply.started":"2024-08-12T14:07:29.741085Z","shell.execute_reply":"2024-08-12T14:07:29.748138Z"},"trusted":true},"execution_count":12,"outputs":[{"name":"stdout","text":"['questionID', 'questionTitle', 'questionLink', 'topic', 'therapistInfo', 'therapistURL', 'upvotes', 'views']\n","output_type":"stream"}]},{"cell_type":"code","source":"train_temp_dataset =  train_temp_dataset.remove_columns(cols_to_remove)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.752404Z","iopub.execute_input":"2024-08-12T14:07:29.752667Z","iopub.status.idle":"2024-08-12T14:07:29.766544Z","shell.execute_reply.started":"2024-08-12T14:07:29.752646Z","shell.execute_reply":"2024-08-12T14:07:29.765620Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"df = train_temp_dataset['train'].to_pandas()","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.767911Z","iopub.execute_input":"2024-08-12T14:07:29.769001Z","iopub.status.idle":"2024-08-12T14:07:29.789542Z","shell.execute_reply.started":"2024-08-12T14:07:29.768971Z","shell.execute_reply":"2024-08-12T14:07:29.788775Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"df.isna().sum()","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.790543Z","iopub.execute_input":"2024-08-12T14:07:29.790838Z","iopub.status.idle":"2024-08-12T14:07:29.799394Z","shell.execute_reply.started":"2024-08-12T14:07:29.790807Z","shell.execute_reply":"2024-08-12T14:07:29.798418Z"},"trusted":true},"execution_count":15,"outputs":[{"execution_count":15,"output_type":"execute_result","data":{"text/plain":"questionText    0\nanswerText      0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"train_temp_dataset","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.800731Z","iopub.execute_input":"2024-08-12T14:07:29.801147Z","iopub.status.idle":"2024-08-12T14:07:29.809412Z","shell.execute_reply.started":"2024-08-12T14:07:29.801117Z","shell.execute_reply":"2024-08-12T14:07:29.808491Z"},"trusted":true},"execution_count":16,"outputs":[{"execution_count":16,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n    train: Dataset({\n        features: ['questionText', 'answerText'],\n        num_rows: 2089\n    })\n    test: Dataset({\n        features: ['questionText', 'answerText'],\n        num_rows: 523\n    })\n})"},"metadata":{}}]},{"cell_type":"markdown","source":"#### Preparing the dataset for conversational finetuning\n\n{\"messages\": [{\"role\": \"system\", \"content\": \"You are...\"}, {\"role\": \"user\", \"content\": \"...\"}, {\"role\": \"assistant\", \"content\": \"...\"}]}\n{\"messages\": [{\"role\": \"system\", \"content\": \"You are...\"}, {\"role\": \"user\", \"content\": \"...\"}, {\"role\": \"assistant\", \"content\": \"...\"}]}\n{\"messages\": [{\"role\": \"system\", \"content\": \"You are...\"}, {\"role\": \"user\", \"content\": \"...\"}, {\"role\": \"assistant\", \"content\": \"...\"}]}\n\n","metadata":{}},{"cell_type":"code","source":"system_messasge = \"\"\"You are a mental health chatbot that follows mental health therapist professional guidelines.\nUsers will talk to you about their mental health issues and you will reply them the same way a professional \ntherapist would.\"\"\"\n\ndef create_conversation_data(sample):\n    \n    return{\n        'messages': [\n            {'role': 'system',\n            'content': system_messasge},\n            {'role': 'user',\n            'content': sample['questionText']},\n            {'role': 'assistant',\n            'content': sample['answerText']}\n        ]\n    }\n\n","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.810476Z","iopub.execute_input":"2024-08-12T14:07:29.810812Z","iopub.status.idle":"2024-08-12T14:07:29.817932Z","shell.execute_reply.started":"2024-08-12T14:07:29.810762Z","shell.execute_reply":"2024-08-12T14:07:29.816977Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"chat_dataset = train_temp_dataset.map(create_conversation_data, batched = False)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:29.819037Z","iopub.execute_input":"2024-08-12T14:07:29.819365Z","iopub.status.idle":"2024-08-12T14:07:30.215456Z","shell.execute_reply.started":"2024-08-12T14:07:29.819335Z","shell.execute_reply":"2024-08-12T14:07:30.214521Z"},"trusted":true},"execution_count":18,"outputs":[{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/2089 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"66412314b66f48eeb66e34df757dec8c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/523 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1dca7a36a0bd4d3281160fecb06a34f1"}},"metadata":{}}]},{"cell_type":"code","source":"chat_dataset","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.216850Z","iopub.execute_input":"2024-08-12T14:07:30.217212Z","iopub.status.idle":"2024-08-12T14:07:30.224108Z","shell.execute_reply.started":"2024-08-12T14:07:30.217178Z","shell.execute_reply":"2024-08-12T14:07:30.223035Z"},"trusted":true},"execution_count":19,"outputs":[{"execution_count":19,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n    train: Dataset({\n        features: ['questionText', 'answerText', 'messages'],\n        num_rows: 2089\n    })\n    test: Dataset({\n        features: ['questionText', 'answerText', 'messages'],\n        num_rows: 523\n    })\n})"},"metadata":{}}]},{"cell_type":"code","source":"pprint(chat_dataset[\"train\"][145][\"messages\"])","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.225301Z","iopub.execute_input":"2024-08-12T14:07:30.225641Z","iopub.status.idle":"2024-08-12T14:07:30.246726Z","shell.execute_reply.started":"2024-08-12T14:07:30.225608Z","shell.execute_reply":"2024-08-12T14:07:30.245749Z"},"trusted":true},"execution_count":20,"outputs":[{"name":"stdout","text":"[{'content': 'You are a mental health chatbot that follows mental health '\n             'therapist professional guidelines.\\n'\n             'Users will talk to you about their mental health issues and you '\n             'will reply them the same way a professional \\n'\n             'therapist would.',\n  'role': 'system'},\n {'content': \"I am in my 30s with 2 kids and no job and I'm behind on the \"\n             \"bills. I've been trying everything with no results. I feel like \"\n             'everyone else puts their needs in front on mine. I give intil it '\n             \"hurts and I'm afraid that I'm going to fail my babies    I've \"\n             'been struggling for 5 yrs in a relationship with no '\n             'communication and no alone time. How can I get some support?',\n  'role': 'user'},\n {'content': 'I would want to know where do you put your needs? Often times we '\n             'show people how to treat us based on how we treat ourselves. '\n             'First identify how to practice good self care which can only '\n             'happen when you are authentic and real about your needs and '\n             'desires. You cannot pour from an empty cup and must take find '\n             'the time to explore your purpose in life. It may be helpful to '\n             'seek out additional support to work on your intrapersonal '\n             'relationship.',\n  'role': 'assistant'}]\n","output_type":"stream"}]},{"cell_type":"code","source":"# save datasets to disk\nchat_dataset[\"train\"].to_json(\"train_dataset.json\", orient=\"records\")\nchat_dataset[\"test\"].to_json(\"test_dataset.json\", orient=\"records\")","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.248046Z","iopub.execute_input":"2024-08-12T14:07:30.248344Z","iopub.status.idle":"2024-08-12T14:07:30.386487Z","shell.execute_reply.started":"2024-08-12T14:07:30.248321Z","shell.execute_reply":"2024-08-12T14:07:30.385562Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"Creating json from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8fe54227d3d2456192feec63e910ddc8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"be9af39824c54d80960a995b7615b968"}},"metadata":{}},{"execution_count":21,"output_type":"execute_result","data":{"text/plain":"1455984"},"metadata":{}}]},{"cell_type":"code","source":"train_dataset = load_dataset('json', data_files='/kaggle/working/train_dataset.json', split='train')","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.387599Z","iopub.execute_input":"2024-08-12T14:07:30.387909Z","iopub.status.idle":"2024-08-12T14:07:30.545776Z","shell.execute_reply.started":"2024-08-12T14:07:30.387885Z","shell.execute_reply":"2024-08-12T14:07:30.544873Z"},"trusted":true},"execution_count":22,"outputs":[{"output_type":"display_data","data":{"text/plain":"Generating train split: 0 examples [00:00, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"89dc0a5891614526ac94b5fb29cf0f67"}},"metadata":{}}]},{"cell_type":"code","source":"pprint(train_dataset[0], sort_dicts=False)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.547032Z","iopub.execute_input":"2024-08-12T14:07:30.547389Z","iopub.status.idle":"2024-08-12T14:07:30.555549Z","shell.execute_reply.started":"2024-08-12T14:07:30.547355Z","shell.execute_reply":"2024-08-12T14:07:30.554688Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stdout","text":"{'questionText': \"I've had posttraumatic stress disorder for years without my \"\n                 'parents ever finding out. I want to overcome it, but it’s so '\n                 \"vivid, it’s like it’s happening again. I'm scared and \"\n                 'paranoid. I have depression, which I have been struggling '\n                 'with since a young age.',\n 'answerText': 'Post traumatic stress disorder (PTSD) is a very complex '\n               'issue.\\xa0 It is the root cause of many other issues including '\n               'depression, anxiety and anger to name a few.\\xa0 '\n               'Re-experiencing the event is a telltale symptom of PTSD.\\xa0 '\n               'One of the best things you can do for yourself is to find '\n               'someone who specializes in this area\\xa0to help you work '\n               'through the symptoms associated with this disorder as well as '\n               'directly addressing the event/events that initially caused the '\n               'symptoms.\\xa0 There are various treatment methods available to '\n               'helping professionals that are successful in treating this '\n               'disorder.\\xa0 In the meantime there is a book called \"The Body '\n               'Keeps the Score\", it is\\xa0informative and\\xa0may be helpful '\n               'for you on your journey.\\xa0 It is my hope that you are not '\n               'dealing with this alone.\\xa0\\xa0 Best wishes!',\n 'messages': [{'content': 'You are a mental health chatbot that follows mental '\n                          'health therapist professional guidelines.\\n'\n                          'Users will talk to you about their mental health '\n                          'issues and you will reply them the same way a '\n                          'professional \\n'\n                          'therapist would.',\n               'role': 'system'},\n              {'content': \"I've had posttraumatic stress disorder for years \"\n                          'without my parents ever finding out. I want to '\n                          'overcome it, but it’s so vivid, it’s like it’s '\n                          \"happening again. I'm scared and paranoid. I have \"\n                          'depression, which I have been struggling with since '\n                          'a young age.',\n               'role': 'user'},\n              {'content': 'Post traumatic stress disorder (PTSD) is a very '\n                          'complex issue.\\xa0 It is the root cause of many '\n                          'other issues including depression, anxiety and '\n                          'anger to name a few.\\xa0 Re-experiencing the event '\n                          'is a telltale symptom of PTSD.\\xa0 One of the best '\n                          'things you can do for yourself is to find someone '\n                          'who specializes in this area\\xa0to help you work '\n                          'through the symptoms associated with this disorder '\n                          'as well as directly addressing the event/events '\n                          'that initially caused the symptoms.\\xa0 There are '\n                          'various treatment methods available to helping '\n                          'professionals that are successful in treating this '\n                          'disorder.\\xa0 In the meantime there is a book '\n                          'called \"The Body Keeps the Score\", it is\\xa0'\n                          'informative and\\xa0may be helpful for you on your '\n                          'journey.\\xa0 It is my hope that you are not dealing '\n                          'with this alone.\\xa0\\xa0 Best wishes!',\n               'role': 'assistant'}]}\n","output_type":"stream"}]},{"cell_type":"code","source":"train_dataset","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.556679Z","iopub.execute_input":"2024-08-12T14:07:30.556992Z","iopub.status.idle":"2024-08-12T14:07:30.567293Z","shell.execute_reply.started":"2024-08-12T14:07:30.556967Z","shell.execute_reply":"2024-08-12T14:07:30.566352Z"},"trusted":true},"execution_count":24,"outputs":[{"execution_count":24,"output_type":"execute_result","data":{"text/plain":"Dataset({\n    features: ['questionText', 'answerText', 'messages'],\n    num_rows: 2089\n})"},"metadata":{}}]},{"cell_type":"markdown","source":"#### Modeling","metadata":{}},{"cell_type":"code","source":"model_id = \"microsoft/phi-2\"\n\n# BitsAndBytesConfig int-4 config\nbnb_config = BitsAndBytesConfig(\n    load_in_4bit=True, \n    bnb_4bit_use_double_quant=True, \n    bnb_4bit_quant_type=\"nf4\", bnb_4bit_compute_dtype=torch.bfloat16\n)","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:30.890686Z","iopub.execute_input":"2024-08-12T14:07:30.891069Z","iopub.status.idle":"2024-08-12T14:07:30.897661Z","shell.execute_reply.started":"2024-08-12T14:07:30.891039Z","shell.execute_reply":"2024-08-12T14:07:30.896833Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"# Load model and tokenizer\nmodel = AutoModelForCausalLM.from_pretrained(\n    model_id,\n    device_map=\"auto\",\n    #attn_implementation=\"flash_attention_2\",\n    torch_dtype=torch.bfloat16,\n    quantization_config=bnb_config\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:31.901016Z","iopub.execute_input":"2024-08-12T14:07:31.901878Z","iopub.status.idle":"2024-08-12T14:08:08.271009Z","shell.execute_reply.started":"2024-08-12T14:07:31.901845Z","shell.execute_reply":"2024-08-12T14:08:08.270057Z"},"trusted":true},"execution_count":26,"outputs":[{"output_type":"display_data","data":{"text/plain":"config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"19e2dc209abe4147b7e33a615690b904"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7a3d65defc2e4630b25acfe29d2a3dfa"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"722b37288c1c4313a51f8a6305cd985a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b7cb09f6c1c546aaa3400a7a7fa7006a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5f058a2dbf6f43a4a3d8d41c20f346ce"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"dbf41ddefd6b481c8856bd6f2fae70ce"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4add9a39afe645c288fb551a724516a6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d0e9ef621fd343d7b9f8c5cb4d79a5be"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"bd9b8cc21c78452c860aad79373f64b9"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"360dc2690f3042cda19b4c8275ed2e2c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"768c3edf4ab248b895c9ae66e39988af"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"676b9a4df0874628a663d9fa27bd7bd8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b89a55e337f74f9ab00b3e1cfce5ed22"}},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# # # set chat template to OAI chatML, remove if you start from a fine-tuned model\nmodel, tokenizer = setup_chat_format(model, tokenizer)","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:08.935622Z","iopub.execute_input":"2024-08-08T14:53:08.936297Z","iopub.status.idle":"2024-08-08T14:53:08.976635Z","shell.execute_reply.started":"2024-08-08T14:53:08.936260Z","shell.execute_reply":"2024-08-08T14:53:08.975779Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"model = prepare_model_for_kbit_training(model)","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:08.977688Z","iopub.execute_input":"2024-08-08T14:53:08.978008Z","iopub.status.idle":"2024-08-08T14:53:13.041666Z","shell.execute_reply.started":"2024-08-08T14:53:08.977983Z","shell.execute_reply":"2024-08-08T14:53:13.040881Z"},"trusted":true},"execution_count":26,"outputs":[]},{"cell_type":"code","source":"tokenizer.pad_token = tokenizer.eos_token ","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:13.042814Z","iopub.execute_input":"2024-08-08T14:53:13.043103Z","iopub.status.idle":"2024-08-08T14:53:13.112117Z","shell.execute_reply.started":"2024-08-08T14:53:13.043079Z","shell.execute_reply":"2024-08-08T14:53:13.110993Z"},"trusted":true},"execution_count":27,"outputs":[]},{"cell_type":"code","source":"# LoRA config based on QLoRA paper & Sebastian Raschka experiment\npeft_config = LoraConfig(\n        lora_alpha=128,\n        lora_dropout=0.05,\n        r=256,\n        bias=\"none\",\n         target_modules=[         # Setting names of modules in falcon-7b model that we want to apply LoRA to\n            \"query_key_value\",\n            \"dense\",\n            \"dense_h_to_4h\",\n            \"dense_4h_to_h\",\n        ],\n        task_type=\"CAUSAL_LM\",\n)","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:13.113642Z","iopub.execute_input":"2024-08-08T14:53:13.114354Z","iopub.status.idle":"2024-08-08T14:53:13.183924Z","shell.execute_reply.started":"2024-08-08T14:53:13.114325Z","shell.execute_reply":"2024-08-08T14:53:13.182813Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"peft_model = get_peft_model(model, peft_config)","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:13.185198Z","iopub.execute_input":"2024-08-08T14:53:13.185476Z","iopub.status.idle":"2024-08-08T14:53:13.891050Z","shell.execute_reply.started":"2024-08-08T14:53:13.185454Z","shell.execute_reply":"2024-08-08T14:53:13.890063Z"},"trusted":true},"execution_count":29,"outputs":[]},{"cell_type":"code","source":"from transformers import TrainingArguments\n \nargs = TrainingArguments(\n    output_dir=f'finetuned-{model_id}-for-mental-health-3', # directory to save and repository id\n    num_train_epochs=1,                     # number of training epochs\n    per_device_train_batch_size=3,          # batch size per device during training\n    gradient_accumulation_steps=2,          # number of steps before performing a backward/update pass\n    gradient_checkpointing=True,            # use gradient checkpointing to save memory\n    optim=\"adamw_torch_fused\",              # use fused adamw optimizer\n    logging_steps=10,                       # log every 10 steps\n    save_strategy=\"epoch\",                  # save checkpoint every epoch\n    learning_rate=2e-4,                     # learning rate, based on QLoRA paper\n    fp16=True,                              # use tf32 precision\n    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper\n    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper\n    lr_scheduler_type=\"constant\",           # use constant learning rate scheduler\n    push_to_hub=True,                       # push model to hub\n    report_to=\"wandb\",                # report metrics to tensorboard\n)","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:13.892315Z","iopub.execute_input":"2024-08-08T14:53:13.892669Z","iopub.status.idle":"2024-08-08T14:53:13.925328Z","shell.execute_reply.started":"2024-08-08T14:53:13.892638Z","shell.execute_reply":"2024-08-08T14:53:13.924280Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"from trl import SFTTrainer\n# max sequence length for model and packing of the dataset\n \ntrainer = SFTTrainer(\n    model=model,\n    args=args,\n    train_dataset=train_dataset,\n    peft_config=peft_config,\n    tokenizer=tokenizer,\n)","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:13.926621Z","iopub.execute_input":"2024-08-08T14:53:13.927007Z","iopub.status.idle":"2024-08-08T14:53:16.875456Z","shell.execute_reply.started":"2024-08-08T14:53:13.926974Z","shell.execute_reply":"2024-08-08T14:53:16.874675Z"},"trusted":true},"execution_count":31,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:289: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n  warnings.warn(\n/opt/conda/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:366: UserWarning: You passed a `dataset_kwargs` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n  warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/2089 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"354ced84f44f46bf9e52c89ba53c4d95"}},"metadata":{}}]},{"cell_type":"code","source":"peft_model.config.use_cache = False\ntrainer.train()","metadata":{"execution":{"iopub.status.busy":"2024-08-08T14:53:16.876585Z","iopub.execute_input":"2024-08-08T14:53:16.876949Z"},"trusted":true},"execution_count":null,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mabisoye\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.17.6 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.17.4"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20240808_145317-mk243f2l</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/abisoye/huggingface/runs/mk243f2l' target=\"_blank\">finetuned-microsoft/phi-2-for-mental-health-3</a></strong> to <a href='https://wandb.ai/abisoye/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/abisoye/huggingface' target=\"_blank\">https://wandb.ai/abisoye/huggingface</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/abisoye/huggingface/runs/mk243f2l' target=\"_blank\">https://wandb.ai/abisoye/huggingface/runs/mk243f2l</a>"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n  warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n    <div>\n      \n      <progress value='10' max='348' style='width:300px; height:20px; vertical-align: middle;'></progress>\n      [ 10/348 01:09 < 48:38, 0.12 it/s, Epoch 0.03/1]\n    </div>\n    <table border=\"1\" class=\"dataframe\">\n  <thead>\n <tr style=\"text-align: left;\">\n      <th>Step</th>\n      <th>Training Loss</th>\n    </tr>\n  </thead>\n  <tbody>\n  </tbody>\n</table><p>"},"metadata":{}}]},{"cell_type":"code","source":"trainer.push_to_hub()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# free the memory again\ndel model\ndel trainer\ndel peft_model\ntorch.cuda.empty_cache()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":" \n#### COMMENT IN TO MERGE PEFT AND BASE MODEL ####\nfrom peft import AutoPeftModelForCausalLM\n \n# Load PEFT model on CPU\nmodel = AutoPeftModelForCausalLM.from_pretrained(\n    args.output_dir,\n    torch_dtype=torch.float16,\n    low_cpu_mem_usage=True,\n)\n# Merge LoRA and base model and save\nmerged_model = model.merge_and_unload()\nmerged_model.save_pretrained(args.output_dir,safe_serialization=True, max_shard_size=\"2GB\")","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"merged_model.push_to_hub(f'peft-finetuned-{model_id}-for-mental-health-3')","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}