# --- Cell: environment setup -------------------------------------------------
# Install the fine-tuning stack into the *running kernel's* environment.
# Invoking pip via `sys.executable -m pip` (rather than the `!pip` shell magic)
# guarantees the packages land in the same interpreter this notebook uses.
# NOTE(review): versions are unpinned — pin them for reproducible reruns.
import subprocess
import sys

_PACKAGES = [
    "transformers", "datasets", "evaluate", "bitsandbytes",
    "peft", "accelerate", "scipy", "einops", "trl",
]
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-Uqq", *_PACKAGES],
    check=True,  # surface installation failures instead of silently continuing
)

# --- Cell: imports — consolidated at the top of the notebook -----------------
import torch
import wandb
import evaluate
import datasets
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
from pprint import pprint
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training
This behaviour is the source of the following dependency conflicts.\ncuml 24.6.1 requires cupy-cuda11x>=12.0.0, which is not installed.\nkfp 2.5.0 requires google-cloud-storage<3,>=2.2.1, but you have google-cloud-storage 1.44.0 which is incompatible.\nlibpysal 4.9.2 requires packaging>=22, but you have packaging 21.3 which is incompatible.\nlibpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nmomepy 0.7.2 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.\npointpats 2.5.0 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.\nspaghetti 1.7.6 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nspopt 0.6.1 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.26.4 which is incompatible.\nydata-profiling 4.6.4 requires scipy<1.12,>=1.4.1, but you have scipy 1.14.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0m","output_type":"stream"}]},{"cell_type":"code","source":"import torch\nimport wandb\nimport evaluate\nimport datasets\nimport pandas as pd\nfrom datasets import load_dataset, Dataset, DatasetDict\nfrom pprint import pprint\nfrom huggingface_hub import login\nfrom kaggle_secrets import UserSecretsClient\nfrom transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\nfrom trl import setup_chat_format\nfrom peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training","metadata":{"execution":{"iopub.status.busy":"2024-08-12T14:07:05.671162Z","iopub.execute_input":"2024-08-12T14:07:05.671582Z","iopub.status.idle":"2024-08-12T14:07:24.804667Z","shell.execute_reply.started":"2024-08-12T14:07:05.671544Z","shell.execute_reply":"2024-08-12T14:07:24.803840Z"},"trusted":true},"execution_count":4,"outputs":[{"name":"stderr","text":"2024-08-12 14:07:14.310516: E 
# --- Cell: credentials -------------------------------------------------------
# Retrieve API credentials from the Kaggle secret store (never hardcode keys).
user_secrets = UserSecretsClient()

def _secret(name):
    """Fetch a named secret from the Kaggle user secret store."""
    return user_secrets.get_secret(name)

secret_value_0 = _secret("mlops_huggingface_token")  # Hugging Face access token
secret_value_1 = _secret("wandb_key")                # Weights & Biases API key

# Authenticate with Weights & Biases for experiment tracking.
wandb.login(key=secret_value_1)
# --- Cell: Hugging Face Hub login --------------------------------------------
# Authenticate against the Hub with the token fetched from the secret store.
login(token=secret_value_0)

# --- Cell: load data ---------------------------------------------------------
# Pull the CounselChat mental-health Q&A dataset, train split only.
ds = load_dataset("nbertagnolli/counsel-chat", split="train")
To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Tracking run with wandb version 0.17.4"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Run data is saved locally in /kaggle/working/wandb/run-20240808_145317-mk243f2l"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"Syncing run finetuned-microsoft/phi-2-for-mental-health-3 to Weights & Biases (docs)
"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View project at https://wandb.ai/abisoye/huggingface"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":" View run at https://wandb.ai/abisoye/huggingface/runs/mk243f2l"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n
\n \n \n [ 10/348 01:09 < 48:38, 0.12 it/s, Epoch 0.03/1]\n
\n \n \n \n \n \n \n \n \n \n
StepTraining Loss

# --- Cell: publish the trained adapter ---------------------------------------
trainer.push_to_hub()

# --- Cell: free GPU memory before reloading for the merge --------------------
import gc

del model
del trainer
del peft_model
gc.collect()              # actually collect the dropped references first
torch.cuda.empty_cache()  # then release cached CUDA blocks back to the driver

# --- Cell: merge the LoRA adapter into the base model ------------------------
#### COMMENT IN TO MERGE PEFT AND BASE MODEL ####
from peft import AutoPeftModelForCausalLM

# Load the PEFT model on CPU in fp16 to keep GPU memory free for other work.
model = AutoPeftModelForCausalLM.from_pretrained(
    args.output_dir,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
# Fold the LoRA weights into the base model and save sharded fp16 weights.
merged_model = model.merge_and_unload()
merged_model.save_pretrained(args.output_dir, safe_serialization=True, max_shard_size="2GB")

# --- Cell: publish the merged model ------------------------------------------
# model_id appears to be "microsoft/phi-2" (see the wandb run name earlier in
# this notebook); the embedded "/" is not a valid character inside a Hub repo
# name, so flatten it before building the repository id.
merged_model.push_to_hub(f"peft-finetuned-{model_id.replace('/', '-')}-for-mental-health-3")