Training in progress, epoch 0
Files changed:
- Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5 +3 -0
- Logs/events.out.tfevents.1718376187.e29ec45d9208.134.6 +3 -0
- Logs/events.out.tfevents.1718376786.e29ec45d9208.134.7 +3 -0
- Logs/events.out.tfevents.1718376875.e29ec45d9208.134.8 +3 -0
- Logs/events.out.tfevents.1718376932.e29ec45d9208.134.9 +3 -0
- Untitled.ipynb +48 -9
- adapter_config.json +2 -2
- adapter_model.safetensors +1 -1
Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5663ff7319c7c244f3ef57d95d105a3050e8073f1e3db354c54ffe1acb723aec
+size 5616
```
Logs/events.out.tfevents.1718376187.e29ec45d9208.134.6
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:125592c2f566f59232d858d31742f19fa43baff01404d13f462cb6033d66c658
+size 5616
```
Logs/events.out.tfevents.1718376786.e29ec45d9208.134.7
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e40f75ff9979b27774982c2ed6b8aaa50563d918f9e2649b5ac0fbc0be340b0
+size 5616
```
Logs/events.out.tfevents.1718376875.e29ec45d9208.134.8
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ef820545bf631355fde58dd966a6b388455911f8ae13aa62e8bd4ac8e6e5605
+size 5616
```
Logs/events.out.tfevents.1718376932.e29ec45d9208.134.9
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4134096bbfc95b27aec318c6110c8529485dc0c0c3aff3e8ecfbc81a4db0b9c
+size 6094
```
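All five event files above are TensorBoard run logs committed through Git LFS, so the repository stores only a three-line pointer (version, oid, size) per file rather than the binary itself. As a minimal sketch, such a pointer can be parsed like this (the helper name is illustrative, not part of the commit):

```python
# Parse a Git LFS pointer file into its three fields.
# The helper name and hard-coded path are illustrative only.
def read_lfs_pointer(path):
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

fields = read_lfs_pointer("Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5")
print(fields["oid"], fields["size"])  # sha256:5663ff73... 5616
```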
Untitled.ipynb
CHANGED
```diff
@@ -429,7 +429,21 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "
+      "model_id": "a582824b64b041249949ed78eaade5d4",
+      "version_major": 2,
+      "version_minor": 0
+     },
+     "text/plain": [
+      "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+   },
+   {
+    "data": {
+     "application/vnd.jupyter.widget-view+json": {
+      "model_id": "dc5f4463bc564fcda93b0e03f4539c9f",
       "version_major": 2,
       "version_minor": 0
      },
@@ -472,8 +486,8 @@
     "\n",
     "    <div>\n",
     "      \n",
-    "      <progress value='
-    "      [
+    "      <progress value='98' max='645' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+    "      [ 98/645 22:20 < 2:07:16, 0.07 it/s, Epoch 0.75/5]\n",
     "    </div>\n",
     "    <table border=\"1\" class=\"dataframe\">\n",
     "  <thead>\n",
```
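A quick consistency check on the new progress readout: 645 total steps over 5 epochs is 129 steps per epoch, so step 98 lands at epoch 98/129, about 0.76, matching the "Epoch 0.75/5" display. All numbers below come from the diff:

```python
# Sanity-check the trainer progress readout shown above.
total_steps, num_epochs, current_step = 645, 5, 98
steps_per_epoch = total_steps / num_epochs    # 129.0
print(current_step / steps_per_epoch)         # ~0.76, displayed as "Epoch 0.75/5"
```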
```diff
@@ -638,10 +652,12 @@
     ").to(device)\n",
     "\n",
     "for param in ModelToFineTune.vision_tower.parameters():\n",
-    "    \n",
+    "    param.requires_grad = False\n",
+    "for param in ModelToFineTune.multi_modal_projector.parameters():\n",
+    "    param.requires_grad = False  \n",
     "\n",
     "def TokenGeneratorFn(DatasetEntries):\n",
-    "
+    "    \n",
     "    TextArr = [\"answer \"+i[\"question\"] + \"\\n\" + i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
     "    ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
     "    InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
```
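This hunk fills in the previously empty freezing loop and additionally freezes the multimodal projector, so only the language-model weights (wrapped in LoRA below) receive gradients. A minimal standalone sketch of the same pattern, assuming a PaliGemma-style model (the attribute names match the notebook's; the checkpoint id and device handling are illustrative assumptions):

```python
import torch
from transformers import PaliGemmaForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
model = PaliGemmaForConditionalGeneration.from_pretrained(
    "google/paligemma-3b-pt-224",   # assumption: any PaliGemma checkpoint works here
    torch_dtype=torch.bfloat16,
).to(device)

# Freeze the vision tower and the vision-to-language projector; only the
# language model remains trainable.
for param in model.vision_tower.parameters():
    param.requires_grad = False
for param in model.multi_modal_projector.parameters():
    param.requires_grad = False
```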
```diff
@@ -661,15 +677,38 @@
     "    InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
     "    return InputTokens\n",
     "\n",
-    "    except Exception as err:\n",
-    "        print(\"Error:\",err)\n",
     "\n",
     "\n",
+    "\n",
+    "\n",
+    "# def TokenGeneratorFn(DatasetEntries):\n",
+    "    \n",
+    "#     TextArr = [\"answer \"+i[\"question\"] for i in DatasetEntries]\n",
+    "#     ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
+    "#     labels = [i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
+    "    \n",
+    "#     InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,suffix=labels,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
+    "#     # Labels = InputTokens[\"input_ids\"].clone()\n",
+    "\n",
+    "#     # Labels[Labels == ModelProcessor.tokenizer.pad_token_id] = -100\n",
+    "#     # Labels[Labels == TokenToIDFn] = -100\n",
+    "\n",
+    "#     # The two lines above are equivalent to the commented loop below, but faster because they work with optimized numpy operations\n",
+    "#     # for i in range(len(Labels)):\n",
+    "#     #     if(Labels[i] == ModelProcessor.tokenizer.pad_token_id):\n",
+    "#     #         Labels[i] = -100\n",
+    "#     #     elif(Labels[i] == TokenToIDFn):\n",
+    "#     #         Labels[i] == -100\n",
+    "\n",
+    "#     # InputTokens[\"labels\"] = Labels  # This is v. imp.: use \"labels\" with a small \"l\", because the model expects the key to be written this way and not as Labels\n",
+    "#     InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
+    "#     return InputTokens\n",
+    "\n",
     "#=============================4) Initialize The Fine Tuning --> LoRA Config + Model=============================\n",
     "FineTuningLoraConfig = LoraConfig(\n",
     "    r=8,\n",
-    "    lora_alpha=32,\n",
-    "    lora_dropout=0.05,\n",
+    "    # lora_alpha=32,\n",
+    "    # lora_dropout=0.05,\n",
     "    bias=\"none\",\n",
     "    task_type=\"CAUSAL_LM\",\n",
     "    target_modules=[\"q_proj\",\"v_proj\",\"k_proj\",\"o_proj\",\"gate_proj\",\"up_proj\",\"down_proj\"]\n",
```
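The commented-out collator kept in this hunk documents an alternative approach: pass the answers via suffix= so the processor builds the labels itself, instead of masking them by hand. The dead code also sketches the vectorized label masking it would replace (note that Labels[i] == -100 in the inner loop is a comparison, not an assignment, which is one reason the vectorized form is safer). A small self-contained illustration of that masking step, with placeholder token ids rather than values from the notebook:

```python
import torch

pad_token_id = 0      # placeholder for ModelProcessor.tokenizer.pad_token_id
image_token_id = 99   # placeholder for the image-token id (TokenToIDFn in the notebook)

input_ids = torch.tensor([[99, 99, 12, 34, 0, 0]])
labels = input_ids.clone()

# Vectorized masking: positions holding pad or image tokens contribute no loss.
labels[labels == pad_token_id] = -100
labels[labels == image_token_id] = -100
print(labels)  # tensor([[-100, -100, 12, 34, -100, -100]])
```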
adapter_config.json
CHANGED
```diff
@@ -10,8 +10,8 @@
     "layers_pattern": null,
     "layers_to_transform": null,
     "loftq_config": {},
-    "lora_alpha": 
-    "lora_dropout": 0.
+    "lora_alpha": 8,
+    "lora_dropout": 0.0,
     "megatron_config": null,
     "megatron_core": "megatron.core",
     "modules_to_save": null,
```
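The new values line up with the notebook edit above: once lora_alpha and lora_dropout are commented out of LoraConfig, PEFT falls back to its defaults, lora_alpha=8 and lora_dropout=0.0, which is exactly what the exported adapter_config.json now records. A minimal sketch of the resulting configuration:

```python
from peft import LoraConfig

# With lora_alpha/lora_dropout omitted, PEFT's defaults (8 and 0.0) apply,
# matching the adapter_config.json diff above.
lora_config = LoraConfig(
    r=8,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
print(lora_config.lora_alpha, lora_config.lora_dropout)  # 8 0.0
```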
adapter_model.safetensors
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f33abf76f7f6eb32179854ef98008c59ee1b1fd3e76e421e495bd026ff59d4de
 size 45258384
```