Geohunterr committed · Commit fd704d0 · verified · 1 Parent(s): 0622c16

Training in progress, epoch 0

Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5663ff7319c7c244f3ef57d95d105a3050e8073f1e3db354c54ffe1acb723aec
+size 5616
Logs/events.out.tfevents.1718376187.e29ec45d9208.134.6 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:125592c2f566f59232d858d31742f19fa43baff01404d13f462cb6033d66c658
+size 5616
Logs/events.out.tfevents.1718376786.e29ec45d9208.134.7 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e40f75ff9979b27774982c2ed6b8aaa50563d918f9e2649b5ac0fbc0be340b0
+size 5616
Logs/events.out.tfevents.1718376875.e29ec45d9208.134.8 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ef820545bf631355fde58dd966a6b388455911f8ae13aa62e8bd4ac8e6e5605
+size 5616
Logs/events.out.tfevents.1718376932.e29ec45d9208.134.9 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4134096bbfc95b27aec318c6110c8529485dc0c0c3aff3e8ecfbc81a4db0b9c
+size 6094
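
The five ADDED files above are Git LFS pointers for TensorBoard event logs (one per run restart), so only the oid/size metadata lives in the repo. A minimal sketch for inspecting one of them locally, assuming `git lfs pull` has materialized the real file and the `tensorboard` package is installed; the tag name `train/loss` is a guess, so list `ea.Tags()` first:

```python
# Minimal sketch: read training scalars from one of the event files above.
# Assumes `git lfs pull` has replaced the pointer with the real log; the
# tag "train/loss" is an assumption -- check ea.Tags() for what was logged.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5")
ea.Reload()  # parse the event file from disk

print(ea.Tags())  # available scalar/tensor tags
for event in ea.Scalars("train/loss"):
    print(event.step, event.value)
```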
Untitled.ipynb CHANGED
@@ -429,7 +429,21 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "8f49b17e69404b56ae7925565eead95c",
+"model_id": "a582824b64b041249949ed78eaade5d4",
+"version_major": 2,
+"version_minor": 0
+},
+"text/plain": [
+"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+},
+{
+"data": {
+"application/vnd.jupyter.widget-view+json": {
+"model_id": "dc5f4463bc564fcda93b0e03f4539c9f",
 "version_major": 2,
 "version_minor": 0
 },
@@ -472,8 +486,8 @@
 "\n",
 " <div>\n",
 " \n",
-" <progress value='65' max='645' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-" [ 65/645 14:25 < 2:12:46, 0.07 it/s, Epoch 0.49/5]\n",
+" <progress value='98' max='645' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+" [ 98/645 22:20 < 2:07:16, 0.07 it/s, Epoch 0.75/5]\n",
 " </div>\n",
 " <table border=\"1\" class=\"dataframe\">\n",
 " <thead>\n",
@@ -638,10 +652,12 @@
 ").to(device)\n",
 "\n",
 "for param in ModelToFineTune.vision_tower.parameters():\n",
-" \n",
+" param.requires_grad = False\n",
+"for param in ModelToFineTune.multi_modal_projector.parameters():\n",
+" param.requires_grad = False \n",
 "\n",
 "def TokenGeneratorFn(DatasetEntries):\n",
-" try:\n",
+" \n",
 " TextArr = [\"answer \"+i[\"question\"] + \"\\n\" + i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
 " ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
 " InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
@@ -661,15 +677,38 @@
 " InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
 " return InputTokens\n",
 "\n",
-" except Exception as err:\n",
-" print(\"Error:\",err)\n",
 "\n",
 "\n",
+"\n",
+"\n",
+"# def TokenGeneratorFn(DatasetEntries):\n",
+" \n",
+"# TextArr = [\"answer \"+i[\"question\"] for i in DatasetEntries]\n",
+"# ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
+"# labels = [i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
+" \n",
+"# InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,suffix=labels,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
+"# # Labels = InputTokens[\"input_ids\"].clone()\n",
+"\n",
+"# # Labels[Labels == ModelProcessor.tokenizer.pad_token_id] = -100\n",
+"# # Labels[Labels == TokenToIDFn] = -100\n",
+"\n",
+"# #These above two lines are Equivalent to The commented portion but faster because the work with optimized numpy algorithms\n",
+"# # for i in range(len(Labels)):\n",
+"# # if(Labels[i] == ModelProcessor.tokenizer.pad_token_id):\n",
+"# # Labels[i] = -100\n",
+"# # elif(Labels[i] == TokenToIDFn):\n",
+"# # Labels[i] == -100\n",
+"\n",
+"# # InputTokens[\"labels\"] = Labels # This is V.Imp you have to use labels with a small \"l\" because the model expects labels to be written this way and not as Labels\n",
+"# InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
+"# return InputTokens\n",
+"\n",
 "#=============================4) Initialize The Fine Tuning --> LoRA Config + Model=============================\n",
 "FineTuningLoraConfig = LoraConfig(\n",
 " r=8,\n",
-" lora_alpha=32,\n",
-" lora_dropout=0.05,\n",
+" # lora_alpha=32,\n",
+" # lora_dropout=0.05,\n",
 " bias=\"none\",\n",
 " task_type=\"CAUSAL_LM\",\n",
 " target_modules=[\"q_proj\",\"v_proj\",\"k_proj\",\"o_proj\",\"gate_proj\",\"up_proj\",\"down_proj\"]\n",
adapter_config.json CHANGED
@@ -10,8 +10,8 @@
 "layers_pattern": null,
 "layers_to_transform": null,
 "loftq_config": {},
-"lora_alpha": 32,
-"lora_dropout": 0.05,
+"lora_alpha": 8,
+"lora_dropout": 0.0,
 "megatron_config": null,
 "megatron_core": "megatron.core",
 "modules_to_save": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f034f6296c891e2f9b7f4caf02dc78746ebc7d2855ca693f0a29142230e40b4
+oid sha256:f33abf76f7f6eb32179854ef98008c59ee1b1fd3e76e421e495bd026ff59d4de
 size 45258384