Training in progress, epoch 0
Files changed:
- Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5 +3 -0
- Logs/events.out.tfevents.1718376187.e29ec45d9208.134.6 +3 -0
- Logs/events.out.tfevents.1718376786.e29ec45d9208.134.7 +3 -0
- Logs/events.out.tfevents.1718376875.e29ec45d9208.134.8 +3 -0
- Logs/events.out.tfevents.1718376932.e29ec45d9208.134.9 +3 -0
- Untitled.ipynb +48 -9
- adapter_config.json +2 -2
- adapter_model.safetensors +1 -1
Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5663ff7319c7c244f3ef57d95d105a3050e8073f1e3db354c54ffe1acb723aec
+size 5616
```
Logs/events.out.tfevents.1718376187.e29ec45d9208.134.6
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:125592c2f566f59232d858d31742f19fa43baff01404d13f462cb6033d66c658
+size 5616
```
Logs/events.out.tfevents.1718376786.e29ec45d9208.134.7
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e40f75ff9979b27774982c2ed6b8aaa50563d918f9e2649b5ac0fbc0be340b0
+size 5616
```
Logs/events.out.tfevents.1718376875.e29ec45d9208.134.8
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ef820545bf631355fde58dd966a6b388455911f8ae13aa62e8bd4ac8e6e5605
+size 5616
```
Logs/events.out.tfevents.1718376932.e29ec45d9208.134.9
ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4134096bbfc95b27aec318c6110c8529485dc0c0c3aff3e8ecfbc81a4db0b9c
+size 6094
```
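All five event files above are TensorBoard run logs committed through Git LFS, so the repository stores only a three-line pointer (version, oid, size) per file rather than the binary itself. As a minimal sketch, such a pointer can be parsed like this (the helper name is illustrative, not part of the commit):

```python
# Parse a Git LFS pointer file into its three fields.
# The helper name and hard-coded path are illustrative only.
def read_lfs_pointer(path):
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

fields = read_lfs_pointer("Logs/events.out.tfevents.1718376136.e29ec45d9208.134.5")
print(fields["oid"], fields["size"])  # sha256:5663ff73... 5616
```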
Untitled.ipynb
CHANGED
```diff
@@ -429,7 +429,21 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "
+      "model_id": "a582824b64b041249949ed78eaade5d4",
+      "version_major": 2,
+      "version_minor": 0
+     },
+     "text/plain": [
+      "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+   },
+   {
+    "data": {
+     "application/vnd.jupyter.widget-view+json": {
+      "model_id": "dc5f4463bc564fcda93b0e03f4539c9f",
       "version_major": 2,
       "version_minor": 0
      },
@@ -472,8 +486,8 @@
     "\n",
     "    <div>\n",
     "      \n",
-    "      <progress value='
-    "      [
+    "      <progress value='98' max='645' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+    "      [ 98/645 22:20 < 2:07:16, 0.07 it/s, Epoch 0.75/5]\n",
     "    </div>\n",
     "    <table border=\"1\" class=\"dataframe\">\n",
     "  <thead>\n",
```
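A quick consistency check on the new progress readout: 645 total steps over 5 epochs is 129 steps per epoch, so step 98 lands at epoch 98/129, about 0.76, matching the "Epoch 0.75/5" display. All numbers below come from the diff:

```python
# Sanity-check the trainer progress readout shown above.
total_steps, num_epochs, current_step = 645, 5, 98
steps_per_epoch = total_steps / num_epochs    # 129.0
print(current_step / steps_per_epoch)         # ~0.76, displayed as "Epoch 0.75/5"
```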
```diff
@@ -638,10 +652,12 @@
     ").to(device)\n",
     "\n",
     "for param in ModelToFineTune.vision_tower.parameters():\n",
-    "    \n",
+    "    param.requires_grad = False\n",
+    "for param in ModelToFineTune.multi_modal_projector.parameters():\n",
+    "    param.requires_grad = False  \n",
     "\n",
     "def TokenGeneratorFn(DatasetEntries):\n",
-    "
+    "    \n",
     "    TextArr = [\"answer \"+i[\"question\"] + \"\\n\" + i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
     "    ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
     "    InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
```
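This hunk fills in the previously empty freezing loop and additionally freezes the multimodal projector, so only the language-model weights (wrapped in LoRA below) receive gradients. A minimal standalone sketch of the same pattern, assuming a PaliGemma-style model (the attribute names match the notebook's; the checkpoint id and device handling are illustrative assumptions):

```python
import torch
from transformers import PaliGemmaForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
model = PaliGemmaForConditionalGeneration.from_pretrained(
    "google/paligemma-3b-pt-224",   # assumption: any PaliGemma checkpoint works here
    torch_dtype=torch.bfloat16,
).to(device)

# Freeze the vision tower and the vision-to-language projector; only the
# language model remains trainable.
for param in model.vision_tower.parameters():
    param.requires_grad = False
for param in model.multi_modal_projector.parameters():
    param.requires_grad = False
```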
```diff
@@ -661,15 +677,38 @@
     "    InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
     "    return InputTokens\n",
     "\n",
-    "    except Exception as err:\n",
-    "        print(\"Error:\",err)\n",
     "\n",
     "\n",
+    "\n",
+    "\n",
+    "# def TokenGeneratorFn(DatasetEntries):\n",
+    "    \n",
+    "#     TextArr = [\"answer \"+i[\"question\"] for i in DatasetEntries]\n",
+    "#     ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
+    "#     labels = [i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
+    "    \n",
+    "#     InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,suffix=labels,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
+    "#     # Labels = InputTokens[\"input_ids\"].clone()\n",
+    "\n",
+    "#     # Labels[Labels == ModelProcessor.tokenizer.pad_token_id] = -100\n",
+    "#     # Labels[Labels == TokenToIDFn] = -100\n",
+    "\n",
+    "#     # The two lines above are equivalent to the commented loop below, but faster because they work with optimized numpy operations\n",
+    "#     # for i in range(len(Labels)):\n",
+    "#     #     if(Labels[i] == ModelProcessor.tokenizer.pad_token_id):\n",
+    "#     #         Labels[i] = -100\n",
+    "#     #     elif(Labels[i] == TokenToIDFn):\n",
+    "#     #         Labels[i] == -100\n",
+    "\n",
+    "#     # InputTokens[\"labels\"] = Labels  # This is v. imp.: use \"labels\" with a small \"l\", because the model expects the key to be written this way and not as Labels\n",
+    "#     InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
+    "#     return InputTokens\n",
+    "\n",
     "#=============================4) Initialize The Fine Tuning --> LoRA Config + Model=============================\n",
     "FineTuningLoraConfig = LoraConfig(\n",
     "    r=8,\n",
-    "    lora_alpha=32,\n",
-    "    lora_dropout=0.05,\n",
+    "    # lora_alpha=32,\n",
+    "    # lora_dropout=0.05,\n",
     "    bias=\"none\",\n",
     "    task_type=\"CAUSAL_LM\",\n",
     "    target_modules=[\"q_proj\",\"v_proj\",\"k_proj\",\"o_proj\",\"gate_proj\",\"up_proj\",\"down_proj\"]\n",
```
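The commented-out collator kept in this hunk documents an alternative approach: pass the answers via suffix= so the processor builds the labels itself, instead of masking them by hand. The dead code also sketches the vectorized label masking it would replace (note that Labels[i] == -100 in the inner loop is a comparison, not an assignment, which is one reason the vectorized form is safer). A small self-contained illustration of that masking step, with placeholder token ids rather than values from the notebook:

```python
import torch

pad_token_id = 0      # placeholder for ModelProcessor.tokenizer.pad_token_id
image_token_id = 99   # placeholder for the image-token id (TokenToIDFn in the notebook)

input_ids = torch.tensor([[99, 99, 12, 34, 0, 0]])
labels = input_ids.clone()

# Vectorized masking: positions holding pad or image tokens contribute no loss.
labels[labels == pad_token_id] = -100
labels[labels == image_token_id] = -100
print(labels)  # tensor([[-100, -100, 12, 34, -100, -100]])
```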
adapter_config.json
CHANGED
```diff
@@ -10,8 +10,8 @@
     "layers_pattern": null,
     "layers_to_transform": null,
     "loftq_config": {},
-    "lora_alpha": 
-    "lora_dropout": 0.
+    "lora_alpha": 8,
+    "lora_dropout": 0.0,
     "megatron_config": null,
     "megatron_core": "megatron.core",
     "modules_to_save": null,
```
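The new values line up with the notebook edit above: once lora_alpha and lora_dropout are commented out of LoraConfig, PEFT falls back to its defaults, lora_alpha=8 and lora_dropout=0.0, which is exactly what the exported adapter_config.json now records. A minimal sketch of the resulting configuration:

```python
from peft import LoraConfig

# With lora_alpha/lora_dropout omitted, PEFT's defaults (8 and 0.0) apply,
# matching the adapter_config.json diff above.
lora_config = LoraConfig(
    r=8,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
print(lora_config.lora_alpha, lora_config.lora_dropout)  # 8 0.0
```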
adapter_model.safetensors
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f33abf76f7f6eb32179854ef98008c59ee1b1fd3e76e421e495bd026ff59d4de
 size 45258384
```