Spaces:

zetavg
/

LLaMA-LoRA-Tuner-UI-Demo

Runtime error

zetavg committed on Apr 17, 2023

Commit

c5290ad

•

1 Parent(s): 3889cb7

update fine-tune resuming related stuff

Files changed (2) hide show

llama_lora/ui/finetune_ui.py CHANGED Viewed

@@ -316,6 +316,13 @@ def do_train(
             resume_from_checkpoint = os.path.join(Global.data_dir, "lora_models", continue_from_model)
             if continue_from_checkpoint:
                 resume_from_checkpoint = os.path.join(resume_from_checkpoint, continue_from_checkpoint)
         output_dir = os.path.join(Global.data_dir, "lora_models", model_name)
         if os.path.exists(output_dir):

             resume_from_checkpoint = os.path.join(Global.data_dir, "lora_models", continue_from_model)
             if continue_from_checkpoint:
                 resume_from_checkpoint = os.path.join(resume_from_checkpoint, continue_from_checkpoint)
+                will_be_resume_from_checkpoint_file = os.path.join(resume_from_checkpoint, "pytorch_model.bin")
+                if not os.path.exists(will_be_resume_from_checkpoint_file):
+                    raise ValueError(f"Unable to resume from checkpoint {continue_from_model}/{continue_from_checkpoint}. Resuming is only possible from checkpoints stored locally in the data directory. Please ensure that the file '{will_be_resume_from_checkpoint_file}' exists.")
+            else:
+                will_be_resume_from_checkpoint_file = os.path.join(resume_from_checkpoint, "adapter_model.bin")
+                if not os.path.exists(will_be_resume_from_checkpoint_file):
+                    raise ValueError(f"Unable to continue from model {continue_from_model}. Continuation is only possible from models stored locally in the data directory. Please ensure that the file '{will_be_resume_from_checkpoint_file}' exists.")
         output_dir = os.path.join(Global.data_dir, "lora_models", model_name)
         if os.path.exists(output_dir):

lora_models/unhelpful-ai-v01/finetune_params.json CHANGED Viewed

@@ -1,8 +1,9 @@
 {
-  "num_train_epochs": 16,
   "learning_rate": 0.0003,
   "cutoff_len": 512,
-  "lora_r": 12,
   "lora_alpha": 32,
   "lora_dropout": 0.05,
   "lora_target_modules": [
@@ -11,9 +12,5 @@
     "k_proj",
     "o_proj"
   ],
-  "train_on_inputs": false,
-  "group_by_length": false,
-  "save_steps": 500,
-  "save_total_limit": 5,
-  "logging_steps": 10
 }

 {
+  "num_train_epochs": 8,
   "learning_rate": 0.0003,
   "cutoff_len": 512,
+  "val_set_size": 0,
+  "lora_r": 16,
   "lora_alpha": 32,
   "lora_dropout": 0.05,
   "lora_target_modules": [
     "k_proj",
     "o_proj"
   ],
+  "train_on_inputs": false
 }