Spaces:
Runtime error
Runtime error
zetavg
committed on
Commit
•
c5290ad
1
Parent(s):
3889cb7
update fine-tune resuming related stuff
Browse files
llama_lora/ui/finetune_ui.py
CHANGED
@@ -316,6 +316,13 @@ def do_train(
|
|
316 |
resume_from_checkpoint = os.path.join(Global.data_dir, "lora_models", continue_from_model)
|
317 |
if continue_from_checkpoint:
|
318 |
resume_from_checkpoint = os.path.join(resume_from_checkpoint, continue_from_checkpoint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
|
320 |
output_dir = os.path.join(Global.data_dir, "lora_models", model_name)
|
321 |
if os.path.exists(output_dir):
|
|
|
316 |
resume_from_checkpoint = os.path.join(Global.data_dir, "lora_models", continue_from_model)
|
317 |
if continue_from_checkpoint:
|
318 |
resume_from_checkpoint = os.path.join(resume_from_checkpoint, continue_from_checkpoint)
|
319 |
+
will_be_resume_from_checkpoint_file = os.path.join(resume_from_checkpoint, "pytorch_model.bin")
|
320 |
+
if not os.path.exists(will_be_resume_from_checkpoint_file):
|
321 |
+
raise ValueError(f"Unable to resume from checkpoint {continue_from_model}/{continue_from_checkpoint}. Resuming is only possible from checkpoints stored locally in the data directory. Please ensure that the file '{will_be_resume_from_checkpoint_file}' exists.")
|
322 |
+
else:
|
323 |
+
will_be_resume_from_checkpoint_file = os.path.join(resume_from_checkpoint, "adapter_model.bin")
|
324 |
+
if not os.path.exists(will_be_resume_from_checkpoint_file):
|
325 |
+
raise ValueError(f"Unable to continue from model {continue_from_model}. Continuation is only possible from models stored locally in the data directory. Please ensure that the file '{will_be_resume_from_checkpoint_file}' exists.")
|
326 |
|
327 |
output_dir = os.path.join(Global.data_dir, "lora_models", model_name)
|
328 |
if os.path.exists(output_dir):
|
lora_models/unhelpful-ai-v01/finetune_params.json
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
{
|
2 |
-
"num_train_epochs":
|
3 |
"learning_rate": 0.0003,
|
4 |
"cutoff_len": 512,
|
5 |
-
"
|
|
|
6 |
"lora_alpha": 32,
|
7 |
"lora_dropout": 0.05,
|
8 |
"lora_target_modules": [
|
@@ -11,9 +12,5 @@
|
|
11 |
"k_proj",
|
12 |
"o_proj"
|
13 |
],
|
14 |
-
"train_on_inputs": false,
|
15 |
-
"group_by_length": false,
|
16 |
-
"save_steps": 500,
|
17 |
-
"save_total_limit": 5,
|
18 |
-
"logging_steps": 10
|
19 |
}
|
|
|
1 |
{
|
2 |
+
"num_train_epochs": 8,
|
3 |
"learning_rate": 0.0003,
|
4 |
"cutoff_len": 512,
|
5 |
+
"val_set_size": 0,
|
6 |
+
"lora_r": 16,
|
7 |
"lora_alpha": 32,
|
8 |
"lora_dropout": 0.05,
|
9 |
"lora_target_modules": [
|
|
|
12 |
"k_proj",
|
13 |
"o_proj"
|
14 |
],
|
15 |
+
"train_on_inputs": false
|
|
|
|
|
|
|
|
|
16 |
}
|