winglian committed
Commit a459383
1 Parent(s): 2393801

fix logging

Files changed (1)
  1. scripts/finetune.py +11 -12
scripts/finetune.py CHANGED
@@ -38,8 +38,7 @@ from axolotl.prompt_tokenizers import (
 )
 from axolotl.prompters import AlpacaPrompter, GPTeacherPrompter, ShareGPTPrompter
 
-logger = logging.getLogger(__name__)
-logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))
+logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
 
 DEFAULT_DATASET_PREPARED_PATH = "data/last_run"
 
 
@@ -171,8 +170,8 @@ def check_dataset_labels(dataset, tokenizer):
             )
             colored_tokens.append(colored_token)
 
-        logger.info(" ".join(colored_tokens))
-        logger.info("\n\n\n")
+        logging.info(" ".join(colored_tokens))
+        logging.info("\n\n\n")
 
 
 def do_inference(cfg, model, tokenizer):
@@ -349,9 +348,9 @@ def train(
         return
 
     if cfg.dataset_prepared_path and any(Path(cfg.dataset_prepared_path).glob("*")):
-        logger.info("Loading prepared dataset from disk...")
+        logging.info("Loading prepared dataset from disk...")
         dataset = load_from_disk(cfg.dataset_prepared_path)
-        logger.info("Prepared dataset loaded from disk...")
+        logging.info("Prepared dataset loaded from disk...")
     else:
         datasets = []
         for d in cfg.datasets:
@@ -391,14 +390,14 @@ def train(
         ).train_test_split(test_size=cfg.val_set_size, shuffle=True, seed=42)
 
         if cfg.local_rank == 0:
-            logger.info("Saving prepared dataset to disk...")
+            logging.info("Saving prepared dataset to disk...")
             if cfg.dataset_prepared_path:
                 dataset.save_to_disk(cfg.dataset_prepared_path)
             else:
                 dataset.save_to_disk(DEFAULT_DATASET_PREPARED_PATH)
 
     if prepare_ds_only:
-        logger.info("Finished preparing dataset. Exiting...")
+        logging.info("Finished preparing dataset. Exiting...")
         return
 
     train_dataset = dataset["train"]
@@ -415,11 +414,11 @@ def train(
     model.config.use_cache = False
 
     if torch.__version__ >= "2" and sys.platform != "win32":
-        logger.info("Compiling torch model")
+        logging.info("Compiling torch model")
        model = torch.compile(model)
 
     # go ahead and presave, so we have the adapter config available to inspect
-    logger.info(f"Pre-saving adapter config to {cfg.output_dir}")
+    logging.info(f"Pre-saving adapter config to {cfg.output_dir}")
     lora_config.save_pretrained(cfg.output_dir)
 
     # In case we want to stop early with ctrl+c, this is a nice to have to save the pretrained model
@@ -428,11 +427,11 @@ def train(
         lambda signal, frame: (model.save_pretrained(cfg.output_dir), exit(0)),
     )
 
-    logger.info("Starting trainer...")
+    logging.info("Starting trainer...")
     trainer.train(resume_from_checkpoint=cfg.resume_from_checkpoint)
 
     # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading
-    logger.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
+    logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
     model.save_pretrained(cfg.output_dir)
 
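Why the fix works: the old module-level logger had its level set from LOG_LEVEL, but no handler was ever configured, so its records propagated to the root logger and fell through to Python's last-resort handler, which only emits WARNING and above; the INFO messages never appeared. logging.basicConfig attaches a stream handler to the root logger and sets its level from LOG_LEVEL, so the plain logging.info(...) calls are actually printed. A minimal standalone sketch of the before/after behavior (not part of the repo, just an illustration):

import logging
import os

# Before the fix: the logger's level is set, but no handler exists anywhere,
# so INFO records end up at logging.lastResort (stderr, level WARNING) and
# are silently dropped.
logger = logging.getLogger(__name__)
logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))
logger.info("this never appears")

# After the fix: basicConfig adds a stderr StreamHandler to the root logger
# and sets its level from LOG_LEVEL, so module-level logging.info(...) shows up.
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
logging.info("this appears on stderr")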