winglian committed on
Commit
607a4d3
1 Parent(s): 99383f1

Make sure to use the train split when loading a prepared dataset from the Hugging Face Hub

Browse files
Files changed (1) hide show
  1. src/axolotl/utils/data.py +2 -0
src/axolotl/utils/data.py CHANGED
@@ -58,6 +58,7 @@ def load_tokenized_prepared_datasets(tokenizer, cfg, default_dataset_prepared_pa
58
  try:
59
  if cfg.push_dataset_to_hub:
60
  dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
 
61
  except:
62
  pass
63
 
@@ -232,6 +233,7 @@ def load_prepare_datasets(tokenizer: PreTrainedTokenizerBase, cfg, default_datas
232
  f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
233
  )
234
  dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
 
235
  except:
236
  pass
237
 
 
58
  try:
59
  if cfg.push_dataset_to_hub:
60
  dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
61
+ dataset = dataset["train"]
62
  except:
63
  pass
64
 
 
233
  f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
234
  )
235
  dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
236
+ dataset = dataset["train"]
237
  except:
238
  pass
239