winglian committed on
Commit
b9b7d4c
2 Parent(s): 9bed281 9bdd30c

Merge pull request #221 from utensil/local_dataset

Browse files

[WIP] Support loading data files from a local directory

Files changed (1) hide show
  1. src/axolotl/utils/data.py +20 -7
src/axolotl/utils/data.py CHANGED
@@ -102,13 +102,26 @@ def load_tokenized_prepared_datasets(
102
  pass
103
 
104
  # prefer local dataset, even if hub exists
105
- if Path(d.path).exists():
106
- ds = load_dataset(
107
- "json",
108
- data_files=d.path,
109
- streaming=False,
110
- split=None,
111
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  elif ds_from_hub:
113
  if d.data_files:
114
  ds = load_dataset(
 
102
  pass
103
 
104
  # prefer local dataset, even if hub exists
105
+ local_path = Path(d.path)
106
+ if local_path.exists():
107
+ if local_path.is_dir():
108
+ ds = load_dataset(
109
+ d.path,
110
+ data_files=d.data_files,
111
+ streaming=False,
112
+ split=None,
113
+ )
114
+ elif local_path.is_file():
115
+ ds = load_dataset(
116
+ "json",
117
+ data_files=d.path,
118
+ streaming=False,
119
+ split=None,
120
+ )
121
+ else:
122
+ raise ValueError(
123
+ "unhandled dataset load: local path exists, but is neither a directory or a file"
124
+ )
125
  elif ds_from_hub:
126
  if d.data_files:
127
  ds = load_dataset(