jpfearnworks
committed
Merge branch 'main' of https://github.com/OpenAccess-AI-Collective/axolotl into qlora-openllama-3b-example
Files changed:
- .github/workflows/base.yml  +1 -1
- docker/Dockerfile-base  +3 -0
- scripts/finetune.py  +9 -9
- src/axolotl/prompt_tokenizers.py  +4 -1
.github/workflows/base.yml
CHANGED
@@ -1,4 +1,4 @@
-name: ci-cd
+name: ci-cd-base
 
 on:
   push:
docker/Dockerfile-base
CHANGED
@@ -62,6 +62,7 @@ RUN git clone https://github.com/microsoft/DeepSpeed.git && \
 FROM base-builder AS bnb-builder
 
 WORKDIR /workspace
+ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB
 
 RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
     cd bitsandbytes && \
@@ -70,6 +71,8 @@ RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
 
 FROM base-builder
 
+ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB
+
 # recompile apex
 RUN python3 -m pip uninstall -y apex
 RUN git clone https://github.com/NVIDIA/apex
scripts/finetune.py
CHANGED
@@ -178,6 +178,15 @@ def train(
         tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
     )
 
+    if cfg.debug or "debug" in kwargs:
+        logging.info("check_dataset_labels...")
+        check_dataset_labels(
+            train_dataset.select(
+                [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
+            ),
+            tokenizer,
+        )
+
     if prepare_ds_only:
         logging.info("Finished preparing dataset. Exiting...")
         return
@@ -213,15 +222,6 @@ def train(
         model.save_pretrained(cfg.output_dir)
         return
 
-    if cfg.debug:
-        logging.info("check_dataset_labels...")
-        check_dataset_labels(
-            train_dataset.select(
-                [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
-            ),
-            tokenizer,
-        )
-
     trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
 
     model.config.use_cache = False
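The check_dataset_labels block now runs before the prepare_ds_only early return, and it can also be triggered by a debug keyword passed through to train(). A minimal sketch of how that might be exercised and roughly what the check inspects; the CLI invocation and the helper body below are assumptions for illustration, not code from this commit:

# Assumed invocation: if scripts/finetune.py dispatches to train() via
# python-fire, an extra --debug flag would land in **kwargs and enable the check.
#   accelerate launch scripts/finetune.py path/to/config.yml --debug

# Rough idea of the label inspection (illustrative only; the real
# check_dataset_labels lives in axolotl and may differ): decode a few random
# rows and show which tokens are excluded from the loss via label -100.
def check_dataset_labels_sketch(dataset, tokenizer):
    for row in dataset:
        decoded = []
        for input_id, label in zip(row["input_ids"], row["labels"]):
            token = tokenizer.decode([input_id])
            masked = " [masked]" if label == -100 else ""
            decoded.append(f"{token!r}{masked}")
        print(" ".join(decoded))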
src/axolotl/prompt_tokenizers.py
CHANGED
@@ -268,6 +268,9 @@ class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy):
 
 
 class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
+    def get_conversation_thread(self, prompt):
+        return prompt["conversations"]
+
     def tokenize_prompt(self, prompt):
         result = {
             "input_ids": [],
@@ -279,7 +282,7 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
         assistant_token = self._get_assistant_token()
         try:
             for i, part in enumerate(
-                self.prompter.build_prompt(prompt["conversations"])
+                self.prompter.build_prompt(self.get_conversation_thread(prompt))
             ):
                 if isinstance(part, tuple):
                     if part[0] == "USER:":
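Factoring the prompt["conversations"] access out into get_conversation_thread makes the ShareGPT tokenization reusable for datasets that keep their turns under a different key: a subclass only needs to override that one method. A minimal sketch, assuming a hypothetical dataset whose rows store turns under "messages" (the subclass name and key are illustrative, not part of this commit):

class MessagesShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
    # Hypothetical subclass: reuse the ShareGPT tokenization path unchanged,
    # only redirect where the list of conversation turns is read from.
    def get_conversation_thread(self, prompt):
        return prompt["messages"]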