Trying to finetune ... need help
#24 by BliepBlop - opened
Hi, I am trying to finetune this model. I downloaded a dataset from
https://huggingface.co/datasets/deepmind/code_contests
and now I want to finetune the model with a script I built, but I get an error. Here is the script:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import TrainingArguments

model_id = "./"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

dataset = load_dataset("parquet", data_files='/Users/mario/Downloads/code_contests/data/*.parquet', split='train')

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# Preprocess your dataset
def formatting_func(example):
    text = f"### Question: {example['description']}\n ### Answer: {example['solutions'][0]['solution']}"
    return text

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="steps",     # evaluation is done at each logging step
    save_strategy="steps",           # model checkpoints are saved at each logging step
    eval_steps=10,                   # evaluation and checkpoint saving is done every 10 steps
    load_best_model_at_end=True,     # the best model is loaded at the end of training
    metric_for_best_model="loss",    # use loss to determine the best model
    greater_is_better=False,         # lower loss is better
)

trainer = SFTTrainer(
    model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=dataset,
    formatting_func=formatting_func
)

trainer.train()
The error I get:
Loading checkpoint shards: 100%|██████████| 2/2 [00:22<00:00, 11.05s/it]
Resolving data files: 100%|██████████| 41/41 [00:00<00:00, 33019.67it/s]
Found cached dataset parquet (/Users/mario/.cache/huggingface/datasets/parquet/default-f2feb2edba9ed25e/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Using pad_token, but it is not set yet.
/opt/homebrew/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:165: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024
warnings.warn(
Loading cached processed dataset at /Users/mario/.cache/huggingface/datasets/parquet/default-f2feb2edba9ed25e/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-12ea81fe9ef7fc4c.arrow
/opt/homebrew/lib/python3.11/site-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
warnings.warn(
0%| | 0/3 [00:00<?, ?it/s]You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Traceback (most recent call last):
  File "/Users/mario/Downloads/falcon-7b/finetune2.py", line 46, in <module>
    trainer.train()
  File "/opt/homebrew/lib/python3.11/site-packages/transformers/trainer.py", line 1664, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/transformers/trainer.py", line 1940, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/transformers/trainer.py", line 2735, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/transformers/trainer.py", line 2767, in compute_loss
    outputs = model(**inputs)
              ^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mario/.cache/huggingface/modules/transformers_modules/modelling_RW.py", line 753, in forward
    transformer_outputs = self.transformer(
                          ^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mario/.cache/huggingface/modules/transformers_modules/modelling_RW.py", line 574, in forward
    batch_size, seq_length = input_ids.shape
    ^^^^^^^^^^^^^^^^^^^^^^
ValueError: not enough values to unpack (expected 2, got 1)
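
For context on the traceback: TRL's SFTTrainer tokenizes the training set with a batched map, so formatting_func receives a batch of examples and is expected to return a list of strings, one per example. Returning a single string for the whole batch (as the script above does) ends up producing one-dimensional input_ids, which is what triggers the "expected 2, got 1" unpacking error in modelling_RW.py. Below is a minimal batch-aware sketch, not a drop-in fix: it assumes the description and solutions columns used in the script, and the exact nesting of solutions in code_contests should be verified against the dataset schema.

# Sketch of a batch-aware formatting function. Assumption: each entry of
# examples['solutions'] is a dict holding a 'solution' list; check the actual
# code_contests schema before relying on this indexing.
def formatting_func(examples):
    texts = []
    for description, solutions in zip(examples['description'], examples['solutions']):
        # Take the first listed solution, falling back to an empty string.
        first_solution = solutions['solution'][0] if solutions['solution'] else ""
        texts.append(f"### Question: {description}\n### Answer: {first_solution}")
    return texts

A quick sanity check before rerunning: formatting_func(dataset[:4]) should return a list of four strings, and passing that list to the tokenizer should give four input_ids sequences. Separately, the TrainingArguments request evaluation every 10 steps with load_best_model_at_end=True, but no eval_dataset is passed to SFTTrainer, so the run would likely stop at the first evaluation step even after the shape issue is fixed.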
BliepBlop changed discussion status to closed