Spaces:

Dovakiins
/

qwerrwe

Build error

winglian committed on Jan 9, 2024

Commit

d7057cc

unverified ·

1 Parent(s): 768d348

paired kto support (#1069)

Files changed (3) hide show

README.md CHANGED Viewed

@@ -595,6 +595,9 @@ datasets:
       # For `completion` datasets only, uses the provided field instead of `text` column
       field:
 # Saves the desired chat template to the tokenizer_config.json for easier inferencing
 # Currently supports chatml and inst (mistral/mixtral)
 chat_template: chatml

       # For `completion` datasets only, uses the provided field instead of `text` column
       field:
+# use RL training: dpo, ipo, kto_pair
+rl:
 # Saves the desired chat template to the tokenizer_config.json for easier inferencing
 # Currently supports chatml and inst (mistral/mixtral)
 chat_template: chatml

requirements.txt CHANGED Viewed

@@ -40,4 +40,4 @@ s3fs
 gcsfs
 # adlfs
-trl @ git+https://github.com/huggingface/trl.git@main

 gcsfs
 # adlfs
+trl>=0.7.9

src/axolotl/core/trainer_builder.py CHANGED Viewed

@@ -927,6 +927,8 @@ class HFDPOTrainerBuilder(TrainerBuilderBase):
             dpo_trainer_kwargs["loss_type"] = "ipo"
             if self.cfg.dpo_label_smoothing:
                 dpo_trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing
         dpo_trainer = DPOTrainer(
             self.model,

             dpo_trainer_kwargs["loss_type"] = "ipo"
             if self.cfg.dpo_label_smoothing:
                 dpo_trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing
+        elif self.cfg.rl == "kto_pair":
+            dpo_trainer_kwargs["loss_type"] = "kto_pair"
         dpo_trainer = DPOTrainer(
             self.model,