Minor details

- app.py +7 -3
- notebooks/sft.json +24 -4
app.py
CHANGED
@@ -212,15 +212,19 @@ with gr.Blocks(css=css) as demo:
         examples=[
             [
                 "scikit-learn/iris",
-                "Try this dataset for Exploratory Data Analysis",
+                "Try this dataset for Exploratory Data Analysis (EDA)",
             ],
             [
                 "infinite-dataset-hub/GlobaleCuisineRecipes",
-                "Try this dataset for Embeddings",
+                "Try this dataset for Text Embeddings",
             ],
             [
                 "infinite-dataset-hub/GlobalBestSellersSummaries",
-                "Try this dataset for
+                "Try this dataset for Retrieval-augmented generation (RAG)",
+            ],
+            [
+                "asoria/english-quotes-text",
+                "Try this dataset for Supervised fine-tuning (SFT)",
             ],
         ],
         inputs=[dataset_name, text_input],
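For context, a minimal sketch of how the updated examples list could be wired into a gr.Examples component. The Space's full app.py is not shown in this diff, so the textbox components, labels, and overall layout below are assumptions for illustration only; the examples and inputs values are taken from the diff.

# Minimal sketch (not the Space's actual layout): example rows feeding two textboxes.
import gradio as gr

with gr.Blocks() as demo:
    dataset_name = gr.Textbox(label="Dataset name")   # assumed component
    text_input = gr.Textbox(label="Notebook prompt")  # assumed component
    gr.Examples(
        examples=[
            ["scikit-learn/iris", "Try this dataset for Exploratory Data Analysis (EDA)"],
            ["infinite-dataset-hub/GlobaleCuisineRecipes", "Try this dataset for Text Embeddings"],
            ["infinite-dataset-hub/GlobalBestSellersSummaries", "Try this dataset for Retrieval-augmented generation (RAG)"],
            ["asoria/english-quotes-text", "Try this dataset for Supervised fine-tuning (SFT)"],
        ],
        inputs=[dataset_name, text_input],  # clicking an example fills both fields
    )

if __name__ == "__main__":
    demo.launch()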
notebooks/sft.json
CHANGED
@@ -14,7 +14,7 @@
   },
   {
     "cell_type": "code",
-    "source": "# Install and import necessary libraries
+    "source": "# Install and import necessary libraries\n!pip install trl datasets transformers bitsandbytes"
   },
   {
     "cell_type": "code",
@@ -24,6 +24,10 @@
     "cell_type": "code",
     "source": "# Load the dataset\ndataset = load_dataset('{dataset_name}', name='{first_config}', split='{first_split}')\ndataset"
   },
+  {
+    "cell_type": "code",
+    "source": "# Split the dataset: 20% for evaluation, 80% for training\ntrain_test_split = dataset.train_test_split(test_size=0.2)\n\n# Get the training and evaluation datasets\ntrain_dataset = train_test_split['train']\neval_dataset = train_test_split['test']"
+  },
   {
     "cell_type": "code",
     "source": "# Specify the column name that will be used for training\ndataset_text_field = '{longest_col}'"
@@ -34,15 +38,31 @@
   },
   {
     "cell_type": "code",
-    "source": "model_name = 'facebook/opt-350m'\noutput_model_name = f'{model_name}-{dataset_name}'.replace('/', '-')
+    "source": "model_name = 'facebook/opt-350m' # Replace with your desired model\noutput_model_name = f'{model_name}-{dataset_name}'.replace('/', '-')"
+  },
+  {
+    "cell_type": "code",
+    "source": "# Initialize training arguments, adjust parameters as needed\ntraining_args = TrainingArguments(\n    per_device_train_batch_size = 1, #Batch size per GPU for training\n    gradient_accumulation_steps = 4,\n    max_steps = 100, #Total number of training steps.(Overrides epochs)\n    learning_rate = 2e-4,\n    fp16 = True,\n    logging_steps=20,\n    output_dir = output_model_name,\n    optim = 'paged_adamw_8bit' #Optimizer to use\n    )"
+  },
+  {
+    "cell_type": "code",
+    "source": "# Initialize SFTTrainer\ntrainer = SFTTrainer(\n    model = model_name,\n    train_dataset=train_dataset,\n    eval_dataset=eval_dataset,\n    dataset_text_field=dataset_text_field,\n    max_seq_length=512,\n    args=training_args\n)"
+  },
+  {
+    "cell_type": "markdown",
+    "source": "## 3. Perform fine-tuning and capture the training process"
+  },
+  {
+    "cell_type": "code",
+    "source": "eval_result_before = trainer.evaluate()\n\n# Start training\ntrainer.train()\n\neval_result_after = trainer.evaluate()"
   },
   {
     "cell_type": "code",
-    "source": "
+    "source": "print(f'Before training: {eval_result_before}')\nprint(f'After training: {eval_result_after}')"
   },
   {
     "cell_type": "markdown",
-    "source": "##
+    "source": "## 4. Push model to hub (Optional)"
   },
   {
     "cell_type": "code",