Minor details

- app.py +7 -3
- notebooks/sft.json +24 -4
app.py
CHANGED
@@ -212,15 +212,19 @@ with gr.Blocks(css=css) as demo:
         examples=[
             [
                 "scikit-learn/iris",
-                "Try this dataset for Exploratory Data Analysis",
+                "Try this dataset for Exploratory Data Analysis (EDA)",
             ],
             [
                 "infinite-dataset-hub/GlobaleCuisineRecipes",
-                "Try this dataset for Embeddings",
+                "Try this dataset for Text Embeddings",
             ],
             [
                 "infinite-dataset-hub/GlobalBestSellersSummaries",
-                "Try this dataset for
+                "Try this dataset for Retrieval-augmented generation (RAG)",
+            ],
+            [
+                "asoria/english-quotes-text",
+                "Try this dataset for Supervised fine-tuning (SFT)",
             ],
         ],
         inputs=[dataset_name, text_input],
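For context, a minimal sketch of how the updated examples list could be wired into a gr.Examples component. The Space's full app.py is not shown in this diff, so the textbox components, labels, and overall layout below are assumptions for illustration only; the examples and inputs values are taken from the diff.

# Minimal sketch (not the Space's actual layout): example rows feeding two textboxes.
import gradio as gr

with gr.Blocks() as demo:
    dataset_name = gr.Textbox(label="Dataset name")   # assumed component
    text_input = gr.Textbox(label="Notebook prompt")  # assumed component
    gr.Examples(
        examples=[
            ["scikit-learn/iris", "Try this dataset for Exploratory Data Analysis (EDA)"],
            ["infinite-dataset-hub/GlobaleCuisineRecipes", "Try this dataset for Text Embeddings"],
            ["infinite-dataset-hub/GlobalBestSellersSummaries", "Try this dataset for Retrieval-augmented generation (RAG)"],
            ["asoria/english-quotes-text", "Try this dataset for Supervised fine-tuning (SFT)"],
        ],
        inputs=[dataset_name, text_input],  # clicking an example fills both fields
    )

if __name__ == "__main__":
    demo.launch()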
notebooks/sft.json
CHANGED
@@ -14,7 +14,7 @@
   },
   {
     "cell_type": "code",
-    "source": "# Install and import necessary libraries
+    "source": "# Install and import necessary libraries\n!pip install trl datasets transformers bitsandbytes"
   },
   {
     "cell_type": "code",
@@ -24,6 +24,10 @@
     "cell_type": "code",
     "source": "# Load the dataset\ndataset = load_dataset('{dataset_name}', name='{first_config}', split='{first_split}')\ndataset"
   },
+  {
+    "cell_type": "code",
+    "source": "# Split the dataset: 20% for evaluation, 80% for training\ntrain_test_split = dataset.train_test_split(test_size=0.2)\n\n# Get the training and evaluation datasets\ntrain_dataset = train_test_split['train']\neval_dataset = train_test_split['test']"
+  },
   {
     "cell_type": "code",
     "source": "# Specify the column name that will be used for training\ndataset_text_field = '{longest_col}'"
@@ -34,15 +38,31 @@
   },
   {
     "cell_type": "code",
-    "source": "model_name = 'facebook/opt-350m'\noutput_model_name = f'{model_name}-{dataset_name}'.replace('/', '-')
+    "source": "model_name = 'facebook/opt-350m' # Replace with your desired model\noutput_model_name = f'{model_name}-{dataset_name}'.replace('/', '-')"
+  },
+  {
+    "cell_type": "code",
+    "source": "# Initialize training arguments, adjust parameters as needed\ntraining_args = TrainingArguments(\n    per_device_train_batch_size = 1, #Batch size per GPU for training\n    gradient_accumulation_steps = 4,\n    max_steps = 100, #Total number of training steps.(Overrides epochs)\n    learning_rate = 2e-4,\n    fp16 = True,\n    logging_steps=20,\n    output_dir = output_model_name,\n    optim = 'paged_adamw_8bit' #Optimizer to use\n    )"
+  },
+  {
+    "cell_type": "code",
+    "source": "# Initialize SFTTrainer\ntrainer = SFTTrainer(\n    model = model_name,\n    train_dataset=train_dataset,\n    eval_dataset=eval_dataset,\n    dataset_text_field=dataset_text_field,\n    max_seq_length=512,\n    args=training_args\n)"
+  },
+  {
+    "cell_type": "markdown",
+    "source": "## 3. Perform fine-tuning and capture the training process"
+  },
+  {
+    "cell_type": "code",
+    "source": "eval_result_before = trainer.evaluate()\n\n# Start training\ntrainer.train()\n\neval_result_after = trainer.evaluate()"
   },
   {
     "cell_type": "code",
-    "source": "
+    "source": "print(f'Before training: {eval_result_before}')\nprint(f'After training: {eval_result_after}')"
   },
   {
     "cell_type": "markdown",
-    "source": "##
+    "source": "## 4. Push model to hub (Optional)"
   },
   {
     "cell_type": "code",