baptiste-pasquier
committed on
Commit
•
c40e290
1
Parent(s):
ef176de
update model
Browse files
- README.md +3 -3
- pytorch_model.bin +1 -1
- train_log.txt +24 -12
- training_args.json +4 -4
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
## TextAttack Model Card
|
2 |
|
3 |
This `cmarkea/distilcamembert-base` model was fine-tuned using TextAttack and the `allocine` dataset loaded using the `datasets` library. The model was fine-tuned
|
4 |
-
for
|
5 |
a maximum sequence length of 512, and an initial learning rate of 5e-05.
|
6 |
Since this was a classification task, the model was trained with a cross-entropy loss function.
|
7 |
-
The best score the model achieved on this task was 0.
|
8 |
-
eval set accuracy, found after
|
9 |
|
10 |
For more information, check out [TextAttack on Github](https://github.com/QData/TextAttack).
|
|
|
1 |
## TextAttack Model Card
|
2 |
|
3 |
This `cmarkea/distilcamembert-base` model was fine-tuned using TextAttack and the `allocine` dataset loaded using the `datasets` library. The model was fine-tuned
|
4 |
+
for 3 epochs with a batch size of 64,
|
5 |
a maximum sequence length of 512, and an initial learning rate of 5e-05.
|
6 |
Since this was a classification task, the model was trained with a cross-entropy loss function.
|
7 |
+
The best score the model achieved on this task was 0.9707, as measured by the
|
8 |
+
eval set accuracy, found after 3 epochs.
|
9 |
|
10 |
For more information, check out [TextAttack on Github](https://github.com/QData/TextAttack).
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 272425205
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cf75746015301662f1a8677d6e45d6c0efba735b927358db3cf1c57975d0875
|
3 |
size 272425205
|
train_log.txt
CHANGED
@@ -1,17 +1,29 @@
|
|
1 |
-
Writing logs to ./outputs/2023-02-
|
2 |
-
Wrote original training args to ./outputs/2023-02-
|
3 |
***** Running training *****
|
4 |
Num examples = 160000
|
5 |
-
Num epochs =
|
6 |
-
Num clean epochs =
|
7 |
-
Instantaneous batch size per device =
|
8 |
-
Total train batch size (w. parallel, distributed & accumulation) =
|
9 |
Gradient accumulation steps = 1
|
10 |
-
Total optimization steps =
|
11 |
==========================================================
|
12 |
Epoch 1
|
13 |
-
Running clean epoch 1/
|
14 |
-
Train accuracy:
|
15 |
-
Eval accuracy: 96.
|
16 |
-
Best score found. Saved model to ./outputs/2023-02-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Writing logs to ./outputs/2023-02-12-23-30-37-265125/train_log.txt.
|
2 |
+
Wrote original training args to ./outputs/2023-02-12-23-30-37-265125/training_args.json.
|
3 |
***** Running training *****
|
4 |
Num examples = 160000
|
5 |
+
Num epochs = 3
|
6 |
+
Num clean epochs = 3
|
7 |
+
Instantaneous batch size per device = 64
|
8 |
+
Total train batch size (w. parallel, distributed & accumulation) = 64
|
9 |
Gradient accumulation steps = 1
|
10 |
+
Total optimization steps = 7500
|
11 |
==========================================================
|
12 |
Epoch 1
|
13 |
+
Running clean epoch 1/3
|
14 |
+
Train accuracy: 94.11%
|
15 |
+
Eval accuracy: 96.77%
|
16 |
+
Best score found. Saved model to ./outputs/2023-02-12-23-30-37-265125/best_model/
|
17 |
+
==========================================================
|
18 |
+
Epoch 2
|
19 |
+
Running clean epoch 2/3
|
20 |
+
Train accuracy: 97.52%
|
21 |
+
Eval accuracy: 96.95%
|
22 |
+
Best score found. Saved model to ./outputs/2023-02-12-23-30-37-265125/best_model/
|
23 |
+
==========================================================
|
24 |
+
Epoch 3
|
25 |
+
Running clean epoch 3/3
|
26 |
+
Train accuracy: 98.70%
|
27 |
+
Eval accuracy: 97.07%
|
28 |
+
Best score found. Saved model to ./outputs/2023-02-12-23-30-37-265125/best_model/
|
29 |
+
Wrote README to ./outputs/2023-02-12-23-30-37-265125/README.md.
|
training_args.json
CHANGED
@@ -9,14 +9,14 @@
|
|
9 |
"dataset_eval_split": "validation",
|
10 |
"filter_train_by_labels": null,
|
11 |
"filter_eval_by_labels": null,
|
12 |
-
"num_epochs":
|
13 |
"num_clean_epochs": 1,
|
14 |
"attack_epoch_interval": 1,
|
15 |
"early_stopping_epochs": null,
|
16 |
"learning_rate": 5e-05,
|
17 |
"num_warmup_steps": 500,
|
18 |
"weight_decay": 0.01,
|
19 |
-
"per_device_train_batch_size":
|
20 |
"per_device_eval_batch_size": 32,
|
21 |
"gradient_accumulation_steps": 1,
|
22 |
"random_seed": 786,
|
@@ -26,11 +26,11 @@
|
|
26 |
"num_train_adv_examples": -1,
|
27 |
"query_budget_train": null,
|
28 |
"attack_num_workers_per_device": 1,
|
29 |
-
"output_dir": "./outputs/2023-02-
|
30 |
"checkpoint_interval_steps": null,
|
31 |
"checkpoint_interval_epochs": null,
|
32 |
"save_last": true,
|
33 |
-
"log_to_tb":
|
34 |
"tb_log_dir": null,
|
35 |
"log_to_wandb": false,
|
36 |
"wandb_project": "textattack",
|
|
|
9 |
"dataset_eval_split": "validation",
|
10 |
"filter_train_by_labels": null,
|
11 |
"filter_eval_by_labels": null,
|
12 |
+
"num_epochs": 3,
|
13 |
"num_clean_epochs": 1,
|
14 |
"attack_epoch_interval": 1,
|
15 |
"early_stopping_epochs": null,
|
16 |
"learning_rate": 5e-05,
|
17 |
"num_warmup_steps": 500,
|
18 |
"weight_decay": 0.01,
|
19 |
+
"per_device_train_batch_size": 64,
|
20 |
"per_device_eval_batch_size": 32,
|
21 |
"gradient_accumulation_steps": 1,
|
22 |
"random_seed": 786,
|
|
|
26 |
"num_train_adv_examples": -1,
|
27 |
"query_budget_train": null,
|
28 |
"attack_num_workers_per_device": 1,
|
29 |
+
"output_dir": "./outputs/2023-02-12-23-30-37-265125",
|
30 |
"checkpoint_interval_steps": null,
|
31 |
"checkpoint_interval_epochs": null,
|
32 |
"save_last": true,
|
33 |
+
"log_to_tb": false,
|
34 |
"tb_log_dir": null,
|
35 |
"log_to_wandb": false,
|
36 |
"wandb_project": "textattack",
|