arxyzan committed on
Commit
d57bcf7
1 Parent(s): cab9266

Upload 5 files

Browse files
model_config.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: distilbert_text_classification
2
+ config_type: model
3
+ task: text_classification
4
+ num_labels: 3
5
+ id2label:
6
+ '0': negative
7
+ '1': positive
8
+ '2': neutral
9
+ activation: gelu
10
+ attention_dropout: 0.1
11
+ dim: 768
12
+ dropout: 0.1
13
+ hidden_dim: 3072
14
+ initializer_range: 0.02
15
+ max_position_embeddings: 512
16
+ n_heads: 12
17
+ n_layers: 6
18
+ output_past: true
19
+ pad_token_id: 0
20
+ qa_dropout: 0.1
21
+ tie_weights_: true
22
+ vocab_size: 42000
preprocessor/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor/tokenizer_config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: wordpiece_tokenizer
2
+ config_type: preprocessor
3
+ pretrained_path: hezar-ai/bert-base-fa
4
+ max_length: 512
5
+ truncation_strategy: longest_first
6
+ truncation_direction: right
7
+ stride: 0
8
+ padding_strategy: longest
9
+ padding_direction: right
10
+ pad_to_multiple_of: 0
11
+ pad_token_id: 0
12
+ pad_token: '[PAD]'
13
+ pad_token_type_id: 0
14
+ unk_token: '[UNK]'
15
+ special_tokens:
16
+ - '[UNK]'
17
+ - '[SEP]'
18
+ - '[CLS]'
19
+ - '[PAD]'
20
+ - '[MASK]'
21
+ wordpieces_prefix: '##'
22
+ train_config:
23
+ name: wordpiece_tokenizer
24
+ config_type: preprocessor
25
+ vocab_size: 30000
26
+ min_frequency: 2
27
+ limit_alphabet: 1000
28
+ initial_alphabet: []
29
+ show_progress: true
train/dataset_config.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: text_classification
2
+ config_type: dataset
3
+ task: text_classification
4
+ path: hezar-ai/sentiment_digikala_snappfood
5
+ tokenizer_path: hezar-ai/bert-base-fa
6
+ label_field: label
7
+ text_field: text
8
+ id2label:
9
+ '0': negative
10
+ '1': positive
11
+ '2': neutral
12
+ label2id:
13
+ negative: '0'
14
+ positive: '1'
15
+ neutral: '2'
16
+ num_labels: 3
train/train_config.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: bert_text_classification
2
+ config_type: train
3
+ device: cuda
4
+ init_weights_from: hezar-ai/bert-base-fa
5
+ seed: 42
6
+ batch_size: 8
7
+ metrics:
8
+ f1:
9
+ task: multiclass
10
+ num_train_epochs: 10
11
+ checkpoints_dir: checkpoints/