Upload 5 files

Files changed (5) hide show

model_config.yaml ADDED Viewed

+# Identity of this config entry.
+config_type: model
+name: roberta_text_classification
+task: text_classification
+# Classification labels: three classes keyed by integer id.
+num_labels: 3
+id2label:
+  0: negative
+  1: positive
+  2: neutral
+# Special-token ids.
+bos_token_id: 0
+pad_token_id: 1
+eos_token_id: 2
+# Size settings.
+vocab_size: 42000
+type_vocab_size: 1
+max_position_embeddings: 514
+hidden_size: 768
+intermediate_size: 3072
+num_hidden_layers: 12
+num_attention_heads: 12
+# Remaining architecture and initialisation settings.
+position_embedding_type: absolute
+hidden_act: gelu
+layer_norm_eps: 1.0e-12
+initializer_range: 0.02
+# Dropout probabilities.
+hidden_dropout_prob: 0.1
+attention_probs_dropout_prob: 0.1
+# Boolean flags.
+use_cache: true
+gradient_checkpointing: false

preprocessor/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor/tokenizer_config.yaml ADDED Viewed

+# BPE tokenizer (preprocessor) configuration.
+name: bpe_tokenizer
+config_type: preprocessor
+pretrained_path: hezarai/roberta-base-fa
+# Truncation settings.
+max_length: 512
+truncation_strategy: longest_first
+truncation_direction: right
+stride: 0
+# Padding settings.
+padding_strategy: longest
+padding_direction: right
+pad_to_multiple_of: 0
+# Must agree with pad_token_id in model_config.yaml (1). Id 0 is bos_token_id
+# there, and <pad> is the second entry of special_tokens below.
+# NOTE(review): tokenizer.json is not visible here; confirm <pad> maps to id 1.
+pad_token_id: 1
+pad_token: <pad>
+pad_token_type_id: 0
+unk_token: <unk>
+# Special tokens; the first three line up with bos/pad/eos ids 0/1/2 in
+# model_config.yaml.
+special_tokens:
+- <s>
+- <pad>
+- </s>
+- <unk>
+- <mask>
+- <|endoftext|>
+- <|startoftext|>
+- <nl>
+- <hs>
+- <sep>
+- <cls>
+# Empty strings: no continuing-subword prefix and no end-of-word suffix.
+continuing_subword_prefix: ''
+end_of_word_suffix: ''
+fuse_unk: false
+# Settings for training this tokenizer.
+# NOTE(review): vocab_size here (30000) differs from vocab_size: 42000 in
+# model_config.yaml; presumably these are unused trainer defaults. Confirm.
+train_config:
+  name: bpe_tokenizer
+  config_type: preprocessor
+  vocab_size: 30000
+  min_frequency: 2
+  limit_alphabet: 1000
+  show_progress: true

train/dataset_config.yaml ADDED Viewed

+# Identity of this config entry.
+config_type: dataset
+name: text_classification
+task: text_classification
+# Dataset and tokenizer locations.
+path: hezarai/sentiment_digikala_snappfood
+tokenizer_path: hezarai/roberta-base-fa
+# Field names for the input text and its label.
+# NOTE(review): presumably column names in the dataset; confirm against the loader.
+text_field: text
+label_field: label
+# Label mapping in both directions; the two maps mirror each other.
+num_labels: 3
+label2id:
+  negative: 0
+  positive: 1
+  neutral: 2
+id2label:
+  0: negative
+  1: positive
+  2: neutral

train/train_config.yaml ADDED Viewed

+# Identity of this config entry.
+config_type: train
+name: roberta_text_classification
+# Starting weights and checkpoint output directory.
+init_weights_from: hezarai/roberta-base-fa
+checkpoints_dir: checkpoints/
+# Run settings.
+device: cuda
+seed: 42
+num_train_epochs: 5
+batch_size: 8
+# Metrics: f1, configured with task "multiclass".
+metrics:
+  f1:
+    task: multiclass