Now with the correct TavBERT model

Files changed (4)

config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "_name_or_path": "tau/tavbert-ar",
   "architectures": [
-    "RobertaForTokenClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,

 {
+  "_name_or_path": "./training_5labs/tavbert-tashkeela-pretraining/checkpoint-21098/",
   "architectures": [
+    "TaatikModel"
   ],
   "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81dbe43961509b495e3dbfbc084c8900fbe2c5a8b93c5bd169b7d204f462434d
-size 349857052

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f7641ddd572433f25139514654fcb1a55ba33d467f5d6185a7d6c393f3a9dc5
+size 349857228

tokenizer.json CHANGED Viewed

@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
   "padding": {
     "strategy": "BatchLongest",
     "direction": "Right",

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 512,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": {
     "strategy": "BatchLongest",
     "direction": "Right",

tokenizer_config.json CHANGED Viewed

@@ -44,7 +44,7 @@
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",

   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "mask_token": "[MASK]",
+  "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",