guymorlan committed on
Commit
c33e789
•
1 Parent(s): d5646eb

now with correct tavbert model

Browse files
Files changed (4) hide show
  1. config.json +2 -2
  2. model.safetensors +2 -2
  3. tokenizer.json +6 -1
  4. tokenizer_config.json +1 -1
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "_name_or_path": "tau/tavbert-ar",
3
  "architectures": [
4
- "RobertaForTokenClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
 
1
  {
2
+ "_name_or_path": "./training_5labs/tavbert-tashkeela-pretraining/checkpoint-21098/",
3
  "architectures": [
4
+ "TaatikModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81dbe43961509b495e3dbfbc084c8900fbe2c5a8b93c5bd169b7d204f462434d
3
- size 349857052
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f7641ddd572433f25139514654fcb1a55ba33d467f5d6185a7d6c393f3a9dc5
3
+ size 349857228
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": {
5
  "strategy": "BatchLongest",
6
  "direction": "Right",
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": {
10
  "strategy": "BatchLongest",
11
  "direction": "Right",
tokenizer_config.json CHANGED
@@ -44,7 +44,7 @@
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "mask_token": "[MASK]",
47
- "model_max_length": 1000000000000000019884624838656,
48
  "pad_token": "[PAD]",
49
  "sep_token": "[SEP]",
50
  "tokenizer_class": "PreTrainedTokenizerFast",
 
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "mask_token": "[MASK]",
47
+ "model_max_length": 512,
48
  "pad_token": "[PAD]",
49
  "sep_token": "[SEP]",
50
  "tokenizer_class": "PreTrainedTokenizerFast",