jegormeister committed on
Commit
a134403
1 Parent(s): 7474f96

Update model with new data

Browse files
1_Pooling/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "word_embedding_dimension": 128,
3
  "pooling_mode_cls_token": false,
4
  "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 256,
3
  "pooling_mode_cls_token": false,
4
  "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
README.md CHANGED
@@ -7,9 +7,9 @@ tags:
7
  - transformers
8
  ---
9
 
10
- # {MODEL_NAME}
11
 
12
- This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 128 dimensional dense vector space and can be used for tasks like clustering or semantic search.
13
 
14
  <!--- Describe your model here -->
15
 
@@ -27,7 +27,7 @@ Then you can use the model like this:
27
  from sentence_transformers import SentenceTransformer
28
  sentences = ["This is an example sentence", "Each sentence is converted"]
29
 
30
- model = SentenceTransformer('{MODEL_NAME}')
31
  embeddings = model.encode(sentences)
32
  print(embeddings)
33
  ```
@@ -53,8 +53,8 @@ def mean_pooling(model_output, attention_mask):
53
  sentences = ['This is an example sentence', 'Each sentence is converted']
54
 
55
  # Load model from HuggingFace Hub
56
- tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
57
- model = AutoModel.from_pretrained('{MODEL_NAME}')
58
 
59
  # Tokenize sentences
60
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
@@ -76,7 +76,7 @@ print(sentence_embeddings)
76
 
77
  <!--- Describe how your model was evaluated -->
78
 
79
- For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
80
 
81
 
82
  ## Training
@@ -84,7 +84,7 @@ The model was trained with the parameters:
84
 
85
  **DataLoader**:
86
 
87
- `torch.utils.data.dataloader.DataLoader` of length 559 with parameters:
88
  ```
89
  {'batch_size': 8, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
90
  ```
@@ -100,17 +100,17 @@ Parameters of the fit()-Method:
100
  ```
101
  {
102
  "callback": null,
103
- "epochs": 3,
104
  "evaluation_steps": 0,
105
- "evaluator": "NoneType",
106
  "max_grad_norm": 1,
107
  "optimizer_class": "<class 'transformers.optimization.AdamW'>",
108
  "optimizer_params": {
109
- "lr": 1.25e-05
110
  },
111
  "scheduler": "WarmupLinear",
112
  "steps_per_epoch": null,
113
- "warmup_steps": 336,
114
  "weight_decay": 0.01
115
  }
116
  ```
@@ -120,7 +120,7 @@ Parameters of the fit()-Method:
120
  ```
121
  SentenceTransformer(
122
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
123
- (1): Pooling({'word_embedding_dimension': 128, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
124
  )
125
  ```
126
 
 
7
  - transformers
8
  ---
9
 
10
+ # bert-base-dutch-cased-snli
11
 
12
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 256 dimensional dense vector space and can be used for tasks like clustering or semantic search.
13
 
14
  <!--- Describe your model here -->
15
 
 
27
  from sentence_transformers import SentenceTransformer
28
  sentences = ["This is an example sentence", "Each sentence is converted"]
29
 
30
+ model = SentenceTransformer('bert-base-dutch-cased-snli')
31
  embeddings = model.encode(sentences)
32
  print(embeddings)
33
  ```
 
53
  sentences = ['This is an example sentence', 'Each sentence is converted']
54
 
55
  # Load model from HuggingFace Hub
56
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-dutch-cased-snli')
57
+ model = AutoModel.from_pretrained('bert-base-dutch-cased-snli')
58
 
59
  # Tokenize sentences
60
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
 
76
 
77
  <!--- Describe how your model was evaluated -->
78
 
79
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=bert-base-dutch-cased-snli)
80
 
81
 
82
  ## Training
 
84
 
85
  **DataLoader**:
86
 
87
+ `torch.utils.data.dataloader.DataLoader` of length 339 with parameters:
88
  ```
89
  {'batch_size': 8, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
90
  ```
 
100
  ```
101
  {
102
  "callback": null,
103
+ "epochs": 1,
104
  "evaluation_steps": 0,
105
+ "evaluator": "utils.CombEvaluator",
106
  "max_grad_norm": 1,
107
  "optimizer_class": "<class 'transformers.optimization.AdamW'>",
108
  "optimizer_params": {
109
+ "lr": 2e-05
110
  },
111
  "scheduler": "WarmupLinear",
112
  "steps_per_epoch": null,
113
+ "warmup_steps": 10000,
114
  "weight_decay": 0.01
115
  }
116
  ```
 
120
  ```
121
  SentenceTransformer(
122
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
123
+ (1): Pooling({'word_embedding_dimension': 256, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
124
  )
125
  ```
126
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "GroNLP/bert-base-dutch-cased",
3
  "architectures": [
4
  "BertModel"
5
  ],
@@ -17,7 +17,8 @@
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 3,
19
  "position_embedding_type": "absolute",
20
- "transformers_version": "4.8.1",
 
21
  "type_vocab_size": 2,
22
  "use_cache": true,
23
  "vocab_size": 30073
 
1
  {
2
+ "_name_or_path": "./bert-base-dutch-cased-snli/",
3
  "architectures": [
4
  "BertModel"
5
  ],
 
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 3,
19
  "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.9.1",
22
  "type_vocab_size": 2,
23
  "use_cache": true,
24
  "vocab_size": 30073
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.0.0",
4
- "transformers": "4.8.1",
5
- "pytorch": "1.6.0"
6
  }
7
  }
 
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.0.0",
4
+ "transformers": "4.9.1",
5
+ "pytorch": "1.9.0+cu102"
6
  }
7
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a58ecac26466378a622f286b45c08dcd9e7ce5bc35b153d3de5208b50db90a6c
3
- size 436634197
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b03cd6620d16ba1ed4f3d7b9ff94c8104284f3daa9b80f024f4419e5bede58
3
+ size 436630961
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "/root/.cache/huggingface/transformers/adb82a117c09b0f8768357de8e836a9e0610730782f82edc49dd0020c48f1d03.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "GroNLP/bert-base-dutch-cased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "/root/.cache/huggingface/transformers/adb82a117c09b0f8768357de8e836a9e0610730782f82edc49dd0020c48f1d03.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "./bert-base-dutch-cased-snli/", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}