3v324v23 committed on
Commit
3debf49
1 Parent(s): 0b63fff
README.md CHANGED
@@ -4,7 +4,6 @@ tags:
4
  - sentence-transformers
5
  - feature-extraction
6
  - sentence-similarity
7
- - transformers
8
  ---
9
 
10
  # {MODEL_NAME}
@@ -34,44 +33,6 @@ print(embeddings)
34
 
35
 
36
 
37
- ## Usage (HuggingFace Transformers)
38
- Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
39
-
40
- ```python
41
- from transformers import AutoTokenizer, AutoModel
42
- import torch
43
-
44
-
45
- #Mean Pooling - Take attention mask into account for correct averaging
46
- def mean_pooling(model_output, attention_mask):
47
- token_embeddings = model_output[0] #First element of model_output contains all token embeddings
48
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
49
- return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
50
-
51
-
52
- # Sentences we want sentence embeddings for
53
- sentences = ['This is an example sentence', 'Each sentence is converted']
54
-
55
- # Load model from HuggingFace Hub
56
- tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
57
- model = AutoModel.from_pretrained('{MODEL_NAME}')
58
-
59
- # Tokenize sentences
60
- encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
61
-
62
- # Compute token embeddings
63
- with torch.no_grad():
64
- model_output = model(**encoded_input)
65
-
66
- # Perform pooling. In this case, mean pooling.
67
- sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
68
-
69
- print("Sentence embeddings:")
70
- print(sentence_embeddings)
71
- ```
72
-
73
-
74
-
75
  ## Evaluation Results
76
 
77
  <!--- Describe how your model was evaluated -->
@@ -115,8 +76,9 @@ Parameters of the fit()-Method:
115
  ## Full Model Architecture
116
  ```
117
  SentenceTransformer(
118
- (0): Transformer({'max_seq_length': 514, 'do_lower_case': False}) with Transformer model: MPNetModel
119
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
 
120
  )
121
  ```
122
 
 
4
  - sentence-transformers
5
  - feature-extraction
6
  - sentence-similarity
 
7
  ---
8
 
9
  # {MODEL_NAME}
 
33
 
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ## Evaluation Results
37
 
38
  <!--- Describe how your model was evaluated -->
 
76
  ## Full Model Architecture
77
  ```
78
  SentenceTransformer(
79
+ (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel
80
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
81
+ (2): Normalize()
82
  )
83
  ```
84
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/root/.cache/torch/sentence_transformers/microsoft_mpnet-base",
3
  "architectures": [
4
  "MPNetModel"
5
  ],
 
1
  {
2
+ "_name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_all-mpnet-base-v2/",
3
  "architectures": [
4
  "MPNetModel"
5
  ],
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "__version__": {
3
- "sentence_transformers": "2.1.0",
4
- "transformers": "4.21.1",
5
- "pytorch": "1.7.0+cu110"
6
  }
7
  }
 
1
  {
2
  "__version__": {
3
+ "sentence_transformers": "2.0.0",
4
+ "transformers": "4.6.1",
5
+ "pytorch": "1.8.1"
6
  }
7
  }
modules.json CHANGED
@@ -10,5 +10,11 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35de644b5509f5efa3a0365d02ef2f48ff01e1284eb0bb80ff6e0d85680e02f6
3
  size 438019895
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2041457ed41c970a055ed9202a53a80dda4ba3ae8e3d9b31d780baea927ba402
3
  size 438019895
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "max_seq_length": 514,
3
  "do_lower_case": false
4
  }
 
1
  {
2
+ "max_seq_length": 384,
3
  "do_lower_case": false
4
  }
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 514,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 384,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -4,7 +4,8 @@
4
  "do_lower_case": true,
5
  "eos_token": "</s>",
6
  "mask_token": "<mask>",
7
- "name_or_path": "/root/.cache/torch/sentence_transformers/microsoft_mpnet-base",
 
8
  "pad_token": "<pad>",
9
  "sep_token": "</s>",
10
  "special_tokens_map_file": null,
 
4
  "do_lower_case": true,
5
  "eos_token": "</s>",
6
  "mask_token": "<mask>",
7
+ "model_max_length": 512,
8
+ "name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_all-mpnet-base-v2/",
9
  "pad_token": "<pad>",
10
  "sep_token": "</s>",
11
  "special_tokens_map_file": null,