sergioburdisso committed
Commit 3eaee80
1 Parent(s): 7861562

Push model to huggingface

1_Pooling/config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "word_embedding_dimension": 384,
+ "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
README.md CHANGED
@@ -11,13 +11,14 @@ datasets:
  - Salesforce/dialogstudio
  pipeline_tag: sentence-similarity
  base_model:
- - google-bert/bert-base-uncased
+ - aws-ai/dse-bert-base
  ---


- # Dialog2Flow joint target (BERT-base)
+ # Dialog2Flow single target (DSE-base)

- This is the original **D2F$_{joint}$** model introduced in the paper ["Dialog2Flow: Pre-training Soft-Contrastive Action-Driven Sentence Embeddings for Automatic Dialog Flow Extraction"](https://publications.idiap.ch/attachments/papers/2024/Burdisso_EMNLP2024_2024.pdf) published in the EMNLP 2024 main conference.
+ This is a variation of the **D2F$_{single}$** model introduced in the paper ["Dialog2Flow: Pre-training Soft-Contrastive Action-Driven Sentence Embeddings for Automatic Dialog Flow Extraction"](https://publications.idiap.ch/attachments/papers/2024/Burdisso_EMNLP2024_2024.pdf) published in the EMNLP 2024 main conference.
+ This version uses DSE-base as the backbone model, which yields an increase in performance compared to the vanilla version using BERT-base as the backbone (results reported in Appendix C).

  Implementation-wise, this is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for tasks like clustering or search.

@@ -37,7 +38,7 @@ Then you can use the model like this:
  from sentence_transformers import SentenceTransformer
  sentences = ["your phone please", "okay may i have your telephone number please"]

- model = SentenceTransformer('sergioburdisso/dialog2flow-joint-bert-base')
+ model = SentenceTransformer('sergioburdisso/dialog2flow-single-dse-base')
  embeddings = model.encode(sentences)
  print(embeddings)
  ```
@@ -63,8 +64,8 @@ def mean_pooling(model_output, attention_mask):
  sentences = ['your phone please', 'okay may i have your telephone number please']

  # Load model from HuggingFace Hub
- tokenizer = AutoTokenizer.from_pretrained('sergioburdisso/dialog2flow-joint-bert-base')
- model = AutoModel.from_pretrained('sergioburdisso/dialog2flow-joint-bert-base')
+ tokenizer = AutoTokenizer.from_pretrained('sergioburdisso/dialog2flow-single-dse-base')
+ model = AutoModel.from_pretrained('sergioburdisso/dialog2flow-single-dse-base')

  # Tokenize sentences
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
@@ -153,4 +154,4 @@ SentenceTransformer(
  ## License

  Copyright (c) 2024 [Idiap Research Institute](https://www.idiap.ch/).
- MIT License.
+ MIT License.
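The two usage snippets updated above can be combined into a single end-to-end check; the cosine-similarity step below is illustrative and not part of the README itself:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sergioburdisso/dialog2flow-single-dse-base")
sentences = ["your phone please", "okay may i have your telephone number please"]

# Both utterances realize the same dialog act, so their embeddings
# should be close in the action-driven embedding space.
embeddings = model.encode(sentences, convert_to_tensor=True)
print(util.cos_sim(embeddings[0], embeddings[1]).item())
```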
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
+ "_name_or_path": "/idiap/temp/sburdisso/repos/jsalt/keya-dialog/outputs/tod_das+slots/bert-base-uncased/soft-labels/label_multi-qa-mpnet-base-dot-v1_t0.35/msl64_pm-mean/ch-on_t0.05/lr3e-06_bs64_e15/best_model_metric_0",
  "architectures": [
  "BertModel"
  ],
@@ -8,14 +8,14 @@
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
- "hidden_size": 384,
+ "hidden_size": 768,
  "initializer_range": 0.02,
- "intermediate_size": 1536,
+ "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
- "num_hidden_layers": 6,
+ "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
  {
  "__version__": {
- "sentence_transformers": "2.0.0",
- "transformers": "4.6.1",
- "pytorch": "1.8.1"
+ "sentence_transformers": "2.2.2",
+ "transformers": "4.30.2",
+ "pytorch": "2.0.1"
  }
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1377e9af0ca0b016a9f2aa584d6fc71ab3ea6804fae21ef9fb1416e2944057ac
- size 90864192
+ oid sha256:133a5d2947ff9797b9bddeef74fcc957f7485fc0d219e59362e8489e9a4c3b76
+ size 437951328
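The new weight file size is consistent with the architecture change: 437,951,328 bytes / 4 bytes per float32 parameter ≈ 109.5M parameters, in line with a BERT-base-sized encoder (~110M), whereas the previous 90,864,192 bytes ≈ 22.7M parameters matched the 6-layer, 384-dimensional configuration.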
modules.json CHANGED
@@ -10,11 +10,5 @@
  "name": "1",
  "path": "1_Pooling",
  "type": "sentence_transformers.models.Pooling"
- },
- {
- "idx": 2,
- "name": "2",
- "path": "2_Normalize",
- "type": "sentence_transformers.models.Normalize"
  }
  ]
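Dropping the `2_Normalize` module means `encode()` now returns raw mean-pooled vectors rather than unit-length ones, so dot product and cosine similarity are no longer interchangeable. Callers who relied on normalized outputs can restore them explicitly (`normalize_embeddings` is standard sentence-transformers API):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sergioburdisso/dialog2flow-single-dse-base")

# With 2_Normalize removed, request unit-length vectors explicitly
# if downstream code assumes normalized embeddings.
embeddings = model.encode(["your phone please"], normalize_embeddings=True)
```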
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
  {
- "max_seq_length": 256,
+ "max_seq_length": 64,
  "do_lower_case": false
  }
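Inputs are now truncated at 64 tokens instead of 256, matching the short-utterance setting the model targets. The effective limit can be inspected (and overridden) on the loaded model:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sergioburdisso/dialog2flow-single-dse-base")
print(model.max_seq_length)  # 64 after this change; longer inputs are truncated
```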
tokenizer.json CHANGED
@@ -2,14 +2,12 @@
  "version": "1.0",
  "truncation": {
  "direction": "Right",
- "max_length": 128,
+ "max_length": 64,
  "strategy": "LongestFirst",
  "stride": 0
  },
  "padding": {
- "strategy": {
- "Fixed": 128
- },
+ "strategy": "BatchLongest",
  "direction": "Right",
  "pad_to_multiple_of": null,
  "pad_id": 0,
tokenizer_config.json CHANGED
@@ -43,12 +43,10 @@
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
- "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
- "max_length": 128,
+ "max_length": 64,
  "model_max_length": 512,
- "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,