taskswithcode committed on
Commit
0242b2e
1 Parent(s): 0c1f2c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -21
app.py CHANGED
@@ -2,18 +2,91 @@ import time
2
  import streamlit as st
3
  import string
4
  from io import StringIO
 
5
  import json
6
- from transformers import BertTokenizer, BertForMaskedLM
 
 
 
7
 
8
- MAX_INPUT = 1000
 
9
 
10
  model_names = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  { "name":"SGPT-125M",
12
  "model":"Muennighoff/SGPT-125M-weightedmean-nli-bitfit",
13
- "mark":False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "class":"SGPTModel"},
15
-
16
-
17
  { "name":"SGPT-5.8B",
18
  "model": "Muennighoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit" ,
19
  "fork_url":"https://github.com/taskswithcode/sgpt",
@@ -27,28 +100,39 @@ model_names = [
27
  "mark":True,
28
  "class":"SGPTModel"},
29
 
30
- { "name":"SGPT-1.3B",
31
- "model": "Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
32
- "mark":False,
33
- "class":"SGPTModel"},
 
 
 
 
 
 
 
 
34
 
35
- { "name":"sentence-transformers/all-MiniLM-L6-v2",
36
- "model":"sentence-transformers/all-MiniLM-L6-v2",
37
- "fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
38
- "orig_author_url":"https://github.com/UKPLab",
39
- "orig_author":"Ubiquitous Knowledge Processing Lab",
40
  "sota_info": {
41
- "task":"Nearly 4 million downloads from huggingface",
42
- "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
43
  },
44
- "paper_url":"https://arxiv.org/abs/1908.10084",
45
  "mark":True,
46
- "class":"HFModel"},
 
47
 
48
  ]
49
 
50
 
51
 
 
 
52
  example_file_names = {
53
  "Machine learning terms (30+ phrases)": "tests/small_test.txt",
54
  "Customer feedback mixed with noise (50+ sentences)":"tests/larger_test.txt"
@@ -61,15 +145,17 @@ def construct_model_info_for_display():
61
  for node in model_names:
62
  options_arr .append(node["name"])
63
  if (node["mark"] == True):
64
- markdown_str += f"<div style=\"font-size:16px; color: #5f5f5f; text-align: left\">&nbsp;•&nbsp;Model:&nbsp;<a href=\'{node['paper_url']}\' target='_blank'>{node['name']}</a><br/>&nbsp;&nbsp;&nbsp;&nbsp;Code released by:&nbsp;<a href=\'{node['orig_author_url']}\' target='_blank'>{node['orig_author']}</a><br/>&nbsp;&nbsp;&nbsp;&nbsp;Model info:&nbsp;<a href=\'{node['sota_info']['sota_link']}\' target='_blank'>{node['sota_info']['task']}</a><br/>&nbsp;&nbsp;&nbsp;&nbsp;Forked <a href=\'{node['fork_url']}\' target='_blank'>code</a><br/><br/></div>"
65
  markdown_str += "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><b>Note:</b><br/>•&nbsp;Uploaded files are loaded into non-persistent memory for the duration of the computation. They are not saved</div>"
66
  limit = "{:,}".format(MAX_INPUT)
67
  markdown_str += f"<div style=\"font-size:12px; color: #9f9f9f; text-align: left\">•&nbsp;User uploaded file has a maximum limit of {limit} sentences.</div>"
68
  return options_arr,markdown_str
69
 
70
 
71
- st.set_page_config(page_title='TWC - Compare state-of-the-art models for Sentence Similarity task', page_icon="logo.jpg", layout='centered', initial_sidebar_state='auto',
72
  menu_items={
 
 
73
  'About': 'This app was created by taskswithcode. http://taskswithcode.com'
74
  })
75
  col,pad = st.columns([85,15])
@@ -153,7 +239,7 @@ def init_session():
153
 
154
  def main():
155
  init_session()
156
- st.markdown("<h4 style='text-align: center;'>Compare state-of-the-art models for Sentence Similarity task</h4>", unsafe_allow_html=True)
157
 
158
 
159
  try:
 
2
  import streamlit as st
3
  import string
4
  from io import StringIO
5
+ import pdb
6
  import json
7
+ from twc_embeddings import HFModel,SimCSEModel,SGPTModel
8
+
9
+
10
+ MAX_INPUT = 10000
11
 
12
+
13
+ from transformers import BertTokenizer, BertForMaskedLM
14
 
15
  model_names = [
16
+
17
+ { "name":"sentence-transformers/all-MiniLM-L6-v2",
18
+ "model":"sentence-transformers/all-MiniLM-L6-v2",
19
+ "fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
20
+ "orig_author_url":"https://github.com/UKPLab",
21
+ "orig_author":"Ubiquitous Knowledge Processing Lab",
22
+ "sota_info": {
23
+ "task":"Over 3.8 million downloads from huggingface",
24
+ "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
25
+ },
26
+ "paper_url":"https://arxiv.org/abs/1908.10084",
27
+ "mark":True,
28
+ "class":"HFModel"},
29
+ { "name":"sentence-transformers/paraphrase-MiniLM-L6-v2",
30
+ "model":"sentence-transformers/paraphrase-MiniLM-L6-v2",
31
+ "fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
32
+ "orig_author_url":"https://github.com/UKPLab",
33
+ "orig_author":"Ubiquitous Knowledge Processing Lab",
34
+ "sota_info": {
35
+ "task":"Over 2.4 million downloads from huggingface",
36
+ "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
37
+ },
38
+ "paper_url":"https://arxiv.org/abs/1908.10084",
39
+ "mark":True,
40
+ "class":"HFModel"},
41
+ { "name":"sentence-transformers/bert-base-nli-mean-tokens",
42
+ "model":"sentence-transformers/bert-base-nli-mean-tokens",
43
+ "fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
44
+ "orig_author_url":"https://github.com/UKPLab",
45
+ "orig_author":"Ubiquitous Knowledge Processing Lab",
46
+ "sota_info": {
47
+ "task":"Over 700,000 downloads from huggingface",
48
+ "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
49
+ },
50
+ "paper_url":"https://arxiv.org/abs/1908.10084",
51
+ "mark":True,
52
+ "class":"HFModel"},
53
+ { "name":"sentence-transformers/all-mpnet-base-v2",
54
+ "model":"sentence-transformers/all-mpnet-base-v2",
55
+ "fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
56
+ "orig_author_url":"https://github.com/UKPLab",
57
+ "orig_author":"Ubiquitous Knowledge Processing Lab",
58
+ "sota_info": {
59
+ "task":"Over 500,000 downloads from huggingface",
60
+ "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
61
+ },
62
+ "paper_url":"https://arxiv.org/abs/1908.10084",
63
+ "mark":True,
64
+ "class":"HFModel"},
65
+
66
  { "name":"SGPT-125M",
67
  "model":"Muennighoff/SGPT-125M-weightedmean-nli-bitfit",
68
+ "fork_url":"https://github.com/taskswithcode/sgpt",
69
+ "orig_author_url":"https://github.com/Muennighoff",
70
+ "orig_author":"Niklas Muennighoff",
71
+ "sota_info": {
72
+ "task":"#1 in multiple information retrieval & search tasks(smaller variant)",
73
+ "sota_link":"https://paperswithcode.com/paper/sgpt-gpt-sentence-embeddings-for-semantic",
74
+ },
75
+ "paper_url":"https://arxiv.org/abs/2202.08904v5",
76
+ "mark":True,
77
+ "class":"SGPTModel"},
78
+ { "name":"SGPT-1.3B",
79
+ "model": "Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
80
+ "fork_url":"https://github.com/taskswithcode/sgpt",
81
+ "orig_author_url":"https://github.com/Muennighoff",
82
+ "orig_author":"Niklas Muennighoff",
83
+ "sota_info": {
84
+ "task":"#1 in multiple information retrieval & search tasks(smaller variant)",
85
+ "sota_link":"https://paperswithcode.com/paper/sgpt-gpt-sentence-embeddings-for-semantic",
86
+ },
87
+ "paper_url":"https://arxiv.org/abs/2202.08904v5",
88
+ "mark":True,
89
  "class":"SGPTModel"},
 
 
90
  { "name":"SGPT-5.8B",
91
  "model": "Muennighoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit" ,
92
  "fork_url":"https://github.com/taskswithcode/sgpt",
 
100
  "mark":True,
101
  "class":"SGPTModel"},
102
 
103
+ { "name":"SIMCSE-large" ,
104
+ "model":"princeton-nlp/sup-simcse-roberta-large",
105
+ "fork_url":"https://github.com/taskswithcode/SimCSE",
106
+ "orig_author_url":"https://github.com/princeton-nlp",
107
+ "orig_author":"Princeton Natural Language Processing",
108
+ "sota_info": {
109
+ "task":"Within top 10 in multiple semantic textual similarity tasks",
110
+ "sota_link":"https://paperswithcode.com/paper/simcse-simple-contrastive-learning-of"
111
+ },
112
+ "paper_url":"https://arxiv.org/abs/2104.08821v4",
113
+ "mark":True,
114
+ "class":"SimCSEModel","sota_link":"https://paperswithcode.com/sota/semantic-textual-similarity-on-sick"},
115
 
116
+ { "name":"SIMCSE-base" ,
117
+ "model":"princeton-nlp/sup-simcse-roberta-base",
118
+ "fork_url":"https://github.com/taskswithcode/SimCSE",
119
+ "orig_author_url":"https://github.com/princeton-nlp",
120
+ "orig_author":"Princeton Natural Language Processing",
121
  "sota_info": {
122
+ "task":"Within top 10 in multiple semantic textual similarity tasks(smaller variant)",
123
+ "sota_link":"https://paperswithcode.com/paper/simcse-simple-contrastive-learning-of"
124
  },
125
+ "paper_url":"https://arxiv.org/abs/2104.08821v4",
126
  "mark":True,
127
+ "class":"SimCSEModel","sota_link":"https://paperswithcode.com/sota/semantic-textual-similarity-on-sick"},
128
+
129
 
130
  ]
131
 
132
 
133
 
134
+
135
+
136
  example_file_names = {
137
  "Machine learning terms (30+ phrases)": "tests/small_test.txt",
138
  "Customer feedback mixed with noise (50+ sentences)":"tests/larger_test.txt"
 
145
  for node in model_names:
146
  options_arr .append(node["name"])
147
  if (node["mark"] == True):
148
+ markdown_str += f"<div style=\"font-size:16px; color: #5f5f5f; text-align: left\">&nbsp;•&nbsp;Model:&nbsp;<a href=\'{node['paper_url']}\' target='_blank'>{node['name']}</a><br/>&nbsp;&nbsp;&nbsp;&nbsp;Code released by:&nbsp;<a href=\'{node['orig_author_url']}\' target='_blank'>{node['orig_author']}</a><br/>&nbsp;&nbsp;&nbsp;&nbsp;Model info:&nbsp;<a href=\'{node['sota_info']['sota_link']}\' target='_blank'>{node['sota_info']['task']}</a><br/><br/></div>"
149
  markdown_str += "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><b>Note:</b><br/>•&nbsp;Uploaded files are loaded into non-persistent memory for the duration of the computation. They are not saved</div>"
150
  limit = "{:,}".format(MAX_INPUT)
151
  markdown_str += f"<div style=\"font-size:12px; color: #9f9f9f; text-align: left\">•&nbsp;User uploaded file has a maximum limit of {limit} sentences.</div>"
152
  return options_arr,markdown_str
153
 
154
 
155
+ st.set_page_config(page_title='TWC - Compare popular/state-of-the-art models for Sentence Similarity task', page_icon="logo.jpg", layout='centered', initial_sidebar_state='auto',
156
  menu_items={
157
+ 'Get help': "mailto:taskswithcode@gmail.com",
158
+ 'Report a Bug': "mailto:taskswithcode@gmail.com",
159
  'About': 'This app was created by taskswithcode. http://taskswithcode.com'
160
  })
161
  col,pad = st.columns([85,15])
 
239
 
240
  def main():
241
  init_session()
242
+ st.markdown("<h5 style='text-align: center;'>Compare popular/state-of-the-art models for Sentence Similarity task</h5>", unsafe_allow_html=True)
243
 
244
 
245
  try: