Spaces:
Runtime error
Runtime error
taskswithcode
committed on
Commit
•
0242b2e
1
Parent(s):
0c1f2c6
Update app.py
Browse files
app.py
CHANGED
@@ -2,18 +2,91 @@ import time
|
|
2 |
import streamlit as st
|
3 |
import string
|
4 |
from io import StringIO
|
|
|
5 |
import json
|
6 |
-
from
|
|
|
|
|
|
|
7 |
|
8 |
-
|
|
|
9 |
|
10 |
model_names = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
{ "name":"SGPT-125M",
|
12 |
"model":"Muennighoff/SGPT-125M-weightedmean-nli-bitfit",
|
13 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
"class":"SGPTModel"},
|
15 |
-
|
16 |
-
|
17 |
{ "name":"SGPT-5.8B",
|
18 |
"model": "Muennighoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit" ,
|
19 |
"fork_url":"https://github.com/taskswithcode/sgpt",
|
@@ -27,28 +100,39 @@ model_names = [
|
|
27 |
"mark":True,
|
28 |
"class":"SGPTModel"},
|
29 |
|
30 |
-
{ "name":"
|
31 |
-
"model":
|
32 |
-
"
|
33 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
{
|
36 |
-
"model":"
|
37 |
-
"fork_url":"https://github.com/taskswithcode/
|
38 |
-
"orig_author_url":"https://github.com/
|
39 |
-
"orig_author":"
|
40 |
"sota_info": {
|
41 |
-
"task":"
|
42 |
-
"sota_link":"https://
|
43 |
},
|
44 |
-
"paper_url":"https://arxiv.org/abs/
|
45 |
"mark":True,
|
46 |
-
"class":"
|
|
|
47 |
|
48 |
]
|
49 |
|
50 |
|
51 |
|
|
|
|
|
52 |
example_file_names = {
|
53 |
"Machine learning terms (30+ phrases)": "tests/small_test.txt",
|
54 |
"Customer feedback mixed with noise (50+ sentences)":"tests/larger_test.txt"
|
@@ -61,15 +145,17 @@ def construct_model_info_for_display():
|
|
61 |
for node in model_names:
|
62 |
options_arr .append(node["name"])
|
63 |
if (node["mark"] == True):
|
64 |
-
markdown_str += f"<div style=\"font-size:16px; color: #5f5f5f; text-align: left\"> • Model: <a href=\'{node['paper_url']}\' target='_blank'>{node['name']}</a><br/> Code released by: <a href=\'{node['orig_author_url']}\' target='_blank'>{node['orig_author']}</a><br/> Model info: <a href=\'{node['sota_info']['sota_link']}\' target='_blank'>{node['sota_info']['task']}</a><br
|
65 |
markdown_str += "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><b>Note:</b><br/>• Uploaded files are loaded into non-persistent memory for the duration of the computation. They are not saved</div>"
|
66 |
limit = "{:,}".format(MAX_INPUT)
|
67 |
markdown_str += f"<div style=\"font-size:12px; color: #9f9f9f; text-align: left\">• User uploaded file has a maximum limit of {limit} sentences.</div>"
|
68 |
return options_arr,markdown_str
|
69 |
|
70 |
|
71 |
-
st.set_page_config(page_title='TWC - Compare state-of-the-art models for Sentence Similarity task', page_icon="logo.jpg", layout='centered', initial_sidebar_state='auto',
|
72 |
menu_items={
|
|
|
|
|
73 |
'About': 'This app was created by taskswithcode. http://taskswithcode.com'
|
74 |
})
|
75 |
col,pad = st.columns([85,15])
|
@@ -153,7 +239,7 @@ def init_session():
|
|
153 |
|
154 |
def main():
|
155 |
init_session()
|
156 |
-
st.markdown("<
|
157 |
|
158 |
|
159 |
try:
|
|
|
2 |
import streamlit as st
|
3 |
import string
|
4 |
from io import StringIO
|
5 |
+
import pdb
|
6 |
import json
|
7 |
+
from twc_embeddings import HFModel,SimCSEModel,SGPTModel
|
8 |
+
|
9 |
+
|
10 |
+
MAX_INPUT = 10000
|
11 |
|
12 |
+
|
13 |
+
from transformers import BertTokenizer, BertForMaskedLM
|
14 |
|
15 |
model_names = [
|
16 |
+
|
17 |
+
{ "name":"sentence-transformers/all-MiniLM-L6-v2",
|
18 |
+
"model":"sentence-transformers/all-MiniLM-L6-v2",
|
19 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
20 |
+
"orig_author_url":"https://github.com/UKPLab",
|
21 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
22 |
+
"sota_info": {
|
23 |
+
"task":"Over 3.8 million downloads from huggingface",
|
24 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
25 |
+
},
|
26 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
27 |
+
"mark":True,
|
28 |
+
"class":"HFModel"},
|
29 |
+
{ "name":"sentence-transformers/paraphrase-MiniLM-L6-v2",
|
30 |
+
"model":"sentence-transformers/paraphrase-MiniLM-L6-v2",
|
31 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
32 |
+
"orig_author_url":"https://github.com/UKPLab",
|
33 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
34 |
+
"sota_info": {
|
35 |
+
"task":"Over 2.4 million downloads from huggingface",
|
36 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
37 |
+
},
|
38 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
39 |
+
"mark":True,
|
40 |
+
"class":"HFModel"},
|
41 |
+
{ "name":"sentence-transformers/bert-base-nli-mean-tokens",
|
42 |
+
"model":"sentence-transformers/bert-base-nli-mean-tokens",
|
43 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
44 |
+
"orig_author_url":"https://github.com/UKPLab",
|
45 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
46 |
+
"sota_info": {
|
47 |
+
"task":"Over 700,000 downloads from huggingface",
|
48 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
49 |
+
},
|
50 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
51 |
+
"mark":True,
|
52 |
+
"class":"HFModel"},
|
53 |
+
{ "name":"sentence-transformers/all-mpnet-base-v2",
|
54 |
+
"model":"sentence-transformers/all-mpnet-base-v2",
|
55 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
56 |
+
"orig_author_url":"https://github.com/UKPLab",
|
57 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
58 |
+
"sota_info": {
|
59 |
+
"task":"Over 500,000 downloads from huggingface",
|
60 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
61 |
+
},
|
62 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
63 |
+
"mark":True,
|
64 |
+
"class":"HFModel"},
|
65 |
+
|
66 |
{ "name":"SGPT-125M",
|
67 |
"model":"Muennighoff/SGPT-125M-weightedmean-nli-bitfit",
|
68 |
+
"fork_url":"https://github.com/taskswithcode/sgpt",
|
69 |
+
"orig_author_url":"https://github.com/Muennighoff",
|
70 |
+
"orig_author":"Niklas Muennighoff",
|
71 |
+
"sota_info": {
|
72 |
+
"task":"#1 in multiple information retrieval & search tasks(smaller variant)",
|
73 |
+
"sota_link":"https://paperswithcode.com/paper/sgpt-gpt-sentence-embeddings-for-semantic",
|
74 |
+
},
|
75 |
+
"paper_url":"https://arxiv.org/abs/2202.08904v5",
|
76 |
+
"mark":True,
|
77 |
+
"class":"SGPTModel"},
|
78 |
+
{ "name":"SGPT-1.3B",
|
79 |
+
"model": "Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
|
80 |
+
"fork_url":"https://github.com/taskswithcode/sgpt",
|
81 |
+
"orig_author_url":"https://github.com/Muennighoff",
|
82 |
+
"orig_author":"Niklas Muennighoff",
|
83 |
+
"sota_info": {
|
84 |
+
"task":"#1 in multiple information retrieval & search tasks(smaller variant)",
|
85 |
+
"sota_link":"https://paperswithcode.com/paper/sgpt-gpt-sentence-embeddings-for-semantic",
|
86 |
+
},
|
87 |
+
"paper_url":"https://arxiv.org/abs/2202.08904v5",
|
88 |
+
"mark":True,
|
89 |
"class":"SGPTModel"},
|
|
|
|
|
90 |
{ "name":"SGPT-5.8B",
|
91 |
"model": "Muennighoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit" ,
|
92 |
"fork_url":"https://github.com/taskswithcode/sgpt",
|
|
|
100 |
"mark":True,
|
101 |
"class":"SGPTModel"},
|
102 |
|
103 |
+
{ "name":"SIMCSE-large" ,
|
104 |
+
"model":"princeton-nlp/sup-simcse-roberta-large",
|
105 |
+
"fork_url":"https://github.com/taskswithcode/SimCSE",
|
106 |
+
"orig_author_url":"https://github.com/princeton-nlp",
|
107 |
+
"orig_author":"Princeton Natural Language Processing",
|
108 |
+
"sota_info": {
|
109 |
+
"task":"Within top 10 in multiple semantic textual similarity tasks",
|
110 |
+
"sota_link":"https://paperswithcode.com/paper/simcse-simple-contrastive-learning-of"
|
111 |
+
},
|
112 |
+
"paper_url":"https://arxiv.org/abs/2104.08821v4",
|
113 |
+
"mark":True,
|
114 |
+
"class":"SimCSEModel","sota_link":"https://paperswithcode.com/sota/semantic-textual-similarity-on-sick"},
|
115 |
|
116 |
+
{ "name":"SIMCSE-base" ,
|
117 |
+
"model":"princeton-nlp/sup-simcse-roberta-base",
|
118 |
+
"fork_url":"https://github.com/taskswithcode/SimCSE",
|
119 |
+
"orig_author_url":"https://github.com/princeton-nlp",
|
120 |
+
"orig_author":"Princeton Natural Language Processing",
|
121 |
"sota_info": {
|
122 |
+
"task":"Within top 10 in multiple semantic textual similarity tasks(smaller variant)",
|
123 |
+
"sota_link":"https://paperswithcode.com/paper/simcse-simple-contrastive-learning-of"
|
124 |
},
|
125 |
+
"paper_url":"https://arxiv.org/abs/2104.08821v4",
|
126 |
"mark":True,
|
127 |
+
"class":"SimCSEModel","sota_link":"https://paperswithcode.com/sota/semantic-textual-similarity-on-sick"},
|
128 |
+
|
129 |
|
130 |
]
|
131 |
|
132 |
|
133 |
|
134 |
+
|
135 |
+
|
136 |
example_file_names = {
|
137 |
"Machine learning terms (30+ phrases)": "tests/small_test.txt",
|
138 |
"Customer feedback mixed with noise (50+ sentences)":"tests/larger_test.txt"
|
|
|
145 |
for node in model_names:
|
146 |
options_arr .append(node["name"])
|
147 |
if (node["mark"] == True):
|
148 |
+
markdown_str += f"<div style=\"font-size:16px; color: #5f5f5f; text-align: left\"> • Model: <a href=\'{node['paper_url']}\' target='_blank'>{node['name']}</a><br/> Code released by: <a href=\'{node['orig_author_url']}\' target='_blank'>{node['orig_author']}</a><br/> Model info: <a href=\'{node['sota_info']['sota_link']}\' target='_blank'>{node['sota_info']['task']}</a><br/><br/></div>"
|
149 |
markdown_str += "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><b>Note:</b><br/>• Uploaded files are loaded into non-persistent memory for the duration of the computation. They are not saved</div>"
|
150 |
limit = "{:,}".format(MAX_INPUT)
|
151 |
markdown_str += f"<div style=\"font-size:12px; color: #9f9f9f; text-align: left\">• User uploaded file has a maximum limit of {limit} sentences.</div>"
|
152 |
return options_arr,markdown_str
|
153 |
|
154 |
|
155 |
+
st.set_page_config(page_title='TWC - Compare popular/state-of-the-art models for Sentence Similarity task', page_icon="logo.jpg", layout='centered', initial_sidebar_state='auto',
|
156 |
menu_items={
|
157 |
+
'Get help': "mailto:taskswithcode@gmail.com",
|
158 |
+
'Report a Bug': "mailto:taskswithcode@gmail.com",
|
159 |
'About': 'This app was created by taskswithcode. http://taskswithcode.com'
|
160 |
})
|
161 |
col,pad = st.columns([85,15])
|
|
|
239 |
|
240 |
def main():
|
241 |
init_session()
|
242 |
+
st.markdown("<h5 style='text-align: center;'>Compare popular/state-of-the-art models for Sentence Similarity task</h5>", unsafe_allow_html=True)
|
243 |
|
244 |
|
245 |
try:
|