FremyCompany
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -45,12 +45,12 @@ if reroll_button:
     sent1 = mid_st.text_input("Type your second sentence here", value=random_sentences[1], key="sent1")
 else:
     # Allow the user to input two sentences
-    sent0 = mid_st.text_input("Type your first sentence here", value="De fait, mon mari ne parlait jamais de ses affaires avec moi.", key="sent0")
-    sent1 = mid_st.text_input("Type your second sentence here", value="M'n man had het met mij nooit over z'n zaken, inderdaad.", key="sent1")
+    sent0 = mid_st.text_input("Type your first sentence here", value="De fait, mon mari ne parlait jamais de ses affaires avec moi." if "sent0" not in st.session_state else st.session_state.sent0, key="sent0")
+    sent1 = mid_st.text_input("Type your second sentence here", value="M'n man had het met mij nooit over z'n zaken, inderdaad." if "sent1" not in st.session_state else st.session_state.sent1, key="sent1")
 
 
 # Display the mapping between the two sentences
-DEBUG =
+DEBUG = True
 if DEBUG:
 
     # Use some dummy data
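Note on this hunk: the new `value=` expressions seed the text inputs from `st.session_state` when it already holds a value, so user-typed sentences survive a Streamlit rerun instead of snapping back to the demo defaults. A minimal sketch of the same pattern, with a hypothetical widget key `demo_text`:

    import streamlit as st

    # On the first run the widget shows the demo default; on later reruns it
    # reuses whatever the user typed, which Streamlit stored under the key.
    default = "demo default" if "demo_text" not in st.session_state else st.session_state.demo_text
    text = st.text_input("Type here", value=default, key="demo_text")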
@@ -71,7 +71,7 @@ else:
     import torch.nn.functional as F
     from transformers import AutoTokenizer, AutoModel
 
-    @st.
+    @st.cache_resource
    def load_model_and_tokenizer():
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = AutoModel.from_pretrained('Parallia/Fairly-Multilingual-ModernBERT-Embed-BE', trust_remote_code=True).to(device)
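The old decorator is truncated in this view (`@st.`); the replacement is `@st.cache_resource`, which Streamlit recommends for global, unpicklable objects such as models: the function body runs once per process and every rerun shares the same instance. A toy sketch of how it differs from `@st.cache_data` (both decorators are real Streamlit APIs; the functions are made up):

    import streamlit as st

    @st.cache_resource  # one shared object per process; returned as-is, never copied
    def load_heavy_object():
        return object()  # stand-in for a model or client handle

    @st.cache_data  # memoizes by argument value; the result is pickled and copied per caller
    def square(x: int) -> int:
        return x * x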
@@ -80,16 +80,21 @@ else:
 
     model, tokenizer = load_model_and_tokenizer()
 
+    @st.cache_data
+    def encode_sentences(sent0, sent1):
+        sentences = [sent0, sent1]
+        tokens = []
+        embeddings = []
+        for sentence in sentences:
+            with torch.no_grad():
+                encoded_sentence = tokenizer(sentence, padding=False, truncation=True, return_tensors="pt").to(model.device)
+                embedded_sentence = model(**encoded_sentence).last_hidden_state[0].detach().cpu().clone()
+            tokens.append(tokenizer.tokenize(sentence))
+            embeddings.append(embedded_sentence)
+        return tokens, embeddings
+
     # Encode the sentences
-    sentences = [sent0, sent1]
-    tokens = []
-    embeddings = []
-    for sentence in sentences:
-        with torch.no_grad():
-            encoded_sentence = tokenizer(sentence, padding=False, truncation=True, return_tensors="pt").to(model.device)
-            embedded_sentence = model(**encoded_sentence).last_hidden_state[0].detach().cpu().clone()
-        tokens.append(tokenizer.tokenize(sentence))
-        embeddings.append(embedded_sentence)
+    tokens, embeddings = encode_sentences(sent0, sent1)
 
     # Calculate the cross-token similarity
     token_similarities = F.normalize(embeddings[0], dim=1) @ F.normalize(embeddings[1], dim=1).T
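Wrapping the encoding step in a `@st.cache_data` function keyed on the two raw sentences means the forward pass only reruns when the text actually changes; the returned token lists and CPU tensors are pickled into the cache and unpickled on later reruns. A rough sketch of that behavior, with a hypothetical `embed` helper standing in for the model call:

    import streamlit as st
    import torch

    @st.cache_data
    def embed(sentence: str) -> torch.Tensor:
        # Runs once per distinct sentence; identical calls return a cached copy.
        return torch.randn(4)  # stand-in for the real model forward pass

    first = embed("hello")   # computed
    second = embed("hello")  # served from the cache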
@@ -148,6 +153,8 @@ html += """
     animation-duration: 15s;
     animation-timing-function: steps(14, start);
     animation-iteration-count: infinite;
+    text-decoration: underline;
+    text-decoration: underline double;
     text-decoration: underline 0.3em;
     text-decoration-skip: none;
     text-decoration-skip-ink: none;
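The two inserted `text-decoration` lines form a graceful-degradation chain: browsers drop declarations they cannot parse, so each later declaration overrides the earlier ones only where its syntax is supported. Spelled out in the app's own `html += """..."""` style, with comments added for illustration:

    html += """
        text-decoration: underline;          /* every browser */
        text-decoration: underline double;   /* where line styles in the shorthand parse */
        text-decoration: underline 0.3em;    /* where thickness in the shorthand parses */
    """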
@@ -180,6 +187,8 @@ for j in range(len(tokens[1])):
     for i in range(len(tokens[0])):
         html += f"""--p{i}: {token_probabilities_12[i][j]}; """
     html += """}"""
+html += """
+body:has(#sent0:hover,#sent1:hover) span { --p: 0 !important; animation-play-state: paused; }"""
 for i in range(len(tokens[0])):
     html += f"""
 body:has(#sent0 span:nth-child({i+1}):hover) span {{ --p: var(--p{i}) !important; }}"""
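The rule added in the last hunk zeroes every token's `--p` and pauses the cycling animation as soon as either sentence is hovered; the per-token `body:has(#sent0 span:nth-child(...):hover)` rules emitted just below then point every span's `--p` at the probability stored for the hovered token, so hovering token i of the first sentence shows how strongly each token of the second sentence aligns with it. A stripped-down sketch of the generated CSS for a hypothetical two-token sentence:

    html = """
    /* Hovering either sentence zeroes the highlight and freezes the cycle */
    body:has(#sent0:hover,#sent1:hover) span { --p: 0 !important; animation-play-state: paused; }"""
    for i in range(2):  # two tokens in this toy example
        html += f"""
    body:has(#sent0 span:nth-child({i+1}):hover) span {{ --p: var(--p{i}) !important; }}"""
    print(html)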