FremyCompany committed on
Commit 3290a51 · verified · 1 Parent(s): 4bc0b85

Update app.py

Files changed (1)
  1. app.py +61 -29
app.py CHANGED
@@ -1,8 +1,58 @@
-import streamlit as st
+import streamlit as st, random
 st.set_page_config(layout="wide")
 
+# Give some context
+st.html("""
+    <h1 style="text-align: center; margin: 0px; text-wrap: balance;">🔀 Word-level alignment between two sentences</h1>
+    <div style="text-align: center; color: gray; text-wrap: balance;">Supports English, French, Dutch, and German.</div>
+    <style>
+        .stButton { text-align: center; }
+    </style>
+""")
+
+# Create a layout with a column on each side for padding
+_, mid_st, _ = st.columns([1, 2, 1])
+
+# Allow the user to reroll the example sentences
+reroll_button = mid_st.button("Try a new example!", key="reroll")
+if reroll_button:
+    example_sentences = [
+        # translations
+        ("The book, which was on the table, is now missing.", "Het boek, dat op de tafel lag, is nu verdwenen."),
+        ("If I had known, I would have acted differently.", "Si j'avais su, j'aurais agi différemment."),
+        ("She can speak three languages fluently.", "Sie kann drei Sprachen fließend sprechen."),
+        ("I wish I had more time to learn.", "Ich wünschte, ich hätte mehr Zeit zum Lernen."),
+        ("The children were playing while their parents were talking.", "De kinderen speelden terwijl hun ouders aan het praten waren."),
+        ("He would go to the gym every day if he had more energy.", "Il irait à la salle de sport tous les jours s'il avait plus d'énergie."),
+        ("By the time I arrived, she had already left.", "Als ich ankam, was zij al vertrokken."),
+        ("Despite the rain, they went for a walk.", "Malgré la pluie, ils sont allés se promener."),
+        ("If I were you, I wouldn't do that.", "Als ik jou was, zou ik dat niet doen."),
+        ("The movie, which I watched yesterday, was fantastic.", "Der Film, den ich gestern gesehen habe, war fantastisch."),
+        # paraphrases
+        ("She has a remarkable ability to solve problems quickly.", "Her problem-solving skills are impressive and rapid."),
+        ("Despite the fact that the project was delayed, they managed to finish it on time.", "Even though the project was delayed, they were able to complete it by the deadline."),
+        ("The teacher asked the students to submit their assignments by Friday.", "The students were required to hand in their assignments no later than Friday."),
+        ("I haven't seen him in years, and I wonder how he's doing.", "It's been years since I last saw him, and I'm curious about his well-being."),
+        ("He was hesitant to take the offer because it seemed too good to be true.", "He doubted the offer because it appeared to be too perfect to be genuine."),
+        ("She didn't have the necessary qualifications, but she still managed to get the job.", "Even though she lacked the required qualifications, she succeeded in securing the position."),
+        ("John said that he would be going to the meeting later.", "According to John, he planned to attend the meeting later."),
+        ("The weather was terrible, so we decided to cancel the outdoor event.", "Due to the poor weather, we chose to call off the outdoor event."),
+        ("They have lived in this city for a long time, and they're very familiar with it.", "Having resided in this city for many years, they know it quite well."),
+        ("The book was so captivating that I couldn't put it down until I finished it.", "I found the book so engrossing that I read it all the way through without stopping.")
+    ]
+    random_sentences = random.choice(example_sentences)
+    sent0 = mid_st.text_input("Type your first sentence here", value=random_sentences[0], key="sent0")
+    sent1 = mid_st.text_input("Type your second sentence here", value=random_sentences[1], key="sent1")
+else:
+    # Allow the user to input two sentences
+    sent0 = mid_st.text_input("Type your first sentence here", value="De fait, mon mari ne parlait jamais de ses affaires avec moi.", key="sent0")
+    sent1 = mid_st.text_input("Type your second sentence here", value="M'n man had het met mij nooit over z'n zaken, inderdaad.", key="sent1")
+
+
+# Display the mapping between the two sentences
 DEBUG = False
 if DEBUG:
+
     # Use some dummy data
     tokens = [
         ["[0]\u2581De", "[0]\u2581fait", "[0],", "[0]\u2581mon", "[0]\u2581mari", "[0]\u2581ne", "[0]\u2581parlait", "[0]\u2581jamais", "[0]\u2581de", "[0]\u2581ses", "[0]\u2581affaires", "[0]\u2581avec", "[0]\u2581moi", "[0]."],
@@ -30,13 +80,8 @@ else:
 
     model, tokenizer = load_model_and_tokenizer()
 
-    _, mid_st, _ = st.columns([1, 2, 1])
-    sent0 = mid_st.text_input("Type your first sentence here", value="De fait, mon mari ne parlait jamais de ses affaires avec moi.", key="sent0")
-    sent1 = mid_st.text_input("Type your second sentence here", value="M'n man had het met mij nooit over z'n zaken, inderdaad.", key="sent1")
-
-    sentences = [
-        sent0, sent1
-    ]
+    # Encode the sentences
+    sentences = [sent0, sent1]
     tokens = []
    embeddings = []
     for sentence in sentences:
@@ -46,25 +91,17 @@ else:
         tokens.append(tokenizer.tokenize(sentence))
         embeddings.append(embedded_sentence)
 
+    # Calculate the cross-token similarity
     token_similarities = F.normalize(embeddings[0], dim=1) @ F.normalize(embeddings[1], dim=1).T
 
+    # Calculate the overall sentence similarity
     sentence_similarity = F.normalize(torch.mean(embeddings[0], dim=0), dim=-1) @ F.normalize(torch.mean(embeddings[1], dim=0), dim=-1)
 
-    #print("="*60)
-    #print("Mapping sentence1 to sentence2...")
-    #print("="*60)
+    # Map sentence1 to sentence2
     token_probabilities_12 = F.softmax(20*token_similarities, dim=1)
-    for i in range(len(tokens[0])):
-        j = torch.argmax(token_probabilities_12[i])
-        #print(tokens[0][i].ljust(15), tokens[1][j].ljust(15), round(token_probabilities_12[i][j].item(), 2))
 
-    #print("="*60)
-    #print("Mapping sentence2 to sentence1...")
-    #print("="*60)
+    # Map sentence2 to sentence1
     token_probabilities_21 = F.softmax(20*token_similarities.T, dim=1)
-    for j in range(len(tokens[1])):
-        i = torch.argmax(token_probabilities_21[j])
-        #print(tokens[1][j].ljust(15), tokens[0][i].ljust(15), round(token_probabilities_21[j][i].item(), 2))
 
     # Convert to native Python objects
     sentence_similarity = max(0, round(sentence_similarity.item(), 2))
@@ -72,12 +109,12 @@ else:
     token_probabilities_21 = token_probabilities_21.numpy().tolist()
 
     # Simplify the tokens for display
-    tokens = [[token[3:].replace("\u2581", " ") for token in sentence] for sentence in tokens]
+    tokens = [[token[3:].replace("\u2581", " ").replace("Ġ", " ") for token in sentence] for sentence in tokens]
 
 html = ''
 html += """
 <article>
-    <div>"""
+    <div style="color: gray">"""
 html += f"""{("✅ Congrats!" if sentence_similarity >= 0.65 else "❌ Sorry!")} These sentences have {100*sentence_similarity}% similarity."""
 html += """
     </div>
@@ -99,16 +136,13 @@ html += """
     article {
         font-family: sans-serif;
        text-align: center;
-    }
-    button:hover {
-        background-color: #0056b3;
+        margin-top: 2em;
     }
     p {
         margin: 0.5em;
         font-size: 2em;
         text-wrap: balance;
     }
-
     span {
         animation-name: rotate_bg;
         animation-duration: 15s;
@@ -120,7 +154,6 @@ html += """
         color: rgba(0, 0, 0, calc((50% + 50% * var(--p))));
         text-decoration-color: hsla(161, 100%, 43%, var(--p));
         background-color: hsla(161, 100%, 43%, calc(var(--p) * 0.2));
-
         --p: var(--p0); """
 for i in range(len(tokens[0])):
     html += f"""--p{i}: 0; """
@@ -161,7 +194,6 @@ for i in range(len(tokens[0])):
 html += """
     }
 </style>
-
 """
 
-st.html(html)
+st.html(html)
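Notes on the alignment math that this commit's new comments call out. The cross-token similarity is a plain cosine-similarity matrix: L2-normalize each sentence's token embeddings, and a single matrix product then compares every token pair. A minimal, self-contained sketch of that step (the random tensors and their shapes are stand-ins for the real encoder outputs; load_model_and_tokenizer is defined outside this diff):

import torch
import torch.nn.functional as F

# Stand-ins for the app's token embeddings: one (num_tokens, hidden_dim)
# tensor per sentence. Shapes are illustrative only.
emb0 = torch.randn(14, 1024)
emb1 = torch.randn(12, 1024)

# After row-wise normalization every row is a unit vector, so the matrix
# product is cosine similarity: entry [i, j] compares token i of sentence 0
# with token j of sentence 1.
token_similarities = F.normalize(emb0, dim=1) @ F.normalize(emb1, dim=1).T
print(token_similarities.shape)  # torch.Size([14, 12])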
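The overall score mean-pools each sentence's token embeddings and takes the cosine of the two pooled vectors. Continuing the sketch above:

# Mean-pool each sentence, normalize, then dot: the dot product of two unit
# vectors is their cosine similarity, a scalar in [-1, 1].
sentence_similarity = F.normalize(emb0.mean(dim=0), dim=-1) @ F.normalize(emb1.mean(dim=0), dim=-1)
print(round(sentence_similarity.item(), 2))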
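The factor of 20 inside the softmax acts as an inverse temperature: it sharpens each row of the similarity matrix so a token's probability mass concentrates on its best counterpart instead of spreading across the whole sentence. A row-wise softmax maps sentence 1 onto sentence 2, and the transpose gives the opposite direction, again continuing the sketch:

# Inverse temperature 20 turns soft cosine rows into near-one-hot alignments.
token_probabilities_12 = F.softmax(20 * token_similarities, dim=1)    # 0 -> 1
token_probabilities_21 = F.softmax(20 * token_similarities.T, dim=1)  # 1 -> 0

# Each row sums to 1; argmax recovers the hard alignment that the debug
# prints removed by this commit used to display.
j = token_probabilities_12[0].argmax().item()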
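On the display side, every token string carries a three-character language-tag prefix such as "[0]" plus the subword marker of whichever tokenizer produced it; token[3:] strips the prefix, and the commit extends the cleanup so the BPE-style "Ġ" marker is mapped back to a space alongside SentencePiece's "▁" (U+2581). For example:

# "[0]\u2581De" -> " De": strip the 3-char tag, then turn either word-boundary
# marker into a space ("Ġ" is U+0120, used by byte-level BPE tokenizers).
raw = ["[0]\u2581De", "[0]\u2581fait", "[0],"]
clean = [t[3:].replace("\u2581", " ").replace("\u0120", " ") for t in raw]
print(clean)  # [' De', ' fait', ',']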
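Finally, the highlight CSS drives everything off a custom property --p: the base span rule declares one variable per token (--p0, --p1, ..., all zero by default) and reads --p from --p0, while the rotate_bg animation, whose keyframes sit outside these hunks, presumably cycles --p through the per-token values so the green background walks across the aligned words. A stripped-down sketch of the generation pattern visible in the hunks, with a hypothetical token count in place of the app's real data:

# Rebuild the per-token variable declarations the way app.py does, for a
# hypothetical 3-token sentence; the app fills the zeros from
# token_probabilities_* elsewhere in the file.
num_tokens = 3
css = "span { --p: var(--p0); "
css += "".join(f"--p{i}: 0; " for i in range(num_tokens))
css += "background-color: hsla(161, 100%, 43%, calc(var(--p) * 0.2)); }"
print(css)
# span { --p: var(--p0); --p0: 0; --p1: 0; --p2: 0; background-color: hsla(161, 100%, 43%, calc(var(--p) * 0.2)); }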