Tonic committed on
Commit
0443f2a
1 Parent(s): beaca17

Update app.py

Files changed (1)
  1. app.py +56 -62
app.py CHANGED
@@ -44,78 +44,72 @@ def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tenso
     sequence_lengths = attention_mask.sum(dim=1) - 1
     batch_size = last_hidden_states.shape[0]
     return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
+
 def clear_cuda_cache():
     torch.cuda.empty_cache()
 
 def free_memory(*args):
     for arg in args:
         del arg
-
-class EmbeddingModel:
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
-        self.model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
-
-    @spaces.GPU
-    def _compute_cosine_similarity(self, emb1, emb2):
-        tensor1 = torch.tensor(emb1).to(device).half()
-        tensor2 = torch.tensor(emb2).to(device).half()
-        similarity = F.cosine_similarity(tensor1, tensor2).item()
-        free_memory(tensor1, tensor2)
-        return similarity
-
-    @spaces.GPU
-    def compute_embeddings(self, selected_task, input_text):
-        try:
-            task_description = tasks[selected_task]
-        except KeyError:
-            print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
-        max_length = 2042
-        processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
 
-        batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
-        batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
-        batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
-        batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
-        outputs = self.model(**batch_dict)
-        embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
-        embeddings = F.normalize(embeddings, p=2, dim=1)
-        embeddings_list = embeddings.detach().cpu().numpy().tolist()
-        return embeddings_list
-
-    @spaces.GPU
-    def compute_similarity(self, selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
-        try:
-            task_description = tasks[selected_task]
-        except KeyError:
-            print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
-        # Compute embeddings for each sentence
-        embeddings1 = self.compute_embeddings(self.selected_task, sentence1)
-        embeddings2 = self.compute_embeddings(self.selected_task, sentence2)
-        embeddings3 = self.compute_embeddings(self.selected_task, extra_sentence1)
-        embeddings4 = self.compute_embeddings(self.selected_task, extra_sentence2)
+@spaces.GPU
+def compute_embeddings(selected_task, input_text):
+    try:
+        task_description = tasks[selected_task]
+    except KeyError:
+        print(f"Selected task not found: {selected_task}")
+        return f"Error: Task '{selected_task}' not found. Please select a valid task."
+    max_length = 2042
+    processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
 
-        # Convert embeddings to tensors
-        embeddings_tensor1 = torch.tensor(embeddings1).to(device).half()
-        embeddings_tensor2 = torch.tensor(embeddings2).to(device).half()
-        embeddings_tensor3 = torch.tensor(embeddings3).to(device).half()
-        embeddings_tensor4 = torch.tensor(embeddings4).to(device).half()
+    batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
+    batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
+    batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
+    batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
+    outputs = self.model(**batch_dict)
+    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
+    embeddings = F.normalize(embeddings, p=2, dim=1)
+    embeddings_list = embeddings.detach().cpu().numpy().tolist()
+    return embeddings_list
+
+@spaces.GPU
+def compute_similarity(self, selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
+    try:
+        task_description = tasks[selected_task]
+    except KeyError:
+        print(f"Selected task not found: {selected_task}")
+        return f"Error: Task '{selected_task}' not found. Please select a valid task."
+    # Compute embeddings for each sentence
+    embeddings1 = self.compute_embeddings(self.selected_task, sentence1)
+    embeddings2 = self.compute_embeddings(self.selected_task, sentence2)
+    embeddings3 = self.compute_embeddings(self.selected_task, extra_sentence1)
+    embeddings4 = self.compute_embeddings(self.selected_task, extra_sentence2)
 
-        # Compute cosine similarity
-        similarity1 = self._compute_cosine_similarity(embeddings1, embeddings2)
-        similarity2 = self._compute_cosine_similarity(embeddings1, embeddings3)
-        similarity3 = self._compute_cosine_similarity(embeddings1, embeddings4)
+    # Convert embeddings to tensors
+    embeddings_tensor1 = torch.tensor(embeddings1).to(device).half()
+    embeddings_tensor2 = torch.tensor(embeddings2).to(device).half()
+    embeddings_tensor3 = torch.tensor(embeddings3).to(device).half()
+    embeddings_tensor4 = torch.tensor(embeddings4).to(device).half()
+
+    # Compute cosine similarity
+    similarity1 = self._compute_cosine_similarity(embeddings1, embeddings2)
+    similarity2 = self._compute_cosine_similarity(embeddings1, embeddings3)
+    similarity3 = self._compute_cosine_similarity(embeddings1, embeddings4)
 
-        # Free memory
-        free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
+    # Free memory
+    free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
 
-        return similarity1, similarity2, similarity3
+    return similarity1, similarity2, similarity3
+
+@spaces.GPU
+def _compute_cosine_similarity(emb1, emb2):
+    tensor1 = torch.tensor(emb1).to(device).half()
+    tensor2 = torch.tensor(emb2).to(device).half()
+    similarity = F.cosine_similarity(tensor1, tensor2).item()
+    free_memory(tensor1, tensor2)
+    return similarity
 
-
 def app_interface():
-    embedding_model = EmbeddingModel()
     with gr.Blocks() as demo:
         gr.Markdown(title)
         gr.Markdown(description)
@@ -127,7 +121,7 @@ def app_interface():
         compute_button = gr.Button("Try🐣🛌🏻e5")
        output_display = gr.Textbox(label="🐣e5-mistral🛌🏻 Embeddings")
         compute_button.click(
-            fn=embedding_model.compute_embeddings,
+            fn=compute_embeddings,
             inputs=[task_dropdown, input_text_box],
             outputs=output_display
         )
@@ -140,8 +134,8 @@ def app_interface():
         similarity_button = gr.Button("Compute Similarity")
         similarity_output = gr.Label(label="🐣e5-mistral🛌🏻 Similarity Scores")
         similarity_button.click(
-            fn=embedding_model.compute_similarity,
-            inputs=[task_dropdown, sentence1_box, sentence2_box],
+            fn=compute_similarity,
+            inputs=[task_dropdown, sentence1_box, sentence2_box, extra_sentence1_box, extra_sentence2_box],
             outputs=similarity_output
         )
 
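
Note on the refactor: the commit flattens the EmbeddingModel class into module-level @spaces.GPU functions, but the added lines still reference self.tokenizer, self.model, self.selected_task, self.compute_embeddings, and self._compute_cosine_similarity, which no longer exist outside the class. Below is a minimal sketch of how the module-level version could resolve those references. It is not part of this commit: the device and tasks globals are assumptions (app.py defines the real task mapping), while the tokenizer/model globals mirror the deleted __init__.

# Sketch only: module-level rewrite with the leftover `self.` references resolved.
# Assumptions: `device` and the placeholder `tasks` dict below stand in for the
# definitions app.py already has; tokenizer/model mirror the deleted EmbeddingModel.__init__.
import spaces
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoModel, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"  # assumed; app.py sets its own device
tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
tasks = {}  # placeholder: app.py defines the real task -> instruction mapping

def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    # Pick the hidden state at each sequence's last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]

@spaces.GPU
def compute_embeddings(selected_task, input_text):
    try:
        task_description = tasks[selected_task]
    except KeyError:
        return f"Error: Task '{selected_task}' not found. Please select a valid task."
    max_length = 2042
    processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
    batch_dict = tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
    # Append EOS, pad to a batch, and move the tensors to the target device.
    batch_dict['input_ids'] = [ids + [tokenizer.eos_token_id] for ids in batch_dict['input_ids']]
    batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
    batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
    outputs = model(**batch_dict)
    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
    embeddings = F.normalize(embeddings, p=2, dim=1)
    return embeddings.detach().cpu().numpy().tolist()

@spaces.GPU
def _compute_cosine_similarity(emb1, emb2):
    tensor1 = torch.tensor(emb1).to(device).half()
    tensor2 = torch.tensor(emb2).to(device).half()
    return F.cosine_similarity(tensor1, tensor2).item()

@spaces.GPU
def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
    # Embed the anchor sentence and the three comparison sentences, then score each pair.
    embeddings1 = compute_embeddings(selected_task, sentence1)
    embeddings2 = compute_embeddings(selected_task, sentence2)
    embeddings3 = compute_embeddings(selected_task, extra_sentence1)
    embeddings4 = compute_embeddings(selected_task, extra_sentence2)
    similarity1 = _compute_cosine_similarity(embeddings1, embeddings2)
    similarity2 = _compute_cosine_similarity(embeddings1, embeddings3)
    similarity3 = _compute_cosine_similarity(embeddings1, embeddings4)
    return similarity1, similarity2, similarity3

With module-level functions like these, the Gradio wiring added in this commit (fn=compute_embeddings, fn=compute_similarity) works unchanged. The sketch drops the unused embeddings_tensor* conversions and the free_memory calls, since del on a loop variable only removes the local name and does not release GPU memory.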